From 982a03fa2c92a09e35903a2bd814666a1e2545a0 Mon Sep 17 00:00:00 2001 From: dim Date: Tue, 22 May 2012 21:30:23 +0000 Subject: Vendor import of llvm release_31 final r156748: http://llvm.org/svn/llvm-project/llvm/tags/RELEASE_31/final@156748 --- lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp | 86 +++++++++++++++++-------- 1 file changed, 58 insertions(+), 28 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp index 8ec1ae8..748668c 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp @@ -131,30 +131,16 @@ static void CheckForPhysRegDependency(SDNode *Def, SDNode *User, unsigned Op, } } -static void AddGlue(SDNode *N, SDValue Glue, bool AddGlue, SelectionDAG *DAG) { - SmallVector VTs; - SDNode *GlueDestNode = Glue.getNode(); - - // Don't add glue from a node to itself. - if (GlueDestNode == N) return; - - // Don't add glue to something that already has it, either as a use or value. - if (N->getOperand(N->getNumOperands()-1).getValueType() == MVT::Glue || - N->getValueType(N->getNumValues() - 1) == MVT::Glue) { - return; - } - for (unsigned I = 0, E = N->getNumValues(); I != E; ++I) - VTs.push_back(N->getValueType(I)); - - if (AddGlue) - VTs.push_back(MVT::Glue); - +// Helper for AddGlue to clone node operands. +static void CloneNodeWithValues(SDNode *N, SelectionDAG *DAG, + SmallVectorImpl &VTs, + SDValue ExtraOper = SDValue()) { SmallVector Ops; for (unsigned I = 0, E = N->getNumOperands(); I != E; ++I) Ops.push_back(N->getOperand(I)); - if (GlueDestNode) - Ops.push_back(Glue); + if (ExtraOper.getNode()) + Ops.push_back(ExtraOper); SDVTList VTList = DAG->getVTList(&VTs[0], VTs.size()); MachineSDNode::mmo_iterator Begin = 0, End = 0; @@ -173,6 +159,46 @@ static void AddGlue(SDNode *N, SDValue Glue, bool AddGlue, SelectionDAG *DAG) { MN->setMemRefs(Begin, End); } +static bool AddGlue(SDNode *N, SDValue Glue, bool AddGlue, SelectionDAG *DAG) { + SmallVector VTs; + SDNode *GlueDestNode = Glue.getNode(); + + // Don't add glue from a node to itself. + if (GlueDestNode == N) return false; + + // Don't add a glue operand to something that already uses glue. + if (GlueDestNode && + N->getOperand(N->getNumOperands()-1).getValueType() == MVT::Glue) { + return false; + } + // Don't add glue to something that already has a glue value. + if (N->getValueType(N->getNumValues() - 1) == MVT::Glue) return false; + + for (unsigned I = 0, E = N->getNumValues(); I != E; ++I) + VTs.push_back(N->getValueType(I)); + + if (AddGlue) + VTs.push_back(MVT::Glue); + + CloneNodeWithValues(N, DAG, VTs, Glue); + + return true; +} + +// Cleanup after unsuccessful AddGlue. Use the standard method of morphing the +// node even though simply shrinking the value list is sufficient. +static void RemoveUnusedGlue(SDNode *N, SelectionDAG *DAG) { + assert((N->getValueType(N->getNumValues() - 1) == MVT::Glue && + !N->hasAnyUseOfValue(N->getNumValues() - 1)) && + "expected an unused glue value"); + + SmallVector VTs; + for (unsigned I = 0, E = N->getNumValues()-1; I != E; ++I) + VTs.push_back(N->getValueType(I)); + + CloneNodeWithValues(N, DAG, VTs); +} + /// ClusterNeighboringLoads - Force nearby loads together by "gluing" them. /// This function finds loads of the same base and different offsets. 
If the /// offsets are not far apart (target specific), it add MVT::Glue inputs and @@ -240,19 +266,23 @@ void ScheduleDAGSDNodes::ClusterNeighboringLoads(SDNode *Node) { // Cluster loads by adding MVT::Glue outputs and inputs. This also // ensure they are scheduled in order of increasing addresses. SDNode *Lead = Loads[0]; - AddGlue(Lead, SDValue(0, 0), true, DAG); - - SDValue InGlue = SDValue(Lead, Lead->getNumValues() - 1); + SDValue InGlue = SDValue(0, 0); + if (AddGlue(Lead, InGlue, true, DAG)) + InGlue = SDValue(Lead, Lead->getNumValues() - 1); for (unsigned I = 1, E = Loads.size(); I != E; ++I) { bool OutGlue = I < E - 1; SDNode *Load = Loads[I]; - AddGlue(Load, InGlue, OutGlue, DAG); + // If AddGlue fails, we could leave an unsused glue value. This should not + // cause any + if (AddGlue(Load, InGlue, OutGlue, DAG)) { + if (OutGlue) + InGlue = SDValue(Load, Load->getNumValues() - 1); - if (OutGlue) - InGlue = SDValue(Load, Load->getNumValues() - 1); - - ++LoadsClustered; + ++LoadsClustered; + } + else if (!OutGlue && InGlue.getNode()) + RemoveUnusedGlue(InGlue.getNode(), DAG); } } -- cgit v1.1 From e6d4a67235c1e62e3e292b1a167c5c6b9610a706 Mon Sep 17 00:00:00 2001 From: dim Date: Tue, 22 May 2012 21:36:38 +0000 Subject: Vendor import of clang release_31 final r156748: http://llvm.org/svn/llvm-project/cfe/tags/RELEASE_31/final@156748 --- lib/Analysis/UninitializedValues.cpp | 3 +- lib/Basic/Version.cpp | 2 +- lib/Driver/ToolChains.cpp | 4 +- lib/Parse/ParseCXXInlineMethods.cpp | 72 +------------------- lib/Parse/ParseDecl.cpp | 14 +--- lib/Parse/ParseDeclCXX.cpp | 90 ++++--------------------- lib/Parse/ParseExpr.cpp | 2 +- lib/Parse/ParseExprCXX.cpp | 9 +-- lib/Sema/DeclSpec.cpp | 5 -- lib/Sema/SemaDecl.cpp | 17 +---- lib/Sema/SemaDeclCXX.cpp | 126 +++++++++++++++-------------------- lib/Sema/SemaLookup.cpp | 45 ++++--------- lib/Sema/SemaType.cpp | 5 +- lib/Serialization/ASTReader.cpp | 3 + lib/Serialization/ASTWriter.cpp | 3 + 15 files changed, 102 insertions(+), 298 deletions(-) (limited to 'lib') diff --git a/lib/Analysis/UninitializedValues.cpp b/lib/Analysis/UninitializedValues.cpp index 6e5da25..1c7e6b6 100644 --- a/lib/Analysis/UninitializedValues.cpp +++ b/lib/Analysis/UninitializedValues.cpp @@ -168,7 +168,8 @@ static const BinaryOperator *getLogicalOperatorInChain(const CFGBlock *block) { if (block->empty()) return 0; - const CFGStmt *cstmt = block->front().getAs(); + CFGElement front = block->front(); + const CFGStmt *cstmt = front.getAs(); if (!cstmt) return 0; diff --git a/lib/Basic/Version.cpp b/lib/Basic/Version.cpp index 8cb2386..cd16b4b 100644 --- a/lib/Basic/Version.cpp +++ b/lib/Basic/Version.cpp @@ -32,7 +32,7 @@ std::string getClangRepositoryPath() { // If the SVN_REPOSITORY is empty, try to use the SVN keyword. This helps us // pick up a tag in an SVN export, for example. 
- static StringRef SVNRepository("$URL: http://llvm.org/svn/llvm-project/cfe/branches/release_31/lib/Basic/Version.cpp $"); + static StringRef SVNRepository("$URL: http://llvm.org/svn/llvm-project/cfe/tags/RELEASE_31/final/lib/Basic/Version.cpp $"); if (URL.empty()) { URL = SVNRepository.slice(SVNRepository.find(':'), SVNRepository.find("/lib/Basic")); diff --git a/lib/Driver/ToolChains.cpp b/lib/Driver/ToolChains.cpp index 81657d8..7f9ed9a 100644 --- a/lib/Driver/ToolChains.cpp +++ b/lib/Driver/ToolChains.cpp @@ -1830,6 +1830,7 @@ enum LinuxDistro { OpenSuse11_3, OpenSuse11_4, OpenSuse12_1, + OpenSuse12_2, UbuntuHardy, UbuntuIntrepid, UbuntuJaunty, @@ -1848,7 +1849,7 @@ static bool IsRedhat(enum LinuxDistro Distro) { } static bool IsOpenSuse(enum LinuxDistro Distro) { - return Distro >= OpenSuse11_3 && Distro <= OpenSuse12_1; + return Distro >= OpenSuse11_3 && Distro <= OpenSuse12_2; } static bool IsDebian(enum LinuxDistro Distro) { @@ -1925,6 +1926,7 @@ static LinuxDistro DetectLinuxDistro(llvm::Triple::ArchType Arch) { .StartsWith("openSUSE 11.3", OpenSuse11_3) .StartsWith("openSUSE 11.4", OpenSuse11_4) .StartsWith("openSUSE 12.1", OpenSuse12_1) + .StartsWith("openSUSE 12.2", OpenSuse12_2) .Default(UnknownDistro); bool Exists; diff --git a/lib/Parse/ParseCXXInlineMethods.cpp b/lib/Parse/ParseCXXInlineMethods.cpp index f04d767..c7b29d9 100644 --- a/lib/Parse/ParseCXXInlineMethods.cpp +++ b/lib/Parse/ParseCXXInlineMethods.cpp @@ -348,77 +348,7 @@ void Parser::ParseLexedMethodDeclaration(LateParsedMethodDeclaration &LM) { LM.DefaultArgs[I].Toks = 0; } } - - // Parse a delayed exception-specification, if there is one. - if (CachedTokens *Toks = LM.ExceptionSpecTokens) { - // Save the current token position. - SourceLocation origLoc = Tok.getLocation(); - - // Parse the default argument from its saved token stream. - Toks->push_back(Tok); // So that the current token doesn't get lost - PP.EnterTokenStream(&Toks->front(), Toks->size(), true, false); - - // Consume the previously-pushed token. - ConsumeAnyToken(); - - // C++11 [expr.prim.general]p3: - // If a declaration declares a member function or member function - // template of a class X, the expression this is a prvalue of type - // "pointer to cv-qualifier-seq X" between the optional cv-qualifer-seq - // and the end of the function-definition, member-declarator, or - // declarator. - CXXMethodDecl *Method; - if (FunctionTemplateDecl *FunTmpl - = dyn_cast(LM.Method)) - Method = cast(FunTmpl->getTemplatedDecl()); - else - Method = cast(LM.Method); - - Sema::CXXThisScopeRAII ThisScope(Actions, Method->getParent(), - Method->getTypeQualifiers(), - getLangOpts().CPlusPlus0x); - - // Parse the exception-specification. - SourceRange SpecificationRange; - SmallVector DynamicExceptions; - SmallVector DynamicExceptionRanges; - ExprResult NoexceptExpr; - CachedTokens *ExceptionSpecTokens; - - ExceptionSpecificationType EST - = tryParseExceptionSpecification(/*Delayed=*/false, SpecificationRange, - DynamicExceptions, - DynamicExceptionRanges, NoexceptExpr, - ExceptionSpecTokens); - - // Clean up the remaining tokens. - if (Tok.is(tok::cxx_exceptspec_end)) - ConsumeToken(); - else if (EST != EST_None) - Diag(Tok.getLocation(), diag::err_except_spec_unparsed); - - // Attach the exception-specification to the method. - if (EST != EST_None) - Actions.actOnDelayedExceptionSpecification(LM.Method, EST, - SpecificationRange, - DynamicExceptions, - DynamicExceptionRanges, - NoexceptExpr.isUsable()? 
- NoexceptExpr.get() : 0); - - assert(!PP.getSourceManager().isBeforeInTranslationUnit(origLoc, - Tok.getLocation()) && - "tryParseExceptionSpecification went over the exception tokens!"); - - // There could be leftover tokens (e.g. because of an error). - // Skip through until we reach the original token position. - while (Tok.getLocation() != origLoc && Tok.isNot(tok::eof)) - ConsumeAnyToken(); - - delete LM.ExceptionSpecTokens; - LM.ExceptionSpecTokens = 0; - } - + PrototypeScope.Exit(); // Finish the delayed C++ method declaration. diff --git a/lib/Parse/ParseDecl.cpp b/lib/Parse/ParseDecl.cpp index 932ffb4..7995e68 100644 --- a/lib/Parse/ParseDecl.cpp +++ b/lib/Parse/ParseDecl.cpp @@ -4197,7 +4197,6 @@ void Parser::ParseFunctionDeclarator(Declarator &D, SmallVector DynamicExceptions; SmallVector DynamicExceptionRanges; ExprResult NoexceptExpr; - CachedTokens *ExceptionSpecTokens = 0; ParsedAttributes FnAttrs(AttrFactory); ParsedType TrailingReturnType; @@ -4264,18 +4263,12 @@ void Parser::ParseFunctionDeclarator(Declarator &D, dyn_cast(Actions.CurContext), DS.getTypeQualifiers(), IsCXX11MemberFunction); - + // Parse exception-specification[opt]. - bool Delayed = (D.getContext() == Declarator::MemberContext && - D.getDeclSpec().getStorageClassSpec() - != DeclSpec::SCS_typedef && - !D.getDeclSpec().isFriendSpecified()); - ESpecType = tryParseExceptionSpecification(Delayed, - ESpecRange, + ESpecType = tryParseExceptionSpecification(ESpecRange, DynamicExceptions, DynamicExceptionRanges, - NoexceptExpr, - ExceptionSpecTokens); + NoexceptExpr); if (ESpecType != EST_None) EndLoc = ESpecRange.getEnd(); @@ -4310,7 +4303,6 @@ void Parser::ParseFunctionDeclarator(Declarator &D, DynamicExceptions.size(), NoexceptExpr.isUsable() ? NoexceptExpr.get() : 0, - ExceptionSpecTokens, Tracker.getOpenLocation(), EndLoc, D, TrailingReturnType), diff --git a/lib/Parse/ParseDeclCXX.cpp b/lib/Parse/ParseDeclCXX.cpp index b9b51d7..5e6c4f5 100644 --- a/lib/Parse/ParseDeclCXX.cpp +++ b/lib/Parse/ParseDeclCXX.cpp @@ -1535,34 +1535,16 @@ AccessSpecifier Parser::getAccessSpecifierIfPresent() const { } /// \brief If the given declarator has any parts for which parsing has to be -/// delayed, e.g., default arguments or an exception-specification, create a -/// late-parsed method declaration record to handle the parsing at the end of -/// the class definition. +/// delayed, e.g., default arguments, create a late-parsed method declaration +/// record to handle the parsing at the end of the class definition. void Parser::HandleMemberFunctionDeclDelays(Declarator& DeclaratorInfo, Decl *ThisDecl) { // We just declared a member function. If this member function - // has any default arguments or an exception-specification, we'll need to - // parse them later. + // has any default arguments, we'll need to parse them later. LateParsedMethodDeclaration *LateMethod = 0; DeclaratorChunk::FunctionTypeInfo &FTI = DeclaratorInfo.getFunctionTypeInfo(); - - // If there was a delayed exception-specification, hold onto its tokens. - if (FTI.getExceptionSpecType() == EST_Delayed) { - // Push this method onto the stack of late-parsed method - // declarations. - LateMethod = new LateParsedMethodDeclaration(this, ThisDecl); - getCurrentClass().LateParsedDeclarations.push_back(LateMethod); - LateMethod->TemplateScope = getCurScope()->isTemplateParamScope(); - - // Stash the exception-specification tokens in the late-pased mthod. 
- LateMethod->ExceptionSpecTokens = FTI.ExceptionSpecTokens; - FTI.ExceptionSpecTokens = 0; - // Reserve space for the parameters. - LateMethod->DefaultArgs.reserve(FTI.NumArgs); - } - for (unsigned ParamIdx = 0; ParamIdx < FTI.NumArgs; ++ParamIdx) { if (LateMethod || FTI.ArgInfo[ParamIdx].DefaultArgTokens) { if (!LateMethod) { @@ -1846,7 +1828,7 @@ void Parser::ParseCXXClassMemberDeclaration(AccessSpecifier AS, // Parse the first declarator. ParseDeclarator(DeclaratorInfo); - // Error parsin g the declarator? + // Error parsing the declarator? if (!DeclaratorInfo.hasName()) { // If so, skip until the semi-colon or a }. SkipUntil(tok::r_brace, true, true); @@ -2065,7 +2047,7 @@ void Parser::ParseCXXClassMemberDeclaration(AccessSpecifier AS, DeclsInGroup.push_back(ThisDecl); } - if (DeclaratorInfo.isFunctionDeclarator() && + if (ThisDecl && DeclaratorInfo.isFunctionDeclarator() && DeclaratorInfo.getDeclSpec().getStorageClassSpec() != DeclSpec::SCS_typedef) { HandleMemberFunctionDeclDelays(DeclaratorInfo, ThisDecl); @@ -2358,7 +2340,7 @@ void Parser::ParseCXXMemberSpecification(SourceLocation RecordLoc, // C++11 [class.mem]p2: // Within the class member-specification, the class is regarded as complete - // within function bodies, default arguments, exception-specifications, and + // within function bodies, default arguments, and // brace-or-equal-initializers for non-static data members (including such // things in nested classes). if (TagDecl && NonNestedClass) { @@ -2369,6 +2351,10 @@ void Parser::ParseCXXMemberSpecification(SourceLocation RecordLoc, SourceLocation SavedPrevTokLocation = PrevTokLocation; ParseLexedAttributes(getCurrentClass()); ParseLexedMethodDeclarations(getCurrentClass()); + + // We've finished with all pending member declarations. + Actions.ActOnFinishCXXMemberDecls(); + ParseLexedMemberInitializers(getCurrentClass()); ParseLexedMethodDefs(getCurrentClass()); PrevTokLocation = SavedPrevTokLocation; @@ -2555,63 +2541,13 @@ Parser::MemInitResult Parser::ParseMemInitializer(Decl *ConstructorDecl) { /// 'noexcept' /// 'noexcept' '(' constant-expression ')' ExceptionSpecificationType -Parser::tryParseExceptionSpecification(bool Delayed, +Parser::tryParseExceptionSpecification( SourceRange &SpecificationRange, SmallVectorImpl &DynamicExceptions, SmallVectorImpl &DynamicExceptionRanges, - ExprResult &NoexceptExpr, - CachedTokens *&ExceptionSpecTokens) { + ExprResult &NoexceptExpr) { ExceptionSpecificationType Result = EST_None; - ExceptionSpecTokens = 0; - - // Handle delayed parsing of exception-specifications. - if (Delayed) { - if (Tok.isNot(tok::kw_throw) && Tok.isNot(tok::kw_noexcept)) - return EST_None; - - // Consume and cache the starting token. - bool IsNoexcept = Tok.is(tok::kw_noexcept); - Token StartTok = Tok; - SpecificationRange = SourceRange(ConsumeToken()); - - // Check for a '('. - if (!Tok.is(tok::l_paren)) { - // If this is a bare 'noexcept', we're done. - if (IsNoexcept) { - Diag(Tok, diag::warn_cxx98_compat_noexcept_decl); - NoexceptExpr = 0; - return EST_BasicNoexcept; - } - - Diag(Tok, diag::err_expected_lparen_after) << "throw"; - return EST_DynamicNone; - } - - // Cache the tokens for the exception-specification. 
- ExceptionSpecTokens = new CachedTokens; - ExceptionSpecTokens->push_back(StartTok); // 'throw' or 'noexcept' - ExceptionSpecTokens->push_back(Tok); // '(' - SpecificationRange.setEnd(ConsumeParen()); // '(' - - if (!ConsumeAndStoreUntil(tok::r_paren, *ExceptionSpecTokens, - /*StopAtSemi=*/true, - /*ConsumeFinalToken=*/true)) { - NoexceptExpr = 0; - delete ExceptionSpecTokens; - ExceptionSpecTokens = 0; - return IsNoexcept? EST_BasicNoexcept : EST_DynamicNone; - } - SpecificationRange.setEnd(Tok.getLocation()); - - // Add the 'stop' token. - Token End; - End.startToken(); - End.setKind(tok::cxx_exceptspec_end); - End.setLocation(Tok.getLocation()); - ExceptionSpecTokens->push_back(End); - return EST_Delayed; - } - + // See if there's a dynamic specification. if (Tok.is(tok::kw_throw)) { Result = ParseDynamicExceptionSpecification(SpecificationRange, diff --git a/lib/Parse/ParseExpr.cpp b/lib/Parse/ParseExpr.cpp index b6a027b..6d31396 100644 --- a/lib/Parse/ParseExpr.cpp +++ b/lib/Parse/ParseExpr.cpp @@ -2392,7 +2392,7 @@ ExprResult Parser::ParseBlockLiteralExpression() { SourceLocation(), EST_None, SourceLocation(), - 0, 0, 0, 0, 0, + 0, 0, 0, 0, CaretLoc, CaretLoc, ParamInfo), attrs, CaretLoc); diff --git a/lib/Parse/ParseExprCXX.cpp b/lib/Parse/ParseExprCXX.cpp index ae6ad0b..7152184 100644 --- a/lib/Parse/ParseExprCXX.cpp +++ b/lib/Parse/ParseExprCXX.cpp @@ -780,13 +780,10 @@ ExprResult Parser::ParseLambdaExpressionAfterIntroducer( llvm::SmallVector DynamicExceptions; llvm::SmallVector DynamicExceptionRanges; ExprResult NoexceptExpr; - CachedTokens *ExceptionSpecTokens; - ESpecType = tryParseExceptionSpecification(/*Delayed=*/false, - ESpecRange, + ESpecType = tryParseExceptionSpecification(ESpecRange, DynamicExceptions, DynamicExceptionRanges, - NoexceptExpr, - ExceptionSpecTokens); + NoexceptExpr); if (ESpecType != EST_None) DeclEndLoc = ESpecRange.getEnd(); @@ -821,7 +818,6 @@ ExprResult Parser::ParseLambdaExpressionAfterIntroducer( DynamicExceptions.size(), NoexceptExpr.isUsable() ? 
NoexceptExpr.get() : 0, - 0, DeclLoc, DeclEndLoc, D, TrailingReturnType), Attr, DeclEndLoc); @@ -867,7 +863,6 @@ ExprResult Parser::ParseLambdaExpressionAfterIntroducer( /*ExceptionRanges=*/0, /*NumExceptions=*/0, /*NoexceptExpr=*/0, - /*ExceptionSpecTokens=*/0, DeclLoc, DeclEndLoc, D, TrailingReturnType), Attr, DeclEndLoc); diff --git a/lib/Sema/DeclSpec.cpp b/lib/Sema/DeclSpec.cpp index fe63e35..b531acc 100644 --- a/lib/Sema/DeclSpec.cpp +++ b/lib/Sema/DeclSpec.cpp @@ -162,7 +162,6 @@ DeclaratorChunk DeclaratorChunk::getFunction(bool hasProto, bool isVariadic, SourceRange *ExceptionRanges, unsigned NumExceptions, Expr *NoexceptExpr, - CachedTokens *ExceptionSpecTokens, SourceLocation LocalRangeBegin, SourceLocation LocalRangeEnd, Declarator &TheDeclarator, @@ -227,10 +226,6 @@ DeclaratorChunk DeclaratorChunk::getFunction(bool hasProto, bool isVariadic, case EST_ComputedNoexcept: I.Fun.NoexceptExpr = NoexceptExpr; break; - - case EST_Delayed: - I.Fun.ExceptionSpecTokens = ExceptionSpecTokens; - break; } return I; } diff --git a/lib/Sema/SemaDecl.cpp b/lib/Sema/SemaDecl.cpp index 1550993..1227e92 100644 --- a/lib/Sema/SemaDecl.cpp +++ b/lib/Sema/SemaDecl.cpp @@ -7635,7 +7635,7 @@ NamedDecl *Sema::ImplicitlyDefineFunction(SourceLocation Loc, SourceLocation(), SourceLocation(), SourceLocation(), EST_None, SourceLocation(), - 0, 0, 0, 0, 0, Loc, Loc, D), + 0, 0, 0, 0, Loc, Loc, D), DS.getAttributes(), SourceLocation()); D.SetIdentifier(&II, Loc); @@ -9784,21 +9784,6 @@ void Sema::ActOnFields(Scope* S, if (!Completed) Record->completeDefinition(); - // Now that the record is complete, do any delayed exception spec checks - // we were missing. - while (!DelayedDestructorExceptionSpecChecks.empty()) { - const CXXDestructorDecl *Dtor = - DelayedDestructorExceptionSpecChecks.back().first; - if (Dtor->getParent() != Record) - break; - - assert(!Dtor->getParent()->isDependentType() && - "Should not ever add destructors of templates into the list."); - CheckOverridingFunctionExceptionSpec(Dtor, - DelayedDestructorExceptionSpecChecks.back().second); - DelayedDestructorExceptionSpecChecks.pop_back(); - } - } else { ObjCIvarDecl **ClsFields = reinterpret_cast(RecFields.data()); diff --git a/lib/Sema/SemaDeclCXX.cpp b/lib/Sema/SemaDeclCXX.cpp index 1d251b9..c861072 100644 --- a/lib/Sema/SemaDeclCXX.cpp +++ b/lib/Sema/SemaDeclCXX.cpp @@ -7319,15 +7319,42 @@ void Sema::DefineImplicitDestructor(SourceLocation CurrentLocation, } } +/// \brief Perform any semantic analysis which needs to be delayed until all +/// pending class member declarations have been parsed. +void Sema::ActOnFinishCXXMemberDecls() { + // Now we have parsed all exception specifications, determine the implicit + // exception specifications for destructors. + for (unsigned i = 0, e = DelayedDestructorExceptionSpecs.size(); + i != e; ++i) { + CXXDestructorDecl *Dtor = DelayedDestructorExceptionSpecs[i]; + AdjustDestructorExceptionSpec(Dtor->getParent(), Dtor, true); + } + DelayedDestructorExceptionSpecs.clear(); + + // Perform any deferred checking of exception specifications for virtual + // destructors. 
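  // (Illustrative sketch; the types below are an assumption for exposition,
  //  not code from this change: given
  //
  //    struct Base { virtual ~Base() throw(); };
  //    struct Derived : Base { struct M { ~M(); } m; };
  //
  //  the implicit ~Derived() overrides ~Base(), but its exception
  //  specification is computed from ~M(), which need not have been fully
  //  parsed when ~Derived() is declared. The check is therefore deferred
  //  until this point, after every member declaration has been seen.)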
+ for (unsigned i = 0, e = DelayedDestructorExceptionSpecChecks.size(); + i != e; ++i) { + const CXXDestructorDecl *Dtor = + DelayedDestructorExceptionSpecChecks[i].first; + assert(!Dtor->getParent()->isDependentType() && + "Should not ever add destructors of templates into the list."); + CheckOverridingFunctionExceptionSpec(Dtor, + DelayedDestructorExceptionSpecChecks[i].second); + } + DelayedDestructorExceptionSpecChecks.clear(); +} + void Sema::AdjustDestructorExceptionSpec(CXXRecordDecl *classDecl, - CXXDestructorDecl *destructor) { + CXXDestructorDecl *destructor, + bool WasDelayed) { // C++11 [class.dtor]p3: // A declaration of a destructor that does not have an exception- // specification is implicitly considered to have the same exception- // specification as an implicit declaration. const FunctionProtoType *dtorType = destructor->getType()-> getAs(); - if (dtorType->hasExceptionSpec()) + if (!WasDelayed && dtorType->hasExceptionSpec()) return; ImplicitExceptionSpecification exceptSpec = @@ -7344,6 +7371,14 @@ void Sema::AdjustDestructorExceptionSpec(CXXRecordDecl *classDecl, destructor->setType(ty); + // If we can't compute the exception specification for this destructor yet + // (because it depends on an exception specification which we have not parsed + // yet), make a note that we need to try again when the class is complete. + if (epi.ExceptionSpecType == EST_Delayed) { + assert(!WasDelayed && "couldn't compute destructor exception spec"); + DelayedDestructorExceptionSpecs.push_back(destructor); + } + // FIXME: If the destructor has a body that could throw, and the newly created // spec doesn't allow exceptions, we should emit a warning, because this // change in behavior can break conforming C++03 programs at runtime. @@ -7579,8 +7614,9 @@ Sema::ComputeDefaultedCopyAssignmentExceptionSpecAndConst( assert(!Base->getType()->isDependentType() && "Cannot generate implicit members for class with dependent bases."); CXXRecordDecl *BaseClassDecl = Base->getType()->getAsCXXRecordDecl(); - LookupCopyingAssignment(BaseClassDecl, Qualifiers::Const, false, 0, - &HasConstCopyAssignment); + HasConstCopyAssignment &= + (bool)LookupCopyingAssignment(BaseClassDecl, Qualifiers::Const, + false, 0); } // In C++11, the above citation has "or virtual" added @@ -7591,8 +7627,9 @@ Sema::ComputeDefaultedCopyAssignmentExceptionSpecAndConst( assert(!Base->getType()->isDependentType() && "Cannot generate implicit members for class with dependent bases."); CXXRecordDecl *BaseClassDecl = Base->getType()->getAsCXXRecordDecl(); - LookupCopyingAssignment(BaseClassDecl, Qualifiers::Const, false, 0, - &HasConstCopyAssignment); + HasConstCopyAssignment &= + (bool)LookupCopyingAssignment(BaseClassDecl, Qualifiers::Const, + false, 0); } } @@ -7606,8 +7643,9 @@ Sema::ComputeDefaultedCopyAssignmentExceptionSpecAndConst( ++Field) { QualType FieldType = Context.getBaseElementType((*Field)->getType()); if (CXXRecordDecl *FieldClassDecl = FieldType->getAsCXXRecordDecl()) { - LookupCopyingAssignment(FieldClassDecl, Qualifiers::Const, false, 0, - &HasConstCopyAssignment); + HasConstCopyAssignment &= + (bool)LookupCopyingAssignment(FieldClassDecl, Qualifiers::Const, + false, 0); } } @@ -8610,8 +8648,8 @@ Sema::ComputeDefaultedCopyCtorExceptionSpecAndConst(CXXRecordDecl *ClassDecl) { CXXRecordDecl *BaseClassDecl = cast(Base->getType()->getAs()->getDecl()); - LookupCopyingConstructor(BaseClassDecl, Qualifiers::Const, - &HasConstCopyConstructor); + HasConstCopyConstructor &= + (bool)LookupCopyingConstructor(BaseClassDecl, 
Qualifiers::Const); } for (CXXRecordDecl::base_class_iterator Base = ClassDecl->vbases_begin(), @@ -8620,8 +8658,8 @@ Sema::ComputeDefaultedCopyCtorExceptionSpecAndConst(CXXRecordDecl *ClassDecl) { ++Base) { CXXRecordDecl *BaseClassDecl = cast(Base->getType()->getAs()->getDecl()); - LookupCopyingConstructor(BaseClassDecl, Qualifiers::Const, - &HasConstCopyConstructor); + HasConstCopyConstructor &= + (bool)LookupCopyingConstructor(BaseClassDecl, Qualifiers::Const); } // -- for all the nonstatic data members of X that are of a @@ -8634,8 +8672,8 @@ Sema::ComputeDefaultedCopyCtorExceptionSpecAndConst(CXXRecordDecl *ClassDecl) { ++Field) { QualType FieldType = Context.getBaseElementType((*Field)->getType()); if (CXXRecordDecl *FieldClassDecl = FieldType->getAsCXXRecordDecl()) { - LookupCopyingConstructor(FieldClassDecl, Qualifiers::Const, - &HasConstCopyConstructor); + HasConstCopyConstructor &= + (bool)LookupCopyingConstructor(FieldClassDecl, Qualifiers::Const); } } // Otherwise, the implicitly declared copy constructor will have @@ -11260,66 +11298,6 @@ Sema::checkExceptionSpecification(ExceptionSpecificationType EST, } } -void Sema::actOnDelayedExceptionSpecification(Decl *MethodD, - ExceptionSpecificationType EST, - SourceRange SpecificationRange, - ArrayRef DynamicExceptions, - ArrayRef DynamicExceptionRanges, - Expr *NoexceptExpr) { - if (!MethodD) - return; - - // Dig out the method we're referring to. - CXXMethodDecl *Method = 0; - if (FunctionTemplateDecl *FunTmpl = dyn_cast(MethodD)) - Method = dyn_cast(FunTmpl->getTemplatedDecl()); - else - Method = dyn_cast(MethodD); - - if (!Method) - return; - - // Dig out the prototype. This should never fail. - const FunctionProtoType *Proto - = dyn_cast(Method->getType()); - if (!Proto) - return; - - // Check the exception specification. - llvm::SmallVector Exceptions; - FunctionProtoType::ExtProtoInfo EPI = Proto->getExtProtoInfo(); - checkExceptionSpecification(EST, DynamicExceptions, DynamicExceptionRanges, - NoexceptExpr, Exceptions, EPI); - - // Rebuild the function type. - QualType T = Context.getFunctionType(Proto->getResultType(), - Proto->arg_type_begin(), - Proto->getNumArgs(), - EPI); - if (TypeSourceInfo *TSInfo = Method->getTypeSourceInfo()) { - // FIXME: When we get proper type location information for exceptions, - // we'll also have to rebuild the TypeSourceInfo. For now, we just patch - // up the TypeSourceInfo; - assert(TypeLoc::getFullDataSizeForType(T) - == TypeLoc::getFullDataSizeForType(Method->getType()) && - "TypeLoc size mismatch with delayed exception specification"); - TSInfo->overrideType(T); - } - - Method->setType(T); - - if (Method->isStatic()) - checkThisInStaticMemberFunctionExceptionSpec(Method); - - if (Method->isVirtual()) { - // Check overrides, which we previously had to delay. - for (CXXMethodDecl::method_iterator O = Method->begin_overridden_methods(), - OEnd = Method->end_overridden_methods(); - O != OEnd; ++O) - CheckOverridingFunctionExceptionSpec(Method, *O); - } -} - /// IdentifyCUDATarget - Determine the CUDA compilation target for this function Sema::CUDAFunctionTarget Sema::IdentifyCUDATarget(const FunctionDecl *D) { // Implicitly declared functions (e.g. copy constructors) are diff --git a/lib/Sema/SemaLookup.cpp b/lib/Sema/SemaLookup.cpp index f003bdd..9f5138b 100644 --- a/lib/Sema/SemaLookup.cpp +++ b/lib/Sema/SemaLookup.cpp @@ -2277,7 +2277,7 @@ Sema::SpecialMemberOverloadResult *Sema::LookupSpecialMember(CXXRecordDecl *RD, Result->setMethod(DD); Result->setKind(DD->isDeleted() ? 
SpecialMemberOverloadResult::NoMemberOrDeleted : - SpecialMemberOverloadResult::SuccessNonConst); + SpecialMemberOverloadResult::Success); return Result; } @@ -2288,6 +2288,9 @@ Sema::SpecialMemberOverloadResult *Sema::LookupSpecialMember(CXXRecordDecl *RD, Expr *Arg = 0; unsigned NumArgs; + QualType ArgType = CanTy; + ExprValueKind VK = VK_LValue; + if (SM == CXXDefaultConstructor) { Name = Context.DeclarationNames.getCXXConstructorName(CanTy); NumArgs = 0; @@ -2308,7 +2311,6 @@ Sema::SpecialMemberOverloadResult *Sema::LookupSpecialMember(CXXRecordDecl *RD, DeclareImplicitMoveAssignment(RD); } - QualType ArgType = CanTy; if (ConstArg) ArgType.addConst(); if (VolatileArg) @@ -2321,14 +2323,17 @@ Sema::SpecialMemberOverloadResult *Sema::LookupSpecialMember(CXXRecordDecl *RD, // Possibly an XValue is actually correct in the case of move, but // there is no semantic difference for class types in this restricted // case. - ExprValueKind VK; if (SM == CXXCopyConstructor || SM == CXXCopyAssignment) VK = VK_LValue; else VK = VK_RValue; + } + OpaqueValueExpr FakeArg(SourceLocation(), ArgType, VK); + + if (SM != CXXDefaultConstructor) { NumArgs = 1; - Arg = new (Context) OpaqueValueExpr(SourceLocation(), ArgType, VK); + Arg = &FakeArg; } // Create the object argument @@ -2338,17 +2343,14 @@ Sema::SpecialMemberOverloadResult *Sema::LookupSpecialMember(CXXRecordDecl *RD, if (VolatileThis) ThisTy.addVolatile(); Expr::Classification Classification = - (new (Context) OpaqueValueExpr(SourceLocation(), ThisTy, - RValueThis ? VK_RValue : VK_LValue))-> - Classify(Context); + OpaqueValueExpr(SourceLocation(), ThisTy, + RValueThis ? VK_RValue : VK_LValue).Classify(Context); // Now we perform lookup on the name we computed earlier and do overload // resolution. Lookup is only performed directly into the class since there // will always be a (possibly implicit) declaration to shadow any others. OverloadCandidateSet OCS((SourceLocation())); DeclContext::lookup_iterator I, E; - SpecialMemberOverloadResult::Kind SuccessKind = - SpecialMemberOverloadResult::SuccessNonConst; llvm::tie(I, E) = RD->lookup(Name); assert((I != E) && @@ -2378,17 +2380,6 @@ Sema::SpecialMemberOverloadResult *Sema::LookupSpecialMember(CXXRecordDecl *RD, else AddOverloadCandidate(M, DeclAccessPair::make(M, AS_public), llvm::makeArrayRef(&Arg, NumArgs), OCS, true); - - // Here we're looking for a const parameter to speed up creation of - // implicit copy methods. - if ((SM == CXXCopyAssignment && M->isCopyAssignmentOperator()) || - (SM == CXXCopyConstructor && - cast(M)->isCopyConstructor())) { - QualType ArgType = M->getType()->getAs()->getArgType(0); - if (!ArgType->isReferenceType() || - ArgType->getPointeeType().isConstQualified()) - SuccessKind = SpecialMemberOverloadResult::SuccessConst; - } } else if (FunctionTemplateDecl *Tmpl = dyn_cast(Cand)) { if (SM == CXXCopyAssignment || SM == CXXMoveAssignment) @@ -2409,7 +2400,7 @@ Sema::SpecialMemberOverloadResult *Sema::LookupSpecialMember(CXXRecordDecl *RD, switch (OCS.BestViableFunction(*this, SourceLocation(), Best)) { case OR_Success: Result->setMethod(cast(Best->Function)); - Result->setKind(SuccessKind); + Result->setKind(SpecialMemberOverloadResult::Success); break; case OR_Deleted: @@ -2442,17 +2433,13 @@ CXXConstructorDecl *Sema::LookupDefaultConstructor(CXXRecordDecl *Class) { /// \brief Look up the copying constructor for the given class. 
CXXConstructorDecl *Sema::LookupCopyingConstructor(CXXRecordDecl *Class, - unsigned Quals, - bool *ConstParamMatch) { + unsigned Quals) { assert(!(Quals & ~(Qualifiers::Const | Qualifiers::Volatile)) && "non-const, non-volatile qualifiers for copy ctor arg"); SpecialMemberOverloadResult *Result = LookupSpecialMember(Class, CXXCopyConstructor, Quals & Qualifiers::Const, Quals & Qualifiers::Volatile, false, false, false); - if (ConstParamMatch) - *ConstParamMatch = Result->hasConstParamMatch(); - return cast_or_null(Result->getMethod()); } @@ -2485,8 +2472,7 @@ DeclContext::lookup_result Sema::LookupConstructors(CXXRecordDecl *Class) { /// \brief Look up the copying assignment operator for the given class. CXXMethodDecl *Sema::LookupCopyingAssignment(CXXRecordDecl *Class, unsigned Quals, bool RValueThis, - unsigned ThisQuals, - bool *ConstParamMatch) { + unsigned ThisQuals) { assert(!(Quals & ~(Qualifiers::Const | Qualifiers::Volatile)) && "non-const, non-volatile qualifiers for copy assignment arg"); assert(!(ThisQuals & ~(Qualifiers::Const | Qualifiers::Volatile)) && @@ -2497,9 +2483,6 @@ CXXMethodDecl *Sema::LookupCopyingAssignment(CXXRecordDecl *Class, ThisQuals & Qualifiers::Const, ThisQuals & Qualifiers::Volatile); - if (ConstParamMatch) - *ConstParamMatch = Result->hasConstParamMatch(); - return Result->getMethod(); } diff --git a/lib/Sema/SemaType.cpp b/lib/Sema/SemaType.cpp index d0906de..1400e7e 100644 --- a/lib/Sema/SemaType.cpp +++ b/lib/Sema/SemaType.cpp @@ -561,7 +561,7 @@ static void maybeSynthesizeBlockSignature(TypeProcessingState &state, /*const qualifier*/SourceLocation(), /*volatile qualifier*/SourceLocation(), /*mutable qualifier*/SourceLocation(), - /*EH*/ EST_None, SourceLocation(), 0, 0, 0, 0, 0, + /*EH*/ EST_None, SourceLocation(), 0, 0, 0, 0, /*parens*/ loc, loc, declarator)); @@ -4195,7 +4195,8 @@ bool Sema::RequireCompleteType(SourceLocation Loc, QualType T, // class template specialization, or an array with known size of such, // try to instantiate it. 
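  // (Illustrative sketch; the example type below is an assumption for
  //  exposition, not code from this change: for
  //
  //    template<class T> struct Cell { T v; };
  //    extern Cell<int> grid[4][4];
  //
  //  the type of `grid` is a ConstantArrayType wrapping another
  //  ConstantArrayType wrapping Cell<int>, so a single call to
  //  getAsConstantArrayType() strips only the outer dimension. The loop that
  //  follows keeps stripping array levels until the RecordType element that
  //  can actually be instantiated is exposed.)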
QualType MaybeTemplate = T; - if (const ConstantArrayType *Array = Context.getAsConstantArrayType(T)) + while (const ConstantArrayType *Array + = Context.getAsConstantArrayType(MaybeTemplate)) MaybeTemplate = Array->getElementType(); if (const RecordType *Record = MaybeTemplate->getAs()) { if (ClassTemplateSpecializationDecl *ClassTemplateSpec diff --git a/lib/Serialization/ASTReader.cpp b/lib/Serialization/ASTReader.cpp index 06b42f3..fd0c171 100644 --- a/lib/Serialization/ASTReader.cpp +++ b/lib/Serialization/ASTReader.cpp @@ -3866,6 +3866,9 @@ QualType ASTReader::readTypeRecord(unsigned Index) { EPI.Exceptions = Exceptions.data(); } else if (EST == EST_ComputedNoexcept) { EPI.NoexceptExpr = ReadExpr(*Loc.F); + } else if (EST == EST_Uninstantiated) { + EPI.ExceptionSpecDecl = ReadDeclAs(*Loc.F, Record, Idx); + EPI.ExceptionSpecTemplate = ReadDeclAs(*Loc.F, Record, Idx); } return Context.getFunctionType(ResultType, ParamTypes.data(), NumParams, EPI); diff --git a/lib/Serialization/ASTWriter.cpp b/lib/Serialization/ASTWriter.cpp index 81c0a9d..36933a9 100644 --- a/lib/Serialization/ASTWriter.cpp +++ b/lib/Serialization/ASTWriter.cpp @@ -195,6 +195,9 @@ void ASTTypeWriter::VisitFunctionProtoType(const FunctionProtoType *T) { Writer.AddTypeRef(T->getExceptionType(I), Record); } else if (T->getExceptionSpecType() == EST_ComputedNoexcept) { Writer.AddStmt(T->getNoexceptExpr()); + } else if (T->getExceptionSpecType() == EST_Uninstantiated) { + Writer.AddDeclRef(T->getExceptionSpecDecl(), Record); + Writer.AddDeclRef(T->getExceptionSpecTemplate(), Record); } Code = TYPE_FUNCTION_PROTO; } -- cgit v1.1 From 2b2816e083a455f7a656ae88b0fd059d1688bb36 Mon Sep 17 00:00:00 2001 From: dim Date: Wed, 23 May 2012 21:37:39 +0000 Subject: Vendor import of llvm release_31 r156863 (the actual 3.1 release): http://llvm.org/svn/llvm-project/llvm/branches/release_31@156863 --- lib/ExecutionEngine/IntelJITEvents/LLVMBuild.txt | 2 +- lib/ExecutionEngine/OProfileJIT/LLVMBuild.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/ExecutionEngine/IntelJITEvents/LLVMBuild.txt b/lib/ExecutionEngine/IntelJITEvents/LLVMBuild.txt index 80d2273..9c06fda 100644 --- a/lib/ExecutionEngine/IntelJITEvents/LLVMBuild.txt +++ b/lib/ExecutionEngine/IntelJITEvents/LLVMBuild.txt @@ -18,6 +18,6 @@ [common] [component_0] -type = Library +type = OptionalLibrary name = IntelJITEvents parent = ExecutionEngine diff --git a/lib/ExecutionEngine/OProfileJIT/LLVMBuild.txt b/lib/ExecutionEngine/OProfileJIT/LLVMBuild.txt index 4516dfa..e30516e 100644 --- a/lib/ExecutionEngine/OProfileJIT/LLVMBuild.txt +++ b/lib/ExecutionEngine/OProfileJIT/LLVMBuild.txt @@ -18,6 +18,6 @@ [common] [component_0] -type = Library +type = OptionalLibrary name = OProfileJIT parent = ExecutionEngine -- cgit v1.1 From bb67ca86b31f67faee50bd10c3b036d65751745a Mon Sep 17 00:00:00 2001 From: dim Date: Wed, 23 May 2012 21:39:13 +0000 Subject: Vendor import of clang release_31 r156863 (the actual 3.1 release): http://llvm.org/svn/llvm-project/cfe/branches/release_31@156863 --- lib/Basic/Version.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/Basic/Version.cpp b/lib/Basic/Version.cpp index cd16b4b..8cb2386 100644 --- a/lib/Basic/Version.cpp +++ b/lib/Basic/Version.cpp @@ -32,7 +32,7 @@ std::string getClangRepositoryPath() { // If the SVN_REPOSITORY is empty, try to use the SVN keyword. This helps us // pick up a tag in an SVN export, for example. 
- static StringRef SVNRepository("$URL: http://llvm.org/svn/llvm-project/cfe/tags/RELEASE_31/final/lib/Basic/Version.cpp $"); + static StringRef SVNRepository("$URL: http://llvm.org/svn/llvm-project/cfe/branches/release_31/lib/Basic/Version.cpp $"); if (URL.empty()) { URL = SVNRepository.slice(SVNRepository.find(':'), SVNRepository.find("/lib/Basic")); -- cgit v1.1 From 14199a8fd5efcdf5ce7f371c2e51e9ec66c55c84 Mon Sep 17 00:00:00 2001 From: pfg Date: Fri, 6 Jul 2012 16:43:56 +0000 Subject: Fix issue resizing bin/sh This partially reverts some changes from r237448 that are causing breakage when resizing under bin/sh . Reverted changes from NetBSD are: Mar 10 20:46:15 2009 - editline.3 read.c make el_gets set the count to -1 on error to distinguish between EOF and error. Feb 19 15:20:22 2009 - read.c sig.c sig.h reset and redraw on sigcont. From Anon Ymous. Feb 15 21:24:13 2009 don't restart on EINTR, instead return NULL immediately. From Anon Ymous PR: 169603 Reported by: Peter Jeremy, David Shao MFC after: 3 days --- lib/libedit/editline.3 | 5 ---- lib/libedit/el.h | 1 - lib/libedit/read.c | 75 ++++++++++++++------------------------------------ lib/libedit/sig.c | 6 ++-- lib/libedit/sig.h | 3 +- 5 files changed, 24 insertions(+), 66 deletions(-) (limited to 'lib') diff --git a/lib/libedit/editline.3 b/lib/libedit/editline.3 index fe58321..dd4c874 100644 --- a/lib/libedit/editline.3 +++ b/lib/libedit/editline.3 @@ -162,11 +162,6 @@ is modified to contain the number of characters read. Returns the line read if successful, or .Dv NULL if no characters were read or if an error occurred. -If an error occurred, -.Fa count -is set to \-1 and -.Dv errno -contains the error code that caused it. The return value may not remain valid across calls to .Fn el_gets and must be copied if the data is to be retained. diff --git a/lib/libedit/el.h b/lib/libedit/el.h index 67d01ff..8d0d152 100644 --- a/lib/libedit/el.h +++ b/lib/libedit/el.h @@ -115,7 +115,6 @@ struct editline { FILE *el_errfile; /* Stdio stuff */ int el_infd; /* Input file descriptor */ int el_flags; /* Various flags. */ - int el_errno; /* Local copy of errno */ coord_t el_cursor; /* Cursor location */ char **el_display; /* Real screen image = what is there */ char **el_vdisplay; /* Virtual screen image = what we see */ diff --git a/lib/libedit/read.c b/lib/libedit/read.c index ed2b609..ea40e50 100644 --- a/lib/libedit/read.c +++ b/lib/libedit/read.c @@ -49,7 +49,7 @@ __FBSDID("$FreeBSD$"); #include #include "el.h" -#define OKCMD -1 /* must be -1! */ +#define OKCMD -1 private int read__fixio(int, int); private int read_preread(EditLine *); @@ -170,7 +170,7 @@ read__fixio(int fd __unused, int e) return (e ? 0 : -1); case EINTR: - return (-1); + return (0); default: return (-1); @@ -235,12 +235,9 @@ read_getcmd(EditLine *el, el_action_t *cmdnum, char *ch) el_action_t cmd; int num; - el->el_errno = 0; do { - if ((num = el_getc(el, ch)) != 1) { /* if EOF or error */ - el->el_errno = num == 0 ? 
0 : errno; + if ((num = el_getc(el, ch)) != 1) /* if EOF or error */ return (num); - } #ifdef KANJI if ((*ch & 0200)) { @@ -292,21 +289,14 @@ read_char(EditLine *el, char *cp) ssize_t num_read; int tried = 0; - again: - el->el_signal->sig_no = 0; - while ((num_read = read(el->el_infd, cp, 1)) == -1) { - if (el->el_signal->sig_no == SIGCONT) { - sig_set(el); - el_set(el, EL_REFRESH); - goto again; - } + while ((num_read = read(el->el_infd, cp, 1)) == -1) if (!tried && read__fixio(el->el_infd, errno) == 0) tried = 1; else { *cp = '\0'; return (-1); } - } + return (int)num_read; } @@ -413,20 +403,17 @@ el_gets(EditLine *el, int *nread) int num; /* how many chars we have read at NL */ char ch; int crlf = 0; - int nrb; #ifdef FIONREAD c_macro_t *ma = &el->el_chared.c_macro; #endif /* FIONREAD */ - if (nread == NULL) - nread = &nrb; *nread = 0; if (el->el_flags & NO_TTY) { char *cp = el->el_line.buffer; size_t idx; - while ((num = (*el->el_read.read_char)(el, cp)) == 1) { + while ((*el->el_read.read_char)(el, cp) == 1) { /* make sure there is space for next character */ if (cp + 1 >= el->el_line.limit) { idx = (cp - el->el_line.buffer); @@ -440,16 +427,12 @@ el_gets(EditLine *el, int *nread) if (cp[-1] == '\r' || cp[-1] == '\n') break; } - if (num == -1) { - if (errno == EINTR) - cp = el->el_line.buffer; - el->el_errno = errno; - } el->el_line.cursor = el->el_line.lastchar = cp; *cp = '\0'; - *nread = (int)(el->el_line.cursor - el->el_line.buffer); - goto done; + if (nread) + *nread = (int)(el->el_line.cursor - el->el_line.buffer); + return (*nread ? el->el_line.buffer : NULL); } @@ -460,8 +443,8 @@ el_gets(EditLine *el, int *nread) (void) ioctl(el->el_infd, FIONREAD, (ioctl_t) & chrs); if (chrs == 0) { if (tty_rawmode(el) < 0) { - errno = 0; - *nread = 0; + if (nread) + *nread = 0; return (NULL); } } @@ -474,7 +457,6 @@ el_gets(EditLine *el, int *nread) if (el->el_flags & EDIT_DISABLED) { char *cp; size_t idx; - if ((el->el_flags & UNBUFFERED) == 0) cp = el->el_line.buffer; else @@ -482,7 +464,7 @@ el_gets(EditLine *el, int *nread) term__flush(el); - while ((num = (*el->el_read.read_char)(el, cp)) == 1) { + while ((*el->el_read.read_char)(el, cp) == 1) { /* make sure there is space next character */ if (cp + 1 >= el->el_line.limit) { idx = (cp - el->el_line.buffer); @@ -498,15 +480,11 @@ el_gets(EditLine *el, int *nread) break; } - if (num == -1) { - if (errno == EINTR) - cp = el->el_line.buffer; - el->el_errno = errno; - } - el->el_line.cursor = el->el_line.lastchar = cp; *cp = '\0'; - goto done; + if (nread) + *nread = (int)(el->el_line.cursor - el->el_line.buffer); + return (*nread ? el->el_line.buffer : NULL); } for (num = OKCMD; num == OKCMD;) { /* while still editing this @@ -522,12 +500,6 @@ el_gets(EditLine *el, int *nread) #endif /* DEBUG_READ */ break; } - if (el->el_errno == EINTR) { - el->el_line.buffer[0] = '\0'; - el->el_line.lastchar = - el->el_line.cursor = el->el_line.buffer; - break; - } if ((unsigned int)cmdnum >= (unsigned int)el->el_map.nfunc) { /* BUG CHECK command */ #ifdef DEBUG_EDIT (void) fprintf(el->el_errfile, @@ -645,17 +617,12 @@ el_gets(EditLine *el, int *nread) /* make sure the tty is set up correctly */ if ((el->el_flags & UNBUFFERED) == 0) { read_finish(el); - *nread = num != -1 ? 
num : 0; + if (nread) + *nread = num; } else { - *nread = (int)(el->el_line.lastchar - el->el_line.buffer); + if (nread) + *nread = + (int)(el->el_line.lastchar - el->el_line.buffer); } -done: - if (*nread == 0) { - if (num == -1) { - *nread = -1; - errno = el->el_errno; - } - return NULL; - } else - return el->el_line.buffer; + return (num ? el->el_line.buffer : NULL); } diff --git a/lib/libedit/sig.c b/lib/libedit/sig.c index e48dbef..218a32e 100644 --- a/lib/libedit/sig.c +++ b/lib/libedit/sig.c @@ -29,7 +29,7 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $NetBSD: sig.c,v 1.15 2009/02/19 15:20:22 christos Exp $ + * $NetBSD: sig.c,v 1.14 2009/02/18 15:04:40 christos Exp $ */ #if !defined(lint) && !defined(SCCSID) @@ -73,8 +73,6 @@ sig_handler(int signo) (void) sigaddset(&nset, signo); (void) sigprocmask(SIG_BLOCK, &nset, &oset); - sel->el_signal->sig_no = signo; - switch (signo) { case SIGCONT: tty_rawmode(sel); @@ -160,12 +158,12 @@ sig_set(EditLine *el) struct sigaction osa, nsa; nsa.sa_handler = sig_handler; - nsa.sa_flags = 0; sigemptyset(&nsa.sa_mask); (void) sigprocmask(SIG_BLOCK, &el->el_signal->sig_set, &oset); for (i = 0; sighdl[i] != -1; i++) { + nsa.sa_flags = SIGINT ? 0 : SA_RESTART; /* This could happen if we get interrupted */ if (sigaction(sighdl[i], &nsa, &osa) != -1 && osa.sa_handler != sig_handler) diff --git a/lib/libedit/sig.h b/lib/libedit/sig.h index 6c1260d..8773ddc 100644 --- a/lib/libedit/sig.h +++ b/lib/libedit/sig.h @@ -30,7 +30,7 @@ * SUCH DAMAGE. * * @(#)sig.h 8.1 (Berkeley) 6/4/93 - * $NetBSD: sig.h,v 1.5 2003/08/07 16:44:33 agc Exp $ + * $NetBSD: sig.h,v 1.7 2009/02/15 21:25:01 christos Exp $ * $FreeBSD$ */ @@ -61,7 +61,6 @@ typedef struct { struct sigaction sig_action[ALLSIGSNO]; sigset_t sig_set; - volatile sig_atomic_t sig_no; } *el_signal_t; protected void sig_end(EditLine*); -- cgit v1.1 From b4995ae7cbe2904f2627a580e63b1528e34bdfe8 Mon Sep 17 00:00:00 2001 From: pfg Date: Fri, 6 Jul 2012 19:30:50 +0000 Subject: Merge a small update from NetBSD. Feb 15 21:55:23 2009 - chared.c chared.h pass lint on _LP64. MFC after: 1 week --- lib/libedit/chared.c | 33 +++++++++++++++------------------ lib/libedit/chared.h | 4 ++-- 2 files changed, 17 insertions(+), 20 deletions(-) (limited to 'lib') diff --git a/lib/libedit/chared.c b/lib/libedit/chared.c index ffaaaa7..6a4f3f6 100644 --- a/lib/libedit/chared.c +++ b/lib/libedit/chared.c @@ -29,7 +29,7 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* - * $NetBSD: chared.c,v 1.25 2005/08/08 01:41:30 christos Exp $ + * $NetBSD: chared.c,v 1.27 2009/02/15 21:55:23 christos Exp $ */ #if !defined(lint) && !defined(SCCSID) @@ -59,12 +59,12 @@ cv_undo(EditLine *el) { c_undo_t *vu = &el->el_chared.c_undo; c_redo_t *r = &el->el_chared.c_redo; - unsigned int size; + size_t size; /* Save entire line for undo */ size = el->el_line.lastchar - el->el_line.buffer; vu->len = size; - vu->cursor = el->el_line.cursor - el->el_line.buffer; + vu->cursor = (int)(el->el_line.cursor - el->el_line.buffer); memcpy(vu->buf, el->el_line.buffer, size); /* save command info for redo */ @@ -83,7 +83,7 @@ cv_yank(EditLine *el, const char *ptr, int size) { c_kill_t *k = &el->el_chared.c_kill; - memcpy(k->buf, ptr, size +0u); + memcpy(k->buf, ptr, (size_t)size); k->last = k->buf + size; } @@ -97,7 +97,7 @@ c_insert(EditLine *el, int num) char *cp; if (el->el_line.lastchar + num >= el->el_line.limit) { - if (!ch_enlargebufs(el, num +0u)) + if (!ch_enlargebufs(el, (size_t)num)) return; /* can't go past end of buffer */ } @@ -118,7 +118,7 @@ c_delafter(EditLine *el, int num) { if (el->el_line.cursor + num > el->el_line.lastchar) - num = el->el_line.lastchar - el->el_line.cursor; + num = (int)(el->el_line.lastchar - el->el_line.cursor); if (el->el_map.current != el->el_map.emacs) { cv_undo(el); @@ -159,7 +159,7 @@ c_delbefore(EditLine *el, int num) { if (el->el_line.cursor - num < el->el_line.buffer) - num = el->el_line.cursor - el->el_line.buffer; + num = (int)(el->el_line.cursor - el->el_line.buffer); if (el->el_map.current != el->el_map.emacs) { cv_undo(el); @@ -375,7 +375,7 @@ cv_delfini(EditLine *el) /* sanity */ return; - size = el->el_line.cursor - el->el_chared.c_vcmd.pos; + size = (int)(el->el_line.cursor - el->el_chared.c_vcmd.pos); if (size == 0) size = 1; el->el_line.cursor = el->el_chared.c_vcmd.pos; @@ -529,8 +529,7 @@ ch_reset(EditLine *el, int mclear) } private void -ch__clearmacro(el) - EditLine *el; +ch__clearmacro(EditLine *el) { c_macro_t *ma = &el->el_chared.c_macro; while (ma->level >= 0) @@ -542,9 +541,7 @@ ch__clearmacro(el) * Returns 1 if successful, 0 if not. */ protected int -ch_enlargebufs(el, addlen) - EditLine *el; - size_t addlen; +ch_enlargebufs(EditLine *el, size_t addlen) { size_t sz, newsz; char *newbuffer, *oldbuf, *oldkbuf; @@ -695,12 +692,12 @@ protected int c_gets(EditLine *el, char *buf, const char *prompt) { char ch; - int len; + ssize_t len; char *cp = el->el_line.buffer; if (prompt) { len = strlen(prompt); - memcpy(cp, prompt, len + 0u); + memcpy(cp, prompt, (size_t)len); cp += len; } len = 0; @@ -721,7 +718,7 @@ c_gets(EditLine *el, char *buf, const char *prompt) case '\010': /* Delete and backspace */ case '\177': - if (len <= 0) { + if (len == 0) { len = -1; break; } @@ -749,7 +746,7 @@ c_gets(EditLine *el, char *buf, const char *prompt) el->el_line.buffer[0] = '\0'; el->el_line.lastchar = el->el_line.buffer; el->el_line.cursor = el->el_line.buffer; - return len; + return (int)len; } @@ -771,6 +768,6 @@ c_hpos(EditLine *el) ptr >= el->el_line.buffer && *ptr != '\n'; ptr--) continue; - return (el->el_line.cursor - ptr - 1); + return (int)(el->el_line.cursor - ptr - 1); } } diff --git a/lib/libedit/chared.h b/lib/libedit/chared.h index 6636fc7..153fceb 100644 --- a/lib/libedit/chared.h +++ b/lib/libedit/chared.h @@ -30,7 +30,7 @@ * SUCH DAMAGE. 
* * @(#)chared.h 8.1 (Berkeley) 6/4/93 - * $NetBSD: chared.h,v 1.17 2006/03/06 21:11:56 christos Exp $ + * $NetBSD: chared.h,v 1.18 2009/02/15 21:55:23 christos Exp $ * $FreeBSD$ */ @@ -70,7 +70,7 @@ typedef struct c_macro_t { * Undo information for vi - no undo in emacs (yet) */ typedef struct c_undo_t { - int len; /* length of saved line */ + ssize_t len; /* length of saved line */ int cursor; /* position of saved cursor */ char *buf; /* full saved text */ } c_undo_t; -- cgit v1.1 From 3fd98197832d76ffc7a77c635b9cd424b6aa4341 Mon Sep 17 00:00:00 2001 From: theraven Date: Fri, 6 Jul 2012 20:16:22 +0000 Subject: Restore the __collate_load_error global that was accidentally removed in the xlocale refactoring. MFC after: 1 week --- lib/libc/locale/collate.c | 10 ++++++---- lib/libc/locale/setrunelocale.c | 1 - 2 files changed, 6 insertions(+), 5 deletions(-) (limited to 'lib') diff --git a/lib/libc/locale/collate.c b/lib/libc/locale/collate.c index 448e605..676d41c 100644 --- a/lib/libc/locale/collate.c +++ b/lib/libc/locale/collate.c @@ -56,11 +56,11 @@ __FBSDID("$FreeBSD$"); * We also modify the collation table test functions to search the thread-local * table first and the global table second. */ -#define __collate_load_error (table->__collate_load_error) #define __collate_substitute_nontrivial (table->__collate_substitute_nontrivial) #define __collate_substitute_table_ptr (table->__collate_substitute_table_ptr) #define __collate_char_pri_table_ptr (table->__collate_char_pri_table_ptr) #define __collate_chain_pri_table (table->__collate_chain_pri_table) +int __collate_load_error; struct xlocale_collate __xlocale_global_collate = { @@ -109,7 +109,9 @@ __collate_load(const char *encoding, locale_t unused) int __collate_load_tables(const char *encoding) { - return __collate_load_tables_l(encoding, &__xlocale_global_collate); + int ret = __collate_load_tables_l(encoding, &__xlocale_global_collate); + __collate_load_error = __xlocale_global_collate.__collate_load_error; + return ret; } int @@ -123,7 +125,7 @@ __collate_load_tables_l(const char *encoding, struct xlocale_collate *table) /* 'encoding' must be already checked. */ if (strcmp(encoding, "C") == 0 || strcmp(encoding, "POSIX") == 0) { - __collate_load_error = 1; + table->__collate_load_error = 1; return (_LDP_CACHE); } @@ -240,7 +242,7 @@ __collate_load_tables_l(const char *encoding, struct xlocale_collate *table) break; } } - __collate_load_error = 0; + table->__collate_load_error = 0; return (_LDP_LOADED); } diff --git a/lib/libc/locale/setrunelocale.c b/lib/libc/locale/setrunelocale.c index f036bbc..3ffb649 100644 --- a/lib/libc/locale/setrunelocale.c +++ b/lib/libc/locale/setrunelocale.c @@ -67,7 +67,6 @@ extern _RuneLocale *_Read_RuneMagi(FILE *); static int __setrunelocale(struct xlocale_ctype *l, const char *); -#define __collate_load_error (table->__collate_load_error) #define __collate_substitute_nontrivial (table->__collate_substitute_nontrivial) #define __collate_substitute_table_ptr (table->__collate_substitute_table_ptr) #define __collate_char_pri_table_ptr (table->__collate_char_pri_table_ptr) -- cgit v1.1 From bc71506d20005074684c1f3cc0fb5a60aeb26953 Mon Sep 17 00:00:00 2001 From: davidxu Date: Tue, 10 Jul 2012 01:47:11 +0000 Subject: Executing CPUID with EAX set to 1 to actually get feature flags. 
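For context, CPUID leaf 0 only returns the vendor string and the highest supported leaf; leaf 1 is the one that reports the feature flags, including the OSXSAVE bit (ECX bit 27) that __getcontextx_size() tests. A minimal standalone sketch of the same query, using the compiler-provided <cpuid.h> helper rather than the hand-written inline assembly changed below:

    #include <cpuid.h>
    #include <stdio.h>

    int
    main(void)
    {
            unsigned int eax, ebx, ecx, edx;

            /* Leaf 1, not leaf 0, carries the CPU feature flags. */
            if (__get_cpuid(1, &eax, &ebx, &ecx, &edx))
                    printf("OSXSAVE: %s\n",
                        (ecx & (1u << 27)) ? "yes" : "no");
            return (0);
    }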
PR: 169730 --- lib/libc/i386/gen/getcontextx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/libc/i386/gen/getcontextx.c b/lib/libc/i386/gen/getcontextx.c index 5908ce7..2d3b562 100644 --- a/lib/libc/i386/gen/getcontextx.c +++ b/lib/libc/i386/gen/getcontextx.c @@ -68,7 +68,7 @@ __getcontextx_size(void) " movl %%ebx,%1\n" " popl %%ebx\n" : "=a" (p[0]), "=r" (p[1]), "=c" (p[2]), "=d" (p[3]) - : "0" (0x0)); + : "0" (0x1)); if ((p[2] & CPUID2_OSXSAVE) != 0) { __asm __volatile( " pushl %%ebx\n" -- cgit v1.1 From 9ca1308bfb634fec0f594911944b51b4b01a5331 Mon Sep 17 00:00:00 2001 From: lstewart Date: Tue, 10 Jul 2012 08:31:28 +0000 Subject: Move the ffclock symbols from FBSD_1.2 to FBSD_1.3 where they should have been put initially. They were added to head during development of 10-CURRENT, not 9-CURRENT. Submitted by: glebius Reviewed by: kib --- lib/libc/sys/Symbol.map | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'lib') diff --git a/lib/libc/sys/Symbol.map b/lib/libc/sys/Symbol.map index f1c1567..6888ea0 100644 --- a/lib/libc/sys/Symbol.map +++ b/lib/libc/sys/Symbol.map @@ -365,9 +365,6 @@ FBSD_1.2 { cap_getmode; cap_new; cap_getrights; - ffclock_getcounter; - ffclock_getestimate; - ffclock_setestimate; getloginclass; pdfork; pdgetpid; @@ -382,6 +379,9 @@ FBSD_1.2 { }; FBSD_1.3 { + ffclock_getcounter; + ffclock_getestimate; + ffclock_setestimate; posix_fadvise; }; -- cgit v1.1 From ea0d3f8feef5d5dccadf4649dcda2769dc1ad173 Mon Sep 17 00:00:00 2001 From: pfg Date: Wed, 11 Jul 2012 22:20:19 +0000 Subject: Re-merge a couple of changes from NetBSD's libedit. bin/sh has been taught about el_gets setting the count to -1 on error, so now we can partially revert r238173 to reduce differences with NetBSD's implementation. Unfortunately NetBSD's libedit handling of EINTR (Revision 1.44 in read.c + SIGWINCH fixes) still causes trouble in bin/sh and other utilities and has to be avoided. MFC after: 1 month --- lib/libedit/editline.3 | 5 +++++ lib/libedit/el.h | 1 + lib/libedit/read.c | 56 +++++++++++++++++++++++++++++++++++--------------- lib/libedit/sig.c | 6 ++++-- lib/libedit/sig.h | 3 ++- 5 files changed, 51 insertions(+), 20 deletions(-) (limited to 'lib') diff --git a/lib/libedit/editline.3 b/lib/libedit/editline.3 index dd4c874..fe58321 100644 --- a/lib/libedit/editline.3 +++ b/lib/libedit/editline.3 @@ -162,6 +162,11 @@ is modified to contain the number of characters read. Returns the line read if successful, or .Dv NULL if no characters were read or if an error occurred. +If an error occurred, +.Fa count +is set to \-1 and +.Dv errno +contains the error code that caused it. The return value may not remain valid across calls to .Fn el_gets and must be copied if the data is to be retained. diff --git a/lib/libedit/el.h b/lib/libedit/el.h index 8d0d152..67d01ff 100644 --- a/lib/libedit/el.h +++ b/lib/libedit/el.h @@ -115,6 +115,7 @@ struct editline { FILE *el_errfile; /* Stdio stuff */ int el_infd; /* Input file descriptor */ int el_flags; /* Various flags. 
*/ + int el_errno; /* Local copy of errno */ coord_t el_cursor; /* Cursor location */ char **el_display; /* Real screen image = what is there */ char **el_vdisplay; /* Virtual screen image = what we see */ diff --git a/lib/libedit/read.c b/lib/libedit/read.c index ea40e50..7d7f54b 100644 --- a/lib/libedit/read.c +++ b/lib/libedit/read.c @@ -235,9 +235,12 @@ read_getcmd(EditLine *el, el_action_t *cmdnum, char *ch) el_action_t cmd; int num; + el->el_errno = 0; do { - if ((num = el_getc(el, ch)) != 1) /* if EOF or error */ + if ((num = el_getc(el, ch)) != 1) { /* if EOF or error */ + el->el_errno = num == 0 ? 0 : errno; return (num); + } #ifdef KANJI if ((*ch & 0200)) { @@ -289,14 +292,21 @@ read_char(EditLine *el, char *cp) ssize_t num_read; int tried = 0; - while ((num_read = read(el->el_infd, cp, 1)) == -1) + again: + el->el_signal->sig_no = 0; + while ((num_read = read(el->el_infd, cp, 1)) == -1) { + if (el->el_signal->sig_no == SIGCONT) { + sig_set(el); + el_set(el, EL_REFRESH); + goto again; + } if (!tried && read__fixio(el->el_infd, errno) == 0) tried = 1; else { *cp = '\0'; return (-1); } - + } return (int)num_read; } @@ -403,10 +413,13 @@ el_gets(EditLine *el, int *nread) int num; /* how many chars we have read at NL */ char ch; int crlf = 0; + int nrb; #ifdef FIONREAD c_macro_t *ma = &el->el_chared.c_macro; #endif /* FIONREAD */ + if (nread == NULL) + nread = &nrb; *nread = 0; if (el->el_flags & NO_TTY) { @@ -427,12 +440,13 @@ el_gets(EditLine *el, int *nread) if (cp[-1] == '\r' || cp[-1] == '\n') break; } + if (num == -1) + el->el_errno = errno; el->el_line.cursor = el->el_line.lastchar = cp; *cp = '\0'; - if (nread) - *nread = (int)(el->el_line.cursor - el->el_line.buffer); - return (*nread ? el->el_line.buffer : NULL); + *nread = (int)(el->el_line.cursor - el->el_line.buffer); + goto done; } @@ -443,8 +457,8 @@ el_gets(EditLine *el, int *nread) (void) ioctl(el->el_infd, FIONREAD, (ioctl_t) & chrs); if (chrs == 0) { if (tty_rawmode(el) < 0) { - if (nread) - *nread = 0; + errno = 0; + *nread = 0; return (NULL); } } @@ -457,6 +471,7 @@ el_gets(EditLine *el, int *nread) if (el->el_flags & EDIT_DISABLED) { char *cp; size_t idx; + if ((el->el_flags & UNBUFFERED) == 0) cp = el->el_line.buffer; else @@ -480,11 +495,13 @@ el_gets(EditLine *el, int *nread) break; } + if (num == -1) { + el->el_errno = errno; + } + el->el_line.cursor = el->el_line.lastchar = cp; *cp = '\0'; - if (nread) - *nread = (int)(el->el_line.cursor - el->el_line.buffer); - return (*nread ? el->el_line.buffer : NULL); + goto done; } for (num = OKCMD; num == OKCMD;) { /* while still editing this @@ -617,12 +634,17 @@ el_gets(EditLine *el, int *nread) /* make sure the tty is set up correctly */ if ((el->el_flags & UNBUFFERED) == 0) { read_finish(el); - if (nread) - *nread = num; + *nread = num != -1 ? num : 0; } else { - if (nread) - *nread = - (int)(el->el_line.lastchar - el->el_line.buffer); + *nread = (int)(el->el_line.lastchar - el->el_line.buffer); } - return (num ? el->el_line.buffer : NULL); +done: + if (*nread == 0) { + if (num == -1) { + *nread = -1; + errno = el->el_errno; + } + return NULL; + } else + return el->el_line.buffer; } diff --git a/lib/libedit/sig.c b/lib/libedit/sig.c index 218a32e..e48dbef 100644 --- a/lib/libedit/sig.c +++ b/lib/libedit/sig.c @@ -29,7 +29,7 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* - * $NetBSD: sig.c,v 1.14 2009/02/18 15:04:40 christos Exp $ + * $NetBSD: sig.c,v 1.15 2009/02/19 15:20:22 christos Exp $ */ #if !defined(lint) && !defined(SCCSID) @@ -73,6 +73,8 @@ sig_handler(int signo) (void) sigaddset(&nset, signo); (void) sigprocmask(SIG_BLOCK, &nset, &oset); + sel->el_signal->sig_no = signo; + switch (signo) { case SIGCONT: tty_rawmode(sel); @@ -158,12 +160,12 @@ sig_set(EditLine *el) struct sigaction osa, nsa; nsa.sa_handler = sig_handler; + nsa.sa_flags = 0; sigemptyset(&nsa.sa_mask); (void) sigprocmask(SIG_BLOCK, &el->el_signal->sig_set, &oset); for (i = 0; sighdl[i] != -1; i++) { - nsa.sa_flags = SIGINT ? 0 : SA_RESTART; /* This could happen if we get interrupted */ if (sigaction(sighdl[i], &nsa, &osa) != -1 && osa.sa_handler != sig_handler) diff --git a/lib/libedit/sig.h b/lib/libedit/sig.h index 8773ddc..7e38100 100644 --- a/lib/libedit/sig.h +++ b/lib/libedit/sig.h @@ -30,7 +30,7 @@ * SUCH DAMAGE. * * @(#)sig.h 8.1 (Berkeley) 6/4/93 - * $NetBSD: sig.h,v 1.7 2009/02/15 21:25:01 christos Exp $ + * $NetBSD: sig.h,v 1.8 2009/02/19 15:20:22 christos Exp $ * $FreeBSD$ */ @@ -61,6 +61,7 @@ typedef struct { struct sigaction sig_action[ALLSIGSNO]; sigset_t sig_set; + volatile sig_atomic_t sig_no; } *el_signal_t; protected void sig_end(EditLine*); -- cgit v1.1 From 5d06c100811722500240ff513611a3a7c09eb939 Mon Sep 17 00:00:00 2001 From: brueffer Date: Sun, 15 Jul 2012 11:52:24 +0000 Subject: Jump to the failed label instead of doing cleanup ourselves. Obtained from: DragonFly BSD MFC after: 2 weeks --- lib/libc/rpc/getnetpath.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/libc/rpc/getnetpath.c b/lib/libc/rpc/getnetpath.c index d1ea554..92eae95 100644 --- a/lib/libc/rpc/getnetpath.c +++ b/lib/libc/rpc/getnetpath.c @@ -99,9 +99,8 @@ setnetpath() return (NULL); } if ((np_sessionp->nc_handlep = setnetconfig()) == NULL) { - free(np_sessionp); syslog (LOG_ERR, "rpc: failed to open " NETCONFIG); - return (NULL); + goto failed; } np_sessionp->valid = NP_VALID; np_sessionp->ncp_list = NULL; -- cgit v1.1 From a6e30f9565722c5df66277a5e22c7ea869c760d4 Mon Sep 17 00:00:00 2001 From: jilles Date: Sun, 15 Jul 2012 20:39:21 +0000 Subject: libc: Remove some unused strings from getaddrinfo(). 
--- lib/libc/net/getaddrinfo.c | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) (limited to 'lib') diff --git a/lib/libc/net/getaddrinfo.c b/lib/libc/net/getaddrinfo.c index dc2641f..d6feb71 100644 --- a/lib/libc/net/getaddrinfo.c +++ b/lib/libc/net/getaddrinfo.c @@ -155,7 +155,6 @@ struct explore { int e_af; int e_socktype; int e_protocol; - const char *e_protostr; int e_wild; #define WILD_AF(ex) ((ex)->e_wild & 0x01) #define WILD_SOCKTYPE(ex) ((ex)->e_wild & 0x02) @@ -164,21 +163,21 @@ struct explore { static const struct explore explore[] = { #if 0 - { PF_LOCAL, ANY, ANY, NULL, 0x01 }, + { PF_LOCAL, ANY, ANY, 0x01 }, #endif #ifdef INET6 - { PF_INET6, SOCK_DGRAM, IPPROTO_UDP, "udp", 0x07 }, - { PF_INET6, SOCK_STREAM, IPPROTO_TCP, "tcp", 0x07 }, - { PF_INET6, SOCK_STREAM, IPPROTO_SCTP, "sctp", 0x03 }, - { PF_INET6, SOCK_SEQPACKET, IPPROTO_SCTP, "sctp", 0x07 }, - { PF_INET6, SOCK_RAW, ANY, NULL, 0x05 }, + { PF_INET6, SOCK_DGRAM, IPPROTO_UDP, 0x07 }, + { PF_INET6, SOCK_STREAM, IPPROTO_TCP, 0x07 }, + { PF_INET6, SOCK_STREAM, IPPROTO_SCTP, 0x03 }, + { PF_INET6, SOCK_SEQPACKET, IPPROTO_SCTP, 0x07 }, + { PF_INET6, SOCK_RAW, ANY, 0x05 }, #endif - { PF_INET, SOCK_DGRAM, IPPROTO_UDP, "udp", 0x07 }, - { PF_INET, SOCK_STREAM, IPPROTO_TCP, "tcp", 0x07 }, - { PF_INET, SOCK_STREAM, IPPROTO_SCTP, "sctp", 0x03 }, - { PF_INET, SOCK_SEQPACKET, IPPROTO_SCTP, "sctp", 0x07 }, - { PF_INET, SOCK_RAW, ANY, NULL, 0x05 }, - { -1, 0, 0, NULL, 0 }, + { PF_INET, SOCK_DGRAM, IPPROTO_UDP, 0x07 }, + { PF_INET, SOCK_STREAM, IPPROTO_TCP, 0x07 }, + { PF_INET, SOCK_STREAM, IPPROTO_SCTP, 0x03 }, + { PF_INET, SOCK_SEQPACKET, IPPROTO_SCTP, 0x07 }, + { PF_INET, SOCK_RAW, ANY, 0x05 }, + { -1, 0, 0, 0 }, }; #ifdef INET6 -- cgit v1.1 From c9ba4f7b25ec65c1aba9ca25cfd5978e43d00fba Mon Sep 17 00:00:00 2001 From: emax Date: Wed, 18 Jul 2012 15:52:09 +0000 Subject: Return zero from get_addrselectpolicy() when no source-address-selection policy is installed. MFC after: 1 week --- lib/libc/net/getaddrinfo.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'lib') diff --git a/lib/libc/net/getaddrinfo.c b/lib/libc/net/getaddrinfo.c index d6feb71..d11ff78 100644 --- a/lib/libc/net/getaddrinfo.c +++ b/lib/libc/net/getaddrinfo.c @@ -692,6 +692,8 @@ get_addrselectpolicy(struct policyhead *head) if (sysctl(mib, sizeof(mib) / sizeof(mib[0]), NULL, &l, NULL, 0) < 0) return (0); + if (l == 0) + return (0); if ((buf = malloc(l)) == NULL) return (0); if (sysctl(mib, sizeof(mib) / sizeof(mib[0]), buf, &l, NULL, 0) < 0) { -- cgit v1.1 From d950d8a13fa882cc5c23240d4fa991db5fee3ef6 Mon Sep 17 00:00:00 2001 From: kib Date: Thu, 19 Jul 2012 10:23:59 +0000 Subject: Document F_DUPFD_CLOEXEC. Also provide some wording changes for F_DUPFD to make it less confusing, at least for me. MFC after: 1 week --- lib/libc/sys/fcntl.2 | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) (limited to 'lib') diff --git a/lib/libc/sys/fcntl.2 b/lib/libc/sys/fcntl.2 index beceb72..0844e87 100644 --- a/lib/libc/sys/fcntl.2 +++ b/lib/libc/sys/fcntl.2 @@ -28,7 +28,7 @@ .\" @(#)fcntl.2 8.2 (Berkeley) 1/12/94 .\" $FreeBSD$ .\" -.Dd January 28, 2012 +.Dd July 18, 2012 .Dt FCNTL 2 .Os .Sh NAME @@ -54,7 +54,7 @@ Depending on the value of .Fn fcntl can take an additional third argument .Fa "int arg" . -.Bl -tag -width F_GETOWNX +.Bl -tag -width F_DUPFD_CLOEXEC .It Dv F_DUPFD Return a new descriptor as follows: .Pp @@ -73,11 +73,22 @@ Same access mode (read, write or read/write). 
Same file status flags (i.e., both file descriptors share the same file status flags).
.It
-The close-on-exec flag associated with the new file descriptor
-is set to remain open across
+The close-on-exec flag
+.Dv FD_CLOEXEC
+associated with the new file descriptor is cleared, so the file descriptor is
+to remain open across
.Xr execve 2
system calls.
.El
+.It Dv F_DUPFD_CLOEXEC
+Like
+.Dv F_DUPFD ,
+but the
+.Dv FD_CLOEXEC
+flag associated with the new file descriptor is set, so the file descriptor
+is closed when
+.Xr execve 2
+system call executes.
.It Dv F_DUP2FD
It is functionally equivalent to
.Bd -literal -offset indent
-- 
cgit v1.1

From e1a4ff2fce7c3738aba9ab99e00246378ccaf3f6 Mon Sep 17 00:00:00 2001
From: pfg
Date: Thu, 19 Jul 2012 19:15:47 +0000
Subject: Merge more cosmetic changes from NetBSD's libedit.

makelist: change tr '[a-z]' '[A-Z]' to tr '[:lower:]' '[:upper:]' so that
POSIX systems work properly regardless of environment variable settings.

tokenizer.c: pass lint on _LP64

Obtained from:	NetBSD
MFC after:	2 weeks
---
 lib/libedit/makelist    | 4 ++--
 lib/libedit/tokenizer.c | 6 +++---
 2 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'lib')

diff --git a/lib/libedit/makelist b/lib/libedit/makelist
index e44a0e7..6bb2476 100644
--- a/lib/libedit/makelist
+++ b/lib/libedit/makelist
@@ -1,5 +1,5 @@
 #!/bin/sh -
-#	$NetBSD: makelist,v 1.10 2005/08/08 14:04:49 christos Exp $
+#	$NetBSD: makelist,v 1.11 2005/10/22 16:45:03 christos Exp $
 # $FreeBSD$
 #
 # Copyright (c) 1992, 1993
@@ -141,7 +141,7 @@ case $FLAG in
 #
 -fh)
     cat $FILES | $AWK '/el_action_t/ { print $3 }' | \
-	sort | LC_ALL=C tr 'a-z' 'A-Z' | $AWK '
+	sort | LC_ALL=C tr '[:lower:]' '[:upper:]' | $AWK '
 	BEGIN {
 	    printf("/* Automatically generated file, do not edit */\n");
 	    printf("#ifndef _h_fcns_c\n#define _h_fcns_c\n");
diff --git a/lib/libedit/tokenizer.c b/lib/libedit/tokenizer.c
index 8e3100d..41284e8 100644
--- a/lib/libedit/tokenizer.c
+++ b/lib/libedit/tokenizer.c
@@ -29,7 +29,7 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- *	$NetBSD: tokenizer.c,v 1.14 2003/12/05 13:37:48 lukem Exp $
+ *	$NetBSD: tokenizer.c,v 1.15 2009/02/15 21:55:23 christos Exp $
  */
 
 #if !defined(lint) && !defined(SCCSID)
@@ -198,7 +198,7 @@ tok_line(Tokenizer *tok, const LineInfo *line,
 			ptr = "";
 		if (ptr == line->cursor) {
 			cc = tok->argc;
-			co = tok->wptr - tok->wstart;
+			co = (int)(tok->wptr - tok->wstart);
 		}
 		switch (*ptr) {
 		case '\'':
@@ -417,7 +417,7 @@ tok_line(Tokenizer *tok, const LineInfo *line,
  tok_line_outok:
 	if (cc == -1 && co == -1) {
 		cc = tok->argc;
-		co = tok->wptr - tok->wstart;
+		co = (int)(tok->wptr - tok->wstart);
 	}
 	if (cursorc != NULL)
 		*cursorc = cc;
-- 
cgit v1.1

From b941de51c7f790e80d83a683e6874e2a56590e3d Mon Sep 17 00:00:00 2001
From: davidxu
Date: Fri, 20 Jul 2012 01:56:14 +0000
Subject: Don't forget to release a thread reference count; replace
 _thr_ref_add() with _thr_find_thread(), so the reference count is no longer
 needed.
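For illustration (an editorial sketch, not part of the commit; the calls below
are simplified stand-ins for the real prototypes in thr_private.h), the two
locking patterns differ as follows: _thr_ref_add() pins the target thread with
a reference count that must later be dropped with _thr_ref_delete(), while
_thr_find_thread() returns with the target's thread lock already held, so a
single unlock suffices:

	/* Old pattern: take a reference, lock, work, unlock, drop it. */
	if (_thr_ref_add(curthread, pthread, /*include dead*/0) == 0) {
		THR_THREAD_LOCK(curthread, pthread);
		/* ... operate on pthread ... */
		THR_THREAD_UNLOCK(curthread, pthread);
		_thr_ref_delete(curthread, pthread);	/* easy to forget */
	}

	/* New pattern: _thr_find_thread() returns with the lock held. */
	if (_thr_find_thread(curthread, pthread, /*include dead*/0) == 0) {
		/* ... operate on pthread ... */
		THR_THREAD_UNLOCK(curthread, pthread);
	}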
MFC after: 3 days --- lib/libthr/thread/thr_setschedparam.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'lib') diff --git a/lib/libthr/thread/thr_setschedparam.c b/lib/libthr/thread/thr_setschedparam.c index 59d62dc..a8a15d1 100644 --- a/lib/libthr/thread/thr_setschedparam.c +++ b/lib/libthr/thread/thr_setschedparam.c @@ -70,9 +70,8 @@ _pthread_setschedparam(pthread_t pthread, int policy, curthread->attr.prio = param->sched_priority; } THR_UNLOCK(curthread); - } else if ((ret = _thr_ref_add(curthread, pthread, /*include dead*/0)) - == 0) { - THR_THREAD_LOCK(curthread, pthread); + } else if ((ret = _thr_find_thread(curthread, pthread, + /*include dead*/0)) == 0) { if (pthread->attr.sched_policy == policy && (policy == SCHED_OTHER || pthread->attr.prio == param->sched_priority)) { @@ -88,7 +87,6 @@ _pthread_setschedparam(pthread_t pthread, int policy, pthread->attr.prio = param->sched_priority; } THR_THREAD_UNLOCK(curthread, pthread); - _thr_ref_delete(curthread, pthread); } return (ret); } -- cgit v1.1 From 4bd927cee6b011b243c700ac09bba0bf5bd832db Mon Sep 17 00:00:00 2001 From: davidxu Date: Fri, 20 Jul 2012 03:00:41 +0000 Subject: Eliminate duplicated code. --- lib/libthr/thread/thr_setschedparam.c | 46 ++++++++++++----------------------- 1 file changed, 16 insertions(+), 30 deletions(-) (limited to 'lib') diff --git a/lib/libthr/thread/thr_setschedparam.c b/lib/libthr/thread/thr_setschedparam.c index a8a15d1..343e49c 100644 --- a/lib/libthr/thread/thr_setschedparam.c +++ b/lib/libthr/thread/thr_setschedparam.c @@ -54,39 +54,25 @@ _pthread_setschedparam(pthread_t pthread, int policy, int ret; if (pthread == curthread) { + pthread = curthread; THR_LOCK(curthread); - if (curthread->attr.sched_policy == policy && - (policy == SCHED_OTHER || - curthread->attr.prio == param->sched_priority)) { - pthread->attr.prio = param->sched_priority; - THR_UNLOCK(curthread); - return (0); - } - ret = _thr_setscheduler(curthread->tid, policy, param); - if (ret == -1) - ret = errno; - else { - curthread->attr.sched_policy = policy; - curthread->attr.prio = param->sched_priority; - } - THR_UNLOCK(curthread); } else if ((ret = _thr_find_thread(curthread, pthread, - /*include dead*/0)) == 0) { - if (pthread->attr.sched_policy == policy && - (policy == SCHED_OTHER || - pthread->attr.prio == param->sched_priority)) { - pthread->attr.prio = param->sched_priority; - THR_THREAD_UNLOCK(curthread, pthread); - return (0); - } - ret = _thr_setscheduler(pthread->tid, policy, param); - if (ret == -1) - ret = errno; - else { - pthread->attr.sched_policy = policy; - pthread->attr.prio = param->sched_priority; - } + /*include dead*/0)) != 0) + return (ret); + if (pthread->attr.sched_policy == policy && + (policy == SCHED_OTHER || + pthread->attr.prio == param->sched_priority)) { + pthread->attr.prio = param->sched_priority; THR_THREAD_UNLOCK(curthread, pthread); + return (0); } + ret = _thr_setscheduler(pthread->tid, policy, param); + if (ret == -1) + ret = errno; + else { + pthread->attr.sched_policy = policy; + pthread->attr.prio = param->sched_priority; + } + THR_THREAD_UNLOCK(curthread, pthread); return (ret); } -- cgit v1.1 From 8673cbd4778dbd00c4a37ca2bd00b67ae369311f Mon Sep 17 00:00:00 2001 From: davidxu Date: Fri, 20 Jul 2012 03:16:52 +0000 Subject: Eliminate duplicated code. 
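An editorial note on these merged locking paths (not part of the commits, and
based only on the diffs above and below): both entry branches now fall through
holding the target's per-thread lock, because for pthread == curthread the
macros THR_LOCK(curthread) and THR_THREAD_LOCK(curthread, pthread) acquire the
same underlying lock, so the single THR_THREAD_UNLOCK(curthread, pthread) at
the end pairs with either branch. A compressed sketch of the resulting shape:

	if (pthread == curthread)
		THR_LOCK(curthread);		/* lock self */
	else if ((ret = _thr_find_thread(curthread, pthread,
	    /*include dead*/0)) != 0)
		return (ret);			/* no such thread */
	/* ... common body runs with the thread lock held ... */
	THR_THREAD_UNLOCK(curthread, pthread);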
--- lib/libthr/thread/thr_setprio.c | 43 ++++++++++++++--------------------------- 1 file changed, 14 insertions(+), 29 deletions(-) (limited to 'lib') diff --git a/lib/libthr/thread/thr_setprio.c b/lib/libthr/thread/thr_setprio.c index b1b2352..b87d536 100644 --- a/lib/libthr/thread/thr_setprio.c +++ b/lib/libthr/thread/thr_setprio.c @@ -46,37 +46,22 @@ _pthread_setprio(pthread_t pthread, int prio) param.sched_priority = prio; if (pthread == curthread) { + pthread = curthread; THR_LOCK(curthread); - if (curthread->attr.sched_policy == SCHED_OTHER || - curthread->attr.prio == prio) { - curthread->attr.prio = prio; - ret = 0; - } else { - ret = _thr_setscheduler(curthread->tid, - curthread->attr.sched_policy, ¶m); - if (ret == -1) - ret = errno; - else - curthread->attr.prio = prio; - } - THR_UNLOCK(curthread); - } else if ((ret = _thr_ref_add(curthread, pthread, /*include dead*/0)) - == 0) { - THR_THREAD_LOCK(curthread, pthread); - if (pthread->attr.sched_policy == SCHED_OTHER || - pthread->attr.prio == prio) { + } else if ((ret = _thr_find_thread(curthread, pthread, /*include dead*/0))) + return (ret); + if (pthread->attr.sched_policy == SCHED_OTHER || + pthread->attr.prio == prio) { + pthread->attr.prio = prio; + ret = 0; + } else { + ret = _thr_setscheduler(pthread->tid, + pthread->attr.sched_policy, ¶m); + if (ret == -1) + ret = errno; + else pthread->attr.prio = prio; - ret = 0; - } else { - ret = _thr_setscheduler(pthread->tid, - curthread->attr.sched_policy, ¶m); - if (ret == -1) - ret = errno; - else - pthread->attr.prio = prio; - } - THR_THREAD_UNLOCK(curthread, pthread); - _thr_ref_delete(curthread, pthread); } + THR_THREAD_UNLOCK(curthread, pthread); return (ret); } -- cgit v1.1 From 4583d3fdda444b54d1b6c10aead93bd7a73c8b7b Mon Sep 17 00:00:00 2001 From: davidxu Date: Fri, 20 Jul 2012 03:22:17 +0000 Subject: Don't assign same value. --- lib/libthr/thread/thr_setprio.c | 5 ++--- lib/libthr/thread/thr_setschedparam.c | 5 ++--- 2 files changed, 4 insertions(+), 6 deletions(-) (limited to 'lib') diff --git a/lib/libthr/thread/thr_setprio.c b/lib/libthr/thread/thr_setprio.c index b87d536..4b7d2c0 100644 --- a/lib/libthr/thread/thr_setprio.c +++ b/lib/libthr/thread/thr_setprio.c @@ -45,10 +45,9 @@ _pthread_setprio(pthread_t pthread, int prio) int ret; param.sched_priority = prio; - if (pthread == curthread) { - pthread = curthread; + if (pthread == curthread) THR_LOCK(curthread); - } else if ((ret = _thr_find_thread(curthread, pthread, /*include dead*/0))) + else if ((ret = _thr_find_thread(curthread, pthread, /*include dead*/0))) return (ret); if (pthread->attr.sched_policy == SCHED_OTHER || pthread->attr.prio == prio) { diff --git a/lib/libthr/thread/thr_setschedparam.c b/lib/libthr/thread/thr_setschedparam.c index 343e49c..6e98fb4 100644 --- a/lib/libthr/thread/thr_setschedparam.c +++ b/lib/libthr/thread/thr_setschedparam.c @@ -53,10 +53,9 @@ _pthread_setschedparam(pthread_t pthread, int policy, struct pthread *curthread = _get_curthread(); int ret; - if (pthread == curthread) { - pthread = curthread; + if (pthread == curthread) THR_LOCK(curthread); - } else if ((ret = _thr_find_thread(curthread, pthread, + else if ((ret = _thr_find_thread(curthread, pthread, /*include dead*/0)) != 0) return (ret); if (pthread->attr.sched_policy == policy && -- cgit v1.1 From 298769009da2d2356405a4415808b404e7c7d234 Mon Sep 17 00:00:00 2001 From: davidxu Date: Fri, 20 Jul 2012 03:27:07 +0000 Subject: Eliminate duplicated code. 
---
 lib/libthr/thread/thr_getschedparam.c | 29 ++++++++++-------------------
 1 file changed, 10 insertions(+), 19 deletions(-)

(limited to 'lib')

diff --git a/lib/libthr/thread/thr_getschedparam.c b/lib/libthr/thread/thr_getschedparam.c
index b36d724..e9f5d3b 100644
--- a/lib/libthr/thread/thr_getschedparam.c
+++ b/lib/libthr/thread/thr_getschedparam.c
@@ -53,25 +53,16 @@ _pthread_getschedparam(pthread_t pthread, int *policy,
 	if (policy == NULL || param == NULL)
 		return (EINVAL);
 
-	if (pthread == curthread) {
-		/*
-		 * Avoid searching the thread list when it is the current
-		 * thread.
-		 */
+	/*
+	 * Avoid searching the thread list when it is the current
+	 * thread.
+	 */
+	if (pthread == curthread)
 		THR_LOCK(curthread);
-		*policy = curthread->attr.sched_policy;
-		param->sched_priority = curthread->attr.prio;
-		THR_UNLOCK(curthread);
-		ret = 0;
-	}
-	/* Find the thread in the list of active threads. */
-	else if ((ret = _thr_ref_add(curthread, pthread, /*include dead*/0))
-	    == 0) {
-		THR_THREAD_LOCK(curthread, pthread);
-		*policy = pthread->attr.sched_policy;
-		param->sched_priority = pthread->attr.prio;
-		THR_THREAD_UNLOCK(curthread, pthread);
-		_thr_ref_delete(curthread, pthread);
-	}
+	else if ((ret = _thr_find_thread(curthread, pthread, /*include dead*/0)))
+		return (ret);
+	*policy = pthread->attr.sched_policy;
+	param->sched_priority = pthread->attr.prio;
+	THR_THREAD_UNLOCK(curthread, pthread);
 	return (ret);
 }
-- 
cgit v1.1

From 617731748280d2efa18842ecd83573892a9d73e3 Mon Sep 17 00:00:00 2001
From: davidxu
Date: Fri, 20 Jul 2012 03:37:19 +0000
Subject: Simplify code by replacing _thr_ref_add() with _thr_find_thread().

---
 lib/libthr/thread/thr_info.c | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

(limited to 'lib')

diff --git a/lib/libthr/thread/thr_info.c b/lib/libthr/thread/thr_info.c
index 2da6da2..350c848 100644
--- a/lib/libthr/thread/thr_info.c
+++ b/lib/libthr/thread/thr_info.c
@@ -51,16 +51,12 @@ _pthread_set_name_np(pthread_t thread, const char *name)
 		if (thr_set_name(thread->tid, name))
 			ret = errno;
 	} else {
-		if (_thr_ref_add(curthread, thread, 0) == 0) {
-			THR_THREAD_LOCK(curthread, thread);
+		if ((ret=_thr_find_thread(curthread, thread, 0)) == 0) {
 			if (thread->state != PS_DEAD) {
 				if (thr_set_name(thread->tid, name))
 					ret = errno;
 			}
 			THR_THREAD_UNLOCK(curthread, thread);
-			_thr_ref_delete(curthread, thread);
-		} else {
-			ret = ESRCH;
 		}
 	}
 #if 0
-- 
cgit v1.1

From 95c021b95c16705df9fa252f777905c05cb1c233 Mon Sep 17 00:00:00 2001
From: davidxu
Date: Fri, 20 Jul 2012 05:47:12 +0000
Subject: Don't forget to initialize return value.

---
 lib/libthr/thread/thr_getschedparam.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'lib')

diff --git a/lib/libthr/thread/thr_getschedparam.c b/lib/libthr/thread/thr_getschedparam.c
index e9f5d3b..ff6c3ab 100644
--- a/lib/libthr/thread/thr_getschedparam.c
+++ b/lib/libthr/thread/thr_getschedparam.c
@@ -48,7 +48,7 @@ _pthread_getschedparam(pthread_t pthread, int *policy,
 	struct sched_param *param)
 {
 	struct pthread *curthread = _get_curthread();
-	int ret;
+	int ret = 0;
 
 	if (policy == NULL || param == NULL)
 		return (EINVAL);
-- 
cgit v1.1

From e04825c920221bef72b0adc4386073fe8e20a946 Mon Sep 17 00:00:00 2001
From: kib
Date: Sat, 21 Jul 2012 13:02:11 +0000
Subject: (Incomplete) fixes for symbol visibility issues and style in
 fcntl.h.

Append '__' prefix to the tag of struct oflock, and put it under BSD
namespace. The structure is needed by both libc and the kernel, and thus
cannot be hidden under #ifdef _KERNEL.
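To make the renaming pattern concrete, an editorial sketch (not the literal
fcntl.h text; the field list follows the traditional flock layout): a tag
spelled with a leading '__' sits in the implementation namespace, so the
definition can stay visible to both libc and the kernel without claiming a
user-visible name:

	struct __oflock {
		off_t	l_start;	/* starting offset */
		off_t	l_len;		/* len = 0 means until end of file */
		pid_t	l_pid;		/* lock owner */
		short	l_type;		/* lock type: read/write, etc. */
		short	l_whence;	/* type of l_start */
	};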
Move a set of non-standard F_* and O_* constants into BSD namespace. SUSv4
explicitly allows an implementation to pollute F_* and O_* names after fcntl.h
is included, but it costs us nothing to adhere to the specification if an
exact POSIX compliance level is requested by user code.

Change some spaces after #define to tabs.

Noted by and discussed with:	bde
MFC after:	1 week
---
 lib/libc/sys/fcntl.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'lib')

diff --git a/lib/libc/sys/fcntl.c b/lib/libc/sys/fcntl.c
index 470f8ab..480cc40 100644
--- a/lib/libc/sys/fcntl.c
+++ b/lib/libc/sys/fcntl.c
@@ -41,7 +41,7 @@ __fcntl_compat(int fd, int cmd, ...)
 {
 	va_list args;
 	long arg;
-	struct oflock ofl;
+	struct __oflock ofl;
 	struct flock *flp;
 	int res;
 
-- 
cgit v1.1

From b4194dfd5548262d72f4d521bb5895aa65dff172 Mon Sep 17 00:00:00 2001
From: kargl
Date: Mon, 23 Jul 2012 19:13:55 +0000
Subject: Compute the exponential of x for Intel 80-bit format and IEEE
 128-bit format. These implementations are based on

PTP Tang, "Table-driven implementation of the exponential function
in IEEE floating-point arithmetic," ACM Trans. Math. Soft., 15,
144-157 (1989).

PR:	standards/152415
Submitted by:	kargl
Reviewed by:	bde, das
Approved by:	das (mentor)
---
 lib/msun/Symbol.map         |   1 +
 lib/msun/ld128/s_expl.c     | 260 +++++++++++++++++++++++++++++++++++++
 lib/msun/ld80/s_expl.c      | 304 ++++++++++++++++++++++++++++++++++++++++
 lib/msun/man/exp.3          |  14 +-
 lib/msun/src/e_exp.c        |   4 +
 lib/msun/src/math.h         |   2 +-
 lib/msun/src/math_private.h |  41 ++++++
 7 files changed, 620 insertions(+), 6 deletions(-)
 create mode 100644 lib/msun/ld128/s_expl.c
 create mode 100644 lib/msun/ld80/s_expl.c

(limited to 'lib')

diff --git a/lib/msun/Symbol.map b/lib/msun/Symbol.map
index f3d3000..76f1bfb 100644
--- a/lib/msun/Symbol.map
+++ b/lib/msun/Symbol.map
@@ -249,4 +249,5 @@ FBSD_1.3 {
 	ctanf;
 	ctanh;
 	ctanhf;
+	expl;
 };
diff --git a/lib/msun/ld128/s_expl.c b/lib/msun/ld128/s_expl.c
new file mode 100644
index 0000000..624cb8d
--- /dev/null
+++ b/lib/msun/ld128/s_expl.c
@@ -0,0 +1,260 @@
+/*-
+ * Copyright (c) 2012 Steven G. Kargl
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice unmodified, this list of conditions, and the following
+ *    disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */ + +#include +__FBSDID("$FreeBSD$"); + +#include + +#include "math.h" +#include "math_private.h" +#include "fpmath.h" + +#define BIAS (LDBL_MAX_EXP - 1) +#define EXPMASK (BIAS + LDBL_MAX_EXP) + +static volatile const long double twom10000 = 0x1p-10000L, tiny = 0x1p-10000L; + +static const long double +huge = 0x1p10000L, +o_threshold = 11356.523406294143949491931077970763428L, +u_threshold = -11433.462743336297878837243843452621503L, +L1 = 5.41521234812457272982212595914567508e-03L, +L2 = -1.02536706388947310094527932552595546e-29L, +INV_L = 1.84664965233787316142070359168242182e+02L; + +static const long double +P2 = 5.00000000000000000000000000000000000e-1L, +P3 = 1.66666666666666666666666666666666972e-1L, +P4 = 4.16666666666666666666666666653708268e-2L, +P5 = 8.33333333333333333333333315069867254e-3L, +P6 = 1.38888888888888888888996596213795377e-3L, +P7 = 1.98412698412698412718821436278644414e-4L, +P8 = 2.48015873015869681884882576649543128e-5L, +P9 = 2.75573192240103867817876199544468806e-6L, +P10 = 2.75573236172670046201884000197885520e-7L, +P11 = 2.50517544183909126492878226167697856e-8L; + +#define NUM 128 + +static const struct { + long double hi; + long double lo; +} s[NUM] = { + 0x1p0L, 0x0p0L, + 0x1.0163da9fb33356d84a66aep0L, 0x3.36dcdfa4003ec04c360be2404078p-92L, + 0x1.02c9a3e778060ee6f7cacap0L, 0x4.f7a29bde93d70a2cabc5cb89ba10p-92L, + 0x1.04315e86e7f84bd738f9a2p0L, 0xd.a47e6ed040bb4bfc05af6455e9b8p-96L, + 0x1.059b0d31585743ae7c548ep0L, 0xb.68ca417fe53e3495f7df4baf84a0p-92L, + 0x1.0706b29ddf6ddc6dc403a8p0L, 0x1.d87b27ed07cb8b092ac75e311753p-88L, + 0x1.0874518759bc808c35f25cp0L, 0x1.9427fa2b041b2d6829d8993a0d01p-88L, + 0x1.09e3ecac6f3834521e060cp0L, 0x5.84d6b74ba2e023da730e7fccb758p-92L, + 0x1.0b5586cf9890f6298b92b6p0L, 0x1.1842a98364291408b3ceb0a2a2bbp-88L, + 0x1.0cc922b7247f7407b705b8p0L, 0x9.3dc5e8aac564e6fe2ef1d431fd98p-92L, + 0x1.0e3ec32d3d1a2020742e4ep0L, 0x1.8af6a552ac4b358b1129e9f966a4p-88L, + 0x1.0fb66affed31af232091dcp0L, 0x1.8a1426514e0b627bda694a400a27p-88L, + 0x1.11301d0125b50a4ebbf1aep0L, 0xd.9318ceac5cc47ab166ee57427178p-92L, + 0x1.12abdc06c31cbfb92bad32p0L, 0x4.d68e2f7270bdf7cedf94eb1cb818p-92L, + 0x1.1429aaea92ddfb34101942p0L, 0x1.b2586d01844b389bea7aedd221d4p-88L, + 0x1.15a98c8a58e512480d573cp0L, 0x1.d5613bf92a2b618ee31b376c2689p-88L, + 0x1.172b83c7d517adcdf7c8c4p0L, 0x1.0eb14a792035509ff7d758693f24p-88L, + 0x1.18af9388c8de9bbbf70b9ap0L, 0x3.c2505c97c0102e5f1211941d2840p-92L, + 0x1.1a35beb6fcb753cb698f68p0L, 0x1.2d1c835a6c30724d5cfae31b84e5p-88L, + 0x1.1bbe084045cd39ab1e72b4p0L, 0x4.27e35f9acb57e473915519a1b448p-92L, + 0x1.1d4873168b9aa7805b8028p0L, 0x9.90f07a98b42206e46166cf051d70p-92L, + 0x1.1ed5022fcd91cb8819ff60p0L, 0x1.121d1e504d36c47474c9b7de6067p-88L, + 0x1.2063b88628cd63b8eeb028p0L, 0x1.50929d0fc487d21c2b84004264dep-88L, + 0x1.21f49917ddc962552fd292p0L, 0x9.4bdb4b61ea62477caa1dce823ba0p-92L, + 0x1.2387a6e75623866c1fadb0p0L, 0x1.c15cb593b0328566902df69e4de2p-88L, + 0x1.251ce4fb2a63f3582ab7dep0L, 0x9.e94811a9c8afdcf796934bc652d0p-92L, + 0x1.26b4565e27cdd257a67328p0L, 0x1.d3b249dce4e9186ddd5ff44e6b08p-92L, + 0x1.284dfe1f5638096cf15cf0p0L, 0x3.ca0967fdaa2e52d7c8106f2e262cp-92L, + 0x1.29e9df51fdee12c25d15f4p0L, 0x1.a24aa3bca890ac08d203fed80a07p-88L, + 0x1.2b87fd0dad98ffddea4652p0L, 0x1.8fcab88442fdc3cb6de4519165edp-88L, + 0x1.2d285a6e4030b40091d536p0L, 0xd.075384589c1cd1b3e4018a6b1348p-92L, + 0x1.2ecafa93e2f5611ca0f45cp0L, 0x1.523833af611bdcda253c554cf278p-88L, + 0x1.306fe0a31b7152de8d5a46p0L, 0x3.05c85edecbc27343629f502f1af2p-92L, + 
0x1.32170fc4cd8313539cf1c2p0L, 0x1.008f86dde3220ae17a005b6412bep-88L, + 0x1.33c08b26416ff4c9c8610cp0L, 0x1.96696bf95d1593039539d94d662bp-88L, + 0x1.356c55f929ff0c94623476p0L, 0x3.73af38d6d8d6f9506c9bbc93cbc0p-92L, + 0x1.371a7373aa9caa7145502ep0L, 0x1.4547987e3e12516bf9c699be432fp-88L, + 0x1.38cae6d05d86585a9cb0d8p0L, 0x1.bed0c853bd30a02790931eb2e8f0p-88L, + 0x1.3a7db34e59ff6ea1bc9298p0L, 0x1.e0a1d336163fe2f852ceeb134067p-88L, + 0x1.3c32dc313a8e484001f228p0L, 0xb.58f3775e06ab66353001fae9fca0p-92L, + 0x1.3dea64c12342235b41223ep0L, 0x1.3d773fba2cb82b8244267c54443fp-92L, + 0x1.3fa4504ac801ba0bf701aap0L, 0x4.1832fb8c1c8dbdff2c49909e6c60p-92L, + 0x1.4160a21f72e29f84325b8ep0L, 0x1.3db61fb352f0540e6ba05634413ep-88L, + 0x1.431f5d950a896dc7044394p0L, 0x1.0ccec81e24b0caff7581ef4127f7p-92L, + 0x1.44e086061892d03136f408p0L, 0x1.df019fbd4f3b48709b78591d5cb5p-88L, + 0x1.46a41ed1d005772512f458p0L, 0x1.229d97df404ff21f39c1b594d3a8p-88L, + 0x1.486a2b5c13cd013c1a3b68p0L, 0x1.062f03c3dd75ce8757f780e6ec99p-88L, + 0x1.4a32af0d7d3de672d8bcf4p0L, 0x6.f9586461db1d878b1d148bd3ccb8p-92L, + 0x1.4bfdad5362a271d4397afep0L, 0xc.42e20e0363ba2e159c579f82e4b0p-92L, + 0x1.4dcb299fddd0d63b36ef1ap0L, 0x9.e0cc484b25a5566d0bd5f58ad238p-92L, + 0x1.4f9b2769d2ca6ad33d8b68p0L, 0x1.aa073ee55e028497a329a7333dbap-88L, + 0x1.516daa2cf6641c112f52c8p0L, 0x4.d822190e718226177d7608d20038p-92L, + 0x1.5342b569d4f81df0a83c48p0L, 0x1.d86a63f4e672a3e429805b049465p-88L, + 0x1.551a4ca5d920ec52ec6202p0L, 0x4.34ca672645dc6c124d6619a87574p-92L, + 0x1.56f4736b527da66ecb0046p0L, 0x1.64eb3c00f2f5ab3d801d7cc7272dp-88L, + 0x1.58d12d497c7fd252bc2b72p0L, 0x1.43bcf2ec936a970d9cc266f0072fp-88L, + 0x1.5ab07dd48542958c930150p0L, 0x1.91eb345d88d7c81280e069fbdb63p-88L, + 0x1.5c9268a5946b701c4b1b80p0L, 0x1.6986a203d84e6a4a92f179e71889p-88L, + 0x1.5e76f15ad21486e9be4c20p0L, 0x3.99766a06548a05829e853bdb2b52p-92L, + 0x1.605e1b976dc08b076f592ap0L, 0x4.86e3b34ead1b4769df867b9c89ccp-92L, + 0x1.6247eb03a5584b1f0fa06ep0L, 0x1.d2da42bb1ceaf9f732275b8aef30p-88L, + 0x1.6434634ccc31fc76f8714cp0L, 0x4.ed9a4e41000307103a18cf7a6e08p-92L, + 0x1.66238825522249127d9e28p0L, 0x1.b8f314a337f4dc0a3adf1787ff74p-88L, + 0x1.68155d44ca973081c57226p0L, 0x1.b9f32706bfe4e627d809a85dcc66p-88L, + 0x1.6a09e667f3bcc908b2fb12p0L, 0x1.66ea957d3e3adec17512775099dap-88L, + 0x1.6c012750bdabeed76a9980p0L, 0xf.4f33fdeb8b0ecd831106f57b3d00p-96L, + 0x1.6dfb23c651a2ef220e2cbep0L, 0x1.bbaa834b3f11577ceefbe6c1c411p-92L, + 0x1.6ff7df9519483cf87e1b4ep0L, 0x1.3e213bff9b702d5aa477c12523cep-88L, + 0x1.71f75e8ec5f73dd2370f2ep0L, 0xf.0acd6cb434b562d9e8a20adda648p-92L, + 0x1.73f9a48a58173bd5c9a4e6p0L, 0x8.ab1182ae217f3a7681759553e840p-92L, + 0x1.75feb564267c8bf6e9aa32p0L, 0x1.a48b27071805e61a17b954a2dad8p-88L, + 0x1.780694fde5d3f619ae0280p0L, 0x8.58b2bb2bdcf86cd08e35fb04c0f0p-92L, + 0x1.7a11473eb0186d7d51023ep0L, 0x1.6cda1f5ef42b66977960531e821bp-88L, + 0x1.7c1ed0130c1327c4933444p0L, 0x1.937562b2dc933d44fc828efd4c9cp-88L, + 0x1.7e2f336cf4e62105d02ba0p0L, 0x1.5797e170a1427f8fcdf5f3906108p-88L, + 0x1.80427543e1a11b60de6764p0L, 0x9.a354ea706b8e4d8b718a672bf7c8p-92L, + 0x1.82589994cce128acf88afap0L, 0xb.34a010f6ad65cbbac0f532d39be0p-92L, + 0x1.8471a4623c7acce52f6b96p0L, 0x1.c64095370f51f48817914dd78665p-88L, + 0x1.868d99b4492ec80e41d90ap0L, 0xc.251707484d73f136fb5779656b70p-92L, + 0x1.88ac7d98a669966530bcdep0L, 0x1.2d4e9d61283ef385de170ab20f96p-88L, + 0x1.8ace5422aa0db5ba7c55a0p0L, 0x1.92c9bb3e6ed61f2733304a346d8fp-88L, + 0x1.8cf3216b5448bef2aa1cd0p0L, 0x1.61c55d84a9848f8c453b3ca8c946p-88L, + 
0x1.8f1ae991577362b982745cp0L, 0x7.2ed804efc9b4ae1458ae946099d4p-92L, + 0x1.9145b0b91ffc588a61b468p0L, 0x1.f6b70e01c2a90229a4c4309ea719p-88L, + 0x1.93737b0cdc5e4f4501c3f2p0L, 0x5.40a22d2fc4af581b63e8326efe9cp-92L, + 0x1.95a44cbc8520ee9b483694p0L, 0x1.a0fc6f7c7d61b2b3a22a0eab2cadp-88L, + 0x1.97d829fde4e4f8b9e920f8p0L, 0x1.1e8bd7edb9d7144b6f6818084cc7p-88L, + 0x1.9a0f170ca07b9ba3109b8cp0L, 0x4.6737beb19e1eada6825d3c557428p-92L, + 0x1.9c49182a3f0901c7c46b06p0L, 0x1.1f2be58ddade50c217186c90b457p-88L, + 0x1.9e86319e323231824ca78ep0L, 0x6.4c6e010f92c082bbadfaf605cfd4p-92L, + 0x1.a0c667b5de564b29ada8b8p0L, 0xc.ab349aa0422a8da7d4512edac548p-92L, + 0x1.a309bec4a2d3358c171f76p0L, 0x1.0daad547fa22c26d168ea762d854p-88L, + 0x1.a5503b23e255c8b424491cp0L, 0xa.f87bc8050a405381703ef7caff50p-92L, + 0x1.a799e1330b3586f2dfb2b0p0L, 0x1.58f1a98796ce8908ae852236ca94p-88L, + 0x1.a9e6b5579fdbf43eb243bcp0L, 0x1.ff4c4c58b571cf465caf07b4b9f5p-88L, + 0x1.ac36bbfd3f379c0db966a2p0L, 0x1.1265fc73e480712d20f8597a8e7bp-88L, + 0x1.ae89f995ad3ad5e8734d16p0L, 0x1.73205a7fbc3ae675ea440b162d6cp-88L, + 0x1.b0e07298db66590842acdep0L, 0x1.c6f6ca0e5dcae2aafffa7a0554cbp-88L, + 0x1.b33a2b84f15faf6bfd0e7ap0L, 0x1.d947c2575781dbb49b1237c87b6ep-88L, + 0x1.b59728de559398e3881110p0L, 0x1.64873c7171fefc410416be0a6525p-88L, + 0x1.b7f76f2fb5e46eaa7b081ap0L, 0xb.53c5354c8903c356e4b625aacc28p-92L, + 0x1.ba5b030a10649840cb3c6ap0L, 0xf.5b47f297203757e1cc6eadc8bad0p-92L, + 0x1.bcc1e904bc1d2247ba0f44p0L, 0x1.b3d08cd0b20287092bd59be4ad98p-88L, + 0x1.bf2c25bd71e088408d7024p0L, 0x1.18e3449fa073b356766dfb568ff4p-88L, + 0x1.c199bdd85529c2220cb12ap0L, 0x9.1ba6679444964a36661240043970p-96L, + 0x1.c40ab5fffd07a6d14df820p0L, 0xf.1828a5366fd387a7bdd54cdf7300p-92L, + 0x1.c67f12e57d14b4a2137fd2p0L, 0xf.2b301dd9e6b151a6d1f9d5d5f520p-96L, + 0x1.c8f6d9406e7b511acbc488p0L, 0x5.c442ddb55820171f319d9e5076a8p-96L, + 0x1.cb720dcef90691503cbd1ep0L, 0x9.49db761d9559ac0cb6dd3ed599e0p-92L, + 0x1.cdf0b555dc3f9c44f8958ep0L, 0x1.ac51be515f8c58bdfb6f5740a3a4p-88L, + 0x1.d072d4a07897b8d0f22f20p0L, 0x1.a158e18fbbfc625f09f4cca40874p-88L, + 0x1.d2f87080d89f18ade12398p0L, 0x9.ea2025b4c56553f5cdee4c924728p-92L, + 0x1.d5818dcfba48725da05aeap0L, 0x1.66e0dca9f589f559c0876ff23830p-88L, + 0x1.d80e316c98397bb84f9d04p0L, 0x8.805f84bec614de269900ddf98d28p-92L, + 0x1.da9e603db3285708c01a5ap0L, 0x1.6d4c97f6246f0ec614ec95c99392p-88L, + 0x1.dd321f301b4604b695de3cp0L, 0x6.30a393215299e30d4fb73503c348p-96L, + 0x1.dfc97337b9b5eb968cac38p0L, 0x1.ed291b7225a944efd5bb5524b927p-88L, + 0x1.e264614f5a128a12761fa0p0L, 0x1.7ada6467e77f73bf65e04c95e29dp-88L, + 0x1.e502ee78b3ff6273d13014p0L, 0x1.3991e8f49659e1693be17ae1d2f9p-88L, + 0x1.e7a51fbc74c834b548b282p0L, 0x1.23786758a84f4956354634a416cep-88L, + 0x1.ea4afa2a490d9858f73a18p0L, 0xf.5db301f86dea20610ceee13eb7b8p-92L, + 0x1.ecf482d8e67f08db0312fap0L, 0x1.949cef462010bb4bc4ce72a900dfp-88L, + 0x1.efa1bee615a27771fd21a8p0L, 0x1.2dac1f6dd5d229ff68e46f27e3dfp-88L, + 0x1.f252b376bba974e8696fc2p0L, 0x1.6390d4c6ad5476b5162f40e1d9a9p-88L, + 0x1.f50765b6e4540674f84b76p0L, 0x2.862baff99000dfc4352ba29b8908p-92L, + 0x1.f7bfdad9cbe138913b4bfep0L, 0x7.2bd95c5ce7280fa4d2344a3f5618p-92L, + 0x1.fa7c1819e90d82e90a7e74p0L, 0xb.263c1dc060c36f7650b4c0f233a8p-92L, + 0x1.fd3c22b8f71f10975ba4b2p0L, 0x1.2bcf3a5e12d269d8ad7c1a4a8875p-88L +}; + +long double +expl(long double x) +{ + union IEEEl2bits u, v; + long double fn, r, r1, r2, q, t, twopk, twopkp10000; + int k, n, n2; + uint32_t hx, ix; + + /* Filter out exceptional cases. 
*/ + u.e = x; + hx = u.xbits.expsign; + ix = hx & EXPMASK; + if (ix >= BIAS + 13) { /* |x| >= 8192 or x is NaN */ + if (ix == BIAS + LDBL_MAX_EXP) { + if (u.xbits.manh != 0 + || u.xbits.manl != 0 + || (hx & 0x8000) == 0) + return (x + x); /* x is NaN or +Inf */ + else + return (0.0); /* x is -Inf */ + } + if (x > o_threshold) + return (huge * huge); + if (x < u_threshold) + return (tiny * tiny); + } else if (ix <= BIAS - 115) { /* |x| < 0x1p-33 */ + /* includes pseudo-denormals */ + if (huge + x > 1.0L) /* trigger inexact iff x != 0 */ + return (1.0L + x); + } + + fn = x * INV_L + 0x1.8p112 - 0x1.8p112; + n = (int)fn; + n2 = (unsigned)n % NUM; /* Tang's j. */ + k = (n - n2) / NUM; + r1 = x - fn * L1; + r2 = -fn * L2; + + /* Prepare scale factors. */ + v.xbits.manh = 0; + v.xbits.manl = 0; + if (k >= LDBL_MIN_EXP) { + v.xbits.expsign = BIAS + k; + twopk = v.e; + } else { + v.xbits.expsign = BIAS + k + 10000; + twopkp10000 = v.e; + } + + r = r1 + r2; + q = r * r * (P2 + r * (P3 + r * (P4 + r * (P5 + r * (P6 + r * (P7 + + r * (P8 + r * (P9 + r * (P10 + r * P11))))))))); + t = s[n2].lo + s[n2].hi; + t = s[n2].hi + (s[n2].lo + t * (r2 + q + r1)); + + /* Scale by 2**k. */ + if (k >= LDBL_MIN_EXP) { + if (k == LDBL_MAX_EXP) + return (t * 2.0L * 0x1p16383L); + return (t * twopk); + } else { + return (t * twopkp10000 * twom10000); + } +} diff --git a/lib/msun/ld80/s_expl.c b/lib/msun/ld80/s_expl.c new file mode 100644 index 0000000..d2faad2 --- /dev/null +++ b/lib/msun/ld80/s_expl.c @@ -0,0 +1,304 @@ +/*- + * Copyright (c) 2009-2012 Steven G. Kargl + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice unmodified, this list of conditions, and the following + * disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Optimized by Bruce D. Evans. + */ + +#include +__FBSDID("$FreeBSD$"); + +/* + * Compute the exponential of x for Intel 80-bit format. This is based on: + * + * PTP Tang, "Table-driven implementation of the exponential function + * in IEEE floating-point arithmetic," ACM Trans. Math. Soft., 15, + * 144-157 (1989). + * + * where the 32 table entries have been expanded to NUM (see below). 
+ */ + +#include + +#ifdef __i386__ +#include +#endif + +#include "math.h" +#define FPSETPREC +#ifdef NO_FPSETPREC +#undef FPSETPREC +#endif +#include "math_private.h" +#include "fpmath.h" + +#define BIAS (LDBL_MAX_EXP - 1) + +static const long double +huge = 0x1p10000L, +twom10000 = 0x1p-10000L; +/* XXX Prevent gcc from erroneously constant folding this: */ +static volatile const long double tiny = 0x1p-10000L; + +static const union IEEEl2bits +/* log(2**16384 - 0.5) rounded towards zero: */ +o_threshold = LD80C(0xb17217f7d1cf79ab, 13, 0, 11356.5234062941439488L), +/* log(2**(-16381-64-1)) rounded towards zero: */ +u_threshold = LD80C(0xb21dfe7f09e2baa9, 13, 1, -11399.4985314888605581L); + +static const double __aligned(64) +/* + * ln2/NUM = L1+L2 (hi+lo decomposition for multiplication). L1 must have + * at least 22 (= log2(|LDBL_MIN_EXP-extras|) + log2(NUM)) lowest bits zero + * so that multiplication of it by n is exact. + */ +L1 = 5.4152123484527692e-3, /* 0x162e42ff000000.0p-60 */ +L2 = -3.2819649005320973e-13, /* -0x1718432a1b0e26.0p-94 */ +INV_L = 1.8466496523378731e+2, /* 0x171547652b82fe.0p-45 */ +/* + * Domain [-0.002708, 0.002708], range ~[-5.7136e-24, 5.7110e-24]: + * |exp(x) - p(x)| < 2**-77.2 + * (0.002708 is ln2/(2*NUM) rounded up a little). + */ +P2 = 0.5, +P3 = 1.6666666666666119e-1, /* 0x15555555555490.0p-55 */ +P4 = 4.1666666666665887e-2, /* 0x155555555554e5.0p-57 */ +P5 = 8.3333354987869413e-3, /* 0x1111115b789919.0p-59 */ +P6 = 1.3888891738560272e-3; /* 0x16c16c651633ae.0p-62 */ + +/* + * 2^(i/NUM) for i in [0,NUM] is represented by two values where the + * first 47 (?!) bits of the significand is stored in hi and the next 53 + * bits are in lo. + */ +#define NUM 128 + +static const struct { + double hi; + double lo; +} s[NUM] __aligned(16) = { + 0x1p+0, 0x0p+0, + 0x1.0163da9fb330p+0, 0x1.ab6c25335719bp-47, + 0x1.02c9a3e77804p+0, 0x1.07737be56527cp-47, + 0x1.04315e86e7f8p+0, 0x1.2f5ce3e688369p-50, + 0x1.059b0d315854p+0, 0x1.a1d73e2a475b4p-47, + 0x1.0706b29ddf6cp+0, 0x1.dc6dc403a9d88p-48, + 0x1.0874518759bcp+0, 0x1.01186be4bb285p-49, + 0x1.09e3ecac6f38p+0, 0x1.a290f03062c27p-51, + 0x1.0b5586cf9890p+0, 0x1.ec5317256e308p-49, + 0x1.0cc922b7247cp+0, 0x1.ba03db82dc49fp-47, + 0x1.0e3ec32d3d18p+0, 0x1.10103a1727c58p-47, + 0x1.0fb66affed30p+0, 0x1.af232091dd8a1p-48, + 0x1.11301d0125b4p+0, 0x1.0a4ebbf1aed93p-48, + 0x1.12abdc06c31cp+0, 0x1.7f72575a649adp-49, + 0x1.1429aaea92dcp+0, 0x1.fb34101943b26p-48, + 0x1.15a98c8a58e4p+0, 0x1.12480d573dd56p-48, + 0x1.172b83c7d514p+0, 0x1.d6e6fbe462876p-47, + 0x1.18af9388c8dcp+0, 0x1.4dddfb85cd1e1p-47, + 0x1.1a35beb6fcb4p+0, 0x1.a9e5b4c7b4969p-47, + 0x1.1bbe084045ccp+0, 0x1.39ab1e72b4428p-48, + 0x1.1d4873168b98p+0, 0x1.53c02dc0144c8p-47, + 0x1.1ed5022fcd90p+0, 0x1.cb8819ff61122p-48, + 0x1.2063b88628ccp+0, 0x1.63b8eeb029509p-48, + 0x1.21f49917ddc8p+0, 0x1.62552fd29294cp-48, + 0x1.2387a6e75620p+0, 0x1.c3360fd6d8e0bp-47, + 0x1.251ce4fb2a60p+0, 0x1.f9ac155bef4f5p-47, + 0x1.26b4565e27ccp+0, 0x1.d257a673281d4p-48, + 0x1.284dfe1f5638p+0, 0x1.2d9e2b9e07941p-53, + 0x1.29e9df51fdecp+0, 0x1.09612e8afad12p-47, + 0x1.2b87fd0dad98p+0, 0x1.ffbbd48ca71f9p-49, + 0x1.2d285a6e4030p+0, 0x1.680123aa6da0fp-49, + 0x1.2ecafa93e2f4p+0, 0x1.611ca0f45d524p-48, + 0x1.306fe0a31b70p+0, 0x1.52de8d5a46306p-48, + 0x1.32170fc4cd80p+0, 0x1.89a9ce78e1804p-47, + 0x1.33c08b26416cp+0, 0x1.fa64e43086cb3p-47, + 0x1.356c55f929fcp+0, 0x1.864a311a3b1bap-47, + 0x1.371a7373aa9cp+0, 0x1.54e28aa05e8a9p-49, + 0x1.38cae6d05d84p+0, 0x1.2c2d4e586cdf7p-47, + 0x1.3a7db34e59fcp+0, 
0x1.b750de494cf05p-47, + 0x1.3c32dc313a8cp+0, 0x1.242000f9145acp-47, + 0x1.3dea64c12340p+0, 0x1.11ada0911f09fp-47, + 0x1.3fa4504ac800p+0, 0x1.ba0bf701aa418p-48, + 0x1.4160a21f72e0p+0, 0x1.4fc2192dc79eep-47, + 0x1.431f5d950a88p+0, 0x1.6dc704439410dp-48, + 0x1.44e086061890p+0, 0x1.68189b7a04ef8p-47, + 0x1.46a41ed1d004p+0, 0x1.772512f45922ap-48, + 0x1.486a2b5c13ccp+0, 0x1.013c1a3b69063p-48, + 0x1.4a32af0d7d3cp+0, 0x1.e672d8bcf46f9p-48, + 0x1.4bfdad5362a0p+0, 0x1.38ea1cbd7f621p-47, + 0x1.4dcb299fddd0p+0, 0x1.ac766dde353c2p-49, + 0x1.4f9b2769d2c8p+0, 0x1.35699ec5b4d50p-47, + 0x1.516daa2cf664p+0, 0x1.c112f52c84d82p-52, + 0x1.5342b569d4f8p+0, 0x1.df0a83c49d86ap-52, + 0x1.551a4ca5d920p+0, 0x1.d8a5d8c40486ap-49, + 0x1.56f4736b527cp+0, 0x1.a66ecb004764fp-48, + 0x1.58d12d497c7cp+0, 0x1.e9295e15b9a1ep-47, + 0x1.5ab07dd48540p+0, 0x1.4ac64980a8c8fp-47, + 0x1.5c9268a59468p+0, 0x1.b80e258dc0b4cp-47, + 0x1.5e76f15ad214p+0, 0x1.0dd37c9840733p-49, + 0x1.605e1b976dc0p+0, 0x1.160edeb25490ep-49, + 0x1.6247eb03a558p+0, 0x1.2c7c3e81bf4b7p-50, + 0x1.6434634ccc30p+0, 0x1.fc76f8714c4eep-48, + 0x1.662388255220p+0, 0x1.24893ecf14dc8p-47, + 0x1.68155d44ca94p+0, 0x1.9840e2b913dd0p-47, + 0x1.6a09e667f3bcp+0, 0x1.921165f626cddp-49, + 0x1.6c012750bda8p+0, 0x1.f76bb54cc007ap-47, + 0x1.6dfb23c651a0p+0, 0x1.779107165f0dep-47, + 0x1.6ff7df951948p+0, 0x1.e7c3f0da79f11p-51, + 0x1.71f75e8ec5f4p+0, 0x1.9ee91b8797785p-47, + 0x1.73f9a48a5814p+0, 0x1.9deae4d273456p-47, + 0x1.75feb564267cp+0, 0x1.17edd35467491p-49, + 0x1.780694fde5d0p+0, 0x1.fb0cd7014042cp-47, + 0x1.7a11473eb018p+0, 0x1.b5f54408fdb37p-50, + 0x1.7c1ed0130c10p+0, 0x1.93e2499a22c9cp-47, + 0x1.7e2f336cf4e4p+0, 0x1.1082e815d0abdp-47, + 0x1.80427543e1a0p+0, 0x1.1b60de67649a3p-48, + 0x1.82589994cce0p+0, 0x1.28acf88afab35p-48, + 0x1.8471a4623c78p+0, 0x1.667297b5cbe32p-47, + 0x1.868d99b4492cp+0, 0x1.640720ec85613p-47, + 0x1.88ac7d98a668p+0, 0x1.966530bcdf2d5p-48, + 0x1.8ace5422aa0cp+0, 0x1.b5ba7c55a192dp-48, + 0x1.8cf3216b5448p+0, 0x1.7de55439a2c39p-49, + 0x1.8f1ae9915770p+0, 0x1.b15cc13a2e397p-47, + 0x1.9145b0b91ffcp+0, 0x1.622986d1a7daep-50, + 0x1.93737b0cdc5cp+0, 0x1.27a280e1f92a0p-47, + 0x1.95a44cbc8520p+0, 0x1.dd36906d2b420p-49, + 0x1.97d829fde4e4p+0, 0x1.f173d241f23d1p-49, + 0x1.9a0f170ca078p+0, 0x1.cdd1884dc6234p-47, + 0x1.9c49182a3f08p+0, 0x1.01c7c46b071f3p-48, + 0x1.9e86319e3230p+0, 0x1.18c12653c7326p-47, + 0x1.a0c667b5de54p+0, 0x1.2594d6d45c656p-47, + 0x1.a309bec4a2d0p+0, 0x1.9ac60b8fbb86dp-47, + 0x1.a5503b23e254p+0, 0x1.c8b424491caf8p-48, + 0x1.a799e1330b34p+0, 0x1.86f2dfb2b158fp-48, + 0x1.a9e6b5579fd8p+0, 0x1.fa1f5921deffap-47, + 0x1.ac36bbfd3f34p+0, 0x1.ce06dcb351893p-47, + 0x1.ae89f995ad38p+0, 0x1.6af439a68bb99p-47, + 0x1.b0e07298db64p+0, 0x1.2c8421566fe38p-47, + 0x1.b33a2b84f15cp+0, 0x1.d7b5fe873decap-47, + 0x1.b59728de5590p+0, 0x1.cc71c40888b24p-47, + 0x1.b7f76f2fb5e4p+0, 0x1.baa9ec206ad4fp-50, + 0x1.ba5b030a1064p+0, 0x1.30819678d5eb7p-49, + 0x1.bcc1e904bc1cp+0, 0x1.2247ba0f45b3dp-48, + 0x1.bf2c25bd71e0p+0, 0x1.10811ae04a31cp-49, + 0x1.c199bdd85528p+0, 0x1.c2220cb12a092p-48, + 0x1.c40ab5fffd04p+0, 0x1.d368a6fc1078cp-47, + 0x1.c67f12e57d14p+0, 0x1.694426ffa41e5p-49, + 0x1.c8f6d9406e78p+0, 0x1.a88d65e24402ep-47, + 0x1.cb720dcef904p+0, 0x1.48a81e5e8f4a5p-47, + 0x1.cdf0b555dc3cp+0, 0x1.ce227c4ac7d63p-47, + 0x1.d072d4a07894p+0, 0x1.dc68791790d0bp-47, + 0x1.d2f87080d89cp+0, 0x1.8c56f091cc4f5p-47, + 0x1.d5818dcfba48p+0, 0x1.c976816bad9b8p-50, + 0x1.d80e316c9838p+0, 0x1.7bb84f9d04880p-48, + 0x1.da9e603db328p+0, 0x1.5c2300696db53p-50, + 0x1.dd321f301b44p+0, 
0x1.025b4aef1e032p-47, + 0x1.dfc97337b9b4p+0, 0x1.eb968cac39ed3p-48, + 0x1.e264614f5a10p+0, 0x1.45093b0fd0bd7p-47, + 0x1.e502ee78b3fcp+0, 0x1.b139e8980a9cdp-47, + 0x1.e7a51fbc74c8p+0, 0x1.a5aa4594191bcp-51, + 0x1.ea4afa2a490cp+0, 0x1.9858f73a18f5ep-48, + 0x1.ecf482d8e67cp+0, 0x1.846d81897dca5p-47, + 0x1.efa1bee615a0p+0, 0x1.3bb8fe90d496dp-47, + 0x1.f252b376bba8p+0, 0x1.74e8696fc3639p-48, + 0x1.f50765b6e454p+0, 0x1.9d3e12dd8a18bp-54, + 0x1.f7bfdad9cbe0p+0, 0x1.38913b4bfe72cp-48, + 0x1.fa7c1819e90cp+0, 0x1.82e90a7e74b26p-48, + 0x1.fd3c22b8f71cp+0, 0x1.884badd25995ep-47 +}; + +long double +expl(long double x) +{ + union IEEEl2bits u, v; + long double fn, r, r1, r2, q, t, t23, t45, twopk, twopkp10000, z; + int k, n, n2; + uint16_t hx, ix; + + /* Filter out exceptional cases. */ + u.e = x; + hx = u.xbits.expsign; + ix = hx & 0x7fff; + if (ix >= BIAS + 13) { /* |x| >= 8192 or x is NaN */ + if (ix == BIAS + LDBL_MAX_EXP) { + if (hx & 0x8000 && u.xbits.man == 1ULL << 63) + return (0.0L); /* x is -Inf */ + return (x + x); /* x is +Inf, NaN or unsupported */ + } + if (x > o_threshold.e) + return (huge * huge); + if (x < u_threshold.e) + return (tiny * tiny); + } else if (ix <= BIAS - 34) { /* |x| < 0x1p-33 */ + /* includes pseudo-denormals */ + if (huge + x > 1.0L) /* trigger inexact iff x != 0 */ + return (1.0L + x); + } + + ENTERI(); + + /* Reduce x to (k*ln2 + midpoint[n2] + r1 + r2). */ + /* Use a specialized rint() to get fn. Assume round-to-nearest. */ + fn = x * INV_L + 0x1.8p63 - 0x1.8p63; + r = x - fn * L1 - fn * L2; /* r = r1 + r2 done independently. */ +#if defined(HAVE_EFFICIENT_IRINTL) + n = irintl(fn); +#elif defined(HAVE_EFFICIENT_IRINT) + n = irint(fn); +#else + n = (int)fn; +#endif + n2 = (unsigned)n % NUM; /* Tang's j. */ + k = (n - n2) / NUM; + r1 = x - fn * L1; + r2 = -fn * L2; + + /* Prepare scale factors. */ + v.xbits.man = 1ULL << 63; + if (k >= LDBL_MIN_EXP) { + v.xbits.expsign = BIAS + k; + twopk = v.e; + } else { + v.xbits.expsign = BIAS + k + 10000; + twopkp10000 = v.e; + } + + /* Evaluate expl(midpoint[n2] + r1 + r2) = s[n2] * expl(r1 + r2). */ + /* Here q = q(r), not q(r1), since r1 is lopped like L1. */ + t45 = r * P5 + P4; + z = r * r; + t23 = r * P3 + P2; + q = r2 + z * t23 + z * z * t45 + z * z * z * P6; + t = (long double)s[n2].lo + s[n2].hi; + t = s[n2].lo + t * (q + r1) + s[n2].hi; + + /* Scale by 2**k. */ + if (k >= LDBL_MIN_EXP) { + if (k == LDBL_MAX_EXP) + RETURNI(t * 2.0L * 0x1p16383L); + RETURNI(t * twopk); + } else { + RETURNI(t * twopkp10000 * twom10000); + } +} diff --git a/lib/msun/man/exp.3 b/lib/msun/man/exp.3 index b051e5b..5907337 100644 --- a/lib/msun/man/exp.3 +++ b/lib/msun/man/exp.3 @@ -28,13 +28,14 @@ .\" from: @(#)exp.3 6.12 (Berkeley) 7/31/91 .\" $FreeBSD$ .\" -.Dd January 17, 2008 +.Dd July 10, 2012 .Dt EXP 3 .Os .Sh NAME .Nm exp , .Nm expf , -.\" The sorting error is intentional. exp and expf should be adjacent. +.Nm expl , +.\" The sorting error is intentional. exp, expf, and expl should be adjacent. 
.Nm exp2 , .Nm exp2f , .Nm exp2l , @@ -51,6 +52,8 @@ .Fn exp "double x" .Ft float .Fn expf "float x" +.Ft long double +.Fn expl "long double x" .Ft double .Fn exp2 "double x" .Ft float @@ -67,9 +70,10 @@ .Fn powf "float x" "float y" .Sh DESCRIPTION The -.Fn exp -and the -.Fn expf +.Fn exp , +.Fn expf , +and +.Fn expl functions compute the base .Ms e exponential value of the given argument diff --git a/lib/msun/src/e_exp.c b/lib/msun/src/e_exp.c index b47aef5..e432bc8 100644 --- a/lib/msun/src/e_exp.c +++ b/lib/msun/src/e_exp.c @@ -158,3 +158,7 @@ __ieee754_exp(double x) /* default IEEE double exp */ return y*twopk*twom1000; } } + +#if (LDBL_MANT_DIG == 53) +__weak_reference(exp, expl); +#endif diff --git a/lib/msun/src/math.h b/lib/msun/src/math.h index cf34583..c6cee13 100644 --- a/lib/msun/src/math.h +++ b/lib/msun/src/math.h @@ -404,6 +404,7 @@ long double ceill(long double); long double copysignl(long double, long double) __pure2; long double cosl(long double); long double exp2l(long double); +long double expl(long double); long double fabsl(long double) __pure2; long double fdiml(long double, long double); long double floorl(long double); @@ -461,7 +462,6 @@ long double atanhl(long double); long double coshl(long double); long double erfcl(long double); long double erfl(long double); -long double expl(long double); long double expm1l(long double); long double lgammal(long double); long double log10l(long double); diff --git a/lib/msun/src/math_private.h b/lib/msun/src/math_private.h index 79280e3..94e7507 100644 --- a/lib/msun/src/math_private.h +++ b/lib/msun/src/math_private.h @@ -207,6 +207,13 @@ do { \ (d) = se_u.e; \ } while (0) +/* Long double constants are broken on i386. This workaround is OK always. */ +#define LD80C(m, ex, s, v) { \ + /* .e = v, */ /* overwritten */ \ + .xbits.man = __CONCAT(m, ULL), \ + .xbits.expsign = (0x3fff + (ex)) | ((s) ? 0x8000 : 0), \ +} + #ifdef FLT_EVAL_METHOD /* * Attempt to get strict C99 semantics for assignment with non-C99 compilers. @@ -225,8 +232,30 @@ do { \ } \ } while (0) #endif +#endif /* FLT_EVAL_METHOD */ + +/* Support switching the mode to FP_PE if necessary. */ +#if defined(__i386__) && !defined(NO_FPSETPREC) +#define ENTERI() \ + long double __retval; \ + fp_prec_t __oprec; \ + \ + if ((__oprec = fpgetprec()) != FP_PE) \ + fpsetprec(FP_PE); +#define RETURNI(x) do { \ + __retval = (x); \ + if (__oprec != FP_PE) \ + fpsetprec(__oprec); \ + RETURNF(__retval); \ +} while (0) +#else +#define ENTERI(x) +#define RETURNI(x) RETURNF(x) #endif +/* Default return statement if hack*_t() is not used. */ +#define RETURNF(v) return (v) + /* * Common routine to process the arguments to nan(), nanf(), and nanl(). */ @@ -323,6 +352,18 @@ irint(double x) #define HAVE_EFFICIENT_IRINT #endif +#if defined(__amd64__) || defined(__i386__) +static __inline int +irintl(long double x) +{ + int n; + + asm("fistl %0" : "=m" (n) : "t" (x)); + return (n); +} +#define HAVE_EFFICIENT_IRINTL +#endif + #endif /* __GNUCLIKE_ASM */ /* -- cgit v1.1 From 615f9f5c414aac0d6075e7b8e042f07122fe6c05 Mon Sep 17 00:00:00 2001 From: kargl Date: Mon, 23 Jul 2012 19:23:49 +0000 Subject: Hook ld80/s_expl.c or ld128/s_expl.c into the building of libm. 
PR:	standards/152415
Approved by:	das (mentor)
---
 lib/msun/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'lib')

diff --git a/lib/msun/Makefile b/lib/msun/Makefile
index 0646dc0..a1ec6c9 100644
--- a/lib/msun/Makefile
+++ b/lib/msun/Makefile
@@ -94,7 +94,7 @@ COMMON_SRCS+=	e_acosl.c e_asinl.c e_atan2l.c e_fmodl.c \
 	e_hypotl.c e_remainderl.c e_sqrtl.c \
 	invtrig.c k_cosl.c k_sinl.c k_tanl.c \
 	s_atanl.c s_cbrtl.c s_ceill.c s_cosl.c s_cprojl.c \
-	s_csqrtl.c s_exp2l.c s_floorl.c s_fmal.c \
+	s_csqrtl.c s_exp2l.c s_expl.c s_floorl.c s_fmal.c \
 	s_frexpl.c s_logbl.c s_nanl.c s_nextafterl.c s_nexttoward.c \
 	s_remquol.c s_rintl.c s_scalbnl.c \
 	s_sinl.c s_tanl.c s_truncl.c w_cabsl.c
-- 
cgit v1.1

From b7a0f04c526af6628c15d9d29f4ea078ad0f4caf Mon Sep 17 00:00:00 2001
From: ache
Date: Tue, 24 Jul 2012 16:03:28 +0000
Subject: Don't ever make files depend on the directory they are placed in.

The directory's modification time obviously changes with each such file
built, so this bug caused the whole of libelf to rebuild itself on every
second make run (and relink those files on every first make run), in a loop.
---
 lib/libelf/Makefile | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

(limited to 'lib')

diff --git a/lib/libelf/Makefile b/lib/libelf/Makefile
index ee96812..f4baf14 100644
--- a/lib/libelf/Makefile
+++ b/lib/libelf/Makefile
@@ -68,11 +68,9 @@ CLEANFILES=	${GENSRCS}
 CLEANDIRS=	sys
 CFLAGS+=	-I${.CURDIR} -I.
 
-sys/elf32.h sys/elf64.h sys/elf_common.h: sys
-	ln -sf ${.CURDIR}/../../sys/${.TARGET} ${.TARGET}
-
-sys:
+sys/elf32.h sys/elf64.h sys/elf_common.h: ${.CURDIR}/../../sys/${.TARGET}
 	mkdir -p ${.OBJDIR}/sys
+	ln -sf ${.CURDIR}/../../sys/${.TARGET} ${.TARGET}
 
 SHLIB_MAJOR=	1
 
-- 
cgit v1.1

From 430cf8efee0b56a369f33b1a60883a776f71a1d2 Mon Sep 17 00:00:00 2001
From: issyl0
Date: Wed, 25 Jul 2012 22:17:44 +0000
Subject: Add a new man page containing details of new locale-specific
 functions for wctype.h, iswalnum_l(3). Add it and its functions to the
 Makefile.
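A minimal usage sketch for the new functions (editorial, not part of the
commit; assumes an en_US.UTF-8 locale is available on the system): the _l
variants consult an explicit locale_t obtained from newlocale(3) instead of
the global or per-thread locale:

	#include <locale.h>
	#include <stdio.h>
	#include <wctype.h>

	int
	main(void)
	{
		locale_t loc;

		loc = newlocale(LC_CTYPE_MASK, "en_US.UTF-8", NULL);
		if (loc == NULL)
			return (1);
		/* Prints 1: 'A' is alphanumeric in this locale. */
		printf("%d\n", iswalnum_l(L'A', loc) != 0);
		freelocale(loc);
		return (0);
	}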
Reviewed by: gavin, jilles Approved by: theraven MFC after: 5 days --- lib/libc/locale/Makefile.inc | 15 +++- lib/libc/locale/iswalnum_l.3 | 168 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 182 insertions(+), 1 deletion(-) create mode 100644 lib/libc/locale/iswalnum_l.3 (limited to 'lib') diff --git a/lib/libc/locale/Makefile.inc b/lib/libc/locale/Makefile.inc index adae8bd..8f3c4c6 100644 --- a/lib/libc/locale/Makefile.inc +++ b/lib/libc/locale/Makefile.inc @@ -30,7 +30,8 @@ MAN+= btowc.3 \ ctype.3 digittoint.3 isalnum.3 isalpha.3 isascii.3 isblank.3 iscntrl.3 \ isdigit.3 isgraph.3 isideogram.3 islower.3 isphonogram.3 isprint.3 \ ispunct.3 isrune.3 isspace.3 isspecial.3 \ - isupper.3 iswalnum.3 isxdigit.3 localeconv.3 mblen.3 mbrlen.3 \ + isupper.3 iswalnum.3 iswalnum_l.3 isxdigit.3 \ + localeconv.3 mblen.3 mbrlen.3 \ mbrtowc.3 \ mbsinit.3 \ mbsrtowcs.3 mbstowcs.3 mbtowc.3 multibyte.3 \ @@ -53,6 +54,18 @@ MLINKS+=iswalnum.3 iswalpha.3 iswalnum.3 iswascii.3 iswalnum.3 iswblank.3 \ iswalnum.3 iswphonogram.3 iswalnum.3 iswprint.3 iswalnum.3 iswpunct.3 \ iswalnum.3 iswrune.3 iswalnum.3 iswspace.3 iswalnum.3 iswspecial.3 \ iswalnum.3 iswupper.3 iswalnum.3 iswxdigit.3 +MLINKS+=iswalnum_l.3 iswalpha_l.3 iswalnum_l.3 iswcntrl_l.3 \ + iswalnum_l.3 iswctype_l.3 iswalnum_l.3 iswdigit_l.3 \ + iswalnum_l.3 iswgraph_l.3 iswalnum_l.3 iswlower_l.3 \ + iswalnum_l.3 iswprint_l.3 iswalnum_l.3 iswpunct_l.3 \ + iswalnum_l.3 iswspace_l.3 iswalnum_l.3 iswupper_l.3 \ + iswalnum_l.3 iswxdigit_l.3 iswalnum_l.3 towlower_l.3 \ + iswalnum_l.3 towupper_l.3 iswalnum_l.3 wctype_l.3 \ + iswalnum_l.3 iswblank_l.3 iswalnum_l.3 iswhexnumber_l.3 \ + iswalnum_l.3 iswideogram_l.3 iswalnum_l.3 iswnumber_l.3 \ + iswalnum_l.3 iswphonogram_l.3 iswalnum_l.3 iswrune_l.3 \ + iswalnum_l.3 iswspecial_l.3 iswalnum_l.3 nextwctype_l.3 \ + iswalnum_l.3 towctrans_l.3 iswalnum_l.3 wctrans_l.3 MLINKS+=isxdigit.3 ishexnumber.3 MLINKS+=mbsrtowcs.3 mbsnrtowcs.3 MLINKS+=wcsrtombs.3 wcsnrtombs.3 diff --git a/lib/libc/locale/iswalnum_l.3 b/lib/libc/locale/iswalnum_l.3 new file mode 100644 index 0000000..679a2dd --- /dev/null +++ b/lib/libc/locale/iswalnum_l.3 @@ -0,0 +1,168 @@ +.\" Copyright (c) 2012 Isabell Long +.\" All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. 
+.\" +.\" $FreeBSD$ +.\" +.Dt ISWALNUM_L 3 +.Dd July 25, 2012 +.Os +.Sh NAME +.Nm iswalnum_l , +.Nm iswalpha_l , +.Nm iswcntrl_l , +.Nm iswctype_l , +.Nm iswdigit_l , +.Nm iswgraph_l , +.Nm iswlower_l , +.Nm iswprint_l , +.Nm iswpunct_l , +.Nm iswspace_l , +.Nm iswupper_l , +.Nm iswxdigit_l , +.Nm towlower_l , +.Nm towupper_l , +.Nm wctype_l , +.Nm iswblank_l , +.Nm iswhexnumber_l , +.Nm iswideogram_l , +.Nm iswnumber_l , +.Nm iswphonogram_l , +.Nm iswrune_l , +.Nm iswspecial_l , +.Nm nextwctype_l , +.Nm towctrans_l , +.Nm wctrans_l +.Nd wide character classification utilities +.Sh LIBRARY +.Lb libc +.Sh SYNOPSIS +.In wctype.h +.Ft int +.Fn iswalnum_l "wint_t wc" "locale_t loc" +.Ft int +.Fn iswalpha_l "wint_t wc" "locale_t loc" +.Ft int +.Fn iswcntrl_l "wint_t wc" "locale_t loc" +.Ft int +.Fn iswctype_l "wint_t wc" "locale_t loc" +.Ft int +.Fn iswdigit_l "wint_t wc" "locale_t loc" +.Ft int +.Fn iswgraph_l "wint_t wc" "locale_t loc" +.Ft int +.Fn iswlower_l "wint_t wc" "locale_t loc" +.Ft int +.Fn iswprint_l "wint_t wc" "locale_t loc" +.Ft int +.Fn iswpunct_l "wint_t wc" "locale_t loc" +.Ft int +.Fn iswspace_l "wint_t wc" "locale_t loc" +.Ft int +.Fn iswupper_l "wint_t wc" "locale_t loc" +.Ft int +.Fn iswxdigit_l "wint_t wc" "locale_t loc" +.Ft wint_t +.Fn towlower_l "wint_t wc" "locale_t loc" +.Ft wint_t +.Fn towupper_l "wint_t wc" "locale_t loc" +.Ft wctype_t +.Fn wctype_l "wint_t wc" "locale_t loc" +.Ft int +.Fn iswblank_l "wint_t wc" "locale_t loc" +.Ft int +.Fn iswhexnumber_l "wint_t wc" "locale_t loc" +.Ft int +.Fn iswideogram_l "wint_t wc" "locale_t loc" +.Ft int +.Fn iswnumber_l "wint_t wc" "locale_t loc" +.Ft int +.Fn iswphonogram_l "wint_t wc" "locale_t loc" +.Ft int +.Fn iswrune_l "wint_t wc" "locale_t loc" +.Ft int +.Fn iswspecial_l "wint_t wc" "locale_t loc" +.Ft wint_t +.Fn nextwctype_l "wint_t wc" "locale_t loc" +.Ft wint_t +.Fn towctrans_l "wint_t wc" "wctrans_t" "locale_t loc" +.Ft wctrans_t +.Fn wctrans_l "const char *" "locale_t loc" +.Sh DESCRIPTION +The above functions are character classification utility functions, +for use with wide characters +.Vt ( wchar_t +or +.Vt wint_t ) +in the locale +.Fa loc . +They behave in the same way as the versions without the _l suffix, but use +the specified locale rather than the global or per-thread locale. +These functions may be implemented as inline functions in +.In wctype.h +and as functions in the C library. +See the specific manual pages for more information. +.Sh RETURN VALUES +These functions return the same things as their non-locale versions. +If the locale is invalid, their behaviors are undefined. +.Sh SEE ALSO +.Xr iswalnum 3 , +.Xr iswalpha 3 , +.Xr iswblank 3 , +.Xr iswcntrl 3 , +.Xr iswctype 3 , +.Xr iswdigit 3 , +.Xr iswgraph 3 , +.Xr iswhexnumber 3 , +.Xr iswideogram 3 , +.Xr iswlower 3 , +.Xr iswnumber 3 , +.Xr iswphonogram 3 , +.Xr iswprint 3 , +.Xr iswpunct 3 , +.Xr iswrune 3 , +.Xr iswspace 3 , +.Xr iswspecial 3 , +.Xr iswupper 3 , +.Xr iswxdigit 3 , +.Xr nextwctype 3 , +.Xr towctrans 3 , +.Xr towlower 3 , +.Xr towupper 3 , +.Xr wctrans 3 , +.Xr wctype 3 +.Sh STANDARDS +These functions conform to +.St -p1003.1-2008 , +except for +.Fn iswascii_l , +.Fn iswhexnumber_l , +.Fn iswideogram_l , +.Fn iswphonogram_l , +.Fn iswrune_l , +.Fn iswspecial_l +and +.Fn nextwctype_l +which are +.Fx +extensions. 
-- cgit v1.1 From 20f821c4653c4a0a7821eccf56ed51ea2fd835a8 Mon Sep 17 00:00:00 2001 From: kargl Date: Thu, 26 Jul 2012 03:50:24 +0000 Subject: Replace code that toggles between 53 and 64 bits on i386 class hardware with the ENTERI and RETURNI macros, which are now available in math_private.h. Suggested by: bde Approved by: das (mentor) --- lib/msun/src/s_cbrtl.c | 23 ++++------------------- 1 file changed, 4 insertions(+), 19 deletions(-) (limited to 'lib') diff --git a/lib/msun/src/s_cbrtl.c b/lib/msun/src/s_cbrtl.c index 23c9184..95ab956 100644 --- a/lib/msun/src/s_cbrtl.c +++ b/lib/msun/src/s_cbrtl.c @@ -51,23 +51,12 @@ cbrtl(long double x) if (k == BIAS + LDBL_MAX_EXP) return (x + x); -#ifdef __i386__ - fp_prec_t oprec; - - oprec = fpgetprec(); - if (oprec != FP_PE) - fpsetprec(FP_PE); -#endif + ENTERI(); if (k == 0) { /* If x = +-0, then cbrt(x) = +-0. */ - if ((u.bits.manh | u.bits.manl) == 0) { -#ifdef __i386__ - if (oprec != FP_PE) - fpsetprec(oprec); -#endif - return (x); - } + if ((u.bits.manh | u.bits.manl) == 0) + RETURNI(x); /* Adjust subnormal numbers. */ u.e *= 0x1.0p514; k = u.bits.exp; @@ -149,9 +138,5 @@ cbrtl(long double x) t=t+t*r; /* error <= 0.5 + 0.5/3 + epsilon */ t *= v.e; -#ifdef __i386__ - if (oprec != FP_PE) - fpsetprec(oprec); -#endif - return (t); + RETURNI(t); } -- cgit v1.1 From da1349053f2cae9f35535b9a0d84c5bb437bcaed Mon Sep 17 00:00:00 2001 From: kargl Date: Thu, 26 Jul 2012 03:59:33 +0000 Subject: * ld80/expl.c: . Remove a few #ifdefs that should have been removed in the initial commit. . Sort fpmath.h to its rightful place. * ld128/s_expl.c: . Replace EXPMASK with its actual value. . Sort fpmath.h to its rightful place. Requested by: bde Approved by: das (mentor) --- lib/msun/ld128/s_expl.c | 5 ++--- lib/msun/ld80/s_expl.c | 6 +----- 2 files changed, 3 insertions(+), 8 deletions(-) (limited to 'lib') diff --git a/lib/msun/ld128/s_expl.c b/lib/msun/ld128/s_expl.c index 624cb8d..1888ef8 100644 --- a/lib/msun/ld128/s_expl.c +++ b/lib/msun/ld128/s_expl.c @@ -29,12 +29,11 @@ __FBSDID("$FreeBSD$"); #include +#include "fpmath.h" #include "math.h" #include "math_private.h" -#include "fpmath.h" #define BIAS (LDBL_MAX_EXP - 1) -#define EXPMASK (BIAS + LDBL_MAX_EXP) static volatile const long double twom10000 = 0x1p-10000L, tiny = 0x1p-10000L; @@ -205,7 +204,7 @@ expl(long double x) /* Filter out exceptional cases. */ u.e = x; hx = u.xbits.expsign; - ix = hx & EXPMASK; + ix = hx & 0x7fff; if (ix >= BIAS + 13) { /* |x| >= 8192 or x is NaN */ if (ix == BIAS + LDBL_MAX_EXP) { if (u.xbits.manh != 0 diff --git a/lib/msun/ld80/s_expl.c b/lib/msun/ld80/s_expl.c index d2faad2..e295473 100644 --- a/lib/msun/ld80/s_expl.c +++ b/lib/msun/ld80/s_expl.c @@ -45,13 +45,9 @@ __FBSDID("$FreeBSD$"); #include #endif +#include "fpmath.h" #include "math.h" -#define FPSETPREC -#ifdef NO_FPSETPREC -#undef FPSETPREC -#endif #include "math_private.h" -#include "fpmath.h" #define BIAS (LDBL_MAX_EXP - 1) -- cgit v1.1 From 0fee65786ad8b4344a79fa7da306e7c00289e111 Mon Sep 17 00:00:00 2001 From: kargl Date: Thu, 26 Jul 2012 04:05:08 +0000 Subject: Replace the macro name NUM with INTERVALS. This change provides compatibility with the INTERVALS macro used in the soon-to-be-commmitted expm1l() and someday-to-be-committed log*l() functions. Add a comment into ld128/s_expl.c noting at gcc issue that was deleted when rewriting ld80/e_expl.c as ld128/s_expl.c. 
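For readers new to the table layout, the reduction that INTERVALS parameterizes
has this rough shape (an illustrative condensation of the ld128 code below, not
a drop-in excerpt):

    /*
     * Write x = (k*INTERVALS + j)*ln2/INTERVALS + r with
     * |r| <= ln2/(2*INTERVALS), so that
     *     exp(x) = 2**k * 2**(j/INTERVALS) * exp(r),
     * where 2**(j/INTERVALS) is looked up in the s[] table as a
     * hi+lo pair and exp(r) is evaluated by a short polynomial.
     */
    fn = x * INV_L + 0x1.8p112 - 0x1.8p112;   /* round fn to an integer */
    n = (int)fn;
    n2 = (unsigned)n % INTERVALS;             /* Tang's j */
    k = (n - n2) / INTERVALS;
    r1 = x - fn * L1;                         /* L1 + L2 == ln2/INTERVALS */
    r2 = -fn * L2;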
Requested by: bde Approved by: das (mentor) --- lib/msun/ld128/s_expl.c | 9 +++++---- lib/msun/ld80/s_expl.c | 22 +++++++++++----------- 2 files changed, 16 insertions(+), 15 deletions(-) (limited to 'lib') diff --git a/lib/msun/ld128/s_expl.c b/lib/msun/ld128/s_expl.c index 1888ef8..c03368e 100644 --- a/lib/msun/ld128/s_expl.c +++ b/lib/msun/ld128/s_expl.c @@ -35,6 +35,7 @@ __FBSDID("$FreeBSD$"); #define BIAS (LDBL_MAX_EXP - 1) +/* XXX Prevent gcc from erroneously constant folding this: */ static volatile const long double twom10000 = 0x1p-10000L, tiny = 0x1p-10000L; static const long double @@ -57,12 +58,12 @@ P9 = 2.75573192240103867817876199544468806e-6L, P10 = 2.75573236172670046201884000197885520e-7L, P11 = 2.50517544183909126492878226167697856e-8L; -#define NUM 128 +#define INTERVALS 128 static const struct { long double hi; long double lo; -} s[NUM] = { +} s[INTERVALS] = { 0x1p0L, 0x0p0L, 0x1.0163da9fb33356d84a66aep0L, 0x3.36dcdfa4003ec04c360be2404078p-92L, 0x1.02c9a3e778060ee6f7cacap0L, 0x4.f7a29bde93d70a2cabc5cb89ba10p-92L, @@ -226,8 +227,8 @@ expl(long double x) fn = x * INV_L + 0x1.8p112 - 0x1.8p112; n = (int)fn; - n2 = (unsigned)n % NUM; /* Tang's j. */ - k = (n - n2) / NUM; + n2 = (unsigned)n % INTERVALS; /* Tang's j. */ + k = (n - n2) / INTERVALS; r1 = x - fn * L1; r2 = -fn * L2; diff --git a/lib/msun/ld80/s_expl.c b/lib/msun/ld80/s_expl.c index e295473..7fb0d99 100644 --- a/lib/msun/ld80/s_expl.c +++ b/lib/msun/ld80/s_expl.c @@ -36,7 +36,7 @@ __FBSDID("$FreeBSD$"); * in IEEE floating-point arithmetic," ACM Trans. Math. Soft., 15, * 144-157 (1989). * - * where the 32 table entries have been expanded to NUM (see below). + * where the 32 table entries have been expanded to INTERVALS (see below). */ #include @@ -65,9 +65,9 @@ u_threshold = LD80C(0xb21dfe7f09e2baa9, 13, 1, -11399.4985314888605581L); static const double __aligned(64) /* - * ln2/NUM = L1+L2 (hi+lo decomposition for multiplication). L1 must have - * at least 22 (= log2(|LDBL_MIN_EXP-extras|) + log2(NUM)) lowest bits zero - * so that multiplication of it by n is exact. + * ln2/INTERVALS = L1+L2 (hi+lo decomposition for multiplication). L1 must + * have at least 22 (= log2(|LDBL_MIN_EXP-extras|) + log2(INTERVALS)) lowest + * bits zero so that multiplication of it by n is exact. */ L1 = 5.4152123484527692e-3, /* 0x162e42ff000000.0p-60 */ L2 = -3.2819649005320973e-13, /* -0x1718432a1b0e26.0p-94 */ @@ -75,7 +75,7 @@ INV_L = 1.8466496523378731e+2, /* 0x171547652b82fe.0p-45 */ /* * Domain [-0.002708, 0.002708], range ~[-5.7136e-24, 5.7110e-24]: * |exp(x) - p(x)| < 2**-77.2 - * (0.002708 is ln2/(2*NUM) rounded up a little). + * (0.002708 is ln2/(2*INTERVALS) rounded up a little). */ P2 = 0.5, P3 = 1.6666666666666119e-1, /* 0x15555555555490.0p-55 */ @@ -84,16 +84,16 @@ P5 = 8.3333354987869413e-3, /* 0x1111115b789919.0p-59 */ P6 = 1.3888891738560272e-3; /* 0x16c16c651633ae.0p-62 */ /* - * 2^(i/NUM) for i in [0,NUM] is represented by two values where the - * first 47 (?!) bits of the significand is stored in hi and the next 53 + * 2^(i/INTERVALS) for i in [0,INTERVALS] is represented by two values where + * the first 47 (?!) bits of the significand is stored in hi and the next 53 * bits are in lo. 
*/ -#define NUM 128 +#define INTERVALS 128 static const struct { double hi; double lo; -} s[NUM] __aligned(16) = { +} s[INTERVALS] __aligned(16) = { 0x1p+0, 0x0p+0, 0x1.0163da9fb330p+0, 0x1.ab6c25335719bp-47, 0x1.02c9a3e77804p+0, 0x1.07737be56527cp-47, @@ -265,8 +265,8 @@ expl(long double x) #else n = (int)fn; #endif - n2 = (unsigned)n % NUM; /* Tang's j. */ - k = (n - n2) / NUM; + n2 = (unsigned)n % INTERVALS; /* Tang's j. */ + k = (n - n2) / INTERVALS; r1 = x - fn * L1; r2 = -fn * L2; -- cgit v1.1 From 99f0d16551073e4c685d980e14eaaf27ce5754be Mon Sep 17 00:00:00 2001 From: pluknet Date: Thu, 26 Jul 2012 12:04:11 +0000 Subject: Update the 'C1x draft' reference to '.St -isoC-2011' mdoc macro. Reviewed by: theraven MFC after: 1 week --- lib/libc/stdlib/at_quick_exit.3 | 3 ++- lib/libc/stdlib/quick_exit.3 | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/libc/stdlib/at_quick_exit.3 b/lib/libc/stdlib/at_quick_exit.3 index a75c0c0..c430154 100644 --- a/lib/libc/stdlib/at_quick_exit.3 +++ b/lib/libc/stdlib/at_quick_exit.3 @@ -58,4 +58,5 @@ function returns the value 0 if successful and a non-zero value on failure. .Sh STANDARDS The .Fn at_quick_exit -function conforms to the C1x draft specification. +function conforms to +.St -isoC-2011 . diff --git a/lib/libc/stdlib/quick_exit.3 b/lib/libc/stdlib/quick_exit.3 index 06fde8b..f2ea379 100644 --- a/lib/libc/stdlib/quick_exit.3 +++ b/lib/libc/stdlib/quick_exit.3 @@ -54,4 +54,5 @@ function does not return. .Sh STANDARDS The .Fn quick_exit -function conforms to the C1x draft specification. +function conforms to +.St -isoC-2011 . -- cgit v1.1 From dcf3898747d71456dfeea167cdaa723c9368ba2f Mon Sep 17 00:00:00 2001 From: joel Date: Thu, 26 Jul 2012 14:46:19 +0000 Subject: Start manpage with Dd macro and also remove a trailing whitespace while here. --- lib/libc/locale/iswalnum_l.3 | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/libc/locale/iswalnum_l.3 b/lib/libc/locale/iswalnum_l.3 index 679a2dd..21ee48f 100644 --- a/lib/libc/locale/iswalnum_l.3 +++ b/lib/libc/locale/iswalnum_l.3 @@ -24,8 +24,8 @@ .\" .\" $FreeBSD$ .\" -.Dt ISWALNUM_L 3 .Dd July 25, 2012 +.Dt ISWALNUM_L 3 .Os .Sh NAME .Nm iswalnum_l , @@ -161,7 +161,7 @@ except for .Fn iswphonogram_l , .Fn iswrune_l , .Fn iswspecial_l -and +and .Fn nextwctype_l which are .Fx -- cgit v1.1 From b54693d6988f72c068a2bf4fe22d787b5ffe2b88 Mon Sep 17 00:00:00 2001 From: pfg Date: Thu, 26 Jul 2012 15:48:07 +0000 Subject: Drop non-portable libedit's el_data_set() and el_data_get() for private data. We can set/get private data with the documented el_get() and el_set() so there's no need for our local extensions, which never received much use anyway. While here, also re-arrange the call to term_init_arrow. This was left over from r89735 but is not required anymore. This changes reduce differences against NetBSD's libedit. MFC after: 2 months --- lib/libedit/el.c | 24 ------------------------ lib/libedit/histedit.h | 7 ------- lib/libedit/term.c | 2 +- 3 files changed, 1 insertion(+), 32 deletions(-) (limited to 'lib') diff --git a/lib/libedit/el.c b/lib/libedit/el.c index 8dcd698..d6cfb2d 100644 --- a/lib/libedit/el.c +++ b/lib/libedit/el.c @@ -473,30 +473,6 @@ el_get(EditLine *el, int op, ...) return (rv); } -/* el_data_get(): - * Set user private data. - */ -public void -el_data_set (el, data) - EditLine *el; - void *data; -{ - el->el_data = data; - - return; -} - -/* el_data_get(): - * Return user private data. 
- */ -public void * -el_data_get (el) - EditLine *el; -{ - if (el->el_data) - return (el->el_data); - return (NULL); -} /* el_line(): * Return editing info diff --git a/lib/libedit/histedit.h b/lib/libedit/histedit.h index 24af1d5..8a6caf9 100644 --- a/lib/libedit/histedit.h +++ b/lib/libedit/histedit.h @@ -154,13 +154,6 @@ int el_source(EditLine *, const char *); */ void el_resize(EditLine *); - -/* - * Set user private data. - */ -void el_data_set(EditLine *, void *); -void * el_data_get(EditLine *); - /* * User-defined function interface. */ diff --git a/lib/libedit/term.c b/lib/libedit/term.c index 4899193..e526484 100644 --- a/lib/libedit/term.c +++ b/lib/libedit/term.c @@ -340,8 +340,8 @@ term_init(EditLine *el) if (el->el_term.t_val == NULL) return (-1); (void) memset(el->el_term.t_val, 0, T_val * sizeof(int)); - term_init_arrow(el); (void) term_set(el, NULL); + term_init_arrow(el); return (0); } -- cgit v1.1 From 4e7839007ab707452aab4262d6fee9846bb28767 Mon Sep 17 00:00:00 2001 From: kib Date: Fri, 27 Jul 2012 10:41:53 +0000 Subject: Document F_DUP2FD_CLOEXEC. MFC after: 1 week --- lib/libc/sys/fcntl.2 | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) (limited to 'lib') diff --git a/lib/libc/sys/fcntl.2 b/lib/libc/sys/fcntl.2 index 0844e87..63d6510 100644 --- a/lib/libc/sys/fcntl.2 +++ b/lib/libc/sys/fcntl.2 @@ -28,7 +28,7 @@ .\" @(#)fcntl.2 8.2 (Berkeley) 1/12/94 .\" $FreeBSD$ .\" -.Dd July 18, 2012 +.Dd July 27, 2012 .Dt FCNTL 2 .Os .Sh NAME @@ -54,7 +54,7 @@ Depending on the value of .Fn fcntl can take an additional third argument .Fa "int arg" . -.Bl -tag -width F_DUPFD_CLOEXEC +.Bl -tag -width F_DUP2FD_CLOEXEC .It Dv F_DUPFD Return a new descriptor as follows: .Pp @@ -94,13 +94,23 @@ It is functionally equivalent to .Bd -literal -offset indent dup2(fd, arg) .Ed +.It Dv F_DU2PFD_CLOEXEC +Like +.Dv F_DUP2FD , +but the +.Dv FD_CLOEXEC +flag associated with the new file descriptor is set. .Pp The .Dv F_DUP2FD -constant is not portable, so it should not be used if portability is needed. +and +.Dv F_DUP2DF_CLOEXEC +constants are not portable, so they should not be used if +portability is needed. Use .Fn dup2 -instead. +instead of +.Dv F_DUP2FD . .It Dv F_GETFD Get the close-on-exec flag associated with the file descriptor .Fa fd -- cgit v1.1 From 6856a4229035b7ffb57cf926225c8029989e0625 Mon Sep 17 00:00:00 2001 From: emaste Date: Fri, 27 Jul 2012 21:38:14 +0000 Subject: Correct BUGS description of static buffer use Since r142667 strerror has unconditionally returned a pointer to a static buffer. MFC after: 1 week --- lib/libc/string/strerror.3 | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'lib') diff --git a/lib/libc/string/strerror.3 b/lib/libc/string/strerror.3 index 5961a84..cc25602 100644 --- a/lib/libc/string/strerror.3 +++ b/lib/libc/string/strerror.3 @@ -174,10 +174,10 @@ function was implemented in by .An Wes Peters Aq wes@FreeBSD.org . .Sh BUGS -For unknown error numbers, the +The .Fn strerror -function will return its result in a static buffer which -may be overwritten by subsequent calls. +function returns its result in a static buffer which +will be overwritten by subsequent calls. 
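(Editorial note, not part of the committed page: code that needs a message which
survives later calls, or that runs in multiple threads, can copy into
caller-owned storage with strerror_r(3); a minimal sketch, where 'saved_errno'
and the buffer size are illustrative:)

    char buf[NL_TEXTMAX];    /* NL_TEXTMAX is from <limits.h> */

    if (strerror_r(saved_errno, buf, sizeof(buf)) == 0)
            fprintf(stderr, "error: %s\n", buf);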
.Pp The return type for .Fn strerror -- cgit v1.1 From d97d8a1634024b638502d49688fb426fe4e091df Mon Sep 17 00:00:00 2001 From: mm Date: Sun, 29 Jul 2012 06:34:45 +0000 Subject: Catch up config_freebsd.h with libarchive 3.0.4 --- lib/libarchive/config_freebsd.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'lib') diff --git a/lib/libarchive/config_freebsd.h b/lib/libarchive/config_freebsd.h index 0a39aa6..8ee70a6 100644 --- a/lib/libarchive/config_freebsd.h +++ b/lib/libarchive/config_freebsd.h @@ -129,7 +129,6 @@ #define HAVE_LSTAT 1 #define HAVE_LUTIMES 1 #define HAVE_MBRTOWC 1 -#define HAVE_MBSNRTOWCS 1 #define HAVE_MEMMOVE 1 #define HAVE_MEMORY_H 1 #define HAVE_MEMSET 1 @@ -206,7 +205,6 @@ #define HAVE_WCSCMP 1 #define HAVE_WCSCPY 1 #define HAVE_WCSLEN 1 -#define HAVE_WCSNRTOMBS 1 #define HAVE_WCTOMB 1 #define HAVE_WCTYPE_H 1 #define HAVE_WMEMCMP 1 -- cgit v1.1 From 18c77ad45409e71e62e91d336526a6254a64a1c9 Mon Sep 17 00:00:00 2001 From: mm Date: Mon, 30 Jul 2012 14:47:35 +0000 Subject: Backport NFSv4 ACL fix from libarchive master branch. Source: https://github.com/libarchive/libarchive/commit/f67370d5 Obtained from: libarchive (master branch) --- lib/libarchive/Makefile | 1 + lib/libarchive/config_freebsd.h | 2 ++ lib/libarchive/test/Makefile | 3 ++- 3 files changed, 5 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/libarchive/Makefile b/lib/libarchive/Makefile index f2ba2f8..29508ad 100644 --- a/lib/libarchive/Makefile +++ b/lib/libarchive/Makefile @@ -110,6 +110,7 @@ SRCS= archive_acl.c \ archive_virtual.c \ archive_write.c \ archive_write_add_filter.c \ + archive_write_disk_acl.c \ archive_write_disk_set_standard_lookup.c \ archive_write_disk_posix.c \ archive_write_open_fd.c \ diff --git a/lib/libarchive/config_freebsd.h b/lib/libarchive/config_freebsd.h index 8ee70a6..214ffd1 100644 --- a/lib/libarchive/config_freebsd.h +++ b/lib/libarchive/config_freebsd.h @@ -31,10 +31,12 @@ #define HAVE_ACL_GET_LINK_NP 1 #define HAVE_ACL_GET_PERM_NP 1 #define HAVE_ACL_INIT 1 +#define HAVE_ACL_IS_TRIVIAL_NP 1 #define HAVE_ACL_PERMSET_T 1 #define HAVE_ACL_SET_FD 1 #define HAVE_ACL_SET_FD_NP 1 #define HAVE_ACL_SET_FILE 1 +#define HAVE_ACL_SET_LINK_NP 1 #define HAVE_ACL_USER 1 #define HAVE_EXTATTR_GET_FILE 1 #define HAVE_EXTATTR_LIST_FILE 1 diff --git a/lib/libarchive/test/Makefile b/lib/libarchive/test/Makefile index ce039be..5e7c7a4 100644 --- a/lib/libarchive/test/Makefile +++ b/lib/libarchive/test/Makefile @@ -19,7 +19,8 @@ CFLAGS+= -DHAVE_LIBLZMA=1 -DHAVE_LZMA_H=1 .PATH: ${LIBARCHIVEDIR}/libarchive/test TESTS= \ - test_acl_freebsd.c \ + test_acl_freebsd_nfs4.c \ + test_acl_freebsd_posix1e.c \ test_acl_nfs4.c \ test_acl_pax.c \ test_acl_posix1e.c \ -- cgit v1.1 From 6a683d47fc64b7528b27294edebfc2000aebb8e6 Mon Sep 17 00:00:00 2001 From: issyl0 Date: Mon, 30 Jul 2012 20:56:19 +0000 Subject: Add more locale-specific functions to the relevant man pages and Makefile: - lib/libc/locale/islower.3 - lib/libc/locale/ispunct.3 - lib/libc/locale/nl_langinfo.3 - lib/libc/locale/isgraph.3 - lib/libc/locale/isspace.3 Reviewed by: bz Approved by: theraven MFC after: 5 days --- lib/libc/locale/Makefile.inc | 5 +++++ lib/libc/locale/isgraph.3 | 20 +++++++++++++++++--- lib/libc/locale/islower.3 | 19 ++++++++++++++++--- lib/libc/locale/ispunct.3 | 20 +++++++++++++++++--- lib/libc/locale/isspace.3 | 20 +++++++++++++++++--- lib/libc/locale/nl_langinfo.3 | 16 ++++++++++++++-- 6 files changed, 86 insertions(+), 14 deletions(-) (limited to 'lib') diff --git a/lib/libc/locale/Makefile.inc 
b/lib/libc/locale/Makefile.inc index 8f3c4c6..f2161be 100644 --- a/lib/libc/locale/Makefile.inc +++ b/lib/libc/locale/Makefile.inc @@ -47,6 +47,11 @@ MAN+= big5.5 euc.5 gb18030.5 gb2312.5 gbk.5 mskanji.5 utf8.5 MLINKS+=btowc.3 wctob.3 MLINKS+=isdigit.3 isnumber.3 +MLINKS+=isgraph.3 isgraph_l.3 +MLINKS+=islower.3 islower_l.3 +MLINKS+=ispunct.3 ispunct_l.3 +MLINKS+=isspace.3 isspace_l.3 +MLINKS+=nl_langinfo.3 nl_langinfo_l.3 MLINKS+=iswalnum.3 iswalpha.3 iswalnum.3 iswascii.3 iswalnum.3 iswblank.3 \ iswalnum.3 iswcntrl.3 iswalnum.3 iswdigit.3 iswalnum.3 iswgraph.3 \ iswalnum.3 iswhexnumber.3 \ diff --git a/lib/libc/locale/isgraph.3 b/lib/libc/locale/isgraph.3 index bedf31e..95a8319 100644 --- a/lib/libc/locale/isgraph.3 +++ b/lib/libc/locale/isgraph.3 @@ -32,7 +32,7 @@ .\" @(#)isgraph.3 8.2 (Berkeley) 12/11/93 .\" $FreeBSD$ .\" -.Dd July 17, 2005 +.Dd July 30, 2012 .Dt ISGRAPH 3 .Os .Sh NAME @@ -44,6 +44,8 @@ .In ctype.h .Ft int .Fn isgraph "int c" +.Ft int +.Fn isgraph_l "int c" "locale_t loc" .Sh DESCRIPTION The .Fn isgraph @@ -79,11 +81,19 @@ In the ASCII character set, this includes the following characters .It "\&166\ ``v''" Ta "167\ ``w''" Ta "170\ ``x''" Ta "171\ ``y''" Ta "172\ ``z''" .It "\&173\ ``{''" Ta "174\ ``|''" Ta "175\ ``}''" Ta "176\ ``~''" Ta \& .El +.Pp +The +.Fn isgraph_l +function takes an explicit locale argument, whereas the +.Fn isgraph +function uses the current global or per-thread locale. .Sh RETURN VALUES The .Fn isgraph -function returns zero if the character tests false and -returns non-zero if the character tests true. +and +.Fn isgraph_l +functions return zero if the character tests false and +return non-zero if the character tests true. .Sh COMPATIBILITY The .Bx 4.4 @@ -103,3 +113,7 @@ The .Fn isgraph function conforms to .St -isoC . +The +.Fn isgraph_l +function conforms to +.St -p1003.1-2008 . diff --git a/lib/libc/locale/islower.3 b/lib/libc/locale/islower.3 index 987ff59..14c3b5f 100644 --- a/lib/libc/locale/islower.3 +++ b/lib/libc/locale/islower.3 @@ -32,7 +32,7 @@ .\" @(#)islower.3 8.1 (Berkeley) 6/4/93 .\" $FreeBSD$ .\" -.Dd July 17, 2005 +.Dd July 30, 2012 .Dt ISLOWER 3 .Os .Sh NAME @@ -44,6 +44,8 @@ .In ctype.h .Ft int .Fn islower "int c" +.Ft int +.Fn islower_l "int c" "locale_t loc" .Sh DESCRIPTION The .Fn islower @@ -63,11 +65,18 @@ In the ASCII character set, this includes the following characters .It "\&165\ ``u''" Ta "166\ ``v''" Ta "167\ ``w''" Ta "170\ ``x''" Ta "171\ ``y''" .It "\&172\ ``z''" Ta \& Ta \& Ta \& Ta \& .El +The +.Fn islower_l +function takes an explicit locale argument, whereas the +.Fn islower +function uses the current global or per-thread locale. .Sh RETURN VALUES The .Fn islower -function returns zero if the character tests false and -returns non-zero if the character tests true. +and +.Fn islower_l +functions return zero if the character tests false and +return non-zero if the character tests true. .Sh COMPATIBILITY The .Bx 4.4 @@ -88,3 +97,7 @@ The .Fn islower function conforms to .St -isoC . +The +.Fn islower_l +function conforms to +.St -p1003.1-2008 . 
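(Editorial aside between hunks: the split these pages now document, explicit-locale
versus current-locale classification, looks like this in use; a minimal sketch
with a placeholder locale name:)

    locale_t loc = newlocale(LC_CTYPE_MASK, "fr_FR.ISO8859-1", (locale_t)0);

    islower('a');            /* consults the global or per-thread locale */
    islower_l('a', loc);     /* consults 'loc' explicitly */

    uselocale(loc);          /* install 'loc' for this thread... */
    islower('a');            /* ...now the plain form consults it too */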
diff --git a/lib/libc/locale/ispunct.3 b/lib/libc/locale/ispunct.3 index 8fcfe54..c06b25d 100644 --- a/lib/libc/locale/ispunct.3 +++ b/lib/libc/locale/ispunct.3 @@ -32,7 +32,7 @@ .\" @(#)ispunct.3 8.1 (Berkeley) 6/4/93 .\" $FreeBSD$ .\" -.Dd July 17, 2005 +.Dd July 30, 2012 .Dt ISPUNCT 3 .Os .Sh NAME @@ -44,6 +44,8 @@ .In ctype.h .Ft int .Fn ispunct "int c" +.Ft int +.Fn ispunct_l "int c" "locale_t loc" .Sh DESCRIPTION The .Fn ispunct @@ -69,11 +71,19 @@ In the ASCII character set, this includes the following characters .It "\&136\ ``^''" Ta "137\ ``_''" Ta "140\ ```''" Ta "173\ ``{''" Ta "174\ ``|''" .It "\&175\ ``}''" Ta "176\ ``~''" Ta \& Ta \& Ta \& .El +.Pp +The +.Fn ispunct_l +function takes an explicit locale argument, whereas the +.Fn ispunct +function uses the current global or per-thread locale. .Sh RETURN VALUES The .Fn ispunct -function returns zero if the character tests false and -returns non-zero if the character tests true. +and +.Fn ispunct_l +functions return zero if the character tests false and +return non-zero if the character tests true. .Sh COMPATIBILITY The .Bx 4.4 @@ -93,3 +103,7 @@ The .Fn ispunct function conforms to .St -isoC . +The +.Fn ispunct_l +function conforms to +.St -p1003.1-2008 . diff --git a/lib/libc/locale/isspace.3 b/lib/libc/locale/isspace.3 index 6dfeb5f..5dfd1b6 100644 --- a/lib/libc/locale/isspace.3 +++ b/lib/libc/locale/isspace.3 @@ -32,7 +32,7 @@ .\" @(#)isspace.3 8.1 (Berkeley) 6/4/93 .\" $FreeBSD$ .\" -.Dd July 17, 2005 +.Dd July 30, 2012 .Dt ISSPACE 3 .Os .Sh NAME @@ -44,6 +44,8 @@ .In ctype.h .Ft int .Fn isspace "int c" +.Ft int +.Fn isspace_l "int c" "locale_t loc" .Sh DESCRIPTION The .Fn isspace @@ -60,11 +62,19 @@ The value of the argument must be representable as an .Vt "unsigned char" or the value of .Dv EOF . +.Pp +The +.Fn isspace_l +function takes an explicit locale argument, whereas the +.Fn isspace +function uses the current global or per-thread locale. .Sh RETURN VALUES The .Fn isspace -function returns zero if the character tests false and -returns non-zero if the character tests true. +and +.Fn isspace_l +functions return zero if the character tests false and +return non-zero if the character tests true. .Sh COMPATIBILITY The .Bx 4.4 @@ -85,3 +95,7 @@ The .Fn isspace function conforms to .St -isoC . +The +.Fn isspace_l +function conforms to +.St -p1003.1-2008 . diff --git a/lib/libc/locale/nl_langinfo.3 b/lib/libc/locale/nl_langinfo.3 index 789cac2..d8c01b2 100644 --- a/lib/libc/locale/nl_langinfo.3 +++ b/lib/libc/locale/nl_langinfo.3 @@ -24,7 +24,7 @@ .\" .\" $FreeBSD$ .\" -.Dd May 3, 2001 +.Dd July 30, 2012 .Dt NL_LANGINFO 3 .Os .Sh NAME @@ -36,11 +36,16 @@ .In langinfo.h .Ft char * .Fn nl_langinfo "nl_item item" +.Ft char * +.Fn nl_langinfo_l "nl_item item" "locale_t loc" .Sh DESCRIPTION The .Fn nl_langinfo function returns a pointer to a string containing information relevant to -the particular language or cultural area defined in the program's locale. +the particular language or cultural area defined in the program or thread's +locale, or in the case of +.Fn nl_langinfo_l , +the locale passed as the second argument. The manifest constant names and values of .Fa item are defined in @@ -60,6 +65,9 @@ In a locale where langinfo data is not defined, returns a pointer to the corresponding string in the .Tn POSIX locale. +.Fn nl_langinfo_l +returns the same values as +.Fn nl_langinfo . In all locales, .Fn nl_langinfo returns a pointer to an empty string if @@ -83,6 +91,10 @@ The .Fn nl_langinfo function conforms to .St -susv2 . 
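(Editorial aside: a one-line illustration of the pair, assuming 'loc' was
obtained from newlocale(3):)

    printf("codeset: %s\n", nl_langinfo(CODESET));        /* current locale */
    printf("codeset: %s\n", nl_langinfo_l(CODESET, loc)); /* explicit locale */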
+The +.Fn nl_langinfo_l +function conforms to +.St -p1003.1-2008 . .Sh HISTORY The .Fn nl_langinfo -- cgit v1.1 From 84ed784b36ca5c56410485c6fcfc66e2d37a82d1 Mon Sep 17 00:00:00 2001 From: joel Date: Mon, 30 Jul 2012 21:02:44 +0000 Subject: Remove trailing whitespace. --- lib/libc/locale/islower.3 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/libc/locale/islower.3 b/lib/libc/locale/islower.3 index 14c3b5f..0b340da 100644 --- a/lib/libc/locale/islower.3 +++ b/lib/libc/locale/islower.3 @@ -69,7 +69,7 @@ The .Fn islower_l function takes an explicit locale argument, whereas the .Fn islower -function uses the current global or per-thread locale. +function uses the current global or per-thread locale. .Sh RETURN VALUES The .Fn islower -- cgit v1.1 From 0aadb27b4102ebb037ecde41aaddbd8e1db8e2dd Mon Sep 17 00:00:00 2001 From: kargl Date: Mon, 30 Jul 2012 21:55:49 +0000 Subject: Whitespace. Submitted by: bde Approved by: das (pre-approved) --- lib/msun/ld128/s_expl.c | 4 ++-- lib/msun/ld80/s_expl.c | 2 +- lib/msun/src/s_cbrtl.c | 1 - 3 files changed, 3 insertions(+), 4 deletions(-) (limited to 'lib') diff --git a/lib/msun/ld128/s_expl.c b/lib/msun/ld128/s_expl.c index c03368e..7594486 100644 --- a/lib/msun/ld128/s_expl.c +++ b/lib/msun/ld128/s_expl.c @@ -58,7 +58,7 @@ P9 = 2.75573192240103867817876199544468806e-6L, P10 = 2.75573236172670046201884000197885520e-7L, P11 = 2.50517544183909126492878226167697856e-8L; -#define INTERVALS 128 +#define INTERVALS 128 static const struct { long double hi; @@ -205,7 +205,7 @@ expl(long double x) /* Filter out exceptional cases. */ u.e = x; hx = u.xbits.expsign; - ix = hx & 0x7fff; + ix = hx & 0x7fff; if (ix >= BIAS + 13) { /* |x| >= 8192 or x is NaN */ if (ix == BIAS + LDBL_MAX_EXP) { if (u.xbits.manh != 0 diff --git a/lib/msun/ld80/s_expl.c b/lib/msun/ld80/s_expl.c index 7fb0d99..8b26d5b 100644 --- a/lib/msun/ld80/s_expl.c +++ b/lib/msun/ld80/s_expl.c @@ -88,7 +88,7 @@ P6 = 1.3888891738560272e-3; /* 0x16c16c651633ae.0p-62 */ * the first 47 (?!) bits of the significand is stored in hi and the next 53 * bits are in lo. */ -#define INTERVALS 128 +#define INTERVALS 128 static const struct { double hi; diff --git a/lib/msun/src/s_cbrtl.c b/lib/msun/src/s_cbrtl.c index 95ab956..3203d96 100644 --- a/lib/msun/src/s_cbrtl.c +++ b/lib/msun/src/s_cbrtl.c @@ -52,7 +52,6 @@ cbrtl(long double x) return (x + x); ENTERI(); - if (k == 0) { /* If x = +-0, then cbrt(x) = +-0. */ if ((u.bits.manh | u.bits.manl) == 0) -- cgit v1.1 From 42e27d2ed6ba1e01db7f82b00e5cb967b522129c Mon Sep 17 00:00:00 2001 From: kargl Date: Mon, 30 Jul 2012 21:58:28 +0000 Subject: ieeefp.h is only needed on i386 class hardware. Submitted by: bde Approved by: das (pre-approved) --- lib/msun/src/s_cbrtl.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'lib') diff --git a/lib/msun/src/s_cbrtl.c b/lib/msun/src/s_cbrtl.c index 3203d96..2236c0f 100644 --- a/lib/msun/src/s_cbrtl.c +++ b/lib/msun/src/s_cbrtl.c @@ -18,7 +18,9 @@ __FBSDID("$FreeBSD$"); #include +#ifdef __i386__ #include +#endif #include "fpmath.h" #include "math.h" -- cgit v1.1 From dc040356951ccf5c9e9fab8c348e87e5dcc818c1 Mon Sep 17 00:00:00 2001 From: delphij Date: Wed, 1 Aug 2012 00:21:55 +0000 Subject: Use calloc(). 
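The pattern being adopted, in general form (a sketch, not the fts.c hunk itself):

    /* Before: allocate, then zero by hand. */
    if ((p = malloc(sizeof(*p))) == NULL)
            return (NULL);
    memset(p, 0, sizeof(*p));

    /* After: a single call that returns already-zeroed storage. */
    if ((p = calloc(1, sizeof(*p))) == NULL)
            return (NULL);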
--- lib/libc/gen/fts.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/libc/gen/fts.c b/lib/libc/gen/fts.c index 153b8da..afaa057 100644 --- a/lib/libc/gen/fts.c +++ b/lib/libc/gen/fts.c @@ -134,9 +134,8 @@ fts_open(argv, options, compar) } /* Allocate/initialize the stream. */ - if ((priv = malloc(sizeof(*priv))) == NULL) + if ((priv = calloc(1, sizeof(*priv))) == NULL) return (NULL); - memset(priv, 0, sizeof(*priv)); sp = &priv->ftsp_fts; sp->fts_compar = compar; sp->fts_options = options; -- cgit v1.1 From 6bdab82e0ad7726e705a69f2d0d5987cf035484b Mon Sep 17 00:00:00 2001 From: dfr Date: Sun, 5 Aug 2012 13:40:35 +0000 Subject: Add an option for pam_krb5 to allow it to authenticate users which don't have a local account. PR: 76678 Submitted by: daved at tamu.edu MFC after: 2 weeks --- lib/libpam/modules/pam_krb5/pam_krb5.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'lib') diff --git a/lib/libpam/modules/pam_krb5/pam_krb5.c b/lib/libpam/modules/pam_krb5/pam_krb5.c index 602718f..3d5105d 100644 --- a/lib/libpam/modules/pam_krb5/pam_krb5.c +++ b/lib/libpam/modules/pam_krb5/pam_krb5.c @@ -91,6 +91,7 @@ static void compat_free_data_contents(krb5_context, krb5_data *); #define PAM_OPT_NO_CCACHE "no_ccache" #define PAM_OPT_NO_USER_CHECK "no_user_check" #define PAM_OPT_REUSE_CCACHE "reuse_ccache" +#define PAM_OPT_NO_USER_CHECK "no_user_check" #define PAM_LOG_KRB5_ERR(ctx, rv, fmt, ...) \ do { \ @@ -218,10 +219,12 @@ pam_sm_authenticate(pam_handle_t *pamh, int flags __unused, PAM_LOG("PAM_USER Redone"); } - pwd = getpwnam(user); - if (pwd == NULL) { - retval = PAM_USER_UNKNOWN; - goto cleanup2; + if (!openpam_get_option(pamh, PAM_OPT_NO_USER_CHECK)) { + pwd = getpwnam(user); + if (pwd == NULL) { + retval = PAM_USER_UNKNOWN; + goto cleanup2; + } } PAM_LOG("Done getpwnam()"); -- cgit v1.1 From 74a518dd3c33db9ca48c1cb01d6e0a71528e7845 Mon Sep 17 00:00:00 2001 From: dim Date: Mon, 6 Aug 2012 18:40:14 +0000 Subject: Fix two instances in pam_krb5(8), where the variable 'princ_name' could be used uninitialized. Found by: clang 3.2 Reviewed by: des MFC after: 1 week --- lib/libpam/modules/pam_krb5/pam_krb5.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'lib') diff --git a/lib/libpam/modules/pam_krb5/pam_krb5.c b/lib/libpam/modules/pam_krb5/pam_krb5.c index 3d5105d..e547a3a 100644 --- a/lib/libpam/modules/pam_krb5/pam_krb5.c +++ b/lib/libpam/modules/pam_krb5/pam_krb5.c @@ -338,11 +338,11 @@ cleanup: PAM_LOG("Done cleanup"); cleanup2: krb5_free_principal(pam_context, princ); - PAM_LOG("Done cleanup2"); -cleanup3: if (princ_name) free(princ_name); + PAM_LOG("Done cleanup2"); +cleanup3: krb5_free_context(pam_context); PAM_LOG("Done cleanup3"); @@ -805,11 +805,11 @@ cleanup: PAM_LOG("Done cleanup"); cleanup2: krb5_free_principal(pam_context, princ); - PAM_LOG("Done cleanup2"); -cleanup3: if (princ_name) free(princ_name); + PAM_LOG("Done cleanup2"); +cleanup3: krb5_free_context(pam_context); PAM_LOG("Done cleanup3"); -- cgit v1.1 From e55724fcb2f630d9fabf2eb0f3d6e1b1aaf99381 Mon Sep 17 00:00:00 2001 From: dim Date: Mon, 6 Aug 2012 18:44:59 +0000 Subject: Fix an instance in pam_krb5(8), where the variable 'user' could be used uninitialized. 
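The hazard (in pam_unix.c, per the diff below) is branch-dependent
initialization; the old shape, reduced to a sketch:

    const char *user;                        /* indeterminate until assigned */

    if (openpam_get_option(pamh, PAM_OPT_AUTH_AS_SELF)) {
            pwd = getpwnam(getlogin());      /* 'user' is never set here */
    } else {
            retval = pam_get_user(pamh, &user, NULL);
            if (retval != PAM_SUCCESS)
                    return (retval);
            pwd = getpwnam(user);
    }
    PAM_LOG("Got user: %s", user);   /* may read an indeterminate pointer */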
Found by: clang 3.2 Reviewed by: des MFC after: 1 week --- lib/libpam/modules/pam_unix/pam_unix.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/libpam/modules/pam_unix/pam_unix.c b/lib/libpam/modules/pam_unix/pam_unix.c index 415004a..5881ecf 100644 --- a/lib/libpam/modules/pam_unix/pam_unix.c +++ b/lib/libpam/modules/pam_unix/pam_unix.c @@ -94,13 +94,13 @@ pam_sm_authenticate(pam_handle_t *pamh, int flags __unused, const char *pass, *user, *realpw, *prompt; if (openpam_get_option(pamh, PAM_OPT_AUTH_AS_SELF)) { - pwd = getpwnam(getlogin()); + user = getlogin(); } else { retval = pam_get_user(pamh, &user, NULL); if (retval != PAM_SUCCESS) return (retval); - pwd = getpwnam(user); } + pwd = getpwnam(user); PAM_LOG("Got user: %s", user); -- cgit v1.1 From 05000e5bd1001071522b669bef4e6844d1453f80 Mon Sep 17 00:00:00 2001 From: jilles Date: Thu, 9 Aug 2012 15:04:06 +0000 Subject: nftw(): Do not check the maxfds argument against OPEN_MAX. Apart from the fact that nothing should have OPEN_MAX as a limit (as opposed to RLIMIT_NOFILE from getrlimit() or _SC_OPEN_MAX from sysconf()), POSIX does not require us to check this. PR: 95239 Submitted by: Todd Miller --- lib/libc/gen/nftw.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/libc/gen/nftw.c b/lib/libc/gen/nftw.c index 43110c1..e14d09e 100644 --- a/lib/libc/gen/nftw.c +++ b/lib/libc/gen/nftw.c @@ -34,7 +34,6 @@ __FBSDID("$FreeBSD$"); #include #include #include -#include int nftw(const char *path, int (*fn)(const char *, const struct stat *, int, @@ -47,7 +46,7 @@ nftw(const char *path, int (*fn)(const char *, const struct stat *, int, int error = 0, ftsflags, fnflag, postorder, sverrno; /* XXX - nfds is currently unused */ - if (nfds < 1 || nfds > OPEN_MAX) { + if (nfds < 1) { errno = EINVAL; return (-1); } -- cgit v1.1 From 01c6b906beb3f49fe7d799ffaa172f7b9d686595 Mon Sep 17 00:00:00 2001 From: jilles Date: Thu, 9 Aug 2012 15:11:38 +0000 Subject: ftw(): Do not check the maxfds argument against OPEN_MAX. Apart from the fact that nothing should have OPEN_MAX as a limit (as opposed to RLIMIT_NOFILE from getrlimit() or _SC_OPEN_MAX from sysconf()), POSIX does not require us to check this. POSIX does have a requirement on the application that maxfds not exceed {OPEN_MAX}, but does not require the implementation to check it ("may fail"). PR: 95239 --- lib/libc/gen/ftw.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/libc/gen/ftw.c b/lib/libc/gen/ftw.c index bfaf121..253a295 100644 --- a/lib/libc/gen/ftw.c +++ b/lib/libc/gen/ftw.c @@ -28,7 +28,6 @@ __FBSDID("$FreeBSD$"); #include #include #include -#include int ftw(const char *path, int (*fn)(const char *, const struct stat *, int), @@ -40,7 +39,7 @@ ftw(const char *path, int (*fn)(const char *, const struct stat *, int), int error = 0, fnflag, sverrno; /* XXX - nfds is currently unused */ - if (nfds < 1 || nfds > OPEN_MAX) { + if (nfds < 1) { errno = EINVAL; return (-1); } -- cgit v1.1 From 278be30294f435b7b5e07d641af9c4c6b7dc3b95 Mon Sep 17 00:00:00 2001 From: delphij Date: Thu, 9 Aug 2012 19:22:54 +0000 Subject: Refresh with OpenBSD RCS ID changes to reflect that we now have essentionally the same file. 
--- lib/libc/gen/nftw.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) (limited to 'lib') diff --git a/lib/libc/gen/nftw.c b/lib/libc/gen/nftw.c index e14d09e..5bca2de 100644 --- a/lib/libc/gen/nftw.c +++ b/lib/libc/gen/nftw.c @@ -1,4 +1,4 @@ -/* $OpenBSD: nftw.c,v 1.4 2004/07/07 16:05:23 millert Exp $ */ +/* $OpenBSD: nftw.c,v 1.7 2006/03/31 19:41:44 millert Exp $ */ /* * Copyright (c) 2003, 2004 Todd C. Miller @@ -20,12 +20,6 @@ * Materiel Command, USAF, under agreement number F39502-99-1-0512. */ -#if 0 -#if defined(LIBC_SCCS) && !defined(lint) -static const char rcsid[] = "$OpenBSD: nftw.c,v 1.4 2004/07/07 16:05:23 millert Exp $"; -#endif /* LIBC_SCCS and not lint */ -#endif - #include __FBSDID("$FreeBSD$"); -- cgit v1.1 From 4d961f95f8cad07145afdb640f32972b8b61fbaa Mon Sep 17 00:00:00 2001 From: jilles Date: Thu, 9 Aug 2012 22:05:40 +0000 Subject: nftw(): POSIX says directories causing loops should be silently skipped. Formerly, loops caused nftw() to abort the traversal with ELOOP. --- lib/libc/gen/nftw.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'lib') diff --git a/lib/libc/gen/nftw.c b/lib/libc/gen/nftw.c index 5bca2de..a338e5a 100644 --- a/lib/libc/gen/nftw.c +++ b/lib/libc/gen/nftw.c @@ -65,6 +65,8 @@ nftw(const char *path, int (*fn)(const char *, const struct stat *, int, continue; fnflag = FTW_D; break; + case FTS_DC: + continue; case FTS_DNR: fnflag = FTW_DNR; break; @@ -87,9 +89,6 @@ nftw(const char *path, int (*fn)(const char *, const struct stat *, int, case FTS_SLNONE: fnflag = FTW_SLN; break; - case FTS_DC: - errno = ELOOP; - /* FALLTHROUGH */ default: error = -1; goto done; -- cgit v1.1 From d3224b8ca5f865af7b35f0443b1128ba4fa6f1b4 Mon Sep 17 00:00:00 2001 From: dim Date: Sat, 11 Aug 2012 11:13:48 +0000 Subject: Change a few extern inline functions in libm to static inline, since they need to refer to static constants, which C99 does not allow for extern inline functions. While here, change a comment in e_rem_pio2f.c to mention the correct number of bits. 
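The C99 rule at issue, distilled (not the libm code itself):

    static const double pio2_1 = 1.57079631090164184570e+00;

    /*
     * Assuming no other declaration of bad() in this translation unit,
     * this is a C99 "inline definition" with external linkage; C99
     * 6.7.4p3 forbids such a definition from referencing an identifier
     * with internal linkage, here the static constant pio2_1.
     */
    inline int bad(double x) { return (x > pio2_1); }  /* constraint violation */

    /* Internal linkage removes the restriction: */
    static inline int good(double x) { return (x > pio2_1); }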
Reviewed by: bde MFC after: 1 week --- lib/msun/src/e_rem_pio2.c | 6 +++--- lib/msun/src/e_rem_pio2f.c | 8 ++++---- lib/msun/src/k_cosf.c | 6 +++--- lib/msun/src/k_sinf.c | 6 +++--- lib/msun/src/k_tanf.c | 6 +++--- lib/msun/src/math_private.h | 19 +++++++------------ 6 files changed, 23 insertions(+), 28 deletions(-) (limited to 'lib') diff --git a/lib/msun/src/e_rem_pio2.c b/lib/msun/src/e_rem_pio2.c index fde9660..6dd453a 100644 --- a/lib/msun/src/e_rem_pio2.c +++ b/lib/msun/src/e_rem_pio2.c @@ -48,10 +48,10 @@ pio2_2t = 2.02226624879595063154e-21, /* 0x3BA3198A, 0x2E037073 */ pio2_3 = 2.02226624871116645580e-21, /* 0x3BA3198A, 0x2E000000 */ pio2_3t = 8.47842766036889956997e-32; /* 0x397B839A, 0x252049C1 */ -#ifndef INLINE_REM_PIO2 -extern +#ifdef INLINE_REM_PIO2 +static __inline #endif -__inline int +int __ieee754_rem_pio2(double x, double *y) { double z,w,t,r,fn; diff --git a/lib/msun/src/e_rem_pio2f.c b/lib/msun/src/e_rem_pio2f.c index fb608d1..bd12186 100644 --- a/lib/msun/src/e_rem_pio2f.c +++ b/lib/msun/src/e_rem_pio2f.c @@ -31,7 +31,7 @@ __FBSDID("$FreeBSD$"); /* * invpio2: 53 bits of 2/pi - * pio2_1: first 33 bit of pi/2 + * pio2_1: first 25 bits of pi/2 * pio2_1t: pi/2 - pio2_1 */ @@ -40,10 +40,10 @@ invpio2 = 6.36619772367581382433e-01, /* 0x3FE45F30, 0x6DC9C883 */ pio2_1 = 1.57079631090164184570e+00, /* 0x3FF921FB, 0x50000000 */ pio2_1t = 1.58932547735281966916e-08; /* 0x3E5110b4, 0x611A6263 */ -#ifndef INLINE_REM_PIO2F -extern +#ifdef INLINE_REM_PIO2F +static __inline #endif -__inline int +int __ieee754_rem_pio2f(float x, double *y) { double w,r,fn; diff --git a/lib/msun/src/k_cosf.c b/lib/msun/src/k_cosf.c index 92bce48..f7a2c0a 100644 --- a/lib/msun/src/k_cosf.c +++ b/lib/msun/src/k_cosf.c @@ -30,10 +30,10 @@ C1 = 0x155553e1053a42.0p-57, /* 0.0416666233237390631894 */ C2 = -0x16c087e80f1e27.0p-62, /* -0.00138867637746099294692 */ C3 = 0x199342e0ee5069.0p-68; /* 0.0000243904487962774090654 */ -#ifndef INLINE_KERNEL_COSDF -extern +#ifdef INLINE_KERNEL_COSDF +static __inline #endif -__inline float +float __kernel_cosdf(double x) { double r, w, z; diff --git a/lib/msun/src/k_sinf.c b/lib/msun/src/k_sinf.c index aa4f268..0841759 100644 --- a/lib/msun/src/k_sinf.c +++ b/lib/msun/src/k_sinf.c @@ -29,10 +29,10 @@ S2 = 0x111110896efbb2.0p-59, /* 0.0083333293858894631756 */ S3 = -0x1a00f9e2cae774.0p-65, /* -0.000198393348360966317347 */ S4 = 0x16cd878c3b46a7.0p-71; /* 0.0000027183114939898219064 */ -#ifndef INLINE_KERNEL_SINDF -extern +#ifdef INLINE_KERNEL_SINDF +static __inline #endif -__inline float +float __kernel_sindf(double x) { double r, s, w, z; diff --git a/lib/msun/src/k_tanf.c b/lib/msun/src/k_tanf.c index 6b073da..52f1aaa 100644 --- a/lib/msun/src/k_tanf.c +++ b/lib/msun/src/k_tanf.c @@ -32,10 +32,10 @@ T[] = { 0x1362b9bf971bcd.0p-59, /* 0.00946564784943673166728 */ }; -#ifndef INLINE_KERNEL_TANDF -extern +#ifdef INLINE_KERNEL_TANDF +static __inline #endif -__inline float +float __kernel_tandf(double x, int iy) { double z,r,w,s,t,u; diff --git a/lib/msun/src/math_private.h b/lib/msun/src/math_private.h index 94e7507..f835c7f 100644 --- a/lib/msun/src/math_private.h +++ b/lib/msun/src/math_private.h @@ -431,10 +431,9 @@ irintl(long double x) int __kernel_rem_pio2(double*,double*,int,int,int); /* double precision kernel functions */ -#ifdef INLINE_REM_PIO2 -__inline -#endif +#ifndef INLINE_REM_PIO2 int __ieee754_rem_pio2(double,double*); +#endif double __kernel_sin(double,double,int); double __kernel_cos(double,double); double __kernel_tan(double,double,int); @@ -444,22 
+443,18 @@ double complex __ldexp_cexp(double complex,int); #endif /* float precision kernel functions */ -#ifdef INLINE_REM_PIO2F -__inline -#endif +#ifndef INLINE_REM_PIO2F int __ieee754_rem_pio2f(float,double*); -#ifdef INLINE_KERNEL_SINDF -__inline #endif +#ifndef INLINE_KERNEL_SINDF float __kernel_sindf(double); -#ifdef INLINE_KERNEL_COSDF -__inline #endif +#ifndef INLINE_KERNEL_COSDF float __kernel_cosdf(double); -#ifdef INLINE_KERNEL_TANDF -__inline #endif +#ifndef INLINE_KERNEL_TANDF float __kernel_tandf(double,int); +#endif float __ldexp_expf(float,int); #ifdef _COMPLEX_H float complex __ldexp_cexpf(float complex,int); -- cgit v1.1 From 362478d3c0bd28967053d21d4ba2a88a97feae01 Mon Sep 17 00:00:00 2001 From: ed Date: Sat, 11 Aug 2012 12:07:24 +0000 Subject: Rename aux.c to auxv.c. On Windows, AUX is the auxiliary device, usually pointing to COM1. Therefore it is forbidden to create a file named aux.c. To make it a bit easier for Windows users to check out our source code, rename this file to auxv.c. MFC after: 1 month Discussed with: kib Suggested by: Eric van Gyzen --- lib/libc/gen/Makefile.inc | 2 +- lib/libc/gen/aux.c | 186 ---------------------------------------------- lib/libc/gen/auxv.c | 186 ++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 187 insertions(+), 187 deletions(-) delete mode 100644 lib/libc/gen/aux.c create mode 100644 lib/libc/gen/auxv.c (limited to 'lib') diff --git a/lib/libc/gen/Makefile.inc b/lib/libc/gen/Makefile.inc index 98e8183..2e7a31c 100644 --- a/lib/libc/gen/Makefile.inc +++ b/lib/libc/gen/Makefile.inc @@ -7,7 +7,7 @@ SRCS+= __getosreldate.c __xuname.c \ _once_stub.c _pthread_stubs.c _rand48.c _spinlock_stub.c \ _thread_init.c \ - alarm.c arc4random.c assert.c aux.c basename.c check_utility_compat.c \ + alarm.c arc4random.c assert.c auxv.c basename.c check_utility_compat.c \ clock.c closedir.c confstr.c \ crypt.c ctermid.c daemon.c devname.c dirfd.c dirname.c disklabel.c \ dlfcn.c drand48.c elf_utils.c erand48.c err.c errlst.c errno.c \ diff --git a/lib/libc/gen/aux.c b/lib/libc/gen/aux.c deleted file mode 100644 index 3767ac0..0000000 --- a/lib/libc/gen/aux.c +++ /dev/null @@ -1,186 +0,0 @@ -/*- - * Copyright 2010, 2012 Konstantin Belousov . - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR - * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. - * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- * - */ - -#include -__FBSDID("$FreeBSD$"); - -#include "namespace.h" -#include -#include -#include -#include -#include -#include "un-namespace.h" -#include "libc_private.h" - -extern char **environ; -extern int _DYNAMIC; -#pragma weak _DYNAMIC - -void *__elf_aux_vector; -static pthread_once_t aux_vector_once = PTHREAD_ONCE_INIT; - -static void -init_aux_vector_once(void) -{ - Elf_Addr *sp; - - sp = (Elf_Addr *)environ; - while (*sp++ != 0) - ; - __elf_aux_vector = (Elf_Auxinfo *)sp; -} - -void -__init_elf_aux_vector(void) -{ - - if (&_DYNAMIC != NULL) - return; - _once(&aux_vector_once, init_aux_vector_once); -} - -static pthread_once_t aux_once = PTHREAD_ONCE_INIT; -static int pagesize, osreldate, canary_len, ncpus, pagesizes_len; -static char *canary, *pagesizes; -static void *timekeep; - -static void -init_aux(void) -{ - Elf_Auxinfo *aux; - - for (aux = __elf_aux_vector; aux->a_type != AT_NULL; aux++) { - switch (aux->a_type) { - case AT_CANARY: - canary = (char *)(aux->a_un.a_ptr); - break; - - case AT_CANARYLEN: - canary_len = aux->a_un.a_val; - break; - - case AT_PAGESIZES: - pagesizes = (char *)(aux->a_un.a_ptr); - break; - - case AT_PAGESIZESLEN: - pagesizes_len = aux->a_un.a_val; - break; - - case AT_PAGESZ: - pagesize = aux->a_un.a_val; - break; - - case AT_OSRELDATE: - osreldate = aux->a_un.a_val; - break; - - case AT_NCPUS: - ncpus = aux->a_un.a_val; - break; - - case AT_TIMEKEEP: - timekeep = aux->a_un.a_ptr; - break; - } - } -} - -int -_elf_aux_info(int aux, void *buf, int buflen) -{ - int res; - - __init_elf_aux_vector(); - if (__elf_aux_vector == NULL) - return (ENOSYS); - _once(&aux_once, init_aux); - - switch (aux) { - case AT_CANARY: - if (canary != NULL && canary_len >= buflen) { - memcpy(buf, canary, buflen); - memset(canary, 0, canary_len); - canary = NULL; - res = 0; - } else - res = ENOENT; - break; - case AT_PAGESIZES: - if (pagesizes != NULL && pagesizes_len >= buflen) { - memcpy(buf, pagesizes, buflen); - res = 0; - } else - res = ENOENT; - break; - - case AT_PAGESZ: - if (buflen == sizeof(int)) { - if (pagesize != 0) { - *(int *)buf = pagesize; - res = 0; - } else - res = ENOENT; - } else - res = EINVAL; - break; - case AT_OSRELDATE: - if (buflen == sizeof(int)) { - if (osreldate != 0) { - *(int *)buf = osreldate; - res = 0; - } else - res = ENOENT; - } else - res = EINVAL; - break; - case AT_NCPUS: - if (buflen == sizeof(int)) { - if (ncpus != 0) { - *(int *)buf = ncpus; - res = 0; - } else - res = ENOENT; - } else - res = EINVAL; - break; - case AT_TIMEKEEP: - if (buflen == sizeof(void *)) { - if (timekeep != NULL) { - *(void **)buf = timekeep; - res = 0; - } else - res = ENOENT; - } else - res = EINVAL; - break; - default: - res = ENOENT; - break; - } - return (res); -} diff --git a/lib/libc/gen/auxv.c b/lib/libc/gen/auxv.c new file mode 100644 index 0000000..3767ac0 --- /dev/null +++ b/lib/libc/gen/auxv.c @@ -0,0 +1,186 @@ +/*- + * Copyright 2010, 2012 Konstantin Belousov . + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ + +#include +__FBSDID("$FreeBSD$"); + +#include "namespace.h" +#include +#include +#include +#include +#include +#include "un-namespace.h" +#include "libc_private.h" + +extern char **environ; +extern int _DYNAMIC; +#pragma weak _DYNAMIC + +void *__elf_aux_vector; +static pthread_once_t aux_vector_once = PTHREAD_ONCE_INIT; + +static void +init_aux_vector_once(void) +{ + Elf_Addr *sp; + + sp = (Elf_Addr *)environ; + while (*sp++ != 0) + ; + __elf_aux_vector = (Elf_Auxinfo *)sp; +} + +void +__init_elf_aux_vector(void) +{ + + if (&_DYNAMIC != NULL) + return; + _once(&aux_vector_once, init_aux_vector_once); +} + +static pthread_once_t aux_once = PTHREAD_ONCE_INIT; +static int pagesize, osreldate, canary_len, ncpus, pagesizes_len; +static char *canary, *pagesizes; +static void *timekeep; + +static void +init_aux(void) +{ + Elf_Auxinfo *aux; + + for (aux = __elf_aux_vector; aux->a_type != AT_NULL; aux++) { + switch (aux->a_type) { + case AT_CANARY: + canary = (char *)(aux->a_un.a_ptr); + break; + + case AT_CANARYLEN: + canary_len = aux->a_un.a_val; + break; + + case AT_PAGESIZES: + pagesizes = (char *)(aux->a_un.a_ptr); + break; + + case AT_PAGESIZESLEN: + pagesizes_len = aux->a_un.a_val; + break; + + case AT_PAGESZ: + pagesize = aux->a_un.a_val; + break; + + case AT_OSRELDATE: + osreldate = aux->a_un.a_val; + break; + + case AT_NCPUS: + ncpus = aux->a_un.a_val; + break; + + case AT_TIMEKEEP: + timekeep = aux->a_un.a_ptr; + break; + } + } +} + +int +_elf_aux_info(int aux, void *buf, int buflen) +{ + int res; + + __init_elf_aux_vector(); + if (__elf_aux_vector == NULL) + return (ENOSYS); + _once(&aux_once, init_aux); + + switch (aux) { + case AT_CANARY: + if (canary != NULL && canary_len >= buflen) { + memcpy(buf, canary, buflen); + memset(canary, 0, canary_len); + canary = NULL; + res = 0; + } else + res = ENOENT; + break; + case AT_PAGESIZES: + if (pagesizes != NULL && pagesizes_len >= buflen) { + memcpy(buf, pagesizes, buflen); + res = 0; + } else + res = ENOENT; + break; + + case AT_PAGESZ: + if (buflen == sizeof(int)) { + if (pagesize != 0) { + *(int *)buf = pagesize; + res = 0; + } else + res = ENOENT; + } else + res = EINVAL; + break; + case AT_OSRELDATE: + if (buflen == sizeof(int)) { + if (osreldate != 0) { + *(int *)buf = osreldate; + res = 0; + } else + res = ENOENT; + } else + res = EINVAL; + break; + case AT_NCPUS: + if (buflen == sizeof(int)) { + if (ncpus != 0) { + *(int *)buf = ncpus; + res = 0; + } else + res = ENOENT; + } else + res = EINVAL; + break; + case AT_TIMEKEEP: + if (buflen == sizeof(void *)) { + if (timekeep != NULL) { + *(void **)buf = timekeep; + res = 0; + } else + res = ENOENT; + } else + res = EINVAL; + break; + default: + res = ENOENT; + break; + } + return (res); +} -- cgit v1.1 From 
c88a41061f74c31f13da5dd0358685499d8dbb06 Mon Sep 17 00:00:00 2001 From: dim Date: Sat, 11 Aug 2012 15:47:22 +0000 Subject: Add __always_inline to __ieee754_rem_pio2() and __ieee754_rem_pio2f(), since some older versions of gcc refuse to inline these otherwise. Requested by: bde MFC after: 1 week --- lib/msun/src/e_rem_pio2.c | 2 +- lib/msun/src/e_rem_pio2f.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/msun/src/e_rem_pio2.c b/lib/msun/src/e_rem_pio2.c index 6dd453a..be2630b 100644 --- a/lib/msun/src/e_rem_pio2.c +++ b/lib/msun/src/e_rem_pio2.c @@ -49,7 +49,7 @@ pio2_3 = 2.02226624871116645580e-21, /* 0x3BA3198A, 0x2E000000 */ pio2_3t = 8.47842766036889956997e-32; /* 0x397B839A, 0x252049C1 */ #ifdef INLINE_REM_PIO2 -static __inline +static __inline __always_inline #endif int __ieee754_rem_pio2(double x, double *y) diff --git a/lib/msun/src/e_rem_pio2f.c b/lib/msun/src/e_rem_pio2f.c index bd12186..f1ee7a0 100644 --- a/lib/msun/src/e_rem_pio2f.c +++ b/lib/msun/src/e_rem_pio2f.c @@ -41,7 +41,7 @@ pio2_1 = 1.57079631090164184570e+00, /* 0x3FF921FB, 0x50000000 */ pio2_1t = 1.58932547735281966916e-08; /* 0x3E5110b4, 0x611A6263 */ #ifdef INLINE_REM_PIO2F -static __inline +static __inline __always_inline #endif int __ieee754_rem_pio2f(float x, double *y) -- cgit v1.1 From 72ce06de363df873b583c5e3420f53a5606aa570 Mon Sep 17 00:00:00 2001 From: davidxu Date: Sat, 11 Aug 2012 23:17:02 +0000 Subject: MFp4: Further decreases unexpected context switches by defering mutex wakeup until internal sleep queue lock is released. --- lib/libthr/thread/thr_cond.c | 15 +++++++++++++-- lib/libthr/thread/thr_kern.c | 7 ------- lib/libthr/thread/thr_mutex.c | 16 ++++++++-------- lib/libthr/thread/thr_private.h | 8 ++++---- lib/libthr/thread/thr_umtx.h | 16 +++++++++++++--- 5 files changed, 38 insertions(+), 24 deletions(-) (limited to 'lib') diff --git a/lib/libthr/thread/thr_cond.c b/lib/libthr/thread/thr_cond.c index a834711..541e40a 100644 --- a/lib/libthr/thread/thr_cond.c +++ b/lib/libthr/thread/thr_cond.c @@ -217,6 +217,7 @@ cond_wait_user(struct pthread_cond *cvp, struct pthread_mutex *mp, struct sleepqueue *sq; int recurse; int error; + int defered; if (curthread->wchan != NULL) PANIC("thread was already on queue."); @@ -230,13 +231,23 @@ cond_wait_user(struct pthread_cond *cvp, struct pthread_mutex *mp, * us to check it without locking in pthread_cond_signal(). 
*/ cvp->__has_user_waiters = 1; - curthread->will_sleep = 1; - (void)_mutex_cv_unlock(mp, &recurse); + defered = 0; + (void)_mutex_cv_unlock(mp, &recurse, &defered); curthread->mutex_obj = mp; _sleepq_add(cvp, curthread); for(;;) { _thr_clear_wake(curthread); _sleepq_unlock(cvp); + if (defered) { + if ((mp->m_lock.m_owner & UMUTEX_CONTESTED) == 0) + (void)_umtx_op_err(&mp->m_lock, UMTX_OP_MUTEX_WAKE2, + mp->m_lock.m_flags, 0, 0); + } + if (curthread->nwaiter_defer > 0) { + _thr_wake_all(curthread->defer_waiters, + curthread->nwaiter_defer); + curthread->nwaiter_defer = 0; + } if (cancel) { _thr_cancel_enter2(curthread, 0); diff --git a/lib/libthr/thread/thr_kern.c b/lib/libthr/thread/thr_kern.c index 48f7c65..1e7cb51 100644 --- a/lib/libthr/thread/thr_kern.c +++ b/lib/libthr/thread/thr_kern.c @@ -199,13 +199,6 @@ _thr_sleep(struct pthread *curthread, int clockid, const struct timespec *abstime) { - curthread->will_sleep = 0; - if (curthread->nwaiter_defer > 0) { - _thr_wake_all(curthread->defer_waiters, - curthread->nwaiter_defer); - curthread->nwaiter_defer = 0; - } - if (curthread->wake_addr->value != 0) return (0); diff --git a/lib/libthr/thread/thr_mutex.c b/lib/libthr/thread/thr_mutex.c index 91b47ec..61ff077 100644 --- a/lib/libthr/thread/thr_mutex.c +++ b/lib/libthr/thread/thr_mutex.c @@ -92,7 +92,7 @@ int __pthread_mutex_setyieldloops_np(pthread_mutex_t *mutex, int count); static int mutex_self_trylock(pthread_mutex_t); static int mutex_self_lock(pthread_mutex_t, const struct timespec *abstime); -static int mutex_unlock_common(struct pthread_mutex *, int); +static int mutex_unlock_common(struct pthread_mutex *, int, int *); static int mutex_lock_sleep(struct pthread *, pthread_mutex_t, const struct timespec *); @@ -461,7 +461,7 @@ _pthread_mutex_unlock(pthread_mutex_t *mutex) struct pthread_mutex *mp; mp = *mutex; - return (mutex_unlock_common(mp, 0)); + return (mutex_unlock_common(mp, 0, NULL)); } int @@ -476,7 +476,7 @@ _mutex_cv_lock(struct pthread_mutex *m, int count) } int -_mutex_cv_unlock(struct pthread_mutex *m, int *count) +_mutex_cv_unlock(struct pthread_mutex *m, int *count, int *defer) { /* @@ -484,7 +484,7 @@ _mutex_cv_unlock(struct pthread_mutex *m, int *count) */ *count = m->m_count; m->m_count = 0; - (void)mutex_unlock_common(m, 1); + (void)mutex_unlock_common(m, 1, defer); return (0); } @@ -629,7 +629,7 @@ mutex_self_lock(struct pthread_mutex *m, const struct timespec *abstime) } static int -mutex_unlock_common(struct pthread_mutex *m, int cv) +mutex_unlock_common(struct pthread_mutex *m, int cv, int *mtx_defer) { struct pthread *curthread = _get_curthread(); uint32_t id; @@ -657,12 +657,12 @@ mutex_unlock_common(struct pthread_mutex *m, int cv) defered = 1; m->m_flags &= ~PMUTEX_FLAG_DEFERED; } else - defered = 0; + defered = 0; DEQUEUE_MUTEX(curthread, m); - _thr_umutex_unlock(&m->m_lock, id); + _thr_umutex_unlock2(&m->m_lock, id, mtx_defer); - if (curthread->will_sleep == 0 && defered) { + if (mtx_defer == NULL && defered) { _thr_wake_all(curthread->defer_waiters, curthread->nwaiter_defer); curthread->nwaiter_defer = 0; diff --git a/lib/libthr/thread/thr_private.h b/lib/libthr/thread/thr_private.h index 5224c7c..ba272fe 100644 --- a/lib/libthr/thread/thr_private.h +++ b/lib/libthr/thread/thr_private.h @@ -727,10 +727,10 @@ extern struct umutex _thr_event_lock __hidden; */ __BEGIN_DECLS int _thr_setthreaded(int) __hidden; -int _mutex_cv_lock(struct pthread_mutex *, int count) __hidden; -int _mutex_cv_unlock(struct pthread_mutex *, int *count) __hidden; -int 
_mutex_cv_attach(struct pthread_mutex *, int count) __hidden; -int _mutex_cv_detach(struct pthread_mutex *, int *count) __hidden; +int _mutex_cv_lock(struct pthread_mutex *, int) __hidden; +int _mutex_cv_unlock(struct pthread_mutex *, int *, int *) __hidden; +int _mutex_cv_attach(struct pthread_mutex *, int) __hidden; +int _mutex_cv_detach(struct pthread_mutex *, int *) __hidden; int _mutex_owned(struct pthread *, const struct pthread_mutex *) __hidden; int _mutex_reinit(pthread_mutex_t *) __hidden; void _mutex_fork(struct pthread *curthread) __hidden; diff --git a/lib/libthr/thread/thr_umtx.h b/lib/libthr/thread/thr_umtx.h index d74cc81..f3d95f1 100644 --- a/lib/libthr/thread/thr_umtx.h +++ b/lib/libthr/thread/thr_umtx.h @@ -120,7 +120,7 @@ _thr_umutex_timedlock(struct umutex *mtx, uint32_t id, } static inline int -_thr_umutex_unlock(struct umutex *mtx, uint32_t id) +_thr_umutex_unlock2(struct umutex *mtx, uint32_t id, int *defer) { uint32_t flags = mtx->m_flags; @@ -132,8 +132,12 @@ _thr_umutex_unlock(struct umutex *mtx, uint32_t id) return (EPERM); } while (__predict_false(!atomic_cmpset_rel_32(&mtx->m_owner, owner, UMUTEX_UNOWNED))); - if ((owner & UMUTEX_CONTESTED)) - (void)_umtx_op_err(mtx, UMTX_OP_MUTEX_WAKE2, flags, 0, 0); + if ((owner & UMUTEX_CONTESTED)) { + if (defer == NULL) + (void)_umtx_op_err(mtx, UMTX_OP_MUTEX_WAKE2, flags, 0, 0); + else + *defer = 1; + } return (0); } if (atomic_cmpset_rel_32(&mtx->m_owner, id, UMUTEX_UNOWNED)) @@ -142,6 +146,12 @@ _thr_umutex_unlock(struct umutex *mtx, uint32_t id) } static inline int +_thr_umutex_unlock(struct umutex *mtx, uint32_t id) +{ + return _thr_umutex_unlock2(mtx, id, NULL); +} + +static inline int _thr_rwlock_tryrdlock(struct urwlock *rwlock, int flags) { int32_t state; -- cgit v1.1 From 2d48812b47fa6c0f28066a552ec406bc5ca3c3ee Mon Sep 17 00:00:00 2001 From: davidxu Date: Sun, 12 Aug 2012 00:56:56 +0000 Subject: Do defered mutex wakeup once. --- lib/libthr/thread/thr_cond.c | 1 + 1 file changed, 1 insertion(+) (limited to 'lib') diff --git a/lib/libthr/thread/thr_cond.c b/lib/libthr/thread/thr_cond.c index 541e40a..6af15db 100644 --- a/lib/libthr/thread/thr_cond.c +++ b/lib/libthr/thread/thr_cond.c @@ -239,6 +239,7 @@ cond_wait_user(struct pthread_cond *cvp, struct pthread_mutex *mp, _thr_clear_wake(curthread); _sleepq_unlock(cvp); if (defered) { + defered = 0; if ((mp->m_lock.m_owner & UMUTEX_CONTESTED) == 0) (void)_umtx_op_err(&mp->m_lock, UMTX_OP_MUTEX_WAKE2, mp->m_lock.m_flags, 0, 0); -- cgit v1.1 From ac49f9f94c356dc44ea69f7a8dfe014bc91c5806 Mon Sep 17 00:00:00 2001 From: hselasky Date: Mon, 13 Aug 2012 18:10:52 +0000 Subject: Add support for streams to LibUSB v2.0. 
MFC after: 2 weeks --- lib/libusb/libusb20.3 | 13 ++++++++++++- lib/libusb/libusb20.c | 9 ++++++++- lib/libusb/libusb20.h | 1 + lib/libusb/libusb20_int.h | 2 +- lib/libusb/libusb20_ugen20.c | 32 +++++++++++++++++++++----------- 5 files changed, 43 insertions(+), 14 deletions(-) (limited to 'lib') diff --git a/lib/libusb/libusb20.3 b/lib/libusb/libusb20.3 index c9bf902..af80c6c 100644 --- a/lib/libusb/libusb20.3 +++ b/lib/libusb/libusb20.3 @@ -26,7 +26,7 @@ .\" .\" $FreeBSD$ .\" -.Dd October 14, 2010 +.Dd August 13, 2012 .Dt LIBUSB20 3 .Os .Sh NAME @@ -48,6 +48,7 @@ USB access library (libusb -lusb) .Fn libusb20_tr_close "struct libusb20_transfer *xfer" .Ft int .Fn libusb20_tr_open "struct libusb20_transfer *xfer" "uint32_t max_buf_size" "uint32_t max_frame_count" "uint8_t ep_no" +.Fn libusb20_tr_open_stream "struct libusb20_transfer *xfer" "uint32_t max_buf_size" "uint32_t max_frame_count" "uint8_t ep_no" "uint16_t stream_id" .Ft struct libusb20_transfer* .Fn libusb20_tr_get_pointer "struct libusb20_device *pdev" "uint16_t tr_index" .Ft uint16_t @@ -284,6 +285,16 @@ Non-zero return values indicate a LIBUSB20_ERROR value. . .Pp . +.Fn libusb20_tr_open_stream +is identical to +.Fn libusb20_tr_open +except that a stream ID can be specified for BULK endpoints having +such a feature. +.Fn libusb20_tr_open +can be used to open stream ID zero. +. +.Pp +. .Fn libusb20_tr_get_pointer will return a pointer to the allocated USB transfer according to the .Fa pdev diff --git a/lib/libusb/libusb20.c b/lib/libusb/libusb20.c index 75af7a1..aa45991 100644 --- a/lib/libusb/libusb20.c +++ b/lib/libusb/libusb20.c @@ -155,6 +155,13 @@ int libusb20_tr_open(struct libusb20_transfer *xfer, uint32_t MaxBufSize, uint32_t MaxFrameCount, uint8_t ep_no) { + return (libusb20_tr_open_stream(xfer, MaxBufSize, MaxFrameCount, ep_no, 0)); +} + +int +libusb20_tr_open_stream(struct libusb20_transfer *xfer, uint32_t MaxBufSize, + uint32_t MaxFrameCount, uint8_t ep_no, uint16_t stream_id) +{ uint32_t size; uint8_t pre_scale; int error; @@ -188,7 +195,7 @@ libusb20_tr_open(struct libusb20_transfer *xfer, uint32_t MaxBufSize, memset(xfer->ppBuffer, 0, size); error = xfer->pdev->methods->tr_open(xfer, MaxBufSize, - MaxFrameCount, ep_no, pre_scale); + MaxFrameCount, ep_no, stream_id, pre_scale); if (error) { free(xfer->ppBuffer); diff --git a/lib/libusb/libusb20.h b/lib/libusb/libusb20.h index e4359fc..87e0572 100644 --- a/lib/libusb/libusb20.h +++ b/lib/libusb/libusb20.h @@ -202,6 +202,7 @@ struct libusb20_quirk { /* USB transfer operations */ int libusb20_tr_close(struct libusb20_transfer *xfer); int libusb20_tr_open(struct libusb20_transfer *xfer, uint32_t max_buf_size, uint32_t max_frame_count, uint8_t ep_no); +int libusb20_tr_open_stream(struct libusb20_transfer *xfer, uint32_t max_buf_size, uint32_t max_frame_count, uint8_t ep_no, uint16_t stream_id); struct libusb20_transfer *libusb20_tr_get_pointer(struct libusb20_device *pdev, uint16_t tr_index); uint16_t libusb20_tr_get_time_complete(struct libusb20_transfer *xfer); uint32_t libusb20_tr_get_actual_frames(struct libusb20_transfer *xfer); diff --git a/lib/libusb/libusb20_int.h b/lib/libusb/libusb20_int.h index bef4d02..0251c5f 100644 --- a/lib/libusb/libusb20_int.h +++ b/lib/libusb/libusb20_int.h @@ -110,7 +110,7 @@ typedef int (libusb20_set_config_index_t)(struct libusb20_device *pdev, uint8_t typedef int (libusb20_check_connected_t)(struct libusb20_device *pdev); /* USB transfer specific */ -typedef int (libusb20_tr_open_t)(struct libusb20_transfer *xfer, uint32_t 
MaxBufSize, uint32_t MaxFrameCount, uint8_t ep_no, uint8_t pre_scale); +typedef int (libusb20_tr_open_t)(struct libusb20_transfer *xfer, uint32_t MaxBufSize, uint32_t MaxFrameCount, uint8_t ep_no, uint16_t stream_id, uint8_t pre_scale); typedef int (libusb20_tr_close_t)(struct libusb20_transfer *xfer); typedef int (libusb20_tr_clear_stall_sync_t)(struct libusb20_transfer *xfer); typedef void (libusb20_tr_submit_t)(struct libusb20_transfer *xfer); diff --git a/lib/libusb/libusb20_ugen20.c b/lib/libusb/libusb20_ugen20.c index 17c948b..2c67778 100644 --- a/lib/libusb/libusb20_ugen20.c +++ b/lib/libusb/libusb20_ugen20.c @@ -741,9 +741,13 @@ ugen20_process(struct libusb20_device *pdev) static int ugen20_tr_open(struct libusb20_transfer *xfer, uint32_t MaxBufSize, - uint32_t MaxFrameCount, uint8_t ep_no, uint8_t pre_scale) + uint32_t MaxFrameCount, uint8_t ep_no, uint16_t stream_id, + uint8_t pre_scale) { - struct usb_fs_open temp; + union { + struct usb_fs_open fs_open; + struct usb_fs_open_stream fs_open_stream; + } temp; struct usb_fs_endpoint *fsep; if (pre_scale) @@ -754,20 +758,26 @@ ugen20_tr_open(struct libusb20_transfer *xfer, uint32_t MaxBufSize, fsep = xfer->pdev->privBeData; fsep += xfer->trIndex; - temp.max_bufsize = MaxBufSize; - temp.max_frames = MaxFrameCount; - temp.ep_index = xfer->trIndex; - temp.ep_no = ep_no; + temp.fs_open.max_bufsize = MaxBufSize; + temp.fs_open.max_frames = MaxFrameCount; + temp.fs_open.ep_index = xfer->trIndex; + temp.fs_open.ep_no = ep_no; - if (ioctl(xfer->pdev->file, USB_FS_OPEN, &temp)) { - return (LIBUSB20_ERROR_INVALID_PARAM); + if (stream_id != 0) { + temp.fs_open_stream.stream_id = stream_id; + + if (ioctl(xfer->pdev->file, USB_FS_OPEN_STREAM, &temp.fs_open_stream)) + return (LIBUSB20_ERROR_INVALID_PARAM); + } else { + if (ioctl(xfer->pdev->file, USB_FS_OPEN, &temp.fs_open)) + return (LIBUSB20_ERROR_INVALID_PARAM); } /* maximums might have changed - update */ - xfer->maxFrames = temp.max_frames; + xfer->maxFrames = temp.fs_open.max_frames; /* "max_bufsize" should be multiple of "max_packet_length" */ - xfer->maxTotalLength = temp.max_bufsize; - xfer->maxPacketLen = temp.max_packet_length; + xfer->maxTotalLength = temp.fs_open.max_bufsize; + xfer->maxPacketLen = temp.fs_open.max_packet_length; /* setup buffer and length lists using zero copy */ fsep->ppBuffer = libusb20_pass_ptr(xfer->ppBuffer); -- cgit v1.1 From 0761ed25354c932b4754aa6fa130cc9d9d2254c4 Mon Sep 17 00:00:00 2001 From: gonzo Date: Wed, 15 Aug 2012 03:08:29 +0000 Subject: Merging of projects/armv6, part 2 Handle TLS for ARMv6 and ARMv7 --- lib/libthr/arch/arm/include/pthread_md.h | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/libthr/arch/arm/include/pthread_md.h b/lib/libthr/arch/arm/include/pthread_md.h index 83ddf7f..3c3dd6d 100644 --- a/lib/libthr/arch/arm/include/pthread_md.h +++ b/lib/libthr/arch/arm/include/pthread_md.h @@ -57,7 +57,11 @@ void _tcb_dtor(struct tcb *); static __inline void _tcb_set(struct tcb *tcb) { - *((struct tcb **)ARM_TP_ADDRESS) = tcb; +#ifdef ARM_TP_ADDRESS + *((struct tcb **)ARM_TP_ADDRESS) = tcb; /* avoids a system call */ +#else + sysarch(ARM_SET_TP, tcb); +#endif } /* @@ -66,7 +70,15 @@ _tcb_set(struct tcb *tcb) static __inline struct tcb * _tcb_get(void) { +#ifdef ARM_TP_ADDRESS return (*((struct tcb **)ARM_TP_ADDRESS)); +#else + struct tcb *tcb; + + __asm __volatile("mrc p15, 0, %0, c13, c0, 3" \ + : "=r" (tcb)); + return (tcb); +#endif } extern struct pthread *_thr_initial; -- cgit v1.1 From 
0c19fd41e201fbcbf4ce5cb949c595c01a9ad102 Mon Sep 17 00:00:00 2001 From: gonzo Date: Wed, 15 Aug 2012 03:09:00 +0000 Subject: Merging of projects/armv6, part 2 Handle TLS for ARMv6 and ARMv7 --- lib/libc/arm/gen/__aeabi_read_tp.S | 6 ++++++ lib/libc/arm/gen/_set_tp.c | 4 ++++ 2 files changed, 10 insertions(+) (limited to 'lib') diff --git a/lib/libc/arm/gen/__aeabi_read_tp.S b/lib/libc/arm/gen/__aeabi_read_tp.S index 228acff..c3ea99d 100644 --- a/lib/libc/arm/gen/__aeabi_read_tp.S +++ b/lib/libc/arm/gen/__aeabi_read_tp.S @@ -31,10 +31,16 @@ __FBSDID("$FreeBSD$"); #include <machine/sysarch.h> ENTRY(__aeabi_read_tp) +#ifdef ARM_TP_ADDRESS ldr r0, .Larm_tp_address ldr r0, [r0] +#else + mrc p15, 0, r0, c13, c0, 3 +#endif RET +#ifdef ARM_TP_ADDRESS .Larm_tp_address: .word ARM_TP_ADDRESS +#endif diff --git a/lib/libc/arm/gen/_set_tp.c b/lib/libc/arm/gen/_set_tp.c index 44bbdd8..97cabec 100644 --- a/lib/libc/arm/gen/_set_tp.c +++ b/lib/libc/arm/gen/_set_tp.c @@ -35,5 +35,9 @@ void _set_tp(void *tp) { +#ifdef ARM_TP_ADDRESS *((struct tcb **)ARM_TP_ADDRESS) = tp; +#else + sysarch(ARM_SET_TP, tp); +#endif } -- cgit v1.1 From 721c201bd55ffb73cb2ba8d39e0570fa38c44e15 Mon Sep 17 00:00:00 2001 From: dim Date: Wed, 15 Aug 2012 19:34:23 +0000 Subject: Vendor import of llvm trunk r161861: http://llvm.org/svn/llvm-project/llvm/trunk@161861 --- lib/Analysis/AliasAnalysis.cpp | 83 + lib/Analysis/AliasSetTracker.cpp | 6 +- lib/Analysis/BasicAliasAnalysis.cpp | 45 +- lib/Analysis/CMakeLists.txt | 4 +- lib/Analysis/CaptureTracking.cpp | 2 +- lib/Analysis/CodeMetrics.cpp | 26 +- lib/Analysis/ConstantFolding.cpp | 62 +- lib/Analysis/DIBuilder.cpp | 1015 ------ lib/Analysis/DbgInfoPrinter.cpp | 6 +- lib/Analysis/DebugInfo.cpp | 1229 ------- lib/Analysis/IPA/CMakeLists.txt | 2 + lib/Analysis/IPA/CallGraphSCCPass.cpp | 4 +- lib/Analysis/IPA/GlobalsModRef.cpp | 20 +- lib/Analysis/IVUsers.cpp | 7 + lib/Analysis/InlineCost.cpp | 82 +- lib/Analysis/InstructionSimplify.cpp | 30 +- lib/Analysis/LazyValueInfo.cpp | 129 +- lib/Analysis/LoopInfo.cpp | 10 +- lib/Analysis/LoopPass.cpp | 10 +- lib/Analysis/MemDepPrinter.cpp | 4 +- lib/Analysis/MemoryBuiltins.cpp | 639 +++- lib/Analysis/MemoryDependenceAnalysis.cpp | 102 +- lib/Analysis/ModuleDebugInfoPrinter.cpp | 4 +- lib/Analysis/PathNumbering.cpp | 2 +- lib/Analysis/ProfileInfoLoader.cpp | 6 +- lib/Analysis/ProfileInfoLoaderPass.cpp | 2 +- lib/Analysis/RegionInfo.cpp | 14 +- lib/Analysis/RegionPass.cpp | 3 +- lib/Analysis/RegionPrinter.cpp | 8 +- lib/Analysis/ScalarEvolution.cpp | 302 +- lib/Analysis/ScalarEvolutionExpander.cpp | 46 +- lib/Analysis/ValueTracking.cpp | 33 +- lib/Archive/ArchiveReader.cpp | 7 +- lib/Archive/ArchiveWriter.cpp | 7 +- lib/AsmParser/LLLexer.cpp | 10 +- lib/AsmParser/LLParser.cpp | 57 +- lib/AsmParser/LLParser.h | 3 + lib/AsmParser/LLToken.h | 4 +- lib/Bitcode/Reader/BitcodeReader.cpp | 206 +- lib/Bitcode/Reader/CMakeLists.txt | 2 + lib/Bitcode/Writer/BitcodeWriter.cpp | 148 +- lib/CodeGen/AggressiveAntiDepBreaker.cpp | 43 +- lib/CodeGen/AllocationOrder.cpp | 2 +- lib/CodeGen/Analysis.cpp | 100 +- lib/CodeGen/AsmPrinter/ARMException.cpp | 4 +- lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 34 +- lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp | 33 +- lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp | 99 +- lib/CodeGen/AsmPrinter/DwarfCompileUnit.h | 2 +- lib/CodeGen/AsmPrinter/DwarfDebug.cpp | 43 +- lib/CodeGen/AsmPrinter/DwarfDebug.h | 22 +- lib/CodeGen/AsmPrinter/DwarfException.h | 11 - lib/CodeGen/BranchFolding.cpp | 202 +- lib/CodeGen/CMakeLists.txt | 7 +-
lib/CodeGen/CalcSpillWeights.cpp | 53 +- lib/CodeGen/CallingConvLower.cpp | 8 +- lib/CodeGen/CodeGen.cpp | 4 +- lib/CodeGen/CodePlacementOpt.cpp | 2 +- lib/CodeGen/CriticalAntiDepBreaker.cpp | 81 +- lib/CodeGen/CriticalAntiDepBreaker.h | 2 +- lib/CodeGen/DFAPacketizer.cpp | 91 +- lib/CodeGen/DeadMachineInstructionElim.cpp | 11 +- lib/CodeGen/DwarfEHPrepare.cpp | 12 +- lib/CodeGen/EarlyIfConversion.cpp | 803 +++++ lib/CodeGen/ExecutionDepsFix.cpp | 5 +- lib/CodeGen/ExpandPostRAPseudos.cpp | 11 +- lib/CodeGen/IfConversion.cpp | 45 +- lib/CodeGen/InlineSpiller.cpp | 20 +- lib/CodeGen/InterferenceCache.cpp | 93 +- lib/CodeGen/InterferenceCache.h | 34 +- lib/CodeGen/IntrinsicLowering.cpp | 6 +- lib/CodeGen/LLVMTargetMachine.cpp | 70 +- lib/CodeGen/LexicalScopes.cpp | 2 +- lib/CodeGen/LiveDebugVariables.cpp | 39 +- lib/CodeGen/LiveInterval.cpp | 286 +- lib/CodeGen/LiveIntervalAnalysis.cpp | 738 ++-- lib/CodeGen/LiveIntervalUnion.cpp | 24 +- lib/CodeGen/LiveIntervalUnion.h | 26 +- lib/CodeGen/LiveRangeCalc.cpp | 120 +- lib/CodeGen/LiveRangeCalc.h | 63 +- lib/CodeGen/LiveRangeEdit.cpp | 120 +- lib/CodeGen/LiveRegMatrix.cpp | 152 + lib/CodeGen/LiveRegMatrix.h | 148 + lib/CodeGen/LiveVariables.cpp | 69 +- lib/CodeGen/LocalStackSlotAllocation.cpp | 3 +- lib/CodeGen/MachineBasicBlock.cpp | 80 +- lib/CodeGen/MachineBlockPlacement.cpp | 111 +- lib/CodeGen/MachineCSE.cpp | 69 +- lib/CodeGen/MachineCopyPropagation.cpp | 50 +- lib/CodeGen/MachineFunction.cpp | 41 +- lib/CodeGen/MachineFunctionPrinterPass.cpp | 9 +- lib/CodeGen/MachineInstr.cpp | 295 +- lib/CodeGen/MachineInstrBundle.cpp | 4 +- lib/CodeGen/MachineLICM.cpp | 18 +- lib/CodeGen/MachineLoopInfo.cpp | 16 +- lib/CodeGen/MachinePassRegistry.cpp | 13 + lib/CodeGen/MachineRegisterInfo.cpp | 103 +- lib/CodeGen/MachineSSAUpdater.cpp | 47 +- lib/CodeGen/MachineScheduler.cpp | 926 ++++- lib/CodeGen/MachineSink.cpp | 17 +- lib/CodeGen/MachineTraceMetrics.cpp | 1153 ++++++ lib/CodeGen/MachineTraceMetrics.h | 341 ++ lib/CodeGen/MachineVerifier.cpp | 746 ++-- lib/CodeGen/PHIElimination.cpp | 184 +- lib/CodeGen/Passes.cpp | 273 +- lib/CodeGen/PeepholeOptimizer.cpp | 156 +- lib/CodeGen/PostRASchedulerList.cpp | 39 +- lib/CodeGen/ProcessImplicitDefs.cpp | 374 +- lib/CodeGen/PrologEpilogInserter.cpp | 2 +- lib/CodeGen/RegAllocBase.cpp | 161 +- lib/CodeGen/RegAllocBase.h | 85 +- lib/CodeGen/RegAllocBasic.cpp | 171 +- lib/CodeGen/RegAllocFast.cpp | 55 +- lib/CodeGen/RegAllocGreedy.cpp | 238 +- lib/CodeGen/RegAllocPBQP.cpp | 187 +- lib/CodeGen/RegisterClassInfo.cpp | 7 +- lib/CodeGen/RegisterClassInfo.h | 132 - lib/CodeGen/RegisterCoalescer.cpp | 1252 +++---- lib/CodeGen/RegisterCoalescer.h | 29 +- lib/CodeGen/RegisterPressure.cpp | 841 +++++ lib/CodeGen/RegisterScavenging.cpp | 25 +- lib/CodeGen/RenderMachineFunction.cpp | 1013 ------ lib/CodeGen/RenderMachineFunction.h | 338 -- lib/CodeGen/ScheduleDAG.cpp | 23 +- lib/CodeGen/ScheduleDAGInstrs.cpp | 405 ++- lib/CodeGen/ScoreboardHazardRecognizer.cpp | 26 +- lib/CodeGen/SelectionDAG/CMakeLists.txt | 2 + lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 336 +- lib/CodeGen/SelectionDAG/FastISel.cpp | 55 +- lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp | 2 +- lib/CodeGen/SelectionDAG/InstrEmitter.cpp | 58 +- lib/CodeGen/SelectionDAG/InstrEmitter.h | 6 - lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 1007 +++--- lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp | 33 +- lib/CodeGen/SelectionDAG/LegalizeTypes.cpp | 20 +- lib/CodeGen/SelectionDAG/LegalizeTypes.h | 7 +- lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp | 10 + 
lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp | 57 +- lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp | 79 +- lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp | 6 +- lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp | 15 +- lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp | 42 +- lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h | 7 +- lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 218 +- lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 542 ++- lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h | 14 +- lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp | 8 +- lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp | 79 +- lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp | 2 +- lib/CodeGen/SelectionDAG/TargetLowering.cpp | 162 +- lib/CodeGen/ShadowStackGC.cpp | 8 +- lib/CodeGen/SjLjEHPrepare.cpp | 20 +- lib/CodeGen/SlotIndexes.cpp | 3 - lib/CodeGen/SpillPlacement.cpp | 11 + lib/CodeGen/SplitKit.cpp | 27 +- lib/CodeGen/StackProtector.cpp | 25 +- lib/CodeGen/StackSlotColoring.cpp | 3 +- lib/CodeGen/StrongPHIElimination.cpp | 4 +- lib/CodeGen/TailDuplication.cpp | 34 +- lib/CodeGen/TargetInstrInfoImpl.cpp | 210 ++ lib/CodeGen/TargetLoweringObjectFileImpl.cpp | 62 +- lib/CodeGen/TwoAddressInstructionPass.cpp | 752 ++-- lib/CodeGen/VirtRegMap.cpp | 179 +- lib/CodeGen/VirtRegMap.h | 7 - lib/DebugInfo/DWARFCompileUnit.cpp | 25 +- lib/DebugInfo/DWARFCompileUnit.h | 9 +- lib/DebugInfo/DWARFContext.cpp | 74 +- lib/DebugInfo/DWARFContext.h | 3 +- lib/DebugInfo/DWARFDebugAranges.cpp | 2 +- lib/DebugInfo/DWARFDebugInfoEntry.cpp | 51 + lib/DebugInfo/DWARFDebugInfoEntry.h | 9 + lib/DebugInfo/DWARFDebugLine.cpp | 117 +- lib/DebugInfo/DWARFDebugLine.h | 68 +- lib/ExecutionEngine/EventListenerCommon.h | 2 +- .../IntelJITEvents/IntelJITEventListener.cpp | 6 +- lib/ExecutionEngine/Interpreter/CMakeLists.txt | 2 + lib/ExecutionEngine/Interpreter/Execution.cpp | 37 +- lib/ExecutionEngine/JIT/JIT.cpp | 11 +- lib/ExecutionEngine/JIT/JITEmitter.cpp | 13 +- lib/ExecutionEngine/JIT/JITMemoryManager.cpp | 2 +- lib/ExecutionEngine/MCJIT/MCJIT.cpp | 55 +- lib/ExecutionEngine/MCJIT/MCJIT.h | 19 +- lib/ExecutionEngine/MCJIT/MCJITMemoryManager.h | 14 +- .../OProfileJIT/OProfileJITEventListener.cpp | 2 +- lib/ExecutionEngine/RuntimeDyld/ObjectImage.h | 2 +- lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp | 123 +- lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp | 52 +- lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h | 3 +- lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h | 143 +- .../RuntimeDyld/RuntimeDyldMachO.cpp | 97 +- lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h | 3 +- lib/ExecutionEngine/TargetSelect.cpp | 11 +- lib/Linker/LinkModules.cpp | 28 +- lib/MC/CMakeLists.txt | 1 + lib/MC/ELFObjectWriter.cpp | 18 +- lib/MC/MCAsmBackend.cpp | 2 +- lib/MC/MCAsmInfo.cpp | 13 +- lib/MC/MCAsmInfoCOFF.cpp | 4 +- lib/MC/MCAsmInfoDarwin.cpp | 15 +- lib/MC/MCAsmStreamer.cpp | 24 +- lib/MC/MCAssembler.cpp | 7 +- lib/MC/MCContext.cpp | 10 +- lib/MC/MCDisassembler/Disassembler.h | 8 + lib/MC/MCDisassembler/EDDisassembler.cpp | 4 +- lib/MC/MCDisassembler/EDMain.cpp | 32 +- lib/MC/MCDwarf.cpp | 40 +- lib/MC/MCELFObjectTargetWriter.cpp | 6 +- lib/MC/MCELFStreamer.cpp | 4 +- lib/MC/MCExpr.cpp | 4 + lib/MC/MCMachOStreamer.cpp | 48 +- lib/MC/MCNullStreamer.cpp | 8 +- lib/MC/MCObjectFileInfo.cpp | 8 +- lib/MC/MCObjectWriter.cpp | 34 - lib/MC/MCParser/AsmParser.cpp | 511 ++- lib/MC/MCParser/DarwinAsmParser.cpp | 83 +- lib/MC/MCParser/ELFAsmParser.cpp | 28 + lib/MC/MCPureStreamer.cpp | 4 +- lib/MC/MCRegisterInfo.cpp | 71 + lib/MC/MCSectionCOFF.cpp | 6 +- 
lib/MC/MCSectionELF.cpp | 12 +- lib/MC/MCStreamer.cpp | 94 +- lib/MC/MCSubtargetInfo.cpp | 33 +- lib/MC/MCSymbol.cpp | 4 +- lib/MC/MCWin64EH.cpp | 6 +- lib/MC/MachObjectWriter.cpp | 53 +- lib/MC/SubtargetFeature.cpp | 16 +- lib/MC/WinCOFFStreamer.cpp | 4 +- lib/Object/Archive.cpp | 6 +- lib/Object/COFFObjectFile.cpp | 36 + lib/Object/MachOObject.cpp | 13 + lib/Object/MachOObjectFile.cpp | 18 +- lib/Support/APFloat.cpp | 38 +- lib/Support/APInt.cpp | 4 +- lib/Support/CMakeLists.txt | 1 + lib/Support/CommandLine.cpp | 8 +- lib/Support/ConstantRange.cpp | 84 +- lib/Support/CrashRecoveryContext.cpp | 2 +- lib/Support/Debug.cpp | 10 +- lib/Support/Errno.cpp | 2 +- lib/Support/FileOutputBuffer.cpp | 148 + lib/Support/GraphWriter.cpp | 1 - lib/Support/Host.cpp | 198 +- lib/Support/Memory.cpp | 11 +- lib/Support/MemoryBuffer.cpp | 19 +- lib/Support/Mutex.cpp | 3 +- lib/Support/Path.cpp | 7 +- lib/Support/PathV2.cpp | 2 + lib/Support/SourceMgr.cpp | 121 +- lib/Support/StreamableMemoryObject.cpp | 2 +- lib/Support/StringMap.cpp | 2 +- lib/Support/StringRef.cpp | 44 +- lib/Support/TargetRegistry.cpp | 41 + lib/Support/ThreadLocal.cpp | 28 +- lib/Support/Triple.cpp | 68 +- lib/Support/Unix/Path.inc | 7 +- lib/Support/Unix/PathV2.inc | 134 +- lib/Support/Unix/Process.inc | 53 +- lib/Support/Unix/Signals.inc | 50 +- lib/Support/Unix/Unix.h | 10 +- lib/Support/Windows/Path.inc | 16 +- lib/Support/Windows/PathV2.inc | 72 +- lib/Support/Windows/Process.inc | 14 +- lib/Support/Windows/RWMutex.inc | 6 +- lib/Support/Windows/ThreadLocal.inc | 13 +- lib/Support/YAMLParser.cpp | 50 +- lib/Support/raw_ostream.cpp | 7 +- lib/TableGen/CMakeLists.txt | 1 + lib/TableGen/Main.cpp | 9 +- lib/TableGen/Record.cpp | 2 +- lib/TableGen/StringMatcher.cpp | 149 + lib/TableGen/TGParser.cpp | 246 +- lib/TableGen/TGParser.h | 20 +- lib/TableGen/TableGenBackend.cpp | 30 +- lib/Target/ARM/ARM.td | 10 +- lib/Target/ARM/ARMAsmPrinter.cpp | 106 +- lib/Target/ARM/ARMAsmPrinter.h | 6 +- lib/Target/ARM/ARMBaseInstrInfo.cpp | 540 ++- lib/Target/ARM/ARMBaseInstrInfo.h | 29 +- lib/Target/ARM/ARMBaseRegisterInfo.cpp | 35 +- lib/Target/ARM/ARMBaseRegisterInfo.h | 5 +- lib/Target/ARM/ARMCallingConv.td | 31 + lib/Target/ARM/ARMCodeEmitter.cpp | 76 +- lib/Target/ARM/ARMConstantIslandPass.cpp | 76 +- lib/Target/ARM/ARMExpandPseudoInsts.cpp | 15 +- lib/Target/ARM/ARMFastISel.cpp | 362 +- lib/Target/ARM/ARMFrameLowering.cpp | 26 +- lib/Target/ARM/ARMISelDAGToDAG.cpp | 417 ++- lib/Target/ARM/ARMISelLowering.cpp | 669 +++- lib/Target/ARM/ARMISelLowering.h | 30 +- lib/Target/ARM/ARMInstrFormats.td | 3 +- lib/Target/ARM/ARMInstrInfo.cpp | 3 +- lib/Target/ARM/ARMInstrInfo.td | 548 ++- lib/Target/ARM/ARMInstrNEON.td | 895 +---- lib/Target/ARM/ARMInstrThumb.td | 48 +- lib/Target/ARM/ARMInstrThumb2.td | 292 +- lib/Target/ARM/ARMInstrVFP.td | 93 +- lib/Target/ARM/ARMJITInfo.cpp | 6 +- lib/Target/ARM/ARMLoadStoreOptimizer.cpp | 12 +- lib/Target/ARM/ARMRegisterInfo.td | 32 +- lib/Target/ARM/ARMSchedule.td | 24 +- lib/Target/ARM/ARMScheduleA8.td | 50 +- lib/Target/ARM/ARMScheduleA9.td | 58 +- lib/Target/ARM/ARMSelectionDAGInfo.cpp | 7 +- lib/Target/ARM/ARMSubtarget.cpp | 37 +- lib/Target/ARM/ARMSubtarget.h | 5 +- lib/Target/ARM/ARMTargetMachine.cpp | 24 +- lib/Target/ARM/ARMTargetObjectFile.cpp | 43 +- lib/Target/ARM/ARMTargetObjectFile.h | 4 - lib/Target/ARM/AsmParser/ARMAsmParser.cpp | 341 +- lib/Target/ARM/CMakeLists.txt | 2 + lib/Target/ARM/Disassembler/ARMDisassembler.cpp | 199 +- lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp | 140 +- 
lib/Target/ARM/InstPrinter/ARMInstPrinter.h | 1 + lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp | 177 +- lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h | 106 +- lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp | 8 +- lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp | 28 +- lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp | 114 +- lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp | 49 +- lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h | 4 +- .../ARM/MCTargetDesc/ARMMachObjectWriter.cpp | 13 +- lib/Target/ARM/MLxExpansionPass.cpp | 4 +- lib/Target/ARM/README.txt | 21 + lib/Target/ARM/Thumb1InstrInfo.cpp | 8 +- lib/Target/ARM/Thumb1RegisterInfo.cpp | 11 +- lib/Target/ARM/Thumb1RegisterInfo.h | 3 +- lib/Target/ARM/Thumb2ITBlockPass.cpp | 8 +- lib/Target/ARM/Thumb2InstrInfo.cpp | 54 +- lib/Target/ARM/Thumb2InstrInfo.h | 5 - lib/Target/ARM/Thumb2SizeReduction.cpp | 1 + lib/Target/CellSPU/CMakeLists.txt | 2 + lib/Target/CellSPU/README.txt | 14 + lib/Target/CellSPU/SPUAsmPrinter.cpp | 4 +- lib/Target/CellSPU/SPUHazardRecognizers.cpp | 6 - lib/Target/CellSPU/SPUHazardRecognizers.h | 6 +- lib/Target/CellSPU/SPUISelLowering.cpp | 73 +- lib/Target/CellSPU/SPUISelLowering.h | 9 +- lib/Target/CellSPU/SPUInstrInfo.cpp | 94 +- lib/Target/CellSPU/SPUInstrInfo.td | 4 +- lib/Target/CellSPU/SPURegisterInfo.cpp | 3 +- lib/Target/CellSPU/SPURegisterInfo.h | 7 +- lib/Target/CellSPU/SPUTargetMachine.cpp | 6 +- lib/Target/CppBackend/CPPBackend.cpp | 33 +- lib/Target/CppBackend/CPPTargetMachine.h | 4 +- lib/Target/Hexagon/CMakeLists.txt | 4 + lib/Target/Hexagon/Hexagon.h | 8 +- lib/Target/Hexagon/Hexagon.td | 14 +- lib/Target/Hexagon/HexagonAsmPrinter.cpp | 58 +- lib/Target/Hexagon/HexagonCallingConv.td | 8 +- lib/Target/Hexagon/HexagonCallingConvLower.cpp | 7 +- lib/Target/Hexagon/HexagonExpandPredSpillCode.cpp | 16 +- lib/Target/Hexagon/HexagonFrameLowering.cpp | 44 +- lib/Target/Hexagon/HexagonHardwareLoops.cpp | 9 +- lib/Target/Hexagon/HexagonISelDAGToDAG.cpp | 54 +- lib/Target/Hexagon/HexagonISelLowering.cpp | 408 ++- lib/Target/Hexagon/HexagonISelLowering.h | 12 +- lib/Target/Hexagon/HexagonImmediates.td | 2 +- lib/Target/Hexagon/HexagonInstrFormats.td | 136 +- lib/Target/Hexagon/HexagonInstrFormatsV4.td | 27 +- lib/Target/Hexagon/HexagonInstrInfo.cpp | 1253 ++++++- lib/Target/Hexagon/HexagonInstrInfo.h | 13 +- lib/Target/Hexagon/HexagonInstrInfo.td | 2285 +++++++----- lib/Target/Hexagon/HexagonInstrInfoV3.td | 55 +- lib/Target/Hexagon/HexagonInstrInfoV4.td | 3718 ++++++++++++++++---- lib/Target/Hexagon/HexagonInstrInfoV5.td | 626 ++++ lib/Target/Hexagon/HexagonIntrinsics.td | 1247 +++---- lib/Target/Hexagon/HexagonIntrinsicsDerived.td | 34 +- lib/Target/Hexagon/HexagonIntrinsicsV5.td | 395 +++ lib/Target/Hexagon/HexagonMCInst.h | 41 + lib/Target/Hexagon/HexagonMCInstLower.cpp | 2 +- lib/Target/Hexagon/HexagonNewValueJump.cpp | 647 ++++ lib/Target/Hexagon/HexagonRegisterInfo.cpp | 23 +- lib/Target/Hexagon/HexagonRegisterInfo.h | 4 + lib/Target/Hexagon/HexagonRegisterInfo.td | 16 +- lib/Target/Hexagon/HexagonRemoveSZExtArgs.cpp | 2 +- lib/Target/Hexagon/HexagonSchedule.td | 37 +- lib/Target/Hexagon/HexagonScheduleV4.td | 41 +- lib/Target/Hexagon/HexagonSplitTFRCondSets.cpp | 146 +- lib/Target/Hexagon/HexagonSubtarget.cpp | 28 +- lib/Target/Hexagon/HexagonSubtarget.h | 8 +- lib/Target/Hexagon/HexagonTargetMachine.cpp | 28 +- lib/Target/Hexagon/HexagonVLIWPacketizer.cpp | 3646 +++++++++++++++++++ .../Hexagon/InstPrinter/HexagonInstPrinter.cpp | 67 +- .../Hexagon/InstPrinter/HexagonInstPrinter.h | 13 +- 
lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h | 31 +- lib/Target/LLVMBuild.txt | 2 +- lib/Target/MBlaze/CMakeLists.txt | 2 + lib/Target/MBlaze/MBlaze.td | 2 +- lib/Target/MBlaze/MBlazeAsmPrinter.cpp | 12 +- lib/Target/MBlaze/MBlazeISelLowering.cpp | 60 +- lib/Target/MBlaze/MBlazeISelLowering.h | 8 +- lib/Target/MBlaze/MBlazeInstrInfo.cpp | 2 +- lib/Target/MBlaze/MBlazeInstrInfo.td | 4 +- lib/Target/MBlaze/MBlazeMCInstLower.h | 6 +- lib/Target/MBlaze/MBlazeSchedule.td | 5 - lib/Target/MBlaze/MBlazeSubtarget.cpp | 7 - lib/Target/MBlaze/MBlazeTargetMachine.cpp | 4 +- .../MBlaze/MCTargetDesc/MBlazeMCCodeEmitter.cpp | 1 + .../MBlaze/MCTargetDesc/MBlazeMCTargetDesc.h | 2 + lib/Target/MSP430/CMakeLists.txt | 2 + lib/Target/MSP430/MSP430AsmPrinter.cpp | 2 +- lib/Target/MSP430/MSP430ISelLowering.cpp | 56 +- lib/Target/MSP430/MSP430ISelLowering.h | 8 +- lib/Target/MSP430/MSP430InstrInfo.cpp | 2 +- lib/Target/MSP430/MSP430InstrInfo.h | 1 - lib/Target/MSP430/MSP430InstrInfo.td | 6 +- lib/Target/MSP430/MSP430MCInstLower.h | 6 +- lib/Target/MSP430/MSP430RegisterInfo.cpp | 3 +- lib/Target/MSP430/MSP430RegisterInfo.h | 3 +- lib/Target/MSP430/MSP430RegisterInfo.td | 6 +- lib/Target/MSP430/MSP430TargetMachine.cpp | 4 +- lib/Target/Mips/AsmParser/CMakeLists.txt | 3 +- lib/Target/Mips/CMakeLists.txt | 11 +- lib/Target/Mips/Disassembler/MipsDisassembler.cpp | 193 +- lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp | 20 + lib/Target/Mips/InstPrinter/MipsInstPrinter.h | 2 +- lib/Target/Mips/MCTargetDesc/Makefile | 1 + lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp | 39 +- lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h | 7 +- .../Mips/MCTargetDesc/MipsELFObjectWriter.cpp | 44 +- lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h | 21 + lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp | 29 +- lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h | 3 + lib/Target/Mips/Mips.h | 4 +- lib/Target/Mips/Mips.td | 4 + lib/Target/Mips/Mips16FrameLowering.cpp | 87 + lib/Target/Mips/Mips16FrameLowering.h | 43 + lib/Target/Mips/Mips16InstrFormats.td | 663 ++++ lib/Target/Mips/Mips16InstrInfo.cpp | 132 + lib/Target/Mips/Mips16InstrInfo.h | 76 + lib/Target/Mips/Mips16InstrInfo.td | 419 +++ lib/Target/Mips/Mips16RegisterInfo.cpp | 111 + lib/Target/Mips/Mips16RegisterInfo.h | 37 + lib/Target/Mips/Mips64InstrInfo.td | 169 +- lib/Target/Mips/MipsAsmPrinter.cpp | 254 +- lib/Target/Mips/MipsCallingConv.td | 52 + lib/Target/Mips/MipsCodeEmitter.cpp | 6 +- lib/Target/Mips/MipsCondMov.td | 94 +- lib/Target/Mips/MipsDelaySlotFiller.cpp | 75 +- lib/Target/Mips/MipsEmitGPRestore.cpp | 97 - lib/Target/Mips/MipsExpandPseudo.cpp | 123 - lib/Target/Mips/MipsFrameLowering.cpp | 244 +- lib/Target/Mips/MipsFrameLowering.h | 20 +- lib/Target/Mips/MipsISelDAGToDAG.cpp | 141 +- lib/Target/Mips/MipsISelLowering.cpp | 809 +++-- lib/Target/Mips/MipsISelLowering.h | 42 +- lib/Target/Mips/MipsInstrFPU.td | 184 +- lib/Target/Mips/MipsInstrFormats.td | 37 +- lib/Target/Mips/MipsInstrInfo.cpp | 289 +- lib/Target/Mips/MipsInstrInfo.h | 105 +- lib/Target/Mips/MipsInstrInfo.td | 530 +-- lib/Target/Mips/MipsJITInfo.cpp | 53 +- lib/Target/Mips/MipsJITInfo.h | 6 +- lib/Target/Mips/MipsLongBranch.cpp | 419 +++ lib/Target/Mips/MipsMCInstLower.cpp | 226 +- lib/Target/Mips/MipsMCInstLower.h | 9 +- lib/Target/Mips/MipsMachineFunction.cpp | 16 +- lib/Target/Mips/MipsMachineFunction.h | 24 +- lib/Target/Mips/MipsRegisterInfo.cpp | 145 +- lib/Target/Mips/MipsRegisterInfo.h | 15 +- lib/Target/Mips/MipsRegisterInfo.td | 236 +- lib/Target/Mips/MipsSEFrameLowering.cpp | 210 ++ 
lib/Target/Mips/MipsSEFrameLowering.h | 44 + lib/Target/Mips/MipsSEInstrInfo.cpp | 320 ++ lib/Target/Mips/MipsSEInstrInfo.h | 86 + lib/Target/Mips/MipsSERegisterInfo.cpp | 138 + lib/Target/Mips/MipsSERegisterInfo.h | 39 + lib/Target/Mips/MipsSubtarget.cpp | 8 +- lib/Target/Mips/MipsSubtarget.h | 6 + lib/Target/Mips/MipsTargetMachine.cpp | 49 +- lib/Target/Mips/MipsTargetMachine.h | 121 +- lib/Target/NVPTX/CMakeLists.txt | 34 + lib/Target/NVPTX/InstPrinter/CMakeLists.txt | 7 + lib/Target/NVPTX/InstPrinter/LLVMBuild.txt | 23 + lib/Target/NVPTX/InstPrinter/Makefile | 15 + lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.cpp | 1 + lib/Target/NVPTX/LLVMBuild.txt | 32 + lib/Target/NVPTX/MCTargetDesc/CMakeLists.txt | 9 + lib/Target/NVPTX/MCTargetDesc/LLVMBuild.txt | 23 + lib/Target/NVPTX/MCTargetDesc/Makefile | 16 + lib/Target/NVPTX/MCTargetDesc/NVPTXBaseInfo.h | 88 + lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp | 63 + lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.h | 30 + .../NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp | 91 + lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.h | 36 + lib/Target/NVPTX/Makefile | 23 + lib/Target/NVPTX/ManagedStringPool.h | 49 + lib/Target/NVPTX/NVPTX.h | 137 + lib/Target/NVPTX/NVPTX.td | 44 + lib/Target/NVPTX/NVPTXAllocaHoisting.cpp | 48 + lib/Target/NVPTX/NVPTXAllocaHoisting.h | 49 + lib/Target/NVPTX/NVPTXAsmPrinter.cpp | 2064 +++++++++++ lib/Target/NVPTX/NVPTXAsmPrinter.h | 315 ++ lib/Target/NVPTX/NVPTXFrameLowering.cpp | 76 + lib/Target/NVPTX/NVPTXFrameLowering.h | 40 + lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp | 683 ++++ lib/Target/NVPTX/NVPTXISelDAGToDAG.h | 105 + lib/Target/NVPTX/NVPTXISelLowering.cpp | 1291 +++++++ lib/Target/NVPTX/NVPTXISelLowering.h | 144 + lib/Target/NVPTX/NVPTXInstrFormats.td | 43 + lib/Target/NVPTX/NVPTXInstrInfo.cpp | 326 ++ lib/Target/NVPTX/NVPTXInstrInfo.h | 83 + lib/Target/NVPTX/NVPTXInstrInfo.td | 2837 +++++++++++++++ lib/Target/NVPTX/NVPTXIntrinsics.td | 1675 +++++++++ lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp | 208 ++ lib/Target/NVPTX/NVPTXLowerAggrCopies.h | 47 + lib/Target/NVPTX/NVPTXNumRegisters.h | 20 + lib/Target/NVPTX/NVPTXRegisterInfo.cpp | 325 ++ lib/Target/NVPTX/NVPTXRegisterInfo.h | 92 + lib/Target/NVPTX/NVPTXRegisterInfo.td | 108 + lib/Target/NVPTX/NVPTXSection.h | 45 + lib/Target/NVPTX/NVPTXSplitBBatBar.cpp | 77 + lib/Target/NVPTX/NVPTXSplitBBatBar.h | 41 + lib/Target/NVPTX/NVPTXSubtarget.cpp | 57 + lib/Target/NVPTX/NVPTXSubtarget.h | 92 + lib/Target/NVPTX/NVPTXTargetMachine.cpp | 133 + lib/Target/NVPTX/NVPTXTargetMachine.h | 125 + lib/Target/NVPTX/NVPTXTargetObjectFile.h | 105 + lib/Target/NVPTX/NVPTXUtilities.cpp | 514 +++ lib/Target/NVPTX/NVPTXUtilities.h | 94 + lib/Target/NVPTX/NVPTXVector.td | 1481 ++++++++ lib/Target/NVPTX/NVPTXutil.cpp | 92 + lib/Target/NVPTX/NVPTXutil.h | 25 + lib/Target/NVPTX/TargetInfo/CMakeLists.txt | 7 + lib/Target/NVPTX/TargetInfo/LLVMBuild.txt | 23 + lib/Target/NVPTX/TargetInfo/Makefile | 15 + lib/Target/NVPTX/TargetInfo/NVPTXTargetInfo.cpp | 23 + lib/Target/NVPTX/VectorElementize.cpp | 1248 +++++++ lib/Target/NVPTX/cl_common_defines.h | 125 + lib/Target/NVPTX/gen-register-defs.py | 202 ++ lib/Target/PTX/CMakeLists.txt | 32 - lib/Target/PTX/InstPrinter/CMakeLists.txt | 8 - lib/Target/PTX/InstPrinter/LLVMBuild.txt | 23 - lib/Target/PTX/InstPrinter/Makefile | 16 - lib/Target/PTX/InstPrinter/PTXInstPrinter.cpp | 249 -- lib/Target/PTX/InstPrinter/PTXInstPrinter.h | 45 - lib/Target/PTX/LLVMBuild.txt | 32 - lib/Target/PTX/MCTargetDesc/CMakeLists.txt | 6 - lib/Target/PTX/MCTargetDesc/LLVMBuild.txt | 
23 - lib/Target/PTX/MCTargetDesc/Makefile | 16 - lib/Target/PTX/MCTargetDesc/PTXBaseInfo.h | 134 - lib/Target/PTX/MCTargetDesc/PTXMCAsmInfo.cpp | 37 - lib/Target/PTX/MCTargetDesc/PTXMCAsmInfo.h | 30 - lib/Target/PTX/MCTargetDesc/PTXMCTargetDesc.cpp | 98 - lib/Target/PTX/MCTargetDesc/PTXMCTargetDesc.h | 36 - lib/Target/PTX/Makefile | 23 - lib/Target/PTX/PTX.h | 43 - lib/Target/PTX/PTX.td | 141 - lib/Target/PTX/PTXAsmPrinter.cpp | 561 --- lib/Target/PTX/PTXAsmPrinter.h | 57 - lib/Target/PTX/PTXFPRoundingModePass.cpp | 181 - lib/Target/PTX/PTXFrameLowering.cpp | 24 - lib/Target/PTX/PTXFrameLowering.h | 44 - lib/Target/PTX/PTXISelDAGToDAG.cpp | 356 -- lib/Target/PTX/PTXISelLowering.cpp | 522 --- lib/Target/PTX/PTXISelLowering.h | 82 - lib/Target/PTX/PTXInstrFormats.td | 51 - lib/Target/PTX/PTXInstrInfo.cpp | 359 -- lib/Target/PTX/PTXInstrInfo.h | 133 - lib/Target/PTX/PTXInstrInfo.td | 1031 ------ lib/Target/PTX/PTXInstrLoadStore.td | 278 -- lib/Target/PTX/PTXIntrinsicInstrInfo.td | 110 - lib/Target/PTX/PTXMCAsmStreamer.cpp | 556 --- lib/Target/PTX/PTXMCInstLower.cpp | 32 - lib/Target/PTX/PTXMFInfoExtract.cpp | 85 - lib/Target/PTX/PTXMachineFunctionInfo.cpp | 14 - lib/Target/PTX/PTXMachineFunctionInfo.h | 202 -- lib/Target/PTX/PTXParamManager.cpp | 73 - lib/Target/PTX/PTXParamManager.h | 87 - lib/Target/PTX/PTXRegAlloc.cpp | 53 - lib/Target/PTX/PTXRegisterInfo.cpp | 38 - lib/Target/PTX/PTXRegisterInfo.h | 56 - lib/Target/PTX/PTXRegisterInfo.td | 36 - lib/Target/PTX/PTXSelectionDAGInfo.cpp | 150 - lib/Target/PTX/PTXSelectionDAGInfo.h | 53 - lib/Target/PTX/PTXSubtarget.cpp | 68 - lib/Target/PTX/PTXSubtarget.h | 131 - lib/Target/PTX/PTXTargetMachine.cpp | 165 - lib/Target/PTX/PTXTargetMachine.h | 104 - lib/Target/PTX/TargetInfo/CMakeLists.txt | 7 - lib/Target/PTX/TargetInfo/LLVMBuild.txt | 23 - lib/Target/PTX/TargetInfo/Makefile | 15 - lib/Target/PTX/TargetInfo/PTXTargetInfo.cpp | 25 - lib/Target/PowerPC/CMakeLists.txt | 3 + lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp | 27 +- lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h | 2 +- .../PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp | 1 + lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h | 2 + lib/Target/PowerPC/PPC.h | 19 +- lib/Target/PowerPC/PPC.td | 43 +- lib/Target/PowerPC/PPCAsmPrinter.cpp | 10 +- lib/Target/PowerPC/PPCBranchSelector.cpp | 36 +- lib/Target/PowerPC/PPCCTRLoops.cpp | 724 ++++ lib/Target/PowerPC/PPCFrameLowering.cpp | 40 +- lib/Target/PowerPC/PPCISelDAGToDAG.cpp | 61 +- lib/Target/PowerPC/PPCISelLowering.cpp | 228 +- lib/Target/PowerPC/PPCISelLowering.h | 14 +- lib/Target/PowerPC/PPCInstr64Bit.td | 190 +- lib/Target/PowerPC/PPCInstrAltivec.td | 10 +- lib/Target/PowerPC/PPCInstrFormats.td | 6 + lib/Target/PowerPC/PPCInstrInfo.cpp | 137 +- lib/Target/PowerPC/PPCInstrInfo.h | 3 + lib/Target/PowerPC/PPCInstrInfo.td | 222 +- lib/Target/PowerPC/PPCJITInfo.cpp | 2 +- lib/Target/PowerPC/PPCMCInstLower.cpp | 20 +- lib/Target/PowerPC/PPCRegisterInfo.cpp | 35 +- lib/Target/PowerPC/PPCRegisterInfo.h | 7 +- lib/Target/PowerPC/PPCRegisterInfo.td | 14 +- lib/Target/PowerPC/PPCSchedule.td | 39 +- lib/Target/PowerPC/PPCSchedule440.td | 29 +- lib/Target/PowerPC/PPCScheduleA2.td | 66 +- lib/Target/PowerPC/PPCScheduleG3.td | 1 + lib/Target/PowerPC/PPCScheduleG4.td | 1 + lib/Target/PowerPC/PPCScheduleG4Plus.td | 1 + lib/Target/PowerPC/PPCScheduleG5.td | 1 + lib/Target/PowerPC/PPCSubtarget.cpp | 62 +- lib/Target/PowerPC/PPCSubtarget.h | 8 +- lib/Target/PowerPC/PPCTargetMachine.cpp | 27 +- lib/Target/PowerPC/README.txt | 1 - 
lib/Target/PowerPC/TargetInfo/Makefile | 2 +- lib/Target/README.txt | 6 + lib/Target/Sparc/CMakeLists.txt | 2 + lib/Target/Sparc/DelaySlotFiller.cpp | 11 +- lib/Target/Sparc/SparcAsmPrinter.cpp | 4 +- lib/Target/Sparc/SparcFrameLowering.h | 5 +- lib/Target/Sparc/SparcISelLowering.cpp | 35 +- lib/Target/Sparc/SparcISelLowering.h | 7 +- lib/Target/Sparc/SparcInstrInfo.cpp | 12 +- lib/Target/Sparc/SparcRegisterInfo.cpp | 3 - lib/Target/Sparc/SparcTargetMachine.cpp | 9 +- lib/Target/Sparc/SparcTargetMachine.h | 2 +- lib/Target/TargetData.cpp | 4 +- lib/Target/TargetInstrInfo.cpp | 60 +- lib/Target/TargetLibraryInfo.cpp | 105 +- lib/Target/TargetLoweringObjectFile.cpp | 2 +- lib/Target/TargetMachine.cpp | 54 +- lib/Target/TargetRegisterInfo.cpp | 146 +- lib/Target/X86/AsmParser/X86AsmParser.cpp | 162 +- lib/Target/X86/CMakeLists.txt | 2 + lib/Target/X86/Disassembler/X86Disassembler.cpp | 67 +- lib/Target/X86/Disassembler/X86Disassembler.h | 10 +- .../X86/Disassembler/X86DisassemblerDecoder.c | 16 +- .../X86/Disassembler/X86DisassemblerDecoder.h | 74 +- .../Disassembler/X86DisassemblerDecoderCommon.h | 25 +- lib/Target/X86/InstPrinter/X86InstComments.cpp | 34 +- lib/Target/X86/MCTargetDesc/X86BaseInfo.h | 71 +- lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp | 24 +- lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp | 68 +- lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h | 1 + lib/Target/X86/Utils/X86ShuffleDecode.cpp | 90 +- lib/Target/X86/Utils/X86ShuffleDecode.h | 18 +- lib/Target/X86/X86.h | 7 +- lib/Target/X86/X86.td | 52 +- lib/Target/X86/X86AsmPrinter.cpp | 10 +- lib/Target/X86/X86AsmPrinter.h | 8 +- lib/Target/X86/X86COFFMachineModuleInfo.cpp | 1 - lib/Target/X86/X86COFFMachineModuleInfo.h | 4 +- lib/Target/X86/X86CallingConv.td | 13 +- lib/Target/X86/X86CodeEmitter.cpp | 786 ++++- lib/Target/X86/X86FastISel.cpp | 169 +- lib/Target/X86/X86FloatingPoint.cpp | 20 +- lib/Target/X86/X86FrameLowering.cpp | 167 +- lib/Target/X86/X86FrameLowering.h | 2 + lib/Target/X86/X86ISelDAGToDAG.cpp | 316 +- lib/Target/X86/X86ISelLowering.cpp | 2961 +++++++++------- lib/Target/X86/X86ISelLowering.h | 73 +- lib/Target/X86/X86InstrArithmetic.td | 6 +- lib/Target/X86/X86InstrBuilder.h | 16 +- lib/Target/X86/X86InstrCompiler.td | 24 +- lib/Target/X86/X86InstrControl.td | 48 +- lib/Target/X86/X86InstrExtension.td | 8 + lib/Target/X86/X86InstrFMA.td | 326 +- lib/Target/X86/X86InstrFPStack.td | 185 +- lib/Target/X86/X86InstrFormats.td | 33 +- lib/Target/X86/X86InstrFragmentsSIMD.td | 46 +- lib/Target/X86/X86InstrInfo.cpp | 1099 +++++- lib/Target/X86/X86InstrInfo.h | 46 +- lib/Target/X86/X86InstrInfo.td | 586 +-- lib/Target/X86/X86InstrMMX.td | 421 ++- lib/Target/X86/X86InstrSSE.td | 1390 ++++---- lib/Target/X86/X86InstrSystem.td | 293 +- lib/Target/X86/X86InstrVMX.td | 8 +- lib/Target/X86/X86InstrXOP.td | 109 +- lib/Target/X86/X86JITInfo.h | 2 +- lib/Target/X86/X86MCInstLower.cpp | 110 +- lib/Target/X86/X86MCInstLower.h | 6 +- lib/Target/X86/X86MachineFunctionInfo.h | 20 +- lib/Target/X86/X86RegisterInfo.cpp | 111 +- lib/Target/X86/X86RegisterInfo.h | 14 +- lib/Target/X86/X86RegisterInfo.td | 111 +- lib/Target/X86/X86Relocations.h | 2 +- lib/Target/X86/X86Schedule.td | 218 +- lib/Target/X86/X86ScheduleAtom.td | 229 +- lib/Target/X86/X86SelectionDAGInfo.cpp | 8 +- lib/Target/X86/X86Subtarget.cpp | 89 +- lib/Target/X86/X86Subtarget.h | 18 +- lib/Target/X86/X86TargetMachine.cpp | 23 +- lib/Target/X86/X86TargetObjectFile.cpp | 13 +- lib/Target/X86/X86TargetObjectFile.h | 10 +- lib/Target/X86/X86VZeroUpper.cpp | 6 +- 
lib/Target/XCore/CMakeLists.txt | 2 + lib/Target/XCore/XCoreAsmPrinter.cpp | 14 +- lib/Target/XCore/XCoreFrameLowering.cpp | 10 +- lib/Target/XCore/XCoreFrameLowering.h | 3 - lib/Target/XCore/XCoreISelLowering.cpp | 50 +- lib/Target/XCore/XCoreISelLowering.h | 11 +- lib/Target/XCore/XCoreInstrInfo.td | 16 +- lib/Target/XCore/XCoreRegisterInfo.cpp | 10 +- lib/Target/XCore/XCoreRegisterInfo.h | 2 + lib/Target/XCore/XCoreTargetMachine.cpp | 2 +- lib/Transforms/IPO/ArgumentPromotion.cpp | 14 +- lib/Transforms/IPO/CMakeLists.txt | 2 + lib/Transforms/IPO/DeadArgumentElimination.cpp | 8 +- lib/Transforms/IPO/ExtractGV.cpp | 20 +- lib/Transforms/IPO/GlobalDCE.cpp | 6 +- lib/Transforms/IPO/GlobalOpt.cpp | 221 +- lib/Transforms/IPO/Inliner.cpp | 21 +- lib/Transforms/IPO/LoopExtractor.cpp | 7 +- lib/Transforms/IPO/MergeFunctions.cpp | 14 +- lib/Transforms/IPO/PartialInlining.cpp | 5 +- lib/Transforms/IPO/StripSymbols.cpp | 7 +- lib/Transforms/InstCombine/CMakeLists.txt | 2 + lib/Transforms/InstCombine/InstCombine.h | 4 +- lib/Transforms/InstCombine/InstCombineAddSub.cpp | 88 +- lib/Transforms/InstCombine/InstCombineAndOrXor.cpp | 17 +- lib/Transforms/InstCombine/InstCombineCalls.cpp | 164 +- lib/Transforms/InstCombine/InstCombineCasts.cpp | 22 +- lib/Transforms/InstCombine/InstCombineCompares.cpp | 21 +- .../InstCombine/InstCombineLoadStoreAlloca.cpp | 121 +- .../InstCombine/InstCombineMulDivRem.cpp | 5 +- lib/Transforms/InstCombine/InstCombineSelect.cpp | 23 +- lib/Transforms/InstCombine/InstCombineShifts.cpp | 76 +- .../InstCombine/InstCombineSimplifyDemanded.cpp | 29 + .../InstCombine/InstructionCombining.cpp | 250 +- .../Instrumentation/AddressSanitizer.cpp | 291 +- lib/Transforms/Instrumentation/BoundsChecking.cpp | 209 ++ lib/Transforms/Instrumentation/CMakeLists.txt | 3 + lib/Transforms/Instrumentation/GCOVProfiling.cpp | 120 +- lib/Transforms/Instrumentation/Instrumentation.cpp | 5 +- lib/Transforms/Instrumentation/PathProfiling.cpp | 2 +- lib/Transforms/Instrumentation/ThreadSanitizer.cpp | 267 +- lib/Transforms/Scalar/ADCE.cpp | 16 +- lib/Transforms/Scalar/CMakeLists.txt | 2 + lib/Transforms/Scalar/CodeGenPrepare.cpp | 226 +- lib/Transforms/Scalar/DeadStoreElimination.cpp | 131 +- lib/Transforms/Scalar/EarlyCSE.cpp | 80 +- lib/Transforms/Scalar/GVN.cpp | 391 +- lib/Transforms/Scalar/GlobalMerge.cpp | 6 +- lib/Transforms/Scalar/IndVarSimplify.cpp | 74 +- lib/Transforms/Scalar/JumpThreading.cpp | 8 +- lib/Transforms/Scalar/LICM.cpp | 42 +- lib/Transforms/Scalar/LoopDeletion.cpp | 50 +- lib/Transforms/Scalar/LoopIdiomRecognize.cpp | 12 +- lib/Transforms/Scalar/LoopInstSimplify.cpp | 2 +- lib/Transforms/Scalar/LoopRotation.cpp | 7 +- lib/Transforms/Scalar/LoopStrengthReduce.cpp | 264 +- lib/Transforms/Scalar/LoopUnswitch.cpp | 4 +- lib/Transforms/Scalar/LowerAtomic.cpp | 6 +- lib/Transforms/Scalar/MemCpyOptimizer.cpp | 178 +- lib/Transforms/Scalar/ObjCARC.cpp | 658 ++-- lib/Transforms/Scalar/Reassociate.cpp | 1426 +++++--- lib/Transforms/Scalar/Reg2Mem.cpp | 29 +- lib/Transforms/Scalar/SCCP.cpp | 2 +- lib/Transforms/Scalar/Scalar.cpp | 4 +- lib/Transforms/Scalar/ScalarReplAggregates.cpp | 467 ++- lib/Transforms/Scalar/SimplifyCFGPass.cpp | 76 +- lib/Transforms/Scalar/SimplifyLibCalls.cpp | 189 +- lib/Transforms/Scalar/Sink.cpp | 174 +- lib/Transforms/Scalar/TailRecursionElimination.cpp | 12 +- lib/Transforms/Utils/BasicBlockUtils.cpp | 33 +- lib/Transforms/Utils/BreakCriticalEdges.cpp | 9 +- lib/Transforms/Utils/BuildLibCalls.cpp | 160 +- lib/Transforms/Utils/CMakeLists.txt | 2 + 
lib/Transforms/Utils/CloneFunction.cpp | 2 +- lib/Transforms/Utils/CloneModule.cpp | 2 +- lib/Transforms/Utils/CodeExtractor.cpp | 356 +- lib/Transforms/Utils/InlineFunction.cpp | 22 +- lib/Transforms/Utils/Local.cpp | 41 +- lib/Transforms/Utils/LoopUnroll.cpp | 52 +- lib/Transforms/Utils/LoopUnrollRuntime.cpp | 4 +- lib/Transforms/Utils/LowerExpectIntrinsic.cpp | 50 +- lib/Transforms/Utils/LowerSwitch.cpp | 62 +- lib/Transforms/Utils/ModuleUtils.cpp | 2 +- lib/Transforms/Utils/PromoteMemoryToRegister.cpp | 4 +- lib/Transforms/Utils/SSAUpdater.cpp | 57 +- lib/Transforms/Utils/SimplifyCFG.cpp | 324 +- lib/Transforms/Utils/SimplifyIndVar.cpp | 2 - lib/Transforms/Vectorize/BBVectorize.cpp | 813 ++++- lib/Transforms/Vectorize/CMakeLists.txt | 2 + lib/VMCore/AsmWriter.cpp | 80 +- lib/VMCore/Attributes.cpp | 32 +- lib/VMCore/AutoUpgrade.cpp | 219 +- lib/VMCore/CMakeLists.txt | 14 + lib/VMCore/ConstantFold.cpp | 69 +- lib/VMCore/Constants.cpp | 315 +- lib/VMCore/Core.cpp | 21 +- lib/VMCore/DIBuilder.cpp | 1019 ++++++ lib/VMCore/DebugInfo.cpp | 1168 ++++++ lib/VMCore/DebugLoc.cpp | 42 +- lib/VMCore/Dominators.cpp | 83 +- lib/VMCore/Function.cpp | 243 +- lib/VMCore/GCOV.cpp | 28 +- lib/VMCore/Globals.cpp | 12 +- lib/VMCore/IRBuilder.cpp | 14 +- lib/VMCore/Instruction.cpp | 53 +- lib/VMCore/Instructions.cpp | 40 +- lib/VMCore/Metadata.cpp | 165 +- lib/VMCore/Module.cpp | 162 +- lib/VMCore/PassManager.cpp | 23 +- lib/VMCore/Type.cpp | 30 +- lib/VMCore/TypeFinder.cpp | 148 + lib/VMCore/Value.cpp | 9 +- lib/VMCore/ValueTypes.cpp | 8 + lib/VMCore/Verifier.cpp | 488 +-- 814 files changed, 70634 insertions(+), 33867 deletions(-) delete mode 100644 lib/Analysis/DIBuilder.cpp delete mode 100644 lib/Analysis/DebugInfo.cpp create mode 100644 lib/CodeGen/EarlyIfConversion.cpp create mode 100644 lib/CodeGen/LiveRegMatrix.cpp create mode 100644 lib/CodeGen/LiveRegMatrix.h create mode 100644 lib/CodeGen/MachineTraceMetrics.cpp create mode 100644 lib/CodeGen/MachineTraceMetrics.h delete mode 100644 lib/CodeGen/RegisterClassInfo.h create mode 100644 lib/CodeGen/RegisterPressure.cpp delete mode 100644 lib/CodeGen/RenderMachineFunction.cpp delete mode 100644 lib/CodeGen/RenderMachineFunction.h create mode 100644 lib/MC/MCRegisterInfo.cpp create mode 100644 lib/Support/FileOutputBuffer.cpp create mode 100644 lib/TableGen/StringMatcher.cpp create mode 100644 lib/Target/Hexagon/HexagonInstrInfoV5.td create mode 100644 lib/Target/Hexagon/HexagonIntrinsicsV5.td create mode 100644 lib/Target/Hexagon/HexagonMCInst.h create mode 100644 lib/Target/Hexagon/HexagonNewValueJump.cpp create mode 100644 lib/Target/Hexagon/HexagonVLIWPacketizer.cpp create mode 100644 lib/Target/Mips/Mips16FrameLowering.cpp create mode 100644 lib/Target/Mips/Mips16FrameLowering.h create mode 100644 lib/Target/Mips/Mips16InstrFormats.td create mode 100644 lib/Target/Mips/Mips16InstrInfo.cpp create mode 100644 lib/Target/Mips/Mips16InstrInfo.h create mode 100644 lib/Target/Mips/Mips16InstrInfo.td create mode 100644 lib/Target/Mips/Mips16RegisterInfo.cpp create mode 100644 lib/Target/Mips/Mips16RegisterInfo.h delete mode 100644 lib/Target/Mips/MipsEmitGPRestore.cpp delete mode 100644 lib/Target/Mips/MipsExpandPseudo.cpp create mode 100644 lib/Target/Mips/MipsLongBranch.cpp create mode 100644 lib/Target/Mips/MipsSEFrameLowering.cpp create mode 100644 lib/Target/Mips/MipsSEFrameLowering.h create mode 100644 lib/Target/Mips/MipsSEInstrInfo.cpp create mode 100644 lib/Target/Mips/MipsSEInstrInfo.h create mode 100644 lib/Target/Mips/MipsSERegisterInfo.cpp 
create mode 100644 lib/Target/Mips/MipsSERegisterInfo.h create mode 100644 lib/Target/NVPTX/CMakeLists.txt create mode 100644 lib/Target/NVPTX/InstPrinter/CMakeLists.txt create mode 100644 lib/Target/NVPTX/InstPrinter/LLVMBuild.txt create mode 100644 lib/Target/NVPTX/InstPrinter/Makefile create mode 100644 lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.cpp create mode 100644 lib/Target/NVPTX/LLVMBuild.txt create mode 100644 lib/Target/NVPTX/MCTargetDesc/CMakeLists.txt create mode 100644 lib/Target/NVPTX/MCTargetDesc/LLVMBuild.txt create mode 100644 lib/Target/NVPTX/MCTargetDesc/Makefile create mode 100644 lib/Target/NVPTX/MCTargetDesc/NVPTXBaseInfo.h create mode 100644 lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp create mode 100644 lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.h create mode 100644 lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp create mode 100644 lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.h create mode 100644 lib/Target/NVPTX/Makefile create mode 100644 lib/Target/NVPTX/ManagedStringPool.h create mode 100644 lib/Target/NVPTX/NVPTX.h create mode 100644 lib/Target/NVPTX/NVPTX.td create mode 100644 lib/Target/NVPTX/NVPTXAllocaHoisting.cpp create mode 100644 lib/Target/NVPTX/NVPTXAllocaHoisting.h create mode 100644 lib/Target/NVPTX/NVPTXAsmPrinter.cpp create mode 100644 lib/Target/NVPTX/NVPTXAsmPrinter.h create mode 100644 lib/Target/NVPTX/NVPTXFrameLowering.cpp create mode 100644 lib/Target/NVPTX/NVPTXFrameLowering.h create mode 100644 lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp create mode 100644 lib/Target/NVPTX/NVPTXISelDAGToDAG.h create mode 100644 lib/Target/NVPTX/NVPTXISelLowering.cpp create mode 100644 lib/Target/NVPTX/NVPTXISelLowering.h create mode 100644 lib/Target/NVPTX/NVPTXInstrFormats.td create mode 100644 lib/Target/NVPTX/NVPTXInstrInfo.cpp create mode 100644 lib/Target/NVPTX/NVPTXInstrInfo.h create mode 100644 lib/Target/NVPTX/NVPTXInstrInfo.td create mode 100644 lib/Target/NVPTX/NVPTXIntrinsics.td create mode 100644 lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp create mode 100644 lib/Target/NVPTX/NVPTXLowerAggrCopies.h create mode 100644 lib/Target/NVPTX/NVPTXNumRegisters.h create mode 100644 lib/Target/NVPTX/NVPTXRegisterInfo.cpp create mode 100644 lib/Target/NVPTX/NVPTXRegisterInfo.h create mode 100644 lib/Target/NVPTX/NVPTXRegisterInfo.td create mode 100644 lib/Target/NVPTX/NVPTXSection.h create mode 100644 lib/Target/NVPTX/NVPTXSplitBBatBar.cpp create mode 100644 lib/Target/NVPTX/NVPTXSplitBBatBar.h create mode 100644 lib/Target/NVPTX/NVPTXSubtarget.cpp create mode 100644 lib/Target/NVPTX/NVPTXSubtarget.h create mode 100644 lib/Target/NVPTX/NVPTXTargetMachine.cpp create mode 100644 lib/Target/NVPTX/NVPTXTargetMachine.h create mode 100644 lib/Target/NVPTX/NVPTXTargetObjectFile.h create mode 100644 lib/Target/NVPTX/NVPTXUtilities.cpp create mode 100644 lib/Target/NVPTX/NVPTXUtilities.h create mode 100644 lib/Target/NVPTX/NVPTXVector.td create mode 100644 lib/Target/NVPTX/NVPTXutil.cpp create mode 100644 lib/Target/NVPTX/NVPTXutil.h create mode 100644 lib/Target/NVPTX/TargetInfo/CMakeLists.txt create mode 100644 lib/Target/NVPTX/TargetInfo/LLVMBuild.txt create mode 100644 lib/Target/NVPTX/TargetInfo/Makefile create mode 100644 lib/Target/NVPTX/TargetInfo/NVPTXTargetInfo.cpp create mode 100644 lib/Target/NVPTX/VectorElementize.cpp create mode 100644 lib/Target/NVPTX/cl_common_defines.h create mode 100644 lib/Target/NVPTX/gen-register-defs.py delete mode 100644 lib/Target/PTX/CMakeLists.txt delete mode 100644 lib/Target/PTX/InstPrinter/CMakeLists.txt delete 
mode 100644 lib/Target/PTX/InstPrinter/LLVMBuild.txt delete mode 100644 lib/Target/PTX/InstPrinter/Makefile delete mode 100644 lib/Target/PTX/InstPrinter/PTXInstPrinter.cpp delete mode 100644 lib/Target/PTX/InstPrinter/PTXInstPrinter.h delete mode 100644 lib/Target/PTX/LLVMBuild.txt delete mode 100644 lib/Target/PTX/MCTargetDesc/CMakeLists.txt delete mode 100644 lib/Target/PTX/MCTargetDesc/LLVMBuild.txt delete mode 100644 lib/Target/PTX/MCTargetDesc/Makefile delete mode 100644 lib/Target/PTX/MCTargetDesc/PTXBaseInfo.h delete mode 100644 lib/Target/PTX/MCTargetDesc/PTXMCAsmInfo.cpp delete mode 100644 lib/Target/PTX/MCTargetDesc/PTXMCAsmInfo.h delete mode 100644 lib/Target/PTX/MCTargetDesc/PTXMCTargetDesc.cpp delete mode 100644 lib/Target/PTX/MCTargetDesc/PTXMCTargetDesc.h delete mode 100644 lib/Target/PTX/Makefile delete mode 100644 lib/Target/PTX/PTX.h delete mode 100644 lib/Target/PTX/PTX.td delete mode 100644 lib/Target/PTX/PTXAsmPrinter.cpp delete mode 100644 lib/Target/PTX/PTXAsmPrinter.h delete mode 100644 lib/Target/PTX/PTXFPRoundingModePass.cpp delete mode 100644 lib/Target/PTX/PTXFrameLowering.cpp delete mode 100644 lib/Target/PTX/PTXFrameLowering.h delete mode 100644 lib/Target/PTX/PTXISelDAGToDAG.cpp delete mode 100644 lib/Target/PTX/PTXISelLowering.cpp delete mode 100644 lib/Target/PTX/PTXISelLowering.h delete mode 100644 lib/Target/PTX/PTXInstrFormats.td delete mode 100644 lib/Target/PTX/PTXInstrInfo.cpp delete mode 100644 lib/Target/PTX/PTXInstrInfo.h delete mode 100644 lib/Target/PTX/PTXInstrInfo.td delete mode 100644 lib/Target/PTX/PTXInstrLoadStore.td delete mode 100644 lib/Target/PTX/PTXIntrinsicInstrInfo.td delete mode 100644 lib/Target/PTX/PTXMCAsmStreamer.cpp delete mode 100644 lib/Target/PTX/PTXMCInstLower.cpp delete mode 100644 lib/Target/PTX/PTXMFInfoExtract.cpp delete mode 100644 lib/Target/PTX/PTXMachineFunctionInfo.cpp delete mode 100644 lib/Target/PTX/PTXMachineFunctionInfo.h delete mode 100644 lib/Target/PTX/PTXParamManager.cpp delete mode 100644 lib/Target/PTX/PTXParamManager.h delete mode 100644 lib/Target/PTX/PTXRegAlloc.cpp delete mode 100644 lib/Target/PTX/PTXRegisterInfo.cpp delete mode 100644 lib/Target/PTX/PTXRegisterInfo.h delete mode 100644 lib/Target/PTX/PTXRegisterInfo.td delete mode 100644 lib/Target/PTX/PTXSelectionDAGInfo.cpp delete mode 100644 lib/Target/PTX/PTXSelectionDAGInfo.h delete mode 100644 lib/Target/PTX/PTXSubtarget.cpp delete mode 100644 lib/Target/PTX/PTXSubtarget.h delete mode 100644 lib/Target/PTX/PTXTargetMachine.cpp delete mode 100644 lib/Target/PTX/PTXTargetMachine.h delete mode 100644 lib/Target/PTX/TargetInfo/CMakeLists.txt delete mode 100644 lib/Target/PTX/TargetInfo/LLVMBuild.txt delete mode 100644 lib/Target/PTX/TargetInfo/Makefile delete mode 100644 lib/Target/PTX/TargetInfo/PTXTargetInfo.cpp create mode 100644 lib/Target/PowerPC/PPCCTRLoops.cpp create mode 100644 lib/Transforms/Instrumentation/BoundsChecking.cpp create mode 100644 lib/VMCore/DIBuilder.cpp create mode 100644 lib/VMCore/DebugInfo.cpp create mode 100644 lib/VMCore/TypeFinder.cpp (limited to 'lib') diff --git a/lib/Analysis/AliasAnalysis.cpp b/lib/Analysis/AliasAnalysis.cpp index 95c834b..3b6aab1 100644 --- a/lib/Analysis/AliasAnalysis.cpp +++ b/lib/Analysis/AliasAnalysis.cpp @@ -25,6 +25,9 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/CaptureTracking.h" +#include "llvm/Analysis/Dominators.h" +#include "llvm/Analysis/ValueTracking.h" #include "llvm/Pass.h" 
#include "llvm/BasicBlock.h" #include "llvm/Function.h" @@ -356,6 +359,86 @@ AliasAnalysis::getModRefInfo(const AtomicRMWInst *RMW, const Location &Loc) { return ModRef; } +namespace { + /// Only find pointer captures which happen before the given instruction. Uses + /// the dominator tree to determine whether one instruction is before another. + struct CapturesBefore : public CaptureTracker { + CapturesBefore(const Instruction *I, DominatorTree *DT) + : BeforeHere(I), DT(DT), Captured(false) {} + + void tooManyUses() { Captured = true; } + + bool shouldExplore(Use *U) { + Instruction *I = cast(U->getUser()); + BasicBlock *BB = I->getParent(); + if (BeforeHere != I && + (!DT->isReachableFromEntry(BB) || DT->dominates(BeforeHere, I))) + return false; + return true; + } + + bool captured(Use *U) { + Instruction *I = cast(U->getUser()); + BasicBlock *BB = I->getParent(); + if (BeforeHere != I && + (!DT->isReachableFromEntry(BB) || DT->dominates(BeforeHere, I))) + return false; + Captured = true; + return true; + } + + const Instruction *BeforeHere; + DominatorTree *DT; + + bool Captured; + }; +} + +// FIXME: this is really just shoring-up a deficiency in alias analysis. +// BasicAA isn't willing to spend linear time determining whether an alloca +// was captured before or after this particular call, while we are. However, +// with a smarter AA in place, this test is just wasting compile time. +AliasAnalysis::ModRefResult +AliasAnalysis::callCapturesBefore(const Instruction *I, + const AliasAnalysis::Location &MemLoc, + DominatorTree *DT) { + if (!DT || !TD) return AliasAnalysis::ModRef; + + const Value *Object = GetUnderlyingObject(MemLoc.Ptr, TD); + if (!isIdentifiedObject(Object) || isa(Object) || + isa(Object)) + return AliasAnalysis::ModRef; + + ImmutableCallSite CS(I); + if (!CS.getInstruction() || CS.getInstruction() == Object) + return AliasAnalysis::ModRef; + + CapturesBefore CB(I, DT); + llvm::PointerMayBeCaptured(Object, &CB); + if (CB.Captured) + return AliasAnalysis::ModRef; + + unsigned ArgNo = 0; + for (ImmutableCallSite::arg_iterator CI = CS.arg_begin(), CE = CS.arg_end(); + CI != CE; ++CI, ++ArgNo) { + // Only look at the no-capture or byval pointer arguments. If this + // pointer were passed to arguments that were neither of these, then it + // couldn't be no-capture. + if (!(*CI)->getType()->isPointerTy() || + (!CS.doesNotCapture(ArgNo) && !CS.isByValArgument(ArgNo))) + continue; + + // If this is a no-capture pointer argument, see if we can tell that it + // is impossible to alias the pointer we're checking. If not, we have to + // assume that the call could touch the pointer, even though it doesn't + // escape. + if (!isNoAlias(AliasAnalysis::Location(*CI), + AliasAnalysis::Location(Object))) { + return AliasAnalysis::ModRef; + } + } + return AliasAnalysis::NoModRef; +} // AliasAnalysis destructor: DO NOT move this to the header file for // AliasAnalysis or else clients of the AliasAnalysis class may not depend on diff --git a/lib/Analysis/AliasSetTracker.cpp b/lib/Analysis/AliasSetTracker.cpp index f80e2fb..92e8906 100644 --- a/lib/Analysis/AliasSetTracker.cpp +++ b/lib/Analysis/AliasSetTracker.cpp @@ -501,7 +501,7 @@ void AliasSetTracker::deleteValue(Value *PtrVal) { } // First, look up the PointerRec for this pointer. - PointerMapType::iterator I = PointerMap.find(PtrVal); + PointerMapType::iterator I = PointerMap.find_as(PtrVal); if (I == PointerMap.end()) return; // Noop // If we found one, remove the pointer from the alias set it is in. 
@@ -527,7 +527,7 @@ void AliasSetTracker::copyValue(Value *From, Value *To) { AA.copyValue(From, To); // First, look up the PointerRec for this pointer. - PointerMapType::iterator I = PointerMap.find(From); + PointerMapType::iterator I = PointerMap.find_as(From); if (I == PointerMap.end()) return; // Noop assert(I->second->hasAliasSet() && "Dead entry?"); @@ -536,7 +536,7 @@ if (Entry.hasAliasSet()) return; // Already in the tracker! // Add it to the alias set it aliases... - I = PointerMap.find(From); + I = PointerMap.find_as(From); AliasSet *AS = I->second->getAliasSet(*this); AS->addPointer(*this, Entry, I->second->getSize(), I->second->getTBAAInfo(), diff --git a/lib/Analysis/BasicAliasAnalysis.cpp b/lib/Analysis/BasicAliasAnalysis.cpp index 20ecfd2..1d028c2 100644 --- a/lib/Analysis/BasicAliasAnalysis.cpp +++ b/lib/Analysis/BasicAliasAnalysis.cpp @@ -86,47 +86,10 @@ static bool isEscapeSource(const Value *V) { /// UnknownSize if unknown. static uint64_t getObjectSize(const Value *V, const TargetData &TD, bool RoundToAlign = false) { - Type *AccessTy; - unsigned Align; - if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(V)) { - if (!GV->hasDefinitiveInitializer()) - return AliasAnalysis::UnknownSize; - AccessTy = GV->getType()->getElementType(); - Align = GV->getAlignment(); - } else if (const AllocaInst *AI = dyn_cast<AllocaInst>(V)) { - if (!AI->isArrayAllocation()) - AccessTy = AI->getType()->getElementType(); - else - return AliasAnalysis::UnknownSize; - Align = AI->getAlignment(); - } else if (const CallInst* CI = extractMallocCall(V)) { - if (!RoundToAlign && !isArrayMalloc(V, &TD)) - // The size is the argument to the malloc call. - if (const ConstantInt* C = dyn_cast<ConstantInt>(CI->getArgOperand(0))) - return C->getZExtValue(); - return AliasAnalysis::UnknownSize; - } else if (const Argument *A = dyn_cast<Argument>(V)) { - if (A->hasByValAttr()) { - AccessTy = cast<PointerType>(A->getType())->getElementType(); - Align = A->getParamAlignment(); - } else { - return AliasAnalysis::UnknownSize; - } - } else { - return AliasAnalysis::UnknownSize; - } - - if (!AccessTy->isSized()) - return AliasAnalysis::UnknownSize; - - uint64_t Size = TD.getTypeAllocSize(AccessTy); - // If there is an explicitly specified alignment, and we need to - // take alignment into account, round up the size. (If the alignment - // is implicit, getTypeAllocSize is sufficient.)
-  if (RoundToAlign && Align)
-    Size = RoundUpToAlignment(Size, Align);
-
-  return Size;
+  uint64_t Size;
+  if (getObjectSize(V, Size, &TD, RoundToAlign))
+    return Size;
+  return AliasAnalysis::UnknownSize;
 }
 
 /// isObjectSmallerThan - Return true if we can prove that the object specified
diff --git a/lib/Analysis/CMakeLists.txt b/lib/Analysis/CMakeLists.txt
index 2e3ec8b..96e68b4 100644
--- a/lib/Analysis/CMakeLists.txt
+++ b/lib/Analysis/CMakeLists.txt
@@ -12,9 +12,7 @@ add_llvm_library(LLVMAnalysis
   CaptureTracking.cpp
   CodeMetrics.cpp
   ConstantFolding.cpp
-  DIBuilder.cpp
   DbgInfoPrinter.cpp
-  DebugInfo.cpp
   DomPrinter.cpp
   DominanceFrontier.cpp
   IVUsers.cpp
@@ -59,4 +57,6 @@ add_llvm_library(LLVMAnalysis
   ValueTracking.cpp
   )
 
+add_dependencies(LLVMAnalysis intrinsics_gen)
+
 add_subdirectory(IPA)
diff --git a/lib/Analysis/CaptureTracking.cpp b/lib/Analysis/CaptureTracking.cpp
index dd33eeb..974b906 100644
--- a/lib/Analysis/CaptureTracking.cpp
+++ b/lib/Analysis/CaptureTracking.cpp
@@ -34,7 +34,7 @@ namespace {
 
     bool captured(Use *U) {
       if (isa<ReturnInst>(U->getUser()) && !ReturnCaptures)
-	return false;
+        return false;
 
       Captured = true;
       return true;
diff --git a/lib/Analysis/CodeMetrics.cpp b/lib/Analysis/CodeMetrics.cpp
index 316e7bc9..acda34b 100644
--- a/lib/Analysis/CodeMetrics.cpp
+++ b/lib/Analysis/CodeMetrics.cpp
@@ -22,7 +22,11 @@ using namespace llvm;
 /// callIsSmall - If a call is likely to lower to a single target instruction,
 /// or is otherwise deemed small, return true.
 /// TODO: Perhaps calls like memcpy, strcpy, etc?
-bool llvm::callIsSmall(const Function *F) {
+bool llvm::callIsSmall(ImmutableCallSite CS) {
+  if (isa<IntrinsicInst>(CS.getInstruction()))
+    return true;
+
+  const Function *F = CS.getCalledFunction();
   if (!F) return false;
 
   if (F->hasLocalLinkage()) return false;
@@ -79,8 +83,24 @@ bool llvm::isInstructionFree(const Instruction *I, const TargetData *TD) {
   if (const CastInst *CI = dyn_cast<CastInst>(I)) {
     // Noop casts, including ptr <-> int, don't count.
-    if (CI->isLosslessCast() || isa<IntToPtrInst>(CI) || isa<PtrToIntInst>(CI))
+    if (CI->isLosslessCast())
+      return true;
+
+    Value *Op = CI->getOperand(0);
+    // An inttoptr cast is free so long as the input is a legal integer type
+    // which doesn't contain values outside the range of a pointer.
+    if (isa<IntToPtrInst>(CI) && TD &&
+        TD->isLegalInteger(Op->getType()->getScalarSizeInBits()) &&
+        Op->getType()->getScalarSizeInBits() <= TD->getPointerSizeInBits())
       return true;
+
+    // A ptrtoint cast is free so long as the result is large enough to store
+    // the pointer, and a legal integer type.
+    if (isa<PtrToIntInst>(CI) && TD &&
+        TD->isLegalInteger(Op->getType()->getScalarSizeInBits()) &&
+        Op->getType()->getScalarSizeInBits() >= TD->getPointerSizeInBits())
+      return true;
+
     // trunc to a native type is free (assuming the target has compare and
     // shift-right of the same width).
     if (TD && isa<TruncInst>(CI) &&
@@ -126,7 +146,7 @@ void CodeMetrics::analyzeBasicBlock(const BasicBlock *BB,
         isRecursive = true;
       }
 
-      if (!isa<IntrinsicInst>(II) && !callIsSmall(CS.getCalledFunction())) {
+      if (!callIsSmall(CS)) {
         // Each argument to a call takes on average one instruction to set up.
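// (Editorial worked example of the free-cast rule introduced above, assuming
// an x86-64-like TargetData: 64-bit pointers, legal integers i8/i16/i32/i64.)
//
//   %p = inttoptr i64 %x to i8*    ; free:     i64 legal and 64 <= 64
//   %q = inttoptr i128 %y to i8*   ; not free: i128 is not a legal integer
//   %a = ptrtoint i8* %p to i64    ; free:     i64 legal and 64 >= 64
//   %b = ptrtoint i8* %p to i32    ; not free: 32 < 64, the pointer is truncated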
NumInsts += CS.arg_size(); diff --git a/lib/Analysis/ConstantFolding.cpp b/lib/Analysis/ConstantFolding.cpp index 783c32e..f5e619c 100644 --- a/lib/Analysis/ConstantFolding.cpp +++ b/lib/Analysis/ConstantFolding.cpp @@ -358,17 +358,20 @@ static bool ReadDataFromGlobal(Constant *C, uint64_t ByteOffset, NumElts = AT->getNumElements(); else NumElts = cast(C->getType())->getNumElements(); - + for (; Index != NumElts; ++Index) { if (!ReadDataFromGlobal(C->getAggregateElement(Index), Offset, CurPtr, BytesLeft, TD)) return false; - if (EltSize >= BytesLeft) + + uint64_t BytesWritten = EltSize - Offset; + assert(BytesWritten <= EltSize && "Not indexing into this element?"); + if (BytesWritten >= BytesLeft) return true; - + Offset = 0; - BytesLeft -= EltSize; - CurPtr += EltSize; + BytesLeft -= BytesWritten; + CurPtr += BytesWritten; } return true; } @@ -600,6 +603,22 @@ static Constant *CastGEPIndices(ArrayRef Ops, return C; } +/// Strip the pointer casts, but preserve the address space information. +static Constant* StripPtrCastKeepAS(Constant* Ptr) { + assert(Ptr->getType()->isPointerTy() && "Not a pointer type"); + PointerType *OldPtrTy = cast(Ptr->getType()); + Ptr = cast(Ptr->stripPointerCasts()); + PointerType *NewPtrTy = cast(Ptr->getType()); + + // Preserve the address space number of the pointer. + if (NewPtrTy->getAddressSpace() != OldPtrTy->getAddressSpace()) { + NewPtrTy = NewPtrTy->getElementType()->getPointerTo( + OldPtrTy->getAddressSpace()); + Ptr = ConstantExpr::getBitCast(Ptr, NewPtrTy); + } + return Ptr; +} + /// SymbolicallyEvaluateGEP - If we can symbolically evaluate the specified GEP /// constant expression, do so. static Constant *SymbolicallyEvaluateGEP(ArrayRef Ops, @@ -636,13 +655,13 @@ static Constant *SymbolicallyEvaluateGEP(ArrayRef Ops, } return 0; } - + unsigned BitWidth = TD->getTypeSizeInBits(IntPtrTy); APInt Offset = APInt(BitWidth, TD->getIndexedOffset(Ptr->getType(), makeArrayRef((Value **)Ops.data() + 1, Ops.size() - 1))); - Ptr = cast(Ptr->stripPointerCasts()); + Ptr = StripPtrCastKeepAS(Ptr); // If this is a GEP of a GEP, fold it all into a single GEP. while (GEPOperator *GEP = dyn_cast(Ptr)) { @@ -661,7 +680,7 @@ static Constant *SymbolicallyEvaluateGEP(ArrayRef Ops, Ptr = cast(GEP->getOperand(0)); Offset += APInt(BitWidth, TD->getIndexedOffset(Ptr->getType(), NestedOps)); - Ptr = cast(Ptr->stripPointerCasts()); + Ptr = StripPtrCastKeepAS(Ptr); } // If the base value for this address is a literal integer value, fold the @@ -780,14 +799,21 @@ Constant *llvm::ConstantFoldInstruction(Instruction *I, // all operands are constants. if (isa(Incoming)) continue; - // If the incoming value is not a constant, or is a different constant to - // the one we saw previously, then give up. + // If the incoming value is not a constant, then give up. Constant *C = dyn_cast(Incoming); - if (!C || (CommonValue && C != CommonValue)) + if (!C) + return 0; + // Fold the PHI's operands. + if (ConstantExpr *NewC = dyn_cast(C)) + C = ConstantFoldConstantExpression(NewC, TD, TLI); + // If the incoming value is a different constant to + // the one we saw previously, then give up. + if (CommonValue && C != CommonValue) return 0; CommonValue = C; } + // If we reach here, all incoming values are the same constant or undef. return CommonValue ? CommonValue : UndefValue::get(PN->getType()); } @@ -795,12 +821,18 @@ Constant *llvm::ConstantFoldInstruction(Instruction *I, // Scan the operand list, checking to see if they are all constants, if so, // hand off to ConstantFoldInstOperands. 
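// (Editorial note on the PHI change above: two incoming values can be the
// same constant spelled differently, e.g.
//
//    %v = phi i8* [ null, %bb1 ], [ getelementptr(i8* null, i64 0), %bb2 ]
//
// Folding each incoming ConstantExpr through ConstantFoldConstantExpression()
// reduces the GEP to null first, so this PHI now folds to null where the old
// plain pointer-equality test gave up.)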
SmallVector Ops; - for (User::op_iterator i = I->op_begin(), e = I->op_end(); i != e; ++i) - if (Constant *Op = dyn_cast(*i)) - Ops.push_back(Op); - else + for (User::op_iterator i = I->op_begin(), e = I->op_end(); i != e; ++i) { + Constant *Op = dyn_cast(*i); + if (!Op) return 0; // All operands not constant! + // Fold the Instruction's operands. + if (ConstantExpr *NewCE = dyn_cast(Op)) + Op = ConstantFoldConstantExpression(NewCE, TD, TLI); + + Ops.push_back(Op); + } + if (const CmpInst *CI = dyn_cast(I)) return ConstantFoldCompareInstOperands(CI->getPredicate(), Ops[0], Ops[1], TD, TLI); diff --git a/lib/Analysis/DIBuilder.cpp b/lib/Analysis/DIBuilder.cpp deleted file mode 100644 index 85913b1..0000000 --- a/lib/Analysis/DIBuilder.cpp +++ /dev/null @@ -1,1015 +0,0 @@ -//===--- DIBuilder.cpp - Debug Information Builder ------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements the DIBuilder. -// -//===----------------------------------------------------------------------===// - -#include "llvm/Analysis/DIBuilder.h" -#include "llvm/Analysis/DebugInfo.h" -#include "llvm/Constants.h" -#include "llvm/IntrinsicInst.h" -#include "llvm/Module.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/Dwarf.h" - -using namespace llvm; -using namespace llvm::dwarf; - -static Constant *GetTagConstant(LLVMContext &VMContext, unsigned Tag) { - assert((Tag & LLVMDebugVersionMask) == 0 && - "Tag too large for debug encoding!"); - return ConstantInt::get(Type::getInt32Ty(VMContext), Tag | LLVMDebugVersion); -} - -DIBuilder::DIBuilder(Module &m) - : M(m), VMContext(M.getContext()), TheCU(0), TempEnumTypes(0), - TempRetainTypes(0), TempSubprograms(0), TempGVs(0), DeclareFn(0), - ValueFn(0) -{} - -/// finalize - Construct any deferred debug info descriptors. -void DIBuilder::finalize() { - DIArray Enums = getOrCreateArray(AllEnumTypes); - DIType(TempEnumTypes).replaceAllUsesWith(Enums); - - DIArray RetainTypes = getOrCreateArray(AllRetainTypes); - DIType(TempRetainTypes).replaceAllUsesWith(RetainTypes); - - DIArray SPs = getOrCreateArray(AllSubprograms); - DIType(TempSubprograms).replaceAllUsesWith(SPs); - for (unsigned i = 0, e = SPs.getNumElements(); i != e; ++i) { - DISubprogram SP(SPs.getElement(i)); - if (NamedMDNode *NMD = getFnSpecificMDNode(M, SP)) { - SmallVector Variables; - for (unsigned ii = 0, ee = NMD->getNumOperands(); ii != ee; ++ii) - Variables.push_back(NMD->getOperand(ii)); - if (MDNode *Temp = SP.getVariablesNodes()) { - DIArray AV = getOrCreateArray(Variables); - DIType(Temp).replaceAllUsesWith(AV); - } - NMD->eraseFromParent(); - } - } - - DIArray GVs = getOrCreateArray(AllGVs); - DIType(TempGVs).replaceAllUsesWith(GVs); -} - -/// getNonCompileUnitScope - If N is compile unit return NULL otherwise return -/// N. -static MDNode *getNonCompileUnitScope(MDNode *N) { - if (DIDescriptor(N).isCompileUnit()) - return NULL; - return N; -} - -/// createCompileUnit - A CompileUnit provides an anchor for all debugging -/// information generated during this instance of compilation. 
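// (Editorial sketch: finalize() above shows DIBuilder's deferred-construction
// idiom -- list slots are emitted as temporary nodes up front, then patched
// once the real arrays exist. Condensed, using only APIs visible in this
// deleted file:
//
//    Value *Elts[] = { GetTagConstant(VMContext, DW_TAG_base_type) };
//    MDNode *Placeholder = MDNode::getTemporary(VMContext, Elts);
//    ...                                        // collect nodes during codegen
//    DIArray Real = getOrCreateArray(AllSubprograms);
//    DIType(Placeholder).replaceAllUsesWith(Real);  // RAUW + deleteTemporary
//
// replaceAllUsesWith() is defined in DebugInfo.cpp, also deleted by this
// import.)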
-void DIBuilder::createCompileUnit(unsigned Lang, StringRef Filename, - StringRef Directory, StringRef Producer, - bool isOptimized, StringRef Flags, - unsigned RunTimeVer) { - assert(((Lang <= dwarf::DW_LANG_Python && Lang >= dwarf::DW_LANG_C89) || - (Lang <= dwarf::DW_LANG_hi_user && Lang >= dwarf::DW_LANG_lo_user)) && - "Invalid Language tag"); - assert(!Filename.empty() && - "Unable to create compile unit without filename"); - Value *TElts[] = { GetTagConstant(VMContext, DW_TAG_base_type) }; - TempEnumTypes = MDNode::getTemporary(VMContext, TElts); - Value *THElts[] = { TempEnumTypes }; - MDNode *EnumHolder = MDNode::get(VMContext, THElts); - - TempRetainTypes = MDNode::getTemporary(VMContext, TElts); - Value *TRElts[] = { TempRetainTypes }; - MDNode *RetainHolder = MDNode::get(VMContext, TRElts); - - TempSubprograms = MDNode::getTemporary(VMContext, TElts); - Value *TSElts[] = { TempSubprograms }; - MDNode *SPHolder = MDNode::get(VMContext, TSElts); - - TempGVs = MDNode::getTemporary(VMContext, TElts); - Value *TVElts[] = { TempGVs }; - MDNode *GVHolder = MDNode::get(VMContext, TVElts); - - Value *Elts[] = { - GetTagConstant(VMContext, dwarf::DW_TAG_compile_unit), - llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)), - ConstantInt::get(Type::getInt32Ty(VMContext), Lang), - MDString::get(VMContext, Filename), - MDString::get(VMContext, Directory), - MDString::get(VMContext, Producer), - // Deprecate isMain field. - ConstantInt::get(Type::getInt1Ty(VMContext), true), // isMain - ConstantInt::get(Type::getInt1Ty(VMContext), isOptimized), - MDString::get(VMContext, Flags), - ConstantInt::get(Type::getInt32Ty(VMContext), RunTimeVer), - EnumHolder, - RetainHolder, - SPHolder, - GVHolder - }; - TheCU = DICompileUnit(MDNode::get(VMContext, Elts)); - - // Create a named metadata so that it is easier to find cu in a module. - NamedMDNode *NMD = M.getOrInsertNamedMetadata("llvm.dbg.cu"); - NMD->addOperand(TheCU); -} - -/// createFile - Create a file descriptor to hold debugging information -/// for a file. -DIFile DIBuilder::createFile(StringRef Filename, StringRef Directory) { - assert(TheCU && "Unable to create DW_TAG_file_type without CompileUnit"); - assert(!Filename.empty() && "Unable to create file without name"); - Value *Elts[] = { - GetTagConstant(VMContext, dwarf::DW_TAG_file_type), - MDString::get(VMContext, Filename), - MDString::get(VMContext, Directory), - NULL // TheCU - }; - return DIFile(MDNode::get(VMContext, Elts)); -} - -/// createEnumerator - Create a single enumerator value. -DIEnumerator DIBuilder::createEnumerator(StringRef Name, uint64_t Val) { - assert(!Name.empty() && "Unable to create enumerator without name"); - Value *Elts[] = { - GetTagConstant(VMContext, dwarf::DW_TAG_enumerator), - MDString::get(VMContext, Name), - ConstantInt::get(Type::getInt64Ty(VMContext), Val) - }; - return DIEnumerator(MDNode::get(VMContext, Elts)); -} - -/// createNullPtrType - Create C++0x nullptr type. -DIType DIBuilder::createNullPtrType(StringRef Name) { - assert(!Name.empty() && "Unable to create type without name"); - // nullptr is encoded in DIBasicType format. Line number, filename, - // ,size, alignment, offset and flags are always empty here. 
- Value *Elts[] = { - GetTagConstant(VMContext, dwarf::DW_TAG_unspecified_type), - NULL, //TheCU, - MDString::get(VMContext, Name), - NULL, // Filename - ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Line - ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Size - ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Align - ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Offset - ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Flags; - ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Encoding - }; - return DIType(MDNode::get(VMContext, Elts)); -} - -/// createBasicType - Create debugging information entry for a basic -/// type, e.g 'char'. -DIType DIBuilder::createBasicType(StringRef Name, uint64_t SizeInBits, - uint64_t AlignInBits, - unsigned Encoding) { - assert(!Name.empty() && "Unable to create type without name"); - // Basic types are encoded in DIBasicType format. Line number, filename, - // offset and flags are always empty here. - Value *Elts[] = { - GetTagConstant(VMContext, dwarf::DW_TAG_base_type), - NULL, //TheCU, - MDString::get(VMContext, Name), - NULL, // Filename - ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Line - ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits), - ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits), - ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Offset - ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Flags; - ConstantInt::get(Type::getInt32Ty(VMContext), Encoding) - }; - return DIType(MDNode::get(VMContext, Elts)); -} - -/// createQualifiedType - Create debugging information entry for a qualified -/// type, e.g. 'const int'. -DIType DIBuilder::createQualifiedType(unsigned Tag, DIType FromTy) { - // Qualified types are encoded in DIDerivedType format. - Value *Elts[] = { - GetTagConstant(VMContext, Tag), - NULL, //TheCU, - MDString::get(VMContext, StringRef()), // Empty name. - NULL, // Filename - ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Line - ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Size - ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Align - ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Offset - ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Flags - FromTy - }; - return DIType(MDNode::get(VMContext, Elts)); -} - -/// createPointerType - Create debugging information entry for a pointer. -DIType DIBuilder::createPointerType(DIType PointeeTy, uint64_t SizeInBits, - uint64_t AlignInBits, StringRef Name) { - // Pointer types are encoded in DIDerivedType format. - Value *Elts[] = { - GetTagConstant(VMContext, dwarf::DW_TAG_pointer_type), - NULL, //TheCU, - MDString::get(VMContext, Name), - NULL, // Filename - ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Line - ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits), - ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits), - ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Offset - ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Flags - PointeeTy - }; - return DIType(MDNode::get(VMContext, Elts)); -} - -/// createReferenceType - Create debugging information entry for a reference. -DIType DIBuilder::createReferenceType(DIType RTy) { - assert(RTy.Verify() && "Unable to create reference type"); - // References are encoded in DIDerivedType format. 
- Value *Elts[] = { - GetTagConstant(VMContext, dwarf::DW_TAG_reference_type), - NULL, // TheCU, - NULL, // Name - NULL, // Filename - ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Line - ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Size - ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Align - ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Offset - ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Flags - RTy - }; - return DIType(MDNode::get(VMContext, Elts)); -} - -/// createTypedef - Create debugging information entry for a typedef. -DIType DIBuilder::createTypedef(DIType Ty, StringRef Name, DIFile File, - unsigned LineNo, DIDescriptor Context) { - // typedefs are encoded in DIDerivedType format. - assert(Ty.Verify() && "Invalid typedef type!"); - Value *Elts[] = { - GetTagConstant(VMContext, dwarf::DW_TAG_typedef), - getNonCompileUnitScope(Context), - MDString::get(VMContext, Name), - File, - ConstantInt::get(Type::getInt32Ty(VMContext), LineNo), - ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Size - ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Align - ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Offset - ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Flags - Ty - }; - return DIType(MDNode::get(VMContext, Elts)); -} - -/// createFriend - Create debugging information entry for a 'friend'. -DIType DIBuilder::createFriend(DIType Ty, DIType FriendTy) { - // typedefs are encoded in DIDerivedType format. - assert(Ty.Verify() && "Invalid type!"); - assert(FriendTy.Verify() && "Invalid friend type!"); - Value *Elts[] = { - GetTagConstant(VMContext, dwarf::DW_TAG_friend), - Ty, - NULL, // Name - Ty.getFile(), - ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Line - ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Size - ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Align - ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Offset - ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Flags - FriendTy - }; - return DIType(MDNode::get(VMContext, Elts)); -} - -/// createInheritance - Create debugging information entry to establish -/// inheritance relationship between two types. -DIType DIBuilder::createInheritance(DIType Ty, DIType BaseTy, - uint64_t BaseOffset, unsigned Flags) { - assert(Ty.Verify() && "Unable to create inheritance"); - // TAG_inheritance is encoded in DIDerivedType format. - Value *Elts[] = { - GetTagConstant(VMContext, dwarf::DW_TAG_inheritance), - Ty, - NULL, // Name - Ty.getFile(), - ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Line - ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Size - ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Align - ConstantInt::get(Type::getInt64Ty(VMContext), BaseOffset), - ConstantInt::get(Type::getInt32Ty(VMContext), Flags), - BaseTy - }; - return DIType(MDNode::get(VMContext, Elts)); -} - -/// createMemberType - Create debugging information entry for a member. -DIType DIBuilder::createMemberType(DIDescriptor Scope, StringRef Name, - DIFile File, unsigned LineNumber, - uint64_t SizeInBits, uint64_t AlignInBits, - uint64_t OffsetInBits, unsigned Flags, - DIType Ty) { - // TAG_member is encoded in DIDerivedType format. 
- Value *Elts[] = { - GetTagConstant(VMContext, dwarf::DW_TAG_member), - getNonCompileUnitScope(Scope), - MDString::get(VMContext, Name), - File, - ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber), - ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits), - ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits), - ConstantInt::get(Type::getInt64Ty(VMContext), OffsetInBits), - ConstantInt::get(Type::getInt32Ty(VMContext), Flags), - Ty - }; - return DIType(MDNode::get(VMContext, Elts)); -} - -/// createObjCIVar - Create debugging information entry for Objective-C -/// instance variable. -DIType DIBuilder::createObjCIVar(StringRef Name, - DIFile File, unsigned LineNumber, - uint64_t SizeInBits, uint64_t AlignInBits, - uint64_t OffsetInBits, unsigned Flags, - DIType Ty, StringRef PropertyName, - StringRef GetterName, StringRef SetterName, - unsigned PropertyAttributes) { - // TAG_member is encoded in DIDerivedType format. - Value *Elts[] = { - GetTagConstant(VMContext, dwarf::DW_TAG_member), - getNonCompileUnitScope(File), - MDString::get(VMContext, Name), - File, - ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber), - ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits), - ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits), - ConstantInt::get(Type::getInt64Ty(VMContext), OffsetInBits), - ConstantInt::get(Type::getInt32Ty(VMContext), Flags), - Ty, - MDString::get(VMContext, PropertyName), - MDString::get(VMContext, GetterName), - MDString::get(VMContext, SetterName), - ConstantInt::get(Type::getInt32Ty(VMContext), PropertyAttributes) - }; - return DIType(MDNode::get(VMContext, Elts)); -} - -/// createObjCIVar - Create debugging information entry for Objective-C -/// instance variable. -DIType DIBuilder::createObjCIVar(StringRef Name, - DIFile File, unsigned LineNumber, - uint64_t SizeInBits, uint64_t AlignInBits, - uint64_t OffsetInBits, unsigned Flags, - DIType Ty, MDNode *PropertyNode) { - // TAG_member is encoded in DIDerivedType format. - Value *Elts[] = { - GetTagConstant(VMContext, dwarf::DW_TAG_member), - getNonCompileUnitScope(File), - MDString::get(VMContext, Name), - File, - ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber), - ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits), - ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits), - ConstantInt::get(Type::getInt64Ty(VMContext), OffsetInBits), - ConstantInt::get(Type::getInt32Ty(VMContext), Flags), - Ty, - PropertyNode - }; - return DIType(MDNode::get(VMContext, Elts)); -} - -/// createObjCProperty - Create debugging information entry for Objective-C -/// property. -DIObjCProperty DIBuilder::createObjCProperty(StringRef Name, - DIFile File, unsigned LineNumber, - StringRef GetterName, - StringRef SetterName, - unsigned PropertyAttributes, - DIType Ty) { - Value *Elts[] = { - GetTagConstant(VMContext, dwarf::DW_TAG_APPLE_property), - MDString::get(VMContext, Name), - File, - ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber), - MDString::get(VMContext, GetterName), - MDString::get(VMContext, SetterName), - ConstantInt::get(Type::getInt32Ty(VMContext), PropertyAttributes), - Ty - }; - return DIObjCProperty(MDNode::get(VMContext, Elts)); -} - -/// createClassType - Create debugging information entry for a class. 
-DIType DIBuilder::createClassType(DIDescriptor Context, StringRef Name, - DIFile File, unsigned LineNumber, - uint64_t SizeInBits, uint64_t AlignInBits, - uint64_t OffsetInBits, unsigned Flags, - DIType DerivedFrom, DIArray Elements, - MDNode *VTableHolder, MDNode *TemplateParams) { - // TAG_class_type is encoded in DICompositeType format. - Value *Elts[] = { - GetTagConstant(VMContext, dwarf::DW_TAG_class_type), - getNonCompileUnitScope(Context), - MDString::get(VMContext, Name), - File, - ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber), - ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits), - ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits), - ConstantInt::get(Type::getInt32Ty(VMContext), OffsetInBits), - ConstantInt::get(Type::getInt32Ty(VMContext), Flags), - DerivedFrom, - Elements, - ConstantInt::get(Type::getInt32Ty(VMContext), 0), - VTableHolder, - TemplateParams - }; - return DIType(MDNode::get(VMContext, Elts)); -} - -/// createTemplateTypeParameter - Create debugging information for template -/// type parameter. -DITemplateTypeParameter -DIBuilder::createTemplateTypeParameter(DIDescriptor Context, StringRef Name, - DIType Ty, MDNode *File, unsigned LineNo, - unsigned ColumnNo) { - Value *Elts[] = { - GetTagConstant(VMContext, dwarf::DW_TAG_template_type_parameter), - getNonCompileUnitScope(Context), - MDString::get(VMContext, Name), - Ty, - File, - ConstantInt::get(Type::getInt32Ty(VMContext), LineNo), - ConstantInt::get(Type::getInt32Ty(VMContext), ColumnNo) - }; - return DITemplateTypeParameter(MDNode::get(VMContext, Elts)); -} - -/// createTemplateValueParameter - Create debugging information for template -/// value parameter. -DITemplateValueParameter -DIBuilder::createTemplateValueParameter(DIDescriptor Context, StringRef Name, - DIType Ty, uint64_t Val, - MDNode *File, unsigned LineNo, - unsigned ColumnNo) { - Value *Elts[] = { - GetTagConstant(VMContext, dwarf::DW_TAG_template_value_parameter), - getNonCompileUnitScope(Context), - MDString::get(VMContext, Name), - Ty, - ConstantInt::get(Type::getInt64Ty(VMContext), Val), - File, - ConstantInt::get(Type::getInt32Ty(VMContext), LineNo), - ConstantInt::get(Type::getInt32Ty(VMContext), ColumnNo) - }; - return DITemplateValueParameter(MDNode::get(VMContext, Elts)); -} - -/// createStructType - Create debugging information entry for a struct. -DIType DIBuilder::createStructType(DIDescriptor Context, StringRef Name, - DIFile File, unsigned LineNumber, - uint64_t SizeInBits, uint64_t AlignInBits, - unsigned Flags, DIArray Elements, - unsigned RunTimeLang) { - // TAG_structure_type is encoded in DICompositeType format. - Value *Elts[] = { - GetTagConstant(VMContext, dwarf::DW_TAG_structure_type), - getNonCompileUnitScope(Context), - MDString::get(VMContext, Name), - File, - ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber), - ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits), - ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits), - ConstantInt::get(Type::getInt32Ty(VMContext), 0), - ConstantInt::get(Type::getInt32Ty(VMContext), Flags), - NULL, - Elements, - ConstantInt::get(Type::getInt32Ty(VMContext), RunTimeLang), - llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)), - }; - return DIType(MDNode::get(VMContext, Elts)); -} - -/// createUnionType - Create debugging information entry for an union. 
-DIType DIBuilder::createUnionType(DIDescriptor Scope, StringRef Name, - DIFile File, - unsigned LineNumber, uint64_t SizeInBits, - uint64_t AlignInBits, unsigned Flags, - DIArray Elements, unsigned RunTimeLang) { - // TAG_union_type is encoded in DICompositeType format. - Value *Elts[] = { - GetTagConstant(VMContext, dwarf::DW_TAG_union_type), - getNonCompileUnitScope(Scope), - MDString::get(VMContext, Name), - File, - ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber), - ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits), - ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits), - ConstantInt::get(Type::getInt64Ty(VMContext), 0), - ConstantInt::get(Type::getInt32Ty(VMContext), Flags), - NULL, - Elements, - ConstantInt::get(Type::getInt32Ty(VMContext), RunTimeLang), - llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)), - }; - return DIType(MDNode::get(VMContext, Elts)); -} - -/// createSubroutineType - Create subroutine type. -DIType DIBuilder::createSubroutineType(DIFile File, DIArray ParameterTypes) { - // TAG_subroutine_type is encoded in DICompositeType format. - Value *Elts[] = { - GetTagConstant(VMContext, dwarf::DW_TAG_subroutine_type), - llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)), - MDString::get(VMContext, ""), - llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)), - ConstantInt::get(Type::getInt32Ty(VMContext), 0), - ConstantInt::get(Type::getInt64Ty(VMContext), 0), - ConstantInt::get(Type::getInt64Ty(VMContext), 0), - ConstantInt::get(Type::getInt64Ty(VMContext), 0), - ConstantInt::get(Type::getInt32Ty(VMContext), 0), - NULL, - ParameterTypes, - ConstantInt::get(Type::getInt32Ty(VMContext), 0), - llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)), - }; - return DIType(MDNode::get(VMContext, Elts)); -} - -/// createEnumerationType - Create debugging information entry for an -/// enumeration. -DIType DIBuilder::createEnumerationType(DIDescriptor Scope, StringRef Name, - DIFile File, unsigned LineNumber, - uint64_t SizeInBits, - uint64_t AlignInBits, - DIArray Elements) { - // TAG_enumeration_type is encoded in DICompositeType format. - Value *Elts[] = { - GetTagConstant(VMContext, dwarf::DW_TAG_enumeration_type), - getNonCompileUnitScope(Scope), - MDString::get(VMContext, Name), - File, - ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber), - ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits), - ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits), - ConstantInt::get(Type::getInt32Ty(VMContext), 0), - ConstantInt::get(Type::getInt32Ty(VMContext), 0), - NULL, - Elements, - ConstantInt::get(Type::getInt32Ty(VMContext), 0), - llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)), - }; - MDNode *Node = MDNode::get(VMContext, Elts); - AllEnumTypes.push_back(Node); - return DIType(Node); -} - -/// createArrayType - Create debugging information entry for an array. -DIType DIBuilder::createArrayType(uint64_t Size, uint64_t AlignInBits, - DIType Ty, DIArray Subscripts) { - // TAG_array_type is encoded in DICompositeType format. 
- Value *Elts[] = { - GetTagConstant(VMContext, dwarf::DW_TAG_array_type), - NULL, //TheCU, - MDString::get(VMContext, ""), - NULL, //TheCU, - ConstantInt::get(Type::getInt32Ty(VMContext), 0), - ConstantInt::get(Type::getInt64Ty(VMContext), Size), - ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits), - ConstantInt::get(Type::getInt32Ty(VMContext), 0), - ConstantInt::get(Type::getInt32Ty(VMContext), 0), - Ty, - Subscripts, - ConstantInt::get(Type::getInt32Ty(VMContext), 0), - llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)), - }; - return DIType(MDNode::get(VMContext, Elts)); -} - -/// createVectorType - Create debugging information entry for a vector. -DIType DIBuilder::createVectorType(uint64_t Size, uint64_t AlignInBits, - DIType Ty, DIArray Subscripts) { - // TAG_vector_type is encoded in DICompositeType format. - Value *Elts[] = { - GetTagConstant(VMContext, dwarf::DW_TAG_vector_type), - NULL, //TheCU, - MDString::get(VMContext, ""), - NULL, //TheCU, - ConstantInt::get(Type::getInt32Ty(VMContext), 0), - ConstantInt::get(Type::getInt64Ty(VMContext), Size), - ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits), - ConstantInt::get(Type::getInt32Ty(VMContext), 0), - ConstantInt::get(Type::getInt32Ty(VMContext), 0), - Ty, - Subscripts, - ConstantInt::get(Type::getInt32Ty(VMContext), 0), - llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)), - }; - return DIType(MDNode::get(VMContext, Elts)); -} - -/// createArtificialType - Create a new DIType with "artificial" flag set. -DIType DIBuilder::createArtificialType(DIType Ty) { - if (Ty.isArtificial()) - return Ty; - - SmallVector Elts; - MDNode *N = Ty; - assert (N && "Unexpected input DIType!"); - for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { - if (Value *V = N->getOperand(i)) - Elts.push_back(V); - else - Elts.push_back(Constant::getNullValue(Type::getInt32Ty(VMContext))); - } - - unsigned CurFlags = Ty.getFlags(); - CurFlags = CurFlags | DIType::FlagArtificial; - - // Flags are stored at this slot. - Elts[8] = ConstantInt::get(Type::getInt32Ty(VMContext), CurFlags); - - return DIType(MDNode::get(VMContext, Elts)); -} - -/// retainType - Retain DIType in a module even if it is not referenced -/// through debug info anchors. -void DIBuilder::retainType(DIType T) { - AllRetainTypes.push_back(T); -} - -/// createUnspecifiedParameter - Create unspeicified type descriptor -/// for the subroutine type. -DIDescriptor DIBuilder::createUnspecifiedParameter() { - Value *Elts[] = { - GetTagConstant(VMContext, dwarf::DW_TAG_unspecified_parameters) - }; - return DIDescriptor(MDNode::get(VMContext, Elts)); -} - -/// createTemporaryType - Create a temporary forward-declared type. -DIType DIBuilder::createTemporaryType() { - // Give the temporary MDNode a tag. It doesn't matter what tag we - // use here as long as DIType accepts it. - Value *Elts[] = { GetTagConstant(VMContext, DW_TAG_base_type) }; - MDNode *Node = MDNode::getTemporary(VMContext, Elts); - return DIType(Node); -} - -/// createTemporaryType - Create a temporary forward-declared type. -DIType DIBuilder::createTemporaryType(DIFile F) { - // Give the temporary MDNode a tag. It doesn't matter what tag we - // use here as long as DIType accepts it. - Value *Elts[] = { - GetTagConstant(VMContext, DW_TAG_base_type), - TheCU, - NULL, - F - }; - MDNode *Node = MDNode::getTemporary(VMContext, Elts); - return DIType(Node); -} - -/// createForwardDecl - Create a temporary forward-declared type that -/// can be RAUW'd if the full type is seen. 
-DIType DIBuilder::createForwardDecl(unsigned Tag, StringRef Name, DIFile F, - unsigned Line, unsigned RuntimeLang) { - // Create a temporary MDNode. - Value *Elts[] = { - GetTagConstant(VMContext, Tag), - NULL, // TheCU - MDString::get(VMContext, Name), - F, - ConstantInt::get(Type::getInt32Ty(VMContext), Line), - // To ease transition include sizes etc of 0. - ConstantInt::get(Type::getInt32Ty(VMContext), 0), - ConstantInt::get(Type::getInt32Ty(VMContext), 0), - ConstantInt::get(Type::getInt32Ty(VMContext), 0), - ConstantInt::get(Type::getInt32Ty(VMContext), - DIDescriptor::FlagFwdDecl), - NULL, - DIArray(), - ConstantInt::get(Type::getInt32Ty(VMContext), RuntimeLang) - }; - MDNode *Node = MDNode::getTemporary(VMContext, Elts); - return DIType(Node); -} - -/// getOrCreateArray - Get a DIArray, create one if required. -DIArray DIBuilder::getOrCreateArray(ArrayRef Elements) { - if (Elements.empty()) { - Value *Null = llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)); - return DIArray(MDNode::get(VMContext, Null)); - } - return DIArray(MDNode::get(VMContext, Elements)); -} - -/// getOrCreateSubrange - Create a descriptor for a value range. This -/// implicitly uniques the values returned. -DISubrange DIBuilder::getOrCreateSubrange(int64_t Lo, int64_t Hi) { - Value *Elts[] = { - GetTagConstant(VMContext, dwarf::DW_TAG_subrange_type), - ConstantInt::get(Type::getInt64Ty(VMContext), Lo), - ConstantInt::get(Type::getInt64Ty(VMContext), Hi) - }; - - return DISubrange(MDNode::get(VMContext, Elts)); -} - -/// createGlobalVariable - Create a new descriptor for the specified global. -DIGlobalVariable DIBuilder:: -createGlobalVariable(StringRef Name, DIFile F, unsigned LineNumber, - DIType Ty, bool isLocalToUnit, llvm::Value *Val) { - Value *Elts[] = { - GetTagConstant(VMContext, dwarf::DW_TAG_variable), - llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)), - NULL, // TheCU, - MDString::get(VMContext, Name), - MDString::get(VMContext, Name), - MDString::get(VMContext, Name), - F, - ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber), - Ty, - ConstantInt::get(Type::getInt32Ty(VMContext), isLocalToUnit), - ConstantInt::get(Type::getInt32Ty(VMContext), 1), /* isDefinition*/ - Val - }; - MDNode *Node = MDNode::get(VMContext, Elts); - AllGVs.push_back(Node); - return DIGlobalVariable(Node); -} - -/// createStaticVariable - Create a new descriptor for the specified static -/// variable. -DIGlobalVariable DIBuilder:: -createStaticVariable(DIDescriptor Context, StringRef Name, - StringRef LinkageName, DIFile F, unsigned LineNumber, - DIType Ty, bool isLocalToUnit, llvm::Value *Val) { - Value *Elts[] = { - GetTagConstant(VMContext, dwarf::DW_TAG_variable), - llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)), - getNonCompileUnitScope(Context), - MDString::get(VMContext, Name), - MDString::get(VMContext, Name), - MDString::get(VMContext, LinkageName), - F, - ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber), - Ty, - ConstantInt::get(Type::getInt32Ty(VMContext), isLocalToUnit), - ConstantInt::get(Type::getInt32Ty(VMContext), 1), /* isDefinition*/ - Val - }; - MDNode *Node = MDNode::get(VMContext, Elts); - AllGVs.push_back(Node); - return DIGlobalVariable(Node); -} - -/// createVariable - Create a new descriptor for the specified variable. 
-DIVariable DIBuilder::createLocalVariable(unsigned Tag, DIDescriptor Scope, - StringRef Name, DIFile File, - unsigned LineNo, DIType Ty, - bool AlwaysPreserve, unsigned Flags, - unsigned ArgNo) { - Value *Elts[] = { - GetTagConstant(VMContext, Tag), - getNonCompileUnitScope(Scope), - MDString::get(VMContext, Name), - File, - ConstantInt::get(Type::getInt32Ty(VMContext), (LineNo | (ArgNo << 24))), - Ty, - ConstantInt::get(Type::getInt32Ty(VMContext), Flags), - Constant::getNullValue(Type::getInt32Ty(VMContext)), - }; - MDNode *Node = MDNode::get(VMContext, Elts); - if (AlwaysPreserve) { - // The optimizer may remove local variable. If there is an interest - // to preserve variable info in such situation then stash it in a - // named mdnode. - DISubprogram Fn(getDISubprogram(Scope)); - NamedMDNode *FnLocals = getOrInsertFnSpecificMDNode(M, Fn); - FnLocals->addOperand(Node); - } - return DIVariable(Node); -} - -/// createComplexVariable - Create a new descriptor for the specified variable -/// which has a complex address expression for its address. -DIVariable DIBuilder::createComplexVariable(unsigned Tag, DIDescriptor Scope, - StringRef Name, DIFile F, - unsigned LineNo, - DIType Ty, ArrayRef Addr, - unsigned ArgNo) { - SmallVector Elts; - Elts.push_back(GetTagConstant(VMContext, Tag)); - Elts.push_back(getNonCompileUnitScope(Scope)), - Elts.push_back(MDString::get(VMContext, Name)); - Elts.push_back(F); - Elts.push_back(ConstantInt::get(Type::getInt32Ty(VMContext), - (LineNo | (ArgNo << 24)))); - Elts.push_back(Ty); - Elts.push_back(llvm::Constant::getNullValue(Type::getInt32Ty(VMContext))); - Elts.push_back(llvm::Constant::getNullValue(Type::getInt32Ty(VMContext))); - Elts.append(Addr.begin(), Addr.end()); - - return DIVariable(MDNode::get(VMContext, Elts)); -} - -/// createFunction - Create a new descriptor for the specified function. -DISubprogram DIBuilder::createFunction(DIDescriptor Context, - StringRef Name, - StringRef LinkageName, - DIFile File, unsigned LineNo, - DIType Ty, - bool isLocalToUnit, bool isDefinition, - unsigned ScopeLine, - unsigned Flags, bool isOptimized, - Function *Fn, - MDNode *TParams, - MDNode *Decl) { - Value *TElts[] = { GetTagConstant(VMContext, DW_TAG_base_type) }; - MDNode *Temp = MDNode::getTemporary(VMContext, TElts); - Value *TVElts[] = { Temp }; - MDNode *THolder = MDNode::get(VMContext, TVElts); - - Value *Elts[] = { - GetTagConstant(VMContext, dwarf::DW_TAG_subprogram), - llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)), - getNonCompileUnitScope(Context), - MDString::get(VMContext, Name), - MDString::get(VMContext, Name), - MDString::get(VMContext, LinkageName), - File, - ConstantInt::get(Type::getInt32Ty(VMContext), LineNo), - Ty, - ConstantInt::get(Type::getInt1Ty(VMContext), isLocalToUnit), - ConstantInt::get(Type::getInt1Ty(VMContext), isDefinition), - ConstantInt::get(Type::getInt32Ty(VMContext), 0), - ConstantInt::get(Type::getInt32Ty(VMContext), 0), - NULL, - ConstantInt::get(Type::getInt32Ty(VMContext), Flags), - ConstantInt::get(Type::getInt1Ty(VMContext), isOptimized), - Fn, - TParams, - Decl, - THolder, - ConstantInt::get(Type::getInt32Ty(VMContext), ScopeLine) - }; - MDNode *Node = MDNode::get(VMContext, Elts); - - // Create a named metadata so that we do not lose this mdnode. - AllSubprograms.push_back(Node); - return DISubprogram(Node); -} - -/// createMethod - Create a new descriptor for the specified C++ method. 
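// (Editorial sketch of how the builder methods deleted in this file compose,
// using only the signatures shown above; M and Fn are assumed to be an
// existing Module and Function, and the trailing TParams/Decl arguments are
// assumed to default to null in the old header:
//
//    DIBuilder DIB(M);
//    DIB.createCompileUnit(dwarf::DW_LANG_C99, "t.c", "/tmp", "clang",
//                          /*isOptimized=*/false, "", 0);
//    DIFile File = DIB.createFile("t.c", "/tmp");
//    DIType IntTy = DIB.createBasicType("int", 32, 32, dwarf::DW_ATE_signed);
//    Value *TyArr[] = { IntTy };
//    DIType FnTy = DIB.createSubroutineType(File, DIB.getOrCreateArray(TyArr));
//    DISubprogram SP = DIB.createFunction(DIDescriptor(File), "main", "main",
//                                         File, /*LineNo=*/1, FnTy,
//                                         /*isLocalToUnit=*/false,
//                                         /*isDefinition=*/true,
//                                         /*ScopeLine=*/1, /*Flags=*/0,
//                                         /*isOptimized=*/false, Fn);
//    DIB.finalize();)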
-DISubprogram DIBuilder::createMethod(DIDescriptor Context, - StringRef Name, - StringRef LinkageName, - DIFile F, - unsigned LineNo, DIType Ty, - bool isLocalToUnit, - bool isDefinition, - unsigned VK, unsigned VIndex, - MDNode *VTableHolder, - unsigned Flags, - bool isOptimized, - Function *Fn, - MDNode *TParam) { - Value *TElts[] = { GetTagConstant(VMContext, DW_TAG_base_type) }; - MDNode *Temp = MDNode::getTemporary(VMContext, TElts); - Value *TVElts[] = { Temp }; - MDNode *THolder = MDNode::get(VMContext, TVElts); - - Value *Elts[] = { - GetTagConstant(VMContext, dwarf::DW_TAG_subprogram), - llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)), - getNonCompileUnitScope(Context), - MDString::get(VMContext, Name), - MDString::get(VMContext, Name), - MDString::get(VMContext, LinkageName), - F, - ConstantInt::get(Type::getInt32Ty(VMContext), LineNo), - Ty, - ConstantInt::get(Type::getInt1Ty(VMContext), isLocalToUnit), - ConstantInt::get(Type::getInt1Ty(VMContext), isDefinition), - ConstantInt::get(Type::getInt32Ty(VMContext), (unsigned)VK), - ConstantInt::get(Type::getInt32Ty(VMContext), VIndex), - VTableHolder, - ConstantInt::get(Type::getInt32Ty(VMContext), Flags), - ConstantInt::get(Type::getInt1Ty(VMContext), isOptimized), - Fn, - TParam, - llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)), - THolder, - // FIXME: Do we want to use a different scope lines? - ConstantInt::get(Type::getInt32Ty(VMContext), LineNo) - }; - MDNode *Node = MDNode::get(VMContext, Elts); - return DISubprogram(Node); -} - -/// createNameSpace - This creates new descriptor for a namespace -/// with the specified parent scope. -DINameSpace DIBuilder::createNameSpace(DIDescriptor Scope, StringRef Name, - DIFile File, unsigned LineNo) { - Value *Elts[] = { - GetTagConstant(VMContext, dwarf::DW_TAG_namespace), - getNonCompileUnitScope(Scope), - MDString::get(VMContext, Name), - File, - ConstantInt::get(Type::getInt32Ty(VMContext), LineNo) - }; - return DINameSpace(MDNode::get(VMContext, Elts)); -} - -/// createLexicalBlockFile - This creates a new MDNode that encapsulates -/// an existing scope with a new filename. -DILexicalBlockFile DIBuilder::createLexicalBlockFile(DIDescriptor Scope, - DIFile File) { - Value *Elts[] = { - GetTagConstant(VMContext, dwarf::DW_TAG_lexical_block), - Scope, - File - }; - return DILexicalBlockFile(MDNode::get(VMContext, Elts)); -} - -DILexicalBlock DIBuilder::createLexicalBlock(DIDescriptor Scope, DIFile File, - unsigned Line, unsigned Col) { - // Defeat MDNode uniqing for lexical blocks by using unique id. - static unsigned int unique_id = 0; - Value *Elts[] = { - GetTagConstant(VMContext, dwarf::DW_TAG_lexical_block), - getNonCompileUnitScope(Scope), - ConstantInt::get(Type::getInt32Ty(VMContext), Line), - ConstantInt::get(Type::getInt32Ty(VMContext), Col), - File, - ConstantInt::get(Type::getInt32Ty(VMContext), unique_id++) - }; - return DILexicalBlock(MDNode::get(VMContext, Elts)); -} - -/// insertDeclare - Insert a new llvm.dbg.declare intrinsic call. 
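// (Editorial note on the unique_id counter in createLexicalBlock above:
// MDNode::get() uniques structurally identical nodes, so two blocks opened at
// the same scope, line and column would otherwise collapse into one scope.
// The counter deliberately defeats that uniquing:
//
//    DILexicalBlock A = DIB.createLexicalBlock(SP, File, 10, 1);
//    DILexicalBlock B = DIB.createLexicalBlock(SP, File, 10, 1);
//    assert((MDNode *)A != (MDNode *)B && "distinct scopes, same location");)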
-Instruction *DIBuilder::insertDeclare(Value *Storage, DIVariable VarInfo, - Instruction *InsertBefore) { - assert(Storage && "no storage passed to dbg.declare"); - assert(VarInfo.Verify() && "empty DIVariable passed to dbg.declare"); - if (!DeclareFn) - DeclareFn = Intrinsic::getDeclaration(&M, Intrinsic::dbg_declare); - - Value *Args[] = { MDNode::get(Storage->getContext(), Storage), VarInfo }; - return CallInst::Create(DeclareFn, Args, "", InsertBefore); -} - -/// insertDeclare - Insert a new llvm.dbg.declare intrinsic call. -Instruction *DIBuilder::insertDeclare(Value *Storage, DIVariable VarInfo, - BasicBlock *InsertAtEnd) { - assert(Storage && "no storage passed to dbg.declare"); - assert(VarInfo.Verify() && "invalid DIVariable passed to dbg.declare"); - if (!DeclareFn) - DeclareFn = Intrinsic::getDeclaration(&M, Intrinsic::dbg_declare); - - Value *Args[] = { MDNode::get(Storage->getContext(), Storage), VarInfo }; - - // If this block already has a terminator then insert this intrinsic - // before the terminator. - if (TerminatorInst *T = InsertAtEnd->getTerminator()) - return CallInst::Create(DeclareFn, Args, "", T); - else - return CallInst::Create(DeclareFn, Args, "", InsertAtEnd); -} - -/// insertDbgValueIntrinsic - Insert a new llvm.dbg.value intrinsic call. -Instruction *DIBuilder::insertDbgValueIntrinsic(Value *V, uint64_t Offset, - DIVariable VarInfo, - Instruction *InsertBefore) { - assert(V && "no value passed to dbg.value"); - assert(VarInfo.Verify() && "invalid DIVariable passed to dbg.value"); - if (!ValueFn) - ValueFn = Intrinsic::getDeclaration(&M, Intrinsic::dbg_value); - - Value *Args[] = { MDNode::get(V->getContext(), V), - ConstantInt::get(Type::getInt64Ty(V->getContext()), Offset), - VarInfo }; - return CallInst::Create(ValueFn, Args, "", InsertBefore); -} - -/// insertDbgValueIntrinsic - Insert a new llvm.dbg.value intrinsic call. -Instruction *DIBuilder::insertDbgValueIntrinsic(Value *V, uint64_t Offset, - DIVariable VarInfo, - BasicBlock *InsertAtEnd) { - assert(V && "no value passed to dbg.value"); - assert(VarInfo.Verify() && "invalid DIVariable passed to dbg.value"); - if (!ValueFn) - ValueFn = Intrinsic::getDeclaration(&M, Intrinsic::dbg_value); - - Value *Args[] = { MDNode::get(V->getContext(), V), - ConstantInt::get(Type::getInt64Ty(V->getContext()), Offset), - VarInfo }; - return CallInst::Create(ValueFn, Args, "", InsertAtEnd); -} diff --git a/lib/Analysis/DbgInfoPrinter.cpp b/lib/Analysis/DbgInfoPrinter.cpp index cd832ab..41cd34c 100644 --- a/lib/Analysis/DbgInfoPrinter.cpp +++ b/lib/Analysis/DbgInfoPrinter.cpp @@ -16,14 +16,14 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Pass.h" +#include "llvm/DebugInfo.h" #include "llvm/Function.h" #include "llvm/IntrinsicInst.h" #include "llvm/Metadata.h" #include "llvm/Module.h" -#include "llvm/Assembly/Writer.h" -#include "llvm/Analysis/DebugInfo.h" +#include "llvm/Pass.h" #include "llvm/Analysis/Passes.h" +#include "llvm/Assembly/Writer.h" #include "llvm/Support/CFG.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/raw_ostream.h" diff --git a/lib/Analysis/DebugInfo.cpp b/lib/Analysis/DebugInfo.cpp deleted file mode 100644 index f61a8f3..0000000 --- a/lib/Analysis/DebugInfo.cpp +++ /dev/null @@ -1,1229 +0,0 @@ -//===--- DebugInfo.cpp - Debug Information Helper Classes -----------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. 
See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements the helper classes used to build and interpret debug -// information in LLVM IR form. -// -//===----------------------------------------------------------------------===// - -#include "llvm/Analysis/DebugInfo.h" -#include "llvm/Constants.h" -#include "llvm/DerivedTypes.h" -#include "llvm/Intrinsics.h" -#include "llvm/IntrinsicInst.h" -#include "llvm/Instructions.h" -#include "llvm/Module.h" -#include "llvm/Analysis/ValueTracking.h" -#include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/SmallString.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/Dwarf.h" -#include "llvm/Support/raw_ostream.h" -using namespace llvm; -using namespace llvm::dwarf; - -//===----------------------------------------------------------------------===// -// DIDescriptor -//===----------------------------------------------------------------------===// - -DIDescriptor::DIDescriptor(const DIFile F) : DbgNode(F.DbgNode) { -} - -DIDescriptor::DIDescriptor(const DISubprogram F) : DbgNode(F.DbgNode) { -} - -DIDescriptor::DIDescriptor(const DILexicalBlockFile F) : DbgNode(F.DbgNode) { -} - -DIDescriptor::DIDescriptor(const DILexicalBlock F) : DbgNode(F.DbgNode) { -} - -DIDescriptor::DIDescriptor(const DIVariable F) : DbgNode(F.DbgNode) { -} - -DIDescriptor::DIDescriptor(const DIType F) : DbgNode(F.DbgNode) { -} - -StringRef -DIDescriptor::getStringField(unsigned Elt) const { - if (DbgNode == 0) - return StringRef(); - - if (Elt < DbgNode->getNumOperands()) - if (MDString *MDS = dyn_cast_or_null(DbgNode->getOperand(Elt))) - return MDS->getString(); - - return StringRef(); -} - -uint64_t DIDescriptor::getUInt64Field(unsigned Elt) const { - if (DbgNode == 0) - return 0; - - if (Elt < DbgNode->getNumOperands()) - if (ConstantInt *CI = dyn_cast_or_null(DbgNode->getOperand(Elt))) - return CI->getZExtValue(); - - return 0; -} - -DIDescriptor DIDescriptor::getDescriptorField(unsigned Elt) const { - if (DbgNode == 0) - return DIDescriptor(); - - if (Elt < DbgNode->getNumOperands()) - return - DIDescriptor(dyn_cast_or_null(DbgNode->getOperand(Elt))); - return DIDescriptor(); -} - -GlobalVariable *DIDescriptor::getGlobalVariableField(unsigned Elt) const { - if (DbgNode == 0) - return 0; - - if (Elt < DbgNode->getNumOperands()) - return dyn_cast_or_null(DbgNode->getOperand(Elt)); - return 0; -} - -Constant *DIDescriptor::getConstantField(unsigned Elt) const { - if (DbgNode == 0) - return 0; - - if (Elt < DbgNode->getNumOperands()) - return dyn_cast_or_null(DbgNode->getOperand(Elt)); - return 0; -} - -Function *DIDescriptor::getFunctionField(unsigned Elt) const { - if (DbgNode == 0) - return 0; - - if (Elt < DbgNode->getNumOperands()) - return dyn_cast_or_null(DbgNode->getOperand(Elt)); - return 0; -} - -unsigned DIVariable::getNumAddrElements() const { - if (getVersion() <= llvm::LLVMDebugVersion8) - return DbgNode->getNumOperands()-6; - if (getVersion() == llvm::LLVMDebugVersion9) - return DbgNode->getNumOperands()-7; - return DbgNode->getNumOperands()-8; -} - -/// getInlinedAt - If this variable is inlined then return inline location. 
-MDNode *DIVariable::getInlinedAt() const { - if (getVersion() <= llvm::LLVMDebugVersion9) - return NULL; - return dyn_cast_or_null(DbgNode->getOperand(7)); -} - -//===----------------------------------------------------------------------===// -// Predicates -//===----------------------------------------------------------------------===// - -/// isBasicType - Return true if the specified tag is legal for -/// DIBasicType. -bool DIDescriptor::isBasicType() const { - if (!DbgNode) return false; - switch (getTag()) { - case dwarf::DW_TAG_base_type: - case dwarf::DW_TAG_unspecified_type: - return true; - default: - return false; - } -} - -/// isDerivedType - Return true if the specified tag is legal for DIDerivedType. -bool DIDescriptor::isDerivedType() const { - if (!DbgNode) return false; - switch (getTag()) { - case dwarf::DW_TAG_typedef: - case dwarf::DW_TAG_pointer_type: - case dwarf::DW_TAG_reference_type: - case dwarf::DW_TAG_const_type: - case dwarf::DW_TAG_volatile_type: - case dwarf::DW_TAG_restrict_type: - case dwarf::DW_TAG_member: - case dwarf::DW_TAG_inheritance: - case dwarf::DW_TAG_friend: - return true; - default: - // CompositeTypes are currently modelled as DerivedTypes. - return isCompositeType(); - } -} - -/// isCompositeType - Return true if the specified tag is legal for -/// DICompositeType. -bool DIDescriptor::isCompositeType() const { - if (!DbgNode) return false; - switch (getTag()) { - case dwarf::DW_TAG_array_type: - case dwarf::DW_TAG_structure_type: - case dwarf::DW_TAG_union_type: - case dwarf::DW_TAG_enumeration_type: - case dwarf::DW_TAG_vector_type: - case dwarf::DW_TAG_subroutine_type: - case dwarf::DW_TAG_class_type: - return true; - default: - return false; - } -} - -/// isVariable - Return true if the specified tag is legal for DIVariable. -bool DIDescriptor::isVariable() const { - if (!DbgNode) return false; - switch (getTag()) { - case dwarf::DW_TAG_auto_variable: - case dwarf::DW_TAG_arg_variable: - case dwarf::DW_TAG_return_variable: - return true; - default: - return false; - } -} - -/// isType - Return true if the specified tag is legal for DIType. -bool DIDescriptor::isType() const { - return isBasicType() || isCompositeType() || isDerivedType(); -} - -/// isSubprogram - Return true if the specified tag is legal for -/// DISubprogram. -bool DIDescriptor::isSubprogram() const { - return DbgNode && getTag() == dwarf::DW_TAG_subprogram; -} - -/// isGlobalVariable - Return true if the specified tag is legal for -/// DIGlobalVariable. -bool DIDescriptor::isGlobalVariable() const { - return DbgNode && (getTag() == dwarf::DW_TAG_variable || - getTag() == dwarf::DW_TAG_constant); -} - -/// isGlobal - Return true if the specified tag is legal for DIGlobal. -bool DIDescriptor::isGlobal() const { - return isGlobalVariable(); -} - -/// isUnspecifiedParmeter - Return true if the specified tag is -/// DW_TAG_unspecified_parameters. -bool DIDescriptor::isUnspecifiedParameter() const { - return DbgNode && getTag() == dwarf::DW_TAG_unspecified_parameters; -} - -/// isScope - Return true if the specified tag is one of the scope -/// related tag. -bool DIDescriptor::isScope() const { - if (!DbgNode) return false; - switch (getTag()) { - case dwarf::DW_TAG_compile_unit: - case dwarf::DW_TAG_lexical_block: - case dwarf::DW_TAG_subprogram: - case dwarf::DW_TAG_namespace: - return true; - default: - break; - } - return false; -} - -/// isTemplateTypeParameter - Return true if the specified tag is -/// DW_TAG_template_type_parameter. 
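// (Editorial distillation: nearly every predicate in this block reduces to
// "non-null node with the expected DWARF tag",
//
//    bool DIDescriptor::isFoo() const {           // hypothetical shape
//      return DbgNode && getTag() == dwarf::DW_TAG_foo;
//    }
//
// with isLexicalBlockFile/isLexicalBlock just below as the exception: both
// carry DW_TAG_lexical_block and are disambiguated by operand count.)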
-bool DIDescriptor::isTemplateTypeParameter() const { - return DbgNode && getTag() == dwarf::DW_TAG_template_type_parameter; -} - -/// isTemplateValueParameter - Return true if the specified tag is -/// DW_TAG_template_value_parameter. -bool DIDescriptor::isTemplateValueParameter() const { - return DbgNode && getTag() == dwarf::DW_TAG_template_value_parameter; -} - -/// isCompileUnit - Return true if the specified tag is DW_TAG_compile_unit. -bool DIDescriptor::isCompileUnit() const { - return DbgNode && getTag() == dwarf::DW_TAG_compile_unit; -} - -/// isFile - Return true if the specified tag is DW_TAG_file_type. -bool DIDescriptor::isFile() const { - return DbgNode && getTag() == dwarf::DW_TAG_file_type; -} - -/// isNameSpace - Return true if the specified tag is DW_TAG_namespace. -bool DIDescriptor::isNameSpace() const { - return DbgNode && getTag() == dwarf::DW_TAG_namespace; -} - -/// isLexicalBlockFile - Return true if the specified descriptor is a -/// lexical block with an extra file. -bool DIDescriptor::isLexicalBlockFile() const { - return DbgNode && getTag() == dwarf::DW_TAG_lexical_block && - (DbgNode->getNumOperands() == 3); -} - -/// isLexicalBlock - Return true if the specified tag is DW_TAG_lexical_block. -bool DIDescriptor::isLexicalBlock() const { - return DbgNode && getTag() == dwarf::DW_TAG_lexical_block && - (DbgNode->getNumOperands() > 3); -} - -/// isSubrange - Return true if the specified tag is DW_TAG_subrange_type. -bool DIDescriptor::isSubrange() const { - return DbgNode && getTag() == dwarf::DW_TAG_subrange_type; -} - -/// isEnumerator - Return true if the specified tag is DW_TAG_enumerator. -bool DIDescriptor::isEnumerator() const { - return DbgNode && getTag() == dwarf::DW_TAG_enumerator; -} - -/// isObjCProperty - Return true if the specified tag is DW_TAG -bool DIDescriptor::isObjCProperty() const { - return DbgNode && getTag() == dwarf::DW_TAG_APPLE_property; -} -//===----------------------------------------------------------------------===// -// Simple Descriptor Constructors and other Methods -//===----------------------------------------------------------------------===// - -DIType::DIType(const MDNode *N) : DIScope(N) { - if (!N) return; - if (!isBasicType() && !isDerivedType() && !isCompositeType()) { - DbgNode = 0; - } -} - -unsigned DIArray::getNumElements() const { - if (!DbgNode) - return 0; - return DbgNode->getNumOperands(); -} - -/// replaceAllUsesWith - Replace all uses of debug info referenced by -/// this descriptor. -void DIType::replaceAllUsesWith(DIDescriptor &D) { - if (!DbgNode) - return; - - // Since we use a TrackingVH for the node, its easy for clients to manufacture - // legitimate situations where they want to replaceAllUsesWith() on something - // which, due to uniquing, has merged with the source. We shield clients from - // this detail by allowing a value to be replaced with replaceAllUsesWith() - // itself. - if (DbgNode != D) { - MDNode *Node = const_cast(DbgNode); - const MDNode *DN = D; - const Value *V = cast_or_null(DN); - Node->replaceAllUsesWith(const_cast(V)); - MDNode::deleteTemporary(Node); - } -} - -/// replaceAllUsesWith - Replace all uses of debug info referenced by -/// this descriptor. -void DIType::replaceAllUsesWith(MDNode *D) { - if (!DbgNode) - return; - - // Since we use a TrackingVH for the node, its easy for clients to manufacture - // legitimate situations where they want to replaceAllUsesWith() on something - // which, due to uniquing, has merged with the source. 
We shield clients from - // this detail by allowing a value to be replaced with replaceAllUsesWith() - // itself. - if (DbgNode != D) { - MDNode *Node = const_cast(DbgNode); - const MDNode *DN = D; - const Value *V = cast_or_null(DN); - Node->replaceAllUsesWith(const_cast(V)); - MDNode::deleteTemporary(Node); - } -} - -/// isUnsignedDIType - Return true if type encoding is unsigned. -bool DIType::isUnsignedDIType() { - DIDerivedType DTy(DbgNode); - if (DTy.Verify()) - return DTy.getTypeDerivedFrom().isUnsignedDIType(); - - DIBasicType BTy(DbgNode); - if (BTy.Verify()) { - unsigned Encoding = BTy.getEncoding(); - if (Encoding == dwarf::DW_ATE_unsigned || - Encoding == dwarf::DW_ATE_unsigned_char) - return true; - } - return false; -} - -/// Verify - Verify that a compile unit is well formed. -bool DICompileUnit::Verify() const { - if (!DbgNode) - return false; - StringRef N = getFilename(); - if (N.empty()) - return false; - // It is possible that directory and produce string is empty. - return true; -} - -/// Verify - Verify that an ObjC property is well formed. -bool DIObjCProperty::Verify() const { - if (!DbgNode) - return false; - unsigned Tag = getTag(); - if (Tag != dwarf::DW_TAG_APPLE_property) return false; - DIType Ty = getType(); - if (!Ty.Verify()) return false; - - // Don't worry about the rest of the strings for now. - return true; -} - -/// Verify - Verify that a type descriptor is well formed. -bool DIType::Verify() const { - if (!DbgNode) - return false; - if (getContext() && !getContext().Verify()) - return false; - unsigned Tag = getTag(); - if (!isBasicType() && Tag != dwarf::DW_TAG_const_type && - Tag != dwarf::DW_TAG_volatile_type && Tag != dwarf::DW_TAG_pointer_type && - Tag != dwarf::DW_TAG_reference_type && Tag != dwarf::DW_TAG_restrict_type - && Tag != dwarf::DW_TAG_vector_type && Tag != dwarf::DW_TAG_array_type - && Tag != dwarf::DW_TAG_enumeration_type - && Tag != dwarf::DW_TAG_subroutine_type - && getFilename().empty()) - return false; - return true; -} - -/// Verify - Verify that a basic type descriptor is well formed. -bool DIBasicType::Verify() const { - return isBasicType(); -} - -/// Verify - Verify that a derived type descriptor is well formed. -bool DIDerivedType::Verify() const { - return isDerivedType(); -} - -/// Verify - Verify that a composite type descriptor is well formed. -bool DICompositeType::Verify() const { - if (!DbgNode) - return false; - if (getContext() && !getContext().Verify()) - return false; - - return true; -} - -/// Verify - Verify that a subprogram descriptor is well formed. -bool DISubprogram::Verify() const { - if (!DbgNode) - return false; - - if (getContext() && !getContext().Verify()) - return false; - - DICompositeType Ty = getType(); - if (!Ty.Verify()) - return false; - return true; -} - -/// Verify - Verify that a global variable descriptor is well formed. -bool DIGlobalVariable::Verify() const { - if (!DbgNode) - return false; - - if (getDisplayName().empty()) - return false; - - if (getContext() && !getContext().Verify()) - return false; - - DIType Ty = getType(); - if (!Ty.Verify()) - return false; - - if (!getGlobal() && !getConstant()) - return false; - - return true; -} - -/// Verify - Verify that a variable descriptor is well formed. -bool DIVariable::Verify() const { - if (!DbgNode) - return false; - - if (getContext() && !getContext().Verify()) - return false; - - DIType Ty = getType(); - if (!Ty.Verify()) - return false; - - return true; -} - -/// Verify - Verify that a location descriptor is well formed. 
-bool DILocation::Verify() const { - if (!DbgNode) - return false; - - return DbgNode->getNumOperands() == 4; -} - -/// Verify - Verify that a namespace descriptor is well formed. -bool DINameSpace::Verify() const { - if (!DbgNode) - return false; - if (getName().empty()) - return false; - return true; -} - -/// getOriginalTypeSize - If this type is derived from a base type then -/// return base type size. -uint64_t DIDerivedType::getOriginalTypeSize() const { - unsigned Tag = getTag(); - - if (Tag == dwarf::DW_TAG_member || Tag == dwarf::DW_TAG_typedef || - Tag == dwarf::DW_TAG_const_type || Tag == dwarf::DW_TAG_volatile_type || - Tag == dwarf::DW_TAG_restrict_type) { - DIType BaseType = getTypeDerivedFrom(); - // If this type is not derived from any type then take conservative - // approach. - if (!BaseType.isValid()) - return getSizeInBits(); - // If this is a derived type, go ahead and get the base type, unless - // it's a reference then it's just the size of the field. Pointer types - // have no need of this since they're a different type of qualification - // on the type. - if (BaseType.getTag() == dwarf::DW_TAG_reference_type) - return getSizeInBits(); - else if (BaseType.isDerivedType()) - return DIDerivedType(BaseType).getOriginalTypeSize(); - else - return BaseType.getSizeInBits(); - } - - return getSizeInBits(); -} - -/// getObjCProperty - Return property node, if this ivar is associated with one. -MDNode *DIDerivedType::getObjCProperty() const { - if (getVersion() <= LLVMDebugVersion11 || DbgNode->getNumOperands() <= 10) - return NULL; - return dyn_cast_or_null(DbgNode->getOperand(10)); -} - -/// isInlinedFnArgument - Return true if this variable provides debugging -/// information for an inlined function arguments. -bool DIVariable::isInlinedFnArgument(const Function *CurFn) { - assert(CurFn && "Invalid function"); - if (!getContext().isSubprogram()) - return false; - // This variable is not inlined function argument if its scope - // does not describe current function. - return !(DISubprogram(getContext()).describes(CurFn)); -} - -/// describes - Return true if this subprogram provides debugging -/// information for the function F. 
-bool DISubprogram::describes(const Function *F) { - assert(F && "Invalid function"); - if (F == getFunction()) - return true; - StringRef Name = getLinkageName(); - if (Name.empty()) - Name = getName(); - if (F->getName() == Name) - return true; - return false; -} - -unsigned DISubprogram::isOptimized() const { - assert (DbgNode && "Invalid subprogram descriptor!"); - if (DbgNode->getNumOperands() == 16) - return getUnsignedField(15); - return 0; -} - -MDNode *DISubprogram::getVariablesNodes() const { - if (!DbgNode || DbgNode->getNumOperands() <= 19) - return NULL; - if (MDNode *Temp = dyn_cast_or_null(DbgNode->getOperand(19))) - return dyn_cast_or_null(Temp->getOperand(0)); - return NULL; -} - -DIArray DISubprogram::getVariables() const { - if (!DbgNode || DbgNode->getNumOperands() <= 19) - return DIArray(); - if (MDNode *T = dyn_cast_or_null(DbgNode->getOperand(19))) - if (MDNode *A = dyn_cast_or_null(T->getOperand(0))) - return DIArray(A); - return DIArray(); -} - -StringRef DIScope::getFilename() const { - if (!DbgNode) - return StringRef(); - if (isLexicalBlockFile()) - return DILexicalBlockFile(DbgNode).getFilename(); - if (isLexicalBlock()) - return DILexicalBlock(DbgNode).getFilename(); - if (isSubprogram()) - return DISubprogram(DbgNode).getFilename(); - if (isCompileUnit()) - return DICompileUnit(DbgNode).getFilename(); - if (isNameSpace()) - return DINameSpace(DbgNode).getFilename(); - if (isType()) - return DIType(DbgNode).getFilename(); - if (isFile()) - return DIFile(DbgNode).getFilename(); - llvm_unreachable("Invalid DIScope!"); -} - -StringRef DIScope::getDirectory() const { - if (!DbgNode) - return StringRef(); - if (isLexicalBlockFile()) - return DILexicalBlockFile(DbgNode).getDirectory(); - if (isLexicalBlock()) - return DILexicalBlock(DbgNode).getDirectory(); - if (isSubprogram()) - return DISubprogram(DbgNode).getDirectory(); - if (isCompileUnit()) - return DICompileUnit(DbgNode).getDirectory(); - if (isNameSpace()) - return DINameSpace(DbgNode).getDirectory(); - if (isType()) - return DIType(DbgNode).getDirectory(); - if (isFile()) - return DIFile(DbgNode).getDirectory(); - llvm_unreachable("Invalid DIScope!"); -} - -DIArray DICompileUnit::getEnumTypes() const { - if (!DbgNode || DbgNode->getNumOperands() < 14) - return DIArray(); - - if (MDNode *N = dyn_cast_or_null(DbgNode->getOperand(10))) - if (MDNode *A = dyn_cast_or_null(N->getOperand(0))) - return DIArray(A); - return DIArray(); -} - -DIArray DICompileUnit::getRetainedTypes() const { - if (!DbgNode || DbgNode->getNumOperands() < 14) - return DIArray(); - - if (MDNode *N = dyn_cast_or_null(DbgNode->getOperand(11))) - if (MDNode *A = dyn_cast_or_null(N->getOperand(0))) - return DIArray(A); - return DIArray(); -} - -DIArray DICompileUnit::getSubprograms() const { - if (!DbgNode || DbgNode->getNumOperands() < 14) - return DIArray(); - - if (MDNode *N = dyn_cast_or_null(DbgNode->getOperand(12))) - if (MDNode *A = dyn_cast_or_null(N->getOperand(0))) - return DIArray(A); - return DIArray(); -} - - -DIArray DICompileUnit::getGlobalVariables() const { - if (!DbgNode || DbgNode->getNumOperands() < 14) - return DIArray(); - - if (MDNode *N = dyn_cast_or_null(DbgNode->getOperand(13))) - if (MDNode *A = dyn_cast_or_null(N->getOperand(0))) - return DIArray(A); - return DIArray(); -} - -//===----------------------------------------------------------------------===// -// DIDescriptor: vtable anchors for all descriptors. 
-//===----------------------------------------------------------------------===// - -void DIScope::anchor() { } - -void DICompileUnit::anchor() { } - -void DIFile::anchor() { } - -void DIType::anchor() { } - -void DIBasicType::anchor() { } - -void DIDerivedType::anchor() { } - -void DICompositeType::anchor() { } - -void DISubprogram::anchor() { } - -void DILexicalBlock::anchor() { } - -void DINameSpace::anchor() { } - -void DILexicalBlockFile::anchor() { } - -//===----------------------------------------------------------------------===// -// DIDescriptor: dump routines for all descriptors. -//===----------------------------------------------------------------------===// - - -/// print - Print descriptor. -void DIDescriptor::print(raw_ostream &OS) const { - OS << "[" << dwarf::TagString(getTag()) << "] "; - OS.write_hex((intptr_t) &*DbgNode) << ']'; -} - -/// print - Print compile unit. -void DICompileUnit::print(raw_ostream &OS) const { - if (getLanguage()) - OS << " [" << dwarf::LanguageString(getLanguage()) << "] "; - - OS << " [" << getDirectory() << "/" << getFilename() << "]"; -} - -/// print - Print type. -void DIType::print(raw_ostream &OS) const { - if (!DbgNode) return; - - StringRef Res = getName(); - if (!Res.empty()) - OS << " [" << Res << "] "; - - unsigned Tag = getTag(); - OS << " [" << dwarf::TagString(Tag) << "] "; - - // TODO : Print context - OS << " [" - << "line " << getLineNumber() << ", " - << getSizeInBits() << " bits, " - << getAlignInBits() << " bit alignment, " - << getOffsetInBits() << " bit offset" - << "] "; - - if (isPrivate()) - OS << " [private] "; - else if (isProtected()) - OS << " [protected] "; - - if (isForwardDecl()) - OS << " [fwd] "; - - if (isBasicType()) - DIBasicType(DbgNode).print(OS); - else if (isDerivedType()) { - DIDerivedType DTy = DIDerivedType(DbgNode); - DTy.print(OS); - DICompositeType CTy = getDICompositeType(DTy); - if (CTy.Verify()) - CTy.print(OS); - } - else if (isCompositeType()) - DICompositeType(DbgNode).print(OS); - else { - OS << "Invalid DIType\n"; - return; - } - - OS << "\n"; -} - -/// print - Print basic type. -void DIBasicType::print(raw_ostream &OS) const { - OS << " [" << dwarf::AttributeEncodingString(getEncoding()) << "] "; -} - -/// print - Print derived type. -void DIDerivedType::print(raw_ostream &OS) const { - OS << "\n\t Derived From: "; - getTypeDerivedFrom().print(OS); - OS << "\n\t"; -} - -/// print - Print composite type. -void DICompositeType::print(raw_ostream &OS) const { - DIArray A = getTypeArray(); - OS << " [" << A.getNumElements() << " elements]"; -} - -/// print - Print subprogram. -void DISubprogram::print(raw_ostream &OS) const { - StringRef Res = getName(); - if (!Res.empty()) - OS << " [" << Res << "] "; - - unsigned Tag = getTag(); - OS << " [" << dwarf::TagString(Tag) << "] "; - - // TODO : Print context - OS << " [" << getLineNumber() << "] "; - - if (isLocalToUnit()) - OS << " [local] "; - - if (isDefinition()) - OS << " [def] "; - - if (getScopeLineNumber() != getLineNumber()) - OS << " [Scope: " << getScopeLineNumber() << "] "; - - OS << "\n"; -} - -/// print - Print global variable. 
-void DIGlobalVariable::print(raw_ostream &OS) const { - OS << " ["; - StringRef Res = getName(); - if (!Res.empty()) - OS << " [" << Res << "] "; - - unsigned Tag = getTag(); - OS << " [" << dwarf::TagString(Tag) << "] "; - - // TODO : Print context - OS << " [" << getLineNumber() << "] "; - - if (isLocalToUnit()) - OS << " [local] "; - - if (isDefinition()) - OS << " [def] "; - - if (isGlobalVariable()) - DIGlobalVariable(DbgNode).print(OS); - OS << "]\n"; -} - -static void printDebugLoc(DebugLoc DL, raw_ostream &CommentOS, - const LLVMContext &Ctx) { - if (!DL.isUnknown()) { // Print source line info. - DIScope Scope(DL.getScope(Ctx)); - // Omit the directory, because it's likely to be long and uninteresting. - if (Scope.Verify()) - CommentOS << Scope.getFilename(); - else - CommentOS << ""; - CommentOS << ':' << DL.getLine(); - if (DL.getCol() != 0) - CommentOS << ':' << DL.getCol(); - DebugLoc InlinedAtDL = DebugLoc::getFromDILocation(DL.getInlinedAt(Ctx)); - if (!InlinedAtDL.isUnknown()) { - CommentOS << " @[ "; - printDebugLoc(InlinedAtDL, CommentOS, Ctx); - CommentOS << " ]"; - } - } -} - -void DIVariable::printExtendedName(raw_ostream &OS) const { - const LLVMContext &Ctx = DbgNode->getContext(); - StringRef Res = getName(); - if (!Res.empty()) - OS << Res << "," << getLineNumber(); - if (MDNode *InlinedAt = getInlinedAt()) { - DebugLoc InlinedAtDL = DebugLoc::getFromDILocation(InlinedAt); - if (!InlinedAtDL.isUnknown()) { - OS << " @["; - printDebugLoc(InlinedAtDL, OS, Ctx); - OS << "]"; - } - } -} - -/// print - Print variable. -void DIVariable::print(raw_ostream &OS) const { - StringRef Res = getName(); - if (!Res.empty()) - OS << " [" << Res << "] "; - - OS << " [" << getLineNumber() << "] "; - getType().print(OS); - OS << "\n"; - - // FIXME: Dump complex addresses -} - -/// dump - Print descriptor to dbgs() with a newline. -void DIDescriptor::dump() const { - print(dbgs()); dbgs() << '\n'; -} - -/// dump - Print compile unit to dbgs() with a newline. -void DICompileUnit::dump() const { - print(dbgs()); dbgs() << '\n'; -} - -/// dump - Print type to dbgs() with a newline. -void DIType::dump() const { - print(dbgs()); dbgs() << '\n'; -} - -/// dump - Print basic type to dbgs() with a newline. -void DIBasicType::dump() const { - print(dbgs()); dbgs() << '\n'; -} - -/// dump - Print derived type to dbgs() with a newline. -void DIDerivedType::dump() const { - print(dbgs()); dbgs() << '\n'; -} - -/// dump - Print composite type to dbgs() with a newline. -void DICompositeType::dump() const { - print(dbgs()); dbgs() << '\n'; -} - -/// dump - Print subprogram to dbgs() with a newline. -void DISubprogram::dump() const { - print(dbgs()); dbgs() << '\n'; -} - -/// dump - Print global variable. -void DIGlobalVariable::dump() const { - print(dbgs()); dbgs() << '\n'; -} - -/// dump - Print variable. -void DIVariable::dump() const { - print(dbgs()); dbgs() << '\n'; -} - -/// fixupObjcLikeName - Replace contains special characters used -/// in a typical Objective-C names with '.' in a given string. -static void fixupObjcLikeName(StringRef Str, SmallVectorImpl &Out) { - bool isObjCLike = false; - for (size_t i = 0, e = Str.size(); i < e; ++i) { - char C = Str[i]; - if (C == '[') - isObjCLike = true; - - if (isObjCLike && (C == '[' || C == ']' || C == ' ' || C == ':' || - C == '+' || C == '(' || C == ')')) - Out.push_back('.'); - else - Out.push_back(C); - } -} - -/// getFnSpecificMDNode - Return a NameMDNode, if available, that is -/// suitable to hold function specific information. 
-NamedMDNode *llvm::getFnSpecificMDNode(const Module &M, DISubprogram Fn) {
-  SmallString<32> Name = StringRef("llvm.dbg.lv.");
-  StringRef FName = "fn";
-  if (Fn.getFunction())
-    FName = Fn.getFunction()->getName();
-  else
-    FName = Fn.getName();
-  char One = '\1';
-  if (FName.startswith(StringRef(&One, 1)))
-    FName = FName.substr(1);
-  fixupObjcLikeName(FName, Name);
-  return M.getNamedMetadata(Name.str());
-}
-
-/// getOrInsertFnSpecificMDNode - Return a NameMDNode that is suitable
-/// to hold function specific information.
-NamedMDNode *llvm::getOrInsertFnSpecificMDNode(Module &M, DISubprogram Fn) {
-  SmallString<32> Name = StringRef("llvm.dbg.lv.");
-  StringRef FName = "fn";
-  if (Fn.getFunction())
-    FName = Fn.getFunction()->getName();
-  else
-    FName = Fn.getName();
-  char One = '\1';
-  if (FName.startswith(StringRef(&One, 1)))
-    FName = FName.substr(1);
-  fixupObjcLikeName(FName, Name);
-
-  return M.getOrInsertNamedMetadata(Name.str());
-}
-
-/// createInlinedVariable - Create a new inlined variable based on current
-/// variable.
-/// @param DV            Current Variable.
-/// @param InlinedScope  Location at which the current variable is inlined.
-DIVariable llvm::createInlinedVariable(MDNode *DV, MDNode *InlinedScope,
-                                       LLVMContext &VMContext) {
-  SmallVector<Value *, 16> Elts;
-  // Insert the inlined scope as the 7th element.
-  for (unsigned i = 0, e = DV->getNumOperands(); i != e; ++i)
-    i == 7 ? Elts.push_back(InlinedScope) :
-             Elts.push_back(DV->getOperand(i));
-  return DIVariable(MDNode::get(VMContext, Elts));
-}
-
-/// cleanseInlinedVariable - Remove inlined scope from the variable.
-DIVariable llvm::cleanseInlinedVariable(MDNode *DV, LLVMContext &VMContext) {
-  SmallVector<Value *, 16> Elts;
-  // Remove the inlined scope by nullifying the 7th element.
-  for (unsigned i = 0, e = DV->getNumOperands(); i != e; ++i)
-    i == 7 ?
-      Elts.push_back(llvm::Constant::getNullValue(Type::getInt32Ty(VMContext))):
-      Elts.push_back(DV->getOperand(i));
-  return DIVariable(MDNode::get(VMContext, Elts));
-}
-
-//===----------------------------------------------------------------------===//
-// DebugInfoFinder implementations.
-//===----------------------------------------------------------------------===//
-
-/// processModule - Process entire module and collect debug info.
-void DebugInfoFinder::processModule(Module &M) { - if (NamedMDNode *CU_Nodes = M.getNamedMetadata("llvm.dbg.cu")) { - for (unsigned i = 0, e = CU_Nodes->getNumOperands(); i != e; ++i) { - DICompileUnit CU(CU_Nodes->getOperand(i)); - addCompileUnit(CU); - if (CU.getVersion() > LLVMDebugVersion10) { - DIArray GVs = CU.getGlobalVariables(); - for (unsigned i = 0, e = GVs.getNumElements(); i != e; ++i) { - DIGlobalVariable DIG(GVs.getElement(i)); - if (addGlobalVariable(DIG)) - processType(DIG.getType()); - } - DIArray SPs = CU.getSubprograms(); - for (unsigned i = 0, e = SPs.getNumElements(); i != e; ++i) - processSubprogram(DISubprogram(SPs.getElement(i))); - DIArray EnumTypes = CU.getEnumTypes(); - for (unsigned i = 0, e = EnumTypes.getNumElements(); i != e; ++i) - processType(DIType(EnumTypes.getElement(i))); - DIArray RetainedTypes = CU.getRetainedTypes(); - for (unsigned i = 0, e = RetainedTypes.getNumElements(); i != e; ++i) - processType(DIType(RetainedTypes.getElement(i))); - return; - } - } - } - - for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) - for (Function::iterator FI = (*I).begin(), FE = (*I).end(); FI != FE; ++FI) - for (BasicBlock::iterator BI = (*FI).begin(), BE = (*FI).end(); BI != BE; - ++BI) { - if (DbgDeclareInst *DDI = dyn_cast(BI)) - processDeclare(DDI); - - DebugLoc Loc = BI->getDebugLoc(); - if (Loc.isUnknown()) - continue; - - LLVMContext &Ctx = BI->getContext(); - DIDescriptor Scope(Loc.getScope(Ctx)); - - if (Scope.isCompileUnit()) - addCompileUnit(DICompileUnit(Scope)); - else if (Scope.isSubprogram()) - processSubprogram(DISubprogram(Scope)); - else if (Scope.isLexicalBlockFile()) { - DILexicalBlockFile DBF = DILexicalBlockFile(Scope); - processLexicalBlock(DILexicalBlock(DBF.getScope())); - } - else if (Scope.isLexicalBlock()) - processLexicalBlock(DILexicalBlock(Scope)); - - if (MDNode *IA = Loc.getInlinedAt(Ctx)) - processLocation(DILocation(IA)); - } - - if (NamedMDNode *NMD = M.getNamedMetadata("llvm.dbg.gv")) { - for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) { - DIGlobalVariable DIG(cast(NMD->getOperand(i))); - if (addGlobalVariable(DIG)) { - if (DIG.getVersion() <= LLVMDebugVersion10) - addCompileUnit(DIG.getCompileUnit()); - processType(DIG.getType()); - } - } - } - - if (NamedMDNode *NMD = M.getNamedMetadata("llvm.dbg.sp")) - for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) - processSubprogram(DISubprogram(NMD->getOperand(i))); -} - -/// processLocation - Process DILocation. -void DebugInfoFinder::processLocation(DILocation Loc) { - if (!Loc.Verify()) return; - DIDescriptor S(Loc.getScope()); - if (S.isCompileUnit()) - addCompileUnit(DICompileUnit(S)); - else if (S.isSubprogram()) - processSubprogram(DISubprogram(S)); - else if (S.isLexicalBlock()) - processLexicalBlock(DILexicalBlock(S)); - else if (S.isLexicalBlockFile()) { - DILexicalBlockFile DBF = DILexicalBlockFile(S); - processLexicalBlock(DILexicalBlock(DBF.getScope())); - } - processLocation(Loc.getOrigLocation()); -} - -/// processType - Process DIType. 
-void DebugInfoFinder::processType(DIType DT) { - if (!addType(DT)) - return; - if (DT.getVersion() <= LLVMDebugVersion10) - addCompileUnit(DT.getCompileUnit()); - if (DT.isCompositeType()) { - DICompositeType DCT(DT); - processType(DCT.getTypeDerivedFrom()); - DIArray DA = DCT.getTypeArray(); - for (unsigned i = 0, e = DA.getNumElements(); i != e; ++i) { - DIDescriptor D = DA.getElement(i); - if (D.isType()) - processType(DIType(D)); - else if (D.isSubprogram()) - processSubprogram(DISubprogram(D)); - } - } else if (DT.isDerivedType()) { - DIDerivedType DDT(DT); - processType(DDT.getTypeDerivedFrom()); - } -} - -/// processLexicalBlock -void DebugInfoFinder::processLexicalBlock(DILexicalBlock LB) { - DIScope Context = LB.getContext(); - if (Context.isLexicalBlock()) - return processLexicalBlock(DILexicalBlock(Context)); - else if (Context.isLexicalBlockFile()) { - DILexicalBlockFile DBF = DILexicalBlockFile(Context); - return processLexicalBlock(DILexicalBlock(DBF.getScope())); - } - else - return processSubprogram(DISubprogram(Context)); -} - -/// processSubprogram - Process DISubprogram. -void DebugInfoFinder::processSubprogram(DISubprogram SP) { - if (!addSubprogram(SP)) - return; - if (SP.getVersion() <= LLVMDebugVersion10) - addCompileUnit(SP.getCompileUnit()); - processType(SP.getType()); -} - -/// processDeclare - Process DbgDeclareInst. -void DebugInfoFinder::processDeclare(DbgDeclareInst *DDI) { - MDNode *N = dyn_cast(DDI->getVariable()); - if (!N) return; - - DIDescriptor DV(N); - if (!DV.isVariable()) - return; - - if (!NodesSeen.insert(DV)) - return; - if (DIVariable(N).getVersion() <= LLVMDebugVersion10) - addCompileUnit(DIVariable(N).getCompileUnit()); - processType(DIVariable(N).getType()); -} - -/// addType - Add type into Tys. -bool DebugInfoFinder::addType(DIType DT) { - if (!DT.isValid()) - return false; - - if (!NodesSeen.insert(DT)) - return false; - - TYs.push_back(DT); - return true; -} - -/// addCompileUnit - Add compile unit into CUs. -bool DebugInfoFinder::addCompileUnit(DICompileUnit CU) { - if (!CU.Verify()) - return false; - - if (!NodesSeen.insert(CU)) - return false; - - CUs.push_back(CU); - return true; -} - -/// addGlobalVariable - Add global variable into GVs. -bool DebugInfoFinder::addGlobalVariable(DIGlobalVariable DIG) { - if (!DIDescriptor(DIG).isGlobalVariable()) - return false; - - if (!NodesSeen.insert(DIG)) - return false; - - GVs.push_back(DIG); - return true; -} - -// addSubprogram - Add subprgoram into SPs. -bool DebugInfoFinder::addSubprogram(DISubprogram SP) { - if (!DIDescriptor(SP).isSubprogram()) - return false; - - if (!NodesSeen.insert(SP)) - return false; - - SPs.push_back(SP); - return true; -} - -/// getDISubprogram - Find subprogram that is enclosing this scope. -DISubprogram llvm::getDISubprogram(const MDNode *Scope) { - DIDescriptor D(Scope); - if (D.isSubprogram()) - return DISubprogram(Scope); - - if (D.isLexicalBlockFile()) - return getDISubprogram(DILexicalBlockFile(Scope).getContext()); - - if (D.isLexicalBlock()) - return getDISubprogram(DILexicalBlock(Scope).getContext()); - - return DISubprogram(); -} - -/// getDICompositeType - Find underlying composite type. -DICompositeType llvm::getDICompositeType(DIType T) { - if (T.isCompositeType()) - return DICompositeType(T); - - if (T.isDerivedType()) - return getDICompositeType(DIDerivedType(T).getTypeDerivedFrom()); - - return DICompositeType(); -} - -/// isSubprogramContext - Return true if Context is either a subprogram -/// or another context nested inside a subprogram. 
-bool llvm::isSubprogramContext(const MDNode *Context) {
-  if (!Context)
-    return false;
-  DIDescriptor D(Context);
-  if (D.isSubprogram())
-    return true;
-  if (D.isType())
-    return isSubprogramContext(DIType(Context).getContext());
-  return false;
-}
-
diff --git a/lib/Analysis/IPA/CMakeLists.txt b/lib/Analysis/IPA/CMakeLists.txt
index 8ffef29..34d6d1b 100644
--- a/lib/Analysis/IPA/CMakeLists.txt
+++ b/lib/Analysis/IPA/CMakeLists.txt
@@ -5,3 +5,5 @@ add_llvm_library(LLVMipa
   GlobalsModRef.cpp
   IPA.cpp
   )
+
+add_dependencies(LLVMipa intrinsics_gen)
diff --git a/lib/Analysis/IPA/CallGraphSCCPass.cpp b/lib/Analysis/IPA/CallGraphSCCPass.cpp
index 963da75..449b7ee 100644
--- a/lib/Analysis/IPA/CallGraphSCCPass.cpp
+++ b/lib/Analysis/IPA/CallGraphSCCPass.cpp
@@ -246,7 +246,9 @@ bool CGPassManager::RefreshCallGraph(CallGraphSCC &CurSCC,
     for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB)
       for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
         CallSite CS(cast<Value>(I));
-        if (!CS || isa<IntrinsicInst>(I)) continue;
+        if (!CS) continue;
+        Function *Callee = CS.getCalledFunction();
+        if (Callee && Callee->isIntrinsic()) continue;
 
         // If this call site already existed in the callgraph, just verify it
         // matches up to expectations and remove it from CallSites.
diff --git a/lib/Analysis/IPA/GlobalsModRef.cpp b/lib/Analysis/IPA/GlobalsModRef.cpp
index c1d8e3e..22f6e96 100644
--- a/lib/Analysis/IPA/GlobalsModRef.cpp
+++ b/lib/Analysis/IPA/GlobalsModRef.cpp
@@ -329,15 +329,8 @@ bool GlobalsModRef::AnalyzeIndirectGlobalMemory(GlobalValue *GV) {
 
       // Check the value being stored.
       Value *Ptr = GetUnderlyingObject(SI->getOperand(0));
-      if (isMalloc(Ptr)) {
-        // Okay, easy case.
-      } else if (CallInst *CI = dyn_cast<CallInst>(Ptr)) {
-        Function *F = CI->getCalledFunction();
-        if (!F || !F->isDeclaration()) return false;  // Too hard to analyze.
-        if (F->getName() != "calloc") return false;   // Not calloc.
-      } else {
+      if (!isAllocLikeFn(Ptr))
         return false;  // Too hard to analyze.
-      }
 
       // Analyze all uses of the allocation.  If any of them are used in a
       // non-simple way (e.g. stored to another global) bail out.
@@ -454,19 +447,18 @@ void GlobalsModRef::AnalyzeCallGraph(CallGraph &CG, Module &M) {
       for (inst_iterator II = inst_begin(SCC[i]->getFunction()),
              E = inst_end(SCC[i]->getFunction());
            II != E && FunctionEffect != ModRef; ++II)
-        if (isa<LoadInst>(*II)) {
+        if (LoadInst *LI = dyn_cast<LoadInst>(&*II)) {
           FunctionEffect |= Ref;
-          if (cast<LoadInst>(*II).isVolatile())
+          if (LI->isVolatile())
             // Volatile loads may have side-effects, so mark them as writing
             // memory (for example, a flag inside the processor).
             FunctionEffect |= Mod;
-        } else if (isa<StoreInst>(*II)) {
+        } else if (StoreInst *SI = dyn_cast<StoreInst>(&*II)) {
           FunctionEffect |= Mod;
-          if (cast<StoreInst>(*II).isVolatile())
+          if (SI->isVolatile())
             // Treat volatile stores as reading memory somewhere.
             FunctionEffect |= Ref;
-        } else if (isMalloc(&cast<Instruction>(*II)) ||
-                   isFreeCall(&cast<Instruction>(*II))) {
+        } else if (isAllocationFn(&*II) || isFreeCall(&*II)) {
           FunctionEffect |= ModRef;
         } else if (IntrinsicInst *Intrinsic = dyn_cast<IntrinsicInst>(&*II)) {
           // The callgraph doesn't include intrinsic calls.
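Both hunks above funnel call-site classification through shared predicates (Function::isIntrinsic, isAllocLikeFn) instead of open-coded callee-name checks. A minimal, self-contained sketch of the table-driven idea behind isAllocLikeFn follows; the enum, the table entries, and the classify helper are illustrative stand-ins, not the patch's actual API:

#include <cstring>
#include <iostream>

// Toy classifier: callee names are matched against one static table instead
// of being re-checked ad hoc at every call site.
enum AllocKind { NotAlloc, MallocLike, CallocLike };

struct AllocEntry {
  const char *Name;
  AllocKind Kind;
};

static const AllocEntry Table[] = {
  {"malloc", MallocLike},
  {"_Znwm",  MallocLike},  // operator new(unsigned long)
  {"calloc", CallocLike},
};

AllocKind classify(const char *CalleeName) {
  for (const AllocEntry &E : Table)
    if (std::strcmp(E.Name, CalleeName) == 0)
      return E.Kind;
  return NotAlloc;
}

int main() {
  std::cout << (classify("calloc") == CallocLike) << "\n"; // 1
  std::cout << (classify("printf") == NotAlloc) << "\n";   // 1
  return 0;
}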
diff --git a/lib/Analysis/IVUsers.cpp b/lib/Analysis/IVUsers.cpp
index b80966b..0a6682a 100644
--- a/lib/Analysis/IVUsers.cpp
+++ b/lib/Analysis/IVUsers.cpp
@@ -21,6 +21,7 @@
 #include "llvm/Analysis/Dominators.h"
 #include "llvm/Analysis/LoopPass.h"
 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/Analysis/ValueTracking.h"
 #include "llvm/Target/TargetData.h"
 #include "llvm/Assembly/Writer.h"
 #include "llvm/ADT/STLExtras.h"
@@ -120,6 +121,12 @@ bool IVUsers::AddUsersImpl(Instruction *I,
   if (!SE->isSCEVable(I->getType()))
     return false;   // Void and FP expressions cannot be reduced.
 
+  // IVUsers is used by LSR which assumes that all SCEV expressions are safe to
+  // pass to SCEVExpander. Expressions are not safe to expand if they represent
+  // operations that are not safe to speculate, namely integer division.
+  if (!isa<PHINode>(I) && !isSafeToSpeculativelyExecute(I, TD))
+    return false;
+
   // LSR is not APInt clean, do not touch integers bigger than 64-bits.
   // Also avoid creating IVs of non-native types. For example, we don't want a
   // 64-bit IV in 32-bit code just because the loop has one 64-bit cast.
diff --git a/lib/Analysis/InlineCost.cpp b/lib/Analysis/InlineCost.cpp
index 3e3d2ab..bc1ecd2 100644
--- a/lib/Analysis/InlineCost.cpp
+++ b/lib/Analysis/InlineCost.cpp
@@ -178,7 +178,7 @@ bool CallAnalyzer::lookupSROAArgAndCost(
 /// \brief Disable SROA for the candidate marked by this cost iterator.
 ///
-/// This markes the candidate as no longer viable for SROA, and adds the cost
+/// This marks the candidate as no longer viable for SROA, and adds the cost
 /// savings associated with it back into the inline cost measurement.
 void CallAnalyzer::disableSROA(DenseMap<Value *, int>::iterator CostIt) {
   // If we're no longer able to perform SROA we need to undo its cost savings
@@ -398,10 +398,7 @@ bool CallAnalyzer::visitPtrToInt(PtrToIntInst &I) {
   if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt))
     SROAArgValues[&I] = SROAArg;
 
-  // A ptrtoint cast is free so long as the result is large enough to store the
-  // pointer, and a legal integer type.
-  return TD && TD->isLegalInteger(IntegerSize) &&
-    IntegerSize >= TD->getPointerSizeInBits();
+  return isInstructionFree(&I, TD);
 }
 
 bool CallAnalyzer::visitIntToPtr(IntToPtrInst &I) {
@@ -428,10 +425,7 @@ bool CallAnalyzer::visitIntToPtr(IntToPtrInst &I) {
   if (lookupSROAArgAndCost(Op, SROAArg, CostIt))
     SROAArgValues[&I] = SROAArg;
 
-  // An inttoptr cast is free so long as the input is a legal integer type
-  // which doesn't contain values outside the range of a pointer.
-  return TD && TD->isLegalInteger(IntegerSize) &&
-    IntegerSize <= TD->getPointerSizeInBits();
+  return isInstructionFree(&I, TD);
 }
 
 bool CallAnalyzer::visitCastInst(CastInst &I) {
@@ -445,24 +439,7 @@ bool CallAnalyzer::visitCastInst(CastInst &I) {
   // Disable SROA in the face of arbitrary casts we don't whitelist elsewhere.
   disableSROA(I.getOperand(0));
 
-  // No-op casts don't have any cost.
-  if (I.isLosslessCast())
-    return true;
-
-  // trunc to a native type is free (assuming the target has compare and
-  // shift-right of the same width).
-  if (TD && isa<TruncInst>(I) &&
-      TD->isLegalInteger(TD->getTypeSizeInBits(I.getType())))
-    return true;
-
-  // Result of a cmp instruction is often extended (to be used by other
-  // cmp instructions, logical or return instructions). These are usually
-  // no-ops on most sane targets.
-  if (isa<CmpInst>(I.getOperand(0)))
-    return true;
-
-  // Assume the rest of the casts require work.
- return false; + return isInstructionFree(&I, TD); } bool CallAnalyzer::visitUnaryInstruction(UnaryInstruction &I) { @@ -636,21 +613,11 @@ bool CallAnalyzer::visitCallSite(CallSite CS) { default: return Base::visitCallSite(CS); - case Intrinsic::dbg_declare: - case Intrinsic::dbg_value: - case Intrinsic::invariant_start: - case Intrinsic::invariant_end: - case Intrinsic::lifetime_start: - case Intrinsic::lifetime_end: case Intrinsic::memset: case Intrinsic::memcpy: case Intrinsic::memmove: - case Intrinsic::objectsize: - case Intrinsic::ptr_annotation: - case Intrinsic::var_annotation: - // SROA can usually chew through these intrinsics and they have no cost - // so don't pay the price of analyzing them in detail. - return true; + // SROA can usually chew through these intrinsics, but they aren't free. + return false; } } @@ -662,7 +629,7 @@ bool CallAnalyzer::visitCallSite(CallSite CS) { return false; } - if (!callIsSmall(F)) { + if (!callIsSmall(CS)) { // We account for the average 1 instruction per call argument setup // here. Cost += CS.arg_size() * InlineConstants::InstrCost; @@ -706,6 +673,11 @@ bool CallAnalyzer::visitCallSite(CallSite CS) { } bool CallAnalyzer::visitInstruction(Instruction &I) { + // Some instructions are free. All of the free intrinsics can also be + // handled by SROA, etc. + if (isInstructionFree(&I, TD)) + return true; + // We found something we don't understand or can't handle. Mark any SROA-able // values in the operand list as no longer viable. for (User::op_iterator OI = I.op_begin(), OE = I.op_end(); OI != OE; ++OI) @@ -825,9 +797,33 @@ bool CallAnalyzer::analyzeCall(CallSite CS) { FiftyPercentVectorBonus = Threshold; TenPercentVectorBonus = Threshold / 2; - // Subtract off one instruction per call argument as those will be free after - // inlining. - Cost -= CS.arg_size() * InlineConstants::InstrCost; + // Give out bonuses per argument, as the instructions setting them up will + // be gone after inlining. + for (unsigned I = 0, E = CS.arg_size(); I != E; ++I) { + if (TD && CS.isByValArgument(I)) { + // We approximate the number of loads and stores needed by dividing the + // size of the byval type by the target's pointer size. + PointerType *PTy = cast(CS.getArgument(I)->getType()); + unsigned TypeSize = TD->getTypeSizeInBits(PTy->getElementType()); + unsigned PointerSize = TD->getPointerSizeInBits(); + // Ceiling division. + unsigned NumStores = (TypeSize + PointerSize - 1) / PointerSize; + + // If it generates more than 8 stores it is likely to be expanded as an + // inline memcpy so we take that as an upper bound. Otherwise we assume + // one load and one store per word copied. + // FIXME: The maxStoresPerMemcpy setting from the target should be used + // here instead of a magic number of 8, but it's not available via + // TargetData. + NumStores = std::min(NumStores, 8U); + + Cost -= 2 * NumStores * InlineConstants::InstrCost; + } else { + // For non-byval arguments subtract off one instruction per call + // argument. + Cost -= InlineConstants::InstrCost; + } + } // If there is only one call of the function, and it has internal linkage, // the cost of inlining it drops dramatically. 
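The byval branch above prices the argument copy at one load and one store per pointer-sized word, capped at eight stores. A small standalone sketch of that arithmetic; byvalArgBonus is our own name, and InstrCost = 5 assumes the value of InlineConstants::InstrCost in this release:

#include <algorithm>
#include <cassert>
#include <iostream>

// Mirrors the ceiling division and clamping in CallAnalyzer::analyzeCall.
static int byvalArgBonus(unsigned TypeSizeInBits, unsigned PointerSizeInBits) {
  assert(PointerSizeInBits > 0);
  const int InstrCost = 5; // assumed InlineConstants::InstrCost
  // Ceiling division: pointer-sized stores needed to copy the argument.
  unsigned NumStores =
      (TypeSizeInBits + PointerSizeInBits - 1) / PointerSizeInBits;
  // More than 8 stores is likely expanded as an inline memcpy; clamp.
  NumStores = std::min(NumStores, 8u);
  // One load and one store per word copied.
  return 2 * NumStores * InstrCost;
}

int main() {
  // A 96-byte struct (768 bits) on a 64-bit target: 768/64 = 12 stores,
  // clamped to 8, so the call site gets a 2 * 8 * 5 = 80 point discount.
  std::cout << byvalArgBonus(768, 64) << "\n"; // prints 80
  return 0;
}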
diff --git a/lib/Analysis/InstructionSimplify.cpp b/lib/Analysis/InstructionSimplify.cpp
index 16e7a72..379a35a 100644
--- a/lib/Analysis/InstructionSimplify.cpp
+++ b/lib/Analysis/InstructionSimplify.cpp
@@ -47,7 +47,7 @@ struct Query {
   const DominatorTree *DT;
 
   Query(const TargetData *td, const TargetLibraryInfo *tli,
-        const DominatorTree *dt) : TD(td), TLI(tli), DT(dt) {};
+        const DominatorTree *dt) : TD(td), TLI(tli), DT(dt) {}
 };
 
 static Value *SimplifyAndInst(Value *, Value *, const Query &, unsigned);
@@ -1719,10 +1719,13 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
         return ConstantInt::get(ITy, false);
 
       // A local identified object (alloca or noalias call) can't equal any
-      // incoming argument, unless they're both null.
-      if (isa<Instruction>(LHSPtr) && isa<Argument>(RHSPtr) &&
-          Pred == CmpInst::ICMP_EQ)
-        return ConstantInt::get(ITy, false);
+      // incoming argument, unless they're both null or they belong to
+      // different functions. The latter happens during inlining.
+      if (Instruction *LHSInst = dyn_cast<Instruction>(LHSPtr))
+        if (Argument *RHSArg = dyn_cast<Argument>(RHSPtr))
+          if (LHSInst->getParent()->getParent() == RHSArg->getParent() &&
+              Pred == CmpInst::ICMP_EQ)
+            return ConstantInt::get(ITy, false);
     }
 
     // Assume that the constant null is on the right.
@@ -1732,14 +1735,17 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
       else if (Pred == CmpInst::ICMP_NE)
         return ConstantInt::get(ITy, true);
     }
-  } else if (isa<Argument>(LHSPtr)) {
+  } else if (Argument *LHSArg = dyn_cast<Argument>(LHSPtr)) {
     RHSPtr = RHSPtr->stripInBoundsOffsets();
-    // An alloca can't be equal to an argument.
-    if (isa<AllocaInst>(RHSPtr)) {
-      if (Pred == CmpInst::ICMP_EQ)
-        return ConstantInt::get(ITy, false);
-      else if (Pred == CmpInst::ICMP_NE)
-        return ConstantInt::get(ITy, true);
+    // An alloca can't be equal to an argument unless they come from separate
+    // functions via inlining.
+    if (AllocaInst *RHSInst = dyn_cast<AllocaInst>(RHSPtr)) {
+      if (LHSArg->getParent() == RHSInst->getParent()->getParent()) {
+        if (Pred == CmpInst::ICMP_EQ)
+          return ConstantInt::get(ITy, false);
+        else if (Pred == CmpInst::ICMP_NE)
+          return ConstantInt::get(ITy, true);
+      }
     }
   }
 
diff --git a/lib/Analysis/LazyValueInfo.cpp b/lib/Analysis/LazyValueInfo.cpp
index 5ca2746..9140786 100644
--- a/lib/Analysis/LazyValueInfo.cpp
+++ b/lib/Analysis/LazyValueInfo.cpp
@@ -172,7 +172,7 @@ public:
     if (NewR.isEmptySet())
       return markOverdefined();
 
-    bool changed = Range == NewR;
+    bool changed = Range != NewR;
     Range = NewR;
     return changed;
   }
@@ -457,8 +457,10 @@ void LazyValueInfoCache::eraseBlock(BasicBlock *BB) {
 void LazyValueInfoCache::solve() {
   while (!BlockValueStack.empty()) {
     std::pair<BasicBlock*, Value*> &e = BlockValueStack.top();
-    if (solveBlockValue(e.second, e.first))
+    if (solveBlockValue(e.second, e.first)) {
+      assert(BlockValueStack.top() == e);
       BlockValueStack.pop();
+    }
   }
 }
 
@@ -766,15 +768,10 @@ bool LazyValueInfoCache::solveBlockValueConstantRange(LVILatticeVal &BBLV,
   return true;
 }
 
-/// getEdgeValue - This method attempts to infer more complex
-bool LazyValueInfoCache::getEdgeValue(Value *Val, BasicBlock *BBFrom,
-                                      BasicBlock *BBTo, LVILatticeVal &Result) {
-  // If already a constant, there is nothing to compute.
-  if (Constant *VC = dyn_cast<Constant>(Val)) {
-    Result = LVILatticeVal::get(VC);
-    return true;
-  }
-
+/// \brief Compute the value of Val on the edge BBFrom -> BBTo. Returns false if
+/// Val is not constrained on the edge.
+static bool getEdgeValueLocal(Value *Val, BasicBlock *BBFrom,
+                              BasicBlock *BBTo, LVILatticeVal &Result) {
   // TODO: Handle more complex conditionals.  If (v == 0 || v2 < 1) is false, we
   // know that v != 0.
   if (BranchInst *BI = dyn_cast<BranchInst>(BBFrom->getTerminator())) {
@@ -818,7 +815,7 @@ bool LazyValueInfoCache::getEdgeValue(Value *Val, BasicBlock *BBFrom,
         ConstantInt *CI = dyn_cast<ConstantInt>(ICI->getOperand(1));
         if (CI && (ICI->getOperand(0) == Val || NegOffset)) {
           // Calculate the range of values that would satisfy the comparison.
-          ConstantRange CmpRange(CI->getValue(), CI->getValue()+1);
+          ConstantRange CmpRange(CI->getValue());
           ConstantRange TrueValues =
             ConstantRange::makeICmpRegion(ICI->getPredicate(), CmpRange);
 
@@ -827,25 +824,8 @@ bool LazyValueInfoCache::getEdgeValue(Value *Val, BasicBlock *BBFrom,
           // If we're interested in the false dest, invert the condition.
           if (!isTrueDest) TrueValues = TrueValues.inverse();
-
-          // Figure out the possible values of the query BEFORE this branch.
-          if (!hasBlockValue(Val, BBFrom)) {
-            BlockValueStack.push(std::make_pair(BBFrom, Val));
-            return false;
-          }
-
-          LVILatticeVal InBlock = getBlockValue(Val, BBFrom);
-          if (!InBlock.isConstantRange()) {
-            Result = LVILatticeVal::getRange(TrueValues);
-            return true;
-          }
-
-          // Find all potential values that satisfy both the input and output
-          // conditions.
-          ConstantRange PossibleValues =
-            TrueValues.intersectWith(InBlock.getConstantRange());
-
-          Result = LVILatticeVal::getRange(PossibleValues);
+
+          Result = LVILatticeVal::getRange(TrueValues);
           return true;
         }
       }
@@ -855,40 +835,71 @@ bool LazyValueInfoCache::getEdgeValue(Value *Val, BasicBlock *BBFrom,
   // If the edge was formed by a switch on the value, then we may know exactly
   // what it is.
   if (SwitchInst *SI = dyn_cast<SwitchInst>(BBFrom->getTerminator())) {
-    if (SI->getCondition() == Val) {
-      // We don't know anything in the default case.
-      if (SI->getDefaultDest() == BBTo) {
-        Result.markOverdefined();
-        return true;
-      }
-
-      // We only know something if there is exactly one value that goes from
-      // BBFrom to BBTo.
-      unsigned NumEdges = 0;
-      ConstantInt *EdgeVal = 0;
-      for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end();
-           i != e; ++i) {
-        if (i.getCaseSuccessor() != BBTo) continue;
-        if (NumEdges++) break;
-        EdgeVal = i.getCaseValue();
-      }
-      assert(EdgeVal && "Missing successor?");
-      if (NumEdges == 1) {
-        Result = LVILatticeVal::get(EdgeVal);
-        return true;
-      }
+    if (SI->getCondition() != Val)
+      return false;
+
+    bool DefaultCase = SI->getDefaultDest() == BBTo;
+    unsigned BitWidth = Val->getType()->getIntegerBitWidth();
+    ConstantRange EdgesVals(BitWidth, DefaultCase/*isFullSet*/);
+
+    for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end();
+         i != e; ++i) {
+      ConstantRange EdgeVal(i.getCaseValue()->getValue());
+      if (DefaultCase)
+        EdgesVals = EdgesVals.difference(EdgeVal);
+      else if (i.getCaseSuccessor() == BBTo)
+        EdgesVals = EdgesVals.unionWith(EdgeVal);
     }
-  }
-
-  // Otherwise see if the value is known in the block.
-  if (hasBlockValue(Val, BBFrom)) {
-    Result = getBlockValue(Val, BBFrom);
+    Result = LVILatticeVal::getRange(EdgesVals);
     return true;
   }
-  BlockValueStack.push(std::make_pair(BBFrom, Val));
   return false;
 }
 
+/// \brief Compute the value of Val on the edge BBFrom -> BBTo, or the value at
+/// the basic block if the edge does not constrain Val.
+bool LazyValueInfoCache::getEdgeValue(Value *Val, BasicBlock *BBFrom,
+                                      BasicBlock *BBTo, LVILatticeVal &Result) {
+  // If already a constant, there is nothing to compute.
+  if (Constant *VC = dyn_cast<Constant>(Val)) {
+    Result = LVILatticeVal::get(VC);
+    return true;
+  }
+
+  if (getEdgeValueLocal(Val, BBFrom, BBTo, Result)) {
+    if (!Result.isConstantRange() ||
+        Result.getConstantRange().getSingleElement())
+      return true;
+
+    // FIXME: this check should be moved to the beginning of the function when
+    // LVI better supports recursive values. Even for the single value case, we
+    // can intersect to detect dead code (an empty range).
+    if (!hasBlockValue(Val, BBFrom)) {
+      BlockValueStack.push(std::make_pair(BBFrom, Val));
+      return false;
+    }
+
+    // Try to intersect ranges of the BB and the constraint on the edge.
+    LVILatticeVal InBlock = getBlockValue(Val, BBFrom);
+    if (!InBlock.isConstantRange())
+      return true;
+
+    ConstantRange Range =
+      Result.getConstantRange().intersectWith(InBlock.getConstantRange());
+    Result = LVILatticeVal::getRange(Range);
+    return true;
+  }
+
+  if (!hasBlockValue(Val, BBFrom)) {
+    BlockValueStack.push(std::make_pair(BBFrom, Val));
+    return false;
+  }
+
+  // If we couldn't compute the value on the edge, use the value from the BB.
+  Result = getBlockValue(Val, BBFrom);
+  return true;
+}
+
 LVILatticeVal LazyValueInfoCache::getValueInBlock(Value *V, BasicBlock *BB) {
   DEBUG(dbgs() << "LVI Getting block end value " << *V << " at '"
         << BB->getName() << "'\n");
diff --git a/lib/Analysis/LoopInfo.cpp b/lib/Analysis/LoopInfo.cpp
index f7a60a1..20c33a3 100644
--- a/lib/Analysis/LoopInfo.cpp
+++ b/lib/Analysis/LoopInfo.cpp
@@ -18,6 +18,7 @@
 #include "llvm/Constants.h"
 #include "llvm/Instructions.h"
 #include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/LoopInfoImpl.h"
 #include "llvm/Analysis/LoopIterator.h"
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/Assembly/Writer.h"
@@ -29,6 +30,10 @@
 #include <algorithm>
 using namespace llvm;
 
+// Explicitly instantiate methods in LoopInfoImpl.h for IR-level Loops.
+template class llvm::LoopBase<BasicBlock, Loop>;
+template class llvm::LoopInfoBase<BasicBlock, Loop>;
+
 // Always verify loopinfo if expensive checking is enabled.
 #ifdef XDEBUG
 static bool VerifyLoopInfo = true;
@@ -507,7 +512,7 @@ Loop *UnloopUpdater::getNearestLoop(BasicBlock *BB, Loop *BBLoop) {
 //
 bool LoopInfo::runOnFunction(Function &) {
   releaseMemory();
-  LI.Calculate(getAnalysis<DominatorTree>().getBase());    // Update
+  LI.Analyze(getAnalysis<DominatorTree>().getBase());
   return false;
 }
 
@@ -589,9 +594,6 @@ void LoopInfo::verifyAnalysis() const {
   }
 
   // Verify that blocks are mapped to valid loops.
-  //
-  // FIXME: With an up-to-date DFS (see LoopIterator.h) and DominatorTree, we
-  // could also verify that the blocks are still in the correct loops.
   for (DenseMap<BasicBlock*, Loop*>::const_iterator I = LI.BBMap.begin(),
          E = LI.BBMap.end(); I != E; ++I) {
     assert(Loops.count(I->second) && "orphaned loop");
diff --git a/lib/Analysis/LoopPass.cpp b/lib/Analysis/LoopPass.cpp
index aba700a..1540112 100644
--- a/lib/Analysis/LoopPass.cpp
+++ b/lib/Analysis/LoopPass.cpp
@@ -162,7 +162,7 @@ void LPPassManager::deleteSimpleAnalysisValue(Value *V, Loop *L) {
 // Recurse through all subloops and all loops into LQ.
 static void addLoopIntoQueue(Loop *L, std::deque<Loop *> &LQ) {
   LQ.push_back(L);
-  for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I)
+  for (Loop::reverse_iterator I = L->rbegin(), E = L->rend(); I != E; ++I)
     addLoopIntoQueue(*I, LQ);
 }
 
@@ -183,8 +183,12 @@ bool LPPassManager::runOnFunction(Function &F) {
 
   // Collect inherited analysis from Module level pass manager.
populateInheritedAnalysis(TPM->activeStack); - // Populate Loop Queue - for (LoopInfo::iterator I = LI->begin(), E = LI->end(); I != E; ++I) + // Populate the loop queue in reverse program order. There is no clear need to + // process sibling loops in either forward or reverse order. There may be some + // advantage in deleting uses in a later loop before optimizing the + // definitions in an earlier loop. If we find a clear reason to process in + // forward order, then a forward variant of LoopPassManager should be created. + for (LoopInfo::reverse_iterator I = LI->rbegin(), E = LI->rend(); I != E; ++I) addLoopIntoQueue(*I, LQ); if (LQ.empty()) // No loops, skip calling finalizers diff --git a/lib/Analysis/MemDepPrinter.cpp b/lib/Analysis/MemDepPrinter.cpp index 22414b3..8578a63 100644 --- a/lib/Analysis/MemDepPrinter.cpp +++ b/lib/Analysis/MemDepPrinter.cpp @@ -32,7 +32,7 @@ namespace { Unknown }; - static const char* DepTypeStr[]; + static const char *const DepTypeStr[]; typedef PointerIntPair InstTypePair; typedef std::pair Dep; @@ -88,7 +88,7 @@ FunctionPass *llvm::createMemDepPrinter() { return new MemDepPrinter(); } -const char* MemDepPrinter::DepTypeStr[] +const char *const MemDepPrinter::DepTypeStr[] = {"Clobber", "Def", "NonFuncLocal", "Unknown"}; bool MemDepPrinter::runOnFunction(Function &F) { diff --git a/lib/Analysis/MemoryBuiltins.cpp b/lib/Analysis/MemoryBuiltins.cpp index b145650..c0cc27b 100644 --- a/lib/Analysis/MemoryBuiltins.cpp +++ b/lib/Analysis/MemoryBuiltins.cpp @@ -12,80 +12,168 @@ // //===----------------------------------------------------------------------===// +#define DEBUG_TYPE "memory-builtins" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/Analysis/MemoryBuiltins.h" -#include "llvm/Constants.h" +#include "llvm/GlobalVariable.h" #include "llvm/Instructions.h" +#include "llvm/Intrinsics.h" +#include "llvm/Metadata.h" #include "llvm/Module.h" #include "llvm/Analysis/ValueTracking.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetData.h" +#include "llvm/Transforms/Utils/Local.h" using namespace llvm; -//===----------------------------------------------------------------------===// -// malloc Call Utility Functions. -// +enum AllocType { + MallocLike = 1<<0, // allocates + CallocLike = 1<<1, // allocates + bzero + ReallocLike = 1<<2, // reallocates + StrDupLike = 1<<3, + AllocLike = MallocLike | CallocLike | StrDupLike, + AnyAlloc = MallocLike | CallocLike | ReallocLike | StrDupLike +}; + +struct AllocFnsTy { + const char *Name; + AllocType AllocTy; + unsigned char NumParams; + // First and Second size parameters (or -1 if unused) + signed char FstParam, SndParam; +}; + +// FIXME: certain users need more information. E.g., SimplifyLibCalls needs to +// know which functions are nounwind, noalias, nocapture parameters, etc. 
+static const AllocFnsTy AllocationFnData[] = { + {"malloc", MallocLike, 1, 0, -1}, + {"valloc", MallocLike, 1, 0, -1}, + {"_Znwj", MallocLike, 1, 0, -1}, // new(unsigned int) + {"_ZnwjRKSt9nothrow_t", MallocLike, 2, 0, -1}, // new(unsigned int, nothrow) + {"_Znwm", MallocLike, 1, 0, -1}, // new(unsigned long) + {"_ZnwmRKSt9nothrow_t", MallocLike, 2, 0, -1}, // new(unsigned long, nothrow) + {"_Znaj", MallocLike, 1, 0, -1}, // new[](unsigned int) + {"_ZnajRKSt9nothrow_t", MallocLike, 2, 0, -1}, // new[](unsigned int, nothrow) + {"_Znam", MallocLike, 1, 0, -1}, // new[](unsigned long) + {"_ZnamRKSt9nothrow_t", MallocLike, 2, 0, -1}, // new[](unsigned long, nothrow) + {"posix_memalign", MallocLike, 3, 2, -1}, + {"calloc", CallocLike, 2, 0, 1}, + {"realloc", ReallocLike, 2, 1, -1}, + {"reallocf", ReallocLike, 2, 1, -1}, + {"strdup", StrDupLike, 1, -1, -1}, + {"strndup", StrDupLike, 2, 1, -1} +}; + + +static Function *getCalledFunction(const Value *V, bool LookThroughBitCast) { + if (LookThroughBitCast) + V = V->stripPointerCasts(); -/// isMalloc - Returns true if the value is either a malloc call or a -/// bitcast of the result of a malloc call. -bool llvm::isMalloc(const Value *I) { - return extractMallocCall(I) || extractMallocCallFromBitCast(I); + CallSite CS(const_cast(V)); + if (!CS.getInstruction()) + return 0; + + Function *Callee = CS.getCalledFunction(); + if (!Callee || !Callee->isDeclaration()) + return 0; + return Callee; } -static bool isMallocCall(const CallInst *CI) { - if (!CI) - return false; +/// \brief Returns the allocation data for the given value if it is a call to a +/// known allocation function, and NULL otherwise. +static const AllocFnsTy *getAllocationData(const Value *V, AllocType AllocTy, + bool LookThroughBitCast = false) { + Function *Callee = getCalledFunction(V, LookThroughBitCast); + if (!Callee) + return 0; - Function *Callee = CI->getCalledFunction(); - if (Callee == 0 || !Callee->isDeclaration()) - return false; - if (Callee->getName() != "malloc" && - Callee->getName() != "_Znwj" && // operator new(unsigned int) - Callee->getName() != "_Znwm" && // operator new(unsigned long) - Callee->getName() != "_Znaj" && // operator new[](unsigned int) - Callee->getName() != "_Znam") // operator new[](unsigned long) - return false; + unsigned i = 0; + bool found = false; + for ( ; i < array_lengthof(AllocationFnData); ++i) { + if (Callee->getName() == AllocationFnData[i].Name) { + found = true; + break; + } + } + if (!found) + return 0; - // Check malloc prototype. - // FIXME: workaround for PR5130, this will be obsolete when a nobuiltin - // attribute will exist. + const AllocFnsTy *FnData = &AllocationFnData[i]; + if ((FnData->AllocTy & AllocTy) == 0) + return 0; + + // Check function prototype. + // FIXME: Check the nobuiltin metadata?? 
(PR5130) + int FstParam = FnData->FstParam; + int SndParam = FnData->SndParam; FunctionType *FTy = Callee->getFunctionType(); - return FTy->getReturnType() == Type::getInt8PtrTy(FTy->getContext()) && - FTy->getNumParams() == 1 && - (FTy->getParamType(0)->isIntegerTy(32) || - FTy->getParamType(0)->isIntegerTy(64)); + + if (FTy->getReturnType() == Type::getInt8PtrTy(FTy->getContext()) && + FTy->getNumParams() == FnData->NumParams && + (FstParam < 0 || + (FTy->getParamType(FstParam)->isIntegerTy(32) || + FTy->getParamType(FstParam)->isIntegerTy(64))) && + (SndParam < 0 || + FTy->getParamType(SndParam)->isIntegerTy(32) || + FTy->getParamType(SndParam)->isIntegerTy(64))) + return FnData; + return 0; } -/// extractMallocCall - Returns the corresponding CallInst if the instruction -/// is a malloc call. Since CallInst::CreateMalloc() only creates calls, we -/// ignore InvokeInst here. -const CallInst *llvm::extractMallocCall(const Value *I) { - const CallInst *CI = dyn_cast(I); - return (isMallocCall(CI)) ? CI : NULL; +static bool hasNoAliasAttr(const Value *V, bool LookThroughBitCast) { + ImmutableCallSite CS(LookThroughBitCast ? V->stripPointerCasts() : V); + return CS && CS.hasFnAttr(Attribute::NoAlias); } -CallInst *llvm::extractMallocCall(Value *I) { - CallInst *CI = dyn_cast(I); - return (isMallocCall(CI)) ? CI : NULL; + +/// \brief Tests if a value is a call or invoke to a library function that +/// allocates or reallocates memory (either malloc, calloc, realloc, or strdup +/// like). +bool llvm::isAllocationFn(const Value *V, bool LookThroughBitCast) { + return getAllocationData(V, AnyAlloc, LookThroughBitCast); } -static bool isBitCastOfMallocCall(const BitCastInst *BCI) { - if (!BCI) - return false; - - return isMallocCall(dyn_cast(BCI->getOperand(0))); +/// \brief Tests if a value is a call or invoke to a function that returns a +/// NoAlias pointer (including malloc/calloc/realloc/strdup-like functions). +bool llvm::isNoAliasFn(const Value *V, bool LookThroughBitCast) { + // it's safe to consider realloc as noalias since accessing the original + // pointer is undefined behavior + return isAllocationFn(V, LookThroughBitCast) || + hasNoAliasAttr(V, LookThroughBitCast); +} + +/// \brief Tests if a value is a call or invoke to a library function that +/// allocates uninitialized memory (such as malloc). +bool llvm::isMallocLikeFn(const Value *V, bool LookThroughBitCast) { + return getAllocationData(V, MallocLike, LookThroughBitCast); +} + +/// \brief Tests if a value is a call or invoke to a library function that +/// allocates zero-filled memory (such as calloc). +bool llvm::isCallocLikeFn(const Value *V, bool LookThroughBitCast) { + return getAllocationData(V, CallocLike, LookThroughBitCast); +} + +/// \brief Tests if a value is a call or invoke to a library function that +/// allocates memory (either malloc, calloc, or strdup like). +bool llvm::isAllocLikeFn(const Value *V, bool LookThroughBitCast) { + return getAllocationData(V, AllocLike, LookThroughBitCast); } -/// extractMallocCallFromBitCast - Returns the corresponding CallInst if the -/// instruction is a bitcast of the result of a malloc call. -CallInst *llvm::extractMallocCallFromBitCast(Value *I) { - BitCastInst *BCI = dyn_cast(I); - return (isBitCastOfMallocCall(BCI)) ? cast(BCI->getOperand(0)) - : NULL; +/// \brief Tests if a value is a call or invoke to a library function that +/// reallocates memory (such as realloc). 
+bool llvm::isReallocLikeFn(const Value *V, bool LookThroughBitCast) {
+  return getAllocationData(V, ReallocLike, LookThroughBitCast);
 }
 
-const CallInst *llvm::extractMallocCallFromBitCast(const Value *I) {
-  const BitCastInst *BCI = dyn_cast<BitCastInst>(I);
-  return (isBitCastOfMallocCall(BCI)) ? cast<CallInst>(BCI->getOperand(0))
-                                      : NULL;
+/// extractMallocCall - Returns the corresponding CallInst if the instruction
+/// is a malloc call.  Since CallInst::CreateMalloc() only creates calls, we
+/// ignore InvokeInst here.
+const CallInst *llvm::extractMallocCall(const Value *I) {
+  return isMallocLikeFn(I) ? dyn_cast<CallInst>(I) : 0;
 }
 
 static Value *computeArraySize(const CallInst *CI, const TargetData *TD,
@@ -134,7 +222,7 @@ const CallInst *llvm::isArrayMalloc(const Value *I, const TargetData *TD) {
 ///      1: PointerType is the bitcast's result type.
 ///     >1: Unique PointerType cannot be determined, return NULL.
 PointerType *llvm::getMallocType(const CallInst *CI) {
-  assert(isMalloc(CI) && "getMallocType and not malloc call");
+  assert(isMallocLikeFn(CI) && "getMallocType and not malloc call");
 
   PointerType *MallocType = NULL;
   unsigned NumOfBitCastUses = 0;
@@ -176,13 +264,17 @@ Type *llvm::getMallocAllocatedType(const CallInst *CI) {
 /// determined.
 Value *llvm::getMallocArraySize(CallInst *CI, const TargetData *TD,
                                 bool LookThroughSExt) {
-  assert(isMalloc(CI) && "getMallocArraySize and not malloc call");
+  assert(isMallocLikeFn(CI) && "getMallocArraySize and not malloc call");
   return computeArraySize(CI, TD, LookThroughSExt);
 }
 
-//===----------------------------------------------------------------------===//
-//  free Call Utility Functions.
-//
+
+/// extractCallocCall - Returns the corresponding CallInst if the instruction
+/// is a calloc call.
+const CallInst *llvm::extractCallocCall(const Value *I) {
+  return isCallocLikeFn(I) ? cast<CallInst>(I) : 0;
+}
+
 
 /// isFreeCall - Returns non-null if the value is a call to the builtin free()
 const CallInst *llvm::isFreeCall(const Value *I) {
@@ -211,3 +303,438 @@
 
   return CI;
 }
+
+
+
+//===----------------------------------------------------------------------===//
+//  Utility functions to compute size of objects.
+//
+
+
+/// \brief Compute the size of the object pointed by Ptr. Returns true and the
+/// object size in Size if successful, and false otherwise.
+/// If RoundToAlign is true, then Size is rounded up to the alignment of
+/// allocas, byval arguments, and global variables.
+bool llvm::getObjectSize(const Value *Ptr, uint64_t &Size, const TargetData *TD, + bool RoundToAlign) { + if (!TD) + return false; + + ObjectSizeOffsetVisitor Visitor(TD, Ptr->getContext(), RoundToAlign); + SizeOffsetType Data = Visitor.compute(const_cast(Ptr)); + if (!Visitor.bothKnown(Data)) + return false; + + APInt ObjSize = Data.first, Offset = Data.second; + // check for overflow + if (Offset.slt(0) || ObjSize.ult(Offset)) + Size = 0; + else + Size = (ObjSize - Offset).getZExtValue(); + return true; +} + + +STATISTIC(ObjectVisitorArgument, + "Number of arguments with unsolved size and offset"); +STATISTIC(ObjectVisitorLoad, + "Number of load instructions with unsolved size and offset"); + + +APInt ObjectSizeOffsetVisitor::align(APInt Size, uint64_t Align) { + if (RoundToAlign && Align) + return APInt(IntTyBits, RoundUpToAlignment(Size.getZExtValue(), Align)); + return Size; +} + +ObjectSizeOffsetVisitor::ObjectSizeOffsetVisitor(const TargetData *TD, + LLVMContext &Context, + bool RoundToAlign) +: TD(TD), RoundToAlign(RoundToAlign) { + IntegerType *IntTy = TD->getIntPtrType(Context); + IntTyBits = IntTy->getBitWidth(); + Zero = APInt::getNullValue(IntTyBits); +} + +SizeOffsetType ObjectSizeOffsetVisitor::compute(Value *V) { + V = V->stripPointerCasts(); + + if (GEPOperator *GEP = dyn_cast(V)) + return visitGEPOperator(*GEP); + if (Instruction *I = dyn_cast(V)) + return visit(*I); + if (Argument *A = dyn_cast(V)) + return visitArgument(*A); + if (ConstantPointerNull *P = dyn_cast(V)) + return visitConstantPointerNull(*P); + if (GlobalVariable *GV = dyn_cast(V)) + return visitGlobalVariable(*GV); + if (UndefValue *UV = dyn_cast(V)) + return visitUndefValue(*UV); + if (ConstantExpr *CE = dyn_cast(V)) + if (CE->getOpcode() == Instruction::IntToPtr) + return unknown(); // clueless + + DEBUG(dbgs() << "ObjectSizeOffsetVisitor::compute() unhandled value: " << *V + << '\n'); + return unknown(); +} + +SizeOffsetType ObjectSizeOffsetVisitor::visitAllocaInst(AllocaInst &I) { + if (!I.getAllocatedType()->isSized()) + return unknown(); + + APInt Size(IntTyBits, TD->getTypeAllocSize(I.getAllocatedType())); + if (!I.isArrayAllocation()) + return std::make_pair(align(Size, I.getAlignment()), Zero); + + Value *ArraySize = I.getArraySize(); + if (const ConstantInt *C = dyn_cast(ArraySize)) { + Size *= C->getValue().zextOrSelf(IntTyBits); + return std::make_pair(align(Size, I.getAlignment()), Zero); + } + return unknown(); +} + +SizeOffsetType ObjectSizeOffsetVisitor::visitArgument(Argument &A) { + // no interprocedural analysis is done at the moment + if (!A.hasByValAttr()) { + ++ObjectVisitorArgument; + return unknown(); + } + PointerType *PT = cast(A.getType()); + APInt Size(IntTyBits, TD->getTypeAllocSize(PT->getElementType())); + return std::make_pair(align(Size, A.getParamAlignment()), Zero); +} + +SizeOffsetType ObjectSizeOffsetVisitor::visitCallSite(CallSite CS) { + const AllocFnsTy *FnData = getAllocationData(CS.getInstruction(), AnyAlloc); + if (!FnData) + return unknown(); + + // handle strdup-like functions separately + if (FnData->AllocTy == StrDupLike) { + APInt Size(IntTyBits, GetStringLength(CS.getArgument(0))); + if (!Size) + return unknown(); + + // strndup limits strlen + if (FnData->FstParam > 0) { + ConstantInt *Arg= dyn_cast(CS.getArgument(FnData->FstParam)); + if (!Arg) + return unknown(); + + APInt MaxSize = Arg->getValue().zextOrSelf(IntTyBits); + if (Size.ugt(MaxSize)) + Size = MaxSize + 1; + } + return std::make_pair(Size, Zero); + } + + ConstantInt *Arg = 
dyn_cast(CS.getArgument(FnData->FstParam)); + if (!Arg) + return unknown(); + + APInt Size = Arg->getValue().zextOrSelf(IntTyBits); + // size determined by just 1 parameter + if (FnData->SndParam < 0) + return std::make_pair(Size, Zero); + + Arg = dyn_cast(CS.getArgument(FnData->SndParam)); + if (!Arg) + return unknown(); + + Size *= Arg->getValue().zextOrSelf(IntTyBits); + return std::make_pair(Size, Zero); + + // TODO: handle more standard functions (+ wchar cousins): + // - strdup / strndup + // - strcpy / strncpy + // - strcat / strncat + // - memcpy / memmove + // - strcat / strncat + // - memset +} + +SizeOffsetType +ObjectSizeOffsetVisitor::visitConstantPointerNull(ConstantPointerNull&) { + return std::make_pair(Zero, Zero); +} + +SizeOffsetType +ObjectSizeOffsetVisitor::visitExtractElementInst(ExtractElementInst&) { + return unknown(); +} + +SizeOffsetType +ObjectSizeOffsetVisitor::visitExtractValueInst(ExtractValueInst&) { + // Easy cases were already folded by previous passes. + return unknown(); +} + +SizeOffsetType ObjectSizeOffsetVisitor::visitGEPOperator(GEPOperator &GEP) { + SizeOffsetType PtrData = compute(GEP.getPointerOperand()); + if (!bothKnown(PtrData) || !GEP.hasAllConstantIndices()) + return unknown(); + + SmallVector Ops(GEP.idx_begin(), GEP.idx_end()); + APInt Offset(IntTyBits,TD->getIndexedOffset(GEP.getPointerOperandType(),Ops)); + return std::make_pair(PtrData.first, PtrData.second + Offset); +} + +SizeOffsetType ObjectSizeOffsetVisitor::visitGlobalVariable(GlobalVariable &GV){ + if (!GV.hasDefinitiveInitializer()) + return unknown(); + + APInt Size(IntTyBits, TD->getTypeAllocSize(GV.getType()->getElementType())); + return std::make_pair(align(Size, GV.getAlignment()), Zero); +} + +SizeOffsetType ObjectSizeOffsetVisitor::visitIntToPtrInst(IntToPtrInst&) { + // clueless + return unknown(); +} + +SizeOffsetType ObjectSizeOffsetVisitor::visitLoadInst(LoadInst&) { + ++ObjectVisitorLoad; + return unknown(); +} + +SizeOffsetType ObjectSizeOffsetVisitor::visitPHINode(PHINode&) { + // too complex to analyze statically. + return unknown(); +} + +SizeOffsetType ObjectSizeOffsetVisitor::visitSelectInst(SelectInst &I) { + // ignore malformed self-looping selects + if (I.getTrueValue() == &I || I.getFalseValue() == &I) + return unknown(); + + SizeOffsetType TrueSide = compute(I.getTrueValue()); + SizeOffsetType FalseSide = compute(I.getFalseValue()); + if (bothKnown(TrueSide) && bothKnown(FalseSide) && TrueSide == FalseSide) + return TrueSide; + return unknown(); +} + +SizeOffsetType ObjectSizeOffsetVisitor::visitUndefValue(UndefValue&) { + return std::make_pair(Zero, Zero); +} + +SizeOffsetType ObjectSizeOffsetVisitor::visitInstruction(Instruction &I) { + DEBUG(dbgs() << "ObjectSizeOffsetVisitor unknown instruction:" << I << '\n'); + return unknown(); +} + + +ObjectSizeOffsetEvaluator::ObjectSizeOffsetEvaluator(const TargetData *TD, + LLVMContext &Context) +: TD(TD), Context(Context), Builder(Context, TargetFolder(TD)), +Visitor(TD, Context) { + IntTy = TD->getIntPtrType(Context); + Zero = ConstantInt::get(IntTy, 0); +} + +SizeOffsetEvalType ObjectSizeOffsetEvaluator::compute(Value *V) { + SizeOffsetEvalType Result = compute_(V); + + if (!bothKnown(Result)) { + // erase everything that was computed in this iteration from the cache, so + // that no dangling references are left behind. We could be a bit smarter if + // we kept a dependency graph. It's probably not worth the complexity. 
+    for (PtrSetTy::iterator I=SeenVals.begin(), E=SeenVals.end(); I != E; ++I) {
+      CacheMapTy::iterator CacheIt = CacheMap.find(*I);
+      // non-computable results can be safely cached
+      if (CacheIt != CacheMap.end() && anyKnown(CacheIt->second))
+        CacheMap.erase(CacheIt);
+    }
+  }
+
+  SeenVals.clear();
+  return Result;
+}
+
+SizeOffsetEvalType ObjectSizeOffsetEvaluator::compute_(Value *V) {
+  SizeOffsetType Const = Visitor.compute(V);
+  if (Visitor.bothKnown(Const))
+    return std::make_pair(ConstantInt::get(Context, Const.first),
+                          ConstantInt::get(Context, Const.second));
+
+  V = V->stripPointerCasts();
+
+  // check cache
+  CacheMapTy::iterator CacheIt = CacheMap.find(V);
+  if (CacheIt != CacheMap.end())
+    return CacheIt->second;
+
+  // always generate code immediately before the instruction being
+  // processed, so that the generated code dominates the same BBs
+  Instruction *PrevInsertPoint = Builder.GetInsertPoint();
+  if (Instruction *I = dyn_cast<Instruction>(V))
+    Builder.SetInsertPoint(I);
+
+  // record the pointers that were handled in this run, so that they can be
+  // cleaned later if something fails
+  SeenVals.insert(V);
+
+  // now compute the size and offset
+  SizeOffsetEvalType Result;
+  if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
+    Result = visitGEPOperator(*GEP);
+  } else if (Instruction *I = dyn_cast<Instruction>(V)) {
+    Result = visit(*I);
+  } else if (isa<Argument>(V) ||
+             (isa<ConstantExpr>(V) &&
+              cast<ConstantExpr>(V)->getOpcode() == Instruction::IntToPtr) ||
+             isa<GlobalVariable>(V)) {
+    // ignore values where we cannot do more than what ObjectSizeVisitor can
+    Result = unknown();
+  } else {
+    DEBUG(dbgs() << "ObjectSizeOffsetEvaluator::compute() unhandled value: "
+          << *V << '\n');
+    Result = unknown();
+  }
+
+  if (PrevInsertPoint)
+    Builder.SetInsertPoint(PrevInsertPoint);
+
+  // Don't reuse CacheIt since it may be invalid at this point.
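+  // (compute_ may have recursed above and grown CacheMap; visitPHINode in
+  // particular seeds the cache before recursing, and a DenseMap insertion
+  // can rehash and move entries, invalidating CacheIt.)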
+ CacheMap[V] = Result; + return Result; +} + +SizeOffsetEvalType ObjectSizeOffsetEvaluator::visitAllocaInst(AllocaInst &I) { + if (!I.getAllocatedType()->isSized()) + return unknown(); + + // must be a VLA + assert(I.isArrayAllocation()); + Value *ArraySize = I.getArraySize(); + Value *Size = ConstantInt::get(ArraySize->getType(), + TD->getTypeAllocSize(I.getAllocatedType())); + Size = Builder.CreateMul(Size, ArraySize); + return std::make_pair(Size, Zero); +} + +SizeOffsetEvalType ObjectSizeOffsetEvaluator::visitCallSite(CallSite CS) { + const AllocFnsTy *FnData = getAllocationData(CS.getInstruction(), AnyAlloc); + if (!FnData) + return unknown(); + + // handle strdup-like functions separately + if (FnData->AllocTy == StrDupLike) { + // TODO + return unknown(); + } + + Value *FirstArg = CS.getArgument(FnData->FstParam); + FirstArg = Builder.CreateZExt(FirstArg, IntTy); + if (FnData->SndParam < 0) + return std::make_pair(FirstArg, Zero); + + Value *SecondArg = CS.getArgument(FnData->SndParam); + SecondArg = Builder.CreateZExt(SecondArg, IntTy); + Value *Size = Builder.CreateMul(FirstArg, SecondArg); + return std::make_pair(Size, Zero); + + // TODO: handle more standard functions (+ wchar cousins): + // - strdup / strndup + // - strcpy / strncpy + // - strcat / strncat + // - memcpy / memmove + // - strcat / strncat + // - memset +} + +SizeOffsetEvalType +ObjectSizeOffsetEvaluator::visitExtractElementInst(ExtractElementInst&) { + return unknown(); +} + +SizeOffsetEvalType +ObjectSizeOffsetEvaluator::visitExtractValueInst(ExtractValueInst&) { + return unknown(); +} + +SizeOffsetEvalType +ObjectSizeOffsetEvaluator::visitGEPOperator(GEPOperator &GEP) { + SizeOffsetEvalType PtrData = compute_(GEP.getPointerOperand()); + if (!bothKnown(PtrData)) + return unknown(); + + Value *Offset = EmitGEPOffset(&Builder, *TD, &GEP, /*NoAssumptions=*/true); + Offset = Builder.CreateAdd(PtrData.second, Offset); + return std::make_pair(PtrData.first, Offset); +} + +SizeOffsetEvalType ObjectSizeOffsetEvaluator::visitIntToPtrInst(IntToPtrInst&) { + // clueless + return unknown(); +} + +SizeOffsetEvalType ObjectSizeOffsetEvaluator::visitLoadInst(LoadInst&) { + return unknown(); +} + +SizeOffsetEvalType ObjectSizeOffsetEvaluator::visitPHINode(PHINode &PHI) { + // create 2 PHIs: one for size and another for offset + PHINode *SizePHI = Builder.CreatePHI(IntTy, PHI.getNumIncomingValues()); + PHINode *OffsetPHI = Builder.CreatePHI(IntTy, PHI.getNumIncomingValues()); + + // insert right away in the cache to handle recursive PHIs + CacheMap[&PHI] = std::make_pair(SizePHI, OffsetPHI); + + // compute offset/size for each PHI incoming pointer + for (unsigned i = 0, e = PHI.getNumIncomingValues(); i != e; ++i) { + Builder.SetInsertPoint(PHI.getIncomingBlock(i)->getFirstInsertionPt()); + SizeOffsetEvalType EdgeData = compute_(PHI.getIncomingValue(i)); + + if (!bothKnown(EdgeData)) { + OffsetPHI->replaceAllUsesWith(UndefValue::get(IntTy)); + OffsetPHI->eraseFromParent(); + SizePHI->replaceAllUsesWith(UndefValue::get(IntTy)); + SizePHI->eraseFromParent(); + return unknown(); + } + SizePHI->addIncoming(EdgeData.first, PHI.getIncomingBlock(i)); + OffsetPHI->addIncoming(EdgeData.second, PHI.getIncomingBlock(i)); + } + + Value *Size = SizePHI, *Offset = OffsetPHI, *Tmp; + if ((Tmp = SizePHI->hasConstantValue())) { + Size = Tmp; + SizePHI->replaceAllUsesWith(Size); + SizePHI->eraseFromParent(); + } + if ((Tmp = OffsetPHI->hasConstantValue())) { + Offset = Tmp; + OffsetPHI->replaceAllUsesWith(Offset); + OffsetPHI->eraseFromParent(); 
+ } + return std::make_pair(Size, Offset); +} + +SizeOffsetEvalType ObjectSizeOffsetEvaluator::visitSelectInst(SelectInst &I) { + // ignore malformed self-looping selects + if (I.getTrueValue() == &I || I.getFalseValue() == &I) + return unknown(); + + SizeOffsetEvalType TrueSide = compute_(I.getTrueValue()); + SizeOffsetEvalType FalseSide = compute_(I.getFalseValue()); + + if (!bothKnown(TrueSide) || !bothKnown(FalseSide)) + return unknown(); + if (TrueSide == FalseSide) + return TrueSide; + + Value *Size = Builder.CreateSelect(I.getCondition(), TrueSide.first, + FalseSide.first); + Value *Offset = Builder.CreateSelect(I.getCondition(), TrueSide.second, + FalseSide.second); + return std::make_pair(Size, Offset); +} + +SizeOffsetEvalType ObjectSizeOffsetEvaluator::visitInstruction(Instruction &I) { + DEBUG(dbgs() << "ObjectSizeOffsetEvaluator unknown instruction:" << I <<'\n'); + return unknown(); +} diff --git a/lib/Analysis/MemoryDependenceAnalysis.cpp b/lib/Analysis/MemoryDependenceAnalysis.cpp index 3a544f3..059e574 100644 --- a/lib/Analysis/MemoryDependenceAnalysis.cpp +++ b/lib/Analysis/MemoryDependenceAnalysis.cpp @@ -16,13 +16,11 @@ #define DEBUG_TYPE "memdep" #include "llvm/Analysis/MemoryDependenceAnalysis.h" -#include "llvm/Analysis/ValueTracking.h" #include "llvm/Instructions.h" #include "llvm/IntrinsicInst.h" #include "llvm/Function.h" #include "llvm/LLVMContext.h" #include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/Analysis/CaptureTracking.h" #include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/MemoryBuiltins.h" @@ -229,13 +227,18 @@ getCallSiteDependencyFrom(CallSite CS, bool isReadOnlyCall, // Otherwise if the two calls don't interact (e.g. InstCS is readnone) // keep scanning. - break; + continue; default: return MemDepResult::getClobber(Inst); } } + + // If we could not obtain a pointer for the instruction and the instruction + // touches memory then assume that this is a dependency. + if (MR != AliasAnalysis::NoModRef) + return MemDepResult::getClobber(Inst); } - + // No dependence found. If this is the entry block of the function, it is // unknown, otherwise it is non-local. if (BB != &BB->getParent()->getEntryBlock()) @@ -339,86 +342,6 @@ getLoadLoadClobberFullWidthSize(const Value *MemLocBase, int64_t MemLocOffs, } } -namespace { - /// Only find pointer captures which happen before the given instruction. Uses - /// the dominator tree to determine whether one instruction is before another. 
- struct CapturesBefore : public CaptureTracker { - CapturesBefore(const Instruction *I, DominatorTree *DT) - : BeforeHere(I), DT(DT), Captured(false) {} - - void tooManyUses() { Captured = true; } - - bool shouldExplore(Use *U) { - Instruction *I = cast(U->getUser()); - BasicBlock *BB = I->getParent(); - if (BeforeHere != I && - (!DT->isReachableFromEntry(BB) || DT->dominates(BeforeHere, I))) - return false; - return true; - } - - bool captured(Use *U) { - Instruction *I = cast(U->getUser()); - BasicBlock *BB = I->getParent(); - if (BeforeHere != I && - (!DT->isReachableFromEntry(BB) || DT->dominates(BeforeHere, I))) - return false; - Captured = true; - return true; - } - - const Instruction *BeforeHere; - DominatorTree *DT; - - bool Captured; - }; -} - -AliasAnalysis::ModRefResult -MemoryDependenceAnalysis::getModRefInfo(const Instruction *Inst, - const AliasAnalysis::Location &MemLoc) { - AliasAnalysis::ModRefResult MR = AA->getModRefInfo(Inst, MemLoc); - if (MR != AliasAnalysis::ModRef) return MR; - - // FIXME: this is really just shoring-up a deficiency in alias analysis. - // BasicAA isn't willing to spend linear time determining whether an alloca - // was captured before or after this particular call, while we are. However, - // with a smarter AA in place, this test is just wasting compile time. - if (!DT) return AliasAnalysis::ModRef; - const Value *Object = GetUnderlyingObject(MemLoc.Ptr, TD); - if (!isIdentifiedObject(Object) || isa(Object)) - return AliasAnalysis::ModRef; - ImmutableCallSite CS(Inst); - if (!CS.getInstruction()) return AliasAnalysis::ModRef; - - CapturesBefore CB(Inst, DT); - llvm::PointerMayBeCaptured(Object, &CB); - - if (isa(Object) || CS.getInstruction() == Object || CB.Captured) - return AliasAnalysis::ModRef; - - unsigned ArgNo = 0; - for (ImmutableCallSite::arg_iterator CI = CS.arg_begin(), CE = CS.arg_end(); - CI != CE; ++CI, ++ArgNo) { - // Only look at the no-capture or byval pointer arguments. If this - // pointer were passed to arguments that were neither of these, then it - // couldn't be no-capture. - if (!(*CI)->getType()->isPointerTy() || - (!CS.doesNotCapture(ArgNo) && !CS.isByValArgument(ArgNo))) - continue; - - // If this is a no-capture pointer argument, see if we can tell that it - // is impossible to alias the pointer we're checking. If not, we have to - // assume that the call could touch the pointer, even though it doesn't - // escape. - if (!AA->isNoAlias(AliasAnalysis::Location(*CI), - AliasAnalysis::Location(Object))) { - return AliasAnalysis::ModRef; - } - } - return AliasAnalysis::NoModRef; -} - /// getPointerDependencyFrom - Return the instruction on which a memory /// location depends. If isLoad is true, this routine ignores may-aliases with /// read-only operations. If isLoad is false, this routine ignores may-aliases @@ -556,8 +479,7 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad, // a subsequent bitcast of the malloc call result. There can be stores to // the malloced memory between the malloc call and its bitcast uses, and we // need to continue scanning until the malloc call. - if (isa(Inst) || - (isa(Inst) && extractMallocCall(Inst))) { + if (isa(Inst) || isNoAliasFn(Inst)) { const Value *AccessPtr = GetUnderlyingObject(MemLoc.Ptr, TD); if (AccessPtr == Inst || AA->isMustAlias(Inst, AccessPtr)) @@ -566,7 +488,11 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad, } // See if this instruction (e.g. a call or vaarg) mod/ref's the pointer. 
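+    // (For calls, a bare ModRef answer gets refined below through
+    // AliasAnalysis::callCapturesBefore, which now hosts the capture
+    // tracking that the deleted CapturesBefore helper performed here.)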
- switch (getModRefInfo(Inst, MemLoc)) { + AliasAnalysis::ModRefResult MR = AA->getModRefInfo(Inst, MemLoc); + // If necessary, perform additional analysis. + if (MR == AliasAnalysis::ModRef) + MR = AA->callCapturesBefore(Inst, MemLoc, DT); + switch (MR) { case AliasAnalysis::NoModRef: // If the call has no effect on the queried pointer, just ignore it. continue; @@ -984,7 +910,7 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer, if (!Pair.second) { if (CacheInfo->Size < Loc.Size) { // The query's Size is greater than the cached one. Throw out the - // cached data and procede with the query at the greater size. + // cached data and proceed with the query at the greater size. CacheInfo->Pair = BBSkipFirstBlockPair(); CacheInfo->Size = Loc.Size; for (NonLocalDepInfo::iterator DI = CacheInfo->NonLocalDeps.begin(), diff --git a/lib/Analysis/ModuleDebugInfoPrinter.cpp b/lib/Analysis/ModuleDebugInfoPrinter.cpp index e7e999c..f8c7514 100644 --- a/lib/Analysis/ModuleDebugInfoPrinter.cpp +++ b/lib/Analysis/ModuleDebugInfoPrinter.cpp @@ -16,10 +16,10 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/Passes.h" -#include "llvm/Analysis/DebugInfo.h" #include "llvm/Assembly/Writer.h" -#include "llvm/Pass.h" +#include "llvm/DebugInfo.h" #include "llvm/Function.h" +#include "llvm/Pass.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include "llvm/ADT/Statistic.h" diff --git a/lib/Analysis/PathNumbering.cpp b/lib/Analysis/PathNumbering.cpp index 80c5222..d4ad726 100644 --- a/lib/Analysis/PathNumbering.cpp +++ b/lib/Analysis/PathNumbering.cpp @@ -31,11 +31,11 @@ #include "llvm/Instructions.h" #include "llvm/Module.h" #include "llvm/Pass.h" +#include "llvm/TypeBuilder.h" #include "llvm/Support/CFG.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/TypeBuilder.h" #include "llvm/Support/raw_ostream.h" #include diff --git a/lib/Analysis/ProfileInfoLoader.cpp b/lib/Analysis/ProfileInfoLoader.cpp index eaa38da..5c7c97c 100644 --- a/lib/Analysis/ProfileInfoLoader.cpp +++ b/lib/Analysis/ProfileInfoLoader.cpp @@ -83,10 +83,8 @@ const unsigned ProfileInfoLoader::Uncounted = ~0U; // program if the file is invalid or broken. 
// ProfileInfoLoader::ProfileInfoLoader(const char *ToolName, - const std::string &Filename, - Module &TheModule) : - Filename(Filename), - M(TheModule), Warned(false) { + const std::string &Filename) + : Filename(Filename) { FILE *F = fopen(Filename.c_str(), "rb"); if (F == 0) { errs() << ToolName << ": Error opening '" << Filename << "': "; diff --git a/lib/Analysis/ProfileInfoLoaderPass.cpp b/lib/Analysis/ProfileInfoLoaderPass.cpp index c4da807..5ecf052 100644 --- a/lib/Analysis/ProfileInfoLoaderPass.cpp +++ b/lib/Analysis/ProfileInfoLoaderPass.cpp @@ -152,7 +152,7 @@ void LoaderPass::readEdge(ProfileInfo::Edge e, } bool LoaderPass::runOnModule(Module &M) { - ProfileInfoLoader PIL("profile-loader", Filename, M); + ProfileInfoLoader PIL("profile-loader", Filename); EdgeInformation.clear(); std::vector Counters = PIL.getRawEdgeCounts(); diff --git a/lib/Analysis/RegionInfo.cpp b/lib/Analysis/RegionInfo.cpp index b507b1e..868f483 100644 --- a/lib/Analysis/RegionInfo.cpp +++ b/lib/Analysis/RegionInfo.cpp @@ -47,7 +47,7 @@ static cl::opt printStyle("print-region-style", cl::values( clEnumValN(Region::PrintNone, "none", "print no details"), clEnumValN(Region::PrintBB, "bb", - "print regions in detail with block_iterator"), + "print regions in detail with block_node_iterator"), clEnumValN(Region::PrintRN, "rn", "print regions in detail with element_iterator"), clEnumValEnd)); @@ -246,19 +246,19 @@ void Region::verifyRegionNest() const { verifyRegion(); } -Region::block_iterator Region::block_begin() { +Region::block_node_iterator Region::block_node_begin() { return GraphTraits >::nodes_begin(this); } -Region::block_iterator Region::block_end() { +Region::block_node_iterator Region::block_node_end() { return GraphTraits >::nodes_end(this); } -Region::const_block_iterator Region::block_begin() const { +Region::const_block_node_iterator Region::block_node_begin() const { return GraphTraits >::nodes_begin(this); } -Region::const_block_iterator Region::block_end() const { +Region::const_block_node_iterator Region::block_node_end() const { return GraphTraits >::nodes_end(this); } @@ -425,7 +425,9 @@ void Region::print(raw_ostream &OS, bool print_tree, unsigned level, OS.indent(level*2 + 2); if (Style == PrintBB) { - for (const_block_iterator I = block_begin(), E = block_end(); I!=E; ++I) + for (const_block_node_iterator I = block_node_begin(), + E = block_node_end(); + I != E; ++I) OS << **I << ", "; // TODO: remove the last "," } else if (Style == PrintRN) { for (const_element_iterator I = element_begin(), E = element_end(); I!=E; ++I) diff --git a/lib/Analysis/RegionPass.cpp b/lib/Analysis/RegionPass.cpp index 3a3529b..c97b5eb 100644 --- a/lib/Analysis/RegionPass.cpp +++ b/lib/Analysis/RegionPass.cpp @@ -195,7 +195,8 @@ public: virtual bool runOnRegion(Region *R, RGPassManager &RGM) { Out << Banner; - for (Region::block_iterator I = R->block_begin(), E = R->block_end(); + for (Region::block_node_iterator I = R->block_node_begin(), + E = R->block_node_end(); I != E; ++I) (*I)->getEntry()->print(Out); diff --git a/lib/Analysis/RegionPrinter.cpp b/lib/Analysis/RegionPrinter.cpp index a1730b0..8b23cc7 100644 --- a/lib/Analysis/RegionPrinter.cpp +++ b/lib/Analysis/RegionPrinter.cpp @@ -122,13 +122,11 @@ struct DOTGraphTraits : public DOTGraphTraits { RegionInfo *RI = R->getRegionInfo(); for (Region::const_block_iterator BI = R->block_begin(), - BE = R->block_end(); BI != BE; ++BI) { - BasicBlock *BB = (*BI)->getNodeAs(); - if (RI->getRegionFor(BB) == R) + BE = R->block_end(); BI != BE; ++BI) + if 
(RI->getRegionFor(*BI) == R) O.indent(2 * (depth + 1)) << "Node" - << static_cast(RI->getTopLevelRegion()->getBBNode(BB)) + << static_cast(RI->getTopLevelRegion()->getBBNode(*BI)) << ";\n"; - } O.indent(2 * depth) << "}\n"; } diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp index 205227c..a654648 100644 --- a/lib/Analysis/ScalarEvolution.cpp +++ b/lib/Analysis/ScalarEvolution.cpp @@ -826,8 +826,7 @@ const SCEV *ScalarEvolution::getTruncateExpr(const SCEV *Op, // Fold if the operand is constant. if (const SCEVConstant *SC = dyn_cast(Op)) return getConstant( - cast(ConstantExpr::getTrunc(SC->getValue(), - getEffectiveSCEVType(Ty)))); + cast(ConstantExpr::getTrunc(SC->getValue(), Ty))); // trunc(trunc(x)) --> trunc(x) if (const SCEVTruncateExpr *ST = dyn_cast(Op)) @@ -879,13 +878,6 @@ const SCEV *ScalarEvolution::getTruncateExpr(const SCEV *Op, return getAddRecExpr(Operands, AddRec->getLoop(), SCEV::FlagAnyWrap); } - // As a special case, fold trunc(undef) to undef. We don't want to - // know too much about SCEVUnknowns, but this special case is handy - // and harmless. - if (const SCEVUnknown *U = dyn_cast(Op)) - if (isa(U->getValue())) - return getSCEV(UndefValue::get(Ty)); - // The cast wasn't folded; create an explicit cast node. We can reuse // the existing insert position since if we get here, we won't have // made any changes which would invalidate it. @@ -906,8 +898,7 @@ const SCEV *ScalarEvolution::getZeroExtendExpr(const SCEV *Op, // Fold if the operand is constant. if (const SCEVConstant *SC = dyn_cast(Op)) return getConstant( - cast(ConstantExpr::getZExt(SC->getValue(), - getEffectiveSCEVType(Ty)))); + cast(ConstantExpr::getZExt(SC->getValue(), Ty))); // zext(zext(x)) --> zext(x) if (const SCEVZeroExtendExpr *SZ = dyn_cast(Op)) @@ -976,12 +967,15 @@ const SCEV *ScalarEvolution::getZeroExtendExpr(const SCEV *Op, Type *WideTy = IntegerType::get(getContext(), BitWidth * 2); // Check whether Start+Step*MaxBECount has no unsigned overflow. const SCEV *ZMul = getMulExpr(CastedMaxBECount, Step); - const SCEV *Add = getAddExpr(Start, ZMul); + const SCEV *ZAdd = getZeroExtendExpr(getAddExpr(Start, ZMul), WideTy); + const SCEV *WideStart = getZeroExtendExpr(Start, WideTy); + const SCEV *WideMaxBECount = + getZeroExtendExpr(CastedMaxBECount, WideTy); const SCEV *OperandExtendedAdd = - getAddExpr(getZeroExtendExpr(Start, WideTy), - getMulExpr(getZeroExtendExpr(CastedMaxBECount, WideTy), + getAddExpr(WideStart, + getMulExpr(WideMaxBECount, getZeroExtendExpr(Step, WideTy))); - if (getZeroExtendExpr(Add, WideTy) == OperandExtendedAdd) { + if (ZAdd == OperandExtendedAdd) { // Cache knowledge of AR NUW, which is propagated to this AddRec. const_cast(AR)->setNoWrapFlags(SCEV::FlagNUW); // Return the expression with the addrec on the outside. @@ -991,13 +985,11 @@ const SCEV *ScalarEvolution::getZeroExtendExpr(const SCEV *Op, } // Similar to above, only this time treat the step value as signed. // This covers loops that count down. - const SCEV *SMul = getMulExpr(CastedMaxBECount, Step); - Add = getAddExpr(Start, SMul); OperandExtendedAdd = - getAddExpr(getZeroExtendExpr(Start, WideTy), - getMulExpr(getZeroExtendExpr(CastedMaxBECount, WideTy), + getAddExpr(WideStart, + getMulExpr(WideMaxBECount, getSignExtendExpr(Step, WideTy))); - if (getZeroExtendExpr(Add, WideTy) == OperandExtendedAdd) { + if (ZAdd == OperandExtendedAdd) { // Cache knowledge of AR NW, which is propagated to this AddRec. // Negative step causes unsigned wrap, but it still can't self-wrap. 
const_cast(AR)->setNoWrapFlags(SCEV::FlagNW); @@ -1164,8 +1156,7 @@ const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op, // Fold if the operand is constant. if (const SCEVConstant *SC = dyn_cast(Op)) return getConstant( - cast(ConstantExpr::getSExt(SC->getValue(), - getEffectiveSCEVType(Ty)))); + cast(ConstantExpr::getSExt(SC->getValue(), Ty))); // sext(sext(x)) --> sext(x) if (const SCEVSignExtendExpr *SS = dyn_cast(Op)) @@ -1242,12 +1233,15 @@ const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op, Type *WideTy = IntegerType::get(getContext(), BitWidth * 2); // Check whether Start+Step*MaxBECount has no signed overflow. const SCEV *SMul = getMulExpr(CastedMaxBECount, Step); - const SCEV *Add = getAddExpr(Start, SMul); + const SCEV *SAdd = getSignExtendExpr(getAddExpr(Start, SMul), WideTy); + const SCEV *WideStart = getSignExtendExpr(Start, WideTy); + const SCEV *WideMaxBECount = + getZeroExtendExpr(CastedMaxBECount, WideTy); const SCEV *OperandExtendedAdd = - getAddExpr(getSignExtendExpr(Start, WideTy), - getMulExpr(getZeroExtendExpr(CastedMaxBECount, WideTy), + getAddExpr(WideStart, + getMulExpr(WideMaxBECount, getSignExtendExpr(Step, WideTy))); - if (getSignExtendExpr(Add, WideTy) == OperandExtendedAdd) { + if (SAdd == OperandExtendedAdd) { // Cache knowledge of AR NSW, which is propagated to this AddRec. const_cast(AR)->setNoWrapFlags(SCEV::FlagNSW); // Return the expression with the addrec on the outside. @@ -1257,13 +1251,11 @@ const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op, } // Similar to above, only this time treat the step value as unsigned. // This covers loops that count up with an unsigned step. - const SCEV *UMul = getMulExpr(CastedMaxBECount, Step); - Add = getAddExpr(Start, UMul); OperandExtendedAdd = - getAddExpr(getSignExtendExpr(Start, WideTy), - getMulExpr(getZeroExtendExpr(CastedMaxBECount, WideTy), + getAddExpr(WideStart, + getMulExpr(WideMaxBECount, getZeroExtendExpr(Step, WideTy))); - if (getSignExtendExpr(Add, WideTy) == OperandExtendedAdd) { + if (SAdd == OperandExtendedAdd) { // Cache knowledge of AR NSW, which is propagated to this AddRec. const_cast(AR)->setNoWrapFlags(SCEV::FlagNSW); // Return the expression with the addrec on the outside. @@ -1345,13 +1337,6 @@ const SCEV *ScalarEvolution::getAnyExtendExpr(const SCEV *Op, return getAddRecExpr(Ops, AR->getLoop(), SCEV::FlagNW); } - // As a special case, fold anyext(undef) to undef. We don't want to - // know too much about SCEVUnknowns, but this special case is handy - // and harmless. - if (const SCEVUnknown *U = dyn_cast(Op)) - if (isa(U->getValue())) - return getSCEV(UndefValue::get(Ty)); - // If the expression is obviously signed, use the sext cast value. if (isa(Op)) return SExt; @@ -1839,7 +1824,7 @@ static uint64_t umul_ov(uint64_t i, uint64_t j, bool &Overflow) { /// Compute the result of "n choose k", the binomial coefficient. If an /// intermediate computation overflows, Overflow will be set and the return will -/// be garbage. Overflow is not cleared on absense of overflow. +/// be garbage. Overflow is not cleared on absence of overflow. static uint64_t Choose(uint64_t n, uint64_t k, bool &Overflow) { // We use the multiplicative formula: // n(n-1)(n-2)...(n-(k-1)) / k(k-1)(k-2)...1 . 
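For reference, a minimal (hypothetical) use of this helper, following the
multiplicative formula above: choose(4,2) = (4*3)/(2*1) = 6, and the caller
must clear the flag up front since Choose only ever sets it:

    bool Overflow = false;
    uint64_t C = Choose(4, 2, Overflow); // C == 6, Overflow still false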
@@ -2038,63 +2023,67 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl &Ops, for (unsigned OtherIdx = Idx+1; OtherIdx < Ops.size() && isa(Ops[OtherIdx]); ++OtherIdx) { - if (AddRecLoop == cast(Ops[OtherIdx])->getLoop()) { - // {A1,+,A2,+,...,+,An} * {B1,+,B2,+,...,+,Bn} - // = {x=1 in [ sum y=x..2x [ sum z=max(y-x, y-n)..min(x,n) [ - // choose(x, 2x)*choose(2x-y, x-z)*A_{y-z}*B_z - // ]]],+,...up to x=2n}. - // Note that the arguments to choose() are always integers with values - // known at compile time, never SCEV objects. - // - // The implementation avoids pointless extra computations when the two - // addrec's are of different length (mathematically, it's equivalent to - // an infinite stream of zeros on the right). - bool OpsModified = false; - for (; OtherIdx != Ops.size() && isa(Ops[OtherIdx]); - ++OtherIdx) - if (const SCEVAddRecExpr *OtherAddRec = - dyn_cast(Ops[OtherIdx])) - if (OtherAddRec->getLoop() == AddRecLoop) { - bool Overflow = false; - Type *Ty = AddRec->getType(); - bool LargerThan64Bits = getTypeSizeInBits(Ty) > 64; - SmallVector AddRecOps; - for (int x = 0, xe = AddRec->getNumOperands() + - OtherAddRec->getNumOperands() - 1; - x != xe && !Overflow; ++x) { - const SCEV *Term = getConstant(Ty, 0); - for (int y = x, ye = 2*x+1; y != ye && !Overflow; ++y) { - uint64_t Coeff1 = Choose(x, 2*x - y, Overflow); - for (int z = std::max(y-x, y-(int)AddRec->getNumOperands()+1), - ze = std::min(x+1, (int)OtherAddRec->getNumOperands()); - z < ze && !Overflow; ++z) { - uint64_t Coeff2 = Choose(2*x - y, x-z, Overflow); - uint64_t Coeff; - if (LargerThan64Bits) - Coeff = umul_ov(Coeff1, Coeff2, Overflow); - else - Coeff = Coeff1*Coeff2; - const SCEV *CoeffTerm = getConstant(Ty, Coeff); - const SCEV *Term1 = AddRec->getOperand(y-z); - const SCEV *Term2 = OtherAddRec->getOperand(z); - Term = getAddExpr(Term, getMulExpr(CoeffTerm, Term1,Term2)); - } - } - AddRecOps.push_back(Term); - } - if (!Overflow) { - const SCEV *NewAddRec = getAddRecExpr(AddRecOps, - AddRec->getLoop(), - SCEV::FlagAnyWrap); - if (Ops.size() == 2) return NewAddRec; - Ops[Idx] = AddRec = cast(NewAddRec); - Ops.erase(Ops.begin() + OtherIdx); --OtherIdx; - OpsModified = true; - } + if (AddRecLoop != cast(Ops[OtherIdx])->getLoop()) + continue; + + // {A1,+,A2,+,...,+,An} * {B1,+,B2,+,...,+,Bn} + // = {x=1 in [ sum y=x..2x [ sum z=max(y-x, y-n)..min(x,n) [ + // choose(x, 2x)*choose(2x-y, x-z)*A_{y-z}*B_z + // ]]],+,...up to x=2n}. + // Note that the arguments to choose() are always integers with values + // known at compile time, never SCEV objects. + // + // The implementation avoids pointless extra computations when the two + // addrec's are of different length (mathematically, it's equivalent to + // an infinite stream of zeros on the right). 
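+      // A concrete sketch of the formula (not from the source): over
+      // x = 0,1,2,..., {1,+,1} * {1,+,1} evaluates to (x+1)^2, whose
+      // addrec form is {1,+,3,+,2}.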
+ bool OpsModified = false; + for (; OtherIdx != Ops.size() && isa(Ops[OtherIdx]); + ++OtherIdx) { + const SCEVAddRecExpr *OtherAddRec = + dyn_cast(Ops[OtherIdx]); + if (!OtherAddRec || OtherAddRec->getLoop() != AddRecLoop) + continue; + + bool Overflow = false; + Type *Ty = AddRec->getType(); + bool LargerThan64Bits = getTypeSizeInBits(Ty) > 64; + SmallVector AddRecOps; + for (int x = 0, xe = AddRec->getNumOperands() + + OtherAddRec->getNumOperands() - 1; x != xe && !Overflow; ++x) { + const SCEV *Term = getConstant(Ty, 0); + for (int y = x, ye = 2*x+1; y != ye && !Overflow; ++y) { + uint64_t Coeff1 = Choose(x, 2*x - y, Overflow); + for (int z = std::max(y-x, y-(int)AddRec->getNumOperands()+1), + ze = std::min(x+1, (int)OtherAddRec->getNumOperands()); + z < ze && !Overflow; ++z) { + uint64_t Coeff2 = Choose(2*x - y, x-z, Overflow); + uint64_t Coeff; + if (LargerThan64Bits) + Coeff = umul_ov(Coeff1, Coeff2, Overflow); + else + Coeff = Coeff1*Coeff2; + const SCEV *CoeffTerm = getConstant(Ty, Coeff); + const SCEV *Term1 = AddRec->getOperand(y-z); + const SCEV *Term2 = OtherAddRec->getOperand(z); + Term = getAddExpr(Term, getMulExpr(CoeffTerm, Term1,Term2)); } - if (OpsModified) - return getMulExpr(Ops); + } + AddRecOps.push_back(Term); + } + if (!Overflow) { + const SCEV *NewAddRec = getAddRecExpr(AddRecOps, AddRec->getLoop(), + SCEV::FlagAnyWrap); + if (Ops.size() == 2) return NewAddRec; + Ops[Idx] = NewAddRec; + Ops.erase(Ops.begin() + OtherIdx); --OtherIdx; + OpsModified = true; + AddRec = dyn_cast(NewAddRec); + if (!AddRec) + break; + } } + if (OpsModified) + return getMulExpr(Ops); } // Otherwise couldn't fold anything into this recurrence. Move onto the @@ -2723,7 +2712,7 @@ const SCEV *ScalarEvolution::getCouldNotCompute() { const SCEV *ScalarEvolution::getSCEV(Value *V) { assert(isSCEVable(V->getType()) && "Value is not SCEVable!"); - ValueExprMapType::const_iterator I = ValueExprMap.find(V); + ValueExprMapType::const_iterator I = ValueExprMap.find_as(V); if (I != ValueExprMap.end()) return I->second; const SCEV *S = createSCEV(V); @@ -2960,7 +2949,7 @@ ScalarEvolution::ForgetSymbolicName(Instruction *PN, const SCEV *SymName) { if (!Visited.insert(I)) continue; ValueExprMapType::iterator It = - ValueExprMap.find(static_cast(I)); + ValueExprMap.find_as(static_cast(I)); if (It != ValueExprMap.end()) { const SCEV *Old = It->second; @@ -3017,7 +3006,7 @@ const SCEV *ScalarEvolution::createNodeForPHI(PHINode *PN) { if (BEValueV && StartValueV) { // While we are analyzing this PHI node, handle its value symbolically. 
const SCEV *SymbolicName = getUnknown(PN); - assert(ValueExprMap.find(PN) == ValueExprMap.end() && + assert(ValueExprMap.find_as(PN) == ValueExprMap.end() && "PHI node already processed?"); ValueExprMap.insert(std::make_pair(SCEVCallbackVH(PN, this), SymbolicName)); @@ -4081,7 +4070,7 @@ ScalarEvolution::getBackedgeTakenInfo(const Loop *L) { if (!Visited.insert(I)) continue; ValueExprMapType::iterator It = - ValueExprMap.find(static_cast(I)); + ValueExprMap.find_as(static_cast(I)); if (It != ValueExprMap.end()) { const SCEV *Old = It->second; @@ -4132,7 +4121,8 @@ void ScalarEvolution::forgetLoop(const Loop *L) { Instruction *I = Worklist.pop_back_val(); if (!Visited.insert(I)) continue; - ValueExprMapType::iterator It = ValueExprMap.find(static_cast(I)); + ValueExprMapType::iterator It = + ValueExprMap.find_as(static_cast(I)); if (It != ValueExprMap.end()) { forgetMemoizedResults(It->second); ValueExprMap.erase(It); @@ -4165,7 +4155,8 @@ void ScalarEvolution::forgetValue(Value *V) { I = Worklist.pop_back_val(); if (!Visited.insert(I)) continue; - ValueExprMapType::iterator It = ValueExprMap.find(static_cast(I)); + ValueExprMapType::iterator It = + ValueExprMap.find_as(static_cast(I)); if (It != ValueExprMap.end()) { forgetMemoizedResults(It->second); ValueExprMap.erase(It); @@ -5379,6 +5370,12 @@ SolveQuadraticEquation(const SCEVAddRecExpr *AddRec, ScalarEvolution &SE) { SqrtTerm *= B; SqrtTerm -= Four * (A * C); + if (SqrtTerm.isNegative()) { + // The loop is provably infinite. + const SCEV *CNC = SE.getCouldNotCompute(); + return std::make_pair(CNC, CNC); + } + // Compute sqrt(B^2-4ac). This is guaranteed to be the nearest // integer value or else APInt::sqrt() will assert. APInt SqrtVal(SqrtTerm.sqrt()); @@ -5481,7 +5478,7 @@ ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L) { // to 0, it must be counting down to equal 0. Consequently, N = Start / -Step. // We have not yet seen any such cases. const SCEVConstant *StepC = dyn_cast(Step); - if (StepC == 0) + if (StepC == 0 || StepC->getValue()->equalsInt(0)) return getCouldNotCompute(); // For positive steps (counting up until unsigned overflow): @@ -5602,9 +5599,14 @@ static bool HasSameValue(const SCEV *A, const SCEV *B) { /// predicate Pred. Return true iff any changes were made. /// bool ScalarEvolution::SimplifyICmpOperands(ICmpInst::Predicate &Pred, - const SCEV *&LHS, const SCEV *&RHS) { + const SCEV *&LHS, const SCEV *&RHS, + unsigned Depth) { bool Changed = false; + // If we hit the max recursion limit bail out. + if (Depth >= 3) + return false; + // Canonicalize a constant to the right side. if (const SCEVConstant *LHSC = dyn_cast(LHS)) { // Check for both operands constant. @@ -5642,6 +5644,16 @@ bool ScalarEvolution::SimplifyICmpOperands(ICmpInst::Predicate &Pred, default: llvm_unreachable("Unexpected ICmpInst::Predicate value!"); case ICmpInst::ICMP_EQ: case ICmpInst::ICMP_NE: + // Fold ((-1) * %a) + %b == 0 (equivalent to %b-%a == 0) into %a == %b. + if (!RA) + if (const SCEVAddExpr *AE = dyn_cast(LHS)) + if (const SCEVMulExpr *ME = dyn_cast(AE->getOperand(0))) + if (AE->getNumOperands() == 2 && ME->getNumOperands() == 2 && + ME->getOperand(0)->isAllOnesValue()) { + RHS = AE->getOperand(1); + LHS = ME->getOperand(1); + Changed = true; + } break; case ICmpInst::ICMP_UGE: if ((RA - 1).isMinValue()) { @@ -5843,6 +5855,11 @@ bool ScalarEvolution::SimplifyICmpOperands(ICmpInst::Predicate &Pred, // TODO: More simplifications are possible here. 
+ // Recursively simplify until we either hit a recursion limit or nothing + // changes. + if (Changed) + return SimplifyICmpOperands(Pred, LHS, RHS, Depth+1); + return Changed; trivially_true: @@ -6040,12 +6057,34 @@ ScalarEvolution::isLoopEntryGuardedByCond(const Loop *L, return false; } +/// RAII wrapper to prevent recursive application of isImpliedCond. +/// ScalarEvolution's PendingLoopPredicates set must be empty unless we are +/// currently evaluating isImpliedCond. +struct MarkPendingLoopPredicate { + Value *Cond; + DenseSet &LoopPreds; + bool Pending; + + MarkPendingLoopPredicate(Value *C, DenseSet &LP) + : Cond(C), LoopPreds(LP) { + Pending = !LoopPreds.insert(Cond).second; + } + ~MarkPendingLoopPredicate() { + if (!Pending) + LoopPreds.erase(Cond); + } +}; + /// isImpliedCond - Test whether the condition described by Pred, LHS, /// and RHS is true whenever the given Cond value evaluates to true. bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS, Value *FoundCondValue, bool Inverse) { + MarkPendingLoopPredicate Mark(FoundCondValue, PendingLoopPredicates); + if (Mark.Pending) + return false; + // Recursively handle And and Or conditions. if (BinaryOperator *BO = dyn_cast(FoundCondValue)) { if (BO->getOpcode() == Instruction::And) { @@ -6572,6 +6611,8 @@ void ScalarEvolution::releaseMemory() { I->second.clear(); } + assert(PendingLoopPredicates.empty() && "isImpliedCond garbage"); + BackedgeTakenCounts.clear(); ConstantEvolutionLoopExitValue.clear(); ValuesAtScopes.clear(); @@ -6859,44 +6900,27 @@ bool ScalarEvolution::properlyDominates(const SCEV *S, const BasicBlock *BB) { return getBlockDisposition(S, BB) == ProperlyDominatesBlock; } -bool ScalarEvolution::hasOperand(const SCEV *S, const SCEV *Op) const { - switch (S->getSCEVType()) { - case scConstant: - return false; - case scTruncate: - case scZeroExtend: - case scSignExtend: { - const SCEVCastExpr *Cast = cast(S); - const SCEV *CastOp = Cast->getOperand(); - return Op == CastOp || hasOperand(CastOp, Op); - } - case scAddRecExpr: - case scAddExpr: - case scMulExpr: - case scUMaxExpr: - case scSMaxExpr: { - const SCEVNAryExpr *NAry = cast(S); - for (SCEVNAryExpr::op_iterator I = NAry->op_begin(), E = NAry->op_end(); - I != E; ++I) { - const SCEV *NAryOp = *I; - if (NAryOp == Op || hasOperand(NAryOp, Op)) - return true; - } - return false; - } - case scUDivExpr: { - const SCEVUDivExpr *UDiv = cast(S); - const SCEV *LHS = UDiv->getLHS(), *RHS = UDiv->getRHS(); - return LHS == Op || hasOperand(LHS, Op) || - RHS == Op || hasOperand(RHS, Op); - } - case scUnknown: - return false; - case scCouldNotCompute: - llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!"); - default: - llvm_unreachable("Unknown SCEV kind!"); +namespace { +// Search for a SCEV expression node within an expression tree. +// Implements SCEVTraversal::Visitor. 
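+// visitAll drives the walk: follow() is consulted before descending into a
+// node's operands, and isDone() short-circuits the traversal, so the search
+// stops at the first match instead of visiting the whole tree.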
+struct SCEVSearch { + const SCEV *Node; + bool IsFound; + + SCEVSearch(const SCEV *N): Node(N), IsFound(false) {} + + bool follow(const SCEV *S) { + IsFound |= (S == Node); + return !IsFound; } + bool isDone() const { return IsFound; } +}; +} + +bool ScalarEvolution::hasOperand(const SCEV *S, const SCEV *Op) const { + SCEVSearch Search(Op); + visitAll(S, Search); + return Search.IsFound; } void ScalarEvolution::forgetMemoizedResults(const SCEV *S) { diff --git a/lib/Analysis/ScalarEvolutionExpander.cpp b/lib/Analysis/ScalarEvolutionExpander.cpp index 69507be..62710c5 100644 --- a/lib/Analysis/ScalarEvolutionExpander.cpp +++ b/lib/Analysis/ScalarEvolutionExpander.cpp @@ -37,7 +37,7 @@ Value *SCEVExpander::ReuseOrCreateCast(Value *V, Type *Ty, // We use this precondition to produce a cast that will dominate all its // uses. In particular, this is crucial for the case where the builder's // insertion point *is* the point where we were asked to put the cast. - // Since we don't know the the builder's insertion point is actually + // Since we don't know the builder's insertion point is actually // where the uses will be added (only that it dominates it), we are // not allowed to move it. BasicBlock::iterator BIP = Builder.GetInsertPoint(); @@ -955,7 +955,8 @@ bool SCEVExpander::hoistIVInc(Instruction *IncV, Instruction *InsertPos) { // InsertPos must itself dominate IncV so that IncV's new position satisfies // its existing users. - if (!SE.DT->dominates(InsertPos->getParent(), IncV->getParent())) + if (isa(InsertPos) + || !SE.DT->dominates(InsertPos->getParent(), IncV->getParent())) return false; // Check that the chain of IV operands leading back to Phi can be hoisted. @@ -1699,3 +1700,44 @@ unsigned SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT, } return NumElim; } + +namespace { +// Search for a SCEV subexpression that is not safe to expand. Any expression +// that may expand to a !isSafeToSpeculativelyExecute value is unsafe, namely +// UDiv expressions. We don't know if the UDiv is derived from an IR divide +// instruction, but the important thing is that we prove the denominator is +// nonzero before expansion. +// +// IVUsers already checks that IV-derived expressions are safe. So this check is +// only needed when the expression includes some subexpression that is not IV +// derived. +// +// Currently, we only allow division by a nonzero constant here. If this is +// inadequate, we could easily allow division by SCEVUnknown by using +// ValueTracking to check isKnownNonZero(). +struct SCEVFindUnsafe { + bool IsUnsafe; + + SCEVFindUnsafe(): IsUnsafe(false) {} + + bool follow(const SCEV *S) { + const SCEVUDivExpr *D = dyn_cast(S); + if (!D) + return true; + const SCEVConstant *SC = dyn_cast(D->getRHS()); + if (SC && !SC->getValue()->isZero()) + return true; + IsUnsafe = true; + return false; + } + bool isDone() const { return IsUnsafe; } +}; +} + +namespace llvm { +bool isSafeToExpand(const SCEV *S) { + SCEVFindUnsafe Search; + visitAll(S, Search); + return !Search.IsUnsafe; +} +} diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp index 1418e01..cea34e1 100644 --- a/lib/Analysis/ValueTracking.cpp +++ b/lib/Analysis/ValueTracking.cpp @@ -694,7 +694,7 @@ void llvm::ComputeMaskedBits(Value *V, APInt &KnownZero, APInt &KnownOne, // taking conservative care to avoid excessive recursion. if (Depth < MaxDepth - 1 && !KnownZero && !KnownOne) { // Skip if every incoming value references to ourself. 
- if (P->hasConstantValue() == P) + if (dyn_cast_or_null(P->hasConstantValue())) break; KnownZero = APInt::getAllOnesValue(BitWidth); @@ -1796,6 +1796,37 @@ llvm::GetUnderlyingObject(Value *V, const TargetData *TD, unsigned MaxLookup) { return V; } +void +llvm::GetUnderlyingObjects(Value *V, + SmallVectorImpl &Objects, + const TargetData *TD, + unsigned MaxLookup) { + SmallPtrSet Visited; + SmallVector Worklist; + Worklist.push_back(V); + do { + Value *P = Worklist.pop_back_val(); + P = GetUnderlyingObject(P, TD, MaxLookup); + + if (!Visited.insert(P)) + continue; + + if (SelectInst *SI = dyn_cast(P)) { + Worklist.push_back(SI->getTrueValue()); + Worklist.push_back(SI->getFalseValue()); + continue; + } + + if (PHINode *PN = dyn_cast(P)) { + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) + Worklist.push_back(PN->getIncomingValue(i)); + continue; + } + + Objects.push_back(P); + } while (!Worklist.empty()); +} + /// onlyUsedByLifetimeMarkers - Return true if the only users of this pointer /// are lifetime markers. /// diff --git a/lib/Archive/ArchiveReader.cpp b/lib/Archive/ArchiveReader.cpp index 68873e2..5cfc810 100644 --- a/lib/Archive/ArchiveReader.cpp +++ b/lib/Archive/ArchiveReader.cpp @@ -82,14 +82,9 @@ Archive::parseMemberHeader(const char*& At, const char* End, std::string* error) ArchiveMemberHeader* Hdr = (ArchiveMemberHeader*)At; At += sizeof(ArchiveMemberHeader); - // Extract the size and determine if the file is - // compressed or not (negative length). int flags = 0; int MemberSize = atoi(Hdr->size); - if (MemberSize < 0) { - flags |= ArchiveMember::CompressedFlag; - MemberSize = -MemberSize; - } + assert(MemberSize >= 0); // Check the size of the member for sanity if (At + MemberSize > End) { diff --git a/lib/Archive/ArchiveWriter.cpp b/lib/Archive/ArchiveWriter.cpp index 9ef2943..ec6b4b8 100644 --- a/lib/Archive/ArchiveWriter.cpp +++ b/lib/Archive/ArchiveWriter.cpp @@ -204,7 +204,6 @@ Archive::writeMember( std::ofstream& ARFile, bool CreateSymbolTable, bool TruncateNames, - bool ShouldCompress, std::string* ErrMsg ) { @@ -349,7 +348,7 @@ Archive::writeSymbolTable(std::ofstream& ARFile) { // table, flattening the file names (no directories, 15 chars max) and // compressing each archive member. bool -Archive::writeToDisk(bool CreateSymbolTable, bool TruncateNames, bool Compress, +Archive::writeToDisk(bool CreateSymbolTable, bool TruncateNames, std::string* ErrMsg) { // Make sure they haven't opened up the file, not loaded it, @@ -394,7 +393,7 @@ Archive::writeToDisk(bool CreateSymbolTable, bool TruncateNames, bool Compress, // builds the symbol table, symTab. for (MembersList::iterator I = begin(), E = end(); I != E; ++I) { if (writeMember(*I, ArchiveFile, CreateSymbolTable, - TruncateNames, Compress, ErrMsg)) { + TruncateNames, ErrMsg)) { TmpArchive.eraseFromDisk(); ArchiveFile.close(); return true; @@ -446,7 +445,7 @@ Archive::writeToDisk(bool CreateSymbolTable, bool TruncateNames, bool Compress, // compatibility with other ar(1) implementations as well as allowing the // archive to store both native .o and LLVM .bc files, both indexed. 
if (foreignST) { - if (writeMember(*foreignST, FinalFile, false, false, false, ErrMsg)) { + if (writeMember(*foreignST, FinalFile, false, false, ErrMsg)) { FinalFile.close(); TmpArchive.eraseFromDisk(); return true; diff --git a/lib/AsmParser/LLLexer.cpp b/lib/AsmParser/LLLexer.cpp index 8818168..481733d 100644 --- a/lib/AsmParser/LLLexer.cpp +++ b/lib/AsmParser/LLLexer.cpp @@ -474,6 +474,9 @@ lltok::Kind LLLexer::LexIdentifier() { KEYWORD(extern_weak); KEYWORD(external); KEYWORD(thread_local); + KEYWORD(localdynamic); + KEYWORD(initialexec); + KEYWORD(localexec); KEYWORD(zeroinitializer); KEYWORD(undef); KEYWORD(null); @@ -550,6 +553,7 @@ lltok::Kind LLLexer::LexIdentifier() { KEYWORD(naked); KEYWORD(nonlazybind); KEYWORD(address_safety); + KEYWORD(ia_nsdialect); KEYWORD(type); KEYWORD(opaque); @@ -673,11 +677,12 @@ lltok::Kind LLLexer::LexIdentifier() { /// HexFP80Constant 0xK[0-9A-Fa-f]+ /// HexFP128Constant 0xL[0-9A-Fa-f]+ /// HexPPC128Constant 0xM[0-9A-Fa-f]+ +/// HexHalfConstant 0xH[0-9A-Fa-f]+ lltok::Kind LLLexer::Lex0x() { CurPtr = TokStart + 2; char Kind; - if (CurPtr[0] >= 'K' && CurPtr[0] <= 'M') { + if ((CurPtr[0] >= 'K' && CurPtr[0] <= 'M') || CurPtr[0] == 'H') { Kind = *CurPtr++; } else { Kind = 'J'; @@ -718,6 +723,9 @@ lltok::Kind LLLexer::Lex0x() { HexToIntPair(TokStart+3, CurPtr, Pair); APFloatVal = APFloat(APInt(128, Pair)); return lltok::APFloat; + case 'H': + APFloatVal = APFloat(APInt(16,HexIntToVal(TokStart+3, CurPtr))); + return lltok::APFloat; } } diff --git a/lib/AsmParser/LLParser.cpp b/lib/AsmParser/LLParser.cpp index 068be3d..0ff8edd 100644 --- a/lib/AsmParser/LLParser.cpp +++ b/lib/AsmParser/LLParser.cpp @@ -645,12 +645,13 @@ bool LLParser::ParseGlobal(const std::string &Name, LocTy NameLoc, unsigned Linkage, bool HasLinkage, unsigned Visibility) { unsigned AddrSpace; - bool ThreadLocal, IsConstant, UnnamedAddr; + bool IsConstant, UnnamedAddr; + GlobalVariable::ThreadLocalMode TLM; LocTy UnnamedAddrLoc; LocTy TyLoc; Type *Ty = 0; - if (ParseOptionalToken(lltok::kw_thread_local, ThreadLocal) || + if (ParseOptionalThreadLocal(TLM) || ParseOptionalAddrSpace(AddrSpace) || ParseOptionalToken(lltok::kw_unnamed_addr, UnnamedAddr, &UnnamedAddrLoc) || @@ -691,7 +692,8 @@ bool LLParser::ParseGlobal(const std::string &Name, LocTy NameLoc, if (GV == 0) { GV = new GlobalVariable(*M, Ty, false, GlobalValue::ExternalLinkage, 0, - Name, 0, false, AddrSpace); + Name, 0, GlobalVariable::NotThreadLocal, + AddrSpace); } else { if (GV->getType()->getElementType() != Ty) return Error(TyLoc, @@ -710,7 +712,7 @@ bool LLParser::ParseGlobal(const std::string &Name, LocTy NameLoc, GV->setConstant(IsConstant); GV->setLinkage((GlobalValue::LinkageTypes)Linkage); GV->setVisibility((GlobalValue::VisibilityTypes)Visibility); - GV->setThreadLocal(ThreadLocal); + GV->setThreadLocalMode(TLM); GV->setUnnamedAddr(UnnamedAddr); // Parse attributes on the global. 
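The thread-local grammar introduced here takes an optional model in
parentheses; plain 'thread_local' maps to the general-dynamic model. To
illustrate with hypothetical globals, textual IR can now say:

    @a = thread_local global i32 0                  ; general dynamic
    @b = thread_local(localdynamic) global i32 0
    @c = thread_local(initialexec) global i32 0
    @d = thread_local(localexec) global i32 0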
@@ -858,6 +860,46 @@ bool LLParser::ParseUInt32(unsigned &Val) { return false; } +/// ParseTLSModel +/// := 'localdynamic' +/// := 'initialexec' +/// := 'localexec' +bool LLParser::ParseTLSModel(GlobalVariable::ThreadLocalMode &TLM) { + switch (Lex.getKind()) { + default: + return TokError("expected localdynamic, initialexec or localexec"); + case lltok::kw_localdynamic: + TLM = GlobalVariable::LocalDynamicTLSModel; + break; + case lltok::kw_initialexec: + TLM = GlobalVariable::InitialExecTLSModel; + break; + case lltok::kw_localexec: + TLM = GlobalVariable::LocalExecTLSModel; + break; + } + + Lex.Lex(); + return false; +} + +/// ParseOptionalThreadLocal +/// := /*empty*/ +/// := 'thread_local' +/// := 'thread_local' '(' tlsmodel ')' +bool LLParser::ParseOptionalThreadLocal(GlobalVariable::ThreadLocalMode &TLM) { + TLM = GlobalVariable::NotThreadLocal; + if (!EatIfPresent(lltok::kw_thread_local)) + return false; + + TLM = GlobalVariable::GeneralDynamicTLSModel; + if (Lex.getKind() == lltok::lparen) { + Lex.Lex(); + return ParseTLSModel(TLM) || + ParseToken(lltok::rparen, "expected ')' after thread local model"); + } + return false; +} /// ParseOptionalAddrSpace /// := /*empty*/ @@ -920,6 +962,7 @@ bool LLParser::ParseOptionalAttrs(Attributes &Attrs, unsigned AttrKind) { case lltok::kw_naked: Attrs |= Attribute::Naked; break; case lltok::kw_nonlazybind: Attrs |= Attribute::NonLazyBind; break; case lltok::kw_address_safety: Attrs |= Attribute::AddressSafety; break; + case lltok::kw_ia_nsdialect: Attrs |= Attribute::IANSDialect; break; case lltok::kw_alignstack: { unsigned Alignment; @@ -2692,7 +2735,7 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) { if (FuncAttrs != Attribute::None) Attrs.push_back(AttributeWithIndex::get(~0, FuncAttrs)); - AttrListPtr PAL = AttrListPtr::get(Attrs.begin(), Attrs.end()); + AttrListPtr PAL = AttrListPtr::get(Attrs); if (PAL.paramHasAttr(1, Attribute::StructRet) && !RetType->isVoidTy()) return Error(RetTypeLoc, "functions with 'sret' argument must return void"); @@ -3239,7 +3282,7 @@ bool LLParser::ParseInvoke(Instruction *&Inst, PerFunctionState &PFS) { Attrs.push_back(AttributeWithIndex::get(~0, FnAttrs)); // Finish off the Attributes and check them - AttrListPtr PAL = AttrListPtr::get(Attrs.begin(), Attrs.end()); + AttrListPtr PAL = AttrListPtr::get(Attrs); InvokeInst *II = InvokeInst::Create(Callee, NormalBB, UnwindBB, Args); II->setCallingConv(CC); @@ -3635,7 +3678,7 @@ bool LLParser::ParseCall(Instruction *&Inst, PerFunctionState &PFS, Attrs.push_back(AttributeWithIndex::get(~0, FnAttrs)); // Finish off the Attributes and check them - AttrListPtr PAL = AttrListPtr::get(Attrs.begin(), Attrs.end()); + AttrListPtr PAL = AttrListPtr::get(Attrs); CallInst *CI = CallInst::Create(Callee, Args); CI->setTailCall(isTail); diff --git a/lib/AsmParser/LLParser.h b/lib/AsmParser/LLParser.h index dda8808..257c726 100644 --- a/lib/AsmParser/LLParser.h +++ b/lib/AsmParser/LLParser.h @@ -171,6 +171,9 @@ namespace llvm { Loc = Lex.getLoc(); return ParseUInt32(Val); } + + bool ParseTLSModel(GlobalVariable::ThreadLocalMode &TLM); + bool ParseOptionalThreadLocal(GlobalVariable::ThreadLocalMode &TLM); bool ParseOptionalAddrSpace(unsigned &AddrSpace); bool ParseOptionalAttrs(Attributes &Attrs, unsigned AttrKind); bool ParseOptionalLinkage(unsigned &Linkage, bool &HasLinkage); diff --git a/lib/AsmParser/LLToken.h b/lib/AsmParser/LLToken.h index adf5d4f..0b0b980 100644 --- a/lib/AsmParser/LLToken.h +++ b/lib/AsmParser/LLToken.h @@ -44,13 +44,14 @@ namespace 
lltok { kw_unnamed_addr, kw_extern_weak, kw_external, kw_thread_local, + kw_localdynamic, kw_initialexec, kw_localexec, kw_zeroinitializer, kw_undef, kw_null, kw_to, kw_tail, kw_target, kw_triple, - kw_unwind, + kw_unwind, kw_deplibs, kw_datalayout, kw_volatile, @@ -104,6 +105,7 @@ namespace lltok { kw_naked, kw_nonlazybind, kw_address_safety, + kw_ia_nsdialect, kw_type, kw_opaque, diff --git a/lib/Bitcode/Reader/BitcodeReader.cpp b/lib/Bitcode/Reader/BitcodeReader.cpp index e399040..4ffee38 100644 --- a/lib/Bitcode/Reader/BitcodeReader.cpp +++ b/lib/Bitcode/Reader/BitcodeReader.cpp @@ -28,6 +28,10 @@ #include "llvm/OperandTraits.h" using namespace llvm; +enum { + SWITCH_INST_MAGIC = 0x4B5 // May 2012 => 1205 => Hex +}; + void BitcodeReader::materializeForwardReferencedFunctions() { while (!BlockAddrFwdRefs.empty()) { Function *F = BlockAddrFwdRefs.begin()->first; @@ -57,7 +61,7 @@ void BitcodeReader::FreeState() { /// ConvertToString - Convert a string from a record into an std::string, return /// true on failure. template -static bool ConvertToString(SmallVector &Record, unsigned Idx, +static bool ConvertToString(ArrayRef Record, unsigned Idx, StrTy &Result) { if (Idx > Record.size()) return true; @@ -98,6 +102,17 @@ static GlobalValue::VisibilityTypes GetDecodedVisibility(unsigned Val) { } } +static GlobalVariable::ThreadLocalMode GetDecodedThreadLocalMode(unsigned Val) { + switch (Val) { + case 0: return GlobalVariable::NotThreadLocal; + default: // Map unknown non-zero value to general dynamic. + case 1: return GlobalVariable::GeneralDynamicTLSModel; + case 2: return GlobalVariable::LocalDynamicTLSModel; + case 3: return GlobalVariable::InitialExecTLSModel; + case 4: return GlobalVariable::LocalExecTLSModel; + } +} + static int GetDecodedCastOpcode(unsigned Val) { switch (Val) { default: return -1; @@ -458,61 +473,19 @@ bool BitcodeReader::ParseAttributeBlock() { if (Record.size() & 1) return Error("Invalid ENTRY record"); - // FIXME : Remove this autoupgrade code in LLVM 3.0. - // If Function attributes are using index 0 then transfer them - // to index ~0. Index 0 is used for return value attributes but used to be - // used for function attributes. - Attributes RetAttribute; - Attributes FnAttribute; for (unsigned i = 0, e = Record.size(); i != e; i += 2) { - // FIXME: remove in LLVM 3.0 - // The alignment is stored as a 16-bit raw value from bits 31--16. - // We shift the bits above 31 down by 11 bits. - - unsigned Alignment = (Record[i+1] & (0xffffull << 16)) >> 16; - if (Alignment && !isPowerOf2_32(Alignment)) - return Error("Alignment is not a power of two."); - - Attributes ReconstitutedAttr(Record[i+1] & 0xffff); - if (Alignment) - ReconstitutedAttr |= Attribute::constructAlignmentFromInt(Alignment); - ReconstitutedAttr |= - Attributes((Record[i+1] & (0xffffull << 32)) >> 11); - + Attributes ReconstitutedAttr = + Attribute::decodeLLVMAttributesForBitcode(Record[i+1]); Record[i+1] = ReconstitutedAttr.Raw(); - if (Record[i] == 0) - RetAttribute = ReconstitutedAttr; - else if (Record[i] == ~0U) - FnAttribute = ReconstitutedAttr; - } - - Attributes OldRetAttrs = (Attribute::NoUnwind|Attribute::NoReturn| - Attribute::ReadOnly|Attribute::ReadNone); - - if (FnAttribute == Attribute::None && RetAttribute != Attribute::None && - (RetAttribute & OldRetAttrs)) { - if (FnAttribute == Attribute::None) { // add a slot so they get added. 
- Record.push_back(~0U); - Record.push_back(0); - } - - FnAttribute |= RetAttribute & OldRetAttrs; - RetAttribute &= ~OldRetAttrs; } for (unsigned i = 0, e = Record.size(); i != e; i += 2) { - if (Record[i] == 0) { - if (RetAttribute != Attribute::None) - Attrs.push_back(AttributeWithIndex::get(0, RetAttribute)); - } else if (Record[i] == ~0U) { - if (FnAttribute != Attribute::None) - Attrs.push_back(AttributeWithIndex::get(~0U, FnAttribute)); - } else if (Attributes(Record[i+1]) != Attribute::None) + if (Attributes(Record[i+1]) != Attribute::None) Attrs.push_back(AttributeWithIndex::get(Record[i], Attributes(Record[i+1]))); } - MAttributes.push_back(AttrListPtr::get(Attrs.begin(), Attrs.end())); + MAttributes.push_back(AttrListPtr::get(Attrs)); Attrs.clear(); break; } @@ -621,7 +594,7 @@ bool BitcodeReader::ParseTypeTableBody() { break; } case bitc::TYPE_CODE_FUNCTION_OLD: { - // FIXME: attrid is dead, remove it in LLVM 3.0 + // FIXME: attrid is dead, remove it in LLVM 4.0 // FUNCTION: [vararg, attrid, retty, paramty x N] if (Record.size() < 3) return Error("Invalid FUNCTION type record"); @@ -851,11 +824,7 @@ bool BitcodeReader::ParseMetadata() { break; case bitc::METADATA_NAME: { // Read named of the named metadata. - unsigned NameLength = Record.size(); - SmallString<8> Name; - Name.resize(NameLength); - for (unsigned i = 0; i != NameLength; ++i) - Name[i] = Record[i]; + SmallString<8> Name(Record.begin(), Record.end()); Record.clear(); Code = Stream.ReadCode(); @@ -899,26 +868,18 @@ bool BitcodeReader::ParseMetadata() { break; } case bitc::METADATA_STRING: { - unsigned MDStringLength = Record.size(); - SmallString<8> String; - String.resize(MDStringLength); - for (unsigned i = 0; i != MDStringLength; ++i) - String[i] = Record[i]; - Value *V = MDString::get(Context, - StringRef(String.data(), String.size())); + SmallString<8> String(Record.begin(), Record.end()); + Value *V = MDString::get(Context, String); MDValueList.AssignValue(V, NextMDValueNo++); break; } case bitc::METADATA_KIND: { - unsigned RecordLength = Record.size(); - if (Record.empty() || RecordLength < 2) + if (Record.size() < 2) return Error("Invalid METADATA_KIND record"); - SmallString<8> Name; - Name.resize(RecordLength-1); + unsigned Kind = Record[0]; - for (unsigned i = 1; i != RecordLength; ++i) - Name[i-1] = Record[i]; - + SmallString<8> Name(Record.begin()+1, Record.end()); + unsigned NewKind = TheModule->getMDKindID(Name.str()); if (!MDKindMap.insert(std::make_pair(Kind, NewKind)).second) return Error("Conflicting METADATA_KIND records"); @@ -977,6 +938,14 @@ bool BitcodeReader::ResolveGlobalAndAliasInits() { return false; } +static APInt ReadWideAPInt(ArrayRef Vals, unsigned TypeBits) { + SmallVector Words(Vals.size()); + std::transform(Vals.begin(), Vals.end(), Words.begin(), + DecodeSignRotatedValue); + + return APInt(TypeBits, Words); +} + bool BitcodeReader::ParseConstants() { if (Stream.EnterSubBlock(bitc::CONSTANTS_BLOCK_ID)) return Error("Malformed block record"); @@ -1032,14 +1001,10 @@ bool BitcodeReader::ParseConstants() { if (!CurTy->isIntegerTy() || Record.empty()) return Error("Invalid WIDE_INTEGER record"); - unsigned NumWords = Record.size(); - SmallVector Words; - Words.resize(NumWords); - for (unsigned i = 0; i != NumWords; ++i) - Words[i] = DecodeSignRotatedValue(Record[i]); - V = ConstantInt::get(Context, - APInt(cast(CurTy)->getBitWidth(), - Words)); + APInt VInt = ReadWideAPInt(Record, + cast(CurTy)->getBitWidth()); + V = ConstantInt::get(Context, VInt); + break; } case 
bitc::CST_CODE_FLOAT: { // FLOAT: [fpval] @@ -1098,10 +1063,7 @@ bool BitcodeReader::ParseConstants() { if (Record.empty()) return Error("Invalid CST_STRING record"); - unsigned Size = Record.size(); - SmallString<16> Elts; - for (unsigned i = 0; i != Size; ++i) - Elts.push_back(Record[i]); + SmallString<16> Elts(Record.begin(), Record.end()); V = ConstantDataArray::getString(Context, Elts, BitCode == bitc::CST_CODE_CSTRING); break; @@ -1138,23 +1100,16 @@ bool BitcodeReader::ParseConstants() { else V = ConstantDataArray::get(Context, Elts); } else if (EltTy->isFloatTy()) { - SmallVector Elts; - for (unsigned i = 0; i != Size; ++i) { - union { uint32_t I; float F; }; - I = Record[i]; - Elts.push_back(F); - } + SmallVector Elts(Size); + std::transform(Record.begin(), Record.end(), Elts.begin(), BitsToFloat); if (isa(CurTy)) V = ConstantDataVector::get(Context, Elts); else V = ConstantDataArray::get(Context, Elts); } else if (EltTy->isDoubleTy()) { - SmallVector Elts; - for (unsigned i = 0; i != Size; ++i) { - union { uint64_t I; double F; }; - I = Record[i]; - Elts.push_back(F); - } + SmallVector Elts(Size); + std::transform(Record.begin(), Record.end(), Elts.begin(), + BitsToDouble); if (isa(CurTy)) V = ConstantDataVector::get(Context, Elts); else @@ -1600,9 +1555,10 @@ bool BitcodeReader::ParseModule(bool Resume) { GlobalValue::VisibilityTypes Visibility = GlobalValue::DefaultVisibility; if (Record.size() > 6) Visibility = GetDecodedVisibility(Record[6]); - bool isThreadLocal = false; + + GlobalVariable::ThreadLocalMode TLM = GlobalVariable::NotThreadLocal; if (Record.size() > 7) - isThreadLocal = Record[7]; + TLM = GetDecodedThreadLocalMode(Record[7]); bool UnnamedAddr = false; if (Record.size() > 8) @@ -1610,12 +1566,11 @@ bool BitcodeReader::ParseModule(bool Resume) { GlobalVariable *NewGV = new GlobalVariable(*TheModule, Ty, isConstant, Linkage, 0, "", 0, - isThreadLocal, AddressSpace); + TLM, AddressSpace); NewGV->setAlignment(Alignment); if (!Section.empty()) NewGV->setSection(Section); NewGV->setVisibility(Visibility); - NewGV->setThreadLocal(isThreadLocal); NewGV->setUnnamedAddr(UnnamedAddr); ValueList.push_back(NewGV); @@ -1732,7 +1687,7 @@ bool BitcodeReader::ParseBitcodeInto(Module *M) { // have to read and ignore these final 4 bytes :-( if (Stream.GetAbbrevIDWidth() == 2 && Code == 2 && Stream.Read(6) == 2 && Stream.Read(24) == 0xa0a0a && - Stream.AtEndOfStream()) + Stream.AtEndOfStream()) return false; return Error("Invalid record at top-level"); @@ -2271,6 +2226,65 @@ bool BitcodeReader::ParseFunctionBody(Function *F) { break; } case bitc::FUNC_CODE_INST_SWITCH: { // SWITCH: [opty, op0, op1, ...] + // Check magic + if ((Record[0] >> 16) == SWITCH_INST_MAGIC) { + // New SwitchInst format with case ranges. 
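+        // Record layout, as decoded below: Record[0] carries the magic in
+        // its high bits and a 16-bit hash of the built SwitchInst in its low
+        // bits; then [opty, cond, defaultbb, numcases], and per case a count
+        // of items, each item being isSingleNumber plus one or two values
+        // (an active-word count precedes values wider than 64 bits, followed
+        // by the sign-rotated words), with the case's destination block last.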
+ + Type *OpTy = getTypeByID(Record[1]); + unsigned ValueBitWidth = cast(OpTy)->getBitWidth(); + + Value *Cond = getFnValueByID(Record[2], OpTy); + BasicBlock *Default = getBasicBlock(Record[3]); + if (OpTy == 0 || Cond == 0 || Default == 0) + return Error("Invalid SWITCH record"); + + unsigned NumCases = Record[4]; + + SwitchInst *SI = SwitchInst::Create(Cond, Default, NumCases); + InstructionList.push_back(SI); + + unsigned CurIdx = 5; + for (unsigned i = 0; i != NumCases; ++i) { + IntegersSubsetToBB CaseBuilder; + unsigned NumItems = Record[CurIdx++]; + for (unsigned ci = 0; ci != NumItems; ++ci) { + bool isSingleNumber = Record[CurIdx++]; + + APInt Low; + unsigned ActiveWords = 1; + if (ValueBitWidth > 64) + ActiveWords = Record[CurIdx++]; + Low = ReadWideAPInt(makeArrayRef(&Record[CurIdx], ActiveWords), + ValueBitWidth); + CurIdx += ActiveWords; + + if (!isSingleNumber) { + ActiveWords = 1; + if (ValueBitWidth > 64) + ActiveWords = Record[CurIdx++]; + APInt High = + ReadWideAPInt(makeArrayRef(&Record[CurIdx], ActiveWords), + ValueBitWidth); + + CaseBuilder.add(IntItem::fromType(OpTy, Low), + IntItem::fromType(OpTy, High)); + CurIdx += ActiveWords; + } else + CaseBuilder.add(IntItem::fromType(OpTy, Low)); + } + BasicBlock *DestBB = getBasicBlock(Record[CurIdx++]); + IntegersSubset Case = CaseBuilder.getCase(); + SI->addCase(Case, DestBB); + } + uint16_t Hash = SI->hash(); + if (Hash != (Record[0] & 0xFFFF)) + return Error("Invalid SWITCH record"); + I = SI; + break; + } + + // Old SwitchInst format without case ranges. + if (Record.size() < 3 || (Record.size() & 1) == 0) return Error("Invalid SWITCH record"); Type *OpTy = getTypeByID(Record[0]); diff --git a/lib/Bitcode/Reader/CMakeLists.txt b/lib/Bitcode/Reader/CMakeLists.txt index 693d431..dfe7e10 100644 --- a/lib/Bitcode/Reader/CMakeLists.txt +++ b/lib/Bitcode/Reader/CMakeLists.txt @@ -2,3 +2,5 @@ add_llvm_library(LLVMBitReader BitReader.cpp BitcodeReader.cpp ) + +add_dependencies(LLVMBitReader intrinsics_gen) diff --git a/lib/Bitcode/Writer/BitcodeWriter.cpp b/lib/Bitcode/Writer/BitcodeWriter.cpp index b25d2e9..5b1725f 100644 --- a/lib/Bitcode/Writer/BitcodeWriter.cpp +++ b/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -62,7 +62,10 @@ enum { FUNCTION_INST_CAST_ABBREV, FUNCTION_INST_RET_VOID_ABBREV, FUNCTION_INST_RET_VAL_ABBREV, - FUNCTION_INST_UNREACHABLE_ABBREV + FUNCTION_INST_UNREACHABLE_ABBREV, + + // SwitchInst Magic + SWITCH_INST_MAGIC = 0x4B5 // May 2012 => 1205 => Hex }; static unsigned GetEncodedCastOpcode(unsigned Opcode) { @@ -174,18 +177,7 @@ static void WriteAttributeTable(const ValueEnumerator &VE, for (unsigned i = 0, e = A.getNumSlots(); i != e; ++i) { const AttributeWithIndex &PAWI = A.getSlot(i); Record.push_back(PAWI.Index); - - // FIXME: remove in LLVM 3.0 - // Store the alignment in the bitcode as a 16-bit raw value instead of a - // 5-bit log2 encoded value. Shift the bits above the alignment up by - // 11 bits. 
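// The hand-rolled packing described above is what the replacement call,
// Attribute::encodeLLVMAttributesForBitcode(), is assumed to centralize:
// widen the 5-bit log2 alignment to a 16-bit raw field and shift the
// attribute bits above it up by 11, keeping the on-disk format stable.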
- uint64_t FauxAttr = PAWI.Attrs.Raw() & 0xffff; - if (PAWI.Attrs & Attribute::Alignment) - FauxAttr |= (1ull<<16)<< - (((PAWI.Attrs & Attribute::Alignment).Raw()-1) >> 16); - FauxAttr |= (PAWI.Attrs.Raw() & (0x3FFull << 21)) << 11; - - Record.push_back(FauxAttr); + Record.push_back(Attribute::encodeLLVMAttributesForBitcode(PAWI.Attrs)); } Stream.EmitRecord(bitc::PARAMATTR_CODE_ENTRY, Record); @@ -387,6 +379,17 @@ static unsigned getEncodedVisibility(const GlobalValue *GV) { llvm_unreachable("Invalid visibility"); } +static unsigned getEncodedThreadLocalMode(const GlobalVariable *GV) { + switch (GV->getThreadLocalMode()) { + case GlobalVariable::NotThreadLocal: return 0; + case GlobalVariable::GeneralDynamicTLSModel: return 1; + case GlobalVariable::LocalDynamicTLSModel: return 2; + case GlobalVariable::InitialExecTLSModel: return 3; + case GlobalVariable::LocalExecTLSModel: return 4; + } + llvm_unreachable("Invalid TLS model"); +} + // Emit top-level description of module, including target triple, inline asm, // descriptors for global variables, and function prototype info. static void WriteModuleInfo(const Module *M, const ValueEnumerator &VE, @@ -495,7 +498,7 @@ static void WriteModuleInfo(const Module *M, const ValueEnumerator &VE, GV->getVisibility() != GlobalValue::DefaultVisibility || GV->hasUnnamedAddr()) { Vals.push_back(getEncodedVisibility(GV)); - Vals.push_back(GV->isThreadLocal()); + Vals.push_back(getEncodedThreadLocalMode(GV)); Vals.push_back(GV->hasUnnamedAddr()); } else { AbbrevToUse = SimpleGVarAbbrev; @@ -719,6 +722,41 @@ static void WriteModuleMetadataStore(const Module *M, BitstreamWriter &Stream) { Stream.ExitBlock(); } +static void EmitAPInt(SmallVectorImpl &Vals, + unsigned &Code, unsigned &AbbrevToUse, const APInt &Val, + bool EmitSizeForWideNumbers = false + ) { + if (Val.getBitWidth() <= 64) { + uint64_t V = Val.getSExtValue(); + if ((int64_t)V >= 0) + Vals.push_back(V << 1); + else + Vals.push_back((-V << 1) | 1); + Code = bitc::CST_CODE_INTEGER; + AbbrevToUse = CONSTANTS_INTEGER_ABBREV; + } else { + // Wide integers, > 64 bits in size. + // We have an arbitrary precision integer value to write whose + // bit width is > 64. However, in canonical unsigned integer + // format it is likely that the high bits are going to be zero. + // So, we only write the number of active words. + unsigned NWords = Val.getActiveWords(); + + if (EmitSizeForWideNumbers) + Vals.push_back(NWords); + + const uint64_t *RawWords = Val.getRawData(); + for (unsigned i = 0; i != NWords; ++i) { + int64_t V = RawWords[i]; + if (V >= 0) + Vals.push_back(V << 1); + else + Vals.push_back((-V << 1) | 1); + } + Code = bitc::CST_CODE_WIDE_INTEGER; + } +} + static void WriteConstants(unsigned FirstVal, unsigned LastVal, const ValueEnumerator &VE, BitstreamWriter &Stream, bool isGlobal) { @@ -801,30 +839,7 @@ static void WriteConstants(unsigned FirstVal, unsigned LastVal, } else if (isa(C)) { Code = bitc::CST_CODE_UNDEF; } else if (const ConstantInt *IV = dyn_cast(C)) { - if (IV->getBitWidth() <= 64) { - uint64_t V = IV->getSExtValue(); - if ((int64_t)V >= 0) - Record.push_back(V << 1); - else - Record.push_back((-V << 1) | 1); - Code = bitc::CST_CODE_INTEGER; - AbbrevToUse = CONSTANTS_INTEGER_ABBREV; - } else { // Wide integers, > 64 bits in size. - // We have an arbitrary precision integer value to write whose - // bit width is > 64. However, in canonical unsigned integer - // format it is likely that the high bits are going to be zero. - // So, we only write the number of active words. 
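// A minimal sketch of the sign-rotation shared by reader and writer
// (helper names are assumptions; the reader's real entry point is
// DecodeSignRotatedValue): the magnitude is shifted left one bit and the
// sign lands in bit 0, so small values of either sign stay small when
// VBR-emitted.
static inline uint64_t encodeSignRotated(int64_t V) {
  return V >= 0 ? (uint64_t)V << 1 : ((uint64_t)(-V) << 1) | 1;
}
static inline int64_t decodeSignRotated(uint64_t V) {
  return (V & 1) ? -(int64_t)(V >> 1) : (int64_t)(V >> 1);
}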
- unsigned NWords = IV->getValue().getActiveWords(); - const uint64_t *RawWords = IV->getValue().getRawData(); - for (unsigned i = 0; i != NWords; ++i) { - int64_t V = RawWords[i]; - if (V >= 0) - Record.push_back(V << 1); - else - Record.push_back((-V << 1) | 1); - } - Code = bitc::CST_CODE_WIDE_INTEGER; - } + EmitAPInt(Record, Code, AbbrevToUse, IV->getValue()); } else if (const ConstantFP *CFP = dyn_cast(C)) { Code = bitc::CST_CODE_FLOAT; Type *Ty = CFP->getType(); @@ -1137,16 +1152,63 @@ static void WriteInstruction(const Instruction &I, unsigned InstID, break; case Instruction::Switch: { + // Redefine Vals, since here we need to use 64 bit values + // explicitly to store large APInt numbers. + SmallVector Vals64; + Code = bitc::FUNC_CODE_INST_SWITCH; SwitchInst &SI = cast(I); - Vals.push_back(VE.getTypeID(SI.getCondition()->getType())); - Vals.push_back(VE.getValueID(SI.getCondition())); - Vals.push_back(VE.getValueID(SI.getDefaultDest())); + + uint32_t SwitchRecordHeader = SI.hash() | (SWITCH_INST_MAGIC << 16); + Vals64.push_back(SwitchRecordHeader); + + Vals64.push_back(VE.getTypeID(SI.getCondition()->getType())); + Vals64.push_back(VE.getValueID(SI.getCondition())); + Vals64.push_back(VE.getValueID(SI.getDefaultDest())); + Vals64.push_back(SI.getNumCases()); for (SwitchInst::CaseIt i = SI.case_begin(), e = SI.case_end(); i != e; ++i) { - Vals.push_back(VE.getValueID(i.getCaseValue())); - Vals.push_back(VE.getValueID(i.getCaseSuccessor())); + IntegersSubset& CaseRanges = i.getCaseValueEx(); + unsigned Code, Abbrev; // will unused. + + if (CaseRanges.isSingleNumber()) { + Vals64.push_back(1/*NumItems = 1*/); + Vals64.push_back(true/*IsSingleNumber = true*/); + EmitAPInt(Vals64, Code, Abbrev, CaseRanges.getSingleNumber(0), true); + } else { + + Vals64.push_back(CaseRanges.getNumItems()); + + if (CaseRanges.isSingleNumbersOnly()) { + for (unsigned ri = 0, rn = CaseRanges.getNumItems(); + ri != rn; ++ri) { + + Vals64.push_back(true/*IsSingleNumber = true*/); + + EmitAPInt(Vals64, Code, Abbrev, + CaseRanges.getSingleNumber(ri), true); + } + } else + for (unsigned ri = 0, rn = CaseRanges.getNumItems(); + ri != rn; ++ri) { + IntegersSubset::Range r = CaseRanges.getItem(ri); + bool IsSingleNumber = CaseRanges.isSingleNumber(ri); + + Vals64.push_back(IsSingleNumber); + + EmitAPInt(Vals64, Code, Abbrev, r.getLow(), true); + if (!IsSingleNumber) + EmitAPInt(Vals64, Code, Abbrev, r.getHigh(), true); + } + } + Vals64.push_back(VE.getValueID(i.getCaseSuccessor())); } + + Stream.EmitRecord(Code, Vals64, AbbrevToUse); + + // Also do expected action - clear external Vals collection: + Vals.clear(); + return; } break; case Instruction::IndirectBr: diff --git a/lib/CodeGen/AggressiveAntiDepBreaker.cpp b/lib/CodeGen/AggressiveAntiDepBreaker.cpp index 822a564..205480a 100644 --- a/lib/CodeGen/AggressiveAntiDepBreaker.cpp +++ b/lib/CodeGen/AggressiveAntiDepBreaker.cpp @@ -16,10 +16,10 @@ #define DEBUG_TYPE "post-RA-sched" #include "AggressiveAntiDepBreaker.h" -#include "RegisterClassInfo.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/RegisterClassInfo.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetInstrInfo.h" @@ -157,8 +157,8 @@ void AggressiveAntiDepBreaker::StartBlock(MachineBasicBlock *BB) { // In a return block, examine the function live-out regs. 
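// The register-alias conversion applied throughout the CodeGen hunks
// below, sketched with a hypothetical Visit callback: the old
// null-terminated getOverlaps()/getAliasSet() tables become
// MCRegAliasIterator, whose third argument (IncludeSelf) chooses whether
// Reg itself is yielded.
static void visitAliases(unsigned Reg, const TargetRegisterInfo *TRI,
                         void (*Visit)(unsigned)) {
  for (MCRegAliasIterator AI(Reg, TRI, /*IncludeSelf=*/true);
       AI.isValid(); ++AI)
    Visit(*AI); // Reg plus every register overlapping it
}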
for (MachineRegisterInfo::liveout_iterator I = MRI.liveout_begin(), E = MRI.liveout_end(); I != E; ++I) { - for (const uint16_t *Alias = TRI->getOverlaps(*I); - unsigned Reg = *Alias; ++Alias) { + for (MCRegAliasIterator AI(*I, TRI, true); AI.isValid(); ++AI) { + unsigned Reg = *AI; State->UnionGroups(Reg, 0); KillIndices[Reg] = BB->size(); DefIndices[Reg] = ~0u; @@ -173,8 +173,8 @@ void AggressiveAntiDepBreaker::StartBlock(MachineBasicBlock *BB) { SE = BB->succ_end(); SI != SE; ++SI) for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(), E = (*SI)->livein_end(); I != E; ++I) { - for (const uint16_t *Alias = TRI->getOverlaps(*I); - unsigned Reg = *Alias; ++Alias) { + for (MCRegAliasIterator AI(*I, TRI, true); AI.isValid(); ++AI) { + unsigned Reg = *AI; State->UnionGroups(Reg, 0); KillIndices[Reg] = BB->size(); DefIndices[Reg] = ~0u; @@ -189,8 +189,8 @@ void AggressiveAntiDepBreaker::StartBlock(MachineBasicBlock *BB) { for (const uint16_t *I = TRI->getCalleeSavedRegs(&MF); *I; ++I) { unsigned Reg = *I; if (!IsReturnBlock && !Pristine.test(Reg)) continue; - for (const uint16_t *Alias = TRI->getOverlaps(Reg); - unsigned AliasReg = *Alias; ++Alias) { + for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) { + unsigned AliasReg = *AI; State->UnionGroups(AliasReg, 0); KillIndices[AliasReg] = BB->size(); DefIndices[AliasReg] = ~0u; @@ -265,10 +265,8 @@ void AggressiveAntiDepBreaker::GetPassthruRegs(MachineInstr *MI, IsImplicitDefUse(MI, MO)) { const unsigned Reg = MO.getReg(); PassthruRegs.insert(Reg); - for (const uint16_t *Subreg = TRI->getSubRegisters(Reg); - *Subreg; ++Subreg) { - PassthruRegs.insert(*Subreg); - } + for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) + PassthruRegs.insert(*SubRegs); } } } @@ -333,9 +331,8 @@ void AggressiveAntiDepBreaker::HandleLastUse(unsigned Reg, unsigned KillIdx, DEBUG(dbgs() << "->g" << State->GetGroup(Reg) << tag); } // Repeat for subregisters. - for (const uint16_t *Subreg = TRI->getSubRegisters(Reg); - *Subreg; ++Subreg) { - unsigned SubregReg = *Subreg; + for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) { + unsigned SubregReg = *SubRegs; if (!State->IsLive(SubregReg)) { KillIndices[SubregReg] = KillIdx; DefIndices[SubregReg] = ~0u; @@ -392,8 +389,8 @@ void AggressiveAntiDepBreaker::PrescanInstruction(MachineInstr *MI, // Any aliased that are live at this point are completely or // partially defined here, so group those aliases with Reg. - for (const uint16_t *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) { - unsigned AliasReg = *Alias; + for (MCRegAliasIterator AI(Reg, TRI, false); AI.isValid(); ++AI) { + unsigned AliasReg = *AI; if (State->IsLive(AliasReg)) { State->UnionGroups(Reg, AliasReg); DEBUG(dbgs() << "->g" << State->GetGroup(Reg) << "(via " << @@ -404,7 +401,7 @@ void AggressiveAntiDepBreaker::PrescanInstruction(MachineInstr *MI, // Note register reference... const TargetRegisterClass *RC = NULL; if (i < MI->getDesc().getNumOperands()) - RC = TII->getRegClass(MI->getDesc(), i, TRI); + RC = TII->getRegClass(MI->getDesc(), i, TRI, MF); AggressiveAntiDepState::RegisterReference RR = { &MO, RC }; RegRefs.insert(std::make_pair(Reg, RR)); } @@ -423,9 +420,8 @@ void AggressiveAntiDepBreaker::PrescanInstruction(MachineInstr *MI, continue; // Update def for Reg and aliases. 
- for (const uint16_t *Alias = TRI->getOverlaps(Reg); - unsigned AliasReg = *Alias; ++Alias) - DefIndices[AliasReg] = Count; + for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) + DefIndices[*AI] = Count; } } @@ -479,7 +475,7 @@ void AggressiveAntiDepBreaker::ScanInstruction(MachineInstr *MI, // Note register reference... const TargetRegisterClass *RC = NULL; if (i < MI->getDesc().getNumOperands()) - RC = TII->getRegClass(MI->getDesc(), i, TRI); + RC = TII->getRegClass(MI->getDesc(), i, TRI, MF); AggressiveAntiDepState::RegisterReference RR = { &MO, RC }; RegRefs.insert(std::make_pair(Reg, RR)); } @@ -678,9 +674,8 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters( goto next_super_reg; } else { bool found = false; - for (const uint16_t *Alias = TRI->getAliasSet(NewReg); - *Alias; ++Alias) { - unsigned AliasReg = *Alias; + for (MCRegAliasIterator AI(NewReg, TRI, false); AI.isValid(); ++AI) { + unsigned AliasReg = *AI; if (State->IsLive(AliasReg) || (KillIndices[Reg] > DefIndices[AliasReg])) { DEBUG(dbgs() << "(alias " << TRI->getName(AliasReg) << " live)"); diff --git a/lib/CodeGen/AllocationOrder.cpp b/lib/CodeGen/AllocationOrder.cpp index 87f6431..32ad34a 100644 --- a/lib/CodeGen/AllocationOrder.cpp +++ b/lib/CodeGen/AllocationOrder.cpp @@ -15,9 +15,9 @@ //===----------------------------------------------------------------------===// #include "AllocationOrder.h" -#include "RegisterClassInfo.h" #include "VirtRegMap.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/RegisterClassInfo.h" using namespace llvm; diff --git a/lib/CodeGen/Analysis.cpp b/lib/CodeGen/Analysis.cpp index 00874d4..447f398 100644 --- a/lib/CodeGen/Analysis.cpp +++ b/lib/CodeGen/Analysis.cpp @@ -203,6 +203,63 @@ ISD::CondCode llvm::getICmpCondCode(ICmpInst::Predicate Pred) { } } + +/// getNoopInput - If V is a noop (i.e., lowers to no machine code), look +/// through it (and any transitive noop operands to it) and return its input +/// value. This is used to determine if a tail call can be formed. +/// +static const Value *getNoopInput(const Value *V, const TargetLowering &TLI) { + // If V is not an instruction, it can't be looked through. + const Instruction *I = dyn_cast(V); + if (I == 0 || !I->hasOneUse() || I->getNumOperands() == 0) return V; + + Value *Op = I->getOperand(0); + + // Look through truly no-op truncates. + if (isa(I) && + TLI.isTruncateFree(I->getOperand(0)->getType(), I->getType())) + return getNoopInput(I->getOperand(0), TLI); + + // Look through truly no-op bitcasts. + if (isa(I)) { + // No type change at all. + if (Op->getType() == I->getType()) + return getNoopInput(Op, TLI); + + // Pointer to pointer cast. + if (Op->getType()->isPointerTy() && I->getType()->isPointerTy()) + return getNoopInput(Op, TLI); + + if (isa(Op->getType()) && isa(I->getType()) && + TLI.isTypeLegal(EVT::getEVT(Op->getType())) && + TLI.isTypeLegal(EVT::getEVT(I->getType()))) + return getNoopInput(Op, TLI); + } + + // Look through inttoptr. + if (isa(I) && !isa(I->getType())) { + // Make sure this isn't a truncating or extending cast. We could support + // this eventually, but don't bother for now. + if (TLI.getPointerTy().getSizeInBits() == + cast(Op->getType())->getBitWidth()) + return getNoopInput(Op, TLI); + } + + // Look through ptrtoint. + if (isa(I) && !isa(I->getType())) { + // Make sure this isn't a truncating or extending cast. We could support + // this eventually, but don't bother for now. 
+ if (TLI.getPointerTy().getSizeInBits() == + cast(I->getType())->getBitWidth()) + return getNoopInput(Op, TLI); + } + + + // Otherwise it's not something we can look through. + return V; +} + + /// Test if the given instruction is in a position to be optimized /// with a tail-call. This roughly means that it's in a block with /// a return and there's nothing that needs to be scheduled @@ -226,7 +283,8 @@ bool llvm::isInTailCallPosition(ImmutableCallSite CS, Attributes CalleeRetAttr, // been fully understood. if (!Ret && (!TLI.getTargetMachine().Options.GuaranteedTailCallOpt || - !isa(Term))) return false; + !isa(Term))) + return false; // If I will have a chain, make sure no other instruction that will have a // chain interposes between I and the return. @@ -264,28 +322,28 @@ bool llvm::isInTailCallPosition(ImmutableCallSite CS, Attributes CalleeRetAttr, return false; // Otherwise, make sure the unmodified return value of I is the return value. - for (const Instruction *U = dyn_cast(Ret->getOperand(0)); ; - U = dyn_cast(U->getOperand(0))) { - if (!U) - return false; - if (!U->hasOneUse()) + // We handle two cases: multiple return values + scalars. + Value *RetVal = Ret->getOperand(0); + if (!isa(RetVal) || !isa(RetVal->getType())) + // Handle scalars first. + return getNoopInput(Ret->getOperand(0), TLI) == I; + + // If this is an aggregate return, look through the insert/extract values and + // see if each is transparent. + for (unsigned i = 0, e =cast(RetVal->getType())->getNumElements(); + i != e; ++i) { + const Value *InScalar = FindInsertedValue(RetVal, i); + if (InScalar == 0) return false; + InScalar = getNoopInput(InScalar, TLI); + + // If the scalar value being inserted is an extractvalue of the right index + // from the call, then everything is good. + const ExtractValueInst *EVI = dyn_cast(InScalar); + if (EVI == 0 || EVI->getOperand(0) != I || EVI->getNumIndices() != 1 || + EVI->getIndices()[0] != i) return false; - if (U == I) - break; - // Check for a truly no-op truncate. - if (isa(U) && - TLI.isTruncateFree(U->getOperand(0)->getType(), U->getType())) - continue; - // Check for a truly no-op bitcast. - if (isa(U) && - (U->getOperand(0)->getType() == U->getType() || - (U->getOperand(0)->getType()->isPointerTy() && - U->getType()->isPointerTy()))) - continue; - // Otherwise it's not a true no-op. 
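// The aggregate branch above accepts a return that reassembles, index by
// index, the extractvalues of the tail call; an IR shape that passes the
// new check (illustrative only):
//
//   %r  = tail call { i32, i32 } @f()
//   %a  = extractvalue { i32, i32 } %r, 0
//   %b  = extractvalue { i32, i32 } %r, 1
//   %s0 = insertvalue { i32, i32 } undef, i32 %a, 0
//   %s1 = insertvalue { i32, i32 } %s0, i32 %b, 1
//   ret { i32, i32 } %s1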
- return false; } - + return true; } diff --git a/lib/CodeGen/AsmPrinter/ARMException.cpp b/lib/CodeGen/AsmPrinter/ARMException.cpp index b60fda8..bf5d8c4 100644 --- a/lib/CodeGen/AsmPrinter/ARMException.cpp +++ b/lib/CodeGen/AsmPrinter/ARMException.cpp @@ -44,9 +44,7 @@ EnableARMEHABIDescriptors("arm-enable-ehabi-descriptors", cl::Hidden, ARMException::ARMException(AsmPrinter *A) - : DwarfException(A), - shouldEmitTable(false), shouldEmitMoves(false), shouldEmitTableModule(false) - {} + : DwarfException(A) {} ARMException::~ARMException() {} diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index b0b2ff4..d9be7a1 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -15,6 +15,7 @@ #include "llvm/CodeGen/AsmPrinter.h" #include "DwarfDebug.h" #include "DwarfException.h" +#include "llvm/DebugInfo.h" #include "llvm/Module.h" #include "llvm/CodeGen/GCMetadataPrinter.h" #include "llvm/CodeGen/MachineConstantPool.h" @@ -24,7 +25,6 @@ #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/Analysis/ConstantFolding.h" -#include "llvm/Analysis/DebugInfo.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" @@ -475,10 +475,8 @@ void AsmPrinter::EmitFunctionHeader() { void AsmPrinter::EmitFunctionEntryLabel() { // The function label could have already been emitted if two symbols end up // conflicting due to asm renaming. Detect this and emit an error. - if (CurrentFnSym->isUndefined()) { - OutStreamer.ForceCodeRegion(); + if (CurrentFnSym->isUndefined()) return OutStreamer.EmitLabel(CurrentFnSym); - } report_fatal_error("'" + Twine(CurrentFnSym->getName()) + "' label emitted multiple times to assembly file"); @@ -615,7 +613,7 @@ bool AsmPrinter::needsSEHMoves() { } bool AsmPrinter::needsRelocationsForDwarfStringPool() const { - return MAI->doesDwarfUseRelocationsForStringPool(); + return MAI->doesDwarfUseRelocationsAcrossSections(); } void AsmPrinter::emitPrologLabel(const MachineInstr &MI) { @@ -798,8 +796,8 @@ void AsmPrinter::EmitDwarfRegOp(const MachineLocation &MLoc) const { const TargetRegisterInfo *TRI = TM.getRegisterInfo(); int Reg = TRI->getDwarfRegNum(MLoc.getReg(), false); - for (const uint16_t *SR = TRI->getSuperRegisters(MLoc.getReg()); - *SR && Reg < 0; ++SR) { + for (MCSuperRegIterator SR(MLoc.getReg(), TRI); SR.isValid() && Reg < 0; + ++SR) { Reg = TRI->getDwarfRegNum(*SR, false); // FIXME: Get the bit range this register uses of the superregister // so that we can produce a DW_OP_bit_piece @@ -1085,15 +1083,6 @@ void AsmPrinter::EmitJumpTableInfo() { EmitAlignment(Log2_32(MJTI->getEntryAlignment(*TM.getTargetData()))); - // If we know the form of the jump table, go ahead and tag it as such. 
- if (!JTInDiffSection) { - if (MJTI->getEntryKind() == MachineJumpTableInfo::EK_LabelDifference32) { - OutStreamer.EmitJumpTable32Region(); - } else { - OutStreamer.EmitDataRegion(); - } - } - for (unsigned JTI = 0, e = JT.size(); JTI != e; ++JTI) { const std::vector &JTBBs = JT[JTI].MBBs; @@ -1399,13 +1388,14 @@ void AsmPrinter::EmitLabelPlusOffset(const MCSymbol *Label, uint64_t Offset, unsigned Size) const { - // Emit Label+Offset - const MCExpr *Plus = - MCBinaryExpr::CreateAdd(MCSymbolRefExpr::Create(Label, OutContext), - MCConstantExpr::Create(Offset, OutContext), - OutContext); + // Emit Label+Offset (or just Label if Offset is zero) + const MCExpr *Expr = MCSymbolRefExpr::Create(Label, OutContext); + if (Offset) + Expr = MCBinaryExpr::CreateAdd(Expr, + MCConstantExpr::Create(Offset, OutContext), + OutContext); - OutStreamer.EmitValue(Plus, 4, 0/*AddrSpace*/); + OutStreamer.EmitValue(Expr, Size, 0/*AddrSpace*/); } diff --git a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp index d605854..db43b06 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp @@ -326,11 +326,11 @@ void AsmPrinter::EmitInlineAsm(const MachineInstr *MI) const { OpNo += InlineAsm::getNumOperandRegisters(OpFlags) + 1; } - // We may have a location metadata attached to the end of the - // instruction, and at no point should see metadata at any - // other point while processing. It's an error if so. + // We may have a location metadata attached to the end of the + // instruction, and at no point should see metadata at any + // other point while processing. It's an error if so. if (OpNo >= MI->getNumOperands() || - MI->getOperand(OpNo).isMetadata()) { + MI->getOperand(OpNo).isMetadata()) { Error = true; } else { unsigned OpFlags = MI->getOperand(OpNo).getImm(); @@ -409,9 +409,28 @@ void AsmPrinter::PrintSpecial(const MachineInstr *MI, raw_ostream &OS, /// instruction, using the specified assembler variant. Targets should /// override this to format as appropriate. bool AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, - unsigned AsmVariant, const char *ExtraCode, - raw_ostream &O) { - // Target doesn't support this yet! + unsigned AsmVariant, const char *ExtraCode, + raw_ostream &O) { + // Does this asm operand have a single letter operand modifier? + if (ExtraCode && ExtraCode[0]) { + if (ExtraCode[1] != 0) return true; // Unknown modifier. + + const MachineOperand &MO = MI->getOperand(OpNo); + switch (ExtraCode[0]) { + default: + return true; // Unknown modifier. + case 'c': // Substitute immediate value without immediate syntax + if (MO.getType() != MachineOperand::MO_Immediate) + return true; + O << MO.getImm(); + return false; + case 'n': // Negate the immediate constant. + if (MO.getType() != MachineOperand::MO_Immediate) + return true; + O << -MO.getImm(); + return false; + } + } return true; } diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp index cc5b642..d30e5bb 100644 --- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp @@ -7,7 +7,7 @@ // //===----------------------------------------------------------------------===// // -// This file contains support for writing dwarf compile unit. +// This file contains support for constructing a dwarf compile unit. 
// //===----------------------------------------------------------------------===// @@ -17,9 +17,9 @@ #include "DwarfCompileUnit.h" #include "DwarfDebug.h" #include "llvm/Constants.h" +#include "llvm/DIBuilder.h" #include "llvm/GlobalVariable.h" #include "llvm/Instructions.h" -#include "llvm/Analysis/DIBuilder.h" #include "llvm/Support/Debug.h" #include "llvm/Target/Mangler.h" #include "llvm/Target/TargetData.h" @@ -33,7 +33,7 @@ using namespace llvm; /// CompileUnit - Compile unit constructor. CompileUnit::CompileUnit(unsigned I, unsigned L, DIE *D, AsmPrinter *A, - DwarfDebug *DW) + DwarfDebug *DW) : ID(I), Language(L), CUDie(D), Asm(A), DD(DW), IndexTyDie(0) { DIEIntegerOne = new (DIEValueAllocator) DIEInteger(1); } @@ -198,7 +198,7 @@ void CompileUnit::addSourceLine(DIE *Die, DIObjCProperty Ty) { return; DIFile File = Ty.getFile(); unsigned FileID = DD->GetOrCreateSourceID(File.getFilename(), - File.getDirectory()); + File.getDirectory()); assert(FileID && "Invalid file id"); addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID); addUInt(Die, dwarf::DW_AT_decl_line, 0, Line); @@ -308,7 +308,8 @@ void CompileUnit::addComplexAddress(DbgVariable *&DV, DIE *Die, addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst); addUInt(Block, 0, dwarf::DW_FORM_udata, DV->getAddrElement(++i)); } else if (Element == DIBuilder::OpDeref) { - addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_deref); + if (!Location.isReg()) + addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_deref); } else llvm_unreachable("unknown DIBuilder Opcode"); } @@ -418,27 +419,12 @@ void CompileUnit::addBlockByrefAddress(DbgVariable *&DV, DIE *Die, // Decode the original location, and use that as the start of the byref // variable's location. - const TargetRegisterInfo *RI = Asm->TM.getRegisterInfo(); - unsigned Reg = RI->getDwarfRegNum(Location.getReg(), false); DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); - if (Location.isReg()) { - if (Reg < 32) - addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_reg0 + Reg); - else { - addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_regx); - addUInt(Block, 0, dwarf::DW_FORM_udata, Reg); - } - } else { - if (Reg < 32) - addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_breg0 + Reg); - else { - addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_bregx); - addUInt(Block, 0, dwarf::DW_FORM_udata, Reg); - } - - addUInt(Block, 0, dwarf::DW_FORM_sdata, Location.getOffset()); - } + if (Location.isReg()) + addRegisterOp(Block, Location.getReg()); + else + addRegisterOffset(Block, Location.getReg(), Location.getOffset()); // If we started with a pointer to the __Block_byref... struct, then // the first thing we need to do is dereference the pointer (DW_OP_deref). @@ -646,8 +632,7 @@ DIE *CompileUnit::getOrCreateTypeDIE(const MDNode *TyNode) { } /// addType - Add a new type attribute to the specified entity. 
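// In the addBlockByrefAddress hunk above, the open-coded register
// expression moves behind addRegisterOp/addRegisterOffset. A
// self-contained sketch of the DWARF encoding those helpers stand for
// (emitRegOp and the raw byte sink are assumptions, reconstructed from
// the deleted lines): registers 0-31 get the compact one-byte
// DW_OP_reg0+N form, larger numbers the DW_OP_regx + ULEB128 form.
static void emitRegOp(SmallVectorImpl<uint8_t> &Out, unsigned Reg) {
  if (Reg < 32) {
    Out.push_back(dwarf::DW_OP_reg0 + Reg); // compact one-byte form
    return;
  }
  Out.push_back(dwarf::DW_OP_regx);         // extended form
  do {                                      // ULEB128-encode Reg
    uint8_t Byte = Reg & 0x7f;
    Reg >>= 7;
    if (Reg) Byte |= 0x80;
    Out.push_back(Byte);
  } while (Reg);
}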
-void CompileUnit::addType(DIE *Entity, DIType Ty, - unsigned Attribute) { +void CompileUnit::addType(DIE *Entity, DIType Ty, unsigned Attribute) { if (!Ty.Verify()) return; @@ -776,6 +761,11 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) { Buffer.addChild(ElemDie); } } + DIType DTy = CTy.getTypeDerivedFrom(); + if (DTy.Verify()) { + addType(&Buffer, DTy); + addUInt(&Buffer, dwarf::DW_AT_enum_class, dwarf::DW_FORM_flag, 1); + } } break; case dwarf::DW_TAG_subroutine_type: { @@ -801,9 +791,9 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) { // Add prototype flag if we're dealing with a C language and the // function has been prototyped. if (isPrototyped && - (Language == dwarf::DW_LANG_C89 || - Language == dwarf::DW_LANG_C99 || - Language == dwarf::DW_LANG_ObjC)) + (Language == dwarf::DW_LANG_C89 || + Language == dwarf::DW_LANG_C99 || + Language == dwarf::DW_LANG_ObjC)) addUInt(&Buffer, dwarf::DW_AT_prototyped, dwarf::DW_FORM_flag, 1); } break; @@ -846,19 +836,19 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) { addUInt(ElemDie, dwarf::DW_AT_external, dwarf::DW_FORM_flag, 1); addSourceLine(ElemDie, DV); } else if (Element.isDerivedType()) { - DIDerivedType DDTy(Element); - if (DDTy.getTag() == dwarf::DW_TAG_friend) { - ElemDie = new DIE(dwarf::DW_TAG_friend); - addType(ElemDie, DDTy.getTypeDerivedFrom(), dwarf::DW_AT_friend); - } else - ElemDie = createMemberDIE(DIDerivedType(Element)); + DIDerivedType DDTy(Element); + if (DDTy.getTag() == dwarf::DW_TAG_friend) { + ElemDie = new DIE(dwarf::DW_TAG_friend); + addType(ElemDie, DDTy.getTypeDerivedFrom(), dwarf::DW_AT_friend); + } else + ElemDie = createMemberDIE(DIDerivedType(Element)); } else if (Element.isObjCProperty()) { DIObjCProperty Property(Element); ElemDie = new DIE(Property.getTag()); StringRef PropertyName = Property.getObjCPropertyName(); addString(ElemDie, dwarf::DW_AT_APPLE_property_name, PropertyName); - addType(ElemDie, Property.getType()); - addSourceLine(ElemDie, Property); + addType(ElemDie, Property.getType()); + addSourceLine(ElemDie, Property); StringRef GetterName = Property.getObjCPropertyGetterName(); if (!GetterName.empty()) addString(ElemDie, dwarf::DW_AT_APPLE_property_getter, GetterName); @@ -925,19 +915,21 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) { if (!Name.empty()) addString(&Buffer, dwarf::DW_AT_name, Name); - if (Tag == dwarf::DW_TAG_enumeration_type || Tag == dwarf::DW_TAG_class_type - || Tag == dwarf::DW_TAG_structure_type || Tag == dwarf::DW_TAG_union_type) - { + if (Tag == dwarf::DW_TAG_enumeration_type || + Tag == dwarf::DW_TAG_class_type || + Tag == dwarf::DW_TAG_structure_type || + Tag == dwarf::DW_TAG_union_type) { // Add size if non-zero (derived types might be zero-sized.) + // TODO: Do we care about size for enum forward declarations? if (Size) addUInt(&Buffer, dwarf::DW_AT_byte_size, 0, Size); - else { + else if (!CTy.isForwardDecl()) // Add zero size if it is not a forward declaration. - if (CTy.isForwardDecl()) - addUInt(&Buffer, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1); - else - addUInt(&Buffer, dwarf::DW_AT_byte_size, 0, 0); - } + addUInt(&Buffer, dwarf::DW_AT_byte_size, 0, 0); + + // If we're a forward decl, say so. + if (CTy.isForwardDecl()) + addUInt(&Buffer, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1); // Add source line info if available. 
if (!CTy.isForwardDecl()) @@ -968,7 +960,7 @@ CompileUnit::getOrCreateTemplateTypeParameterDIE(DITemplateTypeParameter TP) { /// getOrCreateTemplateValueParameterDIE - Find existing DIE or create new DIE /// for the given DITemplateValueParameter. DIE * -CompileUnit::getOrCreateTemplateValueParameterDIE(DITemplateValueParameter TPV) { +CompileUnit::getOrCreateTemplateValueParameterDIE(DITemplateValueParameter TPV){ DIE *ParamDIE = getDIE(TPV); if (ParamDIE) return ParamDIE; @@ -1015,17 +1007,17 @@ DIE *CompileUnit::getOrCreateSubprogramDIE(DISubprogram SP) { if (SPDie) return SPDie; + SPDie = new DIE(dwarf::DW_TAG_subprogram); + + // DW_TAG_inlined_subroutine may refer to this DIE. + insertDIE(SP, SPDie); + DISubprogram SPDecl = SP.getFunctionDeclaration(); DIE *DeclDie = NULL; if (SPDecl.isSubprogram()) { DeclDie = getOrCreateSubprogramDIE(SPDecl); } - SPDie = new DIE(dwarf::DW_TAG_subprogram); - - // DW_TAG_inlined_subroutine may refer to this DIE. - insertDIE(SP, SPDie); - // Add to context owner. addToContextOwner(SPDie, SP.getContext()); @@ -1240,7 +1232,8 @@ void CompileUnit::createGlobalVariableDIE(const MDNode *N) { } /// constructSubrangeDIE - Construct subrange DIE from DISubrange. -void CompileUnit::constructSubrangeDIE(DIE &Buffer, DISubrange SR, DIE *IndexTy){ +void CompileUnit::constructSubrangeDIE(DIE &Buffer, DISubrange SR, + DIE *IndexTy) { DIE *DW_Subrange = new DIE(dwarf::DW_TAG_subrange_type); addDIEEntry(DW_Subrange, dwarf::DW_AT_type, dwarf::DW_FORM_ref4, IndexTy); uint64_t L = SR.getLo(); diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h index 45e407e..b4ff9e8 100644 --- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h +++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h @@ -15,7 +15,7 @@ #define CODEGEN_ASMPRINTER_DWARFCOMPILEUNIT_H #include "DIE.h" -#include "llvm/Analysis/DebugInfo.h" +#include "llvm/DebugInfo.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/OwningPtr.h" diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index cb78878..649684a 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -17,9 +17,10 @@ #include "DwarfAccelTable.h" #include "DwarfCompileUnit.h" #include "llvm/Constants.h" +#include "llvm/DebugInfo.h" +#include "llvm/DIBuilder.h" #include "llvm/Module.h" #include "llvm/Instructions.h" -#include "llvm/ADT/Triple.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/MC/MCAsmInfo.h" @@ -32,11 +33,10 @@ #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetOptions.h" -#include "llvm/Analysis/DebugInfo.h" -#include "llvm/Analysis/DIBuilder.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/Triple.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" @@ -117,7 +117,6 @@ DIType DbgVariable::getType() const { if (getName() == DT.getName()) return (DT.getTypeDerivedFrom()); } - return Ty; } return Ty; } @@ -127,6 +126,7 @@ DIType DbgVariable::getType() const { DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M) : Asm(A), MMI(Asm->MMI), FirstCU(0), AbbreviationsSet(InitAbbreviationsSetSize), + SourceIdMap(DIEValueAllocator), StringPool(DIEValueAllocator), PrevLabel(NULL) { NextStringPoolNumber = 0; @@ -566,7 +566,7 @@ CompileUnit 
*DwarfDebug::constructCompileUnit(const MDNode *N) { NewCU->addUInt(Die, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr, 0); // DW_AT_stmt_list is a offset of line number information for this // compile unit in debug_line section. - if (Asm->MAI->doesDwarfRequireRelocationForSectionOffset()) + if (Asm->MAI->doesDwarfUseRelocationsAcrossSections()) NewCU->addLabel(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_data4, Asm->GetTempSymbol("section_line")); else @@ -1310,8 +1310,9 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) { MOE = MI->operands_end(); MOI != MOE; ++MOI) { if (!MOI->isReg() || !MOI->isDef() || !MOI->getReg()) continue; - for (const uint16_t *AI = TRI->getOverlaps(MOI->getReg()); - unsigned Reg = *AI; ++AI) { + for (MCRegAliasIterator AI(MOI->getReg(), TRI, true); + AI.isValid(); ++AI) { + unsigned Reg = *AI; const MDNode *Var = LiveUserVar[Reg]; if (!Var) continue; @@ -1381,7 +1382,7 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) { MF->getFunction()->getContext()); recordSourceLine(FnStartDL.getLine(), FnStartDL.getCol(), FnStartDL.getScope(MF->getFunction()->getContext()), - 0); + DWARF2_LINE_DEFAULT_IS_STMT ? DWARF2_FLAG_IS_STMT : 0); } } @@ -1421,6 +1422,12 @@ void DwarfDebug::endFunction(const MachineFunction *MF) { DIVariable DV(Variables.getElement(i)); if (!DV || !DV.Verify() || !ProcessedVars.insert(DV)) continue; + // Check that DbgVariable for DV wasn't created earlier, when + // findAbstractVariable() was called for inlined instance of DV. + LLVMContext &Ctx = DV->getContext(); + DIVariable CleanDV = cleanseInlinedVariable(DV, Ctx); + if (AbstractVariables.lookup(CleanDV)) + continue; if (LexicalScope *Scope = LScopes.findAbstractScope(DV.getContext())) addScopeVariable(Scope, new DbgVariable(DV, NULL)); } @@ -1623,7 +1630,7 @@ void DwarfDebug::emitDIE(DIE *Die) { // DW_AT_range Value encodes offset in debug_range section. 
DIEInteger *V = cast(Values[i]); - if (Asm->MAI->doesDwarfUseLabelOffsetForRanges()) { + if (Asm->MAI->doesDwarfUseRelocationsAcrossSections()) { Asm->EmitLabelPlusOffset(DwarfDebugRangeSectionSym, V->getValue(), 4); @@ -1636,10 +1643,14 @@ void DwarfDebug::emitDIE(DIE *Die) { break; } case dwarf::DW_AT_location: { - if (DIELabel *L = dyn_cast(Values[i])) - Asm->EmitLabelDifference(L->getValue(), DwarfDebugLocSectionSym, 4); - else + if (DIELabel *L = dyn_cast(Values[i])) { + if (Asm->MAI->doesDwarfUseRelocationsAcrossSections()) + Asm->EmitLabelReference(L->getValue(), 4); + else + Asm->EmitLabelDifference(L->getValue(), DwarfDebugLocSectionSym, 4); + } else { Values[i]->EmitValue(Asm, Form); + } break; } case dwarf::DW_AT_accessibility: { @@ -2049,9 +2060,11 @@ void DwarfDebug::emitDebugLoc() { if (Element == DIBuilder::OpPlus) { Asm->EmitInt8(dwarf::DW_OP_plus_uconst); Asm->EmitULEB128(DV.getAddrElement(++i)); - } else if (Element == DIBuilder::OpDeref) - Asm->EmitInt8(dwarf::DW_OP_deref); - else llvm_unreachable("unknown Opcode found in complex address"); + } else if (Element == DIBuilder::OpDeref) { + if (!Entry.Loc.isReg()) + Asm->EmitInt8(dwarf::DW_OP_deref); + } else + llvm_unreachable("unknown Opcode found in complex address"); } } } diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.h b/lib/CodeGen/AsmPrinter/DwarfDebug.h index 83f30f5..d1d6512 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.h +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.h @@ -14,11 +14,11 @@ #ifndef CODEGEN_ASMPRINTER_DWARFDEBUG_H__ #define CODEGEN_ASMPRINTER_DWARFDEBUG_H__ +#include "DIE.h" +#include "llvm/DebugInfo.h" #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/CodeGen/LexicalScopes.h" #include "llvm/MC/MachineLocation.h" -#include "llvm/Analysis/DebugInfo.h" -#include "DIE.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/FoldingSet.h" #include "llvm/ADT/SmallPtrSet.h" @@ -188,6 +188,9 @@ class DwarfDebug { /// MMI - Collected machine module information. MachineModuleInfo *MMI; + /// DIEValueAllocator - All DIEValues are allocated through this allocator. + BumpPtrAllocator DIEValueAllocator; + //===--------------------------------------------------------------------===// // Attributes used to construct specific Dwarf sections. // @@ -210,11 +213,11 @@ class DwarfDebug { /// SourceIdMap - Source id map, i.e. pair of source filename and directory, /// separated by a zero byte, mapped to a unique id. - StringMap SourceIdMap; + StringMap SourceIdMap; /// StringPool - A String->Symbol mapping of strings used by indirect /// references. - StringMap > StringPool; + StringMap, BumpPtrAllocator&> StringPool; unsigned NextStringPoolNumber; /// SectionMap - Provides a unique id per text section. @@ -232,7 +235,7 @@ class DwarfDebug { /// ScopeVariables - Collection of dbg variables of a scope. DenseMap > ScopeVariables; - /// AbstractVariables - Collection on abstract variables. + /// AbstractVariables - Collection of abstract variables. DenseMap AbstractVariables; /// DotDebugLocEntries - Collection of DotDebugLocEntry. @@ -292,9 +295,6 @@ class DwarfDebug { std::vector DebugFrames; - // DIEValueAllocator - All DIEValues are allocated through this allocator. - BumpPtrAllocator DIEValueAllocator; - // Section Symbols: these are assembler temporary labels that are emitted at // the beginning of each supported dwarf section. These are used to form // section offsets and are created by EmitSectionLabels. @@ -333,9 +333,6 @@ private: /// of the function. 
DIE *constructInlinedScopeDIE(CompileUnit *TheCU, LexicalScope *Scope); - /// constructVariableDIE - Construct a DIE for the given DbgVariable. - DIE *constructVariableDIE(DbgVariable *DV, LexicalScope *S); - /// constructScopeDIE - Construct a DIE for this scope. DIE *constructScopeDIE(CompileUnit *TheCU, LexicalScope *Scope); @@ -517,9 +514,6 @@ public: /// in the SourceIds map. unsigned GetOrCreateSourceID(StringRef DirName, StringRef FullName); - /// createSubprogramDIE - Create new DIE using SP. - DIE *createSubprogramDIE(DISubprogram SP); - /// getStringPool - returns the entry into the start of the pool. MCSymbol *getStringPool(); diff --git a/lib/CodeGen/AsmPrinter/DwarfException.h b/lib/CodeGen/AsmPrinter/DwarfException.h index b5f86ab..75f6056 100644 --- a/lib/CodeGen/AsmPrinter/DwarfException.h +++ b/lib/CodeGen/AsmPrinter/DwarfException.h @@ -175,17 +175,6 @@ public: }; class ARMException : public DwarfException { - /// shouldEmitTable - Per-function flag to indicate if EH tables should - /// be emitted. - bool shouldEmitTable; - - /// shouldEmitMoves - Per-function flag to indicate if frame moves info - /// should be emitted. - bool shouldEmitMoves; - - /// shouldEmitTableModule - Per-module flag to indicate if EH tables - /// should be emitted. - bool shouldEmitTableModule; public: //===--------------------------------------------------------------------===// // Main entry points. diff --git a/lib/CodeGen/BranchFolding.cpp b/lib/CodeGen/BranchFolding.cpp index ef1d2ba..fb65bb7 100644 --- a/lib/CodeGen/BranchFolding.cpp +++ b/lib/CodeGen/BranchFolding.cpp @@ -137,9 +137,8 @@ bool BranchFolder::OptimizeImpDefsBlock(MachineBasicBlock *MBB) { break; unsigned Reg = I->getOperand(0).getReg(); ImpDefRegs.insert(Reg); - for (const uint16_t *SubRegs = TRI->getSubRegisters(Reg); - unsigned SubReg = *SubRegs; ++SubRegs) - ImpDefRegs.insert(SubReg); + for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) + ImpDefRegs.insert(*SubRegs); ++I; } if (ImpDefRegs.empty()) @@ -188,7 +187,7 @@ bool BranchFolder::OptimizeFunction(MachineFunction &MF, // Use a RegScavenger to help update liveness when required. MachineRegisterInfo &MRI = MF.getRegInfo(); - if (MRI.tracksLiveness() && TRI->requiresRegisterScavenging(MF)) + if (MRI.tracksLiveness() && TRI->trackLivenessAfterRegAlloc(MF)) RS = new RegScavenger(); else MRI.invalidateLiveness(); @@ -819,10 +818,8 @@ bool BranchFolder::TryTailMergeBlocks(MachineBasicBlock *SuccBB, } bool BranchFolder::TailMergeBlocks(MachineFunction &MF) { - - if (!EnableTailMerge) return false; - bool MadeChange = false; + if (!EnableTailMerge) return MadeChange; // First find blocks with no successors. MergePotentials.clear(); @@ -839,6 +836,7 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) { if (MergePotentials.size() == TailMergeThreshold) for (unsigned i = 0, e = MergePotentials.size(); i != e; ++i) TriedMerging.insert(MergePotentials[i].getBlock()); + // See if we can do any tail merging on those. 
if (MergePotentials.size() >= 2) MadeChange |= TryTailMergeBlocks(NULL, NULL); @@ -864,88 +862,97 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) { for (MachineFunction::iterator I = llvm::next(MF.begin()), E = MF.end(); I != E; ++I) { - if (I->pred_size() >= 2) { - SmallPtrSet UniquePreds; - MachineBasicBlock *IBB = I; - MachineBasicBlock *PredBB = prior(I); - MergePotentials.clear(); - for (MachineBasicBlock::pred_iterator P = I->pred_begin(), - E2 = I->pred_end(); - P != E2 && MergePotentials.size() < TailMergeThreshold; ++P) { - MachineBasicBlock *PBB = *P; - if (TriedMerging.count(PBB)) - continue; - // Skip blocks that loop to themselves, can't tail merge these. - if (PBB == IBB) - continue; - // Visit each predecessor only once. - if (!UniquePreds.insert(PBB)) - continue; - // Skip blocks which may jump to a landing pad. Can't tail merge these. - if (PBB->getLandingPadSuccessor()) - continue; - MachineBasicBlock *TBB = 0, *FBB = 0; - SmallVector Cond; - if (!TII->AnalyzeBranch(*PBB, TBB, FBB, Cond, true)) { - // Failing case: IBB is the target of a cbr, and - // we cannot reverse the branch. - SmallVector NewCond(Cond); - if (!Cond.empty() && TBB == IBB) { - if (TII->ReverseBranchCondition(NewCond)) + if (I->pred_size() < 2) continue; + SmallPtrSet UniquePreds; + MachineBasicBlock *IBB = I; + MachineBasicBlock *PredBB = prior(I); + MergePotentials.clear(); + for (MachineBasicBlock::pred_iterator P = I->pred_begin(), + E2 = I->pred_end(); + P != E2 && MergePotentials.size() < TailMergeThreshold; ++P) { + MachineBasicBlock *PBB = *P; + if (TriedMerging.count(PBB)) + continue; + + // Skip blocks that loop to themselves, can't tail merge these. + if (PBB == IBB) + continue; + + // Visit each predecessor only once. + if (!UniquePreds.insert(PBB)) + continue; + + // Skip blocks which may jump to a landing pad. Can't tail merge these. + if (PBB->getLandingPadSuccessor()) + continue; + + MachineBasicBlock *TBB = 0, *FBB = 0; + SmallVector Cond; + if (!TII->AnalyzeBranch(*PBB, TBB, FBB, Cond, true)) { + // Failing case: IBB is the target of a cbr, and we cannot reverse the + // branch. + SmallVector NewCond(Cond); + if (!Cond.empty() && TBB == IBB) { + if (TII->ReverseBranchCondition(NewCond)) + continue; + // This is the QBB case described above + if (!FBB) + FBB = llvm::next(MachineFunction::iterator(PBB)); + } + + // Failing case: the only way IBB can be reached from PBB is via + // exception handling. Happens for landing pads. Would be nice to have + // a bit in the edge so we didn't have to do all this. + if (IBB->isLandingPad()) { + MachineFunction::iterator IP = PBB; IP++; + MachineBasicBlock *PredNextBB = NULL; + if (IP != MF.end()) + PredNextBB = IP; + if (TBB == NULL) { + if (IBB != PredNextBB) // fallthrough + continue; + } else if (FBB) { + if (TBB != IBB && FBB != IBB) // cbr then ubr + continue; + } else if (Cond.empty()) { + if (TBB != IBB) // ubr + continue; + } else { + if (TBB != IBB && IBB != PredNextBB) // cbr continue; - // This is the QBB case described above - if (!FBB) - FBB = llvm::next(MachineFunction::iterator(PBB)); - } - // Failing case: the only way IBB can be reached from PBB is via - // exception handling. Happens for landing pads. Would be nice - // to have a bit in the edge so we didn't have to do all this. 
- if (IBB->isLandingPad()) { - MachineFunction::iterator IP = PBB; IP++; - MachineBasicBlock *PredNextBB = NULL; - if (IP != MF.end()) - PredNextBB = IP; - if (TBB == NULL) { - if (IBB != PredNextBB) // fallthrough - continue; - } else if (FBB) { - if (TBB != IBB && FBB != IBB) // cbr then ubr - continue; - } else if (Cond.empty()) { - if (TBB != IBB) // ubr - continue; - } else { - if (TBB != IBB && IBB != PredNextBB) // cbr - continue; - } - } - // Remove the unconditional branch at the end, if any. - if (TBB && (Cond.empty() || FBB)) { - DebugLoc dl; // FIXME: this is nowhere - TII->RemoveBranch(*PBB); - if (!Cond.empty()) - // reinsert conditional branch only, for now - TII->InsertBranch(*PBB, (TBB == IBB) ? FBB : TBB, 0, NewCond, dl); } - MergePotentials.push_back(MergePotentialsElt(HashEndOfMBB(PBB), *P)); } + + // Remove the unconditional branch at the end, if any. + if (TBB && (Cond.empty() || FBB)) { + DebugLoc dl; // FIXME: this is nowhere + TII->RemoveBranch(*PBB); + if (!Cond.empty()) + // reinsert conditional branch only, for now + TII->InsertBranch(*PBB, (TBB == IBB) ? FBB : TBB, 0, NewCond, dl); + } + + MergePotentials.push_back(MergePotentialsElt(HashEndOfMBB(PBB), *P)); } - // If this is a large problem, avoid visiting the same basic blocks - // multiple times. - if (MergePotentials.size() == TailMergeThreshold) - for (unsigned i = 0, e = MergePotentials.size(); i != e; ++i) - TriedMerging.insert(MergePotentials[i].getBlock()); - if (MergePotentials.size() >= 2) - MadeChange |= TryTailMergeBlocks(IBB, PredBB); - // Reinsert an unconditional branch if needed. - // The 1 below can occur as a result of removing blocks in - // TryTailMergeBlocks. - PredBB = prior(I); // this may have been changed in TryTailMergeBlocks - if (MergePotentials.size() == 1 && - MergePotentials.begin()->getBlock() != PredBB) - FixTail(MergePotentials.begin()->getBlock(), IBB, TII); } + + // If this is a large problem, avoid visiting the same basic blocks multiple + // times. + if (MergePotentials.size() == TailMergeThreshold) + for (unsigned i = 0, e = MergePotentials.size(); i != e; ++i) + TriedMerging.insert(MergePotentials[i].getBlock()); + + if (MergePotentials.size() >= 2) + MadeChange |= TryTailMergeBlocks(IBB, PredBB); + + // Reinsert an unconditional branch if needed. The 1 below can occur as a + // result of removing blocks in TryTailMergeBlocks. + PredBB = prior(I); // this may have been changed in TryTailMergeBlocks + if (MergePotentials.size() == 1 && + MergePotentials.begin()->getBlock() != PredBB) + FixTail(MergePotentials.begin()->getBlock(), IBB, TII); } + return MadeChange; } @@ -1459,7 +1466,7 @@ static MachineBasicBlock *findFalseBlock(MachineBasicBlock *BB, } /// findHoistingInsertPosAndDeps - Find the location to move common instructions -/// in successors to. The location is ususally just before the terminator, +/// in successors to. The location is usually just before the terminator, /// however if the terminator is a conditional branch and its previous /// instruction is the flag setting instruction, the previous instruction is /// the preferred location. 
This function also gathers uses and defs of the @@ -1483,9 +1490,8 @@ MachineBasicBlock::iterator findHoistingInsertPosAndDeps(MachineBasicBlock *MBB, if (!Reg) continue; if (MO.isUse()) { - Uses.insert(Reg); - for (const uint16_t *AS = TRI->getAliasSet(Reg); *AS; ++AS) - Uses.insert(*AS); + for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) + Uses.insert(*AI); } else if (!MO.isDead()) // Don't try to hoist code in the rare case the terminator defines a // register that is later used. @@ -1545,18 +1551,16 @@ MachineBasicBlock::iterator findHoistingInsertPosAndDeps(MachineBasicBlock *MBB, if (!Reg) continue; if (MO.isUse()) { - Uses.insert(Reg); - for (const uint16_t *AS = TRI->getAliasSet(Reg); *AS; ++AS) - Uses.insert(*AS); + for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) + Uses.insert(*AI); } else { if (Uses.count(Reg)) { Uses.erase(Reg); - for (const uint16_t *SR = TRI->getSubRegisters(Reg); *SR; ++SR) - Uses.erase(*SR); // Use getSubRegisters to be conservative + for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) + Uses.erase(*SubRegs); // Use sub-registers to be conservative } - Defs.insert(Reg); - for (const uint16_t *AS = TRI->getAliasSet(Reg); *AS; ++AS) - Defs.insert(*AS); + for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) + Defs.insert(*AI); } } @@ -1683,8 +1687,8 @@ bool BranchFolder::HoistCommonCodeInSuccs(MachineBasicBlock *MBB) { unsigned Reg = MO.getReg(); if (!Reg || !LocalDefsSet.count(Reg)) continue; - for (const uint16_t *OR = TRI->getOverlaps(Reg); *OR; ++OR) - LocalDefsSet.erase(*OR); + for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) + LocalDefsSet.erase(*AI); } // Track local defs so we can update liveins. @@ -1696,8 +1700,8 @@ bool BranchFolder::HoistCommonCodeInSuccs(MachineBasicBlock *MBB) { if (!Reg) continue; LocalDefs.push_back(Reg); - for (const uint16_t *OR = TRI->getOverlaps(Reg); *OR; ++OR) - LocalDefsSet.insert(*OR); + for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) + LocalDefsSet.insert(*AI); } HasDups = true; diff --git a/lib/CodeGen/CMakeLists.txt b/lib/CodeGen/CMakeLists.txt index 21729cd..2e189ad 100644 --- a/lib/CodeGen/CMakeLists.txt +++ b/lib/CodeGen/CMakeLists.txt @@ -11,6 +11,7 @@ add_llvm_library(LLVMCodeGen DeadMachineInstructionElim.cpp DFAPacketizer.cpp DwarfEHPrepare.cpp + EarlyIfConversion.cpp EdgeBundles.cpp ExecutionDepsFix.cpp ExpandISelPseudos.cpp @@ -30,6 +31,7 @@ add_llvm_library(LLVMCodeGen LiveInterval.cpp LiveIntervalAnalysis.cpp LiveIntervalUnion.cpp + LiveRegMatrix.cpp LiveStackAnalysis.cpp LiveVariables.cpp LiveRangeCalc.cpp @@ -59,6 +61,7 @@ add_llvm_library(LLVMCodeGen MachineSSAUpdater.cpp MachineScheduler.cpp MachineSink.cpp + MachineTraceMetrics.cpp MachineVerifier.cpp OcamlGC.cpp OptimizePHIs.cpp @@ -77,8 +80,8 @@ add_llvm_library(LLVMCodeGen RegAllocPBQP.cpp RegisterClassInfo.cpp RegisterCoalescer.cpp + RegisterPressure.cpp RegisterScavenging.cpp - RenderMachineFunction.cpp ScheduleDAG.cpp ScheduleDAGInstrs.cpp ScheduleDAGPrinter.cpp @@ -103,5 +106,7 @@ add_llvm_library(LLVMCodeGen VirtRegMap.cpp ) +add_dependencies(LLVMCodeGen intrinsics_gen) + add_subdirectory(SelectionDAG) add_subdirectory(AsmPrinter) diff --git a/lib/CodeGen/CalcSpillWeights.cpp b/lib/CodeGen/CalcSpillWeights.cpp index ea16a25..939af3f 100644 --- a/lib/CodeGen/CalcSpillWeights.cpp +++ b/lib/CodeGen/CalcSpillWeights.cpp @@ -39,18 +39,20 @@ void CalculateSpillWeights::getAnalysisUsage(AnalysisUsage &au) const { MachineFunctionPass::getAnalysisUsage(au); } 
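// The runOnMachineFunction rewrite below stops walking the LiveIntervals
// map and enumerates virtual registers directly; the reusable idiom,
// sketched with a hypothetical Visit callback:
static void forEachLiveVirtReg(MachineRegisterInfo &MRI, LiveIntervals &LIS,
                               void (*Visit)(LiveInterval &)) {
  for (unsigned i = 0, e = MRI.getNumVirtRegs(); i != e; ++i) {
    unsigned Reg = TargetRegisterInfo::index2VirtReg(i); // dense index -> vreg
    if (MRI.reg_nodbg_empty(Reg))
      continue; // no non-debug uses or defs: nothing to weigh
    Visit(LIS.getInterval(Reg));
  }
}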
-bool CalculateSpillWeights::runOnMachineFunction(MachineFunction &fn) { +bool CalculateSpillWeights::runOnMachineFunction(MachineFunction &MF) { DEBUG(dbgs() << "********** Compute Spill Weights **********\n" << "********** Function: " - << fn.getFunction()->getName() << '\n'); - - LiveIntervals &lis = getAnalysis(); - VirtRegAuxInfo vrai(fn, lis, getAnalysis()); - for (LiveIntervals::iterator I = lis.begin(), E = lis.end(); I != E; ++I) { - LiveInterval &li = *I->second; - if (TargetRegisterInfo::isVirtualRegister(li.reg)) - vrai.CalculateWeightAndHint(li); + << MF.getFunction()->getName() << '\n'); + + LiveIntervals &LIS = getAnalysis(); + MachineRegisterInfo &MRI = MF.getRegInfo(); + VirtRegAuxInfo VRAI(MF, LIS, getAnalysis()); + for (unsigned i = 0, e = MRI.getNumVirtRegs(); i != e; ++i) { + unsigned Reg = TargetRegisterInfo::index2VirtReg(i); + if (MRI.reg_nodbg_empty(Reg)) + continue; + VRAI.CalculateWeightAndHint(LIS.getInterval(Reg)); } return false; } @@ -86,6 +88,27 @@ static unsigned copyHint(const MachineInstr *mi, unsigned reg, return tri.getMatchingSuperReg(hreg, sub, rc); } +// Check if all values in LI are rematerializable +static bool isRematerializable(const LiveInterval &LI, + const LiveIntervals &LIS, + const TargetInstrInfo &TII) { + for (LiveInterval::const_vni_iterator I = LI.vni_begin(), E = LI.vni_end(); + I != E; ++I) { + const VNInfo *VNI = *I; + if (VNI->isUnused()) + continue; + if (VNI->isPHIDef()) + return false; + + MachineInstr *MI = LIS.getInstructionFromIndex(VNI->def); + assert(MI && "Dead valno in interval"); + + if (!TII.isTriviallyReMaterializable(MI, LIS.getAliasAnalysis())) + return false; + } + return true; +} + void VirtRegAuxInfo::CalculateWeightAndHint(LiveInterval &li) { MachineRegisterInfo &mri = MF.getRegInfo(); const TargetRegisterInfo &tri = *MF.getTarget().getRegisterInfo(); @@ -171,17 +194,11 @@ void VirtRegAuxInfo::CalculateWeightAndHint(LiveInterval &li) { } // If all of the definitions of the interval are re-materializable, - // it is a preferred candidate for spilling. If none of the defs are - // loads, then it's potentially very cheap to re-materialize. + // it is a preferred candidate for spilling. // FIXME: this gets much more complicated once we support non-trivial // re-materialization. - bool isLoad = false; - if (LIS.isReMaterializable(li, 0, isLoad)) { - if (isLoad) - totalWeight *= 0.9F; - else - totalWeight *= 0.5F; - } + if (isRematerializable(li, LIS, *MF.getTarget().getInstrInfo())) + totalWeight *= 0.5F; li.weight = normalizeSpillWeight(totalWeight, li.getSize()); } diff --git a/lib/CodeGen/CallingConvLower.cpp b/lib/CodeGen/CallingConvLower.cpp index 2b7dfdb..0b747fd 100644 --- a/lib/CodeGen/CallingConvLower.cpp +++ b/lib/CodeGen/CallingConvLower.cpp @@ -49,8 +49,7 @@ void CCState::HandleByVal(unsigned ValNo, MVT ValVT, Size = MinSize; if (MinAlign > (int)Align) Align = MinAlign; - if (MF.getFrameInfo()->getMaxAlignment() < Align) - MF.getFrameInfo()->setMaxAlignment(Align); + MF.getFrameInfo()->ensureMaxAlignment(Align); TM.getTargetLowering()->HandleByVal(this, Size); unsigned Offset = AllocateStack(Size, Align); addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo)); @@ -58,9 +57,8 @@ void CCState::HandleByVal(unsigned ValNo, MVT ValVT, /// MarkAllocated - Mark a register and all of its aliases as allocated. 
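// A minimal sketch of the bit-vector update performed below: UsedRegs is
// indexed by physical register number, 32 registers per 32-bit word.
static void markReg(SmallVectorImpl<uint32_t> &UsedRegs, unsigned Reg) {
  UsedRegs[Reg / 32] |= 1u << (Reg & 31); // word = Reg/32, bit = Reg%32
}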
void CCState::MarkAllocated(unsigned Reg) { - for (const uint16_t *Alias = TRI.getOverlaps(Reg); - unsigned Reg = *Alias; ++Alias) - UsedRegs[Reg/32] |= 1 << (Reg&31); + for (MCRegAliasIterator AI(Reg, &TRI, true); AI.isValid(); ++AI) + UsedRegs[*AI/32] |= 1 << (*AI&31); } /// AnalyzeFormalArguments - Analyze an array of argument values, diff --git a/lib/CodeGen/CodeGen.cpp b/lib/CodeGen/CodeGen.cpp index a81bb5c..fb2c2e8 100644 --- a/lib/CodeGen/CodeGen.cpp +++ b/lib/CodeGen/CodeGen.cpp @@ -23,6 +23,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) { initializeCalculateSpillWeightsPass(Registry); initializeCodePlacementOptPass(Registry); initializeDeadMachineInstructionElimPass(Registry); + initializeEarlyIfConverterPass(Registry); initializeExpandPostRAPass(Registry); initializeExpandISelPseudosPass(Registry); initializeFinalizeMachineBundlesPass(Registry); @@ -53,7 +54,6 @@ void llvm::initializeCodeGen(PassRegistry &Registry) { initializeProcessImplicitDefsPass(Registry); initializePEIPass(Registry); initializeRegisterCoalescerPass(Registry); - initializeRenderMachineFunctionPass(Registry); initializeSlotIndexesPass(Registry); initializeStackProtectorPass(Registry); initializeStackSlotColoringPass(Registry); @@ -65,7 +65,9 @@ void llvm::initializeCodeGen(PassRegistry &Registry) { initializeUnreachableBlockElimPass(Registry); initializeUnreachableMachineBlockElimPass(Registry); initializeVirtRegMapPass(Registry); + initializeVirtRegRewriterPass(Registry); initializeLowerIntrinsicsPass(Registry); + initializeMachineFunctionPrinterPassPass(Registry); } void LLVMInitializeCodeGen(LLVMPassRegistryRef R) { diff --git a/lib/CodeGen/CodePlacementOpt.cpp b/lib/CodeGen/CodePlacementOpt.cpp index c13c05e..99233df 100644 --- a/lib/CodeGen/CodePlacementOpt.cpp +++ b/lib/CodeGen/CodePlacementOpt.cpp @@ -201,7 +201,7 @@ bool CodePlacementOpt::EliminateUnconditionalJumpsToTop(MachineFunction &MF, // fallthrough edge. if (!Prior->isSuccessor(End)) goto next_pred; - // Otherwise we can stop scanning and procede to move the blocks. + // Otherwise we can stop scanning and proceed to move the blocks. break; } // If we hit a switch or something complicated, don't move anything diff --git a/lib/CodeGen/CriticalAntiDepBreaker.cpp b/lib/CodeGen/CriticalAntiDepBreaker.cpp index bad5010..a9de1c749 100644 --- a/lib/CodeGen/CriticalAntiDepBreaker.cpp +++ b/lib/CodeGen/CriticalAntiDepBreaker.cpp @@ -62,17 +62,11 @@ void CriticalAntiDepBreaker::StartBlock(MachineBasicBlock *BB) { // In a return block, examine the function live-out regs. for (MachineRegisterInfo::liveout_iterator I = MRI.liveout_begin(), E = MRI.liveout_end(); I != E; ++I) { - unsigned Reg = *I; - Classes[Reg] = reinterpret_cast(-1); - KillIndices[Reg] = BBSize; - DefIndices[Reg] = ~0u; - - // Repeat, for all aliases. 
- for (const uint16_t *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) { - unsigned AliasReg = *Alias; - Classes[AliasReg] = reinterpret_cast(-1); - KillIndices[AliasReg] = BBSize; - DefIndices[AliasReg] = ~0u; + for (MCRegAliasIterator AI(*I, TRI, true); AI.isValid(); ++AI) { + unsigned Reg = *AI; + Classes[Reg] = reinterpret_cast(-1); + KillIndices[Reg] = BBSize; + DefIndices[Reg] = ~0u; } } } @@ -84,17 +78,11 @@ void CriticalAntiDepBreaker::StartBlock(MachineBasicBlock *BB) { SE = BB->succ_end(); SI != SE; ++SI) for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(), E = (*SI)->livein_end(); I != E; ++I) { - unsigned Reg = *I; - Classes[Reg] = reinterpret_cast(-1); - KillIndices[Reg] = BBSize; - DefIndices[Reg] = ~0u; - - // Repeat, for all aliases. - for (const uint16_t *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) { - unsigned AliasReg = *Alias; - Classes[AliasReg] = reinterpret_cast(-1); - KillIndices[AliasReg] = BBSize; - DefIndices[AliasReg] = ~0u; + for (MCRegAliasIterator AI(*I, TRI, true); AI.isValid(); ++AI) { + unsigned Reg = *AI; + Classes[Reg] = reinterpret_cast(-1); + KillIndices[Reg] = BBSize; + DefIndices[Reg] = ~0u; } } @@ -104,18 +92,12 @@ void CriticalAntiDepBreaker::StartBlock(MachineBasicBlock *BB) { const MachineFrameInfo *MFI = MF.getFrameInfo(); BitVector Pristine = MFI->getPristineRegs(BB); for (const uint16_t *I = TRI->getCalleeSavedRegs(&MF); *I; ++I) { - unsigned Reg = *I; - if (!IsReturnBlock && !Pristine.test(Reg)) continue; - Classes[Reg] = reinterpret_cast(-1); - KillIndices[Reg] = BBSize; - DefIndices[Reg] = ~0u; - - // Repeat, for all aliases. - for (const uint16_t *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) { - unsigned AliasReg = *Alias; - Classes[AliasReg] = reinterpret_cast(-1); - KillIndices[AliasReg] = BBSize; - DefIndices[AliasReg] = ~0u; + if (!IsReturnBlock && !Pristine.test(*I)) continue; + for (MCRegAliasIterator AI(*I, TRI, true); AI.isValid(); ++AI) { + unsigned Reg = *AI; + Classes[Reg] = reinterpret_cast(-1); + KillIndices[Reg] = BBSize; + DefIndices[Reg] = ~0u; } } } @@ -208,7 +190,7 @@ void CriticalAntiDepBreaker::PrescanInstruction(MachineInstr *MI) { const TargetRegisterClass *NewRC = 0; if (i < MI->getDesc().getNumOperands()) - NewRC = TII->getRegClass(MI->getDesc(), i, TRI); + NewRC = TII->getRegClass(MI->getDesc(), i, TRI, MF); // For now, only allow the register to be changed if its register // class is consistent across all uses. @@ -218,11 +200,11 @@ void CriticalAntiDepBreaker::PrescanInstruction(MachineInstr *MI) { Classes[Reg] = reinterpret_cast(-1); // Now check for aliases. - for (const uint16_t *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) { + for (MCRegAliasIterator AI(Reg, TRI, false); AI.isValid(); ++AI) { // If an alias of the reg is used during the live range, give up. // Note that this allows us to skip checking if AntiDepReg // overlaps with any of the aliases, among other things. 
- unsigned AliasReg = *Alias; + unsigned AliasReg = *AI; if (Classes[AliasReg]) { Classes[AliasReg] = reinterpret_cast(-1); Classes[Reg] = reinterpret_cast(-1); @@ -236,9 +218,8 @@ void CriticalAntiDepBreaker::PrescanInstruction(MachineInstr *MI) { if (MO.isUse() && Special) { if (!KeepRegs.test(Reg)) { KeepRegs.set(Reg); - for (const uint16_t *Subreg = TRI->getSubRegisters(Reg); - *Subreg; ++Subreg) - KeepRegs.set(*Subreg); + for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) + KeepRegs.set(*SubRegs); } } } @@ -247,7 +228,7 @@ void CriticalAntiDepBreaker::PrescanInstruction(MachineInstr *MI) { void CriticalAntiDepBreaker::ScanInstruction(MachineInstr *MI, unsigned Count) { // Update liveness. - // Proceding upwards, registers that are defed but not used in this + // Proceeding upwards, registers that are defed but not used in this // instruction are now dead. if (!TII->isPredicated(MI)) { @@ -282,9 +263,8 @@ void CriticalAntiDepBreaker::ScanInstruction(MachineInstr *MI, Classes[Reg] = 0; RegRefs.erase(Reg); // Repeat, for all subregs. - for (const uint16_t *Subreg = TRI->getSubRegisters(Reg); - *Subreg; ++Subreg) { - unsigned SubregReg = *Subreg; + for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) { + unsigned SubregReg = *SubRegs; DefIndices[SubregReg] = Count; KillIndices[SubregReg] = ~0u; KeepRegs.reset(SubregReg); @@ -292,11 +272,8 @@ void CriticalAntiDepBreaker::ScanInstruction(MachineInstr *MI, RegRefs.erase(SubregReg); } // Conservatively mark super-registers as unusable. - for (const uint16_t *Super = TRI->getSuperRegisters(Reg); - *Super; ++Super) { - unsigned SuperReg = *Super; - Classes[SuperReg] = reinterpret_cast(-1); - } + for (MCSuperRegIterator SR(Reg, TRI); SR.isValid(); ++SR) + Classes[*SR] = reinterpret_cast(-1); } } for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { @@ -308,7 +285,7 @@ void CriticalAntiDepBreaker::ScanInstruction(MachineInstr *MI, const TargetRegisterClass *NewRC = 0; if (i < MI->getDesc().getNumOperands()) - NewRC = TII->getRegClass(MI->getDesc(), i, TRI); + NewRC = TII->getRegClass(MI->getDesc(), i, TRI, MF); // For now, only allow the register to be changed if its register // class is consistent across all uses. @@ -328,8 +305,8 @@ void CriticalAntiDepBreaker::ScanInstruction(MachineInstr *MI, "Kill and Def maps aren't consistent for Reg!"); } // Repeat, for all aliases. 
- for (const uint16_t *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) { - unsigned AliasReg = *Alias; + for (MCRegAliasIterator AI(Reg, TRI, false); AI.isValid(); ++AI) { + unsigned AliasReg = *AI; if (KillIndices[AliasReg] == ~0u) { KillIndices[AliasReg] = Count; DefIndices[AliasReg] = ~0u; diff --git a/lib/CodeGen/CriticalAntiDepBreaker.h b/lib/CodeGen/CriticalAntiDepBreaker.h index 7746259..ad95c48 100644 --- a/lib/CodeGen/CriticalAntiDepBreaker.h +++ b/lib/CodeGen/CriticalAntiDepBreaker.h @@ -17,11 +17,11 @@ #define LLVM_CODEGEN_CRITICALANTIDEPBREAKER_H #include "AntiDepBreaker.h" -#include "RegisterClassInfo.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/RegisterClassInfo.h" #include "llvm/CodeGen/ScheduleDAG.h" #include "llvm/ADT/BitVector.h" #include diff --git a/lib/CodeGen/DFAPacketizer.cpp b/lib/CodeGen/DFAPacketizer.cpp index 5ff641c..ff2f113 100644 --- a/lib/CodeGen/DFAPacketizer.cpp +++ b/lib/CodeGen/DFAPacketizer.cpp @@ -23,10 +23,10 @@ // //===----------------------------------------------------------------------===// +#include "llvm/CodeGen/ScheduleDAGInstrs.h" #include "llvm/CodeGen/DFAPacketizer.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBundle.h" -#include "llvm/CodeGen/ScheduleDAGInstrs.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/MC/MCInstrItineraries.h" using namespace llvm; @@ -100,22 +100,23 @@ void DFAPacketizer::reserveResources(llvm::MachineInstr *MI) { reserveResources(&MID); } -namespace { +namespace llvm { // DefaultVLIWScheduler - This class extends ScheduleDAGInstrs and overrides // Schedule method to build the dependence graph. class DefaultVLIWScheduler : public ScheduleDAGInstrs { public: DefaultVLIWScheduler(MachineFunction &MF, MachineLoopInfo &MLI, - MachineDominatorTree &MDT, bool IsPostRA); + MachineDominatorTree &MDT, bool IsPostRA); // Schedule - Actual scheduling work. void schedule(); }; -} // end anonymous namespace +} DefaultVLIWScheduler::DefaultVLIWScheduler( MachineFunction &MF, MachineLoopInfo &MLI, MachineDominatorTree &MDT, bool IsPostRA) : ScheduleDAGInstrs(MF, MLI, MDT, IsPostRA) { + CanHandleTerminators = true; } void DefaultVLIWScheduler::schedule() { @@ -129,49 +130,25 @@ VLIWPacketizerList::VLIWPacketizerList( bool IsPostRA) : TM(MF.getTarget()), MF(MF) { TII = TM.getInstrInfo(); ResourceTracker = TII->CreateTargetScheduleState(&TM, 0); - SchedulerImpl = new DefaultVLIWScheduler(MF, MLI, MDT, IsPostRA); + VLIWScheduler = new DefaultVLIWScheduler(MF, MLI, MDT, IsPostRA); } // VLIWPacketizerList Dtor VLIWPacketizerList::~VLIWPacketizerList() { - delete SchedulerImpl; - delete ResourceTracker; -} - -// ignorePseudoInstruction - ignore pseudo instructions. -bool VLIWPacketizerList::ignorePseudoInstruction(MachineInstr *MI, - MachineBasicBlock *MBB) { - if (MI->isDebugValue()) - return true; - - if (TII->isSchedulingBoundary(MI, MBB, MF)) - return true; - - return false; -} - -// isSoloInstruction - return true if instruction I must end previous -// packet. -bool VLIWPacketizerList::isSoloInstruction(MachineInstr *I) { - if (I->isInlineAsm()) - return true; - - return false; -} + if (VLIWScheduler) + delete VLIWScheduler; -// addToPacket - Add I to the current packet and reserve resource. 
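One detail in the DFAPacketizer hunk above: DefaultVLIWScheduler moves from an anonymous namespace into namespace llvm. A plausible reading is that the class now needs external linkage so it can be forward-declared and pointed to from the VLIWPacketizerList header (the new VLIWScheduler member). A minimal illustration of the linkage difference, not code from this patch:

    namespace { struct Hidden {}; }        // internal linkage: this TU only
    namespace llvm { struct Visible {}; }  // external linkage: other TUs can
                                           // forward-declare llvm::Visible
                                           // and hold a pointer to it
    int main() {
      Hidden H;
      llvm::Visible V;
      (void)H;
      (void)V;
      return 0;
    }

The removal of the old addToPacket body, introduced by the comment above, continues below.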
-void VLIWPacketizerList::addToPacket(MachineInstr *MI) { - CurrentPacketMIs.push_back(MI); - ResourceTracker->reserveResources(MI); + if (ResourceTracker) + delete ResourceTracker; } // endPacket - End the current packet, bundle packet instructions and reset // DFA state. void VLIWPacketizerList::endPacket(MachineBasicBlock *MBB, - MachineInstr *I) { + MachineInstr *MI) { if (CurrentPacketMIs.size() > 1) { MachineInstr *MIFirst = CurrentPacketMIs.front(); - finalizeBundle(*MBB, MIFirst, I); + finalizeBundle(*MBB, MIFirst, MI); } CurrentPacketMIs.clear(); ResourceTracker->clearResources(); @@ -181,31 +158,35 @@ void VLIWPacketizerList::endPacket(MachineBasicBlock *MBB, void VLIWPacketizerList::PacketizeMIs(MachineBasicBlock *MBB, MachineBasicBlock::iterator BeginItr, MachineBasicBlock::iterator EndItr) { - assert(MBB->end() == EndItr && "Bad EndIndex"); - - SchedulerImpl->enterRegion(MBB, BeginItr, EndItr, MBB->size()); - - // Build the DAG without reordering instructions. - SchedulerImpl->schedule(); - - // Remember scheduling units. - SUnits = SchedulerImpl->SUnits; + assert(VLIWScheduler && "VLIW Scheduler is not initialized!"); + VLIWScheduler->startBlock(MBB); + VLIWScheduler->enterRegion(MBB, BeginItr, EndItr, MBB->size()); + VLIWScheduler->schedule(); + + // Generate MI -> SU map. + MIToSUnit.clear(); + for (unsigned i = 0, e = VLIWScheduler->SUnits.size(); i != e; ++i) { + SUnit *SU = &VLIWScheduler->SUnits[i]; + MIToSUnit[SU->getInstr()] = SU; + } // The main packetizer loop. for (; BeginItr != EndItr; ++BeginItr) { MachineInstr *MI = BeginItr; - // Ignore pseudo instructions. - if (ignorePseudoInstruction(MI, MBB)) - continue; + this->initPacketizerState(); // End the current packet if needed. - if (isSoloInstruction(MI)) { + if (this->isSoloInstruction(MI)) { endPacket(MBB, MI); continue; } - SUnit *SUI = SchedulerImpl->getSUnit(MI); + // Ignore pseudo instructions. + if (this->ignorePseudoInstruction(MI, MBB)) + continue; + + SUnit *SUI = MIToSUnit[MI]; assert(SUI && "Missing SUnit Info!"); // Ask DFA if machine resource is available for MI. @@ -215,13 +196,13 @@ void VLIWPacketizerList::PacketizeMIs(MachineBasicBlock *MBB, for (std::vector::iterator VI = CurrentPacketMIs.begin(), VE = CurrentPacketMIs.end(); VI != VE; ++VI) { MachineInstr *MJ = *VI; - SUnit *SUJ = SchedulerImpl->getSUnit(MJ); + SUnit *SUJ = MIToSUnit[MJ]; assert(SUJ && "Missing SUnit Info!"); // Is it legal to packetize SUI and SUJ together. - if (!isLegalToPacketizeTogether(SUI, SUJ)) { + if (!this->isLegalToPacketizeTogether(SUI, SUJ)) { // Allow packetization if dependency can be pruned. - if (!isLegalToPruneDependencies(SUI, SUJ)) { + if (!this->isLegalToPruneDependencies(SUI, SUJ)) { // End the packet if dependency cannot be pruned. endPacket(MBB, MI); break; @@ -234,11 +215,11 @@ void VLIWPacketizerList::PacketizeMIs(MachineBasicBlock *MBB, } // Add MI to the current packet. - addToPacket(MI); + BeginItr = this->addToPacket(MI); } // For all instructions in BB. // End any packet left behind. 
endPacket(MBB, EndItr); - - SchedulerImpl->exitRegion(); + VLIWScheduler->exitRegion(); + VLIWScheduler->finishBlock(); } diff --git a/lib/CodeGen/DeadMachineInstructionElim.cpp b/lib/CodeGen/DeadMachineInstructionElim.cpp index aa10d1d..b4394e8 100644 --- a/lib/CodeGen/DeadMachineInstructionElim.cpp +++ b/lib/CodeGen/DeadMachineInstructionElim.cpp @@ -171,9 +171,8 @@ bool DeadMachineInstructionElim::runOnMachineFunction(MachineFunction &MF) { // Check the subreg set, not the alias set, because a def // of a super-register may still be partially live after // this def. - for (const uint16_t *SubRegs = TRI->getSubRegisters(Reg); - *SubRegs; ++SubRegs) - LivePhysRegs.reset(*SubRegs); + for (MCSubRegIterator SR(Reg, TRI); SR.isValid(); ++SR) + LivePhysRegs.reset(*SR); } } else if (MO.isRegMask()) { // Register mask of preserved registers. All clobbers are dead. @@ -187,10 +186,8 @@ bool DeadMachineInstructionElim::runOnMachineFunction(MachineFunction &MF) { if (MO.isReg() && MO.isUse()) { unsigned Reg = MO.getReg(); if (TargetRegisterInfo::isPhysicalRegister(Reg)) { - LivePhysRegs.set(Reg); - for (const uint16_t *AliasSet = TRI->getAliasSet(Reg); - *AliasSet; ++AliasSet) - LivePhysRegs.set(*AliasSet); + for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) + LivePhysRegs.set(*AI); } } } diff --git a/lib/CodeGen/DwarfEHPrepare.cpp b/lib/CodeGen/DwarfEHPrepare.cpp index 944dd4f..7095624 100644 --- a/lib/CodeGen/DwarfEHPrepare.cpp +++ b/lib/CodeGen/DwarfEHPrepare.cpp @@ -39,7 +39,7 @@ namespace { Constant *RewindFunction; bool InsertUnwindResumeCalls(Function &Fn); - Instruction *GetExceptionObject(ResumeInst *RI); + Value *GetExceptionObject(ResumeInst *RI); public: static char ID; // Pass identification, replacement for typeid. @@ -68,9 +68,9 @@ FunctionPass *llvm::createDwarfEHPass(const TargetMachine *tm) { /// GetExceptionObject - Return the exception object from the value passed into /// the 'resume' instruction (typically an aggregate). Clean up any dead /// instructions, including the 'resume' instruction. -Instruction *DwarfEHPrepare::GetExceptionObject(ResumeInst *RI) { +Value *DwarfEHPrepare::GetExceptionObject(ResumeInst *RI) { Value *V = RI->getOperand(0); - Instruction *ExnObj = 0; + Value *ExnObj = 0; InsertValueInst *SelIVI = dyn_cast(V); LoadInst *SelLoad = 0; InsertValueInst *ExcIVI = 0; @@ -81,7 +81,7 @@ Instruction *DwarfEHPrepare::GetExceptionObject(ResumeInst *RI) { ExcIVI = dyn_cast(SelIVI->getOperand(0)); if (ExcIVI && isa(ExcIVI->getOperand(0)) && ExcIVI->getNumIndices() == 1 && *ExcIVI->idx_begin() == 0) { - ExnObj = cast(ExcIVI->getOperand(1)); + ExnObj = ExcIVI->getOperand(1); SelLoad = dyn_cast(SelIVI->getOperand(1)); EraseIVIs = true; } @@ -139,7 +139,7 @@ bool DwarfEHPrepare::InsertUnwindResumeCalls(Function &Fn) { // _Unwind_Resume to the end of the single resume block. ResumeInst *RI = Resumes.front(); BasicBlock *UnwindBB = RI->getParent(); - Instruction *ExnObj = GetExceptionObject(RI); + Value *ExnObj = GetExceptionObject(RI); // Call the _Unwind_Resume function. 
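In the DwarfEHPrepare hunks above, GetExceptionObject now returns Value* instead of Instruction*. The cast that was dropped suggests the reason: the extracted exception object can be any Value, for example a constant, so the narrower type was wrong. A toy sketch of the distinction, with stand-in types rather than the LLVM hierarchy:

    #include <cassert>

    struct Value {};                  // stand-ins for the LLVM class tree
    struct Constant : Value {};
    struct Instruction : Value {};

    // Returning Instruction* here would reject the Constant case entirely;
    // Value* accepts both, mirroring the change to GetExceptionObject.
    Value *exceptionObject(bool FromConst) {
      static Constant C;
      static Instruction I;
      if (FromConst)
        return &C;
      return &I;
    }

    int main() {
      assert(exceptionObject(true) && exceptionObject(false));
      return 0;
    }

The rewind call announced by the trailing comment above follows.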
CallInst *CI = CallInst::Create(RewindFunction, ExnObj, "", UnwindBB); @@ -162,7 +162,7 @@ bool DwarfEHPrepare::InsertUnwindResumeCalls(Function &Fn) { BasicBlock *Parent = RI->getParent(); BranchInst::Create(UnwindBB, Parent); - Instruction *ExnObj = GetExceptionObject(RI); + Value *ExnObj = GetExceptionObject(RI); PN->addIncoming(ExnObj, Parent); ++NumResumesLowered; diff --git a/lib/CodeGen/EarlyIfConversion.cpp b/lib/CodeGen/EarlyIfConversion.cpp new file mode 100644 index 0000000..f9347ef --- /dev/null +++ b/lib/CodeGen/EarlyIfConversion.cpp @@ -0,0 +1,803 @@ +//===-- EarlyIfConversion.cpp - If-conversion on SSA form machine code ----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Early if-conversion is for out-of-order CPUs that don't have a lot of +// predicable instructions. The goal is to eliminate conditional branches that +// may mispredict. +// +// Instructions from both sides of the branch are executed specutatively, and a +// cmov instruction selects the result. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "early-ifcvt" +#include "MachineTraceMetrics.h" +#include "llvm/Function.h" +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/PostOrderIterator.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SparseSet.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/MachineBranchProbabilityInfo.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/MC/MCInstrItineraries.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +// Absolute maximum number of instructions allowed per speculated block. +// This bypasses all other heuristics, so it should be set fairly high. +static cl::opt +BlockInstrLimit("early-ifcvt-limit", cl::init(30), cl::Hidden, + cl::desc("Maximum number of instructions per speculated block.")); + +// Stress testing mode - disable heuristics. +static cl::opt Stress("stress-early-ifcvt", cl::Hidden, + cl::desc("Turn all knobs to 11")); + +STATISTIC(NumDiamondsSeen, "Number of diamonds"); +STATISTIC(NumDiamondsConv, "Number of diamonds converted"); +STATISTIC(NumTrianglesSeen, "Number of triangles"); +STATISTIC(NumTrianglesConv, "Number of triangles converted"); + +//===----------------------------------------------------------------------===// +// SSAIfConv +//===----------------------------------------------------------------------===// +// +// The SSAIfConv class performs if-conversion on SSA form machine code after +// determining if it is possible. The class contains no heuristics; external +// code should be used to determine when if-conversion is a good idea. 
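Before the class definition, a source-level picture of the target shape may help; the header comment resumes below with the exact CFGs. A diamond arises from code like the following, a hand-written illustration rather than anything in this patch:

    // Both arms are short and side-effect free; instead of branching, a
    // target with cmov can compute both and select:
    int pick(bool c, int a, int b) {
      int t = a + 1;    // TBB, speculated
      int f = b - 1;    // FBB, speculated
      return c ? t : f; // the select replacing the tail PHI
    }

    int main() { return pick(true, 1, 2) == 2 ? 0 : 1; }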
+// +// SSAIfConv can convert both triangles and diamonds: +// +// Triangle: Head Diamond: Head +// | \ / \_ +// | \ / | +// | [TF]BB FBB TBB +// | / \ / +// | / \ / +// Tail Tail +// +// Instructions in the conditional blocks TBB and/or FBB are spliced into the +// Head block, and phis in the Tail block are converted to select instructions. +// +namespace { +class SSAIfConv { + const TargetInstrInfo *TII; + const TargetRegisterInfo *TRI; + MachineRegisterInfo *MRI; + +public: + /// The block containing the conditional branch. + MachineBasicBlock *Head; + + /// The block containing phis after the if-then-else. + MachineBasicBlock *Tail; + + /// The 'true' conditional block as determined by AnalyzeBranch. + MachineBasicBlock *TBB; + + /// The 'false' conditional block as determined by AnalyzeBranch. + MachineBasicBlock *FBB; + + /// isTriangle - When there is no 'else' block, either TBB or FBB will be + /// equal to Tail. + bool isTriangle() const { return TBB == Tail || FBB == Tail; } + + /// Returns the Tail predecessor for the True side. + MachineBasicBlock *getTPred() const { return TBB == Tail ? Head : TBB; } + + /// Returns the Tail predecessor for the False side. + MachineBasicBlock *getFPred() const { return FBB == Tail ? Head : FBB; } + + /// Information about each phi in the Tail block. + struct PHIInfo { + MachineInstr *PHI; + unsigned TReg, FReg; + // Latencies from Cond+Branch, TReg, and FReg to DstReg. + int CondCycles, TCycles, FCycles; + + PHIInfo(MachineInstr *phi) + : PHI(phi), TReg(0), FReg(0), CondCycles(0), TCycles(0), FCycles(0) {} + }; + + SmallVector PHIs; + +private: + /// The branch condition determined by AnalyzeBranch. + SmallVector Cond; + + /// Instructions in Head that define values used by the conditional blocks. + /// The hoisted instructions must be inserted after these instructions. + SmallPtrSet InsertAfter; + + /// Register units clobbered by the conditional blocks. + BitVector ClobberedRegUnits; + + // Scratch pad for findInsertionPoint. + SparseSet LiveRegUnits; + + /// Insertion point in Head for speculatively executed instructions form TBB + /// and FBB. + MachineBasicBlock::iterator InsertionPoint; + + /// Return true if all non-terminator instructions in MBB can be safely + /// speculated. + bool canSpeculateInstrs(MachineBasicBlock *MBB); + + /// Find a valid insertion point in Head. + bool findInsertionPoint(); + + /// Replace PHI instructions in Tail with selects. + void replacePHIInstrs(); + + /// Insert selects and rewrite PHI operands to use them. + void rewritePHIOperands(); + +public: + /// runOnMachineFunction - Initialize per-function data structures. + void runOnMachineFunction(MachineFunction &MF) { + TII = MF.getTarget().getInstrInfo(); + TRI = MF.getTarget().getRegisterInfo(); + MRI = &MF.getRegInfo(); + LiveRegUnits.clear(); + LiveRegUnits.setUniverse(TRI->getNumRegUnits()); + ClobberedRegUnits.clear(); + ClobberedRegUnits.resize(TRI->getNumRegUnits()); + } + + /// canConvertIf - If the sub-CFG headed by MBB can be if-converted, + /// initialize the internal state, and return true. + bool canConvertIf(MachineBasicBlock *MBB); + + /// convertIf - If-convert the last block passed to canConvertIf(), assuming + /// it is possible. Add any erased blocks to RemovedBlocks. + void convertIf(SmallVectorImpl &RemovedBlocks); +}; +} // end anonymous namespace + + +/// canSpeculateInstrs - Returns true if all the instructions in MBB can safely +/// be speculated. The terminators are not considered. 
+/// +/// If instructions use any values that are defined in the head basic block, +/// the defining instructions are added to InsertAfter. +/// +/// Any clobbered regunits are added to ClobberedRegUnits. +/// +bool SSAIfConv::canSpeculateInstrs(MachineBasicBlock *MBB) { + // Reject any live-in physregs. It's probably CPSR/EFLAGS, and very hard to + // get right. + if (!MBB->livein_empty()) { + DEBUG(dbgs() << "BB#" << MBB->getNumber() << " has live-ins.\n"); + return false; + } + + unsigned InstrCount = 0; + + // Check all instructions, except the terminators. It is assumed that + // terminators never have side effects or define any used register values. + for (MachineBasicBlock::iterator I = MBB->begin(), + E = MBB->getFirstTerminator(); I != E; ++I) { + if (I->isDebugValue()) + continue; + + if (++InstrCount > BlockInstrLimit && !Stress) { + DEBUG(dbgs() << "BB#" << MBB->getNumber() << " has more than " + << BlockInstrLimit << " instructions.\n"); + return false; + } + + // There shouldn't normally be any phis in a single-predecessor block. + if (I->isPHI()) { + DEBUG(dbgs() << "Can't hoist: " << *I); + return false; + } + + // Don't speculate loads. Note that it may be possible and desirable to + // speculate GOT or constant pool loads that are guaranteed not to trap, + // but we don't support that for now. + if (I->mayLoad()) { + DEBUG(dbgs() << "Won't speculate load: " << *I); + return false; + } + + // We never speculate stores, so an AA pointer isn't necessary. + bool DontMoveAcrossStore = true; + if (!I->isSafeToMove(TII, 0, DontMoveAcrossStore)) { + DEBUG(dbgs() << "Can't speculate: " << *I); + return false; + } + + // Check for any dependencies on Head instructions. + for (MIOperands MO(I); MO.isValid(); ++MO) { + if (MO->isRegMask()) { + DEBUG(dbgs() << "Won't speculate regmask: " << *I); + return false; + } + if (!MO->isReg()) + continue; + unsigned Reg = MO->getReg(); + + // Remember clobbered regunits. + if (MO->isDef() && TargetRegisterInfo::isPhysicalRegister(Reg)) + for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units) + ClobberedRegUnits.set(*Units); + + if (!MO->readsReg() || !TargetRegisterInfo::isVirtualRegister(Reg)) + continue; + MachineInstr *DefMI = MRI->getVRegDef(Reg); + if (!DefMI || DefMI->getParent() != Head) + continue; + if (InsertAfter.insert(DefMI)) + DEBUG(dbgs() << "BB#" << MBB->getNumber() << " depends on " << *DefMI); + if (DefMI->isTerminator()) { + DEBUG(dbgs() << "Can't insert instructions below terminator.\n"); + return false; + } + } + } + return true; +} + + +/// Find an insertion point in Head for the speculated instructions. The +/// insertion point must be: +/// +/// 1. Before any terminators. +/// 2. After any instructions in InsertAfter. +/// 3. Not have any clobbered regunits live. +/// +/// This function sets InsertionPoint and returns true when successful; it +/// returns false if no valid insertion point could be found. +/// +bool SSAIfConv::findInsertionPoint() { + // Keep track of live regunits before the current position. + // Only track RegUnits that are also in ClobberedRegUnits. + LiveRegUnits.clear(); + SmallVector<unsigned, 8> Reads; + MachineBasicBlock::iterator FirstTerm = Head->getFirstTerminator(); + MachineBasicBlock::iterator I = Head->end(); + MachineBasicBlock::iterator B = Head->begin(); + while (I != B) { + --I; + // Some of the conditional code depends on I. + if (InsertAfter.count(I)) { + DEBUG(dbgs() << "Can't insert code after " << *I); + return false; + } + + // Update live regunits.
+ for (MIOperands MO(I); MO.isValid(); ++MO) { + // We're ignoring regmask operands. That is conservatively correct. + if (!MO->isReg()) + continue; + unsigned Reg = MO->getReg(); + if (!TargetRegisterInfo::isPhysicalRegister(Reg)) + continue; + // I clobbers Reg, so it isn't live before I. + if (MO->isDef()) + for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units) + LiveRegUnits.erase(*Units); + // Unless I reads Reg. + if (MO->readsReg()) + Reads.push_back(Reg); + } + // Anything read by I is live before I. + while (!Reads.empty()) + for (MCRegUnitIterator Units(Reads.pop_back_val(), TRI); Units.isValid(); + ++Units) + if (ClobberedRegUnits.test(*Units)) + LiveRegUnits.insert(*Units); + + // We can't insert before a terminator. + if (I != FirstTerm && I->isTerminator()) + continue; + + // Some of the clobbered registers are live before I, not a valid insertion + // point. + if (!LiveRegUnits.empty()) { + DEBUG({ + dbgs() << "Would clobber"; + for (SparseSet::const_iterator + i = LiveRegUnits.begin(), e = LiveRegUnits.end(); i != e; ++i) + dbgs() << ' ' << PrintRegUnit(*i, TRI); + dbgs() << " live before " << *I; + }); + continue; + } + + // This is a valid insertion point. + InsertionPoint = I; + DEBUG(dbgs() << "Can insert before " << *I); + return true; + } + DEBUG(dbgs() << "No legal insertion point found.\n"); + return false; +} + + + +/// canConvertIf - analyze the sub-cfg rooted in MBB, and return true if it is +/// a potential candidate for if-conversion. Fill out the internal state. +/// +bool SSAIfConv::canConvertIf(MachineBasicBlock *MBB) { + Head = MBB; + TBB = FBB = Tail = 0; + + if (Head->succ_size() != 2) + return false; + MachineBasicBlock *Succ0 = Head->succ_begin()[0]; + MachineBasicBlock *Succ1 = Head->succ_begin()[1]; + + // Canonicalize so Succ0 has MBB as its single predecessor. + if (Succ0->pred_size() != 1) + std::swap(Succ0, Succ1); + + if (Succ0->pred_size() != 1 || Succ0->succ_size() != 1) + return false; + + Tail = Succ0->succ_begin()[0]; + + // This is not a triangle. + if (Tail != Succ1) { + // Check for a diamond. We won't deal with any critical edges. + if (Succ1->pred_size() != 1 || Succ1->succ_size() != 1 || + Succ1->succ_begin()[0] != Tail) + return false; + DEBUG(dbgs() << "\nDiamond: BB#" << Head->getNumber() + << " -> BB#" << Succ0->getNumber() + << "/BB#" << Succ1->getNumber() + << " -> BB#" << Tail->getNumber() << '\n'); + + // Live-in physregs are tricky to get right when speculating code. + if (!Tail->livein_empty()) { + DEBUG(dbgs() << "Tail has live-ins.\n"); + return false; + } + } else { + DEBUG(dbgs() << "\nTriangle: BB#" << Head->getNumber() + << " -> BB#" << Succ0->getNumber() + << " -> BB#" << Tail->getNumber() << '\n'); + } + + // This is a triangle or a diamond. + // If Tail doesn't have any phis, there must be side effects. + if (Tail->empty() || !Tail->front().isPHI()) { + DEBUG(dbgs() << "No phis in tail.\n"); + return false; + } + + // The branch we're looking to eliminate must be analyzable. + Cond.clear(); + if (TII->AnalyzeBranch(*Head, TBB, FBB, Cond)) { + DEBUG(dbgs() << "Branch not analyzable.\n"); + return false; + } + + // This is weird, probably some sort of degenerate CFG. + if (!TBB) { + DEBUG(dbgs() << "AnalyzeBranch didn't find conditional branch.\n"); + return false; + } + + // AnalyzeBranch doesn't set FBB on a fall-through branch. + // Make sure it is always set. + FBB = TBB == Succ0 ? Succ1 : Succ0; + + // Any phis in the tail block must be convertible to selects. 
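That requirement exists because conversion rewrites each tail PHI into a conditional move fed by the two speculated values. An illustrative before/after, not taken from this patch:

    // Before: the tail block merges one value per predecessor.
    //   %x = phi [ %a, %TBB ], [ %b, %FBB ]
    // After: the head block computes both inputs and selects.
    //   %x = select %cond, %a, %b
    // C++ equivalent of the rewritten data flow:
    int merged(bool cond, int a, int b) { return cond ? a : b; }
    int main() { return merged(false, 1, 0); }

The loop below collects exactly these PHI operands for the two predecessors.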
+ PHIs.clear(); + MachineBasicBlock *TPred = getTPred(); + MachineBasicBlock *FPred = getFPred(); + for (MachineBasicBlock::iterator I = Tail->begin(), E = Tail->end(); + I != E && I->isPHI(); ++I) { + PHIs.push_back(&*I); + PHIInfo &PI = PHIs.back(); + // Find PHI operands corresponding to TPred and FPred. + for (unsigned i = 1; i != PI.PHI->getNumOperands(); i += 2) { + if (PI.PHI->getOperand(i+1).getMBB() == TPred) + PI.TReg = PI.PHI->getOperand(i).getReg(); + if (PI.PHI->getOperand(i+1).getMBB() == FPred) + PI.FReg = PI.PHI->getOperand(i).getReg(); + } + assert(TargetRegisterInfo::isVirtualRegister(PI.TReg) && "Bad PHI"); + assert(TargetRegisterInfo::isVirtualRegister(PI.FReg) && "Bad PHI"); + + // Get target information. + if (!TII->canInsertSelect(*Head, Cond, PI.TReg, PI.FReg, + PI.CondCycles, PI.TCycles, PI.FCycles)) { + DEBUG(dbgs() << "Can't convert: " << *PI.PHI); + return false; + } + } + + // Check that the conditional instructions can be speculated. + InsertAfter.clear(); + ClobberedRegUnits.reset(); + if (TBB != Tail && !canSpeculateInstrs(TBB)) + return false; + if (FBB != Tail && !canSpeculateInstrs(FBB)) + return false; + + // Try to find a valid insertion point for the speculated instructions in the + // head basic block. + if (!findInsertionPoint()) + return false; + + if (isTriangle()) + ++NumTrianglesSeen; + else + ++NumDiamondsSeen; + return true; +} + +/// replacePHIInstrs - Completely replace PHI instructions with selects. +/// This is possible when the only Tail predecessors are the if-converted +/// blocks. +void SSAIfConv::replacePHIInstrs() { + assert(Tail->pred_size() == 2 && "Cannot replace PHIs"); + MachineBasicBlock::iterator FirstTerm = Head->getFirstTerminator(); + assert(FirstTerm != Head->end() && "No terminators"); + DebugLoc HeadDL = FirstTerm->getDebugLoc(); + + // Convert all PHIs to select instructions inserted before FirstTerm. + for (unsigned i = 0, e = PHIs.size(); i != e; ++i) { + PHIInfo &PI = PHIs[i]; + DEBUG(dbgs() << "If-converting " << *PI.PHI); + assert(PI.PHI->getNumOperands() == 5 && "Unexpected PHI operands."); + unsigned DstReg = PI.PHI->getOperand(0).getReg(); + TII->insertSelect(*Head, FirstTerm, HeadDL, DstReg, Cond, PI.TReg, PI.FReg); + DEBUG(dbgs() << " --> " << *llvm::prior(FirstTerm)); + PI.PHI->eraseFromParent(); + PI.PHI = 0; + } +} + +/// rewritePHIOperands - When there are additional Tail predecessors, insert +/// select instructions in Head and rewrite PHI operands to use the selects. +/// Keep the PHI instructions in Tail to handle the other predecessors. +void SSAIfConv::rewritePHIOperands() { + MachineBasicBlock::iterator FirstTerm = Head->getFirstTerminator(); + assert(FirstTerm != Head->end() && "No terminators"); + DebugLoc HeadDL = FirstTerm->getDebugLoc(); + + // Convert all PHIs to select instructions inserted before FirstTerm. + for (unsigned i = 0, e = PHIs.size(); i != e; ++i) { + PHIInfo &PI = PHIs[i]; + DEBUG(dbgs() << "If-converting " << *PI.PHI); + unsigned PHIDst = PI.PHI->getOperand(0).getReg(); + unsigned DstReg = MRI->createVirtualRegister(MRI->getRegClass(PHIDst)); + TII->insertSelect(*Head, FirstTerm, HeadDL, DstReg, Cond, PI.TReg, PI.FReg); + DEBUG(dbgs() << " --> " << *llvm::prior(FirstTerm)); + + // Rewrite PHI operands TPred -> (DstReg, Head), remove FPred. 
+ for (unsigned i = PI.PHI->getNumOperands(); i != 1; i -= 2) { + MachineBasicBlock *MBB = PI.PHI->getOperand(i-1).getMBB(); + if (MBB == getTPred()) { + PI.PHI->getOperand(i-1).setMBB(Head); + PI.PHI->getOperand(i-2).setReg(DstReg); + } else if (MBB == getFPred()) { + PI.PHI->RemoveOperand(i-1); + PI.PHI->RemoveOperand(i-2); + } + } + DEBUG(dbgs() << " --> " << *PI.PHI); + } +} + +/// convertIf - Execute the if conversion after canConvertIf has determined the +/// feasibility. +/// +/// Any basic blocks erased will be added to RemovedBlocks. +/// +void SSAIfConv::convertIf(SmallVectorImpl &RemovedBlocks) { + assert(Head && Tail && TBB && FBB && "Call canConvertIf first."); + + // Update statistics. + if (isTriangle()) + ++NumTrianglesConv; + else + ++NumDiamondsConv; + + // Move all instructions into Head, except for the terminators. + if (TBB != Tail) + Head->splice(InsertionPoint, TBB, TBB->begin(), TBB->getFirstTerminator()); + if (FBB != Tail) + Head->splice(InsertionPoint, FBB, FBB->begin(), FBB->getFirstTerminator()); + + // Are there extra Tail predecessors? + bool ExtraPreds = Tail->pred_size() != 2; + if (ExtraPreds) + rewritePHIOperands(); + else + replacePHIInstrs(); + + // Fix up the CFG, temporarily leave Head without any successors. + Head->removeSuccessor(TBB); + Head->removeSuccessor(FBB); + if (TBB != Tail) + TBB->removeSuccessor(Tail); + if (FBB != Tail) + FBB->removeSuccessor(Tail); + + // Fix up Head's terminators. + // It should become a single branch or a fallthrough. + DebugLoc HeadDL = Head->getFirstTerminator()->getDebugLoc(); + TII->RemoveBranch(*Head); + + // Erase the now empty conditional blocks. It is likely that Head can fall + // through to Tail, and we can join the two blocks. + if (TBB != Tail) { + RemovedBlocks.push_back(TBB); + TBB->eraseFromParent(); + } + if (FBB != Tail) { + RemovedBlocks.push_back(FBB); + FBB->eraseFromParent(); + } + + assert(Head->succ_empty() && "Additional head successors?"); + if (!ExtraPreds && Head->isLayoutSuccessor(Tail)) { + // Splice Tail onto the end of Head. + DEBUG(dbgs() << "Joining tail BB#" << Tail->getNumber() + << " into head BB#" << Head->getNumber() << '\n'); + Head->splice(Head->end(), Tail, + Tail->begin(), Tail->end()); + Head->transferSuccessorsAndUpdatePHIs(Tail); + RemovedBlocks.push_back(Tail); + Tail->eraseFromParent(); + } else { + // We need a branch to Tail, let code placement work it out later. 
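The join above either splices Tail directly onto Head or, as the trailing comment says, falls back to a single unconditional branch. MachineBasicBlock::splice moves instructions the way std::list::splice moves nodes; a toy equivalent:

    #include <cassert>
    #include <list>

    int main() {
      std::list<int> Head;
      Head.push_back(1); Head.push_back(2); // speculated code and selects
      std::list<int> Tail;
      Tail.push_back(3); Tail.push_back(4); // the former join block
      // Shaped like Head->splice(Head->end(), Tail, begin, end):
      Head.splice(Head.end(), Tail, Tail.begin(), Tail.end());
      assert(Head.size() == 4 && Tail.empty());
      return 0;
    }

The fallback branch insertion follows.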
+ DEBUG(dbgs() << "Converting to unconditional branch.\n"); + SmallVector EmptyCond; + TII->InsertBranch(*Head, Tail, 0, EmptyCond, HeadDL); + Head->addSuccessor(Tail); + } + DEBUG(dbgs() << *Head); +} + + +//===----------------------------------------------------------------------===// +// EarlyIfConverter Pass +//===----------------------------------------------------------------------===// + +namespace { +class EarlyIfConverter : public MachineFunctionPass { + const TargetInstrInfo *TII; + const TargetRegisterInfo *TRI; + const MCSchedModel *SchedModel; + MachineRegisterInfo *MRI; + MachineDominatorTree *DomTree; + MachineLoopInfo *Loops; + MachineTraceMetrics *Traces; + MachineTraceMetrics::Ensemble *MinInstr; + SSAIfConv IfConv; + +public: + static char ID; + EarlyIfConverter() : MachineFunctionPass(ID) {} + void getAnalysisUsage(AnalysisUsage &AU) const; + bool runOnMachineFunction(MachineFunction &MF); + +private: + bool tryConvertIf(MachineBasicBlock*); + void updateDomTree(ArrayRef Removed); + void updateLoops(ArrayRef Removed); + void invalidateTraces(); + bool shouldConvertIf(); +}; +} // end anonymous namespace + +char EarlyIfConverter::ID = 0; +char &llvm::EarlyIfConverterID = EarlyIfConverter::ID; + +INITIALIZE_PASS_BEGIN(EarlyIfConverter, + "early-ifcvt", "Early If Converter", false, false) +INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo) +INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) +INITIALIZE_PASS_DEPENDENCY(MachineTraceMetrics) +INITIALIZE_PASS_END(EarlyIfConverter, + "early-ifcvt", "Early If Converter", false, false) + +void EarlyIfConverter::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired(); + AU.addRequired(); + AU.addPreserved(); + AU.addRequired(); + AU.addPreserved(); + AU.addRequired(); + AU.addPreserved(); + MachineFunctionPass::getAnalysisUsage(AU); +} + +/// Update the dominator tree after if-conversion erased some blocks. +void EarlyIfConverter::updateDomTree(ArrayRef Removed) { + // convertIf can remove TBB, FBB, and Tail can be merged into Head. + // TBB and FBB should not dominate any blocks. + // Tail children should be transferred to Head. + MachineDomTreeNode *HeadNode = DomTree->getNode(IfConv.Head); + for (unsigned i = 0, e = Removed.size(); i != e; ++i) { + MachineDomTreeNode *Node = DomTree->getNode(Removed[i]); + assert(Node != HeadNode && "Cannot erase the head node"); + while (Node->getNumChildren()) { + assert(Node->getBlock() == IfConv.Tail && "Unexpected children"); + DomTree->changeImmediateDominator(Node->getChildren().back(), HeadNode); + } + DomTree->eraseNode(Removed[i]); + } +} + +/// Update LoopInfo after if-conversion. +void EarlyIfConverter::updateLoops(ArrayRef Removed) { + if (!Loops) + return; + // If-conversion doesn't change loop structure, and it doesn't mess with back + // edges, so updating LoopInfo is simply removing the dead blocks. + for (unsigned i = 0, e = Removed.size(); i != e; ++i) + Loops->removeBlock(Removed[i]); +} + +/// Invalidate MachineTraceMetrics before if-conversion. +void EarlyIfConverter::invalidateTraces() { + Traces->verifyAnalysis(); + Traces->invalidate(IfConv.Head); + Traces->invalidate(IfConv.Tail); + Traces->invalidate(IfConv.TBB); + Traces->invalidate(IfConv.FBB); + Traces->verifyAnalysis(); +} + +// Adjust cycles with downward saturation. +static unsigned adjCycles(unsigned Cyc, int Delta) { + if (Delta < 0 && Cyc + Delta > Cyc) + return 0; + return Cyc + Delta; +} + +/// Apply cost model and heuristics to the if-conversion in IfConv. 
+/// Return true if the conversion is a good idea. +/// +bool EarlyIfConverter::shouldConvertIf() { + // Stress testing mode disables all cost considerations. + if (Stress) + return true; + + if (!MinInstr) + MinInstr = Traces->getEnsemble(MachineTraceMetrics::TS_MinInstrCount); + + MachineTraceMetrics::Trace TBBTrace = MinInstr->getTrace(IfConv.getTPred()); + MachineTraceMetrics::Trace FBBTrace = MinInstr->getTrace(IfConv.getFPred()); + DEBUG(dbgs() << "TBB: " << TBBTrace << "FBB: " << FBBTrace); + unsigned MinCrit = std::min(TBBTrace.getCriticalPath(), + FBBTrace.getCriticalPath()); + + // Set a somewhat arbitrary limit on the critical path extension we accept. + unsigned CritLimit = SchedModel->MispredictPenalty/2; + + // If-conversion only makes sense when there is unexploited ILP. Compute the + // maximum-ILP resource length of the trace after if-conversion. Compare it + // to the shortest critical path. + SmallVector ExtraBlocks; + if (IfConv.TBB != IfConv.Tail) + ExtraBlocks.push_back(IfConv.TBB); + unsigned ResLength = FBBTrace.getResourceLength(ExtraBlocks); + DEBUG(dbgs() << "Resource length " << ResLength + << ", minimal critical path " << MinCrit << '\n'); + if (ResLength > MinCrit + CritLimit) { + DEBUG(dbgs() << "Not enough available ILP.\n"); + return false; + } + + // Assume that the depth of the first head terminator will also be the depth + // of the select instruction inserted, as determined by the flag dependency. + // TBB / FBB data dependencies may delay the select even more. + MachineTraceMetrics::Trace HeadTrace = MinInstr->getTrace(IfConv.Head); + unsigned BranchDepth = + HeadTrace.getInstrCycles(IfConv.Head->getFirstTerminator()).Depth; + DEBUG(dbgs() << "Branch depth: " << BranchDepth << '\n'); + + // Look at all the tail phis, and compute the critical path extension caused + // by inserting select instructions. + MachineTraceMetrics::Trace TailTrace = MinInstr->getTrace(IfConv.Tail); + for (unsigned i = 0, e = IfConv.PHIs.size(); i != e; ++i) { + SSAIfConv::PHIInfo &PI = IfConv.PHIs[i]; + unsigned Slack = TailTrace.getInstrSlack(PI.PHI); + unsigned MaxDepth = Slack + TailTrace.getInstrCycles(PI.PHI).Depth; + DEBUG(dbgs() << "Slack " << Slack << ":\t" << *PI.PHI); + + // The condition is pulled into the critical path. + unsigned CondDepth = adjCycles(BranchDepth, PI.CondCycles); + if (CondDepth > MaxDepth) { + unsigned Extra = CondDepth - MaxDepth; + DEBUG(dbgs() << "Condition adds " << Extra << " cycles.\n"); + if (Extra > CritLimit) { + DEBUG(dbgs() << "Exceeds limit of " << CritLimit << '\n'); + return false; + } + } + + // The TBB value is pulled into the critical path. + unsigned TDepth = adjCycles(TBBTrace.getPHIDepth(PI.PHI), PI.TCycles); + if (TDepth > MaxDepth) { + unsigned Extra = TDepth - MaxDepth; + DEBUG(dbgs() << "TBB data adds " << Extra << " cycles.\n"); + if (Extra > CritLimit) { + DEBUG(dbgs() << "Exceeds limit of " << CritLimit << '\n'); + return false; + } + } + + // The FBB value is pulled into the critical path. + unsigned FDepth = adjCycles(FBBTrace.getPHIDepth(PI.PHI), PI.FCycles); + if (FDepth > MaxDepth) { + unsigned Extra = FDepth - MaxDepth; + DEBUG(dbgs() << "FBB data adds " << Extra << " cycles.\n"); + if (Extra > CritLimit) { + DEBUG(dbgs() << "Exceeds limit of " << CritLimit << '\n'); + return false; + } + } + } + return true; +} + +/// Attempt repeated if-conversion on MBB, return true if successful. 
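To make the cost model concrete with invented numbers: if MispredictPenalty is 10 cycles, CritLimit is 5; a shortest critical path of 8 against a post-conversion resource length of 14 then fails the ResLength <= MinCrit + CritLimit test and the conversion is rejected. The adjCycles helper guards the same arithmetic against unsigned wrap-around. A self-contained check of both, mirroring the logic above:

    #include <cassert>

    // Saturating-down adjustment, as in the pass above: a negative Delta
    // that would wrap the unsigned sum is clamped to 0 instead.
    static unsigned adjCycles(unsigned Cyc, int Delta) {
      if (Delta < 0 && Cyc + Delta > Cyc) // unsigned wrap check
        return 0;
      return Cyc + Delta;
    }

    int main() {
      assert(adjCycles(7, -3) == 4); // normal case
      assert(adjCycles(3, -5) == 0); // 3 - 5 would wrap; saturate to 0
      unsigned MinCrit = 8, CritLimit = 5, ResLength = 14;
      assert(ResLength > MinCrit + CritLimit); // "Not enough available ILP."
      return 0;
    }

tryConvertIf, introduced by the doc comment that closes the block above, drives the convert-and-update loop: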
+/// +bool EarlyIfConverter::tryConvertIf(MachineBasicBlock *MBB) { + bool Changed = false; + while (IfConv.canConvertIf(MBB) && shouldConvertIf()) { + // If-convert MBB and update analyses. + invalidateTraces(); + SmallVector RemovedBlocks; + IfConv.convertIf(RemovedBlocks); + Changed = true; + updateDomTree(RemovedBlocks); + updateLoops(RemovedBlocks); + } + return Changed; +} + +bool EarlyIfConverter::runOnMachineFunction(MachineFunction &MF) { + DEBUG(dbgs() << "********** EARLY IF-CONVERSION **********\n" + << "********** Function: " + << ((Value*)MF.getFunction())->getName() << '\n'); + TII = MF.getTarget().getInstrInfo(); + TRI = MF.getTarget().getRegisterInfo(); + SchedModel = MF.getTarget().getInstrItineraryData()->SchedModel; + MRI = &MF.getRegInfo(); + DomTree = &getAnalysis(); + Loops = getAnalysisIfAvailable(); + Traces = &getAnalysis(); + MinInstr = 0; + + bool Changed = false; + IfConv.runOnMachineFunction(MF); + + // Visit blocks in dominator tree post-order. The post-order enables nested + // if-conversion in a single pass. The tryConvertIf() function may erase + // blocks, but only blocks dominated by the head block. This makes it safe to + // update the dominator tree while the post-order iterator is still active. + for (po_iterator + I = po_begin(DomTree), E = po_end(DomTree); I != E; ++I) + if (tryConvertIf(I->getBlock())) + Changed = true; + + MF.verify(this, "After early if-conversion"); + return Changed; +} diff --git a/lib/CodeGen/ExecutionDepsFix.cpp b/lib/CodeGen/ExecutionDepsFix.cpp index a48c540..fee8e47 100644 --- a/lib/CodeGen/ExecutionDepsFix.cpp +++ b/lib/CodeGen/ExecutionDepsFix.cpp @@ -59,7 +59,7 @@ struct DomainValue { // Pointer to the next DomainValue in a chain. When two DomainValues are // merged, Victim.Next is set to point to Victor, so old DomainValue - // references can be updated by folowing the chain. + // references can be updated by following the chain. DomainValue *Next; // Twiddleable instructions using or defining these registers. @@ -666,7 +666,8 @@ bool ExeDepsFix::runOnMachineFunction(MachineFunction &mf) { // or -1. AliasMap.resize(TRI->getNumRegs(), -1); for (unsigned i = 0, e = RC->getNumRegs(); i != e; ++i) - for (const uint16_t *AI = TRI->getOverlaps(RC->getRegister(i)); *AI; ++AI) + for (MCRegAliasIterator AI(RC->getRegister(i), TRI, true); + AI.isValid(); ++AI) AliasMap[*AI] = i; } diff --git a/lib/CodeGen/ExpandPostRAPseudos.cpp b/lib/CodeGen/ExpandPostRAPseudos.cpp index b14afc2..7a17331 100644 --- a/lib/CodeGen/ExpandPostRAPseudos.cpp +++ b/lib/CodeGen/ExpandPostRAPseudos.cpp @@ -131,13 +131,16 @@ bool ExpandPostRA::LowerSubregToReg(MachineInstr *MI) { } else { TII->copyPhysReg(*MBB, MI, MI->getDebugLoc(), DstSubReg, InsReg, MI->getOperand(2).isKill()); + + // Implicitly define DstReg for subsequent uses. + MachineBasicBlock::iterator CopyMI = MI; + --CopyMI; + CopyMI->addRegisterDefined(DstReg); + // Transfer the kill/dead flags, if needed. 
if (MI->getOperand(0).isDead()) TransferDeadFlag(MI, DstSubReg, TRI); - DEBUG({ - MachineBasicBlock::iterator dMI = MI; - dbgs() << "subreg: " << *(--dMI); - }); + DEBUG(dbgs() << "subreg: " << *CopyMI); } DEBUG(dbgs() << '\n'); diff --git a/lib/CodeGen/IfConversion.cpp b/lib/CodeGen/IfConversion.cpp index 75ae5b9..4214ba1 100644 --- a/lib/CodeGen/IfConversion.cpp +++ b/lib/CodeGen/IfConversion.cpp @@ -18,6 +18,7 @@ #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineBranchProbabilityInfo.h" #include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/MC/MCInstrItineraries.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetLowering.h" @@ -155,7 +156,9 @@ namespace { const TargetRegisterInfo *TRI; const InstrItineraryData *InstrItins; const MachineBranchProbabilityInfo *MBPI; + MachineRegisterInfo *MRI; + bool PreRegAlloc; bool MadeChange; int FnNum; public: @@ -263,14 +266,20 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) { TII = MF.getTarget().getInstrInfo(); TRI = MF.getTarget().getRegisterInfo(); MBPI = &getAnalysis(); + MRI = &MF.getRegInfo(); InstrItins = MF.getTarget().getInstrItineraryData(); if (!TII) return false; - // Tail merge tend to expose more if-conversion opportunities. - BranchFolder BF(true, false); - bool BFChange = BF.OptimizeFunction(MF, TII, + PreRegAlloc = MRI->isSSA(); + + bool BFChange = false; + if (!PreRegAlloc) { + // Tail merge tend to expose more if-conversion opportunities. + BranchFolder BF(true, false); + BFChange = BF.OptimizeFunction(MF, TII, MF.getTarget().getRegisterInfo(), getAnalysisIfAvailable()); + } DEBUG(dbgs() << "\nIfcvt: function (" << ++FnNum << ") \'" << MF.getFunction()->getName() << "\'"); @@ -621,7 +630,7 @@ void IfConverter::ScanInstructions(BBInfo &BBI) { if (BBI.IsDone) return; - bool AlreadyPredicated = BBI.Predicate.size() > 0; + bool AlreadyPredicated = !BBI.Predicate.empty(); // First analyze the end of BB branches. BBI.TrueBB = BBI.FalseBB = NULL; BBI.BrCond.clear(); @@ -786,8 +795,8 @@ IfConverter::BBInfo &IfConverter::AnalyzeBlock(MachineBasicBlock *BB, unsigned Dups = 0; unsigned Dups2 = 0; - bool TNeedSub = TrueBBI.Predicate.size() > 0; - bool FNeedSub = FalseBBI.Predicate.size() > 0; + bool TNeedSub = !TrueBBI.Predicate.empty(); + bool FNeedSub = !FalseBBI.Predicate.empty(); bool Enqueued = false; BranchProbability Prediction = MBPI->getEdgeProbability(BB, TrueBBI.BB); @@ -962,9 +971,8 @@ static void InitPredRedefs(MachineBasicBlock *BB, SmallSet &Redefs, E = BB->livein_end(); I != E; ++I) { unsigned Reg = *I; Redefs.insert(Reg); - for (const uint16_t *Subreg = TRI->getSubRegisters(Reg); - *Subreg; ++Subreg) - Redefs.insert(*Subreg); + for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) + Redefs.insert(*SubRegs); } } @@ -983,8 +991,8 @@ static void UpdatePredRedefs(MachineInstr *MI, SmallSet &Redefs, Defs.push_back(Reg); else if (MO.isKill()) { Redefs.erase(Reg); - for (const uint16_t *SR = TRI->getSubRegisters(Reg); *SR; ++SR) - Redefs.erase(*SR); + for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) + Redefs.erase(*SubRegs); } } for (unsigned i = 0, e = Defs.size(); i != e; ++i) { @@ -993,11 +1001,12 @@ static void UpdatePredRedefs(MachineInstr *MI, SmallSet &Redefs, if (AddImpUse) // Treat predicated update as read + write. 
MI->addOperand(MachineOperand::CreateReg(Reg, false/*IsDef*/, - true/*IsImp*/,false/*IsKill*/)); + true/*IsImp*/,false/*IsKill*/, + false/*IsDead*/,true/*IsUndef*/)); } else { Redefs.insert(Reg); - for (const uint16_t *SR = TRI->getSubRegisters(Reg); *SR; ++SR) - Redefs.insert(*SR); + for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) + Redefs.insert(*SubRegs); } } } @@ -1335,8 +1344,8 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind, // These are defined before ctrl flow reach the 'false' instructions. // They cannot be modified by the 'true' instructions. ExtUses.insert(Reg); - for (const uint16_t *SR = TRI->getSubRegisters(Reg); *SR; ++SR) - ExtUses.insert(*SR); + for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) + ExtUses.insert(*SubRegs); } } @@ -1344,8 +1353,8 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind, unsigned Reg = Defs[i]; if (!ExtUses.count(Reg)) { RedefsByFalse.insert(Reg); - for (const uint16_t *SR = TRI->getSubRegisters(Reg); *SR; ++SR) - RedefsByFalse.insert(*SR); + for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) + RedefsByFalse.insert(*SubRegs); } } } diff --git a/lib/CodeGen/InlineSpiller.cpp b/lib/CodeGen/InlineSpiller.cpp index d5ea666..07e37af 100644 --- a/lib/CodeGen/InlineSpiller.cpp +++ b/lib/CodeGen/InlineSpiller.cpp @@ -52,7 +52,6 @@ static cl::opt DisableHoisting("disable-spill-hoist", cl::Hidden, namespace { class InlineSpiller : public Spiller { - MachineFunctionPass &Pass; MachineFunction &MF; LiveIntervals &LIS; LiveStacks &LSS; @@ -137,8 +136,7 @@ public: InlineSpiller(MachineFunctionPass &pass, MachineFunction &mf, VirtRegMap &vrm) - : Pass(pass), - MF(mf), + : MF(mf), LIS(pass.getAnalysis()), LSS(pass.getAnalysis()), AA(&pass.getAnalysis()), @@ -578,11 +576,11 @@ MachineInstr *InlineSpiller::traceSiblingValue(unsigned UseReg, VNInfo *UseVNI, if (unsigned SrcReg = isFullCopyOf(MI, Reg)) { if (isSibling(SrcReg)) { LiveInterval &SrcLI = LIS.getInterval(SrcReg); - LiveRange *SrcLR = SrcLI.getLiveRangeContaining(VNI->def.getRegSlot(true)); - assert(SrcLR && "Copy from non-existing value"); + LiveRangeQuery SrcQ(SrcLI, VNI->def); + assert(SrcQ.valueIn() && "Copy from non-existing value"); // Check if this COPY kills its source. - SVI->second.KillsSource = (SrcLR->end == VNI->def); - VNInfo *SrcVNI = SrcLR->valno; + SVI->second.KillsSource = SrcQ.isKill(); + VNInfo *SrcVNI = SrcQ.valueIn(); DEBUG(dbgs() << "copy of " << PrintReg(SrcReg) << ':' << SrcVNI->id << '@' << SrcVNI->def << " kill=" << unsigned(SVI->second.KillsSource) << '\n'); @@ -1083,6 +1081,10 @@ void InlineSpiller::insertReload(LiveInterval &NewLI, MRI.getRegClass(NewLI.reg), &TRI); --MI; // Point to load instruction. SlotIndex LoadIdx = LIS.InsertMachineInstrInMaps(MI).getRegSlot(); + // Some (out-of-tree) targets have EC reload instructions. 
+ if (MachineOperand *MO = MI->findRegisterDefOperand(NewLI.reg)) + if (MO->isEarlyClobber()) + LoadIdx = LoadIdx.getRegSlot(true); DEBUG(dbgs() << "\treload: " << LoadIdx << '\t' << *MI); VNInfo *LoadVNI = NewLI.getNextValue(LoadIdx, LIS.getVNInfoAllocator()); NewLI.addRange(LiveRange(LoadIdx, Idx, LoadVNI)); @@ -1275,8 +1277,8 @@ void InlineSpiller::spill(LiveRangeEdit &edit) { DEBUG(dbgs() << "Inline spilling " << MRI.getRegClass(edit.getReg())->getName() - << ':' << edit.getParent() << "\nFrom original " - << LIS.getInterval(Original) << '\n'); + << ':' << PrintReg(edit.getReg()) << ' ' << edit.getParent() + << "\nFrom original " << LIS.getInterval(Original) << '\n'); assert(edit.getParent().isSpillable() && "Attempting to spill already spilled value."); assert(DeadDefs.empty() && "Previous spill didn't remove dead defs"); diff --git a/lib/CodeGen/InterferenceCache.cpp b/lib/CodeGen/InterferenceCache.cpp index 8368b58..1541bf0 100644 --- a/lib/CodeGen/InterferenceCache.cpp +++ b/lib/CodeGen/InterferenceCache.cpp @@ -39,7 +39,7 @@ InterferenceCache::Entry *InterferenceCache::get(unsigned PhysReg) { unsigned E = PhysRegEntries[PhysReg]; if (E < CacheEntries && Entries[E].getPhysReg() == PhysReg) { if (!Entries[E].valid(LIUArray, TRI)) - Entries[E].revalidate(); + Entries[E].revalidate(LIUArray, TRI); return &Entries[E]; } // No valid entry exists, pick the next round-robin entry. @@ -61,13 +61,15 @@ InterferenceCache::Entry *InterferenceCache::get(unsigned PhysReg) { } /// revalidate - LIU contents have changed, update tags. -void InterferenceCache::Entry::revalidate() { +void InterferenceCache::Entry::revalidate(LiveIntervalUnion *LIUArray, + const TargetRegisterInfo *TRI) { // Invalidate all block entries. ++Tag; // Invalidate all iterators. PrevPos = SlotIndex(); - for (unsigned i = 0, e = Aliases.size(); i != e; ++i) - Aliases[i].second = Aliases[i].first->getTag(); + unsigned i = 0; + for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units, ++i) + RegUnits[i].VirtTag = LIUArray[*Units].getTag(); } void InterferenceCache::Entry::reset(unsigned physReg, @@ -79,28 +81,23 @@ void InterferenceCache::Entry::reset(unsigned physReg, ++Tag; PhysReg = physReg; Blocks.resize(MF->getNumBlockIDs()); - Aliases.clear(); - for (const uint16_t *AS = TRI->getOverlaps(PhysReg); *AS; ++AS) { - LiveIntervalUnion *LIU = LIUArray + *AS; - Aliases.push_back(std::make_pair(LIU, LIU->getTag())); - } // Reset iterators. PrevPos = SlotIndex(); - unsigned e = Aliases.size(); - Iters.resize(e); - for (unsigned i = 0; i != e; ++i) - Iters[i].setMap(Aliases[i].first->getMap()); + RegUnits.clear(); + for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) { + RegUnits.push_back(LIUArray[*Units]); + RegUnits.back().Fixed = &LIS->getRegUnit(*Units); + } } bool InterferenceCache::Entry::valid(LiveIntervalUnion *LIUArray, const TargetRegisterInfo *TRI) { - unsigned i = 0, e = Aliases.size(); - for (const uint16_t *AS = TRI->getOverlaps(PhysReg); *AS; ++AS, ++i) { - LiveIntervalUnion *LIU = LIUArray + *AS; - if (i == e || Aliases[i].first != LIU) + unsigned i = 0, e = RegUnits.size(); + for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units, ++i) { + if (i == e) return false; - if (LIU->changedSince(Aliases[i].second)) + if (LIUArray[*Units].changedSince(RegUnits[i].VirtTag)) return false; } return i == e; @@ -112,12 +109,20 @@ void InterferenceCache::Entry::update(unsigned MBBNum) { // Use advanceTo only when possible. 
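That comment is the key to the hunk below: when the query position only moves forward, the cached iterators resume with advanceTo instead of re-searching from scratch with find. A toy version of the forward-only resume (the real iterators walk interval maps, not vectors):

    #include <cassert>
    #include <vector>

    typedef std::vector<int>::const_iterator Iter;

    // Resume scanning from the current iterator; never moves backwards.
    static Iter advanceTo(Iter I, Iter E, int Pos) {
      while (I != E && *I < Pos)
        ++I;
      return I;
    }

    int main() {
      std::vector<int> Starts;
      Starts.push_back(10); Starts.push_back(20); Starts.push_back(30);
      Iter I = advanceTo(Starts.begin(), Starts.end(), 15);
      assert(*I == 20);
      I = advanceTo(I, Starts.end(), 25); // cheap: resumes from 20
      assert(*I == 30);
      return 0;
    }

In the code below, a backward move (Start < PrevPos) forces the find path instead.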
if (PrevPos != Start) { - if (!PrevPos.isValid() || Start < PrevPos) - for (unsigned i = 0, e = Iters.size(); i != e; ++i) - Iters[i].find(Start); - else - for (unsigned i = 0, e = Iters.size(); i != e; ++i) - Iters[i].advanceTo(Start); + if (!PrevPos.isValid() || Start < PrevPos) { + for (unsigned i = 0, e = RegUnits.size(); i != e; ++i) { + RegUnitInfo &RUI = RegUnits[i]; + RUI.VirtI.find(Start); + RUI.FixedI = RUI.Fixed->find(Start); + } + } else { + for (unsigned i = 0, e = RegUnits.size(); i != e; ++i) { + RegUnitInfo &RUI = RegUnits[i]; + RUI.VirtI.advanceTo(Start); + if (RUI.FixedI != RUI.Fixed->end()) + RUI.FixedI = RUI.Fixed->advanceTo(RUI.FixedI, Start); + } + } PrevPos = Start; } @@ -129,9 +134,9 @@ void InterferenceCache::Entry::update(unsigned MBBNum) { BI->Tag = Tag; BI->First = BI->Last = SlotIndex(); - // Check for first interference. - for (unsigned i = 0, e = Iters.size(); i != e; ++i) { - Iter &I = Iters[i]; + // Check for first interference from virtregs. + for (unsigned i = 0, e = RegUnits.size(); i != e; ++i) { + LiveIntervalUnion::SegmentIter &I = RegUnits[i].VirtI; if (!I.valid()) continue; SlotIndex StartI = I.start(); @@ -141,6 +146,19 @@ void InterferenceCache::Entry::update(unsigned MBBNum) { BI->First = StartI; } + // Same thing for fixed interference. + for (unsigned i = 0, e = RegUnits.size(); i != e; ++i) { + LiveInterval::const_iterator I = RegUnits[i].FixedI; + LiveInterval::const_iterator E = RegUnits[i].Fixed->end(); + if (I == E) + continue; + SlotIndex StartI = I->start; + if (StartI >= Stop) + continue; + if (!BI->First.isValid() || StartI < BI->First) + BI->First = StartI; + } + // Also check for register mask interference. RegMaskSlots = LIS->getRegMaskSlotsInBlock(MBBNum); RegMaskBits = LIS->getRegMaskBitsInBlock(MBBNum); @@ -168,8 +186,8 @@ void InterferenceCache::Entry::update(unsigned MBBNum) { } // Check for last interference in block. - for (unsigned i = 0, e = Iters.size(); i != e; ++i) { - Iter &I = Iters[i]; + for (unsigned i = 0, e = RegUnits.size(); i != e; ++i) { + LiveIntervalUnion::SegmentIter &I = RegUnits[i].VirtI; if (!I.valid() || I.start() >= Stop) continue; I.advanceTo(Stop); @@ -183,6 +201,23 @@ void InterferenceCache::Entry::update(unsigned MBBNum) { ++I; } + // Fixed interference. + for (unsigned i = 0, e = RegUnits.size(); i != e; ++i) { + LiveInterval::iterator &I = RegUnits[i].FixedI; + LiveInterval *LI = RegUnits[i].Fixed; + if (I == LI->end() || I->start >= Stop) + continue; + I = LI->advanceTo(I, Stop); + bool Backup = I == LI->end() || I->start >= Stop; + if (Backup) + --I; + SlotIndex StopI = I->end; + if (!BI->Last.isValid() || StopI > BI->Last) + BI->Last = StopI; + if (Backup) + ++I; + } + // Also check for register mask interference. SlotIndex Limit = BI->Last.isValid() ? BI->Last : Start; for (unsigned i = RegMaskSlots.size(); diff --git a/lib/CodeGen/InterferenceCache.h b/lib/CodeGen/InterferenceCache.h index 485a325..3c928a5 100644 --- a/lib/CodeGen/InterferenceCache.h +++ b/lib/CodeGen/InterferenceCache.h @@ -7,7 +7,8 @@ // //===----------------------------------------------------------------------===// // -// InterferenceCache remembers per-block interference in LiveIntervalUnions. +// InterferenceCache remembers per-block interference from LiveIntervalUnions, +// fixed RegUnit interference, and register masks. // //===----------------------------------------------------------------------===// @@ -59,14 +60,31 @@ class InterferenceCache { /// PrevPos - The previous position the iterators were moved to. 
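The header rewrite that follows replaces the per-alias LiveIntervalUnion list with per-RegUnit state. Register units factor overlapping registers into disjoint pieces so each piece is tracked exactly once; the decomposition below is invented for illustration and is not a real target's table:

    #include <cassert>

    // Invented regunit tables: where D0 overlaps S0 and S1, the units
    // factor the overlap so each piece is tracked exactly once.
    static const unsigned S0Units[] = {0};
    static const unsigned S1Units[] = {1};
    static const unsigned D0Units[] = {0, 1}; // D0 = S0 + S1

    static bool overlap(const unsigned *A, unsigned NA,
                        const unsigned *B, unsigned NB) {
      for (unsigned i = 0; i != NA; ++i)
        for (unsigned j = 0; j != NB; ++j)
          if (A[i] == B[j])
            return true; // sharing a unit means the registers overlap
      return false;
    }

    int main() {
      assert(overlap(D0Units, 2, S1Units, 1));  // D0 aliases S1
      assert(!overlap(S0Units, 1, S1Units, 1)); // S0, S1 are disjoint
      return 0;
    }

Entry's cached per-unit state, beginning with PrevPos, follows.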
    SlotIndex PrevPos;

-    /// AliasTags - A LiveIntervalUnion pointer and tag for each alias of
-    /// PhysReg.
-    SmallVector<std::pair<LiveIntervalUnion*, unsigned>, 8> Aliases;
+    /// RegUnitInfo - Information tracked about each RegUnit in PhysReg.
+    /// When PrevPos is set, the iterators are valid as if advanceTo(PrevPos)
+    /// had just been called.
+    struct RegUnitInfo {
+      /// Iterator pointing into the LiveIntervalUnion containing virtual
+      /// register interference.
+      LiveIntervalUnion::SegmentIter VirtI;

-    typedef LiveIntervalUnion::SegmentIter Iter;
+      /// Tag of the LIU last time we looked.
+      unsigned VirtTag;

-    /// Iters - an iterator for each alias
-    SmallVector<Iter, 8> Iters;
+      /// Fixed interference in RegUnit.
+      LiveInterval *Fixed;
+
+      /// Iterator pointing into the fixed RegUnit interference.
+      LiveInterval::iterator FixedI;
+
+      RegUnitInfo(LiveIntervalUnion &LIU) : VirtTag(LIU.getTag()), Fixed(0) {
+        VirtI.setMap(LIU.getMap());
+      }
+    };
+
+    /// Info for each RegUnit in PhysReg. It is very rare for a PhysReg to have
+    /// more than 4 RegUnits.
+    SmallVector<RegUnitInfo, 4> RegUnits;

    /// Blocks - Interference for each block in the function.
    SmallVector<BlockInterference, 8> Blocks;
@@ -91,7 +109,7 @@ class InterferenceCache {

    bool hasRefs() const { return RefCount > 0; }

-    void revalidate();
+    void revalidate(LiveIntervalUnion *LIUArray, const TargetRegisterInfo *TRI);

    /// valid - Return true if this is a valid entry for physReg.
    bool valid(LiveIntervalUnion *LIUArray, const TargetRegisterInfo *TRI);
diff --git a/lib/CodeGen/IntrinsicLowering.cpp b/lib/CodeGen/IntrinsicLowering.cpp
index a9ca42f..8d2282a 100644
--- a/lib/CodeGen/IntrinsicLowering.cpp
+++ b/lib/CodeGen/IntrinsicLowering.cpp
@@ -11,17 +11,17 @@
 //
 //===----------------------------------------------------------------------===//

+#include "llvm/CodeGen/IntrinsicLowering.h"
 #include "llvm/Constants.h"
 #include "llvm/DerivedTypes.h"
+#include "llvm/IRBuilder.h"
 #include "llvm/Module.h"
 #include "llvm/Type.h"
-#include "llvm/CodeGen/IntrinsicLowering.h"
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/Support/CallSite.h"
 #include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/IRBuilder.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/TargetData.h"
-#include "llvm/ADT/SmallVector.h"
 using namespace llvm;

 template <class ArgIt>
diff --git a/lib/CodeGen/LLVMTargetMachine.cpp b/lib/CodeGen/LLVMTargetMachine.cpp
index a1f479a..cac0c83 100644
--- a/lib/CodeGen/LLVMTargetMachine.cpp
+++ b/lib/CodeGen/LLVMTargetMachine.cpp
@@ -13,6 +13,7 @@

 #include "llvm/Transforms/Scalar.h"
 #include "llvm/PassManager.h"
+#include "llvm/Assembly/PrintModulePass.h"
 #include "llvm/CodeGen/AsmPrinter.h"
 #include "llvm/CodeGen/Passes.h"
 #include "llvm/CodeGen/MachineFunctionAnalysis.h"
@@ -78,40 +79,15 @@ LLVMTargetMachine::LLVMTargetMachine(const Target &T, StringRef Triple,
          "and that InitializeAllTargetMCs() is being invoked!");
 }

-/// Turn exception handling constructs into something the code generators can
-/// handle.
-static void addPassesToHandleExceptions(TargetMachine *TM,
-                                        PassManagerBase &PM) {
-  switch (TM->getMCAsmInfo()->getExceptionHandlingType()) {
-  case ExceptionHandling::SjLj:
-    // SjLj piggy-backs on dwarf for this bit. The cleanups done apply to both
-    // Dwarf EH prepare needs to be run after SjLj prepare. Otherwise,
-    // catch info can get misplaced when a selector ends up more than one block
-    // removed from the parent invoke(s). This could happen when a landing
-    // pad is shared by multiple invokes and is also a target of a normal
-    // edge from elsewhere.
-    PM.add(createSjLjEHPreparePass(TM->getTargetLowering()));
-    // FALLTHROUGH
-  case ExceptionHandling::DwarfCFI:
-  case ExceptionHandling::ARM:
-  case ExceptionHandling::Win64:
-    PM.add(createDwarfEHPass(TM));
-    break;
-  case ExceptionHandling::None:
-    PM.add(createLowerInvokePass(TM->getTargetLowering()));
-
-    // The lower invoke pass may create unreachable code. Remove it.
-    PM.add(createUnreachableBlockEliminationPass());
-    break;
-  }
-}
-
 /// addPassesToX helper drives creation and initialization of TargetPassConfig.
 static MCContext *addPassesToGenerateCode(LLVMTargetMachine *TM,
                                           PassManagerBase &PM,
-                                          bool DisableVerify) {
+                                          bool DisableVerify,
+                                          AnalysisID StartAfter,
+                                          AnalysisID StopAfter) {
   // Targets may override createPassConfig to provide a target-specific subclass.
   TargetPassConfig *PassConfig = TM->createPassConfig(PM);
+  PassConfig->setStartStopPasses(StartAfter, StopAfter);

   // Set PassConfig options provided by TargetMachine.
   PassConfig->setDisableVerify(DisableVerify);
@@ -120,7 +96,7 @@ static MCContext *addPassesToGenerateCode(LLVMTargetMachine *TM,

   PassConfig->addIRPasses();

-  addPassesToHandleExceptions(TM, PM);
+  PassConfig->addPassesToHandleExceptions();

   PassConfig->addISelPrepare();

@@ -155,16 +131,30 @@ static MCContext *addPassesToGenerateCode(LLVMTargetMachine *TM,
 bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM,
                                             formatted_raw_ostream &Out,
                                             CodeGenFileType FileType,
-                                            bool DisableVerify) {
+                                            bool DisableVerify,
+                                            AnalysisID StartAfter,
+                                            AnalysisID StopAfter) {
   // Add common CodeGen passes.
-  MCContext *Context = addPassesToGenerateCode(this, PM, DisableVerify);
+  MCContext *Context = addPassesToGenerateCode(this, PM, DisableVerify,
+                                               StartAfter, StopAfter);
   if (!Context)
     return true;

+  if (StopAfter) {
+    // FIXME: The intent is that this should eventually write out a YAML file,
+    // containing the LLVM IR, the machine-level IR (when stopping after a
+    // machine-level pass), and whatever other information is needed to
+    // deserialize the code and resume compilation. For now, just write the
+    // LLVM IR.
+    PM.add(createPrintModulePass(&Out));
+    return false;
+  }
+
   if (hasMCSaveTempLabels())
     Context->setAllowTemporaryLabels(false);

   const MCAsmInfo &MAI = *getMCAsmInfo();
+  const MCRegisterInfo &MRI = *getRegisterInfo();
   const MCSubtargetInfo &STI = getSubtarget<MCSubtargetInfo>();
   OwningPtr<MCStreamer> AsmStreamer;
@@ -180,7 +170,8 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM,
     MCAsmBackend *MAB = 0;
     if (ShowMCEncoding) {
       const MCSubtargetInfo &STI = getSubtarget<MCSubtargetInfo>();
-      MCE = getTarget().createMCCodeEmitter(*getInstrInfo(), STI, *Context);
+      MCE = getTarget().createMCCodeEmitter(*getInstrInfo(), MRI, STI,
+                                            *Context);
       MAB = getTarget().createMCAsmBackend(getTargetTriple());
     }

@@ -198,8 +189,8 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM,
   case CGFT_ObjectFile: {
     // Create the code emitter for the target if it exists.  If not, .o file
     // emission fails.
-    MCCodeEmitter *MCE = getTarget().createMCCodeEmitter(*getInstrInfo(), STI,
-                                                         *Context);
+    MCCodeEmitter *MCE = getTarget().createMCCodeEmitter(*getInstrInfo(), MRI,
+                                                         STI, *Context);
     MCAsmBackend *MAB = getTarget().createMCAsmBackend(getTargetTriple());
     if (MCE == 0 || MAB == 0)
       return true;
@@ -242,7 +233,7 @@ bool LLVMTargetMachine::addPassesToEmitMachineCode(PassManagerBase &PM,
                                                    JITCodeEmitter &JCE,
                                                    bool DisableVerify) {
   // Add common CodeGen passes.
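// The two AnalysisID parameters added above let a driver bisect the codegen
// pipeline. A minimal caller sketch follows; the wrapper function itself is
// hypothetical, but the addPassesToEmitFile() signature and its
// true-on-failure convention are taken from the hunk above.
static void emitStoppingAfter(LLVMTargetMachine *TM, PassManagerBase &PM,
                              formatted_raw_ostream &Out,
                              AnalysisID StopAfterID) {
  // With a non-null StopAfter, the patched addPassesToEmitFile() adds a
  // PrintModulePass and returns before wiring up the MC emission layers.
  if (TM->addPassesToEmitFile(PM, Out, TargetMachine::CGFT_AssemblyFile,
                              /*DisableVerify=*/true,
                              /*StartAfter=*/0, StopAfterID))
    report_fatal_error("target does not support file emission");
}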
- MCContext *Context = addPassesToGenerateCode(this, PM, DisableVerify); + MCContext *Context = addPassesToGenerateCode(this, PM, DisableVerify, 0, 0); if (!Context) return true; @@ -262,7 +253,7 @@ bool LLVMTargetMachine::addPassesToEmitMC(PassManagerBase &PM, raw_ostream &Out, bool DisableVerify) { // Add common CodeGen passes. - Ctx = addPassesToGenerateCode(this, PM, DisableVerify); + Ctx = addPassesToGenerateCode(this, PM, DisableVerify, 0, 0); if (!Ctx) return true; @@ -271,9 +262,10 @@ bool LLVMTargetMachine::addPassesToEmitMC(PassManagerBase &PM, // Create the code emitter for the target if it exists. If not, .o file // emission fails. + const MCRegisterInfo &MRI = *getRegisterInfo(); const MCSubtargetInfo &STI = getSubtarget(); - MCCodeEmitter *MCE = getTarget().createMCCodeEmitter(*getInstrInfo(),STI, - *Ctx); + MCCodeEmitter *MCE = getTarget().createMCCodeEmitter(*getInstrInfo(), MRI, + STI, *Ctx); MCAsmBackend *MAB = getTarget().createMCAsmBackend(getTargetTriple()); if (MCE == 0 || MAB == 0) return true; diff --git a/lib/CodeGen/LexicalScopes.cpp b/lib/CodeGen/LexicalScopes.cpp index f1abcbb..6b6b9d0 100644 --- a/lib/CodeGen/LexicalScopes.cpp +++ b/lib/CodeGen/LexicalScopes.cpp @@ -16,8 +16,8 @@ #define DEBUG_TYPE "lexicalscopes" #include "llvm/CodeGen/LexicalScopes.h" +#include "llvm/DebugInfo.h" #include "llvm/Function.h" -#include "llvm/Analysis/DebugInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/Support/Debug.h" diff --git a/lib/CodeGen/LiveDebugVariables.cpp b/lib/CodeGen/LiveDebugVariables.cpp index 2187833..d631726 100644 --- a/lib/CodeGen/LiveDebugVariables.cpp +++ b/lib/CodeGen/LiveDebugVariables.cpp @@ -23,9 +23,9 @@ #include "LiveDebugVariables.h" #include "VirtRegMap.h" #include "llvm/Constants.h" +#include "llvm/DebugInfo.h" #include "llvm/Metadata.h" #include "llvm/Value.h" -#include "llvm/Analysis/DebugInfo.h" #include "llvm/ADT/IntervalMap.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/LexicalScopes.h" @@ -243,7 +243,7 @@ public: /// computeIntervals - Compute the live intervals of all locations after /// collecting all their def points. - void computeIntervals(MachineRegisterInfo &MRI, + void computeIntervals(MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI, LiveIntervals &LIS, MachineDominatorTree &MDT, UserValueScopes &UVS); @@ -618,6 +618,7 @@ UserValue::addDefsFromCopies(LiveInterval *LI, unsigned LocNo, void UserValue::computeIntervals(MachineRegisterInfo &MRI, + const TargetRegisterInfo &TRI, LiveIntervals &LIS, MachineDominatorTree &MDT, UserValueScopes &UVS) { @@ -634,15 +635,32 @@ UserValue::computeIntervals(MachineRegisterInfo &MRI, unsigned LocNo = Defs[i].second; const MachineOperand &Loc = locations[LocNo]; + if (!Loc.isReg()) { + extendDef(Idx, LocNo, 0, 0, 0, LIS, MDT, UVS); + continue; + } + // Register locations are constrained to where the register value is live. 
- if (Loc.isReg() && LIS.hasInterval(Loc.getReg())) { - LiveInterval *LI = &LIS.getInterval(Loc.getReg()); - const VNInfo *VNI = LI->getVNInfoAt(Idx); + if (TargetRegisterInfo::isVirtualRegister(Loc.getReg())) { + LiveInterval *LI = 0; + const VNInfo *VNI = 0; + if (LIS.hasInterval(Loc.getReg())) { + LI = &LIS.getInterval(Loc.getReg()); + VNI = LI->getVNInfoAt(Idx); + } SmallVector Kills; extendDef(Idx, LocNo, LI, VNI, &Kills, LIS, MDT, UVS); - addDefsFromCopies(LI, LocNo, Kills, Defs, MRI, LIS); - } else - extendDef(Idx, LocNo, 0, 0, 0, LIS, MDT, UVS); + if (LI) + addDefsFromCopies(LI, LocNo, Kills, Defs, MRI, LIS); + continue; + } + + // For physregs, use the live range of the first regunit as a guide. + unsigned Unit = *MCRegUnitIterator(Loc.getReg(), &TRI); + LiveInterval *LI = &LIS.getRegUnit(Unit); + const VNInfo *VNI = LI->getVNInfoAt(Idx); + // Don't track copies from physregs, it is too expensive. + extendDef(Idx, LocNo, LI, VNI, 0, LIS, MDT, UVS); } // Finally, erase all the undefs. @@ -656,7 +674,7 @@ UserValue::computeIntervals(MachineRegisterInfo &MRI, void LDVImpl::computeIntervals() { for (unsigned i = 0, e = userValues.size(); i != e; ++i) { UserValueScopes UVS(userValues[i]->getDebugLoc(), LS); - userValues[i]->computeIntervals(MF->getRegInfo(), *LIS, *MDT, UVS); + userValues[i]->computeIntervals(MF->getRegInfo(), *TRI, *LIS, *MDT, UVS); userValues[i]->mapVirtRegs(this); } } @@ -721,7 +739,8 @@ renameRegister(unsigned OldReg, unsigned NewReg, unsigned SubIdx) { if (TargetRegisterInfo::isVirtualRegister(NewReg)) mapVirtReg(NewReg, UV); - virtRegToEqClass.erase(OldReg); + if (OldReg != NewReg) + virtRegToEqClass.erase(OldReg); do { UV->renameRegister(OldReg, NewReg, SubIdx, TRI); diff --git a/lib/CodeGen/LiveInterval.cpp b/lib/CodeGen/LiveInterval.cpp index ac18843..0a795e6 100644 --- a/lib/CodeGen/LiveInterval.cpp +++ b/lib/CodeGen/LiveInterval.cpp @@ -48,6 +48,26 @@ LiveInterval::iterator LiveInterval::find(SlotIndex Pos) { return I; } +VNInfo *LiveInterval::createDeadDef(SlotIndex Def, + VNInfo::Allocator &VNInfoAllocator) { + assert(!Def.isDead() && "Cannot define a value at the dead slot"); + iterator I = find(Def); + if (I == end()) { + VNInfo *VNI = getNextValue(Def, VNInfoAllocator); + ranges.push_back(LiveRange(Def, Def.getDeadSlot(), VNI)); + return VNI; + } + if (SlotIndex::isSameInstr(Def, I->start)) { + assert(I->start == Def && "Cannot insert def, already live"); + assert(I->valno->def == Def && "Inconsistent existing value def"); + return I->valno; + } + assert(SlotIndex::isEarlierInstr(Def, I->start) && "Already live at def"); + VNInfo *VNI = getNextValue(Def, VNInfoAllocator); + ranges.insert(I, LiveRange(Def, Def.getDeadSlot(), VNI)); + return VNI; +} + /// killedInRange - Return true if the interval has kills in [Start,End). bool LiveInterval::killedInRange(SlotIndex Start, SlotIndex End) const { Ranges::const_iterator r = @@ -140,7 +160,7 @@ void LiveInterval::markValNoForDeletion(VNInfo *ValNo) { valnos.pop_back(); } while (!valnos.empty() && valnos.back()->isUnused()); } else { - ValNo->setIsUnused(true); + ValNo->markUnused(); } } @@ -176,16 +196,16 @@ void LiveInterval::extendIntervalEndTo(Ranges::iterator I, SlotIndex NewEnd) { // If NewEnd was in the middle of an interval, make sure to get its endpoint. I->end = std::max(NewEnd, prior(MergeTo)->end); - // Erase any dead ranges. 
-  ranges.erase(llvm::next(I), MergeTo);
-
   // If the newly formed range now touches the range after it and if they have
   // the same value number, merge the two ranges into one range.
-  Ranges::iterator Next = llvm::next(I);
-  if (Next != ranges.end() && Next->start <= I->end && Next->valno == ValNo) {
-    I->end = Next->end;
-    ranges.erase(Next);
+  if (MergeTo != ranges.end() && MergeTo->start <= I->end &&
+      MergeTo->valno == ValNo) {
+    I->end = MergeTo->end;
+    ++MergeTo;
   }
+
+  // Erase any dead ranges.
+  ranges.erase(llvm::next(I), MergeTo);
 }

@@ -353,18 +373,6 @@ void LiveInterval::removeValNo(VNInfo *ValNo) {
   markValNoForDeletion(ValNo);
 }

-/// findDefinedVNInfo - Find the VNInfo defined by the specified
-/// index (register interval).
-VNInfo *LiveInterval::findDefinedVNInfoForRegInt(SlotIndex Idx) const {
-  for (LiveInterval::const_vni_iterator i = vni_begin(), e = vni_end();
-       i != e; ++i) {
-    if ((*i)->def == Idx)
-      return *i;
-  }
-
-  return 0;
-}
-
 /// join - Join two live intervals (this, and other) together.  This applies
 /// mappings to the value numbers in the LHS/RHS intervals as specified.  If
 /// the intervals are not joinable, this aborts.
@@ -373,6 +381,8 @@ void LiveInterval::join(LiveInterval &Other,
                         const int *RHSValNoAssignments,
                         SmallVector<VNInfo*, 16> &NewVNInfo,
                         MachineRegisterInfo *MRI) {
+  verify();
+
   // Determine if any of our live range values are mapped.  This is uncommon, so
   // we want to avoid the interval scan if not.
   bool MustMapCurValNos = false;
@@ -440,16 +450,148 @@ void LiveInterval::join(LiveInterval &Other,
   valnos.resize(NumNewVals);  // shrinkify

   // Okay, now insert the RHS live ranges into the LHS.
-  iterator InsertPos = begin();
   unsigned RangeNo = 0;
   for (iterator I = Other.begin(), E = Other.end(); I != E; ++I, ++RangeNo) {
     // Map the valno in the other live range to the current live range.
     I->valno = NewVNInfo[OtherAssignments[RangeNo]];
     assert(I->valno && "Adding a dead range?");
-    InsertPos = addRangeFrom(*I, InsertPos);
+  }
+  mergeIntervalRanges(Other);
+
+  verify();
+}
+
+/// \brief Helper function for merging in another LiveInterval's ranges.
+///
+/// This is a helper routine implementing an efficient merge of another
+/// LiveInterval's ranges into the current interval.
+///
+/// \param LHSValNo If non-NULL, set as the new value number for every range
+///                 from RHS which is merged into the LHS.
+/// \param RHSValNo If non-NULL, then only ranges in RHS whose original value
+///                 number matches this value number will be merged into LHS.
+void LiveInterval::mergeIntervalRanges(const LiveInterval &RHS,
+                                       VNInfo *LHSValNo,
+                                       const VNInfo *RHSValNo) {
+  if (RHS.empty())
+    return;
+
+  // Ensure we're starting with a valid range. Note that we don't verify RHS
+  // because it may have had its value numbers adjusted in preparation for
+  // merging.
+  verify();
+
+  // The strategy for merging these efficiently is as follows:
+  //
+  // 1) Find the beginning of the impacted ranges in the LHS.
+  // 2) Create a new, merged sub-sequence of ranges merging from the position
+  //    in #1 until either LHS or RHS is exhausted. Any part of LHS between RHS
+  //    entries being merged will be copied into this new range.
+  // 3) Replace the relevant section in LHS with these newly merged ranges.
+  // 4) Append any remaining ranges from RHS if LHS is exhausted in #2.
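// The four steps above describe a coalescing merge of two sorted range lists.
// A self-contained sketch of the core loop, simplified to a single value
// number and an out-of-place result (the real code additionally
// binary-searches past the untouched LHS prefix and rewrites LHS in place);
// the Range type here is an illustrative stand-in, not LLVM's LiveRange:

#include <algorithm>
#include <vector>

struct Range { unsigned start, end; };  // half-open [start, end)

static std::vector<Range> mergeRanges(const std::vector<Range> &LHS,
                                      const std::vector<Range> &RHS) {
  std::vector<Range> Out;
  size_t i = 0, j = 0;
  while (i != LHS.size() || j != RHS.size()) {
    // Always take the range with the earliest start point next.
    Range R;
    if (j == RHS.size() || (i != LHS.size() && LHS[i].start <= RHS[j].start))
      R = LHS[i++];
    else
      R = RHS[j++];
    // Coalesce with the previous output range when they touch or overlap.
    if (!Out.empty() && R.start <= Out.back().end)
      Out.back().end = std::max(Out.back().end, R.end);
    else
      Out.push_back(R);
  }
  return Out;
}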
+  //
+  // We don't follow the typical in-place merge strategy for sorted ranges of
+  // appending the new ranges to the back and then using std::inplace_merge
+  // because one step of the merge can both mutate the original elements and
+  // remove elements from the original. Essentially, because the merge includes
+  // collapsing overlapping ranges, a more complex approach is required.

+  // We do an initial binary search to optimize for a common pattern: a large
+  // LHS, and a very small RHS.
+  const_iterator RI = RHS.begin(), RE = RHS.end();
+  iterator LE = end(), LI = std::upper_bound(begin(), LE, *RI);

+  // Merge into NewRanges until one of the ranges is exhausted.
+  SmallVector<LiveRange, 4> NewRanges;

+  // Keep track of where to begin the replacement.
+  iterator ReplaceI = LI;

+  // If there are preceding ranges in the LHS, put the last one into NewRanges
+  // so we can optionally extend it. Adjust the replacement point accordingly.
+  if (LI != begin()) {
+    ReplaceI = llvm::prior(LI);
+    NewRanges.push_back(*ReplaceI);
+  }

+  // Now loop over the mergeable portions of both LHS and RHS, merging into
+  // NewRanges.
+  while (LI != LE && RI != RE) {
+    // Skip incoming ranges with the wrong value.
+    if (RHSValNo && RI->valno != RHSValNo) {
+      ++RI;
+      continue;
+    }

+    // Select the first range. We pick the earliest start point, and then the
+    // largest range.
+    LiveRange R = *LI;
+    if (*RI < R) {
+      R = *RI;
+      ++RI;
+      if (LHSValNo)
+        R.valno = LHSValNo;
+    } else {
+      ++LI;
+    }

+    if (NewRanges.empty()) {
+      NewRanges.push_back(R);
+      continue;
+    }

+    LiveRange &LastR = NewRanges.back();
+    if (R.valno == LastR.valno) {
+      // Try to merge this range into the last one.
+      if (R.start <= LastR.end) {
+        LastR.end = std::max(LastR.end, R.end);
+        continue;
+      }
+    } else {
+      // We can't merge ranges across a value number.
+      assert(R.start >= LastR.end &&
+             "Cannot overlap two LiveRanges with differing ValID's");
+    }

+    // If all else fails, just append the range.
+    NewRanges.push_back(R);
+  }
+  assert(RI == RE || LI == LE);

+  // Check for being able to merge into the trailing sequence of ranges on
+  // the LHS.
+  if (!NewRanges.empty())
+    for (; LI != LE && (LI->valno == NewRanges.back().valno &&
+                        LI->start <= NewRanges.back().end);
+         ++LI)
+      NewRanges.back().end = std::max(NewRanges.back().end, LI->end);

+  // Replace the ranges in the LHS with the newly merged ones. It would be
+  // really nice if there were a move-supporting 'replace' directly in
+  // SmallVector, but as there is not, we pay the price of copies to avoid
+  // wasted memory allocations.
+  SmallVectorImpl<LiveRange>::iterator NRI = NewRanges.begin(),
+                                       NRE = NewRanges.end();
+  for (; ReplaceI != LI && NRI != NRE; ++ReplaceI, ++NRI)
+    *ReplaceI = *NRI;
+  if (NRI == NRE)
+    ranges.erase(ReplaceI, LI);
+  else
+    ranges.insert(LI, NRI, NRE);

+  // And finally insert any trailing end of RHS (if we have one).
+  for (; RI != RE; ++RI) {
+    LiveRange R = *RI;
+    if (LHSValNo)
+      R.valno = LHSValNo;
+    if (!ranges.empty() &&
+        ranges.back().valno == R.valno && R.start <= ranges.back().end)
+      ranges.back().end = std::max(ranges.back().end, R.end);
+    else
+      ranges.push_back(R);
   }

-  ComputeJoinedWeight(Other);
+  // Ensure we finished with a valid new sequence of ranges.
+  verify();
 }

 /// MergeRangesInAsValue - Merge all of the intervals in RHS into this live
@@ -458,38 +600,20 @@ void LiveInterval::join(LiveInterval &Other,
 /// the overlapping LiveRanges have the specified value number.
 void LiveInterval::MergeRangesInAsValue(const LiveInterval &RHS,
                                         VNInfo *LHSValNo) {
-  // TODO: Make this more efficient.
-  iterator InsertPos = begin();
-  for (const_iterator I = RHS.begin(), E = RHS.end(); I != E; ++I) {
-    // Map the valno in the other live range to the current live range.
-    LiveRange Tmp = *I;
-    Tmp.valno = LHSValNo;
-    InsertPos = addRangeFrom(Tmp, InsertPos);
-  }
+  mergeIntervalRanges(RHS, LHSValNo);
 }

-
 /// MergeValueInAsValue - Merge all of the live ranges of a specific val#
 /// in RHS into this live interval as the specified value number.
 /// The LiveRanges in RHS are allowed to overlap with LiveRanges in the
 /// current interval; it will replace the value numbers of the overlapped
 /// live ranges with the specified value number.
-void LiveInterval::MergeValueInAsValue(
-                                    const LiveInterval &RHS,
-                                    const VNInfo *RHSValNo, VNInfo *LHSValNo) {
-  // TODO: Make this more efficient.
-  iterator InsertPos = begin();
-  for (const_iterator I = RHS.begin(), E = RHS.end(); I != E; ++I) {
-    if (I->valno != RHSValNo)
-      continue;
-    // Map the valno in the other live range to the current live range.
-    LiveRange Tmp = *I;
-    Tmp.valno = LHSValNo;
-    InsertPos = addRangeFrom(Tmp, InsertPos);
-  }
+void LiveInterval::MergeValueInAsValue(const LiveInterval &RHS,
+                                       const VNInfo *RHSValNo,
+                                       VNInfo *LHSValNo) {
+  mergeIntervalRanges(RHS, LHSValNo, RHSValNo);
 }

-
 /// MergeValueNumberInto - This method is called when two value numbers
 /// are found to be equivalent.  This eliminates V1, replacing all
 /// LiveRanges with the V1 value number with the V2 value number.  This can
@@ -543,9 +667,6 @@ VNInfo* LiveInterval::MergeValueNumberInto(VNInfo *V1, VNInfo *V2) {
     }
   }

-  // Merge the relevant flags.
-  V2->mergeFlags(V1);
-
   // Now that V1 is dead, remove it.
   markValNoForDeletion(V1);

@@ -569,6 +690,8 @@ void LiveInterval::Copy(const LiveInterval &RHS,
     const LiveRange &LR = RHS.ranges[i];
     addRange(LiveRange(LR.start, LR.end, getValNumInfo(LR.valno->id)));
   }
+
+  verify();
 }

 unsigned LiveInterval::getSize() const {
@@ -578,29 +701,6 @@ unsigned LiveInterval::getSize() const {
   return Sum;
 }

-/// ComputeJoinedWeight - Set the weight of a live interval Joined
-/// after Other has been merged into it.
-void LiveInterval::ComputeJoinedWeight(const LiveInterval &Other) {
-  // If either of these intervals was spilled, the weight is the
-  // weight of the non-spilled interval.  This can only happen with
-  // iterative coalescers.
-
-  if (Other.weight != HUGE_VALF) {
-    weight += Other.weight;
-  }
-  else if (weight == HUGE_VALF &&
-      !TargetRegisterInfo::isPhysicalRegister(reg)) {
-    // Remove this assert if you have an iterative coalescer
-    assert(0 && "Joining to spilled interval");
-    weight = Other.weight;
-  }
-  else {
-    // Otherwise the weight stays the same
-    // Remove this assert if you have an iterative coalescer
-    assert(0 && "Joining from spilled interval");
-  }
-}
-
 raw_ostream& llvm::operator<<(raw_ostream& os, const LiveRange &LR) {
   return os << '[' << LR.start << ',' << LR.end << ':' << LR.valno->id << ")";
 }
@@ -609,15 +709,10 @@ void LiveRange::dump() const {
   dbgs() << *this << "\n";
 }

-void LiveInterval::print(raw_ostream &OS, const TargetRegisterInfo *TRI) const {
-  OS << PrintReg(reg, TRI);
-  if (weight != 0)
-    OS << ',' << weight;
-
+void LiveInterval::print(raw_ostream &OS) const {
   if (empty())
-    OS << " EMPTY";
+    OS << "EMPTY";
   else {
-    OS << " = ";
     for (LiveInterval::Ranges::const_iterator I = ranges.begin(),
            E = ranges.end(); I != E; ++I) {
       OS << *I;
@@ -639,9 +734,7 @@ void LiveInterval::print(raw_ostream &OS, const TargetRegisterInfo *TRI) const {
       } else {
         OS << vni->def;
         if (vni->isPHIDef())
-          OS << "-phidef";
-        if (vni->hasPHIKill())
-          OS << "-phikill";
+          OS << "-phi";
       }
     }
   }
@@ -651,6 +744,23 @@ void LiveInterval::dump() const {
   dbgs() << *this << "\n";
 }

+#ifndef NDEBUG
+void LiveInterval::verify() const {
+  for (const_iterator I = begin(), E = end(); I != E; ++I) {
+    assert(I->start.isValid());
+    assert(I->end.isValid());
+    assert(I->start < I->end);
+    assert(I->valno != 0);
+    assert(I->valno == valnos[I->valno->id]);
+    if (llvm::next(I) != E) {
+      assert(I->end <= llvm::next(I)->start);
+      if (I->end == llvm::next(I)->start)
+        assert(I->valno != llvm::next(I)->valno);
+    }
+  }
+}
+#endif
+
 void LiveRange::print(raw_ostream &os) const {
   os << *this;
@@ -712,13 +822,13 @@ void ConnectedVNInfoEqClasses::Distribute(LiveInterval *LIV[],
     MachineOperand &MO = RI.getOperand();
     MachineInstr *MI = MO.getParent();
     ++RI;
-    if (MO.isUse() && MO.isUndef())
-      continue;
     // DBG_VALUE instructions should have been eliminated earlier.
-    SlotIndex Idx = LIS.getInstructionIndex(MI);
-    Idx = Idx.getRegSlot(MO.isUse());
-    const VNInfo *VNI = LI.getVNInfoAt(Idx);
-    assert(VNI && "Interval not live at use.");
+    LiveRangeQuery LRQ(LI, LIS.getInstructionIndex(MI));
+    const VNInfo *VNI = MO.readsReg() ? LRQ.valueIn() : LRQ.valueDefined();
+    // In the case of a use that isn't tied to any def, VNI will be
+    // NULL. If the use is tied to a def, VNI will be the defined value.
+    if (!VNI)
+      continue;
     MO.setReg(LIV[getEqClass(VNI)]->reg);
   }
diff --git a/lib/CodeGen/LiveIntervalAnalysis.cpp b/lib/CodeGen/LiveIntervalAnalysis.cpp
index 934cc12..d0f8ae1 100644
--- a/lib/CodeGen/LiveIntervalAnalysis.cpp
+++ b/lib/CodeGen/LiveIntervalAnalysis.cpp
@@ -20,6 +20,7 @@
 #include "llvm/Value.h"
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/CodeGen/LiveVariables.h"
+#include "llvm/CodeGen/MachineDominators.h"
 #include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/Passes.h"
@@ -31,20 +32,20 @@
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/ADT/DenseSet.h"
-#include "llvm/ADT/Statistic.h"
 #include "llvm/ADT/STLExtras.h"
+#include "LiveRangeCalc.h"
 #include <algorithm>
 #include <limits>
 #include <cmath>
 using namespace llvm;

-// Hidden options for help debugging.
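// The Distribute() hunk above relies on LiveRangeQuery separating the value
// live into an instruction (valueIn) from the value the instruction defines
// (valueDefined). A rough sketch of that split in terms of LiveInterval's own
// lookup helpers; SimpleQuery is a hypothetical stand-in, not LLVM's
// LiveRangeQuery implementation:
struct SimpleQuery {
  const LiveInterval &LI;
  SlotIndex Idx;
  SimpleQuery(const LiveInterval &li, SlotIndex idx) : LI(li), Idx(idx) {}
  // Value reaching the instruction: whatever was live just before Idx.
  const VNInfo *valueIn() const { return LI.getVNInfoBefore(Idx); }
  // Value created by the instruction: a value whose def is exactly Idx.
  const VNInfo *valueDefined() const {
    const VNInfo *VNI = LI.getVNInfoAt(Idx);
    return (VNI && VNI->def == Idx) ? VNI : 0;
  }
};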
-static cl::opt<bool> DisableReMat("disable-rematerialization",
-                                  cl::init(false), cl::Hidden);
-
-STATISTIC(numIntervals , "Number of original intervals");
+// Switch to the new experimental algorithm for computing live intervals.
+static cl::opt<bool>
+NewLiveIntervals("new-live-intervals", cl::Hidden,
+                 cl::desc("Use new algorithm for computing live intervals"));

 char LiveIntervals::ID = 0;
+char &llvm::LiveIntervalsID = LiveIntervals::ID;
 INITIALIZE_PASS_BEGIN(LiveIntervals, "liveintervals",
                 "Live Interval Analysis", false, false)
 INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
@@ -61,23 +62,35 @@ void LiveIntervals::getAnalysisUsage(AnalysisUsage &AU) const {
   AU.addRequired<LiveVariables>();
   AU.addPreserved<LiveVariables>();
   AU.addPreservedID(MachineLoopInfoID);
+  AU.addRequiredTransitiveID(MachineDominatorsID);
   AU.addPreservedID(MachineDominatorsID);
   AU.addPreserved<SlotIndexes>();
   AU.addRequiredTransitive<SlotIndexes>();
   MachineFunctionPass::getAnalysisUsage(AU);
 }

+LiveIntervals::LiveIntervals() : MachineFunctionPass(ID),
+  DomTree(0), LRCalc(0) {
+  initializeLiveIntervalsPass(*PassRegistry::getPassRegistry());
+}
+
+LiveIntervals::~LiveIntervals() {
+  delete LRCalc;
+}
+
 void LiveIntervals::releaseMemory() {
   // Free the live intervals themselves.
-  for (DenseMap<unsigned, LiveInterval*>::iterator I = r2iMap_.begin(),
-       E = r2iMap_.end(); I != E; ++I)
-    delete I->second;
-
-  r2iMap_.clear();
+  for (unsigned i = 0, e = VirtRegIntervals.size(); i != e; ++i)
+    delete VirtRegIntervals[TargetRegisterInfo::index2VirtReg(i)];
+  VirtRegIntervals.clear();
   RegMaskSlots.clear();
   RegMaskBits.clear();
   RegMaskBlocks.clear();

+  for (unsigned i = 0, e = RegUnitIntervals.size(); i != e; ++i)
+    delete RegUnitIntervals[i];
+  RegUnitIntervals.clear();
+
   // Release VNInfo memory regions, VNInfo objects don't need to be dtor'd.
   VNInfoAllocator.Reset();
 }
@@ -85,20 +98,34 @@ void LiveIntervals::releaseMemory() {
 /// runOnMachineFunction - Register allocate the whole function
 ///
 bool LiveIntervals::runOnMachineFunction(MachineFunction &fn) {
-  mf_ = &fn;
-  mri_ = &mf_->getRegInfo();
-  tm_ = &fn.getTarget();
-  tri_ = tm_->getRegisterInfo();
-  tii_ = tm_->getInstrInfo();
-  aa_ = &getAnalysis<AliasAnalysis>();
-  lv_ = &getAnalysis<LiveVariables>();
-  indexes_ = &getAnalysis<SlotIndexes>();
-  allocatableRegs_ = tri_->getAllocatableSet(fn);
-  reservedRegs_ = tri_->getReservedRegs(fn);
-
-  computeIntervals();
-
-  numIntervals += getNumIntervals();
+  MF = &fn;
+  MRI = &MF->getRegInfo();
+  TM = &fn.getTarget();
+  TRI = TM->getRegisterInfo();
+  TII = TM->getInstrInfo();
+  AA = &getAnalysis<AliasAnalysis>();
+  LV = &getAnalysis<LiveVariables>();
+  Indexes = &getAnalysis<SlotIndexes>();
+  DomTree = &getAnalysis<MachineDominatorTree>();
+  if (!LRCalc)
+    LRCalc = new LiveRangeCalc();
+  AllocatableRegs = TRI->getAllocatableSet(fn);
+  ReservedRegs = TRI->getReservedRegs(fn);
+
+  // Allocate space for all virtual registers.
+  VirtRegIntervals.resize(MRI->getNumVirtRegs());
+
+  if (NewLiveIntervals) {
+    // This is the new way of computing live intervals.
+    // It is independent of LiveVariables, and it can run at any time.
+    computeVirtRegs();
+    computeRegMasks();
+  } else {
+    // This is the old way of computing live intervals.
+    // It depends on LiveVariables.
+    computeIntervals();
+  }
+  computeLiveInRegUnits();

   DEBUG(dump());
   return true;
@@ -108,27 +135,24 @@ bool LiveIntervals::runOnMachineFunction(MachineFunction &fn) {
 void LiveIntervals::print(raw_ostream &OS, const Module* ) const {
   OS << "********** INTERVALS **********\n";

-  // Dump the physregs.
-  for (unsigned Reg = 1, RegE = tri_->getNumRegs(); Reg != RegE; ++Reg)
-    if (const LiveInterval *LI = r2iMap_.lookup(Reg)) {
-      LI->print(OS, tri_);
-      OS << '\n';
-    }
+  // Dump the regunits.
+  for (unsigned i = 0, e = RegUnitIntervals.size(); i != e; ++i)
+    if (LiveInterval *LI = RegUnitIntervals[i])
+      OS << PrintRegUnit(i, TRI) << " = " << *LI << '\n';

   // Dump the virtregs.
-  for (unsigned Reg = 0, RegE = mri_->getNumVirtRegs(); Reg != RegE; ++Reg)
-    if (const LiveInterval *LI =
-        r2iMap_.lookup(TargetRegisterInfo::index2VirtReg(Reg))) {
-      LI->print(OS, tri_);
-      OS << '\n';
-    }
+  for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) {
+    unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
+    if (hasInterval(Reg))
+      OS << PrintReg(Reg) << " = " << getInterval(Reg) << '\n';
+  }

   printInstrs(OS);
 }

 void LiveIntervals::printInstrs(raw_ostream &OS) const {
   OS << "********** MACHINEINSTRS **********\n";
-  mf_->print(OS, indexes_);
+  MF->print(OS, Indexes);
 }

 void LiveIntervals::dumpInstrs() const {
@@ -176,13 +200,13 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb,
                                              MachineOperand& MO,
                                              unsigned MOIdx,
                                              LiveInterval &interval) {
-  DEBUG(dbgs() << "\t\tregister: " << PrintReg(interval.reg, tri_));
+  DEBUG(dbgs() << "\t\tregister: " << PrintReg(interval.reg, TRI));

   // Virtual registers may be defined multiple times (due to phi
   // elimination and 2-addr elimination).  Much of what we do only has to be
   // done once for the vreg.  We use an empty interval to detect the first
   // time we see a vreg.
-  LiveVariables::VarInfo& vi = lv_->getVarInfo(interval.reg);
+  LiveVariables::VarInfo& vi = LV->getVarInfo(interval.reg);
   if (interval.empty()) {
     // Get the Idx of the defining instructions.
     SlotIndex defIndex = MIIdx.getRegSlot(MO.isEarlyClobber());
@@ -226,22 +250,22 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb,
       DEBUG(dbgs() << " +" << NewLR);
       interval.addRange(NewLR);

-    bool PHIJoin = lv_->isPHIJoin(interval.reg);
+    bool PHIJoin = LV->isPHIJoin(interval.reg);
     if (PHIJoin) {
-      // A phi join register is killed at the end of the MBB and revived as a new
-      // valno in the killing blocks.
+      // A phi join register is killed at the end of the MBB and revived as a
+      // new valno in the killing blocks.
       assert(vi.AliveBlocks.empty() && "Phi join can't pass through blocks");
       DEBUG(dbgs() << " phi-join");
-      ValNo->setHasPHIKill(true);
     } else {
       // Iterate over all of the blocks that the variable is completely
       // live in, adding [instrIndex(begin), instrIndex(end)+4) to the
       // live interval.
for (SparseBitVector<>::iterator I = vi.AliveBlocks.begin(), E = vi.AliveBlocks.end(); I != E; ++I) { - MachineBasicBlock *aliveBlock = mf_->getBlockNumbered(*I); - LiveRange LR(getMBBStartIdx(aliveBlock), getMBBEndIdx(aliveBlock), ValNo); + MachineBasicBlock *aliveBlock = MF->getBlockNumbered(*I); + LiveRange LR(getMBBStartIdx(aliveBlock), getMBBEndIdx(aliveBlock), + ValNo); interval.addRange(LR); DEBUG(dbgs() << " +" << LR); } @@ -260,7 +284,6 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb, assert(getInstructionFromIndex(Start) == 0 && "PHI def index points at actual instruction."); ValNo = interval.getNextValue(Start, VNInfoAllocator); - ValNo->setIsPHIDef(true); } LiveRange LR(Start, killIdx, ValNo); interval.addRange(LR); @@ -319,11 +342,8 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb, interval.addRange(LiveRange(RedefIndex, RedefIndex.getDeadSlot(), OldValNo)); - DEBUG({ - dbgs() << " RESULT: "; - interval.print(dbgs(), tri_); - }); - } else if (lv_->isPHIJoin(interval.reg)) { + DEBUG(dbgs() << " RESULT: " << interval); + } else if (LV->isPHIJoin(interval.reg)) { // In the case of PHI elimination, each variable definition is only // live until the end of the block. We've already taken care of the // rest of the live range. @@ -337,7 +357,6 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb, SlotIndex killIndex = getMBBEndIdx(mbb); LiveRange LR(defIndex, killIndex, ValNo); interval.addRange(LR); - ValNo->setHasPHIKill(true); DEBUG(dbgs() << " phi-join +" << LR); } else { llvm_unreachable("Multiply defined register"); @@ -347,101 +366,6 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb, DEBUG(dbgs() << '\n'); } -static bool isRegLiveIntoSuccessor(const MachineBasicBlock *MBB, unsigned Reg) { - for (MachineBasicBlock::const_succ_iterator SI = MBB->succ_begin(), - SE = MBB->succ_end(); - SI != SE; ++SI) { - const MachineBasicBlock* succ = *SI; - if (succ->isLiveIn(Reg)) - return true; - } - return false; -} - -void LiveIntervals::handlePhysicalRegisterDef(MachineBasicBlock *MBB, - MachineBasicBlock::iterator mi, - SlotIndex MIIdx, - MachineOperand& MO, - LiveInterval &interval) { - DEBUG(dbgs() << "\t\tregister: " << PrintReg(interval.reg, tri_)); - - SlotIndex baseIndex = MIIdx; - SlotIndex start = baseIndex.getRegSlot(MO.isEarlyClobber()); - SlotIndex end = start; - - // If it is not used after definition, it is considered dead at - // the instruction defining it. Hence its interval is: - // [defSlot(def), defSlot(def)+1) - // For earlyclobbers, the defSlot was pushed back one; the extra - // advance below compensates. - if (MO.isDead()) { - DEBUG(dbgs() << " dead"); - end = start.getDeadSlot(); - goto exit; - } - - // If it is not dead on definition, it must be killed by a - // subsequent instruction. Hence its interval is: - // [defSlot(def), useSlot(kill)+1) - baseIndex = baseIndex.getNextIndex(); - while (++mi != MBB->end()) { - - if (mi->isDebugValue()) - continue; - if (getInstructionFromIndex(baseIndex) == 0) - baseIndex = indexes_->getNextNonNullIndex(baseIndex); - - if (mi->killsRegister(interval.reg, tri_)) { - DEBUG(dbgs() << " killed"); - end = baseIndex.getRegSlot(); - goto exit; - } else { - int DefIdx = mi->findRegisterDefOperandIdx(interval.reg,false,false,tri_); - if (DefIdx != -1) { - if (mi->isRegTiedToUseOperand(DefIdx)) { - // Two-address instruction. 
- end = baseIndex.getRegSlot(mi->getOperand(DefIdx).isEarlyClobber()); - } else { - // Another instruction redefines the register before it is ever read. - // Then the register is essentially dead at the instruction that - // defines it. Hence its interval is: - // [defSlot(def), defSlot(def)+1) - DEBUG(dbgs() << " dead"); - end = start.getDeadSlot(); - } - goto exit; - } - } - - baseIndex = baseIndex.getNextIndex(); - } - - // If we get here the register *should* be live out. - assert(!isAllocatable(interval.reg) && "Physregs shouldn't be live out!"); - - // FIXME: We need saner rules for reserved regs. - if (isReserved(interval.reg)) { - end = start.getDeadSlot(); - } else { - // Unreserved, unallocable registers like EFLAGS can be live across basic - // block boundaries. - assert(isRegLiveIntoSuccessor(MBB, interval.reg) && - "Unreserved reg not live-out?"); - end = getMBBEndIdx(MBB); - } -exit: - assert(start < end && "did not find end of interval?"); - - // Already exists? Extend old live interval. - VNInfo *ValNo = interval.getVNInfoAt(start); - bool Extend = ValNo != 0; - if (!Extend) - ValNo = interval.getNextValue(start, VNInfoAllocator); - LiveRange LR(start, end, ValNo); - interval.addRange(LR); - DEBUG(dbgs() << " +" << LR << '\n'); -} - void LiveIntervals::handleRegisterDef(MachineBasicBlock *MBB, MachineBasicBlock::iterator MI, SlotIndex MIIdx, @@ -450,93 +374,6 @@ void LiveIntervals::handleRegisterDef(MachineBasicBlock *MBB, if (TargetRegisterInfo::isVirtualRegister(MO.getReg())) handleVirtualRegisterDef(MBB, MI, MIIdx, MO, MOIdx, getOrCreateInterval(MO.getReg())); - else - handlePhysicalRegisterDef(MBB, MI, MIIdx, MO, - getOrCreateInterval(MO.getReg())); -} - -void LiveIntervals::handleLiveInRegister(MachineBasicBlock *MBB, - SlotIndex MIIdx, - LiveInterval &interval) { - assert(TargetRegisterInfo::isPhysicalRegister(interval.reg) && - "Only physical registers can be live in."); - assert((!isAllocatable(interval.reg) || MBB->getParent()->begin() || - MBB->isLandingPad()) && - "Allocatable live-ins only valid for entry blocks and landing pads."); - - DEBUG(dbgs() << "\t\tlivein register: " << PrintReg(interval.reg, tri_)); - - // Look for kills, if it reaches a def before it's killed, then it shouldn't - // be considered a livein. - MachineBasicBlock::iterator mi = MBB->begin(); - MachineBasicBlock::iterator E = MBB->end(); - // Skip over DBG_VALUE at the start of the MBB. - if (mi != E && mi->isDebugValue()) { - while (++mi != E && mi->isDebugValue()) - ; - if (mi == E) - // MBB is empty except for DBG_VALUE's. - return; - } - - SlotIndex baseIndex = MIIdx; - SlotIndex start = baseIndex; - if (getInstructionFromIndex(baseIndex) == 0) - baseIndex = indexes_->getNextNonNullIndex(baseIndex); - - SlotIndex end = baseIndex; - bool SeenDefUse = false; - - while (mi != E) { - if (mi->killsRegister(interval.reg, tri_)) { - DEBUG(dbgs() << " killed"); - end = baseIndex.getRegSlot(); - SeenDefUse = true; - break; - } else if (mi->modifiesRegister(interval.reg, tri_)) { - // Another instruction redefines the register before it is ever read. - // Then the register is essentially dead at the instruction that defines - // it. Hence its interval is: - // [defSlot(def), defSlot(def)+1) - DEBUG(dbgs() << " dead"); - end = start.getDeadSlot(); - SeenDefUse = true; - break; - } - - while (++mi != E && mi->isDebugValue()) - // Skip over DBG_VALUE. - ; - if (mi != E) - baseIndex = indexes_->getNextNonNullIndex(baseIndex); - } - - // Live-in register might not be used at all. 
- if (!SeenDefUse) { - if (isAllocatable(interval.reg) || - !isRegLiveIntoSuccessor(MBB, interval.reg)) { - // Allocatable registers are never live through. - // Non-allocatable registers that aren't live into any successors also - // aren't live through. - DEBUG(dbgs() << " dead"); - return; - } else { - // If we get here the register is non-allocatable and live into some - // successor. We'll conservatively assume it's live-through. - DEBUG(dbgs() << " live through"); - end = getMBBEndIdx(MBB); - } - } - - SlotIndex defIdx = getMBBStartIdx(MBB); - assert(getInstructionFromIndex(defIdx) == 0 && - "PHI def index points at actual instruction."); - VNInfo *vni = interval.getNextValue(defIdx, VNInfoAllocator); - vni->setIsPHIDef(true); - LiveRange LR(start, end, vni); - - interval.addRange(LR); - DEBUG(dbgs() << " +" << LR << '\n'); } /// computeIntervals - computes the live intervals for virtual @@ -546,12 +383,12 @@ void LiveIntervals::handleLiveInRegister(MachineBasicBlock *MBB, void LiveIntervals::computeIntervals() { DEBUG(dbgs() << "********** COMPUTING LIVE INTERVALS **********\n" << "********** Function: " - << ((Value*)mf_->getFunction())->getName() << '\n'); + << ((Value*)MF->getFunction())->getName() << '\n'); - RegMaskBlocks.resize(mf_->getNumBlockIDs()); + RegMaskBlocks.resize(MF->getNumBlockIDs()); SmallVector UndefUses; - for (MachineFunction::iterator MBBI = mf_->begin(), E = mf_->end(); + for (MachineFunction::iterator MBBI = MF->begin(), E = MF->end(); MBBI != E; ++MBBI) { MachineBasicBlock *MBB = MBBI; RegMaskBlocks[MBB->getNumber()].first = RegMaskSlots.size(); @@ -564,22 +401,16 @@ void LiveIntervals::computeIntervals() { DEBUG(dbgs() << "BB#" << MBB->getNumber() << ":\t\t# derived from " << MBB->getName() << "\n"); - // Create intervals for live-ins to this BB first. - for (MachineBasicBlock::livein_iterator LI = MBB->livein_begin(), - LE = MBB->livein_end(); LI != LE; ++LI) { - handleLiveInRegister(MBB, MIIndex, getOrCreateInterval(*LI)); - } - // Skip over empty initial indices. if (getInstructionFromIndex(MIIndex) == 0) - MIIndex = indexes_->getNextNonNullIndex(MIIndex); + MIIndex = Indexes->getNextNonNullIndex(MIIndex); for (MachineBasicBlock::iterator MI = MBB->begin(), miEnd = MBB->end(); MI != miEnd; ++MI) { DEBUG(dbgs() << MIIndex << "\t" << *MI); if (MI->isDebugValue()) continue; - assert(indexes_->getInstructionFromIndex(MIIndex) == MI && + assert(Indexes->getInstructionFromIndex(MIIndex) == MI && "Lost SlotIndex synchronization"); // Handle defs. @@ -593,7 +424,7 @@ void LiveIntervals::computeIntervals() { continue; } - if (!MO.isReg() || !MO.getReg()) + if (!MO.isReg() || !TargetRegisterInfo::isVirtualRegister(MO.getReg())) continue; // handle register defs - build intervals @@ -604,7 +435,7 @@ void LiveIntervals::computeIntervals() { } // Move to the next instr slot. - MIIndex = indexes_->getNextNonNullIndex(MIIndex); + MIIndex = Indexes->getNextNonNullIndex(MIIndex); } // Compute the number of register mask instructions in this block. @@ -626,14 +457,147 @@ LiveInterval* LiveIntervals::createInterval(unsigned reg) { return new LiveInterval(reg, Weight); } -/// dupInterval - Duplicate a live interval. The caller is responsible for -/// managing the allocated memory. -LiveInterval* LiveIntervals::dupInterval(LiveInterval *li) { - LiveInterval *NewLI = createInterval(li->reg); - NewLI->Copy(*li, mri_, getVNInfoAllocator()); - return NewLI; + +/// computeVirtRegInterval - Compute the live interval of a virtual register, +/// based on defs and uses. 
+void LiveIntervals::computeVirtRegInterval(LiveInterval *LI) {
+  assert(LRCalc && "LRCalc not initialized.");
+  assert(LI->empty() && "Should only compute empty intervals.");
+  LRCalc->reset(MF, getSlotIndexes(), DomTree, &getVNInfoAllocator());
+  LRCalc->createDeadDefs(LI);
+  LRCalc->extendToUses(LI);
+}
+
+void LiveIntervals::computeVirtRegs() {
+  for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) {
+    unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
+    if (MRI->reg_nodbg_empty(Reg))
+      continue;
+    LiveInterval *LI = createInterval(Reg);
+    VirtRegIntervals[Reg] = LI;
+    computeVirtRegInterval(LI);
+  }
+}
+
+void LiveIntervals::computeRegMasks() {
+  RegMaskBlocks.resize(MF->getNumBlockIDs());
+
+  // Find all instructions with regmask operands.
+  for (MachineFunction::iterator MBBI = MF->begin(), E = MF->end();
+       MBBI != E; ++MBBI) {
+    MachineBasicBlock *MBB = MBBI;
+    std::pair<unsigned, unsigned> &RMB = RegMaskBlocks[MBB->getNumber()];
+    RMB.first = RegMaskSlots.size();
+    for (MachineBasicBlock::iterator MI = MBB->begin(), ME = MBB->end();
+         MI != ME; ++MI)
+      for (MIOperands MO(MI); MO.isValid(); ++MO) {
+        if (!MO->isRegMask())
+          continue;
+        RegMaskSlots.push_back(Indexes->getInstructionIndex(MI).getRegSlot());
+        RegMaskBits.push_back(MO->getRegMask());
+      }
+    // Compute the number of register mask instructions in this block.
+    RMB.second = RegMaskSlots.size() - RMB.first;
+  }
 }

+//===----------------------------------------------------------------------===//
+//                           Register Unit Liveness
+//===----------------------------------------------------------------------===//
+//
+// Fixed interference typically comes from ABI boundaries: Function arguments
+// and return values are passed in fixed registers, and so are exception
+// pointers entering landing pads. Certain instructions require values to be
+// present in specific registers. That is also represented through fixed
+// interference.
+//
+
+/// computeRegUnitInterval - Compute the live interval of a register unit,
+/// based on the uses and defs of aliasing registers.  The interval should be
+/// empty, or contain only dead phi-defs from ABI blocks.
+void LiveIntervals::computeRegUnitInterval(LiveInterval *LI) {
+  unsigned Unit = LI->reg;
+
+  assert(LRCalc && "LRCalc not initialized.");
+  LRCalc->reset(MF, getSlotIndexes(), DomTree, &getVNInfoAllocator());
+
+  // The physregs aliasing Unit are the roots and their super-registers.
+  // Create all values as dead defs before extending to uses. Note that roots
+  // may share super-registers. That's OK because createDeadDefs() is
+  // idempotent. It is very rare for a register unit to have multiple roots, so
+  // uniquing super-registers is probably not worthwhile.
+  for (MCRegUnitRootIterator Roots(Unit, TRI); Roots.isValid(); ++Roots) {
+    unsigned Root = *Roots;
+    if (!MRI->reg_empty(Root))
+      LRCalc->createDeadDefs(LI, Root);
+    for (MCSuperRegIterator Supers(Root, TRI); Supers.isValid(); ++Supers) {
+      if (!MRI->reg_empty(*Supers))
+        LRCalc->createDeadDefs(LI, *Supers);
+    }
+  }
+
+  // Now extend LI to reach all uses.
+  // Ignore uses of reserved registers. We only track defs of those.
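// Register units, which computeRegUnitInterval() above builds liveness for,
// are the leaves of the register-overlap graph: two physical registers
// interfere exactly when they share at least one unit. A toy model with a
// made-up unit assignment (loosely shaped like x86's AL/AH/AX nesting, not
// any target's real tables):
static const unsigned AL_Units[] = { 0 };     // low byte
static const unsigned AH_Units[] = { 1 };     // high byte
static const unsigned AX_Units[] = { 0, 1 };  // super-register covers both

static bool unitsOverlap(const unsigned *A, unsigned NA,
                         const unsigned *B, unsigned NB) {
  for (unsigned i = 0; i != NA; ++i)
    for (unsigned j = 0; j != NB; ++j)
      if (A[i] == B[j])
        return true;  // a shared unit means the registers interfere
  return false;
}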
+ for (MCRegUnitRootIterator Roots(Unit, TRI); Roots.isValid(); ++Roots) { + unsigned Root = *Roots; + if (!isReserved(Root) && !MRI->reg_empty(Root)) + LRCalc->extendToUses(LI, Root); + for (MCSuperRegIterator Supers(Root, TRI); Supers.isValid(); ++Supers) { + unsigned Reg = *Supers; + if (!isReserved(Reg) && !MRI->reg_empty(Reg)) + LRCalc->extendToUses(LI, Reg); + } + } +} + + +/// computeLiveInRegUnits - Precompute the live ranges of any register units +/// that are live-in to an ABI block somewhere. Register values can appear +/// without a corresponding def when entering the entry block or a landing pad. +/// +void LiveIntervals::computeLiveInRegUnits() { + RegUnitIntervals.resize(TRI->getNumRegUnits()); + DEBUG(dbgs() << "Computing live-in reg-units in ABI blocks.\n"); + + // Keep track of the intervals allocated. + SmallVector NewIntvs; + + // Check all basic blocks for live-ins. + for (MachineFunction::const_iterator MFI = MF->begin(), MFE = MF->end(); + MFI != MFE; ++MFI) { + const MachineBasicBlock *MBB = MFI; + + // We only care about ABI blocks: Entry + landing pads. + if ((MFI != MF->begin() && !MBB->isLandingPad()) || MBB->livein_empty()) + continue; + + // Create phi-defs at Begin for all live-in registers. + SlotIndex Begin = Indexes->getMBBStartIdx(MBB); + DEBUG(dbgs() << Begin << "\tBB#" << MBB->getNumber()); + for (MachineBasicBlock::livein_iterator LII = MBB->livein_begin(), + LIE = MBB->livein_end(); LII != LIE; ++LII) { + for (MCRegUnitIterator Units(*LII, TRI); Units.isValid(); ++Units) { + unsigned Unit = *Units; + LiveInterval *Intv = RegUnitIntervals[Unit]; + if (!Intv) { + Intv = RegUnitIntervals[Unit] = new LiveInterval(Unit, HUGE_VALF); + NewIntvs.push_back(Intv); + } + VNInfo *VNI = Intv->createDeadDef(Begin, getVNInfoAllocator()); + (void)VNI; + DEBUG(dbgs() << ' ' << PrintRegUnit(Unit, TRI) << '#' << VNI->id); + } + } + DEBUG(dbgs() << '\n'); + } + DEBUG(dbgs() << "Created " << NewIntvs.size() << " new intervals.\n"); + + // Compute the 'normal' part of the intervals. + for (unsigned i = 0, e = NewIntvs.size(); i != e; ++i) + computeRegUnitInterval(NewIntvs[i]); +} + + /// shrinkToUses - After removing some uses of a register, shrink its live /// range to just the remaining uses. This method does not compute reaching /// defs for new uses, and it doesn't remove dead defs. @@ -649,14 +613,13 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li, SmallPtrSet LiveOut; // Visit all instructions reading li->reg. - for (MachineRegisterInfo::reg_iterator I = mri_->reg_begin(li->reg); + for (MachineRegisterInfo::reg_iterator I = MRI->reg_begin(li->reg); MachineInstr *UseMI = I.skipInstruction();) { if (UseMI->isDebugValue() || !UseMI->readsVirtualRegister(li->reg)) continue; SlotIndex Idx = getInstructionIndex(UseMI).getRegSlot(); - // Note: This intentionally picks up the wrong VNI in case of an EC redef. - // See below. - VNInfo *VNI = li->getVNInfoBefore(Idx); + LiveRangeQuery LRQ(*li, Idx); + VNInfo *VNI = LRQ.valueIn(); if (!VNI) { // This shouldn't happen: readsVirtualRegister returns true, but there is // no live value. It is likely caused by a target getting flags @@ -667,13 +630,10 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li, continue; } // Special case: An early-clobber tied operand reads and writes the - // register one slot early. The getVNInfoBefore call above would have - // picked up the value defined by UseMI. Adjust the kill slot and value. 
- if (SlotIndex::isSameInstr(VNI->def, Idx)) { - Idx = VNI->def; - VNI = li->getVNInfoBefore(Idx); - assert(VNI && "Early-clobber tied value not available"); - } + // register one slot early. + if (VNInfo *DefVNI = LRQ.valueDefined()) + Idx = DefVNI->def; + WorkList.push_back(std::make_pair(Idx, VNI)); } @@ -747,7 +707,7 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li, continue; if (VNI->isPHIDef()) { // This is a dead PHI. Remove it. - VNI->setIsUnused(true); + VNI->markUnused(); NewLI.removeRange(*LII); DEBUG(dbgs() << "Dead PHI at " << VNI->def << " may separate interval\n"); CanSeparate = true; @@ -755,7 +715,7 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li, // This is a dead def. Make sure the instruction knows. MachineInstr *MI = getInstructionFromIndex(VNI->def); assert(MI && "No instruction defining live value"); - MI->addRegisterDead(li->reg, tri_); + MI->addRegisterDead(li->reg, TRI); if (dead && MI->allDefsAreDead()) { DEBUG(dbgs() << "All defs dead: " << VNI->def << '\t' << *MI); dead->push_back(MI); @@ -775,13 +735,11 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li, // void LiveIntervals::addKillFlags() { - for (iterator I = begin(), E = end(); I != E; ++I) { - unsigned Reg = I->first; - if (TargetRegisterInfo::isPhysicalRegister(Reg)) + for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) { + unsigned Reg = TargetRegisterInfo::index2VirtReg(i); + if (MRI->reg_nodbg_empty(Reg)) continue; - if (mri_->reg_nodbg_empty(Reg)) - continue; - LiveInterval *LI = I->second; + LiveInterval *LI = &getInterval(Reg); // Every instruction that kills Reg corresponds to a live range end point. for (LiveInterval::iterator RI = LI->begin(), RE = LI->end(); RI != RE; @@ -797,101 +755,6 @@ void LiveIntervals::addKillFlags() { } } -/// getReMatImplicitUse - If the remat definition MI has one (for now, we only -/// allow one) virtual register operand, then its uses are implicitly using -/// the register. Returns the virtual register. -unsigned LiveIntervals::getReMatImplicitUse(const LiveInterval &li, - MachineInstr *MI) const { - unsigned RegOp = 0; - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI->getOperand(i); - if (!MO.isReg() || !MO.isUse()) - continue; - unsigned Reg = MO.getReg(); - if (Reg == 0 || Reg == li.reg) - continue; - - if (TargetRegisterInfo::isPhysicalRegister(Reg) && !isAllocatable(Reg)) - continue; - RegOp = MO.getReg(); - break; // Found vreg operand - leave the loop. - } - return RegOp; -} - -/// isValNoAvailableAt - Return true if the val# of the specified interval -/// which reaches the given instruction also reaches the specified use index. -bool LiveIntervals::isValNoAvailableAt(const LiveInterval &li, MachineInstr *MI, - SlotIndex UseIdx) const { - VNInfo *UValNo = li.getVNInfoAt(UseIdx); - return UValNo && UValNo == li.getVNInfoAt(getInstructionIndex(MI)); -} - -/// isReMaterializable - Returns true if the definition MI of the specified -/// val# of the specified interval is re-materializable. -bool -LiveIntervals::isReMaterializable(const LiveInterval &li, - const VNInfo *ValNo, MachineInstr *MI, - const SmallVectorImpl *SpillIs, - bool &isLoad) { - if (DisableReMat) - return false; - - if (!tii_->isTriviallyReMaterializable(MI, aa_)) - return false; - - // Target-specific code can mark an instruction as being rematerializable - // if it has one virtual reg use, though it had better be something like - // a PIC base register which is likely to be live everywhere. 
- unsigned ImpUse = getReMatImplicitUse(li, MI); - if (ImpUse) { - const LiveInterval &ImpLi = getInterval(ImpUse); - for (MachineRegisterInfo::use_nodbg_iterator - ri = mri_->use_nodbg_begin(li.reg), re = mri_->use_nodbg_end(); - ri != re; ++ri) { - MachineInstr *UseMI = &*ri; - SlotIndex UseIdx = getInstructionIndex(UseMI); - if (li.getVNInfoAt(UseIdx) != ValNo) - continue; - if (!isValNoAvailableAt(ImpLi, MI, UseIdx)) - return false; - } - - // If a register operand of the re-materialized instruction is going to - // be spilled next, then it's not legal to re-materialize this instruction. - if (SpillIs) - for (unsigned i = 0, e = SpillIs->size(); i != e; ++i) - if (ImpUse == (*SpillIs)[i]->reg) - return false; - } - return true; -} - -/// isReMaterializable - Returns true if every definition of MI of every -/// val# of the specified interval is re-materializable. -bool -LiveIntervals::isReMaterializable(const LiveInterval &li, - const SmallVectorImpl *SpillIs, - bool &isLoad) { - isLoad = false; - for (LiveInterval::const_vni_iterator i = li.vni_begin(), e = li.vni_end(); - i != e; ++i) { - const VNInfo *VNI = *i; - if (VNI->isUnused()) - continue; // Dead val#. - // Is the def for the val# rematerializable? - MachineInstr *ReMatDefMI = getInstructionFromIndex(VNI->def); - if (!ReMatDefMI) - return false; - bool DefIsLoad = false; - if (!ReMatDefMI || - !isReMaterializable(li, VNI, ReMatDefMI, SpillIs, DefIsLoad)) - return false; - isLoad |= DefIsLoad; - } - return true; -} - MachineBasicBlock* LiveIntervals::intervalIsInOneMBB(const LiveInterval &LI) const { // A local live range must be fully contained inside the block, meaning it is @@ -911,11 +774,30 @@ LiveIntervals::intervalIsInOneMBB(const LiveInterval &LI) const { // getMBBFromIndex doesn't need to search the MBB table when both indexes // belong to proper instructions. - MachineBasicBlock *MBB1 = indexes_->getMBBFromIndex(Start); - MachineBasicBlock *MBB2 = indexes_->getMBBFromIndex(Stop); + MachineBasicBlock *MBB1 = Indexes->getMBBFromIndex(Start); + MachineBasicBlock *MBB2 = Indexes->getMBBFromIndex(Stop); return MBB1 == MBB2 ? MBB1 : NULL; } +bool +LiveIntervals::hasPHIKill(const LiveInterval &LI, const VNInfo *VNI) const { + for (LiveInterval::const_vni_iterator I = LI.vni_begin(), E = LI.vni_end(); + I != E; ++I) { + const VNInfo *PHI = *I; + if (PHI->isUnused() || !PHI->isPHIDef()) + continue; + const MachineBasicBlock *PHIMBB = getMBBFromIndex(PHI->def); + // Conservatively return true instead of scanning huge predecessor lists. + if (PHIMBB->pred_size() > 100) + return true; + for (MachineBasicBlock::const_pred_iterator + PI = PHIMBB->pred_begin(), PE = PHIMBB->pred_end(); PI != PE; ++PI) + if (VNI == LI.getVNInfoBefore(Indexes->getMBBEndIdx(*PI))) + return true; + } + return false; +} + float LiveIntervals::getSpillWeight(bool isDef, bool isUse, unsigned loopDepth) { // Limit the loop depth ridiculousness. @@ -940,7 +822,6 @@ LiveRange LiveIntervals::addLiveRangeToEndOfBlock(unsigned reg, VNInfo* VN = Interval.getNextValue( SlotIndex(getInstructionIndex(startInst).getRegSlot()), getVNInfoAllocator()); - VN->setHasPHIKill(true); LiveRange LR( SlotIndex(getInstructionIndex(startInst).getRegSlot()), getMBBEndIdx(startInst->getParent()), VN); @@ -990,7 +871,7 @@ bool LiveIntervals::checkRegMaskInterference(LiveInterval &LI, if (!Found) { // This is the first overlap. Initialize UsableRegs to all ones. 
UsableRegs.clear(); - UsableRegs.resize(tri_->getNumRegs(), true); + UsableRegs.resize(TRI->getNumRegs(), true); Found = true; } // Remove usable registers clobbered by this mask. @@ -1101,6 +982,9 @@ public: BundleRanges BR = createBundleRanges(Entering, Internal, Exiting); + Entering.clear(); + Internal.clear(); + Exiting.clear(); collectRanges(MI, Entering, Internal, Exiting, hasRegMaskOp, OldIdx); assert(!hasRegMaskOp && "Can't have RegMask operand in bundle."); @@ -1176,78 +1060,44 @@ private: // TODO: Currently we're skipping uses that are reserved or have no // interval, but we're not updating their kills. This should be // fixed. - if (!LIS.hasInterval(Reg) || - (TargetRegisterInfo::isPhysicalRegister(Reg) && LIS.isReserved(Reg))) + if (TargetRegisterInfo::isPhysicalRegister(Reg) && LIS.isReserved(Reg)) continue; - LiveInterval* LI = &LIS.getInterval(Reg); - - if (MO.readsReg()) { - LiveRange* LR = LI->getLiveRangeContaining(OldIdx); - if (LR != 0) - Entering.insert(std::make_pair(LI, LR)); - } - if (MO.isDef()) { - if (MO.isEarlyClobber()) { - LiveRange* LR = LI->getLiveRangeContaining(OldIdx.getRegSlot(true)); - assert(LR != 0 && "No EC range?"); - if (LR->end > OldIdx.getDeadSlot()) - Exiting.insert(std::make_pair(LI, LR)); - else - Internal.insert(std::make_pair(LI, LR)); - } else if (MO.isDead()) { - LiveRange* LR = LI->getLiveRangeContaining(OldIdx.getRegSlot()); - assert(LR != 0 && "No dead-def range?"); - Internal.insert(std::make_pair(LI, LR)); - } else { - LiveRange* LR = LI->getLiveRangeContaining(OldIdx.getDeadSlot()); - assert(LR && LR->end > OldIdx.getDeadSlot() && - "Non-dead-def should have live range exiting."); - Exiting.insert(std::make_pair(LI, LR)); - } + // Collect ranges for register units. These live ranges are computed on + // demand, so just skip any that haven't been computed yet. + if (TargetRegisterInfo::isPhysicalRegister(Reg)) { + for (MCRegUnitIterator Units(Reg, &TRI); Units.isValid(); ++Units) + if (LiveInterval *LI = LIS.getCachedRegUnit(*Units)) + collectRanges(MO, LI, Entering, Internal, Exiting, OldIdx); + } else { + // Collect ranges for individual virtual registers. + collectRanges(MO, &LIS.getInterval(Reg), + Entering, Internal, Exiting, OldIdx); } } } - // Collect IntRangePairs for all operands of MI that may need fixing. - void collectRangesInBundle(MachineInstr* MI, RangeSet& Entering, - RangeSet& Exiting, SlotIndex MIStartIdx, - SlotIndex MIEndIdx) { - for (MachineInstr::mop_iterator MOI = MI->operands_begin(), - MOE = MI->operands_end(); - MOI != MOE; ++MOI) { - const MachineOperand& MO = *MOI; - assert(!MO.isRegMask() && "Can't have RegMasks in bundles."); - if (!MO.isReg() || MO.getReg() == 0) - continue; - - unsigned Reg = MO.getReg(); - - // TODO: Currently we're skipping uses that are reserved or have no - // interval, but we're not updating their kills. This should be - // fixed. 
- if (!LIS.hasInterval(Reg) || - (TargetRegisterInfo::isPhysicalRegister(Reg) && LIS.isReserved(Reg))) - continue; - - LiveInterval* LI = &LIS.getInterval(Reg); - - if (MO.readsReg()) { - LiveRange* LR = LI->getLiveRangeContaining(MIStartIdx); - if (LR != 0) - Entering.insert(std::make_pair(LI, LR)); - } - if (MO.isDef()) { - assert(!MO.isEarlyClobber() && "Early clobbers not allowed in bundles."); - assert(!MO.isDead() && "Dead-defs not allowed in bundles."); - LiveRange* LR = LI->getLiveRangeContaining(MIEndIdx.getDeadSlot()); - assert(LR != 0 && "Internal ranges not allowed in bundles."); + void collectRanges(const MachineOperand &MO, LiveInterval *LI, + RangeSet &Entering, RangeSet &Internal, RangeSet &Exiting, + SlotIndex OldIdx) { + if (MO.readsReg()) { + LiveRange* LR = LI->getLiveRangeContaining(OldIdx); + if (LR != 0) + Entering.insert(std::make_pair(LI, LR)); + } + if (MO.isDef()) { + LiveRange* LR = LI->getLiveRangeContaining(OldIdx.getRegSlot()); + assert(LR != 0 && "No live range for def?"); + if (LR->end > OldIdx.getDeadSlot()) Exiting.insert(std::make_pair(LI, LR)); - } + else + Internal.insert(std::make_pair(LI, LR)); } } - BundleRanges createBundleRanges(RangeSet& Entering, RangeSet& Internal, RangeSet& Exiting) { + BundleRanges createBundleRanges(RangeSet& Entering, + RangeSet& Internal, + RangeSet& Exiting) { BundleRanges BR; for (RangeSet::iterator EI = Entering.begin(), EE = Entering.end(); @@ -1284,7 +1134,8 @@ private: return; // Bail out if we don't have kill flags on the old register. MachineInstr* NewKillMI = LIS.getInstructionFromIndex(newKillIdx); assert(OldKillMI->killsRegister(reg) && "Old 'kill' instr isn't a kill."); - assert(!NewKillMI->killsRegister(reg) && "New kill instr is already a kill."); + assert(!NewKillMI->killsRegister(reg) && + "New kill instr is already a kill."); OldKillMI->clearRegisterKills(reg, &TRI); NewKillMI->addRegisterKilled(reg, &TRI); } @@ -1523,22 +1374,23 @@ private: }; void LiveIntervals::handleMove(MachineInstr* MI) { - SlotIndex OldIndex = indexes_->getInstructionIndex(MI); - indexes_->removeMachineInstrFromMaps(MI); + SlotIndex OldIndex = Indexes->getInstructionIndex(MI); + Indexes->removeMachineInstrFromMaps(MI); SlotIndex NewIndex = MI->isInsideBundle() ? 
- indexes_->getInstructionIndex(MI) : - indexes_->insertMachineInstrInMaps(MI); + Indexes->getInstructionIndex(MI) : + Indexes->insertMachineInstrInMaps(MI); assert(getMBBStartIdx(MI->getParent()) <= OldIndex && OldIndex < getMBBEndIdx(MI->getParent()) && "Cannot handle moves across basic block boundaries."); assert(!MI->isBundled() && "Can't handle bundled instructions yet."); - HMEditor HME(*this, *mri_, *tri_, NewIndex); + HMEditor HME(*this, *MRI, *TRI, NewIndex); HME.moveAllRangesFrom(MI, OldIndex); } -void LiveIntervals::handleMoveIntoBundle(MachineInstr* MI, MachineInstr* BundleStart) { - SlotIndex NewIndex = indexes_->getInstructionIndex(BundleStart); - HMEditor HME(*this, *mri_, *tri_, NewIndex); +void LiveIntervals::handleMoveIntoBundle(MachineInstr* MI, + MachineInstr* BundleStart) { + SlotIndex NewIndex = Indexes->getInstructionIndex(BundleStart); + HMEditor HME(*this, *MRI, *TRI, NewIndex); HME.moveAllRangesInto(MI, BundleStart); } diff --git a/lib/CodeGen/LiveIntervalUnion.cpp b/lib/CodeGen/LiveIntervalUnion.cpp index 60a6880..dadd02b 100644 --- a/lib/CodeGen/LiveIntervalUnion.cpp +++ b/lib/CodeGen/LiveIntervalUnion.cpp @@ -81,7 +81,6 @@ void LiveIntervalUnion::extract(LiveInterval &VirtReg) { void LiveIntervalUnion::print(raw_ostream &OS, const TargetRegisterInfo *TRI) const { - OS << "LIU " << PrintReg(RepReg, TRI); if (empty()) { OS << " empty\n"; return; } @@ -209,3 +208,26 @@ bool LiveIntervalUnion::Query::checkLoopInterference(MachineLoopRange *Loop) { VRI = VirtReg->advanceTo(VRI, Overlaps.start()); } } + +void LiveIntervalUnion::Array::init(LiveIntervalUnion::Allocator &Alloc, + unsigned NSize) { + // Reuse existing allocation. + if (NSize == Size) + return; + clear(); + Size = NSize; + LIUs = static_cast<LiveIntervalUnion*>( + malloc(sizeof(LiveIntervalUnion)*NSize)); + for (unsigned i = 0; i != Size; ++i) + new(LIUs + i) LiveIntervalUnion(Alloc); +} + +void LiveIntervalUnion::Array::clear() { + if (!LIUs) + return; + for (unsigned i = 0; i != Size; ++i) + LIUs[i].~LiveIntervalUnion(); + free(LIUs); + Size = 0; + LIUs = 0; +} diff --git a/lib/CodeGen/LiveIntervalUnion.h b/lib/CodeGen/LiveIntervalUnion.h index dbf5ac1..cd4e690 100644 --- a/lib/CodeGen/LiveIntervalUnion.h +++ b/lib/CodeGen/LiveIntervalUnion.h @@ -60,13 +60,11 @@ public: class Query; private: - const unsigned RepReg; // representative register number unsigned Tag; // unique tag for current contents. LiveSegments Segments; // union of virtual reg segments public: - LiveIntervalUnion(unsigned r, Allocator &a) : RepReg(r), Tag(0), Segments(a) - {} + explicit LiveIntervalUnion(Allocator &a) : Tag(0), Segments(a) {} // Iterate over all segments in the union of live virtual registers ordered // by their starting position. @@ -183,6 +181,28 @@ public: Query(const Query&); // DO NOT IMPLEMENT void operator=(const Query&); // DO NOT IMPLEMENT }; + + // Array of LiveIntervalUnions. + class Array { + unsigned Size; + LiveIntervalUnion *LIUs; + public: + Array() : Size(0), LIUs(0) {} + ~Array() { clear(); } + + // Initialize the array to have Size entries. + // Reuse an existing allocation if the size matches. 
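The init() declared just below is implemented earlier in this file's diff with malloc'ed storage, placement new, and explicit destructor calls, since LiveIntervalUnion takes a constructor argument and cannot be default-constructed into a plain array. A minimal self-contained restatement of that idiom, with a generic T standing in for LiveIntervalUnion; the names here are illustrative, not from the patch:

#include <cstdlib>
#include <new>

template <typename T, typename CtorArg>
class SlabArray {
  unsigned Size;
  T *Items;
public:
  SlabArray() : Size(0), Items(0) {}
  ~SlabArray() { clear(); }

  // Allocate raw memory and construct each element in place.
  void init(CtorArg &A, unsigned N) {
    if (N == Size)
      return; // reuse the existing allocation, as the patch does
    clear();
    Size = N;
    Items = static_cast<T*>(std::malloc(sizeof(T) * N));
    for (unsigned i = 0; i != Size; ++i)
      new (Items + i) T(A); // placement new; no default ctor required
  }

  // Run the destructors by hand before releasing the raw memory.
  void clear() {
    if (!Items)
      return;
    for (unsigned i = 0; i != Size; ++i)
      Items[i].~T();
    std::free(Items);
    Size = 0;
    Items = 0;
  }
};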
+ void init(LiveIntervalUnion::Allocator&, unsigned Size); + + unsigned size() const { return Size; } + + void clear(); + + LiveIntervalUnion& operator[](unsigned idx) { + assert(idx < Size && "idx out of bounds"); + return LIUs[idx]; + } + }; }; } // end namespace llvm diff --git a/lib/CodeGen/LiveRangeCalc.cpp b/lib/CodeGen/LiveRangeCalc.cpp index d8ab791..d828f25 100644 --- a/lib/CodeGen/LiveRangeCalc.cpp +++ b/lib/CodeGen/LiveRangeCalc.cpp @@ -14,10 +14,19 @@ #define DEBUG_TYPE "regalloc" #include "LiveRangeCalc.h" #include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" using namespace llvm; -void LiveRangeCalc::reset(const MachineFunction *MF) { +void LiveRangeCalc::reset(const MachineFunction *MF, + SlotIndexes *SI, + MachineDominatorTree *MDT, + VNInfo::Allocator *VNIA) { + MRI = &MF->getRegInfo(); + Indexes = SI; + DomTree = MDT; + Alloc = VNIA; + unsigned N = MF->getNumBlockIDs(); Seen.clear(); Seen.resize(N); @@ -26,8 +35,72 @@ void LiveRangeCalc::reset(const MachineFunction *MF) { } +void LiveRangeCalc::createDeadDefs(LiveInterval *LI, unsigned Reg) { + assert(MRI && Indexes && "call reset() first"); + + // Visit all def operands. If the same instruction has multiple defs of Reg, + // LI->createDeadDef() will deduplicate. + for (MachineRegisterInfo::def_iterator + I = MRI->def_begin(Reg), E = MRI->def_end(); I != E; ++I) { + const MachineInstr *MI = &*I; + // Find the corresponding slot index. + SlotIndex Idx; + if (MI->isPHI()) + // PHI defs begin at the basic block start index. + Idx = Indexes->getMBBStartIdx(MI->getParent()); + else + // Instructions are either normal 'r', or early clobber 'e'. + Idx = Indexes->getInstructionIndex(MI) + .getRegSlot(I.getOperand().isEarlyClobber()); + + // Create the def in LI. This may find an existing def. + LI->createDeadDef(Idx, *Alloc); + } +} + + +void LiveRangeCalc::extendToUses(LiveInterval *LI, unsigned Reg) { + assert(MRI && Indexes && "call reset() first"); + + // Visit all operands that read Reg. This may include partial defs. + for (MachineRegisterInfo::reg_nodbg_iterator I = MRI->reg_nodbg_begin(Reg), + E = MRI->reg_nodbg_end(); I != E; ++I) { + const MachineOperand &MO = I.getOperand(); + if (!MO.readsReg()) + continue; + // MI is reading Reg. We may have visited MI before if it happens to be + // reading Reg multiple times. That is OK, extend() is idempotent. + const MachineInstr *MI = &*I; + + // Find the SlotIndex being read. + SlotIndex Idx; + if (MI->isPHI()) { + assert(!MO.isDef() && "Cannot handle PHI def of partial register."); + // PHI operands are paired: (Reg, PredMBB). + // Extend the live range to be live-out from PredMBB. + Idx = Indexes->getMBBEndIdx(MI->getOperand(I.getOperandNo()+1).getMBB()); + } else { + // This is a normal instruction. + Idx = Indexes->getInstructionIndex(MI).getRegSlot(); + // Check for early-clobber redefs. + unsigned DefIdx; + if (MO.isDef()) { + if (MO.isEarlyClobber()) + Idx = Idx.getRegSlot(true); + } else if (MI->isRegTiedToDefOperand(I.getOperandNo(), &DefIdx)) { + // FIXME: This would be a lot easier if tied early-clobber uses also + // had an early-clobber flag. + if (MI->getOperand(DefIdx).isEarlyClobber()) + Idx = Idx.getRegSlot(true); + } + } + extend(LI, Idx, Reg); + } +} + + // Transfer information from the LiveIn vector to the live ranges. 
-void LiveRangeCalc::updateLiveIns(VNInfo *OverrideVNI, SlotIndexes *Indexes) { +void LiveRangeCalc::updateLiveIns(VNInfo *OverrideVNI) { for (SmallVectorImpl<LiveInBlock>::iterator I = LiveIn.begin(), E = LiveIn.end(); I != E; ++I) { if (!I->DomNode) @@ -56,9 +129,7 @@ void LiveRangeCalc::updateLiveIns(VNInfo *OverrideVNI, SlotIndexes *Indexes) { void LiveRangeCalc::extend(LiveInterval *LI, SlotIndex Kill, - SlotIndexes *Indexes, - MachineDominatorTree *DomTree, - VNInfo::Allocator *Alloc) { + unsigned PhysReg) { assert(LI && "Missing live range"); assert(Kill.isValid() && "Invalid SlotIndex"); assert(Indexes && "Missing SlotIndexes"); @@ -75,34 +146,31 @@ void LiveRangeCalc::extend(LiveInterval *LI, // multiple values, and we may need to create even more phi-defs to preserve // VNInfo SSA form. Perform a search for all predecessor blocks where we // know the dominating VNInfo. - VNInfo *VNI = findReachingDefs(LI, KillMBB, Kill, Indexes, DomTree); + VNInfo *VNI = findReachingDefs(LI, KillMBB, Kill, PhysReg); // When there were multiple different values, we may need new PHIs. if (!VNI) - updateSSA(Indexes, DomTree, Alloc); + updateSSA(); - updateLiveIns(VNI, Indexes); + updateLiveIns(VNI); } // This function is called by a client after using the low-level API to add // live-out and live-in blocks. The unique value optimization is not // available, SplitEditor::transferValues handles that case directly anyway. -void LiveRangeCalc::calculateValues(SlotIndexes *Indexes, - MachineDominatorTree *DomTree, - VNInfo::Allocator *Alloc) { +void LiveRangeCalc::calculateValues() { assert(Indexes && "Missing SlotIndexes"); assert(DomTree && "Missing dominator tree"); - updateSSA(Indexes, DomTree, Alloc); - updateLiveIns(0, Indexes); + updateSSA(); + updateLiveIns(0); } VNInfo *LiveRangeCalc::findReachingDefs(LiveInterval *LI, MachineBasicBlock *KillMBB, SlotIndex Kill, - SlotIndexes *Indexes, - MachineDominatorTree *DomTree) { + unsigned PhysReg) { // Blocks where LI should be live-in. SmallVector<MachineBasicBlock*, 16> WorkList(1, KillMBB); @@ -113,7 +181,22 @@ VNInfo *LiveRangeCalc::findReachingDefs(LiveInterval *LI, // Using Seen as a visited set, perform a BFS for all reaching defs. for (unsigned i = 0; i != WorkList.size(); ++i) { MachineBasicBlock *MBB = WorkList[i]; - assert(!MBB->pred_empty() && "Value live-in to entry block?"); + +#ifndef NDEBUG + if (MBB->pred_empty()) { + MBB->getParent()->verify(); + llvm_unreachable("Use not jointly dominated by defs."); + } + + if (TargetRegisterInfo::isPhysicalRegister(PhysReg) && + !MBB->isLiveIn(PhysReg)) { + MBB->getParent()->verify(); + errs() << "The register needs to be live in to BB#" << MBB->getNumber() + << ", but is missing from the live-in list.\n"; + llvm_unreachable("Invalid global physical register"); + } +#endif + for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(), PE = MBB->pred_end(); PI != PE; ++PI) { MachineBasicBlock *Pred = *PI; @@ -168,9 +251,7 @@ VNInfo *LiveRangeCalc::findReachingDefs(LiveInterval *LI, // This is essentially the same iterative algorithm that SSAUpdater uses, // except we already have a dominator tree, so we don't have to recompute it. 
-void LiveRangeCalc::updateSSA(SlotIndexes *Indexes, - MachineDominatorTree *DomTree, - VNInfo::Allocator *Alloc) { +void LiveRangeCalc::updateSSA() { assert(Indexes && "Missing SlotIndexes"); assert(DomTree && "Missing dominator tree"); @@ -238,7 +319,6 @@ void LiveRangeCalc::updateSSA(SlotIndexes *Indexes, SlotIndex Start, End; tie(Start, End) = Indexes->getMBBRange(MBB); VNInfo *VNI = I->LI->getNextValue(Start, *Alloc); - VNI->setIsPHIDef(true); I->Value = VNI; // This block is done, we know the final value. I->DomNode = 0; diff --git a/lib/CodeGen/LiveRangeCalc.h b/lib/CodeGen/LiveRangeCalc.h index b8c8585..909829b 100644 --- a/lib/CodeGen/LiveRangeCalc.h +++ b/lib/CodeGen/LiveRangeCalc.h @@ -34,6 +34,11 @@ template <class NodeT> class DomTreeNodeBase; typedef DomTreeNodeBase<MachineBasicBlock> MachineDomTreeNode; class LiveRangeCalc { + const MachineRegisterInfo *MRI; + SlotIndexes *Indexes; + MachineDominatorTree *DomTree; + VNInfo::Allocator *Alloc; + /// Seen - Bit vector of active entries in LiveOut, also used as a visited /// set by findReachingDefs. One entry per basic block, indexed by block /// number. This is kept as a separate bit vector because it can be cleared @@ -100,26 +105,27 @@ class LiveRangeCalc { /// to be live-in are added to LiveIn. If a unique reaching def is found, /// its value is returned, if Kill is jointly dominated by multiple values, /// NULL is returned. + /// + /// PhysReg, when set, is used to verify live-in lists on basic blocks. VNInfo *findReachingDefs(LiveInterval *LI, MachineBasicBlock *KillMBB, SlotIndex Kill, - SlotIndexes *Indexes, - MachineDominatorTree *DomTree); + unsigned PhysReg); /// updateSSA - Compute the values that will be live in to all requested /// blocks in LiveIn. Create PHI-def values as required to preserve SSA form. /// /// Every live-in block must be jointly dominated by the added live-out /// blocks. No values are read from the live ranges. - void updateSSA(SlotIndexes *Indexes, - MachineDominatorTree *DomTree, - VNInfo::Allocator *Alloc); + void updateSSA(); /// updateLiveIns - Add liveness as specified in the LiveIn vector, using VNI /// as a wildcard value for LiveIn entries without a value. - void updateLiveIns(VNInfo *VNI, SlotIndexes*); + void updateLiveIns(VNInfo *VNI); public: + LiveRangeCalc() : MRI(0), Indexes(0), DomTree(0), Alloc(0) {} + //===--------------------------------------------------------------------===// // High-level interface. //===--------------------------------------------------------------------===// @@ -132,14 +138,14 @@ public: /// that may overlap a previously computed live range, and before the first /// live range in a function. If live ranges are not known to be /// non-overlapping, call reset before each. - void reset(const MachineFunction *MF); + void reset(const MachineFunction *MF, + SlotIndexes*, + MachineDominatorTree*, + VNInfo::Allocator*); /// calculate - Calculate the live range of a virtual register from its defs /// and uses. LI must be empty with no values. - void calculate(LiveInterval *LI, - MachineRegisterInfo *MRI, - SlotIndexes *Indexes, - VNInfo::Allocator *Alloc); + void calculate(LiveInterval *LI); //===--------------------------------------------------------------------===// // Mid-level interface. @@ -154,21 +160,30 @@ public: /// Kill is not dominated by a single existing value, PHI-defs are inserted /// as required to preserve SSA form. If Kill is known to be dominated by a /// single existing value, Alloc may be null. 
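Taken together, the effect of this refactoring on a caller is that the collaborators are supplied once, up front, and the per-interval calls shrink to a single argument. A hypothetical client, sketched only from the declarations added in this patch; the surrounding pass boilerplate (MF, LIS, MDT, LI, KillIdx) is assumed, not shown:

// Inside some pass that owns LIS (LiveIntervals) and MDT
// (MachineDominatorTree); LRC is a LiveRangeCalc member.
LRC.reset(&MF, LIS->getSlotIndexes(), MDT, &LIS->getVNInfoAllocator());

// Compute a virtual register's live range from scratch...
LRC.createDeadDefs(LI);  // one VNInfo per def of LI->reg
LRC.extendToUses(LI);    // then grow each range to reach every use

// ...or extend an existing range to a specific kill point.
LRC.extend(LI, KillIdx); // PhysReg defaults to 0: no live-in verification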
- void extend(LiveInterval *LI, - SlotIndex Kill, - SlotIndexes *Indexes, - MachineDominatorTree *DomTree, - VNInfo::Allocator *Alloc); + /// + /// PhysReg, when set, is used to verify live-in lists on basic blocks. + void extend(LiveInterval *LI, SlotIndex Kill, unsigned PhysReg = 0); + + /// createDeadDefs - Create a dead def in LI for every def operand of Reg. + /// Each instruction defining Reg gets a new VNInfo with a corresponding + /// minimal live range. + void createDeadDefs(LiveInterval *LI, unsigned Reg); - /// extendToUses - Extend the live range of LI to reach all uses. + /// createDeadDefs - Create a dead def in LI for every def of LI->reg. + void createDeadDefs(LiveInterval *LI) { + createDeadDefs(LI, LI->reg); + } + + /// extendToUses - Extend the live range of LI to reach all uses of Reg. /// /// All uses must be jointly dominated by existing liveness. PHI-defs are /// inserted as needed to preserve SSA form. - void extendToUses(LiveInterval *LI, - MachineRegisterInfo *MRI, - SlotIndexes *Indexes, - MachineDominatorTree *DomTree, - VNInfo::Allocator *Alloc); + void extendToUses(LiveInterval *LI, unsigned Reg); + + /// extendToUses - Extend the live range of LI to reach all uses of LI->reg. + void extendToUses(LiveInterval *LI) { + extendToUses(LI, LI->reg); + } //===--------------------------------------------------------------------===// // Low-level interface. @@ -216,9 +231,7 @@ public: /// /// Every predecessor of a live-in block must have been given a value with /// setLiveOutValue, the value may be null for live-trough blocks. - void calculateValues(SlotIndexes *Indexes, - MachineDominatorTree *DomTree, - VNInfo::Allocator *Alloc); + void calculateValues(); }; } // end namespace llvm diff --git a/lib/CodeGen/LiveRangeEdit.cpp b/lib/CodeGen/LiveRangeEdit.cpp index 695f536..b4ce9aa 100644 --- a/lib/CodeGen/LiveRangeEdit.cpp +++ b/lib/CodeGen/LiveRangeEdit.cpp @@ -38,7 +38,7 @@ LiveInterval &LiveRangeEdit::createFrom(unsigned OldReg) { VRM->setIsSplitFromReg(VReg, VRM->getOriginal(OldReg)); } LiveInterval &LI = LIS.getOrCreateInterval(VReg); - newRegs_.push_back(&LI); + NewRegs.push_back(&LI); return LI; } @@ -46,16 +46,16 @@ bool LiveRangeEdit::checkRematerializable(VNInfo *VNI, const MachineInstr *DefMI, AliasAnalysis *aa) { assert(DefMI && "Missing instruction"); - scannedRemattable_ = true; + ScannedRemattable = true; if (!TII.isTriviallyReMaterializable(DefMI, aa)) return false; - remattable_.insert(VNI); + Remattable.insert(VNI); return true; } void LiveRangeEdit::scanRemattable(AliasAnalysis *aa) { - for (LiveInterval::vni_iterator I = parent_.vni_begin(), - E = parent_.vni_end(); I != E; ++I) { + for (LiveInterval::vni_iterator I = getParent().vni_begin(), + E = getParent().vni_end(); I != E; ++I) { VNInfo *VNI = *I; if (VNI->isUnused()) continue; @@ -64,13 +64,13 @@ void LiveRangeEdit::scanRemattable(AliasAnalysis *aa) { continue; checkRematerializable(VNI, DefMI, aa); } - scannedRemattable_ = true; + ScannedRemattable = true; } bool LiveRangeEdit::anyRematerializable(AliasAnalysis *aa) { - if (!scannedRemattable_) + if (!ScannedRemattable) scanRemattable(aa); - return !remattable_.empty(); + return !Remattable.empty(); } /// allUsesAvailableAt - Return true if all registers used by OrigMI at @@ -82,12 +82,16 @@ bool LiveRangeEdit::allUsesAvailableAt(const MachineInstr *OrigMI, UseIdx = UseIdx.getRegSlot(true); for (unsigned i = 0, e = OrigMI->getNumOperands(); i != e; ++i) { const MachineOperand &MO = OrigMI->getOperand(i); - if (!MO.isReg() || !MO.getReg() || 
MO.isDef()) - continue; - // Reserved registers are OK. - if (MO.isUndef() || !LIS.hasInterval(MO.getReg())) + if (!MO.isReg() || !MO.getReg() || !MO.readsReg()) continue; + // We can't remat physreg uses, unless it is a constant. + if (TargetRegisterInfo::isPhysicalRegister(MO.getReg())) { + if (MRI.isConstantPhysReg(MO.getReg(), VRM->getMachineFunction())) + continue; + return false; + } + LiveInterval &li = LIS.getInterval(MO.getReg()); const VNInfo *OVNI = li.getVNInfoAt(OrigIdx); if (!OVNI) @@ -101,10 +105,10 @@ bool LiveRangeEdit::allUsesAvailableAt(const MachineInstr *OrigMI, bool LiveRangeEdit::canRematerializeAt(Remat &RM, SlotIndex UseIdx, bool cheapAsAMove) { - assert(scannedRemattable_ && "Call anyRematerializable first"); + assert(ScannedRemattable && "Call anyRematerializable first"); // Use scanRemattable info. - if (!remattable_.count(RM.ParentVNI)) + if (!Remattable.count(RM.ParentVNI)) return false; // No defining instruction provided. @@ -136,13 +140,13 @@ SlotIndex LiveRangeEdit::rematerializeAt(MachineBasicBlock &MBB, bool Late) { assert(RM.OrigMI && "Invalid remat"); TII.reMaterialize(MBB, MI, DestReg, 0, RM.OrigMI, tri); - rematted_.insert(RM.ParentVNI); + Rematted.insert(RM.ParentVNI); return LIS.getSlotIndexes()->insertMachineInstrInMaps(--MI, Late) .getRegSlot(); } void LiveRangeEdit::eraseVirtReg(unsigned Reg) { - if (delegate_ && delegate_->LRE_CanEraseVirtReg(Reg)) + if (TheDelegate && TheDelegate->LRE_CanEraseVirtReg(Reg)) LIS.removeInterval(Reg); } @@ -173,6 +177,19 @@ bool LiveRangeEdit::foldAsLoad(LiveInterval *LI, if (!DefMI || !UseMI) return false; + // Since we're moving the DefMI load, make sure we're not extending any live + // ranges. + if (!allUsesAvailableAt(DefMI, + LIS.getInstructionIndex(DefMI), + LIS.getInstructionIndex(UseMI))) + return false; + + // We also need to make sure it is safe to move the load. + // Assume there are stores between DefMI and UseMI. + bool SawStore = true; + if (!DefMI->isSafeToMove(&TII, 0, SawStore)) + return false; + DEBUG(dbgs() << "Try to fold single def: " << *DefMI << " into single use: " << *UseMI); @@ -220,14 +237,22 @@ void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl &Dead, DEBUG(dbgs() << "Deleting dead def " << Idx << '\t' << *MI); + // Collect virtual registers to be erased after MI is gone. + SmallVector RegsToErase; + bool ReadsPhysRegs = false; + // Check for live intervals that may shrink for (MachineInstr::mop_iterator MOI = MI->operands_begin(), MOE = MI->operands_end(); MOI != MOE; ++MOI) { if (!MOI->isReg()) continue; unsigned Reg = MOI->getReg(); - if (!TargetRegisterInfo::isVirtualRegister(Reg)) + if (!TargetRegisterInfo::isVirtualRegister(Reg)) { + // Check if MI reads any unreserved physregs. + if (Reg && MOI->readsReg() && !LIS.isReserved(Reg)) + ReadsPhysRegs = true; continue; + } LiveInterval &LI = LIS.getInterval(Reg); // Shrink read registers, unless it is likely to be expensive and @@ -242,22 +267,49 @@ void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl &Dead, // Remove defined value. 
if (MOI->isDef()) { if (VNInfo *VNI = LI.getVNInfoAt(Idx)) { - if (delegate_) - delegate_->LRE_WillShrinkVirtReg(LI.reg); + if (TheDelegate) + TheDelegate->LRE_WillShrinkVirtReg(LI.reg); LI.removeValNo(VNI); - if (LI.empty()) { - ToShrink.remove(&LI); - eraseVirtReg(Reg); - } + if (LI.empty()) + RegsToErase.push_back(Reg); } } } - if (delegate_) - delegate_->LRE_WillEraseInstruction(MI); - LIS.RemoveMachineInstrFromMaps(MI); - MI->eraseFromParent(); - ++NumDCEDeleted; + // Currently, we don't support DCE of physreg live ranges. If MI reads + // any unreserved physregs, don't erase the instruction, but turn it into + // a KILL instead. This way, the physreg live ranges don't end up + // dangling. + // FIXME: It would be better to have something like shrinkToUses() for + // physregs. That could potentially enable more DCE and it would free up + // the physreg. It would not happen often, though. + if (ReadsPhysRegs) { + MI->setDesc(TII.get(TargetOpcode::KILL)); + // Remove all operands that aren't physregs. + for (unsigned i = MI->getNumOperands(); i; --i) { + const MachineOperand &MO = MI->getOperand(i-1); + if (MO.isReg() && TargetRegisterInfo::isPhysicalRegister(MO.getReg())) + continue; + MI->RemoveOperand(i-1); + } + DEBUG(dbgs() << "Converted physregs to:\t" << *MI); + } else { + if (TheDelegate) + TheDelegate->LRE_WillEraseInstruction(MI); + LIS.RemoveMachineInstrFromMaps(MI); + MI->eraseFromParent(); + ++NumDCEDeleted; + } + + // Erase any virtregs that are now empty and unused. There may be + // uses around. Keep the empty live range in that case. + for (unsigned i = 0, e = RegsToErase.size(); i != e; ++i) { + unsigned Reg = RegsToErase[i]; + if (LIS.hasInterval(Reg) && MRI.reg_nodbg_empty(Reg)) { + ToShrink.remove(&LIS.getInterval(Reg)); + eraseVirtReg(Reg); + } + } } if (ToShrink.empty()) @@ -268,8 +320,8 @@ void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl &Dead, ToShrink.pop_back(); if (foldAsLoad(LI, Dead)) continue; - if (delegate_) - delegate_->LRE_WillShrinkVirtReg(LI->reg); + if (TheDelegate) + TheDelegate->LRE_WillShrinkVirtReg(LI->reg); if (!LIS.shrinkToUses(LI, &Dead)) continue; @@ -304,10 +356,14 @@ void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl &Dead, // interval must contain all the split products, and LI doesn't. if (IsOriginal) VRM->setIsSplitFromReg(Dups.back()->reg, 0); - if (delegate_) - delegate_->LRE_DidCloneVirtReg(Dups.back()->reg, LI->reg); + if (TheDelegate) + TheDelegate->LRE_DidCloneVirtReg(Dups.back()->reg, LI->reg); } ConEQ.Distribute(&Dups[0], MRI); + DEBUG({ + for (unsigned i = 0; i != NumComp; ++i) + dbgs() << '\t' << *Dups[i] << '\n'; + }); } } diff --git a/lib/CodeGen/LiveRegMatrix.cpp b/lib/CodeGen/LiveRegMatrix.cpp new file mode 100644 index 0000000..cdb1776 --- /dev/null +++ b/lib/CodeGen/LiveRegMatrix.cpp @@ -0,0 +1,152 @@ +//===-- LiveRegMatrix.cpp - Track register interference -------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the LiveRegMatrix analysis pass. 
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "regalloc" +#include "LiveRegMatrix.h" +#include "VirtRegMap.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +STATISTIC(NumAssigned , "Number of registers assigned"); +STATISTIC(NumUnassigned , "Number of registers unassigned"); + +char LiveRegMatrix::ID = 0; +INITIALIZE_PASS_BEGIN(LiveRegMatrix, "liveregmatrix", + "Live Register Matrix", false, false) +INITIALIZE_PASS_DEPENDENCY(LiveIntervals) +INITIALIZE_PASS_DEPENDENCY(VirtRegMap) +INITIALIZE_PASS_END(LiveRegMatrix, "liveregmatrix", + "Live Register Matrix", false, false) + +LiveRegMatrix::LiveRegMatrix() : MachineFunctionPass(ID), + UserTag(0), RegMaskTag(0), RegMaskVirtReg(0) {} + +void LiveRegMatrix::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + AU.addRequiredTransitive(); + AU.addRequiredTransitive(); + MachineFunctionPass::getAnalysisUsage(AU); +} + +bool LiveRegMatrix::runOnMachineFunction(MachineFunction &MF) { + TRI = MF.getTarget().getRegisterInfo(); + MRI = &MF.getRegInfo(); + LIS = &getAnalysis(); + VRM = &getAnalysis(); + + unsigned NumRegUnits = TRI->getNumRegUnits(); + if (NumRegUnits != Matrix.size()) + Queries.reset(new LiveIntervalUnion::Query[NumRegUnits]); + Matrix.init(LIUAlloc, NumRegUnits); + + // Make sure no stale queries get reused. + invalidateVirtRegs(); + return false; +} + +void LiveRegMatrix::releaseMemory() { + for (unsigned i = 0, e = Matrix.size(); i != e; ++i) { + Matrix[i].clear(); + Queries[i].clear(); + } +} + +void LiveRegMatrix::assign(LiveInterval &VirtReg, unsigned PhysReg) { + DEBUG(dbgs() << "assigning " << PrintReg(VirtReg.reg, TRI) + << " to " << PrintReg(PhysReg, TRI) << ':'); + assert(!VRM->hasPhys(VirtReg.reg) && "Duplicate VirtReg assignment"); + VRM->assignVirt2Phys(VirtReg.reg, PhysReg); + MRI->setPhysRegUsed(PhysReg); + for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) { + DEBUG(dbgs() << ' ' << PrintRegUnit(*Units, TRI)); + Matrix[*Units].unify(VirtReg); + } + ++NumAssigned; + DEBUG(dbgs() << '\n'); +} + +void LiveRegMatrix::unassign(LiveInterval &VirtReg) { + unsigned PhysReg = VRM->getPhys(VirtReg.reg); + DEBUG(dbgs() << "unassigning " << PrintReg(VirtReg.reg, TRI) + << " from " << PrintReg(PhysReg, TRI) << ':'); + VRM->clearVirt(VirtReg.reg); + for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) { + DEBUG(dbgs() << ' ' << PrintRegUnit(*Units, TRI)); + Matrix[*Units].extract(VirtReg); + } + ++NumUnassigned; + DEBUG(dbgs() << '\n'); +} + +bool LiveRegMatrix::checkRegMaskInterference(LiveInterval &VirtReg, + unsigned PhysReg) { + // Check if the cached information is valid. + // The same BitVector can be reused for all PhysRegs. + // We could cache multiple VirtRegs if it becomes necessary. + if (RegMaskVirtReg != VirtReg.reg || RegMaskTag != UserTag) { + RegMaskVirtReg = VirtReg.reg; + RegMaskTag = UserTag; + RegMaskUsable.clear(); + LIS->checkRegMaskInterference(VirtReg, RegMaskUsable); + } + + // The BitVector is indexed by PhysReg, not register unit. + // Regmask interference is more fine grained than regunits. + // For example, a Win64 call can clobber %ymm8 yet preserve %xmm8. 
+ return !RegMaskUsable.empty() && (!PhysReg || !RegMaskUsable.test(PhysReg)); +} + +bool LiveRegMatrix::checkRegUnitInterference(LiveInterval &VirtReg, + unsigned PhysReg) { + if (VirtReg.empty()) + return false; + for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) + if (VirtReg.overlaps(LIS->getRegUnit(*Units))) + return true; + return false; +} + +LiveIntervalUnion::Query &LiveRegMatrix::query(LiveInterval &VirtReg, + unsigned RegUnit) { + LiveIntervalUnion::Query &Q = Queries[RegUnit]; + Q.init(UserTag, &VirtReg, &Matrix[RegUnit]); + return Q; +} + +LiveRegMatrix::InterferenceKind +LiveRegMatrix::checkInterference(LiveInterval &VirtReg, unsigned PhysReg) { + if (VirtReg.empty()) + return IK_Free; + + // Regmask interference is the fastest check. + if (checkRegMaskInterference(VirtReg, PhysReg)) + return IK_RegMask; + + // Check for fixed interference. + if (checkRegUnitInterference(VirtReg, PhysReg)) + return IK_RegUnit; + + // Check the matrix for virtual register interference. + for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) + if (query(VirtReg, *Units).checkInterference()) + return IK_VirtReg; + + return IK_Free; +} diff --git a/lib/CodeGen/LiveRegMatrix.h b/lib/CodeGen/LiveRegMatrix.h new file mode 100644 index 0000000..b3e2d7f --- /dev/null +++ b/lib/CodeGen/LiveRegMatrix.h @@ -0,0 +1,148 @@ +//===-- LiveRegMatrix.h - Track register interference ---------*- C++ -*---===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// The LiveRegMatrix analysis pass keeps track of virtual register interference +// along two dimensions: Slot indexes and register units. The matrix is used by +// register allocators to ensure that no interfering virtual registers get +// assigned to overlapping physical registers. +// +// Register units are defined in MCRegisterInfo.h, they represent the smallest +// unit of interference when dealing with overlapping physical registers. The +// LiveRegMatrix is represented as a LiveIntervalUnion per register unit. When +// a virtual register is assigned to a physical register, the live range for +// the virtual register is inserted into the LiveIntervalUnion for each regunit +// in the physreg. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_LIVEREGMATRIX_H +#define LLVM_CODEGEN_LIVEREGMATRIX_H + +#include "LiveIntervalUnion.h" +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/OwningPtr.h" +#include "llvm/CodeGen/MachineFunctionPass.h" + +namespace llvm { + +class LiveInterval; +class LiveIntervalAnalysis; +class MachineRegisterInfo; +class TargetRegisterInfo; +class VirtRegMap; + +class LiveRegMatrix : public MachineFunctionPass { + const TargetRegisterInfo *TRI; + MachineRegisterInfo *MRI; + LiveIntervals *LIS; + VirtRegMap *VRM; + + // UserTag changes whenever virtual registers have been modified. + unsigned UserTag; + + // The matrix is represented as a LiveIntervalUnion per register unit. + LiveIntervalUnion::Allocator LIUAlloc; + LiveIntervalUnion::Array Matrix; + + // Cached queries per register unit. + OwningArrayPtr Queries; + + // Cached register mask interference info. + unsigned RegMaskTag; + unsigned RegMaskVirtReg; + BitVector RegMaskUsable; + + // MachineFunctionPass boilerplate. 
+ virtual void getAnalysisUsage(AnalysisUsage&) const; + virtual bool runOnMachineFunction(MachineFunction&); + virtual void releaseMemory(); +public: + static char ID; + LiveRegMatrix(); + + //===--------------------------------------------------------------------===// + // High-level interface. + //===--------------------------------------------------------------------===// + // + // Check for interference before assigning virtual registers to physical + // registers. + // + + /// Invalidate cached interference queries after modifying virtual register + /// live ranges. Interference checks may return stale information unless + /// caches are invalidated. + void invalidateVirtRegs() { ++UserTag; } + + enum InterferenceKind { + /// No interference, go ahead and assign. + IK_Free = 0, + + /// Virtual register interference. There are interfering virtual registers + /// assigned to PhysReg or its aliases. This interference could be resolved + /// by unassigning those other virtual registers. + IK_VirtReg, + + /// Register unit interference. A fixed live range is in the way, typically + /// argument registers for a call. This can't be resolved by unassigning + /// other virtual registers. + IK_RegUnit, + + /// RegMask interference. The live range is crossing an instruction with a + /// regmask operand that doesn't preserve PhysReg. This typically means + /// VirtReg is live across a call, and PhysReg isn't call-preserved. + IK_RegMask + }; + + /// Check for interference before assigning VirtReg to PhysReg. + /// If this function returns IK_Free, it is legal to assign(VirtReg, PhysReg). + /// When there is more than one kind of interference, the InterferenceKind + /// with the highest enum value is returned. + InterferenceKind checkInterference(LiveInterval &VirtReg, unsigned PhysReg); + + /// Assign VirtReg to PhysReg. + /// This will mark VirtReg's live range as occupied in the LiveRegMatrix and + /// update VirtRegMap. The live range is expected to be available in PhysReg. + void assign(LiveInterval &VirtReg, unsigned PhysReg); + + /// Unassign VirtReg from its PhysReg. + /// Assuming that VirtReg was previously assigned to a PhysReg, this undoes + /// the assignment and updates VirtRegMap accordingly. + void unassign(LiveInterval &VirtReg); + + //===--------------------------------------------------------------------===// + // Low-level interface. + //===--------------------------------------------------------------------===// + // + // Provide access to the underlying LiveIntervalUnions. + // + + /// Check for regmask interference only. + /// Return true if VirtReg crosses a regmask operand that clobbers PhysReg. + /// If PhysReg is null, check if VirtReg crosses any regmask operands. + bool checkRegMaskInterference(LiveInterval &VirtReg, unsigned PhysReg = 0); + + /// Check for regunit interference only. + /// Return true if VirtReg overlaps a fixed assignment of one of PhysRegs's + /// register units. + bool checkRegUnitInterference(LiveInterval &VirtReg, unsigned PhysReg); + + /// Query a line of the assigned virtual register matrix directly. + /// Use MCRegUnitIterator to enumerate all regunits in the desired PhysReg. + /// This returns a reference to an internal Query data structure that is only + /// valid until the next query() call. + LiveIntervalUnion::Query &query(LiveInterval &VirtReg, unsigned RegUnit); + + /// Directly access the live interval unions per regunit. + /// This returns an array indexed by the regunit number. 
+ LiveIntervalUnion *getLiveUnions() { return &Matrix[0]; } +}; + +} // end namespace llvm + +#endif // LLVM_CODEGEN_LIVEREGMATRIX_H diff --git a/lib/CodeGen/LiveVariables.cpp b/lib/CodeGen/LiveVariables.cpp index 5a0d97d..348ed3a 100644 --- a/lib/CodeGen/LiveVariables.cpp +++ b/lib/CodeGen/LiveVariables.cpp @@ -192,8 +192,8 @@ MachineInstr *LiveVariables::FindLastPartialDef(unsigned Reg, unsigned LastDefReg = 0; unsigned LastDefDist = 0; MachineInstr *LastDef = NULL; - for (const uint16_t *SubRegs = TRI->getSubRegisters(Reg); - unsigned SubReg = *SubRegs; ++SubRegs) { + for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) { + unsigned SubReg = *SubRegs; MachineInstr *Def = PhysRegDef[SubReg]; if (!Def) continue; @@ -216,9 +216,8 @@ MachineInstr *LiveVariables::FindLastPartialDef(unsigned Reg, unsigned DefReg = MO.getReg(); if (TRI->isSubRegister(Reg, DefReg)) { PartDefRegs.insert(DefReg); - for (const uint16_t *SubRegs = TRI->getSubRegisters(DefReg); - unsigned SubReg = *SubRegs; ++SubRegs) - PartDefRegs.insert(SubReg); + for (MCSubRegIterator SubRegs(DefReg, TRI); SubRegs.isValid(); ++SubRegs) + PartDefRegs.insert(*SubRegs); } } return LastDef; @@ -247,8 +246,8 @@ void LiveVariables::HandlePhysRegUse(unsigned Reg, MachineInstr *MI) { true/*IsImp*/)); PhysRegDef[Reg] = LastPartialDef; SmallSet Processed; - for (const uint16_t *SubRegs = TRI->getSubRegisters(Reg); - unsigned SubReg = *SubRegs; ++SubRegs) { + for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) { + unsigned SubReg = *SubRegs; if (Processed.count(SubReg)) continue; if (PartDefRegs.count(SubReg)) @@ -259,7 +258,7 @@ void LiveVariables::HandlePhysRegUse(unsigned Reg, MachineInstr *MI) { false/*IsDef*/, true/*IsImp*/)); PhysRegDef[SubReg] = LastPartialDef; - for (const uint16_t *SS = TRI->getSubRegisters(SubReg); *SS; ++SS) + for (MCSubRegIterator SS(SubReg, TRI); SS.isValid(); ++SS) Processed.insert(*SS); } } @@ -271,9 +270,8 @@ void LiveVariables::HandlePhysRegUse(unsigned Reg, MachineInstr *MI) { // Remember this use. PhysRegUse[Reg] = MI; - for (const uint16_t *SubRegs = TRI->getSubRegisters(Reg); - unsigned SubReg = *SubRegs; ++SubRegs) - PhysRegUse[SubReg] = MI; + for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) + PhysRegUse[*SubRegs] = MI; } /// FindLastRefOrPartRef - Return the last reference or partial reference of @@ -287,8 +285,8 @@ MachineInstr *LiveVariables::FindLastRefOrPartRef(unsigned Reg) { MachineInstr *LastRefOrPartRef = LastUse ? LastUse : LastDef; unsigned LastRefOrPartRefDist = DistanceMap[LastRefOrPartRef]; unsigned LastPartDefDist = 0; - for (const uint16_t *SubRegs = TRI->getSubRegisters(Reg); - unsigned SubReg = *SubRegs; ++SubRegs) { + for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) { + unsigned SubReg = *SubRegs; MachineInstr *Def = PhysRegDef[SubReg]; if (Def && Def != LastDef) { // There was a def of this sub-register in between. This is a partial @@ -336,8 +334,8 @@ bool LiveVariables::HandlePhysRegKill(unsigned Reg, MachineInstr *MI) { MachineInstr *LastPartDef = 0; unsigned LastPartDefDist = 0; SmallSet PartUses; - for (const uint16_t *SubRegs = TRI->getSubRegisters(Reg); - unsigned SubReg = *SubRegs; ++SubRegs) { + for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) { + unsigned SubReg = *SubRegs; MachineInstr *Def = PhysRegDef[SubReg]; if (Def && Def != LastDef) { // There was a def of this sub-register in between. 
This is a partial @@ -351,7 +349,7 @@ bool LiveVariables::HandlePhysRegKill(unsigned Reg, MachineInstr *MI) { } if (MachineInstr *Use = PhysRegUse[SubReg]) { PartUses.insert(SubReg); - for (const uint16_t *SS = TRI->getSubRegisters(SubReg); *SS; ++SS) + for (MCSubRegIterator SS(SubReg, TRI); SS.isValid(); ++SS) PartUses.insert(*SS); unsigned Dist = DistanceMap[Use]; if (Dist > LastRefOrPartRefDist) { @@ -367,8 +365,8 @@ bool LiveVariables::HandlePhysRegKill(unsigned Reg, MachineInstr *MI) { // EAX = op AL // That is, EAX def is dead but AL def extends pass it. PhysRegDef[Reg]->addRegisterDead(Reg, TRI, true); - for (const uint16_t *SubRegs = TRI->getSubRegisters(Reg); - unsigned SubReg = *SubRegs; ++SubRegs) { + for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) { + unsigned SubReg = *SubRegs; if (!PartUses.count(SubReg)) continue; bool NeedDef = true; @@ -388,11 +386,10 @@ bool LiveVariables::HandlePhysRegKill(unsigned Reg, MachineInstr *MI) { else { LastRefOrPartRef->addRegisterKilled(SubReg, TRI, true); PhysRegUse[SubReg] = LastRefOrPartRef; - for (const uint16_t *SSRegs = TRI->getSubRegisters(SubReg); - unsigned SSReg = *SSRegs; ++SSRegs) - PhysRegUse[SSReg] = LastRefOrPartRef; + for (MCSubRegIterator SS(SubReg, TRI); SS.isValid(); ++SS) + PhysRegUse[*SS] = LastRefOrPartRef; } - for (const uint16_t *SS = TRI->getSubRegisters(SubReg); *SS; ++SS) + for (MCSubRegIterator SS(SubReg, TRI); SS.isValid(); ++SS) PartUses.erase(*SS); } } else if (LastRefOrPartRef == PhysRegDef[Reg] && LastRefOrPartRef != MI) { @@ -434,7 +431,7 @@ void LiveVariables::HandleRegMask(const MachineOperand &MO) { // Kill the largest clobbered super-register. // This avoids needless implicit operands. unsigned Super = Reg; - for (const uint16_t *SR = TRI->getSuperRegisters(Reg); *SR; ++SR) + for (MCSuperRegIterator SR(Reg, TRI); SR.isValid(); ++SR) if ((PhysRegDef[*SR] || PhysRegUse[*SR]) && MO.clobbersPhysReg(*SR)) Super = *SR; HandlePhysRegKill(Super, 0); @@ -447,11 +444,11 @@ void LiveVariables::HandlePhysRegDef(unsigned Reg, MachineInstr *MI, SmallSet Live; if (PhysRegDef[Reg] || PhysRegUse[Reg]) { Live.insert(Reg); - for (const uint16_t *SS = TRI->getSubRegisters(Reg); *SS; ++SS) - Live.insert(*SS); + for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) + Live.insert(*SubRegs); } else { - for (const uint16_t *SubRegs = TRI->getSubRegisters(Reg); - unsigned SubReg = *SubRegs; ++SubRegs) { + for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) { + unsigned SubReg = *SubRegs; // If a register isn't itself defined, but all parts that make up of it // are defined, then consider it also defined. // e.g. @@ -462,7 +459,7 @@ void LiveVariables::HandlePhysRegDef(unsigned Reg, MachineInstr *MI, continue; if (PhysRegDef[SubReg] || PhysRegUse[SubReg]) { Live.insert(SubReg); - for (const uint16_t *SS = TRI->getSubRegisters(SubReg); *SS; ++SS) + for (MCSubRegIterator SS(SubReg, TRI); SS.isValid(); ++SS) Live.insert(*SS); } } @@ -472,8 +469,8 @@ void LiveVariables::HandlePhysRegDef(unsigned Reg, MachineInstr *MI, // is referenced. HandlePhysRegKill(Reg, MI); // Only some of the sub-registers are used. - for (const uint16_t *SubRegs = TRI->getSubRegisters(Reg); - unsigned SubReg = *SubRegs; ++SubRegs) { + for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) { + unsigned SubReg = *SubRegs; if (!Live.count(SubReg)) // Skip if this sub-register isn't defined. 
continue; @@ -491,8 +488,8 @@ void LiveVariables::UpdatePhysRegDefs(MachineInstr *MI, Defs.pop_back(); PhysRegDef[Reg] = MI; PhysRegUse[Reg] = NULL; - for (const uint16_t *SubRegs = TRI->getSubRegisters(Reg); - unsigned SubReg = *SubRegs; ++SubRegs) { + for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) { + unsigned SubReg = *SubRegs; PhysRegDef[SubReg] = MI; PhysRegUse[SubReg] = NULL; } @@ -576,7 +573,8 @@ bool LiveVariables::runOnMachineFunction(MachineFunction &mf) { unsigned MOReg = MO.getReg(); if (MO.isUse()) { MO.setIsKill(false); - UseRegs.push_back(MOReg); + if (MO.readsReg()) + UseRegs.push_back(MOReg); } else /*MO.isDef()*/ { MO.setIsDead(false); DefRegs.push_back(MOReg); @@ -732,8 +730,9 @@ void LiveVariables::analyzePHINodes(const MachineFunction& Fn) { for (MachineBasicBlock::const_iterator BBI = I->begin(), BBE = I->end(); BBI != BBE && BBI->isPHI(); ++BBI) for (unsigned i = 1, e = BBI->getNumOperands(); i != e; i += 2) - PHIVarInfo[BBI->getOperand(i + 1).getMBB()->getNumber()] - .push_back(BBI->getOperand(i).getReg()); + if (BBI->getOperand(i).readsReg()) + PHIVarInfo[BBI->getOperand(i + 1).getMBB()->getNumber()] + .push_back(BBI->getOperand(i).getReg()); } bool LiveVariables::VarInfo::isLiveIn(const MachineBasicBlock &MBB, diff --git a/lib/CodeGen/LocalStackSlotAllocation.cpp b/lib/CodeGen/LocalStackSlotAllocation.cpp index 238bf52..fbc9e20 100644 --- a/lib/CodeGen/LocalStackSlotAllocation.cpp +++ b/lib/CodeGen/LocalStackSlotAllocation.cpp @@ -314,7 +314,8 @@ bool LocalStackSlotPass::insertFrameReferenceRegisters(MachineFunction &Fn) { // No previously defined register was in range, so create a // new one. int64_t InstrOffset = TRI->getFrameIndexInstrOffset(MI, idx); - const TargetRegisterClass *RC = TRI->getPointerRegClass(); + const MachineFunction *MF = MI->getParent()->getParent(); + const TargetRegisterClass *RC = TRI->getPointerRegClass(*MF); BaseReg = Fn.getRegInfo().createVirtualRegister(RC); DEBUG(dbgs() << " Materializing base register " << BaseReg << diff --git a/lib/CodeGen/MachineBasicBlock.cpp b/lib/CodeGen/MachineBasicBlock.cpp index 1abb8f2..fa6b450 100644 --- a/lib/CodeGen/MachineBasicBlock.cpp +++ b/lib/CodeGen/MachineBasicBlock.cpp @@ -109,7 +109,8 @@ void ilist_traits::removeNodeFromList(MachineInstr *N) { assert(N->getParent() != 0 && "machine instruction not in a basic block"); // Remove from the use/def lists. 
- N->RemoveRegOperandsFromUseLists(); + if (MachineFunction *MF = N->getParent()->getParent()) + N->RemoveRegOperandsFromUseLists(MF->getRegInfo()); N->setParent(0); @@ -271,11 +272,9 @@ void MachineBasicBlock::print(raw_ostream &OS, SlotIndexes *Indexes) const { } if (isLandingPad()) { OS << Comma << "EH LANDING PAD"; Comma = ", "; } if (hasAddressTaken()) { OS << Comma << "ADDRESS TAKEN"; Comma = ", "; } - if (Alignment) { + if (Alignment) OS << Comma << "Align " << Alignment << " (" << (1u << Alignment) << " bytes)"; - Comma = ", "; - } OS << '\n'; @@ -312,8 +311,11 @@ void MachineBasicBlock::print(raw_ostream &OS, SlotIndexes *Indexes) const { if (!succ_empty()) { if (Indexes) OS << '\t'; OS << " Successors according to CFG:"; - for (const_succ_iterator SI = succ_begin(), E = succ_end(); SI != E; ++SI) + for (const_succ_iterator SI = succ_begin(), E = succ_end(); SI != E; ++SI) { OS << " BB#" << (*SI)->getNumber(); + if (!Weights.empty()) + OS << '(' << *getWeightIterator(SI) << ')'; + } OS << '\n'; } } @@ -479,18 +481,42 @@ MachineBasicBlock::removeSuccessor(succ_iterator I) { void MachineBasicBlock::replaceSuccessor(MachineBasicBlock *Old, MachineBasicBlock *New) { - uint32_t weight = 0; - succ_iterator SI = std::find(Successors.begin(), Successors.end(), Old); + if (Old == New) + return; - // If Weight list is empty it means we don't use it (disabled optimization). - if (!Weights.empty()) { - weight_iterator WI = getWeightIterator(SI); - weight = *WI; + succ_iterator E = succ_end(); + succ_iterator NewI = E; + succ_iterator OldI = E; + for (succ_iterator I = succ_begin(); I != E; ++I) { + if (*I == Old) { + OldI = I; + if (NewI != E) + break; + } + if (*I == New) { + NewI = I; + if (OldI != E) + break; + } } + assert(OldI != E && "Old is not a successor of this block"); + Old->removePredecessor(this); - // Update the successor information. - removeSuccessor(SI); - addSuccessor(New, weight); + // If New isn't already a successor, let it take Old's place. + if (NewI == E) { + New->addPredecessor(this); + *OldI = New; + return; + } + + // New is already a successor. + // Update its weight instead of adding a duplicate edge. + if (!Weights.empty()) { + weight_iterator OldWI = getWeightIterator(OldI); + *getWeightIterator(NewI) += *OldWI; + Weights.erase(OldWI); + } + Successors.erase(OldI); } void MachineBasicBlock::addPredecessor(MachineBasicBlock *pred) { @@ -509,14 +535,13 @@ void MachineBasicBlock::transferSuccessors(MachineBasicBlock *fromMBB) { while (!fromMBB->succ_empty()) { MachineBasicBlock *Succ = *fromMBB->succ_begin(); - uint32_t weight = 0; - + uint32_t Weight = 0; // If Weight list is empty it means we don't use it (disabled optimization). if (!fromMBB->Weights.empty()) - weight = *fromMBB->Weights.begin(); + Weight = *fromMBB->Weights.begin(); - addSuccessor(Succ, weight); + addSuccessor(Succ, Weight); fromMBB->removeSuccessor(Succ); } } @@ -528,7 +553,10 @@ MachineBasicBlock::transferSuccessorsAndUpdatePHIs(MachineBasicBlock *fromMBB) { while (!fromMBB->succ_empty()) { MachineBasicBlock *Succ = *fromMBB->succ_begin(); - addSuccessor(Succ); + uint32_t Weight = 0; + if (!fromMBB->Weights.empty()) + Weight = *fromMBB->Weights.begin(); + addSuccessor(Succ, Weight); fromMBB->removeSuccessor(Succ); // Fix up any PHI nodes in the successor. 
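The rewritten replaceSuccessor above is careful about the case where New is already a successor: rather than producing a duplicate CFG edge, it folds Old's weight into New's entry and erases Old. A self-contained sketch of that merge rule over plain vectors, with ints and uint32_t standing in for the real successor and weight lists (illustrative code, not LLVM API):

#include <algorithm>
#include <cassert>
#include <cstdint>
#include <vector>

typedef std::vector<int> SuccList;        // stand-in for MachineBasicBlock*
typedef std::vector<uint32_t> WeightList; // parallel to SuccList

void replaceSucc(SuccList &Succs, WeightList &Weights, int Old, int New) {
  if (Old == New)
    return;
  SuccList::iterator OldI = std::find(Succs.begin(), Succs.end(), Old);
  assert(OldI != Succs.end() && "Old is not a successor");
  SuccList::iterator NewI = std::find(Succs.begin(), Succs.end(), New);

  // If New isn't already a successor, let it take Old's place and weight.
  if (NewI == Succs.end()) {
    *OldI = New;
    return;
  }

  // New is already a successor: merge the weights and drop the extra edge.
  // An empty weight list means weights are disabled, as in the patch.
  if (!Weights.empty()) {
    Weights[NewI - Succs.begin()] += Weights[OldI - Succs.begin()];
    Weights.erase(Weights.begin() + (OldI - Succs.begin()));
  }
  Succs.erase(OldI);
}

The real method additionally updates the predecessor list of Old and New, which the sketch omits.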
@@ -542,9 +570,12 @@ MachineBasicBlock::transferSuccessorsAndUpdatePHIs(MachineBasicBlock *fromMBB) { } } +bool MachineBasicBlock::isPredecessor(const MachineBasicBlock *MBB) const { + return std::find(pred_begin(), pred_end(), MBB) != pred_end(); +} + bool MachineBasicBlock::isSuccessor(const MachineBasicBlock *MBB) const { - const_succ_iterator I = std::find(Successors.begin(), Successors.end(), MBB); - return I != Successors.end(); + return std::find(succ_begin(), succ_end(), MBB) != succ_end(); } bool MachineBasicBlock::isLayoutSuccessor(const MachineBasicBlock *MBB) const { @@ -596,6 +627,11 @@ bool MachineBasicBlock::canFallThrough() { MachineBasicBlock * MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) { + // Splitting the critical edge to a landing pad block is non-trivial. Don't do + // it in this generic function. + if (Succ->isLandingPad()) + return NULL; + MachineFunction *MF = getParent(); DebugLoc dl; // FIXME: this is nowhere @@ -670,7 +706,7 @@ MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) { // Inherit live-ins from the successor for (MachineBasicBlock::livein_iterator I = Succ->livein_begin(), - E = Succ->livein_end(); I != E; ++I) + E = Succ->livein_end(); I != E; ++I) NMBB->addLiveIn(*I); // Update LiveVariables. diff --git a/lib/CodeGen/MachineBlockPlacement.cpp b/lib/CodeGen/MachineBlockPlacement.cpp index 5ba6851..c4dca2c 100644 --- a/lib/CodeGen/MachineBlockPlacement.cpp +++ b/lib/CodeGen/MachineBlockPlacement.cpp @@ -11,7 +11,7 @@ // structure and branch probability estimates. // // The pass strives to preserve the structure of the CFG (that is, retain -// a topological ordering of basic blocks) in the absense of a *strong* signal +// a topological ordering of basic blocks) in the absence of a *strong* signal // to the contrary from probabilities. However, within the CFG structure, it // attempts to choose an ordering which favors placing more likely sequences of // blocks adjacent to each other. @@ -63,17 +63,13 @@ namespace { /// /// This is the datastructure representing a chain of consecutive blocks that /// are profitable to layout together in order to maximize fallthrough -/// probabilities. We also can use a block chain to represent a sequence of -/// basic blocks which have some external (correctness) requirement for -/// sequential layout. +/// probabilities and code locality. We also can use a block chain to represent +/// a sequence of basic blocks which have some external (correctness) +/// requirement for sequential layout. /// -/// Eventually, the block chains will form a directed graph over the function. -/// We provide an SCC-supporting-iterator in order to quicky build and walk the -/// SCCs of block chains within a function. -/// -/// The block chains also have support for calculating and caching probability -/// information related to the chain itself versus other chains. This is used -/// for ranking during the final layout of block chains. +/// Chains can be built around a single basic block and can be merged to grow +/// them. They participate in a block-to-chain mapping, which is updated +/// automatically as chains are merged together. class BlockChain { /// \brief The sequence of blocks belonging to this chain. /// @@ -179,10 +175,11 @@ class MachineBlockPlacement : public MachineFunctionPass { /// \brief Allocator and owner of BlockChain structures. 
/// - /// We build BlockChains lazily by merging together high probability BB - /// sequences acording to the "Algo2" in the paper mentioned at the top of - /// the file. To reduce malloc traffic, we allocate them using this slab-like - /// allocator, and destroy them after the pass completes. + /// We build BlockChains lazily while processing the loop structure of + /// a function. To reduce malloc traffic, we allocate them using this + /// slab-like allocator, and destroy them after the pass completes. An + /// important guarantee is that this allocator produces stable pointers to + /// the chains. SpecificBumpPtrAllocator ChainAllocator; /// \brief Function wide BasicBlock to BlockChain mapping. @@ -329,7 +326,7 @@ MachineBasicBlock *MachineBlockPlacement::selectBestSuccessor( // the MBPI analysis, we manually compute probabilities using the edge // weights. This is suboptimal as it means that the somewhat subtle // definition of edge weight semantics is encoded here as well. We should - // improve the MBPI interface to effeciently support query patterns such as + // improve the MBPI interface to efficiently support query patterns such as // this. uint32_t BestWeight = 0; uint32_t WeightScale = 0; @@ -988,8 +985,22 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) { // boiler plate. Cond.clear(); MachineBasicBlock *TBB = 0, *FBB = 0; // For AnalyzeBranch. - if (!TII->AnalyzeBranch(*PrevBB, TBB, FBB, Cond)) + if (!TII->AnalyzeBranch(*PrevBB, TBB, FBB, Cond)) { + // If PrevBB has a two-way branch, try to re-order the branches + // such that we branch to the successor with higher weight first. + if (TBB && !Cond.empty() && FBB && + MBPI->getEdgeWeight(PrevBB, FBB) > MBPI->getEdgeWeight(PrevBB, TBB) && + !TII->ReverseBranchCondition(Cond)) { + DEBUG(dbgs() << "Reverse order of the two branches: " + << getBlockName(PrevBB) << "\n"); + DEBUG(dbgs() << " Edge weight: " << MBPI->getEdgeWeight(PrevBB, FBB) + << " vs " << MBPI->getEdgeWeight(PrevBB, TBB) << "\n"); + DebugLoc dl; // FIXME: this is nowhere + TII->RemoveBranch(*PrevBB); + TII->InsertBranch(*PrevBB, FBB, TBB, Cond, dl); + } PrevBB->updateTerminator(); + } } // Fixup the last block. @@ -1000,29 +1011,63 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) { // Walk through the backedges of the function now that we have fully laid out // the basic blocks and align the destination of each backedge. We don't rely - // on the loop info here so that we can align backedges in unnatural CFGs and - // backedges that were introduced purely because of the loop rotations done - // during this layout pass. - // FIXME: This isn't quite right, we shouldn't align backedges that result - // from blocks being sunken below the exit block for the function. + // exclusively on the loop info here so that we can align backedges in + // unnatural CFGs and backedges that were introduced purely because of the + // loop rotations done during this layout pass. if (F.getFunction()->hasFnAttr(Attribute::OptimizeForSize)) return; unsigned Align = TLI->getPrefLoopAlignment(); if (!Align) return; // Don't care about loop alignment. + if (FunctionChain.begin() == FunctionChain.end()) + return; // Empty chain. 
- SmallPtrSet PreviousBlocks; - for (BlockChain::iterator BI = FunctionChain.begin(), + const BranchProbability ColdProb(1, 5); // 20% + BlockFrequency EntryFreq = MBFI->getBlockFreq(F.begin()); + BlockFrequency WeightedEntryFreq = EntryFreq * ColdProb; + for (BlockChain::iterator BI = llvm::next(FunctionChain.begin()), BE = FunctionChain.end(); BI != BE; ++BI) { - PreviousBlocks.insert(*BI); - // Set alignment on the destination of all the back edges in the new - // ordering. - for (MachineBasicBlock::succ_iterator SI = (*BI)->succ_begin(), - SE = (*BI)->succ_end(); - SI != SE; ++SI) - if (PreviousBlocks.count(*SI)) - (*SI)->setAlignment(Align); + // Don't align non-looping basic blocks. These are unlikely to execute + // enough times to matter in practice. Note that we'll still handle + // unnatural CFGs inside of a natural outer loop (the common case) and + // rotated loops. + MachineLoop *L = MLI->getLoopFor(*BI); + if (!L) + continue; + + // If the block is cold relative to the function entry don't waste space + // aligning it. + BlockFrequency Freq = MBFI->getBlockFreq(*BI); + if (Freq < WeightedEntryFreq) + continue; + + // If the block is cold relative to its loop header, don't align it + // regardless of what edges into the block exist. + MachineBasicBlock *LoopHeader = L->getHeader(); + BlockFrequency LoopHeaderFreq = MBFI->getBlockFreq(LoopHeader); + if (Freq < (LoopHeaderFreq * ColdProb)) + continue; + + // Check for the existence of a non-layout predecessor which would benefit + // from aligning this block. + MachineBasicBlock *LayoutPred = *llvm::prior(BI); + + // Force alignment if all the predecessors are jumps. We already checked + // that the block isn't cold above. + if (!LayoutPred->isSuccessor(*BI)) { + (*BI)->setAlignment(Align); + continue; + } + + // Align this block if the layout predecessor's edge into this block is + // cold relative to the block. When this is true, other predecessors make up + // all of the hot entries into the block and thus alignment is likely to be + // important. + BranchProbability LayoutProb = MBPI->getEdgeProbability(LayoutPred, *BI); + BlockFrequency LayoutEdgeFreq = MBFI->getBlockFreq(LayoutPred) * LayoutProb; + if (LayoutEdgeFreq <= (Freq * ColdProb)) + (*BI)->setAlignment(Align); } } @@ -1053,7 +1098,7 @@ namespace { /// /// A separate pass to compute interesting statistics for evaluating block /// placement. This is separate from the actual placement pass so that they can -/// be computed in the absense of any placement transformations or when using +/// be computed in the absence of any placement transformations or when using /// alternative placement strategies. class MachineBlockPlacementStats : public MachineFunctionPass { /// \brief A handle to the branch probability pass. 
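The alignment decision added to buildCFGChains above reduces to a small predicate over block frequencies. A sketch with plain doubles standing in for the BlockFrequency and BranchProbability arithmetic; the function name and parameters are hypothetical, not from the patch, and the caller is assumed to have already excluded non-loop blocks via MLI->getLoopFor:

// Mirrors the four tests above: not cold in the function, not cold in its
// loop, and either reached only by jumps or entered mostly along
// non-fallthrough edges.
bool shouldAlignLoopBlock(double Freq, double EntryFreq,
                          double LoopHeaderFreq,
                          bool LayoutPredFallsThrough,
                          double LayoutEdgeFreq) {
  const double ColdProb = 0.2; // the patch uses BranchProbability(1, 5)
  if (Freq < EntryFreq * ColdProb)
    return false;              // cold relative to the function entry
  if (Freq < LoopHeaderFreq * ColdProb)
    return false;              // cold relative to its loop header
  if (!LayoutPredFallsThrough)
    return true;               // every entry is a jump; align the target
  return LayoutEdgeFreq <= Freq * ColdProb; // fallthrough edge is cold
}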
diff --git a/lib/CodeGen/MachineCSE.cpp b/lib/CodeGen/MachineCSE.cpp index a63688e..896461f 100644 --- a/lib/CodeGen/MachineCSE.cpp +++ b/lib/CodeGen/MachineCSE.cpp @@ -84,7 +84,7 @@ namespace { bool PerformTrivialCoalescing(MachineInstr *MI, MachineBasicBlock *MBB); bool isPhysDefTriviallyDead(unsigned Reg, MachineBasicBlock::const_iterator I, - MachineBasicBlock::const_iterator E) const ; + MachineBasicBlock::const_iterator E) const; bool hasLivePhysRegDefUses(const MachineInstr *MI, const MachineBasicBlock *MBB, SmallSet &PhysRefs, @@ -100,8 +100,7 @@ namespace { void ExitScope(MachineBasicBlock *MBB); bool ProcessBlock(MachineBasicBlock *MBB); void ExitScopeIfDone(MachineDomTreeNode *Node, - DenseMap &OpenChildren, - DenseMap &ParentMap); + DenseMap &OpenChildren); bool PerformCSE(MachineDomTreeNode *Node); }; } // end anonymous namespace @@ -216,11 +215,12 @@ bool MachineCSE::hasLivePhysRegDefUses(const MachineInstr *MI, if (MO.isDef() && (MO.isDead() || isPhysDefTriviallyDead(Reg, I, MBB->end()))) continue; - PhysRefs.insert(Reg); + // Reading constant physregs is ok. + if (!MRI->isConstantPhysReg(Reg, *MBB->getParent())) + for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) + PhysRefs.insert(*AI); if (MO.isDef()) PhysDefs.push_back(Reg); - for (const uint16_t *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) - PhysRefs.insert(*Alias); } return !PhysRefs.empty(); @@ -326,6 +326,29 @@ bool MachineCSE::isProfitableToCSE(unsigned CSReg, unsigned Reg, MachineInstr *CSMI, MachineInstr *MI) { // FIXME: Heuristics that works around the lack the live range splitting. + // If CSReg is used at all uses of Reg, CSE should not increase register + // pressure of CSReg. + bool MayIncreasePressure = true; + if (TargetRegisterInfo::isVirtualRegister(CSReg) && + TargetRegisterInfo::isVirtualRegister(Reg)) { + MayIncreasePressure = false; + SmallPtrSet CSUses; + for (MachineRegisterInfo::use_nodbg_iterator I =MRI->use_nodbg_begin(CSReg), + E = MRI->use_nodbg_end(); I != E; ++I) { + MachineInstr *Use = &*I; + CSUses.insert(Use); + } + for (MachineRegisterInfo::use_nodbg_iterator I = MRI->use_nodbg_begin(Reg), + E = MRI->use_nodbg_end(); I != E; ++I) { + MachineInstr *Use = &*I; + if (!CSUses.count(Use)) { + MayIncreasePressure = true; + break; + } + } + } + if (!MayIncreasePressure) return true; + // Heuristics #1: Don't CSE "cheap" computation if the def is not local or in // an immediate predecessor. We don't want to increase register pressure and // end up causing other computation to be spilled. @@ -396,6 +419,7 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) { bool Changed = false; SmallVector, 8> CSEPairs; + SmallVector ImplicitDefsToUpdate; for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E; ) { MachineInstr *MI = &*I; ++I; @@ -437,7 +461,7 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) { // used, then it's not safe to replace it with a common subexpression. // It's also not safe if the instruction uses physical registers. bool CrossMBBPhysDef = false; - SmallSet PhysRefs; + SmallSet PhysRefs; SmallVector PhysDefs; if (FoundCSE && hasLivePhysRegDefUses(MI, MBB, PhysRefs, PhysDefs)) { FoundCSE = false; @@ -465,21 +489,31 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) { // Check if it's profitable to perform this CSE. 
bool DoCSE = true; - unsigned NumDefs = MI->getDesc().getNumDefs(); + unsigned NumDefs = MI->getDesc().getNumDefs() + + MI->getDesc().getNumImplicitDefs(); + for (unsigned i = 0, e = MI->getNumOperands(); NumDefs && i != e; ++i) { MachineOperand &MO = MI->getOperand(i); if (!MO.isReg() || !MO.isDef()) continue; unsigned OldReg = MO.getReg(); unsigned NewReg = CSMI->getOperand(i).getReg(); - if (OldReg == NewReg) + + // Go through implicit defs of CSMI and MI, if a def is not dead at MI, + // we should make sure it is not dead at CSMI. + if (MO.isImplicit() && !MO.isDead() && CSMI->getOperand(i).isDead()) + ImplicitDefsToUpdate.push_back(i); + if (OldReg == NewReg) { + --NumDefs; continue; + } assert(TargetRegisterInfo::isVirtualRegister(OldReg) && TargetRegisterInfo::isVirtualRegister(NewReg) && "Do not CSE physical register defs!"); if (!isProfitableToCSE(NewReg, OldReg, CSMI, MI)) { + DEBUG(dbgs() << "*** Not profitable, avoid CSE!\n"); DoCSE = false; break; } @@ -488,6 +522,7 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) { // within the register class of the new instruction. const TargetRegisterClass *OldRC = MRI->getRegClass(OldReg); if (!MRI->constrainRegClass(NewReg, OldRC)) { + DEBUG(dbgs() << "*** Not the same register class, avoid CSE!\n"); DoCSE = false; break; } @@ -503,6 +538,11 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) { MRI->clearKillFlags(CSEPairs[i].second); } + // Go through implicit defs of CSMI and MI, if a def is not dead at MI, + // we should make sure it is not dead at CSMI. + for (unsigned i = 0, e = ImplicitDefsToUpdate.size(); i != e; ++i) + CSMI->getOperand(ImplicitDefsToUpdate[i]).setIsDead(false); + if (CrossMBBPhysDef) { // Add physical register defs now coming in from a predecessor to MBB // livein list. @@ -522,11 +562,11 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) { ++NumCommutes; Changed = true; } else { - DEBUG(dbgs() << "*** Not profitable, avoid CSE!\n"); VNT.insert(MI, CurrVN++); Exps.push_back(MI); } CSEPairs.clear(); + ImplicitDefsToUpdate.clear(); } return Changed; @@ -537,8 +577,7 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) { /// up the dominator tree to destroy ancestors which are now done. void MachineCSE::ExitScopeIfDone(MachineDomTreeNode *Node, - DenseMap &OpenChildren, - DenseMap &ParentMap) { + DenseMap &OpenChildren) { if (OpenChildren[Node]) return; @@ -546,7 +585,7 @@ MachineCSE::ExitScopeIfDone(MachineDomTreeNode *Node, ExitScope(Node->getBlock()); // Now traverse upwards to pop ancestors whose offsprings are all done. - while (MachineDomTreeNode *Parent = ParentMap[Node]) { + while (MachineDomTreeNode *Parent = Node->getIDom()) { unsigned Left = --OpenChildren[Parent]; if (Left != 0) break; @@ -558,7 +597,6 @@ MachineCSE::ExitScopeIfDone(MachineDomTreeNode *Node, bool MachineCSE::PerformCSE(MachineDomTreeNode *Node) { SmallVector Scopes; SmallVector WorkList; - DenseMap ParentMap; DenseMap OpenChildren; CurrVN = 0; @@ -573,7 +611,6 @@ bool MachineCSE::PerformCSE(MachineDomTreeNode *Node) { OpenChildren[Node] = NumChildren; for (unsigned i = 0; i != NumChildren; ++i) { MachineDomTreeNode *Child = Children[i]; - ParentMap[Child] = Node; WorkList.push_back(Child); } } while (!WorkList.empty()); @@ -586,7 +623,7 @@ bool MachineCSE::PerformCSE(MachineDomTreeNode *Node) { EnterScope(MBB); Changed |= ProcessBlock(MBB); // If it's a leaf node, it's done. Traverse upwards to pop ancestors. 
- ExitScopeIfDone(Node, OpenChildren, ParentMap); + ExitScopeIfDone(Node, OpenChildren); } return Changed; diff --git a/lib/CodeGen/MachineCopyPropagation.cpp b/lib/CodeGen/MachineCopyPropagation.cpp index 9730eaa..bac3aa2 100644 --- a/lib/CodeGen/MachineCopyPropagation.cpp +++ b/lib/CodeGen/MachineCopyPropagation.cpp @@ -62,28 +62,16 @@ void MachineCopyPropagation::SourceNoLongerAvailable(unsigned Reg, SourceMap &SrcMap, DenseMap &AvailCopyMap) { - SourceMap::iterator SI = SrcMap.find(Reg); - if (SI != SrcMap.end()) { - const DestList& Defs = SI->second; - for (DestList::const_iterator I = Defs.begin(), E = Defs.end(); - I != E; ++I) { - unsigned MappedDef = *I; - // Source of copy is no longer available for propagation. - if (AvailCopyMap.erase(MappedDef)) { - for (const uint16_t *SR = TRI->getSubRegisters(MappedDef); *SR; ++SR) - AvailCopyMap.erase(*SR); - } - } - } - for (const uint16_t *AS = TRI->getAliasSet(Reg); *AS; ++AS) { - SI = SrcMap.find(*AS); + for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) { + SourceMap::iterator SI = SrcMap.find(*AI); if (SI != SrcMap.end()) { const DestList& Defs = SI->second; for (DestList::const_iterator I = Defs.begin(), E = Defs.end(); I != E; ++I) { unsigned MappedDef = *I; + // Source of copy is no longer available for propagation. if (AvailCopyMap.erase(MappedDef)) { - for (const uint16_t *SR = TRI->getSubRegisters(MappedDef); *SR; ++SR) + for (MCSubRegIterator SR(MappedDef, TRI); SR.isValid(); ++SR) AvailCopyMap.erase(*SR); } } @@ -188,11 +176,8 @@ bool MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) { } // If Src is defined by a previous copy, it cannot be eliminated. - CI = CopyMap.find(Src); - if (CI != CopyMap.end()) - MaybeDeadCopies.remove(CI->second); - for (const uint16_t *AS = TRI->getAliasSet(Src); *AS; ++AS) { - CI = CopyMap.find(*AS); + for (MCRegAliasIterator AI(Src, TRI, true); AI.isValid(); ++AI) { + CI = CopyMap.find(*AI); if (CI != CopyMap.end()) MaybeDeadCopies.remove(CI->second); } @@ -211,13 +196,13 @@ bool MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) { // Remember Def is defined by the copy. // ... Make sure to clear the def maps of aliases first. - for (const uint16_t *AS = TRI->getAliasSet(Def); *AS; ++AS) { - CopyMap.erase(*AS); - AvailCopyMap.erase(*AS); + for (MCRegAliasIterator AI(Def, TRI, false); AI.isValid(); ++AI) { + CopyMap.erase(*AI); + AvailCopyMap.erase(*AI); } CopyMap[Def] = MI; AvailCopyMap[Def] = MI; - for (const uint16_t *SR = TRI->getSubRegisters(Def); *SR; ++SR) { + for (MCSubRegIterator SR(Def, TRI); SR.isValid(); ++SR) { CopyMap[*SR] = MI; AvailCopyMap[*SR] = MI; } @@ -256,11 +241,8 @@ bool MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) { // If 'Reg' is defined by a copy, the copy is no longer a candidate // for elimination. - DenseMap::iterator CI = CopyMap.find(Reg); - if (CI != CopyMap.end()) - MaybeDeadCopies.remove(CI->second); - for (const uint16_t *AS = TRI->getAliasSet(Reg); *AS; ++AS) { - CI = CopyMap.find(*AS); + for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) { + DenseMap::iterator CI = CopyMap.find(*AI); if (CI != CopyMap.end()) MaybeDeadCopies.remove(CI->second); } @@ -296,11 +278,9 @@ bool MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) { unsigned Reg = Defs[i]; // No longer defined by a copy. 
- CopyMap.erase(Reg); - AvailCopyMap.erase(Reg); - for (const uint16_t *AS = TRI->getAliasSet(Reg); *AS; ++AS) { - CopyMap.erase(*AS); - AvailCopyMap.erase(*AS); + for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) { + CopyMap.erase(*AI); + AvailCopyMap.erase(*AI); } // If 'Reg' is previously source of a copy, it is no longer available for diff --git a/lib/CodeGen/MachineFunction.cpp b/lib/CodeGen/MachineFunction.cpp index d8c2f6a..d4aede8a 100644 --- a/lib/CodeGen/MachineFunction.cpp +++ b/lib/CodeGen/MachineFunction.cpp @@ -14,6 +14,7 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/DebugInfo.h" #include "llvm/Function.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFunctionPass.h" @@ -26,7 +27,6 @@ #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" #include "llvm/Analysis/ConstantFolding.h" -#include "llvm/Analysis/DebugInfo.h" #include "llvm/Support/Debug.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetLowering.h" @@ -60,7 +60,7 @@ MachineFunction::MachineFunction(const Function *F, const TargetMachine &TM, MFInfo = 0; FrameInfo = new (Allocator) MachineFrameInfo(*TM.getFrameLowering()); if (Fn->hasFnAttr(Attribute::StackAlignment)) - FrameInfo->setMaxAlignment(Attribute::getStackAlignmentFromAttrs( + FrameInfo->ensureMaxAlignment(Attribute::getStackAlignmentFromAttrs( Fn->getAttributes().getFnAttributes())); ConstantPool = new (Allocator) MachineConstantPool(TM.getTargetData()); Alignment = TM.getTargetLowering()->getMinFunctionAlignment(); @@ -84,9 +84,13 @@ MachineFunction::~MachineFunction() { MFInfo->~MachineFunctionInfo(); Allocator.Deallocate(MFInfo); } - FrameInfo->~MachineFrameInfo(); Allocator.Deallocate(FrameInfo); - ConstantPool->~MachineConstantPool(); Allocator.Deallocate(ConstantPool); - + + FrameInfo->~MachineFrameInfo(); + Allocator.Deallocate(FrameInfo); + + ConstantPool->~MachineConstantPool(); + Allocator.Deallocate(ConstantPool); + if (JumpTableInfo) { JumpTableInfo->~MachineJumpTableInfo(); Allocator.Deallocate(JumpTableInfo); @@ -98,7 +102,7 @@ MachineFunction::~MachineFunction() { MachineJumpTableInfo *MachineFunction:: getOrCreateJumpTableInfo(unsigned EntryKind) { if (JumpTableInfo) return JumpTableInfo; - + JumpTableInfo = new (Allocator) MachineJumpTableInfo((MachineJumpTableInfo::JTEntryKind)EntryKind); return JumpTableInfo; @@ -116,12 +120,12 @@ void MachineFunction::RenumberBlocks(MachineBasicBlock *MBB) { MBBI = begin(); else MBBI = MBB; - + // Figure out the block number this should have. unsigned BlockNo = 0; if (MBBI != begin()) BlockNo = prior(MBBI)->getNumber()+1; - + for (; MBBI != E; ++MBBI, ++BlockNo) { if (MBBI->getNumber() != (int)BlockNo) { // Remove use of the old number. @@ -130,7 +134,7 @@ void MachineFunction::RenumberBlocks(MachineBasicBlock *MBB) { "MBB number mismatch!"); MBBNumbering[MBBI->getNumber()] = 0; } - + // If BlockNo is already taken, set that block's number to -1. if (MBBNumbering[BlockNo]) MBBNumbering[BlockNo]->setNumber(-1); @@ -138,7 +142,7 @@ void MachineFunction::RenumberBlocks(MachineBasicBlock *MBB) { MBBNumbering[BlockNo] = MBBI; MBBI->setNumber(BlockNo); } - } + } // Okay, all the blocks are renumbered. If we have compactified the block // numbering, shrink MBBNumbering now. 
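The destructor reshuffling in the hunk above makes the arena pattern easier to follow: MachineFunction's members are placement-new'ed into a bump allocator, so teardown must run each destructor explicitly and then return the storage. A short sketch of that pattern, assuming a toy arena in place of the real llvm::BumpPtrAllocator:

    #include <cstddef>
    #include <new>

    // Toy arena; a real bump allocator hands out slab memory and makes
    // Deallocate effectively a no-op.
    struct Arena {
      void *Allocate(std::size_t Size) { return ::operator new(Size); }
      void Deallocate(void *Ptr) { ::operator delete(Ptr); }
    };

    struct FrameInfo {        // stand-in for MachineFrameInfo
      ~FrameInfo() {}         // the real class releases side tables here
    };

    int main() {
      Arena Allocator;
      FrameInfo *FI = new (Allocator.Allocate(sizeof(FrameInfo))) FrameInfo();
      // ... use FI ...
      FI->~FrameInfo();          // run the destructor explicitly,
      Allocator.Deallocate(FI);  // then hand the storage back to the arena
      return 0;
    }

Keeping each explicit destructor call paired with its Deallocate on adjacent lines, as the reformatted hunk does, makes it harder to drop one half of the pair.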
@@ -295,16 +299,16 @@ void MachineFunction::print(raw_ostream &OS, SlotIndexes *Indexes) const { // Print Frame Information FrameInfo->print(*this, OS); - + // Print JumpTable Information if (JumpTableInfo) JumpTableInfo->print(OS); // Print Constant Pool ConstantPool->print(OS); - + const TargetRegisterInfo *TRI = getTarget().getRegisterInfo(); - + if (RegInfo && !RegInfo->livein_empty()) { OS << "Function Live Ins: "; for (MachineRegisterInfo::livein_iterator @@ -324,7 +328,7 @@ void MachineFunction::print(raw_ostream &OS, SlotIndexes *Indexes) const { OS << ' ' << PrintReg(*I, TRI); OS << '\n'; } - + for (const_iterator BB = begin(), E = end(); BB != E; ++BB) { OS << '\n'; BB->print(OS, Indexes); @@ -411,10 +415,9 @@ unsigned MachineFunction::addLiveIn(unsigned PReg, MCSymbol *MachineFunction::getJTISymbol(unsigned JTI, MCContext &Ctx, bool isLinkerPrivate) const { assert(JumpTableInfo && "No jump tables"); - assert(JTI < JumpTableInfo->getJumpTables().size() && "Invalid JTI!"); const MCAsmInfo &MAI = *getTarget().getMCAsmInfo(); - + const char *Prefix = isLinkerPrivate ? MAI.getLinkerPrivateGlobalPrefix() : MAI.getPrivateGlobalPrefix(); SmallString<60> Name; @@ -691,7 +694,7 @@ static bool CanShareConstantPoolEntry(const Constant *A, const Constant *B, else if (B->getType() != IntTy) B = ConstantFoldInstOperands(Instruction::BitCast, IntTy, const_cast(B), TD); - + return A == B; } @@ -714,7 +717,7 @@ unsigned MachineConstantPool::getConstantPoolIndex(const Constant *C, Constants[i].Alignment = Alignment; return i; } - + Constants.push_back(MachineConstantPoolEntry(C, Alignment)); return Constants.size()-1; } @@ -723,7 +726,7 @@ unsigned MachineConstantPool::getConstantPoolIndex(MachineConstantPoolValue *V, unsigned Alignment) { assert(Alignment && "Alignment must be specified!"); if (Alignment > PoolAlignment) PoolAlignment = Alignment; - + // Check to see if we already have this constant. // // FIXME, this could be made much more efficient for large constant pools. diff --git a/lib/CodeGen/MachineFunctionPrinterPass.cpp b/lib/CodeGen/MachineFunctionPrinterPass.cpp index 2aaa798..0102ac7 100644 --- a/lib/CodeGen/MachineFunctionPrinterPass.cpp +++ b/lib/CodeGen/MachineFunctionPrinterPass.cpp @@ -14,7 +14,9 @@ #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/SlotIndexes.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Support/Debug.h" using namespace llvm; @@ -28,6 +30,7 @@ struct MachineFunctionPrinterPass : public MachineFunctionPass { raw_ostream &OS; const std::string Banner; + MachineFunctionPrinterPass() : MachineFunctionPass(ID), OS(dbgs()) { } MachineFunctionPrinterPass(raw_ostream &os, const std::string &banner) : MachineFunctionPass(ID), OS(os), Banner(banner) {} @@ -40,7 +43,7 @@ struct MachineFunctionPrinterPass : public MachineFunctionPass { bool runOnMachineFunction(MachineFunction &MF) { OS << "# " << Banner << ":\n"; - MF.print(OS); + MF.print(OS, getAnalysisIfAvailable()); return false; } }; @@ -48,6 +51,10 @@ struct MachineFunctionPrinterPass : public MachineFunctionPass { char MachineFunctionPrinterPass::ID = 0; } +char &MachineFunctionPrinterPassID = MachineFunctionPrinterPass::ID; +INITIALIZE_PASS(MachineFunctionPrinterPass, "print-machineinstrs", + "Machine Function Printer", false, false) + namespace llvm { /// Returns a newly-created MachineFunction Printer pass. The /// default banner is empty. 
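The MachineInstr.cpp and MachineRegisterInfo.cpp changes that follow move register operands onto a use-def list whose Next pointers are null-terminated while the Prev pointers are circular (Head->Prev is the last node), so a single head pointer supports O(1) append at the tail and O(1) removal of any node. A self-contained sketch of that invariant, using an illustrative Node type rather than the actual MachineOperand layout:

    // Next is null-terminated; Prev is circular (Head->Prev == last node).
    struct Node {
      Node *Prev = nullptr;
      Node *Next = nullptr;
    };

    void pushBack(Node *&Head, Node *N) {
      if (!Head) {               // empty list: N is both head and last
        N->Prev = N;
        N->Next = nullptr;
        Head = N;
        return;
      }
      Node *Last = Head->Prev;   // circular Prev gives the tail in O(1)
      N->Prev = Last;
      N->Next = nullptr;
      Last->Next = N;
      Head->Prev = N;
    }

    void remove(Node *&Head, Node *N) {
      Node *Next = N->Next;
      Node *Prev = N->Prev;
      if (N == Head)
        Head = Next;
      else
        Prev->Next = Next;
      if (Head)                  // fix the circular Prev link
        (Next ? Next : Head)->Prev = Prev;
      N->Prev = N->Next = nullptr;
    }

    int main() {
      Node A, B, C;
      Node *Head = nullptr;
      pushBack(Head, &A);
      pushBack(Head, &B);
      pushBack(Head, &C);  // A -> B -> C, Head->Prev == &C
      remove(Head, &B);    // A -> C
      remove(Head, &A);    // C
      remove(Head, &C);    // empty
      return Head ? 1 : 0;
    }

The circular Prev link is what lets addRegOperandToUseList below append uses at the tail without a separate tail pointer, while defs are inserted at the head so that def_iterator can stop early.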
diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp index e553a04..b166849 100644 --- a/lib/CodeGen/MachineInstr.cpp +++ b/lib/CodeGen/MachineInstr.cpp @@ -13,6 +13,7 @@ #include "llvm/CodeGen/MachineInstr.h" #include "llvm/Constants.h" +#include "llvm/DebugInfo.h" #include "llvm/Function.h" #include "llvm/InlineAsm.h" #include "llvm/LLVMContext.h" @@ -33,7 +34,6 @@ #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/Analysis/DebugInfo.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/LeakDetector.h" @@ -47,55 +47,6 @@ using namespace llvm; // MachineOperand Implementation //===----------------------------------------------------------------------===// -/// AddRegOperandToRegInfo - Add this register operand to the specified -/// MachineRegisterInfo. If it is null, then the next/prev fields should be -/// explicitly nulled out. -void MachineOperand::AddRegOperandToRegInfo(MachineRegisterInfo *RegInfo) { - assert(isReg() && "Can only add reg operand to use lists"); - - // If the reginfo pointer is null, just explicitly null out or next/prev - // pointers, to ensure they are not garbage. - if (RegInfo == 0) { - Contents.Reg.Prev = 0; - Contents.Reg.Next = 0; - return; - } - - // Otherwise, add this operand to the head of the registers use/def list. - MachineOperand **Head = &RegInfo->getRegUseDefListHead(getReg()); - - // For SSA values, we prefer to keep the definition at the start of the list. - // we do this by skipping over the definition if it is at the head of the - // list. - if (*Head && (*Head)->isDef()) - Head = &(*Head)->Contents.Reg.Next; - - Contents.Reg.Next = *Head; - if (Contents.Reg.Next) { - assert(getReg() == Contents.Reg.Next->getReg() && - "Different regs on the same list!"); - Contents.Reg.Next->Contents.Reg.Prev = &Contents.Reg.Next; - } - - Contents.Reg.Prev = Head; - *Head = this; -} - -/// RemoveRegOperandFromRegInfo - Remove this register operand from the -/// MachineRegisterInfo it is linked with. -void MachineOperand::RemoveRegOperandFromRegInfo() { - assert(isOnRegUseList() && "Reg operand is not on a use list"); - // Unlink this from the doubly linked list of operands. - MachineOperand *NextOp = Contents.Reg.Next; - *Contents.Reg.Prev = NextOp; - if (NextOp) { - assert(NextOp->getReg() == getReg() && "Corrupt reg use/def chain!"); - NextOp->Contents.Reg.Prev = Contents.Reg.Prev; - } - Contents.Reg.Prev = 0; - Contents.Reg.Next = 0; -} - void MachineOperand::setReg(unsigned Reg) { if (getReg() == Reg) return; // No change. @@ -105,9 +56,10 @@ void MachineOperand::setReg(unsigned Reg) { if (MachineInstr *MI = getParent()) if (MachineBasicBlock *MBB = MI->getParent()) if (MachineFunction *MF = MBB->getParent()) { - RemoveRegOperandFromRegInfo(); + MachineRegisterInfo &MRI = MF->getRegInfo(); + MRI.removeRegOperandFromUseList(this); SmallContents.RegNo = Reg; - AddRegOperandToRegInfo(&MF->getRegInfo()); + MRI.addRegOperandToUseList(this); return; } @@ -136,15 +88,36 @@ void MachineOperand::substPhysReg(unsigned Reg, const TargetRegisterInfo &TRI) { setReg(Reg); } +/// Change a def to a use, or a use to a def. +void MachineOperand::setIsDef(bool Val) { + assert(isReg() && "Wrong MachineOperand accessor"); + assert((!Val || !isDebug()) && "Marking a debug operation as def"); + if (IsDef == Val) + return; + // MRI may keep uses and defs in different list positions. 
+ if (MachineInstr *MI = getParent()) + if (MachineBasicBlock *MBB = MI->getParent()) + if (MachineFunction *MF = MBB->getParent()) { + MachineRegisterInfo &MRI = MF->getRegInfo(); + MRI.removeRegOperandFromUseList(this); + IsDef = Val; + MRI.addRegOperandToUseList(this); + return; + } + IsDef = Val; +} + /// ChangeToImmediate - Replace this operand with a new immediate operand of /// the specified value. If an operand is known to be an immediate already, /// the setImm method should be used. void MachineOperand::ChangeToImmediate(int64_t ImmVal) { // If this operand is currently a register operand, and if this is in a // function, deregister the operand from the register's use/def list. - if (isReg() && getParent() && getParent()->getParent() && - getParent()->getParent()->getParent()) - RemoveRegOperandFromRegInfo(); + if (isReg() && isOnRegUseList()) + if (MachineInstr *MI = getParent()) + if (MachineBasicBlock *MBB = MI->getParent()) + if (MachineFunction *MF = MBB->getParent()) + MF->getRegInfo().removeRegOperandFromUseList(this); OpKind = MO_Immediate; Contents.ImmVal = ImmVal; @@ -156,24 +129,20 @@ void MachineOperand::ChangeToImmediate(int64_t ImmVal) { void MachineOperand::ChangeToRegister(unsigned Reg, bool isDef, bool isImp, bool isKill, bool isDead, bool isUndef, bool isDebug) { - // If this operand is already a register operand, use setReg to update the + MachineRegisterInfo *RegInfo = 0; + if (MachineInstr *MI = getParent()) + if (MachineBasicBlock *MBB = MI->getParent()) + if (MachineFunction *MF = MBB->getParent()) + RegInfo = &MF->getRegInfo(); + // If this operand is already a register operand, remove it from the // register's use/def lists. - if (isReg()) { - assert(!isEarlyClobber()); - setReg(Reg); - } else { - // Otherwise, change this to a register and set the reg#. - OpKind = MO_Register; - SmallContents.RegNo = Reg; - - // If this operand is embedded in a function, add the operand to the - // register's use/def list. - if (MachineInstr *MI = getParent()) - if (MachineBasicBlock *MBB = MI->getParent()) - if (MachineFunction *MF = MBB->getParent()) - AddRegOperandToRegInfo(&MF->getRegInfo()); - } + if (RegInfo && isReg()) + RegInfo->removeRegOperandFromUseList(this); + // Change this to a register and set the reg#. + OpKind = MO_Register; + SmallContents.RegNo = Reg; + SubReg = 0; IsDef = isDef; IsImp = isImp; IsKill = isKill; @@ -182,11 +151,18 @@ void MachineOperand::ChangeToRegister(unsigned Reg, bool isDef, bool isImp, IsInternalRead = false; IsEarlyClobber = false; IsDebug = isDebug; - SubReg = 0; + // Ensure isOnRegUseList() returns false. + Contents.Reg.Prev = 0; + + // If this operand is embedded in a function, add the operand to the + // register's use/def list. + if (RegInfo) + RegInfo->addRegOperandToUseList(this); } /// isIdenticalTo - Return true if this operand is identical to the specified -/// operand. +/// operand. Note that this should stay in sync with the hash_value overload +/// below. 
bool MachineOperand::isIdenticalTo(const MachineOperand &Other) const { if (getType() != Other.getType() || getTargetFlags() != Other.getTargetFlags()) @@ -207,6 +183,7 @@ bool MachineOperand::isIdenticalTo(const MachineOperand &Other) const { case MachineOperand::MO_FrameIndex: return getIndex() == Other.getIndex(); case MachineOperand::MO_ConstantPoolIndex: + case MachineOperand::MO_TargetIndex: return getIndex() == Other.getIndex() && getOffset() == Other.getOffset(); case MachineOperand::MO_JumpTableIndex: return getIndex() == Other.getIndex(); @@ -227,6 +204,47 @@ bool MachineOperand::isIdenticalTo(const MachineOperand &Other) const { llvm_unreachable("Invalid machine operand type"); } +// Note: this must stay exactly in sync with isIdenticalTo above. +hash_code llvm::hash_value(const MachineOperand &MO) { + switch (MO.getType()) { + case MachineOperand::MO_Register: + return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getReg(), + MO.getSubReg(), MO.isDef()); + case MachineOperand::MO_Immediate: + return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getImm()); + case MachineOperand::MO_CImmediate: + return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getCImm()); + case MachineOperand::MO_FPImmediate: + return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getFPImm()); + case MachineOperand::MO_MachineBasicBlock: + return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getMBB()); + case MachineOperand::MO_FrameIndex: + return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getIndex()); + case MachineOperand::MO_ConstantPoolIndex: + case MachineOperand::MO_TargetIndex: + return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getIndex(), + MO.getOffset()); + case MachineOperand::MO_JumpTableIndex: + return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getIndex()); + case MachineOperand::MO_ExternalSymbol: + return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getOffset(), + MO.getSymbolName()); + case MachineOperand::MO_GlobalAddress: + return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getGlobal(), + MO.getOffset()); + case MachineOperand::MO_BlockAddress: + return hash_combine(MO.getType(), MO.getTargetFlags(), + MO.getBlockAddress()); + case MachineOperand::MO_RegisterMask: + return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getRegMask()); + case MachineOperand::MO_Metadata: + return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getMetadata()); + case MachineOperand::MO_MCSymbol: + return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getMCSymbol()); + } + llvm_unreachable("Invalid machine operand type"); +} + /// print - Print the specified machine operand. /// void MachineOperand::print(raw_ostream &OS, const TargetMachine *TM) const { @@ -255,12 +273,16 @@ void MachineOperand::print(raw_ostream &OS, const TargetMachine *TM) const { OS << "imp-"; OS << "def"; NeedComma = true; + // only makes sense when getSubReg() is set. + // Don't clutter the output otherwise. 
+ if (isUndef() && getSubReg()) + OS << ",read-undef"; } else if (isImplicit()) { OS << "imp-use"; NeedComma = true; } - if (isKill() || isDead() || isUndef() || isInternalRead()) { + if (isKill() || isDead() || (isUndef() && isUse()) || isInternalRead()) { if (NeedComma) OS << ','; NeedComma = false; if (isKill()) { @@ -271,7 +293,7 @@ void MachineOperand::print(raw_ostream &OS, const TargetMachine *TM) const { OS << "dead"; NeedComma = true; } - if (isUndef()) { + if (isUndef() && isUse()) { if (NeedComma) OS << ','; OS << "undef"; NeedComma = true; @@ -308,6 +330,11 @@ void MachineOperand::print(raw_ostream &OS, const TargetMachine *TM) const { if (getOffset()) OS << "+" << getOffset(); OS << '>'; break; + case MachineOperand::MO_TargetIndex: + OS << "'; + break; case MachineOperand::MO_JumpTableIndex: OS << "'; break; @@ -605,24 +632,21 @@ MachineRegisterInfo *MachineInstr::getRegInfo() { /// RemoveRegOperandsFromUseLists - Unlink all of the register operands in /// this instruction from their respective use lists. This requires that the /// operands already be on their use lists. -void MachineInstr::RemoveRegOperandsFromUseLists() { - for (unsigned i = 0, e = Operands.size(); i != e; ++i) { +void MachineInstr::RemoveRegOperandsFromUseLists(MachineRegisterInfo &MRI) { + for (unsigned i = 0, e = Operands.size(); i != e; ++i) if (Operands[i].isReg()) - Operands[i].RemoveRegOperandFromRegInfo(); - } + MRI.removeRegOperandFromUseList(&Operands[i]); } /// AddRegOperandsToUseLists - Add all of the register operands in /// this instruction from their respective use lists. This requires that the /// operands not be on their use lists yet. -void MachineInstr::AddRegOperandsToUseLists(MachineRegisterInfo &RegInfo) { - for (unsigned i = 0, e = Operands.size(); i != e; ++i) { +void MachineInstr::AddRegOperandsToUseLists(MachineRegisterInfo &MRI) { + for (unsigned i = 0, e = Operands.size(); i != e; ++i) if (Operands[i].isReg()) - Operands[i].AddRegOperandToRegInfo(&RegInfo); - } + MRI.addRegOperandToUseList(&Operands[i]); } - /// addOperand - Add the specified operand to the instruction. If it is an /// implicit operand, it is added to the end of the operand list. If it is /// an explicit operand it is added at the end of the explicit operand list @@ -650,13 +674,15 @@ void MachineInstr::addOperand(const MachineOperand &Op) { while (OpNo && Operands[OpNo-1].isReg() && Operands[OpNo-1].isImplicit()) { --OpNo; if (RegInfo) - Operands[OpNo].RemoveRegOperandFromRegInfo(); + RegInfo->removeRegOperandFromUseList(&Operands[OpNo]); } } // OpNo now points as the desired insertion point. Unless this is a variadic // instruction, only implicit regs are allowed beyond MCID->getNumOperands(). - assert((isImpReg || MCID->isVariadic() || OpNo < MCID->getNumOperands()) && + // RegMask operands go between the explicit and implicit operands. + assert((isImpReg || Op.isRegMask() || MCID->isVariadic() || + OpNo < MCID->getNumOperands()) && "Trying to add an operand to a machine instr that is already done!"); // All operands from OpNo have been removed from RegInfo. If the Operands @@ -665,7 +691,7 @@ void MachineInstr::addOperand(const MachineOperand &Op) { if (Reallocate) for (unsigned i = 0; i != OpNo; ++i) if (Operands[i].isReg()) - Operands[i].RemoveRegOperandFromRegInfo(); + RegInfo->removeRegOperandFromUseList(&Operands[i]); // Insert the new operand at OpNo. 
Operands.insert(Operands.begin() + OpNo, Op); @@ -676,13 +702,15 @@ void MachineInstr::addOperand(const MachineOperand &Op) { if (Reallocate) for (unsigned i = 0; i != OpNo; ++i) if (Operands[i].isReg()) - Operands[i].AddRegOperandToRegInfo(RegInfo); + RegInfo->addRegOperandToUseList(&Operands[i]); // When adding a register operand, tell RegInfo about it. if (Operands[OpNo].isReg()) { - // Add the new operand to RegInfo, even when RegInfo is NULL. - // This will initialize the linked list pointers. - Operands[OpNo].AddRegOperandToRegInfo(RegInfo); + // Ensure isOnRegUseList() returns false, regardless of Op's status. + Operands[OpNo].Contents.Reg.Prev = 0; + // Add the new operand to RegInfo. + if (RegInfo) + RegInfo->addRegOperandToUseList(&Operands[OpNo]); // If the register operand is flagged as early, mark the operand as such. if (MCID->getOperandConstraint(OpNo, MCOI::EARLY_CLOBBER) != -1) Operands[OpNo].setIsEarlyClobber(true); @@ -692,7 +720,7 @@ void MachineInstr::addOperand(const MachineOperand &Op) { if (RegInfo) { for (unsigned i = OpNo + 1, e = Operands.size(); i != e; ++i) { assert(Operands[i].isReg() && "Should only be an implicit reg!"); - Operands[i].AddRegOperandToRegInfo(RegInfo); + RegInfo->addRegOperandToUseList(&Operands[i]); } } } @@ -702,12 +730,13 @@ void MachineInstr::addOperand(const MachineOperand &Op) { /// void MachineInstr::RemoveOperand(unsigned OpNo) { assert(OpNo < Operands.size() && "Invalid operand number"); + MachineRegisterInfo *RegInfo = getRegInfo(); // Special case removing the last one. if (OpNo == Operands.size()-1) { // If needed, remove from the reg def/use list. - if (Operands.back().isReg() && Operands.back().isOnRegUseList()) - Operands.back().RemoveRegOperandFromRegInfo(); + if (RegInfo && Operands.back().isReg() && Operands.back().isOnRegUseList()) + RegInfo->removeRegOperandFromUseList(&Operands.back()); Operands.pop_back(); return; @@ -716,11 +745,10 @@ void MachineInstr::RemoveOperand(unsigned OpNo) { // Otherwise, we are removing an interior operand. If we have reginfo to // update, remove all operands that will be shifted down from their reg lists, // move everything down, then re-add them. - MachineRegisterInfo *RegInfo = getRegInfo(); if (RegInfo) { for (unsigned i = OpNo, e = Operands.size(); i != e; ++i) { if (Operands[i].isReg()) - Operands[i].RemoveRegOperandFromRegInfo(); + RegInfo->removeRegOperandFromUseList(&Operands[i]); } } @@ -729,7 +757,7 @@ void MachineInstr::RemoveOperand(unsigned OpNo) { if (RegInfo) { for (unsigned i = OpNo, e = Operands.size(); i != e; ++i) { if (Operands[i].isReg()) - Operands[i].AddRegOperandToRegInfo(RegInfo); + RegInfo->addRegOperandToUseList(&Operands[i]); } } } @@ -868,7 +896,8 @@ void MachineInstr::eraseFromParent() { MBB->erase(MI); } } - getParent()->erase(this); + // Erase the individual instruction, which may itself be inside a bundle. + getParent()->erase_instr(this); } @@ -938,9 +967,13 @@ const TargetRegisterClass* MachineInstr::getRegClassConstraint(unsigned OpIdx, const TargetInstrInfo *TII, const TargetRegisterInfo *TRI) const { + assert(getParent() && "Can't have an MBB reference here!"); + assert(getParent()->getParent() && "Can't have an MF reference here!"); + const MachineFunction &MF = *getParent()->getParent(); + // Most opcodes have fixed constraints in their MCInstrDesc. 
if (!isInlineAsm()) - return TII->getRegClass(getDesc(), OpIdx, TRI); + return TII->getRegClass(getDesc(), OpIdx, TRI, MF); if (!getOperand(OpIdx).isReg()) return NULL; @@ -962,7 +995,7 @@ MachineInstr::getRegClassConstraint(unsigned OpIdx, // Assume that all registers in a memory operand are pointers. if (InlineAsm::getKind(Flag) == InlineAsm::Kind_Mem) - return TRI->getPointerRegClass(); + return TRI->getPointerRegClass(MF); return NULL; } @@ -1530,12 +1563,14 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM) const { const MachineRegisterInfo &MRI = MF->getRegInfo(); if (MRI.use_empty(Reg) && !MRI.isLiveOut(Reg)) { bool HasAliasLive = false; - for (const uint16_t *Alias = TM->getRegisterInfo()->getAliasSet(Reg); - unsigned AliasReg = *Alias; ++Alias) + for (MCRegAliasIterator AI(Reg, TM->getRegisterInfo(), true); + AI.isValid(); ++AI) { + unsigned AliasReg = *AI; if (!MRI.use_empty(AliasReg) || MRI.isLiveOut(AliasReg)) { HasAliasLive = true; break; } + } if (!HasAliasLive) { OmittedAnyCallClobbers = true; continue; @@ -1667,7 +1702,8 @@ bool MachineInstr::addRegisterKilled(unsigned IncomingReg, const TargetRegisterInfo *RegInfo, bool AddIfNotFound) { bool isPhysReg = TargetRegisterInfo::isPhysicalRegister(IncomingReg); - bool hasAliases = isPhysReg && RegInfo->getAliasSet(IncomingReg); + bool hasAliases = isPhysReg && + MCRegAliasIterator(IncomingReg, RegInfo, false).isValid(); bool Found = false; SmallVector DeadOps; for (unsigned i = 0, e = getNumOperands(); i != e; ++i) { @@ -1739,7 +1775,8 @@ bool MachineInstr::addRegisterDead(unsigned IncomingReg, const TargetRegisterInfo *RegInfo, bool AddIfNotFound) { bool isPhysReg = TargetRegisterInfo::isPhysicalRegister(IncomingReg); - bool hasAliases = isPhysReg && RegInfo->getAliasSet(IncomingReg); + bool hasAliases = isPhysReg && + MCRegAliasIterator(IncomingReg, RegInfo, false).isValid(); bool Found = false; SmallVector DeadOps; for (unsigned i = 0, e = getNumOperands(); i != e; ++i) { @@ -1758,9 +1795,7 @@ bool MachineInstr::addRegisterDead(unsigned IncomingReg, // There exists a super-register that's marked dead. if (RegInfo->isSuperRegister(IncomingReg, Reg)) return true; - if (RegInfo->getSubRegisters(IncomingReg) && - RegInfo->getSuperRegisters(Reg) && - RegInfo->isSubRegister(IncomingReg, Reg)) + if (RegInfo->isSubRegister(IncomingReg, Reg)) DeadOps.push_back(i); } } @@ -1841,52 +1876,16 @@ void MachineInstr::setPhysRegsDeadExcept(ArrayRef UsedRegs, unsigned MachineInstrExpressionTrait::getHashValue(const MachineInstr* const &MI) { // Build up a buffer of hash code components. - // - // FIXME: This is a total hack. We should have a hash_value overload for - // MachineOperand, but currently that doesn't work because there are many - // different ideas of "equality" and thus different sets of information that - // contribute to the hash code. This one happens to want to take a specific - // subset. And it's still not clear that this routine uses the *correct* - // subset of information when computing the hash code. The goal is to use the - // same inputs for the hash code here that MachineInstr::isIdenticalTo uses to - // test for equality when passed the 'IgnoreVRegDefs' filter flag. It would - // be very useful to factor the selection of relevant inputs out of the two - // functions and into a common routine, but it's not clear how that can be - // done. 
SmallVector HashComponents; HashComponents.reserve(MI->getNumOperands() + 1); HashComponents.push_back(MI->getOpcode()); for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { const MachineOperand &MO = MI->getOperand(i); - switch (MO.getType()) { - default: break; - case MachineOperand::MO_Register: - if (MO.isDef() && TargetRegisterInfo::isVirtualRegister(MO.getReg())) - continue; // Skip virtual register defs. - HashComponents.push_back(hash_combine(MO.getType(), MO.getReg())); - break; - case MachineOperand::MO_Immediate: - HashComponents.push_back(hash_combine(MO.getType(), MO.getImm())); - break; - case MachineOperand::MO_FrameIndex: - case MachineOperand::MO_ConstantPoolIndex: - case MachineOperand::MO_JumpTableIndex: - HashComponents.push_back(hash_combine(MO.getType(), MO.getIndex())); - break; - case MachineOperand::MO_MachineBasicBlock: - HashComponents.push_back(hash_combine(MO.getType(), MO.getMBB())); - break; - case MachineOperand::MO_GlobalAddress: - HashComponents.push_back(hash_combine(MO.getType(), MO.getGlobal())); - break; - case MachineOperand::MO_BlockAddress: - HashComponents.push_back(hash_combine(MO.getType(), - MO.getBlockAddress())); - break; - case MachineOperand::MO_MCSymbol: - HashComponents.push_back(hash_combine(MO.getType(), MO.getMCSymbol())); - break; - } + if (MO.isReg() && MO.isDef() && + TargetRegisterInfo::isVirtualRegister(MO.getReg())) + continue; // Skip virtual register defs. + + HashComponents.push_back(hash_value(MO)); } return hash_combine_range(HashComponents.begin(), HashComponents.end()); } diff --git a/lib/CodeGen/MachineInstrBundle.cpp b/lib/CodeGen/MachineInstrBundle.cpp index 73489a7..b7de7bf 100644 --- a/lib/CodeGen/MachineInstrBundle.cpp +++ b/lib/CodeGen/MachineInstrBundle.cpp @@ -169,8 +169,8 @@ void llvm::finalizeBundle(MachineBasicBlock &MBB, } if (!MO.isDead()) { - for (const uint16_t *SubRegs = TRI->getSubRegisters(Reg); - unsigned SubReg = *SubRegs; ++SubRegs) { + for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) { + unsigned SubReg = *SubRegs; if (LocalDefSet.insert(SubReg)) LocalDefs.push_back(SubReg); } diff --git a/lib/CodeGen/MachineLICM.cpp b/lib/CodeGen/MachineLICM.cpp index 8c562cc..efec481 100644 --- a/lib/CodeGen/MachineLICM.cpp +++ b/lib/CodeGen/MachineLICM.cpp @@ -445,8 +445,8 @@ void MachineLICM::ProcessMI(MachineInstr *MI, } if (MO.isImplicit()) { - for (const uint16_t *AS = TRI->getOverlaps(Reg); *AS; ++AS) - PhysRegClobbers.set(*AS); + for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) + PhysRegClobbers.set(*AI); if (!MO.isDead()) // Non-dead implicit def? This cannot be hoisted. RuledOut = true; @@ -465,7 +465,7 @@ void MachineLICM::ProcessMI(MachineInstr *MI, // If we have already seen another instruction that defines the same // register, then this is not safe. Two defs is indicated by setting a // PhysRegClobbers bit. 
- for (const uint16_t *AS = TRI->getOverlaps(Reg); *AS; ++AS) { + for (MCRegAliasIterator AS(Reg, TRI, true); AS.isValid(); ++AS) { if (PhysRegDefs.test(*AS)) PhysRegClobbers.set(*AS); if (PhysRegClobbers.test(*AS)) @@ -517,8 +517,8 @@ void MachineLICM::HoistRegionPostRA() { for (MachineBasicBlock::livein_iterator I = BB->livein_begin(), E = BB->livein_end(); I != E; ++I) { unsigned Reg = *I; - for (const uint16_t *AS = TRI->getOverlaps(Reg); *AS; ++AS) - PhysRegDefs.set(*AS); + for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) + PhysRegDefs.set(*AI); } SpeculationState = SpeculateUnknown; @@ -540,8 +540,8 @@ void MachineLICM::HoistRegionPostRA() { unsigned Reg = MO.getReg(); if (!Reg) continue; - for (const uint16_t *AS = TRI->getOverlaps(Reg); *AS; ++AS) - TermRegs.set(*AS); + for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) + TermRegs.set(*AI); } } @@ -1260,11 +1260,11 @@ MachineInstr *MachineLICM::ExtractHoistableLoad(MachineInstr *MI) { if (NewOpc == 0) return 0; const MCInstrDesc &MID = TII->get(NewOpc); if (MID.getNumDefs() != 1) return 0; - const TargetRegisterClass *RC = TII->getRegClass(MID, LoadRegIndex, TRI); + MachineFunction &MF = *MI->getParent()->getParent(); + const TargetRegisterClass *RC = TII->getRegClass(MID, LoadRegIndex, TRI, MF); // Ok, we're unfolding. Create a temporary register and do the unfold. unsigned Reg = MRI->createVirtualRegister(RC); - MachineFunction &MF = *MI->getParent()->getParent(); SmallVector NewMIs; bool Success = TII->unfoldMemoryOperand(MF, MI, Reg, diff --git a/lib/CodeGen/MachineLoopInfo.cpp b/lib/CodeGen/MachineLoopInfo.cpp index 189cb2b..9f3829e 100644 --- a/lib/CodeGen/MachineLoopInfo.cpp +++ b/lib/CodeGen/MachineLoopInfo.cpp @@ -9,7 +9,7 @@ // // This file defines the MachineLoopInfo class that is used to identify natural // loops and determine the loop depth of various nodes of the CFG. Note that -// the loops identified may actually be several natural loops that share the +// the loops identified may actually be several natural loops that share the // same header node... not just a single natural loop. // //===----------------------------------------------------------------------===// @@ -17,17 +17,13 @@ #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/Passes.h" +#include "llvm/Analysis/LoopInfoImpl.h" #include "llvm/Support/Debug.h" using namespace llvm; -namespace llvm { -#define MLB class LoopBase -TEMPLATE_INSTANTIATION(MLB); -#undef MLB -#define MLIB class LoopInfoBase -TEMPLATE_INSTANTIATION(MLIB); -#undef MLIB -} +// Explicitly instantiate methods in LoopInfoImpl.h for MI-level Loops. +template class llvm::LoopBase; +template class llvm::LoopInfoBase; char MachineLoopInfo::ID = 0; INITIALIZE_PASS_BEGIN(MachineLoopInfo, "machine-loops", @@ -40,7 +36,7 @@ char &llvm::MachineLoopInfoID = MachineLoopInfo::ID; bool MachineLoopInfo::runOnMachineFunction(MachineFunction &) { releaseMemory(); - LI.Calculate(getAnalysis().getBase()); // Update + LI.Analyze(getAnalysis().getBase()); return false; } diff --git a/lib/CodeGen/MachinePassRegistry.cpp b/lib/CodeGen/MachinePassRegistry.cpp index 58e067b..cb204fd 100644 --- a/lib/CodeGen/MachinePassRegistry.cpp +++ b/lib/CodeGen/MachinePassRegistry.cpp @@ -18,6 +18,19 @@ using namespace llvm; void MachinePassRegistryListener::anchor() { } +/// setDefault - Set the default constructor by name. 
+void MachinePassRegistry::setDefault(StringRef Name) { + MachinePassCtor Ctor = 0; + for(MachinePassRegistryNode *R = getList(); R; R = R->getNext()) { + if (R->getName() == Name) { + Ctor = R->getCtor(); + break; + } + } + assert(Ctor && "Unregistered pass name"); + setDefault(Ctor); +} + /// Add - Adds a function pass to the registration list. /// void MachinePassRegistry::Add(MachinePassRegistryNode *Node) { diff --git a/lib/CodeGen/MachineRegisterInfo.cpp b/lib/CodeGen/MachineRegisterInfo.cpp index 7ea1517..5fb938f 100644 --- a/lib/CodeGen/MachineRegisterInfo.cpp +++ b/lib/CodeGen/MachineRegisterInfo.cpp @@ -102,17 +102,9 @@ MachineRegisterInfo::createVirtualRegister(const TargetRegisterClass *RegClass){ // New virtual register number. unsigned Reg = TargetRegisterInfo::index2VirtReg(getNumVirtRegs()); - - // Add a reg, but keep track of whether the vector reallocated or not. - const unsigned FirstVirtReg = TargetRegisterInfo::index2VirtReg(0); - void *ArrayBase = getNumVirtRegs() == 0 ? 0 : &VRegInfo[FirstVirtReg]; VRegInfo.grow(Reg); VRegInfo[Reg].first = RegClass; RegAllocHints.grow(Reg); - - if (ArrayBase && &VRegInfo[FirstVirtReg] != ArrayBase) - // The vector reallocated, handle this now. - HandleVRegListReallocation(); return Reg; } @@ -126,21 +118,68 @@ void MachineRegisterInfo::clearVirtRegs() { VRegInfo.clear(); } -/// HandleVRegListReallocation - We just added a virtual register to the -/// VRegInfo info list and it reallocated. Update the use/def lists info -/// pointers. -void MachineRegisterInfo::HandleVRegListReallocation() { - // The back pointers for the vreg lists point into the previous vector. - // Update them to point to their correct slots. - for (unsigned i = 0, e = getNumVirtRegs(); i != e; ++i) { - unsigned Reg = TargetRegisterInfo::index2VirtReg(i); - MachineOperand *List = VRegInfo[Reg].second; - if (!List) continue; - // Update the back-pointer to be accurate once more. - List->Contents.Reg.Prev = &VRegInfo[Reg].second; +/// Add MO to the linked list of operands for its register. +void MachineRegisterInfo::addRegOperandToUseList(MachineOperand *MO) { + assert(!MO->isOnRegUseList() && "Already on list"); + MachineOperand *&HeadRef = getRegUseDefListHead(MO->getReg()); + MachineOperand *const Head = HeadRef; + + // Head points to the first list element. + // Next is NULL on the last list element. + // Prev pointers are circular, so Head->Prev == Last. + + // Head is NULL for an empty list. + if (!Head) { + MO->Contents.Reg.Prev = MO; + MO->Contents.Reg.Next = 0; + HeadRef = MO; + return; + } + assert(MO->getReg() == Head->getReg() && "Different regs on the same list!"); + + // Insert MO between Last and Head in the circular Prev chain. + MachineOperand *Last = Head->Contents.Reg.Prev; + assert(Last && "Inconsistent use list"); + assert(MO->getReg() == Last->getReg() && "Different regs on the same list!"); + Head->Contents.Reg.Prev = MO; + MO->Contents.Reg.Prev = Last; + + // Def operands always precede uses. This allows def_iterator to stop early. + // Insert def operands at the front, and use operands at the back. + if (MO->isDef()) { + // Insert def at the front. + MO->Contents.Reg.Next = Head; + HeadRef = MO; + } else { + // Insert use at the end. + MO->Contents.Reg.Next = 0; + Last->Contents.Reg.Next = MO; } } +/// Remove MO from its use-def list. 
+void MachineRegisterInfo::removeRegOperandFromUseList(MachineOperand *MO) { + assert(MO->isOnRegUseList() && "Operand not on use list"); + MachineOperand *&HeadRef = getRegUseDefListHead(MO->getReg()); + MachineOperand *const Head = HeadRef; + assert(Head && "List already empty"); + + // Unlink this from the doubly linked list of operands. + MachineOperand *Next = MO->Contents.Reg.Next; + MachineOperand *Prev = MO->Contents.Reg.Prev; + + // Prev links are circular, next link is NULL instead of looping back to Head. + if (MO == Head) + HeadRef = Next; + else + Prev->Contents.Reg.Next = Next; + + (Next ? Next : Head)->Contents.Reg.Prev = Prev; + + MO->Contents.Reg.Prev = 0; + MO->Contents.Reg.Next = 0; +} + /// replaceRegWith - Replace all instances of FromReg with ToReg in the /// machine function. This is like llvm-level X->replaceAllUsesWith(Y), /// except that it also changes any definitions of the register as well. @@ -162,14 +201,20 @@ void MachineRegisterInfo::replaceRegWith(unsigned FromReg, unsigned ToReg) { MachineInstr *MachineRegisterInfo::getVRegDef(unsigned Reg) const { // Since we are in SSA form, we can use the first definition. def_iterator I = def_begin(Reg); + assert((I.atEnd() || llvm::next(I) == def_end()) && + "getVRegDef assumes a single definition or no definition"); return !I.atEnd() ? &*I : 0; } -bool MachineRegisterInfo::hasOneUse(unsigned RegNo) const { - use_iterator UI = use_begin(RegNo); - if (UI == use_end()) - return false; - return ++UI == use_end(); +/// getUniqueVRegDef - Return the unique machine instr that defines the +/// specified virtual register or null if none is found. If there are +/// multiple definitions or no definition, return null. +MachineInstr *MachineRegisterInfo::getUniqueVRegDef(unsigned Reg) const { + if (def_empty(Reg)) return 0; + def_iterator I = def_begin(Reg); + if (llvm::next(I) != def_end()) + return 0; + return &*I; } bool MachineRegisterInfo::hasOneNonDBGUse(unsigned RegNo) const { @@ -268,15 +313,15 @@ bool MachineRegisterInfo::isConstantPhysReg(unsigned PhysReg, assert(TargetRegisterInfo::isPhysicalRegister(PhysReg)); // Check if any overlapping register is modified. - for (const uint16_t *R = TRI->getOverlaps(PhysReg); *R; ++R) - if (!def_empty(*R)) + for (MCRegAliasIterator AI(PhysReg, TRI, true); AI.isValid(); ++AI) + if (!def_empty(*AI)) return false; // Check if any overlapping register is allocatable so it may be used later. if (AllocatableRegs.empty()) AllocatableRegs = TRI->getAllocatableSet(MF); - for (const uint16_t *R = TRI->getOverlaps(PhysReg); *R; ++R) - if (AllocatableRegs.test(*R)) + for (MCRegAliasIterator AI(PhysReg, TRI, true); AI.isValid(); ++AI) + if (AllocatableRegs.test(*AI)) return false; return true; } diff --git a/lib/CodeGen/MachineSSAUpdater.cpp b/lib/CodeGen/MachineSSAUpdater.cpp index 070a557..076547a 100644 --- a/lib/CodeGen/MachineSSAUpdater.cpp +++ b/lib/CodeGen/MachineSSAUpdater.cpp @@ -42,7 +42,7 @@ MachineSSAUpdater::MachineSSAUpdater(MachineFunction &MF, } MachineSSAUpdater::~MachineSSAUpdater() { - delete &getAvailableVals(AV); + delete static_cast(AV); } /// Initialize - Reset this object to get ready for a new set of SSA @@ -241,30 +241,6 @@ void MachineSSAUpdater::ReplaceRegWith(unsigned OldReg, unsigned NewReg) { I->second = NewReg; } -/// MachinePHIiter - Iterator for PHI operands. This is used for the -/// PHI_iterator in the SSAUpdaterImpl template. 
-namespace { - class MachinePHIiter { - private: - MachineInstr *PHI; - unsigned idx; - - public: - explicit MachinePHIiter(MachineInstr *P) // begin iterator - : PHI(P), idx(1) {} - MachinePHIiter(MachineInstr *P, bool) // end iterator - : PHI(P), idx(PHI->getNumOperands()) {} - - MachinePHIiter &operator++() { idx += 2; return *this; } - bool operator==(const MachinePHIiter& x) const { return idx == x.idx; } - bool operator!=(const MachinePHIiter& x) const { return !operator==(x); } - unsigned getIncomingValue() { return PHI->getOperand(idx).getReg(); } - MachineBasicBlock *getIncomingBlock() { - return PHI->getOperand(idx+1).getMBB(); - } - }; -} - /// SSAUpdaterTraits - Traits for the SSAUpdaterImpl /// template, specialized for MachineSSAUpdater. namespace llvm { @@ -279,7 +255,26 @@ public: static BlkSucc_iterator BlkSucc_begin(BlkT *BB) { return BB->succ_begin(); } static BlkSucc_iterator BlkSucc_end(BlkT *BB) { return BB->succ_end(); } - typedef MachinePHIiter PHI_iterator; + /// Iterator for PHI operands. + class PHI_iterator { + private: + MachineInstr *PHI; + unsigned idx; + + public: + explicit PHI_iterator(MachineInstr *P) // begin iterator + : PHI(P), idx(1) {} + PHI_iterator(MachineInstr *P, bool) // end iterator + : PHI(P), idx(PHI->getNumOperands()) {} + + PHI_iterator &operator++() { idx += 2; return *this; } + bool operator==(const PHI_iterator& x) const { return idx == x.idx; } + bool operator!=(const PHI_iterator& x) const { return !operator==(x); } + unsigned getIncomingValue() { return PHI->getOperand(idx).getReg(); } + MachineBasicBlock *getIncomingBlock() { + return PHI->getOperand(idx+1).getMBB(); + } + }; static inline PHI_iterator PHI_begin(PhiT *PHI) { return PHI_iterator(PHI); } static inline PHI_iterator PHI_end(PhiT *PHI) { return PHI_iterator(PHI, true); diff --git a/lib/CodeGen/MachineScheduler.cpp b/lib/CodeGen/MachineScheduler.cpp index 1d3241b..a1dc948 100644 --- a/lib/CodeGen/MachineScheduler.cpp +++ b/lib/CodeGen/MachineScheduler.cpp @@ -17,9 +17,13 @@ #include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/MachineScheduler.h" #include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/RegisterClassInfo.h" +#include "llvm/CodeGen/RegisterPressure.h" #include "llvm/CodeGen/ScheduleDAGInstrs.h" -#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/CodeGen/ScheduleHazardRecognizer.h" #include "llvm/Target/TargetInstrInfo.h" +#include "llvm/MC/MCInstrItineraries.h" +#include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" @@ -50,6 +54,15 @@ static bool ViewMISchedDAGs = false; // Machine Instruction Scheduling Pass and Registry //===----------------------------------------------------------------------===// +MachineSchedContext::MachineSchedContext(): + MF(0), MLI(0), MDT(0), PassConfig(0), AA(0), LIS(0) { + RegClassInfo = new RegisterClassInfo(); +} + +MachineSchedContext::~MachineSchedContext() { + delete RegClassInfo; +} + namespace { /// MachineScheduler runs after coalescing and before register allocation. class MachineScheduler : public MachineSchedContext, @@ -122,6 +135,29 @@ DefaultSchedRegistry("default", "Use the target's default scheduler choice.", /// default scheduler if the target does not set a default. static ScheduleDAGInstrs *createConvergingSched(MachineSchedContext *C); + +/// Decrement this iterator until reaching the top or a non-debug instr. 
+static MachineBasicBlock::iterator +priorNonDebug(MachineBasicBlock::iterator I, MachineBasicBlock::iterator Beg) { + assert(I != Beg && "reached the top of the region, cannot decrement"); + while (--I != Beg) { + if (!I->isDebugValue()) + break; + } + return I; +} + +/// If this iterator is a debug value, increment until reaching the End or a +/// non-debug instruction. +static MachineBasicBlock::iterator +nextIfDebug(MachineBasicBlock::iterator I, MachineBasicBlock::iterator End) { + for(; I != End; ++I) { + if (!I->isDebugValue()) + break; + } + return I; +} + /// Top-level MachineScheduler pass driver. /// /// Visit blocks in function order. Divide each block into scheduling regions @@ -139,6 +175,8 @@ static ScheduleDAGInstrs *createConvergingSched(MachineSchedContext *C); /// design would be to split blocks at scheduling boundaries, but LLVM has a /// general bias against block splitting purely for implementation simplicity. bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) { + DEBUG(dbgs() << "Before MISsched:\n"; mf.print(dbgs())); + // Initialize the context of the pass. MF = &mf; MLI = &getAnalysis(); @@ -149,6 +187,8 @@ bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) { LIS = &getAnalysis(); const TargetInstrInfo *TII = MF->getTarget().getInstrInfo(); + RegClassInfo->runOnMachineFunction(*MF); + // Select the scheduler, or set the default. MachineSchedRegistry::ScheduleDAGCtor Ctor = MachineSchedOpt; if (Ctor == useDefaultMachineSched) { @@ -163,13 +203,16 @@ bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) { OwningPtr Scheduler(Ctor(this)); // Visit all machine basic blocks. + // + // TODO: Visit blocks in global postorder or postorder within the bottom-up + // loop tree. Then we can optionally compute global RegPressure. for (MachineFunction::iterator MBB = MF->begin(), MBBEnd = MF->end(); MBB != MBBEnd; ++MBB) { Scheduler->startBlock(MBB); // Break the block into scheduling regions [I, RegionEnd), and schedule each - // region as soon as it is discovered. RegionEnd points the the scheduling + // region as soon as it is discovered. RegionEnd points the scheduling // boundary at the bottom of the region. The DAG does not include RegionEnd, // but the region does (i.e. the next RegionEnd is above the previous // RegionBegin). If the current block has no terminator then RegionEnd == @@ -181,6 +224,7 @@ bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) { unsigned RemainingCount = MBB->size(); for(MachineBasicBlock::iterator RegionEnd = MBB->end(); RegionEnd != MBB->begin(); RegionEnd = Scheduler->begin()) { + // Avoid decrementing RegionEnd for blocks with no terminator. if (RegionEnd != MBB->end() || TII->isSchedulingBoundary(llvm::prior(RegionEnd), MBB, *MF)) { @@ -207,7 +251,8 @@ bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) { Scheduler->exitRegion(); continue; } - DEBUG(dbgs() << "MachineScheduling " << MF->getFunction()->getName() + DEBUG(dbgs() << "********** MI Scheduling **********\n"); + DEBUG(dbgs() << MF->getFunction()->getName() << ":BB#" << MBB->getNumber() << "\n From: " << *I << " To: "; if (RegionEnd != MBB->end()) dbgs() << *RegionEnd; else dbgs() << "End"; @@ -260,6 +305,9 @@ public: /// be scheduled at the bottom. virtual SUnit *pickNode(bool &IsTopNode) = 0; + /// Notify MachineSchedStrategy that ScheduleDAGMI has scheduled a node. 
+ virtual void schedNode(SUnit *SU, bool IsTopNode) = 0; + /// When all predecessor dependencies have been resolved, free this node for /// top-down scheduling. virtual void releaseTopNode(SUnit *SU) = 0; @@ -279,22 +327,45 @@ namespace { /// machine instructions while updating LiveIntervals. class ScheduleDAGMI : public ScheduleDAGInstrs { AliasAnalysis *AA; + RegisterClassInfo *RegClassInfo; MachineSchedStrategy *SchedImpl; + MachineBasicBlock::iterator LiveRegionEnd; + + /// Register pressure in this region computed by buildSchedGraph. + IntervalPressure RegPressure; + RegPressureTracker RPTracker; + + /// List of pressure sets that exceed the target's pressure limit before + /// scheduling, listed in increasing set ID order. Each pressure set is paired + /// with its max pressure in the currently scheduled regions. + std::vector RegionCriticalPSets; + /// The top of the unscheduled zone. MachineBasicBlock::iterator CurrentTop; + IntervalPressure TopPressure; + RegPressureTracker TopRPTracker; /// The bottom of the unscheduled zone. MachineBasicBlock::iterator CurrentBottom; + IntervalPressure BotPressure; + RegPressureTracker BotRPTracker; +#ifndef NDEBUG /// The number of instructions scheduled so far. Used to cut off the /// scheduler at the point determined by misched-cutoff. unsigned NumInstrsScheduled; +#endif public: ScheduleDAGMI(MachineSchedContext *C, MachineSchedStrategy *S): ScheduleDAGInstrs(*C->MF, *C->MLI, *C->MDT, /*IsPostRA=*/false, C->LIS), - AA(C->AA), SchedImpl(S), CurrentTop(), CurrentBottom(), - NumInstrsScheduled(0) {} + AA(C->AA), RegClassInfo(C->RegClassInfo), SchedImpl(S), + RPTracker(RegPressure), CurrentTop(), TopRPTracker(TopPressure), + CurrentBottom(), BotRPTracker(BotPressure) { +#ifndef NDEBUG + NumInstrsScheduled = 0; +#endif + } ~ScheduleDAGMI() { delete SchedImpl; @@ -303,22 +374,68 @@ public: MachineBasicBlock::iterator top() const { return CurrentTop; } MachineBasicBlock::iterator bottom() const { return CurrentBottom; } - /// Implement ScheduleDAGInstrs interface. + /// Implement the ScheduleDAGInstrs interface for handling the next scheduling + /// region. This covers all instructions in a block, while schedule() may only + /// cover a subset. + void enterRegion(MachineBasicBlock *bb, + MachineBasicBlock::iterator begin, + MachineBasicBlock::iterator end, + unsigned endcount); + + /// Implement ScheduleDAGInstrs interface for scheduling a sequence of + /// reorderable instructions. void schedule(); + /// Get current register pressure for the top scheduled instructions. + const IntervalPressure &getTopPressure() const { return TopPressure; } + const RegPressureTracker &getTopRPTracker() const { return TopRPTracker; } + + /// Get current register pressure for the bottom scheduled instructions. + const IntervalPressure &getBotPressure() const { return BotPressure; } + const RegPressureTracker &getBotRPTracker() const { return BotRPTracker; } + + /// Get register pressure for the entire scheduling region before scheduling. + const IntervalPressure &getRegPressure() const { return RegPressure; } + + const std::vector &getRegionCriticalPSets() const { + return RegionCriticalPSets; + } + + /// getIssueWidth - Return the max instructions per scheduling group. + unsigned getIssueWidth() const { + return (InstrItins && InstrItins->SchedModel) + ? InstrItins->SchedModel->IssueWidth : 1; + } + + /// getNumMicroOps - Return the number of issue slots required for this MI. 
+  unsigned getNumMicroOps(MachineInstr *MI) const {
+    if (!InstrItins) return 1;
+    int UOps = InstrItins->getNumMicroOps(MI->getDesc().getSchedClass());
+    return (UOps >= 0) ? UOps : TII->getNumMicroOps(InstrItins, MI);
+  }
+
 protected:
+  void initRegPressure();
+  void updateScheduledPressure(std::vector<unsigned> NewMaxPressure);
+
   void moveInstruction(MachineInstr *MI, MachineBasicBlock::iterator InsertPos);
   bool checkSchedLimit();
+  void releaseRoots();
+
   void releaseSucc(SUnit *SU, SDep *SuccEdge);
   void releaseSuccessors(SUnit *SU);
   void releasePred(SUnit *SU, SDep *PredEdge);
   void releasePredecessors(SUnit *SU);
+
+  void placeDebugValues();
 };
 } // namespace
 
 /// ReleaseSucc - Decrement the NumPredsLeft count of a successor. When
 /// NumPredsLeft reaches zero, release the successor node.
+///
+/// FIXME: Adjust SuccSU height based on MinLatency.
 void ScheduleDAGMI::releaseSucc(SUnit *SU, SDep *SuccEdge) {
   SUnit *SuccSU = SuccEdge->getSUnit();
 
@@ -345,6 +462,8 @@ void ScheduleDAGMI::releaseSuccessors(SUnit *SU) {
 
 /// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. When
 /// NumSuccsLeft reaches zero, release the predecessor node.
+///
+/// FIXME: Adjust PredSU height based on MinLatency.
 void ScheduleDAGMI::releasePred(SUnit *SU, SDep *PredEdge) {
   SUnit *PredSU = PredEdge->getSUnit();
 
@@ -371,12 +490,17 @@ void ScheduleDAGMI::releasePredecessors(SUnit *SU) {
 
 void ScheduleDAGMI::moveInstruction(MachineInstr *MI,
                                     MachineBasicBlock::iterator InsertPos) {
-  // Fix RegionBegin if the first instruction moves down.
+  // Advance RegionBegin if the first instruction moves down.
   if (&*RegionBegin == MI)
-    RegionBegin = llvm::next(RegionBegin);
+    ++RegionBegin;
+
+  // Update the instruction stream.
   BB->splice(InsertPos, BB, MI);
+
+  // Update LiveIntervals.
   LIS->handleMove(MI);
-  // Fix RegionBegin if another instruction moves above the first instruction.
+
+  // Recede RegionBegin if an instruction moves above the first.
   if (RegionBegin == InsertPos)
     RegionBegin = MI;
 }
@@ -392,12 +516,114 @@ bool ScheduleDAGMI::checkSchedLimit() {
   return true;
 }
 
+/// enterRegion - Called back from MachineScheduler::runOnMachineFunction after
+/// crossing a scheduling boundary. [begin, end) includes all instructions in
+/// the region, including the boundary itself and single-instruction regions
+/// that don't get scheduled.
+void ScheduleDAGMI::enterRegion(MachineBasicBlock *bb,
+                                MachineBasicBlock::iterator begin,
+                                MachineBasicBlock::iterator end,
+                                unsigned endcount)
+{
+  ScheduleDAGInstrs::enterRegion(bb, begin, end, endcount);
+
+  // For convenience, remember the end of the liveness region.
+  LiveRegionEnd =
+    (RegionEnd == bb->end()) ? RegionEnd : llvm::next(RegionEnd);
+}
+
+// Set up the register pressure trackers for the top and bottom scheduled
+// regions.
+void ScheduleDAGMI::initRegPressure() {
+  TopRPTracker.init(&MF, RegClassInfo, LIS, BB, RegionBegin);
+  BotRPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd);
+
+  // Close the RPTracker to finalize live ins.
+  RPTracker.closeRegion();
+
+  DEBUG(RPTracker.getPressure().dump(TRI));
+
+  // Initialize the live ins and live outs.
+  TopRPTracker.addLiveRegs(RPTracker.getPressure().LiveInRegs);
+  BotRPTracker.addLiveRegs(RPTracker.getPressure().LiveOutRegs);
+
+  // Close one end of the tracker so we can call
+  // getMaxUpward/DownwardPressureDelta before advancing across any
+  // instructions. This converts currently live regs into live ins/outs.
+  TopRPTracker.closeTop();
+  BotRPTracker.closeBottom();
+
+  // Account for liveness generated by the region boundary.
+  if (LiveRegionEnd != RegionEnd)
+    BotRPTracker.recede();
+
+  assert(BotRPTracker.getPos() == RegionEnd && "Can't find the region bottom");
+
+  // Cache the list of excess pressure sets in this region. This will also track
+  // the max pressure in the scheduled code for these sets.
+  RegionCriticalPSets.clear();
+  std::vector<unsigned> RegionPressure = RPTracker.getPressure().MaxSetPressure;
+  for (unsigned i = 0, e = RegionPressure.size(); i < e; ++i) {
+    unsigned Limit = TRI->getRegPressureSetLimit(i);
+    if (RegionPressure[i] > Limit)
+      RegionCriticalPSets.push_back(PressureElement(i, 0));
+  }
+  DEBUG(dbgs() << "Excess PSets: ";
+        for (unsigned i = 0, e = RegionCriticalPSets.size(); i != e; ++i)
+          dbgs() << TRI->getRegPressureSetName(
+            RegionCriticalPSets[i].PSetID) << " ";
+        dbgs() << "\n");
+}
+
+// FIXME: When the pressure tracker deals in pressure differences, we won't
+// iterate over all RegionCriticalPSets[i].
+void ScheduleDAGMI::
+updateScheduledPressure(std::vector<unsigned> NewMaxPressure) {
+  for (unsigned i = 0, e = RegionCriticalPSets.size(); i < e; ++i) {
+    unsigned ID = RegionCriticalPSets[i].PSetID;
+    int &MaxUnits = RegionCriticalPSets[i].UnitIncrease;
+    if ((int)NewMaxPressure[ID] > MaxUnits)
+      MaxUnits = NewMaxPressure[ID];
+  }
+}
+
+// Release all DAG roots for scheduling.
+void ScheduleDAGMI::releaseRoots() {
+  SmallVector<SUnit*, 8> BotRoots;
+
+  for (std::vector<SUnit>::iterator
+         I = SUnits.begin(), E = SUnits.end(); I != E; ++I) {
+    // A SUnit is ready to top schedule if it has no predecessors.
+    if (I->Preds.empty())
+      SchedImpl->releaseTopNode(&(*I));
+    // A SUnit is ready to bottom schedule if it has no successors.
+    if (I->Succs.empty())
+      BotRoots.push_back(&(*I));
+  }
+  // Release bottom roots in reverse order so the higher priority nodes appear
+  // first. This is more natural and slightly more efficient.
+  for (SmallVectorImpl<SUnit*>::const_reverse_iterator
+         I = BotRoots.rbegin(), E = BotRoots.rend(); I != E; ++I)
+    SchedImpl->releaseBottomNode(*I);
+}
+
 /// schedule - Called back from MachineScheduler::runOnMachineFunction
-/// after setting up the current scheduling region.
+/// after setting up the current scheduling region. [RegionBegin, RegionEnd)
+/// only includes instructions that have DAG nodes, not scheduling boundaries.
 void ScheduleDAGMI::schedule() {
-  buildSchedGraph(AA);
+  // Initialize the register pressure tracker used by buildSchedGraph.
+  RPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd);
+
+  // Account for liveness generated by the region boundary.
+  if (LiveRegionEnd != RegionEnd)
+    RPTracker.recede();
+
+  // Build the DAG, and compute current register pressure.
+  buildSchedGraph(AA, &RPTracker);
+
+  // Initialize top/bottom trackers after computing region pressure.
+  initRegPressure();
 
-  DEBUG(dbgs() << "********** MI Scheduling **********\n");
   DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
           SUnits[su].dumpAll(this));
@@ -410,22 +636,12 @@ void ScheduleDAGMI::schedule() {
   releasePredecessors(&ExitSU);
 
   // Release all DAG roots for scheduling.
-  for (std::vector<SUnit>::iterator I = SUnits.begin(), E = SUnits.end();
-       I != E; ++I) {
-    // A SUnit is ready to top schedule if it has no predecessors.
-    if (I->Preds.empty())
-      SchedImpl->releaseTopNode(&(*I));
-    // A SUnit is ready to bottom schedule if it has no successors.
-    if (I->Succs.empty())
-      SchedImpl->releaseBottomNode(&(*I));
-  }
+  releaseRoots();
 
-  CurrentTop = RegionBegin;
+  CurrentTop = nextIfDebug(RegionBegin, RegionEnd);
   CurrentBottom = RegionEnd;
   bool IsTopNode = false;
   while (SUnit *SU = SchedImpl->pickNode(IsTopNode)) {
-    DEBUG(dbgs() << "*** " << (IsTopNode ? "Top" : "Bottom")
-          << " Scheduling Instruction:\n"; SU->dump(this));
     if (!checkSchedLimit())
       break;
 
@@ -435,28 +651,69 @@ void ScheduleDAGMI::schedule() {
     if (IsTopNode) {
       assert(SU->isTopReady() && "node still has unscheduled dependencies");
       if (&*CurrentTop == MI)
-        ++CurrentTop;
-      else
+        CurrentTop = nextIfDebug(++CurrentTop, CurrentBottom);
+      else {
         moveInstruction(MI, CurrentTop);
+        TopRPTracker.setPos(MI);
+      }
+
+      // Update top scheduled pressure.
+      TopRPTracker.advance();
+      assert(TopRPTracker.getPos() == CurrentTop && "out of sync");
+      updateScheduledPressure(TopRPTracker.getPressure().MaxSetPressure);
+
       // Release dependent instructions for scheduling.
       releaseSuccessors(SU);
     }
     else {
       assert(SU->isBottomReady() && "node still has unscheduled dependencies");
-      if (&*llvm::prior(CurrentBottom) == MI)
-        --CurrentBottom;
+      MachineBasicBlock::iterator priorII =
+        priorNonDebug(CurrentBottom, CurrentTop);
+      if (&*priorII == MI)
+        CurrentBottom = priorII;
       else {
-        if (&*CurrentTop == MI)
-          CurrentTop = llvm::next(CurrentTop);
+        if (&*CurrentTop == MI) {
+          CurrentTop = nextIfDebug(++CurrentTop, priorII);
+          TopRPTracker.setPos(CurrentTop);
+        }
         moveInstruction(MI, CurrentBottom);
         CurrentBottom = MI;
       }
+      // Update bottom scheduled pressure.
+      BotRPTracker.recede();
+      assert(BotRPTracker.getPos() == CurrentBottom && "out of sync");
+      updateScheduledPressure(BotRPTracker.getPressure().MaxSetPressure);
+
       // Release dependent instructions for scheduling.
       releasePredecessors(SU);
     }
     SU->isScheduled = true;
+    SchedImpl->schedNode(SU, IsTopNode);
   }
   assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");
+
+  placeDebugValues();
+}
+
+/// Reinsert any remaining debug_values, just like the PostRA scheduler.
+void ScheduleDAGMI::placeDebugValues() {
+  // If first instruction was a DBG_VALUE then put it back.
+  if (FirstDbgValue) {
+    BB->splice(RegionBegin, BB, FirstDbgValue);
+    RegionBegin = FirstDbgValue;
+  }
+
+  for (std::vector<std::pair<MachineInstr *, MachineInstr *> >::iterator
+         DI = DbgValues.end(), DE = DbgValues.begin(); DI != DE; --DI) {
+    std::pair<MachineInstr *, MachineInstr *> P = *prior(DI);
+    MachineInstr *DbgValue = P.first;
+    MachineBasicBlock::iterator OrigPrevMI = P.second;
+    BB->splice(++OrigPrevMI, BB, DbgValue);
+    if (OrigPrevMI == llvm::prior(RegionEnd))
+      RegionEnd = DbgValue;
+  }
+  DbgValues.clear();
+  FirstDbgValue = NULL;
 }
 
 //===----------------------------------------------------------------------===//
@@ -464,56 +721,603 @@ void ScheduleDAGMI::schedule() {
 //===----------------------------------------------------------------------===//
 
 namespace {
+/// ReadyQueue encapsulates a vector of "ready" SUnits with basic convenience
+/// methods for pushing and removing nodes. ReadyQueues are uniquely identified
+/// by an ID. SUnit::NodeQueueId is a mask of the ReadyQueues the SUnit is in.
+class ReadyQueue {
+  unsigned ID;
+  std::string Name;
+  std::vector<SUnit*> Queue;
+
+public:
+  ReadyQueue(unsigned id, const Twine &name): ID(id), Name(name.str()) {}
+
+  unsigned getID() const { return ID; }
+
+  StringRef getName() const { return Name; }
+
+  // SU is in this queue if its NodeQueueId is a superset of this ID.
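+  // For example, a node sitting in both available queues has NodeQueueId
+  // (TopQID | BotQID) == 3; the pending queues reuse the same IDs shifted
+  // left by LogMaxQID.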
+  bool isInQueue(SUnit *SU) const { return (SU->NodeQueueId & ID); }
+
+  bool empty() const { return Queue.empty(); }
+
+  unsigned size() const { return Queue.size(); }
+
+  typedef std::vector<SUnit*>::iterator iterator;
+
+  iterator begin() { return Queue.begin(); }
+
+  iterator end() { return Queue.end(); }
+
+  iterator find(SUnit *SU) {
+    return std::find(Queue.begin(), Queue.end(), SU);
+  }
+
+  void push(SUnit *SU) {
+    Queue.push_back(SU);
+    SU->NodeQueueId |= ID;
+  }
+
+  void remove(iterator I) {
+    (*I)->NodeQueueId &= ~ID;
+    *I = Queue.back();
+    Queue.pop_back();
+  }
+
+  void dump() {
+    dbgs() << Name << ": ";
+    for (unsigned i = 0, e = Queue.size(); i < e; ++i)
+      dbgs() << Queue[i]->NodeNum << " ";
+    dbgs() << "\n";
+  }
+};
+
 /// ConvergingScheduler shrinks the unscheduled zone using heuristics to balance
 /// the schedule.
 class ConvergingScheduler : public MachineSchedStrategy {
+
+  /// Store the state used by ConvergingScheduler heuristics, required for the
+  /// lifetime of one invocation of pickNode().
+  struct SchedCandidate {
+    // The best SUnit candidate.
+    SUnit *SU;
+
+    // Register pressure values for the best candidate.
+    RegPressureDelta RPDelta;
+
+    SchedCandidate(): SU(NULL) {}
+  };
+  /// Represent the type of SchedCandidate found within a single queue.
+  enum CandResult {
+    NoCand, NodeOrder, SingleExcess, SingleCritical, SingleMax, MultiPressure };
+
+  /// Each scheduling boundary is associated with ready queues. It tracks the
+  /// current cycle in whichever direction it has moved, and maintains the state
+  /// of "hazards" and other interlocks at the current cycle.
+  struct SchedBoundary {
+    ScheduleDAGMI *DAG;
+
+    ReadyQueue Available;
+    ReadyQueue Pending;
+    bool CheckPending;
+
+    ScheduleHazardRecognizer *HazardRec;
+
+    unsigned CurrCycle;
+    unsigned IssueCount;
+
+    /// MinReadyCycle - Cycle of the soonest available instruction.
+    unsigned MinReadyCycle;
+
+    // Remember the greatest min operand latency.
+    unsigned MaxMinLatency;
+
+    /// Pending queues extend the ready queues with the same ID and the
+    /// PendingFlag set.
+    SchedBoundary(unsigned ID, const Twine &Name):
+      DAG(0), Available(ID, Name+".A"),
+      Pending(ID << ConvergingScheduler::LogMaxQID, Name+".P"),
+      CheckPending(false), HazardRec(0), CurrCycle(0), IssueCount(0),
+      MinReadyCycle(UINT_MAX), MaxMinLatency(0) {}
+
+    ~SchedBoundary() { delete HazardRec; }
+
+    bool isTop() const {
+      return Available.getID() == ConvergingScheduler::TopQID;
+    }
+
+    bool checkHazard(SUnit *SU);
+
+    void releaseNode(SUnit *SU, unsigned ReadyCycle);
+
+    void bumpCycle();
+
+    void bumpNode(SUnit *SU);
+
+    void releasePending();
+
+    void removeReady(SUnit *SU);
+
+    SUnit *pickOnlyChoice();
+  };
+
+  ScheduleDAGMI *DAG;
+  const TargetRegisterInfo *TRI;
 
-  unsigned NumTopReady;
-  unsigned NumBottomReady;
+  // State of the top and bottom scheduled instruction boundaries.
+  SchedBoundary Top;
+  SchedBoundary Bot;
 
 public:
-  virtual void initialize(ScheduleDAGMI *dag) {
-    DAG = dag;
+  /// SUnit::NodeQueueId: 0 (none), 1 (top), 2 (bot), 3 (both)
+  enum {
+    TopQID = 1,
+    BotQID = 2,
+    LogMaxQID = 2
+  };
+
+  ConvergingScheduler():
+    DAG(0), TRI(0), Top(TopQID, "TopQ"), Bot(BotQID, "BotQ") {}
+
+  virtual void initialize(ScheduleDAGMI *dag);
+
+  virtual SUnit *pickNode(bool &IsTopNode);
+
+  virtual void schedNode(SUnit *SU, bool IsTopNode);
+
+  virtual void releaseTopNode(SUnit *SU);
+
+  virtual void releaseBottomNode(SUnit *SU);
+
+protected:
+  SUnit *pickNodeBidrectional(bool &IsTopNode);
 
-    assert((!ForceTopDown || !ForceBottomUp) &&
-           "-misched-topdown incompatible with -misched-bottomup");
+  CandResult pickNodeFromQueue(ReadyQueue &Q,
+                               const RegPressureTracker &RPTracker,
+                               SchedCandidate &Candidate);
+#ifndef NDEBUG
+  void traceCandidate(const char *Label, const ReadyQueue &Q, SUnit *SU,
+                      PressureElement P = PressureElement());
+#endif
+};
+} // namespace
+
+void ConvergingScheduler::initialize(ScheduleDAGMI *dag) {
+  DAG = dag;
+  TRI = DAG->TRI;
+  Top.DAG = dag;
+  Bot.DAG = dag;
+
+  // Initialize the HazardRecognizers.
+  const TargetMachine &TM = DAG->MF.getTarget();
+  const InstrItineraryData *Itin = TM.getInstrItineraryData();
+  Top.HazardRec = TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG);
+  Bot.HazardRec = TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG);
+
+  assert((!ForceTopDown || !ForceBottomUp) &&
+         "-misched-topdown incompatible with -misched-bottomup");
+}
+
+void ConvergingScheduler::releaseTopNode(SUnit *SU) {
+  if (SU->isScheduled)
+    return;
+
+  for (SUnit::succ_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+       I != E; ++I) {
+    unsigned PredReadyCycle = I->getSUnit()->TopReadyCycle;
+    unsigned Latency =
+      DAG->computeOperandLatency(I->getSUnit(), SU, *I, /*FindMin=*/true);
+#ifndef NDEBUG
+    Top.MaxMinLatency = std::max(Latency, Top.MaxMinLatency);
+#endif
+    if (SU->TopReadyCycle < PredReadyCycle + Latency)
+      SU->TopReadyCycle = PredReadyCycle + Latency;
   }
+  Top.releaseNode(SU, SU->TopReadyCycle);
 }
 
-  virtual SUnit *pickNode(bool &IsTopNode) {
-    if (DAG->top() == DAG->bottom())
-      return NULL;
+void ConvergingScheduler::releaseBottomNode(SUnit *SU) {
+  if (SU->isScheduled)
+    return;
 
-    // As an initial placeholder heuristic, schedule in the direction that has
-    // the fewest choices.
-    SUnit *SU;
-    if (ForceTopDown || (!ForceBottomUp && NumTopReady <= NumBottomReady)) {
-      SU = DAG->getSUnit(DAG->top());
-      IsTopNode = true;
+  assert(SU->getInstr() && "Scheduled SUnit must have instr");
+
+  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+       I != E; ++I) {
+    unsigned SuccReadyCycle = I->getSUnit()->BotReadyCycle;
+    unsigned Latency =
+      DAG->computeOperandLatency(SU, I->getSUnit(), *I, /*FindMin=*/true);
+#ifndef NDEBUG
+    Bot.MaxMinLatency = std::max(Latency, Bot.MaxMinLatency);
+#endif
+    if (SU->BotReadyCycle < SuccReadyCycle + Latency)
+      SU->BotReadyCycle = SuccReadyCycle + Latency;
+  }
+  Bot.releaseNode(SU, SU->BotReadyCycle);
+}
+
+/// Does this SU have a hazard within the current instruction group.
+///
+/// The scheduler supports two modes of hazard recognition. The first is the
+/// ScheduleHazardRecognizer API. It is a fully general hazard recognizer that
+/// supports highly complicated in-order reservation tables
+/// (ScoreboardHazardRecognizer) and arbitrary target-specific logic.
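+/// (That path is typically only active when the target supplies instruction
+/// itineraries; see the HazardRec->isEnabled() checks below.)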
+///
+/// The second is a streamlined mechanism that checks for hazards based on
+/// simple counters that the scheduler itself maintains. It explicitly checks
+/// for instruction dispatch limitations, including the number of micro-ops that
+/// can dispatch per cycle.
+///
+/// TODO: Also check whether the SU must start a new group.
+bool ConvergingScheduler::SchedBoundary::checkHazard(SUnit *SU) {
+  if (HazardRec->isEnabled())
+    return HazardRec->getHazardType(SU) != ScheduleHazardRecognizer::NoHazard;
+
+  if (IssueCount + DAG->getNumMicroOps(SU->getInstr()) > DAG->getIssueWidth())
+    return true;
+
+  return false;
+}
+
+void ConvergingScheduler::SchedBoundary::releaseNode(SUnit *SU,
+                                                     unsigned ReadyCycle) {
+  if (ReadyCycle < MinReadyCycle)
+    MinReadyCycle = ReadyCycle;
+
+  // Check for interlocks first. For the purpose of other heuristics, an
+  // instruction that cannot issue appears as if it's not in the ReadyQueue.
+  if (ReadyCycle > CurrCycle || checkHazard(SU))
+    Pending.push(SU);
+  else
+    Available.push(SU);
+}
+
+/// Move the boundary of scheduled code by one cycle.
+void ConvergingScheduler::SchedBoundary::bumpCycle() {
+  unsigned Width = DAG->getIssueWidth();
+  IssueCount = (IssueCount <= Width) ? 0 : IssueCount - Width;
+
+  assert(MinReadyCycle < UINT_MAX && "MinReadyCycle uninitialized");
+  unsigned NextCycle = std::max(CurrCycle + 1, MinReadyCycle);
+
+  if (!HazardRec->isEnabled()) {
+    // Bypass HazardRec virtual calls.
+    CurrCycle = NextCycle;
+  }
+  else {
+    // Bypass getHazardType calls in case of long latency.
+    for (; CurrCycle != NextCycle; ++CurrCycle) {
+      if (isTop())
+        HazardRec->AdvanceCycle();
+      else
+        HazardRec->RecedeCycle();
+    }
+  }
+  CheckPending = true;
+
+  DEBUG(dbgs() << "*** " << Available.getName() << " cycle "
+        << CurrCycle << '\n');
+}
+
+/// Move the boundary of scheduled code by one SUnit.
+void ConvergingScheduler::SchedBoundary::bumpNode(SUnit *SU) {
+  // Update the reservation table.
+  if (HazardRec->isEnabled()) {
+    if (!isTop() && SU->isCall) {
+      // Calls are scheduled with their preceding instructions. For bottom-up
+      // scheduling, clear the pipeline state before emitting.
+      HazardRec->Reset();
+    }
+    HazardRec->EmitInstruction(SU);
+  }
+  // Check the instruction group dispatch limit.
+  // TODO: Check if this SU must end a dispatch group.
+  IssueCount += DAG->getNumMicroOps(SU->getInstr());
+  if (IssueCount >= DAG->getIssueWidth()) {
+    DEBUG(dbgs() << "*** Max instrs at cycle " << CurrCycle << '\n');
+    bumpCycle();
+  }
+}
+
+/// Release pending ready nodes into the available queue. This makes them
+/// visible to heuristics.
+void ConvergingScheduler::SchedBoundary::releasePending() {
+  // If the available queue is empty, it is safe to reset MinReadyCycle.
+  if (Available.empty())
+    MinReadyCycle = UINT_MAX;
+
+  // Check to see if any of the pending instructions are ready to issue. If
+  // so, add them to the available queue.
+  for (unsigned i = 0, e = Pending.size(); i != e; ++i) {
+    SUnit *SU = *(Pending.begin()+i);
+    unsigned ReadyCycle = isTop() ?
SU->TopReadyCycle : SU->BotReadyCycle;
+
+    if (ReadyCycle < MinReadyCycle)
+      MinReadyCycle = ReadyCycle;
+
+    if (ReadyCycle > CurrCycle)
+      continue;
+
+    if (checkHazard(SU))
+      continue;
+
+    Available.push(SU);
+    Pending.remove(Pending.begin()+i);
+    --i; --e;
+  }
+  CheckPending = false;
+}
+
+/// Remove SU from the ready set for this boundary.
+void ConvergingScheduler::SchedBoundary::removeReady(SUnit *SU) {
+  if (Available.isInQueue(SU))
+    Available.remove(Available.find(SU));
+  else {
+    assert(Pending.isInQueue(SU) && "bad ready count");
+    Pending.remove(Pending.find(SU));
+  }
+}
+
+/// If this queue only has one ready candidate, return it. As a side effect,
+/// advance the cycle until at least one node is ready. If multiple instructions
+/// are ready, return NULL.
+SUnit *ConvergingScheduler::SchedBoundary::pickOnlyChoice() {
+  if (CheckPending)
+    releasePending();
+
+  for (unsigned i = 0; Available.empty(); ++i) {
+    assert(i <= (HazardRec->getMaxLookAhead() + MaxMinLatency) &&
+           "permanent hazard"); (void)i;
+    bumpCycle();
+    releasePending();
+  }
+  if (Available.size() == 1)
+    return *Available.begin();
+  return NULL;
+}
+
+#ifndef NDEBUG
+void ConvergingScheduler::traceCandidate(const char *Label, const ReadyQueue &Q,
+                                         SUnit *SU, PressureElement P) {
+  dbgs() << Label << " " << Q.getName() << " ";
+  if (P.isValid())
+    dbgs() << TRI->getRegPressureSetName(P.PSetID) << ":" << P.UnitIncrease
+           << " ";
+  else
+    dbgs() << " ";
+  SU->dump(DAG);
+}
+#endif
+
+/// pickNodeFromQueue helper that returns true if the LHS reg pressure effect is
+/// more desirable than RHS from a scheduling standpoint.
+static bool compareRPDelta(const RegPressureDelta &LHS,
+                           const RegPressureDelta &RHS) {
+  // Compare each component of pressure in decreasing order of importance
+  // without checking if any are valid. Invalid PressureElements are assumed to
+  // have UnitIncrease==0, so are neutral.
+
+  // Avoid exceeding the target's limit.
+  if (LHS.Excess.UnitIncrease != RHS.Excess.UnitIncrease)
+    return LHS.Excess.UnitIncrease < RHS.Excess.UnitIncrease;
+
+  // Avoid increasing the max critical pressure in the scheduled region.
+  if (LHS.CriticalMax.UnitIncrease != RHS.CriticalMax.UnitIncrease)
+    return LHS.CriticalMax.UnitIncrease < RHS.CriticalMax.UnitIncrease;
+
+  // Avoid increasing the max pressure of the entire region.
+  if (LHS.CurrentMax.UnitIncrease != RHS.CurrentMax.UnitIncrease)
+    return LHS.CurrentMax.UnitIncrease < RHS.CurrentMax.UnitIncrease;
+
+  return false;
+}
+
+/// Pick the best candidate from the top queue.
+///
+/// TODO: getMaxPressureDelta results can be mostly cached for each SUnit during
+/// DAG building. To adjust for the current scheduling location we need to
+/// maintain the number of vreg uses remaining to be top-scheduled.
+ConvergingScheduler::CandResult ConvergingScheduler::
+pickNodeFromQueue(ReadyQueue &Q, const RegPressureTracker &RPTracker,
+                  SchedCandidate &Candidate) {
+  DEBUG(Q.dump());
+
+  // getMaxPressureDelta temporarily modifies the tracker.
+  RegPressureTracker &TempTracker = const_cast<RegPressureTracker&>(RPTracker);
+
+  // BestSU remains NULL if no top candidates beat the best existing candidate.
+  CandResult FoundCandidate = NoCand;
+  for (ReadyQueue::iterator I = Q.begin(), E = Q.end(); I != E; ++I) {
+    RegPressureDelta RPDelta;
+    TempTracker.getMaxPressureDelta((*I)->getInstr(), RPDelta,
+                                    DAG->getRegionCriticalPSets(),
+                                    DAG->getRegPressure().MaxSetPressure);
+
+    // Initialize the candidate if needed.
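+    // The first node visited seeds the candidate and is tagged NodeOrder, so
+    // the pressure heuristics below may still override it.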
+ if (!Candidate.SU) { + Candidate.SU = *I; + Candidate.RPDelta = RPDelta; + FoundCandidate = NodeOrder; + continue; + } + // Avoid exceeding the target's limit. + if (RPDelta.Excess.UnitIncrease < Candidate.RPDelta.Excess.UnitIncrease) { + DEBUG(traceCandidate("ECAND", Q, *I, RPDelta.Excess)); + Candidate.SU = *I; + Candidate.RPDelta = RPDelta; + FoundCandidate = SingleExcess; + continue; + } + if (RPDelta.Excess.UnitIncrease > Candidate.RPDelta.Excess.UnitIncrease) + continue; + if (FoundCandidate == SingleExcess) + FoundCandidate = MultiPressure; + + // Avoid increasing the max critical pressure in the scheduled region. + if (RPDelta.CriticalMax.UnitIncrease + < Candidate.RPDelta.CriticalMax.UnitIncrease) { + DEBUG(traceCandidate("PCAND", Q, *I, RPDelta.CriticalMax)); + Candidate.SU = *I; + Candidate.RPDelta = RPDelta; + FoundCandidate = SingleCritical; + continue; + } + if (RPDelta.CriticalMax.UnitIncrease + > Candidate.RPDelta.CriticalMax.UnitIncrease) + continue; + if (FoundCandidate == SingleCritical) + FoundCandidate = MultiPressure; + + // Avoid increasing the max pressure of the entire region. + if (RPDelta.CurrentMax.UnitIncrease + < Candidate.RPDelta.CurrentMax.UnitIncrease) { + DEBUG(traceCandidate("MCAND", Q, *I, RPDelta.CurrentMax)); + Candidate.SU = *I; + Candidate.RPDelta = RPDelta; + FoundCandidate = SingleMax; + continue; } - if (SU->isBottomReady()) { - assert(NumBottomReady > 0 && "bad ready count"); - --NumBottomReady; + if (RPDelta.CurrentMax.UnitIncrease + > Candidate.RPDelta.CurrentMax.UnitIncrease) + continue; + if (FoundCandidate == SingleMax) + FoundCandidate = MultiPressure; + + // Fall through to original instruction order. + // Only consider node order if Candidate was chosen from this Q. + if (FoundCandidate == NoCand) + continue; + + if ((Q.getID() == TopQID && (*I)->NodeNum < Candidate.SU->NodeNum) + || (Q.getID() == BotQID && (*I)->NodeNum > Candidate.SU->NodeNum)) { + DEBUG(traceCandidate("NCAND", Q, *I)); + Candidate.SU = *I; + Candidate.RPDelta = RPDelta; + FoundCandidate = NodeOrder; } + } + return FoundCandidate; +} + +/// Pick the best candidate node from either the top or bottom queue. +SUnit *ConvergingScheduler::pickNodeBidrectional(bool &IsTopNode) { + // Schedule as far as possible in the direction of no choice. This is most + // efficient, but also provides the best heuristics for CriticalPSets. + if (SUnit *SU = Bot.pickOnlyChoice()) { + IsTopNode = false; return SU; } + if (SUnit *SU = Top.pickOnlyChoice()) { + IsTopNode = true; + return SU; + } + SchedCandidate BotCand; + // Prefer bottom scheduling when heuristics are silent. + CandResult BotResult = pickNodeFromQueue(Bot.Available, + DAG->getBotRPTracker(), BotCand); + assert(BotResult != NoCand && "failed to find the first candidate"); + + // If either Q has a single candidate that provides the least increase in + // Excess pressure, we can immediately schedule from that Q. + // + // RegionCriticalPSets summarizes the pressure within the scheduled region and + // affects picking from either Q. If scheduling in one direction must + // increase pressure for one of the excess PSets, then schedule in that + // direction first to provide more freedom in the other direction. + if (BotResult == SingleExcess || BotResult == SingleCritical) { + IsTopNode = false; + return BotCand.SU; + } + // Check if the top Q has a better candidate. 
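+  // This mirrors the bottom-side check above; BotCand remains the fallback if
+  // the pressure heuristics stay silent.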
+  SchedCandidate TopCand;
+  CandResult TopResult = pickNodeFromQueue(Top.Available,
+                                           DAG->getTopRPTracker(), TopCand);
+  assert(TopResult != NoCand && "failed to find the first candidate");
+
+  if (TopResult == SingleExcess || TopResult == SingleCritical) {
+    IsTopNode = true;
+    return TopCand.SU;
+  }
+  // If either Q has a single candidate that minimizes pressure above the
+  // original region's pressure pick it.
+  if (BotResult == SingleMax) {
+    IsTopNode = false;
+    return BotCand.SU;
+  }
+  if (TopResult == SingleMax) {
+    IsTopNode = true;
+    return TopCand.SU;
+  }
+  // Check for a salient pressure difference and pick the best from either side.
+  if (compareRPDelta(TopCand.RPDelta, BotCand.RPDelta)) {
+    IsTopNode = true;
+    return TopCand.SU;
+  }
+  // Otherwise prefer the bottom candidate in node order.
+  IsTopNode = false;
+  return BotCand.SU;
+}
 
-  virtual void releaseTopNode(SUnit *SU) {
-    ++NumTopReady;
+/// Pick the best node to balance the schedule. Implements MachineSchedStrategy.
+SUnit *ConvergingScheduler::pickNode(bool &IsTopNode) {
+  if (DAG->top() == DAG->bottom()) {
+    assert(Top.Available.empty() && Top.Pending.empty() &&
+           Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");
+    return NULL;
   }
-  virtual void releaseBottomNode(SUnit *SU) {
-    ++NumBottomReady;
+  SUnit *SU;
+  if (ForceTopDown) {
+    SU = Top.pickOnlyChoice();
+    if (!SU) {
+      SchedCandidate TopCand;
+      CandResult TopResult =
+        pickNodeFromQueue(Top.Available, DAG->getTopRPTracker(), TopCand);
+      assert(TopResult != NoCand && "failed to find the first candidate");
+      (void)TopResult;
+      SU = TopCand.SU;
+    }
+    IsTopNode = true;
   }
-};
-} // namespace
+  else if (ForceBottomUp) {
+    SU = Bot.pickOnlyChoice();
+    if (!SU) {
+      SchedCandidate BotCand;
+      CandResult BotResult =
+        pickNodeFromQueue(Bot.Available, DAG->getBotRPTracker(), BotCand);
+      assert(BotResult != NoCand && "failed to find the first candidate");
+      (void)BotResult;
+      SU = BotCand.SU;
+    }
+    IsTopNode = false;
+  }
+  else {
+    SU = pickNodeBidrectional(IsTopNode);
+  }
+  if (SU->isTopReady())
+    Top.removeReady(SU);
+  if (SU->isBottomReady())
+    Bot.removeReady(SU);
+
+  DEBUG(dbgs() << "*** " << (IsTopNode ? "Top" : "Bottom")
+        << " Scheduling Instruction in cycle "
+        << (IsTopNode ? Top.CurrCycle : Bot.CurrCycle) << '\n';
+        SU->dump(DAG));
+  return SU;
+}
+
+/// Update the scheduler's state after scheduling a node. This is the same node
+/// that was just returned by pickNode(). However, ScheduleDAGMI needs to update
+/// its state based on the current cycle before MachineSchedStrategy does.
+void ConvergingScheduler::schedNode(SUnit *SU, bool IsTopNode) {
+  if (IsTopNode) {
+    SU->TopReadyCycle = Top.CurrCycle;
+    Top.bumpNode(SU);
+  }
+  else {
+    SU->BotReadyCycle = Bot.CurrCycle;
+    Bot.bumpNode(SU);
+  }
+}
 
 /// Create the standard converging machine scheduler. This will be used as the
 /// default scheduler if the target does not set a default.
@@ -592,6 +1396,8 @@ public:
     return SU;
   }
 
+  virtual void schedNode(SUnit *SU, bool IsTopNode) {}
+
   virtual void releaseTopNode(SUnit *SU) {
     TopQ.push(SU);
   }
diff --git a/lib/CodeGen/MachineSink.cpp b/lib/CodeGen/MachineSink.cpp
index 1ce546b..bc383cb 100644
--- a/lib/CodeGen/MachineSink.cpp
+++ b/lib/CodeGen/MachineSink.cpp
@@ -99,6 +99,16 @@ namespace {
     bool PerformTrivialForwardCoalescing(MachineInstr *MI,
                                          MachineBasicBlock *MBB);
   };
+
+  // SuccessorSorter - Sort successors according to their loop depth.
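+  // std::stable_sort is used with this comparator below so that successors
+  // with equal loop depth keep their original CFG order.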
+ struct SuccessorSorter { + SuccessorSorter(MachineLoopInfo *LoopInfo) : LI(LoopInfo) {} + bool operator()(const MachineBasicBlock *LHS, + const MachineBasicBlock *RHS) const { + return LI->getLoopDepth(LHS) < LI->getLoopDepth(RHS); + } + MachineLoopInfo *LI; + }; } // end anonymous namespace char MachineSinking::ID = 0; @@ -526,8 +536,11 @@ MachineBasicBlock *MachineSinking::FindSuccToSinkTo(MachineInstr *MI, // Otherwise, we should look at all the successors and decide which one // we should sink to. - for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(), - E = MBB->succ_end(); SI != E; ++SI) { + // We give successors with smaller loop depth higher priority. + SmallVector Succs(MBB->succ_begin(), MBB->succ_end()); + std::stable_sort(Succs.begin(), Succs.end(), SuccessorSorter(LI)); + for (SmallVector::iterator SI = Succs.begin(), + E = Succs.end(); SI != E; ++SI) { MachineBasicBlock *SuccBlock = *SI; bool LocalUse = false; if (AllUsesDominatedByBlock(Reg, SuccBlock, MBB, diff --git a/lib/CodeGen/MachineTraceMetrics.cpp b/lib/CodeGen/MachineTraceMetrics.cpp new file mode 100644 index 0000000..1a3aa60 --- /dev/null +++ b/lib/CodeGen/MachineTraceMetrics.cpp @@ -0,0 +1,1153 @@ +//===- lib/CodeGen/MachineTraceMetrics.cpp ----------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "machine-trace-metrics" +#include "MachineTraceMetrics.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineBranchProbabilityInfo.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/MC/MCInstrItineraries.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/ADT/PostOrderIterator.h" +#include "llvm/ADT/SparseSet.h" + +using namespace llvm; + +char MachineTraceMetrics::ID = 0; +char &llvm::MachineTraceMetricsID = MachineTraceMetrics::ID; + +INITIALIZE_PASS_BEGIN(MachineTraceMetrics, + "machine-trace-metrics", "Machine Trace Metrics", false, true) +INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo) +INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) +INITIALIZE_PASS_END(MachineTraceMetrics, + "machine-trace-metrics", "Machine Trace Metrics", false, true) + +MachineTraceMetrics::MachineTraceMetrics() + : MachineFunctionPass(ID), MF(0), TII(0), TRI(0), MRI(0), Loops(0) { + std::fill(Ensembles, array_endof(Ensembles), (Ensemble*)0); +} + +void MachineTraceMetrics::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + AU.addRequired(); + AU.addRequired(); + MachineFunctionPass::getAnalysisUsage(AU); +} + +bool MachineTraceMetrics::runOnMachineFunction(MachineFunction &Func) { + MF = &Func; + TII = MF->getTarget().getInstrInfo(); + TRI = MF->getTarget().getRegisterInfo(); + ItinData = MF->getTarget().getInstrItineraryData(); + MRI = &MF->getRegInfo(); + Loops = &getAnalysis(); + BlockInfo.resize(MF->getNumBlockIDs()); + return false; +} + +void MachineTraceMetrics::releaseMemory() { + MF = 0; + BlockInfo.clear(); + for (unsigned i = 0; i != TS_NumStrategies; ++i) { + delete Ensembles[i]; + Ensembles[i] = 0; + } +} + +//===----------------------------------------------------------------------===// +// Fixed block 
information
+//===----------------------------------------------------------------------===//
+//
+// The number of instructions in a basic block and the CPU resources used by
+// those instructions don't depend on any given trace strategy.
+
+/// Compute the resource usage in basic block MBB.
+const MachineTraceMetrics::FixedBlockInfo*
+MachineTraceMetrics::getResources(const MachineBasicBlock *MBB) {
+  assert(MBB && "No basic block");
+  FixedBlockInfo *FBI = &BlockInfo[MBB->getNumber()];
+  if (FBI->hasResources())
+    return FBI;
+
+  // Compute resource usage in the block.
+  // FIXME: Compute per-functional unit counts.
+  FBI->HasCalls = false;
+  unsigned InstrCount = 0;
+  for (MachineBasicBlock::const_iterator I = MBB->begin(), E = MBB->end();
+       I != E; ++I) {
+    const MachineInstr *MI = I;
+    if (MI->isTransient())
+      continue;
+    ++InstrCount;
+    if (MI->isCall())
+      FBI->HasCalls = true;
+  }
+  FBI->InstrCount = InstrCount;
+  return FBI;
+}
+
+//===----------------------------------------------------------------------===//
+// Ensemble utility functions
+//===----------------------------------------------------------------------===//
+
+MachineTraceMetrics::Ensemble::Ensemble(MachineTraceMetrics *ct)
+  : MTM(*ct) {
+  BlockInfo.resize(MTM.BlockInfo.size());
+}
+
+// Virtual destructor serves as an anchor.
+MachineTraceMetrics::Ensemble::~Ensemble() {}
+
+const MachineLoop*
+MachineTraceMetrics::Ensemble::getLoopFor(const MachineBasicBlock *MBB) const {
+  return MTM.Loops->getLoopFor(MBB);
+}
+
+// Update resource-related information in the TraceBlockInfo for MBB.
+// Only update resources related to the trace above MBB.
+void MachineTraceMetrics::Ensemble::
+computeDepthResources(const MachineBasicBlock *MBB) {
+  TraceBlockInfo *TBI = &BlockInfo[MBB->getNumber()];
+
+  // Compute resources from trace above. The top block is simple.
+  if (!TBI->Pred) {
+    TBI->InstrDepth = 0;
+    TBI->Head = MBB->getNumber();
+    return;
+  }
+
+  // Compute from the block above. A post-order traversal ensures the
+  // predecessor is always computed first.
+  TraceBlockInfo *PredTBI = &BlockInfo[TBI->Pred->getNumber()];
+  assert(PredTBI->hasValidDepth() && "Trace above has not been computed yet");
+  const FixedBlockInfo *PredFBI = MTM.getResources(TBI->Pred);
+  TBI->InstrDepth = PredTBI->InstrDepth + PredFBI->InstrCount;
+  TBI->Head = PredTBI->Head;
+}
+
+// Update resource-related information in the TraceBlockInfo for MBB.
+// Only update resources related to the trace below MBB.
+void MachineTraceMetrics::Ensemble::
+computeHeightResources(const MachineBasicBlock *MBB) {
+  TraceBlockInfo *TBI = &BlockInfo[MBB->getNumber()];
+
+  // Compute resources for the current block.
+  TBI->InstrHeight = MTM.getResources(MBB)->InstrCount;
+
+  // The trace tail is done.
+  if (!TBI->Succ) {
+    TBI->Tail = MBB->getNumber();
+    return;
+  }
+
+  // Compute from the block below. A post-order traversal ensures the
+  // successor is always computed first.
+  TraceBlockInfo *SuccTBI = &BlockInfo[TBI->Succ->getNumber()];
+  assert(SuccTBI->hasValidHeight() && "Trace below has not been computed yet");
+  TBI->InstrHeight += SuccTBI->InstrHeight;
+  TBI->Tail = SuccTBI->Tail;
+}
+
+// Check if depth resources for MBB are valid and return the TBI.
+// Return NULL if the resources have been invalidated.
+const MachineTraceMetrics::TraceBlockInfo*
+MachineTraceMetrics::Ensemble::
+getDepthResources(const MachineBasicBlock *MBB) const {
+  const TraceBlockInfo *TBI = &BlockInfo[MBB->getNumber()];
+  return TBI->hasValidDepth() ?
TBI : 0; +} + +// Check if height resources for MBB are valid and return the TBI. +// Return NULL if the resources have been invalidated. +const MachineTraceMetrics::TraceBlockInfo* +MachineTraceMetrics::Ensemble:: +getHeightResources(const MachineBasicBlock *MBB) const { + const TraceBlockInfo *TBI = &BlockInfo[MBB->getNumber()]; + return TBI->hasValidHeight() ? TBI : 0; +} + +//===----------------------------------------------------------------------===// +// Trace Selection Strategies +//===----------------------------------------------------------------------===// +// +// A trace selection strategy is implemented as a sub-class of Ensemble. The +// trace through a block B is computed by two DFS traversals of the CFG +// starting from B. One upwards, and one downwards. During the upwards DFS, +// pickTracePred() is called on the post-ordered blocks. During the downwards +// DFS, pickTraceSucc() is called in a post-order. +// + +// We never allow traces that leave loops, but we do allow traces to enter +// nested loops. We also never allow traces to contain back-edges. +// +// This means that a loop header can never appear above the center block of a +// trace, except as the trace head. Below the center block, loop exiting edges +// are banned. +// +// Return true if an edge from the From loop to the To loop is leaving a loop. +// Either of To and From can be null. +static bool isExitingLoop(const MachineLoop *From, const MachineLoop *To) { + return From && !From->contains(To); +} + +// MinInstrCountEnsemble - Pick the trace that executes the least number of +// instructions. +namespace { +class MinInstrCountEnsemble : public MachineTraceMetrics::Ensemble { + const char *getName() const { return "MinInstr"; } + const MachineBasicBlock *pickTracePred(const MachineBasicBlock*); + const MachineBasicBlock *pickTraceSucc(const MachineBasicBlock*); + +public: + MinInstrCountEnsemble(MachineTraceMetrics *mtm) + : MachineTraceMetrics::Ensemble(mtm) {} +}; +} + +// Select the preferred predecessor for MBB. +const MachineBasicBlock* +MinInstrCountEnsemble::pickTracePred(const MachineBasicBlock *MBB) { + if (MBB->pred_empty()) + return 0; + const MachineLoop *CurLoop = getLoopFor(MBB); + // Don't leave loops, and never follow back-edges. + if (CurLoop && MBB == CurLoop->getHeader()) + return 0; + unsigned CurCount = MTM.getResources(MBB)->InstrCount; + const MachineBasicBlock *Best = 0; + unsigned BestDepth = 0; + for (MachineBasicBlock::const_pred_iterator + I = MBB->pred_begin(), E = MBB->pred_end(); I != E; ++I) { + const MachineBasicBlock *Pred = *I; + const MachineTraceMetrics::TraceBlockInfo *PredTBI = + getDepthResources(Pred); + // Ignore cycles that aren't natural loops. + if (!PredTBI) + continue; + // Pick the predecessor that would give this block the smallest InstrDepth. + unsigned Depth = PredTBI->InstrDepth + CurCount; + if (!Best || Depth < BestDepth) + Best = Pred, BestDepth = Depth; + } + return Best; +} + +// Select the preferred successor for MBB. +const MachineBasicBlock* +MinInstrCountEnsemble::pickTraceSucc(const MachineBasicBlock *MBB) { + if (MBB->pred_empty()) + return 0; + const MachineLoop *CurLoop = getLoopFor(MBB); + const MachineBasicBlock *Best = 0; + unsigned BestHeight = 0; + for (MachineBasicBlock::const_succ_iterator + I = MBB->succ_begin(), E = MBB->succ_end(); I != E; ++I) { + const MachineBasicBlock *Succ = *I; + // Don't consider back-edges. + if (CurLoop && Succ == CurLoop->getHeader()) + continue; + // Don't consider successors exiting CurLoop. 
+ if (isExitingLoop(CurLoop, getLoopFor(Succ))) + continue; + const MachineTraceMetrics::TraceBlockInfo *SuccTBI = + getHeightResources(Succ); + // Ignore cycles that aren't natural loops. + if (!SuccTBI) + continue; + // Pick the successor that would give this block the smallest InstrHeight. + unsigned Height = SuccTBI->InstrHeight; + if (!Best || Height < BestHeight) + Best = Succ, BestHeight = Height; + } + return Best; +} + +// Get an Ensemble sub-class for the requested trace strategy. +MachineTraceMetrics::Ensemble * +MachineTraceMetrics::getEnsemble(MachineTraceMetrics::Strategy strategy) { + assert(strategy < TS_NumStrategies && "Invalid trace strategy enum"); + Ensemble *&E = Ensembles[strategy]; + if (E) + return E; + + // Allocate new Ensemble on demand. + switch (strategy) { + case TS_MinInstrCount: return (E = new MinInstrCountEnsemble(this)); + default: llvm_unreachable("Invalid trace strategy enum"); + } +} + +void MachineTraceMetrics::invalidate(const MachineBasicBlock *MBB) { + DEBUG(dbgs() << "Invalidate traces through BB#" << MBB->getNumber() << '\n'); + BlockInfo[MBB->getNumber()].invalidate(); + for (unsigned i = 0; i != TS_NumStrategies; ++i) + if (Ensembles[i]) + Ensembles[i]->invalidate(MBB); +} + +void MachineTraceMetrics::verifyAnalysis() const { + if (!MF) + return; +#ifndef NDEBUG + assert(BlockInfo.size() == MF->getNumBlockIDs() && "Outdated BlockInfo size"); + for (unsigned i = 0; i != TS_NumStrategies; ++i) + if (Ensembles[i]) + Ensembles[i]->verify(); +#endif +} + +//===----------------------------------------------------------------------===// +// Trace building +//===----------------------------------------------------------------------===// +// +// Traces are built by two CFG traversals. To avoid recomputing too much, use a +// set abstraction that confines the search to the current loop, and doesn't +// revisit blocks. + +namespace { +struct LoopBounds { + MutableArrayRef Blocks; + SmallPtrSet Visited; + const MachineLoopInfo *Loops; + bool Downward; + LoopBounds(MutableArrayRef blocks, + const MachineLoopInfo *loops) + : Blocks(blocks), Loops(loops), Downward(false) {} +}; +} + +// Specialize po_iterator_storage in order to prune the post-order traversal so +// it is limited to the current loop and doesn't traverse the loop back edges. +namespace llvm { +template<> +class po_iterator_storage { + LoopBounds &LB; +public: + po_iterator_storage(LoopBounds &lb) : LB(lb) {} + void finishPostorder(const MachineBasicBlock*) {} + + bool insertEdge(const MachineBasicBlock *From, const MachineBasicBlock *To) { + // Skip already visited To blocks. + MachineTraceMetrics::TraceBlockInfo &TBI = LB.Blocks[To->getNumber()]; + if (LB.Downward ? TBI.hasValidHeight() : TBI.hasValidDepth()) + return false; + // From is null once when To is the trace center block. + if (From) { + if (const MachineLoop *FromLoop = LB.Loops->getLoopFor(From)) { + // Don't follow backedges, don't leave FromLoop when going upwards. + if ((LB.Downward ? To : From) == FromLoop->getHeader()) + return false; + // Don't leave FromLoop. + if (isExitingLoop(FromLoop, LB.Loops->getLoopFor(To))) + return false; + } + } + // To is a new block. Mark the block as visited in case the CFG has cycles + // that MachineLoopInfo didn't recognize as a natural loop. + return LB.Visited.insert(To); + } +}; +} + +/// Compute the trace through MBB. 
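+/// As described above, this runs two post-order searches: an upwards walk that
+/// picks a preferred predecessor for each block and computes depth resources,
+/// then a downwards walk that picks successors and computes height resources.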
+void MachineTraceMetrics::Ensemble::computeTrace(const MachineBasicBlock *MBB) { + DEBUG(dbgs() << "Computing " << getName() << " trace through BB#" + << MBB->getNumber() << '\n'); + // Set up loop bounds for the backwards post-order traversal. + LoopBounds Bounds(BlockInfo, MTM.Loops); + + // Run an upwards post-order search for the trace start. + Bounds.Downward = false; + Bounds.Visited.clear(); + typedef ipo_ext_iterator UpwardPO; + for (UpwardPO I = ipo_ext_begin(MBB, Bounds), E = ipo_ext_end(MBB, Bounds); + I != E; ++I) { + DEBUG(dbgs() << " pred for BB#" << I->getNumber() << ": "); + TraceBlockInfo &TBI = BlockInfo[I->getNumber()]; + // All the predecessors have been visited, pick the preferred one. + TBI.Pred = pickTracePred(*I); + DEBUG({ + if (TBI.Pred) + dbgs() << "BB#" << TBI.Pred->getNumber() << '\n'; + else + dbgs() << "null\n"; + }); + // The trace leading to I is now known, compute the depth resources. + computeDepthResources(*I); + } + + // Run a downwards post-order search for the trace end. + Bounds.Downward = true; + Bounds.Visited.clear(); + typedef po_ext_iterator DownwardPO; + for (DownwardPO I = po_ext_begin(MBB, Bounds), E = po_ext_end(MBB, Bounds); + I != E; ++I) { + DEBUG(dbgs() << " succ for BB#" << I->getNumber() << ": "); + TraceBlockInfo &TBI = BlockInfo[I->getNumber()]; + // All the successors have been visited, pick the preferred one. + TBI.Succ = pickTraceSucc(*I); + DEBUG({ + if (TBI.Succ) + dbgs() << "BB#" << TBI.Succ->getNumber() << '\n'; + else + dbgs() << "null\n"; + }); + // The trace leaving I is now known, compute the height resources. + computeHeightResources(*I); + } +} + +/// Invalidate traces through BadMBB. +void +MachineTraceMetrics::Ensemble::invalidate(const MachineBasicBlock *BadMBB) { + SmallVector WorkList; + TraceBlockInfo &BadTBI = BlockInfo[BadMBB->getNumber()]; + + // Invalidate height resources of blocks above MBB. + if (BadTBI.hasValidHeight()) { + BadTBI.invalidateHeight(); + WorkList.push_back(BadMBB); + do { + const MachineBasicBlock *MBB = WorkList.pop_back_val(); + DEBUG(dbgs() << "Invalidate BB#" << MBB->getNumber() << ' ' << getName() + << " height.\n"); + // Find any MBB predecessors that have MBB as their preferred successor. + // They are the only ones that need to be invalidated. + for (MachineBasicBlock::const_pred_iterator + I = MBB->pred_begin(), E = MBB->pred_end(); I != E; ++I) { + TraceBlockInfo &TBI = BlockInfo[(*I)->getNumber()]; + if (!TBI.hasValidHeight()) + continue; + if (TBI.Succ == MBB) { + TBI.invalidateHeight(); + WorkList.push_back(*I); + continue; + } + // Verify that TBI.Succ is actually a *I successor. + assert((!TBI.Succ || (*I)->isSuccessor(TBI.Succ)) && "CFG changed"); + } + } while (!WorkList.empty()); + } + + // Invalidate depth resources of blocks below MBB. + if (BadTBI.hasValidDepth()) { + BadTBI.invalidateDepth(); + WorkList.push_back(BadMBB); + do { + const MachineBasicBlock *MBB = WorkList.pop_back_val(); + DEBUG(dbgs() << "Invalidate BB#" << MBB->getNumber() << ' ' << getName() + << " depth.\n"); + // Find any MBB successors that have MBB as their preferred predecessor. + // They are the only ones that need to be invalidated. + for (MachineBasicBlock::const_succ_iterator + I = MBB->succ_begin(), E = MBB->succ_end(); I != E; ++I) { + TraceBlockInfo &TBI = BlockInfo[(*I)->getNumber()]; + if (!TBI.hasValidDepth()) + continue; + if (TBI.Pred == MBB) { + TBI.invalidateDepth(); + WorkList.push_back(*I); + continue; + } + // Verify that TBI.Pred is actually a *I predecessor. 
+ assert((!TBI.Pred || (*I)->isPredecessor(TBI.Pred)) && "CFG changed"); + } + } while (!WorkList.empty()); + } + + // Clear any per-instruction data. We only have to do this for BadMBB itself + // because the instructions in that block may change. Other blocks may be + // invalidated, but their instructions will stay the same, so there is no + // need to erase the Cycle entries. They will be overwritten when we + // recompute. + for (MachineBasicBlock::const_iterator I = BadMBB->begin(), E = BadMBB->end(); + I != E; ++I) + Cycles.erase(I); +} + +void MachineTraceMetrics::Ensemble::verify() const { +#ifndef NDEBUG + assert(BlockInfo.size() == MTM.MF->getNumBlockIDs() && + "Outdated BlockInfo size"); + for (unsigned Num = 0, e = BlockInfo.size(); Num != e; ++Num) { + const TraceBlockInfo &TBI = BlockInfo[Num]; + if (TBI.hasValidDepth() && TBI.Pred) { + const MachineBasicBlock *MBB = MTM.MF->getBlockNumbered(Num); + assert(MBB->isPredecessor(TBI.Pred) && "CFG doesn't match trace"); + assert(BlockInfo[TBI.Pred->getNumber()].hasValidDepth() && + "Trace is broken, depth should have been invalidated."); + const MachineLoop *Loop = getLoopFor(MBB); + assert(!(Loop && MBB == Loop->getHeader()) && "Trace contains backedge"); + } + if (TBI.hasValidHeight() && TBI.Succ) { + const MachineBasicBlock *MBB = MTM.MF->getBlockNumbered(Num); + assert(MBB->isSuccessor(TBI.Succ) && "CFG doesn't match trace"); + assert(BlockInfo[TBI.Succ->getNumber()].hasValidHeight() && + "Trace is broken, height should have been invalidated."); + const MachineLoop *Loop = getLoopFor(MBB); + const MachineLoop *SuccLoop = getLoopFor(TBI.Succ); + assert(!(Loop && Loop == SuccLoop && TBI.Succ == Loop->getHeader()) && + "Trace contains backedge"); + } + } +#endif +} + +//===----------------------------------------------------------------------===// +// Data Dependencies +//===----------------------------------------------------------------------===// +// +// Compute the depth and height of each instruction based on data dependencies +// and instruction latencies. These cycle numbers assume that the CPU can issue +// an infinite number of instructions per cycle as long as their dependencies +// are ready. + +// A data dependency is represented as a defining MI and operand numbers on the +// defining and using MI. +namespace { +struct DataDep { + const MachineInstr *DefMI; + unsigned DefOp; + unsigned UseOp; + + DataDep(const MachineInstr *DefMI, unsigned DefOp, unsigned UseOp) + : DefMI(DefMI), DefOp(DefOp), UseOp(UseOp) {} + + /// Create a DataDep from an SSA form virtual register. + DataDep(const MachineRegisterInfo *MRI, unsigned VirtReg, unsigned UseOp) + : UseOp(UseOp) { + assert(TargetRegisterInfo::isVirtualRegister(VirtReg)); + MachineRegisterInfo::def_iterator DefI = MRI->def_begin(VirtReg); + assert(!DefI.atEnd() && "Register has no defs"); + DefMI = &*DefI; + DefOp = DefI.getOperandNo(); + assert((++DefI).atEnd() && "Register has multiple defs"); + } +}; +} + +// Get the input data dependencies that must be ready before UseMI can issue. +// Return true if UseMI has any physreg operands. +static bool getDataDeps(const MachineInstr *UseMI, + SmallVectorImpl &Deps, + const MachineRegisterInfo *MRI) { + bool HasPhysRegs = false; + for (ConstMIOperands MO(UseMI); MO.isValid(); ++MO) { + if (!MO->isReg()) + continue; + unsigned Reg = MO->getReg(); + if (!Reg) + continue; + if (TargetRegisterInfo::isPhysicalRegister(Reg)) { + HasPhysRegs = true; + continue; + } + // Collect virtual register reads. 
+ if (MO->readsReg()) + Deps.push_back(DataDep(MRI, Reg, MO.getOperandNo())); + } + return HasPhysRegs; +} + +// Get the input data dependencies of a PHI instruction, using Pred as the +// preferred predecessor. +// This will add at most one dependency to Deps. +static void getPHIDeps(const MachineInstr *UseMI, + SmallVectorImpl &Deps, + const MachineBasicBlock *Pred, + const MachineRegisterInfo *MRI) { + // No predecessor at the beginning of a trace. Ignore dependencies. + if (!Pred) + return; + assert(UseMI->isPHI() && UseMI->getNumOperands() % 2 && "Bad PHI"); + for (unsigned i = 1; i != UseMI->getNumOperands(); i += 2) { + if (UseMI->getOperand(i + 1).getMBB() == Pred) { + unsigned Reg = UseMI->getOperand(i).getReg(); + Deps.push_back(DataDep(MRI, Reg, i)); + return; + } + } +} + +// Keep track of physreg data dependencies by recording each live register unit. +// Associate each regunit with an instruction operand. Depending on the +// direction instructions are scanned, it could be the operand that defined the +// regunit, or the highest operand to read the regunit. +namespace { +struct LiveRegUnit { + unsigned RegUnit; + unsigned Cycle; + const MachineInstr *MI; + unsigned Op; + + unsigned getSparseSetIndex() const { return RegUnit; } + + LiveRegUnit(unsigned RU) : RegUnit(RU), Cycle(0), MI(0), Op(0) {} +}; +} + +// Identify physreg dependencies for UseMI, and update the live regunit +// tracking set when scanning instructions downwards. +static void updatePhysDepsDownwards(const MachineInstr *UseMI, + SmallVectorImpl &Deps, + SparseSet &RegUnits, + const TargetRegisterInfo *TRI) { + SmallVector Kills; + SmallVector LiveDefOps; + + for (ConstMIOperands MO(UseMI); MO.isValid(); ++MO) { + if (!MO->isReg()) + continue; + unsigned Reg = MO->getReg(); + if (!TargetRegisterInfo::isPhysicalRegister(Reg)) + continue; + // Track live defs and kills for updating RegUnits. + if (MO->isDef()) { + if (MO->isDead()) + Kills.push_back(Reg); + else + LiveDefOps.push_back(MO.getOperandNo()); + } else if (MO->isKill()) + Kills.push_back(Reg); + // Identify dependencies. + if (!MO->readsReg()) + continue; + for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units) { + SparseSet::iterator I = RegUnits.find(*Units); + if (I == RegUnits.end()) + continue; + Deps.push_back(DataDep(I->MI, I->Op, MO.getOperandNo())); + break; + } + } + + // Update RegUnits to reflect live registers after UseMI. + // First kills. + for (unsigned i = 0, e = Kills.size(); i != e; ++i) + for (MCRegUnitIterator Units(Kills[i], TRI); Units.isValid(); ++Units) + RegUnits.erase(*Units); + + // Second, live defs. + for (unsigned i = 0, e = LiveDefOps.size(); i != e; ++i) { + unsigned DefOp = LiveDefOps[i]; + for (MCRegUnitIterator Units(UseMI->getOperand(DefOp).getReg(), TRI); + Units.isValid(); ++Units) { + LiveRegUnit &LRU = RegUnits[*Units]; + LRU.MI = UseMI; + LRU.Op = DefOp; + } + } +} + +/// The length of the critical path through a trace is the maximum of two path +/// lengths: +/// +/// 1. The maximum height+depth over all instructions in the trace center block. +/// +/// 2. The longest cross-block dependency chain. For small blocks, it is +/// possible that the critical path through the trace doesn't include any +/// instructions in the block. +/// +/// This function computes the second number from the live-in list of the +/// center block. 
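+///
+/// Concretely, this is the maximum over the center block's virtual register
+/// live-ins of (live-in height + depth of the defining instruction), counting
+/// only defs that belong to the same trace.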
+unsigned MachineTraceMetrics::Ensemble:: +computeCrossBlockCriticalPath(const TraceBlockInfo &TBI) { + assert(TBI.HasValidInstrDepths && "Missing depth info"); + assert(TBI.HasValidInstrHeights && "Missing height info"); + unsigned MaxLen = 0; + for (unsigned i = 0, e = TBI.LiveIns.size(); i != e; ++i) { + const LiveInReg &LIR = TBI.LiveIns[i]; + if (!TargetRegisterInfo::isVirtualRegister(LIR.Reg)) + continue; + const MachineInstr *DefMI = MTM.MRI->getVRegDef(LIR.Reg); + // Ignore dependencies outside the current trace. + const TraceBlockInfo &DefTBI = BlockInfo[DefMI->getParent()->getNumber()]; + if (!DefTBI.hasValidDepth() || DefTBI.Head != TBI.Head) + continue; + unsigned Len = LIR.Height + Cycles[DefMI].Depth; + MaxLen = std::max(MaxLen, Len); + } + return MaxLen; +} + +/// Compute instruction depths for all instructions above or in MBB in its +/// trace. This assumes that the trace through MBB has already been computed. +void MachineTraceMetrics::Ensemble:: +computeInstrDepths(const MachineBasicBlock *MBB) { + // The top of the trace may already be computed, and HasValidInstrDepths + // implies Head->HasValidInstrDepths, so we only need to start from the first + // block in the trace that needs to be recomputed. + SmallVector Stack; + do { + TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()]; + assert(TBI.hasValidDepth() && "Incomplete trace"); + if (TBI.HasValidInstrDepths) + break; + Stack.push_back(MBB); + MBB = TBI.Pred; + } while (MBB); + + // FIXME: If MBB is non-null at this point, it is the last pre-computed block + // in the trace. We should track any live-out physregs that were defined in + // the trace. This is quite rare in SSA form, typically created by CSE + // hoisting a compare. + SparseSet RegUnits; + RegUnits.setUniverse(MTM.TRI->getNumRegUnits()); + + // Go through trace blocks in top-down order, stopping after the center block. + SmallVector Deps; + while (!Stack.empty()) { + MBB = Stack.pop_back_val(); + DEBUG(dbgs() << "Depths for BB#" << MBB->getNumber() << ":\n"); + TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()]; + TBI.HasValidInstrDepths = true; + TBI.CriticalPath = 0; + + // Also compute the critical path length through MBB when possible. + if (TBI.HasValidInstrHeights) + TBI.CriticalPath = computeCrossBlockCriticalPath(TBI); + + for (MachineBasicBlock::const_iterator I = MBB->begin(), E = MBB->end(); + I != E; ++I) { + const MachineInstr *UseMI = I; + + // Collect all data dependencies. + Deps.clear(); + if (UseMI->isPHI()) + getPHIDeps(UseMI, Deps, TBI.Pred, MTM.MRI); + else if (getDataDeps(UseMI, Deps, MTM.MRI)) + updatePhysDepsDownwards(UseMI, Deps, RegUnits, MTM.TRI); + + // Filter and process dependencies, computing the earliest issue cycle. + unsigned Cycle = 0; + for (unsigned i = 0, e = Deps.size(); i != e; ++i) { + const DataDep &Dep = Deps[i]; + const TraceBlockInfo&DepTBI = + BlockInfo[Dep.DefMI->getParent()->getNumber()]; + // Ignore dependencies from outside the current trace. + if (!DepTBI.hasValidDepth() || DepTBI.Head != TBI.Head) + continue; + assert(DepTBI.HasValidInstrDepths && "Inconsistent dependency"); + unsigned DepCycle = Cycles.lookup(Dep.DefMI).Depth; + // Add latency if DefMI is a real instruction. Transients get latency 0. + if (!Dep.DefMI->isTransient()) + DepCycle += MTM.TII->computeOperandLatency(MTM.ItinData, + Dep.DefMI, Dep.DefOp, + UseMI, Dep.UseOp, + /* FindMin = */ false); + Cycle = std::max(Cycle, DepCycle); + } + // Remember the instruction depth. 
+      InstrCycles &MICycles = Cycles[UseMI];
+      MICycles.Depth = Cycle;
+
+      if (!TBI.HasValidInstrHeights) {
+        DEBUG(dbgs() << Cycle << '\t' << *UseMI);
+        continue;
+      }
+      // Update critical path length.
+      TBI.CriticalPath = std::max(TBI.CriticalPath, Cycle + MICycles.Height);
+      DEBUG(dbgs() << TBI.CriticalPath << '\t' << Cycle << '\t' << *UseMI);
+    }
+  }
+}
+
+// Identify physreg dependencies for MI when scanning instructions upwards.
+// Return the issue height of MI after considering any live regunits.
+// Height is the issue height computed from virtual register dependencies alone.
+static unsigned updatePhysDepsUpwards(const MachineInstr *MI, unsigned Height,
+                                      SparseSet<LiveRegUnit> &RegUnits,
+                                      const InstrItineraryData *ItinData,
+                                      const TargetInstrInfo *TII,
+                                      const TargetRegisterInfo *TRI) {
+  SmallVector<unsigned, 8> ReadOps;
+  for (ConstMIOperands MO(MI); MO.isValid(); ++MO) {
+    if (!MO->isReg())
+      continue;
+    unsigned Reg = MO->getReg();
+    if (!TargetRegisterInfo::isPhysicalRegister(Reg))
+      continue;
+    if (MO->readsReg())
+      ReadOps.push_back(MO.getOperandNo());
+    if (!MO->isDef())
+      continue;
+    // This is a def of Reg. Remove corresponding entries from RegUnits, and
+    // update MI Height to consider the physreg dependencies.
+    for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units) {
+      SparseSet<LiveRegUnit>::iterator I = RegUnits.find(*Units);
+      if (I == RegUnits.end())
+        continue;
+      unsigned DepHeight = I->Cycle;
+      if (!MI->isTransient()) {
+        // We may not know the UseMI of this dependency, if it came from the
+        // live-in list.
+        if (I->MI)
+          DepHeight += TII->computeOperandLatency(ItinData,
+                                                  MI, MO.getOperandNo(),
+                                                  I->MI, I->Op);
+        else
+          // No UseMI. Just use the MI latency instead.
+          DepHeight += TII->getInstrLatency(ItinData, MI);
+      }
+      Height = std::max(Height, DepHeight);
+      // This regunit is dead above MI.
+      RegUnits.erase(I);
+    }
+  }
+
+  // Now we know the height of MI. Update any regunits read.
+  for (unsigned i = 0, e = ReadOps.size(); i != e; ++i) {
+    unsigned Reg = MI->getOperand(ReadOps[i]).getReg();
+    for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units) {
+      LiveRegUnit &LRU = RegUnits[*Units];
+      // Set the height to the highest reader of the unit.
+      if (LRU.Cycle <= Height && LRU.MI != MI) {
+        LRU.Cycle = Height;
+        LRU.MI = MI;
+        LRU.Op = ReadOps[i];
+      }
+    }
+  }
+
+  return Height;
+}
+
+
+typedef DenseMap<const MachineInstr *, unsigned> MIHeightMap;
+
+// Push the height of DefMI upwards if required to match UseMI.
+// Return true if this is the first time DefMI was seen.
+static bool pushDepHeight(const DataDep &Dep,
+                          const MachineInstr *UseMI, unsigned UseHeight,
+                          MIHeightMap &Heights,
+                          const InstrItineraryData *ItinData,
+                          const TargetInstrInfo *TII) {
+  // Adjust height by Dep.DefMI latency.
+  if (!Dep.DefMI->isTransient())
+    UseHeight += TII->computeOperandLatency(ItinData, Dep.DefMI, Dep.DefOp,
+                                            UseMI, Dep.UseOp);
+
+  // Update Heights[DefMI] to be the maximum height seen.
+  MIHeightMap::iterator I;
+  bool New;
+  tie(I, New) = Heights.insert(std::make_pair(Dep.DefMI, UseHeight));
+  if (New)
+    return true;
+
+  // DefMI has been pushed before. Give it the max height.
+  if (I->second < UseHeight)
+    I->second = UseHeight;
+  return false;
+}
+
+/// Assuming that DefMI was used by Trace.back(), add it to the live-in lists
+/// of all the blocks in Trace. Stop when reaching the block that contains
+/// DefMI.
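As an aside before addLiveIns (documented above): the bottom-up bookkeeping
that pushDepHeight performs can be summarized in a few lines. std::map stands
in for DenseMap and Latency for the itinerary query, so this is a simplified
model under stated assumptions, not the real API:

#include <map>
#include <utility>

typedef std::map<const void *, unsigned> HeightMap; // def -> required height

// Mirrors pushDepHeight: enforce height(def) >= height(use) + latency and
// report whether this is the first time the def has been seen.
static bool pushHeight(HeightMap &Heights, const void *Def,
                       unsigned UseHeight, unsigned Latency) {
  unsigned H = UseHeight + Latency;
  std::pair<HeightMap::iterator, bool> R =
      Heights.insert(std::make_pair(Def, H));
  if (!R.second && R.first->second < H)
    R.first->second = H; // keep the maximum over all uses seen so far
  return R.second;
}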
+void MachineTraceMetrics::Ensemble::
+addLiveIns(const MachineInstr *DefMI,
+           ArrayRef<const MachineBasicBlock*> Trace) {
+  assert(!Trace.empty() && "Trace should contain at least one block");
+  unsigned Reg = DefMI->getOperand(0).getReg();
+  assert(TargetRegisterInfo::isVirtualRegister(Reg));
+  const MachineBasicBlock *DefMBB = DefMI->getParent();
+
+  // Reg is live-in to all blocks in Trace that follow DefMBB.
+  for (unsigned i = Trace.size(); i; --i) {
+    const MachineBasicBlock *MBB = Trace[i-1];
+    if (MBB == DefMBB)
+      return;
+    TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
+    // Just add the register. The height will be updated later.
+    TBI.LiveIns.push_back(Reg);
+  }
+}
+
+/// Compute instruction heights in the trace through MBB. This updates MBB and
+/// the blocks below it in the trace. It is assumed that the trace has already
+/// been computed.
+void MachineTraceMetrics::Ensemble::
+computeInstrHeights(const MachineBasicBlock *MBB) {
+  // The bottom of the trace may already be computed.
+  // Find the blocks that need updating.
+  SmallVector<const MachineBasicBlock*, 8> Stack;
+  do {
+    TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
+    assert(TBI.hasValidHeight() && "Incomplete trace");
+    if (TBI.HasValidInstrHeights)
+      break;
+    Stack.push_back(MBB);
+    TBI.LiveIns.clear();
+    MBB = TBI.Succ;
+  } while (MBB);
+
+  // As we move upwards in the trace, keep track of instructions that are
+  // required by deeper trace instructions. Map MI -> height required so far.
+  MIHeightMap Heights;
+
+  // For physregs, the def isn't known when we see the use.
+  // Instead, keep track of the highest use of each regunit.
+  SparseSet<LiveRegUnit> RegUnits;
+  RegUnits.setUniverse(MTM.TRI->getNumRegUnits());
+
+  // If the bottom of the trace was already precomputed, initialize heights
+  // from its live-in list.
+  // MBB is the highest precomputed block in the trace.
+  if (MBB) {
+    TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
+    for (unsigned i = 0, e = TBI.LiveIns.size(); i != e; ++i) {
+      LiveInReg LI = TBI.LiveIns[i];
+      if (TargetRegisterInfo::isVirtualRegister(LI.Reg)) {
+        // For virtual registers, the def latency is included.
+        unsigned &Height = Heights[MTM.MRI->getVRegDef(LI.Reg)];
+        if (Height < LI.Height)
+          Height = LI.Height;
+      } else {
+        // For register units, the def latency is not included because we don't
+        // know the def yet.
+        RegUnits[LI.Reg].Cycle = LI.Height;
+      }
+    }
+  }
+
+  // Go through the trace blocks in bottom-up order.
+  SmallVector<DataDep, 8> Deps;
+  for (;!Stack.empty(); Stack.pop_back()) {
+    MBB = Stack.back();
+    DEBUG(dbgs() << "Heights for BB#" << MBB->getNumber() << ":\n");
+    TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
+    TBI.HasValidInstrHeights = true;
+    TBI.CriticalPath = 0;
+
+    // Get dependencies from PHIs in the trace successor.
+    const MachineBasicBlock *Succ = TBI.Succ;
+    // If MBB is the last block in the trace, and it has a back-edge to the
+    // loop header, get loop-carried dependencies from PHIs in the header. For
+    // that purpose, pretend that all the loop header PHIs have height 0.
+    if (!Succ)
+      if (const MachineLoop *Loop = getLoopFor(MBB))
+        if (MBB->isSuccessor(Loop->getHeader()))
+          Succ = Loop->getHeader();
+
+    if (Succ) {
+      for (MachineBasicBlock::const_iterator I = Succ->begin(), E = Succ->end();
+           I != E && I->isPHI(); ++I) {
+        const MachineInstr *PHI = I;
+        Deps.clear();
+        getPHIDeps(PHI, Deps, MBB, MTM.MRI);
+        if (!Deps.empty()) {
+          // Loop header PHI heights are all 0.
+          unsigned Height = TBI.Succ ? Cycles.lookup(PHI).Height : 0;
+          DEBUG(dbgs() << "pred\t" << Height << '\t' << *PHI);
+          if (pushDepHeight(Deps.front(), PHI, Height,
+                            Heights, MTM.ItinData, MTM.TII))
+            addLiveIns(Deps.front().DefMI, Stack);
+        }
+      }
+    }
+
+    // Go through the block backwards.
+    for (MachineBasicBlock::const_iterator BI = MBB->end(), BB = MBB->begin();
+         BI != BB;) {
+      const MachineInstr *MI = --BI;
+
+      // Find the MI height as determined by virtual register uses in the
+      // trace below.
+      unsigned Cycle = 0;
+      MIHeightMap::iterator HeightI = Heights.find(MI);
+      if (HeightI != Heights.end()) {
+        Cycle = HeightI->second;
+        // We won't be seeing any more MI uses.
+        Heights.erase(HeightI);
+      }
+
+      // Don't process PHI deps. They depend on the specific predecessor, and
+      // we'll get them when visiting the predecessor.
+      Deps.clear();
+      bool HasPhysRegs = !MI->isPHI() && getDataDeps(MI, Deps, MTM.MRI);
+
+      // There may also be regunit dependencies to include in the height.
+      if (HasPhysRegs)
+        Cycle = updatePhysDepsUpwards(MI, Cycle, RegUnits,
+                                      MTM.ItinData, MTM.TII, MTM.TRI);
+
+      // Update the required height of any virtual registers read by MI.
+      for (unsigned i = 0, e = Deps.size(); i != e; ++i)
+        if (pushDepHeight(Deps[i], MI, Cycle, Heights, MTM.ItinData, MTM.TII))
+          addLiveIns(Deps[i].DefMI, Stack);
+
+      InstrCycles &MICycles = Cycles[MI];
+      MICycles.Height = Cycle;
+      if (!TBI.HasValidInstrDepths) {
+        DEBUG(dbgs() << Cycle << '\t' << *MI);
+        continue;
+      }
+      // Update critical path length.
+      TBI.CriticalPath = std::max(TBI.CriticalPath, Cycle + MICycles.Depth);
+      DEBUG(dbgs() << TBI.CriticalPath << '\t' << Cycle << '\t' << *MI);
+    }
+
+    // Update virtual live-in heights. They were added by addLiveIns() with a 0
+    // height because the final height isn't known until now.
+    DEBUG(dbgs() << "BB#" << MBB->getNumber() << " Live-ins:");
+    for (unsigned i = 0, e = TBI.LiveIns.size(); i != e; ++i) {
+      LiveInReg &LIR = TBI.LiveIns[i];
+      const MachineInstr *DefMI = MTM.MRI->getVRegDef(LIR.Reg);
+      LIR.Height = Heights.lookup(DefMI);
+      DEBUG(dbgs() << ' ' << PrintReg(LIR.Reg) << '@' << LIR.Height);
+    }
+
+    // Transfer the live regunits to the live-in list.
+    for (SparseSet<LiveRegUnit>::const_iterator
+         RI = RegUnits.begin(), RE = RegUnits.end(); RI != RE; ++RI) {
+      TBI.LiveIns.push_back(LiveInReg(RI->RegUnit, RI->Cycle));
+      DEBUG(dbgs() << ' ' << PrintRegUnit(RI->RegUnit, MTM.TRI)
+                   << '@' << RI->Cycle);
+    }
+    DEBUG(dbgs() << '\n');
+
+    if (!TBI.HasValidInstrDepths)
+      continue;
+    // Add live-ins to the critical path length.
+    TBI.CriticalPath = std::max(TBI.CriticalPath,
+                                computeCrossBlockCriticalPath(TBI));
+    DEBUG(dbgs() << "Critical path: " << TBI.CriticalPath << '\n');
+  }
+}
+
+MachineTraceMetrics::Trace
+MachineTraceMetrics::Ensemble::getTrace(const MachineBasicBlock *MBB) {
+  // FIXME: Check cache tags, recompute as needed.
+  computeTrace(MBB);
+  computeInstrDepths(MBB);
+  computeInstrHeights(MBB);
+  return Trace(*this, BlockInfo[MBB->getNumber()]);
+}
+
+unsigned
+MachineTraceMetrics::Trace::getInstrSlack(const MachineInstr *MI) const {
+  assert(MI && "Not an instruction.");
+  assert(getBlockNum() == unsigned(MI->getParent()->getNumber()) &&
+         "MI must be in the trace center block");
+  InstrCycles Cyc = getInstrCycles(MI);
+  return getCriticalPath() - (Cyc.Depth + Cyc.Height);
+}
+
+unsigned
+MachineTraceMetrics::Trace::getPHIDepth(const MachineInstr *PHI) const {
+  const MachineBasicBlock *MBB = TE.MTM.MF->getBlockNumbered(getBlockNum());
+  SmallVector<DataDep, 1> Deps;
+  getPHIDeps(PHI, Deps, MBB, TE.MTM.MRI);
+  assert(Deps.size() == 1 && "PHI doesn't have MBB as a predecessor");
+  DataDep &Dep = Deps.front();
+  unsigned DepCycle = getInstrCycles(Dep.DefMI).Depth;
+  // Add latency if DefMI is a real instruction. Transients get latency 0.
+  if (!Dep.DefMI->isTransient())
+    DepCycle += TE.MTM.TII->computeOperandLatency(TE.MTM.ItinData,
+                                                  Dep.DefMI, Dep.DefOp,
+                                                  PHI, Dep.UseOp,
+                                                  /* FindMin = */ false);
+  return DepCycle;
+}
+
+unsigned MachineTraceMetrics::Trace::getResourceDepth(bool Bottom) const {
+  // For now, we compute the resource depth from instruction count / issue
+  // width. Eventually, we should compute resource depth per functional unit
+  // and return the max.
+  unsigned Instrs = TBI.InstrDepth;
+  if (Bottom)
+    Instrs += TE.MTM.BlockInfo[getBlockNum()].InstrCount;
+  if (const MCSchedModel *Model = TE.MTM.ItinData->SchedModel)
+    if (Model->IssueWidth != 0)
+      return Instrs / Model->IssueWidth;
+  // Assume issue width 1 without a schedule model.
+  return Instrs;
+}
+
+unsigned MachineTraceMetrics::Trace::
+getResourceLength(ArrayRef<const MachineBasicBlock*> Extrablocks) const {
+  unsigned Instrs = TBI.InstrDepth + TBI.InstrHeight;
+  for (unsigned i = 0, e = Extrablocks.size(); i != e; ++i)
+    Instrs += TE.MTM.getResources(Extrablocks[i])->InstrCount;
+  if (const MCSchedModel *Model = TE.MTM.ItinData->SchedModel)
+    if (Model->IssueWidth != 0)
+      return Instrs / Model->IssueWidth;
+  // Assume issue width 1 without a schedule model.
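+  // (Illustration with hypothetical numbers: 12 instructions at issue width
+  // 4 give 12 / 4 = 3 cycles above; the width-1 fallback below reports 12.)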
+ return Instrs; +} + +void MachineTraceMetrics::Ensemble::print(raw_ostream &OS) const { + OS << getName() << " ensemble:\n"; + for (unsigned i = 0, e = BlockInfo.size(); i != e; ++i) { + OS << " BB#" << i << '\t'; + BlockInfo[i].print(OS); + OS << '\n'; + } +} + +void MachineTraceMetrics::TraceBlockInfo::print(raw_ostream &OS) const { + if (hasValidDepth()) { + OS << "depth=" << InstrDepth; + if (Pred) + OS << " pred=BB#" << Pred->getNumber(); + else + OS << " pred=null"; + OS << " head=BB#" << Head; + if (HasValidInstrDepths) + OS << " +instrs"; + } else + OS << "depth invalid"; + OS << ", "; + if (hasValidHeight()) { + OS << "height=" << InstrHeight; + if (Succ) + OS << " succ=BB#" << Succ->getNumber(); + else + OS << " succ=null"; + OS << " tail=BB#" << Tail; + if (HasValidInstrHeights) + OS << " +instrs"; + } else + OS << "height invalid"; + if (HasValidInstrDepths && HasValidInstrHeights) + OS << ", crit=" << CriticalPath; +} + +void MachineTraceMetrics::Trace::print(raw_ostream &OS) const { + unsigned MBBNum = &TBI - &TE.BlockInfo[0]; + + OS << TE.getName() << " trace BB#" << TBI.Head << " --> BB#" << MBBNum + << " --> BB#" << TBI.Tail << ':'; + if (TBI.hasValidHeight() && TBI.hasValidDepth()) + OS << ' ' << getInstrCount() << " instrs."; + if (TBI.HasValidInstrDepths && TBI.HasValidInstrHeights) + OS << ' ' << TBI.CriticalPath << " cycles."; + + const MachineTraceMetrics::TraceBlockInfo *Block = &TBI; + OS << "\nBB#" << MBBNum; + while (Block->hasValidDepth() && Block->Pred) { + unsigned Num = Block->Pred->getNumber(); + OS << " <- BB#" << Num; + Block = &TE.BlockInfo[Num]; + } + + Block = &TBI; + OS << "\n "; + while (Block->hasValidHeight() && Block->Succ) { + unsigned Num = Block->Succ->getNumber(); + OS << " -> BB#" << Num; + Block = &TE.BlockInfo[Num]; + } + OS << '\n'; +} diff --git a/lib/CodeGen/MachineTraceMetrics.h b/lib/CodeGen/MachineTraceMetrics.h new file mode 100644 index 0000000..c5b86f3 --- /dev/null +++ b/lib/CodeGen/MachineTraceMetrics.h @@ -0,0 +1,341 @@ +//===- lib/CodeGen/MachineTraceMetrics.h - Super-scalar metrics -*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the interface for the MachineTraceMetrics analysis pass +// that estimates CPU resource usage and critical data dependency paths through +// preferred traces. This is useful for super-scalar CPUs where execution speed +// can be limited both by data dependencies and by limited execution resources. +// +// Out-of-order CPUs will often be executing instructions from multiple basic +// blocks at the same time. This makes it difficult to estimate the resource +// usage accurately in a single basic block. Resources can be estimated better +// by looking at a trace through the current basic block. +// +// For every block, the MachineTraceMetrics pass will pick a preferred trace +// that passes through the block. The trace is chosen based on loop structure, +// branch probabilities, and resource usage. The intention is to pick likely +// traces that would be the most affected by code transformations. +// +// It is expensive to compute a full arbitrary trace for every block, so to +// save some computations, traces are chosen to be convergent. 
This means that +// if the traces through basic blocks A and B ever cross when moving away from +// A and B, they never diverge again. This applies in both directions - If the +// traces meet above A and B, they won't diverge when going further back. +// +// Traces tend to align with loops. The trace through a block in an inner loop +// will begin at the loop entry block and end at a back edge. If there are +// nested loops, the trace may begin and end at those instead. +// +// For each trace, we compute the critical path length, which is the number of +// cycles required to execute the trace when execution is limited by data +// dependencies only. We also compute the resource height, which is the number +// of cycles required to execute all instructions in the trace when ignoring +// data dependencies. +// +// Every instruction in the current block has a slack - the number of cycles +// execution of the instruction can be delayed without extending the critical +// path. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_MACHINE_TRACE_METRICS_H +#define LLVM_CODEGEN_MACHINE_TRACE_METRICS_H + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/CodeGen/MachineFunctionPass.h" + +namespace llvm { + +class InstrItineraryData; +class MachineBasicBlock; +class MachineInstr; +class MachineLoop; +class MachineLoopInfo; +class MachineRegisterInfo; +class TargetInstrInfo; +class TargetRegisterInfo; +class raw_ostream; + +class MachineTraceMetrics : public MachineFunctionPass { + const MachineFunction *MF; + const TargetInstrInfo *TII; + const TargetRegisterInfo *TRI; + const InstrItineraryData *ItinData; + const MachineRegisterInfo *MRI; + const MachineLoopInfo *Loops; + +public: + class Ensemble; + class Trace; + static char ID; + MachineTraceMetrics(); + void getAnalysisUsage(AnalysisUsage&) const; + bool runOnMachineFunction(MachineFunction&); + void releaseMemory(); + void verifyAnalysis() const; + + friend class Ensemble; + friend class Trace; + + /// Per-basic block information that doesn't depend on the trace through the + /// block. + struct FixedBlockInfo { + /// The number of non-trivial instructions in the block. + /// Doesn't count PHI and COPY instructions that are likely to be removed. + unsigned InstrCount; + + /// True when the block contains calls. + bool HasCalls; + + FixedBlockInfo() : InstrCount(~0u), HasCalls(false) {} + + /// Returns true when resource information for this block has been computed. + bool hasResources() const { return InstrCount != ~0u; } + + /// Invalidate resource information. + void invalidate() { InstrCount = ~0u; } + }; + + /// Get the fixed resource information about MBB. Compute it on demand. + const FixedBlockInfo *getResources(const MachineBasicBlock*); + + /// A virtual register or regunit required by a basic block or its trace + /// successors. + struct LiveInReg { + /// The virtual register required, or a register unit. + unsigned Reg; + + /// For virtual registers: Minimum height of the defining instruction. + /// For regunits: Height of the highest user in the trace. + unsigned Height; + + LiveInReg(unsigned Reg, unsigned Height = 0) : Reg(Reg), Height(Height) {} + }; + + /// Per-basic block information that relates to a specific trace through the + /// block. Convergent traces means that only one of these is required per + /// block in a trace ensemble. + struct TraceBlockInfo { + /// Trace predecessor, or NULL for the first block in the trace. 
+    /// Valid when hasValidDepth().
+    const MachineBasicBlock *Pred;
+
+    /// Trace successor, or NULL for the last block in the trace.
+    /// Valid when hasValidHeight().
+    const MachineBasicBlock *Succ;
+
+    /// The block number of the head of the trace. (When hasValidDepth()).
+    unsigned Head;
+
+    /// The block number of the tail of the trace. (When hasValidHeight()).
+    unsigned Tail;
+
+    /// Accumulated number of instructions in the trace above this block.
+    /// Does not include instructions in this block.
+    unsigned InstrDepth;
+
+    /// Accumulated number of instructions in the trace below this block.
+    /// Includes instructions in this block.
+    unsigned InstrHeight;
+
+    TraceBlockInfo() :
+      Pred(0), Succ(0),
+      InstrDepth(~0u), InstrHeight(~0u),
+      HasValidInstrDepths(false), HasValidInstrHeights(false) {}
+
+    /// Returns true if the depth resources have been computed from the trace
+    /// above this block.
+    bool hasValidDepth() const { return InstrDepth != ~0u; }
+
+    /// Returns true if the height resources have been computed from the trace
+    /// below this block.
+    bool hasValidHeight() const { return InstrHeight != ~0u; }
+
+    /// Invalidate depth resources when some block above this one has changed.
+    void invalidateDepth() { InstrDepth = ~0u; HasValidInstrDepths = false; }
+
+    /// Invalidate height resources when a block below this one has changed.
+    void invalidateHeight() { InstrHeight = ~0u; HasValidInstrHeights = false; }
+
+    // Data-dependency-related information. Per-instruction depth and height
+    // are computed from data dependencies in the current trace, using
+    // itinerary data.
+
+    /// Instruction depths have been computed. This implies hasValidDepth().
+    bool HasValidInstrDepths;
+
+    /// Instruction heights have been computed. This implies hasValidHeight().
+    bool HasValidInstrHeights;
+
+    /// Critical path length. This is the number of cycles in the longest data
+    /// dependency chain through the trace. This is only valid when both
+    /// HasValidInstrDepths and HasValidInstrHeights are set.
+    unsigned CriticalPath;
+
+    /// Live-in registers. These registers are defined above the current block
+    /// and used by this block or a block below it.
+    /// This does not include PHI uses in the current block, but it does
+    /// include PHI uses in deeper blocks.
+    SmallVector<LiveInReg, 4> LiveIns;
+
+    void print(raw_ostream&) const;
+  };
+
+  /// InstrCycles represents the cycle height and depth of an instruction in a
+  /// trace.
+  struct InstrCycles {
+    /// Earliest issue cycle as determined by data dependencies and instruction
+    /// latencies from the beginning of the trace. Data dependencies from
+    /// before the trace are not included.
+    unsigned Depth;
+
+    /// Minimum number of cycles from when this instruction is issued to the
+    /// end of the trace, as determined by data dependencies and instruction
+    /// latencies.
+    unsigned Height;
+  };
+
+  /// A trace represents a plausible sequence of executed basic blocks that
+  /// passes through the current basic block. The Trace class serves as a
+  /// handle to internal cached data structures.
+  class Trace {
+    Ensemble &TE;
+    TraceBlockInfo &TBI;
+
+    unsigned getBlockNum() const { return &TBI - &TE.BlockInfo[0]; }
+
+  public:
+    explicit Trace(Ensemble &te, TraceBlockInfo &tbi) : TE(te), TBI(tbi) {}
+    void print(raw_ostream&) const;
+
+    /// Compute the total number of instructions in the trace.
+    unsigned getInstrCount() const {
+      return TBI.InstrDepth + TBI.InstrHeight;
+    }
+
+    /// Return the resource depth of the top/bottom of the trace center block.
+    /// This is the number of cycles required to execute all instructions from
+    /// the trace head to the trace center block. The resource depth only
+    /// considers execution resources, it ignores data dependencies.
+    /// When Bottom is set, instructions in the trace center block are included.
+    unsigned getResourceDepth(bool Bottom) const;
+
+    /// Return the resource length of the trace. This is the number of cycles
+    /// required to execute the instructions in the trace if they were all
+    /// independent, exposing the maximum instruction-level parallelism.
+    ///
+    /// Any blocks in Extrablocks are included as if they were part of the
+    /// trace.
+    unsigned getResourceLength(ArrayRef<const MachineBasicBlock*> Extrablocks =
+                               ArrayRef<const MachineBasicBlock*>()) const;
+
+    /// Return the length of the (data dependency) critical path through the
+    /// trace.
+    unsigned getCriticalPath() const { return TBI.CriticalPath; }
+
+    /// Return the depth and height of MI. The depth is only valid for
+    /// instructions in or above the trace center block. The height is only
+    /// valid for instructions in or below the trace center block.
+    InstrCycles getInstrCycles(const MachineInstr *MI) const {
+      return TE.Cycles.lookup(MI);
+    }
+
+    /// Return the slack of MI. This is the number of cycles MI can be delayed
+    /// before the critical path becomes longer.
+    /// MI must be an instruction in the trace center block.
+    unsigned getInstrSlack(const MachineInstr *MI) const;
+
+    /// Return the Depth of a PHI instruction in a trace center block successor.
+    /// The PHI does not have to be part of the trace.
+    unsigned getPHIDepth(const MachineInstr *PHI) const;
+  };
+
+  /// A trace ensemble is a collection of traces selected using the same
+  /// strategy, for example 'minimum resource height'. There is one trace for
+  /// every block in the function.
+  class Ensemble {
+    SmallVector<TraceBlockInfo, 4> BlockInfo;
+    DenseMap<const MachineInstr*, InstrCycles> Cycles;
+    friend class Trace;
+
+    void computeTrace(const MachineBasicBlock*);
+    void computeDepthResources(const MachineBasicBlock*);
+    void computeHeightResources(const MachineBasicBlock*);
+    unsigned computeCrossBlockCriticalPath(const TraceBlockInfo&);
+    void computeInstrDepths(const MachineBasicBlock*);
+    void computeInstrHeights(const MachineBasicBlock*);
+    void addLiveIns(const MachineInstr *DefMI,
+                    ArrayRef<const MachineBasicBlock*> Trace);
+
+  protected:
+    MachineTraceMetrics &MTM;
+    virtual const MachineBasicBlock *pickTracePred(const MachineBasicBlock*) =0;
+    virtual const MachineBasicBlock *pickTraceSucc(const MachineBasicBlock*) =0;
+    explicit Ensemble(MachineTraceMetrics*);
+    const MachineLoop *getLoopFor(const MachineBasicBlock*) const;
+    const TraceBlockInfo *getDepthResources(const MachineBasicBlock*) const;
+    const TraceBlockInfo *getHeightResources(const MachineBasicBlock*) const;
+
+  public:
+    virtual ~Ensemble();
+    virtual const char *getName() const =0;
+    void print(raw_ostream&) const;
+    void invalidate(const MachineBasicBlock *MBB);
+    void verify() const;
+
+    /// Get the trace that passes through MBB.
+    /// The trace is computed on demand.
+    Trace getTrace(const MachineBasicBlock *MBB);
+  };
+
+  /// Strategies for selecting traces.
+  enum Strategy {
+    /// Select the trace through a block that has the fewest instructions.
+    TS_MinInstrCount,
+
+    TS_NumStrategies
+  };
+
+  /// Get the trace ensemble representing the given trace selection strategy.
+  /// The returned Ensemble object is owned by the MachineTraceMetrics analysis,
+  /// and valid for the lifetime of the analysis pass.
+  Ensemble *getEnsemble(Strategy);
+
+  /// Invalidate cached information about MBB. This must be called *before* MBB
+  /// is erased, or the CFG is otherwise changed.
+  ///
+  /// This invalidates per-block information about resource usage for MBB only,
+  /// and it invalidates per-trace information for any trace that passes
+  /// through MBB.
+  ///
+  /// Call Ensemble::getTrace() again to update any trace handles.
+  void invalidate(const MachineBasicBlock *MBB);
+
+private:
+  // One entry per basic block, indexed by block number.
+  SmallVector<FixedBlockInfo, 4> BlockInfo;
+
+  // One ensemble per strategy.
+  Ensemble* Ensembles[TS_NumStrategies];
+};
+
+inline raw_ostream &operator<<(raw_ostream &OS,
+                               const MachineTraceMetrics::Trace &Tr) {
+  Tr.print(OS);
+  return OS;
+}
+
+inline raw_ostream &operator<<(raw_ostream &OS,
+                               const MachineTraceMetrics::Ensemble &En) {
+  En.print(OS);
+  return OS;
+}
+} // end namespace llvm
+
+#endif
diff --git a/lib/CodeGen/MachineVerifier.cpp b/lib/CodeGen/MachineVerifier.cpp
index 74ba94d..172402e 100644
--- a/lib/CodeGen/MachineVerifier.cpp
+++ b/lib/CodeGen/MachineVerifier.cpp
@@ -89,8 +89,8 @@ namespace {
     void addRegWithSubRegs(RegVector &RV, unsigned Reg) {
       RV.push_back(Reg);
       if (TargetRegisterInfo::isPhysicalRegister(Reg))
-        for (const uint16_t *R = TRI->getSubRegisters(Reg); *R; R++)
-          RV.push_back(*R);
+        for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs)
+          RV.push_back(*SubRegs);
     }

     struct BBInfo {
@@ -191,9 +191,11 @@ namespace {
     void visitMachineFunctionBefore();
     void visitMachineBasicBlockBefore(const MachineBasicBlock *MBB);
+    void visitMachineBundleBefore(const MachineInstr *MI);
     void visitMachineInstrBefore(const MachineInstr *MI);
     void visitMachineOperand(const MachineOperand *MO, unsigned MONum);
     void visitMachineInstrAfter(const MachineInstr *MI);
+    void visitMachineBundleAfter(const MachineInstr *MI);
     void visitMachineBasicBlockAfter(const MachineBasicBlock *MBB);
     void visitMachineFunctionAfter();

@@ -201,6 +203,10 @@ namespace {
     void report(const char *msg, const MachineBasicBlock *MBB);
     void report(const char *msg, const MachineInstr *MI);
     void report(const char *msg, const MachineOperand *MO, unsigned MONum);
+    void report(const char *msg, const MachineFunction *MF,
+                const LiveInterval &LI);
+    void report(const char *msg, const MachineBasicBlock *MBB,
+                const LiveInterval &LI);

     void checkLiveness(const MachineOperand *MO, unsigned MONum);
     void markReachable(const MachineBasicBlock *MBB);
@@ -210,6 +216,10 @@ namespace {
     void calcRegsRequired();
     void verifyLiveVariables();
     void verifyLiveIntervals();
+    void verifyLiveInterval(const LiveInterval&);
+    void verifyLiveIntervalValue(const LiveInterval&, VNInfo*);
+    void verifyLiveIntervalSegment(const LiveInterval&,
+                                   LiveInterval::const_iterator);
   };

   struct MachineVerifierPass : public MachineFunctionPass {
@@ -288,6 +298,8 @@ bool MachineVerifier::runOnMachineFunction(MachineFunction &MF) {
   for (MachineFunction::const_iterator MFI = MF.begin(), MFE = MF.end();
        MFI!=MFE; ++MFI) {
     visitMachineBasicBlockBefore(MFI);
+    // Keep track of the current bundle header.
+    const MachineInstr *CurBundle = 0;
     for (MachineBasicBlock::const_instr_iterator MBBI = MFI->instr_begin(),
            MBBE = MFI->instr_end(); MBBI != MBBE; ++MBBI) {
       if (MBBI->getParent() != MFI) {
@@ -295,15 +307,21 @@ bool MachineVerifier::runOnMachineFunction(MachineFunction &MF) {
         *OS << "Instruction: " << *MBBI;
         continue;
       }
-      // Skip BUNDLE instruction for now. FIXME: We should add code to verify
-      // the BUNDLE's specifically.
- if (MBBI->isBundle()) - continue; + // Is this a bundle header? + if (!MBBI->isInsideBundle()) { + if (CurBundle) + visitMachineBundleAfter(CurBundle); + CurBundle = MBBI; + visitMachineBundleBefore(CurBundle); + } else if (!CurBundle) + report("No bundle header", MBBI); visitMachineInstrBefore(MBBI); for (unsigned I = 0, E = MBBI->getNumOperands(); I != E; ++I) visitMachineOperand(&MBBI->getOperand(I), I); visitMachineInstrAfter(MBBI); } + if (CurBundle) + visitMachineBundleAfter(CurBundle); visitMachineBasicBlockAfter(MFI); } visitMachineFunctionAfter(); @@ -340,9 +358,9 @@ void MachineVerifier::report(const char *msg, const MachineFunction *MF) { void MachineVerifier::report(const char *msg, const MachineBasicBlock *MBB) { assert(MBB); report(msg, MBB->getParent()); - *OS << "- basic block: " << MBB->getName() - << " " << (void*)MBB - << " (BB#" << MBB->getNumber() << ")"; + *OS << "- basic block: BB#" << MBB->getNumber() + << ' ' << MBB->getName() + << " (" << (void*)MBB << ')'; if (Indexes) *OS << " [" << Indexes->getMBBStartIdx(MBB) << ';' << Indexes->getMBBEndIdx(MBB) << ')'; @@ -367,6 +385,28 @@ void MachineVerifier::report(const char *msg, *OS << "\n"; } +void MachineVerifier::report(const char *msg, const MachineFunction *MF, + const LiveInterval &LI) { + report(msg, MF); + *OS << "- interval: "; + if (TargetRegisterInfo::isVirtualRegister(LI.reg)) + *OS << PrintReg(LI.reg, TRI); + else + *OS << PrintRegUnit(LI.reg, TRI); + *OS << ' ' << LI << '\n'; +} + +void MachineVerifier::report(const char *msg, const MachineBasicBlock *MBB, + const LiveInterval &LI) { + report(msg, MBB); + *OS << "- interval: "; + if (TargetRegisterInfo::isVirtualRegister(LI.reg)) + *OS << PrintReg(LI.reg, TRI); + else + *OS << PrintRegUnit(LI.reg, TRI); + *OS << ' ' << LI << '\n'; +} + void MachineVerifier::markReachable(const MachineBasicBlock *MBB) { BBInfo &MInfo = MBBInfoMap[MBB]; if (!MInfo.reachable) { @@ -384,10 +424,10 @@ void MachineVerifier::visitMachineFunctionBefore() { // A sub-register of a reserved register is also reserved for (int Reg = regsReserved.find_first(); Reg>=0; Reg = regsReserved.find_next(Reg)) { - for (const uint16_t *Sub = TRI->getSubRegisters(Reg); *Sub; ++Sub) { + for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) { // FIXME: This should probably be: - // assert(regsReserved.test(*Sub) && "Non-reserved sub-register"); - regsReserved.set(*Sub); + // assert(regsReserved.test(*SubRegs) && "Non-reserved sub-register"); + regsReserved.set(*SubRegs); } } @@ -466,8 +506,8 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) { report("MBB exits via unconditional fall-through but its successor " "differs from its CFG successor!", MBB); } - if (!MBB->empty() && MBB->back().isBarrier() && - !TII->isPredicated(&MBB->back())) { + if (!MBB->empty() && getBundleStart(&MBB->back())->isBarrier() && + !TII->isPredicated(getBundleStart(&MBB->back()))) { report("MBB exits via unconditional fall-through but ends with a " "barrier instruction!", MBB); } @@ -487,10 +527,10 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) { if (MBB->empty()) { report("MBB exits via unconditional branch but doesn't contain " "any instructions!", MBB); - } else if (!MBB->back().isBarrier()) { + } else if (!getBundleStart(&MBB->back())->isBarrier()) { report("MBB exits via unconditional branch but doesn't end with a " "barrier instruction!", MBB); - } else if (!MBB->back().isTerminator()) { + } else if 
(!getBundleStart(&MBB->back())->isTerminator()) { report("MBB exits via unconditional branch but the branch isn't a " "terminator instruction!", MBB); } @@ -510,10 +550,10 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) { if (MBB->empty()) { report("MBB exits via conditional branch/fall-through but doesn't " "contain any instructions!", MBB); - } else if (MBB->back().isBarrier()) { + } else if (getBundleStart(&MBB->back())->isBarrier()) { report("MBB exits via conditional branch/fall-through but ends with a " "barrier instruction!", MBB); - } else if (!MBB->back().isTerminator()) { + } else if (!getBundleStart(&MBB->back())->isTerminator()) { report("MBB exits via conditional branch/fall-through but the branch " "isn't a terminator instruction!", MBB); } @@ -530,10 +570,10 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) { if (MBB->empty()) { report("MBB exits via conditional branch/branch but doesn't " "contain any instructions!", MBB); - } else if (!MBB->back().isBarrier()) { + } else if (!getBundleStart(&MBB->back())->isBarrier()) { report("MBB exits via conditional branch/branch but doesn't end with a " "barrier instruction!", MBB); - } else if (!MBB->back().isTerminator()) { + } else if (!getBundleStart(&MBB->back())->isTerminator()) { report("MBB exits via conditional branch/branch but the branch " "isn't a terminator instruction!", MBB); } @@ -554,8 +594,8 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) { continue; } regsLive.insert(*I); - for (const uint16_t *R = TRI->getSubRegisters(*I); *R; R++) - regsLive.insert(*R); + for (MCSubRegIterator SubRegs(*I, TRI); SubRegs.isValid(); ++SubRegs) + regsLive.insert(*SubRegs); } regsLiveInButUnused = regsLive; @@ -564,8 +604,8 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) { BitVector PR = MFI->getPristineRegs(MBB); for (int I = PR.find_first(); I>0; I = PR.find_next(I)) { regsLive.insert(I); - for (const uint16_t *R = TRI->getSubRegisters(I); *R; R++) - regsLive.insert(*R); + for (MCSubRegIterator SubRegs(I, TRI); SubRegs.isValid(); ++SubRegs) + regsLive.insert(*SubRegs); } regsKilled.clear(); @@ -575,6 +615,30 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) { lastIndex = Indexes->getMBBStartIdx(MBB); } +// This function gets called for all bundle headers, including normal +// stand-alone unbundled instructions. +void MachineVerifier::visitMachineBundleBefore(const MachineInstr *MI) { + if (Indexes && Indexes->hasIndex(MI)) { + SlotIndex idx = Indexes->getInstructionIndex(MI); + if (!(idx > lastIndex)) { + report("Instruction index out of order", MI); + *OS << "Last instruction was at " << lastIndex << '\n'; + } + lastIndex = idx; + } + + // Ensure non-terminators don't follow terminators. + // Ignore predicated terminators formed by if conversion. + // FIXME: If conversion shouldn't need to violate this rule. + if (MI->isTerminator() && !TII->isPredicated(MI)) { + if (!FirstTerminator) + FirstTerminator = MI; + } else if (FirstTerminator) { + report("Non-terminator instruction after the first terminator", MI); + *OS << "First terminator was:\t" << *FirstTerminator; + } +} + void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) { const MCInstrDesc &MCID = MI->getDesc(); if (MI->getNumOperands() < MCID.getNumOperands()) { @@ -608,17 +672,6 @@ void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) { } } - // Ensure non-terminators don't follow terminators. 
- // Ignore predicated terminators formed by if conversion. - // FIXME: If conversion shouldn't need to violate this rule. - if (MI->isTerminator() && !TII->isPredicated(MI)) { - if (!FirstTerminator) - FirstTerminator = MI; - } else if (FirstTerminator) { - report("Non-terminator instruction after the first terminator", MI); - *OS << "First terminator was:\t" << *FirstTerminator; - } - StringRef ErrorInfo; if (!TII->verifyInstruction(MI, ErrorInfo)) report(ErrorInfo.data(), MI); @@ -634,7 +687,7 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { if (MONum < MCID.getNumDefs()) { if (!MO->isReg()) report("Explicit definition must be a register", MO, MONum); - else if (!MO->isDef()) + else if (!MO->isDef() && !MCOI.isOptionalDef()) report("Explicit definition marked as use", MO, MONum); else if (MO->isImplicit()) report("Explicit definition marked as implicit", MO, MONum); @@ -662,6 +715,12 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { if (MRI->tracksLiveness() && !MI->isDebugValue()) checkLiveness(MO, MONum); + // Verify two-address constraints after leaving SSA form. + unsigned DefIdx; + if (!MRI->isSSA() && MO->isUse() && + MI->isRegTiedToDefOperand(MONum, &DefIdx) && + Reg != MI->getOperand(DefIdx).getReg()) + report("Two-address instruction operands must be identical", MO, MONum); // Check register classes. if (MONum < MCID.getNumOperands() && !MO->isImplicit()) { @@ -672,7 +731,8 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { report("Illegal subregister index for physical register", MO, MONum); return; } - if (const TargetRegisterClass *DRC = TII->getRegClass(MCID,MONum,TRI)) { + if (const TargetRegisterClass *DRC = + TII->getRegClass(MCID, MONum, TRI, *MF)) { if (!DRC->contains(Reg)) { report("Illegal physical register for instruction", MO, MONum); *OS << TRI->getName(Reg) << " is not a " @@ -698,7 +758,8 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { return; } } - if (const TargetRegisterClass *DRC = TII->getRegClass(MCID,MONum,TRI)) { + if (const TargetRegisterClass *DRC = + TII->getRegClass(MCID, MONum, TRI, *MF)) { if (SubIdx) { const TargetRegisterClass *SuperRC = TRI->getLargestLegalSuperClass(RC); @@ -761,20 +822,7 @@ void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) { if (MO->readsReg()) { regsLiveInButUnused.erase(Reg); - bool isKill = false; - unsigned defIdx; - if (MI->isRegTiedToDefOperand(MONum, &defIdx)) { - // A two-addr use counts as a kill if use and def are the same. - unsigned DefReg = MI->getOperand(defIdx).getReg(); - if (Reg == DefReg) - isKill = true; - else if (TargetRegisterInfo::isPhysicalRegister(Reg)) { - report("Two-address instruction operands must be identical", MO, MONum); - } - } else - isKill = MO->isKill(); - - if (isKill) + if (MO->isKill()) addRegWithSubRegs(regsKilled, Reg); // Check that LiveVars knows this kill. @@ -786,23 +834,44 @@ void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) { } // Check LiveInts liveness and kill. 
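The rewritten liveness checks below are built on LiveRangeQuery. A toy model
of what such a query answers, under simplified half-open-interval semantics
(an assumption for illustration, not the real implementation):

#include <vector>

struct Segment { unsigned Start, End; }; // half-open [Start, End)

// Toy query at index Idx: LiveIn is true when a segment covers the instant
// just before Idx; Kill is true when that segment ends exactly at Idx.
struct ToyRangeQuery {
  bool LiveIn, Kill;
  ToyRangeQuery(const std::vector<Segment> &Segs, unsigned Idx)
      : LiveIn(false), Kill(false) {
    for (unsigned i = 0, e = Segs.size(); i != e; ++i)
      if (Segs[i].Start < Idx && Idx <= Segs[i].End) {
        LiveIn = true;
        Kill = (Segs[i].End == Idx);
      }
  }
};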
- if (TargetRegisterInfo::isVirtualRegister(Reg) && - LiveInts && !LiveInts->isNotInMIMap(MI)) { - SlotIndex UseIdx = LiveInts->getInstructionIndex(MI).getRegSlot(true); - if (LiveInts->hasInterval(Reg)) { - const LiveInterval &LI = LiveInts->getInterval(Reg); - if (!LI.liveAt(UseIdx)) { - report("No live range at use", MO, MONum); - *OS << UseIdx << " is not live in " << LI << '\n'; + if (LiveInts && !LiveInts->isNotInMIMap(MI)) { + SlotIndex UseIdx = LiveInts->getInstructionIndex(MI); + // Check the cached regunit intervals. + if (TargetRegisterInfo::isPhysicalRegister(Reg) && !isReserved(Reg)) { + for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units) { + if (const LiveInterval *LI = LiveInts->getCachedRegUnit(*Units)) { + LiveRangeQuery LRQ(*LI, UseIdx); + if (!LRQ.valueIn()) { + report("No live range at use", MO, MONum); + *OS << UseIdx << " is not live in " << PrintRegUnit(*Units, TRI) + << ' ' << *LI << '\n'; + } + if (MO->isKill() && !LRQ.isKill()) { + report("Live range continues after kill flag", MO, MONum); + *OS << PrintRegUnit(*Units, TRI) << ' ' << *LI << '\n'; + } + } } - // Check for extra kill flags. - // Note that we allow missing kill flags for now. - if (MO->isKill() && !LI.killedAt(UseIdx.getRegSlot())) { - report("Live range continues after kill flag", MO, MONum); - *OS << "Live range: " << LI << '\n'; + } + + if (TargetRegisterInfo::isVirtualRegister(Reg)) { + if (LiveInts->hasInterval(Reg)) { + // This is a virtual register interval. + const LiveInterval &LI = LiveInts->getInterval(Reg); + LiveRangeQuery LRQ(LI, UseIdx); + if (!LRQ.valueIn()) { + report("No live range at use", MO, MONum); + *OS << UseIdx << " is not live in " << LI << '\n'; + } + // Check for extra kill flags. + // Note that we allow missing kill flags for now. + if (MO->isKill() && !LRQ.isKill()) { + report("Live range continues after kill flag", MO, MONum); + *OS << "Live range: " << LI << '\n'; + } + } else { + report("Virtual register has no live interval", MO, MONum); } - } else { - report("Virtual register has no Live interval", MO, MONum); } } @@ -812,6 +881,8 @@ void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) { // Reserved registers may be used even when 'dead'. if (!isReserved(Reg)) report("Using an undefined physical register", MO, MONum); + } else if (MRI->def_empty(Reg)) { + report("Reading virtual register without a def", MO, MONum); } else { BBInfo &MInfo = MBBInfoMap[MI->getParent()]; // We don't know which virtual registers are live in, so only complain @@ -841,12 +912,13 @@ void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) { // Check LiveInts for a live range, but only for virtual registers. 
if (LiveInts && TargetRegisterInfo::isVirtualRegister(Reg) && !LiveInts->isNotInMIMap(MI)) { - SlotIndex DefIdx = LiveInts->getInstructionIndex(MI).getRegSlot(); + SlotIndex DefIdx = LiveInts->getInstructionIndex(MI); + DefIdx = DefIdx.getRegSlot(MO->isEarlyClobber()); if (LiveInts->hasInterval(Reg)) { const LiveInterval &LI = LiveInts->getInterval(Reg); if (const VNInfo *VNI = LI.getVNInfoAt(DefIdx)) { assert(VNI && "NULL valno is not allowed"); - if (VNI->def != DefIdx && !MO->isEarlyClobber()) { + if (VNI->def != DefIdx) { report("Inconsistent valno->def", MO, MONum); *OS << "Valno " << VNI->id << " is not defined at " << DefIdx << " in " << LI << '\n'; @@ -863,6 +935,13 @@ void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) { } void MachineVerifier::visitMachineInstrAfter(const MachineInstr *MI) { +} + +// This function gets called after visiting all instructions in a bundle. The +// argument points to the bundle header. +// Normal stand-alone instructions are also considered 'bundles', and this +// function is called for all of them. +void MachineVerifier::visitMachineBundleAfter(const MachineInstr *MI) { BBInfo &MInfo = MBBInfoMap[MI->getParent()]; set_union(MInfo.regsKilled, regsKilled); set_subtract(regsLive, regsKilled); regsKilled.clear(); @@ -876,15 +955,6 @@ void MachineVerifier::visitMachineInstrAfter(const MachineInstr *MI) { } set_subtract(regsLive, regsDead); regsDead.clear(); set_union(regsLive, regsDefined); regsDefined.clear(); - - if (Indexes && Indexes->hasIndex(MI)) { - SlotIndex idx = Indexes->getInstructionIndex(MI); - if (!(idx > lastIndex)) { - report("Instruction index out of order", MI); - *OS << "Last instruction was at " << lastIndex << '\n'; - } - lastIndex = idx; - } } void @@ -1025,7 +1095,21 @@ void MachineVerifier::visitMachineFunctionAfter() { // Now check liveness info if available calcRegsRequired(); - if (MRI->isSSA() && !MF->empty()) { + // Check for killed virtual registers that should be live out. + for (MachineFunction::const_iterator MFI = MF->begin(), MFE = MF->end(); + MFI != MFE; ++MFI) { + BBInfo &MInfo = MBBInfoMap[MFI]; + for (RegSet::iterator + I = MInfo.vregsRequired.begin(), E = MInfo.vregsRequired.end(); I != E; + ++I) + if (MInfo.regsKilled.count(*I)) { + report("Virtual register killed in block, but needed live out.", MFI); + *OS << "Virtual register " << PrintReg(*I) + << " is used after the block.\n"; + } + } + + if (!MF->empty()) { BBInfo &MInfo = MBBInfoMap[&MF->front()]; for (RegSet::iterator I = MInfo.vregsRequired.begin(), E = MInfo.vregsRequired.end(); I != E; @@ -1069,292 +1153,298 @@ void MachineVerifier::verifyLiveVariables() { void MachineVerifier::verifyLiveIntervals() { assert(LiveInts && "Don't call verifyLiveIntervals without LiveInts"); - for (LiveIntervals::const_iterator LVI = LiveInts->begin(), - LVE = LiveInts->end(); LVI != LVE; ++LVI) { - const LiveInterval &LI = *LVI->second; + for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) { + unsigned Reg = TargetRegisterInfo::index2VirtReg(i); // Spilling and splitting may leave unused registers around. Skip them. - if (MRI->use_empty(LI.reg)) + if (MRI->reg_nodbg_empty(Reg)) continue; - // Physical registers have much weirdness going on, mostly from coalescing. - // We should probably fix it, but for now just ignore them. 
- if (TargetRegisterInfo::isPhysicalRegister(LI.reg)) + if (!LiveInts->hasInterval(Reg)) { + report("Missing live interval for virtual register", MF); + *OS << PrintReg(Reg, TRI) << " still has defs or uses\n"; continue; + } - assert(LVI->first == LI.reg && "Invalid reg to interval mapping"); + const LiveInterval &LI = LiveInts->getInterval(Reg); + assert(Reg == LI.reg && "Invalid reg to interval mapping"); + verifyLiveInterval(LI); + } - for (LiveInterval::const_vni_iterator I = LI.vni_begin(), E = LI.vni_end(); - I!=E; ++I) { - VNInfo *VNI = *I; - const VNInfo *DefVNI = LI.getVNInfoAt(VNI->def); + // Verify all the cached regunit intervals. + for (unsigned i = 0, e = TRI->getNumRegUnits(); i != e; ++i) + if (const LiveInterval *LI = LiveInts->getCachedRegUnit(i)) + verifyLiveInterval(*LI); +} - if (!DefVNI) { - if (!VNI->isUnused()) { - report("Valno not live at def and not marked unused", MF); - *OS << "Valno #" << VNI->id << " in " << LI << '\n'; - } - continue; - } +void MachineVerifier::verifyLiveIntervalValue(const LiveInterval &LI, + VNInfo *VNI) { + if (VNI->isUnused()) + return; - if (VNI->isUnused()) - continue; + const VNInfo *DefVNI = LI.getVNInfoAt(VNI->def); - if (DefVNI != VNI) { - report("Live range at def has different valno", MF); - *OS << "Valno #" << VNI->id << " is defined at " << VNI->def - << " where valno #" << DefVNI->id << " is live in " << LI << '\n'; - continue; - } + if (!DefVNI) { + report("Valno not live at def and not marked unused", MF, LI); + *OS << "Valno #" << VNI->id << '\n'; + return; + } - const MachineBasicBlock *MBB = LiveInts->getMBBFromIndex(VNI->def); - if (!MBB) { - report("Invalid definition index", MF); - *OS << "Valno #" << VNI->id << " is defined at " << VNI->def - << " in " << LI << '\n'; - continue; - } + if (DefVNI != VNI) { + report("Live range at def has different valno", MF, LI); + *OS << "Valno #" << VNI->id << " is defined at " << VNI->def + << " where valno #" << DefVNI->id << " is live\n"; + return; + } - if (VNI->isPHIDef()) { - if (VNI->def != LiveInts->getMBBStartIdx(MBB)) { - report("PHIDef value is not defined at MBB start", MF); - *OS << "Valno #" << VNI->id << " is defined at " << VNI->def - << ", not at the beginning of BB#" << MBB->getNumber() - << " in " << LI << '\n'; - } - } else { - // Non-PHI def. 
- const MachineInstr *MI = LiveInts->getInstructionFromIndex(VNI->def); - if (!MI) { - report("No instruction at def index", MF); - *OS << "Valno #" << VNI->id << " is defined at " << VNI->def - << " in " << LI << '\n'; - continue; - } + const MachineBasicBlock *MBB = LiveInts->getMBBFromIndex(VNI->def); + if (!MBB) { + report("Invalid definition index", MF, LI); + *OS << "Valno #" << VNI->id << " is defined at " << VNI->def + << " in " << LI << '\n'; + return; + } - bool hasDef = false; - bool isEarlyClobber = false; - for (ConstMIBundleOperands MOI(MI); MOI.isValid(); ++MOI) { - if (!MOI->isReg() || !MOI->isDef()) - continue; - if (TargetRegisterInfo::isVirtualRegister(LI.reg)) { - if (MOI->getReg() != LI.reg) - continue; - } else { - if (!TargetRegisterInfo::isPhysicalRegister(MOI->getReg()) || - !TRI->regsOverlap(LI.reg, MOI->getReg())) - continue; - } - hasDef = true; - if (MOI->isEarlyClobber()) - isEarlyClobber = true; - } + if (VNI->isPHIDef()) { + if (VNI->def != LiveInts->getMBBStartIdx(MBB)) { + report("PHIDef value is not defined at MBB start", MBB, LI); + *OS << "Valno #" << VNI->id << " is defined at " << VNI->def + << ", not at the beginning of BB#" << MBB->getNumber() << '\n'; + } + return; + } - if (!hasDef) { - report("Defining instruction does not modify register", MI); - *OS << "Valno #" << VNI->id << " in " << LI << '\n'; - } + // Non-PHI def. + const MachineInstr *MI = LiveInts->getInstructionFromIndex(VNI->def); + if (!MI) { + report("No instruction at def index", MBB, LI); + *OS << "Valno #" << VNI->id << " is defined at " << VNI->def << '\n'; + return; + } - // Early clobber defs begin at USE slots, but other defs must begin at - // DEF slots. - if (isEarlyClobber) { - if (!VNI->def.isEarlyClobber()) { - report("Early clobber def must be at an early-clobber slot", MF); - *OS << "Valno #" << VNI->id << " is defined at " << VNI->def - << " in " << LI << '\n'; - } - } else if (!VNI->def.isRegister()) { - report("Non-PHI, non-early clobber def must be at a register slot", - MF); - *OS << "Valno #" << VNI->id << " is defined at " << VNI->def - << " in " << LI << '\n'; - } - } + bool hasDef = false; + bool isEarlyClobber = false; + for (ConstMIBundleOperands MOI(MI); MOI.isValid(); ++MOI) { + if (!MOI->isReg() || !MOI->isDef()) + continue; + if (TargetRegisterInfo::isVirtualRegister(LI.reg)) { + if (MOI->getReg() != LI.reg) + continue; + } else { + if (!TargetRegisterInfo::isPhysicalRegister(MOI->getReg()) || + !TRI->hasRegUnit(MOI->getReg(), LI.reg)) + continue; } + hasDef = true; + if (MOI->isEarlyClobber()) + isEarlyClobber = true; + } - for (LiveInterval::const_iterator I = LI.begin(), E = LI.end(); I!=E; ++I) { - const VNInfo *VNI = I->valno; - assert(VNI && "Live range has no valno"); + if (!hasDef) { + report("Defining instruction does not modify register", MI); + *OS << "Valno #" << VNI->id << " in " << LI << '\n'; + } - if (VNI->id >= LI.getNumValNums() || VNI != LI.getValNumInfo(VNI->id)) { - report("Foreign valno in live range", MF); - I->print(*OS); - *OS << " has a valno not in " << LI << '\n'; - } + // Early clobber defs begin at USE slots, but other defs must begin at + // DEF slots. 
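+  // (Background, simplified: each instruction owns consecutive slot indexes
+  //  ordered Block < EarlyClobber < Register < Dead. Early-clobber defs start
+  //  at the EarlyClobber slot, normal defs at Register, dead defs end at Dead.)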
+ if (isEarlyClobber) { + if (!VNI->def.isEarlyClobber()) { + report("Early clobber def must be at an early-clobber slot", MBB, LI); + *OS << "Valno #" << VNI->id << " is defined at " << VNI->def << '\n'; + } + } else if (!VNI->def.isRegister()) { + report("Non-PHI, non-early clobber def must be at a register slot", + MBB, LI); + *OS << "Valno #" << VNI->id << " is defined at " << VNI->def << '\n'; + } +} - if (VNI->isUnused()) { - report("Live range valno is marked unused", MF); - I->print(*OS); - *OS << " in " << LI << '\n'; - } +void +MachineVerifier::verifyLiveIntervalSegment(const LiveInterval &LI, + LiveInterval::const_iterator I) { + const VNInfo *VNI = I->valno; + assert(VNI && "Live range has no valno"); + + if (VNI->id >= LI.getNumValNums() || VNI != LI.getValNumInfo(VNI->id)) { + report("Foreign valno in live range", MF, LI); + *OS << *I << " has a bad valno\n"; + } - const MachineBasicBlock *MBB = LiveInts->getMBBFromIndex(I->start); - if (!MBB) { - report("Bad start of live segment, no basic block", MF); - I->print(*OS); - *OS << " in " << LI << '\n'; - continue; - } - SlotIndex MBBStartIdx = LiveInts->getMBBStartIdx(MBB); - if (I->start != MBBStartIdx && I->start != VNI->def) { - report("Live segment must begin at MBB entry or valno def", MBB); - I->print(*OS); - *OS << " in " << LI << '\n' << "Basic block starts at " - << MBBStartIdx << '\n'; - } + if (VNI->isUnused()) { + report("Live range valno is marked unused", MF, LI); + *OS << *I << '\n'; + } - const MachineBasicBlock *EndMBB = - LiveInts->getMBBFromIndex(I->end.getPrevSlot()); - if (!EndMBB) { - report("Bad end of live segment, no basic block", MF); - I->print(*OS); - *OS << " in " << LI << '\n'; - continue; - } + const MachineBasicBlock *MBB = LiveInts->getMBBFromIndex(I->start); + if (!MBB) { + report("Bad start of live segment, no basic block", MF, LI); + *OS << *I << '\n'; + return; + } + SlotIndex MBBStartIdx = LiveInts->getMBBStartIdx(MBB); + if (I->start != MBBStartIdx && I->start != VNI->def) { + report("Live segment must begin at MBB entry or valno def", MBB, LI); + *OS << *I << '\n'; + } - // No more checks for live-out segments. - if (I->end == LiveInts->getMBBEndIdx(EndMBB)) - continue; + const MachineBasicBlock *EndMBB = + LiveInts->getMBBFromIndex(I->end.getPrevSlot()); + if (!EndMBB) { + report("Bad end of live segment, no basic block", MF, LI); + *OS << *I << '\n'; + return; + } - // The live segment is ending inside EndMBB - const MachineInstr *MI = - LiveInts->getInstructionFromIndex(I->end.getPrevSlot()); - if (!MI) { - report("Live segment doesn't end at a valid instruction", EndMBB); - I->print(*OS); - *OS << " in " << LI << '\n' << "Basic block starts at " - << MBBStartIdx << '\n'; + // No more checks for live-out segments. + if (I->end == LiveInts->getMBBEndIdx(EndMBB)) + return; + + // RegUnit intervals are allowed dead phis. + if (!TargetRegisterInfo::isVirtualRegister(LI.reg) && VNI->isPHIDef() && + I->start == VNI->def && I->end == VNI->def.getDeadSlot()) + return; + + // The live segment is ending inside EndMBB + const MachineInstr *MI = + LiveInts->getInstructionFromIndex(I->end.getPrevSlot()); + if (!MI) { + report("Live segment doesn't end at a valid instruction", EndMBB, LI); + *OS << *I << '\n'; + return; + } + + // The block slot must refer to a basic block boundary. + if (I->end.isBlock()) { + report("Live segment ends at B slot of an instruction", EndMBB, LI); + *OS << *I << '\n'; + } + + if (I->end.isDead()) { + // Segment ends on the dead slot. 
+ // That means there must be a dead def. + if (!SlotIndex::isSameInstr(I->start, I->end)) { + report("Live segment ending at dead slot spans instructions", EndMBB, LI); + *OS << *I << '\n'; + } + } + + // A live segment can only end at an early-clobber slot if it is being + // redefined by an early-clobber def. + if (I->end.isEarlyClobber()) { + if (I+1 == LI.end() || (I+1)->start != I->end) { + report("Live segment ending at early clobber slot must be " + "redefined by an EC def in the same instruction", EndMBB, LI); + *OS << *I << '\n'; + } + } + + // The following checks only apply to virtual registers. Physreg liveness + // is too weird to check. + if (TargetRegisterInfo::isVirtualRegister(LI.reg)) { + // A live range can end with either a redefinition, a kill flag on a + // use, or a dead flag on a def. + bool hasRead = false; + bool hasDeadDef = false; + for (ConstMIBundleOperands MOI(MI); MOI.isValid(); ++MOI) { + if (!MOI->isReg() || MOI->getReg() != LI.reg) continue; - } + if (MOI->readsReg()) + hasRead = true; + if (MOI->isDef() && MOI->isDead()) + hasDeadDef = true; + } - // The block slot must refer to a basic block boundary. - if (I->end.isBlock()) { - report("Live segment ends at B slot of an instruction", MI); + if (I->end.isDead()) { + if (!hasDeadDef) { + report("Instruction doesn't have a dead def operand", MI); I->print(*OS); *OS << " in " << LI << '\n'; } - - if (I->end.isDead()) { - // Segment ends on the dead slot. - // That means there must be a dead def. - if (!SlotIndex::isSameInstr(I->start, I->end)) { - report("Live segment ending at dead slot spans instructions", MI); - I->print(*OS); - *OS << " in " << LI << '\n'; - } - } - - // A live segment can only end at an early-clobber slot if it is being - // redefined by an early-clobber def. - if (I->end.isEarlyClobber()) { - if (I+1 == E || (I+1)->start != I->end) { - report("Live segment ending at early clobber slot must be " - "redefined by an EC def in the same instruction", MI); - I->print(*OS); - *OS << " in " << LI << '\n'; - } + } else { + if (!hasRead) { + report("Instruction ending live range doesn't read the register", MI); + *OS << *I << " in " << LI << '\n'; } + } + } - // The following checks only apply to virtual registers. Physreg liveness - // is too weird to check. - if (TargetRegisterInfo::isVirtualRegister(LI.reg)) { - // A live range can end with either a redefinition, a kill flag on a - // use, or a dead flag on a def. - bool hasRead = false; - bool hasDeadDef = false; - for (ConstMIBundleOperands MOI(MI); MOI.isValid(); ++MOI) { - if (!MOI->isReg() || MOI->getReg() != LI.reg) - continue; - if (MOI->readsReg()) - hasRead = true; - if (MOI->isDef() && MOI->isDead()) - hasDeadDef = true; - } - - if (I->end.isDead()) { - if (!hasDeadDef) { - report("Instruction doesn't have a dead def operand", MI); - I->print(*OS); - *OS << " in " << LI << '\n'; - } - } else { - if (!hasRead) { - report("Instruction ending live range doesn't read the register", - MI); - I->print(*OS); - *OS << " in " << LI << '\n'; - } - } - } + // Now check all the basic blocks in this live segment. + MachineFunction::const_iterator MFI = MBB; + // Is this live range the beginning of a non-PHIDef VN? + if (I->start == VNI->def && !VNI->isPHIDef()) { + // Not live-in to any blocks. + if (MBB == EndMBB) + return; + // Skip this block. + ++MFI; + } + for (;;) { + assert(LiveInts->isLiveInToMBB(LI, MFI)); + // We don't know how to track physregs into a landing pad. 
+ if (!TargetRegisterInfo::isVirtualRegister(LI.reg) && + MFI->isLandingPad()) { + if (&*MFI == EndMBB) + break; + ++MFI; + continue; + } - // Now check all the basic blocks in this live segment. - MachineFunction::const_iterator MFI = MBB; - // Is this live range the beginning of a non-PHIDef VN? - if (I->start == VNI->def && !VNI->isPHIDef()) { - // Not live-in to any blocks. - if (MBB == EndMBB) - continue; - // Skip this block. - ++MFI; + // Is VNI a PHI-def in the current block? + bool IsPHI = VNI->isPHIDef() && + VNI->def == LiveInts->getMBBStartIdx(MFI); + + // Check that VNI is live-out of all predecessors. + for (MachineBasicBlock::const_pred_iterator PI = MFI->pred_begin(), + PE = MFI->pred_end(); PI != PE; ++PI) { + SlotIndex PEnd = LiveInts->getMBBEndIdx(*PI); + const VNInfo *PVNI = LI.getVNInfoBefore(PEnd); + + // All predecessors must have a live-out value. + if (!PVNI) { + report("Register not marked live out of predecessor", *PI, LI); + *OS << "Valno #" << VNI->id << " live into BB#" << MFI->getNumber() + << '@' << LiveInts->getMBBStartIdx(MFI) << ", not live before " + << PEnd << '\n'; + continue; } - for (;;) { - assert(LiveInts->isLiveInToMBB(LI, MFI)); - // We don't know how to track physregs into a landing pad. - if (TargetRegisterInfo::isPhysicalRegister(LI.reg) && - MFI->isLandingPad()) { - if (&*MFI == EndMBB) - break; - ++MFI; - continue; - } - // Check that VNI is live-out of all predecessors. - for (MachineBasicBlock::const_pred_iterator PI = MFI->pred_begin(), - PE = MFI->pred_end(); PI != PE; ++PI) { - SlotIndex PEnd = LiveInts->getMBBEndIdx(*PI); - const VNInfo *PVNI = LI.getVNInfoBefore(PEnd); - - if (VNI->isPHIDef() && VNI->def == LiveInts->getMBBStartIdx(MFI)) - continue; - - if (!PVNI) { - report("Register not marked live out of predecessor", *PI); - *OS << "Valno #" << VNI->id << " live into BB#" << MFI->getNumber() - << '@' << LiveInts->getMBBStartIdx(MFI) << ", not live before " - << PEnd << " in " << LI << '\n'; - continue; - } - if (PVNI != VNI) { - report("Different value live out of predecessor", *PI); - *OS << "Valno #" << PVNI->id << " live out of BB#" - << (*PI)->getNumber() << '@' << PEnd - << "\nValno #" << VNI->id << " live into BB#" << MFI->getNumber() - << '@' << LiveInts->getMBBStartIdx(MFI) << " in " << LI << '\n'; - } - } - if (&*MFI == EndMBB) - break; - ++MFI; + // Only PHI-defs can take different predecessor values. + if (!IsPHI && PVNI != VNI) { + report("Different value live out of predecessor", *PI, LI); + *OS << "Valno #" << PVNI->id << " live out of BB#" + << (*PI)->getNumber() << '@' << PEnd + << "\nValno #" << VNI->id << " live into BB#" << MFI->getNumber() + << '@' << LiveInts->getMBBStartIdx(MFI) << '\n'; } } + if (&*MFI == EndMBB) + break; + ++MFI; + } +} - // Check the LI only has one connected component. 
- if (TargetRegisterInfo::isVirtualRegister(LI.reg)) { - ConnectedVNInfoEqClasses ConEQ(*LiveInts); - unsigned NumComp = ConEQ.Classify(&LI); - if (NumComp > 1) { - report("Multiple connected components in live interval", MF); - *OS << NumComp << " components in " << LI << '\n'; - for (unsigned comp = 0; comp != NumComp; ++comp) { - *OS << comp << ": valnos"; - for (LiveInterval::const_vni_iterator I = LI.vni_begin(), - E = LI.vni_end(); I!=E; ++I) - if (comp == ConEQ.getEqClass(*I)) - *OS << ' ' << (*I)->id; - *OS << '\n'; - } +void MachineVerifier::verifyLiveInterval(const LiveInterval &LI) { + for (LiveInterval::const_vni_iterator I = LI.vni_begin(), E = LI.vni_end(); + I!=E; ++I) + verifyLiveIntervalValue(LI, *I); + + for (LiveInterval::const_iterator I = LI.begin(), E = LI.end(); I!=E; ++I) + verifyLiveIntervalSegment(LI, I); + + // Check the LI only has one connected component. + if (TargetRegisterInfo::isVirtualRegister(LI.reg)) { + ConnectedVNInfoEqClasses ConEQ(*LiveInts); + unsigned NumComp = ConEQ.Classify(&LI); + if (NumComp > 1) { + report("Multiple connected components in live interval", MF, LI); + for (unsigned comp = 0; comp != NumComp; ++comp) { + *OS << comp << ": valnos"; + for (LiveInterval::const_vni_iterator I = LI.vni_begin(), + E = LI.vni_end(); I!=E; ++I) + if (comp == ConEQ.getEqClass(*I)) + *OS << ' ' << (*I)->id; + *OS << '\n'; } } } } - diff --git a/lib/CodeGen/PHIElimination.cpp b/lib/CodeGen/PHIElimination.cpp index 0ed4c34..e6e23da 100644 --- a/lib/CodeGen/PHIElimination.cpp +++ b/lib/CodeGen/PHIElimination.cpp @@ -171,23 +171,30 @@ bool PHIElimination::EliminatePHINodes(MachineFunction &MF, return true; } +/// isImplicitlyDefined - Return true if all defs of VirtReg are implicit-defs. +/// This includes registers with no defs. +static bool isImplicitlyDefined(unsigned VirtReg, + const MachineRegisterInfo *MRI) { + for (MachineRegisterInfo::def_iterator DI = MRI->def_begin(VirtReg), + DE = MRI->def_end(); DI != DE; ++DI) + if (!DI->isImplicitDef()) + return false; + return true; +} + /// isSourceDefinedByImplicitDef - Return true if all sources of the phi node /// are implicit_def's. static bool isSourceDefinedByImplicitDef(const MachineInstr *MPhi, const MachineRegisterInfo *MRI) { - for (unsigned i = 1; i != MPhi->getNumOperands(); i += 2) { - unsigned SrcReg = MPhi->getOperand(i).getReg(); - const MachineInstr *DefMI = MRI->getVRegDef(SrcReg); - if (!DefMI || !DefMI->isImplicitDef()) + for (unsigned i = 1; i != MPhi->getNumOperands(); i += 2) + if (!isImplicitlyDefined(MPhi->getOperand(i).getReg(), MRI)) return false; - } return true; } - /// LowerAtomicPHINode - Lower the PHI node at the top of the specified block, -/// under the assuption that it needs to be lowered in a way that supports +/// under the assumption that it needs to be lowered in a way that supports /// atomic execution of PHIs. This lowering method is always correct all of the /// time. /// @@ -287,7 +294,8 @@ void PHIElimination::LowerAtomicPHINode( for (int i = NumSrcs - 1; i >= 0; --i) { unsigned SrcReg = MPhi->getOperand(i*2+1).getReg(); unsigned SrcSubReg = MPhi->getOperand(i*2+1).getSubReg(); - + bool SrcUndef = MPhi->getOperand(i*2+1).isUndef() || + isImplicitlyDefined(SrcReg, MRI); assert(TargetRegisterInfo::isVirtualRegister(SrcReg) && "Machine PHI Operands must all be virtual registers!"); @@ -295,14 +303,6 @@ void PHIElimination::LowerAtomicPHINode( // path the PHI. 
MachineBasicBlock &opBlock = *MPhi->getOperand(i*2+2).getMBB(); - // If source is defined by an implicit def, there is no need to insert a - // copy. - MachineInstr *DefMI = MRI->getVRegDef(SrcReg); - if (DefMI->isImplicitDef()) { - ImpDefs.insert(DefMI); - continue; - } - // Check to make sure we haven't already emitted the copy for this block. // This can happen because PHI nodes may have multiple entries for the same // basic block. @@ -315,12 +315,27 @@ void PHIElimination::LowerAtomicPHINode( findPHICopyInsertPoint(&opBlock, &MBB, SrcReg); // Insert the copy. - if (!reusedIncoming && IncomingReg) - BuildMI(opBlock, InsertPos, MPhi->getDebugLoc(), - TII->get(TargetOpcode::COPY), IncomingReg).addReg(SrcReg, 0, SrcSubReg); + if (!reusedIncoming && IncomingReg) { + if (SrcUndef) { + // The source register is undefined, so there is no need for a real + // COPY, but we still need to ensure joint dominance by defs. + // Insert an IMPLICIT_DEF instruction. + BuildMI(opBlock, InsertPos, MPhi->getDebugLoc(), + TII->get(TargetOpcode::IMPLICIT_DEF), IncomingReg); + + // Clean up the old implicit-def, if there even was one. + if (MachineInstr *DefMI = MRI->getVRegDef(SrcReg)) + if (DefMI->isImplicitDef()) + ImpDefs.insert(DefMI); + } else { + BuildMI(opBlock, InsertPos, MPhi->getDebugLoc(), + TII->get(TargetOpcode::COPY), IncomingReg) + .addReg(SrcReg, 0, SrcSubReg); + } + } // Now update live variable information if we have it. Otherwise we're done - if (!LV) continue; + if (SrcUndef || !LV) continue; // We want to be able to insert a kill of the register if this PHI (aka, the // copy we just inserted) is the last use of the source value. Live @@ -340,39 +355,35 @@ void PHIElimination::LowerAtomicPHINode( // add a kill marker in this block saying that it kills the incoming value! if (!ValueIsUsed && !LV->isLiveOut(SrcReg, opBlock)) { // In our final twist, we have to decide which instruction kills the - // register. In most cases this is the copy, however, the first - // terminator instruction at the end of the block may also use the value. - // In this case, we should mark *it* as being the killing block, not the - // copy. - MachineBasicBlock::iterator KillInst; - MachineBasicBlock::iterator Term = opBlock.getFirstTerminator(); - if (Term != opBlock.end() && Term->readsRegister(SrcReg)) { - KillInst = Term; - - // Check that no other terminators use values. -#ifndef NDEBUG - for (MachineBasicBlock::iterator TI = llvm::next(Term); - TI != opBlock.end(); ++TI) { - if (TI->isDebugValue()) - continue; - assert(!TI->readsRegister(SrcReg) && - "Terminator instructions cannot use virtual registers unless" - "they are the first terminator in a block!"); - } -#endif - } else if (reusedIncoming || !IncomingReg) { - // We may have to rewind a bit if we didn't insert a copy this time. - KillInst = Term; - while (KillInst != opBlock.begin()) { - --KillInst; - if (KillInst->isDebugValue()) - continue; - if (KillInst->readsRegister(SrcReg)) - break; + // register. In most cases this is the copy, however, terminator + // instructions at the end of the block may also use the value. In this + // case, we should mark the last such terminator as being the killing + // block, not the copy. 
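The IMPLICIT_DEF-versus-COPY choice made above boils down to one predicate: an undef source (explicit flag, or every def implicit) would only copy garbage, so defining IncomingReg with IMPLICIT_DEF preserves joint dominance without a real copy. A hypothetical sketch, not the LLVM API; the kill-flag scan that the comment above introduces follows below:

enum LoweredKind { LowerCopy, LowerImplicitDef };

// Mirrors: SrcUndef = MO.isUndef() || isImplicitlyDefined(SrcReg, MRI).
LoweredKind lowerPHISource(bool OperandIsUndef, bool AllDefsAreImplicit) {
  if (OperandIsUndef || AllDefsAreImplicit)
    return LowerImplicitDef; // define IncomingReg without a real copy
  return LowerCopy;          // ordinary COPY from the source register
}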
+ MachineBasicBlock::iterator KillInst = opBlock.end(); + MachineBasicBlock::iterator FirstTerm = opBlock.getFirstTerminator(); + for (MachineBasicBlock::iterator Term = FirstTerm; + Term != opBlock.end(); ++Term) { + if (Term->readsRegister(SrcReg)) + KillInst = Term; + } + + if (KillInst == opBlock.end()) { + // No terminator uses the register. + + if (reusedIncoming || !IncomingReg) { + // We may have to rewind a bit if we didn't insert a copy this time. + KillInst = FirstTerm; + while (KillInst != opBlock.begin()) { + --KillInst; + if (KillInst->isDebugValue()) + continue; + if (KillInst->readsRegister(SrcReg)) + break; + } + } else { + // We just inserted this copy. + KillInst = prior(InsertPos); } - } else { - // We just inserted this copy. - KillInst = prior(InsertPos); } assert(KillInst->readsRegister(SrcReg) && "Cannot find kill instruction"); @@ -412,28 +423,71 @@ bool PHIElimination::SplitPHIEdges(MachineFunction &MF, if (MBB.empty() || !MBB.front().isPHI() || MBB.isLandingPad()) return false; // Quick exit for basic blocks without PHIs. + const MachineLoop *CurLoop = MLI ? MLI->getLoopFor(&MBB) : 0; + bool IsLoopHeader = CurLoop && &MBB == CurLoop->getHeader(); + bool Changed = false; for (MachineBasicBlock::iterator BBI = MBB.begin(), BBE = MBB.end(); BBI != BBE && BBI->isPHI(); ++BBI) { for (unsigned i = 1, e = BBI->getNumOperands(); i != e; i += 2) { unsigned Reg = BBI->getOperand(i).getReg(); MachineBasicBlock *PreMBB = BBI->getOperand(i+1).getMBB(); - // We break edges when registers are live out from the predecessor block - // (not considering PHI nodes). If the register is live in to this block - // anyway, we would gain nothing from splitting. + // Is there a critical edge from PreMBB to MBB? + if (PreMBB->succ_size() == 1) + continue; + // Avoid splitting backedges of loops. It would introduce small // out-of-line blocks into the loop which is very bad for code placement. - if (PreMBB != &MBB && - !LV.isLiveIn(Reg, MBB) && LV.isLiveOut(Reg, *PreMBB)) { - if (!MLI || - !(MLI->getLoopFor(PreMBB) == MLI->getLoopFor(&MBB) && - MLI->isLoopHeader(&MBB))) { - if (PreMBB->SplitCriticalEdge(&MBB, this)) { - Changed = true; - ++NumCriticalEdgesSplit; - } - } + if (PreMBB == &MBB) + continue; + const MachineLoop *PreLoop = MLI ? MLI->getLoopFor(PreMBB) : 0; + if (IsLoopHeader && PreLoop == CurLoop) + continue; + + // LV doesn't consider a phi use live-out, so isLiveOut only returns true + // when the source register is live-out for some other reason than a phi + // use. That means the copy we will insert in PreMBB won't be a kill, and + // there is a risk it may not be coalesced away. + // + // If the copy would be a kill, there is no need to split the edge. + if (!LV.isLiveOut(Reg, *PreMBB)) + continue; + + DEBUG(dbgs() << PrintReg(Reg) << " live-out before critical edge BB#" + << PreMBB->getNumber() << " -> BB#" << MBB.getNumber() + << ": " << *BBI); + + // If Reg is not live-in to MBB, it means it must be live-in to some + // other PreMBB successor, and we can avoid the interference by splitting + // the edge. + // + // If Reg *is* live-in to MBB, the interference is inevitable and a copy + // is likely to be left after coalescing. If we are looking at a loop + // exiting edge, split it so we won't insert code in the loop, otherwise + // don't bother. + bool ShouldSplit = !LV.isLiveIn(Reg, MBB); + + // Check for a loop exiting edge. 
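The terminator scan opening this hunk drops the old one-terminator assumption: several terminators may read the source register, and the kill flag belongs on the last of them. A standalone sketch of the search order with simplified types; the loop-exit check announced just above continues below:

#include <cstddef>
#include <vector>

struct Inst { bool ReadsSrcReg; };

// Returns the index of the kill instruction, or -1 if nothing reads the reg.
// Prefer the last terminator that reads it; otherwise rewind from the copy
// insertion point to the nearest earlier reader.
int findKillIndex(const std::vector<Inst> &Block, std::size_t FirstTerm,
                  std::size_t InsertPos) {
  int Kill = -1;
  for (std::size_t I = FirstTerm; I < Block.size(); ++I)
    if (Block[I].ReadsSrcReg)
      Kill = static_cast<int>(I); // keep the *last* reading terminator
  if (Kill >= 0)
    return Kill;
  for (std::size_t I = InsertPos; I-- > 0;)
    if (Block[I].ReadsSrcReg)
      return static_cast<int>(I);
  return -1;
}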
+ if (!ShouldSplit && CurLoop != PreLoop) {
+ DEBUG({
+ dbgs() << "Split wouldn't help, maybe avoid loop copies?\n";
+ if (PreLoop) dbgs() << "PreLoop: " << *PreLoop;
+ if (CurLoop) dbgs() << "CurLoop: " << *CurLoop;
+ });
+ // This edge could be entering a loop, exiting a loop, or it could be
+ // both: Jumping directly from one loop to the header of a sibling
+ // loop.
+ // Split unless this edge is entering CurLoop from an outer loop.
+ ShouldSplit = PreLoop && !PreLoop->contains(CurLoop);
+ }
+ if (!ShouldSplit)
+ continue;
+ if (!PreMBB->SplitCriticalEdge(&MBB, this)) {
+ DEBUG(dbgs() << "Failed to split critical edge.\n");
+ continue;
}
+ Changed = true;
+ ++NumCriticalEdgesSplit;
}
}
return Changed;
diff --git a/lib/CodeGen/Passes.cpp b/lib/CodeGen/Passes.cpp
index 490547b..cfa3eec 100644
--- a/lib/CodeGen/Passes.cpp
+++ b/lib/CodeGen/Passes.cpp
@@ -22,6 +22,7 @@
#include "llvm/CodeGen/RegAllocRegistry.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetOptions.h"
+#include "llvm/MC/MCAsmInfo.h"
#include "llvm/Assembly/PrintModulePass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
@@ -48,6 +49,8 @@ static cl::opt<bool> DisableSSC("disable-ssc", cl::Hidden,
cl::desc("Disable Stack Slot Coloring"));
static cl::opt<bool> DisableMachineDCE("disable-machine-dce", cl::Hidden,
cl::desc("Disable Machine Dead Code Elimination"));
+static cl::opt<bool> EnableEarlyIfConversion("enable-early-ifcvt", cl::Hidden,
+ cl::desc("Enable Early If-conversion"));
static cl::opt<bool> DisableMachineLICM("disable-machine-licm", cl::Hidden,
cl::desc("Disable Machine LICM"));
static cl::opt<bool> DisableMachineCSE("disable-machine-cse", cl::Hidden,
@@ -80,15 +83,23 @@ static cl::opt<bool> PrintGCInfo("print-gc", cl::Hidden,
static cl::opt<bool> VerifyMachineCode("verify-machineinstrs", cl::Hidden,
cl::desc("Verify generated machine code"),
cl::init(getenv("LLVM_VERIFY_MACHINEINSTRS")!=NULL));
+static cl::opt<std::string>
+PrintMachineInstrs("print-machineinstrs", cl::ValueOptional,
+ cl::desc("Print machine instrs"),
+ cl::value_desc("pass-name"), cl::init("option-unspecified"));
+
+// Experimental option to run live interval analysis early.
+static cl::opt<bool> EarlyLiveIntervals("early-live-intervals", cl::Hidden,
+ cl::desc("Run live interval analysis earlier in the pipeline"));

/// Allow standard passes to be disabled by command line options. This supports
/// simple binary flags that either suppress the pass or do nothing.
/// i.e. -disable-mypass=false has no effect.
/// These should be converted to boolOrDefault in order to use applyOverride.
-static AnalysisID applyDisable(AnalysisID ID, bool Override) {
+static AnalysisID applyDisable(AnalysisID PassID, bool Override) {
if (Override)
- return &NoPassID;
- return ID;
+ return 0;
+ return PassID;
}
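Back in the PHIElimination hunk above, the whole split decision reduces to a small predicate: split when the PHI source is not live into MBB, and otherwise only when the edge exits a loop rather than entering CurLoop from an outer loop. A condensed model with a toy loop type standing in for MachineLoopInfo:

struct Loop {
  const Loop *Parent; // enclosing loop, or null

  // True if Other is this loop or nested somewhere inside it.
  bool contains(const Loop *Other) const {
    for (; Other; Other = Other->Parent)
      if (Other == this)
        return true;
    return false;
  }
};

bool shouldSplitEdge(bool RegLiveIntoMBB, const Loop *PreLoop,
                     const Loop *CurLoop) {
  bool ShouldSplit = !RegLiveIntoMBB;
  if (!ShouldSplit && CurLoop != PreLoop)
    // Split unless the edge enters CurLoop from an outer loop.
    ShouldSplit = PreLoop && !PreLoop->contains(CurLoop);
  return ShouldSplit;
}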
/// Allow Pass selection to be overridden by command line options. This supports
@@ -101,13 +112,13 @@ static AnalysisID applyOverride(AnalysisID TargetID, cl::boolOrDefault Override,
case cl::BOU_UNSET:
return TargetID;
case cl::BOU_TRUE:
- if (TargetID != &NoPassID)
+ if (TargetID)
return TargetID;
- if (StandardID == &NoPassID)
+ if (StandardID == 0)
report_fatal_error("Target cannot enable pass");
return StandardID;
case cl::BOU_FALSE:
- return &NoPassID;
+ return 0;
}
llvm_unreachable("Invalid command line option state");
}
@@ -149,6 +160,9 @@ static AnalysisID overridePass(AnalysisID StandardID, AnalysisID TargetID) {
if (StandardID == &DeadMachineInstructionElimID)
return applyDisable(TargetID, DisableMachineDCE);
+ if (StandardID == &EarlyIfConverterID)
+ return applyDisable(TargetID, !EnableEarlyIfConversion);
+
if (StandardID == &MachineLICMID)
return applyDisable(TargetID, DisableMachineLICM);
@@ -178,9 +192,6 @@ INITIALIZE_PASS(TargetPassConfig, "targetpassconfig",
"Target Pass Configuration", false, false)
char TargetPassConfig::ID = 0;
-static char NoPassIDAnchor = 0;
-char &llvm::NoPassID = NoPassIDAnchor;
-
// Pseudo Pass IDs.
char TargetPassConfig::EarlyTailDuplicateID = 0;
char TargetPassConfig::PostRAMachineLICMID = 0;
@@ -193,9 +204,13 @@ public:
// that are part of a standard pass pipeline without overriding the entire
// pipeline. This mechanism allows target options to inherit a standard pass's
// user interface. For example, a target may disable a standard pass by
- // default by substituting NoPass, and the user may still enable that standard
- // pass with an explicit command line option.
+ // default by substituting a pass ID of zero, and the user may still enable
+ // that standard pass with an explicit command line option.
DenseMap<AnalysisID,AnalysisID> TargetPasses;
+
+ /// Store the pairs of <AnalysisID, AnalysisID> of which the second pass
+ /// is inserted after each instance of the first one.
+ SmallVector<std::pair<AnalysisID, AnalysisID>, 4> InsertedPasses;
};
} // namespace llvm
@@ -207,7 +222,8 @@ TargetPassConfig::~TargetPassConfig() {
// Out of line constructor provides default values for pass options and
// registers all common codegen passes.
TargetPassConfig::TargetPassConfig(TargetMachine *tm, PassManagerBase &pm)
- : ImmutablePass(ID), TM(tm), PM(&pm), Impl(0), Initialized(false),
+ : ImmutablePass(ID), PM(&pm), StartAfter(0), StopAfter(0),
+ Started(true), Stopped(false), TM(tm), Impl(0), Initialized(false),
DisableVerify(false),
EnableTailMerge(true) {
@@ -218,11 +234,22 @@ TargetPassConfig::TargetPassConfig(TargetMachine *tm, PassManagerBase &pm)
initializeCodeGen(*PassRegistry::getPassRegistry());
// Substitute Pseudo Pass IDs for real ones.
- substitutePass(EarlyTailDuplicateID, TailDuplicateID);
- substitutePass(PostRAMachineLICMID, MachineLICMID);
+ substitutePass(&EarlyTailDuplicateID, &TailDuplicateID);
+ substitutePass(&PostRAMachineLICMID, &MachineLICMID);
+
+ // Disable early if-conversion. Targets that are ready can enable it.
+ disablePass(&EarlyIfConverterID);
// Temporarily disable experimental passes.
- substitutePass(MachineSchedulerID, NoPassID);
+ substitutePass(&MachineSchedulerID, 0);
+}
+
+/// Insert InsertedPassID pass after TargetPassID.
+void TargetPassConfig::insertPass(AnalysisID TargetPassID, + AnalysisID InsertedPassID) { + assert(TargetPassID != InsertedPassID && "Insert a pass after itself!"); + std::pair P(TargetPassID, InsertedPassID); + Impl->InsertedPasses.push_back(P); } /// createPassConfig - Create a pass configuration object to be used by @@ -244,8 +271,9 @@ void TargetPassConfig::setOpt(bool &Opt, bool Val) { Opt = Val; } -void TargetPassConfig::substitutePass(char &StandardID, char &TargetID) { - Impl->TargetPasses[&StandardID] = &TargetID; +void TargetPassConfig::substitutePass(AnalysisID StandardID, + AnalysisID TargetID) { + Impl->TargetPasses[StandardID] = TargetID; } AnalysisID TargetPassConfig::getPassSubstitution(AnalysisID ID) const { @@ -256,29 +284,62 @@ AnalysisID TargetPassConfig::getPassSubstitution(AnalysisID ID) const { return I->second; } -/// Add a CodeGen pass at this point in the pipeline after checking for target -/// and command line overrides. -AnalysisID TargetPassConfig::addPass(char &ID) { +/// Add a pass to the PassManager if that pass is supposed to be run. If the +/// Started/Stopped flags indicate either that the compilation should start at +/// a later pass or that it should stop after an earlier pass, then do not add +/// the pass. Finally, compare the current pass against the StartAfter +/// and StopAfter options and change the Started/Stopped flags accordingly. +void TargetPassConfig::addPass(Pass *P) { assert(!Initialized && "PassConfig is immutable"); - AnalysisID TargetID = getPassSubstitution(&ID); - AnalysisID FinalID = overridePass(&ID, TargetID); - if (FinalID == &NoPassID) + // Cache the Pass ID here in case the pass manager finds this pass is + // redundant with ones already scheduled / available, and deletes it. + // Fundamentally, once we add the pass to the manager, we no longer own it + // and shouldn't reference it. + AnalysisID PassID = P->getPassID(); + + if (Started && !Stopped) + PM->add(P); + if (StopAfter == PassID) + Stopped = true; + if (StartAfter == PassID) + Started = true; + if (Stopped && !Started) + report_fatal_error("Cannot stop compilation after pass that is not run"); +} + +/// Add a CodeGen pass at this point in the pipeline after checking for target +/// and command line overrides. +AnalysisID TargetPassConfig::addPass(AnalysisID PassID) { + AnalysisID TargetID = getPassSubstitution(PassID); + AnalysisID FinalID = overridePass(PassID, TargetID); + if (FinalID == 0) return FinalID; Pass *P = Pass::createPass(FinalID); if (!P) llvm_unreachable("Pass ID not registered"); - PM->add(P); + addPass(P); + // Add the passes after the pass P if there is any. 
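The Started/Stopped bookkeeping in addPass(Pass*) above is what gives the pipeline its start-after/stop-after window. A minimal standalone model of the gating, with pass IDs reduced to opaque pointers (a driver that sets StartAfter would also begin with Started false, as here); the addPass(AnalysisID) body continues below with its insertion loop:

#include <cstdio>
#include <cstdlib>
#include <vector>

typedef const void *PassId;

struct GatedPipeline {
  PassId StartAfter, StopAfter;
  bool Started, Stopped;
  std::vector<PassId> Run;

  GatedPipeline(PassId Start, PassId Stop)
      : StartAfter(Start), StopAfter(Stop), Started(Start == 0),
        Stopped(false) {}

  void addPass(PassId P) {
    if (Started && !Stopped)
      Run.push_back(P);  // only passes inside the window execute
    if (StopAfter == P)
      Stopped = true;
    if (StartAfter == P)
      Started = true;    // window opens *after* this pass
    if (Stopped && !Started) {
      std::fprintf(stderr, "cannot stop after a pass that is not run\n");
      std::exit(1);
    }
  }
};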
+ for (SmallVector, 4>::iterator + I = Impl->InsertedPasses.begin(), E = Impl->InsertedPasses.end(); + I != E; ++I) { + if ((*I).first == PassID) { + assert((*I).second && "Illegal Pass ID!"); + Pass *NP = Pass::createPass((*I).second); + assert(NP && "Pass ID not registered"); + addPass(NP); + } + } return FinalID; } -void TargetPassConfig::printAndVerify(const char *Banner) const { +void TargetPassConfig::printAndVerify(const char *Banner) { if (TM->shouldPrintMachineCode()) - PM->add(createMachineFunctionPrinterPass(dbgs(), Banner)); + addPass(createMachineFunctionPrinterPass(dbgs(), Banner)); if (VerifyMachineCode) - PM->add(createMachineVerifierPass(Banner)); + addPass(createMachineVerifierPass(Banner)); } /// Add common target configurable passes that perform LLVM IR to IR transforms @@ -288,46 +349,73 @@ void TargetPassConfig::addIRPasses() { // Add TypeBasedAliasAnalysis before BasicAliasAnalysis so that // BasicAliasAnalysis wins if they disagree. This is intended to help // support "obvious" type-punning idioms. - PM->add(createTypeBasedAliasAnalysisPass()); - PM->add(createBasicAliasAnalysisPass()); + addPass(createTypeBasedAliasAnalysisPass()); + addPass(createBasicAliasAnalysisPass()); // Before running any passes, run the verifier to determine if the input // coming from the front-end and/or optimizer is valid. if (!DisableVerify) - PM->add(createVerifierPass()); + addPass(createVerifierPass()); // Run loop strength reduction before anything else. if (getOptLevel() != CodeGenOpt::None && !DisableLSR) { - PM->add(createLoopStrengthReducePass(getTargetLowering())); + addPass(createLoopStrengthReducePass(getTargetLowering())); if (PrintLSR) - PM->add(createPrintFunctionPass("\n\n*** Code after LSR ***\n", &dbgs())); + addPass(createPrintFunctionPass("\n\n*** Code after LSR ***\n", &dbgs())); } - PM->add(createGCLoweringPass()); + addPass(createGCLoweringPass()); // Make sure that no unreachable blocks are instruction selected. - PM->add(createUnreachableBlockEliminationPass()); + addPass(createUnreachableBlockEliminationPass()); +} + +/// Turn exception handling constructs into something the code generators can +/// handle. +void TargetPassConfig::addPassesToHandleExceptions() { + switch (TM->getMCAsmInfo()->getExceptionHandlingType()) { + case ExceptionHandling::SjLj: + // SjLj piggy-backs on dwarf for this bit. The cleanups done apply to both + // Dwarf EH prepare needs to be run after SjLj prepare. Otherwise, + // catch info can get misplaced when a selector ends up more than one block + // removed from the parent invoke(s). This could happen when a landing + // pad is shared by multiple invokes and is also a target of a normal + // edge from elsewhere. + addPass(createSjLjEHPreparePass(TM->getTargetLowering())); + // FALLTHROUGH + case ExceptionHandling::DwarfCFI: + case ExceptionHandling::ARM: + case ExceptionHandling::Win64: + addPass(createDwarfEHPass(TM)); + break; + case ExceptionHandling::None: + addPass(createLowerInvokePass(TM->getTargetLowering())); + + // The lower invoke pass may create unreachable code. Remove it. + addPass(createUnreachableBlockEliminationPass()); + break; + } } /// Add common passes that perform LLVM IR to IR transforms in preparation for /// instruction selection. 
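addPassesToHandleExceptions above dispatches on the MCAsmInfo exception model, with a deliberate fall-through from SjLj into the Dwarf preparation. A standalone sketch of that dispatch, where the pass names are illustrative strings rather than registered LLVM pass names; addISelPrepare, introduced by the trailing comment above, continues below:

#include <string>
#include <vector>

enum EHModel { EHNone, EHSjLj, EHDwarfCFI, EHARM, EHWin64 };

std::vector<std::string> ehLoweringPasses(EHModel Model) {
  std::vector<std::string> Passes;
  switch (Model) {
  case EHSjLj:
    // SjLj prepare must run before Dwarf EH prepare, then fall through.
    Passes.push_back("sjlj-eh-prepare");
    // FALLTHROUGH
  case EHDwarfCFI:
  case EHARM:
  case EHWin64:
    Passes.push_back("dwarf-eh-prepare");
    break;
  case EHNone:
    Passes.push_back("lower-invoke");
    // Lowering invokes can leave unreachable blocks behind.
    Passes.push_back("unreachable-block-elim");
    break;
  }
  return Passes;
}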
void TargetPassConfig::addISelPrepare() { if (getOptLevel() != CodeGenOpt::None && !DisableCGP) - PM->add(createCodeGenPreparePass(getTargetLowering())); + addPass(createCodeGenPreparePass(getTargetLowering())); - PM->add(createStackProtectorPass(getTargetLowering())); + addPass(createStackProtectorPass(getTargetLowering())); addPreISel(); if (PrintISelInput) - PM->add(createPrintFunctionPass("\n\n" + addPass(createPrintFunctionPass("\n\n" "*** Final LLVM Code input to ISel ***\n", &dbgs())); // All passes which modify the LLVM IR are now complete; run the verifier // to ensure that the IR is valid. if (!DisableVerify) - PM->add(createVerifierPass()); + addPass(createVerifierPass()); } /// Add the complete set of target-independent postISel code generator passes. @@ -349,11 +437,26 @@ void TargetPassConfig::addISelPrepare() { /// TODO: We could use a single addPre/Post(ID) hook to allow pass injection /// before/after any target-independent pass. But it's currently overkill. void TargetPassConfig::addMachinePasses() { + // Insert a machine instr printer pass after the specified pass. + // If -print-machineinstrs specified, print machineinstrs after all passes. + if (StringRef(PrintMachineInstrs.getValue()).equals("")) + TM->Options.PrintMachineCode = true; + else if (!StringRef(PrintMachineInstrs.getValue()) + .equals("option-unspecified")) { + const PassRegistry *PR = PassRegistry::getPassRegistry(); + const PassInfo *TPI = PR->getPassInfo(PrintMachineInstrs.getValue()); + const PassInfo *IPI = PR->getPassInfo(StringRef("print-machineinstrs")); + assert (TPI && IPI && "Pass ID not registered!"); + const char *TID = (char *)(TPI->getTypeInfo()); + const char *IID = (char *)(IPI->getTypeInfo()); + insertPass(TID, IID); + } + // Print the instruction selected machine code... printAndVerify("After Instruction Selection"); // Expand pseudo-instructions emitted by ISel. - addPass(ExpandISelPseudosID); + addPass(&ExpandISelPseudosID); // Add passes that optimize machine instructions in SSA form. if (getOptLevel() != CodeGenOpt::None) { @@ -362,7 +465,7 @@ void TargetPassConfig::addMachinePasses() { else { // If the target requests it, assign local variables to stack slots relative // to one another and simplify frame index references where possible. - addPass(LocalStackSlotAllocationID); + addPass(&LocalStackSlotAllocationID); } // Run pre-ra passes. @@ -381,7 +484,7 @@ void TargetPassConfig::addMachinePasses() { printAndVerify("After PostRegAlloc passes"); // Insert prolog/epilog code. Eliminate abstract frame index references... - addPass(PrologEpilogCodeInserterID); + addPass(&PrologEpilogCodeInserterID); printAndVerify("After PrologEpilogCodeInserter"); /// Add passes that optimize machine instructions after register allocation. @@ -389,7 +492,7 @@ void TargetPassConfig::addMachinePasses() { addMachineLateOptimization(); // Expand pseudo instructions before second scheduling pass. - addPass(ExpandPostRAPseudosID); + addPass(&ExpandPostRAPseudosID); printAndVerify("After ExpandPostRAPseudos"); // Run pre-sched2 passes. @@ -398,14 +501,14 @@ void TargetPassConfig::addMachinePasses() { // Second pass scheduler. if (getOptLevel() != CodeGenOpt::None) { - addPass(PostRASchedulerID); + addPass(&PostRASchedulerID); printAndVerify("After PostRAScheduler"); } // GC - addPass(GCMachineCodeAnalysisID); + addPass(&GCMachineCodeAnalysisID); if (PrintGCInfo) - PM->add(createGCInfoPrinter(dbgs())); + addPass(createGCInfoPrinter(dbgs())); // Basic block placement. 
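The -print-machineinstrs handling at the top of addMachinePasses rides on the insertPass mechanism: the printer's pass ID is queued against the named pass, and each later addPass of that ID also schedules the printer. A compact standalone model; the pipeline hunks continue below:

#include <cstddef>
#include <utility>
#include <vector>

typedef const void *PassId;

struct InsertionQueue {
  std::vector<std::pair<PassId, PassId> > Inserted; // (anchor, extra)
  std::vector<PassId> Pipeline;

  void insertPass(PassId Anchor, PassId Extra) {
    Inserted.push_back(std::make_pair(Anchor, Extra));
  }

  void addPass(PassId P) {
    Pipeline.push_back(P);
    // Schedule every queued pass anchored on P, e.g. the MI printer.
    for (std::size_t I = 0; I != Inserted.size(); ++I)
      if (Inserted[I].first == P)
        Pipeline.push_back(Inserted[I].second);
  }
};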
if (getOptLevel() != CodeGenOpt::None) @@ -418,30 +521,31 @@ void TargetPassConfig::addMachinePasses() { /// Add passes that optimize machine instructions in SSA form. void TargetPassConfig::addMachineSSAOptimization() { // Pre-ra tail duplication. - if (addPass(EarlyTailDuplicateID) != &NoPassID) + if (addPass(&EarlyTailDuplicateID)) printAndVerify("After Pre-RegAlloc TailDuplicate"); // Optimize PHIs before DCE: removing dead PHI cycles may make more // instructions dead. - addPass(OptimizePHIsID); + addPass(&OptimizePHIsID); // If the target requests it, assign local variables to stack slots relative // to one another and simplify frame index references where possible. - addPass(LocalStackSlotAllocationID); + addPass(&LocalStackSlotAllocationID); // With optimization, dead code should already be eliminated. However // there is one known exception: lowered code for arguments that are only // used by tail calls, where the tail calls reuse the incoming stack // arguments directly (see t11 in test/CodeGen/X86/sibcall.ll). - addPass(DeadMachineInstructionElimID); + addPass(&DeadMachineInstructionElimID); printAndVerify("After codegen DCE pass"); - addPass(MachineLICMID); - addPass(MachineCSEID); - addPass(MachineSinkingID); + addPass(&EarlyIfConverterID); + addPass(&MachineLICMID); + addPass(&MachineCSEID); + addPass(&MachineSinkingID); printAndVerify("After Machine LICM, CSE and Sinking passes"); - addPass(PeepholeOptimizerID); + addPass(&PeepholeOptimizerID); printAndVerify("After codegen peephole optimization pass"); } @@ -519,10 +623,10 @@ FunctionPass *TargetPassConfig::createRegAllocPass(bool Optimized) { /// Add the minimum set of target-independent passes that are required for /// register allocation. No coalescing or scheduling. void TargetPassConfig::addFastRegAlloc(FunctionPass *RegAllocPass) { - addPass(PHIEliminationID); - addPass(TwoAddressInstructionPassID); + addPass(&PHIEliminationID); + addPass(&TwoAddressInstructionPassID); - PM->add(RegAllocPass); + addPass(RegAllocPass); printAndVerify("After Register Allocation"); } @@ -530,42 +634,51 @@ void TargetPassConfig::addFastRegAlloc(FunctionPass *RegAllocPass) { /// optimized register allocation, including coalescing, machine instruction /// scheduling, and register allocation itself. void TargetPassConfig::addOptimizedRegAlloc(FunctionPass *RegAllocPass) { + addPass(&ProcessImplicitDefsID); + // LiveVariables currently requires pure SSA form. // // FIXME: Once TwoAddressInstruction pass no longer uses kill flags, // LiveVariables can be removed completely, and LiveIntervals can be directly // computed. (We still either need to regenerate kill flags after regalloc, or // preferably fix the scavenger to not depend on them). - addPass(LiveVariablesID); + addPass(&LiveVariablesID); // Add passes that move from transformed SSA into conventional SSA. This is a // "copy coalescing" problem. // if (!EnableStrongPHIElim) { // Edge splitting is smarter with machine loop info. - addPass(MachineLoopInfoID); - addPass(PHIEliminationID); + addPass(&MachineLoopInfoID); + addPass(&PHIEliminationID); } - addPass(TwoAddressInstructionPassID); - // FIXME: Either remove this pass completely, or fix it so that it works on - // SSA form. We could modify LiveIntervals to be independent of this pass, But - // it would be even better to simply eliminate *all* IMPLICIT_DEFs before - // leaving SSA. - addPass(ProcessImplicitDefsID); + // Eventually, we want to run LiveIntervals before PHI elimination. 
+ if (EarlyLiveIntervals) + addPass(&LiveIntervalsID); + + addPass(&TwoAddressInstructionPassID); if (EnableStrongPHIElim) - addPass(StrongPHIEliminationID); + addPass(&StrongPHIEliminationID); - addPass(RegisterCoalescerID); + addPass(&RegisterCoalescerID); // PreRA instruction scheduling. - if (addPass(MachineSchedulerID) != &NoPassID) + if (addPass(&MachineSchedulerID)) printAndVerify("After Machine Scheduling"); // Add the selected register allocation pass. - PM->add(RegAllocPass); - printAndVerify("After Register Allocation"); + addPass(RegAllocPass); + printAndVerify("After Register Allocation, before rewriter"); + + // Allow targets to change the register assignments before rewriting. + if (addPreRewrite()) + printAndVerify("After pre-rewrite passes"); + + // Finally rewrite virtual registers. + addPass(&VirtRegRewriterID); + printAndVerify("After Virtual Register Rewriter"); // FinalizeRegAlloc is convenient until MachineInstrBundles is more mature, // but eventually, all users of it should probably be moved to addPostRA and @@ -579,12 +692,12 @@ void TargetPassConfig::addOptimizedRegAlloc(FunctionPass *RegAllocPass) { // // FIXME: Re-enable coloring with register when it's capable of adding // kill markers. - addPass(StackSlotColoringID); + addPass(&StackSlotColoringID); // Run post-ra machine LICM to hoist reloads / remats. // // FIXME: can this move into MachineLateOptimization? - addPass(PostRAMachineLICMID); + addPass(&PostRAMachineLICMID); printAndVerify("After StackSlotColoring and postra Machine LICM"); } @@ -596,33 +709,33 @@ void TargetPassConfig::addOptimizedRegAlloc(FunctionPass *RegAllocPass) { /// Add passes that optimize machine instructions after register allocation. void TargetPassConfig::addMachineLateOptimization() { // Branch folding must be run after regalloc and prolog/epilog insertion. - if (addPass(BranchFolderPassID) != &NoPassID) + if (addPass(&BranchFolderPassID)) printAndVerify("After BranchFolding"); // Tail duplication. - if (addPass(TailDuplicateID) != &NoPassID) + if (addPass(&TailDuplicateID)) printAndVerify("After TailDuplicate"); // Copy propagation. - if (addPass(MachineCopyPropagationID) != &NoPassID) + if (addPass(&MachineCopyPropagationID)) printAndVerify("After copy propagation pass"); } /// Add standard basic block placement passes. void TargetPassConfig::addBlockPlacement() { - AnalysisID ID = &NoPassID; + AnalysisID PassID = 0; if (!DisableBlockPlacement) { // MachineBlockPlacement is a new pass which subsumes the functionality of // CodPlacementOpt. The old code placement pass can be restored by // disabling block placement, but eventually it will be removed. - ID = addPass(MachineBlockPlacementID); + PassID = addPass(&MachineBlockPlacementID); } else { - ID = addPass(CodePlacementOptID); + PassID = addPass(&CodePlacementOptID); } - if (ID != &NoPassID) { + if (PassID) { // Run a separate pass to collect block placement statistics. if (EnableBlockPlacementStats) - addPass(MachineBlockPlacementStatsID); + addPass(&MachineBlockPlacementStatsID); printAndVerify("After machine block placement."); } diff --git a/lib/CodeGen/PeepholeOptimizer.cpp b/lib/CodeGen/PeepholeOptimizer.cpp index 9c5c029..6bc7e37 100644 --- a/lib/CodeGen/PeepholeOptimizer.cpp +++ b/lib/CodeGen/PeepholeOptimizer.cpp @@ -31,6 +31,15 @@ // same flag that the "cmp" instruction sets and that "bz" uses, then we can // eliminate the "cmp" instruction. 
// +// Another instance, in this code: +// +// sub r1, r3 | sub r1, imm +// cmp r3, r1 or cmp r1, r3 | cmp r1, imm +// bge L1 +// +// If the branch instruction can use flag from "sub", then we can replace +// "sub" with "subs" and eliminate the "cmp" instruction. +// // - Optimize Bitcast pairs: // // v1 = bitcast v0 @@ -69,6 +78,7 @@ STATISTIC(NumReuse, "Number of extension results reused"); STATISTIC(NumBitcasts, "Number of bitcasts eliminated"); STATISTIC(NumCmps, "Number of compares eliminated"); STATISTIC(NumImmFold, "Number of move immediate folded"); +STATISTIC(NumLoadFold, "Number of loads folded"); namespace { class PeepholeOptimizer : public MachineFunctionPass { @@ -95,16 +105,17 @@ namespace { } private: - bool OptimizeBitcastInstr(MachineInstr *MI, MachineBasicBlock *MBB); - bool OptimizeCmpInstr(MachineInstr *MI, MachineBasicBlock *MBB); - bool OptimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB, + bool optimizeBitcastInstr(MachineInstr *MI, MachineBasicBlock *MBB); + bool optimizeCmpInstr(MachineInstr *MI, MachineBasicBlock *MBB); + bool optimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB, SmallPtrSet &LocalMIs); bool isMoveImmediate(MachineInstr *MI, SmallSet &ImmDefRegs, DenseMap &ImmDefMIs); - bool FoldImmediate(MachineInstr *MI, MachineBasicBlock *MBB, + bool foldImmediate(MachineInstr *MI, MachineBasicBlock *MBB, SmallSet &ImmDefRegs, DenseMap &ImmDefMIs); + bool isLoadFoldable(MachineInstr *MI, unsigned &FoldAsLoadDefReg); }; } @@ -116,7 +127,7 @@ INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) INITIALIZE_PASS_END(PeepholeOptimizer, "peephole-opts", "Peephole Optimizations", false, false) -/// OptimizeExtInstr - If instruction is a copy-like instruction, i.e. it reads +/// optimizeExtInstr - If instruction is a copy-like instruction, i.e. it reads /// a single register and writes a single register and it does not modify the /// source, and if the source value is preserved as a sub-register of the /// result, then replace all reachable uses of the source with the subreg of the @@ -126,7 +137,7 @@ INITIALIZE_PASS_END(PeepholeOptimizer, "peephole-opts", /// the code. Since this code does not currently share EXTRACTs, just ignore all /// debug uses. bool PeepholeOptimizer:: -OptimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB, +optimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB, SmallPtrSet &LocalMIs) { unsigned SrcReg, DstReg, SubIdx; if (!TII->isCoalescableExtInstr(*MI, SrcReg, DstReg, SubIdx)) @@ -136,16 +147,30 @@ OptimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB, TargetRegisterInfo::isPhysicalRegister(SrcReg)) return false; - MachineRegisterInfo::use_nodbg_iterator UI = MRI->use_nodbg_begin(SrcReg); - if (++UI == MRI->use_nodbg_end()) + if (MRI->hasOneNonDBGUse(SrcReg)) // No other uses. return false; + // Ensure DstReg can get a register class that actually supports + // sub-registers. Don't change the class until we commit. + const TargetRegisterClass *DstRC = MRI->getRegClass(DstReg); + DstRC = TM->getRegisterInfo()->getSubClassWithSubReg(DstRC, SubIdx); + if (!DstRC) + return false; + + // The ext instr may be operating on a sub-register of SrcReg as well. + // PPC::EXTSW is a 32 -> 64-bit sign extension, but it reads a 64-bit + // register. + // If UseSrcSubIdx is Set, SubIdx also applies to SrcReg, and only uses of + // SrcReg:SubIdx should be replaced. + bool UseSrcSubIdx = TM->getRegisterInfo()-> + getSubClassWithSubReg(MRI->getRegClass(SrcReg), SubIdx) != 0; + // The source has other uses. 
See if we can replace the other uses with use of // the result of the extension. SmallPtrSet ReachedBBs; - UI = MRI->use_nodbg_begin(DstReg); - for (MachineRegisterInfo::use_nodbg_iterator UE = MRI->use_nodbg_end(); + for (MachineRegisterInfo::use_nodbg_iterator + UI = MRI->use_nodbg_begin(DstReg), UE = MRI->use_nodbg_end(); UI != UE; ++UI) ReachedBBs.insert(UI->getParent()); @@ -156,8 +181,8 @@ OptimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB, SmallVector ExtendedUses; bool ExtendLife = true; - UI = MRI->use_nodbg_begin(SrcReg); - for (MachineRegisterInfo::use_nodbg_iterator UE = MRI->use_nodbg_end(); + for (MachineRegisterInfo::use_nodbg_iterator + UI = MRI->use_nodbg_begin(SrcReg), UE = MRI->use_nodbg_end(); UI != UE; ++UI) { MachineOperand &UseMO = UI.getOperand(); MachineInstr *UseMI = &*UI; @@ -169,6 +194,10 @@ OptimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB, continue; } + // Only accept uses of SrcReg:SubIdx. + if (UseSrcSubIdx && UseMO.getSubReg() != SubIdx) + continue; + // It's an error to translate this: // // %reg1025 = %reg1024 @@ -223,9 +252,9 @@ OptimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB, // Look for PHI uses of the extended result, we don't want to extend the // liveness of a PHI input. It breaks all kinds of assumptions down // stream. A PHI use is expected to be the kill of its source values. - UI = MRI->use_nodbg_begin(DstReg); for (MachineRegisterInfo::use_nodbg_iterator - UE = MRI->use_nodbg_end(); UI != UE; ++UI) + UI = MRI->use_nodbg_begin(DstReg), UE = MRI->use_nodbg_end(); + UI != UE; ++UI) if (UI->isPHI()) PHIBBs.insert(UI->getParent()); @@ -238,14 +267,20 @@ OptimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB, continue; // About to add uses of DstReg, clear DstReg's kill flags. - if (!Changed) + if (!Changed) { MRI->clearKillFlags(DstReg); + MRI->constrainRegClass(DstReg, DstRC); + } unsigned NewVR = MRI->createVirtualRegister(RC); - BuildMI(*UseMBB, UseMI, UseMI->getDebugLoc(), - TII->get(TargetOpcode::COPY), NewVR) + MachineInstr *Copy = BuildMI(*UseMBB, UseMI, UseMI->getDebugLoc(), + TII->get(TargetOpcode::COPY), NewVR) .addReg(DstReg, 0, SubIdx); - + // SubIdx applies to both SrcReg and DstReg when UseSrcSubIdx is set. + if (UseSrcSubIdx) { + Copy->getOperand(0).setSubReg(SubIdx); + Copy->getOperand(0).setIsUndef(); + } UseMO->setReg(NewVR); ++NumReuse; Changed = true; @@ -255,7 +290,7 @@ OptimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB, return Changed; } -/// OptimizeBitcastInstr - If the instruction is a bitcast instruction A that +/// optimizeBitcastInstr - If the instruction is a bitcast instruction A that /// cannot be optimized away during isel (e.g. ARM::VMOVSR, which bitcast /// a value cross register classes), and the source is defined by another /// bitcast instruction B. And if the register class of source of B matches @@ -265,7 +300,7 @@ OptimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB, /// %vreg3 = VMOVRS %vreg0 /// Replace all uses of vreg3 with vreg1. 
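The sub/cmp pattern described in this file's header comment deserves a concrete illustration. Over a toy instruction list (not MachineInstr): when a compare tests the same two operands as an immediately preceding sub, make the sub flag-setting and delete the compare. In the swapped-operand form the real transformation must also invert the branch condition, which this sketch deliberately ignores; the optimizeBitcastInstr hunk continues below.

#include <cstddef>
#include <string>
#include <vector>

struct ToyInst {
  std::string Op; // "sub", "subs", "cmp", ...
  int A, B;       // register numbers
};

bool foldCmpIntoSub(std::vector<ToyInst> &Block, std::size_t CmpIdx) {
  if (CmpIdx == 0 || Block[CmpIdx].Op != "cmp")
    return false;
  ToyInst &Sub = Block[CmpIdx - 1];
  ToyInst &Cmp = Block[CmpIdx];
  bool SameOps = Sub.Op == "sub" &&
                 ((Sub.A == Cmp.A && Sub.B == Cmp.B) ||
                  (Sub.A == Cmp.B && Sub.B == Cmp.A));
  if (!SameOps)
    return false;
  Sub.Op = "subs";                     // sub now sets the flags
  Block.erase(Block.begin() + CmpIdx); // cmp is redundant
  return true;
}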
-bool PeepholeOptimizer::OptimizeBitcastInstr(MachineInstr *MI, +bool PeepholeOptimizer::optimizeBitcastInstr(MachineInstr *MI, MachineBasicBlock *MBB) { unsigned NumDefs = MI->getDesc().getNumDefs(); unsigned NumSrcs = MI->getDesc().getNumOperands() - NumDefs; @@ -327,22 +362,23 @@ bool PeepholeOptimizer::OptimizeBitcastInstr(MachineInstr *MI, return true; } -/// OptimizeCmpInstr - If the instruction is a compare and the previous +/// optimizeCmpInstr - If the instruction is a compare and the previous /// instruction it's comparing against all ready sets (or could be modified to /// set) the same flag as the compare, then we can remove the comparison and use /// the flag from the previous instruction. -bool PeepholeOptimizer::OptimizeCmpInstr(MachineInstr *MI, +bool PeepholeOptimizer::optimizeCmpInstr(MachineInstr *MI, MachineBasicBlock *MBB) { // If this instruction is a comparison against zero and isn't comparing a // physical register, we can try to optimize it. - unsigned SrcReg; + unsigned SrcReg, SrcReg2; int CmpMask, CmpValue; - if (!TII->AnalyzeCompare(MI, SrcReg, CmpMask, CmpValue) || - TargetRegisterInfo::isPhysicalRegister(SrcReg)) + if (!TII->analyzeCompare(MI, SrcReg, SrcReg2, CmpMask, CmpValue) || + TargetRegisterInfo::isPhysicalRegister(SrcReg) || + (SrcReg2 != 0 && TargetRegisterInfo::isPhysicalRegister(SrcReg2))) return false; // Attempt to optimize the comparison instruction. - if (TII->OptimizeCompareInstr(MI, SrcReg, CmpMask, CmpValue, MRI)) { + if (TII->optimizeCompareInstr(MI, SrcReg, SrcReg2, CmpMask, CmpValue, MRI)) { ++NumCmps; return true; } @@ -350,6 +386,30 @@ bool PeepholeOptimizer::OptimizeCmpInstr(MachineInstr *MI, return false; } +/// isLoadFoldable - Check whether MI is a candidate for folding into a later +/// instruction. We only fold loads to virtual registers and the virtual +/// register defined has a single use. +bool PeepholeOptimizer::isLoadFoldable(MachineInstr *MI, + unsigned &FoldAsLoadDefReg) { + if (!MI->canFoldAsLoad() || !MI->mayLoad()) + return false; + const MCInstrDesc &MCID = MI->getDesc(); + if (MCID.getNumDefs() != 1) + return false; + + unsigned Reg = MI->getOperand(0).getReg(); + // To reduce compilation time, we check MRI->hasOneUse when inserting + // loads. It should be checked when processing uses of the load, since + // uses can be removed during peephole. + if (!MI->getOperand(0).getSubReg() && + TargetRegisterInfo::isVirtualRegister(Reg) && + MRI->hasOneUse(Reg)) { + FoldAsLoadDefReg = Reg; + return true; + } + return false; +} + bool PeepholeOptimizer::isMoveImmediate(MachineInstr *MI, SmallSet &ImmDefRegs, DenseMap &ImmDefMIs) { @@ -368,10 +428,10 @@ bool PeepholeOptimizer::isMoveImmediate(MachineInstr *MI, return false; } -/// FoldImmediate - Try folding register operands that are defined by move +/// foldImmediate - Try folding register operands that are defined by move /// immediate instructions, i.e. a trivial constant folding optimization, if /// and only if the def and use are in the same BB. 
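isLoadFoldable above gates the new load folding on a conservative set of properties, restated here over stand-in fields, one per query the real code makes; the foldImmediate hunk continues below:

struct LoadTraits {
  bool CanFoldAsLoad; // MI->canFoldAsLoad()
  bool MayLoad;       // MI->mayLoad()
  unsigned NumDefs;   // MCID.getNumDefs()
  bool DefHasSubReg;  // getOperand(0).getSubReg() != 0
  bool DefIsVirtual;  // isVirtualRegister(getOperand(0).getReg())
  bool DefHasOneUse;  // MRI->hasOneUse(Reg)
};

// Fold only loads that fully define a single virtual register with
// exactly one use; everything else stays put.
bool isLoadFoldable(const LoadTraits &L) {
  return L.CanFoldAsLoad && L.MayLoad && L.NumDefs == 1 &&
         !L.DefHasSubReg && L.DefIsVirtual && L.DefHasOneUse;
}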
-bool PeepholeOptimizer::FoldImmediate(MachineInstr *MI, MachineBasicBlock *MBB, +bool PeepholeOptimizer::foldImmediate(MachineInstr *MI, MachineBasicBlock *MBB, SmallSet &ImmDefRegs, DenseMap &ImmDefMIs) { for (unsigned i = 0, e = MI->getDesc().getNumOperands(); i != e; ++i) { @@ -407,6 +467,7 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) { SmallPtrSet LocalMIs; SmallSet ImmDefRegs; DenseMap ImmDefMIs; + unsigned FoldAsLoadDefReg; for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) { MachineBasicBlock *MBB = &*I; @@ -414,6 +475,7 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) { LocalMIs.clear(); ImmDefRegs.clear(); ImmDefMIs.clear(); + FoldAsLoadDefReg = 0; bool First = true; MachineBasicBlock::iterator PMII; @@ -422,15 +484,20 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) { MachineInstr *MI = &*MII; LocalMIs.insert(MI); + // If there exists an instruction which belongs to the following + // categories, we will discard the load candidate. if (MI->isLabel() || MI->isPHI() || MI->isImplicitDef() || MI->isKill() || MI->isInlineAsm() || MI->isDebugValue() || MI->hasUnmodeledSideEffects()) { + FoldAsLoadDefReg = 0; ++MII; continue; } + if (MI->mayStore() || MI->isCall()) + FoldAsLoadDefReg = 0; if (MI->isBitcast()) { - if (OptimizeBitcastInstr(MI, MBB)) { + if (optimizeBitcastInstr(MI, MBB)) { // MI is deleted. LocalMIs.erase(MI); Changed = true; @@ -438,7 +505,7 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) { continue; } } else if (MI->isCompare()) { - if (OptimizeCmpInstr(MI, MBB)) { + if (optimizeCmpInstr(MI, MBB)) { // MI is deleted. LocalMIs.erase(MI); Changed = true; @@ -450,11 +517,36 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) { if (isMoveImmediate(MI, ImmDefRegs, ImmDefMIs)) { SeenMoveImm = true; } else { - Changed |= OptimizeExtInstr(MI, MBB, LocalMIs); + Changed |= optimizeExtInstr(MI, MBB, LocalMIs); if (SeenMoveImm) - Changed |= FoldImmediate(MI, MBB, ImmDefRegs, ImmDefMIs); + Changed |= foldImmediate(MI, MBB, ImmDefRegs, ImmDefMIs); } + // Check whether MI is a load candidate for folding into a later + // instruction. If MI is not a candidate, check whether we can fold an + // earlier load into MI. + if (!isLoadFoldable(MI, FoldAsLoadDefReg) && FoldAsLoadDefReg) { + // We need to fold load after optimizeCmpInstr, since optimizeCmpInstr + // can enable folding by converting SUB to CMP. + MachineInstr *DefMI = 0; + MachineInstr *FoldMI = TII->optimizeLoadInstr(MI, MRI, + FoldAsLoadDefReg, DefMI); + if (FoldMI) { + // Update LocalMIs since we replaced MI with FoldMI and deleted DefMI. + LocalMIs.erase(MI); + LocalMIs.erase(DefMI); + LocalMIs.insert(FoldMI); + MI->eraseFromParent(); + DefMI->eraseFromParent(); + ++NumLoadFold; + + // MI is replaced with FoldMI. 
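The runOnMachineFunction changes in this hunk thread a single FoldAsLoadDefReg candidate through the block scan and clear it at anything that could invalidate the load. A condensed standalone model of that bookkeeping; the hunk resumes below:

#include <cstddef>
#include <vector>

struct ScanStep {
  bool Barrier;        // label/PHI/inline asm/side effects: drop candidate
  bool ClobbersMem;    // mayStore() or isCall(): drop candidate
  bool FoldableLoad;   // isLoadFoldable(MI): becomes the new candidate
  bool FoldsCandidate; // optimizeLoadInstr succeeded on this instruction
};

int countLoadFolds(const std::vector<ScanStep> &Block) {
  int NumLoadFold = 0;
  bool HaveCandidate = false;
  for (std::size_t I = 0; I != Block.size(); ++I) {
    const ScanStep &S = Block[I];
    if (S.Barrier || S.ClobbersMem)
      HaveCandidate = false;
    if (S.FoldableLoad) {
      HaveCandidate = true;   // remember this load
    } else if (HaveCandidate && S.FoldsCandidate) {
      ++NumLoadFold;          // load folded into this user
      HaveCandidate = false;  // both load and user are replaced
    }
  }
  return NumLoadFold;
}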
+ Changed = true; + PMII = FoldMI; + MII = llvm::next(PMII); + continue; + } + } First = false; PMII = MII; ++MII; diff --git a/lib/CodeGen/PostRASchedulerList.cpp b/lib/CodeGen/PostRASchedulerList.cpp index 24d3e5a..7449ff5 100644 --- a/lib/CodeGen/PostRASchedulerList.cpp +++ b/lib/CodeGen/PostRASchedulerList.cpp @@ -22,7 +22,6 @@ #include "AntiDepBreaker.h" #include "AggressiveAntiDepBreaker.h" #include "CriticalAntiDepBreaker.h" -#include "RegisterClassInfo.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/LatencyPriorityQueue.h" #include "llvm/CodeGen/SchedulerRegistry.h" @@ -31,6 +30,7 @@ #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/RegisterClassInfo.h" #include "llvm/CodeGen/ScheduleDAGInstrs.h" #include "llvm/CodeGen/ScheduleHazardRecognizer.h" #include "llvm/Analysis/AliasAnalysis.h" @@ -78,7 +78,6 @@ AntiDepBreaker::~AntiDepBreaker() { } namespace { class PostRAScheduler : public MachineFunctionPass { - AliasAnalysis *AA; const TargetInstrInfo *TII; RegisterClassInfo RegClassInfo; @@ -206,6 +205,10 @@ SchedulePostRATDList::SchedulePostRATDList( const InstrItineraryData *InstrItins = TM.getInstrItineraryData(); HazardRec = TM.getInstrInfo()->CreateTargetPostRAHazardRecognizer(InstrItins, this); + + assert((AntiDepMode == TargetSubtargetInfo::ANTIDEP_NONE || + MRI.tracksLiveness()) && + "Live-ins must be accurate for anti-dependency breaking"); AntiDepBreak = ((AntiDepMode == TargetSubtargetInfo::ANTIDEP_ALL) ? (AntiDepBreaker *)new AggressiveAntiDepBreaker(MF, RCI, CriticalPathRCs) : @@ -423,9 +426,8 @@ void SchedulePostRATDList::StartBlockForKills(MachineBasicBlock *BB) { unsigned Reg = *I; LiveRegs.set(Reg); // Repeat, for all subregs. - for (const uint16_t *Subreg = TRI->getSubRegisters(Reg); - *Subreg; ++Subreg) - LiveRegs.set(*Subreg); + for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) + LiveRegs.set(*SubRegs); } } else { @@ -437,9 +439,8 @@ void SchedulePostRATDList::StartBlockForKills(MachineBasicBlock *BB) { unsigned Reg = *I; LiveRegs.set(Reg); // Repeat, for all subregs. - for (const uint16_t *Subreg = TRI->getSubRegisters(Reg); - *Subreg; ++Subreg) - LiveRegs.set(*Subreg); + for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) + LiveRegs.set(*SubRegs); } } } @@ -464,10 +465,9 @@ bool SchedulePostRATDList::ToggleKillFlag(MachineInstr *MI, MO.setIsKill(false); bool AllDead = true; const unsigned SuperReg = MO.getReg(); - for (const uint16_t *Subreg = TRI->getSubRegisters(SuperReg); - *Subreg; ++Subreg) { - if (LiveRegs.test(*Subreg)) { - MI->addOperand(MachineOperand::CreateReg(*Subreg, + for (MCSubRegIterator SubRegs(SuperReg, TRI); SubRegs.isValid(); ++SubRegs) { + if (LiveRegs.test(*SubRegs)) { + MI->addOperand(MachineOperand::CreateReg(*SubRegs, true /*IsDef*/, true /*IsImp*/, false /*IsKill*/, @@ -517,9 +517,8 @@ void SchedulePostRATDList::FixupKills(MachineBasicBlock *MBB) { LiveRegs.reset(Reg); // Repeat for all subregs. - for (const uint16_t *Subreg = TRI->getSubRegisters(Reg); - *Subreg; ++Subreg) - LiveRegs.reset(*Subreg); + for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) + LiveRegs.reset(*SubRegs); } // Examine all used registers and set/clear kill flag. When a @@ -536,9 +535,8 @@ void SchedulePostRATDList::FixupKills(MachineBasicBlock *MBB) { if (!killedRegs.test(Reg)) { kill = true; // A register is not killed if any subregs are live... 
- for (const uint16_t *Subreg = TRI->getSubRegisters(Reg); - *Subreg; ++Subreg) { - if (LiveRegs.test(*Subreg)) { + for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) { + if (LiveRegs.test(*SubRegs)) { kill = false; break; } @@ -570,9 +568,8 @@ void SchedulePostRATDList::FixupKills(MachineBasicBlock *MBB) { LiveRegs.set(Reg); - for (const uint16_t *Subreg = TRI->getSubRegisters(Reg); - *Subreg; ++Subreg) - LiveRegs.set(*Subreg); + for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) + LiveRegs.set(*SubRegs); } } } diff --git a/lib/CodeGen/ProcessImplicitDefs.cpp b/lib/CodeGen/ProcessImplicitDefs.cpp index 1ad3479..34d075c 100644 --- a/lib/CodeGen/ProcessImplicitDefs.cpp +++ b/lib/CodeGen/ProcessImplicitDefs.cpp @@ -9,297 +9,163 @@ #define DEBUG_TYPE "processimplicitdefs" -#include "llvm/CodeGen/ProcessImplicitDefs.h" - -#include "llvm/ADT/DepthFirstIterator.h" -#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SetVector.h" #include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/CodeGen/LiveVariables.h" +#include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetRegisterInfo.h" - using namespace llvm; +namespace { +/// Process IMPLICIT_DEF instructions and make sure there is one implicit_def +/// for each use. Add isUndef marker to implicit_def defs and their uses. +class ProcessImplicitDefs : public MachineFunctionPass { + const TargetInstrInfo *TII; + const TargetRegisterInfo *TRI; + MachineRegisterInfo *MRI; + + SmallSetVector WorkList; + + void processImplicitDef(MachineInstr *MI); + bool canTurnIntoImplicitDef(MachineInstr *MI); + +public: + static char ID; + + ProcessImplicitDefs() : MachineFunctionPass(ID) { + initializeProcessImplicitDefsPass(*PassRegistry::getPassRegistry()); + } + + virtual void getAnalysisUsage(AnalysisUsage &au) const; + + virtual bool runOnMachineFunction(MachineFunction &fn); +}; +} // end anonymous namespace + char ProcessImplicitDefs::ID = 0; char &llvm::ProcessImplicitDefsID = ProcessImplicitDefs::ID; INITIALIZE_PASS_BEGIN(ProcessImplicitDefs, "processimpdefs", "Process Implicit Definitions", false, false) -INITIALIZE_PASS_DEPENDENCY(LiveVariables) INITIALIZE_PASS_END(ProcessImplicitDefs, "processimpdefs", "Process Implicit Definitions", false, false) void ProcessImplicitDefs::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesCFG(); AU.addPreserved(); - AU.addPreserved(); - AU.addPreservedID(MachineLoopInfoID); - AU.addPreservedID(MachineDominatorsID); - AU.addPreservedID(TwoAddressInstructionPassID); - AU.addPreservedID(PHIEliminationID); MachineFunctionPass::getAnalysisUsage(AU); } -bool -ProcessImplicitDefs::CanTurnIntoImplicitDef(MachineInstr *MI, - unsigned Reg, unsigned OpIdx, - SmallSet &ImpDefRegs) { - switch(OpIdx) { - case 1: - return MI->isCopy() && (!MI->getOperand(0).readsReg() || - ImpDefRegs.count(MI->getOperand(0).getReg())); - case 2: - return MI->isSubregToReg() && (!MI->getOperand(0).readsReg() || - ImpDefRegs.count(MI->getOperand(0).getReg())); - default: return false; - } -} - -static bool isUndefCopy(MachineInstr *MI, unsigned Reg, - SmallSet &ImpDefRegs) { - if (MI->isCopy()) { - MachineOperand &MO0 = MI->getOperand(0); - MachineOperand &MO1 = MI->getOperand(1); - if (MO1.getReg() != Reg) - return false; - if (!MO0.readsReg() || 
ImpDefRegs.count(MO0.getReg()))
- return true;
+bool ProcessImplicitDefs::canTurnIntoImplicitDef(MachineInstr *MI) {
+ if (!MI->isCopyLike() &&
+ !MI->isInsertSubreg() &&
+ !MI->isRegSequence() &&
+ !MI->isPHI())
return false;
- }
- return false;
+ for (MIOperands MO(MI); MO.isValid(); ++MO)
+ if (MO->isReg() && MO->isUse() && MO->readsReg())
+ return false;
+ return true;
}

-/// processImplicitDefs - Process IMPLICIT_DEF instructions and make sure
-/// there is one implicit_def for each use. Add isUndef marker to
-/// implicit_def defs and their uses.
-bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &fn) {
-
- DEBUG(dbgs() << "********** PROCESS IMPLICIT DEFS **********\n"
- << "********** Function: "
- << ((Value*)fn.getFunction())->getName() << '\n');
-
- bool Changed = false;
-
- TII = fn.getTarget().getInstrInfo();
- TRI = fn.getTarget().getRegisterInfo();
- MRI = &fn.getRegInfo();
- LV = getAnalysisIfAvailable();
-
- SmallSet ImpDefRegs;
- SmallVector ImpDefMIs;
- SmallVector RUses;
- SmallPtrSet Visited;
- SmallPtrSet ModInsts;
-
- MachineBasicBlock *Entry = fn.begin();
- for (df_ext_iterator >
- DFI = df_ext_begin(Entry, Visited), E = df_ext_end(Entry, Visited);
- DFI != E; ++DFI) {
- MachineBasicBlock *MBB = *DFI;
- for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end();
- I != E; ) {
- MachineInstr *MI = &*I;
- ++I;
- if (MI->isImplicitDef()) {
- ImpDefMIs.push_back(MI);
- // Is this a sub-register read-modify-write?
- if (MI->getOperand(0).readsReg())
- continue;
- unsigned Reg = MI->getOperand(0).getReg();
- ImpDefRegs.insert(Reg);
- if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
- for (const uint16_t *SS = TRI->getSubRegisters(Reg); *SS; ++SS)
- ImpDefRegs.insert(*SS);
- }
+void ProcessImplicitDefs::processImplicitDef(MachineInstr *MI) {
+ DEBUG(dbgs() << "Processing " << *MI);
+ unsigned Reg = MI->getOperand(0).getReg();
+
+ if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ // For virtual registers, mark all uses as <undef>, and convert users to
+ // implicit-def when possible.
+ for (MachineRegisterInfo::use_nodbg_iterator UI =
+ MRI->use_nodbg_begin(Reg),
+ UE = MRI->use_nodbg_end(); UI != UE; ++UI) {
+ MachineOperand &MO = UI.getOperand();
+ MO.setIsUndef();
+ MachineInstr *UserMI = MO.getParent();
+ if (!canTurnIntoImplicitDef(UserMI))
continue;
- }
-
- // Eliminate %reg1032:sub = COPY undef.
- if (MI->isCopy() && MI->getOperand(0).readsReg()) {
- MachineOperand &MO = MI->getOperand(1);
- if (MO.isUndef() || ImpDefRegs.count(MO.getReg())) {
- if (LV && MO.isKill()) {
- LiveVariables::VarInfo& vi = LV->getVarInfo(MO.getReg());
- vi.removeKill(MI);
- }
- unsigned Reg = MI->getOperand(0).getReg();
- MI->eraseFromParent();
- Changed = true;
-
- // A REG_SEQUENCE may have been expanded into partial definitions.
- // If this was the last one, mark Reg as implicitly defined.
- if (TargetRegisterInfo::isVirtualRegister(Reg) && MRI->def_empty(Reg))
- ImpDefRegs.insert(Reg);
- continue;
- }
- }
-
- bool ChangedToImpDef = false;
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- MachineOperand& MO = MI->getOperand(i);
- if (!MO.isReg() || !MO.readsReg())
- continue;
- unsigned Reg = MO.getReg();
- if (!Reg)
- continue;
- if (!ImpDefRegs.count(Reg))
- continue;
- // Use is a copy, just turn it into an implicit_def.
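The new virtual-register path above is worklist-driven: mark every use of the IMPLICIT_DEF's result as undef, convert users that no longer read anything live into IMPLICIT_DEFs themselves, and queue those for the same treatment. A toy standalone version of the propagation, far simpler than the real pass; the removal of the old code continues below.

#include <cstddef>
#include <set>
#include <vector>

struct ToyInst {
  bool IsImplicitDef;
  int DefReg;            // register written, -1 if none
  std::vector<int> Uses; // registers read
};

void propagateImplicitDefs(std::vector<ToyInst> &Insts) {
  std::vector<std::size_t> WorkList;
  for (std::size_t I = 0; I != Insts.size(); ++I)
    if (Insts[I].IsImplicitDef)
      WorkList.push_back(I);

  std::set<int> UndefRegs;
  while (!WorkList.empty()) {
    std::size_t I = WorkList.back();
    WorkList.pop_back();
    UndefRegs.insert(Insts[I].DefReg);
    // Users whose every read is now undef become implicit defs too.
    for (std::size_t U = 0; U != Insts.size(); ++U) {
      ToyInst &User = Insts[U];
      if (User.IsImplicitDef || User.Uses.empty())
        continue;
      bool AllUndef = true;
      for (std::size_t K = 0; K != User.Uses.size(); ++K)
        if (!UndefRegs.count(User.Uses[K]))
          AllUndef = false;
      if (AllUndef) {
        User.IsImplicitDef = true;
        User.Uses.clear();
        WorkList.push_back(U);
      }
    }
  }
}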
- if (CanTurnIntoImplicitDef(MI, Reg, i, ImpDefRegs)) { - bool isKill = MO.isKill(); - MI->setDesc(TII->get(TargetOpcode::IMPLICIT_DEF)); - for (int j = MI->getNumOperands() - 1, ee = 0; j > ee; --j) - MI->RemoveOperand(j); - if (isKill) { - ImpDefRegs.erase(Reg); - if (LV) { - LiveVariables::VarInfo& vi = LV->getVarInfo(Reg); - vi.removeKill(MI); - } - } - ChangedToImpDef = true; - Changed = true; - break; - } - - Changed = true; - MO.setIsUndef(); - // This is a partial register redef of an implicit def. - // Make sure the whole register is defined by the instruction. - if (MO.isDef()) { - MI->addRegisterDefined(Reg); - continue; - } - if (MO.isKill() || MI->isRegTiedToDefOperand(i)) { - // Make sure other reads of Reg are also marked . - for (unsigned j = i+1; j != e; ++j) { - MachineOperand &MOJ = MI->getOperand(j); - if (MOJ.isReg() && MOJ.getReg() == Reg && MOJ.readsReg()) - MOJ.setIsUndef(); - } - ImpDefRegs.erase(Reg); - } - } - - if (ChangedToImpDef) { - // Backtrack to process this new implicit_def. - --I; - } else { - for (unsigned i = 0; i != MI->getNumOperands(); ++i) { - MachineOperand& MO = MI->getOperand(i); - if (!MO.isReg() || !MO.isDef()) - continue; - ImpDefRegs.erase(MO.getReg()); - } - } + DEBUG(dbgs() << "Converting to IMPLICIT_DEF: " << *UserMI); + UserMI->setDesc(TII->get(TargetOpcode::IMPLICIT_DEF)); + WorkList.insert(UserMI); } + MI->eraseFromParent(); + return; + } - // Any outstanding liveout implicit_def's? - for (unsigned i = 0, e = ImpDefMIs.size(); i != e; ++i) { - MachineInstr *MI = ImpDefMIs[i]; - unsigned Reg = MI->getOperand(0).getReg(); - if (TargetRegisterInfo::isPhysicalRegister(Reg) || - !ImpDefRegs.count(Reg)) { - // Delete all "local" implicit_def's. That include those which define - // physical registers since they cannot be liveout. - MI->eraseFromParent(); - Changed = true; + // This is a physreg implicit-def. + // Look for the first instruction to use or define an alias. + MachineBasicBlock::instr_iterator UserMI = MI; + MachineBasicBlock::instr_iterator UserE = MI->getParent()->instr_end(); + bool Found = false; + for (++UserMI; UserMI != UserE; ++UserMI) { + for (MIOperands MO(UserMI); MO.isValid(); ++MO) { + if (!MO->isReg()) continue; - } - - // If there are multiple defs of the same register and at least one - // is not an implicit_def, do not insert implicit_def's before the - // uses. - bool Skip = false; - SmallVector DeadImpDefs; - for (MachineRegisterInfo::def_iterator DI = MRI->def_begin(Reg), - DE = MRI->def_end(); DI != DE; ++DI) { - MachineInstr *DeadImpDef = &*DI; - if (!DeadImpDef->isImplicitDef()) { - Skip = true; - break; - } - DeadImpDefs.push_back(DeadImpDef); - } - if (Skip) + unsigned UserReg = MO->getReg(); + if (!TargetRegisterInfo::isPhysicalRegister(UserReg) || + !TRI->regsOverlap(Reg, UserReg)) continue; + // UserMI uses or redefines Reg. Set flags on all uses. + Found = true; + if (MO->isUse()) + MO->setIsUndef(); + } + if (Found) + break; + } - // The only implicit_def which we want to keep are those that are live - // out of its block. - for (unsigned j = 0, ee = DeadImpDefs.size(); j != ee; ++j) - DeadImpDefs[j]->eraseFromParent(); - Changed = true; - - // Process each use instruction once. - for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(Reg), - UE = MRI->use_end(); UI != UE; ++UI) { - if (UI.getOperand().isUndef()) - continue; - MachineInstr *RMI = &*UI; - if (ModInsts.insert(RMI)) - RUses.push_back(RMI); - } + // If we found the using MI, we can erase the IMPLICIT_DEF. 
+ if (Found) { + DEBUG(dbgs() << "Physreg user: " << *UserMI); + MI->eraseFromParent(); + return; + } - for (unsigned i = 0, e = RUses.size(); i != e; ++i) { - MachineInstr *RMI = RUses[i]; + // Using instr wasn't found, it could be in another block. + // Leave the physreg IMPLICIT_DEF, but trim any extra operands. + for (unsigned i = MI->getNumOperands() - 1; i; --i) + MI->RemoveOperand(i); + DEBUG(dbgs() << "Keeping physreg: " << *MI); +} - // Turn a copy use into an implicit_def. - if (isUndefCopy(RMI, Reg, ImpDefRegs)) { - RMI->setDesc(TII->get(TargetOpcode::IMPLICIT_DEF)); +/// processImplicitDefs - Process IMPLICIT_DEF instructions and turn them into +/// <undef> operands. +bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &MF) { - bool isKill = false; - SmallVector Ops; - for (unsigned j = 0, ee = RMI->getNumOperands(); j != ee; ++j) { - MachineOperand &RRMO = RMI->getOperand(j); - if (RRMO.isReg() && RRMO.getReg() == Reg) { - Ops.push_back(j); - if (RRMO.isKill()) - isKill = true; - } - } - // Leave the other operands along. - for (unsigned j = 0, ee = Ops.size(); j != ee; ++j) { - unsigned OpIdx = Ops[j]; - RMI->RemoveOperand(OpIdx-j); - } + DEBUG(dbgs() << "********** PROCESS IMPLICIT DEFS **********\n" + << "********** Function: " + << ((Value*)MF.getFunction())->getName() << '\n'); - // Update LiveVariables varinfo if the instruction is a kill. - if (LV && isKill) { - LiveVariables::VarInfo& vi = LV->getVarInfo(Reg); - vi.removeKill(RMI); - } - continue; - } + bool Changed = false; - // Replace Reg with a new vreg that's marked implicit. - const TargetRegisterClass* RC = MRI->getRegClass(Reg); - unsigned NewVReg = MRI->createVirtualRegister(RC); - bool isKill = true; - for (unsigned j = 0, ee = RMI->getNumOperands(); j != ee; ++j) { - MachineOperand &RRMO = RMI->getOperand(j); - if (RRMO.isReg() && RRMO.getReg() == Reg) { - RRMO.setReg(NewVReg); - RRMO.setIsUndef(); - if (isKill) { - // Only the first operand of NewVReg is marked kill. - RRMO.setIsKill(); - isKill = false; - } - } - } - } - RUses.clear(); - ModInsts.clear(); - } - ImpDefRegs.clear(); - ImpDefMIs.clear(); + TII = MF.getTarget().getInstrInfo(); + TRI = MF.getTarget().getRegisterInfo(); + MRI = &MF.getRegInfo(); + assert(MRI->isSSA() && "ProcessImplicitDefs only works on SSA form."); + assert(WorkList.empty() && "Inconsistent worklist state"); + + for (MachineFunction::iterator MFI = MF.begin(), MFE = MF.end(); + MFI != MFE; ++MFI) { + // Scan the basic block for implicit defs. + for (MachineBasicBlock::instr_iterator MBBI = MFI->instr_begin(), + MBBE = MFI->instr_end(); MBBI != MBBE; ++MBBI) + if (MBBI->isImplicitDef()) + WorkList.insert(MBBI); + + if (WorkList.empty()) + continue; + + DEBUG(dbgs() << "BB#" << MFI->getNumber() << " has " << WorkList.size() + << " implicit defs.\n"); + Changed = true; + + // Drain the WorkList to recursively process any new implicit defs. + do processImplicitDef(WorkList.pop_back_val()); + while (!WorkList.empty()); } - return Changed; } - diff --git a/lib/CodeGen/PrologEpilogInserter.cpp b/lib/CodeGen/PrologEpilogInserter.cpp index 458915e..c791ffb 100644 --- a/lib/CodeGen/PrologEpilogInserter.cpp +++ b/lib/CodeGen/PrologEpilogInserter.cpp @@ -302,7 +302,7 @@ void PEI::insertCSRSpillsAndRestores(MachineFunction &Fn) { const TargetRegisterInfo *TRI = Fn.getTarget().getRegisterInfo(); MachineBasicBlock::iterator I; - if (! ShrinkWrapThisFunction) { + if (!ShrinkWrapThisFunction) { // Spill using target interface.
I = EntryBlock->begin(); if (!TFI->spillCalleeSavedRegisters(*EntryBlock, I, CSI, TRI)) { diff --git a/lib/CodeGen/RegAllocBase.cpp b/lib/CodeGen/RegAllocBase.cpp index b00eceb..993dbc7 100644 --- a/lib/CodeGen/RegAllocBase.cpp +++ b/lib/CodeGen/RegAllocBase.cpp @@ -14,6 +14,7 @@ #define DEBUG_TYPE "regalloc" #include "RegAllocBase.h" +#include "LiveRegMatrix.h" #include "Spiller.h" #include "VirtRegMap.h" #include "llvm/ADT/Statistic.h" @@ -34,8 +35,6 @@ using namespace llvm; -STATISTIC(NumAssigned , "Number of registers assigned"); -STATISTIC(NumUnassigned , "Number of registers unassigned"); STATISTIC(NumNewQueued , "Number of new live ranges queued"); // Temporary verification option until we can put verification inside @@ -47,85 +46,20 @@ VerifyRegAlloc("verify-regalloc", cl::location(RegAllocBase::VerifyEnabled), const char *RegAllocBase::TimerGroupName = "Register Allocation"; bool RegAllocBase::VerifyEnabled = false; -#ifndef NDEBUG -// Verify each LiveIntervalUnion. -void RegAllocBase::verify() { - LiveVirtRegBitSet VisitedVRegs; - OwningArrayPtr - unionVRegs(new LiveVirtRegBitSet[PhysReg2LiveUnion.numRegs()]); - - // Verify disjoint unions. - for (unsigned PhysReg = 0; PhysReg < PhysReg2LiveUnion.numRegs(); ++PhysReg) { - DEBUG(PhysReg2LiveUnion[PhysReg].print(dbgs(), TRI)); - LiveVirtRegBitSet &VRegs = unionVRegs[PhysReg]; - PhysReg2LiveUnion[PhysReg].verify(VRegs); - // Union + intersection test could be done efficiently in one pass, but - // don't add a method to SparseBitVector unless we really need it. - assert(!VisitedVRegs.intersects(VRegs) && "vreg in multiple unions"); - VisitedVRegs |= VRegs; - } - - // Verify vreg coverage. - for (LiveIntervals::iterator liItr = LIS->begin(), liEnd = LIS->end(); - liItr != liEnd; ++liItr) { - unsigned reg = liItr->first; - if (TargetRegisterInfo::isPhysicalRegister(reg)) continue; - if (!VRM->hasPhys(reg)) continue; // spilled? - unsigned PhysReg = VRM->getPhys(reg); - if (!unionVRegs[PhysReg].test(reg)) { - dbgs() << "LiveVirtReg " << reg << " not in union " << - TRI->getName(PhysReg) << "\n"; - llvm_unreachable("unallocated live vreg"); - } - } - // FIXME: I'm not sure how to verify spilled intervals. -} -#endif //!NDEBUG - //===----------------------------------------------------------------------===// // RegAllocBase Implementation //===----------------------------------------------------------------------===// -// Instantiate a LiveIntervalUnion for each physical register. 
-void RegAllocBase::LiveUnionArray::init(LiveIntervalUnion::Allocator &allocator, - unsigned NRegs) { - NumRegs = NRegs; - Array = - static_cast(malloc(sizeof(LiveIntervalUnion)*NRegs)); - for (unsigned r = 0; r != NRegs; ++r) - new(Array + r) LiveIntervalUnion(r, allocator); -} - -void RegAllocBase::init(VirtRegMap &vrm, LiveIntervals &lis) { - NamedRegionTimer T("Initialize", TimerGroupName, TimePassesIsEnabled); +void RegAllocBase::init(VirtRegMap &vrm, + LiveIntervals &lis, + LiveRegMatrix &mat) { TRI = &vrm.getTargetRegInfo(); MRI = &vrm.getRegInfo(); VRM = &vrm; LIS = &lis; + Matrix = &mat; MRI->freezeReservedRegs(vrm.getMachineFunction()); RegClassInfo.runOnMachineFunction(vrm.getMachineFunction()); - - const unsigned NumRegs = TRI->getNumRegs(); - if (NumRegs != PhysReg2LiveUnion.numRegs()) { - PhysReg2LiveUnion.init(UnionAllocator, NumRegs); - // Cache an interferece query for each physical reg - Queries.reset(new LiveIntervalUnion::Query[PhysReg2LiveUnion.numRegs()]); - } -} - -void RegAllocBase::LiveUnionArray::clear() { - if (!Array) - return; - for (unsigned r = 0; r != NumRegs; ++r) - Array[r].~LiveIntervalUnion(); - free(Array); - NumRegs = 0; - Array = 0; -} - -void RegAllocBase::releaseMemory() { - for (unsigned r = 0, e = PhysReg2LiveUnion.numRegs(); r != e; ++r) - PhysReg2LiveUnion[r].clear(); } // Visit all the live registers. If they are already assigned to a physical @@ -133,35 +67,14 @@ void RegAllocBase::releaseMemory() { // them on the priority queue for later assignment. void RegAllocBase::seedLiveRegs() { NamedRegionTimer T("Seed Live Regs", TimerGroupName, TimePassesIsEnabled); - for (LiveIntervals::iterator I = LIS->begin(), E = LIS->end(); I != E; ++I) { - unsigned RegNum = I->first; - LiveInterval &VirtReg = *I->second; - if (TargetRegisterInfo::isPhysicalRegister(RegNum)) - PhysReg2LiveUnion[RegNum].unify(VirtReg); - else - enqueue(&VirtReg); + for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) { + unsigned Reg = TargetRegisterInfo::index2VirtReg(i); + if (MRI->reg_nodbg_empty(Reg)) + continue; + enqueue(&LIS->getInterval(Reg)); } } -void RegAllocBase::assign(LiveInterval &VirtReg, unsigned PhysReg) { - DEBUG(dbgs() << "assigning " << PrintReg(VirtReg.reg, TRI) - << " to " << PrintReg(PhysReg, TRI) << '\n'); - assert(!VRM->hasPhys(VirtReg.reg) && "Duplicate VirtReg assignment"); - VRM->assignVirt2Phys(VirtReg.reg, PhysReg); - MRI->setPhysRegUsed(PhysReg); - PhysReg2LiveUnion[PhysReg].unify(VirtReg); - ++NumAssigned; -} - -void RegAllocBase::unassign(LiveInterval &VirtReg, unsigned PhysReg) { - DEBUG(dbgs() << "unassigning " << PrintReg(VirtReg.reg, TRI) - << " from " << PrintReg(PhysReg, TRI) << '\n'); - assert(VRM->getPhys(VirtReg.reg) == PhysReg && "Inconsistent unassign"); - PhysReg2LiveUnion[PhysReg].extract(VirtReg); - VRM->clearVirt(VirtReg.reg); - ++NumUnassigned; -} - // Top-level driver to manage the queue of unassigned VirtRegs and call the // selectOrSplit implementation. void RegAllocBase::allocatePhysRegs() { @@ -179,14 +92,14 @@ void RegAllocBase::allocatePhysRegs() { } // Invalidate all interference queries, live ranges could have changed. - invalidateVirtRegs(); + Matrix->invalidateVirtRegs(); // selectOrSplit requests the allocator to return an available physical // register if possible and populate a list of new live intervals that // result from splitting. 
DEBUG(dbgs() << "\nselectOrSplit " << MRI->getRegClass(VirtReg->reg)->getName() - << ':' << *VirtReg << '\n'); + << ':' << PrintReg(VirtReg->reg) << ' ' << *VirtReg << '\n'); typedef SmallVector VirtRegVec; VirtRegVec SplitVRegs; unsigned AvailablePhysReg = selectOrSplit(*VirtReg, SplitVRegs); @@ -211,7 +124,7 @@ void RegAllocBase::allocatePhysRegs() { } if (AvailablePhysReg) - assign(*VirtReg, AvailablePhysReg); + Matrix->assign(*VirtReg, AvailablePhysReg); for (VirtRegVec::iterator I = SplitVRegs.begin(), E = SplitVRegs.end(); I != E; ++I) { @@ -230,51 +143,3 @@ void RegAllocBase::allocatePhysRegs() { } } } - -// Check if this live virtual register interferes with a physical register. If -// not, then check for interference on each register that aliases with the -// physical register. Return the interfering register. -unsigned RegAllocBase::checkPhysRegInterference(LiveInterval &VirtReg, - unsigned PhysReg) { - for (const uint16_t *AliasI = TRI->getOverlaps(PhysReg); *AliasI; ++AliasI) - if (query(VirtReg, *AliasI).checkInterference()) - return *AliasI; - return 0; -} - -// Add newly allocated physical registers to the MBB live in sets. -void RegAllocBase::addMBBLiveIns(MachineFunction *MF) { - NamedRegionTimer T("MBB Live Ins", TimerGroupName, TimePassesIsEnabled); - SlotIndexes *Indexes = LIS->getSlotIndexes(); - if (MF->size() <= 1) - return; - - LiveIntervalUnion::SegmentIter SI; - for (unsigned PhysReg = 0; PhysReg < PhysReg2LiveUnion.numRegs(); ++PhysReg) { - LiveIntervalUnion &LiveUnion = PhysReg2LiveUnion[PhysReg]; - if (LiveUnion.empty()) - continue; - DEBUG(dbgs() << PrintReg(PhysReg, TRI) << " live-in:"); - MachineFunction::iterator MBB = llvm::next(MF->begin()); - MachineFunction::iterator MFE = MF->end(); - SlotIndex Start, Stop; - tie(Start, Stop) = Indexes->getMBBRange(MBB); - SI.setMap(LiveUnion.getMap()); - SI.find(Start); - while (SI.valid()) { - if (SI.start() <= Start) { - if (!MBB->isLiveIn(PhysReg)) - MBB->addLiveIn(PhysReg); - DEBUG(dbgs() << "\tBB#" << MBB->getNumber() << ':' - << PrintReg(SI.value()->reg, TRI)); - } else if (SI.start() > Stop) - MBB = Indexes->getMBBFromIndex(SI.start().getPrevIndex()); - if (++MBB == MFE) - break; - tie(Start, Stop) = Indexes->getMBBRange(MBB); - SI.advanceTo(Start); - } - DEBUG(dbgs() << '\n'); - } -} - diff --git a/lib/CodeGen/RegAllocBase.h b/lib/CodeGen/RegAllocBase.h index 072fe2b..db0c8e1 100644 --- a/lib/CodeGen/RegAllocBase.h +++ b/lib/CodeGen/RegAllocBase.h @@ -37,9 +37,9 @@ #ifndef LLVM_CODEGEN_REGALLOCBASE #define LLVM_CODEGEN_REGALLOCBASE -#include "llvm/ADT/OwningPtr.h" #include "LiveIntervalUnion.h" -#include "RegisterClassInfo.h" +#include "llvm/CodeGen/RegisterClassInfo.h" +#include "llvm/ADT/OwningPtr.h" namespace llvm { @@ -47,6 +47,7 @@ template class SmallVectorImpl; class TargetRegisterInfo; class VirtRegMap; class LiveIntervals; +class LiveRegMatrix; class Spiller; /// RegAllocBase provides the register allocation driver and interface that can @@ -56,69 +57,20 @@ class Spiller; /// live range splitting. They must also override enqueue/dequeue to provide an /// assignment order. class RegAllocBase { - LiveIntervalUnion::Allocator UnionAllocator; - - // Cache tag for PhysReg2LiveUnion entries. Increment whenever virtual - // registers may have changed. - unsigned UserTag; - - // Array of LiveIntervalUnions indexed by physical register. 
- class LiveUnionArray { - unsigned NumRegs; - LiveIntervalUnion *Array; - public: - LiveUnionArray(): NumRegs(0), Array(0) {} - ~LiveUnionArray() { clear(); } - - unsigned numRegs() const { return NumRegs; } - - void init(LiveIntervalUnion::Allocator &, unsigned NRegs); - - void clear(); - - LiveIntervalUnion& operator[](unsigned PhysReg) { - assert(PhysReg < NumRegs && "physReg out of bounds"); - return Array[PhysReg]; - } - }; - - LiveUnionArray PhysReg2LiveUnion; - - // Current queries, one per physreg. They must be reinitialized each time we - // query on a new live virtual register. - OwningArrayPtr Queries; - protected: const TargetRegisterInfo *TRI; MachineRegisterInfo *MRI; VirtRegMap *VRM; LiveIntervals *LIS; + LiveRegMatrix *Matrix; RegisterClassInfo RegClassInfo; - RegAllocBase(): UserTag(0), TRI(0), MRI(0), VRM(0), LIS(0) {} + RegAllocBase(): TRI(0), MRI(0), VRM(0), LIS(0), Matrix(0) {} virtual ~RegAllocBase() {} // A RegAlloc pass should call this before allocatePhysRegs. - void init(VirtRegMap &vrm, LiveIntervals &lis); - - // Get an initialized query to check interferences between lvr and preg. Note - // that Query::init must be called at least once for each physical register - // before querying a new live virtual register. This ties Queries and - // PhysReg2LiveUnion together. - LiveIntervalUnion::Query &query(LiveInterval &VirtReg, unsigned PhysReg) { - Queries[PhysReg].init(UserTag, &VirtReg, &PhysReg2LiveUnion[PhysReg]); - return Queries[PhysReg]; - } - - // Get direct access to the underlying LiveIntervalUnion for PhysReg. - LiveIntervalUnion &getLiveUnion(unsigned PhysReg) { - return PhysReg2LiveUnion[PhysReg]; - } - - // Invalidate all cached information about virtual registers - live ranges may - // have changed. - void invalidateVirtRegs() { ++UserTag; } + void init(VirtRegMap &vrm, LiveIntervals &lis, LiveRegMatrix &mat); // The top-level driver. The output is a VirtRegMap that us updated with // physical register assignments. @@ -140,31 +92,6 @@ protected: virtual unsigned selectOrSplit(LiveInterval &VirtReg, SmallVectorImpl &splitLVRs) = 0; - // A RegAlloc pass should call this when PassManager releases its memory. - virtual void releaseMemory(); - - // Helper for checking interference between a live virtual register and a - // physical register, including all its register aliases. If an interference - // exists, return the interfering register, which may be preg or an alias. - unsigned checkPhysRegInterference(LiveInterval& VirtReg, unsigned PhysReg); - - /// assign - Assign VirtReg to PhysReg. - /// This should not be called from selectOrSplit for the current register. - void assign(LiveInterval &VirtReg, unsigned PhysReg); - - /// unassign - Undo a previous assignment of VirtReg to PhysReg. - /// This can be invoked from selectOrSplit, but be careful to guarantee that - /// allocation is making progress. - void unassign(LiveInterval &VirtReg, unsigned PhysReg); - - /// addMBBLiveIns - Add physreg liveins to basic blocks. - void addMBBLiveIns(MachineFunction *); - -#ifndef NDEBUG - // Verify each LiveIntervalUnion. - void verify(); -#endif - // Use this group name for NamedRegionTimer. 
static const char *TimerGroupName; diff --git a/lib/CodeGen/RegAllocBasic.cpp b/lib/CodeGen/RegAllocBasic.cpp index 77ee314..3a03807 100644 --- a/lib/CodeGen/RegAllocBasic.cpp +++ b/lib/CodeGen/RegAllocBasic.cpp @@ -13,11 +13,12 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "regalloc" +#include "AllocationOrder.h" #include "RegAllocBase.h" #include "LiveDebugVariables.h" -#include "RenderMachineFunction.h" #include "Spiller.h" #include "VirtRegMap.h" +#include "LiveRegMatrix.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Function.h" #include "llvm/PassAnalysisSupport.h" @@ -64,10 +65,6 @@ class RABasic : public MachineFunctionPass, public RegAllocBase // context MachineFunction *MF; - // analyses - LiveStacks *LS; - RenderMachineFunction *RMF; - // state std::auto_ptr SpillerInstance; std::priority_queue, @@ -118,9 +115,6 @@ public: bool spillInterferences(LiveInterval &VirtReg, unsigned PhysReg, SmallVectorImpl &SplitVRegs); - void spillReg(LiveInterval &VirtReg, unsigned PhysReg, - SmallVectorImpl &SplitVRegs); - static char ID; }; @@ -139,7 +133,7 @@ RABasic::RABasic(): MachineFunctionPass(ID) { initializeMachineDominatorTreePass(*PassRegistry::getPassRegistry()); initializeMachineLoopInfoPass(*PassRegistry::getPassRegistry()); initializeVirtRegMapPass(*PassRegistry::getPassRegistry()); - initializeRenderMachineFunctionPass(*PassRegistry::getPassRegistry()); + initializeLiveRegMatrixPass(*PassRegistry::getPassRegistry()); } void RABasic::getAnalysisUsage(AnalysisUsage &AU) const { @@ -147,6 +141,7 @@ void RABasic::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired(); AU.addPreserved(); AU.addRequired(); + AU.addPreserved(); AU.addPreserved(); AU.addRequired(); AU.addPreserved(); @@ -159,41 +154,15 @@ void RABasic::getAnalysisUsage(AnalysisUsage &AU) const { AU.addPreserved(); AU.addRequired(); AU.addPreserved(); - DEBUG(AU.addRequired()); + AU.addRequired(); + AU.addPreserved(); MachineFunctionPass::getAnalysisUsage(AU); } void RABasic::releaseMemory() { SpillerInstance.reset(0); - RegAllocBase::releaseMemory(); } -// Helper for spillInterferences() that spills all interfering vregs currently -// assigned to this physical register. -void RABasic::spillReg(LiveInterval& VirtReg, unsigned PhysReg, - SmallVectorImpl &SplitVRegs) { - LiveIntervalUnion::Query &Q = query(VirtReg, PhysReg); - assert(Q.seenAllInterferences() && "need collectInterferences()"); - const SmallVectorImpl &PendingSpills = Q.interferingVRegs(); - - for (SmallVectorImpl::const_iterator I = PendingSpills.begin(), - E = PendingSpills.end(); I != E; ++I) { - LiveInterval &SpilledVReg = **I; - DEBUG(dbgs() << "extracting from " << - TRI->getName(PhysReg) << " " << SpilledVReg << '\n'); - - // Deallocate the interfering vreg by removing it from the union. - // A LiveInterval instance may not be in a union during modification! - unassign(SpilledVReg, PhysReg); - - // Spill the extracted interval. - LiveRangeEdit LRE(SpilledVReg, SplitVRegs, *MF, *LIS, VRM); - spiller().spill(LRE); - } - // After extracting segments, the query's results are invalid. But keep the - // contents valid until we're done accessing pendingSpills. - Q.clear(); -} // Spill or split all live virtual registers currently unified under PhysReg // that interfere with VirtReg. 
The newly spilled or split live intervals are @@ -202,22 +171,41 @@ bool RABasic::spillInterferences(LiveInterval &VirtReg, unsigned PhysReg, SmallVectorImpl &SplitVRegs) { // Record each interference and determine if all are spillable before mutating // either the union or live intervals. - unsigned NumInterferences = 0; + SmallVector Intfs; + // Collect interferences assigned to any alias of the physical register. - for (const uint16_t *asI = TRI->getOverlaps(PhysReg); *asI; ++asI) { - LiveIntervalUnion::Query &QAlias = query(VirtReg, *asI); - NumInterferences += QAlias.collectInterferingVRegs(); - if (QAlias.seenUnspillableVReg()) { + for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) { + LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, *Units); + Q.collectInterferingVRegs(); + if (Q.seenUnspillableVReg()) return false; + for (unsigned i = Q.interferingVRegs().size(); i; --i) { + LiveInterval *Intf = Q.interferingVRegs()[i - 1]; + if (!Intf->isSpillable() || Intf->weight > VirtReg.weight) + return false; + Intfs.push_back(Intf); } } DEBUG(dbgs() << "spilling " << TRI->getName(PhysReg) << " interferences with " << VirtReg << "\n"); - assert(NumInterferences > 0 && "expect interference"); + assert(!Intfs.empty() && "expected interference"); // Spill each interfering vreg allocated to PhysReg or an alias. - for (const uint16_t *AliasI = TRI->getOverlaps(PhysReg); *AliasI; ++AliasI) - spillReg(VirtReg, *AliasI, SplitVRegs); + for (unsigned i = 0, e = Intfs.size(); i != e; ++i) { + LiveInterval &Spill = *Intfs[i]; + + // Skip duplicates. + if (!VRM->hasPhys(Spill.reg)) + continue; + + // Deallocate the interfering vreg by removing it from the union. + // A LiveInterval instance may not be in a union during modification! + Matrix->unassign(Spill); + + // Spill the extracted interval. + LiveRangeEdit LRE(&Spill, SplitVRegs, *MF, *LIS, VRM); + spiller().spill(LRE); + } return true; } @@ -235,49 +223,36 @@ bool RABasic::spillInterferences(LiveInterval &VirtReg, unsigned PhysReg, // selectOrSplit(). unsigned RABasic::selectOrSplit(LiveInterval &VirtReg, SmallVectorImpl &SplitVRegs) { - // Check for register mask interference. When live ranges cross calls, the - // set of usable registers is reduced to the callee-saved ones. - bool CrossRegMasks = LIS->checkRegMaskInterference(VirtReg, UsableRegs); - // Populate a list of physical register spill candidates. SmallVector PhysRegSpillCands; // Check for an available register in this class. - ArrayRef Order = - RegClassInfo.getOrder(MRI->getRegClass(VirtReg.reg)); - for (ArrayRef::iterator I = Order.begin(), E = Order.end(); I != E; - ++I) { - unsigned PhysReg = *I; - - // If PhysReg is clobbered by a register mask, it isn't useful for - // allocation or spilling. - if (CrossRegMasks && !UsableRegs.test(PhysReg)) - continue; - - // Check interference and as a side effect, intialize queries for this - // VirtReg and its aliases. - unsigned interfReg = checkPhysRegInterference(VirtReg, PhysReg); - if (interfReg == 0) { - // Found an available register. + AllocationOrder Order(VirtReg.reg, *VRM, RegClassInfo); + while (unsigned PhysReg = Order.next()) { + // Check for interference in PhysReg + switch (Matrix->checkInterference(VirtReg, PhysReg)) { + case LiveRegMatrix::IK_Free: + // PhysReg is available, allocate it. 
return PhysReg; - } - LiveIntervalUnion::Query &IntfQ = query(VirtReg, interfReg); - IntfQ.collectInterferingVRegs(1); - LiveInterval *interferingVirtReg = IntfQ.interferingVRegs().front(); - // The current VirtReg must either be spillable, or one of its interferences - // must have less spill weight. - if (interferingVirtReg->weight < VirtReg.weight ) { + case LiveRegMatrix::IK_VirtReg: + // Only virtual registers in the way, we may be able to spill them. PhysRegSpillCands.push_back(PhysReg); + continue; + + default: + // RegMask or RegUnit interference. + continue; } } + // Try to spill another interfering reg with less spill weight. for (SmallVectorImpl::iterator PhysRegI = PhysRegSpillCands.begin(), - PhysRegE = PhysRegSpillCands.end(); PhysRegI != PhysRegE; ++PhysRegI) { - - if (!spillInterferences(VirtReg, *PhysRegI, SplitVRegs)) continue; + PhysRegE = PhysRegSpillCands.end(); PhysRegI != PhysRegE; ++PhysRegI) { + if (!spillInterferences(VirtReg, *PhysRegI, SplitVRegs)) + continue; - assert(checkPhysRegInterference(VirtReg, *PhysRegI) == 0 && + assert(!Matrix->checkInterference(VirtReg, *PhysRegI) && "Interference after spill."); // Tell the caller to allocate to this newly freed physical register. return *PhysRegI; @@ -287,7 +262,7 @@ unsigned RABasic::selectOrSplit(LiveInterval &VirtReg, DEBUG(dbgs() << "spilling: " << VirtReg << '\n'); if (!VirtReg.isSpillable()) return ~0u; - LiveRangeEdit LRE(VirtReg, SplitVRegs, *MF, *LIS, VRM); + LiveRangeEdit LRE(&VirtReg, SplitVRegs, *MF, *LIS, VRM); spiller().spill(LRE); // The live virtual register requesting allocation was spilled, so tell @@ -301,53 +276,17 @@ bool RABasic::runOnMachineFunction(MachineFunction &mf) { << ((Value*)mf.getFunction())->getName() << '\n'); MF = &mf; - DEBUG(RMF = &getAnalysis()); - - RegAllocBase::init(getAnalysis(), getAnalysis()); + RegAllocBase::init(getAnalysis(), + getAnalysis(), + getAnalysis()); SpillerInstance.reset(createInlineSpiller(*this, *MF, *VRM)); allocatePhysRegs(); - addMBBLiveIns(MF); - // Diagnostic output before rewriting DEBUG(dbgs() << "Post alloc VirtRegMap:\n" << *VRM << "\n"); - // optional HTML output - DEBUG(RMF->renderMachineFunction("After basic register allocation.", VRM)); - - // FIXME: Verification currently must run before VirtRegRewriter. We should - // make the rewriter a separate pass and override verifyAnalysis instead. When - // that happens, verification naturally falls under VerifyMachineCode. -#ifndef NDEBUG - if (VerifyEnabled) { - // Verify accuracy of LiveIntervals. The standard machine code verifier - // ensures that each LiveIntervals covers all uses of the virtual reg. - - // FIXME: MachineVerifier is badly broken when using the standard - // spiller. Always use -spiller=inline with -verify-regalloc. Even with the - // inline spiller, some tests fail to verify because the coalescer does not - // always generate verifiable code. - MF->verify(this, "In RABasic::verify"); - - // Verify that LiveIntervals are partitioned into unions and disjoint within - // the unions. - verify(); - } -#endif // !NDEBUG - - // Run rewriter - VRM->rewrite(LIS->getSlotIndexes()); - - // Write out new DBG_VALUE instructions. - getAnalysis().emitDebugValues(VRM); - - // All machine operands and other references to virtual registers have been - // replaced. Remove the virtual registers and release all the transient data. 
- VRM->clearAllVirt(); - MRI->clearVirtRegs(); releaseMemory(); - return true; } diff --git a/lib/CodeGen/RegAllocFast.cpp b/lib/CodeGen/RegAllocFast.cpp index e09b7f8..6b3a48e 100644 --- a/lib/CodeGen/RegAllocFast.cpp +++ b/lib/CodeGen/RegAllocFast.cpp @@ -13,7 +13,6 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "regalloc" -#include "RegisterClassInfo.h" #include "llvm/BasicBlock.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" @@ -22,6 +21,7 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/RegAllocRegistry.h" +#include "llvm/CodeGen/RegisterClassInfo.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Support/CommandLine.h" @@ -77,7 +77,7 @@ namespace { explicit LiveReg(unsigned v) : LastUse(0), VirtReg(v), PhysReg(0), LastOpNum(0), Dirty(false) {} - unsigned getSparseSetKey() const { + unsigned getSparseSetIndex() const { return TargetRegisterInfo::virtReg2Index(VirtReg); } }; @@ -201,20 +201,16 @@ int RAFast::getStackSpaceFor(unsigned VirtReg, const TargetRegisterClass *RC) { /// its virtual register, and it is guaranteed to be a block-local register. /// bool RAFast::isLastUseOfLocalReg(MachineOperand &MO) { - // Check for non-debug uses or defs following MO. - // This is the most likely way to fail - fast path it. - MachineOperand *Next = &MO; - while ((Next = Next->getNextOperandForReg())) - if (!Next->isDebug()) - return false; - // If the register has ever been spilled or reloaded, we conservatively assume // it is a global register used in multiple blocks. if (StackSlotForVirtReg[MO.getReg()] != -1) return false; // Check that the use/def chain has exactly one operand - MO. - return &MRI->reg_nodbg_begin(MO.getReg()).getOperand() == &MO; + MachineRegisterInfo::reg_nodbg_iterator I = MRI->reg_nodbg_begin(MO.getReg()); + if (&I.getOperand() != &MO) + return false; + return ++I == MRI->reg_nodbg_end(); } /// addKillFlag - Set kill flags on last use of a virtual register. @@ -354,8 +350,8 @@ void RAFast::usePhysReg(MachineOperand &MO) { } // Maybe a superregister is reserved? - for (const uint16_t *AS = TRI->getAliasSet(PhysReg); - unsigned Alias = *AS; ++AS) { + for (MCRegAliasIterator AI(PhysReg, TRI, false); AI.isValid(); ++AI) { + unsigned Alias = *AI; switch (PhysRegState[Alias]) { case regDisabled: break; @@ -408,8 +404,8 @@ void RAFast::definePhysReg(MachineInstr *MI, unsigned PhysReg, // This is a disabled register, disable all aliases. PhysRegState[PhysReg] = NewState; - for (const uint16_t *AS = TRI->getAliasSet(PhysReg); - unsigned Alias = *AS; ++AS) { + for (MCRegAliasIterator AI(PhysReg, TRI, false); AI.isValid(); ++AI) { + unsigned Alias = *AI; switch (unsigned VirtReg = PhysRegState[Alias]) { case regDisabled: break; @@ -456,8 +452,8 @@ unsigned RAFast::calcSpillCost(unsigned PhysReg) const { // This is a disabled register, add up cost of aliases. DEBUG(dbgs() << PrintReg(PhysReg, TRI) << " is disabled.\n"); unsigned Cost = 0; - for (const uint16_t *AS = TRI->getAliasSet(PhysReg); - unsigned Alias = *AS; ++AS) { + for (MCRegAliasIterator AI(PhysReg, TRI, false); AI.isValid(); ++AI) { + unsigned Alias = *AI; if (UsedInInstr.test(Alias)) return spillImpossible; switch (unsigned VirtReg = PhysRegState[Alias]) { @@ -659,9 +655,10 @@ RAFast::reloadVirtReg(MachineInstr *MI, unsigned OpNum, // Return true if the operand kills its register. 
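// Background for setPhysReg() below, sketched from the operand-flag model
// these allocators use: a def of a sub-register that does not read the rest
// of the register is rendered as <def,read-undef>. Rewriting such an operand
// to a physical sub-register would leave the wider register's liveness
// implicit, so an implicit def of the full register is added in that case.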
bool RAFast::setPhysReg(MachineInstr *MI, unsigned OpNum, unsigned PhysReg) { MachineOperand &MO = MI->getOperand(OpNum); + bool Dead = MO.isDead(); if (!MO.getSubReg()) { MO.setReg(PhysReg); - return MO.isKill() || MO.isDead(); + return MO.isKill() || Dead; } // Handle subregister index. @@ -674,7 +671,13 @@ bool RAFast::setPhysReg(MachineInstr *MI, unsigned OpNum, unsigned PhysReg) { MI->addRegisterKilled(PhysReg, TRI, true); return true; } - return MO.isDead(); + + // A <def,read-undef> of a sub-register requires an implicit def of the full + // register. + if (MO.isDef() && MO.isUndef()) + MI->addRegisterDefined(PhysReg, TRI); + + return Dead; } // Handle special instruction operand like early clobbers and tied ops when @@ -704,13 +707,10 @@ void RAFast::handleThroughOperands(MachineInstr *MI, if (!MO.isReg() || !MO.isDef()) continue; unsigned Reg = MO.getReg(); if (!Reg || !TargetRegisterInfo::isPhysicalRegister(Reg)) continue; - UsedInInstr.set(Reg); - if (ThroughRegs.count(PhysRegState[Reg])) - definePhysReg(MI, Reg, regFree); - for (const uint16_t *AS = TRI->getAliasSet(Reg); *AS; ++AS) { - UsedInInstr.set(*AS); - if (ThroughRegs.count(PhysRegState[*AS])) - definePhysReg(MI, *AS, regFree); + for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) { + UsedInInstr.set(*AI); + if (ThroughRegs.count(PhysRegState[*AI])) + definePhysReg(MI, *AI, regFree); } } @@ -1029,9 +1029,8 @@ void RAFast::AllocateBasicBlock() { if (!Reg || !TargetRegisterInfo::isPhysicalRegister(Reg)) continue; // Look for physreg defs and tied uses. if (!MO.isDef() && !MI->isRegTiedToDefOperand(i)) continue; - UsedInInstr.set(Reg); - for (const uint16_t *AS = TRI->getAliasSet(Reg); *AS; ++AS) - UsedInInstr.set(*AS); + for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) + UsedInInstr.set(*AI); } } diff --git a/lib/CodeGen/RegAllocGreedy.cpp b/lib/CodeGen/RegAllocGreedy.cpp index 3f2a617..6ac5428 100644 --- a/lib/CodeGen/RegAllocGreedy.cpp +++ b/lib/CodeGen/RegAllocGreedy.cpp @@ -16,6 +16,7 @@ #include "AllocationOrder.h" #include "InterferenceCache.h" #include "LiveDebugVariables.h" +#include "LiveRegMatrix.h" #include "RegAllocBase.h" #include "Spiller.h" #include "SpillPlacement.h" @@ -73,7 +74,6 @@ class RAGreedy : public MachineFunctionPass, // analyses SlotIndexes *Indexes; - LiveStacks *LS; MachineDominatorTree *DomTree; MachineLoopInfo *Loops; EdgeBundles *Bundles; @@ -168,19 +168,6 @@ class RAGreedy : public MachineFunctionPass, } }; - // Register mask interference. The current VirtReg is checked for register - // mask interference on entry to selectOrSplit(). If there is no - // interference, UsableRegs is left empty. If there is interference, - // UsableRegs has a bit mask of registers that can be used without register - // mask interference. - BitVector UsableRegs; - - /// clobberedByRegMask - Returns true if PhysReg is not directly usable - /// because of register mask clobbers. - bool clobberedByRegMask(unsigned PhysReg) const { - return !UsableRegs.empty() && !UsableRegs.test(PhysReg); - } - // splitting state.
std::auto_ptr SA; std::auto_ptr SE; @@ -286,6 +273,8 @@ private: SmallVectorImpl&); unsigned tryBlockSplit(LiveInterval&, AllocationOrder&, SmallVectorImpl&); + unsigned tryInstructionSplit(LiveInterval&, AllocationOrder&, + SmallVectorImpl&); unsigned tryLocalSplit(LiveInterval&, AllocationOrder&, SmallVectorImpl&); unsigned trySplit(LiveInterval&, AllocationOrder&, @@ -327,6 +316,7 @@ RAGreedy::RAGreedy(): MachineFunctionPass(ID) { initializeMachineDominatorTreePass(*PassRegistry::getPassRegistry()); initializeMachineLoopInfoPass(*PassRegistry::getPassRegistry()); initializeVirtRegMapPass(*PassRegistry::getPassRegistry()); + initializeLiveRegMatrixPass(*PassRegistry::getPassRegistry()); initializeEdgeBundlesPass(*PassRegistry::getPassRegistry()); initializeSpillPlacementPass(*PassRegistry::getPassRegistry()); } @@ -336,6 +326,7 @@ void RAGreedy::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired(); AU.addPreserved(); AU.addRequired(); + AU.addPreserved(); AU.addRequired(); AU.addPreserved(); AU.addRequired(); @@ -349,6 +340,8 @@ void RAGreedy::getAnalysisUsage(AnalysisUsage &AU) const { AU.addPreserved(); AU.addRequired(); AU.addPreserved(); + AU.addRequired(); + AU.addPreserved(); AU.addRequired(); AU.addRequired(); MachineFunctionPass::getAnalysisUsage(AU); @@ -360,8 +353,8 @@ void RAGreedy::getAnalysisUsage(AnalysisUsage &AU) const { //===----------------------------------------------------------------------===// bool RAGreedy::LRE_CanEraseVirtReg(unsigned VirtReg) { - if (unsigned PhysReg = VRM->getPhys(VirtReg)) { - unassign(LIS->getInterval(VirtReg), PhysReg); + if (VRM->hasPhys(VirtReg)) { + Matrix->unassign(LIS->getInterval(VirtReg)); return true; } // Unassigned virtreg is probably in the priority queue. @@ -370,13 +363,12 @@ bool RAGreedy::LRE_CanEraseVirtReg(unsigned VirtReg) { } void RAGreedy::LRE_WillShrinkVirtReg(unsigned VirtReg) { - unsigned PhysReg = VRM->getPhys(VirtReg); - if (!PhysReg) + if (!VRM->hasPhys(VirtReg)) return; // Register is assigned, put it back on the queue for reassignment. LiveInterval &LI = LIS->getInterval(VirtReg); - unassign(LI, PhysReg); + Matrix->unassign(LI); enqueue(&LI); } @@ -398,7 +390,6 @@ void RAGreedy::releaseMemory() { SpillerInstance.reset(0); ExtraRegInfo.clear(); GlobalCand.clear(); - RegAllocBase::releaseMemory(); } void RAGreedy::enqueue(LiveInterval *LI) { @@ -450,12 +441,9 @@ unsigned RAGreedy::tryAssign(LiveInterval &VirtReg, SmallVectorImpl &NewVRegs) { Order.rewind(); unsigned PhysReg; - while ((PhysReg = Order.next())) { - if (clobberedByRegMask(PhysReg)) - continue; - if (!checkPhysRegInterference(VirtReg, PhysReg)) + while ((PhysReg = Order.next())) + if (!Matrix->checkInterference(VirtReg, PhysReg)) break; - } if (!PhysReg || Order.isHint(PhysReg)) return PhysReg; @@ -464,7 +452,7 @@ unsigned RAGreedy::tryAssign(LiveInterval &VirtReg, // If we missed a simple hint, try to cheaply evict interference from the // preferred register. if (unsigned Hint = MRI->getSimpleHint(VirtReg.reg)) - if (Order.isHint(Hint) && !clobberedByRegMask(Hint)) { + if (Order.isHint(Hint)) { DEBUG(dbgs() << "missed hint " << PrintReg(Hint, TRI) << '\n'); EvictionCost MaxCost(1); if (canEvictInterference(VirtReg, Hint, true, MaxCost)) { @@ -527,6 +515,10 @@ bool RAGreedy::shouldEvict(LiveInterval &A, bool IsHint, /// @returns True when interference can be evicted cheaper than MaxCost. 
bool RAGreedy::canEvictInterference(LiveInterval &VirtReg, unsigned PhysReg, bool IsHint, EvictionCost &MaxCost) { + // It is only possible to evict virtual register interference. + if (Matrix->checkInterference(VirtReg, PhysReg) > LiveRegMatrix::IK_VirtReg) + return false; + // Find VirtReg's cascade number. This will be unassigned if VirtReg was never // involved in an eviction before. If a cascade number was assigned, deny // evicting anything with the same or a newer cascade number. This prevents @@ -539,8 +531,8 @@ bool RAGreedy::canEvictInterference(LiveInterval &VirtReg, unsigned PhysReg, Cascade = NextCascade; EvictionCost Cost; - for (const uint16_t *AliasI = TRI->getOverlaps(PhysReg); *AliasI; ++AliasI) { - LiveIntervalUnion::Query &Q = query(VirtReg, *AliasI); + for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) { + LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, *Units); // If there is 10 or more interferences, chances are one is heavier. if (Q.collectInterferingVRegs(10) >= 10) return false; @@ -548,15 +540,21 @@ bool RAGreedy::canEvictInterference(LiveInterval &VirtReg, unsigned PhysReg, // Check if any interfering live range is heavier than MaxWeight. for (unsigned i = Q.interferingVRegs().size(); i; --i) { LiveInterval *Intf = Q.interferingVRegs()[i - 1]; - if (TargetRegisterInfo::isPhysicalRegister(Intf->reg)) - return false; + assert(TargetRegisterInfo::isVirtualRegister(Intf->reg) && + "Only expecting virtual register interference from query"); // Never evict spill products. They cannot split or spill. if (getStage(*Intf) == RS_Done) return false; // Once a live range becomes small enough, it is urgent that we find a // register for it. This is indicated by an infinite spill weight. These // urgent live ranges get to evict almost anything. - bool Urgent = !VirtReg.isSpillable() && Intf->isSpillable(); + // + // Also allow urgent evictions of unspillable ranges from a strictly + // larger allocation order. + bool Urgent = !VirtReg.isSpillable() && + (Intf->isSpillable() || + RegClassInfo.getNumAllocatableRegs(MRI->getRegClass(VirtReg.reg)) < + RegClassInfo.getNumAllocatableRegs(MRI->getRegClass(Intf->reg))); // Only evict older cascades or live ranges without a cascade. unsigned IntfCascade = ExtraRegInfo[Intf->reg].Cascade; if (Cascade <= IntfCascade) { @@ -597,19 +595,29 @@ void RAGreedy::evictInterference(LiveInterval &VirtReg, unsigned PhysReg, DEBUG(dbgs() << "evicting " << PrintReg(PhysReg, TRI) << " interference: Cascade " << Cascade << '\n'); - for (const uint16_t *AliasI = TRI->getOverlaps(PhysReg); *AliasI; ++AliasI) { - LiveIntervalUnion::Query &Q = query(VirtReg, *AliasI); + + // Collect all interfering virtregs first. + SmallVector Intfs; + for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) { + LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, *Units); assert(Q.seenAllInterferences() && "Didn't check all interfererences."); - for (unsigned i = 0, e = Q.interferingVRegs().size(); i != e; ++i) { - LiveInterval *Intf = Q.interferingVRegs()[i]; - unassign(*Intf, VRM->getPhys(Intf->reg)); - assert((ExtraRegInfo[Intf->reg].Cascade < Cascade || - VirtReg.isSpillable() < Intf->isSpillable()) && - "Cannot decrease cascade number, illegal eviction"); - ExtraRegInfo[Intf->reg].Cascade = Cascade; - ++NumEvicted; - NewVRegs.push_back(Intf); - } + ArrayRef IVR = Q.interferingVRegs(); + Intfs.append(IVR.begin(), IVR.end()); + } + + // Evict them second. This will invalidate the queries. 
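// Why two phases, briefly: Matrix->unassign() edits the per-regunit
// LiveIntervalUnions that back each cached Query, so unassigning while still
// walking Q.interferingVRegs() would iterate over an invalidated result set.
// Snapshotting the victims into Intfs first makes the eviction loop safe.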
+ for (unsigned i = 0, e = Intfs.size(); i != e; ++i) { + LiveInterval *Intf = Intfs[i]; + // The same VirtReg may be present in multiple RegUnits. Skip duplicates. + if (!VRM->hasPhys(Intf->reg)) + continue; + Matrix->unassign(*Intf); + assert((ExtraRegInfo[Intf->reg].Cascade < Cascade || + VirtReg.isSpillable() < Intf->isSpillable()) && + "Cannot decrease cascade number, illegal eviction"); + ExtraRegInfo[Intf->reg].Cascade = Cascade; + ++NumEvicted; + NewVRegs.push_back(Intf); } } @@ -636,8 +644,6 @@ unsigned RAGreedy::tryEvict(LiveInterval &VirtReg, Order.rewind(); while (unsigned PhysReg = Order.next()) { - if (clobberedByRegMask(PhysReg)) - continue; if (TRI->getCostPerUse(PhysReg) >= CostPerUseLimit) continue; // The first use of a callee-saved register in a function has cost 1. @@ -1183,7 +1189,7 @@ unsigned RAGreedy::tryRegionSplit(LiveInterval &VirtReg, AllocationOrder &Order, return 0; // Prepare split editor. - LiveRangeEdit LREdit(VirtReg, NewVRegs, *MF, *LIS, VRM, this); + LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this); SE->reset(LREdit, SplitSpillMode); // Assign all edge bundles to the preferred candidate, or NoCand. @@ -1231,7 +1237,7 @@ unsigned RAGreedy::tryBlockSplit(LiveInterval &VirtReg, AllocationOrder &Order, assert(&SA->getParent() == &VirtReg && "Live range wasn't analyzed"); unsigned Reg = VirtReg.reg; bool SingleInstrs = RegClassInfo.isProperSubClass(MRI->getRegClass(Reg)); - LiveRangeEdit LREdit(VirtReg, NewVRegs, *MF, *LIS, VRM, this); + LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this); SE->reset(LREdit, SplitSpillMode); ArrayRef UseBlocks = SA->getUseBlocks(); for (unsigned i = 0; i != UseBlocks.size(); ++i) { @@ -1265,6 +1271,65 @@ unsigned RAGreedy::tryBlockSplit(LiveInterval &VirtReg, AllocationOrder &Order, return 0; } + +//===----------------------------------------------------------------------===// +// Per-Instruction Splitting +//===----------------------------------------------------------------------===// + +/// tryInstructionSplit - Split a live range around individual instructions. +/// This is normally not worthwhile since the spiller is doing essentially the +/// same thing. However, when the live range is in a constrained register +/// class, it may help to insert copies such that parts of the live range can +/// be moved to a larger register class. +/// +/// This is similar to spilling to a larger register class. +unsigned +RAGreedy::tryInstructionSplit(LiveInterval &VirtReg, AllocationOrder &Order, + SmallVectorImpl &NewVRegs) { + // There is no point to this if there are no larger sub-classes. + if (!RegClassInfo.isProperSubClass(MRI->getRegClass(VirtReg.reg))) + return 0; + + // Always enable split spill mode, since we're effectively spilling to a + // register. + LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this); + SE->reset(LREdit, SplitEditor::SM_Size); + + ArrayRef Uses = SA->getUseSlots(); + if (Uses.size() <= 1) + return 0; + + DEBUG(dbgs() << "Split around " << Uses.size() << " individual instrs.\n"); + + // Split around every non-copy instruction. 
+ for (unsigned i = 0; i != Uses.size(); ++i) { + if (const MachineInstr *MI = Indexes->getInstructionFromIndex(Uses[i])) + if (MI->isFullCopy()) { + DEBUG(dbgs() << " skip:\t" << Uses[i] << '\t' << *MI); + continue; + } + SE->openIntv(); + SlotIndex SegStart = SE->enterIntvBefore(Uses[i]); + SlotIndex SegStop = SE->leaveIntvAfter(Uses[i]); + SE->useIntv(SegStart, SegStop); + } + + if (LREdit.empty()) { + DEBUG(dbgs() << "All uses were copies.\n"); + return 0; + } + + SmallVector IntvMap; + SE->finish(&IntvMap); + DebugVars->splitRegister(VirtReg.reg, LREdit.regs()); + ExtraRegInfo.resize(MRI->getNumVirtRegs()); + + // Assign all new registers to RS_Spill. This was the last chance. + setStage(LREdit.begin(), LREdit.end(), RS_Spill); + return 0; +} + + //===----------------------------------------------------------------------===// // Local Splitting //===----------------------------------------------------------------------===// @@ -1291,9 +1356,9 @@ void RAGreedy::calcGapWeights(unsigned PhysReg, GapWeight.assign(NumGaps, 0.0f); // Add interference from each overlapping register. - for (const uint16_t *AI = TRI->getOverlaps(PhysReg); *AI; ++AI) { - if (!query(const_cast(SA->getParent()), *AI) - .checkInterference()) + for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) { + if (!Matrix->query(const_cast(SA->getParent()), *Units) + .checkInterference()) continue; // We know that VirtReg is a continuous interval from FirstInstr to @@ -1303,7 +1368,8 @@ void RAGreedy::calcGapWeights(unsigned PhysReg, // surrounding the instruction. The exception is interference before // StartIdx and after StopIdx. // - LiveIntervalUnion::SegmentIter IntI = getLiveUnion(*AI).find(StartIdx); + LiveIntervalUnion::SegmentIter IntI = + Matrix->getLiveUnions()[*Units] .find(StartIdx); for (unsigned Gap = 0; IntI.valid() && IntI.start() < StopIdx; ++IntI) { // Skip the gaps before IntI. while (Uses[Gap+1].getBoundaryIndex() < IntI.start()) @@ -1323,6 +1389,30 @@ void RAGreedy::calcGapWeights(unsigned PhysReg, break; } } + + // Add fixed interference. + for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) { + const LiveInterval &LI = LIS->getRegUnit(*Units); + LiveInterval::const_iterator I = LI.find(StartIdx); + LiveInterval::const_iterator E = LI.end(); + + // Same loop as above. Mark any overlapped gaps as HUGE_VALF. + for (unsigned Gap = 0; I != E && I->start < StopIdx; ++I) { + while (Uses[Gap+1].getBoundaryIndex() < I->start) + if (++Gap == NumGaps) + break; + if (Gap == NumGaps) + break; + + for (; Gap != NumGaps; ++Gap) { + GapWeight[Gap] = HUGE_VALF; + if (Uses[Gap+1].getBaseIndex() >= I->end) + break; + } + if (Gap == NumGaps) + break; + } + } } /// tryLocalSplit - Try to split VirtReg into smaller intervals inside its only @@ -1355,7 +1445,7 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order, // If VirtReg is live across any register mask operands, compute a list of // gaps with register masks. SmallVector RegMaskGaps; - if (!UsableRegs.empty()) { + if (Matrix->checkRegMaskInterference(VirtReg)) { // Get regmask slots for the whole block. ArrayRef RMS = LIS->getRegMaskSlotsInBlock(BI.MBB->getNumber()); DEBUG(dbgs() << RMS.size() << " regmasks in block:"); @@ -1417,7 +1507,7 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order, calcGapWeights(PhysReg, GapWeight); // Remove any gaps with regmask clobbers. 
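// A note on the LiveRegMatrix regmask API as these hunks use it: the
// one-argument checkRegMaskInterference(VirtReg) asks whether the range
// crosses any register mask at all, while the two-argument form asks whether
// a particular PhysReg is clobbered by one of those masks. Together they
// replace the old precomputed UsableRegs bit vector and the
// clobberedByRegMask() test removed above.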
- if (clobberedByRegMask(PhysReg)) + if (Matrix->checkRegMaskInterference(VirtReg, PhysReg)) for (unsigned i = 0, e = RegMaskGaps.size(); i != e; ++i) GapWeight[RegMaskGaps[i]] = HUGE_VALF; @@ -1512,7 +1602,7 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order, << '-' << Uses[BestAfter] << ", " << BestDiff << ", " << (BestAfter - BestBefore + 1) << " instrs\n"); - LiveRangeEdit LREdit(VirtReg, NewVRegs, *MF, *LIS, VRM, this); + LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this); SE->reset(LREdit); SE->openIntv(); @@ -1561,7 +1651,10 @@ unsigned RAGreedy::trySplit(LiveInterval &VirtReg, AllocationOrder &Order, if (LIS->intervalIsInOneMBB(VirtReg)) { NamedRegionTimer T("Local Splitting", TimerGroupName, TimePassesIsEnabled); SA->analyze(&VirtReg); - return tryLocalSplit(VirtReg, Order, NewVRegs); + unsigned PhysReg = tryLocalSplit(VirtReg, Order, NewVRegs); + if (PhysReg || !NewVRegs.empty()) + return PhysReg; + return tryInstructionSplit(VirtReg, Order, NewVRegs); } NamedRegionTimer T("Global Splitting", TimerGroupName, TimePassesIsEnabled); @@ -1574,7 +1667,7 @@ unsigned RAGreedy::trySplit(LiveInterval &VirtReg, AllocationOrder &Order, // an assertion when the coalescer is fixed. if (SA->didRepairRange()) { // VirtReg has changed, so all cached queries are invalid. - invalidateVirtRegs(); + Matrix->invalidateVirtRegs(); if (unsigned PhysReg = tryAssign(VirtReg, Order, NewVRegs)) return PhysReg; } @@ -1599,11 +1692,6 @@ unsigned RAGreedy::trySplit(LiveInterval &VirtReg, AllocationOrder &Order, unsigned RAGreedy::selectOrSplit(LiveInterval &VirtReg, SmallVectorImpl &NewVRegs) { - // Check if VirtReg is live across any calls. - UsableRegs.clear(); - if (LIS->checkRegMaskInterference(VirtReg, UsableRegs)) - DEBUG(dbgs() << "Live across regmasks.\n"); - // First try assigning a free register. AllocationOrder Order(VirtReg.reg, *VRM, RegClassInfo); if (unsigned PhysReg = tryAssign(VirtReg, Order, NewVRegs)) @@ -1644,7 +1732,7 @@ unsigned RAGreedy::selectOrSplit(LiveInterval &VirtReg, // Finally spill VirtReg itself. NamedRegionTimer T("Spiller", TimerGroupName, TimePassesIsEnabled); - LiveRangeEdit LRE(VirtReg, NewVRegs, *MF, *LIS, VRM, this); + LiveRangeEdit LRE(&VirtReg, NewVRegs, *MF, *LIS, VRM, this); spiller().spill(LRE); setStage(NewVRegs.begin(), NewVRegs.end(), RS_Done); @@ -1665,7 +1753,9 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) { if (VerifyEnabled) MF->verify(this, "Before greedy register allocator"); - RegAllocBase::init(getAnalysis(), getAnalysis()); + RegAllocBase::init(getAnalysis(), + getAnalysis(), + getAnalysis()); Indexes = &getAnalysis(); DomTree = &getAnalysis(); SpillerInstance.reset(createInlineSpiller(*this, *MF, *VRM)); @@ -1679,30 +1769,10 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) { ExtraRegInfo.clear(); ExtraRegInfo.resize(MRI->getNumVirtRegs()); NextCascade = 1; - IntfCache.init(MF, &getLiveUnion(0), Indexes, LIS, TRI); + IntfCache.init(MF, Matrix->getLiveUnions(), Indexes, LIS, TRI); GlobalCand.resize(32); // This will grow as needed. allocatePhysRegs(); - addMBBLiveIns(MF); - LIS->addKillFlags(); - - // Run rewriter - { - NamedRegionTimer T("Rewriter", TimerGroupName, TimePassesIsEnabled); - VRM->rewrite(Indexes); - } - - // Write out new DBG_VALUE instructions. - { - NamedRegionTimer T("Emit Debug Info", TimerGroupName, TimePassesIsEnabled); - DebugVars->emitDebugValues(VRM); - } - - // All machine operands and other references to virtual registers have been - // replaced. 
Remove the virtual registers and release all the transient data. - VRM->clearAllVirt(); - MRI->clearVirtRegs(); releaseMemory(); - return true; } diff --git a/lib/CodeGen/RegAllocPBQP.cpp b/lib/CodeGen/RegAllocPBQP.cpp index a284614..d0db26b 100644 --- a/lib/CodeGen/RegAllocPBQP.cpp +++ b/lib/CodeGen/RegAllocPBQP.cpp @@ -31,7 +31,6 @@ #define DEBUG_TYPE "regalloc" -#include "RenderMachineFunction.h" #include "Spiller.h" #include "VirtRegMap.h" #include "RegisterCoalescer.h" @@ -98,7 +97,6 @@ public: initializeLiveStacksPass(*PassRegistry::getPassRegistry()); initializeMachineLoopInfoPass(*PassRegistry::getPassRegistry()); initializeVirtRegMapPass(*PassRegistry::getPassRegistry()); - initializeRenderMachineFunctionPass(*PassRegistry::getPassRegistry()); } /// Return the pass name. @@ -134,7 +132,6 @@ private: const TargetInstrInfo *tii; const MachineLoopInfo *loopInfo; MachineRegisterInfo *mri; - RenderMachineFunction *rmf; std::auto_ptr spiller; LiveIntervals *lis; @@ -196,7 +193,7 @@ std::auto_ptr PBQPBuilder::build(MachineFunction *mf, const RegSet &vregs) { typedef std::vector LIVector; - ArrayRef regMaskSlots = lis->getRegMaskSlots(); + LiveIntervals *LIS = const_cast(lis); MachineRegisterInfo *mri = &mf->getRegInfo(); const TargetRegisterInfo *tri = mf->getTarget().getRegisterInfo(); @@ -205,12 +202,11 @@ std::auto_ptr PBQPBuilder::build(MachineFunction *mf, RegSet pregs; // Collect the set of preg intervals, record that they're used in the MF. - for (LiveIntervals::const_iterator itr = lis->begin(), end = lis->end(); - itr != end; ++itr) { - if (TargetRegisterInfo::isPhysicalRegister(itr->first)) { - pregs.insert(itr->first); - mri->setPhysRegUsed(itr->first); - } + for (unsigned Reg = 1, e = tri->getNumRegs(); Reg != e; ++Reg) { + if (mri->def_empty(Reg)) + continue; + pregs.insert(Reg); + mri->setPhysRegUsed(Reg); } BitVector reservedRegs = tri->getReservedRegs(*mf); @@ -220,7 +216,11 @@ std::auto_ptr PBQPBuilder::build(MachineFunction *mf, vregItr != vregEnd; ++vregItr) { unsigned vreg = *vregItr; const TargetRegisterClass *trc = mri->getRegClass(vreg); - const LiveInterval *vregLI = &lis->getInterval(vreg); + LiveInterval *vregLI = &LIS->getInterval(vreg); + + // Record any overlaps with regmask operands. + BitVector regMaskOverlaps(tri->getNumRegs()); + LIS->checkRegMaskInterference(*vregLI, regMaskOverlaps); // Compute an initial allowed set for the current vreg. typedef std::vector VRAllowed; @@ -228,80 +228,26 @@ std::auto_ptr PBQPBuilder::build(MachineFunction *mf, ArrayRef rawOrder = trc->getRawAllocationOrder(*mf); for (unsigned i = 0; i != rawOrder.size(); ++i) { unsigned preg = rawOrder[i]; - if (!reservedRegs.test(preg)) { - vrAllowed.push_back(preg); - } - } - - RegSet overlappingPRegs; - - // Record physical registers whose ranges overlap. - for (RegSet::const_iterator pregItr = pregs.begin(), - pregEnd = pregs.end(); - pregItr != pregEnd; ++pregItr) { - unsigned preg = *pregItr; - const LiveInterval *pregLI = &lis->getInterval(preg); - - if (pregLI->empty()) { + if (reservedRegs.test(preg)) continue; - } - if (vregLI->overlaps(*pregLI)) - overlappingPRegs.insert(preg); - } + // vregLI crosses a regmask operand that clobbers preg. + if (!regMaskOverlaps.empty() && !regMaskOverlaps.test(preg)) + continue; - // Record any overlaps with regmask operands. 
- BitVector regMaskOverlaps(tri->getNumRegs()); - for (ArrayRef::iterator rmItr = regMaskSlots.begin(), - rmEnd = regMaskSlots.end(); - rmItr != rmEnd; ++rmItr) { - SlotIndex rmIdx = *rmItr; - if (vregLI->liveAt(rmIdx)) { - MachineInstr *rmMI = lis->getInstructionFromIndex(rmIdx); - const uint32_t* regMask = 0; - for (MachineInstr::mop_iterator mopItr = rmMI->operands_begin(), - mopEnd = rmMI->operands_end(); - mopItr != mopEnd; ++mopItr) { - if (mopItr->isRegMask()) { - regMask = mopItr->getRegMask(); - break; - } + // vregLI overlaps fixed regunit interference. + bool Interference = false; + for (MCRegUnitIterator Units(preg, tri); Units.isValid(); ++Units) { + if (vregLI->overlaps(LIS->getRegUnit(*Units))) { + Interference = true; + break; } - assert(regMask != 0 && "Couldn't find register mask."); - regMaskOverlaps.setBitsNotInMask(regMask); } - } + if (Interference) + continue; - for (unsigned preg = 0; preg < tri->getNumRegs(); ++preg) { - if (regMaskOverlaps.test(preg)) - overlappingPRegs.insert(preg); - } - - for (RegSet::const_iterator pregItr = overlappingPRegs.begin(), - pregEnd = overlappingPRegs.end(); - pregItr != pregEnd; ++pregItr) { - unsigned preg = *pregItr; - - // Remove the register from the allowed set. - VRAllowed::iterator eraseItr = - std::find(vrAllowed.begin(), vrAllowed.end(), preg); - - if (eraseItr != vrAllowed.end()) { - vrAllowed.erase(eraseItr); - } - - // Also remove any aliases. - const uint16_t *aliasItr = tri->getAliasSet(preg); - if (aliasItr != 0) { - for (; *aliasItr != 0; ++aliasItr) { - VRAllowed::iterator eraseItr = - std::find(vrAllowed.begin(), vrAllowed.end(), *aliasItr); - - if (eraseItr != vrAllowed.end()) { - vrAllowed.erase(eraseItr); - } - } - } + // preg is usable for this virtual register. + vrAllowed.push_back(preg); } // Construct the node. @@ -379,7 +325,7 @@ std::auto_ptr PBQPBuilderWithCoalescing::build( PBQP::Graph &g = p->getGraph(); const TargetMachine &tm = mf->getTarget(); - CoalescerPair cp(*tm.getInstrInfo(), *tm.getRegisterInfo()); + CoalescerPair cp(*tm.getRegisterInfo()); // Scan the machine function and add a coalescing cost whenever CoalescerPair // gives the Ok. @@ -498,21 +444,17 @@ void RegAllocPBQP::getAnalysisUsage(AnalysisUsage &au) const { au.addRequired(); au.addPreserved(); au.addRequired(); - au.addRequired(); MachineFunctionPass::getAnalysisUsage(au); } void RegAllocPBQP::findVRegIntervalsToAlloc() { // Iterate over all live ranges. - for (LiveIntervals::iterator itr = lis->begin(), end = lis->end(); - itr != end; ++itr) { - - // Ignore physical ones. - if (TargetRegisterInfo::isPhysicalRegister(itr->first)) + for (unsigned i = 0, e = mri->getNumVirtRegs(); i != e; ++i) { + unsigned Reg = TargetRegisterInfo::index2VirtReg(i); + if (mri->reg_nodbg_empty(Reg)) continue; - - LiveInterval *li = itr->second; + LiveInterval *li = &lis->getInterval(Reg); // If this live interval is non-empty we will use pbqp to allocate it. 
// Empty intervals we allocate in a simple post-processing stage in @@ -544,16 +486,17 @@ bool RegAllocPBQP::mapPBQPToRegAlloc(const PBQPRAProblem &problem, if (problem.isPRegOption(vreg, alloc)) { unsigned preg = problem.getPRegForOption(vreg, alloc); - DEBUG(dbgs() << "VREG " << vreg << " -> " << tri->getName(preg) << "\n"); + DEBUG(dbgs() << "VREG " << PrintReg(vreg, tri) << " -> " + << tri->getName(preg) << "\n"); assert(preg != 0 && "Invalid preg selected."); vrm->assignVirt2Phys(vreg, preg); } else if (problem.isSpillOption(vreg, alloc)) { vregsToAlloc.erase(vreg); SmallVector newSpills; - LiveRangeEdit LRE(lis->getInterval(vreg), newSpills, *mf, *lis, vrm); + LiveRangeEdit LRE(&lis->getInterval(vreg), newSpills, *mf, *lis, vrm); spiller->spill(LRE); - DEBUG(dbgs() << "VREG " << vreg << " -> SPILLED (Cost: " + DEBUG(dbgs() << "VREG " << PrintReg(vreg, tri) << " -> SPILLED (Cost: " << LRE.getParent().weight << ", New vregs: "); // Copy any newly inserted live intervals into the list of regs to @@ -561,7 +504,7 @@ bool RegAllocPBQP::mapPBQPToRegAlloc(const PBQPRAProblem &problem, for (LiveRangeEdit::iterator itr = LRE.begin(), end = LRE.end(); itr != end; ++itr) { assert(!(*itr)->empty() && "Empty spill range."); - DEBUG(dbgs() << (*itr)->reg << " "); + DEBUG(dbgs() << PrintReg((*itr)->reg, tri) << " "); vregsToAlloc.insert((*itr)->reg); } @@ -579,9 +522,6 @@ bool RegAllocPBQP::mapPBQPToRegAlloc(const PBQPRAProblem &problem, void RegAllocPBQP::finalizeAlloc() const { - typedef LiveIntervals::iterator LIIterator; - typedef LiveInterval::Ranges::const_iterator LRIterator; - // First allocate registers for the empty intervals. for (RegSet::const_iterator itr = emptyIntervalVRegs.begin(), end = emptyIntervalVRegs.end(); @@ -597,51 +537,6 @@ void RegAllocPBQP::finalizeAlloc() const { vrm->assignVirt2Phys(li->reg, physReg); } - - // Finally iterate over the basic blocks to compute and set the live-in sets. - SmallVector liveInMBBs; - MachineBasicBlock *entryMBB = &*mf->begin(); - - for (LIIterator liItr = lis->begin(), liEnd = lis->end(); - liItr != liEnd; ++liItr) { - - const LiveInterval *li = liItr->second; - unsigned reg = 0; - - // Get the physical register for this interval - if (TargetRegisterInfo::isPhysicalRegister(li->reg)) { - reg = li->reg; - } else if (vrm->isAssignedReg(li->reg)) { - reg = vrm->getPhys(li->reg); - } else { - // Ranges which are assigned a stack slot only are ignored. - continue; - } - - if (reg == 0) { - // Filter out zero regs - they're for intervals that were spilled. - continue; - } - - // Iterate over the ranges of the current interval... - for (LRIterator lrItr = li->begin(), lrEnd = li->end(); - lrItr != lrEnd; ++lrItr) { - - // Find the set of basic blocks which this range is live into... - if (lis->findLiveInMBBs(lrItr->start, lrItr->end, liveInMBBs)) { - // And add the physreg for this interval to their live-in sets. 
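findVRegIntervalsToAlloc above no longer walks the LiveIntervals map, which used to interleave physical and virtual entries; it enumerates virtual registers directly through MachineRegisterInfo. A hedged sketch of that idiom, which recurs throughout this import:

    // Visit every virtual register: indexes 0..getNumVirtRegs()-1 are dense,
    // and index2VirtReg() maps an index back to a register number.
    for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) {
      unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
      if (MRI->reg_nodbg_empty(Reg))
        continue;                 // Only DBG_VALUE uses; nothing to allocate.
      LiveInterval &LI = LIS->getInterval(Reg);
      // ... inspect or collect LI here.
      (void)LI;
    }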
- for (unsigned i = 0; i != liveInMBBs.size(); ++i) { - if (liveInMBBs[i] != entryMBB) { - if (!liveInMBBs[i]->isLiveIn(reg)) { - liveInMBBs[i]->addLiveIn(reg); - } - } - } - liveInMBBs.clear(); - } - } - } - } bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) { @@ -655,7 +550,6 @@ bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) { lis = &getAnalysis(); lss = &getAnalysis(); loopInfo = &getAnalysis(); - rmf = &getAnalysis(); vrm = &getAnalysis(); spiller.reset(createInlineSpiller(*this, MF, *vrm)); @@ -719,22 +613,11 @@ bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) { // Finalise allocation, allocate empty ranges. finalizeAlloc(); - - rmf->renderMachineFunction("After PBQP register allocation.", vrm); - vregsToAlloc.clear(); emptyIntervalVRegs.clear(); DEBUG(dbgs() << "Post alloc VirtRegMap:\n" << *vrm << "\n"); - // Run rewriter - vrm->rewrite(lis->getSlotIndexes()); - - // All machine operands and other references to virtual registers have been - // replaced. Remove the virtual registers. - vrm->clearAllVirt(); - mri->clearVirtRegs(); - return true; } diff --git a/lib/CodeGen/RegisterClassInfo.cpp b/lib/CodeGen/RegisterClassInfo.cpp index 17165fa..652bc30 100644 --- a/lib/CodeGen/RegisterClassInfo.cpp +++ b/lib/CodeGen/RegisterClassInfo.cpp @@ -15,8 +15,8 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "regalloc" -#include "RegisterClassInfo.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/RegisterClassInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" @@ -50,9 +50,8 @@ void RegisterClassInfo::runOnMachineFunction(const MachineFunction &mf) { CSRNum.clear(); CSRNum.resize(TRI->getNumRegs(), 0); for (unsigned N = 0; unsigned Reg = CSR[N]; ++N) - for (const uint16_t *AS = TRI->getOverlaps(Reg); - unsigned Alias = *AS; ++AS) - CSRNum[Alias] = N + 1; // 0 means no CSR, 1 means CalleeSaved[0], ... + for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) + CSRNum[*AI] = N + 1; // 0 means no CSR, 1 means CalleeSaved[0], ... Update = true; } CalleeSaved = CSR; diff --git a/lib/CodeGen/RegisterClassInfo.h b/lib/CodeGen/RegisterClassInfo.h deleted file mode 100644 index 400e1f4..0000000 --- a/lib/CodeGen/RegisterClassInfo.h +++ /dev/null @@ -1,132 +0,0 @@ -//===-- RegisterClassInfo.h - Dynamic Register Class Info -*- C++ -*-------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements the RegisterClassInfo class which provides dynamic -// information about target register classes. Callee saved and reserved -// registers depends on calling conventions and other dynamic information, so -// some things cannot be determined statically. 
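The CSRNum hunk above shows the pattern that replaces the old null-terminated getOverlaps() tables throughout this import: MCRegAliasIterator enumerates the registers aliasing Reg, with the third constructor argument selecting whether Reg itself is visited. In sketch form, assuming a valid TRI:

    // Touch PReg and everything overlapping it; 'true' includes PReg itself.
    for (MCRegAliasIterator AI(PReg, TRI, true); AI.isValid(); ++AI) {
      unsigned Alias = *AI;
      // e.g. CSRNum[Alias] = N + 1; or mark Alias as clobbered, etc.
      (void)Alias;
    }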
-// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_CODEGEN_REGISTERCLASSINFO_H -#define LLVM_CODEGEN_REGISTERCLASSINFO_H - -#include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/BitVector.h" -#include "llvm/ADT/OwningPtr.h" -#include "llvm/Target/TargetRegisterInfo.h" - -namespace llvm { - -class RegisterClassInfo { - struct RCInfo { - unsigned Tag; - unsigned NumRegs; - bool ProperSubClass; - OwningArrayPtr Order; - - RCInfo() : Tag(0), NumRegs(0), ProperSubClass(false) {} - operator ArrayRef() const { - return makeArrayRef(Order.get(), NumRegs); - } - }; - - // Brief cached information for each register class. - OwningArrayPtr RegClass; - - // Tag changes whenever cached information needs to be recomputed. An RCInfo - // entry is valid when its tag matches. - unsigned Tag; - - const MachineFunction *MF; - const TargetRegisterInfo *TRI; - - // Callee saved registers of last MF. Assumed to be valid until the next - // runOnFunction() call. - const uint16_t *CalleeSaved; - - // Map register number to CalleeSaved index + 1; - SmallVector CSRNum; - - // Reserved registers in the current MF. - BitVector Reserved; - - // Compute all information about RC. - void compute(const TargetRegisterClass *RC) const; - - // Return an up-to-date RCInfo for RC. - const RCInfo &get(const TargetRegisterClass *RC) const { - const RCInfo &RCI = RegClass[RC->getID()]; - if (Tag != RCI.Tag) - compute(RC); - return RCI; - } - -public: - RegisterClassInfo(); - - /// runOnFunction - Prepare to answer questions about MF. This must be called - /// before any other methods are used. - void runOnMachineFunction(const MachineFunction &MF); - - /// getNumAllocatableRegs - Returns the number of actually allocatable - /// registers in RC in the current function. - unsigned getNumAllocatableRegs(const TargetRegisterClass *RC) const { - return get(RC).NumRegs; - } - - /// getOrder - Returns the preferred allocation order for RC. The order - /// contains no reserved registers, and registers that alias callee saved - /// registers come last. - ArrayRef getOrder(const TargetRegisterClass *RC) const { - return get(RC); - } - - /// isProperSubClass - Returns true if RC has a legal super-class with more - /// allocatable registers. - /// - /// Register classes like GR32_NOSP are not proper sub-classes because %esp - /// is not allocatable. Similarly, tGPR is not a proper sub-class in Thumb - /// mode because the GPR super-class is not legal. - bool isProperSubClass(const TargetRegisterClass *RC) const { - return get(RC).ProperSubClass; - } - - /// getLastCalleeSavedAlias - Returns the last callee saved register that - /// overlaps PhysReg, or 0 if Reg doesn't overlap a CSR. - unsigned getLastCalleeSavedAlias(unsigned PhysReg) const { - assert(TargetRegisterInfo::isPhysicalRegister(PhysReg)); - if (unsigned N = CSRNum[PhysReg]) - return CalleeSaved[N-1]; - return 0; - } - - /// isReserved - Returns true when PhysReg is a reserved register. - /// - /// Reserved registers may belong to an allocatable register class, but the - /// target has explicitly requested that they are not used. - /// - bool isReserved(unsigned PhysReg) const { - return Reserved.test(PhysReg); - } - - /// isAllocatable - Returns true when PhysReg belongs to an allocatable - /// register class and it hasn't been reserved. - /// - /// Allocatable registers may show up in the allocation order of some virtual - /// register, so a register allocator needs to track its liveness and - /// availability. 
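The header deleted here reappears as include/llvm/CodeGen/RegisterClassInfo.h (the .cpp hunks around it now include it from there), and its caching scheme is worth a gloss: every RCInfo entry carries a Tag, get() recomputes an entry only when its tag is stale, and bumping the class-wide Tag invalidates all entries in O(1). A reduced sketch of the pattern, independent of the LLVM types:

    #include <vector>

    // Generation-tag cache, as used by RegisterClassInfo above.
    struct TagCache {
      struct Entry { unsigned Tag; /* cached data lives here */ Entry() : Tag(0) {} };
      std::vector<Entry> Entries;
      unsigned Tag;                       // Current generation.

      TagCache() : Tag(1) {}              // Entries start stale (Tag 0 != 1).

      const Entry &get(unsigned Idx) {
        Entry &E = Entries[Idx];
        if (E.Tag != Tag) {               // Stale: recompute lazily.
          /* ... recompute E from current state ... */
          E.Tag = Tag;
        }
        return E;
      }
      void invalidateAll() { ++Tag; }     // Every entry is now stale.
    };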
- bool isAllocatable(unsigned PhysReg) const { - return TRI->isInAllocatableClass(PhysReg) && !isReserved(PhysReg); - } -}; -} // end namespace llvm - -#endif - diff --git a/lib/CodeGen/RegisterCoalescer.cpp b/lib/CodeGen/RegisterCoalescer.cpp index 75f88ca..9906334 100644 --- a/lib/CodeGen/RegisterCoalescer.cpp +++ b/lib/CodeGen/RegisterCoalescer.cpp @@ -16,34 +16,35 @@ #define DEBUG_TYPE "regalloc" #include "RegisterCoalescer.h" #include "LiveDebugVariables.h" -#include "RegisterClassInfo.h" #include "VirtRegMap.h" #include "llvm/Pass.h" #include "llvm/Value.h" +#include "llvm/ADT/OwningPtr.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/AliasAnalysis.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" -#include "llvm/CodeGen/MachineInstr.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetRegisterInfo.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" -#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/CodeGen/LiveRangeEdit.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetOptions.h" +#include "llvm/CodeGen/RegisterClassInfo.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/ADT/OwningPtr.h" -#include "llvm/ADT/SmallSet.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/ADT/STLExtras.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/Target/TargetRegisterInfo.h" #include #include using namespace llvm; @@ -53,8 +54,6 @@ STATISTIC(numCrossRCs , "Number of cross class joins performed"); STATISTIC(numCommutes , "Number of instruction commuting performed"); STATISTIC(numExtends , "Number of copies extended"); STATISTIC(NumReMats , "Number of instructions re-materialized"); -STATISTIC(numPeep , "Number of identity moves eliminated after coalescing"); -STATISTIC(numAborts , "Number of times interval joining aborted"); STATISTIC(NumInflated , "Number of register classes inflated"); static cl::opt @@ -63,22 +62,13 @@ EnableJoining("join-liveintervals", cl::init(true)); static cl::opt -DisableCrossClassJoin("disable-cross-class-join", - cl::desc("Avoid coalescing cross register class copies"), - cl::init(false), cl::Hidden); - -static cl::opt -EnablePhysicalJoin("join-physregs", - cl::desc("Join physical register copies"), - cl::init(false), cl::Hidden); - -static cl::opt VerifyCoalescing("verify-coalescing", cl::desc("Verify machine instrs before and after register coalescing"), cl::Hidden); namespace { - class RegisterCoalescer : public MachineFunctionPass { + class RegisterCoalescer : public MachineFunctionPass, + private LiveRangeEdit::Delegate { MachineFunction* MF; MachineRegisterInfo* MRI; const TargetMachine* TM; @@ -90,87 +80,83 @@ namespace { AliasAnalysis *AA; RegisterClassInfo RegClassInfo; - /// JoinedCopies - Keep track of copies eliminated due to coalescing. 
- /// - SmallPtrSet JoinedCopies; + /// WorkList - Copy instructions yet to be coalesced. + SmallVector WorkList; + + /// ErasedInstrs - Set of instruction pointers that have been erased, and + /// that may be present in WorkList. + SmallPtrSet ErasedInstrs; + + /// Dead instructions that are about to be deleted. + SmallVector DeadDefs; + + /// Virtual registers to be considered for register class inflation. + SmallVector InflateRegs; - /// ReMatCopies - Keep track of copies eliminated due to remat. - /// - SmallPtrSet ReMatCopies; + /// Recursively eliminate dead defs in DeadDefs. + void eliminateDeadDefs(); - /// ReMatDefs - Keep track of definition instructions which have - /// been remat'ed. - SmallPtrSet ReMatDefs; + /// LiveRangeEdit callback. + void LRE_WillEraseInstruction(MachineInstr *MI); - /// joinIntervals - join compatible live intervals - void joinIntervals(); + /// joinAllIntervals - join compatible live intervals + void joinAllIntervals(); - /// CopyCoalesceInMBB - Coalesce copies in the specified MBB, putting - /// copies that cannot yet be coalesced into the "TryAgain" list. - void CopyCoalesceInMBB(MachineBasicBlock *MBB, - std::vector &TryAgain); + /// copyCoalesceInMBB - Coalesce copies in the specified MBB, putting + /// copies that cannot yet be coalesced into WorkList. + void copyCoalesceInMBB(MachineBasicBlock *MBB); - /// JoinCopy - Attempt to join intervals corresponding to SrcReg/DstReg, + /// copyCoalesceWorkList - Try to coalesce all copies in WorkList after + /// position From. Return true if any progress was made. + bool copyCoalesceWorkList(unsigned From = 0); + + /// joinCopy - Attempt to join intervals corresponding to SrcReg/DstReg, /// which are the src/dst of the copy instruction CopyMI. This returns /// true if the copy was successfully coalesced away. If it is not /// currently possible to coalesce this interval, but it may be possible if /// other things get coalesced, then it returns true by reference in /// 'Again'. - bool JoinCopy(MachineInstr *TheCopy, bool &Again); + bool joinCopy(MachineInstr *TheCopy, bool &Again); - /// JoinIntervals - Attempt to join these two intervals. On failure, this + /// joinIntervals - Attempt to join these two intervals. On failure, this /// returns false. The output "SrcInt" will not have been modified, so we /// can use this information below to update aliases. - bool JoinIntervals(CoalescerPair &CP); + bool joinIntervals(CoalescerPair &CP); + + /// Attempt joining with a reserved physreg. + bool joinReservedPhysReg(CoalescerPair &CP); - /// AdjustCopiesBackFrom - We found a non-trivially-coalescable copy. If + /// adjustCopiesBackFrom - We found a non-trivially-coalescable copy. If /// the source value number is defined by a copy from the destination reg /// see if we can merge these two destination reg valno# into a single /// value number, eliminating a copy. - bool AdjustCopiesBackFrom(const CoalescerPair &CP, MachineInstr *CopyMI); + bool adjustCopiesBackFrom(const CoalescerPair &CP, MachineInstr *CopyMI); - /// HasOtherReachingDefs - Return true if there are definitions of IntB + /// hasOtherReachingDefs - Return true if there are definitions of IntB /// other than BValNo val# that can reach uses of AValno val# of IntA. - bool HasOtherReachingDefs(LiveInterval &IntA, LiveInterval &IntB, + bool hasOtherReachingDefs(LiveInterval &IntA, LiveInterval &IntB, VNInfo *AValNo, VNInfo *BValNo); - /// RemoveCopyByCommutingDef - We found a non-trivially-coalescable copy. 
+ /// removeCopyByCommutingDef - We found a non-trivially-coalescable copy. /// If the source value number is defined by a commutable instruction and /// its other operand is coalesced to the copy dest register, see if we /// can transform the copy into a noop by commuting the definition. - bool RemoveCopyByCommutingDef(const CoalescerPair &CP,MachineInstr *CopyMI); + bool removeCopyByCommutingDef(const CoalescerPair &CP,MachineInstr *CopyMI); - /// ReMaterializeTrivialDef - If the source of a copy is defined by a + /// reMaterializeTrivialDef - If the source of a copy is defined by a /// trivial computation, replace the copy by rematerialize the definition. - /// If PreserveSrcInt is true, make sure SrcInt is valid after the call. - bool ReMaterializeTrivialDef(LiveInterval &SrcInt, bool PreserveSrcInt, - unsigned DstReg, MachineInstr *CopyMI); - - /// shouldJoinPhys - Return true if a physreg copy should be joined. - bool shouldJoinPhys(CoalescerPair &CP); - - /// isWinToJoinCrossClass - Return true if it's profitable to coalesce - /// two virtual registers from different register classes. - bool isWinToJoinCrossClass(unsigned SrcReg, - unsigned DstReg, - const TargetRegisterClass *SrcRC, - const TargetRegisterClass *DstRC, - const TargetRegisterClass *NewRC); - - /// UpdateRegDefsUses - Replace all defs and uses of SrcReg to DstReg and + bool reMaterializeTrivialDef(LiveInterval &SrcInt, unsigned DstReg, + MachineInstr *CopyMI); + + /// canJoinPhys - Return true if a physreg copy should be joined. + bool canJoinPhys(CoalescerPair &CP); + + /// updateRegDefsUses - Replace all defs and uses of SrcReg to DstReg and /// update the subregister number if it is not zero. If DstReg is a /// physical register and the existing subregister number of the def / use /// being updated is not zero, make sure to set it to the correct physical /// subregister. - void UpdateRegDefsUses(const CoalescerPair &CP); - - /// RemoveDeadDef - If a def of a live interval is now determined dead, - /// remove the val# it defines. If the live interval becomes empty, remove - /// it as well. - bool RemoveDeadDef(LiveInterval &li, MachineInstr *DefMI); - - /// markAsJoined - Remember that CopyMI has already been joined. - void markAsJoined(MachineInstr *CopyMI); + void updateRegDefsUses(unsigned SrcReg, unsigned DstReg, unsigned SubIdx); /// eliminateUndefCopy - Handle copies of undef values. bool eliminateUndefCopy(MachineInstr *CopyMI, const CoalescerPair &CP); @@ -233,7 +219,8 @@ static bool isMoveInstr(const TargetRegisterInfo &tri, const MachineInstr *MI, } bool CoalescerPair::setRegisters(const MachineInstr *MI) { - SrcReg = DstReg = SubIdx = 0; + SrcReg = DstReg = 0; + SrcIdx = DstIdx = 0; NewRC = 0; Flipped = CrossClass = false; @@ -271,39 +258,44 @@ bool CoalescerPair::setRegisters(const MachineInstr *MI) { } } else { // Both registers are virtual. + const TargetRegisterClass *SrcRC = MRI.getRegClass(Src); + const TargetRegisterClass *DstRC = MRI.getRegClass(Dst); // Both registers have subreg indices. if (SrcSub && DstSub) { - // For now we only handle the case of identical indices in commensurate - // registers: Dreg:ssub_1 + Dreg:ssub_1 -> Dreg - // FIXME: Handle Qreg:ssub_3 + Dreg:ssub_1 as QReg:dsub_1 + Dreg. - if (SrcSub != DstSub) + // Copies between different sub-registers are never coalescable. 
+ if (Src == Dst && SrcSub != DstSub) return false; - const TargetRegisterClass *SrcRC = MRI.getRegClass(Src); - const TargetRegisterClass *DstRC = MRI.getRegClass(Dst); - if (!TRI.getCommonSubClass(DstRC, SrcRC)) + + NewRC = TRI.getCommonSuperRegClass(SrcRC, SrcSub, DstRC, DstSub, + SrcIdx, DstIdx); + if (!NewRC) return false; - SrcSub = DstSub = 0; + } else if (DstSub) { + // SrcReg will be merged with a sub-register of DstReg. + SrcIdx = DstSub; + NewRC = TRI.getMatchingSuperRegClass(DstRC, SrcRC, DstSub); + } else if (SrcSub) { + // DstReg will be merged with a sub-register of SrcReg. + DstIdx = SrcSub; + NewRC = TRI.getMatchingSuperRegClass(SrcRC, DstRC, SrcSub); + } else { + // This is a straight copy without sub-registers. + NewRC = TRI.getCommonSubClass(DstRC, SrcRC); } - // There can be no SrcSub. - if (SrcSub) { + // The combined constraint may be impossible to satisfy. + if (!NewRC) + return false; + + // Prefer SrcReg to be a sub-register of DstReg. + // FIXME: Coalescer should support subregs symmetrically. + if (DstIdx && !SrcIdx) { std::swap(Src, Dst); - DstSub = SrcSub; - SrcSub = 0; - assert(!Flipped && "Unexpected flip"); - Flipped = true; + std::swap(SrcIdx, DstIdx); + Flipped = !Flipped; } - // Find the new register class. - const TargetRegisterClass *SrcRC = MRI.getRegClass(Src); - const TargetRegisterClass *DstRC = MRI.getRegClass(Dst); - if (DstSub) - NewRC = TRI.getMatchingSuperRegClass(DstRC, SrcRC, DstSub); - else - NewRC = TRI.getCommonSubClass(DstRC, SrcRC); - if (!NewRC) - return false; CrossClass = NewRC != DstRC || NewRC != SrcRC; } // Check our invariants @@ -312,14 +304,14 @@ bool CoalescerPair::setRegisters(const MachineInstr *MI) { "Cannot have a physical SubIdx"); SrcReg = Src; DstReg = Dst; - SubIdx = DstSub; return true; } bool CoalescerPair::flip() { - if (SubIdx || TargetRegisterInfo::isPhysicalRegister(DstReg)) + if (TargetRegisterInfo::isPhysicalRegister(DstReg)) return false; std::swap(SrcReg, DstReg); + std::swap(SrcIdx, DstIdx); Flipped = !Flipped; return true; } @@ -343,7 +335,7 @@ bool CoalescerPair::isCoalescable(const MachineInstr *MI) const { if (TargetRegisterInfo::isPhysicalRegister(DstReg)) { if (!TargetRegisterInfo::isPhysicalRegister(Dst)) return false; - assert(!SubIdx && "Inconsistent CoalescerPair state."); + assert(!DstIdx && !SrcIdx && "Inconsistent CoalescerPair state."); // DstSub could be set for a physreg from INSERT_SUBREG. if (DstSub) Dst = TRI.getSubReg(Dst, DstSub); @@ -357,7 +349,7 @@ bool CoalescerPair::isCoalescable(const MachineInstr *MI) const { if (DstReg != Dst) return false; // Registers match, do the subregisters line up? - return compose(TRI, SubIdx, SrcSub) == DstSub; + return compose(TRI, SrcIdx, SrcSub) == compose(TRI, DstIdx, DstSub); } } @@ -375,19 +367,18 @@ void RegisterCoalescer::getAnalysisUsage(AnalysisUsage &AU) const { MachineFunctionPass::getAnalysisUsage(AU); } -void RegisterCoalescer::markAsJoined(MachineInstr *CopyMI) { - /// Joined copies are not deleted immediately, but kept in JoinedCopies. - JoinedCopies.insert(CopyMI); +void RegisterCoalescer::eliminateDeadDefs() { + SmallVector NewRegs; + LiveRangeEdit(0, NewRegs, *MF, *LIS, 0, this).eliminateDeadDefs(DeadDefs); +} - /// Mark all register operands of CopyMI as so they won't affect dead - /// code elimination. - for (MachineInstr::mop_iterator I = CopyMI->operands_begin(), - E = CopyMI->operands_end(); I != E; ++I) - if (I->isReg()) - I->setIsUndef(true); +// Callback from eliminateDeadDefs(). 
+void RegisterCoalescer::LRE_WillEraseInstruction(MachineInstr *MI) { + // MI may be in WorkList. Make sure we don't visit it. + ErasedInstrs.insert(MI); } -/// AdjustCopiesBackFrom - We found a non-trivially-coalescable copy with IntA +/// adjustCopiesBackFrom - We found a non-trivially-coalescable copy with IntA /// being the source and IntB being the dest, thus this defines a value number /// in IntB. If the source value number (in IntA) is defined by a copy from B, /// see if we can merge these two pieces of B into a single value number, @@ -402,12 +393,10 @@ void RegisterCoalescer::markAsJoined(MachineInstr *CopyMI) { /// /// This returns true if an interval was modified. /// -bool RegisterCoalescer::AdjustCopiesBackFrom(const CoalescerPair &CP, - MachineInstr *CopyMI) { - // Bail if there is no dst interval - can happen when merging physical subreg - // operations. - if (!LIS->hasInterval(CP.getDstReg())) - return false; +bool RegisterCoalescer::adjustCopiesBackFrom(const CoalescerPair &CP, + MachineInstr *CopyMI) { + assert(!CP.isPartial() && "This doesn't work for partial copies."); + assert(!CP.isPhys() && "This doesn't work for physreg copies."); LiveInterval &IntA = LIS->getInterval(CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg()); @@ -457,24 +446,7 @@ bool RegisterCoalescer::AdjustCopiesBackFrom(const CoalescerPair &CP, // IntB, we can merge them. if (ValLR+1 != BLR) return false; - // If a live interval is a physical register, conservatively check if any - // of its aliases is overlapping the live interval of the virtual register. - // If so, do not coalesce. - if (TargetRegisterInfo::isPhysicalRegister(IntB.reg)) { - for (const uint16_t *AS = TRI->getAliasSet(IntB.reg); *AS; ++AS) - if (LIS->hasInterval(*AS) && IntA.overlaps(LIS->getInterval(*AS))) { - DEBUG({ - dbgs() << "\t\tInterfere with alias "; - LIS->getInterval(*AS).print(dbgs(), TRI); - }); - return false; - } - } - - DEBUG({ - dbgs() << "Extending: "; - IntB.print(dbgs(), TRI); - }); + DEBUG(dbgs() << "Extending: " << PrintReg(IntB.reg, TRI)); SlotIndex FillerStart = ValLR->end, FillerEnd = BLR->start; // We are about to delete CopyMI, so need to remove it as the 'instruction @@ -487,33 +459,10 @@ bool RegisterCoalescer::AdjustCopiesBackFrom(const CoalescerPair &CP, // two value numbers. IntB.addRange(LiveRange(FillerStart, FillerEnd, BValNo)); - // If the IntB live range is assigned to a physical register, and if that - // physreg has sub-registers, update their live intervals as well. - if (TargetRegisterInfo::isPhysicalRegister(IntB.reg)) { - for (const uint16_t *SR = TRI->getSubRegisters(IntB.reg); *SR; ++SR) { - if (!LIS->hasInterval(*SR)) - continue; - LiveInterval &SRLI = LIS->getInterval(*SR); - SRLI.addRange(LiveRange(FillerStart, FillerEnd, - SRLI.getNextValue(FillerStart, - LIS->getVNInfoAllocator()))); - } - } - // Okay, merge "B1" into the same value number as "B0". - if (BValNo != ValLR->valno) { - // If B1 is killed by a PHI, then the merged live range must also be killed - // by the same PHI, as B0 and B1 can not overlap. - bool HasPHIKill = BValNo->hasPHIKill(); + if (BValNo != ValLR->valno) IntB.MergeValueNumberInto(BValNo, ValLR->valno); - if (HasPHIKill) - ValLR->valno->setHasPHIKill(true); - } - DEBUG({ - dbgs() << " result = "; - IntB.print(dbgs(), TRI); - dbgs() << "\n"; - }); + DEBUG(dbgs() << " result = " << IntB << '\n'); // If the source instruction was killing the source register before the // merge, unset the isKill marker given the live range has been extended. 
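The eliminateDeadDefs()/LRE_WillEraseInstruction() pair introduced above is the whole of the coalescer's new dead-code plumbing: the pass hands itself to LiveRangeEdit as the Delegate (note the private inheritance in the class declaration), and the callback records each instruction the editor deletes so that stale MachineInstr pointers left in WorkList are never touched. A sketch of the contract's shape, with abbreviated names:

    // How a pass cooperates with LiveRangeEdit's deletion callback
    // (pre-C++11 tree, so no 'override' keyword).
    class MyPass : private LiveRangeEdit::Delegate {
      SmallPtrSet<MachineInstr*, 8> Erased;   // Deleted; pointers now invalid.
      SmallVector<MachineInstr*, 8> WorkList;

      void LRE_WillEraseInstruction(MachineInstr *MI) {
        Erased.insert(MI);                    // Called just before deletion.
      }

      void drain() {
        for (unsigned i = 0; i != WorkList.size(); ++i)
          if (!Erased.count(WorkList[i]))
            visit(WorkList[i]);               // Never touch erased instrs.
      }
      void visit(MachineInstr*);
    };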
@@ -525,8 +474,7 @@ bool RegisterCoalescer::AdjustCopiesBackFrom(const CoalescerPair &CP, // Rewrite the copy. If the copy instruction was killing the destination // register before the merge, find the last use and trim the live range. That // will also add the isKill marker. - CopyMI->substituteRegister(IntA.reg, IntB.reg, CP.getSubIdx(), - *TRI); + CopyMI->substituteRegister(IntA.reg, IntB.reg, 0, *TRI); if (ALR->end == CopyIdx) LIS->shrinkToUses(&IntA); @@ -534,12 +482,17 @@ bool RegisterCoalescer::AdjustCopiesBackFrom(const CoalescerPair &CP, return true; } -/// HasOtherReachingDefs - Return true if there are definitions of IntB +/// hasOtherReachingDefs - Return true if there are definitions of IntB /// other than BValNo val# that can reach uses of AValno val# of IntA. -bool RegisterCoalescer::HasOtherReachingDefs(LiveInterval &IntA, - LiveInterval &IntB, - VNInfo *AValNo, - VNInfo *BValNo) { +bool RegisterCoalescer::hasOtherReachingDefs(LiveInterval &IntA, + LiveInterval &IntB, + VNInfo *AValNo, + VNInfo *BValNo) { + // If AValNo has PHI kills, conservatively assume that IntB defs can reach + // the PHI values. + if (LIS->hasPHIKill(IntA, AValNo)) + return true; + for (LiveInterval::iterator AI = IntA.begin(), AE = IntA.end(); AI != AE; ++AI) { if (AI->valno != AValNo) continue; @@ -559,7 +512,7 @@ bool RegisterCoalescer::HasOtherReachingDefs(LiveInterval &IntA, return false; } -/// RemoveCopyByCommutingDef - We found a non-trivially-coalescable copy with +/// removeCopyByCommutingDef - We found a non-trivially-coalescable copy with /// IntA being the source and IntB being the dest, thus this defines a value /// number in IntB. If the source value number (in IntA) is defined by a /// commutable instruction and its other operand is coalesced to the copy dest @@ -582,18 +535,9 @@ bool RegisterCoalescer::HasOtherReachingDefs(LiveInterval &IntA, /// /// This returns true if an interval was modified. /// -bool RegisterCoalescer::RemoveCopyByCommutingDef(const CoalescerPair &CP, - MachineInstr *CopyMI) { - // FIXME: For now, only eliminate the copy by commuting its def when the - // source register is a virtual register. We want to guard against cases - // where the copy is a back edge copy and commuting the def lengthen the - // live interval of the source register to the entire loop. - if (CP.isPhys() && CP.isFlipped()) - return false; - - // Bail if there is no dst interval. - if (!LIS->hasInterval(CP.getDstReg())) - return false; +bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP, + MachineInstr *CopyMI) { + assert (!CP.isPhys()); SlotIndex CopyIdx = LIS->getInstructionIndex(CopyMI).getRegSlot(); @@ -613,10 +557,7 @@ bool RegisterCoalescer::RemoveCopyByCommutingDef(const CoalescerPair &CP, // AValNo is the value number in A that defines the copy, A3 in the example. VNInfo *AValNo = IntA.getVNInfoAt(CopyIdx.getRegSlot(true)); assert(AValNo && "COPY source not live"); - - // If other defs can reach uses of this def, then it's not safe to perform - // the optimization. - if (AValNo->isPHIDef() || AValNo->isUnused() || AValNo->hasPHIKill()) + if (AValNo->isPHIDef() || AValNo->isUnused()) return false; MachineInstr *DefMI = LIS->getInstructionFromIndex(AValNo->def); if (!DefMI) @@ -647,17 +588,9 @@ bool RegisterCoalescer::RemoveCopyByCommutingDef(const CoalescerPair &CP, // Make sure there are no other definitions of IntB that would reach the // uses which the new definition can reach. 
- if (HasOtherReachingDefs(IntA, IntB, AValNo, BValNo)) + if (hasOtherReachingDefs(IntA, IntB, AValNo, BValNo)) return false; - // Abort if the aliases of IntB.reg have values that are not simply the - // clobbers from the superreg. - if (TargetRegisterInfo::isPhysicalRegister(IntB.reg)) - for (const uint16_t *AS = TRI->getAliasSet(IntB.reg); *AS; ++AS) - if (LIS->hasInterval(*AS) && - HasOtherReachingDefs(IntA, LIS->getInterval(*AS), AValNo, 0)) - return false; - // If some of the uses of IntA.reg is already coalesced away, return false. // It's not possible to determine whether it's safe to perform the coalescing. for (MachineRegisterInfo::use_nodbg_iterator UI = @@ -666,13 +599,14 @@ bool RegisterCoalescer::RemoveCopyByCommutingDef(const CoalescerPair &CP, MachineInstr *UseMI = &*UI; SlotIndex UseIdx = LIS->getInstructionIndex(UseMI); LiveInterval::iterator ULR = IntA.FindLiveRangeContaining(UseIdx); - if (ULR == IntA.end()) + if (ULR == IntA.end() || ULR->valno != AValNo) continue; - if (ULR->valno == AValNo && JoinedCopies.count(UseMI)) + // If this use is tied to a def, we can't rewrite the register. + if (UseMI->isRegTiedToDefOperand(UI.getOperandNo())) return false; } - DEBUG(dbgs() << "\tRemoveCopyByCommutingDef: " << AValNo->def << '\t' + DEBUG(dbgs() << "\tremoveCopyByCommutingDef: " << AValNo->def << '\t' << *DefMI); // At this point we have decided that it is legal to do this @@ -709,8 +643,6 @@ bool RegisterCoalescer::RemoveCopyByCommutingDef(const CoalescerPair &CP, MachineOperand &UseMO = UI.getOperand(); MachineInstr *UseMI = &*UI; ++UI; - if (JoinedCopies.count(UseMI)) - continue; if (UseMI->isDebugValue()) { // FIXME These don't have an instruction index. Not clear we have enough // info to decide whether to do this replacement or not. For now do it. @@ -721,6 +653,8 @@ bool RegisterCoalescer::RemoveCopyByCommutingDef(const CoalescerPair &CP, LiveInterval::iterator ULR = IntA.FindLiveRangeContaining(UseIdx); if (ULR == IntA.end() || ULR->valno != AValNo) continue; + // Kill flags are no longer accurate. They are recomputed after RA. + UseMO.setIsKill(false); if (TargetRegisterInfo::isPhysicalRegister(NewReg)) UseMO.substPhysReg(NewReg, *TRI); else @@ -742,7 +676,9 @@ bool RegisterCoalescer::RemoveCopyByCommutingDef(const CoalescerPair &CP, DEBUG(dbgs() << "\t\tnoop: " << DefIdx << '\t' << *UseMI); assert(DVNI->def == DefIdx); BValNo = IntB.MergeValueNumberInto(BValNo, DVNI); - markAsJoined(UseMI); + ErasedInstrs.insert(UseMI); + LIS->RemoveMachineInstrFromMaps(UseMI); + UseMI->eraseFromParent(); } // Extend BValNo by merging in IntA live ranges of AValNo. Val# definition @@ -762,12 +698,11 @@ bool RegisterCoalescer::RemoveCopyByCommutingDef(const CoalescerPair &CP, return true; } -/// ReMaterializeTrivialDef - If the source of a copy is defined by a trivial +/// reMaterializeTrivialDef - If the source of a copy is defined by a trivial /// computation, replace the copy by rematerialize the definition. 
-bool RegisterCoalescer::ReMaterializeTrivialDef(LiveInterval &SrcInt, - bool preserveSrcInt, - unsigned DstReg, - MachineInstr *CopyMI) { +bool RegisterCoalescer::reMaterializeTrivialDef(LiveInterval &SrcInt, + unsigned DstReg, + MachineInstr *CopyMI) { SlotIndex CopyIdx = LIS->getInstructionIndex(CopyMI).getRegSlot(true); LiveInterval::iterator SrcLR = SrcInt.FindLiveRangeContaining(CopyIdx); assert(SrcLR != SrcInt.end() && "Live range not found!"); @@ -792,7 +727,7 @@ bool RegisterCoalescer::ReMaterializeTrivialDef(LiveInterval &SrcInt, // Make sure the copy destination register class fits the instruction // definition register class. The mismatch can happen as a result of earlier // extract_subreg, insert_subreg, subreg_to_reg coalescing. - const TargetRegisterClass *RC = TII->getRegClass(MCID, 0, TRI); + const TargetRegisterClass *RC = TII->getRegClass(MCID, 0, TRI, *MF); if (TargetRegisterInfo::isVirtualRegister(DstReg)) { if (MRI->getRegClass(DstReg) != RC) return false; @@ -838,23 +773,21 @@ bool RegisterCoalescer::ReMaterializeTrivialDef(LiveInterval &SrcInt, SlotIndex NewMIIdx = LIS->getInstructionIndex(NewMI); for (unsigned i = 0, e = NewMIImplDefs.size(); i != e; ++i) { - unsigned reg = NewMIImplDefs[i]; - LiveInterval &li = LIS->getInterval(reg); - VNInfo *DeadDefVN = li.getNextValue(NewMIIdx.getRegSlot(), - LIS->getVNInfoAllocator()); - LiveRange lr(NewMIIdx.getRegSlot(), NewMIIdx.getDeadSlot(), DeadDefVN); - li.addRange(lr); + unsigned Reg = NewMIImplDefs[i]; + for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units) + if (LiveInterval *LI = LIS->getCachedRegUnit(*Units)) + LI->createDeadDef(NewMIIdx.getRegSlot(), LIS->getVNInfoAllocator()); } CopyMI->eraseFromParent(); - ReMatCopies.insert(CopyMI); - ReMatDefs.insert(DefMI); + ErasedInstrs.insert(CopyMI); DEBUG(dbgs() << "Remat: " << *NewMI); ++NumReMats; // The source interval can become smaller because we removed a use. - if (preserveSrcInt) - LIS->shrinkToUses(&SrcInt); + LIS->shrinkToUses(&SrcInt, &DeadDefs); + if (!DeadDefs.empty()) + eliminateDeadDefs(); return true; } @@ -902,51 +835,40 @@ bool RegisterCoalescer::eliminateUndefCopy(MachineInstr *CopyMI, return true; } -/// UpdateRegDefsUses - Replace all defs and uses of SrcReg to DstReg and +/// updateRegDefsUses - Replace all defs and uses of SrcReg to DstReg and /// update the subregister number if it is not zero. If DstReg is a /// physical register and the existing subregister number of the def / use /// being updated is not zero, make sure to set it to the correct physical /// subregister. -void -RegisterCoalescer::UpdateRegDefsUses(const CoalescerPair &CP) { - bool DstIsPhys = CP.isPhys(); - unsigned SrcReg = CP.getSrcReg(); - unsigned DstReg = CP.getDstReg(); - unsigned SubIdx = CP.getSubIdx(); +void RegisterCoalescer::updateRegDefsUses(unsigned SrcReg, + unsigned DstReg, + unsigned SubIdx) { + bool DstIsPhys = TargetRegisterInfo::isPhysicalRegister(DstReg); + LiveInterval *DstInt = DstIsPhys ? 0 : &LIS->getInterval(DstReg); // Update LiveDebugVariables. LDV->renameRegister(SrcReg, DstReg, SubIdx); for (MachineRegisterInfo::reg_iterator I = MRI->reg_begin(SrcReg); MachineInstr *UseMI = I.skipInstruction();) { - // A PhysReg copy that won't be coalesced can perhaps be rematerialized - // instead. 
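Two details of reMaterializeTrivialDef above are easy to miss. First, implicit physreg defs on the rematerialized instruction become dead defs on the per-register-unit ranges; getCachedRegUnit returns null for units whose range was never computed, and those are simply skipped. Second, shrinkToUses now takes the DeadDefs list, so removing the copy can cascade through eliminateDeadDefs(). The first idiom in isolation, as a sketch:

    // Record Reg as dead-defined at Idx on every materialized register unit,
    // mirroring the loop over NewMIImplDefs above.
    static void markDeadDef(unsigned Reg, SlotIndex Idx, LiveIntervals *LIS,
                            const TargetRegisterInfo *TRI) {
      for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units)
        if (LiveInterval *LI = LIS->getCachedRegUnit(*Units))
          LI->createDeadDef(Idx.getRegSlot(), LIS->getVNInfoAllocator());
    }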
- if (DstIsPhys) { - if (UseMI->isFullCopy() && - UseMI->getOperand(1).getReg() == SrcReg && - UseMI->getOperand(0).getReg() != SrcReg && - UseMI->getOperand(0).getReg() != DstReg && - !JoinedCopies.count(UseMI) && - ReMaterializeTrivialDef(LIS->getInterval(SrcReg), false, - UseMI->getOperand(0).getReg(), UseMI)) - continue; - } - SmallVector Ops; bool Reads, Writes; tie(Reads, Writes) = UseMI->readsWritesVirtualRegister(SrcReg, &Ops); + // If SrcReg wasn't read, it may still be the case that DstReg is live-in + // because SrcReg is a sub-register. + if (DstInt && !Reads && SubIdx) + Reads = DstInt->liveAt(LIS->getInstructionIndex(UseMI)); + // Replace SrcReg with DstReg in all UseMI operands. for (unsigned i = 0, e = Ops.size(); i != e; ++i) { MachineOperand &MO = UseMI->getOperand(Ops[i]); - // Make sure we don't create read-modify-write defs accidentally. We - // assume here that a SrcReg def cannot be joined into a live DstReg. If - // RegisterCoalescer starts tracking partially live registers, we will - // need to check the actual LiveInterval to determine if DstReg is live - // here. - if (SubIdx && !Reads) - MO.setIsUndef(); + // Adjust flags in case of sub-register joins. We don't want to + // turn a full def into a read-modify-write sub-register def and vice + // versa. + if (SubIdx && MO.isDef()) + MO.setIsUndef(!Reads); if (DstIsPhys) MO.substPhysReg(DstReg, *TRI); @@ -954,10 +876,6 @@ RegisterCoalescer::UpdateRegDefsUses(const CoalescerPair &CP) { MO.substVirtReg(DstReg, SubIdx, *TRI); } - // This instruction is a copy that will be removed. - if (JoinedCopies.count(UseMI)) - continue; - DEBUG({ dbgs() << "\t\tupdated: "; if (!UseMI->isDebugValue()) @@ -967,210 +885,107 @@ RegisterCoalescer::UpdateRegDefsUses(const CoalescerPair &CP) { } } -/// removeIntervalIfEmpty - Check if the live interval of a physical register -/// is empty, if so remove it and also remove the empty intervals of its -/// sub-registers. Return true if live interval is removed. -static bool removeIntervalIfEmpty(LiveInterval &li, LiveIntervals *LIS, - const TargetRegisterInfo *TRI) { - if (li.empty()) { - if (TargetRegisterInfo::isPhysicalRegister(li.reg)) - for (const uint16_t* SR = TRI->getSubRegisters(li.reg); *SR; ++SR) { - if (!LIS->hasInterval(*SR)) - continue; - LiveInterval &sli = LIS->getInterval(*SR); - if (sli.empty()) - LIS->removeInterval(*SR); - } - LIS->removeInterval(li.reg); - return true; - } - return false; -} - -/// RemoveDeadDef - If a def of a live interval is now determined dead, remove -/// the val# it defines. If the live interval becomes empty, remove it as well. -bool RegisterCoalescer::RemoveDeadDef(LiveInterval &li, - MachineInstr *DefMI) { - SlotIndex DefIdx = LIS->getInstructionIndex(DefMI).getRegSlot(); - LiveInterval::iterator MLR = li.FindLiveRangeContaining(DefIdx); - if (DefIdx != MLR->valno->def) - return false; - li.removeValNo(MLR->valno); - return removeIntervalIfEmpty(li, LIS, TRI); -} - -/// shouldJoinPhys - Return true if a copy involving a physreg should be joined. -/// We need to be careful about coalescing a source physical register with a -/// virtual register. Once the coalescing is done, it cannot be broken and these -/// are not spillable! If the destination interval uses are far away, think -/// twice about coalescing them! 
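The operand loop in updateRegDefsUses above encodes one subtle rule: once SrcReg is folded into a sub-register of DstReg, a full def of SrcReg becomes a sub-register def of DstReg, and the undef flag must say whether the other lanes are read. Spelled out as a table:

    // Effect of MO.setIsUndef(!Reads) on a sub-register def of DstReg:
    //
    //   Reads == true   ->  DstReg:sub, def          read-modify-write; the
    //                                                other lanes stay live.
    //   Reads == false  ->  DstReg:sub, def + undef  whole register is
    //                                                redefined; nothing read.
    //
    // 'Reads' is true if the instruction read SrcReg, or (new in this hunk)
    // if DstReg's interval is live at the instruction through other lanes.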
-bool RegisterCoalescer::shouldJoinPhys(CoalescerPair &CP) { - bool Allocatable = LIS->isAllocatable(CP.getDstReg()); - LiveInterval &JoinVInt = LIS->getInterval(CP.getSrcReg()); - +/// canJoinPhys - Return true if a copy involving a physreg should be joined. +bool RegisterCoalescer::canJoinPhys(CoalescerPair &CP) { /// Always join simple intervals that are defined by a single copy from a /// reserved register. This doesn't increase register pressure, so it is /// always beneficial. - if (!Allocatable && CP.isFlipped() && JoinVInt.containsOneValue()) - return true; - - if (!EnablePhysicalJoin) { - DEBUG(dbgs() << "\tPhysreg joins disabled.\n"); - return false; - } - - // Only coalesce to allocatable physreg, we don't want to risk modifying - // reserved registers. - if (!Allocatable) { - DEBUG(dbgs() << "\tRegister is an unallocatable physreg.\n"); - return false; // Not coalescable. - } - - // Don't join with physregs that have a ridiculous number of live - // ranges. The data structure performance is really bad when that - // happens. - if (LIS->hasInterval(CP.getDstReg()) && - LIS->getInterval(CP.getDstReg()).ranges.size() > 1000) { - ++numAborts; - DEBUG(dbgs() - << "\tPhysical register live interval too complicated, abort!\n"); + if (!RegClassInfo.isReserved(CP.getDstReg())) { + DEBUG(dbgs() << "\tCan only merge into reserved registers.\n"); return false; } - // FIXME: Why are we skipping this test for partial copies? - // CodeGen/X86/phys_subreg_coalesce-3.ll needs it. - if (!CP.isPartial()) { - const TargetRegisterClass *RC = MRI->getRegClass(CP.getSrcReg()); - unsigned Threshold = RegClassInfo.getNumAllocatableRegs(RC) * 2; - unsigned Length = LIS->getApproximateInstructionCount(JoinVInt); - if (Length > Threshold) { - ++numAborts; - DEBUG(dbgs() << "\tMay tie down a physical register, abort!\n"); - return false; - } - } - return true; -} - -/// isWinToJoinCrossClass - Return true if it's profitable to coalesce -/// two virtual registers from different register classes. -bool -RegisterCoalescer::isWinToJoinCrossClass(unsigned SrcReg, - unsigned DstReg, - const TargetRegisterClass *SrcRC, - const TargetRegisterClass *DstRC, - const TargetRegisterClass *NewRC) { - unsigned NewRCCount = RegClassInfo.getNumAllocatableRegs(NewRC); - // This heuristics is good enough in practice, but it's obviously not *right*. - // 4 is a magic number that works well enough for x86, ARM, etc. It filter - // out all but the most restrictive register classes. - if (NewRCCount > 4 || - // Early exit if the function is fairly small, coalesce aggressively if - // that's the case. For really special register classes with 3 or - // fewer registers, be a bit more careful. - (LIS->getFuncInstructionCount() / NewRCCount) < 8) - return true; - LiveInterval &SrcInt = LIS->getInterval(SrcReg); - LiveInterval &DstInt = LIS->getInterval(DstReg); - unsigned SrcSize = LIS->getApproximateInstructionCount(SrcInt); - unsigned DstSize = LIS->getApproximateInstructionCount(DstInt); - - // Coalesce aggressively if the intervals are small compared to the number of - // registers in the new class. The number 4 is fairly arbitrary, chosen to be - // less aggressive than the 8 used for the whole function size. - const unsigned ThresSize = 4 * NewRCCount; - if (SrcSize <= ThresSize && DstSize <= ThresSize) + LiveInterval &JoinVInt = LIS->getInterval(CP.getSrcReg()); + if (CP.isFlipped() && JoinVInt.containsOneValue()) return true; - // Estimate *register use density*. If it doubles or more, abort. 
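All of the tunable physreg-join heuristics deleted here (the -join-physregs flag, the live-range-count abort, the instruction-count threshold, and the cross-class profitability estimate below) collapse into one rule: a physreg join is taken only when it is unconditionally profitable. Condensed, the surviving policy reads:

    // The entire physreg-join policy after this change (sketch of
    // canJoinPhys above): only a single-valued vreg that is a copy of a
    // reserved register (e.g. ESP) may be merged.
    bool canJoinPhys(const CoalescerPair &CP) {
      if (!RegClassInfo.isReserved(CP.getDstReg()))
        return false;                        // Allocatable physregs: never.
      const LiveInterval &JoinVInt = LIS->getInterval(CP.getSrcReg());
      return CP.isFlipped() && JoinVInt.containsOneValue();
    }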
- unsigned SrcUses = std::distance(MRI->use_nodbg_begin(SrcReg), - MRI->use_nodbg_end()); - unsigned DstUses = std::distance(MRI->use_nodbg_begin(DstReg), - MRI->use_nodbg_end()); - unsigned NewUses = SrcUses + DstUses; - unsigned NewSize = SrcSize + DstSize; - if (SrcRC != NewRC && SrcSize > ThresSize) { - unsigned SrcRCCount = RegClassInfo.getNumAllocatableRegs(SrcRC); - if (NewUses*SrcSize*SrcRCCount > 2*SrcUses*NewSize*NewRCCount) - return false; - } - if (DstRC != NewRC && DstSize > ThresSize) { - unsigned DstRCCount = RegClassInfo.getNumAllocatableRegs(DstRC); - if (NewUses*DstSize*DstRCCount > 2*DstUses*NewSize*NewRCCount) - return false; - } - return true; + DEBUG(dbgs() << "\tCannot join defs into reserved register.\n"); + return false; } - -/// JoinCopy - Attempt to join intervals corresponding to SrcReg/DstReg, +/// joinCopy - Attempt to join intervals corresponding to SrcReg/DstReg, /// which are the src/dst of the copy instruction CopyMI. This returns true /// if the copy was successfully coalesced away. If it is not currently /// possible to coalesce this interval, but it may be possible if other /// things get coalesced, then it returns true by reference in 'Again'. -bool RegisterCoalescer::JoinCopy(MachineInstr *CopyMI, bool &Again) { +bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) { Again = false; - if (JoinedCopies.count(CopyMI) || ReMatCopies.count(CopyMI)) - return false; // Already done. - DEBUG(dbgs() << LIS->getInstructionIndex(CopyMI) << '\t' << *CopyMI); - CoalescerPair CP(*TII, *TRI); + CoalescerPair CP(*TRI); if (!CP.setRegisters(CopyMI)) { DEBUG(dbgs() << "\tNot coalescable.\n"); return false; } - // If they are already joined we continue. - if (CP.getSrcReg() == CP.getDstReg()) { - markAsJoined(CopyMI); - DEBUG(dbgs() << "\tCopy already coalesced.\n"); - return false; // Not coalescable. + // Dead code elimination. This really should be handled by MachineDCE, but + // sometimes dead copies slip through, and we can't generate invalid live + // ranges. + if (!CP.isPhys() && CopyMI->allDefsAreDead()) { + DEBUG(dbgs() << "\tCopy is dead.\n"); + DeadDefs.push_back(CopyMI); + eliminateDeadDefs(); + return true; } // Eliminate undefs. if (!CP.isPhys() && eliminateUndefCopy(CopyMI, CP)) { - markAsJoined(CopyMI); DEBUG(dbgs() << "\tEliminated copy of value.\n"); + LIS->RemoveMachineInstrFromMaps(CopyMI); + CopyMI->eraseFromParent(); return false; // Not coalescable. } - DEBUG(dbgs() << "\tConsidering merging " << PrintReg(CP.getSrcReg(), TRI) - << " with " << PrintReg(CP.getDstReg(), TRI, CP.getSubIdx()) - << "\n"); + // Coalesced copies are normally removed immediately, but transformations + // like removeCopyByCommutingDef() can inadvertently create identity copies. + // When that happens, just join the values and remove the copy. + if (CP.getSrcReg() == CP.getDstReg()) { + LiveInterval &LI = LIS->getInterval(CP.getSrcReg()); + DEBUG(dbgs() << "\tCopy already coalesced: " << LI << '\n'); + LiveRangeQuery LRQ(LI, LIS->getInstructionIndex(CopyMI)); + if (VNInfo *DefVNI = LRQ.valueDefined()) { + VNInfo *ReadVNI = LRQ.valueIn(); + assert(ReadVNI && "No value before copy and no flag."); + assert(ReadVNI != DefVNI && "Cannot read and define the same value."); + LI.MergeValueNumberInto(DefVNI, ReadVNI); + DEBUG(dbgs() << "\tMerged values: " << LI << '\n'); + } + LIS->RemoveMachineInstrFromMaps(CopyMI); + CopyMI->eraseFromParent(); + return true; + } // Enforce policies. 
if (CP.isPhys()) { - if (!shouldJoinPhys(CP)) { + DEBUG(dbgs() << "\tConsidering merging " << PrintReg(CP.getSrcReg(), TRI) + << " with " << PrintReg(CP.getDstReg(), TRI, CP.getSrcIdx()) + << '\n'); + if (!canJoinPhys(CP)) { // Before giving up coalescing, if definition of source is defined by // trivial computation, try rematerializing it. if (!CP.isFlipped() && - ReMaterializeTrivialDef(LIS->getInterval(CP.getSrcReg()), true, + reMaterializeTrivialDef(LIS->getInterval(CP.getSrcReg()), CP.getDstReg(), CopyMI)) return true; return false; } } else { - // Avoid constraining virtual register regclass too much. - if (CP.isCrossClass()) { - DEBUG(dbgs() << "\tCross-class to " << CP.getNewRC()->getName() << ".\n"); - if (DisableCrossClassJoin) { - DEBUG(dbgs() << "\tCross-class joins disabled.\n"); - return false; - } - if (!isWinToJoinCrossClass(CP.getSrcReg(), CP.getDstReg(), - MRI->getRegClass(CP.getSrcReg()), - MRI->getRegClass(CP.getDstReg()), - CP.getNewRC())) { - DEBUG(dbgs() << "\tAvoid coalescing to constrained register class.\n"); - Again = true; // May be possible to coalesce later. - return false; - } - } + DEBUG({ + dbgs() << "\tConsidering merging to " << CP.getNewRC()->getName() + << " with "; + if (CP.getDstIdx() && CP.getSrcIdx()) + dbgs() << PrintReg(CP.getDstReg()) << " in " + << TRI->getSubRegIndexName(CP.getDstIdx()) << " and " + << PrintReg(CP.getSrcReg()) << " in " + << TRI->getSubRegIndexName(CP.getSrcIdx()) << '\n'; + else + dbgs() << PrintReg(CP.getSrcReg(), TRI) << " in " + << PrintReg(CP.getDstReg(), TRI, CP.getSrcIdx()) << '\n'; + }); // When possible, let DstReg be the larger interval. - if (!CP.getSubIdx() && LIS->getInterval(CP.getSrcReg()).ranges.size() > + if (!CP.isPartial() && LIS->getInterval(CP.getSrcReg()).ranges.size() > LIS->getInterval(CP.getDstReg()).ranges.size()) CP.flip(); } @@ -1179,21 +994,22 @@ bool RegisterCoalescer::JoinCopy(MachineInstr *CopyMI, bool &Again) { // Otherwise, if one of the intervals being joined is a physreg, this method // always canonicalizes DstInt to be it. The output "SrcInt" will not have // been modified, so we can use this information below to update aliases. - if (!JoinIntervals(CP)) { + if (!joinIntervals(CP)) { // Coalescing failed. // If definition of source is defined by trivial computation, try // rematerializing it. if (!CP.isFlipped() && - ReMaterializeTrivialDef(LIS->getInterval(CP.getSrcReg()), true, + reMaterializeTrivialDef(LIS->getInterval(CP.getSrcReg()), CP.getDstReg(), CopyMI)) return true; // If we can eliminate the copy without merging the live ranges, do so now. - if (!CP.isPartial()) { - if (AdjustCopiesBackFrom(CP, CopyMI) || - RemoveCopyByCommutingDef(CP, CopyMI)) { - markAsJoined(CopyMI); + if (!CP.isPartial() && !CP.isPhys()) { + if (adjustCopiesBackFrom(CP, CopyMI) || + removeCopyByCommutingDef(CP, CopyMI)) { + LIS->RemoveMachineInstrFromMaps(CopyMI); + CopyMI->eraseFromParent(); DEBUG(dbgs() << "\tTrivial!\n"); return true; } @@ -1212,29 +1028,21 @@ bool RegisterCoalescer::JoinCopy(MachineInstr *CopyMI, bool &Again) { MRI->setRegClass(CP.getDstReg(), CP.getNewRC()); } - // Remember to delete the copy instruction. - markAsJoined(CopyMI); + // Removing sub-register copies can ease the register class constraints. + // Make sure we attempt to inflate the register class of DstReg. + if (!CP.isPhys() && RegClassInfo.isProperSubClass(CP.getNewRC())) + InflateRegs.push_back(CP.getDstReg()); - UpdateRegDefsUses(CP); + // CopyMI has been erased by joinIntervals at this point. 
Remove it from + // ErasedInstrs since copyCoalesceWorkList() won't add a successful join back + // to the work list. This keeps ErasedInstrs from growing needlessly. + ErasedInstrs.erase(CopyMI); - // If we have extended the live range of a physical register, make sure we - // update live-in lists as well. - if (CP.isPhys()) { - SmallVector BlockSeq; - // JoinIntervals invalidates the VNInfos in SrcInt, but we only need the - // ranges for this, and they are preserved. - LiveInterval &SrcInt = LIS->getInterval(CP.getSrcReg()); - for (LiveInterval::const_iterator I = SrcInt.begin(), E = SrcInt.end(); - I != E; ++I ) { - LIS->findLiveInMBBs(I->start, I->end, BlockSeq); - for (unsigned idx = 0, size = BlockSeq.size(); idx != size; ++idx) { - MachineBasicBlock &block = *BlockSeq[idx]; - if (!block.isLiveIn(CP.getDstReg())) - block.addLiveIn(CP.getDstReg()); - } - BlockSeq.clear(); - } - } + // Rewrite all SrcReg operands to DstReg. + // Also update DstReg operands to include DstIdx if it is set. + if (CP.getDstIdx()) + updateRegDefsUses(CP.getDstReg(), CP.getDstReg(), CP.getDstIdx()); + updateRegDefsUses(CP.getSrcReg(), CP.getDstReg(), CP.getSrcIdx()); // SrcReg is guaranteed to be the register whose live interval that is // being merged. @@ -1244,16 +1052,56 @@ bool RegisterCoalescer::JoinCopy(MachineInstr *CopyMI, bool &Again) { TRI->UpdateRegAllocHint(CP.getSrcReg(), CP.getDstReg(), *MF); DEBUG({ - LiveInterval &DstInt = LIS->getInterval(CP.getDstReg()); - dbgs() << "\tJoined. Result = "; - DstInt.print(dbgs(), TRI); - dbgs() << "\n"; + dbgs() << "\tJoined. Result = " << PrintReg(CP.getDstReg(), TRI); + if (!CP.isPhys()) + dbgs() << LIS->getInterval(CP.getDstReg()); + dbgs() << '\n'; }); ++numJoins; return true; } +/// Attempt joining with a reserved physreg. +bool RegisterCoalescer::joinReservedPhysReg(CoalescerPair &CP) { + assert(CP.isPhys() && "Must be a physreg copy"); + assert(RegClassInfo.isReserved(CP.getDstReg()) && "Not a reserved register"); + LiveInterval &RHS = LIS->getInterval(CP.getSrcReg()); + DEBUG(dbgs() << "\t\tRHS = " << PrintReg(CP.getSrcReg()) << ' ' << RHS + << '\n'); + + assert(CP.isFlipped() && RHS.containsOneValue() && + "Invalid join with reserved register"); + + // Optimization for reserved registers like ESP. We can only merge with a + // reserved physreg if RHS has a single value that is a copy of CP.DstReg(). + // The live range of the reserved register will look like a set of dead defs + // - we don't properly track the live range of reserved registers. + + // Deny any overlapping intervals. This depends on all the reserved + // register live ranges to look like dead defs. + for (MCRegUnitIterator UI(CP.getDstReg(), TRI); UI.isValid(); ++UI) + if (RHS.overlaps(LIS->getRegUnit(*UI))) { + DEBUG(dbgs() << "\t\tInterference: " << PrintRegUnit(*UI, TRI) << '\n'); + return false; + } + + // Skip any value computations, we are not adding new values to the + // reserved register. Also skip merging the live ranges, the reserved + // register live range doesn't need to be accurate as long as all the + // defs are there. + + // Delete the identity copy. + MachineInstr *CopyMI = MRI->getVRegDef(RHS.reg); + LIS->RemoveMachineInstrFromMaps(CopyMI); + CopyMI->eraseFromParent(); + + // We don't track kills for reserved registers. + MRI->clearKillFlags(CP.getSrcReg()); + + return true; +} + /// ComputeUltimateVN - Assuming we are going to join two live intervals, /// compute what the resultant value numbers for each value in the input two /// ranges will be. 
This is complicated by copies between the two which can @@ -1320,144 +1168,70 @@ static bool RegistersDefinedFromSameValue(LiveIntervals &li, const TargetRegisterInfo &tri, CoalescerPair &CP, VNInfo *VNI, - LiveRange *LR, + VNInfo *OtherVNI, SmallVector &DupCopies) { // FIXME: This is very conservative. For example, we don't handle // physical registers. MachineInstr *MI = li.getInstructionFromIndex(VNI->def); - if (!MI || !MI->isFullCopy() || CP.isPartial() || CP.isPhys()) + if (!MI || CP.isPartial() || CP.isPhys()) return false; - unsigned Dst = MI->getOperand(0).getReg(); - unsigned Src = MI->getOperand(1).getReg(); - - if (!TargetRegisterInfo::isVirtualRegister(Src) || - !TargetRegisterInfo::isVirtualRegister(Dst)) + unsigned A = CP.getDstReg(); + if (!TargetRegisterInfo::isVirtualRegister(A)) return false; - unsigned A = CP.getDstReg(); unsigned B = CP.getSrcReg(); - - if (B == Dst) - std::swap(A, B); - assert(Dst == A); - - VNInfo *Other = LR->valno; - const MachineInstr *OtherMI = li.getInstructionFromIndex(Other->def); - - if (!OtherMI || !OtherMI->isFullCopy()) + if (!TargetRegisterInfo::isVirtualRegister(B)) return false; - unsigned OtherDst = OtherMI->getOperand(0).getReg(); - unsigned OtherSrc = OtherMI->getOperand(1).getReg(); - - if (!TargetRegisterInfo::isVirtualRegister(OtherSrc) || - !TargetRegisterInfo::isVirtualRegister(OtherDst)) + MachineInstr *OtherMI = li.getInstructionFromIndex(OtherVNI->def); + if (!OtherMI) return false; - assert(OtherDst == B); - - if (Src != OtherSrc) - return false; + if (MI->isImplicitDef()) { + DupCopies.push_back(MI); + return true; + } else { + if (!MI->isFullCopy()) + return false; + unsigned Src = MI->getOperand(1).getReg(); + if (!TargetRegisterInfo::isVirtualRegister(Src)) + return false; + if (!OtherMI->isFullCopy()) + return false; + unsigned OtherSrc = OtherMI->getOperand(1).getReg(); + if (!TargetRegisterInfo::isVirtualRegister(OtherSrc)) + return false; - // If the copies use two different value numbers of X, we cannot merge - // A and B. - LiveInterval &SrcInt = li.getInterval(Src); - // getVNInfoBefore returns NULL for undef copies. In this case, the - // optimization is still safe. - if (SrcInt.getVNInfoBefore(Other->def) != SrcInt.getVNInfoBefore(VNI->def)) - return false; + if (Src != OtherSrc) + return false; - DupCopies.push_back(MI); + // If the copies use two different value numbers of X, we cannot merge + // A and B. + LiveInterval &SrcInt = li.getInterval(Src); + // getVNInfoBefore returns NULL for undef copies. In this case, the + // optimization is still safe. + if (SrcInt.getVNInfoBefore(OtherVNI->def) != + SrcInt.getVNInfoBefore(VNI->def)) + return false; - return true; + DupCopies.push_back(MI); + return true; + } } -/// JoinIntervals - Attempt to join these two intervals. On failure, this +/// joinIntervals - Attempt to join these two intervals. On failure, this /// returns false. -bool RegisterCoalescer::JoinIntervals(CoalescerPair &CP) { - LiveInterval &RHS = LIS->getInterval(CP.getSrcReg()); - DEBUG({ dbgs() << "\t\tRHS = "; RHS.print(dbgs(), TRI); dbgs() << "\n"; }); - - // If a live interval is a physical register, check for interference with any - // aliases. The interference check implemented here is a bit more conservative - // than the full interfeence check below. We allow overlapping live ranges - // only when one is a copy of the other. - if (CP.isPhys()) { - // Optimization for reserved registers like ESP. 
- // We can only merge with a reserved physreg if RHS has a single value that - // is a copy of CP.DstReg(). The live range of the reserved register will - // look like a set of dead defs - we don't properly track the live range of - // reserved registers. - if (RegClassInfo.isReserved(CP.getDstReg())) { - assert(CP.isFlipped() && RHS.containsOneValue() && - "Invalid join with reserved register"); - // Deny any overlapping intervals. This depends on all the reserved - // register live ranges to look like dead defs. - for (const uint16_t *AS = TRI->getOverlaps(CP.getDstReg()); *AS; ++AS) { - if (!LIS->hasInterval(*AS)) { - // Make sure at least DstReg itself exists before attempting a join. - if (*AS == CP.getDstReg()) - LIS->getOrCreateInterval(CP.getDstReg()); - continue; - } - if (RHS.overlaps(LIS->getInterval(*AS))) { - DEBUG(dbgs() << "\t\tInterference: " << PrintReg(*AS, TRI) << '\n'); - return false; - } - } - // Skip any value computations, we are not adding new values to the - // reserved register. Also skip merging the live ranges, the reserved - // register live range doesn't need to be accurate as long as all the - // defs are there. - return true; - } - - // Check if a register mask clobbers DstReg. - BitVector UsableRegs; - if (LIS->checkRegMaskInterference(RHS, UsableRegs) && - !UsableRegs.test(CP.getDstReg())) { - DEBUG(dbgs() << "\t\tRegister mask interference.\n"); - return false; - } +bool RegisterCoalescer::joinIntervals(CoalescerPair &CP) { + // Handle physreg joins separately. + if (CP.isPhys()) + return joinReservedPhysReg(CP); - for (const uint16_t *AS = TRI->getAliasSet(CP.getDstReg()); *AS; ++AS){ - if (!LIS->hasInterval(*AS)) - continue; - const LiveInterval &LHS = LIS->getInterval(*AS); - LiveInterval::const_iterator LI = LHS.begin(); - for (LiveInterval::const_iterator RI = RHS.begin(), RE = RHS.end(); - RI != RE; ++RI) { - LI = std::lower_bound(LI, LHS.end(), RI->start); - // Does LHS have an overlapping live range starting before RI? - if ((LI != LHS.begin() && LI[-1].end > RI->start) && - (RI->start != RI->valno->def || - !CP.isCoalescable(LIS->getInstructionFromIndex(RI->start)))) { - DEBUG({ - dbgs() << "\t\tInterference from alias: "; - LHS.print(dbgs(), TRI); - dbgs() << "\n\t\tOverlap at " << RI->start << " and no copy.\n"; - }); - return false; - } - - // Check that LHS ranges beginning in this range are copies. - for (; LI != LHS.end() && LI->start < RI->end; ++LI) { - if (LI->start != LI->valno->def || - !CP.isCoalescable(LIS->getInstructionFromIndex(LI->start))) { - DEBUG({ - dbgs() << "\t\tInterference from alias: "; - LHS.print(dbgs(), TRI); - dbgs() << "\n\t\tDef at " << LI->start << " is not a copy.\n"; - }); - return false; - } - } - } - } - } + LiveInterval &RHS = LIS->getInterval(CP.getSrcReg()); + DEBUG(dbgs() << "\t\tRHS = " << PrintReg(CP.getSrcReg()) << ' ' << RHS + << '\n'); // Compute the final value assignment, assuming that the live ranges can be // coalesced. @@ -1468,9 +1242,11 @@ bool RegisterCoalescer::JoinIntervals(CoalescerPair &CP) { SmallVector NewVNInfo; SmallVector DupCopies; + SmallVector DeadCopies; LiveInterval &LHS = LIS->getOrCreateInterval(CP.getDstReg()); - DEBUG({ dbgs() << "\t\tLHS = "; LHS.print(dbgs(), TRI); dbgs() << "\n"; }); + DEBUG(dbgs() << "\t\tLHS = " << PrintReg(CP.getDstReg(), TRI) << ' ' << LHS + << '\n'); // Loop over the value numbers of the LHS, seeing if any are defined from // the RHS. 
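The two scans in the next hunk build LHSValsDefinedFromRHS and RHSValsDefinedFromLHS: a value number defined by a copy from the other interval can share that value's slot in the joined interval, while every other value needs a slot of its own. A minimal standalone sketch of that assignment, using toy types rather than LLVM's VNInfo API (the real code must also prove the copy coalescable, or a duplicate of the same source value, before sharing):

    #include <cstddef>
    #include <cstdio>
    #include <vector>

    struct Val { int Id; int CopyOfOther; }; // CopyOfOther: other side's Id, or -1

    int main() {
      // LHS value 1 is defined by a copy of RHS value 0, e.g. %lhs = COPY %rhs.
      std::vector<Val> LHS = { {0, -1}, {1, 0} };
      std::vector<Val> RHS = { {0, -1} };
      std::vector<int> LHSAssign(LHS.size(), -1), RHSAssign(RHS.size(), -1);
      int NextId = 0;
      for (std::size_t i = 0; i != RHS.size(); ++i)   // RHS values get slots first
        RHSAssign[RHS[i].Id] = NextId++;
      for (std::size_t i = 0; i != LHS.size(); ++i) { // LHS shares or gets a fresh slot
        const Val &V = LHS[i];
        LHSAssign[V.Id] = V.CopyOfOther >= 0 ? RHSAssign[V.CopyOfOther] : NextId++;
      }
      for (std::size_t i = 0; i != LHS.size(); ++i)
        std::printf("LHS v%d -> joined v%d\n", LHS[i].Id, LHSAssign[LHS[i].Id]);
      for (std::size_t i = 0; i != RHS.size(); ++i)
        std::printf("RHS v%d -> joined v%d\n", RHS[i].Id, RHSAssign[RHS[i].Id]);
      return 0;                                       // LHS v1 shares RHS v0's slot
    }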
@@ -1481,21 +1257,24 @@ bool RegisterCoalescer::JoinIntervals(CoalescerPair &CP) { continue; MachineInstr *MI = LIS->getInstructionFromIndex(VNI->def); assert(MI && "Missing def"); - if (!MI->isCopyLike()) // Src not defined by a copy? + if (!MI->isCopyLike() && !MI->isImplicitDef()) // Src not defined by a copy? continue; // Figure out the value # from the RHS. - LiveRange *lr = RHS.getLiveRangeContaining(VNI->def.getPrevSlot()); + VNInfo *OtherVNI = RHS.getVNInfoBefore(VNI->def); // The copy could be to an aliased physreg. - if (!lr) continue; + if (!OtherVNI) + continue; // DstReg is known to be a register in the LHS interval. If the src is // from the RHS interval, we can use its value #. - if (!CP.isCoalescable(MI) && - !RegistersDefinedFromSameValue(*LIS, *TRI, CP, VNI, lr, DupCopies)) + if (CP.isCoalescable(MI)) + DeadCopies.push_back(MI); + else if (!RegistersDefinedFromSameValue(*LIS, *TRI, CP, VNI, OtherVNI, + DupCopies)) continue; - LHSValsDefinedFromRHS[VNI] = lr->valno; + LHSValsDefinedFromRHS[VNI] = OtherVNI; } // Loop over the value numbers of the RHS, seeing if any are defined from @@ -1507,21 +1286,24 @@ bool RegisterCoalescer::JoinIntervals(CoalescerPair &CP) { continue; MachineInstr *MI = LIS->getInstructionFromIndex(VNI->def); assert(MI && "Missing def"); - if (!MI->isCopyLike()) // Src not defined by a copy? + if (!MI->isCopyLike() && !MI->isImplicitDef()) // Src not defined by a copy? continue; // Figure out the value # from the LHS. - LiveRange *lr = LHS.getLiveRangeContaining(VNI->def.getPrevSlot()); + VNInfo *OtherVNI = LHS.getVNInfoBefore(VNI->def); // The copy could be to an aliased physreg. - if (!lr) continue; + if (!OtherVNI) + continue; // DstReg is known to be a register in the RHS interval. If the src is // from the LHS interval, we can use its value #. - if (!CP.isCoalescable(MI) && - !RegistersDefinedFromSameValue(*LIS, *TRI, CP, VNI, lr, DupCopies)) + if (CP.isCoalescable(MI)) + DeadCopies.push_back(MI); + else if (!RegistersDefinedFromSameValue(*LIS, *TRI, CP, VNI, OtherVNI, + DupCopies)) continue; - RHSValsDefinedFromLHS[VNI] = lr->valno; + RHSValsDefinedFromLHS[VNI] = OtherVNI; } LHSValNoAssignments.resize(LHS.getNumValNums(), -1); @@ -1563,6 +1345,10 @@ bool RegisterCoalescer::JoinIntervals(CoalescerPair &CP) { LiveInterval::const_iterator J = RHS.begin(); LiveInterval::const_iterator JE = RHS.end(); + // Collect interval end points that will no longer be kills. + SmallVector LHSOldKills; + SmallVector RHSOldKills; + // Skip ahead until the first place of potential sharing. if (I != IE && J != JE) { if (I->start < J->start) { @@ -1576,20 +1362,21 @@ bool RegisterCoalescer::JoinIntervals(CoalescerPair &CP) { while (I != IE && J != JE) { // Determine if these two live ranges overlap. - bool Overlaps; - if (I->start < J->start) { - Overlaps = I->end > J->start; - } else { - Overlaps = J->end > I->start; - } - // If so, check value # info to determine if they are really different. - if (Overlaps) { + if (I->end > J->start && J->end > I->start) { // If the live range overlap will map to the same value number in the // result liverange, we can still coalesce them. If not, we can't. if (LHSValNoAssignments[I->valno->id] != RHSValNoAssignments[J->valno->id]) return false; + + // Extended live ranges should no longer be killed. 
+ if (!I->end.isBlock() && I->end < J->end) + if (MachineInstr *MI = LIS->getInstructionFromIndex(I->end)) + LHSOldKills.push_back(MI); + if (!J->end.isBlock() && J->end < I->end) + if (MachineInstr *MI = LIS->getInstructionFromIndex(J->end)) + RHSOldKills.push_back(MI); } if (I->end < J->end) @@ -1598,47 +1385,48 @@ bool RegisterCoalescer::JoinIntervals(CoalescerPair &CP) { ++J; } - // Update kill info. Some live ranges are extended due to copy coalescing. - for (DenseMap::iterator I = LHSValsDefinedFromRHS.begin(), - E = LHSValsDefinedFromRHS.end(); I != E; ++I) { - VNInfo *VNI = I->first; - unsigned LHSValID = LHSValNoAssignments[VNI->id]; - if (VNI->hasPHIKill()) - NewVNInfo[LHSValID]->setHasPHIKill(true); - } - - // Update kill info. Some live ranges are extended due to copy coalescing. - for (DenseMap::iterator I = RHSValsDefinedFromLHS.begin(), - E = RHSValsDefinedFromLHS.end(); I != E; ++I) { - VNInfo *VNI = I->first; - unsigned RHSValID = RHSValNoAssignments[VNI->id]; - if (VNI->hasPHIKill()) - NewVNInfo[RHSValID]->setHasPHIKill(true); - } + // Clear kill flags where live ranges are extended. + while (!LHSOldKills.empty()) + LHSOldKills.pop_back_val()->clearRegisterKills(LHS.reg, TRI); + while (!RHSOldKills.empty()) + RHSOldKills.pop_back_val()->clearRegisterKills(RHS.reg, TRI); if (LHSValNoAssignments.empty()) LHSValNoAssignments.push_back(-1); if (RHSValNoAssignments.empty()) RHSValNoAssignments.push_back(-1); + // Now erase all the redundant copies. + for (unsigned i = 0, e = DeadCopies.size(); i != e; ++i) { + MachineInstr *MI = DeadCopies[i]; + if (!ErasedInstrs.insert(MI)) + continue; + DEBUG(dbgs() << "\t\terased:\t" << LIS->getInstructionIndex(MI) + << '\t' << *MI); + LIS->RemoveMachineInstrFromMaps(MI); + MI->eraseFromParent(); + } + SmallVector SourceRegisters; for (SmallVector::iterator I = DupCopies.begin(), E = DupCopies.end(); I != E; ++I) { MachineInstr *MI = *I; + if (!ErasedInstrs.insert(MI)) + continue; - // We have pretended that the assignment to B in + // If MI is a copy, then we have pretended that the assignment to B in // A = X // B = X // was actually a copy from A. Now that we decided to coalesce A and B, // transform the code into // A = X - // X = X - // and mark the X as coalesced to keep the illusion. - unsigned Src = MI->getOperand(1).getReg(); - SourceRegisters.push_back(Src); - MI->getOperand(0).substVirtReg(Src, 0, *TRI); - - markAsJoined(MI); + // In the case of the implicit_def, we just have to remove it. + if (!MI->isImplicitDef()) { + unsigned Src = MI->getOperand(1).getReg(); + SourceRegisters.push_back(Src); + } + LIS->RemoveMachineInstrFromMaps(MI); + MI->eraseFromParent(); } // If B = X was the last use of X in a liverange, we have to shrink it now @@ -1678,73 +1466,58 @@ namespace { }; } -void RegisterCoalescer::CopyCoalesceInMBB(MachineBasicBlock *MBB, - std::vector &TryAgain) { - DEBUG(dbgs() << MBB->getName() << ":\n"); - - SmallVector VirtCopies; - SmallVector PhysCopies; - SmallVector ImpDefCopies; - for (MachineBasicBlock::iterator MII = MBB->begin(), E = MBB->end(); - MII != E;) { - MachineInstr *Inst = MII++; - - // If this isn't a copy nor a extract_subreg, we can't join intervals. - unsigned SrcReg, DstReg; - if (Inst->isCopy()) { - DstReg = Inst->getOperand(0).getReg(); - SrcReg = Inst->getOperand(1).getReg(); - } else if (Inst->isSubregToReg()) { - DstReg = Inst->getOperand(0).getReg(); - SrcReg = Inst->getOperand(2).getReg(); - } else +// Try joining WorkList copies starting from index From. +// Null out any successful joins. 
+bool RegisterCoalescer::copyCoalesceWorkList(unsigned From) { + assert(From <= WorkList.size() && "Out of range"); + bool Progress = false; + for (unsigned i = From, e = WorkList.size(); i != e; ++i) { + if (!WorkList[i]) continue; - - bool SrcIsPhys = TargetRegisterInfo::isPhysicalRegister(SrcReg); - bool DstIsPhys = TargetRegisterInfo::isPhysicalRegister(DstReg); - if (LIS->hasInterval(SrcReg) && LIS->getInterval(SrcReg).empty()) - ImpDefCopies.push_back(Inst); - else if (SrcIsPhys || DstIsPhys) - PhysCopies.push_back(Inst); - else - VirtCopies.push_back(Inst); - } - - // Try coalescing implicit copies and insert_subreg first, - // followed by copies to / from physical registers, then finally copies - // from virtual registers to virtual registers. - for (unsigned i = 0, e = ImpDefCopies.size(); i != e; ++i) { - MachineInstr *TheCopy = ImpDefCopies[i]; - bool Again = false; - if (!JoinCopy(TheCopy, Again)) - if (Again) - TryAgain.push_back(TheCopy); - } - for (unsigned i = 0, e = PhysCopies.size(); i != e; ++i) { - MachineInstr *TheCopy = PhysCopies[i]; - bool Again = false; - if (!JoinCopy(TheCopy, Again)) - if (Again) - TryAgain.push_back(TheCopy); - } - for (unsigned i = 0, e = VirtCopies.size(); i != e; ++i) { - MachineInstr *TheCopy = VirtCopies[i]; + // Skip instruction pointers that have already been erased, for example by + // dead code elimination. + if (ErasedInstrs.erase(WorkList[i])) { + WorkList[i] = 0; + continue; + } bool Again = false; - if (!JoinCopy(TheCopy, Again)) - if (Again) - TryAgain.push_back(TheCopy); + bool Success = joinCopy(WorkList[i], Again); + Progress |= Success; + if (Success || !Again) + WorkList[i] = 0; } + return Progress; } -void RegisterCoalescer::joinIntervals() { +void +RegisterCoalescer::copyCoalesceInMBB(MachineBasicBlock *MBB) { + DEBUG(dbgs() << MBB->getName() << ":\n"); + + // Collect all copy-like instructions in MBB. Don't start coalescing anything + // yet, it might invalidate the iterator. + const unsigned PrevSize = WorkList.size(); + for (MachineBasicBlock::iterator MII = MBB->begin(), E = MBB->end(); + MII != E; ++MII) + if (MII->isCopyLike()) + WorkList.push_back(MII); + + // Try coalescing the collected copies immediately, and remove the nulls. + // This prevents the WorkList from getting too large since most copies are + // joinable on the first attempt. + if (copyCoalesceWorkList(PrevSize)) + WorkList.erase(std::remove(WorkList.begin() + PrevSize, WorkList.end(), + (MachineInstr*)0), WorkList.end()); +} + +void RegisterCoalescer::joinAllIntervals() { DEBUG(dbgs() << "********** JOINING INTERVALS ***********\n"); + assert(WorkList.empty() && "Old data still around."); - std::vector TryAgainList; if (Loops->empty()) { // If there are no loops in the function, join intervals in function order. for (MachineFunction::iterator I = MF->begin(), E = MF->end(); I != E; ++I) - CopyCoalesceInMBB(I, TryAgainList); + copyCoalesceInMBB(I); } else { // Otherwise, join intervals in inner loops before other intervals. // Unfortunately we can't just iterate over loop hierarchy here because @@ -1763,34 +1536,20 @@ void RegisterCoalescer::joinIntervals() { // Finally, join intervals in loop nest order. for (unsigned i = 0, e = MBBs.size(); i != e; ++i) - CopyCoalesceInMBB(MBBs[i].second, TryAgainList); + copyCoalesceInMBB(MBBs[i].second); } // Joining intervals can allow other intervals to be joined. Iteratively join // until we make no progress. 
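The driver above is a classic fixed-point loop: each pass over the work list may enable joins that failed earlier, so passes repeat until one completes without progress. A self-contained sketch of the shape, with tryJoin as a stand-in for joinCopy():

    #include <cstddef>
    #include <vector>

    // Repeatedly sweep the list until a full pass makes no progress. tryJoin
    // returns true on success and sets Again when a failed item is worth retrying.
    template <typename Item, typename TryJoin>
    void joinToFixedPoint(std::vector<Item *> &WorkList, TryJoin tryJoin) {
      bool Progress;
      do {
        Progress = false;
        for (std::size_t i = 0, e = WorkList.size(); i != e; ++i) {
          if (!WorkList[i])
            continue;                 // finished in an earlier pass
          bool Again = false;
          bool Success = tryJoin(WorkList[i], Again);
          Progress |= Success;
          if (Success || !Again)
            WorkList[i] = 0;          // done; keep only copies worth retrying
        }
      } while (Progress);
    }

Nulling finished entries instead of erasing them keeps indices stable during the sweep, at the cost of the std::remove() cleanup seen in copyCoalesceInMBB().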
- bool ProgressMade = true; - while (ProgressMade) { - ProgressMade = false; - - for (unsigned i = 0, e = TryAgainList.size(); i != e; ++i) { - MachineInstr *&TheCopy = TryAgainList[i]; - if (!TheCopy) - continue; - - bool Again = false; - bool Success = JoinCopy(TheCopy, Again); - if (Success || !Again) { - TheCopy= 0; // Mark this one as done. - ProgressMade = true; - } - } - } + while (copyCoalesceWorkList()) + /* empty */ ; } void RegisterCoalescer::releaseMemory() { - JoinedCopies.clear(); - ReMatCopies.clear(); - ReMatDefs.clear(); + ErasedInstrs.clear(); + WorkList.clear(); + DeadDefs.clear(); + InflateRegs.clear(); } bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) { @@ -1814,138 +1573,11 @@ bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) { RegClassInfo.runOnMachineFunction(fn); // Join (coalesce) intervals if requested. - if (EnableJoining) { - joinIntervals(); - DEBUG({ - dbgs() << "********** INTERVALS POST JOINING **********\n"; - for (LiveIntervals::iterator I = LIS->begin(), E = LIS->end(); - I != E; ++I){ - I->second->print(dbgs(), TRI); - dbgs() << "\n"; - } - }); - } - - // Perform a final pass over the instructions and compute spill weights - // and remove identity moves. - SmallVector DeadDefs, InflateRegs; - for (MachineFunction::iterator mbbi = MF->begin(), mbbe = MF->end(); - mbbi != mbbe; ++mbbi) { - MachineBasicBlock* mbb = mbbi; - for (MachineBasicBlock::iterator mii = mbb->begin(), mie = mbb->end(); - mii != mie; ) { - MachineInstr *MI = mii; - if (JoinedCopies.count(MI)) { - // Delete all coalesced copies. - bool DoDelete = true; - assert(MI->isCopyLike() && "Unrecognized copy instruction"); - unsigned SrcReg = MI->getOperand(MI->isSubregToReg() ? 2 : 1).getReg(); - unsigned DstReg = MI->getOperand(0).getReg(); - - // Collect candidates for register class inflation. - if (TargetRegisterInfo::isVirtualRegister(SrcReg) && - RegClassInfo.isProperSubClass(MRI->getRegClass(SrcReg))) - InflateRegs.push_back(SrcReg); - if (TargetRegisterInfo::isVirtualRegister(DstReg) && - RegClassInfo.isProperSubClass(MRI->getRegClass(DstReg))) - InflateRegs.push_back(DstReg); - - if (TargetRegisterInfo::isPhysicalRegister(SrcReg) && - MI->getNumOperands() > 2) - // Do not delete extract_subreg, insert_subreg of physical - // registers unless the definition is dead. e.g. - // %DO = INSERT_SUBREG %D0, %S0, 1 - // or else the scavenger may complain. LowerSubregs will - // delete them later. - DoDelete = false; - - if (MI->allDefsAreDead()) { - if (TargetRegisterInfo::isVirtualRegister(SrcReg) && - LIS->hasInterval(SrcReg)) - LIS->shrinkToUses(&LIS->getInterval(SrcReg)); - DoDelete = true; - } - if (!DoDelete) { - // We need the instruction to adjust liveness, so make it a KILL. - if (MI->isSubregToReg()) { - MI->RemoveOperand(3); - MI->RemoveOperand(1); - } - MI->setDesc(TII->get(TargetOpcode::KILL)); - mii = llvm::next(mii); - } else { - LIS->RemoveMachineInstrFromMaps(MI); - mii = mbbi->erase(mii); - ++numPeep; - } - continue; - } - - // Now check if this is a remat'ed def instruction which is now dead. - if (ReMatDefs.count(MI)) { - bool isDead = true; - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - const MachineOperand &MO = MI->getOperand(i); - if (!MO.isReg()) - continue; - unsigned Reg = MO.getReg(); - if (!Reg) - continue; - DeadDefs.push_back(Reg); - if (TargetRegisterInfo::isVirtualRegister(Reg)) { - // Remat may also enable register class inflation. 
- if (RegClassInfo.isProperSubClass(MRI->getRegClass(Reg))) - InflateRegs.push_back(Reg); - } - if (MO.isDead()) - continue; - if (TargetRegisterInfo::isPhysicalRegister(Reg) || - !MRI->use_nodbg_empty(Reg)) { - isDead = false; - break; - } - } - if (isDead) { - while (!DeadDefs.empty()) { - unsigned DeadDef = DeadDefs.back(); - DeadDefs.pop_back(); - RemoveDeadDef(LIS->getInterval(DeadDef), MI); - } - LIS->RemoveMachineInstrFromMaps(mii); - mii = mbbi->erase(mii); - continue; - } else - DeadDefs.clear(); - } - - ++mii; - - // Check for now unnecessary kill flags. - if (LIS->isNotInMIMap(MI)) continue; - SlotIndex DefIdx = LIS->getInstructionIndex(MI).getRegSlot(); - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI->getOperand(i); - if (!MO.isReg() || !MO.isKill()) continue; - unsigned reg = MO.getReg(); - if (!reg || !LIS->hasInterval(reg)) continue; - if (!LIS->getInterval(reg).killedAt(DefIdx)) { - MO.setIsKill(false); - continue; - } - // When leaving a kill flag on a physreg, check if any subregs should - // remain alive. - if (!TargetRegisterInfo::isPhysicalRegister(reg)) - continue; - for (const uint16_t *SR = TRI->getSubRegisters(reg); - unsigned S = *SR; ++SR) - if (LIS->hasInterval(S) && LIS->getInterval(S).liveAt(DefIdx)) - MI->addRegisterDefined(S, TRI); - } - } - } + if (EnableJoining) + joinAllIntervals(); // After deleting a lot of copies, register classes may be less constrained. - // Removing sub-register opreands may alow GR32_ABCD -> GR32 and DPR_VFP2 -> + // Removing sub-register operands may allow GR32_ABCD -> GR32 and DPR_VFP2 -> // DPR inflation. array_pod_sort(InflateRegs.begin(), InflateRegs.end()); InflateRegs.erase(std::unique(InflateRegs.begin(), InflateRegs.end()), diff --git a/lib/CodeGen/RegisterCoalescer.h b/lib/CodeGen/RegisterCoalescer.h index 310b933..8a6df98 100644 --- a/lib/CodeGen/RegisterCoalescer.h +++ b/lib/CodeGen/RegisterCoalescer.h @@ -26,7 +26,6 @@ namespace llvm { /// two registers can be coalesced, CoalescerPair can determine if a copy /// instruction would become an identity copy after coalescing. class CoalescerPair { - const TargetInstrInfo &TII; const TargetRegisterInfo &TRI; /// DstReg - The register that will be left after coalescing. It can be a @@ -36,10 +35,13 @@ namespace llvm { /// SrcReg - the virtual register that will be coalesced into dstReg. unsigned SrcReg; - /// subReg_ - The subregister index of srcReg in DstReg. It is possible the - /// coalesce SrcReg into a subreg of the larger DstReg when DstReg is a - /// virtual register. - unsigned SubIdx; + /// DstIdx - The sub-register index of the old DstReg in the new coalesced + /// register. + unsigned DstIdx; + + /// SrcIdx - The sub-register index of the old SrcReg in the new coalesced + /// register. + unsigned SrcIdx; /// Partial - True when the original copy was a partial subregister copy. bool Partial; @@ -52,12 +54,13 @@ namespace llvm { bool Flipped; /// NewRC - The register class of the coalesced register, or NULL if DstReg - /// is a physreg. + /// is a physreg. This register class may be a super-register of both + /// SrcReg and DstReg. 
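Splitting the old SubIdx into DstIdx and SrcIdx means either side of the copy may occupy a sub-register of the wider coalesced register. A toy illustration of how a join decision plus the two indices makes later copies recognizably redundant (simplified stand-ins, not the real CoalescerPair::isCoalescable()):

    // A (register, sub-register index) location within the coalesced register.
    struct Loc {
      unsigned Reg, SubIdx;
    };

    static bool sameLoc(const Loc &A, const Loc &B) {
      return A.Reg == B.Reg && A.SubIdx == B.SubIdx;
    }

    struct Copy { Loc Dst, Src; };

    // Once (DstReg, DstIdx) and (SrcReg, SrcIdx) name the same coalesced value,
    // any copy connecting those two locations, in either direction, is redundant.
    static bool redundantAfterJoin(const Copy &C, const Loc &JoinDst,
                                   const Loc &JoinSrc) {
      return (sameLoc(C.Dst, JoinDst) && sameLoc(C.Src, JoinSrc)) ||
             (sameLoc(C.Dst, JoinSrc) && sameLoc(C.Src, JoinDst));
    }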
const TargetRegisterClass *NewRC; public: - CoalescerPair(const TargetInstrInfo &tii, const TargetRegisterInfo &tri) - : TII(tii), TRI(tri), DstReg(0), SrcReg(0), SubIdx(0), + CoalescerPair(const TargetRegisterInfo &tri) + : TRI(tri), DstReg(0), SrcReg(0), DstIdx(0), SrcIdx(0), Partial(false), CrossClass(false), Flipped(false), NewRC(0) {} /// setRegisters - set registers to match the copy instruction MI. Return @@ -94,9 +97,13 @@ namespace llvm { /// getSrcReg - Return the virtual register that will be coalesced away. unsigned getSrcReg() const { return SrcReg; } - /// getSubIdx - Return the subregister index in DstReg that SrcReg will be - /// coalesced into, or 0. - unsigned getSubIdx() const { return SubIdx; } + /// getDstIdx - Return the subregister index that DstReg will be coalesced + /// into, or 0. + unsigned getDstIdx() const { return DstIdx; } + + /// getSrcIdx - Return the subregister index that SrcReg will be coalesced + /// into, or 0. + unsigned getSrcIdx() const { return SrcIdx; } /// getNewRC - Return the register class of the coalesced register. const TargetRegisterClass *getNewRC() const { return NewRC; } diff --git a/lib/CodeGen/RegisterPressure.cpp b/lib/CodeGen/RegisterPressure.cpp new file mode 100644 index 0000000..43448c8 --- /dev/null +++ b/lib/CodeGen/RegisterPressure.cpp @@ -0,0 +1,841 @@ +//===-- RegisterPressure.cpp - Dynamic Register Pressure ------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the RegisterPressure class which can be used to track +// MachineInstr level register pressure. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/LiveInterval.h" +#include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/RegisterClassInfo.h" +#include "llvm/CodeGen/RegisterPressure.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +/// Increase register pressure for each set impacted by this register class. +static void increaseSetPressure(std::vector &CurrSetPressure, + std::vector &MaxSetPressure, + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) { + unsigned Weight = TRI->getRegClassWeight(RC).RegWeight; + for (const int *PSet = TRI->getRegClassPressureSets(RC); + *PSet != -1; ++PSet) { + CurrSetPressure[*PSet] += Weight; + if (&CurrSetPressure != &MaxSetPressure + && CurrSetPressure[*PSet] > MaxSetPressure[*PSet]) { + MaxSetPressure[*PSet] = CurrSetPressure[*PSet]; + } + } +} + +/// Decrease register pressure for each set impacted by this register class. +static void decreaseSetPressure(std::vector &CurrSetPressure, + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) { + unsigned Weight = TRI->getRegClassWeight(RC).RegWeight; + for (const int *PSet = TRI->getRegClassPressureSets(RC); + *PSet != -1; ++PSet) { + assert(CurrSetPressure[*PSet] >= Weight && "register pressure underflow"); + CurrSetPressure[*PSet] -= Weight; + } +} + +/// Directly increase pressure only within this RegisterPressure result. 
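The increaseSetPressure()/decreaseSetPressure() helpers at the start of this new file treat a register class as a weighted contribution to every pressure set it feeds, with a separately tracked high-water mark. The same bookkeeping as a standalone toy, where ToyRegClass stands in for the TargetRegisterInfo queries:

    #include <cassert>
    #include <cstddef>
    #include <cstdio>
    #include <vector>

    struct ToyRegClass {
      unsigned Weight;                 // units one register of this class costs
      std::vector<unsigned> PSets;     // pressure sets the class feeds into
    };

    static void increase(std::vector<unsigned> &Curr, std::vector<unsigned> &Max,
                         const ToyRegClass &RC) {
      for (std::size_t i = 0, e = RC.PSets.size(); i != e; ++i) {
        unsigned PSet = RC.PSets[i];
        Curr[PSet] += RC.Weight;
        if (Curr[PSet] > Max[PSet])
          Max[PSet] = Curr[PSet];      // bump the high-water mark
      }
    }

    static void decrease(std::vector<unsigned> &Curr, const ToyRegClass &RC) {
      for (std::size_t i = 0, e = RC.PSets.size(); i != e; ++i) {
        assert(Curr[RC.PSets[i]] >= RC.Weight && "register pressure underflow");
        Curr[RC.PSets[i]] -= RC.Weight;
      }
    }

    int main() {
      std::vector<unsigned> Curr(2, 0), Max(2, 0);
      ToyRegClass GPR = { 1, std::vector<unsigned>(1, 0) };
      ToyRegClass Wide = { 2, std::vector<unsigned>(2) };
      Wide.PSets[0] = 0; Wide.PSets[1] = 1;    // a wide class hits both sets
      increase(Curr, Max, GPR);
      increase(Curr, Max, Wide);
      decrease(Curr, Wide);
      std::printf("curr=%u max=%u\n", Curr[0], Max[0]); // curr=1 max=3
      return 0;
    }

Passing the max vector as both arguments, as RegisterPressure::increase() below does, moves only the recorded maximum.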
+void RegisterPressure::increase(const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) {
+ increaseSetPressure(MaxSetPressure, MaxSetPressure, RC, TRI);
+}
+
+/// Directly decrease pressure only within this RegisterPressure result.
+void RegisterPressure::decrease(const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) {
+ decreaseSetPressure(MaxSetPressure, RC, TRI);
+}
+
+void RegisterPressure::dump(const TargetRegisterInfo *TRI) {
+ dbgs() << "Live In: ";
+ for (unsigned i = 0, e = LiveInRegs.size(); i < e; ++i)
+ dbgs() << PrintReg(LiveInRegs[i], TRI) << " ";
+ dbgs() << '\n';
+ dbgs() << "Live Out: ";
+ for (unsigned i = 0, e = LiveOutRegs.size(); i < e; ++i)
+ dbgs() << PrintReg(LiveOutRegs[i], TRI) << " ";
+ dbgs() << '\n';
+ for (unsigned i = 0, e = MaxSetPressure.size(); i < e; ++i) {
+ if (MaxSetPressure[i] != 0)
+ dbgs() << TRI->getRegPressureSetName(i) << "=" << MaxSetPressure[i]
+ << '\n';
+ }
+}
+
+/// Increase the current pressure as impacted by these physical registers and
+/// bump the high water mark if needed.
+void RegPressureTracker::increasePhysRegPressure(ArrayRef<unsigned> Regs) {
+ for (unsigned I = 0, E = Regs.size(); I != E; ++I)
+ increaseSetPressure(CurrSetPressure, P.MaxSetPressure,
+ TRI->getMinimalPhysRegClass(Regs[I]), TRI);
+}
+
+/// Simply decrease the current pressure as impacted by these physical
+/// registers.
+void RegPressureTracker::decreasePhysRegPressure(ArrayRef<unsigned> Regs) {
+ for (unsigned I = 0, E = Regs.size(); I != E; ++I)
+ decreaseSetPressure(CurrSetPressure, TRI->getMinimalPhysRegClass(Regs[I]),
+ TRI);
+}
+
+/// Increase the current pressure as impacted by these virtual registers and
+/// bump the high water mark if needed.
+void RegPressureTracker::increaseVirtRegPressure(ArrayRef<unsigned> Regs) {
+ for (unsigned I = 0, E = Regs.size(); I != E; ++I)
+ increaseSetPressure(CurrSetPressure, P.MaxSetPressure,
+ MRI->getRegClass(Regs[I]), TRI);
+}
+
+/// Simply decrease the current pressure as impacted by these virtual registers.
+void RegPressureTracker::decreaseVirtRegPressure(ArrayRef<unsigned> Regs) {
+ for (unsigned I = 0, E = Regs.size(); I != E; ++I)
+ decreaseSetPressure(CurrSetPressure, MRI->getRegClass(Regs[I]), TRI);
+}
+
+/// Clear the result so it can be used for another round of pressure tracking.
+void IntervalPressure::reset() {
+ TopIdx = BottomIdx = SlotIndex();
+ MaxSetPressure.clear();
+ LiveInRegs.clear();
+ LiveOutRegs.clear();
+}
+
+/// Clear the result so it can be used for another round of pressure tracking.
+void RegionPressure::reset() {
+ TopPos = BottomPos = MachineBasicBlock::const_iterator();
+ MaxSetPressure.clear();
+ LiveInRegs.clear();
+ LiveOutRegs.clear();
+}
+
+/// If the current top is not less than or equal to the next index, open it.
+/// We happen to need the SlotIndex for the next top for pressure update.
+void IntervalPressure::openTop(SlotIndex NextTop) {
+ if (TopIdx <= NextTop)
+ return;
+ TopIdx = SlotIndex();
+ LiveInRegs.clear();
+}
+
+/// If the current top is the previous instruction (before receding), open it.
+void RegionPressure::openTop(MachineBasicBlock::const_iterator PrevTop) {
+ if (TopPos != PrevTop)
+ return;
+ TopPos = MachineBasicBlock::const_iterator();
+ LiveInRegs.clear();
+}
+
+/// If the current bottom is not greater than the previous index, open it.
+void IntervalPressure::openBottom(SlotIndex PrevBottom) {
+ if (BottomIdx > PrevBottom)
+ return;
+ BottomIdx = SlotIndex();
+ LiveInRegs.clear();
+}
+
+/// If the current bottom is the previous instr (before advancing), open it. 
+void RegionPressure::openBottom(MachineBasicBlock::const_iterator PrevBottom) { + if (BottomPos != PrevBottom) + return; + BottomPos = MachineBasicBlock::const_iterator(); + LiveInRegs.clear(); +} + +/// Setup the RegPressureTracker. +/// +/// TODO: Add support for pressure without LiveIntervals. +void RegPressureTracker::init(const MachineFunction *mf, + const RegisterClassInfo *rci, + const LiveIntervals *lis, + const MachineBasicBlock *mbb, + MachineBasicBlock::const_iterator pos) +{ + MF = mf; + TRI = MF->getTarget().getRegisterInfo(); + RCI = rci; + MRI = &MF->getRegInfo(); + MBB = mbb; + + if (RequireIntervals) { + assert(lis && "IntervalPressure requires LiveIntervals"); + LIS = lis; + } + + CurrPos = pos; + while (CurrPos != MBB->end() && CurrPos->isDebugValue()) + ++CurrPos; + + CurrSetPressure.assign(TRI->getNumRegPressureSets(), 0); + + if (RequireIntervals) + static_cast(P).reset(); + else + static_cast(P).reset(); + P.MaxSetPressure = CurrSetPressure; + + LivePhysRegs.clear(); + LivePhysRegs.setUniverse(TRI->getNumRegs()); + LiveVirtRegs.clear(); + LiveVirtRegs.setUniverse(MRI->getNumVirtRegs()); +} + +/// Does this pressure result have a valid top position and live ins. +bool RegPressureTracker::isTopClosed() const { + if (RequireIntervals) + return static_cast(P).TopIdx.isValid(); + return (static_cast(P).TopPos == + MachineBasicBlock::const_iterator()); +} + +/// Does this pressure result have a valid bottom position and live outs. +bool RegPressureTracker::isBottomClosed() const { + if (RequireIntervals) + return static_cast(P).BottomIdx.isValid(); + return (static_cast(P).BottomPos == + MachineBasicBlock::const_iterator()); +} + +/// Set the boundary for the top of the region and summarize live ins. +void RegPressureTracker::closeTop() { + if (RequireIntervals) + static_cast(P).TopIdx = + LIS->getInstructionIndex(CurrPos).getRegSlot(); + else + static_cast(P).TopPos = CurrPos; + + assert(P.LiveInRegs.empty() && "inconsistent max pressure result"); + P.LiveInRegs.reserve(LivePhysRegs.size() + LiveVirtRegs.size()); + P.LiveInRegs.append(LivePhysRegs.begin(), LivePhysRegs.end()); + for (SparseSet::const_iterator I = + LiveVirtRegs.begin(), E = LiveVirtRegs.end(); I != E; ++I) + P.LiveInRegs.push_back(*I); + std::sort(P.LiveInRegs.begin(), P.LiveInRegs.end()); + P.LiveInRegs.erase(std::unique(P.LiveInRegs.begin(), P.LiveInRegs.end()), + P.LiveInRegs.end()); +} + +/// Set the boundary for the bottom of the region and summarize live outs. +void RegPressureTracker::closeBottom() { + if (RequireIntervals) + if (CurrPos == MBB->end()) + static_cast(P).BottomIdx = LIS->getMBBEndIdx(MBB); + else + static_cast(P).BottomIdx = + LIS->getInstructionIndex(CurrPos).getRegSlot(); + else + static_cast(P).BottomPos = CurrPos; + + assert(P.LiveOutRegs.empty() && "inconsistent max pressure result"); + P.LiveOutRegs.reserve(LivePhysRegs.size() + LiveVirtRegs.size()); + P.LiveOutRegs.append(LivePhysRegs.begin(), LivePhysRegs.end()); + for (SparseSet::const_iterator I = + LiveVirtRegs.begin(), E = LiveVirtRegs.end(); I != E; ++I) + P.LiveOutRegs.push_back(*I); + std::sort(P.LiveOutRegs.begin(), P.LiveOutRegs.end()); + P.LiveOutRegs.erase(std::unique(P.LiveOutRegs.begin(), P.LiveOutRegs.end()), + P.LiveOutRegs.end()); +} + +/// Finalize the region boundaries and record live ins and live outs. 
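closeTop() and closeBottom() above summarize liveness at a region boundary by concatenating the physical and virtual live sets, then sorting and uniquing. That merge in isolation (plain vectors standing in for LivePhysRegs/LiveVirtRegs):

    #include <algorithm>
    #include <vector>

    static std::vector<unsigned> summarizeLive(const std::vector<unsigned> &Phys,
                                               const std::vector<unsigned> &Virt) {
      std::vector<unsigned> Out;
      Out.reserve(Phys.size() + Virt.size());
      Out.insert(Out.end(), Phys.begin(), Phys.end());
      Out.insert(Out.end(), Virt.begin(), Virt.end());
      std::sort(Out.begin(), Out.end());
      Out.erase(std::unique(Out.begin(), Out.end()), Out.end());
      return Out;
    }

Sorting once when a boundary is closed keeps ordered-container costs out of the per-instruction recede/advance paths.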
+void RegPressureTracker::closeRegion() { + if (!isTopClosed() && !isBottomClosed()) { + assert(LivePhysRegs.empty() && LiveVirtRegs.empty() && + "no region boundary"); + return; + } + if (!isBottomClosed()) + closeBottom(); + else if (!isTopClosed()) + closeTop(); + // If both top and bottom are closed, do nothing. +} + +/// Return true if Reg aliases a register in Regs SparseSet. +static bool hasRegAlias(unsigned Reg, SparseSet &Regs, + const TargetRegisterInfo *TRI) { + assert(!TargetRegisterInfo::isVirtualRegister(Reg) && "only for physregs"); + for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) + if (Regs.count(*AI)) + return true; + return false; +} + +/// Return true if Reg aliases a register in unsorted Regs SmallVector. +/// This is only valid for physical registers. +static SmallVectorImpl::iterator +findRegAlias(unsigned Reg, SmallVectorImpl &Regs, + const TargetRegisterInfo *TRI) { + for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) { + SmallVectorImpl::iterator I = + std::find(Regs.begin(), Regs.end(), *AI); + if (I != Regs.end()) + return I; + } + return Regs.end(); +} + +/// Return true if Reg can be inserted into Regs SmallVector. For virtual +/// register, do a linear search. For physical registers check for aliases. +static SmallVectorImpl::iterator +findReg(unsigned Reg, bool isVReg, SmallVectorImpl &Regs, + const TargetRegisterInfo *TRI) { + if(isVReg) + return std::find(Regs.begin(), Regs.end(), Reg); + return findRegAlias(Reg, Regs, TRI); +} + +/// Collect this instruction's unique uses and defs into SmallVectors for +/// processing defs and uses in order. +template +struct RegisterOperands { + SmallVector Uses; + SmallVector Defs; + SmallVector DeadDefs; + + /// Push this operand's register onto the correct vector. + void collect(const MachineOperand &MO, const TargetRegisterInfo *TRI) { + if (MO.readsReg()) { + if (findReg(MO.getReg(), isVReg, Uses, TRI) == Uses.end()) + Uses.push_back(MO.getReg()); + } + if (MO.isDef()) { + if (MO.isDead()) { + if (findReg(MO.getReg(), isVReg, DeadDefs, TRI) == DeadDefs.end()) + DeadDefs.push_back(MO.getReg()); + } + else { + if (findReg(MO.getReg(), isVReg, Defs, TRI) == Defs.end()) + Defs.push_back(MO.getReg()); + } + } + } +}; +typedef RegisterOperands PhysRegOperands; +typedef RegisterOperands VirtRegOperands; + +/// Collect physical and virtual register operands. +static void collectOperands(const MachineInstr *MI, + PhysRegOperands &PhysRegOpers, + VirtRegOperands &VirtRegOpers, + const TargetRegisterInfo *TRI, + const RegisterClassInfo *RCI) { + for(ConstMIBundleOperands OperI(MI); OperI.isValid(); ++OperI) { + const MachineOperand &MO = *OperI; + if (!MO.isReg() || !MO.getReg()) + continue; + + if (TargetRegisterInfo::isVirtualRegister(MO.getReg())) + VirtRegOpers.collect(MO, TRI); + else if (RCI->isAllocatable(MO.getReg())) + PhysRegOpers.collect(MO, TRI); + } + // Remove redundant physreg dead defs. + for (unsigned i = PhysRegOpers.DeadDefs.size(); i > 0; --i) { + unsigned Reg = PhysRegOpers.DeadDefs[i-1]; + if (findRegAlias(Reg, PhysRegOpers.Defs, TRI) != PhysRegOpers.Defs.end()) + PhysRegOpers.DeadDefs.erase(&PhysRegOpers.DeadDefs[i-1]); + } +} + +/// Force liveness of registers. 
+void RegPressureTracker::addLiveRegs(ArrayRef Regs) { + for (unsigned i = 0, e = Regs.size(); i != e; ++i) { + if (TargetRegisterInfo::isVirtualRegister(Regs[i])) { + if (LiveVirtRegs.insert(Regs[i]).second) + increaseVirtRegPressure(Regs[i]); + } + else { + if (!hasRegAlias(Regs[i], LivePhysRegs, TRI)) { + LivePhysRegs.insert(Regs[i]); + increasePhysRegPressure(Regs[i]); + } + } + } +} + +/// Add PhysReg to the live in set and increase max pressure. +void RegPressureTracker::discoverPhysLiveIn(unsigned Reg) { + assert(!LivePhysRegs.count(Reg) && "avoid bumping max pressure twice"); + if (findRegAlias(Reg, P.LiveInRegs, TRI) != P.LiveInRegs.end()) + return; + + // At live in discovery, unconditionally increase the high water mark. + P.LiveInRegs.push_back(Reg); + P.increase(TRI->getMinimalPhysRegClass(Reg), TRI); +} + +/// Add PhysReg to the live out set and increase max pressure. +void RegPressureTracker::discoverPhysLiveOut(unsigned Reg) { + assert(!LivePhysRegs.count(Reg) && "avoid bumping max pressure twice"); + if (findRegAlias(Reg, P.LiveOutRegs, TRI) != P.LiveOutRegs.end()) + return; + + // At live out discovery, unconditionally increase the high water mark. + P.LiveOutRegs.push_back(Reg); + P.increase(TRI->getMinimalPhysRegClass(Reg), TRI); +} + +/// Add VirtReg to the live in set and increase max pressure. +void RegPressureTracker::discoverVirtLiveIn(unsigned Reg) { + assert(!LiveVirtRegs.count(Reg) && "avoid bumping max pressure twice"); + if (std::find(P.LiveInRegs.begin(), P.LiveInRegs.end(), Reg) != + P.LiveInRegs.end()) + return; + + // At live in discovery, unconditionally increase the high water mark. + P.LiveInRegs.push_back(Reg); + P.increase(MRI->getRegClass(Reg), TRI); +} + +/// Add VirtReg to the live out set and increase max pressure. +void RegPressureTracker::discoverVirtLiveOut(unsigned Reg) { + assert(!LiveVirtRegs.count(Reg) && "avoid bumping max pressure twice"); + if (std::find(P.LiveOutRegs.begin(), P.LiveOutRegs.end(), Reg) != + P.LiveOutRegs.end()) + return; + + // At live out discovery, unconditionally increase the high water mark. + P.LiveOutRegs.push_back(Reg); + P.increase(MRI->getRegClass(Reg), TRI); +} + +/// Recede across the previous instruction. +bool RegPressureTracker::recede() { + // Check for the top of the analyzable region. + if (CurrPos == MBB->begin()) { + closeRegion(); + return false; + } + if (!isBottomClosed()) + closeBottom(); + + // Open the top of the region using block iterators. + if (!RequireIntervals && isTopClosed()) + static_cast(P).openTop(CurrPos); + + // Find the previous instruction. + do + --CurrPos; + while (CurrPos != MBB->begin() && CurrPos->isDebugValue()); + + if (CurrPos->isDebugValue()) { + closeRegion(); + return false; + } + SlotIndex SlotIdx; + if (RequireIntervals) + SlotIdx = LIS->getInstructionIndex(CurrPos).getRegSlot(); + + // Open the top of the region using slot indexes. + if (RequireIntervals && isTopClosed()) + static_cast(P).openTop(SlotIdx); + + PhysRegOperands PhysRegOpers; + VirtRegOperands VirtRegOpers; + collectOperands(CurrPos, PhysRegOpers, VirtRegOpers, TRI, RCI); + + // Boost pressure for all dead defs together. + increasePhysRegPressure(PhysRegOpers.DeadDefs); + increaseVirtRegPressure(VirtRegOpers.DeadDefs); + decreasePhysRegPressure(PhysRegOpers.DeadDefs); + decreaseVirtRegPressure(VirtRegOpers.DeadDefs); + + // Kill liveness at live defs. + // TODO: consider earlyclobbers? 
+ for (unsigned i = 0, e = PhysRegOpers.Defs.size(); i < e; ++i) { + unsigned Reg = PhysRegOpers.Defs[i]; + if (LivePhysRegs.erase(Reg)) + decreasePhysRegPressure(Reg); + else + discoverPhysLiveOut(Reg); + } + for (unsigned i = 0, e = VirtRegOpers.Defs.size(); i < e; ++i) { + unsigned Reg = VirtRegOpers.Defs[i]; + if (LiveVirtRegs.erase(Reg)) + decreaseVirtRegPressure(Reg); + else + discoverVirtLiveOut(Reg); + } + + // Generate liveness for uses. + for (unsigned i = 0, e = PhysRegOpers.Uses.size(); i < e; ++i) { + unsigned Reg = PhysRegOpers.Uses[i]; + if (!hasRegAlias(Reg, LivePhysRegs, TRI)) { + increasePhysRegPressure(Reg); + LivePhysRegs.insert(Reg); + } + } + for (unsigned i = 0, e = VirtRegOpers.Uses.size(); i < e; ++i) { + unsigned Reg = VirtRegOpers.Uses[i]; + if (!LiveVirtRegs.count(Reg)) { + // Adjust liveouts if LiveIntervals are available. + if (RequireIntervals) { + const LiveInterval *LI = &LIS->getInterval(Reg); + if (!LI->killedAt(SlotIdx)) + discoverVirtLiveOut(Reg); + } + increaseVirtRegPressure(Reg); + LiveVirtRegs.insert(Reg); + } + } + return true; +} + +/// Advance across the current instruction. +bool RegPressureTracker::advance() { + // Check for the bottom of the analyzable region. + if (CurrPos == MBB->end()) { + closeRegion(); + return false; + } + if (!isTopClosed()) + closeTop(); + + SlotIndex SlotIdx; + if (RequireIntervals) + SlotIdx = LIS->getInstructionIndex(CurrPos).getRegSlot(); + + // Open the bottom of the region using slot indexes. + if (isBottomClosed()) { + if (RequireIntervals) + static_cast(P).openBottom(SlotIdx); + else + static_cast(P).openBottom(CurrPos); + } + + PhysRegOperands PhysRegOpers; + VirtRegOperands VirtRegOpers; + collectOperands(CurrPos, PhysRegOpers, VirtRegOpers, TRI, RCI); + + // Kill liveness at last uses. + for (unsigned i = 0, e = PhysRegOpers.Uses.size(); i < e; ++i) { + unsigned Reg = PhysRegOpers.Uses[i]; + if (!hasRegAlias(Reg, LivePhysRegs, TRI)) + discoverPhysLiveIn(Reg); + else { + // Allocatable physregs are always single-use before regalloc. + decreasePhysRegPressure(Reg); + LivePhysRegs.erase(Reg); + } + } + for (unsigned i = 0, e = VirtRegOpers.Uses.size(); i < e; ++i) { + unsigned Reg = VirtRegOpers.Uses[i]; + if (RequireIntervals) { + const LiveInterval *LI = &LIS->getInterval(Reg); + if (LI->killedAt(SlotIdx)) { + if (LiveVirtRegs.erase(Reg)) + decreaseVirtRegPressure(Reg); + else + discoverVirtLiveIn(Reg); + } + } + else if (!LiveVirtRegs.count(Reg)) { + discoverVirtLiveIn(Reg); + increaseVirtRegPressure(Reg); + } + } + + // Generate liveness for defs. + for (unsigned i = 0, e = PhysRegOpers.Defs.size(); i < e; ++i) { + unsigned Reg = PhysRegOpers.Defs[i]; + if (!hasRegAlias(Reg, LivePhysRegs, TRI)) { + increasePhysRegPressure(Reg); + LivePhysRegs.insert(Reg); + } + } + for (unsigned i = 0, e = VirtRegOpers.Defs.size(); i < e; ++i) { + unsigned Reg = VirtRegOpers.Defs[i]; + if (LiveVirtRegs.insert(Reg).second) + increaseVirtRegPressure(Reg); + } + + // Boost pressure for all dead defs together. + increasePhysRegPressure(PhysRegOpers.DeadDefs); + increaseVirtRegPressure(VirtRegOpers.DeadDefs); + decreasePhysRegPressure(PhysRegOpers.DeadDefs); + decreaseVirtRegPressure(VirtRegOpers.DeadDefs); + + // Find the next instruction. + do + ++CurrPos; + while (CurrPos != MBB->end() && CurrPos->isDebugValue()); + return true; +} + +/// Find the max change in excess pressure across all sets. 
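recede() and advance() above are mirror images: walking bottom-up, a def ends a register's liveness (pressure drops above it) while a use begins it. The core of the bottom-up walk as a self-contained toy with a single pressure set and unit weights (the real tracker also discovers live-ins/outs, weighs registers by class, and special-cases dead defs):

    #include <cstddef>
    #include <cstdio>
    #include <set>
    #include <vector>

    struct ToyInstr { std::vector<unsigned> Defs, Uses; };

    int main() {
      // r1 = ...; r2 = ...; r3 = r1 + r2   (three instructions, walked bottom-up)
      ToyInstr Prog[3] = { { {1}, {} }, { {2}, {} }, { {3}, {1, 2} } };
      std::set<unsigned> Live; // registers live below the current position
      unsigned Curr = 0, Max = 0;
      for (int i = 2; i >= 0; --i) {
        for (std::size_t d = 0; d != Prog[i].Defs.size(); ++d)
          Curr -= Live.erase(Prog[i].Defs[d]);    // defs kill liveness going up
        for (std::size_t u = 0; u != Prog[i].Uses.size(); ++u)
          if (Live.insert(Prog[i].Uses[u]).second)
            ++Curr;                               // uses generate liveness going up
        if (Curr > Max)
          Max = Curr;
        std::printf("pressure above instr %d: %u\n", i, Curr);
      }
      std::printf("max pressure: %u\n", Max);     // 2, at the r3 = r1 + r2 add
      return 0;
    }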
+static void computeExcessPressureDelta(ArrayRef OldPressureVec, + ArrayRef NewPressureVec, + RegPressureDelta &Delta, + const TargetRegisterInfo *TRI) { + int ExcessUnits = 0; + unsigned PSetID = ~0U; + for (unsigned i = 0, e = OldPressureVec.size(); i < e; ++i) { + unsigned POld = OldPressureVec[i]; + unsigned PNew = NewPressureVec[i]; + int PDiff = (int)PNew - (int)POld; + if (!PDiff) // No change in this set in the common case. + continue; + // Only consider change beyond the limit. + unsigned Limit = TRI->getRegPressureSetLimit(i); + if (Limit > POld) { + if (Limit > PNew) + PDiff = 0; // Under the limit + else + PDiff = PNew - Limit; // Just exceeded limit. + } + else if (Limit > PNew) + PDiff = Limit - POld; // Just obeyed limit. + + if (std::abs(PDiff) > std::abs(ExcessUnits)) { + ExcessUnits = PDiff; + PSetID = i; + } + } + Delta.Excess.PSetID = PSetID; + Delta.Excess.UnitIncrease = ExcessUnits; +} + +/// Find the max change in max pressure that either surpasses a critical PSet +/// limit or exceeds the current MaxPressureLimit. +/// +/// FIXME: comparing each element of the old and new MaxPressure vectors here is +/// silly. It's done now to demonstrate the concept but will go away with a +/// RegPressureTracker API change to work with pressure differences. +static void computeMaxPressureDelta(ArrayRef OldMaxPressureVec, + ArrayRef NewMaxPressureVec, + ArrayRef CriticalPSets, + ArrayRef MaxPressureLimit, + RegPressureDelta &Delta) { + Delta.CriticalMax = PressureElement(); + Delta.CurrentMax = PressureElement(); + + unsigned CritIdx = 0, CritEnd = CriticalPSets.size(); + for (unsigned i = 0, e = OldMaxPressureVec.size(); i < e; ++i) { + unsigned POld = OldMaxPressureVec[i]; + unsigned PNew = NewMaxPressureVec[i]; + if (PNew == POld) // No change in this set in the common case. + continue; + + while (CritIdx != CritEnd && CriticalPSets[CritIdx].PSetID < i) + ++CritIdx; + + if (CritIdx != CritEnd && CriticalPSets[CritIdx].PSetID == i) { + int PDiff = (int)PNew - (int)CriticalPSets[CritIdx].UnitIncrease; + if (PDiff > Delta.CriticalMax.UnitIncrease) { + Delta.CriticalMax.PSetID = i; + Delta.CriticalMax.UnitIncrease = PDiff; + } + } + + // Find the greatest increase above MaxPressureLimit. + // (Ignores negative MDiff). + int MDiff = (int)PNew - (int)MaxPressureLimit[i]; + if (MDiff > Delta.CurrentMax.UnitIncrease) { + Delta.CurrentMax.PSetID = i; + Delta.CurrentMax.UnitIncrease = PNew; + } + } +} + +/// Record the upward impact of a single instruction on current register +/// pressure. Unlike the advance/recede pressure tracking interface, this does +/// not discover live in/outs. +/// +/// This is intended for speculative queries. It leaves pressure inconsistent +/// with the current position, so must be restored by the caller. +void RegPressureTracker::bumpUpwardPressure(const MachineInstr *MI) { + // Account for register pressure similar to RegPressureTracker::recede(). + PhysRegOperands PhysRegOpers; + VirtRegOperands VirtRegOpers; + collectOperands(MI, PhysRegOpers, VirtRegOpers, TRI, RCI); + + // Boost max pressure for all dead defs together. + // Since CurrSetPressure and MaxSetPressure + increasePhysRegPressure(PhysRegOpers.DeadDefs); + increaseVirtRegPressure(VirtRegOpers.DeadDefs); + decreasePhysRegPressure(PhysRegOpers.DeadDefs); + decreaseVirtRegPressure(VirtRegOpers.DeadDefs); + + // Kill liveness at live defs. + decreasePhysRegPressure(PhysRegOpers.Defs); + decreaseVirtRegPressure(VirtRegOpers.Defs); + + // Generate liveness for uses. 
+ for (unsigned i = 0, e = PhysRegOpers.Uses.size(); i < e; ++i) { + unsigned Reg = PhysRegOpers.Uses[i]; + if (!hasRegAlias(Reg, LivePhysRegs, TRI)) + increasePhysRegPressure(Reg); + } + for (unsigned i = 0, e = VirtRegOpers.Uses.size(); i < e; ++i) { + unsigned Reg = VirtRegOpers.Uses[i]; + if (!LiveVirtRegs.count(Reg)) + increaseVirtRegPressure(Reg); + } +} + +/// Consider the pressure increase caused by traversing this instruction +/// bottom-up. Find the pressure set with the most change beyond its pressure +/// limit based on the tracker's current pressure, and return the change in +/// number of register units of that pressure set introduced by this +/// instruction. +/// +/// This assumes that the current LiveOut set is sufficient. +/// +/// FIXME: This is expensive for an on-the-fly query. We need to cache the +/// result per-SUnit with enough information to adjust for the current +/// scheduling position. But this works as a proof of concept. +void RegPressureTracker:: +getMaxUpwardPressureDelta(const MachineInstr *MI, RegPressureDelta &Delta, + ArrayRef CriticalPSets, + ArrayRef MaxPressureLimit) { + // Snapshot Pressure. + // FIXME: The snapshot heap space should persist. But I'm planning to + // summarize the pressure effect so we don't need to snapshot at all. + std::vector SavedPressure = CurrSetPressure; + std::vector SavedMaxPressure = P.MaxSetPressure; + + bumpUpwardPressure(MI); + + computeExcessPressureDelta(SavedPressure, CurrSetPressure, Delta, TRI); + computeMaxPressureDelta(SavedMaxPressure, P.MaxSetPressure, CriticalPSets, + MaxPressureLimit, Delta); + assert(Delta.CriticalMax.UnitIncrease >= 0 && + Delta.CurrentMax.UnitIncrease >= 0 && "cannot decrease max pressure"); + + // Restore the tracker's state. + P.MaxSetPressure.swap(SavedMaxPressure); + CurrSetPressure.swap(SavedPressure); +} + +/// Helper to find a vreg use between two indices [PriorUseIdx, NextUseIdx). +static bool findUseBetween(unsigned Reg, + SlotIndex PriorUseIdx, SlotIndex NextUseIdx, + const MachineRegisterInfo *MRI, + const LiveIntervals *LIS) { + for (MachineRegisterInfo::use_nodbg_iterator + UI = MRI->use_nodbg_begin(Reg), UE = MRI->use_nodbg_end(); + UI != UE; UI.skipInstruction()) { + const MachineInstr* MI = &*UI; + SlotIndex InstSlot = LIS->getInstructionIndex(MI).getRegSlot(); + if (InstSlot >= PriorUseIdx && InstSlot < NextUseIdx) + return true; + } + return false; +} + +/// Record the downward impact of a single instruction on current register +/// pressure. Unlike the advance/recede pressure tracking interface, this does +/// not discover live in/outs. +/// +/// This is intended for speculative queries. It leaves pressure inconsistent +/// with the current position, so must be restored by the caller. +void RegPressureTracker::bumpDownwardPressure(const MachineInstr *MI) { + // Account for register pressure similar to RegPressureTracker::recede(). + PhysRegOperands PhysRegOpers; + VirtRegOperands VirtRegOpers; + collectOperands(MI, PhysRegOpers, VirtRegOpers, TRI, RCI); + + // Kill liveness at last uses. Assume allocatable physregs are single-use + // rather than checking LiveIntervals. 
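getMaxUpwardPressureDelta() above shows the speculative-query pattern this file relies on: snapshot the pressure vectors, bump pressure as if the instruction were scheduled, diff against the snapshot, then swap the snapshots back. The pattern in miniature (query() and BumpFn are hypothetical, and only one pressure set is diffed):

    #include <vector>

    // Evaluate what Bump would do to Pressure without leaving it changed.
    template <typename BumpFn>
    static int query(std::vector<unsigned> &Pressure, BumpFn Bump) {
      std::vector<unsigned> Saved = Pressure; // snapshot
      Bump(Pressure);                         // speculatively apply the change
      int Delta = (int)Pressure[0] - (int)Saved[0]; // toy: single pressure set
      Pressure.swap(Saved);                   // restore the tracker's state
      return Delta;
    }

Swapping instead of copying on the restore path avoids a second allocation; the FIXME above flags even the initial snapshot as a cost to engineer away.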
+ decreasePhysRegPressure(PhysRegOpers.Uses); + if (RequireIntervals) { + SlotIndex SlotIdx = LIS->getInstructionIndex(MI).getRegSlot(); + for (unsigned i = 0, e = VirtRegOpers.Uses.size(); i < e; ++i) { + unsigned Reg = VirtRegOpers.Uses[i]; + const LiveInterval *LI = &LIS->getInterval(Reg); + // FIXME: allow the caller to pass in the list of vreg uses that remain to + // be bottom-scheduled to avoid searching uses at each query. + SlotIndex CurrIdx = LIS->getInstructionIndex(CurrPos).getRegSlot(); + if (LI->killedAt(SlotIdx) + && !findUseBetween(Reg, CurrIdx, SlotIdx, MRI, LIS)) { + decreaseVirtRegPressure(Reg); + } + } + } + + // Generate liveness for defs. + increasePhysRegPressure(PhysRegOpers.Defs); + increaseVirtRegPressure(VirtRegOpers.Defs); + + // Boost pressure for all dead defs together. + increasePhysRegPressure(PhysRegOpers.DeadDefs); + increaseVirtRegPressure(VirtRegOpers.DeadDefs); + decreasePhysRegPressure(PhysRegOpers.DeadDefs); + decreaseVirtRegPressure(VirtRegOpers.DeadDefs); +} + +/// Consider the pressure increase caused by traversing this instruction +/// top-down. Find the register class with the most change in its pressure limit +/// based on the tracker's current pressure, and return the number of excess +/// register units of that pressure set introduced by this instruction. +/// +/// This assumes that the current LiveIn set is sufficient. +void RegPressureTracker:: +getMaxDownwardPressureDelta(const MachineInstr *MI, RegPressureDelta &Delta, + ArrayRef CriticalPSets, + ArrayRef MaxPressureLimit) { + // Snapshot Pressure. + std::vector SavedPressure = CurrSetPressure; + std::vector SavedMaxPressure = P.MaxSetPressure; + + bumpDownwardPressure(MI); + + computeExcessPressureDelta(SavedPressure, CurrSetPressure, Delta, TRI); + computeMaxPressureDelta(SavedMaxPressure, P.MaxSetPressure, CriticalPSets, + MaxPressureLimit, Delta); + assert(Delta.CriticalMax.UnitIncrease >= 0 && + Delta.CurrentMax.UnitIncrease >= 0 && "cannot decrease max pressure"); + + // Restore the tracker's state. + P.MaxSetPressure.swap(SavedMaxPressure); + CurrSetPressure.swap(SavedPressure); +} + +/// Get the pressure of each PSet after traversing this instruction bottom-up. +void RegPressureTracker:: +getUpwardPressure(const MachineInstr *MI, + std::vector &PressureResult, + std::vector &MaxPressureResult) { + // Snapshot pressure. + PressureResult = CurrSetPressure; + MaxPressureResult = P.MaxSetPressure; + + bumpUpwardPressure(MI); + + // Current pressure becomes the result. Restore current pressure. + P.MaxSetPressure.swap(MaxPressureResult); + CurrSetPressure.swap(PressureResult); +} + +/// Get the pressure of each PSet after traversing this instruction top-down. +void RegPressureTracker:: +getDownwardPressure(const MachineInstr *MI, + std::vector &PressureResult, + std::vector &MaxPressureResult) { + // Snapshot pressure. + PressureResult = CurrSetPressure; + MaxPressureResult = P.MaxSetPressure; + + bumpDownwardPressure(MI); + + // Current pressure becomes the result. Restore current pressure. 
+ P.MaxSetPressure.swap(MaxPressureResult); + CurrSetPressure.swap(PressureResult); +} diff --git a/lib/CodeGen/RegisterScavenging.cpp b/lib/CodeGen/RegisterScavenging.cpp index 03bd82e..d673794 100644 --- a/lib/CodeGen/RegisterScavenging.cpp +++ b/lib/CodeGen/RegisterScavenging.cpp @@ -37,16 +37,13 @@ using namespace llvm; void RegScavenger::setUsed(unsigned Reg) { RegsAvailable.reset(Reg); - for (const uint16_t *SubRegs = TRI->getSubRegisters(Reg); - unsigned SubReg = *SubRegs; ++SubRegs) - RegsAvailable.reset(SubReg); + for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) + RegsAvailable.reset(*SubRegs); } bool RegScavenger::isAliasUsed(unsigned Reg) const { - if (isUsed(Reg)) - return true; - for (const uint16_t *R = TRI->getAliasSet(Reg); *R; ++R) - if (isUsed(*R)) + for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) + if (isUsed(*AI)) return true; return false; } @@ -114,8 +111,8 @@ void RegScavenger::enterBasicBlock(MachineBasicBlock *mbb) { void RegScavenger::addRegWithSubRegs(BitVector &BV, unsigned Reg) { BV.set(Reg); - for (const uint16_t *R = TRI->getSubRegisters(Reg); *R; R++) - BV.set(*R); + for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) + BV.set(*SubRegs); } void RegScavenger::forward() { @@ -195,9 +192,8 @@ void RegScavenger::forward() { // Ideally we would like a way to model this, but leaving the // insert_subreg around causes both correctness and performance issues. bool SubUsed = false; - for (const uint16_t *SubRegs = TRI->getSubRegisters(Reg); - unsigned SubReg = *SubRegs; ++SubRegs) - if (isUsed(SubReg)) { + for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) + if (isUsed(*SubRegs)) { SubUsed = true; break; } @@ -296,9 +292,8 @@ unsigned RegScavenger::findSurvivorReg(MachineBasicBlock::iterator StartMI, isVirtKillInsn = true; continue; } - Candidates.reset(MO.getReg()); - for (const uint16_t *R = TRI->getAliasSet(MO.getReg()); *R; R++) - Candidates.reset(*R); + for (MCRegAliasIterator AI(MO.getReg(), TRI, true); AI.isValid(); ++AI) + Candidates.reset(*AI); } // If we're not in a virtual reg's live range, this is a valid // restore point. diff --git a/lib/CodeGen/RenderMachineFunction.cpp b/lib/CodeGen/RenderMachineFunction.cpp deleted file mode 100644 index 6020908..0000000 --- a/lib/CodeGen/RenderMachineFunction.cpp +++ /dev/null @@ -1,1013 +0,0 @@ -//===-- llvm/CodeGen/RenderMachineFunction.cpp - MF->HTML -----------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. 
-// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "rendermf" - -#include "RenderMachineFunction.h" - -#include "VirtRegMap.h" - -#include "llvm/Function.h" -#include "llvm/Module.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/CodeGen/LiveIntervalAnalysis.h" -#include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineInstr.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetMachine.h" - -#include - -using namespace llvm; - -char RenderMachineFunction::ID = 0; -INITIALIZE_PASS_BEGIN(RenderMachineFunction, "rendermf", - "Render machine functions (and related info) to HTML pages", - false, false) -INITIALIZE_PASS_DEPENDENCY(SlotIndexes) -INITIALIZE_PASS_DEPENDENCY(LiveIntervals) -INITIALIZE_PASS_END(RenderMachineFunction, "rendermf", - "Render machine functions (and related info) to HTML pages", - false, false) - -static cl::opt -outputFileSuffix("rmf-file-suffix", - cl::desc("Appended to function name to get output file name " - "(default: \".html\")"), - cl::init(".html"), cl::Hidden); - -static cl::opt -machineFuncsToRender("rmf-funcs", - cl::desc("Comma separated list of functions to render" - ", or \"*\"."), - cl::init(""), cl::Hidden); - -static cl::opt -pressureClasses("rmf-classes", - cl::desc("Register classes to render pressure for."), - cl::init(""), cl::Hidden); - -static cl::opt -showIntervals("rmf-intervals", - cl::desc("Live intervals to show alongside code."), - cl::init(""), cl::Hidden); - -static cl::opt -filterEmpty("rmf-filter-empty-intervals", - cl::desc("Don't display empty intervals."), - cl::init(true), cl::Hidden); - -static cl::opt -showEmptyIndexes("rmf-empty-indexes", - cl::desc("Render indexes not associated with instructions or " - "MBB starts."), - cl::init(false), cl::Hidden); - -static cl::opt -useFancyVerticals("rmf-fancy-verts", - cl::desc("Use SVG for vertical text."), - cl::init(true), cl::Hidden); - -static cl::opt -prettyHTML("rmf-pretty-html", - cl::desc("Pretty print HTML. 
For debugging the renderer only.."), - cl::init(false), cl::Hidden); - - -namespace llvm { - - bool MFRenderingOptions::renderingOptionsProcessed; - std::set MFRenderingOptions::mfNamesToRender; - bool MFRenderingOptions::renderAllMFs = false; - - std::set MFRenderingOptions::classNamesToRender; - bool MFRenderingOptions::renderAllClasses = false; - - std::set > - MFRenderingOptions::intervalNumsToRender; - unsigned MFRenderingOptions::intervalTypesToRender = ExplicitOnly; - - template - void MFRenderingOptions::splitComaSeperatedList(const std::string &s, - OutputItr outItr) { - std::string::const_iterator curPos = s.begin(); - std::string::const_iterator nextComa = std::find(curPos, s.end(), ','); - while (nextComa != s.end()) { - std::string elem; - std::copy(curPos, nextComa, std::back_inserter(elem)); - *outItr = elem; - ++outItr; - curPos = llvm::next(nextComa); - nextComa = std::find(curPos, s.end(), ','); - } - - if (curPos != s.end()) { - std::string elem; - std::copy(curPos, s.end(), std::back_inserter(elem)); - *outItr = elem; - ++outItr; - } - } - - void MFRenderingOptions::processOptions() { - if (!renderingOptionsProcessed) { - processFuncNames(); - processRegClassNames(); - processIntervalNumbers(); - renderingOptionsProcessed = true; - } - } - - void MFRenderingOptions::processFuncNames() { - if (machineFuncsToRender == "*") { - renderAllMFs = true; - } else { - splitComaSeperatedList(machineFuncsToRender, - std::inserter(mfNamesToRender, - mfNamesToRender.begin())); - } - } - - void MFRenderingOptions::processRegClassNames() { - if (pressureClasses == "*") { - renderAllClasses = true; - } else { - splitComaSeperatedList(pressureClasses, - std::inserter(classNamesToRender, - classNamesToRender.begin())); - } - } - - void MFRenderingOptions::processIntervalNumbers() { - std::set intervalRanges; - splitComaSeperatedList(showIntervals, - std::inserter(intervalRanges, - intervalRanges.begin())); - std::for_each(intervalRanges.begin(), intervalRanges.end(), - processIntervalRange); - } - - void MFRenderingOptions::processIntervalRange( - const std::string &intervalRangeStr) { - if (intervalRangeStr == "*") { - intervalTypesToRender |= All; - } else if (intervalRangeStr == "virt-nospills*") { - intervalTypesToRender |= VirtNoSpills; - } else if (intervalRangeStr == "spills*") { - intervalTypesToRender |= VirtSpills; - } else if (intervalRangeStr == "virt*") { - intervalTypesToRender |= AllVirt; - } else if (intervalRangeStr == "phys*") { - intervalTypesToRender |= AllPhys; - } else { - std::istringstream iss(intervalRangeStr); - unsigned reg1, reg2; - if ((iss >> reg1 >> std::ws)) { - if (iss.eof()) { - intervalNumsToRender.insert(std::make_pair(reg1, reg1 + 1)); - } else { - char c; - iss >> c; - if (c == '-' && (iss >> reg2)) { - intervalNumsToRender.insert(std::make_pair(reg1, reg2 + 1)); - } else { - dbgs() << "Warning: Invalid interval range \"" - << intervalRangeStr << "\" in -rmf-intervals. Skipping.\n"; - } - } - } else { - dbgs() << "Warning: Invalid interval number \"" - << intervalRangeStr << "\" in -rmf-intervals. 
Skipping.\n"; - } - } - } - - void MFRenderingOptions::setup(MachineFunction *mf, - const TargetRegisterInfo *tri, - LiveIntervals *lis, - const RenderMachineFunction *rmf) { - this->mf = mf; - this->tri = tri; - this->lis = lis; - this->rmf = rmf; - - clear(); - } - - void MFRenderingOptions::clear() { - regClassesTranslatedToCurrentFunction = false; - regClassSet.clear(); - - intervalsTranslatedToCurrentFunction = false; - intervalSet.clear(); - } - - void MFRenderingOptions::resetRenderSpecificOptions() { - intervalSet.clear(); - intervalsTranslatedToCurrentFunction = false; - } - - bool MFRenderingOptions::shouldRenderCurrentMachineFunction() const { - processOptions(); - - return (renderAllMFs || - mfNamesToRender.find(mf->getFunction()->getName()) != - mfNamesToRender.end()); - } - - const MFRenderingOptions::RegClassSet& MFRenderingOptions::regClasses() const{ - translateRegClassNamesToCurrentFunction(); - return regClassSet; - } - - const MFRenderingOptions::IntervalSet& MFRenderingOptions::intervals() const { - translateIntervalNumbersToCurrentFunction(); - return intervalSet; - } - - bool MFRenderingOptions::renderEmptyIndexes() const { - return showEmptyIndexes; - } - - bool MFRenderingOptions::fancyVerticals() const { - return useFancyVerticals; - } - - void MFRenderingOptions::translateRegClassNamesToCurrentFunction() const { - if (!regClassesTranslatedToCurrentFunction) { - processOptions(); - for (TargetRegisterInfo::regclass_iterator rcItr = tri->regclass_begin(), - rcEnd = tri->regclass_end(); - rcItr != rcEnd; ++rcItr) { - const TargetRegisterClass *trc = *rcItr; - if (renderAllClasses || - classNamesToRender.find(trc->getName()) != - classNamesToRender.end()) { - regClassSet.insert(trc); - } - } - regClassesTranslatedToCurrentFunction = true; - } - } - - void MFRenderingOptions::translateIntervalNumbersToCurrentFunction() const { - if (!intervalsTranslatedToCurrentFunction) { - processOptions(); - - // If we're not just doing explicit then do a copy over all matching - // types. - if (intervalTypesToRender != ExplicitOnly) { - for (LiveIntervals::iterator liItr = lis->begin(), liEnd = lis->end(); - liItr != liEnd; ++liItr) { - LiveInterval *li = liItr->second; - - if (filterEmpty && li->empty()) - continue; - - if ((TargetRegisterInfo::isPhysicalRegister(li->reg) && - (intervalTypesToRender & AllPhys))) { - intervalSet.insert(li); - } else if (TargetRegisterInfo::isVirtualRegister(li->reg)) { - if (((intervalTypesToRender & VirtNoSpills) && !rmf->isSpill(li)) || - ((intervalTypesToRender & VirtSpills) && rmf->isSpill(li))) { - intervalSet.insert(li); - } - } - } - } - - // If we need to process the explicit list... 
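
The explicit list below iterates the half-open [first, second) register ranges that processIntervalRange built from the -rmf-intervals string. As a reading aid, the same parse in a self-contained form; parseRange is a hypothetical stand-in for the member function, using the same std::istringstream technique:

    #include <sstream>
    #include <string>
    #include <utility>

    // "57"   -> [57, 58); "5-10" -> [5, 11); anything else is rejected.
    static bool parseRange(const std::string &s,
                           std::pair<unsigned, unsigned> &out) {
      std::istringstream iss(s);
      unsigned lo = 0, hi = 0;
      if (!(iss >> lo >> std::ws))
        return false;                      // no leading number
      if (iss.eof()) {                     // a single interval number
        out = std::make_pair(lo, lo + 1);
        return true;
      }
      char c = 0;
      iss >> c;
      if (c == '-' && (iss >> hi)) {       // inclusive range, half-open result
        out = std::make_pair(lo, hi + 1);
        return true;
      }
      return false;
    }
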
- if (intervalTypesToRender != All) { - for (std::set >::const_iterator - regRangeItr = intervalNumsToRender.begin(), - regRangeEnd = intervalNumsToRender.end(); - regRangeItr != regRangeEnd; ++regRangeItr) { - const std::pair &range = *regRangeItr; - for (unsigned reg = range.first; reg != range.second; ++reg) { - if (lis->hasInterval(reg)) { - intervalSet.insert(&lis->getInterval(reg)); - } - } - } - } - - intervalsTranslatedToCurrentFunction = true; - } - } - - // ---------- TargetRegisterExtraInformation implementation ---------- - - TargetRegisterExtraInfo::TargetRegisterExtraInfo() - : mapsPopulated(false) { - } - - void TargetRegisterExtraInfo::setup(MachineFunction *mf, - MachineRegisterInfo *mri, - const TargetRegisterInfo *tri, - LiveIntervals *lis) { - this->mf = mf; - this->mri = mri; - this->tri = tri; - this->lis = lis; - } - - void TargetRegisterExtraInfo::reset() { - if (!mapsPopulated) { - initWorst(); - //initBounds(); - initCapacity(); - mapsPopulated = true; - } - - resetPressureAndLiveStates(); - } - - void TargetRegisterExtraInfo::clear() { - prWorst.clear(); - vrWorst.clear(); - capacityMap.clear(); - pressureMap.clear(); - //liveStatesMap.clear(); - mapsPopulated = false; - } - - void TargetRegisterExtraInfo::initWorst() { - assert(!mapsPopulated && prWorst.empty() && vrWorst.empty() && - "Worst map already initialised?"); - - // Start with the physical registers. - for (unsigned preg = 1; preg < tri->getNumRegs(); ++preg) { - WorstMapLine &pregLine = prWorst[preg]; - - for (TargetRegisterInfo::regclass_iterator rcItr = tri->regclass_begin(), - rcEnd = tri->regclass_end(); - rcItr != rcEnd; ++rcItr) { - const TargetRegisterClass *trc = *rcItr; - - unsigned numOverlaps = 0; - for (TargetRegisterClass::iterator rItr = trc->begin(), - rEnd = trc->end(); - rItr != rEnd; ++rItr) { - unsigned trcPReg = *rItr; - if (tri->regsOverlap(preg, trcPReg)) - ++numOverlaps; - } - - pregLine[trc] = numOverlaps; - } - } - - // Now the register classes. 
- for (TargetRegisterInfo::regclass_iterator rc1Itr = tri->regclass_begin(), - rcEnd = tri->regclass_end(); - rc1Itr != rcEnd; ++rc1Itr) { - const TargetRegisterClass *trc1 = *rc1Itr; - WorstMapLine &classLine = vrWorst[trc1]; - - for (TargetRegisterInfo::regclass_iterator rc2Itr = tri->regclass_begin(); - rc2Itr != rcEnd; ++rc2Itr) { - const TargetRegisterClass *trc2 = *rc2Itr; - - unsigned worst = 0; - - for (TargetRegisterClass::iterator trc1Itr = trc1->begin(), - trc1End = trc1->end(); - trc1Itr != trc1End; ++trc1Itr) { - unsigned trc1Reg = *trc1Itr; - unsigned trc1RegWorst = 0; - - for (TargetRegisterClass::iterator trc2Itr = trc2->begin(), - trc2End = trc2->end(); - trc2Itr != trc2End; ++trc2Itr) { - unsigned trc2Reg = *trc2Itr; - if (tri->regsOverlap(trc1Reg, trc2Reg)) - ++trc1RegWorst; - } - if (trc1RegWorst > worst) { - worst = trc1RegWorst; - } - } - - if (worst != 0) { - classLine[trc2] = worst; - } - } - } - } - - unsigned TargetRegisterExtraInfo::getWorst( - unsigned reg, - const TargetRegisterClass *trc) const { - const WorstMapLine *wml = 0; - if (TargetRegisterInfo::isPhysicalRegister(reg)) { - PRWorstMap::const_iterator prwItr = prWorst.find(reg); - assert(prwItr != prWorst.end() && "Missing prWorst entry."); - wml = &prwItr->second; - } else { - const TargetRegisterClass *regTRC = mri->getRegClass(reg); - VRWorstMap::const_iterator vrwItr = vrWorst.find(regTRC); - assert(vrwItr != vrWorst.end() && "Missing vrWorst entry."); - wml = &vrwItr->second; - } - - WorstMapLine::const_iterator wmlItr = wml->find(trc); - if (wmlItr == wml->end()) - return 0; - - return wmlItr->second; - } - - void TargetRegisterExtraInfo::initCapacity() { - assert(!mapsPopulated && capacityMap.empty() && - "Capacity map already initialised?"); - - for (TargetRegisterInfo::regclass_iterator rcItr = tri->regclass_begin(), - rcEnd = tri->regclass_end(); - rcItr != rcEnd; ++rcItr) { - const TargetRegisterClass *trc = *rcItr; - unsigned capacity = trc->getRawAllocationOrder(*mf).size(); - - if (capacity != 0) - capacityMap[trc] = capacity; - } - } - - unsigned TargetRegisterExtraInfo::getCapacity( - const TargetRegisterClass *trc) const { - CapacityMap::const_iterator cmItr = capacityMap.find(trc); - assert(cmItr != capacityMap.end() && - "vreg with unallocable register class"); - return cmItr->second; - } - - void TargetRegisterExtraInfo::resetPressureAndLiveStates() { - pressureMap.clear(); - //liveStatesMap.clear(); - - // Iterate over all slots. - - - // Iterate over all live intervals. - for (LiveIntervals::iterator liItr = lis->begin(), - liEnd = lis->end(); - liItr != liEnd; ++liItr) { - LiveInterval *li = liItr->second; - - if (TargetRegisterInfo::isPhysicalRegister(li->reg)) - continue; - - // For all ranges in the current interal. - for (LiveInterval::iterator lrItr = li->begin(), - lrEnd = li->end(); - lrItr != lrEnd; ++lrItr) { - LiveRange *lr = &*lrItr; - - // For all slots in the current range. - for (SlotIndex i = lr->start; i != lr->end; i = i.getNextSlot()) { - - // Record increased pressure at index for all overlapping classes. 
-          for (TargetRegisterInfo::regclass_iterator
-                 rcItr = tri->regclass_begin(),
-                 rcEnd = tri->regclass_end();
-               rcItr != rcEnd; ++rcItr) {
-            const TargetRegisterClass *trc = *rcItr;
-
-            if (trc->getRawAllocationOrder(*mf).empty())
-              continue;
-
-            unsigned worstAtI = getWorst(li->reg, trc);
-
-            if (worstAtI != 0) {
-              pressureMap[i][trc] += worstAtI;
-            }
-          }
-        }
-      }
-    }
-  }
-
-  unsigned TargetRegisterExtraInfo::getPressureAtSlot(
-                                               const TargetRegisterClass *trc,
-                                               SlotIndex i) const {
-    PressureMap::const_iterator pmItr = pressureMap.find(i);
-    if (pmItr == pressureMap.end())
-      return 0;
-    const PressureMapLine &pmLine = pmItr->second;
-    PressureMapLine::const_iterator pmlItr = pmLine.find(trc);
-    if (pmlItr == pmLine.end())
-      return 0;
-    return pmlItr->second;
-  }
-
-  bool TargetRegisterExtraInfo::classOverCapacityAtSlot(
-                                               const TargetRegisterClass *trc,
-                                               SlotIndex i) const {
-    return (getPressureAtSlot(trc, i) > getCapacity(trc));
-  }
-
-  // ---------- MachineFunctionRenderer implementation ----------
-
-  void RenderMachineFunction::Spacer::print(raw_ostream &os) const {
-    if (!prettyHTML)
-      return;
-    for (unsigned i = 0; i < ns; ++i) {
-      os << " ";
-    }
-  }
-
-  RenderMachineFunction::Spacer RenderMachineFunction::s(unsigned ns) const {
-    return Spacer(ns);
-  }
-
-  raw_ostream& operator<<(raw_ostream &os,
-                          const RenderMachineFunction::Spacer &s) {
-    s.print(os);
-    return os;
-  }
-
-  template <typename Iterator>
-  std::string RenderMachineFunction::escapeChars(Iterator sBegin,
-                                                 Iterator sEnd) const {
-    std::string r;
-
-    for (Iterator sItr = sBegin; sItr != sEnd; ++sItr) {
-      char c = *sItr;
-
-      switch (c) {
-        case '<': r.append("&lt;"); break;
-        case '>': r.append("&gt;"); break;
-        case '&': r.append("&amp;"); break;
-        case ' ': r.append("&nbsp;"); break;
-        case '\"': r.append("&quot;"); break;
-        default: r.push_back(c); break;
-      }
-    }
-
-    return r;
-  }
-
-  RenderMachineFunction::LiveState
-  RenderMachineFunction::getLiveStateAt(const LiveInterval *li,
-                                        SlotIndex i) const {
-    const MachineInstr *mi = sis->getInstructionFromIndex(i);
-
-    // For uses/defs recorded use/def indexes override current liveness and
-    // instruction operands (Only for the interval which records the indexes).
-    // FIXME: This is all wrong, uses and defs share the same slots.
-    if (i.isEarlyClobber() || i.isRegister()) {
-      UseDefs::const_iterator udItr = useDefs.find(li);
-      if (udItr != useDefs.end()) {
-        const SlotSet &slotSet = udItr->second;
-        if (slotSet.count(i)) {
-          if (i.isEarlyClobber()) {
-            return Used;
-          }
-          // else
-          return Defined;
-        }
-      }
-    }
-
-    // If the slot is a load/store, or there's no info in the use/def set then
-    // use liveness and instruction operand info.
-    if (li->liveAt(i)) {
-
-      if (mi == 0) {
-        if (vrm == 0 ||
-            (vrm->getStackSlot(li->reg) == VirtRegMap::NO_STACK_SLOT)) {
-          return AliveReg;
-        } else {
-          return AliveStack;
-        }
-      } else {
-        if (i.isRegister() && mi->definesRegister(li->reg, tri)) {
-          return Defined;
-        } else if (i.isEarlyClobber() && mi->readsRegister(li->reg)) {
-          return Used;
-        } else {
-          if (vrm == 0 ||
-              (vrm->getStackSlot(li->reg) == VirtRegMap::NO_STACK_SLOT)) {
-            return AliveReg;
-          } else {
-            return AliveStack;
-          }
-        }
-      }
-    }
-    return Dead;
-  }
-
-  RenderMachineFunction::PressureState
-  RenderMachineFunction::getPressureStateAt(const TargetRegisterClass *trc,
-                                            SlotIndex i) const {
-    if (trei.getPressureAtSlot(trc, i) == 0) {
-      return Zero;
-    } else if (trei.classOverCapacityAtSlot(trc, i)){
-      return High;
-    }
-    return Low;
-  }
-
-  /// \brief Render a machine instruction. 
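
The entity mapping above is what makes raw instruction text safe to embed in the generated page. The same mapping as a free-standing, compilable sketch (escapeHTML is a hypothetical stand-in for the escapeChars member template):

    #include <cassert>
    #include <string>

    static std::string escapeHTML(const std::string &s) {
      std::string r;
      for (std::string::const_iterator i = s.begin(), e = s.end();
           i != e; ++i) {
        switch (*i) {
          case '<':  r.append("&lt;");   break;
          case '>':  r.append("&gt;");   break;
          case '&':  r.append("&amp;");  break;
          case ' ':  r.append("&nbsp;"); break;
          case '\"': r.append("&quot;"); break;
          default:   r.push_back(*i);    break;
        }
      }
      return r;
    }

    int main() {
      // Spaces are escaped too, so columns keep their width in the HTML table.
      assert(escapeHTML("a <= b") == "a&nbsp;&lt;=&nbsp;b");
      return 0;
    }
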
- void RenderMachineFunction::renderMachineInstr(raw_ostream &os, - const MachineInstr *mi) const { - std::string s; - raw_string_ostream oss(s); - oss << *mi; - - os << escapeChars(oss.str()); - } - - template - void RenderMachineFunction::renderVertical(const Spacer &indent, - raw_ostream &os, - const T &t) const { - if (ro.fancyVerticals()) { - os << indent << "\n" - << indent + s(6) << "" << t << "\n" - << indent + s(4) << "\">\n" - << indent << "\n"; - } else { - std::ostringstream oss; - oss << t; - std::string tStr(oss.str()); - - os << indent; - for (std::string::iterator tStrItr = tStr.begin(), tStrEnd = tStr.end(); - tStrItr != tStrEnd; ++tStrItr) { - os << *tStrItr << "
"; - } - os << "\n"; - } - } - - void RenderMachineFunction::insertCSS(const Spacer &indent, - raw_ostream &os) const { - os << indent << "\n"; - } - - void RenderMachineFunction::renderFunctionSummary( - const Spacer &indent, raw_ostream &os, - const char * const renderContextStr) const { - os << indent << "

<h1>Function: " << mf->getFunction()->getName()
-                 << "</h1>\n"
-       << indent << "<h2>Rendering context: " << renderContextStr << "</h2>\n";
-  }
-
-
-  void RenderMachineFunction::renderPressureTableLegend(
-                                                      const Spacer &indent,
-                                                      raw_ostream &os) const {
-    os << indent << "<h2>

Rendering Pressure Legend:</h2>\n"
-       << indent << "<table class=\"code\">\n"
-       << indent + s(2) << "<tr>\n"
-       << indent + s(4) << "<th>Pressure</th><th>Description</th>"
-                           "<th>Appearance</th>\n"
-       << indent + s(2) << "</tr>\n"
-       << indent + s(2) << "<tr>\n"
-       << indent + s(4) << "<td>No Pressure</td>"
-                           "<td>No physical registers of this class requested.</td>"
-                           "<td class=\"p-z\">&nbsp;&nbsp;</td>\n"
-       << indent + s(2) << "</tr>\n"
-       << indent + s(2) << "<tr>\n"
-       << indent + s(4) << "<td>Low Pressure</td>"
-                           "<td>Sufficient physical registers to meet demand.</td>"
-                           "<td class=\"p-l\">&nbsp;&nbsp;</td>\n"
-       << indent + s(2) << "</tr>\n"
-       << indent + s(2) << "<tr>\n"
-       << indent + s(4) << "<td>High Pressure</td>"
-                           "<td>Potentially insufficient physical registers to meet demand.</td>"
-                           "<td class=\"p-h\">&nbsp;&nbsp;</td>\n"
-       << indent + s(2) << "</tr>\n"
-       << indent << "</table>
\n"; - } - - template - void RenderMachineFunction::renderCellsWithRLE( - const Spacer &indent, raw_ostream &os, - const std::pair &rleAccumulator, - const std::map &cellTypeStrs) const { - - if (rleAccumulator.second == 0) - return; - - typename std::map::const_iterator ctsItr = - cellTypeStrs.find(rleAccumulator.first); - - assert(ctsItr != cellTypeStrs.end() && "No string for given cell type."); - - os << indent + s(4) << "second << "\""; - if (rleAccumulator.second > 1) - os << " colspan=" << rleAccumulator.second; - os << ">\n"; - } - - - void RenderMachineFunction::renderCodeTablePlusPI(const Spacer &indent, - raw_ostream &os) const { - - std::map lsStrs; - lsStrs[Dead] = "l-n"; - lsStrs[Defined] = "l-d"; - lsStrs[Used] = "l-u"; - lsStrs[AliveReg] = "l-r"; - lsStrs[AliveStack] = "l-s"; - - std::map psStrs; - psStrs[Zero] = "p-z"; - psStrs[Low] = "p-l"; - psStrs[High] = "p-h"; - - // Open the table... - - os << indent << "\n" - << indent + s(2) << "\n"; - - // Render the header row... - - os << indent + s(4) << "\n" - << indent + s(4) << "\n"; - - // Render class names if necessary... - if (!ro.regClasses().empty()) { - for (MFRenderingOptions::RegClassSet::const_iterator - rcItr = ro.regClasses().begin(), - rcEnd = ro.regClasses().end(); - rcItr != rcEnd; ++rcItr) { - const TargetRegisterClass *trc = *rcItr; - os << indent + s(4) << "\n"; - } - } - - // FIXME: Is there a nicer way to insert space between columns in HTML? - if (!ro.regClasses().empty() && !ro.intervals().empty()) - os << indent + s(4) << "\n"; - - // Render interval numbers if necessary... - if (!ro.intervals().empty()) { - for (MFRenderingOptions::IntervalSet::const_iterator - liItr = ro.intervals().begin(), - liEnd = ro.intervals().end(); - liItr != liEnd; ++liItr) { - - const LiveInterval *li = *liItr; - os << indent + s(4) << "\n"; - } - } - - os << indent + s(2) << "\n"; - - // End header row, start with the data rows... - - MachineInstr *mi = 0; - - // Data rows: - for (SlotIndex i = sis->getZeroIndex(); i != sis->getLastIndex(); - i = i.getNextSlot()) { - - // Render the slot column. - os << indent + s(2) << "\n"; - - // Render the code column. - if (i.isBlock()) { - MachineBasicBlock *mbb = sis->getMBBFromIndex(i); - mi = sis->getInstructionFromIndex(i); - - if (i == sis->getMBBStartIdx(mbb) || mi != 0 || - ro.renderEmptyIndexes()) { - os << indent + s(4) << "\n" - << indent + s(4) << "\n"; - } else { - i = i.getDeadSlot(); // <- Will be incremented to the next index. - continue; - } - } - - // Render the class columns. - if (!ro.regClasses().empty()) { - std::pair psRLEAccumulator(Zero, 0); - for (MFRenderingOptions::RegClassSet::const_iterator - rcItr = ro.regClasses().begin(), - rcEnd = ro.regClasses().end(); - rcItr != rcEnd; ++rcItr) { - const TargetRegisterClass *trc = *rcItr; - PressureState newPressure = getPressureStateAt(trc, i); - - if (newPressure == psRLEAccumulator.first) { - ++psRLEAccumulator.second; - } else { - renderCellsWithRLE(indent + s(4), os, psRLEAccumulator, psStrs); - psRLEAccumulator.first = newPressure; - psRLEAccumulator.second = 1; - } - } - renderCellsWithRLE(indent + s(4), os, psRLEAccumulator, psStrs); - } - - // FIXME: Is there a nicer way to insert space between columns in HTML? 
- if (!ro.regClasses().empty() && !ro.intervals().empty()) - os << indent + s(4) << "\n"; - - if (!ro.intervals().empty()) { - std::pair lsRLEAccumulator(Dead, 0); - for (MFRenderingOptions::IntervalSet::const_iterator - liItr = ro.intervals().begin(), - liEnd = ro.intervals().end(); - liItr != liEnd; ++liItr) { - const LiveInterval *li = *liItr; - LiveState newLiveness = getLiveStateAt(li, i); - - if (newLiveness == lsRLEAccumulator.first) { - ++lsRLEAccumulator.second; - } else { - renderCellsWithRLE(indent + s(4), os, lsRLEAccumulator, lsStrs); - lsRLEAccumulator.first = newLiveness; - lsRLEAccumulator.second = 1; - } - } - renderCellsWithRLE(indent + s(4), os, lsRLEAccumulator, lsStrs); - } - os << indent + s(2) << "\n"; - } - - os << indent << "
indexinstr\n"; - renderVertical(indent + s(6), os, trc->getName()); - os << indent + s(4) << "  \n"; - renderVertical(indent + s(6), os, li->reg); - os << indent + s(4) << "
" << i << " \n"; - - if (i == sis->getMBBStartIdx(mbb)) { - os << indent + s(6) << "BB#" << mbb->getNumber() << ": \n"; - } else if (mi != 0) { - os << indent + s(6) << "  "; - renderMachineInstr(os, mi); - } else { - // Empty interval - leave blank. - } - os << indent + s(4) << "
\n"; - - if (!ro.regClasses().empty()) - renderPressureTableLegend(indent, os); - } - - void RenderMachineFunction::renderFunctionPage( - raw_ostream &os, - const char * const renderContextStr) const { - os << "\n" - << s(2) << "\n" - << s(4) << "" << fqn << "\n"; - - insertCSS(s(4), os); - - os << s(2) << "\n" - << s(2) << "\n"; - - renderFunctionSummary(s(4), os, renderContextStr); - - os << s(4) << "


\n"; - - //renderLiveIntervalInfoTable(" ", os); - - os << s(4) << "


\n"; - - renderCodeTablePlusPI(s(4), os); - - os << s(2) << "\n" - << "\n"; - } - - void RenderMachineFunction::getAnalysisUsage(AnalysisUsage &au) const { - au.addRequired(); - au.addRequired(); - au.setPreservesAll(); - MachineFunctionPass::getAnalysisUsage(au); - } - - bool RenderMachineFunction::runOnMachineFunction(MachineFunction &fn) { - - mf = &fn; - mri = &mf->getRegInfo(); - tri = mf->getTarget().getRegisterInfo(); - lis = &getAnalysis(); - sis = &getAnalysis(); - - trei.setup(mf, mri, tri, lis); - ro.setup(mf, tri, lis, this); - spillIntervals.clear(); - spillFor.clear(); - useDefs.clear(); - - fqn = mf->getFunction()->getParent()->getModuleIdentifier() + "." + - mf->getFunction()->getName().str(); - - return false; - } - - void RenderMachineFunction::releaseMemory() { - trei.clear(); - ro.clear(); - spillIntervals.clear(); - spillFor.clear(); - useDefs.clear(); - } - - void RenderMachineFunction::rememberUseDefs(const LiveInterval *li) { - - if (!ro.shouldRenderCurrentMachineFunction()) - return; - - for (MachineRegisterInfo::reg_iterator rItr = mri->reg_begin(li->reg), - rEnd = mri->reg_end(); - rItr != rEnd; ++rItr) { - const MachineInstr *mi = &*rItr; - if (mi->readsRegister(li->reg)) { - useDefs[li].insert(lis->getInstructionIndex(mi).getRegSlot(true)); - } - if (mi->definesRegister(li->reg)) { - useDefs[li].insert(lis->getInstructionIndex(mi).getRegSlot()); - } - } - } - - void RenderMachineFunction::rememberSpills( - const LiveInterval *li, - const std::vector &spills) { - - if (!ro.shouldRenderCurrentMachineFunction()) - return; - - for (std::vector::const_iterator siItr = spills.begin(), - siEnd = spills.end(); - siItr != siEnd; ++siItr) { - const LiveInterval *spill = *siItr; - spillIntervals[li].insert(spill); - spillFor[spill] = li; - } - } - - bool RenderMachineFunction::isSpill(const LiveInterval *li) const { - SpillForMap::const_iterator sfItr = spillFor.find(li); - if (sfItr == spillFor.end()) - return false; - return true; - } - - void RenderMachineFunction::renderMachineFunction( - const char *renderContextStr, - const VirtRegMap *vrm, - const char *renderSuffix) { - if (!ro.shouldRenderCurrentMachineFunction()) - return; - - this->vrm = vrm; - trei.reset(); - - std::string rpFileName(mf->getFunction()->getName().str() + - (renderSuffix ? renderSuffix : "") + - outputFileSuffix); - - std::string errMsg; - raw_fd_ostream outFile(rpFileName.c_str(), errMsg, raw_fd_ostream::F_Binary); - - renderFunctionPage(outFile, renderContextStr); - - ro.resetRenderSpecificOptions(); - } - - std::string RenderMachineFunction::escapeChars(const std::string &s) const { - return escapeChars(s.begin(), s.end()); - } - -} diff --git a/lib/CodeGen/RenderMachineFunction.h b/lib/CodeGen/RenderMachineFunction.h deleted file mode 100644 index 8571992..0000000 --- a/lib/CodeGen/RenderMachineFunction.h +++ /dev/null @@ -1,338 +0,0 @@ -//===-- llvm/CodeGen/RenderMachineFunction.h - MF->HTML -*- C++ -*---------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. 
-// -//===----------------------------------------------------------------------===// -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_CODEGEN_RENDERMACHINEFUNCTION_H -#define LLVM_CODEGEN_RENDERMACHINEFUNCTION_H - -#include "llvm/CodeGen/LiveInterval.h" -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/SlotIndexes.h" -#include "llvm/Target/TargetRegisterInfo.h" - -#include -#include -#include -#include - -namespace llvm { - - class LiveInterval; - class LiveIntervals; - class MachineInstr; - class MachineRegisterInfo; - class RenderMachineFunction; - class TargetRegisterClass; - class TargetRegisterInfo; - class VirtRegMap; - class raw_ostream; - - /// \brief Helper class to process rendering options. Tries to be as lazy as - /// possible. - class MFRenderingOptions { - public: - - struct RegClassComp { - bool operator()(const TargetRegisterClass *trc1, - const TargetRegisterClass *trc2) const { - std::string trc1Name(trc1->getName()), trc2Name(trc2->getName()); - return std::lexicographical_compare(trc1Name.begin(), trc1Name.end(), - trc2Name.begin(), trc2Name.end()); - } - }; - - typedef std::set RegClassSet; - - struct IntervalComp { - bool operator()(const LiveInterval *li1, const LiveInterval *li2) const { - return li1->reg < li2->reg; - } - }; - - typedef std::set IntervalSet; - - /// Initialise the rendering options. - void setup(MachineFunction *mf, const TargetRegisterInfo *tri, - LiveIntervals *lis, const RenderMachineFunction *rmf); - - /// Clear translations of options to the current function. - void clear(); - - /// Reset any options computed for this specific rendering. - void resetRenderSpecificOptions(); - - /// Should we render the current function. - bool shouldRenderCurrentMachineFunction() const; - - /// Return the set of register classes to render pressure for. - const RegClassSet& regClasses() const; - - /// Return the set of live intervals to render liveness for. - const IntervalSet& intervals() const; - - /// Render indexes which are not associated with instructions / MBB starts. - bool renderEmptyIndexes() const; - - /// Return whether or not to render using SVG for fancy vertical text. - bool fancyVerticals() const; - - private: - - static bool renderingOptionsProcessed; - static std::set mfNamesToRender; - static bool renderAllMFs; - - static std::set classNamesToRender; - static bool renderAllClasses; - - - static std::set > intervalNumsToRender; - typedef enum { ExplicitOnly = 0, - AllPhys = 1, - VirtNoSpills = 2, - VirtSpills = 4, - AllVirt = 6, - All = 7 } - IntervalTypesToRender; - static unsigned intervalTypesToRender; - - template - static void splitComaSeperatedList(const std::string &s, OutputItr outItr); - - static void processOptions(); - - static void processFuncNames(); - static void processRegClassNames(); - static void processIntervalNumbers(); - - static void processIntervalRange(const std::string &intervalRangeStr); - - MachineFunction *mf; - const TargetRegisterInfo *tri; - LiveIntervals *lis; - const RenderMachineFunction *rmf; - - mutable bool regClassesTranslatedToCurrentFunction; - mutable RegClassSet regClassSet; - - mutable bool intervalsTranslatedToCurrentFunction; - mutable IntervalSet intervalSet; - - void translateRegClassNamesToCurrentFunction() const; - - void translateIntervalNumbersToCurrentFunction() const; - }; - - /// \brief Provide extra information about the physical and virtual registers - /// in the function being compiled. 
- class TargetRegisterExtraInfo { - public: - TargetRegisterExtraInfo(); - - /// \brief Set up TargetRegisterExtraInfo with pointers to necessary - /// sources of information. - void setup(MachineFunction *mf, MachineRegisterInfo *mri, - const TargetRegisterInfo *tri, LiveIntervals *lis); - - /// \brief Recompute tables for changed function. - void reset(); - - /// \brief Free all tables in TargetRegisterExtraInfo. - void clear(); - - /// \brief Maximum number of registers from trc which alias reg. - unsigned getWorst(unsigned reg, const TargetRegisterClass *trc) const; - - /// \brief Returns the number of allocable registers in trc. - unsigned getCapacity(const TargetRegisterClass *trc) const; - - /// \brief Return the number of registers of class trc that may be - /// needed at slot i. - unsigned getPressureAtSlot(const TargetRegisterClass *trc, - SlotIndex i) const; - - /// \brief Return true if the number of registers of type trc that may be - /// needed at slot i is greater than the capacity of trc. - bool classOverCapacityAtSlot(const TargetRegisterClass *trc, - SlotIndex i) const; - - private: - - MachineFunction *mf; - MachineRegisterInfo *mri; - const TargetRegisterInfo *tri; - LiveIntervals *lis; - - typedef std::map WorstMapLine; - typedef std::map VRWorstMap; - VRWorstMap vrWorst; - - typedef std::map PRWorstMap; - PRWorstMap prWorst; - - typedef std::map CapacityMap; - CapacityMap capacityMap; - - typedef std::map PressureMapLine; - typedef std::map PressureMap; - PressureMap pressureMap; - - bool mapsPopulated; - - /// \brief Initialise the 'worst' table. - void initWorst(); - - /// \brief Initialise the 'capacity' table. - void initCapacity(); - - /// \brief Initialise/Reset the 'pressure' and live states tables. - void resetPressureAndLiveStates(); - }; - - /// \brief Render MachineFunction objects and related information to a HTML - /// page. - class RenderMachineFunction : public MachineFunctionPass { - public: - static char ID; - - RenderMachineFunction() : MachineFunctionPass(ID) { - initializeRenderMachineFunctionPass(*PassRegistry::getPassRegistry()); - } - - virtual void getAnalysisUsage(AnalysisUsage &au) const; - - virtual bool runOnMachineFunction(MachineFunction &fn); - - virtual void releaseMemory(); - - void rememberUseDefs(const LiveInterval *li); - - void rememberSpills(const LiveInterval *li, - const std::vector &spills); - - bool isSpill(const LiveInterval *li) const; - - /// \brief Render this machine function to HTML. - /// - /// @param renderContextStr This parameter will be included in the top of - /// the html file to explain where (in the - /// codegen pipeline) this function was rendered - /// from. Set it to something like - /// "Pre-register-allocation". - /// @param vrm If non-null the VRM will be queried to determine - /// whether a virtual register was allocated to a - /// physical register or spilled. - /// @param renderFilePrefix This string will be appended to the function - /// name (before the output file suffix) to enable - /// multiple renderings from the same function. - void renderMachineFunction(const char *renderContextStr, - const VirtRegMap *vrm = 0, - const char *renderSuffix = 0); - - private: - class Spacer; - friend raw_ostream& operator<<(raw_ostream &os, const Spacer &s); - - std::string fqn; - - MachineFunction *mf; - MachineRegisterInfo *mri; - const TargetRegisterInfo *tri; - LiveIntervals *lis; - SlotIndexes *sis; - const VirtRegMap *vrm; - - TargetRegisterExtraInfo trei; - MFRenderingOptions ro; - - - - // Utilities. 
- typedef enum { Dead, Defined, Used, AliveReg, AliveStack } LiveState; - LiveState getLiveStateAt(const LiveInterval *li, SlotIndex i) const; - - typedef enum { Zero, Low, High } PressureState; - PressureState getPressureStateAt(const TargetRegisterClass *trc, - SlotIndex i) const; - - typedef std::map > - SpillIntervals; - SpillIntervals spillIntervals; - - typedef std::map SpillForMap; - SpillForMap spillFor; - - typedef std::set SlotSet; - typedef std::map UseDefs; - UseDefs useDefs; - - // ---------- Rendering methods ---------- - - /// For inserting spaces when pretty printing. - class Spacer { - public: - explicit Spacer(unsigned numSpaces) : ns(numSpaces) {} - Spacer operator+(const Spacer &o) const { return Spacer(ns + o.ns); } - void print(raw_ostream &os) const; - private: - unsigned ns; - }; - - Spacer s(unsigned ns) const; - - template - std::string escapeChars(Iterator sBegin, Iterator sEnd) const; - - /// \brief Render a machine instruction. - void renderMachineInstr(raw_ostream &os, - const MachineInstr *mi) const; - - /// \brief Render vertical text. - template - void renderVertical(const Spacer &indent, - raw_ostream &os, - const T &t) const; - - /// \brief Insert CSS layout info. - void insertCSS(const Spacer &indent, - raw_ostream &os) const; - - /// \brief Render a brief summary of the function (including rendering - /// context). - void renderFunctionSummary(const Spacer &indent, - raw_ostream &os, - const char * const renderContextStr) const; - - /// \brief Render a legend for the pressure table. - void renderPressureTableLegend(const Spacer &indent, - raw_ostream &os) const; - - /// \brief Render a consecutive set of HTML cells of the same class using - /// the colspan attribute for run-length encoding. - template - void renderCellsWithRLE( - const Spacer &indent, raw_ostream &os, - const std::pair &rleAccumulator, - const std::map &cellTypeStrs) const; - - /// \brief Render code listing, potentially with register pressure - /// and live intervals shown alongside. - void renderCodeTablePlusPI(const Spacer &indent, - raw_ostream &os) const; - - /// \brief Render the HTML page representing the MachineFunction. - void renderFunctionPage(raw_ostream &os, - const char * const renderContextStr) const; - - std::string escapeChars(const std::string &s) const; - }; -} - -#endif /* LLVM_CODEGEN_RENDERMACHINEFUNCTION_H */ diff --git a/lib/CodeGen/ScheduleDAG.cpp b/lib/CodeGen/ScheduleDAG.cpp index 8fd6426..752f8e4 100644 --- a/lib/CodeGen/ScheduleDAG.cpp +++ b/lib/CodeGen/ScheduleDAG.cpp @@ -64,10 +64,27 @@ const MCInstrDesc *ScheduleDAG::getNodeDesc(const SDNode *Node) const { /// specified node. bool SUnit::addPred(const SDep &D) { // If this node already has this depenence, don't add a redundant one. - for (SmallVector::const_iterator I = Preds.begin(), E = Preds.end(); - I != E; ++I) - if (*I == D) + for (SmallVector::iterator I = Preds.begin(), E = Preds.end(); + I != E; ++I) { + if (I->overlaps(D)) { + // Extend the latency if needed. Equivalent to removePred(I) + addPred(D). + if (I->getLatency() < D.getLatency()) { + SUnit *PredSU = I->getSUnit(); + // Find the corresponding successor in N. + SDep ForwardD = *I; + ForwardD.setSUnit(this); + for (SmallVector::iterator II = PredSU->Succs.begin(), + EE = PredSU->Succs.end(); II != EE; ++II) { + if (*II == ForwardD) { + II->setLatency(D.getLatency()); + break; + } + } + I->setLatency(D.getLatency()); + } return false; + } + } // Now add a corresponding succ to N. 
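
The overlaps()-based loop in this hunk merges a duplicate dependence into the edge that is already present, keeping the larger of the two latencies on both the pred edge and its mirrored succ edge, instead of storing two nearly identical SDeps. A rough standalone model of that rule (SimpleDep and its fields are illustrative stand-ins, not the real SDep API):

    #include <algorithm>
    #include <vector>

    struct SimpleDep {
      unsigned Node;      // the other end of the edge
      int      Kind;      // data / anti / output / order
      unsigned Latency;
      // Edges "overlap" when they are equal in everything but latency.
      bool overlaps(const SimpleDep &o) const {
        return Node == o.Node && Kind == o.Kind;
      }
    };

    // Returns false when d was folded into an existing edge.
    static bool addPred(std::vector<SimpleDep> &preds, const SimpleDep &d) {
      for (std::vector<SimpleDep>::iterator i = preds.begin(),
           e = preds.end(); i != e; ++i) {
        if (i->overlaps(d)) {
          i->Latency = std::max(i->Latency, d.Latency); // extend, not duplicate
          return false;
        }
      }
      preds.push_back(d);
      return true;
    }
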
 SDep P = D;
 P.setSUnit(this);
diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp
index d46eb89..9c1dba3 100644
--- a/lib/CodeGen/ScheduleDAGInstrs.cpp
+++ b/lib/CodeGen/ScheduleDAGInstrs.cpp
@@ -21,17 +21,24 @@
 #include "llvm/CodeGen/MachineMemOperand.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/CodeGen/RegisterPressure.h"
 #include "llvm/CodeGen/ScheduleDAGInstrs.h"
 #include "llvm/MC/MCInstrItineraries.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetInstrInfo.h"
 #include "llvm/Target/TargetRegisterInfo.h"
 #include "llvm/Target/TargetSubtargetInfo.h"
+#include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallPtrSet.h"
 
 using namespace llvm;
 
+static cl::opt<bool> EnableAASchedMI("enable-aa-sched-mi", cl::Hidden,
+    cl::ZeroOrMore, cl::init(false),
+    cl::desc("Enable use of AA during MI DAG construction"));
+
 ScheduleDAGInstrs::ScheduleDAGInstrs(MachineFunction &mf,
                                      const MachineLoopInfo &mli,
                                      const MachineDominatorTree &mdt,
@@ -40,7 +47,7 @@ ScheduleDAGInstrs::ScheduleDAGInstrs(MachineFunction &mf,
   : ScheduleDAG(mf), MLI(mli), MDT(mdt), MFI(mf.getFrameInfo()),
     InstrItins(mf.getTarget().getInstrItineraryData()), LIS(lis),
     IsPostRA(IsPostRAFlag), UnitLatencies(false), CanHandleTerminators(false),
-    LoopRegs(MLI, MDT), FirstDbgValue(0) {
+    LoopRegs(MDT), FirstDbgValue(0) {
   assert((IsPostRA || LIS) && "PreRA scheduling requires LiveIntervals");
   DbgValues.clear();
   assert(!(IsPostRA && MRI.getNumVirtRegs()) &&
@@ -126,7 +133,8 @@ static const Value *getUnderlyingObjectForInstr(const MachineInstr *MI,
   return 0;
 }
 
-void ScheduleDAGInstrs::startBlock(MachineBasicBlock *BB) {
+void ScheduleDAGInstrs::startBlock(MachineBasicBlock *bb) {
+  BB = bb;
   LoopRegs.Deps.clear();
   if (MachineLoop *ML = MLI.getLoopFor(BB))
     if (BB == ML->getLoopLatch())
@@ -134,7 +142,8 @@ void ScheduleDAGInstrs::startBlock(MachineBasicBlock *BB) {
 }
 
 void ScheduleDAGInstrs::finishBlock() {
-  // Nothing to do.
+  // Subclasses should no longer refer to the old block.
+  BB = 0;
 }
 
 /// Initialize the map with the number of registers.
@@ -159,7 +168,7 @@ void ScheduleDAGInstrs::enterRegion(MachineBasicBlock *bb,
                                     MachineBasicBlock::iterator begin,
                                     MachineBasicBlock::iterator end,
                                     unsigned endcount) {
-  BB = bb;
+  assert(bb == BB && "startBlock should set BB");
   RegionBegin = begin;
   RegionEnd = end;
   EndIndex = endcount;
@@ -232,7 +241,8 @@ void ScheduleDAGInstrs::addPhysRegDataDeps(SUnit *SU,
   unsigned SpecialAddressLatency = ST.getSpecialAddressLatency();
   unsigned DataLatency = SU->Latency;
 
-  for (const uint16_t *Alias = TRI->getOverlaps(MO.getReg()); *Alias; ++Alias) {
+  for (MCRegAliasIterator Alias(MO.getReg(), TRI, true);
+       Alias.isValid(); ++Alias) {
     if (!Uses.contains(*Alias))
       continue;
     std::vector<SUnit*> &UseList = Uses[*Alias];
@@ -261,10 +271,12 @@
       // Adjust the dependence latency using operand def/use
       // information (if any), and then allow the target to
       // perform its own adjustments. 
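
The loop above also shows the new idiom this patch migrates to: MCRegAliasIterator walks a physical register together with every register that overlaps it, replacing the raw TRI->getOverlaps() table. In isolation the pattern looks like this (countAliases is a hypothetical helper; it should compile against this release's in-tree headers):

    #include "llvm/Target/TargetRegisterInfo.h"

    // Count Reg plus all registers aliasing it (IncludeSelf = true).
    static unsigned countAliases(unsigned Reg,
                                 const llvm::TargetRegisterInfo *TRI) {
      unsigned N = 0;
      for (llvm::MCRegAliasIterator AI(Reg, TRI, /*IncludeSelf=*/true);
           AI.isValid(); ++AI)
        ++N;
      return N;
    }
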
- const SDep& dep = SDep(SU, SDep::Data, LDataLatency, *Alias); + SDep dep(SU, SDep::Data, LDataLatency, *Alias); if (!UnitLatencies) { - computeOperandLatency(SU, UseSU, const_cast(dep)); - ST.adjustSchedDependency(SU, UseSU, const_cast(dep)); + unsigned Latency = computeOperandLatency(SU, UseSU, dep); + dep.setLatency(Latency); + + ST.adjustSchedDependency(SU, UseSU, dep); } UseSU->addPred(dep); } @@ -285,7 +297,8 @@ void ScheduleDAGInstrs::addPhysRegDeps(SUnit *SU, unsigned OperIdx) { // TODO: Using a latency of 1 here for output dependencies assumes // there's no cost for reusing registers. SDep::Kind Kind = MO.isUse() ? SDep::Anti : SDep::Output; - for (const uint16_t *Alias = TRI->getOverlaps(MO.getReg()); *Alias; ++Alias) { + for (MCRegAliasIterator Alias(MO.getReg(), TRI, true); + Alias.isValid(); ++Alias) { if (!Defs.contains(*Alias)) continue; std::vector &DefList = Defs[*Alias]; @@ -398,9 +411,10 @@ void ScheduleDAGInstrs::addVRegDefDeps(SUnit *SU, unsigned OperIdx) { const MachineInstr *MI = SU->getInstr(); unsigned Reg = MI->getOperand(OperIdx).getReg(); - // SSA defs do not have output/anti dependencies. + // Singly defined vregs do not have output/anti dependencies. // The current operand is a def, so we have at least one. - if (llvm::next(MRI.def_begin(Reg)) == MRI.def_end()) + // Check here if there are any others... + if (MRI.hasOneDef(Reg)) return; // Add output dependence to the next nearest def of this vreg. @@ -410,7 +424,7 @@ void ScheduleDAGInstrs::addVRegDefDeps(SUnit *SU, unsigned OperIdx) { // uses. We're conservative for now until we have a way to guarantee the uses // are not eliminated sometime during scheduling. The output dependence edge // is also useful if output latency exceeds def-use latency. - VReg2SUnitMap::iterator DefI = findVRegDef(Reg); + VReg2SUnitMap::iterator DefI = VRegDefs.find(Reg); if (DefI == VRegDefs.end()) VRegDefs.insert(VReg2SUnit(Reg, SU)); else { @@ -436,10 +450,11 @@ void ScheduleDAGInstrs::addVRegUseDeps(SUnit *SU, unsigned OperIdx) { // Lookup this operand's reaching definition. assert(LIS && "vreg dependencies requires LiveIntervals"); - SlotIndex UseIdx = LIS->getInstructionIndex(MI).getRegSlot(); - LiveInterval *LI = &LIS->getInterval(Reg); - VNInfo *VNI = LI->getVNInfoBefore(UseIdx); + LiveRangeQuery LRQ(LIS->getInterval(Reg), LIS->getInstructionIndex(MI)); + VNInfo *VNI = LRQ.valueIn(); + // VNI will be valid because MachineOperand::readsReg() is checked by caller. + assert(VNI && "No value to read by operand"); MachineInstr *Def = LIS->getInstructionFromIndex(VNI->def); // Phis and other noninstructions (after coalescing) have a NULL Def. if (Def) { @@ -449,11 +464,13 @@ void ScheduleDAGInstrs::addVRegUseDeps(SUnit *SU, unsigned OperIdx) { // Create a data dependence. // // TODO: Handle "special" address latencies cleanly. - const SDep &dep = SDep(DefSU, SDep::Data, DefSU->Latency, Reg); + SDep dep(DefSU, SDep::Data, DefSU->Latency, Reg); if (!UnitLatencies) { // Adjust the dependence latency using operand def/use information, then // allow the target to perform its own adjustments. - computeOperandLatency(DefSU, SU, const_cast(dep)); + unsigned Latency = computeOperandLatency(DefSU, SU, const_cast(dep)); + dep.setLatency(Latency); + const TargetSubtargetInfo &ST = TM.getSubtarget(); ST.adjustSchedDependency(DefSU, SU, const_cast(dep)); } @@ -462,11 +479,217 @@ void ScheduleDAGInstrs::addVRegUseDeps(SUnit *SU, unsigned OperIdx) { } // Add antidependence to the following def of the vreg it uses. 
-  VReg2SUnitMap::iterator DefI = findVRegDef(Reg);
+  VReg2SUnitMap::iterator DefI = VRegDefs.find(Reg);
   if (DefI != VRegDefs.end() && DefI->SU != SU)
     DefI->SU->addPred(SDep(SU, SDep::Anti, 0, Reg));
 }
 
+/// Return true if MI is an instruction we are unable to reason about
+/// (like a call or something with unmodeled side effects).
+static inline bool isGlobalMemoryObject(AliasAnalysis *AA, MachineInstr *MI) {
+  if (MI->isCall() || MI->hasUnmodeledSideEffects() ||
+      (MI->hasVolatileMemoryRef() &&
+       (!MI->mayLoad() || !MI->isInvariantLoad(AA))))
+    return true;
+  return false;
+}
+
+// This MI might have either incomplete info, or known to be unsafe
+// to deal with (i.e. volatile object).
+static inline bool isUnsafeMemoryObject(MachineInstr *MI,
+                                        const MachineFrameInfo *MFI) {
+  if (!MI || MI->memoperands_empty())
+    return true;
+  // We purposefully do not check for hasOneMemOperand() here
+  // in hope to trigger an assert downstream in order to
+  // finish implementation.
+  if ((*MI->memoperands_begin())->isVolatile() ||
+      MI->hasUnmodeledSideEffects())
+    return true;
+
+  const Value *V = (*MI->memoperands_begin())->getValue();
+  if (!V)
+    return true;
+
+  V = getUnderlyingObject(V);
+  if (const PseudoSourceValue *PSV = dyn_cast<const PseudoSourceValue>(V)) {
+    // Similarly to getUnderlyingObjectForInstr:
+    // For now, ignore PseudoSourceValues which may alias LLVM IR values
+    // because the code that uses this function has no way to cope with
+    // such aliases.
+    if (PSV->isAliased(MFI))
+      return true;
+  }
+  // Does this pointer refer to a distinct and identifiable object?
+  if (!isIdentifiedObject(V))
+    return true;
+
+  return false;
+}
+
+/// This returns true if the two MIs need a chain edge between them.
+/// If these are not even memory operations, we still may need
+/// chain deps between them. The question really is - could
+/// these two MIs be reordered during scheduling from memory dependency
+/// point of view.
+static bool MIsNeedChainEdge(AliasAnalysis *AA, const MachineFrameInfo *MFI,
+                             MachineInstr *MIa,
+                             MachineInstr *MIb) {
+  // Cover a trivial case - no edge is needed to itself.
+  if (MIa == MIb)
+    return false;
+
+  if (isUnsafeMemoryObject(MIa, MFI) || isUnsafeMemoryObject(MIb, MFI))
+    return true;
+
+  // If we are dealing with two "normal" loads, we do not need an edge
+  // between them - they could be reordered.
+  if (!MIa->mayStore() && !MIb->mayStore())
+    return false;
+
+  // To this point analysis is generic. From here on we do need AA.
+  if (!AA)
+    return true;
+
+  MachineMemOperand *MMOa = *MIa->memoperands_begin();
+  MachineMemOperand *MMOb = *MIb->memoperands_begin();
+
+  // FIXME: Need to handle multiple memory operands to support all targets.
+  if (!MIa->hasOneMemOperand() || !MIb->hasOneMemOperand())
+    llvm_unreachable("Multiple memory operands.");
+
+  // The following interface to AA is fashioned after DAGCombiner::isAlias
+  // and operates with MachineMemOperand offset with some important
+  // assumptions:
+  //   - LLVM fundamentally assumes flat address spaces.
+  //   - MachineOperand offset can *only* result from legalization and
+  //     cannot affect queries other than the trivial case of overlap
+  //     checking.
+  //   - These offsets never wrap and never step outside
+  //     of allocated objects.
+  //   - There should never be any negative offsets here.
+  //
+  // FIXME: Modify API to hide this math from "user"
+  // FIXME: Even before we go to AA we can reason locally about some
+  // memory objects. It can save compile time, and possibly catch some
+  // corner cases not currently covered. 
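
The offset math asserted just below widens each location so that a single AA query covers both accesses from their common starting offset. Worked through with made-up numbers:

    #include <algorithm>
    #include <cassert>

    int main() {
      // Two memory operands on the same underlying object:
      long OffA = 16, SizeA = 4;   // a 4-byte store at offset 16
      long OffB = 8,  SizeB = 16;  // a 16-byte load at offset 8
      long MinOffset = std::min(OffA, OffB);      // 8
      long Overlapa  = SizeA + OffA - MinOffset;  // 4 + 16 - 8  = 12
      long Overlapb  = SizeB + OffB - MinOffset;  // 16 + 8 - 8  = 16
      // Overlapa/b measure from the smaller offset to the end of each
      // access, so the two extended AA Locations overlap exactly when
      // the accesses themselves can.
      assert(Overlapa == 12 && Overlapb == 16);
      return 0;
    }
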
+ + assert ((MMOa->getOffset() >= 0) && "Negative MachineMemOperand offset"); + assert ((MMOb->getOffset() >= 0) && "Negative MachineMemOperand offset"); + + int64_t MinOffset = std::min(MMOa->getOffset(), MMOb->getOffset()); + int64_t Overlapa = MMOa->getSize() + MMOa->getOffset() - MinOffset; + int64_t Overlapb = MMOb->getSize() + MMOb->getOffset() - MinOffset; + + AliasAnalysis::AliasResult AAResult = AA->alias( + AliasAnalysis::Location(MMOa->getValue(), Overlapa, + MMOa->getTBAAInfo()), + AliasAnalysis::Location(MMOb->getValue(), Overlapb, + MMOb->getTBAAInfo())); + + return (AAResult != AliasAnalysis::NoAlias); +} + +/// This recursive function iterates over chain deps of SUb looking for +/// "latest" node that needs a chain edge to SUa. +static unsigned +iterateChainSucc(AliasAnalysis *AA, const MachineFrameInfo *MFI, + SUnit *SUa, SUnit *SUb, SUnit *ExitSU, unsigned *Depth, + SmallPtrSet &Visited) { + if (!SUa || !SUb || SUb == ExitSU) + return *Depth; + + // Remember visited nodes. + if (!Visited.insert(SUb)) + return *Depth; + // If there is _some_ dependency already in place, do not + // descend any further. + // TODO: Need to make sure that if that dependency got eliminated or ignored + // for any reason in the future, we would not violate DAG topology. + // Currently it does not happen, but makes an implicit assumption about + // future implementation. + // + // Independently, if we encounter node that is some sort of global + // object (like a call) we already have full set of dependencies to it + // and we can stop descending. + if (SUa->isSucc(SUb) || + isGlobalMemoryObject(AA, SUb->getInstr())) + return *Depth; + + // If we do need an edge, or we have exceeded depth budget, + // add that edge to the predecessors chain of SUb, + // and stop descending. + if (*Depth > 200 || + MIsNeedChainEdge(AA, MFI, SUa->getInstr(), SUb->getInstr())) { + SUb->addPred(SDep(SUa, SDep::Order, /*Latency=*/0, /*Reg=*/0, + /*isNormalMemory=*/true)); + return *Depth; + } + // Track current depth. + (*Depth)++; + // Iterate over chain dependencies only. + for (SUnit::const_succ_iterator I = SUb->Succs.begin(), E = SUb->Succs.end(); + I != E; ++I) + if (I->isCtrl()) + iterateChainSucc (AA, MFI, SUa, I->getSUnit(), ExitSU, Depth, Visited); + return *Depth; +} + +/// This function assumes that "downward" from SU there exist +/// tail/leaf of already constructed DAG. It iterates downward and +/// checks whether SU can be aliasing any node dominated +/// by it. +static void adjustChainDeps(AliasAnalysis *AA, const MachineFrameInfo *MFI, + SUnit *SU, SUnit *ExitSU, std::set &CheckList, + unsigned LatencyToLoad) { + if (!SU) + return; + + SmallPtrSet Visited; + unsigned Depth = 0; + + for (std::set::iterator I = CheckList.begin(), IE = CheckList.end(); + I != IE; ++I) { + if (SU == *I) + continue; + if (MIsNeedChainEdge(AA, MFI, SU->getInstr(), (*I)->getInstr())) { + unsigned Latency = ((*I)->getInstr()->mayLoad()) ? LatencyToLoad : 0; + (*I)->addPred(SDep(SU, SDep::Order, Latency, /*Reg=*/0, + /*isNormalMemory=*/true)); + } + // Now go through all the chain successors and iterate from them. + // Keep track of visited nodes. + for (SUnit::const_succ_iterator J = (*I)->Succs.begin(), + JE = (*I)->Succs.end(); J != JE; ++J) + if (J->isCtrl()) + iterateChainSucc (AA, MFI, SU, J->getSUnit(), + ExitSU, &Depth, Visited); + } +} + +/// Check whether two objects need a chain edge, if so, add it +/// otherwise remember the rejected SU. 
+static inline
+void addChainDependency (AliasAnalysis *AA, const MachineFrameInfo *MFI,
+                         SUnit *SUa, SUnit *SUb,
+                         std::set<SUnit *> &RejectList,
+                         unsigned TrueMemOrderLatency = 0,
+                         bool isNormalMemory = false) {
+  // If this is a false dependency,
+  // do not add the edge, but remember the rejected node.
+  if (!EnableAASchedMI ||
+      MIsNeedChainEdge(AA, MFI, SUa->getInstr(), SUb->getInstr()))
+    SUb->addPred(SDep(SUa, SDep::Order, TrueMemOrderLatency, /*Reg=*/0,
+                      isNormalMemory));
+  else {
+    // Duplicate entries should be ignored.
+    RejectList.insert(SUb);
+    DEBUG(dbgs() << "\tReject chain dep between SU("
+          << SUa->NodeNum << ") and SU("
+          << SUb->NodeNum << ")\n");
+  }
+}
+
 /// Create an SUnit for each real instruction, numbered in top-down topological
 /// order. The instruction order A < B, implies that no edge exists from B to A.
 ///
@@ -502,7 +725,11 @@ void ScheduleDAGInstrs::initSUnits() {
   }
 }
 
-void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA) {
+/// If RegPressure is non null, compute register pressure as a side effect. The
+/// DAG builder is an efficient place to do it because it already visits
+/// operands.
+void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
+                                        RegPressureTracker *RPTracker) {
   // Create an SUnit for each real instruction.
   initSUnits();
 
@@ -518,6 +745,7 @@
   // that are known not to alias
   std::map<const Value *, SUnit *> AliasMemDefs, NonAliasMemDefs;
   std::map<const Value *, std::vector<SUnit *> > AliasMemUses, NonAliasMemUses;
+  std::set<SUnit*> RejectMemNodes;
 
   // Remove any stale debug info; sometimes BuildSchedGraph is called again
   // without emitting the info from the previous call.
@@ -553,6 +781,10 @@
       PrevMI = MI;
       continue;
     }
+    if (RPTracker) {
+      RPTracker->recede();
+      assert(RPTracker->getPos() == prior(MII) && "RPTracker can't find MI");
+    }
 
     assert((!MI->isTerminator() || CanHandleTerminators) && !MI->isLabel() &&
            "Cannot schedule terminators or labels!");
@@ -587,11 +819,8 @@
     // after stack slots are lowered to actual addresses.
     // TODO: Use an AliasAnalysis and do real alias-analysis queries, and
     // produce more precise dependence information.
-#define STORE_LOAD_LATENCY 1
-    unsigned TrueMemOrderLatency = 0;
-    if (MI->isCall() || MI->hasUnmodeledSideEffects() ||
-        (MI->hasVolatileMemoryRef() &&
-         (!MI->mayLoad() || !MI->isInvariantLoad(AA)))) {
+    unsigned TrueMemOrderLatency = MI->mayStore() ? 1 : 0;
+    if (isGlobalMemoryObject(AA, MI)) {
       // Be conservative with these and add dependencies on all memory
      // references, even those that are known to not alias.
       for (std::map<const Value *, SUnit *>::iterator I =
@@ -603,36 +832,48 @@
         for (unsigned i = 0, e = I->second.size(); i != e; ++i)
           I->second[i]->addPred(SDep(SU, SDep::Order, TrueMemOrderLatency));
       }
-      NonAliasMemDefs.clear();
-      NonAliasMemUses.clear();
       // Add SU to the barrier chain.
       if (BarrierChain)
         BarrierChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
       BarrierChain = SU;
+      // This is a barrier event that acts as a pivotal node in the DAG,
+      // so it is safe to clear list of exposed nodes.
+      adjustChainDeps(AA, MFI, SU, &ExitSU, RejectMemNodes,
+                      TrueMemOrderLatency);
+      RejectMemNodes.clear();
+      NonAliasMemDefs.clear();
+      NonAliasMemUses.clear();
 
       // fall-through
     new_alias_chain:
       // Chain all possibly aliasing memory references through SU. 
- if (AliasChain) - AliasChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0)); + if (AliasChain) { + unsigned ChainLatency = 0; + if (AliasChain->getInstr()->mayLoad()) + ChainLatency = TrueMemOrderLatency; + addChainDependency(AA, MFI, SU, AliasChain, RejectMemNodes, + ChainLatency); + } AliasChain = SU; for (unsigned k = 0, m = PendingLoads.size(); k != m; ++k) - PendingLoads[k]->addPred(SDep(SU, SDep::Order, TrueMemOrderLatency)); + addChainDependency(AA, MFI, SU, PendingLoads[k], RejectMemNodes, + TrueMemOrderLatency); for (std::map::iterator I = AliasMemDefs.begin(), - E = AliasMemDefs.end(); I != E; ++I) { - I->second->addPred(SDep(SU, SDep::Order, /*Latency=*/0)); - } + E = AliasMemDefs.end(); I != E; ++I) + addChainDependency(AA, MFI, SU, I->second, RejectMemNodes); for (std::map >::iterator I = AliasMemUses.begin(), E = AliasMemUses.end(); I != E; ++I) { for (unsigned i = 0, e = I->second.size(); i != e; ++i) - I->second[i]->addPred(SDep(SU, SDep::Order, TrueMemOrderLatency)); + addChainDependency(AA, MFI, SU, I->second[i], RejectMemNodes, + TrueMemOrderLatency); } + adjustChainDeps(AA, MFI, SU, &ExitSU, RejectMemNodes, + TrueMemOrderLatency); PendingLoads.clear(); AliasMemDefs.clear(); AliasMemUses.clear(); } else if (MI->mayStore()) { bool MayAlias = true; - TrueMemOrderLatency = STORE_LOAD_LATENCY; if (const Value *V = getUnderlyingObjectForInstr(MI, MFI, MayAlias)) { // A store to a specific PseudoSourceValue. Add precise dependencies. // Record the def in MemDefs, first adding a dep if there is @@ -642,8 +883,8 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA) { std::map::iterator IE = ((MayAlias) ? AliasMemDefs.end() : NonAliasMemDefs.end()); if (I != IE) { - I->second->addPred(SDep(SU, SDep::Order, /*Latency=*/0, /*Reg=*/0, - /*isNormalMemory=*/true)); + addChainDependency(AA, MFI, SU, I->second, RejectMemNodes, + 0, true); I->second = SU; } else { if (MayAlias) @@ -658,20 +899,28 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA) { ((MayAlias) ? AliasMemUses.end() : NonAliasMemUses.end()); if (J != JE) { for (unsigned i = 0, e = J->second.size(); i != e; ++i) - J->second[i]->addPred(SDep(SU, SDep::Order, TrueMemOrderLatency, - /*Reg=*/0, /*isNormalMemory=*/true)); + addChainDependency(AA, MFI, SU, J->second[i], RejectMemNodes, + TrueMemOrderLatency, true); J->second.clear(); } if (MayAlias) { // Add dependencies from all the PendingLoads, i.e. loads // with no underlying object. for (unsigned k = 0, m = PendingLoads.size(); k != m; ++k) - PendingLoads[k]->addPred(SDep(SU, SDep::Order, TrueMemOrderLatency)); + addChainDependency(AA, MFI, SU, PendingLoads[k], RejectMemNodes, + TrueMemOrderLatency); // Add dependence on alias chain, if needed. if (AliasChain) - AliasChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0)); + addChainDependency(AA, MFI, SU, AliasChain, RejectMemNodes); + // But we also should check dependent instructions for the + // SU in question. + adjustChainDeps(AA, MFI, SU, &ExitSU, RejectMemNodes, + TrueMemOrderLatency); } // Add dependence on barrier chain, if needed. + // There is no point to check aliasing on barrier event. Even if + // SU and barrier _could_ be reordered, they should not. In addition, + // we have lost all RejectMemNodes below barrier. 
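
What the RejectMemNodes bookkeeping above amounts to: a chain edge the (off-by-default) AA check proves unnecessary is not simply forgotten; the would-be target is parked and re-tested against later SUs by adjustChainDeps, and a barrier flushes the set because the barrier edge already orders everything. A condensed model of that protocol (Node, needsEdge, and addEdge are stand-ins for SUnit, MIsNeedChainEdge, and addPred):

    #include <set>

    struct Node {};
    static bool needsEdge(Node *, Node *) { return true; } // MIsNeedChainEdge stand-in
    static void addEdge(Node *, Node *) {}                 // addPred stand-in

    static void addChainDep(Node *SUa, Node *SUb,
                            std::set<Node *> &Reject, bool UseAA) {
      if (!UseAA || needsEdge(SUa, SUb))
        addEdge(SUa, SUb);    // conservative or proven-necessary edge
      else
        Reject.insert(SUb);   // provisionally skipped; revisit later
    }

    // For each later SU, rejected nodes are re-checked so a dependence that
    // only becomes visible now still gets its edge.
    static void adjustChainDeps(Node *SU, std::set<Node *> &Reject) {
      for (std::set<Node *>::iterator I = Reject.begin(), E = Reject.end();
           I != E; ++I)
        if (needsEdge(SU, *I))
          addEdge(SU, *I);
    }
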
if (BarrierChain) BarrierChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0)); } else { @@ -688,7 +937,6 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA) { /*isArtificial=*/true)); } else if (MI->mayLoad()) { bool MayAlias = true; - TrueMemOrderLatency = 0; if (MI->isInvariantLoad(AA)) { // Invariant load, no chain dependencies needed! } else { @@ -700,8 +948,7 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA) { std::map::iterator IE = ((MayAlias) ? AliasMemDefs.end() : NonAliasMemDefs.end()); if (I != IE) - I->second->addPred(SDep(SU, SDep::Order, /*Latency=*/0, /*Reg=*/0, - /*isNormalMemory=*/true)); + addChainDependency(AA, MFI, SU, I->second, RejectMemNodes, 0, true); if (MayAlias) AliasMemUses[V].push_back(SU); else @@ -711,15 +958,16 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA) { // potentially aliasing stores. for (std::map::iterator I = AliasMemDefs.begin(), E = AliasMemDefs.end(); I != E; ++I) - I->second->addPred(SDep(SU, SDep::Order, /*Latency=*/0)); + addChainDependency(AA, MFI, SU, I->second, RejectMemNodes); PendingLoads.push_back(SU); MayAlias = true; } - + if (MayAlias) + adjustChainDeps(AA, MFI, SU, &ExitSU, RejectMemNodes, /*Latency=*/0); // Add dependencies on alias and barrier chains, if needed. if (MayAlias && AliasChain) - AliasChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0)); + addChainDependency(AA, MFI, SU, AliasChain, RejectMemNodes); if (BarrierChain) BarrierChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0)); } @@ -735,8 +983,9 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA) { } void ScheduleDAGInstrs::computeLatency(SUnit *SU) { - // Compute the latency for the node. - if (!InstrItins || InstrItins->isEmpty()) { + // Compute the latency for the node. We only provide a default for missing + // itineraries. Empty itineraries still have latency properties. + if (!InstrItins) { SU->Latency = 1; // Simplistic target-independent heuristic: assume that loads take @@ -748,63 +997,15 @@ void ScheduleDAGInstrs::computeLatency(SUnit *SU) { } } -void ScheduleDAGInstrs::computeOperandLatency(SUnit *Def, SUnit *Use, - SDep& dep) const { - if (!InstrItins || InstrItins->isEmpty()) - return; - +unsigned ScheduleDAGInstrs::computeOperandLatency(SUnit *Def, SUnit *Use, + const SDep& dep, + bool FindMin) const { // For a data dependency with a known register... if ((dep.getKind() != SDep::Data) || (dep.getReg() == 0)) - return; - - const unsigned Reg = dep.getReg(); - - // ... find the definition of the register in the defining - // instruction - MachineInstr *DefMI = Def->getInstr(); - int DefIdx = DefMI->findRegisterDefOperandIdx(Reg); - if (DefIdx != -1) { - const MachineOperand &MO = DefMI->getOperand(DefIdx); - if (MO.isReg() && MO.isImplicit() && - DefIdx >= (int)DefMI->getDesc().getNumOperands()) { - // This is an implicit def, getOperandLatency() won't return the correct - // latency. e.g. - // %D6, %D7 = VLD1q16 %R2, 0, ..., %Q3 - // %Q1 = VMULv8i16 %Q1, %Q3, ... - // What we want is to compute latency between def of %D6/%D7 and use of - // %Q3 instead. 
- unsigned Op2 = DefMI->findRegisterDefOperandIdx(Reg, false, true, TRI); - if (DefMI->getOperand(Op2).isReg()) - DefIdx = Op2; - } - MachineInstr *UseMI = Use->getInstr(); - // For all uses of the register, calculate the maxmimum latency - int Latency = -1; - if (UseMI) { - for (unsigned i = 0, e = UseMI->getNumOperands(); i != e; ++i) { - const MachineOperand &MO = UseMI->getOperand(i); - if (!MO.isReg() || !MO.isUse()) - continue; - unsigned MOReg = MO.getReg(); - if (MOReg != Reg) - continue; - - int UseCycle = TII->getOperandLatency(InstrItins, DefMI, DefIdx, - UseMI, i); - Latency = std::max(Latency, UseCycle); - } - } else { - // UseMI is null, then it must be a scheduling barrier. - if (!InstrItins || InstrItins->isEmpty()) - return; - unsigned DefClass = DefMI->getDesc().getSchedClass(); - Latency = InstrItins->getOperandCycle(DefClass, DefIdx); - } + return 1; - // If we found a latency, then replace the existing dependence latency. - if (Latency >= 0) - dep.setLatency(Latency); - } + return TII->computeOperandLatency(InstrItins, TRI, Def->getInstr(), + Use->getInstr(), dep.getReg(), FindMin); } void ScheduleDAGInstrs::dumpNode(const SUnit *SU) const { diff --git a/lib/CodeGen/ScoreboardHazardRecognizer.cpp b/lib/CodeGen/ScoreboardHazardRecognizer.cpp index 3d22035..e675366 100644 --- a/lib/CodeGen/ScoreboardHazardRecognizer.cpp +++ b/lib/CodeGen/ScoreboardHazardRecognizer.cpp @@ -39,13 +39,11 @@ ScoreboardHazardRecognizer(const InstrItineraryData *II, DebugType = ParentDebugType; #endif - // Determine the maximum depth of any itinerary. This determines the - // depth of the scoreboard. We always make the scoreboard at least 1 - // cycle deep to avoid dealing with the boundary condition. + // Determine the maximum depth of any itinerary. This determines the depth of + // the scoreboard. We always make the scoreboard at least 1 cycle deep to + // avoid dealing with the boundary condition. unsigned ScoreboardDepth = 1; if (ItinData && !ItinData->isEmpty()) { - IssueWidth = ItinData->IssueWidth; - for (unsigned idx = 0; ; ++idx) { if (ItinData->isEndMarker(idx)) break; @@ -63,16 +61,26 @@ ScoreboardHazardRecognizer(const InstrItineraryData *II, // Find the next power-of-2 >= ItinDepth while (ItinDepth > ScoreboardDepth) { ScoreboardDepth *= 2; + // Don't set MaxLookAhead until we find at least one nonzero stage. + // This way, an itinerary with no stages has MaxLookAhead==0, which + // completely bypasses the scoreboard hazard logic. + MaxLookAhead = ScoreboardDepth; } } - MaxLookAhead = ScoreboardDepth; } ReservedScoreboard.reset(ScoreboardDepth); RequiredScoreboard.reset(ScoreboardDepth); - DEBUG(dbgs() << "Using scoreboard hazard recognizer: Depth = " - << ScoreboardDepth << '\n'); + // If MaxLookAhead is not set above, then we are not enabled. + if (!isEnabled()) + DEBUG(dbgs() << "Disabled scoreboard hazard recognizer\n"); + else { + // A nonempty itinerary must have a SchedModel. 
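
The scoreboard sizing above, restated on its own: the depth is the deepest itinerary stage rounded up to a power of two, and MaxLookAhead stays 0, which keeps the recognizer disabled, until at least one nonzero stage depth has been seen. A compilable sketch of just that computation (scoreboardDepth is a hypothetical free function):

    #include <cassert>

    static unsigned scoreboardDepth(unsigned ItinDepth,
                                    unsigned &MaxLookAhead) {
      unsigned Depth = 1;   // always at least one cycle deep
      MaxLookAhead = 0;     // 0 bypasses the hazard logic entirely
      while (ItinDepth > Depth) {
        Depth *= 2;
        MaxLookAhead = Depth;
      }
      return Depth;
    }

    int main() {
      unsigned LA;
      assert(scoreboardDepth(0, LA) == 1 && LA == 0); // no stages: disabled
      assert(scoreboardDepth(5, LA) == 8 && LA == 8); // rounded up to 8
      return 0;
    }
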
+ IssueWidth = ItinData->SchedModel->IssueWidth; + DEBUG(dbgs() << "Using scoreboard hazard recognizer: Depth = " + << ScoreboardDepth << '\n'); + } } void ScoreboardHazardRecognizer::Reset() { @@ -151,7 +159,7 @@ ScoreboardHazardRecognizer::getHazardType(SUnit *SU, int Stalls) { } if (!freeUnits) { - DEBUG(dbgs() << "*** Hazard in cycle " << (cycle + i) << ", "); + DEBUG(dbgs() << "*** Hazard in cycle +" << StageCycle << ", "); DEBUG(dbgs() << "SU(" << SU->NodeNum << "): "); DEBUG(DAG->dumpNode(SU)); return Hazard; diff --git a/lib/CodeGen/SelectionDAG/CMakeLists.txt b/lib/CodeGen/SelectionDAG/CMakeLists.txt index a6bdc3b..75e8167 100644 --- a/lib/CodeGen/SelectionDAG/CMakeLists.txt +++ b/lib/CodeGen/SelectionDAG/CMakeLists.txt @@ -23,3 +23,5 @@ add_llvm_library(LLVMSelectionDAG TargetLowering.cpp TargetSelectionDAGInfo.cpp ) + +add_dependencies(LLVMSelectionDAG intrinsics_gen) diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 0914c66..4e29879 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -215,6 +215,7 @@ namespace { SDValue visitFADD(SDNode *N); SDValue visitFSUB(SDNode *N); SDValue visitFMUL(SDNode *N); + SDValue visitFMA(SDNode *N); SDValue visitFDIV(SDNode *N); SDValue visitFREM(SDNode *N); SDValue visitFCOPYSIGN(SDNode *N); @@ -227,6 +228,9 @@ namespace { SDValue visitFP_EXTEND(SDNode *N); SDValue visitFNEG(SDNode *N); SDValue visitFABS(SDNode *N); + SDValue visitFCEIL(SDNode *N); + SDValue visitFTRUNC(SDNode *N); + SDValue visitFFLOOR(SDNode *N); SDValue visitBRCOND(SDNode *N); SDValue visitBR_CC(SDNode *N); SDValue visitLOAD(SDNode *N); @@ -328,15 +332,12 @@ namespace { class WorkListRemover : public SelectionDAG::DAGUpdateListener { DAGCombiner &DC; public: - explicit WorkListRemover(DAGCombiner &dc) : DC(dc) {} + explicit WorkListRemover(DAGCombiner &dc) + : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {} virtual void NodeDeleted(SDNode *N, SDNode *E) { DC.removeFromWorkList(N); } - - virtual void NodeUpdated(SDNode *N) { - // Ignore updates. - } }; } @@ -619,8 +620,7 @@ SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo, N->getValueType(i) == To[i].getValueType()) && "Cannot combine value to value of different type!")); WorkListRemover DeadNodes(*this); - DAG.ReplaceAllUsesWith(N, To, &DeadNodes); - + DAG.ReplaceAllUsesWith(N, To); if (AddTo) { // Push the new nodes and any users onto the worklist for (unsigned i = 0, e = NumTo; i != e; ++i) { @@ -650,7 +650,7 @@ CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) { // Replace all uses. If any nodes become isomorphic to other nodes and // are deleted, make sure to remove them from our worklist. WorkListRemover DeadNodes(*this); - DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New, &DeadNodes); + DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New); // Push the new node and any (possibly new) users onto the worklist. 
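The WorkListRemover change above registers the listener through the SelectionDAG::DAGUpdateListener base-class constructor, which is why the ReplaceAllUsesWith calls throughout this patch drop their explicit &DeadNodes argument: listeners become plain RAII objects chained by the DAG itself. A stripped-down analogue of that pattern (Registry/ScopedListener are illustrative, not LLVM types):

    #include <functional>
    #include <utility>
    #include <vector>

    struct Registry {
      std::vector<std::function<void(int)> *> Listeners;
      void notifyDeleted(int Id) {
        for (auto *L : Listeners)
          (*L)(Id);
      }
    };

    class ScopedListener {
      Registry &Reg;
      std::function<void(int)> Fn;
    public:
      ScopedListener(Registry &R, std::function<void(int)> F)
          : Reg(R), Fn(std::move(F)) {
        Reg.Listeners.push_back(&Fn); // register in the constructor
      }
      ~ScopedListener() { Reg.Listeners.pop_back(); } // LIFO unregistration
    };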
AddToWorkList(TLO.New.getNode()); @@ -707,9 +707,8 @@ void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) { Trunc.getNode()->dump(&DAG); dbgs() << '\n'); WorkListRemover DeadNodes(*this); - DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), Trunc, &DeadNodes); - DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), SDValue(ExtLoad, 1), - &DeadNodes); + DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), Trunc); + DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), SDValue(ExtLoad, 1)); removeFromWorkList(Load); DAG.DeleteNode(Load); AddToWorkList(Trunc.getNode()); @@ -961,8 +960,8 @@ bool DAGCombiner::PromoteLoad(SDValue Op) { Result.getNode()->dump(&DAG); dbgs() << '\n'); WorkListRemover DeadNodes(*this); - DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result, &DeadNodes); - DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLD.getValue(1), &DeadNodes); + DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result); + DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLD.getValue(1)); removeFromWorkList(N); DAG.DeleteNode(N); AddToWorkList(Result.getNode()); @@ -1047,12 +1046,12 @@ void DAGCombiner::Run(CombineLevel AtLevel) { DAG.TransferDbgValues(SDValue(N, 0), RV); WorkListRemover DeadNodes(*this); if (N->getNumValues() == RV.getNode()->getNumValues()) - DAG.ReplaceAllUsesWith(N, RV.getNode(), &DeadNodes); + DAG.ReplaceAllUsesWith(N, RV.getNode()); else { assert(N->getValueType(0) == RV.getValueType() && N->getNumValues() == 1 && "Type mismatch"); SDValue OpV = RV; - DAG.ReplaceAllUsesWith(N, &OpV, &DeadNodes); + DAG.ReplaceAllUsesWith(N, &OpV); } // Push the new node and any users onto the worklist @@ -1131,6 +1130,7 @@ SDValue DAGCombiner::visit(SDNode *N) { case ISD::FADD: return visitFADD(N); case ISD::FSUB: return visitFSUB(N); case ISD::FMUL: return visitFMUL(N); + case ISD::FMA: return visitFMA(N); case ISD::FDIV: return visitFDIV(N); case ISD::FREM: return visitFREM(N); case ISD::FCOPYSIGN: return visitFCOPYSIGN(N); @@ -1143,6 +1143,9 @@ SDValue DAGCombiner::visit(SDNode *N) { case ISD::FP_EXTEND: return visitFP_EXTEND(N); case ISD::FNEG: return visitFNEG(N); case ISD::FABS: return visitFABS(N); + case ISD::FFLOOR: return visitFFLOOR(N); + case ISD::FCEIL: return visitFCEIL(N); + case ISD::FTRUNC: return visitFTRUNC(N); case ISD::BRCOND: return visitBRCOND(N); case ISD::BR_CC: return visitBR_CC(N); case ISD::LOAD: return visitLOAD(N); @@ -1325,10 +1328,12 @@ SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) { // Replacing results may cause a different MERGE_VALUES to suddenly // be CSE'd with N, and carry its uses with it. Iterate until no // uses remain, to ensure that the node can be safely deleted. + // First add the users of this node to the work list so that they + // can be tried again once they have new operands. 
+ AddUsersToWorkList(N); do { for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) - DAG.ReplaceAllUsesOfValueWith(SDValue(N, i), N->getOperand(i), - &DeadNodes); + DAG.ReplaceAllUsesOfValueWith(SDValue(N, i), N->getOperand(i)); } while (!N->use_empty()); removeFromWorkList(N); DAG.DeleteNode(N); @@ -1640,7 +1645,7 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { if (N1.getOpcode() == ISD::ADD && N0C && N1C1) { SDValue NewC = DAG.getConstant((N0C->getAPIntValue() - N1C1->getAPIntValue()), VT); return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, NewC, - N1.getOperand(0)); + N1.getOperand(0)); } // fold ((A+(B+or-C))-B) -> A+or-C if (N0.getOpcode() == ISD::ADD && @@ -2341,7 +2346,7 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) { // We also handle SCALAR_TO_VECTOR because xor/or/and operations are cheaper // on scalars. if ((N0.getOpcode() == ISD::BITCAST || N0.getOpcode() == ISD::SCALAR_TO_VECTOR) - && Level == AfterLegalizeVectorOps) { + && Level == AfterLegalizeTypes) { SDValue In0 = N0.getOperand(0); SDValue In1 = N1.getOperand(0); EVT In0Ty = In0.getValueType(); @@ -2528,7 +2533,14 @@ SDValue DAGCombiner::visitAND(SDNode *N) { Load->getOffset(), Load->getMemoryVT(), Load->getMemOperand()); // Replace uses of the EXTLOAD with the new ZEXTLOAD. - CombineTo(Load, NewLoad.getValue(0), NewLoad.getValue(1)); + if (Load->getNumValues() == 3) { + // PRE/POST_INC loads have 3 values. + SDValue To[] = { NewLoad.getValue(0), NewLoad.getValue(1), + NewLoad.getValue(2) }; + CombineTo(Load, To, 3, true); + } else { + CombineTo(Load, NewLoad.getValue(0), NewLoad.getValue(1)); + } } // Fold the AND away, taking care not to fold to the old load node if we @@ -2710,6 +2722,34 @@ SDValue DAGCombiner::visitAND(SDNode *N) { } } + if (N0.getOpcode() == ISD::ADD && N1.getOpcode() == ISD::SRL && + VT.getSizeInBits() <= 64) { + if (ConstantSDNode *ADDI = dyn_cast(N0.getOperand(1))) { + APInt ADDC = ADDI->getAPIntValue(); + if (!TLI.isLegalAddImmediate(ADDC.getSExtValue())) { + // Look for (and (add x, c1), (lshr y, c2)). If C1 wasn't a legal + // immediate for an add, but it is legal if its top c2 bits are set, + // transform the ADD so the immediate doesn't need to be materialized + // in a register. + if (ConstantSDNode *SRLI = dyn_cast(N1.getOperand(1))) { + APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(), + SRLI->getZExtValue()); + if (DAG.MaskedValueIsZero(N0.getOperand(1), Mask)) { + ADDC |= Mask; + if (TLI.isLegalAddImmediate(ADDC.getSExtValue())) { + SDValue NewAdd = + DAG.getNode(ISD::ADD, N0.getDebugLoc(), VT, + N0.getOperand(0), DAG.getConstant(ADDC, VT)); + CombineTo(N0.getNode(), NewAdd); + return SDValue(N, 0); // Return N so it doesn't get rechecked! + } + } + } + } + } + } + + return SDValue(); } @@ -4526,8 +4566,10 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { SDValue Op = N0.getOperand(0); if (Op.getValueType().bitsLT(VT)) { Op = DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, Op); + AddToWorkList(Op.getNode()); } else if (Op.getValueType().bitsGT(VT)) { Op = DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, Op); + AddToWorkList(Op.getNode()); } return DAG.getZeroExtendInReg(Op, N->getDebugLoc(), N0.getValueType().getScalarType()); @@ -5012,6 +5054,10 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { LoadSDNode *LN0 = cast(N0); EVT PtrType = N0.getOperand(1).getValueType(); + if (PtrType == MVT::Untyped || PtrType.isExtended()) + // It's not possible to generate a constant of extended or untyped type. 
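The visitAND fold earlier in this hunk rests on a small bit-twiddling identity: the mask (y >> c2) always has its top c2 bits clear, and carries in an add only propagate upward, so ORing those top bits into the add immediate cannot change the masked result. A brute-force check of the identity, with arbitrarily chosen constants:

    #include <cassert>
    #include <cstdint>

    int main() {
      const unsigned c2 = 24;                       // lshr amount
      const uint32_t hi = ~((1u << (32 - c2)) - 1); // top c2 bits set
      const uint32_t c1 = 0x78;                     // add imm, top bits clear
      const uint32_t ys[] = {0u, 1u, 0xabcdu << 8, 0xffffffffu};
      for (uint32_t x = 0; x < (1u << 16); ++x)
        for (uint32_t y : ys) {
          const uint32_t m = y >> c2;               // top c2 bits are zero
          assert(((x + c1) & m) == ((x + (c1 | hi)) & m));
        }
      return 0;
    }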
+ return SDValue(); + // For big endian targets, we need to adjust the offset to the pointer to // load the correct bytes. if (TLI.isBigEndian()) { @@ -5041,8 +5087,7 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { // Replace the old load's chain with the new load's chain. WorkListRemover DeadNodes(*this); - DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1), - &DeadNodes); + DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1)); // Shift the result left, if we've swallowed a left shift. SDValue Result = Load; @@ -5225,7 +5270,7 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { SDValue EltNo = N0->getOperand(1); if (isa(EltNo) && isTypeLegal(NVT)) { int Elt = cast(EltNo)->getZExtValue(); - + EVT IndexTy = N0->getOperand(1).getValueType(); int Index = isLE ? (Elt*SizeRatio) : (Elt*SizeRatio + (SizeRatio-1)); SDValue V = DAG.getNode(ISD::BITCAST, N->getDebugLoc(), @@ -5233,7 +5278,7 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, N->getDebugLoc(), TrTy, V, - DAG.getConstant(Index, MVT::i32)); + DAG.getConstant(Index, IndexTy)); } } @@ -5607,7 +5652,7 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { if (FoldedVOp.getNode()) return FoldedVOp; } - // fold (fadd c1, c2) -> (fadd c1, c2) + // fold (fadd c1, c2) -> c1 + c2 if (N0CFP && N1CFP && VT != MVT::ppcf128) return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N0, N1); // canonicalize constant to RHS @@ -5636,6 +5681,26 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N0.getOperand(1), N1)); + // FADD -> FMA combines: + if ((DAG.getTarget().Options.AllowFPOpFusion == FPOpFusion::Fast || + DAG.getTarget().Options.UnsafeFPMath) && + DAG.getTarget().getTargetLowering()->isFMAFasterThanMulAndAdd(VT) && + TLI.isOperationLegalOrCustom(ISD::FMA, VT)) { + + // fold (fadd (fmul x, y), z) -> (fma x, y, z) + if (N0.getOpcode() == ISD::FMUL && N0->hasOneUse()) { + return DAG.getNode(ISD::FMA, N->getDebugLoc(), VT, + N0.getOperand(0), N0.getOperand(1), N1); + } + + // fold (fadd x, (fmul y, z)) -> (fma x, y, z) + // Note: Commutes FADD operands. 
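The FADD -> FMA combines in this hunk are gated on AllowFPOpFusion/UnsafeFPMath because a fused multiply-add rounds once where fmul-then-fadd rounds twice, so the transform can change results. A self-contained demonstration of that single-versus-double rounding:

    #include <cmath>
    #include <cstdio>

    int main() {
      const double eps = std::ldexp(1.0, -27); // 2^-27
      const double x = 1.0 + eps, y = 1.0 - eps, z = -1.0;
      // x*y = 1 - 2^-54 rounds to 1.0, so the separate add yields 0.
      std::printf("mul+add: %g\n", x * y + z);
      // fma keeps the exact product and yields -2^-54.
      std::printf("fma:     %g\n", std::fma(x, y, z));
    }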
+ if (N1.getOpcode() == ISD::FMUL && N1->hasOneUse()) { + return DAG.getNode(ISD::FMA, N->getDebugLoc(), VT, + N1.getOperand(0), N1.getOperand(1), N0); + } + } + return SDValue(); } @@ -5645,6 +5710,7 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) { ConstantFPSDNode *N0CFP = dyn_cast(N0); ConstantFPSDNode *N1CFP = dyn_cast(N1); EVT VT = N->getValueType(0); + DebugLoc dl = N->getDebugLoc(); // fold vector ops if (VT.isVector()) { @@ -5665,17 +5731,21 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) { if (isNegatibleForFree(N1, LegalOperations, TLI, &DAG.getTarget().Options)) return GetNegatedExpression(N1, DAG, LegalOperations); if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT)) - return DAG.getNode(ISD::FNEG, N->getDebugLoc(), VT, N1); + return DAG.getNode(ISD::FNEG, dl, VT, N1); } // fold (fsub A, (fneg B)) -> (fadd A, B) if (isNegatibleForFree(N1, LegalOperations, TLI, &DAG.getTarget().Options)) - return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N0, + return DAG.getNode(ISD::FADD, dl, VT, N0, GetNegatedExpression(N1, DAG, LegalOperations)); // If 'unsafe math' is enabled, fold + // (fsub x, x) -> 0.0 & // (fsub x, (fadd x, y)) -> (fneg y) & // (fsub x, (fadd y, x)) -> (fneg y) if (DAG.getTarget().Options.UnsafeFPMath) { + if (N0 == N1) + return DAG.getConstantFP(0.0f, VT); + if (N1.getOpcode() == ISD::FADD) { SDValue N10 = N1->getOperand(0); SDValue N11 = N1->getOperand(1); @@ -5689,6 +5759,40 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) { } } + // FSUB -> FMA combines: + if ((DAG.getTarget().Options.AllowFPOpFusion == FPOpFusion::Fast || + DAG.getTarget().Options.UnsafeFPMath) && + DAG.getTarget().getTargetLowering()->isFMAFasterThanMulAndAdd(VT) && + TLI.isOperationLegalOrCustom(ISD::FMA, VT)) { + + // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z)) + if (N0.getOpcode() == ISD::FMUL && N0->hasOneUse()) { + return DAG.getNode(ISD::FMA, dl, VT, + N0.getOperand(0), N0.getOperand(1), + DAG.getNode(ISD::FNEG, dl, VT, N1)); + } + + // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x) + // Note: Commutes FSUB operands. 
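The three FSUB patterns in this hunk are all instances of one algebraic identity, a*b - c = fma(a, b, -c), plus its negated variants. A quick spot-check with values where both sides happen to be exact:

    #include <cassert>
    #include <cmath>

    int main() {
      const double a = 3.0, b = 7.0, c = 11.0;
      assert(std::fma(a, b, -c) == a * b - c);     // (fsub (fmul a,b), c)
      assert(std::fma(-a, b, c) == c - a * b);     // (fsub c, (fmul a,b))
      assert(std::fma(-a, b, -c) == -(a * b) - c); // (fsub (fneg (fmul)), c)
      return 0;
    }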
+ if (N1.getOpcode() == ISD::FMUL && N1->hasOneUse()) { + return DAG.getNode(ISD::FMA, dl, VT, + DAG.getNode(ISD::FNEG, dl, VT, + N1.getOperand(0)), + N1.getOperand(1), N0); + } + + // fold (fsub (-(fmul, x, y)), z) -> (fma (fneg x), y, (fneg z)) + if (N0.getOpcode() == ISD::FNEG && + N0.getOperand(0).getOpcode() == ISD::FMUL && + N0->hasOneUse() && N0.getOperand(0).hasOneUse()) { + SDValue N00 = N0.getOperand(0).getOperand(0); + SDValue N01 = N0.getOperand(0).getOperand(1); + return DAG.getNode(ISD::FMA, dl, VT, + DAG.getNode(ISD::FNEG, dl, VT, N00), N01, + DAG.getNode(ISD::FNEG, dl, VT, N1)); + } + } + return SDValue(); } @@ -5720,6 +5824,9 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) { if (DAG.getTarget().Options.UnsafeFPMath && ISD::isBuildVectorAllZeros(N1.getNode())) return N1; + // fold (fmul A, 1.0) -> A + if (N1CFP && N1CFP->isExactlyValue(1.0)) + return N0; // fold (fmul X, 2.0) -> (fadd X, X) if (N1CFP && N1CFP->isExactlyValue(+2.0)) return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N0, N0); @@ -5753,6 +5860,26 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) { return SDValue(); } +SDValue DAGCombiner::visitFMA(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + SDValue N2 = N->getOperand(2); + ConstantFPSDNode *N0CFP = dyn_cast(N0); + ConstantFPSDNode *N1CFP = dyn_cast(N1); + EVT VT = N->getValueType(0); + + if (N0CFP && N0CFP->isExactlyValue(1.0)) + return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N1, N2); + if (N1CFP && N1CFP->isExactlyValue(1.0)) + return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N0, N2); + + // Canonicalize (fma c, x, y) -> (fma x, c, y) + if (N0CFP && !N1CFP) + return DAG.getNode(ISD::FMA, N->getDebugLoc(), VT, N1, N0, N2); + + return SDValue(); +} + SDValue DAGCombiner::visitFDIV(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); @@ -5893,6 +6020,38 @@ SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) { return DAG.getNode(ISD::UINT_TO_FP, N->getDebugLoc(), VT, N0); } + // The next optimizations are desireable only if SELECT_CC can be lowered. 
+ // Check against MVT::Other for SELECT_CC, which is a workaround for targets + // having to say they don't support SELECT_CC on every type the DAG knows + // about, since there is no way to mark an opcode illegal at all value types + // (See also visitSELECT) + if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, MVT::Other)) { + // fold (sint_to_fp (setcc x, y, cc)) -> (select_cc x, y, -1.0, 0.0,, cc) + if (N0.getOpcode() == ISD::SETCC && N0.getValueType() == MVT::i1 && + !VT.isVector() && + (!LegalOperations || + TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) { + SDValue Ops[] = + { N0.getOperand(0), N0.getOperand(1), + DAG.getConstantFP(-1.0, VT) , DAG.getConstantFP(0.0, VT), + N0.getOperand(2) }; + return DAG.getNode(ISD::SELECT_CC, N->getDebugLoc(), VT, Ops, 5); + } + + // fold (sint_to_fp (zext (setcc x, y, cc))) -> + // (select_cc x, y, 1.0, 0.0,, cc) + if (N0.getOpcode() == ISD::ZERO_EXTEND && + N0.getOperand(0).getOpcode() == ISD::SETCC &&!VT.isVector() && + (!LegalOperations || + TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) { + SDValue Ops[] = + { N0.getOperand(0).getOperand(0), N0.getOperand(0).getOperand(1), + DAG.getConstantFP(1.0, VT) , DAG.getConstantFP(0.0, VT), + N0.getOperand(0).getOperand(2) }; + return DAG.getNode(ISD::SELECT_CC, N->getDebugLoc(), VT, Ops, 5); + } + } + return SDValue(); } @@ -5918,6 +6077,25 @@ SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) { return DAG.getNode(ISD::SINT_TO_FP, N->getDebugLoc(), VT, N0); } + // The next optimizations are desireable only if SELECT_CC can be lowered. + // Check against MVT::Other for SELECT_CC, which is a workaround for targets + // having to say they don't support SELECT_CC on every type the DAG knows + // about, since there is no way to mark an opcode illegal at all value types + // (See also visitSELECT) + if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, MVT::Other)) { + // fold (uint_to_fp (setcc x, y, cc)) -> (select_cc x, y, -1.0, 0.0,, cc) + + if (N0.getOpcode() == ISD::SETCC && !VT.isVector() && + (!LegalOperations || + TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) { + SDValue Ops[] = + { N0.getOperand(0), N0.getOperand(1), + DAG.getConstantFP(1.0, VT), DAG.getConstantFP(0.0, VT), + N0.getOperand(2) }; + return DAG.getNode(ISD::SELECT_CC, N->getDebugLoc(), VT, Ops, 5); + } + } + return SDValue(); } @@ -6071,6 +6249,42 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) { return SDValue(); } +SDValue DAGCombiner::visitFCEIL(SDNode *N) { + SDValue N0 = N->getOperand(0); + ConstantFPSDNode *N0CFP = dyn_cast(N0); + EVT VT = N->getValueType(0); + + // fold (fceil c1) -> fceil(c1) + if (N0CFP && VT != MVT::ppcf128) + return DAG.getNode(ISD::FCEIL, N->getDebugLoc(), VT, N0); + + return SDValue(); +} + +SDValue DAGCombiner::visitFTRUNC(SDNode *N) { + SDValue N0 = N->getOperand(0); + ConstantFPSDNode *N0CFP = dyn_cast(N0); + EVT VT = N->getValueType(0); + + // fold (ftrunc c1) -> ftrunc(c1) + if (N0CFP && VT != MVT::ppcf128) + return DAG.getNode(ISD::FTRUNC, N->getDebugLoc(), VT, N0); + + return SDValue(); +} + +SDValue DAGCombiner::visitFFLOOR(SDNode *N) { + SDValue N0 = N->getOperand(0); + ConstantFPSDNode *N0CFP = dyn_cast(N0); + EVT VT = N->getValueType(0); + + // fold (ffloor c1) -> ffloor(c1) + if (N0CFP && VT != MVT::ppcf128) + return DAG.getNode(ISD::FFLOOR, N->getDebugLoc(), VT, N0); + + return SDValue(); +} + SDValue DAGCombiner::visitFABS(SDNode *N) { SDValue N0 = N->getOperand(0); ConstantFPSDNode *N0CFP = dyn_cast(N0); @@ -6185,7 +6399,7 @@ SDValue DAGCombiner::visitBRCOND(SDNode 
*N) { } // Replace the uses of SRL with SETCC WorkListRemover DeadNodes(*this); - DAG.ReplaceAllUsesOfValueWith(N1, SetCC, &DeadNodes); + DAG.ReplaceAllUsesOfValueWith(N1, SetCC); removeFromWorkList(N1.getNode()); DAG.DeleteNode(N1.getNode()); return SDValue(N, 0); // Return N so it doesn't get rechecked! @@ -6214,7 +6428,7 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) { Tmp.getNode()->dump(&DAG); dbgs() << '\n'); WorkListRemover DeadNodes(*this); - DAG.ReplaceAllUsesOfValueWith(N1, Tmp, &DeadNodes); + DAG.ReplaceAllUsesOfValueWith(N1, Tmp); removeFromWorkList(TheXor); DAG.DeleteNode(TheXor); return DAG.getNode(ISD::BRCOND, N->getDebugLoc(), @@ -6240,7 +6454,7 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) { Equal ? ISD::SETEQ : ISD::SETNE); // Replace the uses of XOR with SETCC WorkListRemover DeadNodes(*this); - DAG.ReplaceAllUsesOfValueWith(N1, SetCC, &DeadNodes); + DAG.ReplaceAllUsesOfValueWith(N1, SetCC); removeFromWorkList(N1.getNode()); DAG.DeleteNode(N1.getNode()); return DAG.getNode(ISD::BRCOND, N->getDebugLoc(), @@ -6431,21 +6645,17 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) { dbgs() << '\n'); WorkListRemover DeadNodes(*this); if (isLoad) { - DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0), - &DeadNodes); - DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2), - &DeadNodes); + DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0)); + DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2)); } else { - DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1), - &DeadNodes); + DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1)); } // Finally, since the node is now dead, remove it from the graph. DAG.DeleteNode(N); // Replace the uses of Ptr with uses of the updated base value. - DAG.ReplaceAllUsesOfValueWith(Ptr, Result.getValue(isLoad ? 1 : 0), - &DeadNodes); + DAG.ReplaceAllUsesOfValueWith(Ptr, Result.getValue(isLoad ? 1 : 0)); removeFromWorkList(Ptr.getNode()); DAG.DeleteNode(Ptr.getNode()); @@ -6559,13 +6769,10 @@ bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) { dbgs() << '\n'); WorkListRemover DeadNodes(*this); if (isLoad) { - DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0), - &DeadNodes); - DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2), - &DeadNodes); + DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0)); + DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2)); } else { - DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1), - &DeadNodes); + DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1)); } // Finally, since the node is now dead, remove it from the graph. @@ -6573,8 +6780,7 @@ bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) { // Replace the uses of Use with uses of the updated base value. DAG.ReplaceAllUsesOfValueWith(SDValue(Op, 0), - Result.getValue(isLoad ? 1 : 0), - &DeadNodes); + Result.getValue(isLoad ? 
1 : 0)); removeFromWorkList(Op); DAG.DeleteNode(Op); return true; @@ -6609,7 +6815,7 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) { Chain.getNode()->dump(&DAG); dbgs() << "\n"); WorkListRemover DeadNodes(*this); - DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain, &DeadNodes); + DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain); if (N->use_empty()) { removeFromWorkList(N); @@ -6629,11 +6835,10 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) { Undef.getNode()->dump(&DAG); dbgs() << " and 2 other values\n"); WorkListRemover DeadNodes(*this); - DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Undef, &DeadNodes); + DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Undef); DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), - DAG.getUNDEF(N->getValueType(1)), - &DeadNodes); - DAG.ReplaceAllUsesOfValueWith(SDValue(N, 2), Chain, &DeadNodes); + DAG.getUNDEF(N->getValueType(1))); + DAG.ReplaceAllUsesOfValueWith(SDValue(N, 2), Chain); removeFromWorkList(N); DAG.DeleteNode(N); return SDValue(N, 0); // Return N so it doesn't get rechecked! @@ -6955,8 +7160,7 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) { AddToWorkList(NewLD.getNode()); AddToWorkList(NewVal.getNode()); WorkListRemover DeadNodes(*this); - DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLD.getValue(1), - &DeadNodes); + DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLD.getValue(1)); ++OpsNarrowed; return NewST; } @@ -7013,8 +7217,7 @@ SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) { AddToWorkList(NewLD.getNode()); AddToWorkList(NewST.getNode()); WorkListRemover DeadNodes(*this); - DAG.ReplaceAllUsesOfValueWith(Value.getValue(1), NewLD.getValue(1), - &DeadNodes); + DAG.ReplaceAllUsesOfValueWith(Value.getValue(1), NewLD.getValue(1)); ++LdStFP2Int; return NewST; } @@ -7058,7 +7261,8 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { SDValue Tmp; switch (CFP->getValueType(0).getSimpleVT().SimpleTy) { default: llvm_unreachable("Unknown FP type"); - case MVT::f80: // We don't do this for these yet. + case MVT::f16: // We don't do this for these yet. + case MVT::f80: case MVT::f128: case MVT::ppcf128: break; @@ -7323,8 +7527,9 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { OrigElt -= NumElem; } + EVT IndexTy = N->getOperand(1).getValueType(); return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, N->getDebugLoc(), NVT, - InVec, DAG.getConstant(OrigElt, MVT::i32)); + InVec, DAG.getConstant(OrigElt, IndexTy)); } // Perform only after legalization to ensure build_vector / vector_shuffle @@ -7472,7 +7677,7 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { WorkListRemover DeadNodes(*this); SDValue From[] = { SDValue(N, 0), SDValue(LN0,1) }; SDValue To[] = { Load, Chain }; - DAG.ReplaceAllUsesOfValuesWith(From, To, 2, &DeadNodes); + DAG.ReplaceAllUsesOfValuesWith(From, To, 2); // Since we're explcitly calling ReplaceAllUses, add the new node to the // worklist explicitly as well. AddToWorkList(Load.getNode()); @@ -7489,6 +7694,11 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { unsigned NumInScalars = N->getNumOperands(); DebugLoc dl = N->getDebugLoc(); EVT VT = N->getValueType(0); + + // A vector built entirely of undefs is undef. + if (ISD::allOperandsUndef(N)) + return DAG.getUNDEF(VT); + // Check to see if this is a BUILD_VECTOR of a bunch of values // which come from any_extend or zero_extend nodes. If so, we can create // a new BUILD_VECTOR using bit-casts which may enable other BUILD_VECTOR @@ -7496,12 +7706,11 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { // using shuffles. 
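The "vector built entirely of undefs" fold added just above, and its CONCAT_VECTORS twin later in this file, both lean on the new ISD::allOperandsUndef helper, which replaces the hand-rolled AllUndef flag deleted below. A minimal stand-in for what such a predicate does (toy types; not LLVM's signature):

    #include <algorithm>
    #include <vector>

    enum Opcode { UNDEF, OTHER };
    struct Node { Opcode Op; };

    static bool allOperandsUndef(const std::vector<Node> &Ops) {
      return !Ops.empty() &&
             std::all_of(Ops.begin(), Ops.end(),
                         [](const Node &N) { return N.Op == UNDEF; });
    }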
EVT SourceType = MVT::Other; bool AllAnyExt = true; - bool AllUndef = true; + for (unsigned i = 0; i != NumInScalars; ++i) { SDValue In = N->getOperand(i); // Ignore undef inputs. if (In.getOpcode() == ISD::UNDEF) continue; - AllUndef = false; bool AnyExt = In.getOpcode() == ISD::ANY_EXTEND; bool ZeroExt = In.getOpcode() == ISD::ZERO_EXTEND; @@ -7529,9 +7738,6 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { AllAnyExt &= AnyExt; } - if (AllUndef) - return DAG.getUNDEF(VT); - // In order to have valid types, all of the inputs must be extended from the // same source type and all of the inputs must be any or zero extend. // Scalar sizes must be a power of two. @@ -7707,6 +7913,10 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) { if (N->getNumOperands() == 1) return N->getOperand(0); + // Check if all of the operands are undefs. + if (ISD::allOperandsUndef(N)) + return DAG.getUNDEF(N->getValueType(0)); + return SDValue(); } diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp index 0c1ac69..683fac6 100644 --- a/lib/CodeGen/SelectionDAG/FastISel.cpp +++ b/lib/CodeGen/SelectionDAG/FastISel.cpp @@ -40,6 +40,7 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "isel" +#include "llvm/DebugInfo.h" #include "llvm/Function.h" #include "llvm/GlobalVariable.h" #include "llvm/Instructions.h" @@ -51,10 +52,10 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/Analysis/DebugInfo.h" #include "llvm/Analysis/Loads.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetLibraryInfo.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Support/ErrorHandling.h" @@ -484,7 +485,7 @@ bool FastISel::SelectGetElementPtr(const User *I) { if (const ConstantInt *CI = dyn_cast(Idx)) { if (CI->isZero()) continue; // N = N + Offset - TotalOffs += + TotalOffs += TD.getTypeAllocSize(Ty)*cast(CI)->getSExtValue(); if (TotalOffs >= MaxOffs) { N = FastEmit_ri_(VT, ISD::ADD, N, NIsKill, TotalOffs, VT); @@ -573,7 +574,10 @@ bool FastISel::SelectCall(const User *I) { // At -O0 we don't care about the lifetime intrinsics. case Intrinsic::lifetime_start: case Intrinsic::lifetime_end: + // The donothing intrinsic does, well, nothing. + case Intrinsic::donothing: return true; + case Intrinsic::dbg_declare: { const DbgDeclareInst *DI = cast(Call); if (!DIVariable(DI->getVariable()).Verify() || @@ -642,7 +646,7 @@ bool FastISel::SelectCall(const User *I) { BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) .addCImm(CI).addImm(DI->getOffset()) .addMetadata(DI->getVariable()); - else + else BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) .addImm(CI->getZExtValue()).addImm(DI->getOffset()) .addMetadata(DI->getVariable()); @@ -786,13 +790,24 @@ FastISel::SelectInstruction(const Instruction *I) { MachineBasicBlock::iterator SavedInsertPt = FuncInfo.InsertPt; + // As a special case, don't handle calls to builtin library functions that + // may be translated directly to target instructions. + if (const CallInst *Call = dyn_cast(I)) { + const Function *F = Call->getCalledFunction(); + LibFunc::Func Func; + if (F && !F->hasLocalLinkage() && F->hasName() && + LibInfo->getLibFunc(F->getName(), Func) && + LibInfo->hasOptimizedCodeGen(Func)) + return false; + } + // First, try doing target-independent selection. 
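The bail-out added above makes FastISel refuse calls to recognized library functions that have optimized SelectionDAG lowerings, so the slower path can emit, say, an inline memcpy expansion instead of a call. A toy version of the test, with a hard-coded set standing in for the TargetLibraryInfo lookup (the names in the set are examples, not the actual table):

    #include <set>
    #include <string>

    static bool shouldSkipFastISel(const std::string &Callee,
                                   bool HasLocalLinkage) {
      // Stand-in for LibInfo->getLibFunc(...) + hasOptimizedCodeGen(...).
      static const std::set<std::string> Optimized = {"memcpy", "memset",
                                                      "sqrtf"};
      return !HasLocalLinkage && Optimized.count(Callee) != 0;
    }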
if (SelectOperator(I, I->getOpcode())) { ++NumFastIselSuccessIndependent; DL = DebugLoc(); return true; } - // Remove dead code. However, ignore call instructions since we've flushed + // Remove dead code. However, ignore call instructions since we've flushed // the local value map and recomputed the insert point. if (!isa(I)) { recomputeInsertPt(); @@ -1037,7 +1052,8 @@ FastISel::SelectOperator(const User *I, unsigned Opcode) { } } -FastISel::FastISel(FunctionLoweringInfo &funcInfo) +FastISel::FastISel(FunctionLoweringInfo &funcInfo, + const TargetLibraryInfo *libInfo) : FuncInfo(funcInfo), MRI(FuncInfo.MF->getRegInfo()), MFI(*FuncInfo.MF->getFrameInfo()), @@ -1046,7 +1062,8 @@ FastISel::FastISel(FunctionLoweringInfo &funcInfo) TD(*TM.getTargetData()), TII(*TM.getInstrInfo()), TLI(*TM.getTargetLowering()), - TRI(*TM.getRegisterInfo()) { + TRI(*TM.getRegisterInfo()), + LibInfo(libInfo) { } FastISel::~FastISel() {} @@ -1306,6 +1323,30 @@ unsigned FastISel::FastEmitInst_rri(unsigned MachineInstOpcode, return ResultReg; } +unsigned FastISel::FastEmitInst_rrii(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, + unsigned Op0, bool Op0IsKill, + unsigned Op1, bool Op1IsKill, + uint64_t Imm1, uint64_t Imm2) { + unsigned ResultReg = createResultReg(RC); + const MCInstrDesc &II = TII.get(MachineInstOpcode); + + if (II.getNumDefs() >= 1) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) + .addReg(Op0, Op0IsKill * RegState::Kill) + .addReg(Op1, Op1IsKill * RegState::Kill) + .addImm(Imm1).addImm(Imm2); + else { + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) + .addReg(Op0, Op0IsKill * RegState::Kill) + .addReg(Op1, Op1IsKill * RegState::Kill) + .addImm(Imm1).addImm(Imm2); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), + ResultReg).addReg(II.ImplicitDefs[0]); + } + return ResultReg; +} + unsigned FastISel::FastEmitInst_i(unsigned MachineInstOpcode, const TargetRegisterClass *RC, uint64_t Imm) { @@ -1345,6 +1386,8 @@ unsigned FastISel::FastEmitInst_extractsubreg(MVT RetVT, unsigned ResultReg = createResultReg(TLI.getRegClassFor(RetVT)); assert(TargetRegisterInfo::isVirtualRegister(Op0) && "Cannot yet extract from physregs"); + const TargetRegisterClass *RC = MRI.getRegClass(Op0); + MRI.constrainRegClass(Op0, TRI.getSubClassWithSubReg(RC, Idx)); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), ResultReg) .addReg(Op0, getKillRegState(Op0IsKill), Idx); diff --git a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp index 8dde919..3e18ea7 100644 --- a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp +++ b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp @@ -15,13 +15,13 @@ #define DEBUG_TYPE "function-lowering-info" #include "llvm/ADT/PostOrderIterator.h" #include "llvm/CodeGen/FunctionLoweringInfo.h" +#include "llvm/DebugInfo.h" #include "llvm/DerivedTypes.h" #include "llvm/Function.h" #include "llvm/Instructions.h" #include "llvm/IntrinsicInst.h" #include "llvm/LLVMContext.h" #include "llvm/Module.h" -#include "llvm/Analysis/DebugInfo.h" #include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFrameInfo.h" diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp index 1467d88..4488d27 100644 --- a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp +++ b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp @@ -48,16 +48,31 @@ unsigned InstrEmitter::CountResults(SDNode *Node) { return N; } -/// 
CountOperands - The inputs to target nodes have any actual inputs first, +/// countOperands - The inputs to target nodes have any actual inputs first, /// followed by an optional chain operand, then an optional glue operand. /// Compute the number of actual operands that will go into the resulting /// MachineInstr. -unsigned InstrEmitter::CountOperands(SDNode *Node) { +/// +/// Also count physreg RegisterSDNode and RegisterMaskSDNode operands preceding +/// the chain and glue. These operands may be implicit on the machine instr. +static unsigned countOperands(SDNode *Node, unsigned &NumImpUses) { unsigned N = Node->getNumOperands(); while (N && Node->getOperand(N - 1).getValueType() == MVT::Glue) --N; if (N && Node->getOperand(N - 1).getValueType() == MVT::Other) --N; // Ignore chain if it exists. + + // Count RegisterSDNode and RegisterMaskSDNode operands for NumImpUses. + for (unsigned I = N; I; --I) { + if (isa(Node->getOperand(I - 1))) + continue; + if (RegisterSDNode *RN = dyn_cast(Node->getOperand(I - 1))) + if (TargetRegisterInfo::isPhysicalRegister(RN->getReg())) + continue; + NumImpUses = N - I; + break; + } + return N; } @@ -114,8 +129,10 @@ EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned, if (User->isMachineOpcode()) { const MCInstrDesc &II = TII->get(User->getMachineOpcode()); const TargetRegisterClass *RC = 0; - if (i+II.getNumDefs() < II.getNumOperands()) - RC = TII->getRegClass(II, i+II.getNumDefs(), TRI); + if (i+II.getNumDefs() < II.getNumOperands()) { + RC = TRI->getAllocatableClass( + TII->getRegClass(II, i+II.getNumDefs(), TRI, *MF)); + } if (!UseRC) UseRC = RC; else if (RC) { @@ -196,7 +213,8 @@ void InstrEmitter::CreateVirtualRegisters(SDNode *Node, MachineInstr *MI, // is a vreg in the same register class, use the CopyToReg'd destination // register instead of creating a new vreg. unsigned VRBase = 0; - const TargetRegisterClass *RC = TII->getRegClass(II, i, TRI); + const TargetRegisterClass *RC = + TRI->getAllocatableClass(TII->getRegClass(II, i, TRI, *MF)); if (II.OpInfo[i].isOptionalDef()) { // Optional def must be a physical register. unsigned NumResults = CountResults(Node); @@ -293,7 +311,7 @@ InstrEmitter::AddRegisterOperand(MachineInstr *MI, SDValue Op, if (II) { const TargetRegisterClass *DstRC = 0; if (IIOpNum < II->getNumOperands()) - DstRC = TII->getRegClass(*II, IIOpNum, TRI); + DstRC = TRI->getAllocatableClass(TII->getRegClass(*II,IIOpNum,TRI,*MF)); assert((DstRC || (MI->isVariadic() && IIOpNum >= MCID.getNumOperands())) && "Don't have operand info for this instruction!"); if (DstRC && !MRI->constrainRegClass(VReg, DstRC, MinRCSize)) { @@ -334,8 +352,7 @@ InstrEmitter::AddRegisterOperand(MachineInstr *MI, SDValue Op, /// AddOperand - Add the specified operand to the specified machine instr. II /// specifies the instruction information for the node, and IIOpNum is the -/// operand number (in the II) that we are adding. IIOpNum and II are used for -/// assertions only. +/// operand number (in the II) that we are adding. void InstrEmitter::AddOperand(MachineInstr *MI, SDValue Op, unsigned IIOpNum, const MCInstrDesc *II, @@ -350,7 +367,11 @@ void InstrEmitter::AddOperand(MachineInstr *MI, SDValue Op, const ConstantFP *CFP = F->getConstantFPValue(); MI->addOperand(MachineOperand::CreateFPImm(CFP)); } else if (RegisterSDNode *R = dyn_cast(Op)) { - MI->addOperand(MachineOperand::CreateReg(R->getReg(), false)); + // Turn additional physreg operands into implicit uses on non-variadic + // instructions. 
This is used by call and return instructions passing + // arguments in registers. + bool Imp = II && (IIOpNum >= II->getNumOperands() && !II->isVariadic()); + MI->addOperand(MachineOperand::CreateReg(R->getReg(), false, Imp)); } else if (RegisterMaskSDNode *RM = dyn_cast(Op)) { MI->addOperand(MachineOperand::CreateRegMask(RM->getRegMask())); } else if (GlobalAddressSDNode *TGA = dyn_cast(Op)) { @@ -390,6 +411,10 @@ void InstrEmitter::AddOperand(MachineInstr *MI, SDValue Op, } else if (BlockAddressSDNode *BA = dyn_cast(Op)) { MI->addOperand(MachineOperand::CreateBA(BA->getBlockAddress(), BA->getTargetFlags())); + } else if (TargetIndexSDNode *TI = dyn_cast(Op)) { + MI->addOperand(MachineOperand::CreateTargetIndex(TI->getIndex(), + TI->getOffset(), + TI->getTargetFlags())); } else { assert(Op.getValueType() != MVT::Other && Op.getValueType() != MVT::Glue && @@ -458,7 +483,8 @@ void InstrEmitter::EmitSubregNode(SDNode *Node, unsigned SrcReg, DstReg, DefSubIdx; if (DefMI && TII->isCoalescableExtInstr(*DefMI, SrcReg, DstReg, DefSubIdx) && - SubIdx == DefSubIdx) { + SubIdx == DefSubIdx && + TRC == MRI->getRegClass(SrcReg)) { // Optimize these: // r1025 = s/zext r1024, 4 // r1026 = extract_subreg r1025, 4 @@ -467,6 +493,7 @@ void InstrEmitter::EmitSubregNode(SDNode *Node, VRBase = MRI->createVirtualRegister(TRC); BuildMI(*MBB, InsertPos, Node->getDebugLoc(), TII->get(TargetOpcode::COPY), VRBase).addReg(SrcReg); + MRI->clearKillFlags(SrcReg); } else { // VReg may not support a SubIdx sub-register, and we may need to // constrain its register class or issue a COPY to a compatible register @@ -548,7 +575,8 @@ InstrEmitter::EmitCopyToRegClassNode(SDNode *Node, // Create the new VReg in the destination class and emit a copy. unsigned DstRCIdx = cast(Node->getOperand(1))->getZExtValue(); - const TargetRegisterClass *DstRC = TRI->getRegClass(DstRCIdx); + const TargetRegisterClass *DstRC = + TRI->getAllocatableClass(TRI->getRegClass(DstRCIdx)); unsigned NewVReg = MRI->createVirtualRegister(DstRC); BuildMI(*MBB, InsertPos, Node->getDebugLoc(), TII->get(TargetOpcode::COPY), NewVReg).addReg(VReg); @@ -566,7 +594,7 @@ void InstrEmitter::EmitRegSequence(SDNode *Node, bool IsClone, bool IsCloned) { unsigned DstRCIdx = cast(Node->getOperand(0))->getZExtValue(); const TargetRegisterClass *RC = TRI->getRegClass(DstRCIdx); - unsigned NewVReg = MRI->createVirtualRegister(RC); + unsigned NewVReg = MRI->createVirtualRegister(TRI->getAllocatableClass(RC)); MachineInstr *MI = BuildMI(*MF, Node->getDebugLoc(), TII->get(TargetOpcode::REG_SEQUENCE), NewVReg); unsigned NumOps = Node->getNumOperands(); @@ -691,7 +719,8 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned, const MCInstrDesc &II = TII->get(Opc); unsigned NumResults = CountResults(Node); - unsigned NodeOperands = CountOperands(Node); + unsigned NumImpUses = 0; + unsigned NodeOperands = countOperands(Node, NumImpUses); bool HasPhysRegOuts = NumResults > II.getNumDefs() && II.getImplicitDefs()!=0; #ifndef NDEBUG unsigned NumMIOperands = NodeOperands + NumResults; @@ -700,7 +729,8 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned, "Too few operands for a variadic node!"); else assert(NumMIOperands >= II.getNumOperands() && - NumMIOperands <= II.getNumOperands()+II.getNumImplicitDefs() && + NumMIOperands <= II.getNumOperands() + II.getNumImplicitDefs() + + NumImpUses && "#operands for dag node doesn't match .td file!"); #endif diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.h b/lib/CodeGen/SelectionDAG/InstrEmitter.h index c081f38..9eddee9 
100644 --- a/lib/CodeGen/SelectionDAG/InstrEmitter.h +++ b/lib/CodeGen/SelectionDAG/InstrEmitter.h @@ -105,12 +105,6 @@ public: /// (which do not go into the machine instrs.) static unsigned CountResults(SDNode *Node); - /// CountOperands - The inputs to target nodes have any actual inputs first, - /// followed by an optional chain operand, then flag operands. Compute - /// the number of actual operands that will go into the resulting - /// MachineInstr. - static unsigned CountOperands(SDNode *Node); - /// EmitDbgValue - Generate machine instruction for a dbg_value node. /// MachineInstr *EmitDbgValue(SDDbgValue *SD, diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index a96a997..908ebb9 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -11,7 +11,11 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Analysis/DebugInfo.h" +#include "llvm/CallingConv.h" +#include "llvm/Constants.h" +#include "llvm/DebugInfo.h" +#include "llvm/DerivedTypes.h" +#include "llvm/LLVMContext.h" #include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" @@ -20,10 +24,6 @@ #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetMachine.h" -#include "llvm/CallingConv.h" -#include "llvm/Constants.h" -#include "llvm/DerivedTypes.h" -#include "llvm/LLVMContext.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" @@ -70,6 +70,9 @@ private: SDValue OptimizeFloatStore(StoreSDNode *ST); + void LegalizeLoadOps(SDNode *Node); + void LegalizeStoreOps(SDNode *Node); + /// PerformInsertVectorEltInMemory - Some target cannot handle a variable /// insertion index for the INSERT_VECTOR_ELT instruction. In this case, it /// is necessary to spill the vector being inserted into to memory, perform @@ -150,21 +153,21 @@ public: // Node replacement helpers void ReplacedNode(SDNode *N) { if (N->use_empty()) { - DAG.RemoveDeadNode(N, this); + DAG.RemoveDeadNode(N); } else { ForgetNode(N); } } void ReplaceNode(SDNode *Old, SDNode *New) { - DAG.ReplaceAllUsesWith(Old, New, this); + DAG.ReplaceAllUsesWith(Old, New); ReplacedNode(Old); } void ReplaceNode(SDValue Old, SDValue New) { - DAG.ReplaceAllUsesWith(Old, New, this); + DAG.ReplaceAllUsesWith(Old, New); ReplacedNode(Old.getNode()); } void ReplaceNode(SDNode *Old, const SDValue *New) { - DAG.ReplaceAllUsesWith(Old, New, this); + DAG.ReplaceAllUsesWith(Old, New); ReplacedNode(Old); } }; @@ -203,7 +206,8 @@ SelectionDAGLegalize::ShuffleWithNarrowerEltType(EVT NVT, EVT VT, DebugLoc dl, } SelectionDAGLegalize::SelectionDAGLegalize(SelectionDAG &dag) - : TM(dag.getTarget()), TLI(dag.getTargetLoweringInfo()), + : SelectionDAG::DAGUpdateListener(dag), + TM(dag.getTarget()), TLI(dag.getTargetLoweringInfo()), DAG(dag) { } @@ -424,7 +428,7 @@ ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG, DebugLoc dl = LD->getDebugLoc(); if (VT.isFloatingPoint() || VT.isVector()) { EVT intVT = EVT::getIntegerVT(*DAG.getContext(), LoadedVT.getSizeInBits()); - if (TLI.isTypeLegal(intVT)) { + if (TLI.isTypeLegal(intVT) && TLI.isTypeLegal(LoadedVT)) { // Expand to a (misaligned) integer load of the same size, // then bitconvert to floating point or vector. 
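The expansion that follows is the DAG-level form of a familiar trick: perform the possibly misaligned access as an integer of the same width, then bit-cast. The portable C++ equivalent, for a float:

    #include <cstdint>
    #include <cstring>

    float loadUnalignedFloat(const unsigned char *P) {
      uint32_t Bits;
      std::memcpy(&Bits, P, sizeof Bits); // misaligned integer-sized load
      float F;
      std::memcpy(&F, &Bits, sizeof F);   // "bitconvert" to floating point
      return F;
    }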
SDValue newLoad = DAG.getLoad(intVT, dl, Chain, Ptr, LD->getPointerInfo(), @@ -432,8 +436,9 @@ ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG, LD->isNonTemporal(), LD->isInvariant(), LD->getAlignment()); SDValue Result = DAG.getNode(ISD::BITCAST, dl, LoadedVT, newLoad); - if (VT.isFloatingPoint() && LoadedVT != VT) - Result = DAG.getNode(ISD::FP_EXTEND, dl, VT, Result); + if (LoadedVT != VT) + Result = DAG.getNode(VT.isFloatingPoint() ? ISD::FP_EXTEND : + ISD::ANY_EXTEND, dl, VT, Result); ValResult = Result; ChainResult = Chain; @@ -638,9 +643,8 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) { // probably means that we need to integrate dag combiner and legalizer // together. // We generally can't do this one for long doubles. - SDValue Tmp1 = ST->getChain(); - SDValue Tmp2 = ST->getBasePtr(); - SDValue Tmp3; + SDValue Chain = ST->getChain(); + SDValue Ptr = ST->getBasePtr(); unsigned Alignment = ST->getAlignment(); bool isVolatile = ST->isVolatile(); bool isNonTemporal = ST->isNonTemporal(); @@ -648,19 +652,19 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) { if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(ST->getValue())) { if (CFP->getValueType(0) == MVT::f32 && TLI.isTypeLegal(MVT::i32)) { - Tmp3 = DAG.getConstant(CFP->getValueAPF(). + SDValue Con = DAG.getConstant(CFP->getValueAPF(). bitcastToAPInt().zextOrTrunc(32), MVT::i32); - return DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getPointerInfo(), + return DAG.getStore(Chain, dl, Con, Ptr, ST->getPointerInfo(), isVolatile, isNonTemporal, Alignment); } if (CFP->getValueType(0) == MVT::f64) { // If this target supports 64-bit registers, do a single 64-bit store. if (TLI.isTypeLegal(MVT::i64)) { - Tmp3 = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt(). + SDValue Con = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt().
zextOrTrunc(64), MVT::i64); - return DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getPointerInfo(), + return DAG.getStore(Chain, dl, Con, Ptr, ST->getPointerInfo(), isVolatile, isNonTemporal, Alignment); } @@ -673,11 +677,11 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) { SDValue Hi = DAG.getConstant(IntVal.lshr(32).trunc(32), MVT::i32); if (TLI.isBigEndian()) std::swap(Lo, Hi); - Lo = DAG.getStore(Tmp1, dl, Lo, Tmp2, ST->getPointerInfo(), isVolatile, + Lo = DAG.getStore(Chain, dl, Lo, Ptr, ST->getPointerInfo(), isVolatile, isNonTemporal, Alignment); - Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2, + Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, DAG.getIntPtrConstant(4)); - Hi = DAG.getStore(Tmp1, dl, Hi, Tmp2, + Hi = DAG.getStore(Chain, dl, Hi, Ptr, ST->getPointerInfo().getWithOffset(4), isVolatile, isNonTemporal, MinAlign(Alignment, 4U)); @@ -688,14 +692,448 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) { return SDValue(0, 0); } +void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { + StoreSDNode *ST = cast(Node); + SDValue Chain = ST->getChain(); + SDValue Ptr = ST->getBasePtr(); + DebugLoc dl = Node->getDebugLoc(); + + unsigned Alignment = ST->getAlignment(); + bool isVolatile = ST->isVolatile(); + bool isNonTemporal = ST->isNonTemporal(); + + if (!ST->isTruncatingStore()) { + if (SDNode *OptStore = OptimizeFloatStore(ST).getNode()) { + ReplaceNode(ST, OptStore); + return; + } + + { + SDValue Value = ST->getValue(); + EVT VT = Value.getValueType(); + switch (TLI.getOperationAction(ISD::STORE, VT)) { + default: llvm_unreachable("This action is not supported yet!"); + case TargetLowering::Legal: + // If this is an unaligned store and the target doesn't support it, + // expand it. + if (!TLI.allowsUnalignedMemoryAccesses(ST->getMemoryVT())) { + Type *Ty = ST->getMemoryVT().getTypeForEVT(*DAG.getContext()); + unsigned ABIAlignment= TLI.getTargetData()->getABITypeAlignment(Ty); + if (ST->getAlignment() < ABIAlignment) + ExpandUnalignedStore(cast(Node), + DAG, TLI, this); + } + break; + case TargetLowering::Custom: { + SDValue Res = TLI.LowerOperation(SDValue(Node, 0), DAG); + if (Res.getNode()) + ReplaceNode(SDValue(Node, 0), Res); + return; + } + case TargetLowering::Promote: { + assert(VT.isVector() && "Unknown legal promote case!"); + Value = DAG.getNode(ISD::BITCAST, dl, + TLI.getTypeToPromoteTo(ISD::STORE, VT), Value); + SDValue Result = + DAG.getStore(Chain, dl, Value, Ptr, + ST->getPointerInfo(), isVolatile, + isNonTemporal, Alignment); + ReplaceNode(SDValue(Node, 0), Result); + break; + } + } + return; + } + } else { + SDValue Value = ST->getValue(); + + EVT StVT = ST->getMemoryVT(); + unsigned StWidth = StVT.getSizeInBits(); + + if (StWidth != StVT.getStoreSizeInBits()) { + // Promote to a byte-sized store with upper bits zero if not + // storing an integral number of bytes. For example, promote + // TRUNCSTORE:i1 X -> TRUNCSTORE:i8 (and X, 1) + EVT NVT = EVT::getIntegerVT(*DAG.getContext(), + StVT.getStoreSizeInBits()); + Value = DAG.getZeroExtendInReg(Value, dl, StVT); + SDValue Result = + DAG.getTruncStore(Chain, dl, Value, Ptr, ST->getPointerInfo(), + NVT, isVolatile, isNonTemporal, Alignment); + ReplaceNode(SDValue(Node, 0), Result); + } else if (StWidth & (StWidth - 1)) { + // If not storing a power-of-2 number of bits, expand as two stores. 
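The little-endian branch that follows implements exactly the TRUNCSTORE:i24 X -> TRUNCSTORE:i16 X, TRUNCSTORE@+2:i8 (srl X, 16) split named in its comment. The same arithmetic in plain C++, assuming a little-endian host so the memcpy matches the DAG's LE case:

    #include <cstdint>
    #include <cstring>

    void store24le(unsigned char *P, uint32_t V) {
      const uint16_t Lo = static_cast<uint16_t>(V);     // bottom 16 bits
      const uint8_t Hi = static_cast<uint8_t>(V >> 16); // remaining 8 bits
      std::memcpy(P, &Lo, 2); // TRUNCSTORE:i16
      P[2] = Hi;              // TRUNCSTORE@+2:i8
    }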
+ assert(!StVT.isVector() && "Unsupported truncstore!"); + unsigned RoundWidth = 1 << Log2_32(StWidth); + assert(RoundWidth < StWidth); + unsigned ExtraWidth = StWidth - RoundWidth; + assert(ExtraWidth < RoundWidth); + assert(!(RoundWidth % 8) && !(ExtraWidth % 8) && + "Store size not an integral number of bytes!"); + EVT RoundVT = EVT::getIntegerVT(*DAG.getContext(), RoundWidth); + EVT ExtraVT = EVT::getIntegerVT(*DAG.getContext(), ExtraWidth); + SDValue Lo, Hi; + unsigned IncrementSize; + + if (TLI.isLittleEndian()) { + // TRUNCSTORE:i24 X -> TRUNCSTORE:i16 X, TRUNCSTORE@+2:i8 (srl X, 16) + // Store the bottom RoundWidth bits. + Lo = DAG.getTruncStore(Chain, dl, Value, Ptr, ST->getPointerInfo(), + RoundVT, + isVolatile, isNonTemporal, Alignment); + + // Store the remaining ExtraWidth bits. + IncrementSize = RoundWidth / 8; + Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, + DAG.getIntPtrConstant(IncrementSize)); + Hi = DAG.getNode(ISD::SRL, dl, Value.getValueType(), Value, + DAG.getConstant(RoundWidth, + TLI.getShiftAmountTy(Value.getValueType()))); + Hi = DAG.getTruncStore(Chain, dl, Hi, Ptr, + ST->getPointerInfo().getWithOffset(IncrementSize), + ExtraVT, isVolatile, isNonTemporal, + MinAlign(Alignment, IncrementSize)); + } else { + // Big endian - avoid unaligned stores. + // TRUNCSTORE:i24 X -> TRUNCSTORE:i16 (srl X, 8), TRUNCSTORE@+2:i8 X + // Store the top RoundWidth bits. + Hi = DAG.getNode(ISD::SRL, dl, Value.getValueType(), Value, + DAG.getConstant(ExtraWidth, + TLI.getShiftAmountTy(Value.getValueType()))); + Hi = DAG.getTruncStore(Chain, dl, Hi, Ptr, ST->getPointerInfo(), + RoundVT, isVolatile, isNonTemporal, Alignment); + + // Store the remaining ExtraWidth bits. + IncrementSize = RoundWidth / 8; + Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, + DAG.getIntPtrConstant(IncrementSize)); + Lo = DAG.getTruncStore(Chain, dl, Value, Ptr, + ST->getPointerInfo().getWithOffset(IncrementSize), + ExtraVT, isVolatile, isNonTemporal, + MinAlign(Alignment, IncrementSize)); + } + + // The order of the stores doesn't matter. + SDValue Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi); + ReplaceNode(SDValue(Node, 0), Result); + } else { + switch (TLI.getTruncStoreAction(ST->getValue().getValueType(), StVT)) { + default: llvm_unreachable("This action is not supported yet!"); + case TargetLowering::Legal: + // If this is an unaligned store and the target doesn't support it, + // expand it. + if (!TLI.allowsUnalignedMemoryAccesses(ST->getMemoryVT())) { + Type *Ty = ST->getMemoryVT().getTypeForEVT(*DAG.getContext()); + unsigned ABIAlignment= TLI.getTargetData()->getABITypeAlignment(Ty); + if (ST->getAlignment() < ABIAlignment) + ExpandUnalignedStore(cast(Node), DAG, TLI, this); + } + break; + case TargetLowering::Custom: { + SDValue Res = TLI.LowerOperation(SDValue(Node, 0), DAG); + if (Res.getNode()) + ReplaceNode(SDValue(Node, 0), Res); + return; + } + case TargetLowering::Expand: + assert(!StVT.isVector() && + "Vector Stores are handled in LegalizeVectorOps"); + + // TRUNCSTORE:i16 i32 -> STORE i16 + assert(TLI.isTypeLegal(StVT) && + "Do not know how to expand this store!"); + Value = DAG.getNode(ISD::TRUNCATE, dl, StVT, Value); + SDValue Result = + DAG.getStore(Chain, dl, Value, Ptr, ST->getPointerInfo(), + isVolatile, isNonTemporal, Alignment); + ReplaceNode(SDValue(Node, 0), Result); + break; + } + } + } +} + +void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { + LoadSDNode *LD = cast(Node); + SDValue Chain = LD->getChain(); // The chain. 
+ SDValue Ptr = LD->getBasePtr(); // The base pointer. + SDValue Value; // The value returned by the load op. + DebugLoc dl = Node->getDebugLoc(); + + ISD::LoadExtType ExtType = LD->getExtensionType(); + if (ExtType == ISD::NON_EXTLOAD) { + EVT VT = Node->getValueType(0); + SDValue RVal = SDValue(Node, 0); + SDValue RChain = SDValue(Node, 1); + + switch (TLI.getOperationAction(Node->getOpcode(), VT)) { + default: llvm_unreachable("This action is not supported yet!"); + case TargetLowering::Legal: + // If this is an unaligned load and the target doesn't support it, + // expand it. + if (!TLI.allowsUnalignedMemoryAccesses(LD->getMemoryVT())) { + Type *Ty = LD->getMemoryVT().getTypeForEVT(*DAG.getContext()); + unsigned ABIAlignment = + TLI.getTargetData()->getABITypeAlignment(Ty); + if (LD->getAlignment() < ABIAlignment){ + ExpandUnalignedLoad(cast(Node), + DAG, TLI, RVal, RChain); + } + } + break; + case TargetLowering::Custom: { + SDValue Res = TLI.LowerOperation(RVal, DAG); + if (Res.getNode()) { + RVal = Res; + RChain = Res.getValue(1); + } + break; + } + case TargetLowering::Promote: { + // Only promote a load of vector type to another. + assert(VT.isVector() && "Cannot promote this load!"); + // Change base type to a different vector type. + EVT NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), VT); + + SDValue Res = DAG.getLoad(NVT, dl, Chain, Ptr, LD->getPointerInfo(), + LD->isVolatile(), LD->isNonTemporal(), + LD->isInvariant(), LD->getAlignment()); + RVal = DAG.getNode(ISD::BITCAST, dl, VT, Res); + RChain = Res.getValue(1); + break; + } + } + if (RChain.getNode() != Node) { + assert(RVal.getNode() != Node && "Load must be completely replaced"); + DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 0), RVal); + DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 1), RChain); + ReplacedNode(Node); + } + return; + } + + EVT SrcVT = LD->getMemoryVT(); + unsigned SrcWidth = SrcVT.getSizeInBits(); + unsigned Alignment = LD->getAlignment(); + bool isVolatile = LD->isVolatile(); + bool isNonTemporal = LD->isNonTemporal(); + + if (SrcWidth != SrcVT.getStoreSizeInBits() && + // Some targets pretend to have an i1 loading operation, and actually + // load an i8. This trick is correct for ZEXTLOAD because the top 7 + // bits are guaranteed to be zero; it helps the optimizers understand + // that these bits are zero. It is also useful for EXTLOAD, since it + // tells the optimizers that those bits are undefined. It would be + // nice to have an effective generic way of getting these benefits... + // Until such a way is found, don't insist on promoting i1 here. + (SrcVT != MVT::i1 || + TLI.getLoadExtAction(ExtType, MVT::i1) == TargetLowering::Promote)) { + // Promote to a byte-sized load if not loading an integral number of + // bytes. For example, promote EXTLOAD:i20 -> EXTLOAD:i24. + unsigned NewWidth = SrcVT.getStoreSizeInBits(); + EVT NVT = EVT::getIntegerVT(*DAG.getContext(), NewWidth); + SDValue Ch; + + // The extra bits are guaranteed to be zero, since we stored them that + // way. A zext load from NVT thus automatically gives zext from SrcVT. + + ISD::LoadExtType NewExtType = + ExtType == ISD::ZEXTLOAD ? ISD::ZEXTLOAD : ISD::EXTLOAD; + + SDValue Result = + DAG.getExtLoad(NewExtType, dl, Node->getValueType(0), + Chain, Ptr, LD->getPointerInfo(), + NVT, isVolatile, isNonTemporal, Alignment); + + Ch = Result.getValue(1); // The chain. + + if (ExtType == ISD::SEXTLOAD) + // Having the top bits zero doesn't help when sign extending. 
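SIGN_EXTEND_INREG, used just below to fix up an SEXTLOAD that was widened to a zero-extending load, is the classic shift-up/shift-down idiom on scalars. A sketch for the i20 source width mentioned above (arithmetic right shift of a negative value is the behavior on all common targets, though formally implementation-defined before C++20):

    #include <cstdint>

    int32_t signExtendInReg(int32_t V, unsigned Bits) {
      const unsigned Shift = 32 - Bits; // e.g. Bits = 20
      return static_cast<int32_t>(static_cast<uint32_t>(V) << Shift) >> Shift;
    }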
+ Result = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, + Result.getValueType(), + Result, DAG.getValueType(SrcVT)); + else if (ExtType == ISD::ZEXTLOAD || NVT == Result.getValueType()) + // All the top bits are guaranteed to be zero - inform the optimizers. + Result = DAG.getNode(ISD::AssertZext, dl, + Result.getValueType(), Result, + DAG.getValueType(SrcVT)); + + Value = Result; + Chain = Ch; + } else if (SrcWidth & (SrcWidth - 1)) { + // If not loading a power-of-2 number of bits, expand as two loads. + assert(!SrcVT.isVector() && "Unsupported extload!"); + unsigned RoundWidth = 1 << Log2_32(SrcWidth); + assert(RoundWidth < SrcWidth); + unsigned ExtraWidth = SrcWidth - RoundWidth; + assert(ExtraWidth < RoundWidth); + assert(!(RoundWidth % 8) && !(ExtraWidth % 8) && + "Load size not an integral number of bytes!"); + EVT RoundVT = EVT::getIntegerVT(*DAG.getContext(), RoundWidth); + EVT ExtraVT = EVT::getIntegerVT(*DAG.getContext(), ExtraWidth); + SDValue Lo, Hi, Ch; + unsigned IncrementSize; + + if (TLI.isLittleEndian()) { + // EXTLOAD:i24 -> ZEXTLOAD:i16 | (shl EXTLOAD@+2:i8, 16) + // Load the bottom RoundWidth bits. + Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, Node->getValueType(0), + Chain, Ptr, + LD->getPointerInfo(), RoundVT, isVolatile, + isNonTemporal, Alignment); + + // Load the remaining ExtraWidth bits. + IncrementSize = RoundWidth / 8; + Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, + DAG.getIntPtrConstant(IncrementSize)); + Hi = DAG.getExtLoad(ExtType, dl, Node->getValueType(0), Chain, Ptr, + LD->getPointerInfo().getWithOffset(IncrementSize), + ExtraVT, isVolatile, isNonTemporal, + MinAlign(Alignment, IncrementSize)); + + // Build a factor node to remember that this load is independent of + // the other one. + Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1), + Hi.getValue(1)); + + // Move the top bits to the right place. + Hi = DAG.getNode(ISD::SHL, dl, Hi.getValueType(), Hi, + DAG.getConstant(RoundWidth, + TLI.getShiftAmountTy(Hi.getValueType()))); + + // Join the hi and lo parts. + Value = DAG.getNode(ISD::OR, dl, Node->getValueType(0), Lo, Hi); + } else { + // Big endian - avoid unaligned loads. + // EXTLOAD:i24 -> (shl EXTLOAD:i16, 8) | ZEXTLOAD@+2:i8 + // Load the top RoundWidth bits. + Hi = DAG.getExtLoad(ExtType, dl, Node->getValueType(0), Chain, Ptr, + LD->getPointerInfo(), RoundVT, isVolatile, + isNonTemporal, Alignment); + + // Load the remaining ExtraWidth bits. + IncrementSize = RoundWidth / 8; + Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, + DAG.getIntPtrConstant(IncrementSize)); + Lo = DAG.getExtLoad(ISD::ZEXTLOAD, + dl, Node->getValueType(0), Chain, Ptr, + LD->getPointerInfo().getWithOffset(IncrementSize), + ExtraVT, isVolatile, isNonTemporal, + MinAlign(Alignment, IncrementSize)); + + // Build a factor node to remember that this load is independent of + // the other one. + Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1), + Hi.getValue(1)); + + // Move the top bits to the right place. + Hi = DAG.getNode(ISD::SHL, dl, Hi.getValueType(), Hi, + DAG.getConstant(ExtraWidth, + TLI.getShiftAmountTy(Hi.getValueType()))); + + // Join the hi and lo parts. 
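The OR just below is the load-side mirror of the i24 store split earlier in this file: two narrow loads recombined with a shift and an OR. Again in plain C++ for the little-endian layout (little-endian host assumed for the memcpy):

    #include <cstdint>
    #include <cstring>

    uint32_t load24le(const unsigned char *P) {
      uint16_t Lo;
      std::memcpy(&Lo, P, 2);   // ZEXTLOAD:i16
      const uint32_t Hi = P[2]; // EXTLOAD@+2:i8
      return (Hi << 16) | Lo;   // shl + or: join the hi and lo parts
    }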
+ Value = DAG.getNode(ISD::OR, dl, Node->getValueType(0), Lo, Hi); + } + + Chain = Ch; + } else { + bool isCustom = false; + switch (TLI.getLoadExtAction(ExtType, SrcVT)) { + default: llvm_unreachable("This action is not supported yet!"); + case TargetLowering::Custom: + isCustom = true; + // FALLTHROUGH + case TargetLowering::Legal: { + Value = SDValue(Node, 0); + Chain = SDValue(Node, 1); + + if (isCustom) { + SDValue Res = TLI.LowerOperation(SDValue(Node, 0), DAG); + if (Res.getNode()) { + Value = Res; + Chain = Res.getValue(1); + } + } else { + // If this is an unaligned load and the target doesn't support it, + // expand it. + if (!TLI.allowsUnalignedMemoryAccesses(LD->getMemoryVT())) { + Type *Ty = + LD->getMemoryVT().getTypeForEVT(*DAG.getContext()); + unsigned ABIAlignment = + TLI.getTargetData()->getABITypeAlignment(Ty); + if (LD->getAlignment() < ABIAlignment){ + ExpandUnalignedLoad(cast(Node), + DAG, TLI, Value, Chain); + } + } + } + break; + } + case TargetLowering::Expand: + if (!TLI.isLoadExtLegal(ISD::EXTLOAD, SrcVT) && TLI.isTypeLegal(SrcVT)) { + SDValue Load = DAG.getLoad(SrcVT, dl, Chain, Ptr, + LD->getPointerInfo(), + LD->isVolatile(), LD->isNonTemporal(), + LD->isInvariant(), LD->getAlignment()); + unsigned ExtendOp; + switch (ExtType) { + case ISD::EXTLOAD: + ExtendOp = (SrcVT.isFloatingPoint() ? + ISD::FP_EXTEND : ISD::ANY_EXTEND); + break; + case ISD::SEXTLOAD: ExtendOp = ISD::SIGN_EXTEND; break; + case ISD::ZEXTLOAD: ExtendOp = ISD::ZERO_EXTEND; break; + default: llvm_unreachable("Unexpected extend load type!"); + } + Value = DAG.getNode(ExtendOp, dl, Node->getValueType(0), Load); + Chain = Load.getValue(1); + break; + } + + assert(!SrcVT.isVector() && + "Vector Loads are handled in LegalizeVectorOps"); + + // FIXME: This does not work for vectors on most targets. Sign- and + // zero-extend operations are currently folded into extending loads, + // whether they are legal or not, and then we end up here without any + // support for legalizing them. + assert(ExtType != ISD::EXTLOAD && + "EXTLOAD should always be supported!"); + // Turn the unsupported load into an EXTLOAD followed by an explicit + // zero/sign extend inreg. + SDValue Result = DAG.getExtLoad(ISD::EXTLOAD, dl, Node->getValueType(0), + Chain, Ptr, LD->getPointerInfo(), SrcVT, + LD->isVolatile(), LD->isNonTemporal(), + LD->getAlignment()); + SDValue ValRes; + if (ExtType == ISD::SEXTLOAD) + ValRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, + Result.getValueType(), + Result, DAG.getValueType(SrcVT)); + else + ValRes = DAG.getZeroExtendInReg(Result, dl, SrcVT.getScalarType()); + Value = ValRes; + Chain = Result.getValue(1); + break; + } + } + + // Since loads produce two values, make sure to remember that we legalized + // both of them. + if (Chain.getNode() != Node) { + assert(Value.getNode() != Node && "Load must be completely replaced"); + DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 0), Value); + DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 1), Chain); + ReplacedNode(Node); + } +} + /// LegalizeOp - Return a legal replacement for the given operation, with /// all legal operands. void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { if (Node->getOpcode() == ISD::TargetConstant) // Allow illegal target nodes. 
return; - DebugLoc dl = Node->getDebugLoc(); - for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i) assert(TLI.getTypeAction(*DAG.getContext(), Node->getValueType(i)) == TargetLowering::TypeLegal && @@ -708,9 +1146,6 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { Node->getOperand(i).getOpcode() == ISD::TargetConstant) && "Unexpected illegal type!"); - SDValue Tmp1, Tmp2, Tmp3, Tmp4; - bool isCustom = false; - // Figure out the correct action; the way to query this varies by opcode TargetLowering::LegalizeAction Action = TargetLowering::Legal; bool SimpleFinishLegalizing = true; @@ -816,9 +1251,7 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { } if (SimpleFinishLegalizing) { - SmallVector Ops; - for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i) - Ops.push_back(Node->getOperand(i)); + SDNode *NewNode = Node; switch (Node->getOpcode()) { default: break; case ISD::SHL: @@ -828,11 +1261,14 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { case ISD::ROTR: // Legalizing shifts/rotates requires adjusting the shift amount // to the appropriate width. - if (!Ops[1].getValueType().isVector()) { - SDValue SAO = DAG.getShiftAmountOperand(Ops[0].getValueType(), Ops[1]); + if (!Node->getOperand(1).getValueType().isVector()) { + SDValue SAO = + DAG.getShiftAmountOperand(Node->getOperand(0).getValueType(), + Node->getOperand(1)); HandleSDNode Handle(SAO); LegalizeOp(SAO.getNode()); - Ops[1] = Handle.getValue(); + NewNode = DAG.UpdateNodeOperands(Node, Node->getOperand(0), + Handle.getValue()); } break; case ISD::SRL_PARTS: @@ -840,18 +1276,21 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { case ISD::SHL_PARTS: // Legalizing shifts/rotates requires adjusting the shift amount // to the appropriate width. - if (!Ops[2].getValueType().isVector()) { - SDValue SAO = DAG.getShiftAmountOperand(Ops[0].getValueType(), Ops[2]); + if (!Node->getOperand(2).getValueType().isVector()) { + SDValue SAO = + DAG.getShiftAmountOperand(Node->getOperand(0).getValueType(), + Node->getOperand(2)); HandleSDNode Handle(SAO); LegalizeOp(SAO.getNode()); - Ops[2] = Handle.getValue(); + NewNode = DAG.UpdateNodeOperands(Node, Node->getOperand(0), + Node->getOperand(1), + Handle.getValue()); } break; } - SDNode *NewNode = DAG.UpdateNodeOperands(Node, Ops.data(), Ops.size()); if (NewNode != Node) { - DAG.ReplaceAllUsesWith(Node, NewNode, this); + DAG.ReplaceAllUsesWith(Node, NewNode); for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i) DAG.TransferDbgValues(SDValue(Node, i), SDValue(NewNode, i)); ReplacedNode(Node); @@ -860,27 +1299,27 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { switch (Action) { case TargetLowering::Legal: return; - case TargetLowering::Custom: + case TargetLowering::Custom: { // FIXME: The handling for custom lowering with multiple results is // a complete mess. 
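      // (Editorial note.) The contract below: LowerOperation returns a null
      // SDValue when the target declines, letting the node fall through to
      // Expand; otherwise its replacement's results are gathered into
      // ResultVals and RAUW'd value-by-value, since the replacement may be a
      // different node with the same number of results.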
- Tmp1 = TLI.LowerOperation(SDValue(Node, 0), DAG); - if (Tmp1.getNode()) { + SDValue Res = TLI.LowerOperation(SDValue(Node, 0), DAG); + if (Res.getNode()) { SmallVector ResultVals; for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i) { if (e == 1) - ResultVals.push_back(Tmp1); + ResultVals.push_back(Res); else - ResultVals.push_back(Tmp1.getValue(i)); + ResultVals.push_back(Res.getValue(i)); } - if (Tmp1.getNode() != Node || Tmp1.getResNo() != 0) { - DAG.ReplaceAllUsesWith(Node, ResultVals.data(), this); + if (Res.getNode() != Node || Res.getResNo() != 0) { + DAG.ReplaceAllUsesWith(Node, ResultVals.data()); for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i) DAG.TransferDbgValues(SDValue(Node, i), ResultVals[i]); ReplacedNode(Node); } return; } - + } // FALL THROUGH case TargetLowering::Expand: ExpandNode(Node); @@ -904,428 +1343,10 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { case ISD::CALLSEQ_END: break; case ISD::LOAD: { - LoadSDNode *LD = cast(Node); - Tmp1 = LD->getChain(); // Legalize the chain. - Tmp2 = LD->getBasePtr(); // Legalize the base pointer. - - ISD::LoadExtType ExtType = LD->getExtensionType(); - if (ExtType == ISD::NON_EXTLOAD) { - EVT VT = Node->getValueType(0); - Tmp3 = SDValue(Node, 0); - Tmp4 = SDValue(Node, 1); - - switch (TLI.getOperationAction(Node->getOpcode(), VT)) { - default: llvm_unreachable("This action is not supported yet!"); - case TargetLowering::Legal: - // If this is an unaligned load and the target doesn't support it, - // expand it. - if (!TLI.allowsUnalignedMemoryAccesses(LD->getMemoryVT())) { - Type *Ty = LD->getMemoryVT().getTypeForEVT(*DAG.getContext()); - unsigned ABIAlignment = TLI.getTargetData()->getABITypeAlignment(Ty); - if (LD->getAlignment() < ABIAlignment){ - ExpandUnalignedLoad(cast(Node), - DAG, TLI, Tmp3, Tmp4); - } - } - break; - case TargetLowering::Custom: - Tmp1 = TLI.LowerOperation(Tmp3, DAG); - if (Tmp1.getNode()) { - Tmp3 = Tmp1; - Tmp4 = Tmp1.getValue(1); - } - break; - case TargetLowering::Promote: { - // Only promote a load of vector type to another. - assert(VT.isVector() && "Cannot promote this load!"); - // Change base type to a different vector type. - EVT NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), VT); - - Tmp1 = DAG.getLoad(NVT, dl, Tmp1, Tmp2, LD->getPointerInfo(), - LD->isVolatile(), LD->isNonTemporal(), - LD->isInvariant(), LD->getAlignment()); - Tmp3 = DAG.getNode(ISD::BITCAST, dl, VT, Tmp1); - Tmp4 = Tmp1.getValue(1); - break; - } - } - if (Tmp4.getNode() != Node) { - assert(Tmp3.getNode() != Node && "Load must be completely replaced"); - DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 0), Tmp3); - DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 1), Tmp4); - ReplacedNode(Node); - } - return; - } - - EVT SrcVT = LD->getMemoryVT(); - unsigned SrcWidth = SrcVT.getSizeInBits(); - unsigned Alignment = LD->getAlignment(); - bool isVolatile = LD->isVolatile(); - bool isNonTemporal = LD->isNonTemporal(); - - if (SrcWidth != SrcVT.getStoreSizeInBits() && - // Some targets pretend to have an i1 loading operation, and actually - // load an i8. This trick is correct for ZEXTLOAD because the top 7 - // bits are guaranteed to be zero; it helps the optimizers understand - // that these bits are zero. It is also useful for EXTLOAD, since it - // tells the optimizers that those bits are undefined. It would be - // nice to have an effective generic way of getting these benefits... - // Until such a way is found, don't insist on promoting i1 here. 
- (SrcVT != MVT::i1 || - TLI.getLoadExtAction(ExtType, MVT::i1) == TargetLowering::Promote)) { - // Promote to a byte-sized load if not loading an integral number of - // bytes. For example, promote EXTLOAD:i20 -> EXTLOAD:i24. - unsigned NewWidth = SrcVT.getStoreSizeInBits(); - EVT NVT = EVT::getIntegerVT(*DAG.getContext(), NewWidth); - SDValue Ch; - - // The extra bits are guaranteed to be zero, since we stored them that - // way. A zext load from NVT thus automatically gives zext from SrcVT. - - ISD::LoadExtType NewExtType = - ExtType == ISD::ZEXTLOAD ? ISD::ZEXTLOAD : ISD::EXTLOAD; - - SDValue Result = - DAG.getExtLoad(NewExtType, dl, Node->getValueType(0), - Tmp1, Tmp2, LD->getPointerInfo(), - NVT, isVolatile, isNonTemporal, Alignment); - - Ch = Result.getValue(1); // The chain. - - if (ExtType == ISD::SEXTLOAD) - // Having the top bits zero doesn't help when sign extending. - Result = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, - Result.getValueType(), - Result, DAG.getValueType(SrcVT)); - else if (ExtType == ISD::ZEXTLOAD || NVT == Result.getValueType()) - // All the top bits are guaranteed to be zero - inform the optimizers. - Result = DAG.getNode(ISD::AssertZext, dl, - Result.getValueType(), Result, - DAG.getValueType(SrcVT)); - - Tmp1 = Result; - Tmp2 = Ch; - } else if (SrcWidth & (SrcWidth - 1)) { - // If not loading a power-of-2 number of bits, expand as two loads. - assert(!SrcVT.isVector() && "Unsupported extload!"); - unsigned RoundWidth = 1 << Log2_32(SrcWidth); - assert(RoundWidth < SrcWidth); - unsigned ExtraWidth = SrcWidth - RoundWidth; - assert(ExtraWidth < RoundWidth); - assert(!(RoundWidth % 8) && !(ExtraWidth % 8) && - "Load size not an integral number of bytes!"); - EVT RoundVT = EVT::getIntegerVT(*DAG.getContext(), RoundWidth); - EVT ExtraVT = EVT::getIntegerVT(*DAG.getContext(), ExtraWidth); - SDValue Lo, Hi, Ch; - unsigned IncrementSize; - - if (TLI.isLittleEndian()) { - // EXTLOAD:i24 -> ZEXTLOAD:i16 | (shl EXTLOAD@+2:i8, 16) - // Load the bottom RoundWidth bits. - Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, Node->getValueType(0), - Tmp1, Tmp2, - LD->getPointerInfo(), RoundVT, isVolatile, - isNonTemporal, Alignment); - - // Load the remaining ExtraWidth bits. - IncrementSize = RoundWidth / 8; - Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2, - DAG.getIntPtrConstant(IncrementSize)); - Hi = DAG.getExtLoad(ExtType, dl, Node->getValueType(0), Tmp1, Tmp2, - LD->getPointerInfo().getWithOffset(IncrementSize), - ExtraVT, isVolatile, isNonTemporal, - MinAlign(Alignment, IncrementSize)); - - // Build a factor node to remember that this load is independent of - // the other one. - Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1), - Hi.getValue(1)); - - // Move the top bits to the right place. - Hi = DAG.getNode(ISD::SHL, dl, Hi.getValueType(), Hi, - DAG.getConstant(RoundWidth, - TLI.getShiftAmountTy(Hi.getValueType()))); - - // Join the hi and lo parts. - Tmp1 = DAG.getNode(ISD::OR, dl, Node->getValueType(0), Lo, Hi); - } else { - // Big endian - avoid unaligned loads. - // EXTLOAD:i24 -> (shl EXTLOAD:i16, 8) | ZEXTLOAD@+2:i8 - // Load the top RoundWidth bits. - Hi = DAG.getExtLoad(ExtType, dl, Node->getValueType(0), Tmp1, Tmp2, - LD->getPointerInfo(), RoundVT, isVolatile, - isNonTemporal, Alignment); - - // Load the remaining ExtraWidth bits. 
- IncrementSize = RoundWidth / 8; - Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2, - DAG.getIntPtrConstant(IncrementSize)); - Lo = DAG.getExtLoad(ISD::ZEXTLOAD, - dl, Node->getValueType(0), Tmp1, Tmp2, - LD->getPointerInfo().getWithOffset(IncrementSize), - ExtraVT, isVolatile, isNonTemporal, - MinAlign(Alignment, IncrementSize)); - - // Build a factor node to remember that this load is independent of - // the other one. - Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1), - Hi.getValue(1)); - - // Move the top bits to the right place. - Hi = DAG.getNode(ISD::SHL, dl, Hi.getValueType(), Hi, - DAG.getConstant(ExtraWidth, - TLI.getShiftAmountTy(Hi.getValueType()))); - - // Join the hi and lo parts. - Tmp1 = DAG.getNode(ISD::OR, dl, Node->getValueType(0), Lo, Hi); - } - - Tmp2 = Ch; - } else { - switch (TLI.getLoadExtAction(ExtType, SrcVT)) { - default: llvm_unreachable("This action is not supported yet!"); - case TargetLowering::Custom: - isCustom = true; - // FALLTHROUGH - case TargetLowering::Legal: - Tmp1 = SDValue(Node, 0); - Tmp2 = SDValue(Node, 1); - - if (isCustom) { - Tmp3 = TLI.LowerOperation(SDValue(Node, 0), DAG); - if (Tmp3.getNode()) { - Tmp1 = Tmp3; - Tmp2 = Tmp3.getValue(1); - } - } else { - // If this is an unaligned load and the target doesn't support it, - // expand it. - if (!TLI.allowsUnalignedMemoryAccesses(LD->getMemoryVT())) { - Type *Ty = - LD->getMemoryVT().getTypeForEVT(*DAG.getContext()); - unsigned ABIAlignment = - TLI.getTargetData()->getABITypeAlignment(Ty); - if (LD->getAlignment() < ABIAlignment){ - ExpandUnalignedLoad(cast(Node), - DAG, TLI, Tmp1, Tmp2); - } - } - } - break; - case TargetLowering::Expand: - if (!TLI.isLoadExtLegal(ISD::EXTLOAD, SrcVT) && TLI.isTypeLegal(SrcVT)) { - SDValue Load = DAG.getLoad(SrcVT, dl, Tmp1, Tmp2, - LD->getPointerInfo(), - LD->isVolatile(), LD->isNonTemporal(), - LD->isInvariant(), LD->getAlignment()); - unsigned ExtendOp; - switch (ExtType) { - case ISD::EXTLOAD: - ExtendOp = (SrcVT.isFloatingPoint() ? - ISD::FP_EXTEND : ISD::ANY_EXTEND); - break; - case ISD::SEXTLOAD: ExtendOp = ISD::SIGN_EXTEND; break; - case ISD::ZEXTLOAD: ExtendOp = ISD::ZERO_EXTEND; break; - default: llvm_unreachable("Unexpected extend load type!"); - } - Tmp1 = DAG.getNode(ExtendOp, dl, Node->getValueType(0), Load); - Tmp2 = Load.getValue(1); - break; - } - - assert(!SrcVT.isVector() && - "Vector Loads are handled in LegalizeVectorOps"); - - // FIXME: This does not work for vectors on most targets. Sign- and - // zero-extend operations are currently folded into extending loads, - // whether they are legal or not, and then we end up here without any - // support for legalizing them. - assert(ExtType != ISD::EXTLOAD && - "EXTLOAD should always be supported!"); - // Turn the unsupported load into an EXTLOAD followed by an explicit - // zero/sign extend inreg. - SDValue Result = DAG.getExtLoad(ISD::EXTLOAD, dl, Node->getValueType(0), - Tmp1, Tmp2, LD->getPointerInfo(), SrcVT, - LD->isVolatile(), LD->isNonTemporal(), - LD->getAlignment()); - SDValue ValRes; - if (ExtType == ISD::SEXTLOAD) - ValRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, - Result.getValueType(), - Result, DAG.getValueType(SrcVT)); - else - ValRes = DAG.getZeroExtendInReg(Result, dl, SrcVT.getScalarType()); - Tmp1 = ValRes; - Tmp2 = Result.getValue(1); - break; - } - } - - // Since loads produce two values, make sure to remember that we legalized - // both of them. 
- if (Tmp2.getNode() != Node) { - assert(Tmp1.getNode() != Node && "Load must be completely replaced"); - DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 0), Tmp1); - DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 1), Tmp2); - ReplacedNode(Node); - } - break; + return LegalizeLoadOps(Node); } case ISD::STORE: { - StoreSDNode *ST = cast(Node); - Tmp1 = ST->getChain(); - Tmp2 = ST->getBasePtr(); - unsigned Alignment = ST->getAlignment(); - bool isVolatile = ST->isVolatile(); - bool isNonTemporal = ST->isNonTemporal(); - - if (!ST->isTruncatingStore()) { - if (SDNode *OptStore = OptimizeFloatStore(ST).getNode()) { - ReplaceNode(ST, OptStore); - break; - } - - { - Tmp3 = ST->getValue(); - EVT VT = Tmp3.getValueType(); - switch (TLI.getOperationAction(ISD::STORE, VT)) { - default: llvm_unreachable("This action is not supported yet!"); - case TargetLowering::Legal: - // If this is an unaligned store and the target doesn't support it, - // expand it. - if (!TLI.allowsUnalignedMemoryAccesses(ST->getMemoryVT())) { - Type *Ty = ST->getMemoryVT().getTypeForEVT(*DAG.getContext()); - unsigned ABIAlignment= TLI.getTargetData()->getABITypeAlignment(Ty); - if (ST->getAlignment() < ABIAlignment) - ExpandUnalignedStore(cast(Node), - DAG, TLI, this); - } - break; - case TargetLowering::Custom: - Tmp1 = TLI.LowerOperation(SDValue(Node, 0), DAG); - if (Tmp1.getNode()) - ReplaceNode(SDValue(Node, 0), Tmp1); - break; - case TargetLowering::Promote: { - assert(VT.isVector() && "Unknown legal promote case!"); - Tmp3 = DAG.getNode(ISD::BITCAST, dl, - TLI.getTypeToPromoteTo(ISD::STORE, VT), Tmp3); - SDValue Result = - DAG.getStore(Tmp1, dl, Tmp3, Tmp2, - ST->getPointerInfo(), isVolatile, - isNonTemporal, Alignment); - ReplaceNode(SDValue(Node, 0), Result); - break; - } - } - break; - } - } else { - Tmp3 = ST->getValue(); - - EVT StVT = ST->getMemoryVT(); - unsigned StWidth = StVT.getSizeInBits(); - - if (StWidth != StVT.getStoreSizeInBits()) { - // Promote to a byte-sized store with upper bits zero if not - // storing an integral number of bytes. For example, promote - // TRUNCSTORE:i1 X -> TRUNCSTORE:i8 (and X, 1) - EVT NVT = EVT::getIntegerVT(*DAG.getContext(), - StVT.getStoreSizeInBits()); - Tmp3 = DAG.getZeroExtendInReg(Tmp3, dl, StVT); - SDValue Result = - DAG.getTruncStore(Tmp1, dl, Tmp3, Tmp2, ST->getPointerInfo(), - NVT, isVolatile, isNonTemporal, Alignment); - ReplaceNode(SDValue(Node, 0), Result); - } else if (StWidth & (StWidth - 1)) { - // If not storing a power-of-2 number of bits, expand as two stores. - assert(!StVT.isVector() && "Unsupported truncstore!"); - unsigned RoundWidth = 1 << Log2_32(StWidth); - assert(RoundWidth < StWidth); - unsigned ExtraWidth = StWidth - RoundWidth; - assert(ExtraWidth < RoundWidth); - assert(!(RoundWidth % 8) && !(ExtraWidth % 8) && - "Store size not an integral number of bytes!"); - EVT RoundVT = EVT::getIntegerVT(*DAG.getContext(), RoundWidth); - EVT ExtraVT = EVT::getIntegerVT(*DAG.getContext(), ExtraWidth); - SDValue Lo, Hi; - unsigned IncrementSize; - - if (TLI.isLittleEndian()) { - // TRUNCSTORE:i24 X -> TRUNCSTORE:i16 X, TRUNCSTORE@+2:i8 (srl X, 16) - // Store the bottom RoundWidth bits. - Lo = DAG.getTruncStore(Tmp1, dl, Tmp3, Tmp2, ST->getPointerInfo(), - RoundVT, - isVolatile, isNonTemporal, Alignment); - - // Store the remaining ExtraWidth bits. 
- IncrementSize = RoundWidth / 8; - Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2, - DAG.getIntPtrConstant(IncrementSize)); - Hi = DAG.getNode(ISD::SRL, dl, Tmp3.getValueType(), Tmp3, - DAG.getConstant(RoundWidth, - TLI.getShiftAmountTy(Tmp3.getValueType()))); - Hi = DAG.getTruncStore(Tmp1, dl, Hi, Tmp2, - ST->getPointerInfo().getWithOffset(IncrementSize), - ExtraVT, isVolatile, isNonTemporal, - MinAlign(Alignment, IncrementSize)); - } else { - // Big endian - avoid unaligned stores. - // TRUNCSTORE:i24 X -> TRUNCSTORE:i16 (srl X, 8), TRUNCSTORE@+2:i8 X - // Store the top RoundWidth bits. - Hi = DAG.getNode(ISD::SRL, dl, Tmp3.getValueType(), Tmp3, - DAG.getConstant(ExtraWidth, - TLI.getShiftAmountTy(Tmp3.getValueType()))); - Hi = DAG.getTruncStore(Tmp1, dl, Hi, Tmp2, ST->getPointerInfo(), - RoundVT, isVolatile, isNonTemporal, Alignment); - - // Store the remaining ExtraWidth bits. - IncrementSize = RoundWidth / 8; - Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2, - DAG.getIntPtrConstant(IncrementSize)); - Lo = DAG.getTruncStore(Tmp1, dl, Tmp3, Tmp2, - ST->getPointerInfo().getWithOffset(IncrementSize), - ExtraVT, isVolatile, isNonTemporal, - MinAlign(Alignment, IncrementSize)); - } - - // The order of the stores doesn't matter. - SDValue Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi); - ReplaceNode(SDValue(Node, 0), Result); - } else { - switch (TLI.getTruncStoreAction(ST->getValue().getValueType(), StVT)) { - default: llvm_unreachable("This action is not supported yet!"); - case TargetLowering::Legal: - // If this is an unaligned store and the target doesn't support it, - // expand it. - if (!TLI.allowsUnalignedMemoryAccesses(ST->getMemoryVT())) { - Type *Ty = ST->getMemoryVT().getTypeForEVT(*DAG.getContext()); - unsigned ABIAlignment= TLI.getTargetData()->getABITypeAlignment(Ty); - if (ST->getAlignment() < ABIAlignment) - ExpandUnalignedStore(cast(Node), DAG, TLI, this); - } - break; - case TargetLowering::Custom: - ReplaceNode(SDValue(Node, 0), - TLI.LowerOperation(SDValue(Node, 0), DAG)); - break; - case TargetLowering::Expand: - assert(!StVT.isVector() && - "Vector Stores are handled in LegalizeVectorOps"); - - // TRUNCSTORE:i16 i32 -> STORE i16 - assert(TLI.isTypeLegal(StVT) && "Do not know how to expand this store!"); - Tmp3 = DAG.getNode(ISD::TRUNCATE, dl, StVT, Tmp3); - SDValue Result = - DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getPointerInfo(), - isVolatile, isNonTemporal, Alignment); - ReplaceNode(SDValue(Node, 0), Result); - break; - } - } - } - break; + return LegalizeStoreOps(Node); } } } @@ -1795,11 +1816,13 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node, if (isTailCall) InChain = TCChain; - std::pair CallInfo = - TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false, + TargetLowering:: + CallLoweringInfo CLI(InChain, RetTy, isSigned, !isSigned, false, false, 0, TLI.getLibcallCallingConv(LC), isTailCall, /*doesNotReturn=*/false, /*isReturnValueUsed=*/true, Callee, Args, DAG, Node->getDebugLoc()); + std::pair CallInfo = TLI.LowerCallTo(CLI); + if (!CallInfo.second.getNode()) // It's a tailcall, return the chain (which is the DAG root). 
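// (Editorial sketch, not part of the patch.) The recurring migration in this
// import replaces the long positional TLI.LowerCallTo(...) overload with a
// TargetLowering::CallLoweringInfo that is filled first and then lowered.
// The parameter-name comments below are my reading of the surrounding hunks
// and should be treated as assumptions:
//
//   TargetLowering::
//   CallLoweringInfo CLI(InChain, RetTy, /*RetSExt=*/isSigned,
//                        /*RetZExt=*/!isSigned, /*isVarArg=*/false,
//                        /*isInReg=*/false, /*NumFixedArgs=*/0,
//                        TLI.getLibcallCallingConv(LC), /*isTailCall=*/false,
//                        /*doesNotReturn=*/false, /*isReturnValueUsed=*/true,
//                        Callee, Args, DAG, dl);
//   std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI);
//   // CallInfo.first is the call's value, CallInfo.second its out-chain.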
@@ -1828,11 +1851,13 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, EVT RetVT,
                          TLI.getPointerTy());
   Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
-  std::pair<SDValue, SDValue> CallInfo =
-    TLI.LowerCallTo(DAG.getEntryNode(), RetTy, isSigned, !isSigned, false,
-                    false, 0, TLI.getLibcallCallingConv(LC), /*isTailCall=*/false,
+  TargetLowering::
+  CallLoweringInfo CLI(DAG.getEntryNode(), RetTy, isSigned, !isSigned, false,
+                       false, 0, TLI.getLibcallCallingConv(LC),
+                       /*isTailCall=*/false,
                        /*doesNotReturn=*/false, /*isReturnValueUsed=*/true,
                        Callee, Args, DAG, dl);
+  std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI);

   return CallInfo.first;
 }
@@ -1860,11 +1885,12 @@ SelectionDAGLegalize::ExpandChainLibCall(RTLIB::Libcall LC,
                          TLI.getPointerTy());
   Type *RetTy = Node->getValueType(0).getTypeForEVT(*DAG.getContext());
-  std::pair<SDValue, SDValue> CallInfo =
-    TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false,
+  TargetLowering::
+  CallLoweringInfo CLI(InChain, RetTy, isSigned, !isSigned, false, false,
                     0, TLI.getLibcallCallingConv(LC), /*isTailCall=*/false,
                     /*doesNotReturn=*/false, /*isReturnValueUsed=*/true,
                     Callee, Args, DAG, Node->getDebugLoc());
+  std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI);

   return CallInfo;
 }
@@ -1919,9 +1945,11 @@ static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned,
   return TLI.getLibcallName(LC) != 0;
 }

-/// UseDivRem - Only issue divrem libcall if both quotient and remainder are
+/// useDivRem - Only issue divrem libcall if both quotient and remainder are
 /// needed.
-static bool UseDivRem(SDNode *Node, bool isSigned, bool isDIV) {
+static bool useDivRem(SDNode *Node, bool isSigned, bool isDIV) {
+  // The other use might have been replaced with a divrem already.
+  unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
   unsigned OtherOpcode = 0;
   if (isSigned)
     OtherOpcode = isDIV ? ISD::SREM : ISD::SDIV;
@@ -1935,7 +1963,7 @@ static bool UseDivRem(SDNode *Node, bool isSigned, bool isDIV) {
     SDNode *User = *UI;
     if (User == Node)
       continue;
-    if (User->getOpcode() == OtherOpcode &&
+    if ((User->getOpcode() == OtherOpcode || User->getOpcode() == DivRemOpc) &&
         User->getOperand(0) == Op0 &&
         User->getOperand(1) == Op1)
       return true;
@@ -1992,11 +2020,12 @@ SelectionDAGLegalize::ExpandDivRemLibCall(SDNode *Node,
                          TLI.getPointerTy());

   DebugLoc dl = Node->getDebugLoc();
-  std::pair<SDValue, SDValue> CallInfo =
-    TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false,
+  TargetLowering::
+  CallLoweringInfo CLI(InChain, RetTy, isSigned, !isSigned, false, false,
                     0, TLI.getLibcallCallingConv(LC), /*isTailCall=*/false,
                     /*doesNotReturn=*/false, /*isReturnValueUsed=*/true,
                     Callee, Args, DAG, dl);
+  std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI);

   // Remainder is loaded back from the stack frame.
   SDValue Rem = DAG.getLoad(RetVT, dl, CallInfo.second, FIPtr,
@@ -2570,14 +2599,17 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
     // If the target didn't lower this, lower it to '__sync_synchronize()' call
     // FIXME: handle "fence singlethread" more efficiently.
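      // (Editorial note.) This case and ISD::TRAP below share one expansion
      // pattern: build an empty TargetLowering::ArgListTy, wrap the incoming
      // chain, a void return type, and an external symbol
      // ("__sync_synchronize" here, "abort" for TRAP) in a CallLoweringInfo,
      // lower it, and push the call's out-chain as the legalized result.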
TargetLowering::ArgListTy Args; - std::pair CallResult = - TLI.LowerCallTo(Node->getOperand(0), Type::getVoidTy(*DAG.getContext()), + TargetLowering:: + CallLoweringInfo CLI(Node->getOperand(0), + Type::getVoidTy(*DAG.getContext()), false, false, false, false, 0, CallingConv::C, /*isTailCall=*/false, /*doesNotReturn=*/false, /*isReturnValueUsed=*/true, DAG.getExternalSymbol("__sync_synchronize", TLI.getPointerTy()), Args, DAG, dl); + std::pair CallResult = TLI.LowerCallTo(CLI); + Results.push_back(CallResult.second); break; } @@ -2647,13 +2679,16 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { case ISD::TRAP: { // If this operation is not supported, lower it to 'abort()' call TargetLowering::ArgListTy Args; - std::pair CallResult = - TLI.LowerCallTo(Node->getOperand(0), Type::getVoidTy(*DAG.getContext()), + TargetLowering:: + CallLoweringInfo CLI(Node->getOperand(0), + Type::getVoidTy(*DAG.getContext()), false, false, false, false, 0, CallingConv::C, /*isTailCall=*/false, /*doesNotReturn=*/false, /*isReturnValueUsed=*/true, DAG.getExternalSymbol("abort", TLI.getPointerTy()), Args, DAG, dl); + std::pair CallResult = TLI.LowerCallTo(CLI); + Results.push_back(CallResult.second); break; } @@ -3059,7 +3094,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { "Don't know how to expand this subtraction!"); Tmp1 = DAG.getNode(ISD::XOR, dl, VT, Node->getOperand(1), DAG.getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), VT)); - Tmp1 = DAG.getNode(ISD::ADD, dl, VT, Tmp2, DAG.getConstant(1, VT)); + Tmp1 = DAG.getNode(ISD::ADD, dl, VT, Tmp1, DAG.getConstant(1, VT)); Results.push_back(DAG.getNode(ISD::ADD, dl, VT, Node->getOperand(0), Tmp1)); break; } @@ -3074,7 +3109,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { Tmp3 = Node->getOperand(1); if (TLI.isOperationLegalOrCustom(DivRemOpc, VT) || (isDivRemLibcallAvailable(Node, isSigned, TLI) && - UseDivRem(Node, isSigned, false))) { + useDivRem(Node, isSigned, false))) { Tmp1 = DAG.getNode(DivRemOpc, dl, VTs, Tmp2, Tmp3).getValue(1); } else if (TLI.isOperationLegalOrCustom(DivOpc, VT)) { // X % Y -> X-X/Y*Y @@ -3102,7 +3137,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { SDVTList VTs = DAG.getVTList(VT, VT); if (TLI.isOperationLegalOrCustom(DivRemOpc, VT) || (isDivRemLibcallAvailable(Node, isSigned, TLI) && - UseDivRem(Node, isSigned, true))) + useDivRem(Node, isSigned, true))) Tmp1 = DAG.getNode(DivRemOpc, dl, VTs, Node->getOperand(0), Node->getOperand(1)); else if (isSigned) diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index 95ddb1e..e8e968a 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -588,18 +588,14 @@ SDValue DAGTypeLegalizer::PromoteIntRes_TRUNCATE(SDNode *N) { unsigned NumElts = InVT.getVectorNumElements(); assert(NumElts == NVT.getVectorNumElements() && "Dst and Src must have the same number of elements"); - EVT EltVT = InVT.getScalarType(); assert(isPowerOf2_32(NumElts) && "Promoted vector type must be a power of two"); - EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts/2); + SDValue EOp1, EOp2; + GetSplitVector(InOp, EOp1, EOp2); + EVT HalfNVT = EVT::getVectorVT(*DAG.getContext(), NVT.getScalarType(), NumElts/2); - - SDValue EOp1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, HalfVT, InOp, - DAG.getIntPtrConstant(0)); - SDValue EOp2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, HalfVT, InOp, - DAG.getIntPtrConstant(NumElts/2)); EOp1 = 
DAG.getNode(ISD::TRUNCATE, dl, HalfNVT, EOp1);
   EOp2 = DAG.getNode(ISD::TRUNCATE, dl, HalfNVT, EOp2);
@@ -2273,9 +2269,9 @@ void DAGTypeLegalizer::ExpandIntRes_XMULO(SDNode *N,
     // A divide for UMULO will be faster than a function call. Select to
     // make sure we aren't using 0.
     SDValue isZero = DAG.getSetCC(dl, TLI.getSetCCResultType(VT),
-                                  RHS, DAG.getConstant(0, VT), ISD::SETNE);
+                                  RHS, DAG.getConstant(0, VT), ISD::SETNE);
     SDValue NotZero = DAG.getNode(ISD::SELECT, dl, VT, isZero,
-                                  DAG.getConstant(1, VT), RHS);
+                                  DAG.getConstant(1, VT), RHS);
     SDValue DIV = DAG.getNode(ISD::UDIV, DL, LHS.getValueType(), MUL, NotZero);
     SDValue Overflow;
     Overflow = DAG.getSetCC(DL, N->getValueType(1), DIV, LHS, ISD::SETNE);
@@ -2296,8 +2292,8 @@ void DAGTypeLegalizer::ExpandIntRes_XMULO(SDNode *N,
   SDValue Temp = DAG.CreateStackTemporary(PtrVT);
   // Temporary for the overflow value, default it to zero.
   SDValue Chain = DAG.getStore(DAG.getEntryNode(), dl,
-                               DAG.getConstant(0, PtrVT), Temp,
-                               MachinePointerInfo(), false, false, 0);
+                               DAG.getConstant(0, PtrVT), Temp,
+                               MachinePointerInfo(), false, false, 0);

   TargetLowering::ArgListTy Args;
   TargetLowering::ArgListEntry Entry;
@@ -2319,16 +2315,17 @@ void DAGTypeLegalizer::ExpandIntRes_XMULO(SDNode *N,
   Args.push_back(Entry);

   SDValue Func = DAG.getExternalSymbol(TLI.getLibcallName(LC), PtrVT);
-  std::pair<SDValue, SDValue> CallInfo =
-    TLI.LowerCallTo(Chain, RetTy, true, false, false, false,
-                    0, TLI.getLibcallCallingConv(LC),
-                    /*isTailCall=*/false,
-                    /*doesNotReturn=*/false, /*isReturnValueUsed=*/true,
-                    Func, Args, DAG, dl);
+  TargetLowering::
+  CallLoweringInfo CLI(Chain, RetTy, true, false, false, false,
+                       0, TLI.getLibcallCallingConv(LC),
+                       /*isTailCall=*/false,
+                       /*doesNotReturn=*/false, /*isReturnValueUsed=*/true,
+                       Func, Args, DAG, dl);
+  std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI);

   SplitInteger(CallInfo.first, Lo, Hi);
   SDValue Temp2 = DAG.getLoad(PtrVT, dl, CallInfo.second, Temp,
-                              MachinePointerInfo(), false, false, false, 0);
+                              MachinePointerInfo(), false, false, false, 0);
   SDValue Ofl = DAG.getSetCC(dl, N->getValueType(1), Temp2,
                              DAG.getConstant(0, PtrVT), ISD::SETNE);
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
index 439aa4d..39337ff 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
@@ -628,7 +628,8 @@ namespace {
   public:
     explicit NodeUpdateListener(DAGTypeLegalizer &dtl,
                                 SmallSetVector<SDNode*, 16> &nta)
-      : DTL(dtl), NodesToAnalyze(nta) {}
+      : SelectionDAG::DAGUpdateListener(dtl.getDAG()),
+        DTL(dtl), NodesToAnalyze(nta) {}

     virtual void NodeDeleted(SDNode *N, SDNode *E) {
       assert(N->getNodeId() != DAGTypeLegalizer::ReadyToProcess &&
@@ -680,7 +681,7 @@ void DAGTypeLegalizer::ReplaceValueWith(SDValue From, SDValue To) {
   SmallSetVector<SDNode*, 16> NodesToAnalyze;
   NodeUpdateListener NUL(*this, NodesToAnalyze);
   do {
-    DAG.ReplaceAllUsesOfValueWith(From, To, &NUL);
+    DAG.ReplaceAllUsesOfValueWith(From, To);

     // The old node may still be present in a map like ExpandedIntegers or
     // PromotedIntegers. Inform maps about the replacement.
@@ -709,7 +710,7 @@ void DAGTypeLegalizer::ReplaceValueWith(SDValue From, SDValue To) {
       SDValue NewVal(M, i);
       if (M->getNodeId() == Processed)
         RemapValue(NewVal);
-      DAG.ReplaceAllUsesOfValueWith(OldVal, NewVal, &NUL);
+      DAG.ReplaceAllUsesOfValueWith(OldVal, NewVal);
       // OldVal may be a target of the ReplacedValues map which was marked
      // NewNode to force reanalysis because it was updated.
Ensure that // anything that ReplacedValues mapped to OldVal will now be mapped @@ -950,7 +951,7 @@ SDValue DAGTypeLegalizer::DisintegrateMERGE_VALUES(SDNode *N, unsigned ResNo) { for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) if (i != ResNo) ReplaceValueWith(SDValue(N, i), SDValue(N->getOperand(i))); - return SDValue(N, ResNo); + return SDValue(N->getOperand(ResNo)); } /// GetSplitDestVTs - Compute the VTs needed for the low/hi parts of a type @@ -1054,12 +1055,14 @@ SDValue DAGTypeLegalizer::MakeLibCall(RTLIB::Libcall LC, EVT RetVT, TLI.getPointerTy()); Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext()); - std::pair CallInfo = - TLI.LowerCallTo(DAG.getEntryNode(), RetTy, isSigned, !isSigned, false, + TargetLowering:: + CallLoweringInfo CLI(DAG.getEntryNode(), RetTy, isSigned, !isSigned, false, false, 0, TLI.getLibcallCallingConv(LC), /*isTailCall=*/false, /*doesNotReturn=*/false, /*isReturnValueUsed=*/true, Callee, Args, DAG, dl); + std::pair CallInfo = TLI.LowerCallTo(CLI); + return CallInfo.first; } @@ -1086,11 +1089,12 @@ DAGTypeLegalizer::ExpandChainLibCall(RTLIB::Libcall LC, TLI.getPointerTy()); Type *RetTy = Node->getValueType(0).getTypeForEVT(*DAG.getContext()); - std::pair CallInfo = - TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false, + TargetLowering:: + CallLoweringInfo CLI(InChain, RetTy, isSigned, !isSigned, false, false, 0, TLI.getLibcallCallingConv(LC), /*isTailCall=*/false, /*doesNotReturn=*/false, /*isReturnValueUsed=*/true, Callee, Args, DAG, Node->getDebugLoc()); + std::pair CallInfo = TLI.LowerCallTo(CLI); return CallInfo; } diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h index e866445..94fc976 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -135,6 +135,8 @@ public: ReplacedValues[SDValue(Old, i)] = SDValue(New, i); } + SelectionDAG &getDAG() const { return DAG; } + private: SDNode *AnalyzeNewNode(SDNode *N); void AnalyzeNewValue(SDValue &Val); @@ -151,7 +153,7 @@ private: /// DisintegrateMERGE_VALUES - Replace each result of the given MERGE_VALUES /// node with the corresponding input operand, except for the result 'ResNo', - /// which is returned. + /// for which the corresponding input operand is returned. SDValue DisintegrateMERGE_VALUES(SDNode *N, unsigned ResNo); SDValue GetVectorElementPointer(SDValue VecPtr, EVT EltVT, SDValue Index); @@ -509,10 +511,12 @@ private: void ScalarizeVectorResult(SDNode *N, unsigned OpNo); SDValue ScalarizeVecRes_MERGE_VALUES(SDNode *N, unsigned ResNo); SDValue ScalarizeVecRes_BinOp(SDNode *N); + SDValue ScalarizeVecRes_TernaryOp(SDNode *N); SDValue ScalarizeVecRes_UnaryOp(SDNode *N); SDValue ScalarizeVecRes_InregOp(SDNode *N); SDValue ScalarizeVecRes_BITCAST(SDNode *N); + SDValue ScalarizeVecRes_BUILD_VECTOR(SDNode *N); SDValue ScalarizeVecRes_CONVERT_RNDSAT(SDNode *N); SDValue ScalarizeVecRes_EXTRACT_SUBVECTOR(SDNode *N); SDValue ScalarizeVecRes_FP_ROUND(SDNode *N); @@ -553,6 +557,7 @@ private: // Vector Result Splitting: <128 x ty> -> 2 x <64 x ty>. 
void SplitVectorResult(SDNode *N, unsigned OpNo); void SplitVecRes_BinOp(SDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVecRes_TernaryOp(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_UnaryOp(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_InregOp(SDNode *N, SDValue &Lo, SDValue &Hi); diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp index a8ff7c6..06f6bd6 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp @@ -168,6 +168,7 @@ void DAGTypeLegalizer::ExpandRes_EXTRACT_VECTOR_ELT(SDNode *N, SDValue &Lo, SDValue &Hi) { SDValue OldVec = N->getOperand(0); unsigned OldElts = OldVec.getValueType().getVectorNumElements(); + EVT OldEltVT = OldVec.getValueType().getVectorElementType(); DebugLoc dl = N->getDebugLoc(); // Convert to a vector of the expanded element type, for example @@ -175,6 +176,15 @@ void DAGTypeLegalizer::ExpandRes_EXTRACT_VECTOR_ELT(SDNode *N, SDValue &Lo, EVT OldVT = N->getValueType(0); EVT NewVT = TLI.getTypeToTransformTo(*DAG.getContext(), OldVT); + if (OldVT != OldEltVT) { + // The result of EXTRACT_VECTOR_ELT may be larger than the element type of + // the input vector. If so, extend the elements of the input vector to the + // same bitwidth as the result before expanding. + assert(OldEltVT.bitsLT(OldVT) && "Result type smaller then element type!"); + EVT NVecVT = EVT::getVectorVT(*DAG.getContext(), OldVT, OldElts); + OldVec = DAG.getNode(ISD::ANY_EXTEND, dl, NVecVT, N->getOperand(0)); + } + SDValue NewVec = DAG.getNode(ISD::BITCAST, dl, EVT::getVectorVT(*DAG.getContext(), NewVT, 2*OldElts), diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp index 9fe4480..704f99b 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -71,6 +71,9 @@ class VectorLegalizer { // operands to a different type and bitcasting the result back to the // original type. SDValue PromoteVectorOp(SDValue Op); + // Implements [SU]INT_TO_FP vector promotion; this is a [zs]ext of the input + // operand to the next size up. + SDValue PromoteVectorOpINT_TO_FP(SDValue Op); public: bool Run(); @@ -231,9 +234,19 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { switch (TLI.getOperationAction(Node->getOpcode(), QueryType)) { case TargetLowering::Promote: - // "Promote" the operation by bitcasting - Result = PromoteVectorOp(Op); - Changed = true; + switch (Op.getOpcode()) { + default: + // "Promote" the operation by bitcasting + Result = PromoteVectorOp(Op); + Changed = true; + break; + case ISD::SINT_TO_FP: + case ISD::UINT_TO_FP: + // "Promote" the operation by extending the operand. + Result = PromoteVectorOpINT_TO_FP(Op); + Changed = true; + break; + } break; case TargetLowering::Legal: break; case TargetLowering::Custom: { @@ -293,6 +306,44 @@ SDValue VectorLegalizer::PromoteVectorOp(SDValue Op) { return DAG.getNode(ISD::BITCAST, dl, VT, Op); } +SDValue VectorLegalizer::PromoteVectorOpINT_TO_FP(SDValue Op) { + // INT_TO_FP operations may require the input operand be promoted even + // when the type is otherwise legal. + EVT VT = Op.getOperand(0).getValueType(); + assert(Op.getNode()->getNumValues() == 1 && + "Can't promote a vector with multiple results!"); + + // Normal getTypeToPromoteTo() doesn't work here, as that will promote + // by widening the vector w/ the same element width and twice the number + // of elements. 
We want the other way around, the same number of elements, + // each twice the width. + // + // Increase the bitwidth of the element to the next pow-of-two + // (which is greater than 8 bits). + unsigned NumElts = VT.getVectorNumElements(); + EVT EltVT = VT.getVectorElementType(); + EltVT = EVT::getIntegerVT(*DAG.getContext(), 2 * EltVT.getSizeInBits()); + assert(EltVT.isSimple() && "Promoting to a non-simple vector type!"); + + // Build a new vector type and check if it is legal. + MVT NVT = MVT::getVectorVT(EltVT.getSimpleVT(), NumElts); + + DebugLoc dl = Op.getDebugLoc(); + SmallVector Operands(Op.getNumOperands()); + + unsigned Opc = Op.getOpcode() == ISD::UINT_TO_FP ? ISD::ZERO_EXTEND : + ISD::SIGN_EXTEND; + for (unsigned j = 0; j != Op.getNumOperands(); ++j) { + if (Op.getOperand(j).getValueType().isVector()) + Operands[j] = DAG.getNode(Opc, dl, NVT, Op.getOperand(j)); + else + Operands[j] = Op.getOperand(j); + } + + return DAG.getNode(Op.getOpcode(), dl, Op.getValueType(), &Operands[0], + Operands.size()); +} + SDValue VectorLegalizer::ExpandLoad(SDValue Op) { DebugLoc dl = Op.getDebugLoc(); diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 5f23f01..4709202 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -48,7 +48,7 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) { case ISD::MERGE_VALUES: R = ScalarizeVecRes_MERGE_VALUES(N, ResNo);break; case ISD::BITCAST: R = ScalarizeVecRes_BITCAST(N); break; - case ISD::BUILD_VECTOR: R = N->getOperand(0); break; + case ISD::BUILD_VECTOR: R = ScalarizeVecRes_BUILD_VECTOR(N); break; case ISD::CONVERT_RNDSAT: R = ScalarizeVecRes_CONVERT_RNDSAT(N); break; case ISD::EXTRACT_SUBVECTOR: R = ScalarizeVecRes_EXTRACT_SUBVECTOR(N); break; case ISD::FP_ROUND: R = ScalarizeVecRes_FP_ROUND(N); break; @@ -115,6 +115,9 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) { case ISD::SRL: R = ScalarizeVecRes_BinOp(N); break; + case ISD::FMA: + R = ScalarizeVecRes_TernaryOp(N); + break; } // If R is null, the sub-method took care of registering the result. @@ -129,6 +132,14 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_BinOp(SDNode *N) { LHS.getValueType(), LHS, RHS); } +SDValue DAGTypeLegalizer::ScalarizeVecRes_TernaryOp(SDNode *N) { + SDValue Op0 = GetScalarizedVector(N->getOperand(0)); + SDValue Op1 = GetScalarizedVector(N->getOperand(1)); + SDValue Op2 = GetScalarizedVector(N->getOperand(2)); + return DAG.getNode(N->getOpcode(), N->getDebugLoc(), + Op0.getValueType(), Op0, Op1, Op2); +} + SDValue DAGTypeLegalizer::ScalarizeVecRes_MERGE_VALUES(SDNode *N, unsigned ResNo) { SDValue Op = DisintegrateMERGE_VALUES(N, ResNo); @@ -141,6 +152,16 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_BITCAST(SDNode *N) { NewVT, N->getOperand(0)); } +SDValue DAGTypeLegalizer::ScalarizeVecRes_BUILD_VECTOR(SDNode *N) { + EVT EltVT = N->getValueType(0).getVectorElementType(); + SDValue InOp = N->getOperand(0); + // The BUILD_VECTOR operands may be of wider element types and + // we may need to truncate them back to the requested return type. 
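  // (Editorial illustration.) E.g. scalarizing a v1i8 BUILD_VECTOR whose
  // operand was previously promoted to i32 must still produce an i8, hence
  // the TRUNCATE back down to EltVT for integer elements below.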
+ if (EltVT.isInteger()) + return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), EltVT, InOp); + return InOp; +} + SDValue DAGTypeLegalizer::ScalarizeVecRes_CONVERT_RNDSAT(SDNode *N) { EVT NewVT = N->getValueType(0).getVectorElementType(); SDValue Op0 = GetScalarizedVector(N->getOperand(0)); @@ -436,7 +457,7 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { N->dump(&DAG); dbgs() << "\n"); SDValue Lo, Hi; - + // See if the target wants to custom expand this node. if (CustomLowerNode(N, N->getValueType(ResNo), true)) return; @@ -448,7 +469,8 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { N->dump(&DAG); dbgs() << "\n"; #endif - llvm_unreachable("Do not know how to split the result of this operator!"); + report_fatal_error("Do not know how to split the result of this " + "operator!\n"); case ISD::MERGE_VALUES: SplitRes_MERGE_VALUES(N, ResNo, Lo, Hi); break; case ISD::VSELECT: @@ -529,6 +551,9 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { case ISD::FREM: SplitVecRes_BinOp(N, Lo, Hi); break; + case ISD::FMA: + SplitVecRes_TernaryOp(N, Lo, Hi); + break; } // If Lo/Hi is null, the sub-method took care of registering results etc. @@ -548,6 +573,22 @@ void DAGTypeLegalizer::SplitVecRes_BinOp(SDNode *N, SDValue &Lo, Hi = DAG.getNode(N->getOpcode(), dl, LHSHi.getValueType(), LHSHi, RHSHi); } +void DAGTypeLegalizer::SplitVecRes_TernaryOp(SDNode *N, SDValue &Lo, + SDValue &Hi) { + SDValue Op0Lo, Op0Hi; + GetSplitVector(N->getOperand(0), Op0Lo, Op0Hi); + SDValue Op1Lo, Op1Hi; + GetSplitVector(N->getOperand(1), Op1Lo, Op1Hi); + SDValue Op2Lo, Op2Hi; + GetSplitVector(N->getOperand(2), Op2Lo, Op2Hi); + DebugLoc dl = N->getDebugLoc(); + + Lo = DAG.getNode(N->getOpcode(), dl, Op0Lo.getValueType(), + Op0Lo, Op1Lo, Op2Lo); + Hi = DAG.getNode(N->getOpcode(), dl, Op0Hi.getValueType(), + Op0Hi, Op1Hi, Op2Hi); +} + void DAGTypeLegalizer::SplitVecRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) { // We know the result is a vector. 
The input may be either a vector or a @@ -977,7 +1018,9 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) { N->dump(&DAG); dbgs() << "\n"; #endif - llvm_unreachable("Do not know how to split this operator's operand!"); + report_fatal_error("Do not know how to split this operator's " + "operand!\n"); + case ISD::SETCC: Res = SplitVecOp_VSETCC(N); break; case ISD::BITCAST: Res = SplitVecOp_BITCAST(N); break; case ISD::EXTRACT_SUBVECTOR: Res = SplitVecOp_EXTRACT_SUBVECTOR(N); break; @@ -1203,15 +1246,15 @@ SDValue DAGTypeLegalizer::SplitVecOp_FP_ROUND(SDNode *N) { DebugLoc DL = N->getDebugLoc(); GetSplitVector(N->getOperand(0), Lo, Hi); EVT InVT = Lo.getValueType(); - + EVT OutVT = EVT::getVectorVT(*DAG.getContext(), ResVT.getVectorElementType(), InVT.getVectorNumElements()); - + Lo = DAG.getNode(ISD::FP_ROUND, DL, OutVT, Lo, N->getOperand(1)); Hi = DAG.getNode(ISD::FP_ROUND, DL, OutVT, Hi, N->getOperand(1)); - + return DAG.getNode(ISD::CONCAT_VECTORS, DL, ResVT, Lo, Hi); -} +} @@ -1755,8 +1798,8 @@ SDValue DAGTypeLegalizer::WidenVecRes_CONCAT_VECTORS(SDNode *N) { if (InputWidened) InOp = GetWidenedVector(InOp); for (unsigned j=0; j < NumInElts; ++j) - Ops[Idx++] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp, - DAG.getIntPtrConstant(j)); + Ops[Idx++] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp, + DAG.getIntPtrConstant(j)); } SDValue UndefVal = DAG.getUNDEF(EltVT); for (; Idx < WidenNumElts; ++Idx) @@ -1816,7 +1859,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_CONVERT_RNDSAT(SDNode *N) { InOp = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InWidenVT, InOp, DAG.getIntPtrConstant(0)); return DAG.getConvertRndSat(WidenVT, dl, InOp, DTyOp, STyOp, RndOp, - SatOp, CvtCode); + SatOp, CvtCode); } } @@ -1832,7 +1875,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_CONVERT_RNDSAT(SDNode *N) { SDValue ExtVal = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, InEltVT, InOp, DAG.getIntPtrConstant(i)); Ops[i] = DAG.getConvertRndSat(WidenVT, dl, ExtVal, DTyOp, STyOp, RndOp, - SatOp, CvtCode); + SatOp, CvtCode); } SDValue UndefVal = DAG.getUNDEF(EltVT); @@ -1936,7 +1979,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_SELECT(SDNode *N) { Cond1 = GetWidenedVector(Cond1); if (Cond1.getValueType() != CondWidenVT) - Cond1 = ModifyToType(Cond1, CondWidenVT); + Cond1 = ModifyToType(Cond1, CondWidenVT); } SDValue InOp1 = GetWidenedVector(N->getOperand(1)); @@ -2202,7 +2245,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_SETCC(SDNode *N) { SDValue CC = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ResVT, WideSETCC, DAG.getIntPtrConstant(0)); - return PromoteTargetBoolean(CC, N->getValueType(0)); + return PromoteTargetBoolean(CC, N->getValueType(0)); } @@ -2371,10 +2414,8 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVector &LdChain, NewVT = FindMemType(DAG, TLI, LdWidth, WidenVT, LdAlign, WidthDiff); NewVTWidth = NewVT.getSizeInBits(); L = DAG.getLoad(NewVT, dl, Chain, BasePtr, - LD->getPointerInfo().getWithOffset(Offset), - isVolatile, - isNonTemporal, isInvariant, - MinAlign(Align, Increment)); + LD->getPointerInfo().getWithOffset(Offset), isVolatile, + isNonTemporal, isInvariant, MinAlign(Align, Increment)); LdChain.push_back(L.getValue(1)); if (L->getValueType(0).isVector()) { SmallVector Loads; @@ -2563,7 +2604,7 @@ void DAGTypeLegalizer::GenWidenVectorStores(SmallVector& StChain, Offset += Increment; BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, DAG.getIntPtrConstant(Increment)); - } while (StWidth != 0 && StWidth >= NewVTWidth); + } while (StWidth != 0 && StWidth >= NewVTWidth); 
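      // (Editorial note, inferred from the loop above.) GenWidenVectorStores
      // emits the widened value in chunks, storing one piece of the current
      // legal type per iteration and bumping BasePtr by Increment; on exit,
      // Idx therefore counts elements of the last chunk type and is rescaled
      // by the line that follows.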
// Restore index back to be relative to the original widen element type Idx = Idx * NewVTWidth / ValEltWidth; } diff --git a/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp b/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp index ff0136e..c3794d5 100644 --- a/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp +++ b/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp @@ -50,7 +50,7 @@ ResourcePriorityQueue::ResourcePriorityQueue(SelectionDAGISel *IS) : const TargetMachine &tm = (*IS->MF).getTarget(); ResourcesModel = tm.getInstrInfo()->CreateTargetScheduleState(&tm,NULL); - // This hard requirment could be relaxed, but for now + // This hard requirement could be relaxed, but for now // do not let it procede. assert (ResourcesModel && "Unimplemented CreateTargetScheduleState."); @@ -318,7 +318,7 @@ void ResourcePriorityQueue::reserveResources(SUnit *SU) { // If packet is now full, reset the state so in the next cycle // we start fresh. - if (Packet.size() >= InstrItins->IssueWidth) { + if (Packet.size() >= InstrItins->SchedModel->IssueWidth) { ResourcesModel->clearResources(); Packet.clear(); } @@ -353,7 +353,7 @@ signed ResourcePriorityQueue::rawRegPressureDelta(SUnit *SU, unsigned RCId) { } /// Estimates change in reg pressure from this SU. -/// It is acheived by trivial tracking of defined +/// It is achieved by trivial tracking of defined /// and used vregs in dependent instructions. /// The RawPressure flag makes this function to ignore /// existing reg file sizes, and report raw def/use diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp index 24da432..b7ce48a 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp @@ -441,19 +441,14 @@ static bool CheckForLiveRegDef(SUnit *SU, unsigned Reg, SmallVector &LRegs, const TargetRegisterInfo *TRI) { bool Added = false; - if (LiveRegDefs[Reg] && LiveRegDefs[Reg] != SU) { - if (RegAdded.insert(Reg)) { - LRegs.push_back(Reg); - Added = true; - } - } - for (const uint16_t *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) - if (LiveRegDefs[*Alias] && LiveRegDefs[*Alias] != SU) { - if (RegAdded.insert(*Alias)) { - LRegs.push_back(*Alias); + for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) { + if (LiveRegDefs[*AI] && LiveRegDefs[*AI] != SU) { + if (RegAdded.insert(*AI)) { + LRegs.push_back(*AI); Added = true; } } + } return Added; } diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp index 2cb5d37..bf0a437 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp @@ -266,7 +266,8 @@ static void GetCostForDef(const ScheduleDAGSDNodes::RegDefIter &RegDefPos, const TargetLowering *TLI, const TargetInstrInfo *TII, const TargetRegisterInfo *TRI, - unsigned &RegClass, unsigned &Cost) { + unsigned &RegClass, unsigned &Cost, + const MachineFunction &MF) { EVT VT = RegDefPos.GetValue(); // Special handling for untyped values. These values can only come from @@ -285,7 +286,7 @@ static void GetCostForDef(const ScheduleDAGSDNodes::RegDefIter &RegDefPos, unsigned Idx = RegDefPos.GetIdx(); const MCInstrDesc Desc = TII->get(Opcode); - const TargetRegisterClass *RC = TII->getRegClass(Desc, Idx, TRI); + const TargetRegisterClass *RC = TII->getRegClass(Desc, Idx, TRI, MF); RegClass = RC->getID(); // FIXME: Cost arbitrarily set to 1 because there doesn't seem to be a // better way to determine it. 
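// (Editorial sketch, not part of the patch.) The CheckForLiveRegDef rewrite
// above, and its twin in ScheduleDAGRRList.cpp below, collapse the old
// two-step walk -- test Reg itself, then iterate TRI->getAliasSet(Reg) --
// into a single loop over MCRegAliasIterator, whose third constructor
// argument asks for Reg to be visited along with its aliases:
//
//   for (MCRegAliasIterator AI(Reg, TRI, /*IncludeSelf=*/true);
//        AI.isValid(); ++AI)
//     if (LiveRegDefs[*AI] && LiveRegDefs[*AI] != SU && RegAdded.insert(*AI))
//       LRegs.push_back(*AI);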
@@ -852,7 +853,7 @@ void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) { } /// After backtracking, the hazard checker needs to be restored to a state -/// corresponding the the current cycle. +/// corresponding the current cycle. void ScheduleDAGRRList::RestoreHazardCheckerBottomUp() { HazardRec->Reset(); @@ -1181,7 +1182,7 @@ static void CheckForLiveRegDef(SUnit *SU, unsigned Reg, SmallSet &RegAdded, SmallVector &LRegs, const TargetRegisterInfo *TRI) { - for (const uint16_t *AliasI = TRI->getOverlaps(Reg); *AliasI; ++AliasI) { + for (MCRegAliasIterator AliasI(Reg, TRI, true); AliasI.isValid(); ++AliasI) { // Check if Ref is live. if (!LiveRegDefs[*AliasI]) continue; @@ -1920,7 +1921,7 @@ bool RegReductionPQBase::HighRegPressure(const SUnit *SU) const { for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG); RegDefPos.IsValid(); RegDefPos.Advance()) { unsigned RCId, Cost; - GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost); + GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost, MF); if ((RegPressure[RCId] + Cost) >= RegLimit[RCId]) return true; @@ -2034,7 +2035,7 @@ void RegReductionPQBase::scheduledNode(SUnit *SU) { continue; unsigned RCId, Cost; - GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost); + GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost, MF); RegPressure[RCId] += Cost; break; } @@ -2049,7 +2050,7 @@ void RegReductionPQBase::scheduledNode(SUnit *SU) { if (SkipRegDefs > 0) continue; unsigned RCId, Cost; - GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost); + GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost, MF); if (RegPressure[RCId] < Cost) { // Register pressure tracking is imprecise. This can happen. But we try // hard not to let it happen because it likely results in poor scheduling. @@ -2330,22 +2331,21 @@ static int BUCompareLatency(SUnit *left, SUnit *right, bool checkPref, // and latency. if (!checkPref || (left->SchedulingPref == Sched::ILP || right->SchedulingPref == Sched::ILP)) { - if (DisableSchedCycles) { + // If neither instruction stalls (!LStall && !RStall) and HazardRecognizer + // is enabled, grouping instructions by cycle, then its height is already + // covered so only its depth matters. We also reach this point if both stall + // but have the same height. + if (!SPQ->getHazardRec()->isEnabled()) { if (LHeight != RHeight) return LHeight > RHeight ? 1 : -1; } - else { - // If neither instruction stalls (!LStall && !RStall) then - // its height is already covered so only its depth matters. We also reach - // this if both stall but have the same height. - int LDepth = left->getDepth() - LPenalty; - int RDepth = right->getDepth() - RPenalty; - if (LDepth != RDepth) { - DEBUG(dbgs() << " Comparing latency of SU (" << left->NodeNum - << ") depth " << LDepth << " vs SU (" << right->NodeNum - << ") depth " << RDepth << "\n"); - return LDepth < RDepth ? 1 : -1; - } + int LDepth = left->getDepth() - LPenalty; + int RDepth = right->getDepth() - RPenalty; + if (LDepth != RDepth) { + DEBUG(dbgs() << " Comparing latency of SU (" << left->NodeNum + << ") depth " << LDepth << " vs SU (" << right->NodeNum + << ") depth " << RDepth << "\n"); + return LDepth < RDepth ? 1 : -1; } if (left->Latency != right->Latency) return left->Latency > right->Latency ? 
1 : -1; @@ -2363,7 +2363,7 @@ static bool BURRSort(SUnit *left, SUnit *right, RegReductionPQBase *SPQ) { bool RHasPhysReg = right->hasPhysRegDefs; if (LHasPhysReg != RHasPhysReg) { #ifndef NDEBUG - const char *PhysRegMsg[] = {" has no physreg", " defines a physreg"}; + const char *const PhysRegMsg[] = {" has no physreg"," defines a physreg"}; #endif DEBUG(dbgs() << " SU (" << left->NodeNum << ") " << PhysRegMsg[LHasPhysReg] << " SU(" << right->NodeNum << ") " diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h index 75940ec..84e41fc 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h @@ -61,6 +61,7 @@ namespace llvm { if (isa(Node)) return true; if (isa(Node)) return true; if (isa(Node)) return true; + if (isa(Node)) return true; if (isa(Node)) return true; if (isa(Node)) return true; if (isa(Node)) return true; @@ -98,12 +99,6 @@ namespace llvm { /// virtual void computeLatency(SUnit *SU); - /// computeOperandLatency - Override dependence edge latency using - /// operand use/def information - /// - virtual void computeOperandLatency(SUnit *Def, SUnit *Use, - SDep& dep) const { } - virtual void computeOperandLatency(SDNode *Def, SDNode *Use, unsigned OpIdx, SDep& dep) const; diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 92671d1..f4fe892 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -14,16 +14,16 @@ #include "llvm/CodeGen/SelectionDAG.h" #include "SDNodeOrdering.h" #include "SDNodeDbgValue.h" +#include "llvm/CallingConv.h" #include "llvm/Constants.h" -#include "llvm/Analysis/DebugInfo.h" -#include "llvm/Analysis/ValueTracking.h" +#include "llvm/DebugInfo.h" +#include "llvm/DerivedTypes.h" #include "llvm/Function.h" #include "llvm/GlobalAlias.h" #include "llvm/GlobalVariable.h" #include "llvm/Intrinsics.h" -#include "llvm/DerivedTypes.h" +#include "llvm/Analysis/ValueTracking.h" #include "llvm/Assembly/Writer.h" -#include "llvm/CallingConv.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFrameInfo.h" @@ -71,7 +71,9 @@ static const fltSemantics *EVTToAPFloatSemantics(EVT VT) { } } -SelectionDAG::DAGUpdateListener::~DAGUpdateListener() {} +// Default null implementations of the callbacks. +void SelectionDAG::DAGUpdateListener::NodeDeleted(SDNode*, SDNode*) {} +void SelectionDAG::DAGUpdateListener::NodeUpdated(SDNode*) {} //===----------------------------------------------------------------------===// // ConstantFPSDNode Class @@ -217,6 +219,22 @@ bool ISD::isScalarToVector(const SDNode *N) { return true; } +/// allOperandsUndef - Return true if the node has at least one operand +/// and all operands of the specified node are ISD::UNDEF. +bool ISD::allOperandsUndef(const SDNode *N) { + // Return false if the node has no operands. + // This is "logically inconsistent" with the definition of "all" but + // is probably the desired behavior. + if (N->getNumOperands() == 0) + return false; + + for (unsigned i = 0, e = N->getNumOperands(); i != e ; ++i) + if (N->getOperand(i).getOpcode() != ISD::UNDEF) + return false; + + return true; +} + /// getSetCCSwappedOperands - Return the operation corresponding to (Y op X) /// when given the operation for (X op Y). 
ISD::CondCode ISD::getSetCCSwappedOperands(ISD::CondCode Operation) { @@ -385,6 +403,7 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) { ID.AddPointer(GA->getGlobal()); ID.AddInteger(GA->getOffset()); ID.AddInteger(GA->getTargetFlags()); + ID.AddInteger(GA->getAddressSpace()); break; } case ISD::BasicBlock: @@ -420,16 +439,25 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) { ID.AddInteger(CP->getTargetFlags()); break; } + case ISD::TargetIndex: { + const TargetIndexSDNode *TI = cast(N); + ID.AddInteger(TI->getIndex()); + ID.AddInteger(TI->getOffset()); + ID.AddInteger(TI->getTargetFlags()); + break; + } case ISD::LOAD: { const LoadSDNode *LD = cast(N); ID.AddInteger(LD->getMemoryVT().getRawBits()); ID.AddInteger(LD->getRawSubclassData()); + ID.AddInteger(LD->getPointerInfo().getAddrSpace()); break; } case ISD::STORE: { const StoreSDNode *ST = cast(N); ID.AddInteger(ST->getMemoryVT().getRawBits()); ID.AddInteger(ST->getRawSubclassData()); + ID.AddInteger(ST->getPointerInfo().getAddrSpace()); break; } case ISD::ATOMIC_CMP_SWAP: @@ -449,6 +477,12 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) { const AtomicSDNode *AT = cast(N); ID.AddInteger(AT->getMemoryVT().getRawBits()); ID.AddInteger(AT->getRawSubclassData()); + ID.AddInteger(AT->getPointerInfo().getAddrSpace()); + break; + } + case ISD::PREFETCH: { + const MemSDNode *PF = cast(N); + ID.AddInteger(PF->getPointerInfo().getAddrSpace()); break; } case ISD::VECTOR_SHUFFLE: { @@ -465,6 +499,10 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) { break; } } // end switch (N->getOpcode()) + + // Target specific memory nodes could also have address spaces to check. + if (N->isTargetMemoryOpcode()) + ID.AddInteger(cast(N)->getPointerInfo().getAddrSpace()); } /// AddNodeIDNode - Generic routine for adding a nodes info to the NodeID @@ -544,16 +582,15 @@ void SelectionDAG::RemoveDeadNodes() { /// RemoveDeadNodes - This method deletes the unreachable nodes in the /// given list, and any nodes that become unreachable as a result. -void SelectionDAG::RemoveDeadNodes(SmallVectorImpl &DeadNodes, - DAGUpdateListener *UpdateListener) { +void SelectionDAG::RemoveDeadNodes(SmallVectorImpl &DeadNodes) { // Process the worklist, deleting the nodes and adding their uses to the // worklist. while (!DeadNodes.empty()) { SDNode *N = DeadNodes.pop_back_val(); - if (UpdateListener) - UpdateListener->NodeDeleted(N, 0); + for (DAGUpdateListener *DUL = UpdateListeners; DUL; DUL = DUL->Next) + DUL->NodeDeleted(N, 0); // Take the node out of the appropriate CSE map. RemoveNodeFromCSEMaps(N); @@ -574,7 +611,7 @@ void SelectionDAG::RemoveDeadNodes(SmallVectorImpl &DeadNodes, } } -void SelectionDAG::RemoveDeadNode(SDNode *N, DAGUpdateListener *UpdateListener){ +void SelectionDAG::RemoveDeadNode(SDNode *N){ SmallVector DeadNodes(1, N); // Create a dummy node that adds a reference to the root node, preventing @@ -582,7 +619,7 @@ void SelectionDAG::RemoveDeadNode(SDNode *N, DAGUpdateListener *UpdateListener){ // dead node.) HandleSDNode Dummy(getRoot()); - RemoveDeadNodes(DeadNodes, UpdateListener); + RemoveDeadNodes(DeadNodes); } void SelectionDAG::DeleteNode(SDNode *N) { @@ -684,8 +721,7 @@ bool SelectionDAG::RemoveNodeFromCSEMaps(SDNode *N) { /// node. This transfer can potentially trigger recursive merging. 
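The AddNodeIDCustom hunks above extend each memory node's CSE profile with its address space, so two operations that differ only in address space no longer fold together. A toy illustration of the underlying rule, that an equality key must mention every distinguishing field (hand-rolled key struct; the real FoldingSetNodeID works by accumulating such integers):

#include <cassert>
#include <cstdint>
#include <tuple>

// A toy CSE key for a load: without AddrSpace in the key, loads from
// distinct address spaces would wrongly compare equal and be merged.
struct LoadKey {
  uint64_t RawVTBits;
  uint64_t SubclassData;
  unsigned AddrSpace;  // the field the patch adds to the profile

  bool operator==(const LoadKey &O) const {
    return std::tie(RawVTBits, SubclassData, AddrSpace) ==
           std::tie(O.RawVTBits, O.SubclassData, O.AddrSpace);
  }
};

int main() {
  LoadKey A{32, 0, /*AddrSpace=*/0};
  LoadKey B{32, 0, /*AddrSpace=*/1};
  assert(!(A == B));  // same type and flags, different address space
}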
/// void -SelectionDAG::AddModifiedNodeToCSEMaps(SDNode *N, - DAGUpdateListener *UpdateListener) { +SelectionDAG::AddModifiedNodeToCSEMaps(SDNode *N) { // For node types that aren't CSE'd, just act as if no identical node // already exists. if (!doNotCSE(N)) { @@ -694,20 +730,19 @@ SelectionDAG::AddModifiedNodeToCSEMaps(SDNode *N, // If there was already an existing matching node, use ReplaceAllUsesWith // to replace the dead one with the existing one. This can cause // recursive merging of other unrelated nodes down the line. - ReplaceAllUsesWith(N, Existing, UpdateListener); + ReplaceAllUsesWith(N, Existing); - // N is now dead. Inform the listener if it exists and delete it. - if (UpdateListener) - UpdateListener->NodeDeleted(N, Existing); + // N is now dead. Inform the listeners and delete it. + for (DAGUpdateListener *DUL = UpdateListeners; DUL; DUL = DUL->Next) + DUL->NodeDeleted(N, Existing); DeleteNodeNotInCSEMaps(N); return; } } - // If the node doesn't already exist, we updated it. Inform a listener if - // it exists. - if (UpdateListener) - UpdateListener->NodeUpdated(N); + // If the node doesn't already exist, we updated it. Inform listeners. + for (DAGUpdateListener *DUL = UpdateListeners; DUL; DUL = DUL->Next) + DUL->NodeUpdated(N); } /// FindModifiedNodeSlot - Find a slot for the specified node if its operands @@ -855,7 +890,7 @@ unsigned SelectionDAG::getEVTAlignment(EVT VT) const { SelectionDAG::SelectionDAG(const TargetMachine &tm, CodeGenOpt::Level OL) : TM(tm), TLI(*tm.getTargetLowering()), TSI(*tm.getSelectionDAGInfo()), OptLevel(OL), EntryNode(ISD::EntryToken, DebugLoc(), getVTList(MVT::Other)), - Root(getEntryNode()), Ordering(0) { + Root(getEntryNode()), Ordering(0), UpdateListeners(0) { AllNodes.push_back(&EntryNode); Ordering = new SDNodeOrdering(); DbgInfo = new SDDbgInfo(); @@ -867,6 +902,7 @@ void SelectionDAG::init(MachineFunction &mf) { } SelectionDAG::~SelectionDAG() { + assert(!UpdateListeners && "Dangling registered DAGUpdateListeners"); allnodes_clear(); delete Ordering; delete DbgInfo; @@ -1084,6 +1120,7 @@ SDValue SelectionDAG::getGlobalAddress(const GlobalValue *GV, DebugLoc DL, ID.AddPointer(GV); ID.AddInteger(Offset); ID.AddInteger(TargetFlags); + ID.AddInteger(GV->getType()->getAddressSpace()); void *IP = 0; if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); @@ -1183,6 +1220,24 @@ SDValue SelectionDAG::getConstantPool(MachineConstantPoolValue *C, EVT VT, return SDValue(N, 0); } +SDValue SelectionDAG::getTargetIndex(int Index, EVT VT, int64_t Offset, + unsigned char TargetFlags) { + FoldingSetNodeID ID; + AddNodeIDNode(ID, ISD::TargetIndex, getVTList(VT), 0, 0); + ID.AddInteger(Index); + ID.AddInteger(Offset); + ID.AddInteger(TargetFlags); + void *IP = 0; + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + return SDValue(E, 0); + + SDNode *N = new (NodeAllocator) TargetIndexSDNode(Index, VT, Offset, + TargetFlags); + CSEMap.InsertNode(N, IP); + AllNodes.push_back(N); + return SDValue(N, 0); +} + SDValue SelectionDAG::getBasicBlock(MachineBasicBlock *MBB) { FoldingSetNodeID ID; AddNodeIDNode(ID, ISD::BasicBlock, getVTList(MVT::Other), 0, 0); @@ -1949,6 +2004,7 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, APInt &KnownZero, APInt InMask = APInt::getLowBitsSet(BitWidth, VT.getSizeInBits()); ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1); KnownZero |= (~InMask); + KnownOne &= (~KnownZero); return; } case ISD::FGETSIGN: @@ -2246,8 +2302,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned 
Depth) const{ } // Handle LOADX separately here. EXTLOAD case will fallthrough. - if (Op.getOpcode() == ISD::LOAD) { - LoadSDNode *LD = cast(Op); + if (LoadSDNode *LD = dyn_cast(Op)) { unsigned ExtType = LD->getExtensionType(); switch (ExtType) { default: break; @@ -2428,6 +2483,24 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, case ISD::FABS: V.clearSign(); return getConstantFP(V, VT); + case ISD::FCEIL: { + APFloat::opStatus fs = V.roundToIntegral(APFloat::rmTowardPositive); + if (fs == APFloat::opOK || fs == APFloat::opInexact) + return getConstantFP(V, VT); + break; + } + case ISD::FTRUNC: { + APFloat::opStatus fs = V.roundToIntegral(APFloat::rmTowardZero); + if (fs == APFloat::opOK || fs == APFloat::opInexact) + return getConstantFP(V, VT); + break; + } + case ISD::FFLOOR: { + APFloat::opStatus fs = V.roundToIntegral(APFloat::rmTowardNegative); + if (fs == APFloat::opOK || fs == APFloat::opInexact) + return getConstantFP(V, VT); + break; + } case ISD::FP_EXTEND: { bool ignored; // This can return overflow, underflow, or inexact; we don't care. @@ -2675,6 +2748,11 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, if (N1 == N2) return N1; break; case ISD::CONCAT_VECTORS: + // Concat of UNDEFs is UNDEF. + if (N1.getOpcode() == ISD::UNDEF && + N2.getOpcode() == ISD::UNDEF) + return getUNDEF(VT); + // A CONCAT_VECTOR with all operands BUILD_VECTOR can be simplified to // one big BUILD_VECTOR. if (N1.getOpcode() == ISD::BUILD_VECTOR && @@ -3708,8 +3786,8 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, DebugLoc dl, SDValue Dst, Entry.Node = Src; Args.push_back(Entry); Entry.Node = Size; Args.push_back(Entry); // FIXME: pass in DebugLoc - std::pair CallResult = - TLI.LowerCallTo(Chain, Type::getVoidTy(*getContext()), + TargetLowering:: + CallLoweringInfo CLI(Chain, Type::getVoidTy(*getContext()), false, false, false, false, 0, TLI.getLibcallCallingConv(RTLIB::MEMCPY), /*isTailCall=*/false, @@ -3717,6 +3795,8 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, DebugLoc dl, SDValue Dst, getExternalSymbol(TLI.getLibcallName(RTLIB::MEMCPY), TLI.getPointerTy()), Args, *this, dl); + std::pair CallResult = TLI.LowerCallTo(CLI); + return CallResult.second; } @@ -3761,8 +3841,8 @@ SDValue SelectionDAG::getMemmove(SDValue Chain, DebugLoc dl, SDValue Dst, Entry.Node = Src; Args.push_back(Entry); Entry.Node = Size; Args.push_back(Entry); // FIXME: pass in DebugLoc - std::pair CallResult = - TLI.LowerCallTo(Chain, Type::getVoidTy(*getContext()), + TargetLowering:: + CallLoweringInfo CLI(Chain, Type::getVoidTy(*getContext()), false, false, false, false, 0, TLI.getLibcallCallingConv(RTLIB::MEMMOVE), /*isTailCall=*/false, @@ -3770,6 +3850,8 @@ SDValue SelectionDAG::getMemmove(SDValue Chain, DebugLoc dl, SDValue Dst, getExternalSymbol(TLI.getLibcallName(RTLIB::MEMMOVE), TLI.getPointerTy()), Args, *this, dl); + std::pair CallResult = TLI.LowerCallTo(CLI); + return CallResult.second; } @@ -3822,8 +3904,8 @@ SDValue SelectionDAG::getMemset(SDValue Chain, DebugLoc dl, SDValue Dst, Entry.isSExt = false; Args.push_back(Entry); // FIXME: pass in DebugLoc - std::pair CallResult = - TLI.LowerCallTo(Chain, Type::getVoidTy(*getContext()), + TargetLowering:: + CallLoweringInfo CLI(Chain, Type::getVoidTy(*getContext()), false, false, false, false, 0, TLI.getLibcallCallingConv(RTLIB::MEMSET), /*isTailCall=*/false, @@ -3831,6 +3913,8 @@ SDValue SelectionDAG::getMemset(SDValue Chain, DebugLoc dl, SDValue Dst, getExternalSymbol(TLI.getLibcallName(RTLIB::MEMSET), TLI.getPointerTy()), Args, 
*this, dl); + std::pair CallResult = TLI.LowerCallTo(CLI); + return CallResult.second; } @@ -3874,6 +3958,7 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT, ID.AddInteger(MemVT.getRawBits()); SDValue Ops[] = {Chain, Ptr, Cmp, Swp}; AddNodeIDNode(ID, Opcode, VTs, Ops, 4); + ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); void* IP = 0; if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { cast(E)->refineAlignment(MMO); @@ -3946,6 +4031,7 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT, ID.AddInteger(MemVT.getRawBits()); SDValue Ops[] = {Chain, Ptr, Val}; AddNodeIDNode(ID, Opcode, VTs, Ops, 3); + ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); void* IP = 0; if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { cast(E)->refineAlignment(MMO); @@ -4002,6 +4088,7 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT, ID.AddInteger(MemVT.getRawBits()); SDValue Ops[] = {Chain, Ptr}; AddNodeIDNode(ID, Opcode, VTs, Ops, 2); + ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); void* IP = 0; if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { cast(E)->refineAlignment(MMO); @@ -4079,6 +4166,7 @@ SelectionDAG::getMemIntrinsicNode(unsigned Opcode, DebugLoc dl, SDVTList VTList, if (VTList.VTs[VTList.NumVTs-1] != MVT::Glue) { FoldingSetNodeID ID; AddNodeIDNode(ID, Opcode, VTList, Ops, NumOps); + ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); void *IP = 0; if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { cast(E)->refineAlignment(MMO); @@ -4198,6 +4286,7 @@ SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, ID.AddInteger(encodeMemSDNodeFlags(ExtType, AM, MMO->isVolatile(), MMO->isNonTemporal(), MMO->isInvariant())); + ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); void *IP = 0; if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { cast(E)->refineAlignment(MMO); @@ -4287,6 +4376,7 @@ SDValue SelectionDAG::getStore(SDValue Chain, DebugLoc dl, SDValue Val, ID.AddInteger(VT.getRawBits()); ID.AddInteger(encodeMemSDNodeFlags(false, ISD::UNINDEXED, MMO->isVolatile(), MMO->isNonTemporal(), MMO->isInvariant())); + ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); void *IP = 0; if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { cast(E)->refineAlignment(MMO); @@ -4354,6 +4444,7 @@ SDValue SelectionDAG::getTruncStore(SDValue Chain, DebugLoc dl, SDValue Val, ID.AddInteger(SVT.getRawBits()); ID.AddInteger(encodeMemSDNodeFlags(true, ISD::UNINDEXED, MMO->isVolatile(), MMO->isNonTemporal(), MMO->isInvariant())); + ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); void *IP = 0; if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { cast(E)->refineAlignment(MMO); @@ -4378,6 +4469,7 @@ SelectionDAG::getIndexedStore(SDValue OrigStore, DebugLoc dl, SDValue Base, AddNodeIDNode(ID, ISD::STORE, VTs, Ops, 4); ID.AddInteger(ST->getMemoryVT().getRawBits()); ID.AddInteger(ST->getRawSubclassData()); + ID.AddInteger(ST->getPointerInfo().getAddrSpace()); void *IP = 0; if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); @@ -4654,13 +4746,7 @@ SDVTList SelectionDAG::getVTList(const EVT *VTs, unsigned NumVTs) { if (I->NumVTs != NumVTs || VTs[0] != I->VTs[0] || VTs[1] != I->VTs[1]) continue; - bool NoMatch = false; - for (unsigned i = 2; i != NumVTs; ++i) - if (VTs[i] != I->VTs[i]) { - NoMatch = true; - break; - } - if (!NoMatch) + if (std::equal(&VTs[2], &VTs[NumVTs], &I->VTs[2])) return *I; } @@ -5237,11 +5323,7 @@ namespace { /// pointed to by a use iterator is deleted, increment the use 
iterator /// so that it doesn't dangle. /// -/// This class also manages a "downlink" DAGUpdateListener, to forward -/// messages to ReplaceAllUsesWith's callers. -/// class RAUWUpdateListener : public SelectionDAG::DAGUpdateListener { - SelectionDAG::DAGUpdateListener *DownLink; SDNode::use_iterator &UI; SDNode::use_iterator &UE; @@ -5249,21 +5331,13 @@ class RAUWUpdateListener : public SelectionDAG::DAGUpdateListener { // Increment the iterator as needed. while (UI != UE && N == *UI) ++UI; - - // Then forward the message. - if (DownLink) DownLink->NodeDeleted(N, E); - } - - virtual void NodeUpdated(SDNode *N) { - // Just forward the message. - if (DownLink) DownLink->NodeUpdated(N); } public: - RAUWUpdateListener(SelectionDAG::DAGUpdateListener *dl, + RAUWUpdateListener(SelectionDAG &d, SDNode::use_iterator &ui, SDNode::use_iterator &ue) - : DownLink(dl), UI(ui), UE(ue) {} + : SelectionDAG::DAGUpdateListener(d), UI(ui), UE(ue) {} }; } @@ -5273,8 +5347,7 @@ public: /// /// This version assumes From has a single result value. /// -void SelectionDAG::ReplaceAllUsesWith(SDValue FromN, SDValue To, - DAGUpdateListener *UpdateListener) { +void SelectionDAG::ReplaceAllUsesWith(SDValue FromN, SDValue To) { SDNode *From = FromN.getNode(); assert(From->getNumValues() == 1 && FromN.getResNo() == 0 && "Cannot replace with this method!"); @@ -5288,7 +5361,7 @@ void SelectionDAG::ReplaceAllUsesWith(SDValue FromN, SDValue To, // is replaced by To, we don't want to replace of all its users with To // too. See PR3018 for more info. SDNode::use_iterator UI = From->use_begin(), UE = From->use_end(); - RAUWUpdateListener Listener(UpdateListener, UI, UE); + RAUWUpdateListener Listener(*this, UI, UE); while (UI != UE) { SDNode *User = *UI; @@ -5307,7 +5380,7 @@ void SelectionDAG::ReplaceAllUsesWith(SDValue FromN, SDValue To, // Now that we have modified User, add it back to the CSE maps. If it // already exists there, recursively merge the results together. - AddModifiedNodeToCSEMaps(User, &Listener); + AddModifiedNodeToCSEMaps(User); } // If we just RAUW'd the root, take note. @@ -5321,8 +5394,7 @@ void SelectionDAG::ReplaceAllUsesWith(SDValue FromN, SDValue To, /// This version assumes that for each value of From, there is a /// corresponding value in To in the same position with the same type. /// -void SelectionDAG::ReplaceAllUsesWith(SDNode *From, SDNode *To, - DAGUpdateListener *UpdateListener) { +void SelectionDAG::ReplaceAllUsesWith(SDNode *From, SDNode *To) { #ifndef NDEBUG for (unsigned i = 0, e = From->getNumValues(); i != e; ++i) assert((!From->hasAnyUseOfValue(i) || @@ -5337,7 +5409,7 @@ void SelectionDAG::ReplaceAllUsesWith(SDNode *From, SDNode *To, // Iterate over just the existing users of From. See the comments in // the ReplaceAllUsesWith above. SDNode::use_iterator UI = From->use_begin(), UE = From->use_end(); - RAUWUpdateListener Listener(UpdateListener, UI, UE); + RAUWUpdateListener Listener(*this, UI, UE); while (UI != UE) { SDNode *User = *UI; @@ -5356,7 +5428,7 @@ void SelectionDAG::ReplaceAllUsesWith(SDNode *From, SDNode *To, // Now that we have modified User, add it back to the CSE maps. If it // already exists there, recursively merge the results together. - AddModifiedNodeToCSEMaps(User, &Listener); + AddModifiedNodeToCSEMaps(User); } // If we just RAUW'd the root, take note. @@ -5369,16 +5441,14 @@ void SelectionDAG::ReplaceAllUsesWith(SDNode *From, SDNode *To, /// /// This version can replace From with any result values. 
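The RAUWUpdateListener rework above leans on the new registration scheme: a DAGUpdateListener now links itself into the DAG's listener list in its constructor and unlinks in its destructor, so the replacement routines no longer thread a listener pointer through every call. A compact sketch of that pattern under simplified names (not the LLVM declarations), assuming listeners are destroyed in LIFO order as LLVM's scoped usage guarantees:

#include <iostream>

struct DAG;

struct Listener {
  DAG &G;
  Listener *Next;              // intrusive link to the previous head
  explicit Listener(DAG &g);
  virtual ~Listener();
  virtual void nodeDeleted(int Id) {}
};

struct DAG {
  Listener *Listeners = nullptr;
  void deleteNode(int Id) {
    for (Listener *L = Listeners; L; L = L->Next)  // broadcast to all
      L->nodeDeleted(Id);
  }
};

Listener::Listener(DAG &g) : G(g), Next(g.Listeners) { g.Listeners = this; }
Listener::~Listener() { G.Listeners = Next; }  // LIFO unlink on scope exit

struct Printer : Listener {
  using Listener::Listener;
  void nodeDeleted(int Id) override { std::cout << "deleted " << Id << '\n'; }
};

int main() {
  DAG G;
  { Printer P(G); G.deleteNode(7); }   // registered only within this scope
  G.deleteNode(8);                     // no listeners left; prints nothing
}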
To must match the /// number and types of values returned by From. -void SelectionDAG::ReplaceAllUsesWith(SDNode *From, - const SDValue *To, - DAGUpdateListener *UpdateListener) { +void SelectionDAG::ReplaceAllUsesWith(SDNode *From, const SDValue *To) { if (From->getNumValues() == 1) // Handle the simple case efficiently. - return ReplaceAllUsesWith(SDValue(From, 0), To[0], UpdateListener); + return ReplaceAllUsesWith(SDValue(From, 0), To[0]); // Iterate over just the existing users of From. See the comments in // the ReplaceAllUsesWith above. SDNode::use_iterator UI = From->use_begin(), UE = From->use_end(); - RAUWUpdateListener Listener(UpdateListener, UI, UE); + RAUWUpdateListener Listener(*this, UI, UE); while (UI != UE) { SDNode *User = *UI; @@ -5398,7 +5468,7 @@ void SelectionDAG::ReplaceAllUsesWith(SDNode *From, // Now that we have modified User, add it back to the CSE maps. If it // already exists there, recursively merge the results together. - AddModifiedNodeToCSEMaps(User, &Listener); + AddModifiedNodeToCSEMaps(User); } // If we just RAUW'd the root, take note. @@ -5409,14 +5479,13 @@ void SelectionDAG::ReplaceAllUsesWith(SDNode *From, /// ReplaceAllUsesOfValueWith - Replace any uses of From with To, leaving /// uses of other values produced by From.getNode() alone. The Deleted /// vector is handled the same way as for ReplaceAllUsesWith. -void SelectionDAG::ReplaceAllUsesOfValueWith(SDValue From, SDValue To, - DAGUpdateListener *UpdateListener){ +void SelectionDAG::ReplaceAllUsesOfValueWith(SDValue From, SDValue To){ // Handle the really simple, really trivial case efficiently. if (From == To) return; // Handle the simple, trivial, case efficiently. if (From.getNode()->getNumValues() == 1) { - ReplaceAllUsesWith(From, To, UpdateListener); + ReplaceAllUsesWith(From, To); return; } @@ -5424,7 +5493,7 @@ void SelectionDAG::ReplaceAllUsesOfValueWith(SDValue From, SDValue To, // the ReplaceAllUsesWith above. SDNode::use_iterator UI = From.getNode()->use_begin(), UE = From.getNode()->use_end(); - RAUWUpdateListener Listener(UpdateListener, UI, UE); + RAUWUpdateListener Listener(*this, UI, UE); while (UI != UE) { SDNode *User = *UI; bool UserRemovedFromCSEMaps = false; @@ -5460,7 +5529,7 @@ void SelectionDAG::ReplaceAllUsesOfValueWith(SDValue From, SDValue To, // Now that we have modified User, add it back to the CSE maps. If it // already exists there, recursively merge the results together. - AddModifiedNodeToCSEMaps(User, &Listener); + AddModifiedNodeToCSEMaps(User); } // If we just RAUW'd the root, take note. @@ -5489,11 +5558,10 @@ namespace { /// handled the same way as for ReplaceAllUsesWith. void SelectionDAG::ReplaceAllUsesOfValuesWith(const SDValue *From, const SDValue *To, - unsigned Num, - DAGUpdateListener *UpdateListener){ + unsigned Num){ // Handle the simple, trivial case efficiently. if (Num == 1) - return ReplaceAllUsesOfValueWith(*From, *To, UpdateListener); + return ReplaceAllUsesOfValueWith(*From, *To); // Read up all the uses and make records of them. This helps // processing new uses that are introduced during the @@ -5538,7 +5606,7 @@ void SelectionDAG::ReplaceAllUsesOfValuesWith(const SDValue *From, // Now that we have modified User, add it back to the CSE maps. If it // already exists there, recursively merge the results together. - AddModifiedNodeToCSEMaps(User, UpdateListener); + AddModifiedNodeToCSEMaps(User); } } @@ -5579,7 +5647,7 @@ unsigned SelectionDAG::AssignTopologicalOrder() { } } - // Visit all the nodes. 
As we iterate, moves nodes into sorted order,
+  // Visit all the nodes. As we iterate, move nodes into sorted order,
   // such that by the time the end is reached all nodes will be sorted.
   for (allnodes_iterator I = allnodes_begin(),E = allnodes_end(); I != E; ++I) {
     SDNode *N = I;
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index f1e879b..ba5bd79 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -21,6 +21,7 @@
 #include "llvm/Analysis/ConstantFolding.h"
 #include "llvm/Constants.h"
 #include "llvm/CallingConv.h"
+#include "llvm/DebugInfo.h"
 #include "llvm/DerivedTypes.h"
 #include "llvm/Function.h"
 #include "llvm/GlobalVariable.h"
@@ -42,7 +43,6 @@
 #include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/SelectionDAG.h"
-#include "llvm/Analysis/DebugInfo.h"
 #include "llvm/Target/TargetData.h"
 #include "llvm/Target/TargetFrameLowering.h"
 #include "llvm/Target/TargetInstrInfo.h"
@@ -51,6 +51,7 @@
 #include "llvm/Target/TargetLowering.h"
 #include "llvm/Target/TargetOptions.h"
 #include "llvm/Support/CommandLine.h"
+#include "llvm/Support/IntegersSubsetMapping.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/MathExtras.h"
@@ -843,7 +844,7 @@ void SelectionDAGBuilder::clear() {
 }
 
 /// clearDanglingDebugInfo - Clear the dangling debug information
-/// map. This function is seperated from the clear so that debug
+/// map. This function is separated from the clear so that debug
 /// information that is dangling in a basic block can be properly
 /// resolved in a different basic block. This allows the
 /// SelectionDAG to resolve dangling debug information attached
@@ -941,7 +942,7 @@ void SelectionDAGBuilder::visit(unsigned Opcode, const User &I) {
   default: llvm_unreachable("Unknown instruction type encountered!");
     // Build the switch statement using the Instruction.def file.
 #define HANDLE_INST(NUM, OPCODE, CLASS) \
-    case Instruction::OPCODE: visit##OPCODE((CLASS&)I); break;
+    case Instruction::OPCODE: visit##OPCODE((const CLASS&)I); break;
 #include "llvm/Instruction.def"
   }
 
@@ -1578,17 +1579,18 @@ void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB,
     } else
       Cond = DAG.getSetCC(dl, MVT::i1, CondLHS, getValue(CB.CmpRHS), CB.CC);
   } else {
-    assert(CB.CC == ISD::SETLE && "Can handle only LE ranges now");
+    assert(CB.CC == ISD::SETCC_INVALID &&
+           "Condition is undefined for to-the-range belonging check.");
 
     const APInt& Low = cast<ConstantInt>(CB.CmpLHS)->getValue();
     const APInt& High = cast<ConstantInt>(CB.CmpRHS)->getValue();
 
     SDValue CmpOp = getValue(CB.CmpMHS);
     EVT VT = CmpOp.getValueType();
-
-    if (cast<ConstantInt>(CB.CmpLHS)->isMinValue(true)) {
+
+    if (cast<ConstantInt>(CB.CmpLHS)->isMinValue(false)) {
       Cond = DAG.getSetCC(dl, MVT::i1, CmpOp, DAG.getConstant(High, VT),
-                          ISD::SETLE);
+                          ISD::SETULE);
     } else {
       SDValue SUB = DAG.getNode(ISD::SUB, dl, VT, CmpOp, DAG.getConstant(Low, VT));
@@ -1826,9 +1828,13 @@ void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) {
   MachineBasicBlock *LandingPad = FuncInfo.MBBMap[I.getSuccessor(1)];
 
   const Value *Callee(I.getCalledValue());
+  const Function *Fn = dyn_cast<Function>(Callee);
   if (isa<InlineAsm>(Callee))
     visitInlineAsm(&I);
-  else
+  else if (Fn && Fn->isIntrinsic()) {
+    assert(Fn->getIntrinsicID() == Intrinsic::donothing);
+    // Ignore invokes to @llvm.donothing: jump directly to the next BB.
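The visitSwitchCase change above also rewrites the range test in unsigned terms: either a single X <=u High when Low is the minimum value, or the usual subtract-and-compare trick. The trick itself, in ordinary fixed-width arithmetic (helper name invented for the sketch):

#include <cassert>
#include <cstdint>

// Membership test Low <= X && X <= High folded into one unsigned compare:
// X - Low wraps around for X below Low, so a single comparison against
// High - Low covers both bounds.
static bool inRange(uint32_t X, uint32_t Low, uint32_t High) {
  return X - Low <= High - Low;
}

int main() {
  assert(inRange(5, 3, 9));
  assert(!inRange(2, 3, 9));    // wraps to a huge value, fails the compare
  assert(!inRange(10, 3, 9));
  assert(inRange(0, 0, 9));     // Low == 0 degenerates to X <= High
}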
+ } else LowerCallTo(&I, getValue(Callee), false, LandingPad); // If the value of the invoke is used outside of its defining block, make it @@ -1901,8 +1907,6 @@ bool SelectionDAGBuilder::handleSmallSwitchRange(CaseRec& CR, const Value* SV, MachineBasicBlock *Default, MachineBasicBlock *SwitchBB) { - Case& BackCase = *(CR.Range.second-1); - // Size is the number of Cases represented by this range. size_t Size = CR.Range.second - CR.Range.first; if (Size > 3) @@ -1970,11 +1974,28 @@ bool SelectionDAGBuilder::handleSmallSwitchRange(CaseRec& CR, } } + // Order cases by weight so the most likely case will be checked first. + BranchProbabilityInfo *BPI = FuncInfo.BPI; + if (BPI) { + for (CaseItr I = CR.Range.first, IE = CR.Range.second; I != IE; ++I) { + uint32_t IWeight = BPI->getEdgeWeight(SwitchBB->getBasicBlock(), + I->BB->getBasicBlock()); + for (CaseItr J = CR.Range.first; J < I; ++J) { + uint32_t JWeight = BPI->getEdgeWeight(SwitchBB->getBasicBlock(), + J->BB->getBasicBlock()); + if (IWeight > JWeight) + std::swap(*I, *J); + } + } + } // Rearrange the case blocks so that the last one falls through if possible. - if (NextBlock && Default != NextBlock && BackCase.BB != NextBlock) { + Case &BackCase = *(CR.Range.second-1); + if (Size > 1 && + NextBlock && Default != NextBlock && BackCase.BB != NextBlock) { // The last case block won't fall through into 'NextBlock' if we emit the // branches in this order. See if rearranging a case value would help. - for (CaseItr I = CR.Range.first, E = CR.Range.second-1; I != E; ++I) { + // We start at the bottom as it's the case with the least weight. + for (Case *I = &*(CR.Range.second-2), *E = &*CR.Range.first-1; I != E; --I){ if (I->BB == NextBlock) { std::swap(*I, BackCase); break; @@ -2006,7 +2027,7 @@ bool SelectionDAGBuilder::handleSmallSwitchRange(CaseRec& CR, CC = ISD::SETEQ; LHS = SV; RHS = I->High; MHS = NULL; } else { - CC = ISD::SETLE; + CC = ISD::SETCC_INVALID; LHS = I->Low; MHS = SV; RHS = I->High; } @@ -2031,14 +2052,14 @@ bool SelectionDAGBuilder::handleSmallSwitchRange(CaseRec& CR, } static inline bool areJTsAllowed(const TargetLowering &TLI) { - return !TLI.getTargetMachine().Options.DisableJumpTables && + return TLI.supportJumpTables() && (TLI.isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) || TLI.isOperationLegalOrCustom(ISD::BRIND, MVT::Other)); } static APInt ComputeRange(const APInt &First, const APInt &Last) { uint32_t BitWidth = std::max(Last.getBitWidth(), First.getBitWidth()) + 1; - APInt LastExt = Last.sext(BitWidth), FirstExt = First.sext(BitWidth); + APInt LastExt = Last.zext(BitWidth), FirstExt = First.zext(BitWidth); return (LastExt - FirstExt + 1ULL); } @@ -2104,7 +2125,7 @@ bool SelectionDAGBuilder::handleJTSwitchCase(CaseRec &CR, const APInt &Low = cast(I->Low)->getValue(); const APInt &High = cast(I->High)->getValue(); - if (Low.sle(TEI) && TEI.sle(High)) { + if (Low.ule(TEI) && TEI.ule(High)) { DestBBs.push_back(I->BB); if (TEI==High) ++I; @@ -2261,7 +2282,7 @@ bool SelectionDAGBuilder::handleBTSplitSwitchCase(CaseRec& CR, // Create a CaseBlock record representing a conditional branch to // the LHS node if the value being switched on SV is less than C. // Otherwise, branch to LHS. 
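The loop added in handleSmallSwitchRange above is an insertion-style sort by edge weight, so the hottest case is emitted first in the compare chain. Reduced to plain pairs, with weights precomputed instead of queried from BranchProbabilityInfo:

#include <cassert>
#include <cstdint>
#include <utility>
#include <vector>

// Each element is (case value, edge weight); heavier cases move toward
// the front so the most likely value costs the fewest compares at run time.
static void orderByWeight(std::vector<std::pair<int, uint32_t>> &Cases) {
  for (size_t I = 0; I < Cases.size(); ++I)
    for (size_t J = 0; J < I; ++J)
      if (Cases[I].second > Cases[J].second)
        std::swap(Cases[I], Cases[J]);
}

int main() {
  std::vector<std::pair<int, uint32_t>> Cases = {{1, 10}, {2, 90}, {3, 40}};
  orderByWeight(Cases);
  assert(Cases[0].first == 2 && Cases[2].first == 1);
}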
-  CaseBlock CB(ISD::SETLT, SV, C, NULL, TrueBB, FalseBB, CR.CaseBB);
+  CaseBlock CB(ISD::SETULT, SV, C, NULL, TrueBB, FalseBB, CR.CaseBB);
 
   if (CR.CaseBB == SwitchBB)
     visitSwitchCase(CB, SwitchBB);
@@ -2333,7 +2354,7 @@ bool SelectionDAGBuilder::handleBitTestsSwitchCase(CaseRec& CR,
   // Optimize the case where all the case values fit in a
   // word without having to subtract minValue. In this case,
   // we can optimize away the subtraction.
-  if (minValue.isNonNegative() && maxValue.slt(IntPtrBits)) {
+  if (maxValue.ult(IntPtrBits)) {
     cmpRange = maxValue;
   } else {
     lowBound = minValue;
@@ -2407,57 +2428,46 @@ bool SelectionDAGBuilder::handleBitTestsSwitchCase(CaseRec& CR,
 
 /// Clusterify - Transform simple list of Cases into list of CaseRange's
 size_t SelectionDAGBuilder::Clusterify(CaseVector& Cases,
                                        const SwitchInst& SI) {
-  size_t numCmps = 0;
+
+  /// Use a shorter form of declaration, and also
+  /// show that we want to use CRSBuilder as Clusterifier.
+  typedef IntegersSubsetMapping Clusterifier;
+
+  Clusterifier TheClusterifier;
 
-  BranchProbabilityInfo *BPI = FuncInfo.BPI;
   // Start with "simple" cases
   for (SwitchInst::ConstCaseIt i = SI.case_begin(), e = SI.case_end();
        i != e; ++i) {
     const BasicBlock *SuccBB = i.getCaseSuccessor();
     MachineBasicBlock *SMBB = FuncInfo.MBBMap[SuccBB];
 
-    uint32_t ExtraWeight = BPI ? BPI->getEdgeWeight(SI.getParent(), SuccBB) : 0;
-
-    Cases.push_back(Case(i.getCaseValue(), i.getCaseValue(),
-                         SMBB, ExtraWeight));
-  }
-  std::sort(Cases.begin(), Cases.end(), CaseCmp());
-
-  // Merge case into clusters
-  if (Cases.size() >= 2)
-    // Must recompute end() each iteration because it may be
-    // invalidated by erase if we hold on to it
-    for (CaseItr I = Cases.begin(), J = llvm::next(Cases.begin());
-         J != Cases.end(); ) {
-      const APInt& nextValue = cast<ConstantInt>(J->Low)->getValue();
-      const APInt& currentValue = cast<ConstantInt>(I->High)->getValue();
-      MachineBasicBlock* nextBB = J->BB;
-      MachineBasicBlock* currentBB = I->BB;
-
-      // If the two neighboring cases go to the same destination, merge them
-      // into a single case.
-      if ((nextValue - currentValue == 1) && (currentBB == nextBB)) {
-        I->High = J->High;
-        J = Cases.erase(J);
-
-        if (BranchProbabilityInfo *BPI = FuncInfo.BPI) {
-          uint32_t CurWeight = currentBB->getBasicBlock() ?
-            BPI->getEdgeWeight(SI.getParent(), currentBB->getBasicBlock()) : 16;
-          uint32_t NextWeight = nextBB->getBasicBlock() ?
-            BPI->getEdgeWeight(SI.getParent(), nextBB->getBasicBlock()) : 16;
-
-          BPI->setEdgeWeight(SI.getParent(), currentBB->getBasicBlock(),
-                             CurWeight + NextWeight);
-        }
-      } else {
-        I = J++;
-      }
+    TheClusterifier.add(i.getCaseValueEx(), SMBB);
+  }
+
+  TheClusterifier.optimize();
+
+  BranchProbabilityInfo *BPI = FuncInfo.BPI;
+  size_t numCmps = 0;
+  for (Clusterifier::RangeIterator i = TheClusterifier.begin(),
+       e = TheClusterifier.end(); i != e; ++i, ++numCmps) {
+    Clusterifier::Cluster &C = *i;
+    unsigned W = 0;
+    if (BPI) {
+      W = BPI->getEdgeWeight(SI.getParent(), C.second->getBasicBlock());
+      if (!W)
+        W = 16;
+      W *= C.first.Weight;
+      BPI->setEdgeWeight(SI.getParent(), C.second->getBasicBlock(), W);
     }
-    for (CaseItr I=Cases.begin(), E=Cases.end(); I!=E; ++I, ++numCmps) {
-      if (I->Low != I->High)
-        // A range counts double, since it requires two compares.
-        ++numCmps;
+    // FIXME: Currently works with ConstantInt based numbers.
+    // Changing it to APInt based is pretty heavy for this commit.
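Clusterify now delegates range building to IntegersSubsetMapping, but the core idea is unchanged: sort the cases, then coalesce values that are contiguous and share a destination. An approximation of that merge step in isolation (simplified types; the real clusterifier also tracks weights):

#include <algorithm>
#include <cassert>
#include <utility>
#include <vector>

struct Cluster { int Low, High, Dest; };

// Merge sorted single-value (value, dest) cases into [Low, High] ranges.
static std::vector<Cluster> clusterify(std::vector<std::pair<int,int>> Cases) {
  std::sort(Cases.begin(), Cases.end());
  std::vector<Cluster> Out;
  for (auto &C : Cases) {
    if (!Out.empty() && Out.back().Dest == C.second &&
        Out.back().High + 1 == C.first)
      Out.back().High = C.first;          // extend the open range
    else
      Out.push_back({C.first, C.first, C.second});
  }
  return Out;
}

int main() {
  auto R = clusterify({{1, 0}, {2, 0}, {3, 0}, {5, 1}});
  assert(R.size() == 2 && R[0].Low == 1 && R[0].High == 3 && R[1].Low == 5);
}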
+ Cases.push_back(Case(C.first.getLow().toConstantInt(), + C.first.getHigh().toConstantInt(), C.second, W)); + + if (C.first.getLow() != C.first.getHigh()) + // A range counts double, since it requires two compares. + ++numCmps; } return numCmps; @@ -2804,7 +2814,7 @@ void SelectionDAGBuilder::visitExtractElement(const User &I) { } // Utility for visitShuffleVector - Return true if every element in Mask, -// begining from position Pos and ending in Pos+Size, falls within the +// beginning from position Pos and ending in Pos+Size, falls within the // specified sequential range [L, L+Pos). or is undef. static bool isSequentialInRange(const SmallVectorImpl &Mask, unsigned Pos, unsigned Size, int Low) { @@ -4914,6 +4924,16 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::pow: visitPow(I); return 0; + case Intrinsic::fabs: + setValue(&I, DAG.getNode(ISD::FABS, dl, + getValue(I.getArgOperand(0)).getValueType(), + getValue(I.getArgOperand(0)))); + return 0; + case Intrinsic::floor: + setValue(&I, DAG.getNode(ISD::FFLOOR, dl, + getValue(I.getArgOperand(0)).getValueType(), + getValue(I.getArgOperand(0)))); + return 0; case Intrinsic::fma: setValue(&I, DAG.getNode(ISD::FMA, dl, getValue(I.getArgOperand(0)).getValueType(), @@ -4921,6 +4941,29 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { getValue(I.getArgOperand(1)), getValue(I.getArgOperand(2)))); return 0; + case Intrinsic::fmuladd: { + EVT VT = TLI.getValueType(I.getType()); + if (TM.Options.AllowFPOpFusion != FPOpFusion::Strict && + TLI.isOperationLegal(ISD::FMA, VT) && + TLI.isFMAFasterThanMulAndAdd(VT)){ + setValue(&I, DAG.getNode(ISD::FMA, dl, + getValue(I.getArgOperand(0)).getValueType(), + getValue(I.getArgOperand(0)), + getValue(I.getArgOperand(1)), + getValue(I.getArgOperand(2)))); + } else { + SDValue Mul = DAG.getNode(ISD::FMUL, dl, + getValue(I.getArgOperand(0)).getValueType(), + getValue(I.getArgOperand(0)), + getValue(I.getArgOperand(1))); + SDValue Add = DAG.getNode(ISD::FADD, dl, + getValue(I.getArgOperand(0)).getValueType(), + Mul, + getValue(I.getArgOperand(2))); + setValue(&I, Add); + } + return 0; + } case Intrinsic::convert_to_fp16: setValue(&I, DAG.getNode(ISD::FP32_TO_FP16, dl, MVT::i16, getValue(I.getArgOperand(0)))); @@ -5077,16 +5120,21 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { return 0; } TargetLowering::ArgListTy Args; - std::pair Result = - TLI.LowerCallTo(getRoot(), I.getType(), + TargetLowering:: + CallLoweringInfo CLI(getRoot(), I.getType(), false, false, false, false, 0, CallingConv::C, /*isTailCall=*/false, /*doesNotRet=*/false, /*isReturnValueUsed=*/true, DAG.getExternalSymbol(TrapFuncName.data(), TLI.getPointerTy()), Args, DAG, getCurDebugLoc()); + std::pair Result = TLI.LowerCallTo(CLI); DAG.setRoot(Result.second); return 0; } + case Intrinsic::debugtrap: { + DAG.setRoot(DAG.getNode(ISD::DEBUGTRAP, dl,MVT::Other, getRoot())); + return 0; + } case Intrinsic::uadd_with_overflow: case Intrinsic::sadd_with_overflow: case Intrinsic::usub_with_overflow: @@ -5139,6 +5187,9 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::lifetime_end: // Discard region information. return 0; + case Intrinsic::donothing: + // ignore + return 0; } } @@ -5157,14 +5208,13 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, // Check whether the function can return without sret-demotion. 
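The fmuladd lowering added above chooses between one fused multiply-add node and an explicit FMUL followed by FADD, gated on three target queries. The same decision in ordinary C++, with booleans standing in for AllowFPOpFusion, isOperationLegal and isFMAFasterThanMulAndAdd (their values here are assumptions for the demo):

#include <cmath>
#include <iostream>

// Stand-ins for the target queries consulted by the new lowering code.
static const bool FusionAllowed = true; // AllowFPOpFusion != FPOpFusion::Strict
static const bool FMALegal = true;      // isOperationLegal(ISD::FMA, VT)
static const bool FMAFaster = true;     // isFMAFasterThanMulAndAdd(VT)

static double lowerFMulAdd(double A, double B, double C) {
  if (FusionAllowed && FMALegal && FMAFaster)
    return std::fma(A, B, C); // one ISD::FMA node: a single rounding
  return A * B + C;           // two nodes: ISD::FMUL then ISD::FADD
}

int main() {
  double A = 134217729.0;     // 2^27 + 1: A*A is not exactly representable
  double C = -(A * A);        // negate the already-rounded product
  // Prints 1 on the fused path but 0 on the mul+add path, which is why
  // the choice is gated on the fp-contraction model.
  std::cout << lowerFMulAdd(A, A, C) << '\n';
}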
SmallVector Outs; - SmallVector Offsets; GetReturnInfo(RetTy, CS.getAttributes().getRetAttributes(), - Outs, TLI, &Offsets); + Outs, TLI); bool CanLowerReturn = TLI.CanLowerReturn(CS.getCallingConv(), - DAG.getMachineFunction(), - FTy->isVarArg(), Outs, - FTy->getContext()); + DAG.getMachineFunction(), + FTy->isVarArg(), Outs, + FTy->getContext()); SDValue DemoteStackSlot; int DemoteStackIdx = -100; @@ -5247,16 +5297,10 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, if (isTailCall && TM.Options.EnableFastISel) isTailCall = false; - std::pair Result = - TLI.LowerCallTo(getRoot(), RetTy, - CS.paramHasAttr(0, Attribute::SExt), - CS.paramHasAttr(0, Attribute::ZExt), FTy->isVarArg(), - CS.paramHasAttr(0, Attribute::InReg), FTy->getNumParams(), - CS.getCallingConv(), - isTailCall, - CS.doesNotReturn(), - !CS.getInstruction()->use_empty(), - Callee, Args, DAG, getCurDebugLoc()); + TargetLowering:: + CallLoweringInfo CLI(getRoot(), RetTy, FTy, isTailCall, Callee, Args, DAG, + getCurDebugLoc(), CS); + std::pair Result = TLI.LowerCallTo(CLI); assert((isTailCall || Result.second.getNode()) && "Non-null chain expected with non-tail call!"); assert((Result.second.getNode() || !Result.first.getNode()) && @@ -5272,7 +5316,13 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, ComputeValueVTs(TLI, PtrRetTy, PVTs); assert(PVTs.size() == 1 && "Pointers should fit in one register"); EVT PtrVT = PVTs[0]; - unsigned NumValues = Outs.size(); + + SmallVector RetTys; + SmallVector Offsets; + RetTy = FTy->getReturnType(); + ComputeValueVTs(TLI, RetTy, RetTys, &Offsets); + + unsigned NumValues = RetTys.size(); SmallVector Values(NumValues); SmallVector Chains(NumValues); @@ -5280,8 +5330,7 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, SDValue Add = DAG.getNode(ISD::ADD, getCurDebugLoc(), PtrVT, DemoteStackSlot, DAG.getConstant(Offsets[i], PtrVT)); - SDValue L = DAG.getLoad(Outs[i].VT, getCurDebugLoc(), Result.second, - Add, + SDValue L = DAG.getLoad(RetTys[i], getCurDebugLoc(), Result.second, Add, MachinePointerInfo::getFixedStack(DemoteStackIdx, Offsets[i]), false, false, false, 1); Values[i] = L; @@ -5292,30 +5341,10 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, MVT::Other, &Chains[0], NumValues); PendingLoads.push_back(Chain); - // Collect the legal value parts into potentially illegal values - // that correspond to the original function's return values. - SmallVector RetTys; - RetTy = FTy->getReturnType(); - ComputeValueVTs(TLI, RetTy, RetTys); - ISD::NodeType AssertOp = ISD::DELETED_NODE; - SmallVector ReturnValues; - unsigned CurReg = 0; - for (unsigned I = 0, E = RetTys.size(); I != E; ++I) { - EVT VT = RetTys[I]; - EVT RegisterVT = TLI.getRegisterType(RetTy->getContext(), VT); - unsigned NumRegs = TLI.getNumRegisters(RetTy->getContext(), VT); - - SDValue ReturnValue = - getCopyFromParts(DAG, getCurDebugLoc(), &Values[CurReg], NumRegs, - RegisterVT, VT, AssertOp); - ReturnValues.push_back(ReturnValue); - CurReg += NumRegs; - } - setValue(CS.getInstruction(), DAG.getNode(ISD::MERGE_VALUES, getCurDebugLoc(), DAG.getVTList(&RetTys[0], RetTys.size()), - &ReturnValues[0], ReturnValues.size())); + &Values[0], Values.size())); } // Assign order to nodes here. 
If the call does not produce a result, it won't @@ -5482,6 +5511,22 @@ bool SelectionDAGBuilder::visitMemCmpCall(const CallInst &I) { return false; } +/// visitUnaryFloatCall - If a call instruction is a unary floating-point +/// operation (as expected), translate it to an SDNode with the specified opcode +/// and return true. +bool SelectionDAGBuilder::visitUnaryFloatCall(const CallInst &I, + unsigned Opcode) { + // Sanity check that it really is a unary floating-point call. + if (I.getNumArgOperands() != 1 || + !I.getArgOperand(0)->getType()->isFloatingPointTy() || + I.getType() != I.getArgOperand(0)->getType() || + !I.onlyReadsMemory()) + return false; + + SDValue Tmp = getValue(I.getArgOperand(0)); + setValue(&I, DAG.getNode(Opcode, getCurDebugLoc(), Tmp.getValueType(), Tmp)); + return true; +} void SelectionDAGBuilder::visitCall(const CallInst &I) { // Handle inline assembly differently. @@ -5512,150 +5557,97 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) { // Check for well-known libc/libm calls. If the function is internal, it // can't be a library call. - if (!F->hasLocalLinkage() && F->hasName()) { - StringRef Name = F->getName(); - if ((LibInfo->has(LibFunc::copysign) && Name == "copysign") || - (LibInfo->has(LibFunc::copysignf) && Name == "copysignf") || - (LibInfo->has(LibFunc::copysignl) && Name == "copysignl")) { + LibFunc::Func Func; + if (!F->hasLocalLinkage() && F->hasName() && + LibInfo->getLibFunc(F->getName(), Func) && + LibInfo->hasOptimizedCodeGen(Func)) { + switch (Func) { + default: break; + case LibFunc::copysign: + case LibFunc::copysignf: + case LibFunc::copysignl: if (I.getNumArgOperands() == 2 && // Basic sanity checks. I.getArgOperand(0)->getType()->isFloatingPointTy() && I.getType() == I.getArgOperand(0)->getType() && - I.getType() == I.getArgOperand(1)->getType()) { + I.getType() == I.getArgOperand(1)->getType() && + I.onlyReadsMemory()) { SDValue LHS = getValue(I.getArgOperand(0)); SDValue RHS = getValue(I.getArgOperand(1)); setValue(&I, DAG.getNode(ISD::FCOPYSIGN, getCurDebugLoc(), LHS.getValueType(), LHS, RHS)); return; } - } else if ((LibInfo->has(LibFunc::fabs) && Name == "fabs") || - (LibInfo->has(LibFunc::fabsf) && Name == "fabsf") || - (LibInfo->has(LibFunc::fabsl) && Name == "fabsl")) { - if (I.getNumArgOperands() == 1 && // Basic sanity checks. - I.getArgOperand(0)->getType()->isFloatingPointTy() && - I.getType() == I.getArgOperand(0)->getType()) { - SDValue Tmp = getValue(I.getArgOperand(0)); - setValue(&I, DAG.getNode(ISD::FABS, getCurDebugLoc(), - Tmp.getValueType(), Tmp)); + break; + case LibFunc::fabs: + case LibFunc::fabsf: + case LibFunc::fabsl: + if (visitUnaryFloatCall(I, ISD::FABS)) return; - } - } else if ((LibInfo->has(LibFunc::sin) && Name == "sin") || - (LibInfo->has(LibFunc::sinf) && Name == "sinf") || - (LibInfo->has(LibFunc::sinl) && Name == "sinl")) { - if (I.getNumArgOperands() == 1 && // Basic sanity checks. - I.getArgOperand(0)->getType()->isFloatingPointTy() && - I.getType() == I.getArgOperand(0)->getType() && - I.onlyReadsMemory()) { - SDValue Tmp = getValue(I.getArgOperand(0)); - setValue(&I, DAG.getNode(ISD::FSIN, getCurDebugLoc(), - Tmp.getValueType(), Tmp)); + break; + case LibFunc::sin: + case LibFunc::sinf: + case LibFunc::sinl: + if (visitUnaryFloatCall(I, ISD::FSIN)) return; - } - } else if ((LibInfo->has(LibFunc::cos) && Name == "cos") || - (LibInfo->has(LibFunc::cosf) && Name == "cosf") || - (LibInfo->has(LibFunc::cosl) && Name == "cosl")) { - if (I.getNumArgOperands() == 1 && // Basic sanity checks. 
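The rewrite that begins here replaces a dozen copies of the same argument checks with one LibFunc switch over a shared visitUnaryFloatCall helper. The shape of that refactor in miniature (local enums and flags instead of LibFunc, ISD and CallInst):

#include <cassert>
#include <string>

enum class Op { None, FSin, FCos, FSqrt };

// One classification switch replaces per-function copies of the dispatch.
static Op classifyUnaryFloatCall(const std::string &Name) {
  if (Name == "sin" || Name == "sinf" || Name == "sinl") return Op::FSin;
  if (Name == "cos" || Name == "cosf" || Name == "cosl") return Op::FCos;
  if (Name == "sqrt" || Name == "sqrtf" || Name == "sqrtl") return Op::FSqrt;
  return Op::None;
}

// Shared validation that each duplicated block used to repeat inline.
static bool isUnaryFloatCall(unsigned NumArgs, bool ArgIsFP,
                             bool RetMatchesArg, bool OnlyReadsMemory) {
  return NumArgs == 1 && ArgIsFP && RetMatchesArg && OnlyReadsMemory;
}

int main() {
  assert(classifyUnaryFloatCall("sinf") == Op::FSin);
  assert(isUnaryFloatCall(1, true, true, true));
  assert(!isUnaryFloatCall(2, true, true, true));
}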
- I.getArgOperand(0)->getType()->isFloatingPointTy() && - I.getType() == I.getArgOperand(0)->getType() && - I.onlyReadsMemory()) { - SDValue Tmp = getValue(I.getArgOperand(0)); - setValue(&I, DAG.getNode(ISD::FCOS, getCurDebugLoc(), - Tmp.getValueType(), Tmp)); + break; + case LibFunc::cos: + case LibFunc::cosf: + case LibFunc::cosl: + if (visitUnaryFloatCall(I, ISD::FCOS)) return; - } - } else if ((LibInfo->has(LibFunc::sqrt) && Name == "sqrt") || - (LibInfo->has(LibFunc::sqrtf) && Name == "sqrtf") || - (LibInfo->has(LibFunc::sqrtl) && Name == "sqrtl")) { - if (I.getNumArgOperands() == 1 && // Basic sanity checks. - I.getArgOperand(0)->getType()->isFloatingPointTy() && - I.getType() == I.getArgOperand(0)->getType() && - I.onlyReadsMemory()) { - SDValue Tmp = getValue(I.getArgOperand(0)); - setValue(&I, DAG.getNode(ISD::FSQRT, getCurDebugLoc(), - Tmp.getValueType(), Tmp)); + break; + case LibFunc::sqrt: + case LibFunc::sqrtf: + case LibFunc::sqrtl: + if (visitUnaryFloatCall(I, ISD::FSQRT)) return; - } - } else if ((LibInfo->has(LibFunc::floor) && Name == "floor") || - (LibInfo->has(LibFunc::floorf) && Name == "floorf") || - (LibInfo->has(LibFunc::floorl) && Name == "floorl")) { - if (I.getNumArgOperands() == 1 && // Basic sanity checks. - I.getArgOperand(0)->getType()->isFloatingPointTy() && - I.getType() == I.getArgOperand(0)->getType()) { - SDValue Tmp = getValue(I.getArgOperand(0)); - setValue(&I, DAG.getNode(ISD::FFLOOR, getCurDebugLoc(), - Tmp.getValueType(), Tmp)); + break; + case LibFunc::floor: + case LibFunc::floorf: + case LibFunc::floorl: + if (visitUnaryFloatCall(I, ISD::FFLOOR)) return; - } - } else if ((LibInfo->has(LibFunc::nearbyint) && Name == "nearbyint") || - (LibInfo->has(LibFunc::nearbyintf) && Name == "nearbyintf") || - (LibInfo->has(LibFunc::nearbyintl) && Name == "nearbyintl")) { - if (I.getNumArgOperands() == 1 && // Basic sanity checks. - I.getArgOperand(0)->getType()->isFloatingPointTy() && - I.getType() == I.getArgOperand(0)->getType()) { - SDValue Tmp = getValue(I.getArgOperand(0)); - setValue(&I, DAG.getNode(ISD::FNEARBYINT, getCurDebugLoc(), - Tmp.getValueType(), Tmp)); + break; + case LibFunc::nearbyint: + case LibFunc::nearbyintf: + case LibFunc::nearbyintl: + if (visitUnaryFloatCall(I, ISD::FNEARBYINT)) return; - } - } else if ((LibInfo->has(LibFunc::ceil) && Name == "ceil") || - (LibInfo->has(LibFunc::ceilf) && Name == "ceilf") || - (LibInfo->has(LibFunc::ceill) && Name == "ceill")) { - if (I.getNumArgOperands() == 1 && // Basic sanity checks. - I.getArgOperand(0)->getType()->isFloatingPointTy() && - I.getType() == I.getArgOperand(0)->getType()) { - SDValue Tmp = getValue(I.getArgOperand(0)); - setValue(&I, DAG.getNode(ISD::FCEIL, getCurDebugLoc(), - Tmp.getValueType(), Tmp)); + break; + case LibFunc::ceil: + case LibFunc::ceilf: + case LibFunc::ceill: + if (visitUnaryFloatCall(I, ISD::FCEIL)) return; - } - } else if ((LibInfo->has(LibFunc::rint) && Name == "rint") || - (LibInfo->has(LibFunc::rintf) && Name == "rintf") || - (LibInfo->has(LibFunc::rintl) && Name == "rintl")) { - if (I.getNumArgOperands() == 1 && // Basic sanity checks. 
- I.getArgOperand(0)->getType()->isFloatingPointTy() && - I.getType() == I.getArgOperand(0)->getType()) { - SDValue Tmp = getValue(I.getArgOperand(0)); - setValue(&I, DAG.getNode(ISD::FRINT, getCurDebugLoc(), - Tmp.getValueType(), Tmp)); + break; + case LibFunc::rint: + case LibFunc::rintf: + case LibFunc::rintl: + if (visitUnaryFloatCall(I, ISD::FRINT)) return; - } - } else if ((LibInfo->has(LibFunc::trunc) && Name == "trunc") || - (LibInfo->has(LibFunc::truncf) && Name == "truncf") || - (LibInfo->has(LibFunc::truncl) && Name == "truncl")) { - if (I.getNumArgOperands() == 1 && // Basic sanity checks. - I.getArgOperand(0)->getType()->isFloatingPointTy() && - I.getType() == I.getArgOperand(0)->getType()) { - SDValue Tmp = getValue(I.getArgOperand(0)); - setValue(&I, DAG.getNode(ISD::FTRUNC, getCurDebugLoc(), - Tmp.getValueType(), Tmp)); + break; + case LibFunc::trunc: + case LibFunc::truncf: + case LibFunc::truncl: + if (visitUnaryFloatCall(I, ISD::FTRUNC)) return; - } - } else if ((LibInfo->has(LibFunc::log2) && Name == "log2") || - (LibInfo->has(LibFunc::log2f) && Name == "log2f") || - (LibInfo->has(LibFunc::log2l) && Name == "log2l")) { - if (I.getNumArgOperands() == 1 && // Basic sanity checks. - I.getArgOperand(0)->getType()->isFloatingPointTy() && - I.getType() == I.getArgOperand(0)->getType() && - I.onlyReadsMemory()) { - SDValue Tmp = getValue(I.getArgOperand(0)); - setValue(&I, DAG.getNode(ISD::FLOG2, getCurDebugLoc(), - Tmp.getValueType(), Tmp)); + break; + case LibFunc::log2: + case LibFunc::log2f: + case LibFunc::log2l: + if (visitUnaryFloatCall(I, ISD::FLOG2)) return; - } - } else if ((LibInfo->has(LibFunc::exp2) && Name == "exp2") || - (LibInfo->has(LibFunc::exp2f) && Name == "exp2f") || - (LibInfo->has(LibFunc::exp2l) && Name == "exp2l")) { - if (I.getNumArgOperands() == 1 && // Basic sanity checks. - I.getArgOperand(0)->getType()->isFloatingPointTy() && - I.getType() == I.getArgOperand(0)->getType() && - I.onlyReadsMemory()) { - SDValue Tmp = getValue(I.getArgOperand(0)); - setValue(&I, DAG.getNode(ISD::FEXP2, getCurDebugLoc(), - Tmp.getValueType(), Tmp)); + break; + case LibFunc::exp2: + case LibFunc::exp2f: + case LibFunc::exp2l: + if (visitUnaryFloatCall(I, ISD::FEXP2)) return; - } - } else if (Name == "memcmp") { + break; + case LibFunc::memcmp: if (visitMemCmpCall(I)) return; + break; } } } @@ -5952,11 +5944,11 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { SDISelAsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput]; if (OpInfo.ConstraintVT != Input.ConstraintVT) { - std::pair MatchRC = - TLI.getRegForInlineAsmConstraint(OpInfo.ConstraintCode, + std::pair MatchRC = + TLI.getRegForInlineAsmConstraint(OpInfo.ConstraintCode, OpInfo.ConstraintVT); - std::pair InputRC = - TLI.getRegForInlineAsmConstraint(Input.ConstraintCode, + std::pair InputRC = + TLI.getRegForInlineAsmConstraint(Input.ConstraintCode, Input.ConstraintVT); if ((OpInfo.ConstraintVT.isInteger() != Input.ConstraintVT.isInteger()) || @@ -6225,8 +6217,15 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { assert((OpInfo.ConstraintType == TargetLowering::C_RegisterClass || OpInfo.ConstraintType == TargetLowering::C_Register) && "Unknown constraint type!"); - assert(!OpInfo.isIndirect && - "Don't know how to handle indirect register inputs yet!"); + + // TODO: Support this. 
+ if (OpInfo.isIndirect) { + LLVMContext &Ctx = *DAG.getContext(); + Ctx.emitError(CS.getInstruction(), + "Don't know how to handle indirect register inputs yet " + "for constraint '" + Twine(OpInfo.ConstraintCode) + "'"); + break; + } // Copy the input into the appropriate registers. if (OpInfo.AssignedRegs.Regs.empty()) { @@ -6369,24 +6368,18 @@ void SelectionDAGBuilder::visitVACopy(const CallInst &I) { /// FIXME: When all targets are /// migrated to using LowerCall, this hook should be integrated into SDISel. std::pair -TargetLowering::LowerCallTo(SDValue Chain, Type *RetTy, - bool RetSExt, bool RetZExt, bool isVarArg, - bool isInreg, unsigned NumFixedArgs, - CallingConv::ID CallConv, bool isTailCall, - bool doesNotRet, bool isReturnValueUsed, - SDValue Callee, - ArgListTy &Args, SelectionDAG &DAG, - DebugLoc dl) const { +TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { // Handle all of the outgoing arguments. - SmallVector Outs; - SmallVector OutVals; + CLI.Outs.clear(); + CLI.OutVals.clear(); + ArgListTy &Args = CLI.Args; for (unsigned i = 0, e = Args.size(); i != e; ++i) { SmallVector ValueVTs; ComputeValueVTs(*this, Args[i].Ty, ValueVTs); for (unsigned Value = 0, NumValues = ValueVTs.size(); Value != NumValues; ++Value) { EVT VT = ValueVTs[Value]; - Type *ArgTy = VT.getTypeForEVT(RetTy->getContext()); + Type *ArgTy = VT.getTypeForEVT(CLI.RetTy->getContext()); SDValue Op = SDValue(Args[i].Node.getNode(), Args[i].Node.getResNo() + Value); ISD::ArgFlagsTy Flags; @@ -6419,8 +6412,8 @@ TargetLowering::LowerCallTo(SDValue Chain, Type *RetTy, Flags.setNest(); Flags.setOrigAlign(OriginalAlignment); - EVT PartVT = getRegisterType(RetTy->getContext(), VT); - unsigned NumParts = getNumRegisters(RetTy->getContext(), VT); + EVT PartVT = getRegisterType(CLI.RetTy->getContext(), VT); + unsigned NumParts = getNumRegisters(CLI.RetTy->getContext(), VT); SmallVector Parts(NumParts); ISD::NodeType ExtendKind = ISD::ANY_EXTEND; @@ -6429,89 +6422,88 @@ TargetLowering::LowerCallTo(SDValue Chain, Type *RetTy, else if (Args[i].isZExt) ExtendKind = ISD::ZERO_EXTEND; - getCopyToParts(DAG, dl, Op, &Parts[0], NumParts, + getCopyToParts(CLI.DAG, CLI.DL, Op, &Parts[0], NumParts, PartVT, ExtendKind); for (unsigned j = 0; j != NumParts; ++j) { // if it isn't first piece, alignment must be 1 ISD::OutputArg MyFlags(Flags, Parts[j].getValueType(), - i < NumFixedArgs); + i < CLI.NumFixedArgs); if (NumParts > 1 && j == 0) MyFlags.Flags.setSplit(); else if (j != 0) MyFlags.Flags.setOrigAlign(1); - Outs.push_back(MyFlags); - OutVals.push_back(Parts[j]); + CLI.Outs.push_back(MyFlags); + CLI.OutVals.push_back(Parts[j]); } } } // Handle the incoming return values from the call. 
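Running through the rest of this hunk is the patch's central refactor: LowerCallTo's long positional parameter list becomes a single CallLoweringInfo that targets read from and write back into, with Outs, OutVals and Ins filled as lowering proceeds. A schematic of that parameter-object shape, fields abridged and types simplified (not the real declaration):

#include <string>
#include <vector>

// Schematic stand-in for TargetLowering::CallLoweringInfo: one struct
// replaces many loose parameters, and hooks mutate its vectors in place.
struct CallLoweringInfo {
  std::string Callee;
  bool RetSExt = false, RetZExt = false, IsInReg = false;
  bool IsTailCall = false, IsReturnValueUsed = true;
  unsigned NumFixedArgs = 0;
  std::vector<int> Outs, OutVals, Ins;   // abridged element types
};

static void lowerCallTo(CallLoweringInfo &CLI) {
  CLI.Outs.clear();                      // repopulated during lowering
  CLI.OutVals.clear();
  CLI.Ins.push_back(0);                  // one incoming return register
}

int main() {
  CallLoweringInfo CLI;
  CLI.Callee = "memcpy";
  CLI.NumFixedArgs = 3;
  lowerCallTo(CLI);                      // call sites name fields explicitly
}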
- SmallVector Ins; + CLI.Ins.clear(); SmallVector RetTys; - ComputeValueVTs(*this, RetTy, RetTys); + ComputeValueVTs(*this, CLI.RetTy, RetTys); for (unsigned I = 0, E = RetTys.size(); I != E; ++I) { EVT VT = RetTys[I]; - EVT RegisterVT = getRegisterType(RetTy->getContext(), VT); - unsigned NumRegs = getNumRegisters(RetTy->getContext(), VT); + EVT RegisterVT = getRegisterType(CLI.RetTy->getContext(), VT); + unsigned NumRegs = getNumRegisters(CLI.RetTy->getContext(), VT); for (unsigned i = 0; i != NumRegs; ++i) { ISD::InputArg MyFlags; MyFlags.VT = RegisterVT.getSimpleVT(); - MyFlags.Used = isReturnValueUsed; - if (RetSExt) + MyFlags.Used = CLI.IsReturnValueUsed; + if (CLI.RetSExt) MyFlags.Flags.setSExt(); - if (RetZExt) + if (CLI.RetZExt) MyFlags.Flags.setZExt(); - if (isInreg) + if (CLI.IsInReg) MyFlags.Flags.setInReg(); - Ins.push_back(MyFlags); + CLI.Ins.push_back(MyFlags); } } SmallVector InVals; - Chain = LowerCall(Chain, Callee, CallConv, isVarArg, doesNotRet, isTailCall, - Outs, OutVals, Ins, dl, DAG, InVals); + CLI.Chain = LowerCall(CLI, InVals); // Verify that the target's LowerCall behaved as expected. - assert(Chain.getNode() && Chain.getValueType() == MVT::Other && + assert(CLI.Chain.getNode() && CLI.Chain.getValueType() == MVT::Other && "LowerCall didn't return a valid chain!"); - assert((!isTailCall || InVals.empty()) && + assert((!CLI.IsTailCall || InVals.empty()) && "LowerCall emitted a return value for a tail call!"); - assert((isTailCall || InVals.size() == Ins.size()) && + assert((CLI.IsTailCall || InVals.size() == CLI.Ins.size()) && "LowerCall didn't emit the correct number of values!"); // For a tail call, the return value is merely live-out and there aren't // any nodes in the DAG representing it. Return a special value to // indicate that a tail call has been emitted and no more Instructions // should be processed in the current block. - if (isTailCall) { - DAG.setRoot(Chain); + if (CLI.IsTailCall) { + CLI.DAG.setRoot(CLI.Chain); return std::make_pair(SDValue(), SDValue()); } - DEBUG(for (unsigned i = 0, e = Ins.size(); i != e; ++i) { + DEBUG(for (unsigned i = 0, e = CLI.Ins.size(); i != e; ++i) { assert(InVals[i].getNode() && "LowerCall emitted a null value!"); - assert(EVT(Ins[i].VT) == InVals[i].getValueType() && + assert(EVT(CLI.Ins[i].VT) == InVals[i].getValueType() && "LowerCall emitted a value with the wrong type!"); }); // Collect the legal value parts into potentially illegal values // that correspond to the original function's return values. ISD::NodeType AssertOp = ISD::DELETED_NODE; - if (RetSExt) + if (CLI.RetSExt) AssertOp = ISD::AssertSext; - else if (RetZExt) + else if (CLI.RetZExt) AssertOp = ISD::AssertZext; SmallVector ReturnValues; unsigned CurReg = 0; for (unsigned I = 0, E = RetTys.size(); I != E; ++I) { EVT VT = RetTys[I]; - EVT RegisterVT = getRegisterType(RetTy->getContext(), VT); - unsigned NumRegs = getNumRegisters(RetTy->getContext(), VT); + EVT RegisterVT = getRegisterType(CLI.RetTy->getContext(), VT); + unsigned NumRegs = getNumRegisters(CLI.RetTy->getContext(), VT); - ReturnValues.push_back(getCopyFromParts(DAG, dl, &InVals[CurReg], + ReturnValues.push_back(getCopyFromParts(CLI.DAG, CLI.DL, &InVals[CurReg], NumRegs, RegisterVT, VT, AssertOp)); CurReg += NumRegs; @@ -6521,12 +6513,12 @@ TargetLowering::LowerCallTo(SDValue Chain, Type *RetTy, // such a node, so we just return a null return value in that case. In // that case, nothing will actually look at the value. 
if (ReturnValues.empty()) - return std::make_pair(SDValue(), Chain); + return std::make_pair(SDValue(), CLI.Chain); - SDValue Res = DAG.getNode(ISD::MERGE_VALUES, dl, - DAG.getVTList(&RetTys[0], RetTys.size()), + SDValue Res = CLI.DAG.getNode(ISD::MERGE_VALUES, CLI.DL, + CLI.DAG.getVTList(&RetTys[0], RetTys.size()), &ReturnValues[0], ReturnValues.size()); - return std::make_pair(Res, Chain); + return std::make_pair(Res, CLI.Chain); } void TargetLowering::LowerOperationWrapper(SDNode *N, @@ -6746,7 +6738,7 @@ void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) { // Note down frame index. if (FrameIndexSDNode *FI = - dyn_cast(ArgValues[0].getNode())) + dyn_cast(ArgValues[0].getNode())) FuncInfo->setArgumentFrameIndex(I, FI->getIndex()); SDValue Res = DAG.getMergeValues(&ArgValues[0], NumValues, diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h index 8393b41..4090002 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h @@ -180,17 +180,6 @@ private: typedef std::vector CaseRecVector; - /// The comparison function for sorting the switch case values in the vector. - /// WARNING: Case ranges should be disjoint! - struct CaseCmp { - bool operator()(const Case &C1, const Case &C2) { - assert(isa(C1.Low) && isa(C2.High)); - const ConstantInt* CI1 = cast(C1.Low); - const ConstantInt* CI2 = cast(C2.High); - return CI1->getValue().slt(CI2->getValue()); - } - }; - struct CaseBitsCmp { bool operator()(const CaseBits &C1, const CaseBits &C2) { return C1.Bits > C2.Bits; @@ -351,7 +340,7 @@ public: void clear(); /// clearDanglingDebugInfo - Clear the dangling debug information - /// map. This function is seperated from the clear so that debug + /// map. This function is separated from the clear so that debug /// information that is dangling in a basic block can be properly /// resolved in a different basic block. 
This allows the /// SelectionDAG to resolve dangling debug information attached @@ -531,6 +520,7 @@ private: void visitPHI(const PHINode &I); void visitCall(const CallInst &I); bool visitMemCmpCall(const CallInst &I); + bool visitUnaryFloatCall(const CallInst &I, unsigned Opcode); void visitAtomicLoad(const LoadInst &I); void visitAtomicStore(const StoreInst &I); diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp index f981afb..13cd011 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "ScheduleDAGSDNodes.h" +#include "llvm/DebugInfo.h" #include "llvm/Function.h" #include "llvm/Intrinsics.h" #include "llvm/Assembly/Writer.h" @@ -19,7 +20,6 @@ #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineModuleInfo.h" -#include "llvm/Analysis/DebugInfo.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetIntrinsicInfo.h" #include "llvm/Target/TargetMachine.h" @@ -100,6 +100,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::EH_SJLJ_SETJMP: return "EH_SJLJ_SETJMP"; case ISD::EH_SJLJ_LONGJMP: return "EH_SJLJ_LONGJMP"; case ISD::ConstantPool: return "ConstantPool"; + case ISD::TargetIndex: return "TargetIndex"; case ISD::ExternalSymbol: return "ExternalSymbol"; case ISD::BlockAddress: return "BlockAddress"; case ISD::INTRINSIC_WO_CHAIN: @@ -265,6 +266,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::STACKSAVE: return "stacksave"; case ISD::STACKRESTORE: return "stackrestore"; case ISD::TRAP: return "trap"; + case ISD::DEBUGTRAP: return "debugtrap"; // Bit manipulation case ISD::BSWAP: return "bswap"; @@ -408,6 +410,10 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const { OS << " " << offset; if (unsigned int TF = CP->getTargetFlags()) OS << " [TF=" << TF << ']'; + } else if (const TargetIndexSDNode *TI = dyn_cast(this)) { + OS << "<" << TI->getIndex() << '+' << TI->getOffset() << ">"; + if (unsigned TF = TI->getTargetFlags()) + OS << " [TF=" << TF << ']'; } else if (const BasicBlockSDNode *BBDN = dyn_cast(this)) { OS << "<"; const Value *LBB = (const Value*)BBDN->getBasicBlock()->getBasicBlock(); diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index 605509b..4e5e3ba 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -14,12 +14,8 @@ #define DEBUG_TYPE "isel" #include "ScheduleDAGSDNodes.h" #include "SelectionDAGBuilder.h" -#include "llvm/CodeGen/FunctionLoweringInfo.h" -#include "llvm/CodeGen/SelectionDAGISel.h" -#include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/Analysis/BranchProbabilityInfo.h" -#include "llvm/Analysis/DebugInfo.h" #include "llvm/Constants.h" +#include "llvm/DebugInfo.h" #include "llvm/Function.h" #include "llvm/InlineAsm.h" #include "llvm/Instructions.h" @@ -27,7 +23,10 @@ #include "llvm/IntrinsicInst.h" #include "llvm/LLVMContext.h" #include "llvm/Module.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/BranchProbabilityInfo.h" #include "llvm/CodeGen/FastISel.h" +#include "llvm/CodeGen/FunctionLoweringInfo.h" #include "llvm/CodeGen/GCStrategy.h" #include "llvm/CodeGen/GCMetadata.h" #include "llvm/CodeGen/MachineFrameInfo.h" @@ -38,6 +37,7 
@@ #include "llvm/CodeGen/ScheduleHazardRecognizer.h" #include "llvm/CodeGen/SchedulerRegistry.h" #include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/CodeGen/SelectionDAGISel.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetIntrinsicInfo.h" #include "llvm/Target/TargetInstrInfo.h" @@ -263,8 +263,6 @@ void TargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI, // SelectionDAGISel code //===----------------------------------------------------------------------===// -void SelectionDAGISel::ISelUpdater::anchor() { } - SelectionDAGISel::SelectionDAGISel(const TargetMachine &tm, CodeGenOpt::Level OL) : MachineFunctionPass(ID), TM(tm), TLI(*tm.getTargetLowering()), @@ -451,9 +449,9 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { } } } - done:; } + done: // Determine if there is a call to setjmp in the machine function. MF->setExposesReturnsTwice(Fn.callsFunctionThatReturnsTwice()); @@ -468,8 +466,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { // If To is also scheduled to be replaced, find what its ultimate // replacement is. for (;;) { - DenseMap::iterator J = - FuncInfo->RegFixups.find(To); + DenseMap::iterator J = FuncInfo->RegFixups.find(To); if (J == E) break; To = J->second; } @@ -703,6 +700,25 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { CurDAG->clear(); } +namespace { +/// ISelUpdater - helper class to handle updates of the instruction selection +/// graph. +class ISelUpdater : public SelectionDAG::DAGUpdateListener { + SelectionDAG::allnodes_iterator &ISelPosition; +public: + ISelUpdater(SelectionDAG &DAG, SelectionDAG::allnodes_iterator &isp) + : SelectionDAG::DAGUpdateListener(DAG), ISelPosition(isp) {} + + /// NodeDeleted - Handle nodes deleted from the graph. If the node being + /// deleted is the current ISelPosition node, update ISelPosition. + /// + virtual void NodeDeleted(SDNode *N, SDNode *E) { + if (ISelPosition == SelectionDAG::allnodes_iterator(N)) + ++ISelPosition; + } +}; +} // end anonymous namespace + void SelectionDAGISel::DoInstructionSelection() { DEBUG(errs() << "===== Instruction selection begins: BB#" << FuncInfo->MBB->getNumber() @@ -719,9 +735,13 @@ void SelectionDAGISel::DoInstructionSelection() { // a reference to the root node, preventing it from being deleted, // and tracking any changes of the root. HandleSDNode Dummy(CurDAG->getRoot()); - ISelPosition = SelectionDAG::allnodes_iterator(CurDAG->getRoot().getNode()); + SelectionDAG::allnodes_iterator ISelPosition (CurDAG->getRoot().getNode()); ++ISelPosition; + // Make sure that ISelPosition gets properly updated when nodes are deleted + // in calls made from this function. + ISelUpdater ISU(*CurDAG, ISelPosition); + // The AllNodes list is now topological-sorted. Visit the // nodes by starting at the end of the list (the root of the // graph) and preceding back toward the beginning (the entry @@ -748,10 +768,8 @@ void SelectionDAGISel::DoInstructionSelection() { // If after the replacement this node is not used any more, // remove this dead node. - if (Node->use_empty()) { // Don't delete EntryToken, etc. - ISelUpdater ISU(ISelPosition); - CurDAG->RemoveDeadNode(Node, &ISU); - } + if (Node->use_empty()) // Don't delete EntryToken, etc. + CurDAG->RemoveDeadNode(Node); } CurDAG->setRoot(Dummy.getValue()); @@ -961,7 +979,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { // Initialize the Fast-ISel state, if needed. 
FastISel *FastIS = 0; if (TM.Options.EnableFastISel) - FastIS = TLI.createFastISel(*FuncInfo); + FastIS = TLI.createFastISel(*FuncInfo, LibInfo); // Iterate over all basic blocks in the function. ReversePostOrderTraversal RPOT(&Fn); @@ -1680,8 +1698,6 @@ UpdateChainsAndGlue(SDNode *NodeToMatch, SDValue InputChain, bool isMorphNodeTo) { SmallVector NowDeadNodes; - ISelUpdater ISU(ISelPosition); - // Now that all the normal results are replaced, we replace the chain and // glue results if present. if (!ChainNodesMatched.empty()) { @@ -1705,7 +1721,7 @@ UpdateChainsAndGlue(SDNode *NodeToMatch, SDValue InputChain, if (ChainVal.getValueType() == MVT::Glue) ChainVal = ChainVal.getValue(ChainVal->getNumValues()-2); assert(ChainVal.getValueType() == MVT::Other && "Not a chain?"); - CurDAG->ReplaceAllUsesOfValueWith(ChainVal, InputChain, &ISU); + CurDAG->ReplaceAllUsesOfValueWith(ChainVal, InputChain); // If the node became dead and we haven't already seen it, delete it. if (ChainNode->use_empty() && @@ -1728,7 +1744,7 @@ UpdateChainsAndGlue(SDNode *NodeToMatch, SDValue InputChain, assert(FRN->getValueType(FRN->getNumValues()-1) == MVT::Glue && "Doesn't have a glue result"); CurDAG->ReplaceAllUsesOfValueWith(SDValue(FRN, FRN->getNumValues()-1), - InputGlue, &ISU); + InputGlue); // If the node became dead and we haven't already seen it, delete it. if (FRN->use_empty() && @@ -1738,7 +1754,7 @@ UpdateChainsAndGlue(SDNode *NodeToMatch, SDValue InputChain, } if (!NowDeadNodes.empty()) - CurDAG->RemoveDeadNodes(NowDeadNodes, &ISU); + CurDAG->RemoveDeadNodes(NowDeadNodes); DEBUG(errs() << "ISEL: Match complete!\n"); } @@ -1759,7 +1775,7 @@ enum ChainResult { /// The walk we do here is guaranteed to be small because we quickly get down to /// already selected nodes "below" us. static ChainResult -WalkChainUsers(SDNode *ChainedNode, +WalkChainUsers(const SDNode *ChainedNode, SmallVectorImpl &ChainedNodesInPattern, SmallVectorImpl &InteriorChainedNodes) { ChainResult Result = CR_Simple; @@ -1992,14 +2008,14 @@ CheckSame(const unsigned char *MatcherTable, unsigned &MatcherIndex, /// CheckPatternPredicate - Implements OP_CheckPatternPredicate. LLVM_ATTRIBUTE_ALWAYS_INLINE static bool CheckPatternPredicate(const unsigned char *MatcherTable, unsigned &MatcherIndex, - SelectionDAGISel &SDISel) { + const SelectionDAGISel &SDISel) { return SDISel.CheckPatternPredicate(MatcherTable[MatcherIndex++]); } /// CheckNodePredicate - Implements OP_CheckNodePredicate. 
LLVM_ATTRIBUTE_ALWAYS_INLINE static bool CheckNodePredicate(const unsigned char *MatcherTable, unsigned &MatcherIndex, - SelectionDAGISel &SDISel, SDNode *N) { + const SelectionDAGISel &SDISel, SDNode *N) { return SDISel.CheckNodePredicate(N, MatcherTable[MatcherIndex++]); } @@ -2062,7 +2078,7 @@ CheckInteger(const unsigned char *MatcherTable, unsigned &MatcherIndex, LLVM_ATTRIBUTE_ALWAYS_INLINE static bool CheckAndImm(const unsigned char *MatcherTable, unsigned &MatcherIndex, - SDValue N, SelectionDAGISel &SDISel) { + SDValue N, const SelectionDAGISel &SDISel) { int64_t Val = MatcherTable[MatcherIndex++]; if (Val & 128) Val = GetVBR(Val, MatcherTable, MatcherIndex); @@ -2075,7 +2091,7 @@ CheckAndImm(const unsigned char *MatcherTable, unsigned &MatcherIndex, LLVM_ATTRIBUTE_ALWAYS_INLINE static bool CheckOrImm(const unsigned char *MatcherTable, unsigned &MatcherIndex, - SDValue N, SelectionDAGISel &SDISel) { + SDValue N, const SelectionDAGISel &SDISel) { int64_t Val = MatcherTable[MatcherIndex++]; if (Val & 128) Val = GetVBR(Val, MatcherTable, MatcherIndex); @@ -2094,7 +2110,8 @@ CheckOrImm(const unsigned char *MatcherTable, unsigned &MatcherIndex, /// MatcherIndex to continue with. static unsigned IsPredicateKnownToFail(const unsigned char *Table, unsigned Index, SDValue N, - bool &Result, SelectionDAGISel &SDISel, + bool &Result, + const SelectionDAGISel &SDISel, SmallVectorImpl > &RecordedNodes) { switch (Table[Index++]) { default: @@ -2759,9 +2776,14 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, (SDNode*) 0)); } - } else { + } else if (NodeToMatch->getOpcode() != ISD::DELETED_NODE) { Res = MorphNode(NodeToMatch, TargetOpc, VTList, Ops.data(), Ops.size(), EmitNodeInfo); + } else { + // NodeToMatch was eliminated by CSE when the target changed the DAG. + // We will visit the equivalent node later. 
+ DEBUG(dbgs() << "Node was eliminated by CSE\n"); + return 0; } // If the node had chain/glue results, update our notion of the current @@ -2959,6 +2981,7 @@ void SelectionDAGISel::CannotYetSelect(SDNode *N) { N->getOpcode() != ISD::INTRINSIC_WO_CHAIN && N->getOpcode() != ISD::INTRINSIC_VOID) { N->printrFull(Msg, CurDAG); + Msg << "\nIn function: " << MF->getFunction()->getName(); } else { bool HasInputChain = N->getOperand(0).getValueType() == MVT::Other; unsigned iid = diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp index 6cde05a..173ffac 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp @@ -13,13 +13,13 @@ #include "ScheduleDAGSDNodes.h" #include "llvm/Constants.h" +#include "llvm/DebugInfo.h" #include "llvm/Function.h" #include "llvm/Assembly/Writer.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineModuleInfo.h" -#include "llvm/Analysis/DebugInfo.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Support/Debug.h" diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp index e341e15..f0c50c1 100644 --- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -25,6 +25,7 @@ #include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/ADT/BitVector.h" #include "llvm/ADT/STLExtras.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" @@ -32,13 +33,6 @@ #include using namespace llvm; -/// We are in the process of implementing a new TypeLegalization action -/// - the promotion of vector elements. This feature is disabled by default -/// and only enabled using this flag. -static cl::opt -AllowPromoteIntElem("promote-elements", cl::Hidden, cl::init(true), - cl::desc("Allow promotion of integer vector element types")); - /// InitLibcallNames - Set default libcall names. /// static void InitLibcallNames(const char **Names) { @@ -521,8 +515,7 @@ static void InitCmpLibcallCCs(ISD::CondCode *CCs) { /// NOTE: The constructor takes ownership of TLOF. TargetLowering::TargetLowering(const TargetMachine &tm, const TargetLoweringObjectFile *tlof) - : TM(tm), TD(TM.getTargetData()), TLOF(*tlof), - mayPromoteElements(AllowPromoteIntElem) { + : TM(tm), TD(TM.getTargetData()), TLOF(*tlof) { // All operations default to being supported. memset(OpActions, 0, sizeof(OpActions)); memset(LoadExtActions, 0, sizeof(LoadExtActions)); @@ -604,6 +597,7 @@ TargetLowering::TargetLowering(const TargetMachine &tm, IntDivIsCheap = false; Pow2DivIsCheap = false; JumpIsExpensive = false; + predictableSelectIsExpensive = false; StackPointerRegisterToSaveRestore = 0; ExceptionPointerRegister = 0; ExceptionSelectorRegister = 0; @@ -618,6 +612,7 @@ TargetLowering::TargetLowering(const TargetMachine &tm, MinStackArgumentAlignment = 1; ShouldFoldAtomicFences = false; InsertFencesForAtomic = false; + SupportJumpTables = true; InitLibcallNames(LibcallRoutineNames); InitCmpLibcallCCs(CmpLibcallCCs); @@ -708,42 +703,34 @@ bool TargetLowering::isLegalRC(const TargetRegisterClass *RC) const { return false; } -/// hasLegalSuperRegRegClasses - Return true if the specified register class -/// has one or more super-reg register classes that are legal. 
-bool -TargetLowering::hasLegalSuperRegRegClasses(const TargetRegisterClass *RC) const{ - if (*RC->superregclasses_begin() == 0) - return false; - for (TargetRegisterInfo::regclass_iterator I = RC->superregclasses_begin(), - E = RC->superregclasses_end(); I != E; ++I) { - const TargetRegisterClass *RRC = *I; - if (isLegalRC(RRC)) - return true; - } - return false; -} - /// findRepresentativeClass - Return the largest legal super-reg register class /// of the register class for the specified type and its associated "cost". std::pair TargetLowering::findRepresentativeClass(EVT VT) const { + const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo(); const TargetRegisterClass *RC = RegClassForVT[VT.getSimpleVT().SimpleTy]; if (!RC) return std::make_pair(RC, 0); + + // Compute the set of all super-register classes. + BitVector SuperRegRC(TRI->getNumRegClasses()); + for (SuperRegClassIterator RCI(RC, TRI); RCI.isValid(); ++RCI) + SuperRegRC.setBitsInMask(RCI.getMask()); + + // Find the first legal register class with the largest spill size. const TargetRegisterClass *BestRC = RC; - for (TargetRegisterInfo::regclass_iterator I = RC->superregclasses_begin(), - E = RC->superregclasses_end(); I != E; ++I) { - const TargetRegisterClass *RRC = *I; - if (RRC->isASubClass() || !isLegalRC(RRC)) + for (int i = SuperRegRC.find_first(); i >= 0; i = SuperRegRC.find_next(i)) { + const TargetRegisterClass *SuperRC = TRI->getRegClass(i); + // We want the largest possible spill size. + if (SuperRC->getSize() <= BestRC->getSize()) + continue; + if (!isLegalRC(SuperRC)) continue; - if (!hasLegalSuperRegRegClasses(RRC)) - return std::make_pair(RRC, 1); - BestRC = RRC; + BestRC = SuperRC; } return std::make_pair(BestRC, 1); } - /// computeRegisterProperties - Once all of the register classes are added, /// this allows us to compute derived properties we expose. void TargetLowering::computeRegisterProperties() { @@ -835,11 +822,8 @@ void TargetLowering::computeRegisterProperties() { unsigned NElts = VT.getVectorNumElements(); if (NElts != 1) { bool IsLegalWiderType = false; - // If we allow the promotion of vector elements using a flag, - // then return TypePromoteInteger on vector elements. // First try to promote the elements of integer vectors. If no legal // promotion was found, fallback to the widen-vector method. - if (mayPromoteElements) for (unsigned nVT = i+1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) { EVT SVT = (MVT::SimpleValueType)nVT; // Promote vectors of integers to vectors with the same number @@ -940,9 +924,12 @@ unsigned TargetLowering::getVectorTypeBreakdown(LLVMContext &Context, EVT VT, unsigned NumElts = VT.getVectorNumElements(); // If there is a wider vector type with the same element type as this one, - // we should widen to that legal vector type. This handles things like - // <2 x float> -> <4 x float>. - if (NumElts != 1 && getTypeAction(Context, VT) == TypeWidenVector) { + // or a promoted vector type that has the same number of elements which + // are wider, then we should convert to that legal vector type. + // This handles things like <2 x float> -> <4 x float> and + // <4 x i1> -> <4 x i32>. + LegalizeTypeAction TA = getTypeAction(Context, VT); + if (NumElts != 1 && (TA == TypeWidenVector || TA == TypePromoteInteger)) { RegisterVT = getTypeToTransformTo(Context, VT); if (isTypeLegal(RegisterVT)) { IntermediateVT = RegisterVT; @@ -1000,13 +987,11 @@ unsigned TargetLowering::getVectorTypeBreakdown(LLVMContext &Context, EVT VT, /// TODO: Move this out of TargetLowering.cpp. 
void llvm::GetReturnInfo(Type* ReturnType, Attributes attr, SmallVectorImpl &Outs, - const TargetLowering &TLI, - SmallVectorImpl *Offsets) { + const TargetLowering &TLI) { SmallVector ValueVTs; ComputeValueVTs(TLI, ReturnType, ValueVTs); unsigned NumValues = ValueVTs.size(); if (NumValues == 0) return; - unsigned Offset = 0; for (unsigned j = 0, f = NumValues; j != f; ++j) { EVT VT = ValueVTs[j]; @@ -1029,8 +1014,6 @@ void llvm::GetReturnInfo(Type* ReturnType, Attributes attr, unsigned NumParts = TLI.getNumRegisters(ReturnType->getContext(), VT); EVT PartVT = TLI.getRegisterType(ReturnType->getContext(), VT); - unsigned PartSize = TLI.getTargetData()->getTypeAllocSize( - PartVT.getTypeForEVT(ReturnType->getContext())); // 'inreg' on function refers to return value ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy(); @@ -1045,10 +1028,6 @@ void llvm::GetReturnInfo(Type* ReturnType, Attributes attr, for (unsigned i = 0; i < NumParts; ++i) { Outs.push_back(ISD::OutputArg(Flags, PartVT, /*isFixed=*/true)); - if (Offsets) { - Offsets->push_back(Offset); - Offset += PartSize; - } } } } @@ -2019,7 +1998,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, } } - // Make sure we're not loosing bits from the constant. + // Make sure we're not losing bits from the constant. if (MinBits < C1.getBitWidth() && MinBits > C1.getActiveBits()) { EVT MinVT = EVT::getIntegerVT(*DAG.getContext(), MinBits); if (isTypeDesirableForOp(ISD::SETCC, MinVT)) { @@ -2343,6 +2322,55 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, } } } + + if (C1.getMinSignedBits() <= 64 && + !isLegalICmpImmediate(C1.getSExtValue())) { + // (X & -256) == 256 -> (X >> 8) == 1 + if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) && + N0.getOpcode() == ISD::AND && N0.hasOneUse()) { + if (ConstantSDNode *AndRHS = + dyn_cast(N0.getOperand(1))) { + const APInt &AndRHSC = AndRHS->getAPIntValue(); + if ((-AndRHSC).isPowerOf2() && (AndRHSC & C1) == C1) { + unsigned ShiftBits = AndRHSC.countTrailingZeros(); + EVT ShiftTy = DCI.isBeforeLegalize() ? + getPointerTy() : getShiftAmountTy(N0.getValueType()); + EVT CmpTy = N0.getValueType(); + SDValue Shift = DAG.getNode(ISD::SRL, dl, CmpTy, N0.getOperand(0), + DAG.getConstant(ShiftBits, ShiftTy)); + SDValue CmpRHS = DAG.getConstant(C1.lshr(ShiftBits), CmpTy); + return DAG.getSetCC(dl, VT, Shift, CmpRHS, Cond); + } + } + } else if (Cond == ISD::SETULT || Cond == ISD::SETUGE || + Cond == ISD::SETULE || Cond == ISD::SETUGT) { + bool AdjOne = (Cond == ISD::SETULE || Cond == ISD::SETUGT); + // X < 0x100000000 -> (X >> 32) < 1 + // X >= 0x100000000 -> (X >> 32) >= 1 + // X <= 0x0ffffffff -> (X >> 32) < 1 + // X > 0x0ffffffff -> (X >> 32) >= 1 + unsigned ShiftBits; + APInt NewC = C1; + ISD::CondCode NewCond = Cond; + if (AdjOne) { + ShiftBits = C1.countTrailingOnes(); + NewC = NewC + 1; + NewCond = (Cond == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE; + } else { + ShiftBits = C1.countTrailingZeros(); + } + NewC = NewC.lshr(ShiftBits); + if (ShiftBits && isLegalICmpImmediate(NewC.getSExtValue())) { + EVT ShiftTy = DCI.isBeforeLegalize() ? 
+ getPointerTy() : getShiftAmountTy(N0.getValueType()); + EVT CmpTy = N0.getValueType(); + SDValue Shift = DAG.getNode(ISD::SRL, dl, CmpTy, N0, + DAG.getConstant(ShiftBits, ShiftTy)); + SDValue CmpRHS = DAG.getConstant(NewC, CmpTy); + return DAG.getSetCC(dl, VT, Shift, CmpRHS, NewCond); + } + } + } } if (isa(N0.getNode())) { @@ -2411,25 +2439,33 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, } if (N0 == N1) { + // The sext(setcc()) => setcc() optimization relies on the appropriate + // constant being emitted. + uint64_t EqVal; + switch (getBooleanContents(N0.getValueType().isVector())) { + case UndefinedBooleanContent: + case ZeroOrOneBooleanContent: + EqVal = ISD::isTrueWhenEqual(Cond); + break; + case ZeroOrNegativeOneBooleanContent: + EqVal = ISD::isTrueWhenEqual(Cond) ? -1 : 0; + break; + } + // We can always fold X == X for integer setcc's. if (N0.getValueType().isInteger()) { - switch (getBooleanContents(N0.getValueType().isVector())) { - case UndefinedBooleanContent: - case ZeroOrOneBooleanContent: - return DAG.getConstant(ISD::isTrueWhenEqual(Cond), VT); - case ZeroOrNegativeOneBooleanContent: - return DAG.getConstant(ISD::isTrueWhenEqual(Cond) ? -1 : 0, VT); - } + return DAG.getConstant(EqVal, VT); } unsigned UOF = ISD::getUnorderedFlavor(Cond); if (UOF == 2) // FP operators that are undefined on NaNs. - return DAG.getConstant(ISD::isTrueWhenEqual(Cond), VT); + return DAG.getConstant(EqVal, VT); if (UOF == unsigned(ISD::isTrueWhenEqual(Cond))) - return DAG.getConstant(UOF, VT); + return DAG.getConstant(EqVal, VT); // Otherwise, we can't fold it. However, we can simplify it to SETUO/SETO // if it is not already. ISD::CondCode NewCond = UOF == 0 ? ISD::SETO : ISD::SETUO; - if (NewCond != Cond) + if (NewCond != Cond && (DCI.isBeforeLegalizeOps() || + getCondCodeAction(NewCond, N0.getValueType()) == Legal)) return DAG.getSetCC(dl, VT, N0, N1, NewCond); } @@ -2998,10 +3034,12 @@ TargetLowering::AsmOperandInfoVector TargetLowering::ParseConstraints( AsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput]; if (OpInfo.ConstraintVT != Input.ConstraintVT) { - std::pair MatchRC = - getRegForInlineAsmConstraint(OpInfo.ConstraintCode, OpInfo.ConstraintVT); - std::pair InputRC = - getRegForInlineAsmConstraint(Input.ConstraintCode, Input.ConstraintVT); + std::pair MatchRC = + getRegForInlineAsmConstraint(OpInfo.ConstraintCode, + OpInfo.ConstraintVT); + std::pair InputRC = + getRegForInlineAsmConstraint(Input.ConstraintCode, + Input.ConstraintVT); if ((OpInfo.ConstraintVT.isInteger() != Input.ConstraintVT.isInteger()) || (MatchRC.second != InputRC.second)) { diff --git a/lib/CodeGen/ShadowStackGC.cpp b/lib/CodeGen/ShadowStackGC.cpp index 0016047..8a6b120 100644 --- a/lib/CodeGen/ShadowStackGC.cpp +++ b/lib/CodeGen/ShadowStackGC.cpp @@ -26,13 +26,13 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "shadowstackgc" -#include "llvm/CodeGen/GCs.h" -#include "llvm/ADT/StringExtras.h" -#include "llvm/CodeGen/GCStrategy.h" +#include "llvm/IRBuilder.h" #include "llvm/IntrinsicInst.h" #include "llvm/Module.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/CodeGen/GCStrategy.h" +#include "llvm/CodeGen/GCs.h" #include "llvm/Support/CallSite.h" -#include "llvm/Support/IRBuilder.h" using namespace llvm; diff --git a/lib/CodeGen/SjLjEHPrepare.cpp b/lib/CodeGen/SjLjEHPrepare.cpp index 9a86f32..980bd74 100644 --- a/lib/CodeGen/SjLjEHPrepare.cpp +++ b/lib/CodeGen/SjLjEHPrepare.cpp @@ -13,28 +13,28 @@ 
//===----------------------------------------------------------------------===// #define DEBUG_TYPE "sjljehprepare" -#include "llvm/Transforms/Scalar.h" #include "llvm/Constants.h" #include "llvm/DerivedTypes.h" +#include "llvm/IRBuilder.h" #include "llvm/Instructions.h" #include "llvm/Intrinsics.h" #include "llvm/LLVMContext.h" #include "llvm/Module.h" #include "llvm/Pass.h" -#include "llvm/CodeGen/Passes.h" -#include "llvm/Target/TargetData.h" -#include "llvm/Target/TargetLowering.h" -#include "llvm/Transforms/Utils/BasicBlockUtils.h" -#include "llvm/Transforms/Utils/Local.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/IRBuilder.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/Local.h" #include using namespace llvm; diff --git a/lib/CodeGen/SlotIndexes.cpp b/lib/CodeGen/SlotIndexes.cpp index 26cf259..c8c3fb3 100644 --- a/lib/CodeGen/SlotIndexes.cpp +++ b/lib/CodeGen/SlotIndexes.cpp @@ -62,7 +62,6 @@ bool SlotIndexes::runOnMachineFunction(MachineFunction &fn) { assert(mi2iMap.empty() && "MachineInstr -> Index mapping non-empty at initial numbering?"); - functionSize = 0; unsigned index = 0; MBBRanges.resize(mf->getNumBlockIDs()); idx2MBBMap.reserve(mf->size()); @@ -89,8 +88,6 @@ bool SlotIndexes::runOnMachineFunction(MachineFunction &fn) { // Save this base index in the maps. mi2iMap.insert(std::make_pair(mi, SlotIndex(&indexList.back(), SlotIndex::Slot_Block))); - - ++functionSize; } // We insert one blank instructions between basic blocks. diff --git a/lib/CodeGen/SpillPlacement.cpp b/lib/CodeGen/SpillPlacement.cpp index 6f33f54..320128a 100644 --- a/lib/CodeGen/SpillPlacement.cpp +++ b/lib/CodeGen/SpillPlacement.cpp @@ -207,6 +207,17 @@ void SpillPlacement::activate(unsigned n) { return; ActiveNodes->set(n); nodes[n].clear(); + + // Very large bundles usually come from big switches, indirect branches, + // landing pads, or loops with many 'continue' statements. It is difficult to + // allocate registers when so many different blocks are involved. + // + // Give a small negative bias to large bundles such that 1/32 of the + // connected blocks need to be interested before we consider expanding the + // region through the bundle. This helps compile time by limiting the number + // of blocks visited and the number of links in the Hopfield network. + if (bundles->getBlocks(n).size() > 100) + nodes[n].Bias = -0.0625f; } diff --git a/lib/CodeGen/SplitKit.cpp b/lib/CodeGen/SplitKit.cpp index 9959f74..4a2b7ec 100644 --- a/lib/CodeGen/SplitKit.cpp +++ b/lib/CodeGen/SplitKit.cpp @@ -345,9 +345,11 @@ void SplitEditor::reset(LiveRangeEdit &LRE, ComplementSpillMode SM) { Values.clear(); // Reset the LiveRangeCalc instances needed for this spill mode. 
- LRCalc[0].reset(&VRM.getMachineFunction()); + LRCalc[0].reset(&VRM.getMachineFunction(), LIS.getSlotIndexes(), &MDT, + &LIS.getVNInfoAllocator()); if (SpillMode) - LRCalc[1].reset(&VRM.getMachineFunction()); + LRCalc[1].reset(&VRM.getMachineFunction(), LIS.getSlotIndexes(), &MDT, + &LIS.getVNInfoAllocator()); // We don't need an AliasAnalysis since we will only be performing // cheap-as-a-copy remats anyway. @@ -650,7 +652,7 @@ void SplitEditor::removeBackCopies(SmallVectorImpl &Copies) { // Adjust RegAssign if a register assignment is killed at VNI->def. We // want to avoid calculating the live range of the source register if // possible. - AssignI.find(VNI->def.getPrevSlot()); + AssignI.find(Def.getPrevSlot()); if (!AssignI.valid() || AssignI.start() >= Def) continue; // If MI doesn't kill the assigned register, just leave it. @@ -737,6 +739,8 @@ void SplitEditor::hoistCopiesForSize() { for (LiveInterval::vni_iterator VI = LI->vni_begin(), VE = LI->vni_end(); VI != VE; ++VI) { VNInfo *VNI = *VI; + if (VNI->isUnused()) + continue; VNInfo *ParentVNI = Edit->getParent().getVNInfoAt(VNI->def); assert(ParentVNI && "Parent not live at complement def"); @@ -810,6 +814,8 @@ void SplitEditor::hoistCopiesForSize() { for (LiveInterval::vni_iterator VI = LI->vni_begin(), VE = LI->vni_end(); VI != VE; ++VI) { VNInfo *VNI = *VI; + if (VNI->isUnused()) + continue; VNInfo *ParentVNI = Edit->getParent().getVNInfoAt(VNI->def); const DomPair &Dom = NearestDom[ParentVNI->id]; if (!Dom.first || Dom.second == VNI->def) @@ -924,11 +930,9 @@ bool SplitEditor::transferValues() { DEBUG(dbgs() << '\n'); } - LRCalc[0].calculateValues(LIS.getSlotIndexes(), &MDT, - &LIS.getVNInfoAllocator()); + LRCalc[0].calculateValues(); if (SpillMode) - LRCalc[1].calculateValues(LIS.getSlotIndexes(), &MDT, - &LIS.getVNInfoAllocator()); + LRCalc[1].calculateValues(); return Skipped; } @@ -953,8 +957,7 @@ void SplitEditor::extendPHIKillRanges() { if (Edit->getParent().liveAt(LastUse)) { assert(RegAssign.lookup(LastUse) == RegIdx && "Different register assignment in phi predecessor"); - LRC.extend(LI, End, - LIS.getSlotIndexes(), &MDT, &LIS.getVNInfoAllocator()); + LRC.extend(LI, End); } } } @@ -1004,8 +1007,7 @@ void SplitEditor::rewriteAssigned(bool ExtendRanges) { } else Idx = Idx.getRegSlot(true); - getLRCalc(RegIdx).extend(LI, Idx.getNextSlot(), LIS.getSlotIndexes(), - &MDT, &LIS.getVNInfoAllocator()); + getLRCalc(RegIdx).extend(LI, Idx.getNextSlot()); } } @@ -1049,8 +1051,7 @@ void SplitEditor::finish(SmallVectorImpl *LRMap) { if (ParentVNI->isUnused()) continue; unsigned RegIdx = RegAssign.lookup(ParentVNI->def); - VNInfo *VNI = defValue(RegIdx, ParentVNI, ParentVNI->def); - VNI->setIsPHIDef(ParentVNI->isPHIDef()); + defValue(RegIdx, ParentVNI, ParentVNI->def); // Force rematted values to be recomputed everywhere. // The new live ranges may be truncated. diff --git a/lib/CodeGen/StackProtector.cpp b/lib/CodeGen/StackProtector.cpp index 43a6ad8..f1eab1f 100644 --- a/lib/CodeGen/StackProtector.cpp +++ b/lib/CodeGen/StackProtector.cpp @@ -28,6 +28,7 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetLowering.h" +#include "llvm/ADT/Triple.h" using namespace llvm; // SSPBufferSize - The lower bound for a buffer to be considered for stack @@ -46,7 +47,7 @@ namespace { Function *F; Module *M; - DominatorTree* DT; + DominatorTree *DT; /// InsertStackProtectors - Insert code into the prologue and epilogue of /// the function. 
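
The StackProtector hunks that follow tighten the heuristic in RequiresStackProtector(): on non-Darwin targets only arrays of i8 (character buffers) may trigger a protector, and then only when their allocated size reaches SSPBufferSize. A minimal sketch of the array-type branch of that test, using the same LLVM 3.1 APIs the hunk touches; the helper name and parameter list are ours, for illustration only:

// Sketch only: distills the array-type branch added to
// StackProtector::RequiresStackProtector(); not part of the patch itself.
static bool arrayNeedsProtector(const ArrayType *AT, const Triple &Trip,
                                const TargetData *TD, unsigned SSPBufferSize) {
  // Non-Darwin platforms are assumed to only care about char arrays, since
  // those are the classic smash-the-stack targets.
  if (!Trip.isOSDarwin() && !AT->getElementType()->isIntegerTy(8))
    return false;
  // Sufficiently large buffers get a protector on every platform.
  return TD->getTypeAllocSize(AT) >= SSPBufferSize;
}
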
@@ -70,8 +71,8 @@ namespace { } StackProtector(const TargetLowering *tli) : FunctionPass(ID), TLI(tli) { - initializeStackProtectorPass(*PassRegistry::getPassRegistry()); - } + initializeStackProtectorPass(*PassRegistry::getPassRegistry()); + } virtual void getAnalysisUsage(AnalysisUsage &AU) const { AU.addPreserved(); @@ -95,7 +96,7 @@ bool StackProtector::runOnFunction(Function &Fn) { DT = getAnalysisIfAvailable(); if (!RequiresStackProtector()) return false; - + return InsertStackProtectors(); } @@ -111,6 +112,8 @@ bool StackProtector::RequiresStackProtector() const { return false; const TargetData *TD = TLI->getTargetData(); + const TargetMachine &TM = TLI->getTargetMachine(); + Triple Trip(TM.getTargetTriple()); for (Function::iterator I = F->begin(), E = F->end(); I != E; ++I) { BasicBlock *BB = I; @@ -123,11 +126,17 @@ bool StackProtector::RequiresStackProtector() const { // protectors. return true; - if (ArrayType *AT = dyn_cast(AI->getAllocatedType())) + if (ArrayType *AT = dyn_cast(AI->getAllocatedType())) { + // If we're on a non-Darwin platform, don't add stack protectors + // unless the array is a character array. + if (!Trip.isOSDarwin() && !AT->getElementType()->isIntegerTy(8)) + continue; + // If an array has more than SSPBufferSize bytes of allocated space, // then we emit stack protectors. if (SSPBufferSize <= TD->getTypeAllocSize(AT)) return true; + } } } @@ -159,17 +168,17 @@ bool StackProtector::InsertStackProtectors() { // StackGuardSlot = alloca i8* // StackGuard = load __stack_chk_guard // call void @llvm.stackprotect.create(StackGuard, StackGuardSlot) - // + // PointerType *PtrTy = Type::getInt8PtrTy(RI->getContext()); unsigned AddressSpace, Offset; if (TLI->getStackCookieLocation(AddressSpace, Offset)) { Constant *OffsetVal = ConstantInt::get(Type::getInt32Ty(RI->getContext()), Offset); - + StackGuardVar = ConstantExpr::getIntToPtr(OffsetVal, PointerType::get(PtrTy, AddressSpace)); } else { - StackGuardVar = M->getOrInsertGlobal("__stack_chk_guard", PtrTy); + StackGuardVar = M->getOrInsertGlobal("__stack_chk_guard", PtrTy); } BasicBlock &Entry = F->getEntryBlock(); diff --git a/lib/CodeGen/StackSlotColoring.cpp b/lib/CodeGen/StackSlotColoring.cpp index 1e940b1..20da36e 100644 --- a/lib/CodeGen/StackSlotColoring.cpp +++ b/lib/CodeGen/StackSlotColoring.cpp @@ -46,7 +46,6 @@ STATISTIC(NumDead, "Number of trivially dead stack accesses eliminated"); namespace { class StackSlotColoring : public MachineFunctionPass { - bool ColorWithRegs; LiveStacks* LS; MachineFrameInfo *MFI; const TargetInstrInfo *TII; @@ -82,7 +81,7 @@ namespace { public: static char ID; // Pass identification StackSlotColoring() : - MachineFunctionPass(ID), ColorWithRegs(false), NextColor(-1) { + MachineFunctionPass(ID), NextColor(-1) { initializeStackSlotColoringPass(*PassRegistry::getPassRegistry()); } diff --git a/lib/CodeGen/StrongPHIElimination.cpp b/lib/CodeGen/StrongPHIElimination.cpp index c6fdc73..5b06195 100644 --- a/lib/CodeGen/StrongPHIElimination.cpp +++ b/lib/CodeGen/StrongPHIElimination.cpp @@ -672,8 +672,8 @@ void StrongPHIElimination::InsertCopiesForPHI(MachineInstr *PHI, LiveInterval &SrcInterval = LI->getInterval(SrcReg); SlotIndex PredIndex = LI->getMBBEndIdx(PredBB); VNInfo *SrcVNI = SrcInterval.getVNInfoBefore(PredIndex); + (void)SrcVNI; assert(SrcVNI); - SrcVNI->setHasPHIKill(true); continue; } @@ -744,7 +744,6 @@ void StrongPHIElimination::InsertCopiesForPHI(MachineInstr *PHI, SlotIndex PHIIndex = LI->getInstructionIndex(PHI); VNInfo *DestVNI = 
DestLI.getVNInfoAt(PHIIndex.getRegSlot()); assert(DestVNI); - DestVNI->setIsPHIDef(true); // Prior to PHI elimination, the live ranges of PHIs begin at their defining // instruction. After PHI elimination, PHI instructions are replaced by VNs @@ -777,7 +776,6 @@ void StrongPHIElimination::InsertCopiesForPHI(MachineInstr *PHI, SlotIndex DestCopyIndex = LI->getInstructionIndex(CopyInstr); VNInfo *CopyVNI = CopyLI.getNextValue(MBBStartIndex, LI->getVNInfoAllocator()); - CopyVNI->setIsPHIDef(true); CopyLI.addRange(LiveRange(MBBStartIndex, DestCopyIndex.getRegSlot(), CopyVNI)); diff --git a/lib/CodeGen/TailDuplication.cpp b/lib/CodeGen/TailDuplication.cpp index 8ebfbca..a813fa6 100644 --- a/lib/CodeGen/TailDuplication.cpp +++ b/lib/CodeGen/TailDuplication.cpp @@ -20,12 +20,15 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/MachineSSAUpdater.h" +#include "llvm/CodeGen/RegisterScavenging.h" #include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/OwningPtr.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/Statistic.h" @@ -57,8 +60,10 @@ namespace { /// TailDuplicatePass - Perform tail duplication. class TailDuplicatePass : public MachineFunctionPass { const TargetInstrInfo *TII; + const TargetRegisterInfo *TRI; MachineModuleInfo *MMI; MachineRegisterInfo *MRI; + OwningPtr RS; bool PreRegAlloc; // SSAUpdateVRs - A list of virtual registers for which to update SSA form. @@ -124,9 +129,13 @@ INITIALIZE_PASS(TailDuplicatePass, "tailduplication", "Tail Duplication", bool TailDuplicatePass::runOnMachineFunction(MachineFunction &MF) { TII = MF.getTarget().getInstrInfo(); + TRI = MF.getTarget().getRegisterInfo(); MRI = &MF.getRegInfo(); MMI = getAnalysisIfAvailable(); PreRegAlloc = MRI->isSSA(); + RS.reset(); + if (MRI->tracksLiveness() && TRI->trackLivenessAfterRegAlloc(MF)) + RS.reset(new RegScavenger()); bool MadeChange = false; while (TailDuplicateBlocks(MF)) @@ -272,8 +281,8 @@ TailDuplicatePass::TailDuplicateAndUpdate(MachineBasicBlock *MBB, continue; unsigned Dst = Copy->getOperand(0).getReg(); unsigned Src = Copy->getOperand(1).getReg(); - MachineRegisterInfo::use_iterator UI = MRI->use_begin(Src); - if (++UI == MRI->use_end()) { + if (MRI->hasOneNonDBGUse(Src) && + MRI->constrainRegClass(Src, MRI->getRegClass(Dst))) { // Copy is the only use. Do trivial copy propagation here. MRI->replaceRegWith(Dst, Src); Copy->eraseFromParent(); @@ -429,8 +438,10 @@ void TailDuplicatePass::DuplicateInstruction(MachineInstr *MI, AddSSAUpdateEntry(Reg, NewReg, PredBB); } else { DenseMap::iterator VI = LocalVRMap.find(Reg); - if (VI != LocalVRMap.end()) + if (VI != LocalVRMap.end()) { MO.setReg(VI->second); + MRI->constrainRegClass(VI->second, MRI->getRegClass(Reg)); + } } } PredBB->insert(PredBB->instr_end(), NewMI); @@ -775,6 +786,23 @@ TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB, // Remove PredBB's unconditional branch. TII->RemoveBranch(*PredBB); + if (RS && !TailBB->livein_empty()) { + // Update PredBB livein. 
+    RS->enterBasicBlock(PredBB);
+    if (!PredBB->empty())
+      RS->forward(prior(PredBB->end()));
+    BitVector RegsLiveAtExit(TRI->getNumRegs());
+    RS->getRegsUsed(RegsLiveAtExit, false);
+    for (MachineBasicBlock::livein_iterator I = TailBB->livein_begin(),
+         E = TailBB->livein_end(); I != E; ++I) {
+      if (!RegsLiveAtExit[*I])
+        // If a register is previously livein to the tail but it's not live
+        // at the end of predecessor BB, then it should be added to its
+        // livein list.
+        PredBB->addLiveIn(*I);
+    }
+  }
+
   // Clone the contents of TailBB into PredBB.
   DenseMap<unsigned, unsigned> LocalVRMap;
   SmallVector<std::pair<unsigned, unsigned>, 4> CopyInfos;
diff --git a/lib/CodeGen/TargetInstrInfoImpl.cpp b/lib/CodeGen/TargetInstrInfoImpl.cpp
index 2beb928..ddee6b2 100644
--- a/lib/CodeGen/TargetInstrInfoImpl.cpp
+++ b/lib/CodeGen/TargetInstrInfoImpl.cpp
@@ -501,6 +501,14 @@ CreateTargetHazardRecognizer(const TargetMachine *TM,
   return new ScheduleHazardRecognizer();
 }
 
+// Default implementation of CreateTargetMIHazardRecognizer.
+ScheduleHazardRecognizer *TargetInstrInfoImpl::
+CreateTargetMIHazardRecognizer(const InstrItineraryData *II,
+                               const ScheduleDAG *DAG) const {
+  return (ScheduleHazardRecognizer *)
+    new ScoreboardHazardRecognizer(II, DAG, "misched");
+}
+
 // Default implementation of CreateTargetPostRAHazardRecognizer.
 ScheduleHazardRecognizer *TargetInstrInfoImpl::
 CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II,
@@ -509,6 +517,10 @@ CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II,
     new ScoreboardHazardRecognizer(II, DAG, "post-RA-sched");
 }
 
+//===----------------------------------------------------------------------===//
+// SelectionDAG latency interface.
+//===----------------------------------------------------------------------===//
+
 int
 TargetInstrInfoImpl::getOperandLatency(const InstrItineraryData *ItinData,
                                        SDNode *DefNode, unsigned DefIdx,
@@ -537,3 +549,201 @@ int TargetInstrInfoImpl::getInstrLatency(const InstrItineraryData *ItinData,
   return ItinData->getStageLatency(get(N->getMachineOpcode()).getSchedClass());
 }
+//===----------------------------------------------------------------------===//
+// MachineInstr latency interface.
+//===----------------------------------------------------------------------===//
+
+unsigned
+TargetInstrInfoImpl::getNumMicroOps(const InstrItineraryData *ItinData,
+                                    const MachineInstr *MI) const {
+  if (!ItinData || ItinData->isEmpty())
+    return 1;
+
+  unsigned Class = MI->getDesc().getSchedClass();
+  int UOps = ItinData->Itineraries[Class].NumMicroOps;
+  if (UOps >= 0)
+    return UOps;
+
+  // The # of u-ops is dynamically determined. The specific target should
+  // override this function to return the right number.
+  return 1;
+}
+
+/// Return the default expected latency for a def based on its opcode.
+unsigned TargetInstrInfo::defaultDefLatency(const MCSchedModel *SchedModel,
+                                            const MachineInstr *DefMI) const {
+  if (DefMI->mayLoad())
+    return SchedModel->LoadLatency;
+  if (isHighLatencyDef(DefMI->getOpcode()))
+    return SchedModel->HighLatency;
+  return 1;
+}
+
+unsigned TargetInstrInfoImpl::
+getInstrLatency(const InstrItineraryData *ItinData,
+                const MachineInstr *MI,
+                unsigned *PredCost) const {
+  // Default to one cycle for no itinerary. However, an "empty" itinerary may
+  // still have a MinLatency property, which getStageLatency checks.
+  if (!ItinData)
+    return MI->mayLoad() ? 2 : 1;
+
+  return ItinData->getStageLatency(MI->getDesc().getSchedClass());
+}
+
+bool TargetInstrInfoImpl::hasLowDefLatency(const InstrItineraryData *ItinData,
+                                           const MachineInstr *DefMI,
+                                           unsigned DefIdx) const {
+  if (!ItinData || ItinData->isEmpty())
+    return false;
+
+  unsigned DefClass = DefMI->getDesc().getSchedClass();
+  int DefCycle = ItinData->getOperandCycle(DefClass, DefIdx);
+  return (DefCycle != -1 && DefCycle <= 1);
+}
+
+/// Both DefMI and UseMI must be valid. By default, call directly to the
+/// itinerary. This may be overridden by the target.
+int TargetInstrInfoImpl::
+getOperandLatency(const InstrItineraryData *ItinData,
+                  const MachineInstr *DefMI, unsigned DefIdx,
+                  const MachineInstr *UseMI, unsigned UseIdx) const {
+  unsigned DefClass = DefMI->getDesc().getSchedClass();
+  unsigned UseClass = UseMI->getDesc().getSchedClass();
+  return ItinData->getOperandLatency(DefClass, DefIdx, UseClass, UseIdx);
+}
+
+/// If we can determine the operand latency from the def only, without itinerary
+/// lookup, do so. Otherwise return -1.
+static int computeDefOperandLatency(
+  const TargetInstrInfo *TII, const InstrItineraryData *ItinData,
+  const MachineInstr *DefMI, bool FindMin) {
+
+  // Let the target hook getInstrLatency handle missing itineraries.
+  if (!ItinData)
+    return TII->getInstrLatency(ItinData, DefMI);
+
+  // Return a latency based on the itinerary properties and defining instruction
+  // if possible. Some common subtargets don't require per-operand latency,
+  // especially for minimum latencies.
+  if (FindMin) {
+    // If MinLatency is valid, call getInstrLatency. This uses Stage latency if
+    // it exists before defaulting to MinLatency.
+    if (ItinData->SchedModel->MinLatency >= 0)
+      return TII->getInstrLatency(ItinData, DefMI);
+
+    // If MinLatency is invalid, OperandLatency is interpreted as MinLatency.
+    // For empty itineraries, short-circuit the check and default to one cycle.
+    if (ItinData->isEmpty())
+      return 1;
+  }
+  else if(ItinData->isEmpty())
+    return TII->defaultDefLatency(ItinData->SchedModel, DefMI);
+
+  // ...operand lookup required
+  return -1;
+}
+
+/// computeOperandLatency - Compute and return the latency of the given data
+/// dependent def and use when the operand indices are already known.
+///
+/// FindMin may be set to get the minimum vs. expected latency.
+unsigned TargetInstrInfo::
+computeOperandLatency(const InstrItineraryData *ItinData,
+                      const MachineInstr *DefMI, unsigned DefIdx,
+                      const MachineInstr *UseMI, unsigned UseIdx,
+                      bool FindMin) const {
+
+  int DefLatency = computeDefOperandLatency(this, ItinData, DefMI, FindMin);
+  if (DefLatency >= 0)
+    return DefLatency;
+
+  assert(ItinData && !ItinData->isEmpty() && "computeDefOperandLatency fail");
+
+  int OperLatency = getOperandLatency(ItinData, DefMI, DefIdx, UseMI, UseIdx);
+  if (OperLatency >= 0)
+    return OperLatency;
+
+  // No operand latency was found.
+  unsigned InstrLatency = getInstrLatency(ItinData, DefMI);
+
+  // Expected latency is the max of the stage latency and itinerary props.
+  if (!FindMin)
+    InstrLatency = std::max(InstrLatency,
+                            defaultDefLatency(ItinData->SchedModel, DefMI));
+  return InstrLatency;
+}
+
+/// computeOperandLatency - Compute and return the latency of the given data
+/// dependent def and use. DefMI must be a valid def. UseMI may be NULL for an
+/// unknown use. Depending on the subtarget's itinerary properties, this may or
+/// may not need to call getOperandLatency().
+///
+/// FindMin may be set to get the minimum vs. expected latency. Minimum
+/// latency is used for scheduling groups, while expected latency is for
+/// instruction cost and critical path.
+///
+/// For most subtargets, we don't need DefIdx or UseIdx to compute min latency.
+/// DefMI must be a valid definition, but UseMI may be NULL for an unknown use.
+unsigned TargetInstrInfo::
+computeOperandLatency(const InstrItineraryData *ItinData,
+                      const TargetRegisterInfo *TRI,
+                      const MachineInstr *DefMI, const MachineInstr *UseMI,
+                      unsigned Reg, bool FindMin) const {
+
+  int DefLatency = computeDefOperandLatency(this, ItinData, DefMI, FindMin);
+  if (DefLatency >= 0)
+    return DefLatency;
+
+  assert(ItinData && !ItinData->isEmpty() && "computeDefOperandLatency fail");
+
+  // Find the definition of the register in the defining instruction.
+  int DefIdx = DefMI->findRegisterDefOperandIdx(Reg);
+  if (DefIdx != -1) {
+    const MachineOperand &MO = DefMI->getOperand(DefIdx);
+    if (MO.isReg() && MO.isImplicit() &&
+        DefIdx >= (int)DefMI->getDesc().getNumOperands()) {
+      // This is an implicit def, getOperandLatency() won't return the correct
+      // latency. e.g.
+      //   %D6<def>, %D7<def> = VLD1q16 %R2<kill>, 0, ..., %Q3<imp-def>
+      //   %Q1<def> = VMULv8i16 %Q1<kill>, %Q3<kill>, ...
+      // What we want is to compute latency between def of %D6/%D7 and use of
+      // %Q3 instead.
+      unsigned Op2 = DefMI->findRegisterDefOperandIdx(Reg, false, true, TRI);
+      if (DefMI->getOperand(Op2).isReg())
+        DefIdx = Op2;
+    }
+    // For all uses of the register, calculate the maximum latency.
+    int OperLatency = -1;
+
+    // If UseMI is null, then it must be a scheduling barrier.
+    if (!UseMI) {
+      unsigned DefClass = DefMI->getDesc().getSchedClass();
+      OperLatency = ItinData->getOperandCycle(DefClass, DefIdx);
+    }
+    else {
+      for (unsigned i = 0, e = UseMI->getNumOperands(); i != e; ++i) {
+        const MachineOperand &MO = UseMI->getOperand(i);
+        if (!MO.isReg() || !MO.isUse())
+          continue;
+        unsigned MOReg = MO.getReg();
+        if (MOReg != Reg)
+          continue;
+
+        int UseCycle = getOperandLatency(ItinData, DefMI, DefIdx, UseMI, i);
+        OperLatency = std::max(OperLatency, UseCycle);
+      }
+    }
+    // If we found an operand latency, we're done.
+    if (OperLatency >= 0)
+      return OperLatency;
+  }
+  // No operand latency was found.
+  unsigned InstrLatency = getInstrLatency(ItinData, DefMI);
+
+  // Expected latency is the max of the stage latency and itinerary props.
+  if (!FindMin)
+    InstrLatency = std::max(InstrLatency,
+                            defaultDefLatency(ItinData->SchedModel, DefMI));
+  return InstrLatency;
+}
diff --git a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
index 9925185..2a2fa9e 100644
--- a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
+++ b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
@@ -93,8 +93,9 @@ getELFKindForNamedSection(StringRef Name, SectionKind K) {
   // N.B.: The defaults used in here are not the same ones used in MC.
   // We follow gcc, MC follows gas. For example, given ".section .eh_frame",
   // both gas and MC will produce a section with no flags. Given
-  // section(".eh_frame") gcc will produce
-  // .section .eh_frame,"a",@progbits
+  // section(".eh_frame") gcc will produce:
+  //
+  //   .section .eh_frame,"a",@progbits
 
   if (Name.empty() || Name[0] != '.') return K;
 
   // Some lame default implementation based on some magic section names.
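
The next two hunks switch prioritized static constructors and destructors from .ctors.N/.dtors.N (SHT_PROGBITS) to .init_array.N/.fini_array.N (SHT_INIT_ARRAY/SHT_FINI_ARRAY) when UseInitArray is set. One subtlety worth noting: .ctors entries are processed back to front, so the legacy path encodes priority as 65535 - Priority, while .init_array runs front to back and keeps the priority unchanged. A sketch of just that naming rule; the helper is hypothetical, and utostr is LLVM's unsigned-to-string utility from StringExtras.h:

// Illustration only: mirrors the section-name computation in the hunks below.
static std::string ctorSectionName(unsigned Priority, bool UseInitArray) {
  if (UseInitArray)
    return std::string(".init_array.") + utostr(Priority);
  // .ctors sections are concatenated and walked in reverse, so the
  // priority must be inverted to preserve construction order.
  return std::string(".ctors.") + utostr(65535 - Priority);
}
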
@@ -349,10 +350,17 @@ TargetLoweringObjectFileELF::getStaticCtorSection(unsigned Priority) const { if (Priority == 65535) return StaticCtorSection; - std::string Name = std::string(".ctors.") + utostr(65535 - Priority); - return getContext().getELFSection(Name, ELF::SHT_PROGBITS, - ELF::SHF_ALLOC |ELF::SHF_WRITE, - SectionKind::getDataRel()); + if (UseInitArray) { + std::string Name = std::string(".init_array.") + utostr(Priority); + return getContext().getELFSection(Name, ELF::SHT_INIT_ARRAY, + ELF::SHF_ALLOC | ELF::SHF_WRITE, + SectionKind::getDataRel()); + } else { + std::string Name = std::string(".ctors.") + utostr(65535 - Priority); + return getContext().getELFSection(Name, ELF::SHT_PROGBITS, + ELF::SHF_ALLOC |ELF::SHF_WRITE, + SectionKind::getDataRel()); + } } const MCSection * @@ -362,10 +370,35 @@ TargetLoweringObjectFileELF::getStaticDtorSection(unsigned Priority) const { if (Priority == 65535) return StaticDtorSection; - std::string Name = std::string(".dtors.") + utostr(65535 - Priority); - return getContext().getELFSection(Name, ELF::SHT_PROGBITS, - ELF::SHF_ALLOC |ELF::SHF_WRITE, - SectionKind::getDataRel()); + if (UseInitArray) { + std::string Name = std::string(".fini_array.") + utostr(Priority); + return getContext().getELFSection(Name, ELF::SHT_FINI_ARRAY, + ELF::SHF_ALLOC | ELF::SHF_WRITE, + SectionKind::getDataRel()); + } else { + std::string Name = std::string(".dtors.") + utostr(65535 - Priority); + return getContext().getELFSection(Name, ELF::SHT_PROGBITS, + ELF::SHF_ALLOC |ELF::SHF_WRITE, + SectionKind::getDataRel()); + } +} + +void +TargetLoweringObjectFileELF::InitializeELF(bool UseInitArray_) { + UseInitArray = UseInitArray_; + if (!UseInitArray) + return; + + StaticCtorSection = + getContext().getELFSection(".init_array", ELF::SHT_INIT_ARRAY, + ELF::SHF_WRITE | + ELF::SHF_ALLOC, + SectionKind::getDataRel()); + StaticDtorSection = + getContext().getELFSection(".fini_array", ELF::SHT_FINI_ARRAY, + ELF::SHF_WRITE | + ELF::SHF_ALLOC, + SectionKind::getDataRel()); } //===----------------------------------------------------------------------===// @@ -379,7 +412,7 @@ emitModuleFlags(MCStreamer &Streamer, ArrayRef ModuleFlags, Mangler *Mang, const TargetMachine &TM) const { unsigned VersionVal = 0; - unsigned GCFlags = 0; + unsigned ImageInfoFlags = 0; StringRef SectionVal; for (ArrayRef::iterator @@ -396,8 +429,9 @@ emitModuleFlags(MCStreamer &Streamer, if (Key == "Objective-C Image Info Version") VersionVal = cast(Val)->getZExtValue(); else if (Key == "Objective-C Garbage Collection" || - Key == "Objective-C GC Only") - GCFlags |= cast(Val)->getZExtValue(); + Key == "Objective-C GC Only" || + Key == "Objective-C Is Simulated") + ImageInfoFlags |= cast(Val)->getZExtValue(); else if (Key == "Objective-C Image Info Section") SectionVal = cast(Val)->getString(); } @@ -424,7 +458,7 @@ emitModuleFlags(MCStreamer &Streamer, Streamer.EmitLabel(getContext(). 
GetOrCreateSymbol(StringRef("L_OBJC_IMAGE_INFO"))); Streamer.EmitIntValue(VersionVal, 4); - Streamer.EmitIntValue(GCFlags, 4); + Streamer.EmitIntValue(ImageInfoFlags, 4); Streamer.AddBlankLine(); } diff --git a/lib/CodeGen/TwoAddressInstructionPass.cpp b/lib/CodeGen/TwoAddressInstructionPass.cpp index c30b133..aa601af 100644 --- a/lib/CodeGen/TwoAddressInstructionPass.cpp +++ b/lib/CodeGen/TwoAddressInstructionPass.cpp @@ -30,6 +30,7 @@ #define DEBUG_TYPE "twoaddrinstr" #include "llvm/CodeGen/Passes.h" #include "llvm/Function.h" +#include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/LiveVariables.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" @@ -55,18 +56,19 @@ STATISTIC(NumCommuted , "Number of instructions commuted to coalesce"); STATISTIC(NumAggrCommuted , "Number of instructions aggressively commuted"); STATISTIC(NumConvertedTo3Addr, "Number of instructions promoted to 3-address"); STATISTIC(Num3AddrSunk, "Number of 3-address instructions sunk"); -STATISTIC(NumReMats, "Number of instructions re-materialized"); -STATISTIC(NumDeletes, "Number of dead instructions deleted"); STATISTIC(NumReSchedUps, "Number of instructions re-scheduled up"); STATISTIC(NumReSchedDowns, "Number of instructions re-scheduled down"); namespace { class TwoAddressInstructionPass : public MachineFunctionPass { + MachineFunction *MF; const TargetInstrInfo *TII; const TargetRegisterInfo *TRI; const InstrItineraryData *InstrItins; MachineRegisterInfo *MRI; LiveVariables *LV; + SlotIndexes *Indexes; + LiveIntervals *LIS; AliasAnalysis *AA; CodeGenOpt::Level OptLevel; @@ -92,17 +94,10 @@ namespace { unsigned Reg, MachineBasicBlock::iterator OldPos); - bool isProfitableToReMat(unsigned Reg, const TargetRegisterClass *RC, - MachineInstr *MI, MachineInstr *DefMI, - MachineBasicBlock *MBB, unsigned Loc); - bool NoUseAfterLastDef(unsigned Reg, MachineBasicBlock *MBB, unsigned Dist, unsigned &LastDef); - MachineInstr *FindLastUseInMBB(unsigned Reg, MachineBasicBlock *MBB, - unsigned Dist); - - bool isProfitableToCommute(unsigned regB, unsigned regC, + bool isProfitableToCommute(unsigned regA, unsigned regB, unsigned regC, MachineInstr *MI, MachineBasicBlock *MBB, unsigned Dist); @@ -117,14 +112,6 @@ namespace { MachineFunction::iterator &mbbi, unsigned RegA, unsigned RegB, unsigned Dist); - typedef std::pair, MachineInstr*> NewKill; - bool canUpdateDeletedKills(SmallVector &Kills, - SmallVector &NewKills, - MachineBasicBlock *MBB, unsigned Dist); - bool DeleteUnusedInstr(MachineBasicBlock::iterator &mi, - MachineBasicBlock::iterator &nmi, - MachineFunction::iterator &mbbi, unsigned Dist); - bool isDefTooClose(unsigned Reg, unsigned Dist, MachineInstr *MI, MachineBasicBlock *MBB); @@ -150,6 +137,11 @@ namespace { void ProcessCopy(MachineInstr *MI, MachineBasicBlock *MBB, SmallPtrSet &Processed); + typedef SmallVector, 4> TiedPairList; + typedef SmallDenseMap TiedOperandMap; + bool collectTiedOperands(MachineInstr *MI, TiedOperandMap&); + void processTiedPairs(MachineInstr *MI, TiedPairList&, unsigned &Dist); + void CoalesceExtSubRegs(SmallVector &Srcs, unsigned DstReg); /// EliminateRegSequences - Eliminate REG_SEQUENCE instructions as part @@ -167,6 +159,8 @@ namespace { AU.setPreservesCFG(); AU.addRequired(); AU.addPreserved(); + AU.addPreserved(); + AU.addPreserved(); AU.addPreservedID(MachineLoopInfoID); AU.addPreservedID(MachineDominatorsID); MachineFunctionPass::getAnalysisUsage(AU); @@ -241,7 +235,7 @@ bool 
TwoAddressInstructionPass::Sink3AddrInstruction(MachineBasicBlock *MBB, // appropriate location, we can try to sink the current instruction // past it. if (!KillMI || KillMI->getParent() != MBB || KillMI == MI || - KillMI->isTerminator()) + KillMI == OldPos || KillMI->isTerminator()) return false; // If any of the definitions are used by another instruction between the @@ -284,6 +278,7 @@ bool TwoAddressInstructionPass::Sink3AddrInstruction(MachineBasicBlock *MBB, } } } + assert(KillMO && "Didn't find kill"); // Update kill and LV information. KillMO->setIsKill(false); @@ -297,59 +292,13 @@ bool TwoAddressInstructionPass::Sink3AddrInstruction(MachineBasicBlock *MBB, MBB->remove(MI); MBB->insert(KillPos, MI); + if (LIS) + LIS->handleMove(MI); + ++Num3AddrSunk; return true; } -/// isTwoAddrUse - Return true if the specified MI is using the specified -/// register as a two-address operand. -static bool isTwoAddrUse(MachineInstr *UseMI, unsigned Reg) { - const MCInstrDesc &MCID = UseMI->getDesc(); - for (unsigned i = 0, e = MCID.getNumOperands(); i != e; ++i) { - MachineOperand &MO = UseMI->getOperand(i); - if (MO.isReg() && MO.getReg() == Reg && - (MO.isDef() || UseMI->isRegTiedToDefOperand(i))) - // Earlier use is a two-address one. - return true; - } - return false; -} - -/// isProfitableToReMat - Return true if the heuristics determines it is likely -/// to be profitable to re-materialize the definition of Reg rather than copy -/// the register. -bool -TwoAddressInstructionPass::isProfitableToReMat(unsigned Reg, - const TargetRegisterClass *RC, - MachineInstr *MI, MachineInstr *DefMI, - MachineBasicBlock *MBB, unsigned Loc) { - bool OtherUse = false; - for (MachineRegisterInfo::use_nodbg_iterator UI = MRI->use_nodbg_begin(Reg), - UE = MRI->use_nodbg_end(); UI != UE; ++UI) { - MachineOperand &UseMO = UI.getOperand(); - MachineInstr *UseMI = UseMO.getParent(); - MachineBasicBlock *UseMBB = UseMI->getParent(); - if (UseMBB == MBB) { - DenseMap::iterator DI = DistanceMap.find(UseMI); - if (DI != DistanceMap.end() && DI->second == Loc) - continue; // Current use. - OtherUse = true; - // There is at least one other use in the MBB that will clobber the - // register. - if (isTwoAddrUse(UseMI, Reg)) - return true; - } - } - - // If other uses in MBB are not two-address uses, then don't remat. - if (OtherUse) - return false; - - // No other uses in the same block, remat if it's defined in the same - // block so it does not unnecessarily extend the live range. - return MBB == DefMI->getParent(); -} - /// NoUseAfterLastDef - Return true if there are no intervening uses between the /// last instruction in the MBB that defines the specified register and the /// two-address instruction which is being processed. 
It also returns the last @@ -377,31 +326,6 @@ bool TwoAddressInstructionPass::NoUseAfterLastDef(unsigned Reg, return !(LastUse > LastDef && LastUse < Dist); } -MachineInstr *TwoAddressInstructionPass::FindLastUseInMBB(unsigned Reg, - MachineBasicBlock *MBB, - unsigned Dist) { - unsigned LastUseDist = 0; - MachineInstr *LastUse = 0; - for (MachineRegisterInfo::reg_iterator I = MRI->reg_begin(Reg), - E = MRI->reg_end(); I != E; ++I) { - MachineOperand &MO = I.getOperand(); - MachineInstr *MI = MO.getParent(); - if (MI->getParent() != MBB || MI->isDebugValue()) - continue; - DenseMap::iterator DI = DistanceMap.find(MI); - if (DI == DistanceMap.end()) - continue; - if (DI->second >= Dist) - continue; - - if (MO.isUse() && DI->second > LastUseDist) { - LastUse = DI->first; - LastUseDist = DI->second; - } - } - return LastUse; -} - /// isCopyToReg - Return true if the specified MI is a copy instruction or /// a extract_subreg instruction. It also returns the source and destination /// registers and whether they are physical registers by reference. @@ -483,32 +407,6 @@ static bool isTwoAddrUse(MachineInstr &MI, unsigned Reg, unsigned &DstReg) { return false; } -/// findLocalKill - Look for an instruction below MI in the MBB that kills the -/// specified register. Returns null if there are any other Reg use between the -/// instructions. -static -MachineInstr *findLocalKill(unsigned Reg, MachineBasicBlock *MBB, - MachineInstr *MI, MachineRegisterInfo *MRI, - DenseMap &DistanceMap) { - MachineInstr *KillMI = 0; - for (MachineRegisterInfo::use_nodbg_iterator - UI = MRI->use_nodbg_begin(Reg), - UE = MRI->use_nodbg_end(); UI != UE; ++UI) { - MachineInstr *UseMI = &*UI; - if (UseMI == MI || UseMI->getParent() != MBB) - continue; - if (DistanceMap.count(UseMI)) - continue; - if (!UI.getOperand().isKill()) - return 0; - if (KillMI) - return 0; // -O0 kill markers cannot be trusted? - KillMI = UseMI; - } - - return KillMI; -} - /// findOnlyInterestingUse - Given a register, if has a single in-basic block /// use, return the use instruction if it's a copy or a two-address use. static @@ -564,10 +462,11 @@ regsAreCompatible(unsigned RegA, unsigned RegB, const TargetRegisterInfo *TRI) { } -/// isProfitableToReMat - Return true if it's potentially profitable to commute +/// isProfitableToCommute - Return true if it's potentially profitable to commute /// the two-address instruction that's being processed. bool -TwoAddressInstructionPass::isProfitableToCommute(unsigned regB, unsigned regC, +TwoAddressInstructionPass::isProfitableToCommute(unsigned regA, unsigned regB, + unsigned regC, MachineInstr *MI, MachineBasicBlock *MBB, unsigned Dist) { if (OptLevel == CodeGenOpt::None) @@ -604,15 +503,15 @@ TwoAddressInstructionPass::isProfitableToCommute(unsigned regB, unsigned regC, // %reg1026 = ADD %reg1024, %reg1025 // r0 = MOV %reg1026 // Commute the ADD to hopefully eliminate an otherwise unavoidable copy. 
- unsigned FromRegB = getMappedReg(regB, SrcRegMap); - unsigned FromRegC = getMappedReg(regC, SrcRegMap); - unsigned ToRegB = getMappedReg(regB, DstRegMap); - unsigned ToRegC = getMappedReg(regC, DstRegMap); - if ((FromRegB && ToRegB && !regsAreCompatible(FromRegB, ToRegB, TRI)) && - ((!FromRegC && !ToRegC) || - regsAreCompatible(FromRegB, ToRegC, TRI) || - regsAreCompatible(FromRegC, ToRegB, TRI))) - return true; + unsigned ToRegA = getMappedReg(regA, DstRegMap); + if (ToRegA) { + unsigned FromRegB = getMappedReg(regB, SrcRegMap); + unsigned FromRegC = getMappedReg(regC, SrcRegMap); + bool BComp = !FromRegB || regsAreCompatible(FromRegB, ToRegA, TRI); + bool CComp = !FromRegC || regsAreCompatible(FromRegC, ToRegA, TRI); + if (BComp != CComp) + return !BComp && CComp; + } // If there is a use of regC between its last def (could be livein) and this // instruction, then bail. @@ -653,6 +552,8 @@ TwoAddressInstructionPass::CommuteInstruction(MachineBasicBlock::iterator &mi, if (LV) // Update live variables LV->replaceKillInstruction(RegC, MI, NewMI); + if (Indexes) + Indexes->replaceMachineInstrInMaps(MI, NewMI); mbbi->insert(mi, NewMI); // Insert the new inst mbbi->erase(mi); // Nuke the old inst. @@ -701,6 +602,9 @@ TwoAddressInstructionPass::ConvertInstTo3Addr(MachineBasicBlock::iterator &mi, DEBUG(dbgs() << "2addr: TO 3-ADDR: " << *NewMI); bool Sunk = false; + if (Indexes) + Indexes->replaceMachineInstrInMaps(mi, NewMI); + if (NewMI->findRegisterUseOperand(RegB, false, TRI)) // FIXME: Temporary workaround. If the new instruction doesn't // uses RegB, convertToThreeAddress must have created more @@ -810,92 +714,6 @@ void TwoAddressInstructionPass::ProcessCopy(MachineInstr *MI, return; } -/// isSafeToDelete - If the specified instruction does not produce any side -/// effects and all of its defs are dead, then it's safe to delete. -static bool isSafeToDelete(MachineInstr *MI, - const TargetInstrInfo *TII, - SmallVector &Kills) { - if (MI->mayStore() || MI->isCall()) - return false; - if (MI->isTerminator() || MI->hasUnmodeledSideEffects()) - return false; - - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI->getOperand(i); - if (!MO.isReg()) - continue; - if (MO.isDef() && !MO.isDead()) - return false; - if (MO.isUse() && MO.isKill()) - Kills.push_back(MO.getReg()); - } - return true; -} - -/// canUpdateDeletedKills - Check if all the registers listed in Kills are -/// killed by instructions in MBB preceding the current instruction at -/// position Dist. If so, return true and record information about the -/// preceding kills in NewKills. -bool TwoAddressInstructionPass:: -canUpdateDeletedKills(SmallVector &Kills, - SmallVector &NewKills, - MachineBasicBlock *MBB, unsigned Dist) { - while (!Kills.empty()) { - unsigned Kill = Kills.back(); - Kills.pop_back(); - if (TargetRegisterInfo::isPhysicalRegister(Kill)) - return false; - - MachineInstr *LastKill = FindLastUseInMBB(Kill, MBB, Dist); - if (!LastKill) - return false; - - bool isModRef = LastKill->definesRegister(Kill); - NewKills.push_back(std::make_pair(std::make_pair(Kill, isModRef), - LastKill)); - } - return true; -} - -/// DeleteUnusedInstr - If an instruction with a tied register operand can -/// be safely deleted, just delete it. -bool -TwoAddressInstructionPass::DeleteUnusedInstr(MachineBasicBlock::iterator &mi, - MachineBasicBlock::iterator &nmi, - MachineFunction::iterator &mbbi, - unsigned Dist) { - // Check if the instruction has no side effects and if all its defs are dead. 
-  SmallVector Kills;
-  if (!isSafeToDelete(mi, TII, Kills))
-    return false;
-
-  // If this instruction kills some virtual registers, we need to
-  // update the kill information. If it's not possible to do so,
-  // then bail out.
-  SmallVector NewKills;
-  if (!canUpdateDeletedKills(Kills, NewKills, &*mbbi, Dist))
-    return false;
-
-  if (LV) {
-    while (!NewKills.empty()) {
-      MachineInstr *NewKill = NewKills.back().second;
-      unsigned Kill = NewKills.back().first.first;
-      bool isDead = NewKills.back().first.second;
-      NewKills.pop_back();
-      if (LV->removeVirtualRegisterKilled(Kill, mi)) {
-        if (isDead)
-          LV->addVirtualRegisterDead(Kill, NewKill);
-        else
-          LV->addVirtualRegisterKilled(Kill, NewKill);
-      }
-    }
-  }
-
-  mbbi->erase(mi); // Nuke the old inst.
-  mi = nmi;
-  return true;
-}
-
 /// RescheduleMIBelowKill - If there is one more local instruction that reads
 /// 'Reg' and it kills 'Reg', consider moving the instruction below the kill
 /// instruction in order to eliminate the need for the copy.
@@ -904,14 +722,19 @@ TwoAddressInstructionPass::RescheduleMIBelowKill(MachineBasicBlock *MBB,
                                      MachineBasicBlock::iterator &mi,
                                      MachineBasicBlock::iterator &nmi,
                                      unsigned Reg) {
+  // Bail immediately if we don't have LV available. We use it to find kills
+  // efficiently.
+  if (!LV)
+    return false;
+
   MachineInstr *MI = &*mi;
   DenseMap<MachineInstr*, unsigned>::iterator DI = DistanceMap.find(MI);
   if (DI == DistanceMap.end())
     // Must be created from unfolded load. Don't waste time trying this.
     return false;
 
-  MachineInstr *KillMI = findLocalKill(Reg, MBB, mi, MRI, DistanceMap);
-  if (!KillMI || KillMI->isCopy() || KillMI->isCopyLike())
+  MachineInstr *KillMI = LV->getVarInfo(Reg).findKill(MBB);
+  if (!KillMI || MI == KillMI || KillMI->isCopy() || KillMI->isCopyLike())
     // Don't mess with copies, they may be coalesced later.
     return false;
@@ -998,6 +821,12 @@ TwoAddressInstructionPass::RescheduleMIBelowKill(MachineBasicBlock *MBB,
         ((MO.isKill() && Uses.count(MOReg)) || Kills.count(MOReg)))
       // Don't want to extend other live ranges and update kills.
       return false;
+    if (MOReg == Reg && !MO.isKill())
+      // We can't schedule across a use of the register in question.
+      return false;
+    // Ensure that if this is the register in question, it's the kill we expect.
+ assert((MOReg != Reg || OtherMI == KillMI) && + "Found multiple kills of a register in a basic block"); } } } @@ -1011,20 +840,13 @@ TwoAddressInstructionPass::RescheduleMIBelowKill(MachineBasicBlock *MBB, MBB->splice(KillPos, MBB, From, To); DistanceMap.erase(DI); - if (LV) { - // Update live variables - LV->removeVirtualRegisterKilled(Reg, KillMI); - LV->addVirtualRegisterKilled(Reg, MI); - } else { - for (unsigned i = 0, e = KillMI->getNumOperands(); i != e; ++i) { - MachineOperand &MO = KillMI->getOperand(i); - if (!MO.isReg() || !MO.isUse() || MO.getReg() != Reg) - continue; - MO.setIsKill(false); - } - MI->addRegisterKilled(Reg, 0); - } + // Update live variables + LV->removeVirtualRegisterKilled(Reg, KillMI); + LV->addVirtualRegisterKilled(Reg, MI); + if (LIS) + LIS->handleMove(MI); + DEBUG(dbgs() << "\trescheduled below kill: " << *KillMI); return true; } @@ -1045,7 +867,7 @@ bool TwoAddressInstructionPass::isDefTooClose(unsigned Reg, unsigned Dist, return true; // Below MI unsigned DefDist = DDI->second; assert(Dist > DefDist && "Visited def already?"); - if (TII->getInstrLatency(InstrItins, DefMI) > (int)(Dist - DefDist)) + if (TII->getInstrLatency(InstrItins, DefMI) > (Dist - DefDist)) return true; } return false; @@ -1060,14 +882,19 @@ TwoAddressInstructionPass::RescheduleKillAboveMI(MachineBasicBlock *MBB, MachineBasicBlock::iterator &mi, MachineBasicBlock::iterator &nmi, unsigned Reg) { + // Bail immediately if we don't have LV available. We use it to find kills + // efficiently. + if (!LV) + return false; + MachineInstr *MI = &*mi; DenseMap::iterator DI = DistanceMap.find(MI); if (DI == DistanceMap.end()) // Must be created from unfolded load. Don't waste time trying this. return false; - MachineInstr *KillMI = findLocalKill(Reg, MBB, mi, MRI, DistanceMap); - if (!KillMI || KillMI->isCopy() || KillMI->isCopyLike()) + MachineInstr *KillMI = LV->getVarInfo(Reg).findKill(MBB); + if (!KillMI || MI == KillMI || KillMI->isCopy() || KillMI->isCopyLike()) // Don't mess with copies, they may be coalesced later. return false; @@ -1093,6 +920,8 @@ TwoAddressInstructionPass::RescheduleKillAboveMI(MachineBasicBlock *MBB, continue; if (isDefTooClose(MOReg, DI->second, MI, MBB)) return false; + if (MOReg == Reg && !MO.isKill()) + return false; Uses.insert(MOReg); if (MO.isKill() && MOReg != Reg) Kills.insert(MOReg); @@ -1134,6 +963,9 @@ TwoAddressInstructionPass::RescheduleKillAboveMI(MachineBasicBlock *MBB, if (Kills.count(MOReg)) // Don't want to extend other live ranges and update kills. return false; + if (OtherMI != MI && MOReg == Reg && !MO.isKill()) + // We can't schedule across a use of the register in question. + return false; } else { OtherDefs.push_back(MOReg); } @@ -1164,19 +996,13 @@ TwoAddressInstructionPass::RescheduleKillAboveMI(MachineBasicBlock *MBB, nmi = llvm::prior(InsertPos); // Backtrack so we process the moved instr. 
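Both rescheduling routines now obtain the kill from LiveVariables instead of the deleted findLocalKill scan. A toy version of that query over an invented block representation with one kill flag per use; nothing here is LLVM API:

    #include <cstddef>
    #include <vector>

    struct Operand { unsigned Reg; bool IsKill; };
    struct Instr   { std::vector<Operand> Uses; };

    // Index of the instruction that kills Reg, or -1 when no kill is found
    // or the flags are inconsistent (two kills); the caller then simply
    // refuses to reschedule.
    static int findKill(const std::vector<Instr> &Block, unsigned Reg) {
      int Kill = -1;
      for (std::size_t i = 0, e = Block.size(); i != e; ++i)
        for (std::size_t u = 0, ue = Block[i].Uses.size(); u != ue; ++u)
          if (Block[i].Uses[u].Reg == Reg && Block[i].Uses[u].IsKill) {
            if (Kill != -1)
              return -1;               // Stale kill flags: bail out safely.
            Kill = static_cast<int>(i);
          }
      return Kill;
    }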
   DistanceMap.erase(DI);
-  if (LV) {
-    // Update live variables
-    LV->removeVirtualRegisterKilled(Reg, KillMI);
-    LV->addVirtualRegisterKilled(Reg, MI);
-  } else {
-    for (unsigned i = 0, e = KillMI->getNumOperands(); i != e; ++i) {
-      MachineOperand &MO = KillMI->getOperand(i);
-      if (!MO.isReg() || !MO.isUse() || MO.getReg() != Reg)
-        continue;
-      MO.setIsKill(false);
-    }
-    MI->addRegisterKilled(Reg, 0);
-  }
+  // Update live variables
+  LV->removeVirtualRegisterKilled(Reg, KillMI);
+  LV->addVirtualRegisterKilled(Reg, MI);
+
+  if (LIS)
+    LIS->handleMove(KillMI);
+
+  DEBUG(dbgs() << "\trescheduled kill: " << *KillMI);
   return true;
 }
@@ -1201,15 +1027,10 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi,
   assert(TargetRegisterInfo::isVirtualRegister(regB) &&
          "cannot make instruction into two-address form");
-
-  // If regA is dead and the instruction can be deleted, just delete
-  // it so it doesn't clobber regB.
   bool regBKilled = isKilled(MI, regB, MRI, TII);
-  if (!regBKilled && MI.getOperand(DstIdx).isDead() &&
-      DeleteUnusedInstr(mi, nmi, mbbi, Dist)) {
-    ++NumDeletes;
-    return true; // Done with this instruction.
-  }
+
+  if (TargetRegisterInfo::isVirtualRegister(regA))
+    ScanUses(regA, &*mbbi, Processed);
 
   // Check if it is profitable to commute the operands.
   unsigned SrcOp1, SrcOp2;
@@ -1230,7 +1051,7 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi,
       // If C dies but B does not, swap the B and C operands.
       // This makes the live ranges of A and C joinable.
       TryCommute = true;
-    else if (isProfitableToCommute(regB, regC, &MI, mbbi, Dist)) {
+    else if (isProfitableToCommute(regA, regB, regC, &MI, mbbi, Dist)) {
       TryCommute = true;
       AggressiveCommute = true;
     }
@@ -1252,9 +1073,6 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi,
     return true;
   }
 
-  if (TargetRegisterInfo::isVirtualRegister(regA))
-    ScanUses(regA, &*mbbi, Processed);
-
   if (MI.isConvertibleTo3Addr()) {
     // This instruction is potentially convertible to a true
     // three-address instruction. Check if it is profitable.
@@ -1293,15 +1111,14 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi,
   if (NewOpc != 0) {
     const MCInstrDesc &UnfoldMCID = TII->get(NewOpc);
     if (UnfoldMCID.getNumDefs() == 1) {
-      MachineFunction &MF = *mbbi->getParent();
-
       // Unfold the load.
       DEBUG(dbgs() << "2addr: UNFOLDING: " << MI);
       const TargetRegisterClass *RC =
-        TII->getRegClass(UnfoldMCID, LoadRegIndex, TRI);
+        TRI->getAllocatableClass(
+          TII->getRegClass(UnfoldMCID, LoadRegIndex, TRI, *MF));
       unsigned Reg = MRI->createVirtualRegister(RC);
       SmallVector NewMIs;
-      if (!TII->unfoldMemoryOperand(MF, &MI, Reg,
+      if (!TII->unfoldMemoryOperand(*MF, &MI, Reg,
                                     /*UnfoldLoad=*/true,/*UnfoldStore=*/false,
                                     NewMIs)) {
         DEBUG(dbgs() << "2addr: ABANDONING UNFOLD\n");
@@ -1378,15 +1195,177 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi,
   return false;
 }
 
+// Collect tied operands of MI that need to be handled.
+// Rewrite trivial cases immediately.
+// Return true if any tied operands were found, including the trivial ones.
+bool TwoAddressInstructionPass::
+collectTiedOperands(MachineInstr *MI, TiedOperandMap &TiedOperands) {
+  const MCInstrDesc &MCID = MI->getDesc();
+  bool AnyOps = false;
+  unsigned NumOps = MI->isInlineAsm() ?
+ MI->getNumOperands() : MCID.getNumOperands(); + + for (unsigned SrcIdx = 0; SrcIdx < NumOps; ++SrcIdx) { + unsigned DstIdx = 0; + if (!MI->isRegTiedToDefOperand(SrcIdx, &DstIdx)) + continue; + AnyOps = true; + MachineOperand &SrcMO = MI->getOperand(SrcIdx); + MachineOperand &DstMO = MI->getOperand(DstIdx); + unsigned SrcReg = SrcMO.getReg(); + unsigned DstReg = DstMO.getReg(); + // Tied constraint already satisfied? + if (SrcReg == DstReg) + continue; + + assert(SrcReg && SrcMO.isUse() && "two address instruction invalid"); + + // Deal with uses immediately - simply rewrite the src operand. + if (SrcMO.isUndef()) { + // Constrain the DstReg register class if required. + if (TargetRegisterInfo::isVirtualRegister(DstReg)) + if (const TargetRegisterClass *RC = TII->getRegClass(MCID, SrcIdx, + TRI, *MF)) + MRI->constrainRegClass(DstReg, RC); + SrcMO.setReg(DstReg); + DEBUG(dbgs() << "\t\trewrite undef:\t" << *MI); + continue; + } + TiedOperands[SrcReg].push_back(std::make_pair(SrcIdx, DstIdx)); + } + return AnyOps; +} + +// Process a list of tied MI operands that all use the same source register. +// The tied pairs are of the form (SrcIdx, DstIdx). +void +TwoAddressInstructionPass::processTiedPairs(MachineInstr *MI, + TiedPairList &TiedPairs, + unsigned &Dist) { + bool IsEarlyClobber = false; + bool RemovedKillFlag = false; + bool AllUsesCopied = true; + unsigned LastCopiedReg = 0; + unsigned RegB = 0; + for (unsigned tpi = 0, tpe = TiedPairs.size(); tpi != tpe; ++tpi) { + unsigned SrcIdx = TiedPairs[tpi].first; + unsigned DstIdx = TiedPairs[tpi].second; + + const MachineOperand &DstMO = MI->getOperand(DstIdx); + unsigned RegA = DstMO.getReg(); + IsEarlyClobber |= DstMO.isEarlyClobber(); + + // Grab RegB from the instruction because it may have changed if the + // instruction was commuted. + RegB = MI->getOperand(SrcIdx).getReg(); + + if (RegA == RegB) { + // The register is tied to multiple destinations (or else we would + // not have continued this far), but this use of the register + // already matches the tied destination. Leave it. + AllUsesCopied = false; + continue; + } + LastCopiedReg = RegA; + + assert(TargetRegisterInfo::isVirtualRegister(RegB) && + "cannot make instruction into two-address form"); + +#ifndef NDEBUG + // First, verify that we don't have a use of "a" in the instruction + // (a = b + a for example) because our transformation will not + // work. This should never occur because we are in SSA form. + for (unsigned i = 0; i != MI->getNumOperands(); ++i) + assert(i == DstIdx || + !MI->getOperand(i).isReg() || + MI->getOperand(i).getReg() != RegA); +#endif + + // Emit a copy. + BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), + TII->get(TargetOpcode::COPY), RegA).addReg(RegB); + + // Update DistanceMap. + MachineBasicBlock::iterator PrevMI = MI; + --PrevMI; + DistanceMap.insert(std::make_pair(PrevMI, Dist)); + DistanceMap[MI] = ++Dist; + + SlotIndex CopyIdx; + if (Indexes) + CopyIdx = Indexes->insertMachineInstrInMaps(PrevMI).getRegSlot(); + + DEBUG(dbgs() << "\t\tprepend:\t" << *PrevMI); + + MachineOperand &MO = MI->getOperand(SrcIdx); + assert(MO.isReg() && MO.getReg() == RegB && MO.isUse() && + "inconsistent operand info for 2-reg pass"); + if (MO.isKill()) { + MO.setIsKill(false); + RemovedKillFlag = true; + } + + // Make sure regA is a legal regclass for the SrcIdx operand. 
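The heart of processTiedPairs, whose hunk resumes just below with the register-class constraint, is mechanical: for each tied pair whose registers differ, prepend a COPY and rename the tied use. The same move, sketched on an invented string-based instruction stream:

    #include <cstddef>
    #include <string>
    #include <vector>

    struct Inst { std::string Text; };

    // Rewrite "a = b op c" into "a = COPY b; a = a op c": insert the copy
    // before position Pos and satisfy the tied constraint by renaming.
    static void satisfyTie(std::vector<Inst> &Block, std::size_t Pos,
                           std::string &SrcReg, const std::string &DstReg) {
      if (SrcReg == DstReg)
        return;                          // Constraint already satisfied.
      Inst Copy = { DstReg + " = COPY " + SrcReg };
      Block.insert(Block.begin() + Pos, Copy);
      SrcReg = DstReg;                   // The tied use now reads the copy.
    }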
+ if (TargetRegisterInfo::isVirtualRegister(RegA) && + TargetRegisterInfo::isVirtualRegister(RegB)) + MRI->constrainRegClass(RegA, MRI->getRegClass(RegB)); + + MO.setReg(RegA); + + // Propagate SrcRegMap. + SrcRegMap[RegA] = RegB; + } + + + if (AllUsesCopied) { + if (!IsEarlyClobber) { + // Replace other (un-tied) uses of regB with LastCopiedReg. + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (MO.isReg() && MO.getReg() == RegB && MO.isUse()) { + if (MO.isKill()) { + MO.setIsKill(false); + RemovedKillFlag = true; + } + MO.setReg(LastCopiedReg); + } + } + } + + // Update live variables for regB. + if (RemovedKillFlag && LV && LV->getVarInfo(RegB).removeKill(MI)) { + MachineBasicBlock::iterator PrevMI = MI; + --PrevMI; + LV->addVirtualRegisterKilled(RegB, PrevMI); + } + + } else if (RemovedKillFlag) { + // Some tied uses of regB matched their destination registers, so + // regB is still used in this instruction, but a kill flag was + // removed from a different tied use of regB, so now we need to add + // a kill flag to one of the remaining uses of regB. + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (MO.isReg() && MO.getReg() == RegB && MO.isUse()) { + MO.setIsKill(true); + break; + } + } + } +} + /// runOnMachineFunction - Reduce two-address instructions to two operands. /// -bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) { - const TargetMachine &TM = MF.getTarget(); - MRI = &MF.getRegInfo(); +bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &Func) { + MF = &Func; + const TargetMachine &TM = MF->getTarget(); + MRI = &MF->getRegInfo(); TII = TM.getInstrInfo(); TRI = TM.getRegisterInfo(); InstrItins = TM.getInstrItineraryData(); + Indexes = getAnalysisIfAvailable(); LV = getAnalysisIfAvailable(); + LIS = getAnalysisIfAvailable(); AA = &getAnalysis(); OptLevel = TM.getOptLevel(); @@ -1394,20 +1373,15 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) { DEBUG(dbgs() << "********** REWRITING TWO-ADDR INSTRS **********\n"); DEBUG(dbgs() << "********** Function: " - << MF.getFunction()->getName() << '\n'); + << MF->getFunction()->getName() << '\n'); // This pass takes the function out of SSA form. MRI->leaveSSA(); - // ReMatRegs - Keep track of the registers whose def's are remat'ed. - BitVector ReMatRegs(MRI->getNumVirtRegs()); - - typedef DenseMap, 4> > - TiedOperandMap; - TiedOperandMap TiedOperands(4); + TiedOperandMap TiedOperands; SmallPtrSet Processed; - for (MachineFunction::iterator mbbi = MF.begin(), mbbe = MF.end(); + for (MachineFunction::iterator mbbi = MF->begin(), mbbe = MF->end(); mbbi != mbbe; ++mbbi) { unsigned Dist = 0; DistanceMap.clear(); @@ -1426,188 +1400,63 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) { if (mi->isRegSequence()) RegSequences.push_back(&*mi); - const MCInstrDesc &MCID = mi->getDesc(); - bool FirstTied = true; - DistanceMap.insert(std::make_pair(mi, ++Dist)); ProcessCopy(&*mi, &*mbbi, Processed); // First scan through all the tied register uses in this instruction // and record a list of pairs of tied operands for each register. - unsigned NumOps = mi->isInlineAsm() - ? 
mi->getNumOperands() : MCID.getNumOperands(); - for (unsigned SrcIdx = 0; SrcIdx < NumOps; ++SrcIdx) { - unsigned DstIdx = 0; - if (!mi->isRegTiedToDefOperand(SrcIdx, &DstIdx)) - continue; - - if (FirstTied) { - FirstTied = false; - ++NumTwoAddressInstrs; - DEBUG(dbgs() << '\t' << *mi); - } - - assert(mi->getOperand(SrcIdx).isReg() && - mi->getOperand(SrcIdx).getReg() && - mi->getOperand(SrcIdx).isUse() && - "two address instruction invalid"); - - unsigned regB = mi->getOperand(SrcIdx).getReg(); - TiedOperands[regB].push_back(std::make_pair(SrcIdx, DstIdx)); + if (!collectTiedOperands(mi, TiedOperands)) { + mi = nmi; + continue; } - // Now iterate over the information collected above. - for (TiedOperandMap::iterator OI = TiedOperands.begin(), - OE = TiedOperands.end(); OI != OE; ++OI) { - SmallVector, 4> &TiedPairs = OI->second; - - // If the instruction has a single pair of tied operands, try some - // transformations that may either eliminate the tied operands or - // improve the opportunities for coalescing away the register copy. - if (TiedOperands.size() == 1 && TiedPairs.size() == 1) { + ++NumTwoAddressInstrs; + MadeChange = true; + DEBUG(dbgs() << '\t' << *mi); + + // If the instruction has a single pair of tied operands, try some + // transformations that may either eliminate the tied operands or + // improve the opportunities for coalescing away the register copy. + if (TiedOperands.size() == 1) { + SmallVector, 4> &TiedPairs + = TiedOperands.begin()->second; + if (TiedPairs.size() == 1) { unsigned SrcIdx = TiedPairs[0].first; unsigned DstIdx = TiedPairs[0].second; - - // If the registers are already equal, nothing needs to be done. - if (mi->getOperand(SrcIdx).getReg() == - mi->getOperand(DstIdx).getReg()) - break; // Done with this instruction. - - if (TryInstructionTransform(mi, nmi, mbbi, SrcIdx, DstIdx, Dist, - Processed)) - break; // The tied operands have been eliminated. - } - - bool IsEarlyClobber = false; - bool RemovedKillFlag = false; - bool AllUsesCopied = true; - unsigned LastCopiedReg = 0; - unsigned regB = OI->first; - for (unsigned tpi = 0, tpe = TiedPairs.size(); tpi != tpe; ++tpi) { - unsigned SrcIdx = TiedPairs[tpi].first; - unsigned DstIdx = TiedPairs[tpi].second; - - const MachineOperand &DstMO = mi->getOperand(DstIdx); - unsigned regA = DstMO.getReg(); - IsEarlyClobber |= DstMO.isEarlyClobber(); - - // Grab regB from the instruction because it may have changed if the - // instruction was commuted. - regB = mi->getOperand(SrcIdx).getReg(); - - if (regA == regB) { - // The register is tied to multiple destinations (or else we would - // not have continued this far), but this use of the register - // already matches the tied destination. Leave it. - AllUsesCopied = false; + unsigned SrcReg = mi->getOperand(SrcIdx).getReg(); + unsigned DstReg = mi->getOperand(DstIdx).getReg(); + if (SrcReg != DstReg && + TryInstructionTransform(mi, nmi, mbbi, SrcIdx, DstIdx, Dist, + Processed)) { + // The tied operands have been eliminated or shifted further down the + // block to ease elimination. Continue processing with 'nmi'. + TiedOperands.clear(); + mi = nmi; continue; } - LastCopiedReg = regA; - - assert(TargetRegisterInfo::isVirtualRegister(regB) && - "cannot make instruction into two-address form"); - -#ifndef NDEBUG - // First, verify that we don't have a use of "a" in the instruction - // (a = b + a for example) because our transformation will not - // work. This should never occur because we are in SSA form. 
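The restructured main loop, continued below, separates discovery from rewriting: collectTiedOperands fills a map, a lone tied pair gets one shot at TryInstructionTransform, and only then are copies materialized. A compressed sketch of that control flow; the three helpers are trivial stand-ins, not the pass's real logic:

    #include <cstddef>
    #include <map>
    #include <utility>
    #include <vector>

    typedef std::pair<unsigned, unsigned> TiedPair;      // (SrcIdx, DstIdx)
    typedef std::map<unsigned, std::vector<TiedPair> > TiedMap;

    struct ToyInstr { std::vector<TiedPair> Tied; };

    // Stand-ins only: the real pass inspects operands and may commute,
    // sink, or convert the instruction.
    static bool collectTied(const ToyInstr &MI, TiedMap &M) {
      for (std::size_t i = 0; i != MI.Tied.size(); ++i)
        M[MI.Tied[i].first].push_back(MI.Tied[i]);   // Keyed by SrcIdx here;
      return !M.empty();                             // the pass keys by SrcReg.
    }
    static bool tryTransform(ToyInstr &, TiedPair) { return false; }
    static void processPairs(ToyInstr &, const std::vector<TiedPair> &) {}

    static void rewriteInstr(ToyInstr &MI) {
      TiedMap Tied;
      if (!collectTied(MI, Tied))
        return;                            // Nothing tied: next instruction.
      // Fast path: a lone pair may be eliminated without inserting a copy.
      if (Tied.size() == 1 && Tied.begin()->second.size() == 1)
        if (tryTransform(MI, Tied.begin()->second[0]))
          return;
      for (TiedMap::iterator I = Tied.begin(), E = Tied.end(); I != E; ++I)
        processPairs(MI, I->second);       // Otherwise materialize copies.
    }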
- for (unsigned i = 0; i != mi->getNumOperands(); ++i) - assert(i == DstIdx || - !mi->getOperand(i).isReg() || - mi->getOperand(i).getReg() != regA); -#endif - - // Emit a copy or rematerialize the definition. - const TargetRegisterClass *rc = MRI->getRegClass(regB); - MachineInstr *DefMI = MRI->getVRegDef(regB); - // If it's safe and profitable, remat the definition instead of - // copying it. - if (DefMI && - DefMI->isAsCheapAsAMove() && - DefMI->isSafeToReMat(TII, AA, regB) && - isProfitableToReMat(regB, rc, mi, DefMI, mbbi, Dist)){ - DEBUG(dbgs() << "2addr: REMATTING : " << *DefMI << "\n"); - unsigned regASubIdx = mi->getOperand(DstIdx).getSubReg(); - TII->reMaterialize(*mbbi, mi, regA, regASubIdx, DefMI, *TRI); - ReMatRegs.set(TargetRegisterInfo::virtReg2Index(regB)); - ++NumReMats; - } else { - BuildMI(*mbbi, mi, mi->getDebugLoc(), TII->get(TargetOpcode::COPY), - regA).addReg(regB); - } - - MachineBasicBlock::iterator prevMI = prior(mi); - // Update DistanceMap. - DistanceMap.insert(std::make_pair(prevMI, Dist)); - DistanceMap[mi] = ++Dist; - - DEBUG(dbgs() << "\t\tprepend:\t" << *prevMI); - - MachineOperand &MO = mi->getOperand(SrcIdx); - assert(MO.isReg() && MO.getReg() == regB && MO.isUse() && - "inconsistent operand info for 2-reg pass"); - if (MO.isKill()) { - MO.setIsKill(false); - RemovedKillFlag = true; - } - MO.setReg(regA); } + } - if (AllUsesCopied) { - if (!IsEarlyClobber) { - // Replace other (un-tied) uses of regB with LastCopiedReg. - for (unsigned i = 0, e = mi->getNumOperands(); i != e; ++i) { - MachineOperand &MO = mi->getOperand(i); - if (MO.isReg() && MO.getReg() == regB && MO.isUse()) { - if (MO.isKill()) { - MO.setIsKill(false); - RemovedKillFlag = true; - } - MO.setReg(LastCopiedReg); - } - } - } - - // Update live variables for regB. - if (RemovedKillFlag && LV && LV->getVarInfo(regB).removeKill(mi)) - LV->addVirtualRegisterKilled(regB, prior(mi)); - - } else if (RemovedKillFlag) { - // Some tied uses of regB matched their destination registers, so - // regB is still used in this instruction, but a kill flag was - // removed from a different tied use of regB, so now we need to add - // a kill flag to one of the remaining uses of regB. - for (unsigned i = 0, e = mi->getNumOperands(); i != e; ++i) { - MachineOperand &MO = mi->getOperand(i); - if (MO.isReg() && MO.getReg() == regB && MO.isUse()) { - MO.setIsKill(true); - break; - } - } - } - - // Schedule the source copy / remat inserted to form two-address - // instruction. FIXME: Does it matter the distance map may not be - // accurate after it's scheduled? - TII->scheduleTwoAddrSource(prior(mi), mi, *TRI); - - MadeChange = true; - + // Now iterate over the information collected above. + for (TiedOperandMap::iterator OI = TiedOperands.begin(), + OE = TiedOperands.end(); OI != OE; ++OI) { + processTiedPairs(mi, OI->second, Dist); DEBUG(dbgs() << "\t\trewrite to:\t" << *mi); + } - // Rewrite INSERT_SUBREG as COPY now that we no longer need SSA form. - if (mi->isInsertSubreg()) { - // From %reg = INSERT_SUBREG %reg, %subreg, subidx - // To %reg:subidx = COPY %subreg - unsigned SubIdx = mi->getOperand(3).getImm(); - mi->RemoveOperand(3); - assert(mi->getOperand(0).getSubReg() == 0 && "Unexpected subreg idx"); - mi->getOperand(0).setSubReg(SubIdx); - mi->RemoveOperand(1); - mi->setDesc(TII->get(TargetOpcode::COPY)); - DEBUG(dbgs() << "\t\tconvert to:\t" << *mi); - } + // Rewrite INSERT_SUBREG as COPY now that we no longer need SSA form. 
+    if (mi->isInsertSubreg()) {
+      // From %reg = INSERT_SUBREG %reg, %subreg, subidx
+      // To %reg:subidx = COPY %subreg
+      unsigned SubIdx = mi->getOperand(3).getImm();
+      mi->RemoveOperand(3);
+      assert(mi->getOperand(0).getSubReg() == 0 && "Unexpected subreg idx");
+      mi->getOperand(0).setSubReg(SubIdx);
+      mi->getOperand(0).setIsUndef(mi->getOperand(1).isUndef());
+      mi->RemoveOperand(1);
+      mi->setDesc(TII->get(TargetOpcode::COPY));
+      DEBUG(dbgs() << "\t\tconvert to:\t" << *mi);
+    }
 
       // Clear TiedOperands here instead of at the top of the loop
@@ -1617,15 +1466,6 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) {
     }
   }
 
-  // Some remat'ed instructions are dead.
-  for (int i = ReMatRegs.find_first(); i != -1; i = ReMatRegs.find_next(i)) {
-    unsigned VReg = TargetRegisterInfo::index2VirtReg(i);
-    if (MRI->use_nodbg_empty(VReg)) {
-      MachineInstr *DefMI = MRI->getVRegDef(VReg);
-      DefMI->eraseFromParent();
-    }
-  }
-
   // Eliminate REG_SEQUENCE instructions. Their whole purpose was to preserve
   // SSA form. It's now safe to de-SSA.
   MadeChange |= EliminateRegSequences();
@@ -1694,9 +1534,10 @@ TwoAddressInstructionPass::CoalesceExtSubRegs(SmallVector &Srcs,
       continue;
 
     // Check that the instructions are all in the same basic block.
-    MachineInstr *SrcDefMI = MRI->getVRegDef(SrcReg);
-    MachineInstr *DstDefMI = MRI->getVRegDef(DstReg);
-    if (SrcDefMI->getParent() != DstDefMI->getParent())
+    MachineInstr *SrcDefMI = MRI->getUniqueVRegDef(SrcReg);
+    MachineInstr *DstDefMI = MRI->getUniqueVRegDef(DstReg);
+    if (!SrcDefMI || !DstDefMI ||
+        SrcDefMI->getParent() != DstDefMI->getParent())
      continue;
 
     // If there are no other uses than copies which feed into
@@ -1832,6 +1673,11 @@ bool TwoAddressInstructionPass::EliminateRegSequences() {
     SmallVector RealSrcs;
     SmallSet Seen;
     for (unsigned i = 1, e = MI->getNumOperands(); i < e; i += 2) {
+      // Nothing needs to be inserted for <undef> operands.
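The hunk continues below with the new <undef> check; first, a toy replay of the INSERT_SUBREG rewrite shown at the top of this hunk, using an invented instruction record with the MIR forms in comments. The real code also forwards the source operand's undef flag, as added above:

    #include <cassert>
    #include <string>
    #include <vector>

    struct Op   { std::string Reg; unsigned SubIdx; };  // SubIdx doubles as imm
    struct Inst { std::string Opcode; std::vector<Op> Ops; };

    // %dst = INSERT_SUBREG %dst, %src, subidx   becomes
    // %dst:subidx = COPY %src
    static void lowerInsertSubreg(Inst &MI) {
      assert(MI.Opcode == "INSERT_SUBREG" && MI.Ops.size() == 4);
      unsigned SubIdx = MI.Ops[3].SubIdx;        // Operand 3: the index imm.
      MI.Ops.pop_back();                         // RemoveOperand(3)
      assert(MI.Ops[0].SubIdx == 0 && "Unexpected subreg idx");
      MI.Ops[0].SubIdx = SubIdx;                 // Retag def as %dst:subidx.
      MI.Ops.erase(MI.Ops.begin() + 1);          // Drop the tied %dst read.
      MI.Opcode = "COPY";                        // setDesc(COPY)
    }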
+ if (MI->getOperand(i).isUndef()) { + MI->getOperand(i).setReg(0); + continue; + } unsigned SrcReg = MI->getOperand(i).getReg(); unsigned SrcSubIdx = MI->getOperand(i).getSubReg(); unsigned SubIdx = MI->getOperand(i+1).getImm(); @@ -1841,7 +1687,7 @@ bool TwoAddressInstructionPass::EliminateRegSequences() { MachineInstr *DefMI = NULL; if (!MI->getOperand(i).getSubReg() && !TargetRegisterInfo::isPhysicalRegister(SrcReg)) { - DefMI = MRI->getVRegDef(SrcReg); + DefMI = MRI->getUniqueVRegDef(SrcReg); } if (DefMI && DefMI->isImplicitDef()) { diff --git a/lib/CodeGen/VirtRegMap.cpp b/lib/CodeGen/VirtRegMap.cpp index 3bab93b..93840f0 100644 --- a/lib/CodeGen/VirtRegMap.cpp +++ b/lib/CodeGen/VirtRegMap.cpp @@ -18,12 +18,14 @@ #define DEBUG_TYPE "regalloc" #include "VirtRegMap.h" +#include "LiveDebugVariables.h" #include "llvm/Function.h" +#include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/SlotIndexes.h" +#include "llvm/CodeGen/Passes.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetRegisterInfo.h" @@ -104,11 +106,149 @@ void VirtRegMap::assignVirt2StackSlot(unsigned virtReg, int SS) { Virt2StackSlotMap[virtReg] = SS; } -void VirtRegMap::rewrite(SlotIndexes *Indexes) { +void VirtRegMap::print(raw_ostream &OS, const Module*) const { + OS << "********** REGISTER MAP **********\n"; + for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) { + unsigned Reg = TargetRegisterInfo::index2VirtReg(i); + if (Virt2PhysMap[Reg] != (unsigned)VirtRegMap::NO_PHYS_REG) { + OS << '[' << PrintReg(Reg, TRI) << " -> " + << PrintReg(Virt2PhysMap[Reg], TRI) << "] " + << MRI->getRegClass(Reg)->getName() << "\n"; + } + } + + for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) { + unsigned Reg = TargetRegisterInfo::index2VirtReg(i); + if (Virt2StackSlotMap[Reg] != VirtRegMap::NO_STACK_SLOT) { + OS << '[' << PrintReg(Reg, TRI) << " -> fi#" << Virt2StackSlotMap[Reg] + << "] " << MRI->getRegClass(Reg)->getName() << "\n"; + } + } + OS << '\n'; +} + +void VirtRegMap::dump() const { + print(dbgs()); +} + +//===----------------------------------------------------------------------===// +// VirtRegRewriter +//===----------------------------------------------------------------------===// +// +// The VirtRegRewriter is the last of the register allocator passes. +// It rewrites virtual registers to physical registers as specified in the +// VirtRegMap analysis. It also updates live-in information on basic blocks +// according to LiveIntervals. 
+// +namespace { +class VirtRegRewriter : public MachineFunctionPass { + MachineFunction *MF; + const TargetMachine *TM; + const TargetRegisterInfo *TRI; + const TargetInstrInfo *TII; + MachineRegisterInfo *MRI; + SlotIndexes *Indexes; + LiveIntervals *LIS; + VirtRegMap *VRM; + + void rewrite(); + void addMBBLiveIns(); +public: + static char ID; + VirtRegRewriter() : MachineFunctionPass(ID) {} + + virtual void getAnalysisUsage(AnalysisUsage &AU) const; + + virtual bool runOnMachineFunction(MachineFunction&); +}; +} // end anonymous namespace + +char &llvm::VirtRegRewriterID = VirtRegRewriter::ID; + +INITIALIZE_PASS_BEGIN(VirtRegRewriter, "virtregrewriter", + "Virtual Register Rewriter", false, false) +INITIALIZE_PASS_DEPENDENCY(SlotIndexes) +INITIALIZE_PASS_DEPENDENCY(LiveIntervals) +INITIALIZE_PASS_DEPENDENCY(LiveDebugVariables) +INITIALIZE_PASS_DEPENDENCY(VirtRegMap) +INITIALIZE_PASS_END(VirtRegRewriter, "virtregrewriter", + "Virtual Register Rewriter", false, false) + +char VirtRegRewriter::ID = 0; + +void VirtRegRewriter::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); + AU.addRequired(); + AU.addRequired(); + AU.addPreserved(); + AU.addRequired(); + AU.addRequired(); + MachineFunctionPass::getAnalysisUsage(AU); +} + +bool VirtRegRewriter::runOnMachineFunction(MachineFunction &fn) { + MF = &fn; + TM = &MF->getTarget(); + TRI = TM->getRegisterInfo(); + TII = TM->getInstrInfo(); + MRI = &MF->getRegInfo(); + Indexes = &getAnalysis(); + LIS = &getAnalysis(); + VRM = &getAnalysis(); DEBUG(dbgs() << "********** REWRITE VIRTUAL REGISTERS **********\n" << "********** Function: " << MF->getFunction()->getName() << '\n'); - DEBUG(dump()); + DEBUG(VRM->dump()); + + // Add kill flags while we still have virtual registers. + LIS->addKillFlags(); + + // Live-in lists on basic blocks are required for physregs. + addMBBLiveIns(); + + // Rewrite virtual registers. + rewrite(); + + // Write out new DBG_VALUE instructions. + getAnalysis().emitDebugValues(VRM); + + // All machine operands and other references to virtual registers have been + // replaced. Remove the virtual registers and release all the transient data. + VRM->clearAllVirt(); + MRI->clearVirtRegs(); + return true; +} + +// Compute MBB live-in lists from virtual register live ranges and their +// assignments. +void VirtRegRewriter::addMBBLiveIns() { + SmallVector LiveIn; + for (unsigned Idx = 0, IdxE = MRI->getNumVirtRegs(); Idx != IdxE; ++Idx) { + unsigned VirtReg = TargetRegisterInfo::index2VirtReg(Idx); + if (MRI->reg_nodbg_empty(VirtReg)) + continue; + LiveInterval &LI = LIS->getInterval(VirtReg); + if (LI.empty() || LIS->intervalIsInOneMBB(LI)) + continue; + // This is a virtual register that is live across basic blocks. Its + // assigned PhysReg must be marked as live-in to those blocks. + unsigned PhysReg = VRM->getPhys(VirtReg); + assert(PhysReg != VirtRegMap::NO_PHYS_REG && "Unmapped virtual register."); + + // Scan the segments of LI. 
+ for (LiveInterval::const_iterator I = LI.begin(), E = LI.end(); I != E; + ++I) { + if (!Indexes->findLiveInMBBs(I->start, I->end, LiveIn)) + continue; + for (unsigned i = 0, e = LiveIn.size(); i != e; ++i) + if (!LiveIn[i]->isLiveIn(PhysReg)) + LiveIn[i]->addLiveIn(PhysReg); + LiveIn.clear(); + } + } +} + +void VirtRegRewriter::rewrite() { SmallVector SuperDeads; SmallVector SuperDefs; SmallVector SuperKills; @@ -135,8 +275,9 @@ void VirtRegMap::rewrite(SlotIndexes *Indexes) { if (!MO.isReg() || !TargetRegisterInfo::isVirtualRegister(MO.getReg())) continue; unsigned VirtReg = MO.getReg(); - unsigned PhysReg = getPhys(VirtReg); - assert(PhysReg != NO_PHYS_REG && "Instruction uses unmapped VirtReg"); + unsigned PhysReg = VRM->getPhys(VirtReg); + assert(PhysReg != VirtRegMap::NO_PHYS_REG && + "Instruction uses unmapped VirtReg"); assert(!Reserved.test(PhysReg) && "Reserved register assignment"); // Preserve semantics of sub-register operands. @@ -207,31 +348,3 @@ void VirtRegMap::rewrite(SlotIndexes *Indexes) { if (!MRI->reg_nodbg_empty(Reg)) MRI->setPhysRegUsed(Reg); } - -void VirtRegMap::print(raw_ostream &OS, const Module* M) const { - const TargetRegisterInfo* TRI = MF->getTarget().getRegisterInfo(); - const MachineRegisterInfo &MRI = MF->getRegInfo(); - - OS << "********** REGISTER MAP **********\n"; - for (unsigned i = 0, e = MRI.getNumVirtRegs(); i != e; ++i) { - unsigned Reg = TargetRegisterInfo::index2VirtReg(i); - if (Virt2PhysMap[Reg] != (unsigned)VirtRegMap::NO_PHYS_REG) { - OS << '[' << PrintReg(Reg, TRI) << " -> " - << PrintReg(Virt2PhysMap[Reg], TRI) << "] " - << MRI.getRegClass(Reg)->getName() << "\n"; - } - } - - for (unsigned i = 0, e = MRI.getNumVirtRegs(); i != e; ++i) { - unsigned Reg = TargetRegisterInfo::index2VirtReg(i); - if (Virt2StackSlotMap[Reg] != VirtRegMap::NO_STACK_SLOT) { - OS << '[' << PrintReg(Reg, TRI) << " -> fi#" << Virt2StackSlotMap[Reg] - << "] " << MRI.getRegClass(Reg)->getName() << "\n"; - } - } - OS << '\n'; -} - -void VirtRegMap::dump() const { - print(dbgs()); -} diff --git a/lib/CodeGen/VirtRegMap.h b/lib/CodeGen/VirtRegMap.h index 8cac311..c320985 100644 --- a/lib/CodeGen/VirtRegMap.h +++ b/lib/CodeGen/VirtRegMap.h @@ -177,13 +177,6 @@ namespace llvm { /// the specified stack slot void assignVirt2StackSlot(unsigned virtReg, int frameIndex); - /// rewrite - Rewrite all instructions in MF to use only physical registers - /// by mapping all virtual register operands to their assigned physical - /// registers. - /// - /// @param Indexes Optionally remove deleted instructions from indexes. 
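addMBBLiveIns, shown above, walks each live interval's segments and marks the assigned physical register live-in to every block whose entry falls inside a segment. The same computation over invented flat positions, where block b spans [BlockStart[b], BlockStart[b+1]) and a live range is a list of half-open segments:

    #include <cstddef>
    #include <set>
    #include <utility>
    #include <vector>

    typedef std::pair<unsigned, unsigned> Segment;   // [start, end)

    // A value is live-in to block b when some segment starts before the
    // block's entry and is still live at it.
    static void addLiveIns(const std::vector<unsigned> &BlockStart,
                           const std::vector<Segment> &Range, unsigned PhysReg,
                           std::vector<std::set<unsigned> > &LiveIns) {
      for (std::size_t s = 0; s != Range.size(); ++s)
        for (std::size_t b = 0; b + 1 != BlockStart.size(); ++b)
          if (Range[s].first < BlockStart[b] &&
              BlockStart[b] < Range[s].second)
            LiveIns[b].insert(PhysReg);
    }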
- void rewrite(SlotIndexes *Indexes); - void print(raw_ostream &OS, const Module* M = 0) const; void dump() const; }; diff --git a/lib/DebugInfo/DWARFCompileUnit.cpp b/lib/DebugInfo/DWARFCompileUnit.cpp index 24bf97f..b27d57b 100644 --- a/lib/DebugInfo/DWARFCompileUnit.cpp +++ b/lib/DebugInfo/DWARFCompileUnit.cpp @@ -82,7 +82,7 @@ void DWARFCompileUnit::clear() { Abbrevs = 0; AddrSize = 0; BaseAddr = 0; - DieArray.clear(); + clearDIEs(false); } void DWARFCompileUnit::dump(raw_ostream &OS) { @@ -97,6 +97,13 @@ void DWARFCompileUnit::dump(raw_ostream &OS) { getCompileUnitDIE(false)->dump(OS, this, -1U); } +const char *DWARFCompileUnit::getCompilationDir() { + extractDIEsIfNeeded(true); + if (DieArray.empty()) + return 0; + return DieArray[0].getAttributeValueAsString(this, DW_AT_comp_dir, 0); +} + void DWARFCompileUnit::setDIERelations() { if (DieArray.empty()) return; @@ -201,7 +208,7 @@ size_t DWARFCompileUnit::extractDIEsIfNeeded(bool cu_die_only) { } void DWARFCompileUnit::clearDIEs(bool keep_compile_unit_die) { - if (DieArray.size() > 1) { + if (DieArray.size() > (unsigned)keep_compile_unit_die) { // std::vectors never get any smaller when resized to a smaller size, // or when clear() or erase() are called, the size will report that it // is smaller, but the memory allocated remains intact (call capacity() @@ -227,8 +234,8 @@ DWARFCompileUnit::buildAddressRangeTable(DWARFDebugAranges *debug_aranges, // all compile units to stay loaded when they weren't needed. So we can end // up parsing the DWARF and then throwing them all away to keep memory usage // down. - const bool clear_dies = extractDIEsIfNeeded(false) > 1; - + const bool clear_dies = extractDIEsIfNeeded(false) > 1 && + clear_dies_if_already_not_parsed; DieArray[0].buildAddressRangeTable(this, debug_aranges); // Keep memory down by clearing DIEs if this generate function @@ -236,3 +243,13 @@ DWARFCompileUnit::buildAddressRangeTable(DWARFDebugAranges *debug_aranges, if (clear_dies) clearDIEs(true); } + +const DWARFDebugInfoEntryMinimal* +DWARFCompileUnit::getFunctionDIEForAddress(int64_t address) { + extractDIEsIfNeeded(false); + for (size_t i = 0, n = DieArray.size(); i != n; i++) { + if (DieArray[i].addressRangeContainsAddress(this, address)) + return &DieArray[i]; + } + return 0; +} diff --git a/lib/DebugInfo/DWARFCompileUnit.h b/lib/DebugInfo/DWARFCompileUnit.h index d916729..b34a596 100644 --- a/lib/DebugInfo/DWARFCompileUnit.h +++ b/lib/DebugInfo/DWARFCompileUnit.h @@ -43,7 +43,7 @@ public: const DWARFAbbreviationDeclarationSet *abbrevs); /// extractDIEsIfNeeded - Parses a compile unit and indexes its DIEs if it - /// hasn't already been done. + /// hasn't already been done. Returns the number of DIEs parsed at this call. size_t extractDIEsIfNeeded(bool cu_die_only); void clear(); void dump(raw_ostream &OS); @@ -78,6 +78,8 @@ public: return &DieArray[0]; } + const char *getCompilationDir(); + /// setDIERelations - We read in all of the DIE entries into our flat list /// of DIE entries and now we need to go back through all of them and set the /// parent, sibling and child pointers for quick DIE navigation. @@ -104,6 +106,11 @@ public: void buildAddressRangeTable(DWARFDebugAranges *debug_aranges, bool clear_dies_if_already_not_parsed); + /// getFunctionDIEForAddress - Returns pointer to parsed subprogram DIE, + /// address ranges of which contain the provided address, + /// or NULL if there is no such subprogram. The pointer + /// is valid until DWARFCompileUnit::clear() or clearDIEs() is called. 
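getFunctionDIEForAddress, declared below and defined in the hunk above, is a plain linear scan: parse the DIEs once, then ask each subprogram whether [low_pc, high_pc) covers the query. A minimal standalone analogue over an invented DIE record:

    #include <cstddef>
    #include <stdint.h>
    #include <vector>

    struct ToyDIE {
      bool     IsSubprogram;
      uint64_t LowPC, HighPC;            // Meaningful only for subprograms.
    };

    // O(n) per query, like the new helper; a caller issuing many lookups
    // would want a sorted index instead.
    static const ToyDIE *findFunction(const std::vector<ToyDIE> &DIEs,
                                      uint64_t PC) {
      for (std::size_t i = 0, n = DIEs.size(); i != n; ++i)
        if (DIEs[i].IsSubprogram && DIEs[i].LowPC <= PC && PC < DIEs[i].HighPC)
          return &DIEs[i];
      return 0;
    }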
+ const DWARFDebugInfoEntryMinimal *getFunctionDIEForAddress(int64_t address); }; } diff --git a/lib/DebugInfo/DWARFContext.cpp b/lib/DebugInfo/DWARFContext.cpp index dccadc4..797662b 100644 --- a/lib/DebugInfo/DWARFContext.cpp +++ b/lib/DebugInfo/DWARFContext.cpp @@ -8,8 +8,10 @@ //===----------------------------------------------------------------------===// #include "DWARFContext.h" +#include "llvm/ADT/SmallString.h" #include "llvm/Support/Dwarf.h" #include "llvm/Support/Format.h" +#include "llvm/Support/Path.h" #include "llvm/Support/raw_ostream.h" #include using namespace llvm; @@ -140,30 +142,64 @@ DWARFCompileUnit *DWARFContext::getCompileUnitForOffset(uint32_t offset) { return 0; } -DILineInfo DWARFContext::getLineInfoForAddress(uint64_t address) { +DILineInfo DWARFContext::getLineInfoForAddress(uint64_t address, + DILineInfoSpecifier specifier) { // First, get the offset of the compile unit. uint32_t cuOffset = getDebugAranges()->findAddress(address); // Retrieve the compile unit. DWARFCompileUnit *cu = getCompileUnitForOffset(cuOffset); if (!cu) - return DILineInfo("", 0, 0); - // Get the line table for this compile unit. - const DWARFDebugLine::LineTable *lineTable = getLineTableForCompileUnit(cu); - if (!lineTable) - return DILineInfo("", 0, 0); - // Get the index of the row we're looking for in the line table. - uint64_t hiPC = - cu->getCompileUnitDIE()->getAttributeValueAsUnsigned(cu, DW_AT_high_pc, - -1ULL); - uint32_t rowIndex = lineTable->lookupAddress(address, hiPC); - if (rowIndex == -1U) - return DILineInfo("", 0, 0); - - // From here, contruct the DILineInfo. - const DWARFDebugLine::Row &row = lineTable->Rows[rowIndex]; - const std::string &fileName = lineTable->Prologue.FileNames[row.File-1].Name; - - return DILineInfo(fileName.c_str(), row.Line, row.Column); + return DILineInfo(); + SmallString<16> fileName(""); + SmallString<16> functionName(""); + uint32_t line = 0; + uint32_t column = 0; + if (specifier.needs(DILineInfoSpecifier::FunctionName)) { + const DWARFDebugInfoEntryMinimal *function_die = + cu->getFunctionDIEForAddress(address); + if (function_die) { + if (const char *name = function_die->getSubprogramName(cu)) + functionName = name; + } + } + if (specifier.needs(DILineInfoSpecifier::FileLineInfo)) { + // Get the line table for this compile unit. + const DWARFDebugLine::LineTable *lineTable = getLineTableForCompileUnit(cu); + if (lineTable) { + // Get the index of the row we're looking for in the line table. + uint32_t rowIndex = lineTable->lookupAddress(address); + if (rowIndex != -1U) { + const DWARFDebugLine::Row &row = lineTable->Rows[rowIndex]; + // Take file/line info from the line table. + const DWARFDebugLine::FileNameEntry &fileNameEntry = + lineTable->Prologue.FileNames[row.File - 1]; + fileName = fileNameEntry.Name; + if (specifier.needs(DILineInfoSpecifier::AbsoluteFilePath) && + sys::path::is_relative(fileName.str())) { + // Append include directory of file (if it is present in line table) + // and compilation directory of compile unit to make path absolute. 
+ const char *includeDir = 0; + if (uint64_t includeDirIndex = fileNameEntry.DirIdx) { + includeDir = lineTable->Prologue + .IncludeDirectories[includeDirIndex - 1]; + } + SmallString<16> absFileName; + if (includeDir == 0 || sys::path::is_relative(includeDir)) { + if (const char *compilationDir = cu->getCompilationDir()) + sys::path::append(absFileName, compilationDir); + } + if (includeDir) { + sys::path::append(absFileName, includeDir); + } + sys::path::append(absFileName, fileName.str()); + fileName = absFileName; + } + line = row.Line; + column = row.Column; + } + } + } + return DILineInfo(fileName, functionName, line, column); } void DWARFContextInMemory::anchor() { } diff --git a/lib/DebugInfo/DWARFContext.h b/lib/DebugInfo/DWARFContext.h index d2e763a..e55a27e 100644 --- a/lib/DebugInfo/DWARFContext.h +++ b/lib/DebugInfo/DWARFContext.h @@ -66,7 +66,8 @@ public: const DWARFDebugLine::LineTable * getLineTableForCompileUnit(DWARFCompileUnit *cu); - virtual DILineInfo getLineInfoForAddress(uint64_t address); + virtual DILineInfo getLineInfoForAddress(uint64_t address, + DILineInfoSpecifier specifier = DILineInfoSpecifier()); bool isLittleEndian() const { return IsLittleEndian; } diff --git a/lib/DebugInfo/DWARFDebugAranges.cpp b/lib/DebugInfo/DWARFDebugAranges.cpp index 1788145..ef470e5 100644 --- a/lib/DebugInfo/DWARFDebugAranges.cpp +++ b/lib/DebugInfo/DWARFDebugAranges.cpp @@ -93,6 +93,7 @@ bool DWARFDebugAranges::generate(DWARFContext *ctx) { cu->buildAddressRangeTable(this, true); } } + sort(true, /* overlap size */ 0); return !isEmpty(); } @@ -221,4 +222,3 @@ bool DWARFDebugAranges::getMaxRange(uint64_t &LoPC, uint64_t &HiPC) const { HiPC = Aranges.back().HiPC(); return true; } - diff --git a/lib/DebugInfo/DWARFDebugInfoEntry.cpp b/lib/DebugInfo/DWARFDebugInfoEntry.cpp index 236db97..429a36c 100644 --- a/lib/DebugInfo/DWARFDebugInfoEntry.cpp +++ b/lib/DebugInfo/DWARFDebugInfoEntry.cpp @@ -440,3 +440,54 @@ DWARFDebugInfoEntryMinimal::buildAddressRangeTable(const DWARFCompileUnit *cu, } } } + +bool +DWARFDebugInfoEntryMinimal::addressRangeContainsAddress( + const DWARFCompileUnit *cu, const uint64_t address) const { + if (!isNULL() && getTag() == DW_TAG_subprogram) { + uint64_t hi_pc = -1ULL; + uint64_t lo_pc = getAttributeValueAsUnsigned(cu, DW_AT_low_pc, -1ULL); + if (lo_pc != -1ULL) + hi_pc = getAttributeValueAsUnsigned(cu, DW_AT_high_pc, -1ULL); + if (hi_pc != -1ULL) { + return (lo_pc <= address && address < hi_pc); + } + } + return false; +} + +const char* +DWARFDebugInfoEntryMinimal::getSubprogramName( + const DWARFCompileUnit *cu) const { + if (isNULL() || getTag() != DW_TAG_subprogram) + return 0; + // Try to get mangled name if possible. + if (const char *name = + getAttributeValueAsString(cu, DW_AT_MIPS_linkage_name, 0)) + return name; + if (const char *name = getAttributeValueAsString(cu, DW_AT_linkage_name, 0)) + return name; + if (const char *name = getAttributeValueAsString(cu, DW_AT_name, 0)) + return name; + // Try to get name from specification DIE. + uint32_t spec_ref = + getAttributeValueAsReference(cu, DW_AT_specification, -1U); + if (spec_ref != -1U) { + DWARFDebugInfoEntryMinimal spec_die; + if (spec_die.extract(cu, &spec_ref)) { + if (const char *name = spec_die.getSubprogramName(cu)) + return name; + } + } + // Try to get name from abstract origin DIE. 
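The path fixup in this hunk layers three components: the compilation directory applies only when the include directory is missing or itself relative, then comes the include directory, then the file name. A plain-string restatement under POSIX-only assumptions; the real code defers to llvm::sys::path:

    #include <string>

    static bool isRelative(const std::string &P) {
      return P.empty() || P[0] != '/';   // Toy POSIX test only.
    }

    static std::string join(const std::string &A, const std::string &B) {
      if (A.empty()) return B;
      if (B.empty()) return A;
      return A + "/" + B;
    }

    // CompDir anchors the path only when IncludeDir cannot.
    static std::string absoluteFileName(const std::string &CompDir,
                                        const std::string &IncludeDir,
                                        const std::string &FileName) {
      if (!isRelative(FileName))
        return FileName;                 // Already absolute: keep as-is.
      std::string Result;
      if (IncludeDir.empty() || isRelative(IncludeDir))
        Result = CompDir;
      return join(join(Result, IncludeDir), FileName);
    }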
+ uint32_t abs_origin_ref = + getAttributeValueAsReference(cu, DW_AT_abstract_origin, -1U); + if (abs_origin_ref != -1U) { + DWARFDebugInfoEntryMinimal abs_origin_die; + if (abs_origin_die.extract(cu, &abs_origin_ref)) { + if (const char *name = abs_origin_die.getSubprogramName(cu)) + return name; + } + } + return 0; +} diff --git a/lib/DebugInfo/DWARFDebugInfoEntry.h b/lib/DebugInfo/DWARFDebugInfoEntry.h index 37b3bcd..d5d86b9 100644 --- a/lib/DebugInfo/DWARFDebugInfoEntry.h +++ b/lib/DebugInfo/DWARFDebugInfoEntry.h @@ -128,6 +128,15 @@ public: void buildAddressRangeTable(const DWARFCompileUnit *cu, DWARFDebugAranges *debug_aranges) const; + + bool addressRangeContainsAddress(const DWARFCompileUnit *cu, + const uint64_t address) const; + + // If a DIE represents a subprogram, returns its mangled name + // (or short name, if mangled is missing). This name may be fetched + // from specification or abstract origin for this subprogram. + // Returns null if no name is found. + const char* getSubprogramName(const DWARFCompileUnit *cu) const; }; } diff --git a/lib/DebugInfo/DWARFDebugLine.cpp b/lib/DebugInfo/DWARFDebugLine.cpp index 117fa31..d99575d 100644 --- a/lib/DebugInfo/DWARFDebugLine.cpp +++ b/lib/DebugInfo/DWARFDebugLine.cpp @@ -95,14 +95,46 @@ void DWARFDebugLine::LineTable::dump(raw_ostream &OS) const { DWARFDebugLine::State::~State() {} void DWARFDebugLine::State::appendRowToMatrix(uint32_t offset) { + if (Sequence::Empty) { + // Record the beginning of instruction sequence. + Sequence::Empty = false; + Sequence::LowPC = Address; + Sequence::FirstRowIndex = row; + } ++row; // Increase the row number. LineTable::appendRow(*this); + if (EndSequence) { + // Record the end of instruction sequence. + Sequence::HighPC = Address; + Sequence::LastRowIndex = row; + if (Sequence::isValid()) + LineTable::appendSequence(*this); + Sequence::reset(); + } Row::postAppend(); } +void DWARFDebugLine::State::finalize() { + row = DoneParsingLineTable; + if (!Sequence::Empty) { + fprintf(stderr, "warning: last sequence in debug line table is not" + "terminated!\n"); + } + // Sort all sequences so that address lookup will work faster. + if (!Sequences.empty()) { + std::sort(Sequences.begin(), Sequences.end(), Sequence::orderByLowPC); + // Note: actually, instruction address ranges of sequences should not + // overlap (in shared objects and executables). If they do, the address + // lookup would still work, though, but result would be ambiguous. + // We don't report warning in this case. For example, + // sometimes .so compiled from multiple object files contains a few + // rudimentary sequences for address ranges [0x0, 0xsomething). 
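getSubprogramName, whose final clause follows below, resolves a name in strict priority order: mangled linkage name, then plain name, then a recursive look through the specification and abstract-origin references. The same cascade on an invented record:

    #include <string>

    struct Subprogram {
      std::string       LinkageName, Name;   // Empty when absent.
      const Subprogram *Specification;       // Null when absent.
      const Subprogram *AbstractOrigin;
    };

    static const char *subprogramName(const Subprogram *D) {
      if (!D) return 0;
      if (!D->LinkageName.empty()) return D->LinkageName.c_str();
      if (!D->Name.empty())        return D->Name.c_str();
      if (const char *N = subprogramName(D->Specification)) return N;
      return subprogramName(D->AbstractOrigin);
    }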
+ } +} + DWARFDebugLine::DumpingState::~DumpingState() {} -void DWARFDebugLine::DumpingState::finalize(uint32_t offset) { +void DWARFDebugLine::DumpingState::finalize() { LineTable::dump(OS); } @@ -180,8 +212,9 @@ DWARFDebugLine::parsePrologue(DataExtractor debug_line_data, fprintf(stderr, "warning: parsing line table prologue at 0x%8.8x should" " have ended at 0x%8.8x but it ended ad 0x%8.8x\n", prologue_offset, end_prologue_offset, *offset_ptr); + return false; } - return end_prologue_offset; + return true; } bool @@ -430,47 +463,53 @@ DWARFDebugLine::parseStatementTable(DataExtractor debug_line_data, } } - state.finalize(*offset_ptr); + state.finalize(); return end_offset; } -static bool findMatchingAddress(const DWARFDebugLine::Row& row1, - const DWARFDebugLine::Row& row2) { - return row1.Address < row2.Address; -} - uint32_t -DWARFDebugLine::LineTable::lookupAddress(uint64_t address, - uint64_t cu_high_pc) const { - uint32_t index = UINT32_MAX; - if (!Rows.empty()) { - // Use the lower_bound algorithm to perform a binary search since we know - // that our line table data is ordered by address. - DWARFDebugLine::Row row; - row.Address = address; - typedef std::vector::const_iterator iterator; - iterator begin_pos = Rows.begin(); - iterator end_pos = Rows.end(); - iterator pos = std::lower_bound(begin_pos, end_pos, row, - findMatchingAddress); - if (pos == end_pos) { - if (address < cu_high_pc) - return Rows.size()-1; - } else { - // Rely on fact that we are using a std::vector and we can do - // pointer arithmetic to find the row index (which will be one less - // that what we found since it will find the first position after - // the current address) since std::vector iterators are just - // pointers to the container type. - index = pos - begin_pos; - if (pos->Address > address) { - if (index > 0) - --index; - else - index = UINT32_MAX; - } - } +DWARFDebugLine::LineTable::lookupAddress(uint64_t address) const { + uint32_t unknown_index = UINT32_MAX; + if (Sequences.empty()) + return unknown_index; + // First, find an instruction sequence containing the given address. + DWARFDebugLine::Sequence sequence; + sequence.LowPC = address; + SequenceIter first_seq = Sequences.begin(); + SequenceIter last_seq = Sequences.end(); + SequenceIter seq_pos = std::lower_bound(first_seq, last_seq, sequence, + DWARFDebugLine::Sequence::orderByLowPC); + DWARFDebugLine::Sequence found_seq; + if (seq_pos == last_seq) { + found_seq = Sequences.back(); + } else if (seq_pos->LowPC == address) { + found_seq = *seq_pos; + } else { + if (seq_pos == first_seq) + return unknown_index; + found_seq = *(seq_pos - 1); + } + if (!found_seq.containsPC(address)) + return unknown_index; + // Search for instruction address in the rows describing the sequence. + // Rows are stored in a vector, so we may use arithmetical operations with + // iterators. + DWARFDebugLine::Row row; + row.Address = address; + RowIter first_row = Rows.begin() + found_seq.FirstRowIndex; + RowIter last_row = Rows.begin() + found_seq.LastRowIndex; + RowIter row_pos = std::lower_bound(first_row, last_row, row, + DWARFDebugLine::Row::orderByAddress); + if (row_pos == last_row) { + return found_seq.LastRowIndex - 1; + } + uint32_t index = found_seq.FirstRowIndex + (row_pos - first_row); + if (row_pos->Address > address) { + if (row_pos == first_row) + return unknown_index; + else + index--; } - return index; // Failed to find address. 
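The rewritten lookupAddress, whose tail follows, replaces one fuzzy binary search over all rows with two exact ones: lower_bound over sequences sorted by LowPC, then lower_bound within that sequence's row slice. A compact model in which each row carries only an address; two exact searches avoid the old hack of consulting the compile unit's high_pc to decide whether the final row matched:

    #include <algorithm>
    #include <stdint.h>
    #include <vector>

    struct Seq { uint64_t LowPC, HighPC; unsigned FirstRow, LastRow; };
    static bool byLowPC(const Seq &L, const Seq &R) { return L.LowPC < R.LowPC; }

    // Returns a row index, or -1U when PC lies in no sequence.
    static unsigned lookup(const std::vector<Seq> &Seqs,
                           const std::vector<uint64_t> &RowAddr, uint64_t PC) {
      if (Seqs.empty()) return -1U;
      Seq Key = { PC, 0, 0, 0 };
      std::vector<Seq>::const_iterator S =
          std::lower_bound(Seqs.begin(), Seqs.end(), Key, byLowPC);
      if (S == Seqs.end() || S->LowPC != PC) {
        if (S == Seqs.begin()) return -1U;   // PC precedes every sequence.
        --S;                                 // Candidate: last LowPC <= PC.
      }
      if (PC >= S->HighPC) return -1U;       // In a gap between sequences.
      // Find the last row at or before PC inside this sequence only.
      std::vector<uint64_t>::const_iterator B = RowAddr.begin() + S->FirstRow;
      std::vector<uint64_t>::const_iterator E = RowAddr.begin() + S->LastRow;
      std::vector<uint64_t>::const_iterator R = std::lower_bound(B, E, PC);
      if (R == E || *R > PC) {
        if (R == B) return -1U;              // No row covers PC.
        --R;
      }
      return S->FirstRow + static_cast<unsigned>(R - B);
    }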
+ return index; } diff --git a/lib/DebugInfo/DWARFDebugLine.h b/lib/DebugInfo/DWARFDebugLine.h index bc6a70b..6382b45 100644 --- a/lib/DebugInfo/DWARFDebugLine.h +++ b/lib/DebugInfo/DWARFDebugLine.h @@ -12,7 +12,6 @@ #include "llvm/Support/DataExtractor.h" #include -#include #include namespace llvm { @@ -22,9 +21,9 @@ class raw_ostream; class DWARFDebugLine { public: struct FileNameEntry { - FileNameEntry() : DirIdx(0), ModTime(0), Length(0) {} + FileNameEntry() : Name(0), DirIdx(0), ModTime(0), Length(0) {} - std::string Name; + const char *Name; uint64_t DirIdx; uint64_t ModTime; uint64_t Length; @@ -56,7 +55,7 @@ public: // The number assigned to the first special opcode. uint8_t OpcodeBase; std::vector StandardOpcodeLengths; - std::vector IncludeDirectories; + std::vector IncludeDirectories; std::vector FileNames; // Length of the prologue in bytes. @@ -89,6 +88,10 @@ public: void reset(bool default_is_stmt); void dump(raw_ostream &OS) const; + static bool orderByAddress(const Row& LHS, const Row& RHS) { + return LHS.Address < RHS.Address; + } + // The program-counter value corresponding to a machine instruction // generated by the compiler. uint64_t Address; @@ -126,21 +129,63 @@ public: EpilogueBegin:1; }; + // Represents a series of contiguous machine instructions. Line table for each + // compilation unit may consist of multiple sequences, which are not + // guaranteed to be in the order of ascending instruction address. + struct Sequence { + // Sequence describes instructions at address range [LowPC, HighPC) + // and is described by line table rows [FirstRowIndex, LastRowIndex). + uint64_t LowPC; + uint64_t HighPC; + unsigned FirstRowIndex; + unsigned LastRowIndex; + bool Empty; + + Sequence() { reset(); } + void reset() { + LowPC = 0; + HighPC = 0; + FirstRowIndex = 0; + LastRowIndex = 0; + Empty = true; + } + static bool orderByLowPC(const Sequence& LHS, const Sequence& RHS) { + return LHS.LowPC < RHS.LowPC; + } + bool isValid() const { + return !Empty && (LowPC < HighPC) && (FirstRowIndex < LastRowIndex); + } + bool containsPC(uint64_t pc) const { + return (LowPC <= pc && pc < HighPC); + } + }; + struct LineTable { void appendRow(const DWARFDebugLine::Row &state) { Rows.push_back(state); } + void appendSequence(const DWARFDebugLine::Sequence &sequence) { + Sequences.push_back(sequence); + } void clear() { Prologue.clear(); Rows.clear(); + Sequences.clear(); } - uint32_t lookupAddress(uint64_t address, uint64_t cu_high_pc) const; + // Returns the index of the row with file/line info for a given address, + // or -1 if there is no such row. + uint32_t lookupAddress(uint64_t address) const; void dump(raw_ostream &OS) const; struct Prologue Prologue; - std::vector Rows; + typedef std::vector RowVector; + typedef RowVector::const_iterator RowIter; + typedef std::vector SequenceVector; + typedef SequenceVector::const_iterator SequenceIter; + RowVector Rows; + SequenceVector Sequences; }; - struct State : public Row, public LineTable { + struct State : public Row, public Sequence, public LineTable { // Special row codes. 
enum { StartParsingLineTable = 0, @@ -151,8 +196,11 @@ public: virtual ~State(); virtual void appendRowToMatrix(uint32_t offset); - virtual void finalize(uint32_t offset) { row = DoneParsingLineTable; } - virtual void reset() { Row::reset(Prologue.DefaultIsStmt); } + virtual void finalize(); + virtual void reset() { + Row::reset(Prologue.DefaultIsStmt); + Sequence::reset(); + } // The row number that starts at zero for the prologue, and increases for // each row added to the matrix. @@ -162,7 +210,7 @@ public: struct DumpingState : public State { DumpingState(raw_ostream &OS) : OS(OS) {} virtual ~DumpingState(); - virtual void finalize(uint32_t offset); + virtual void finalize(); private: raw_ostream &OS; }; diff --git a/lib/ExecutionEngine/EventListenerCommon.h b/lib/ExecutionEngine/EventListenerCommon.h index 1c07c94..911d1d6 100644 --- a/lib/ExecutionEngine/EventListenerCommon.h +++ b/lib/ExecutionEngine/EventListenerCommon.h @@ -14,8 +14,8 @@ #ifndef EVENT_LISTENER_COMMON_H #define EVENT_LISTENER_COMMON_H +#include "llvm/DebugInfo.h" #include "llvm/Metadata.h" -#include "llvm/Analysis/DebugInfo.h" #include "llvm/ADT/DenseMap.h" #include "llvm/Support/ValueHandle.h" #include "llvm/Support/Path.h" diff --git a/lib/ExecutionEngine/IntelJITEvents/IntelJITEventListener.cpp b/lib/ExecutionEngine/IntelJITEvents/IntelJITEventListener.cpp index 5dfa78f..c11c17e 100644 --- a/lib/ExecutionEngine/IntelJITEvents/IntelJITEventListener.cpp +++ b/lib/ExecutionEngine/IntelJITEvents/IntelJITEventListener.cpp @@ -16,11 +16,11 @@ #include "llvm/ExecutionEngine/JITEventListener.h" #define DEBUG_TYPE "amplifier-jit-event-listener" +#include "llvm/DebugInfo.h" #include "llvm/Function.h" #include "llvm/Metadata.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/OwningPtr.h" -#include "llvm/Analysis/DebugInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/ExecutionEngine/IntelJITEventsWrapper.h" #include "llvm/Support/Debug.h" @@ -138,7 +138,7 @@ void IntelJITEventListener::NotifyFunctionEmitted( // the first instruction that has one if (FunctionMessage.source_file_name == 0) { MDNode *scope = I->Loc.getScope( - Details.MF->getFunction()->getContext()); + Details.MF->getFunction()->getContext()); FunctionMessage.source_file_name = const_cast( Filenames.getFullPath(scope)); } @@ -152,7 +152,7 @@ void IntelJITEventListener::NotifyFunctionEmitted( } Wrapper.iJIT_NotifyEvent(iJVM_EVENT_TYPE_METHOD_LOAD_FINISHED, - &FunctionMessage); + &FunctionMessage); MethodIDs[FnStart] = FunctionMessage.method_id; } diff --git a/lib/ExecutionEngine/Interpreter/CMakeLists.txt b/lib/ExecutionEngine/Interpreter/CMakeLists.txt index d331f83..74df8f0 100644 --- a/lib/ExecutionEngine/Interpreter/CMakeLists.txt +++ b/lib/ExecutionEngine/Interpreter/CMakeLists.txt @@ -15,3 +15,5 @@ add_llvm_library(LLVMInterpreter if( LLVM_ENABLE_FFI ) target_link_libraries( LLVMInterpreter ${FFI_LIBRARY_PATH} ) endif() + +add_dependencies(LLVMInterpreter intrinsics_gen) diff --git a/lib/ExecutionEngine/Interpreter/Execution.cpp b/lib/ExecutionEngine/Interpreter/Execution.cpp index af47be9..5202b09 100644 --- a/lib/ExecutionEngine/Interpreter/Execution.cpp +++ b/lib/ExecutionEngine/Interpreter/Execution.cpp @@ -651,11 +651,40 @@ void Interpreter::visitSwitchInst(SwitchInst &I) { // Check to see if any of the cases match... 
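The interpreter hunk that follows compares the switch condition against single case numbers and against [low, high] ranges. Here is the dispatch rule in isolation, with IntegersSubset and GenericValue replaced by an invented inclusive-range list:

    #include <cstddef>
    #include <stdint.h>
    #include <vector>

    struct CaseRange { uint64_t Low, High; int Dest; };  // Inclusive bounds.

    // A single case number is just a range with Low == High; first match
    // wins, otherwise fall through to the default successor.
    static int dispatchSwitch(uint64_t Cond,
                              const std::vector<CaseRange> &Cases,
                              int DefaultDest) {
      for (std::size_t i = 0, e = Cases.size(); i != e; ++i)
        if (Cases[i].Low <= Cond && Cond <= Cases[i].High)
          return Cases[i].Dest;
      return DefaultDest;
    }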
     BasicBlock *Dest = 0;
     for (SwitchInst::CaseIt i = I.case_begin(), e = I.case_end(); i != e; ++i) {
-      GenericValue CaseVal = getOperandValue(i.getCaseValue(), SF);
-      if (executeICMP_EQ(CondVal, CaseVal, ElTy).IntVal != 0) {
-        Dest = cast<BasicBlock>(i.getCaseSuccessor());
-        break;
+      IntegersSubset& Case = i.getCaseValueEx();
+      if (Case.isSingleNumber()) {
+        // FIXME: Currently work with ConstantInt based numbers.
+        const ConstantInt *CI = Case.getSingleNumber(0).toConstantInt();
+        GenericValue Val = getOperandValue(const_cast<ConstantInt*>(CI), SF);
+        if (executeICMP_EQ(Val, CondVal, ElTy).IntVal != 0) {
+          Dest = cast<BasicBlock>(i.getCaseSuccessor());
+          break;
+        }
       }
+      if (Case.isSingleNumbersOnly()) {
+        for (unsigned n = 0, en = Case.getNumItems(); n != en; ++n) {
+          // FIXME: Currently work with ConstantInt based numbers.
+          const ConstantInt *CI = Case.getSingleNumber(n).toConstantInt();
+          GenericValue Val = getOperandValue(const_cast<ConstantInt*>(CI), SF);
+          if (executeICMP_EQ(Val, CondVal, ElTy).IntVal != 0) {
+            Dest = cast<BasicBlock>(i.getCaseSuccessor());
+            break;
+          }
+        }
+      } else
+        for (unsigned n = 0, en = Case.getNumItems(); n != en; ++n) {
+          IntegersSubset::Range r = Case.getItem(n);
+          // FIXME: Currently work with ConstantInt based numbers.
+          const ConstantInt *LowCI = r.getLow().toConstantInt();
+          const ConstantInt *HighCI = r.getHigh().toConstantInt();
+          GenericValue Low = getOperandValue(const_cast<ConstantInt*>(LowCI), SF);
+          GenericValue High = getOperandValue(const_cast<ConstantInt*>(HighCI), SF);
+          if (executeICMP_ULE(Low, CondVal, ElTy).IntVal != 0 &&
+              executeICMP_ULE(CondVal, High, ElTy).IntVal != 0) {
+            Dest = cast<BasicBlock>(i.getCaseSuccessor());
+            break;
+          }
+        }
   }
   if (!Dest) Dest = I.getDefaultDest();   // No cases matched: use default
   SwitchToNewBasicBlock(Dest, SF);
diff --git a/lib/ExecutionEngine/JIT/JIT.cpp b/lib/ExecutionEngine/JIT/JIT.cpp
index a942299..97995ad 100644
--- a/lib/ExecutionEngine/JIT/JIT.cpp
+++ b/lib/ExecutionEngine/JIT/JIT.cpp
@@ -361,7 +361,7 @@ bool JIT::removeModule(Module *M) {
   MutexGuard locked(lock);

-  if (jitstate->getModule() == M) {
+  if (jitstate && jitstate->getModule() == M) {
     delete jitstate;
     jitstate = 0;
   }
@@ -433,13 +433,18 @@ GenericValue JIT::runFunction(Function *F,
       }
       break;
     case 1:
-      if (FTy->getNumParams() == 1 &&
-          FTy->getParamType(0)->isIntegerTy(32)) {
+      if (FTy->getParamType(0)->isIntegerTy(32)) {
         GenericValue rv;
         int (*PF)(int) = (int(*)(int))(intptr_t)FPtr;
         rv.IntVal = APInt(32, PF(ArgValues[0].IntVal.getZExtValue()));
         return rv;
       }
+      if (FTy->getParamType(0)->isPointerTy()) {
+        GenericValue rv;
+        int (*PF)(char *) = (int(*)(char *))(intptr_t)FPtr;
+        rv.IntVal = APInt(32, PF((char*)GVTOP(ArgValues[0])));
+        return rv;
+      }
       break;
     }
   }
diff --git a/lib/ExecutionEngine/JIT/JITEmitter.cpp b/lib/ExecutionEngine/JIT/JITEmitter.cpp
index 504c8bd..ff3a9dc 100644
--- a/lib/ExecutionEngine/JIT/JITEmitter.cpp
+++ b/lib/ExecutionEngine/JIT/JITEmitter.cpp
@@ -17,9 +17,9 @@
 #include "JITDwarfEmitter.h"
 #include "llvm/ADT/OwningPtr.h"
 #include "llvm/Constants.h"
-#include "llvm/Module.h"
+#include "llvm/DebugInfo.h"
 #include "llvm/DerivedTypes.h"
-#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/Module.h"
 #include "llvm/CodeGen/JITCodeEmitter.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineCodeInfo.h"
@@ -108,13 +108,18 @@ namespace {
     /// particular GlobalVariable so that we can reuse them if necessary.
     GlobalToIndirectSymMapTy GlobalToIndirectSymMap;

+#ifndef NDEBUG
     /// Instance of the JIT this ResolverState serves.
JIT *TheJIT; +#endif public: JITResolverState(JIT *jit) : FunctionToLazyStubMap(this), - FunctionToCallSitesMap(this), - TheJIT(jit) {} + FunctionToCallSitesMap(this) { +#ifndef NDEBUG + TheJIT = jit; +#endif + } FunctionToLazyStubMapTy& getFunctionToLazyStubMap( const MutexGuard& locked) { diff --git a/lib/ExecutionEngine/JIT/JITMemoryManager.cpp b/lib/ExecutionEngine/JIT/JITMemoryManager.cpp index 2d1775c..7be6ef8 100644 --- a/lib/ExecutionEngine/JIT/JITMemoryManager.cpp +++ b/lib/ExecutionEngine/JIT/JITMemoryManager.cpp @@ -852,7 +852,7 @@ static int jit_noop() { /// for resolving library symbols, not code generated symbols. /// void *DefaultJITMemoryManager::getPointerToNamedFunction(const std::string &Name, - bool AbortOnFailure) { + bool AbortOnFailure) { // Check to see if this is one of the functions we want to intercept. Note, // we cast to intptr_t here to silence a -pedantic warning that complains // about casting a function pointer to a normal pointer. diff --git a/lib/ExecutionEngine/MCJIT/MCJIT.cpp b/lib/ExecutionEngine/MCJIT/MCJIT.cpp index 44f89cf..739ffd7d8 100644 --- a/lib/ExecutionEngine/MCJIT/MCJIT.cpp +++ b/lib/ExecutionEngine/MCJIT/MCJIT.cpp @@ -18,6 +18,7 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/DynamicLibrary.h" #include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/MutexGuard.h" #include "llvm/Target/TargetData.h" using namespace llvm; @@ -45,7 +46,7 @@ ExecutionEngine *MCJIT::createJIT(Module *M, // If the target supports JIT code generation, create the JIT. if (TargetJITInfo *TJ = TM->getJITInfo()) - return new MCJIT(M, TM, *TJ, new MCJITMemoryManager(JMM, M), GVsWithCode); + return new MCJIT(M, TM, *TJ, new MCJITMemoryManager(JMM), GVsWithCode); if (ErrorStr) *ErrorStr = "target does not support JIT code generation"; @@ -54,9 +55,35 @@ ExecutionEngine *MCJIT::createJIT(Module *M, MCJIT::MCJIT(Module *m, TargetMachine *tm, TargetJITInfo &tji, RTDyldMemoryManager *MM, bool AllocateGVsWithCode) - : ExecutionEngine(m), TM(tm), MemMgr(MM), M(m), OS(Buffer), Dyld(MM) { + : ExecutionEngine(m), TM(tm), Ctx(0), MemMgr(MM), Dyld(MM), + isCompiled(false), M(m), OS(Buffer) { setTargetData(TM->getTargetData()); +} + +MCJIT::~MCJIT() { + delete MemMgr; + delete TM; +} + +void MCJIT::emitObject(Module *m) { + /// Currently, MCJIT only supports a single module and the module passed to + /// this function call is expected to be the contained module. The module + /// is passed as a parameter here to prepare for multiple module support in + /// the future. + assert(M == m); + + // Get a thread lock to make sure we aren't trying to compile multiple times + MutexGuard locked(lock); + + // FIXME: Track compilation state on a per-module basis when multiple modules + // are supported. + // Re-compilation is not supported + if (isCompiled) + return; + + PassManager PM; + PM.add(new TargetData(*TM->getTargetData())); // Turn the machine code intermediate representation into bytes in memory @@ -69,23 +96,22 @@ MCJIT::MCJIT(Module *m, TargetMachine *tm, TargetJITInfo &tji, // FIXME: When we support multiple modules, we'll want to move the code // gen and finalization out of the constructor here and do it more // on-demand as part of getPointerToFunction(). - PM.run(*M); + PM.run(*m); // Flush the output buffer so the SmallVector gets its data. OS.flush(); // Load the object into the dynamic linker. 
- MemoryBuffer *MB = MemoryBuffer::getMemBuffer(StringRef(Buffer.data(), + MemoryBuffer* MB = MemoryBuffer::getMemBuffer(StringRef(Buffer.data(), Buffer.size()), "", false); if (Dyld.loadObject(MB)) report_fatal_error(Dyld.getErrorString()); + // Resolve any relocations. Dyld.resolveRelocations(); -} -MCJIT::~MCJIT() { - delete MemMgr; - delete TM; + // FIXME: Add support for per-module compilation state + isCompiled = true; } void *MCJIT::getPointerToBasicBlock(BasicBlock *BB) { @@ -93,6 +119,10 @@ void *MCJIT::getPointerToBasicBlock(BasicBlock *BB) { } void *MCJIT::getPointerToFunction(Function *F) { + // FIXME: Add support for per-module compilation state + if (!isCompiled) + emitObject(M); + if (F->isDeclaration() || F->hasAvailableExternallyLinkage()) { bool AbortOnFailure = !F->hasExternalWeakLinkage(); void *Addr = getPointerToNamedFunction(F->getName(), AbortOnFailure); @@ -100,6 +130,7 @@ void *MCJIT::getPointerToFunction(Function *F) { return Addr; } + // FIXME: Should the Dyld be retaining module information? Probably not. // FIXME: Should we be using the mangler for this? Probably. StringRef BaseName = F->getName(); if (BaseName[0] == '\1') @@ -217,7 +248,11 @@ GenericValue MCJIT::runFunction(Function *F, } void *MCJIT::getPointerToNamedFunction(const std::string &Name, - bool AbortOnFailure){ + bool AbortOnFailure) { + // FIXME: Add support for per-module compilation state + if (!isCompiled) + emitObject(M); + if (!isSymbolSearchingDisabled() && MemMgr) { void *ptr = MemMgr->getPointerToNamedFunction(Name, false); if (ptr) @@ -231,7 +266,7 @@ void *MCJIT::getPointerToNamedFunction(const std::string &Name, if (AbortOnFailure) { report_fatal_error("Program used external function '"+Name+ - "' which could not be resolved!"); + "' which could not be resolved!"); } return 0; } diff --git a/lib/ExecutionEngine/MCJIT/MCJIT.h b/lib/ExecutionEngine/MCJIT/MCJIT.h index 2b3df98..1d272e9 100644 --- a/lib/ExecutionEngine/MCJIT/MCJIT.h +++ b/lib/ExecutionEngine/MCJIT/MCJIT.h @@ -29,17 +29,16 @@ class MCJIT : public ExecutionEngine { TargetMachine *TM; MCContext *Ctx; RTDyldMemoryManager *MemMgr; + RuntimeDyld Dyld; - // FIXME: These may need moved to a separate 'jitstate' member like the - // non-MC JIT does for multithreading and such. Just keep them here for now. - PassManager PM; + // FIXME: Add support for multiple modules + bool isCompiled; Module *M; - // FIXME: This really doesn't belong here. + + // FIXME: Move these to a single container which manages JITed objects SmallVector Buffer; // Working buffer into which we JIT. raw_svector_ostream OS; - RuntimeDyld Dyld; - public: ~MCJIT(); @@ -91,6 +90,14 @@ public: TargetMachine *TM); // @} + +protected: + /// emitObject -- Generate a JITed object in memory from the specified module + /// Currently, MCJIT only supports a single module and the module passed to + /// this function call is expected to be the contained module. The module + /// is passed as a parameter here to prepare for multiple module support in + /// the future. + void emitObject(Module *M); }; } // End llvm namespace diff --git a/lib/ExecutionEngine/MCJIT/MCJITMemoryManager.h b/lib/ExecutionEngine/MCJIT/MCJITMemoryManager.h index a68949a..441aaeb 100644 --- a/lib/ExecutionEngine/MCJIT/MCJITMemoryManager.h +++ b/lib/ExecutionEngine/MCJIT/MCJITMemoryManager.h @@ -22,24 +22,20 @@ namespace llvm { // matching LLVM IR counterparts in the module(s) being compiled. 
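
With code generation moved out of the MCJIT constructor into emitObject() and guarded by isCompiled, an engine now compiles on the first symbol lookup rather than at construction. A minimal caller-side sketch, assuming the LLVM 3.1 EngineBuilder API (module contents and error handling elided; buildMCJIT is an illustrative name):

#include "llvm/Module.h"
#include "llvm/ExecutionEngine/ExecutionEngine.h"
#include "llvm/Support/TargetSelect.h"
using namespace llvm;

ExecutionEngine *buildMCJIT(Module *M) {
  InitializeNativeTarget();
  InitializeNativeTargetAsmPrinter();   // MCJIT emits a real object file
  std::string Err;
  ExecutionEngine *EE =
      EngineBuilder(M).setErrorStr(&Err).setUseMCJIT(true).create();
  // Nothing is compiled yet; the first lookup calls emitObject(M), which
  // runs codegen once and hands the buffer to RuntimeDyld.
  if (EE)
    EE->getPointerToFunction(M->getFunction("main"));
  return EE;
}
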
 class MCJITMemoryManager : public RTDyldMemoryManager {
   virtual void anchor();
-  JITMemoryManager *JMM;
+  OwningPtr<JITMemoryManager> JMM;

-  // FIXME: Multiple modules.
-  Module *M;
 public:
-  MCJITMemoryManager(JITMemoryManager *jmm, Module *m) :
-    JMM(jmm?jmm:JITMemoryManager::CreateDefaultMemManager()), M(m) {}
-  // We own the JMM, so make sure to delete it.
-  ~MCJITMemoryManager() { delete JMM; }
+  MCJITMemoryManager(JITMemoryManager *jmm) :
+    JMM(jmm?jmm:JITMemoryManager::CreateDefaultMemManager()) {}

   uint8_t *allocateDataSection(uintptr_t Size, unsigned Alignment,
                                unsigned SectionID) {
-    return JMM->allocateSpace(Size, Alignment);
+    return JMM->allocateDataSection(Size, Alignment, SectionID);
   }

   uint8_t *allocateCodeSection(uintptr_t Size, unsigned Alignment,
                                unsigned SectionID) {
-    return JMM->allocateSpace(Size, Alignment);
+    return JMM->allocateCodeSection(Size, Alignment, SectionID);
   }

   virtual void *getPointerToNamedFunction(const std::string &Name,
diff --git a/lib/ExecutionEngine/OProfileJIT/OProfileJITEventListener.cpp b/lib/ExecutionEngine/OProfileJIT/OProfileJITEventListener.cpp
index e6142e3..6b8e9d1 100644
--- a/lib/ExecutionEngine/OProfileJIT/OProfileJITEventListener.cpp
+++ b/lib/ExecutionEngine/OProfileJIT/OProfileJITEventListener.cpp
@@ -16,9 +16,9 @@
 #include "llvm/ExecutionEngine/JITEventListener.h"

 #define DEBUG_TYPE "oprofile-jit-event-listener"
+#include "llvm/DebugInfo.h"
 #include "llvm/Function.h"
 #include "llvm/ADT/OwningPtr.h"
-#include "llvm/Analysis/DebugInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/ExecutionEngine/OProfileWrapper.h"
 #include "llvm/Support/Debug.h"
diff --git a/lib/ExecutionEngine/RuntimeDyld/ObjectImage.h b/lib/ExecutionEngine/RuntimeDyld/ObjectImage.h
index 8206ead..c3e3572 100644
--- a/lib/ExecutionEngine/RuntimeDyld/ObjectImage.h
+++ b/lib/ExecutionEngine/RuntimeDyld/ObjectImage.h
@@ -48,7 +48,7 @@ public:
   virtual void updateSymbolAddress(const object::SymbolRef &Sym,
                                    uint64_t Addr) {}

-  // Subclasses can override this method to provide JIT debugging support
+  // Subclasses can override these methods to provide JIT debugging support
   virtual void registerWithDebugger() {}
   virtual void deregisterWithDebugger() {}
 };
diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp
index 1b1840a..b464040 100644
--- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp
+++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp
@@ -39,7 +39,7 @@ namespace {
 // Resolve the relocations for all symbols we currently know about.
 void RuntimeDyldImpl::resolveRelocations() {
   // First, resolve relocations associated with external symbols.
-  resolveSymbols();
+  resolveExternalSymbols();

   // Just iterate over the sections we have and resolve all the relocations
   // in them. Gross overkill, but it gets the job done.
@@ -59,8 +59,8 @@ void RuntimeDyldImpl::mapSectionAddress(void *LocalAddress,
   llvm_unreachable("Attempting to remap address of unknown section!");
 }

-// Subclasses can implement this method to create specialized image instances
-// The caller owns the the pointer that is returned.
+// Subclasses can implement this method to create specialized image instances.
+// The caller owns the pointer that is returned.
ObjectImage *RuntimeDyldImpl::createObjectImage(const MemoryBuffer *InputBuffer) { ObjectFile *ObjFile = ObjectFile::createObjectFile(const_cast (InputBuffer)); @@ -75,11 +75,15 @@ bool RuntimeDyldImpl::loadObject(const MemoryBuffer *InputBuffer) { Arch = (Triple::ArchType)obj->getArch(); - LocalSymbolMap LocalSymbols; // Functions and data symbols from the - // object file. - ObjSectionToIDMap LocalSections; // Used sections from the object file - CommonSymbolMap CommonSymbols; // Common symbols requiring allocation - uint64_t CommonSize = 0; + // Symbols found in this object + StringMap LocalSymbols; + // Used sections from the object file + ObjSectionToIDMap LocalSections; + + // Common symbols requiring allocation, and the total size required to + // allocate all common symbols. + CommonSymbolMap CommonSymbols; + uint64_t CommonSize = 0; error_code err; // Parse symbols @@ -106,28 +110,29 @@ bool RuntimeDyldImpl::loadObject(const MemoryBuffer *InputBuffer) { if (SymType == object::SymbolRef::ST_Function || SymType == object::SymbolRef::ST_Data) { uint64_t FileOffset; - StringRef sData; + StringRef SectionData; section_iterator si = obj->end_sections(); Check(i->getFileOffset(FileOffset)); Check(i->getSection(si)); if (si == obj->end_sections()) continue; - Check(si->getContents(sData)); + Check(si->getContents(SectionData)); const uint8_t* SymPtr = (const uint8_t*)InputBuffer->getBufferStart() + (uintptr_t)FileOffset; - uintptr_t SectOffset = (uintptr_t)(SymPtr - (const uint8_t*)sData.begin()); + uintptr_t SectOffset = (uintptr_t)(SymPtr - + (const uint8_t*)SectionData.begin()); unsigned SectionID = findOrEmitSection(*obj, *si, SymType == object::SymbolRef::ST_Function, LocalSections); - bool isGlobal = flags & SymbolRef::SF_Global; LocalSymbols[Name.data()] = SymbolLoc(SectionID, SectOffset); DEBUG(dbgs() << "\tFileOffset: " << format("%p", (uintptr_t)FileOffset) << " flags: " << flags << " SID: " << SectionID << " Offset: " << format("%p", SectOffset)); + bool isGlobal = flags & SymbolRef::SF_Global; if (isGlobal) - SymbolTable[Name] = SymbolLoc(SectionID, SectOffset); + GlobalSymbolTable[Name] = SymbolLoc(SectionID, SectOffset); } } DEBUG(dbgs() << "\tType: " << SymType << " Name: " << Name << "\n"); @@ -137,7 +142,7 @@ bool RuntimeDyldImpl::loadObject(const MemoryBuffer *InputBuffer) { if (CommonSize != 0) emitCommonSymbols(*obj, CommonSymbols, CommonSize, LocalSymbols); - // Parse and proccess relocations + // Parse and process relocations DEBUG(dbgs() << "Parse relocations:\n"); for (section_iterator si = obj->begin_sections(), se = obj->end_sections(); si != se; si.increment(err)) { @@ -150,7 +155,7 @@ bool RuntimeDyldImpl::loadObject(const MemoryBuffer *InputBuffer) { e = si->end_relocations(); i != e; i.increment(err)) { Check(err); - // If it's first relocation in this section, find its SectionID + // If it's the first relocation in this section, find its SectionID if (isFirstRelocation) { SectionID = findOrEmitSection(*obj, *si, true, LocalSections); DEBUG(dbgs() << "\tSectionID: " << SectionID << "\n"); @@ -177,10 +182,10 @@ bool RuntimeDyldImpl::loadObject(const MemoryBuffer *InputBuffer) { return false; } -unsigned RuntimeDyldImpl::emitCommonSymbols(ObjectImage &Obj, - const CommonSymbolMap &Map, - uint64_t TotalSize, - LocalSymbolMap &LocalSymbols) { +void RuntimeDyldImpl::emitCommonSymbols(ObjectImage &Obj, + const CommonSymbolMap &CommonSymbols, + uint64_t TotalSize, + SymbolTableMap &SymbolTable) { // Allocate memory for the section unsigned SectionID = 
Sections.size(); uint8_t *Addr = MemMgr->allocateDataSection(TotalSize, sizeof(void*), @@ -197,18 +202,16 @@ unsigned RuntimeDyldImpl::emitCommonSymbols(ObjectImage &Obj, << "\n"); // Assign the address of each symbol - for (CommonSymbolMap::const_iterator it = Map.begin(), itEnd = Map.end(); - it != itEnd; it++) { - uint64_t Size = it->second; + for (CommonSymbolMap::const_iterator it = CommonSymbols.begin(), + itEnd = CommonSymbols.end(); it != itEnd; it++) { StringRef Name; it->first.getName(Name); Obj.updateSymbolAddress(it->first, (uint64_t)Addr); - LocalSymbols[Name.data()] = SymbolLoc(SectionID, Offset); + SymbolTable[Name.data()] = SymbolLoc(SectionID, Offset); + uint64_t Size = it->second; Offset += Size; Addr += Size; } - - return SectionID; } unsigned RuntimeDyldImpl::emitSection(ObjectImage &Obj, @@ -274,8 +277,8 @@ unsigned RuntimeDyldImpl::emitSection(ObjectImage &Obj, } else { // Even if we didn't load the section, we need to record an entry for it - // to handle later processing (and by 'handle' I mean don't do anything - // with these sections). + // to handle later processing (and by 'handle' I mean don't do anything + // with these sections). Allocate = 0; Addr = 0; DEBUG(dbgs() << "emitSection SectionID: " << SectionID @@ -307,28 +310,26 @@ unsigned RuntimeDyldImpl::findOrEmitSection(ObjectImage &Obj, return SectionID; } -void RuntimeDyldImpl::AddRelocation(const RelocationValueRef &Value, - unsigned SectionID, uintptr_t Offset, - uint32_t RelType) { - DEBUG(dbgs() << "AddRelocation SymNamePtr: " << format("%p", Value.SymbolName) - << " SID: " << Value.SectionID - << " Addend: " << format("%p", Value.Addend) - << " Offset: " << format("%p", Offset) - << " RelType: " << format("%x", RelType) - << "\n"); +void RuntimeDyldImpl::addRelocationForSection(const RelocationEntry &RE, + unsigned SectionID) { + Relocations[SectionID].push_back(RE); +} - if (Value.SymbolName == 0) { - Relocations[Value.SectionID].push_back(RelocationEntry( - SectionID, - Offset, - RelType, - Value.Addend)); - } else - SymbolRelocations[Value.SymbolName].push_back(RelocationEntry( - SectionID, - Offset, - RelType, - Value.Addend)); +void RuntimeDyldImpl::addRelocationForSymbol(const RelocationEntry &RE, + StringRef SymbolName) { + // Relocation by symbol. If the symbol is found in the global symbol table, + // create an appropriate section relocation. Otherwise, add it to + // ExternalSymbolRelocations. + SymbolTableMap::const_iterator Loc = + GlobalSymbolTable.find(SymbolName); + if (Loc == GlobalSymbolTable.end()) { + ExternalSymbolRelocations[SymbolName].push_back(RE); + } else { + // Copy the RE since we want to modify its addend. 
+ RelocationEntry RECopy = RE; + RECopy.Addend += Loc->second.second; + Relocations[Loc->second.first].push_back(RECopy); + } } uint8_t *RuntimeDyldImpl::createStubFunction(uint8_t *Addr) { @@ -369,12 +370,12 @@ void RuntimeDyldImpl::resolveRelocationEntry(const RelocationEntry &RE, uint8_t *Target = Sections[RE.SectionID].Address + RE.Offset; DEBUG(dbgs() << "\tSectionID: " << RE.SectionID << " + " << RE.Offset << " (" << format("%p", Target) << ")" - << " Data: " << RE.Data + << " RelType: " << RE.RelType << " Addend: " << RE.Addend << "\n"); resolveRelocation(Target, Sections[RE.SectionID].LoadAddress + RE.Offset, - Value, RE.Data, RE.Addend); + Value, RE.RelType, RE.Addend); } } @@ -385,16 +386,14 @@ void RuntimeDyldImpl::resolveRelocationList(const RelocationList &Relocs, } } -// resolveSymbols - Resolve any relocations to the specified symbols if -// we know where it lives. -void RuntimeDyldImpl::resolveSymbols() { - StringMap::iterator i = SymbolRelocations.begin(), - e = SymbolRelocations.end(); +void RuntimeDyldImpl::resolveExternalSymbols() { + StringMap::iterator i = ExternalSymbolRelocations.begin(), + e = ExternalSymbolRelocations.end(); for (; i != e; i++) { StringRef Name = i->first(); RelocationList &Relocs = i->second; - StringMap::const_iterator Loc = SymbolTable.find(Name); - if (Loc == SymbolTable.end()) { + SymbolTableMap::const_iterator Loc = GlobalSymbolTable.find(Name); + if (Loc == GlobalSymbolTable.end()) { // This is an external symbol, try to get it address from // MemoryManager. uint8_t *Addr = (uint8_t*) MemMgr->getPointerToNamedFunction(Name.data(), @@ -404,15 +403,7 @@ void RuntimeDyldImpl::resolveSymbols() { << "\n"); resolveRelocationList(Relocs, (uintptr_t)Addr); } else { - // Change the relocation to be section relative rather than symbol - // relative and move it to the resolved relocation list. 
- DEBUG(dbgs() << "Resolving symbol '" << Name << "'\n"); - for (int i = 0, e = Relocs.size(); i != e; ++i) { - RelocationEntry Entry = Relocs[i]; - Entry.Addend += Loc->second.second; - Relocations[Loc->second.first].push_back(Entry); - } - Relocs.clear(); + report_fatal_error("Expected external symbol"); } } } diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp index db6da8c..75bb586 100644 --- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp +++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp @@ -55,7 +55,7 @@ public: const MemoryBuffer& getBuffer() const { return *InputData; } - // Methods for type inquiry through isa, cast, and dyn_cast + // Methods for type inquiry through isa, cast and dyn_cast static inline bool classof(const Binary *v) { return (isa >(v) && classof(cast >(v))); @@ -208,10 +208,9 @@ void RuntimeDyldELF::resolveX86_64Relocation(uint8_t *LocalAddress, case ELF::R_X86_64_32: case ELF::R_X86_64_32S: { Value += Addend; - // FIXME: Handle the possibility of this assertion failing - assert((Type == ELF::R_X86_64_32 && !(Value & 0xFFFFFFFF00000000ULL)) || - (Type == ELF::R_X86_64_32S && - (Value & 0xFFFFFFFF00000000ULL) == 0xFFFFFFFF00000000ULL)); + assert((Type == ELF::R_X86_64_32 && (Value <= UINT32_MAX)) || + (Type == ELF::R_X86_64_32S && + ((int64_t)Value <= INT32_MAX && (int64_t)Value >= INT32_MIN))); uint32_t TruncatedAddr = (Value & 0xFFFFFFFF); uint32_t *Target = reinterpret_cast(LocalAddress); *Target = TruncatedAddr; @@ -220,7 +219,7 @@ void RuntimeDyldELF::resolveX86_64Relocation(uint8_t *LocalAddress, case ELF::R_X86_64_PC32: { uint32_t *Placeholder = reinterpret_cast(LocalAddress); int64_t RealOffset = *Placeholder + Value + Addend - FinalAddress; - assert(RealOffset <= 214783647 && RealOffset >= -214783648); + assert(RealOffset <= INT32_MAX && RealOffset >= INT32_MIN); int32_t TruncOffset = (RealOffset & 0xFFFFFFFF); *Placeholder = TruncOffset; break; @@ -248,7 +247,7 @@ void RuntimeDyldELF::resolveX86Relocation(uint8_t *LocalAddress, } default: // There are other relocation types, but it appears these are the - // only ones currently used by the LLVM ELF object writer + // only ones currently used by the LLVM ELF object writer llvm_unreachable("Relocation type not implemented yet!"); break; } @@ -334,28 +333,31 @@ void RuntimeDyldELF::resolveRelocation(uint8_t *LocalAddress, void RuntimeDyldELF::processRelocationRef(const ObjRelocationInfo &Rel, ObjectImage &Obj, ObjSectionToIDMap &ObjSectionToID, - LocalSymbolMap &Symbols, + const SymbolTableMap &Symbols, StubMap &Stubs) { uint32_t RelType = (uint32_t)(Rel.Type & 0xffffffffL); intptr_t Addend = (intptr_t)Rel.AdditionalInfo; - RelocationValueRef Value; - StringRef TargetName; const SymbolRef &Symbol = Rel.Symbol; + + // Obtain the symbol name which is referenced in the relocation + StringRef TargetName; Symbol.getName(TargetName); DEBUG(dbgs() << "\t\tRelType: " << RelType << " Addend: " << Addend << " TargetName: " << TargetName << "\n"); - // First look the symbol in object file symbols. - LocalSymbolMap::iterator lsi = Symbols.find(TargetName.data()); + RelocationValueRef Value; + // First search for the symbol in the local symbol table + SymbolTableMap::const_iterator lsi = Symbols.find(TargetName.data()); if (lsi != Symbols.end()) { Value.SectionID = lsi->second.first; Value.Addend = lsi->second.second; } else { - // Second look the symbol in global symbol table. 
- StringMap::iterator gsi = SymbolTable.find(TargetName.data()); - if (gsi != SymbolTable.end()) { + // Search for the symbol in the global symbol table + SymbolTableMap::const_iterator gsi = + GlobalSymbolTable.find(TargetName.data()); + if (gsi != GlobalSymbolTable.end()) { Value.SectionID = gsi->second.first; Value.Addend = gsi->second.second; } else { @@ -366,7 +368,7 @@ void RuntimeDyldELF::processRelocationRef(const ObjRelocationInfo &Rel, // TODO: Now ELF SymbolRef::ST_Debug = STT_SECTION, it's not obviously // and can be changed by another developers. Maybe best way is add // a new symbol type ST_Section to SymbolRef and use it. - section_iterator si = Obj.end_sections(); + section_iterator si(Obj.end_sections()); Symbol.getSection(si); if (si == Obj.end_sections()) llvm_unreachable("Symbol section not found, bad object file format!"); @@ -410,14 +412,24 @@ void RuntimeDyldELF::processRelocationRef(const ObjRelocationInfo &Rel, Stubs[Value] = Section.StubOffset; uint8_t *StubTargetAddr = createStubFunction(Section.Address + Section.StubOffset); - AddRelocation(Value, Rel.SectionID, - StubTargetAddr - Section.Address, ELF::R_ARM_ABS32); + RelocationEntry RE(Rel.SectionID, StubTargetAddr - Section.Address, + ELF::R_ARM_ABS32, Value.Addend); + if (Value.SymbolName) + addRelocationForSymbol(RE, Value.SymbolName); + else + addRelocationForSection(RE, Value.SectionID); + resolveRelocation(Target, (uint64_t)Target, (uint64_t)Section.Address + Section.StubOffset, RelType, 0); Section.StubOffset += getMaxStubSize(); } - } else - AddRelocation(Value, Rel.SectionID, Rel.Offset, RelType); + } else { + RelocationEntry RE(Rel.SectionID, Rel.Offset, RelType, Value.Addend); + if (Value.SymbolName) + addRelocationForSymbol(RE, Value.SymbolName); + else + addRelocationForSection(RE, Value.SectionID); + } } bool RuntimeDyldELF::isCompatibleFormat(const MemoryBuffer *InputBuffer) const { diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h index e7f6fab..e413f78 100644 --- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h +++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h @@ -51,7 +51,8 @@ protected: virtual void processRelocationRef(const ObjRelocationInfo &Rel, ObjectImage &Obj, ObjSectionToIDMap &ObjSectionToID, - LocalSymbolMap &Symbols, StubMap &Stubs); + const SymbolTableMap &Symbols, + StubMap &Stubs); virtual ObjectImage *createObjectImage(const MemoryBuffer *InputBuffer); virtual void handleObjectLoaded(ObjectImage *Obj); diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h index 2dea13f..c38ca69 100644 --- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h +++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h @@ -14,60 +14,83 @@ #ifndef LLVM_RUNTIME_DYLD_IMPL_H #define LLVM_RUNTIME_DYLD_IMPL_H +#include "ObjectImage.h" #include "llvm/ExecutionEngine/RuntimeDyld.h" -#include "llvm/Object/ObjectFile.h" #include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/StringMap.h" -#include "llvm/ADT/Twine.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/Support/Memory.h" -#include "llvm/Support/MemoryBuffer.h" -#include "llvm/Support/system_error.h" -#include "llvm/Support/raw_ostream.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/ADT/Triple.h" +#include "llvm/Object/ObjectFile.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/ADT/Triple.h" -#include #include "llvm/Support/Format.h" -#include "ObjectImage.h" +#include 
"llvm/Support/raw_ostream.h" +#include "llvm/Support/system_error.h" +#include using namespace llvm; using namespace llvm::object; namespace llvm { +class MemoryBuffer; +class Twine; + + +/// SectionEntry - represents a section emitted into memory by the dynamic +/// linker. class SectionEntry { public: - uint8_t* Address; + /// Address - address in the linker's memory where the section resides. + uint8_t *Address; + + /// Size - section size. size_t Size; - uint64_t LoadAddress; // For each section, the address it will be - // considered to live at for relocations. The same - // as the pointer to the above memory block for - // hosted JITs. - uintptr_t StubOffset; // It's used for architecturies with stub - // functions for far relocations like ARM. - uintptr_t ObjAddress; // Section address in object file. It's use for - // calculate MachO relocation addend - SectionEntry(uint8_t* address, size_t size, uintptr_t stubOffset, + + /// LoadAddress - the address of the section in the target process's memory. + /// Used for situations in which JIT-ed code is being executed in the address + /// space of a separate process. If the code executes in the same address + /// space where it was JIT-ed, this just equals Address. + uint64_t LoadAddress; + + /// StubOffset - used for architectures with stub functions for far + /// relocations (like ARM). + uintptr_t StubOffset; + + /// ObjAddress - address of the section in the in-memory object file. Used + /// for calculating relocations in some object formats (like MachO). + uintptr_t ObjAddress; + + SectionEntry(uint8_t *address, size_t size, uintptr_t stubOffset, uintptr_t objAddress) : Address(address), Size(size), LoadAddress((uintptr_t)address), StubOffset(stubOffset), ObjAddress(objAddress) {} }; +/// RelocationEntry - used to represent relocations internally in the dynamic +/// linker. class RelocationEntry { public: - unsigned SectionID; // Section the relocation is contained in. - uintptr_t Offset; // Offset into the section for the relocation. - uint32_t Data; // Relocatino data. Including type of relocation - // and another flags and parameners from - intptr_t Addend; // Addend encoded in the instruction itself, if any, - // plus the offset into the source section for - // the symbol once the relocation is resolvable. - RelocationEntry(unsigned id, uint64_t offset, uint32_t data, int64_t addend) - : SectionID(id), Offset(offset), Data(data), Addend(addend) {} + /// SectionID - the section this relocation points to. + unsigned SectionID; + + /// Offset - offset into the section. + uintptr_t Offset; + + /// RelType - relocation type. + uint32_t RelType; + + /// Addend - the relocation addend encoded in the instruction itself. Also + /// used to make a relocation section relative instead of symbol relative. + intptr_t Addend; + + RelocationEntry(unsigned id, uint64_t offset, uint32_t type, int64_t addend) + : SectionID(id), Offset(offset), RelType(type), Addend(addend) {} }; -// Raw relocation data from object file +/// ObjRelocationInfo - relocation information as read from the object file. +/// Used to pass around data taken from object::RelocationRef, together with +/// the section to which the relocation points (represented by a SectionID). class ObjRelocationInfo { public: unsigned SectionID; @@ -97,7 +120,8 @@ protected: // The MemoryManager to load objects into. RTDyldMemoryManager *MemMgr; - // A list of emmitted sections. + // A list of all sections emitted by the dynamic linker. 
These sections are
+  // referenced in the code by means of their index in this list - SectionID.
   typedef SmallVector<SectionEntry, 64> SectionList;
   SectionList Sections;

@@ -105,11 +129,11 @@
   // references it.
   typedef std::map<SectionRef, unsigned> ObjSectionToIDMap;

-  // Master symbol table. As modules are loaded and external symbols are
-  // resolved, their addresses are stored here as a SectionID/Offset pair.
+  // A global symbol table for symbols from all loaded modules. Maps the
+  // symbol name to a (SectionID, offset in section) pair.
   typedef std::pair<unsigned, uintptr_t> SymbolLoc;
-  StringMap<SymbolLoc> SymbolTable;
-  typedef DenseMap<const char*, SymbolLoc> LocalSymbolMap;
+  typedef StringMap<SymbolLoc> SymbolTableMap;
+  SymbolTableMap GlobalSymbolTable;

   // Keep a map of common symbols to their sizes
   typedef std::map<SymbolRef, unsigned> CommonSymbolMap;
@@ -121,12 +145,14 @@
   // in the relocation list where it's stored.
   typedef SmallVector<RelocationEntry, 64> RelocationList;
   // Relocations to sections already loaded. Indexed by SectionID which is the
-  // source of the address. The target where the address will be writen is
+  // source of the address. The target where the address will be written is
   // SectionID/Offset in the relocation itself.
   DenseMap<unsigned, RelocationList> Relocations;
-  // Relocations to external symbols that are not yet resolved.
-  // Indexed by symbol name.
-  StringMap<RelocationList> SymbolRelocations;
+
+  // Relocations to external symbols that are not yet resolved. Symbols are
+  // external when they aren't found in the global symbol table of all loaded
+  // modules. This map is indexed by symbol name.
+  StringMap<RelocationList> ExternalSymbolRelocations;

   typedef std::map<RelocationValueRef, uintptr_t> StubMap;

@@ -153,16 +179,17 @@
     return (uint8_t*)Sections[SectionID].Address;
   }

-  /// \brief Emits a section containing common symbols.
-  /// \return SectionID.
-  unsigned emitCommonSymbols(ObjectImage &Obj,
-                             const CommonSymbolMap &Map,
-                             uint64_t TotalSize,
-                             LocalSymbolMap &Symbols);
+  /// \brief Given the common symbols discovered in the object file, emit a
+  /// new section for them and update the symbol mappings in the object and
+  /// symbol table.
+  void emitCommonSymbols(ObjectImage &Obj,
+                         const CommonSymbolMap &CommonSymbols,
+                         uint64_t TotalSize,
+                         SymbolTableMap &SymbolTable);

   /// \brief Emits section data from the object file to the MemoryManager.
   /// \param IsCode if it's true then allocateCodeSection() will be
-  ///        used for emmits, else allocateDataSection() will be used.
+  ///        used for emits, else allocateDataSection() will be used.
   /// \return SectionID.
   unsigned emitSection(ObjectImage &Obj,
                        const SectionRef &Section,
@@ -178,10 +205,12 @@
                                 bool IsCode,
                                 ObjSectionToIDMap &LocalSections);

-  /// \brief If Value.SymbolName is NULL then store relocation to the
-  /// Relocations, else store it in the SymbolRelocations.
-  void AddRelocation(const RelocationValueRef &Value, unsigned SectionID,
-                     uintptr_t Offset, uint32_t RelType);
+  // \brief Add a relocation entry that uses the given section.
+  void addRelocationForSection(const RelocationEntry &RE, unsigned SectionID);
+
+  // \brief Add a relocation entry that uses the given symbol. This symbol may
+  // be found in the global symbol table, or it may be external.
+  void addRelocationForSymbol(const RelocationEntry &RE, StringRef SymbolName);

   /// \brief Emits long jump instruction to Addr.
   /// \return Pointer to the memory area for emitting target address.
@@ -203,14 +232,16 @@
                                  uint32_t Type,
                                  int64_t Addend) = 0;

-  /// \brief Parses the object file relocation and store it to Relocations
-  /// or SymbolRelocations. Its depend from object file type.
+ /// \brief Parses the object file relocation and stores it to Relocations + /// or SymbolRelocations (this depends on the object file type). virtual void processRelocationRef(const ObjRelocationInfo &Rel, ObjectImage &Obj, ObjSectionToIDMap &ObjSectionToID, - LocalSymbolMap &Symbols, StubMap &Stubs) = 0; + const SymbolTableMap &Symbols, + StubMap &Stubs) = 0; - void resolveSymbols(); + /// \brief Resolve relocations to external symbols. + void resolveExternalSymbols(); virtual ObjectImage *createObjectImage(const MemoryBuffer *InputBuffer); virtual void handleObjectLoaded(ObjectImage *Obj) { @@ -228,9 +259,9 @@ public: void *getSymbolAddress(StringRef Name) { // FIXME: Just look up as a function for now. Overly simple of course. // Work in progress. - if (SymbolTable.find(Name) == SymbolTable.end()) + if (GlobalSymbolTable.find(Name) == GlobalSymbolTable.end()) return 0; - SymbolLoc Loc = SymbolTable.lookup(Name); + SymbolLoc Loc = GlobalSymbolTable.lookup(Name); return getSectionAddress(Loc.first) + Loc.second; } diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp index b7f515d..0e3a9d4 100644 --- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp +++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp @@ -30,7 +30,8 @@ void RuntimeDyldMachO::resolveRelocation(uint8_t *LocalAddress, unsigned MachoType = (Type >> 28) & 0xf; unsigned Size = 1 << ((Type >> 25) & 3); - DEBUG(dbgs() << "resolveRelocation LocalAddress: " << format("%p", LocalAddress) + DEBUG(dbgs() << "resolveRelocation LocalAddress: " + << format("%p", LocalAddress) << " FinalAddress: " << format("%p", FinalAddress) << " Value: " << format("%p", Value) << " Addend: " << Addend @@ -53,12 +54,12 @@ void RuntimeDyldMachO::resolveRelocation(uint8_t *LocalAddress, break; case Triple::x86: resolveI386Relocation(LocalAddress, - FinalAddress, - (uintptr_t)Value, - isPCRel, - Type, - Size, - Addend); + FinalAddress, + (uintptr_t)Value, + isPCRel, + Type, + Size, + Addend); break; case Triple::arm: // Fall through. case Triple::thumb: @@ -73,14 +74,13 @@ void RuntimeDyldMachO::resolveRelocation(uint8_t *LocalAddress, } } -bool RuntimeDyldMachO:: -resolveI386Relocation(uint8_t *LocalAddress, - uint64_t FinalAddress, - uint64_t Value, - bool isPCRel, - unsigned Type, - unsigned Size, - int64_t Addend) { +bool RuntimeDyldMachO::resolveI386Relocation(uint8_t *LocalAddress, + uint64_t FinalAddress, + uint64_t Value, + bool isPCRel, + unsigned Type, + unsigned Size, + int64_t Addend) { if (isPCRel) Value -= FinalAddress + 4; // see resolveX86_64Relocation @@ -102,14 +102,13 @@ resolveI386Relocation(uint8_t *LocalAddress, } } -bool RuntimeDyldMachO:: -resolveX86_64Relocation(uint8_t *LocalAddress, - uint64_t FinalAddress, - uint64_t Value, - bool isPCRel, - unsigned Type, - unsigned Size, - int64_t Addend) { +bool RuntimeDyldMachO::resolveX86_64Relocation(uint8_t *LocalAddress, + uint64_t FinalAddress, + uint64_t Value, + bool isPCRel, + unsigned Type, + unsigned Size, + int64_t Addend) { // If the relocation is PC-relative, the value to be encoded is the // pointer difference. 
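  // For instance (illustrative numbers, matching the i386 path above where a
  // 4-byte immediate is resolved by the CPU against the next instruction,
  // i.e. FinalAddress + 4): an operand at FinalAddress 0x1000 targeting
  // Value 0x2000 is encoded as 0x2000 - (0x1000 + 4) = 0xffc.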
if (isPCRel) @@ -144,14 +143,13 @@ resolveX86_64Relocation(uint8_t *LocalAddress, } } -bool RuntimeDyldMachO:: -resolveARMRelocation(uint8_t *LocalAddress, - uint64_t FinalAddress, - uint64_t Value, - bool isPCRel, - unsigned Type, - unsigned Size, - int64_t Addend) { +bool RuntimeDyldMachO::resolveARMRelocation(uint8_t *LocalAddress, + uint64_t FinalAddress, + uint64_t Value, + bool isPCRel, + unsigned Type, + unsigned Size, + int64_t Addend) { // If the relocation is PC-relative, the value to be encoded is the // pointer difference. if (isPCRel) { @@ -207,7 +205,7 @@ resolveARMRelocation(uint8_t *LocalAddress, void RuntimeDyldMachO::processRelocationRef(const ObjRelocationInfo &Rel, ObjectImage &Obj, ObjSectionToIDMap &ObjSectionToID, - LocalSymbolMap &Symbols, + const SymbolTableMap &Symbols, StubMap &Stubs) { uint32_t RelType = (uint32_t) (Rel.Type & 0xffffffffL); @@ -217,18 +215,19 @@ void RuntimeDyldMachO::processRelocationRef(const ObjRelocationInfo &Rel, bool isExtern = (RelType >> 27) & 1; if (isExtern) { + // Obtain the symbol name which is referenced in the relocation StringRef TargetName; const SymbolRef &Symbol = Rel.Symbol; Symbol.getName(TargetName); - // First look the symbol in object file symbols. - LocalSymbolMap::iterator lsi = Symbols.find(TargetName.data()); + // First search for the symbol in the local symbol table + SymbolTableMap::const_iterator lsi = Symbols.find(TargetName.data()); if (lsi != Symbols.end()) { Value.SectionID = lsi->second.first; Value.Addend = lsi->second.second; } else { - // Second look the symbol in global symbol table. - StringMap::iterator gsi = SymbolTable.find(TargetName.data()); - if (gsi != SymbolTable.end()) { + // Search for the symbol in the global symbol table + SymbolTableMap::const_iterator gsi = GlobalSymbolTable.find(TargetName.data()); + if (gsi != GlobalSymbolTable.end()) { Value.SectionID = gsi->second.first; Value.Addend = gsi->second.second; } else @@ -249,8 +248,8 @@ void RuntimeDyldMachO::processRelocationRef(const ObjRelocationInfo &Rel, Value.SectionID = findOrEmitSection(Obj, *si, true, ObjSectionToID); Value.Addend = *(const intptr_t *)Target; if (Value.Addend) { - // The MachO addend is offset from the current section, we need set it - // as offset from destination section + // The MachO addend is an offset from the current section. 
We need it + // to be an offset from the destination section Value.Addend += Section.ObjAddress - Sections[Value.SectionID].ObjAddress; } } @@ -269,19 +268,29 @@ void RuntimeDyldMachO::processRelocationRef(const ObjRelocationInfo &Rel, Stubs[Value] = Section.StubOffset; uint8_t *StubTargetAddr = createStubFunction(Section.Address + Section.StubOffset); - AddRelocation(Value, Rel.SectionID, StubTargetAddr - Section.Address, - macho::RIT_Vanilla); + RelocationEntry RE(Rel.SectionID, StubTargetAddr - Section.Address, + macho::RIT_Vanilla, Value.Addend); + if (Value.SymbolName) + addRelocationForSymbol(RE, Value.SymbolName); + else + addRelocationForSection(RE, Value.SectionID); resolveRelocation(Target, (uint64_t)Target, (uint64_t)Section.Address + Section.StubOffset, RelType, 0); Section.StubOffset += getMaxStubSize(); } - } else - AddRelocation(Value, Rel.SectionID, Rel.Offset, RelType); + } else { + RelocationEntry RE(Rel.SectionID, Rel.Offset, RelType, Value.Addend); + if (Value.SymbolName) + addRelocationForSymbol(RE, Value.SymbolName); + else + addRelocationForSection(RE, Value.SectionID); + } } -bool RuntimeDyldMachO::isCompatibleFormat(const MemoryBuffer *InputBuffer) const { +bool RuntimeDyldMachO::isCompatibleFormat( + const MemoryBuffer *InputBuffer) const { StringRef Magic = InputBuffer->getBuffer().slice(0, 4); if (Magic == "\xFE\xED\xFA\xCE") return true; if (Magic == "\xCE\xFA\xED\xFE") return true; diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h index 418d130..707664c 100644 --- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h +++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h @@ -51,7 +51,8 @@ protected: virtual void processRelocationRef(const ObjRelocationInfo &Rel, ObjectImage &Obj, ObjSectionToIDMap &ObjSectionToID, - LocalSymbolMap &Symbols, StubMap &Stubs); + const SymbolTableMap &Symbols, + StubMap &Stubs); public: virtual void resolveRelocation(uint8_t *LocalAddress, diff --git a/lib/ExecutionEngine/TargetSelect.cpp b/lib/ExecutionEngine/TargetSelect.cpp index 42364f9..7cdd669 100644 --- a/lib/ExecutionEngine/TargetSelect.cpp +++ b/lib/ExecutionEngine/TargetSelect.cpp @@ -26,11 +26,7 @@ using namespace llvm; TargetMachine *EngineBuilder::selectTarget() { - StringRef MArch = ""; - StringRef MCPU = ""; - SmallVector MAttrs; - Triple TT(M->getTargetTriple()); - + Triple TT(LLVM_HOSTTRIPLE); return selectTarget(TT, MArch, MCPU, MAttrs); } @@ -56,8 +52,9 @@ TargetMachine *EngineBuilder::selectTarget(const Triple &TargetTriple, } if (!TheTarget) { - *ErrorStr = "No available targets are compatible with this -march, " - "see -version for the available targets.\n"; + if (ErrorStr) + *ErrorStr = "No available targets are compatible with this -march, " + "see -version for the available targets.\n"; return 0; } diff --git a/lib/Linker/LinkModules.cpp b/lib/Linker/LinkModules.cpp index 765fcc8..a6599bf 100644 --- a/lib/Linker/LinkModules.cpp +++ b/lib/Linker/LinkModules.cpp @@ -16,6 +16,7 @@ #include "llvm/DerivedTypes.h" #include "llvm/Instructions.h" #include "llvm/Module.h" +#include "llvm/TypeFinder.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/SetVector.h" @@ -25,6 +26,7 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/Cloning.h" #include "llvm/Transforms/Utils/ValueMapper.h" +#include "llvm-c/Linker.h" #include using namespace llvm; @@ -594,13 +596,13 @@ void ModuleLinker::computeTypeMapping() { // At this point, the destination module 
may have a type "%foo = { i32 }" for // example. When the source module got loaded into the same LLVMContext, if // it had the same type, it would have been renamed to "%foo.42 = { i32 }". - std::vector SrcStructTypes; - SrcM->findUsedStructTypes(SrcStructTypes); + TypeFinder SrcStructTypes; + SrcStructTypes.run(*SrcM, true); SmallPtrSet SrcStructTypesSet(SrcStructTypes.begin(), SrcStructTypes.end()); - std::vector DstStructTypes; - DstM->findUsedStructTypes(DstStructTypes); + TypeFinder DstStructTypes; + DstStructTypes.run(*DstM, true); SmallPtrSet DstStructTypesSet(DstStructTypes.begin(), DstStructTypes.end()); @@ -683,7 +685,7 @@ bool ModuleLinker::linkAppendingVarProto(GlobalVariable *DstGV, GlobalVariable *NG = new GlobalVariable(*DstGV->getParent(), NewType, SrcGV->isConstant(), DstGV->getLinkage(), /*init*/0, /*name*/"", DstGV, - DstGV->isThreadLocal(), + DstGV->getThreadLocalMode(), DstGV->getType()->getAddressSpace()); // Propagate alignment, visibility and section info. @@ -758,7 +760,7 @@ bool ModuleLinker::linkGlobalProto(GlobalVariable *SGV) { new GlobalVariable(*DstM, TypeMap.get(SGV->getType()->getElementType()), SGV->isConstant(), SGV->getLinkage(), /*init*/0, SGV->getName(), /*insertbefore*/0, - SGV->isThreadLocal(), + SGV->getThreadLocalMode(), SGV->getType()->getAddressSpace()); // Propagate alignment, visibility and section info. copyGVAttributes(NewDGV, SGV); @@ -1335,3 +1337,17 @@ bool Linker::LinkModules(Module *Dest, Module *Src, unsigned Mode, return false; } + +//===----------------------------------------------------------------------===// +// C API. +//===----------------------------------------------------------------------===// + +LLVMBool LLVMLinkModules(LLVMModuleRef Dest, LLVMModuleRef Src, + LLVMLinkerMode Mode, char **OutMessages) { + std::string Messages; + LLVMBool Result = Linker::LinkModules(unwrap(Dest), unwrap(Src), + Mode, OutMessages? 
&Messages : 0); + if (OutMessages) + *OutMessages = strdup(Messages.c_str()); + return Result; +} diff --git a/lib/MC/CMakeLists.txt b/lib/MC/CMakeLists.txt index f11e686..99bff96 100644 --- a/lib/MC/CMakeLists.txt +++ b/lib/MC/CMakeLists.txt @@ -28,6 +28,7 @@ add_llvm_library(LLVMMC MCObjectStreamer.cpp MCObjectWriter.cpp MCPureStreamer.cpp + MCRegisterInfo.cpp MCSection.cpp MCSectionCOFF.cpp MCSectionELF.cpp diff --git a/lib/MC/ELFObjectWriter.cpp b/lib/MC/ELFObjectWriter.cpp index 9fc33b6..7203b9a 100644 --- a/lib/MC/ELFObjectWriter.cpp +++ b/lib/MC/ELFObjectWriter.cpp @@ -627,7 +627,7 @@ void ELFObjectWriter::WriteSymbolTable(MCDataFragment *SymtabF, const MCSymbol *ELFObjectWriter::SymbolToReloc(const MCAssembler &Asm, const MCValue &Target, - const MCFragment &F, + const MCFragment &F, const MCFixup &Fixup, bool IsPCRel) const { const MCSymbol &Symbol = Target.getSymA()->getSymbol(); @@ -1061,11 +1061,19 @@ void ELFObjectWriter::WriteRelocationsFragment(const MCAssembler &Asm, entry.Index += LocalSymbolData.size(); if (is64Bit()) { String64(*F, entry.r_offset); + if (TargetObjectWriter->isN64()) { + String32(*F, entry.Index); - struct ELF::Elf64_Rela ERE64; - ERE64.setSymbolAndType(entry.Index, entry.Type); - String64(*F, ERE64.r_info); - + String8(*F, TargetObjectWriter->getRSsym(entry.Type)); + String8(*F, TargetObjectWriter->getRType3(entry.Type)); + String8(*F, TargetObjectWriter->getRType2(entry.Type)); + String8(*F, TargetObjectWriter->getRType(entry.Type)); + } + else { + struct ELF::Elf64_Rela ERE64; + ERE64.setSymbolAndType(entry.Index, entry.Type); + String64(*F, ERE64.r_info); + } if (hasRelocationAddend()) String64(*F, entry.r_addend); } else { diff --git a/lib/MC/MCAsmBackend.cpp b/lib/MC/MCAsmBackend.cpp index 0b2e4ae..2e447b0 100644 --- a/lib/MC/MCAsmBackend.cpp +++ b/lib/MC/MCAsmBackend.cpp @@ -39,7 +39,7 @@ MCAsmBackend::getFixupKindInfo(MCFixupKind Kind) const { { "FK_SecRel_4", 0, 32, 0 }, { "FK_SecRel_8", 0, 64, 0 } }; - + assert((size_t)Kind <= sizeof(Builtins) / sizeof(Builtins[0]) && "Unknown fixup kind"); return Builtins[Kind]; diff --git a/lib/MC/MCAsmInfo.cpp b/lib/MC/MCAsmInfo.cpp index 8286c1d..8da2e0e 100644 --- a/lib/MC/MCAsmInfo.cpp +++ b/lib/MC/MCAsmInfo.cpp @@ -50,6 +50,7 @@ MCAsmInfo::MCAsmInfo() { AllowNameToStartWithDigit = false; AllowPeriodsInName = true; AllowUTF8 = true; + UseDataRegionDirectives = false; ZeroDirective = "\t.zero\t"; AsciiDirective = "\t.ascii\t"; AscizDirective = "\t.asciz\t"; @@ -57,12 +58,6 @@ MCAsmInfo::MCAsmInfo() { Data16bitsDirective = "\t.short\t"; Data32bitsDirective = "\t.long\t"; Data64bitsDirective = "\t.quad\t"; - DataBegin = "$d."; - CodeBegin = "$a."; - JT8Begin = "$d."; - JT16Begin = "$d."; - JT32Begin = "$d."; - SupportsDataRegions = false; SunStyleELFSectionSwitchSyntax = false; UsesELFSectionDirectiveForBSS = false; AlignDirective = "\t.align\t"; @@ -89,14 +84,10 @@ MCAsmInfo::MCAsmInfo() { SupportsDebugInformation = false; ExceptionsType = ExceptionHandling::None; DwarfUsesInlineInfoSection = false; - DwarfRequiresRelocationForSectionOffset = true; DwarfSectionOffsetDirective = 0; - DwarfUsesLabelOffsetForRanges = true; - DwarfUsesRelocationsForStringPool = true; + DwarfUsesRelocationsAcrossSections = true; DwarfRegNumForCFI = false; HasMicrosoftFastStdCallMangling = false; - - AsmTransCBE = 0; } MCAsmInfo::~MCAsmInfo() { diff --git a/lib/MC/MCAsmInfoCOFF.cpp b/lib/MC/MCAsmInfoCOFF.cpp index 881d992..678e75a 100644 --- a/lib/MC/MCAsmInfoCOFF.cpp +++ b/lib/MC/MCAsmInfoCOFF.cpp @@ -26,7 +26,7 @@ 
MCAsmInfoCOFF::MCAsmInfoCOFF() { PrivateGlobalPrefix = "L"; // Prefix for private global symbols WeakRefDirective = "\t.weak\t"; LinkOnceDirective = "\t.linkonce discard\n"; - + // Doesn't support visibility: HiddenVisibilityAttr = HiddenDeclarationVisibilityAttr = MCSA_Invalid; ProtectedVisibilityAttr = MCSA_Invalid; @@ -36,8 +36,6 @@ MCAsmInfoCOFF::MCAsmInfoCOFF() { SupportsDebugInformation = true; DwarfSectionOffsetDirective = "\t.secrel32\t"; HasMicrosoftFastStdCallMangling = true; - - SupportsDataRegions = false; } void MCAsmInfoMicrosoft::anchor() { } diff --git a/lib/MC/MCAsmInfoDarwin.cpp b/lib/MC/MCAsmInfoDarwin.cpp index c1e2635..8e0ac23 100644 --- a/lib/MC/MCAsmInfoDarwin.cpp +++ b/lib/MC/MCAsmInfoDarwin.cpp @@ -18,7 +18,7 @@ #include "llvm/MC/MCStreamer.h" using namespace llvm; -void MCAsmInfoDarwin::anchor() { } +void MCAsmInfoDarwin::anchor() { } MCAsmInfoDarwin::MCAsmInfoDarwin() { // Common settings for all Darwin targets. @@ -43,13 +43,6 @@ MCAsmInfoDarwin::MCAsmInfoDarwin() { HasMachoTBSSDirective = true; // Uses .tbss HasStaticCtorDtorReferenceInStaticMode = true; - CodeBegin = "L$start$code$"; - DataBegin = "L$start$data$"; - JT8Begin = "L$start$jt8$"; - JT16Begin = "L$start$jt16$"; - JT32Begin = "L$start$jt32$"; - SupportsDataRegions = true; - // FIXME: Darwin 10 and newer don't need this. LinkerRequiresNonEmptyDwarfLines = true; @@ -61,12 +54,10 @@ MCAsmInfoDarwin::MCAsmInfoDarwin() { // Doesn't support protected visibility. ProtectedVisibilityAttr = MCSA_Invalid; - + HasDotTypeDotSizeDirective = false; HasNoDeadStrip = true; HasSymbolResolver = true; - DwarfRequiresRelocationForSectionOffset = false; - DwarfUsesLabelOffsetForRanges = false; - DwarfUsesRelocationsForStringPool = false; + DwarfUsesRelocationsAcrossSections = false; } diff --git a/lib/MC/MCAsmStreamer.cpp b/lib/MC/MCAsmStreamer.cpp index 11f0f72..373df4b 100644 --- a/lib/MC/MCAsmStreamer.cpp +++ b/lib/MC/MCAsmStreamer.cpp @@ -138,6 +138,7 @@ public: virtual void EmitEHSymAttributes(const MCSymbol *Symbol, MCSymbol *EHSymbol); virtual void EmitAssemblerFlag(MCAssemblerFlag Flag); + virtual void EmitDataRegion(MCDataRegionType Kind); virtual void EmitThumbFunc(MCSymbol *Func); virtual void EmitAssignment(MCSymbol *Symbol, const MCExpr *Value); @@ -170,7 +171,7 @@ public: unsigned ByteAlignment); virtual void EmitZerofill(const MCSection *Section, MCSymbol *Symbol = 0, - unsigned Size = 0, unsigned ByteAlignment = 0); + uint64_t Size = 0, unsigned ByteAlignment = 0); virtual void EmitTBSSSymbol (const MCSection *Section, MCSymbol *Symbol, uint64_t Size, unsigned ByteAlignment = 0); @@ -352,6 +353,21 @@ void MCAsmStreamer::EmitAssemblerFlag(MCAssemblerFlag Flag) { EmitEOL(); } +void MCAsmStreamer::EmitDataRegion(MCDataRegionType Kind) { + MCContext &Ctx = getContext(); + const MCAsmInfo &MAI = Ctx.getAsmInfo(); + if (!MAI.doesSupportDataRegionDirectives()) + return; + switch (Kind) { + case MCDR_DataRegion: OS << "\t.data_region"; break; + case MCDR_DataRegionJT8: OS << "\t.data_region jt8"; break; + case MCDR_DataRegionJT16: OS << "\t.data_region jt16"; break; + case MCDR_DataRegionJT32: OS << "\t.data_region jt32"; break; + case MCDR_DataRegionEnd: OS << "\t.end_data_region"; break; + } + EmitEOL(); +} + void MCAsmStreamer::EmitThumbFunc(MCSymbol *Func) { // This needs to emit to a temporary string to get properly quoted // MCSymbols when they have spaces in them. 
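
The EmitDataRegion hook above turns the four MCDataRegionType kinds into Darwin-style .data_region/.end_data_region markers, and only for targets whose MCAsmInfo sets UseDataRegionDirectives. A sketch of how a backend might bracket a 32-bit jump table with it (the function and parameter names are illustrative, not from this import):

// Emits: .data_region jt32 / <NumEntries 32-bit values> / .end_data_region
static void emitJumpTable32(MCStreamer &Out, const MCExpr *const *Entries,
                            unsigned NumEntries) {
  Out.EmitDataRegion(MCDR_DataRegionJT32);
  for (unsigned i = 0; i != NumEntries; ++i)
    Out.EmitValue(Entries[i], 4);          // one 4-byte table entry each
  Out.EmitDataRegion(MCDR_DataRegionEnd);
}
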
@@ -513,7 +529,7 @@ void MCAsmStreamer::EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size, } void MCAsmStreamer::EmitZerofill(const MCSection *Section, MCSymbol *Symbol, - unsigned Size, unsigned ByteAlignment) { + uint64_t Size, unsigned ByteAlignment) { // Note: a .zerofill directive does not switch sections. OS << ".zerofill "; @@ -826,7 +842,7 @@ void MCAsmStreamer::EmitDwarfLocDirective(unsigned FileNo, unsigned Line, if (IsVerboseAsm) { OS.PadToColumn(MAI.getCommentColumn()); - OS << MAI.getCommentString() << ' ' << FileName << ':' + OS << MAI.getCommentString() << ' ' << FileName << ':' << Line << ':' << Column; } EmitEOL(); @@ -1009,7 +1025,7 @@ void MCAsmStreamer::EmitCFISignalFrame() { if (!UseCFI) return; - OS << "\t.cif_signal_frame"; + OS << "\t.cfi_signal_frame"; EmitEOL(); } diff --git a/lib/MC/MCAssembler.cpp b/lib/MC/MCAssembler.cpp index 66ba9b8..05519b5 100644 --- a/lib/MC/MCAssembler.cpp +++ b/lib/MC/MCAssembler.cpp @@ -27,6 +27,7 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Support/TargetRegistry.h" +#include "llvm/Support/LEB128.h" using namespace llvm; @@ -403,7 +404,7 @@ static void WriteFragmentData(const MCAssembler &Asm, const MCAsmLayout &Layout, // See if we are aligning with nops, and if so do that first to try to fill // the Count bytes. Then if that did not fill any bytes or there are any - // bytes left to fill use the the Value and ValueSize to fill the rest. + // bytes left to fill use the Value and ValueSize to fill the rest. // If we are aligning with nops, ask that target to emit the right data. if (AF.hasEmitNops()) { if (!Asm.getBackend().writeNopData(Count, OW)) @@ -713,9 +714,9 @@ bool MCAssembler::relaxLEB(MCAsmLayout &Layout, MCLEBFragment &LF) { Data.clear(); raw_svector_ostream OSE(Data); if (LF.isSigned()) - MCObjectWriter::EncodeSLEB128(Value, OSE); + encodeSLEB128(Value, OSE); else - MCObjectWriter::EncodeULEB128(Value, OSE); + encodeULEB128(Value, OSE); OSE.flush(); return OldSize != LF.getContents().size(); } diff --git a/lib/MC/MCContext.cpp b/lib/MC/MCContext.cpp index d3c4fb1..b5b14b9 100644 --- a/lib/MC/MCContext.cpp +++ b/lib/MC/MCContext.cpp @@ -274,11 +274,11 @@ unsigned MCContext::GetDwarfFile(StringRef Directory, StringRef FileName, if (Directory.empty()) { // Separate the directory part from the basename of the FileName. 
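  // For example, with POSIX-style names sys::path::filename("dir/sub/foo.c")
  // yields "foo.c" and sys::path::parent_path("dir/sub/foo.c") yields
  // "dir/sub"; for a bare "foo.c" the parent path is empty, so Directory is
  // left untouched (illustrative values for the replacement code below).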
- std::pair Slash = FileName.rsplit('/'); - Directory = Slash.second; - if (!Directory.empty()) { - Directory = Slash.first; - FileName = Slash.second; + StringRef tFileName = sys::path::filename(FileName); + if (!tFileName.empty()) { + Directory = sys::path::parent_path(FileName); + if (!Directory.empty()) + FileName = tFileName; } } diff --git a/lib/MC/MCDisassembler/Disassembler.h b/lib/MC/MCDisassembler/Disassembler.h index 880a31a..322abd5 100644 --- a/lib/MC/MCDisassembler/Disassembler.h +++ b/lib/MC/MCDisassembler/Disassembler.h @@ -99,6 +99,14 @@ public: DisAsm.reset(disAsm); IP.reset(iP); } + const std::string &getTripleName() const { return TripleName; } + void *getDisInfo() const { return DisInfo; } + int getTagType() const { return TagType; } + LLVMOpInfoCallback getGetOpInfo() const { return GetOpInfo; } + LLVMSymbolLookupCallback getSymbolLookupCallback() const { + return SymbolLookUp; + } + const Target *getTarget() const { return TheTarget; } const MCDisassembler *getDisAsm() const { return DisAsm.get(); } const MCAsmInfo *getAsmInfo() const { return MAI.get(); } MCInstPrinter *getIP() { return IP.get(); } diff --git a/lib/MC/MCDisassembler/EDDisassembler.cpp b/lib/MC/MCDisassembler/EDDisassembler.cpp index b2672ca..1226f1a 100644 --- a/lib/MC/MCDisassembler/EDDisassembler.cpp +++ b/lib/MC/MCDisassembler/EDDisassembler.cpp @@ -44,7 +44,7 @@ struct TripleMap { const char *String; }; -static struct TripleMap triplemap[] = { +static const struct TripleMap triplemap[] = { { Triple::x86, "i386-unknown-unknown" }, { Triple::x86_64, "x86_64-unknown-unknown" }, { Triple::arm, "arm-unknown-unknown" }, @@ -256,7 +256,7 @@ void EDDisassembler::initMaps(const MCRegisterInfo ®isterInfo) { unsigned registerIndex; for (registerIndex = 0; registerIndex < numRegisters; ++registerIndex) { - const char* registerName = registerInfo.get(registerIndex).Name; + const char* registerName = registerInfo.getName(registerIndex); RegVec.push_back(registerName); RegRMap[registerName] = registerIndex; diff --git a/lib/MC/MCDisassembler/EDMain.cpp b/lib/MC/MCDisassembler/EDMain.cpp index c658717..5c065db 100644 --- a/lib/MC/MCDisassembler/EDMain.cpp +++ b/lib/MC/MCDisassembler/EDMain.cpp @@ -4,7 +4,7 @@ // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. -// +// //===----------------------------------------------------------------------===// // // This file implements the enhanced disassembler's public C API. 
@@ -34,9 +34,9 @@ int EDGetDisassembler(EDDisassemblerRef *disassembler, Syntax = EDDisassembler::kEDAssemblySyntaxARMUAL; break; } - + EDDisassemblerRef ret = EDDisassembler::getDisassembler(triple, Syntax); - + if (!ret) return -1; *disassembler = ret; @@ -70,18 +70,18 @@ unsigned int EDCreateInsts(EDInstRef *insts, uint64_t address, void *arg) { unsigned int index; - + for (index = 0; index < count; ++index) { EDInst *inst = ((EDDisassembler*)disassembler)->createInst(byteReader, address, arg); - + if (!inst) return index; - + insts[index] = inst; address += inst->byteSize(); } - + return count; } @@ -165,14 +165,14 @@ int EDTokenIsRegister(EDTokenRef token) { int EDTokenIsNegativeLiteral(EDTokenRef token) { if (((EDToken*)token)->type() != EDToken::kTokenLiteral) return -1; - + return ((EDToken*)token)->literalSign(); } int EDLiteralTokenAbsoluteValue(uint64_t *value, EDTokenRef token) { if (((EDToken*)token)->type() != EDToken::kTokenLiteral) return -1; - + return ((EDToken*)token)->literalAbsoluteValue(*value); } @@ -180,7 +180,7 @@ int EDRegisterTokenValue(unsigned *registerID, EDTokenRef token) { if (((EDToken*)token)->type() != EDToken::kTokenRegister) return -1; - + return ((EDToken*)token)->registerID(*registerID); } @@ -231,7 +231,7 @@ struct ByteReaderWrapper { EDByteBlock_t byteBlock; }; -static int readerWrapperCallback(uint8_t *byte, +static int readerWrapperCallback(uint8_t *byte, uint64_t address, void *arg) { struct ByteReaderWrapper *wrapper = (struct ByteReaderWrapper *)arg; @@ -245,13 +245,9 @@ unsigned int EDBlockCreateInsts(EDInstRef *insts, uint64_t address) { struct ByteReaderWrapper wrapper; wrapper.byteBlock = byteBlock; - - return EDCreateInsts(insts, - count, - disassembler, - readerWrapperCallback, - address, - (void*)&wrapper); + + return EDCreateInsts(insts, count, disassembler, readerWrapperCallback, + address, (void*)&wrapper); } int EDBlockEvaluateOperand(uint64_t *result, EDOperandRef operand, diff --git a/lib/MC/MCDwarf.cpp b/lib/MC/MCDwarf.cpp index 84a34f1..4c63e43 100644 --- a/lib/MC/MCDwarf.cpp +++ b/lib/MC/MCDwarf.cpp @@ -19,6 +19,7 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Support/LEB128.h" #include "llvm/Support/Path.h" #include "llvm/Support/SourceMgr.h" #include "llvm/ADT/Hashing.h" @@ -36,7 +37,7 @@ using namespace llvm; // First special line opcode - leave room for the standard opcodes. // Note: If you want to change this, you'll have to update the -// "standard_opcode_lengths" table that is emitted in DwarfFileTable::Emit(). +// "standard_opcode_lengths" table that is emitted in DwarfFileTable::Emit(). #define DWARF2_LINE_OPCODE_BASE 13 // Minimum line offset in a special line info. opcode. This value @@ -105,7 +106,7 @@ void MCLineEntry::Make(MCStreamer *MCOS, const MCSection *Section) { // // This helper routine returns an expression of End - Start + IntVal . -// +// static inline const MCExpr *MakeStartMinusEndExpr(const MCStreamer &MCOS, const MCSymbol &Start, const MCSymbol &End, @@ -198,7 +199,7 @@ static inline void EmitDwarfLineTable(MCStreamer *MCOS, // Set the value of the symbol, as we are at the end of the section. MCOS->EmitLabel(SectionEnd); - // Switch back the the dwarf line section. + // Switch back the dwarf line section. 
MCOS->SwitchSection(context.getObjectFileInfo()->getDwarfLineSection()); const MCAsmInfo &asmInfo = MCOS->getContext().getAsmInfo(); @@ -310,7 +311,7 @@ const MCSymbol *MCDwarfFileTable::Emit(MCStreamer *MCOS) { if (MCOS->getContext().getAsmInfo().getLinkerRequiresNonEmptyDwarfLines() && MCLineSectionOrder.begin() == MCLineSectionOrder.end()) { // The darwin9 linker has a bug (see PR8715). For for 32-bit architectures - // it requires: + // it requires: // total_length >= prologue_length + 10 // We are 4 bytes short, since we have total_length = 51 and // prologue_length = 45 @@ -354,14 +355,14 @@ void MCDwarfLineAddr::Encode(int64_t LineDelta, uint64_t AddrDelta, AddrDelta = ScaleAddrDelta(AddrDelta); // A LineDelta of INT64_MAX is a signal that this is actually a - // DW_LNE_end_sequence. We cannot use special opcodes here, since we want the + // DW_LNE_end_sequence. We cannot use special opcodes here, since we want the // end_sequence to emit the matrix entry. if (LineDelta == INT64_MAX) { if (AddrDelta == MAX_SPECIAL_ADDR_DELTA) OS << char(dwarf::DW_LNS_const_add_pc); else { OS << char(dwarf::DW_LNS_advance_pc); - MCObjectWriter::EncodeULEB128(AddrDelta, OS); + encodeULEB128(AddrDelta, OS); } OS << char(dwarf::DW_LNS_extended_op); OS << char(1); @@ -376,7 +377,7 @@ void MCDwarfLineAddr::Encode(int64_t LineDelta, uint64_t AddrDelta, // it with DW_LNS_advance_line. if (Temp >= DWARF2_LINE_RANGE) { OS << char(dwarf::DW_LNS_advance_line); - MCObjectWriter::EncodeSLEB128(LineDelta, OS); + encodeSLEB128(LineDelta, OS); LineDelta = 0; Temp = 0 - DWARF2_LINE_BASE; @@ -412,7 +413,7 @@ void MCDwarfLineAddr::Encode(int64_t LineDelta, uint64_t AddrDelta, // Otherwise use DW_LNS_advance_pc. OS << char(dwarf::DW_LNS_advance_pc); - MCObjectWriter::EncodeULEB128(AddrDelta, OS); + encodeULEB128(AddrDelta, OS); if (NeedCopy) OS << char(dwarf::DW_LNS_copy); @@ -552,7 +553,7 @@ static void EmitGenDwarfInfo(MCStreamer *MCOS, const MCSymbol *LineSectionSymbol) { MCContext &context = MCOS->getContext(); - MCOS->SwitchSection(context.getObjectFileInfo()->getDwarfInfoSection()); + MCOS->SwitchSection(context.getObjectFileInfo()->getDwarfInfoSection()); // Create a symbol at the start and end of this section used in here for the // expression to calculate the length in the header. @@ -705,7 +706,7 @@ void MCGenDwarfInfo::Emit(MCStreamer *MCOS, const MCSymbol *LineSectionSymbol) { MCOS->SwitchSection(context.getObjectFileInfo()->getDwarfInfoSection()); MCOS->SwitchSection(context.getObjectFileInfo()->getDwarfAbbrevSection()); MCSymbol *AbbrevSectionSymbol; - if (AsmInfo.doesDwarfRequireRelocationForSectionOffset()) { + if (AsmInfo.doesDwarfUseRelocationsAcrossSections()) { AbbrevSectionSymbol = context.CreateTempSymbol(); MCOS->EmitLabel(AbbrevSectionSymbol); } else { @@ -766,7 +767,7 @@ void MCGenDwarfLabelEntry::Make(MCSymbol *Symbol, MCStreamer *MCOS, MCOS->EmitLabel(Label); // Create and entry for the info and add it to the other entries. 
- MCGenDwarfLabelEntry *Entry = + MCGenDwarfLabelEntry *Entry = new MCGenDwarfLabelEntry(Name, FileNumber, LineNumber, Label); MCOS->getContext().addMCGenDwarfLabelEntry(Entry); } @@ -1285,7 +1286,7 @@ MCSymbol *FrameEmitterImpl::EmitFDE(MCStreamer &streamer, 0); if (verboseAsm) streamer.AddComment("FDE CIE Offset"); streamer.EmitAbsValue(offset, 4); - } else if (!asmInfo.doesDwarfRequireRelocationForSectionOffset()) { + } else if (!asmInfo.doesDwarfUseRelocationsAcrossSections()) { const MCExpr *offset = MakeStartMinusEndExpr(streamer, *SectionStart, cieStart, 0); streamer.EmitAbsValue(offset, 4); @@ -1293,20 +1294,17 @@ MCSymbol *FrameEmitterImpl::EmitFDE(MCStreamer &streamer, streamer.EmitSymbolValue(&cieStart, 4); } - unsigned fdeEncoding = MOFI->getFDEEncoding(UsingCFI); - unsigned size = getSizeForEncoding(streamer, fdeEncoding); - // PC Begin - unsigned PCBeginEncoding = IsEH ? fdeEncoding : - (unsigned)dwarf::DW_EH_PE_absptr; - unsigned PCBeginSize = getSizeForEncoding(streamer, PCBeginEncoding); - EmitSymbol(streamer, *frame.Begin, PCBeginEncoding, "FDE initial location"); + unsigned PCEncoding = IsEH ? MOFI->getFDEEncoding(UsingCFI) + : (unsigned)dwarf::DW_EH_PE_absptr; + unsigned PCSize = getSizeForEncoding(streamer, PCEncoding); + EmitSymbol(streamer, *frame.Begin, PCEncoding, "FDE initial location"); // PC Range const MCExpr *Range = MakeStartMinusEndExpr(streamer, *frame.Begin, *frame.End, 0); if (verboseAsm) streamer.AddComment("FDE address range"); - streamer.EmitAbsValue(Range, size); + streamer.EmitAbsValue(Range, PCSize); if (IsEH) { // Augmentation Data Length @@ -1329,7 +1327,7 @@ MCSymbol *FrameEmitterImpl::EmitFDE(MCStreamer &streamer, EmitCFIInstructions(streamer, frame.Instructions, frame.Begin); // Padding - streamer.EmitValueToAlignment(PCBeginSize); + streamer.EmitValueToAlignment(PCSize); return fdeEnd; } diff --git a/lib/MC/MCELFObjectTargetWriter.cpp b/lib/MC/MCELFObjectTargetWriter.cpp index 171ab4d..6eb6914 100644 --- a/lib/MC/MCELFObjectTargetWriter.cpp +++ b/lib/MC/MCELFObjectTargetWriter.cpp @@ -15,9 +15,11 @@ using namespace llvm; MCELFObjectTargetWriter::MCELFObjectTargetWriter(bool Is64Bit_, uint8_t OSABI_, uint16_t EMachine_, - bool HasRelocationAddend_) + bool HasRelocationAddend_, + bool IsN64_) : OSABI(OSABI_), EMachine(EMachine_), - HasRelocationAddend(HasRelocationAddend_), Is64Bit(Is64Bit_) { + HasRelocationAddend(HasRelocationAddend_), Is64Bit(Is64Bit_), + IsN64(IsN64_){ } /// Default e_flags = 0 diff --git a/lib/MC/MCELFStreamer.cpp b/lib/MC/MCELFStreamer.cpp index 6c4d0e3..2d342dc 100644 --- a/lib/MC/MCELFStreamer.cpp +++ b/lib/MC/MCELFStreamer.cpp @@ -13,6 +13,8 @@ #include "MCELF.h" #include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/Twine.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCCodeEmitter.h" #include "llvm/MC/MCContext.h" @@ -89,7 +91,7 @@ public: unsigned ByteAlignment); virtual void EmitZerofill(const MCSection *Section, MCSymbol *Symbol = 0, - unsigned Size = 0, unsigned ByteAlignment = 0) { + uint64_t Size = 0, unsigned ByteAlignment = 0) { llvm_unreachable("ELF doesn't support this directive"); } virtual void EmitTBSSSymbol(const MCSection *Section, MCSymbol *Symbol, diff --git a/lib/MC/MCExpr.cpp b/lib/MC/MCExpr.cpp index 7880155..0eb7fcc 100644 --- a/lib/MC/MCExpr.cpp +++ b/lib/MC/MCExpr.cpp @@ -202,6 +202,8 @@ StringRef MCSymbolRefExpr::getVariantKindName(VariantKind Kind) { case VK_PPC_DARWIN_LO16: return "lo16"; case VK_PPC_GAS_HA16: return "ha"; case VK_PPC_GAS_LO16: return 
"l"; + case VK_PPC_TPREL16_HA: return "tprel@ha"; + case VK_PPC_TPREL16_LO: return "tprel@l"; case VK_Mips_GPREL: return "GPREL"; case VK_Mips_GOT_CALL: return "GOT_CALL"; case VK_Mips_GOT16: return "GOT16"; @@ -220,6 +222,8 @@ StringRef MCSymbolRefExpr::getVariantKindName(VariantKind Kind) { case VK_Mips_GOT_DISP: return "GOT_DISP"; case VK_Mips_GOT_PAGE: return "GOT_PAGE"; case VK_Mips_GOT_OFST: return "GOT_OFST"; + case VK_Mips_HIGHER: return "HIGHER"; + case VK_Mips_HIGHEST: return "HIGHEST"; } llvm_unreachable("Invalid variant kind"); } diff --git a/lib/MC/MCMachOStreamer.cpp b/lib/MC/MCMachOStreamer.cpp index bc6cf77..b75fe2c 100644 --- a/lib/MC/MCMachOStreamer.cpp +++ b/lib/MC/MCMachOStreamer.cpp @@ -1,4 +1,3 @@ -//===- lib/MC/MCMachOStreamer.cpp - Mach-O Object Output ------------===// // // The LLVM Compiler Infrastructure // @@ -33,6 +32,8 @@ class MCMachOStreamer : public MCObjectStreamer { private: virtual void EmitInstToData(const MCInst &Inst); + void EmitDataRegion(DataRegionData::KindTy Kind); + void EmitDataRegionEnd(); public: MCMachOStreamer(MCContext &Context, MCAsmBackend &MAB, raw_ostream &OS, MCCodeEmitter *Emitter) @@ -46,6 +47,7 @@ public: virtual void EmitEHSymAttributes(const MCSymbol *Symbol, MCSymbol *EHSymbol); virtual void EmitAssemblerFlag(MCAssemblerFlag Flag); + virtual void EmitDataRegion(MCDataRegionType Kind); virtual void EmitThumbFunc(MCSymbol *Func); virtual void EmitAssignment(MCSymbol *Symbol, const MCExpr *Value); virtual void EmitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute); @@ -72,7 +74,7 @@ public: llvm_unreachable("macho doesn't support this directive"); } virtual void EmitZerofill(const MCSection *Section, MCSymbol *Symbol = 0, - unsigned Size = 0, unsigned ByteAlignment = 0); + uint64_t Size = 0, unsigned ByteAlignment = 0); virtual void EmitTBSSSymbol(const MCSection *Section, MCSymbol *Symbol, uint64_t Size, unsigned ByteAlignment = 0); virtual void EmitBytes(StringRef Data, unsigned AddrSpace); @@ -138,6 +140,26 @@ void MCMachOStreamer::EmitLabel(MCSymbol *Symbol) { SD.setFlags(SD.getFlags() & ~SF_ReferenceTypeMask); } +void MCMachOStreamer::EmitDataRegion(DataRegionData::KindTy Kind) { + // Create a temporary label to mark the start of the data region. + MCSymbol *Start = getContext().CreateTempSymbol(); + EmitLabel(Start); + // Record the region for the object writer to use. + DataRegionData Data = { Kind, Start, NULL }; + std::vector &Regions = getAssembler().getDataRegions(); + Regions.push_back(Data); +} + +void MCMachOStreamer::EmitDataRegionEnd() { + std::vector &Regions = getAssembler().getDataRegions(); + assert(Regions.size() && "Mismatched .end_data_region!"); + DataRegionData &Data = Regions.back(); + assert(Data.End == NULL && "Mismatched .end_data_region!"); + // Create a temporary label to mark the end of the data region. + Data.End = getContext().CreateTempSymbol(); + EmitLabel(Data.End); +} + void MCMachOStreamer::EmitAssemblerFlag(MCAssemblerFlag Flag) { // Let the target do whatever target specific stuff it needs to do. 
getAssembler().getBackend().handleAssemblerFlag(Flag); @@ -153,6 +175,26 @@ void MCMachOStreamer::EmitAssemblerFlag(MCAssemblerFlag Flag) { } } +void MCMachOStreamer::EmitDataRegion(MCDataRegionType Kind) { + switch (Kind) { + case MCDR_DataRegion: + EmitDataRegion(DataRegionData::Data); + return; + case MCDR_DataRegionJT8: + EmitDataRegion(DataRegionData::JumpTable8); + return; + case MCDR_DataRegionJT16: + EmitDataRegion(DataRegionData::JumpTable16); + return; + case MCDR_DataRegionJT32: + EmitDataRegion(DataRegionData::JumpTable32); + return; + case MCDR_DataRegionEnd: + EmitDataRegionEnd(); + return; + } +} + void MCMachOStreamer::EmitThumbFunc(MCSymbol *Symbol) { // Remember that the function is a thumb function. Fixup and relocation // values will need adjusted. @@ -284,7 +326,7 @@ void MCMachOStreamer::EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size, } void MCMachOStreamer::EmitZerofill(const MCSection *Section, MCSymbol *Symbol, - unsigned Size, unsigned ByteAlignment) { + uint64_t Size, unsigned ByteAlignment) { MCSectionData &SectData = getAssembler().getOrCreateSectionData(*Section); // The symbol may not be present, which only creates the section. diff --git a/lib/MC/MCNullStreamer.cpp b/lib/MC/MCNullStreamer.cpp index 7ff2d1b..4c17d91 100644 --- a/lib/MC/MCNullStreamer.cpp +++ b/lib/MC/MCNullStreamer.cpp @@ -63,7 +63,7 @@ namespace { virtual void EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size, unsigned ByteAlignment) {} virtual void EmitZerofill(const MCSection *Section, MCSymbol *Symbol = 0, - unsigned Size = 0, unsigned ByteAlignment = 0) {} + uint64_t Size = 0, unsigned ByteAlignment = 0) {} virtual void EmitTBSSSymbol(const MCSection *Section, MCSymbol *Symbol, uint64_t Size, unsigned ByteAlignment) {} virtual void EmitBytes(StringRef Data, unsigned AddrSpace) {} @@ -82,7 +82,7 @@ namespace { virtual bool EmitValueToOffset(const MCExpr *Offset, unsigned char Value = 0) { return false; } - + virtual void EmitFileDirective(StringRef Filename) {} virtual bool EmitDwarfFileDirective(unsigned FileNo, StringRef Directory, StringRef Filename) { @@ -99,12 +99,12 @@ namespace { virtual void EmitCFIEndProcImpl(MCDwarfFrameInfo &Frame) { RecordProcEnd(Frame); } - + /// @} }; } - + MCStreamer *llvm::createNullStreamer(MCContext &Context) { return new MCNullStreamer(Context); } diff --git a/lib/MC/MCObjectFileInfo.cpp b/lib/MC/MCObjectFileInfo.cpp index b22ae33..29b4a94 100644 --- a/lib/MC/MCObjectFileInfo.cpp +++ b/lib/MC/MCObjectFileInfo.cpp @@ -169,7 +169,7 @@ void MCObjectFileInfo::InitMachOMCObjectFileInfo(Triple T) { Ctx->getMachOSection("__DWARF", "__apple_types", MCSectionMachO::S_ATTR_DEBUG, SectionKind::getMetadata()); - + DwarfAbbrevSection = Ctx->getMachOSection("__DWARF", "__debug_abbrev", MCSectionMachO::S_ATTR_DEBUG, @@ -507,15 +507,13 @@ void MCObjectFileInfo::InitCOFFMCObjectFileInfo(Triple T) { PDataSection = Ctx->getCOFFSection(".pdata", COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | - COFF::IMAGE_SCN_MEM_READ | - COFF::IMAGE_SCN_MEM_WRITE, + COFF::IMAGE_SCN_MEM_READ, SectionKind::getDataRel()); XDataSection = Ctx->getCOFFSection(".xdata", COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | - COFF::IMAGE_SCN_MEM_READ | - COFF::IMAGE_SCN_MEM_WRITE, + COFF::IMAGE_SCN_MEM_READ, SectionKind::getDataRel()); TLSDataSection = Ctx->getCOFFSection(".tls$", diff --git a/lib/MC/MCObjectWriter.cpp b/lib/MC/MCObjectWriter.cpp index 030f247..94d7cd6 100644 --- a/lib/MC/MCObjectWriter.cpp +++ b/lib/MC/MCObjectWriter.cpp @@ -17,40 +17,6 @@ using namespace llvm; MCObjectWriter::~MCObjectWriter() 
{ } -/// Utility function to encode a SLEB128 value. -void MCObjectWriter::EncodeSLEB128(int64_t Value, raw_ostream &OS) { - bool More; - do { - uint8_t Byte = Value & 0x7f; - // NOTE: this assumes that this signed shift is an arithmetic right shift. - Value >>= 7; - More = !((((Value == 0 ) && ((Byte & 0x40) == 0)) || - ((Value == -1) && ((Byte & 0x40) != 0)))); - if (More) - Byte |= 0x80; // Mark this byte that that more bytes will follow. - OS << char(Byte); - } while (More); -} - -/// Utility function to encode a ULEB128 value. -void MCObjectWriter::EncodeULEB128(uint64_t Value, raw_ostream &OS, - unsigned Padding) { - do { - uint8_t Byte = Value & 0x7f; - Value >>= 7; - if (Value != 0 || Padding != 0) - Byte |= 0x80; // Mark this byte that that more bytes will follow. - OS << char(Byte); - } while (Value != 0); - - // Pad with 0x80 and emit a null byte at the end. - if (Padding != 0) { - for (; Padding != 1; --Padding) - OS << '\x80'; - OS << '\x00'; - } -} - bool MCObjectWriter::IsSymbolRefDifferenceFullyResolved(const MCAssembler &Asm, const MCSymbolRefExpr *A, diff --git a/lib/MC/MCParser/AsmParser.cpp b/lib/MC/MCParser/AsmParser.cpp index 8aef43c..b67c769 100644 --- a/lib/MC/MCParser/AsmParser.cpp +++ b/lib/MC/MCParser/AsmParser.cpp @@ -45,13 +45,18 @@ FatalAssemblerWarnings("fatal-assembler-warnings", namespace { /// \brief Helper class for tracking macro definitions. +typedef std::vector<AsmToken> MacroArgument; +typedef std::vector<MacroArgument> MacroArguments; +typedef StringRef MacroParameter; +typedef std::vector<MacroParameter> MacroParameters; + struct Macro { StringRef Name; StringRef Body; - std::vector<StringRef> Parameters; + MacroParameters Parameters; public: - Macro(StringRef N, StringRef B, const std::vector<StringRef> &P) : + Macro(StringRef N, StringRef B, const MacroParameters &P) : Name(N), Body(B), Parameters(P) {} }; @@ -178,9 +183,9 @@ private: bool ParseCppHashLineFilenameComment(const SMLoc &L); bool HandleMacroEntry(StringRef Name, SMLoc NameLoc, const Macro *M); - bool expandMacro(SmallString<256> &Buf, StringRef Body, - const std::vector<StringRef> &Parameters, - const std::vector<std::vector<AsmToken> > &A, + bool expandMacro(raw_svector_ostream &OS, StringRef Body, + const MacroParameters &Parameters, + const MacroArguments &A, const SMLoc &L); void HandleMacroExit(); @@ -204,11 +209,18 @@ private: void EatToEndOfStatement(); + bool ParseMacroArgument(MacroArgument &MA); + bool ParseMacroArguments(const Macro *M, MacroArguments &A); + /// \brief Parse up to the end of statement and a return the contents from the /// current token until the end of the statement; the current token on exit /// will be either the EndOfStatement or EOF. StringRef ParseStringToEndOfStatement(); + /// \brief Parse until the end of a statement or a comma is encountered, + /// return the contents from the current token up to the end or comma. + StringRef ParseStringToComma(); + bool ParseAssignment(StringRef Name, bool allow_redef); bool ParsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc); @@ -245,6 +257,10 @@ private: bool ParseDirectiveIncbin(); // ".incbin" bool ParseDirectiveIf(SMLoc DirectiveLoc); // ".if" + // ".ifb" or ".ifnb", depending on ExpectBlank. + bool ParseDirectiveIfb(SMLoc DirectiveLoc, bool ExpectBlank); + // ".ifc" or ".ifnc", depending on ExpectEqual.
+ bool ParseDirectiveIfc(SMLoc DirectiveLoc, bool ExpectEqual); // ".ifdef" or ".ifndef", depending on expect_defined bool ParseDirectiveIfdef(SMLoc DirectiveLoc, bool expect_defined); bool ParseDirectiveElseIf(SMLoc DirectiveLoc); // ".elseif" @@ -257,6 +273,15 @@ private: const MCExpr *ApplyModifierToExpr(const MCExpr *E, MCSymbolRefExpr::VariantKind Variant); + + // Macro-like directives + Macro *ParseMacroLikeBody(SMLoc DirectiveLoc); + void InstantiateMacroLikeBody(Macro *M, SMLoc DirectiveLoc, + raw_svector_ostream &OS); + bool ParseDirectiveRept(SMLoc DirectiveLoc); // ".rept" + bool ParseDirectiveIrp(SMLoc DirectiveLoc); // ".irp" + bool ParseDirectiveIrpc(SMLoc DirectiveLoc); // ".irpc" + bool ParseDirectiveEndr(SMLoc DirectiveLoc); // ".endr" }; /// \brief Generic implementations of directive handling, etc. which is shared @@ -328,6 +353,7 @@ public: AddDirectiveHandler<&GenericAsmParser::ParseDirectiveMacro>(".macro"); AddDirectiveHandler<&GenericAsmParser::ParseDirectiveEndMacro>(".endm"); AddDirectiveHandler<&GenericAsmParser::ParseDirectiveEndMacro>(".endmacro"); + AddDirectiveHandler<&GenericAsmParser::ParseDirectivePurgeMacro>(".purgem"); AddDirectiveHandler<&GenericAsmParser::ParseDirectiveLEB128>(".sleb128"); AddDirectiveHandler<&GenericAsmParser::ParseDirectiveLEB128>(".uleb128"); @@ -359,6 +385,7 @@ public: bool ParseDirectiveMacrosOnOff(StringRef, SMLoc DirectiveLoc); bool ParseDirectiveMacro(StringRef, SMLoc DirectiveLoc); bool ParseDirectiveEndMacro(StringRef, SMLoc DirectiveLoc); + bool ParseDirectivePurgeMacro(StringRef, SMLoc DirectiveLoc); bool ParseDirectiveLEB128(StringRef, SMLoc); }; @@ -456,7 +483,7 @@ bool AsmParser::EnterIncludeFile(const std::string &Filename) { } /// Process the specified .incbin file by seaching for it in the include paths -/// then just emiting the byte contents of the file to the streamer. This +/// then just emitting the byte contents of the file to the streamer. This /// returns true on failure. bool AsmParser::ProcessIncbinFile(const std::string &Filename) { std::string IncludedFile; @@ -602,6 +629,18 @@ StringRef AsmParser::ParseStringToEndOfStatement() { return StringRef(Start, End - Start); } +StringRef AsmParser::ParseStringToComma() { + const char *Start = getTok().getLoc().getPointer(); + + while (Lexer.isNot(AsmToken::EndOfStatement) && + Lexer.isNot(AsmToken::Comma) && + Lexer.isNot(AsmToken::Eof)) + Lex(); + + const char *End = getTok().getLoc().getPointer(); + return StringRef(Start, End - Start); +} + /// ParseParenExpr - Parse a paren expression and return it. /// NOTE: This assumes the leading '(' has already been consumed. /// @@ -700,7 +739,7 @@ bool AsmParser::ParsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) { IDVal == "f" ? 1 : 0); Res = MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_None, getContext()); - if(IDVal == "b" && Sym->isUndefined()) + if (IDVal == "b" && Sym->isUndefined()) return Error(Loc, "invalid reference to undefined symbol"); EndLoc = Lexer.getLoc(); Lex(); // Eat identifier. @@ -1042,6 +1081,14 @@ bool AsmParser::ParseStatement() { // example. 
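// A quick usage sketch (assembler input, not library code) for the conditional
// directives dispatched below; the .ifb/.ifnb and .ifc/.ifnc forms are the ones
// this patch introduces, with semantics per ParseDirectiveIfb/ParseDirectiveIfc:
//   .ifb \arg          # taken when the operand expands to nothing (.ifnb inverts)
//   .ifc \a, \b        # taken when the two strings compare equal (.ifnc inverts)
//   .endif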
if (IDVal == ".if") return ParseDirectiveIf(IDLoc); + if (IDVal == ".ifb") + return ParseDirectiveIfb(IDLoc, true); + if (IDVal == ".ifnb") + return ParseDirectiveIfb(IDLoc, false); + if (IDVal == ".ifc") + return ParseDirectiveIfc(IDLoc, true); + if (IDVal == ".ifnc") + return ParseDirectiveIfc(IDLoc, false); if (IDVal == ".ifdef") return ParseDirectiveIfdef(IDLoc, true); if (IDVal == ".ifndef" || IDVal == ".ifnotdef") @@ -1123,6 +1170,11 @@ bool AsmParser::ParseStatement() { // Otherwise, we have a normal instruction or directive. if (IDVal[0] == '.' && IDVal != ".") { + + // Target hook for parsing target specific directives. + if (!getTargetParser().ParseDirective(ID)) + return false; + // Assembler features if (IDVal == ".set" || IDVal == ".equ") return ParseDirectiveSet(IDVal, true); @@ -1192,6 +1244,10 @@ bool AsmParser::ParseStatement() { // Symbol attribute directives + if (IDVal == ".extern") { + EatToEndOfStatement(); // .extern is the default, ignore it. + return false; + } if (IDVal == ".globl" || IDVal == ".global") return ParseDirectiveSymbolAttribute(MCSA_Global); if (IDVal == ".indirect_symbol") @@ -1225,22 +1281,27 @@ bool AsmParser::ParseStatement() { if (IDVal == ".incbin") return ParseDirectiveIncbin(); - if (IDVal == ".code16") + if (IDVal == ".code16" || IDVal == ".code16gcc") return TokError(Twine(IDVal) + " not supported yet"); + // Macro-like directives + if (IDVal == ".rept") + return ParseDirectiveRept(IDLoc); + if (IDVal == ".irp") + return ParseDirectiveIrp(IDLoc); + if (IDVal == ".irpc") + return ParseDirectiveIrpc(IDLoc); + if (IDVal == ".endr") + return ParseDirectiveEndr(IDLoc); + // Look up the handler in the handler table. std::pair Handler = DirectiveMap.lookup(IDVal); if (Handler.first) return (*Handler.second)(Handler.first, IDVal, IDLoc); - // Target hook for parsing target specific directives. - if (!getTargetParser().ParseDirective(ID)) - return false; - bool retval = Warning(IDLoc, "ignoring directive for now"); - EatToEndOfStatement(); - return retval; + return Error(IDLoc, "unknown directive"); } CheckForValidSection(); @@ -1339,7 +1400,7 @@ bool AsmParser::ParseCppHashLineFilenameComment(const SMLoc &L) { return false; } -/// DiagHandler - will use the the last parsed cpp hash line filename comment +/// DiagHandler - will use the last parsed cpp hash line filename comment /// for the Filename and LineNo if any in the diagnostic. void AsmParser::DiagHandler(const SMDiagnostic &Diag, void *Context) { const AsmParser *Parser = static_cast(Context); @@ -1393,11 +1454,10 @@ void AsmParser::DiagHandler(const SMDiagnostic &Diag, void *Context) { NewDiag.print(0, OS); } -bool AsmParser::expandMacro(SmallString<256> &Buf, StringRef Body, - const std::vector &Parameters, - const std::vector > &A, +bool AsmParser::expandMacro(raw_svector_ostream &OS, StringRef Body, + const MacroParameters &Parameters, + const MacroArguments &A, const SMLoc &L) { - raw_svector_ostream OS(Buf); unsigned NParameters = Parameters.size(); if (NParameters != 0 && NParameters != A.size()) return Error(L, "Wrong number of arguments"); @@ -1449,7 +1509,7 @@ bool AsmParser::expandMacro(SmallString<256> &Buf, StringRef Body, break; // Otherwise substitute with the token values, with spaces eliminated. 
- for (std::vector<AsmToken>::const_iterator it = A[Index].begin(), + for (MacroArgument::const_iterator it = A[Index].begin(), ie = A[Index].end(); it != ie; ++it) OS << it->getString(); break; @@ -1472,7 +1532,7 @@ bool AsmParser::expandMacro(SmallString<256> &Buf, StringRef Body, if (Index == NParameters) return Error(L, "Parameter not found"); - for (std::vector<AsmToken>::const_iterator it = A[Index].begin(), + for (MacroArgument::const_iterator it = A[Index].begin(), ie = A[Index].end(); it != ie; ++it) OS << it->getString(); @@ -1482,9 +1542,6 @@ bool AsmParser::expandMacro(SmallString<256> &Buf, StringRef Body, Body = Body.substr(Pos); } - // We include the .endmacro in the buffer as our queue to exit the macro - // instantiation. - OS << ".endmacro\n"; return false; } @@ -1494,55 +1551,97 @@ MacroInstantiation::MacroInstantiation(const Macro *M, SMLoc IL, SMLoc EL, { } -bool AsmParser::HandleMacroEntry(StringRef Name, SMLoc NameLoc, - const Macro *M) { - // Arbitrarily limit macro nesting depth, to match 'as'. We can eliminate - // this, although we should protect against infinite loops. - if (ActiveMacros.size() == 20) - return TokError("macros cannot be nested more than 20 levels deep"); - - // Parse the macro instantiation arguments. - std::vector<std::vector<AsmToken> > MacroArguments; - MacroArguments.push_back(std::vector<AsmToken>()); +/// ParseMacroArgument - Extract AsmTokens for a macro argument. +/// This is used for both default macro parameter values and the +/// arguments in macro invocations bool AsmParser::ParseMacroArgument(MacroArgument &MA) { unsigned ParenLevel = 0; + for (;;) { - if (Lexer.is(AsmToken::Eof)) + SMLoc LastTokenLoc; + + if (Lexer.is(AsmToken::Eof) || Lexer.is(AsmToken::Equal)) return TokError("unexpected token in macro instantiation"); + + // HandleMacroEntry relies on not advancing the lexer here + // to be able to fill in the remaining default parameter values if (Lexer.is(AsmToken::EndOfStatement)) break; + if (ParenLevel == 0 && Lexer.is(AsmToken::Comma)) + break; - // If we aren't inside parentheses and this is a comma, start a new token - // list. - if (ParenLevel == 0 && Lexer.is(AsmToken::Comma)) { - MacroArguments.push_back(std::vector<AsmToken>()); - } else { - // Adjust the current parentheses level. - if (Lexer.is(AsmToken::LParen)) - ++ParenLevel; - else if (Lexer.is(AsmToken::RParen) && ParenLevel) - --ParenLevel; - - // Append the token to the current argument list. - MacroArguments.back().push_back(getTok()); - } + // Adjust the current parentheses level. + if (Lexer.is(AsmToken::LParen)) + ++ParenLevel; + else if (Lexer.is(AsmToken::RParen) && ParenLevel) + --ParenLevel; + + // Append the token to the current argument list. + MA.push_back(getTok()); Lex(); } - // If the last argument didn't end up with any tokens, it's not a real - // argument and we should remove it from the list. This happens with either - // a tailing comma or an empty argument list. - if (MacroArguments.back().empty()) - MacroArguments.pop_back(); + if (ParenLevel != 0) + return TokError("unbalanced parenthesises in macro argument"); + return false; +} + +// Parse the macro instantiation arguments. +bool AsmParser::ParseMacroArguments(const Macro *M, MacroArguments &A) { + const unsigned NParameters = M ?
M->Parameters.size() : 0; + + // Parse two kinds of macro invocations: + // - macros defined without any parameters accept an arbitrary number of them + // - macros defined with parameters accept at most that many of them + for (unsigned Parameter = 0; !NParameters || Parameter < NParameters; + ++Parameter) { + MacroArgument MA; + + if (ParseMacroArgument(MA)) + return true; + + A.push_back(MA); + + if (Lexer.is(AsmToken::EndOfStatement)) + return false; + + if (Lexer.is(AsmToken::Comma)) + Lex(); + } + return TokError("Too many arguments"); +} + +bool AsmParser::HandleMacroEntry(StringRef Name, SMLoc NameLoc, + const Macro *M) { + // Arbitrarily limit macro nesting depth, to match 'as'. We can eliminate + // this, although we should protect against infinite loops. + if (ActiveMacros.size() == 20) + return TokError("macros cannot be nested more than 20 levels deep"); + + MacroArguments A; + if (ParseMacroArguments(M, A)) + return true; + + // Remove any trailing empty arguments. Do this after-the-fact as we have + // to keep empty arguments in the middle of the list or positionality + // gets off. e.g., "foo 1, , 2" vs. "foo 1, 2," + while (!A.empty() && A.back().empty()) + A.pop_back(); // Macro instantiation is lexical, unfortunately. We construct a new buffer // to hold the macro body with substitutions. SmallString<256> Buf; StringRef Body = M->Body; + raw_svector_ostream OS(Buf); - if (expandMacro(Buf, Body, M->Parameters, MacroArguments, getTok().getLoc())) + if (expandMacro(OS, Body, M->Parameters, A, getTok().getLoc())) return true; + // We include the .endmacro in the buffer as our queue to exit the macro + // instantiation. + OS << ".endmacro\n"; + MemoryBuffer *Instantiation = - MemoryBuffer::getMemBufferCopy(Buf.str(), ""); + MemoryBuffer::getMemBufferCopy(OS.str(), ""); // Create the macro instantiation object and add to the current macro // instantiation stack. 
@@ -2295,10 +2394,9 @@ bool AsmParser::ParseDirectiveIncbin() { bool AsmParser::ParseDirectiveIf(SMLoc DirectiveLoc) { TheCondStack.push_back(TheCondState); TheCondState.TheCond = AsmCond::IfCond; - if(TheCondState.Ignore) { + if (TheCondState.Ignore) { EatToEndOfStatement(); - } - else { + } else { int64_t ExprValue; if (ParseAbsoluteExpression(ExprValue)) return true; @@ -2315,6 +2413,61 @@ bool AsmParser::ParseDirectiveIf(SMLoc DirectiveLoc) { return false; } +/// ParseDirectiveIfb +/// ::= .ifb string +bool AsmParser::ParseDirectiveIfb(SMLoc DirectiveLoc, bool ExpectBlank) { + TheCondStack.push_back(TheCondState); + TheCondState.TheCond = AsmCond::IfCond; + + if (TheCondState.Ignore) { + EatToEndOfStatement(); + } else { + StringRef Str = ParseStringToEndOfStatement(); + + if (getLexer().isNot(AsmToken::EndOfStatement)) + return TokError("unexpected token in '.ifb' directive"); + + Lex(); + + TheCondState.CondMet = ExpectBlank == Str.empty(); + TheCondState.Ignore = !TheCondState.CondMet; + } + + return false; +} + +/// ParseDirectiveIfc +/// ::= .ifc string1, string2 +bool AsmParser::ParseDirectiveIfc(SMLoc DirectiveLoc, bool ExpectEqual) { + TheCondStack.push_back(TheCondState); + TheCondState.TheCond = AsmCond::IfCond; + + if (TheCondState.Ignore) { + EatToEndOfStatement(); + } else { + StringRef Str1 = ParseStringToComma(); + + if (getLexer().isNot(AsmToken::Comma)) + return TokError("unexpected token in '.ifc' directive"); + + Lex(); + + StringRef Str2 = ParseStringToEndOfStatement(); + + if (getLexer().isNot(AsmToken::EndOfStatement)) + return TokError("unexpected token in '.ifc' directive"); + + Lex(); + + TheCondState.CondMet = ExpectEqual == (Str1 == Str2); + TheCondState.Ignore = !TheCondState.CondMet; + } + + return false; +} + +/// ParseDirectiveIfdef +/// ::= .ifdef symbol bool AsmParser::ParseDirectiveIfdef(SMLoc DirectiveLoc, bool expect_defined) { StringRef Name; TheCondStack.push_back(TheCondState); @@ -2853,7 +3006,7 @@ bool GenericAsmParser::ParseDirectiveCFISameValue(StringRef IDVal, /// ParseDirectiveCFIRestore /// ::= .cfi_restore register bool GenericAsmParser::ParseDirectiveCFIRestore(StringRef IDVal, - SMLoc DirectiveLoc) { + SMLoc DirectiveLoc) { int64_t Register = 0; if (ParseRegisterOrRegisterNumber(Register, DirectiveLoc)) return true; @@ -2866,7 +3019,7 @@ bool GenericAsmParser::ParseDirectiveCFIRestore(StringRef IDVal, /// ParseDirectiveCFIEscape /// ::= .cfi_escape expression[,...] 
bool GenericAsmParser::ParseDirectiveCFIEscape(StringRef IDVal, - SMLoc DirectiveLoc) { + SMLoc DirectiveLoc) { std::string Values; int64_t CurrValue; if (getParser().ParseAbsoluteExpression(CurrValue)) @@ -2922,7 +3075,7 @@ bool GenericAsmParser::ParseDirectiveMacro(StringRef Directive, if (getParser().ParseIdentifier(Name)) return TokError("expected identifier in directive"); - std::vector<StringRef> Parameters; + MacroParameters Parameters; if (getLexer().isNot(AsmToken::EndOfStatement)) { for(;;) { StringRef Parameter; @@ -2981,7 +3134,7 @@ bool GenericAsmParser::ParseDirectiveMacro(StringRef Directive, /// ::= .endm /// ::= .endmacro bool GenericAsmParser::ParseDirectiveEndMacro(StringRef Directive, - SMLoc DirectiveLoc) { + SMLoc DirectiveLoc) { if (getLexer().isNot(AsmToken::EndOfStatement)) return TokError("unexpected token in '" + Directive + "' directive"); @@ -2998,6 +3151,27 @@ bool GenericAsmParser::ParseDirectiveEndMacro(StringRef Directive, "no current macro definition"); } +/// ParseDirectivePurgeMacro +/// ::= .purgem +bool GenericAsmParser::ParseDirectivePurgeMacro(StringRef Directive, + SMLoc DirectiveLoc) { + StringRef Name; + if (getParser().ParseIdentifier(Name)) + return TokError("expected identifier in '.purgem' directive"); + + if (getLexer().isNot(AsmToken::EndOfStatement)) + return TokError("unexpected token in '.purgem' directive"); + + StringMap<Macro*>::iterator I = getParser().MacroMap.find(Name); + if (I == getParser().MacroMap.end()) + return Error(DirectiveLoc, "macro '" + Name + "' is not defined"); + + // Undefine the macro. + delete I->getValue(); + getParser().MacroMap.erase(I); + return false; +} + bool GenericAsmParser::ParseDirectiveLEB128(StringRef DirName, SMLoc) { getParser().CheckForValidSection(); @@ -3017,6 +3191,217 @@ bool GenericAsmParser::ParseDirectiveLEB128(StringRef DirName, SMLoc) { return false; } +Macro *AsmParser::ParseMacroLikeBody(SMLoc DirectiveLoc) { + AsmToken EndToken, StartToken = getTok(); + + unsigned NestLevel = 0; + for (;;) { + // Check whether we have reached the end of the file. + if (getLexer().is(AsmToken::Eof)) { + Error(DirectiveLoc, "no matching '.endr' in definition"); + return 0; + } + + if (Lexer.is(AsmToken::Identifier) && + (getTok().getIdentifier() == ".rept")) { + ++NestLevel; + } + + // Otherwise, check whether we have reached the .endr. + if (Lexer.is(AsmToken::Identifier) && + getTok().getIdentifier() == ".endr") { + if (NestLevel == 0) { + EndToken = getTok(); + Lex(); + if (Lexer.isNot(AsmToken::EndOfStatement)) { + TokError("unexpected token in '.endr' directive"); + return 0; + } + break; + } + --NestLevel; + } + + // Otherwise, scan till the end of the statement. + EatToEndOfStatement(); + } + + const char *BodyStart = StartToken.getLoc().getPointer(); + const char *BodyEnd = EndToken.getLoc().getPointer(); + StringRef Body = StringRef(BodyStart, BodyEnd - BodyStart); + + // We Are Anonymous. + StringRef Name; + MacroParameters Parameters; + return new Macro(Name, Body, Parameters); +} + +void AsmParser::InstantiateMacroLikeBody(Macro *M, SMLoc DirectiveLoc, + raw_svector_ostream &OS) { + OS << ".endr\n"; + + MemoryBuffer *Instantiation = + MemoryBuffer::getMemBufferCopy(OS.str(), "<instantiation>"); + + // Create the macro instantiation object and add to the current macro + // instantiation stack. + MacroInstantiation *MI = new MacroInstantiation(M, DirectiveLoc, + getTok().getLoc(), + Instantiation); + ActiveMacros.push_back(MI); + + // Jump to the macro instantiation and prime the lexer.
+ CurBuffer = SrcMgr.AddNewSourceBuffer(MI->Instantiation, SMLoc()); + Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer)); + Lex(); +} + +bool AsmParser::ParseDirectiveRept(SMLoc DirectiveLoc) { + int64_t Count; + if (ParseAbsoluteExpression(Count)) + return TokError("unexpected token in '.rept' directive"); + + if (Count < 0) + return TokError("Count is negative"); + + if (Lexer.isNot(AsmToken::EndOfStatement)) + return TokError("unexpected token in '.rept' directive"); + + // Eat the end of statement. + Lex(); + + // Lex the rept definition. + Macro *M = ParseMacroLikeBody(DirectiveLoc); + if (!M) + return true; + + // Macro instantiation is lexical, unfortunately. We construct a new buffer + // to hold the macro body with substitutions. + SmallString<256> Buf; + MacroParameters Parameters; + MacroArguments A; + raw_svector_ostream OS(Buf); + while (Count--) { + if (expandMacro(OS, M->Body, Parameters, A, getTok().getLoc())) + return true; + } + InstantiateMacroLikeBody(M, DirectiveLoc, OS); + + return false; +} + +/// ParseDirectiveIrp +/// ::= .irp symbol,values +bool AsmParser::ParseDirectiveIrp(SMLoc DirectiveLoc) { + MacroParameters Parameters; + MacroParameter Parameter; + + if (ParseIdentifier(Parameter)) + return TokError("expected identifier in '.irp' directive"); + + Parameters.push_back(Parameter); + + if (Lexer.isNot(AsmToken::Comma)) + return TokError("expected comma in '.irp' directive"); + + Lex(); + + MacroArguments A; + if (ParseMacroArguments(0, A)) + return true; + + // Eat the end of statement. + Lex(); + + // Lex the irp definition. + Macro *M = ParseMacroLikeBody(DirectiveLoc); + if (!M) + return true; + + // Macro instantiation is lexical, unfortunately. We construct a new buffer + // to hold the macro body with substitutions. + SmallString<256> Buf; + raw_svector_ostream OS(Buf); + + for (std::vector<MacroArgument>::iterator i = A.begin(), e = A.end(); i != e; + ++i) { + std::vector<MacroArgument> Args; + Args.push_back(*i); + + if (expandMacro(OS, M->Body, Parameters, Args, getTok().getLoc())) + return true; + } + + InstantiateMacroLikeBody(M, DirectiveLoc, OS); + + return false; +} + +/// ParseDirectiveIrpc +/// ::= .irpc symbol,values +bool AsmParser::ParseDirectiveIrpc(SMLoc DirectiveLoc) { + MacroParameters Parameters; + MacroParameter Parameter; + + if (ParseIdentifier(Parameter)) + return TokError("expected identifier in '.irpc' directive"); + + Parameters.push_back(Parameter); + + if (Lexer.isNot(AsmToken::Comma)) + return TokError("expected comma in '.irpc' directive"); + + Lex(); + + MacroArguments A; + if (ParseMacroArguments(0, A)) + return true; + + if (A.size() != 1 || A.front().size() != 1) + return TokError("unexpected token in '.irpc' directive"); + + // Eat the end of statement. + Lex(); + + // Lex the irpc definition. + Macro *M = ParseMacroLikeBody(DirectiveLoc); + if (!M) + return true; + + // Macro instantiation is lexical, unfortunately. We construct a new buffer + // to hold the macro body with substitutions.
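// For reference, each character of the .irpc value list becomes its own
// single-token argument in the Values.slice(I, I+1) loop that follows, so an
// input like ".irpc c,123" expands the body three times with \c bound to
// "1", "2", and "3" in turn.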
+ SmallString<256> Buf; + raw_svector_ostream OS(Buf); + + StringRef Values = A.front().front().getString(); + std::size_t I, End = Values.size(); + for (I = 0; I < End; ++I) { + MacroArgument Arg; + Arg.push_back(AsmToken(AsmToken::Identifier, Values.slice(I, I+1))); + + MacroArguments Args; + Args.push_back(Arg); + + if (expandMacro(OS, M->Body, Parameters, Args, getTok().getLoc())) + return true; + } + + InstantiateMacroLikeBody(M, DirectiveLoc, OS); + + return false; +} + +bool AsmParser::ParseDirectiveEndr(SMLoc DirectiveLoc) { + if (ActiveMacros.empty()) + return TokError("unexpected '.endr' directive, no current .rept"); + + // The only .repl that should get here are the ones created by + // InstantiateMacroLikeBody. + assert(getLexer().is(AsmToken::EndOfStatement)); + + HandleMacroExit(); + return false; +} /// \brief Create an MCAsmParser instance. MCAsmParser *llvm::createMCAsmParser(SourceMgr &SM, diff --git a/lib/MC/MCParser/DarwinAsmParser.cpp b/lib/MC/MCParser/DarwinAsmParser.cpp index 6f45068..18033d0 100644 --- a/lib/MC/MCParser/DarwinAsmParser.cpp +++ b/lib/MC/MCParser/DarwinAsmParser.cpp @@ -14,6 +14,7 @@ #include "llvm/MC/MCSymbol.h" #include "llvm/MC/MCParser/MCAsmLexer.h" #include "llvm/MC/MCParser/MCAsmParser.h" +#include "llvm/ADT/StringSwitch.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Twine.h" #include "llvm/Support/MemoryBuffer.h" @@ -49,6 +50,9 @@ public: AddDirectiveHandler<&DarwinAsmParser::ParseDirectiveDumpOrLoad>(".dump"); AddDirectiveHandler<&DarwinAsmParser::ParseDirectiveDumpOrLoad>(".load"); AddDirectiveHandler<&DarwinAsmParser::ParseDirectiveSection>(".section"); + AddDirectiveHandler<&DarwinAsmParser::ParseDirectivePushSection>(".pushsection"); + AddDirectiveHandler<&DarwinAsmParser::ParseDirectivePopSection>(".popsection"); + AddDirectiveHandler<&DarwinAsmParser::ParseDirectivePrevious>(".previous"); AddDirectiveHandler<&DarwinAsmParser::ParseDirectiveSecureLogUnique>( ".secure_log_unique"); AddDirectiveHandler<&DarwinAsmParser::ParseDirectiveSecureLogReset>( @@ -56,6 +60,9 @@ public: AddDirectiveHandler<&DarwinAsmParser::ParseDirectiveTBSS>(".tbss"); AddDirectiveHandler<&DarwinAsmParser::ParseDirectiveZerofill>(".zerofill"); + AddDirectiveHandler<&DarwinAsmParser::ParseDirectiveDataRegion>(".data_region"); + AddDirectiveHandler<&DarwinAsmParser::ParseDirectiveDataRegionEnd>(".end_data_region"); + // Special section directives. 
AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveConst>(".const"); AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveConstData>(".const_data"); @@ -108,11 +115,16 @@ public: bool ParseDirectiveDumpOrLoad(StringRef, SMLoc); bool ParseDirectiveLsym(StringRef, SMLoc); bool ParseDirectiveSection(StringRef, SMLoc); + bool ParseDirectivePushSection(StringRef, SMLoc); + bool ParseDirectivePopSection(StringRef, SMLoc); + bool ParseDirectivePrevious(StringRef, SMLoc); bool ParseDirectiveSecureLogReset(StringRef, SMLoc); bool ParseDirectiveSecureLogUnique(StringRef, SMLoc); bool ParseDirectiveSubsectionsViaSymbols(StringRef, SMLoc); bool ParseDirectiveTBSS(StringRef, SMLoc); bool ParseDirectiveZerofill(StringRef, SMLoc); + bool ParseDirectiveDataRegion(StringRef, SMLoc); + bool ParseDirectiveDataRegionEnd(StringRef, SMLoc); // Named Section Directive bool ParseSectionDirectiveConst(StringRef, SMLoc) { @@ -291,7 +303,7 @@ public: }; -} +} // end anonymous namespace bool DarwinAsmParser::ParseSectionSwitch(const char *Segment, const char *Section, @@ -451,6 +463,37 @@ bool DarwinAsmParser::ParseDirectiveSection(StringRef, SMLoc) { return false; } +/// ParseDirectivePushSection: +/// ::= .pushsection identifier (',' identifier)* +bool DarwinAsmParser::ParseDirectivePushSection(StringRef S, SMLoc Loc) { + getStreamer().PushSection(); + + if (ParseDirectiveSection(S, Loc)) { + getStreamer().PopSection(); + return true; + } + + return false; +} + +/// ParseDirectivePopSection: +/// ::= .popsection +bool DarwinAsmParser::ParseDirectivePopSection(StringRef, SMLoc) { + if (!getStreamer().PopSection()) + return TokError(".popsection without corresponding .pushsection"); + return false; +} + +/// ParseDirectivePrevious: +/// ::= .previous +bool DarwinAsmParser::ParseDirectivePrevious(StringRef DirName, SMLoc) { + const MCSection *PreviousSection = getStreamer().getPreviousSection(); + if (PreviousSection == NULL) + return TokError(".previous without corresponding .section"); + getStreamer().SwitchSection(PreviousSection); + return false; +} + /// ParseDirectiveSecureLogUnique /// ::= .secure_log_unique ... message ... 
bool DarwinAsmParser::ParseDirectiveSecureLogUnique(StringRef, SMLoc IDLoc) { @@ -659,10 +702,46 @@ bool DarwinAsmParser::ParseDirectiveZerofill(StringRef, SMLoc) { return false; } +/// ParseDirectiveDataRegion +/// ::= .data_region [ ( jt8 | jt16 | jt32 ) ] +bool DarwinAsmParser::ParseDirectiveDataRegion(StringRef, SMLoc) { + if (getLexer().is(AsmToken::EndOfStatement)) { + Lex(); + getStreamer().EmitDataRegion(MCDR_DataRegion); + return false; + } + StringRef RegionType; + SMLoc Loc = getParser().getTok().getLoc(); + if (getParser().ParseIdentifier(RegionType)) + return TokError("expected region type after '.data_region' directive"); + int Kind = StringSwitch<int>(RegionType) + .Case("jt8", MCDR_DataRegionJT8) + .Case("jt16", MCDR_DataRegionJT16) + .Case("jt32", MCDR_DataRegionJT32) + .Default(-1); + if (Kind == -1) + return Error(Loc, "unknown region type in '.data_region' directive"); + Lex(); + + getStreamer().EmitDataRegion((MCDataRegionType)Kind); + return false; +} + +/// ParseDirectiveDataRegionEnd +/// ::= .end_data_region +bool DarwinAsmParser::ParseDirectiveDataRegionEnd(StringRef, SMLoc) { + if (getLexer().isNot(AsmToken::EndOfStatement)) + return TokError("unexpected token in '.end_data_region' directive"); + + Lex(); + getStreamer().EmitDataRegion(MCDR_DataRegionEnd); + return false; +} + namespace llvm { MCAsmParserExtension *createDarwinAsmParser() { return new DarwinAsmParser; } -} +} // end llvm namespace diff --git a/lib/MC/MCParser/ELFAsmParser.cpp b/lib/MC/MCParser/ELFAsmParser.cpp index ffc400b..9316bb1 100644 --- a/lib/MC/MCParser/ELFAsmParser.cpp +++ b/lib/MC/MCParser/ELFAsmParser.cpp @@ -64,6 +64,7 @@ public: AddDirectiveHandler<&ELFAsmParser::ParseDirectiveType>(".type"); AddDirectiveHandler<&ELFAsmParser::ParseDirectiveIdent>(".ident"); AddDirectiveHandler<&ELFAsmParser::ParseDirectiveSymver>(".symver"); + AddDirectiveHandler<&ELFAsmParser::ParseDirectiveVersion>(".version"); AddDirectiveHandler<&ELFAsmParser::ParseDirectiveWeakref>(".weakref"); AddDirectiveHandler<&ELFAsmParser::ParseDirectiveSymbolAttribute>(".weak"); AddDirectiveHandler<&ELFAsmParser::ParseDirectiveSymbolAttribute>(".local"); @@ -141,6 +142,7 @@ public: bool ParseDirectiveType(StringRef, SMLoc); bool ParseDirectiveIdent(StringRef, SMLoc); bool ParseDirectiveSymver(StringRef, SMLoc); + bool ParseDirectiveVersion(StringRef, SMLoc); bool ParseDirectiveWeakref(StringRef, SMLoc); bool ParseDirectiveSymbolAttribute(StringRef, SMLoc); @@ -548,6 +550,32 @@ bool ELFAsmParser::ParseDirectiveSymver(StringRef, SMLoc) { return false; } +/// ParseDirectiveVersion +/// ::= .version string +bool ELFAsmParser::ParseDirectiveVersion(StringRef, SMLoc) { + if (getLexer().isNot(AsmToken::String)) + return TokError("unexpected token in '.version' directive"); + + StringRef Data = getTok().getIdentifier(); + + Lex(); + + const MCSection *Note = + getContext().getELFSection(".note", ELF::SHT_NOTE, 0, + SectionKind::getReadOnly()); + + getStreamer().PushSection(); + getStreamer().SwitchSection(Note); + getStreamer().EmitIntValue(Data.size()+1, 4); // namesz. + getStreamer().EmitIntValue(0, 4); // descsz = 0 (no description). + getStreamer().EmitIntValue(1, 4); // type = NT_VERSION. + getStreamer().EmitBytes(Data, 0); // name. + getStreamer().EmitIntValue(0, 1); // terminate the string. + getStreamer().EmitValueToAlignment(4); // ensure 4 byte alignment.
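// Read together, the emissions above form a conventional ELF note header
// (a summary sketch of the layout, not additional emitted data):
//   namesz = Data.size()+1 | descsz = 0 | type = 1 (NT_VERSION) |
//   NUL-terminated name, padded to a 4-byte boundary.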
+ getStreamer().PopSection(); + return false; +} + /// ParseDirectiveWeakref /// ::= .weakref foo, bar bool ELFAsmParser::ParseDirectiveWeakref(StringRef, SMLoc) { diff --git a/lib/MC/MCPureStreamer.cpp b/lib/MC/MCPureStreamer.cpp index a770c97..9ccab93 100644 --- a/lib/MC/MCPureStreamer.cpp +++ b/lib/MC/MCPureStreamer.cpp @@ -39,7 +39,7 @@ public: virtual void EmitLabel(MCSymbol *Symbol); virtual void EmitAssignment(MCSymbol *Symbol, const MCExpr *Value); virtual void EmitZerofill(const MCSection *Section, MCSymbol *Symbol = 0, - unsigned Size = 0, unsigned ByteAlignment = 0); + uint64_t Size = 0, unsigned ByteAlignment = 0); virtual void EmitBytes(StringRef Data, unsigned AddrSpace); virtual void EmitValueToAlignment(unsigned ByteAlignment, int64_t Value = 0, unsigned ValueSize = 1, @@ -144,7 +144,7 @@ void MCPureStreamer::EmitAssignment(MCSymbol *Symbol, const MCExpr *Value) { } void MCPureStreamer::EmitZerofill(const MCSection *Section, MCSymbol *Symbol, - unsigned Size, unsigned ByteAlignment) { + uint64_t Size, unsigned ByteAlignment) { report_fatal_error("not yet implemented in pure streamer"); } diff --git a/lib/MC/MCRegisterInfo.cpp b/lib/MC/MCRegisterInfo.cpp new file mode 100644 index 0000000..4d1aff3 --- /dev/null +++ b/lib/MC/MCRegisterInfo.cpp @@ -0,0 +1,71 @@ +//=== MC/MCRegisterInfo.cpp - Target Register Description -------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements MCRegisterInfo functions. +// +//===----------------------------------------------------------------------===// + +#include "llvm/MC/MCRegisterInfo.h" + +using namespace llvm; + +unsigned MCRegisterInfo::getMatchingSuperReg(unsigned Reg, unsigned SubIdx, + const MCRegisterClass *RC) const { + for (MCSuperRegIterator Supers(Reg, this); Supers.isValid(); ++Supers) + if (RC->contains(*Supers) && Reg == getSubReg(*Supers, SubIdx)) + return *Supers; + return 0; +} + +unsigned MCRegisterInfo::getSubReg(unsigned Reg, unsigned Idx) const { + // Get a pointer to the corresponding SubRegIndices list. This list has the + // name of each sub-register in the same order as MCSubRegIterator. + const uint16_t *SRI = SubRegIndices + get(Reg).SubRegIndices; + for (MCSubRegIterator Subs(Reg, this); Subs.isValid(); ++Subs, ++SRI) + if (*SRI == Idx) + return *Subs; + return 0; +} + +unsigned MCRegisterInfo::getSubRegIndex(unsigned Reg, unsigned SubReg) const { + // Get a pointer to the corresponding SubRegIndices list. This list has the + // name of each sub-register in the same order as MCSubRegIterator. + const uint16_t *SRI = SubRegIndices + get(Reg).SubRegIndices; + for (MCSubRegIterator Subs(Reg, this); Subs.isValid(); ++Subs, ++SRI) + if (*Subs == SubReg) + return *SRI; + return 0; +} + +int MCRegisterInfo::getDwarfRegNum(unsigned RegNum, bool isEH) const { + const DwarfLLVMRegPair *M = isEH ? EHL2DwarfRegs : L2DwarfRegs; + unsigned Size = isEH ? EHL2DwarfRegsSize : L2DwarfRegsSize; + + DwarfLLVMRegPair Key = { RegNum, 0 }; + const DwarfLLVMRegPair *I = std::lower_bound(M, M+Size, Key); + if (I == M+Size || I->FromReg != RegNum) + return -1; + return I->ToReg; +} + +int MCRegisterInfo::getLLVMRegNum(unsigned RegNum, bool isEH) const { + const DwarfLLVMRegPair *M = isEH ? EHDwarf2LRegs : Dwarf2LRegs; + unsigned Size = isEH ? 
EHDwarf2LRegsSize : Dwarf2LRegsSize; + + DwarfLLVMRegPair Key = { RegNum, 0 }; + const DwarfLLVMRegPair *I = std::lower_bound(M, M+Size, Key); + assert(I != M+Size && I->FromReg == RegNum && "Invalid RegNum"); + return I->ToReg; +} + +int MCRegisterInfo::getSEHRegNum(unsigned RegNum) const { + const DenseMap<unsigned, int>::const_iterator I = L2SEHRegs.find(RegNum); + if (I == L2SEHRegs.end()) return (int)RegNum; + return I->second; +} diff --git a/lib/MC/MCSectionCOFF.cpp b/lib/MC/MCSectionCOFF.cpp index 90091f0..aac9377 100644 --- a/lib/MC/MCSectionCOFF.cpp +++ b/lib/MC/MCSectionCOFF.cpp @@ -20,7 +20,7 @@ MCSectionCOFF::~MCSectionCOFF() {} // anchor. // should be printed before the section name bool MCSectionCOFF::ShouldOmitSectionDirective(StringRef Name, const MCAsmInfo &MAI) const { - + // FIXME: Does .section .bss/.data/.text work everywhere?? if (Name == ".text" || Name == ".data" || Name == ".bss") return true; @@ -30,7 +30,7 @@ void MCSectionCOFF::PrintSwitchToSection(const MCAsmInfo &MAI, raw_ostream &OS) const { - + // standard sections don't require the '.section' if (ShouldOmitSectionDirective(SectionName, MAI)) { OS << '\t' << getSectionName() << '\n'; @@ -47,7 +47,7 @@ void MCSectionCOFF::PrintSwitchToSection(const MCAsmInfo &MAI, if (getCharacteristics() & COFF::IMAGE_SCN_MEM_DISCARDABLE) OS << 'n'; OS << "\"\n"; - + if (getCharacteristics() & COFF::IMAGE_SCN_LNK_COMDAT) { switch (Selection) { case COFF::IMAGE_COMDAT_SELECT_NODUPLICATES: diff --git a/lib/MC/MCSectionELF.cpp b/lib/MC/MCSectionELF.cpp index dfd77c3..0775cfa 100644 --- a/lib/MC/MCSectionELF.cpp +++ b/lib/MC/MCSectionELF.cpp @@ -22,7 +22,7 @@ MCSectionELF::~MCSectionELF() {} // anchor. // should be printed before the section name bool MCSectionELF::ShouldOmitSectionDirective(StringRef Name, const MCAsmInfo &MAI) const { - + // FIXME: Does .section .bss/.data/.text work everywhere?? if (Name == ".text" || Name == ".data" || (Name == ".bss" && !MAI.usesELFSectionDirectiveForBSS())) @@ -33,7 +33,7 @@ void MCSectionELF::PrintSwitchToSection(const MCAsmInfo &MAI, raw_ostream &OS) const { - + if (ShouldOmitSectionDirective(SectionName, MAI)) { OS << '\t' << getSectionName() << '\n'; return; @@ -62,7 +62,7 @@ void MCSectionELF::PrintSwitchToSection(const MCAsmInfo &MAI, } // Handle the weird solaris syntax if desired. - if (MAI.usesSunStyleELFSectionSwitchSyntax() && + if (MAI.usesSunStyleELFSectionSwitchSyntax() && !(Flags & ELF::SHF_MERGE)) { if (Flags & ELF::SHF_ALLOC) OS << ",#alloc"; @@ -75,7 +75,7 @@ void MCSectionELF::PrintSwitchToSection(const MCAsmInfo &MAI, OS << '\n'; return; } - + OS << ",\""; if (Flags & ELF::SHF_ALLOC) OS << 'a'; @@ -91,13 +91,13 @@ void MCSectionELF::PrintSwitchToSection(const MCAsmInfo &MAI, OS << 'S'; if (Flags & ELF::SHF_TLS) OS << 'T'; - + // If there are target-specific flags, print them.
if (Flags & ELF::XCORE_SHF_CP_SECTION) OS << 'c'; if (Flags & ELF::XCORE_SHF_DP_SECTION) OS << 'd'; - + OS << '"'; OS << ','; diff --git a/lib/MC/MCStreamer.cpp b/lib/MC/MCStreamer.cpp index 43e62ff..0bac24d 100644 --- a/lib/MC/MCStreamer.cpp +++ b/lib/MC/MCStreamer.cpp @@ -15,17 +15,15 @@ #include "llvm/MC/MCSymbol.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Support/LEB128.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/Twine.h" #include using namespace llvm; -MCStreamer::MCStreamer(MCContext &Ctx) : Context(Ctx), EmitEHFrame(true), - EmitDebugFrame(false), - CurrentW64UnwindInfo(0), - LastSymbol(0), - UniqueCodeBeginSuffix(0), - UniqueDataBeginSuffix(0) { +MCStreamer::MCStreamer(MCContext &Ctx) + : Context(Ctx), EmitEHFrame(true), EmitDebugFrame(false), + CurrentW64UnwindInfo(0), LastSymbol(0) { const MCSection *section = NULL; SectionStack.push_back(std::make_pair(section, section)); } @@ -97,7 +95,7 @@ void MCStreamer::EmitULEB128IntValue(uint64_t Value, unsigned AddrSpace, unsigned Padding) { SmallString<128> Tmp; raw_svector_ostream OSE(Tmp); - MCObjectWriter::EncodeULEB128(Value, OSE, Padding); + encodeULEB128(Value, OSE, Padding); EmitBytes(OSE.str(), AddrSpace); } @@ -106,7 +104,7 @@ void MCStreamer::EmitULEB128IntValue(uint64_t Value, unsigned AddrSpace, void MCStreamer::EmitSLEB128IntValue(int64_t Value, unsigned AddrSpace) { SmallString<128> Tmp; raw_svector_ostream OSE(Tmp); - MCObjectWriter::EncodeSLEB128(Value, OSE); + encodeSLEB128(Value, OSE); EmitBytes(OSE.str(), AddrSpace); } @@ -183,85 +181,6 @@ void MCStreamer::EmitLabel(MCSymbol *Symbol) { LastSymbol = Symbol; } -void MCStreamer::EmitDataRegion() { - if (RegionIndicator == Data) return; - - MCContext &Context = getContext(); - const MCAsmInfo &MAI = Context.getAsmInfo(); - if (!MAI.getSupportsDataRegions()) return; - - // Generate a unique symbol name. - MCSymbol *NewSym = Context.GetOrCreateSymbol(MAI.getDataBeginLabelName() + - Twine(UniqueDataBeginSuffix++)); - EmitLabel(NewSym); - - RegionIndicator = Data; -} - -void MCStreamer::EmitCodeRegion() { - if (RegionIndicator == Code) return; - - MCContext &Context = getContext(); - const MCAsmInfo &MAI = Context.getAsmInfo(); - if (!MAI.getSupportsDataRegions()) return; - - // Generate a unique symbol name. - MCSymbol *NewSym = Context.GetOrCreateSymbol(MAI.getCodeBeginLabelName() + - Twine(UniqueCodeBeginSuffix++)); - EmitLabel(NewSym); - - RegionIndicator = Code; -} - -void MCStreamer::EmitJumpTable8Region() { - if (RegionIndicator == JumpTable8) return; - - MCContext &Context = getContext(); - const MCAsmInfo &MAI = Context.getAsmInfo(); - if (!MAI.getSupportsDataRegions()) return; - - // Generate a unique symbol name. - MCSymbol *NewSym = - Context.GetOrCreateSymbol(MAI.getJumpTable8BeginLabelName() + - Twine(UniqueDataBeginSuffix++)); - EmitLabel(NewSym); - - RegionIndicator = JumpTable8; -} - -void MCStreamer::EmitJumpTable16Region() { - if (RegionIndicator == JumpTable16) return; - - MCContext &Context = getContext(); - const MCAsmInfo &MAI = Context.getAsmInfo(); - if (!MAI.getSupportsDataRegions()) return; - - // Generate a unique symbol name. 
- MCSymbol *NewSym = - Context.GetOrCreateSymbol(MAI.getJumpTable16BeginLabelName() + - Twine(UniqueDataBeginSuffix++)); - EmitLabel(NewSym); - - RegionIndicator = JumpTable16; -} - - -void MCStreamer::EmitJumpTable32Region() { - if (RegionIndicator == JumpTable32) return; - - MCContext &Context = getContext(); - const MCAsmInfo &MAI = Context.getAsmInfo(); - if (!MAI.getSupportsDataRegions()) return; - - // Generate a unique symbol name. - MCSymbol *NewSym = - Context.GetOrCreateSymbol(MAI.getJumpTable32BeginLabelName() + - Twine(UniqueDataBeginSuffix++)); - EmitLabel(NewSym); - - RegionIndicator = JumpTable32; -} - void MCStreamer::EmitCompactUnwindEncoding(uint32_t CompactUnwindEncoding) { EnsureValidFrame(); MCDwarfFrameInfo *CurFrame = getCurrentFrameInfo(); @@ -283,7 +202,6 @@ void MCStreamer::EmitCFIStartProc() { EmitCFIStartProcImpl(Frame); FrameInfos.push_back(Frame); - RegionIndicator = Code; } void MCStreamer::EmitCFIStartProcImpl(MCDwarfFrameInfo &Frame) { diff --git a/lib/MC/MCSubtargetInfo.cpp b/lib/MC/MCSubtargetInfo.cpp index 86dc108..05c83f7 100644 --- a/lib/MC/MCSubtargetInfo.cpp +++ b/lib/MC/MCSubtargetInfo.cpp @@ -17,11 +17,13 @@ using namespace llvm; +MCSchedModel MCSchedModel::DefaultSchedModel; // For unknown processors. + void MCSubtargetInfo::InitMCSubtargetInfo(StringRef TT, StringRef CPU, StringRef FS, const SubtargetFeatureKV *PF, const SubtargetFeatureKV *PD, - const SubtargetInfoKV *PI, + const SubtargetInfoKV *ProcSched, const InstrStage *IS, const unsigned *OC, const unsigned *FP, @@ -29,10 +31,10 @@ MCSubtargetInfo::InitMCSubtargetInfo(StringRef TT, StringRef CPU, StringRef FS, TargetTriple = TT; ProcFeatures = PF; ProcDesc = PD; - ProcItins = PI; + ProcSchedModel = ProcSched; Stages = IS; OperandCycles = OC; - ForwardingPathes = FP; + ForwardingPaths = FP; NumFeatures = NF; NumProcs = NP; @@ -68,14 +70,14 @@ uint64_t MCSubtargetInfo::ToggleFeature(StringRef FS) { } -InstrItineraryData -MCSubtargetInfo::getInstrItineraryForCPU(StringRef CPU) const { - assert(ProcItins && "Instruction itineraries information not available!"); +MCSchedModel * +MCSubtargetInfo::getSchedModelForCPU(StringRef CPU) const { + assert(ProcSchedModel && "Processor machine model not available!"); #ifndef NDEBUG for (size_t i = 1; i < NumProcs; i++) { - assert(strcmp(ProcItins[i - 1].Key, ProcItins[i].Key) < 0 && - "Itineraries table is not sorted"); + assert(strcmp(ProcSchedModel[i - 1].Key, ProcSchedModel[i].Key) < 0 && + "Processor machine model table is not sorted"); } #endif @@ -83,14 +85,19 @@ MCSubtargetInfo::getInstrItineraryForCPU(StringRef CPU) const { SubtargetInfoKV KV; KV.Key = CPU.data(); const SubtargetInfoKV *Found = - std::lower_bound(ProcItins, ProcItins+NumProcs, KV); - if (Found == ProcItins+NumProcs || StringRef(Found->Key) != CPU) { + std::lower_bound(ProcSchedModel, ProcSchedModel+NumProcs, KV); + if (Found == ProcSchedModel+NumProcs || StringRef(Found->Key) != CPU) { errs() << "'" << CPU << "' is not a recognized processor for this target" << " (ignoring processor)\n"; - return InstrItineraryData(); + return &MCSchedModel::DefaultSchedModel; } + assert(Found->Value && "Missing processor SchedModel value"); + return (MCSchedModel *)Found->Value; +} - return InstrItineraryData(Stages, OperandCycles, ForwardingPathes, - (InstrItinerary *)Found->Value); +InstrItineraryData +MCSubtargetInfo::getInstrItineraryForCPU(StringRef CPU) const { + MCSchedModel *SchedModel = getSchedModelForCPU(CPU); + return InstrItineraryData(SchedModel, Stages, OperandCycles, 
ForwardingPaths); } diff --git a/lib/MC/MCSymbol.cpp b/lib/MC/MCSymbol.cpp index e013e77..f7f9184 100644 --- a/lib/MC/MCSymbol.cpp +++ b/lib/MC/MCSymbol.cpp @@ -30,7 +30,7 @@ static bool isAcceptableChar(char C) { /// syntactically correct. static bool NameNeedsQuoting(StringRef Str) { assert(!Str.empty() && "Cannot create an empty MCSymbol"); - + // If any of the characters in the string is an unacceptable character, force // quotes. for (unsigned i = 0, e = Str.size(); i != e; ++i) @@ -72,7 +72,7 @@ void MCSymbol::print(raw_ostream &OS) const { OS << getName(); return; } - + OS << '"' << getName() << '"'; } diff --git a/lib/MC/MCWin64EH.cpp b/lib/MC/MCWin64EH.cpp index 79e66fc..c05b4b1 100644 --- a/lib/MC/MCWin64EH.cpp +++ b/lib/MC/MCWin64EH.cpp @@ -228,8 +228,7 @@ static const MCSection *getWin64EHTableSection(StringRef suffix, return context.getCOFFSection((".xdata"+suffix).str(), COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | - COFF::IMAGE_SCN_MEM_READ | - COFF::IMAGE_SCN_MEM_WRITE, + COFF::IMAGE_SCN_MEM_READ, SectionKind::getDataRel()); } @@ -239,8 +238,7 @@ static const MCSection *getWin64EHFuncTableSection(StringRef suffix, return context.getObjectFileInfo()->getPDataSection(); return context.getCOFFSection((".pdata"+suffix).str(), COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | - COFF::IMAGE_SCN_MEM_READ | - COFF::IMAGE_SCN_MEM_WRITE, + COFF::IMAGE_SCN_MEM_READ, SectionKind::getDataRel()); } diff --git a/lib/MC/MachObjectWriter.cpp b/lib/MC/MachObjectWriter.cpp index 8e4066c..5820a22 100644 --- a/lib/MC/MachObjectWriter.cpp +++ b/lib/MC/MachObjectWriter.cpp @@ -21,6 +21,7 @@ #include "llvm/MC/MCMachOSymbolFlags.h" #include "llvm/MC/MCValue.h" #include "llvm/Object/MachOFormat.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include @@ -351,6 +352,21 @@ void MachObjectWriter::WriteNlist(MachSymbolData &MSD, Write32(Address); } +void MachObjectWriter::WriteLinkeditLoadCommand(uint32_t Type, + uint32_t DataOffset, + uint32_t DataSize) { + uint64_t Start = OS.tell(); + (void) Start; + + Write32(Type); + Write32(macho::LinkeditLoadCommandSize); + Write32(DataOffset); + Write32(DataSize); + + assert(OS.tell() - Start == macho::LinkeditLoadCommandSize); +} + + void MachObjectWriter::RecordRelocation(const MCAssembler &Asm, const MCAsmLayout &Layout, const MCFragment *Fragment, @@ -654,6 +670,13 @@ void MachObjectWriter::WriteObject(MCAssembler &Asm, macho::DysymtabLoadCommandSize); } + // Add the data-in-code load command size, if used. + unsigned NumDataRegions = Asm.getDataRegions().size(); + if (NumDataRegions) { + ++NumLoadCommands; + LoadCommandsSize += macho::LinkeditLoadCommandSize; + } + // Compute the total size of the section data, as well as its file size and vm // size. uint64_t SectionDataStart = (is64Bit() ? macho::Header64Size : @@ -701,6 +724,15 @@ void MachObjectWriter::WriteObject(MCAssembler &Asm, RelocTableEnd += NumRelocs * macho::RelocationInfoSize; } + // Write the data-in-code load command, if used. + uint64_t DataInCodeTableEnd = RelocTableEnd + NumDataRegions * 8; + if (NumDataRegions) { + uint64_t DataRegionsOffset = RelocTableEnd; + uint64_t DataRegionsSize = NumDataRegions * 8; + WriteLinkeditLoadCommand(macho::LCT_DataInCode, DataRegionsOffset, + DataRegionsSize); + } + // Write the symbol table load command, if used. if (NumSymbols) { unsigned FirstLocalSymbol = 0; @@ -717,10 +749,10 @@ void MachObjectWriter::WriteObject(MCAssembler &Asm, // If used, the indirect symbols are written after the section data. 
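WriteLinkeditLoadCommand above emits the fixed-size load command that points at a payload in the __LINKEDIT segment: four 32-bit words, which is why the assert compares against macho::LinkeditLoadCommandSize and why the accounting above adds exactly one such command when data-in-code regions are present. Laid out as a struct (field names follow the Mach-O linkedit_data_command convention; the struct itself is only illustrative):

    #include <cstdint>

    // Shape of a Mach-O linkedit data command, e.g. LC_DATA_IN_CODE.
    struct LinkeditDataCommand {
      uint32_t cmd;      // command type
      uint32_t cmdsize;  // size of this command: always 16
      uint32_t dataoff;  // file offset of the payload
      uint32_t datasize; // payload size in bytes
    };
    static_assert(sizeof(LinkeditDataCommand) == 16,
                  "matches macho::LinkeditLoadCommandSize");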
if (NumIndirectSymbols) - IndirectSymbolOffset = RelocTableEnd; + IndirectSymbolOffset = DataInCodeTableEnd; // The symbol table is written after the indirect symbol data. - uint64_t SymbolTableOffset = RelocTableEnd + IndirectSymbolSize; + uint64_t SymbolTableOffset = DataInCodeTableEnd + IndirectSymbolSize; // The string table is written after symbol table. uint64_t StringTableOffset = @@ -760,6 +792,23 @@ void MachObjectWriter::WriteObject(MCAssembler &Asm, } } + // Write out the data-in-code region payload, if there is one. + for (MCAssembler::const_data_region_iterator + it = Asm.data_region_begin(), ie = Asm.data_region_end(); + it != ie; ++it) { + const DataRegionData *Data = &(*it); + uint64_t Start = getSymbolAddress(&Layout.getAssembler().getSymbolData(*Data->Start), Layout); + uint64_t End = getSymbolAddress(&Layout.getAssembler().getSymbolData(*Data->End), Layout); + DEBUG(dbgs() << "data in code region-- kind: " << Data->Kind + << " start: " << Start << "(" << Data->Start->getName() << ")" + << " end: " << End << "(" << Data->End->getName() << ")" + << " size: " << End - Start + << "\n"); + Write32(Start); + Write16(End - Start); + Write16(Data->Kind); + } + // Write the symbol table data, if used. if (NumSymbols) { // Write the indirect symbol entries. diff --git a/lib/MC/SubtargetFeature.cpp b/lib/MC/SubtargetFeature.cpp index be41579..0a44e77 100644 --- a/lib/MC/SubtargetFeature.cpp +++ b/lib/MC/SubtargetFeature.cpp @@ -92,7 +92,7 @@ static void Split(std::vector &V, const StringRef S) { static std::string Join(const std::vector &V) { // Start with empty string. std::string Result; - // If the vector is not empty + // If the vector is not empty if (!V.empty()) { // Start with the first feature Result = V[0]; @@ -104,7 +104,7 @@ static std::string Join(const std::vector &V) { Result += V[i]; } } - // Return the features string + // Return the features string return Result; } @@ -205,7 +205,7 @@ void SetImpliedBits(uint64_t &Bits, const SubtargetFeatureKV *FeatureEntry, /// ClearImpliedBits - For each feature that (transitively) implies this /// feature, clear it. -/// +/// static void ClearImpliedBits(uint64_t &Bits, const SubtargetFeatureKV *FeatureEntry, const SubtargetFeatureKV *FeatureTable, @@ -252,7 +252,7 @@ SubtargetFeatures::ToggleFeature(uint64_t Bits, const StringRef Feature, return Bits; } - + /// getFeatureBits - Get feature bits a CPU. /// @@ -279,7 +279,7 @@ uint64_t SubtargetFeatures::getFeatureBits(const StringRef CPU, // Check if help is needed if (CPU == "help") Help(CPUTable, CPUTableSize, FeatureTable, FeatureTableSize); - + // Find CPU entry if CPU name is specified. if (!CPU.empty()) { const SubtargetFeatureKV *CPUEntry = Find(CPU, CPUTable, CPUTableSize); @@ -304,11 +304,11 @@ uint64_t SubtargetFeatures::getFeatureBits(const StringRef CPU, // Iterate through each feature for (size_t i = 0, E = Features.size(); i < E; i++) { const StringRef Feature = Features[i]; - + // Check for help if (Feature == "+help") Help(CPUTable, CPUTableSize, FeatureTable, FeatureTableSize); - + // Find feature in table. 
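The MachObjectWriter payload loop at the top of this chunk writes each data-in-code region as a 32-bit start, a 16-bit length, and a 16-bit kind, matching the NumDataRegions * 8 accounting earlier. The reader side (MachOObject::ReadDataInCodeTableEntry, further down in this patch) decodes the same 8-byte record; as a struct it would look roughly like:

    #include <cstdint>

    // One data-in-code record; field names mirror the reader's
    // macho::DataInCodeTableEntry, the layout is the one emitted above.
    struct DataInCodeEntry {
      uint32_t Offset; // where the data region starts
      uint16_t Length; // End - Start, in bytes
      uint16_t Kind;   // region kind (jump table width, raw data, ...)
    };
    static_assert(sizeof(DataInCodeEntry) == 8, "8 bytes per region");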
const SubtargetFeatureKV *FeatureEntry = Find(StripFlag(Feature), FeatureTable, FeatureTableSize); @@ -349,7 +349,7 @@ void *SubtargetFeatures::getItinerary(const StringRef CPU, // Find entry const SubtargetInfoKV *Entry = Find(CPU, Table, TableSize); - + if (Entry) { return Entry->Value; } else { diff --git a/lib/MC/WinCOFFStreamer.cpp b/lib/MC/WinCOFFStreamer.cpp index 67dc649..b026277 100644 --- a/lib/MC/WinCOFFStreamer.cpp +++ b/lib/MC/WinCOFFStreamer.cpp @@ -67,7 +67,7 @@ public: virtual void EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size, unsigned ByteAlignment); virtual void EmitZerofill(const MCSection *Section, MCSymbol *Symbol, - unsigned Size,unsigned ByteAlignment); + uint64_t Size,unsigned ByteAlignment); virtual void EmitTBSSSymbol(const MCSection *Section, MCSymbol *Symbol, uint64_t Size, unsigned ByteAlignment); virtual void EmitBytes(StringRef Data, unsigned AddrSpace); @@ -324,7 +324,7 @@ void WinCOFFStreamer::EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size, } void WinCOFFStreamer::EmitZerofill(const MCSection *Section, MCSymbol *Symbol, - unsigned Size,unsigned ByteAlignment) { + uint64_t Size,unsigned ByteAlignment) { llvm_unreachable("not implemented"); } diff --git a/lib/Object/Archive.cpp b/lib/Object/Archive.cpp index c5f15ba..2a5951a 100644 --- a/lib/Object/Archive.cpp +++ b/lib/Object/Archive.cpp @@ -28,7 +28,7 @@ struct ArchiveMemberHeader { char UID[6]; char GID[6]; char AccessMode[8]; - char Size[10]; //< Size of data, not including header or padding. + char Size[10]; ///< Size of data, not including header or padding. char Terminator[2]; ///! Get the name without looking up long names. @@ -60,11 +60,11 @@ static const ArchiveMemberHeader *ToHeader(const char *base) { static bool isInternalMember(const ArchiveMemberHeader &amh) { - const char *internals[] = { + static const char *const internals[] = { "/", "//", "#_LLVM_SYM_TAB_#" - }; + }; StringRef name = amh.getName(); for (std::size_t i = 0; i < sizeof(internals) / sizeof(*internals); ++i) { diff --git a/lib/Object/COFFObjectFile.cpp b/lib/Object/COFFObjectFile.cpp index bd27a56..8ab54c6 100644 --- a/lib/Object/COFFObjectFile.cpp +++ b/lib/Object/COFFObjectFile.cpp @@ -622,6 +622,28 @@ error_code COFFObjectFile::getSymbolName(const coff_symbol *symbol, return object_error::success; } +ArrayRef COFFObjectFile::getSymbolAuxData( + const coff_symbol *symbol) const { + const uint8_t *aux = NULL; + + if ( symbol->NumberOfAuxSymbols > 0 ) { + // AUX data comes immediately after the symbol in COFF + aux = reinterpret_cast(symbol + 1); +# ifndef NDEBUG + // Verify that the aux symbol points to a valid entry in the symbol table. 
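getSymbolAuxData above leans on the COFF rule that auxiliary records are stored inline in the symbol table, occupying NumberOfAuxSymbols slots immediately after their owning symbol; that is also why the returned array is sized in multiples of sizeof(coff_symbol). Any walk over a COFF symbol table therefore has to skip those slots, roughly as below (CoffSymbol is a pared-down stand-in for the real record):

    #include <cstdint>

    struct CoffSymbol {
      // ... name, value, section number, type, storage class ...
      uint8_t NumberOfAuxSymbols; // inline aux records following this entry
    };

    // Visit only the real symbols, stepping over their aux slots.
    void forEachSymbol(const CoffSymbol *Table, uint32_t NumEntries,
                       void (*Visit)(const CoffSymbol &)) {
      for (uint32_t I = 0; I < NumEntries; ++I) {
        Visit(Table[I]);
        I += Table[I].NumberOfAuxSymbols;
      }
    }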
+ uintptr_t offset = uintptr_t(aux) - uintptr_t(base()); + if (offset < Header->PointerToSymbolTable + || offset >= Header->PointerToSymbolTable + + (Header->NumberOfSymbols * sizeof(coff_symbol))) + report_fatal_error("Aux Symbol data was outside of symbol table."); + + assert((offset - Header->PointerToSymbolTable) % sizeof(coff_symbol) + == 0 && "Aux Symbol data did not point to the beginning of a symbol"); +# endif + } + return ArrayRef(aux, symbol->NumberOfAuxSymbols * sizeof(coff_symbol)); +} + error_code COFFObjectFile::getSectionName(const coff_section *Sec, StringRef &Res) const { StringRef Name; @@ -694,6 +716,20 @@ error_code COFFObjectFile::getRelocationType(DataRefImpl Rel, return object_error::success; } +const coff_section *COFFObjectFile::getCOFFSection(section_iterator &It) const { + return toSec(It->getRawDataRefImpl()); +} + +const coff_symbol *COFFObjectFile::getCOFFSymbol(symbol_iterator &It) const { + return toSymb(It->getRawDataRefImpl()); +} + +const coff_relocation *COFFObjectFile::getCOFFRelocation( + relocation_iterator &It) const { + return toRel(It->getRawDataRefImpl()); +} + + #define LLVM_COFF_SWITCH_RELOC_TYPE_NAME(enum) \ case COFF::enum: res = #enum; break; diff --git a/lib/Object/MachOObject.cpp b/lib/Object/MachOObject.cpp index b7e5cdc..00dea3f 100644 --- a/lib/Object/MachOObject.cpp +++ b/lib/Object/MachOObject.cpp @@ -357,6 +357,19 @@ void MachOObject::ReadSymbol64TableEntry(uint64_t SymbolTableOffset, ReadInMemoryStruct(*this, Buffer->getBuffer(), Offset, Res); } +template<> +void SwapStruct(macho::DataInCodeTableEntry &Value) { + SwapValue(Value.Offset); + SwapValue(Value.Length); + SwapValue(Value.Kind); +} +void MachOObject::ReadDataInCodeTableEntry(uint64_t TableOffset, + unsigned Index, + InMemoryStruct &Res) const { + uint64_t Offset = (TableOffset + + Index * sizeof(macho::DataInCodeTableEntry)); + ReadInMemoryStruct(*this, Buffer->getBuffer(), Offset, Res); +} void MachOObject::ReadULEB128s(uint64_t Index, SmallVectorImpl &Out) const { diff --git a/lib/Object/MachOObjectFile.cpp b/lib/Object/MachOObjectFile.cpp index 3bcda17..d229671 100644 --- a/lib/Object/MachOObjectFile.cpp +++ b/lib/Object/MachOObjectFile.cpp @@ -598,13 +598,15 @@ error_code MachOObjectFile::isSectionZeroInit(DataRefImpl DRI, if (MachOObj->is64Bit()) { InMemoryStruct Sect; getSection64(DRI, Sect); - Result = (Sect->Flags & MachO::SectionTypeZeroFill || - Sect->Flags & MachO::SectionTypeZeroFillLarge); + unsigned SectionType = Sect->Flags & MachO::SectionFlagMaskSectionType; + Result = (SectionType == MachO::SectionTypeZeroFill || + SectionType == MachO::SectionTypeZeroFillLarge); } else { InMemoryStruct Sect; getSection(DRI, Sect); - Result = (Sect->Flags & MachO::SectionTypeZeroFill || - Sect->Flags & MachO::SectionTypeZeroFillLarge); + unsigned SectionType = Sect->Flags & MachO::SectionFlagMaskSectionType; + Result = (SectionType == MachO::SectionTypeZeroFill || + SectionType == MachO::SectionTypeZeroFillLarge); } return object_error::success; @@ -786,7 +788,7 @@ error_code MachOObjectFile::getRelocationTypeName(DataRefImpl Rel, switch (Arch) { case Triple::x86: { - const char* Table[] = { + static const char *const Table[] = { "GENERIC_RELOC_VANILLA", "GENERIC_RELOC_PAIR", "GENERIC_RELOC_SECTDIFF", @@ -801,7 +803,7 @@ error_code MachOObjectFile::getRelocationTypeName(DataRefImpl Rel, break; } case Triple::x86_64: { - const char* Table[] = { + static const char *const Table[] = { "X86_64_RELOC_UNSIGNED", "X86_64_RELOC_SIGNED", "X86_64_RELOC_BRANCH", @@ -820,7 +822,7 @@ 
error_code MachOObjectFile::getRelocationTypeName(DataRefImpl Rel, break; } case Triple::arm: { - const char* Table[] = { + static const char *const Table[] = { "ARM_RELOC_VANILLA", "ARM_RELOC_PAIR", "ARM_RELOC_SECTDIFF", @@ -839,7 +841,7 @@ error_code MachOObjectFile::getRelocationTypeName(DataRefImpl Rel, break; } case Triple::ppc: { - const char* Table[] = { + static const char *const Table[] = { "PPC_RELOC_VANILLA", "PPC_RELOC_PAIR", "PPC_RELOC_BR14", diff --git a/lib/Support/APFloat.cpp b/lib/Support/APFloat.cpp index 409d4fb..2139df5 100644 --- a/lib/Support/APFloat.cpp +++ b/lib/Support/APFloat.cpp @@ -1765,6 +1765,32 @@ APFloat::fusedMultiplyAdd(const APFloat &multiplicand, return fs; } +/* Rounding-mode correct round to integral value. */ +APFloat::opStatus APFloat::roundToIntegral(roundingMode rounding_mode) { + opStatus fs; + assertArithmeticOK(*semantics); + + // The algorithm here is quite simple: we add 2^(p-1), where p is the + // precision of our format, and then subtract it back off again. The choice + // of rounding modes for the addition/subtraction determines the rounding mode + // for our integral rounding as well. + APInt IntegerConstant(NextPowerOf2(semanticsPrecision(*semantics)), + 1 << (semanticsPrecision(*semantics)-1)); + APFloat MagicConstant(*semantics); + fs = MagicConstant.convertFromAPInt(IntegerConstant, false, + rmNearestTiesToEven); + if (fs != opOK) + return fs; + + fs = add(MagicConstant, rounding_mode); + if (fs != opOK && fs != opInexact) + return fs; + + fs = subtract(MagicConstant, rounding_mode); + return fs; +} + + /* Comparison requires normalized numbers. */ APFloat::cmpResult APFloat::compare(const APFloat &rhs) const @@ -3278,16 +3304,8 @@ APFloat::APFloat(double d) : exponent2(0), sign2(0) { } namespace { - static void append(SmallVectorImpl<char> &Buffer, - unsigned N, const char *Str) { - unsigned Start = Buffer.size(); - Buffer.set_size(Start + N); - memcpy(&Buffer[Start], Str, N); - } - - template <unsigned N> - void append(SmallVectorImpl<char> &Buffer, const char (&Str)[N]) { - append(Buffer, N, Str); + void append(SmallVectorImpl<char> &Buffer, StringRef Str) { + Buffer.append(Str.begin(), Str.end()); } /// Removes data from the given significand until it is no more diff --git a/lib/Support/APInt.cpp b/lib/Support/APInt.cpp index 9b81fe7..38cfaed 100644 --- a/lib/Support/APInt.cpp +++ b/lib/Support/APInt.cpp @@ -1135,7 +1135,7 @@ APInt APInt::lshr(unsigned shiftAmt) const { // If all the bits were shifted out, the result is 0. This avoids issues // with shifting by the size of the integer type, which produces undefined // results. We define these "undefined results" to always be 0. - if (shiftAmt == BitWidth) + if (shiftAmt >= BitWidth) return APInt(BitWidth, 0); // If none of the bits are shifted out, the result is *this.
This avoids @@ -1446,7 +1446,7 @@ APInt::mu APInt::magicu(unsigned LeadingZeros) const { APInt signedMin = APInt::getSignedMinValue(d.getBitWidth()); APInt signedMax = APInt::getSignedMaxValue(d.getBitWidth()); - nc = allOnes - (-d).urem(d); + nc = allOnes - (allOnes - d).urem(d); p = d.getBitWidth() - 1; // initialize p q1 = signedMin.udiv(nc); // initialize q1 = 2p/nc r1 = signedMin - q1*nc; // initialize r1 = rem(2p,nc) diff --git a/lib/Support/CMakeLists.txt b/lib/Support/CMakeLists.txt index 9103327..83baf60 100644 --- a/lib/Support/CMakeLists.txt +++ b/lib/Support/CMakeLists.txt @@ -23,6 +23,7 @@ add_llvm_library(LLVMSupport Dwarf.cpp ErrorHandling.cpp FileUtilities.cpp + FileOutputBuffer.cpp FoldingSet.cpp FormattedStream.cpp GraphWriter.cpp diff --git a/lib/Support/CommandLine.cpp b/lib/Support/CommandLine.cpp index e6fdf16..593315d1 100644 --- a/lib/Support/CommandLine.cpp +++ b/lib/Support/CommandLine.cpp @@ -219,10 +219,10 @@ static Option *LookupNearestOption(StringRef Arg, if (!Best || Distance < BestDistance) { Best = O; BestDistance = Distance; - if (RHS.empty() || !PermitValue) - NearestString = OptionNames[i]; - else - NearestString = std::string(OptionNames[i]) + "=" + RHS.str(); + if (RHS.empty() || !PermitValue) + NearestString = OptionNames[i]; + else + NearestString = std::string(OptionNames[i]) + "=" + RHS.str(); } } } diff --git a/lib/Support/ConstantRange.cpp b/lib/Support/ConstantRange.cpp index 5206cf1..720ef36 100644 --- a/lib/Support/ConstantRange.cpp +++ b/lib/Support/ConstantRange.cpp @@ -143,16 +143,17 @@ bool ConstantRange::isSignWrappedSet() const { /// getSetSize - Return the number of elements in this set. /// APInt ConstantRange::getSetSize() const { - if (isEmptySet()) - return APInt(getBitWidth(), 0); - if (getBitWidth() == 1) { - if (Lower != Upper) // One of T or F in the set... - return APInt(2, 1); - return APInt(2, 2); // Must be full set... + if (isEmptySet()) + return APInt(getBitWidth()+1, 0); + + if (isFullSet()) { + APInt Size(getBitWidth()+1, 0); + Size.setBit(getBitWidth()); + return Size; } - // Simply subtract the bounds... - return Upper - Lower; + // This is also correct for wrapped sets. + return (Upper - Lower).zext(getBitWidth()+1); } /// getUnsignedMax - Return the largest unsigned value contained in the @@ -248,6 +249,12 @@ ConstantRange ConstantRange::subtract(const APInt &Val) const { return ConstantRange(Lower - Val, Upper - Val); } +/// \brief Subtract the specified range from this range (aka relative complement +/// of the sets). +ConstantRange ConstantRange::difference(const ConstantRange &CR) const { + return intersectWith(CR.inverse()); +} + /// intersectWith - Return the range that results from the intersection of this /// range with another range. 
The resultant range is guaranteed to include all /// elements contained in both input ranges, and to have the smallest possible @@ -288,7 +295,7 @@ ConstantRange ConstantRange::intersectWith(const ConstantRange &CR) const { if (CR.Upper.ult(Upper)) return CR; - if (CR.Upper.ult(Lower)) + if (CR.Upper.ule(Lower)) return ConstantRange(CR.Lower, Upper); if (getSetSize().ult(CR.getSetSize())) @@ -316,7 +323,7 @@ ConstantRange ConstantRange::intersectWith(const ConstantRange &CR) const { return CR; } - if (CR.Upper.ult(Lower)) { + if (CR.Upper.ule(Lower)) { if (CR.Lower.ult(Lower)) return *this; @@ -420,9 +427,13 @@ ConstantRange ConstantRange::zeroExtend(uint32_t DstTySize) const { unsigned SrcTySize = getBitWidth(); assert(SrcTySize < DstTySize && "Not a value extension"); - if (isFullSet() || isWrappedSet()) + if (isFullSet() || isWrappedSet()) { // Change into [0, 1 << src bit width) - return ConstantRange(APInt(DstTySize,0), APInt(DstTySize,1).shl(SrcTySize)); + APInt LowerExt(DstTySize, 0); + if (!Upper) // special case: [X, 0) -- not really wrapping around + LowerExt = Lower.zext(DstTySize); + return ConstantRange(LowerExt, APInt(DstTySize, 1).shl(SrcTySize)); + } return ConstantRange(Lower.zext(DstTySize), Upper.zext(DstTySize)); } @@ -450,10 +461,53 @@ ConstantRange ConstantRange::signExtend(uint32_t DstTySize) const { /// truncated to the specified type. ConstantRange ConstantRange::truncate(uint32_t DstTySize) const { assert(getBitWidth() > DstTySize && "Not a value truncation"); - if (isFullSet() || getSetSize().getActiveBits() > DstTySize) + if (isEmptySet()) + return ConstantRange(DstTySize, /*isFullSet=*/false); + if (isFullSet()) return ConstantRange(DstTySize, /*isFullSet=*/true); - return ConstantRange(Lower.trunc(DstTySize), Upper.trunc(DstTySize)); + APInt MaxValue = APInt::getMaxValue(DstTySize).zext(getBitWidth()); + APInt MaxBitValue(getBitWidth(), 0); + MaxBitValue.setBit(DstTySize); + + APInt LowerDiv(Lower), UpperDiv(Upper); + ConstantRange Union(DstTySize, /*isFullSet=*/false); + + // Analyze wrapped sets in their two parts: [0, Upper) \/ [Lower, MaxValue] + // We use the non-wrapped set code to analyze the [Lower, MaxValue) part, and + // then we do the union with [MaxValue, Upper) + if (isWrappedSet()) { + // If Upper is greater than MaxValue, it covers the whole truncated range. + if (Upper.uge(MaxValue)) + return ConstantRange(DstTySize, /*isFullSet=*/true); + + Union = ConstantRange(APInt::getMaxValue(DstTySize),Upper.trunc(DstTySize)); + UpperDiv = APInt::getMaxValue(getBitWidth()); + + // Union covers the MaxValue case, so return if the remaining range is just + // MaxValue. + if (LowerDiv == UpperDiv) + return Union; + } + + // Chop off the most significant bits that are past the destination bitwidth. + if (LowerDiv.uge(MaxValue)) { + APInt Div(getBitWidth(), 0); + APInt::udivrem(LowerDiv, MaxBitValue, Div, LowerDiv); + UpperDiv = UpperDiv - MaxBitValue * Div; + } + + if (UpperDiv.ule(MaxValue)) + return ConstantRange(LowerDiv.trunc(DstTySize), + UpperDiv.trunc(DstTySize)).unionWith(Union); + + // The truncated value wraps around. Check if we can do better than a full set. + APInt UpperModulo = UpperDiv - MaxBitValue; + if (UpperModulo.ult(LowerDiv)) + return ConstantRange(LowerDiv.trunc(DstTySize), + UpperModulo.trunc(DstTySize)).unionWith(Union); + + return ConstantRange(DstTySize, /*isFullSet=*/true); } /// zextOrTrunc - make this range have the bit width given by \p DstTySize.
The @@ -529,8 +583,6 @@ ConstantRange::multiply(const ConstantRange &Other) const { if (isEmptySet() || Other.isEmptySet()) return ConstantRange(getBitWidth(), /*isFullSet=*/false); - if (isFullSet() || Other.isFullSet()) - return ConstantRange(getBitWidth(), /*isFullSet=*/true); APInt this_min = getUnsignedMin().zext(getBitWidth() * 2); APInt this_max = getUnsignedMax().zext(getBitWidth() * 2); diff --git a/lib/Support/CrashRecoveryContext.cpp b/lib/Support/CrashRecoveryContext.cpp index e2af0bc..e175056 100644 --- a/lib/Support/CrashRecoveryContext.cpp +++ b/lib/Support/CrashRecoveryContext.cpp @@ -223,7 +223,7 @@ void CrashRecoveryContext::Disable() { #include -static int Signals[] = { SIGABRT, SIGBUS, SIGFPE, SIGILL, SIGSEGV, SIGTRAP }; +static const int Signals[] = { SIGABRT, SIGBUS, SIGFPE, SIGILL, SIGSEGV, SIGTRAP }; static const unsigned NumSignals = sizeof(Signals) / sizeof(Signals[0]); static struct sigaction PrevActions[NumSignals]; diff --git a/lib/Support/Debug.cpp b/lib/Support/Debug.cpp index 9fdb12e..c8e8900 100644 --- a/lib/Support/Debug.cpp +++ b/lib/Support/Debug.cpp @@ -7,7 +7,7 @@ // //===----------------------------------------------------------------------===// // -// This file implements a handle way of adding debugging information to your +// This file implements a handy way of adding debugging information to your // code, without it being enabled all of the time, and without having to add // command line options to enable it. // @@ -18,8 +18,8 @@ // can specify '-debug-only=foo' to enable JUST the debug information for the // foo class. // -// When compiling in release mode, the -debug-* options and all code in DEBUG() -// statements disappears, so it does not effect the runtime of the code. +// When compiling without assertions, the -debug-* options and all code in +// DEBUG() statements disappears, so it does not affect the runtime of the code. // //===----------------------------------------------------------------------===// @@ -89,11 +89,11 @@ bool llvm::isCurrentDebugType(const char *DebugType) { return CurrentDebugType.empty() || DebugType == CurrentDebugType; } -/// SetCurrentDebugType - Set the current debug type, as if the -debug-only=X +/// setCurrentDebugType - Set the current debug type, as if the -debug-only=X /// option were specified. Note that DebugFlag also needs to be set to true for /// debug output to be produced. /// -void llvm::SetCurrentDebugType(const char *Type) { +void llvm::setCurrentDebugType(const char *Type) { CurrentDebugType = Type; } diff --git a/lib/Support/Errno.cpp b/lib/Support/Errno.cpp index 18c6581..dd218f6 100644 --- a/lib/Support/Errno.cpp +++ b/lib/Support/Errno.cpp @@ -52,7 +52,7 @@ std::string StrError(int errnum) { # endif #elif HAVE_DECL_STRERROR_S // "Windows Secure API" if (errnum) - strerror_s(buffer, errnum); + strerror_s(buffer, MaxErrStrLen - 1, errnum); #elif defined(HAVE_STRERROR) // Copy the thread un-safe result of strerror into // the buffer as fast as possible to minimize impact diff --git a/lib/Support/FileOutputBuffer.cpp b/lib/Support/FileOutputBuffer.cpp new file mode 100644 index 0000000..7dc9587 --- /dev/null +++ b/lib/Support/FileOutputBuffer.cpp @@ -0,0 +1,148 @@ +//===- FileOutputBuffer.cpp - File Output Buffer ----------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// Utility for creating an in-memory buffer that will be written to a file. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/FileOutputBuffer.h" + +#include "llvm/ADT/OwningPtr.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Support/system_error.h" + + +namespace llvm { + + +FileOutputBuffer::FileOutputBuffer(uint8_t *Start, uint8_t *End, + StringRef Path, StringRef TmpPath) + : BufferStart(Start), BufferEnd(End) { + FinalPath.assign(Path); + TempPath.assign(TmpPath); +} + + +FileOutputBuffer::~FileOutputBuffer() { + // If not already committed, delete buffer and remove temp file. + if ( BufferStart != NULL ) { + sys::fs::unmap_file_pages((void*)BufferStart, getBufferSize()); + bool Existed; + sys::fs::remove(Twine(TempPath), Existed); + } +} + + +error_code FileOutputBuffer::create(StringRef FilePath, + size_t Size, + OwningPtr<FileOutputBuffer> &Result, + unsigned Flags) { + // If file already exists, it must be a regular file (to be mappable). + sys::fs::file_status Stat; + error_code EC = sys::fs::status(FilePath, Stat); + switch (Stat.type()) { + case sys::fs::file_type::file_not_found: + // If file does not exist, we'll create one. + break; + case sys::fs::file_type::regular_file: { + // If file is not currently writable, error out. + // FIXME: There is no sys::fs:: api for checking this. + // FIXME: In posix, you use the access() call to check this. + } + break; + default: + if (EC) + return EC; + else + return make_error_code(errc::operation_not_permitted); + } + + // Delete target file. + bool Existed; + EC = sys::fs::remove(FilePath, Existed); + if (EC) + return EC; + + // Create new file in same directory but with random name. + SmallString<128> TempFilePath; + int FD; + EC = sys::fs::unique_file(Twine(FilePath) + ".tmp%%%%%%%", + FD, TempFilePath, false, 0644); + if (EC) + return EC; + + // The unique_file() interface leaks lower layers and returns a file + // descriptor. There is no way to directly close it, so use this hack + // to hand it off to raw_fd_ostream to close for us. + { + raw_fd_ostream Dummy(FD, /*shouldClose=*/true); + } + + // Resize file to requested initial size. + EC = sys::fs::resize_file(Twine(TempFilePath), Size); + if (EC) + return EC; + + // If requested, make the output file executable. + if ( Flags & F_executable ) { + sys::fs::file_status Stat2; + EC = sys::fs::status(Twine(TempFilePath), Stat2); + if (EC) + return EC; + + sys::fs::perms new_perms = Stat2.permissions(); + if ( new_perms & sys::fs::owner_read ) + new_perms |= sys::fs::owner_exe; + if ( new_perms & sys::fs::group_read ) + new_perms |= sys::fs::group_exe; + if ( new_perms & sys::fs::others_read ) + new_perms |= sys::fs::others_exe; + new_perms |= sys::fs::add_perms; + EC = sys::fs::permissions(Twine(TempFilePath), new_perms); + if (EC) + return EC; + } + + // Memory map new file. + void *Base; + EC = sys::fs::map_file_pages(Twine(TempFilePath), 0, Size, true, Base); + if (EC) + return EC; + + // Create FileOutputBuffer object to own mapped range. + uint8_t *Start = reinterpret_cast<uint8_t*>(Base); + Result.reset(new FileOutputBuffer(Start, Start+Size, FilePath, TempFilePath)); + + return error_code::success(); +} + + +error_code FileOutputBuffer::commit(int64_t NewSmallerSize) { + // Unmap buffer, letting OS flush dirty pages to file on disk.
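FileOutputBuffer's create()/commit() pair above is the classic write-to-a-temporary-then-rename recipe: all output goes to a randomly named file in the same directory, and only a successful commit renames it over the destination, so readers never observe a half-written file (a rename within one filesystem is atomic on POSIX). With the memory mapping stripped away, the shape reduces to the sketch below; writeAtomically is a hypothetical helper using only standard C++:

    #include <cstdio>
    #include <fstream>
    #include <string>

    // Write Data to Path such that Path is never partially written.
    bool writeAtomically(const std::string &Path, const std::string &Data) {
      std::string Tmp = Path + ".tmp"; // same directory => same filesystem
      {
        std::ofstream OS(Tmp.c_str(), std::ios::binary | std::ios::trunc);
        if (!OS.write(Data.data(), Data.size()))
          return false;
      } // stream flushed and closed here
      return std::rename(Tmp.c_str(), Path.c_str()) == 0; // atomic replace
    }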
+ void *Start = reinterpret_cast(BufferStart); + error_code EC = sys::fs::unmap_file_pages(Start, getBufferSize()); + if (EC) + return EC; + + // If requested, resize file as part of commit. + if ( NewSmallerSize != -1 ) { + EC = sys::fs::resize_file(Twine(TempPath), NewSmallerSize); + if (EC) + return EC; + } + + // Rename file to final name. + return sys::fs::rename(Twine(TempPath), Twine(FinalPath)); +} + + +} // namespace + diff --git a/lib/Support/GraphWriter.cpp b/lib/Support/GraphWriter.cpp index 32126ec..f6aaf83 100644 --- a/lib/Support/GraphWriter.cpp +++ b/lib/Support/GraphWriter.cpp @@ -99,7 +99,6 @@ void llvm::DisplayGraph(const sys::Path &Filename, bool wait, case GraphProgram::NEATO: args.push_back("-f"); args.push_back("neato");break; case GraphProgram::TWOPI: args.push_back("-f"); args.push_back("twopi");break; case GraphProgram::CIRCO: args.push_back("-f"); args.push_back("circo");break; - default: errs() << "Unknown graph layout name; using default.\n"; } args.push_back(0); diff --git a/lib/Support/Host.cpp b/lib/Support/Host.cpp index 0f06964..9a2c39d 100644 --- a/lib/Support/Host.cpp +++ b/lib/Support/Host.cpp @@ -11,7 +11,13 @@ // //===----------------------------------------------------------------------===// +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm/Support/DataStream.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/Host.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Config/config.h" #include @@ -25,6 +31,12 @@ #ifdef _MSC_VER #include #endif +#if defined(__APPLE__) && (defined(__ppc__) || defined(__powerpc__)) +#include +#include +#include +#include +#endif //===----------------------------------------------------------------------===// // @@ -230,11 +242,18 @@ std::string sys::getHostCPUName() { case 45: return "corei7-avx"; - case 28: // Intel Atom processor. All processors are manufactured using - // the 45 nm process + // Ivy Bridge: + case 58: + return "core-avx-i"; + + case 28: // Most 45 nm Intel Atom processors + case 38: // 45 nm Atom Lincroft + case 39: // 32 nm Atom Medfield + case 53: // 32 nm Atom Midview + case 54: // 32 nm Atom Midview return "atom"; - default: return "i686"; + default: return (Em64T) ? 
"x86-64" : "i686"; } case 15: { switch (Model) { @@ -315,6 +334,179 @@ std::string sys::getHostCPUName() { } return "generic"; } +#elif defined(__APPLE__) && (defined(__ppc__) || defined(__powerpc__)) +std::string sys::getHostCPUName() { + host_basic_info_data_t hostInfo; + mach_msg_type_number_t infoCount; + + infoCount = HOST_BASIC_INFO_COUNT; + host_info(mach_host_self(), HOST_BASIC_INFO, (host_info_t)&hostInfo, + &infoCount); + + if (hostInfo.cpu_type != CPU_TYPE_POWERPC) return "generic"; + + switch(hostInfo.cpu_subtype) { + case CPU_SUBTYPE_POWERPC_601: return "601"; + case CPU_SUBTYPE_POWERPC_602: return "602"; + case CPU_SUBTYPE_POWERPC_603: return "603"; + case CPU_SUBTYPE_POWERPC_603e: return "603e"; + case CPU_SUBTYPE_POWERPC_603ev: return "603ev"; + case CPU_SUBTYPE_POWERPC_604: return "604"; + case CPU_SUBTYPE_POWERPC_604e: return "604e"; + case CPU_SUBTYPE_POWERPC_620: return "620"; + case CPU_SUBTYPE_POWERPC_750: return "750"; + case CPU_SUBTYPE_POWERPC_7400: return "7400"; + case CPU_SUBTYPE_POWERPC_7450: return "7450"; + case CPU_SUBTYPE_POWERPC_970: return "970"; + default: ; + } + + return "generic"; +} +#elif defined(__linux__) && (defined(__ppc__) || defined(__powerpc__)) +std::string sys::getHostCPUName() { + // Access to the Processor Version Register (PVR) on PowerPC is privileged, + // and so we must use an operating-system interface to determine the current + // processor type. On Linux, this is exposed through the /proc/cpuinfo file. + const char *generic = "generic"; + + // Note: We cannot mmap /proc/cpuinfo here and then process the resulting + // memory buffer because the 'file' has 0 size (it can be read from only + // as a stream). + + std::string Err; + DataStreamer *DS = getDataFileStreamer("/proc/cpuinfo", &Err); + if (!DS) { + DEBUG(dbgs() << "Unable to open /proc/cpuinfo: " << Err << "\n"); + return generic; + } + + // The cpu line is second (after the 'processor: 0' line), so if this + // buffer is too small then something has changed (or is wrong). + char buffer[1024]; + size_t CPUInfoSize = DS->GetBytes((unsigned char*) buffer, sizeof(buffer)); + delete DS; + + const char *CPUInfoStart = buffer; + const char *CPUInfoEnd = buffer + CPUInfoSize; + + const char *CIP = CPUInfoStart; + + const char *CPUStart = 0; + size_t CPULen = 0; + + // We need to find the first line which starts with cpu, spaces, and a colon. + // After the colon, there may be some additional spaces and then the cpu type. 
+ while (CIP < CPUInfoEnd && CPUStart == 0) { + if (CIP < CPUInfoEnd && *CIP == '\n') + ++CIP; + + if (CIP < CPUInfoEnd && *CIP == 'c') { + ++CIP; + if (CIP < CPUInfoEnd && *CIP == 'p') { + ++CIP; + if (CIP < CPUInfoEnd && *CIP == 'u') { + ++CIP; + while (CIP < CPUInfoEnd && (*CIP == ' ' || *CIP == '\t')) + ++CIP; + + if (CIP < CPUInfoEnd && *CIP == ':') { + ++CIP; + while (CIP < CPUInfoEnd && (*CIP == ' ' || *CIP == '\t')) + ++CIP; + + if (CIP < CPUInfoEnd) { + CPUStart = CIP; + while (CIP < CPUInfoEnd && (*CIP != ' ' && *CIP != '\t' && + *CIP != ',' && *CIP != '\n')) + ++CIP; + CPULen = CIP - CPUStart; + } + } + } + } + } + + if (CPUStart == 0) + while (CIP < CPUInfoEnd && *CIP != '\n') + ++CIP; + } + + if (CPUStart == 0) + return generic; + + return StringSwitch(StringRef(CPUStart, CPULen)) + .Case("604e", "604e") + .Case("604", "604") + .Case("7400", "7400") + .Case("7410", "7400") + .Case("7447", "7400") + .Case("7455", "7450") + .Case("G4", "g4") + .Case("POWER4", "970") + .Case("PPC970FX", "970") + .Case("PPC970MP", "970") + .Case("G5", "g5") + .Case("POWER5", "g5") + .Case("A2", "a2") + .Case("POWER6", "pwr6") + .Case("POWER7", "pwr7") + .Default(generic); +} +#elif defined(__linux__) && defined(__arm__) +std::string sys::getHostCPUName() { + // The cpuid register on arm is not accessible from user space. On Linux, + // it is exposed through the /proc/cpuinfo file. + // Note: We cannot mmap /proc/cpuinfo here and then process the resulting + // memory buffer because the 'file' has 0 size (it can be read from only + // as a stream). + + std::string Err; + DataStreamer *DS = getDataFileStreamer("/proc/cpuinfo", &Err); + if (!DS) { + DEBUG(dbgs() << "Unable to open /proc/cpuinfo: " << Err << "\n"); + return "generic"; + } + + // Read 1024 bytes from /proc/cpuinfo, which should contain the CPU part line + // in all cases. + char buffer[1024]; + size_t CPUInfoSize = DS->GetBytes((unsigned char*) buffer, sizeof(buffer)); + delete DS; + + StringRef Str(buffer, CPUInfoSize); + + SmallVector Lines; + Str.split(Lines, "\n"); + + // Look for the CPU implementer line. + StringRef Implementer; + for (unsigned I = 0, E = Lines.size(); I != E; ++I) + if (Lines[I].startswith("CPU implementer")) + Implementer = Lines[I].substr(15).ltrim("\t :"); + + if (Implementer == "0x41") // ARM Ltd. + // Look for the CPU part line. + for (unsigned I = 0, E = Lines.size(); I != E; ++I) + if (Lines[I].startswith("CPU part")) + // The CPU part is a 3 digit hexadecimal number with a 0x prefix. The + // values correspond to the "Part number" in the CP15/c0 register. The + // contents are specified in the various processor manuals. 
+ return StringSwitch(Lines[I].substr(8).ltrim("\t :")) + .Case("0x926", "arm926ej-s") + .Case("0xb02", "mpcore") + .Case("0xb36", "arm1136j-s") + .Case("0xb56", "arm1156t2-s") + .Case("0xb76", "arm1176jz-s") + .Case("0xc08", "cortex-a8") + .Case("0xc09", "cortex-a9") + .Case("0xc20", "cortex-m0") + .Case("0xc23", "cortex-m3") + .Case("0xc24", "cortex-m4") + .Default("generic"); + + return "generic"; +} #else std::string sys::getHostCPUName() { return "generic"; diff --git a/lib/Support/Memory.cpp b/lib/Support/Memory.cpp index 2a1642a..22f7494 100644 --- a/lib/Support/Memory.cpp +++ b/lib/Support/Memory.cpp @@ -45,7 +45,7 @@ void llvm::sys::Memory::InvalidateInstructionCache(const void *Addr, # if (defined(__POWERPC__) || defined (__ppc__) || \ defined(_POWER) || defined(_ARCH_PPC)) || defined(__arm__) - sys_icache_invalidate(Addr, Len); + sys_icache_invalidate(const_cast(Addr), Len); # endif #else @@ -67,11 +67,12 @@ void llvm::sys::Memory::InvalidateInstructionCache(const void *Addr, asm volatile("isync"); # elif defined(__arm__) && defined(__GNUC__) // FIXME: Can we safely always call this for __GNUC__ everywhere? - char *Start = (char*) Addr; - char *End = Start + Len; - __clear_cache(Start, End); + const char *Start = static_cast(Addr); + const char *End = Start + Len; + __clear_cache(const_cast(Start), const_cast(End)); # elif defined(__mips__) - cacheflush((char*)Addr, Len, BCACHE); + const char *Start = static_cast(Addr); + cacheflush(const_cast(Start), Len, BCACHE); # endif #endif // end apple diff --git a/lib/Support/MemoryBuffer.cpp b/lib/Support/MemoryBuffer.cpp index 16e5c7a..992f03c 100644 --- a/lib/Support/MemoryBuffer.cpp +++ b/lib/Support/MemoryBuffer.cpp @@ -17,6 +17,7 @@ #include "llvm/Config/config.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/Errno.h" +#include "llvm/Support/FileSystem.h" #include "llvm/Support/Path.h" #include "llvm/Support/Process.h" #include "llvm/Support/Program.h" @@ -214,6 +215,14 @@ error_code MemoryBuffer::getFile(const char *Filename, OwningPtr &result, int64_t FileSize, bool RequiresNullTerminator) { + // First check that the "file" is not a directory + bool is_dir = false; + error_code err = sys::fs::is_directory(Filename, is_dir); + if (err) + return err; + if (is_dir) + return make_error_code(errc::is_a_directory); + int OpenFlags = O_RDONLY; #ifdef O_BINARY OpenFlags |= O_BINARY; // Open input file in binary mode on win32. @@ -304,16 +313,6 @@ error_code MemoryBuffer::getOpenFile(int FD, const char *Filename, RealMapOffset)) { result.reset(GetNamedBuffer( StringRef(Pages + Delta, MapSize), Filename, RequiresNullTerminator)); - - if (RequiresNullTerminator && result->getBufferEnd()[0] != '\0') { - // There could be a racing issue that resulted in the file being larger - // than the FileSize passed by the caller. We already have an assertion - // for this in MemoryBuffer::init() but have a runtime guarantee that - // the buffer will be null-terminated here, so do a copy that adds a - // null-terminator. 
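The MemoryBuffer::getFile change above rejects directories before open(2) ever runs: opening a directory read-only can succeed on POSIX, and the failure would otherwise surface later as a puzzling read error. Pulled out on its own, the guard is just the following (checkNotDirectory is an illustrative wrapper around the sys::fs calls visible in the hunk):

    #include "llvm/Support/FileSystem.h"
    #include "llvm/Support/system_error.h"
    using namespace llvm;

    // Fail early with is_a_directory instead of a late read error.
    error_code checkNotDirectory(const char *Filename) {
      bool IsDir = false;
      if (error_code EC = sys::fs::is_directory(Filename, IsDir))
        return EC;
      if (IsDir)
        return make_error_code(errc::is_a_directory);
      return error_code::success();
    }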
- result.reset(MemoryBuffer::getMemBufferCopy(result->getBuffer(), - Filename)); - } return error_code::success(); } } diff --git a/lib/Support/Mutex.cpp b/lib/Support/Mutex.cpp index da5baab..4e4a026 100644 --- a/lib/Support/Mutex.cpp +++ b/lib/Support/Mutex.cpp @@ -59,7 +59,8 @@ MutexImpl::MutexImpl( bool recursive) errorcode = pthread_mutexattr_settype(&attr, kind); assert(errorcode == 0); -#if !defined(__FreeBSD__) && !defined(__OpenBSD__) && !defined(__NetBSD__) && !defined(__DragonFly__) +#if !defined(__FreeBSD__) && !defined(__OpenBSD__) && !defined(__NetBSD__) && \ + !defined(__DragonFly__) && !defined(__Bitrig__) // Make it a process local mutex errorcode = pthread_mutexattr_setpshared(&attr, PTHREAD_PROCESS_PRIVATE); assert(errorcode == 0); diff --git a/lib/Support/Path.cpp b/lib/Support/Path.cpp index dcddeda..db4a56b 100644 --- a/lib/Support/Path.cpp +++ b/lib/Support/Path.cpp @@ -60,8 +60,11 @@ sys::IdentifyFileType(const char *magic, unsigned length) { case '\177': if (magic[1] == 'E' && magic[2] == 'L' && magic[3] == 'F') { - if (length >= 18 && magic[17] == 0) - switch (magic[16]) { + bool Data2MSB = magic[5] == 2; + unsigned high = Data2MSB ? 16 : 17; + unsigned low = Data2MSB ? 17 : 16; + if (length >= 18 && magic[high] == 0) + switch (magic[low]) { default: break; case 1: return ELF_Relocatable_FileType; case 2: return ELF_Executable_FileType; diff --git a/lib/Support/PathV2.cpp b/lib/Support/PathV2.cpp index e2a69a6..46571c0 100644 --- a/lib/Support/PathV2.cpp +++ b/lib/Support/PathV2.cpp @@ -744,6 +744,8 @@ error_code has_magic(const Twine &path, const Twine &magic, bool &result) { /// @brief Identify the magic in magic. file_magic identify_magic(StringRef magic) { + if (magic.size() < 4) + return file_magic::unknown; switch ((unsigned char)magic[0]) { case 0xDE: // 0x0B17C0DE = BC wraper if (magic[1] == (char)0xC0 && magic[2] == (char)0x17 && diff --git a/lib/Support/SourceMgr.cpp b/lib/Support/SourceMgr.cpp index 15278c5..e4e01be 100644 --- a/lib/Support/SourceMgr.cpp +++ b/lib/Support/SourceMgr.cpp @@ -79,9 +79,10 @@ int SourceMgr::FindBufferContainingLoc(SMLoc Loc) const { return -1; } -/// FindLineNumber - Find the line number for the specified location in the -/// specified file. This is not a fast method. -unsigned SourceMgr::FindLineNumber(SMLoc Loc, int BufferID) const { +/// getLineAndColumn - Find the line and column number for the specified +/// location in the specified file. This is not a fast method. +std::pair +SourceMgr::getLineAndColumn(SMLoc Loc, int BufferID) const { if (BufferID == -1) BufferID = FindBufferContainingLoc(Loc); assert(BufferID != -1 && "Invalid Location!"); @@ -91,7 +92,8 @@ unsigned SourceMgr::FindLineNumber(SMLoc Loc, int BufferID) const { // location. unsigned LineNo = 1; - const char *Ptr = Buff->getBufferStart(); + const char *BufStart = Buff->getBufferStart(); + const char *Ptr = BufStart; // If we have a line number cache, and if the query is to a later point in the // same file, start searching from the last query location. This optimizes @@ -108,7 +110,6 @@ unsigned SourceMgr::FindLineNumber(SMLoc Loc, int BufferID) const { for (; SMLoc::getFromPointer(Ptr) != Loc; ++Ptr) if (*Ptr == '\n') ++LineNo; - // Allocate the line number cache if it doesn't exist. 
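The Path.cpp hunk above fixes ELF file-type identification for big-endian objects: e_type is a 16-bit field at offset 16 of the header, and which of its two bytes carries the value depends on the EI_DATA byte at magic[5] (1 = little endian, 2 = big endian). Decoding it portably from raw header bytes looks like this (elfType is an illustrative helper):

    #include <cstdint>

    // Read the 16-bit e_type field of an ELF header, honoring EI_DATA.
    uint16_t elfType(const unsigned char *Magic /* >= 18 bytes */) {
      bool BigEndian = Magic[5] == 2; // ELFDATA2MSB
      return BigEndian ? uint16_t(Magic[16] << 8 | Magic[17])
                       : uint16_t(Magic[17] << 8 | Magic[16]);
    }
    // e_type: 1 = relocatable, 2 = executable, 3 = shared object, 4 = core.

The check in the hunk is equivalent: it insists that the high-order byte is zero and switches on the low-order one, picking indices 16 and 17 according to the endianness.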
if (LineNoCache == 0) LineNoCache = new LineNoCacheTy(); @@ -118,7 +119,10 @@ unsigned SourceMgr::FindLineNumber(SMLoc Loc, int BufferID) const { Cache.LastQueryBufferID = BufferID; Cache.LastQuery = Ptr; Cache.LineNoOfQuery = LineNo; - return LineNo; + + size_t NewlineOffs = StringRef(BufStart, Ptr-BufStart).find_last_of("\n\r"); + if (NewlineOffs == StringRef::npos) NewlineOffs = ~(size_t)0; + return std::make_pair(LineNo, Ptr-BufStart-NewlineOffs); } void SourceMgr::PrintIncludeStack(SMLoc IncludeLoc, raw_ostream &OS) const { @@ -145,50 +149,59 @@ SMDiagnostic SourceMgr::GetMessage(SMLoc Loc, SourceMgr::DiagKind Kind, ArrayRef Ranges) const { // First thing to do: find the current buffer containing the specified - // location. - int CurBuf = FindBufferContainingLoc(Loc); - assert(CurBuf != -1 && "Invalid or unspecified location!"); - - MemoryBuffer *CurMB = getBufferInfo(CurBuf).Buffer; - - // Scan backward to find the start of the line. - const char *LineStart = Loc.getPointer(); - while (LineStart != CurMB->getBufferStart() && - LineStart[-1] != '\n' && LineStart[-1] != '\r') - --LineStart; - - // Get the end of the line. - const char *LineEnd = Loc.getPointer(); - while (LineEnd != CurMB->getBufferEnd() && - LineEnd[0] != '\n' && LineEnd[0] != '\r') - ++LineEnd; - std::string LineStr(LineStart, LineEnd); - - // Convert any ranges to column ranges that only intersect the line of the - // location. + // location to pull out the source line. SmallVector, 4> ColRanges; - for (unsigned i = 0, e = Ranges.size(); i != e; ++i) { - SMRange R = Ranges[i]; - if (!R.isValid()) continue; - - // If the line doesn't contain any part of the range, then ignore it. - if (R.Start.getPointer() > LineEnd || R.End.getPointer() < LineStart) - continue; - - // Ignore pieces of the range that go onto other lines. - if (R.Start.getPointer() < LineStart) - R.Start = SMLoc::getFromPointer(LineStart); - if (R.End.getPointer() > LineEnd) - R.End = SMLoc::getFromPointer(LineEnd); + std::pair LineAndCol; + const char *BufferID = ""; + std::string LineStr; + + if (Loc.isValid()) { + int CurBuf = FindBufferContainingLoc(Loc); + assert(CurBuf != -1 && "Invalid or unspecified location!"); + + MemoryBuffer *CurMB = getBufferInfo(CurBuf).Buffer; + BufferID = CurMB->getBufferIdentifier(); - // Translate from SMLoc ranges to column ranges. - ColRanges.push_back(std::make_pair(R.Start.getPointer()-LineStart, - R.End.getPointer()-LineStart)); + // Scan backward to find the start of the line. + const char *LineStart = Loc.getPointer(); + const char *BufStart = CurMB->getBufferStart(); + while (LineStart != BufStart && LineStart[-1] != '\n' && + LineStart[-1] != '\r') + --LineStart; + + // Get the end of the line. + const char *LineEnd = Loc.getPointer(); + const char *BufEnd = CurMB->getBufferEnd(); + while (LineEnd != BufEnd && LineEnd[0] != '\n' && LineEnd[0] != '\r') + ++LineEnd; + LineStr = std::string(LineStart, LineEnd); + + // Convert any ranges to column ranges that only intersect the line of the + // location. + for (unsigned i = 0, e = Ranges.size(); i != e; ++i) { + SMRange R = Ranges[i]; + if (!R.isValid()) continue; + + // If the line doesn't contain any part of the range, then ignore it. + if (R.Start.getPointer() > LineEnd || R.End.getPointer() < LineStart) + continue; + + // Ignore pieces of the range that go onto other lines. 
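getLineAndColumn above gets the column almost for free once the line search is done: the offset of the last '\n' or '\r' before the location is found with find_last_of, and the column is the distance from it. Without the query cache, the whole computation is nothing more than the following (lineAndColumn is an illustrative helper counting only '\n'):

    #include <cstddef>
    #include <string>
    #include <utility>

    // 1-based line and column of offset Pos in Buf.
    std::pair<unsigned, unsigned> lineAndColumn(const std::string &Buf,
                                                size_t Pos) {
      unsigned Line = 1;
      size_t LineStart = 0;
      for (size_t I = 0; I < Pos; ++I)
        if (Buf[I] == '\n') {
          ++Line;
          LineStart = I + 1;
        }
      return std::make_pair(Line, unsigned(Pos - LineStart + 1));
    }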
+ if (R.Start.getPointer() < LineStart) + R.Start = SMLoc::getFromPointer(LineStart); + if (R.End.getPointer() > LineEnd) + R.End = SMLoc::getFromPointer(LineEnd); + + // Translate from SMLoc ranges to column ranges. + ColRanges.push_back(std::make_pair(R.Start.getPointer()-LineStart, + R.End.getPointer()-LineStart)); + } + + LineAndCol = getLineAndColumn(Loc, CurBuf); } - - return SMDiagnostic(*this, Loc, - CurMB->getBufferIdentifier(), FindLineNumber(Loc, CurBuf), - Loc.getPointer()-LineStart, Kind, Msg.str(), + + return SMDiagnostic(*this, Loc, BufferID, LineAndCol.first, + LineAndCol.second-1, Kind, Msg.str(), LineStr, ColRanges); } @@ -205,9 +218,11 @@ void SourceMgr::PrintMessage(SMLoc Loc, SourceMgr::DiagKind Kind, raw_ostream &OS = errs(); - int CurBuf = FindBufferContainingLoc(Loc); - assert(CurBuf != -1 && "Invalid or unspecified location!"); - PrintIncludeStack(getBufferInfo(CurBuf).IncludeLoc, OS); + if (Loc != SMLoc()) { + int CurBuf = FindBufferContainingLoc(Loc); + assert(CurBuf != -1 && "Invalid or unspecified location!"); + PrintIncludeStack(getBufferInfo(CurBuf).IncludeLoc, OS); + } Diagnostic.print(0, OS, ShowColors); } @@ -228,8 +243,8 @@ SMDiagnostic::SMDiagnostic(const SourceMgr &sm, SMLoc L, const std::string &FN, void SMDiagnostic::print(const char *ProgName, raw_ostream &S, bool ShowColors) const { - // Display colors only if OS goes to a tty. - ShowColors &= S.is_displayed(); + // Display colors only if OS supports colors. + ShowColors &= S.has_colors(); if (ShowColors) S.changeColor(raw_ostream::SAVEDCOLOR, true); @@ -343,5 +358,3 @@ void SMDiagnostic::print(const char *ProgName, raw_ostream &S, S << '\n'; } - - diff --git a/lib/Support/StreamableMemoryObject.cpp b/lib/Support/StreamableMemoryObject.cpp index c23f07b..fe3752a 100644 --- a/lib/Support/StreamableMemoryObject.cpp +++ b/lib/Support/StreamableMemoryObject.cpp @@ -20,7 +20,7 @@ class RawMemoryObject : public StreamableMemoryObject { public: RawMemoryObject(const unsigned char *Start, const unsigned char *End) : FirstChar(Start), LastChar(End) { - assert(LastChar > FirstChar && "Invalid start/end range"); + assert(LastChar >= FirstChar && "Invalid start/end range"); } virtual uint64_t getBase() const { return 0; } diff --git a/lib/Support/StringMap.cpp b/lib/Support/StringMap.cpp index c131fe0..c2fc261 100644 --- a/lib/Support/StringMap.cpp +++ b/lib/Support/StringMap.cpp @@ -189,7 +189,7 @@ void StringMapImpl::RehashTable() { // grow/rehash the table. if (NumItems*4 > NumBuckets*3) { NewSize = NumBuckets*2; - } else if (NumBuckets-(NumItems+NumTombstones) < NumBuckets/8) { + } else if (NumBuckets-(NumItems+NumTombstones) <= NumBuckets/8) { NewSize = NumBuckets; } else { return; diff --git a/lib/Support/StringRef.cpp b/lib/Support/StringRef.cpp index abe570f..8aab4b2 100644 --- a/lib/Support/StringRef.cpp +++ b/lib/Support/StringRef.cpp @@ -12,6 +12,7 @@ #include "llvm/ADT/OwningPtr.h" #include "llvm/ADT/Hashing.h" #include "llvm/ADT/edit_distance.h" + #include using namespace llvm; @@ -230,6 +231,31 @@ StringRef::size_type StringRef::find_last_of(StringRef Chars, return npos; } +/// find_last_not_of - Find the last character in the string that is not +/// \arg C, or npos if not found. +StringRef::size_type StringRef::find_last_not_of(char C, size_t From) const { + for (size_type i = min(From, Length) - 1, e = -1; i != e; --i) + if (Data[i] != C) + return i; + return npos; +} + +/// find_last_not_of - Find the last character in the string that is not in +/// \arg Chars, or npos if not found. 
+/// +/// Note: O(size() + Chars.size()) +StringRef::size_type StringRef::find_last_not_of(StringRef Chars, + size_t From) const { + std::bitset<1 << CHAR_BIT> CharBits; + for (size_type i = 0, e = Chars.size(); i != e; ++i) + CharBits.set((unsigned char)Chars[i]); + + for (size_type i = min(From, Length) - 1, e = -1; i != e; --i) + if (!CharBits.test((unsigned char)Data[i])) + return i; + return npos; +} + void StringRef::split(SmallVectorImpl &A, StringRef Separators, int MaxSplit, bool KeepEmpty) const { @@ -272,14 +298,22 @@ static unsigned GetAutoSenseRadix(StringRef &Str) { if (Str.startswith("0x")) { Str = Str.substr(2); return 16; - } else if (Str.startswith("0b")) { + } + + if (Str.startswith("0b")) { Str = Str.substr(2); return 2; - } else if (Str.startswith("0")) { + } + + if (Str.startswith("0o")) { + Str = Str.substr(2); return 8; - } else { - return 10; } + + if (Str.startswith("0")) + return 8; + + return 10; } @@ -383,7 +417,7 @@ bool StringRef::getAsInteger(unsigned Radix, APInt &Result) const { unsigned BitWidth = Log2Radix * Str.size(); if (BitWidth < Result.getBitWidth()) BitWidth = Result.getBitWidth(); // don't shrink the result - else + else if (BitWidth > Result.getBitWidth()) Result = Result.zext(BitWidth); APInt RadixAP, CharAP; // unused unless !IsPowerOf2Radix diff --git a/lib/Support/TargetRegistry.cpp b/lib/Support/TargetRegistry.cpp index 53c8d84..9c81327 100644 --- a/lib/Support/TargetRegistry.cpp +++ b/lib/Support/TargetRegistry.cpp @@ -23,6 +23,47 @@ TargetRegistry::iterator TargetRegistry::begin() { return iterator(FirstTarget); } +const Target *TargetRegistry::lookupTarget(const std::string &ArchName, + Triple &TheTriple, + std::string &Error) { + // Allocate target machine. First, check whether the user has explicitly + // specified an architecture to compile for. If so we have to look it up by + // name, because it might be a backend that has no mapping to a target triple. + const Target *TheTarget = 0; + if (!ArchName.empty()) { + for (TargetRegistry::iterator it = TargetRegistry::begin(), + ie = TargetRegistry::end(); it != ie; ++it) { + if (ArchName == it->getName()) { + TheTarget = &*it; + break; + } + } + + if (!TheTarget) { + Error = "error: invalid target '" + ArchName + "'.\n"; + return 0; + } + + // Adjust the triple to match (if known), otherwise stick with the + // given triple. + Triple::ArchType Type = Triple::getArchTypeForLLVMName(ArchName); + if (Type != Triple::UnknownArch) + TheTriple.setArch(Type); + } else { + // Get the target specific parser. + std::string TempError; + TheTarget = TargetRegistry::lookupTarget(TheTriple.getTriple(), TempError); + if (TheTarget == 0) { + Error = ": error: unable to get target for '" + + TheTriple.getTriple() + + "', see --version and --triple.\n"; + return 0; + } + } + + return TheTarget; +} + const Target *TargetRegistry::lookupTarget(const std::string &TT, std::string &Error) { // Provide special warning when no targets are initialized. 
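GetAutoSenseRadix above gains an explicit "0o" octal prefix while keeping the legacy rule that any other leading zero also means octal; note that the 0x/0b/0o prefixes are consumed from the string, while a bare leading '0' is left in place (harmless, since that digit contributes nothing in base 8). A simplified equivalent over std::string, with the behavior spelled out as examples:

    #include <string>

    // Strip any radix prefix from S and return the detected radix.
    unsigned autoSenseRadix(std::string &S) {
      if (S.compare(0, 2, "0x") == 0) { S.erase(0, 2); return 16; }
      if (S.compare(0, 2, "0b") == 0) { S.erase(0, 2); return 2; }
      if (S.compare(0, 2, "0o") == 0) { S.erase(0, 2); return 8; }
      if (!S.empty() && S[0] == '0') return 8; // legacy C-style octal
      return 10;
    }
    // "0x1f" -> 16, digits "1f"; "0o17" -> 8, digits "17";
    // "017"  -> 8, digits "017";  "17"  -> 10, digits "17".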
diff --git a/lib/Support/ThreadLocal.cpp b/lib/Support/ThreadLocal.cpp index 08b12b6..0587aae 100644 --- a/lib/Support/ThreadLocal.cpp +++ b/lib/Support/ThreadLocal.cpp @@ -25,9 +25,18 @@ namespace llvm { using namespace sys; ThreadLocalImpl::ThreadLocalImpl() { } ThreadLocalImpl::~ThreadLocalImpl() { } -void ThreadLocalImpl::setInstance(const void* d) { data = const_cast(d);} -const void* ThreadLocalImpl::getInstance() { return data; } -void ThreadLocalImpl::removeInstance() { data = 0; } +void ThreadLocalImpl::setInstance(const void* d) { + typedef int SIZE_TOO_BIG[sizeof(d) <= sizeof(data) ? 1 : -1]; + void **pd = reinterpret_cast(&data); + *pd = const_cast(d); +} +const void* ThreadLocalImpl::getInstance() { + void **pd = reinterpret_cast(&data); + return *pd; +} +void ThreadLocalImpl::removeInstance() { + setInstance(0); +} } #else @@ -40,31 +49,30 @@ void ThreadLocalImpl::removeInstance() { data = 0; } namespace llvm { using namespace sys; -ThreadLocalImpl::ThreadLocalImpl() : data(0) { - pthread_key_t* key = new pthread_key_t; +ThreadLocalImpl::ThreadLocalImpl() : data() { + typedef int SIZE_TOO_BIG[sizeof(pthread_key_t) <= sizeof(data) ? 1 : -1]; + pthread_key_t* key = reinterpret_cast(&data); int errorcode = pthread_key_create(key, NULL); assert(errorcode == 0); (void) errorcode; - data = (void*)key; } ThreadLocalImpl::~ThreadLocalImpl() { - pthread_key_t* key = static_cast(data); + pthread_key_t* key = reinterpret_cast(&data); int errorcode = pthread_key_delete(*key); assert(errorcode == 0); (void) errorcode; - delete key; } void ThreadLocalImpl::setInstance(const void* d) { - pthread_key_t* key = static_cast(data); + pthread_key_t* key = reinterpret_cast(&data); int errorcode = pthread_setspecific(*key, d); assert(errorcode == 0); (void) errorcode; } const void* ThreadLocalImpl::getInstance() { - pthread_key_t* key = static_cast(data); + pthread_key_t* key = reinterpret_cast(&data); return pthread_getspecific(*key); } diff --git a/lib/Support/Triple.cpp b/lib/Support/Triple.cpp index 44a1b38..cca549d 100644 --- a/lib/Support/Triple.cpp +++ b/lib/Support/Triple.cpp @@ -38,8 +38,8 @@ const char *Triple::getArchTypeName(ArchType Kind) { case x86_64: return "x86_64"; case xcore: return "xcore"; case mblaze: return "mblaze"; - case ptx32: return "ptx32"; - case ptx64: return "ptx64"; + case nvptx: return "nvptx"; + case nvptx64: return "nvptx64"; case le32: return "le32"; case amdil: return "amdil"; } @@ -62,7 +62,12 @@ const char *Triple::getArchTypePrefix(ArchType Kind) { case mblaze: return "mblaze"; - case hexagon: return "hexagon"; + case mips: + case mipsel: + case mips64: + case mips64el:return "mips"; + + case hexagon: return "hexagon"; case r600: return "r600"; @@ -74,8 +79,8 @@ const char *Triple::getArchTypePrefix(ArchType Kind) { case xcore: return "xcore"; - case ptx32: return "ptx"; - case ptx64: return "ptx"; + case nvptx: return "nvptx"; + case nvptx64: return "nvptx"; case le32: return "le32"; case amdil: return "amdil"; } @@ -119,6 +124,7 @@ const char *Triple::getOSTypeName(OSType Kind) { case RTEMS: return "rtems"; case NativeClient: return "nacl"; case CNK: return "cnk"; + case Bitrig: return "bitrig"; } llvm_unreachable("Invalid OSType"); @@ -160,8 +166,8 @@ Triple::ArchType Triple::getArchTypeForLLVMName(StringRef Name) { .Case("x86", x86) .Case("x86-64", x86_64) .Case("xcore", xcore) - .Case("ptx32", ptx32) - .Case("ptx64", ptx64) + .Case("nvptx", nvptx) + .Case("nvptx64", nvptx64) .Case("le32", le32) .Case("amdil", amdil) .Default(UnknownArch); @@ -192,8 
+198,8 @@ Triple::ArchType Triple::getArchTypeForDarwinArchName(StringRef Str) {
     .Cases("arm", "armv4t", "armv5", "armv6", Triple::arm)
     .Cases("armv7", "armv7f", "armv7k", "armv7s", "xscale", Triple::arm)
     .Case("r600", Triple::r600)
-    .Case("ptx32", Triple::ptx32)
-    .Case("ptx64", Triple::ptx64)
+    .Case("nvptx", Triple::nvptx)
+    .Case("nvptx64", Triple::nvptx64)
     .Case("amdil", Triple::amdil)
     .Default(Triple::UnknownArch);
 }
@@ -215,8 +221,8 @@ const char *Triple::getArchNameForAssembler() {
     .Cases("armv6", "thumbv6", "armv6")
     .Cases("armv7", "thumbv7", "armv7")
     .Case("r600", "r600")
-    .Case("ptx32", "ptx32")
-    .Case("ptx64", "ptx64")
+    .Case("nvptx", "nvptx")
+    .Case("nvptx64", "nvptx64")
     .Case("le32", "le32")
     .Case("amdil", "amdil")
     .Default(NULL);
@@ -249,8 +255,8 @@ static Triple::ArchType parseArch(StringRef ArchName) {
     .Case("sparcv9", Triple::sparcv9)
     .Case("tce", Triple::tce)
     .Case("xcore", Triple::xcore)
-    .Case("ptx32", Triple::ptx32)
-    .Case("ptx64", Triple::ptx64)
+    .Case("nvptx", Triple::nvptx)
+    .Case("nvptx64", Triple::nvptx64)
     .Case("le32", Triple::le32)
     .Case("amdil", Triple::amdil)
     .Default(Triple::UnknownArch);
@@ -288,6 +294,7 @@ static Triple::OSType parseOS(StringRef OSName) {
     .StartsWith("rtems", Triple::RTEMS)
     .StartsWith("nacl", Triple::NativeClient)
     .StartsWith("cnk", Triple::CNK)
+    .StartsWith("bitrig", Triple::Bitrig)
     .Default(Triple::UnknownOS);
 }
 
@@ -584,6 +591,29 @@ bool Triple::getMacOSXVersion(unsigned &Major, unsigned &Minor,
   return true;
 }
 
+void Triple::getiOSVersion(unsigned &Major, unsigned &Minor,
+                           unsigned &Micro) const {
+  switch (getOS()) {
+  default: llvm_unreachable("unexpected OS for Darwin triple");
+  case Darwin:
+  case MacOSX:
+    // Ignore the version from the triple. This is only handled because the
+    // clang driver combines OS X and iOS support into a common Darwin
+    // toolchain that wants to know the iOS version number even when targeting
+    // OS X.
+    Major = 3;
+    Minor = 0;
+    Micro = 0;
+    break;
+  case IOS:
+    getOSVersion(Major, Minor, Micro);
+    // Default to 3.0.
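// Aside, not part of the patch: the 3.0 floor assumes no supported Darwin
// toolchain targets anything older, so an unversioned "*-apple-ios" triple
// also reports 3.0.0. A hypothetical caller:
//   unsigned Maj, Min, Mic;
//   Triple("thumbv7-apple-ios5.0").getiOSVersion(Maj, Min, Mic);
//   // Maj == 5, Min == 0, Mic == 0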
+ if (Major == 0) + Major = 3; + break; + } +} + void Triple::setTriple(const Twine &Str) { *this = Triple(Str); } @@ -652,8 +682,8 @@ static unsigned getArchPointerBitWidth(llvm::Triple::ArchType Arch) { case llvm::Triple::mblaze: case llvm::Triple::mips: case llvm::Triple::mipsel: + case llvm::Triple::nvptx: case llvm::Triple::ppc: - case llvm::Triple::ptx32: case llvm::Triple::r600: case llvm::Triple::sparc: case llvm::Triple::tce: @@ -664,8 +694,8 @@ static unsigned getArchPointerBitWidth(llvm::Triple::ArchType Arch) { case llvm::Triple::mips64: case llvm::Triple::mips64el: + case llvm::Triple::nvptx64: case llvm::Triple::ppc64: - case llvm::Triple::ptx64: case llvm::Triple::sparcv9: case llvm::Triple::x86_64: return 64; @@ -701,8 +731,8 @@ Triple Triple::get32BitArchVariant() const { case Triple::mblaze: case Triple::mips: case Triple::mipsel: + case Triple::nvptx: case Triple::ppc: - case Triple::ptx32: case Triple::r600: case Triple::sparc: case Triple::tce: @@ -714,8 +744,8 @@ Triple Triple::get32BitArchVariant() const { case Triple::mips64: T.setArch(Triple::mips); break; case Triple::mips64el: T.setArch(Triple::mipsel); break; + case Triple::nvptx64: T.setArch(Triple::nvptx); break; case Triple::ppc64: T.setArch(Triple::ppc); break; - case Triple::ptx64: T.setArch(Triple::ptx32); break; case Triple::sparcv9: T.setArch(Triple::sparc); break; case Triple::x86_64: T.setArch(Triple::x86); break; } @@ -742,8 +772,8 @@ Triple Triple::get64BitArchVariant() const { case Triple::mips64: case Triple::mips64el: + case Triple::nvptx64: case Triple::ppc64: - case Triple::ptx64: case Triple::sparcv9: case Triple::x86_64: // Already 64-bit. @@ -751,8 +781,8 @@ Triple Triple::get64BitArchVariant() const { case Triple::mips: T.setArch(Triple::mips64); break; case Triple::mipsel: T.setArch(Triple::mips64el); break; + case Triple::nvptx: T.setArch(Triple::nvptx64); break; case Triple::ppc: T.setArch(Triple::ppc64); break; - case Triple::ptx32: T.setArch(Triple::ptx64); break; case Triple::sparc: T.setArch(Triple::sparcv9); break; case Triple::x86: T.setArch(Triple::x86_64); break; } diff --git a/lib/Support/Unix/Path.inc b/lib/Support/Unix/Path.inc index ddc1e0f..6bddbdf 100644 --- a/lib/Support/Unix/Path.inc +++ b/lib/Support/Unix/Path.inc @@ -260,7 +260,7 @@ Path::GetCurrentDirectory() { return Path(pathname); } -#if defined(__FreeBSD__) || defined (__NetBSD__) || \ +#if defined(__FreeBSD__) || defined (__NetBSD__) || defined(__Bitrig__) || \ defined(__OpenBSD__) || defined(__minix) || defined(__FreeBSD_kernel__) static int test_dir(char buf[PATH_MAX], char ret[PATH_MAX], @@ -329,7 +329,7 @@ Path Path::GetMainExecutable(const char *argv0, void *MainAddr) { if (realpath(exe_path, link_path)) return Path(link_path); } -#elif defined(__FreeBSD__) || defined (__NetBSD__) || \ +#elif defined(__FreeBSD__) || defined (__NetBSD__) || defined(__Bitrig__) || \ defined(__OpenBSD__) || defined(__minix) || defined(__FreeBSD_kernel__) char exe_path[PATH_MAX]; @@ -884,7 +884,8 @@ const char *Path::MapInFilePages(int FD, size_t FileSize, off_t Offset) { } void Path::UnMapFilePages(const char *BasePtr, size_t FileSize) { - ::munmap((void*)BasePtr, FileSize); + const void *Addr = static_cast(BasePtr); + ::munmap(const_cast(Addr), FileSize); } } // end llvm namespace diff --git a/lib/Support/Unix/PathV2.inc b/lib/Support/Unix/PathV2.inc index edb101e..99f8cd4 100644 --- a/lib/Support/Unix/PathV2.inc +++ b/lib/Support/Unix/PathV2.inc @@ -17,12 +17,16 @@ 
//===----------------------------------------------------------------------===//
 
 #include "Unix.h"
+#include "llvm/Support/Process.h"
 #if HAVE_SYS_STAT_H
 #include <sys/stat.h>
 #endif
 #if HAVE_FCNTL_H
 #include <fcntl.h>
 #endif
+#ifdef HAVE_SYS_MMAN_H
+#include <sys/mman.h>
+#endif
 #if HAVE_DIRENT_H
 # include <dirent.h>
 # define NAMLEN(dirent) strlen((dirent)->d_name)
@@ -46,6 +50,12 @@
 #include <stdio.h>
 #endif
 
+// Both stdio.h and cstdio are included via different paths and
+// stdcxx's cstdio doesn't include stdio.h, so it doesn't #undef the macros
+// either.
+#undef ferror
+#undef feof
+
 // For GNU Hurd
 #if defined(__GNU__) && !defined(PATH_MAX)
 # define PATH_MAX 4096
@@ -272,8 +282,7 @@ error_code exists(const Twine &path, bool &result) {
   SmallString<128> path_storage;
   StringRef p = path.toNullTerminatedStringRef(path_storage);
 
-  struct stat status;
-  if (::stat(p.begin(), &status) == -1) {
+  if (::access(p.begin(), F_OK) == -1) {
     if (errno != errc::no_such_file_or_directory)
       return error_code(errno, system_category());
     result = false;
@@ -285,8 +294,8 @@ error_code exists(const Twine &path, bool &result) {
 
 bool equivalent(file_status A, file_status B) {
   assert(status_known(A) && status_known(B));
-  return A.st_dev == B.st_dev &&
-         A.st_ino == B.st_ino;
+  return A.fs_st_dev == B.fs_st_dev &&
+         A.fs_st_ino == B.fs_st_ino;
 }
 
 error_code equivalent(const Twine &A, const Twine &B, bool &result) {
@@ -325,30 +334,62 @@ error_code status(const Twine &path, file_status &result) {
     return ec;
   }
 
+  perms prms = static_cast<perms>(status.st_mode & perms_mask);
+
   if (S_ISDIR(status.st_mode))
-    result = file_status(file_type::directory_file);
+    result = file_status(file_type::directory_file, prms);
   else if (S_ISREG(status.st_mode))
-    result = file_status(file_type::regular_file);
+    result = file_status(file_type::regular_file, prms);
   else if (S_ISBLK(status.st_mode))
-    result = file_status(file_type::block_file);
+    result = file_status(file_type::block_file, prms);
   else if (S_ISCHR(status.st_mode))
-    result = file_status(file_type::character_file);
+    result = file_status(file_type::character_file, prms);
   else if (S_ISFIFO(status.st_mode))
-    result = file_status(file_type::fifo_file);
+    result = file_status(file_type::fifo_file, prms);
   else if (S_ISSOCK(status.st_mode))
-    result = file_status(file_type::socket_file);
+    result = file_status(file_type::socket_file, prms);
   else
-    result = file_status(file_type::type_unknown);
+    result = file_status(file_type::type_unknown, prms);
 
-  result.st_dev = status.st_dev;
-  result.st_ino = status.st_ino;
+  result.fs_st_dev = status.st_dev;
+  result.fs_st_ino = status.st_ino;
 
   return error_code::success();
 }
 
+// Modifies permissions on a file.
+error_code permissions(const Twine &path, perms prms) {
+  if ((prms & add_perms) && (prms & remove_perms))
+    llvm_unreachable("add_perms and remove_perms are mutually exclusive");
+
+  // Get current permissions
+  file_status info;
+  if (error_code ec = status(path, info)) {
+    return ec;
+  }
+
+  // Set updated permissions.
+  SmallString<128> path_storage;
+  StringRef p = path.toNullTerminatedStringRef(path_storage);
+  perms permsToSet;
+  if (prms & add_perms) {
+    permsToSet = (info.permissions() | prms) & perms_mask;
+  } else if (prms & remove_perms) {
+    permsToSet = (info.permissions() & ~prms) & perms_mask;
+  } else {
+    permsToSet = prms & perms_mask;
+  }
+  if (::chmod(p.begin(), static_cast<mode_t>(permsToSet))) {
+    return error_code(errno, system_category());
+  }
+
+  return error_code::success();
+}
+
+// Since this is most often used for temporary files, mode defaults to 0600.
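// Illustrative note, not from the patch: 0600 is owner_read | owner_write in
// the perms enum. For an existing file, the permissions() helper above applies
// the same kind of bits; a sketch, assuming a file "out.tmp" exists:
//   using namespace llvm::sys::fs;
//   error_code ec = permissions("out.tmp",
//                               remove_perms | group_write | others_write);
//   // clears g+w and o+w, leaving all other mode bits intact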
error_code unique_file(const Twine &model, int &result_fd, - SmallVectorImpl &result_path, - bool makeAbsolute) { + SmallVectorImpl &result_path, + bool makeAbsolute, unsigned mode) { SmallString<128> Model; model.toVector(Model); // Null terminate. @@ -365,37 +406,20 @@ error_code unique_file(const Twine &model, int &result_fd, } } - // Replace '%' with random chars. From here on, DO NOT modify model. It may be - // needed if the randomly chosen path already exists. - SmallString<128> RandomPath; - RandomPath.reserve(Model.size() + 1); - ::srand(::time(NULL)); + // From here on, DO NOT modify model. It may be needed if the randomly chosen + // path already exists. + SmallString<128> RandomPath = Model; retry_random_path: - // This is opened here instead of above to make it easier to track when to - // close it. Collisions should be rare enough for the possible extra syscalls - // not to matter. - FILE *RandomSource = ::fopen("/dev/urandom", "r"); - RandomPath.set_size(0); - for (SmallVectorImpl::const_iterator i = Model.begin(), - e = Model.end(); i != e; ++i) { - if (*i == '%') { - char val = 0; - if (RandomSource) - val = fgetc(RandomSource); - else - val = ::rand(); - RandomPath.push_back("0123456789abcdef"[val & 15]); - } else - RandomPath.push_back(*i); + // Replace '%' with random chars. + for (unsigned i = 0, e = Model.size(); i != e; ++i) { + if (Model[i] == '%') + RandomPath[i] = "0123456789abcdef"[sys::Process::GetRandomNumber() & 15]; } - if (RandomSource) - ::fclose(RandomSource); - // Try to open + create the file. rety_open_create: - int RandomFD = ::open(RandomPath.c_str(), O_RDWR | O_CREAT | O_EXCL, 0600); + int RandomFD = ::open(RandomPath.c_str(), O_RDWR | O_CREAT | O_EXCL, mode); if (RandomFD == -1) { // If the file existed, try again, otherwise, error. if (errno == errc::file_exists) @@ -511,6 +535,36 @@ error_code get_magic(const Twine &path, uint32_t len, return error_code::success(); } +error_code map_file_pages(const Twine &path, off_t file_offset, size_t size, + bool map_writable, void *&result) { + SmallString<128> path_storage; + StringRef name = path.toNullTerminatedStringRef(path_storage); + int oflags = map_writable ? O_RDWR : O_RDONLY; + int ofd = ::open(name.begin(), oflags); + if ( ofd == -1 ) + return error_code(errno, system_category()); + AutoFD fd(ofd); + int flags = map_writable ? MAP_SHARED : MAP_PRIVATE; + int prot = map_writable ? (PROT_READ|PROT_WRITE) : PROT_READ; +#ifdef MAP_FILE + flags |= MAP_FILE; +#endif + result = ::mmap(0, size, prot, flags, fd, file_offset); + if (result == MAP_FAILED) { + return error_code(errno, system_category()); + } + + return error_code::success(); +} + +error_code unmap_file_pages(void *base, size_t size) { + if ( ::munmap(base, size) == -1 ) + return error_code(errno, system_category()); + + return error_code::success(); +} + + } // end namespace fs } // end namespace sys } // end namespace llvm diff --git a/lib/Support/Unix/Process.inc b/lib/Support/Unix/Process.inc index f640462..5204147 100644 --- a/lib/Support/Unix/Process.inc +++ b/lib/Support/Unix/Process.inc @@ -12,15 +12,18 @@ //===----------------------------------------------------------------------===// #include "Unix.h" +#include "llvm/ADT/Hashing.h" +#include "llvm/Support/TimeValue.h" #ifdef HAVE_SYS_TIME_H #include #endif #ifdef HAVE_SYS_RESOURCE_H #include #endif -// DragonFly BSD has deprecated for instead, -// Unix.h includes this for us already. 
-#if defined(HAVE_MALLOC_H) && !defined(__DragonFly__) +// DragonFlyBSD, OpenBSD, and Bitrig have deprecated for +// instead. Unix.h includes this for us already. +#if defined(HAVE_MALLOC_H) && !defined(__DragonFly__) && \ + !defined(__OpenBSD__) && !defined(__Bitrig__) #include #endif #ifdef HAVE_MALLOC_MALLOC_H @@ -247,16 +250,18 @@ static bool terminalHasColors() { return false; } +bool Process::FileDescriptorHasColors(int fd) { + // A file descriptor has colors if it is displayed and the terminal has + // colors. + return FileDescriptorIsDisplayed(fd) && terminalHasColors(); +} + bool Process::StandardOutHasColors() { - if (!StandardOutIsDisplayed()) - return false; - return terminalHasColors(); + return FileDescriptorHasColors(STDOUT_FILENO); } bool Process::StandardErrHasColors() { - if (!StandardErrIsDisplayed()) - return false; - return terminalHasColors(); + return FileDescriptorHasColors(STDERR_FILENO); } bool Process::ColorNeedsFlush() { @@ -297,3 +302,33 @@ const char *Process::OutputReverse() { const char *Process::ResetColor() { return "\033[0m"; } + +#if !defined(HAVE_ARC4RANDOM) +static unsigned GetRandomNumberSeed() { + // Attempt to get the initial seed from /dev/urandom, if possible. + if (FILE *RandomSource = ::fopen("/dev/urandom", "r")) { + unsigned seed; + int count = ::fread((void *)&seed, sizeof(seed), 1, RandomSource); + ::fclose(RandomSource); + + // Return the seed if the read was successful. + if (count == 1) + return seed; + } + + // Otherwise, swizzle the current time and the process ID to form a reasonable + // seed. + TimeValue Now = llvm::TimeValue::now(); + return hash_combine(Now.seconds(), Now.nanoseconds(), ::getpid()); +} +#endif + +unsigned llvm::sys::Process::GetRandomNumber() { +#if defined(HAVE_ARC4RANDOM) + return arc4random(); +#else + static int x = (::srand(GetRandomNumberSeed()), 0); + (void)x; + return ::rand(); +#endif +} diff --git a/lib/Support/Unix/Signals.inc b/lib/Support/Unix/Signals.inc index c9ec9fc..5195116 100644 --- a/lib/Support/Unix/Signals.inc +++ b/lib/Support/Unix/Signals.inc @@ -15,6 +15,7 @@ #include "Unix.h" #include "llvm/ADT/STLExtras.h" #include "llvm/Support/Mutex.h" +#include #include #include #if HAVE_EXECINFO_H @@ -43,7 +44,7 @@ static SmartMutex SignalsMutex; /// InterruptFunction - The function to call if ctrl-c is pressed. static void (*InterruptFunction)() = 0; -static std::vector FilesToRemove; +static std::vector FilesToRemove; static std::vector > CallBacksToRun; // IntSigs - Signals that may interrupt the program at any time. @@ -117,10 +118,20 @@ static void UnregisterHandlers() { /// RemoveFilesToRemove - Process the FilesToRemove list. This function /// should be called with the SignalsMutex lock held. +/// NB: This must be an async signal safe function. It cannot allocate or free +/// memory, even in debug builds. static void RemoveFilesToRemove() { - while (!FilesToRemove.empty()) { - FilesToRemove.back().eraseFromDisk(true); - FilesToRemove.pop_back(); + // Note: avoid iterators in case of debug iterators that allocate or release + // memory. + for (unsigned i = 0, e = FilesToRemove.size(); i != e; ++i) { + // Note that we don't want to use any external code here, and we don't care + // about errors. We're going to try as hard as we can as often as we need + // to to make these files go away. If these aren't files, too bad. + // + // We do however rely on a std::string implementation for which repeated + // calls to 'c_str()' don't allocate memory. 
We pre-call 'c_str()' on all + // of these strings to try to ensure this is safe. + unlink(FilesToRemove[i].c_str()); } } @@ -178,7 +189,19 @@ void llvm::sys::SetInterruptFunction(void (*IF)()) { bool llvm::sys::RemoveFileOnSignal(const sys::Path &Filename, std::string* ErrMsg) { SignalsMutex.acquire(); - FilesToRemove.push_back(Filename); + std::string *OldPtr = FilesToRemove.empty() ? 0 : &FilesToRemove[0]; + FilesToRemove.push_back(Filename.str()); + + // We want to call 'c_str()' on every std::string in this vector so that if + // the underlying implementation requires a re-allocation, it happens here + // rather than inside of the signal handler. If we see the vector grow, we + // have to call it on every entry. If it remains in place, we only need to + // call it on the latest one. + if (OldPtr == &FilesToRemove[0]) + FilesToRemove.back().c_str(); + else + for (unsigned i = 0, e = FilesToRemove.size(); i != e; ++i) + FilesToRemove[i].c_str(); SignalsMutex.release(); @@ -189,10 +212,19 @@ bool llvm::sys::RemoveFileOnSignal(const sys::Path &Filename, // DontRemoveFileOnSignal - The public API void llvm::sys::DontRemoveFileOnSignal(const sys::Path &Filename) { SignalsMutex.acquire(); - std::vector::reverse_iterator I = - std::find(FilesToRemove.rbegin(), FilesToRemove.rend(), Filename); - if (I != FilesToRemove.rend()) - FilesToRemove.erase(I.base()-1); + std::vector::reverse_iterator RI = + std::find(FilesToRemove.rbegin(), FilesToRemove.rend(), Filename.str()); + std::vector::iterator I = FilesToRemove.end(); + if (RI != FilesToRemove.rend()) + I = FilesToRemove.erase(RI.base()-1); + + // We need to call c_str() on every element which would have been moved by + // the erase. These elements, in a C++98 implementation where c_str() + // requires a reallocation on the first call may have had the call to c_str() + // made on insertion become invalid by being copied down an element. + for (std::vector::iterator E = FilesToRemove.end(); I != E; ++I) + I->c_str(); + SignalsMutex.release(); } diff --git a/lib/Support/Unix/Unix.h b/lib/Support/Unix/Unix.h index b7be311..361f297 100644 --- a/lib/Support/Unix/Unix.h +++ b/lib/Support/Unix/Unix.h @@ -44,16 +44,10 @@ #include #endif -#ifdef TIME_WITH_SYS_TIME +#ifdef HAVE_SYS_TIME_H # include -# include -#else -# ifdef HAVE_SYS_TIME_H -# include -# else -# include -# endif #endif +#include #ifdef HAVE_SYS_WAIT_H # include diff --git a/lib/Support/Windows/Path.inc b/lib/Support/Windows/Path.inc index d8dc522..2280b34 100644 --- a/lib/Support/Windows/Path.inc +++ b/lib/Support/Windows/Path.inc @@ -188,8 +188,20 @@ static Path *TempDirectory; Path Path::GetTemporaryDirectory(std::string* ErrMsg) { - if (TempDirectory) + if (TempDirectory) { +#if defined(_MSC_VER) + // Visual Studio gets confused and emits a diagnostic about calling exists, + // even though this is the implementation for PathV1. Temporarily + // disable the deprecated warning message + #pragma warning(push) + #pragma warning(disable:4996) +#endif + assert(TempDirectory->exists() && "Who has removed TempDirectory?"); +#if defined(_MSC_VER) + #pragma warning(pop) +#endif return *TempDirectory; + } char pathname[MAX_PATH]; if (!GetTempPath(MAX_PATH, pathname)) { @@ -201,7 +213,7 @@ Path::GetTemporaryDirectory(std::string* ErrMsg) { Path result; result.set(pathname); - // Append a subdirectory passed on our process id so multiple LLVMs don't + // Append a subdirectory based on our process id so multiple LLVMs don't // step on each other's toes. 
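// Sketch of the Signals.inc idiom above (hypothetical helper, not from the
// patch): the c_str() pre-calls force any lazy allocation inside std::string
// to happen while the mutex is held, so the async-signal handler can later
// unlink() each name without allocating:
//   static void preflightCStr(std::vector<std::string> &Files) {
//     for (unsigned i = 0, e = Files.size(); i != e; ++i)
//       (void)Files[i].c_str(); // may allocate now; the handler must not
//   }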
#ifdef __MINGW32__ // Mingw's Win32 header files are broken. diff --git a/lib/Support/Windows/PathV2.inc b/lib/Support/Windows/PathV2.inc index e9ce5d9..66eeab0 100644 --- a/lib/Support/Windows/PathV2.inc +++ b/lib/Support/Windows/PathV2.inc @@ -301,11 +301,21 @@ error_code rename(const Twine &from, const Twine &to) { if (error_code ec = UTF8ToUTF16(f, wide_from)) return ec; if (error_code ec = UTF8ToUTF16(t, wide_to)) return ec; - if (!::MoveFileExW(wide_from.begin(), wide_to.begin(), - MOVEFILE_COPY_ALLOWED | MOVEFILE_REPLACE_EXISTING)) - return windows_error(::GetLastError()); + error_code ec = error_code::success(); + for (int i = 0; i < 2000; i++) { + if (::MoveFileExW(wide_from.begin(), wide_to.begin(), + MOVEFILE_COPY_ALLOWED | MOVEFILE_REPLACE_EXISTING)) + return error_code::success(); + ec = windows_error(::GetLastError()); + if (ec != windows_error::access_denied) + break; + // Retry MoveFile() at ACCESS_DENIED. + // System scanners (eg. indexer) might open the source file when + // It is written and closed. + ::Sleep(1); + } - return error_code::success(); + return ec; } error_code resize_file(const Twine &path, uint64_t size) { @@ -487,9 +497,46 @@ handle_status_error: return error_code::success(); } + +// Modifies permissions on a file. +error_code permissions(const Twine &path, perms prms) { +#if 0 // verify code below before enabling: + // If the permissions bits are not trying to modify + // "write" permissions, there is nothing to do. + if (!(prms & (owner_write|group_write|others_write))) + return error_code::success(); + + SmallString<128> path_storage; + SmallVector path_utf16; + + if (error_code ec = UTF8ToUTF16(path.toStringRef(path_storage), + path_utf16)) + return ec; + + DWORD attributes = ::GetFileAttributesW(path_utf16.begin()); + + if (prms & add_perms) { + attributes &= ~FILE_ATTRIBUTE_READONLY; + } + else if (prms & remove_perms) { + attributes |= FILE_ATTRIBUTE_READONLY; + } + else { + assert(0 && "neither add_perms or remove_perms is set"); + } + + if ( ! ::SetFileAttributesW(path_utf16.begin(), attributes)) + return windows_error(::GetLastError()); +#endif + return error_code::success(); +} + + +// FIXME: mode should be used here and default to user r/w only, +// it currently comes in as a UNIX mode. error_code unique_file(const Twine &model, int &result_fd, - SmallVectorImpl &result_path, - bool makeAbsolute) { + SmallVectorImpl &result_path, + bool makeAbsolute, unsigned mode) { // Use result_path as temp storage. 
result_path.set_size(0); StringRef m = model.toStringRef(result_path); @@ -743,6 +790,19 @@ error_code detail::directory_iterator_increment(detail::DirIterState &it) { return error_code::success(); } +error_code map_file_pages(const Twine &path, off_t file_offset, size_t size, + bool map_writable, void *&result) { + assert(0 && "NOT IMPLEMENTED"); + return windows_error::invalid_function; +} + +error_code unmap_file_pages(void *base, size_t size) { + assert(0 && "NOT IMPLEMENTED"); + return windows_error::invalid_function; +} + + + } // end namespace fs } // end namespace sys } // end namespace llvm diff --git a/lib/Support/Windows/Process.inc b/lib/Support/Windows/Process.inc index 9a388b4..e29eb6d 100644 --- a/lib/Support/Windows/Process.inc +++ b/lib/Support/Windows/Process.inc @@ -133,7 +133,7 @@ bool Process::StandardErrIsDisplayed() { } bool Process::FileDescriptorIsDisplayed(int fd) { - DWORD Mode; // Unused + DWORD Mode; // Unused return (GetConsoleMode((HANDLE)_get_osfhandle(fd), &Mode) != 0); } @@ -153,13 +153,17 @@ unsigned Process::StandardErrColumns() { return Columns; } -// It always has colors. -bool Process::StandardErrHasColors() { - return StandardErrIsDisplayed(); +// The terminal always has colors. +bool Process::FileDescriptorHasColors(int fd) { + return FileDescriptorIsDisplayed(fd); } bool Process::StandardOutHasColors() { - return StandardOutIsDisplayed(); + return FileDescriptorHasColors(1); +} + +bool Process::StandardErrHasColors() { + return FileDescriptorHasColors(2); } namespace { diff --git a/lib/Support/Windows/RWMutex.inc b/lib/Support/Windows/RWMutex.inc index 26b9bba..9593923 100644 --- a/lib/Support/Windows/RWMutex.inc +++ b/lib/Support/Windows/RWMutex.inc @@ -67,9 +67,9 @@ static bool loadSRW() { "ReleaseSRWLockShared"); ::FreeLibrary(hLib); - if (fpInitializeSRWLock != NULL) { - sHasSRW = true; - } + if (fpInitializeSRWLock != NULL) { + sHasSRW = true; + } } } return sHasSRW; diff --git a/lib/Support/Windows/ThreadLocal.inc b/lib/Support/Windows/ThreadLocal.inc index 512462d..057deb3 100644 --- a/lib/Support/Windows/ThreadLocal.inc +++ b/lib/Support/Windows/ThreadLocal.inc @@ -22,26 +22,25 @@ namespace llvm { using namespace sys; -ThreadLocalImpl::ThreadLocalImpl() { - DWORD* tls = new DWORD; +ThreadLocalImpl::ThreadLocalImpl() : data() { + typedef int SIZE_TOO_BIG[sizeof(DWORD) <= sizeof(data) ? 1 : -1]; + DWORD* tls = reinterpret_cast(&data); *tls = TlsAlloc(); assert(*tls != TLS_OUT_OF_INDEXES); - data = tls; } ThreadLocalImpl::~ThreadLocalImpl() { - DWORD* tls = static_cast(data); + DWORD* tls = reinterpret_cast(&data); TlsFree(*tls); - delete tls; } const void* ThreadLocalImpl::getInstance() { - DWORD* tls = static_cast(data); + DWORD* tls = reinterpret_cast(&data); return TlsGetValue(*tls); } void ThreadLocalImpl::setInstance(const void* d){ - DWORD* tls = static_cast(data); + DWORD* tls = reinterpret_cast(&data); int errorcode = TlsSetValue(*tls, const_cast(d)); assert(errorcode != 0); (void)errorcode; diff --git a/lib/Support/YAMLParser.cpp b/lib/Support/YAMLParser.cpp index d38b51b..7c353c8 100644 --- a/lib/Support/YAMLParser.cpp +++ b/lib/Support/YAMLParser.cpp @@ -27,12 +27,12 @@ using namespace llvm; using namespace yaml; enum UnicodeEncodingForm { - UEF_UTF32_LE, //< UTF-32 Little Endian - UEF_UTF32_BE, //< UTF-32 Big Endian - UEF_UTF16_LE, //< UTF-16 Little Endian - UEF_UTF16_BE, //< UTF-16 Big Endian - UEF_UTF8, //< UTF-8 or ascii. - UEF_Unknown //< Not a valid Unicode encoding. 
+ UEF_UTF32_LE, ///< UTF-32 Little Endian + UEF_UTF32_BE, ///< UTF-32 Big Endian + UEF_UTF16_LE, ///< UTF-16 Little Endian + UEF_UTF16_BE, ///< UTF-16 Big Endian + UEF_UTF8, ///< UTF-8 or ascii. + UEF_Unknown ///< Not a valid Unicode encoding. }; /// EncodingInfo - Holds the encoding type and length of the byte order mark if @@ -489,9 +489,6 @@ private: /// @brief Can the next token be the start of a simple key? bool IsSimpleKeyAllowed; - /// @brief Is the next token required to start a simple key? - bool IsSimpleKeyRequired; - /// @brief True if an error has occurred. bool Failed; @@ -658,7 +655,7 @@ std::string yaml::escape(StringRef Input) { EscapedInput += "\\r"; else if (*i == 0x1B) EscapedInput += "\\e"; - else if (*i >= 0 && *i < 0x20) { // Control characters not handled above. + else if ((unsigned char)*i < 0x20) { // Control characters not handled above. std::string HexStr = utohexstr(*i); EscapedInput += "\\x" + std::string(2 - HexStr.size(), '0') + HexStr; } else if (*i & 0x80) { // UTF-8 multiple code unit subsequence. @@ -704,7 +701,6 @@ Scanner::Scanner(StringRef Input, SourceMgr &sm) , FlowLevel(0) , IsStartOfStream(true) , IsSimpleKeyAllowed(true) - , IsSimpleKeyRequired(false) , Failed(false) { InputBuffer = MemoryBuffer::getMemBuffer(Input, "YAML"); SM.AddNewSourceBuffer(InputBuffer, SMLoc()); @@ -755,6 +751,8 @@ Token Scanner::getNext() { } StringRef::iterator Scanner::skip_nb_char(StringRef::iterator Position) { + if (Position == End) + return Position; // Check 7 bit c-printable - b-char. if ( *Position == 0x09 || (*Position >= 0x20 && *Position <= 0x7E)) @@ -778,6 +776,8 @@ StringRef::iterator Scanner::skip_nb_char(StringRef::iterator Position) { } StringRef::iterator Scanner::skip_b_break(StringRef::iterator Position) { + if (Position == End) + return Position; if (*Position == 0x0D) { if (Position + 1 != End && *(Position + 1) == 0x0A) return Position + 2; @@ -1211,7 +1211,9 @@ bool Scanner::scanFlowScalar(bool IsDoubleQuoted) { ++Current; // Repeat until the previous character was not a '\' or was an escaped // backslash. - } while (*(Current - 1) == '\\' && wasEscaped(Start + 1, Current)); + } while ( Current != End + && *(Current - 1) == '\\' + && wasEscaped(Start + 1, Current)); } else { skip(1); while (true) { @@ -1624,9 +1626,7 @@ StringRef ScalarNode::getValue(SmallVectorImpl &Storage) const { return UnquotedValue; } // Plain or block. - size_t trimtrail = Value.rfind(' '); - return Value.drop_back( - trimtrail == StringRef::npos ? 0 : Value.size() - trimtrail); + return Value.rtrim(" "); } StringRef ScalarNode::unescapeDoubleQuoted( StringRef UnquotedValue @@ -1732,8 +1732,10 @@ StringRef ScalarNode::unescapeDoubleQuoted( StringRef UnquotedValue if (UnquotedValue.size() < 3) // TODO: Report error. break; - unsigned int UnicodeScalarValue = 0; - UnquotedValue.substr(1, 2).getAsInteger(16, UnicodeScalarValue); + unsigned int UnicodeScalarValue; + if (UnquotedValue.substr(1, 2).getAsInteger(16, UnicodeScalarValue)) + // TODO: Report error. + UnicodeScalarValue = 0xFFFD; encodeUTF8(UnicodeScalarValue, Storage); UnquotedValue = UnquotedValue.substr(2); break; @@ -1742,8 +1744,10 @@ StringRef ScalarNode::unescapeDoubleQuoted( StringRef UnquotedValue if (UnquotedValue.size() < 5) // TODO: Report error. break; - unsigned int UnicodeScalarValue = 0; - UnquotedValue.substr(1, 4).getAsInteger(16, UnicodeScalarValue); + unsigned int UnicodeScalarValue; + if (UnquotedValue.substr(1, 4).getAsInteger(16, UnicodeScalarValue)) + // TODO: Report error. 
+ UnicodeScalarValue = 0xFFFD; encodeUTF8(UnicodeScalarValue, Storage); UnquotedValue = UnquotedValue.substr(4); break; @@ -1752,8 +1756,10 @@ StringRef ScalarNode::unescapeDoubleQuoted( StringRef UnquotedValue if (UnquotedValue.size() < 9) // TODO: Report error. break; - unsigned int UnicodeScalarValue = 0; - UnquotedValue.substr(1, 8).getAsInteger(16, UnicodeScalarValue); + unsigned int UnicodeScalarValue; + if (UnquotedValue.substr(1, 8).getAsInteger(16, UnicodeScalarValue)) + // TODO: Report error. + UnicodeScalarValue = 0xFFFD; encodeUTF8(UnicodeScalarValue, Storage); UnquotedValue = UnquotedValue.substr(8); break; @@ -2113,5 +2119,3 @@ bool Document::expectToken(int TK) { } return true; } - -OwningPtr document_iterator::NullDoc; diff --git a/lib/Support/raw_ostream.cpp b/lib/Support/raw_ostream.cpp index 86cdca1..fa69c2d 100644 --- a/lib/Support/raw_ostream.cpp +++ b/lib/Support/raw_ostream.cpp @@ -528,7 +528,8 @@ void raw_fd_ostream::write_impl(const char *Ptr, size_t Size) { } else { // Use ::writev() where available. #if defined(HAVE_WRITEV) - struct iovec IOV = { (void*) Ptr, Size }; + const void *Addr = static_cast(Ptr); + struct iovec IOV = {const_cast(Addr), Size }; ret = ::writev(FD, &IOV, 1); #else ret = ::write(FD, Ptr, Size); @@ -650,6 +651,10 @@ bool raw_fd_ostream::is_displayed() const { return sys::Process::FileDescriptorIsDisplayed(FD); } +bool raw_fd_ostream::has_colors() const { + return sys::Process::FileDescriptorHasColors(FD); +} + //===----------------------------------------------------------------------===// // outs(), errs(), nulls() //===----------------------------------------------------------------------===// diff --git a/lib/TableGen/CMakeLists.txt b/lib/TableGen/CMakeLists.txt index 82f72b0..ba7bf14 100644 --- a/lib/TableGen/CMakeLists.txt +++ b/lib/TableGen/CMakeLists.txt @@ -6,6 +6,7 @@ add_llvm_library(LLVMTableGen Error.cpp Main.cpp Record.cpp + StringMatcher.cpp TableGenAction.cpp TableGenBackend.cpp TGLexer.cpp diff --git a/lib/TableGen/Main.cpp b/lib/TableGen/Main.cpp index 01bc55e..7aeef56 100644 --- a/lib/TableGen/Main.cpp +++ b/lib/TableGen/Main.cpp @@ -34,7 +34,9 @@ namespace { cl::init("-")); cl::opt - DependFilename("d", cl::desc("Dependency filename"), cl::value_desc("filename"), + DependFilename("d", + cl::desc("Dependency filename"), + cl::value_desc("filename"), cl::init("")); cl::opt @@ -53,7 +55,8 @@ int TableGenMain(char *argv0, TableGenAction &Action) { try { // Parse the input file. 
OwningPtr File; - if (error_code ec = MemoryBuffer::getFileOrSTDIN(InputFilename.c_str(), File)) { + if (error_code ec = + MemoryBuffer::getFileOrSTDIN(InputFilename.c_str(), File)) { errs() << "Could not open input file '" << InputFilename << "': " << ec.message() <<"\n"; return 1; @@ -93,7 +96,7 @@ int TableGenMain(char *argv0, TableGenAction &Action) { DepOut.os() << OutputFilename << ":"; const std::vector &Dependencies = Parser.getDependencies(); for (std::vector::const_iterator I = Dependencies.begin(), - E = Dependencies.end(); + E = Dependencies.end(); I != E; ++I) { DepOut.os() << " " << (*I); } diff --git a/lib/TableGen/Record.cpp b/lib/TableGen/Record.cpp index 93eed24..99fdc1f 100644 --- a/lib/TableGen/Record.cpp +++ b/lib/TableGen/Record.cpp @@ -1699,7 +1699,7 @@ void Record::checkName() { assert(TypedName && "Record name is not typed!"); RecTy *Type = TypedName->getType(); if (dynamic_cast(Type) == 0) { - throw "Record name is not a string!"; + throw TGError(getLoc(), "Record name is not a string!"); } } diff --git a/lib/TableGen/StringMatcher.cpp b/lib/TableGen/StringMatcher.cpp new file mode 100644 index 0000000..1668170 --- /dev/null +++ b/lib/TableGen/StringMatcher.cpp @@ -0,0 +1,149 @@ +//===- StringMatcher.cpp - Generate a matcher for input strings -----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the StringMatcher class. +// +//===----------------------------------------------------------------------===// + +#include "llvm/TableGen/StringMatcher.h" +#include "llvm/Support/raw_ostream.h" +#include +using namespace llvm; + +/// FindFirstNonCommonLetter - Find the first character in the keys of the +/// string pairs that is not shared across the whole set of strings. All +/// strings are assumed to have the same length. +static unsigned +FindFirstNonCommonLetter(const std::vector &Matches) { + assert(!Matches.empty()); + for (unsigned i = 0, e = Matches[0]->first.size(); i != e; ++i) { + // Check to see if letter i is the same across the set. + char Letter = Matches[0]->first[i]; + + for (unsigned str = 0, e = Matches.size(); str != e; ++str) + if (Matches[str]->first[i] != Letter) + return i; + } + + return Matches[0]->first.size(); +} + +/// EmitStringMatcherForChar - Given a set of strings that are known to be the +/// same length and whose characters leading up to CharNo are the same, emit +/// code to verify that CharNo and later are the same. +/// +/// \return - True if control can leave the emitted code fragment. +bool StringMatcher:: +EmitStringMatcherForChar(const std::vector &Matches, + unsigned CharNo, unsigned IndentCount) const { + assert(!Matches.empty() && "Must have at least one string to match!"); + std::string Indent(IndentCount*2+4, ' '); + + // If we have verified that the entire string matches, we're done: output the + // matching code. + if (CharNo == Matches[0]->first.size()) { + assert(Matches.size() == 1 && "Had duplicate keys to match on"); + + // If the to-execute code has \n's in it, indent each subsequent line. 
+ StringRef Code = Matches[0]->second; + + std::pair Split = Code.split('\n'); + OS << Indent << Split.first << "\t // \"" << Matches[0]->first << "\"\n"; + + Code = Split.second; + while (!Code.empty()) { + Split = Code.split('\n'); + OS << Indent << Split.first << "\n"; + Code = Split.second; + } + return false; + } + + // Bucket the matches by the character we are comparing. + std::map > MatchesByLetter; + + for (unsigned i = 0, e = Matches.size(); i != e; ++i) + MatchesByLetter[Matches[i]->first[CharNo]].push_back(Matches[i]); + + + // If we have exactly one bucket to match, see how many characters are common + // across the whole set and match all of them at once. + if (MatchesByLetter.size() == 1) { + unsigned FirstNonCommonLetter = FindFirstNonCommonLetter(Matches); + unsigned NumChars = FirstNonCommonLetter-CharNo; + + // Emit code to break out if the prefix doesn't match. + if (NumChars == 1) { + // Do the comparison with if (Str[1] != 'f') + // FIXME: Need to escape general characters. + OS << Indent << "if (" << StrVariableName << "[" << CharNo << "] != '" + << Matches[0]->first[CharNo] << "')\n"; + OS << Indent << " break;\n"; + } else { + // Do the comparison with if memcmp(Str.data()+1, "foo", 3). + // FIXME: Need to escape general strings. + OS << Indent << "if (memcmp(" << StrVariableName << ".data()+" << CharNo + << ", \"" << Matches[0]->first.substr(CharNo, NumChars) << "\", " + << NumChars << "))\n"; + OS << Indent << " break;\n"; + } + + return EmitStringMatcherForChar(Matches, FirstNonCommonLetter, IndentCount); + } + + // Otherwise, we have multiple possible things, emit a switch on the + // character. + OS << Indent << "switch (" << StrVariableName << "[" << CharNo << "]) {\n"; + OS << Indent << "default: break;\n"; + + for (std::map >::iterator LI = + MatchesByLetter.begin(), E = MatchesByLetter.end(); LI != E; ++LI) { + // TODO: escape hard stuff (like \n) if we ever care about it. + OS << Indent << "case '" << LI->first << "':\t // " + << LI->second.size() << " string"; + if (LI->second.size() != 1) OS << 's'; + OS << " to match.\n"; + if (EmitStringMatcherForChar(LI->second, CharNo+1, IndentCount+1)) + OS << Indent << " break;\n"; + } + + OS << Indent << "}\n"; + return true; +} + + +/// Emit - Top level entry point. +/// +void StringMatcher::Emit(unsigned Indent) const { + // If nothing to match, just fall through. + if (Matches.empty()) return; + + // First level categorization: group strings by length. + std::map > MatchesByLength; + + for (unsigned i = 0, e = Matches.size(); i != e; ++i) + MatchesByLength[Matches[i].first.size()].push_back(&Matches[i]); + + // Output a switch statement on length and categorize the elements within each + // bin. + OS.indent(Indent*2+2) << "switch (" << StrVariableName << ".size()) {\n"; + OS.indent(Indent*2+2) << "default: break;\n"; + + for (std::map >::iterator LI = + MatchesByLength.begin(), E = MatchesByLength.end(); LI != E; ++LI) { + OS.indent(Indent*2+2) << "case " << LI->first << ":\t // " + << LI->second.size() + << " string" << (LI->second.size() == 1 ? 
"" : "s") << " to match.\n"; + if (EmitStringMatcherForChar(LI->second, 0, Indent)) + OS.indent(Indent*2+4) << "break;\n"; + } + + OS.indent(Indent*2+2) << "}\n"; +} diff --git a/lib/TableGen/TGParser.cpp b/lib/TableGen/TGParser.cpp index 04c4fc1..b9c7ff6 100644 --- a/lib/TableGen/TGParser.cpp +++ b/lib/TableGen/TGParser.cpp @@ -292,107 +292,78 @@ bool TGParser::AddSubMultiClass(MultiClass *CurMC, /// ProcessForeachDefs - Given a record, apply all of the variable /// values in all surrounding foreach loops, creating new records for /// each combination of values. -bool TGParser::ProcessForeachDefs(Record *CurRec, MultiClass *CurMultiClass, - SMLoc Loc) { +bool TGParser::ProcessForeachDefs(Record *CurRec, SMLoc Loc) { + if (Loops.empty()) + return false; + // We want to instantiate a new copy of CurRec for each combination // of nested loop iterator values. We don't want top instantiate // any copies until we have values for each loop iterator. IterSet IterVals; - for (LoopVector::iterator Loop = Loops.begin(), LoopEnd = Loops.end(); - Loop != LoopEnd; - ++Loop) { - // Process this loop. - if (ProcessForeachDefs(CurRec, CurMultiClass, Loc, - IterVals, *Loop, Loop+1)) { - Error(Loc, - "Could not process loops for def " + CurRec->getNameInitAsString()); - return true; - } - } - - return false; + return ProcessForeachDefs(CurRec, Loc, IterVals); } /// ProcessForeachDefs - Given a record, a loop and a loop iterator, /// apply each of the variable values in this loop and then process /// subloops. -bool TGParser::ProcessForeachDefs(Record *CurRec, MultiClass *CurMultiClass, - SMLoc Loc, IterSet &IterVals, - ForeachLoop &CurLoop, - LoopVector::iterator NextLoop) { - Init *IterVar = CurLoop.IterVar; - ListInit *List = dynamic_cast(CurLoop.ListValue); - - if (List == 0) { - Error(Loc, "Loop list is not a list"); - return true; - } - - // Process each value. - for (int64_t i = 0; i < List->getSize(); ++i) { - Init *ItemVal = List->resolveListElementReference(*CurRec, 0, i); - IterVals.push_back(IterRecord(IterVar, ItemVal)); - - if (IterVals.size() == Loops.size()) { - // Ok, we have all of the iterator values for this point in the - // iteration space. Instantiate a new record to reflect this - // combination of values. - Record *IterRec = new Record(*CurRec); - - // Set the iterator values now. - for (IterSet::iterator i = IterVals.begin(), iend = IterVals.end(); - i != iend; - ++i) { - VarInit *IterVar = dynamic_cast(i->IterVar); - if (IterVar == 0) { - Error(Loc, "foreach iterator is unresolved"); - return true; - } - - TypedInit *IVal = dynamic_cast(i->IterValue); - if (IVal == 0) { - Error(Loc, "foreach iterator value is untyped"); - return true; - } - - IterRec->addValue(RecordVal(IterVar->getName(), IVal->getType(), false)); +bool TGParser::ProcessForeachDefs(Record *CurRec, SMLoc Loc, IterSet &IterVals){ + // Recursively build a tuple of iterator values. + if (IterVals.size() != Loops.size()) { + assert(IterVals.size() < Loops.size()); + ForeachLoop &CurLoop = Loops[IterVals.size()]; + ListInit *List = dynamic_cast(CurLoop.ListValue); + if (List == 0) { + Error(Loc, "Loop list is not a list"); + return true; + } - if (SetValue(IterRec, Loc, IterVar->getName(), - std::vector(), IVal)) { - Error(Loc, "when instantiating this def"); - return true; - } + // Process each value. 
+ for (int64_t i = 0; i < List->getSize(); ++i) { + Init *ItemVal = List->resolveListElementReference(*CurRec, 0, i); + IterVals.push_back(IterRecord(CurLoop.IterVar, ItemVal)); + if (ProcessForeachDefs(CurRec, Loc, IterVals)) + return true; + IterVals.pop_back(); + } + return false; + } - // Resolve it next. - IterRec->resolveReferencesTo(IterRec->getValue(IterVar->getName())); + // This is the bottom of the recursion. We have all of the iterator values + // for this point in the iteration space. Instantiate a new record to + // reflect this combination of values. + Record *IterRec = new Record(*CurRec); - // Remove it. - IterRec->removeValue(IterVar->getName()); - } + // Set the iterator values now. + for (unsigned i = 0, e = IterVals.size(); i != e; ++i) { + VarInit *IterVar = IterVals[i].IterVar; + TypedInit *IVal = dynamic_cast(IterVals[i].IterValue); + if (IVal == 0) { + Error(Loc, "foreach iterator value is untyped"); + return true; + } - if (Records.getDef(IterRec->getNameInitAsString())) { - Error(Loc, "def already exists: " + IterRec->getNameInitAsString()); - return true; - } + IterRec->addValue(RecordVal(IterVar->getName(), IVal->getType(), false)); - Records.addDef(IterRec); - IterRec->resolveReferences(); + if (SetValue(IterRec, Loc, IterVar->getName(), + std::vector(), IVal)) { + Error(Loc, "when instantiating this def"); + return true; } - if (NextLoop != Loops.end()) { - // Process nested loops. - if (ProcessForeachDefs(CurRec, CurMultiClass, Loc, IterVals, *NextLoop, - NextLoop+1)) { - Error(Loc, - "Could not process loops for def " + - CurRec->getNameInitAsString()); - return true; - } - } + // Resolve it next. + IterRec->resolveReferencesTo(IterRec->getValue(IterVar->getName())); - // We're done with this iterator. - IterVals.pop_back(); + // Remove it. + IterRec->removeValue(IterVar->getName()); } + + if (Records.getDef(IterRec->getNameInitAsString())) { + Error(Loc, "def already exists: " + IterRec->getNameInitAsString()); + return true; + } + + Records.addDef(IterRec); + IterRec->resolveReferences(); return false; } @@ -1726,9 +1697,11 @@ Init *TGParser::ParseDeclaration(Record *CurRec, /// the name of the declared object or a NULL Init on error. Return /// the name of the parsed initializer list through ForeachListName. /// -/// ForeachDeclaration ::= ID '=' Value +/// ForeachDeclaration ::= ID '=' '[' ValueList ']' +/// ForeachDeclaration ::= ID '=' '{' RangeList '}' +/// ForeachDeclaration ::= ID '=' RangePiece /// -Init *TGParser::ParseForeachDeclaration(Init *&ForeachListValue) { +VarInit *TGParser::ParseForeachDeclaration(ListInit *&ForeachListValue) { if (Lex.getCode() != tgtok::Id) { TokError("Expected identifier in foreach declaration"); return 0; @@ -1744,26 +1717,59 @@ Init *TGParser::ParseForeachDeclaration(Init *&ForeachListValue) { } Lex.Lex(); // Eat the '=' - // Expect a list initializer. 
- ForeachListValue = ParseValue(0, 0, ParseForeachMode); + RecTy *IterType = 0; + std::vector Ranges; - TypedInit *TypedList = dynamic_cast(ForeachListValue); - if (TypedList == 0) { - TokError("Value list is untyped"); - return 0; + switch (Lex.getCode()) { + default: TokError("Unknown token when expecting a range list"); return 0; + case tgtok::l_square: { // '[' ValueList ']' + Init *List = ParseSimpleValue(0, 0, ParseForeachMode); + ForeachListValue = dynamic_cast(List); + if (ForeachListValue == 0) { + TokError("Expected a Value list"); + return 0; + } + RecTy *ValueType = ForeachListValue->getType(); + ListRecTy *ListType = dynamic_cast(ValueType); + if (ListType == 0) { + TokError("Value list is not of list type"); + return 0; + } + IterType = ListType->getElementType(); + break; } - RecTy *ValueType = TypedList->getType(); - ListRecTy *ListType = dynamic_cast(ValueType); - if (ListType == 0) { - TokError("Value list is not of list type"); - return 0; + case tgtok::IntVal: { // RangePiece. + if (ParseRangePiece(Ranges)) + return 0; + break; } - RecTy *IterType = ListType->getElementType(); - VarInit *IterVar = VarInit::get(DeclName, IterType); + case tgtok::l_brace: { // '{' RangeList '}' + Lex.Lex(); // eat the '{' + Ranges = ParseRangeList(); + if (Lex.getCode() != tgtok::r_brace) { + TokError("expected '}' at end of bit range list"); + return 0; + } + Lex.Lex(); + break; + } + } - return IterVar; + if (!Ranges.empty()) { + assert(!IterType && "Type already initialized?"); + IterType = IntRecTy::get(); + std::vector Values; + for (unsigned i = 0, e = Ranges.size(); i != e; ++i) + Values.push_back(IntInit::get(Ranges[i])); + ForeachListValue = ListInit::get(Values, IterType); + } + + if (!IterType) + return 0; + + return VarInit::get(DeclName, IterType); } /// ParseTemplateArgList - Read a template argument list, which is a non-empty @@ -1932,7 +1938,7 @@ bool TGParser::ParseDef(MultiClass *CurMultiClass) { // Parse ObjectName and make a record for it. Record *CurRec = new Record(ParseObjectName(CurMultiClass), DefLoc, Records); - if (!CurMultiClass) { + if (!CurMultiClass && Loops.empty()) { // Top-level def definition. // Ensure redefinition doesn't happen. @@ -1942,7 +1948,7 @@ bool TGParser::ParseDef(MultiClass *CurMultiClass) { return true; } Records.addDef(CurRec); - } else { + } else if (CurMultiClass) { // Otherwise, a def inside a multiclass, add it to the multiclass. for (unsigned i = 0, e = CurMultiClass->DefPrototypes.size(); i != e; ++i) if (CurMultiClass->DefPrototypes[i]->getNameInit() @@ -1978,7 +1984,7 @@ bool TGParser::ParseDef(MultiClass *CurMultiClass) { } } - if (ProcessForeachDefs(CurRec, CurMultiClass, DefLoc)) { + if (ProcessForeachDefs(CurRec, DefLoc)) { Error(DefLoc, "Could not process loops for def" + CurRec->getNameInitAsString()); return true; @@ -1999,8 +2005,8 @@ bool TGParser::ParseForeach(MultiClass *CurMultiClass) { // Make a temporary object to record items associated with the for // loop. - Init *ListValue = 0; - Init *IterName = ParseForeachDeclaration(ListValue); + ListInit *ListValue = 0; + VarInit *IterName = ParseForeachDeclaration(ListValue); if (IterName == 0) return TokError("expected declaration in for"); @@ -2278,23 +2284,33 @@ InstantiateMulticlassDef(MultiClass &MC, Ref.Rec = DefProto; AddSubClass(CurRec, Ref); - if (DefNameString == 0) { - // We must resolve references to NAME. 
- if (SetValue(CurRec, Ref.RefLoc, "NAME", std::vector(), - DefmPrefix)) { - Error(DefmPrefixLoc, "Could not resolve " - + CurRec->getNameInitAsString() + ":NAME to '" - + DefmPrefix->getAsUnquotedString() + "'"); - return 0; - } + // Set the value for NAME. We don't resolve references to it 'til later, + // though, so that uses in nested multiclass names don't get + // confused. + if (SetValue(CurRec, Ref.RefLoc, "NAME", std::vector(), + DefmPrefix)) { + Error(DefmPrefixLoc, "Could not resolve " + + CurRec->getNameInitAsString() + ":NAME to '" + + DefmPrefix->getAsUnquotedString() + "'"); + return 0; + } + // If the DefNameString didn't resolve, we probably have a reference to + // NAME and need to replace it. We need to do at least this much greedily, + // otherwise nested multiclasses will end up with incorrect NAME expansions. + if (DefNameString == 0) { RecordVal *DefNameRV = CurRec->getValue("NAME"); CurRec->resolveReferencesTo(DefNameRV); } if (!CurMultiClass) { - // We do this after resolving NAME because before resolution, many - // multiclass defs will have the same name expression. If we are + // Now that we're at the top level, resolve all NAME references + // in the resultant defs that weren't in the def names themselves. + RecordVal *DefNameRV = CurRec->getValue("NAME"); + CurRec->resolveReferencesTo(DefNameRV); + + // Now that NAME references are resolved and we're at the top level of + // any multiclass expansions, add the record to the RecordKeeper. If we are // currently in a multiclass, it means this defm appears inside a // multiclass and its name won't be fully resolvable until we see // the top-level defm. Therefore, we don't add this to the diff --git a/lib/TableGen/TGParser.h b/lib/TableGen/TGParser.h index b8e7cb1..3d2c72c 100644 --- a/lib/TableGen/TGParser.h +++ b/lib/TableGen/TGParser.h @@ -45,10 +45,11 @@ namespace llvm { /// ForeachLoop - Record the iteration state associated with a for loop. /// This is used to instantiate items in the loop body. struct ForeachLoop { - Init *IterVar; - Init *ListValue; + VarInit *IterVar; + ListInit *ListValue; - ForeachLoop(Init *IVar, Init *LValue) : IterVar(IVar), ListValue(LValue) {} + ForeachLoop(VarInit *IVar, ListInit *LValue) + : IterVar(IVar), ListValue(LValue) {} }; class TGParser { @@ -113,20 +114,17 @@ private: // Semantic analysis methods. // IterRecord: Map an iterator name to a value. struct IterRecord { - Init *IterVar; + VarInit *IterVar; Init *IterValue; - IterRecord(Init *Var, Init *Val) : IterVar(Var), IterValue(Val) {} + IterRecord(VarInit *Var, Init *Val) : IterVar(Var), IterValue(Val) {} }; // IterSet: The set of all iterator values at some point in the // iteration space. typedef std::vector IterSet; - bool ProcessForeachDefs(Record *CurRec, MultiClass *CurMultiClass, - SMLoc Loc); - bool ProcessForeachDefs(Record *CurRec, MultiClass *CurMultiClass, - SMLoc Loc, IterSet &IterVals, ForeachLoop &CurLoop, - LoopVector::iterator NextLoop); + bool ProcessForeachDefs(Record *CurRec, SMLoc Loc); + bool ProcessForeachDefs(Record *CurRec, SMLoc Loc, IterSet &IterVals); private: // Parser methods. bool ParseObjectList(MultiClass *MC = 0); @@ -160,7 +158,7 @@ private: // Parser methods. 
bool ParseTemplateArgList(Record *CurRec); Init *ParseDeclaration(Record *CurRec, bool ParsingTemplateArgs); - Init *ParseForeachDeclaration(Init *&ForeachListValue); + VarInit *ParseForeachDeclaration(ListInit *&ForeachListValue); SubClassReference ParseSubClassReference(Record *CurRec, bool isDefm); SubMultiClassReference ParseSubMultiClassReference(MultiClass *CurMC); diff --git a/lib/TableGen/TableGenBackend.cpp b/lib/TableGen/TableGenBackend.cpp index 09bcc7a..7c8367a 100644 --- a/lib/TableGen/TableGenBackend.cpp +++ b/lib/TableGen/TableGenBackend.cpp @@ -1,4 +1,4 @@ -//===- TableGenBackend.cpp - Base class for TableGen Backends ---*- C++ -*-===// +//===- TableGenBackend.cpp - Utilities for TableGen Backends ----*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -11,17 +11,27 @@ // //===----------------------------------------------------------------------===// +#include "llvm/ADT/Twine.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/TableGen/TableGenBackend.h" -#include "llvm/TableGen/Record.h" using namespace llvm; -void TableGenBackend::anchor() { } - -void TableGenBackend::EmitSourceFileHeader(StringRef Desc, - raw_ostream &OS) const { - OS << "//===- TableGen'erated file -------------------------------------*-" - " C++ -*-===//\n//\n// " << Desc << "\n//\n// Automatically generate" - "d file, do not edit!\n//\n//===------------------------------------" - "----------------------------------===//\n\n"; +static void printLine(raw_ostream &OS, const Twine &Prefix, char Fill, + StringRef Suffix) { + uint64_t Pos = OS.tell(); + OS << Prefix; + for (unsigned i = OS.tell() - Pos, e = 80 - Suffix.size(); i != e; ++i) + OS << Fill; + OS << Suffix << '\n'; } +void llvm::emitSourceFileHeader(StringRef Desc, raw_ostream &OS) { + printLine(OS, "/*===- TableGen'erated file ", '-', "*- C++ -*-===*\\"); + printLine(OS, "|*", ' ', "*|"); + printLine(OS, "|* " + Desc, ' ', "*|"); + printLine(OS, "|*", ' ', "*|"); + printLine(OS, "|* Automatically generated file, do not edit!", ' ', "*|"); + printLine(OS, "|*", ' ', "*|"); + printLine(OS, "\\*===", '-', "===*/"); + OS << '\n'; +} diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td index 9b0cb0c..69e2346 100644 --- a/lib/Target/ARM/ARM.td +++ b/lib/Target/ARM/ARM.td @@ -141,7 +141,7 @@ def ProcA9 : SubtargetFeature<"a9", "ARMProcFamily", "CortexA9", FeatureAvoidPartialCPSR]>; class ProcNoItin Features> - : Processor; + : Processor; // V4 Processors. def : ProcNoItin<"generic", []>; @@ -204,13 +204,13 @@ def : Processor<"arm1156t2f-s", ARMV6Itineraries, [HasV6T2Ops, FeatureVFP2, FeatureDSPThumb2]>; // V7a Processors. 
-def : Processor<"cortex-a8", CortexA8Itineraries, +def : ProcessorModel<"cortex-a8", CortexA8Model, [ProcA8, HasV7Ops, FeatureNEON, FeatureDB, FeatureDSPThumb2, FeatureHasRAS]>; -def : Processor<"cortex-a9", CortexA9Itineraries, +def : ProcessorModel<"cortex-a9", CortexA9Model, [ProcA9, HasV7Ops, FeatureNEON, FeatureDB, FeatureDSPThumb2, FeatureHasRAS]>; -def : Processor<"cortex-a9-mp", CortexA9Itineraries, +def : ProcessorModel<"cortex-a9-mp", CortexA9Model, [ProcA9, HasV7Ops, FeatureNEON, FeatureDB, FeatureDSPThumb2, FeatureMP, FeatureHasRAS]>; @@ -224,7 +224,7 @@ def : ProcNoItin<"cortex-m3", [HasV7Ops, def : ProcNoItin<"cortex-m4", [HasV7Ops, FeatureThumb2, FeatureNoARM, FeatureDB, FeatureHWDiv, FeatureDSPThumb2, - FeatureT2XtPk, FeatureVFP2, + FeatureT2XtPk, FeatureVFP4, FeatureVFPOnlySP, FeatureMClass]>; //===----------------------------------------------------------------------===// diff --git a/lib/Target/ARM/ARMAsmPrinter.cpp b/lib/Target/ARM/ARMAsmPrinter.cpp index 410790a..8536b94 100644 --- a/lib/Target/ARM/ARMAsmPrinter.cpp +++ b/lib/Target/ARM/ARMAsmPrinter.cpp @@ -23,8 +23,8 @@ #include "InstPrinter/ARMInstPrinter.h" #include "MCTargetDesc/ARMAddressingModes.h" #include "MCTargetDesc/ARMMCExpr.h" -#include "llvm/Analysis/DebugInfo.h" #include "llvm/Constants.h" +#include "llvm/DebugInfo.h" #include "llvm/Module.h" #include "llvm/Type.h" #include "llvm/Assembly/Writer.h" @@ -283,9 +283,16 @@ void ARMAsmPrinter::EmitDwarfRegOp(const MachineLocation &MLoc) const { } } -void ARMAsmPrinter::EmitFunctionEntryLabel() { - OutStreamer.ForceCodeRegion(); +void ARMAsmPrinter::EmitFunctionBodyEnd() { + // Make sure to terminate any constant pools that were at the end + // of the function. + if (!InConstantPool) + return; + InConstantPool = false; + OutStreamer.EmitDataRegion(MCDR_DataRegionEnd); +} +void ARMAsmPrinter::EmitFunctionEntryLabel() { if (AFI->isThumbFunction()) { OutStreamer.EmitAssemblerFlag(MCAF_Code16); OutStreamer.EmitThumbFunc(CurrentFnSym); @@ -415,7 +422,9 @@ bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum, if (ExtraCode[1] != 0) return true; // Unknown modifier. switch (ExtraCode[0]) { - default: return true; // Unknown modifier. + default: + // See if this is a generic print operand + return AsmPrinter::PrintAsmOperand(MI, OpNum, AsmVariant, ExtraCode, O); case 'a': // Print as a memory address. if (MI->getOperand(OpNum).isReg()) { O << "[" @@ -434,15 +443,18 @@ bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum, printOperand(MI, OpNum, O); return false; case 'y': // Print a VFP single precision register as indexed double. - // This uses the ordering of the alias table to get the first 'd' register - // that overlaps the 's' register. Also, s0 is an odd register, hence the - // odd modulus check below. if (MI->getOperand(OpNum).isReg()) { unsigned Reg = MI->getOperand(OpNum).getReg(); const TargetRegisterInfo *TRI = MF->getTarget().getRegisterInfo(); - O << ARMInstPrinter::getRegisterName(TRI->getAliasSet(Reg)[0]) << - (((Reg % 2) == 1) ? "[0]" : "[1]"); - return false; + // Find the 'd' register that has this 's' register as a sub-register, + // and determine the lane number. + for (MCSuperRegIterator SR(Reg, TRI); SR.isValid(); ++SR) { + if (!ARM::DPRRegClass.contains(*SR)) + continue; + bool Lane0 = TRI->getSubReg(*SR, ARM::ssub_0) == Reg; + O << ARMInstPrinter::getRegisterName(*SR) << (Lane0 ? 
"[0]" : "[1]"); + return false; + } } return true; case 'B': // Bitwise inverse of integer or symbol without a preceding #. @@ -517,10 +529,23 @@ bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum, return false; } - // These modifiers are not yet supported. + // This modifier is not yet supported. case 'h': // A range of VFP/NEON registers suitable for VLD1/VST1. - case 'H': // The highest-numbered register of a pair. return true; + case 'H': // The highest-numbered register of a pair. + const MachineOperand &MO = MI->getOperand(OpNum); + if (!MO.isReg()) + return true; + const TargetRegisterClass &RC = ARM::GPRRegClass; + const MachineFunction &MF = *MI->getParent()->getParent(); + const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo(); + + unsigned RegIdx = TRI->getEncodingValue(MO.getReg()); + RegIdx |= 1; //The odd register is also the higher-numbered one of a pair. + + unsigned Reg = RC.getRegister(RegIdx); + O << ARMInstPrinter::getRegisterName(Reg); + return false; } } @@ -934,13 +959,13 @@ void ARMAsmPrinter::EmitJumpTable(const MachineInstr *MI) { const MachineOperand &MO2 = MI->getOperand(OpNum+1); // Unique Id unsigned JTI = MO1.getIndex(); - // Tag the jump table appropriately for precise disassembly. - OutStreamer.EmitJumpTable32Region(); - // Emit a label for the jump table. MCSymbol *JTISymbol = GetARMJTIPICJumpTableLabel2(JTI, MO2.getImm()); OutStreamer.EmitLabel(JTISymbol); + // Mark the jump table as data-in-code. + OutStreamer.EmitDataRegion(MCDR_DataRegionJT32); + // Emit each entry of the table. const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo(); const std::vector &JT = MJTI->getJumpTables(); @@ -969,6 +994,8 @@ void ARMAsmPrinter::EmitJumpTable(const MachineInstr *MI) { OutContext); OutStreamer.EmitValue(Expr, 4); } + // Mark the end of jump table data-in-code region. + OutStreamer.EmitDataRegion(MCDR_DataRegionEnd); } void ARMAsmPrinter::EmitJump2Table(const MachineInstr *MI) { @@ -978,15 +1005,6 @@ void ARMAsmPrinter::EmitJump2Table(const MachineInstr *MI) { const MachineOperand &MO2 = MI->getOperand(OpNum+1); // Unique Id unsigned JTI = MO1.getIndex(); - // Emit a label for the jump table. - if (MI->getOpcode() == ARM::t2TBB_JT) { - OutStreamer.EmitJumpTable8Region(); - } else if (MI->getOpcode() == ARM::t2TBH_JT) { - OutStreamer.EmitJumpTable16Region(); - } else { - OutStreamer.EmitJumpTable32Region(); - } - MCSymbol *JTISymbol = GetARMJTIPICJumpTableLabel2(JTI, MO2.getImm()); OutStreamer.EmitLabel(JTISymbol); @@ -995,10 +1013,15 @@ void ARMAsmPrinter::EmitJump2Table(const MachineInstr *MI) { const std::vector &JT = MJTI->getJumpTables(); const std::vector &JTBBs = JT[JTI].MBBs; unsigned OffsetWidth = 4; - if (MI->getOpcode() == ARM::t2TBB_JT) + if (MI->getOpcode() == ARM::t2TBB_JT) { OffsetWidth = 1; - else if (MI->getOpcode() == ARM::t2TBH_JT) + // Mark the jump table as data-in-code. + OutStreamer.EmitDataRegion(MCDR_DataRegionJT8); + } else if (MI->getOpcode() == ARM::t2TBH_JT) { OffsetWidth = 2; + // Mark the jump table as data-in-code. + OutStreamer.EmitDataRegion(MCDR_DataRegionJT16); + } for (unsigned i = 0, e = JTBBs.size(); i != e; ++i) { MachineBasicBlock *MBB = JTBBs[i]; @@ -1031,6 +1054,11 @@ void ARMAsmPrinter::EmitJump2Table(const MachineInstr *MI) { OutContext); OutStreamer.EmitValue(Expr, OffsetWidth); } + // Mark the end of jump table data-in-code region. 32-bit offsets use + // actual branch instructions here, so we don't mark those as a data-region + // at all. 
+ if (OffsetWidth != 4) + OutStreamer.EmitDataRegion(MCDR_DataRegionEnd); } void ARMAsmPrinter::PrintDebugValueComment(const MachineInstr *MI, @@ -1121,8 +1149,14 @@ void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) { assert(SrcReg == ARM::SP && "Only stack pointer as a source reg is supported"); for (unsigned i = StartOp, NumOps = MI->getNumOperands() - NumOffset; - i != NumOps; ++i) - RegList.push_back(MI->getOperand(i).getReg()); + i != NumOps; ++i) { + const MachineOperand &MO = MI->getOperand(i); + // Actually, there should never be any impdef stuff here. Skip it + // temporary to workaround PR11902. + if (MO.isImplicit()) + continue; + RegList.push_back(MO.getReg()); + } break; case ARM::STR_PRE_IMM: case ARM::STR_PRE_REG: @@ -1208,8 +1242,11 @@ extern cl::opt EnableARMEHABI; #include "ARMGenMCPseudoLowering.inc" void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { - if (MI->getOpcode() != ARM::CONSTPOOL_ENTRY) - OutStreamer.EmitCodeRegion(); + // If we just ended a constant pool, mark it as such. + if (InConstantPool && MI->getOpcode() != ARM::CONSTPOOL_ENTRY) { + OutStreamer.EmitDataRegion(MCDR_DataRegionEnd); + InConstantPool = false; + } // Emit unwinding stuff for frame-related instructions if (EnableARMEHABI && MI->getFlag(MachineInstr::FrameSetup)) @@ -1565,9 +1602,12 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { unsigned LabelId = (unsigned)MI->getOperand(0).getImm(); unsigned CPIdx = (unsigned)MI->getOperand(1).getIndex(); - // Mark the constant pool entry as data if we're not already in a data - // region. - OutStreamer.EmitDataRegion(); + // If this is the first entry of the pool, mark it. + if (!InConstantPool) { + OutStreamer.EmitDataRegion(MCDR_DataRegion); + InConstantPool = true; + } + OutStreamer.EmitLabel(GetCPISymbol(LabelId)); const MachineConstantPoolEntry &MCPE = MCP->getConstants()[CPIdx]; diff --git a/lib/Target/ARM/ARMAsmPrinter.h b/lib/Target/ARM/ARMAsmPrinter.h index af3f75a..3555e8f5 100644 --- a/lib/Target/ARM/ARMAsmPrinter.h +++ b/lib/Target/ARM/ARMAsmPrinter.h @@ -44,9 +44,12 @@ class LLVM_LIBRARY_VISIBILITY ARMAsmPrinter : public AsmPrinter { /// MachineFunction. const MachineConstantPool *MCP; + /// InConstantPool - Maintain state when emitting a sequence of constant + /// pool entries so we can properly mark them as data regions. + bool InConstantPool; public: explicit ARMAsmPrinter(TargetMachine &TM, MCStreamer &Streamer) - : AsmPrinter(TM, Streamer), AFI(NULL), MCP(NULL) { + : AsmPrinter(TM, Streamer), AFI(NULL), MCP(NULL), InConstantPool(false) { Subtarget = &TM.getSubtarget(); } @@ -70,6 +73,7 @@ public: bool runOnMachineFunction(MachineFunction &F); virtual void EmitConstantPool() {} // we emit constant pools customly! + virtual void EmitFunctionBodyEnd(); virtual void EmitFunctionEntryLabel(); void EmitStartOfAsmFile(Module &M); void EmitEndOfAsmFile(Module &M); diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp index c6280f8..057fd71 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -51,9 +51,9 @@ WidenVMOVS("widen-vmovs", cl::Hidden, cl::init(true), /// ARM_MLxEntry - Record information about MLA / MLS instructions. 
struct ARM_MLxEntry { - unsigned MLxOpc; // MLA / MLS opcode - unsigned MulOpc; // Expanded multiplication opcode - unsigned AddSubOpc; // Expanded add / sub opcode + uint16_t MLxOpc; // MLA / MLS opcode + uint16_t MulOpc; // Expanded multiplication opcode + uint16_t AddSubOpc; // Expanded add / sub opcode bool NegAcc; // True if the acc is negated before the add / sub. bool HasLane; // True if instruction has an extra "lane" operand. }; @@ -795,8 +795,28 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, } else llvm_unreachable("Unknown reg class!"); break; + case 24: + if (ARM::DTripleRegClass.hasSubClassEq(RC)) { + // Use aligned spills if the stack can be realigned. + if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) { + AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VST1d64TPseudo)) + .addFrameIndex(FI).addImm(16) + .addReg(SrcReg, getKillRegState(isKill)) + .addMemOperand(MMO)); + } else { + MachineInstrBuilder MIB = + AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMDIA)) + .addFrameIndex(FI)) + .addMemOperand(MMO); + MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI); + MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI); + AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI); + } + } else + llvm_unreachable("Unknown reg class!"); + break; case 32: - if (ARM::QQPRRegClass.hasSubClassEq(RC)) { + if (ARM::QQPRRegClass.hasSubClassEq(RC) || ARM::DQuadRegClass.hasSubClassEq(RC)) { if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) { // FIXME: It's possible to only store part of the QQ register if the // spilled def has a sub-register index. @@ -868,6 +888,8 @@ ARMBaseInstrInfo::isStoreToStackSlot(const MachineInstr *MI, } break; case ARM::VST1q64: + case ARM::VST1d64TPseudo: + case ARM::VST1d64QPseudo: if (MI->getOperand(0).isFI() && MI->getOperand(2).getSubReg() == 0) { FrameIndex = MI->getOperand(0).getIndex(); @@ -942,8 +964,28 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, } else llvm_unreachable("Unknown reg class!"); break; - case 32: - if (ARM::QQPRRegClass.hasSubClassEq(RC)) { + case 24: + if (ARM::DTripleRegClass.hasSubClassEq(RC)) { + if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) { + AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLD1d64TPseudo), DestReg) + .addFrameIndex(FI).addImm(16) + .addMemOperand(MMO)); + } else { + MachineInstrBuilder MIB = + AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMDIA)) + .addFrameIndex(FI) + .addMemOperand(MMO)); + MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead, TRI); + MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead, TRI); + MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead, TRI); + if (TargetRegisterInfo::isPhysicalRegister(DestReg)) + MIB.addReg(DestReg, RegState::ImplicitDefine); + } + } else + llvm_unreachable("Unknown reg class!"); + break; + case 32: + if (ARM::QQPRRegClass.hasSubClassEq(RC) || ARM::DQuadRegClass.hasSubClassEq(RC)) { if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) { AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLD1d64QPseudo), DestReg) .addFrameIndex(FI).addImm(16) @@ -1016,6 +1058,8 @@ ARMBaseInstrInfo::isLoadFromStackSlot(const MachineInstr *MI, } break; case ARM::VLD1q64: + case ARM::VLD1d64TPseudo: + case ARM::VLD1d64QPseudo: if (MI->getOperand(1).isFI() && MI->getOperand(0).getSubReg() == 0) { FrameIndex = MI->getOperand(1).getIndex(); @@ -1531,11 +1575,11 @@ ARMBaseInstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const { /// This will go away once we can teach 
tblgen how to set the optional CPSR def /// operand itself. struct AddSubFlagsOpcodePair { - unsigned PseudoOpc; - unsigned MachineOpc; + uint16_t PseudoOpc; + uint16_t MachineOpc; }; -static AddSubFlagsOpcodePair AddSubFlagsOpcodeMap[] = { +static const AddSubFlagsOpcodePair AddSubFlagsOpcodeMap[] = { {ARM::ADDSri, ARM::ADDri}, {ARM::ADDSrr, ARM::ADDrr}, {ARM::ADDSrsi, ARM::ADDrsi}, @@ -1563,14 +1607,9 @@ static AddSubFlagsOpcodePair AddSubFlagsOpcodeMap[] = { }; unsigned llvm::convertAddSubFlagsOpcode(unsigned OldOpc) { - static const int NPairs = - sizeof(AddSubFlagsOpcodeMap) / sizeof(AddSubFlagsOpcodePair); - for (AddSubFlagsOpcodePair *OpcPair = &AddSubFlagsOpcodeMap[0], - *End = &AddSubFlagsOpcodeMap[NPairs]; OpcPair != End; ++OpcPair) { - if (OldOpc == OpcPair->PseudoOpc) { - return OpcPair->MachineOpc; - } - } + for (unsigned i = 0, e = array_lengthof(AddSubFlagsOpcodeMap); i != e; ++i) + if (OldOpc == AddSubFlagsOpcodeMap[i].PseudoOpc) + return AddSubFlagsOpcodeMap[i].MachineOpc; return 0; } @@ -1742,20 +1781,33 @@ bool llvm::rewriteARMFrameIndex(MachineInstr &MI, unsigned FrameRegIdx, return Offset == 0; } +/// analyzeCompare - For a comparison instruction, return the source registers +/// in SrcReg and SrcReg2 if having two register operands, and the value it +/// compares against in CmpValue. Return true if the comparison instruction +/// can be analyzed. bool ARMBaseInstrInfo:: -AnalyzeCompare(const MachineInstr *MI, unsigned &SrcReg, int &CmpMask, - int &CmpValue) const { +analyzeCompare(const MachineInstr *MI, unsigned &SrcReg, unsigned &SrcReg2, + int &CmpMask, int &CmpValue) const { switch (MI->getOpcode()) { default: break; case ARM::CMPri: case ARM::t2CMPri: SrcReg = MI->getOperand(0).getReg(); + SrcReg2 = 0; CmpMask = ~0; CmpValue = MI->getOperand(1).getImm(); return true; + case ARM::CMPrr: + case ARM::t2CMPrr: + SrcReg = MI->getOperand(0).getReg(); + SrcReg2 = MI->getOperand(1).getReg(); + CmpMask = ~0; + CmpValue = 0; + return true; case ARM::TSTri: case ARM::t2TSTri: SrcReg = MI->getOperand(0).getReg(); + SrcReg2 = 0; CmpMask = MI->getOperand(1).getImm(); CmpValue = 0; return true; @@ -1793,20 +1845,67 @@ static bool isSuitableForMask(MachineInstr *&MI, unsigned SrcReg, return false; } -/// OptimizeCompareInstr - Convert the instruction supplying the argument to the -/// comparison into one that sets the zero bit in the flags register. -bool ARMBaseInstrInfo:: -OptimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, int CmpMask, - int CmpValue, const MachineRegisterInfo *MRI) const { - if (CmpValue != 0) - return false; +/// getSwappedCondition - assume the flags are set by MI(a,b), return +/// the condition code if we modify the instructions such that flags are +/// set by MI(b,a). +inline static ARMCC::CondCodes getSwappedCondition(ARMCC::CondCodes CC) { + switch (CC) { + default: return ARMCC::AL; + case ARMCC::EQ: return ARMCC::EQ; + case ARMCC::NE: return ARMCC::NE; + case ARMCC::HS: return ARMCC::LS; + case ARMCC::LO: return ARMCC::HI; + case ARMCC::HI: return ARMCC::LO; + case ARMCC::LS: return ARMCC::HS; + case ARMCC::GE: return ARMCC::LE; + case ARMCC::LT: return ARMCC::GT; + case ARMCC::GT: return ARMCC::LT; + case ARMCC::LE: return ARMCC::GE; + } +} + +/// isRedundantFlagInstr - check whether the first instruction, whose only +/// purpose is to update flags, can be made redundant. +/// CMPrr can be made redundant by SUBrr if the operands are the same. +/// CMPri can be made redundant by SUBri if the operands are the same. 
+/// This function can be extended later on. +inline static bool isRedundantFlagInstr(MachineInstr *CmpI, unsigned SrcReg, + unsigned SrcReg2, int ImmValue, + MachineInstr *OI) { + if ((CmpI->getOpcode() == ARM::CMPrr || + CmpI->getOpcode() == ARM::t2CMPrr) && + (OI->getOpcode() == ARM::SUBrr || + OI->getOpcode() == ARM::t2SUBrr) && + ((OI->getOperand(1).getReg() == SrcReg && + OI->getOperand(2).getReg() == SrcReg2) || + (OI->getOperand(1).getReg() == SrcReg2 && + OI->getOperand(2).getReg() == SrcReg))) + return true; - MachineRegisterInfo::def_iterator DI = MRI->def_begin(SrcReg); - if (llvm::next(DI) != MRI->def_end()) - // Only support one definition. - return false; + if ((CmpI->getOpcode() == ARM::CMPri || + CmpI->getOpcode() == ARM::t2CMPri) && + (OI->getOpcode() == ARM::SUBri || + OI->getOpcode() == ARM::t2SUBri) && + OI->getOperand(1).getReg() == SrcReg && + OI->getOperand(2).getImm() == ImmValue) + return true; + return false; +} - MachineInstr *MI = &*DI; +/// optimizeCompareInstr - Convert the instruction supplying the argument to the +/// comparison into one that sets the zero bit in the flags register; +/// Remove a redundant Compare instruction if an earlier instruction can set the +/// flags in the same way as Compare. +/// E.g. SUBrr(r1,r2) and CMPrr(r1,r2). We also handle the case where two +/// operands are swapped: SUBrr(r1,r2) and CMPrr(r2,r1), by updating the +/// condition code of instructions which use the flags. +bool ARMBaseInstrInfo:: +optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2, + int CmpMask, int CmpValue, + const MachineRegisterInfo *MRI) const { + // Get the unique definition of SrcReg. + MachineInstr *MI = MRI->getUniqueVRegDef(SrcReg); + if (!MI) return false; // Masked compares sometimes use the same register as the corresponding 'and'. if (CmpMask != ~0) { @@ -1825,32 +1924,49 @@ OptimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, int CmpMask, } } - // Conservatively refuse to convert an instruction which isn't in the same BB - // as the comparison. - if (MI->getParent() != CmpInstr->getParent()) - return false; - - // Check that CPSR isn't set between the comparison instruction and the one we - // want to change. - MachineBasicBlock::iterator I = CmpInstr,E = MI, B = MI->getParent()->begin(); + // Get ready to iterate backward from CmpInstr. + MachineBasicBlock::iterator I = CmpInstr, E = MI, + B = CmpInstr->getParent()->begin(); // Early exit if CmpInstr is at the beginning of the BB. if (I == B) return false; + // There are two possible candidates which can be changed to set CPSR: + // One is MI, the other is a SUB instruction. + // For CMPrr(r1,r2), we are looking for SUB(r1,r2) or SUB(r2,r1). + // For CMPri(r1, CmpValue), we are looking for SUBri(r1, CmpValue). + MachineInstr *Sub = NULL; + if (SrcReg2 != 0) + // MI is not a candidate for CMPrr. + MI = NULL; + else if (MI->getParent() != CmpInstr->getParent() || CmpValue != 0) { + // Conservatively refuse to convert an instruction which isn't in the same + // BB as the comparison. + // For CMPri, we need to check Sub, thus we can't return here. + if (CmpInstr->getOpcode() == ARM::CMPri || + CmpInstr->getOpcode() == ARM::t2CMPri) + MI = NULL; + else + return false; + } + + // Check that CPSR isn't set between the comparison instruction and the one we + // want to change. At the same time, search for Sub. 
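Concretely, the two patterns described above look like: subs r2, r0, r1 followed by cmp r0, r1 (same operand order, so the compare is simply deleted) or followed by cmp r1, r0 (operands swapped, deletable only after rewriting every CPSR user's condition code via getSwappedCondition, e.g. GE becomes LE). A schematic mirror of the operand check, with RegPair as an invented record type standing in for the MachineInstr operand reads:

// Illustrative sketch only; the implementation above reads MachineInstr
// operands directly.
struct RegPair { unsigned Reg1, Reg2; };

// Returns true if a flag-setting SUB(Sub.Reg1, Sub.Reg2) makes a later
// CMP(Cmp.Reg1, Cmp.Reg2) redundant. NeedsCondSwap tells the caller whether
// CPSR users must have their condition codes swapped (GT/LT, GE/LE, HI/LO,
// HS/LS trade places; EQ/NE are symmetric; V-dependent codes block the
// rewrite, which is what the ARMCC::AL sentinel above signals).
static bool isRedundantPair(const RegPair &Cmp, const RegPair &Sub,
                            bool &NeedsCondSwap) {
  if (Sub.Reg1 == Cmp.Reg1 && Sub.Reg2 == Cmp.Reg2) {
    NeedsCondSwap = false;
    return true;
  }
  if (Sub.Reg1 == Cmp.Reg2 && Sub.Reg2 == Cmp.Reg1) {
    NeedsCondSwap = true;
    return true;
  }
  return false;
}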
+ const TargetRegisterInfo *TRI = &getRegisterInfo(); --I; for (; I != E; --I) { const MachineInstr &Instr = *I; - for (unsigned IO = 0, EO = Instr.getNumOperands(); IO != EO; ++IO) { - const MachineOperand &MO = Instr.getOperand(IO); - if (MO.isRegMask() && MO.clobbersPhysReg(ARM::CPSR)) - return false; - if (!MO.isReg()) continue; - + if (Instr.modifiesRegister(ARM::CPSR, TRI) || + Instr.readsRegister(ARM::CPSR, TRI)) // This instruction modifies or uses CPSR after the one we want to // change. We can't do this transformation. - if (MO.getReg() == ARM::CPSR) - return false; + return false; + + // Check whether CmpInstr can be made redundant by the current instruction. + if (isRedundantFlagInstr(CmpInstr, SrcReg, SrcReg2, CmpValue, &*I)) { + Sub = &*I; + break; } if (I == B) @@ -1858,7 +1974,13 @@ OptimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, int CmpMask, return false; } - // Set the "zero" bit in CPSR. + // Return false if no candidates exist. + if (!MI && !Sub) + return false; + + // The single candidate is called MI. + if (!MI) MI = Sub; + switch (MI->getOpcode()) { default: break; case ARM::RSBrr: @@ -1894,13 +2016,17 @@ OptimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, int CmpMask, case ARM::EORri: case ARM::t2EORrr: case ARM::t2EORri: { - // Scan forward for the use of CPSR, if it's a conditional code requires - // checking of V bit, then this is not safe to do. If we can't find the - // CPSR use (i.e. used in another block), then it's not safe to perform - // the optimization. + // Scan forward for the use of CPSR + // When checking against MI: if it's a conditional code requires + // checking of V bit, then this is not safe to do. + // It is safe to remove CmpInstr if CPSR is redefined or killed. + // If we are done with the basic block, we need to check whether CPSR is + // live-out. + SmallVector, 4> + OperandsToUpdate; bool isSafe = false; I = CmpInstr; - E = MI->getParent()->end(); + E = CmpInstr->getParent()->end(); while (!isSafe && ++I != E) { const MachineInstr &Instr = *I; for (unsigned IO = 0, EO = Instr.getNumOperands(); @@ -1918,28 +2044,56 @@ OptimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, int CmpMask, } // Condition code is after the operand before CPSR. ARMCC::CondCodes CC = (ARMCC::CondCodes)Instr.getOperand(IO-1).getImm(); - switch (CC) { - default: - isSafe = true; - break; - case ARMCC::VS: - case ARMCC::VC: - case ARMCC::GE: - case ARMCC::LT: - case ARMCC::GT: - case ARMCC::LE: - return false; + if (Sub) { + ARMCC::CondCodes NewCC = getSwappedCondition(CC); + if (NewCC == ARMCC::AL) + return false; + // If we have SUB(r1, r2) and CMP(r2, r1), the condition code based + // on CMP needs to be updated to be based on SUB. + // Push the condition code operands to OperandsToUpdate. + // If it is safe to remove CmpInstr, the condition code of these + // operands will be modified. + if (SrcReg2 != 0 && Sub->getOperand(1).getReg() == SrcReg2 && + Sub->getOperand(2).getReg() == SrcReg) + OperandsToUpdate.push_back(std::make_pair(&((*I).getOperand(IO-1)), + NewCC)); } + else + switch (CC) { + default: + // CPSR can be used multiple times, we should continue. + break; + case ARMCC::VS: + case ARMCC::VC: + case ARMCC::GE: + case ARMCC::LT: + case ARMCC::GT: + case ARMCC::LE: + return false; + } } } - if (!isSafe) - return false; + // If CPSR is not killed nor re-defined, we should check whether it is + // live-out. If it is live-out, do not optimize. 
+ if (!isSafe) { + MachineBasicBlock *MBB = CmpInstr->getParent(); + for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(), + SE = MBB->succ_end(); SI != SE; ++SI) + if ((*SI)->isLiveIn(ARM::CPSR)) + return false; + } // Toggle the optional operand to CPSR. MI->getOperand(5).setReg(ARM::CPSR); MI->getOperand(5).setIsDef(true); CmpInstr->eraseFromParent(); + + // Modify the condition code of operands in OperandsToUpdate. + // Since we have SUB(r1, r2) and CMP(r2, r1), the condition code needs to + // be changed from r2 > r1 to r1 < r2, from r2 < r1 to r1 > r2, etc. + for (unsigned i = 0, e = OperandsToUpdate.size(); i < e; i++) + OperandsToUpdate[i].first->setImm(OperandsToUpdate[i].second); return true; } } @@ -2071,9 +2225,9 @@ ARMBaseInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData, const MCInstrDesc &Desc = MI->getDesc(); unsigned Class = Desc.getSchedClass(); - unsigned UOps = ItinData->Itineraries[Class].NumMicroOps; - if (UOps) - return UOps; + int ItinUOps = ItinData->getNumMicroOps(Class); + if (ItinUOps >= 0) + return ItinUOps; unsigned Opc = MI->getOpcode(); switch (Opc) { @@ -2088,7 +2242,7 @@ ARMBaseInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData, // // On Cortex-A8, each pair of register loads / stores can be scheduled on the // same cycle. The scheduling for the first load / store must be done - // separately by assuming the the address is not 64-bit aligned. + // separately by assuming the address is not 64-bit aligned. // // On Cortex-A9, the formula is simply (#reg / 2) + (#reg % 2). If the address // is not 64-bit aligned, then AGU would take an extra cycle. For VFP / NEON @@ -2147,19 +2301,19 @@ ARMBaseInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData, return 2; // 4 registers would be issued: 2, 2. // 5 registers would be issued: 2, 2, 1. - UOps = (NumRegs / 2); + int A8UOps = (NumRegs / 2); if (NumRegs % 2) - ++UOps; - return UOps; + ++A8UOps; + return A8UOps; } else if (Subtarget.isCortexA9()) { - UOps = (NumRegs / 2); + int A9UOps = (NumRegs / 2); // If there are odd number of registers or if it's not 64-bit aligned, // then it takes an extra AGU (Address Generation Unit) cycle. if ((NumRegs % 2) || !MI->hasOneMemOperand() || (*MI->memoperands_begin())->getAlignment() < 8) - ++UOps; - return UOps; + ++A9UOps; + return A9UOps; } else { // Assume the worst. return NumRegs; @@ -2478,82 +2632,14 @@ static const MachineInstr *getBundledUseMI(const TargetRegisterInfo *TRI, return II; } -int -ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, - const MachineInstr *DefMI, unsigned DefIdx, - const MachineInstr *UseMI, unsigned UseIdx) const { - if (DefMI->isCopyLike() || DefMI->isInsertSubreg() || - DefMI->isRegSequence() || DefMI->isImplicitDef()) - return 1; - - if (!ItinData || ItinData->isEmpty()) - return DefMI->mayLoad() ? 3 : 1; - - const MCInstrDesc *DefMCID = &DefMI->getDesc(); - const MCInstrDesc *UseMCID = &UseMI->getDesc(); - const MachineOperand &DefMO = DefMI->getOperand(DefIdx); - unsigned Reg = DefMO.getReg(); - if (Reg == ARM::CPSR) { - if (DefMI->getOpcode() == ARM::FMSTAT) { - // fpscr -> cpsr stalls over 20 cycles on A8 (and earlier?) - return Subtarget.isCortexA9() ? 1 : 20; - } - - // CPSR set and branch can be paired in the same cycle. - if (UseMI->isBranch()) - return 0; - - // Otherwise it takes the instruction latency (generally one). - int Latency = getInstrLatency(ItinData, DefMI); - - // For Thumb2 and -Os, prefer scheduling CPSR setting instruction close to - // its uses. 
Instructions which are otherwise scheduled between them may - // incur a code size penalty (not able to use the CPSR setting 16-bit - // instructions). - if (Latency > 0 && Subtarget.isThumb2()) { - const MachineFunction *MF = DefMI->getParent()->getParent(); - if (MF->getFunction()->hasFnAttr(Attribute::OptimizeForSize)) - --Latency; - } - return Latency; - } - - unsigned DefAlign = DefMI->hasOneMemOperand() - ? (*DefMI->memoperands_begin())->getAlignment() : 0; - unsigned UseAlign = UseMI->hasOneMemOperand() - ? (*UseMI->memoperands_begin())->getAlignment() : 0; - - unsigned DefAdj = 0; - if (DefMI->isBundle()) { - DefMI = getBundledDefMI(&getRegisterInfo(), DefMI, Reg, DefIdx, DefAdj); - if (DefMI->isCopyLike() || DefMI->isInsertSubreg() || - DefMI->isRegSequence() || DefMI->isImplicitDef()) - return 1; - DefMCID = &DefMI->getDesc(); - } - unsigned UseAdj = 0; - if (UseMI->isBundle()) { - unsigned NewUseIdx; - const MachineInstr *NewUseMI = getBundledUseMI(&getRegisterInfo(), UseMI, - Reg, NewUseIdx, UseAdj); - if (NewUseMI) { - UseMI = NewUseMI; - UseIdx = NewUseIdx; - UseMCID = &UseMI->getDesc(); - } - } - - int Latency = getOperandLatency(ItinData, *DefMCID, DefIdx, DefAlign, - *UseMCID, UseIdx, UseAlign); - int Adj = DefAdj + UseAdj; - if (Adj) { - Latency -= (int)(DefAdj + UseAdj); - if (Latency < 1) - return 1; - } - - if (Latency > 1 && - (Subtarget.isCortexA8() || Subtarget.isCortexA9())) { +/// Return the number of cycles to add to (or subtract from) the static +/// itinerary based on the def opcode and alignment. The caller will ensure that +/// adjusted latency is at least one cycle. +static int adjustDefLatency(const ARMSubtarget &Subtarget, + const MachineInstr *DefMI, + const MCInstrDesc *DefMCID, unsigned DefAlign) { + int Adjust = 0; + if (Subtarget.isCortexA8() || Subtarget.isCortexA9()) { // FIXME: Shifter op hack: no shift (i.e. [r +/- r]) or [r + r << 2] // variants are one cycle cheaper. switch (DefMCID->getOpcode()) { @@ -2564,7 +2650,7 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal); if (ShImm == 0 || (ShImm == 2 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)) - --Latency; + --Adjust; break; } case ARM::t2LDRs: @@ -2574,13 +2660,13 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, // Thumb2 mode: lsl only. unsigned ShAmt = DefMI->getOperand(3).getImm(); if (ShAmt == 0 || ShAmt == 2) - --Latency; + --Adjust; break; } } } - if (DefAlign < 8 && Subtarget.isCortexA9()) + if (DefAlign < 8 && Subtarget.isCortexA9()) { switch (DefMCID->getOpcode()) { default: break; case ARM::VLD1q8: @@ -2689,10 +2775,101 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, case ARM::VLD4LNq32_UPD: // If the address is not 64-bit aligned, the latencies of these // instructions increases by one. - ++Latency; + ++Adjust; break; } + } + return Adjust; +} + + + +int +ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, + const MachineInstr *DefMI, unsigned DefIdx, + const MachineInstr *UseMI, + unsigned UseIdx) const { + // No operand latency. The caller may fall back to getInstrLatency. 
+ if (!ItinData || ItinData->isEmpty()) + return -1; + + const MachineOperand &DefMO = DefMI->getOperand(DefIdx); + unsigned Reg = DefMO.getReg(); + const MCInstrDesc *DefMCID = &DefMI->getDesc(); + const MCInstrDesc *UseMCID = &UseMI->getDesc(); + + unsigned DefAdj = 0; + if (DefMI->isBundle()) { + DefMI = getBundledDefMI(&getRegisterInfo(), DefMI, Reg, DefIdx, DefAdj); + DefMCID = &DefMI->getDesc(); + } + if (DefMI->isCopyLike() || DefMI->isInsertSubreg() || + DefMI->isRegSequence() || DefMI->isImplicitDef()) { + return 1; + } + + unsigned UseAdj = 0; + if (UseMI->isBundle()) { + unsigned NewUseIdx; + const MachineInstr *NewUseMI = getBundledUseMI(&getRegisterInfo(), UseMI, + Reg, NewUseIdx, UseAdj); + if (!NewUseMI) + return -1; + + UseMI = NewUseMI; + UseIdx = NewUseIdx; + UseMCID = &UseMI->getDesc(); + } + + if (Reg == ARM::CPSR) { + if (DefMI->getOpcode() == ARM::FMSTAT) { + // fpscr -> cpsr stalls over 20 cycles on A8 (and earlier?) + return Subtarget.isCortexA9() ? 1 : 20; + } + + // CPSR set and branch can be paired in the same cycle. + if (UseMI->isBranch()) + return 0; + + // Otherwise it takes the instruction latency (generally one). + unsigned Latency = getInstrLatency(ItinData, DefMI); + + // For Thumb2 and -Os, prefer scheduling CPSR setting instruction close to + // its uses. Instructions which are otherwise scheduled between them may + // incur a code size penalty (not able to use the CPSR setting 16-bit + // instructions). + if (Latency > 0 && Subtarget.isThumb2()) { + const MachineFunction *MF = DefMI->getParent()->getParent(); + if (MF->getFunction()->hasFnAttr(Attribute::OptimizeForSize)) + --Latency; + } + return Latency; + } + + if (DefMO.isImplicit() || UseMI->getOperand(UseIdx).isImplicit()) + return -1; + + unsigned DefAlign = DefMI->hasOneMemOperand() + ? (*DefMI->memoperands_begin())->getAlignment() : 0; + unsigned UseAlign = UseMI->hasOneMemOperand() + ? (*UseMI->memoperands_begin())->getAlignment() : 0; + // Get the itinerary's latency if possible, and handle variable_ops. + int Latency = getOperandLatency(ItinData, *DefMCID, DefIdx, DefAlign, + *UseMCID, UseIdx, UseAlign); + // Unable to find operand latency. The caller may resort to getInstrLatency. + if (Latency < 0) + return Latency; + + // Adjust for IT block position. + int Adj = DefAdj + UseAdj; + + // Adjust for dynamic def-side opcode variants not captured by the itinerary. + Adj += adjustDefLatency(Subtarget, DefMI, DefMCID, DefAlign); + if (Adj >= 0 || (int)Latency > -Adj) { + return Latency + Adj; + } + // Return the itinerary latency, which may be zero but not less than zero. return Latency; } @@ -2892,22 +3069,20 @@ ARMBaseInstrInfo::getOutputLatency(const InstrItineraryData *ItinData, return 1; // If the second MI is predicated, then there is an implicit use dependency. - return getOperandLatency(ItinData, DefMI, DefIdx, DepMI, - DepMI->getNumOperands()); + return getInstrLatency(ItinData, DefMI); } -int ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData, - const MachineInstr *MI, - unsigned *PredCost) const { +unsigned ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData, + const MachineInstr *MI, + unsigned *PredCost) const { if (MI->isCopyLike() || MI->isInsertSubreg() || MI->isRegSequence() || MI->isImplicitDef()) return 1; - if (!ItinData || ItinData->isEmpty()) - return 1; - + // An instruction scheduler typically runs on unbundled instructions, however + // other passes may query the latency of a bundled instruction. 
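A note on the new structure: adjustDefLatency returns a possibly negative cycle adjustment, and its result is folded into the itinerary latency under a guard that keeps the sum non-negative (the same pattern recurs in getInstrLatency below). A minimal sketch of that combining rule; combineLatency is a name invented here:

// Apply Adj only when the result stays non-negative; otherwise fall back
// to the unadjusted itinerary latency, which may be zero but never wraps.
static unsigned combineLatency(unsigned ItinLatency, int Adj) {
  if (Adj >= 0 || (int)ItinLatency > -Adj)
    return ItinLatency + Adj;
  return ItinLatency;
}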
if (MI->isBundle()) { - int Latency = 0; + unsigned Latency = 0; MachineBasicBlock::const_instr_iterator I = MI; MachineBasicBlock::const_instr_iterator E = MI->getParent()->instr_end(); while (++I != E && I->isInsideBundle()) { @@ -2918,15 +3093,33 @@ int ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData, } const MCInstrDesc &MCID = MI->getDesc(); - unsigned Class = MCID.getSchedClass(); - unsigned UOps = ItinData->Itineraries[Class].NumMicroOps; - if (PredCost && (MCID.isCall() || MCID.hasImplicitDefOfPhysReg(ARM::CPSR))) + if (PredCost && (MCID.isCall() || MCID.hasImplicitDefOfPhysReg(ARM::CPSR))) { // When predicated, CPSR is an additional source operand for CPSR updating // instructions, this apparently increases their latencies. *PredCost = 1; - if (UOps) - return ItinData->getStageLatency(Class); - return getNumMicroOps(ItinData, MI); + } + // Be sure to call getStageLatency for an empty itinerary in case it has a + // valid MinLatency property. + if (!ItinData) + return MI->mayLoad() ? 3 : 1; + + unsigned Class = MCID.getSchedClass(); + + // For instructions with variable uops, use uops as latency. + if (!ItinData->isEmpty() && ItinData->getNumMicroOps(Class) < 0) + return getNumMicroOps(ItinData, MI); + + // For the common case, fall back on the itinerary's latency. + unsigned Latency = ItinData->getStageLatency(Class); + + // Adjust for dynamic def-side opcode variants not captured by the itinerary. + unsigned DefAlign = MI->hasOneMemOperand() + ? (*MI->memoperands_begin())->getAlignment() : 0; + int Adj = adjustDefLatency(Subtarget, MI, &MCID, DefAlign); + if (Adj >= 0 || (int)Latency > -Adj) { + return Latency + Adj; + } + return Latency; } int ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData, @@ -2960,7 +3153,10 @@ hasHighOperandLatency(const InstrItineraryData *ItinData, return true; // Hoist VFP / NEON instructions with 4 or higher latency. - int Latency = getOperandLatency(ItinData, DefMI, DefIdx, UseMI, UseIdx); + int Latency = computeOperandLatency(ItinData, DefMI, DefIdx, UseMI, UseIdx, + /*FindMin=*/false); + if (Latency < 0) + Latency = getInstrLatency(ItinData, DefMI); if (Latency <= 3) return false; return DDomain == ARMII::DomainVFP || DDomain == ARMII::DomainNEON || diff --git a/lib/Target/ARM/ARMBaseInstrInfo.h b/lib/Target/ARM/ARMBaseInstrInfo.h index 2fe8507..1a10a4a 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.h +++ b/lib/Target/ARM/ARMBaseInstrInfo.h @@ -186,16 +186,20 @@ public: return NumCycles == 1; } - /// AnalyzeCompare - For a comparison instruction, return the source register - /// in SrcReg and the value it compares against in CmpValue. Return true if - /// the comparison instruction can be analyzed. - virtual bool AnalyzeCompare(const MachineInstr *MI, unsigned &SrcReg, - int &CmpMask, int &CmpValue) const; - - /// OptimizeCompareInstr - Convert the instruction to set the zero flag so - /// that we can remove a "comparison with zero". - virtual bool OptimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, - int CmpMask, int CmpValue, + /// analyzeCompare - For a comparison instruction, return the source registers + /// in SrcReg and SrcReg2 if having two register operands, and the value it + /// compares against in CmpValue. Return true if the comparison instruction + /// can be analyzed. 
+ virtual bool analyzeCompare(const MachineInstr *MI, unsigned &SrcReg, + unsigned &SrcReg2, int &CmpMask, + int &CmpValue) const; + + /// optimizeCompareInstr - Convert the instruction to set the zero flag so + /// that we can remove a "comparison with zero"; Remove a redundant CMP + /// instruction if the flags can be updated in the same way by an earlier + /// instruction such as SUB. + virtual bool optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, + unsigned SrcReg2, int CmpMask, int CmpValue, const MachineRegisterInfo *MRI) const; /// FoldImmediate - 'Reg' is known to be defined by a move immediate @@ -249,8 +253,9 @@ private: const MCInstrDesc &UseMCID, unsigned UseIdx, unsigned UseAlign) const; - int getInstrLatency(const InstrItineraryData *ItinData, - const MachineInstr *MI, unsigned *PredCost = 0) const; + unsigned getInstrLatency(const InstrItineraryData *ItinData, + const MachineInstr *MI, + unsigned *PredCost = 0) const; int getInstrLatency(const InstrItineraryData *ItinData, SDNode *Node) const; diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp index 3907f75..9deb96e 100644 --- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp +++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp @@ -62,12 +62,26 @@ ARMBaseRegisterInfo::ARMBaseRegisterInfo(const ARMBaseInstrInfo &tii, const uint16_t* ARMBaseRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { - return (STI.isTargetIOS()) ? CSR_iOS_SaveList : CSR_AAPCS_SaveList; + bool ghcCall = false; + + if (MF) { + const Function *F = MF->getFunction(); + ghcCall = (F ? F->getCallingConv() == CallingConv::GHC : false); + } + + if (ghcCall) { + return CSR_GHC_SaveList; + } + else { + return (STI.isTargetIOS() && !STI.isAAPCS_ABI()) + ? CSR_iOS_SaveList : CSR_AAPCS_SaveList; + } } const uint32_t* ARMBaseRegisterInfo::getCallPreservedMask(CallingConv::ID) const { - return (STI.isTargetIOS()) ? CSR_iOS_RegMask : CSR_AAPCS_RegMask; + return (STI.isTargetIOS() && !STI.isAAPCS_ABI()) + ? CSR_iOS_RegMask : CSR_AAPCS_RegMask; } BitVector ARMBaseRegisterInfo:: @@ -257,8 +271,9 @@ ARMBaseRegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC) } const TargetRegisterClass * -ARMBaseRegisterInfo::getPointerRegClass(unsigned Kind) const { - return ARM::GPRRegisterClass; +ARMBaseRegisterInfo::getPointerRegClass(const MachineFunction &MF, unsigned Kind) + const { + return &ARM::GPRRegClass; } const TargetRegisterClass * @@ -369,7 +384,7 @@ ARMBaseRegisterInfo::getRawAllocationOrder(const TargetRegisterClass *RC, }; // We only support even/odd hints for GPR and rGPR. 
- if (RC != ARM::GPRRegisterClass && RC != ARM::rGPRRegisterClass) + if (RC != &ARM::GPRRegClass && RC != &ARM::rGPRRegClass) return RC->getRawAllocationOrder(MF); if (HintType == ARMRI::RegPairEven) { @@ -712,6 +727,11 @@ requiresRegisterScavenging(const MachineFunction &MF) const { } bool ARMBaseRegisterInfo:: +trackLivenessAfterRegAlloc(const MachineFunction &MF) const { + return true; +} + +bool ARMBaseRegisterInfo:: requiresFrameIndexScavenging(const MachineFunction &MF) const { return true; } @@ -932,7 +952,8 @@ materializeFrameBaseRegister(MachineBasicBlock *MBB, const MCInstrDesc &MCID = TII.get(ADDriOpc); MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); - MRI.constrainRegClass(BaseReg, TII.getRegClass(MCID, 0, this)); + const MachineFunction &MF = *MBB->getParent(); + MRI.constrainRegClass(BaseReg, TII.getRegClass(MCID, 0, this, MF)); MachineInstrBuilder MIB = AddDefaultPred(BuildMI(*MBB, Ins, DL, MCID, BaseReg) .addFrameIndex(FrameIdx).addImm(Offset)); @@ -1110,7 +1131,7 @@ ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, // Must be addrmode4/6. MI.getOperand(i).ChangeToRegister(FrameReg, false, false, false); else { - ScratchReg = MF.getRegInfo().createVirtualRegister(ARM::GPRRegisterClass); + ScratchReg = MF.getRegInfo().createVirtualRegister(&ARM::GPRRegClass); if (!AFI->isThumbFunction()) emitARMRegPlusImmediate(MBB, II, MI.getDebugLoc(), ScratchReg, FrameReg, Offset, Pred, PredReg, TII); diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.h b/lib/Target/ARM/ARMBaseRegisterInfo.h index af79351..da29f7e 100644 --- a/lib/Target/ARM/ARMBaseRegisterInfo.h +++ b/lib/Target/ARM/ARMBaseRegisterInfo.h @@ -109,7 +109,8 @@ public: SmallVectorImpl &SubIndices, unsigned &NewSubIdx) const; - const TargetRegisterClass *getPointerRegClass(unsigned Kind = 0) const; + const TargetRegisterClass* + getPointerRegClass(const MachineFunction &MF, unsigned Kind = 0) const; const TargetRegisterClass* getCrossCopyRegClass(const TargetRegisterClass *RC) const; @@ -173,6 +174,8 @@ public: virtual bool requiresRegisterScavenging(const MachineFunction &MF) const; + virtual bool trackLivenessAfterRegAlloc(const MachineFunction &MF) const; + virtual bool requiresFrameIndexScavenging(const MachineFunction &MF) const; virtual bool requiresVirtualBaseRegisters(const MachineFunction &MF) const; diff --git a/lib/Target/ARM/ARMCallingConv.td b/lib/Target/ARM/ARMCallingConv.td index b9a2512..bda1517 100644 --- a/lib/Target/ARM/ARMCallingConv.td +++ b/lib/Target/ARM/ARMCallingConv.td @@ -79,6 +79,25 @@ def RetFastCC_ARM_APCS : CallingConv<[ CCDelegateTo ]>; +//===----------------------------------------------------------------------===// +// ARM APCS Calling Convention for GHC +//===----------------------------------------------------------------------===// + +def CC_ARM_APCS_GHC : CallingConv<[ + // Handle all vector types as either f64 or v2f64. + CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType>, + CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType>, + + CCIfType<[v2f64], CCAssignToReg<[Q4, Q5]>>, + CCIfType<[f64], CCAssignToReg<[D8, D9, D10, D11]>>, + CCIfType<[f32], CCAssignToReg<[S16, S17, S18, S19, S20, S21, S22, S23]>>, + + // Promote i8/i16 arguments to i32. 
+ CCIfType<[i8, i16], CCPromoteToType>, + + // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, SpLim + CCIfType<[i32], CCAssignToReg<[R4, R5, R6, R7, R8, R9, R10, R11]>> +]>; //===----------------------------------------------------------------------===// // ARM AAPCS (EABI) Calling Convention, common parts @@ -113,6 +132,9 @@ def RetCC_ARM_AAPCS_Common : CallingConv<[ //===----------------------------------------------------------------------===// def CC_ARM_AAPCS : CallingConv<[ + // Handles byval parameters. + CCIfByVal>, + // Handle all vector types as either f64 or v2f64. CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType>, CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType>, @@ -138,6 +160,9 @@ def RetCC_ARM_AAPCS : CallingConv<[ //===----------------------------------------------------------------------===// def CC_ARM_AAPCS_VFP : CallingConv<[ + // Handles byval parameters. + CCIfByVal>, + // Handle all vector types as either f64 or v2f64. CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType>, CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType>, @@ -171,3 +196,9 @@ def CSR_AAPCS : CalleeSavedRegs<(add LR, R11, R10, R9, R8, R7, R6, R5, R4, // iOS ABI deviates from ARM standard ABI. R9 is not a callee-saved register. // Also save R7-R4 first to match the stack frame fixed spill areas. def CSR_iOS : CalleeSavedRegs<(add LR, R7, R6, R5, R4, (sub CSR_AAPCS, R9))>; + +// GHC set of callee saved regs is empty as all those regs are +// used for passing STG regs around +// add is a workaround for not being able to compile empty list: +// def CSR_GHC : CalleeSavedRegs<()>; +def CSR_GHC : CalleeSavedRegs<(add)>; diff --git a/lib/Target/ARM/ARMCodeEmitter.cpp b/lib/Target/ARM/ARMCodeEmitter.cpp index 32ef345..e81b4cc 100644 --- a/lib/Target/ARM/ARMCodeEmitter.cpp +++ b/lib/Target/ARM/ARMCodeEmitter.cpp @@ -254,7 +254,7 @@ namespace { emitConstPoolAddress(MO.getIndex(), ARM::reloc_arm_cp_entry); return 0; } - unsigned Reg = getARMRegisterNumbering(MO.getReg()); + unsigned Reg = II->getRegisterInfo().getEncodingValue(MO.getReg()); int32_t Imm12 = MO1.getImm(); uint32_t Binary; Binary = Imm12 & 0xfff; @@ -296,7 +296,7 @@ namespace { emitConstPoolAddress(MO.getIndex(), ARM::reloc_arm_cp_entry); return 0; } - unsigned Reg = getARMRegisterNumbering(MO.getReg()); + unsigned Reg = II->getRegisterInfo().getEncodingValue(MO.getReg()); int32_t Imm12 = MO1.getImm(); // Special value for #-0 @@ -352,6 +352,12 @@ namespace { void emitJumpTableAddress(unsigned JTIndex, unsigned Reloc) const; void emitMachineBasicBlock(MachineBasicBlock *BB, unsigned Reloc, intptr_t JTBase = 0) const; + unsigned encodeVFPRd(const MachineInstr &MI, unsigned OpIdx) const; + unsigned encodeVFPRn(const MachineInstr &MI, unsigned OpIdx) const; + unsigned encodeVFPRm(const MachineInstr &MI, unsigned OpIdx) const; + unsigned encodeNEONRd(const MachineInstr &MI, unsigned OpIdx) const; + unsigned encodeNEONRn(const MachineInstr &MI, unsigned OpIdx) const; + unsigned encodeNEONRm(const MachineInstr &MI, unsigned OpIdx) const; }; } @@ -440,7 +446,7 @@ unsigned ARMCodeEmitter::getMovi32Value(const MachineInstr &MI, unsigned ARMCodeEmitter::getMachineOpValue(const MachineInstr &MI, const MachineOperand &MO) const { if (MO.isReg()) - return getARMRegisterNumbering(MO.getReg()); + return II->getRegisterInfo().getEncodingValue(MO.getReg()); else if (MO.isImm()) return static_cast(MO.getImm()); else if (MO.isGlobal()) @@ -776,7 +782,7 @@ void 
ARMCodeEmitter::emitLEApcrelJTInstruction(const MachineInstr &MI) { Binary |= getMachineOpValue(MI, 0) << ARMII::RegRdShift; // Encode Rn which is PC. - Binary |= getARMRegisterNumbering(ARM::PC) << ARMII::RegRnShift; + Binary |= II->getRegisterInfo().getEncodingValue(ARM::PC) << ARMII::RegRnShift; // Encode the displacement. Binary |= 1 << ARMII::I_BitShift; @@ -963,7 +969,7 @@ unsigned ARMCodeEmitter::getMachineSoRegOpValue(const MachineInstr &MI, if (Rs) { // Encode Rs bit[11:8]. assert(ARM_AM::getSORegOffset(MO2.getImm()) == 0); - return Binary | (getARMRegisterNumbering(Rs) << ARMII::RegRsShift); + return Binary | (II->getRegisterInfo().getEncodingValue(Rs) << ARMII::RegRsShift); } // Encode shift_imm bit[11:7]. @@ -1014,7 +1020,7 @@ void ARMCodeEmitter::emitDataProcessingInstruction(const MachineInstr &MI, Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRdShift; else if (ImplicitRd) // Special handling for implicit use (e.g. PC). - Binary |= (getARMRegisterNumbering(ImplicitRd) << ARMII::RegRdShift); + Binary |= (II->getRegisterInfo().getEncodingValue(ImplicitRd) << ARMII::RegRdShift); if (MCID.Opcode == ARM::MOVi16) { // Get immediate from MI. @@ -1064,7 +1070,7 @@ void ARMCodeEmitter::emitDataProcessingInstruction(const MachineInstr &MI, if (!isUnary) { if (ImplicitRn) // Special handling for implicit use (e.g. PC). - Binary |= (getARMRegisterNumbering(ImplicitRn) << ARMII::RegRnShift); + Binary |= (II->getRegisterInfo().getEncodingValue(ImplicitRn) << ARMII::RegRnShift); else { Binary |= getMachineOpValue(MI, OpIdx) << ARMII::RegRnShift; ++OpIdx; @@ -1081,7 +1087,7 @@ void ARMCodeEmitter::emitDataProcessingInstruction(const MachineInstr &MI, if (MO.isReg()) { // Encode register Rm. - emitWordLE(Binary | getARMRegisterNumbering(MO.getReg())); + emitWordLE(Binary | II->getRegisterInfo().getEncodingValue(MO.getReg())); return; } @@ -1124,14 +1130,14 @@ void ARMCodeEmitter::emitLoadStoreInstruction(const MachineInstr &MI, // Set first operand if (ImplicitRd) // Special handling for implicit use (e.g. PC). - Binary |= (getARMRegisterNumbering(ImplicitRd) << ARMII::RegRdShift); + Binary |= (II->getRegisterInfo().getEncodingValue(ImplicitRd) << ARMII::RegRdShift); else Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRdShift; // Set second operand if (ImplicitRn) // Special handling for implicit use (e.g. PC). - Binary |= (getARMRegisterNumbering(ImplicitRn) << ARMII::RegRnShift); + Binary |= (II->getRegisterInfo().getEncodingValue(ImplicitRn) << ARMII::RegRnShift); else Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRnShift; @@ -1158,7 +1164,7 @@ void ARMCodeEmitter::emitLoadStoreInstruction(const MachineInstr &MI, Binary |= 1 << ARMII::I_BitShift; assert(TargetRegisterInfo::isPhysicalRegister(MO2.getReg())); // Set bit[3:0] to the corresponding Rm register - Binary |= getARMRegisterNumbering(MO2.getReg()); + Binary |= II->getRegisterInfo().getEncodingValue(MO2.getReg()); // If this instr is in scaled register offset/index instruction, set // shift_immed(bit[11:7]) and shift(bit[6:5]) fields. @@ -1202,7 +1208,7 @@ void ARMCodeEmitter::emitMiscLoadStoreInstruction(const MachineInstr &MI, // Set second operand if (ImplicitRn) // Special handling for implicit use (e.g. PC). 
- Binary |= (getARMRegisterNumbering(ImplicitRn) << ARMII::RegRnShift); + Binary |= (II->getRegisterInfo().getEncodingValue(ImplicitRn) << ARMII::RegRnShift); else Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRnShift; @@ -1221,7 +1227,7 @@ void ARMCodeEmitter::emitMiscLoadStoreInstruction(const MachineInstr &MI, // If this instr is in register offset/index encoding, set bit[3:0] // to the corresponding Rm register. if (MO2.getReg()) { - Binary |= getARMRegisterNumbering(MO2.getReg()); + Binary |= II->getRegisterInfo().getEncodingValue(MO2.getReg()); emitWordLE(Binary); return; } @@ -1287,7 +1293,7 @@ void ARMCodeEmitter::emitLoadStoreMultipleInstruction(const MachineInstr &MI) { const MachineOperand &MO = MI.getOperand(i); if (!MO.isReg() || MO.isImplicit()) break; - unsigned RegNum = getARMRegisterNumbering(MO.getReg()); + unsigned RegNum = II->getRegisterInfo().getEncodingValue(MO.getReg()); assert(TargetRegisterInfo::isPhysicalRegister(MO.getReg()) && RegNum < 16); Binary |= 0x1 << RegNum; @@ -1530,7 +1536,7 @@ void ARMCodeEmitter::emitMiscBranchInstruction(const MachineInstr &MI) { if (MCID.Opcode == ARM::BX_RET || MCID.Opcode == ARM::MOVPCLR) // The return register is LR. - Binary |= getARMRegisterNumbering(ARM::LR); + Binary |= II->getRegisterInfo().getEncodingValue(ARM::LR); else // otherwise, set the return register Binary |= getMachineOpValue(MI, 0); @@ -1538,11 +1544,12 @@ void ARMCodeEmitter::emitMiscBranchInstruction(const MachineInstr &MI) { emitWordLE(Binary); } -static unsigned encodeVFPRd(const MachineInstr &MI, unsigned OpIdx) { +unsigned ARMCodeEmitter::encodeVFPRd(const MachineInstr &MI, + unsigned OpIdx) const { unsigned RegD = MI.getOperand(OpIdx).getReg(); unsigned Binary = 0; - bool isSPVFP = ARM::SPRRegisterClass->contains(RegD); - RegD = getARMRegisterNumbering(RegD); + bool isSPVFP = ARM::SPRRegClass.contains(RegD); + RegD = II->getRegisterInfo().getEncodingValue(RegD); if (!isSPVFP) Binary |= RegD << ARMII::RegRdShift; else { @@ -1552,11 +1559,12 @@ static unsigned encodeVFPRd(const MachineInstr &MI, unsigned OpIdx) { return Binary; } -static unsigned encodeVFPRn(const MachineInstr &MI, unsigned OpIdx) { +unsigned ARMCodeEmitter::encodeVFPRn(const MachineInstr &MI, + unsigned OpIdx) const { unsigned RegN = MI.getOperand(OpIdx).getReg(); unsigned Binary = 0; - bool isSPVFP = ARM::SPRRegisterClass->contains(RegN); - RegN = getARMRegisterNumbering(RegN); + bool isSPVFP = ARM::SPRRegClass.contains(RegN); + RegN = II->getRegisterInfo().getEncodingValue(RegN); if (!isSPVFP) Binary |= RegN << ARMII::RegRnShift; else { @@ -1566,11 +1574,12 @@ static unsigned encodeVFPRn(const MachineInstr &MI, unsigned OpIdx) { return Binary; } -static unsigned encodeVFPRm(const MachineInstr &MI, unsigned OpIdx) { +unsigned ARMCodeEmitter::encodeVFPRm(const MachineInstr &MI, + unsigned OpIdx) const { unsigned RegM = MI.getOperand(OpIdx).getReg(); unsigned Binary = 0; - bool isSPVFP = ARM::SPRRegisterClass->contains(RegM); - RegM = getARMRegisterNumbering(RegM); + bool isSPVFP = ARM::SPRRegClass.contains(RegM); + RegM = II->getRegisterInfo().getEncodingValue(RegM); if (!isSPVFP) Binary |= RegM; else { @@ -1757,28 +1766,31 @@ ARMCodeEmitter::emitVFPLoadStoreMultipleInstruction(const MachineInstr &MI) { emitWordLE(Binary); } -static unsigned encodeNEONRd(const MachineInstr &MI, unsigned OpIdx) { +unsigned ARMCodeEmitter::encodeNEONRd(const MachineInstr &MI, + unsigned OpIdx) const { unsigned RegD = MI.getOperand(OpIdx).getReg(); unsigned Binary = 0; - RegD = 
getARMRegisterNumbering(RegD); + RegD = II->getRegisterInfo().getEncodingValue(RegD); Binary |= (RegD & 0xf) << ARMII::RegRdShift; Binary |= ((RegD >> 4) & 1) << ARMII::D_BitShift; return Binary; } -static unsigned encodeNEONRn(const MachineInstr &MI, unsigned OpIdx) { +unsigned ARMCodeEmitter::encodeNEONRn(const MachineInstr &MI, + unsigned OpIdx) const { unsigned RegN = MI.getOperand(OpIdx).getReg(); unsigned Binary = 0; - RegN = getARMRegisterNumbering(RegN); + RegN = II->getRegisterInfo().getEncodingValue(RegN); Binary |= (RegN & 0xf) << ARMII::RegRnShift; Binary |= ((RegN >> 4) & 1) << ARMII::N_BitShift; return Binary; } -static unsigned encodeNEONRm(const MachineInstr &MI, unsigned OpIdx) { +unsigned ARMCodeEmitter::encodeNEONRm(const MachineInstr &MI, + unsigned OpIdx) const { unsigned RegM = MI.getOperand(OpIdx).getReg(); unsigned Binary = 0; - RegM = getARMRegisterNumbering(RegM); + RegM = II->getRegisterInfo().getEncodingValue(RegM); Binary |= (RegM & 0xf); Binary |= ((RegM >> 4) & 1) << ARMII::M_BitShift; return Binary; @@ -1812,7 +1824,7 @@ void ARMCodeEmitter::emitNEONLaneInstruction(const MachineInstr &MI) { Binary |= (IsThumb ? ARMCC::AL : II->getPredicate(&MI)) << ARMII::CondShift; unsigned RegT = MI.getOperand(RegTOpIdx).getReg(); - RegT = getARMRegisterNumbering(RegT); + RegT = II->getRegisterInfo().getEncodingValue(RegT); Binary |= (RegT << ARMII::RegRdShift); Binary |= encodeNEONRn(MI, RegNOpIdx); @@ -1841,7 +1853,7 @@ void ARMCodeEmitter::emitNEONDupInstruction(const MachineInstr &MI) { Binary |= (IsThumb ? ARMCC::AL : II->getPredicate(&MI)) << ARMII::CondShift; unsigned RegT = MI.getOperand(1).getReg(); - RegT = getARMRegisterNumbering(RegT); + RegT = II->getRegisterInfo().getEncodingValue(RegT); Binary |= (RegT << ARMII::RegRdShift); Binary |= encodeNEONRn(MI, 0); emitWordLE(Binary); diff --git a/lib/Target/ARM/ARMConstantIslandPass.cpp b/lib/Target/ARM/ARMConstantIslandPass.cpp index fc35c7c..a953985 100644 --- a/lib/Target/ARM/ARMConstantIslandPass.cpp +++ b/lib/Target/ARM/ARMConstantIslandPass.cpp @@ -69,27 +69,6 @@ static inline unsigned UnknownPadding(unsigned LogAlign, unsigned KnownBits) { return 0; } -/// WorstCaseAlign - Assuming only the low KnownBits bits in Offset are exact, -/// add padding such that: -/// -/// 1. The result is aligned to 1 << LogAlign. -/// -/// 2. No other value of the unknown bits would require more padding. -/// -/// This may add more padding than is required to satisfy just one of the -/// constraints. It is necessary to compute alignment this way to guarantee -/// that we don't underestimate the padding before an aligned block. If the -/// real padding before a block is larger than we think, constant pool entries -/// may go out of range. -static inline unsigned WorstCaseAlign(unsigned Offset, unsigned LogAlign, - unsigned KnownBits) { - // Add the worst possible padding that the unknown bits could cause. - Offset += UnknownPadding(LogAlign, KnownBits); - - // Then align the result. - return RoundUpToAlignment(Offset, 1u << LogAlign); -} - namespace { /// ARMConstantIslands - Due to limited PC-relative displacements, ARM /// requires constant pool entries to be scattered among the instructions @@ -109,7 +88,12 @@ namespace { /// Offset - Distance from the beginning of the function to the beginning /// of this basic block. /// - /// The offset is always aligned as required by the basic block. + /// Offsets are computed assuming worst case padding before an aligned + /// block. 
This means that subtracting basic block offsets always gives a + /// conservative estimate of the real distance which may be smaller. + /// + /// Because worst case padding is used, the computed offset of an aligned + /// block may not actually be aligned. unsigned Offset; /// Size - Size of the basic block in bytes. If the block contains @@ -140,7 +124,12 @@ namespace { /// This number should be used to predict worst case padding when /// splitting the block. unsigned internalKnownBits() const { - return Unalign ? Unalign : KnownBits; + unsigned Bits = Unalign ? Unalign : KnownBits; + // If the block size isn't a multiple of the known bits, assume the + // worst case padding. + if (Size & ((1u << Bits) - 1)) + Bits = CountTrailingZeros_32(Size); + return Bits; } /// Compute the offset immediately following this block. If LogAlign is @@ -152,7 +141,7 @@ namespace { if (!LA) return PO; // Add alignment padding from the terminator. - return WorstCaseAlign(PO, LA, internalKnownBits()); + return PO + UnknownPadding(LA, internalKnownBits()); } /// Compute the number of known low bits of postOffset. If this block @@ -342,9 +331,7 @@ void ARMConstantIslands::verify() { for (MachineFunction::iterator MBBI = MF->begin(), E = MF->end(); MBBI != E; ++MBBI) { MachineBasicBlock *MBB = MBBI; - unsigned Align = MBB->getAlignment(); unsigned MBBId = MBB->getNumber(); - assert(BBInfo[MBBId].Offset % (1u << Align) == 0); assert(!MBBId || BBInfo[MBBId - 1].postOffset() <= BBInfo[MBBId].Offset); } DEBUG(dbgs() << "Verifying " << CPUsers.size() << " CP users.\n"); @@ -428,7 +415,7 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &mf) { // ARM and Thumb2 functions need to be 4-byte aligned. if (!isThumb1) - MF->EnsureAlignment(2); // 2 = log2(4) + MF->ensureAlignment(2); // 2 = log2(4) // Perform the initial placement of the constant pool entries. To start with, // we put them all at the end of the function. @@ -529,7 +516,7 @@ ARMConstantIslands::doInitialPlacement(std::vector &CPEMIs) { // The function needs to be as aligned as the basic blocks. The linker may // move functions around based on their alignment. - MF->EnsureAlignment(BB->getAlignment()); + MF->ensureAlignment(BB->getAlignment()); // Order the entries in BB by descending alignment. That ensures correct // alignment of all entries as long as BB is sufficiently aligned. Keep @@ -828,7 +815,7 @@ void ARMConstantIslands::computeBlockSize(MachineBasicBlock *MBB) { // tBR_JTr contains a .align 2 directive. if (!MBB->empty() && MBB->back().getOpcode() == ARM::tBR_JTr) { BBI.PostAlign = 2; - MBB->getParent()->EnsureAlignment(2); + MBB->getParent()->ensureAlignment(2); } } @@ -1045,7 +1032,6 @@ bool ARMConstantIslands::isCPEntryInRange(MachineInstr *MI, unsigned UserOffset, MachineInstr *CPEMI, unsigned MaxDisp, bool NegOk, bool DoDump) { unsigned CPEOffset = getOffsetOf(CPEMI); - assert(CPEOffset % 4 == 0 && "Misaligned CPE"); if (DoDump) { DEBUG({ @@ -1256,11 +1242,8 @@ void ARMConstantIslands::createNewWater(unsigned CPUserIndex, if (BBHasFallthrough(UserMBB)) { // Size of branch to insert. unsigned Delta = isThumb1 ? 2 : 4; - // End of UserBlock after adding a branch. - unsigned UserBlockEnd = UserBBI.postOffset() + Delta; // Compute the offset where the CPE will begin. 
- unsigned CPEOffset = WorstCaseAlign(UserBlockEnd, CPELogAlign, - UserBBI.postKnownBits()); + unsigned CPEOffset = UserBBI.postOffset(CPELogAlign) + Delta; if (isOffsetInRange(UserOffset, CPEOffset, U)) { DEBUG(dbgs() << "Split at end of BB#" << UserMBB->getNumber() @@ -1299,20 +1282,16 @@ void ARMConstantIslands::createNewWater(unsigned CPUserIndex, // up the insertion point. // Try to split the block so it's fully aligned. Compute the latest split - // point where we can add a 4-byte branch instruction, and then - // WorstCaseAlign to LogAlign. + // point where we can add a 4-byte branch instruction, and then align to + // LogAlign which is the largest possible alignment in the function. unsigned LogAlign = MF->getAlignment(); assert(LogAlign >= CPELogAlign && "Over-aligned constant pool entry"); unsigned KnownBits = UserBBI.internalKnownBits(); unsigned UPad = UnknownPadding(LogAlign, KnownBits); - unsigned BaseInsertOffset = UserOffset + U.getMaxDisp(); + unsigned BaseInsertOffset = UserOffset + U.getMaxDisp() - UPad; DEBUG(dbgs() << format("Split in middle of big block before %#x", BaseInsertOffset)); - // Account for alignment and unknown padding. - BaseInsertOffset &= ~((1u << LogAlign) - 1); - BaseInsertOffset -= UPad; - // The 4 in the following is for the unconditional branch we'll be inserting // (allows for long branch on Thumb1). Alignment of the island is handled // inside isOffsetInRange. @@ -1327,11 +1306,11 @@ void ARMConstantIslands::createNewWater(unsigned CPUserIndex, // pool entries following this block; only the last one is in the water list. // Back past any possible branches (allow for a conditional and a maximally // long unconditional). - if (BaseInsertOffset >= BBInfo[UserMBB->getNumber()+1].Offset) - BaseInsertOffset = BBInfo[UserMBB->getNumber()+1].Offset - - (isThumb1 ? 6 : 8); - unsigned EndInsertOffset = - WorstCaseAlign(BaseInsertOffset + 4, LogAlign, KnownBits) + + if (BaseInsertOffset + 8 >= UserBBI.postOffset()) { + BaseInsertOffset = UserBBI.postOffset() - UPad - 8; + DEBUG(dbgs() << format("Move inside block: %#x\n", BaseInsertOffset)); + } + unsigned EndInsertOffset = BaseInsertOffset + 4 + UPad + CPEMI->getOperand(2).getImm(); MachineBasicBlock::iterator MI = UserMI; ++MI; @@ -1342,6 +1321,7 @@ void ARMConstantIslands::createNewWater(unsigned CPUserIndex, Offset < BaseInsertOffset; Offset += TII->GetInstSizeInBytes(MI), MI = llvm::next(MI)) { + assert(MI != UserMBB->end() && "Fell off end of block"); if (CPUIndex < NumCPUsers && CPUsers[CPUIndex].MI == MI) { CPUser &U = CPUsers[CPUIndex]; if (!isOffsetInRange(Offset, EndInsertOffset, U)) { @@ -1353,9 +1333,7 @@ void ARMConstantIslands::createNewWater(unsigned CPUserIndex, // reused within the block, but it doesn't matter much. Also assume CPEs // are added in order with alignment padding. We may eventually be able // to pack the aligned CPEs better. 
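The rewrite above drops WorstCaseAlign in favor of block offsets that already bake in worst-case padding, so only UnknownPadding(LogAlign, KnownBits) is ever added: the largest padding the unknown low bits of an offset could still force. A self-contained sketch of that quantity, matching the helper's documented contract (the in-tree body is abbreviated in this hunk):

// With only the low KnownBits bits of an offset known to be zero, aligning
// to (1 << LogAlign) bytes can require at most this much padding.
static unsigned unknownPadding(unsigned LogAlign, unsigned KnownBits) {
  if (KnownBits < LogAlign)
    return (1u << LogAlign) - (1u << KnownBits);
  return 0;
}
// Example: LogAlign = 2 (4-byte alignment), KnownBits = 1 (offset is even):
// the offset can be 2 mod 4, so at most 2 bytes of padding are needed.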
- EndInsertOffset = RoundUpToAlignment(EndInsertOffset, - 1u << getCPELogAlign(U.CPEMI)) + - U.CPEMI->getOperand(2).getImm(); + EndInsertOffset += U.CPEMI->getOperand(2).getImm(); CPUIndex++; } diff --git a/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/lib/Target/ARM/ARMExpandPseudoInsts.cpp index 5fc0360..15bb32e 100644 --- a/lib/Target/ARM/ARMExpandPseudoInsts.cpp +++ b/lib/Target/ARM/ARMExpandPseudoInsts.cpp @@ -459,22 +459,23 @@ void ARMExpandPseudo::ExpandVST(MachineBasicBlock::iterator &MBBI) { MIB.addOperand(MI.getOperand(OpIdx++)); bool SrcIsKill = MI.getOperand(OpIdx).isKill(); + bool SrcIsUndef = MI.getOperand(OpIdx).isUndef(); unsigned SrcReg = MI.getOperand(OpIdx++).getReg(); unsigned D0, D1, D2, D3; GetDSubRegs(SrcReg, RegSpc, TRI, D0, D1, D2, D3); - MIB.addReg(D0); + MIB.addReg(D0, getUndefRegState(SrcIsUndef)); if (NumRegs > 1 && TableEntry->copyAllListRegs) - MIB.addReg(D1); + MIB.addReg(D1, getUndefRegState(SrcIsUndef)); if (NumRegs > 2 && TableEntry->copyAllListRegs) - MIB.addReg(D2); + MIB.addReg(D2, getUndefRegState(SrcIsUndef)); if (NumRegs > 3 && TableEntry->copyAllListRegs) - MIB.addReg(D3); + MIB.addReg(D3, getUndefRegState(SrcIsUndef)); // Copy the predicate operands. MIB.addOperand(MI.getOperand(OpIdx++)); MIB.addOperand(MI.getOperand(OpIdx++)); - if (SrcIsKill) // Add an implicit kill for the super-reg. + if (SrcIsKill && !SrcIsUndef) // Add an implicit kill for the super-reg. MIB->addRegisterKilled(SrcReg, TRI, true); TransferImpOps(MI, MIB, MIB); @@ -925,7 +926,7 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, if (isARM) { AddDefaultPred(MIB3); if (Opcode == ARM::MOV_ga_pcrel_ldr) - MIB2->setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); + MIB3->setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); } TransferImpOps(MI, MIB1, MIB3); MI.eraseFromParent(); @@ -1008,7 +1009,7 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc)); unsigned OpIdx = 0; unsigned SrcReg = MI.getOperand(1).getReg(); - unsigned Lane = getARMRegisterNumbering(SrcReg) & 1; + unsigned Lane = TRI->getEncodingValue(SrcReg) & 1; unsigned DReg = TRI->getMatchingSuperReg(SrcReg, Lane & 1 ? ARM::ssub_1 : ARM::ssub_0, &ARM::DPR_VFP2RegClass); diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp index 2e1eaca..57f8116 100644 --- a/lib/Target/ARM/ARMFastISel.cpp +++ b/lib/Target/ARM/ARMFastISel.cpp @@ -47,11 +47,6 @@ #include "llvm/Target/TargetOptions.h" using namespace llvm; -static cl::opt -DisableARMFastISel("disable-arm-fast-isel", - cl::desc("Turn off experimental ARM fast-isel support"), - cl::init(false), cl::Hidden); - extern cl::opt EnableARMLongCalls; namespace { @@ -92,8 +87,9 @@ class ARMFastISel : public FastISel { LLVMContext *Context; public: - explicit ARMFastISel(FunctionLoweringInfo &funcInfo) - : FastISel(funcInfo), + explicit ARMFastISel(FunctionLoweringInfo &funcInfo, + const TargetLibraryInfo *libInfo) + : FastISel(funcInfo, libInfo), TM(funcInfo.MF->getTarget()), TII(*TM.getInstrInfo()), TLI(*TM.getTargetLowering()) { @@ -172,6 +168,7 @@ class ARMFastISel : public FastISel { bool SelectRet(const Instruction *I); bool SelectTrunc(const Instruction *I); bool SelectIntExt(const Instruction *I); + bool SelectShift(const Instruction *I, ARM_AM::ShiftOpc ShiftTy); // Utility routines. 
private: @@ -182,7 +179,6 @@ class ARMFastISel : public FastISel { bool ARMEmitLoad(EVT VT, unsigned &ResultReg, Address &Addr, unsigned Alignment = 0, bool isZExt = true, bool allocReg = true); - bool ARMEmitStore(EVT VT, unsigned SrcReg, Address &Addr, unsigned Alignment = 0); bool ARMComputeAddress(const Value *Obj, Address &Addr); @@ -195,21 +191,25 @@ class ARMFastISel : public FastISel { unsigned ARMMaterializeGV(const GlobalValue *GV, EVT VT); unsigned ARMMoveToFPReg(EVT VT, unsigned SrcReg); unsigned ARMMoveToIntReg(EVT VT, unsigned SrcReg); - unsigned ARMSelectCallOp(const GlobalValue *GV); + unsigned ARMSelectCallOp(bool UseReg); // Call handling routines. private: - CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool Return); + CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, + bool Return, + bool isVarArg); bool ProcessCallArgs(SmallVectorImpl &Args, SmallVectorImpl &ArgRegs, SmallVectorImpl &ArgVTs, SmallVectorImpl &ArgFlags, SmallVectorImpl &RegArgs, CallingConv::ID CC, - unsigned &NumBytes); + unsigned &NumBytes, + bool isVarArg); + unsigned getLibcallReg(const Twine &Name); bool FinishCall(MVT RetVT, SmallVectorImpl &UsedRegs, const Instruction *I, CallingConv::ID CC, - unsigned &NumBytes); + unsigned &NumBytes, bool isVarArg); bool ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call); // OptionalDef handling routines. @@ -719,7 +719,7 @@ unsigned ARMFastISel::TargetMaterializeAlloca(const AllocaInst *AI) { if (!FuncInfo.StaticAllocaMap.count(AI)) return 0; MVT VT; - if (!isLoadTypeLegal(AI->getType(), VT)) return false; + if (!isLoadTypeLegal(AI->getType(), VT)) return 0; DenseMap::iterator SI = FuncInfo.StaticAllocaMap.find(AI); @@ -910,8 +910,9 @@ void ARMFastISel::ARMSimplifyAddress(Address &Addr, EVT VT, bool useAM3) { // put the alloca address into a register, set the base type back to // register and continue. This should almost never happen. if (needsLowering && Addr.BaseType == Address::FrameIndexBase) { - const TargetRegisterClass *RC = isThumb2 ? ARM::tGPRRegisterClass - : ARM::GPRRegisterClass; + const TargetRegisterClass *RC = isThumb2 ? + (const TargetRegisterClass*)&ARM::tGPRRegClass : + (const TargetRegisterClass*)&ARM::GPRRegClass; unsigned ResultReg = createResultReg(RC); unsigned Opc = isThumb2 ? ARM::t2ADDri : ARM::ADDri; AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, @@ -1005,7 +1006,7 @@ bool ARMFastISel::ARMEmitLoad(EVT VT, unsigned &ResultReg, Address &Addr, useAM3 = true; } } - RC = ARM::GPRRegisterClass; + RC = &ARM::GPRRegClass; break; case MVT::i16: if (isThumb2) { @@ -1017,7 +1018,7 @@ bool ARMFastISel::ARMEmitLoad(EVT VT, unsigned &ResultReg, Address &Addr, Opc = isZExt ? ARM::LDRH : ARM::LDRSH; useAM3 = true; } - RC = ARM::GPRRegisterClass; + RC = &ARM::GPRRegClass; break; case MVT::i32: if (isThumb2) { @@ -1028,7 +1029,7 @@ bool ARMFastISel::ARMEmitLoad(EVT VT, unsigned &ResultReg, Address &Addr, } else { Opc = ARM::LDRi12; } - RC = ARM::GPRRegisterClass; + RC = &ARM::GPRRegClass; break; case MVT::f32: if (!Subtarget->hasVFP2()) return false; @@ -1037,7 +1038,7 @@ bool ARMFastISel::ARMEmitLoad(EVT VT, unsigned &ResultReg, Address &Addr, needVMOV = true; VT = MVT::i32; Opc = isThumb2 ? ARM::t2LDRi12 : ARM::LDRi12; - RC = ARM::GPRRegisterClass; + RC = &ARM::GPRRegClass; } else { Opc = ARM::VLDRS; RC = TLI.getRegClassFor(VT); @@ -1106,8 +1107,9 @@ bool ARMFastISel::ARMEmitStore(EVT VT, unsigned SrcReg, Address &Addr, // This is mostly going to be Neon/vector support. 
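// A standalone sketch of the f32 path in ARMEmitLoad above; opcode and
// register-class names are plain strings here, standing in for the real
// ARM::* enumerators and the new &ARM::*RegClass pointers. Aligned f32
// loads go straight into an S-register with VLDRS; unaligned ones are
// done as an integer load into a GPR followed by a VMOV.
#include <string>
struct F32LoadPickSketch { std::string Opc, RC; bool NeedVMOV; };
static F32LoadPickSketch pickF32LoadSketch(bool Thumb2, bool Unaligned) {
  F32LoadPickSketch P;
  P.NeedVMOV = Unaligned;
  if (Unaligned) {                        // integer load, VMOV afterwards
    P.Opc = Thumb2 ? "t2LDRi12" : "LDRi12";
    P.RC  = "GPR";
  } else {
    P.Opc = "VLDRS";
    P.RC  = "SPR";
  }
  return P;
}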
default: return false; case MVT::i1: { - unsigned Res = createResultReg(isThumb2 ? ARM::tGPRRegisterClass : - ARM::GPRRegisterClass); + unsigned Res = createResultReg(isThumb2 ? + (const TargetRegisterClass*)&ARM::tGPRRegClass : + (const TargetRegisterClass*)&ARM::GPRRegClass); unsigned Opc = isThumb2 ? ARM::t2ANDri : ARM::ANDri; AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), Res) @@ -1358,7 +1360,7 @@ bool ARMFastISel::SelectIndirectBr(const Instruction *I) { unsigned Opc = isThumb2 ? ARM::tBRIND : ARM::BX; AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc)) .addReg(AddrReg)); - return true; + return true; } bool ARMFastISel::ARMEmitCmp(const Value *Src1Value, const Value *Src2Value, @@ -1423,12 +1425,12 @@ bool ARMFastISel::ARMEmitCmp(const Value *Src1Value, const Value *Src2Value, if (!UseImm) CmpOpc = ARM::t2CMPrr; else - CmpOpc = isNegativeImm ? ARM::t2CMNzri : ARM::t2CMPri; + CmpOpc = isNegativeImm ? ARM::t2CMNri : ARM::t2CMPri; } else { if (!UseImm) CmpOpc = ARM::CMPrr; else - CmpOpc = isNegativeImm ? ARM::CMNzri : ARM::CMPri; + CmpOpc = isNegativeImm ? ARM::CMNri : ARM::CMPri; } break; } @@ -1491,8 +1493,9 @@ bool ARMFastISel::SelectCmp(const Instruction *I) { // Now set a register based on the comparison. Explicitly set the predicates // here. unsigned MovCCOpc = isThumb2 ? ARM::t2MOVCCi : ARM::MOVCCi; - const TargetRegisterClass *RC = isThumb2 ? ARM::rGPRRegisterClass - : ARM::GPRRegisterClass; + const TargetRegisterClass *RC = isThumb2 ? + (const TargetRegisterClass*)&ARM::rGPRRegClass : + (const TargetRegisterClass*)&ARM::GPRRegClass; unsigned DestReg = createResultReg(RC); Constant *Zero = ConstantInt::get(Type::getInt32Ty(*Context), 0); unsigned ZeroReg = TargetMaterializeConstant(Zero); @@ -1516,7 +1519,7 @@ bool ARMFastISel::SelectFPExt(const Instruction *I) { unsigned Op = getRegForValue(V); if (Op == 0) return false; - unsigned Result = createResultReg(ARM::DPRRegisterClass); + unsigned Result = createResultReg(&ARM::DPRRegClass); AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(ARM::VCVTDS), Result) .addReg(Op)); @@ -1535,7 +1538,7 @@ bool ARMFastISel::SelectFPTrunc(const Instruction *I) { unsigned Op = getRegForValue(V); if (Op == 0) return false; - unsigned Result = createResultReg(ARM::SPRRegisterClass); + unsigned Result = createResultReg(&ARM::SPRRegClass); AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(ARM::VCVTSD), Result) .addReg(Op)); @@ -1736,7 +1739,7 @@ bool ARMFastISel::SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode) { // type and the target independent selector doesn't know how to handle it. if (DestVT != MVT::i16 && DestVT != MVT::i8 && DestVT != MVT::i1) return false; - + unsigned Opc; switch (ISDOpcode) { default: return false; @@ -1809,10 +1812,11 @@ bool ARMFastISel::SelectBinaryFPOp(const Instruction *I, unsigned ISDOpcode) { // Call Handling Code -// This is largely taken directly from CCAssignFnForNode - we don't support -// varargs in FastISel so that part has been removed. +// This is largely taken directly from CCAssignFnForNode // TODO: We may not support all of this. 
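// A standalone sketch of the dispatch CCAssignFnForCall performs just
// below, for the default C calling convention: the hard-float VFP rules
// are chosen only with VFP2, a hard-float ABI, and a non-variadic call;
// everything else degrades to soft-float AAPCS (or APCS on the old ABI).
// Enumerators are illustrative stand-ins, not the real CCAssignFn values.
enum CCPickSketch { CC_APCS, CC_AAPCS, CC_AAPCS_VFP };
static CCPickSketch pickDefaultCCSketch(bool AAPCS_ABI, bool HardFloatVFP2,
                                        bool IsVarArg) {
  if (!AAPCS_ABI)
    return CC_APCS;
  return (HardFloatVFP2 && !IsVarArg) ? CC_AAPCS_VFP : CC_AAPCS;
}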
-CCAssignFn *ARMFastISel::CCAssignFnForCall(CallingConv::ID CC, bool Return) { +CCAssignFn *ARMFastISel::CCAssignFnForCall(CallingConv::ID CC, + bool Return, + bool isVarArg) { switch (CC) { default: llvm_unreachable("Unsupported calling convention"); @@ -1825,18 +1829,26 @@ CCAssignFn *ARMFastISel::CCAssignFnForCall(CallingConv::ID CC, bool Return) { // Use target triple & subtarget features to do actual dispatch. if (Subtarget->isAAPCS_ABI()) { if (Subtarget->hasVFP2() && - TM.Options.FloatABIType == FloatABI::Hard) + TM.Options.FloatABIType == FloatABI::Hard && !isVarArg) return (Return ? RetCC_ARM_AAPCS_VFP: CC_ARM_AAPCS_VFP); else return (Return ? RetCC_ARM_AAPCS: CC_ARM_AAPCS); } else return (Return ? RetCC_ARM_APCS: CC_ARM_APCS); case CallingConv::ARM_AAPCS_VFP: - return (Return ? RetCC_ARM_AAPCS_VFP: CC_ARM_AAPCS_VFP); + if (!isVarArg) + return (Return ? RetCC_ARM_AAPCS_VFP: CC_ARM_AAPCS_VFP); + // Fall through to soft float variant, variadic functions don't + // use hard floating point ABI. case CallingConv::ARM_AAPCS: return (Return ? RetCC_ARM_AAPCS: CC_ARM_AAPCS); case CallingConv::ARM_APCS: return (Return ? RetCC_ARM_APCS: CC_ARM_APCS); + case CallingConv::GHC: + if (Return) + llvm_unreachable("Can't return in GHC call convention"); + else + return CC_ARM_APCS_GHC; } } @@ -1846,10 +1858,12 @@ bool ARMFastISel::ProcessCallArgs(SmallVectorImpl &Args, SmallVectorImpl &ArgFlags, SmallVectorImpl &RegArgs, CallingConv::ID CC, - unsigned &NumBytes) { + unsigned &NumBytes, + bool isVarArg) { SmallVector ArgLocs; - CCState CCInfo(CC, false, *FuncInfo.MF, TM, ArgLocs, *Context); - CCInfo.AnalyzeCallOperands(ArgVTs, ArgFlags, CCAssignFnForCall(CC, false)); + CCState CCInfo(CC, isVarArg, *FuncInfo.MF, TM, ArgLocs, *Context); + CCInfo.AnalyzeCallOperands(ArgVTs, ArgFlags, + CCAssignFnForCall(CC, false, isVarArg)); // Check that we can handle all of the arguments. If we can't, then bail out // now before we add code to the MBB. @@ -1981,7 +1995,7 @@ bool ARMFastISel::ProcessCallArgs(SmallVectorImpl &Args, bool ARMFastISel::FinishCall(MVT RetVT, SmallVectorImpl &UsedRegs, const Instruction *I, CallingConv::ID CC, - unsigned &NumBytes) { + unsigned &NumBytes, bool isVarArg) { // Issue CALLSEQ_END unsigned AdjStackUp = TII.getCallFrameDestroyOpcode(); AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, @@ -1991,8 +2005,8 @@ bool ARMFastISel::FinishCall(MVT RetVT, SmallVectorImpl &UsedRegs, // Now the return value. if (RetVT != MVT::isVoid) { SmallVector RVLocs; - CCState CCInfo(CC, false, *FuncInfo.MF, TM, RVLocs, *Context); - CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC, true)); + CCState CCInfo(CC, isVarArg, *FuncInfo.MF, TM, RVLocs, *Context); + CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC, true, isVarArg)); // Copy all of the result registers out of their specified physreg. if (RVLocs.size() == 2 && RetVT == MVT::f64) { @@ -2041,9 +2055,6 @@ bool ARMFastISel::SelectRet(const Instruction *I) { if (!FuncInfo.CanLowerReturn) return false; - if (F.isVarArg()) - return false; - CallingConv::ID CC = F.getCallingConv(); if (Ret->getNumOperands() > 0) { SmallVector Outs; @@ -2053,7 +2064,8 @@ bool ARMFastISel::SelectRet(const Instruction *I) { // Analyze operands of the call, assigning locations to each operand. 
SmallVector ValLocs; CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, TM, ValLocs,I->getContext()); - CCInfo.AnalyzeReturn(Outs, CCAssignFnForCall(CC, true /* is Ret */)); + CCInfo.AnalyzeReturn(Outs, CCAssignFnForCall(CC, true /* is Ret */, + F.isVarArg())); const Value *RV = Ret->getOperand(0); unsigned Reg = getRegForValue(RV); @@ -2110,12 +2122,17 @@ bool ARMFastISel::SelectRet(const Instruction *I) { return true; } -unsigned ARMFastISel::ARMSelectCallOp(const GlobalValue *GV) { - if (isThumb2) { - return ARM::tBL; - } else { - return ARM::BL; - } +unsigned ARMFastISel::ARMSelectCallOp(bool UseReg) { + if (UseReg) + return isThumb2 ? ARM::tBLXr : ARM::BLX; + else + return isThumb2 ? ARM::tBL : ARM::BL; +} + +unsigned ARMFastISel::getLibcallReg(const Twine &Name) { + GlobalValue *GV = new GlobalVariable(Type::getInt32Ty(*Context), false, + GlobalValue::ExternalLinkage, 0, Name); + return ARMMaterializeGV(GV, TLI.getValueType(GV->getType())); } // A quick function that will emit a call for a named libcall in F with the @@ -2136,8 +2153,14 @@ bool ARMFastISel::ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call) { else if (!isTypeLegal(RetTy, RetVT)) return false; - // TODO: For now if we have long calls specified we don't handle the call. - if (EnableARMLongCalls) return false; + // Can't handle non-double multi-reg retvals. + if (RetVT != MVT::isVoid && RetVT != MVT::i32) { + SmallVector RVLocs; + CCState CCInfo(CC, false, *FuncInfo.MF, TM, RVLocs, *Context); + CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC, true, false)); + if (RVLocs.size() >= 2 && RetVT != MVT::f64) + return false; + } // Set up the argument vectors. SmallVector Args; @@ -2170,23 +2193,36 @@ bool ARMFastISel::ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call) { // Handle the arguments now that we've gotten them. SmallVector RegArgs; unsigned NumBytes; - if (!ProcessCallArgs(Args, ArgRegs, ArgVTs, ArgFlags, RegArgs, CC, NumBytes)) + if (!ProcessCallArgs(Args, ArgRegs, ArgVTs, ArgFlags, + RegArgs, CC, NumBytes, false)) return false; + unsigned CalleeReg = 0; + if (EnableARMLongCalls) { + CalleeReg = getLibcallReg(TLI.getLibcallName(Call)); + if (CalleeReg == 0) return false; + } + // Issue the call. - MachineInstrBuilder MIB; - unsigned CallOpc = ARMSelectCallOp(NULL); - if (isThumb2) - // Explicitly adding the predicate here. - MIB = AddDefaultPred(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, - TII.get(CallOpc))) - .addExternalSymbol(TLI.getLibcallName(Call)); - else + unsigned CallOpc = ARMSelectCallOp(EnableARMLongCalls); + MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, + DL, TII.get(CallOpc)); + if (isThumb2) { // Explicitly adding the predicate here. - MIB = AddDefaultPred(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, - TII.get(CallOpc)) - .addExternalSymbol(TLI.getLibcallName(Call))); + AddDefaultPred(MIB); + if (EnableARMLongCalls) + MIB.addReg(CalleeReg); + else + MIB.addExternalSymbol(TLI.getLibcallName(Call)); + } else { + if (EnableARMLongCalls) + MIB.addReg(CalleeReg); + else + MIB.addExternalSymbol(TLI.getLibcallName(Call)); + // Explicitly adding the predicate here. + AddDefaultPred(MIB); + } // Add implicit physical register uses to the call. for (unsigned i = 0, e = RegArgs.size(); i != e; ++i) MIB.addReg(RegArgs[i]); @@ -2197,7 +2233,7 @@ bool ARMFastISel::ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call) { // Finish off the call including any return values. 
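// A standalone sketch of the call-opcode choice ARMSelectCallOp makes
// above: once the callee has to live in a register (indirect calls,
// libcalls under -arm-long-calls), the direct BL/tBL branch-and-link is
// replaced by its register form BLX/tBLXr. Strings stand in for opcodes.
static const char *selectCallOpSketch(bool UseReg, bool IsThumb2) {
  if (UseReg)
    return IsThumb2 ? "tBLXr" : "BLX"; // callee held in a register
  return IsThumb2 ? "tBL" : "BL";      // direct call
}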
SmallVector UsedRegs; - if (!FinishCall(RetVT, UsedRegs, I, CC, NumBytes)) return false; + if (!FinishCall(RetVT, UsedRegs, I, CC, NumBytes, false)) return false; // Set all unused physreg defs as dead. static_cast(MIB)->setPhysRegsDeadExcept(UsedRegs, TRI); @@ -2213,22 +2249,15 @@ bool ARMFastISel::SelectCall(const Instruction *I, // Can't handle inline asm. if (isa(Callee)) return false; - // Only handle global variable Callees. - const GlobalValue *GV = dyn_cast(Callee); - if (!GV) - return false; - // Check the calling convention. ImmutableCallSite CS(CI); CallingConv::ID CC = CS.getCallingConv(); // TODO: Avoid some calling conventions? - // Let SDISel handle vararg functions. PointerType *PT = cast(CS.getCalledValue()->getType()); FunctionType *FTy = cast(PT->getElementType()); - if (FTy->isVarArg()) - return false; + bool isVarArg = FTy->isVarArg(); // Handle *simple* calls for now. Type *RetTy = I->getType(); @@ -2239,8 +2268,15 @@ bool ARMFastISel::SelectCall(const Instruction *I, RetVT != MVT::i8 && RetVT != MVT::i1) return false; - // TODO: For now if we have long calls specified we don't handle the call. - if (EnableARMLongCalls) return false; + // Can't handle non-double multi-reg retvals. + if (RetVT != MVT::isVoid && RetVT != MVT::i1 && RetVT != MVT::i8 && + RetVT != MVT::i16 && RetVT != MVT::i32) { + SmallVector RVLocs; + CCState CCInfo(CC, isVarArg, *FuncInfo.MF, TM, RVLocs, *Context); + CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC, true, isVarArg)); + if (RVLocs.size() >= 2 && RetVT != MVT::f64) + return false; + } // Set up the argument vectors. SmallVector Args; @@ -2295,33 +2331,49 @@ bool ARMFastISel::SelectCall(const Instruction *I, // Handle the arguments now that we've gotten them. SmallVector RegArgs; unsigned NumBytes; - if (!ProcessCallArgs(Args, ArgRegs, ArgVTs, ArgFlags, RegArgs, CC, NumBytes)) + if (!ProcessCallArgs(Args, ArgRegs, ArgVTs, ArgFlags, + RegArgs, CC, NumBytes, isVarArg)) return false; + bool UseReg = false; + const GlobalValue *GV = dyn_cast(Callee); + if (!GV || EnableARMLongCalls) UseReg = true; + + unsigned CalleeReg = 0; + if (UseReg) { + if (IntrMemName) + CalleeReg = getLibcallReg(IntrMemName); + else + CalleeReg = getRegForValue(Callee); + + if (CalleeReg == 0) return false; + } + // Issue the call. - MachineInstrBuilder MIB; - unsigned CallOpc = ARMSelectCallOp(GV); - // Explicitly adding the predicate here. + unsigned CallOpc = ARMSelectCallOp(UseReg); + MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, + DL, TII.get(CallOpc)); if(isThumb2) { // Explicitly adding the predicate here. - MIB = AddDefaultPred(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, - TII.get(CallOpc))); - if (!IntrMemName) + AddDefaultPred(MIB); + if (UseReg) + MIB.addReg(CalleeReg); + else if (!IntrMemName) MIB.addGlobalAddress(GV, 0, 0); - else + else MIB.addExternalSymbol(IntrMemName, 0); } else { - if (!IntrMemName) - // Explicitly adding the predicate here. - MIB = AddDefaultPred(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, - TII.get(CallOpc)) - .addGlobalAddress(GV, 0, 0)); + if (UseReg) + MIB.addReg(CalleeReg); + else if (!IntrMemName) + MIB.addGlobalAddress(GV, 0, 0); else - MIB = AddDefaultPred(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, - TII.get(CallOpc)) - .addExternalSymbol(IntrMemName, 0)); + MIB.addExternalSymbol(IntrMemName, 0); + + // Explicitly adding the predicate here. + AddDefaultPred(MIB); } - + // Add implicit physical register uses to the call. 
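// A standalone sketch of the callee-operand choice SelectCall makes above:
// a missing GlobalValue (an indirect call) or -arm-long-calls forces the
// callee through a register; otherwise a direct global address is used,
// or an external symbol name for the mem* intrinsic helpers.
enum CalleeFormSketch { ViaRegister, ViaGlobalAddress, ViaExternalSymbol };
static CalleeFormSketch pickCalleeFormSketch(bool HasGV, bool LongCalls,
                                             bool IsIntrinsicMemName) {
  if (!HasGV || LongCalls)
    return ViaRegister;           // mirrors "if (!GV || EnableARMLongCalls)"
  return IsIntrinsicMemName ? ViaExternalSymbol : ViaGlobalAddress;
}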
for (unsigned i = 0, e = RegArgs.size(); i != e; ++i) MIB.addReg(RegArgs[i]); @@ -2332,7 +2384,8 @@ bool ARMFastISel::SelectCall(const Instruction *I, // Finish off the call including any return values. SmallVector UsedRegs; - if (!FinishCall(RetVT, UsedRegs, I, CC, NumBytes)) return false; + if (!FinishCall(RetVT, UsedRegs, I, CC, NumBytes, isVarArg)) + return false; // Set all unused physreg defs as dead. static_cast(MIB)->setPhysRegsDeadExcept(UsedRegs, TRI); @@ -2383,6 +2436,42 @@ bool ARMFastISel::SelectIntrinsicCall(const IntrinsicInst &I) { // FIXME: Handle more intrinsics. switch (I.getIntrinsicID()) { default: return false; + case Intrinsic::frameaddress: { + MachineFrameInfo *MFI = FuncInfo.MF->getFrameInfo(); + MFI->setFrameAddressIsTaken(true); + + unsigned LdrOpc; + const TargetRegisterClass *RC; + if (isThumb2) { + LdrOpc = ARM::t2LDRi12; + RC = (const TargetRegisterClass*)&ARM::tGPRRegClass; + } else { + LdrOpc = ARM::LDRi12; + RC = (const TargetRegisterClass*)&ARM::GPRRegClass; + } + + const ARMBaseRegisterInfo *RegInfo = + static_cast(TM.getRegisterInfo()); + unsigned FramePtr = RegInfo->getFrameRegister(*(FuncInfo.MF)); + unsigned SrcReg = FramePtr; + + // Recursively load frame address + // ldr r0 [fp] + // ldr r0 [r0] + // ldr r0 [r0] + // ... + unsigned DestReg; + unsigned Depth = cast(I.getOperand(0))->getZExtValue(); + while (Depth--) { + DestReg = createResultReg(RC); + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(LdrOpc), DestReg) + .addReg(SrcReg).addImm(0)); + SrcReg = DestReg; + } + UpdateValueMap(&I, SrcReg); + return true; + } case Intrinsic::memcpy: case Intrinsic::memmove: { const MemTransferInst &MTI = cast(I); @@ -2406,10 +2495,10 @@ bool ARMFastISel::SelectIntrinsicCall(const IntrinsicInst &I) { return true; } } - + if (!MTI.getLength()->getType()->isIntegerTy(32)) return false; - + if (MTI.getSourceAddressSpace() > 255 || MTI.getDestAddressSpace() > 255) return false; @@ -2421,20 +2510,24 @@ bool ARMFastISel::SelectIntrinsicCall(const IntrinsicInst &I) { // Don't handle volatile. if (MSI.isVolatile()) return false; - + if (!MSI.getLength()->getType()->isIntegerTy(32)) return false; - + if (MSI.getDestAddressSpace() > 255) return false; - + return SelectCall(&I, "memset"); } + case Intrinsic::trap: { + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(ARM::TRAP)); + return true; + } } } bool ARMFastISel::SelectTrunc(const Instruction *I) { - // The high bits for a type smaller than the register size are assumed to be + // The high bits for a type smaller than the register size are assumed to be // undefined. Value *Op = I->getOperand(0); @@ -2522,6 +2615,61 @@ bool ARMFastISel::SelectIntExt(const Instruction *I) { return true; } +bool ARMFastISel::SelectShift(const Instruction *I, + ARM_AM::ShiftOpc ShiftTy) { + // We handle thumb2 mode by target independent selector + // or SelectionDAG ISel. + if (isThumb2) + return false; + + // Only handle i32 now. + EVT DestVT = TLI.getValueType(I->getType(), true); + if (DestVT != MVT::i32) + return false; + + unsigned Opc = ARM::MOVsr; + unsigned ShiftImm; + Value *Src2Value = I->getOperand(1); + if (const ConstantInt *CI = dyn_cast(Src2Value)) { + ShiftImm = CI->getZExtValue(); + + // Fall back to selection DAG isel if the shift amount + // is zero or greater than the width of the value type. 
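// Before the bail-out checks that follow, a standalone sketch of the form
// choice SelectShift is making: a constant amount in [1, 31] selects the
// immediate form (MOVsi); amount 0 or >= 32 is left to the DAG selector,
// and a non-constant amount selects the register form (MOVsr).
static const char *pickShiftFormSketch(bool IsConstAmt, unsigned Imm) {
  if (IsConstAmt) {
    if (Imm == 0 || Imm >= 32)
      return 0;      // bail out to SelectionDAG isel
    return "MOVsi";  // shift amount encoded in the instruction
  }
  return "MOVsr";    // shift amount taken from a register
}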
+ if (ShiftImm == 0 || ShiftImm >=32) + return false; + + Opc = ARM::MOVsi; + } + + Value *Src1Value = I->getOperand(0); + unsigned Reg1 = getRegForValue(Src1Value); + if (Reg1 == 0) return false; + + unsigned Reg2; + if (Opc == ARM::MOVsr) { + Reg2 = getRegForValue(Src2Value); + if (Reg2 == 0) return false; + } + + unsigned ResultReg = createResultReg(TLI.getRegClassFor(MVT::i32)); + if(ResultReg == 0) return false; + + MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(Opc), ResultReg) + .addReg(Reg1); + + if (Opc == ARM::MOVsi) + MIB.addImm(ARM_AM::getSORegOpc(ShiftTy, ShiftImm)); + else if (Opc == ARM::MOVsr) { + MIB.addReg(Reg2); + MIB.addImm(ARM_AM::getSORegOpc(ShiftTy, 0)); + } + + AddOptionalDefs(MIB); + UpdateValueMap(I, ResultReg); + return true; +} + // TODO: SoftFP support. bool ARMFastISel::TargetSelectInstruction(const Instruction *I) { @@ -2582,6 +2730,12 @@ bool ARMFastISel::TargetSelectInstruction(const Instruction *I) { case Instruction::ZExt: case Instruction::SExt: return SelectIntExt(I); + case Instruction::Shl: + return SelectShift(I, ARM_AM::lsl); + case Instruction::LShr: + return SelectShift(I, ARM_AM::lsr); + case Instruction::AShr: + return SelectShift(I, ARM_AM::asr); default: break; } return false; @@ -2625,7 +2779,7 @@ bool ARMFastISel::TryToFoldLoad(MachineInstr *MI, unsigned OpNo, // See if we can handle this address. Address Addr; if (!ARMComputeAddress(LI->getOperand(0), Addr)) return false; - + unsigned ResultReg = MI->getOperand(0).getReg(); if (!ARMEmitLoad(VT, ResultReg, Addr, LI->getAlignment(), isZExt, false)) return false; @@ -2634,15 +2788,15 @@ bool ARMFastISel::TryToFoldLoad(MachineInstr *MI, unsigned OpNo, } namespace llvm { - FastISel *ARM::createFastISel(FunctionLoweringInfo &funcInfo) { + FastISel *ARM::createFastISel(FunctionLoweringInfo &funcInfo, + const TargetLibraryInfo *libInfo) { // Completely untested on non-iOS. const TargetMachine &TM = funcInfo.MF->getTarget(); // Darwin and thumb1 only for now. const ARMSubtarget *Subtarget = &TM.getSubtarget(); - if (Subtarget->isTargetIOS() && !Subtarget->isThumb1Only() && - !DisableARMFastISel) - return new ARMFastISel(funcInfo); + if (Subtarget->isTargetIOS() && !Subtarget->isThumb1Only()) + return new ARMFastISel(funcInfo, libInfo); return 0; } } diff --git a/lib/Target/ARM/ARMFrameLowering.cpp b/lib/Target/ARM/ARMFrameLowering.cpp index 402ecb0..aee72d2 100644 --- a/lib/Target/ARM/ARMFrameLowering.cpp +++ b/lib/Target/ARM/ARMFrameLowering.cpp @@ -15,6 +15,8 @@ #include "ARMBaseInstrInfo.h" #include "ARMBaseRegisterInfo.h" #include "ARMMachineFunctionInfo.h" +#include "llvm/CallingConv.h" +#include "llvm/Function.h" #include "MCTargetDesc/ARMAddressingModes.h" #include "llvm/Function.h" #include "llvm/CodeGen/MachineFrameInfo.h" @@ -151,6 +153,10 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { int FramePtrSpillFI = 0; int D8SpillFI = 0; + // All calls are tail calls in GHC calling conv, and functions have no prologue/epilogue. + if (MF.getFunction()->getCallingConv() == CallingConv::GHC) + return; + // Allocate the vararg register save area. This is not counted in NumBytes. if (VARegSaveSize) emitSPUpdate(isARM, MBB, MBBI, dl, TII, -VARegSaveSize, @@ -354,6 +360,10 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF, int NumBytes = (int)MFI->getStackSize(); unsigned FramePtr = RegInfo->getFrameRegister(MF); + // All calls are tail calls in GHC calling conv, and functions have no prologue/epilogue. 
+ if (MF.getFunction()->getCallingConv() == CallingConv::GHC) + return; + if (!AFI->hasStackFrame()) { if (NumBytes != 0) emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes); @@ -790,7 +800,7 @@ static void emitAlignedDPRCS2Spills(MachineBasicBlock &MBB, // The writeback is only needed when emitting two vst1.64 instructions. if (NumAlignedDPRCS2Regs >= 6) { unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0, - ARM::QQPRRegisterClass); + &ARM::QQPRRegClass); MBB.addLiveIn(SupReg); AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VST1d64Qwb_fixed), ARM::R4) @@ -808,7 +818,7 @@ static void emitAlignedDPRCS2Spills(MachineBasicBlock &MBB, // 16-byte aligned vst1.64 with 4 d-regs, no writeback. if (NumAlignedDPRCS2Regs >= 4) { unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0, - ARM::QQPRRegisterClass); + &ARM::QQPRRegClass); MBB.addLiveIn(SupReg); AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VST1d64Q)) .addReg(ARM::R4).addImm(16).addReg(NextReg) @@ -820,7 +830,7 @@ static void emitAlignedDPRCS2Spills(MachineBasicBlock &MBB, // 16-byte aligned vst1.64 with 2 d-regs. if (NumAlignedDPRCS2Regs >= 2) { unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0, - ARM::QPRRegisterClass); + &ARM::QPRRegClass); MBB.addLiveIn(SupReg); AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VST1q64)) .addReg(ARM::R4).addImm(16).addReg(SupReg)); @@ -908,7 +918,7 @@ static void emitAlignedDPRCS2Restores(MachineBasicBlock &MBB, // 16-byte aligned vld1.64 with 4 d-regs and writeback. if (NumAlignedDPRCS2Regs >= 6) { unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0, - ARM::QQPRRegisterClass); + &ARM::QQPRRegClass); AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VLD1d64Qwb_fixed), NextReg) .addReg(ARM::R4, RegState::Define) .addReg(ARM::R4, RegState::Kill).addImm(16) @@ -924,7 +934,7 @@ static void emitAlignedDPRCS2Restores(MachineBasicBlock &MBB, // 16-byte aligned vld1.64 with 4 d-regs, no writeback. if (NumAlignedDPRCS2Regs >= 4) { unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0, - ARM::QQPRRegisterClass); + &ARM::QQPRRegClass); AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VLD1d64Q), NextReg) .addReg(ARM::R4).addImm(16) .addReg(SupReg, RegState::ImplicitDefine)); @@ -935,7 +945,7 @@ static void emitAlignedDPRCS2Restores(MachineBasicBlock &MBB, // 16-byte aligned vld1.64 with 2 d-regs. if (NumAlignedDPRCS2Regs >= 2) { unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0, - ARM::QPRRegisterClass); + &ARM::QPRRegClass); AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VLD1q64), SupReg) .addReg(ARM::R4).addImm(16)); NextReg += 2; @@ -1244,7 +1254,7 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, CanEliminateFrame = false; } - if (!ARM::GPRRegisterClass->contains(Reg)) + if (!ARM::GPRRegClass.contains(Reg)) continue; if (Spilled) { @@ -1404,7 +1414,7 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, } else if (!AFI->isThumb1OnlyFunction()) { // note: Thumb1 functions spill to R12, not the stack. Reserve a slot // closest to SP or frame pointer. 
- const TargetRegisterClass *RC = ARM::GPRRegisterClass; + const TargetRegisterClass *RC = &ARM::GPRRegClass; RS->setScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(), RC->getAlignment(), false)); diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp index 1eafbbc..ee349a7 100644 --- a/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -47,11 +47,6 @@ CheckVMLxHazard("check-vmlx-hazard", cl::Hidden, cl::desc("Check fp vmla / vmls hazard at isel time"), cl::init(true)); -static cl::opt -DisableARMIntABS("disable-arm-int-abs", cl::Hidden, - cl::desc("Enable / disable ARM integer abs transform"), - cl::init(false)); - //===--------------------------------------------------------------------===// /// ARMDAGToDAGISel - ARM specific code to select ARM machine /// instructions for SelectionDAG operations. @@ -210,29 +205,29 @@ private: /// loads of D registers and even subregs and odd subregs of Q registers. /// For NumVecs <= 2, QOpcodes1 is not used. SDNode *SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs, - unsigned *DOpcodes, - unsigned *QOpcodes0, unsigned *QOpcodes1); + const uint16_t *DOpcodes, + const uint16_t *QOpcodes0, const uint16_t *QOpcodes1); /// SelectVST - Select NEON store intrinsics. NumVecs should /// be 1, 2, 3 or 4. The opcode arrays specify the instructions used for /// stores of D registers and even subregs and odd subregs of Q registers. /// For NumVecs <= 2, QOpcodes1 is not used. SDNode *SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs, - unsigned *DOpcodes, - unsigned *QOpcodes0, unsigned *QOpcodes1); + const uint16_t *DOpcodes, + const uint16_t *QOpcodes0, const uint16_t *QOpcodes1); /// SelectVLDSTLane - Select NEON load/store lane intrinsics. NumVecs should /// be 2, 3 or 4. The opcode arrays specify the instructions used for /// load/store of D registers and Q registers. SDNode *SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating, unsigned NumVecs, - unsigned *DOpcodes, unsigned *QOpcodes); + const uint16_t *DOpcodes, const uint16_t *QOpcodes); /// SelectVLDDup - Select NEON load-duplicate intrinsics. NumVecs /// should be 2, 3 or 4. The opcode array specifies the instructions used /// for loading D registers. (Q registers are not supported.) SDNode *SelectVLDDup(SDNode *N, bool isUpdating, unsigned NumVecs, - unsigned *Opcodes); + const uint16_t *Opcodes); /// SelectVTBL - Select NEON VTBL and VTBX intrinsics. NumVecs should be 2, /// 3 or 4. 
These are custom-selected so that a REG_SEQUENCE can be @@ -583,8 +578,6 @@ bool ARMDAGToDAGISel::SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset, } - - //----- AddrMode2Type ARMDAGToDAGISel::SelectAddrMode2Worker(SDValue N, @@ -1597,8 +1590,9 @@ static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc) { } SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs, - unsigned *DOpcodes, unsigned *QOpcodes0, - unsigned *QOpcodes1) { + const uint16_t *DOpcodes, + const uint16_t *QOpcodes0, + const uint16_t *QOpcodes1) { assert(NumVecs >= 1 && NumVecs <= 4 && "VLD NumVecs out-of-range"); DebugLoc dl = N->getDebugLoc(); @@ -1729,8 +1723,9 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs, } SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs, - unsigned *DOpcodes, unsigned *QOpcodes0, - unsigned *QOpcodes1) { + const uint16_t *DOpcodes, + const uint16_t *QOpcodes0, + const uint16_t *QOpcodes1) { assert(NumVecs >= 1 && NumVecs <= 4 && "VST NumVecs out-of-range"); DebugLoc dl = N->getDebugLoc(); @@ -1875,8 +1870,8 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs, SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating, unsigned NumVecs, - unsigned *DOpcodes, - unsigned *QOpcodes) { + const uint16_t *DOpcodes, + const uint16_t *QOpcodes) { assert(NumVecs >=2 && NumVecs <= 4 && "VLDSTLane NumVecs out-of-range"); DebugLoc dl = N->getDebugLoc(); @@ -1994,7 +1989,8 @@ SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, } SDNode *ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool isUpdating, - unsigned NumVecs, unsigned *Opcodes) { + unsigned NumVecs, + const uint16_t *Opcodes) { assert(NumVecs >=2 && NumVecs <= 4 && "VLDDup NumVecs out-of-range"); DebugLoc dl = N->getDebugLoc(); @@ -2491,14 +2487,10 @@ SDNode *ARMDAGToDAGISel::SelectABSOp(SDNode *N){ SDValue XORSrc1 = N->getOperand(1); EVT VT = N->getValueType(0); - if (DisableARMIntABS) - return NULL; - if (Subtarget->isThumb1Only()) return NULL; - if (XORSrc0.getOpcode() != ISD::ADD || - XORSrc1.getOpcode() != ISD::SRA) + if (XORSrc0.getOpcode() != ISD::ADD || XORSrc1.getOpcode() != ISD::SRA) return NULL; SDValue ADDSrc0 = XORSrc0.getOperand(0); @@ -2509,16 +2501,10 @@ SDNode *ARMDAGToDAGISel::SelectABSOp(SDNode *N){ EVT XType = SRASrc0.getValueType(); unsigned Size = XType.getSizeInBits() - 1; - if (ADDSrc1 == XORSrc1 && - ADDSrc0 == SRASrc0 && - XType.isInteger() && - SRAConstant != NULL && + if (ADDSrc1 == XORSrc1 && ADDSrc0 == SRASrc0 && + XType.isInteger() && SRAConstant != NULL && Size == SRAConstant->getZExtValue()) { - - unsigned Opcode = ARM::ABS; - if (Subtarget->isThumb2()) - Opcode = ARM::t2ABS; - + unsigned Opcode = Subtarget->isThumb2() ? 
ARM::t2ABS : ARM::ABS; return CurDAG->SelectNodeTo(N, Opcode, VT, ADDSrc0); } @@ -2893,176 +2879,199 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { } case ARMISD::VLD2DUP: { - unsigned Opcodes[] = { ARM::VLD2DUPd8, ARM::VLD2DUPd16, - ARM::VLD2DUPd32 }; + static const uint16_t Opcodes[] = { ARM::VLD2DUPd8, ARM::VLD2DUPd16, + ARM::VLD2DUPd32 }; return SelectVLDDup(N, false, 2, Opcodes); } case ARMISD::VLD3DUP: { - unsigned Opcodes[] = { ARM::VLD3DUPd8Pseudo, ARM::VLD3DUPd16Pseudo, - ARM::VLD3DUPd32Pseudo }; + static const uint16_t Opcodes[] = { ARM::VLD3DUPd8Pseudo, + ARM::VLD3DUPd16Pseudo, + ARM::VLD3DUPd32Pseudo }; return SelectVLDDup(N, false, 3, Opcodes); } case ARMISD::VLD4DUP: { - unsigned Opcodes[] = { ARM::VLD4DUPd8Pseudo, ARM::VLD4DUPd16Pseudo, - ARM::VLD4DUPd32Pseudo }; + static const uint16_t Opcodes[] = { ARM::VLD4DUPd8Pseudo, + ARM::VLD4DUPd16Pseudo, + ARM::VLD4DUPd32Pseudo }; return SelectVLDDup(N, false, 4, Opcodes); } case ARMISD::VLD2DUP_UPD: { - unsigned Opcodes[] = { ARM::VLD2DUPd8wb_fixed, ARM::VLD2DUPd16wb_fixed, - ARM::VLD2DUPd32wb_fixed }; + static const uint16_t Opcodes[] = { ARM::VLD2DUPd8wb_fixed, + ARM::VLD2DUPd16wb_fixed, + ARM::VLD2DUPd32wb_fixed }; return SelectVLDDup(N, true, 2, Opcodes); } case ARMISD::VLD3DUP_UPD: { - unsigned Opcodes[] = { ARM::VLD3DUPd8Pseudo_UPD, ARM::VLD3DUPd16Pseudo_UPD, - ARM::VLD3DUPd32Pseudo_UPD }; + static const uint16_t Opcodes[] = { ARM::VLD3DUPd8Pseudo_UPD, + ARM::VLD3DUPd16Pseudo_UPD, + ARM::VLD3DUPd32Pseudo_UPD }; return SelectVLDDup(N, true, 3, Opcodes); } case ARMISD::VLD4DUP_UPD: { - unsigned Opcodes[] = { ARM::VLD4DUPd8Pseudo_UPD, ARM::VLD4DUPd16Pseudo_UPD, - ARM::VLD4DUPd32Pseudo_UPD }; + static const uint16_t Opcodes[] = { ARM::VLD4DUPd8Pseudo_UPD, + ARM::VLD4DUPd16Pseudo_UPD, + ARM::VLD4DUPd32Pseudo_UPD }; return SelectVLDDup(N, true, 4, Opcodes); } case ARMISD::VLD1_UPD: { - unsigned DOpcodes[] = { ARM::VLD1d8wb_fixed, ARM::VLD1d16wb_fixed, - ARM::VLD1d32wb_fixed, ARM::VLD1d64wb_fixed }; - unsigned QOpcodes[] = { ARM::VLD1q8wb_fixed, - ARM::VLD1q16wb_fixed, - ARM::VLD1q32wb_fixed, - ARM::VLD1q64wb_fixed }; + static const uint16_t DOpcodes[] = { ARM::VLD1d8wb_fixed, + ARM::VLD1d16wb_fixed, + ARM::VLD1d32wb_fixed, + ARM::VLD1d64wb_fixed }; + static const uint16_t QOpcodes[] = { ARM::VLD1q8wb_fixed, + ARM::VLD1q16wb_fixed, + ARM::VLD1q32wb_fixed, + ARM::VLD1q64wb_fixed }; return SelectVLD(N, true, 1, DOpcodes, QOpcodes, 0); } case ARMISD::VLD2_UPD: { - unsigned DOpcodes[] = { ARM::VLD2d8wb_fixed, - ARM::VLD2d16wb_fixed, - ARM::VLD2d32wb_fixed, - ARM::VLD1q64wb_fixed}; - unsigned QOpcodes[] = { ARM::VLD2q8PseudoWB_fixed, - ARM::VLD2q16PseudoWB_fixed, - ARM::VLD2q32PseudoWB_fixed }; + static const uint16_t DOpcodes[] = { ARM::VLD2d8wb_fixed, + ARM::VLD2d16wb_fixed, + ARM::VLD2d32wb_fixed, + ARM::VLD1q64wb_fixed}; + static const uint16_t QOpcodes[] = { ARM::VLD2q8PseudoWB_fixed, + ARM::VLD2q16PseudoWB_fixed, + ARM::VLD2q32PseudoWB_fixed }; return SelectVLD(N, true, 2, DOpcodes, QOpcodes, 0); } case ARMISD::VLD3_UPD: { - unsigned DOpcodes[] = { ARM::VLD3d8Pseudo_UPD, ARM::VLD3d16Pseudo_UPD, - ARM::VLD3d32Pseudo_UPD, ARM::VLD1q64wb_fixed}; - unsigned QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD, - ARM::VLD3q16Pseudo_UPD, - ARM::VLD3q32Pseudo_UPD }; - unsigned QOpcodes1[] = { ARM::VLD3q8oddPseudo_UPD, - ARM::VLD3q16oddPseudo_UPD, - ARM::VLD3q32oddPseudo_UPD }; + static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo_UPD, + ARM::VLD3d16Pseudo_UPD, + ARM::VLD3d32Pseudo_UPD, + ARM::VLD1q64wb_fixed}; + static const uint16_t QOpcodes0[] = { 
ARM::VLD3q8Pseudo_UPD, + ARM::VLD3q16Pseudo_UPD, + ARM::VLD3q32Pseudo_UPD }; + static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo_UPD, + ARM::VLD3q16oddPseudo_UPD, + ARM::VLD3q32oddPseudo_UPD }; return SelectVLD(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1); } case ARMISD::VLD4_UPD: { - unsigned DOpcodes[] = { ARM::VLD4d8Pseudo_UPD, ARM::VLD4d16Pseudo_UPD, - ARM::VLD4d32Pseudo_UPD, ARM::VLD1q64wb_fixed}; - unsigned QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD, - ARM::VLD4q16Pseudo_UPD, - ARM::VLD4q32Pseudo_UPD }; - unsigned QOpcodes1[] = { ARM::VLD4q8oddPseudo_UPD, - ARM::VLD4q16oddPseudo_UPD, - ARM::VLD4q32oddPseudo_UPD }; + static const uint16_t DOpcodes[] = { ARM::VLD4d8Pseudo_UPD, + ARM::VLD4d16Pseudo_UPD, + ARM::VLD4d32Pseudo_UPD, + ARM::VLD1q64wb_fixed}; + static const uint16_t QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD, + ARM::VLD4q16Pseudo_UPD, + ARM::VLD4q32Pseudo_UPD }; + static const uint16_t QOpcodes1[] = { ARM::VLD4q8oddPseudo_UPD, + ARM::VLD4q16oddPseudo_UPD, + ARM::VLD4q32oddPseudo_UPD }; return SelectVLD(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1); } case ARMISD::VLD2LN_UPD: { - unsigned DOpcodes[] = { ARM::VLD2LNd8Pseudo_UPD, ARM::VLD2LNd16Pseudo_UPD, - ARM::VLD2LNd32Pseudo_UPD }; - unsigned QOpcodes[] = { ARM::VLD2LNq16Pseudo_UPD, - ARM::VLD2LNq32Pseudo_UPD }; + static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo_UPD, + ARM::VLD2LNd16Pseudo_UPD, + ARM::VLD2LNd32Pseudo_UPD }; + static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo_UPD, + ARM::VLD2LNq32Pseudo_UPD }; return SelectVLDSTLane(N, true, true, 2, DOpcodes, QOpcodes); } case ARMISD::VLD3LN_UPD: { - unsigned DOpcodes[] = { ARM::VLD3LNd8Pseudo_UPD, ARM::VLD3LNd16Pseudo_UPD, - ARM::VLD3LNd32Pseudo_UPD }; - unsigned QOpcodes[] = { ARM::VLD3LNq16Pseudo_UPD, - ARM::VLD3LNq32Pseudo_UPD }; + static const uint16_t DOpcodes[] = { ARM::VLD3LNd8Pseudo_UPD, + ARM::VLD3LNd16Pseudo_UPD, + ARM::VLD3LNd32Pseudo_UPD }; + static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo_UPD, + ARM::VLD3LNq32Pseudo_UPD }; return SelectVLDSTLane(N, true, true, 3, DOpcodes, QOpcodes); } case ARMISD::VLD4LN_UPD: { - unsigned DOpcodes[] = { ARM::VLD4LNd8Pseudo_UPD, ARM::VLD4LNd16Pseudo_UPD, - ARM::VLD4LNd32Pseudo_UPD }; - unsigned QOpcodes[] = { ARM::VLD4LNq16Pseudo_UPD, - ARM::VLD4LNq32Pseudo_UPD }; + static const uint16_t DOpcodes[] = { ARM::VLD4LNd8Pseudo_UPD, + ARM::VLD4LNd16Pseudo_UPD, + ARM::VLD4LNd32Pseudo_UPD }; + static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo_UPD, + ARM::VLD4LNq32Pseudo_UPD }; return SelectVLDSTLane(N, true, true, 4, DOpcodes, QOpcodes); } case ARMISD::VST1_UPD: { - unsigned DOpcodes[] = { ARM::VST1d8wb_fixed, ARM::VST1d16wb_fixed, - ARM::VST1d32wb_fixed, ARM::VST1d64wb_fixed }; - unsigned QOpcodes[] = { ARM::VST1q8wb_fixed, - ARM::VST1q16wb_fixed, - ARM::VST1q32wb_fixed, - ARM::VST1q64wb_fixed }; + static const uint16_t DOpcodes[] = { ARM::VST1d8wb_fixed, + ARM::VST1d16wb_fixed, + ARM::VST1d32wb_fixed, + ARM::VST1d64wb_fixed }; + static const uint16_t QOpcodes[] = { ARM::VST1q8wb_fixed, + ARM::VST1q16wb_fixed, + ARM::VST1q32wb_fixed, + ARM::VST1q64wb_fixed }; return SelectVST(N, true, 1, DOpcodes, QOpcodes, 0); } case ARMISD::VST2_UPD: { - unsigned DOpcodes[] = { ARM::VST2d8wb_fixed, - ARM::VST2d16wb_fixed, - ARM::VST2d32wb_fixed, - ARM::VST1q64wb_fixed}; - unsigned QOpcodes[] = { ARM::VST2q8PseudoWB_fixed, - ARM::VST2q16PseudoWB_fixed, - ARM::VST2q32PseudoWB_fixed }; + static const uint16_t DOpcodes[] = { ARM::VST2d8wb_fixed, + ARM::VST2d16wb_fixed, + ARM::VST2d32wb_fixed, + ARM::VST1q64wb_fixed}; + 
static const uint16_t QOpcodes[] = { ARM::VST2q8PseudoWB_fixed, + ARM::VST2q16PseudoWB_fixed, + ARM::VST2q32PseudoWB_fixed }; return SelectVST(N, true, 2, DOpcodes, QOpcodes, 0); } case ARMISD::VST3_UPD: { - unsigned DOpcodes[] = { ARM::VST3d8Pseudo_UPD, ARM::VST3d16Pseudo_UPD, - ARM::VST3d32Pseudo_UPD,ARM::VST1d64TPseudoWB_fixed}; - unsigned QOpcodes0[] = { ARM::VST3q8Pseudo_UPD, - ARM::VST3q16Pseudo_UPD, - ARM::VST3q32Pseudo_UPD }; - unsigned QOpcodes1[] = { ARM::VST3q8oddPseudo_UPD, - ARM::VST3q16oddPseudo_UPD, - ARM::VST3q32oddPseudo_UPD }; + static const uint16_t DOpcodes[] = { ARM::VST3d8Pseudo_UPD, + ARM::VST3d16Pseudo_UPD, + ARM::VST3d32Pseudo_UPD, + ARM::VST1d64TPseudoWB_fixed}; + static const uint16_t QOpcodes0[] = { ARM::VST3q8Pseudo_UPD, + ARM::VST3q16Pseudo_UPD, + ARM::VST3q32Pseudo_UPD }; + static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo_UPD, + ARM::VST3q16oddPseudo_UPD, + ARM::VST3q32oddPseudo_UPD }; return SelectVST(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1); } case ARMISD::VST4_UPD: { - unsigned DOpcodes[] = { ARM::VST4d8Pseudo_UPD, ARM::VST4d16Pseudo_UPD, - ARM::VST4d32Pseudo_UPD,ARM::VST1d64QPseudoWB_fixed}; - unsigned QOpcodes0[] = { ARM::VST4q8Pseudo_UPD, - ARM::VST4q16Pseudo_UPD, - ARM::VST4q32Pseudo_UPD }; - unsigned QOpcodes1[] = { ARM::VST4q8oddPseudo_UPD, - ARM::VST4q16oddPseudo_UPD, - ARM::VST4q32oddPseudo_UPD }; + static const uint16_t DOpcodes[] = { ARM::VST4d8Pseudo_UPD, + ARM::VST4d16Pseudo_UPD, + ARM::VST4d32Pseudo_UPD, + ARM::VST1d64QPseudoWB_fixed}; + static const uint16_t QOpcodes0[] = { ARM::VST4q8Pseudo_UPD, + ARM::VST4q16Pseudo_UPD, + ARM::VST4q32Pseudo_UPD }; + static const uint16_t QOpcodes1[] = { ARM::VST4q8oddPseudo_UPD, + ARM::VST4q16oddPseudo_UPD, + ARM::VST4q32oddPseudo_UPD }; return SelectVST(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1); } case ARMISD::VST2LN_UPD: { - unsigned DOpcodes[] = { ARM::VST2LNd8Pseudo_UPD, ARM::VST2LNd16Pseudo_UPD, - ARM::VST2LNd32Pseudo_UPD }; - unsigned QOpcodes[] = { ARM::VST2LNq16Pseudo_UPD, - ARM::VST2LNq32Pseudo_UPD }; + static const uint16_t DOpcodes[] = { ARM::VST2LNd8Pseudo_UPD, + ARM::VST2LNd16Pseudo_UPD, + ARM::VST2LNd32Pseudo_UPD }; + static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo_UPD, + ARM::VST2LNq32Pseudo_UPD }; return SelectVLDSTLane(N, false, true, 2, DOpcodes, QOpcodes); } case ARMISD::VST3LN_UPD: { - unsigned DOpcodes[] = { ARM::VST3LNd8Pseudo_UPD, ARM::VST3LNd16Pseudo_UPD, - ARM::VST3LNd32Pseudo_UPD }; - unsigned QOpcodes[] = { ARM::VST3LNq16Pseudo_UPD, - ARM::VST3LNq32Pseudo_UPD }; + static const uint16_t DOpcodes[] = { ARM::VST3LNd8Pseudo_UPD, + ARM::VST3LNd16Pseudo_UPD, + ARM::VST3LNd32Pseudo_UPD }; + static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo_UPD, + ARM::VST3LNq32Pseudo_UPD }; return SelectVLDSTLane(N, false, true, 3, DOpcodes, QOpcodes); } case ARMISD::VST4LN_UPD: { - unsigned DOpcodes[] = { ARM::VST4LNd8Pseudo_UPD, ARM::VST4LNd16Pseudo_UPD, - ARM::VST4LNd32Pseudo_UPD }; - unsigned QOpcodes[] = { ARM::VST4LNq16Pseudo_UPD, - ARM::VST4LNq32Pseudo_UPD }; + static const uint16_t DOpcodes[] = { ARM::VST4LNd8Pseudo_UPD, + ARM::VST4LNd16Pseudo_UPD, + ARM::VST4LNd32Pseudo_UPD }; + static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo_UPD, + ARM::VST4LNq32Pseudo_UPD }; return SelectVLDSTLane(N, false, true, 4, DOpcodes, QOpcodes); } @@ -3179,124 +3188,144 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { } case Intrinsic::arm_neon_vld1: { - unsigned DOpcodes[] = { ARM::VLD1d8, ARM::VLD1d16, - ARM::VLD1d32, ARM::VLD1d64 }; - unsigned QOpcodes[] = { ARM::VLD1q8, 
ARM::VLD1q16, - ARM::VLD1q32, ARM::VLD1q64}; + static const uint16_t DOpcodes[] = { ARM::VLD1d8, ARM::VLD1d16, + ARM::VLD1d32, ARM::VLD1d64 }; + static const uint16_t QOpcodes[] = { ARM::VLD1q8, ARM::VLD1q16, + ARM::VLD1q32, ARM::VLD1q64}; return SelectVLD(N, false, 1, DOpcodes, QOpcodes, 0); } case Intrinsic::arm_neon_vld2: { - unsigned DOpcodes[] = { ARM::VLD2d8, ARM::VLD2d16, - ARM::VLD2d32, ARM::VLD1q64 }; - unsigned QOpcodes[] = { ARM::VLD2q8Pseudo, ARM::VLD2q16Pseudo, - ARM::VLD2q32Pseudo }; + static const uint16_t DOpcodes[] = { ARM::VLD2d8, ARM::VLD2d16, + ARM::VLD2d32, ARM::VLD1q64 }; + static const uint16_t QOpcodes[] = { ARM::VLD2q8Pseudo, ARM::VLD2q16Pseudo, + ARM::VLD2q32Pseudo }; return SelectVLD(N, false, 2, DOpcodes, QOpcodes, 0); } case Intrinsic::arm_neon_vld3: { - unsigned DOpcodes[] = { ARM::VLD3d8Pseudo, ARM::VLD3d16Pseudo, - ARM::VLD3d32Pseudo, ARM::VLD1d64TPseudo }; - unsigned QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD, - ARM::VLD3q16Pseudo_UPD, - ARM::VLD3q32Pseudo_UPD }; - unsigned QOpcodes1[] = { ARM::VLD3q8oddPseudo, - ARM::VLD3q16oddPseudo, - ARM::VLD3q32oddPseudo }; + static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo, + ARM::VLD3d16Pseudo, + ARM::VLD3d32Pseudo, + ARM::VLD1d64TPseudo }; + static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD, + ARM::VLD3q16Pseudo_UPD, + ARM::VLD3q32Pseudo_UPD }; + static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo, + ARM::VLD3q16oddPseudo, + ARM::VLD3q32oddPseudo }; return SelectVLD(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1); } case Intrinsic::arm_neon_vld4: { - unsigned DOpcodes[] = { ARM::VLD4d8Pseudo, ARM::VLD4d16Pseudo, - ARM::VLD4d32Pseudo, ARM::VLD1d64QPseudo }; - unsigned QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD, - ARM::VLD4q16Pseudo_UPD, - ARM::VLD4q32Pseudo_UPD }; - unsigned QOpcodes1[] = { ARM::VLD4q8oddPseudo, - ARM::VLD4q16oddPseudo, - ARM::VLD4q32oddPseudo }; + static const uint16_t DOpcodes[] = { ARM::VLD4d8Pseudo, + ARM::VLD4d16Pseudo, + ARM::VLD4d32Pseudo, + ARM::VLD1d64QPseudo }; + static const uint16_t QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD, + ARM::VLD4q16Pseudo_UPD, + ARM::VLD4q32Pseudo_UPD }; + static const uint16_t QOpcodes1[] = { ARM::VLD4q8oddPseudo, + ARM::VLD4q16oddPseudo, + ARM::VLD4q32oddPseudo }; return SelectVLD(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1); } case Intrinsic::arm_neon_vld2lane: { - unsigned DOpcodes[] = { ARM::VLD2LNd8Pseudo, ARM::VLD2LNd16Pseudo, - ARM::VLD2LNd32Pseudo }; - unsigned QOpcodes[] = { ARM::VLD2LNq16Pseudo, ARM::VLD2LNq32Pseudo }; + static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo, + ARM::VLD2LNd16Pseudo, + ARM::VLD2LNd32Pseudo }; + static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo, + ARM::VLD2LNq32Pseudo }; return SelectVLDSTLane(N, true, false, 2, DOpcodes, QOpcodes); } case Intrinsic::arm_neon_vld3lane: { - unsigned DOpcodes[] = { ARM::VLD3LNd8Pseudo, ARM::VLD3LNd16Pseudo, - ARM::VLD3LNd32Pseudo }; - unsigned QOpcodes[] = { ARM::VLD3LNq16Pseudo, ARM::VLD3LNq32Pseudo }; + static const uint16_t DOpcodes[] = { ARM::VLD3LNd8Pseudo, + ARM::VLD3LNd16Pseudo, + ARM::VLD3LNd32Pseudo }; + static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo, + ARM::VLD3LNq32Pseudo }; return SelectVLDSTLane(N, true, false, 3, DOpcodes, QOpcodes); } case Intrinsic::arm_neon_vld4lane: { - unsigned DOpcodes[] = { ARM::VLD4LNd8Pseudo, ARM::VLD4LNd16Pseudo, - ARM::VLD4LNd32Pseudo }; - unsigned QOpcodes[] = { ARM::VLD4LNq16Pseudo, ARM::VLD4LNq32Pseudo }; + static const uint16_t DOpcodes[] = { ARM::VLD4LNd8Pseudo, + ARM::VLD4LNd16Pseudo, + ARM::VLD4LNd32Pseudo }; + 
static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo, + ARM::VLD4LNq32Pseudo }; return SelectVLDSTLane(N, true, false, 4, DOpcodes, QOpcodes); } case Intrinsic::arm_neon_vst1: { - unsigned DOpcodes[] = { ARM::VST1d8, ARM::VST1d16, - ARM::VST1d32, ARM::VST1d64 }; - unsigned QOpcodes[] = { ARM::VST1q8, ARM::VST1q16, - ARM::VST1q32, ARM::VST1q64 }; + static const uint16_t DOpcodes[] = { ARM::VST1d8, ARM::VST1d16, + ARM::VST1d32, ARM::VST1d64 }; + static const uint16_t QOpcodes[] = { ARM::VST1q8, ARM::VST1q16, + ARM::VST1q32, ARM::VST1q64 }; return SelectVST(N, false, 1, DOpcodes, QOpcodes, 0); } case Intrinsic::arm_neon_vst2: { - unsigned DOpcodes[] = { ARM::VST2d8, ARM::VST2d16, - ARM::VST2d32, ARM::VST1q64 }; - unsigned QOpcodes[] = { ARM::VST2q8Pseudo, ARM::VST2q16Pseudo, - ARM::VST2q32Pseudo }; + static const uint16_t DOpcodes[] = { ARM::VST2d8, ARM::VST2d16, + ARM::VST2d32, ARM::VST1q64 }; + static uint16_t QOpcodes[] = { ARM::VST2q8Pseudo, ARM::VST2q16Pseudo, + ARM::VST2q32Pseudo }; return SelectVST(N, false, 2, DOpcodes, QOpcodes, 0); } case Intrinsic::arm_neon_vst3: { - unsigned DOpcodes[] = { ARM::VST3d8Pseudo, ARM::VST3d16Pseudo, - ARM::VST3d32Pseudo, ARM::VST1d64TPseudo }; - unsigned QOpcodes0[] = { ARM::VST3q8Pseudo_UPD, - ARM::VST3q16Pseudo_UPD, - ARM::VST3q32Pseudo_UPD }; - unsigned QOpcodes1[] = { ARM::VST3q8oddPseudo, - ARM::VST3q16oddPseudo, - ARM::VST3q32oddPseudo }; + static const uint16_t DOpcodes[] = { ARM::VST3d8Pseudo, + ARM::VST3d16Pseudo, + ARM::VST3d32Pseudo, + ARM::VST1d64TPseudo }; + static const uint16_t QOpcodes0[] = { ARM::VST3q8Pseudo_UPD, + ARM::VST3q16Pseudo_UPD, + ARM::VST3q32Pseudo_UPD }; + static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo, + ARM::VST3q16oddPseudo, + ARM::VST3q32oddPseudo }; return SelectVST(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1); } case Intrinsic::arm_neon_vst4: { - unsigned DOpcodes[] = { ARM::VST4d8Pseudo, ARM::VST4d16Pseudo, - ARM::VST4d32Pseudo, ARM::VST1d64QPseudo }; - unsigned QOpcodes0[] = { ARM::VST4q8Pseudo_UPD, - ARM::VST4q16Pseudo_UPD, - ARM::VST4q32Pseudo_UPD }; - unsigned QOpcodes1[] = { ARM::VST4q8oddPseudo, - ARM::VST4q16oddPseudo, - ARM::VST4q32oddPseudo }; + static const uint16_t DOpcodes[] = { ARM::VST4d8Pseudo, + ARM::VST4d16Pseudo, + ARM::VST4d32Pseudo, + ARM::VST1d64QPseudo }; + static const uint16_t QOpcodes0[] = { ARM::VST4q8Pseudo_UPD, + ARM::VST4q16Pseudo_UPD, + ARM::VST4q32Pseudo_UPD }; + static const uint16_t QOpcodes1[] = { ARM::VST4q8oddPseudo, + ARM::VST4q16oddPseudo, + ARM::VST4q32oddPseudo }; return SelectVST(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1); } case Intrinsic::arm_neon_vst2lane: { - unsigned DOpcodes[] = { ARM::VST2LNd8Pseudo, ARM::VST2LNd16Pseudo, - ARM::VST2LNd32Pseudo }; - unsigned QOpcodes[] = { ARM::VST2LNq16Pseudo, ARM::VST2LNq32Pseudo }; + static const uint16_t DOpcodes[] = { ARM::VST2LNd8Pseudo, + ARM::VST2LNd16Pseudo, + ARM::VST2LNd32Pseudo }; + static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo, + ARM::VST2LNq32Pseudo }; return SelectVLDSTLane(N, false, false, 2, DOpcodes, QOpcodes); } case Intrinsic::arm_neon_vst3lane: { - unsigned DOpcodes[] = { ARM::VST3LNd8Pseudo, ARM::VST3LNd16Pseudo, - ARM::VST3LNd32Pseudo }; - unsigned QOpcodes[] = { ARM::VST3LNq16Pseudo, ARM::VST3LNq32Pseudo }; + static const uint16_t DOpcodes[] = { ARM::VST3LNd8Pseudo, + ARM::VST3LNd16Pseudo, + ARM::VST3LNd32Pseudo }; + static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo, + ARM::VST3LNq32Pseudo }; return SelectVLDSTLane(N, false, false, 3, DOpcodes, QOpcodes); } case 
Intrinsic::arm_neon_vst4lane: { - unsigned DOpcodes[] = { ARM::VST4LNd8Pseudo, ARM::VST4LNd16Pseudo, - ARM::VST4LNd32Pseudo }; - unsigned QOpcodes[] = { ARM::VST4LNq16Pseudo, ARM::VST4LNq32Pseudo }; + static const uint16_t DOpcodes[] = { ARM::VST4LNd8Pseudo, + ARM::VST4LNd16Pseudo, + ARM::VST4LNd32Pseudo }; + static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo, + ARM::VST4LNq32Pseudo }; return SelectVLDSTLane(N, false, false, 4, DOpcodes, QOpcodes); } } diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index a103c94..c66618a 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -52,6 +52,7 @@ using namespace llvm; STATISTIC(NumTailCalls, "Number of tail calls"); STATISTIC(NumMovwMovt, "Number of GAs materialized with movw + movt"); +STATISTIC(NumLoopByVals, "Number of loops generated for byval arguments"); // This option should go away when tail calls fully work. static cl::opt @@ -89,76 +90,71 @@ static const uint16_t GPRArgRegs[] = { ARM::R0, ARM::R1, ARM::R2, ARM::R3 }; -void ARMTargetLowering::addTypeForNEON(EVT VT, EVT PromotedLdStVT, - EVT PromotedBitwiseVT) { +void ARMTargetLowering::addTypeForNEON(MVT VT, MVT PromotedLdStVT, + MVT PromotedBitwiseVT) { if (VT != PromotedLdStVT) { - setOperationAction(ISD::LOAD, VT.getSimpleVT(), Promote); - AddPromotedToType (ISD::LOAD, VT.getSimpleVT(), - PromotedLdStVT.getSimpleVT()); + setOperationAction(ISD::LOAD, VT, Promote); + AddPromotedToType (ISD::LOAD, VT, PromotedLdStVT); - setOperationAction(ISD::STORE, VT.getSimpleVT(), Promote); - AddPromotedToType (ISD::STORE, VT.getSimpleVT(), - PromotedLdStVT.getSimpleVT()); + setOperationAction(ISD::STORE, VT, Promote); + AddPromotedToType (ISD::STORE, VT, PromotedLdStVT); } - EVT ElemTy = VT.getVectorElementType(); + MVT ElemTy = VT.getVectorElementType(); if (ElemTy != MVT::i64 && ElemTy != MVT::f64) - setOperationAction(ISD::SETCC, VT.getSimpleVT(), Custom); - setOperationAction(ISD::INSERT_VECTOR_ELT, VT.getSimpleVT(), Custom); - setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT.getSimpleVT(), Custom); + setOperationAction(ISD::SETCC, VT, Custom); + setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom); + setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); if (ElemTy == MVT::i32) { - setOperationAction(ISD::SINT_TO_FP, VT.getSimpleVT(), Custom); - setOperationAction(ISD::UINT_TO_FP, VT.getSimpleVT(), Custom); - setOperationAction(ISD::FP_TO_SINT, VT.getSimpleVT(), Custom); - setOperationAction(ISD::FP_TO_UINT, VT.getSimpleVT(), Custom); + setOperationAction(ISD::SINT_TO_FP, VT, Custom); + setOperationAction(ISD::UINT_TO_FP, VT, Custom); + setOperationAction(ISD::FP_TO_SINT, VT, Custom); + setOperationAction(ISD::FP_TO_UINT, VT, Custom); } else { - setOperationAction(ISD::SINT_TO_FP, VT.getSimpleVT(), Expand); - setOperationAction(ISD::UINT_TO_FP, VT.getSimpleVT(), Expand); - setOperationAction(ISD::FP_TO_SINT, VT.getSimpleVT(), Expand); - setOperationAction(ISD::FP_TO_UINT, VT.getSimpleVT(), Expand); - } - setOperationAction(ISD::BUILD_VECTOR, VT.getSimpleVT(), Custom); - setOperationAction(ISD::VECTOR_SHUFFLE, VT.getSimpleVT(), Custom); - setOperationAction(ISD::CONCAT_VECTORS, VT.getSimpleVT(), Legal); - setOperationAction(ISD::EXTRACT_SUBVECTOR, VT.getSimpleVT(), Legal); - setOperationAction(ISD::SELECT, VT.getSimpleVT(), Expand); - setOperationAction(ISD::SELECT_CC, VT.getSimpleVT(), Expand); - setOperationAction(ISD::SIGN_EXTEND_INREG, VT.getSimpleVT(), Expand); + setOperationAction(ISD::SINT_TO_FP, VT, 
Expand); + setOperationAction(ISD::UINT_TO_FP, VT, Expand); + setOperationAction(ISD::FP_TO_SINT, VT, Expand); + setOperationAction(ISD::FP_TO_UINT, VT, Expand); + } + setOperationAction(ISD::BUILD_VECTOR, VT, Custom); + setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); + setOperationAction(ISD::CONCAT_VECTORS, VT, Legal); + setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal); + setOperationAction(ISD::SELECT, VT, Expand); + setOperationAction(ISD::SELECT_CC, VT, Expand); + setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand); if (VT.isInteger()) { - setOperationAction(ISD::SHL, VT.getSimpleVT(), Custom); - setOperationAction(ISD::SRA, VT.getSimpleVT(), Custom); - setOperationAction(ISD::SRL, VT.getSimpleVT(), Custom); + setOperationAction(ISD::SHL, VT, Custom); + setOperationAction(ISD::SRA, VT, Custom); + setOperationAction(ISD::SRL, VT, Custom); } // Promote all bit-wise operations. if (VT.isInteger() && VT != PromotedBitwiseVT) { - setOperationAction(ISD::AND, VT.getSimpleVT(), Promote); - AddPromotedToType (ISD::AND, VT.getSimpleVT(), - PromotedBitwiseVT.getSimpleVT()); - setOperationAction(ISD::OR, VT.getSimpleVT(), Promote); - AddPromotedToType (ISD::OR, VT.getSimpleVT(), - PromotedBitwiseVT.getSimpleVT()); - setOperationAction(ISD::XOR, VT.getSimpleVT(), Promote); - AddPromotedToType (ISD::XOR, VT.getSimpleVT(), - PromotedBitwiseVT.getSimpleVT()); + setOperationAction(ISD::AND, VT, Promote); + AddPromotedToType (ISD::AND, VT, PromotedBitwiseVT); + setOperationAction(ISD::OR, VT, Promote); + AddPromotedToType (ISD::OR, VT, PromotedBitwiseVT); + setOperationAction(ISD::XOR, VT, Promote); + AddPromotedToType (ISD::XOR, VT, PromotedBitwiseVT); } // Neon does not support vector divide/remainder operations. - setOperationAction(ISD::SDIV, VT.getSimpleVT(), Expand); - setOperationAction(ISD::UDIV, VT.getSimpleVT(), Expand); - setOperationAction(ISD::FDIV, VT.getSimpleVT(), Expand); - setOperationAction(ISD::SREM, VT.getSimpleVT(), Expand); - setOperationAction(ISD::UREM, VT.getSimpleVT(), Expand); - setOperationAction(ISD::FREM, VT.getSimpleVT(), Expand); + setOperationAction(ISD::SDIV, VT, Expand); + setOperationAction(ISD::UDIV, VT, Expand); + setOperationAction(ISD::FDIV, VT, Expand); + setOperationAction(ISD::SREM, VT, Expand); + setOperationAction(ISD::UREM, VT, Expand); + setOperationAction(ISD::FREM, VT, Expand); } -void ARMTargetLowering::addDRTypeForNEON(EVT VT) { - addRegisterClass(VT, ARM::DPRRegisterClass); +void ARMTargetLowering::addDRTypeForNEON(MVT VT) { + addRegisterClass(VT, &ARM::DPRRegClass); addTypeForNEON(VT, MVT::f64, MVT::v2i32); } -void ARMTargetLowering::addQRTypeForNEON(EVT VT) { - addRegisterClass(VT, ARM::QPRRegisterClass); +void ARMTargetLowering::addQRTypeForNEON(MVT VT) { + addRegisterClass(VT, &ARM::QPRRegClass); addTypeForNEON(VT, MVT::v2f64, MVT::v4i32); } @@ -431,14 +427,14 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) } if (Subtarget->isThumb1Only()) - addRegisterClass(MVT::i32, ARM::tGPRRegisterClass); + addRegisterClass(MVT::i32, &ARM::tGPRRegClass); else - addRegisterClass(MVT::i32, ARM::GPRRegisterClass); + addRegisterClass(MVT::i32, &ARM::GPRRegClass); if (!TM.Options.UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb1Only()) { - addRegisterClass(MVT::f32, ARM::SPRRegisterClass); + addRegisterClass(MVT::f32, &ARM::SPRRegClass); if (!Subtarget->isFPOnlySP()) - addRegisterClass(MVT::f64, ARM::DPRRegisterClass); + addRegisterClass(MVT::f64, &ARM::DPRRegClass); setTruncStoreAction(MVT::f64, MVT::f32, Expand); } @@ -824,6 
+820,9 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) benefitFromCodePlacementOpt = true; + // Prefer likely predicted branches to selects on out-of-order cores. + predictableSelectIsExpensive = Subtarget->isCortexA9(); + setMinFunctionAlignment(Subtarget->isThumb() ? 1 : 2); } @@ -849,7 +848,7 @@ ARMTargetLowering::findRepresentativeClass(EVT VT) const{ // the cost is 1 for both f32 and f64. case MVT::f32: case MVT::f64: case MVT::v8i8: case MVT::v4i16: case MVT::v2i32: case MVT::v1i64: case MVT::v2f32: - RRC = ARM::DPRRegisterClass; + RRC = &ARM::DPRRegClass; // When NEON is used for SP, only half of the register file is available // because operations that define both SP and DP results will be constrained // to the VFP2 class (D0-D15). We currently model this constraint prior to @@ -859,15 +858,15 @@ ARMTargetLowering::findRepresentativeClass(EVT VT) const{ break; case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64: case MVT::v4f32: case MVT::v2f64: - RRC = ARM::DPRRegisterClass; + RRC = &ARM::DPRRegClass; Cost = 2; break; case MVT::v4i64: - RRC = ARM::DPRRegisterClass; + RRC = &ARM::DPRRegClass; Cost = 4; break; case MVT::v8i64: - RRC = ARM::DPRRegisterClass; + RRC = &ARM::DPRRegClass; Cost = 8; break; } @@ -891,6 +890,7 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { case ARMISD::RET_FLAG: return "ARMISD::RET_FLAG"; case ARMISD::PIC_ADD: return "ARMISD::PIC_ADD"; case ARMISD::CMP: return "ARMISD::CMP"; + case ARMISD::CMN: return "ARMISD::CMN"; case ARMISD::CMPZ: return "ARMISD::CMPZ"; case ARMISD::CMPFP: return "ARMISD::CMPFP"; case ARMISD::CMPFPw0: return "ARMISD::CMPFPw0"; @@ -1027,17 +1027,18 @@ const TargetRegisterClass *ARMTargetLowering::getRegClassFor(EVT VT) const { // load / store 4 to 8 consecutive D registers. if (Subtarget->hasNEON()) { if (VT == MVT::v4i64) - return ARM::QQPRRegisterClass; - else if (VT == MVT::v8i64) - return ARM::QQQQPRRegisterClass; + return &ARM::QQPRRegClass; + if (VT == MVT::v8i64) + return &ARM::QQQQPRRegClass; } return TargetLowering::getRegClassFor(VT); } // Create a fast isel object. FastISel * -ARMTargetLowering::createFastISel(FunctionLoweringInfo &funcInfo) const { - return ARM::createFastISel(funcInfo); +ARMTargetLowering::createFastISel(FunctionLoweringInfo &funcInfo, + const TargetLibraryInfo *libInfo) const { + return ARM::createFastISel(funcInfo, libInfo); } /// getMaximalGlobalOffset - Returns the maximal possible offset which can @@ -1166,6 +1167,8 @@ CCAssignFn *ARMTargetLowering::CCAssignFnForNode(CallingConv::ID CC, return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS); case CallingConv::ARM_APCS: return (Return ? RetCC_ARM_APCS : CC_ARM_APCS); + case CallingConv::GHC: + return (Return ? RetCC_ARM_APCS : CC_ARM_APCS_GHC); } } @@ -1286,14 +1289,20 @@ void ARMTargetLowering::PassF64ArgInRegs(DebugLoc dl, SelectionDAG &DAG, /// ARMISD:CALL <- callseq_end chain. Also add input and output parameter /// nodes. 
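// Illustrative sketch, not part of the imported diff: the LowerCall hunk
// that follows replaces a ten-argument hook with a single
// TargetLowering::CallLoweringInfo object. A stand-in with hypothetical
// field names shows the shape of that refactor; the real struct lives in
// include/llvm/Target/TargetLowering.h.
struct CallLoweringStateSketch {
  void *DAG;            // SelectionDAG being built
  unsigned CallConv;    // calling-convention ID
  bool IsVarArg;
  bool IsTailCall;      // targets may clear this if a tail call is illegal
  bool DoesNotReturn;
};
// A target then reads everything from one object, so fields can be added
// later without updating every backend's override.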
SDValue -ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee, - CallingConv::ID CallConv, bool isVarArg, - bool doesNotRet, bool &isTailCall, - const SmallVectorImpl<ISD::OutputArg> &Outs, - const SmallVectorImpl<SDValue> &OutVals, - const SmallVectorImpl<ISD::InputArg> &Ins, - DebugLoc dl, SelectionDAG &DAG, +ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, SmallVectorImpl<SDValue> &InVals) const { + SelectionDAG &DAG = CLI.DAG; + DebugLoc &dl = CLI.DL; + SmallVector<ISD::OutputArg, 32> &Outs = CLI.Outs; + SmallVector<SDValue, 32> &OutVals = CLI.OutVals; + SmallVector<ISD::InputArg, 32> &Ins = CLI.Ins; + SDValue Chain = CLI.Chain; + SDValue Callee = CLI.Callee; + bool &isTailCall = CLI.IsTailCall; + CallingConv::ID CallConv = CLI.CallConv; + bool doesNotRet = CLI.DoesNotReturn; + bool isVarArg = CLI.IsVarArg; + MachineFunction &MF = DAG.getMachineFunction(); bool IsStructRet = (Outs.empty()) ? false : Outs[0].Flags.isSRet(); bool IsSibCall = false; @@ -1415,21 +1424,22 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee, CCInfo.clearFirstByValReg(); } - unsigned LocMemOffset = VA.getLocMemOffset(); - SDValue StkPtrOff = DAG.getIntPtrConstant(LocMemOffset); - SDValue Dst = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, - StkPtrOff); - SDValue SrcOffset = DAG.getIntPtrConstant(4*offset); - SDValue Src = DAG.getNode(ISD::ADD, dl, getPointerTy(), Arg, SrcOffset); - SDValue SizeNode = DAG.getConstant(Flags.getByValSize() - 4*offset, - MVT::i32); - MemOpChains.push_back(DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, - Flags.getByValAlign(), - /*isVolatile=*/false, - /*AlwaysInline=*/false, - MachinePointerInfo(0), - MachinePointerInfo(0))); - + if (Flags.getByValSize() - 4*offset > 0) { + unsigned LocMemOffset = VA.getLocMemOffset(); + SDValue StkPtrOff = DAG.getIntPtrConstant(LocMemOffset); + SDValue Dst = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, + StkPtrOff); + SDValue SrcOffset = DAG.getIntPtrConstant(4*offset); + SDValue Src = DAG.getNode(ISD::ADD, dl, getPointerTy(), Arg, SrcOffset); + SDValue SizeNode = DAG.getConstant(Flags.getByValSize() - 4*offset, + MVT::i32); + SDValue AlignNode = DAG.getConstant(Flags.getByValAlign(), MVT::i32); + + SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue); + SDValue Ops[] = { Chain, Dst, Src, SizeNode, AlignNode}; + MemOpChains.push_back(DAG.getNode(ARMISD::COPY_STRUCT_BYVAL, dl, VTs, + Ops, array_lengthof(Ops))); + } } else if (!IsSibCall) { assert(VA.isMemLoc()); @@ -2095,12 +2105,13 @@ ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA, Entry.Ty = (Type *) Type::getInt32Ty(*DAG.getContext()); Args.push_back(Entry); // FIXME: is there useful debug info available here? - std::pair<SDValue, SDValue> CallResult = - LowerCallTo(Chain, (Type *) Type::getInt32Ty(*DAG.getContext()), + TargetLowering::CallLoweringInfo CLI(Chain, + (Type *) Type::getInt32Ty(*DAG.getContext()), false, false, false, false, 0, CallingConv::C, /*isTailCall=*/false, /*doesNotRet=*/false, /*isReturnValueUsed=*/true, DAG.getExternalSymbol("__tls_get_addr", PtrVT), Args, DAG, dl); + std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI); return CallResult.first; } @@ -2108,7 +2119,8 @@ ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA, // "local exec" model. 
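// Illustrative sketch, not from the patch: the new guard above only emits
// COPY_STRUCT_BYVAL for the slice of a byval argument that was not already
// passed in r0-r3. Assuming 4-byte GPRs, the byte count carried by SizeNode
// is computed like this (byvalTailBytes is a hypothetical helper):
static unsigned byvalTailBytes(unsigned byValSize, unsigned regsUsed) {
  unsigned inRegBytes = 4 * regsUsed;  // bytes already covered by registers
  return byValSize > inRegBytes ? byValSize - inRegBytes : 0;
}
// E.g. a 12-byte struct with two registers consumed leaves 4 bytes to copy;
// when the result is 0 the old code emitted a zero-length memcpy, which the
// new check avoids.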
SDValue ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA, - SelectionDAG &DAG) const { + SelectionDAG &DAG, + TLSModel::Model model) const { const GlobalValue *GV = GA->getGlobal(); DebugLoc dl = GA->getDebugLoc(); SDValue Offset; @@ -2117,7 +2129,7 @@ ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA, // Get the Thread Pointer SDValue ThreadPointer = DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT); - if (GV->isDeclaration()) { + if (model == TLSModel::InitialExec) { MachineFunction &MF = DAG.getMachineFunction(); ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); @@ -2142,6 +2154,7 @@ ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA, false, false, false, 0); } else { // local exec model + assert(model == TLSModel::LocalExec); ARMConstantPoolValue *CPV = ARMConstantPoolConstant::Create(GV, ARMCP::TPOFF); Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4); @@ -2162,12 +2175,18 @@ ARMTargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const { assert(Subtarget->isTargetELF() && "TLS not implemented for non-ELF targets"); GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op); - // If the relocation model is PIC, use the "General Dynamic" TLS Model, - // otherwise use the "Local Exec" TLS Model - if (getTargetMachine().getRelocationModel() == Reloc::PIC_) - return LowerToTLSGeneralDynamicModel(GA, DAG); - else - return LowerToTLSExecModels(GA, DAG); + + TLSModel::Model model = getTargetMachine().getTLSModel(GA->getGlobal()); + + switch (model) { + case TLSModel::GeneralDynamic: + case TLSModel::LocalDynamic: + return LowerToTLSGeneralDynamicModel(GA, DAG); + case TLSModel::InitialExec: + case TLSModel::LocalExec: + return LowerToTLSExecModels(GA, DAG, model); + } + llvm_unreachable("bogus TLS model"); } SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op, @@ -2457,9 +2476,9 @@ ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA, const TargetRegisterClass *RC; if (AFI->isThumb1OnlyFunction()) - RC = ARM::tGPRRegisterClass; + RC = &ARM::tGPRRegClass; else - RC = ARM::GPRRegisterClass; + RC = &ARM::GPRRegClass; // Transform the arguments stored in physical registers into virtual ones. unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC); @@ -2543,9 +2562,9 @@ ARMTargetLowering::VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG, for (; firstRegToSaveIndex < 4; ++firstRegToSaveIndex) { const TargetRegisterClass *RC; if (AFI->isThumb1OnlyFunction()) - RC = ARM::tGPRRegisterClass; + RC = &ARM::tGPRRegClass; else - RC = ARM::GPRRegisterClass; + RC = &ARM::GPRRegClass; unsigned VReg = MF.addLiveIn(GPRArgRegs[firstRegToSaveIndex], RC); SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32); @@ -2627,14 +2646,15 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain, const TargetRegisterClass *RC; if (RegVT == MVT::f32) - RC = ARM::SPRRegisterClass; + RC = &ARM::SPRRegClass; else if (RegVT == MVT::f64) - RC = ARM::DPRRegisterClass; + RC = &ARM::DPRRegClass; else if (RegVT == MVT::v2f64) - RC = ARM::QPRRegisterClass; + RC = &ARM::QPRRegClass; else if (RegVT == MVT::i32) - RC = (AFI->isThumb1OnlyFunction() ? - ARM::tGPRRegisterClass : ARM::GPRRegisterClass); + RC = AFI->isThumb1OnlyFunction() ? 
+ (const TargetRegisterClass*)&ARM::tGPRRegClass : + (const TargetRegisterClass*)&ARM::GPRRegClass; else llvm_unreachable("RegVT not supported by FORMAL_ARGUMENTS Lowering"); @@ -4249,6 +4269,10 @@ SDValue ARMTargetLowering::ReconstructShuffle(SDValue Op, // Record this extraction against the appropriate vector if possible... SDValue SourceVec = V.getOperand(0); + // If the element number isn't a constant, we can't effectively + // analyze what's going on. + if (!isa<ConstantSDNode>(V.getOperand(1))) + return SDValue(); unsigned EltNo = cast<ConstantSDNode>(V.getOperand(1))->getZExtValue(); bool FoundSource = false; for (unsigned j = 0; j < SourceVecs.size(); ++j) { @@ -4791,7 +4815,9 @@ static SDValue SkipExtension(SDNode *N, SelectionDAG &DAG) { for (unsigned i = 0; i != NumElts; ++i) { ConstantSDNode *C = cast<ConstantSDNode>(N->getOperand(i)); const APInt &CInt = C->getAPIntValue(); - Ops.push_back(DAG.getConstant(CInt.trunc(EltSize), TruncVT)); + // Element types smaller than 32 bits are not legal, so use i32 elements. + // The values are implicitly truncated so sext vs. zext doesn't matter. + Ops.push_back(DAG.getConstant(CInt.zextOrTrunc(32), MVT::i32)); } return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), MVT::getVectorVT(TruncVT, NumElts), Ops.data(), NumElts); @@ -5252,14 +5278,14 @@ ARMTargetLowering::EmitAtomicCmpSwap(MachineInstr *MI, bool isThumb2 = Subtarget->isThumb2(); MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); - unsigned scratch = - MRI.createVirtualRegister(isThumb2 ? ARM::rGPRRegisterClass - : ARM::GPRRegisterClass); + unsigned scratch = MRI.createVirtualRegister(isThumb2 ? + (const TargetRegisterClass*)&ARM::rGPRRegClass : + (const TargetRegisterClass*)&ARM::GPRRegClass); if (isThumb2) { - MRI.constrainRegClass(dest, ARM::rGPRRegisterClass); - MRI.constrainRegClass(oldval, ARM::rGPRRegisterClass); - MRI.constrainRegClass(newval, ARM::rGPRRegisterClass); + MRI.constrainRegClass(dest, &ARM::rGPRRegClass); + MRI.constrainRegClass(oldval, &ARM::rGPRRegClass); + MRI.constrainRegClass(newval, &ARM::rGPRRegClass); } unsigned ldrOpc, strOpc; @@ -5362,8 +5388,8 @@ ARMTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB, MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); if (isThumb2) { - MRI.constrainRegClass(dest, ARM::rGPRRegisterClass); - MRI.constrainRegClass(ptr, ARM::rGPRRegisterClass); + MRI.constrainRegClass(dest, &ARM::rGPRRegClass); + MRI.constrainRegClass(ptr, &ARM::rGPRRegClass); } unsigned ldrOpc, strOpc; @@ -5394,8 +5420,9 @@ ARMTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB, BB->end()); exitMBB->transferSuccessorsAndUpdatePHIs(BB); - const TargetRegisterClass *TRC = - isThumb2 ? ARM::tGPRRegisterClass : ARM::GPRRegisterClass; + const TargetRegisterClass *TRC = isThumb2 ? + (const TargetRegisterClass*)&ARM::tGPRRegClass : + (const TargetRegisterClass*)&ARM::GPRRegClass; unsigned scratch = MRI.createVirtualRegister(TRC); unsigned scratch2 = (!BinOpcode) ? 
incr : MRI.createVirtualRegister(TRC); @@ -5469,8 +5496,8 @@ ARMTargetLowering::EmitAtomicBinaryMinMax(MachineInstr *MI, MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); if (isThumb2) { - MRI.constrainRegClass(dest, ARM::rGPRRegisterClass); - MRI.constrainRegClass(ptr, ARM::rGPRRegisterClass); + MRI.constrainRegClass(dest, &ARM::rGPRRegClass); + MRI.constrainRegClass(ptr, &ARM::rGPRRegClass); } unsigned ldrOpc, strOpc, extendOpc; @@ -5504,8 +5531,9 @@ ARMTargetLowering::EmitAtomicBinaryMinMax(MachineInstr *MI, BB->end()); exitMBB->transferSuccessorsAndUpdatePHIs(BB); - const TargetRegisterClass *TRC = - isThumb2 ? ARM::tGPRRegisterClass : ARM::GPRRegisterClass; + const TargetRegisterClass *TRC = isThumb2 ? + (const TargetRegisterClass*)&ARM::tGPRRegClass : + (const TargetRegisterClass*)&ARM::GPRRegClass; unsigned scratch = MRI.createVirtualRegister(TRC); unsigned scratch2 = MRI.createVirtualRegister(TRC); @@ -5531,7 +5559,7 @@ ARMTargetLowering::EmitAtomicBinaryMinMax(MachineInstr *MI, // Sign extend the value, if necessary. if (signExtend && extendOpc) { - oldval = MRI.createVirtualRegister(ARM::GPRRegisterClass); + oldval = MRI.createVirtualRegister(&ARM::GPRRegClass); AddDefaultPred(BuildMI(BB, dl, TII->get(extendOpc), oldval) .addReg(dest) .addImm(0)); @@ -5586,9 +5614,9 @@ ARMTargetLowering::EmitAtomicBinary64(MachineInstr *MI, MachineBasicBlock *BB, MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); if (isThumb2) { - MRI.constrainRegClass(destlo, ARM::rGPRRegisterClass); - MRI.constrainRegClass(desthi, ARM::rGPRRegisterClass); - MRI.constrainRegClass(ptr, ARM::rGPRRegisterClass); + MRI.constrainRegClass(destlo, &ARM::rGPRRegClass); + MRI.constrainRegClass(desthi, &ARM::rGPRRegClass); + MRI.constrainRegClass(ptr, &ARM::rGPRRegClass); } unsigned ldrOpc = isThumb2 ? ARM::t2LDREXD : ARM::LDREXD; @@ -5614,8 +5642,9 @@ ARMTargetLowering::EmitAtomicBinary64(MachineInstr *MI, MachineBasicBlock *BB, BB->end()); exitMBB->transferSuccessorsAndUpdatePHIs(BB); - const TargetRegisterClass *TRC = - isThumb2 ? ARM::tGPRRegisterClass : ARM::GPRRegisterClass; + const TargetRegisterClass *TRC = isThumb2 ? + (const TargetRegisterClass*)&ARM::tGPRRegClass : + (const TargetRegisterClass*)&ARM::GPRRegClass; unsigned storesuccess = MRI.createVirtualRegister(TRC); // thisMBB: @@ -5722,8 +5751,9 @@ SetupEntryBlockForSjLj(MachineInstr *MI, MachineBasicBlock *MBB, ARMConstantPoolMBB::Create(F->getContext(), DispatchBB, PCLabelId, PCAdj); unsigned CPI = MCP->getConstantPoolIndex(CPV, 4); - const TargetRegisterClass *TRC = - isThumb ? ARM::tGPRRegisterClass : ARM::GPRRegisterClass; + const TargetRegisterClass *TRC = isThumb ? + (const TargetRegisterClass*)&ARM::tGPRRegClass : + (const TargetRegisterClass*)&ARM::GPRRegClass; // Grab constant pool and fixed stack memory operands. MachineMemOperand *CPMMO = @@ -5827,8 +5857,9 @@ EmitSjLjDispatchBlock(MachineInstr *MI, MachineBasicBlock *MBB) const { MachineFrameInfo *MFI = MF->getFrameInfo(); int FI = MFI->getFunctionContextIndex(); - const TargetRegisterClass *TRC = - Subtarget->isThumb() ? ARM::tGPRRegisterClass : ARM::GPRRegisterClass; + const TargetRegisterClass *TRC = Subtarget->isThumb() ? + (const TargetRegisterClass*)&ARM::tGPRRegClass : + (const TargetRegisterClass*)&ARM::GPRnopcRegClass; // Get a mapping of the call site numbers to all of the landing pads they're // associated with. 
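// Illustrative sketch, not from the patch: the ldrex/strex loops that the
// EmitAtomic* expansions above build have the same retry structure as a
// C++11 compare-exchange loop. A failed STREX plays the role of a spurious
// compare_exchange_weak failure, and the generated loopMBB branches back
// exactly as this loop does.
#include <atomic>
int atomicAddSketch(std::atomic<int> &loc, int incr) {
  int oldVal = loc.load();
  while (!loc.compare_exchange_weak(oldVal, oldVal + incr)) {
    // oldVal was reloaded by the failed exchange; recompute and retry.
  }
  return oldVal;  // the value observed before the update, like 'dest' above
}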
@@ -6176,14 +6207,12 @@ EmitSjLjDispatchBlock(MachineInstr *MI, MachineBasicBlock *MBB) const { for (unsigned i = 0; SavedRegs[i] != 0; ++i) { unsigned Reg = SavedRegs[i]; if (Subtarget->isThumb2() && - !ARM::tGPRRegisterClass->contains(Reg) && - !ARM::hGPRRegisterClass->contains(Reg)) + !ARM::tGPRRegClass.contains(Reg) && + !ARM::hGPRRegClass.contains(Reg)) continue; - else if (Subtarget->isThumb1Only() && - !ARM::tGPRRegisterClass->contains(Reg)) + if (Subtarget->isThumb1Only() && !ARM::tGPRRegClass.contains(Reg)) continue; - else if (!Subtarget->isThumb() && - !ARM::GPRRegisterClass->contains(Reg)) + if (!Subtarget->isThumb() && !ARM::GPRRegClass.contains(Reg)) continue; if (!DefRegs[Reg]) MIB.addReg(Reg, RegState::ImplicitDefine | RegState::Dead); @@ -6214,6 +6243,304 @@ MachineBasicBlock *OtherSucc(MachineBasicBlock *MBB, MachineBasicBlock *Succ) { llvm_unreachable("Expecting a BB with two successors!"); } +MachineBasicBlock *ARMTargetLowering:: +EmitStructByval(MachineInstr *MI, MachineBasicBlock *BB) const { + // This pseudo instruction has 3 operands: dst, src, size + // We expand it to a loop if size > Subtarget->getMaxInlineSizeThreshold(). + // Otherwise, we will generate unrolled scalar copies. + const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + const BasicBlock *LLVM_BB = BB->getBasicBlock(); + MachineFunction::iterator It = BB; + ++It; + + unsigned dest = MI->getOperand(0).getReg(); + unsigned src = MI->getOperand(1).getReg(); + unsigned SizeVal = MI->getOperand(2).getImm(); + unsigned Align = MI->getOperand(3).getImm(); + DebugLoc dl = MI->getDebugLoc(); + + bool isThumb2 = Subtarget->isThumb2(); + MachineFunction *MF = BB->getParent(); + MachineRegisterInfo &MRI = MF->getRegInfo(); + unsigned ldrOpc, strOpc, UnitSize = 0; + + const TargetRegisterClass *TRC = isThumb2 ? + (const TargetRegisterClass*)&ARM::tGPRRegClass : + (const TargetRegisterClass*)&ARM::GPRRegClass; + const TargetRegisterClass *TRC_Vec = 0; + + if (Align & 1) { + ldrOpc = isThumb2 ? ARM::t2LDRB_POST : ARM::LDRB_POST_IMM; + strOpc = isThumb2 ? ARM::t2STRB_POST : ARM::STRB_POST_IMM; + UnitSize = 1; + } else if (Align & 2) { + ldrOpc = isThumb2 ? ARM::t2LDRH_POST : ARM::LDRH_POST; + strOpc = isThumb2 ? ARM::t2STRH_POST : ARM::STRH_POST; + UnitSize = 2; + } else { + // Check whether we can use NEON instructions. + if (!MF->getFunction()->hasFnAttr(Attribute::NoImplicitFloat) && + Subtarget->hasNEON()) { + if ((Align % 16 == 0) && SizeVal >= 16) { + ldrOpc = ARM::VLD1q32wb_fixed; + strOpc = ARM::VST1q32wb_fixed; + UnitSize = 16; + TRC_Vec = (const TargetRegisterClass*)&ARM::DPairRegClass; + } + else if ((Align % 8 == 0) && SizeVal >= 8) { + ldrOpc = ARM::VLD1d32wb_fixed; + strOpc = ARM::VST1d32wb_fixed; + UnitSize = 8; + TRC_Vec = (const TargetRegisterClass*)&ARM::DPRRegClass; + } + } + // Can't use NEON instructions. + if (UnitSize == 0) { + ldrOpc = isThumb2 ? ARM::t2LDR_POST : ARM::LDR_POST_IMM; + strOpc = isThumb2 ? ARM::t2STR_POST : ARM::STR_POST_IMM; + UnitSize = 4; + } + } + + unsigned BytesLeft = SizeVal % UnitSize; + unsigned LoopSize = SizeVal - BytesLeft; + + if (SizeVal <= Subtarget->getMaxInlineSizeThreshold()) { + // Use LDR and STR to copy. + // [scratch, srcOut] = LDR_POST(srcIn, UnitSize) + // [destOut] = STR_POST(scratch, destIn, UnitSize) + unsigned srcIn = src; + unsigned destIn = dest; + for (unsigned i = 0; i < LoopSize; i+=UnitSize) { + unsigned scratch = MRI.createVirtualRegister(UnitSize >= 8 ? 
TRC_Vec:TRC); + unsigned srcOut = MRI.createVirtualRegister(TRC); + unsigned destOut = MRI.createVirtualRegister(TRC); + if (UnitSize >= 8) { + AddDefaultPred(BuildMI(*BB, MI, dl, + TII->get(ldrOpc), scratch) + .addReg(srcOut, RegState::Define).addReg(srcIn).addImm(0)); + + AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(strOpc), destOut) + .addReg(destIn).addImm(0).addReg(scratch)); + } else if (isThumb2) { + AddDefaultPred(BuildMI(*BB, MI, dl, + TII->get(ldrOpc), scratch) + .addReg(srcOut, RegState::Define).addReg(srcIn).addImm(UnitSize)); + + AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(strOpc), destOut) + .addReg(scratch).addReg(destIn) + .addImm(UnitSize)); + } else { + AddDefaultPred(BuildMI(*BB, MI, dl, + TII->get(ldrOpc), scratch) + .addReg(srcOut, RegState::Define).addReg(srcIn).addReg(0) + .addImm(UnitSize)); + + AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(strOpc), destOut) + .addReg(scratch).addReg(destIn) + .addReg(0).addImm(UnitSize)); + } + srcIn = srcOut; + destIn = destOut; + } + + // Handle the leftover bytes with LDRB and STRB. + // [scratch, srcOut] = LDRB_POST(srcIn, 1) + // [destOut] = STRB_POST(scratch, destIn, 1) + ldrOpc = isThumb2 ? ARM::t2LDRB_POST : ARM::LDRB_POST_IMM; + strOpc = isThumb2 ? ARM::t2STRB_POST : ARM::STRB_POST_IMM; + for (unsigned i = 0; i < BytesLeft; i++) { + unsigned scratch = MRI.createVirtualRegister(TRC); + unsigned srcOut = MRI.createVirtualRegister(TRC); + unsigned destOut = MRI.createVirtualRegister(TRC); + if (isThumb2) { + AddDefaultPred(BuildMI(*BB, MI, dl, + TII->get(ldrOpc),scratch) + .addReg(srcOut, RegState::Define).addReg(srcIn).addImm(1)); + + AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(strOpc), destOut) + .addReg(scratch).addReg(destIn) + .addReg(0).addImm(1)); + } else { + AddDefaultPred(BuildMI(*BB, MI, dl, + TII->get(ldrOpc),scratch) + .addReg(srcOut, RegState::Define).addReg(srcIn).addImm(1)); + + AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(strOpc), destOut) + .addReg(scratch).addReg(destIn) + .addReg(0).addImm(1)); + } + srcIn = srcOut; + destIn = destOut; + } + MI->eraseFromParent(); // The instruction is gone now. + return BB; + } + + // Expand the pseudo op to a loop. + // thisMBB: + // ... + // movw varEnd, # --> with thumb2 + // movt varEnd, # + // ldrcp varEnd, idx --> without thumb2 + // fallthrough --> loopMBB + // loopMBB: + // PHI varPhi, varEnd, varLoop + // PHI srcPhi, src, srcLoop + // PHI destPhi, dst, destLoop + // [scratch, srcLoop] = LDR_POST(srcPhi, UnitSize) + // [destLoop] = STR_POST(scratch, destPhi, UnitSize) + // subs varLoop, varPhi, #UnitSize + // bne loopMBB + // fallthrough --> exitMBB + // exitMBB: + // epilogue to handle left-over bytes + // [scratch, srcOut] = LDRB_POST(srcLoop, 1) + // [destOut] = STRB_POST(scratch, destLoop, 1) + MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB); + MF->insert(It, loopMBB); + MF->insert(It, exitMBB); + + // Transfer the remainder of BB and its successor edges to exitMBB. + exitMBB->splice(exitMBB->begin(), BB, + llvm::next(MachineBasicBlock::iterator(MI)), + BB->end()); + exitMBB->transferSuccessorsAndUpdatePHIs(BB); + + // Load an immediate to varEnd. 
+ unsigned varEnd = MRI.createVirtualRegister(TRC); + if (isThumb2) { + unsigned VReg1 = varEnd; + if ((LoopSize & 0xFFFF0000) != 0) + VReg1 = MRI.createVirtualRegister(TRC); + AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::t2MOVi16), VReg1) + .addImm(LoopSize & 0xFFFF)); + + if ((LoopSize & 0xFFFF0000) != 0) + AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::t2MOVTi16), varEnd) + .addReg(VReg1) + .addImm(LoopSize >> 16)); + } else { + MachineConstantPool *ConstantPool = MF->getConstantPool(); + Type *Int32Ty = Type::getInt32Ty(MF->getFunction()->getContext()); + const Constant *C = ConstantInt::get(Int32Ty, LoopSize); + + // MachineConstantPool wants an explicit alignment. + unsigned Align = getTargetData()->getPrefTypeAlignment(Int32Ty); + if (Align == 0) + Align = getTargetData()->getTypeAllocSize(C->getType()); + unsigned Idx = ConstantPool->getConstantPoolIndex(C, Align); + + AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::LDRcp)) + .addReg(varEnd, RegState::Define) + .addConstantPoolIndex(Idx) + .addImm(0)); + } + BB->addSuccessor(loopMBB); + + // Generate the loop body: + // varPhi = PHI(varLoop, varEnd) + // srcPhi = PHI(srcLoop, src) + // destPhi = PHI(destLoop, dst) + MachineBasicBlock *entryBB = BB; + BB = loopMBB; + unsigned varLoop = MRI.createVirtualRegister(TRC); + unsigned varPhi = MRI.createVirtualRegister(TRC); + unsigned srcLoop = MRI.createVirtualRegister(TRC); + unsigned srcPhi = MRI.createVirtualRegister(TRC); + unsigned destLoop = MRI.createVirtualRegister(TRC); + unsigned destPhi = MRI.createVirtualRegister(TRC); + + BuildMI(*BB, BB->begin(), dl, TII->get(ARM::PHI), varPhi) + .addReg(varLoop).addMBB(loopMBB) + .addReg(varEnd).addMBB(entryBB); + BuildMI(BB, dl, TII->get(ARM::PHI), srcPhi) + .addReg(srcLoop).addMBB(loopMBB) + .addReg(src).addMBB(entryBB); + BuildMI(BB, dl, TII->get(ARM::PHI), destPhi) + .addReg(destLoop).addMBB(loopMBB) + .addReg(dest).addMBB(entryBB); + + // [scratch, srcLoop] = LDR_POST(srcPhi, UnitSize) + // [destLoop] = STR_POST(scratch, destPhi, UnitSiz) + unsigned scratch = MRI.createVirtualRegister(UnitSize >= 8 ? TRC_Vec:TRC); + if (UnitSize >= 8) { + AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc), scratch) + .addReg(srcLoop, RegState::Define).addReg(srcPhi).addImm(0)); + + AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), destLoop) + .addReg(destPhi).addImm(0).addReg(scratch)); + } else if (isThumb2) { + AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc), scratch) + .addReg(srcLoop, RegState::Define).addReg(srcPhi).addImm(UnitSize)); + + AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), destLoop) + .addReg(scratch).addReg(destPhi) + .addImm(UnitSize)); + } else { + AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc), scratch) + .addReg(srcLoop, RegState::Define).addReg(srcPhi).addReg(0) + .addImm(UnitSize)); + + AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), destLoop) + .addReg(scratch).addReg(destPhi) + .addReg(0).addImm(UnitSize)); + } + + // Decrement loop variable by UnitSize. + MachineInstrBuilder MIB = BuildMI(BB, dl, + TII->get(isThumb2 ? ARM::t2SUBri : ARM::SUBri), varLoop); + AddDefaultCC(AddDefaultPred(MIB.addReg(varPhi).addImm(UnitSize))); + MIB->getOperand(5).setReg(ARM::CPSR); + MIB->getOperand(5).setIsDef(true); + + BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)) + .addMBB(loopMBB).addImm(ARMCC::NE).addReg(ARM::CPSR); + + // loopMBB can loop back to loopMBB or fall through to exitMBB. + BB->addSuccessor(loopMBB); + BB->addSuccessor(exitMBB); + + // Add epilogue to handle BytesLeft. 
+ BB = exitMBB; + MachineInstr *StartOfExit = exitMBB->begin(); + ldrOpc = isThumb2 ? ARM::t2LDRB_POST : ARM::LDRB_POST_IMM; + strOpc = isThumb2 ? ARM::t2STRB_POST : ARM::STRB_POST_IMM; + + // [scratch, srcOut] = LDRB_POST(srcLoop, 1) + // [destOut] = STRB_POST(scratch, destLoop, 1) + unsigned srcIn = srcLoop; + unsigned destIn = destLoop; + for (unsigned i = 0; i < BytesLeft; i++) { + unsigned scratch = MRI.createVirtualRegister(TRC); + unsigned srcOut = MRI.createVirtualRegister(TRC); + unsigned destOut = MRI.createVirtualRegister(TRC); + if (isThumb2) { + AddDefaultPred(BuildMI(*BB, StartOfExit, dl, + TII->get(ldrOpc),scratch) + .addReg(srcOut, RegState::Define).addReg(srcIn).addImm(1)); + + AddDefaultPred(BuildMI(*BB, StartOfExit, dl, TII->get(strOpc), destOut) + .addReg(scratch).addReg(destIn) + .addImm(1)); + } else { + AddDefaultPred(BuildMI(*BB, StartOfExit, dl, + TII->get(ldrOpc),scratch) + .addReg(srcOut, RegState::Define).addReg(srcIn).addReg(0).addImm(1)); + + AddDefaultPred(BuildMI(*BB, StartOfExit, dl, TII->get(strOpc), destOut) + .addReg(scratch).addReg(destIn) + .addReg(0).addImm(1)); + } + srcIn = srcOut; + destIn = destOut; + } + + MI->eraseFromParent(); // The instruction is gone now. + return BB; +} + MachineBasicBlock * ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *BB) const { @@ -6517,10 +6844,9 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, MachineRegisterInfo &MRI = Fn->getRegInfo(); // In Thumb mode S must not be specified if source register is the SP or // PC and if destination register is the SP, so restrict register class - unsigned NewMovDstReg = MRI.createVirtualRegister( - isThumb2 ? ARM::rGPRRegisterClass : ARM::GPRRegisterClass); - unsigned NewRsbDstReg = MRI.createVirtualRegister( - isThumb2 ? ARM::rGPRRegisterClass : ARM::GPRRegisterClass); + unsigned NewRsbDstReg = MRI.createVirtualRegister(isThumb2 ? + (const TargetRegisterClass*)&ARM::rGPRRegClass : + (const TargetRegisterClass*)&ARM::GPRRegClass); // Transfer the remainder of BB and its successor edges to sinkMBB. SinkBB->splice(SinkBB->begin(), BB, @@ -6534,12 +6860,10 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, // fall through to SinkMBB RSBBB->addSuccessor(SinkBB); - // insert a movs at the end of BB - BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2MOVr : ARM::MOVr), - NewMovDstReg) - .addReg(ABSSrcReg, RegState::Kill) - .addImm((unsigned)ARMCC::AL).addReg(0) - .addReg(ARM::CPSR, RegState::Define); + // insert a cmp at the end of BB + AddDefaultPred(BuildMI(BB, dl, + TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri)) + .addReg(ABSSrcReg).addImm(0)); // insert a bcc with opposite CC to ARMCC::MI at the end of BB BuildMI(BB, dl, @@ -6551,7 +6875,7 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, // by if-conversion pass BuildMI(*RSBBB, RSBBB->begin(), dl, TII->get(isThumb2 ? 
ARM::t2RSBri : ARM::RSBri), NewRsbDstReg) - .addReg(NewMovDstReg, RegState::Kill) + .addReg(ABSSrcReg, RegState::Kill) .addImm(0).addImm((unsigned)ARMCC::AL).addReg(0).addReg(0); // insert PHI in SinkBB, @@ -6559,7 +6883,7 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, BuildMI(*SinkBB, SinkBB->begin(), dl, TII->get(ARM::PHI), ABSDstReg) .addReg(NewRsbDstReg).addMBB(RSBBB) - .addReg(NewMovDstReg).addMBB(BB); + .addReg(ABSSrcReg).addMBB(BB); // remove ABS instruction MI->eraseFromParent(); @@ -6567,6 +6891,9 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, // return last added BB return SinkBB; } + case ARM::COPY_STRUCT_BYVAL_I32: + ++NumLoopByVals; + return EmitStructByval(MI, BB); } } @@ -7095,8 +7422,12 @@ static SDValue PerformORCombine(SDNode *N, return COR; } + + // The code below optimizes (or (and X, Y), Z). + // The AND operand needs to have a single user to make these optimizations + // profitable. SDValue N0 = N->getOperand(0); - if (N0.getOpcode() != ISD::AND) + if (N0.getOpcode() != ISD::AND || !N0.hasOneUse()) return SDValue(); SDValue N1 = N->getOperand(1); @@ -7353,7 +7684,7 @@ static SDValue PerformSTORECombine(SDNode *N, if (St->isVolatile()) return SDValue(); - // Optimize trunc store (of multiple scalars) to shuffle and store. First, + // Optimize trunc store (of multiple scalars) to shuffle and store. First, // pack all of the elements in one place. Next, store to memory in fewer // chunks. SDValue StVal = St->getValue(); @@ -8721,12 +9052,19 @@ bool ARMTargetLowering::isLegalICmpImmediate(int64_t Imm) const { return Imm >= 0 && Imm <= 255; } -/// isLegalAddImmediate - Return true if the specified immediate is legal -/// add immediate, that is the target has add instructions which can add -/// a register with the immediate without having to materialize the +/// isLegalAddImmediate - Return true if the specified immediate is a legal add +/// *or sub* immediate, that is the target has add or sub instructions which can +/// add a register with the immediate without having to materialize the /// immediate into a register. bool ARMTargetLowering::isLegalAddImmediate(int64_t Imm) const { - return ARM_AM::getSOImmVal(Imm) != -1; + // Same encoding for add/sub, just flip the sign. + int64_t AbsImm = llvm::abs64(Imm); + if (!Subtarget->isThumb()) + return ARM_AM::getSOImmVal(AbsImm) != -1; + if (Subtarget->isThumb2()) + return ARM_AM::getT2SOImmVal(AbsImm) != -1; + // Thumb1 only has 8-bit unsigned immediate. + return AbsImm >= 0 && AbsImm <= 255; } static bool getARMIndexedAddressParts(SDNode *Ptr, EVT VT, @@ -9030,39 +9368,38 @@ ARMTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, switch (Constraint[0]) { case 'l': // Low regs or general regs. if (Subtarget->isThumb()) - return RCPair(0U, ARM::tGPRRegisterClass); - else - return RCPair(0U, ARM::GPRRegisterClass); + return RCPair(0U, &ARM::tGPRRegClass); + return RCPair(0U, &ARM::GPRRegClass); case 'h': // High regs or no regs. 
if (Subtarget->isThumb()) - return RCPair(0U, ARM::hGPRRegisterClass); + return RCPair(0U, &ARM::hGPRRegClass); break; case 'r': - return RCPair(0U, ARM::GPRRegisterClass); + return RCPair(0U, &ARM::GPRRegClass); case 'w': if (VT == MVT::f32) - return RCPair(0U, ARM::SPRRegisterClass); + return RCPair(0U, &ARM::SPRRegClass); if (VT.getSizeInBits() == 64) - return RCPair(0U, ARM::DPRRegisterClass); + return RCPair(0U, &ARM::DPRRegClass); if (VT.getSizeInBits() == 128) - return RCPair(0U, ARM::QPRRegisterClass); + return RCPair(0U, &ARM::QPRRegClass); break; case 'x': if (VT == MVT::f32) - return RCPair(0U, ARM::SPR_8RegisterClass); + return RCPair(0U, &ARM::SPR_8RegClass); if (VT.getSizeInBits() == 64) - return RCPair(0U, ARM::DPR_8RegisterClass); + return RCPair(0U, &ARM::DPR_8RegClass); if (VT.getSizeInBits() == 128) - return RCPair(0U, ARM::QPR_8RegisterClass); + return RCPair(0U, &ARM::QPR_8RegClass); break; case 't': if (VT == MVT::f32) - return RCPair(0U, ARM::SPRRegisterClass); + return RCPair(0U, &ARM::SPRRegClass); break; } } if (StringRef("{cc}").equals_lower(Constraint)) - return std::make_pair(unsigned(ARM::CPSR), ARM::CCRRegisterClass); + return std::make_pair(unsigned(ARM::CPSR), &ARM::CCRRegClass); return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT); } diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h index 352d980..51d1205 100644 --- a/lib/Target/ARM/ARMISelLowering.h +++ b/lib/Target/ARM/ARMISelLowering.h @@ -41,6 +41,9 @@ namespace llvm { // PIC mode. WrapperJT, // WrapperJT - A wrapper node for TargetJumpTable + // Add pseudo op to model memcpy for struct byval. + COPY_STRUCT_BYVAL, + CALL, // Function call. CALL_PRED, // Function call that's predicable. CALL_NOLINK, // Function call with branch not branch-and-link. @@ -53,6 +56,7 @@ namespace llvm { PIC_ADD, // Add with a PC operand and a PIC label. CMP, // ARM compare instructions. + CMN, // ARM CMN instructions. CMPZ, // ARM compare that sets only Z flag. CMPFP, // ARM VFP compare instruction, sets FPSCR. CMPFPw0, // ARM VFP compare against zero instruction, sets FPSCR. @@ -357,7 +361,8 @@ namespace llvm { /// createFastISel - This method returns a target specific FastISel object, /// or null if the target does not support "fast" ISel. 
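// Illustrative sketch, not from the patch: how the constraint letters
// resolved by the switch above appear in user code. With a GCC-compatible
// compiler in ARM or Thumb-2 mode, "l" requests a general-purpose register
// (restricted to r0-r7 in Thumb) and "w" a VFP/NEON register;
// getRegForInlineAsmConstraint is what maps those letters to tGPRRegClass,
// SPRRegClass, and friends.
static inline int addViaLowRegs(int a, int b) {
  int r;
  __asm__("add %0, %1, %2" : "=l"(r) : "l"(a), "l"(b));
  return r;
}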
- virtual FastISel *createFastISel(FunctionLoweringInfo &funcInfo) const; + virtual FastISel *createFastISel(FunctionLoweringInfo &funcInfo, + const TargetLibraryInfo *libInfo) const; Sched::Preference getSchedulingPreference(SDNode *N) const; @@ -389,9 +394,9 @@ namespace llvm { /// unsigned ARMPCLabelIndex; - void addTypeForNEON(EVT VT, EVT PromotedLdStVT, EVT PromotedBitwiseVT); - void addDRTypeForNEON(EVT VT); - void addQRTypeForNEON(EVT VT); + void addTypeForNEON(MVT VT, MVT PromotedLdStVT, MVT PromotedBitwiseVT); + void addDRTypeForNEON(MVT VT); + void addQRTypeForNEON(MVT VT); typedef SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPassVector; void PassF64ArgInRegs(DebugLoc dl, SelectionDAG &DAG, @@ -422,7 +427,8 @@ namespace llvm { SDValue LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA, SelectionDAG &DAG) const; SDValue LowerToTLSExecModels(GlobalAddressSDNode *GA, - SelectionDAG &DAG) const; + SelectionDAG &DAG, + TLSModel::Model model) const; SDValue LowerGLOBAL_OFFSET_TABLE(SDValue Op, SelectionDAG &DAG) const; SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const; @@ -462,13 +468,7 @@ namespace llvm { unsigned &VARegSize, unsigned &VARegSaveSize) const; virtual SDValue - LowerCall(SDValue Chain, SDValue Callee, - CallingConv::ID CallConv, bool isVarArg, - bool doesNotRet, bool &isTailCall, - const SmallVectorImpl<ISD::OutputArg> &Outs, - const SmallVectorImpl<SDValue> &OutVals, - const SmallVectorImpl<ISD::InputArg> &Ins, - DebugLoc dl, SelectionDAG &DAG, + LowerCall(TargetLowering::CallLoweringInfo &CLI, SmallVectorImpl<SDValue> &InVals) const; /// HandleByVal - Target-specific cleanup for ByVal support. @@ -532,6 +532,9 @@ namespace llvm { MachineBasicBlock *MBB) const; bool RemapAddSubWithFlags(MachineInstr *MI, MachineBasicBlock *BB) const; + + MachineBasicBlock *EmitStructByval(MachineInstr *MI, + MachineBasicBlock *MBB) const; }; enum NEONModImmType { @@ -542,7 +545,8 @@ namespace llvm { namespace ARM { - FastISel *createFastISel(FunctionLoweringInfo &funcInfo); + FastISel *createFastISel(FunctionLoweringInfo &funcInfo, + const TargetLibraryInfo *libInfo); } } diff --git a/lib/Target/ARM/ARMInstrFormats.td b/lib/Target/ARM/ARMInstrFormats.td index f04926a..c8966fb 100644 --- a/lib/Target/ARM/ARMInstrFormats.td +++ b/lib/Target/ARM/ARMInstrFormats.td @@ -827,6 +827,8 @@ class AExtI<bits<8> opcod, dag oops, dag iops, InstrItinClass itin, let Inst{7-4} = 0b0111; let Inst{9-8} = 0b00; let Inst{27-20} = opcod; + + let Unpredictable{9-8} = 0b11; } // Misc Arithmetic instructions. @@ -1862,7 +1864,6 @@ class N3V<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op6, bit op4, string opc, string dt, string asm, string cstr, list<dag> pattern> : N3VCommon<op24, op23, op21_20, op11_8, op6, op4, oops, iops, f, itin, opc, dt, asm, cstr, pattern> { - // Instruction operands. bits<5> Vd; bits<5> Vn; diff --git a/lib/Target/ARM/ARMInstrInfo.cpp b/lib/Target/ARM/ARMInstrInfo.cpp index b8f607e..31b0c41 100644 --- a/lib/Target/ARM/ARMInstrInfo.cpp +++ b/lib/Target/ARM/ARMInstrInfo.cpp @@ -31,7 +31,8 @@ ARMInstrInfo::ARMInstrInfo(const ARMSubtarget &STI) /// getNoopForMachoTarget - Return the noop instruction to use for a noop. 
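// Illustrative sketch, not from the patch: the hint-instruction space that
// the getNoopForMachoTarget change below and the ARMInstrInfo.td aliases in
// this import fold the old NOP/YIELD/WFE/WFI/SEV definitions into. The
// immediates mirror the InstAlias lines added later in the patch
// ("nop" = HINT #0, "yield" = HINT #1, and so on).
enum ARMHintImmSketch {
  Hint_NOP   = 0,
  Hint_YIELD = 1,
  Hint_WFE   = 2,
  Hint_WFI   = 3,
  Hint_SEV   = 4
};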
void ARMInstrInfo::getNoopForMachoTarget(MCInst &NopInst) const { if (hasNOP()) { - NopInst.setOpcode(ARM::NOP); + NopInst.setOpcode(ARM::HINT); + NopInst.addOperand(MCOperand::CreateImm(0)); NopInst.addOperand(MCOperand::CreateImm(ARMCC::AL)); NopInst.addOperand(MCOperand::CreateReg(0)); } else { diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index 1eb561d..6340a58 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -18,6 +18,9 @@ // Type profiles. def SDT_ARMCallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i32> ]>; def SDT_ARMCallSeqEnd : SDCallSeqEnd<[ SDTCisVT<0, i32>, SDTCisVT<1, i32> ]>; +def SDT_ARMStructByVal : SDTypeProfile<0, 4, + [SDTCisVT<0, i32>, SDTCisVT<1, i32>, + SDTCisVT<2, i32>, SDTCisVT<3, i32>]>; def SDT_ARMSaveCallPC : SDTypeProfile<0, 1, []>; @@ -90,6 +93,10 @@ def ARMcallseq_start : SDNode<"ISD::CALLSEQ_START", SDT_ARMCallSeqStart, [SDNPHasChain, SDNPOutGlue]>; def ARMcallseq_end : SDNode<"ISD::CALLSEQ_END", SDT_ARMCallSeqEnd, [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>; +def ARMcopystructbyval : SDNode<"ARMISD::COPY_STRUCT_BYVAL" , + SDT_ARMStructByVal, + [SDNPHasChain, SDNPInGlue, SDNPOutGlue, + SDNPMayStore, SDNPMayLoad]>; def ARMcall : SDNode<"ARMISD::CALL", SDT_ARMcall, [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, @@ -121,6 +128,9 @@ def ARMBcci64 : SDNode<"ARMISD::BCC_i64", SDT_ARMBCC_i64, def ARMcmp : SDNode<"ARMISD::CMP", SDT_ARMCmp, [SDNPOutGlue]>; +def ARMcmn : SDNode<"ARMISD::CMN", SDT_ARMCmp, + [SDNPOutGlue]>; + def ARMcmpZ : SDNode<"ARMISD::CMPZ", SDT_ARMCmp, [SDNPOutGlue, SDNPCommutative]>; @@ -161,53 +171,59 @@ def ARMbfi : SDNode<"ARMISD::BFI", SDT_ARMBFI>; // ARM Instruction Predicate Definitions. // def HasV4T : Predicate<"Subtarget->hasV4TOps()">, - AssemblerPredicate<"HasV4TOps">; + AssemblerPredicate<"HasV4TOps", "armv4t">; def NoV4T : Predicate<"!Subtarget->hasV4TOps()">; def HasV5T : Predicate<"Subtarget->hasV5TOps()">; def HasV5TE : Predicate<"Subtarget->hasV5TEOps()">, - AssemblerPredicate<"HasV5TEOps">; + AssemblerPredicate<"HasV5TEOps", "armv5te">; def HasV6 : Predicate<"Subtarget->hasV6Ops()">, - AssemblerPredicate<"HasV6Ops">; + AssemblerPredicate<"HasV6Ops", "armv6">; def NoV6 : Predicate<"!Subtarget->hasV6Ops()">; def HasV6T2 : Predicate<"Subtarget->hasV6T2Ops()">, - AssemblerPredicate<"HasV6T2Ops">; + AssemblerPredicate<"HasV6T2Ops", "armv6t2">; def NoV6T2 : Predicate<"!Subtarget->hasV6T2Ops()">; def HasV7 : Predicate<"Subtarget->hasV7Ops()">, - AssemblerPredicate<"HasV7Ops">; + AssemblerPredicate<"HasV7Ops", "armv7">; def NoVFP : Predicate<"!Subtarget->hasVFP2()">; def HasVFP2 : Predicate<"Subtarget->hasVFP2()">, - AssemblerPredicate<"FeatureVFP2">; + AssemblerPredicate<"FeatureVFP2", "VFP2">; def HasVFP3 : Predicate<"Subtarget->hasVFP3()">, - AssemblerPredicate<"FeatureVFP3">; + AssemblerPredicate<"FeatureVFP3", "VFP3">; def HasVFP4 : Predicate<"Subtarget->hasVFP4()">, - AssemblerPredicate<"FeatureVFP4">; + AssemblerPredicate<"FeatureVFP4", "VFP4">; def HasNEON : Predicate<"Subtarget->hasNEON()">, - AssemblerPredicate<"FeatureNEON">; + AssemblerPredicate<"FeatureNEON", "NEON">; def HasFP16 : Predicate<"Subtarget->hasFP16()">, - AssemblerPredicate<"FeatureFP16">; + AssemblerPredicate<"FeatureFP16","half-float">; def HasDivide : Predicate<"Subtarget->hasDivide()">, - AssemblerPredicate<"FeatureHWDiv">; + AssemblerPredicate<"FeatureHWDiv", "divide">; def HasT2ExtractPack : Predicate<"Subtarget->hasT2ExtractPack()">, - AssemblerPredicate<"FeatureT2XtPk">; + 
AssemblerPredicate<"FeatureT2XtPk", + "pack/extract">; def HasThumb2DSP : Predicate<"Subtarget->hasThumb2DSP()">, - AssemblerPredicate<"FeatureDSPThumb2">; + AssemblerPredicate<"FeatureDSPThumb2", + "thumb2-dsp">; def HasDB : Predicate<"Subtarget->hasDataBarrier()">, - AssemblerPredicate<"FeatureDB">; + AssemblerPredicate<"FeatureDB", + "data-barriers">; def HasMP : Predicate<"Subtarget->hasMPExtension()">, - AssemblerPredicate<"FeatureMP">; + AssemblerPredicate<"FeatureMP", + "mp-extensions">; def UseNEONForFP : Predicate<"Subtarget->useNEONForSinglePrecisionFP()">; def DontUseNEONForFP : Predicate<"!Subtarget->useNEONForSinglePrecisionFP()">; def IsThumb : Predicate<"Subtarget->isThumb()">, - AssemblerPredicate<"ModeThumb">; + AssemblerPredicate<"ModeThumb", "thumb">; def IsThumb1Only : Predicate<"Subtarget->isThumb1Only()">; def IsThumb2 : Predicate<"Subtarget->isThumb2()">, - AssemblerPredicate<"ModeThumb,FeatureThumb2">; + AssemblerPredicate<"ModeThumb,FeatureThumb2", + "thumb2">; def IsMClass : Predicate<"Subtarget->isMClass()">, - AssemblerPredicate<"FeatureMClass">; + AssemblerPredicate<"FeatureMClass", "armv7m">; def IsARClass : Predicate<"!Subtarget->isMClass()">, - AssemblerPredicate<"!FeatureMClass">; + AssemblerPredicate<"!FeatureMClass", + "armv7a/r">; def IsARM : Predicate<"!Subtarget->isThumb()">, - AssemblerPredicate<"!ModeThumb">; + AssemblerPredicate<"!ModeThumb", "arm-mode">; def IsIOS : Predicate<"Subtarget->isTargetIOS()">; def IsNotIOS : Predicate<"!Subtarget->isTargetIOS()">; def IsNaCl : Predicate<"Subtarget->isTargetNaCl()">; @@ -220,7 +236,8 @@ def UseFPVMLx : Predicate<"Subtarget->useFPVMLx()">; // Prefer fused MAC for fp mul + add over fp VMLA / VMLS if they are available. // But only select them if more precision in FP computation is allowed. // Do not use them for Darwin platforms. -def UseFusedMAC : Predicate<"!TM.Options.NoExcessFPPrecision && " +def UseFusedMAC : Predicate<"(TM.Options.AllowFPOpFusion ==" + " FPOpFusion::Fast) && " "!Subtarget->isTargetDarwin()">; def DontUseFusedMAC : Predicate<"!Subtarget->hasVFP4() || " "Subtarget->isTargetDarwin()">; @@ -236,9 +253,9 @@ class RegConstraint { // ARM specific transformation functions and pattern fragments. // -// so_imm_neg_XFORM - Return a so_imm value packed into the format described for -// so_imm_neg def below. -def so_imm_neg_XFORM : SDNodeXFormgetTargetConstant(-(int)N->getZExtValue(), MVT::i32); }]>; @@ -257,7 +274,7 @@ def so_imm_neg_asmoperand : AsmOperandClass { let Name = "ARMSOImmNeg"; } def so_imm_neg : Operand, PatLeaf<(imm), [{ int64_t Value = -(int)N->getZExtValue(); return Value && ARM_AM::getSOImmVal(Value) != -1; - }], so_imm_neg_XFORM> { + }], imm_neg_XFORM> { let ParserMatchClass = so_imm_neg_asmoperand; } @@ -399,8 +416,11 @@ def pclabel : Operand { } // ADR instruction labels. +def AdrLabelAsmOperand : AsmOperandClass { let Name = "AdrLabel"; } def adrlabel : Operand { let EncoderMethod = "getAdrLabelOpValue"; + let ParserMatchClass = AdrLabelAsmOperand; + let PrintMethod = "printAdrLabelOperand"; } def neon_vcvt_imm32 : Operand { @@ -570,7 +590,10 @@ def imm1_31 : Operand, ImmLeaf 0 && Imm < 32; }]> { } /// imm0_15 predicate - Immediate in the range [0,15]. 
-def Imm0_15AsmOperand: ImmAsmOperand { let Name = "Imm0_15"; } +def Imm0_15AsmOperand: ImmAsmOperand { + let Name = "Imm0_15"; + let DiagnosticType = "ImmRange0_15"; +} def imm0_15 : Operand, ImmLeaf= 0 && Imm < 16; }]> { @@ -615,6 +638,11 @@ def imm0_65535 : Operand, ImmLeaf, ImmLeaf= 0 && -Imm < 65536; +}]>; + // imm0_65535_expr - For movt/movw - 16-bit immediate that can also reference // a relocatable expression. // @@ -940,9 +968,10 @@ include "ARMInstrFormats.td" /// AsI1_bin_irs - Defines a set of (op r, {so_imm|r|so_reg}) patterns for a /// binop that produces a value. +let TwoOperandAliasConstraint = "$Rn = $Rd" in multiclass AsI1_bin_irs opcod, string opc, InstrItinClass iii, InstrItinClass iir, InstrItinClass iis, - PatFrag opnode, string baseOpc, bit Commutable = 0> { + PatFrag opnode, bit Commutable = 0> { // The register-immediate version is re-materializable. This is useful // in particular for taking the address of a local. let isReMaterializable = 1 in { @@ -1003,38 +1032,15 @@ multiclass AsI1_bin_irs opcod, string opc, let Inst{4} = 1; let Inst{3-0} = shift{3-0}; } - - // Assembly aliases for optional destination operand when it's the same - // as the source operand. - def : InstAlias(!strconcat(baseOpc, "ri")) GPR:$Rdn, GPR:$Rdn, - so_imm:$imm, pred:$p, - cc_out:$s)>, - Requires<[IsARM]>; - def : InstAlias(!strconcat(baseOpc, "rr")) GPR:$Rdn, GPR:$Rdn, - GPR:$Rm, pred:$p, - cc_out:$s)>, - Requires<[IsARM]>; - def : InstAlias(!strconcat(baseOpc, "rsi")) GPR:$Rdn, GPR:$Rdn, - so_reg_imm:$shift, pred:$p, - cc_out:$s)>, - Requires<[IsARM]>; - def : InstAlias(!strconcat(baseOpc, "rsr")) GPR:$Rdn, GPR:$Rdn, - so_reg_reg:$shift, pred:$p, - cc_out:$s)>, - Requires<[IsARM]>; - } /// AsI1_rbin_irs - Same as AsI1_bin_irs except the order of operands are /// reversed. The 'rr' form is only defined for the disassembler; for codegen /// it is equivalent to the AsI1_bin_irs counterpart. +let TwoOperandAliasConstraint = "$Rn = $Rd" in multiclass AsI1_rbin_irs opcod, string opc, InstrItinClass iii, InstrItinClass iir, InstrItinClass iis, - PatFrag opnode, string baseOpc, bit Commutable = 0> { + PatFrag opnode, bit Commutable = 0> { // The register-immediate version is re-materializable. This is useful // in particular for taking the address of a local. let isReMaterializable = 1 in { @@ -1094,30 +1100,6 @@ multiclass AsI1_rbin_irs opcod, string opc, let Inst{4} = 1; let Inst{3-0} = shift{3-0}; } - - // Assembly aliases for optional destination operand when it's the same - // as the source operand. - def : InstAlias(!strconcat(baseOpc, "ri")) GPR:$Rdn, GPR:$Rdn, - so_imm:$imm, pred:$p, - cc_out:$s)>, - Requires<[IsARM]>; - def : InstAlias(!strconcat(baseOpc, "rr")) GPR:$Rdn, GPR:$Rdn, - GPR:$Rm, pred:$p, - cc_out:$s)>, - Requires<[IsARM]>; - def : InstAlias(!strconcat(baseOpc, "rsi")) GPR:$Rdn, GPR:$Rdn, - so_reg_imm:$shift, pred:$p, - cc_out:$s)>, - Requires<[IsARM]>; - def : InstAlias(!strconcat(baseOpc, "rsr")) GPR:$Rdn, GPR:$Rdn, - so_reg_reg:$shift, pred:$p, - cc_out:$s)>, - Requires<[IsARM]>; - } /// AsI1_bin_s_irs - Same as AsI1_bin_irs except it sets the 's' bit by default. @@ -1304,8 +1286,9 @@ class AI_exta_rrot_np opcod, string opc> } /// AI1_adde_sube_irs - Define instructions and patterns for adde and sube. 
+let TwoOperandAliasConstraint = "$Rn = $Rd" in multiclass AI1_adde_sube_irs opcod, string opc, PatFrag opnode, - string baseOpc, bit Commutable = 0> { + bit Commutable = 0> { let hasPostISelHook = 1, Defs = [CPSR], Uses = [CPSR] in { def ri : AsI1 opcod, string opc, PatFrag opnode, def rsr : AsI1, + [(set GPRnopc:$Rd, CPSR, + (opnode GPRnopc:$Rn, so_reg_reg:$shift, CPSR))]>, Requires<[IsARM]> { bits<4> Rd; bits<4> Rn; @@ -1366,34 +1350,11 @@ multiclass AI1_adde_sube_irs opcod, string opc, PatFrag opnode, let Inst{3-0} = shift{3-0}; } } - - // Assembly aliases for optional destination operand when it's the same - // as the source operand. - def : InstAlias(!strconcat(baseOpc, "ri")) GPR:$Rdn, GPR:$Rdn, - so_imm:$imm, pred:$p, - cc_out:$s)>, - Requires<[IsARM]>; - def : InstAlias(!strconcat(baseOpc, "rr")) GPR:$Rdn, GPR:$Rdn, - GPR:$Rm, pred:$p, - cc_out:$s)>, - Requires<[IsARM]>; - def : InstAlias(!strconcat(baseOpc, "rsi")) GPR:$Rdn, GPR:$Rdn, - so_reg_imm:$shift, pred:$p, - cc_out:$s)>, - Requires<[IsARM]>; - def : InstAlias(!strconcat(baseOpc, "rsr")) GPRnopc:$Rdn, GPRnopc:$Rdn, - so_reg_reg:$shift, pred:$p, - cc_out:$s)>, - Requires<[IsARM]>; } /// AI1_rsc_irs - Define instructions and patterns for rsc -multiclass AI1_rsc_irs opcod, string opc, PatFrag opnode, - string baseOpc> { +let TwoOperandAliasConstraint = "$Rn = $Rd" in +multiclass AI1_rsc_irs opcod, string opc, PatFrag opnode> { let hasPostISelHook = 1, Defs = [CPSR], Uses = [CPSR] in { def ri : AsI1 opcod, string opc, PatFrag opnode, let Inst{3-0} = shift{3-0}; } } - - // Assembly aliases for optional destination operand when it's the same - // as the source operand. - def : InstAlias(!strconcat(baseOpc, "ri")) GPR:$Rdn, GPR:$Rdn, - so_imm:$imm, pred:$p, - cc_out:$s)>, - Requires<[IsARM]>; - def : InstAlias(!strconcat(baseOpc, "rr")) GPR:$Rdn, GPR:$Rdn, - GPR:$Rm, pred:$p, - cc_out:$s)>, - Requires<[IsARM]>; - def : InstAlias(!strconcat(baseOpc, "rsi")) GPR:$Rdn, GPR:$Rdn, - so_reg_imm:$shift, pred:$p, - cc_out:$s)>, - Requires<[IsARM]>; - def : InstAlias(!strconcat(baseOpc, "rsr")) GPR:$Rdn, GPR:$Rdn, - so_reg_reg:$shift, pred:$p, - cc_out:$s)>, - Requires<[IsARM]>; } let canFoldAsLoad = 1, isReMaterializable = 1 in { @@ -1511,9 +1449,10 @@ multiclass AI_ldr1nopc { + [(set GPRnopc:$Rt, (opnode addrmode_imm12:$addr))]> { bits<4> Rt; bits<17> addr; let Inst{23} = addr{12}; // U (add = ('U' == 1)) @@ -1521,9 +1460,10 @@ multiclass AI_ldr1nopc { + def rs : AI2ldst<0b011, 1, isByte, (outs GPRnopc:$Rt), + (ins ldst_so_reg:$shift), + AddrModeNone, LdFrm, iir, opc, "\t$Rt, $shift", + [(set GPRnopc:$Rt, (opnode ldst_so_reg:$shift))]> { bits<4> Rt; bits<17> shift; let shift{4} = 0; // Inst{4} = 0 @@ -1581,9 +1521,10 @@ multiclass AI_str1nopc { + def rs : AI2ldst<0b011, 0, isByte, (outs), + (ins GPRnopc:$Rt, ldst_so_reg:$shift), + AddrModeNone, StFrm, iir, opc, "\t$Rt, $shift", + [(opnode GPRnopc:$Rt, ldst_so_reg:$shift)]> { bits<4> Rt; bits<17> shift; let shift{4} = 0; // Inst{4} = 0 @@ -1655,33 +1596,18 @@ def ATOMCMPXCHG6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2), NoItinerary, []>; } -def NOP : AI<(outs), (ins), MiscFrm, NoItinerary, "nop", "", []>, - Requires<[IsARM, HasV6T2]> { - let Inst{27-16} = 0b001100100000; - let Inst{15-8} = 0b11110000; - let Inst{7-0} = 0b00000000; +def HINT : AI<(outs), (ins imm0_255:$imm), MiscFrm, NoItinerary, + "hint", "\t$imm", []>, Requires<[IsARM, HasV6]> { + bits<8> imm; + let Inst{27-8} = 0b00110010000011110000; + let Inst{7-0} = imm; } -def YIELD : AI<(outs), (ins), MiscFrm, NoItinerary, 
"yield", "", []>, - Requires<[IsARM, HasV6T2]> { - let Inst{27-16} = 0b001100100000; - let Inst{15-8} = 0b11110000; - let Inst{7-0} = 0b00000001; -} - -def WFE : AI<(outs), (ins), MiscFrm, NoItinerary, "wfe", "", []>, - Requires<[IsARM, HasV6T2]> { - let Inst{27-16} = 0b001100100000; - let Inst{15-8} = 0b11110000; - let Inst{7-0} = 0b00000010; -} - -def WFI : AI<(outs), (ins), MiscFrm, NoItinerary, "wfi", "", []>, - Requires<[IsARM, HasV6T2]> { - let Inst{27-16} = 0b001100100000; - let Inst{15-8} = 0b11110000; - let Inst{7-0} = 0b00000011; -} +def : InstAlias<"nop$p", (HINT 0, pred:$p)>, Requires<[IsARM, HasV6T2]>; +def : InstAlias<"yield$p", (HINT 1, pred:$p)>, Requires<[IsARM, HasV6T2]>; +def : InstAlias<"wfe$p", (HINT 2, pred:$p)>, Requires<[IsARM, HasV6T2]>; +def : InstAlias<"wfi$p", (HINT 3, pred:$p)>, Requires<[IsARM, HasV6T2]>; +def : InstAlias<"sev$p", (HINT 4, pred:$p)>, Requires<[IsARM, HasV6T2]>; def SEL : AI<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), DPFrm, NoItinerary, "sel", "\t$Rd, $Rn, $Rm", []>, Requires<[IsARM, HasV6]> { @@ -1694,16 +1620,10 @@ def SEL : AI<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), DPFrm, NoItinerary, "sel", let Inst{27-20} = 0b01101000; let Inst{7-4} = 0b1011; let Inst{11-8} = 0b1111; + let Unpredictable{11-8} = 0b1111; } -def SEV : AI<(outs), (ins), MiscFrm, NoItinerary, "sev", "", - []>, Requires<[IsARM, HasV6T2]> { - let Inst{27-16} = 0b001100100000; - let Inst{15-8} = 0b11110000; - let Inst{7-0} = 0b00000100; -} - -// The i32imm operand $val can be used by a debugger to store more information +// The 16-bit operand $val can be used by a debugger to store more information // about the breakpoint. def BKPT : AI<(outs), (ins imm0_65535:$val), MiscFrm, NoItinerary, "bkpt", "\t$val", []>, Requires<[IsARM]> { @@ -1922,7 +1842,7 @@ let isCall = 1, // at least be a pseudo instruction expanding to the predicated version // at MC lowering time. Defs = [LR], Uses = [SP] in { - def BL : ABXI<0b1011, (outs), (ins bl_target:$func, variable_ops), + def BL : ABXI<0b1011, (outs), (ins bl_target:$func), IIC_Br, "bl\t$func", [(ARMcall tglobaladdr:$func)]>, Requires<[IsARM]> { @@ -1932,7 +1852,7 @@ let isCall = 1, let DecoderMethod = "DecodeBranchImmInstruction"; } - def BL_pred : ABI<0b1011, (outs), (ins bl_target:$func, variable_ops), + def BL_pred : ABI<0b1011, (outs), (ins bl_target:$func), IIC_Br, "bl", "\t$func", [(ARMcall_pred tglobaladdr:$func)]>, Requires<[IsARM]> { @@ -1942,7 +1862,7 @@ let isCall = 1, } // ARMv5T and above - def BLX : AXI<(outs), (ins GPR:$func, variable_ops), BrMiscFrm, + def BLX : AXI<(outs), (ins GPR:$func), BrMiscFrm, IIC_Br, "blx\t$func", [(ARMcall GPR:$func)]>, Requires<[IsARM, HasV5T]> { @@ -1951,7 +1871,7 @@ let isCall = 1, let Inst{3-0} = func; } - def BLX_pred : AI<(outs), (ins GPR:$func, variable_ops), BrMiscFrm, + def BLX_pred : AI<(outs), (ins GPR:$func), BrMiscFrm, IIC_Br, "blx", "\t$func", [(ARMcall_pred GPR:$func)]>, Requires<[IsARM, HasV5T]> { @@ -1962,19 +1882,18 @@ let isCall = 1, // ARMv4T // Note: Restrict $func to the tGPR regclass to prevent it being in LR. 
- def BX_CALL : ARMPseudoInst<(outs), (ins tGPR:$func, variable_ops), + def BX_CALL : ARMPseudoInst<(outs), (ins tGPR:$func), 8, IIC_Br, [(ARMcall_nolink tGPR:$func)]>, Requires<[IsARM, HasV4T]>; // ARMv4 - def BMOVPCRX_CALL : ARMPseudoInst<(outs), (ins tGPR:$func, variable_ops), + def BMOVPCRX_CALL : ARMPseudoInst<(outs), (ins tGPR:$func), 8, IIC_Br, [(ARMcall_nolink tGPR:$func)]>, Requires<[IsARM, NoV4T]>; // mov lr, pc; b if callee is marked noreturn to avoid confusing the // return stack predictor. - def BMOVPCB_CALL : ARMPseudoInst<(outs), - (ins bl_target:$func, variable_ops), + def BMOVPCB_CALL : ARMPseudoInst<(outs), (ins bl_target:$func), 8, IIC_Br, [(ARMcall_nolink tglobaladdr:$func)]>, Requires<[IsARM]>; } @@ -2044,18 +1963,16 @@ def BXJ : ABI<0b0001, (outs), (ins GPR:$func), NoItinerary, "bxj", "\t$func", // Tail calls. let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [SP] in { - def TCRETURNdi : PseudoInst<(outs), (ins i32imm:$dst, variable_ops), - IIC_Br, []>; + def TCRETURNdi : PseudoInst<(outs), (ins i32imm:$dst), IIC_Br, []>; - def TCRETURNri : PseudoInst<(outs), (ins tcGPR:$dst, variable_ops), - IIC_Br, []>; + def TCRETURNri : PseudoInst<(outs), (ins tcGPR:$dst), IIC_Br, []>; - def TAILJMPd : ARMPseudoExpand<(outs), (ins br_target:$dst, variable_ops), + def TAILJMPd : ARMPseudoExpand<(outs), (ins br_target:$dst), 4, IIC_Br, [], (Bcc br_target:$dst, (ops 14, zero_reg))>, Requires<[IsARM]>; - def TAILJMPr : ARMPseudoExpand<(outs), (ins tcGPR:$dst, variable_ops), + def TAILJMPr : ARMPseudoExpand<(outs), (ins tcGPR:$dst), 4, IIC_Br, [], (BX GPR:$dst)>, Requires<[IsARM]>; @@ -2509,6 +2426,7 @@ multiclass AI2_stridx; multiclass arm_ldst_mult { // IA is the default, so no need for an explicit suffix on the - // mnemonic here. Without it is the cannonical spelling. + // mnemonic here. Without it is the canonical spelling. def IA : AXI4<(outs), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops), IndexModeNone, f, itin, @@ -2900,9 +2818,6 @@ def MOVr : AsI1<0b1101, (outs GPR:$Rd), (ins GPR:$Rm), DPFrm, IIC_iMOVr, let Inst{15-12} = Rd; } -def : ARMInstAlias<"movs${p} $Rd, $Rm", - (MOVr GPR:$Rd, GPR:$Rm, pred:$p, CPSR)>; - // A version for the smaller set of tail call registers. let neverHasSideEffects = 1 in def MOVr_TC : AsI1<0b1101, (outs tcGPR:$Rd), (ins tcGPR:$Rm), DPFrm, @@ -3113,10 +3028,10 @@ def UBFX : I<(outs GPR:$Rd), defm ADD : AsI1_bin_irs<0b0100, "add", IIC_iALUi, IIC_iALUr, IIC_iALUsr, - BinOpFrag<(add node:$LHS, node:$RHS)>, "ADD", 1>; + BinOpFrag<(add node:$LHS, node:$RHS)>, 1>; defm SUB : AsI1_bin_irs<0b0010, "sub", IIC_iALUi, IIC_iALUr, IIC_iALUsr, - BinOpFrag<(sub node:$LHS, node:$RHS)>, "SUB">; + BinOpFrag<(sub node:$LHS, node:$RHS)>>; // ADD and SUB with 's' bit set. 
// @@ -3134,15 +3049,13 @@ defm SUBS : AsI1_bin_s_irs>; defm ADC : AI1_adde_sube_irs<0b0101, "adc", - BinOpWithFlagFrag<(ARMadde node:$LHS, node:$RHS, node:$FLAG)>, - "ADC", 1>; + BinOpWithFlagFrag<(ARMadde node:$LHS, node:$RHS, node:$FLAG)>, 1>; defm SBC : AI1_adde_sube_irs<0b0110, "sbc", - BinOpWithFlagFrag<(ARMsube node:$LHS, node:$RHS, node:$FLAG)>, - "SBC">; + BinOpWithFlagFrag<(ARMsube node:$LHS, node:$RHS, node:$FLAG)>>; -defm RSB : AsI1_rbin_irs <0b0011, "rsb", - IIC_iALUi, IIC_iALUr, IIC_iALUsr, - BinOpFrag<(sub node:$LHS, node:$RHS)>, "RSB">; +defm RSB : AsI1_rbin_irs<0b0011, "rsb", + IIC_iALUi, IIC_iALUr, IIC_iALUsr, + BinOpFrag<(sub node:$LHS, node:$RHS)>>; // FIXME: Eliminate them if we can write def : Pat patterns which defines // CPSR and the implicit def of CPSR is not needed. @@ -3150,8 +3063,7 @@ defm RSBS : AsI1_rbin_s_is>; defm RSC : AI1_rsc_irs<0b0111, "rsc", - BinOpWithFlagFrag<(ARMsube node:$LHS, node:$RHS, node:$FLAG)>, - "RSC">; + BinOpWithFlagFrag<(ARMsube node:$LHS, node:$RHS, node:$FLAG)>>; // (sub X, imm) gets canonicalized to (add X, -imm). Match this form. // The assume-no-carry-in form uses the negation of the input since add/sub @@ -3163,6 +3075,11 @@ def : ARMPat<(add GPR:$src, so_imm_neg:$imm), def : ARMPat<(ARMaddc GPR:$src, so_imm_neg:$imm), (SUBSri GPR:$src, so_imm_neg:$imm)>; +def : ARMPat<(add GPR:$src, imm0_65535_neg:$imm), + (SUBrr GPR:$src, (MOVi16 (imm_neg_XFORM imm:$imm)))>; +def : ARMPat<(ARMaddc GPR:$src, imm0_65535_neg:$imm), + (SUBSrr GPR:$src, (MOVi16 (imm_neg_XFORM imm:$imm)))>; + // The with-carry-in form matches bitwise not instead of the negation. // Effectively, the inverse interpretation of the carry flag already accounts // for part of the negation. @@ -3190,7 +3107,7 @@ class AAI op27_20, bits<8> op11_4, string opc, let Inst{19-16} = Rn; let Inst{15-12} = Rd; let Inst{3-0} = Rm; - + let Unpredictable{11-8} = 0b1111; } @@ -3355,16 +3272,16 @@ def : ARMV6Pat<(int_arm_usat GPRnopc:$a, imm:$pos), defm AND : AsI1_bin_irs<0b0000, "and", IIC_iBITi, IIC_iBITr, IIC_iBITsr, - BinOpFrag<(and node:$LHS, node:$RHS)>, "AND", 1>; + BinOpFrag<(and node:$LHS, node:$RHS)>, 1>; defm ORR : AsI1_bin_irs<0b1100, "orr", IIC_iBITi, IIC_iBITr, IIC_iBITsr, - BinOpFrag<(or node:$LHS, node:$RHS)>, "ORR", 1>; + BinOpFrag<(or node:$LHS, node:$RHS)>, 1>; defm EOR : AsI1_bin_irs<0b0001, "eor", IIC_iBITi, IIC_iBITr, IIC_iBITsr, - BinOpFrag<(xor node:$LHS, node:$RHS)>, "EOR", 1>; + BinOpFrag<(xor node:$LHS, node:$RHS)>, 1>; defm BIC : AsI1_bin_irs<0b1110, "bic", IIC_iBITi, IIC_iBITr, IIC_iBITsr, - BinOpFrag<(and node:$LHS, (not node:$RHS))>, "BIC">; + BinOpFrag<(and node:$LHS, (not node:$RHS))>>; // FIXME: bf_inv_mask_imm should be two operands, the lsb and the msb, just // like in the actual instruction encoding. The complexity of mapping the mask @@ -3482,27 +3399,28 @@ class AsMul1I64 opcod, dag oops, dag iops, InstrItinClass itin, // FIXME: The v5 pseudos are only necessary for the additional Constraint // property. Remove them when it's possible to add those properties -// on an individual MachineInstr, not just an instuction description. -let isCommutable = 1 in { -def MUL : AsMul1I32<0b0000000, (outs GPRnopc:$Rd), (ins GPRnopc:$Rn, GPRnopc:$Rm), - IIC_iMUL32, "mul", "\t$Rd, $Rn, $Rm", - [(set GPRnopc:$Rd, (mul GPRnopc:$Rn, GPRnopc:$Rm))]>, - Requires<[IsARM, HasV6]> { +// on an individual MachineInstr, not just an instruction description. 
+let isCommutable = 1, TwoOperandAliasConstraint = "$Rn = $Rd" in { +def MUL : AsMul1I32<0b0000000, (outs GPRnopc:$Rd), + (ins GPRnopc:$Rn, GPRnopc:$Rm), + IIC_iMUL32, "mul", "\t$Rd, $Rn, $Rm", + [(set GPRnopc:$Rd, (mul GPRnopc:$Rn, GPRnopc:$Rm))]>, + Requires<[IsARM, HasV6]> { let Inst{15-12} = 0b0000; let Unpredictable{15-12} = 0b1111; } let Constraints = "@earlyclobber $Rd" in def MULv5: ARMPseudoExpand<(outs GPRnopc:$Rd), (ins GPRnopc:$Rn, GPRnopc:$Rm, - pred:$p, cc_out:$s), - 4, IIC_iMUL32, - [(set GPRnopc:$Rd, (mul GPRnopc:$Rn, GPRnopc:$Rm))], - (MUL GPRnopc:$Rd, GPRnopc:$Rn, GPRnopc:$Rm, pred:$p, cc_out:$s)>, - Requires<[IsARM, NoV6]>; + pred:$p, cc_out:$s), + 4, IIC_iMUL32, + [(set GPRnopc:$Rd, (mul GPRnopc:$Rn, GPRnopc:$Rm))], + (MUL GPRnopc:$Rd, GPRnopc:$Rn, GPRnopc:$Rm, pred:$p, cc_out:$s)>, + Requires<[IsARM, NoV6]>; } def MLA : AsMul1I32<0b0000001, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm, GPR:$Ra), - IIC_iMAC32, "mla", "\t$Rd, $Rn, $Rm, $Ra", + IIC_iMAC32, "mla", "\t$Rd, $Rn, $Rm, $Ra", [(set GPR:$Rd, (add (mul GPR:$Rn, GPR:$Rm), GPR:$Ra))]>, Requires<[IsARM, HasV6]> { bits<4> Ra; @@ -3511,8 +3429,8 @@ def MLA : AsMul1I32<0b0000001, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm, GPR:$Ra), let Constraints = "@earlyclobber $Rd" in def MLAv5: ARMPseudoExpand<(outs GPR:$Rd), - (ins GPR:$Rn, GPR:$Rm, GPR:$Ra, pred:$p, cc_out:$s), - 4, IIC_iMAC32, + (ins GPR:$Rn, GPR:$Rm, GPR:$Ra, pred:$p, cc_out:$s), + 4, IIC_iMAC32, [(set GPR:$Rd, (add (mul GPR:$Rn, GPR:$Rm), GPR:$Ra))], (MLA GPR:$Rd, GPR:$Rn, GPR:$Rm, GPR:$Ra, pred:$p, cc_out:$s)>, Requires<[IsARM, NoV6]>; @@ -3630,8 +3548,7 @@ def SMMLAR : AMul2Ia <0b0111010, 0b0011, (outs GPR:$Rd), def SMMLS : AMul2Ia <0b0111010, 0b1101, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm, GPR:$Ra), - IIC_iMAC32, "smmls", "\t$Rd, $Rn, $Rm, $Ra", - [(set GPR:$Rd, (sub GPR:$Ra, (mulhs GPR:$Rn, GPR:$Rm)))]>, + IIC_iMAC32, "smmls", "\t$Rd, $Rn, $Rm, $Ra", []>, Requires<[IsARM, HasV6]>; def SMMLSR : AMul2Ia <0b0111010, 0b1111, (outs GPR:$Rd), @@ -3912,49 +3829,85 @@ def : ARMPat<(ARMcmpZ GPR:$src, so_reg_imm:$rhs), def : ARMPat<(ARMcmpZ GPR:$src, so_reg_reg:$rhs), (CMPrsr GPR:$src, so_reg_reg:$rhs)>; -// FIXME: We have to be careful when using the CMN instruction and comparison -// with 0. One would expect these two pieces of code should give identical -// results: -// -// rsbs r1, r1, 0 -// cmp r0, r1 -// mov r0, #0 -// it ls -// mov r0, #1 -// -// and: -// -// cmn r0, r1 -// mov r0, #0 -// it ls -// mov r0, #1 -// -// However, the CMN gives the *opposite* result when r1 is 0. This is because -// the carry flag is set in the CMP case but not in the CMN case. In short, the -// CMP instruction doesn't perform a truncate of the (logical) NOT of 0 plus the -// value of r0 and the carry bit (because the "carry bit" parameter to -// AddWithCarry is defined as 1 in this case, the carry flag will always be set -// when r0 >= 0). The CMN instruction doesn't perform a NOT of 0 so there is -// never a "carry" when this AddWithCarry is performed (because the "carry bit" -// parameter to AddWithCarry is defined as 0). -// -// When x is 0 and unsigned: -// -// x = 0 -// ~x = 0xFFFF FFFF -// ~x + 1 = 0x1 0000 0000 -// (-x = 0) != (0x1 0000 0000 = ~x + 1) -// -// Therefore, we should disable CMN when comparing against zero, until we can -// limit when the CMN instruction is used (when we know that the RHS is not 0 or -// when it's a comparison which doesn't look at the 'carry' flag). -// -// (See the ARM docs for the "AddWithCarry" pseudo-code.) -// -// This is related to . 
-// -//defm CMN : AI1_cmp_irs<0b1011, "cmn", -// BinOpFrag<(ARMcmp node:$LHS,(ineg node:$RHS))>>; +// CMN register-integer +let isCompare = 1, Defs = [CPSR] in { +def CMNri : AI1<0b1011, (outs), (ins GPR:$Rn, so_imm:$imm), DPFrm, IIC_iCMPi, + "cmn", "\t$Rn, $imm", + [(ARMcmn GPR:$Rn, so_imm:$imm)]> { + bits<4> Rn; + bits<12> imm; + let Inst{25} = 1; + let Inst{20} = 1; + let Inst{19-16} = Rn; + let Inst{15-12} = 0b0000; + let Inst{11-0} = imm; + + let Unpredictable{15-12} = 0b1111; +} + +// CMN register-register/shift +def CMNzrr : AI1<0b1011, (outs), (ins GPR:$Rn, GPR:$Rm), DPFrm, IIC_iCMPr, + "cmn", "\t$Rn, $Rm", + [(BinOpFrag<(ARMcmpZ node:$LHS,(ineg node:$RHS))> + GPR:$Rn, GPR:$Rm)]> { + bits<4> Rn; + bits<4> Rm; + let isCommutable = 1; + let Inst{25} = 0; + let Inst{20} = 1; + let Inst{19-16} = Rn; + let Inst{15-12} = 0b0000; + let Inst{11-4} = 0b00000000; + let Inst{3-0} = Rm; + + let Unpredictable{15-12} = 0b1111; +} + +def CMNzrsi : AI1<0b1011, (outs), + (ins GPR:$Rn, so_reg_imm:$shift), DPSoRegImmFrm, IIC_iCMPsr, + "cmn", "\t$Rn, $shift", + [(BinOpFrag<(ARMcmpZ node:$LHS,(ineg node:$RHS))> + GPR:$Rn, so_reg_imm:$shift)]> { + bits<4> Rn; + bits<12> shift; + let Inst{25} = 0; + let Inst{20} = 1; + let Inst{19-16} = Rn; + let Inst{15-12} = 0b0000; + let Inst{11-5} = shift{11-5}; + let Inst{4} = 0; + let Inst{3-0} = shift{3-0}; + + let Unpredictable{15-12} = 0b1111; +} + +def CMNzrsr : AI1<0b1011, (outs), + (ins GPRnopc:$Rn, so_reg_reg:$shift), DPSoRegRegFrm, IIC_iCMPsr, + "cmn", "\t$Rn, $shift", + [(BinOpFrag<(ARMcmpZ node:$LHS,(ineg node:$RHS))> + GPRnopc:$Rn, so_reg_reg:$shift)]> { + bits<4> Rn; + bits<12> shift; + let Inst{25} = 0; + let Inst{20} = 1; + let Inst{19-16} = Rn; + let Inst{15-12} = 0b0000; + let Inst{11-8} = shift{11-8}; + let Inst{7} = 0; + let Inst{6-5} = shift{6-5}; + let Inst{4} = 1; + let Inst{3-0} = shift{3-0}; + + let Unpredictable{15-12} = 0b1111; +} + +} + +def : ARMPat<(ARMcmp GPR:$src, so_imm_neg:$imm), + (CMNri GPR:$src, so_imm_neg:$imm)>; + +def : ARMPat<(ARMcmpZ GPR:$src, so_imm_neg:$imm), + (CMNri GPR:$src, so_imm_neg:$imm)>; // Note that TST/TEQ don't set all the same flags that CMP does! defm TST : AI1_cmp_irs<0b1000, "tst", @@ -3964,16 +3917,6 @@ defm TEQ : AI1_cmp_irs<0b1001, "teq", IIC_iTSTi, IIC_iTSTr, IIC_iTSTsr, BinOpFrag<(ARMcmpZ (xor_su node:$LHS, node:$RHS), 0)>, 1>; -defm CMNz : AI1_cmp_irs<0b1011, "cmn", - IIC_iCMPi, IIC_iCMPr, IIC_iCMPsr, - BinOpFrag<(ARMcmpZ node:$LHS,(ineg node:$RHS))>>; - -//def : ARMPat<(ARMcmp GPR:$src, so_imm_neg:$imm), -// (CMNri GPR:$src, so_imm_neg:$imm)>; - -def : ARMPat<(ARMcmpZ GPR:$src, so_imm_neg:$imm), - (CMNzri GPR:$src, so_imm_neg:$imm)>; - // Pseudo i64 compares for some floating point compares. 
let usesCustomInserter = 1, isBranch = 1, isTerminator = 1, Defs = [CPSR] in { @@ -4121,11 +4064,8 @@ def ISB : AInoP<(outs), (ins memb_opt:$opt), MiscFrm, NoItinerary, // Pseudo instruction that combines movs + predicated rsbmi // to implement integer ABS -let usesCustomInserter = 1, Defs = [CPSR] in { -def ABS : ARMPseudoInst< - (outs GPR:$dst), (ins GPR:$src), - 8, NoItinerary, []>; -} +let usesCustomInserter = 1, Defs = [CPSR] in +def ABS : ARMPseudoInst<(outs GPR:$dst), (ins GPR:$src), 8, NoItinerary, []>; let usesCustomInserter = 1 in { let Defs = [CPSR] in { @@ -4242,6 +4182,13 @@ let usesCustomInserter = 1 in { } } +let usesCustomInserter = 1 in { + def COPY_STRUCT_BYVAL_I32 : PseudoInst< + (outs), (ins GPR:$dst, GPR:$src, i32imm:$size, i32imm:$alignment), + NoItinerary, + [(ARMcopystructbyval GPR:$dst, GPR:$src, imm:$size, imm:$alignment)]>; +} + let mayLoad = 1 in { def LDREXB : AIldrex<0b10, (outs GPR:$Rt), (ins addr_offset_none:$addr), NoItinerary, @@ -4280,10 +4227,10 @@ def CLREX : AXI<(outs), (ins), MiscFrm, NoItinerary, "clrex", []>, // SWP/SWPB are deprecated in V6/V7. let mayLoad = 1, mayStore = 1 in { -def SWP : AIswp<0, (outs GPRnopc:$Rt), (ins GPRnopc:$Rt2, addr_offset_none:$addr), - "swp", []>; -def SWPB: AIswp<1, (outs GPRnopc:$Rt), (ins GPRnopc:$Rt2, addr_offset_none:$addr), - "swpb", []>; +def SWP : AIswp<0, (outs GPRnopc:$Rt), + (ins GPRnopc:$Rt2, addr_offset_none:$addr), "swp", []>; +def SWPB: AIswp<1, (outs GPRnopc:$Rt), + (ins GPRnopc:$Rt2, addr_offset_none:$addr), "swpb", []>; } //===----------------------------------------------------------------------===// @@ -4609,8 +4556,8 @@ class MovRRCopro pattern = []> } def MCRR : MovRRCopro<"mcrr", 0 /* from ARM core register to coprocessor */, - [(int_arm_mcrr imm:$cop, imm:$opc1, GPRnopc:$Rt, GPRnopc:$Rt2, - imm:$CRm)]>; + [(int_arm_mcrr imm:$cop, imm:$opc1, GPRnopc:$Rt, + GPRnopc:$Rt2, imm:$CRm)]>; def MRRC : MovRRCopro<"mrrc", 1 /* from coprocessor to ARM core register */>; class MovRRCopro2 pattern = []> @@ -4637,8 +4584,8 @@ class MovRRCopro2 pattern = []> } def MCRR2 : MovRRCopro2<"mcrr2", 0 /* from ARM core register to coprocessor */, - [(int_arm_mcrr2 imm:$cop, imm:$opc1, GPRnopc:$Rt, GPRnopc:$Rt2, - imm:$CRm)]>; + [(int_arm_mcrr2 imm:$cop, imm:$opc1, GPRnopc:$Rt, + GPRnopc:$Rt2, imm:$CRm)]>; def MRRC2 : MovRRCopro2<"mrrc2", 1 /* from coprocessor to ARM core register */>; //===----------------------------------------------------------------------===// @@ -4658,7 +4605,8 @@ def MRS : ABI<0b0001, (outs GPRnopc:$Rd), (ins), NoItinerary, let Unpredictable{11-0} = 0b110100001111; } -def : InstAlias<"mrs${p} $Rd, cpsr", (MRS GPRnopc:$Rd, pred:$p)>, Requires<[IsARM]>; +def : InstAlias<"mrs${p} $Rd, cpsr", (MRS GPRnopc:$Rd, pred:$p)>, + Requires<[IsARM]>; // The MRSsys instruction is the MRS instruction from the ARM ARM, // section B9.3.9, with the R bit set to 1. @@ -5114,7 +5062,7 @@ def : ARMInstAlias<"add${s}${p} $Rd, $imm", (SUBri GPR:$Rd, GPR:$Rd, so_imm_neg:$imm, pred:$p, cc_out:$s)>; // Same for CMP <--> CMN via so_imm_neg def : ARMInstAlias<"cmp${p} $Rd, $imm", - (CMNzri rGPR:$Rd, so_imm_neg:$imm, pred:$p)>; + (CMNri rGPR:$Rd, so_imm_neg:$imm, pred:$p)>; def : ARMInstAlias<"cmn${p} $Rd, $imm", (CMPri rGPR:$Rd, so_imm_neg:$imm, pred:$p)>; @@ -5123,6 +5071,7 @@ def : ARMInstAlias<"cmn${p} $Rd, $imm", // FIXME: We need C++ parser hooks to map the alias to the MOV // encoding. It seems we should be able to do that sort of thing // in tblgen, but it could get ugly. 
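// The let wrappers added below set "$Rm = $Rd" on the immediate-shift asm
// pseudos (asr/lsr/lsl/ror) and "$Rn = $Rd" on the register-shift forms, so
// the generated matcher handles the two-operand shifter spelling itself.
// An illustrative sketch, assuming standard matcher behavior (not text from
// this patch):
//
//   asr r1, #3       @ two-operand form, $Rm tied to $Rd
//   asr r1, r1, #3   @ canonical three-operand form, same encoding
//
// This is what makes the hand-written two-operand ARMInstAlias records
// deleted a few hunks below redundant.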
+let TwoOperandAliasConstraint = "$Rm = $Rd" in { def ASRi : ARMAsmPseudo<"asr${s}${p} $Rd, $Rm, $imm", (ins GPR:$Rd, GPR:$Rm, imm0_32:$imm, pred:$p, cc_out:$s)>; @@ -5135,8 +5084,10 @@ def LSLi : ARMAsmPseudo<"lsl${s}${p} $Rd, $Rm, $imm", def RORi : ARMAsmPseudo<"ror${s}${p} $Rd, $Rm, $imm", (ins GPR:$Rd, GPR:$Rm, imm0_31:$imm, pred:$p, cc_out:$s)>; +} def RRXi : ARMAsmPseudo<"rrx${s}${p} $Rd, $Rm", (ins GPRnopc:$Rd, GPRnopc:$Rm, pred:$p, cc_out:$s)>; +let TwoOperandAliasConstraint = "$Rn = $Rd" in { def ASRr : ARMAsmPseudo<"asr${s}${p} $Rd, $Rn, $Rm", (ins GPRnopc:$Rd, GPRnopc:$Rn, GPRnopc:$Rm, pred:$p, cc_out:$s)>; @@ -5149,32 +5100,7 @@ def LSLr : ARMAsmPseudo<"lsl${s}${p} $Rd, $Rn, $Rm", def RORr : ARMAsmPseudo<"ror${s}${p} $Rd, $Rn, $Rm", (ins GPRnopc:$Rd, GPRnopc:$Rn, GPRnopc:$Rm, pred:$p, cc_out:$s)>; -// shifter instructions also support a two-operand form. -def : ARMInstAlias<"asr${s}${p} $Rm, $imm", - (ASRi GPR:$Rm, GPR:$Rm, imm0_32:$imm, pred:$p, cc_out:$s)>; -def : ARMInstAlias<"lsr${s}${p} $Rm, $imm", - (LSRi GPR:$Rm, GPR:$Rm, imm0_32:$imm, pred:$p, cc_out:$s)>; -def : ARMInstAlias<"lsl${s}${p} $Rm, $imm", - (LSLi GPR:$Rm, GPR:$Rm, imm0_31:$imm, pred:$p, cc_out:$s)>; -def : ARMInstAlias<"ror${s}${p} $Rm, $imm", - (RORi GPR:$Rm, GPR:$Rm, imm0_31:$imm, pred:$p, cc_out:$s)>; -def : ARMInstAlias<"asr${s}${p} $Rn, $Rm", - (ASRr GPRnopc:$Rn, GPRnopc:$Rn, GPRnopc:$Rm, pred:$p, - cc_out:$s)>; -def : ARMInstAlias<"lsr${s}${p} $Rn, $Rm", - (LSRr GPRnopc:$Rn, GPRnopc:$Rn, GPRnopc:$Rm, pred:$p, - cc_out:$s)>; -def : ARMInstAlias<"lsl${s}${p} $Rn, $Rm", - (LSLr GPRnopc:$Rn, GPRnopc:$Rn, GPRnopc:$Rm, pred:$p, - cc_out:$s)>; -def : ARMInstAlias<"ror${s}${p} $Rn, $Rm", - (RORr GPRnopc:$Rn, GPRnopc:$Rn, GPRnopc:$Rm, pred:$p, - cc_out:$s)>; - - -// 'mul' instruction can be specified with only two operands. 
-def : ARMInstAlias<"mul${s}${p} $Rn, $Rm", - (MUL rGPR:$Rn, rGPR:$Rm, rGPR:$Rn, pred:$p, cc_out:$s)>; +} // "neg" is and alias for "rsb rd, rn, #0" def : ARMInstAlias<"neg${s}${p} $Rd, $Rm", diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index fd8ac0b..3134088 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -1962,7 +1962,7 @@ def VST1LNd16 : VST1LN<0b0100, {?,?,0,?}, "16", v4i16, truncstorei16, let Inst{4} = Rn{5}; } -def VST1LNd32 : VST1LN<0b1000, {?,0,?,?}, "32", v2i32, store, extractelt, +def VST1LNd32 : VST1LN<0b1000, {?,0,?,?}, "32", v2i32, store, extractelt, addrmode6oneL32> { let Inst{7} = lane{0}; let Inst{5-4} = Rn{5-4}; @@ -2300,14 +2300,14 @@ class N2VQ op24_23, bits<2> op21_20, bits<2> op19_18, class N2VDInt op24_23, bits<2> op21_20, bits<2> op19_18, bits<2> op17_16, bits<5> op11_7, bit op4, InstrItinClass itin, string OpcodeStr, string Dt, - ValueType ResTy, ValueType OpTy, Intrinsic IntOp> + ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp> : N2V; class N2VQInt op24_23, bits<2> op21_20, bits<2> op19_18, bits<2> op17_16, bits<5> op11_7, bit op4, InstrItinClass itin, string OpcodeStr, string Dt, - ValueType ResTy, ValueType OpTy, Intrinsic IntOp> + ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp> : N2V; @@ -2325,7 +2325,7 @@ class N2VN op24_23, bits<2> op21_20, bits<2> op19_18, class N2VNInt op24_23, bits<2> op21_20, bits<2> op19_18, bits<2> op17_16, bits<5> op11_7, bit op6, bit op4, InstrItinClass itin, string OpcodeStr, string Dt, - ValueType TyD, ValueType TyQ, Intrinsic IntOp> + ValueType TyD, ValueType TyQ, SDPatternOperator IntOp> : N2V; @@ -2343,7 +2343,7 @@ class N2VL op24_23, bits<2> op21_20, bits<2> op19_18, class N2VLInt op24_23, bits<2> op21_20, bits<2> op19_18, bits<2> op17_16, bits<5> op11_7, bit op6, bit op4, InstrItinClass itin, string OpcodeStr, string Dt, - ValueType TyQ, ValueType TyD, Intrinsic IntOp> + ValueType TyQ, ValueType TyD, SDPatternOperator IntOp> : N2V; @@ -2368,6 +2368,8 @@ class N3VD op21_20, bits<4> op11_8, bit op4, (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm", "", [(set DPR:$Vd, (ResTy (OpNode (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]> { + // All of these have a two-operand InstAlias. + let TwoOperandAliasConstraint = "$Vn = $Vd"; let isCommutable = Commutable; } // Same as N3VD but no data type. @@ -2379,6 +2381,8 @@ class N3VDX op21_20, bits<4> op11_8, bit op4, (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin, OpcodeStr, "$Vd, $Vn, $Vm", "", [(set DPR:$Vd, (ResTy (OpNode (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]>{ + // All of these have a two-operand InstAlias. + let TwoOperandAliasConstraint = "$Vn = $Vd"; let isCommutable = Commutable; } @@ -2391,6 +2395,8 @@ class N3VDSL op21_20, bits<4> op11_8, [(set (Ty DPR:$Vd), (Ty (ShOp (Ty DPR:$Vn), (Ty (NEONvduplane (Ty DPR_VFP2:$Vm),imm:$lane)))))]> { + // All of these have a two-operand InstAlias. + let TwoOperandAliasConstraint = "$Vn = $Vd"; let isCommutable = 0; } class N3VDSL16 op21_20, bits<4> op11_8, @@ -2401,6 +2407,8 @@ class N3VDSL16 op21_20, bits<4> op11_8, [(set (Ty DPR:$Vd), (Ty (ShOp (Ty DPR:$Vn), (Ty (NEONvduplane (Ty DPR_8:$Vm), imm:$lane)))))]> { + // All of these have a two-operand InstAlias. 
+ let TwoOperandAliasConstraint = "$Vn = $Vd"; let isCommutable = 0; } @@ -2411,6 +2419,8 @@ class N3VQ op21_20, bits<4> op11_8, bit op4, (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm), N3RegFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm", "", [(set QPR:$Vd, (ResTy (OpNode (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]> { + // All of these have a two-operand InstAlias. + let TwoOperandAliasConstraint = "$Vn = $Vd"; let isCommutable = Commutable; } class N3VQX op21_20, bits<4> op11_8, bit op4, @@ -2420,6 +2430,8 @@ class N3VQX op21_20, bits<4> op11_8, bit op4, (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm), N3RegFrm, itin, OpcodeStr, "$Vd, $Vn, $Vm", "", [(set QPR:$Vd, (ResTy (OpNode (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]>{ + // All of these have a two-operand InstAlias. + let TwoOperandAliasConstraint = "$Vn = $Vd"; let isCommutable = Commutable; } class N3VQSL op21_20, bits<4> op11_8, @@ -2432,6 +2444,8 @@ class N3VQSL op21_20, bits<4> op11_8, (ResTy (ShOp (ResTy QPR:$Vn), (ResTy (NEONvduplane (OpTy DPR_VFP2:$Vm), imm:$lane)))))]> { + // All of these have a two-operand InstAlias. + let TwoOperandAliasConstraint = "$Vn = $Vd"; let isCommutable = 0; } class N3VQSL16 op21_20, bits<4> op11_8, string OpcodeStr, string Dt, @@ -2443,21 +2457,25 @@ class N3VQSL16 op21_20, bits<4> op11_8, string OpcodeStr, string Dt, (ResTy (ShOp (ResTy QPR:$Vn), (ResTy (NEONvduplane (OpTy DPR_8:$Vm), imm:$lane)))))]> { + // All of these have a two-operand InstAlias. + let TwoOperandAliasConstraint = "$Vn = $Vd"; let isCommutable = 0; } // Basic 3-register intrinsics, both double- and quad-register. class N3VDInt op21_20, bits<4> op11_8, bit op4, Format f, InstrItinClass itin, string OpcodeStr, string Dt, - ValueType ResTy, ValueType OpTy, Intrinsic IntOp, bit Commutable> + ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp, bit Commutable> : N3V { + // All of these have a two-operand InstAlias. + let TwoOperandAliasConstraint = "$Vn = $Vd"; let isCommutable = Commutable; } class N3VDIntSL op21_20, bits<4> op11_8, InstrItinClass itin, - string OpcodeStr, string Dt, ValueType Ty, Intrinsic IntOp> + string OpcodeStr, string Dt, ValueType Ty, SDPatternOperator IntOp> : N3VLane32<0, 1, op21_20, op11_8, 1, 0, (outs DPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane), NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "", @@ -2468,7 +2486,7 @@ class N3VDIntSL op21_20, bits<4> op11_8, InstrItinClass itin, let isCommutable = 0; } class N3VDIntSL16 op21_20, bits<4> op11_8, InstrItinClass itin, - string OpcodeStr, string Dt, ValueType Ty, Intrinsic IntOp> + string OpcodeStr, string Dt, ValueType Ty, SDPatternOperator IntOp> : N3VLane16<0, 1, op21_20, op11_8, 1, 0, (outs DPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane), NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "", @@ -2479,26 +2497,29 @@ class N3VDIntSL16 op21_20, bits<4> op11_8, InstrItinClass itin, } class N3VDIntSh op21_20, bits<4> op11_8, bit op4, Format f, InstrItinClass itin, string OpcodeStr, string Dt, - ValueType ResTy, ValueType OpTy, Intrinsic IntOp> + ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp> : N3V { + let TwoOperandAliasConstraint = "$Vm = $Vd"; let isCommutable = 0; } class N3VQInt op21_20, bits<4> op11_8, bit op4, Format f, InstrItinClass itin, string OpcodeStr, string Dt, - ValueType ResTy, ValueType OpTy, Intrinsic IntOp, bit Commutable> + ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp, bit Commutable> : N3V { + // All of these have a two-operand InstAlias. 
+ let TwoOperandAliasConstraint = "$Vn = $Vd"; let isCommutable = Commutable; } class N3VQIntSL op21_20, bits<4> op11_8, InstrItinClass itin, string OpcodeStr, string Dt, - ValueType ResTy, ValueType OpTy, Intrinsic IntOp> + ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp> : N3VLane32<1, 1, op21_20, op11_8, 1, 0, (outs QPR:$Vd), (ins QPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane), NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "", @@ -2510,7 +2531,7 @@ class N3VQIntSL op21_20, bits<4> op11_8, InstrItinClass itin, } class N3VQIntSL16 op21_20, bits<4> op11_8, InstrItinClass itin, string OpcodeStr, string Dt, - ValueType ResTy, ValueType OpTy, Intrinsic IntOp> + ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp> : N3VLane16<1, 1, op21_20, op11_8, 1, 0, (outs QPR:$Vd), (ins QPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane), NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "", @@ -2522,11 +2543,12 @@ class N3VQIntSL16 op21_20, bits<4> op11_8, InstrItinClass itin, } class N3VQIntSh op21_20, bits<4> op11_8, bit op4, Format f, InstrItinClass itin, string OpcodeStr, string Dt, - ValueType ResTy, ValueType OpTy, Intrinsic IntOp> + ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp> : N3V { + let TwoOperandAliasConstraint = "$Vm = $Vd"; let isCommutable = 0; } @@ -2606,7 +2628,7 @@ class N3VQMulOpSL16 op21_20, bits<4> op11_8, InstrItinClass itin, // Neon Intrinsic-Op instructions (VABA): double- and quad-register. class N3VDIntOp op21_20, bits<4> op11_8, bit op4, InstrItinClass itin, string OpcodeStr, string Dt, - ValueType Ty, Intrinsic IntOp, SDNode OpNode> + ValueType Ty, SDPatternOperator IntOp, SDNode OpNode> : N3V op21_20, bits<4> op11_8, bit op4, (Ty (IntOp (Ty DPR:$Vn), (Ty DPR:$Vm))))))]>; class N3VQIntOp op21_20, bits<4> op11_8, bit op4, InstrItinClass itin, string OpcodeStr, string Dt, - ValueType Ty, Intrinsic IntOp, SDNode OpNode> + ValueType Ty, SDPatternOperator IntOp, SDNode OpNode> : N3V op21_20, bits<4> op11_8, bit op4, // The destination register is also used as the first source operand register. class N3VDInt3 op21_20, bits<4> op11_8, bit op4, InstrItinClass itin, string OpcodeStr, string Dt, - ValueType ResTy, ValueType OpTy, Intrinsic IntOp> + ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp> : N3V op21_20, bits<4> op11_8, bit op4, (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]>; class N3VQInt3 op21_20, bits<4> op11_8, bit op4, InstrItinClass itin, string OpcodeStr, string Dt, - ValueType ResTy, ValueType OpTy, Intrinsic IntOp> + ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp> : N3V op21_20, bits<4> op11_8, // Long Intrinsic-Op vector operations with explicit extend (VABAL). class N3VLIntExtOp op21_20, bits<4> op11_8, bit op4, InstrItinClass itin, string OpcodeStr, string Dt, - ValueType TyQ, ValueType TyD, Intrinsic IntOp, SDNode ExtOp, + ValueType TyQ, ValueType TyD, SDPatternOperator IntOp, SDNode ExtOp, SDNode OpNode> : N3V op21_20, bits<4> op11_8, bit op4, // a quad-register and is also used as the first source operand register. 
class N3VLInt3 op21_20, bits<4> op11_8, bit op4, InstrItinClass itin, string OpcodeStr, string Dt, - ValueType TyQ, ValueType TyD, Intrinsic IntOp> + ValueType TyQ, ValueType TyD, SDPatternOperator IntOp> : N3V op21_20, bits<4> op11_8, bit op4, (TyQ (IntOp (TyQ QPR:$src1), (TyD DPR:$Vn), (TyD DPR:$Vm))))]>; class N3VLInt3SL op21_20, bits<4> op11_8, InstrItinClass itin, string OpcodeStr, string Dt, - ValueType ResTy, ValueType OpTy, Intrinsic IntOp> + ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp> : N3VLane32 op21_20, bits<4> op11_8, InstrItinClass itin, imm:$lane)))))]>; class N3VLInt3SL16 op21_20, bits<4> op11_8, InstrItinClass itin, string OpcodeStr, string Dt, - ValueType ResTy, ValueType OpTy, Intrinsic IntOp> + ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp> : N3VLane16 op21_20, bits<4> op11_8, // Narrowing 3-register intrinsics. class N3VNInt op21_20, bits<4> op11_8, bit op4, string OpcodeStr, string Dt, ValueType TyD, ValueType TyQ, - Intrinsic IntOp, bit Commutable> + SDPatternOperator IntOp, bit Commutable> : N3V op21_20, bits<4> op11_8, bit op4, // Long 3-register intrinsics with explicit extend (VABDL). class N3VLIntExt op21_20, bits<4> op11_8, bit op4, InstrItinClass itin, string OpcodeStr, string Dt, - ValueType TyQ, ValueType TyD, Intrinsic IntOp, SDNode ExtOp, + ValueType TyQ, ValueType TyD, SDPatternOperator IntOp, SDNode ExtOp, bit Commutable> : N3V op21_20, bits<4> op11_8, bit op4, // Long 3-register intrinsics. class N3VLInt op21_20, bits<4> op11_8, bit op4, InstrItinClass itin, string OpcodeStr, string Dt, - ValueType TyQ, ValueType TyD, Intrinsic IntOp, bit Commutable> + ValueType TyQ, ValueType TyD, SDPatternOperator IntOp, bit Commutable> : N3V op21_20, bits<4> op11_8, bit op4, } class N3VLIntSL op21_20, bits<4> op11_8, InstrItinClass itin, string OpcodeStr, string Dt, - ValueType ResTy, ValueType OpTy, Intrinsic IntOp> + ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp> : N3VLane32 op21_20, bits<4> op11_8, InstrItinClass itin, imm:$lane)))))]>; class N3VLIntSL16 op21_20, bits<4> op11_8, InstrItinClass itin, string OpcodeStr, string Dt, - ValueType ResTy, ValueType OpTy, Intrinsic IntOp> + ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp> : N3VLane16 op21_20, bits<4> op11_8, bit op4, OpcodeStr, Dt, "$Vd, $Vn, $Vm", "", [(set QPR:$Vd, (OpNode (TyQ QPR:$Vn), (TyQ (ExtOp (TyD DPR:$Vm)))))]> { + // All of these have a two-operand InstAlias. 
+ let TwoOperandAliasConstraint = "$Vn = $Vd"; let isCommutable = Commutable; } @@ -2837,14 +2861,14 @@ class N3VW op21_20, bits<4> op11_8, bit op4, class N2VDPLInt op24_23, bits<2> op21_20, bits<2> op19_18, bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr, string Dt, - ValueType ResTy, ValueType OpTy, Intrinsic IntOp> + ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp> : N2V; class N2VQPLInt op24_23, bits<2> op21_20, bits<2> op19_18, bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr, string Dt, - ValueType ResTy, ValueType OpTy, Intrinsic IntOp> + ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp> : N2V; @@ -2855,7 +2879,7 @@ class N2VQPLInt op24_23, bits<2> op21_20, bits<2> op19_18, class N2VDPLInt2 op24_23, bits<2> op21_20, bits<2> op19_18, bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr, string Dt, - ValueType ResTy, ValueType OpTy, Intrinsic IntOp> + ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp> : N2V op24_23, bits<2> op21_20, bits<2> op19_18, class N2VQPLInt2 op24_23, bits<2> op21_20, bits<2> op19_18, bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr, string Dt, - ValueType ResTy, ValueType OpTy, Intrinsic IntOp> + ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp> : N2V op24_23, bits<2> op21_20, bits<2> op19_18, // Shift by immediate, // both double- and quad-register. +let TwoOperandAliasConstraint = "$Vm = $Vd" in { class N2VDSh op11_8, bit op7, bit op4, Format f, InstrItinClass itin, Operand ImmTy, string OpcodeStr, string Dt, ValueType Ty, SDNode OpNode> @@ -2885,6 +2910,7 @@ class N2VQSh op11_8, bit op7, bit op4, (outs QPR:$Vd), (ins QPR:$Vm, ImmTy:$SIMM), f, itin, OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "", [(set QPR:$Vd, (Ty (OpNode (Ty QPR:$Vm), (i32 imm:$SIMM))))]>; +} // Long shift by immediate. class N2VLSh op11_8, bit op7, bit op6, bit op4, @@ -2908,6 +2934,7 @@ class N2VNSh op11_8, bit op7, bit op6, bit op4, // Shift right by immediate and accumulate, // both double- and quad-register. +let TwoOperandAliasConstraint = "$Vm = $Vd" in { class N2VDShAdd op11_8, bit op7, bit op4, Operand ImmTy, string OpcodeStr, string Dt, ValueType Ty, SDNode ShOp> @@ -2924,9 +2951,11 @@ class N2VQShAdd op11_8, bit op7, bit op4, OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd", [(set QPR:$Vd, (Ty (add QPR:$src1, (Ty (ShOp QPR:$Vm, (i32 imm:$SIMM))))))]>; +} // Shift by immediate and insert, // both double- and quad-register. +let TwoOperandAliasConstraint = "$Vm = $Vd" in { class N2VDShIns op11_8, bit op7, bit op4, Operand ImmTy, Format f, string OpcodeStr, string Dt, ValueType Ty,SDNode ShOp> @@ -2941,19 +2970,20 @@ class N2VQShIns op11_8, bit op7, bit op4, (ins QPR:$src1, QPR:$Vm, ImmTy:$SIMM), f, IIC_VSHLiQ, OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd", [(set QPR:$Vd, (Ty (ShOp QPR:$src1, QPR:$Vm, (i32 imm:$SIMM))))]>; +} // Convert, with fractional bits immediate, // both double- and quad-register. class N2VCvtD op11_8, bit op7, bit op4, string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, - Intrinsic IntOp> + SDPatternOperator IntOp> : N2VImm; class N2VCvtQ op11_8, bit op7, bit op4, string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, - Intrinsic IntOp> + SDPatternOperator IntOp> : N2VImm op24_23, bits<2> op21_20, bits<2> op17_16, multiclass N2VInt_QHS op24_23, bits<2> op21_20, bits<2> op17_16, bits<5> op11_7, bit op4, InstrItinClass itinD, InstrItinClass itinQ, - string OpcodeStr, string Dt, Intrinsic IntOp> { + string OpcodeStr, string Dt, SDPatternOperator IntOp> { // 64-bit vector types. 
def v8i8 : N2VDInt; @@ -3064,7 +3094,7 @@ multiclass N2VN_HSD op24_23, bits<2> op21_20, bits<2> op17_16, multiclass N2VNInt_HSD op24_23, bits<2> op21_20, bits<2> op17_16, bits<5> op11_7, bit op6, bit op4, InstrItinClass itin, string OpcodeStr, string Dt, - Intrinsic IntOp> { + SDPatternOperator IntOp> { def v8i8 : N2VNInt; @@ -3152,7 +3182,7 @@ multiclass N3VInt_HS op11_8, bit op4, Format f, InstrItinClass itinD16, InstrItinClass itinD32, InstrItinClass itinQ16, InstrItinClass itinQ32, string OpcodeStr, string Dt, - Intrinsic IntOp, bit Commutable = 0> { + SDPatternOperator IntOp, bit Commutable = 0> { // 64-bit vector types. def v4i16 : N3VDInt op11_8, bit op4, Format f, InstrItinClass itinD16, InstrItinClass itinD32, InstrItinClass itinQ16, InstrItinClass itinQ32, string OpcodeStr, string Dt, - Intrinsic IntOp> { + SDPatternOperator IntOp> { // 64-bit vector types. def v4i16 : N3VDIntSh op11_8, bit op4, Format f, multiclass N3VIntSL_HS op11_8, InstrItinClass itinD16, InstrItinClass itinD32, InstrItinClass itinQ16, InstrItinClass itinQ32, - string OpcodeStr, string Dt, Intrinsic IntOp> { + string OpcodeStr, string Dt, SDPatternOperator IntOp> { def v4i16 : N3VDIntSL16<0b01, op11_8, itinD16, OpcodeStr, !strconcat(Dt, "16"), v4i16, IntOp>; def v2i32 : N3VDIntSL<0b10, op11_8, itinD32, @@ -3210,7 +3240,7 @@ multiclass N3VInt_QHS op11_8, bit op4, Format f, InstrItinClass itinD16, InstrItinClass itinD32, InstrItinClass itinQ16, InstrItinClass itinQ32, string OpcodeStr, string Dt, - Intrinsic IntOp, bit Commutable = 0> + SDPatternOperator IntOp, bit Commutable = 0> : N3VInt_HS { def v8i8 : N3VDInt op11_8, bit op4, Format f, InstrItinClass itinD16, InstrItinClass itinD32, InstrItinClass itinQ16, InstrItinClass itinQ32, string OpcodeStr, string Dt, - Intrinsic IntOp> + SDPatternOperator IntOp> : N3VInt_HSSh { def v8i8 : N3VDIntSh op11_8, bit op4, Format f, InstrItinClass itinD16, InstrItinClass itinD32, InstrItinClass itinQ16, InstrItinClass itinQ32, string OpcodeStr, string Dt, - Intrinsic IntOp, bit Commutable = 0> + SDPatternOperator IntOp, bit Commutable = 0> : N3VInt_QHS { def v1i64 : N3VDInt op11_8, bit op4, Format f, InstrItinClass itinD16, InstrItinClass itinD32, InstrItinClass itinQ16, InstrItinClass itinQ32, string OpcodeStr, string Dt, - Intrinsic IntOp> + SDPatternOperator IntOp> : N3VInt_QHSSh { def v1i64 : N3VDIntSh op11_8, bit op4, Format f, // source operand element sizes of 16, 32 and 64 bits: multiclass N3VNInt_HSD op11_8, bit op4, string OpcodeStr, string Dt, - Intrinsic IntOp, bit Commutable = 0> { + SDPatternOperator IntOp, bit Commutable = 0> { def v8i8 : N3VNInt; @@ -3330,7 +3360,7 @@ multiclass N3VLExt_QHS op11_8, bit op4, multiclass N3VLInt_HS op11_8, bit op4, InstrItinClass itin16, InstrItinClass itin32, string OpcodeStr, string Dt, - Intrinsic IntOp, bit Commutable = 0> { + SDPatternOperator IntOp, bit Commutable = 0> { def v4i32 : N3VLInt; @@ -3341,7 +3371,7 @@ multiclass N3VLInt_HS op11_8, bit op4, multiclass N3VLIntSL_HS op11_8, InstrItinClass itin, string OpcodeStr, string Dt, - Intrinsic IntOp> { + SDPatternOperator IntOp> { def v4i16 : N3VLIntSL16; def v2i32 : N3VLIntSL op11_8, multiclass N3VLInt_QHS op11_8, bit op4, InstrItinClass itin16, InstrItinClass itin32, string OpcodeStr, string Dt, - Intrinsic IntOp, bit Commutable = 0> + SDPatternOperator IntOp, bit Commutable = 0> : N3VLInt_HS { def v8i16 : N3VLInt op11_8, bit op4, // ....with explicit extend (VABDL). 
multiclass N3VLIntExt_QHS op11_8, bit op4, InstrItinClass itin, string OpcodeStr, string Dt, - Intrinsic IntOp, SDNode ExtOp, bit Commutable = 0> { + SDPatternOperator IntOp, SDNode ExtOp, bit Commutable = 0> { def v8i16 : N3VLIntExt; @@ -3436,7 +3466,7 @@ multiclass N3VMulOpSL_HS op11_8, // element sizes of 8, 16 and 32 bits: multiclass N3VIntOp_QHS op11_8, bit op4, InstrItinClass itinD, InstrItinClass itinQ, - string OpcodeStr, string Dt, Intrinsic IntOp, + string OpcodeStr, string Dt, SDPatternOperator IntOp, SDNode OpNode> { // 64-bit vector types. def v8i8 : N3VDIntOp op11_8, bit op4, // element sizes of 8, 16 and 32 bits: multiclass N3VInt3_QHS op11_8, bit op4, InstrItinClass itinD, InstrItinClass itinQ, - string OpcodeStr, string Dt, Intrinsic IntOp> { + string OpcodeStr, string Dt, SDPatternOperator IntOp> { // 64-bit vector types. def v8i8 : N3VDInt3; @@ -3506,7 +3536,7 @@ multiclass N3VLMulOpSL_HS op11_8, string OpcodeStr, // First with only element sizes of 16 and 32 bits: multiclass N3VLInt3_HS op11_8, bit op4, InstrItinClass itin16, InstrItinClass itin32, - string OpcodeStr, string Dt, Intrinsic IntOp> { + string OpcodeStr, string Dt, SDPatternOperator IntOp> { def v4i32 : N3VLInt3; def v2i64 : N3VLInt3 op11_8, bit op4, } multiclass N3VLInt3SL_HS op11_8, - string OpcodeStr, string Dt, Intrinsic IntOp> { + string OpcodeStr, string Dt, SDPatternOperator IntOp> { def v4i16 : N3VLInt3SL16; def v2i32 : N3VLInt3SL op11_8, // ....then also with element size of 8 bits: multiclass N3VLInt3_QHS op11_8, bit op4, InstrItinClass itin16, InstrItinClass itin32, - string OpcodeStr, string Dt, Intrinsic IntOp> + string OpcodeStr, string Dt, SDPatternOperator IntOp> : N3VLInt3_HS { def v8i16 : N3VLInt3; @@ -3533,7 +3563,7 @@ multiclass N3VLInt3_QHS op11_8, bit op4, // ....with explicit extend (VABAL). multiclass N3VLIntExtOp_QHS op11_8, bit op4, InstrItinClass itin, string OpcodeStr, string Dt, - Intrinsic IntOp, SDNode ExtOp, SDNode OpNode> { + SDPatternOperator IntOp, SDNode ExtOp, SDNode OpNode> { def v8i16 : N3VLIntExtOp; @@ -3550,7 +3580,7 @@ multiclass N3VLIntExtOp_QHS op11_8, bit op4, // element sizes of 8, 16 and 32 bits: multiclass N2VPLInt_QHS op24_23, bits<2> op21_20, bits<2> op17_16, bits<5> op11_7, bit op4, - string OpcodeStr, string Dt, Intrinsic IntOp> { + string OpcodeStr, string Dt, SDPatternOperator IntOp> { // 64-bit vector types. def v8i8 : N2VDPLInt; @@ -3573,7 +3603,7 @@ multiclass N2VPLInt_QHS op24_23, bits<2> op21_20, bits<2> op17_16, // element sizes of 8, 16 and 32 bits: multiclass N2VPLInt2_QHS op24_23, bits<2> op21_20, bits<2> op17_16, bits<5> op11_7, bit op4, - string OpcodeStr, string Dt, Intrinsic IntOp> { + string OpcodeStr, string Dt, SDPatternOperator IntOp> { // 64-bit vector types. def v8i8 : N2VDPLInt2; @@ -3668,33 +3698,6 @@ multiclass N2VShR_QHSD op11_8, bit op4, def v2i64 : N2VQSh; // imm6 = xxxxxx - - // Aliases for two-operand forms (source and dest regs the same). 
- def : NEONInstAlias(!strconcat(baseOpc, "v8i8")) - DPR:$Vdn, DPR:$Vdn, shr_imm8:$imm, pred:$p)>; - def : NEONInstAlias(!strconcat(baseOpc, "v4i16")) - DPR:$Vdn, DPR:$Vdn, shr_imm16:$imm, pred:$p)>; - def : NEONInstAlias(!strconcat(baseOpc, "v2i32")) - DPR:$Vdn, DPR:$Vdn, shr_imm32:$imm, pred:$p)>; - def : NEONInstAlias(!strconcat(baseOpc, "v1i64")) - DPR:$Vdn, DPR:$Vdn, shr_imm64:$imm, pred:$p)>; - - def : NEONInstAlias(!strconcat(baseOpc, "v16i8")) - QPR:$Vdn, QPR:$Vdn, shr_imm8:$imm, pred:$p)>; - def : NEONInstAlias(!strconcat(baseOpc, "v8i16")) - QPR:$Vdn, QPR:$Vdn, shr_imm16:$imm, pred:$p)>; - def : NEONInstAlias(!strconcat(baseOpc, "v4i32")) - QPR:$Vdn, QPR:$Vdn, shr_imm32:$imm, pred:$p)>; - def : NEONInstAlias(!strconcat(baseOpc, "v2i64")) - QPR:$Vdn, QPR:$Vdn, shr_imm64:$imm, pred:$p)>; } // Neon Shift-Accumulate vector operations, @@ -4133,16 +4136,16 @@ def VFMSfq : N3VQMulOp<0, 0, 0b10, 0b1100, 1, IIC_VFMACQ, "vfms", "f32", Requires<[HasVFP4,UseFusedMAC]>; // Match @llvm.fma.* intrinsics -def : Pat<(v2f32 (fma DPR:$src1, DPR:$Vn, DPR:$Vm)), +def : Pat<(v2f32 (fma DPR:$Vn, DPR:$Vm, DPR:$src1)), (VFMAfd DPR:$src1, DPR:$Vn, DPR:$Vm)>, Requires<[HasVFP4]>; -def : Pat<(v4f32 (fma QPR:$src1, QPR:$Vn, QPR:$Vm)), +def : Pat<(v4f32 (fma QPR:$Vn, QPR:$Vm, QPR:$src1)), (VFMAfq QPR:$src1, QPR:$Vn, QPR:$Vm)>, Requires<[HasVFP4]>; -def : Pat<(v2f32 (fma (fneg DPR:$src1), DPR:$Vn, DPR:$Vm)), +def : Pat<(v2f32 (fma (fneg DPR:$Vn), DPR:$Vm, DPR:$src1)), (VFMSfd DPR:$src1, DPR:$Vn, DPR:$Vm)>, Requires<[HasVFP4]>; -def : Pat<(v4f32 (fma (fneg QPR:$src1), QPR:$Vn, QPR:$Vm)), +def : Pat<(v4f32 (fma (fneg QPR:$Vn), QPR:$Vm, QPR:$src1)), (VFMSfq QPR:$src1, QPR:$Vn, QPR:$Vm)>, Requires<[HasVFP4]>; @@ -4305,6 +4308,7 @@ def VORRiv4i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 1, 0, 1, // VBIC : Vector Bitwise Bit Clear (AND NOT) +let TwoOperandAliasConstraint = "$Vn = $Vd" in { def VBICd : N3VX<0, 0, 0b01, 0b0001, 0, 1, (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, IIC_VBINiD, "vbic", "$Vd, $Vn, $Vm", "", @@ -4315,6 +4319,7 @@ def VBICq : N3VX<0, 0, 0b01, 0b0001, 1, 1, (outs QPR:$Vd), "vbic", "$Vd, $Vn, $Vm", "", [(set QPR:$Vd, (v4i32 (and QPR:$Vn, (vnotq QPR:$Vm))))]>; +} def VBICiv4i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 0, 1, 1, (outs DPR:$Vd), (ins nImmSplatI16:$SIMM, DPR:$src), @@ -4820,14 +4825,14 @@ defm VCLS : N2VInt_QHS<0b11, 0b11, 0b00, 0b01000, 0, // VCLZ : Vector Count Leading Zeros defm VCLZ : N2VInt_QHS<0b11, 0b11, 0b00, 0b01001, 0, IIC_VCNTiD, IIC_VCNTiQ, "vclz", "i", - int_arm_neon_vclz>; + ctlz>; // VCNT : Vector Count One Bits def VCNTd : N2VDInt<0b11, 0b11, 0b00, 0b00, 0b01010, 0, IIC_VCNTiD, "vcnt", "8", - v8i8, v8i8, int_arm_neon_vcnt>; + v8i8, v8i8, ctpop>; def VCNTq : N2VQInt<0b11, 0b11, 0b00, 0b00, 0b01010, 0, IIC_VCNTiQ, "vcnt", "8", - v16i8, v16i8, int_arm_neon_vcnt>; + v16i8, v16i8, ctpop>; // Vector Swap def VSWPd : N2VX<0b11, 0b11, 0b00, 0b10, 0b00000, 0, 0, @@ -5308,6 +5313,9 @@ def : AlignedVEXTq; // VEXT : Vector Extract + +// All of these have a two-operand InstAlias. +let TwoOperandAliasConstraint = "$Vn = $Vd" in { class VEXTd : N3V<0,1,0b11,{?,?,?,?},0,0, (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm, immTy:$index), NVExtFrm, @@ -5327,6 +5335,7 @@ class VEXTq bits<4> index; let Inst{11-8} = index{3-0}; } +} def VEXTd8 : VEXTd<"vext", "8", v8i8, imm0_7> { let Inst{11-8} = index{3-0}; @@ -5588,82 +5597,87 @@ def : Pat<(v2f64 (bitconvert (v4f32 QPR:$src))), (v2f64 QPR:$src)>; // Vector lengthening move with load, matching extending loads. 
// extload, zextload and sextload for a standard lengthening load. Example: -// Lengthen_Single<"8", "i16", "i8"> = Pat<(v8i16 (extloadvi8 addrmode5:$addr)) -// (VMOVLuv8i16 (VLDRD addrmode5:$addr))>; +// Lengthen_Single<"8", "i16", "8"> = +// Pat<(v8i16 (extloadvi8 addrmode6:$addr)) +// (VMOVLuv8i16 (VLD1d8 addrmode6:$addr, +// (f64 (IMPLICIT_DEF)), (i32 0)))>; multiclass Lengthen_Single { + let AddedComplexity = 10 in { def _Any : Pat<(!cast("v" # DestLanes # DestTy) - (!cast("extloadv" # SrcTy) addrmode5:$addr)), + (!cast("extloadvi" # SrcTy) addrmode6:$addr)), (!cast("VMOVLuv" # DestLanes # DestTy) - (VLDRD addrmode5:$addr))>; + (!cast("VLD1d" # SrcTy) addrmode6:$addr))>; + def _Z : Pat<(!cast("v" # DestLanes # DestTy) - (!cast("zextloadv" # SrcTy) addrmode5:$addr)), + (!cast("zextloadvi" # SrcTy) addrmode6:$addr)), (!cast("VMOVLuv" # DestLanes # DestTy) - (VLDRD addrmode5:$addr))>; + (!cast("VLD1d" # SrcTy) addrmode6:$addr))>; + def _S : Pat<(!cast("v" # DestLanes # DestTy) - (!cast("sextloadv" # SrcTy) addrmode5:$addr)), + (!cast("sextloadvi" # SrcTy) addrmode6:$addr)), (!cast("VMOVLsv" # DestLanes # DestTy) - (VLDRD addrmode5:$addr))>; + (!cast("VLD1d" # SrcTy) addrmode6:$addr))>; + } } // extload, zextload and sextload for a lengthening load which only uses // half the lanes available. Example: // Lengthen_HalfSingle<"4", "i16", "8", "i16", "i8"> = -// Pat<(v4i16 (extloadvi8 addrmode5:$addr)) -// (EXTRACT_SUBREG (VMOVLuv8i16 (INSERT_SUBREG (f64 (IMPLICIT_DEF)), -// (VLDRS addrmode5:$addr), -// ssub_0)), +// Pat<(v4i16 (extloadvi8 addrmode6oneL32:$addr)), +// (EXTRACT_SUBREG (VMOVLuv8i16 (VLD1LNd32 addrmode6oneL32:$addr, +// (f64 (IMPLICIT_DEF)), (i32 0))), // dsub_0)>; multiclass Lengthen_HalfSingle { def _Any : Pat<(!cast("v" # DestLanes # DestTy) - (!cast("extloadv" # SrcTy) addrmode5:$addr)), + (!cast("extloadv" # SrcTy) addrmode6oneL32:$addr)), (EXTRACT_SUBREG (!cast("VMOVLuv" # InsnLanes # InsnTy) - (INSERT_SUBREG (f64 (IMPLICIT_DEF)), (VLDRS addrmode5:$addr), ssub_0)), + (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))), dsub_0)>; def _Z : Pat<(!cast("v" # DestLanes # DestTy) - (!cast("zextloadv" # SrcTy) addrmode5:$addr)), + (!cast("zextloadv" # SrcTy) addrmode6oneL32:$addr)), (EXTRACT_SUBREG (!cast("VMOVLuv" # InsnLanes # InsnTy) - (INSERT_SUBREG (f64 (IMPLICIT_DEF)), (VLDRS addrmode5:$addr), ssub_0)), + (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))), dsub_0)>; def _S : Pat<(!cast("v" # DestLanes # DestTy) - (!cast("sextloadv" # SrcTy) addrmode5:$addr)), + (!cast("sextloadv" # SrcTy) addrmode6oneL32:$addr)), (EXTRACT_SUBREG (!cast("VMOVLsv" # InsnLanes # InsnTy) - (INSERT_SUBREG (f64 (IMPLICIT_DEF)), (VLDRS addrmode5:$addr), ssub_0)), + (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))), dsub_0)>; } // extload, zextload and sextload for a lengthening load followed by another // lengthening load, to quadruple the initial length. 
// -// Lengthen_Double<"4", "i32", "i8", "8", "i16", "4", "i32", qsub_0> = -// Pat<(v4i32 (extloadvi8 addrmode5:$addr)) -// (EXTRACT_SUBREG (VMOVLuv4i32 -// (EXTRACT_SUBREG (VMOVLuv8i16 (INSERT_SUBREG (f64 (IMPLICIT_DEF)), -// (VLDRS addrmode5:$addr), -// ssub_0)), +// Lengthen_Double<"4", "i32", "i8", "8", "i16", "4", "i32"> = +// Pat<(v4i32 (extloadvi8 addrmode6oneL32:$addr)) +// (EXTRACT_SUBREG (VMOVLuv4i32 +// (EXTRACT_SUBREG (VMOVLuv8i16 (VLD1LNd32 addrmode6oneL32:$addr, +// (f64 (IMPLICIT_DEF)), +// (i32 0))), // dsub_0)), -// qsub_0)>; +// dsub_0)>; multiclass Lengthen_Double { def _Any : Pat<(!cast("v" # DestLanes # DestTy) - (!cast("extloadv" # SrcTy) addrmode5:$addr)), + (!cast("extloadv" # SrcTy) addrmode6oneL32:$addr)), (!cast("VMOVLuv" # Insn2Lanes # Insn2Ty) (EXTRACT_SUBREG (!cast("VMOVLuv" # Insn1Lanes # Insn1Ty) - (INSERT_SUBREG (f64 (IMPLICIT_DEF)), (VLDRS addrmode5:$addr), - ssub_0)), dsub_0))>; + (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))), + dsub_0))>; def _Z : Pat<(!cast("v" # DestLanes # DestTy) - (!cast("zextloadv" # SrcTy) addrmode5:$addr)), + (!cast("zextloadv" # SrcTy) addrmode6oneL32:$addr)), (!cast("VMOVLuv" # Insn2Lanes # Insn2Ty) (EXTRACT_SUBREG (!cast("VMOVLuv" # Insn1Lanes # Insn1Ty) - (INSERT_SUBREG (f64 (IMPLICIT_DEF)), (VLDRS addrmode5:$addr), - ssub_0)), dsub_0))>; + (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))), + dsub_0))>; def _S : Pat<(!cast("v" # DestLanes # DestTy) - (!cast("sextloadv" # SrcTy) addrmode5:$addr)), + (!cast("sextloadv" # SrcTy) addrmode6oneL32:$addr)), (!cast("VMOVLsv" # Insn2Lanes # Insn2Ty) (EXTRACT_SUBREG (!cast("VMOVLsv" # Insn1Lanes # Insn1Ty) - (INSERT_SUBREG (f64 (IMPLICIT_DEF)), (VLDRS addrmode5:$addr), - ssub_0)), dsub_0))>; + (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))), + dsub_0))>; } // extload, zextload and sextload for a lengthening load followed by another @@ -5671,45 +5685,43 @@ multiclass Lengthen_Double = -// Pat<(v4i32 (extloadvi8 addrmode5:$addr)) -// (EXTRACT_SUBREG (VMOVLuv4i32 -// (EXTRACT_SUBREG (VMOVLuv8i16 (INSERT_SUBREG (f64 (IMPLICIT_DEF)), -// (VLDRS addrmode5:$addr), -// ssub_0)), -// dsub_0)), -// dsub_0)>; +// Pat<(v2i32 (extloadvi8 addrmode6:$addr)) +// (EXTRACT_SUBREG (VMOVLuv4i32 +// (EXTRACT_SUBREG (VMOVLuv8i16 (VLD1LNd16 addrmode6:$addr, +// (f64 (IMPLICIT_DEF)), (i32 0))), +// dsub_0)), +// dsub_0)>; multiclass Lengthen_HalfDouble { def _Any : Pat<(!cast("v" # DestLanes # DestTy) - (!cast("extloadv" # SrcTy) addrmode5:$addr)), + (!cast("extloadv" # SrcTy) addrmode6:$addr)), (EXTRACT_SUBREG (!cast("VMOVLuv" # Insn2Lanes # Insn2Ty) (EXTRACT_SUBREG (!cast("VMOVLuv" # Insn1Lanes # Insn1Ty) - (INSERT_SUBREG (f64 (IMPLICIT_DEF)), (VLDRS addrmode5:$addr), - ssub_0)), dsub_0)), + (VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0))), + dsub_0)), dsub_0)>; def _Z : Pat<(!cast("v" # DestLanes # DestTy) - (!cast("zextloadv" # SrcTy) addrmode5:$addr)), + (!cast("zextloadv" # SrcTy) addrmode6:$addr)), (EXTRACT_SUBREG (!cast("VMOVLuv" # Insn2Lanes # Insn2Ty) (EXTRACT_SUBREG (!cast("VMOVLuv" # Insn1Lanes # Insn1Ty) - (INSERT_SUBREG (f64 (IMPLICIT_DEF)), (VLDRS addrmode5:$addr), - ssub_0)), dsub_0)), + (VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0))), + dsub_0)), dsub_0)>; def _S : Pat<(!cast("v" # DestLanes # DestTy) - (!cast("sextloadv" # SrcTy) addrmode5:$addr)), + (!cast("sextloadv" # SrcTy) addrmode6:$addr)), (EXTRACT_SUBREG (!cast("VMOVLsv" # Insn2Lanes # Insn2Ty) (EXTRACT_SUBREG (!cast("VMOVLsv" # Insn1Lanes # Insn1Ty) - 
(INSERT_SUBREG (f64 (IMPLICIT_DEF)), (VLDRS addrmode5:$addr), - ssub_0)), dsub_0)), + (VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0))), + dsub_0)), dsub_0)>; } -defm : Lengthen_Single<"8", "i16", "i8">; // v8i8 -> v8i16 -defm : Lengthen_Single<"4", "i32", "i16">; // v4i16 -> v4i32 -defm : Lengthen_Single<"2", "i64", "i32">; // v2i32 -> v2i64 +defm : Lengthen_Single<"8", "i16", "8">; // v8i8 -> v8i16 +defm : Lengthen_Single<"4", "i32", "16">; // v4i16 -> v4i32 +defm : Lengthen_Single<"2", "i64", "32">; // v2i32 -> v2i64 defm : Lengthen_HalfSingle<"4", "i16", "i8", "8", "i16">; // v4i8 -> v4i16 -defm : Lengthen_HalfSingle<"2", "i16", "i8", "8", "i16">; // v2i8 -> v2i16 defm : Lengthen_HalfSingle<"2", "i32", "i16", "4", "i32">; // v2i16 -> v2i32 // Double lengthening - v4i8 -> v4i16 -> v4i32 @@ -5720,18 +5732,18 @@ defm : Lengthen_HalfDouble<"2", "i32", "i8", "8", "i16", "4", "i32">; defm : Lengthen_Double<"2", "i64", "i16", "4", "i32", "2", "i64">; // Triple lengthening - v2i8 -> v2i16 -> v2i32 -> v2i64 -def : Pat<(v2i64 (extloadvi8 addrmode5:$addr)), +def : Pat<(v2i64 (extloadvi8 addrmode6:$addr)), (VMOVLuv2i64 (EXTRACT_SUBREG (VMOVLuv4i32 (EXTRACT_SUBREG (VMOVLuv8i16 - (INSERT_SUBREG (f64 (IMPLICIT_DEF)), (VLDRS addrmode5:$addr), ssub_0)), - dsub_0)), dsub_0))>; -def : Pat<(v2i64 (zextloadvi8 addrmode5:$addr)), + (VLD1LNd16 addrmode6:$addr, + (f64 (IMPLICIT_DEF)), (i32 0))), dsub_0)), dsub_0))>; +def : Pat<(v2i64 (zextloadvi8 addrmode6:$addr)), (VMOVLuv2i64 (EXTRACT_SUBREG (VMOVLuv4i32 (EXTRACT_SUBREG (VMOVLuv8i16 - (INSERT_SUBREG (f64 (IMPLICIT_DEF)), (VLDRS addrmode5:$addr), ssub_0)), - dsub_0)), dsub_0))>; -def : Pat<(v2i64 (sextloadvi8 addrmode5:$addr)), + (VLD1LNd16 addrmode6:$addr, + (f64 (IMPLICIT_DEF)), (i32 0))), dsub_0)), dsub_0))>; +def : Pat<(v2i64 (sextloadvi8 addrmode6:$addr)), (VMOVLsv2i64 (EXTRACT_SUBREG (VMOVLsv4i32 (EXTRACT_SUBREG (VMOVLsv8i16 - (INSERT_SUBREG (f64 (IMPLICIT_DEF)), (VLDRS addrmode5:$addr), ssub_0)), - dsub_0)), dsub_0))>; + (VLD1LNd16 addrmode6:$addr, + (f64 (IMPLICIT_DEF)), (i32 0))), dsub_0)), dsub_0))>; //===----------------------------------------------------------------------===// // Assembler aliases @@ -5742,69 +5754,6 @@ def : VFP2InstAlias<"fmdhr${p} $Dd, $Rn", def : VFP2InstAlias<"fmdlr${p} $Dd, $Rn", (VSETLNi32 DPR:$Dd, GPR:$Rn, 0, pred:$p)>; - -// VADD two-operand aliases. -def : NEONInstAlias<"vadd${p}.i8 $Vdn, $Vm", - (VADDv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vadd${p}.i16 $Vdn, $Vm", - (VADDv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vadd${p}.i32 $Vdn, $Vm", - (VADDv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vadd${p}.i64 $Vdn, $Vm", - (VADDv2i64 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; - -def : NEONInstAlias<"vadd${p}.i8 $Vdn, $Vm", - (VADDv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vadd${p}.i16 $Vdn, $Vm", - (VADDv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vadd${p}.i32 $Vdn, $Vm", - (VADDv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vadd${p}.i64 $Vdn, $Vm", - (VADDv1i64 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; - -def : NEONInstAlias<"vadd${p}.f32 $Vdn, $Vm", - (VADDfd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vadd${p}.f32 $Vdn, $Vm", - (VADDfq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; - -// VSUB two-operand aliases. 
-def : NEONInstAlias<"vsub${p}.i8 $Vdn, $Vm", - (VSUBv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vsub${p}.i16 $Vdn, $Vm", - (VSUBv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vsub${p}.i32 $Vdn, $Vm", - (VSUBv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vsub${p}.i64 $Vdn, $Vm", - (VSUBv2i64 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; - -def : NEONInstAlias<"vsub${p}.i8 $Vdn, $Vm", - (VSUBv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vsub${p}.i16 $Vdn, $Vm", - (VSUBv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vsub${p}.i32 $Vdn, $Vm", - (VSUBv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vsub${p}.i64 $Vdn, $Vm", - (VSUBv1i64 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; - -def : NEONInstAlias<"vsub${p}.f32 $Vdn, $Vm", - (VSUBfd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vsub${p}.f32 $Vdn, $Vm", - (VSUBfq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; - -// VADDW two-operand aliases. -def : NEONInstAlias<"vaddw${p}.s8 $Vdn, $Vm", - (VADDWsv8i16 QPR:$Vdn, QPR:$Vdn, DPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vaddw${p}.s16 $Vdn, $Vm", - (VADDWsv4i32 QPR:$Vdn, QPR:$Vdn, DPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vaddw${p}.s32 $Vdn, $Vm", - (VADDWsv2i64 QPR:$Vdn, QPR:$Vdn, DPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vaddw${p}.u8 $Vdn, $Vm", - (VADDWuv8i16 QPR:$Vdn, QPR:$Vdn, DPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vaddw${p}.u16 $Vdn, $Vm", - (VADDWuv4i32 QPR:$Vdn, QPR:$Vdn, DPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vaddw${p}.u32 $Vdn, $Vm", - (VADDWuv2i64 QPR:$Vdn, QPR:$Vdn, DPR:$Vm, pred:$p)>; - // VAND/VBIC/VEOR/VORR accept but do not require a type suffix. defm : NEONDTAnyInstAlias<"vand${p}", "$Vd, $Vn, $Vm", (VANDd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>; @@ -5823,23 +5772,6 @@ defm : NEONDTAnyInstAlias<"vorr${p}", "$Vd, $Vn, $Vm", defm : NEONDTAnyInstAlias<"vorr${p}", "$Vd, $Vn, $Vm", (VORRq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>; // ... two-operand aliases -def : NEONInstAlias<"vand${p} $Vdn, $Vm", - (VANDd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vand${p} $Vdn, $Vm", - (VANDq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vbic${p} $Vdn, $Vm", - (VBICd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vbic${p} $Vdn, $Vm", - (VBICq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; -def : NEONInstAlias<"veor${p} $Vdn, $Vm", - (VEORd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; -def : NEONInstAlias<"veor${p} $Vdn, $Vm", - (VEORq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vorr${p} $Vdn, $Vm", - (VORRd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vorr${p} $Vdn, $Vm", - (VORRq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; - defm : NEONDTAnyInstAlias<"vand${p}", "$Vdn, $Vm", (VANDd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; defm : NEONDTAnyInstAlias<"vand${p}", "$Vdn, $Vm", @@ -5853,212 +5785,6 @@ defm : NEONDTAnyInstAlias<"vorr${p}", "$Vdn, $Vm", defm : NEONDTAnyInstAlias<"vorr${p}", "$Vdn, $Vm", (VORRq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; -// VMUL two-operand aliases. 
-def : NEONInstAlias<"vmul${p}.p8 $Qdn, $Qm", - (VMULpq QPR:$Qdn, QPR:$Qdn, QPR:$Qm, pred:$p)>; -def : NEONInstAlias<"vmul${p}.i8 $Qdn, $Qm", - (VMULv16i8 QPR:$Qdn, QPR:$Qdn, QPR:$Qm, pred:$p)>; -def : NEONInstAlias<"vmul${p}.i16 $Qdn, $Qm", - (VMULv8i16 QPR:$Qdn, QPR:$Qdn, QPR:$Qm, pred:$p)>; -def : NEONInstAlias<"vmul${p}.i32 $Qdn, $Qm", - (VMULv4i32 QPR:$Qdn, QPR:$Qdn, QPR:$Qm, pred:$p)>; - -def : NEONInstAlias<"vmul${p}.p8 $Ddn, $Dm", - (VMULpd DPR:$Ddn, DPR:$Ddn, DPR:$Dm, pred:$p)>; -def : NEONInstAlias<"vmul${p}.i8 $Ddn, $Dm", - (VMULv8i8 DPR:$Ddn, DPR:$Ddn, DPR:$Dm, pred:$p)>; -def : NEONInstAlias<"vmul${p}.i16 $Ddn, $Dm", - (VMULv4i16 DPR:$Ddn, DPR:$Ddn, DPR:$Dm, pred:$p)>; -def : NEONInstAlias<"vmul${p}.i32 $Ddn, $Dm", - (VMULv2i32 DPR:$Ddn, DPR:$Ddn, DPR:$Dm, pred:$p)>; - -def : NEONInstAlias<"vmul${p}.f32 $Qdn, $Qm", - (VMULfq QPR:$Qdn, QPR:$Qdn, QPR:$Qm, pred:$p)>; -def : NEONInstAlias<"vmul${p}.f32 $Ddn, $Dm", - (VMULfd DPR:$Ddn, DPR:$Ddn, DPR:$Dm, pred:$p)>; - -def : NEONInstAlias<"vmul${p}.i16 $Ddn, $Dm$lane", - (VMULslv4i16 DPR:$Ddn, DPR:$Ddn, DPR_8:$Dm, - VectorIndex16:$lane, pred:$p)>; -def : NEONInstAlias<"vmul${p}.i16 $Qdn, $Dm$lane", - (VMULslv8i16 QPR:$Qdn, QPR:$Qdn, DPR_8:$Dm, - VectorIndex16:$lane, pred:$p)>; - -def : NEONInstAlias<"vmul${p}.i32 $Ddn, $Dm$lane", - (VMULslv2i32 DPR:$Ddn, DPR:$Ddn, DPR_VFP2:$Dm, - VectorIndex32:$lane, pred:$p)>; -def : NEONInstAlias<"vmul${p}.i32 $Qdn, $Dm$lane", - (VMULslv4i32 QPR:$Qdn, QPR:$Qdn, DPR_VFP2:$Dm, - VectorIndex32:$lane, pred:$p)>; - -def : NEONInstAlias<"vmul${p}.f32 $Ddn, $Dm$lane", - (VMULslfd DPR:$Ddn, DPR:$Ddn, DPR_VFP2:$Dm, - VectorIndex32:$lane, pred:$p)>; -def : NEONInstAlias<"vmul${p}.f32 $Qdn, $Dm$lane", - (VMULslfq QPR:$Qdn, QPR:$Qdn, DPR_VFP2:$Dm, - VectorIndex32:$lane, pred:$p)>; - -// VQADD (register) two-operand aliases. -def : NEONInstAlias<"vqadd${p}.s8 $Vdn, $Vm", - (VQADDsv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vqadd${p}.s16 $Vdn, $Vm", - (VQADDsv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vqadd${p}.s32 $Vdn, $Vm", - (VQADDsv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vqadd${p}.s64 $Vdn, $Vm", - (VQADDsv1i64 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vqadd${p}.u8 $Vdn, $Vm", - (VQADDuv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vqadd${p}.u16 $Vdn, $Vm", - (VQADDuv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vqadd${p}.u32 $Vdn, $Vm", - (VQADDuv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vqadd${p}.u64 $Vdn, $Vm", - (VQADDuv1i64 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; - -def : NEONInstAlias<"vqadd${p}.s8 $Vdn, $Vm", - (VQADDsv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vqadd${p}.s16 $Vdn, $Vm", - (VQADDsv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vqadd${p}.s32 $Vdn, $Vm", - (VQADDsv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vqadd${p}.s64 $Vdn, $Vm", - (VQADDsv2i64 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vqadd${p}.u8 $Vdn, $Vm", - (VQADDuv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vqadd${p}.u16 $Vdn, $Vm", - (VQADDuv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vqadd${p}.u32 $Vdn, $Vm", - (VQADDuv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vqadd${p}.u64 $Vdn, $Vm", - (VQADDuv2i64 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; - -// VSHL (immediate) two-operand aliases. 
-def : NEONInstAlias<"vshl${p}.i8 $Vdn, $imm", - (VSHLiv8i8 DPR:$Vdn, DPR:$Vdn, imm0_7:$imm, pred:$p)>; -def : NEONInstAlias<"vshl${p}.i16 $Vdn, $imm", - (VSHLiv4i16 DPR:$Vdn, DPR:$Vdn, imm0_15:$imm, pred:$p)>; -def : NEONInstAlias<"vshl${p}.i32 $Vdn, $imm", - (VSHLiv2i32 DPR:$Vdn, DPR:$Vdn, imm0_31:$imm, pred:$p)>; -def : NEONInstAlias<"vshl${p}.i64 $Vdn, $imm", - (VSHLiv1i64 DPR:$Vdn, DPR:$Vdn, imm0_63:$imm, pred:$p)>; - -def : NEONInstAlias<"vshl${p}.i8 $Vdn, $imm", - (VSHLiv16i8 QPR:$Vdn, QPR:$Vdn, imm0_7:$imm, pred:$p)>; -def : NEONInstAlias<"vshl${p}.i16 $Vdn, $imm", - (VSHLiv8i16 QPR:$Vdn, QPR:$Vdn, imm0_15:$imm, pred:$p)>; -def : NEONInstAlias<"vshl${p}.i32 $Vdn, $imm", - (VSHLiv4i32 QPR:$Vdn, QPR:$Vdn, imm0_31:$imm, pred:$p)>; -def : NEONInstAlias<"vshl${p}.i64 $Vdn, $imm", - (VSHLiv2i64 QPR:$Vdn, QPR:$Vdn, imm0_63:$imm, pred:$p)>; - -// VSHL (register) two-operand aliases. -def : NEONInstAlias<"vshl${p}.s8 $Vdn, $Vm", - (VSHLsv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vshl${p}.s16 $Vdn, $Vm", - (VSHLsv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vshl${p}.s32 $Vdn, $Vm", - (VSHLsv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vshl${p}.s64 $Vdn, $Vm", - (VSHLsv1i64 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vshl${p}.u8 $Vdn, $Vm", - (VSHLuv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vshl${p}.u16 $Vdn, $Vm", - (VSHLuv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vshl${p}.u32 $Vdn, $Vm", - (VSHLuv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vshl${p}.u64 $Vdn, $Vm", - (VSHLuv1i64 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; - -def : NEONInstAlias<"vshl${p}.s8 $Vdn, $Vm", - (VSHLsv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vshl${p}.s16 $Vdn, $Vm", - (VSHLsv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vshl${p}.s32 $Vdn, $Vm", - (VSHLsv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vshl${p}.s64 $Vdn, $Vm", - (VSHLsv2i64 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vshl${p}.u8 $Vdn, $Vm", - (VSHLuv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vshl${p}.u16 $Vdn, $Vm", - (VSHLuv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vshl${p}.u32 $Vdn, $Vm", - (VSHLuv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vshl${p}.u64 $Vdn, $Vm", - (VSHLuv2i64 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; - -// VSHR (immediate) two-operand aliases. 
-def : NEONInstAlias<"vshr${p}.s8 $Vdn, $imm", - (VSHRsv8i8 DPR:$Vdn, DPR:$Vdn, shr_imm8:$imm, pred:$p)>; -def : NEONInstAlias<"vshr${p}.s16 $Vdn, $imm", - (VSHRsv4i16 DPR:$Vdn, DPR:$Vdn, shr_imm16:$imm, pred:$p)>; -def : NEONInstAlias<"vshr${p}.s32 $Vdn, $imm", - (VSHRsv2i32 DPR:$Vdn, DPR:$Vdn, shr_imm32:$imm, pred:$p)>; -def : NEONInstAlias<"vshr${p}.s64 $Vdn, $imm", - (VSHRsv1i64 DPR:$Vdn, DPR:$Vdn, shr_imm64:$imm, pred:$p)>; - -def : NEONInstAlias<"vshr${p}.s8 $Vdn, $imm", - (VSHRsv16i8 QPR:$Vdn, QPR:$Vdn, shr_imm8:$imm, pred:$p)>; -def : NEONInstAlias<"vshr${p}.s16 $Vdn, $imm", - (VSHRsv8i16 QPR:$Vdn, QPR:$Vdn, shr_imm16:$imm, pred:$p)>; -def : NEONInstAlias<"vshr${p}.s32 $Vdn, $imm", - (VSHRsv4i32 QPR:$Vdn, QPR:$Vdn, shr_imm32:$imm, pred:$p)>; -def : NEONInstAlias<"vshr${p}.s64 $Vdn, $imm", - (VSHRsv2i64 QPR:$Vdn, QPR:$Vdn, shr_imm64:$imm, pred:$p)>; - -def : NEONInstAlias<"vshr${p}.u8 $Vdn, $imm", - (VSHRuv8i8 DPR:$Vdn, DPR:$Vdn, shr_imm8:$imm, pred:$p)>; -def : NEONInstAlias<"vshr${p}.u16 $Vdn, $imm", - (VSHRuv4i16 DPR:$Vdn, DPR:$Vdn, shr_imm16:$imm, pred:$p)>; -def : NEONInstAlias<"vshr${p}.u32 $Vdn, $imm", - (VSHRuv2i32 DPR:$Vdn, DPR:$Vdn, shr_imm32:$imm, pred:$p)>; -def : NEONInstAlias<"vshr${p}.u64 $Vdn, $imm", - (VSHRuv1i64 DPR:$Vdn, DPR:$Vdn, shr_imm64:$imm, pred:$p)>; - -def : NEONInstAlias<"vshr${p}.u8 $Vdn, $imm", - (VSHRuv16i8 QPR:$Vdn, QPR:$Vdn, shr_imm8:$imm, pred:$p)>; -def : NEONInstAlias<"vshr${p}.u16 $Vdn, $imm", - (VSHRuv8i16 QPR:$Vdn, QPR:$Vdn, shr_imm16:$imm, pred:$p)>; -def : NEONInstAlias<"vshr${p}.u32 $Vdn, $imm", - (VSHRuv4i32 QPR:$Vdn, QPR:$Vdn, shr_imm32:$imm, pred:$p)>; -def : NEONInstAlias<"vshr${p}.u64 $Vdn, $imm", - (VSHRuv2i64 QPR:$Vdn, QPR:$Vdn, shr_imm64:$imm, pred:$p)>; - -// VRSHL two-operand aliases. -def : NEONInstAlias<"vrshl${p}.s8 $Vdn, $Vm", - (VRSHLsv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vrshl${p}.s16 $Vdn, $Vm", - (VRSHLsv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vrshl${p}.s32 $Vdn, $Vm", - (VRSHLsv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vrshl${p}.s64 $Vdn, $Vm", - (VRSHLsv1i64 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vrshl${p}.u8 $Vdn, $Vm", - (VRSHLuv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vrshl${p}.u16 $Vdn, $Vm", - (VRSHLuv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vrshl${p}.u32 $Vdn, $Vm", - (VRSHLuv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vrshl${p}.u64 $Vdn, $Vm", - (VRSHLuv1i64 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; - -def : NEONInstAlias<"vrshl${p}.s8 $Vdn, $Vm", - (VRSHLsv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vrshl${p}.s16 $Vdn, $Vm", - (VRSHLsv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vrshl${p}.s32 $Vdn, $Vm", - (VRSHLsv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vrshl${p}.s64 $Vdn, $Vm", - (VRSHLsv2i64 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vrshl${p}.u8 $Vdn, $Vm", - (VRSHLuv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vrshl${p}.u16 $Vdn, $Vm", - (VRSHLuv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vrshl${p}.u32 $Vdn, $Vm", - (VRSHLuv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vrshl${p}.u64 $Vdn, $Vm", - (VRSHLuv2i64 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; - // VLD1 single-lane pseudo-instructions. 
These need special handling for // the lane index that an InstAlias can't handle, so we use these instead. def VLD1LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vld1${p}", ".8", "$list, $addr", @@ -6223,17 +5949,17 @@ def VST2LNqWB_register_Asm_32 : // VLD3 all-lanes pseudo-instructions. These need special handling for // the lane index that an InstAlias can't handle, so we use these instead. -def VLD3DUPdAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr", +def VLD3DUPdAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr", (ins VecListThreeDAllLanes:$list, addrmode6:$addr, pred:$p)>; -def VLD3DUPdAsm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr", +def VLD3DUPdAsm_16: NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr", (ins VecListThreeDAllLanes:$list, addrmode6:$addr, pred:$p)>; -def VLD3DUPdAsm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr", +def VLD3DUPdAsm_32: NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr", (ins VecListThreeDAllLanes:$list, addrmode6:$addr, pred:$p)>; -def VLD3DUPqAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr", +def VLD3DUPqAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr", (ins VecListThreeQAllLanes:$list, addrmode6:$addr, pred:$p)>; -def VLD3DUPqAsm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr", +def VLD3DUPqAsm_16: NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr", (ins VecListThreeQAllLanes:$list, addrmode6:$addr, pred:$p)>; -def VLD3DUPqAsm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr", +def VLD3DUPqAsm_32: NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr", (ins VecListThreeQAllLanes:$list, addrmode6:$addr, pred:$p)>; def VLD3DUPdWB_fixed_Asm_8 : @@ -6499,17 +6225,17 @@ def VST3qWB_register_Asm_32 : // VLD4 all-lanes pseudo-instructions. These need special handling for // the lane index that an InstAlias can't handle, so we use these instead. 
-def VLD4DUPdAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr", +def VLD4DUPdAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr", (ins VecListFourDAllLanes:$list, addrmode6:$addr, pred:$p)>; -def VLD4DUPdAsm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr", +def VLD4DUPdAsm_16: NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr", (ins VecListFourDAllLanes:$list, addrmode6:$addr, pred:$p)>; -def VLD4DUPdAsm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr", +def VLD4DUPdAsm_32: NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr", (ins VecListFourDAllLanes:$list, addrmode6:$addr, pred:$p)>; -def VLD4DUPqAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr", +def VLD4DUPqAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr", (ins VecListFourQAllLanes:$list, addrmode6:$addr, pred:$p)>; -def VLD4DUPqAsm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr", +def VLD4DUPqAsm_16: NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr", (ins VecListFourQAllLanes:$list, addrmode6:$addr, pred:$p)>; -def VLD4DUPqAsm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr", +def VLD4DUPqAsm_32: NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr", (ins VecListFourQAllLanes:$list, addrmode6:$addr, pred:$p)>; def VLD4DUPdWB_fixed_Asm_8 : @@ -6845,277 +6571,6 @@ def : NEONInstAlias<"vclt${p}.u32 $Qd, $Qn, $Qm", def : NEONInstAlias<"vclt${p}.f32 $Qd, $Qn, $Qm", (VCGTfq QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>; -// Two-operand variants for VEXT -def : NEONInstAlias<"vext${p}.8 $Vdn, $Vm, $imm", - (VEXTd8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, imm0_7:$imm, pred:$p)>; -def : NEONInstAlias<"vext${p}.16 $Vdn, $Vm, $imm", - (VEXTd16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, imm0_3:$imm, pred:$p)>; -def : NEONInstAlias<"vext${p}.32 $Vdn, $Vm, $imm", - (VEXTd32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, imm0_1:$imm, pred:$p)>; - -def : NEONInstAlias<"vext${p}.8 $Vdn, $Vm, $imm", - (VEXTq8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, imm0_15:$imm, pred:$p)>; -def : NEONInstAlias<"vext${p}.16 $Vdn, $Vm, $imm", - (VEXTq16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, imm0_7:$imm, pred:$p)>; -def : NEONInstAlias<"vext${p}.32 $Vdn, $Vm, $imm", - (VEXTq32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, imm0_3:$imm, pred:$p)>; -def : NEONInstAlias<"vext${p}.64 $Vdn, $Vm, $imm", - (VEXTq64 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, imm0_1:$imm, pred:$p)>; - -// Two-operand variants for VQDMULH -def : NEONInstAlias<"vqdmulh${p}.s16 $Vdn, $Vm", - (VQDMULHv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vqdmulh${p}.s32 $Vdn, $Vm", - (VQDMULHv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; - -def : NEONInstAlias<"vqdmulh${p}.s16 $Vdn, $Vm", - (VQDMULHv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vqdmulh${p}.s32 $Vdn, $Vm", - (VQDMULHv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; - -// Two-operand variants for VMAX. 
-def : NEONInstAlias<"vmax${p}.s8 $Vdn, $Vm", - (VMAXsv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vmax${p}.s16 $Vdn, $Vm", - (VMAXsv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vmax${p}.s32 $Vdn, $Vm", - (VMAXsv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vmax${p}.u8 $Vdn, $Vm", - (VMAXuv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vmax${p}.u16 $Vdn, $Vm", - (VMAXuv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vmax${p}.u32 $Vdn, $Vm", - (VMAXuv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vmax${p}.f32 $Vdn, $Vm", - (VMAXfd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; - -def : NEONInstAlias<"vmax${p}.s8 $Vdn, $Vm", - (VMAXsv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vmax${p}.s16 $Vdn, $Vm", - (VMAXsv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vmax${p}.s32 $Vdn, $Vm", - (VMAXsv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vmax${p}.u8 $Vdn, $Vm", - (VMAXuv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vmax${p}.u16 $Vdn, $Vm", - (VMAXuv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vmax${p}.u32 $Vdn, $Vm", - (VMAXuv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vmax${p}.f32 $Vdn, $Vm", - (VMAXfq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; - -// Two-operand variants for VMIN. -def : NEONInstAlias<"vmin${p}.s8 $Vdn, $Vm", - (VMINsv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vmin${p}.s16 $Vdn, $Vm", - (VMINsv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vmin${p}.s32 $Vdn, $Vm", - (VMINsv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vmin${p}.u8 $Vdn, $Vm", - (VMINuv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vmin${p}.u16 $Vdn, $Vm", - (VMINuv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vmin${p}.u32 $Vdn, $Vm", - (VMINuv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vmin${p}.f32 $Vdn, $Vm", - (VMINfd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; - -def : NEONInstAlias<"vmin${p}.s8 $Vdn, $Vm", - (VMINsv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vmin${p}.s16 $Vdn, $Vm", - (VMINsv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vmin${p}.s32 $Vdn, $Vm", - (VMINsv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vmin${p}.u8 $Vdn, $Vm", - (VMINuv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vmin${p}.u16 $Vdn, $Vm", - (VMINuv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vmin${p}.u32 $Vdn, $Vm", - (VMINuv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vmin${p}.f32 $Vdn, $Vm", - (VMINfq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; - -// Two-operand variants for VPADD. -def : NEONInstAlias<"vpadd${p}.i8 $Vdn, $Vm", - (VPADDi8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vpadd${p}.i16 $Vdn, $Vm", - (VPADDi16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vpadd${p}.i32 $Vdn, $Vm", - (VPADDi32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vpadd${p}.f32 $Vdn, $Vm", - (VPADDf DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; - -// Two-operand variants for VSRA. - // Signed. 
-def : NEONInstAlias<"vsra${p}.s8 $Vdm, $imm", - (VSRAsv8i8 DPR:$Vdm, DPR:$Vdm, shr_imm8:$imm, pred:$p)>; -def : NEONInstAlias<"vsra${p}.s16 $Vdm, $imm", - (VSRAsv4i16 DPR:$Vdm, DPR:$Vdm, shr_imm16:$imm, pred:$p)>; -def : NEONInstAlias<"vsra${p}.s32 $Vdm, $imm", - (VSRAsv2i32 DPR:$Vdm, DPR:$Vdm, shr_imm32:$imm, pred:$p)>; -def : NEONInstAlias<"vsra${p}.s64 $Vdm, $imm", - (VSRAsv1i64 DPR:$Vdm, DPR:$Vdm, shr_imm64:$imm, pred:$p)>; - -def : NEONInstAlias<"vsra${p}.s8 $Vdm, $imm", - (VSRAsv16i8 QPR:$Vdm, QPR:$Vdm, shr_imm8:$imm, pred:$p)>; -def : NEONInstAlias<"vsra${p}.s16 $Vdm, $imm", - (VSRAsv8i16 QPR:$Vdm, QPR:$Vdm, shr_imm16:$imm, pred:$p)>; -def : NEONInstAlias<"vsra${p}.s32 $Vdm, $imm", - (VSRAsv4i32 QPR:$Vdm, QPR:$Vdm, shr_imm32:$imm, pred:$p)>; -def : NEONInstAlias<"vsra${p}.s64 $Vdm, $imm", - (VSRAsv2i64 QPR:$Vdm, QPR:$Vdm, shr_imm64:$imm, pred:$p)>; - - // Unsigned. -def : NEONInstAlias<"vsra${p}.u8 $Vdm, $imm", - (VSRAuv8i8 DPR:$Vdm, DPR:$Vdm, shr_imm8:$imm, pred:$p)>; -def : NEONInstAlias<"vsra${p}.u16 $Vdm, $imm", - (VSRAuv4i16 DPR:$Vdm, DPR:$Vdm, shr_imm16:$imm, pred:$p)>; -def : NEONInstAlias<"vsra${p}.u32 $Vdm, $imm", - (VSRAuv2i32 DPR:$Vdm, DPR:$Vdm, shr_imm32:$imm, pred:$p)>; -def : NEONInstAlias<"vsra${p}.u64 $Vdm, $imm", - (VSRAuv1i64 DPR:$Vdm, DPR:$Vdm, shr_imm64:$imm, pred:$p)>; - -def : NEONInstAlias<"vsra${p}.u8 $Vdm, $imm", - (VSRAuv16i8 QPR:$Vdm, QPR:$Vdm, shr_imm8:$imm, pred:$p)>; -def : NEONInstAlias<"vsra${p}.u16 $Vdm, $imm", - (VSRAuv8i16 QPR:$Vdm, QPR:$Vdm, shr_imm16:$imm, pred:$p)>; -def : NEONInstAlias<"vsra${p}.u32 $Vdm, $imm", - (VSRAuv4i32 QPR:$Vdm, QPR:$Vdm, shr_imm32:$imm, pred:$p)>; -def : NEONInstAlias<"vsra${p}.u64 $Vdm, $imm", - (VSRAuv2i64 QPR:$Vdm, QPR:$Vdm, shr_imm64:$imm, pred:$p)>; - -// Two-operand variants for VSRI. -def : NEONInstAlias<"vsri${p}.8 $Vdm, $imm", - (VSRIv8i8 DPR:$Vdm, DPR:$Vdm, shr_imm8:$imm, pred:$p)>; -def : NEONInstAlias<"vsri${p}.16 $Vdm, $imm", - (VSRIv4i16 DPR:$Vdm, DPR:$Vdm, shr_imm16:$imm, pred:$p)>; -def : NEONInstAlias<"vsri${p}.32 $Vdm, $imm", - (VSRIv2i32 DPR:$Vdm, DPR:$Vdm, shr_imm32:$imm, pred:$p)>; -def : NEONInstAlias<"vsri${p}.64 $Vdm, $imm", - (VSRIv1i64 DPR:$Vdm, DPR:$Vdm, shr_imm64:$imm, pred:$p)>; - -def : NEONInstAlias<"vsri${p}.8 $Vdm, $imm", - (VSRIv16i8 QPR:$Vdm, QPR:$Vdm, shr_imm8:$imm, pred:$p)>; -def : NEONInstAlias<"vsri${p}.16 $Vdm, $imm", - (VSRIv8i16 QPR:$Vdm, QPR:$Vdm, shr_imm16:$imm, pred:$p)>; -def : NEONInstAlias<"vsri${p}.32 $Vdm, $imm", - (VSRIv4i32 QPR:$Vdm, QPR:$Vdm, shr_imm32:$imm, pred:$p)>; -def : NEONInstAlias<"vsri${p}.64 $Vdm, $imm", - (VSRIv2i64 QPR:$Vdm, QPR:$Vdm, shr_imm64:$imm, pred:$p)>; - -// Two-operand variants for VSLI. 
-def : NEONInstAlias<"vsli${p}.8 $Vdm, $imm", - (VSLIv8i8 DPR:$Vdm, DPR:$Vdm, shr_imm8:$imm, pred:$p)>; -def : NEONInstAlias<"vsli${p}.16 $Vdm, $imm", - (VSLIv4i16 DPR:$Vdm, DPR:$Vdm, shr_imm16:$imm, pred:$p)>; -def : NEONInstAlias<"vsli${p}.32 $Vdm, $imm", - (VSLIv2i32 DPR:$Vdm, DPR:$Vdm, shr_imm32:$imm, pred:$p)>; -def : NEONInstAlias<"vsli${p}.64 $Vdm, $imm", - (VSLIv1i64 DPR:$Vdm, DPR:$Vdm, shr_imm64:$imm, pred:$p)>; - -def : NEONInstAlias<"vsli${p}.8 $Vdm, $imm", - (VSLIv16i8 QPR:$Vdm, QPR:$Vdm, shr_imm8:$imm, pred:$p)>; -def : NEONInstAlias<"vsli${p}.16 $Vdm, $imm", - (VSLIv8i16 QPR:$Vdm, QPR:$Vdm, shr_imm16:$imm, pred:$p)>; -def : NEONInstAlias<"vsli${p}.32 $Vdm, $imm", - (VSLIv4i32 QPR:$Vdm, QPR:$Vdm, shr_imm32:$imm, pred:$p)>; -def : NEONInstAlias<"vsli${p}.64 $Vdm, $imm", - (VSLIv2i64 QPR:$Vdm, QPR:$Vdm, shr_imm64:$imm, pred:$p)>; - -// Two-operand variants for VHSUB. - // Signed. -def : NEONInstAlias<"vhsub${p}.s8 $Vdn, $Vm", - (VHSUBsv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vhsub${p}.s16 $Vdn, $Vm", - (VHSUBsv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vhsub${p}.s32 $Vdn, $Vm", - (VHSUBsv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; - -def : NEONInstAlias<"vhsub${p}.s8 $Vdn, $Vm", - (VHSUBsv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vhsub${p}.s16 $Vdn, $Vm", - (VHSUBsv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vhsub${p}.s32 $Vdn, $Vm", - (VHSUBsv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; - - // Unsigned. -def : NEONInstAlias<"vhsub${p}.u8 $Vdn, $Vm", - (VHSUBuv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vhsub${p}.u16 $Vdn, $Vm", - (VHSUBuv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vhsub${p}.u32 $Vdn, $Vm", - (VHSUBuv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; - -def : NEONInstAlias<"vhsub${p}.u8 $Vdn, $Vm", - (VHSUBuv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vhsub${p}.u16 $Vdn, $Vm", - (VHSUBuv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vhsub${p}.u32 $Vdn, $Vm", - (VHSUBuv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; - - -// Two-operand variants for VHADD. - // Signed. -def : NEONInstAlias<"vhadd${p}.s8 $Vdn, $Vm", - (VHADDsv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vhadd${p}.s16 $Vdn, $Vm", - (VHADDsv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vhadd${p}.s32 $Vdn, $Vm", - (VHADDsv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; - -def : NEONInstAlias<"vhadd${p}.s8 $Vdn, $Vm", - (VHADDsv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vhadd${p}.s16 $Vdn, $Vm", - (VHADDsv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vhadd${p}.s32 $Vdn, $Vm", - (VHADDsv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; - - // Unsigned. -def : NEONInstAlias<"vhadd${p}.u8 $Vdn, $Vm", - (VHADDuv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vhadd${p}.u16 $Vdn, $Vm", - (VHADDuv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vhadd${p}.u32 $Vdn, $Vm", - (VHADDuv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; - -def : NEONInstAlias<"vhadd${p}.u8 $Vdn, $Vm", - (VHADDuv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vhadd${p}.u16 $Vdn, $Vm", - (VHADDuv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; -def : NEONInstAlias<"vhadd${p}.u32 $Vdn, $Vm", - (VHADDuv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; - -// Two-operand variants for VRHADD. - // Signed. 
-def : NEONInstAlias<"vrhadd${p}.s8 $Vdn, $Rm", - (VRHADDsv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Rm, pred:$p)>; -def : NEONInstAlias<"vrhadd${p}.s16 $Vdn, $Rm", - (VRHADDsv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Rm, pred:$p)>; -def : NEONInstAlias<"vrhadd${p}.s32 $Vdn, $Rm", - (VRHADDsv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Rm, pred:$p)>; - -def : NEONInstAlias<"vrhadd${p}.s8 $Vdn, $Rm", - (VRHADDsv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Rm, pred:$p)>; -def : NEONInstAlias<"vrhadd${p}.s16 $Vdn, $Rm", - (VRHADDsv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Rm, pred:$p)>; -def : NEONInstAlias<"vrhadd${p}.s32 $Vdn, $Rm", - (VRHADDsv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Rm, pred:$p)>; - - // Unsigned. -def : NEONInstAlias<"vrhadd${p}.u8 $Vdn, $Rm", - (VRHADDuv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Rm, pred:$p)>; -def : NEONInstAlias<"vrhadd${p}.u16 $Vdn, $Rm", - (VRHADDuv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Rm, pred:$p)>; -def : NEONInstAlias<"vrhadd${p}.u32 $Vdn, $Rm", - (VRHADDuv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Rm, pred:$p)>; - -def : NEONInstAlias<"vrhadd${p}.u8 $Vdn, $Rm", - (VRHADDuv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Rm, pred:$p)>; -def : NEONInstAlias<"vrhadd${p}.u16 $Vdn, $Rm", - (VRHADDuv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Rm, pred:$p)>; -def : NEONInstAlias<"vrhadd${p}.u32 $Vdn, $Rm", - (VRHADDuv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Rm, pred:$p)>; - // VSWP allows, but does not require, a type suffix. defm : NEONDTAnyInstAlias<"vswp${p}", "$Vd, $Vm", (VSWPd DPR:$Vd, DPR:$Vm, pred:$p)>; diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td index 6335229..554f6d9 100644 --- a/lib/Target/ARM/ARMInstrThumb.td +++ b/lib/Target/ARM/ARMInstrThumb.td @@ -32,9 +32,6 @@ def imm_sr : Operand, PatLeaf<(imm), [{ let ParserMatchClass = ThumbSRImmAsmOperand; } -def imm_neg_XFORM : SDNodeXFormgetTargetConstant(-(int)N->getZExtValue(), MVT::i32); -}]>; def imm_comp_XFORM : SDNodeXFormgetTargetConstant(~((uint32_t)N->getZExtValue()), MVT::i32); }]>; @@ -258,16 +255,20 @@ def tNOP : T1pI<(outs), (ins), NoItinerary, "nop", "", []>, Requires<[IsThumb2]>; def tYIELD : T1pI<(outs), (ins), NoItinerary, "yield", "", []>, - T1SystemEncoding<0x10>; // A8.6.410 + T1SystemEncoding<0x10>, // A8.6.410 + Requires<[IsThumb2]>; def tWFE : T1pI<(outs), (ins), NoItinerary, "wfe", "", []>, - T1SystemEncoding<0x20>; // A8.6.408 + T1SystemEncoding<0x20>, // A8.6.408 + Requires<[IsThumb2]>; def tWFI : T1pI<(outs), (ins), NoItinerary, "wfi", "", []>, - T1SystemEncoding<0x30>; // A8.6.409 + T1SystemEncoding<0x30>, // A8.6.409 + Requires<[IsThumb2]>; def tSEV : T1pI<(outs), (ins), NoItinerary, "sev", "", []>, - T1SystemEncoding<0x40>; // A8.6.157 + T1SystemEncoding<0x40>, // A8.6.157 + Requires<[IsThumb2]>; // The imm operand $val can be used by a debugger to store more information // about the breakpoint. 
@@ -363,8 +364,8 @@ def : tInstAlias<"sub${p} sp, sp, $imm", (tSUBspi SP, t_imm0_508s4:$imm, pred:$p)>; // ADD , sp -def tADDrSP : T1pIt<(outs GPR:$Rdn), (ins GPR:$Rn, GPRsp:$sp), IIC_iALUr, - "add", "\t$Rdn, $sp, $Rn", []>, +def tADDrSP : T1pI<(outs GPR:$Rdn), (ins GPRsp:$sp, GPR:$Rn), IIC_iALUr, + "add", "\t$Rdn, $sp, $Rn", []>, T1Special<{0,0,?,?}> { // A8.6.9 Encoding T1 bits<4> Rdn; @@ -419,34 +420,35 @@ let isCall = 1, Defs = [LR], Uses = [SP] in { // Also used for Thumb2 def tBL : TIx2<0b11110, 0b11, 1, - (outs), (ins pred:$p, t_bltarget:$func, variable_ops), IIC_Br, + (outs), (ins pred:$p, t_bltarget:$func), IIC_Br, "bl${p}\t$func", [(ARMtcall tglobaladdr:$func)]>, Requires<[IsThumb]> { - bits<22> func; - let Inst{26} = func{21}; + bits<24> func; + let Inst{26} = func{23}; let Inst{25-16} = func{20-11}; - let Inst{13} = 1; - let Inst{11} = 1; + let Inst{13} = func{22}; + let Inst{11} = func{21}; let Inst{10-0} = func{10-0}; } // ARMv5T and above, also used for Thumb2 def tBLXi : TIx2<0b11110, 0b11, 0, - (outs), (ins pred:$p, t_blxtarget:$func, variable_ops), IIC_Br, + (outs), (ins pred:$p, t_blxtarget:$func), IIC_Br, "blx${p}\t$func", [(ARMcall tglobaladdr:$func)]>, Requires<[IsThumb, HasV5T]> { - bits<21> func; + bits<24> func; + let Inst{26} = func{23}; let Inst{25-16} = func{20-11}; - let Inst{13} = 1; - let Inst{11} = 1; + let Inst{13} = func{22}; + let Inst{11} = func{21}; let Inst{10-1} = func{10-1}; let Inst{0} = 0; // func{0} is assumed zero } // Also used for Thumb2 - def tBLXr : TI<(outs), (ins pred:$p, GPR:$func, variable_ops), IIC_Br, + def tBLXr : TI<(outs), (ins pred:$p, GPR:$func), IIC_Br, "blx${p}\t$func", [(ARMtcall GPR:$func)]>, Requires<[IsThumb, HasV5T]>, @@ -457,7 +459,7 @@ let isCall = 1, } // ARMv4T - def tBX_CALL : tPseudoInst<(outs), (ins tGPR:$func, variable_ops), + def tBX_CALL : tPseudoInst<(outs), (ins tGPR:$func), 4, IIC_Br, [(ARMcall_nolink tGPR:$func)]>, Requires<[IsThumb, IsThumb1Only]>; @@ -504,7 +506,7 @@ let isBranch = 1, isTerminator = 1 in let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in { // IOS versions. let Uses = [SP] in { - def tTAILJMPr : tPseudoExpand<(outs), (ins tcGPR:$dst, variable_ops), + def tTAILJMPr : tPseudoExpand<(outs), (ins tcGPR:$dst), 4, IIC_Br, [], (tBX GPR:$dst, (ops 14, zero_reg))>, Requires<[IsThumb]>; @@ -514,7 +516,7 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in { // Non-IOS version: let Uses = [SP] in { def tTAILJMPdND : tPseudoExpand<(outs), - (ins t_brtarget:$dst, pred:$p, variable_ops), + (ins t_brtarget:$dst, pred:$p), 4, IIC_Br, [], (tB t_brtarget:$dst, pred:$p)>, Requires<[IsThumb, IsNotIOS]>; @@ -1398,7 +1400,7 @@ def : InstAlias<"nop", (tMOVr R8, R8, 14, 0)>,Requires<[IsThumb, IsThumb1Only]>; // For round-trip assembly/disassembly, we have to handle a CPS instruction // without any iflags. That's not, strictly speaking, valid syntax, but it's -// a useful extention and assembles to defined behaviour (the insn does +// a useful extension and assembles to defined behaviour (the insn does // nothing). 
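The tBL/tBLXi change in the hunk above widens the branch-target field to 24 bits and routes the extra bits through Inst{26}, Inst{13} and Inst{11} instead of hard-wiring the latter two to 1. A sketch of the resulting bit packing for tBL, mirroring the let-assignments; the 0xF000D000 base is an assumption about where the TIx2<0b11110, 0b11, 1, ...> opcode bits land (Inst{31-27}, Inst{15-14}, Inst{12}):

    #include <cstdint>

    uint32_t encodeThumbBL(uint32_t func) {      // 24 significant bits
      uint32_t Inst = 0xF000D000;                // assumed TIx2 base bits
      Inst |= ((func >> 23) & 0x1)   << 26;      // Inst{26}    = func{23}
      Inst |= ((func >> 11) & 0x3FF) << 16;      // Inst{25-16} = func{20-11}
      Inst |= ((func >> 22) & 0x1)   << 13;      // Inst{13}    = func{22}
      Inst |= ((func >> 21) & 0x1)   << 11;      // Inst{11}    = func{21}
      Inst |= func & 0x7FF;                      // Inst{10-0}  = func{10-0}
      return Inst;
    }
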
def : tInstAlias<"cps$imod", (tCPS imod_op:$imod, 0)>; def : tInstAlias<"cps$imod", (tCPS imod_op:$imod, 0)>; diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td index e6fb9d5..307006f 100644 --- a/lib/Target/ARM/ARMInstrThumb2.td +++ b/lib/Target/ARM/ARMInstrThumb2.td @@ -62,6 +62,15 @@ def t2_so_imm_neg_XFORM : SDNodeXFormgetTargetConstant(-((int)N->getZExtValue()), MVT::i32); }]>; +// so_imm_notSext_XFORM - Return a so_imm value packed into the format +// described for so_imm_notSext def below, with sign extension from 16 +// bits. +def t2_so_imm_notSext16_XFORM : SDNodeXFormgetAPIntValue(); + unsigned N16bitSignExt = apIntN.trunc(16).sext(32).getZExtValue(); + return CurDAG->getTargetConstant(~N16bitSignExt, MVT::i32); +}]>; + // t2_so_imm - Match a 32-bit immediate operand, which is an // 8-bit immediate rotated by an arbitrary number of bits, or an 8-bit // immediate splatted into multiple bytes of the word. @@ -86,6 +95,17 @@ def t2_so_imm_not : Operand, PatLeaf<(imm), [{ let ParserMatchClass = t2_so_imm_not_asmoperand; } +// t2_so_imm_notSext - match an immediate that is a complement of a t2_so_imm +// if the upper 16 bits are zero. +def t2_so_imm_notSext : Operand, PatLeaf<(imm), [{ + APInt apIntN = N->getAPIntValue(); + if (!apIntN.isIntN(16)) return false; + unsigned N16bitSignExt = apIntN.trunc(16).sext(32).getZExtValue(); + return ARM_AM::getT2SOImmVal(~N16bitSignExt) != -1; + }], t2_so_imm_notSext16_XFORM> { + let ParserMatchClass = t2_so_imm_not_asmoperand; +} + // t2_so_imm_neg - Match an immediate that is a negation of a t2_so_imm. def t2_so_imm_neg_asmoperand : AsmOperandClass { let Name = "T2SOImmNeg"; } def t2_so_imm_neg : Operand, PatLeaf<(imm), [{ @@ -152,6 +172,7 @@ def t2ldr_pcrel_imm12 : Operand { // ADR instruction labels. def t2adrlabel : Operand { let EncoderMethod = "getT2AdrLabelOpValue"; + let PrintMethod = "printAdrLabelOperand"; } @@ -509,7 +530,7 @@ class T2MulLong opc22_20, bits<4> opc7_4, /// changed to modify CPSR. multiclass T2I_bin_irs opcod, string opc, InstrItinClass iii, InstrItinClass iir, InstrItinClass iis, - PatFrag opnode, string baseOpc, bit Commutable = 0, + PatFrag opnode, bit Commutable = 0, string wide = ""> { // shifted imm def ri : T2sTwoRegImm< @@ -545,15 +566,15 @@ multiclass T2I_bin_irs opcod, string opc, // Assembly aliases for optional destination operand when it's the same // as the source operand. def : t2InstAlias(!strconcat(baseOpc, "ri")) rGPR:$Rdn, rGPR:$Rdn, + (!cast(NAME#"ri") rGPR:$Rdn, rGPR:$Rdn, t2_so_imm:$imm, pred:$p, cc_out:$s)>; def : t2InstAlias(!strconcat(baseOpc, "rr")) rGPR:$Rdn, rGPR:$Rdn, + (!cast(NAME#"rr") rGPR:$Rdn, rGPR:$Rdn, rGPR:$Rm, pred:$p, cc_out:$s)>; def : t2InstAlias(!strconcat(baseOpc, "rs")) rGPR:$Rdn, rGPR:$Rdn, + (!cast(NAME#"rs") rGPR:$Rdn, rGPR:$Rdn, t2_so_reg:$shift, pred:$p, cc_out:$s)>; } @@ -562,36 +583,30 @@ multiclass T2I_bin_irs opcod, string opc, // the ".w" suffix to indicate that they are wide. multiclass T2I_bin_w_irs opcod, string opc, InstrItinClass iii, InstrItinClass iir, InstrItinClass iis, - PatFrag opnode, string baseOpc, bit Commutable = 0> : - T2I_bin_irs { + PatFrag opnode, bit Commutable = 0> : + T2I_bin_irs { // Assembler aliases w/ the ".w" suffix. def : t2InstAlias(!strconcat(baseOpc, "ri")) rGPR:$Rd, rGPR:$Rn, - t2_so_imm:$imm, pred:$p, - cc_out:$s)>; + (!cast(NAME#"ri") rGPR:$Rd, rGPR:$Rn, t2_so_imm:$imm, pred:$p, + cc_out:$s)>; // Assembler aliases w/o the ".w" suffix. 
def : t2InstAlias(!strconcat(baseOpc, "rr")) rGPR:$Rd, rGPR:$Rn, - rGPR:$Rm, pred:$p, - cc_out:$s)>; + (!cast(NAME#"rr") rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, pred:$p, + cc_out:$s)>; def : t2InstAlias(!strconcat(baseOpc, "rs")) rGPR:$Rd, rGPR:$Rn, - t2_so_reg:$shift, pred:$p, - cc_out:$s)>; + (!cast(NAME#"rs") rGPR:$Rd, rGPR:$Rn, t2_so_reg:$shift, + pred:$p, cc_out:$s)>; // and with the optional destination operand, too. def : t2InstAlias(!strconcat(baseOpc, "ri")) rGPR:$Rdn, rGPR:$Rdn, - t2_so_imm:$imm, pred:$p, - cc_out:$s)>; + (!cast(NAME#"ri") rGPR:$Rdn, rGPR:$Rdn, t2_so_imm:$imm, + pred:$p, cc_out:$s)>; def : t2InstAlias(!strconcat(baseOpc, "rr")) rGPR:$Rdn, rGPR:$Rdn, - rGPR:$Rm, pred:$p, - cc_out:$s)>; + (!cast(NAME#"rr") rGPR:$Rdn, rGPR:$Rdn, rGPR:$Rm, pred:$p, + cc_out:$s)>; def : t2InstAlias(!strconcat(baseOpc, "rs")) rGPR:$Rdn, rGPR:$Rdn, - t2_so_reg:$shift, pred:$p, - cc_out:$s)>; + (!cast(NAME#"rs") rGPR:$Rdn, rGPR:$Rdn, t2_so_reg:$shift, + pred:$p, cc_out:$s)>; } /// T2I_rbin_is - Same as T2I_bin_irs except the order of operands are @@ -668,16 +683,16 @@ let hasPostISelHook = 1, Defs = [CPSR] in { multiclass T2I_rbin_s_is { // shifted imm def ri : t2PseudoInst<(outs rGPR:$Rd), - (ins GPRnopc:$Rn, t2_so_imm:$imm, pred:$p), + (ins rGPR:$Rn, t2_so_imm:$imm, pred:$p), 4, IIC_iALUi, [(set rGPR:$Rd, CPSR, (opnode t2_so_imm:$imm, - GPRnopc:$Rn))]>; + rGPR:$Rn))]>; // shifted register def rs : t2PseudoInst<(outs rGPR:$Rd), - (ins GPRnopc:$Rn, t2_so_reg:$ShiftedRm, pred:$p), + (ins rGPR:$Rn, t2_so_reg:$ShiftedRm, pred:$p), 4, IIC_iALUsi, [(set rGPR:$Rd, CPSR, (opnode t2_so_reg:$ShiftedRm, - GPRnopc:$Rn))]>; + rGPR:$Rn))]>; } } @@ -788,8 +803,7 @@ multiclass T2I_adde_sube_irs opcod, string opc, PatFrag opnode, /// T2I_sh_ir - Defines a set of (op reg, {so_imm|r}) patterns for a shift / // rotate operation that produces a value. -multiclass T2I_sh_ir opcod, string opc, Operand ty, PatFrag opnode, - string baseOpc> { +multiclass T2I_sh_ir opcod, string opc, Operand ty, PatFrag opnode> { // 5-bit imm def ri : T2sTwoRegShiftImm< (outs rGPR:$Rd), (ins rGPR:$Rm, ty:$imm), IIC_iMOVsi, @@ -814,33 +828,27 @@ multiclass T2I_sh_ir opcod, string opc, Operand ty, PatFrag opnode, // Optional destination register def : t2InstAlias(!strconcat(baseOpc, "ri")) rGPR:$Rdn, rGPR:$Rdn, - ty:$imm, pred:$p, - cc_out:$s)>; + (!cast(NAME#"ri") rGPR:$Rdn, rGPR:$Rdn, ty:$imm, pred:$p, + cc_out:$s)>; def : t2InstAlias(!strconcat(baseOpc, "rr")) rGPR:$Rdn, rGPR:$Rdn, - rGPR:$Rm, pred:$p, - cc_out:$s)>; + (!cast(NAME#"rr") rGPR:$Rdn, rGPR:$Rdn, rGPR:$Rm, pred:$p, + cc_out:$s)>; // Assembler aliases w/o the ".w" suffix. def : t2InstAlias(!strconcat(baseOpc, "ri")) rGPR:$Rd, rGPR:$Rn, - ty:$imm, pred:$p, - cc_out:$s)>; + (!cast(NAME#"ri") rGPR:$Rd, rGPR:$Rn, ty:$imm, pred:$p, + cc_out:$s)>; def : t2InstAlias(!strconcat(baseOpc, "rr")) rGPR:$Rd, rGPR:$Rn, - rGPR:$Rm, pred:$p, - cc_out:$s)>; + (!cast(NAME#"rr") rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, pred:$p, + cc_out:$s)>; // and with the optional destination operand, too. 
def : t2InstAlias(!strconcat(baseOpc, "ri")) rGPR:$Rdn, rGPR:$Rdn, - ty:$imm, pred:$p, - cc_out:$s)>; + (!cast(NAME#"ri") rGPR:$Rdn, rGPR:$Rdn, ty:$imm, pred:$p, + cc_out:$s)>; def : t2InstAlias(!strconcat(baseOpc, "rr")) rGPR:$Rdn, rGPR:$Rdn, - rGPR:$Rm, pred:$p, - cc_out:$s)>; + (!cast(NAME#"rr") rGPR:$Rdn, rGPR:$Rdn, rGPR:$Rm, pred:$p, + cc_out:$s)>; } /// T2I_cmp_irs - Defines a set of (op r, {so_imm|r|so_reg}) cmp / test @@ -848,7 +856,7 @@ multiclass T2I_sh_ir opcod, string opc, Operand ty, PatFrag opnode, /// a explicit result, only implicitly set CPSR. multiclass T2I_cmp_irs opcod, string opc, InstrItinClass iii, InstrItinClass iir, InstrItinClass iis, - PatFrag opnode, string baseOpc> { + PatFrag opnode> { let isCompare = 1, Defs = [CPSR] in { // shifted imm def ri : T2OneRegCmpImm< @@ -893,12 +901,9 @@ let isCompare = 1, Defs = [CPSR] in { // No alias here for 'rr' version as not all instantiations of this // multiclass want one (CMP in particular, does not). def : t2InstAlias(!strconcat(baseOpc, "ri")) GPRnopc:$Rn, - t2_so_imm:$imm, pred:$p)>; + (!cast(NAME#"ri") GPRnopc:$Rn, t2_so_imm:$imm, pred:$p)>; def : t2InstAlias(!strconcat(baseOpc, "rs")) GPRnopc:$Rn, - t2_so_reg:$shift, - pred:$p)>; + (!cast(NAME#"rs") GPRnopc:$Rn, t2_so_reg:$shift, pred:$p)>; } /// T2I_ld - Defines a set of (op r, {imm12|imm8|so_reg}) load patterns. @@ -1911,11 +1916,16 @@ def : T2Pat<(add GPR:$src, t2_so_imm_neg:$imm), (t2SUBri GPR:$src, t2_so_imm_neg:$imm)>; def : T2Pat<(add GPR:$src, imm0_4095_neg:$imm), (t2SUBri12 GPR:$src, imm0_4095_neg:$imm)>; +def : T2Pat<(add GPR:$src, imm0_65535_neg:$imm), + (t2SUBrr GPR:$src, (t2MOVi16 (imm_neg_XFORM imm:$imm)))>; + let AddedComplexity = 1 in def : T2Pat<(ARMaddc rGPR:$src, imm0_255_neg:$imm), (t2SUBSri rGPR:$src, imm0_255_neg:$imm)>; def : T2Pat<(ARMaddc rGPR:$src, t2_so_imm_neg:$imm), (t2SUBSri rGPR:$src, t2_so_imm_neg:$imm)>; +def : T2Pat<(ARMaddc rGPR:$src, imm0_65535_neg:$imm), + (t2SUBSrr rGPR:$src, (t2MOVi16 (imm_neg_XFORM imm:$imm)))>; // The with-carry-in form matches bitwise not instead of the negation. // Effectively, the inverse interpretation of the carry flag already accounts // for part of the negation. 
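The three new imm0_65535_neg T2Pats in the hunk above all exploit the same identity: when -imm does not fit any modified-immediate encoding but does fit in 16 bits, src + imm can be rewritten as src - movw(-imm), materialising the magnitude with t2MOVi16 and subtracting. A worked restatement under two's-complement wraparound:

    #include <cstdint>

    uint32_t addNegImm(uint32_t Src, int32_t Imm) { // Imm in [-65535, -1]
      uint32_t Tmp = (uint32_t)(-Imm);  // t2MOVi16 materialises -Imm
      return Src - Tmp;                 // t2SUBrr: Src - (-Imm) == Src + Imm
    }

The flag-setting and carry-consuming variants map the same way onto t2SUBSrr and t2SBCrr.
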
@@ -1924,6 +1934,8 @@ def : T2Pat<(ARMadde rGPR:$src, imm0_255_not:$imm, CPSR), (t2SBCri rGPR:$src, imm0_255_not:$imm)>; def : T2Pat<(ARMadde rGPR:$src, t2_so_imm_not:$imm, CPSR), (t2SBCri rGPR:$src, t2_so_imm_not:$imm)>; +def : T2Pat<(ARMadde rGPR:$src, imm0_65535_neg:$imm, CPSR), + (t2SBCrr rGPR:$src, (t2MOVi16 (imm_neg_XFORM imm:$imm)))>; // Select Bytes -- for disassembly only @@ -2125,17 +2137,17 @@ def : T2Pat<(int_arm_usat GPR:$a, imm:$pos), (t2USAT imm:$pos, GPR:$a, 0)>; // defm t2LSL : T2I_sh_ir<0b00, "lsl", imm0_31, - BinOpFrag<(shl node:$LHS, node:$RHS)>, "t2LSL">; + BinOpFrag<(shl node:$LHS, node:$RHS)>>; defm t2LSR : T2I_sh_ir<0b01, "lsr", imm_sr, - BinOpFrag<(srl node:$LHS, node:$RHS)>, "t2LSR">; + BinOpFrag<(srl node:$LHS, node:$RHS)>>; defm t2ASR : T2I_sh_ir<0b10, "asr", imm_sr, - BinOpFrag<(sra node:$LHS, node:$RHS)>, "t2ASR">; + BinOpFrag<(sra node:$LHS, node:$RHS)>>; defm t2ROR : T2I_sh_ir<0b11, "ror", imm0_31, - BinOpFrag<(rotr node:$LHS, node:$RHS)>, "t2ROR">; + BinOpFrag<(rotr node:$LHS, node:$RHS)>>; // (rotr x, (and y, 0x...1f)) ==> (ROR x, y) -def : Pat<(rotr rGPR:$lhs, (and rGPR:$rhs, lo5AllOne)), - (t2RORrr rGPR:$lhs, rGPR:$rhs)>; +def : T2Pat<(rotr rGPR:$lhs, (and rGPR:$rhs, lo5AllOne)), + (t2RORrr rGPR:$lhs, rGPR:$rhs)>; let Uses = [CPSR] in { def t2RRX : T2sTwoReg<(outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iMOVsi, @@ -2187,18 +2199,17 @@ def t2MOVsra_flag : T2TwoRegShiftImm< defm t2AND : T2I_bin_w_irs<0b0000, "and", IIC_iBITi, IIC_iBITr, IIC_iBITsi, - BinOpFrag<(and node:$LHS, node:$RHS)>, "t2AND", 1>; + BinOpFrag<(and node:$LHS, node:$RHS)>, 1>; defm t2ORR : T2I_bin_w_irs<0b0010, "orr", IIC_iBITi, IIC_iBITr, IIC_iBITsi, - BinOpFrag<(or node:$LHS, node:$RHS)>, "t2ORR", 1>; + BinOpFrag<(or node:$LHS, node:$RHS)>, 1>; defm t2EOR : T2I_bin_w_irs<0b0100, "eor", IIC_iBITi, IIC_iBITr, IIC_iBITsi, - BinOpFrag<(xor node:$LHS, node:$RHS)>, "t2EOR", 1>; + BinOpFrag<(xor node:$LHS, node:$RHS)>, 1>; defm t2BIC : T2I_bin_w_irs<0b0001, "bic", IIC_iBITi, IIC_iBITr, IIC_iBITsi, - BinOpFrag<(and node:$LHS, (not node:$RHS))>, - "t2BIC">; + BinOpFrag<(and node:$LHS, (not node:$RHS))>>; class T2BitFI pattern> @@ -2278,8 +2289,7 @@ let Constraints = "$src = $Rd" in { defm t2ORN : T2I_bin_irs<0b0011, "orn", IIC_iBITi, IIC_iBITr, IIC_iBITsi, - BinOpFrag<(or node:$LHS, (not node:$RHS))>, - "t2ORN", 0, "">; + BinOpFrag<(or node:$LHS, (not node:$RHS))>, 0, "">; /// T2I_un_irs - Defines a set of (op reg, {so_imm|r|so_reg}) patterns for a /// unary operation that produces a value. These are predicable and can be @@ -2332,6 +2342,17 @@ let AddedComplexity = 1 in def : T2Pat<(and rGPR:$src, t2_so_imm_not:$imm), (t2BICri rGPR:$src, t2_so_imm_not:$imm)>; +// top16Zero - answer true if the upper 16 bits of $src are 0, false otherwise +def top16Zero: PatLeaf<(i32 rGPR:$src), [{ + return CurDAG->MaskedValueIsZero(SDValue(N,0), APInt::getHighBitsSet(32, 16)); + }]>; + +// so_imm_notSext is needed instead of so_imm_not, as the value of imm +// will match the extended, not the original bitWidth for $src. +def : T2Pat<(and top16Zero:$src, t2_so_imm_notSext:$imm), + (t2BICri rGPR:$src, t2_so_imm_notSext:$imm)>; + + // FIXME: Disable this pattern on Darwin to workaround an assembler bug. 
def : T2Pat<(or rGPR:$src, t2_so_imm_not:$imm), (t2ORNri rGPR:$src, t2_so_imm_not:$imm)>, @@ -2840,7 +2861,7 @@ def : T2Pat<(or (and rGPR:$src1, 0xFFFF0000), // defm t2CMP : T2I_cmp_irs<0b1101, "cmp", IIC_iCMPi, IIC_iCMPr, IIC_iCMPsi, - BinOpFrag<(ARMcmp node:$LHS, node:$RHS)>, "t2CMP">; + BinOpFrag<(ARMcmp node:$LHS, node:$RHS)>>; def : T2Pat<(ARMcmpZ GPRnopc:$lhs, t2_so_imm:$imm), (t2CMPri GPRnopc:$lhs, t2_so_imm:$imm)>; @@ -2849,29 +2870,68 @@ def : T2Pat<(ARMcmpZ GPRnopc:$lhs, rGPR:$rhs), def : T2Pat<(ARMcmpZ GPRnopc:$lhs, t2_so_reg:$rhs), (t2CMPrs GPRnopc:$lhs, t2_so_reg:$rhs)>; -//FIXME: Disable CMN, as CCodes are backwards from compare expectations -// Compare-to-zero still works out, just not the relationals -//defm t2CMN : T2I_cmp_irs<0b1000, "cmn", -// BinOpFrag<(ARMcmp node:$LHS,(ineg node:$RHS))>>; -defm t2CMNz : T2I_cmp_irs<0b1000, "cmn", - IIC_iCMPi, IIC_iCMPr, IIC_iCMPsi, - BinOpFrag<(ARMcmpZ node:$LHS,(ineg node:$RHS))>, - "t2CMNz">; +let isCompare = 1, Defs = [CPSR] in { + // shifted imm + def t2CMNri : T2OneRegCmpImm< + (outs), (ins GPRnopc:$Rn, t2_so_imm:$imm), IIC_iCMPi, + "cmn", ".w\t$Rn, $imm", + [(ARMcmn GPRnopc:$Rn, (ineg t2_so_imm:$imm))]> { + let Inst{31-27} = 0b11110; + let Inst{25} = 0; + let Inst{24-21} = 0b1000; + let Inst{20} = 1; // The S bit. + let Inst{15} = 0; + let Inst{11-8} = 0b1111; // Rd + } + // register + def t2CMNzrr : T2TwoRegCmp< + (outs), (ins GPRnopc:$Rn, rGPR:$Rm), IIC_iCMPr, + "cmn", ".w\t$Rn, $Rm", + [(BinOpFrag<(ARMcmpZ node:$LHS,(ineg node:$RHS))> + GPRnopc:$Rn, rGPR:$Rm)]> { + let Inst{31-27} = 0b11101; + let Inst{26-25} = 0b01; + let Inst{24-21} = 0b1000; + let Inst{20} = 1; // The S bit. + let Inst{14-12} = 0b000; // imm3 + let Inst{11-8} = 0b1111; // Rd + let Inst{7-6} = 0b00; // imm2 + let Inst{5-4} = 0b00; // type + } + // shifted register + def t2CMNzrs : T2OneRegCmpShiftedReg< + (outs), (ins GPRnopc:$Rn, t2_so_reg:$ShiftedRm), IIC_iCMPsi, + "cmn", ".w\t$Rn, $ShiftedRm", + [(BinOpFrag<(ARMcmpZ node:$LHS,(ineg node:$RHS))> + GPRnopc:$Rn, t2_so_reg:$ShiftedRm)]> { + let Inst{31-27} = 0b11101; + let Inst{26-25} = 0b01; + let Inst{24-21} = 0b1000; + let Inst{20} = 1; // The S bit. + let Inst{11-8} = 0b1111; // Rd + } +} -//def : T2Pat<(ARMcmp GPR:$src, t2_so_imm_neg:$imm), -// (t2CMNri GPR:$src, t2_so_imm_neg:$imm)>; +// Assembler aliases w/o the ".w" suffix. +// No alias here for 'rr' version as not all instantiations of this multiclass +// want one (CMP in particular, does not). 
+def : t2InstAlias<"cmn${p} $Rn, $imm", + (t2CMNri GPRnopc:$Rn, t2_so_imm:$imm, pred:$p)>; +def : t2InstAlias<"cmn${p} $Rn, $shift", + (t2CMNzrs GPRnopc:$Rn, t2_so_reg:$shift, pred:$p)>; -def : T2Pat<(ARMcmpZ GPRnopc:$src, t2_so_imm_neg:$imm), - (t2CMNzri GPRnopc:$src, t2_so_imm_neg:$imm)>; +def : T2Pat<(ARMcmp GPR:$src, t2_so_imm_neg:$imm), + (t2CMNri GPR:$src, t2_so_imm_neg:$imm)>; + +def : T2Pat<(ARMcmpZ GPRnopc:$src, t2_so_imm_neg:$imm), + (t2CMNri GPRnopc:$src, t2_so_imm_neg:$imm)>; defm t2TST : T2I_cmp_irs<0b0000, "tst", IIC_iTSTi, IIC_iTSTr, IIC_iTSTsi, - BinOpFrag<(ARMcmpZ (and_su node:$LHS, node:$RHS), 0)>, - "t2TST">; + BinOpFrag<(ARMcmpZ (and_su node:$LHS, node:$RHS), 0)>>; defm t2TEQ : T2I_cmp_irs<0b0100, "teq", IIC_iTSTi, IIC_iTSTr, IIC_iTSTsi, - BinOpFrag<(ARMcmpZ (xor_su node:$LHS, node:$RHS), 0)>, - "t2TEQ">; + BinOpFrag<(ARMcmpZ (xor_su node:$LHS, node:$RHS), 0)>>; // Conditional moves // FIXME: should be able to write a pattern for ARMcmov, but can't use @@ -3017,7 +3077,7 @@ def t2DSB : AInoP<(outs), (ins memb_opt:$opt), ThumbFrm, NoItinerary, def t2ISB : AInoP<(outs), (ins memb_opt:$opt), ThumbFrm, NoItinerary, "isb", "\t$opt", - []>, Requires<[IsThumb2, HasDB]> { + []>, Requires<[IsThumb, HasDB]> { bits<4> opt; let Inst{31-4} = 0xf3bf8f6; let Inst{3-0} = opt; @@ -3271,7 +3331,7 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in { // IOS version. let Uses = [SP] in def tTAILJMPd: tPseudoExpand<(outs), - (ins uncondbrtarget:$dst, pred:$p, variable_ops), + (ins uncondbrtarget:$dst, pred:$p), 4, IIC_Br, [], (t2B uncondbrtarget:$dst, pred:$p)>, Requires<[IsThumb2, IsIOS]>; @@ -3281,7 +3341,7 @@ let isCall = 1, Defs = [LR], Uses = [SP] in { // mov lr, pc; b if callee is marked noreturn to avoid confusing the // return stack predictor. def t2BMOVPCB_CALL : tPseudoInst<(outs), - (ins t_bltarget:$func, variable_ops), + (ins t_bltarget:$func), 6, IIC_Br, [(ARMcall_nolink tglobaladdr:$func)]>, Requires<[IsThumb]>; } @@ -3382,21 +3442,18 @@ let imod = 0, iflags = 0, M = 1 in // A6.3.4 Branches and miscellaneous control // Table A6-14 Change Processor State, and hint instructions -class T2I_hint op7_0, string opc, string asm> - : T2I<(outs), (ins), NoItinerary, opc, asm, []> { - let Inst{31-20} = 0xf3a; - let Inst{19-16} = 0b1111; - let Inst{15-14} = 0b10; - let Inst{12} = 0; - let Inst{10-8} = 0b000; - let Inst{7-0} = op7_0; +def t2HINT : T2I<(outs), (ins imm0_255:$imm), NoItinerary, "hint", "\t$imm",[]>{ + bits<8> imm; + let Inst{31-8} = 0b111100111010111110000000; + let Inst{7-0} = imm; } -def t2NOP : T2I_hint<0b00000000, "nop", ".w">; -def t2YIELD : T2I_hint<0b00000001, "yield", ".w">; -def t2WFE : T2I_hint<0b00000010, "wfe", ".w">; -def t2WFI : T2I_hint<0b00000011, "wfi", ".w">; -def t2SEV : T2I_hint<0b00000100, "sev", ".w">; +def : t2InstAlias<"hint$p.w $imm", (t2HINT imm0_255:$imm, pred:$p)>; +def : t2InstAlias<"nop$p.w", (t2HINT 0, pred:$p)>; +def : t2InstAlias<"yield$p.w", (t2HINT 1, pred:$p)>; +def : t2InstAlias<"wfe$p.w", (t2HINT 2, pred:$p)>; +def : t2InstAlias<"wfi$p.w", (t2HINT 3, pred:$p)>; +def : t2InstAlias<"sev$p.w", (t2HINT 4, pred:$p)>; def t2DBG : T2I<(outs), (ins imm0_15:$opt), NoItinerary, "dbg", "\t$opt", []> { bits<4> opt; @@ -3622,8 +3679,8 @@ defm t2STC2L : t2LdStCop<0b1111, 0, 1, "stc2l">; // A/R class MRS. // // A/R class can only move from CPSR or SPSR. 
-def t2MRS_AR : T2I<(outs GPR:$Rd), (ins), NoItinerary, "mrs", "\t$Rd, apsr", []>, - Requires<[IsThumb2,IsARClass]> { +def t2MRS_AR : T2I<(outs GPR:$Rd), (ins), NoItinerary, "mrs", "\t$Rd, apsr", + []>, Requires<[IsThumb2,IsARClass]> { bits<4> Rd; let Inst{31-12} = 0b11110011111011111000; let Inst{11-8} = Rd; @@ -3632,8 +3689,8 @@ def t2MRS_AR : T2I<(outs GPR:$Rd), (ins), NoItinerary, "mrs", "\t$Rd, apsr", []> def : t2InstAlias<"mrs${p} $Rd, cpsr", (t2MRS_AR GPR:$Rd, pred:$p)>; -def t2MRSsys_AR: T2I<(outs GPR:$Rd), (ins), NoItinerary, "mrs", "\t$Rd, spsr", []>, - Requires<[IsThumb2,IsARClass]> { +def t2MRSsys_AR: T2I<(outs GPR:$Rd), (ins), NoItinerary, "mrs", "\t$Rd, spsr", + []>, Requires<[IsThumb2,IsARClass]> { bits<4> Rd; let Inst{31-12} = 0b11110011111111111000; let Inst{11-8} = Rd; @@ -3646,7 +3703,7 @@ def t2MRSsys_AR: T2I<(outs GPR:$Rd), (ins), NoItinerary, "mrs", "\t$Rd, spsr", [ // the A/R class (a full msr_mask). def t2MRS_M : T2I<(outs rGPR:$Rd), (ins msr_mask:$mask), NoItinerary, "mrs", "\t$Rd, $mask", []>, - Requires<[IsThumb2,IsMClass]> { + Requires<[IsThumb,IsMClass]> { bits<4> Rd; bits<8> mask; let Inst{31-12} = 0b11110011111011111000; @@ -3682,14 +3739,14 @@ def t2MSR_AR : T2I<(outs), (ins msr_mask:$mask, rGPR:$Rn), // Move from ARM core register to Special Register def t2MSR_M : T2I<(outs), (ins msr_mask:$SYSm, rGPR:$Rn), NoItinerary, "msr", "\t$SYSm, $Rn", []>, - Requires<[IsThumb2,IsMClass]> { - bits<8> SYSm; + Requires<[IsThumb,IsMClass]> { + bits<12> SYSm; bits<4> Rn; let Inst{31-21} = 0b11110011100; let Inst{20} = 0b0; let Inst{19-16} = Rn; let Inst{15-12} = 0b1000; - let Inst{7-0} = SYSm; + let Inst{11-0} = SYSm; } @@ -3969,6 +4026,17 @@ def : t2InstAlias<"add${s}${p} $Rdn, $imm", def : t2InstAlias<"add${p} $Rdn, $imm", (t2SUBri12 GPRnopc:$Rdn, GPRnopc:$Rdn, imm0_4095_neg:$imm, pred:$p)>; +def : t2InstAlias<"add${s}${p}.w $Rd, $Rn, $imm", + (t2SUBri GPRnopc:$Rd, GPRnopc:$Rn, t2_so_imm_neg:$imm, pred:$p, + cc_out:$s)>; +def : t2InstAlias<"addw${p} $Rd, $Rn, $imm", + (t2SUBri12 GPRnopc:$Rd, GPR:$Rn, imm0_4095_neg:$imm, pred:$p)>; +def : t2InstAlias<"add${s}${p}.w $Rdn, $imm", + (t2SUBri GPRnopc:$Rdn, GPRnopc:$Rdn, t2_so_imm_neg:$imm, pred:$p, + cc_out:$s)>; +def : t2InstAlias<"addw${p} $Rdn, $imm", + (t2SUBri12 GPRnopc:$Rdn, GPRnopc:$Rdn, imm0_4095_neg:$imm, pred:$p)>; + // Aliases for SUB without the ".w" optional width specifier. def : t2InstAlias<"sub${s}${p} $Rd, $Rn, $imm", @@ -4002,9 +4070,9 @@ def : t2InstAlias<"tst${p} $Rn, $Rm", (t2TSTrr GPRnopc:$Rn, rGPR:$Rm, pred:$p)>; // Memory barriers -def : InstAlias<"dmb", (t2DMB 0xf)>, Requires<[IsThumb2, HasDB]>; -def : InstAlias<"dsb", (t2DSB 0xf)>, Requires<[IsThumb2, HasDB]>; -def : InstAlias<"isb", (t2ISB 0xf)>, Requires<[IsThumb2, HasDB]>; +def : InstAlias<"dmb", (t2DMB 0xf)>, Requires<[IsThumb, HasDB]>; +def : InstAlias<"dsb", (t2DSB 0xf)>, Requires<[IsThumb, HasDB]>; +def : InstAlias<"isb", (t2ISB 0xf)>, Requires<[IsThumb, HasDB]>; // Alias for LDR, LDRB, LDRH, LDRSB, and LDRSH without the ".w" optional // width specifier. 
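The rewritten CMN handling above (and the cmp <-> cmn aliases in the next hunk) rests on the fact that cmn Rn, #imm sets flags from Rn + imm, which is bit-for-bit the sum that cmp Rn, #-imm would compute; the identical sum gives identical N and Z, and C and V follow as well except possibly at the 0x80000000 corner. A minimal check of the value identity:

    #include <cassert>
    #include <cstdint>

    void checkCmpCmn(uint32_t Rn, uint32_t Imm) {
      uint32_t ViaCmp = Rn - (0u - Imm); // cmp Rn, #-Imm
      uint32_t ViaCmn = Rn + Imm;        // cmn Rn, #Imm
      assert(ViaCmp == ViaCmn);          // same sum => same flags from it
    }
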
@@ -4213,7 +4281,7 @@ def : t2InstAlias<"add${s}${p} $Rd, $imm", pred:$p, cc_out:$s)>; // Same for CMP <--> CMN via t2_so_imm_neg def : t2InstAlias<"cmp${p} $Rd, $imm", - (t2CMNzri rGPR:$Rd, t2_so_imm_neg:$imm, pred:$p)>; + (t2CMNri rGPR:$Rd, t2_so_imm_neg:$imm, pred:$p)>; def : t2InstAlias<"cmn${p} $Rd, $imm", (t2CMPri rGPR:$Rd, t2_so_imm_neg:$imm, pred:$p)>; diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td index 3600b88..23c132e 100644 --- a/lib/Target/ARM/ARMInstrVFP.td +++ b/lib/Target/ARM/ARMInstrVFP.td @@ -221,11 +221,13 @@ defm : VFPDTAnyInstAlias<"vpop${p}", "$r", // FP Binary Operations. // +let TwoOperandAliasConstraint = "$Dn = $Dd" in def VADDD : ADbI<0b11100, 0b11, 0, 0, (outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm), IIC_fpALU64, "vadd", ".f64\t$Dd, $Dn, $Dm", [(set DPR:$Dd, (fadd DPR:$Dn, (f64 DPR:$Dm)))]>; +let TwoOperandAliasConstraint = "$Sn = $Sd" in def VADDS : ASbIn<0b11100, 0b11, 0, 0, (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm), IIC_fpALU32, "vadd", ".f32\t$Sd, $Sn, $Sm", @@ -235,11 +237,13 @@ def VADDS : ASbIn<0b11100, 0b11, 0, 0, let D = VFPNeonA8Domain; } +let TwoOperandAliasConstraint = "$Dn = $Dd" in def VSUBD : ADbI<0b11100, 0b11, 1, 0, (outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm), IIC_fpALU64, "vsub", ".f64\t$Dd, $Dn, $Dm", [(set DPR:$Dd, (fsub DPR:$Dn, (f64 DPR:$Dm)))]>; +let TwoOperandAliasConstraint = "$Sn = $Sd" in def VSUBS : ASbIn<0b11100, 0b11, 1, 0, (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm), IIC_fpALU32, "vsub", ".f32\t$Sd, $Sn, $Sm", @@ -249,21 +253,25 @@ def VSUBS : ASbIn<0b11100, 0b11, 1, 0, let D = VFPNeonA8Domain; } +let TwoOperandAliasConstraint = "$Dn = $Dd" in def VDIVD : ADbI<0b11101, 0b00, 0, 0, (outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm), IIC_fpDIV64, "vdiv", ".f64\t$Dd, $Dn, $Dm", [(set DPR:$Dd, (fdiv DPR:$Dn, (f64 DPR:$Dm)))]>; +let TwoOperandAliasConstraint = "$Sn = $Sd" in def VDIVS : ASbI<0b11101, 0b00, 0, 0, (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm), IIC_fpDIV32, "vdiv", ".f32\t$Sd, $Sn, $Sm", [(set SPR:$Sd, (fdiv SPR:$Sn, SPR:$Sm))]>; +let TwoOperandAliasConstraint = "$Dn = $Dd" in def VMULD : ADbI<0b11100, 0b10, 0, 0, (outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm), IIC_fpMUL64, "vmul", ".f64\t$Dd, $Dn, $Dm", [(set DPR:$Dd, (fmul DPR:$Dn, (f64 DPR:$Dm)))]>; +let TwoOperandAliasConstraint = "$Sn = $Sd" in def VMULS : ASbIn<0b11100, 0b10, 0, 0, (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm), IIC_fpMUL32, "vmul", ".f32\t$Sd, $Sn, $Sm", @@ -559,8 +567,8 @@ def VMOVRRS : AVConv3I<0b11000101, 0b1010, bits<4> Rt2; // Encode instruction operands. - let Inst{3-0} = src1{3-0}; - let Inst{5} = src1{4}; + let Inst{3-0} = src1{4-1}; + let Inst{5} = src1{0}; let Inst{15-12} = Rt; let Inst{19-16} = Rt2; @@ -609,8 +617,8 @@ def VMOVSRR : AVConv5I<0b11000100, 0b1010, bits<4> src2; // Encode instruction operands. 
- let Inst{3-0} = dst1{3-0}; - let Inst{5} = dst1{4}; + let Inst{3-0} = dst1{4-1}; + let Inst{5} = dst1{0}; let Inst{15-12} = src1; let Inst{19-16} = src2; @@ -819,9 +827,9 @@ let Constraints = "$a = $dst" in { // FP to Fixed-Point: // Single Precision register -class AVConv1XInsS_Encode op1, bits<2> op2, bits<4> op3, bits<4> op4, bit op5, - dag oops, dag iops, InstrItinClass itin, string opc, string asm, - list pattern> +class AVConv1XInsS_Encode op1, bits<2> op2, bits<4> op3, bits<4> op4, + bit op5, dag oops, dag iops, InstrItinClass itin, + string opc, string asm, list pattern> : AVConv1XI { bits<5> dst; // if dp_operation then UInt(D:Vd) else UInt(Vd:D); @@ -830,9 +838,9 @@ class AVConv1XInsS_Encode op1, bits<2> op2, bits<4> op3, bits<4> op4, bi } // Double Precision register -class AVConv1XInsD_Encode op1, bits<2> op2, bits<4> op3, bits<4> op4, bit op5, - dag oops, dag iops, InstrItinClass itin, string opc, string asm, - list pattern> +class AVConv1XInsD_Encode op1, bits<2> op2, bits<4> op3, bits<4> op4, + bit op5, dag oops, dag iops, InstrItinClass itin, + string opc, string asm, list pattern> : AVConv1XI { bits<5> dst; // if dp_operation then UInt(D:Vd) else UInt(Vd:D); @@ -1081,10 +1089,11 @@ def : Pat<(fadd_mlx SPR:$dstin, (fmul_su SPR:$a, SPR:$b)), Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]>; // Match @llvm.fma.* intrinsics -def : Pat<(f64 (fma DPR:$Ddin, DPR:$Dn, DPR:$Dm)), +// (fma x, y, z) -> (vfms z, x, y) +def : Pat<(f64 (fma DPR:$Dn, DPR:$Dm, DPR:$Ddin)), (VFMAD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>, Requires<[HasVFP4]>; -def : Pat<(f32 (fma SPR:$Sdin, SPR:$Sn, SPR:$Sm)), +def : Pat<(f32 (fma SPR:$Sn, SPR:$Sm, SPR:$Sdin)), (VFMAS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>, Requires<[HasVFP4]>; @@ -1115,18 +1124,18 @@ def : Pat<(fsub_mlx SPR:$dstin, (fmul_su SPR:$a, SPR:$b)), Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]>; // Match @llvm.fma.* intrinsics -// (fma (fneg x), y, z) -> (vfms x, y, z) -def : Pat<(f64 (fma (fneg DPR:$Ddin), DPR:$Dn, DPR:$Dm)), +// (fma (fneg x), y, z) -> (vfms z, x, y) +def : Pat<(f64 (fma (fneg DPR:$Dn), DPR:$Dm, DPR:$Ddin)), (VFMSD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>, Requires<[HasVFP4]>; -def : Pat<(f32 (fma (fneg SPR:$Sdin), SPR:$Sn, SPR:$Sm)), +def : Pat<(f32 (fma (fneg SPR:$Sn), SPR:$Sm, SPR:$Sdin)), (VFMSS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>, Requires<[HasVFP4]>; -// (fneg (fma x, (fneg y), z) -> (vfms x, y, z) -def : Pat<(fneg (f64 (fma DPR:$Ddin, (fneg DPR:$Dn), DPR:$Dm))), +// (fma x, (fneg y), z) -> (vfms z, x, y) +def : Pat<(f64 (fma DPR:$Dn, (fneg DPR:$Dm), DPR:$Ddin)), (VFMSD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>, Requires<[HasVFP4]>; -def : Pat<(fneg (f32 (fma SPR:$Sdin, (fneg SPR:$Sn), SPR:$Sm))), +def : Pat<(f32 (fma SPR:$Sn, (fneg SPR:$Sm), SPR:$Sdin)), (VFMSS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>, Requires<[HasVFP4]>; @@ -1157,18 +1166,18 @@ def : Pat<(fsub_mlx (fneg (fmul_su SPR:$a, SPR:$b)), SPR:$dstin), Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]>; // Match @llvm.fma.* intrinsics -// (fneg (fma x, y, z)) -> (vfnma x, y, z) -def : Pat<(fneg (fma (f64 DPR:$Ddin), (f64 DPR:$Dn), (f64 DPR:$Dm))), +// (fneg (fma x, y, z)) -> (vfnma z, x, y) +def : Pat<(fneg (fma (f64 DPR:$Dn), (f64 DPR:$Dm), (f64 DPR:$Ddin))), (VFNMAD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>, Requires<[HasVFP4]>; -def : Pat<(fneg (fma (f32 SPR:$Sdin), (f32 SPR:$Sn), (f32 SPR:$Sm))), +def : Pat<(fneg (fma (f32 SPR:$Sn), (f32 SPR:$Sm), (f32 SPR:$Sdin))), (VFNMAS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>, Requires<[HasVFP4]>; -// (fma (fneg x), y, (fneg z)) -> (vfnma x, y, z) -def : Pat<(f64 (fma (fneg DPR:$Ddin), DPR:$Dn, 
(fneg DPR:$Dm))), +// (fma (fneg x), y, (fneg z)) -> (vfnma z, x, y) +def : Pat<(f64 (fma (fneg DPR:$Dn), DPR:$Dm, (fneg DPR:$Ddin))), (VFNMAD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>, Requires<[HasVFP4]>; -def : Pat<(f32 (fma (fneg SPR:$Sdin), SPR:$Sn, (fneg SPR:$Sm))), +def : Pat<(f32 (fma (fneg SPR:$Sn), SPR:$Sm, (fneg SPR:$Sdin))), (VFNMAS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>, Requires<[HasVFP4]>; @@ -1198,18 +1207,26 @@ def : Pat<(fsub_mlx (fmul_su SPR:$a, SPR:$b), SPR:$dstin), Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]>; // Match @llvm.fma.* intrinsics -// (fneg (fma (fneg x), y, z)) -> (vnfms x, y, z) -def : Pat<(fneg (f64 (fma (fneg DPR:$Ddin), DPR:$Dn, DPR:$Dm))), + +// (fma x, y, (fneg z)) -> (vfnms z, x, y)) +def : Pat<(f64 (fma DPR:$Dn, DPR:$Dm, (fneg DPR:$Ddin))), + (VFNMSD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>, + Requires<[HasVFP4]>; +def : Pat<(f32 (fma SPR:$Sn, SPR:$Sm, (fneg SPR:$Sdin))), + (VFNMSS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>, + Requires<[HasVFP4]>; +// (fneg (fma (fneg x), y, z)) -> (vfnms z, x, y) +def : Pat<(fneg (f64 (fma (fneg DPR:$Dn), DPR:$Dm, DPR:$Ddin))), (VFNMSD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>, Requires<[HasVFP4]>; -def : Pat<(fneg (f32 (fma (fneg SPR:$Sdin), SPR:$Sn, SPR:$Sm))), +def : Pat<(fneg (f32 (fma (fneg SPR:$Sn), SPR:$Sm, SPR:$Sdin))), (VFNMSS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>, Requires<[HasVFP4]>; -// (fma x, (fneg y), z) -> (vnfms x, y, z) -def : Pat<(f64 (fma DPR:$Ddin, (fneg DPR:$Dn), DPR:$Dm)), +// (fneg (fma x, (fneg y), z) -> (vfnms z, x, y) +def : Pat<(fneg (f64 (fma DPR:$Dn, (fneg DPR:$Dm), DPR:$Ddin))), (VFNMSD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>, Requires<[HasVFP4]>; -def : Pat<(f32 (fma SPR:$Sdin, (fneg SPR:$Sn), SPR:$Sm)), +def : Pat<(fneg (f32 (fma SPR:$Sn, (fneg SPR:$Sm), SPR:$Sdin))), (VFNMSS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>, Requires<[HasVFP4]>; @@ -1426,22 +1443,6 @@ def : VFP2InstAlias<"vldr${p}.64 $Dd, $addr", def : VFP2InstAlias<"vstr${p}.64 $Dd, $addr", (VSTRD DPR:$Dd, addrmode5:$addr, pred:$p)>; -// VMUL has a two-operand form (implied destination operand) -def : VFP2InstAlias<"vmul${p}.f64 $Dn, $Dm", - (VMULD DPR:$Dn, DPR:$Dn, DPR:$Dm, pred:$p)>; -def : VFP2InstAlias<"vmul${p}.f32 $Sn, $Sm", - (VMULS SPR:$Sn, SPR:$Sn, SPR:$Sm, pred:$p)>; -// VADD has a two-operand form (implied destination operand) -def : VFP2InstAlias<"vadd${p}.f64 $Dn, $Dm", - (VADDD DPR:$Dn, DPR:$Dn, DPR:$Dm, pred:$p)>; -def : VFP2InstAlias<"vadd${p}.f32 $Sn, $Sm", - (VADDS SPR:$Sn, SPR:$Sn, SPR:$Sm, pred:$p)>; -// VSUB has a two-operand form (implied destination operand) -def : VFP2InstAlias<"vsub${p}.f64 $Dn, $Dm", - (VSUBD DPR:$Dn, DPR:$Dn, DPR:$Dm, pred:$p)>; -def : VFP2InstAlias<"vsub${p}.f32 $Sn, $Sm", - (VSUBS SPR:$Sn, SPR:$Sn, SPR:$Sm, pred:$p)>; - // VMOV can accept optional 32-bit or less data type suffix suffix. def : VFP2InstAlias<"vmov${p}.8 $Rt, $Sn", (VMOVRS GPR:$Rt, SPR:$Sn, pred:$p)>; diff --git a/lib/Target/ARM/ARMJITInfo.cpp b/lib/Target/ARM/ARMJITInfo.cpp index 98930cc..3f99cce 100644 --- a/lib/Target/ARM/ARMJITInfo.cpp +++ b/lib/Target/ARM/ARMJITInfo.cpp @@ -289,9 +289,9 @@ void ARMJITInfo::relocate(void *Function, MachineRelocation *MR, if (MR->getRelocationType() == ARM::reloc_arm_vfp_cp_entry) ResultPtr = ResultPtr >> 2; *((intptr_t*)RelocPos) |= ResultPtr; - // Set register Rn to PC. - *((intptr_t*)RelocPos) |= - getARMRegisterNumbering(ARM::PC) << ARMII::RegRnShift; + // Set register Rn to PC (which is register 15 on all architectures). + // FIXME: This avoids the need for register info in the JIT class. 
+ *((intptr_t*)RelocPos) |= 15 << ARMII::RegRnShift; break; } case ARM::reloc_arm_pic_jt: diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp index 9ef2ace..897ceb6 100644 --- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp +++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp @@ -456,8 +456,7 @@ ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, DebugLoc dl = Loc->getDebugLoc(); const MachineOperand &PMO = Loc->getOperand(0); unsigned PReg = PMO.getReg(); - unsigned PRegNum = PMO.isUndef() ? UINT_MAX - : getARMRegisterNumbering(PReg); + unsigned PRegNum = PMO.isUndef() ? UINT_MAX : TRI->getEncodingValue(PReg); unsigned Count = 1; unsigned Limit = ~0U; @@ -483,8 +482,7 @@ ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, int NewOffset = MemOps[i].Offset; const MachineOperand &MO = MemOps[i].MBBI->getOperand(0); unsigned Reg = MO.getReg(); - unsigned RegNum = MO.isUndef() ? UINT_MAX - : getARMRegisterNumbering(Reg); + unsigned RegNum = MO.isUndef() ? UINT_MAX : TRI->getEncodingValue(Reg); // Register numbers must be in ascending order. For VFP / NEON load and // store multiples, the registers must also be consecutive and within the // limit on the number of registers per instruction. @@ -1177,8 +1175,6 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB, BaseReg, false, BaseUndef, false, OffUndef, Pred, PredReg, TII, isT2); NewBBI = llvm::prior(MBBI); - if (isT2 && NewOpc == ARM::t2LDRi8 && OffImm+4 >= 0) - NewOpc = ARM::t2LDRi12; InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc, EvenReg, EvenDeadKill, false, BaseReg, BaseKill, BaseUndef, OffKill, OffUndef, @@ -1326,7 +1322,7 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) { // First advance to the instruction just before the start of the chain. AdvanceRS(MBB, MemOps); // Find a scratch register. - unsigned Scratch = RS->FindUnusedReg(ARM::GPRRegisterClass); + unsigned Scratch = RS->FindUnusedReg(&ARM::GPRRegClass); // Process the load / store instructions. RS->forward(prior(MBBI)); @@ -1739,7 +1735,7 @@ bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB, Ops.pop_back(); const MCInstrDesc &MCID = TII->get(NewOpc); - const TargetRegisterClass *TRC = TII->getRegClass(MCID, 0, TRI); + const TargetRegisterClass *TRC = TII->getRegClass(MCID, 0, TRI, *MF); MRI->constrainRegClass(EvenReg, TRC); MRI->constrainRegClass(OddReg, TRC); diff --git a/lib/Target/ARM/ARMRegisterInfo.td b/lib/Target/ARM/ARMRegisterInfo.td index 1466e98..6f974fd 100644 --- a/lib/Target/ARM/ARMRegisterInfo.td +++ b/lib/Target/ARM/ARMRegisterInfo.td @@ -12,16 +12,16 @@ //===----------------------------------------------------------------------===// // Registers are identified with 4-bit ID numbers. -class ARMReg num, string n, list subregs = []> : Register { - field bits<4> Num; +class ARMReg Enc, string n, list subregs = []> : Register { + let HWEncoding = Enc; let Namespace = "ARM"; let SubRegs = subregs; // All bits of ARM registers with sub-registers are covered by sub-registers. let CoveredBySubRegs = 1; } -class ARMFReg num, string n> : Register { - field bits<6> Num; +class ARMFReg Enc, string n> : Register { + let HWEncoding = Enc; let Namespace = "ARM"; } @@ -267,21 +267,16 @@ def DPR : RegisterClass<"ARM", [f64, v8i8, v4i16, v2i32, v1i64, v2f32], 64, // Subset of DPR that are accessible with VFP2 (and so that also have // 32-bit SPR subregs). 
def DPR_VFP2 : RegisterClass<"ARM", [f64, v8i8, v4i16, v2i32, v1i64, v2f32], 64, - (trunc DPR, 16)> { - let SubRegClasses = [(SPR ssub_0, ssub_1)]; -} + (trunc DPR, 16)>; // Subset of DPR which can be used as a source of NEON scalars for 16-bit // operations def DPR_8 : RegisterClass<"ARM", [f64, v8i8, v4i16, v2i32, v1i64, v2f32], 64, - (trunc DPR, 8)> { - let SubRegClasses = [(SPR_8 ssub_0, ssub_1)]; -} + (trunc DPR, 8)>; // Generic 128-bit vector register class. def QPR : RegisterClass<"ARM", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], 128, (sequence "Q%u", 0, 15)> { - let SubRegClasses = [(DPR dsub_0, dsub_1)]; // Allocate non-VFP2 aliases Q8-Q15 first. let AltOrders = [(rotl QPR, 8)]; let AltOrderSelect = [{ return 1; }]; @@ -289,17 +284,11 @@ def QPR : RegisterClass<"ARM", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], 128, // Subset of QPR that have 32-bit SPR subregs. def QPR_VFP2 : RegisterClass<"ARM", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], - 128, (trunc QPR, 8)> { - let SubRegClasses = [(SPR ssub_0, ssub_1, ssub_2, ssub_3), - (DPR_VFP2 dsub_0, dsub_1)]; -} + 128, (trunc QPR, 8)>; // Subset of QPR that have DPR_8 and SPR_8 subregs. def QPR_8 : RegisterClass<"ARM", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], - 128, (trunc QPR, 4)> { - let SubRegClasses = [(SPR_8 ssub_0, ssub_1, ssub_2, ssub_3), - (DPR_8 dsub_0, dsub_1)]; -} + 128, (trunc QPR, 4)>; // Pseudo-registers representing odd-even pairs of D registers. The even-odd // pairs are already represented by the Q registers. @@ -338,8 +327,6 @@ def Tuples2Q : RegisterTuples<[qsub_0, qsub_1], [(shl QPR, 0), (shl QPR, 1)]>; // Pseudo 256-bit vector register class to model pairs of Q registers // (4 consecutive D registers). def QQPR : RegisterClass<"ARM", [v4i64], 256, (add Tuples2Q)> { - let SubRegClasses = [(DPR dsub_0, dsub_1, dsub_2, dsub_3), - (QPR qsub_0, qsub_1)]; // Allocate non-VFP2 aliases first. let AltOrders = [(rotl QQPR, 8)]; let AltOrderSelect = [{ return 1; }]; @@ -363,9 +350,6 @@ def Tuples2QQ : RegisterTuples<[qqsub_0, qqsub_1], // Pseudo 512-bit vector register class to model 4 consecutive Q registers // (8 consecutive D registers). def QQQQPR : RegisterClass<"ARM", [v8i64], 256, (add Tuples2QQ)> { - let SubRegClasses = [(DPR dsub_0, dsub_1, dsub_2, dsub_3, - dsub_4, dsub_5, dsub_6, dsub_7), - (QPR qsub_0, qsub_1, qsub_2, qsub_3)]; // Allocate non-VFP2 aliases first. 
let AltOrders = [(rotl QQQQPR, 8)]; let AltOrderSelect = [{ return 1; }]; diff --git a/lib/Target/ARM/ARMSchedule.td b/lib/Target/ARM/ARMSchedule.td index 45486fd..81d2fa3 100644 --- a/lib/Target/ARM/ARMSchedule.td +++ b/lib/Target/ARM/ARMSchedule.td @@ -70,11 +70,11 @@ def IIC_iLoad_bh_siu : InstrItinClass; def IIC_iLoad_d_i : InstrItinClass; def IIC_iLoad_d_r : InstrItinClass; def IIC_iLoad_d_ru : InstrItinClass; -def IIC_iLoad_m : InstrItinClass<0>; // micro-coded -def IIC_iLoad_mu : InstrItinClass<0>; // micro-coded -def IIC_iLoad_mBr : InstrItinClass<0>; // micro-coded -def IIC_iPop : InstrItinClass<0>; // micro-coded -def IIC_iPop_Br : InstrItinClass<0>; // micro-coded +def IIC_iLoad_m : InstrItinClass; +def IIC_iLoad_mu : InstrItinClass; +def IIC_iLoad_mBr : InstrItinClass; +def IIC_iPop : InstrItinClass; +def IIC_iPop_Br : InstrItinClass; def IIC_iLoadiALU : InstrItinClass; def IIC_iStore_i : InstrItinClass; def IIC_iStore_r : InstrItinClass; @@ -91,8 +91,8 @@ def IIC_iStore_bh_siu : InstrItinClass; def IIC_iStore_d_i : InstrItinClass; def IIC_iStore_d_r : InstrItinClass; def IIC_iStore_d_ru : InstrItinClass; -def IIC_iStore_m : InstrItinClass<0>; // micro-coded -def IIC_iStore_mu : InstrItinClass<0>; // micro-coded +def IIC_iStore_m : InstrItinClass; +def IIC_iStore_mu : InstrItinClass; def IIC_Preload : InstrItinClass; def IIC_Br : InstrItinClass; def IIC_fpSTAT : InstrItinClass; @@ -126,12 +126,12 @@ def IIC_fpSQRT32 : InstrItinClass; def IIC_fpSQRT64 : InstrItinClass; def IIC_fpLoad32 : InstrItinClass; def IIC_fpLoad64 : InstrItinClass; -def IIC_fpLoad_m : InstrItinClass<0>; // micro-coded -def IIC_fpLoad_mu : InstrItinClass<0>; // micro-coded +def IIC_fpLoad_m : InstrItinClass; +def IIC_fpLoad_mu : InstrItinClass; def IIC_fpStore32 : InstrItinClass; def IIC_fpStore64 : InstrItinClass; -def IIC_fpStore_m : InstrItinClass<0>; // micro-coded -def IIC_fpStore_mu : InstrItinClass<0>; // micro-coded +def IIC_fpStore_m : InstrItinClass; +def IIC_fpStore_mu : InstrItinClass; def IIC_VLD1 : InstrItinClass; def IIC_VLD1x2 : InstrItinClass; def IIC_VLD1x3 : InstrItinClass; @@ -258,8 +258,6 @@ def IIC_VTBX4 : InstrItinClass; //===----------------------------------------------------------------------===// // Processor instruction itineraries. -def GenericItineraries : ProcessorItineraries<[], [], []>; - include "ARMScheduleV6.td" include "ARMScheduleA8.td" include "ARMScheduleA9.td" diff --git a/lib/Target/ARM/ARMScheduleA8.td b/lib/Target/ARM/ARMScheduleA8.td index 8b1fb93..2c63825 100644 --- a/lib/Target/ARM/ARMScheduleA8.td +++ b/lib/Target/ARM/ARMScheduleA8.td @@ -151,28 +151,30 @@ def CortexA8Itineraries : ProcessorItineraries< // Load multiple, def is the 5th operand. Pipeline 0 only. // FIXME: A8_LSPipe cycle time is dynamic, this assumes 3 to 4 registers. InstrItinData, - InstrStage<2, [A8_LSPipe]>], [1, 1, 1, 1, 3]>, + InstrStage<2, [A8_LSPipe]>], + [1, 1, 1, 1, 3], [], -1>, // dynamic uops // // Load multiple + update, defs are the 1st and 5th operands. InstrItinData, - InstrStage<3, [A8_LSPipe]>], [2, 1, 1, 1, 3]>, + InstrStage<3, [A8_LSPipe]>], + [2, 1, 1, 1, 3], [], -1>, // dynamic uops // // Load multiple plus branch InstrItinData, InstrStage<3, [A8_LSPipe]>, InstrStage<1, [A8_Pipe0, A8_Pipe1]>], - [1, 2, 1, 1, 3]>, + [1, 2, 1, 1, 3], [], -1>, // dynamic uops // // Pop, def is the 3rd operand. InstrItinData, - InstrStage<3, [A8_LSPipe]>], [1, 1, 3]>, + InstrStage<3, [A8_LSPipe]>], + [1, 1, 3], [], -1>, // dynamic uops // // Push, def is the 3th operand. 
InstrItinData, InstrStage<3, [A8_LSPipe]>, InstrStage<1, [A8_Pipe0, A8_Pipe1]>], - [1, 1, 3]>, - + [1, 1, 3], [], -1>, // dynamic uops // // iLoadi + iALUr for t2LDRpci_pic. InstrItinData, @@ -227,12 +229,13 @@ def CortexA8Itineraries : ProcessorItineraries< // Store multiple. Pipeline 0 only. // FIXME: A8_LSPipe cycle time is dynamic, this assumes 3 to 4 registers. InstrItinData, - InstrStage<2, [A8_LSPipe]>]>, + InstrStage<2, [A8_LSPipe]>], + [], [], -1>, // dynamic uops // // Store multiple + update InstrItinData, - InstrStage<2, [A8_LSPipe]>], [2]>, - + InstrStage<2, [A8_LSPipe]>], + [2], [], -1>, // dynamic uops // // Preload InstrItinData], [2, 2]>, @@ -393,14 +396,16 @@ def CortexA8Itineraries : ProcessorItineraries< InstrStage<1, [A8_NLSPipe], 0>, InstrStage<1, [A8_LSPipe]>, InstrStage<1, [A8_NLSPipe], 0>, - InstrStage<1, [A8_LSPipe]>], [1, 1, 1, 2]>, + InstrStage<1, [A8_LSPipe]>], + [1, 1, 1, 2], [], -1>, // dynamic uops // // FP Load Multiple + update InstrItinData, InstrStage<1, [A8_NLSPipe], 0>, InstrStage<1, [A8_LSPipe]>, InstrStage<1, [A8_NLSPipe], 0>, - InstrStage<1, [A8_LSPipe]>], [2, 1, 1, 1, 2]>, + InstrStage<1, [A8_LSPipe]>], + [2, 1, 1, 1, 2], [], -1>, // dynamic uops // // Single-precision FP Store InstrItinData, @@ -419,15 +424,16 @@ def CortexA8Itineraries : ProcessorItineraries< InstrStage<1, [A8_NLSPipe], 0>, InstrStage<1, [A8_LSPipe]>, InstrStage<1, [A8_NLSPipe], 0>, - InstrStage<1, [A8_LSPipe]>], [1, 1, 1, 1]>, + InstrStage<1, [A8_LSPipe]>], + [1, 1, 1, 1], [], -1>, // dynamic uops // // FP Store Multiple + update InstrItinData, InstrStage<1, [A8_NLSPipe], 0>, InstrStage<1, [A8_LSPipe]>, InstrStage<1, [A8_NLSPipe], 0>, - InstrStage<1, [A8_LSPipe]>], [2, 1, 1, 1, 1]>, - + InstrStage<1, [A8_LSPipe]>], + [2, 1, 1, 1, 1], [], -1>, // dynamic uops // NEON // Issue through integer pipeline, and execute in NEON unit. // @@ -1051,3 +1057,19 @@ def CortexA8Itineraries : ProcessorItineraries< InstrStage<1, [A8_NPipe], 0>, InstrStage<2, [A8_NLSPipe]>], [4, 1, 2, 2, 3, 3, 1]> ]>; + +// ===---------------------------------------------------------------------===// +// This following definitions describe the simple machine model which +// will replace itineraries. + +// Cortex-A8 machine model for scheduling and other instruction cost heuristics. +def CortexA8Model : SchedMachineModel { + let IssueWidth = 2; // 2 micro-ops are dispatched per cycle. + let MinLatency = -1; // OperandCycles are interpreted as MinLatency. + let LoadLatency = 2; // Optimistic load latency assuming bypass. + // This is overriden by OperandCycles if the + // Itineraries are queried instead. + let MispredictPenalty = 13; // Based on estimate of pipeline depth. + + let Itineraries = CortexA8Itineraries; +} diff --git a/lib/Target/ARM/ARMScheduleA9.td b/lib/Target/ARM/ARMScheduleA9.td index 0d710cc..7bc590f 100644 --- a/lib/Target/ARM/ARMScheduleA9.td +++ b/lib/Target/ARM/ARMScheduleA9.td @@ -11,6 +11,10 @@ // //===----------------------------------------------------------------------===// +// ===---------------------------------------------------------------------===// +// This section contains legacy support for itineraries. This is +// required until SD and PostRA schedulers are replaced by MachineScheduler. + // // Ad-hoc scheduling information derived from pretty vague "Cortex-A9 Technical // Reference Manual". 
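The SchedMachineModel definitions introduced in this patch (CortexA8Model above, CortexA9Model below) reduce per-CPU scheduling knowledge to a few constants: issue width, optimistic load latency, mispredict penalty. A minimal sketch of how a cost heuristic can consume such constants, using an illustrative plain struct rather than the real LLVM types:

// Illustrative mirror of the per-CPU scheduling constants defined above.
struct SchedModelCosts {
  int IssueWidth;         // micro-ops dispatched per cycle
  int MinLatency;         // -1: interpret OperandCycles as minimum latency
  int LoadLatency;        // optimistic load-use latency, bypass assumed
  int MispredictPenalty;  // cycles lost on a branch mispredict
};

// Values from the CortexA8Model definition in this patch.
static const SchedModelCosts CortexA8Costs = {2, -1, 2, 13};

// A heuristic now reads a constant instead of testing which CPU it is on.
int branchCost(const SchedModelCosts &M, bool LikelyMispredicted) {
  return LikelyMispredicted ? M.MispredictPenalty : 1;
}

Keeping these as data rather than isCortexA8()/isCortexA9() branches is what lets getMispredictionPenalty() later in this patch collapse to a single field read.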
@@ -280,7 +284,8 @@ def CortexA9Itineraries : ProcessorItineraries< InstrStage<2, [A9_AGU], 1>, InstrStage<2, [A9_LSUnit]>], [1, 1, 1, 1, 3], - [NoBypass, NoBypass, NoBypass, NoBypass, A9_LdBypass]>, + [NoBypass, NoBypass, NoBypass, NoBypass, A9_LdBypass], + -1>, // dynamic uops // // Load multiple + update, defs are the 1st and 5th operands. InstrItinData, @@ -288,7 +293,8 @@ def CortexA9Itineraries : ProcessorItineraries< InstrStage<2, [A9_AGU], 1>, InstrStage<2, [A9_LSUnit]>], [2, 1, 1, 1, 3], - [NoBypass, NoBypass, NoBypass, NoBypass, A9_LdBypass]>, + [NoBypass, NoBypass, NoBypass, NoBypass, A9_LdBypass], + -1>, // dynamic uops // // Load multiple plus branch InstrItinData, @@ -297,7 +303,8 @@ def CortexA9Itineraries : ProcessorItineraries< InstrStage<2, [A9_LSUnit]>, InstrStage<1, [A9_Branch]>], [1, 2, 1, 1, 3], - [NoBypass, NoBypass, NoBypass, NoBypass, A9_LdBypass]>, + [NoBypass, NoBypass, NoBypass, NoBypass, A9_LdBypass], + -1>, // dynamic uops // // Pop, def is the 3rd operand. InstrItinData, @@ -305,7 +312,8 @@ def CortexA9Itineraries : ProcessorItineraries< InstrStage<2, [A9_AGU], 1>, InstrStage<2, [A9_LSUnit]>], [1, 1, 3], - [NoBypass, NoBypass, A9_LdBypass]>, + [NoBypass, NoBypass, A9_LdBypass], + -1>, // dynamic uops // // Pop + branch, def is the 3rd operand. InstrItinData, @@ -314,8 +322,8 @@ def CortexA9Itineraries : ProcessorItineraries< InstrStage<2, [A9_LSUnit]>, InstrStage<1, [A9_Branch]>], [1, 1, 3], - [NoBypass, NoBypass, A9_LdBypass]>, - + [NoBypass, NoBypass, A9_LdBypass], + -1>, // dynamic uops // // iLoadi + iALUr for t2LDRpci_pic. InstrItinData, @@ -409,14 +417,15 @@ def CortexA9Itineraries : ProcessorItineraries< InstrItinData, InstrStage<1, [A9_MUX0], 0>, InstrStage<1, [A9_AGU], 0>, - InstrStage<2, [A9_LSUnit]>]>, + InstrStage<2, [A9_LSUnit]>], + [], [], -1>, // dynamic uops // // Store multiple + update InstrItinData, InstrStage<1, [A9_MUX0], 0>, InstrStage<1, [A9_AGU], 0>, - InstrStage<2, [A9_LSUnit]>], [2]>, - + InstrStage<2, [A9_LSUnit]>], + [2], [], -1>, // dynamic uops // // Preload InstrItinData], [1, 1]>, @@ -713,7 +722,8 @@ def CortexA9Itineraries : ProcessorItineraries< InstrStage<1, [A9_DRegsVFP], 0, Required>, InstrStage<2, [A9_DRegsN], 0, Reserved>, InstrStage<1, [A9_NPipe], 0>, - InstrStage<2, [A9_LSUnit]>], [1, 1, 1, 1]>, + InstrStage<2, [A9_LSUnit]>], + [1, 1, 1, 1], [], -1>, // dynamic uops // // FP Load Multiple + update // FIXME: assumes 2 doubles which requires 2 LS cycles. @@ -722,7 +732,8 @@ def CortexA9Itineraries : ProcessorItineraries< InstrStage<1, [A9_DRegsVFP], 0, Required>, InstrStage<2, [A9_DRegsN], 0, Reserved>, InstrStage<1, [A9_NPipe], 0>, - InstrStage<2, [A9_LSUnit]>], [2, 1, 1, 1]>, + InstrStage<2, [A9_LSUnit]>], + [2, 1, 1, 1], [], -1>, // dynamic uops // // Single-precision FP Store InstrItinData, @@ -749,7 +760,8 @@ def CortexA9Itineraries : ProcessorItineraries< InstrStage<1, [A9_DRegsVFP], 0, Required>, InstrStage<2, [A9_DRegsN], 0, Reserved>, InstrStage<1, [A9_NPipe], 0>, - InstrStage<2, [A9_LSUnit]>], [1, 1, 1, 1]>, + InstrStage<2, [A9_LSUnit]>], + [1, 1, 1, 1], [], -1>, // dynamic uops // // FP Store Multiple + update // FIXME: assumes 2 doubles which requires 2 LS cycles. 
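Each trailing `-1` added to these InstrItinData entries flags a micro-coded load/store-multiple whose micro-op count varies with the register list, so a consumer must treat negative counts as dynamic rather than literal. A hedged sketch of that convention (names are illustrative, not the LLVM API):

// Illustrative: interpret the NumMicroOps field added to the itineraries.
// A value >= 0 is a literal count; -1 means the count depends on the
// instruction, e.g. on the length of an LDM/STM register list.
int resolveMicroOps(int NumMicroOps, int RegListSize) {
  if (NumMicroOps >= 0)
    return NumMicroOps;
  // Dynamic case: assume one micro-op per transferred register; the real
  // target hook can apply any per-CPU rule.
  return RegListSize;
}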
@@ -758,7 +770,8 @@ def CortexA9Itineraries : ProcessorItineraries< InstrStage<1, [A9_DRegsVFP], 0, Required>, InstrStage<2, [A9_DRegsN], 0, Reserved>, InstrStage<1, [A9_NPipe], 0>, - InstrStage<2, [A9_LSUnit]>], [2, 1, 1, 1]>, + InstrStage<2, [A9_LSUnit]>], + [2, 1, 1, 1], [], -1>, // dynamic uops // NEON // VLD1 InstrItinData, @@ -1861,3 +1874,22 @@ def CortexA9Itineraries : ProcessorItineraries< InstrStage<2, [A9_NPipe]>], [4, 1, 2, 2, 3, 3, 1]> ]>; + +// ===---------------------------------------------------------------------===// +// This following definitions describe the simple machine model which +// will replace itineraries. + +// Cortex-A9 machine model for scheduling and other instruction cost heuristics. +def CortexA9Model : SchedMachineModel { + let IssueWidth = 2; // 2 micro-ops are dispatched per cycle. + let MinLatency = 0; // Data dependencies are allowed within dispatch groups. + let LoadLatency = 2; // Optimistic load latency assuming bypass. + // This is overriden by OperandCycles if the + // Itineraries are queried instead. + let MispredictPenalty = 8; // Based on estimate of pipeline depth. + + let Itineraries = CortexA9Itineraries; +} + +// TODO: Add Cortex-A9 processor and scheduler resources. + diff --git a/lib/Target/ARM/ARMSelectionDAGInfo.cpp b/lib/Target/ARM/ARMSelectionDAGInfo.cpp index e2530d0..31d5d38 100644 --- a/lib/Target/ARM/ARMSelectionDAGInfo.cpp +++ b/lib/Target/ARM/ARMSelectionDAGInfo.cpp @@ -179,8 +179,7 @@ EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl, Args.push_back(Entry); // Emit __eabi_memset call - std::pair CallResult = - TLI.LowerCallTo(Chain, + TargetLowering::CallLoweringInfo CLI(Chain, Type::getVoidTy(*DAG.getContext()), // return type false, // return sign ext false, // return zero ext @@ -193,7 +192,9 @@ EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl, false, // is return val used DAG.getExternalSymbol(TLI.getLibcallName(RTLIB::MEMSET), TLI.getPointerTy()), // callee - Args, DAG, dl); // arg list, DAG and debug + Args, DAG, dl); + std::pair CallResult = + TLI.LowerCallTo(CLI); return CallResult.second; } diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp index e247b76..4762854 100644 --- a/lib/Target/ARM/ARMSubtarget.cpp +++ b/lib/Target/ARM/ARMSubtarget.cpp @@ -67,6 +67,7 @@ ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &CPU, , HasDataBarrier(false) , Pref32BitThumb(false) , AvoidCPSRPartialUpdate(false) + , HasRAS(false) , HasMPExtension(false) , FPOnlySP(false) , AllowsUnalignedMem(false) @@ -82,7 +83,7 @@ ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &CPU, // Insert the architecture feature derived from the target triple into the // feature string. This is important for setting features that are implied // based on the architecture version. - std::string ArchFS = ARM_MC::ParseARMTriple(TT); + std::string ArchFS = ARM_MC::ParseARMTriple(TT, CPUString); if (!FS.empty()) { if (!ArchFS.empty()) ArchFS = ArchFS + "," + FS; @@ -96,13 +97,13 @@ ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &CPU, if (!HasV6T2Ops && hasThumb2()) HasV4TOps = HasV5TOps = HasV5TEOps = HasV6Ops = HasV6T2Ops = true; + // Keep a pointer to static instruction cost data for the specified CPU. + SchedModel = getSchedModelForCPU(CPUString); + // Initialize scheduling itinerary for the specified CPU. InstrItins = getInstrItineraryForCPU(CPUString); - // After parsing Itineraries, set ItinData.IssueWidth. 
-  computeIssueWidth();
-
-  if (TT.find("eabi") != std::string::npos)
+  if ((TT.find("eabi") != std::string::npos) || (isTargetIOS() && isMClass()))
     // FIXME: We might want to separate AAPCS and EABI. Some systems, e.g.
     // Darwin-EABI conforms to AAPCS but not the rest of EABI.
     TargetABI = ARM_ABI_AAPCS;
@@ -181,31 +182,7 @@ ARMSubtarget::GVIsIndirectSymbol(const GlobalValue *GV,
 }
 
 unsigned ARMSubtarget::getMispredictionPenalty() const {
-  // If we have a reasonable estimate of the pipeline depth, then we can
-  // estimate the penalty of a misprediction based on that.
-  if (isCortexA8())
-    return 13;
-  else if (isCortexA9())
-    return 8;
-
-  // Otherwise, just return a sensible default.
-  return 10;
-}
-
-void ARMSubtarget::computeIssueWidth() {
-  unsigned allStage1Units = 0;
-  for (const InstrItinerary *itin = InstrItins.Itineraries;
-       itin->FirstStage != ~0U; ++itin) {
-    const InstrStage *IS = InstrItins.Stages + itin->FirstStage;
-    allStage1Units |= IS->getUnits();
-  }
-  InstrItins.IssueWidth = 0;
-  while (allStage1Units) {
-    ++InstrItins.IssueWidth;
-    // clear the lowest bit
-    allStage1Units ^= allStage1Units & ~(allStage1Units - 1);
-  }
-  assert(InstrItins.IssueWidth <= 2 && "itinerary bug, too many stage 1 units");
+  return SchedModel->MispredictPenalty;
 }
 
 bool ARMSubtarget::enablePostRAScheduler(
diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h
index e72b06f..b394061 100644
--- a/lib/Target/ARM/ARMSubtarget.h
+++ b/lib/Target/ARM/ARMSubtarget.h
@@ -74,7 +74,7 @@ protected:
   /// HasThumb2 - True if Thumb2 instructions are supported.
   bool HasThumb2;
 
-  /// IsMClass - True if the subtarget belongs to the 'M' profile of CPUs - 
+  /// IsMClass - True if the subtarget belongs to the 'M' profile of CPUs -
   /// v6m, v7m for example.
   bool IsMClass;
 
@@ -155,6 +155,9 @@ protected:
   /// TargetTriple - What processor and OS we're targeting.
   Triple TargetTriple;
 
+  /// SchedModel - Processor specific instruction costs.
+  const MCSchedModel *SchedModel;
+
   /// Selected instruction itineraries (one entry per itinerary class.)
   InstrItineraryData InstrItins;
 
diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp
index 9aa8308..171c9ad 100644
--- a/lib/Target/ARM/ARMTargetMachine.cpp
+++ b/lib/Target/ARM/ARMTargetMachine.cpp
@@ -136,22 +136,22 @@ TargetPassConfig *ARMBaseTargetMachine::createPassConfig(PassManagerBase &PM) {
 
 bool ARMPassConfig::addPreISel() {
   if (TM->getOptLevel() != CodeGenOpt::None && EnableGlobalMerge)
-    PM->add(createGlobalMergePass(TM->getTargetLowering()));
+    addPass(createGlobalMergePass(TM->getTargetLowering()));
 
   return false;
 }
 
 bool ARMPassConfig::addInstSelector() {
-  PM->add(createARMISelDag(getARMTargetMachine(), getOptLevel()));
+  addPass(createARMISelDag(getARMTargetMachine(), getOptLevel()));
   return false;
 }
 
 bool ARMPassConfig::addPreRegAlloc() {
   // FIXME: temporarily disabling load / store optimization pass for Thumb1.
   if (getOptLevel() != CodeGenOpt::None && !getARMSubtarget().isThumb1Only())
-    PM->add(createARMLoadStoreOptimizationPass(true));
+    addPass(createARMLoadStoreOptimizationPass(true));
   if (getOptLevel() != CodeGenOpt::None && getARMSubtarget().isCortexA9())
-    PM->add(createMLxExpansionPass());
+    addPass(createMLxExpansionPass());
   return true;
 }
 
@@ -159,23 +159,23 @@ bool ARMPassConfig::addPreSched2() {
   // FIXME: temporarily disabling load / store optimization pass for Thumb1.
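The removed computeIssueWidth() derived the issue width from the itineraries by OR-ing together every itinerary's first-stage unit mask and counting the set bits; `x & ~(x - 1)` isolates the lowest set bit. With the machine model, the width is simply stated as IssueWidth = 2. A standalone replay of the removed bit loop, with a worked value:

#include <cstdio>

// Count set bits the way the removed computeIssueWidth() did.
unsigned countStage1Units(unsigned allStage1Units) {
  unsigned Width = 0;
  while (allStage1Units) {
    ++Width;
    // Clear the lowest set bit: x & ~(x - 1) isolates it, XOR removes it.
    allStage1Units ^= allStage1Units & ~(allStage1Units - 1);
  }
  return Width;
}

int main() {
  // Two stage-1 units set (say, bits for A8_Pipe0 and A8_Pipe1) give
  // width 2, matching IssueWidth = 2 in the new machine model.
  printf("%u\n", countStage1Units(0x3)); // prints 2
  return 0;
}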
if (getOptLevel() != CodeGenOpt::None) { if (!getARMSubtarget().isThumb1Only()) { - PM->add(createARMLoadStoreOptimizationPass()); + addPass(createARMLoadStoreOptimizationPass()); printAndVerify("After ARM load / store optimizer"); } if (getARMSubtarget().hasNEON()) - PM->add(createExecutionDependencyFixPass(&ARM::DPRRegClass)); + addPass(createExecutionDependencyFixPass(&ARM::DPRRegClass)); } // Expand some pseudo instructions into multiple instructions to allow // proper scheduling. - PM->add(createARMExpandPseudoPass()); + addPass(createARMExpandPseudoPass()); if (getOptLevel() != CodeGenOpt::None) { if (!getARMSubtarget().isThumb1Only()) - addPass(IfConverterID); + addPass(&IfConverterID); } if (getARMSubtarget().isThumb2()) - PM->add(createThumb2ITBlockPass()); + addPass(createThumb2ITBlockPass()); return true; } @@ -183,13 +183,13 @@ bool ARMPassConfig::addPreSched2() { bool ARMPassConfig::addPreEmitPass() { if (getARMSubtarget().isThumb2()) { if (!getARMSubtarget().prefers32BitThumb()) - PM->add(createThumb2SizeReductionPass()); + addPass(createThumb2SizeReductionPass()); // Constant island pass work on unbundled instructions. - addPass(UnpackMachineBundlesID); + addPass(&UnpackMachineBundlesID); } - PM->add(createARMConstantIslandPass()); + addPass(createARMConstantIslandPass()); return true; } diff --git a/lib/Target/ARM/ARMTargetObjectFile.cpp b/lib/Target/ARM/ARMTargetObjectFile.cpp index a5ea1c2..3d85ca7 100644 --- a/lib/Target/ARM/ARMTargetObjectFile.cpp +++ b/lib/Target/ARM/ARMTargetObjectFile.cpp @@ -24,20 +24,11 @@ using namespace dwarf; void ARMElfTargetObjectFile::Initialize(MCContext &Ctx, const TargetMachine &TM) { + bool isAAPCS_ABI = TM.getSubtarget().isAAPCS_ABI(); TargetLoweringObjectFileELF::Initialize(Ctx, TM); - isAAPCS_ABI = TM.getSubtarget().isAAPCS_ABI(); + InitializeELF(isAAPCS_ABI); if (isAAPCS_ABI) { - StaticCtorSection = - getContext().getELFSection(".init_array", ELF::SHT_INIT_ARRAY, - ELF::SHF_WRITE | - ELF::SHF_ALLOC, - SectionKind::getDataRel()); - StaticDtorSection = - getContext().getELFSection(".fini_array", ELF::SHT_FINI_ARRAY, - ELF::SHF_WRITE | - ELF::SHF_ALLOC, - SectionKind::getDataRel()); LSDASection = NULL; } @@ -47,33 +38,3 @@ void ARMElfTargetObjectFile::Initialize(MCContext &Ctx, 0, SectionKind::getMetadata()); } - -const MCSection * -ARMElfTargetObjectFile::getStaticCtorSection(unsigned Priority) const { - if (!isAAPCS_ABI) - return TargetLoweringObjectFileELF::getStaticCtorSection(Priority); - - if (Priority == 65535) - return StaticCtorSection; - - // Emit ctors in priority order. - std::string Name = std::string(".init_array.") + utostr(Priority); - return getContext().getELFSection(Name, ELF::SHT_INIT_ARRAY, - ELF::SHF_ALLOC | ELF::SHF_WRITE, - SectionKind::getDataRel()); -} - -const MCSection * -ARMElfTargetObjectFile::getStaticDtorSection(unsigned Priority) const { - if (!isAAPCS_ABI) - return TargetLoweringObjectFileELF::getStaticDtorSection(Priority); - - if (Priority == 65535) - return StaticDtorSection; - - // Emit dtors in priority order. 
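With InitializeELF taking over, prioritized static constructors still reach suffixed .init_array sections; only the ARM-specific override goes away. A sketch of the section-naming rule the removed code implemented (standalone helper, not the LLVM API):

#include <string>

// Illustrative: map a constructor priority to its ELF section name, as the
// removed getStaticCtorSection() override did. 65535 is the default
// priority and uses the unsuffixed section.
std::string ctorSectionName(unsigned Priority) {
  if (Priority == 65535)
    return ".init_array";
  return ".init_array." + std::to_string(Priority);
}
// e.g. ctorSectionName(101) == ".init_array.101"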
- std::string Name = std::string(".fini_array.") + utostr(Priority); - return getContext().getELFSection(Name, ELF::SHT_FINI_ARRAY, - ELF::SHF_ALLOC | ELF::SHF_WRITE, - SectionKind::getDataRel()); -} diff --git a/lib/Target/ARM/ARMTargetObjectFile.h b/lib/Target/ARM/ARMTargetObjectFile.h index ff21060..c6a7261 100644 --- a/lib/Target/ARM/ARMTargetObjectFile.h +++ b/lib/Target/ARM/ARMTargetObjectFile.h @@ -20,7 +20,6 @@ class TargetMachine; class ARMElfTargetObjectFile : public TargetLoweringObjectFileELF { protected: const MCSection *AttributesSection; - bool isAAPCS_ABI; public: ARMElfTargetObjectFile() : TargetLoweringObjectFileELF(), @@ -32,9 +31,6 @@ public: virtual const MCSection *getAttributesSection() const { return AttributesSection; } - - const MCSection * getStaticCtorSection(unsigned Priority) const; - const MCSection * getStaticDtorSection(unsigned Priority) const; }; } // end namespace llvm diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index 2c53e3f..3a5957b 100644 --- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -236,7 +236,10 @@ public: Match_RequiresITBlock = FIRST_TARGET_MATCH_RESULT_TY, Match_RequiresNotITBlock, Match_RequiresV6, - Match_RequiresThumb2 + Match_RequiresThumb2, +#define GET_OPERAND_DIAGNOSTIC_TYPES +#include "ARMGenAsmMatcher.inc" + }; ARMAsmParser(MCSubtargetInfo &_STI, MCAsmParser &_Parser) @@ -793,6 +796,13 @@ public: int64_t Value = CE->getValue(); return Value > 0 && Value <= 32; } + bool isAdrLabel() const { + // If we have an immediate that's not a constant, treat it as a label + // reference needing a fixup. If it is a constant, but it can't fit + // into shift immediate encoding, we reject it. + if (isImm() && !isa(getImm())) return true; + else return (isARMSOImm() || isARMSOImmNeg()); + } bool isARMSOImm() const { if (!isImm()) return false; const MCConstantExpr *CE = dyn_cast(getImm()); @@ -914,7 +924,9 @@ public: // Immediate offset in range [-255, 255]. if (!Memory.OffsetImm) return true; int64_t Val = Memory.OffsetImm->getValue(); - return Val > -256 && Val < 256; + // The #-0 offset is encoded as INT32_MIN, and we have to check + // for this too. + return (Val > -256 && Val < 256) || Val == INT32_MIN; } bool isAM3Offset() const { if (Kind != k_Immediate && Kind != k_PostIndexRegister) @@ -1028,7 +1040,8 @@ public: // Immediate offset a multiple of 4 in range [-1020, 1020]. if (!Memory.OffsetImm) return true; int64_t Val = Memory.OffsetImm->getValue(); - return Val >= -1020 && Val <= 1020 && (Val & 3) == 0; + // Special case, #-0 is INT32_MIN. + return (Val >= -1020 && Val <= 1020 && (Val & 3) == 0) || Val == INT32_MIN; } bool isMemImm0_1020s4Offset() const { if (!isMemory() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0) @@ -1446,8 +1459,10 @@ public: assert(isRegShiftedImm() && "addRegShiftedImmOperands() on non RegShiftedImm!"); Inst.addOperand(MCOperand::CreateReg(RegShiftedImm.SrcReg)); + // Shift of #32 is encoded as 0 where permitted + unsigned Imm = (RegShiftedImm.ShiftImm == 32 ? 
0 : RegShiftedImm.ShiftImm); Inst.addOperand(MCOperand::CreateImm( - ARM_AM::getSORegOpc(RegShiftedImm.ShiftTy, RegShiftedImm.ShiftImm))); + ARM_AM::getSORegOpc(RegShiftedImm.ShiftTy, Imm))); } void addShifterImmOperands(MCInst &Inst, unsigned N) const { @@ -1637,6 +1652,22 @@ public: Inst.addOperand(MCOperand::CreateImm(Imm)); } + void addAdrLabelOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + assert(isImm() && "Not an immediate!"); + + // If we have an immediate that's not a constant, treat it as a label + // reference needing a fixup. + if (!isa(getImm())) { + Inst.addOperand(MCOperand::CreateExpr(getImm())); + return; + } + + const MCConstantExpr *CE = dyn_cast(getImm()); + int Val = CE->getValue(); + Inst.addOperand(MCOperand::CreateImm(Val)); + } + void addAlignedMemoryOperands(MCInst &Inst, unsigned N) const { assert(N == 2 && "Invalid number of operands!"); Inst.addOperand(MCOperand::CreateReg(Memory.BaseRegNum)); @@ -2301,7 +2332,7 @@ void ARMOperand::print(raw_ostream &OS) const { OS << ""; break; case k_ITCondMask: { - static const char *MaskStr[] = { + static const char *const MaskStr[] = { "()", "(t)", "(e)", "(tt)", "(et)", "(te)", "(ee)", "(ttt)", "(ett)", "(tet)", "(eet)", "(tte)", "(ete)", "(tee)", "(eee)" }; @@ -2672,7 +2703,7 @@ parseITCondCode(SmallVectorImpl &Operands) { const AsmToken &Tok = Parser.getTok(); if (!Tok.is(AsmToken::Identifier)) return MatchOperand_NoMatch; - unsigned CC = StringSwitch(Tok.getString()) + unsigned CC = StringSwitch(Tok.getString().lower()) .Case("eq", ARMCC::EQ) .Case("ne", ARMCC::NE) .Case("hs", ARMCC::HS) @@ -2877,7 +2908,7 @@ parseRegisterList(SmallVectorImpl &Operands) { if (!RC->contains(EndReg)) return Error(EndLoc, "invalid register in register list"); // Ranges must go from low to high. - if (getARMRegisterNumbering(Reg) > getARMRegisterNumbering(EndReg)) + if (MRI->getEncodingValue(Reg) > MRI->getEncodingValue(EndReg)) return Error(EndLoc, "bad range in register list"); // Add all the registers in the range to the register list. @@ -2904,13 +2935,13 @@ parseRegisterList(SmallVectorImpl &Operands) { if (!RC->contains(Reg)) return Error(RegLoc, "invalid register in register list"); // List must be monotonically increasing. 
- if (getARMRegisterNumbering(Reg) < getARMRegisterNumbering(OldReg)) { + if (MRI->getEncodingValue(Reg) < MRI->getEncodingValue(OldReg)) { if (ARMMCRegisterClasses[ARM::GPRRegClassID].contains(Reg)) Warning(RegLoc, "register list not in ascending order"); else return Error(RegLoc, "register list not in ascending order"); } - if (getARMRegisterNumbering(Reg) == getARMRegisterNumbering(OldReg)) { + if (MRI->getEncodingValue(Reg) == MRI->getEncodingValue(OldReg)) { Warning(RegLoc, "duplicated register (" + RegTok.getString() + ") in register list"); continue; @@ -3249,28 +3280,59 @@ ARMAsmParser::OperandMatchResultTy ARMAsmParser:: parseMemBarrierOptOperand(SmallVectorImpl &Operands) { SMLoc S = Parser.getTok().getLoc(); const AsmToken &Tok = Parser.getTok(); - assert(Tok.is(AsmToken::Identifier) && "Token is not an Identifier"); - StringRef OptStr = Tok.getString(); - - unsigned Opt = StringSwitch(OptStr.slice(0, OptStr.size())) - .Case("sy", ARM_MB::SY) - .Case("st", ARM_MB::ST) - .Case("sh", ARM_MB::ISH) - .Case("ish", ARM_MB::ISH) - .Case("shst", ARM_MB::ISHST) - .Case("ishst", ARM_MB::ISHST) - .Case("nsh", ARM_MB::NSH) - .Case("un", ARM_MB::NSH) - .Case("nshst", ARM_MB::NSHST) - .Case("unst", ARM_MB::NSHST) - .Case("osh", ARM_MB::OSH) - .Case("oshst", ARM_MB::OSHST) - .Default(~0U); + unsigned Opt; + + if (Tok.is(AsmToken::Identifier)) { + StringRef OptStr = Tok.getString(); + + Opt = StringSwitch(OptStr.slice(0, OptStr.size()).lower()) + .Case("sy", ARM_MB::SY) + .Case("st", ARM_MB::ST) + .Case("sh", ARM_MB::ISH) + .Case("ish", ARM_MB::ISH) + .Case("shst", ARM_MB::ISHST) + .Case("ishst", ARM_MB::ISHST) + .Case("nsh", ARM_MB::NSH) + .Case("un", ARM_MB::NSH) + .Case("nshst", ARM_MB::NSHST) + .Case("unst", ARM_MB::NSHST) + .Case("osh", ARM_MB::OSH) + .Case("oshst", ARM_MB::OSHST) + .Default(~0U); - if (Opt == ~0U) - return MatchOperand_NoMatch; + if (Opt == ~0U) + return MatchOperand_NoMatch; + + Parser.Lex(); // Eat identifier token. + } else if (Tok.is(AsmToken::Hash) || + Tok.is(AsmToken::Dollar) || + Tok.is(AsmToken::Integer)) { + if (Parser.getTok().isNot(AsmToken::Integer)) + Parser.Lex(); // Eat the '#'. + SMLoc Loc = Parser.getTok().getLoc(); + + const MCExpr *MemBarrierID; + if (getParser().ParseExpression(MemBarrierID)) { + Error(Loc, "illegal expression"); + return MatchOperand_ParseFail; + } + + const MCConstantExpr *CE = dyn_cast(MemBarrierID); + if (!CE) { + Error(Loc, "constant expression expected"); + return MatchOperand_ParseFail; + } + + int Val = CE->getValue(); + if (Val & ~0xf) { + Error(Loc, "immediate value out of range"); + return MatchOperand_ParseFail; + } + + Opt = ARM_MB::RESERVED_0 + Val; + } else + return MatchOperand_ParseFail; - Parser.Lex(); // Eat identifier token. 
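The barrier parser above now also accepts a raw 4-bit immediate for DMB/DSB options (Opt = ARM_MB::RESERVED_0 + Val, with Val in [0,15]), alongside the named forms. A sketch of the name/value mapping using the encodings that appear elsewhere in this patch (helper name is ours):

#include <cstdint>
#include <string>

// Illustrative: the 4-bit memory barrier option encodings; unnamed values
// are reserved and can be round-tripped as a raw immediate.
std::string memBarrierOptName(uint8_t Val) {
  switch (Val & 0xf) {
  case 0xF: return "sy";
  case 0xE: return "st";
  case 0xB: return "ish";
  case 0xA: return "ishst";
  case 0x7: return "nsh";
  case 0x6: return "nshst";
  case 0x3: return "osh";
  case 0x2: return "oshst";
  default:  return "#" + std::to_string(Val & 0xf); // reserved: print raw
  }
}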
Operands.push_back(ARMOperand::CreateMemBarrierOpt((ARM_MB::MemBOpt)Opt, S)); return MatchOperand_Success; } @@ -3280,7 +3342,8 @@ ARMAsmParser::OperandMatchResultTy ARMAsmParser:: parseProcIFlagsOperand(SmallVectorImpl &Operands) { SMLoc S = Parser.getTok().getLoc(); const AsmToken &Tok = Parser.getTok(); - assert(Tok.is(AsmToken::Identifier) && "Token is not an Identifier"); + if (!Tok.is(AsmToken::Identifier)) + return MatchOperand_NoMatch; StringRef IFlagsStr = Tok.getString(); // An iflags string of "none" is interpreted to mean that none of the AIF @@ -3320,26 +3383,51 @@ parseMSRMaskOperand(SmallVectorImpl &Operands) { // See ARMv6-M 10.1.1 std::string Name = Mask.lower(); unsigned FlagsVal = StringSwitch(Name) - .Case("apsr", 0) - .Case("iapsr", 1) - .Case("eapsr", 2) - .Case("xpsr", 3) - .Case("ipsr", 5) - .Case("epsr", 6) - .Case("iepsr", 7) - .Case("msp", 8) - .Case("psp", 9) - .Case("primask", 16) - .Case("basepri", 17) - .Case("basepri_max", 18) - .Case("faultmask", 19) - .Case("control", 20) + // Note: in the documentation: + // ARM deprecates using MSR APSR without a _ qualifier as an alias + // for MSR APSR_nzcvq. + // but we do make it an alias here. This is so to get the "mask encoding" + // bits correct on MSR APSR writes. + // + // FIXME: Note the 0xc00 "mask encoding" bits version of the registers + // should really only be allowed when writing a special register. Note + // they get dropped in the MRS instruction reading a special register as + // the SYSm field is only 8 bits. + // + // FIXME: the _g and _nzcvqg versions are only allowed if the processor + // includes the DSP extension but that is not checked. + .Case("apsr", 0x800) + .Case("apsr_nzcvq", 0x800) + .Case("apsr_g", 0x400) + .Case("apsr_nzcvqg", 0xc00) + .Case("iapsr", 0x801) + .Case("iapsr_nzcvq", 0x801) + .Case("iapsr_g", 0x401) + .Case("iapsr_nzcvqg", 0xc01) + .Case("eapsr", 0x802) + .Case("eapsr_nzcvq", 0x802) + .Case("eapsr_g", 0x402) + .Case("eapsr_nzcvqg", 0xc02) + .Case("xpsr", 0x803) + .Case("xpsr_nzcvq", 0x803) + .Case("xpsr_g", 0x403) + .Case("xpsr_nzcvqg", 0xc03) + .Case("ipsr", 0x805) + .Case("epsr", 0x806) + .Case("iepsr", 0x807) + .Case("msp", 0x808) + .Case("psp", 0x809) + .Case("primask", 0x810) + .Case("basepri", 0x811) + .Case("basepri_max", 0x812) + .Case("faultmask", 0x813) + .Case("control", 0x814) .Default(~0U); if (FlagsVal == ~0U) return MatchOperand_NoMatch; - if (!hasV7Ops() && FlagsVal >= 17 && FlagsVal <= 19) + if (!hasV7Ops() && FlagsVal >= 0x811 && FlagsVal <= 0x813) // basepri, basepri_max and faultmask only valid for V7m. return MatchOperand_NoMatch; @@ -5216,8 +5304,8 @@ validateInstruction(MCInst &Inst, case ARM::LDRD_POST: case ARM::LDREXD: { // Rt2 must be Rt + 1. - unsigned Rt = getARMRegisterNumbering(Inst.getOperand(0).getReg()); - unsigned Rt2 = getARMRegisterNumbering(Inst.getOperand(1).getReg()); + unsigned Rt = MRI->getEncodingValue(Inst.getOperand(0).getReg()); + unsigned Rt2 = MRI->getEncodingValue(Inst.getOperand(1).getReg()); if (Rt2 != Rt + 1) return Error(Operands[3]->getStartLoc(), "destination operands must be sequential"); @@ -5225,8 +5313,8 @@ validateInstruction(MCInst &Inst, } case ARM::STRD: { // Rt2 must be Rt + 1. 
- unsigned Rt = getARMRegisterNumbering(Inst.getOperand(0).getReg()); - unsigned Rt2 = getARMRegisterNumbering(Inst.getOperand(1).getReg()); + unsigned Rt = MRI->getEncodingValue(Inst.getOperand(0).getReg()); + unsigned Rt2 = MRI->getEncodingValue(Inst.getOperand(1).getReg()); if (Rt2 != Rt + 1) return Error(Operands[3]->getStartLoc(), "source operands must be sequential"); @@ -5236,8 +5324,8 @@ validateInstruction(MCInst &Inst, case ARM::STRD_POST: case ARM::STREXD: { // Rt2 must be Rt + 1. - unsigned Rt = getARMRegisterNumbering(Inst.getOperand(1).getReg()); - unsigned Rt2 = getARMRegisterNumbering(Inst.getOperand(2).getReg()); + unsigned Rt = MRI->getEncodingValue(Inst.getOperand(1).getReg()); + unsigned Rt2 = MRI->getEncodingValue(Inst.getOperand(2).getReg()); if (Rt2 != Rt + 1) return Error(Operands[3]->getStartLoc(), "source operands must be sequential"); @@ -5315,6 +5403,16 @@ validateInstruction(MCInst &Inst, "registers must be in range r0-r7"); break; } + case ARM::tADDrSP: { + // If the non-SP source operand and the destination operand are not the + // same, we need thumb2 (for the wide encoding), or we have an error. + if (!isThumbTwo() && + Inst.getOperand(0).getReg() != Inst.getOperand(2).getReg()) { + return Error(Operands[4]->getStartLoc(), + "source register must be the same as destination"); + } + break; + } } return false; @@ -6750,8 +6848,8 @@ processInstruction(MCInst &Inst, case ARM_AM::ror: newOpc = ARM::t2RORri; isNarrow = false; break; case ARM_AM::rrx: isNarrow = false; newOpc = ARM::t2RRX; break; } - unsigned Ammount = ARM_AM::getSORegOffset(Inst.getOperand(2).getImm()); - if (Ammount == 32) Ammount = 0; + unsigned Amount = ARM_AM::getSORegOffset(Inst.getOperand(2).getImm()); + if (Amount == 32) Amount = 0; TmpInst.setOpcode(newOpc); TmpInst.addOperand(Inst.getOperand(0)); // Rd if (isNarrow) @@ -6759,7 +6857,7 @@ processInstruction(MCInst &Inst, Inst.getOpcode() == ARM::t2MOVSsi ? ARM::CPSR : 0)); TmpInst.addOperand(Inst.getOperand(1)); // Rn if (newOpc != ARM::t2RRX) - TmpInst.addOperand(MCOperand::CreateImm(Ammount)); + TmpInst.addOperand(MCOperand::CreateImm(Amount)); TmpInst.addOperand(Inst.getOperand(3)); // CondCode TmpInst.addOperand(Inst.getOperand(4)); if (!isNarrow) @@ -6809,6 +6907,9 @@ processInstruction(MCInst &Inst, // A shift by zero is a plain MOVr, not a MOVsi. unsigned Amt = Inst.getOperand(2).getImm(); unsigned Opc = Amt == 0 ? ARM::MOVr : ARM::MOVsi; + // A shift by 32 should be encoded as 0 when permitted + if (Amt == 32 && (ShiftTy == ARM_AM::lsr || ShiftTy == ARM_AM::asr)) + Amt = 0; unsigned Shifter = ARM_AM::getSORegOpc(ShiftTy, Amt); MCInst TmpInst; TmpInst.setOpcode(Opc); @@ -6985,6 +7086,16 @@ processInstruction(MCInst &Inst, Inst = TmpInst; return true; } + case ARM::tADDrSP: { + // If the non-SP source operand and the destination operand are not the + // same, we need to use the 32-bit encoding if it's available. + if (Inst.getOperand(0).getReg() != Inst.getOperand(2).getReg()) { + Inst.setOpcode(ARM::t2ADDrr); + Inst.addOperand(MCOperand::CreateReg(0)); // cc_out + return true; + } + break; + } case ARM::tB: // A Thumb conditional branch outside of an IT block is a tBcc. 
if (Inst.getOperand(1).getImm() != ARMCC::AL && !inITBlock()) { @@ -7154,7 +7265,9 @@ processInstruction(MCInst &Inst, } case ARM::MOVsi: { ARM_AM::ShiftOpc SOpc = ARM_AM::getSORegShOp(Inst.getOperand(2).getImm()); - if (SOpc == ARM_AM::rrx) return false; + // rrx shifts and asr/lsr of #32 is encoded as 0 + if (SOpc == ARM_AM::rrx || SOpc == ARM_AM::asr || SOpc == ARM_AM::lsr) + return false; if (ARM_AM::getSORegOffset(Inst.getOperand(2).getImm()) == 0) { // Shifting by zero is accepted as a vanilla 'MOVr' MCInst TmpInst; @@ -7188,7 +7301,9 @@ processInstruction(MCInst &Inst, case ARM::ADDrsi: newOpc = ARM::ADDrr; break; } // If the shift is by zero, use the non-shifted instruction definition. - if (ARM_AM::getSORegOffset(Inst.getOperand(3).getImm()) == 0) { + // The exception is for right shifts, where 0 == 32 + if (ARM_AM::getSORegOffset(Inst.getOperand(3).getImm()) == 0 && + !(SOpc == ARM_AM::lsr || SOpc == ARM_AM::asr)) { MCInst TmpInst; TmpInst.setOpcode(newOpc); TmpInst.addOperand(Inst.getOperand(0)); @@ -7207,9 +7322,7 @@ processInstruction(MCInst &Inst, // The mask bits for all but the first condition are represented as // the low bit of the condition code value implies 't'. We currently // always have 1 implies 't', so XOR toggle the bits if the low bit - // of the condition code is zero. The encoding also expects the low - // bit of the condition to be encoded as bit 4 of the mask operand, - // so mask that in if needed + // of the condition code is zero. MCOperand &MO = Inst.getOperand(1); unsigned Mask = MO.getImm(); unsigned OrigMask = Mask; @@ -7218,8 +7331,7 @@ processInstruction(MCInst &Inst, assert(Mask && TZ <= 3 && "illegal IT mask value!"); for (unsigned i = 3; i != TZ; --i) Mask ^= 1 << i; - } else - Mask |= 0x10; + } MO.setImm(Mask); // Set up the IT block state according to the IT instruction we just @@ -7231,6 +7343,86 @@ processInstruction(MCInst &Inst, ITState.FirstCond = true; break; } + case ARM::t2LSLrr: + case ARM::t2LSRrr: + case ARM::t2ASRrr: + case ARM::t2SBCrr: + case ARM::t2RORrr: + case ARM::t2BICrr: + { + // Assemblers should use the narrow encodings of these instructions when permissible. + if ((isARMLowRegister(Inst.getOperand(1).getReg()) && + isARMLowRegister(Inst.getOperand(2).getReg())) && + Inst.getOperand(0).getReg() == Inst.getOperand(1).getReg() && + ((!inITBlock() && Inst.getOperand(5).getReg() == ARM::CPSR) || + (inITBlock() && Inst.getOperand(5).getReg() != ARM::CPSR)) && + (!static_cast(Operands[3])->isToken() || + !static_cast(Operands[3])->getToken().equals_lower(".w"))) { + unsigned NewOpc; + switch (Inst.getOpcode()) { + default: llvm_unreachable("unexpected opcode"); + case ARM::t2LSLrr: NewOpc = ARM::tLSLrr; break; + case ARM::t2LSRrr: NewOpc = ARM::tLSRrr; break; + case ARM::t2ASRrr: NewOpc = ARM::tASRrr; break; + case ARM::t2SBCrr: NewOpc = ARM::tSBC; break; + case ARM::t2RORrr: NewOpc = ARM::tROR; break; + case ARM::t2BICrr: NewOpc = ARM::tBIC; break; + } + MCInst TmpInst; + TmpInst.setOpcode(NewOpc); + TmpInst.addOperand(Inst.getOperand(0)); + TmpInst.addOperand(Inst.getOperand(5)); + TmpInst.addOperand(Inst.getOperand(1)); + TmpInst.addOperand(Inst.getOperand(2)); + TmpInst.addOperand(Inst.getOperand(3)); + TmpInst.addOperand(Inst.getOperand(4)); + Inst = TmpInst; + return true; + } + return false; + } + case ARM::t2ANDrr: + case ARM::t2EORrr: + case ARM::t2ADCrr: + case ARM::t2ORRrr: + { + // Assemblers should use the narrow encodings of these instructions when permissible. 
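The conditions in the t2LSLrr/t2ANDrr cases above gate shrinking a 32-bit Thumb2 data-processing instruction to its 16-bit form. A compact restatement of that predicate as a standalone sketch (struct and field names are illustrative):

// Illustrative predicate mirroring the checks above: a 32-bit t2 shift/ALU
// instruction may shrink to its 16-bit encoding only when all of these hold.
struct NarrowQuery {
  bool RnIsLow, RmIsLow;  // both source registers in r0-r7
  bool RdEqualsRn;        // tied destination (non-commutable case)
  bool SetsFlags;         // S bit / CPSR def present
  bool InITBlock;         // currently inside an IT block
  bool HasWideSuffix;     // an explicit ".w" forces the 32-bit form
};

bool canUseNarrowEncoding(const NarrowQuery &Q) {
  // Outside an IT block the narrow form always sets flags; inside one it
  // must not, hence the opposite requirements on SetsFlags.
  bool FlagsOK = Q.InITBlock ? !Q.SetsFlags : Q.SetsFlags;
  return Q.RnIsLow && Q.RmIsLow && Q.RdEqualsRn && FlagsOK &&
         !Q.HasWideSuffix;
}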
+ // These instructions are special in that they are commutable, so shorter encodings + // are available more often. + if ((isARMLowRegister(Inst.getOperand(1).getReg()) && + isARMLowRegister(Inst.getOperand(2).getReg())) && + (Inst.getOperand(0).getReg() == Inst.getOperand(1).getReg() || + Inst.getOperand(0).getReg() == Inst.getOperand(2).getReg()) && + ((!inITBlock() && Inst.getOperand(5).getReg() == ARM::CPSR) || + (inITBlock() && Inst.getOperand(5).getReg() != ARM::CPSR)) && + (!static_cast(Operands[3])->isToken() || + !static_cast(Operands[3])->getToken().equals_lower(".w"))) { + unsigned NewOpc; + switch (Inst.getOpcode()) { + default: llvm_unreachable("unexpected opcode"); + case ARM::t2ADCrr: NewOpc = ARM::tADC; break; + case ARM::t2ANDrr: NewOpc = ARM::tAND; break; + case ARM::t2EORrr: NewOpc = ARM::tEOR; break; + case ARM::t2ORRrr: NewOpc = ARM::tORR; break; + } + MCInst TmpInst; + TmpInst.setOpcode(NewOpc); + TmpInst.addOperand(Inst.getOperand(0)); + TmpInst.addOperand(Inst.getOperand(5)); + if (Inst.getOperand(0).getReg() == Inst.getOperand(1).getReg()) { + TmpInst.addOperand(Inst.getOperand(1)); + TmpInst.addOperand(Inst.getOperand(2)); + } else { + TmpInst.addOperand(Inst.getOperand(2)); + TmpInst.addOperand(Inst.getOperand(1)); + } + TmpInst.addOperand(Inst.getOperand(3)); + TmpInst.addOperand(Inst.getOperand(4)); + Inst = TmpInst; + return true; + } + return false; + } } return false; } @@ -7277,6 +7469,7 @@ unsigned ARMAsmParser::checkTargetMatchPredicate(MCInst &Inst) { return Match_Success; } +static const char *getSubtargetFeatureName(unsigned Val); bool ARMAsmParser:: MatchAndEmitInstruction(SMLoc IDLoc, SmallVectorImpl &Operands, @@ -7317,9 +7510,21 @@ MatchAndEmitInstruction(SMLoc IDLoc, Inst.setLoc(IDLoc); Out.EmitInstruction(Inst); return false; - case Match_MissingFeature: - Error(IDLoc, "instruction requires a CPU feature not currently enabled"); - return true; + case Match_MissingFeature: { + assert(ErrorInfo && "Unknown missing feature!"); + // Special case the error message for the very common case where only + // a single subtarget feature is missing (Thumb vs. ARM, e.g.). + std::string Msg = "instruction requires:"; + unsigned Mask = 1; + for (unsigned i = 0; i < (sizeof(ErrorInfo)*8-1); ++i) { + if (ErrorInfo & Mask) { + Msg += " "; + Msg += getSubtargetFeatureName(ErrorInfo & Mask); + } + Mask <<= 1; + } + return Error(IDLoc, Msg); + } case Match_InvalidOperand: { SMLoc ErrorLoc = IDLoc; if (ErrorInfo != ~0U) { @@ -7336,7 +7541,7 @@ MatchAndEmitInstruction(SMLoc IDLoc, return Error(IDLoc, "invalid instruction", ((ARMOperand*)Operands[0])->getLocRange()); case Match_ConversionFail: - // The converter function will have already emited a diagnostic. + // The converter function will have already emitted a diagnostic. 
     return true;
   case Match_RequiresNotITBlock:
     return Error(IDLoc, "flag setting instruction only valid outside IT block");
@@ -7346,6 +7551,11 @@
     return Error(IDLoc, "instruction variant requires ARMv6 or later");
   case Match_RequiresThumb2:
     return Error(IDLoc, "instruction variant requires Thumb2");
+  case Match_ImmRange0_15: {
+    SMLoc ErrorLoc = ((ARMOperand*)Operands[ErrorInfo])->getStartLoc();
+    if (ErrorLoc == SMLoc()) ErrorLoc = IDLoc;
+    return Error(ErrorLoc, "immediate operand must be in the range [0,15]");
+  }
   }
 
   llvm_unreachable("Implement any new match types added!");
@@ -7582,5 +7792,6 @@ extern "C" void LLVMInitializeARMAsmParser() {
 }
 
 #define GET_REGISTER_MATCHER
+#define GET_SUBTARGET_FEATURE_NAME
 #define GET_MATCHER_IMPLEMENTATION
 #include "ARMGenAsmMatcher.inc"
diff --git a/lib/Target/ARM/CMakeLists.txt b/lib/Target/ARM/CMakeLists.txt
index 9a2aab5..ac916cc 100644
--- a/lib/Target/ARM/CMakeLists.txt
+++ b/lib/Target/ARM/CMakeLists.txt
@@ -49,6 +49,8 @@ add_llvm_target(ARMCodeGen
   Thumb2SizeReduction.cpp
   )
 
+add_dependencies(LLVMARMCodeGen intrinsics_gen)
+
 # workaround for hanging compilation on MSVC9, 10
 if( MSVC_VERSION EQUAL 1600 OR MSVC_VERSION EQUAL 1500 )
   set_property(
diff --git a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
index 912935d..e47bf66 100644
--- a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
+++ b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
@@ -24,12 +24,66 @@
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/TargetRegistry.h"
 #include "llvm/Support/raw_ostream.h"
+#include <vector>
 
 using namespace llvm;
 
 typedef MCDisassembler::DecodeStatus DecodeStatus;
 
 namespace {
+  // Handles the condition code status of instructions in IT blocks
+  class ITStatus
+  {
+    public:
+      // Returns the condition code for the current instruction in the IT block
+      unsigned getITCC() {
+        unsigned CC = ARMCC::AL;
+        if (instrInITBlock())
+          CC = ITStates.back();
+        return CC;
+      }
+
+      // Advances the IT block state to the next T or E
+      void advanceITState() {
+        ITStates.pop_back();
+      }
+
+      // Returns true if the current instruction is in an IT block
+      bool instrInITBlock() {
+        return !ITStates.empty();
+      }
+
+      // Returns true if current instruction is the last instruction in an IT block
+      bool instrLastInITBlock() {
+        return ITStates.size() == 1;
+      }
+
+      // Called when decoding an IT instruction. Sets the IT state for the
+      // following instructions that form the IT block. Firstcond and Mask
+      // correspond to the fields in the IT instruction encoding.
+      void setITState(char Firstcond, char Mask) {
+        // (3 - the number of trailing zeros) is the number of then / else.
+        unsigned CondBit0 = Firstcond & 1;
+        unsigned NumTZ = CountTrailingZeros_32(Mask);
+        unsigned char CCBits = static_cast<unsigned char>(Firstcond & 0xf);
+        assert(NumTZ <= 3 && "Invalid IT mask!");
+        // Push condition codes onto the stack in the correct order for the pops.
+        for (unsigned Pos = NumTZ+1; Pos <= 3; ++Pos) {
+          bool T = ((Mask >> Pos) & 1) == CondBit0;
+          if (T)
+            ITStates.push_back(CCBits);
+          else
+            ITStates.push_back(CCBits ^ 1);
+        }
+        ITStates.push_back(CCBits);
+      }
+
+    private:
+      std::vector<unsigned char> ITStates;
+  };
+}
+
+namespace {
 /// ARMDisassembler - ARM disassembler for all ARM platforms.
 class ARMDisassembler : public MCDisassembler {
 public:
@@ -78,7 +132,7 @@ public:
   /// getEDInfo - See MCDisassembler.
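A standalone replay of the ITStatus logic above for `ite eq` (Firstcond = 0b0000, Mask = 0b1100), showing the condition codes the two conditional instructions receive; __builtin_ctz stands in for CountTrailingZeros_32:

#include <cassert>
#include <vector>

int main() {
  unsigned Firstcond = 0x0; // ARMCC::EQ
  unsigned Mask = 0xC;      // "ite": one Then, one Else, then the stop bit
  std::vector<unsigned char> ITStates;
  unsigned CondBit0 = Firstcond & 1;
  unsigned NumTZ = __builtin_ctz(Mask);          // NumTZ == 2
  unsigned char CCBits = Firstcond & 0xf;
  for (unsigned Pos = NumTZ + 1; Pos <= 3; ++Pos) {
    bool T = ((Mask >> Pos) & 1) == CondBit0;    // Pos 3: bit 1 != 0 -> Else
    ITStates.push_back(T ? CCBits : CCBits ^ 1);
  }
  ITStates.push_back(CCBits);                    // first instruction on top
  assert(ITStates.back() == 0x0);                // 1st instr: EQ
  ITStates.pop_back();                           // advanceITState()
  assert(ITStates.back() == 0x1);                // 2nd instr: NE
  return 0;
}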
const EDInstInfo *getEDInfo() const; private: - mutable std::vector ITBlock; + mutable ITStatus ITBlock; DecodeStatus AddThumbPredicate(MCInst&) const; void UpdateThumbVFPPredicate(MCInst&) const; }; @@ -549,7 +603,7 @@ static bool tryAddingSymbolicOperand(uint64_t Address, int32_t Value, /// These can often be values in a literal pool near the Address of the /// instruction. The Address of the instruction and its immediate Value are /// used as a possible literal pool entry. The SymbolLookUp call back will -/// return the name of a symbol referenced by the the literal pool's entry if +/// return the name of a symbol referenced by the literal pool's entry if /// the referenced address is that of a symbol. Or it will return a pointer to /// a literal 'C' string if the referenced address of the literal pool's entry /// is an address into a section with 'C' string literals. @@ -612,7 +666,7 @@ ThumbDisassembler::AddThumbPredicate(MCInst &MI) const { case ARM::tSETEND: // Some instructions (mostly conditional branches) are not // allowed in IT blocks. - if (!ITBlock.empty()) + if (ITBlock.instrInITBlock()) S = SoftFail; else return Success; @@ -623,7 +677,7 @@ ThumbDisassembler::AddThumbPredicate(MCInst &MI) const { case ARM::t2TBH: // Some instructions (mostly unconditional branches) can // only appears at the end of, or outside of, an IT. - if (ITBlock.size() > 1) + if (ITBlock.instrInITBlock() && !ITBlock.instrLastInITBlock()) S = SoftFail; break; default: @@ -633,13 +687,11 @@ ThumbDisassembler::AddThumbPredicate(MCInst &MI) const { // If we're in an IT block, base the predicate on that. Otherwise, // assume a predicate of AL. unsigned CC; - if (!ITBlock.empty()) { - CC = ITBlock.back(); - if (CC == 0xF) - CC = ARMCC::AL; - ITBlock.pop_back(); - } else + CC = ITBlock.getITCC(); + if (CC == 0xF) CC = ARMCC::AL; + if (ITBlock.instrInITBlock()) + ITBlock.advanceITState(); const MCOperandInfo *OpInfo = ARMInsts[MI.getOpcode()].OpInfo; unsigned short NumOps = ARMInsts[MI.getOpcode()].NumOperands; @@ -674,11 +726,9 @@ ThumbDisassembler::AddThumbPredicate(MCInst &MI) const { // context as a post-pass. void ThumbDisassembler::UpdateThumbVFPPredicate(MCInst &MI) const { unsigned CC; - if (!ITBlock.empty()) { - CC = ITBlock.back(); - ITBlock.pop_back(); - } else - CC = ARMCC::AL; + CC = ITBlock.getITCC(); + if (ITBlock.instrInITBlock()) + ITBlock.advanceITState(); const MCOperandInfo *OpInfo = ARMInsts[MI.getOpcode()].OpInfo; MCInst::iterator I = MI.begin(); @@ -726,7 +776,7 @@ DecodeStatus ThumbDisassembler::getInstruction(MCInst &MI, uint64_t &Size, result = decodeThumbSBitInstruction16(MI, insn16, Address, this, STI); if (result) { Size = 2; - bool InITBlock = !ITBlock.empty(); + bool InITBlock = ITBlock.instrInITBlock(); Check(result, AddThumbPredicate(MI)); AddThumb1SBit(MI, InITBlock); return result; @@ -739,7 +789,7 @@ DecodeStatus ThumbDisassembler::getInstruction(MCInst &MI, uint64_t &Size, // Nested IT blocks are UNPREDICTABLE. Must be checked before we add // the Thumb predicate. - if (MI.getOpcode() == ARM::t2IT && !ITBlock.empty()) + if (MI.getOpcode() == ARM::t2IT && ITBlock.instrInITBlock()) result = MCDisassembler::SoftFail; Check(result, AddThumbPredicate(MI)); @@ -749,21 +799,9 @@ DecodeStatus ThumbDisassembler::getInstruction(MCInst &MI, uint64_t &Size, // to the subsequent instructions. if (MI.getOpcode() == ARM::t2IT) { - // (3 - the number of trailing zeros) is the number of then / else. 
- unsigned firstcond = MI.getOperand(0).getImm(); + unsigned Firstcond = MI.getOperand(0).getImm(); unsigned Mask = MI.getOperand(1).getImm(); - unsigned CondBit0 = Mask >> 4 & 1; - unsigned NumTZ = CountTrailingZeros_32(Mask); - assert(NumTZ <= 3 && "Invalid IT mask!"); - for (unsigned Pos = 3, e = NumTZ; Pos > e; --Pos) { - bool T = ((Mask >> Pos) & 1) == CondBit0; - if (T) - ITBlock.insert(ITBlock.begin(), firstcond); - else - ITBlock.insert(ITBlock.begin(), firstcond ^ 1); - } - - ITBlock.push_back(firstcond); + ITBlock.setITState(Firstcond, Mask); } return result; @@ -783,7 +821,7 @@ DecodeStatus ThumbDisassembler::getInstruction(MCInst &MI, uint64_t &Size, result = decodeThumbInstruction32(MI, insn32, Address, this, STI); if (result != MCDisassembler::Fail) { Size = 4; - bool InITBlock = ITBlock.size(); + bool InITBlock = ITBlock.instrInITBlock(); Check(result, AddThumbPredicate(MI)); AddThumb1SBit(MI, InITBlock); return result; @@ -1186,8 +1224,8 @@ static DecodeStatus DecodeSPRRegListOperand(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; - unsigned Vd = fieldFromInstruction32(Val, 8, 4); - unsigned regs = Val & 0xFF; + unsigned Vd = fieldFromInstruction32(Val, 8, 5); + unsigned regs = fieldFromInstruction32(Val, 0, 8); if (!Check(S, DecodeSPRRegisterClass(Inst, Vd, Address, Decoder))) return MCDisassembler::Fail; @@ -1203,8 +1241,10 @@ static DecodeStatus DecodeDPRRegListOperand(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; - unsigned Vd = fieldFromInstruction32(Val, 8, 4); - unsigned regs = (Val & 0xFF) / 2; + unsigned Vd = fieldFromInstruction32(Val, 8, 5); + unsigned regs = fieldFromInstruction32(Val, 0, 8); + + regs = regs >> 1; if (!Check(S, DecodeDPRRegisterClass(Inst, Vd, Address, Decoder))) return MCDisassembler::Fail; @@ -2976,7 +3016,7 @@ static DecodeStatus DecodeThumbBROperand(MCInst &Inst, unsigned Val, static DecodeStatus DecodeT2BROperand(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { - if (!tryAddingSymbolicOperand(Address, Address + SignExtend32<22>(Val<<1) + 4, + if (!tryAddingSymbolicOperand(Address, Address + SignExtend32<21>(Val) + 4, true, 4, Inst, Decoder)) Inst.addOperand(MCOperand::CreateImm(SignExtend32<21>(Val))); return MCDisassembler::Success; @@ -3111,9 +3151,14 @@ static DecodeStatus DecodeT2LoadShift(MCInst &Inst, unsigned Insn, static DecodeStatus DecodeT2Imm8S4(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { - int imm = Val & 0xFF; - if (!(Val & 0x100)) imm *= -1; - Inst.addOperand(MCOperand::CreateImm(imm << 2)); + if (Val == 0) + Inst.addOperand(MCOperand::CreateImm(INT32_MIN)); + else { + int imm = Val & 0xFF; + + if (!(Val & 0x100)) imm *= -1; + Inst.addOperand(MCOperand::CreateImm(imm << 2)); + } return MCDisassembler::Success; } @@ -3258,9 +3303,9 @@ static DecodeStatus DecodeThumbAddSPReg(MCInst &Inst, uint16_t Insn, if (!Check(S, DecodeGPRRegisterClass(Inst, Rdm, Address, Decoder))) return MCDisassembler::Fail; + Inst.addOperand(MCOperand::CreateReg(ARM::SP)); if (!Check(S, DecodeGPRRegisterClass(Inst, Rdm, Address, Decoder))) return MCDisassembler::Fail; - Inst.addOperand(MCOperand::CreateReg(ARM::SP)); } else if (Inst.getOpcode() == ARM::tADDspr) { unsigned Rm = fieldFromInstruction16(Insn, 3, 4); @@ -3299,10 +3344,25 @@ static DecodeStatus DecodePostIdxReg(MCInst &Inst, unsigned Insn, static DecodeStatus DecodeThumbBLXOffset(MCInst &Inst, unsigned Val, uint64_t 
Address, const void *Decoder) { + // Val is passed in as S:J1:J2:imm10H:imm10L:'0' + // Note only one trailing zero not two. Also the J1 and J2 values are from + // the encoded instruction. So here change to I1 and I2 values via: + // I1 = NOT(J1 EOR S); + // I2 = NOT(J2 EOR S); + // and build the imm32 with two trailing zeros as documented: + // imm32 = SignExtend(S:I1:I2:imm10H:imm10L:'00', 32); + unsigned S = (Val >> 23) & 1; + unsigned J1 = (Val >> 22) & 1; + unsigned J2 = (Val >> 21) & 1; + unsigned I1 = !(J1 ^ S); + unsigned I2 = !(J2 ^ S); + unsigned tmp = (Val & ~0x600000) | (I1 << 22) | (I2 << 21); + int imm32 = SignExtend32<25>(tmp << 1); + if (!tryAddingSymbolicOperand(Address, - (Address & ~2u) + SignExtend32<22>(Val << 1) + 4, + (Address & ~2u) + imm32 + 4, true, 4, Inst, Decoder)) - Inst.addOperand(MCOperand::CreateImm(SignExtend32<22>(Val << 1))); + Inst.addOperand(MCOperand::CreateImm(imm32)); return MCDisassembler::Success; } @@ -3408,35 +3468,39 @@ static DecodeStatus DecodeT2SOImm(MCInst &Inst, unsigned Val, static DecodeStatus DecodeThumbBCCTargetOperand(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder){ - if (!tryAddingSymbolicOperand(Address, Address + SignExtend32<8>(Val<<1) + 4, + if (!tryAddingSymbolicOperand(Address, Address + SignExtend32<9>(Val<<1) + 4, true, 2, Inst, Decoder)) - Inst.addOperand(MCOperand::CreateImm(SignExtend32<8>(Val << 1))); + Inst.addOperand(MCOperand::CreateImm(SignExtend32<9>(Val << 1))); return MCDisassembler::Success; } static DecodeStatus DecodeThumbBLTargetOperand(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder){ - if (!tryAddingSymbolicOperand(Address, Address + SignExtend32<22>(Val<<1) + 4, + // Val is passed in as S:J1:J2:imm10:imm11 + // Note no trailing zero after imm11. Also the J1 and J2 values are from + // the encoded instruction. 
So here change to I1 and I2 values via: + // I1 = NOT(J1 EOR S); + // I2 = NOT(J2 EOR S); + // and build the imm32 with one trailing zero as documented: + // imm32 = SignExtend(S:I1:I2:imm10:imm11:'0', 32); + unsigned S = (Val >> 23) & 1; + unsigned J1 = (Val >> 22) & 1; + unsigned J2 = (Val >> 21) & 1; + unsigned I1 = !(J1 ^ S); + unsigned I2 = !(J2 ^ S); + unsigned tmp = (Val & ~0x600000) | (I1 << 22) | (I2 << 21); + int imm32 = SignExtend32<25>(tmp << 1); + + if (!tryAddingSymbolicOperand(Address, Address + imm32 + 4, true, 4, Inst, Decoder)) - Inst.addOperand(MCOperand::CreateImm(SignExtend32<22>(Val << 1))); + Inst.addOperand(MCOperand::CreateImm(imm32)); return MCDisassembler::Success; } static DecodeStatus DecodeMemBarrierOption(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { - switch (Val) { - default: + if (Val & ~0xf) return MCDisassembler::Fail; - case 0xF: // SY - case 0xE: // ST - case 0xB: // ISH - case 0xA: // ISHST - case 0x7: // NSH - case 0x6: // NSHST - case 0x3: // OSH - case 0x2: // OSHST - break; - } Inst.addOperand(MCOperand::CreateImm(Val)); return MCDisassembler::Success; @@ -4128,9 +4192,9 @@ static DecodeStatus DecodeVMOVSRR(MCInst &Inst, unsigned Insn, DecodeStatus S = MCDisassembler::Success; unsigned Rt = fieldFromInstruction32(Insn, 12, 4); unsigned Rt2 = fieldFromInstruction32(Insn, 16, 4); - unsigned Rm = fieldFromInstruction32(Insn, 0, 4); + unsigned Rm = fieldFromInstruction32(Insn, 5, 1); unsigned pred = fieldFromInstruction32(Insn, 28, 4); - Rm |= fieldFromInstruction32(Insn, 5, 1) << 4; + Rm |= fieldFromInstruction32(Insn, 0, 4) << 1; if (Rt == 0xF || Rt2 == 0xF || Rm == 0x1F) S = MCDisassembler::SoftFail; @@ -4154,9 +4218,9 @@ static DecodeStatus DecodeVMOVRRS(MCInst &Inst, unsigned Insn, DecodeStatus S = MCDisassembler::Success; unsigned Rt = fieldFromInstruction32(Insn, 12, 4); unsigned Rt2 = fieldFromInstruction32(Insn, 16, 4); - unsigned Rm = fieldFromInstruction32(Insn, 0, 4); + unsigned Rm = fieldFromInstruction32(Insn, 5, 1); unsigned pred = fieldFromInstruction32(Insn, 28, 4); - Rm |= fieldFromInstruction32(Insn, 5, 1) << 4; + Rm |= fieldFromInstruction32(Insn, 0, 4) << 1; if (Rt == 0xF || Rt2 == 0xF || Rm == 0x1F) S = MCDisassembler::SoftFail; @@ -4179,19 +4243,14 @@ static DecodeStatus DecodeIT(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; unsigned pred = fieldFromInstruction16(Insn, 4, 4); - // The InstPrinter needs to have the low bit of the predicate in - // the mask operand to be able to print it properly. - unsigned mask = fieldFromInstruction16(Insn, 0, 5); + unsigned mask = fieldFromInstruction16(Insn, 0, 4); if (pred == 0xF) { pred = 0xE; S = MCDisassembler::SoftFail; } - if ((mask & 0xF) == 0) { - // Preserve the high bit of the mask, which is the low bit of - // the predicate. - mask &= 0x10; + if (mask == 0x0) { mask |= 0x8; S = MCDisassembler::SoftFail; } diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp index cbd81c1..8b9109e 100644 --- a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp +++ b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp @@ -52,6 +52,27 @@ void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O, StringRef Annot) { unsigned Opcode = MI->getOpcode(); + // Check for HINT instructions w/ canonical names. 
+ if (Opcode == ARM::HINT || Opcode == ARM::t2HINT) { + switch (MI->getOperand(0).getImm()) { + case 0: O << "\tnop"; break; + case 1: O << "\tyield"; break; + case 2: O << "\twfe"; break; + case 3: O << "\twfi"; break; + case 4: O << "\tsev"; break; + default: + // Anything else should just print normally. + printInstruction(MI, O); + printAnnotation(O, Annot); + return; + } + printPredicateOperand(MI, 1, O); + if (Opcode == ARM::t2HINT) + O << ".w"; + printAnnotation(O, Annot); + return; + } + // Check for MOVs and print canonical forms, instead. if (Opcode == ARM::MOVsr) { // FIXME: Thumb variants? @@ -426,9 +447,13 @@ void ARMInstPrinter::printAM3PreOrOffsetIndexOp(const MCInst *MI, unsigned Op, return; } - if (unsigned ImmOffs = ARM_AM::getAM3Offset(MO3.getImm())) + //If the op is sub we have to print the immediate even if it is 0 + unsigned ImmOffs = ARM_AM::getAM3Offset(MO3.getImm()); + ARM_AM::AddrOpc op = ARM_AM::getAM3Op(MO3.getImm()); + + if (ImmOffs || (op == ARM_AM::sub)) O << ", #" - << ARM_AM::getAddrOpcStr(ARM_AM::getAM3Op(MO3.getImm())) + << ARM_AM::getAddrOpcStr(op) << ImmOffs; O << ']'; } @@ -643,22 +668,50 @@ void ARMInstPrinter::printMSRMaskOperand(const MCInst *MI, unsigned OpNum, unsigned Mask = Op.getImm() & 0xf; if (getAvailableFeatures() & ARM::FeatureMClass) { - switch (Op.getImm()) { + unsigned SYSm = Op.getImm(); + unsigned Opcode = MI->getOpcode(); + // For reads of the special registers ignore the "mask encoding" bits + // which are only for writes. + if (Opcode == ARM::t2MRS_M) + SYSm &= 0xff; + switch (SYSm) { default: llvm_unreachable("Unexpected mask value!"); - case 0: O << "apsr"; return; - case 1: O << "iapsr"; return; - case 2: O << "eapsr"; return; - case 3: O << "xpsr"; return; - case 5: O << "ipsr"; return; - case 6: O << "epsr"; return; - case 7: O << "iepsr"; return; - case 8: O << "msp"; return; - case 9: O << "psp"; return; - case 16: O << "primask"; return; - case 17: O << "basepri"; return; - case 18: O << "basepri_max"; return; - case 19: O << "faultmask"; return; - case 20: O << "control"; return; + case 0: + case 0x800: O << "apsr"; return; // with _nzcvq bits is an alias for aspr + case 0x400: O << "apsr_g"; return; + case 0xc00: O << "apsr_nzcvqg"; return; + case 1: + case 0x801: O << "iapsr"; return; // with _nzcvq bits is an alias for iapsr + case 0x401: O << "iapsr_g"; return; + case 0xc01: O << "iapsr_nzcvqg"; return; + case 2: + case 0x802: O << "eapsr"; return; // with _nzcvq bits is an alias for eapsr + case 0x402: O << "eapsr_g"; return; + case 0xc02: O << "eapsr_nzcvqg"; return; + case 3: + case 0x803: O << "xpsr"; return; // with _nzcvq bits is an alias for xpsr + case 0x403: O << "xpsr_g"; return; + case 0xc03: O << "xpsr_nzcvqg"; return; + case 5: + case 0x805: O << "ipsr"; return; + case 6: + case 0x806: O << "epsr"; return; + case 7: + case 0x807: O << "iepsr"; return; + case 8: + case 0x808: O << "msp"; return; + case 9: + case 0x809: O << "psp"; return; + case 0x10: + case 0x810: O << "primask"; return; + case 0x11: + case 0x811: O << "basepri"; return; + case 0x12: + case 0x812: O << "basepri_max"; return; + case 0x13: + case 0x813: O << "faultmask"; return; + case 0x14: + case 0x814: O << "control"; return; } } @@ -739,6 +792,25 @@ void ARMInstPrinter::printPCLabel(const MCInst *MI, unsigned OpNum, llvm_unreachable("Unhandled PC-relative pseudo-instruction!"); } +void ARMInstPrinter::printAdrLabelOperand(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + const MCOperand &MO = MI->getOperand(OpNum); + + if 
(MO.isExpr()) { + O << *MO.getExpr(); + return; + } + + int32_t OffImm = (int32_t)MO.getImm(); + + if (OffImm == INT32_MIN) + O << "#-0"; + else if (OffImm < 0) + O << "#-" << -OffImm; + else + O << "#" << OffImm; +} + void ARMInstPrinter::printThumbS4ImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O) { O << "#" << MI->getOperand(OpNum).getImm() * 4; @@ -754,7 +826,8 @@ void ARMInstPrinter::printThumbITMask(const MCInst *MI, unsigned OpNum, raw_ostream &O) { // (3 - the number of trailing zeros) is the number of then / else. unsigned Mask = MI->getOperand(OpNum).getImm(); - unsigned CondBit0 = Mask >> 4 & 1; + unsigned Firstcond = MI->getOperand(OpNum-1).getImm(); + unsigned CondBit0 = Firstcond & 1; unsigned NumTZ = CountTrailingZeros_32(Mask); assert(NumTZ <= 3 && "Invalid IT mask!"); for (unsigned Pos = 3, e = NumTZ; Pos > e; --Pos) { @@ -899,12 +972,17 @@ void ARMInstPrinter::printT2AddrModeImm8s4Operand(const MCInst *MI, O << "[" << getRegisterName(MO1.getReg()); - int32_t OffImm = (int32_t)MO2.getImm() / 4; + int32_t OffImm = (int32_t)MO2.getImm(); + + assert(((OffImm & 0x3) == 0) && "Not a valid immediate!"); + // Don't print +0. - if (OffImm < 0) - O << ", #-" << -OffImm * 4; + if (OffImm == INT32_MIN) + O << ", #-0"; + else if (OffImm < 0) + O << ", #-" << -OffImm; else if (OffImm > 0) - O << ", #" << OffImm * 4; + O << ", #" << OffImm; O << "]"; } @@ -936,15 +1014,17 @@ void ARMInstPrinter::printT2AddrModeImm8s4OffsetOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O) { const MCOperand &MO1 = MI->getOperand(OpNum); - int32_t OffImm = (int32_t)MO1.getImm() / 4; + int32_t OffImm = (int32_t)MO1.getImm(); + + assert(((OffImm & 0x3) == 0) && "Not a valid immediate!"); + // Don't print +0. - if (OffImm != 0) { - O << ", "; - if (OffImm < 0) - O << "#-" << -OffImm * 4; - else if (OffImm > 0) - O << "#" << OffImm * 4; - } + if (OffImm == INT32_MIN) + O << ", #-0"; + else if (OffImm < 0) + O << ", #-" << -OffImm; + else if (OffImm > 0) + O << ", #" << OffImm; } void ARMInstPrinter::printT2AddrModeSoRegOperand(const MCInst *MI, diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.h b/lib/Target/ARM/InstPrinter/ARMInstPrinter.h index 8acb7ee..73d7bfd 100644 --- a/lib/Target/ARM/InstPrinter/ARMInstPrinter.h +++ b/lib/Target/ARM/InstPrinter/ARMInstPrinter.h @@ -73,6 +73,7 @@ public: void printPKHLSLShiftImm(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printPKHASRShiftImm(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printAdrLabelOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printThumbS4ImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printThumbSRImm(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printThumbITMask(const MCInst *MI, unsigned OpNum, raw_ostream &O); diff --git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp index d10bfc1..ac6ce64 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp @@ -12,6 +12,7 @@ #include "MCTargetDesc/ARMFixupKinds.h" #include "MCTargetDesc/ARMAddressingModes.h" #include "llvm/MC/MCAssembler.h" +#include "llvm/MC/MCContext.h" #include "llvm/MC/MCDirectives.h" #include "llvm/MC/MCELFObjectWriter.h" #include "llvm/MC/MCExpr.h" @@ -84,7 +85,8 @@ public: { "fixup_arm_thumb_bl", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, { "fixup_arm_thumb_blx", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, { "fixup_arm_thumb_cb", 0, 16, MCFixupKindInfo::FKF_IsPCRel }, -{ "fixup_arm_thumb_cp", 0, 8, 
MCFixupKindInfo::FKF_IsPCRel }, +{ "fixup_arm_thumb_cp", 0, 8, MCFixupKindInfo::FKF_IsPCRel | + MCFixupKindInfo::FKF_IsAlignedDownTo32Bits}, { "fixup_arm_thumb_bcc", 0, 8, MCFixupKindInfo::FKF_IsPCRel }, // movw / movt: 16-bits immediate but scattered into two chunks 0 - 12, 16 - 19. { "fixup_arm_movt_hi16", 0, 20, 0 }, @@ -110,32 +112,7 @@ public: void processFixupValue(const MCAssembler &Asm, const MCAsmLayout &Layout, const MCFixup &Fixup, const MCFragment *DF, MCValue &Target, uint64_t &Value, - bool &IsResolved) { - const MCSymbolRefExpr *A = Target.getSymA(); - // Some fixups to thumb function symbols need the low bit (thumb bit) - // twiddled. - if ((unsigned)Fixup.getKind() != ARM::fixup_arm_ldst_pcrel_12 && - (unsigned)Fixup.getKind() != ARM::fixup_t2_ldst_pcrel_12 && - (unsigned)Fixup.getKind() != ARM::fixup_arm_adr_pcrel_12 && - (unsigned)Fixup.getKind() != ARM::fixup_thumb_adr_pcrel_10 && - (unsigned)Fixup.getKind() != ARM::fixup_t2_adr_pcrel_12 && - (unsigned)Fixup.getKind() != ARM::fixup_arm_thumb_cp) { - if (A) { - const MCSymbol &Sym = A->getSymbol().AliasedSymbol(); - if (Asm.isThumbFunc(&Sym)) - Value |= 1; - } - } - // We must always generate a relocation for BL/BLX instructions if we have - // a symbol to reference, as the linker relies on knowing the destination - // symbol's thumb-ness to get interworking right. - if (A && ((unsigned)Fixup.getKind() == ARM::fixup_arm_thumb_blx || - (unsigned)Fixup.getKind() == ARM::fixup_arm_thumb_bl || - (unsigned)Fixup.getKind() == ARM::fixup_arm_blx || - (unsigned)Fixup.getKind() == ARM::fixup_arm_uncondbl || - (unsigned)Fixup.getKind() == ARM::fixup_arm_condbl)) - IsResolved = false; - } + bool &IsResolved); bool mayNeedRelaxation(const MCInst &Inst) const; @@ -269,7 +246,9 @@ bool ARMAsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const { return true; } -static unsigned adjustFixupValue(unsigned Kind, uint64_t Value) { +static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value, + MCContext *Ctx = NULL) { + unsigned Kind = Fixup.getKind(); switch (Kind) { default: llvm_unreachable("Unknown fixup kind!"); @@ -322,7 +301,8 @@ static unsigned adjustFixupValue(unsigned Kind, uint64_t Value) { Value = -Value; isAdd = false; } - assert ((Value < 4096) && "Out of range pc-relative fixup value!"); + if (Ctx && Value >= 4096) + Ctx->FatalError(Fixup.getLoc(), "out of range pc-relative fixup value"); Value |= isAdd << 23; // Same addressing mode as fixup_arm_pcrel_10, @@ -345,8 +325,8 @@ static unsigned adjustFixupValue(unsigned Kind, uint64_t Value) { Value = -Value; opc = 2; // 0b0010 } - assert(ARM_AM::getSOImmVal(Value) != -1 && - "Out of range pc-relative fixup value!"); + if (Ctx && ARM_AM::getSOImmVal(Value) == -1) + Ctx->FatalError(Fixup.getLoc(), "out of range pc-relative fixup value"); // Encode the immediate and shift the opcode into place. return ARM_AM::getSOImmVal(Value) | (opc << 21); } @@ -414,39 +394,65 @@ static unsigned adjustFixupValue(unsigned Kind, uint64_t Value) { return swapped; } case ARM::fixup_arm_thumb_bl: { - // The value doesn't encode the low bit (always zero) and is offset by - // four. The value is encoded into disjoint bit positions in the destination - // opcode. x = unchanged, I = immediate value bit, S = sign extension bit - // - // BL: xxxxxSIIIIIIIIII xxxxxIIIIIIIIIII - // - // Note that the halfwords are stored high first, low second; so we need - // to transpose the fixup value here to map properly. - unsigned isNeg = (int64_t(Value - 4) < 0) ? 
1 : 0; - uint32_t Binary = 0; - Value = 0x3fffff & ((Value - 4) >> 1); - Binary = (Value & 0x7ff) << 16; // Low imm11 value. - Binary |= (Value & 0x1ffc00) >> 11; // High imm10 value. - Binary |= isNeg << 10; // Sign bit. - return Binary; + // The value doesn't encode the low bit (always zero) and is offset by + // four. The 32-bit immediate value is encoded as + // imm32 = SignExtend(S:I1:I2:imm10:imm11:0) + // where I1 = NOT(J1 ^ S) and I2 = NOT(J2 ^ S). + // The value is encoded into disjoint bit positions in the destination + // opcode. x = unchanged, I = immediate value bit, S = sign extension bit, + // J = either J1 or J2 bit + // + // BL: xxxxxSIIIIIIIIII xxJxJIIIIIIIIIII + // + // Note that the halfwords are stored high first, low second; so we need + // to transpose the fixup value here to map properly. + uint32_t offset = (Value - 4) >> 1; + uint32_t signBit = (offset & 0x800000) >> 23; + uint32_t I1Bit = (offset & 0x400000) >> 22; + uint32_t J1Bit = (I1Bit ^ 0x1) ^ signBit; + uint32_t I2Bit = (offset & 0x200000) >> 21; + uint32_t J2Bit = (I2Bit ^ 0x1) ^ signBit; + uint32_t imm10Bits = (offset & 0x1FF800) >> 11; + uint32_t imm11Bits = (offset & 0x000007FF); + + uint32_t Binary = 0; + uint32_t firstHalf = (((uint16_t)signBit << 10) | (uint16_t)imm10Bits); + uint32_t secondHalf = (((uint16_t)J1Bit << 13) | ((uint16_t)J2Bit << 11) | + (uint16_t)imm11Bits); + Binary |= secondHalf << 16; + Binary |= firstHalf; + return Binary; + } case ARM::fixup_arm_thumb_blx: { - // The value doesn't encode the low two bits (always zero) and is offset by - // four (see fixup_arm_thumb_cp). The value is encoded into disjoint bit - // positions in the destination opcode. x = unchanged, I = immediate value - // bit, S = sign extension bit, 0 = zero. - // - // BLX: xxxxxSIIIIIIIIII xxxxxIIIIIIIIII0 - // - // Note that the halfwords are stored high first, low second; so we need - // to transpose the fixup value here to map properly. - unsigned isNeg = (int64_t(Value-4) < 0) ? 1 : 0; - uint32_t Binary = 0; - Value = 0xfffff & ((Value - 2) >> 2); - Binary = (Value & 0x3ff) << 17; // Low imm10L value. - Binary |= (Value & 0xffc00) >> 10; // High imm10H value. - Binary |= isNeg << 10; // Sign bit. - return Binary; + // The value doesn't encode the low two bits (always zero) and is offset by + // four (see fixup_arm_thumb_cp). The 32-bit immediate value is encoded as + // imm32 = SignExtend(S:I1:I2:imm10H:imm10L:00) + // where I1 = NOT(J1 ^ S) and I2 = NOT(J2 ^ S). + // The value is encoded into disjoint bit positions in the destination + // opcode. x = unchanged, I = immediate value bit, S = sign extension bit, + // J = either J1 or J2 bit, 0 = zero. + // + // BLX: xxxxxSIIIIIIIIII xxJxJIIIIIIIIII0 + // + // Note that the halfwords are stored high first, low second; so we need + // to transpose the fixup value here to map properly. 
+ uint32_t offset = (Value - 2) >> 2;
+ uint32_t signBit = (offset & 0x400000) >> 22;
+ uint32_t I1Bit = (offset & 0x200000) >> 21;
+ uint32_t J1Bit = (I1Bit ^ 0x1) ^ signBit;
+ uint32_t I2Bit = (offset & 0x100000) >> 20;
+ uint32_t J2Bit = (I2Bit ^ 0x1) ^ signBit;
+ uint32_t imm10HBits = (offset & 0xFFC00) >> 10;
+ uint32_t imm10LBits = (offset & 0x3FF);
+
+ uint32_t Binary = 0;
+ uint32_t firstHalf = (((uint16_t)signBit << 10) | (uint16_t)imm10HBits);
+ uint32_t secondHalf = (((uint16_t)J1Bit << 13) | ((uint16_t)J2Bit << 11) |
+ ((uint16_t)imm10LBits) << 1);
+ Binary |= secondHalf << 16;
+ Binary |= firstHalf;
+ return Binary;
 }
 case ARM::fixup_arm_thumb_cp:
 // Offset by 4, and don't encode the low two bits. Two bytes of that
@@ -473,7 +479,8 @@ static unsigned adjustFixupValue(unsigned Kind, uint64_t Value) {
 isAdd = false;
 }
 // The value has the low 4 bits encoded in [3:0] and the high 4 in [11:8].
- assert ((Value < 256) && "Out of range pc-relative fixup value!");
+ if (Ctx && Value >= 256)
+ Ctx->FatalError(Fixup.getLoc(), "out of range pc-relative fixup value");
 Value = (Value & 0xf) | ((Value & 0xf0) << 4);
 return Value | (isAdd << 23);
 }
@@ -491,7 +498,8 @@ static unsigned adjustFixupValue(unsigned Kind, uint64_t Value) {
 }
 // These values don't encode the low two bits since they're always zero.
 Value >>= 2;
- assert ((Value < 256) && "Out of range pc-relative fixup value!");
+ if (Ctx && Value >= 256)
+ Ctx->FatalError(Fixup.getLoc(), "out of range pc-relative fixup value");
 Value |= isAdd << 23;
 // Same addressing mode as fixup_arm_pcrel_10, but with 16-bit halfwords
@@ -507,6 +515,43 @@ static unsigned adjustFixupValue(unsigned Kind, uint64_t Value) {
 }
 }
+void ARMAsmBackend::processFixupValue(const MCAssembler &Asm,
+ const MCAsmLayout &Layout,
+ const MCFixup &Fixup,
+ const MCFragment *DF,
+ MCValue &Target, uint64_t &Value,
+ bool &IsResolved) {
+ const MCSymbolRefExpr *A = Target.getSymA();
+ // Some fixups to thumb function symbols need the low bit (thumb bit)
+ // twiddled.
+ if ((unsigned)Fixup.getKind() != ARM::fixup_arm_ldst_pcrel_12 &&
+ (unsigned)Fixup.getKind() != ARM::fixup_t2_ldst_pcrel_12 &&
+ (unsigned)Fixup.getKind() != ARM::fixup_arm_adr_pcrel_12 &&
+ (unsigned)Fixup.getKind() != ARM::fixup_thumb_adr_pcrel_10 &&
+ (unsigned)Fixup.getKind() != ARM::fixup_t2_adr_pcrel_12 &&
+ (unsigned)Fixup.getKind() != ARM::fixup_arm_thumb_cp) {
+ if (A) {
+ const MCSymbol &Sym = A->getSymbol().AliasedSymbol();
+ if (Asm.isThumbFunc(&Sym))
+ Value |= 1;
+ }
+ }
+ // We must always generate a relocation for BL/BLX instructions if we have
+ // a symbol to reference, as the linker relies on knowing the destination
+ // symbol's thumb-ness to get interworking right.
+ if (A && ((unsigned)Fixup.getKind() == ARM::fixup_arm_thumb_blx ||
+ (unsigned)Fixup.getKind() == ARM::fixup_arm_thumb_bl ||
+ (unsigned)Fixup.getKind() == ARM::fixup_arm_blx ||
+ (unsigned)Fixup.getKind() == ARM::fixup_arm_uncondbl ||
+ (unsigned)Fixup.getKind() == ARM::fixup_arm_condbl))
+ IsResolved = false;
+
+ // Try to get the encoded value for the fixup as if we're mapping it into
+ // the instruction. This allows adjustFixupValue() to issue a diagnostic
+ // if the value is invalid.
+ (void)adjustFixupValue(Fixup, Value, &Asm.getContext());
+}
+
 namespace {
 // FIXME: This should be in a separate file. 
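A note on the branch cases above: both implement the Thumb-2 immediate layout imm32 = SignExtend(S:I1:I2:imm10:imm11:0), with J1 = NOT(I1 XOR S) and J2 = NOT(I2 XOR S). A minimal standalone sketch of the BL case follows; the function name is illustrative only, and it assumes Value is the raw fixup delta exactly as adjustFixupValue() receives it:

  #include <cstdint>

  // Mirrors the fixup_arm_thumb_bl case: split the halfword offset into
  // S, J1, J2, imm10 and imm11, then place the second halfword in the
  // upper 16 bits (halfwords are stored high first, low second).
  static uint32_t encodeThumbBLImm(uint64_t Value) {
    uint32_t offset  = (uint32_t)((Value - 4) >> 1); // low bit implicitly zero
    uint32_t signBit = (offset & 0x800000) >> 23;    // S
    uint32_t I1Bit   = (offset & 0x400000) >> 22;
    uint32_t J1Bit   = (I1Bit ^ 0x1) ^ signBit;      // J1 = NOT(I1 XOR S)
    uint32_t I2Bit   = (offset & 0x200000) >> 21;
    uint32_t J2Bit   = (I2Bit ^ 0x1) ^ signBit;      // J2 = NOT(I2 XOR S)
    uint32_t imm10   = (offset & 0x1FF800) >> 11;
    uint32_t imm11   =  offset & 0x0007FF;
    uint32_t firstHalf  = (signBit << 10) | imm10;
    uint32_t secondHalf = (J1Bit << 13) | (J2Bit << 11) | imm11;
    return (secondHalf << 16) | firstHalf;
  }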
@@ -530,7 +575,7 @@ public: void ELFARMAsmBackend::applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize, uint64_t Value) const { unsigned NumBytes = 4; // FIXME: 2 for Thumb - Value = adjustFixupValue(Fixup.getKind(), Value); + Value = adjustFixupValue(Fixup, Value); if (!Value) return; // Doesn't change encoding. unsigned Offset = Fixup.getOffset(); @@ -615,7 +660,7 @@ static unsigned getFixupKindNumBytes(unsigned Kind) { void DarwinARMAsmBackend::applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize, uint64_t Value) const { unsigned NumBytes = getFixupKindNumBytes(Fixup.getKind()); - Value = adjustFixupValue(Fixup.getKind(), Value); + Value = adjustFixupValue(Fixup, Value); if (!Value) return; // Doesn't change encoding. unsigned Offset = Fixup.getOffset(); diff --git a/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h b/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h index ae11be8..de48a0e 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h +++ b/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h @@ -120,14 +120,22 @@ namespace ARM_MB { // The Memory Barrier Option constants map directly to the 4-bit encoding of // the option field for memory barrier operations. enum MemBOpt { - SY = 15, - ST = 14, - ISH = 11, - ISHST = 10, - NSH = 7, - NSHST = 6, + RESERVED_0 = 0, + RESERVED_1 = 1, + OSHST = 2, OSH = 3, - OSHST = 2 + RESERVED_4 = 4, + RESERVED_5 = 5, + NSHST = 6, + NSH = 7, + RESERVED_8 = 8, + RESERVED_9 = 9, + ISHST = 10, + ISH = 11, + RESERVED_12 = 12, + RESERVED_13 = 13, + ST = 14, + SY = 15 }; inline static const char *MemBOptToString(unsigned val) { @@ -135,92 +143,24 @@ namespace ARM_MB { default: llvm_unreachable("Unknown memory operation"); case SY: return "sy"; case ST: return "st"; + case RESERVED_13: return "#0xd"; + case RESERVED_12: return "#0xc"; case ISH: return "ish"; case ISHST: return "ishst"; + case RESERVED_9: return "#0x9"; + case RESERVED_8: return "#0x8"; case NSH: return "nsh"; case NSHST: return "nshst"; + case RESERVED_5: return "#0x5"; + case RESERVED_4: return "#0x4"; case OSH: return "osh"; case OSHST: return "oshst"; + case RESERVED_1: return "#0x1"; + case RESERVED_0: return "#0x0"; } } } // namespace ARM_MB -/// getARMRegisterNumbering - Given the enum value for some register, e.g. -/// ARM::LR, return the number that it corresponds to (e.g. 14). 
-inline static unsigned getARMRegisterNumbering(unsigned Reg) { - using namespace ARM; - switch (Reg) { - default: - llvm_unreachable("Unknown ARM register!"); - case R0: case S0: case D0: case Q0: return 0; - case R1: case S1: case D1: case Q1: return 1; - case R2: case S2: case D2: case Q2: return 2; - case R3: case S3: case D3: case Q3: return 3; - case R4: case S4: case D4: case Q4: return 4; - case R5: case S5: case D5: case Q5: return 5; - case R6: case S6: case D6: case Q6: return 6; - case R7: case S7: case D7: case Q7: return 7; - case R8: case S8: case D8: case Q8: return 8; - case R9: case S9: case D9: case Q9: return 9; - case R10: case S10: case D10: case Q10: return 10; - case R11: case S11: case D11: case Q11: return 11; - case R12: case S12: case D12: case Q12: return 12; - case SP: case S13: case D13: case Q13: return 13; - case LR: case S14: case D14: case Q14: return 14; - case PC: case S15: case D15: case Q15: return 15; - - case S16: case D16: return 16; - case S17: case D17: return 17; - case S18: case D18: return 18; - case S19: case D19: return 19; - case S20: case D20: return 20; - case S21: case D21: return 21; - case S22: case D22: return 22; - case S23: case D23: return 23; - case S24: case D24: return 24; - case S25: case D25: return 25; - case S26: case D26: return 26; - case S27: case D27: return 27; - case S28: case D28: return 28; - case S29: case D29: return 29; - case S30: case D30: return 30; - case S31: case D31: return 31; - - // Composite registers use the regnum of the first register in the list. - /* Q0 */ case D0_D2: return 0; - case D1_D2: case D1_D3: return 1; - /* Q1 */ case D2_D4: return 2; - case D3_D4: case D3_D5: return 3; - /* Q2 */ case D4_D6: return 4; - case D5_D6: case D5_D7: return 5; - /* Q3 */ case D6_D8: return 6; - case D7_D8: case D7_D9: return 7; - /* Q4 */ case D8_D10: return 8; - case D9_D10: case D9_D11: return 9; - /* Q5 */ case D10_D12: return 10; - case D11_D12: case D11_D13: return 11; - /* Q6 */ case D12_D14: return 12; - case D13_D14: case D13_D15: return 13; - /* Q7 */ case D14_D16: return 14; - case D15_D16: case D15_D17: return 15; - /* Q8 */ case D16_D18: return 16; - case D17_D18: case D17_D19: return 17; - /* Q9 */ case D18_D20: return 18; - case D19_D20: case D19_D21: return 19; - /* Q10 */ case D20_D22: return 20; - case D21_D22: case D21_D23: return 21; - /* Q11 */ case D22_D24: return 22; - case D23_D24: case D23_D25: return 23; - /* Q12 */ case D24_D26: return 24; - case D25_D26: case D25_D27: return 25; - /* Q13 */ case D26_D28: return 26; - case D27_D28: case D27_D29: return 27; - /* Q14 */ case D28_D30: return 28; - case D29_D30: case D29_D31: return 29; - /* Q15 */ - } -} - /// isARMLowRegister - Returns true if the register is a low register (r0-r7). 
/// static inline bool isARMLowRegister(unsigned Reg) { diff --git a/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp b/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp index aa649ba..7d6acbc 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp @@ -178,9 +178,8 @@ unsigned ARMELFObjectWriter::GetRelocTypeInner(const MCValue &Target, break; } break; - case ARM::fixup_arm_uncondbl: case ARM::fixup_arm_blx: - case ARM::fixup_arm_uncondbranch: + case ARM::fixup_arm_uncondbl: switch (Modifier) { case MCSymbolRefExpr::VK_ARM_PLT: Type = ELF::R_ARM_PLT32; @@ -192,6 +191,7 @@ unsigned ARMELFObjectWriter::GetRelocTypeInner(const MCValue &Target, break; case ARM::fixup_arm_condbl: case ARM::fixup_arm_condbranch: + case ARM::fixup_arm_uncondbranch: Type = ELF::R_ARM_JUMP24; break; case ARM::fixup_arm_movt_hi16: @@ -252,10 +252,8 @@ unsigned ARMELFObjectWriter::GetRelocTypeInner(const MCValue &Target, case ARM::fixup_arm_thumb_cp: case ARM::fixup_arm_thumb_br: llvm_unreachable("Unimplemented"); - case ARM::fixup_arm_uncondbranch: - Type = ELF::R_ARM_CALL; - break; case ARM::fixup_arm_condbranch: + case ARM::fixup_arm_uncondbranch: Type = ELF::R_ARM_JUMP24; break; case ARM::fixup_arm_movt_hi16: diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp index 03e8d5f..d32805e 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp @@ -22,40 +22,14 @@ EnableARMEHABI("arm-enable-ehabi", cl::Hidden, cl::init(false)); -static const char *const arm_asm_table[] = { - "{r0}", "r0", - "{r1}", "r1", - "{r2}", "r2", - "{r3}", "r3", - "{r4}", "r4", - "{r5}", "r5", - "{r6}", "r6", - "{r7}", "r7", - "{r8}", "r8", - "{r9}", "r9", - "{r10}", "r10", - "{r11}", "r11", - "{r12}", "r12", - "{r13}", "r13", - "{r14}", "r14", - "{lr}", "lr", - "{sp}", "sp", - "{ip}", "ip", - "{fp}", "fp", - "{sl}", "sl", - "{memory}", "memory", - "{cc}", "cc", - 0,0 -}; - void ARMMCAsmInfoDarwin::anchor() { } ARMMCAsmInfoDarwin::ARMMCAsmInfoDarwin() { - AsmTransCBE = arm_asm_table; Data64bitsDirective = 0; CommentString = "@"; Code16Directive = ".code\t16"; Code32Directive = ".code\t32"; + UseDataRegionDirectives = true; SupportsDebugInformation = true; diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp index 10d1c48..94f1082 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp @@ -18,6 +18,7 @@ #include "MCTargetDesc/ARMMCExpr.h" #include "MCTargetDesc/ARMMCTargetDesc.h" #include "llvm/MC/MCCodeEmitter.h" +#include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCInstrInfo.h" @@ -38,11 +39,12 @@ class ARMMCCodeEmitter : public MCCodeEmitter { void operator=(const ARMMCCodeEmitter &); // DO NOT IMPLEMENT const MCInstrInfo &MCII; const MCSubtargetInfo &STI; + const MCContext &CTX; public: ARMMCCodeEmitter(const MCInstrInfo &mcii, const MCSubtargetInfo &sti, MCContext &ctx) - : MCII(mcii), STI(sti) { + : MCII(mcii), STI(sti), CTX(ctx) { } ~ARMMCCodeEmitter() {} @@ -336,6 +338,7 @@ public: } // end anonymous namespace MCCodeEmitter *llvm::createARMMCCodeEmitter(const MCInstrInfo &MCII, + const MCRegisterInfo &MRI, const MCSubtargetInfo &STI, MCContext &Ctx) { return new ARMMCCodeEmitter(MCII, STI, Ctx); @@ -404,7 +407,7 @@ getMachineOpValue(const MCInst &MI, const MCOperand &MO, SmallVectorImpl &Fixups) 
const { if (MO.isReg()) { unsigned Reg = MO.getReg(); - unsigned RegNo = getARMRegisterNumbering(Reg); + unsigned RegNo = CTX.getRegisterInfo().getEncodingValue(Reg); // Q registers are encoded as 2x their register number. switch (Reg) { @@ -433,7 +436,7 @@ EncodeAddrModeOpValues(const MCInst &MI, unsigned OpIdx, unsigned &Reg, const MCOperand &MO = MI.getOperand(OpIdx); const MCOperand &MO1 = MI.getOperand(OpIdx + 1); - Reg = getARMRegisterNumbering(MO.getReg()); + Reg = CTX.getRegisterInfo().getEncodingValue(MO.getReg()); int32_t SImm = MO1.getImm(); bool isAdd = true; @@ -640,8 +643,8 @@ getUnconditionalBranchTargetOpValue(const MCInst &MI, unsigned OpIdx, return Val; } -/// getAdrLabelOpValue - Return encoding info for 12-bit immediate ADR label -/// target. +/// getAdrLabelOpValue - Return encoding info for 12-bit shifted-immediate +/// ADR label target. uint32_t ARMMCCodeEmitter:: getAdrLabelOpValue(const MCInst &MI, unsigned OpIdx, SmallVectorImpl &Fixups) const { @@ -651,15 +654,23 @@ getAdrLabelOpValue(const MCInst &MI, unsigned OpIdx, Fixups); int32_t offset = MO.getImm(); uint32_t Val = 0x2000; - if (offset < 0) { + + if (offset == INT32_MIN) { + Val = 0x1000; + offset = 0; + } else if (offset < 0) { Val = 0x1000; offset *= -1; } - Val |= offset; + + int SoImmVal = ARM_AM::getSOImmVal(offset); + assert(SoImmVal != -1 && "Not a valid so_imm value!"); + + Val |= SoImmVal; return Val; } -/// getAdrLabelOpValue - Return encoding info for 12-bit immediate ADR label +/// getT2AdrLabelOpValue - Return encoding info for 12-bit immediate ADR label /// target. uint32_t ARMMCCodeEmitter:: getT2AdrLabelOpValue(const MCInst &MI, unsigned OpIdx, @@ -669,14 +680,16 @@ getT2AdrLabelOpValue(const MCInst &MI, unsigned OpIdx, return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_t2_adr_pcrel_12, Fixups); int32_t Val = MO.getImm(); - if (Val < 0) { + if (Val == INT32_MIN) + Val = 0x1000; + else if (Val < 0) { Val *= -1; Val |= 0x1000; } return Val; } -/// getAdrLabelOpValue - Return encoding info for 8-bit immediate ADR label +/// getThumbAdrLabelOpValue - Return encoding info for 8-bit immediate ADR label /// target. uint32_t ARMMCCodeEmitter:: getThumbAdrLabelOpValue(const MCInst &MI, unsigned OpIdx, @@ -698,8 +711,8 @@ getThumbAddrModeRegRegOpValue(const MCInst &MI, unsigned OpIdx, // {2-0} = Rn const MCOperand &MO1 = MI.getOperand(OpIdx); const MCOperand &MO2 = MI.getOperand(OpIdx + 1); - unsigned Rn = getARMRegisterNumbering(MO1.getReg()); - unsigned Rm = getARMRegisterNumbering(MO2.getReg()); + unsigned Rn = CTX.getRegisterInfo().getEncodingValue(MO1.getReg()); + unsigned Rm = CTX.getRegisterInfo().getEncodingValue(MO2.getReg()); return (Rm << 3) | Rn; } @@ -715,7 +728,7 @@ getAddrModeImm12OpValue(const MCInst &MI, unsigned OpIdx, // If The first operand isn't a register, we have a label reference. const MCOperand &MO = MI.getOperand(OpIdx); if (!MO.isReg()) { - Reg = getARMRegisterNumbering(ARM::PC); // Rn is PC. + Reg = CTX.getRegisterInfo().getEncodingValue(ARM::PC); // Rn is PC. Imm12 = 0; isAdd = false ; // 'U' bit is set as part of the fixup. @@ -795,7 +808,7 @@ getT2AddrModeImm8s4OpValue(const MCInst &MI, unsigned OpIdx, // If The first operand isn't a register, we have a label reference. const MCOperand &MO = MI.getOperand(OpIdx); if (!MO.isReg()) { - Reg = getARMRegisterNumbering(ARM::PC); // Rn is PC. + Reg = CTX.getRegisterInfo().getEncodingValue(ARM::PC); // Rn is PC. Imm8 = 0; isAdd = false ; // 'U' bit is set as part of the fixup. 
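The getSOImmVal() calls above are what keep the ADR encodings total: an ARM modified immediate ("so_imm") must be an 8-bit value rotated right by an even amount, and anything else yields -1. The real helper lives in ARMAddressingModes.h; the following reimplementation is only a sketch of the same rule:

  #include <cstdint>

  // Returns the 12-bit so_imm encoding (rotate/2 in bits 11:8, imm8 in
  // bits 7:0), or -1 if Imm cannot be written as ror(imm8, 2*n).
  static int getSOImmValSketch(uint32_t Imm) {
    for (uint32_t Rot = 0; Rot < 32; Rot += 2) {
      // Rotating left by Rot undoes a right-rotation by Rot.
      uint32_t Imm8 = (Imm << Rot) | (Imm >> ((32 - Rot) & 31));
      if (Imm8 <= 0xFF)
        return (int)(((Rot >> 1) << 8) | Imm8);
    }
    return -1;
  }

For example, 0xFF000000 encodes as imm8 = 0xFF with a right-rotation of 8 (rotate field 4), while 0x101 is rejected because its set bits never fit in one 8-bit window.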
@@ -831,7 +844,7 @@ getT2AddrModeImm0_1020s4OpValue(const MCInst &MI, unsigned OpIdx, // {7-0} = imm8 const MCOperand &MO = MI.getOperand(OpIdx); const MCOperand &MO1 = MI.getOperand(OpIdx + 1); - unsigned Reg = getARMRegisterNumbering(MO.getReg()); + unsigned Reg = CTX.getRegisterInfo().getEncodingValue(MO.getReg()); unsigned Imm8 = MO1.getImm(); return (Reg << 8) | Imm8; } @@ -861,11 +874,11 @@ ARMMCCodeEmitter::getHiLo16ImmOpValue(const MCInst &MI, unsigned OpIdx, // Handle :upper16: and :lower16: assembly prefixes. const MCExpr *E = MO.getExpr(); + MCFixupKind Kind; if (E->getKind() == MCExpr::Target) { const ARMMCExpr *ARM16Expr = cast(E); E = ARM16Expr->getSubExpr(); - MCFixupKind Kind; switch (ARM16Expr->getKind()) { default: llvm_unreachable("Unsupported ARMFixup"); case ARMMCExpr::VK_ARM_HI16: @@ -891,9 +904,21 @@ ARMMCCodeEmitter::getHiLo16ImmOpValue(const MCInst &MI, unsigned OpIdx, } Fixups.push_back(MCFixup::Create(0, E, Kind, MI.getLoc())); return 0; - }; - - llvm_unreachable("Unsupported MCExpr type in MCOperand!"); + } + // If the expression doesn't have :upper16: or :lower16: on it, + // it's just a plain immediate expression, and those evaluate to + // the lower 16 bits of the expression regardless of whether + // we have a movt or a movw. + if (!isTargetDarwin() && EvaluateAsPCRel(E)) + Kind = MCFixupKind(isThumb2() + ? ARM::fixup_t2_movw_lo16_pcrel + : ARM::fixup_arm_movw_lo16_pcrel); + else + Kind = MCFixupKind(isThumb2() + ? ARM::fixup_t2_movw_lo16 + : ARM::fixup_arm_movw_lo16); + Fixups.push_back(MCFixup::Create(0, E, Kind, MI.getLoc())); + return 0; } uint32_t ARMMCCodeEmitter:: @@ -902,8 +927,8 @@ getLdStSORegOpValue(const MCInst &MI, unsigned OpIdx, const MCOperand &MO = MI.getOperand(OpIdx); const MCOperand &MO1 = MI.getOperand(OpIdx+1); const MCOperand &MO2 = MI.getOperand(OpIdx+2); - unsigned Rn = getARMRegisterNumbering(MO.getReg()); - unsigned Rm = getARMRegisterNumbering(MO1.getReg()); + unsigned Rn = CTX.getRegisterInfo().getEncodingValue(MO.getReg()); + unsigned Rm = CTX.getRegisterInfo().getEncodingValue(MO1.getReg()); unsigned ShImm = ARM_AM::getAM2Offset(MO2.getImm()); bool isAdd = ARM_AM::getAM2Op(MO2.getImm()) == ARM_AM::add; ARM_AM::ShiftOpc ShOp = ARM_AM::getAM2ShiftOpc(MO2.getImm()); @@ -933,7 +958,7 @@ getAddrMode2OpValue(const MCInst &MI, unsigned OpIdx, // {12} isAdd // {11-0} imm12/Rm const MCOperand &MO = MI.getOperand(OpIdx); - unsigned Rn = getARMRegisterNumbering(MO.getReg()); + unsigned Rn = CTX.getRegisterInfo().getEncodingValue(MO.getReg()); uint32_t Binary = getAddrMode2OffsetOpValue(MI, OpIdx + 1, Fixups); Binary |= Rn << 14; return Binary; @@ -956,7 +981,7 @@ getAddrMode2OffsetOpValue(const MCInst &MI, unsigned OpIdx, ARM_AM::ShiftOpc ShOp = ARM_AM::getAM2ShiftOpc(Imm); Binary <<= 7; // Shift amount is bits [11:7] Binary |= getShiftOp(ShOp) << 5; // Shift type is bits [6:5] - Binary |= getARMRegisterNumbering(MO.getReg()); // Rm is bits [3:0] + Binary |= CTX.getRegisterInfo().getEncodingValue(MO.getReg()); // Rm is bits [3:0] } return Binary | (isAdd << 12) | (isReg << 13); } @@ -969,7 +994,7 @@ getPostIdxRegOpValue(const MCInst &MI, unsigned OpIdx, const MCOperand &MO = MI.getOperand(OpIdx); const MCOperand &MO1 = MI.getOperand(OpIdx+1); bool isAdd = MO1.getImm() != 0; - return getARMRegisterNumbering(MO.getReg()) | (isAdd << 4); + return CTX.getRegisterInfo().getEncodingValue(MO.getReg()) | (isAdd << 4); } uint32_t ARMMCCodeEmitter:: @@ -987,7 +1012,7 @@ getAddrMode3OffsetOpValue(const MCInst &MI, unsigned OpIdx, uint32_t Imm8 = 
ARM_AM::getAM3Offset(Imm); // if reg +/- reg, Rm will be non-zero. Otherwise, we have reg +/- imm8 if (!isImm) - Imm8 = getARMRegisterNumbering(MO.getReg()); + Imm8 = CTX.getRegisterInfo().getEncodingValue(MO.getReg()); return Imm8 | (isAdd << 8) | (isImm << 9); } @@ -1005,7 +1030,7 @@ getAddrMode3OpValue(const MCInst &MI, unsigned OpIdx, // If The first operand isn't a register, we have a label reference. if (!MO.isReg()) { - unsigned Rn = getARMRegisterNumbering(ARM::PC); // Rn is PC. + unsigned Rn = CTX.getRegisterInfo().getEncodingValue(ARM::PC); // Rn is PC. assert(MO.isExpr() && "Unexpected machine operand type!"); const MCExpr *Expr = MO.getExpr(); @@ -1015,14 +1040,14 @@ getAddrMode3OpValue(const MCInst &MI, unsigned OpIdx, ++MCNumCPRelocations; return (Rn << 9) | (1 << 13); } - unsigned Rn = getARMRegisterNumbering(MO.getReg()); + unsigned Rn = CTX.getRegisterInfo().getEncodingValue(MO.getReg()); unsigned Imm = MO2.getImm(); bool isAdd = ARM_AM::getAM3Op(Imm) == ARM_AM::add; bool isImm = MO1.getReg() == 0; uint32_t Imm8 = ARM_AM::getAM3Offset(Imm); // if reg +/- reg, Rm will be non-zero. Otherwise, we have reg +/- imm8 if (!isImm) - Imm8 = getARMRegisterNumbering(MO1.getReg()); + Imm8 = CTX.getRegisterInfo().getEncodingValue(MO1.getReg()); return (Rn << 9) | Imm8 | (isAdd << 8) | (isImm << 13); } @@ -1050,7 +1075,7 @@ getAddrModeISOpValue(const MCInst &MI, unsigned OpIdx, // {2-0} = Rn const MCOperand &MO = MI.getOperand(OpIdx); const MCOperand &MO1 = MI.getOperand(OpIdx + 1); - unsigned Rn = getARMRegisterNumbering(MO.getReg()); + unsigned Rn = CTX.getRegisterInfo().getEncodingValue(MO.getReg()); unsigned Imm5 = MO1.getImm(); return ((Imm5 & 0x1f) << 3) | Rn; } @@ -1077,7 +1102,7 @@ getAddrMode5OpValue(const MCInst &MI, unsigned OpIdx, // If The first operand isn't a register, we have a label reference. const MCOperand &MO = MI.getOperand(OpIdx); if (!MO.isReg()) { - Reg = getARMRegisterNumbering(ARM::PC); // Rn is PC. + Reg = CTX.getRegisterInfo().getEncodingValue(ARM::PC); // Rn is PC. Imm8 = 0; isAdd = false; // 'U' bit is handled as part of the fixup. @@ -1123,7 +1148,7 @@ getSORegRegOpValue(const MCInst &MI, unsigned OpIdx, ARM_AM::ShiftOpc SOpc = ARM_AM::getSORegShOp(MO2.getImm()); // Encode Rm. - unsigned Binary = getARMRegisterNumbering(MO.getReg()); + unsigned Binary = CTX.getRegisterInfo().getEncodingValue(MO.getReg()); // Encode the shift opcode. unsigned SBits = 0; @@ -1148,7 +1173,7 @@ getSORegRegOpValue(const MCInst &MI, unsigned OpIdx, // Encode the shift operation Rs. // Encode Rs bit[11:8]. assert(ARM_AM::getSORegOffset(MO2.getImm()) == 0); - return Binary | (getARMRegisterNumbering(Rs) << ARMII::RegRsShift); + return Binary | (CTX.getRegisterInfo().getEncodingValue(Rs) << ARMII::RegRsShift); } unsigned ARMMCCodeEmitter:: @@ -1167,7 +1192,7 @@ getSORegImmOpValue(const MCInst &MI, unsigned OpIdx, ARM_AM::ShiftOpc SOpc = ARM_AM::getSORegShOp(MO1.getImm()); // Encode Rm. - unsigned Binary = getARMRegisterNumbering(MO.getReg()); + unsigned Binary = CTX.getRegisterInfo().getEncodingValue(MO.getReg()); // Encode the shift opcode. unsigned SBits = 0; @@ -1192,8 +1217,7 @@ getSORegImmOpValue(const MCInst &MI, unsigned OpIdx, // Encode shift_imm bit[11:7]. 
Binary |= SBits << 4; unsigned Offset = ARM_AM::getSORegOffset(MO1.getImm()); - assert(Offset && "Offset must be in range 1-32!"); - if (Offset == 32) Offset = 0; + assert(Offset < 32 && "Offset must be in range 0-31!"); return Binary | (Offset << 7); } @@ -1207,9 +1231,9 @@ getT2AddrModeSORegOpValue(const MCInst &MI, unsigned OpNum, // Encoded as [Rn, Rm, imm]. // FIXME: Needs fixup support. - unsigned Value = getARMRegisterNumbering(MO1.getReg()); + unsigned Value = CTX.getRegisterInfo().getEncodingValue(MO1.getReg()); Value <<= 4; - Value |= getARMRegisterNumbering(MO2.getReg()); + Value |= CTX.getRegisterInfo().getEncodingValue(MO2.getReg()); Value <<= 2; Value |= MO3.getImm(); @@ -1223,7 +1247,7 @@ getT2AddrModeImm8OpValue(const MCInst &MI, unsigned OpNum, const MCOperand &MO2 = MI.getOperand(OpNum+1); // FIXME: Needs fixup support. - unsigned Value = getARMRegisterNumbering(MO1.getReg()); + unsigned Value = CTX.getRegisterInfo().getEncodingValue(MO1.getReg()); // Even though the immediate is 8 bits long, we need 9 bits in order // to represent the (inverse of the) sign bit. @@ -1285,7 +1309,7 @@ getT2SORegOpValue(const MCInst &MI, unsigned OpIdx, ARM_AM::ShiftOpc SOpc = ARM_AM::getSORegShOp(MO1.getImm()); // Encode Rm. - unsigned Binary = getARMRegisterNumbering(MO.getReg()); + unsigned Binary = CTX.getRegisterInfo().getEncodingValue(MO.getReg()); // Encode the shift opcode. unsigned SBits = 0; @@ -1341,7 +1365,7 @@ getRegisterListOpValue(const MCInst &MI, unsigned Op, if (SPRRegs || DPRRegs) { // VLDM/VSTM - unsigned RegNo = getARMRegisterNumbering(Reg); + unsigned RegNo = CTX.getRegisterInfo().getEncodingValue(Reg); unsigned NumRegs = (MI.getNumOperands() - Op) & 0xff; Binary |= (RegNo & 0x1f) << 8; if (SPRRegs) @@ -1350,7 +1374,7 @@ getRegisterListOpValue(const MCInst &MI, unsigned Op, Binary |= NumRegs * 2; } else { for (unsigned I = Op, E = MI.getNumOperands(); I < E; ++I) { - unsigned RegNo = getARMRegisterNumbering(MI.getOperand(I).getReg()); + unsigned RegNo = CTX.getRegisterInfo().getEncodingValue(MI.getOperand(I).getReg()); Binary |= 1 << RegNo; } } @@ -1366,7 +1390,7 @@ getAddrMode6AddressOpValue(const MCInst &MI, unsigned Op, const MCOperand &Reg = MI.getOperand(Op); const MCOperand &Imm = MI.getOperand(Op + 1); - unsigned RegNo = getARMRegisterNumbering(Reg.getReg()); + unsigned RegNo = CTX.getRegisterInfo().getEncodingValue(Reg.getReg()); unsigned Align = 0; switch (Imm.getImm()) { @@ -1389,7 +1413,7 @@ getAddrMode6OneLane32AddressOpValue(const MCInst &MI, unsigned Op, const MCOperand &Reg = MI.getOperand(Op); const MCOperand &Imm = MI.getOperand(Op + 1); - unsigned RegNo = getARMRegisterNumbering(Reg.getReg()); + unsigned RegNo = CTX.getRegisterInfo().getEncodingValue(Reg.getReg()); unsigned Align = 0; switch (Imm.getImm()) { @@ -1415,7 +1439,7 @@ getAddrMode6DupAddressOpValue(const MCInst &MI, unsigned Op, const MCOperand &Reg = MI.getOperand(Op); const MCOperand &Imm = MI.getOperand(Op + 1); - unsigned RegNo = getARMRegisterNumbering(Reg.getReg()); + unsigned RegNo = CTX.getRegisterInfo().getEncodingValue(Reg.getReg()); unsigned Align = 0; switch (Imm.getImm()) { @@ -1434,7 +1458,7 @@ getAddrMode6OffsetOpValue(const MCInst &MI, unsigned Op, SmallVectorImpl &Fixups) const { const MCOperand &MO = MI.getOperand(Op); if (MO.getReg() == 0) return 0x0D; - return getARMRegisterNumbering(MO.getReg()); + return CTX.getRegisterInfo().getEncodingValue(MO.getReg()); } unsigned ARMMCCodeEmitter:: diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp 
b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
index e3512cd..5df84c8 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
@@ -35,7 +35,7 @@ using namespace llvm;
-std::string ARM_MC::ParseARMTriple(StringRef TT) {
+std::string ARM_MC::ParseARMTriple(StringRef TT, StringRef CPU) {
 // Set the boolean corresponding to the current target triple, or the default
 // if one cannot be determined, to true.
 unsigned Len = TT.size();
@@ -51,27 +51,48 @@ std::string ARM_MC::ParseARMTriple(StringRef TT) {
 Idx = 6;
 }
+ bool NoCPU = CPU == "generic" || CPU.empty();
 std::string ARMArchFeature;
 if (Idx) {
 unsigned SubVer = TT[Idx];
 if (SubVer >= '7' && SubVer <= '9') {
 if (Len >= Idx+2 && TT[Idx+1] == 'm') {
- // v7m: FeatureNoARM, FeatureDB, FeatureHWDiv, FeatureMClass
- ARMArchFeature = "+v7,+noarm,+db,+hwdiv,+mclass";
+ if (NoCPU)
+ // v7m: FeatureNoARM, FeatureDB, FeatureHWDiv, FeatureMClass
+ ARMArchFeature = "+v7,+noarm,+db,+hwdiv,+mclass";
+ else
+ // Use CPU to figure out the exact features.
+ ARMArchFeature = "+v7";
 } else if (Len >= Idx+3 && TT[Idx+1] == 'e'&& TT[Idx+2] == 'm') {
- // v7em: FeatureNoARM, FeatureDB, FeatureHWDiv, FeatureDSPThumb2,
- // FeatureT2XtPk, FeatureMClass
- ARMArchFeature = "+v7,+noarm,+db,+hwdiv,+t2dsp,t2xtpk,+mclass";
- } else
- // v7a: FeatureNEON, FeatureDB, FeatureDSPThumb2, FeatureT2XtPk
- ARMArchFeature = "+v7,+neon,+db,+t2dsp,+t2xtpk";
+ if (NoCPU)
+ // v7em: FeatureNoARM, FeatureDB, FeatureHWDiv, FeatureDSPThumb2,
+ // FeatureT2XtPk, FeatureMClass
+ ARMArchFeature = "+v7,+noarm,+db,+hwdiv,+t2dsp,+t2xtpk,+mclass";
+ else
+ // Use CPU to figure out the exact features.
+ ARMArchFeature = "+v7";
+ } else {
+ // v7 CPUs have lots of different feature sets. If no CPU is specified,
+ // then assume v7a (e.g. cortex-a8) feature set. Otherwise, return
+ // the "minimum" feature set and use CPU string to figure out the exact
+ // features.
+ if (NoCPU)
+ // v7a: FeatureNEON, FeatureDB, FeatureDSPThumb2, FeatureT2XtPk
+ ARMArchFeature = "+v7,+neon,+db,+t2dsp,+t2xtpk";
+ else
+ // Use CPU to figure out the exact features. 
+ ARMArchFeature = "+v7"; + } } else if (SubVer == '6') { if (Len >= Idx+3 && TT[Idx+1] == 't' && TT[Idx+2] == '2') ARMArchFeature = "+v6t2"; - else if (Len >= Idx+2 && TT[Idx+1] == 'm') - // v6m: FeatureNoARM, FeatureMClass - ARMArchFeature = "+v6t2,+noarm,+mclass"; - else + else if (Len >= Idx+2 && TT[Idx+1] == 'm') { + if (NoCPU) + // v6m: FeatureNoARM, FeatureMClass + ARMArchFeature = "+v6,+noarm,+mclass"; + else + ARMArchFeature = "+v6"; + } else ARMArchFeature = "+v6"; } else if (SubVer == '5') { if (Len >= Idx+3 && TT[Idx+1] == 't' && TT[Idx+2] == 'e') @@ -94,7 +115,7 @@ std::string ARM_MC::ParseARMTriple(StringRef TT) { MCSubtargetInfo *ARM_MC::createARMMCSubtargetInfo(StringRef TT, StringRef CPU, StringRef FS) { - std::string ArchFS = ARM_MC::ParseARMTriple(TT); + std::string ArchFS = ARM_MC::ParseARMTriple(TT, CPU); if (!FS.empty()) { if (!ArchFS.empty()) ArchFS = ArchFS + "," + FS.str(); diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h index 88472d7..510302d 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h +++ b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h @@ -23,6 +23,7 @@ class MCCodeEmitter; class MCContext; class MCInstrInfo; class MCObjectWriter; +class MCRegisterInfo; class MCSubtargetInfo; class StringRef; class Target; @@ -31,7 +32,7 @@ class raw_ostream; extern Target TheARMTarget, TheThumbTarget; namespace ARM_MC { - std::string ParseARMTriple(StringRef TT); + std::string ParseARMTriple(StringRef TT, StringRef CPU); /// createARMMCSubtargetInfo - Create a ARM MCSubtargetInfo instance. /// This is exposed so Asm parser, etc. do not need to go through @@ -41,6 +42,7 @@ namespace ARM_MC { } MCCodeEmitter *createARMMCCodeEmitter(const MCInstrInfo &MCII, + const MCRegisterInfo &MRI, const MCSubtargetInfo &STI, MCContext &Ctx); diff --git a/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp b/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp index 8057cb6..a51e0fa 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp @@ -190,7 +190,7 @@ RecordARMScatteredHalfRelocation(MachObjectWriter *Writer, // 0 - arm instructions // 1 - thumb instructions // the other half of the relocated expression is in the following pair - // relocation entry in the the low 16 bits of r_address field. + // relocation entry in the low 16 bits of r_address field. unsigned ThumbBit = 0; unsigned MovtBit = 0; switch ((unsigned)Fixup.getKind()) { @@ -408,15 +408,22 @@ void ARMMachObjectWriter::RecordRelocation(MachObjectWriter *Writer, // Even when it's not a scattered relocation, movw/movt always uses // a PAIR relocation. if (Type == macho::RIT_ARM_Half) { - // The other-half value only gets populated for the movt relocation. + // The other-half value only gets populated for the movt and movw + // relocation entries. 
uint32_t Value = 0;; switch ((unsigned)Fixup.getKind()) { default: break; + case ARM::fixup_arm_movw_lo16: + case ARM::fixup_arm_movw_lo16_pcrel: + case ARM::fixup_t2_movw_lo16: + case ARM::fixup_t2_movw_lo16_pcrel: + Value = (FixedValue >> 16) & 0xffff; + break; case ARM::fixup_arm_movt_hi16: case ARM::fixup_arm_movt_hi16_pcrel: case ARM::fixup_t2_movt_hi16: case ARM::fixup_t2_movt_hi16_pcrel: - Value = FixedValue; + Value = FixedValue & 0xffff; break; } macho::RelocationEntry MREPair; diff --git a/lib/Target/ARM/MLxExpansionPass.cpp b/lib/Target/ARM/MLxExpansionPass.cpp index 2899836..ad60e32 100644 --- a/lib/Target/ARM/MLxExpansionPass.cpp +++ b/lib/Target/ARM/MLxExpansionPass.cpp @@ -220,7 +220,9 @@ MLxExpansion::ExpandFPMLxInstruction(MachineBasicBlock &MBB, MachineInstr *MI, const MCInstrDesc &MCID1 = TII->get(MulOpc); const MCInstrDesc &MCID2 = TII->get(AddSubOpc); - unsigned TmpReg = MRI->createVirtualRegister(TII->getRegClass(MCID1, 0, TRI)); + const MachineFunction &MF = *MI->getParent()->getParent(); + unsigned TmpReg = MRI->createVirtualRegister( + TII->getRegClass(MCID1, 0, TRI, MF)); MachineInstrBuilder MIB = BuildMI(MBB, MI, MI->getDebugLoc(), MCID1, TmpReg) .addReg(Src1Reg, getKillRegState(Src1Kill)) diff --git a/lib/Target/ARM/README.txt b/lib/Target/ARM/README.txt index 3eddda8..57dc6cb 100644 --- a/lib/Target/ARM/README.txt +++ b/lib/Target/ARM/README.txt @@ -710,3 +710,24 @@ targets, e.g., PPC, that share this behavior, it would be best to implement this in a target-independent way: we should probably fold that (when using "undefined at zero" semantics) to set the "defined at zero" bit and have the code generator expand out the right code. + + +//===---------------------------------------------------------------------===// + +Clean up the test/MC/ARM files to have more robust register choices. + +R0 should not be used as a register operand in the assembler tests as it's then +not possible to distinguish between a correct encoding and a missing operand +encoding, as zero is the default value for the binary encoder. +e.g., + add r0, r0 // bad + add r3, r5 // good + +Register operands should be distinct. That is, when the encoding does not +require two syntactical operands to refer to the same register, two different +registers should be used in the test so as to catch errors where the +operands are swapped in the encoding. 
+e.g., + subs.w r1, r1, r1 // bad + subs.w r1, r2, r3 // good + diff --git a/lib/Target/ARM/Thumb1InstrInfo.cpp b/lib/Target/ARM/Thumb1InstrInfo.cpp index e03e758..735b255 100644 --- a/lib/Target/ARM/Thumb1InstrInfo.cpp +++ b/lib/Target/ARM/Thumb1InstrInfo.cpp @@ -53,11 +53,11 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, unsigned SrcReg, bool isKill, int FI, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const { - assert((RC == ARM::tGPRRegisterClass || + assert((RC == &ARM::tGPRRegClass || (TargetRegisterInfo::isPhysicalRegister(SrcReg) && isARMLowRegister(SrcReg))) && "Unknown regclass!"); - if (RC == ARM::tGPRRegisterClass || + if (RC == &ARM::tGPRRegClass || (TargetRegisterInfo::isPhysicalRegister(SrcReg) && isARMLowRegister(SrcReg))) { DebugLoc DL; @@ -81,11 +81,11 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, unsigned DestReg, int FI, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const { - assert((RC == ARM::tGPRRegisterClass || + assert((RC == &ARM::tGPRRegClass || (TargetRegisterInfo::isPhysicalRegister(DestReg) && isARMLowRegister(DestReg))) && "Unknown regclass!"); - if (RC == ARM::tGPRRegisterClass || + if (RC == &ARM::tGPRRegClass || (TargetRegisterInfo::isPhysicalRegister(DestReg) && isARMLowRegister(DestReg))) { DebugLoc DL; diff --git a/lib/Target/ARM/Thumb1RegisterInfo.cpp b/lib/Target/ARM/Thumb1RegisterInfo.cpp index ef77bbd..a39b722 100644 --- a/lib/Target/ARM/Thumb1RegisterInfo.cpp +++ b/lib/Target/ARM/Thumb1RegisterInfo.cpp @@ -49,13 +49,14 @@ const TargetRegisterClass* Thumb1RegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC) const { if (ARM::tGPRRegClass.hasSubClassEq(RC)) - return ARM::tGPRRegisterClass; + return &ARM::tGPRRegClass; return ARMBaseRegisterInfo::getLargestLegalSuperClass(RC); } const TargetRegisterClass * -Thumb1RegisterInfo::getPointerRegClass(unsigned Kind) const { - return ARM::tGPRRegisterClass; +Thumb1RegisterInfo::getPointerRegClass(const MachineFunction &MF, unsigned Kind) + const { + return &ARM::tGPRRegClass; } /// emitLoadConstPool - Emits a load from constpool to materialize the @@ -109,7 +110,7 @@ void emitThumbRegPlusImmInReg(MachineBasicBlock &MBB, unsigned LdReg = DestReg; if (DestReg == ARM::SP) { assert(BaseReg == ARM::SP && "Unexpected!"); - LdReg = MF.getRegInfo().createVirtualRegister(ARM::tGPRRegisterClass); + LdReg = MF.getRegInfo().createVirtualRegister(&ARM::tGPRRegClass); } if (NumBytes <= 255 && NumBytes >= 0) @@ -693,7 +694,7 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, // register. The offset is already handled in the vreg value. 
MI.getOperand(i+1).ChangeToRegister(FrameReg, false, false, false); } else if (MI.mayStore()) { - VReg = MF.getRegInfo().createVirtualRegister(ARM::tGPRRegisterClass); + VReg = MF.getRegInfo().createVirtualRegister(&ARM::tGPRRegClass); bool UseRR = false; if (Opcode == ARM::tSTRspi) { diff --git a/lib/Target/ARM/Thumb1RegisterInfo.h b/lib/Target/ARM/Thumb1RegisterInfo.h index 6971842..f2e4b08 100644 --- a/lib/Target/ARM/Thumb1RegisterInfo.h +++ b/lib/Target/ARM/Thumb1RegisterInfo.h @@ -30,7 +30,8 @@ public: const TargetRegisterClass* getLargestLegalSuperClass(const TargetRegisterClass *RC) const; - const TargetRegisterClass *getPointerRegClass(unsigned Kind = 0) const; + const TargetRegisterClass* + getPointerRegClass(const MachineFunction &MF, unsigned Kind = 0) const; /// emitLoadConstPool - Emits a load from constpool to materialize the /// specified immediate. diff --git a/lib/Target/ARM/Thumb2ITBlockPass.cpp b/lib/Target/ARM/Thumb2ITBlockPass.cpp index ecb4c2f..d54aa93 100644 --- a/lib/Target/ARM/Thumb2ITBlockPass.cpp +++ b/lib/Target/ARM/Thumb2ITBlockPass.cpp @@ -24,8 +24,6 @@ STATISTIC(NumMovedInsts, "Number of predicated instructions moved"); namespace { class Thumb2ITBlockPass : public MachineFunctionPass { - bool PreRegAlloc; - public: static char ID; Thumb2ITBlockPass() : MachineFunctionPass(ID) {} @@ -76,16 +74,14 @@ static void TrackDefUses(MachineInstr *MI, for (unsigned i = 0, e = LocalUses.size(); i != e; ++i) { unsigned Reg = LocalUses[i]; Uses.insert(Reg); - for (const uint16_t *Subreg = TRI->getSubRegisters(Reg); - *Subreg; ++Subreg) + for (MCSubRegIterator Subreg(Reg, TRI); Subreg.isValid(); ++Subreg) Uses.insert(*Subreg); } for (unsigned i = 0, e = LocalDefs.size(); i != e; ++i) { unsigned Reg = LocalDefs[i]; Defs.insert(Reg); - for (const uint16_t *Subreg = TRI->getSubRegisters(Reg); - *Subreg; ++Subreg) + for (MCSubRegIterator Subreg(Reg, TRI); Subreg.isValid(); ++Subreg) Defs.insert(*Subreg); if (Reg == ARM::CPSR) continue; diff --git a/lib/Target/ARM/Thumb2InstrInfo.cpp b/lib/Target/ARM/Thumb2InstrInfo.cpp index 8ab486b..e9e20dd 100644 --- a/lib/Target/ARM/Thumb2InstrInfo.cpp +++ b/lib/Target/ARM/Thumb2InstrInfo.cpp @@ -126,9 +126,9 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, unsigned SrcReg, bool isKill, int FI, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const { - if (RC == ARM::GPRRegisterClass || RC == ARM::tGPRRegisterClass || - RC == ARM::tcGPRRegisterClass || RC == ARM::rGPRRegisterClass || - RC == ARM::GPRnopcRegisterClass) { + if (RC == &ARM::GPRRegClass || RC == &ARM::tGPRRegClass || + RC == &ARM::tcGPRRegClass || RC == &ARM::rGPRRegClass || + RC == &ARM::GPRnopcRegClass) { DebugLoc DL; if (I != MBB.end()) DL = I->getDebugLoc(); @@ -153,9 +153,9 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, unsigned DestReg, int FI, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const { - if (RC == ARM::GPRRegisterClass || RC == ARM::tGPRRegisterClass || - RC == ARM::tcGPRRegisterClass || RC == ARM::rGPRRegisterClass || - RC == ARM::GPRnopcRegisterClass) { + if (RC == &ARM::GPRRegClass || RC == &ARM::tGPRRegClass || + RC == &ARM::tcGPRRegClass || RC == &ARM::rGPRRegClass || + RC == &ARM::GPRnopcRegClass) { DebugLoc DL; if (I != MBB.end()) DL = I->getDebugLoc(); @@ -563,48 +563,6 @@ bool llvm::rewriteT2FrameIndex(MachineInstr &MI, unsigned FrameRegIdx, return Offset == 0; } -/// scheduleTwoAddrSource - Schedule the copy / re-mat of the source of the -/// two-addrss 
instruction inserted by two-address pass. -void -Thumb2InstrInfo::scheduleTwoAddrSource(MachineInstr *SrcMI, - MachineInstr *UseMI, - const TargetRegisterInfo &TRI) const { - if (SrcMI->getOpcode() != ARM::tMOVr || SrcMI->getOperand(1).isKill()) - return; - - unsigned PredReg = 0; - ARMCC::CondCodes CC = getInstrPredicate(UseMI, PredReg); - if (CC == ARMCC::AL || PredReg != ARM::CPSR) - return; - - // Schedule the copy so it doesn't come between previous instructions - // and UseMI which can form an IT block. - unsigned SrcReg = SrcMI->getOperand(1).getReg(); - ARMCC::CondCodes OCC = ARMCC::getOppositeCondition(CC); - MachineBasicBlock *MBB = UseMI->getParent(); - MachineBasicBlock::iterator MBBI = SrcMI; - unsigned NumInsts = 0; - while (--MBBI != MBB->begin()) { - if (MBBI->isDebugValue()) - continue; - - MachineInstr *NMI = &*MBBI; - ARMCC::CondCodes NCC = getInstrPredicate(NMI, PredReg); - if (!(NCC == CC || NCC == OCC) || - NMI->modifiesRegister(SrcReg, &TRI) || - NMI->modifiesRegister(ARM::CPSR, &TRI)) - break; - if (++NumInsts == 4) - // Too many in a row! - return; - } - - if (NumInsts) { - MBB->remove(SrcMI); - MBB->insert(++MBBI, SrcMI); - } -} - ARMCC::CondCodes llvm::getITInstrPredicate(const MachineInstr *MI, unsigned &PredReg) { unsigned Opc = MI->getOpcode(); diff --git a/lib/Target/ARM/Thumb2InstrInfo.h b/lib/Target/ARM/Thumb2InstrInfo.h index 0911f8a..2cdcd06 100644 --- a/lib/Target/ARM/Thumb2InstrInfo.h +++ b/lib/Target/ARM/Thumb2InstrInfo.h @@ -57,11 +57,6 @@ public: const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const; - /// scheduleTwoAddrSource - Schedule the copy / re-mat of the source of the - /// two-addrss instruction inserted by two-address pass. - void scheduleTwoAddrSource(MachineInstr *SrcMI, MachineInstr *UseMI, - const TargetRegisterInfo &TRI) const; - /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As /// such, whenever a client has an instance of instruction info, it should /// always be able to get register info as well (through this method). diff --git a/lib/Target/ARM/Thumb2SizeReduction.cpp b/lib/Target/ARM/Thumb2SizeReduction.cpp index b5a397e..f18f491 100644 --- a/lib/Target/ARM/Thumb2SizeReduction.cpp +++ b/lib/Target/ARM/Thumb2SizeReduction.cpp @@ -67,6 +67,7 @@ namespace { { ARM::t2BICrr, 0, ARM::tBIC, 0, 0, 0, 1, 0,0, 1,0 }, //FIXME: Disable CMN, as CCodes are backwards from compare expectations //{ ARM::t2CMNrr, ARM::tCMN, 0, 0, 0, 1, 0, 2,0, 0,0 }, + { ARM::t2CMNzrr, ARM::tCMNz, 0, 0, 0, 1, 0, 2,0, 0,0 }, { ARM::t2CMPri, ARM::tCMPi8, 0, 8, 0, 1, 0, 2,0, 0,0 }, { ARM::t2CMPrr, ARM::tCMPhir, 0, 0, 0, 0, 0, 2,0, 0,1 }, { ARM::t2EORrr, 0, ARM::tEOR, 0, 0, 0, 1, 0,0, 1,0 }, diff --git a/lib/Target/CellSPU/CMakeLists.txt b/lib/Target/CellSPU/CMakeLists.txt index cf4f796..1f8ca86 100644 --- a/lib/Target/CellSPU/CMakeLists.txt +++ b/lib/Target/CellSPU/CMakeLists.txt @@ -24,5 +24,7 @@ add_llvm_target(CellSPUCodeGen SPUNopFiller.cpp ) +add_dependencies(LLVMCellSPUCodeGen intrinsics_gen) + add_subdirectory(TargetInfo) add_subdirectory(MCTargetDesc) diff --git a/lib/Target/CellSPU/README.txt b/lib/Target/CellSPU/README.txt index 3e7e0b6..3bce960 100644 --- a/lib/Target/CellSPU/README.txt +++ b/lib/Target/CellSPU/README.txt @@ -37,6 +37,20 @@ to add 'spu' to configure's --enable-targets option, e.g.: --------------------------------------------------------------------------- TODO: +* In commit r142152 vector legalization was set to element promotion per + default. This breaks half vectors (e.g. 
v2i32) badly as they get element
+ promoted to much slower types (v2i64).
+
+* Many CellSPU-specific codegen tests only grep & count the number of
+ instructions, without checking their placement with FileCheck. There have
+ also been some commits that change the CellSPU checks, some of which might
+ not have been thoroughly scrutinized w.r.t. the changes they cause in SPU
+ assembly (especially since about the time of r142152).
+
+* Some of the i64 math operations have huge tablegen rules, which sometimes
+ cause tablegen to run out of memory. See e.g. bug 8850. i64 arithmetic
+ should probably be done with libraries.
+
 * Create a machine pass for performing dual-pipeline scheduling specifically
 for CellSPU, and insert branch prediction instructions as needed.
diff --git a/lib/Target/CellSPU/SPUAsmPrinter.cpp b/lib/Target/CellSPU/SPUAsmPrinter.cpp
index 14021fe..03d5a9a 100644
--- a/lib/Target/CellSPU/SPUAsmPrinter.cpp
+++ b/lib/Target/CellSPU/SPUAsmPrinter.cpp
@@ -301,7 +301,9 @@ bool SPUAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
 if (ExtraCode[1] != 0) return true; // Unknown modifier.
 switch (ExtraCode[0]) {
- default: return true; // Unknown modifier.
+ default:
+ // See if this is a generic print operand
+ return AsmPrinter::PrintAsmOperand(MI, OpNo, AsmVariant, ExtraCode, O);
 case 'L': // Write second word of DImode reference.
 // Verify that this operand has two consecutive registers.
 if (!MI->getOperand(OpNo).isReg() ||
diff --git a/lib/Target/CellSPU/SPUHazardRecognizers.cpp b/lib/Target/CellSPU/SPUHazardRecognizers.cpp
index 403d7ef..67a83f1 100644
--- a/lib/Target/CellSPU/SPUHazardRecognizers.cpp
+++ b/lib/Target/CellSPU/SPUHazardRecognizers.cpp
@@ -30,12 +30,6 @@ using namespace llvm;
 // very little right now.
 //===----------------------------------------------------------------------===//
-SPUHazardRecognizer::SPUHazardRecognizer(const TargetInstrInfo &tii) :
- TII(tii),
- EvenOdd(0)
-{
-}
-
 /// Return the pipeline hazard type encountered or generated by this
 /// instruction. Currently returns NoHazard.
 ///
diff --git a/lib/Target/CellSPU/SPUHazardRecognizers.h b/lib/Target/CellSPU/SPUHazardRecognizers.h
index 675632c..30acaea 100644
--- a/lib/Target/CellSPU/SPUHazardRecognizers.h
+++ b/lib/Target/CellSPU/SPUHazardRecognizers.h
@@ -24,12 +24,8 @@ class TargetInstrInfo;
 /// SPUHazardRecognizer
 class SPUHazardRecognizer : public ScheduleHazardRecognizer {
-private:
- const TargetInstrInfo &TII;
- int EvenOdd;
-
 public:
- SPUHazardRecognizer(const TargetInstrInfo &TII);
+ SPUHazardRecognizer(const TargetInstrInfo &/*TII*/) {}
 virtual HazardType getHazardType(SUnit *SU, int Stalls);
 virtual void EmitInstruction(SUnit *SU);
 virtual void AdvanceCycle();
diff --git a/lib/Target/CellSPU/SPUISelLowering.cpp b/lib/Target/CellSPU/SPUISelLowering.cpp
index 0623741..4e9fcd1 100644
--- a/lib/Target/CellSPU/SPUISelLowering.cpp
+++ b/lib/Target/CellSPU/SPUISelLowering.cpp
@@ -77,12 +77,14 @@ namespace {
 // Splice the libcall in wherever FindInputOutputChains tells us to. 
Type *RetTy = Op.getNode()->getValueType(0).getTypeForEVT(*DAG.getContext()); - std::pair CallInfo = - TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false, + TargetLowering::CallLoweringInfo CLI(InChain, RetTy, isSigned, !isSigned, + false, false, 0, TLI.getLibcallCallingConv(LC), /*isTailCall=*/false, - /*doesNotRet=*/false, /*isReturnValueUsed=*/true, + /*doesNotRet=*/false, + /*isReturnValueUsed=*/true, Callee, Args, DAG, Op.getDebugLoc()); + std::pair CallInfo = TLI.LowerCallTo(CLI); return CallInfo.first; } @@ -100,13 +102,13 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM) setLibcallName(RTLIB::DIV_F64, "__fast_divdf3"); // Set up the SPU's register classes: - addRegisterClass(MVT::i8, SPU::R8CRegisterClass); - addRegisterClass(MVT::i16, SPU::R16CRegisterClass); - addRegisterClass(MVT::i32, SPU::R32CRegisterClass); - addRegisterClass(MVT::i64, SPU::R64CRegisterClass); - addRegisterClass(MVT::f32, SPU::R32FPRegisterClass); - addRegisterClass(MVT::f64, SPU::R64FPRegisterClass); - addRegisterClass(MVT::i128, SPU::GPRCRegisterClass); + addRegisterClass(MVT::i8, &SPU::R8CRegClass); + addRegisterClass(MVT::i16, &SPU::R16CRegClass); + addRegisterClass(MVT::i32, &SPU::R32CRegClass); + addRegisterClass(MVT::i64, &SPU::R64CRegClass); + addRegisterClass(MVT::f32, &SPU::R32FPRegClass); + addRegisterClass(MVT::f64, &SPU::R64FPRegClass); + addRegisterClass(MVT::i128, &SPU::GPRCRegClass); // SPU has no sign or zero extended loads for i1, i8, i16: setLoadExtAction(ISD::EXTLOAD, MVT::i1, Promote); @@ -397,12 +399,12 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM) // First set operation action for all vector types to expand. Then we // will selectively turn on ones that can be effectively codegen'd. - addRegisterClass(MVT::v16i8, SPU::VECREGRegisterClass); - addRegisterClass(MVT::v8i16, SPU::VECREGRegisterClass); - addRegisterClass(MVT::v4i32, SPU::VECREGRegisterClass); - addRegisterClass(MVT::v2i64, SPU::VECREGRegisterClass); - addRegisterClass(MVT::v4f32, SPU::VECREGRegisterClass); - addRegisterClass(MVT::v2f64, SPU::VECREGRegisterClass); + addRegisterClass(MVT::v16i8, &SPU::VECREGRegClass); + addRegisterClass(MVT::v8i16, &SPU::VECREGRegClass); + addRegisterClass(MVT::v4i32, &SPU::VECREGRegClass); + addRegisterClass(MVT::v2i64, &SPU::VECREGRegClass); + addRegisterClass(MVT::v4f32, &SPU::VECREGRegClass); + addRegisterClass(MVT::v2f64, &SPU::VECREGRegClass); for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE; i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) { @@ -1133,7 +1135,7 @@ SPUTargetLowering::LowerFormalArguments(SDValue Chain, SmallVector ArgLocs; CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), - getTargetMachine(), ArgLocs, *DAG.getContext()); + getTargetMachine(), ArgLocs, *DAG.getContext()); // FIXME: allow for other calling conventions CCInfo.AnalyzeFormalArguments(Ins, CCC_SPU); @@ -1263,14 +1265,19 @@ static SDNode *isLSAAddress(SDValue Op, SelectionDAG &DAG) { } SDValue -SPUTargetLowering::LowerCall(SDValue Chain, SDValue Callee, - CallingConv::ID CallConv, bool isVarArg, - bool doesNotRet, bool &isTailCall, - const SmallVectorImpl &Outs, - const SmallVectorImpl &OutVals, - const SmallVectorImpl &Ins, - DebugLoc dl, SelectionDAG &DAG, +SPUTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, SmallVectorImpl &InVals) const { + SelectionDAG &DAG = CLI.DAG; + DebugLoc &dl = CLI.DL; + SmallVector &Outs = CLI.Outs; + SmallVector &OutVals = CLI.OutVals; + SmallVector &Ins = CLI.Ins; + SDValue Chain = CLI.Chain; + SDValue 
Callee = CLI.Callee; + bool &isTailCall = CLI.IsTailCall; + CallingConv::ID CallConv = CLI.CallConv; + bool isVarArg = CLI.IsVarArg; + // CellSPU target does not yet support tail call optimization. isTailCall = false; @@ -1280,7 +1287,7 @@ SPUTargetLowering::LowerCall(SDValue Chain, SDValue Callee, SmallVector ArgLocs; CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), - getTargetMachine(), ArgLocs, *DAG.getContext()); + getTargetMachine(), ArgLocs, *DAG.getContext()); // FIXME: allow for other calling conventions CCInfo.AnalyzeCallOperands(Outs, CCC_SPU); @@ -1441,7 +1448,7 @@ SPUTargetLowering::LowerCall(SDValue Chain, SDValue Callee, // Now handle the return value(s) SmallVector RVLocs; CCState CCRetInfo(CallConv, isVarArg, DAG.getMachineFunction(), - getTargetMachine(), RVLocs, *DAG.getContext()); + getTargetMachine(), RVLocs, *DAG.getContext()); CCRetInfo.AnalyzeCallResult(Ins, CCC_SPU); @@ -1468,7 +1475,7 @@ SPUTargetLowering::LowerReturn(SDValue Chain, SmallVector RVLocs; CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), - getTargetMachine(), RVLocs, *DAG.getContext()); + getTargetMachine(), RVLocs, *DAG.getContext()); CCInfo.AnalyzeReturn(Outs, RetCC_SPU); // If this is the first return lowered for this function, add the regs to the @@ -3139,16 +3146,16 @@ SPUTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, case 'b': // R1-R31 case 'r': // R0-R31 if (VT == MVT::i64) - return std::make_pair(0U, SPU::R64CRegisterClass); - return std::make_pair(0U, SPU::R32CRegisterClass); + return std::make_pair(0U, &SPU::R64CRegClass); + return std::make_pair(0U, &SPU::R32CRegClass); case 'f': if (VT == MVT::f32) - return std::make_pair(0U, SPU::R32FPRegisterClass); - else if (VT == MVT::f64) - return std::make_pair(0U, SPU::R64FPRegisterClass); + return std::make_pair(0U, &SPU::R32FPRegClass); + if (VT == MVT::f64) + return std::make_pair(0U, &SPU::R64FPRegClass); break; case 'v': - return std::make_pair(0U, SPU::GPRCRegisterClass); + return std::make_pair(0U, &SPU::GPRCRegClass); } } diff --git a/lib/Target/CellSPU/SPUISelLowering.h b/lib/Target/CellSPU/SPUISelLowering.h index e3db7b2..9f1599f 100644 --- a/lib/Target/CellSPU/SPUISelLowering.h +++ b/lib/Target/CellSPU/SPUISelLowering.h @@ -86,7 +86,6 @@ namespace llvm { class SPUTargetLowering : public TargetLowering { - int VarArgsFrameIndex; // FrameIndex for start of varargs area. SPUTargetMachine &SPUTM; public: @@ -159,13 +158,7 @@ namespace llvm { SmallVectorImpl &InVals) const; virtual SDValue - LowerCall(SDValue Chain, SDValue Callee, - CallingConv::ID CallConv, bool isVarArg, - bool doesNotRet, bool &isTailCall, - const SmallVectorImpl &Outs, - const SmallVectorImpl &OutVals, - const SmallVectorImpl &Ins, - DebugLoc dl, SelectionDAG &DAG, + LowerCall(TargetLowering::CallLoweringInfo &CLI, SmallVectorImpl &InVals) const; virtual SDValue diff --git a/lib/Target/CellSPU/SPUInstrInfo.cpp b/lib/Target/CellSPU/SPUInstrInfo.cpp index 759923d..b25a639 100644 --- a/lib/Target/CellSPU/SPUInstrInfo.cpp +++ b/lib/Target/CellSPU/SPUInstrInfo.cpp @@ -140,29 +140,27 @@ SPUInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned SrcReg, bool isKill, int FrameIdx, const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const -{ + const TargetRegisterInfo *TRI) const { unsigned opc; bool isValidFrameIdx = (FrameIdx < SPUFrameLowering::maxFrameOffset()); - if (RC == SPU::GPRCRegisterClass) { - opc = (isValidFrameIdx ? 
SPU::STQDr128 : SPU::STQXr128); - } else if (RC == SPU::R64CRegisterClass) { - opc = (isValidFrameIdx ? SPU::STQDr64 : SPU::STQXr64); - } else if (RC == SPU::R64FPRegisterClass) { - opc = (isValidFrameIdx ? SPU::STQDr64 : SPU::STQXr64); - } else if (RC == SPU::R32CRegisterClass) { - opc = (isValidFrameIdx ? SPU::STQDr32 : SPU::STQXr32); - } else if (RC == SPU::R32FPRegisterClass) { - opc = (isValidFrameIdx ? SPU::STQDr32 : SPU::STQXr32); - } else if (RC == SPU::R16CRegisterClass) { - opc = (isValidFrameIdx ? SPU::STQDr16 : SPU::STQXr16); - } else if (RC == SPU::R8CRegisterClass) { - opc = (isValidFrameIdx ? SPU::STQDr8 : SPU::STQXr8); - } else if (RC == SPU::VECREGRegisterClass) { - opc = (isValidFrameIdx) ? SPU::STQDv16i8 : SPU::STQXv16i8; - } else { + if (RC == &SPU::GPRCRegClass) + opc = isValidFrameIdx ? SPU::STQDr128 : SPU::STQXr128; + else if (RC == &SPU::R64CRegClass) + opc = isValidFrameIdx ? SPU::STQDr64 : SPU::STQXr64; + else if (RC == &SPU::R64FPRegClass) + opc = isValidFrameIdx ? SPU::STQDr64 : SPU::STQXr64; + else if (RC == &SPU::R32CRegClass) + opc = isValidFrameIdx ? SPU::STQDr32 : SPU::STQXr32; + else if (RC == &SPU::R32FPRegClass) + opc = isValidFrameIdx ? SPU::STQDr32 : SPU::STQXr32; + else if (RC == &SPU::R16CRegClass) + opc = isValidFrameIdx ? SPU::STQDr16 : SPU::STQXr16; + else if (RC == &SPU::R8CRegClass) + opc = isValidFrameIdx ? SPU::STQDr8 : SPU::STQXr8; + else if (RC == &SPU::VECREGRegClass) + opc = isValidFrameIdx ? SPU::STQDv16i8 : SPU::STQXv16i8; + else llvm_unreachable("Unknown regclass!"); - } DebugLoc DL; if (MI != MBB.end()) DL = MI->getDebugLoc(); @@ -175,29 +173,27 @@ SPUInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned DestReg, int FrameIdx, const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const -{ + const TargetRegisterInfo *TRI) const { unsigned opc; bool isValidFrameIdx = (FrameIdx < SPUFrameLowering::maxFrameOffset()); - if (RC == SPU::GPRCRegisterClass) { - opc = (isValidFrameIdx ? SPU::LQDr128 : SPU::LQXr128); - } else if (RC == SPU::R64CRegisterClass) { - opc = (isValidFrameIdx ? SPU::LQDr64 : SPU::LQXr64); - } else if (RC == SPU::R64FPRegisterClass) { - opc = (isValidFrameIdx ? SPU::LQDr64 : SPU::LQXr64); - } else if (RC == SPU::R32CRegisterClass) { - opc = (isValidFrameIdx ? SPU::LQDr32 : SPU::LQXr32); - } else if (RC == SPU::R32FPRegisterClass) { - opc = (isValidFrameIdx ? SPU::LQDr32 : SPU::LQXr32); - } else if (RC == SPU::R16CRegisterClass) { - opc = (isValidFrameIdx ? SPU::LQDr16 : SPU::LQXr16); - } else if (RC == SPU::R8CRegisterClass) { - opc = (isValidFrameIdx ? SPU::LQDr8 : SPU::LQXr8); - } else if (RC == SPU::VECREGRegisterClass) { - opc = (isValidFrameIdx) ? SPU::LQDv16i8 : SPU::LQXv16i8; - } else { + if (RC == &SPU::GPRCRegClass) + opc = isValidFrameIdx ? SPU::LQDr128 : SPU::LQXr128; + else if (RC == &SPU::R64CRegClass) + opc = isValidFrameIdx ? SPU::LQDr64 : SPU::LQXr64; + else if (RC == &SPU::R64FPRegClass) + opc = isValidFrameIdx ? SPU::LQDr64 : SPU::LQXr64; + else if (RC == &SPU::R32CRegClass) + opc = isValidFrameIdx ? SPU::LQDr32 : SPU::LQXr32; + else if (RC == &SPU::R32FPRegClass) + opc = isValidFrameIdx ? SPU::LQDr32 : SPU::LQXr32; + else if (RC == &SPU::R16CRegClass) + opc = isValidFrameIdx ? SPU::LQDr16 : SPU::LQXr16; + else if (RC == &SPU::R8CRegClass) + opc = isValidFrameIdx ? SPU::LQDr8 : SPU::LQXr8; + else if (RC == &SPU::VECREGRegClass) + opc = isValidFrameIdx ? 
SPU::LQDv16i8 : SPU::LQXv16i8; + else llvm_unreachable("Unknown regclass in loadRegFromStackSlot!"); - } DebugLoc DL; if (MI != MBB.end()) DL = MI->getDebugLoc(); @@ -340,11 +336,11 @@ SPUInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const { static MachineBasicBlock::iterator findHBRPosition(MachineBasicBlock &MBB) { MachineBasicBlock::iterator J = MBB.end(); - for( int i=0; i<8; i++) { - if( J == MBB.begin() ) return J; - J--; - } - return J; + for( int i=0; i<8; i++) { + if( J == MBB.begin() ) return J; + J--; + } + return J; } unsigned @@ -360,7 +356,7 @@ SPUInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineInstrBuilder MIB; //TODO: make a more accurate algorithm. bool haveHBR = MBB.size()>8; - + removeHBR(MBB); MCSymbol *branchLabel = MBB.getParent()->getContext().CreateTempSymbol(); // Add a label just before the branch @@ -382,7 +378,7 @@ SPUInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MIB = BuildMI( MBB, findHBRPosition(MBB), DL, get(SPU::HBRA)); MIB.addSym(branchLabel); MIB.addMBB(TBB); - } + } } else { // Conditional branch MIB = BuildMI(&MBB, DL, get(Cond[0].getImm())); @@ -392,7 +388,7 @@ SPUInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MIB = BuildMI(MBB, findHBRPosition(MBB), DL, get(SPU::HBRA)); MIB.addSym(branchLabel); MIB.addMBB(TBB); - } + } DEBUG(errs() << "Inserted one-way cond branch: "); DEBUG((*MIB).dump()); @@ -410,7 +406,7 @@ SPUInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MIB = BuildMI( MBB, findHBRPosition(MBB), DL, get(SPU::HBRA)); MIB.addSym(branchLabel); MIB.addMBB(FBB); - } + } DEBUG(errs() << "Inserted conditional branch: "); DEBUG((*MIB).dump()); diff --git a/lib/Target/CellSPU/SPUInstrInfo.td b/lib/Target/CellSPU/SPUInstrInfo.td index f76ebd7..117acd7 100644 --- a/lib/Target/CellSPU/SPUInstrInfo.td +++ b/lib/Target/CellSPU/SPUInstrInfo.td @@ -3421,14 +3421,14 @@ let isCall = 1, // Branch relative and set link: Used if we actually know that the target // is within [-32768, 32767] bytes of the target def BRSL: - BranchSetLink<0b011001100, (outs), (ins relcalltarget:$func, variable_ops), + BranchSetLink<0b011001100, (outs), (ins relcalltarget:$func), "brsl\t$$lr, $func", [(SPUcall (SPUpcrel tglobaladdr:$func, 0))]>; // Branch absolute and set link: Used if we actually know that the target // is an absolute address def BRASL: - BranchSetLink<0b011001100, (outs), (ins calltarget:$func, variable_ops), + BranchSetLink<0b011001100, (outs), (ins calltarget:$func), "brasl\t$$lr, $func", [(SPUcall (SPUaform tglobaladdr:$func, 0))]>; diff --git a/lib/Target/CellSPU/SPURegisterInfo.cpp b/lib/Target/CellSPU/SPURegisterInfo.cpp index 1b2da5f..e6c872d 100644 --- a/lib/Target/CellSPU/SPURegisterInfo.cpp +++ b/lib/Target/CellSPU/SPURegisterInfo.cpp @@ -193,7 +193,8 @@ SPURegisterInfo::SPURegisterInfo(const SPUSubtarget &subtarget, /// getPointerRegClass - Return the register class to use to hold pointers. /// This is used for addressing modes. const TargetRegisterClass * -SPURegisterInfo::getPointerRegClass(unsigned Kind) const { +SPURegisterInfo::getPointerRegClass(const MachineFunction &MF, unsigned Kind) + const { return &SPU::R32CRegClass; } diff --git a/lib/Target/CellSPU/SPURegisterInfo.h b/lib/Target/CellSPU/SPURegisterInfo.h index e5ab224..e9f9aba 100644 --- a/lib/Target/CellSPU/SPURegisterInfo.h +++ b/lib/Target/CellSPU/SPURegisterInfo.h @@ -46,7 +46,7 @@ namespace llvm { /// getPointerRegClass - Return the register class to use to hold pointers. 
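The SPURegisterInfo hunk above tracks a TargetRegisterInfo interface change: getPointerRegClass now receives the MachineFunction, so a target can choose its pointer register class per function (for example by subtarget). A minimal sketch of the updated override, restating the CellSPU definition from the hunk:

  // 3.1-era hook: the MachineFunction parameter is new; CellSPU
  // ignores it and always addresses memory through R32C registers.
  const TargetRegisterClass *
  SPURegisterInfo::getPointerRegClass(const MachineFunction &MF,
                                      unsigned Kind) const {
    return &SPU::R32CRegClass;
  }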
/// This is used for addressing modes. virtual const TargetRegisterClass * - getPointerRegClass(unsigned Kind = 0) const; + getPointerRegClass(const MachineFunction &MF, unsigned Kind = 0) const; /// After allocating this many registers, the allocator should feel /// register pressure. The value is a somewhat random guess, based on the @@ -63,6 +63,11 @@ namespace llvm { virtual bool requiresRegisterScavenging(const MachineFunction &MF) const { return true; } + //! Enable tracking of liveness after register allocation, since register + // scavenging is enabled. + virtual bool trackLivenessAfterRegAlloc(const MachineFunction &MF) const + { return true; } + //! Return the reserved registers BitVector getReservedRegs(const MachineFunction &MF) const; diff --git a/lib/Target/CellSPU/SPUTargetMachine.cpp b/lib/Target/CellSPU/SPUTargetMachine.cpp index 3b90261..54764f1 100644 --- a/lib/Target/CellSPU/SPUTargetMachine.cpp +++ b/lib/Target/CellSPU/SPUTargetMachine.cpp @@ -72,7 +72,7 @@ TargetPassConfig *SPUTargetMachine::createPassConfig(PassManagerBase &PM) { bool SPUPassConfig::addInstSelector() { // Install an instruction selector. - PM->add(createSPUISelDag(getSPUTargetMachine())); + addPass(createSPUISelDag(getSPUTargetMachine())); return false; } @@ -85,9 +85,9 @@ bool SPUPassConfig::addPreEmitPass() { (BuilderFunc)(intptr_t)sys::DynamicLibrary::SearchForAddressOfSymbol( "createTCESchedulerPass"); if (schedulerCreator != NULL) - PM->add(schedulerCreator("cellspu")); + addPass(schedulerCreator("cellspu")); //align instructions with nops/lnops for dual issue - PM->add(createSPUNopFillerPass(getSPUTargetMachine())); + addPass(createSPUNopFillerPass(getSPUTargetMachine())); return true; } diff --git a/lib/Target/CppBackend/CPPBackend.cpp b/lib/Target/CppBackend/CPPBackend.cpp index 69f0ff8..c8e757b 100644 --- a/lib/Target/CppBackend/CPPBackend.cpp +++ b/lib/Target/CppBackend/CPPBackend.cpp @@ -130,6 +130,7 @@ namespace { private: void printLinkageType(GlobalValue::LinkageTypes LT); void printVisibilityType(GlobalValue::VisibilityTypes VisTypes); + void printThreadLocalMode(GlobalVariable::ThreadLocalMode TLM); void printCallingConv(CallingConv::ID cc); void printEscapedString(const std::string& str); void printCFP(const ConstantFP* CFP); @@ -325,6 +326,26 @@ void CppWriter::printVisibilityType(GlobalValue::VisibilityTypes VisType) { } } +void CppWriter::printThreadLocalMode(GlobalVariable::ThreadLocalMode TLM) { + switch (TLM) { + case GlobalVariable::NotThreadLocal: + Out << "GlobalVariable::NotThreadLocal"; + break; + case GlobalVariable::GeneralDynamicTLSModel: + Out << "GlobalVariable::GeneralDynamicTLSModel"; + break; + case GlobalVariable::LocalDynamicTLSModel: + Out << "GlobalVariable::LocalDynamicTLSModel"; + break; + case GlobalVariable::InitialExecTLSModel: + Out << "GlobalVariable::InitialExecTLSModel"; + break; + case GlobalVariable::LocalExecTLSModel: + Out << "GlobalVariable::LocalExecTLSModel"; + break; + } +} + // printEscapedString - Print each character of the specified string, escaping // it if it is not printable or if it is an escape char. 
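The CppBackend hunks around this point replace the boolean thread-local flag with the five-value ThreadLocalMode enum, which printThreadLocalMode() spells out one enumerator at a time. As the printVariableHead change further below shows, the writer now emits an explicit model; a sketch of the generated C++ for one thread-local global, where the variable name gvar is illustrative:

  // Generated-code sketch: the old setThreadLocal(true) becomes an
  // explicit TLS model chosen from GlobalVariable::ThreadLocalMode.
  gvar->setThreadLocalMode(GlobalVariable::GeneralDynamicTLSModel);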
void CppWriter::printEscapedString(const std::string &Str) { @@ -496,7 +517,7 @@ void CppWriter::printAttributes(const AttrListPtr &PAL, Out << "Attrs.push_back(PAWI);"; nl(Out); } - Out << name << "_PAL = AttrListPtr::get(Attrs.begin(), Attrs.end());"; + Out << name << "_PAL = AttrListPtr::get(Attrs);"; nl(Out); out(); nl(Out); Out << '}'; nl(Out); @@ -996,7 +1017,9 @@ void CppWriter::printVariableHead(const GlobalVariable *GV) { } if (GV->isThreadLocal()) { printCppName(GV); - Out << "->setThreadLocal(true);"; + Out << "->setThreadLocalMode("; + printThreadLocalMode(GV->getThreadLocalMode()); + Out << ");"; nl(Out); } if (is_inline) { @@ -1105,7 +1128,7 @@ void CppWriter::printInstruction(const Instruction *I, nl(Out); for (SwitchInst::ConstCaseIt i = SI->case_begin(), e = SI->case_end(); i != e; ++i) { - const ConstantInt* CaseVal = i.getCaseValue(); + const IntegersSubset CaseVal = i.getCaseValueEx(); const BasicBlock *BB = i.getCaseSuccessor(); Out << iName << "->addCase(" << getOpName(CaseVal) << ", " @@ -2078,7 +2101,9 @@ char CppWriter::ID = 0; bool CPPTargetMachine::addPassesToEmitFile(PassManagerBase &PM, formatted_raw_ostream &o, CodeGenFileType FileType, - bool DisableVerify) { + bool DisableVerify, + AnalysisID StartAfter, + AnalysisID StopAfter) { if (FileType != TargetMachine::CGFT_AssemblyFile) return true; PM.add(new CppWriter(o)); return false; diff --git a/lib/Target/CppBackend/CPPTargetMachine.h b/lib/Target/CppBackend/CPPTargetMachine.h index 92bca6c..9cbe798 100644 --- a/lib/Target/CppBackend/CPPTargetMachine.h +++ b/lib/Target/CppBackend/CPPTargetMachine.h @@ -31,7 +31,9 @@ struct CPPTargetMachine : public TargetMachine { virtual bool addPassesToEmitFile(PassManagerBase &PM, formatted_raw_ostream &Out, CodeGenFileType FileType, - bool DisableVerify); + bool DisableVerify, + AnalysisID StartAfter, + AnalysisID StopAfter); virtual const TargetData *getTargetData() const { return 0; } }; diff --git a/lib/Target/Hexagon/CMakeLists.txt b/lib/Target/Hexagon/CMakeLists.txt index af9e813..1f2d8ac 100644 --- a/lib/Target/Hexagon/CMakeLists.txt +++ b/lib/Target/Hexagon/CMakeLists.txt @@ -28,8 +28,12 @@ add_llvm_target(HexagonCodeGen HexagonSubtarget.cpp HexagonTargetMachine.cpp HexagonTargetObjectFile.cpp + HexagonVLIWPacketizer.cpp + HexagonNewValueJump.cpp ) +add_dependencies(LLVMHexagonCodeGen intrinsics_gen) + add_subdirectory(TargetInfo) add_subdirectory(InstPrinter) add_subdirectory(MCTargetDesc) diff --git a/lib/Target/Hexagon/Hexagon.h b/lib/Target/Hexagon/Hexagon.h index 0808323..45f857b 100644 --- a/lib/Target/Hexagon/Hexagon.h +++ b/lib/Target/Hexagon/Hexagon.h @@ -40,6 +40,9 @@ namespace llvm { FunctionPass *createHexagonHardwareLoops(); FunctionPass *createHexagonPeephole(); FunctionPass *createHexagonFixupHwLoops(); + FunctionPass *createHexagonPacketizer(); + FunctionPass *createHexagonNewValueJump(); + /* TODO: object output. MCCodeEmitter *createHexagonMCCodeEmitter(const Target &, @@ -47,7 +50,8 @@ namespace llvm { MCContext &Ctx); */ /* TODO: assembler input. - TargetAsmBackend *createHexagonAsmBackend(const Target &, const std::string &); + TargetAsmBackend *createHexagonAsmBackend(const Target &, + const std::string &); */ void HexagonLowerToMC(const MachineInstr *MI, MCInst &MCI, HexagonAsmPrinter &AP); @@ -67,7 +71,7 @@ namespace llvm { // Normal instruction size (in bytes). #define HEXAGON_INSTR_SIZE 4 -// Maximum number of words in a packet (in instructions). +// Maximum number of words and instructions in a packet. 
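The addPassesToEmitFile change above follows an interface-wide update: the hook gains StartAfter/StopAfter pass IDs so a driver can bound code generation to a sub-range of the pipeline, and targets that do not care simply accept and ignore them. A sketch of the updated CppBackend override, restating the hunk:

  // Sketch: StartAfter/StopAfter are unused here; the backend still
  // supports only assembly output and installs its single writer pass.
  bool CPPTargetMachine::addPassesToEmitFile(PassManagerBase &PM,
                                             formatted_raw_ostream &o,
                                             CodeGenFileType FileType,
                                             bool DisableVerify,
                                             AnalysisID StartAfter,
                                             AnalysisID StopAfter) {
    if (FileType != TargetMachine::CGFT_AssemblyFile) return true;
    PM.add(new CppWriter(o));
    return false;
  }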
#define HEXAGON_PACKET_SIZE 4 #endif diff --git a/lib/Target/Hexagon/Hexagon.td b/lib/Target/Hexagon/Hexagon.td index 4a50d16..451e562 100644 --- a/lib/Target/Hexagon/Hexagon.td +++ b/lib/Target/Hexagon/Hexagon.td @@ -28,6 +28,8 @@ def ArchV3 : SubtargetFeature<"v3", "HexagonArchVersion", "V3", "Hexagon v3">; def ArchV4 : SubtargetFeature<"v4", "HexagonArchVersion", "V4", "Hexagon v4">; +def ArchV5 : SubtargetFeature<"v5", "HexagonArchVersion", "V5", + "Hexagon v5">; //===----------------------------------------------------------------------===// // Register File, Calling Conv, Instruction Descriptions @@ -45,13 +47,15 @@ def HexagonInstrInfo : InstrInfo; // Hexagon processors supported. //===----------------------------------------------------------------------===// -class Proc Features> - : Processor; + : ProcessorModel; + +def : Proc<"hexagonv2", HexagonModel, [ArchV2]>; +def : Proc<"hexagonv3", HexagonModel, [ArchV2, ArchV3]>; +def : Proc<"hexagonv4", HexagonModelV4, [ArchV2, ArchV3, ArchV4]>; +def : Proc<"hexagonv5", HexagonModelV4, [ArchV2, ArchV3, ArchV4, ArchV5]>; -def : Proc<"hexagonv2", HexagonItineraries, [ArchV2]>; -def : Proc<"hexagonv3", HexagonItineraries, [ArchV2, ArchV3]>; -def : Proc<"hexagonv4", HexagonItinerariesV4, [ArchV2, ArchV3, ArchV4]>; // Hexagon Uses the MC printer for assembler output, so make sure the TableGen // AsmWriter bits get associated with the correct class. diff --git a/lib/Target/Hexagon/HexagonAsmPrinter.cpp b/lib/Target/Hexagon/HexagonAsmPrinter.cpp index 39bf45d..5fa4740 100644 --- a/lib/Target/Hexagon/HexagonAsmPrinter.cpp +++ b/lib/Target/Hexagon/HexagonAsmPrinter.cpp @@ -13,11 +13,11 @@ // //===----------------------------------------------------------------------===// - #define DEBUG_TYPE "asm-printer" #include "Hexagon.h" #include "HexagonAsmPrinter.h" #include "HexagonMachineFunctionInfo.h" +#include "HexagonMCInst.h" #include "HexagonTargetMachine.h" #include "HexagonSubtarget.h" #include "InstPrinter/HexagonInstPrinter.h" @@ -77,8 +77,7 @@ void HexagonAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo, const MachineOperand &MO = MI->getOperand(OpNo); switch (MO.getType()) { - default: - assert(0 && ""); + default: llvm_unreachable (""); case MachineOperand::MO_Register: O << HexagonInstPrinter::getRegisterName(MO.getReg()); return; @@ -134,7 +133,9 @@ bool HexagonAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, if (ExtraCode[1] != 0) return true; // Unknown modifier. switch (ExtraCode[0]) { - default: return true; // Unknown modifier. + default: + // See if this is a generic print operand + return AsmPrinter::PrintAsmOperand(MI, OpNo, AsmVariant, ExtraCode, OS); case 'c': // Don't print "$" before a global var name or constant. // Hexagon never has a prefix. printOperand(MI, OpNo, OS); @@ -196,10 +197,45 @@ void HexagonAsmPrinter::printPredicateOperand(const MachineInstr *MI, /// the current output stream. 
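The PrintAsmOperand hunk above loosens inline-asm handling: a modifier the target does not recognize is now handed to the generic AsmPrinter instead of being rejected outright. A condensed sketch of the dispatch, restating the hunk (only the 'c' case is shown there):

  switch (ExtraCode[0]) {
  default:
    // Give the target-independent implementation a chance first.
    return AsmPrinter::PrintAsmOperand(MI, OpNo, AsmVariant, ExtraCode, OS);
  case 'c': // Hexagon never prefixes names with '$'.
    printOperand(MI, OpNo, OS);
    return false;
  }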
/// void HexagonAsmPrinter::EmitInstruction(const MachineInstr *MI) { - MCInst MCI; - - HexagonLowerToMC(MI, MCI, *this); - OutStreamer.EmitInstruction(MCI); + if (MI->isBundle()) { + std::vector BundleMIs; + + const MachineBasicBlock *MBB = MI->getParent(); + MachineBasicBlock::const_instr_iterator MII = MI; + ++MII; + unsigned int IgnoreCount = 0; + while (MII != MBB->end() && MII->isInsideBundle()) { + const MachineInstr *MInst = MII; + if (MInst->getOpcode() == TargetOpcode::DBG_VALUE || + MInst->getOpcode() == TargetOpcode::IMPLICIT_DEF) { + IgnoreCount++; + ++MII; + continue; + } + //BundleMIs.push_back(&*MII); + BundleMIs.push_back(MInst); + ++MII; + } + unsigned Size = BundleMIs.size(); + assert((Size+IgnoreCount) == MI->getBundleSize() && "Corrupt Bundle!"); + for (unsigned Index = 0; Index < Size; Index++) { + HexagonMCInst MCI; + MCI.setStartPacket(Index == 0); + MCI.setEndPacket(Index == (Size-1)); + + HexagonLowerToMC(BundleMIs[Index], MCI, *this); + OutStreamer.EmitInstruction(MCI); + } + } + else { + HexagonMCInst MCI; + if (MI->getOpcode() == Hexagon::ENDLOOP0) { + MCI.setStartPacket(true); + MCI.setEndPacket(true); + } + HexagonLowerToMC(MI, MCI, *this); + OutStreamer.EmitInstruction(MCI); + } return; } @@ -241,15 +277,15 @@ void HexagonAsmPrinter::printGlobalOperand(const MachineInstr *MI, int OpNo, void HexagonAsmPrinter::printJumpTable(const MachineInstr *MI, int OpNo, raw_ostream &O) { const MachineOperand &MO = MI->getOperand(OpNo); - assert( (MO.getType() == MachineOperand::MO_JumpTableIndex) && - "Expecting jump table index"); + assert( (MO.getType() == MachineOperand::MO_JumpTableIndex) && + "Expecting jump table index"); // Hexagon_TODO: Do we need name mangling? O << *GetJTISymbol(MO.getIndex()); } void HexagonAsmPrinter::printConstantPool(const MachineInstr *MI, int OpNo, - raw_ostream &O) { + raw_ostream &O) { const MachineOperand &MO = MI->getOperand(OpNo); assert( (MO.getType() == MachineOperand::MO_ConstantPoolIndex) && "Expecting constant pool index"); diff --git a/lib/Target/Hexagon/HexagonCallingConv.td b/lib/Target/Hexagon/HexagonCallingConv.td index bd9608b..e61b2a7 100644 --- a/lib/Target/Hexagon/HexagonCallingConv.td +++ b/lib/Target/Hexagon/HexagonCallingConv.td @@ -17,8 +17,8 @@ // Hexagon 32-bit C return-value convention. def RetCC_Hexagon32 : CallingConv<[ - CCIfType<[i32], CCAssignToReg<[R0, R1, R2, R3, R4, R5]>>, - CCIfType<[i64], CCAssignToReg<[D0, D1, D2]>>, + CCIfType<[i32, f32], CCAssignToReg<[R0, R1, R2, R3, R4, R5]>>, + CCIfType<[i64, f64], CCAssignToReg<[D0, D1, D2]>>, // Alternatively, they are assigned to the stack in 4-byte aligned units. CCAssignToStack<4, 4> @@ -27,8 +27,8 @@ def RetCC_Hexagon32 : CallingConv<[ // Hexagon 32-bit C Calling convention. def CC_Hexagon32 : CallingConv<[ // All arguments get passed in integer registers if there is space. - CCIfType<[i32, i16, i8], CCAssignToReg<[R0, R1, R2, R3, R4, R5]>>, - CCIfType<[i64], CCAssignToReg<[D0, D1, D2]>>, + CCIfType<[f32, i32, i16, i8], CCAssignToReg<[R0, R1, R2, R3, R4, R5]>>, + CCIfType<[f64, i64], CCAssignToReg<[D0, D1, D2]>>, // Alternatively, they are assigned to the stack in 4-byte aligned units. 
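The EmitInstruction rewrite above is the heart of Hexagon packet emission. A minimal sketch of the traversal it performs, with the template arguments restored and types as in the hunk: MI is the BUNDLE header, the packet body follows as inside-bundle instructions, and DBG_VALUE/IMPLICIT_DEF are counted but not emitted so the bundle-size assertion still balances.

  std::vector<const MachineInstr*> BundleMIs;
  unsigned IgnoreCount = 0;
  MachineBasicBlock::const_instr_iterator MII = MI;
  for (++MII; MII != MBB->end() && MII->isInsideBundle(); ++MII) {
    if (MII->getOpcode() == TargetOpcode::DBG_VALUE ||
        MII->getOpcode() == TargetOpcode::IMPLICIT_DEF)
      ++IgnoreCount;               // present in the bundle, not printed
    else
      BundleMIs.push_back(&*MII);  // real packet member
  }
  assert(BundleMIs.size() + IgnoreCount == MI->getBundleSize() &&
         "Corrupt Bundle!");
  // Each member then becomes a HexagonMCInst tagged with its packet
  // position via setStartPacket()/setEndPacket() before HexagonLowerToMC().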
CCAssignToStack<4, 4> diff --git a/lib/Target/Hexagon/HexagonCallingConvLower.cpp b/lib/Target/Hexagon/HexagonCallingConvLower.cpp index 46c20e9..ba8e679 100644 --- a/lib/Target/Hexagon/HexagonCallingConvLower.cpp +++ b/lib/Target/Hexagon/HexagonCallingConvLower.cpp @@ -56,11 +56,8 @@ void Hexagon_CCState::HandleByVal(unsigned ValNo, EVT ValVT, /// MarkAllocated - Mark a register and all of its aliases as allocated. void Hexagon_CCState::MarkAllocated(unsigned Reg) { - UsedRegs[Reg/32] |= 1 << (Reg&31); - - if (const uint16_t *RegAliases = TRI.getAliasSet(Reg)) - for (; (Reg = *RegAliases); ++RegAliases) - UsedRegs[Reg/32] |= 1 << (Reg&31); + for (MCRegAliasIterator AI(Reg, &TRI, true); AI.isValid(); ++AI) + UsedRegs[*AI/32] |= 1 << (*AI&31); } /// AnalyzeFormalArguments - Analyze an ISD::FORMAL_ARGUMENTS node, diff --git a/lib/Target/Hexagon/HexagonExpandPredSpillCode.cpp b/lib/Target/Hexagon/HexagonExpandPredSpillCode.cpp index 2100474..ae2ca37 100644 --- a/lib/Target/Hexagon/HexagonExpandPredSpillCode.cpp +++ b/lib/Target/Hexagon/HexagonExpandPredSpillCode.cpp @@ -7,9 +7,9 @@ // //===----------------------------------------------------------------------===// // The Hexagon processor has no instructions that load or store predicate -// registers directly. So, when these registers must be spilled a general -// purpose register must be found and the value copied to/from it from/to -// the predicate register. This code currently does not use the register +// registers directly. So, when these registers must be spilled a general +// purpose register must be found and the value copied to/from it from/to +// the predicate register. This code currently does not use the register // scavenger mechanism available in the allocator. There are two registers // reserved to allow spilling/restoring predicate registers. One is used to // hold the predicate value. The other is used when stack frame offsets are @@ -84,7 +84,7 @@ bool HexagonExpandPredSpillCode::runOnMachineFunction(MachineFunction &Fn) { int SrcReg = MI->getOperand(2).getReg(); assert(Hexagon::PredRegsRegClass.contains(SrcReg) && "Not a predicate register"); - if (!TII->isValidOffset(Hexagon::STriw, Offset)) { + if (!TII->isValidOffset(Hexagon::STriw_indexed, Offset)) { if (!TII->isValidOffset(Hexagon::ADD_ri, Offset)) { BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::CONST32_Int_Real), @@ -95,7 +95,7 @@ bool HexagonExpandPredSpillCode::runOnMachineFunction(MachineFunction &Fn) { BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::TFR_RsPd), HEXAGON_RESERVED_REG_2).addReg(SrcReg); BuildMI(*MBB, MII, MI->getDebugLoc(), - TII->get(Hexagon::STriw)) + TII->get(Hexagon::STriw_indexed)) .addReg(HEXAGON_RESERVED_REG_1) .addImm(0).addReg(HEXAGON_RESERVED_REG_2); } else { @@ -103,7 +103,8 @@ bool HexagonExpandPredSpillCode::runOnMachineFunction(MachineFunction &Fn) { HEXAGON_RESERVED_REG_1).addReg(FP).addImm(Offset); BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::TFR_RsPd), HEXAGON_RESERVED_REG_2).addReg(SrcReg); - BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::STriw)) + BuildMI(*MBB, MII, MI->getDebugLoc(), + TII->get(Hexagon::STriw_indexed)) .addReg(HEXAGON_RESERVED_REG_1) .addImm(0) .addReg(HEXAGON_RESERVED_REG_2); @@ -111,7 +112,8 @@ bool HexagonExpandPredSpillCode::runOnMachineFunction(MachineFunction &Fn) { } else { BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::TFR_RsPd), HEXAGON_RESERVED_REG_2).addReg(SrcReg); - BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::STriw)). 
+ BuildMI(*MBB, MII, MI->getDebugLoc(), + TII->get(Hexagon::STriw_indexed)). addReg(FP).addImm(Offset).addReg(HEXAGON_RESERVED_REG_2); } MII = MBB->erase(MI); diff --git a/lib/Target/Hexagon/HexagonFrameLowering.cpp b/lib/Target/Hexagon/HexagonFrameLowering.cpp index e8a6924..cd682df 100644 --- a/lib/Target/Hexagon/HexagonFrameLowering.cpp +++ b/lib/Target/Hexagon/HexagonFrameLowering.cpp @@ -209,6 +209,16 @@ bool HexagonFrameLowering::hasFP(const MachineFunction &MF) const { FuncInfo->hasClobberLR() ); } +static inline +unsigned uniqueSuperReg(unsigned Reg, const TargetRegisterInfo *TRI) { + MCSuperRegIterator SRI(Reg, TRI); + assert(SRI.isValid() && "Expected a superreg"); + unsigned SuperReg = *SRI; + ++SRI; + assert(!SRI.isValid() && "Expected exactly one superreg"); + return SuperReg; +} + bool HexagonFrameLowering::spillCalleeSavedRegisters( MachineBasicBlock &MBB, @@ -235,26 +245,21 @@ HexagonFrameLowering::spillCalleeSavedRegisters( // // Check if we can use a double-word store. // - const uint16_t* SuperReg = TRI->getSuperRegisters(Reg); - - // Assume that there is exactly one superreg. - assert(SuperReg[0] && !SuperReg[1] && "Expected exactly one superreg"); + unsigned SuperReg = uniqueSuperReg(Reg, TRI); bool CanUseDblStore = false; const TargetRegisterClass* SuperRegClass = 0; if (ContiguousRegs && (i < CSI.size()-1)) { - const uint16_t* SuperRegNext = TRI->getSuperRegisters(CSI[i+1].getReg()); - assert(SuperRegNext[0] && !SuperRegNext[1] && - "Expected exactly one superreg"); - SuperRegClass = TRI->getMinimalPhysRegClass(SuperReg[0]); - CanUseDblStore = (SuperRegNext[0] == SuperReg[0]); + unsigned SuperRegNext = uniqueSuperReg(CSI[i+1].getReg(), TRI); + SuperRegClass = TRI->getMinimalPhysRegClass(SuperReg); + CanUseDblStore = (SuperRegNext == SuperReg); } if (CanUseDblStore) { - TII.storeRegToStackSlot(MBB, MI, SuperReg[0], true, + TII.storeRegToStackSlot(MBB, MI, SuperReg, true, CSI[i+1].getFrameIdx(), SuperRegClass, TRI); - MBB.addLiveIn(SuperReg[0]); + MBB.addLiveIn(SuperReg); ++i; } else { // Cannot use a double-word store. @@ -295,25 +300,20 @@ bool HexagonFrameLowering::restoreCalleeSavedRegisters( // // Check if we can use a double-word load. // - const uint16_t* SuperReg = TRI->getSuperRegisters(Reg); + unsigned SuperReg = uniqueSuperReg(Reg, TRI); const TargetRegisterClass* SuperRegClass = 0; - - // Assume that there is exactly one superreg. - assert(SuperReg[0] && !SuperReg[1] && "Expected exactly one superreg"); bool CanUseDblLoad = false; if (ContiguousRegs && (i < CSI.size()-1)) { - const uint16_t* SuperRegNext = TRI->getSuperRegisters(CSI[i+1].getReg()); - assert(SuperRegNext[0] && !SuperRegNext[1] && - "Expected exactly one superreg"); - SuperRegClass = TRI->getMinimalPhysRegClass(SuperReg[0]); - CanUseDblLoad = (SuperRegNext[0] == SuperReg[0]); + unsigned SuperRegNext = uniqueSuperReg(CSI[i+1].getReg(), TRI); + SuperRegClass = TRI->getMinimalPhysRegClass(SuperReg); + CanUseDblLoad = (SuperRegNext == SuperReg); } if (CanUseDblLoad) { - TII.loadRegFromStackSlot(MBB, MI, SuperReg[0], CSI[i+1].getFrameIdx(), + TII.loadRegFromStackSlot(MBB, MI, SuperReg, CSI[i+1].getFrameIdx(), SuperRegClass, TRI); - MBB.addLiveIn(SuperReg[0]); + MBB.addLiveIn(SuperReg); ++i; } else { // Cannot use a double-word load. 
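The uniqueSuperReg helper introduced above replaces the removed getSuperRegisters() array with MCSuperRegIterator and asserts that exactly one super-register exists. A sketch of how the spill loop uses it to pair two adjacent callee-saved 32-bit registers into a single double-word store; indices and CSI are as in the hunk, with the ContiguousRegs bookkeeping omitted:

  // A paired store is legal only when consecutive CSRs are the two
  // halves of the same unique super-register.
  unsigned SuperReg     = uniqueSuperReg(CSI[i].getReg(), TRI);
  unsigned SuperRegNext = uniqueSuperReg(CSI[i+1].getReg(), TRI);
  if (SuperRegNext == SuperReg) {
    const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(SuperReg);
    TII.storeRegToStackSlot(MBB, MI, SuperReg, /*isKill=*/true,
                            CSI[i+1].getFrameIdx(), RC, TRI);
    MBB.addLiveIn(SuperReg);
    ++i; // both halves spilled by one instruction
  }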
diff --git a/lib/Target/Hexagon/HexagonHardwareLoops.cpp b/lib/Target/Hexagon/HexagonHardwareLoops.cpp index 57772a5..d756aec 100644 --- a/lib/Target/Hexagon/HexagonHardwareLoops.cpp +++ b/lib/Target/Hexagon/HexagonHardwareLoops.cpp @@ -328,7 +328,10 @@ CountValue *HexagonHardwareLoops::getTripCount(MachineLoop *L) const { // can get a useful trip count. The trip count can // be either a register or an immediate. The location // of the value depends upon the type (reg or imm). - while ((IV_Opnd = IV_Opnd->getNextOperandForReg())) { + for (MachineRegisterInfo::reg_iterator + RI = MRI->reg_begin(IV_Opnd->getReg()), RE = MRI->reg_end(); + RI != RE; ++RI) { + IV_Opnd = &RI.getOperand(); const MachineInstr *MI = IV_Opnd->getParent(); if (L->contains(MI) && isCompareEqualsImm(MI)) { const MachineOperand &MO = MI->getOperand(2); @@ -491,7 +494,7 @@ bool HexagonHardwareLoops::convertToHardwareLoop(MachineLoop *L) { TII->get(Hexagon::NEG), CountReg).addReg(CountReg1); } - // Add the Loop instruction to the begining of the loop. + // Add the Loop instruction to the beginning of the loop. BuildMI(*Preheader, InsertPos, InsertPos->getDebugLoc(), TII->get(Hexagon::LOOP0_r)).addMBB(LoopStart).addReg(CountReg); } else { @@ -623,7 +626,7 @@ void HexagonFixupHwLoops::convertLoopInstr(MachineFunction &MF, const TargetInstrInfo *TII = MF.getTarget().getInstrInfo(); MachineBasicBlock *MBB = MII->getParent(); DebugLoc DL = MII->getDebugLoc(); - unsigned Scratch = RS.scavengeRegister(Hexagon::IntRegsRegisterClass, MII, 0); + unsigned Scratch = RS.scavengeRegister(&Hexagon::IntRegsRegClass, MII, 0); // First, set the LC0 with the trip count. if (MII->getOperand(1).isReg()) { diff --git a/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp b/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp index 9df965e..5499134 100644 --- a/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp +++ b/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp @@ -90,7 +90,9 @@ public: SDNode *SelectMul(SDNode *N); SDNode *SelectZeroExtend(SDNode *N); SDNode *SelectIntrinsicWOChain(SDNode *N); + SDNode *SelectIntrinsicWChain(SDNode *N); SDNode *SelectConstant(SDNode *N); + SDNode *SelectConstantFP(SDNode *N); SDNode *SelectAdd(SDNode *N); // Include the pieces autogenerated from the target description. @@ -318,7 +320,7 @@ SDNode *HexagonDAGToDAGISel::SelectBaseOffsetLoad(LoadSDNode *LD, DebugLoc dl) { else if (LoadedVT == MVT::i32) Opcode = Hexagon::LDriw_indexed; else if (LoadedVT == MVT::i16) Opcode = Hexagon::LDrih_indexed; else if (LoadedVT == MVT::i8) Opcode = Hexagon::LDrib_indexed; - else assert (0 && "unknown memory type"); + else llvm_unreachable("unknown memory type"); // Build indexed load. SDValue TargetConstOff = CurDAG->getTargetConstant(Offset, PointerTy); @@ -375,7 +377,7 @@ SDNode *HexagonDAGToDAGISel::SelectIndexedLoadSignExtend64(LoadSDNode *LD, }; ReplaceUses(Froms, Tos, 3); return Result_2; - } + } SDValue TargetConst0 = CurDAG->getTargetConstant(0, MVT::i32); SDValue TargetConstVal = CurDAG->getTargetConstant(Val, MVT::i32); SDNode *Result_1 = CurDAG->getMachineNode(Opcode, dl, MVT::i32, @@ -516,7 +518,7 @@ SDNode *HexagonDAGToDAGISel::SelectIndexedLoad(LoadSDNode *LD, DebugLoc dl) { else Opcode = zextval ? Hexagon::LDriub : Hexagon::LDrib; } else - assert (0 && "unknown memory type"); + llvm_unreachable("unknown memory type"); // For zero ext i64 loads, we need to add combine instructions. 
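The getTripCount hunk above replaces the removed MachineOperand::getNextOperandForReg() chain with a MachineRegisterInfo walk. A minimal sketch of the new idiom for visiting every operand that mentions a given register, where Reg stands in for IV_Opnd->getReg():

  for (MachineRegisterInfo::reg_iterator
         RI = MRI->reg_begin(Reg), RE = MRI->reg_end(); RI != RE; ++RI) {
    MachineOperand &MO = RI.getOperand();       // the mentioning operand
    const MachineInstr *UseMI = MO.getParent(); // its instruction
    // ... filter UseMI as the trip-count search above does ...
  }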
if (LD->getValueType(0) == MVT::i64 && @@ -613,7 +615,7 @@ SDNode *HexagonDAGToDAGISel::SelectIndexedStore(StoreSDNode *ST, DebugLoc dl) { else if (StoredVT == MVT::i32) Opcode = Hexagon::POST_STwri; else if (StoredVT == MVT::i16) Opcode = Hexagon::POST_SThri; else if (StoredVT == MVT::i8) Opcode = Hexagon::POST_STbri; - else assert (0 && "unknown memory type"); + else llvm_unreachable("unknown memory type"); // Build post increment store. SDNode* Result = CurDAG->getMachineNode(Opcode, dl, MVT::i32, @@ -636,10 +638,10 @@ SDNode *HexagonDAGToDAGISel::SelectIndexedStore(StoreSDNode *ST, DebugLoc dl) { // Figure out the opcode. if (StoredVT == MVT::i64) Opcode = Hexagon::STrid; - else if (StoredVT == MVT::i32) Opcode = Hexagon::STriw; + else if (StoredVT == MVT::i32) Opcode = Hexagon::STriw_indexed; else if (StoredVT == MVT::i16) Opcode = Hexagon::STrih; else if (StoredVT == MVT::i8) Opcode = Hexagon::STrib; - else assert (0 && "unknown memory type"); + else llvm_unreachable("unknown memory type"); // Build regular store. SDValue TargetConstVal = CurDAG->getTargetConstant(Val, MVT::i32); @@ -693,7 +695,7 @@ SDNode *HexagonDAGToDAGISel::SelectBaseOffsetStore(StoreSDNode *ST, else if (StoredVT == MVT::i32) Opcode = Hexagon::STriw_indexed; else if (StoredVT == MVT::i16) Opcode = Hexagon::STrih_indexed; else if (StoredVT == MVT::i8) Opcode = Hexagon::STrib_indexed; - else assert (0 && "unknown memory type"); + else llvm_unreachable("unknown memory type"); SDValue Ops[] = {SDValue(NewBase,0), CurDAG->getTargetConstant(Offset,PointerTy), @@ -723,7 +725,7 @@ SDNode *HexagonDAGToDAGISel::SelectStore(SDNode *N) { if (AM != ISD::UNINDEXED) { return SelectIndexedStore(ST, dl); } - + return SelectBaseOffsetStore(ST, dl); } @@ -752,7 +754,7 @@ SDNode *HexagonDAGToDAGISel::SelectMul(SDNode *N) { if (MulOp0.getOpcode() == ISD::SIGN_EXTEND) { SDValue Sext0 = MulOp0.getOperand(0); if (Sext0.getNode()->getValueType(0) != MVT::i32) { - SelectCode(N); + return SelectCode(N); } OP0 = Sext0; @@ -761,7 +763,7 @@ SDNode *HexagonDAGToDAGISel::SelectMul(SDNode *N) { if (LD->getMemoryVT() != MVT::i32 || LD->getExtensionType() != ISD::SEXTLOAD || LD->getAddressingMode() != ISD::UNINDEXED) { - SelectCode(N); + return SelectCode(N); } SDValue Chain = LD->getChain(); @@ -1128,12 +1130,12 @@ SDNode *HexagonDAGToDAGISel::SelectIntrinsicWOChain(SDNode *N) { // For immediates, lower it. for (unsigned i = 1; i < N->getNumOperands(); ++i) { SDNode *Arg = N->getOperand(i).getNode(); - const TargetRegisterClass *RC = TII->getRegClass(MCID, i, TRI); + const TargetRegisterClass *RC = TII->getRegClass(MCID, i, TRI, *MF); - if (RC == Hexagon::IntRegsRegisterClass || - RC == Hexagon::DoubleRegsRegisterClass) { + if (RC == &Hexagon::IntRegsRegClass || + RC == &Hexagon::DoubleRegsRegClass) { Ops.push_back(SDValue(Arg, 0)); - } else if (RC == Hexagon::PredRegsRegisterClass) { + } else if (RC == &Hexagon::PredRegsRegClass) { // Do the transfer. SDNode *PdRs = CurDAG->getMachineNode(Hexagon::TFR_PdRs, dl, MVT::i1, SDValue(Arg, 0)); @@ -1158,6 +1160,25 @@ SDNode *HexagonDAGToDAGISel::SelectIntrinsicWOChain(SDNode *N) { return SelectCode(N); } +// +// Map floating point constant values. 
+// +SDNode *HexagonDAGToDAGISel::SelectConstantFP(SDNode *N) { + DebugLoc dl = N->getDebugLoc(); + ConstantFPSDNode *CN = dyn_cast(N); + APFloat APF = CN->getValueAPF(); + if (N->getValueType(0) == MVT::f32) { + return CurDAG->getMachineNode(Hexagon::TFRI_f, dl, MVT::f32, + CurDAG->getTargetConstantFP(APF.convertToFloat(), MVT::f32)); + } + else if (N->getValueType(0) == MVT::f64) { + return CurDAG->getMachineNode(Hexagon::CONST64_Float_Real, dl, MVT::f64, + CurDAG->getTargetConstantFP(APF.convertToDouble(), MVT::f64)); + } + + return SelectCode(N); +} + // // Map predicate true (encoded as -1 in LLVM) to a XOR. @@ -1215,7 +1236,7 @@ SDNode *HexagonDAGToDAGISel::SelectAdd(SDNode *N) { // Build Rd = Rd' + asr(Rs, Rt). The machine constraints will ensure that // Rd and Rd' are assigned to the same register - SDNode* Result = CurDAG->getMachineNode(Hexagon::ASR_rr_acc, dl, MVT::i32, + SDNode* Result = CurDAG->getMachineNode(Hexagon::ASR_ADD_rr, dl, MVT::i32, N->getOperand(1), Src1->getOperand(0), Src1->getOperand(1)); @@ -1234,6 +1255,9 @@ SDNode *HexagonDAGToDAGISel::Select(SDNode *N) { case ISD::Constant: return SelectConstant(N); + case ISD::ConstantFP: + return SelectConstantFP(N); + case ISD::ADD: return SelectAdd(N); diff --git a/lib/Target/Hexagon/HexagonISelLowering.cpp b/lib/Target/Hexagon/HexagonISelLowering.cpp index 8c4350d..703a128 100644 --- a/lib/Target/Hexagon/HexagonISelLowering.cpp +++ b/lib/Target/Hexagon/HexagonISelLowering.cpp @@ -103,12 +103,12 @@ CC_Hexagon_VarArg (unsigned ValNo, MVT ValVT, State.addLoc(CCValAssign::getMem(ValNo, ValVT, ofst, LocVT, LocInfo)); return false; } - if (LocVT == MVT::i32) { + if (LocVT == MVT::i32 || LocVT == MVT::f32) { ofst = State.AllocateStack(4, 4); State.addLoc(CCValAssign::getMem(ValNo, ValVT, ofst, LocVT, LocInfo)); return false; } - if (LocVT == MVT::i64) { + if (LocVT == MVT::i64 || LocVT == MVT::f64) { ofst = State.AllocateStack(8, 8); State.addLoc(CCValAssign::getMem(ValNo, ValVT, ofst, LocVT, LocInfo)); return false; @@ -142,12 +142,12 @@ CC_Hexagon (unsigned ValNo, MVT ValVT, LocInfo = CCValAssign::AExt; } - if (LocVT == MVT::i32) { + if (LocVT == MVT::i32 || LocVT == MVT::f32) { if (!CC_Hexagon32(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State)) return false; } - if (LocVT == MVT::i64) { + if (LocVT == MVT::i64 || LocVT == MVT::f64) { if (!CC_Hexagon64(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State)) return false; } @@ -217,12 +217,12 @@ static bool RetCC_Hexagon(unsigned ValNo, MVT ValVT, LocInfo = CCValAssign::AExt; } - if (LocVT == MVT::i32) { + if (LocVT == MVT::i32 || LocVT == MVT::f32) { if (!RetCC_Hexagon32(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State)) return false; } - if (LocVT == MVT::i64) { + if (LocVT == MVT::i64 || LocVT == MVT::f64) { if (!RetCC_Hexagon64(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State)) return false; } @@ -234,7 +234,7 @@ static bool RetCC_Hexagon32(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State) { - if (LocVT == MVT::i32) { + if (LocVT == MVT::i32 || LocVT == MVT::f32) { if (unsigned Reg = State.AllocateReg(Hexagon::R0)) { State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); return false; @@ -249,7 +249,7 @@ static bool RetCC_Hexagon32(unsigned ValNo, MVT ValVT, static bool RetCC_Hexagon64(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State) { - if (LocVT == MVT::i64) { + if (LocVT == MVT::i64 || LocVT == MVT::f64) { if (unsigned Reg = 
State.AllocateReg(Hexagon::D0)) { State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); return false; @@ -299,7 +299,7 @@ HexagonTargetLowering::LowerReturn(SDValue Chain, // CCState - Info about the registers and stack slot. CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), - getTargetMachine(), RVLocs, *DAG.getContext()); + getTargetMachine(), RVLocs, *DAG.getContext()); // Analyze return values of ISD::RET CCInfo.AnalyzeReturn(Outs, RetCC_Hexagon); @@ -351,7 +351,7 @@ HexagonTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, SmallVector RVLocs; CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), - getTargetMachine(), RVLocs, *DAG.getContext()); + getTargetMachine(), RVLocs, *DAG.getContext()); CCInfo.AnalyzeCallResult(Ins, RetCC_Hexagon); @@ -370,21 +370,25 @@ HexagonTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, /// LowerCall - Functions arguments are copied from virtual regs to /// (physical regs)/(stack frame), CALLSEQ_START and CALLSEQ_END are emitted. SDValue -HexagonTargetLowering::LowerCall(SDValue Chain, SDValue Callee, - CallingConv::ID CallConv, bool isVarArg, - bool doesNotRet, bool &isTailCall, - const SmallVectorImpl &Outs, - const SmallVectorImpl &OutVals, - const SmallVectorImpl &Ins, - DebugLoc dl, SelectionDAG &DAG, +HexagonTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, SmallVectorImpl &InVals) const { + SelectionDAG &DAG = CLI.DAG; + DebugLoc &dl = CLI.DL; + SmallVector &Outs = CLI.Outs; + SmallVector &OutVals = CLI.OutVals; + SmallVector &Ins = CLI.Ins; + SDValue Chain = CLI.Chain; + SDValue Callee = CLI.Callee; + bool &isTailCall = CLI.IsTailCall; + CallingConv::ID CallConv = CLI.CallConv; + bool isVarArg = CLI.IsVarArg; bool IsStructRet = (Outs.empty()) ? false : Outs[0].Flags.isSRet(); // Analyze operands of the call, assigning locations to each operand. SmallVector ArgLocs; CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), - getTargetMachine(), ArgLocs, *DAG.getContext()); + getTargetMachine(), ArgLocs, *DAG.getContext()); // Check for varargs. NumNamedVarArgParams = -1; @@ -504,7 +508,7 @@ HexagonTargetLowering::LowerCall(SDValue Chain, SDValue Callee, // Build a sequence of copy-to-reg nodes chained together with token // chain and flag operands which copy the outgoing args into registers. - // The InFlag in necessary since all emited instructions must be + // The InFlag in necessary since all emitted instructions must be // stuck together. SDValue InFlag; if (!isTailCall) { @@ -524,7 +528,7 @@ HexagonTargetLowering::LowerCall(SDValue Chain, SDValue Callee, // than necessary, because it means that each store effectively depends // on every argument instead of just those arguments it would clobber. // - // Do not flag preceeding copytoreg stuff together with the following stuff. + // Do not flag preceding copytoreg stuff together with the following stuff. InFlag = SDValue(); for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first, @@ -813,7 +817,7 @@ const { // Assign locations to all of the incoming arguments. SmallVector ArgLocs; CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), - getTargetMachine(), ArgLocs, *DAG.getContext()); + getTargetMachine(), ArgLocs, *DAG.getContext()); CCInfo.AnalyzeFormalArguments(Ins, CC_Hexagon); @@ -839,14 +843,15 @@ const { // 1. int, long long, ptr args that get allocated in register. // 2. Large struct that gets an register to put its address in. 
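The InFlag comments touched up in the LowerCall hunk above describe the standard glued copy-to-reg idiom used ahead of a call. A minimal sketch of the loop they refer to: RegsToPass holds (physical register, value) pairs as in the surrounding code, and the closing InFlag assignment is the usual idiom, assumed from context rather than visible in the hunk.

  SDValue InFlag; // empty on the first copy
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
    Chain  = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
                              RegsToPass[i].second, InFlag);
    InFlag = Chain.getValue(1); // glue the next copy to this one
  }
  // Gluing keeps the copies adjacent without serializing every
  // argument store behind every other one.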
EVT RegVT = VA.getLocVT(); - if (RegVT == MVT::i8 || RegVT == MVT::i16 || RegVT == MVT::i32) { + if (RegVT == MVT::i8 || RegVT == MVT::i16 || + RegVT == MVT::i32 || RegVT == MVT::f32) { unsigned VReg = - RegInfo.createVirtualRegister(Hexagon::IntRegsRegisterClass); + RegInfo.createVirtualRegister(&Hexagon::IntRegsRegClass); RegInfo.addLiveIn(VA.getLocReg(), VReg); InVals.push_back(DAG.getCopyFromReg(Chain, dl, VReg, RegVT)); } else if (RegVT == MVT::i64) { unsigned VReg = - RegInfo.createVirtualRegister(Hexagon::DoubleRegsRegisterClass); + RegInfo.createVirtualRegister(&Hexagon::DoubleRegsRegClass); RegInfo.addLiveIn(VA.getLocReg(), VReg); InVals.push_back(DAG.getCopyFromReg(Chain, dl, VReg, RegVT)); } else { @@ -918,14 +923,33 @@ HexagonTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const { SDValue HexagonTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { + SDValue LHS = Op.getOperand(0); + SDValue RHS = Op.getOperand(1); + SDValue CC = Op.getOperand(4); + SDValue TrueVal = Op.getOperand(2); + SDValue FalseVal = Op.getOperand(3); + DebugLoc dl = Op.getDebugLoc(); SDNode* OpNode = Op.getNode(); + EVT SVT = OpNode->getValueType(0); - SDValue Cond = DAG.getNode(ISD::SETCC, Op.getDebugLoc(), MVT::i1, - Op.getOperand(2), Op.getOperand(3), - Op.getOperand(4)); - return DAG.getNode(ISD::SELECT, Op.getDebugLoc(), OpNode->getValueType(0), - Cond, Op.getOperand(0), - Op.getOperand(1)); + SDValue Cond = DAG.getNode(ISD::SETCC, dl, MVT::i1, LHS, RHS, CC); + return DAG.getNode(ISD::SELECT, dl, SVT, Cond, TrueVal, FalseVal); +} + +SDValue +HexagonTargetLowering::LowerConstantPool(SDValue Op, SelectionDAG &DAG) const { + EVT ValTy = Op.getValueType(); + + DebugLoc dl = Op.getDebugLoc(); + ConstantPoolSDNode *CP = cast(Op); + SDValue Res; + if (CP->isMachineConstantPoolEntry()) + Res = DAG.getTargetConstantPool(CP->getMachineCPVal(), ValTy, + CP->getAlignment()); + else + Res = DAG.getTargetConstantPool(CP->getConstVal(), ValTy, + CP->getAlignment()); + return DAG.getNode(HexagonISD::CONST32, dl, ValTy, Res); } SDValue @@ -1010,11 +1034,18 @@ HexagonTargetLowering::HexagonTargetLowering(HexagonTargetMachine : TargetLowering(targetmachine, new HexagonTargetObjectFile()), TM(targetmachine) { + const HexagonRegisterInfo* QRI = TM.getRegisterInfo(); + // Set up the register classes. 
- addRegisterClass(MVT::i32, Hexagon::IntRegsRegisterClass); - addRegisterClass(MVT::i64, Hexagon::DoubleRegsRegisterClass); + addRegisterClass(MVT::i32, &Hexagon::IntRegsRegClass); + addRegisterClass(MVT::i64, &Hexagon::DoubleRegsRegClass); - addRegisterClass(MVT::i1, Hexagon::PredRegsRegisterClass); + if (QRI->Subtarget.hasV5TOps()) { + addRegisterClass(MVT::f32, &Hexagon::IntRegsRegClass); + addRegisterClass(MVT::f64, &Hexagon::DoubleRegsRegClass); + } + + addRegisterClass(MVT::i1, &Hexagon::PredRegsRegClass); computeRegisterProperties(); @@ -1028,32 +1059,16 @@ HexagonTargetLowering::HexagonTargetLowering(HexagonTargetMachine // // Library calls for unsupported operations // - setLibcallName(RTLIB::OGT_F64, "__hexagon_gtdf2"); - setLibcallName(RTLIB::SINTTOFP_I64_F64, "__hexagon_floatdidf"); setLibcallName(RTLIB::SINTTOFP_I128_F64, "__hexagon_floattidf"); setLibcallName(RTLIB::SINTTOFP_I128_F32, "__hexagon_floattisf"); - setLibcallName(RTLIB::UINTTOFP_I32_F32, "__hexagon_floatunsisf"); - setLibcallName(RTLIB::UINTTOFP_I64_F32, "__hexagon_floatundisf"); - setLibcallName(RTLIB::SINTTOFP_I64_F32, "__hexagon_floatdisf"); - setLibcallName(RTLIB::UINTTOFP_I64_F64, "__hexagon_floatundidf"); - setLibcallName(RTLIB::FPTOUINT_F32_I32, "__hexagon_fixunssfsi"); - setLibcallName(RTLIB::FPTOUINT_F32_I64, "__hexagon_fixunssfdi"); setLibcallName(RTLIB::FPTOUINT_F32_I128, "__hexagon_fixunssfti"); - - setLibcallName(RTLIB::FPTOUINT_F64_I32, "__hexagon_fixunsdfsi"); - setLibcallName(RTLIB::FPTOUINT_F64_I64, "__hexagon_fixunsdfdi"); setLibcallName(RTLIB::FPTOUINT_F64_I128, "__hexagon_fixunsdfti"); - setLibcallName(RTLIB::UINTTOFP_I32_F64, "__hexagon_floatunsidf"); - setLibcallName(RTLIB::FPTOSINT_F32_I64, "__hexagon_fixsfdi"); setLibcallName(RTLIB::FPTOSINT_F32_I128, "__hexagon_fixsfti"); - setLibcallName(RTLIB::FPTOSINT_F64_I64, "__hexagon_fixdfdi"); setLibcallName(RTLIB::FPTOSINT_F64_I128, "__hexagon_fixdfti"); - setLibcallName(RTLIB::OGT_F64, "__hexagon_gtdf2"); - setLibcallName(RTLIB::SDIV_I32, "__hexagon_divsi3"); setOperationAction(ISD::SDIV, MVT::i32, Expand); setLibcallName(RTLIB::SREM_I32, "__hexagon_umodsi3"); @@ -1082,92 +1097,184 @@ HexagonTargetLowering::HexagonTargetLowering(HexagonTargetMachine setLibcallName(RTLIB::DIV_F64, "__hexagon_divdf3"); setOperationAction(ISD::FDIV, MVT::f64, Expand); - setLibcallName(RTLIB::FPEXT_F32_F64, "__hexagon_extendsfdf2"); - setOperationAction(ISD::FP_EXTEND, MVT::f32, Expand); + setOperationAction(ISD::FSQRT, MVT::f32, Expand); + setOperationAction(ISD::FSQRT, MVT::f64, Expand); + setOperationAction(ISD::FSIN, MVT::f32, Expand); + setOperationAction(ISD::FSIN, MVT::f64, Expand); + + if (QRI->Subtarget.hasV5TOps()) { + // Hexagon V5 Support. 
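All of Hexagon's new floating-point support keys off Subtarget.hasV5TOps(), starting with the register classes above: on V5, f32 and f64 live in the same register files as i32 and i64, and the hunk that follows then legalizes most FP arithmetic and comparisons that were previously libcalls. A condensed sketch of the split:

  if (QRI->Subtarget.hasV5TOps()) {
    // V5: hardware FP shares the integer register classes.
    addRegisterClass(MVT::f32, &Hexagon::IntRegsRegClass);
    addRegisterClass(MVT::f64, &Hexagon::DoubleRegsRegClass);
  }
  // Pre-V5 parts register no FP types and take the soft-float
  // libcall path set up further down.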
+ setOperationAction(ISD::FADD, MVT::f32, Legal); + setOperationAction(ISD::FADD, MVT::f64, Legal); + setOperationAction(ISD::FP_EXTEND, MVT::f32, Legal); + setCondCodeAction(ISD::SETOEQ, MVT::f32, Legal); + setCondCodeAction(ISD::SETOEQ, MVT::f64, Legal); + setCondCodeAction(ISD::SETUEQ, MVT::f32, Legal); + setCondCodeAction(ISD::SETUEQ, MVT::f64, Legal); + + setCondCodeAction(ISD::SETOGE, MVT::f32, Legal); + setCondCodeAction(ISD::SETOGE, MVT::f64, Legal); + setCondCodeAction(ISD::SETUGE, MVT::f32, Legal); + setCondCodeAction(ISD::SETUGE, MVT::f64, Legal); + + setCondCodeAction(ISD::SETOGT, MVT::f32, Legal); + setCondCodeAction(ISD::SETOGT, MVT::f64, Legal); + setCondCodeAction(ISD::SETUGT, MVT::f32, Legal); + setCondCodeAction(ISD::SETUGT, MVT::f64, Legal); + + setCondCodeAction(ISD::SETOLE, MVT::f32, Legal); + setCondCodeAction(ISD::SETOLE, MVT::f64, Legal); + setCondCodeAction(ISD::SETOLT, MVT::f32, Legal); + setCondCodeAction(ISD::SETOLT, MVT::f64, Legal); + + setOperationAction(ISD::ConstantFP, MVT::f32, Legal); + setOperationAction(ISD::ConstantFP, MVT::f64, Legal); + + setOperationAction(ISD::FP_TO_UINT, MVT::i1, Promote); + setOperationAction(ISD::FP_TO_SINT, MVT::i1, Promote); + setOperationAction(ISD::UINT_TO_FP, MVT::i1, Promote); + setOperationAction(ISD::SINT_TO_FP, MVT::i1, Promote); + + setOperationAction(ISD::FP_TO_UINT, MVT::i8, Promote); + setOperationAction(ISD::FP_TO_SINT, MVT::i8, Promote); + setOperationAction(ISD::UINT_TO_FP, MVT::i8, Promote); + setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote); + + setOperationAction(ISD::FP_TO_UINT, MVT::i16, Promote); + setOperationAction(ISD::FP_TO_SINT, MVT::i16, Promote); + setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote); + setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote); + + setOperationAction(ISD::FP_TO_UINT, MVT::i32, Legal); + setOperationAction(ISD::FP_TO_SINT, MVT::i32, Legal); + setOperationAction(ISD::UINT_TO_FP, MVT::i32, Legal); + setOperationAction(ISD::SINT_TO_FP, MVT::i32, Legal); + + setOperationAction(ISD::FP_TO_UINT, MVT::i64, Legal); + setOperationAction(ISD::FP_TO_SINT, MVT::i64, Legal); + setOperationAction(ISD::UINT_TO_FP, MVT::i64, Legal); + setOperationAction(ISD::SINT_TO_FP, MVT::i64, Legal); + + setOperationAction(ISD::FABS, MVT::f32, Legal); + setOperationAction(ISD::FABS, MVT::f64, Expand); + + setOperationAction(ISD::FNEG, MVT::f32, Legal); + setOperationAction(ISD::FNEG, MVT::f64, Expand); + } else { + + // Expand fp<->uint. 
+ setOperationAction(ISD::FP_TO_SINT, MVT::i32, Expand); + setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand); - setLibcallName(RTLIB::SINTTOFP_I32_F32, "__hexagon_floatsisf"); - setOperationAction(ISD::SINT_TO_FP, MVT::i32, Expand); + setOperationAction(ISD::SINT_TO_FP, MVT::i32, Expand); + setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand); - setLibcallName(RTLIB::ADD_F64, "__hexagon_adddf3"); - setOperationAction(ISD::FADD, MVT::f64, Expand); + setLibcallName(RTLIB::SINTTOFP_I64_F32, "__hexagon_floatdisf"); + setLibcallName(RTLIB::UINTTOFP_I64_F32, "__hexagon_floatundisf"); - setLibcallName(RTLIB::ADD_F32, "__hexagon_addsf3"); - setOperationAction(ISD::FADD, MVT::f32, Expand); + setLibcallName(RTLIB::UINTTOFP_I32_F32, "__hexagon_floatunsisf"); + setLibcallName(RTLIB::SINTTOFP_I32_F32, "__hexagon_floatsisf"); - setLibcallName(RTLIB::ADD_F32, "__hexagon_addsf3"); - setOperationAction(ISD::FADD, MVT::f32, Expand); + setLibcallName(RTLIB::SINTTOFP_I64_F64, "__hexagon_floatdidf"); + setLibcallName(RTLIB::UINTTOFP_I64_F64, "__hexagon_floatundidf"); - setLibcallName(RTLIB::OEQ_F32, "__hexagon_eqsf2"); - setCondCodeAction(ISD::SETOEQ, MVT::f32, Expand); + setLibcallName(RTLIB::UINTTOFP_I32_F64, "__hexagon_floatunsidf"); + setLibcallName(RTLIB::SINTTOFP_I32_F64, "__hexagon_floatsidf"); - setLibcallName(RTLIB::FPTOSINT_F64_I32, "__hexagon_fixdfsi"); - setOperationAction(ISD::FP_TO_SINT, MVT::f64, Expand); + setLibcallName(RTLIB::FPTOUINT_F32_I32, "__hexagon_fixunssfsi"); + setLibcallName(RTLIB::FPTOUINT_F32_I64, "__hexagon_fixunssfdi"); - setLibcallName(RTLIB::FPTOSINT_F32_I32, "__hexagon_fixsfsi"); - setOperationAction(ISD::FP_TO_SINT, MVT::f32, Expand); + setLibcallName(RTLIB::FPTOSINT_F64_I64, "__hexagon_fixdfdi"); + setLibcallName(RTLIB::FPTOSINT_F32_I64, "__hexagon_fixsfdi"); - setLibcallName(RTLIB::SINTTOFP_I32_F64, "__hexagon_floatsidf"); - setOperationAction(ISD::SINT_TO_FP, MVT::i32, Expand); + setLibcallName(RTLIB::FPTOUINT_F64_I32, "__hexagon_fixunsdfsi"); + setLibcallName(RTLIB::FPTOUINT_F64_I64, "__hexagon_fixunsdfdi"); - setLibcallName(RTLIB::OGE_F64, "__hexagon_gedf2"); - setCondCodeAction(ISD::SETOGE, MVT::f64, Expand); + setLibcallName(RTLIB::ADD_F64, "__hexagon_adddf3"); + setOperationAction(ISD::FADD, MVT::f64, Expand); - setLibcallName(RTLIB::OGE_F32, "__hexagon_gesf2"); - setCondCodeAction(ISD::SETOGE, MVT::f32, Expand); + setLibcallName(RTLIB::ADD_F32, "__hexagon_addsf3"); + setOperationAction(ISD::FADD, MVT::f32, Expand); - setLibcallName(RTLIB::OGT_F32, "__hexagon_gtsf2"); - setCondCodeAction(ISD::SETOGT, MVT::f32, Expand); + setLibcallName(RTLIB::FPEXT_F32_F64, "__hexagon_extendsfdf2"); + setOperationAction(ISD::FP_EXTEND, MVT::f32, Expand); - setLibcallName(RTLIB::OLE_F64, "__hexagon_ledf2"); - setCondCodeAction(ISD::SETOLE, MVT::f64, Expand); + setLibcallName(RTLIB::OEQ_F32, "__hexagon_eqsf2"); + setCondCodeAction(ISD::SETOEQ, MVT::f32, Expand); - setLibcallName(RTLIB::OLE_F32, "__hexagon_lesf2"); - setCondCodeAction(ISD::SETOLE, MVT::f32, Expand); + setLibcallName(RTLIB::OEQ_F64, "__hexagon_eqdf2"); + setCondCodeAction(ISD::SETOEQ, MVT::f64, Expand); - setLibcallName(RTLIB::OLT_F64, "__hexagon_ltdf2"); - setCondCodeAction(ISD::SETOLT, MVT::f64, Expand); + setLibcallName(RTLIB::OGE_F32, "__hexagon_gesf2"); + setCondCodeAction(ISD::SETOGE, MVT::f32, Expand); - setLibcallName(RTLIB::OLT_F32, "__hexagon_ltsf2"); - setCondCodeAction(ISD::SETOLT, MVT::f32, Expand); + setLibcallName(RTLIB::OGE_F64, "__hexagon_gedf2"); + setCondCodeAction(ISD::SETOGE, MVT::f64, Expand); 
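The long pre-V5 block here repeats one pattern per operation: bind the RTLIB entry to Hexagon's soft-float runtime routine, then mark the node Expand so legalization emits the call. A minimal sketch of the pairing, using representative entries taken from the hunk:

  // f32 addition becomes a call to __hexagon_addsf3 ...
  setLibcallName(RTLIB::ADD_F32, "__hexagon_addsf3");
  setOperationAction(ISD::FADD, MVT::f32, Expand);
  // ... and ordered FP compares expand the same way:
  setLibcallName(RTLIB::OGT_F32, "__hexagon_gtsf2");
  setCondCodeAction(ISD::SETOGT, MVT::f32, Expand);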
- setLibcallName(RTLIB::SREM_I32, "__hexagon_modsi3"); - setOperationAction(ISD::SREM, MVT::i32, Expand); + setLibcallName(RTLIB::OGT_F32, "__hexagon_gtsf2"); + setCondCodeAction(ISD::SETOGT, MVT::f32, Expand); + + setLibcallName(RTLIB::OGT_F64, "__hexagon_gtdf2"); + setCondCodeAction(ISD::SETOGT, MVT::f64, Expand); + + setLibcallName(RTLIB::FPTOSINT_F64_I32, "__hexagon_fixdfsi"); + setOperationAction(ISD::FP_TO_SINT, MVT::f64, Expand); - setLibcallName(RTLIB::MUL_F64, "__hexagon_muldf3"); - setOperationAction(ISD::FMUL, MVT::f64, Expand); + setLibcallName(RTLIB::FPTOSINT_F32_I32, "__hexagon_fixsfsi"); + setOperationAction(ISD::FP_TO_SINT, MVT::f32, Expand); - setLibcallName(RTLIB::MUL_F32, "__hexagon_mulsf3"); - setOperationAction(ISD::MUL, MVT::f32, Expand); + setLibcallName(RTLIB::OLE_F64, "__hexagon_ledf2"); + setCondCodeAction(ISD::SETOLE, MVT::f64, Expand); - setLibcallName(RTLIB::UNE_F64, "__hexagon_nedf2"); - setCondCodeAction(ISD::SETUNE, MVT::f64, Expand); + setLibcallName(RTLIB::OLE_F32, "__hexagon_lesf2"); + setCondCodeAction(ISD::SETOLE, MVT::f32, Expand); - setLibcallName(RTLIB::UNE_F32, "__hexagon_nesf2"); + setLibcallName(RTLIB::OLT_F64, "__hexagon_ltdf2"); + setCondCodeAction(ISD::SETOLT, MVT::f64, Expand); + setLibcallName(RTLIB::OLT_F32, "__hexagon_ltsf2"); + setCondCodeAction(ISD::SETOLT, MVT::f32, Expand); - setLibcallName(RTLIB::SUB_F64, "__hexagon_subdf3"); - setOperationAction(ISD::SUB, MVT::f64, Expand); + setLibcallName(RTLIB::MUL_F64, "__hexagon_muldf3"); + setOperationAction(ISD::FMUL, MVT::f64, Expand); - setLibcallName(RTLIB::SUB_F32, "__hexagon_subsf3"); - setOperationAction(ISD::SUB, MVT::f32, Expand); + setLibcallName(RTLIB::MUL_F32, "__hexagon_mulsf3"); + setOperationAction(ISD::MUL, MVT::f32, Expand); - setLibcallName(RTLIB::FPROUND_F64_F32, "__hexagon_truncdfsf2"); - setOperationAction(ISD::FP_ROUND, MVT::f64, Expand); + setLibcallName(RTLIB::UNE_F64, "__hexagon_nedf2"); + setCondCodeAction(ISD::SETUNE, MVT::f64, Expand); - setLibcallName(RTLIB::UO_F64, "__hexagon_unorddf2"); - setCondCodeAction(ISD::SETUO, MVT::f64, Expand); + setLibcallName(RTLIB::UNE_F32, "__hexagon_nesf2"); - setLibcallName(RTLIB::O_F64, "__hexagon_unorddf2"); - setCondCodeAction(ISD::SETO, MVT::f64, Expand); + setLibcallName(RTLIB::SUB_F64, "__hexagon_subdf3"); + setOperationAction(ISD::SUB, MVT::f64, Expand); - setLibcallName(RTLIB::OEQ_F64, "__hexagon_eqdf2"); - setCondCodeAction(ISD::SETOEQ, MVT::f64, Expand); + setLibcallName(RTLIB::SUB_F32, "__hexagon_subsf3"); + setOperationAction(ISD::SUB, MVT::f32, Expand); - setLibcallName(RTLIB::O_F32, "__hexagon_unordsf2"); - setCondCodeAction(ISD::SETO, MVT::f32, Expand); + setLibcallName(RTLIB::FPROUND_F64_F32, "__hexagon_truncdfsf2"); + setOperationAction(ISD::FP_ROUND, MVT::f64, Expand); - setLibcallName(RTLIB::UO_F32, "__hexagon_unordsf2"); - setCondCodeAction(ISD::SETUO, MVT::f32, Expand); + setLibcallName(RTLIB::UO_F64, "__hexagon_unorddf2"); + setCondCodeAction(ISD::SETUO, MVT::f64, Expand); + + setLibcallName(RTLIB::O_F64, "__hexagon_unorddf2"); + setCondCodeAction(ISD::SETO, MVT::f64, Expand); + + setLibcallName(RTLIB::O_F32, "__hexagon_unordsf2"); + setCondCodeAction(ISD::SETO, MVT::f32, Expand); + + setLibcallName(RTLIB::UO_F32, "__hexagon_unordsf2"); + setCondCodeAction(ISD::SETUO, MVT::f32, Expand); + + setOperationAction(ISD::FABS, MVT::f32, Expand); + setOperationAction(ISD::FABS, MVT::f64, Expand); + setOperationAction(ISD::FNEG, MVT::f32, Expand); + setOperationAction(ISD::FNEG, MVT::f64, Expand); + } + + 
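One subtlety in the hunk that follows: on V5, FP SELECT is marked Custom even though no real lowering is needed. As the new comment there explains, expanding SELECT yields SELECT_CC, whose expansion yields SETCC plus SELECT again; Custom lets LowerOperation hand the node back untouched and break that cycle. A sketch of both halves, the second restating the LowerOperation case further below:

  setOperationAction(ISD::SELECT, MVT::f32, Custom); // legalization
  setOperationAction(ISD::SELECT, MVT::f64, Custom); // firewall only
  // ...and in HexagonTargetLowering::LowerOperation():
  //   case ISD::SELECT: return Op; // returned unchanged, cycle broken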
setLibcallName(RTLIB::SREM_I32, "__hexagon_modsi3"); + setOperationAction(ISD::SREM, MVT::i32, Expand); setIndexedLoadAction(ISD::POST_INC, MVT::i8, Legal); setIndexedLoadAction(ISD::POST_INC, MVT::i16, Legal); @@ -1208,20 +1315,33 @@ HexagonTargetLowering::HexagonTargetLowering(HexagonTargetMachine setOperationAction(ISD::BSWAP, MVT::i64, Expand); - // Expand fp<->uint. - setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand); - setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand); - - // Hexagon has no select or setcc: expand to SELECT_CC. - setOperationAction(ISD::SELECT, MVT::f32, Expand); - setOperationAction(ISD::SELECT, MVT::f64, Expand); - // Lower SELECT_CC to SETCC and SELECT. setOperationAction(ISD::SELECT_CC, MVT::i32, Custom); setOperationAction(ISD::SELECT_CC, MVT::i64, Custom); - // This is a workaround documented in DAGCombiner.cpp:2892 We don't - // support SELECT_CC on every type. - setOperationAction(ISD::SELECT_CC, MVT::Other, Expand); + + if (QRI->Subtarget.hasV5TOps()) { + + // We need to make the operation type of SELECT node to be Custom, + // such that we don't go into the infinite loop of + // select -> setcc -> select_cc -> select loop. + setOperationAction(ISD::SELECT, MVT::f32, Custom); + setOperationAction(ISD::SELECT, MVT::f64, Custom); + + setOperationAction(ISD::SELECT_CC, MVT::f32, Expand); + setOperationAction(ISD::SELECT_CC, MVT::f64, Expand); + setOperationAction(ISD::SELECT_CC, MVT::Other, Expand); + + } else { + + // Hexagon has no select or setcc: expand to SELECT_CC. + setOperationAction(ISD::SELECT, MVT::f32, Expand); + setOperationAction(ISD::SELECT, MVT::f64, Expand); + + // This is a workaround documented in DAGCombiner.cpp:2892 We don't + // support SELECT_CC on every type. + setOperationAction(ISD::SELECT_CC, MVT::Other, Expand); + + } setOperationAction(ISD::BR_CC, MVT::Other, Expand); setOperationAction(ISD::BRIND, MVT::Other, Expand); @@ -1307,22 +1427,22 @@ const char* HexagonTargetLowering::getTargetNodeName(unsigned Opcode) const { switch (Opcode) { default: return 0; - case HexagonISD::CONST32: return "HexagonISD::CONST32"; + case HexagonISD::CONST32: return "HexagonISD::CONST32"; case HexagonISD::ADJDYNALLOC: return "HexagonISD::ADJDYNALLOC"; - case HexagonISD::CMPICC: return "HexagonISD::CMPICC"; - case HexagonISD::CMPFCC: return "HexagonISD::CMPFCC"; - case HexagonISD::BRICC: return "HexagonISD::BRICC"; - case HexagonISD::BRFCC: return "HexagonISD::BRFCC"; - case HexagonISD::SELECT_ICC: return "HexagonISD::SELECT_ICC"; - case HexagonISD::SELECT_FCC: return "HexagonISD::SELECT_FCC"; - case HexagonISD::Hi: return "HexagonISD::Hi"; - case HexagonISD::Lo: return "HexagonISD::Lo"; - case HexagonISD::FTOI: return "HexagonISD::FTOI"; - case HexagonISD::ITOF: return "HexagonISD::ITOF"; - case HexagonISD::CALL: return "HexagonISD::CALL"; - case HexagonISD::RET_FLAG: return "HexagonISD::RET_FLAG"; - case HexagonISD::BR_JT: return "HexagonISD::BR_JT"; - case HexagonISD::TC_RETURN: return "HexagonISD::TC_RETURN"; + case HexagonISD::CMPICC: return "HexagonISD::CMPICC"; + case HexagonISD::CMPFCC: return "HexagonISD::CMPFCC"; + case HexagonISD::BRICC: return "HexagonISD::BRICC"; + case HexagonISD::BRFCC: return "HexagonISD::BRFCC"; + case HexagonISD::SELECT_ICC: return "HexagonISD::SELECT_ICC"; + case HexagonISD::SELECT_FCC: return "HexagonISD::SELECT_FCC"; + case HexagonISD::Hi: return "HexagonISD::Hi"; + case HexagonISD::Lo: return "HexagonISD::Lo"; + case HexagonISD::FTOI: return "HexagonISD::FTOI"; + case HexagonISD::ITOF: return 
"HexagonISD::ITOF"; + case HexagonISD::CALL: return "HexagonISD::CALL"; + case HexagonISD::RET_FLAG: return "HexagonISD::RET_FLAG"; + case HexagonISD::BR_JT: return "HexagonISD::BR_JT"; + case HexagonISD::TC_RETURN: return "HexagonISD::TC_RETURN"; } } @@ -1347,9 +1467,10 @@ SDValue HexagonTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { switch (Op.getOpcode()) { default: llvm_unreachable("Should not custom lower this!"); + case ISD::ConstantPool: return LowerConstantPool(Op, DAG); // Frame & Return address. Currently unimplemented. - case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG); - case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG); + case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG); + case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG); case ISD::GlobalTLSAddress: llvm_unreachable("TLS not implemented for Hexagon."); case ISD::MEMBARRIER: return LowerMEMBARRIER(Op, DAG); @@ -1359,9 +1480,10 @@ HexagonTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::BR_JT: return LowerBR_JT(Op, DAG); case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG); - case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG); + case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG); + case ISD::SELECT: return Op; case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG); - case ISD::INLINEASM: return LowerINLINEASM(Op, DAG); + case ISD::INLINEASM: return LowerINLINEASM(Op, DAG); } } @@ -1404,9 +1526,11 @@ HexagonTargetLowering::getRegForInlineAsmConstraint(const case MVT::i32: case MVT::i16: case MVT::i8: - return std::make_pair(0U, Hexagon::IntRegsRegisterClass); + case MVT::f32: + return std::make_pair(0U, &Hexagon::IntRegsRegClass); case MVT::i64: - return std::make_pair(0U, Hexagon::DoubleRegsRegisterClass); + case MVT::f64: + return std::make_pair(0U, &Hexagon::DoubleRegsRegClass); } default: llvm_unreachable("Unknown asm register class"); @@ -1416,6 +1540,14 @@ HexagonTargetLowering::getRegForInlineAsmConstraint(const return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT); } +/// isFPImmLegal - Returns true if the target can instruction select the +/// specified FP immediate natively. If false, the legalizer will +/// materialize the FP immediate as a load from a constant pool. +bool HexagonTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const { + const HexagonRegisterInfo* QRI = TM.getRegisterInfo(); + return QRI->Subtarget.hasV5TOps(); +} + /// isLegalAddressingMode - Return true if the addressing mode represented by /// AM is legal for this target, for a load/store of the specified type. bool HexagonTargetLowering::isLegalAddressingMode(const AddrMode &AM, diff --git a/lib/Target/Hexagon/HexagonISelLowering.h b/lib/Target/Hexagon/HexagonISelLowering.h index 4208bcb..fe6c905 100644 --- a/lib/Target/Hexagon/HexagonISelLowering.h +++ b/lib/Target/Hexagon/HexagonISelLowering.h @@ -27,6 +27,7 @@ namespace llvm { CONST32, CONST32_GP, // For marking data present in GP. + FCONST32, SETCC, ADJDYNALLOC, ARGEXTEND, @@ -48,6 +49,7 @@ namespace llvm { BR_JT, // Jump table. BARRIER, // Memory barrier. 
WrapperJT, + WrapperCP, TC_RETURN }; } @@ -94,13 +96,7 @@ namespace llvm { SmallVectorImpl &InVals) const; SDValue LowerGLOBALADDRESS(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerCall(SDValue Chain, SDValue Callee, - CallingConv::ID CallConv, bool isVarArg, - bool doesNotRet, bool &isTailCall, - const SmallVectorImpl &Outs, - const SmallVectorImpl &OutVals, - const SmallVectorImpl &Ins, - DebugLoc dl, SelectionDAG &DAG, + SDValue LowerCall(TargetLowering::CallLoweringInfo &CLI, SmallVectorImpl &InVals) const; SDValue LowerCallResult(SDValue Chain, SDValue InFlag, @@ -128,6 +124,7 @@ namespace llvm { MachineBasicBlock *BB) const; SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const; virtual EVT getSetCCResultType(EVT VT) const { return MVT::i1; } @@ -150,6 +147,7 @@ namespace llvm { /// mode is legal for a load/store of any legal type. /// TODO: Handle pre/postinc as well. virtual bool isLegalAddressingMode(const AddrMode &AM, Type *Ty) const; + virtual bool isFPImmLegal(const APFloat &Imm, EVT VT) const; /// isLegalICmpImmediate - Return true if the specified immediate is legal /// icmp immediate, that is the target has icmp instructions which can diff --git a/lib/Target/Hexagon/HexagonImmediates.td b/lib/Target/Hexagon/HexagonImmediates.td index e78bb79..18692c4 100644 --- a/lib/Target/Hexagon/HexagonImmediates.td +++ b/lib/Target/Hexagon/HexagonImmediates.td @@ -371,7 +371,7 @@ def s4_3ImmPred : PatLeaf<(i32 imm), [{ def u64ImmPred : PatLeaf<(i64 imm), [{ // immS16 predicate - True if the immediate fits in a 16-bit sign extended // field. - // Adding "N ||" to supress gcc unused warning. + // Adding "N ||" to suppress gcc unused warning. return (N || true); }]>; diff --git a/lib/Target/Hexagon/HexagonInstrFormats.td b/lib/Target/Hexagon/HexagonInstrFormats.td index c9f16fb..e472d49 100644 --- a/lib/Target/Hexagon/HexagonInstrFormats.td +++ b/lib/Target/Hexagon/HexagonInstrFormats.td @@ -13,29 +13,48 @@ // *** Must match HexagonBaseInfo.h *** //===----------------------------------------------------------------------===// +class Type t> { + bits<5> Value = t; +} +def TypePSEUDO : Type<0>; +def TypeALU32 : Type<1>; +def TypeCR : Type<2>; +def TypeJR : Type<3>; +def TypeJ : Type<4>; +def TypeLD : Type<5>; +def TypeST : Type<6>; +def TypeSYSTEM : Type<7>; +def TypeXTYPE : Type<8>; +def TypeMARKER : Type<31>; //===----------------------------------------------------------------------===// // Intruction Class Declaration + //===----------------------------------------------------------------------===// class InstHexagon pattern, - string cstr, InstrItinClass itin> : Instruction { + string cstr, InstrItinClass itin, Type type> : Instruction { field bits<32> Inst; let Namespace = "Hexagon"; dag OutOperandList = outs; dag InOperandList = ins; - let AsmString = asmstr; + let AsmString = asmstr; let Pattern = pattern; let Constraints = cstr; - let Itinerary = itin; - - // *** The code below must match HexagonBaseInfo.h *** - + let Itinerary = itin; + let Size = 4; + + // *** Must match HexagonBaseInfo.h *** + // Instruction type according to the ISA. + Type HexagonType = type; + let TSFlags{4-0} = HexagonType.Value; + // Solo instructions, i.e., those that cannot be in a packet with others. + bits<1> isHexagonSolo = 0; + let TSFlags{5} = isHexagonSolo; // Predicated instructions. 
bits<1> isPredicated = 0; - let TSFlags{1} = isPredicated; + let TSFlags{6} = isPredicated; // *** The code above must match HexagonBaseInfo.h *** } @@ -47,17 +66,25 @@ class InstHexagon pattern, // LD Instruction Class in V2/V3/V4. // Definition of the instruction class NOT CHANGED. class LDInst pattern> - : InstHexagon { + : InstHexagon { bits<5> rd; bits<5> rs; bits<13> imm13; } +class LDInst2 pattern> + : InstHexagon { + bits<5> rd; + bits<5> rs; + bits<13> imm13; + let mayLoad = 1; +} + // LD Instruction Class in V2/V3/V4. // Definition of the instruction class NOT CHANGED. class LDInstPost pattern, string cstr> - : InstHexagon { + : InstHexagon { bits<5> rd; bits<5> rs; bits<5> rt; @@ -68,7 +95,24 @@ class LDInstPost pattern, // ST Instruction Class in V4 can take SLOT0 & SLOT1. // Definition of the instruction class CHANGED from V2/V3 to V4. class STInst pattern> - : InstHexagon { + : InstHexagon { + bits<5> rd; + bits<5> rs; + bits<13> imm13; +} + +class STInst2 pattern> + : InstHexagon { + bits<5> rd; + bits<5> rs; + bits<13> imm13; + let mayStore = 1; +} + +// SYSTEM Instruction Class in V4 can take SLOT0 only +// In V2/V3 we used ST for this but in v4 ST can take SLOT0 or SLOT1. +class SYSInst pattern> + : InstHexagon { bits<5> rd; bits<5> rs; bits<13> imm13; @@ -79,7 +123,7 @@ class STInst pattern> // Definition of the instruction class CHANGED from V2/V3 to V4. class STInstPost pattern, string cstr> - : InstHexagon { + : InstHexagon { bits<5> rd; bits<5> rs; bits<5> rt; @@ -89,7 +133,7 @@ class STInstPost pattern, // ALU32 Instruction Class in V2/V3/V4. // Definition of the instruction class NOT CHANGED. class ALU32Type pattern> - : InstHexagon { + : InstHexagon { bits<5> rd; bits<5> rs; bits<5> rt; @@ -102,7 +146,17 @@ class ALU32Type pattern> // Definition of the instruction class NOT CHANGED. // Name of the Instruction Class changed from ALU64 to XTYPE from V2/V3 to V4. class ALU64Type pattern> - : InstHexagon { + : InstHexagon { + bits<5> rd; + bits<5> rs; + bits<5> rt; + bits<16> imm16; + bits<16> imm16_2; +} + +class ALU64_acc pattern, + string cstr> + : InstHexagon { bits<5> rd; bits<5> rs; bits<5> rt; @@ -115,7 +169,7 @@ class ALU64Type pattern> // Definition of the instruction class NOT CHANGED. // Name of the Instruction Class changed from M to XTYPE from V2/V3 to V4. class MInst pattern> - : InstHexagon { + : InstHexagon { bits<5> rd; bits<5> rs; bits<5> rt; @@ -126,8 +180,8 @@ class MInst pattern> // Definition of the instruction class NOT CHANGED. // Name of the Instruction Class changed from M to XTYPE from V2/V3 to V4. class MInst_acc pattern, - string cstr> - : InstHexagon { + string cstr> + : InstHexagon { bits<5> rd; bits<5> rs; bits<5> rt; @@ -138,9 +192,7 @@ class MInst_acc pattern, // Definition of the instruction class NOT CHANGED. // Name of the Instruction Class changed from S to XTYPE from V2/V3 to V4. class SInst pattern> -//: InstHexagon { - : InstHexagon { -// : InstHexagon { + : InstHexagon { bits<5> rd; bits<5> rs; bits<5> rt; @@ -151,8 +203,8 @@ class SInst pattern> // Definition of the instruction class NOT CHANGED. // Name of the Instruction Class changed from S to XTYPE from V2/V3 to V4. class SInst_acc pattern, - string cstr> - : InstHexagon { + string cstr> + : InstHexagon { // : InstHexagon { // : InstHexagon { bits<5> rd; @@ -163,14 +215,14 @@ class SInst_acc pattern, // J Instruction Class in V2/V3/V4. // Definition of the instruction class NOT CHANGED. 
class JType pattern> - : InstHexagon { + : InstHexagon { bits<16> imm16; } // JR Instruction Class in V2/V3/V4. // Definition of the instruction class NOT CHANGED. class JRType pattern> - : InstHexagon { + : InstHexagon { bits<5> rs; bits<5> pu; // Predicate register } @@ -178,15 +230,22 @@ class JRType pattern> // CR Instruction Class in V2/V3/V4. // Definition of the instruction class NOT CHANGED. class CRInst pattern> - : InstHexagon { + : InstHexagon { bits<5> rs; bits<10> imm10; } +class Marker pattern> + : InstHexagon { + let isCodeGenOnly = 1; + let isPseudo = 1; +} class Pseudo pattern> - : InstHexagon; - + : InstHexagon { + let isCodeGenOnly = 1; + let isPseudo = 1; +} //===----------------------------------------------------------------------===// // Instruction Classes Definitions - @@ -222,6 +281,11 @@ class ALU64_rr pattern> : ALU64Type { } +class ALU64_ri pattern> + : ALU64Type { + let rt{0-4} = 0; +} + // J Type Instructions. class JInst pattern> : JType { @@ -234,15 +298,31 @@ class JRInst pattern> // Post increment ST Instruction. -class STInstPI pattern, string cstr> +class STInstPI pattern, + string cstr> + : STInstPost { + let rt{0-4} = 0; +} + +class STInst2PI pattern, + string cstr> : STInstPost { let rt{0-4} = 0; + let mayStore = 1; } // Post increment LD Instruction. -class LDInstPI pattern, string cstr> +class LDInstPI pattern, + string cstr> + : LDInstPost { + let rt{0-4} = 0; +} + +class LDInst2PI pattern, + string cstr> : LDInstPost { let rt{0-4} = 0; + let mayLoad = 1; } //===----------------------------------------------------------------------===// diff --git a/lib/Target/Hexagon/HexagonInstrFormatsV4.td b/lib/Target/Hexagon/HexagonInstrFormatsV4.td index bd5e449..49741a3 100644 --- a/lib/Target/Hexagon/HexagonInstrFormatsV4.td +++ b/lib/Target/Hexagon/HexagonInstrFormatsV4.td @@ -11,11 +11,25 @@ // //===----------------------------------------------------------------------===// +//----------------------------------------------------------------------------// +// Hexagon Instruction Flags + +// +// *** Must match BaseInfo.h *** +//----------------------------------------------------------------------------// + +def TypeMEMOP : Type<9>; +def TypeNV : Type<10>; +def TypePREFIX : Type<30>; + +//----------------------------------------------------------------------------// +// Instruction Classes Definitions + +//----------------------------------------------------------------------------// + // // NV type instructions. // class NVInst_V4 pattern> - : InstHexagon { + : InstHexagon { bits<5> rd; bits<5> rs; bits<13> imm13; } @@ -24,7 +38,7 @@ class NVInst_V4 pattern> // Definition of Post increment new value store. 
class NVInstPost_V4 pattern, string cstr> - : InstHexagon { + : InstHexagon { bits<5> rd; bits<5> rs; bits<5> rt; @@ -39,8 +53,15 @@ class NVInstPI_V4 pattern, } class MEMInst_V4 pattern> - : InstHexagon { + : InstHexagon { bits<5> rd; bits<5> rs; bits<6> imm6; } + +class Immext pattern> + : InstHexagon { + let isCodeGenOnly = 1; + + bits<26> imm26; +} diff --git a/lib/Target/Hexagon/HexagonInstrInfo.cpp b/lib/Target/Hexagon/HexagonInstrInfo.cpp index 77b3663..c8f933d 100644 --- a/lib/Target/Hexagon/HexagonInstrInfo.cpp +++ b/lib/Target/Hexagon/HexagonInstrInfo.cpp @@ -11,10 +11,10 @@ // //===----------------------------------------------------------------------===// -#include "Hexagon.h" #include "HexagonInstrInfo.h" #include "HexagonRegisterInfo.h" #include "HexagonSubtarget.h" +#include "Hexagon.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/DFAPacketizer.h" @@ -34,24 +34,23 @@ using namespace llvm; /// Constants for Hexagon instructions. /// const int Hexagon_MEMW_OFFSET_MAX = 4095; -const int Hexagon_MEMW_OFFSET_MIN = 4096; +const int Hexagon_MEMW_OFFSET_MIN = -4096; const int Hexagon_MEMD_OFFSET_MAX = 8191; -const int Hexagon_MEMD_OFFSET_MIN = 8192; +const int Hexagon_MEMD_OFFSET_MIN = -8192; const int Hexagon_MEMH_OFFSET_MAX = 2047; -const int Hexagon_MEMH_OFFSET_MIN = 2048; +const int Hexagon_MEMH_OFFSET_MIN = -2048; const int Hexagon_MEMB_OFFSET_MAX = 1023; -const int Hexagon_MEMB_OFFSET_MIN = 1024; +const int Hexagon_MEMB_OFFSET_MIN = -1024; const int Hexagon_ADDI_OFFSET_MAX = 32767; -const int Hexagon_ADDI_OFFSET_MIN = 32768; +const int Hexagon_ADDI_OFFSET_MIN = -32768; const int Hexagon_MEMD_AUTOINC_MAX = 56; -const int Hexagon_MEMD_AUTOINC_MIN = 64; +const int Hexagon_MEMD_AUTOINC_MIN = -64; const int Hexagon_MEMW_AUTOINC_MAX = 28; -const int Hexagon_MEMW_AUTOINC_MIN = 32; +const int Hexagon_MEMW_AUTOINC_MIN = -32; const int Hexagon_MEMH_AUTOINC_MAX = 14; -const int Hexagon_MEMH_AUTOINC_MIN = 16; +const int Hexagon_MEMH_AUTOINC_MIN = -16; const int Hexagon_MEMB_AUTOINC_MAX = 7; -const int Hexagon_MEMB_AUTOINC_MIN = 8; - +const int Hexagon_MEMB_AUTOINC_MIN = -8; HexagonInstrInfo::HexagonInstrInfo(HexagonSubtarget &ST) @@ -70,6 +69,7 @@ unsigned HexagonInstrInfo::isLoadFromStackSlot(const MachineInstr *MI, switch (MI->getOpcode()) { + default: break; case Hexagon::LDriw: case Hexagon::LDrid: case Hexagon::LDrih: @@ -81,11 +81,7 @@ unsigned HexagonInstrInfo::isLoadFromStackSlot(const MachineInstr *MI, return MI->getOperand(0).getReg(); } break; - - default: - break; } - return 0; } @@ -98,21 +94,18 @@ unsigned HexagonInstrInfo::isLoadFromStackSlot(const MachineInstr *MI, unsigned HexagonInstrInfo::isStoreToStackSlot(const MachineInstr *MI, int &FrameIndex) const { switch (MI->getOpcode()) { + default: break; case Hexagon::STriw: case Hexagon::STrid: case Hexagon::STrih: case Hexagon::STrib: if (MI->getOperand(2).isFI() && MI->getOperand(1).isImm() && (MI->getOperand(1).getImm() == 0)) { - FrameIndex = MI->getOperand(2).getIndex(); - return MI->getOperand(0).getReg(); + FrameIndex = MI->getOperand(0).getIndex(); + return MI->getOperand(2).getReg(); } break; - - default: - break; } - return 0; } @@ -176,6 +169,7 @@ bool HexagonInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&FBB, SmallVectorImpl &Cond, bool AllowModify) const { + TBB = NULL; FBB = NULL; // If the block has no terminators, it just falls into the block after it. 
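The constants hunk above supplies the minus signs that the *_OFFSET_MIN and *_AUTOINC_MIN values were missing; with the old definitions every lower bound was a large positive number, so the range checks built on them could never accept a negative offset. A self-contained sketch of the check these constants feed, mirroring the LDriw/STriw arm of isValidOffset later in this diff (the helper name isValidMemwOffset is invented for the sketch):

  #include <cassert>

  // Signed, word-aligned offset range for memw accesses, as fixed above.
  const int Hexagon_MEMW_OFFSET_MAX = 4095;
  const int Hexagon_MEMW_OFFSET_MIN = -4096;

  // True if Offset is encodable as the immediate of a word load/store.
  bool isValidMemwOffset(int Offset) {
    assert(Offset % 4 == 0 && "Offset has incorrect alignment");
    return Offset >= Hexagon_MEMW_OFFSET_MIN &&
           Offset <= Hexagon_MEMW_OFFSET_MAX;
  }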
@@ -328,7 +322,8 @@ void HexagonInstrInfo::copyPhysReg(MachineBasicBlock &MBB, DestReg).addReg(SrcReg).addReg(SrcReg); return; } - if (Hexagon::DoubleRegsRegClass.contains(DestReg, SrcReg)) { + if (Hexagon::DoubleRegsRegClass.contains(DestReg) && + Hexagon::IntRegsRegClass.contains(SrcReg)) { // We can have an overlap between single and double reg: r1:0 = r0. if(SrcReg == RI.getSubReg(DestReg, Hexagon::subreg_loreg)) { // r1:0 = r0 @@ -343,7 +338,8 @@ void HexagonInstrInfo::copyPhysReg(MachineBasicBlock &MBB, } return; } - if (Hexagon::CRRegsRegClass.contains(DestReg, SrcReg)) { + if (Hexagon::CRRegsRegClass.contains(DestReg) && + Hexagon::IntRegsRegClass.contains(SrcReg)) { BuildMI(MBB, I, DL, get(Hexagon::TFCR), DestReg).addReg(SrcReg); return; } @@ -370,15 +366,15 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, MFI.getObjectSize(FI), Align); - if (Hexagon::IntRegsRegisterClass->hasSubClassEq(RC)) { + if (Hexagon::IntRegsRegClass.hasSubClassEq(RC)) { BuildMI(MBB, I, DL, get(Hexagon::STriw)) .addFrameIndex(FI).addImm(0) .addReg(SrcReg, getKillRegState(isKill)).addMemOperand(MMO); - } else if (Hexagon::DoubleRegsRegisterClass->hasSubClassEq(RC)) { + } else if (Hexagon::DoubleRegsRegClass.hasSubClassEq(RC)) { BuildMI(MBB, I, DL, get(Hexagon::STrid)) .addFrameIndex(FI).addImm(0) .addReg(SrcReg, getKillRegState(isKill)).addMemOperand(MMO); - } else if (Hexagon::PredRegsRegisterClass->hasSubClassEq(RC)) { + } else if (Hexagon::PredRegsRegClass.hasSubClassEq(RC)) { BuildMI(MBB, I, DL, get(Hexagon::STriw_pred)) .addFrameIndex(FI).addImm(0) .addReg(SrcReg, getKillRegState(isKill)).addMemOperand(MMO); @@ -415,14 +411,13 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, MachineMemOperand::MOLoad, MFI.getObjectSize(FI), Align); - - if (RC == Hexagon::IntRegsRegisterClass) { + if (RC == &Hexagon::IntRegsRegClass) { BuildMI(MBB, I, DL, get(Hexagon::LDriw), DestReg) .addFrameIndex(FI).addImm(0).addMemOperand(MMO); - } else if (RC == Hexagon::DoubleRegsRegisterClass) { + } else if (RC == &Hexagon::DoubleRegsRegClass) { BuildMI(MBB, I, DL, get(Hexagon::LDrid), DestReg) .addFrameIndex(FI).addImm(0).addMemOperand(MMO); - } else if (RC == Hexagon::PredRegsRegisterClass) { + } else if (RC == &Hexagon::PredRegsRegClass) { BuildMI(MBB, I, DL, get(Hexagon::LDriw_pred), DestReg) .addFrameIndex(FI).addImm(0).addMemOperand(MMO); } else { @@ -453,11 +448,11 @@ unsigned HexagonInstrInfo::createVR(MachineFunction* MF, MVT VT) const { MachineRegisterInfo &RegInfo = MF->getRegInfo(); const TargetRegisterClass *TRC; if (VT == MVT::i1) { - TRC = Hexagon::PredRegsRegisterClass; - } else if (VT == MVT::i32) { - TRC = Hexagon::IntRegsRegisterClass; - } else if (VT == MVT::i64) { - TRC = Hexagon::DoubleRegsRegisterClass; + TRC = &Hexagon::PredRegsRegClass; + } else if (VT == MVT::i32 || VT == MVT::f32) { + TRC = &Hexagon::IntRegsRegClass; + } else if (VT == MVT::i64 || VT == MVT::f64) { + TRC = &Hexagon::DoubleRegsRegClass; } else { llvm_unreachable("Cannot handle this register class"); } @@ -466,7 +461,852 @@ unsigned HexagonInstrInfo::createVR(MachineFunction* MF, MVT VT) const { return NewReg; } +bool HexagonInstrInfo::isExtendable(const MachineInstr *MI) const { + switch(MI->getOpcode()) { + default: return false; + // JMP_EQri + case Hexagon::JMP_EQriPt_nv_V4: + case Hexagon::JMP_EQriPnt_nv_V4: + case Hexagon::JMP_EQriNotPt_nv_V4: + case Hexagon::JMP_EQriNotPnt_nv_V4: + + // JMP_EQri - with -1 + case Hexagon::JMP_EQriPtneg_nv_V4: + case 
Hexagon::JMP_EQriPntneg_nv_V4: + case Hexagon::JMP_EQriNotPtneg_nv_V4: + case Hexagon::JMP_EQriNotPntneg_nv_V4: + + // JMP_EQrr + case Hexagon::JMP_EQrrPt_nv_V4: + case Hexagon::JMP_EQrrPnt_nv_V4: + case Hexagon::JMP_EQrrNotPt_nv_V4: + case Hexagon::JMP_EQrrNotPnt_nv_V4: + + // JMP_GTri + case Hexagon::JMP_GTriPt_nv_V4: + case Hexagon::JMP_GTriPnt_nv_V4: + case Hexagon::JMP_GTriNotPt_nv_V4: + case Hexagon::JMP_GTriNotPnt_nv_V4: + + // JMP_GTri - with -1 + case Hexagon::JMP_GTriPtneg_nv_V4: + case Hexagon::JMP_GTriPntneg_nv_V4: + case Hexagon::JMP_GTriNotPtneg_nv_V4: + case Hexagon::JMP_GTriNotPntneg_nv_V4: + // JMP_GTrr + case Hexagon::JMP_GTrrPt_nv_V4: + case Hexagon::JMP_GTrrPnt_nv_V4: + case Hexagon::JMP_GTrrNotPt_nv_V4: + case Hexagon::JMP_GTrrNotPnt_nv_V4: + + // JMP_GTrrdn + case Hexagon::JMP_GTrrdnPt_nv_V4: + case Hexagon::JMP_GTrrdnPnt_nv_V4: + case Hexagon::JMP_GTrrdnNotPt_nv_V4: + case Hexagon::JMP_GTrrdnNotPnt_nv_V4: + + // JMP_GTUri + case Hexagon::JMP_GTUriPt_nv_V4: + case Hexagon::JMP_GTUriPnt_nv_V4: + case Hexagon::JMP_GTUriNotPt_nv_V4: + case Hexagon::JMP_GTUriNotPnt_nv_V4: + + // JMP_GTUrr + case Hexagon::JMP_GTUrrPt_nv_V4: + case Hexagon::JMP_GTUrrPnt_nv_V4: + case Hexagon::JMP_GTUrrNotPt_nv_V4: + case Hexagon::JMP_GTUrrNotPnt_nv_V4: + + // JMP_GTUrrdn + case Hexagon::JMP_GTUrrdnPt_nv_V4: + case Hexagon::JMP_GTUrrdnPnt_nv_V4: + case Hexagon::JMP_GTUrrdnNotPt_nv_V4: + case Hexagon::JMP_GTUrrdnNotPnt_nv_V4: + + // TFR_FI + case Hexagon::TFR_FI: + return true; + } +} + +bool HexagonInstrInfo::isExtended(const MachineInstr *MI) const { + switch(MI->getOpcode()) { + default: return false; + // JMP_EQri + case Hexagon::JMP_EQriPt_ie_nv_V4: + case Hexagon::JMP_EQriPnt_ie_nv_V4: + case Hexagon::JMP_EQriNotPt_ie_nv_V4: + case Hexagon::JMP_EQriNotPnt_ie_nv_V4: + + // JMP_EQri - with -1 + case Hexagon::JMP_EQriPtneg_ie_nv_V4: + case Hexagon::JMP_EQriPntneg_ie_nv_V4: + case Hexagon::JMP_EQriNotPtneg_ie_nv_V4: + case Hexagon::JMP_EQriNotPntneg_ie_nv_V4: + + // JMP_EQrr + case Hexagon::JMP_EQrrPt_ie_nv_V4: + case Hexagon::JMP_EQrrPnt_ie_nv_V4: + case Hexagon::JMP_EQrrNotPt_ie_nv_V4: + case Hexagon::JMP_EQrrNotPnt_ie_nv_V4: + + // JMP_GTri + case Hexagon::JMP_GTriPt_ie_nv_V4: + case Hexagon::JMP_GTriPnt_ie_nv_V4: + case Hexagon::JMP_GTriNotPt_ie_nv_V4: + case Hexagon::JMP_GTriNotPnt_ie_nv_V4: + + // JMP_GTri - with -1 + case Hexagon::JMP_GTriPtneg_ie_nv_V4: + case Hexagon::JMP_GTriPntneg_ie_nv_V4: + case Hexagon::JMP_GTriNotPtneg_ie_nv_V4: + case Hexagon::JMP_GTriNotPntneg_ie_nv_V4: + + // JMP_GTrr + case Hexagon::JMP_GTrrPt_ie_nv_V4: + case Hexagon::JMP_GTrrPnt_ie_nv_V4: + case Hexagon::JMP_GTrrNotPt_ie_nv_V4: + case Hexagon::JMP_GTrrNotPnt_ie_nv_V4: + + // JMP_GTrrdn + case Hexagon::JMP_GTrrdnPt_ie_nv_V4: + case Hexagon::JMP_GTrrdnPnt_ie_nv_V4: + case Hexagon::JMP_GTrrdnNotPt_ie_nv_V4: + case Hexagon::JMP_GTrrdnNotPnt_ie_nv_V4: + + // JMP_GTUri + case Hexagon::JMP_GTUriPt_ie_nv_V4: + case Hexagon::JMP_GTUriPnt_ie_nv_V4: + case Hexagon::JMP_GTUriNotPt_ie_nv_V4: + case Hexagon::JMP_GTUriNotPnt_ie_nv_V4: + + // JMP_GTUrr + case Hexagon::JMP_GTUrrPt_ie_nv_V4: + case Hexagon::JMP_GTUrrPnt_ie_nv_V4: + case Hexagon::JMP_GTUrrNotPt_ie_nv_V4: + case Hexagon::JMP_GTUrrNotPnt_ie_nv_V4: + + // JMP_GTUrrdn + case Hexagon::JMP_GTUrrdnPt_ie_nv_V4: + case Hexagon::JMP_GTUrrdnPnt_ie_nv_V4: + case Hexagon::JMP_GTUrrdnNotPt_ie_nv_V4: + case Hexagon::JMP_GTUrrdnNotPnt_ie_nv_V4: + + // V4 absolute set addressing. 
+ case Hexagon::LDrid_abs_setimm_V4: + case Hexagon::LDriw_abs_setimm_V4: + case Hexagon::LDrih_abs_setimm_V4: + case Hexagon::LDrib_abs_setimm_V4: + case Hexagon::LDriuh_abs_setimm_V4: + case Hexagon::LDriub_abs_setimm_V4: + + case Hexagon::STrid_abs_setimm_V4: + case Hexagon::STrib_abs_setimm_V4: + case Hexagon::STrih_abs_setimm_V4: + case Hexagon::STriw_abs_setimm_V4: + + // V4 global address load. + case Hexagon::LDrid_GP_cPt_V4 : + case Hexagon::LDrid_GP_cNotPt_V4 : + case Hexagon::LDrid_GP_cdnPt_V4 : + case Hexagon::LDrid_GP_cdnNotPt_V4 : + case Hexagon::LDrib_GP_cPt_V4 : + case Hexagon::LDrib_GP_cNotPt_V4 : + case Hexagon::LDrib_GP_cdnPt_V4 : + case Hexagon::LDrib_GP_cdnNotPt_V4 : + case Hexagon::LDriub_GP_cPt_V4 : + case Hexagon::LDriub_GP_cNotPt_V4 : + case Hexagon::LDriub_GP_cdnPt_V4 : + case Hexagon::LDriub_GP_cdnNotPt_V4 : + case Hexagon::LDrih_GP_cPt_V4 : + case Hexagon::LDrih_GP_cNotPt_V4 : + case Hexagon::LDrih_GP_cdnPt_V4 : + case Hexagon::LDrih_GP_cdnNotPt_V4 : + case Hexagon::LDriuh_GP_cPt_V4 : + case Hexagon::LDriuh_GP_cNotPt_V4 : + case Hexagon::LDriuh_GP_cdnPt_V4 : + case Hexagon::LDriuh_GP_cdnNotPt_V4 : + case Hexagon::LDriw_GP_cPt_V4 : + case Hexagon::LDriw_GP_cNotPt_V4 : + case Hexagon::LDriw_GP_cdnPt_V4 : + case Hexagon::LDriw_GP_cdnNotPt_V4 : + case Hexagon::LDd_GP_cPt_V4 : + case Hexagon::LDd_GP_cNotPt_V4 : + case Hexagon::LDd_GP_cdnPt_V4 : + case Hexagon::LDd_GP_cdnNotPt_V4 : + case Hexagon::LDb_GP_cPt_V4 : + case Hexagon::LDb_GP_cNotPt_V4 : + case Hexagon::LDb_GP_cdnPt_V4 : + case Hexagon::LDb_GP_cdnNotPt_V4 : + case Hexagon::LDub_GP_cPt_V4 : + case Hexagon::LDub_GP_cNotPt_V4 : + case Hexagon::LDub_GP_cdnPt_V4 : + case Hexagon::LDub_GP_cdnNotPt_V4 : + case Hexagon::LDh_GP_cPt_V4 : + case Hexagon::LDh_GP_cNotPt_V4 : + case Hexagon::LDh_GP_cdnPt_V4 : + case Hexagon::LDh_GP_cdnNotPt_V4 : + case Hexagon::LDuh_GP_cPt_V4 : + case Hexagon::LDuh_GP_cNotPt_V4 : + case Hexagon::LDuh_GP_cdnPt_V4 : + case Hexagon::LDuh_GP_cdnNotPt_V4 : + case Hexagon::LDw_GP_cPt_V4 : + case Hexagon::LDw_GP_cNotPt_V4 : + case Hexagon::LDw_GP_cdnPt_V4 : + case Hexagon::LDw_GP_cdnNotPt_V4 : + + // V4 global address store. + case Hexagon::STrid_GP_cPt_V4 : + case Hexagon::STrid_GP_cNotPt_V4 : + case Hexagon::STrid_GP_cdnPt_V4 : + case Hexagon::STrid_GP_cdnNotPt_V4 : + case Hexagon::STrib_GP_cPt_V4 : + case Hexagon::STrib_GP_cNotPt_V4 : + case Hexagon::STrib_GP_cdnPt_V4 : + case Hexagon::STrib_GP_cdnNotPt_V4 : + case Hexagon::STrih_GP_cPt_V4 : + case Hexagon::STrih_GP_cNotPt_V4 : + case Hexagon::STrih_GP_cdnPt_V4 : + case Hexagon::STrih_GP_cdnNotPt_V4 : + case Hexagon::STriw_GP_cPt_V4 : + case Hexagon::STriw_GP_cNotPt_V4 : + case Hexagon::STriw_GP_cdnPt_V4 : + case Hexagon::STriw_GP_cdnNotPt_V4 : + case Hexagon::STd_GP_cPt_V4 : + case Hexagon::STd_GP_cNotPt_V4 : + case Hexagon::STd_GP_cdnPt_V4 : + case Hexagon::STd_GP_cdnNotPt_V4 : + case Hexagon::STb_GP_cPt_V4 : + case Hexagon::STb_GP_cNotPt_V4 : + case Hexagon::STb_GP_cdnPt_V4 : + case Hexagon::STb_GP_cdnNotPt_V4 : + case Hexagon::STh_GP_cPt_V4 : + case Hexagon::STh_GP_cNotPt_V4 : + case Hexagon::STh_GP_cdnPt_V4 : + case Hexagon::STh_GP_cdnNotPt_V4 : + case Hexagon::STw_GP_cPt_V4 : + case Hexagon::STw_GP_cNotPt_V4 : + case Hexagon::STw_GP_cdnPt_V4 : + case Hexagon::STw_GP_cdnNotPt_V4 : + + // V4 predicated global address new value store. 
+ case Hexagon::STrib_GP_cPt_nv_V4 : + case Hexagon::STrib_GP_cNotPt_nv_V4 : + case Hexagon::STrib_GP_cdnPt_nv_V4 : + case Hexagon::STrib_GP_cdnNotPt_nv_V4 : + case Hexagon::STrih_GP_cPt_nv_V4 : + case Hexagon::STrih_GP_cNotPt_nv_V4 : + case Hexagon::STrih_GP_cdnPt_nv_V4 : + case Hexagon::STrih_GP_cdnNotPt_nv_V4 : + case Hexagon::STriw_GP_cPt_nv_V4 : + case Hexagon::STriw_GP_cNotPt_nv_V4 : + case Hexagon::STriw_GP_cdnPt_nv_V4 : + case Hexagon::STriw_GP_cdnNotPt_nv_V4 : + case Hexagon::STb_GP_cPt_nv_V4 : + case Hexagon::STb_GP_cNotPt_nv_V4 : + case Hexagon::STb_GP_cdnPt_nv_V4 : + case Hexagon::STb_GP_cdnNotPt_nv_V4 : + case Hexagon::STh_GP_cPt_nv_V4 : + case Hexagon::STh_GP_cNotPt_nv_V4 : + case Hexagon::STh_GP_cdnPt_nv_V4 : + case Hexagon::STh_GP_cdnNotPt_nv_V4 : + case Hexagon::STw_GP_cPt_nv_V4 : + case Hexagon::STw_GP_cNotPt_nv_V4 : + case Hexagon::STw_GP_cdnPt_nv_V4 : + case Hexagon::STw_GP_cdnNotPt_nv_V4 : + + // TFR_FI + case Hexagon::TFR_FI_immext_V4: + + // TFRI_F + case Hexagon::TFRI_f: + case Hexagon::TFRI_cPt_f: + case Hexagon::TFRI_cNotPt_f: + case Hexagon::CONST64_Float_Real: + return true; + } +} + +bool HexagonInstrInfo::isNewValueJump(const MachineInstr *MI) const { + switch (MI->getOpcode()) { + default: return false; + // JMP_EQri + case Hexagon::JMP_EQriPt_nv_V4: + case Hexagon::JMP_EQriPnt_nv_V4: + case Hexagon::JMP_EQriNotPt_nv_V4: + case Hexagon::JMP_EQriNotPnt_nv_V4: + case Hexagon::JMP_EQriPt_ie_nv_V4: + case Hexagon::JMP_EQriPnt_ie_nv_V4: + case Hexagon::JMP_EQriNotPt_ie_nv_V4: + case Hexagon::JMP_EQriNotPnt_ie_nv_V4: + + // JMP_EQri - with -1 + case Hexagon::JMP_EQriPtneg_nv_V4: + case Hexagon::JMP_EQriPntneg_nv_V4: + case Hexagon::JMP_EQriNotPtneg_nv_V4: + case Hexagon::JMP_EQriNotPntneg_nv_V4: + case Hexagon::JMP_EQriPtneg_ie_nv_V4: + case Hexagon::JMP_EQriPntneg_ie_nv_V4: + case Hexagon::JMP_EQriNotPtneg_ie_nv_V4: + case Hexagon::JMP_EQriNotPntneg_ie_nv_V4: + + // JMP_EQrr + case Hexagon::JMP_EQrrPt_nv_V4: + case Hexagon::JMP_EQrrPnt_nv_V4: + case Hexagon::JMP_EQrrNotPt_nv_V4: + case Hexagon::JMP_EQrrNotPnt_nv_V4: + case Hexagon::JMP_EQrrPt_ie_nv_V4: + case Hexagon::JMP_EQrrPnt_ie_nv_V4: + case Hexagon::JMP_EQrrNotPt_ie_nv_V4: + case Hexagon::JMP_EQrrNotPnt_ie_nv_V4: + + // JMP_GTri + case Hexagon::JMP_GTriPt_nv_V4: + case Hexagon::JMP_GTriPnt_nv_V4: + case Hexagon::JMP_GTriNotPt_nv_V4: + case Hexagon::JMP_GTriNotPnt_nv_V4: + case Hexagon::JMP_GTriPt_ie_nv_V4: + case Hexagon::JMP_GTriPnt_ie_nv_V4: + case Hexagon::JMP_GTriNotPt_ie_nv_V4: + case Hexagon::JMP_GTriNotPnt_ie_nv_V4: + + // JMP_GTri - with -1 + case Hexagon::JMP_GTriPtneg_nv_V4: + case Hexagon::JMP_GTriPntneg_nv_V4: + case Hexagon::JMP_GTriNotPtneg_nv_V4: + case Hexagon::JMP_GTriNotPntneg_nv_V4: + case Hexagon::JMP_GTriPtneg_ie_nv_V4: + case Hexagon::JMP_GTriPntneg_ie_nv_V4: + case Hexagon::JMP_GTriNotPtneg_ie_nv_V4: + case Hexagon::JMP_GTriNotPntneg_ie_nv_V4: + + // JMP_GTrr + case Hexagon::JMP_GTrrPt_nv_V4: + case Hexagon::JMP_GTrrPnt_nv_V4: + case Hexagon::JMP_GTrrNotPt_nv_V4: + case Hexagon::JMP_GTrrNotPnt_nv_V4: + case Hexagon::JMP_GTrrPt_ie_nv_V4: + case Hexagon::JMP_GTrrPnt_ie_nv_V4: + case Hexagon::JMP_GTrrNotPt_ie_nv_V4: + case Hexagon::JMP_GTrrNotPnt_ie_nv_V4: + + // JMP_GTrrdn + case Hexagon::JMP_GTrrdnPt_nv_V4: + case Hexagon::JMP_GTrrdnPnt_nv_V4: + case Hexagon::JMP_GTrrdnNotPt_nv_V4: + case Hexagon::JMP_GTrrdnNotPnt_nv_V4: + case Hexagon::JMP_GTrrdnPt_ie_nv_V4: + case Hexagon::JMP_GTrrdnPnt_ie_nv_V4: + case Hexagon::JMP_GTrrdnNotPt_ie_nv_V4: + case Hexagon::JMP_GTrrdnNotPnt_ie_nv_V4: + + 
// JMP_GTUri + case Hexagon::JMP_GTUriPt_nv_V4: + case Hexagon::JMP_GTUriPnt_nv_V4: + case Hexagon::JMP_GTUriNotPt_nv_V4: + case Hexagon::JMP_GTUriNotPnt_nv_V4: + case Hexagon::JMP_GTUriPt_ie_nv_V4: + case Hexagon::JMP_GTUriPnt_ie_nv_V4: + case Hexagon::JMP_GTUriNotPt_ie_nv_V4: + case Hexagon::JMP_GTUriNotPnt_ie_nv_V4: + + // JMP_GTUrr + case Hexagon::JMP_GTUrrPt_nv_V4: + case Hexagon::JMP_GTUrrPnt_nv_V4: + case Hexagon::JMP_GTUrrNotPt_nv_V4: + case Hexagon::JMP_GTUrrNotPnt_nv_V4: + case Hexagon::JMP_GTUrrPt_ie_nv_V4: + case Hexagon::JMP_GTUrrPnt_ie_nv_V4: + case Hexagon::JMP_GTUrrNotPt_ie_nv_V4: + case Hexagon::JMP_GTUrrNotPnt_ie_nv_V4: + + // JMP_GTUrrdn + case Hexagon::JMP_GTUrrdnPt_nv_V4: + case Hexagon::JMP_GTUrrdnPnt_nv_V4: + case Hexagon::JMP_GTUrrdnNotPt_nv_V4: + case Hexagon::JMP_GTUrrdnNotPnt_nv_V4: + case Hexagon::JMP_GTUrrdnPt_ie_nv_V4: + case Hexagon::JMP_GTUrrdnPnt_ie_nv_V4: + case Hexagon::JMP_GTUrrdnNotPt_ie_nv_V4: + case Hexagon::JMP_GTUrrdnNotPnt_ie_nv_V4: + return true; + } +} + +unsigned HexagonInstrInfo::getImmExtForm(const MachineInstr* MI) const { + switch(MI->getOpcode()) { + default: llvm_unreachable("Unknown type of instruction."); + // JMP_EQri + case Hexagon::JMP_EQriPt_nv_V4: + return Hexagon::JMP_EQriPt_ie_nv_V4; + case Hexagon::JMP_EQriNotPt_nv_V4: + return Hexagon::JMP_EQriNotPt_ie_nv_V4; + case Hexagon::JMP_EQriPnt_nv_V4: + return Hexagon::JMP_EQriPnt_ie_nv_V4; + case Hexagon::JMP_EQriNotPnt_nv_V4: + return Hexagon::JMP_EQriNotPnt_ie_nv_V4; + + // JMP_EQri -- with -1 + case Hexagon::JMP_EQriPtneg_nv_V4: + return Hexagon::JMP_EQriPtneg_ie_nv_V4; + case Hexagon::JMP_EQriNotPtneg_nv_V4: + return Hexagon::JMP_EQriNotPtneg_ie_nv_V4; + case Hexagon::JMP_EQriPntneg_nv_V4: + return Hexagon::JMP_EQriPntneg_ie_nv_V4; + case Hexagon::JMP_EQriNotPntneg_nv_V4: + return Hexagon::JMP_EQriNotPntneg_ie_nv_V4; + + // JMP_EQrr + case Hexagon::JMP_EQrrPt_nv_V4: + return Hexagon::JMP_EQrrPt_ie_nv_V4; + case Hexagon::JMP_EQrrNotPt_nv_V4: + return Hexagon::JMP_EQrrNotPt_ie_nv_V4; + case Hexagon::JMP_EQrrPnt_nv_V4: + return Hexagon::JMP_EQrrPnt_ie_nv_V4; + case Hexagon::JMP_EQrrNotPnt_nv_V4: + return Hexagon::JMP_EQrrNotPnt_ie_nv_V4; + + // JMP_GTri + case Hexagon::JMP_GTriPt_nv_V4: + return Hexagon::JMP_GTriPt_ie_nv_V4; + case Hexagon::JMP_GTriNotPt_nv_V4: + return Hexagon::JMP_GTriNotPt_ie_nv_V4; + case Hexagon::JMP_GTriPnt_nv_V4: + return Hexagon::JMP_GTriPnt_ie_nv_V4; + case Hexagon::JMP_GTriNotPnt_nv_V4: + return Hexagon::JMP_GTriNotPnt_ie_nv_V4; + + // JMP_GTri -- with -1 + case Hexagon::JMP_GTriPtneg_nv_V4: + return Hexagon::JMP_GTriPtneg_ie_nv_V4; + case Hexagon::JMP_GTriNotPtneg_nv_V4: + return Hexagon::JMP_GTriNotPtneg_ie_nv_V4; + case Hexagon::JMP_GTriPntneg_nv_V4: + return Hexagon::JMP_GTriPntneg_ie_nv_V4; + case Hexagon::JMP_GTriNotPntneg_nv_V4: + return Hexagon::JMP_GTriNotPntneg_ie_nv_V4; + + // JMP_GTrr + case Hexagon::JMP_GTrrPt_nv_V4: + return Hexagon::JMP_GTrrPt_ie_nv_V4; + case Hexagon::JMP_GTrrNotPt_nv_V4: + return Hexagon::JMP_GTrrNotPt_ie_nv_V4; + case Hexagon::JMP_GTrrPnt_nv_V4: + return Hexagon::JMP_GTrrPnt_ie_nv_V4; + case Hexagon::JMP_GTrrNotPnt_nv_V4: + return Hexagon::JMP_GTrrNotPnt_ie_nv_V4; + + // JMP_GTrrdn + case Hexagon::JMP_GTrrdnPt_nv_V4: + return Hexagon::JMP_GTrrdnPt_ie_nv_V4; + case Hexagon::JMP_GTrrdnNotPt_nv_V4: + return Hexagon::JMP_GTrrdnNotPt_ie_nv_V4; + case Hexagon::JMP_GTrrdnPnt_nv_V4: + return Hexagon::JMP_GTrrdnPnt_ie_nv_V4; + case Hexagon::JMP_GTrrdnNotPnt_nv_V4: + return Hexagon::JMP_GTrrdnNotPnt_ie_nv_V4; + + // JMP_GTUri + case 
Hexagon::JMP_GTUriPt_nv_V4: + return Hexagon::JMP_GTUriPt_ie_nv_V4; + case Hexagon::JMP_GTUriNotPt_nv_V4: + return Hexagon::JMP_GTUriNotPt_ie_nv_V4; + case Hexagon::JMP_GTUriPnt_nv_V4: + return Hexagon::JMP_GTUriPnt_ie_nv_V4; + case Hexagon::JMP_GTUriNotPnt_nv_V4: + return Hexagon::JMP_GTUriNotPnt_ie_nv_V4; + + // JMP_GTUrr + case Hexagon::JMP_GTUrrPt_nv_V4: + return Hexagon::JMP_GTUrrPt_ie_nv_V4; + case Hexagon::JMP_GTUrrNotPt_nv_V4: + return Hexagon::JMP_GTUrrNotPt_ie_nv_V4; + case Hexagon::JMP_GTUrrPnt_nv_V4: + return Hexagon::JMP_GTUrrPnt_ie_nv_V4; + case Hexagon::JMP_GTUrrNotPnt_nv_V4: + return Hexagon::JMP_GTUrrNotPnt_ie_nv_V4; + + // JMP_GTUrrdn + case Hexagon::JMP_GTUrrdnPt_nv_V4: + return Hexagon::JMP_GTUrrdnPt_ie_nv_V4; + case Hexagon::JMP_GTUrrdnNotPt_nv_V4: + return Hexagon::JMP_GTUrrdnNotPt_ie_nv_V4; + case Hexagon::JMP_GTUrrdnPnt_nv_V4: + return Hexagon::JMP_GTUrrdnPnt_ie_nv_V4; + case Hexagon::JMP_GTUrrdnNotPnt_nv_V4: + return Hexagon::JMP_GTUrrdnNotPnt_ie_nv_V4; + + case Hexagon::TFR_FI: + return Hexagon::TFR_FI_immext_V4; + + case Hexagon::MEMw_ADDSUBi_indexed_MEM_V4 : + case Hexagon::MEMw_ADDi_indexed_MEM_V4 : + case Hexagon::MEMw_SUBi_indexed_MEM_V4 : + case Hexagon::MEMw_ADDr_indexed_MEM_V4 : + case Hexagon::MEMw_SUBr_indexed_MEM_V4 : + case Hexagon::MEMw_ANDr_indexed_MEM_V4 : + case Hexagon::MEMw_ORr_indexed_MEM_V4 : + case Hexagon::MEMw_ADDSUBi_MEM_V4 : + case Hexagon::MEMw_ADDi_MEM_V4 : + case Hexagon::MEMw_SUBi_MEM_V4 : + case Hexagon::MEMw_ADDr_MEM_V4 : + case Hexagon::MEMw_SUBr_MEM_V4 : + case Hexagon::MEMw_ANDr_MEM_V4 : + case Hexagon::MEMw_ORr_MEM_V4 : + case Hexagon::MEMh_ADDSUBi_indexed_MEM_V4 : + case Hexagon::MEMh_ADDi_indexed_MEM_V4 : + case Hexagon::MEMh_SUBi_indexed_MEM_V4 : + case Hexagon::MEMh_ADDr_indexed_MEM_V4 : + case Hexagon::MEMh_SUBr_indexed_MEM_V4 : + case Hexagon::MEMh_ANDr_indexed_MEM_V4 : + case Hexagon::MEMh_ORr_indexed_MEM_V4 : + case Hexagon::MEMh_ADDSUBi_MEM_V4 : + case Hexagon::MEMh_ADDi_MEM_V4 : + case Hexagon::MEMh_SUBi_MEM_V4 : + case Hexagon::MEMh_ADDr_MEM_V4 : + case Hexagon::MEMh_SUBr_MEM_V4 : + case Hexagon::MEMh_ANDr_MEM_V4 : + case Hexagon::MEMh_ORr_MEM_V4 : + case Hexagon::MEMb_ADDSUBi_indexed_MEM_V4 : + case Hexagon::MEMb_ADDi_indexed_MEM_V4 : + case Hexagon::MEMb_SUBi_indexed_MEM_V4 : + case Hexagon::MEMb_ADDr_indexed_MEM_V4 : + case Hexagon::MEMb_SUBr_indexed_MEM_V4 : + case Hexagon::MEMb_ANDr_indexed_MEM_V4 : + case Hexagon::MEMb_ORr_indexed_MEM_V4 : + case Hexagon::MEMb_ADDSUBi_MEM_V4 : + case Hexagon::MEMb_ADDi_MEM_V4 : + case Hexagon::MEMb_SUBi_MEM_V4 : + case Hexagon::MEMb_ADDr_MEM_V4 : + case Hexagon::MEMb_SUBr_MEM_V4 : + case Hexagon::MEMb_ANDr_MEM_V4 : + case Hexagon::MEMb_ORr_MEM_V4 : + llvm_unreachable("Needs implementing."); + } +} + +unsigned HexagonInstrInfo::getNormalBranchForm(const MachineInstr* MI) const { + switch(MI->getOpcode()) { + default: llvm_unreachable("Unknown type of jump instruction."); + // JMP_EQri + case Hexagon::JMP_EQriPt_ie_nv_V4: + return Hexagon::JMP_EQriPt_nv_V4; + case Hexagon::JMP_EQriNotPt_ie_nv_V4: + return Hexagon::JMP_EQriNotPt_nv_V4; + case Hexagon::JMP_EQriPnt_ie_nv_V4: + return Hexagon::JMP_EQriPnt_nv_V4; + case Hexagon::JMP_EQriNotPnt_ie_nv_V4: + return Hexagon::JMP_EQriNotPnt_nv_V4; + + // JMP_EQri -- with -1 + case Hexagon::JMP_EQriPtneg_ie_nv_V4: + return Hexagon::JMP_EQriPtneg_nv_V4; + case Hexagon::JMP_EQriNotPtneg_ie_nv_V4: + return Hexagon::JMP_EQriNotPtneg_nv_V4; + case Hexagon::JMP_EQriPntneg_ie_nv_V4: + return Hexagon::JMP_EQriPntneg_nv_V4; + case 
Hexagon::JMP_EQriNotPntneg_ie_nv_V4: + return Hexagon::JMP_EQriNotPntneg_nv_V4; + + // JMP_EQrr + case Hexagon::JMP_EQrrPt_ie_nv_V4: + return Hexagon::JMP_EQrrPt_nv_V4; + case Hexagon::JMP_EQrrNotPt_ie_nv_V4: + return Hexagon::JMP_EQrrNotPt_nv_V4; + case Hexagon::JMP_EQrrPnt_ie_nv_V4: + return Hexagon::JMP_EQrrPnt_nv_V4; + case Hexagon::JMP_EQrrNotPnt_ie_nv_V4: + return Hexagon::JMP_EQrrNotPnt_nv_V4; + + // JMP_GTri + case Hexagon::JMP_GTriPt_ie_nv_V4: + return Hexagon::JMP_GTriPt_nv_V4; + case Hexagon::JMP_GTriNotPt_ie_nv_V4: + return Hexagon::JMP_GTriNotPt_nv_V4; + case Hexagon::JMP_GTriPnt_ie_nv_V4: + return Hexagon::JMP_GTriPnt_nv_V4; + case Hexagon::JMP_GTriNotPnt_ie_nv_V4: + return Hexagon::JMP_GTriNotPnt_nv_V4; + + // JMP_GTri -- with -1 + case Hexagon::JMP_GTriPtneg_ie_nv_V4: + return Hexagon::JMP_GTriPtneg_nv_V4; + case Hexagon::JMP_GTriNotPtneg_ie_nv_V4: + return Hexagon::JMP_GTriNotPtneg_nv_V4; + case Hexagon::JMP_GTriPntneg_ie_nv_V4: + return Hexagon::JMP_GTriPntneg_nv_V4; + case Hexagon::JMP_GTriNotPntneg_ie_nv_V4: + return Hexagon::JMP_GTriNotPntneg_nv_V4; + + // JMP_GTrr + case Hexagon::JMP_GTrrPt_ie_nv_V4: + return Hexagon::JMP_GTrrPt_nv_V4; + case Hexagon::JMP_GTrrNotPt_ie_nv_V4: + return Hexagon::JMP_GTrrNotPt_nv_V4; + case Hexagon::JMP_GTrrPnt_ie_nv_V4: + return Hexagon::JMP_GTrrPnt_nv_V4; + case Hexagon::JMP_GTrrNotPnt_ie_nv_V4: + return Hexagon::JMP_GTrrNotPnt_nv_V4; + + // JMP_GTrrdn + case Hexagon::JMP_GTrrdnPt_ie_nv_V4: + return Hexagon::JMP_GTrrdnPt_nv_V4; + case Hexagon::JMP_GTrrdnNotPt_ie_nv_V4: + return Hexagon::JMP_GTrrdnNotPt_nv_V4; + case Hexagon::JMP_GTrrdnPnt_ie_nv_V4: + return Hexagon::JMP_GTrrdnPnt_nv_V4; + case Hexagon::JMP_GTrrdnNotPnt_ie_nv_V4: + return Hexagon::JMP_GTrrdnNotPnt_nv_V4; + + // JMP_GTUri + case Hexagon::JMP_GTUriPt_ie_nv_V4: + return Hexagon::JMP_GTUriPt_nv_V4; + case Hexagon::JMP_GTUriNotPt_ie_nv_V4: + return Hexagon::JMP_GTUriNotPt_nv_V4; + case Hexagon::JMP_GTUriPnt_ie_nv_V4: + return Hexagon::JMP_GTUriPnt_nv_V4; + case Hexagon::JMP_GTUriNotPnt_ie_nv_V4: + return Hexagon::JMP_GTUriNotPnt_nv_V4; + + // JMP_GTUrr + case Hexagon::JMP_GTUrrPt_ie_nv_V4: + return Hexagon::JMP_GTUrrPt_nv_V4; + case Hexagon::JMP_GTUrrNotPt_ie_nv_V4: + return Hexagon::JMP_GTUrrNotPt_nv_V4; + case Hexagon::JMP_GTUrrPnt_ie_nv_V4: + return Hexagon::JMP_GTUrrPnt_nv_V4; + case Hexagon::JMP_GTUrrNotPnt_ie_nv_V4: + return Hexagon::JMP_GTUrrNotPnt_nv_V4; + + // JMP_GTUrrdn + case Hexagon::JMP_GTUrrdnPt_ie_nv_V4: + return Hexagon::JMP_GTUrrdnPt_nv_V4; + case Hexagon::JMP_GTUrrdnNotPt_ie_nv_V4: + return Hexagon::JMP_GTUrrdnNotPt_nv_V4; + case Hexagon::JMP_GTUrrdnPnt_ie_nv_V4: + return Hexagon::JMP_GTUrrdnPnt_nv_V4; + case Hexagon::JMP_GTUrrdnNotPnt_ie_nv_V4: + return Hexagon::JMP_GTUrrdnNotPnt_nv_V4; + } +} + + +bool HexagonInstrInfo::isNewValueStore(const MachineInstr *MI) const { + switch (MI->getOpcode()) { + default: return false; + // Store Byte + case Hexagon::STrib_nv_V4: + case Hexagon::STrib_indexed_nv_V4: + case Hexagon::STrib_indexed_shl_nv_V4: + case Hexagon::STrib_shl_nv_V4: + case Hexagon::STrib_GP_nv_V4: + case Hexagon::STb_GP_nv_V4: + case Hexagon::POST_STbri_nv_V4: + case Hexagon::STrib_cPt_nv_V4: + case Hexagon::STrib_cdnPt_nv_V4: + case Hexagon::STrib_cNotPt_nv_V4: + case Hexagon::STrib_cdnNotPt_nv_V4: + case Hexagon::STrib_indexed_cPt_nv_V4: + case Hexagon::STrib_indexed_cdnPt_nv_V4: + case Hexagon::STrib_indexed_cNotPt_nv_V4: + case Hexagon::STrib_indexed_cdnNotPt_nv_V4: + case Hexagon::STrib_indexed_shl_cPt_nv_V4: + case 
Hexagon::STrib_indexed_shl_cdnPt_nv_V4: + case Hexagon::STrib_indexed_shl_cNotPt_nv_V4: + case Hexagon::STrib_indexed_shl_cdnNotPt_nv_V4: + case Hexagon::POST_STbri_cPt_nv_V4: + case Hexagon::POST_STbri_cdnPt_nv_V4: + case Hexagon::POST_STbri_cNotPt_nv_V4: + case Hexagon::POST_STbri_cdnNotPt_nv_V4: + case Hexagon::STb_GP_cPt_nv_V4: + case Hexagon::STb_GP_cNotPt_nv_V4: + case Hexagon::STb_GP_cdnPt_nv_V4: + case Hexagon::STb_GP_cdnNotPt_nv_V4: + case Hexagon::STrib_GP_cPt_nv_V4: + case Hexagon::STrib_GP_cNotPt_nv_V4: + case Hexagon::STrib_GP_cdnPt_nv_V4: + case Hexagon::STrib_GP_cdnNotPt_nv_V4: + case Hexagon::STrib_abs_nv_V4: + case Hexagon::STrib_abs_cPt_nv_V4: + case Hexagon::STrib_abs_cdnPt_nv_V4: + case Hexagon::STrib_abs_cNotPt_nv_V4: + case Hexagon::STrib_abs_cdnNotPt_nv_V4: + case Hexagon::STrib_imm_abs_nv_V4: + case Hexagon::STrib_imm_abs_cPt_nv_V4: + case Hexagon::STrib_imm_abs_cdnPt_nv_V4: + case Hexagon::STrib_imm_abs_cNotPt_nv_V4: + case Hexagon::STrib_imm_abs_cdnNotPt_nv_V4: + + // Store Halfword + case Hexagon::STrih_nv_V4: + case Hexagon::STrih_indexed_nv_V4: + case Hexagon::STrih_indexed_shl_nv_V4: + case Hexagon::STrih_shl_nv_V4: + case Hexagon::STrih_GP_nv_V4: + case Hexagon::STh_GP_nv_V4: + case Hexagon::POST_SThri_nv_V4: + case Hexagon::STrih_cPt_nv_V4: + case Hexagon::STrih_cdnPt_nv_V4: + case Hexagon::STrih_cNotPt_nv_V4: + case Hexagon::STrih_cdnNotPt_nv_V4: + case Hexagon::STrih_indexed_cPt_nv_V4: + case Hexagon::STrih_indexed_cdnPt_nv_V4: + case Hexagon::STrih_indexed_cNotPt_nv_V4: + case Hexagon::STrih_indexed_cdnNotPt_nv_V4: + case Hexagon::STrih_indexed_shl_cPt_nv_V4: + case Hexagon::STrih_indexed_shl_cdnPt_nv_V4: + case Hexagon::STrih_indexed_shl_cNotPt_nv_V4: + case Hexagon::STrih_indexed_shl_cdnNotPt_nv_V4: + case Hexagon::POST_SThri_cPt_nv_V4: + case Hexagon::POST_SThri_cdnPt_nv_V4: + case Hexagon::POST_SThri_cNotPt_nv_V4: + case Hexagon::POST_SThri_cdnNotPt_nv_V4: + case Hexagon::STh_GP_cPt_nv_V4: + case Hexagon::STh_GP_cNotPt_nv_V4: + case Hexagon::STh_GP_cdnPt_nv_V4: + case Hexagon::STh_GP_cdnNotPt_nv_V4: + case Hexagon::STrih_GP_cPt_nv_V4: + case Hexagon::STrih_GP_cNotPt_nv_V4: + case Hexagon::STrih_GP_cdnPt_nv_V4: + case Hexagon::STrih_GP_cdnNotPt_nv_V4: + case Hexagon::STrih_abs_nv_V4: + case Hexagon::STrih_abs_cPt_nv_V4: + case Hexagon::STrih_abs_cdnPt_nv_V4: + case Hexagon::STrih_abs_cNotPt_nv_V4: + case Hexagon::STrih_abs_cdnNotPt_nv_V4: + case Hexagon::STrih_imm_abs_nv_V4: + case Hexagon::STrih_imm_abs_cPt_nv_V4: + case Hexagon::STrih_imm_abs_cdnPt_nv_V4: + case Hexagon::STrih_imm_abs_cNotPt_nv_V4: + case Hexagon::STrih_imm_abs_cdnNotPt_nv_V4: + + // Store Word + case Hexagon::STriw_nv_V4: + case Hexagon::STriw_indexed_nv_V4: + case Hexagon::STriw_indexed_shl_nv_V4: + case Hexagon::STriw_shl_nv_V4: + case Hexagon::STriw_GP_nv_V4: + case Hexagon::STw_GP_nv_V4: + case Hexagon::POST_STwri_nv_V4: + case Hexagon::STriw_cPt_nv_V4: + case Hexagon::STriw_cdnPt_nv_V4: + case Hexagon::STriw_cNotPt_nv_V4: + case Hexagon::STriw_cdnNotPt_nv_V4: + case Hexagon::STriw_indexed_cPt_nv_V4: + case Hexagon::STriw_indexed_cdnPt_nv_V4: + case Hexagon::STriw_indexed_cNotPt_nv_V4: + case Hexagon::STriw_indexed_cdnNotPt_nv_V4: + case Hexagon::STriw_indexed_shl_cPt_nv_V4: + case Hexagon::STriw_indexed_shl_cdnPt_nv_V4: + case Hexagon::STriw_indexed_shl_cNotPt_nv_V4: + case Hexagon::STriw_indexed_shl_cdnNotPt_nv_V4: + case Hexagon::POST_STwri_cPt_nv_V4: + case Hexagon::POST_STwri_cdnPt_nv_V4: + case Hexagon::POST_STwri_cNotPt_nv_V4: + case Hexagon::POST_STwri_cdnNotPt_nv_V4: + 
case Hexagon::STw_GP_cPt_nv_V4: + case Hexagon::STw_GP_cNotPt_nv_V4: + case Hexagon::STw_GP_cdnPt_nv_V4: + case Hexagon::STw_GP_cdnNotPt_nv_V4: + case Hexagon::STriw_GP_cPt_nv_V4: + case Hexagon::STriw_GP_cNotPt_nv_V4: + case Hexagon::STriw_GP_cdnPt_nv_V4: + case Hexagon::STriw_GP_cdnNotPt_nv_V4: + case Hexagon::STriw_abs_nv_V4: + case Hexagon::STriw_abs_cPt_nv_V4: + case Hexagon::STriw_abs_cdnPt_nv_V4: + case Hexagon::STriw_abs_cNotPt_nv_V4: + case Hexagon::STriw_abs_cdnNotPt_nv_V4: + case Hexagon::STriw_imm_abs_nv_V4: + case Hexagon::STriw_imm_abs_cPt_nv_V4: + case Hexagon::STriw_imm_abs_cdnPt_nv_V4: + case Hexagon::STriw_imm_abs_cNotPt_nv_V4: + case Hexagon::STriw_imm_abs_cdnNotPt_nv_V4: + return true; + } +} + +bool HexagonInstrInfo::isPostIncrement (const MachineInstr* MI) const { + switch (MI->getOpcode()) + { + default: return false; + // Load Byte + case Hexagon::POST_LDrib: + case Hexagon::POST_LDrib_cPt: + case Hexagon::POST_LDrib_cNotPt: + case Hexagon::POST_LDrib_cdnPt_V4: + case Hexagon::POST_LDrib_cdnNotPt_V4: + + // Load unsigned byte + case Hexagon::POST_LDriub: + case Hexagon::POST_LDriub_cPt: + case Hexagon::POST_LDriub_cNotPt: + case Hexagon::POST_LDriub_cdnPt_V4: + case Hexagon::POST_LDriub_cdnNotPt_V4: + + // Load halfword + case Hexagon::POST_LDrih: + case Hexagon::POST_LDrih_cPt: + case Hexagon::POST_LDrih_cNotPt: + case Hexagon::POST_LDrih_cdnPt_V4: + case Hexagon::POST_LDrih_cdnNotPt_V4: + + // Load unsigned halfword + case Hexagon::POST_LDriuh: + case Hexagon::POST_LDriuh_cPt: + case Hexagon::POST_LDriuh_cNotPt: + case Hexagon::POST_LDriuh_cdnPt_V4: + case Hexagon::POST_LDriuh_cdnNotPt_V4: + + // Load word + case Hexagon::POST_LDriw: + case Hexagon::POST_LDriw_cPt: + case Hexagon::POST_LDriw_cNotPt: + case Hexagon::POST_LDriw_cdnPt_V4: + case Hexagon::POST_LDriw_cdnNotPt_V4: + + // Load double word + case Hexagon::POST_LDrid: + case Hexagon::POST_LDrid_cPt: + case Hexagon::POST_LDrid_cNotPt: + case Hexagon::POST_LDrid_cdnPt_V4: + case Hexagon::POST_LDrid_cdnNotPt_V4: + + // Store byte + case Hexagon::POST_STbri: + case Hexagon::POST_STbri_cPt: + case Hexagon::POST_STbri_cNotPt: + case Hexagon::POST_STbri_cdnPt_V4: + case Hexagon::POST_STbri_cdnNotPt_V4: + + // Store halfword + case Hexagon::POST_SThri: + case Hexagon::POST_SThri_cPt: + case Hexagon::POST_SThri_cNotPt: + case Hexagon::POST_SThri_cdnPt_V4: + case Hexagon::POST_SThri_cdnNotPt_V4: + + // Store word + case Hexagon::POST_STwri: + case Hexagon::POST_STwri_cPt: + case Hexagon::POST_STwri_cNotPt: + case Hexagon::POST_STwri_cdnPt_V4: + case Hexagon::POST_STwri_cdnNotPt_V4: + + // Store double word + case Hexagon::POST_STdri: + case Hexagon::POST_STdri_cPt: + case Hexagon::POST_STdri_cNotPt: + case Hexagon::POST_STdri_cdnPt_V4: + case Hexagon::POST_STdri_cdnNotPt_V4: + return true; + } +} + +bool HexagonInstrInfo::isSaveCalleeSavedRegsCall(const MachineInstr *MI) const { + return MI->getOpcode() == Hexagon::SAVE_REGISTERS_CALL_V4; +} bool HexagonInstrInfo::isPredicable(MachineInstr *MI) const { bool isPred = MI->getDesc().isPredicable(); @@ -548,7 +1388,7 @@ bool HexagonInstrInfo::isPredicable(MachineInstr *MI) const { case Hexagon::SXTH: case Hexagon::ZXTB: case Hexagon::ZXTH: - return Subtarget.getHexagonArchVersion() == HexagonSubtarget::V4; + return Subtarget.hasV4TOps(); case Hexagon::JMPR: return false; @@ -557,8 +1397,27 @@ bool HexagonInstrInfo::isPredicable(MachineInstr *MI) const { return true; } +// This function performs the following inversions: +// +// cPt ---> cNotPt +// cNotPt ---> cPt +// 
+// however, these inversions are NOT included: +// +// cdnPt -X-> cdnNotPt +// cdnNotPt -X-> cdnPt +// cPt_nv -X-> cNotPt_nv (new value stores) +// cNotPt_nv -X-> cPt_nv (new value stores) +// +// because only the following transformations are allowed: +// +// cNotPt ---> cdnNotPt +// cPt ---> cdnPt +// cNotPt ---> cNotPt_nv +// cPt ---> cPt_nv unsigned HexagonInstrInfo::getInvertedPredicatedOpcode(const int Opc) const { switch(Opc) { + default: llvm_unreachable("Unexpected predicated instruction"); case Hexagon::TFR_cPt: return Hexagon::TFR_cNotPt; case Hexagon::TFR_cNotPt: @@ -805,6 +1664,47 @@ unsigned HexagonInstrInfo::getInvertedPredicatedOpcode(const int Opc) const { case Hexagon::STrid_indexed_shl_cNotPt_V4: return Hexagon::STrid_indexed_shl_cPt_V4; + // V4 Store to global address. + case Hexagon::STd_GP_cPt_V4: + return Hexagon::STd_GP_cNotPt_V4; + case Hexagon::STd_GP_cNotPt_V4: + return Hexagon::STd_GP_cPt_V4; + + case Hexagon::STb_GP_cPt_V4: + return Hexagon::STb_GP_cNotPt_V4; + case Hexagon::STb_GP_cNotPt_V4: + return Hexagon::STb_GP_cPt_V4; + + case Hexagon::STh_GP_cPt_V4: + return Hexagon::STh_GP_cNotPt_V4; + case Hexagon::STh_GP_cNotPt_V4: + return Hexagon::STh_GP_cPt_V4; + + case Hexagon::STw_GP_cPt_V4: + return Hexagon::STw_GP_cNotPt_V4; + case Hexagon::STw_GP_cNotPt_V4: + return Hexagon::STw_GP_cPt_V4; + + case Hexagon::STrid_GP_cPt_V4: + return Hexagon::STrid_GP_cNotPt_V4; + case Hexagon::STrid_GP_cNotPt_V4: + return Hexagon::STrid_GP_cPt_V4; + + case Hexagon::STrib_GP_cPt_V4: + return Hexagon::STrib_GP_cNotPt_V4; + case Hexagon::STrib_GP_cNotPt_V4: + return Hexagon::STrib_GP_cPt_V4; + + case Hexagon::STrih_GP_cPt_V4: + return Hexagon::STrih_GP_cNotPt_V4; + case Hexagon::STrih_GP_cNotPt_V4: + return Hexagon::STrih_GP_cPt_V4; + + case Hexagon::STriw_GP_cPt_V4: + return Hexagon::STriw_GP_cNotPt_V4; + case Hexagon::STriw_GP_cNotPt_V4: + return Hexagon::STriw_GP_cPt_V4; + // Load. case Hexagon::LDrid_cPt: return Hexagon::LDrid_cNotPt; @@ -1009,9 +1909,6 @@ unsigned HexagonInstrInfo::getInvertedPredicatedOpcode(const int Opc) const { return Hexagon::JMP_GTUrrdnNotPnt_nv_V4; case Hexagon::JMP_GTUrrdnNotPnt_nv_V4: return Hexagon::JMP_GTUrrdnPnt_nv_V4; - - default: - llvm_unreachable("Unexpected predicated instruction"); } } @@ -1022,12 +1919,21 @@ getMatchingCondBranchOpcode(int Opc, bool invertPredicate) const { case Hexagon::TFR: return !invertPredicate ? Hexagon::TFR_cPt : Hexagon::TFR_cNotPt; + case Hexagon::TFRI_f: + return !invertPredicate ? Hexagon::TFRI_cPt_f : + Hexagon::TFRI_cNotPt_f; case Hexagon::TFRI: return !invertPredicate ? Hexagon::TFRI_cPt : Hexagon::TFRI_cNotPt; case Hexagon::JMP: return !invertPredicate ? Hexagon::JMP_c : Hexagon::JMP_cNot; + case Hexagon::JMP_EQrrPt_nv_V4: + return !invertPredicate ? Hexagon::JMP_EQrrPt_nv_V4 : + Hexagon::JMP_EQrrNotPt_nv_V4; + case Hexagon::JMP_EQriPt_nv_V4: + return !invertPredicate ? Hexagon::JMP_EQriPt_nv_V4 : + Hexagon::JMP_EQriNotPt_nv_V4; case Hexagon::ADD_ri: return !invertPredicate ? Hexagon::ADD_ri_cPt : Hexagon::ADD_ri_cNotPt; @@ -1121,6 +2027,46 @@ getMatchingCondBranchOpcode(int Opc, bool invertPredicate) const { case Hexagon::LDriw_indexed_shl_V4: return !invertPredicate ? Hexagon::LDriw_indexed_shl_cPt_V4 : Hexagon::LDriw_indexed_shl_cNotPt_V4; + + // V4 Load from global address + case Hexagon::LDrid_GP_V4: + return !invertPredicate ? Hexagon::LDrid_GP_cPt_V4 : + Hexagon::LDrid_GP_cNotPt_V4; + case Hexagon::LDrib_GP_V4: + return !invertPredicate ? 
Hexagon::LDrib_GP_cPt_V4 : + Hexagon::LDrib_GP_cNotPt_V4; + case Hexagon::LDriub_GP_V4: + return !invertPredicate ? Hexagon::LDriub_GP_cPt_V4 : + Hexagon::LDriub_GP_cNotPt_V4; + case Hexagon::LDrih_GP_V4: + return !invertPredicate ? Hexagon::LDrih_GP_cPt_V4 : + Hexagon::LDrih_GP_cNotPt_V4; + case Hexagon::LDriuh_GP_V4: + return !invertPredicate ? Hexagon::LDriuh_GP_cPt_V4 : + Hexagon::LDriuh_GP_cNotPt_V4; + case Hexagon::LDriw_GP_V4: + return !invertPredicate ? Hexagon::LDriw_GP_cPt_V4 : + Hexagon::LDriw_GP_cNotPt_V4; + + case Hexagon::LDd_GP_V4: + return !invertPredicate ? Hexagon::LDd_GP_cPt_V4 : + Hexagon::LDd_GP_cNotPt_V4; + case Hexagon::LDb_GP_V4: + return !invertPredicate ? Hexagon::LDb_GP_cPt_V4 : + Hexagon::LDb_GP_cNotPt_V4; + case Hexagon::LDub_GP_V4: + return !invertPredicate ? Hexagon::LDub_GP_cPt_V4 : + Hexagon::LDub_GP_cNotPt_V4; + case Hexagon::LDh_GP_V4: + return !invertPredicate ? Hexagon::LDh_GP_cPt_V4 : + Hexagon::LDh_GP_cNotPt_V4; + case Hexagon::LDuh_GP_V4: + return !invertPredicate ? Hexagon::LDuh_GP_cPt_V4 : + Hexagon::LDuh_GP_cNotPt_V4; + case Hexagon::LDw_GP_V4: + return !invertPredicate ? Hexagon::LDw_GP_cPt_V4 : + Hexagon::LDw_GP_cNotPt_V4; + // Byte. case Hexagon::POST_STbri: return !invertPredicate ? Hexagon::POST_STbri_cPt : @@ -1182,6 +2128,34 @@ getMatchingCondBranchOpcode(int Opc, bool invertPredicate) const { case Hexagon::STrid_indexed_shl_V4: return !invertPredicate ? Hexagon::STrid_indexed_shl_cPt_V4 : Hexagon::STrid_indexed_shl_cNotPt_V4; + + // V4 Store to global address + case Hexagon::STrid_GP_V4: + return !invertPredicate ? Hexagon::STrid_GP_cPt_V4 : + Hexagon::STrid_GP_cNotPt_V4; + case Hexagon::STrib_GP_V4: + return !invertPredicate ? Hexagon::STrib_GP_cPt_V4 : + Hexagon::STrib_GP_cNotPt_V4; + case Hexagon::STrih_GP_V4: + return !invertPredicate ? Hexagon::STrih_GP_cPt_V4 : + Hexagon::STrih_GP_cNotPt_V4; + case Hexagon::STriw_GP_V4: + return !invertPredicate ? Hexagon::STriw_GP_cPt_V4 : + Hexagon::STriw_GP_cNotPt_V4; + + case Hexagon::STd_GP_V4: + return !invertPredicate ? Hexagon::STd_GP_cPt_V4 : + Hexagon::STd_GP_cNotPt_V4; + case Hexagon::STb_GP_V4: + return !invertPredicate ? Hexagon::STb_GP_cPt_V4 : + Hexagon::STb_GP_cNotPt_V4; + case Hexagon::STh_GP_V4: + return !invertPredicate ? Hexagon::STh_GP_cPt_V4 : + Hexagon::STh_GP_cNotPt_V4; + case Hexagon::STw_GP_V4: + return !invertPredicate ? Hexagon::STw_GP_cPt_V4 : + Hexagon::STw_GP_cNotPt_V4; + // Load. case Hexagon::LDrid: return !invertPredicate ? Hexagon::LDrid_cPt : @@ -1201,9 +2175,6 @@ getMatchingCondBranchOpcode(int Opc, bool invertPredicate) const { case Hexagon::LDriub: return !invertPredicate ? Hexagon::LDriub_cPt : Hexagon::LDriub_cNotPt; - case Hexagon::LDriubit: - return !invertPredicate ? Hexagon::LDriub_cPt : - Hexagon::LDriub_cNotPt; // Load Indexed. case Hexagon::LDrid_indexed: return !invertPredicate ? 
Hexagon::LDrid_indexed_cPt : @@ -1297,7 +2268,7 @@ PredicateInstruction(MachineInstr *MI, bool HexagonInstrInfo:: isProfitableToIfCvt(MachineBasicBlock &MBB, - unsigned NumCyles, + unsigned NumCycles, unsigned ExtraPredCycles, const BranchProbability &Probability) const { return true; @@ -1323,7 +2294,6 @@ bool HexagonInstrInfo::isPredicated(const MachineInstr *MI) const { return ((F >> HexagonII::PredicatedPos) & HexagonII::PredicatedMask); } - bool HexagonInstrInfo::DefinesPredicate(MachineInstr *MI, std::vector &Pred) const { @@ -1331,7 +2301,7 @@ HexagonInstrInfo::DefinesPredicate(MachineInstr *MI, MachineOperand MO = MI->getOperand(oper); if (MO.isReg() && MO.isDef()) { const TargetRegisterClass* RC = RI.getMinimalPhysRegClass(MO.getReg()); - if (RC == Hexagon::PredRegsRegisterClass) { + if (RC == &Hexagon::PredRegsRegClass) { Pred.push_back(MO); return true; } @@ -1373,6 +2343,7 @@ isProfitableToDupForIfCvt(MachineBasicBlock &MBB,unsigned NumInstrs, bool HexagonInstrInfo::isDeallocRet(const MachineInstr *MI) const { switch (MI->getOpcode()) { + default: return false; case Hexagon::DEALLOC_RET_V4 : case Hexagon::DEALLOC_RET_cPt_V4 : case Hexagon::DEALLOC_RET_cNotPt_V4 : @@ -1382,7 +2353,6 @@ bool HexagonInstrInfo::isDeallocRet(const MachineInstr *MI) const { case Hexagon::DEALLOC_RET_cNotdnPt_V4 : return true; } - return false; } @@ -1396,13 +2366,17 @@ isValidOffset(const int Opcode, const int Offset) const { switch(Opcode) { case Hexagon::LDriw: + case Hexagon::LDriw_f: case Hexagon::STriw: + case Hexagon::STriw_f: assert((Offset % 4 == 0) && "Offset has incorrect alignment"); return (Offset >= Hexagon_MEMW_OFFSET_MIN) && (Offset <= Hexagon_MEMW_OFFSET_MAX); case Hexagon::LDrid: + case Hexagon::LDrid_f: case Hexagon::STrid: + case Hexagon::STrid_f: assert((Offset % 8 == 0) && "Offset has incorrect alignment"); return (Offset >= Hexagon_MEMD_OFFSET_MIN) && (Offset <= Hexagon_MEMD_OFFSET_MAX); @@ -1410,7 +2384,6 @@ isValidOffset(const int Opcode, const int Offset) const { case Hexagon::LDrih: case Hexagon::LDriuh: case Hexagon::STrih: - case Hexagon::LDrih_ae: assert((Offset % 2 == 0) && "Offset has incorrect alignment"); return (Offset >= Hexagon_MEMH_OFFSET_MIN) && (Offset <= Hexagon_MEMH_OFFSET_MAX); @@ -1418,9 +2391,6 @@ isValidOffset(const int Opcode, const int Offset) const { case Hexagon::LDrib: case Hexagon::STrib: case Hexagon::LDriub: - case Hexagon::LDriubit: - case Hexagon::LDrib_ae: - case Hexagon::LDriub_ae: return (Offset >= Hexagon_MEMB_OFFSET_MIN) && (Offset <= Hexagon_MEMB_OFFSET_MAX); @@ -1528,6 +2498,7 @@ bool HexagonInstrInfo:: isMemOp(const MachineInstr *MI) const { switch (MI->getOpcode()) { + default: return false; case Hexagon::MEMw_ADDSUBi_indexed_MEM_V4 : case Hexagon::MEMw_ADDi_indexed_MEM_V4 : case Hexagon::MEMw_SUBi_indexed_MEM_V4 : @@ -1570,28 +2541,59 @@ isMemOp(const MachineInstr *MI) const { case Hexagon::MEMb_SUBr_MEM_V4 : case Hexagon::MEMb_ANDr_MEM_V4 : case Hexagon::MEMb_ORr_MEM_V4 : - return true; + return true; } - return false; } bool HexagonInstrInfo:: isSpillPredRegOp(const MachineInstr *MI) const { - switch (MI->getOpcode()) - { + switch (MI->getOpcode()) { + default: return false; case Hexagon::STriw_pred : case Hexagon::LDriw_pred : - return true; + return true; + } +} + +bool HexagonInstrInfo::isNewValueJumpCandidate(const MachineInstr *MI) const { + switch (MI->getOpcode()) { + default: return false; + case Hexagon::CMPEQrr: + case Hexagon::CMPEQri: + case Hexagon::CMPLTrr: + case Hexagon::CMPGTrr: + case Hexagon::CMPGTri: + case 
Hexagon::CMPLTUrr: + case Hexagon::CMPGTUrr: + case Hexagon::CMPGTUri: + case Hexagon::CMPGEri: + case Hexagon::CMPGEUri: + return true; } - return false; } +bool HexagonInstrInfo:: +isConditionalTransfer (const MachineInstr *MI) const { + switch (MI->getOpcode()) { + default: return false; + case Hexagon::TFR_cPt: + case Hexagon::TFR_cNotPt: + case Hexagon::TFRI_cPt: + case Hexagon::TFRI_cNotPt: + case Hexagon::TFR_cdnPt: + case Hexagon::TFR_cdnNotPt: + case Hexagon::TFRI_cdnPt: + case Hexagon::TFRI_cdnNotPt: + return true; + } +} bool HexagonInstrInfo::isConditionalALU32 (const MachineInstr* MI) const { const HexagonRegisterInfo& QRI = getRegisterInfo(); switch (MI->getOpcode()) { + default: return false; case Hexagon::ADD_ri_cPt: case Hexagon::ADD_ri_cNotPt: case Hexagon::ADD_rr_cPt: @@ -1619,19 +2621,16 @@ bool HexagonInstrInfo::isConditionalALU32 (const MachineInstr* MI) const { case Hexagon::ZXTB_cNotPt_V4: case Hexagon::ZXTH_cPt_V4: case Hexagon::ZXTH_cNotPt_V4: - return QRI.Subtarget.getHexagonArchVersion() == HexagonSubtarget::V4; - - default: - return false; + return QRI.Subtarget.hasV4TOps(); } } - bool HexagonInstrInfo:: isConditionalLoad (const MachineInstr* MI) const { const HexagonRegisterInfo& QRI = getRegisterInfo(); switch (MI->getOpcode()) { + default: return false; case Hexagon::LDrid_cPt : case Hexagon::LDrid_cNotPt : case Hexagon::LDrid_indexed_cPt : @@ -1669,7 +2668,7 @@ isConditionalLoad (const MachineInstr* MI) const { case Hexagon::POST_LDriuh_cNotPt : case Hexagon::POST_LDriub_cPt : case Hexagon::POST_LDriub_cNotPt : - return QRI.Subtarget.getHexagonArchVersion() == HexagonSubtarget::V4; + return QRI.Subtarget.hasV4TOps(); case Hexagon::LDrid_indexed_cPt_V4 : case Hexagon::LDrid_indexed_cNotPt_V4 : case Hexagon::LDrid_indexed_shl_cPt_V4 : case Hexagon::LDrid_indexed_shl_cNotPt_V4 : @@ -1694,12 +2693,136 @@ isConditionalLoad (const MachineInstr* MI) const { case Hexagon::LDriw_indexed_cNotPt_V4 : case Hexagon::LDriw_indexed_shl_cPt_V4 : case Hexagon::LDriw_indexed_shl_cNotPt_V4 : - return QRI.Subtarget.getHexagonArchVersion() == HexagonSubtarget::V4; - default: - return false; + return QRI.Subtarget.hasV4TOps(); } } +// Returns true if an instruction is a conditional store. +// +// Note: It doesn't include conditional new-value stores as they can't be +// converted to .new predicate. +// +// p.new NV store [ if(p0.new)memw(R0+#0)=R2.new ] +// ^ ^ +// / \ (not OK. it will cause new-value store to be +// / X conditional on p0.new while R2 producer is +// / \ on p0) +// / \ +// p.new store p.old NV store +// [if(p0.new)memw(R0+#0)=R2] [if(p0)memw(R0+#0)=R2.new] +// ^ ^ +// \ / +// \ / +// \ / +// p.old store +// [if (p0)memw(R0+#0)=R2] +// +// The above diagram shows the steps involved in the conversion of a predicated +// store instruction to its .new predicated new-value form. +// +// The following set of instructions further explains the scenario where a +// conditional new-value store becomes invalid when promoted to .new predicate +// form. +// +// { 1) if (p0) r0 = add(r1, r2) +// 2) p0 = cmp.eq(r3, #0) } +// +// 3) if (p0) memb(r1+#0) = r0 --> this instruction can't be grouped with +// the first two instructions because in instr 1, r0 is conditional on the old +// value of p0 but its use in instr 3 is conditional on p0 modified by instr 2, +// which is not valid for new-value stores. 
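The promotion rule sketched above reduces to a single guard that a packetizer-style pass would apply before giving a predicated store the .new predicate form. The following C++ sketch is an illustration only, not part of the patch: canPromoteToDotNew is a hypothetical helper, while isConditionalStore and isNewValueStore are the queries this patch adds to HexagonInstrInfo.

#include "HexagonInstrInfo.h"
using namespace llvm;

// Hypothetical helper (sketch only): a predicated store may take the .new
// predicate form only if it is a plain conditional store. isConditionalStore()
// already excludes conditional new-value stores; the explicit
// isNewValueStore() check just makes the constraint from the diagram visible
// at the call site.
static bool canPromoteToDotNew(const MachineInstr *MI,
                               const HexagonInstrInfo &TII) {
  return TII.isConditionalStore(MI) && !TII.isNewValueStore(MI);
}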
+bool HexagonInstrInfo:: +isConditionalStore (const MachineInstr* MI) const { + const HexagonRegisterInfo& QRI = getRegisterInfo(); + switch (MI->getOpcode()) + { + default: return false; + case Hexagon::STrib_imm_cPt_V4 : + case Hexagon::STrib_imm_cNotPt_V4 : + case Hexagon::STrib_indexed_shl_cPt_V4 : + case Hexagon::STrib_indexed_shl_cNotPt_V4 : + case Hexagon::STrib_cPt : + case Hexagon::STrib_cNotPt : + case Hexagon::POST_STbri_cPt : + case Hexagon::POST_STbri_cNotPt : + case Hexagon::STrid_indexed_cPt : + case Hexagon::STrid_indexed_cNotPt : + case Hexagon::STrid_indexed_shl_cPt_V4 : + case Hexagon::POST_STdri_cPt : + case Hexagon::POST_STdri_cNotPt : + case Hexagon::STrih_cPt : + case Hexagon::STrih_cNotPt : + case Hexagon::STrih_indexed_cPt : + case Hexagon::STrih_indexed_cNotPt : + case Hexagon::STrih_imm_cPt_V4 : + case Hexagon::STrih_imm_cNotPt_V4 : + case Hexagon::STrih_indexed_shl_cPt_V4 : + case Hexagon::STrih_indexed_shl_cNotPt_V4 : + case Hexagon::POST_SThri_cPt : + case Hexagon::POST_SThri_cNotPt : + case Hexagon::STriw_cPt : + case Hexagon::STriw_cNotPt : + case Hexagon::STriw_indexed_cPt : + case Hexagon::STriw_indexed_cNotPt : + case Hexagon::STriw_imm_cPt_V4 : + case Hexagon::STriw_imm_cNotPt_V4 : + case Hexagon::STriw_indexed_shl_cPt_V4 : + case Hexagon::STriw_indexed_shl_cNotPt_V4 : + case Hexagon::POST_STwri_cPt : + case Hexagon::POST_STwri_cNotPt : + return QRI.Subtarget.hasV4TOps(); + + // V4 global address store before promoting to dot new. + case Hexagon::STrid_GP_cPt_V4 : + case Hexagon::STrid_GP_cNotPt_V4 : + case Hexagon::STrib_GP_cPt_V4 : + case Hexagon::STrib_GP_cNotPt_V4 : + case Hexagon::STrih_GP_cPt_V4 : + case Hexagon::STrih_GP_cNotPt_V4 : + case Hexagon::STriw_GP_cPt_V4 : + case Hexagon::STriw_GP_cNotPt_V4 : + case Hexagon::STd_GP_cPt_V4 : + case Hexagon::STd_GP_cNotPt_V4 : + case Hexagon::STb_GP_cPt_V4 : + case Hexagon::STb_GP_cNotPt_V4 : + case Hexagon::STh_GP_cPt_V4 : + case Hexagon::STh_GP_cNotPt_V4 : + case Hexagon::STw_GP_cPt_V4 : + case Hexagon::STw_GP_cNotPt_V4 : + return QRI.Subtarget.hasV4TOps(); + + // Predicated new value stores (i.e. if (p0) memw(..)=r0.new) are excluded + // from the "Conditional Store" list, because a predicated new value store + // would NOT be promoted to a double dot new store; see the diagram below. + // This function returns yes for those stores that are predicated but not + // yet promoted to predicate dot new instructions. 
+ // + // +---------------------+ + // /-----| if (p0) memw(..)=r0 |---------\ + // || +---------------------+ || + // promote || /\ /\ || promote + // || /||\ /||\ || + // \||/ demote || \||/ + // \/ || || \/ + // +-------------------------+ || +-------------------------+ + // | if (p0.new) memw(..)=r0 | || | if (p0) memw(..)=r0.new | + // +-------------------------+ || +-------------------------+ + // || || || + // || demote \||/ + // promote || \/ NOT possible + // || || /\ + // \||/ || /||\ + // \/ || || + // +-----------------------------+ + // | if (p0.new) memw(..)=r0.new | + // +-----------------------------+ + // Double Dot New Store + // + } +} + + + DFAPacketizer *HexagonInstrInfo:: CreateTargetScheduleState(const TargetMachine *TM, const ScheduleDAG *DAG) const { diff --git a/lib/Target/Hexagon/HexagonInstrInfo.h b/lib/Target/Hexagon/HexagonInstrInfo.h index 7306870..2bb53f8 100644 --- a/lib/Target/Hexagon/HexagonInstrInfo.h +++ b/lib/Target/Hexagon/HexagonInstrInfo.h @@ -112,7 +112,7 @@ public: PredicateInstruction(MachineInstr *MI, const SmallVectorImpl<MachineOperand> &Cond) const; - virtual bool isProfitableToIfCvt(MachineBasicBlock &MBB, unsigned NumCyles, + virtual bool isProfitableToIfCvt(MachineBasicBlock &MBB, unsigned NumCycles, unsigned ExtraPredCycles, const BranchProbability &Probability) const; @@ -160,10 +160,21 @@ public: bool isS8_Immediate(const int value) const; bool isS6_Immediate(const int value) const; + bool isSaveCalleeSavedRegsCall(const MachineInstr* MI) const; + bool isConditionalTransfer(const MachineInstr* MI) const; bool isConditionalALU32 (const MachineInstr* MI) const; bool isConditionalLoad (const MachineInstr* MI) const; + bool isConditionalStore(const MachineInstr* MI) const; bool isDeallocRet(const MachineInstr *MI) const; unsigned getInvertedPredicatedOpcode(const int Opc) const; + bool isExtendable(const MachineInstr* MI) const; + bool isExtended(const MachineInstr* MI) const; + bool isPostIncrement(const MachineInstr* MI) const; + bool isNewValueStore(const MachineInstr* MI) const; + bool isNewValueJump(const MachineInstr* MI) const; + bool isNewValueJumpCandidate(const MachineInstr *MI) const; + unsigned getImmExtForm(const MachineInstr* MI) const; + unsigned getNormalBranchForm(const MachineInstr* MI) const; private: int getMatchingCondBranchOpcode(int Opc, bool sense) const; diff --git a/lib/Target/Hexagon/HexagonInstrInfo.td b/lib/Target/Hexagon/HexagonInstrInfo.td index b563ac3..c0c0df6 100644 --- a/lib/Target/Hexagon/HexagonInstrInfo.td +++ b/lib/Target/Hexagon/HexagonInstrInfo.td @@ -25,7 +25,10 @@ def HasV3TOnly : Predicate<"Subtarget.hasV3TOpsOnly()">; def NoV3T : Predicate<"!Subtarget.hasV3TOps()">; def HasV4T : Predicate<"Subtarget.hasV4TOps()">; def NoV4T : Predicate<"!Subtarget.hasV4TOps()">; +def HasV5T : Predicate<"Subtarget.hasV5TOps()">; +def NoV5T : Predicate<"!Subtarget.hasV5TOps()">; def UseMEMOP : Predicate<"Subtarget.useMemOps()">; +def IEEERndNearV5T : Predicate<"Subtarget.modeIEEERndNear()">; // Addressing modes. 
def ADDRrr : ComplexPattern; @@ -84,10 +87,12 @@ def symbolLo32 : Operand { multiclass ALU32_rr_ri<string OpcStr, SDNode OpNode> { def rr : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$b, IntRegs:$c), !strconcat("$dst = ", !strconcat(OpcStr, "($b, $c)")), - [(set IntRegs:$dst, (OpNode IntRegs:$b, IntRegs:$c))]>; + [(set (i32 IntRegs:$dst), (OpNode (i32 IntRegs:$b), + (i32 IntRegs:$c)))]>; def ri : ALU32_ri<(outs IntRegs:$dst), (ins s10Imm:$b, IntRegs:$c), !strconcat("$dst = ", !strconcat(OpcStr, "(#$b, $c)")), - [(set IntRegs:$dst, (OpNode s10Imm:$b, IntRegs:$c))]>; + [(set (i32 IntRegs:$dst), (OpNode s10Imm:$b, + (i32 IntRegs:$c)))]>; } // Multi-class for compare ops. @@ -95,111 +100,114 @@ let isCompare = 1 in { multiclass CMP64_rr<string OpcStr, SDNode OpNode> { def rr : ALU64_rr<(outs PredRegs:$dst), (ins DoubleRegs:$b, DoubleRegs:$c), !strconcat("$dst = ", !strconcat(OpcStr, "($b, $c)")), - [(set PredRegs:$dst, (OpNode DoubleRegs:$b, DoubleRegs:$c))]>; + [(set (i1 PredRegs:$dst), + (OpNode (i64 DoubleRegs:$b), (i64 DoubleRegs:$c)))]>; } multiclass CMP32_rr<string OpcStr, SDNode OpNode> { def rr : ALU32_rr<(outs PredRegs:$dst), (ins IntRegs:$b, IntRegs:$c), !strconcat("$dst = ", !strconcat(OpcStr, "($b, $c)")), - [(set PredRegs:$dst, (OpNode IntRegs:$b, IntRegs:$c))]>; + [(set (i1 PredRegs:$dst), + (OpNode (i32 IntRegs:$b), (i32 IntRegs:$c)))]>; } multiclass CMP32_rr_ri_s10<string OpcStr, SDNode OpNode> { def rr : ALU32_rr<(outs PredRegs:$dst), (ins IntRegs:$b, IntRegs:$c), !strconcat("$dst = ", !strconcat(OpcStr, "($b, $c)")), - [(set PredRegs:$dst, (OpNode IntRegs:$b, IntRegs:$c))]>; + [(set (i1 PredRegs:$dst), + (OpNode (i32 IntRegs:$b), (i32 IntRegs:$c)))]>; def ri : ALU32_ri<(outs PredRegs:$dst), (ins IntRegs:$b, s10Imm:$c), !strconcat("$dst = ", !strconcat(OpcStr, "($b, #$c)")), - [(set PredRegs:$dst, (OpNode IntRegs:$b, s10ImmPred:$c))]>; + [(set (i1 PredRegs:$dst), + (OpNode (i32 IntRegs:$b), s10ImmPred:$c))]>; } multiclass CMP32_rr_ri_u9<string OpcStr, SDNode OpNode> { def rr : ALU32_rr<(outs PredRegs:$dst), (ins IntRegs:$b, IntRegs:$c), !strconcat("$dst = ", !strconcat(OpcStr, "($b, $c)")), - [(set PredRegs:$dst, (OpNode IntRegs:$b, IntRegs:$c))]>; + [(set (i1 PredRegs:$dst), + (OpNode (i32 IntRegs:$b), (i32 IntRegs:$c)))]>; def ri : ALU32_ri<(outs PredRegs:$dst), (ins IntRegs:$b, u9Imm:$c), !strconcat("$dst = ", !strconcat(OpcStr, "($b, #$c)")), - [(set PredRegs:$dst, (OpNode IntRegs:$b, u9ImmPred:$c))]>; + [(set (i1 PredRegs:$dst), + (OpNode (i32 IntRegs:$b), u9ImmPred:$c))]>; } -multiclass CMP32_ri_u9<string OpcStr, SDNode OpNode> { - def ri : ALU32_ri<(outs PredRegs:$dst), (ins IntRegs:$b, u9Imm:$c), +multiclass CMP32_ri_u8<string OpcStr, SDNode OpNode> { + def ri : ALU32_ri<(outs PredRegs:$dst), (ins IntRegs:$b, u8Imm:$c), !strconcat("$dst = ", !strconcat(OpcStr, "($b, #$c)")), - [(set PredRegs:$dst, (OpNode IntRegs:$b, u9ImmPred:$c))]>; + [(set (i1 PredRegs:$dst), (OpNode (i32 IntRegs:$b), + u8ImmPred:$c))]>; } multiclass CMP32_ri_s8<string OpcStr, SDNode OpNode> { def ri : ALU32_ri<(outs PredRegs:$dst), (ins IntRegs:$b, s8Imm:$c), !strconcat("$dst = ", !strconcat(OpcStr, "($b, #$c)")), - [(set PredRegs:$dst, (OpNode IntRegs:$b, s8ImmPred:$c))]>; + [(set (i1 PredRegs:$dst), (OpNode (i32 IntRegs:$b), + s8ImmPred:$c))]>; } } //===----------------------------------------------------------------------===// -// Instructions -//===----------------------------------------------------------------------===// - -//===----------------------------------------------------------------------===// -// http://qualnet.qualcomm.com/~erich/v1/htmldocs/index.html -// http://qualnet.qualcomm.com/~erich/v2/htmldocs/index.html -// http://qualnet.qualcomm.com/~erich/v3/htmldocs/index.html -// 
http://qualnet.qualcomm.com/~erich/v4/htmldocs/index.html -// http://qualnet.qualcomm.com/~erich/v5/htmldocs/index.html -//===----------------------------------------------------------------------===// - -//===----------------------------------------------------------------------===// // ALU32/ALU + //===----------------------------------------------------------------------===// // Add. -let isPredicable = 1 in +let isCommutable = 1, isPredicable = 1 in def ADD_rr : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), "$dst = add($src1, $src2)", - [(set IntRegs:$dst, (add IntRegs:$src1, IntRegs:$src2))]>; + [(set (i32 IntRegs:$dst), (add (i32 IntRegs:$src1), + (i32 IntRegs:$src2)))]>; let isPredicable = 1 in def ADD_ri : ALU32_ri<(outs IntRegs:$dst), (ins IntRegs:$src1, s16Imm:$src2), "$dst = add($src1, #$src2)", - [(set IntRegs:$dst, (add IntRegs:$src1, s16ImmPred:$src2))]>; + [(set (i32 IntRegs:$dst), (add (i32 IntRegs:$src1), + s16ImmPred:$src2))]>; // Logical operations. let isPredicable = 1 in def XOR_rr : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), "$dst = xor($src1, $src2)", - [(set IntRegs:$dst, (xor IntRegs:$src1, IntRegs:$src2))]>; + [(set (i32 IntRegs:$dst), (xor (i32 IntRegs:$src1), + (i32 IntRegs:$src2)))]>; -let isPredicable = 1 in +let isCommutable = 1, isPredicable = 1 in def AND_rr : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), "$dst = and($src1, $src2)", - [(set IntRegs:$dst, (and IntRegs:$src1, IntRegs:$src2))]>; + [(set (i32 IntRegs:$dst), (and (i32 IntRegs:$src1), + (i32 IntRegs:$src2)))]>; def OR_ri : ALU32_ri<(outs IntRegs:$dst), - (ins IntRegs:$src1, s8Imm:$src2), + (ins IntRegs:$src1, s10Imm:$src2), "$dst = or($src1, #$src2)", - [(set IntRegs:$dst, (or IntRegs:$src1, s8ImmPred:$src2))]>; + [(set (i32 IntRegs:$dst), (or (i32 IntRegs:$src1), + s10ImmPred:$src2))]>; def NOT_rr : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1), "$dst = not($src1)", - [(set IntRegs:$dst, (not IntRegs:$src1))]>; + [(set (i32 IntRegs:$dst), (not (i32 IntRegs:$src1)))]>; def AND_ri : ALU32_ri<(outs IntRegs:$dst), (ins IntRegs:$src1, s10Imm:$src2), "$dst = and($src1, #$src2)", - [(set IntRegs:$dst, (and IntRegs:$src1, s10ImmPred:$src2))]>; + [(set (i32 IntRegs:$dst), (and (i32 IntRegs:$src1), + s10ImmPred:$src2))]>; -let isPredicable = 1 in +let isCommutable = 1, isPredicable = 1 in def OR_rr : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), "$dst = or($src1, $src2)", - [(set IntRegs:$dst, (or IntRegs:$src1, IntRegs:$src2))]>; + [(set (i32 IntRegs:$dst), (or (i32 IntRegs:$src1), + (i32 IntRegs:$src2)))]>; // Negate. def NEG : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1), "$dst = neg($src1)", - [(set IntRegs:$dst, (ineg IntRegs:$src1))]>; + [(set (i32 IntRegs:$dst), (ineg (i32 IntRegs:$src1)))]>; // Nop. let neverHasSideEffects = 1 in def NOP : ALU32_rr<(outs), (ins), @@ -211,13 +219,20 @@ let isPredicable = 1 in def SUB_rr : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), "$dst = sub($src1, $src2)", - [(set IntRegs:$dst, (sub IntRegs:$src1, IntRegs:$src2))]>; + [(set (i32 IntRegs:$dst), (sub (i32 IntRegs:$src1), + (i32 IntRegs:$src2)))]>; + +// Rd32=sub(#s10,Rs32) +def SUB_ri : ALU32_ri<(outs IntRegs:$dst), + (ins s10Imm:$src1, IntRegs:$src2), + "$dst = sub(#$src1, $src2)", + [(set IntRegs:$dst, (sub s10ImmPred:$src1, IntRegs:$src2))]>; // Transfer immediate. 
-let isReMaterializable = 1, isPredicable = 1 in +let isMoveImm = 1, isReMaterializable = 1, isPredicable = 1 in def TFRI : ALU32_ri<(outs IntRegs:$dst), (ins s16Imm:$src1), "$dst = #$src1", - [(set IntRegs:$dst, s16ImmPred:$src1)]>; + [(set (i32 IntRegs:$dst), s16ImmPred:$src1)]>; // Transfer register. let neverHasSideEffects = 1, isPredicable = 1 in @@ -225,6 +240,11 @@ def TFR : ALU32_ri<(outs IntRegs:$dst), (ins IntRegs:$src1), "$dst = $src1", []>; +let neverHasSideEffects = 1, isPredicable = 1 in +def TFR64 : ALU32_ri<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1), + "$dst = $src1", + []>; + // Transfer control register. let neverHasSideEffects = 1 in def TFCR : CRInst<(outs CRRegs:$dst), (ins IntRegs:$src1), @@ -246,6 +266,12 @@ def COMBINE_rr : ALU32_rr<(outs DoubleRegs:$dst), "$dst = combine($src1, $src2)", []>; +let neverHasSideEffects = 1 in +def COMBINE_ii : ALU32_ii<(outs DoubleRegs:$dst), + (ins s8Imm:$src1, s8Imm:$src2), + "$dst = combine(#$src1, #$src2)", + []>; + // Mux. def VMUX_prr64 : ALU64_rr<(outs DoubleRegs:$dst), (ins PredRegs:$src1, DoubleRegs:$src2, @@ -256,48 +282,52 @@ def VMUX_prr64 : ALU64_rr<(outs DoubleRegs:$dst), (ins PredRegs:$src1, def MUX_rr : ALU32_rr<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), "$dst = mux($src1, $src2, $src3)", - [(set IntRegs:$dst, (select PredRegs:$src1, IntRegs:$src2, - IntRegs:$src3))]>; + [(set (i32 IntRegs:$dst), (i32 (select (i1 PredRegs:$src1), + (i32 IntRegs:$src2), + (i32 IntRegs:$src3))))]>; def MUX_ir : ALU32_ir<(outs IntRegs:$dst), (ins PredRegs:$src1, s8Imm:$src2, IntRegs:$src3), "$dst = mux($src1, #$src2, $src3)", - [(set IntRegs:$dst, (select PredRegs:$src1, - s8ImmPred:$src2, IntRegs:$src3))]>; + [(set (i32 IntRegs:$dst), (i32 (select (i1 PredRegs:$src1), + s8ImmPred:$src2, + (i32 IntRegs:$src3))))]>; def MUX_ri : ALU32_ri<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2, s8Imm:$src3), "$dst = mux($src1, $src2, #$src3)", - [(set IntRegs:$dst, (select PredRegs:$src1, IntRegs:$src2, - s8ImmPred:$src3))]>; + [(set (i32 IntRegs:$dst), (i32 (select (i1 PredRegs:$src1), + (i32 IntRegs:$src2), + s8ImmPred:$src3)))]>; def MUX_ii : ALU32_ii<(outs IntRegs:$dst), (ins PredRegs:$src1, s8Imm:$src2, s8Imm:$src3), "$dst = mux($src1, #$src2, #$src3)", - [(set IntRegs:$dst, (select PredRegs:$src1, s8ImmPred:$src2, - s8ImmPred:$src3))]>; + [(set (i32 IntRegs:$dst), (i32 (select (i1 PredRegs:$src1), + s8ImmPred:$src2, + s8ImmPred:$src3)))]>; // Shift halfword. let isPredicable = 1 in def ASLH : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1), "$dst = aslh($src1)", - [(set IntRegs:$dst, (shl 16, IntRegs:$src1))]>; + [(set (i32 IntRegs:$dst), (shl 16, (i32 IntRegs:$src1)))]>; let isPredicable = 1 in def ASRH : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1), "$dst = asrh($src1)", - [(set IntRegs:$dst, (sra 16, IntRegs:$src1))]>; + [(set (i32 IntRegs:$dst), (sra 16, (i32 IntRegs:$src1)))]>; // Sign extend. let isPredicable = 1 in def SXTB : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1), "$dst = sxtb($src1)", - [(set IntRegs:$dst, (sext_inreg IntRegs:$src1, i8))]>; + [(set (i32 IntRegs:$dst), (sext_inreg (i32 IntRegs:$src1), i8))]>; let isPredicable = 1 in def SXTH : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1), "$dst = sxth($src1)", - [(set IntRegs:$dst, (sext_inreg IntRegs:$src1, i16))]>; + [(set (i32 IntRegs:$dst), (sext_inreg (i32 IntRegs:$src1), i16))]>; // Zero extend. 
let isPredicable = 1, neverHasSideEffects = 1 in @@ -321,25 +351,25 @@ def ZXTH : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1), // Conditional add. let neverHasSideEffects = 1, isPredicated = 1 in def ADD_ri_cPt : ALU32_ri<(outs IntRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2, s16Imm:$src3), + (ins PredRegs:$src1, IntRegs:$src2, s8Imm:$src3), "if ($src1) $dst = add($src2, #$src3)", []>; let neverHasSideEffects = 1, isPredicated = 1 in def ADD_ri_cNotPt : ALU32_ri<(outs IntRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2, s16Imm:$src3), + (ins PredRegs:$src1, IntRegs:$src2, s8Imm:$src3), "if (!$src1) $dst = add($src2, #$src3)", []>; let neverHasSideEffects = 1, isPredicated = 1 in def ADD_ri_cdnPt : ALU32_ri<(outs IntRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2, s16Imm:$src3), + (ins PredRegs:$src1, IntRegs:$src2, s8Imm:$src3), "if ($src1.new) $dst = add($src2, #$src3)", []>; let neverHasSideEffects = 1, isPredicated = 1 in def ADD_ri_cdnNotPt : ALU32_ri<(outs IntRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2, s16Imm:$src3), + (ins PredRegs:$src1, IntRegs:$src2, s8Imm:$src3), "if (!$src1.new) $dst = add($src2, #$src3)", []>; @@ -497,7 +527,6 @@ def SUB_rr_cdnNotPt : ALU32_rr<(outs IntRegs:$dst), // Conditional transfer. - let neverHasSideEffects = 1, isPredicated = 1 in def TFR_cPt : ALU32_rr<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2), "if ($src1) $dst = $src2", @@ -510,6 +539,18 @@ def TFR_cNotPt : ALU32_rr<(outs IntRegs:$dst), (ins PredRegs:$src1, []>; let neverHasSideEffects = 1, isPredicated = 1 in +def TFR64_cPt : ALU32_rr<(outs DoubleRegs:$dst), (ins PredRegs:$src1, + DoubleRegs:$src2), + "if ($src1) $dst = $src2", + []>; + +let neverHasSideEffects = 1, isPredicated = 1 in +def TFR64_cNotPt : ALU32_rr<(outs DoubleRegs:$dst), (ins PredRegs:$src1, + DoubleRegs:$src2), + "if (!$src1) $dst = $src2", + []>; + +let neverHasSideEffects = 1, isPredicated = 1 in def TFRI_cPt : ALU32_ri<(outs IntRegs:$dst), (ins PredRegs:$src1, s12Imm:$src2), "if ($src1) $dst = #$src2", []>; @@ -548,25 +589,14 @@ def TFRI_cdnNotPt : ALU32_ri<(outs IntRegs:$dst), (ins PredRegs:$src1, defm CMPGTU : CMP32_rr_ri_u9<"cmp.gtu", setugt>; defm CMPGT : CMP32_rr_ri_s10<"cmp.gt", setgt>; defm CMPLT : CMP32_rr<"cmp.lt", setlt>; +defm CMPLTU : CMP32_rr<"cmp.ltu", setult>; defm CMPEQ : CMP32_rr_ri_s10<"cmp.eq", seteq>; defm CMPGE : CMP32_ri_s8<"cmp.ge", setge>; -defm CMPGEU : CMP32_ri_u9<"cmp.geu", setuge>; +defm CMPGEU : CMP32_ri_u8<"cmp.geu", setuge>; //===----------------------------------------------------------------------===// // ALU32/PRED - //===----------------------------------------------------------------------===// -//===----------------------------------------------------------------------===// -// ALU32/VH + -//===----------------------------------------------------------------------===// -// Vector add halfwords - -// Vector averagehalfwords - -// Vector subtract halfwords -//===----------------------------------------------------------------------===// -// ALU32/VH - -//===----------------------------------------------------------------------===// - //===----------------------------------------------------------------------===// // ALU64/ALU + @@ -575,8 +605,8 @@ defm CMPGEU : CMP32_ri_u9<"cmp.geu", setuge>; def ADD64_rr : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2), "$dst = add($src1, $src2)", - [(set DoubleRegs:$dst, (add DoubleRegs:$src1, - DoubleRegs:$src2))]>; + [(set (i64 DoubleRegs:$dst), (add (i64 DoubleRegs:$src1), + (i64 
DoubleRegs:$src2)))]>; // Add halfword. @@ -589,40 +619,93 @@ defm CMPGTU64 : CMP64_rr<"cmp.gtu", setugt>; def AND_rr64 : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2), "$dst = and($src1, $src2)", - [(set DoubleRegs:$dst, (and DoubleRegs:$src1, - DoubleRegs:$src2))]>; + [(set (i64 DoubleRegs:$dst), (and (i64 DoubleRegs:$src1), + (i64 DoubleRegs:$src2)))]>; def OR_rr64 : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2), "$dst = or($src1, $src2)", - [(set DoubleRegs:$dst, (or DoubleRegs:$src1, DoubleRegs:$src2))]>; + [(set (i64 DoubleRegs:$dst), (or (i64 DoubleRegs:$src1), + (i64 DoubleRegs:$src2)))]>; def XOR_rr64 : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2), "$dst = xor($src1, $src2)", - [(set DoubleRegs:$dst, (xor DoubleRegs:$src1, - DoubleRegs:$src2))]>; + [(set (i64 DoubleRegs:$dst), (xor (i64 DoubleRegs:$src1), + (i64 DoubleRegs:$src2)))]>; // Maximum. def MAXw_rr : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), "$dst = max($src2, $src1)", - [(set IntRegs:$dst, (select (i1 (setlt IntRegs:$src2, - IntRegs:$src1)), - IntRegs:$src1, IntRegs:$src2))]>; + [(set (i32 IntRegs:$dst), + (i32 (select (i1 (setlt (i32 IntRegs:$src2), + (i32 IntRegs:$src1))), + (i32 IntRegs:$src1), (i32 IntRegs:$src2))))]>; + +def MAXUw_rr : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + "$dst = maxu($src2, $src1)", + [(set (i32 IntRegs:$dst), + (i32 (select (i1 (setult (i32 IntRegs:$src2), + (i32 IntRegs:$src1))), + (i32 IntRegs:$src1), (i32 IntRegs:$src2))))]>; + +def MAXd_rr : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, + DoubleRegs:$src2), + "$dst = max($src2, $src1)", + [(set (i64 DoubleRegs:$dst), + (i64 (select (i1 (setlt (i64 DoubleRegs:$src2), + (i64 DoubleRegs:$src1))), + (i64 DoubleRegs:$src1), + (i64 DoubleRegs:$src2))))]>; + +def MAXUd_rr : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, + DoubleRegs:$src2), + "$dst = maxu($src2, $src1)", + [(set (i64 DoubleRegs:$dst), + (i64 (select (i1 (setult (i64 DoubleRegs:$src2), + (i64 DoubleRegs:$src1))), + (i64 DoubleRegs:$src1), + (i64 DoubleRegs:$src2))))]>; // Minimum. def MINw_rr : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), "$dst = min($src2, $src1)", - [(set IntRegs:$dst, (select (i1 (setgt IntRegs:$src2, - IntRegs:$src1)), - IntRegs:$src1, IntRegs:$src2))]>; + [(set (i32 IntRegs:$dst), + (i32 (select (i1 (setgt (i32 IntRegs:$src2), + (i32 IntRegs:$src1))), + (i32 IntRegs:$src1), (i32 IntRegs:$src2))))]>; + +def MINUw_rr : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + "$dst = minu($src2, $src1)", + [(set (i32 IntRegs:$dst), + (i32 (select (i1 (setugt (i32 IntRegs:$src2), + (i32 IntRegs:$src1))), + (i32 IntRegs:$src1), (i32 IntRegs:$src2))))]>; + +def MINd_rr : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, + DoubleRegs:$src2), + "$dst = min($src2, $src1)", + [(set (i64 DoubleRegs:$dst), + (i64 (select (i1 (setgt (i64 DoubleRegs:$src2), + (i64 DoubleRegs:$src1))), + (i64 DoubleRegs:$src1), + (i64 DoubleRegs:$src2))))]>; + +def MINUd_rr : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, + DoubleRegs:$src2), + "$dst = minu($src2, $src1)", + [(set (i64 DoubleRegs:$dst), + (i64 (select (i1 (setugt (i64 DoubleRegs:$src2), + (i64 DoubleRegs:$src1))), + (i64 DoubleRegs:$src1), + (i64 DoubleRegs:$src2))))]>; // Subtract. 
def SUB64_rr : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2), "$dst = sub($src1, $src2)", - [(set DoubleRegs:$dst, (sub DoubleRegs:$src1, - DoubleRegs:$src2))]>; + [(set (i64 DoubleRegs:$dst), (sub (i64 DoubleRegs:$src1), + (i64 DoubleRegs:$src2)))]>; // Subtract halfword. @@ -652,30 +735,6 @@ def TFR_64 : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1), //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// -// ALU64/VB + -//===----------------------------------------------------------------------===// -// -//===----------------------------------------------------------------------===// -// ALU64/VB - -//===----------------------------------------------------------------------===// - -//===----------------------------------------------------------------------===// -// ALU64/VH + -//===----------------------------------------------------------------------===// -// -//===----------------------------------------------------------------------===// -// ALU64/VH - -//===----------------------------------------------------------------------===// - -//===----------------------------------------------------------------------===// -// ALU64/VW + -//===----------------------------------------------------------------------===// -// -//===----------------------------------------------------------------------===// -// ALU64/VW - -//===----------------------------------------------------------------------===// - -//===----------------------------------------------------------------------===// // CR + //===----------------------------------------------------------------------===// // Logical reductions on predicates. @@ -687,7 +746,8 @@ def TFR_64 : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1), // Logical operations on predicates. def AND_pp : SInst<(outs PredRegs:$dst), (ins PredRegs:$src1, PredRegs:$src2), "$dst = and($src1, $src2)", - [(set PredRegs:$dst, (and PredRegs:$src1, PredRegs:$src2))]>; + [(set (i1 PredRegs:$dst), (and (i1 PredRegs:$src1), + (i1 PredRegs:$src2)))]>; let neverHasSideEffects = 1 in def AND_pnotp : SInst<(outs PredRegs:$dst), (ins PredRegs:$src1, @@ -726,15 +786,17 @@ def MASK_p : SInst<(outs DoubleRegs:$dst), (ins PredRegs:$src1), def NOT_p : SInst<(outs PredRegs:$dst), (ins PredRegs:$src1), "$dst = not($src1)", - [(set PredRegs:$dst, (not PredRegs:$src1))]>; + [(set (i1 PredRegs:$dst), (not (i1 PredRegs:$src1)))]>; def OR_pp : SInst<(outs PredRegs:$dst), (ins PredRegs:$src1, PredRegs:$src2), "$dst = or($src1, $src2)", - [(set PredRegs:$dst, (or PredRegs:$src1, PredRegs:$src2))]>; + [(set (i1 PredRegs:$dst), (or (i1 PredRegs:$src1), + (i1 PredRegs:$src2)))]>; def XOR_pp : SInst<(outs PredRegs:$dst), (ins PredRegs:$src1, PredRegs:$src2), "$dst = xor($src1, $src2)", - [(set PredRegs:$dst, (xor PredRegs:$src1, PredRegs:$src2))]>; + [(set (i1 PredRegs:$dst), (xor (i1 PredRegs:$src1), + (i1 PredRegs:$src2)))]>; // User control register transfer. @@ -760,7 +822,7 @@ let isBranch = 1, isTerminator=1, Defs = [PC], def JMP_c : JInst< (outs), (ins PredRegs:$src, brtarget:$offset), "if ($src) jump $offset", - [(brcond PredRegs:$src, bb:$offset)]>; + [(brcond (i1 PredRegs:$src), bb:$offset)]>; } // if (!p0) jump @@ -826,7 +888,7 @@ def retflag : SDNode<"HexagonISD::RET_FLAG", SDTNone, [SDNPHasChain, SDNPOptInGlue]>; // Jump to address from register. 
-let isReturn = 1, isTerminator = 1, isBarrier = 1, +let isPredicable =1, isReturn = 1, isTerminator = 1, isBarrier = 1, Defs = [PC], Uses = [R31] in { def JMPR: JRInst<(outs), (ins), "jumpr r31", @@ -834,7 +896,7 @@ let isReturn = 1, isTerminator = 1, isBarrier = 1, } // Jump to address from register. -let isReturn = 1, isTerminator = 1, isBarrier = 1, +let isReturn = 1, isTerminator = 1, isBarrier = 1, isPredicated = 1, Defs = [PC], Uses = [R31] in { def JMPR_cPt: JRInst<(outs), (ins PredRegs:$src1), "if ($src1) jumpr r31", @@ -842,7 +904,7 @@ let isReturn = 1, isTerminator = 1, isBarrier = 1, } // Jump to address from register. -let isReturn = 1, isTerminator = 1, isBarrier = 1, +let isReturn = 1, isTerminator = 1, isBarrier = 1, isPredicated = 1, Defs = [PC], Uses = [R31] in { def JMPR_cNotPt: JRInst<(outs), (ins PredRegs:$src1), "if (!$src1) jumpr r31", @@ -865,96 +927,99 @@ let isPredicable = 1 in def LDrid : LDInst<(outs DoubleRegs:$dst), (ins MEMri:$addr), "$dst = memd($addr)", - [(set DoubleRegs:$dst, (load ADDRriS11_3:$addr))]>; + [(set (i64 DoubleRegs:$dst), (i64 (load ADDRriS11_3:$addr)))]>; let isPredicable = 1, AddedComplexity = 20 in def LDrid_indexed : LDInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, s11_3Imm:$offset), - "$dst=memd($src1+#$offset)", - [(set DoubleRegs:$dst, (load (add IntRegs:$src1, - s11_3ImmPred:$offset)))]>; + "$dst = memd($src1+#$offset)", + [(set (i64 DoubleRegs:$dst), + (i64 (load (add (i32 IntRegs:$src1), + s11_3ImmPred:$offset))))]>; -let mayLoad = 1, neverHasSideEffects = 1 in -def LDrid_GP : LDInst<(outs DoubleRegs:$dst), +let neverHasSideEffects = 1 in +def LDrid_GP : LDInst2<(outs DoubleRegs:$dst), (ins globaladdress:$global, u16Imm:$offset), - "$dst=memd(#$global+$offset)", - []>; + "$dst = memd(#$global+$offset)", + []>, + Requires<[NoV4T]>; -let mayLoad = 1, neverHasSideEffects = 1 in -def LDd_GP : LDInst<(outs DoubleRegs:$dst), +let neverHasSideEffects = 1 in +def LDd_GP : LDInst2<(outs DoubleRegs:$dst), (ins globaladdress:$global), - "$dst=memd(#$global)", - []>; + "$dst = memd(#$global)", + []>, + Requires<[NoV4T]>; -let isPredicable = 1, mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in -def POST_LDrid : LDInstPI<(outs DoubleRegs:$dst, IntRegs:$dst2), +let isPredicable = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in +def POST_LDrid : LDInst2PI<(outs DoubleRegs:$dst, IntRegs:$dst2), (ins IntRegs:$src1, s4Imm:$offset), "$dst = memd($src1++#$offset)", [], "$src1 = $dst2">; // Load doubleword conditionally. 
-let mayLoad = 1, neverHasSideEffects = 1 in -def LDrid_cPt : LDInst<(outs DoubleRegs:$dst), +let neverHasSideEffects = 1, isPredicated = 1 in +def LDrid_cPt : LDInst2<(outs DoubleRegs:$dst), (ins PredRegs:$src1, MEMri:$addr), "if ($src1) $dst = memd($addr)", []>; -let mayLoad = 1, neverHasSideEffects = 1 in -def LDrid_cNotPt : LDInst<(outs DoubleRegs:$dst), +let neverHasSideEffects = 1, isPredicated = 1 in +def LDrid_cNotPt : LDInst2<(outs DoubleRegs:$dst), (ins PredRegs:$src1, MEMri:$addr), "if (!$src1) $dst = memd($addr)", []>; -let mayLoad = 1, neverHasSideEffects = 1 in -def LDrid_indexed_cPt : LDInst<(outs DoubleRegs:$dst), +let neverHasSideEffects = 1, isPredicated = 1 in +def LDrid_indexed_cPt : LDInst2<(outs DoubleRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2, u6_3Imm:$src3), - "if ($src1) $dst=memd($src2+#$src3)", + "if ($src1) $dst = memd($src2+#$src3)", []>; -let mayLoad = 1, neverHasSideEffects = 1 in -def LDrid_indexed_cNotPt : LDInst<(outs DoubleRegs:$dst), +let neverHasSideEffects = 1, isPredicated = 1 in +def LDrid_indexed_cNotPt : LDInst2<(outs DoubleRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2, u6_3Imm:$src3), - "if (!$src1) $dst=memd($src2+#$src3)", + "if (!$src1) $dst = memd($src2+#$src3)", []>; -let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in -def POST_LDrid_cPt : LDInstPI<(outs DoubleRegs:$dst1, IntRegs:$dst2), +let hasCtrlDep = 1, neverHasSideEffects = 1, isPredicated = 1 in +def POST_LDrid_cPt : LDInst2PI<(outs DoubleRegs:$dst1, IntRegs:$dst2), (ins PredRegs:$src1, IntRegs:$src2, s4_3Imm:$src3), "if ($src1) $dst1 = memd($src2++#$src3)", [], "$src2 = $dst2">; -let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in -def POST_LDrid_cNotPt : LDInstPI<(outs DoubleRegs:$dst1, IntRegs:$dst2), +let hasCtrlDep = 1, neverHasSideEffects = 1, isPredicated = 1 in +def POST_LDrid_cNotPt : LDInst2PI<(outs DoubleRegs:$dst1, IntRegs:$dst2), (ins PredRegs:$src1, IntRegs:$src2, s4_3Imm:$src3), "if (!$src1) $dst1 = memd($src2++#$src3)", [], "$src2 = $dst2">; -let mayLoad = 1, neverHasSideEffects = 1 in -def LDrid_cdnPt : LDInst<(outs DoubleRegs:$dst), +let neverHasSideEffects = 1, isPredicated = 1 in +def LDrid_cdnPt : LDInst2<(outs DoubleRegs:$dst), (ins PredRegs:$src1, MEMri:$addr), "if ($src1.new) $dst = memd($addr)", []>; -let mayLoad = 1, neverHasSideEffects = 1 in -def LDrid_cdnNotPt : LDInst<(outs DoubleRegs:$dst), +let neverHasSideEffects = 1, isPredicated = 1 in +def LDrid_cdnNotPt : LDInst2<(outs DoubleRegs:$dst), (ins PredRegs:$src1, MEMri:$addr), "if (!$src1.new) $dst = memd($addr)", []>; -let mayLoad = 1, neverHasSideEffects = 1 in -def LDrid_indexed_cdnPt : LDInst<(outs DoubleRegs:$dst), +let neverHasSideEffects = 1, isPredicated = 1 in +def LDrid_indexed_cdnPt : LDInst2<(outs DoubleRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2, u6_3Imm:$src3), - "if ($src1.new) $dst=memd($src2+#$src3)", + "if ($src1.new) $dst = memd($src2+#$src3)", []>; -let mayLoad = 1, neverHasSideEffects = 1 in -def LDrid_indexed_cdnNotPt : LDInst<(outs DoubleRegs:$dst), +let neverHasSideEffects = 1, isPredicated = 1 in +def LDrid_indexed_cdnNotPt : LDInst2<(outs DoubleRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2, u6_3Imm:$src3), - "if (!$src1.new) $dst=memd($src2+#$src3)", + "if (!$src1.new) $dst = memd($src2+#$src3)", []>; @@ -963,114 +1028,113 @@ let isPredicable = 1 in def LDrib : LDInst<(outs IntRegs:$dst), (ins MEMri:$addr), "$dst = memb($addr)", - [(set IntRegs:$dst, (sextloadi8 ADDRriS11_0:$addr))]>; + [(set (i32 IntRegs:$dst), (i32 (sextloadi8 ADDRriS11_0:$addr)))]>; 
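An aside on the hunk that follows: LDrib above is selected for sextloadi8, the sign-extending byte load, and the patch drops the separate any-extend instruction LDrib_ae in favour of a selection Pat that reuses LDrib. That is legal because an any-extending load (extloadi8) leaves the upper bits unspecified, so a sign-extending load is one valid implementation of it. A minimal C++ sketch of the two semantics (illustration only, not part of the patch):

#include <cstdint>

// Sign-extending byte load: the semantics of LDrib's sextloadi8 pattern.
// The upper 24 bits of the result replicate the sign bit of the loaded byte.
static int32_t sextLoadI8(const int8_t *P) {
  return static_cast<int32_t>(*P);
}

// extloadi8 ("any-extend") places the byte in the low bits and leaves the
// upper bits unspecified, so sextLoadI8 above satisfies it; that is why the
// Pat in the next hunk can map extloadi8 onto LDrib.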
-def LDrib_ae : LDInst<(outs IntRegs:$dst), - (ins MEMri:$addr), - "$dst = memb($addr)", - [(set IntRegs:$dst, (extloadi8 ADDRriS11_0:$addr))]>; +// Load byte any-extend. +def : Pat < (i32 (extloadi8 ADDRriS11_0:$addr)), + (i32 (LDrib ADDRriS11_0:$addr)) >; // Indexed load byte. let isPredicable = 1, AddedComplexity = 20 in def LDrib_indexed : LDInst<(outs IntRegs:$dst), (ins IntRegs:$src1, s11_0Imm:$offset), - "$dst=memb($src1+#$offset)", - [(set IntRegs:$dst, (sextloadi8 (add IntRegs:$src1, - s11_0ImmPred:$offset)))]>; - + "$dst = memb($src1+#$offset)", + [(set (i32 IntRegs:$dst), + (i32 (sextloadi8 (add (i32 IntRegs:$src1), + s11_0ImmPred:$offset))))]>; // Indexed load byte any-extend. let AddedComplexity = 20 in -def LDrib_ae_indexed : LDInst<(outs IntRegs:$dst), - (ins IntRegs:$src1, s11_0Imm:$offset), - "$dst=memb($src1+#$offset)", - [(set IntRegs:$dst, (extloadi8 (add IntRegs:$src1, - s11_0ImmPred:$offset)))]>; +def : Pat < (i32 (extloadi8 (add IntRegs:$src1, s11_0ImmPred:$offset))), + (i32 (LDrib_indexed IntRegs:$src1, s11_0ImmPred:$offset)) >; -let mayLoad = 1, neverHasSideEffects = 1 in -def LDrib_GP : LDInst<(outs IntRegs:$dst), +let neverHasSideEffects = 1 in +def LDrib_GP : LDInst2<(outs IntRegs:$dst), (ins globaladdress:$global, u16Imm:$offset), - "$dst=memb(#$global+$offset)", - []>; + "$dst = memb(#$global+$offset)", + []>, + Requires<[NoV4T]>; -let mayLoad = 1, neverHasSideEffects = 1 in -def LDb_GP : LDInst<(outs IntRegs:$dst), +let neverHasSideEffects = 1 in +def LDb_GP : LDInst2<(outs IntRegs:$dst), (ins globaladdress:$global), - "$dst=memb(#$global)", - []>; + "$dst = memb(#$global)", + []>, + Requires<[NoV4T]>; -let mayLoad = 1, neverHasSideEffects = 1 in -def LDub_GP : LDInst<(outs IntRegs:$dst), +let neverHasSideEffects = 1 in +def LDub_GP : LDInst2<(outs IntRegs:$dst), (ins globaladdress:$global), - "$dst=memub(#$global)", - []>; + "$dst = memub(#$global)", + []>, + Requires<[NoV4T]>; -let isPredicable = 1, mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in -def POST_LDrib : LDInstPI<(outs IntRegs:$dst, IntRegs:$dst2), +let isPredicable = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in +def POST_LDrib : LDInst2PI<(outs IntRegs:$dst, IntRegs:$dst2), (ins IntRegs:$src1, s4Imm:$offset), "$dst = memb($src1++#$offset)", [], "$src1 = $dst2">; // Load byte conditionally. 
-let mayLoad = 1, neverHasSideEffects = 1 in -def LDrib_cPt : LDInst<(outs IntRegs:$dst), +let neverHasSideEffects = 1, isPredicated = 1 in +def LDrib_cPt : LDInst2<(outs IntRegs:$dst), (ins PredRegs:$src1, MEMri:$addr), "if ($src1) $dst = memb($addr)", []>; -let mayLoad = 1, neverHasSideEffects = 1 in -def LDrib_cNotPt : LDInst<(outs IntRegs:$dst), +let neverHasSideEffects = 1, isPredicated = 1 in +def LDrib_cNotPt : LDInst2<(outs IntRegs:$dst), (ins PredRegs:$src1, MEMri:$addr), "if (!$src1) $dst = memb($addr)", []>; -let mayLoad = 1, neverHasSideEffects = 1 in -def LDrib_indexed_cPt : LDInst<(outs IntRegs:$dst), +let neverHasSideEffects = 1, isPredicated = 1 in +def LDrib_indexed_cPt : LDInst2<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3), "if ($src1) $dst = memb($src2+#$src3)", []>; -let mayLoad = 1, neverHasSideEffects = 1 in -def LDrib_indexed_cNotPt : LDInst<(outs IntRegs:$dst), +let neverHasSideEffects = 1, isPredicated = 1 in +def LDrib_indexed_cNotPt : LDInst2<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3), "if (!$src1) $dst = memb($src2+#$src3)", []>; -let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in -def POST_LDrib_cPt : LDInstPI<(outs IntRegs:$dst1, IntRegs:$dst2), +let hasCtrlDep = 1, neverHasSideEffects = 1, isPredicated = 1 in +def POST_LDrib_cPt : LDInst2PI<(outs IntRegs:$dst1, IntRegs:$dst2), (ins PredRegs:$src1, IntRegs:$src2, s4_0Imm:$src3), "if ($src1) $dst1 = memb($src2++#$src3)", [], "$src2 = $dst2">; -let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in -def POST_LDrib_cNotPt : LDInstPI<(outs IntRegs:$dst1, IntRegs:$dst2), +let hasCtrlDep = 1, neverHasSideEffects = 1, isPredicated = 1 in +def POST_LDrib_cNotPt : LDInst2PI<(outs IntRegs:$dst1, IntRegs:$dst2), (ins PredRegs:$src1, IntRegs:$src2, s4_0Imm:$src3), "if (!$src1) $dst1 = memb($src2++#$src3)", [], "$src2 = $dst2">; -let mayLoad = 1, neverHasSideEffects = 1 in -def LDrib_cdnPt : LDInst<(outs IntRegs:$dst), +let neverHasSideEffects = 1, isPredicated = 1 in +def LDrib_cdnPt : LDInst2<(outs IntRegs:$dst), (ins PredRegs:$src1, MEMri:$addr), "if ($src1.new) $dst = memb($addr)", []>; -let mayLoad = 1, neverHasSideEffects = 1 in -def LDrib_cdnNotPt : LDInst<(outs IntRegs:$dst), +let neverHasSideEffects = 1, isPredicated = 1 in +def LDrib_cdnNotPt : LDInst2<(outs IntRegs:$dst), (ins PredRegs:$src1, MEMri:$addr), "if (!$src1.new) $dst = memb($addr)", []>; -let mayLoad = 1, neverHasSideEffects = 1 in -def LDrib_indexed_cdnPt : LDInst<(outs IntRegs:$dst), +let neverHasSideEffects = 1, isPredicated = 1 in +def LDrib_indexed_cdnPt : LDInst2<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3), "if ($src1.new) $dst = memb($src2+#$src3)", []>; -let mayLoad = 1, neverHasSideEffects = 1 in -def LDrib_indexed_cdnNotPt : LDInst<(outs IntRegs:$dst), +let neverHasSideEffects = 1, isPredicated = 1 in +def LDrib_indexed_cdnNotPt : LDInst2<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3), "if (!$src1.new) $dst = memb($src2+#$src3)", []>; @@ -1081,112 +1145,110 @@ let isPredicable = 1 in def LDrih : LDInst<(outs IntRegs:$dst), (ins MEMri:$addr), "$dst = memh($addr)", - [(set IntRegs:$dst, (sextloadi16 ADDRriS11_1:$addr))]>; + [(set (i32 IntRegs:$dst), (i32 (sextloadi16 ADDRriS11_1:$addr)))]>; let isPredicable = 1, AddedComplexity = 20 in def LDrih_indexed : LDInst<(outs IntRegs:$dst), (ins IntRegs:$src1, s11_1Imm:$offset), - "$dst=memh($src1+#$offset)", - [(set IntRegs:$dst, (sextloadi16 (add IntRegs:$src1, - 
s11_1ImmPred:$offset)))] >; + "$dst = memh($src1+#$offset)", + [(set (i32 IntRegs:$dst), + (i32 (sextloadi16 (add (i32 IntRegs:$src1), + s11_1ImmPred:$offset))))]>; -def LDrih_ae : LDInst<(outs IntRegs:$dst), - (ins MEMri:$addr), - "$dst = memh($addr)", - [(set IntRegs:$dst, (extloadi16 ADDRriS11_1:$addr))]>; +def : Pat < (i32 (extloadi16 ADDRriS11_1:$addr)), + (i32 (LDrih ADDRriS11_1:$addr))>; let AddedComplexity = 20 in -def LDrih_ae_indexed : LDInst<(outs IntRegs:$dst), - (ins IntRegs:$src1, s11_1Imm:$offset), - "$dst=memh($src1+#$offset)", - [(set IntRegs:$dst, (extloadi16 (add IntRegs:$src1, - s11_1ImmPred:$offset)))] >; +def : Pat < (i32 (extloadi16 (add IntRegs:$src1, s11_1ImmPred:$offset))), + (i32 (LDrih_indexed IntRegs:$src1, s11_1ImmPred:$offset)) >; -let mayLoad = 1, neverHasSideEffects = 1 in -def LDrih_GP : LDInst<(outs IntRegs:$dst), +let neverHasSideEffects = 1 in +def LDrih_GP : LDInst2<(outs IntRegs:$dst), (ins globaladdress:$global, u16Imm:$offset), - "$dst=memh(#$global+$offset)", - []>; + "$dst = memh(#$global+$offset)", + []>, + Requires<[NoV4T]>; -let mayLoad = 1, neverHasSideEffects = 1 in -def LDh_GP : LDInst<(outs IntRegs:$dst), +let neverHasSideEffects = 1 in +def LDh_GP : LDInst2<(outs IntRegs:$dst), (ins globaladdress:$global), - "$dst=memh(#$global)", - []>; + "$dst = memh(#$global)", + []>, + Requires<[NoV4T]>; -let mayLoad = 1, neverHasSideEffects = 1 in -def LDuh_GP : LDInst<(outs IntRegs:$dst), +let neverHasSideEffects = 1 in +def LDuh_GP : LDInst2<(outs IntRegs:$dst), (ins globaladdress:$global), - "$dst=memuh(#$global)", - []>; - + "$dst = memuh(#$global)", + []>, + Requires<[NoV4T]>; -let isPredicable = 1, mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in -def POST_LDrih : LDInstPI<(outs IntRegs:$dst, IntRegs:$dst2), +let isPredicable = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in +def POST_LDrih : LDInst2PI<(outs IntRegs:$dst, IntRegs:$dst2), (ins IntRegs:$src1, s4Imm:$offset), "$dst = memh($src1++#$offset)", [], "$src1 = $dst2">; // Load halfword conditionally. 
-let mayLoad = 1, neverHasSideEffects = 1 in -def LDrih_cPt : LDInst<(outs IntRegs:$dst), +let neverHasSideEffects = 1, isPredicated = 1 in +def LDrih_cPt : LDInst2<(outs IntRegs:$dst), (ins PredRegs:$src1, MEMri:$addr), "if ($src1) $dst = memh($addr)", []>; -let mayLoad = 1, neverHasSideEffects = 1 in -def LDrih_cNotPt : LDInst<(outs IntRegs:$dst), +let neverHasSideEffects = 1, isPredicated = 1 in +def LDrih_cNotPt : LDInst2<(outs IntRegs:$dst), (ins PredRegs:$src1, MEMri:$addr), "if (!$src1) $dst = memh($addr)", []>; -let mayLoad = 1, neverHasSideEffects = 1 in -def LDrih_indexed_cPt : LDInst<(outs IntRegs:$dst), +let neverHasSideEffects = 1, isPredicated = 1 in +def LDrih_indexed_cPt : LDInst2<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3), "if ($src1) $dst = memh($src2+#$src3)", []>; -let mayLoad = 1, neverHasSideEffects = 1 in -def LDrih_indexed_cNotPt : LDInst<(outs IntRegs:$dst), +let neverHasSideEffects = 1, isPredicated = 1 in +def LDrih_indexed_cNotPt : LDInst2<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3), "if (!$src1) $dst = memh($src2+#$src3)", []>; -let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in -def POST_LDrih_cPt : LDInstPI<(outs IntRegs:$dst1, IntRegs:$dst2), +let hasCtrlDep = 1, neverHasSideEffects = 1, isPredicated = 1 in +def POST_LDrih_cPt : LDInst2PI<(outs IntRegs:$dst1, IntRegs:$dst2), (ins PredRegs:$src1, IntRegs:$src2, s4_1Imm:$src3), "if ($src1) $dst1 = memh($src2++#$src3)", [], "$src2 = $dst2">; -let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in -def POST_LDrih_cNotPt : LDInstPI<(outs IntRegs:$dst1, IntRegs:$dst2), +let hasCtrlDep = 1, neverHasSideEffects = 1, isPredicated = 1 in +def POST_LDrih_cNotPt : LDInst2PI<(outs IntRegs:$dst1, IntRegs:$dst2), (ins PredRegs:$src1, IntRegs:$src2, s4_1Imm:$src3), "if (!$src1) $dst1 = memh($src2++#$src3)", [], "$src2 = $dst2">; -let mayLoad = 1, neverHasSideEffects = 1 in -def LDrih_cdnPt : LDInst<(outs IntRegs:$dst), +let neverHasSideEffects = 1, isPredicated = 1 in +def LDrih_cdnPt : LDInst2<(outs IntRegs:$dst), (ins PredRegs:$src1, MEMri:$addr), "if ($src1.new) $dst = memh($addr)", []>; -let mayLoad = 1, neverHasSideEffects = 1 in -def LDrih_cdnNotPt : LDInst<(outs IntRegs:$dst), +let neverHasSideEffects = 1, isPredicated = 1 in +def LDrih_cdnNotPt : LDInst2<(outs IntRegs:$dst), (ins PredRegs:$src1, MEMri:$addr), "if (!$src1.new) $dst = memh($addr)", []>; -let mayLoad = 1, neverHasSideEffects = 1 in -def LDrih_indexed_cdnPt : LDInst<(outs IntRegs:$dst), +let neverHasSideEffects = 1, isPredicated = 1 in +def LDrih_indexed_cdnPt : LDInst2<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3), "if ($src1.new) $dst = memh($src2+#$src3)", []>; -let mayLoad = 1, neverHasSideEffects = 1 in -def LDrih_indexed_cdnNotPt : LDInst<(outs IntRegs:$dst), +let neverHasSideEffects = 1, isPredicated = 1 in +def LDrih_indexed_cdnNotPt : LDInst2<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3), "if (!$src1.new) $dst = memh($src2+#$src3)", []>; @@ -1196,113 +1258,96 @@ let isPredicable = 1 in def LDriub : LDInst<(outs IntRegs:$dst), (ins MEMri:$addr), "$dst = memub($addr)", - [(set IntRegs:$dst, (zextloadi8 ADDRriS11_0:$addr))]>; + [(set (i32 IntRegs:$dst), (i32 (zextloadi8 ADDRriS11_0:$addr)))]>; -let isPredicable = 1 in -def LDriubit : LDInst<(outs IntRegs:$dst), - (ins MEMri:$addr), - "$dst = memub($addr)", - [(set IntRegs:$dst, (zextloadi1 ADDRriS11_0:$addr))]>; +def : Pat < (i32 (zextloadi1 ADDRriS11_0:$addr)), + (i32 (LDriub 
ADDRriS11_0:$addr))>; let isPredicable = 1, AddedComplexity = 20 in def LDriub_indexed : LDInst<(outs IntRegs:$dst), (ins IntRegs:$src1, s11_0Imm:$offset), - "$dst=memub($src1+#$offset)", - [(set IntRegs:$dst, (zextloadi8 (add IntRegs:$src1, - s11_0ImmPred:$offset)))]>; - -let AddedComplexity = 20 in -def LDriubit_indexed : LDInst<(outs IntRegs:$dst), - (ins IntRegs:$src1, s11_0Imm:$offset), - "$dst=memub($src1+#$offset)", - [(set IntRegs:$dst, (zextloadi1 (add IntRegs:$src1, - s11_0ImmPred:$offset)))]>; - -def LDriub_ae : LDInst<(outs IntRegs:$dst), - (ins MEMri:$addr), - "$dst = memub($addr)", - [(set IntRegs:$dst, (extloadi8 ADDRriS11_0:$addr))]>; - + "$dst = memub($src1+#$offset)", + [(set (i32 IntRegs:$dst), + (i32 (zextloadi8 (add (i32 IntRegs:$src1), + s11_0ImmPred:$offset))))]>; let AddedComplexity = 20 in -def LDriub_ae_indexed : LDInst<(outs IntRegs:$dst), - (ins IntRegs:$src1, s11_0Imm:$offset), - "$dst=memub($src1+#$offset)", - [(set IntRegs:$dst, (extloadi8 (add IntRegs:$src1, - s11_0ImmPred:$offset)))]>; +def : Pat < (i32 (zextloadi1 (add IntRegs:$src1, s11_0ImmPred:$offset))), + (i32 (LDriub_indexed IntRegs:$src1, s11_0ImmPred:$offset))>; -let mayLoad = 1, neverHasSideEffects = 1 in -def LDriub_GP : LDInst<(outs IntRegs:$dst), +let neverHasSideEffects = 1 in +def LDriub_GP : LDInst2<(outs IntRegs:$dst), (ins globaladdress:$global, u16Imm:$offset), - "$dst=memub(#$global+$offset)", - []>; + "$dst = memub(#$global+$offset)", + []>, + Requires<[NoV4T]>; -let isPredicable = 1, mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in -def POST_LDriub : LDInstPI<(outs IntRegs:$dst, IntRegs:$dst2), +let isPredicable = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in +def POST_LDriub : LDInst2PI<(outs IntRegs:$dst, IntRegs:$dst2), (ins IntRegs:$src1, s4Imm:$offset), "$dst = memub($src1++#$offset)", [], "$src1 = $dst2">; // Load unsigned byte conditionally. 
-let mayLoad = 1, neverHasSideEffects = 1 in -def LDriub_cPt : LDInst<(outs IntRegs:$dst), +let neverHasSideEffects = 1, isPredicated = 1 in +def LDriub_cPt : LDInst2<(outs IntRegs:$dst), (ins PredRegs:$src1, MEMri:$addr), "if ($src1) $dst = memub($addr)", []>; -let mayLoad = 1, neverHasSideEffects = 1 in -def LDriub_cNotPt : LDInst<(outs IntRegs:$dst), +let neverHasSideEffects = 1, isPredicated = 1 in +def LDriub_cNotPt : LDInst2<(outs IntRegs:$dst), (ins PredRegs:$src1, MEMri:$addr), "if (!$src1) $dst = memub($addr)", []>; -let mayLoad = 1, neverHasSideEffects = 1 in -def LDriub_indexed_cPt : LDInst<(outs IntRegs:$dst), +let neverHasSideEffects = 1, isPredicated = 1 in +def LDriub_indexed_cPt : LDInst2<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3), "if ($src1) $dst = memub($src2+#$src3)", []>; -let mayLoad = 1, neverHasSideEffects = 1 in -def LDriub_indexed_cNotPt : LDInst<(outs IntRegs:$dst), +let neverHasSideEffects = 1, isPredicated = 1 in +def LDriub_indexed_cNotPt : LDInst2<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3), "if (!$src1) $dst = memub($src2+#$src3)", []>; -let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in -def POST_LDriub_cPt : LDInstPI<(outs IntRegs:$dst1, IntRegs:$dst2), +let hasCtrlDep = 1, neverHasSideEffects = 1, isPredicated = 1 in +def POST_LDriub_cPt : LDInst2PI<(outs IntRegs:$dst1, IntRegs:$dst2), (ins PredRegs:$src1, IntRegs:$src2, s4_0Imm:$src3), "if ($src1) $dst1 = memub($src2++#$src3)", [], "$src2 = $dst2">; -let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in -def POST_LDriub_cNotPt : LDInstPI<(outs IntRegs:$dst1, IntRegs:$dst2), +let hasCtrlDep = 1, neverHasSideEffects = 1, isPredicated = 1 in +def POST_LDriub_cNotPt : LDInst2PI<(outs IntRegs:$dst1, IntRegs:$dst2), (ins PredRegs:$src1, IntRegs:$src2, s4_0Imm:$src3), "if (!$src1) $dst1 = memub($src2++#$src3)", [], "$src2 = $dst2">; -let mayLoad = 1, neverHasSideEffects = 1 in -def LDriub_cdnPt : LDInst<(outs IntRegs:$dst), +let neverHasSideEffects = 1, isPredicated = 1 in +def LDriub_cdnPt : LDInst2<(outs IntRegs:$dst), (ins PredRegs:$src1, MEMri:$addr), "if ($src1.new) $dst = memub($addr)", []>; -let mayLoad = 1, neverHasSideEffects = 1 in -def LDriub_cdnNotPt : LDInst<(outs IntRegs:$dst), +let neverHasSideEffects = 1, isPredicated = 1 in +def LDriub_cdnNotPt : LDInst2<(outs IntRegs:$dst), (ins PredRegs:$src1, MEMri:$addr), "if (!$src1.new) $dst = memub($addr)", []>; -let mayLoad = 1, neverHasSideEffects = 1 in -def LDriub_indexed_cdnPt : LDInst<(outs IntRegs:$dst), +let neverHasSideEffects = 1, isPredicated = 1 in +def LDriub_indexed_cdnPt : LDInst2<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3), "if ($src1.new) $dst = memub($src2+#$src3)", []>; -let mayLoad = 1, neverHasSideEffects = 1 in -def LDriub_indexed_cdnNotPt : LDInst<(outs IntRegs:$dst), +let neverHasSideEffects = 1, isPredicated = 1 in +def LDriub_indexed_cdnNotPt : LDInst2<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3), "if (!$src1.new) $dst = memub($src2+#$src3)", []>; @@ -1312,102 +1357,90 @@ let isPredicable = 1 in def LDriuh : LDInst<(outs IntRegs:$dst), (ins MEMri:$addr), "$dst = memuh($addr)", - [(set IntRegs:$dst, (zextloadi16 ADDRriS11_1:$addr))]>; + [(set (i32 IntRegs:$dst), (i32 (zextloadi16 ADDRriS11_1:$addr)))]>; // Indexed load unsigned halfword. 
let isPredicable = 1, AddedComplexity = 20 in def LDriuh_indexed : LDInst<(outs IntRegs:$dst), (ins IntRegs:$src1, s11_1Imm:$offset), - "$dst=memuh($src1+#$offset)", - [(set IntRegs:$dst, (zextloadi16 (add IntRegs:$src1, - s11_1ImmPred:$offset)))]>; - -def LDriuh_ae : LDInst<(outs IntRegs:$dst), - (ins MEMri:$addr), - "$dst = memuh($addr)", - [(set IntRegs:$dst, (extloadi16 ADDRriS11_1:$addr))]>; + "$dst = memuh($src1+#$offset)", + [(set (i32 IntRegs:$dst), + (i32 (zextloadi16 (add (i32 IntRegs:$src1), + s11_1ImmPred:$offset))))]>; - -// Indexed load unsigned halfword any-extend. -let AddedComplexity = 20 in -def LDriuh_ae_indexed : LDInst<(outs IntRegs:$dst), - (ins IntRegs:$src1, s11_1Imm:$offset), - "$dst=memuh($src1+#$offset)", - [(set IntRegs:$dst, (extloadi16 (add IntRegs:$src1, - s11_1ImmPred:$offset)))] >; - -let mayLoad = 1, neverHasSideEffects = 1 in -def LDriuh_GP : LDInst<(outs IntRegs:$dst), +let neverHasSideEffects = 1 in +def LDriuh_GP : LDInst2<(outs IntRegs:$dst), (ins globaladdress:$global, u16Imm:$offset), - "$dst=memuh(#$global+$offset)", - []>; + "$dst = memuh(#$global+$offset)", + []>, + Requires<[NoV4T]>; -let isPredicable = 1, mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in -def POST_LDriuh : LDInstPI<(outs IntRegs:$dst, IntRegs:$dst2), +let isPredicable = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in +def POST_LDriuh : LDInst2PI<(outs IntRegs:$dst, IntRegs:$dst2), (ins IntRegs:$src1, s4Imm:$offset), "$dst = memuh($src1++#$offset)", [], "$src1 = $dst2">; // Load unsigned halfword conditionally. -let mayLoad = 1, neverHasSideEffects = 1 in -def LDriuh_cPt : LDInst<(outs IntRegs:$dst), +let neverHasSideEffects = 1, isPredicated = 1 in +def LDriuh_cPt : LDInst2<(outs IntRegs:$dst), (ins PredRegs:$src1, MEMri:$addr), "if ($src1) $dst = memuh($addr)", []>; -let mayLoad = 1, neverHasSideEffects = 1 in -def LDriuh_cNotPt : LDInst<(outs IntRegs:$dst), +let neverHasSideEffects = 1, isPredicated = 1 in +def LDriuh_cNotPt : LDInst2<(outs IntRegs:$dst), (ins PredRegs:$src1, MEMri:$addr), "if (!$src1) $dst = memuh($addr)", []>; -let mayLoad = 1, neverHasSideEffects = 1 in -def LDriuh_indexed_cPt : LDInst<(outs IntRegs:$dst), +let neverHasSideEffects = 1, isPredicated = 1 in +def LDriuh_indexed_cPt : LDInst2<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3), "if ($src1) $dst = memuh($src2+#$src3)", []>; -let mayLoad = 1, neverHasSideEffects = 1 in -def LDriuh_indexed_cNotPt : LDInst<(outs IntRegs:$dst), +let neverHasSideEffects = 1, isPredicated = 1 in +def LDriuh_indexed_cNotPt : LDInst2<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3), "if (!$src1) $dst = memuh($src2+#$src3)", []>; -let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in -def POST_LDriuh_cPt : LDInstPI<(outs IntRegs:$dst1, IntRegs:$dst2), +let hasCtrlDep = 1, neverHasSideEffects = 1, isPredicated = 1 in +def POST_LDriuh_cPt : LDInst2PI<(outs IntRegs:$dst1, IntRegs:$dst2), (ins PredRegs:$src1, IntRegs:$src2, s4_1Imm:$src3), "if ($src1) $dst1 = memuh($src2++#$src3)", [], "$src2 = $dst2">; -let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in -def POST_LDriuh_cNotPt : LDInstPI<(outs IntRegs:$dst1, IntRegs:$dst2), +let hasCtrlDep = 1, neverHasSideEffects = 1, isPredicated = 1 in +def POST_LDriuh_cNotPt : LDInst2PI<(outs IntRegs:$dst1, IntRegs:$dst2), (ins PredRegs:$src1, IntRegs:$src2, s4_1Imm:$src3), "if (!$src1) $dst1 = memuh($src2++#$src3)", [], "$src2 = $dst2">; -let mayLoad = 1, neverHasSideEffects = 1 in -def LDriuh_cdnPt : LDInst<(outs 
IntRegs:$dst), +let neverHasSideEffects = 1, isPredicated = 1 in +def LDriuh_cdnPt : LDInst2<(outs IntRegs:$dst), (ins PredRegs:$src1, MEMri:$addr), "if ($src1.new) $dst = memuh($addr)", []>; -let mayLoad = 1, neverHasSideEffects = 1 in -def LDriuh_cdnNotPt : LDInst<(outs IntRegs:$dst), +let neverHasSideEffects = 1, isPredicated = 1 in +def LDriuh_cdnNotPt : LDInst2<(outs IntRegs:$dst), (ins PredRegs:$src1, MEMri:$addr), "if (!$src1.new) $dst = memuh($addr)", []>; -let mayLoad = 1, neverHasSideEffects = 1 in -def LDriuh_indexed_cdnPt : LDInst<(outs IntRegs:$dst), +let neverHasSideEffects = 1, isPredicated = 1 in +def LDriuh_indexed_cdnPt : LDInst2<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3), "if ($src1.new) $dst = memuh($src2+#$src3)", []>; -let mayLoad = 1, neverHasSideEffects = 1 in -def LDriuh_indexed_cdnNotPt : LDInst<(outs IntRegs:$dst), +let neverHasSideEffects = 1, isPredicated = 1 in +def LDriuh_indexed_cdnNotPt : LDInst2<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3), "if (!$src1.new) $dst = memuh($src2+#$src3)", []>; @@ -1417,10 +1450,10 @@ def LDriuh_indexed_cdnNotPt : LDInst<(outs IntRegs:$dst), let isPredicable = 1 in def LDriw : LDInst<(outs IntRegs:$dst), (ins MEMri:$addr), "$dst = memw($addr)", - [(set IntRegs:$dst, (load ADDRriS11_2:$addr))]>; + [(set IntRegs:$dst, (i32 (load ADDRriS11_2:$addr)))]>; // Load predicate. -let mayLoad = 1, Defs = [R10,R11] in +let Defs = [R10,R11,D5], neverHasSideEffects = 1 in def LDriw_pred : LDInst<(outs PredRegs:$dst), (ins MEMri:$addr), "Error; should not emit", @@ -1430,24 +1463,26 @@ def LDriw_pred : LDInst<(outs PredRegs:$dst), let isPredicable = 1, AddedComplexity = 20 in def LDriw_indexed : LDInst<(outs IntRegs:$dst), (ins IntRegs:$src1, s11_2Imm:$offset), - "$dst=memw($src1+#$offset)", - [(set IntRegs:$dst, (load (add IntRegs:$src1, - s11_2ImmPred:$offset)))]>; + "$dst = memw($src1+#$offset)", + [(set IntRegs:$dst, (i32 (load (add IntRegs:$src1, + s11_2ImmPred:$offset))))]>; -let mayLoad = 1, neverHasSideEffects = 1 in -def LDriw_GP : LDInst<(outs IntRegs:$dst), +let neverHasSideEffects = 1 in +def LDriw_GP : LDInst2<(outs IntRegs:$dst), (ins globaladdress:$global, u16Imm:$offset), - "$dst=memw(#$global+$offset)", - []>; + "$dst = memw(#$global+$offset)", + []>, + Requires<[NoV4T]>; -let mayLoad = 1, neverHasSideEffects = 1 in -def LDw_GP : LDInst<(outs IntRegs:$dst), +let neverHasSideEffects = 1 in +def LDw_GP : LDInst2<(outs IntRegs:$dst), (ins globaladdress:$global), - "$dst=memw(#$global)", - []>; + "$dst = memw(#$global)", + []>, + Requires<[NoV4T]>; -let isPredicable = 1, mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in -def POST_LDriw : LDInstPI<(outs IntRegs:$dst, IntRegs:$dst2), +let isPredicable = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in +def POST_LDriw : LDInst2PI<(outs IntRegs:$dst, IntRegs:$dst2), (ins IntRegs:$src1, s4Imm:$offset), "$dst = memw($src1++#$offset)", [], @@ -1455,71 +1490,71 @@ def POST_LDriw : LDInstPI<(outs IntRegs:$dst, IntRegs:$dst2), // Load word conditionally. 
-let mayLoad = 1, neverHasSideEffects = 1 in -def LDriw_cPt : LDInst<(outs IntRegs:$dst), +let neverHasSideEffects = 1, isPredicated = 1 in +def LDriw_cPt : LDInst2<(outs IntRegs:$dst), (ins PredRegs:$src1, MEMri:$addr), "if ($src1) $dst = memw($addr)", []>; -let mayLoad = 1, neverHasSideEffects = 1 in -def LDriw_cNotPt : LDInst<(outs IntRegs:$dst), +let neverHasSideEffects = 1, isPredicated = 1 in +def LDriw_cNotPt : LDInst2<(outs IntRegs:$dst), (ins PredRegs:$src1, MEMri:$addr), "if (!$src1) $dst = memw($addr)", []>; -let mayLoad = 1, neverHasSideEffects = 1 in -def LDriw_indexed_cPt : LDInst<(outs IntRegs:$dst), +let neverHasSideEffects = 1, isPredicated = 1 in +def LDriw_indexed_cPt : LDInst2<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2, u6_2Imm:$src3), - "if ($src1) $dst=memw($src2+#$src3)", + "if ($src1) $dst = memw($src2+#$src3)", []>; -let mayLoad = 1, neverHasSideEffects = 1 in -def LDriw_indexed_cNotPt : LDInst<(outs IntRegs:$dst), +let neverHasSideEffects = 1, isPredicated = 1 in +def LDriw_indexed_cNotPt : LDInst2<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2, u6_2Imm:$src3), - "if (!$src1) $dst=memw($src2+#$src3)", + "if (!$src1) $dst = memw($src2+#$src3)", []>; -let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in -def POST_LDriw_cPt : LDInstPI<(outs IntRegs:$dst1, IntRegs:$dst2), +let hasCtrlDep = 1, neverHasSideEffects = 1, isPredicated = 1 in +def POST_LDriw_cPt : LDInst2PI<(outs IntRegs:$dst1, IntRegs:$dst2), (ins PredRegs:$src1, IntRegs:$src2, s4_2Imm:$src3), "if ($src1) $dst1 = memw($src2++#$src3)", [], "$src2 = $dst2">; -let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in -def POST_LDriw_cNotPt : LDInstPI<(outs IntRegs:$dst1, IntRegs:$dst2), +let hasCtrlDep = 1, neverHasSideEffects = 1, isPredicated = 1 in +def POST_LDriw_cNotPt : LDInst2PI<(outs IntRegs:$dst1, IntRegs:$dst2), (ins PredRegs:$src1, IntRegs:$src2, s4_2Imm:$src3), "if (!$src1) $dst1 = memw($src2++#$src3)", [], "$src2 = $dst2">; -let mayLoad = 1, neverHasSideEffects = 1 in -def LDriw_cdnPt : LDInst<(outs IntRegs:$dst), +let neverHasSideEffects = 1, isPredicated = 1 in +def LDriw_cdnPt : LDInst2<(outs IntRegs:$dst), (ins PredRegs:$src1, MEMri:$addr), "if ($src1.new) $dst = memw($addr)", []>; -let mayLoad = 1, neverHasSideEffects = 1 in -def LDriw_cdnNotPt : LDInst<(outs IntRegs:$dst), +let neverHasSideEffects = 1, isPredicated = 1 in +def LDriw_cdnNotPt : LDInst2<(outs IntRegs:$dst), (ins PredRegs:$src1, MEMri:$addr), "if (!$src1.new) $dst = memw($addr)", []>; -let mayLoad = 1, neverHasSideEffects = 1 in -def LDriw_indexed_cdnPt : LDInst<(outs IntRegs:$dst), +let neverHasSideEffects = 1, isPredicated = 1 in +def LDriw_indexed_cdnPt : LDInst2<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2, u6_2Imm:$src3), - "if ($src1.new) $dst=memw($src2+#$src3)", + "if ($src1.new) $dst = memw($src2+#$src3)", []>; -let mayLoad = 1, neverHasSideEffects = 1 in -def LDriw_indexed_cdnNotPt : LDInst<(outs IntRegs:$dst), +let neverHasSideEffects = 1, isPredicated = 1 in +def LDriw_indexed_cdnNotPt : LDInst2<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2, u6_2Imm:$src3), - "if (!$src1.new) $dst=memw($src2+#$src3)", + "if (!$src1.new) $dst = memw($src2+#$src3)", []>; // Deallocate stack frame. 
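// A rough model of deallocframe, assuming the standard Hexagon frame layout
// with the saved LR:FP pair at the address held in FP:
//   LR:FP = memd(r30);  r29 = r30 + 8;
// i.e. the saved pair is reloaded and the frame popped, which is consistent
// with the Defs/Uses lists in the definition below.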
let Defs = [R29, R30, R31], Uses = [R29], neverHasSideEffects = 1 in { - def DEALLOCFRAME : LDInst<(outs), (ins i32imm:$amt1), + def DEALLOCFRAME : LDInst2<(outs), (ins i32imm:$amt1), "deallocframe", []>; } @@ -1550,13 +1585,14 @@ let Defs = [R29, R30, R31], Uses = [R29], neverHasSideEffects = 1 in { // Rd=+mpyi(Rs,#u8) def MPYI_riu : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, u8Imm:$src2), "$dst =+ mpyi($src1, #$src2)", - [(set IntRegs:$dst, (mul IntRegs:$src1, u8ImmPred:$src2))]>; + [(set (i32 IntRegs:$dst), (mul (i32 IntRegs:$src1), + u8ImmPred:$src2))]>; // Rd=-mpyi(Rs,#u8) def MPYI_rin : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, n8Imm:$src2), "$dst =- mpyi($src1, #$src2)", - [(set IntRegs:$dst, - (mul IntRegs:$src1, n8ImmPred:$src2))]>; + [(set (i32 IntRegs:$dst), (mul (i32 IntRegs:$src1), + n8ImmPred:$src2))]>; // Rd=mpyi(Rs,#m9) // s9 is NOT the same as m9 - but it works.. so far. @@ -1564,35 +1600,40 @@ def MPYI_rin : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, n8Imm:$src2), // depending on the value of m9. See Arch Spec. def MPYI_ri : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, s9Imm:$src2), "$dst = mpyi($src1, #$src2)", - [(set IntRegs:$dst, (mul IntRegs:$src1, s9ImmPred:$src2))]>; + [(set (i32 IntRegs:$dst), (mul (i32 IntRegs:$src1), + s9ImmPred:$src2))]>; // Rd=mpyi(Rs,Rt) def MPYI : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), "$dst = mpyi($src1, $src2)", - [(set IntRegs:$dst, (mul IntRegs:$src1, IntRegs:$src2))]>; + [(set (i32 IntRegs:$dst), (mul (i32 IntRegs:$src1), + (i32 IntRegs:$src2)))]>; // Rx+=mpyi(Rs,#u8) def MPYI_acc_ri : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2, u8Imm:$src3), "$dst += mpyi($src2, #$src3)", - [(set IntRegs:$dst, - (add (mul IntRegs:$src2, u8ImmPred:$src3), IntRegs:$src1))], + [(set (i32 IntRegs:$dst), + (add (mul (i32 IntRegs:$src2), u8ImmPred:$src3), + (i32 IntRegs:$src1)))], "$src1 = $dst">; // Rx+=mpyi(Rs,Rt) def MPYI_acc_rr : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2, IntRegs:$src3), "$dst += mpyi($src2, $src3)", - [(set IntRegs:$dst, - (add (mul IntRegs:$src2, IntRegs:$src3), IntRegs:$src1))], + [(set (i32 IntRegs:$dst), + (add (mul (i32 IntRegs:$src2), (i32 IntRegs:$src3)), + (i32 IntRegs:$src1)))], "$src1 = $dst">; // Rx-=mpyi(Rs,#u8) def MPYI_sub_ri : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2, u8Imm:$src3), "$dst -= mpyi($src2, #$src3)", - [(set IntRegs:$dst, - (sub IntRegs:$src1, (mul IntRegs:$src2, u8ImmPred:$src3)))], + [(set (i32 IntRegs:$dst), + (sub (i32 IntRegs:$src1), (mul (i32 IntRegs:$src2), + u8ImmPred:$src3)))], "$src1 = $dst">; // Multiply and use upper result. @@ -1601,27 +1642,30 @@ def MPYI_sub_ri : MInst_acc<(outs IntRegs:$dst), // Rd=mpy(Rs,Rt) def MPY : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), "$dst = mpy($src1, $src2)", - [(set IntRegs:$dst, (mulhs IntRegs:$src1, IntRegs:$src2))]>; + [(set (i32 IntRegs:$dst), (mulhs (i32 IntRegs:$src1), + (i32 IntRegs:$src2)))]>; // Rd=mpy(Rs,Rt):rnd // Rd=mpyu(Rs,Rt) def MPYU : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), "$dst = mpyu($src1, $src2)", - [(set IntRegs:$dst, (mulhu IntRegs:$src1, IntRegs:$src2))]>; + [(set (i32 IntRegs:$dst), (mulhu (i32 IntRegs:$src1), + (i32 IntRegs:$src2)))]>; // Multiply and use full result. 
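// The "upper result" forms above select on ISD::MULHS/MULHU, i.e. the high
// word of the widened product, while the "full result" forms below keep all
// 64 bits.  A C sketch of the distinction (illustrative only):
//   int32_t mpy(int32_t a, int32_t b)   { return (int32_t)(((int64_t)a * b) >> 32); }
//   int64_t mpy64(int32_t a, int32_t b) { return (int64_t)a * (int64_t)b; }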
// Rdd=mpyu(Rs,Rt)
def MPYU64 : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
            "$dst = mpyu($src1, $src2)",
-           [(set DoubleRegs:$dst, (mul (i64 (anyext IntRegs:$src1)),
-                                       (i64 (anyext IntRegs:$src2))))]>;
+           [(set (i64 DoubleRegs:$dst),
+                 (mul (i64 (anyext (i32 IntRegs:$src1))),
+                      (i64 (anyext (i32 IntRegs:$src2)))))]>;
 
// Rdd=mpy(Rs,Rt)
def MPY64 : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
            "$dst = mpy($src1, $src2)",
-           [(set DoubleRegs:$dst, (mul (i64 (sext IntRegs:$src1)),
-                                       (i64 (sext IntRegs:$src2))))]>;
-
+           [(set (i64 DoubleRegs:$dst),
+                 (mul (i64 (sext (i32 IntRegs:$src1))),
+                      (i64 (sext (i32 IntRegs:$src2)))))]>;
 
// Multiply and accumulate, use full result.
// Rxx[+-]=mpy(Rs,Rt)
@@ -1629,18 +1673,20 @@ def MPY64 : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
def MPY64_acc : MInst_acc<(outs DoubleRegs:$dst),
            (ins DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
            "$dst += mpy($src2, $src3)",
-           [(set DoubleRegs:$dst,
-                 (add (mul (i64 (sext IntRegs:$src2)), (i64 (sext IntRegs:$src3))),
-                      DoubleRegs:$src1))],
+           [(set (i64 DoubleRegs:$dst),
+                 (add (mul (i64 (sext (i32 IntRegs:$src2))),
+                           (i64 (sext (i32 IntRegs:$src3)))),
+                      (i64 DoubleRegs:$src1)))],
            "$src1 = $dst">;
 
// Rxx-=mpy(Rs,Rt)
def MPY64_sub : MInst_acc<(outs DoubleRegs:$dst),
            (ins DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
            "$dst -= mpy($src2, $src3)",
-           [(set DoubleRegs:$dst,
-                 (sub DoubleRegs:$src1,
-                      (mul (i64 (sext IntRegs:$src2)), (i64 (sext IntRegs:$src3)))))],
+           [(set (i64 DoubleRegs:$dst),
+                 (sub (i64 DoubleRegs:$src1),
+                      (mul (i64 (sext (i32 IntRegs:$src2))),
+                           (i64 (sext (i32 IntRegs:$src3))))))],
            "$src1 = $dst">;
 
// Rxx[+-]=mpyu(Rs,Rt)
@@ -1648,47 +1694,52 @@ def MPY64_sub : MInst_acc<(outs DoubleRegs:$dst),
def MPYU64_acc : MInst_acc<(outs DoubleRegs:$dst),
            (ins DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
            "$dst += mpyu($src2, $src3)",
-           [(set DoubleRegs:$dst, (add (mul (i64 (anyext IntRegs:$src2)),
-                                            (i64 (anyext IntRegs:$src3))),
-                                       DoubleRegs:$src1))],"$src1 = $dst">;
+           [(set (i64 DoubleRegs:$dst),
+                 (add (mul (i64 (anyext (i32 IntRegs:$src2))),
+                           (i64 (anyext (i32 IntRegs:$src3)))),
+                      (i64 DoubleRegs:$src1)))], "$src1 = $dst">;
 
// Rxx-=mpyu(Rs,Rt)
def MPYU64_sub : MInst_acc<(outs DoubleRegs:$dst),
            (ins DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
            "$dst -= mpyu($src2, $src3)",
-           [(set DoubleRegs:$dst,
-                 (sub DoubleRegs:$src1,
-                      (mul (i64 (anyext IntRegs:$src2)),
-                           (i64 (anyext IntRegs:$src3)))))],
+           [(set (i64 DoubleRegs:$dst),
+                 (sub (i64 DoubleRegs:$src1),
+                      (mul (i64 (anyext (i32 IntRegs:$src2))),
+                           (i64 (anyext (i32 IntRegs:$src3))))))],
            "$src1 = $dst">;
 
def ADDrr_acc : MInst_acc<(outs IntRegs: $dst), (ins IntRegs:$src1,
            IntRegs:$src2, IntRegs:$src3),
            "$dst += add($src2, $src3)",
-           [(set IntRegs:$dst, (add (add IntRegs:$src2, IntRegs:$src3),
-                                    IntRegs:$src1))],
+           [(set (i32 IntRegs:$dst), (add (add (i32 IntRegs:$src2),
+                                               (i32 IntRegs:$src3)),
+                                          (i32 IntRegs:$src1)))],
            "$src1 = $dst">;
 
def ADDri_acc : MInst_acc<(outs IntRegs: $dst), (ins IntRegs:$src1,
            IntRegs:$src2, s8Imm:$src3),
            "$dst += add($src2, #$src3)",
-           [(set IntRegs:$dst, (add (add IntRegs:$src2, s8ImmPred:$src3),
-                                    IntRegs:$src1))],
+           [(set (i32 IntRegs:$dst), (add (add (i32 IntRegs:$src2),
+                                               s8ImmPred:$src3),
+                                          (i32 IntRegs:$src1)))],
            "$src1 = $dst">;
 
def SUBrr_acc : MInst_acc<(outs IntRegs: $dst), (ins IntRegs:$src1,
            IntRegs:$src2, IntRegs:$src3),
            "$dst -= add($src2, $src3)",
-           [(set IntRegs:$dst, (sub IntRegs:$src1, (add IntRegs:$src2,
-                                                        IntRegs:$src3)))],
+           [(set (i32 
IntRegs:$dst), + (sub (i32 IntRegs:$src1), (add (i32 IntRegs:$src2), + (i32 IntRegs:$src3))))], "$src1 = $dst">; def SUBri_acc : MInst_acc<(outs IntRegs: $dst), (ins IntRegs:$src1, IntRegs:$src2, s8Imm:$src3), "$dst -= add($src2, #$src3)", - [(set IntRegs:$dst, (sub IntRegs:$src1, - (add IntRegs:$src2, s8ImmPred:$src3)))], + [(set (i32 IntRegs:$dst), (sub (i32 IntRegs:$src1), + (add (i32 IntRegs:$src2), + s8ImmPred:$src3)))], "$src1 = $dst">; //===----------------------------------------------------------------------===// @@ -1731,57 +1782,70 @@ let isPredicable = 1 in def STrid : STInst<(outs), (ins MEMri:$addr, DoubleRegs:$src1), "memd($addr) = $src1", - [(store DoubleRegs:$src1, ADDRriS11_3:$addr)]>; + [(store (i64 DoubleRegs:$src1), ADDRriS11_3:$addr)]>; // Indexed store double word. let AddedComplexity = 10, isPredicable = 1 in def STrid_indexed : STInst<(outs), (ins IntRegs:$src1, s11_3Imm:$src2, DoubleRegs:$src3), "memd($src1+#$src2) = $src3", - [(store DoubleRegs:$src3, - (add IntRegs:$src1, s11_3ImmPred:$src2))]>; + [(store (i64 DoubleRegs:$src3), + (add (i32 IntRegs:$src1), s11_3ImmPred:$src2))]>; -let mayStore = 1, neverHasSideEffects = 1 in -def STrid_GP : STInst<(outs), +let neverHasSideEffects = 1 in +def STrid_GP : STInst2<(outs), (ins globaladdress:$global, u16Imm:$offset, DoubleRegs:$src), "memd(#$global+$offset) = $src", - []>; + []>, + Requires<[NoV4T]>; + +let neverHasSideEffects = 1 in +def STd_GP : STInst2<(outs), + (ins globaladdress:$global, DoubleRegs:$src), + "memd(#$global) = $src", + []>, + Requires<[NoV4T]>; let hasCtrlDep = 1, isPredicable = 1 in def POST_STdri : STInstPI<(outs IntRegs:$dst), (ins DoubleRegs:$src1, IntRegs:$src2, s4Imm:$offset), "memd($src2++#$offset) = $src1", [(set IntRegs:$dst, - (post_store DoubleRegs:$src1, IntRegs:$src2, s4_3ImmPred:$offset))], + (post_store (i64 DoubleRegs:$src1), (i32 IntRegs:$src2), + s4_3ImmPred:$offset))], "$src2 = $dst">; // Store doubleword conditionally. 
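// In the post-increment form above and the predicated forms below, the
// #s4:3 immediate is a 4-bit signed offset scaled by the 8-byte access size,
// and the base register is updated after the store.  Roughly, in C:
//   *(int64_t *)base = v;  base += offset;   /* memd(Rx++#s4:3) = Rtt */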
// if ([!]Pv) memd(Rs+#u6:3)=Rtt
// if (Pv) memd(Rs+#u6:3)=Rtt
-let AddedComplexity = 10, mayStore = 1, neverHasSideEffects = 1 in
-def STrid_cPt : STInst<(outs),
+let AddedComplexity = 10, neverHasSideEffects = 1,
+    isPredicated = 1 in
+def STrid_cPt : STInst2<(outs),
            (ins PredRegs:$src1, MEMri:$addr, DoubleRegs:$src2),
            "if ($src1) memd($addr) = $src2",
            []>;
 
// if (!Pv) memd(Rs+#u6:3)=Rtt
-let AddedComplexity = 10, mayStore = 1, neverHasSideEffects = 1 in
-def STrid_cNotPt : STInst<(outs),
+let AddedComplexity = 10, neverHasSideEffects = 1,
+    isPredicated = 1 in
+def STrid_cNotPt : STInst2<(outs),
            (ins PredRegs:$src1, MEMri:$addr, DoubleRegs:$src2),
            "if (!$src1) memd($addr) = $src2",
            []>;
 
// if (Pv) memd(Rs+#u6:3)=Rtt
-let AddedComplexity = 10, mayStore = 1, neverHasSideEffects = 1 in
-def STrid_indexed_cPt : STInst<(outs),
+let AddedComplexity = 10, neverHasSideEffects = 1,
+    isPredicated = 1 in
+def STrid_indexed_cPt : STInst2<(outs),
            (ins PredRegs:$src1, IntRegs:$src2, u6_3Imm:$src3,
                 DoubleRegs:$src4),
            "if ($src1) memd($src2+#$src3) = $src4",
            []>;
 
// if (!Pv) memd(Rs+#u6:3)=Rtt
-let AddedComplexity = 10, mayStore = 1, neverHasSideEffects = 1 in
-def STrid_indexed_cNotPt : STInst<(outs),
+let AddedComplexity = 10, neverHasSideEffects = 1,
+    isPredicated = 1 in
+def STrid_indexed_cNotPt : STInst2<(outs),
            (ins PredRegs:$src1, IntRegs:$src2, u6_3Imm:$src3,
                 DoubleRegs:$src4),
            "if (!$src1) memd($src2+#$src3) = $src4",
@@ -1789,8 +1853,9 @@ def STrid_indexed_cNotPt : STInst<(outs),
 
// if ([!]Pv) memd(Rx++#s4:3)=Rtt
// if (Pv) memd(Rx++#s4:3)=Rtt
-let AddedComplexity = 10, mayStore = 1, neverHasSideEffects = 1 in
-def POST_STdri_cPt : STInstPI<(outs IntRegs:$dst),
+let AddedComplexity = 10, neverHasSideEffects = 1,
+    isPredicated = 1 in
+def POST_STdri_cPt : STInst2PI<(outs IntRegs:$dst),
            (ins PredRegs:$src1, DoubleRegs:$src2, IntRegs:$src3,
                 s4_3Imm:$offset),
            "if ($src1) memd($src3++#$offset) = $src2",
@@ -1798,9 +1863,9 @@ def POST_STdri_cPt : STInstPI<(outs IntRegs:$dst),
            "$src3 = $dst">;
 
// if (!Pv) memd(Rx++#s4:3)=Rtt
-let AddedComplexity = 10, mayStore = 1, neverHasSideEffects = 1,
+let AddedComplexity = 10, neverHasSideEffects = 1,
    isPredicated = 1 in
-def POST_STdri_cNotPt : STInstPI<(outs IntRegs:$dst),
+def POST_STdri_cNotPt : STInst2PI<(outs IntRegs:$dst),
            (ins PredRegs:$src1, DoubleRegs:$src2, IntRegs:$src3,
                 s4_3Imm:$offset),
            "if (!$src1) memd($src3++#$offset) = $src2",
@@ -1814,27 +1879,30 @@ let isPredicable = 1 in
def STrib : STInst<(outs),
            (ins MEMri:$addr, IntRegs:$src1),
            "memb($addr) = $src1",
-           [(truncstorei8 IntRegs:$src1, ADDRriS11_0:$addr)]>;
+           [(truncstorei8 (i32 IntRegs:$src1), ADDRriS11_0:$addr)]>;
 
let AddedComplexity = 10, isPredicable = 1 in
def STrib_indexed : STInst<(outs),
            (ins IntRegs:$src1, s11_0Imm:$src2, IntRegs:$src3),
            "memb($src1+#$src2) = $src3",
-           [(truncstorei8 IntRegs:$src3, (add IntRegs:$src1,
-                                              s11_0ImmPred:$src2))]>;
+           [(truncstorei8 (i32 IntRegs:$src3), (add (i32 IntRegs:$src1),
+                                                    s11_0ImmPred:$src2))]>;
 
// memb(gp+#u16:0)=Rt
-let mayStore = 1, neverHasSideEffects = 1 in
-def STrib_GP : STInst<(outs),
+let neverHasSideEffects = 1 in
+def STrib_GP : STInst2<(outs),
            (ins globaladdress:$global, u16Imm:$offset, IntRegs:$src),
            "memb(#$global+$offset) = $src",
-           []>;
+           []>,
+           Requires<[NoV4T]>;
 
-let mayStore = 1, neverHasSideEffects = 1 in
-def STb_GP : STInst<(outs),
+// memb(#global)=Rt
+let neverHasSideEffects = 1 in
+def STb_GP : STInst2<(outs),
            (ins globaladdress:$global, IntRegs:$src),
            "memb(#$global) = $src",
-           []>;
+           []>,
+           
Requires<[NoV4T]>; // memb(Rx++#s4:0)=Rt let hasCtrlDep = 1, isPredicable = 1 in @@ -1843,51 +1911,51 @@ def POST_STbri : STInstPI<(outs IntRegs:$dst), (ins IntRegs:$src1, s4Imm:$offset), "memb($src2++#$offset) = $src1", [(set IntRegs:$dst, - (post_truncsti8 IntRegs:$src1, IntRegs:$src2, + (post_truncsti8 (i32 IntRegs:$src1), (i32 IntRegs:$src2), s4_0ImmPred:$offset))], "$src2 = $dst">; // Store byte conditionally. // if ([!]Pv) memb(Rs+#u6:0)=Rt // if (Pv) memb(Rs+#u6:0)=Rt -let mayStore = 1, neverHasSideEffects = 1 in -def STrib_cPt : STInst<(outs), +let neverHasSideEffects = 1, isPredicated = 1 in +def STrib_cPt : STInst2<(outs), (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2), "if ($src1) memb($addr) = $src2", []>; // if (!Pv) memb(Rs+#u6:0)=Rt -let mayStore = 1, neverHasSideEffects = 1 in -def STrib_cNotPt : STInst<(outs), +let neverHasSideEffects = 1, isPredicated = 1 in +def STrib_cNotPt : STInst2<(outs), (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2), "if (!$src1) memb($addr) = $src2", []>; // if (Pv) memb(Rs+#u6:0)=Rt -let mayStore = 1, neverHasSideEffects = 1 in -def STrib_indexed_cPt : STInst<(outs), +let neverHasSideEffects = 1, isPredicated = 1 in +def STrib_indexed_cPt : STInst2<(outs), (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3, IntRegs:$src4), "if ($src1) memb($src2+#$src3) = $src4", []>; // if (!Pv) memb(Rs+#u6:0)=Rt -let mayStore = 1, neverHasSideEffects = 1 in -def STrib_indexed_cNotPt : STInst<(outs), +let neverHasSideEffects = 1, isPredicated = 1 in +def STrib_indexed_cNotPt : STInst2<(outs), (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3, IntRegs:$src4), "if (!$src1) memb($src2+#$src3) = $src4", []>; // if ([!]Pv) memb(Rx++#s4:0)=Rt // if (Pv) memb(Rx++#s4:0)=Rt -let mayStore = 1, hasCtrlDep = 1, isPredicated = 1 in -def POST_STbri_cPt : STInstPI<(outs IntRegs:$dst), +let hasCtrlDep = 1, isPredicated = 1 in +def POST_STbri_cPt : STInst2PI<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_0Imm:$offset), "if ($src1) memb($src3++#$offset) = $src2", [],"$src3 = $dst">; // if (!Pv) memb(Rx++#s4:0)=Rt -let mayStore = 1, hasCtrlDep = 1, isPredicated = 1 in -def POST_STbri_cNotPt : STInstPI<(outs IntRegs:$dst), +let hasCtrlDep = 1, isPredicated = 1 in +def POST_STbri_cNotPt : STInst2PI<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_0Imm:$offset), "if (!$src1) memb($src3++#$offset) = $src2", [],"$src3 = $dst">; @@ -1899,27 +1967,29 @@ let isPredicable = 1 in def STrih : STInst<(outs), (ins MEMri:$addr, IntRegs:$src1), "memh($addr) = $src1", - [(truncstorei16 IntRegs:$src1, ADDRriS11_1:$addr)]>; + [(truncstorei16 (i32 IntRegs:$src1), ADDRriS11_1:$addr)]>; let AddedComplexity = 10, isPredicable = 1 in def STrih_indexed : STInst<(outs), (ins IntRegs:$src1, s11_1Imm:$src2, IntRegs:$src3), "memh($src1+#$src2) = $src3", - [(truncstorei16 IntRegs:$src3, (add IntRegs:$src1, - s11_1ImmPred:$src2))]>; + [(truncstorei16 (i32 IntRegs:$src3), (add (i32 IntRegs:$src1), + s11_1ImmPred:$src2))]>; -let mayStore = 1, neverHasSideEffects = 1 in -def STrih_GP : STInst<(outs), +let neverHasSideEffects = 1 in +def STrih_GP : STInst2<(outs), (ins globaladdress:$global, u16Imm:$offset, IntRegs:$src), "memh(#$global+$offset) = $src", - []>; + []>, + Requires<[NoV4T]>; -let mayStore = 1, neverHasSideEffects = 1 in -def STh_GP : STInst<(outs), +let neverHasSideEffects = 1 in +def STh_GP : STInst2<(outs), (ins globaladdress:$global, IntRegs:$src), "memh(#$global) = $src", - []>; + []>, + Requires<[NoV4T]>; // memh(Rx++#s4:1)=Rt.H // 
memh(Rx++#s4:1)=Rt @@ -1928,51 +1998,51 @@ def POST_SThri : STInstPI<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2, s4Imm:$offset), "memh($src2++#$offset) = $src1", [(set IntRegs:$dst, - (post_truncsti16 IntRegs:$src1, IntRegs:$src2, + (post_truncsti16 (i32 IntRegs:$src1), (i32 IntRegs:$src2), s4_1ImmPred:$offset))], "$src2 = $dst">; // Store halfword conditionally. // if ([!]Pv) memh(Rs+#u6:1)=Rt // if (Pv) memh(Rs+#u6:1)=Rt -let mayStore = 1, neverHasSideEffects = 1 in -def STrih_cPt : STInst<(outs), +let neverHasSideEffects = 1, isPredicated = 1 in +def STrih_cPt : STInst2<(outs), (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2), "if ($src1) memh($addr) = $src2", []>; // if (!Pv) memh(Rs+#u6:1)=Rt -let mayStore = 1, neverHasSideEffects = 1 in -def STrih_cNotPt : STInst<(outs), +let neverHasSideEffects = 1, isPredicated = 1 in +def STrih_cNotPt : STInst2<(outs), (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2), "if (!$src1) memh($addr) = $src2", []>; // if (Pv) memh(Rs+#u6:1)=Rt -let mayStore = 1, neverHasSideEffects = 1 in -def STrih_indexed_cPt : STInst<(outs), +let neverHasSideEffects = 1, isPredicated = 1 in +def STrih_indexed_cPt : STInst2<(outs), (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3, IntRegs:$src4), "if ($src1) memh($src2+#$src3) = $src4", []>; // if (!Pv) memh(Rs+#u6:1)=Rt -let mayStore = 1, neverHasSideEffects = 1 in -def STrih_indexed_cNotPt : STInst<(outs), +let neverHasSideEffects = 1, isPredicated = 1 in +def STrih_indexed_cNotPt : STInst2<(outs), (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3, IntRegs:$src4), "if (!$src1) memh($src2+#$src3) = $src4", []>; // if ([!]Pv) memh(Rx++#s4:1)=Rt // if (Pv) memh(Rx++#s4:1)=Rt -let mayStore = 1, hasCtrlDep = 1, isPredicated = 1 in -def POST_SThri_cPt : STInstPI<(outs IntRegs:$dst), +let hasCtrlDep = 1, isPredicated = 1 in +def POST_SThri_cPt : STInst2PI<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_1Imm:$offset), "if ($src1) memh($src3++#$offset) = $src2", [],"$src3 = $dst">; // if (!Pv) memh(Rx++#s4:1)=Rt -let mayStore = 1, hasCtrlDep = 1, isPredicated = 1 in -def POST_SThri_cNotPt : STInstPI<(outs IntRegs:$dst), +let hasCtrlDep = 1, isPredicated = 1 in +def POST_SThri_cNotPt : STInst2PI<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_1Imm:$offset), "if (!$src1) memh($src3++#$offset) = $src2", [],"$src3 = $dst">; @@ -1980,8 +2050,8 @@ def POST_SThri_cNotPt : STInstPI<(outs IntRegs:$dst), // Store word. // Store predicate. 
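// STriw_pred below (like LDriw_pred earlier) is a spill pseudo: the "Error;
// should not emit" string means it must be expanded before emission,
// presumably by transferring the predicate through a general register around
// the actual memory access, along the lines of:
//   r = p0; memw(addr) = r      /* spill  */
//   r = memw(addr); p0 = r      /* reload */
// The D5 added to the Defs list marks the register pair such an expansion may
// clobber; this reading is inferred from the definitions themselves.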
-let Defs = [R10,R11] in -def STriw_pred : STInst<(outs), +let Defs = [R10,R11,D5], neverHasSideEffects = 1 in +def STriw_pred : STInst2<(outs), (ins MEMri:$addr, PredRegs:$src1), "Error; should not emit", []>; @@ -1991,69 +2061,79 @@ let isPredicable = 1 in def STriw : STInst<(outs), (ins MEMri:$addr, IntRegs:$src1), "memw($addr) = $src1", - [(store IntRegs:$src1, ADDRriS11_2:$addr)]>; + [(store (i32 IntRegs:$src1), ADDRriS11_2:$addr)]>; let AddedComplexity = 10, isPredicable = 1 in def STriw_indexed : STInst<(outs), (ins IntRegs:$src1, s11_2Imm:$src2, IntRegs:$src3), "memw($src1+#$src2) = $src3", - [(store IntRegs:$src3, (add IntRegs:$src1, s11_2ImmPred:$src2))]>; + [(store (i32 IntRegs:$src3), + (add (i32 IntRegs:$src1), s11_2ImmPred:$src2))]>; -let mayStore = 1, neverHasSideEffects = 1 in -def STriw_GP : STInst<(outs), +let neverHasSideEffects = 1 in +def STriw_GP : STInst2<(outs), (ins globaladdress:$global, u16Imm:$offset, IntRegs:$src), "memw(#$global+$offset) = $src", - []>; + []>, + Requires<[NoV4T]>; + +let neverHasSideEffects = 1 in +def STw_GP : STInst2<(outs), + (ins globaladdress:$global, IntRegs:$src), + "memw(#$global) = $src", + []>, + Requires<[NoV4T]>; let hasCtrlDep = 1, isPredicable = 1 in def POST_STwri : STInstPI<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2, s4Imm:$offset), "memw($src2++#$offset) = $src1", [(set IntRegs:$dst, - (post_store IntRegs:$src1, IntRegs:$src2, s4_2ImmPred:$offset))], + (post_store (i32 IntRegs:$src1), (i32 IntRegs:$src2), + s4_2ImmPred:$offset))], "$src2 = $dst">; // Store word conditionally. // if ([!]Pv) memw(Rs+#u6:2)=Rt // if (Pv) memw(Rs+#u6:2)=Rt -let mayStore = 1, neverHasSideEffects = 1 in -def STriw_cPt : STInst<(outs), +let neverHasSideEffects = 1, isPredicated = 1 in +def STriw_cPt : STInst2<(outs), (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2), "if ($src1) memw($addr) = $src2", []>; // if (!Pv) memw(Rs+#u6:2)=Rt -let mayStore = 1, neverHasSideEffects = 1 in -def STriw_cNotPt : STInst<(outs), +let neverHasSideEffects = 1, isPredicated = 1 in +def STriw_cNotPt : STInst2<(outs), (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2), "if (!$src1) memw($addr) = $src2", []>; // if (Pv) memw(Rs+#u6:2)=Rt -let mayStore = 1, neverHasSideEffects = 1 in -def STriw_indexed_cPt : STInst<(outs), +let neverHasSideEffects = 1, isPredicated = 1 in +def STriw_indexed_cPt : STInst2<(outs), (ins PredRegs:$src1, IntRegs:$src2, u6_2Imm:$src3, IntRegs:$src4), "if ($src1) memw($src2+#$src3) = $src4", []>; // if (!Pv) memw(Rs+#u6:2)=Rt -let mayStore = 1, neverHasSideEffects = 1 in -def STriw_indexed_cNotPt : STInst<(outs), +let neverHasSideEffects = 1, isPredicated = 1 in +def STriw_indexed_cNotPt : STInst2<(outs), (ins PredRegs:$src1, IntRegs:$src2, u6_2Imm:$src3, IntRegs:$src4), "if (!$src1) memw($src2+#$src3) = $src4", []>; // if ([!]Pv) memw(Rx++#s4:2)=Rt // if (Pv) memw(Rx++#s4:2)=Rt -let mayStore = 1, hasCtrlDep = 1, isPredicated = 1 in -def POST_STwri_cPt : STInstPI<(outs IntRegs:$dst), +let hasCtrlDep = 1, isPredicated = 1 in +def POST_STwri_cPt : STInst2PI<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_2Imm:$offset), "if ($src1) memw($src3++#$offset) = $src2", [],"$src3 = $dst">; // if (!Pv) memw(Rx++#s4:2)=Rt -let mayStore = 1, hasCtrlDep = 1, isPredicated = 1 in -def POST_STwri_cNotPt : STInstPI<(outs IntRegs:$dst), +let hasCtrlDep = 1, isPredicated = 1 in +def POST_STwri_cNotPt : STInst2PI<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_2Imm:$offset), "if (!$src1) 
memw($src3++#$offset) = $src2",
            [],"$src3 = $dst">;
@@ -2062,7 +2142,7 @@ def POST_STwri_cNotPt : STInstPI<(outs IntRegs:$dst),
 
// Allocate stack frame.
let Defs = [R29, R30], Uses = [R31, R30], neverHasSideEffects = 1 in {
-  def ALLOCFRAME : STInst<(outs),
+  def ALLOCFRAME : STInst2<(outs),
             (ins i32imm:$amt),
             "allocframe(#$amt)",
             []>;
@@ -2077,13 +2157,13 @@ let Defs = [R29, R30], Uses = [R31, R30], neverHasSideEffects = 1 in {
// Logical NOT.
def NOT_rr64 : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1),
            "$dst = not($src1)",
-           [(set DoubleRegs:$dst, (not DoubleRegs:$src1))]>;
+           [(set (i64 DoubleRegs:$dst), (not (i64 DoubleRegs:$src1)))]>;
 
// Sign extend word to doubleword.
def SXTW : ALU64_rr<(outs DoubleRegs:$dst), (ins IntRegs:$src1),
           "$dst = sxtw($src1)",
-          [(set DoubleRegs:$dst, (sext IntRegs:$src1))]>;
+          [(set (i64 DoubleRegs:$dst), (sext (i32 IntRegs:$src1)))]>;
//===----------------------------------------------------------------------===//
// STYPE/ALU -
//===----------------------------------------------------------------------===//
@@ -2091,37 +2171,58 @@ def SXTW : ALU64_rr<(outs DoubleRegs:$dst), (ins IntRegs:$src1),
//===----------------------------------------------------------------------===//
// STYPE/BIT +
//===----------------------------------------------------------------------===//
-//===----------------------------------------------------------------------===//
-// STYPE/BIT -
-//===----------------------------------------------------------------------===//
+// clrbit.
+def CLRBIT : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, u5Imm:$src2),
+            "$dst = clrbit($src1, #$src2)",
+            [(set (i32 IntRegs:$dst), (and (i32 IntRegs:$src1),
+                                           (not
+                                              (shl 1, u5ImmPred:$src2))))]>;
+def CLRBIT_31 : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, u5Imm:$src2),
+            "$dst = clrbit($src1, #$src2)",
+            []>;
 
-//===----------------------------------------------------------------------===//
-// STYPE/COMPLEX +
-//===----------------------------------------------------------------------===//
-//===----------------------------------------------------------------------===//
-// STYPE/COMPLEX -
-//===----------------------------------------------------------------------===//
+// Map from r0 = and(r1, 2147483647) to r0 = clrbit(r1, #31).
+def : Pat <(and (i32 IntRegs:$src1), 2147483647),
+           (CLRBIT_31 (i32 IntRegs:$src1), 31)>;
 
-//===----------------------------------------------------------------------===//
-// STYPE/PERM +
-//===----------------------------------------------------------------------===//
-//===----------------------------------------------------------------------===//
-// STYPE/PERM -
-//===----------------------------------------------------------------------===//
+// setbit.
+def SETBIT : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, u5Imm:$src2),
+            "$dst = setbit($src1, #$src2)",
+            [(set (i32 IntRegs:$dst), (or (i32 IntRegs:$src1),
+                                          (shl 1, u5ImmPred:$src2)))]>;
+
+// Map from r0 = or(r1, -2147483648) to r0 = setbit(r1, #31).
+def SETBIT_31 : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, u5Imm:$src2),
+            "$dst = setbit($src1, #$src2)",
+            []>;
+
+def : Pat <(or (i32 IntRegs:$src1), -2147483648),
+           (SETBIT_31 (i32 IntRegs:$src1), 31)>;
+
+// togglebit.
+def TOGBIT : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, u5Imm:$src2),
+            "$dst = togglebit($src1, #$src2)",
+            [(set (i32 IntRegs:$dst), (xor (i32 IntRegs:$src1),
+                                           (shl 1, u5ImmPred:$src2)))]>;
+
+// Map from r0 = xor(r1, -2147483648) to r0 = togglebit(r1, #31).
+def TOGBIT_31 : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, u5Imm:$src2), + "$dst = togglebit($src1, #$src2)", + []>; + +def : Pat <(xor (i32 IntRegs:$src1), -2147483648), + (TOGBIT_31 (i32 IntRegs:$src1), 31)>; -//===----------------------------------------------------------------------===// -// STYPE/PRED + -//===----------------------------------------------------------------------===// // Predicate transfer. let neverHasSideEffects = 1 in def TFR_RsPd : SInst<(outs IntRegs:$dst), (ins PredRegs:$src1), - "$dst = $src1 // Should almost never emit this", + "$dst = $src1 /* Should almost never emit this. */", []>; def TFR_PdRs : SInst<(outs PredRegs:$dst), (ins IntRegs:$src1), - "$dst = $src1 // Should almost never emit!", - [(set PredRegs:$dst, (trunc IntRegs:$src1))]>; + "$dst = $src1 /* Should almost never emit this. */", + [(set (i1 PredRegs:$dst), (trunc (i32 IntRegs:$src1)))]>; //===----------------------------------------------------------------------===// // STYPE/PRED - //===----------------------------------------------------------------------===// @@ -2132,75 +2233,85 @@ def TFR_PdRs : SInst<(outs PredRegs:$dst), (ins IntRegs:$src1), // Shift by immediate. def ASR_ri : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, u5Imm:$src2), "$dst = asr($src1, #$src2)", - [(set IntRegs:$dst, (sra IntRegs:$src1, u5ImmPred:$src2))]>; + [(set (i32 IntRegs:$dst), (sra (i32 IntRegs:$src1), + u5ImmPred:$src2))]>; def ASRd_ri : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, u6Imm:$src2), "$dst = asr($src1, #$src2)", - [(set DoubleRegs:$dst, (sra DoubleRegs:$src1, u6ImmPred:$src2))]>; + [(set (i64 DoubleRegs:$dst), (sra (i64 DoubleRegs:$src1), + u6ImmPred:$src2))]>; def ASL : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, u5Imm:$src2), "$dst = asl($src1, #$src2)", - [(set IntRegs:$dst, (shl IntRegs:$src1, u5ImmPred:$src2))]>; + [(set (i32 IntRegs:$dst), (shl (i32 IntRegs:$src1), + u5ImmPred:$src2))]>; + +def ASLd_ri : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, u6Imm:$src2), + "$dst = asl($src1, #$src2)", + [(set (i64 DoubleRegs:$dst), (shl (i64 DoubleRegs:$src1), + u6ImmPred:$src2))]>; def LSR_ri : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, u5Imm:$src2), "$dst = lsr($src1, #$src2)", - [(set IntRegs:$dst, (srl IntRegs:$src1, u5ImmPred:$src2))]>; + [(set (i32 IntRegs:$dst), (srl (i32 IntRegs:$src1), + u5ImmPred:$src2))]>; def LSRd_ri : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, u6Imm:$src2), "$dst = lsr($src1, #$src2)", - [(set DoubleRegs:$dst, (srl DoubleRegs:$src1, u6ImmPred:$src2))]>; - -def LSRd_ri_acc : SInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, - DoubleRegs:$src2, - u6Imm:$src3), - "$dst += lsr($src2, #$src3)", - [(set DoubleRegs:$dst, (add DoubleRegs:$src1, - (srl DoubleRegs:$src2, - u6ImmPred:$src3)))], - "$src1 = $dst">; - -// Shift by immediate and accumulate. -def ASR_rr_acc : SInst_acc<(outs IntRegs:$dst), (ins IntRegs:$src1, - IntRegs:$src2, - IntRegs:$src3), - "$dst += asr($src2, $src3)", - [], "$src1 = $dst">; + [(set (i64 DoubleRegs:$dst), (srl (i64 DoubleRegs:$src1), + u6ImmPred:$src2))]>; // Shift by immediate and add. +let AddedComplexity = 100 in def ADDASL : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2, u3Imm:$src3), "$dst = addasl($src1, $src2, #$src3)", - [(set IntRegs:$dst, (add IntRegs:$src1, - (shl IntRegs:$src2, - u3ImmPred:$src3)))]>; + [(set (i32 IntRegs:$dst), (add (i32 IntRegs:$src1), + (shl (i32 IntRegs:$src2), + u3ImmPred:$src3)))]>; // Shift by register. 
def ASL_rr : SInst<(outs IntRegs:$dst),
            (ins IntRegs:$src1, IntRegs:$src2),
            "$dst = asl($src1, $src2)",
-           [(set IntRegs:$dst, (shl IntRegs:$src1, IntRegs:$src2))]>;
+           [(set (i32 IntRegs:$dst), (shl (i32 IntRegs:$src1),
+                                          (i32 IntRegs:$src2)))]>;
 
def ASR_rr : SInst<(outs IntRegs:$dst),
            (ins IntRegs:$src1, IntRegs:$src2),
            "$dst = asr($src1, $src2)",
-           [(set IntRegs:$dst, (sra IntRegs:$src1, IntRegs:$src2))]>;
+           [(set (i32 IntRegs:$dst), (sra (i32 IntRegs:$src1),
+                                          (i32 IntRegs:$src2)))]>;
 
+def LSL_rr : SInst<(outs IntRegs:$dst),
+            (ins IntRegs:$src1, IntRegs:$src2),
+            "$dst = lsl($src1, $src2)",
+            [(set (i32 IntRegs:$dst), (shl (i32 IntRegs:$src1),
+                                           (i32 IntRegs:$src2)))]>;
 
def LSR_rr : SInst<(outs IntRegs:$dst),
            (ins IntRegs:$src1, IntRegs:$src2),
            "$dst = lsr($src1, $src2)",
-           [(set IntRegs:$dst, (srl IntRegs:$src1, IntRegs:$src2))]>;
+           [(set (i32 IntRegs:$dst), (srl (i32 IntRegs:$src1),
+                                          (i32 IntRegs:$src2)))]>;
+
+def ASLd : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, IntRegs:$src2),
           "$dst = asl($src1, $src2)",
+          [(set (i64 DoubleRegs:$dst), (shl (i64 DoubleRegs:$src1),
+                                            (i32 IntRegs:$src2)))]>;
 
def LSLd : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, IntRegs:$src2),
           "$dst = lsl($src1, $src2)",
-          [(set DoubleRegs:$dst, (shl DoubleRegs:$src1, IntRegs:$src2))]>;
+          [(set (i64 DoubleRegs:$dst), (shl (i64 DoubleRegs:$src1),
+                                            (i32 IntRegs:$src2)))]>;
 
def ASRd_rr : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1,
                                                 IntRegs:$src2),
          "$dst = asr($src1, $src2)",
-         [(set DoubleRegs:$dst, (sra DoubleRegs:$src1, IntRegs:$src2))]>;
+         [(set (i64 DoubleRegs:$dst), (sra (i64 DoubleRegs:$src1),
+                                           (i32 IntRegs:$src2)))]>;
 
def LSRd_rr : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1,
                                                 IntRegs:$src2),
         "$dst = lsr($src1, $src2)",
-        [(set DoubleRegs:$dst, (srl DoubleRegs:$src1, IntRegs:$src2))]>;
+        [(set (i64 DoubleRegs:$dst), (srl (i64 DoubleRegs:$src1),
+                                          (i32 IntRegs:$src2)))]>;
 
//===----------------------------------------------------------------------===//
// STYPE/SHIFT -
@@ -2231,8 +2342,8 @@ def SDHexagonBARRIER: SDTypeProfile<0, 0, []>;
def HexagonBARRIER: SDNode<"HexagonISD::BARRIER", SDHexagonBARRIER,
                           [SDNPHasChain]>;
 
-let hasSideEffects = 1 in
-def BARRIER : STInst<(outs), (ins),
+let hasSideEffects = 1, isHexagonSolo = 1 in
+def BARRIER : SYSInst<(outs), (ins),
                      "barrier",
                      [(HexagonBARRIER)]>;
 
@@ -2244,47 +2355,50 @@ def BARRIER : STInst<(outs), (ins),
let isReMaterializable = 1 in
def TFRI64 : ALU64_rr<(outs DoubleRegs:$dst), (ins s8Imm64:$src1),
             "$dst = #$src1",
-            [(set DoubleRegs:$dst, s8Imm64Pred:$src1)]>;
+            [(set (i64 DoubleRegs:$dst), s8Imm64Pred:$src1)]>;
 
// Pseudo instruction to encode a set of conditional transfers.
// This instruction is used instead of a mux and trades off code size
// for performance. We conduct this transformation optimistically in
// the hope that these instructions get promoted to dot-new transfers.
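// A sketch of the expansion such a conditional-transfer pseudo stands for,
// assuming TFR_condset_rr(p0, r2, r3) selecting into r0:
//   if (p0)  r0 = r2
//   if (!p0) r0 = r3
// When the compare defining p0 lands in the same packet, these transfers can
// use the dot-new predicate form, which is the promotion mentioned above.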
-let AddedComplexity = 100 in +let AddedComplexity = 100, isPredicated = 1 in def TFR_condset_rr : ALU32_rr<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), "Error; should not emit", - [(set IntRegs:$dst, (select PredRegs:$src1, IntRegs:$src2, - IntRegs:$src3))]>; - -let AddedComplexity = 100 in + [(set (i32 IntRegs:$dst), + (i32 (select (i1 PredRegs:$src1), + (i32 IntRegs:$src2), + (i32 IntRegs:$src3))))]>; +let AddedComplexity = 100, isPredicated = 1 in def TFR_condset_ri : ALU32_rr<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2, s12Imm:$src3), "Error; should not emit", - [(set IntRegs:$dst, - (select PredRegs:$src1, IntRegs:$src2, s12ImmPred:$src3))]>; + [(set (i32 IntRegs:$dst), + (i32 (select (i1 PredRegs:$src1), (i32 IntRegs:$src2), + s12ImmPred:$src3)))]>; -let AddedComplexity = 100 in +let AddedComplexity = 100, isPredicated = 1 in def TFR_condset_ir : ALU32_rr<(outs IntRegs:$dst), (ins PredRegs:$src1, s12Imm:$src2, IntRegs:$src3), "Error; should not emit", - [(set IntRegs:$dst, - (select PredRegs:$src1, s12ImmPred:$src2, IntRegs:$src3))]>; + [(set (i32 IntRegs:$dst), + (i32 (select (i1 PredRegs:$src1), s12ImmPred:$src2, + (i32 IntRegs:$src3))))]>; -let AddedComplexity = 100 in +let AddedComplexity = 100, isPredicated = 1 in def TFR_condset_ii : ALU32_rr<(outs IntRegs:$dst), (ins PredRegs:$src1, s12Imm:$src2, s12Imm:$src3), "Error; should not emit", - [(set IntRegs:$dst, (select PredRegs:$src1, - s12ImmPred:$src2, - s12ImmPred:$src3))]>; + [(set (i32 IntRegs:$dst), + (i32 (select (i1 PredRegs:$src1), s12ImmPred:$src2, + s12ImmPred:$src3)))]>; // Generate frameindex addresses. let isReMaterializable = 1 in def TFR_FI : ALU32_ri<(outs IntRegs:$dst), (ins FrameIndex:$src1), "$dst = add($src1)", - [(set IntRegs:$dst, ADDRri:$src1)]>; + [(set (i32 IntRegs:$dst), ADDRri:$src1)]>; // // CR - Type. @@ -2303,69 +2417,116 @@ def LOOP0_r : CRInst<(outs), (ins brtarget:$offset, IntRegs:$src2), let isBranch = 1, isTerminator = 1, neverHasSideEffects = 1, Defs = [PC, LC0], Uses = [SA0, LC0] in { -def ENDLOOP0 : CRInst<(outs), (ins brtarget:$offset), +def ENDLOOP0 : Marker<(outs), (ins brtarget:$offset), ":endloop0", []>; } // Support for generating global address. // Taken from X86InstrInfo.td. 
-def SDTHexagonCONST32 : SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>, - SDTCisPtrTy<0>]>; +def SDTHexagonCONST32 : SDTypeProfile<1, 1, [ + SDTCisVT<0, i32>, + SDTCisVT<1, i32>, + SDTCisPtrTy<0>]>; def HexagonCONST32 : SDNode<"HexagonISD::CONST32", SDTHexagonCONST32>; def HexagonCONST32_GP : SDNode<"HexagonISD::CONST32_GP", SDTHexagonCONST32>; +// HI/LO Instructions +let isReMaterializable = 1, isMoveImm = 1, neverHasSideEffects = 1 in +def LO : ALU32_ri<(outs IntRegs:$dst), (ins globaladdress:$global), + "$dst.l = #LO($global)", + []>; + +let isReMaterializable = 1, isMoveImm = 1, neverHasSideEffects = 1 in +def HI : ALU32_ri<(outs IntRegs:$dst), (ins globaladdress:$global), + "$dst.h = #HI($global)", + []>; + +let isReMaterializable = 1, isMoveImm = 1, neverHasSideEffects = 1 in +def LOi : ALU32_ri<(outs IntRegs:$dst), (ins i32imm:$imm_value), + "$dst.l = #LO($imm_value)", + []>; + + +let isReMaterializable = 1, isMoveImm = 1, neverHasSideEffects = 1 in +def HIi : ALU32_ri<(outs IntRegs:$dst), (ins i32imm:$imm_value), + "$dst.h = #HI($imm_value)", + []>; + +let isReMaterializable = 1, isMoveImm = 1, neverHasSideEffects = 1 in +def LO_jt : ALU32_ri<(outs IntRegs:$dst), (ins jumptablebase:$jt), + "$dst.l = #LO($jt)", + []>; + +let isReMaterializable = 1, isMoveImm = 1, neverHasSideEffects = 1 in +def HI_jt : ALU32_ri<(outs IntRegs:$dst), (ins jumptablebase:$jt), + "$dst.h = #HI($jt)", + []>; + + +let isReMaterializable = 1, isMoveImm = 1, neverHasSideEffects = 1 in +def LO_label : ALU32_ri<(outs IntRegs:$dst), (ins bblabel:$label), + "$dst.l = #LO($label)", + []>; + +let isReMaterializable = 1, isMoveImm = 1 , neverHasSideEffects = 1 in +def HI_label : ALU32_ri<(outs IntRegs:$dst), (ins bblabel:$label), + "$dst.h = #HI($label)", + []>; + // This pattern is incorrect. When we add small data, we should change // this pattern to use memw(#foo). +// This is for sdata. let isMoveImm = 1 in def CONST32 : LDInst<(outs IntRegs:$dst), (ins globaladdress:$global), "$dst = CONST32(#$global)", - [(set IntRegs:$dst, - (load (HexagonCONST32 tglobaltlsaddr:$global)))]>; + [(set (i32 IntRegs:$dst), + (load (HexagonCONST32 tglobaltlsaddr:$global)))]>; +// This is for non-sdata. 
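+// "Non-sdata" here means the constant is not reachable GP-relative through
+// the small-data area, so the expectation is that CONST32_set is lowered via
+// the HI/LO transfers defined above, roughly:
+//   r0.h = #HI(global) ; r0.l = #LO(global)
+// i.e. two half-word immediate transfers instead of a memory load; this is an
+// inference from the surrounding definitions.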
let isReMaterializable = 1, isMoveImm = 1 in -def CONST32_set : LDInst<(outs IntRegs:$dst), (ins globaladdress:$global), +def CONST32_set : LDInst2<(outs IntRegs:$dst), (ins globaladdress:$global), "$dst = CONST32(#$global)", - [(set IntRegs:$dst, - (HexagonCONST32 tglobaladdr:$global))]>; + [(set (i32 IntRegs:$dst), + (HexagonCONST32 tglobaladdr:$global))]>; let isReMaterializable = 1, isMoveImm = 1 in -def CONST32_set_jt : LDInst<(outs IntRegs:$dst), (ins jumptablebase:$jt), +def CONST32_set_jt : LDInst2<(outs IntRegs:$dst), (ins jumptablebase:$jt), "$dst = CONST32(#$jt)", - [(set IntRegs:$dst, - (HexagonCONST32 tjumptable:$jt))]>; + [(set (i32 IntRegs:$dst), + (HexagonCONST32 tjumptable:$jt))]>; let isReMaterializable = 1, isMoveImm = 1 in -def CONST32GP_set : LDInst<(outs IntRegs:$dst), (ins globaladdress:$global), +def CONST32GP_set : LDInst2<(outs IntRegs:$dst), (ins globaladdress:$global), "$dst = CONST32(#$global)", - [(set IntRegs:$dst, - (HexagonCONST32_GP tglobaladdr:$global))]>; + [(set (i32 IntRegs:$dst), + (HexagonCONST32_GP tglobaladdr:$global))]>; let isReMaterializable = 1, isMoveImm = 1 in -def CONST32_Int_Real : LDInst<(outs IntRegs:$dst), (ins i32imm:$global), +def CONST32_Int_Real : LDInst2<(outs IntRegs:$dst), (ins i32imm:$global), "$dst = CONST32(#$global)", - [(set IntRegs:$dst, imm:$global) ]>; + [(set (i32 IntRegs:$dst), imm:$global) ]>; let isReMaterializable = 1, isMoveImm = 1 in -def CONST32_Label : LDInst<(outs IntRegs:$dst), (ins bblabel:$label), +def CONST32_Label : LDInst2<(outs IntRegs:$dst), (ins bblabel:$label), "$dst = CONST32($label)", - [(set IntRegs:$dst, (HexagonCONST32 bbl:$label))]>; + [(set (i32 IntRegs:$dst), (HexagonCONST32 bbl:$label))]>; let isReMaterializable = 1, isMoveImm = 1 in -def CONST64_Int_Real : LDInst<(outs DoubleRegs:$dst), (ins i64imm:$global), +def CONST64_Int_Real : LDInst2<(outs DoubleRegs:$dst), (ins i64imm:$global), "$dst = CONST64(#$global)", - [(set DoubleRegs:$dst, imm:$global) ]>; + [(set (i64 DoubleRegs:$dst), imm:$global) ]>; def TFR_PdFalse : SInst<(outs PredRegs:$dst), (ins), "$dst = xor($dst, $dst)", - [(set PredRegs:$dst, 0)]>; + [(set (i1 PredRegs:$dst), 0)]>; def MPY_trsext : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), - "$dst = mpy($src1, $src2)", - [(set IntRegs:$dst, - (trunc (i64 (srl (i64 (mul (i64 (sext IntRegs:$src1)), - (i64 (sext IntRegs:$src2)))), - (i32 32)))))]>; + "$dst = mpy($src1, $src2)", + [(set (i32 IntRegs:$dst), + (trunc (i64 (srl (i64 (mul (i64 (sext (i32 IntRegs:$src1))), + (i64 (sext (i32 IntRegs:$src2))))), + (i32 32)))))]>; // Pseudo instructions. def SDT_SPCallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i32> ]>; @@ -2405,7 +2566,7 @@ let Defs = [R29, R30, R31], Uses = [R29] in { let isCall = 1, neverHasSideEffects = 1, Defs = [D0, D1, D2, D3, D4, D5, D6, D7, D8, D9, D10, R22, R23, R28, R31, P0, P1, P2, P3, LC0, LC1, SA0, SA1] in { - def CALL : JInst<(outs), (ins calltarget:$dst, variable_ops), + def CALL : JInst<(outs), (ins calltarget:$dst), "call $dst", []>; } @@ -2413,34 +2574,28 @@ let isCall = 1, neverHasSideEffects = 1, let isCall = 1, neverHasSideEffects = 1, Defs = [D0, D1, D2, D3, D4, D5, D6, D7, D8, D9, D10, R22, R23, R28, R31, P0, P1, P2, P3, LC0, LC1, SA0, SA1] in { - def CALLR : JRInst<(outs), (ins IntRegs:$dst, variable_ops), + def CALLR : JRInst<(outs), (ins IntRegs:$dst), "callr $dst", []>; } // Tail Calls. 
-let isCall = 1, isBarrier = 1, isReturn = 1, isTerminator = 1, - Defs = [D0, D1, D2, D3, D4, D5, D6, D7, D8, D9, D10, - R22, R23, R28, R31, P0, P1, P2, P3, LC0, LC1, SA0, SA1] in { - def TCRETURNtg : JInst<(outs), (ins calltarget:$dst, variable_ops), +let isCall = 1, isBarrier = 1, isReturn = 1, isTerminator = 1 in { + def TCRETURNtg : JInst<(outs), (ins calltarget:$dst), "jump $dst // TAILCALL", []>; } -let isCall = 1, isBarrier = 1, isReturn = 1, isTerminator = 1, - Defs = [D0, D1, D2, D3, D4, D5, D6, D7, D8, D9, D10, - R22, R23, R28, R31, P0, P1, P2, P3, LC0, LC1, SA0, SA1] in { - def TCRETURNtext : JInst<(outs), (ins calltarget:$dst, variable_ops), +let isCall = 1, isBarrier = 1, isReturn = 1, isTerminator = 1 in { + def TCRETURNtext : JInst<(outs), (ins calltarget:$dst), "jump $dst // TAILCALL", []>; } -let isCall = 1, isBarrier = 1, isReturn = 1, isTerminator = 1, - Defs = [D0, D1, D2, D3, D4, D5, D6, D7, D8, D9, D10, - R22, R23, R28, R31, P0, P1, P2, P3, LC0, LC1, SA0, SA1] in { - def TCRETURNR : JInst<(outs), (ins IntRegs:$dst, variable_ops), +let isCall = 1, isBarrier = 1, isReturn = 1, isTerminator = 1 in { + def TCRETURNR : JInst<(outs), (ins IntRegs:$dst), "jumpr $dst // TAILCALL", []>; } // Map call instruction. -def : Pat<(call IntRegs:$dst), - (CALLR IntRegs:$dst)>, Requires<[HasV2TOnly]>; +def : Pat<(call (i32 IntRegs:$dst)), + (CALLR (i32 IntRegs:$dst))>, Requires<[HasV2TOnly]>; def : Pat<(call tglobaladdr:$dst), (CALL tglobaladdr:$dst)>, Requires<[HasV2TOnly]>; def : Pat<(call texternalsym:$dst), @@ -2450,309 +2605,516 @@ def : Pat<(HexagonTCRet tglobaladdr:$dst), (TCRETURNtg tglobaladdr:$dst)>; def : Pat<(HexagonTCRet texternalsym:$dst), (TCRETURNtext texternalsym:$dst)>; -def : Pat<(HexagonTCRet IntRegs:$dst), - (TCRETURNR IntRegs:$dst)>; +def : Pat<(HexagonTCRet (i32 IntRegs:$dst)), + (TCRETURNR (i32 IntRegs:$dst))>; + +// Atomic load and store support +// 8 bit atomic load +def : Pat<(atomic_load_8 (HexagonCONST32_GP tglobaladdr:$global)), + (i32 (LDub_GP tglobaladdr:$global))>, + Requires<[NoV4T]>; + +def : Pat<(atomic_load_8 (add (HexagonCONST32_GP tglobaladdr:$global), + u16ImmPred:$offset)), + (i32 (LDriub_GP tglobaladdr:$global, u16ImmPred:$offset))>, + Requires<[NoV4T]>; + +def : Pat<(atomic_load_8 ADDRriS11_0:$src1), + (i32 (LDriub ADDRriS11_0:$src1))>; + +def : Pat<(atomic_load_8 (add (i32 IntRegs:$src1), s11_0ImmPred:$offset)), + (i32 (LDriub_indexed (i32 IntRegs:$src1), s11_0ImmPred:$offset))>; + + + +// 16 bit atomic load +def : Pat<(atomic_load_16 (HexagonCONST32_GP tglobaladdr:$global)), + (i32 (LDuh_GP tglobaladdr:$global))>, + Requires<[NoV4T]>; + +def : Pat<(atomic_load_16 (add (HexagonCONST32_GP tglobaladdr:$global), + u16ImmPred:$offset)), + (i32 (LDriuh_GP tglobaladdr:$global, u16ImmPred:$offset))>, + Requires<[NoV4T]>; + +def : Pat<(atomic_load_16 ADDRriS11_1:$src1), + (i32 (LDriuh ADDRriS11_1:$src1))>; + +def : Pat<(atomic_load_16 (add (i32 IntRegs:$src1), s11_1ImmPred:$offset)), + (i32 (LDriuh_indexed (i32 IntRegs:$src1), s11_1ImmPred:$offset))>; + + + +// 32 bit atomic load +def : Pat<(atomic_load_32 (HexagonCONST32_GP tglobaladdr:$global)), + (i32 (LDw_GP tglobaladdr:$global))>, + Requires<[NoV4T]>; + +def : Pat<(atomic_load_32 (add (HexagonCONST32_GP tglobaladdr:$global), + u16ImmPred:$offset)), + (i32 (LDriw_GP tglobaladdr:$global, u16ImmPred:$offset))>, + Requires<[NoV4T]>; + +def : Pat<(atomic_load_32 ADDRriS11_2:$src1), + (i32 (LDriw ADDRriS11_2:$src1))>; + +def : Pat<(atomic_load_32 (add (i32 IntRegs:$src1), s11_2ImmPred:$offset)), + 
(i32 (LDriw_indexed (i32 IntRegs:$src1), s11_2ImmPred:$offset))>;
+
+
+// 64 bit atomic load
+def : Pat<(atomic_load_64 (HexagonCONST32_GP tglobaladdr:$global)),
+          (i64 (LDd_GP tglobaladdr:$global))>,
+          Requires<[NoV4T]>;
+
+def : Pat<(atomic_load_64 (add (HexagonCONST32_GP tglobaladdr:$global),
+                               u16ImmPred:$offset)),
+          (i64 (LDrid_GP tglobaladdr:$global, u16ImmPred:$offset))>,
+          Requires<[NoV4T]>;
+
+def : Pat<(atomic_load_64 ADDRriS11_3:$src1),
+          (i64 (LDrid ADDRriS11_3:$src1))>;
 
-// Map from r0 = and(r1, 65535) to r0 = zxth(r1).
-def : Pat <(and IntRegs:$src1, 65535),
-           (ZXTH IntRegs:$src1)>;
+def : Pat<(atomic_load_64 (add (i32 IntRegs:$src1), s11_3ImmPred:$offset)),
+          (i64 (LDrid_indexed (i32 IntRegs:$src1), s11_3ImmPred:$offset))>;
+
+
+// 64 bit atomic store
+def : Pat<(atomic_store_64 (HexagonCONST32_GP tglobaladdr:$global),
+                           (i64 DoubleRegs:$src1)),
+          (STd_GP tglobaladdr:$global, (i64 DoubleRegs:$src1))>,
+          Requires<[NoV4T]>;
+
+def : Pat<(atomic_store_64 (add (HexagonCONST32_GP tglobaladdr:$global),
+                                u16ImmPred:$offset),
+                           (i64 DoubleRegs:$src1)),
+          (STrid_GP tglobaladdr:$global, u16ImmPred:$offset,
+                    (i64 DoubleRegs:$src1))>, Requires<[NoV4T]>;
+
+// 8 bit atomic store
+def : Pat<(atomic_store_8 (HexagonCONST32_GP tglobaladdr:$global),
+                          (i32 IntRegs:$src1)),
+          (STb_GP tglobaladdr:$global, (i32 IntRegs:$src1))>,
+          Requires<[NoV4T]>;
+
+def : Pat<(atomic_store_8 (add (HexagonCONST32_GP tglobaladdr:$global),
+                               u16ImmPred:$offset),
+                          (i32 IntRegs:$src1)),
+          (STrib_GP tglobaladdr:$global, u16ImmPred:$offset,
+                    (i32 IntRegs:$src1))>, Requires<[NoV4T]>;
+
+def : Pat<(atomic_store_8 ADDRriS11_0:$src2, (i32 IntRegs:$src1)),
+          (STrib ADDRriS11_0:$src2, (i32 IntRegs:$src1))>;
+
+def : Pat<(atomic_store_8 (add (i32 IntRegs:$src2), s11_0ImmPred:$offset),
+                          (i32 IntRegs:$src1)),
+          (STrib_indexed (i32 IntRegs:$src2), s11_0ImmPred:$offset,
+                         (i32 IntRegs:$src1))>;
+
+
+// 16 bit atomic store
+def : Pat<(atomic_store_16 (HexagonCONST32_GP tglobaladdr:$global),
+                           (i32 IntRegs:$src1)),
+          (STh_GP tglobaladdr:$global, (i32 IntRegs:$src1))>,
+          Requires<[NoV4T]>;
+
+def : Pat<(atomic_store_16 (add (HexagonCONST32_GP tglobaladdr:$global),
+                                u16ImmPred:$offset),
+                           (i32 IntRegs:$src1)),
+          (STrih_GP tglobaladdr:$global, u16ImmPred:$offset,
+                    (i32 IntRegs:$src1))>, Requires<[NoV4T]>;
+
+def : Pat<(atomic_store_16 ADDRriS11_1:$src2, (i32 IntRegs:$src1)),
+          (STrih ADDRriS11_1:$src2, (i32 IntRegs:$src1))>;
+
+def : Pat<(atomic_store_16 (add (i32 IntRegs:$src2), s11_1ImmPred:$offset),
+                           (i32 IntRegs:$src1)),
+          (STrih_indexed (i32 IntRegs:$src2), s11_1ImmPred:$offset,
+                         (i32 IntRegs:$src1))>;
+
+
+// 32 bit atomic store
+def : Pat<(atomic_store_32 (HexagonCONST32_GP tglobaladdr:$global),
+                           (i32 IntRegs:$src1)),
+          (STw_GP tglobaladdr:$global, (i32 IntRegs:$src1))>,
+          Requires<[NoV4T]>;
+
+def : Pat<(atomic_store_32 (add (HexagonCONST32_GP tglobaladdr:$global),
+                                u16ImmPred:$offset),
+                           (i32 IntRegs:$src1)),
+          (STriw_GP tglobaladdr:$global, u16ImmPred:$offset,
+                    (i32 IntRegs:$src1))>,
+          Requires<[NoV4T]>;
+
+def : Pat<(atomic_store_32 ADDRriS11_2:$src2, (i32 IntRegs:$src1)),
+          (STriw ADDRriS11_2:$src2, (i32 IntRegs:$src1))>;
+
+def : Pat<(atomic_store_32 (add (i32 IntRegs:$src2), s11_2ImmPred:$offset),
+                           (i32 IntRegs:$src1)),
+          (STriw_indexed (i32 IntRegs:$src2), s11_2ImmPred:$offset,
+                         (i32 IntRegs:$src1))>;
+
+
+
+
+def : Pat<(atomic_store_64 ADDRriS11_3:$src2, (i64 DoubleRegs:$src1)),
+          (STrid ADDRriS11_3:$src2, (i64 DoubleRegs:$src1))>;
+
+def : Pat<(atomic_store_64 (add (i32 IntRegs:$src2), 
s11_3ImmPred:$offset),
+                           (i64 DoubleRegs:$src1)),
+          (STrid_indexed (i32 IntRegs:$src2), s11_3ImmPred:$offset,
+                         (i64 DoubleRegs:$src1))>;
+
+// Map from r0 = and(r1, 65535) to r0 = zxth(r1)
+def : Pat <(and (i32 IntRegs:$src1), 65535),
+           (ZXTH (i32 IntRegs:$src1))>;
 
// Map from r0 = and(r1, 255) to r0 = zxtb(r1).
-def : Pat <(and IntRegs:$src1, 255),
-           (ZXTB IntRegs:$src1)>;
+def : Pat <(and (i32 IntRegs:$src1), 255),
+           (ZXTB (i32 IntRegs:$src1))>;
 
// Map Add(p1, true) to p1 = not(p1).
// Add(p1, false) should never be produced;
// if it is, it must be mapped to a NOOP.
-def : Pat <(add PredRegs:$src1, -1),
-           (NOT_p PredRegs:$src1)>;
+def : Pat <(add (i1 PredRegs:$src1), -1),
+           (NOT_p (i1 PredRegs:$src1))>;
 
// Map from p0 = setlt(r0, r1) r2 = mux(p0, r3, r4) =>
// p0 = cmp.lt(r0, r1), r0 = mux(p0, r2, r1).
-def : Pat <(select (i1 (setlt IntRegs:$src1, IntRegs:$src2)), IntRegs:$src3,
-                   IntRegs:$src4),
-           (TFR_condset_rr (CMPLTrr IntRegs:$src1, IntRegs:$src2), IntRegs:$src4,
-                           IntRegs:$src3)>, Requires<[HasV2TOnly]>;
+def : Pat <(select (i1 (setlt (i32 IntRegs:$src1), (i32 IntRegs:$src2))),
+                   (i32 IntRegs:$src3),
+                   (i32 IntRegs:$src4)),
+           (i32 (TFR_condset_rr (CMPLTrr (i32 IntRegs:$src1), (i32 IntRegs:$src2)),
+                                (i32 IntRegs:$src4), (i32 IntRegs:$src3)))>,
+           Requires<[HasV2TOnly]>;
 
// Map from p0 = pnot(p0); r0 = mux(p0, #i, #j) => r0 = mux(p0, #j, #i).
-def : Pat <(select (not PredRegs:$src1), s8ImmPred:$src2, s8ImmPred:$src3),
-           (TFR_condset_ii PredRegs:$src1, s8ImmPred:$src3, s8ImmPred:$src2)>;
+def : Pat <(select (not (i1 PredRegs:$src1)), s8ImmPred:$src2, s8ImmPred:$src3),
+           (i32 (TFR_condset_ii (i1 PredRegs:$src1), s8ImmPred:$src3,
+                                s8ImmPred:$src2))>;
+
+// Map from p0 = pnot(p0); r0 = select(p0, #i, r1)
+// => r0 = TFR_condset_ri(p0, r1, #i)
+def : Pat <(select (not (i1 PredRegs:$src1)), s12ImmPred:$src2,
+                   (i32 IntRegs:$src3)),
+           (i32 (TFR_condset_ri (i1 PredRegs:$src1), (i32 IntRegs:$src3),
+                                s12ImmPred:$src2))>;
+
+// Map from p0 = pnot(p0); r0 = mux(p0, r1, #i)
+// => r0 = TFR_condset_ir(p0, #i, r1)
+def : Pat <(select (not PredRegs:$src1), IntRegs:$src2, s12ImmPred:$src3),
+           (i32 (TFR_condset_ir (i1 PredRegs:$src1), s12ImmPred:$src3,
+                                (i32 IntRegs:$src2)))>;
 
// Map from p0 = pnot(p0); if (p0) jump => if (!p0) jump.
def : Pat <(brcond (not PredRegs:$src1), bb:$offset),
-           (JMP_cNot PredRegs:$src1, bb:$offset)>;
+           (JMP_cNot (i1 PredRegs:$src1), bb:$offset)>;
 
// Map from p2 = pnot(p2); p1 = and(p0, p2) => p1 = and(p0, !p2).
def : Pat <(and PredRegs:$src1, (not PredRegs:$src2)),
-           (AND_pnotp PredRegs:$src1, PredRegs:$src2)>;
+           (i1 (AND_pnotp (i1 PredRegs:$src1), (i1 PredRegs:$src2)))>;
 
// Map from store(globaladdress + x) -> memd(#foo + x).
let AddedComplexity = 100 in
-def : Pat <(store DoubleRegs:$src1,
+def : Pat <(store (i64 DoubleRegs:$src1),
                   (add (HexagonCONST32_GP tglobaladdr:$global),
                        u16ImmPred:$offset)),
-           (STrid_GP tglobaladdr:$global, u16ImmPred:$offset, DoubleRegs:$src1)>;
+           (STrid_GP tglobaladdr:$global, u16ImmPred:$offset,
+                     (i64 DoubleRegs:$src1))>, Requires<[NoV4T]>;
 
-// Map from store(globaladdress) -> memd(#foo + 0).
+// Map from store(globaladdress) -> memd(#foo).
let AddedComplexity = 100 in
-def : Pat <(store DoubleRegs:$src1, (HexagonCONST32_GP tglobaladdr:$global)),
-           (STrid_GP tglobaladdr:$global, 0, DoubleRegs:$src1)>;
+def : Pat <(store (i64 DoubleRegs:$src1),
+                  (HexagonCONST32_GP tglobaladdr:$global)),
+           (STd_GP tglobaladdr:$global, (i64 DoubleRegs:$src1))>,
+           Requires<[NoV4T]>;
 
// Map from store(globaladdress + x) -> memw(#foo + x).
let AddedComplexity = 100 in -def : Pat <(store IntRegs:$src1, (add (HexagonCONST32_GP tglobaladdr:$global), +def : Pat <(store (i32 IntRegs:$src1), + (add (HexagonCONST32_GP tglobaladdr:$global), u16ImmPred:$offset)), - (STriw_GP tglobaladdr:$global, u16ImmPred:$offset, IntRegs:$src1)>; + (STriw_GP tglobaladdr:$global, u16ImmPred:$offset, (i32 IntRegs:$src1))>, + Requires<[NoV4T]>; // Map from store(globaladdress) -> memw(#foo + 0). let AddedComplexity = 100 in -def : Pat <(store IntRegs:$src1, (HexagonCONST32_GP tglobaladdr:$global)), - (STriw_GP tglobaladdr:$global, 0, IntRegs:$src1)>; +def : Pat <(store (i32 IntRegs:$src1), (HexagonCONST32_GP tglobaladdr:$global)), + (STriw_GP tglobaladdr:$global, 0, (i32 IntRegs:$src1))>; -// Map from store(globaladdress) -> memw(#foo + 0). +// Map from store(globaladdress) -> memw(#foo). let AddedComplexity = 100 in -def : Pat <(store IntRegs:$src1, (HexagonCONST32_GP tglobaladdr:$global)), - (STriw_GP tglobaladdr:$global, 0, IntRegs:$src1)>; +def : Pat <(store (i32 IntRegs:$src1), (HexagonCONST32_GP tglobaladdr:$global)), + (STriw_GP tglobaladdr:$global, 0, (i32 IntRegs:$src1))>, + Requires<[NoV4T]>; // Map from store(globaladdress + x) -> memh(#foo + x). let AddedComplexity = 100 in -def : Pat <(truncstorei16 IntRegs:$src1, +def : Pat <(truncstorei16 (i32 IntRegs:$src1), (add (HexagonCONST32_GP tglobaladdr:$global), u16ImmPred:$offset)), - (STrih_GP tglobaladdr:$global, u16ImmPred:$offset, IntRegs:$src1)>; + (STrih_GP tglobaladdr:$global, u16ImmPred:$offset, (i32 IntRegs:$src1))>, + Requires<[NoV4T]>; // Map from store(globaladdress) -> memh(#foo). let AddedComplexity = 100 in -def : Pat <(truncstorei16 IntRegs:$src1, +def : Pat <(truncstorei16 (i32 IntRegs:$src1), (HexagonCONST32_GP tglobaladdr:$global)), - (STh_GP tglobaladdr:$global, IntRegs:$src1)>; + (STh_GP tglobaladdr:$global, (i32 IntRegs:$src1))>, + Requires<[NoV4T]>; // Map from store(globaladdress + x) -> memb(#foo + x). let AddedComplexity = 100 in -def : Pat <(truncstorei8 IntRegs:$src1, +def : Pat <(truncstorei8 (i32 IntRegs:$src1), (add (HexagonCONST32_GP tglobaladdr:$global), u16ImmPred:$offset)), - (STrib_GP tglobaladdr:$global, u16ImmPred:$offset, IntRegs:$src1)>; + (STrib_GP tglobaladdr:$global, u16ImmPred:$offset, (i32 IntRegs:$src1))>, + Requires<[NoV4T]>; // Map from store(globaladdress) -> memb(#foo). let AddedComplexity = 100 in -def : Pat <(truncstorei8 IntRegs:$src1, +def : Pat <(truncstorei8 (i32 IntRegs:$src1), (HexagonCONST32_GP tglobaladdr:$global)), - (STb_GP tglobaladdr:$global, IntRegs:$src1)>; + (STb_GP tglobaladdr:$global, (i32 IntRegs:$src1))>, + Requires<[NoV4T]>; // Map from load(globaladdress + x) -> memw(#foo + x). let AddedComplexity = 100 in -def : Pat <(load (add (HexagonCONST32_GP tglobaladdr:$global), - u16ImmPred:$offset)), - (LDriw_GP tglobaladdr:$global, u16ImmPred:$offset)>; +def : Pat <(i32 (load (add (HexagonCONST32_GP tglobaladdr:$global), + u16ImmPred:$offset))), + (i32 (LDriw_GP tglobaladdr:$global, u16ImmPred:$offset))>, + Requires<[NoV4T]>; -// Map from load(globaladdress) -> memw(#foo + 0). +// Map from load(globaladdress) -> memw(#foo). let AddedComplexity = 100 in -def : Pat <(load (HexagonCONST32_GP tglobaladdr:$global)), - (LDw_GP tglobaladdr:$global)>; +def : Pat <(i32 (load (HexagonCONST32_GP tglobaladdr:$global))), + (i32 (LDw_GP tglobaladdr:$global))>, + Requires<[NoV4T]>; // Map from load(globaladdress + x) -> memd(#foo + x). 
let AddedComplexity = 100 in def : Pat <(i64 (load (add (HexagonCONST32_GP tglobaladdr:$global), u16ImmPred:$offset))), - (LDrid_GP tglobaladdr:$global, u16ImmPred:$offset)>; + (i64 (LDrid_GP tglobaladdr:$global, u16ImmPred:$offset))>, + Requires<[NoV4T]>; // Map from load(globaladdress) -> memw(#foo + 0). let AddedComplexity = 100 in def : Pat <(i64 (load (HexagonCONST32_GP tglobaladdr:$global))), - (LDd_GP tglobaladdr:$global)>; - + (i64 (LDd_GP tglobaladdr:$global))>, + Requires<[NoV4T]>; -// Map from Pd = load(globaladdress) -> Rd = memb(globaladdress + 0), Pd = Rd. +// Map from Pd = load(globaladdress) -> Rd = memb(globaladdress), Pd = Rd. let AddedComplexity = 100 in def : Pat <(i1 (load (HexagonCONST32_GP tglobaladdr:$global))), - (TFR_PdRs (LDrib_GP tglobaladdr:$global, 0))>; + (i1 (TFR_PdRs (i32 (LDb_GP tglobaladdr:$global))))>, + Requires<[NoV4T]>; // Map from load(globaladdress + x) -> memh(#foo + x). let AddedComplexity = 100 in -def : Pat <(sextloadi16 (add (HexagonCONST32_GP tglobaladdr:$global), - u16ImmPred:$offset)), - (LDrih_GP tglobaladdr:$global, u16ImmPred:$offset)>; +def : Pat <(i32 (extloadi16 (add (HexagonCONST32_GP tglobaladdr:$global), + u16ImmPred:$offset))), + (i32 (LDrih_GP tglobaladdr:$global, u16ImmPred:$offset))>, + Requires<[NoV4T]>; -// Map from load(globaladdress) -> memh(#foo + 0). +// Map from load(globaladdress) -> memh(#foo). let AddedComplexity = 100 in -def : Pat <(sextloadi16 (HexagonCONST32_GP tglobaladdr:$global)), - (LDrih_GP tglobaladdr:$global, 0)>; +def : Pat <(i32 (sextloadi16 (HexagonCONST32_GP tglobaladdr:$global))), + (i32 (LDrih_GP tglobaladdr:$global, 0))>, + Requires<[NoV4T]>; // Map from load(globaladdress + x) -> memuh(#foo + x). let AddedComplexity = 100 in -def : Pat <(zextloadi16 (add (HexagonCONST32_GP tglobaladdr:$global), - u16ImmPred:$offset)), - (LDriuh_GP tglobaladdr:$global, u16ImmPred:$offset)>; +def : Pat <(i32 (zextloadi16 (add (HexagonCONST32_GP tglobaladdr:$global), + u16ImmPred:$offset))), + (i32 (LDriuh_GP tglobaladdr:$global, u16ImmPred:$offset))>, + Requires<[NoV4T]>; -// Map from load(globaladdress) -> memuh(#foo + 0). +// Map from load(globaladdress) -> memuh(#foo). let AddedComplexity = 100 in -def : Pat <(zextloadi16 (HexagonCONST32_GP tglobaladdr:$global)), - (LDriuh_GP tglobaladdr:$global, 0)>; +def : Pat <(i32 (zextloadi16 (HexagonCONST32_GP tglobaladdr:$global))), + (i32 (LDriuh_GP tglobaladdr:$global, 0))>, + Requires<[NoV4T]>; -// Map from load(globaladdress + x) -> memuh(#foo + x). +// Map from load(globaladdress) -> memh(#foo). let AddedComplexity = 100 in -def : Pat <(extloadi16 (add (HexagonCONST32_GP tglobaladdr:$global), - u16ImmPred:$offset)), - (LDriuh_GP tglobaladdr:$global, u16ImmPred:$offset)>; +def : Pat <(i32 (sextloadi16 (HexagonCONST32_GP tglobaladdr:$global))), + (i32 (LDh_GP tglobaladdr:$global))>, + Requires<[NoV4T]>; -// Map from load(globaladdress) -> memuh(#foo + 0). -let AddedComplexity = 100 in -def : Pat <(extloadi16 (HexagonCONST32_GP tglobaladdr:$global)), - (LDriuh_GP tglobaladdr:$global, 0)>; -// Map from load(globaladdress + x) -> memub(#foo + x). +// Map from load(globaladdress) -> memuh(#foo). let AddedComplexity = 100 in -def : Pat <(zextloadi8 (add (HexagonCONST32_GP tglobaladdr:$global), - u16ImmPred:$offset)), - (LDriub_GP tglobaladdr:$global, u16ImmPred:$offset)>; +def : Pat <(i32 (zextloadi16 (HexagonCONST32_GP tglobaladdr:$global))), + (i32 (LDuh_GP tglobaladdr:$global))>, + Requires<[NoV4T]>; -// Map from load(globaladdress) -> memuh(#foo + 0).
+// Map from load(globaladdress + x) -> memb(#foo + x). let AddedComplexity = 100 in -def : Pat <(zextloadi8 (HexagonCONST32_GP tglobaladdr:$global)), - (LDriub_GP tglobaladdr:$global, 0)>; +def : Pat <(i32 (extloadi8 (add (HexagonCONST32_GP tglobaladdr:$global), + u16ImmPred:$offset))), + (i32 (LDrib_GP tglobaladdr:$global, u16ImmPred:$offset))>, + Requires<[NoV4T]>; // Map from load(globaladdress + x) -> memb(#foo + x). let AddedComplexity = 100 in -def : Pat <(sextloadi8 (add (HexagonCONST32_GP tglobaladdr:$global), - u16ImmPred:$offset)), - (LDrib_GP tglobaladdr:$global, u16ImmPred:$offset)>; +def : Pat <(i32 (sextloadi8 (add (HexagonCONST32_GP tglobaladdr:$global), + u16ImmPred:$offset))), + (i32 (LDrib_GP tglobaladdr:$global, u16ImmPred:$offset))>, + Requires<[NoV4T]>; + +// Map from load(globaladdress + x) -> memub(#foo + x). +let AddedComplexity = 100 in +def : Pat <(i32 (zextloadi8 (add (HexagonCONST32_GP tglobaladdr:$global), + u16ImmPred:$offset))), + (i32 (LDriub_GP tglobaladdr:$global, u16ImmPred:$offset))>, + Requires<[NoV4T]>; // Map from load(globaladdress) -> memb(#foo). let AddedComplexity = 100 in -def : Pat <(extloadi8 (HexagonCONST32_GP tglobaladdr:$global)), - (LDb_GP tglobaladdr:$global)>; +def : Pat <(i32 (extloadi8 (HexagonCONST32_GP tglobaladdr:$global))), + (i32 (LDb_GP tglobaladdr:$global))>, + Requires<[NoV4T]>; // Map from load(globaladdress) -> memb(#foo). let AddedComplexity = 100 in -def : Pat <(sextloadi8 (HexagonCONST32_GP tglobaladdr:$global)), - (LDb_GP tglobaladdr:$global)>; +def : Pat <(i32 (sextloadi8 (HexagonCONST32_GP tglobaladdr:$global))), + (i32 (LDb_GP tglobaladdr:$global))>, + Requires<[NoV4T]>; // Map from load(globaladdress) -> memub(#foo). let AddedComplexity = 100 in -def : Pat <(zextloadi8 (HexagonCONST32_GP tglobaladdr:$global)), - (LDub_GP tglobaladdr:$global)>; +def : Pat <(i32 (zextloadi8 (HexagonCONST32_GP tglobaladdr:$global))), + (i32 (LDub_GP tglobaladdr:$global))>, + Requires<[NoV4T]>; // When the Interprocedural Global Variable optimizer realizes that a // certain global variable takes only two constant values, it shrinks the // global to a boolean. Catch those loads here in the following 3 patterns. let AddedComplexity = 100 in -def : Pat <(extloadi1 (HexagonCONST32_GP tglobaladdr:$global)), - (LDb_GP tglobaladdr:$global)>; +def : Pat <(i32 (extloadi1 (HexagonCONST32_GP tglobaladdr:$global))), + (i32 (LDb_GP tglobaladdr:$global))>, + Requires<[NoV4T]>; let AddedComplexity = 100 in -def : Pat <(sextloadi1 (HexagonCONST32_GP tglobaladdr:$global)), - (LDb_GP tglobaladdr:$global)>; - -let AddedComplexity = 100 in -def : Pat <(zextloadi1 (HexagonCONST32_GP tglobaladdr:$global)), - (LDub_GP tglobaladdr:$global)>; - -// Map from load(globaladdress) -> memh(#foo). -let AddedComplexity = 100 in -def : Pat <(extloadi16 (HexagonCONST32_GP tglobaladdr:$global)), - (LDh_GP tglobaladdr:$global)>; - -// Map from load(globaladdress) -> memh(#foo). -let AddedComplexity = 100 in -def : Pat <(sextloadi16 (HexagonCONST32_GP tglobaladdr:$global)), - (LDh_GP tglobaladdr:$global)>; +def : Pat <(i32 (sextloadi1 (HexagonCONST32_GP tglobaladdr:$global))), + (i32 (LDb_GP tglobaladdr:$global))>, + Requires<[NoV4T]>; -// Map from load(globaladdress) -> memuh(#foo). let AddedComplexity = 100 in -def : Pat <(zextloadi16 (HexagonCONST32_GP tglobaladdr:$global)), - (LDuh_GP tglobaladdr:$global)>; +def : Pat <(i32 (zextloadi1 (HexagonCONST32_GP tglobaladdr:$global))), + (i32 (LDub_GP tglobaladdr:$global))>, + Requires<[NoV4T]>; // Map from i1 loads to 32 bits. 
This assumes that the i1* is byte aligned. def : Pat <(i32 (zextloadi1 ADDRriS11_0:$addr)), - (AND_rr (LDrib ADDRriS11_0:$addr), (TFRI 0x1))>; + (i32 (AND_rr (i32 (LDrib ADDRriS11_0:$addr)), (TFRI 0x1)))>; // Map from Rdd = sign_extend_inreg(Rss, i32) -> Rdd = SXTW(Rss.lo). -def : Pat <(i64 (sext_inreg DoubleRegs:$src1, i32)), - (i64 (SXTW (EXTRACT_SUBREG DoubleRegs:$src1, subreg_loreg)))>; +def : Pat <(i64 (sext_inreg (i64 DoubleRegs:$src1), i32)), + (i64 (SXTW (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src1), subreg_loreg))))>; // Map from Rdd = sign_extend_inreg(Rss, i16) -> Rdd = SXTW(SXTH(Rss.lo)). -def : Pat <(i64 (sext_inreg DoubleRegs:$src1, i16)), - (i64 (SXTW (SXTH (EXTRACT_SUBREG DoubleRegs:$src1, subreg_loreg))))>; +def : Pat <(i64 (sext_inreg (i64 DoubleRegs:$src1), i16)), + (i64 (SXTW (i32 (SXTH (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src1), + subreg_loreg))))))>; // Map from Rdd = sign_extend_inreg(Rss, i8) -> Rdd = SXTW(SXTB(Rss.lo)). -def : Pat <(i64 (sext_inreg DoubleRegs:$src1, i8)), - (i64 (SXTW (SXTB (EXTRACT_SUBREG DoubleRegs:$src1, subreg_loreg))))>; +def : Pat <(i64 (sext_inreg (i64 DoubleRegs:$src1), i8)), + (i64 (SXTW (i32 (SXTB (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src1), + subreg_loreg))))))>; -// We want to prevent emiting pnot's as much as possible. +// We want to prevent emitting pnot's as much as possible. // Map brcond with an unsupported setcc to a JMP_cNot. -def : Pat <(brcond (i1 (setne IntRegs:$src1, IntRegs:$src2)), bb:$offset), - (JMP_cNot (CMPEQrr IntRegs:$src1, IntRegs:$src2), bb:$offset)>; +def : Pat <(brcond (i1 (setne (i32 IntRegs:$src1), (i32 IntRegs:$src2))), + bb:$offset), + (JMP_cNot (CMPEQrr (i32 IntRegs:$src1), (i32 IntRegs:$src2)), + bb:$offset)>; -def : Pat <(brcond (i1 (setne IntRegs:$src1, s10ImmPred:$src2)), bb:$offset), - (JMP_cNot (CMPEQri IntRegs:$src1, s10ImmPred:$src2), bb:$offset)>; +def : Pat <(brcond (i1 (setne (i32 IntRegs:$src1), s10ImmPred:$src2)), + bb:$offset), + (JMP_cNot (CMPEQri (i32 IntRegs:$src1), s10ImmPred:$src2), bb:$offset)>; -def : Pat <(brcond (i1 (setne PredRegs:$src1, (i1 -1))), bb:$offset), - (JMP_cNot PredRegs:$src1, bb:$offset)>; +def : Pat <(brcond (i1 (setne (i1 PredRegs:$src1), (i1 -1))), bb:$offset), + (JMP_cNot (i1 PredRegs:$src1), bb:$offset)>; -def : Pat <(brcond (i1 (setne PredRegs:$src1, (i1 0))), bb:$offset), - (JMP_c PredRegs:$src1, bb:$offset)>; +def : Pat <(brcond (i1 (setne (i1 PredRegs:$src1), (i1 0))), bb:$offset), + (JMP_c (i1 PredRegs:$src1), bb:$offset)>; -def : Pat <(brcond (i1 (setlt IntRegs:$src1, s8ImmPred:$src2)), bb:$offset), - (JMP_cNot (CMPGEri IntRegs:$src1, s8ImmPred:$src2), bb:$offset)>; +def : Pat <(brcond (i1 (setlt (i32 IntRegs:$src1), s8ImmPred:$src2)), + bb:$offset), + (JMP_cNot (CMPGEri (i32 IntRegs:$src1), s8ImmPred:$src2), bb:$offset)>; -def : Pat <(brcond (i1 (setlt IntRegs:$src1, IntRegs:$src2)), bb:$offset), - (JMP_c (CMPLTrr IntRegs:$src1, IntRegs:$src2), bb:$offset)>; +def : Pat <(brcond (i1 (setlt (i32 IntRegs:$src1), (i32 IntRegs:$src2))), + bb:$offset), + (JMP_c (CMPLTrr (i32 IntRegs:$src1), (i32 IntRegs:$src2)), bb:$offset)>; -def : Pat <(brcond (i1 (setuge DoubleRegs:$src1, DoubleRegs:$src2)), +def : Pat <(brcond (i1 (setuge (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))), bb:$offset), - (JMP_cNot (CMPGTU64rr DoubleRegs:$src2, DoubleRegs:$src1), + (JMP_cNot (CMPGTU64rr (i64 DoubleRegs:$src2), (i64 DoubleRegs:$src1)), bb:$offset)>; -def : Pat <(brcond (i1 (setule IntRegs:$src1, IntRegs:$src2)), bb:$offset), - (JMP_cNot (CMPGTUrr IntRegs:$src1, IntRegs:$src2), 
bb:$offset)>; +def : Pat <(brcond (i1 (setule (i32 IntRegs:$src1), (i32 IntRegs:$src2))), + bb:$offset), + (JMP_cNot (CMPGTUrr (i32 IntRegs:$src1), (i32 IntRegs:$src2)), + bb:$offset)>; -def : Pat <(brcond (i1 (setule DoubleRegs:$src1, DoubleRegs:$src2)), +def : Pat <(brcond (i1 (setule (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))), bb:$offset), - (JMP_cNot (CMPGTU64rr DoubleRegs:$src1, DoubleRegs:$src2), - bb:$offset)>; + (JMP_cNot (CMPGTU64rr (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2)), + bb:$offset)>; // Map from a 64-bit select to an emulated 64-bit mux. // Hexagon does not support 64-bit MUXes; so emulate with combines. -def : Pat <(select PredRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3), - (COMBINE_rr - (MUX_rr PredRegs:$src1, - (EXTRACT_SUBREG DoubleRegs:$src2, subreg_hireg), - (EXTRACT_SUBREG DoubleRegs:$src3, subreg_hireg)), - (MUX_rr PredRegs:$src1, - (EXTRACT_SUBREG DoubleRegs:$src2, subreg_loreg), - (EXTRACT_SUBREG DoubleRegs:$src3, subreg_loreg)))>; +def : Pat <(select (i1 PredRegs:$src1), (i64 DoubleRegs:$src2), + (i64 DoubleRegs:$src3)), + (i64 (COMBINE_rr (i32 (MUX_rr (i1 PredRegs:$src1), + (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src2), + subreg_hireg)), + (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src3), + subreg_hireg)))), + (i32 (MUX_rr (i1 PredRegs:$src1), + (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src2), + subreg_loreg)), + (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src3), + subreg_loreg))))))>; // Map from a 1-bit select to logical ops. // From LegalizeDAG.cpp: (B1 ? B2 : B3) <=> (B1 & B2)|(!B1&B3). -def : Pat <(select PredRegs:$src1, PredRegs:$src2, PredRegs:$src3), - (OR_pp (AND_pp PredRegs:$src1, PredRegs:$src2), - (AND_pp (NOT_p PredRegs:$src1), PredRegs:$src3))>; +def : Pat <(select (i1 PredRegs:$src1), (i1 PredRegs:$src2), + (i1 PredRegs:$src3)), + (OR_pp (AND_pp (i1 PredRegs:$src1), (i1 PredRegs:$src2)), + (AND_pp (NOT_p (i1 PredRegs:$src1)), (i1 PredRegs:$src3)))>; // Map Pd = load(addr) -> Rs = load(addr); Pd = Rs. def : Pat<(i1 (load ADDRriS11_2:$addr)), (i1 (TFR_PdRs (i32 (LDrib ADDRriS11_2:$addr))))>; // Map for truncating from 64 immediates to 32 bit immediates. -def : Pat<(i32 (trunc DoubleRegs:$src)), - (i32 (EXTRACT_SUBREG DoubleRegs:$src, subreg_loreg))>; +def : Pat<(i32 (trunc (i64 DoubleRegs:$src))), + (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src), subreg_loreg))>; // Map for truncating from i64 immediates to i1 bit immediates. -def : Pat<(i1 (trunc DoubleRegs:$src)), - (i1 (TFR_PdRs (i32(EXTRACT_SUBREG DoubleRegs:$src, subreg_loreg))))>; +def : Pat<(i1 (trunc (i64 DoubleRegs:$src))), + (i1 (TFR_PdRs (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src), + subreg_loreg))))>; // Map memb(Rs) = Rdd -> memb(Rs) = Rt. -def : Pat<(truncstorei8 DoubleRegs:$src, ADDRriS11_0:$addr), - (STrib ADDRriS11_0:$addr, (i32 (EXTRACT_SUBREG DoubleRegs:$src, +def : Pat<(truncstorei8 (i64 DoubleRegs:$src), ADDRriS11_0:$addr), + (STrib ADDRriS11_0:$addr, (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src), subreg_loreg)))>; // Map memh(Rs) = Rdd -> memh(Rs) = Rt. -def : Pat<(truncstorei16 DoubleRegs:$src, ADDRriS11_0:$addr), - (STrih ADDRriS11_0:$addr, (i32 (EXTRACT_SUBREG DoubleRegs:$src, +def : Pat<(truncstorei16 (i64 DoubleRegs:$src), ADDRriS11_0:$addr), + (STrih ADDRriS11_0:$addr, (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src), + subreg_loreg)))>; +// Map memw(Rs) = Rdd -> memw(Rs) = Rt +def : Pat<(truncstorei32 (i64 DoubleRegs:$src), ADDRriS11_0:$addr), + (STriw ADDRriS11_0:$addr, (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src), subreg_loreg)))>; // Map memw(Rs) = Rdd -> memw(Rs) = Rt. 
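// (Illustrative: a truncating store of a 64-bit pair only writes the low
// word, so for C code like
//   void f(long long x, int *p) { *p = (int)x; }
// the pattern below selects roughly
//   memw(r2+#0) = r0     // r1:0 holds x, r0 is its low half, r2 holds p
// the register assignment is hypothetical; only the subreg_loreg extract is
// what the pattern actually prescribes.)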
-def : Pat<(truncstorei32 DoubleRegs:$src, ADDRriS11_0:$addr), - (STriw ADDRriS11_0:$addr, (i32 (EXTRACT_SUBREG DoubleRegs:$src, +def : Pat<(truncstorei32 (i64 DoubleRegs:$src), ADDRriS11_0:$addr), + (STriw ADDRriS11_0:$addr, (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src), subreg_loreg)))>; // Map from i1 = constant<-1>; memw(addr) = i1 -> r0 = 1; memw(addr) = r0. @@ -2763,118 +3125,134 @@ let AddedComplexity = 100 in // Map from i1 = constant<-1>; memw(CONST32(#foo)) = i1 -> r0 = 1; // memw(#foo) = r0 def : Pat<(store (i1 -1), (HexagonCONST32_GP tglobaladdr:$global)), - (STb_GP tglobaladdr:$global, (TFRI 1))>; - + (STb_GP tglobaladdr:$global, (TFRI 1))>, + Requires<[NoV4T]>; // Map from i1 = constant<-1>; store i1 -> r0 = 1; store r0. def : Pat<(store (i1 -1), ADDRriS11_2:$addr), (STrib ADDRriS11_2:$addr, (TFRI 1))>; // Map from memb(Rs) = Pd -> Rt = mux(Pd, #0, #1); store Rt. -def : Pat<(store PredRegs:$src1, ADDRriS11_2:$addr), - (STrib ADDRriS11_2:$addr, (i32 (MUX_ii PredRegs:$src1, 1, 0)) )>; +def : Pat<(store (i1 PredRegs:$src1), ADDRriS11_2:$addr), + (STrib ADDRriS11_2:$addr, (i32 (MUX_ii (i1 PredRegs:$src1), 1, 0)) )>; // Map Rdd = anyext(Rs) -> Rdd = sxtw(Rs). // Hexagon_TODO: We can probably use combine but that will cost 2 instructions. // Better way to do this? -def : Pat<(i64 (anyext IntRegs:$src1)), - (i64 (SXTW IntRegs:$src1))>; +def : Pat<(i64 (anyext (i32 IntRegs:$src1))), + (i64 (SXTW (i32 IntRegs:$src1)))>; // Map cmple -> cmpgt. // rs <= rt -> !(rs > rt). -def : Pat<(i1 (setle IntRegs:$src1, s10ImmPred:$src2)), - (i1 (NOT_p (CMPGTri IntRegs:$src1, s10ImmPred:$src2)))>; +def : Pat<(i1 (setle (i32 IntRegs:$src1), s10ImmPred:$src2)), + (i1 (NOT_p (CMPGTri (i32 IntRegs:$src1), s10ImmPred:$src2)))>; // rs <= rt -> !(rs > rt). -def : Pat<(i1 (setle IntRegs:$src1, IntRegs:$src2)), - (i1 (NOT_p (CMPGTrr IntRegs:$src1, IntRegs:$src2)))>; +def : Pat<(i1 (setle (i32 IntRegs:$src1), (i32 IntRegs:$src2))), + (i1 (NOT_p (CMPGTrr (i32 IntRegs:$src1), (i32 IntRegs:$src2))))>; // Rss <= Rtt -> !(Rss > Rtt). -def : Pat<(i1 (setle DoubleRegs:$src1, DoubleRegs:$src2)), - (i1 (NOT_p (CMPGT64rr DoubleRegs:$src1, DoubleRegs:$src2)))>; +def : Pat<(i1 (setle (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))), + (i1 (NOT_p (CMPGT64rr (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))))>; // Map cmpne -> cmpeq. // Hexagon_TODO: We should improve on this. // rs != rt -> !(rs == rt). -def : Pat <(i1 (setne IntRegs:$src1, s10ImmPred:$src2)), - (i1 (NOT_p(i1 (CMPEQri IntRegs:$src1, s10ImmPred:$src2))))>; +def : Pat <(i1 (setne (i32 IntRegs:$src1), s10ImmPred:$src2)), + (i1 (NOT_p(i1 (CMPEQri (i32 IntRegs:$src1), s10ImmPred:$src2))))>; // Map cmpne(Rs) -> !cmpeqe(Rs). // rs != rt -> !(rs == rt). -def : Pat <(i1 (setne IntRegs:$src1, IntRegs:$src2)), - (i1 (NOT_p(i1 (CMPEQrr IntRegs:$src1, IntRegs:$src2))))>; +def : Pat <(i1 (setne (i32 IntRegs:$src1), (i32 IntRegs:$src2))), + (i1 (NOT_p (i1 (CMPEQrr (i32 IntRegs:$src1), (i32 IntRegs:$src2)))))>; // Convert setne back to xor for hexagon since we compute w/ pred registers. -def : Pat <(i1 (setne PredRegs:$src1, PredRegs:$src2)), - (i1 (XOR_pp PredRegs:$src1, PredRegs:$src2))>; +def : Pat <(i1 (setne (i1 PredRegs:$src1), (i1 PredRegs:$src2))), + (i1 (XOR_pp (i1 PredRegs:$src1), (i1 PredRegs:$src2)))>; // Map cmpne(Rss) -> !cmpew(Rss). // rs != rt -> !(rs == rt). 
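// (Illustrative: there is no native cmp.ne, so an i1 inequality costs a
// compare plus a predicate inversion, conceptually
//   p0 = cmp.eq(r1:0, r3:2)
//   p0 = not(p0)          // the NOT_p in the pattern below
// register names are for exposition only.)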
-def : Pat <(i1 (setne DoubleRegs:$src1, DoubleRegs:$src2)), - (i1 (NOT_p(i1 (CMPEHexagon4rr DoubleRegs:$src1, DoubleRegs:$src2))))>; +def : Pat <(i1 (setne (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))), + (i1 (NOT_p (i1 (CMPEHexagon4rr (i64 DoubleRegs:$src1), + (i64 DoubleRegs:$src2)))))>; // Map cmpge(Rs, Rt) -> !(cmpgt(Rs, Rt). // rs >= rt -> !(rt > rs). -def : Pat <(i1 (setge IntRegs:$src1, IntRegs:$src2)), - (i1 (NOT_p(i1 (CMPGTrr IntRegs:$src2, IntRegs:$src1))))>; +def : Pat <(i1 (setge (i32 IntRegs:$src1), (i32 IntRegs:$src2))), + (i1 (NOT_p (i1 (CMPGTrr (i32 IntRegs:$src2), (i32 IntRegs:$src1)))))>; -def : Pat <(i1 (setge IntRegs:$src1, s8ImmPred:$src2)), - (i1 (CMPGEri IntRegs:$src1, s8ImmPred:$src2))>; +def : Pat <(i1 (setge (i32 IntRegs:$src1), s8ImmPred:$src2)), + (i1 (CMPGEri (i32 IntRegs:$src1), s8ImmPred:$src2))>; // Map cmpge(Rss, Rtt) -> !cmpgt(Rtt, Rss). // rss >= rtt -> !(rtt > rss). -def : Pat <(i1 (setge DoubleRegs:$src1, DoubleRegs:$src2)), - (i1 (NOT_p(i1 (CMPGT64rr DoubleRegs:$src2, DoubleRegs:$src1))))>; +def : Pat <(i1 (setge (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))), + (i1 (NOT_p (i1 (CMPGT64rr (i64 DoubleRegs:$src2), + (i64 DoubleRegs:$src1)))))>; // Map cmplt(Rs, Imm) -> !cmpge(Rs, Imm). // rs < rt -> !(rs >= rt). -def : Pat <(i1 (setlt IntRegs:$src1, s8ImmPred:$src2)), - (i1 (NOT_p (CMPGEri IntRegs:$src1, s8ImmPred:$src2)))>; +def : Pat <(i1 (setlt (i32 IntRegs:$src1), s8ImmPred:$src2)), + (i1 (NOT_p (CMPGEri (i32 IntRegs:$src1), s8ImmPred:$src2)))>; -// Map cmplt(Rs, Rt) -> cmplt(Rs, Rt). -// rs < rt -> rs < rt. Let assembler map it. -def : Pat <(i1 (setlt IntRegs:$src1, IntRegs:$src2)), - (i1 (CMPLTrr IntRegs:$src2, IntRegs:$src1))>; +// Map cmplt(Rs, Rt) -> cmpgt(Rt, Rs). +// rs < rt -> rt > rs. +// We can let the assembler map it, or we can do it in the compiler itself. +def : Pat <(i1 (setlt (i32 IntRegs:$src1), (i32 IntRegs:$src2))), + (i1 (CMPGTrr (i32 IntRegs:$src2), (i32 IntRegs:$src1)))>; // Map cmplt(Rss, Rtt) -> cmpgt(Rtt, Rss). // rss < rtt -> (rtt > rss). -def : Pat <(i1 (setlt DoubleRegs:$src1, DoubleRegs:$src2)), - (i1 (CMPGT64rr DoubleRegs:$src2, DoubleRegs:$src1))>; +def : Pat <(i1 (setlt (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))), + (i1 (CMPGT64rr (i64 DoubleRegs:$src2), (i64 DoubleRegs:$src1)))>; -// Map from cmpltu(Rs, Rd) -> !cmpgtu(Rs, Rd - 1). +// Map from cmpltu(Rs, Rd) -> cmpgtu(Rd, Rs). // rs < rt -> rt > rs. -def : Pat <(i1 (setult IntRegs:$src1, IntRegs:$src2)), - (i1 (CMPGTUrr IntRegs:$src2, IntRegs:$src1))>; +// We can let the assembler map it, or we can do it in the compiler itself. +def : Pat <(i1 (setult (i32 IntRegs:$src1), (i32 IntRegs:$src2))), + (i1 (CMPGTUrr (i32 IntRegs:$src2), (i32 IntRegs:$src1)))>; -// Map from cmpltu(Rss, Rdd) -> !cmpgtu(Rss, Rdd - 1). +// Map from cmpltu(Rss, Rdd) -> cmpgtu(Rdd, Rss). // rs < rt -> rt > rs. -def : Pat <(i1 (setult DoubleRegs:$src1, DoubleRegs:$src2)), - (i1 (CMPGTU64rr DoubleRegs:$src2, DoubleRegs:$src1))>; +def : Pat <(i1 (setult (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))), + (i1 (CMPGTU64rr (i64 DoubleRegs:$src2), (i64 DoubleRegs:$src1)))>; + +// Generate cmpgeu(Rs, #u8) +def : Pat <(i1 (setuge (i32 IntRegs:$src1), u8ImmPred:$src2)), + (i1 (CMPGEUri (i32 IntRegs:$src1), u8ImmPred:$src2))>; + +// Generate cmpgtu(Rs, #u9) +def : Pat <(i1 (setugt (i32 IntRegs:$src1), u9ImmPred:$src2)), + (i1 (CMPGTUri (i32 IntRegs:$src1), u9ImmPred:$src2))>; // Map from Rs >= Rt -> !(Rt > Rs). // rs >= rt -> !(rt > rs).
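// (Aside on the CMPGEUri/CMPGTUri patterns added just above: they express the
// same unsigned bound two ways. For an unsigned x,
//   x >= 10  selects  cmp.geu(Rs, #10)   -- setuge, #u8 immediate range
//   x > 9    selects  cmp.gtu(Rs, #9)    -- setugt, #u9 immediate range
// and the two tests agree, since x >= 10 <=> x > 9 over the integers.)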
-def : Pat <(i1 (setuge IntRegs:$src1, IntRegs:$src2)), - (i1 (NOT_p (CMPGTUrr IntRegs:$src2, IntRegs:$src1)))>; +def : Pat <(i1 (setuge (i32 IntRegs:$src1), (i32 IntRegs:$src2))), + (i1 (NOT_p (CMPGTUrr (i32 IntRegs:$src2), (i32 IntRegs:$src1))))>; // Map from Rs >= Rt -> !(Rt > Rs). // rs >= rt -> !(rt > rs). -def : Pat <(i1 (setuge DoubleRegs:$src1, DoubleRegs:$src2)), - (i1 (NOT_p (CMPGTU64rr DoubleRegs:$src2, DoubleRegs:$src1)))>; +def : Pat <(i1 (setuge (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))), + (i1 (NOT_p (CMPGTU64rr (i64 DoubleRegs:$src2), (i64 DoubleRegs:$src1))))>; // Map from cmpleu(Rs, Rs) -> !cmpgtu(Rs, Rs). // Map from (Rs <= Rt) -> !(Rs > Rt). -def : Pat <(i1 (setule IntRegs:$src1, IntRegs:$src2)), - (i1 (NOT_p (CMPGTUrr IntRegs:$src1, IntRegs:$src2)))>; +def : Pat <(i1 (setule (i32 IntRegs:$src1), (i32 IntRegs:$src2))), + (i1 (NOT_p (CMPGTUrr (i32 IntRegs:$src1), (i32 IntRegs:$src2))))>; // Map from cmpleu(Rss, Rtt) -> !cmpgtu(Rss, Rtt-1). // Map from (Rs <= Rt) -> !(Rs > Rt). -def : Pat <(i1 (setule DoubleRegs:$src1, DoubleRegs:$src2)), - (i1 (NOT_p (CMPGTU64rr DoubleRegs:$src1, DoubleRegs:$src2)))>; +def : Pat <(i1 (setule (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))), + (i1 (NOT_p (CMPGTU64rr (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))))>; // Sign extends. // i1 -> i32 -def : Pat <(i32 (sext PredRegs:$src1)), - (i32 (MUX_ii PredRegs:$src1, -1, 0))>; +def : Pat <(i32 (sext (i1 PredRegs:$src1))), + (i32 (MUX_ii (i1 PredRegs:$src1), -1, 0))>; + +// i1 -> i64 +def : Pat <(i64 (sext (i1 PredRegs:$src1))), + (i64 (COMBINE_rr (TFRI -1), (MUX_ii (i1 PredRegs:$src1), -1, 0)))>; // Convert sign-extended load back to load and sign extend. // i8 -> i64 @@ -2899,16 +3277,16 @@ def: Pat <(i64 (sextloadi32 ADDRriS11_2:$src1)), // Zero extends. // i1 -> i32 -def : Pat <(i32 (zext PredRegs:$src1)), - (i32 (MUX_ii PredRegs:$src1, 1, 0))>; +def : Pat <(i32 (zext (i1 PredRegs:$src1))), + (i32 (MUX_ii (i1 PredRegs:$src1), 1, 0))>; // i1 -> i64 -def : Pat <(i64 (zext PredRegs:$src1)), - (i64 (COMBINE_rr (TFRI 0), (MUX_ii PredRegs:$src1, 1, 0)))>; +def : Pat <(i64 (zext (i1 PredRegs:$src1))), + (i64 (COMBINE_rr (TFRI 0), (MUX_ii (i1 PredRegs:$src1), 1, 0)))>; // i32 -> i64 -def : Pat <(i64 (zext IntRegs:$src1)), - (i64 (COMBINE_rr (TFRI 0), IntRegs:$src1))>; +def : Pat <(i64 (zext (i32 IntRegs:$src1))), + (i64 (COMBINE_rr (TFRI 0), (i32 IntRegs:$src1)))>; // i8 -> i64 def: Pat <(i64 (zextloadi8 ADDRriS11_0:$src1)), @@ -2926,16 +3304,16 @@ def: Pat <(i32 (zextloadi1 ADDRriS11_0:$src1)), (i32 (LDriw ADDRriS11_0:$src1))>; // Map from Rs = Pd to Pd = mux(Pd, #1, #0) -def : Pat <(i32 (zext PredRegs:$src1)), - (i32 (MUX_ii PredRegs:$src1, 1, 0))>; +def : Pat <(i32 (zext (i1 PredRegs:$src1))), + (i32 (MUX_ii (i1 PredRegs:$src1), 1, 0))>; // Map from Rs = Pd to Pd = mux(Pd, #1, #0) -def : Pat <(i32 (anyext PredRegs:$src1)), - (i32 (MUX_ii PredRegs:$src1, 1, 0))>; +def : Pat <(i32 (anyext (i1 PredRegs:$src1))), + (i32 (MUX_ii (i1 PredRegs:$src1), 1, 0))>; // Map from Rss = Pd to Rdd = sxtw (mux(Pd, #1, #0)) -def : Pat <(i64 (anyext PredRegs:$src1)), - (i64 (SXTW (i32 (MUX_ii PredRegs:$src1, 1, 0))))>; +def : Pat <(i64 (anyext (i1 PredRegs:$src1))), + (i64 (SXTW (i32 (MUX_ii (i1 PredRegs:$src1), 1, 0))))>; // Any extended 64-bit load. @@ -2948,75 +3326,103 @@ def: Pat <(i64 (extloadi16 ADDRriS11_2:$src1)), (i64 (COMBINE_rr (TFRI 0), (LDrih ADDRriS11_2:$src1)))>; // Map from Rdd = zxtw(Rs) -> Rdd = combine(0, Rs). 
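// (Illustrative: the zxtw expansion below is just a register-pair build,
//   r3 = #0                 // TFRI 0
//   r1:0 = combine(r3, r2)  // high word 0, low word = r2
// yielding the i64 zero-extension of r2; r2/r3 are hypothetical.)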
-def : Pat<(i64 (zext IntRegs:$src1)), - (i64 (COMBINE_rr (TFRI 0), IntRegs:$src1))>; +def : Pat<(i64 (zext (i32 IntRegs:$src1))), + (i64 (COMBINE_rr (TFRI 0), (i32 IntRegs:$src1)))>; // Multiply 64-bit unsigned and use upper result. -def : Pat <(mulhu DoubleRegs:$src1, DoubleRegs:$src2), - (MPYU64_acc(COMBINE_rr (TFRI 0), - (EXTRACT_SUBREG - (LSRd_ri(MPYU64_acc(MPYU64_acc(COMBINE_rr (TFRI 0), - (EXTRACT_SUBREG (LSRd_ri(MPYU64 - (EXTRACT_SUBREG DoubleRegs:$src1, - subreg_loreg), - (EXTRACT_SUBREG DoubleRegs:$src2, - subreg_loreg)), - 32) ,subreg_loreg)), - (EXTRACT_SUBREG DoubleRegs:$src1, - subreg_hireg), - (EXTRACT_SUBREG DoubleRegs:$src2, - subreg_loreg)), - (EXTRACT_SUBREG DoubleRegs:$src1, subreg_loreg), - (EXTRACT_SUBREG DoubleRegs:$src2, subreg_hireg)), - 32),subreg_loreg)), - (EXTRACT_SUBREG DoubleRegs:$src1, subreg_hireg), - (EXTRACT_SUBREG DoubleRegs:$src2, subreg_hireg) - )>; +def : Pat <(mulhu (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2)), + (i64 + (MPYU64_acc + (i64 + (COMBINE_rr + (TFRI 0), + (i32 + (EXTRACT_SUBREG + (i64 + (LSRd_ri + (i64 + (MPYU64_acc + (i64 + (MPYU64_acc + (i64 + (COMBINE_rr (TFRI 0), + (i32 + (EXTRACT_SUBREG + (i64 + (LSRd_ri + (i64 + (MPYU64 (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src1), + subreg_loreg)), + (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src2), + subreg_loreg)))), 32)), + subreg_loreg)))), + (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src1), subreg_hireg)), + (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src2), subreg_loreg)))), + (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src1), subreg_loreg)), + (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src2), subreg_hireg)))), + 32)), subreg_loreg)))), + (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src1), subreg_hireg)), + (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src2), subreg_hireg))))>; // Multiply 64-bit signed and use upper result. -def : Pat <(mulhs DoubleRegs:$src1, DoubleRegs:$src2), - (MPY64_acc(COMBINE_rr (TFRI 0), - (EXTRACT_SUBREG - (LSRd_ri(MPY64_acc(MPY64_acc(COMBINE_rr (TFRI 0), - (EXTRACT_SUBREG (LSRd_ri(MPYU64 - (EXTRACT_SUBREG DoubleRegs:$src1, - subreg_loreg), - (EXTRACT_SUBREG DoubleRegs:$src2, - subreg_loreg)), - 32) ,subreg_loreg)), - (EXTRACT_SUBREG DoubleRegs:$src1, - subreg_hireg), - (EXTRACT_SUBREG DoubleRegs:$src2, - subreg_loreg)), - (EXTRACT_SUBREG DoubleRegs:$src1, subreg_loreg), - (EXTRACT_SUBREG DoubleRegs:$src2, subreg_hireg)), - 32),subreg_loreg)), - (EXTRACT_SUBREG DoubleRegs:$src1, subreg_hireg), - (EXTRACT_SUBREG DoubleRegs:$src2, subreg_hireg) - )>; +def : Pat <(mulhs (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2)), + (i64 + (MPY64_acc + (i64 + (COMBINE_rr (TFRI 0), + (i32 + (EXTRACT_SUBREG + (i64 + (LSRd_ri + (i64 + (MPY64_acc + (i64 + (MPY64_acc + (i64 + (COMBINE_rr (TFRI 0), + (i32 + (EXTRACT_SUBREG + (i64 + (LSRd_ri + (i64 + (MPYU64 (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src1), + subreg_loreg)), + (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src2), + subreg_loreg)))), 32)), + subreg_loreg)))), + (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src1), subreg_hireg)), + (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src2), subreg_loreg)))), + (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src1), subreg_loreg)), + (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src2), subreg_hireg)))), + 32)), subreg_loreg)))), + (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src1), subreg_hireg)), + (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src2), subreg_hireg))))>; // Hexagon specific ISD nodes. 
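// (Context sketch, best-effort reading: ADJDYNALLOC below materializes the
// stack-pointer adjustment for dynamic allocas as a plain add(Rs, #imm); the
// reworked SDTypeProfile pins the result and first operand to i32 rather than
// only requiring them to match, in keeping with this patch's move to fully
// typed patterns.)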
-def SDTHexagonADJDYNALLOC : SDTypeProfile<1, 2, [SDTCisSameAs<0, 1>]>; +//def SDTHexagonADJDYNALLOC : SDTypeProfile<1, 2, [SDTCisSameAs<0, 1>]>; +def SDTHexagonADJDYNALLOC : SDTypeProfile<1, 2, + [SDTCisVT<0, i32>, SDTCisVT<1, i32>]>; def Hexagon_ADJDYNALLOC : SDNode<"HexagonISD::ADJDYNALLOC", - SDTHexagonADJDYNALLOC>; + SDTHexagonADJDYNALLOC>; // Needed to tag these instructions for stack layout. let usesCustomInserter = 1 in def ADJDYNALLOC : ALU32_ri<(outs IntRegs:$dst), (ins IntRegs:$src1, s16Imm:$src2), "$dst = add($src1, #$src2)", - [(set IntRegs:$dst, (Hexagon_ADJDYNALLOC IntRegs:$src1, - s16ImmPred:$src2))]>; + [(set (i32 IntRegs:$dst), + (Hexagon_ADJDYNALLOC (i32 IntRegs:$src1), + s16ImmPred:$src2))]>; -def SDTHexagonARGEXTEND : SDTypeProfile<1, 1, []>; +def SDTHexagonARGEXTEND : SDTypeProfile<1, 1, [SDTCisVT<0, i32>]>; def Hexagon_ARGEXTEND : SDNode<"HexagonISD::ARGEXTEND", SDTHexagonARGEXTEND>; def ARGEXTEND : ALU32_rr <(outs IntRegs:$dst), (ins IntRegs:$src1), "$dst = $src1", - [(set IntRegs:$dst, (Hexagon_ARGEXTEND IntRegs:$src1))]>; + [(set (i32 IntRegs:$dst), + (Hexagon_ARGEXTEND (i32 IntRegs:$src1)))]>; let AddedComplexity = 100 in -def : Pat<(i32 (sext_inreg (Hexagon_ARGEXTEND IntRegs:$src1), i16)), - (TFR IntRegs:$src1)>; - +def : Pat<(i32 (sext_inreg (Hexagon_ARGEXTEND (i32 IntRegs:$src1)), i16)), + (COPY (i32 IntRegs:$src1))>; def SDHexagonBR_JT: SDTypeProfile<0, 1, [SDTCisVT<0, i32>]>; def HexagonBR_JT: SDNode<"HexagonISD::BR_JT", SDHexagonBR_JT, [SDNPHasChain]>; @@ -3024,12 +3430,91 @@ def HexagonBR_JT: SDNode<"HexagonISD::BR_JT", SDHexagonBR_JT, [SDNPHasChain]>; let isBranch=1, isIndirectBranch=1, isTerminator=1, isBarrier = 1 in def BR_JT : JRInst<(outs), (ins IntRegs:$src), "jumpr $src", - [(HexagonBR_JT IntRegs:$src)]>; + [(HexagonBR_JT (i32 IntRegs:$src))]>; + def HexagonWrapperJT: SDNode<"HexagonISD::WrapperJT", SDTIntUnaryOp>; def : Pat<(HexagonWrapperJT tjumptable:$dst), - (CONST32_set_jt tjumptable:$dst)>; + (i32 (CONST32_set_jt tjumptable:$dst))>; + +// XTYPE/SHIFT + +// Multi-class for logical operators : +// Shift by immediate/register and accumulate/logical +multiclass xtype_imm { + def _ri : SInst_acc<(outs IntRegs:$dst), + (ins IntRegs:$src1, IntRegs:$src2, u5Imm:$src3), + !strconcat("$dst ", !strconcat(OpcStr, "($src2, #$src3)")), + [(set (i32 IntRegs:$dst), + (OpNode2 (i32 IntRegs:$src1), + (OpNode1 (i32 IntRegs:$src2), + u5ImmPred:$src3)))], + "$src1 = $dst">; + + def d_ri : SInst_acc<(outs DoubleRegs:$dst), + (ins DoubleRegs:$src1, DoubleRegs:$src2, u6Imm:$src3), + !strconcat("$dst ", !strconcat(OpcStr, "($src2, #$src3)")), + [(set (i64 DoubleRegs:$dst), (OpNode2 (i64 DoubleRegs:$src1), + (OpNode1 (i64 DoubleRegs:$src2), u6ImmPred:$src3)))], + "$src1 = $dst">; +} + +// Multi-class for logical operators : +// Shift by register and accumulate/logical (32/64 bits) +multiclass xtype_reg { + def _rr : SInst_acc<(outs IntRegs:$dst), + (ins IntRegs:$src1, IntRegs:$src2, IntRegs:$src3), + !strconcat("$dst ", !strconcat(OpcStr, "($src2, $src3)")), + [(set (i32 IntRegs:$dst), + (OpNode2 (i32 IntRegs:$src1), + (OpNode1 (i32 IntRegs:$src2), + (i32 IntRegs:$src3))))], + "$src1 = $dst">; + + def d_rr : SInst_acc<(outs DoubleRegs:$dst), + (ins DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3), + !strconcat("$dst ", !strconcat(OpcStr, "($src2, $src3)")), + [(set (i64 DoubleRegs:$dst), + (OpNode2 (i64 DoubleRegs:$src1), + (OpNode1 (i64 DoubleRegs:$src2), + (i32 IntRegs:$src3))))], + "$src1 = $dst">; + +} + +multiclass basic_xtype_imm { +let AddedComplexity = 100 
in + defm _ADD : xtype_imm< !strconcat("+= ", OpcStr), OpNode, add>; + defm _SUB : xtype_imm< !strconcat("-= ", OpcStr), OpNode, sub>; + defm _AND : xtype_imm< !strconcat("&= ", OpcStr), OpNode, and>; + defm _OR : xtype_imm< !strconcat("|= ", OpcStr), OpNode, or>; +} + +multiclass basic_xtype_reg { +let AddedComplexity = 100 in + defm _ADD : xtype_reg< !strconcat("+= ", OpcStr), OpNode, add>; + defm _SUB : xtype_reg< !strconcat("-= ", OpcStr), OpNode, sub>; + defm _AND : xtype_reg< !strconcat("&= ", OpcStr), OpNode, and>; + defm _OR : xtype_reg< !strconcat("|= ", OpcStr), OpNode, or>; +} + +multiclass xtype_xor_imm { +let AddedComplexity = 100 in + defm _XOR : xtype_imm< !strconcat("^= ", OpcStr), OpNode, xor>; +} + +defm ASL : basic_xtype_imm<"asl", shl>, basic_xtype_reg<"asl", shl>, + xtype_xor_imm<"asl", shl>; +defm LSR : basic_xtype_imm<"lsr", srl>, basic_xtype_reg<"lsr", srl>, + xtype_xor_imm<"lsr", srl>; + +defm ASR : basic_xtype_imm<"asr", sra>, basic_xtype_reg<"asr", sra>; +defm LSL : basic_xtype_reg<"lsl", shl>; + +// Change the sign of the immediate for Rd=-mpyi(Rs,#u8) +def : Pat <(mul (i32 IntRegs:$src1), (ineg n8ImmPred:$src2)), + (i32 (MPYI_rin (i32 IntRegs:$src1), u8ImmPred:$src2))>; //===----------------------------------------------------------------------===// // V3 Instructions + @@ -3046,3 +3531,19 @@ include "HexagonInstrInfoV3.td" //===----------------------------------------------------------------------===// include "HexagonInstrInfoV4.td" + +//===----------------------------------------------------------------------===// +// V4 Instructions - +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// V5 Instructions + +//===----------------------------------------------------------------------===// + +include "HexagonInstrInfoV5.td" + +//===----------------------------------------------------------------------===// +// V5 Instructions - +//===----------------------------------------------------------------------===// + + diff --git a/lib/Target/Hexagon/HexagonInstrInfoV3.td b/lib/Target/Hexagon/HexagonInstrInfoV3.td index a73897e..157ab3d 100644 --- a/lib/Target/Hexagon/HexagonInstrInfoV3.td +++ b/lib/Target/Hexagon/HexagonInstrInfoV3.td @@ -19,7 +19,7 @@ let isCall = 1, neverHasSideEffects = 1, Defs = [D0, D1, D2, D3, D4, D5, D6, D7, R28, R31, P0, P1, P2, P3, LC0, LC1, SA0, SA1] in { - def CALLv3 : JInst<(outs), (ins calltarget:$dst, variable_ops), + def CALLv3 : JInst<(outs), (ins calltarget:$dst), "call $dst", []>, Requires<[HasV3T]>; } @@ -35,16 +35,17 @@ let isCall = 1, neverHasSideEffects = 1, let isCall = 1, neverHasSideEffects = 1, Defs = [D0, D1, D2, D3, D4, D5, D6, D7, R28, R31, P0, P1, P2, P3, LC0, LC1, SA0, SA1] in { - def CALLRv3 : JRInst<(outs), (ins IntRegs:$dst, variable_ops), + def CALLRv3 : JRInst<(outs), (ins IntRegs:$dst), "callr $dst", []>, Requires<[HasV3TOnly]>; } +// Jump to address from register // if(p?.new) jumpr:t r? let isReturn = 1, isTerminator = 1, isBarrier = 1, Defs = [PC], Uses = [R31] in { - def JMPR_cPnewt: JRInst<(outs), (ins PredRegs:$src1, IntRegs:$src2), + def JMPR_cdnPt_V3: JRInst<(outs), (ins PredRegs:$src1, IntRegs:$src2), "if ($src1.new) jumpr:t $src2", []>, Requires<[HasV3T]>; } @@ -52,7 +53,7 @@ let isReturn = 1, isTerminator = 1, isBarrier = 1, // if (!p?.new) jumpr:t r? 
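// (Naming note, illustrative: in these mnemonics 'c' = conditional,
// 'dn' = dot-new (the predicate is produced in the same packet),
// 'Pt'/'NotPt' = predicate true/false, and ':t'/':nt' in the assembly is the
// taken/not-taken hint -- so JMPR_cdnPt_V3 prints
// "if ($src1.new) jumpr:t $src2".)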
let isReturn = 1, isTerminator = 1, isBarrier = 1, Defs = [PC], Uses = [R31] in { - def JMPR_cNotPnewt: JRInst<(outs), (ins PredRegs:$src1, IntRegs:$src2), + def JMPR_cdnNotPt_V3: JRInst<(outs), (ins PredRegs:$src1, IntRegs:$src2), "if (!$src1.new) jumpr:t $src2", []>, Requires<[HasV3T]>; } @@ -61,7 +62,7 @@ let isReturn = 1, isTerminator = 1, isBarrier = 1, // if(p?.new) jumpr:nt r? let isReturn = 1, isTerminator = 1, isBarrier = 1, Defs = [PC], Uses = [R31] in { - def JMPR_cPnewNt: JRInst<(outs), (ins PredRegs:$src1, IntRegs:$src2), + def JMPR_cdnPnt: JRInst<(outs), (ins PredRegs:$src1, IntRegs:$src2), "if ($src1.new) jumpr:nt $src2", []>, Requires<[HasV3T]>; } @@ -69,7 +70,7 @@ let isReturn = 1, isTerminator = 1, isBarrier = 1, // if (!p?.new) jumpr:nt r? let isReturn = 1, isTerminator = 1, isBarrier = 1, Defs = [PC], Uses = [R31] in { - def JMPR_cNotPnewNt: JRInst<(outs), (ins PredRegs:$src1, IntRegs:$src2), + def JMPR_cdnNotPnt: JRInst<(outs), (ins PredRegs:$src1, IntRegs:$src2), "if (!$src1.new) jumpr:nt $src2", []>, Requires<[HasV3T]>; } @@ -86,20 +87,22 @@ let AddedComplexity = 200 in def MAXw_dd : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2), "$dst = max($src2, $src1)", - [(set DoubleRegs:$dst, (select (i1 (setlt DoubleRegs:$src2, - DoubleRegs:$src1)), - DoubleRegs:$src1, - DoubleRegs:$src2))]>, + [(set (i64 DoubleRegs:$dst), + (i64 (select (i1 (setlt (i64 DoubleRegs:$src2), + (i64 DoubleRegs:$src1))), + (i64 DoubleRegs:$src1), + (i64 DoubleRegs:$src2))))]>, Requires<[HasV3T]>; let AddedComplexity = 200 in def MINw_dd : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2), "$dst = min($src2, $src1)", - [(set DoubleRegs:$dst, (select (i1 (setgt DoubleRegs:$src2, - DoubleRegs:$src1)), - DoubleRegs:$src1, - DoubleRegs:$src2))]>, + [(set (i64 DoubleRegs:$dst), + (i64 (select (i1 (setgt (i64 DoubleRegs:$src2), + (i64 DoubleRegs:$src1))), + (i64 DoubleRegs:$src1), + (i64 DoubleRegs:$src2))))]>, Requires<[HasV3T]>; //===----------------------------------------------------------------------===// @@ -109,25 +112,25 @@ Requires<[HasV3T]>; -//def : Pat <(brcond (i1 (seteq IntRegs:$src1, 0)), bb:$offset), -// (JMP_RegEzt IntRegs:$src1, bb:$offset)>, Requires<[HasV3T]>; +//def : Pat <(brcond (i1 (seteq (i32 IntRegs:$src1), 0)), bb:$offset), +// (JMP_RegEzt (i32 IntRegs:$src1), bb:$offset)>, Requires<[HasV3T]>; -//def : Pat <(brcond (i1 (setne IntRegs:$src1, 0)), bb:$offset), -// (JMP_RegNzt IntRegs:$src1, bb:$offset)>, Requires<[HasV3T]>; +//def : Pat <(brcond (i1 (setne (i32 IntRegs:$src1), 0)), bb:$offset), +// (JMP_RegNzt (i32 IntRegs:$src1), bb:$offset)>, Requires<[HasV3T]>; -//def : Pat <(brcond (i1 (setle IntRegs:$src1, 0)), bb:$offset), -// (JMP_RegLezt IntRegs:$src1, bb:$offset)>, Requires<[HasV3T]>; +//def : Pat <(brcond (i1 (setle (i32 IntRegs:$src1), 0)), bb:$offset), +// (JMP_RegLezt (i32 IntRegs:$src1), bb:$offset)>, Requires<[HasV3T]>; -//def : Pat <(brcond (i1 (setge IntRegs:$src1, 0)), bb:$offset), -// (JMP_RegGezt IntRegs:$src1, bb:$offset)>, Requires<[HasV3T]>; +//def : Pat <(brcond (i1 (setge (i32 IntRegs:$src1), 0)), bb:$offset), +// (JMP_RegGezt (i32 IntRegs:$src1), bb:$offset)>, Requires<[HasV3T]>; -//def : Pat <(brcond (i1 (setgt IntRegs:$src1, -1)), bb:$offset), -// (JMP_RegGezt IntRegs:$src1, bb:$offset)>, Requires<[HasV3T]>; +//def : Pat <(brcond (i1 (setgt (i32 IntRegs:$src1), -1)), bb:$offset), +// (JMP_RegGezt (i32 IntRegs:$src1), bb:$offset)>, Requires<[HasV3T]>; // Map call instruction -def : Pat<(call 
IntRegs:$dst), - (CALLRv3 IntRegs:$dst)>, Requires<[HasV3T]>; +def : Pat<(call (i32 IntRegs:$dst)), + (CALLRv3 (i32 IntRegs:$dst))>, Requires<[HasV3T]>; def : Pat<(call tglobaladdr:$dst), (CALLv3 tglobaladdr:$dst)>, Requires<[HasV3T]>; def : Pat<(call texternalsym:$dst), diff --git a/lib/Target/Hexagon/HexagonInstrInfoV4.td b/lib/Target/Hexagon/HexagonInstrInfoV4.td index 9e60cf2..70448fc 100644 --- a/lib/Target/Hexagon/HexagonInstrInfoV4.td +++ b/lib/Target/Hexagon/HexagonInstrInfoV4.td @@ -11,6 +11,12 @@ // //===----------------------------------------------------------------------===// +let neverHasSideEffects = 1 in +def IMMEXT : Immext<(outs), (ins), + "/* immext #... */", + []>, + Requires<[HasV4T]>; + // Hexagon V4 Architecture spec defines 8 instruction classes: // LD ST ALU32 XTYPE J JR MEMOP NV CR SYSTEM(system is not implemented in the // compiler) @@ -250,23 +256,151 @@ def ZXTH_cdnNotPt_V4 : ALU32_rr<(outs IntRegs:$dst), []>, Requires<[HasV4T]>; +// Generate frame index addresses. +let neverHasSideEffects = 1, isReMaterializable = 1 in +def TFR_FI_immext_V4 : ALU32_ri<(outs IntRegs:$dst), + (ins IntRegs:$src1, s32Imm:$offset), + "$dst = add($src1, ##$offset)", + []>, + Requires<[HasV4T]>; + //===----------------------------------------------------------------------===// // ALU32 - //===----------------------------------------------------------------------===// +//===----------------------------------------------------------------------===// +// ALU32/PERM + +//===----------------------------------------------------------------------===// + +// Combine +// Rdd=combine(Rs, #s8) +let neverHasSideEffects = 1 in +def COMBINE_ri_V4 : ALU32_ri<(outs DoubleRegs:$dst), + (ins IntRegs:$src1, s8Imm:$src2), + "$dst = combine($src1, #$src2)", + []>, + Requires<[HasV4T]>; +// Rdd=combine(#s8, Rs) +let neverHasSideEffects = 1 in +def COMBINE_ir_V4 : ALU32_ir<(outs DoubleRegs:$dst), + (ins s8Imm:$src1, IntRegs:$src2), + "$dst = combine(#$src1, $src2)", + []>, + Requires<[HasV4T]>; +//===----------------------------------------------------------------------===// +// ALU32/PERM + +//===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// // LD + //===----------------------------------------------------------------------===// -/// -/// Make sure that in post increment load, the first operand is always the post -/// increment operand. -/// -//// Load doubleword. -// Rdd=memd(Re=#U6) +// +// These absolute set addressing mode instructions accept immediate as +// an operand. We have duplicated these patterns to take global address. 
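// (Illustrative: an "absolute set" load reads from an absolute address and
// also writes that address into a second destination register, e.g.
//   r0 = memw(r1=#40)    // r0 = *(int *)40, and r1 = 40 afterwards
// matching the two outs ($dst1, $dst2) in the defs below; the concrete
// address #40 is hypothetical.)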
+ +let neverHasSideEffects = 1 in +def LDrid_abs_setimm_V4 : LDInst2<(outs DoubleRegs:$dst1, IntRegs:$dst2), + (ins u6Imm:$addr), + "$dst1 = memd($dst2=#$addr)", + []>, + Requires<[HasV4T]>; + +// Rd=memb(Re=#U6) +let neverHasSideEffects = 1 in +def LDrib_abs_setimm_V4 : LDInst2<(outs IntRegs:$dst1, IntRegs:$dst2), + (ins u6Imm:$addr), + "$dst1 = memb($dst2=#$addr)", + []>, + Requires<[HasV4T]>; + +// Rd=memh(Re=#U6) +let neverHasSideEffects = 1 in +def LDrih_abs_setimm_V4 : LDInst2<(outs IntRegs:$dst1, IntRegs:$dst2), + (ins u6Imm:$addr), + "$dst1 = memh($dst2=#$addr)", + []>, + Requires<[HasV4T]>; + +// Rd=memub(Re=#U6) +let neverHasSideEffects = 1 in +def LDriub_abs_setimm_V4 : LDInst2<(outs IntRegs:$dst1, IntRegs:$dst2), + (ins u6Imm:$addr), + "$dst1 = memub($dst2=#$addr)", + []>, + Requires<[HasV4T]>; + +// Rd=memuh(Re=#U6) +let neverHasSideEffects = 1 in +def LDriuh_abs_setimm_V4 : LDInst2<(outs IntRegs:$dst1, IntRegs:$dst2), + (ins u6Imm:$addr), + "$dst1 = memuh($dst2=#$addr)", + []>, + Requires<[HasV4T]>; + +// Rd=memw(Re=#U6) +let neverHasSideEffects = 1 in +def LDriw_abs_setimm_V4 : LDInst2<(outs IntRegs:$dst1, IntRegs:$dst2), + (ins u6Imm:$addr), + "$dst1 = memw($dst2=#$addr)", + []>, + Requires<[HasV4T]>; + +// Following patterns are defined for absolute set addressing mode +// instruction which take global address as operand. +let neverHasSideEffects = 1 in +def LDrid_abs_set_V4 : LDInst2<(outs DoubleRegs:$dst1, IntRegs:$dst2), + (ins globaladdress:$addr), + "$dst1 = memd($dst2=##$addr)", + []>, + Requires<[HasV4T]>; + +// Rd=memb(Re=#U6) +let neverHasSideEffects = 1 in +def LDrib_abs_set_V4 : LDInst2<(outs IntRegs:$dst1, IntRegs:$dst2), + (ins globaladdress:$addr), + "$dst1 = memb($dst2=##$addr)", + []>, + Requires<[HasV4T]>; + +// Rd=memh(Re=#U6) +let neverHasSideEffects = 1 in +def LDrih_abs_set_V4 : LDInst2<(outs IntRegs:$dst1, IntRegs:$dst2), + (ins globaladdress:$addr), + "$dst1 = memh($dst2=##$addr)", + []>, + Requires<[HasV4T]>; + +// Rd=memub(Re=#U6) +let neverHasSideEffects = 1 in +def LDriub_abs_set_V4 : LDInst2<(outs IntRegs:$dst1, IntRegs:$dst2), + (ins globaladdress:$addr), + "$dst1 = memub($dst2=##$addr)", + []>, + Requires<[HasV4T]>; + +// Rd=memuh(Re=#U6) +let neverHasSideEffects = 1 in +def LDriuh_abs_set_V4 : LDInst2<(outs IntRegs:$dst1, IntRegs:$dst2), + (ins globaladdress:$addr), + "$dst1 = memuh($dst2=##$addr)", + []>, + Requires<[HasV4T]>; + +// Rd=memw(Re=#U6) +let neverHasSideEffects = 1 in +def LDriw_abs_set_V4 : LDInst2<(outs IntRegs:$dst1, IntRegs:$dst2), + (ins globaladdress:$addr), + "$dst1 = memw($dst2=##$addr)", + []>, + Requires<[HasV4T]>; +// Load doubleword. +// +// Make sure that in post increment load, the first operand is always the post +// increment operand. +// // Rdd=memd(Rs+Rt<<#u2) // Special case pattern for indexed load without offset which is easier to // match. 
AddedComplexity of this pattern should be lower than base+offset load @@ -276,56 +410,58 @@ let AddedComplexity = 10, isPredicable = 1 in def LDrid_indexed_V4 : LDInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), "$dst=memd($src1+$src2<<#0)", - [(set DoubleRegs:$dst, (load (add IntRegs:$src1, - IntRegs:$src2)))]>, + [(set (i64 DoubleRegs:$dst), + (i64 (load (add (i32 IntRegs:$src1), + (i32 IntRegs:$src2)))))]>, Requires<[HasV4T]>; let AddedComplexity = 40, isPredicable = 1 in def LDrid_indexed_shl_V4 : LDInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2, u2Imm:$offset), "$dst=memd($src1+$src2<<#$offset)", - [(set DoubleRegs:$dst, (load (add IntRegs:$src1, - (shl IntRegs:$src2, - u2ImmPred:$offset))))]>, + [(set (i64 DoubleRegs:$dst), + (i64 (load (add (i32 IntRegs:$src1), + (shl (i32 IntRegs:$src2), + u2ImmPred:$offset)))))]>, Requires<[HasV4T]>; //// Load doubleword conditionally. // if ([!]Pv[.new]) Rd=memd(Rs+Rt<<#u2) // if (Pv) Rd=memd(Rs+Rt<<#u2) -let mayLoad = 1, AddedComplexity = 15, isPredicated = 1 in -def LDrid_indexed_cPt_V4 : LDInst<(outs DoubleRegs:$dst), +let AddedComplexity = 15, isPredicated = 1 in +def LDrid_indexed_cPt_V4 : LDInst2<(outs DoubleRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), "if ($src1) $dst=memd($src2+$src3<<#0)", []>, Requires<[HasV4T]>; // if (Pv.new) Rd=memd(Rs+Rt<<#u2) -let mayLoad = 1, AddedComplexity = 15, isPredicated = 1 in -def LDrid_indexed_cdnPt_V4 : LDInst<(outs DoubleRegs:$dst), +let AddedComplexity = 15, isPredicated = 1 in +def LDrid_indexed_cdnPt_V4 : LDInst2<(outs DoubleRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), "if ($src1.new) $dst=memd($src2+$src3<<#0)", []>, Requires<[HasV4T]>; // if (!Pv) Rd=memd(Rs+Rt<<#u2) -let mayLoad = 1, AddedComplexity = 15, isPredicated = 1 in -def LDrid_indexed_cNotPt_V4 : LDInst<(outs DoubleRegs:$dst), +let AddedComplexity = 15, isPredicated = 1 in +def LDrid_indexed_cNotPt_V4 : LDInst2<(outs DoubleRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), "if (!$src1) $dst=memd($src2+$src3<<#0)", []>, Requires<[HasV4T]>; // if (!Pv.new) Rd=memd(Rs+Rt<<#u2) -let mayLoad = 1, AddedComplexity = 15, isPredicated = 1 in -def LDrid_indexed_cdnNotPt_V4 : LDInst<(outs DoubleRegs:$dst), +let AddedComplexity = 15, isPredicated = 1 in +def LDrid_indexed_cdnNotPt_V4 : LDInst2<(outs DoubleRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), "if (!$src1.new) $dst=memd($src2+$src3<<#0)", []>, Requires<[HasV4T]>; // if (Pv) Rd=memd(Rs+Rt<<#u2) -let mayLoad = 1, AddedComplexity = 45, isPredicated = 1 in -def LDrid_indexed_shl_cPt_V4 : LDInst<(outs DoubleRegs:$dst), +let AddedComplexity = 45, isPredicated = 1 in +def LDrid_indexed_shl_cPt_V4 : LDInst2<(outs DoubleRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$offset), "if ($src1) $dst=memd($src2+$src3<<#$offset)", @@ -333,8 +469,8 @@ def LDrid_indexed_shl_cPt_V4 : LDInst<(outs DoubleRegs:$dst), Requires<[HasV4T]>; // if (Pv.new) Rd=memd(Rs+Rt<<#u2) -let mayLoad = 1, AddedComplexity = 45, isPredicated = 1 in -def LDrid_indexed_shl_cdnPt_V4 : LDInst<(outs DoubleRegs:$dst), +let AddedComplexity = 45, isPredicated = 1 in +def LDrid_indexed_shl_cdnPt_V4 : LDInst2<(outs DoubleRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$offset), "if ($src1.new) $dst=memd($src2+$src3<<#$offset)", @@ -342,8 +478,8 @@ def LDrid_indexed_shl_cdnPt_V4 : LDInst<(outs DoubleRegs:$dst), Requires<[HasV4T]>; // if (!Pv) Rd=memd(Rs+Rt<<#u2) -let mayLoad = 1, AddedComplexity = 45, 
isPredicated = 1 in -def LDrid_indexed_shl_cNotPt_V4 : LDInst<(outs DoubleRegs:$dst), +let AddedComplexity = 45, isPredicated = 1 in +def LDrid_indexed_shl_cNotPt_V4 : LDInst2<(outs DoubleRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$offset), "if (!$src1) $dst=memd($src2+$src3<<#$offset)", @@ -351,8 +487,8 @@ def LDrid_indexed_shl_cNotPt_V4 : LDInst<(outs DoubleRegs:$dst), Requires<[HasV4T]>; // if (!Pv.new) Rd=memd(Rs+Rt<<#u2) -let mayLoad = 1, AddedComplexity = 45, isPredicated = 1 in -def LDrid_indexed_shl_cdnNotPt_V4 : LDInst<(outs DoubleRegs:$dst), +let AddedComplexity = 45, isPredicated = 1 in +def LDrid_indexed_shl_cdnNotPt_V4 : LDInst2<(outs DoubleRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$offset), "if (!$src1.new) $dst=memd($src2+$src3<<#$offset)", @@ -362,99 +498,101 @@ def LDrid_indexed_shl_cdnNotPt_V4 : LDInst<(outs DoubleRegs:$dst), // Rdd=memd(Rt<<#u2+#U6) //// Load byte. -// Rd=memb(Re=#U6) - // Rd=memb(Rs+Rt<<#u2) let AddedComplexity = 10, isPredicable = 1 in def LDrib_indexed_V4 : LDInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), "$dst=memb($src1+$src2<<#0)", - [(set IntRegs:$dst, (sextloadi8 (add IntRegs:$src1, - IntRegs:$src2)))]>, + [(set (i32 IntRegs:$dst), + (i32 (sextloadi8 (add (i32 IntRegs:$src1), + (i32 IntRegs:$src2)))))]>, Requires<[HasV4T]>; let AddedComplexity = 10, isPredicable = 1 in def LDriub_indexed_V4 : LDInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), "$dst=memub($src1+$src2<<#0)", - [(set IntRegs:$dst, (zextloadi8 (add IntRegs:$src1, - IntRegs:$src2)))]>, + [(set (i32 IntRegs:$dst), + (i32 (zextloadi8 (add (i32 IntRegs:$src1), + (i32 IntRegs:$src2)))))]>, Requires<[HasV4T]>; let AddedComplexity = 10, isPredicable = 1 in def LDriub_ae_indexed_V4 : LDInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), "$dst=memub($src1+$src2<<#0)", - [(set IntRegs:$dst, (extloadi8 (add IntRegs:$src1, - IntRegs:$src2)))]>, + [(set (i32 IntRegs:$dst), + (i32 (extloadi8 (add (i32 IntRegs:$src1), + (i32 IntRegs:$src2)))))]>, Requires<[HasV4T]>; let AddedComplexity = 40, isPredicable = 1 in def LDrib_indexed_shl_V4 : LDInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2, u2Imm:$offset), "$dst=memb($src1+$src2<<#$offset)", - [(set IntRegs:$dst, - (sextloadi8 (add IntRegs:$src1, - (shl IntRegs:$src2, - u2ImmPred:$offset))))]>, + [(set (i32 IntRegs:$dst), + (i32 (sextloadi8 (add (i32 IntRegs:$src1), + (shl (i32 IntRegs:$src2), + u2ImmPred:$offset)))))]>, Requires<[HasV4T]>; let AddedComplexity = 40, isPredicable = 1 in def LDriub_indexed_shl_V4 : LDInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2, u2Imm:$offset), "$dst=memub($src1+$src2<<#$offset)", - [(set IntRegs:$dst, - (zextloadi8 (add IntRegs:$src1, - (shl IntRegs:$src2, - u2ImmPred:$offset))))]>, + [(set (i32 IntRegs:$dst), + (i32 (zextloadi8 (add (i32 IntRegs:$src1), + (shl (i32 IntRegs:$src2), + u2ImmPred:$offset)))))]>, Requires<[HasV4T]>; let AddedComplexity = 40, isPredicable = 1 in def LDriub_ae_indexed_shl_V4 : LDInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2, u2Imm:$offset), "$dst=memub($src1+$src2<<#$offset)", - [(set IntRegs:$dst, (extloadi8 (add IntRegs:$src1, - (shl IntRegs:$src2, - u2ImmPred:$offset))))]>, + [(set (i32 IntRegs:$dst), + (i32 (extloadi8 (add (i32 IntRegs:$src1), + (shl (i32 IntRegs:$src2), + u2ImmPred:$offset)))))]>, Requires<[HasV4T]>; //// Load byte conditionally. 
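// (Illustrative: a predicated load executes only when its predicate has the
// stated value, e.g.
//   if (p0) r0 = memb(r1+r2<<#0)
// leaves r0 unchanged when p0 is false; the ".new" variants test a predicate
// generated earlier in the same instruction packet.)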
// if ([!]Pv[.new]) Rd=memb(Rs+Rt<<#u2) // if (Pv) Rd=memb(Rs+Rt<<#u2) -let mayLoad = 1, AddedComplexity = 15, isPredicated = 1 in -def LDrib_indexed_cPt_V4 : LDInst<(outs IntRegs:$dst), +let AddedComplexity = 15, isPredicated = 1 in +def LDrib_indexed_cPt_V4 : LDInst2<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), "if ($src1) $dst=memb($src2+$src3<<#0)", []>, Requires<[HasV4T]>; // if (Pv.new) Rd=memb(Rs+Rt<<#u2) -let mayLoad = 1, AddedComplexity = 15, isPredicated = 1 in -def LDrib_indexed_cdnPt_V4 : LDInst<(outs IntRegs:$dst), +let AddedComplexity = 15, isPredicated = 1 in +def LDrib_indexed_cdnPt_V4 : LDInst2<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), "if ($src1.new) $dst=memb($src2+$src3<<#0)", []>, Requires<[HasV4T]>; // if (!Pv) Rd=memb(Rs+Rt<<#u2) -let mayLoad = 1, AddedComplexity = 15, isPredicated = 1 in -def LDrib_indexed_cNotPt_V4 : LDInst<(outs IntRegs:$dst), +let AddedComplexity = 15, isPredicated = 1 in +def LDrib_indexed_cNotPt_V4 : LDInst2<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), "if (!$src1) $dst=memb($src2+$src3<<#0)", []>, Requires<[HasV4T]>; // if (!Pv.new) Rd=memb(Rs+Rt<<#u2) -let mayLoad = 1, AddedComplexity = 15, isPredicated = 1 in -def LDrib_indexed_cdnNotPt_V4 : LDInst<(outs IntRegs:$dst), +let AddedComplexity = 15, isPredicated = 1 in +def LDrib_indexed_cdnNotPt_V4 : LDInst2<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), "if (!$src1.new) $dst=memb($src2+$src3<<#0)", []>, Requires<[HasV4T]>; // if (Pv) Rd=memb(Rs+Rt<<#u2) -let mayLoad = 1, AddedComplexity = 45, isPredicated = 1 in -def LDrib_indexed_shl_cPt_V4 : LDInst<(outs IntRegs:$dst), +let AddedComplexity = 45, isPredicated = 1 in +def LDrib_indexed_shl_cPt_V4 : LDInst2<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$offset), "if ($src1) $dst=memb($src2+$src3<<#$offset)", @@ -462,8 +600,8 @@ def LDrib_indexed_shl_cPt_V4 : LDInst<(outs IntRegs:$dst), Requires<[HasV4T]>; // if (Pv.new) Rd=memb(Rs+Rt<<#u2) -let mayLoad = 1, AddedComplexity = 45, isPredicated = 1 in -def LDrib_indexed_shl_cdnPt_V4 : LDInst<(outs IntRegs:$dst), +let AddedComplexity = 45, isPredicated = 1 in +def LDrib_indexed_shl_cdnPt_V4 : LDInst2<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$offset), "if ($src1.new) $dst=memb($src2+$src3<<#$offset)", @@ -471,8 +609,8 @@ def LDrib_indexed_shl_cdnPt_V4 : LDInst<(outs IntRegs:$dst), Requires<[HasV4T]>; // if (!Pv) Rd=memb(Rs+Rt<<#u2) -let mayLoad = 1, AddedComplexity = 45, isPredicated = 1 in -def LDrib_indexed_shl_cNotPt_V4 : LDInst<(outs IntRegs:$dst), +let AddedComplexity = 45, isPredicated = 1 in +def LDrib_indexed_shl_cNotPt_V4 : LDInst2<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$offset), "if (!$src1) $dst=memb($src2+$src3<<#$offset)", @@ -480,8 +618,8 @@ def LDrib_indexed_shl_cNotPt_V4 : LDInst<(outs IntRegs:$dst), Requires<[HasV4T]>; // if (!Pv.new) Rd=memb(Rs+Rt<<#u2) -let mayLoad = 1, AddedComplexity = 45, isPredicated = 1 in -def LDrib_indexed_shl_cdnNotPt_V4 : LDInst<(outs IntRegs:$dst), +let AddedComplexity = 45, isPredicated = 1 in +def LDrib_indexed_shl_cdnNotPt_V4 : LDInst2<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$offset), "if (!$src1.new) $dst=memb($src2+$src3<<#$offset)", @@ -491,40 +629,40 @@ def LDrib_indexed_shl_cdnNotPt_V4 : LDInst<(outs IntRegs:$dst), //// Load unsigned byte conditionally. 
// if ([!]Pv[.new]) Rd=memub(Rs+Rt<<#u2) // if (Pv) Rd=memub(Rs+Rt<<#u2) -let mayLoad = 1, AddedComplexity = 15, isPredicated = 1 in -def LDriub_indexed_cPt_V4 : LDInst<(outs IntRegs:$dst), +let AddedComplexity = 15, isPredicated = 1 in +def LDriub_indexed_cPt_V4 : LDInst2<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), "if ($src1) $dst=memub($src2+$src3<<#0)", []>, Requires<[HasV4T]>; // if (Pv.new) Rd=memub(Rs+Rt<<#u2) -let mayLoad = 1, AddedComplexity = 15, isPredicated = 1 in -def LDriub_indexed_cdnPt_V4 : LDInst<(outs IntRegs:$dst), +let AddedComplexity = 15, isPredicated = 1 in +def LDriub_indexed_cdnPt_V4 : LDInst2<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), "if ($src1.new) $dst=memub($src2+$src3<<#0)", []>, Requires<[HasV4T]>; // if (!Pv) Rd=memub(Rs+Rt<<#u2) -let mayLoad = 1, AddedComplexity = 15, isPredicated = 1 in -def LDriub_indexed_cNotPt_V4 : LDInst<(outs IntRegs:$dst), +let AddedComplexity = 15, isPredicated = 1 in +def LDriub_indexed_cNotPt_V4 : LDInst2<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), "if (!$src1) $dst=memub($src2+$src3<<#0)", []>, Requires<[HasV4T]>; // if (!Pv.new) Rd=memub(Rs+Rt<<#u2) -let mayLoad = 1, AddedComplexity = 15, isPredicated = 1 in -def LDriub_indexed_cdnNotPt_V4 : LDInst<(outs IntRegs:$dst), +let AddedComplexity = 15, isPredicated = 1 in +def LDriub_indexed_cdnNotPt_V4 : LDInst2<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), "if (!$src1.new) $dst=memub($src2+$src3<<#0)", []>, Requires<[HasV4T]>; // if (Pv) Rd=memub(Rs+Rt<<#u2) -let mayLoad = 1, AddedComplexity = 45, isPredicated = 1 in -def LDriub_indexed_shl_cPt_V4 : LDInst<(outs IntRegs:$dst), +let AddedComplexity = 45, isPredicated = 1 in +def LDriub_indexed_shl_cPt_V4 : LDInst2<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$offset), "if ($src1) $dst=memub($src2+$src3<<#$offset)", @@ -532,8 +670,8 @@ def LDriub_indexed_shl_cPt_V4 : LDInst<(outs IntRegs:$dst), Requires<[HasV4T]>; // if (Pv.new) Rd=memub(Rs+Rt<<#u2) -let mayLoad = 1, AddedComplexity = 45, isPredicated = 1 in -def LDriub_indexed_shl_cdnPt_V4 : LDInst<(outs IntRegs:$dst), +let AddedComplexity = 45, isPredicated = 1 in +def LDriub_indexed_shl_cdnPt_V4 : LDInst2<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$offset), "if ($src1.new) $dst=memub($src2+$src3<<#$offset)", @@ -541,8 +679,8 @@ def LDriub_indexed_shl_cdnPt_V4 : LDInst<(outs IntRegs:$dst), Requires<[HasV4T]>; // if (!Pv) Rd=memub(Rs+Rt<<#u2) -let mayLoad = 1, AddedComplexity = 45, isPredicated = 1 in -def LDriub_indexed_shl_cNotPt_V4 : LDInst<(outs IntRegs:$dst), +let AddedComplexity = 45, isPredicated = 1 in +def LDriub_indexed_shl_cNotPt_V4 : LDInst2<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$offset), "if (!$src1) $dst=memub($src2+$src3<<#$offset)", @@ -550,8 +688,8 @@ def LDriub_indexed_shl_cNotPt_V4 : LDInst<(outs IntRegs:$dst), Requires<[HasV4T]>; // if (!Pv.new) Rd=memub(Rs+Rt<<#u2) -let mayLoad = 1, AddedComplexity = 45, isPredicated = 1 in -def LDriub_indexed_shl_cdnNotPt_V4 : LDInst<(outs IntRegs:$dst), +let AddedComplexity = 45, isPredicated = 1 in +def LDriub_indexed_shl_cdnNotPt_V4 : LDInst2<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$offset), "if (!$src1.new) $dst=memub($src2+$src3<<#$offset)", @@ -561,31 +699,32 @@ def LDriub_indexed_shl_cdnNotPt_V4 : LDInst<(outs IntRegs:$dst), // Rd=memb(Rt<<#u2+#U6) //// Load 
halfword -// Rd=memh(Re=#U6) - // Rd=memh(Rs+Rt<<#u2) let AddedComplexity = 10, isPredicable = 1 in def LDrih_indexed_V4 : LDInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), "$dst=memh($src1+$src2<<#0)", - [(set IntRegs:$dst, (sextloadi16 (add IntRegs:$src1, - IntRegs:$src2)))]>, + [(set (i32 IntRegs:$dst), + (i32 (sextloadi16 (add (i32 IntRegs:$src1), + (i32 IntRegs:$src2)))))]>, Requires<[HasV4T]>; let AddedComplexity = 10, isPredicable = 1 in def LDriuh_indexed_V4 : LDInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), "$dst=memuh($src1+$src2<<#0)", - [(set IntRegs:$dst, (zextloadi16 (add IntRegs:$src1, - IntRegs:$src2)))]>, + [(set (i32 IntRegs:$dst), + (i32 (zextloadi16 (add (i32 IntRegs:$src1), + (i32 IntRegs:$src2)))))]>, Requires<[HasV4T]>; let AddedComplexity = 10, isPredicable = 1 in def LDriuh_ae_indexed_V4 : LDInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), "$dst=memuh($src1+$src2<<#0)", - [(set IntRegs:$dst, (extloadi16 (add IntRegs:$src1, - IntRegs:$src2)))]>, + [(set (i32 IntRegs:$dst), + (i32 (extloadi16 (add (i32 IntRegs:$src1), + (i32 IntRegs:$src2)))))]>, Requires<[HasV4T]>; // Rd=memh(Rs+Rt<<#u2) @@ -593,69 +732,69 @@ let AddedComplexity = 40, isPredicable = 1 in def LDrih_indexed_shl_V4 : LDInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2, u2Imm:$offset), "$dst=memh($src1+$src2<<#$offset)", - [(set IntRegs:$dst, - (sextloadi16 (add IntRegs:$src1, - (shl IntRegs:$src2, - u2ImmPred:$offset))))]>, + [(set (i32 IntRegs:$dst), + (i32 (sextloadi16 (add (i32 IntRegs:$src1), + (shl (i32 IntRegs:$src2), + u2ImmPred:$offset)))))]>, Requires<[HasV4T]>; let AddedComplexity = 40, isPredicable = 1 in def LDriuh_indexed_shl_V4 : LDInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2, u2Imm:$offset), "$dst=memuh($src1+$src2<<#$offset)", - [(set IntRegs:$dst, - (zextloadi16 (add IntRegs:$src1, - (shl IntRegs:$src2, - u2ImmPred:$offset))))]>, + [(set (i32 IntRegs:$dst), + (i32 (zextloadi16 (add (i32 IntRegs:$src1), + (shl (i32 IntRegs:$src2), + u2ImmPred:$offset)))))]>, Requires<[HasV4T]>; let AddedComplexity = 40, isPredicable = 1 in def LDriuh_ae_indexed_shl_V4 : LDInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2, u2Imm:$offset), "$dst=memuh($src1+$src2<<#$offset)", - [(set IntRegs:$dst, - (extloadi16 (add IntRegs:$src1, - (shl IntRegs:$src2, - u2ImmPred:$offset))))]>, + [(set (i32 IntRegs:$dst), + (i32 (extloadi16 (add (i32 IntRegs:$src1), + (shl (i32 IntRegs:$src2), + u2ImmPred:$offset)))))]>, Requires<[HasV4T]>; //// Load halfword conditionally. 
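// The pattern rewrites above add explicit value types: each IntRegs operand
// is wrapped as (i32 IntRegs:$reg) and the loaded result as (i32 (...)), so
// every node's type is spelled out instead of being left to TableGen's type
// inference. A minimal sketch of the typed form, mirroring LDrih_indexed_V4
// (the _sketch name is illustrative only):
let AddedComplexity = 10, isPredicable = 1 in
def LDrih_indexed_sketch : LDInst<(outs IntRegs:$dst),
            (ins IntRegs:$src1, IntRegs:$src2),
            "$dst=memh($src1+$src2<<#0)",
            [(set (i32 IntRegs:$dst),
                  (i32 (sextloadi16 (add (i32 IntRegs:$src1),
                                         (i32 IntRegs:$src2)))))]>,
            Requires<[HasV4T]>;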
// if ([!]Pv[.new]) Rd=memh(Rs+Rt<<#u2) // if (Pv) Rd=memh(Rs+Rt<<#u2) -let mayLoad = 1, AddedComplexity = 15, isPredicated = 1 in -def LDrih_indexed_cPt_V4 : LDInst<(outs IntRegs:$dst), +let AddedComplexity = 15, isPredicated = 1 in +def LDrih_indexed_cPt_V4 : LDInst2<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), "if ($src1) $dst=memh($src2+$src3<<#0)", []>, Requires<[HasV4T]>; // if (Pv.new) Rd=memh(Rs+Rt<<#u2) -let mayLoad = 1, AddedComplexity = 15, isPredicated = 1 in -def LDrih_indexed_cdnPt_V4 : LDInst<(outs IntRegs:$dst), +let AddedComplexity = 15, isPredicated = 1 in +def LDrih_indexed_cdnPt_V4 : LDInst2<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), "if ($src1.new) $dst=memh($src2+$src3<<#0)", []>, Requires<[HasV4T]>; // if (!Pv) Rd=memh(Rs+Rt<<#u2) -let mayLoad = 1, AddedComplexity = 15, isPredicated = 1 in -def LDrih_indexed_cNotPt_V4 : LDInst<(outs IntRegs:$dst), +let AddedComplexity = 15, isPredicated = 1 in +def LDrih_indexed_cNotPt_V4 : LDInst2<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), "if (!$src1) $dst=memh($src2+$src3<<#0)", []>, Requires<[HasV4T]>; // if (!Pv.new) Rd=memh(Rs+Rt<<#u2) -let mayLoad = 1, AddedComplexity = 15, isPredicated = 1 in -def LDrih_indexed_cdnNotPt_V4 : LDInst<(outs IntRegs:$dst), +let AddedComplexity = 15, isPredicated = 1 in +def LDrih_indexed_cdnNotPt_V4 : LDInst2<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), "if (!$src1.new) $dst=memh($src2+$src3<<#0)", []>, Requires<[HasV4T]>; // if (Pv) Rd=memh(Rs+Rt<<#u2) -let mayLoad = 1, AddedComplexity = 45, isPredicated = 1 in -def LDrih_indexed_shl_cPt_V4 : LDInst<(outs IntRegs:$dst), +let AddedComplexity = 45, isPredicated = 1 in +def LDrih_indexed_shl_cPt_V4 : LDInst2<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$offset), "if ($src1) $dst=memh($src2+$src3<<#$offset)", @@ -663,8 +802,8 @@ def LDrih_indexed_shl_cPt_V4 : LDInst<(outs IntRegs:$dst), Requires<[HasV4T]>; // if (Pv.new) Rd=memh(Rs+Rt<<#u2) -let mayLoad = 1, AddedComplexity = 45, isPredicated = 1 in -def LDrih_indexed_shl_cdnPt_V4 : LDInst<(outs IntRegs:$dst), +let AddedComplexity = 45, isPredicated = 1 in +def LDrih_indexed_shl_cdnPt_V4 : LDInst2<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$offset), "if ($src1.new) $dst=memh($src2+$src3<<#$offset)", @@ -672,8 +811,8 @@ def LDrih_indexed_shl_cdnPt_V4 : LDInst<(outs IntRegs:$dst), Requires<[HasV4T]>; // if (!Pv) Rd=memh(Rs+Rt<<#u2) -let mayLoad = 1, AddedComplexity = 45, isPredicated = 1 in -def LDrih_indexed_shl_cNotPt_V4 : LDInst<(outs IntRegs:$dst), +let AddedComplexity = 45, isPredicated = 1 in +def LDrih_indexed_shl_cNotPt_V4 : LDInst2<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$offset), "if (!$src1) $dst=memh($src2+$src3<<#$offset)", @@ -681,8 +820,8 @@ def LDrih_indexed_shl_cNotPt_V4 : LDInst<(outs IntRegs:$dst), Requires<[HasV4T]>; // if (!Pv.new) Rd=memh(Rs+Rt<<#u2) -let mayLoad = 1, AddedComplexity = 45, isPredicated = 1 in -def LDrih_indexed_shl_cdnNotPt_V4 : LDInst<(outs IntRegs:$dst), +let AddedComplexity = 45, isPredicated = 1 in +def LDrih_indexed_shl_cdnNotPt_V4 : LDInst2<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$offset), "if (!$src1.new) $dst=memh($src2+$src3<<#$offset)", @@ -692,40 +831,40 @@ def LDrih_indexed_shl_cdnNotPt_V4 : LDInst<(outs IntRegs:$dst), //// Load unsigned halfword conditionally. 
// if ([!]Pv[.new]) Rd=memuh(Rs+Rt<<#u2) // if (Pv) Rd=memuh(Rs+Rt<<#u2) -let mayLoad = 1, AddedComplexity = 15, isPredicated = 1 in -def LDriuh_indexed_cPt_V4 : LDInst<(outs IntRegs:$dst), +let AddedComplexity = 15, isPredicated = 1 in +def LDriuh_indexed_cPt_V4 : LDInst2<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), "if ($src1) $dst=memuh($src2+$src3<<#0)", []>, Requires<[HasV4T]>; // if (Pv.new) Rd=memuh(Rs+Rt<<#u2) -let mayLoad = 1, AddedComplexity = 15, isPredicated = 1 in -def LDriuh_indexed_cdnPt_V4 : LDInst<(outs IntRegs:$dst), +let AddedComplexity = 15, isPredicated = 1 in +def LDriuh_indexed_cdnPt_V4 : LDInst2<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), "if ($src1.new) $dst=memuh($src2+$src3<<#0)", []>, Requires<[HasV4T]>; // if (!Pv) Rd=memuh(Rs+Rt<<#u2) -let mayLoad = 1, AddedComplexity = 15, isPredicated = 1 in -def LDriuh_indexed_cNotPt_V4 : LDInst<(outs IntRegs:$dst), +let AddedComplexity = 15, isPredicated = 1 in +def LDriuh_indexed_cNotPt_V4 : LDInst2<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), "if (!$src1) $dst=memuh($src2+$src3<<#0)", []>, Requires<[HasV4T]>; // if (!Pv.new) Rd=memuh(Rs+Rt<<#u2) -let mayLoad = 1, AddedComplexity = 15, isPredicated = 1 in -def LDriuh_indexed_cdnNotPt_V4 : LDInst<(outs IntRegs:$dst), +let AddedComplexity = 15, isPredicated = 1 in +def LDriuh_indexed_cdnNotPt_V4 : LDInst2<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), "if (!$src1.new) $dst=memuh($src2+$src3<<#0)", []>, Requires<[HasV4T]>; // if (Pv) Rd=memuh(Rs+Rt<<#u2) -let mayLoad = 1, AddedComplexity = 45, isPredicated = 1 in -def LDriuh_indexed_shl_cPt_V4 : LDInst<(outs IntRegs:$dst), +let AddedComplexity = 45, isPredicated = 1 in +def LDriuh_indexed_shl_cPt_V4 : LDInst2<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$offset), "if ($src1) $dst=memuh($src2+$src3<<#$offset)", @@ -733,8 +872,8 @@ def LDriuh_indexed_shl_cPt_V4 : LDInst<(outs IntRegs:$dst), Requires<[HasV4T]>; // if (Pv.new) Rd=memuh(Rs+Rt<<#u2) -let mayLoad = 1, AddedComplexity = 45, isPredicated = 1 in -def LDriuh_indexed_shl_cdnPt_V4 : LDInst<(outs IntRegs:$dst), +let AddedComplexity = 45, isPredicated = 1 in +def LDriuh_indexed_shl_cdnPt_V4 : LDInst2<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$offset), "if ($src1.new) $dst=memuh($src2+$src3<<#$offset)", @@ -742,8 +881,8 @@ def LDriuh_indexed_shl_cdnPt_V4 : LDInst<(outs IntRegs:$dst), Requires<[HasV4T]>; // if (!Pv) Rd=memuh(Rs+Rt<<#u2) -let mayLoad = 1, AddedComplexity = 45, isPredicated = 1 in -def LDriuh_indexed_shl_cNotPt_V4 : LDInst<(outs IntRegs:$dst), +let AddedComplexity = 45, isPredicated = 1 in +def LDriuh_indexed_shl_cNotPt_V4 : LDInst2<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$offset), "if (!$src1) $dst=memuh($src2+$src3<<#$offset)", @@ -751,8 +890,8 @@ def LDriuh_indexed_shl_cNotPt_V4 : LDInst<(outs IntRegs:$dst), Requires<[HasV4T]>; // if (!Pv.new) Rd=memuh(Rs+Rt<<#u2) -let mayLoad = 1, AddedComplexity = 45, isPredicated = 1 in -def LDriuh_indexed_shl_cdnNotPt_V4 : LDInst<(outs IntRegs:$dst), +let AddedComplexity = 45, isPredicated = 1 in +def LDriuh_indexed_shl_cdnNotPt_V4 : LDInst2<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$offset), "if (!$src1.new) $dst=memuh($src2+$src3<<#$offset)", @@ -762,6 +901,14 @@ def LDriuh_indexed_shl_cdnNotPt_V4 : LDInst<(outs IntRegs:$dst), // Rd=memh(Rt<<#u2+#U6) //// Load word. 
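// The LDriw_pred_V4 def added just below is a pseudo instruction: it gives
// the compiler a way to reload a predicate register through memory (the
// comment ties it to bug 5279, presumably a predicate spill/reload failure).
// Its asm string is deliberately "Error; should not emit" so that any copy
// surviving to the assembly printer is caught, and neverHasSideEffects = 1
// leaves the scheduler free to move it.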
+// Load predicate: Fix for bug 5279. +let neverHasSideEffects = 1 in +def LDriw_pred_V4 : LDInst2<(outs PredRegs:$dst), + (ins MEMri:$addr), + "Error; should not emit", + []>, + Requires<[HasV4T]>; + // Rd=memw(Re=#U6) // Rd=memw(Rs+Rt<<#u2) @@ -769,8 +916,9 @@ let AddedComplexity = 10, isPredicable = 1 in def LDriw_indexed_V4 : LDInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), "$dst=memw($src1+$src2<<#0)", - [(set IntRegs:$dst, (load (add IntRegs:$src1, - IntRegs:$src2)))]>, + [(set (i32 IntRegs:$dst), + (i32 (load (add (i32 IntRegs:$src1), + (i32 IntRegs:$src2)))))]>, Requires<[HasV4T]>; // Rd=memw(Rs+Rt<<#u2) @@ -778,48 +926,49 @@ let AddedComplexity = 40, isPredicable = 1 in def LDriw_indexed_shl_V4 : LDInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2, u2Imm:$offset), "$dst=memw($src1+$src2<<#$offset)", - [(set IntRegs:$dst, (load (add IntRegs:$src1, - (shl IntRegs:$src2, - u2ImmPred:$offset))))]>, + [(set (i32 IntRegs:$dst), + (i32 (load (add (i32 IntRegs:$src1), + (shl (i32 IntRegs:$src2), + u2ImmPred:$offset)))))]>, Requires<[HasV4T]>; //// Load word conditionally. // if ([!]Pv[.new]) Rd=memw(Rs+Rt<<#u2) // if (Pv) Rd=memw(Rs+Rt<<#u2) -let mayLoad = 1, AddedComplexity = 15, isPredicated = 1 in -def LDriw_indexed_cPt_V4 : LDInst<(outs IntRegs:$dst), +let AddedComplexity = 15, isPredicated = 1 in +def LDriw_indexed_cPt_V4 : LDInst2<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), "if ($src1) $dst=memw($src2+$src3<<#0)", []>, Requires<[HasV4T]>; // if (Pv.new) Rd=memw(Rs+Rt<<#u2) -let mayLoad = 1, AddedComplexity = 15, isPredicated = 1 in -def LDriw_indexed_cdnPt_V4 : LDInst<(outs IntRegs:$dst), +let AddedComplexity = 15, isPredicated = 1 in +def LDriw_indexed_cdnPt_V4 : LDInst2<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), "if ($src1.new) $dst=memw($src2+$src3<<#0)", []>, Requires<[HasV4T]>; // if (!Pv) Rd=memw(Rs+Rt<<#u2) -let mayLoad = 1, AddedComplexity = 15, isPredicated = 1 in -def LDriw_indexed_cNotPt_V4 : LDInst<(outs IntRegs:$dst), +let AddedComplexity = 15, isPredicated = 1 in +def LDriw_indexed_cNotPt_V4 : LDInst2<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), "if (!$src1) $dst=memw($src2+$src3<<#0)", []>, Requires<[HasV4T]>; // if (!Pv.new) Rd=memw(Rs+Rt<<#u2) -let mayLoad = 1, AddedComplexity = 15, isPredicated = 1 in -def LDriw_indexed_cdnNotPt_V4 : LDInst<(outs IntRegs:$dst), +let AddedComplexity = 15, isPredicated = 1 in +def LDriw_indexed_cdnNotPt_V4 : LDInst2<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), "if (!$src1.new) $dst=memw($src2+$src3<<#0)", []>, Requires<[HasV4T]>; // if (Pv) Rd=memw(Rs+Rt<<#u2) -let mayLoad = 1, AddedComplexity = 45, isPredicated = 1 in -def LDriw_indexed_shl_cPt_V4 : LDInst<(outs IntRegs:$dst), +let AddedComplexity = 45, isPredicated = 1 in +def LDriw_indexed_shl_cPt_V4 : LDInst2<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$offset), "if ($src1) $dst=memw($src2+$src3<<#$offset)", @@ -827,8 +976,8 @@ def LDriw_indexed_shl_cPt_V4 : LDInst<(outs IntRegs:$dst), Requires<[HasV4T]>; // if (Pv.new) Rd=memw(Rs+Rt<<#u2) -let mayLoad = 1, AddedComplexity = 45, isPredicated = 1 in -def LDriw_indexed_shl_cdnPt_V4 : LDInst<(outs IntRegs:$dst), +let AddedComplexity = 45, isPredicated = 1 in +def LDriw_indexed_shl_cdnPt_V4 : LDInst2<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$offset), "if ($src1.new) $dst=memw($src2+$src3<<#$offset)", @@ -836,8 +985,8 @@ def 
LDriw_indexed_shl_cdnPt_V4 : LDInst<(outs IntRegs:$dst), Requires<[HasV4T]>; // if (!Pv) Rd=memw(Rs+Rt<<#u2) -let mayLoad = 1, AddedComplexity = 45, isPredicated = 1 in -def LDriw_indexed_shl_cNotPt_V4 : LDInst<(outs IntRegs:$dst), +let AddedComplexity = 45, isPredicated = 1 in +def LDriw_indexed_shl_cNotPt_V4 : LDInst2<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$offset), "if (!$src1) $dst=memw($src2+$src3<<#$offset)", @@ -845,8 +994,8 @@ def LDriw_indexed_shl_cNotPt_V4 : LDInst<(outs IntRegs:$dst), Requires<[HasV4T]>; // if (!Pv.new) Rd=memw(Rs+Rt<<#u2) -let mayLoad = 1, AddedComplexity = 45, isPredicated = 1 in -def LDriw_indexed_shl_cdnNotPt_V4 : LDInst<(outs IntRegs:$dst), +let AddedComplexity = 45, isPredicated = 1 in +def LDriw_indexed_shl_cdnNotPt_V4 : LDInst2<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$offset), "if (!$src1.new) $dst=memw($src2+$src3<<#$offset)", @@ -859,367 +1008,1063 @@ def LDriw_indexed_shl_cdnNotPt_V4 : LDInst<(outs IntRegs:$dst), // Post-inc Load, Predicated, Dot new -let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1, isPredicated = 1 in -def POST_LDrid_cdnPt_V4 : LDInstPI<(outs DoubleRegs:$dst1, IntRegs:$dst2), +let hasCtrlDep = 1, neverHasSideEffects = 1, isPredicated = 1 in +def POST_LDrid_cdnPt_V4 : LDInst2PI<(outs DoubleRegs:$dst1, IntRegs:$dst2), (ins PredRegs:$src1, IntRegs:$src2, s4_3Imm:$src3), "if ($src1.new) $dst1 = memd($src2++#$src3)", [], "$src2 = $dst2">, Requires<[HasV4T]>; -let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1, isPredicated = 1 in -def POST_LDrid_cdnNotPt_V4 : LDInstPI<(outs DoubleRegs:$dst1, IntRegs:$dst2), +let hasCtrlDep = 1, neverHasSideEffects = 1, isPredicated = 1 in +def POST_LDrid_cdnNotPt_V4 : LDInst2PI<(outs DoubleRegs:$dst1, IntRegs:$dst2), (ins PredRegs:$src1, IntRegs:$src2, s4_3Imm:$src3), "if (!$src1.new) $dst1 = memd($src2++#$src3)", [], "$src2 = $dst2">, Requires<[HasV4T]>; -let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1, isPredicated = 1 in -def POST_LDrib_cdnPt_V4 : LDInstPI<(outs IntRegs:$dst1, IntRegs:$dst2), +let hasCtrlDep = 1, neverHasSideEffects = 1, isPredicated = 1 in +def POST_LDrib_cdnPt_V4 : LDInst2PI<(outs IntRegs:$dst1, IntRegs:$dst2), (ins PredRegs:$src1, IntRegs:$src2, s4_0Imm:$src3), "if ($src1.new) $dst1 = memb($src2++#$src3)", [], "$src2 = $dst2">, Requires<[HasV4T]>; -let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1, isPredicated = 1 in -def POST_LDrib_cdnNotPt_V4 : LDInstPI<(outs IntRegs:$dst1, IntRegs:$dst2), +let hasCtrlDep = 1, neverHasSideEffects = 1, isPredicated = 1 in +def POST_LDrib_cdnNotPt_V4 : LDInst2PI<(outs IntRegs:$dst1, IntRegs:$dst2), (ins PredRegs:$src1, IntRegs:$src2, s4_0Imm:$src3), "if (!$src1.new) $dst1 = memb($src2++#$src3)", [], "$src2 = $dst2">, Requires<[HasV4T]>; -let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1, isPredicated = 1 in -def POST_LDrih_cdnPt_V4 : LDInstPI<(outs IntRegs:$dst1, IntRegs:$dst2), +let hasCtrlDep = 1, neverHasSideEffects = 1, isPredicated = 1 in +def POST_LDrih_cdnPt_V4 : LDInst2PI<(outs IntRegs:$dst1, IntRegs:$dst2), (ins PredRegs:$src1, IntRegs:$src2, s4_1Imm:$src3), "if ($src1.new) $dst1 = memh($src2++#$src3)", [], "$src2 = $dst2">, Requires<[HasV4T]>; -let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1, isPredicated = 1 in -def POST_LDrih_cdnNotPt_V4 : LDInst2PI<(outs IntRegs:$dst1, 
IntRegs:$dst2), (ins PredRegs:$src1, IntRegs:$src2, s4_1Imm:$src3), "if (!$src1.new) $dst1 = memh($src2++#$src3)", [], "$src2 = $dst2">, Requires<[HasV4T]>; -let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1, isPredicated = 1 in -def POST_LDriub_cdnPt_V4 : LDInstPI<(outs IntRegs:$dst1, IntRegs:$dst2), +let hasCtrlDep = 1, neverHasSideEffects = 1, isPredicated = 1 in +def POST_LDriub_cdnPt_V4 : LDInst2PI<(outs IntRegs:$dst1, IntRegs:$dst2), (ins PredRegs:$src1, IntRegs:$src2, s4_0Imm:$src3), "if ($src1.new) $dst1 = memub($src2++#$src3)", [], "$src2 = $dst2">, Requires<[HasV4T]>; -let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1, isPredicated = 1 in -def POST_LDriub_cdnNotPt_V4 : LDInstPI<(outs IntRegs:$dst1, IntRegs:$dst2), +let hasCtrlDep = 1, neverHasSideEffects = 1, isPredicated = 1 in +def POST_LDriub_cdnNotPt_V4 : LDInst2PI<(outs IntRegs:$dst1, IntRegs:$dst2), (ins PredRegs:$src1, IntRegs:$src2, s4_0Imm:$src3), "if (!$src1.new) $dst1 = memub($src2++#$src3)", [], "$src2 = $dst2">, Requires<[HasV4T]>; -let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1, isPredicated = 1 in -def POST_LDriuh_cdnPt_V4 : LDInstPI<(outs IntRegs:$dst1, IntRegs:$dst2), +let hasCtrlDep = 1, neverHasSideEffects = 1, isPredicated = 1 in +def POST_LDriuh_cdnPt_V4 : LDInst2PI<(outs IntRegs:$dst1, IntRegs:$dst2), (ins PredRegs:$src1, IntRegs:$src2, s4_1Imm:$src3), "if ($src1.new) $dst1 = memuh($src2++#$src3)", [], "$src2 = $dst2">, Requires<[HasV4T]>; -let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1, isPredicated = 1 in -def POST_LDriuh_cdnNotPt_V4 : LDInstPI<(outs IntRegs:$dst1, IntRegs:$dst2), +let hasCtrlDep = 1, neverHasSideEffects = 1, isPredicated = 1 in +def POST_LDriuh_cdnNotPt_V4 : LDInst2PI<(outs IntRegs:$dst1, IntRegs:$dst2), (ins PredRegs:$src1, IntRegs:$src2, s4_1Imm:$src3), "if (!$src1.new) $dst1 = memuh($src2++#$src3)", [], "$src2 = $dst2">, Requires<[HasV4T]>; -let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1, isPredicated = 1 in -def POST_LDriw_cdnPt_V4 : LDInstPI<(outs IntRegs:$dst1, IntRegs:$dst2), +let hasCtrlDep = 1, neverHasSideEffects = 1, isPredicated = 1 in +def POST_LDriw_cdnPt_V4 : LDInst2PI<(outs IntRegs:$dst1, IntRegs:$dst2), (ins PredRegs:$src1, IntRegs:$src2, s4_2Imm:$src3), "if ($src1.new) $dst1 = memw($src2++#$src3)", [], "$src2 = $dst2">, Requires<[HasV4T]>; -let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1, isPredicated = 1 in -def POST_LDriw_cdnNotPt_V4 : LDInstPI<(outs IntRegs:$dst1, IntRegs:$dst2), +let hasCtrlDep = 1, neverHasSideEffects = 1, isPredicated = 1 in +def POST_LDriw_cdnNotPt_V4 : LDInst2PI<(outs IntRegs:$dst1, IntRegs:$dst2), (ins PredRegs:$src1, IntRegs:$src2, s4_2Imm:$src3), "if (!$src1.new) $dst1 = memw($src2++#$src3)", [], "$src2 = $dst2">, Requires<[HasV4T]>; +/// Load from global offset -//===----------------------------------------------------------------------===// -// LD - -//===----------------------------------------------------------------------===// - -//===----------------------------------------------------------------------===// -// ST + -//===----------------------------------------------------------------------===// -/// -/// Assumptions::: ****** DO NOT IGNORE ******** -/// 1. Make sure that in post increment store, the zero'th operand is always the -/// post increment operand. -/// 2. Make sure that the store value operand(Rt/Rtt) in a store is always the -/// last operand. -/// - -// Store doubleword. 
-// memd(Re=#U6)=Rtt -// TODO: needs to be implemented - -// memd(Rs+#s11:3)=Rtt -// memd(Rs+Ru<<#u2)=Rtt -let AddedComplexity = 10, isPredicable = 1 in -def STrid_indexed_shl_V4 : STInst<(outs), - (ins IntRegs:$src1, IntRegs:$src2, u2Imm:$src3, DoubleRegs:$src4), - "memd($src1+$src2<<#$src3) = $src4", - [(store DoubleRegs:$src4, (add IntRegs:$src1, - (shl IntRegs:$src2, u2ImmPred:$src3)))]>, +let isPredicable = 1, neverHasSideEffects = 1 in +def LDrid_GP_V4 : LDInst2<(outs DoubleRegs:$dst), + (ins globaladdress:$global, u16Imm:$offset), + "$dst=memd(#$global+$offset)", + []>, Requires<[HasV4T]>; -// memd(Ru<<#u2+#U6)=Rtt -let AddedComplexity = 10 in -def STrid_shl_V4 : STInst<(outs), - (ins IntRegs:$src1, u2Imm:$src2, u6Imm:$src3, DoubleRegs:$src4), - "memd($src1<<#$src2+#$src3) = $src4", - [(store DoubleRegs:$src4, (shl IntRegs:$src1, - (add u2ImmPred:$src2, - u6ImmPred:$src3)))]>, +let neverHasSideEffects = 1, isPredicated = 1 in +def LDrid_GP_cPt_V4 : LDInst2<(outs DoubleRegs:$dst), + (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset), + "if ($src1) $dst=memd(##$global+$offset)", + []>, Requires<[HasV4T]>; -// memd(Rx++#s4:3)=Rtt -// memd(Rx++#s4:3:circ(Mu))=Rtt -// memd(Rx++I:circ(Mu))=Rtt -// memd(Rx++Mu)=Rtt -// memd(Rx++Mu:brev)=Rtt -// memd(gp+#u16:3)=Rtt - -// Store doubleword conditionally. -// if ([!]Pv[.new]) memd(#u6)=Rtt -// TODO: needs to be implemented. - -// if ([!]Pv[.new]) memd(Rs+#u6:3)=Rtt -// if (Pv) memd(Rs+#u6:3)=Rtt -// if (Pv.new) memd(Rs+#u6:3)=Rtt -let AddedComplexity = 10, mayStore = 1, neverHasSideEffects = 1 in -def STrid_cdnPt_V4 : STInst<(outs), - (ins PredRegs:$src1, MEMri:$addr, DoubleRegs:$src2), - "if ($src1.new) memd($addr) = $src2", +let neverHasSideEffects = 1, isPredicated = 1 in +def LDrid_GP_cNotPt_V4 : LDInst2<(outs DoubleRegs:$dst), + (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset), + "if (!$src1) $dst=memd(##$global+$offset)", []>, Requires<[HasV4T]>; -// if (!Pv) memd(Rs+#u6:3)=Rtt -// if (!Pv.new) memd(Rs+#u6:3)=Rtt -let AddedComplexity = 10, mayStore = 1, neverHasSideEffects = 1 in -def STrid_cdnNotPt_V4 : STInst<(outs), - (ins PredRegs:$src1, MEMri:$addr, DoubleRegs:$src2), - "if (!$src1.new) memd($addr) = $src2", +let neverHasSideEffects = 1, isPredicated = 1 in +def LDrid_GP_cdnPt_V4 : LDInst2<(outs DoubleRegs:$dst), + (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset), + "if ($src1.new) $dst=memd(##$global+$offset)", []>, Requires<[HasV4T]>; -// if (Pv) memd(Rs+#u6:3)=Rtt -// if (Pv.new) memd(Rs+#u6:3)=Rtt -let AddedComplexity = 10, mayStore = 1, neverHasSideEffects = 1 in -def STrid_indexed_cdnPt_V4 : STInst<(outs), - (ins PredRegs:$src1, IntRegs:$src2, u6_3Imm:$src3, - DoubleRegs:$src4), - "if ($src1.new) memd($src2+#$src3) = $src4", +let neverHasSideEffects = 1, isPredicated = 1 in +def LDrid_GP_cdnNotPt_V4 : LDInst2<(outs DoubleRegs:$dst), + (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset), + "if (!$src1.new) $dst=memd(##$global+$offset)", []>, Requires<[HasV4T]>; -// if (!Pv) memd(Rs+#u6:3)=Rtt -// if (!Pv.new) memd(Rs+#u6:3)=Rtt -let AddedComplexity = 10, mayStore = 1, neverHasSideEffects = 1 in -def STrid_indexed_cdnNotPt_V4 : STInst<(outs), - (ins PredRegs:$src1, IntRegs:$src2, u6_3Imm:$src3, - DoubleRegs:$src4), - "if (!$src1.new) memd($src2+#$src3) = $src4", +let isPredicable = 1, neverHasSideEffects = 1 in +def LDrib_GP_V4 : LDInst2<(outs IntRegs:$dst), + (ins globaladdress:$global, u16Imm:$offset), + "$dst=memb(#$global+$offset)", []>, Requires<[HasV4T]>; -// if ([!]Pv[.new]) 
memd(Rs+Ru<<#u2)=Rtt -// if (Pv) memd(Rs+Ru<<#u2)=Rtt -let AddedComplexity = 10, mayStore = 1, neverHasSideEffects = 1 in -def STrid_indexed_shl_cPt_V4 : STInst<(outs), - (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4, - DoubleRegs:$src5), - "if ($src1) memd($src2+$src3<<#$src4) = $src5", +let neverHasSideEffects = 1, isPredicated = 1 in +def LDrib_GP_cPt_V4 : LDInst2<(outs IntRegs:$dst), + (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset), + "if ($src1) $dst=memb(##$global+$offset)", []>, Requires<[HasV4T]>; -// if (Pv.new) memd(Rs+Ru<<#u2)=Rtt -let AddedComplexity = 10, mayStore = 1, neverHasSideEffects = 1 in -def STrid_indexed_shl_cdnPt_V4 : STInst<(outs), - (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4, - DoubleRegs:$src5), - "if ($src1) memd($src2+$src3<<#$src4) = $src5", +let neverHasSideEffects = 1, isPredicated = 1 in +def LDrib_GP_cNotPt_V4 : LDInst2<(outs IntRegs:$dst), + (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset), + "if (!$src1) $dst=memb(##$global+$offset)", []>, Requires<[HasV4T]>; -// if (!Pv) memd(Rs+Ru<<#u2)=Rtt -let AddedComplexity = 10, mayStore = 1, neverHasSideEffects = 1 in -def STrid_indexed_shl_cNotPt_V4 : STInst<(outs), - (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4, - DoubleRegs:$src5), - "if (!$src1) memd($src2+$src3<<#$src4) = $src5", + +let neverHasSideEffects = 1, isPredicated = 1 in +def LDrib_GP_cdnPt_V4 : LDInst2<(outs IntRegs:$dst), + (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset), + "if ($src1.new) $dst=memb(##$global+$offset)", []>, Requires<[HasV4T]>; -// if (!Pv.new) memd(Rs+Ru<<#u2)=Rtt -let AddedComplexity = 10, mayStore = 1, neverHasSideEffects = 1 in -def STrid_indexed_shl_cdnNotPt_V4 : STInst<(outs), - (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4, - DoubleRegs:$src5), - "if (!$src1.new) memd($src2+$src3<<#$src4) = $src5", + +let neverHasSideEffects = 1, isPredicated = 1 in +def LDrib_GP_cdnNotPt_V4 : LDInst2<(outs IntRegs:$dst), + (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset), + "if (!$src1.new) $dst=memb(##$global+$offset)", []>, Requires<[HasV4T]>; -// if ([!]Pv[.new]) memd(Rx++#s4:3)=Rtt -// if (Pv) memd(Rx++#s4:3)=Rtt -// if (Pv.new) memd(Rx++#s4:3)=Rtt -let AddedComplexity = 10, mayStore = 1, neverHasSideEffects = 1 in -def POST_STdri_cdnPt_V4 : STInstPI<(outs IntRegs:$dst), - (ins PredRegs:$src1, DoubleRegs:$src2, IntRegs:$src3, - s4_3Imm:$offset), - "if ($src1.new) memd($src3++#$offset) = $src2", - [], - "$src3 = $dst">, - Requires<[HasV4T]>; -// if (!Pv) memd(Rx++#s4:3)=Rtt -// if (!Pv.new) memd(Rx++#s4:3)=Rtt -let AddedComplexity = 10, mayStore = 1, neverHasSideEffects = 1 in -def POST_STdri_cdnNotPt_V4 : STInstPI<(outs IntRegs:$dst), - (ins PredRegs:$src1, DoubleRegs:$src2, IntRegs:$src3, - s4_3Imm:$offset), - "if (!$src1.new) memd($src3++#$offset) = $src2", - [], - "$src3 = $dst">, +let isPredicable = 1, neverHasSideEffects = 1 in +def LDriub_GP_V4 : LDInst2<(outs IntRegs:$dst), + (ins globaladdress:$global, u16Imm:$offset), + "$dst=memub(#$global+$offset)", + []>, Requires<[HasV4T]>; -// Store byte. -// memb(Re=#U6)=Rt -// TODO: needs to be implemented. 
-// memb(Rs+#s11:0)=Rt -// memb(Rs+#u6:0)=#S8 -let AddedComplexity = 10, isPredicable = 1 in -def STrib_imm_V4 : STInst<(outs), - (ins IntRegs:$src1, u6_0Imm:$src2, s8Imm:$src3), - "memb($src1+#$src2) = #$src3", - [(truncstorei8 s8ImmPred:$src3, (add IntRegs:$src1, - u6_0ImmPred:$src2))]>, +let neverHasSideEffects = 1, isPredicated = 1 in +def LDriub_GP_cPt_V4 : LDInst2<(outs IntRegs:$dst), + (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset), + "if ($src1) $dst=memub(##$global+$offset)", + []>, Requires<[HasV4T]>; -// memb(Rs+Ru<<#u2)=Rt -let AddedComplexity = 10, isPredicable = 1 in -def STrib_indexed_shl_V4 : STInst<(outs), - (ins IntRegs:$src1, IntRegs:$src2, u2Imm:$src3, IntRegs:$src4), - "memb($src1+$src2<<#$src3) = $src4", - [(truncstorei8 IntRegs:$src4, (add IntRegs:$src1, - (shl IntRegs:$src2, - u2ImmPred:$src3)))]>, +let neverHasSideEffects = 1, isPredicated = 1 in +def LDriub_GP_cNotPt_V4 : LDInst2<(outs IntRegs:$dst), + (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset), + "if (!$src1) $dst=memub(##$global+$offset)", + []>, Requires<[HasV4T]>; -// memb(Ru<<#u2+#U6)=Rt -let AddedComplexity = 10 in -def STrib_shl_V4 : STInst<(outs), - (ins IntRegs:$src1, u2Imm:$src2, u6Imm:$src3, IntRegs:$src4), - "memb($src1<<#$src2+#$src3) = $src4", - [(truncstorei8 IntRegs:$src4, (shl IntRegs:$src1, - (add u2ImmPred:$src2, - u6ImmPred:$src3)))]>, +let neverHasSideEffects = 1, isPredicated = 1 in +def LDriub_GP_cdnPt_V4 : LDInst2<(outs IntRegs:$dst), + (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset), + "if ($src1.new) $dst=memub(##$global+$offset)", + []>, Requires<[HasV4T]>; -// memb(Rx++#s4:0:circ(Mu))=Rt -// memb(Rx++I:circ(Mu))=Rt -// memb(Rx++Mu)=Rt -// memb(Rx++Mu:brev)=Rt -// memb(gp+#u16:0)=Rt +let neverHasSideEffects = 1, isPredicated = 1 in +def LDriub_GP_cdnNotPt_V4 : LDInst2<(outs IntRegs:$dst), + (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset), + "if (!$src1.new) $dst=memub(##$global+$offset)", + []>, + Requires<[HasV4T]>; -// Store byte conditionally. 
-// if ([!]Pv[.new]) memb(#u6)=Rt -// if ([!]Pv[.new]) memb(Rs+#u6:0)=#S6 -// if (Pv) memb(Rs+#u6:0)=#S6 -let mayStore = 1, neverHasSideEffects = 1 in -def STrib_imm_cPt_V4 : STInst<(outs), - (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3, s6Imm:$src4), - "if ($src1) memb($src2+#$src3) = #$src4", +let isPredicable = 1, neverHasSideEffects = 1 in +def LDrih_GP_V4 : LDInst2<(outs IntRegs:$dst), + (ins globaladdress:$global, u16Imm:$offset), + "$dst=memh(#$global+$offset)", []>, Requires<[HasV4T]>; -// if (Pv.new) memb(Rs+#u6:0)=#S6 -let mayStore = 1, neverHasSideEffects = 1 in -def STrib_imm_cdnPt_V4 : STInst<(outs), - (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3, s6Imm:$src4), - "if ($src1.new) memb($src2+#$src3) = #$src4", + +let neverHasSideEffects = 1, isPredicated = 1 in +def LDrih_GP_cPt_V4 : LDInst2<(outs IntRegs:$dst), + (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset), + "if ($src1) $dst=memh(##$global+$offset)", []>, Requires<[HasV4T]>; -// if (!Pv) memb(Rs+#u6:0)=#S6 -let mayStore = 1, neverHasSideEffects = 1 in -def STrib_imm_cNotPt_V4 : STInst<(outs), - (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3, s6Imm:$src4), - "if (!$src1) memb($src2+#$src3) = #$src4", +let neverHasSideEffects = 1, isPredicated = 1 in +def LDrih_GP_cNotPt_V4 : LDInst2<(outs IntRegs:$dst), + (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset), + "if (!$src1) $dst=memh(##$global+$offset)", []>, Requires<[HasV4T]>; -// if (!Pv.new) memb(Rs+#u6:0)=#S6 -let mayStore = 1, neverHasSideEffects = 1 in -def STrib_imm_cdnNotPt_V4 : STInst<(outs), - (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3, s6Imm:$src4), - "if (!$src1.new) memb($src2+#$src3) = #$src4", +let neverHasSideEffects = 1, isPredicated = 1 in +def LDrih_GP_cdnPt_V4 : LDInst2<(outs IntRegs:$dst), + (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset), + "if ($src1.new) $dst=memh(##$global+$offset)", []>, Requires<[HasV4T]>; -// if ([!]Pv[.new]) memb(Rs+#u6:0)=Rt -// if (Pv) memb(Rs+#u6:0)=Rt -// if (Pv.new) memb(Rs+#u6:0)=Rt -let mayStore = 1, neverHasSideEffects = 1 in -def STrib_cdnPt_V4 : STInst<(outs), - (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2), - "if ($src1.new) memb($addr) = $src2", +let neverHasSideEffects = 1, isPredicated = 1 in +def LDrih_GP_cdnNotPt_V4 : LDInst2<(outs IntRegs:$dst), + (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset), + "if (!$src1.new) $dst=memh(##$global+$offset)", []>, Requires<[HasV4T]>; -// if (!Pv) memb(Rs+#u6:0)=Rt -// if (!Pv.new) memb(Rs+#u6:0)=Rt -let mayStore = 1, neverHasSideEffects = 1 in -def STrib_cdnNotPt_V4 : STInst<(outs), - (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2), - "if (!$src1.new) memb($addr) = $src2", + +let isPredicable = 1, neverHasSideEffects = 1 in +def LDriuh_GP_V4 : LDInst2<(outs IntRegs:$dst), + (ins globaladdress:$global, u16Imm:$offset), + "$dst=memuh(#$global+$offset)", []>, Requires<[HasV4T]>; -// if (Pv) memb(Rs+#u6:0)=Rt -// if (!Pv) memb(Rs+#u6:0)=Rt -// if (Pv.new) memb(Rs+#u6:0)=Rt -let mayStore = 1, neverHasSideEffects = 1 in -def STrib_indexed_cdnPt_V4 : STInst<(outs), - (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3, IntRegs:$src4), - "if ($src1.new) memb($src2+#$src3) = $src4", +let neverHasSideEffects = 1, isPredicated = 1 in +def LDriuh_GP_cPt_V4 : LDInst2<(outs IntRegs:$dst), + (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset), + "if ($src1) $dst=memuh(##$global+$offset)", []>, Requires<[HasV4T]>; -// if (!Pv.new) memb(Rs+#u6:0)=Rt -let mayStore = 1, neverHasSideEffects = 1 in -def 
STrib_indexed_cdnNotPt_V4 : STInst<(outs), - (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3, IntRegs:$src4), - "if (!$src1.new) memb($src2+#$src3) = $src4", +let neverHasSideEffects = 1, isPredicated = 1 in +def LDriuh_GP_cNotPt_V4 : LDInst2<(outs IntRegs:$dst), + (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset), + "if (!$src1) $dst=memuh(##$global+$offset)", []>, Requires<[HasV4T]>; -// if ([!]Pv[.new]) memb(Rs+Ru<<#u2)=Rt -// if (Pv) memb(Rs+Ru<<#u2)=Rt -let mayStore = 1, AddedComplexity = 10 in -def STrib_indexed_shl_cPt_V4 : STInst<(outs), +let neverHasSideEffects = 1, isPredicated = 1 in +def LDriuh_GP_cdnPt_V4 : LDInst2<(outs IntRegs:$dst), + (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset), + "if ($src1.new) $dst=memuh(##$global+$offset)", + []>, + Requires<[HasV4T]>; + +let neverHasSideEffects = 1, isPredicated = 1 in +def LDriuh_GP_cdnNotPt_V4 : LDInst2<(outs IntRegs:$dst), + (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset), + "if (!$src1.new) $dst=memuh(##$global+$offset)", + []>, + Requires<[HasV4T]>; + +let isPredicable = 1, neverHasSideEffects = 1 in +def LDriw_GP_V4 : LDInst2<(outs IntRegs:$dst), + (ins globaladdress:$global, u16Imm:$offset), + "$dst=memw(#$global+$offset)", + []>, + Requires<[HasV4T]>; + + +let neverHasSideEffects = 1, isPredicated = 1 in +def LDriw_GP_cPt_V4 : LDInst2<(outs IntRegs:$dst), + (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset), + "if ($src1) $dst=memw(##$global+$offset)", + []>, + Requires<[HasV4T]>; + +let neverHasSideEffects = 1, isPredicated = 1 in +def LDriw_GP_cNotPt_V4 : LDInst2<(outs IntRegs:$dst), + (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset), + "if (!$src1) $dst=memw(##$global+$offset)", + []>, + Requires<[HasV4T]>; + + +let neverHasSideEffects = 1, isPredicated = 1 in +def LDriw_GP_cdnPt_V4 : LDInst2<(outs IntRegs:$dst), + (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset), + "if ($src1.new) $dst=memw(##$global+$offset)", + []>, + Requires<[HasV4T]>; + +let neverHasSideEffects = 1, isPredicated = 1 in +def LDriw_GP_cdnNotPt_V4 : LDInst2<(outs IntRegs:$dst), + (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset), + "if (!$src1.new) $dst=memw(##$global+$offset)", + []>, + Requires<[HasV4T]>; + + +let isPredicable = 1, neverHasSideEffects = 1 in +def LDd_GP_V4 : LDInst2<(outs DoubleRegs:$dst), + (ins globaladdress:$global), + "$dst=memd(#$global)", + []>, + Requires<[HasV4T]>; + +// if (Pv) Rtt=memd(##global) +let neverHasSideEffects = 1, isPredicated = 1 in +def LDd_GP_cPt_V4 : LDInst2<(outs DoubleRegs:$dst), + (ins PredRegs:$src1, globaladdress:$global), + "if ($src1) $dst=memd(##$global)", + []>, + Requires<[HasV4T]>; + + +// if (!Pv) Rtt=memd(##global) +let neverHasSideEffects = 1, isPredicated = 1 in +def LDd_GP_cNotPt_V4 : LDInst2<(outs DoubleRegs:$dst), + (ins PredRegs:$src1, globaladdress:$global), + "if (!$src1) $dst=memd(##$global)", + []>, + Requires<[HasV4T]>; + +// if (Pv) Rtt=memd(##global) +let neverHasSideEffects = 1, isPredicated = 1 in +def LDd_GP_cdnPt_V4 : LDInst2<(outs DoubleRegs:$dst), + (ins PredRegs:$src1, globaladdress:$global), + "if ($src1.new) $dst=memd(##$global)", + []>, + Requires<[HasV4T]>; + + +// if (!Pv) Rtt=memd(##global) +let neverHasSideEffects = 1, isPredicated = 1 in +def LDd_GP_cdnNotPt_V4 : LDInst2<(outs DoubleRegs:$dst), + (ins PredRegs:$src1, globaladdress:$global), + "if (!$src1.new) $dst=memd(##$global)", + []>, + Requires<[HasV4T]>; + +let isPredicable = 1, neverHasSideEffects = 1 in +def LDb_GP_V4 : LDInst2<(outs 
IntRegs:$dst), + (ins globaladdress:$global), + "$dst=memb(#$global)", + []>, + Requires<[HasV4T]>; + +// if (Pv) Rt=memb(##global) +let neverHasSideEffects = 1, isPredicated = 1 in +def LDb_GP_cPt_V4 : LDInst2<(outs IntRegs:$dst), + (ins PredRegs:$src1, globaladdress:$global), + "if ($src1) $dst=memb(##$global)", + []>, + Requires<[HasV4T]>; + +// if (!Pv) Rt=memb(##global) +let neverHasSideEffects = 1, isPredicated = 1 in +def LDb_GP_cNotPt_V4 : LDInst2<(outs IntRegs:$dst), + (ins PredRegs:$src1, globaladdress:$global), + "if (!$src1) $dst=memb(##$global)", + []>, + Requires<[HasV4T]>; + +// if (Pv) Rt=memb(##global) +let neverHasSideEffects = 1, isPredicated = 1 in +def LDb_GP_cdnPt_V4 : LDInst2<(outs IntRegs:$dst), + (ins PredRegs:$src1, globaladdress:$global), + "if ($src1.new) $dst=memb(##$global)", + []>, + Requires<[HasV4T]>; + +// if (!Pv) Rt=memb(##global) +let neverHasSideEffects = 1, isPredicated = 1 in +def LDb_GP_cdnNotPt_V4 : LDInst2<(outs IntRegs:$dst), + (ins PredRegs:$src1, globaladdress:$global), + "if (!$src1.new) $dst=memb(##$global)", + []>, + Requires<[HasV4T]>; + +let isPredicable = 1, neverHasSideEffects = 1 in +def LDub_GP_V4 : LDInst2<(outs IntRegs:$dst), + (ins globaladdress:$global), + "$dst=memub(#$global)", + []>, + Requires<[HasV4T]>; + +// if (Pv) Rt=memub(##global) +let neverHasSideEffects = 1, isPredicated = 1 in +def LDub_GP_cPt_V4 : LDInst2<(outs IntRegs:$dst), + (ins PredRegs:$src1, globaladdress:$global), + "if ($src1) $dst=memub(##$global)", + []>, + Requires<[HasV4T]>; + + +// if (!Pv) Rt=memub(##global) +let neverHasSideEffects = 1, isPredicated = 1 in +def LDub_GP_cNotPt_V4 : LDInst2<(outs IntRegs:$dst), + (ins PredRegs:$src1, globaladdress:$global), + "if (!$src1) $dst=memub(##$global)", + []>, + Requires<[HasV4T]>; + +// if (Pv) Rt=memub(##global) +let neverHasSideEffects = 1, isPredicated = 1 in +def LDub_GP_cdnPt_V4 : LDInst2<(outs IntRegs:$dst), + (ins PredRegs:$src1, globaladdress:$global), + "if ($src1.new) $dst=memub(##$global)", + []>, + Requires<[HasV4T]>; + + +// if (!Pv) Rt=memub(##global) +let neverHasSideEffects = 1, isPredicated = 1 in +def LDub_GP_cdnNotPt_V4 : LDInst2<(outs IntRegs:$dst), + (ins PredRegs:$src1, globaladdress:$global), + "if (!$src1.new) $dst=memub(##$global)", + []>, + Requires<[HasV4T]>; + +let isPredicable = 1, neverHasSideEffects = 1 in +def LDh_GP_V4 : LDInst2<(outs IntRegs:$dst), + (ins globaladdress:$global), + "$dst=memh(#$global)", + []>, + Requires<[HasV4T]>; + +// if (Pv) Rt=memh(##global) +let neverHasSideEffects = 1, isPredicated = 1 in +def LDh_GP_cPt_V4 : LDInst2<(outs IntRegs:$dst), + (ins PredRegs:$src1, globaladdress:$global), + "if ($src1) $dst=memh(##$global)", + []>, + Requires<[HasV4T]>; + +// if (!Pv) Rt=memh(##global) +let neverHasSideEffects = 1, isPredicated = 1 in +def LDh_GP_cNotPt_V4 : LDInst2<(outs IntRegs:$dst), + (ins PredRegs:$src1, globaladdress:$global), + "if (!$src1) $dst=memh(##$global)", + []>, + Requires<[HasV4T]>; + +// if (Pv) Rt=memh(##global) +let neverHasSideEffects = 1, isPredicated = 1 in +def LDh_GP_cdnPt_V4 : LDInst2<(outs IntRegs:$dst), + (ins PredRegs:$src1, globaladdress:$global), + "if ($src1.new) $dst=memh(##$global)", + []>, + Requires<[HasV4T]>; + +// if (!Pv) Rt=memh(##global) +let neverHasSideEffects = 1, isPredicated = 1 in +def LDh_GP_cdnNotPt_V4 : LDInst2<(outs IntRegs:$dst), + (ins PredRegs:$src1, globaladdress:$global), + "if (!$src1.new) $dst=memh(##$global)", + []>, + Requires<[HasV4T]>; + +let isPredicable = 1, neverHasSideEffects = 1 in +def 
LDuh_GP_V4 : LDInst2<(outs IntRegs:$dst), + (ins globaladdress:$global), + "$dst=memuh(#$global)", + []>, + Requires<[HasV4T]>; + +// if (Pv) Rt=memuh(##global) +let neverHasSideEffects = 1, isPredicated = 1 in +def LDuh_GP_cPt_V4 : LDInst2<(outs IntRegs:$dst), + (ins PredRegs:$src1, globaladdress:$global), + "if ($src1) $dst=memuh(##$global)", + []>, + Requires<[HasV4T]>; + +// if (!Pv) Rt=memuh(##global) +let neverHasSideEffects = 1, isPredicated = 1 in +def LDuh_GP_cNotPt_V4 : LDInst2<(outs IntRegs:$dst), + (ins PredRegs:$src1, globaladdress:$global), + "if (!$src1) $dst=memuh(##$global)", + []>, + Requires<[HasV4T]>; + +// if (Pv) Rt=memuh(##global) +let neverHasSideEffects = 1, isPredicated = 1 in +def LDuh_GP_cdnPt_V4 : LDInst2<(outs IntRegs:$dst), + (ins PredRegs:$src1, globaladdress:$global), + "if ($src1.new) $dst=memuh(##$global)", + []>, + Requires<[HasV4T]>; + +// if (!Pv) Rt=memuh(##global) +let neverHasSideEffects = 1, isPredicated = 1 in +def LDuh_GP_cdnNotPt_V4 : LDInst2<(outs IntRegs:$dst), + (ins PredRegs:$src1, globaladdress:$global), + "if (!$src1.new) $dst=memuh(##$global)", + []>, + Requires<[HasV4T]>; + +let isPredicable = 1, neverHasSideEffects = 1 in +def LDw_GP_V4 : LDInst2<(outs IntRegs:$dst), + (ins globaladdress:$global), + "$dst=memw(#$global)", + []>, + Requires<[HasV4T]>; + +// if (Pv) Rt=memw(##global) +let neverHasSideEffects = 1, isPredicated = 1 in +def LDw_GP_cPt_V4 : LDInst2<(outs IntRegs:$dst), + (ins PredRegs:$src1, globaladdress:$global), + "if ($src1) $dst=memw(##$global)", + []>, + Requires<[HasV4T]>; + + +// if (!Pv) Rt=memw(##global) +let neverHasSideEffects = 1, isPredicated = 1 in +def LDw_GP_cNotPt_V4 : LDInst2<(outs IntRegs:$dst), + (ins PredRegs:$src1, globaladdress:$global), + "if (!$src1) $dst=memw(##$global)", + []>, + Requires<[HasV4T]>; + +// if (Pv) Rt=memw(##global) +let neverHasSideEffects = 1, isPredicated = 1 in +def LDw_GP_cdnPt_V4 : LDInst2<(outs IntRegs:$dst), + (ins PredRegs:$src1, globaladdress:$global), + "if ($src1.new) $dst=memw(##$global)", + []>, + Requires<[HasV4T]>; + + +// if (!Pv) Rt=memw(##global) +let neverHasSideEffects = 1, isPredicated = 1 in +def LDw_GP_cdnNotPt_V4 : LDInst2<(outs IntRegs:$dst), + (ins PredRegs:$src1, globaladdress:$global), + "if (!$src1.new) $dst=memw(##$global)", + []>, + Requires<[HasV4T]>; + + + +def : Pat <(atomic_load_64 (HexagonCONST32_GP tglobaladdr:$global)), + (i64 (LDd_GP_V4 tglobaladdr:$global))>, + Requires<[HasV4T]>; + +def : Pat <(atomic_load_32 (HexagonCONST32_GP tglobaladdr:$global)), + (i32 (LDw_GP_V4 tglobaladdr:$global))>, + Requires<[HasV4T]>; + +def : Pat <(atomic_load_16 (HexagonCONST32_GP tglobaladdr:$global)), + (i32 (LDuh_GP_V4 tglobaladdr:$global))>, + Requires<[HasV4T]>; + +def : Pat <(atomic_load_8 (HexagonCONST32_GP tglobaladdr:$global)), + (i32 (LDub_GP_V4 tglobaladdr:$global))>, + Requires<[HasV4T]>; + +// Map from load(globaladdress) -> memw(#foo + 0) +let AddedComplexity = 100 in +def : Pat <(i64 (load (HexagonCONST32_GP tglobaladdr:$global))), + (i64 (LDd_GP_V4 tglobaladdr:$global))>, + Requires<[HasV4T]>; + +// Map from Pd = load(globaladdress) -> Rd = memb(globaladdress), Pd = Rd +let AddedComplexity = 100 in +def : Pat <(i1 (load (HexagonCONST32_GP tglobaladdr:$global))), + (i1 (TFR_PdRs (i32 (LDb_GP_V4 tglobaladdr:$global))))>, + Requires<[HasV4T]>; + +// When the Interprocedural Global Variable optimizer realizes that a certain +// global variable takes only two constant values, it shrinks the global to +// a boolean. 
Catch those loads here in the following 3 patterns. +let AddedComplexity = 100 in +def : Pat <(i32 (extloadi1 (HexagonCONST32_GP tglobaladdr:$global))), + (i32 (LDb_GP_V4 tglobaladdr:$global))>, + Requires<[HasV4T]>; + +let AddedComplexity = 100 in +def : Pat <(i32 (sextloadi1 (HexagonCONST32_GP tglobaladdr:$global))), + (i32 (LDb_GP_V4 tglobaladdr:$global))>, + Requires<[HasV4T]>; + +// Map from load(globaladdress) -> memb(#foo) +let AddedComplexity = 100 in +def : Pat <(i32 (extloadi8 (HexagonCONST32_GP tglobaladdr:$global))), + (i32 (LDb_GP_V4 tglobaladdr:$global))>, + Requires<[HasV4T]>; + +// Map from load(globaladdress) -> memb(#foo) +let AddedComplexity = 100 in +def : Pat <(i32 (sextloadi8 (HexagonCONST32_GP tglobaladdr:$global))), + (i32 (LDb_GP_V4 tglobaladdr:$global))>, + Requires<[HasV4T]>; + +let AddedComplexity = 100 in +def : Pat <(i32 (zextloadi1 (HexagonCONST32_GP tglobaladdr:$global))), + (i32 (LDub_GP_V4 tglobaladdr:$global))>, + Requires<[HasV4T]>; + +// Map from load(globaladdress) -> memub(#foo) +let AddedComplexity = 100 in +def : Pat <(i32 (zextloadi8 (HexagonCONST32_GP tglobaladdr:$global))), + (i32 (LDub_GP_V4 tglobaladdr:$global))>, + Requires<[HasV4T]>; + +// Map from load(globaladdress) -> memh(#foo) +let AddedComplexity = 100 in +def : Pat <(i32 (extloadi16 (HexagonCONST32_GP tglobaladdr:$global))), + (i32 (LDh_GP_V4 tglobaladdr:$global))>, + Requires<[HasV4T]>; + +// Map from load(globaladdress) -> memh(#foo) +let AddedComplexity = 100 in +def : Pat <(i32 (sextloadi16 (HexagonCONST32_GP tglobaladdr:$global))), + (i32 (LDh_GP_V4 tglobaladdr:$global))>, + Requires<[HasV4T]>; + +// Map from load(globaladdress) -> memuh(#foo) +let AddedComplexity = 100 in +def : Pat <(i32 (zextloadi16 (HexagonCONST32_GP tglobaladdr:$global))), + (i32 (LDuh_GP_V4 tglobaladdr:$global))>, + Requires<[HasV4T]>; + +// Map from load(globaladdress) -> memw(#foo) +let AddedComplexity = 100 in +def : Pat <(i32 (load (HexagonCONST32_GP tglobaladdr:$global))), + (i32 (LDw_GP_V4 tglobaladdr:$global))>, + Requires<[HasV4T]>; + +def : Pat <(atomic_load_64 (add (HexagonCONST32_GP tglobaladdr:$global), + u16ImmPred:$offset)), + (i64 (LDrid_GP_V4 tglobaladdr:$global, u16ImmPred:$offset))>, + Requires<[HasV4T]>; + +def : Pat <(atomic_load_32 (add (HexagonCONST32_GP tglobaladdr:$global), + u16ImmPred:$offset)), + (i32 (LDriw_GP_V4 tglobaladdr:$global, u16ImmPred:$offset))>, + Requires<[HasV4T]>; + +def : Pat <(atomic_load_16 (add (HexagonCONST32_GP tglobaladdr:$global), + u16ImmPred:$offset)), + (i32 (LDriuh_GP_V4 tglobaladdr:$global, u16ImmPred:$offset))>, + Requires<[HasV4T]>; + +def : Pat <(atomic_load_8 (add (HexagonCONST32_GP tglobaladdr:$global), + u16ImmPred:$offset)), + (i32 (LDriub_GP_V4 tglobaladdr:$global, u16ImmPred:$offset))>, + Requires<[HasV4T]>; + +// Map from load(globaladdress + x) -> memd(#foo + x) +let AddedComplexity = 100 in +def : Pat <(i64 (load (add (HexagonCONST32_GP tglobaladdr:$global), + u16ImmPred:$offset))), + (i64 (LDrid_GP_V4 tglobaladdr:$global, u16ImmPred:$offset))>, + Requires<[HasV4T]>; + +// Map from load(globaladdress + x) -> memb(#foo + x) +let AddedComplexity = 100 in +def : Pat <(i32 (extloadi8 (add (HexagonCONST32_GP tglobaladdr:$global), + u16ImmPred:$offset))), + (i32 (LDrib_GP_V4 tglobaladdr:$global, u16ImmPred:$offset))>, + Requires<[HasV4T]>; + +// Map from load(globaladdress + x) -> memb(#foo + x) +let AddedComplexity = 100 in +def : Pat <(i32 (sextloadi8 (add (HexagonCONST32_GP tglobaladdr:$global), + u16ImmPred:$offset))), + (i32 (LDrib_GP_V4 
tglobaladdr:$global, u16ImmPred:$offset))>, + Requires<[HasV4T]>; + +// Map from load(globaladdress + x) -> memub(#foo + x) +let AddedComplexity = 100 in +def : Pat <(i32 (zextloadi8 (add (HexagonCONST32_GP tglobaladdr:$global), + u16ImmPred:$offset))), + (i32 (LDriub_GP_V4 tglobaladdr:$global, u16ImmPred:$offset))>, + Requires<[HasV4T]>; + +// Map from load(globaladdress + x) -> memuh(#foo + x) +let AddedComplexity = 100 in +def : Pat <(i32 (extloadi16 (add (HexagonCONST32_GP tglobaladdr:$global), + u16ImmPred:$offset))), + (i32 (LDrih_GP_V4 tglobaladdr:$global, u16ImmPred:$offset))>, + Requires<[HasV4T]>; + +// Map from load(globaladdress + x) -> memh(#foo + x) +let AddedComplexity = 100 in +def : Pat <(i32 (sextloadi16 (add (HexagonCONST32_GP tglobaladdr:$global), + u16ImmPred:$offset))), + (i32 (LDrih_GP_V4 tglobaladdr:$global, u16ImmPred:$offset))>, + Requires<[HasV4T]>; + + +// Map from load(globaladdress + x) -> memuh(#foo + x) +let AddedComplexity = 100 in +def : Pat <(i32 (zextloadi16 (add (HexagonCONST32_GP tglobaladdr:$global), + u16ImmPred:$offset))), + (i32 (LDriuh_GP_V4 tglobaladdr:$global, u16ImmPred:$offset))>, + Requires<[HasV4T]>; + +// Map from load(globaladdress + x) -> memw(#foo + x) +let AddedComplexity = 100 in +def : Pat <(i32 (load (add (HexagonCONST32_GP tglobaladdr:$global), + u16ImmPred:$offset))), + (i32 (LDriw_GP_V4 tglobaladdr:$global, u16ImmPred:$offset))>, + Requires<[HasV4T]>; + + +//===----------------------------------------------------------------------===// +// LD - +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// ST + +//===----------------------------------------------------------------------===// +/// +/// Assumptions::: ****** DO NOT IGNORE ******** +/// 1. Make sure that in post increment store, the zero'th operand is always the +/// post increment operand. +/// 2. Make sure that the store value operand(Rt/Rtt) in a store is always the +/// last operand. 
+/// + +// memd(Re=#U6)=Rtt +def STrid_abs_setimm_V4 : STInst2<(outs IntRegs:$dst1), + (ins DoubleRegs:$src1, u6Imm:$src2), + "memd($dst1=#$src2) = $src1", + []>, + Requires<[HasV4T]>; + +// memb(Re=#U6)=Rs +def STrib_abs_setimm_V4 : STInst2<(outs IntRegs:$dst1), + (ins IntRegs:$src1, u6Imm:$src2), + "memb($dst1=#$src2) = $src1", + []>, + Requires<[HasV4T]>; + +// memh(Re=#U6)=Rs +def STrih_abs_setimm_V4 : STInst2<(outs IntRegs:$dst1), + (ins IntRegs:$src1, u6Imm:$src2), + "memh($dst1=#$src2) = $src1", + []>, + Requires<[HasV4T]>; + +// memw(Re=#U6)=Rs +def STriw_abs_setimm_V4 : STInst2<(outs IntRegs:$dst1), + (ins IntRegs:$src1, u6Imm:$src2), + "memw($dst1=#$src2) = $src1", + []>, + Requires<[HasV4T]>; + +// memd(Re=#U6)=Rtt +def STrid_abs_set_V4 : STInst2<(outs IntRegs:$dst1), + (ins DoubleRegs:$src1, globaladdress:$src2), + "memd($dst1=##$src2) = $src1", + []>, + Requires<[HasV4T]>; + +// memb(Re=#U6)=Rs +def STrib_abs_set_V4 : STInst2<(outs IntRegs:$dst1), + (ins IntRegs:$src1, globaladdress:$src2), + "memb($dst1=##$src2) = $src1", + []>, + Requires<[HasV4T]>; + +// memh(Re=#U6)=Rs +def STrih_abs_set_V4 : STInst2<(outs IntRegs:$dst1), + (ins IntRegs:$src1, globaladdress:$src2), + "memh($dst1=##$src2) = $src1", + []>, + Requires<[HasV4T]>; + +// memw(Re=#U6)=Rs +def STriw_abs_set_V4 : STInst2<(outs IntRegs:$dst1), + (ins IntRegs:$src1, globaladdress:$src2), + "memw($dst1=##$src2) = $src1", + []>, + Requires<[HasV4T]>; + +// memd(Rs+Ru<<#u2)=Rtt +let AddedComplexity = 10, isPredicable = 1 in +def STrid_indexed_shl_V4 : STInst<(outs), + (ins IntRegs:$src1, IntRegs:$src2, u2Imm:$src3, DoubleRegs:$src4), + "memd($src1+$src2<<#$src3) = $src4", + [(store (i64 DoubleRegs:$src4), + (add (i32 IntRegs:$src1), + (shl (i32 IntRegs:$src2), u2ImmPred:$src3)))]>, + Requires<[HasV4T]>; + +// memd(Ru<<#u2+#U6)=Rtt +let AddedComplexity = 10 in +def STrid_shl_V4 : STInst<(outs), + (ins IntRegs:$src1, u2Imm:$src2, u6Imm:$src3, DoubleRegs:$src4), + "memd($src1<<#$src2+#$src3) = $src4", + [(store (i64 DoubleRegs:$src4), + (add (shl (i32 IntRegs:$src1), u2ImmPred:$src2), + u6ImmPred:$src3))]>, + Requires<[HasV4T]>; + +// memd(Rx++#s4:3)=Rtt +// memd(Rx++#s4:3:circ(Mu))=Rtt +// memd(Rx++I:circ(Mu))=Rtt +// memd(Rx++Mu)=Rtt +// memd(Rx++Mu:brev)=Rtt +// memd(gp+#u16:3)=Rtt + +// Store doubleword conditionally. +// if ([!]Pv[.new]) memd(#u6)=Rtt +// TODO: needs to be implemented. 
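// The *_abs_setimm and *_abs_set stores above use the absolute-set
// addressing form "memX(Re=#U6)=Rs" / "memX(Re=##global)=Rs": the value is
// stored to the absolute address, and that address is also written back into
// Re, which is why each of these store defs has an IntRegs output ($dst1)
// even though it is a store. A hedged usage sketch in Hexagon assembly
// (register and symbol names illustrative):
//   memw(r0=##flag) = r1    // store r1 to flag, and leave &flag in r0
// so later accesses can go through r0 without rematerializing the address.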
+ +// if ([!]Pv[.new]) memd(Rs+#u6:3)=Rtt +// if (Pv) memd(Rs+#u6:3)=Rtt +// if (Pv.new) memd(Rs+#u6:3)=Rtt +let AddedComplexity = 10, neverHasSideEffects = 1, + isPredicated = 1 in +def STrid_cdnPt_V4 : STInst2<(outs), + (ins PredRegs:$src1, MEMri:$addr, DoubleRegs:$src2), + "if ($src1.new) memd($addr) = $src2", + []>, + Requires<[HasV4T]>; + +// if (!Pv) memd(Rs+#u6:3)=Rtt +// if (!Pv.new) memd(Rs+#u6:3)=Rtt +let AddedComplexity = 10, neverHasSideEffects = 1, + isPredicated = 1 in +def STrid_cdnNotPt_V4 : STInst2<(outs), + (ins PredRegs:$src1, MEMri:$addr, DoubleRegs:$src2), + "if (!$src1.new) memd($addr) = $src2", + []>, + Requires<[HasV4T]>; + +// if (Pv) memd(Rs+#u6:3)=Rtt +// if (Pv.new) memd(Rs+#u6:3)=Rtt +let AddedComplexity = 10, neverHasSideEffects = 1, + isPredicated = 1 in +def STrid_indexed_cdnPt_V4 : STInst2<(outs), + (ins PredRegs:$src1, IntRegs:$src2, u6_3Imm:$src3, + DoubleRegs:$src4), + "if ($src1.new) memd($src2+#$src3) = $src4", + []>, + Requires<[HasV4T]>; + +// if (!Pv) memd(Rs+#u6:3)=Rtt +// if (!Pv.new) memd(Rs+#u6:3)=Rtt +let AddedComplexity = 10, neverHasSideEffects = 1, + isPredicated = 1 in +def STrid_indexed_cdnNotPt_V4 : STInst2<(outs), + (ins PredRegs:$src1, IntRegs:$src2, u6_3Imm:$src3, + DoubleRegs:$src4), + "if (!$src1.new) memd($src2+#$src3) = $src4", + []>, + Requires<[HasV4T]>; + +// if ([!]Pv[.new]) memd(Rs+Ru<<#u2)=Rtt +// if (Pv) memd(Rs+Ru<<#u2)=Rtt +let AddedComplexity = 10, neverHasSideEffects = 1, + isPredicated = 1 in +def STrid_indexed_shl_cPt_V4 : STInst2<(outs), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4, + DoubleRegs:$src5), + "if ($src1) memd($src2+$src3<<#$src4) = $src5", + []>, + Requires<[HasV4T]>; + +// if (Pv.new) memd(Rs+Ru<<#u2)=Rtt +let AddedComplexity = 10, neverHasSideEffects = 1, + isPredicated = 1 in +def STrid_indexed_shl_cdnPt_V4 : STInst2<(outs), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4, + DoubleRegs:$src5), + "if ($src1.new) memd($src2+$src3<<#$src4) = $src5", + []>, + Requires<[HasV4T]>; +// if (!Pv) memd(Rs+Ru<<#u2)=Rtt +let AddedComplexity = 10, neverHasSideEffects = 1, + isPredicated = 1 in +def STrid_indexed_shl_cNotPt_V4 : STInst2<(outs), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4, + DoubleRegs:$src5), + "if (!$src1) memd($src2+$src3<<#$src4) = $src5", + []>, + Requires<[HasV4T]>; +// if (!Pv.new) memd(Rs+Ru<<#u2)=Rtt +let AddedComplexity = 10, neverHasSideEffects = 1, + isPredicated = 1 in +def STrid_indexed_shl_cdnNotPt_V4 : STInst2<(outs), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4, + DoubleRegs:$src5), + "if (!$src1.new) memd($src2+$src3<<#$src4) = $src5", + []>, + Requires<[HasV4T]>; + +// if ([!]Pv[.new]) memd(Rx++#s4:3)=Rtt +// if (Pv) memd(Rx++#s4:3)=Rtt +// if (Pv.new) memd(Rx++#s4:3)=Rtt +let AddedComplexity = 10, neverHasSideEffects = 1, + isPredicated = 1 in +def POST_STdri_cdnPt_V4 : STInst2PI<(outs IntRegs:$dst), + (ins PredRegs:$src1, DoubleRegs:$src2, IntRegs:$src3, + s4_3Imm:$offset), + "if ($src1.new) memd($src3++#$offset) = $src2", + [], + "$src3 = $dst">, + Requires<[HasV4T]>; + +// if (!Pv) memd(Rx++#s4:3)=Rtt +// if (!Pv.new) memd(Rx++#s4:3)=Rtt +let AddedComplexity = 10, neverHasSideEffects = 1, + isPredicated = 1 in +def POST_STdri_cdnNotPt_V4 : STInst2PI<(outs IntRegs:$dst), + (ins PredRegs:$src1, DoubleRegs:$src2, IntRegs:$src3, + s4_3Imm:$offset), + "if (!$src1.new) memd($src3++#$offset) = $src2", + [], + "$src3 = $dst">, + Requires<[HasV4T]>; + + +// Store byte. 
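// A note on AddedComplexity, which most defs and patterns in this file set:
// during instruction selection, competing patterns are ranked by a
// complexity score derived from their shape, and AddedComplexity is simply
// added to that score. The values used here (10, 15, 40, 45, and 100 on the
// global-address Pat<> mappings) bias selection toward the more specific
// addressing modes whenever they can match. A minimal sketch under assumed
// names (STW_base and STW_indexed_shl are hypothetical instructions):
def : Pat<(store (i32 IntRegs:$v), (i32 IntRegs:$base)),
          (STW_base IntRegs:$base, IntRegs:$v)>;
let AddedComplexity = 10 in
def : Pat<(store (i32 IntRegs:$v),
                 (add (i32 IntRegs:$base),
                      (shl (i32 IntRegs:$idx), u2ImmPred:$sh))),
          (STW_indexed_shl IntRegs:$base, IntRegs:$idx, u2ImmPred:$sh,
                           IntRegs:$v)>;
// With the bump, the indexed pattern is preferred whenever the address has
// the matching shape.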
+// memb(Rs+#u6:0)=#S8 +let AddedComplexity = 10, isPredicable = 1 in +def STrib_imm_V4 : STInst<(outs), + (ins IntRegs:$src1, u6_0Imm:$src2, s8Imm:$src3), + "memb($src1+#$src2) = #$src3", + [(truncstorei8 s8ImmPred:$src3, (add (i32 IntRegs:$src1), + u6_0ImmPred:$src2))]>, + Requires<[HasV4T]>; + +// memb(Rs+Ru<<#u2)=Rt +let AddedComplexity = 10, isPredicable = 1 in +def STrib_indexed_shl_V4 : STInst<(outs), + (ins IntRegs:$src1, IntRegs:$src2, u2Imm:$src3, IntRegs:$src4), + "memb($src1+$src2<<#$src3) = $src4", + [(truncstorei8 (i32 IntRegs:$src4), + (add (i32 IntRegs:$src1), + (shl (i32 IntRegs:$src2), + u2ImmPred:$src3)))]>, + Requires<[HasV4T]>; + +// memb(Ru<<#u2+#U6)=Rt +let AddedComplexity = 10 in +def STrib_shl_V4 : STInst<(outs), + (ins IntRegs:$src1, u2Imm:$src2, u6Imm:$src3, IntRegs:$src4), + "memb($src1<<#$src2+#$src3) = $src4", + [(truncstorei8 (i32 IntRegs:$src4), + (add (shl (i32 IntRegs:$src1), u2ImmPred:$src2), + u6ImmPred:$src3))]>, + Requires<[HasV4T]>; + +// memb(Rx++#s4:0:circ(Mu))=Rt +// memb(Rx++I:circ(Mu))=Rt +// memb(Rx++Mu)=Rt +// memb(Rx++Mu:brev)=Rt +// memb(gp+#u16:0)=Rt + + +// Store byte conditionally. +// if ([!]Pv[.new]) memb(#u6)=Rt +// if ([!]Pv[.new]) memb(Rs+#u6:0)=#S6 +// if (Pv) memb(Rs+#u6:0)=#S6 +let neverHasSideEffects = 1, + isPredicated = 1 in +def STrib_imm_cPt_V4 : STInst2<(outs), + (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3, s6Imm:$src4), + "if ($src1) memb($src2+#$src3) = #$src4", + []>, + Requires<[HasV4T]>; + +// if (Pv.new) memb(Rs+#u6:0)=#S6 +let neverHasSideEffects = 1, + isPredicated = 1 in +def STrib_imm_cdnPt_V4 : STInst2<(outs), + (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3, s6Imm:$src4), + "if ($src1.new) memb($src2+#$src3) = #$src4", + []>, + Requires<[HasV4T]>; + +// if (!Pv) memb(Rs+#u6:0)=#S6 +let neverHasSideEffects = 1, + isPredicated = 1 in +def STrib_imm_cNotPt_V4 : STInst2<(outs), + (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3, s6Imm:$src4), + "if (!$src1) memb($src2+#$src3) = #$src4", + []>, + Requires<[HasV4T]>; + +// if (!Pv.new) memb(Rs+#u6:0)=#S6 +let neverHasSideEffects = 1, + isPredicated = 1 in +def STrib_imm_cdnNotPt_V4 : STInst2<(outs), + (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3, s6Imm:$src4), + "if (!$src1.new) memb($src2+#$src3) = #$src4", + []>, + Requires<[HasV4T]>; + +// if ([!]Pv[.new]) memb(Rs+#u6:0)=Rt +// if (Pv) memb(Rs+#u6:0)=Rt +// if (Pv.new) memb(Rs+#u6:0)=Rt +let neverHasSideEffects = 1, + isPredicated = 1 in +def STrib_cdnPt_V4 : STInst2<(outs), + (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2), + "if ($src1.new) memb($addr) = $src2", + []>, + Requires<[HasV4T]>; + +// if (!Pv) memb(Rs+#u6:0)=Rt +// if (!Pv.new) memb(Rs+#u6:0)=Rt +let neverHasSideEffects = 1, + isPredicated = 1 in +def STrib_cdnNotPt_V4 : STInst2<(outs), + (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2), + "if (!$src1.new) memb($addr) = $src2", + []>, + Requires<[HasV4T]>; + +// if (Pv) memb(Rs+#u6:0)=Rt +// if (!Pv) memb(Rs+#u6:0)=Rt +// if (Pv.new) memb(Rs+#u6:0)=Rt +let neverHasSideEffects = 1, + isPredicated = 1 in +def STrib_indexed_cdnPt_V4 : STInst2<(outs), + (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3, IntRegs:$src4), + "if ($src1.new) memb($src2+#$src3) = $src4", + []>, + Requires<[HasV4T]>; + +// if (!Pv.new) memb(Rs+#u6:0)=Rt +let neverHasSideEffects = 1, + isPredicated = 1 in +def STrib_indexed_cdnNotPt_V4 : STInst2<(outs), + (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3, IntRegs:$src4), + "if (!$src1.new) memb($src2+#$src3) = $src4", + []>, + Requires<[HasV4T]>; + +// if 
([!]Pv[.new]) memb(Rs+Ru<<#u2)=Rt +// if (Pv) memb(Rs+Ru<<#u2)=Rt +let AddedComplexity = 10, + isPredicated = 1 in +def STrib_indexed_shl_cPt_V4 : STInst2<(outs), (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4, IntRegs:$src5), "if ($src1) memb($src2+$src3<<#$src4) = $src5", @@ -1227,8 +2072,9 @@ def STrib_indexed_shl_cPt_V4 : STInst<(outs), Requires<[HasV4T]>; // if (Pv.new) memb(Rs+Ru<<#u2)=Rt -let mayStore = 1, AddedComplexity = 10 in -def STrib_indexed_shl_cdnPt_V4 : STInst<(outs), +let AddedComplexity = 10, + isPredicated = 1 in +def STrib_indexed_shl_cdnPt_V4 : STInst2<(outs), (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4, IntRegs:$src5), "if ($src1.new) memb($src2+$src3<<#$src4) = $src5", @@ -1236,8 +2082,9 @@ def STrib_indexed_shl_cdnPt_V4 : STInst<(outs), Requires<[HasV4T]>; // if (!Pv) memb(Rs+Ru<<#u2)=Rt -let mayStore = 1, AddedComplexity = 10 in -def STrib_indexed_shl_cNotPt_V4 : STInst<(outs), +let AddedComplexity = 10, + isPredicated = 1 in +def STrib_indexed_shl_cNotPt_V4 : STInst2<(outs), (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4, IntRegs:$src5), "if (!$src1) memb($src2+$src3<<#$src4) = $src5", @@ -1245,8 +2092,9 @@ def STrib_indexed_shl_cNotPt_V4 : STInst<(outs), Requires<[HasV4T]>; // if (!Pv.new) memb(Rs+Ru<<#u2)=Rt -let mayStore = 1, AddedComplexity = 10 in -def STrib_indexed_shl_cdnNotPt_V4 : STInst<(outs), +let AddedComplexity = 10, + isPredicated = 1 in +def STrib_indexed_shl_cdnNotPt_V4 : STInst2<(outs), (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4, IntRegs:$src5), "if (!$src1.new) memb($src2+$src3<<#$src4) = $src5", @@ -1256,8 +2104,9 @@ def STrib_indexed_shl_cdnNotPt_V4 : STInst<(outs), // if ([!]Pv[.new]) memb(Rx++#s4:0)=Rt // if (Pv) memb(Rx++#s4:0)=Rt // if (Pv.new) memb(Rx++#s4:0)=Rt -let mayStore = 1, hasCtrlDep = 1 in -def POST_STbri_cdnPt_V4 : STInstPI<(outs IntRegs:$dst), +let hasCtrlDep = 1, + isPredicated = 1 in +def POST_STbri_cdnPt_V4 : STInst2PI<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_0Imm:$offset), "if ($src1.new) memb($src3++#$offset) = $src2", [],"$src3 = $dst">, @@ -1265,8 +2114,9 @@ def POST_STbri_cdnPt_V4 : STInstPI<(outs IntRegs:$dst), // if (!Pv) memb(Rx++#s4:0)=Rt // if (!Pv.new) memb(Rx++#s4:0)=Rt -let mayStore = 1, hasCtrlDep = 1 in -def POST_STbri_cdnNotPt_V4 : STInstPI<(outs IntRegs:$dst), +let hasCtrlDep = 1, + isPredicated = 1 in +def POST_STbri_cdnNotPt_V4 : STInst2PI<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_0Imm:$offset), "if (!$src1.new) memb($src3++#$offset) = $src2", [],"$src3 = $dst">, @@ -1274,20 +2124,15 @@ def POST_STbri_cdnNotPt_V4 : STInstPI<(outs IntRegs:$dst), // Store halfword. 
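Going by the mnemonics, the ".H" halfword forms mentioned below store the upper half of the 32-bit source register rather than the low half; a minimal C sketch of the distinction (illustrative only, names invented):

    #include <stdint.h>

    void half_store_shapes(int16_t *rs, int32_t rt) {
      rs[4] = (int16_t)rt;          /* memh(Rs+#u6:1)  = Rt   : low halfword  */
      rs[4] = (int16_t)(rt >> 16);  /* memh(Rs+#s11:1) = Rt.H : high halfword */
    }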
-// memh(Re=#U6)=Rt.H -// TODO: needs to be implemented - -// memh(Re=#U6)=Rt // TODO: needs to be implemented - +// memh(Re=#U6)=Rt.H // memh(Rs+#s11:1)=Rt.H -// memh(Rs+#s11:1)=Rt // memh(Rs+#u6:1)=#S8 let AddedComplexity = 10, isPredicable = 1 in def STrih_imm_V4 : STInst<(outs), (ins IntRegs:$src1, u6_1Imm:$src2, s8Imm:$src3), "memh($src1+#$src2) = #$src3", - [(truncstorei16 s8ImmPred:$src3, (add IntRegs:$src1, + [(truncstorei16 s8ImmPred:$src3, (add (i32 IntRegs:$src1), u6_1ImmPred:$src2))]>, Requires<[HasV4T]>; @@ -1299,9 +2144,10 @@ let AddedComplexity = 10, isPredicable = 1 in def STrih_indexed_shl_V4 : STInst<(outs), (ins IntRegs:$src1, IntRegs:$src2, u2Imm:$src3, IntRegs:$src4), "memh($src1+$src2<<#$src3) = $src4", - [(truncstorei16 IntRegs:$src4, (add IntRegs:$src1, - (shl IntRegs:$src2, - u2ImmPred:$src3)))]>, + [(truncstorei16 (i32 IntRegs:$src4), + (add (i32 IntRegs:$src1), + (shl (i32 IntRegs:$src2), + u2ImmPred:$src3)))]>, Requires<[HasV4T]>; // memh(Ru<<#u2+#U6)=Rt.H @@ -1310,9 +2156,9 @@ let AddedComplexity = 10 in def STrih_shl_V4 : STInst<(outs), (ins IntRegs:$src1, u2Imm:$src2, u6Imm:$src3, IntRegs:$src4), "memh($src1<<#$src2+#$src3) = $src4", - [(truncstorei16 IntRegs:$src4, (shl IntRegs:$src1, - (add u2ImmPred:$src2, - u6ImmPred:$src3)))]>, + [(truncstorei16 (i32 IntRegs:$src4), + (add (shl (i32 IntRegs:$src1), u2ImmPred:$src2), + u6ImmPred:$src3))]>, Requires<[HasV4T]>; // memh(Rx++#s4:1:circ(Mu))=Rt.H @@ -1323,42 +2169,42 @@ def STrih_shl_V4 : STInst<(outs), // memh(Rx++Mu)=Rt // memh(Rx++Mu:brev)=Rt.H // memh(Rx++Mu:brev)=Rt -// memh(gp+#u16:1)=Rt.H // memh(gp+#u16:1)=Rt - - -// Store halfword conditionally. // if ([!]Pv[.new]) memh(#u6)=Rt.H // if ([!]Pv[.new]) memh(#u6)=Rt // if ([!]Pv[.new]) memh(Rs+#u6:1)=#S6 // if (Pv) memh(Rs+#u6:1)=#S6 -let mayStore = 1, neverHasSideEffects = 1 in -def STrih_imm_cPt_V4 : STInst<(outs), +let neverHasSideEffects = 1, + isPredicated = 1 in +def STrih_imm_cPt_V4 : STInst2<(outs), (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3, s6Imm:$src4), "if ($src1) memh($src2+#$src3) = #$src4", []>, Requires<[HasV4T]>; // if (Pv.new) memh(Rs+#u6:1)=#S6 -let mayStore = 1, neverHasSideEffects = 1 in -def STrih_imm_cdnPt_V4 : STInst<(outs), +let neverHasSideEffects = 1, + isPredicated = 1 in +def STrih_imm_cdnPt_V4 : STInst2<(outs), (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3, s6Imm:$src4), "if ($src1.new) memh($src2+#$src3) = #$src4", []>, Requires<[HasV4T]>; // if (!Pv) memh(Rs+#u6:1)=#S6 -let mayStore = 1, neverHasSideEffects = 1 in -def STrih_imm_cNotPt_V4 : STInst<(outs), +let neverHasSideEffects = 1, + isPredicated = 1 in +def STrih_imm_cNotPt_V4 : STInst2<(outs), (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3, s6Imm:$src4), "if (!$src1) memh($src2+#$src3) = #$src4", []>, Requires<[HasV4T]>; // if (!Pv.new) memh(Rs+#u6:1)=#S6 -let mayStore = 1, neverHasSideEffects = 1 in -def STrih_imm_cdnNotPt_V4 : STInst<(outs), +let neverHasSideEffects = 1, + isPredicated = 1 in +def STrih_imm_cdnNotPt_V4 : STInst2<(outs), (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3, s6Imm:$src4), "if (!$src1.new) memh($src2+#$src3) = #$src4", []>, @@ -1370,8 +2216,9 @@ def STrih_imm_cdnNotPt_V4 : STInst<(outs), // if ([!]Pv[.new]) memh(Rs+#u6:1)=Rt // if (Pv) memh(Rs+#u6:1)=Rt // if (Pv.new) memh(Rs+#u6:1)=Rt -let mayStore = 1, neverHasSideEffects = 1 in -def STrih_cdnPt_V4 : STInst<(outs), +let neverHasSideEffects = 1, + isPredicated = 1 in +def STrih_cdnPt_V4 : STInst2<(outs), (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2), "if ($src1.new) 
memh($addr) = $src2", []>, @@ -1379,24 +2226,27 @@ def STrih_cdnPt_V4 : STInst<(outs), // if (!Pv) memh(Rs+#u6:1)=Rt // if (!Pv.new) memh(Rs+#u6:1)=Rt -let mayStore = 1, neverHasSideEffects = 1 in -def STrih_cdnNotPt_V4 : STInst<(outs), +let neverHasSideEffects = 1, + isPredicated = 1 in +def STrih_cdnNotPt_V4 : STInst2<(outs), (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2), "if (!$src1.new) memh($addr) = $src2", []>, Requires<[HasV4T]>; // if (Pv.new) memh(Rs+#u6:1)=Rt -let mayStore = 1, neverHasSideEffects = 1 in -def STrih_indexed_cdnPt_V4 : STInst<(outs), +let neverHasSideEffects = 1, + isPredicated = 1 in +def STrih_indexed_cdnPt_V4 : STInst2<(outs), (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3, IntRegs:$src4), "if ($src1.new) memh($src2+#$src3) = $src4", []>, Requires<[HasV4T]>; // if (!Pv.new) memh(Rs+#u6:1)=Rt -let mayStore = 1, neverHasSideEffects = 1 in -def STrih_indexed_cdnNotPt_V4 : STInst<(outs), +let neverHasSideEffects = 1, + isPredicated = 1 in +def STrih_indexed_cdnNotPt_V4 : STInst2<(outs), (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3, IntRegs:$src4), "if (!$src1.new) memh($src2+#$src3) = $src4", []>, @@ -1405,8 +2255,9 @@ def STrih_indexed_cdnNotPt_V4 : STInst<(outs), // if ([!]Pv[.new]) memh(Rs+Ru<<#u2)=Rt.H // if ([!]Pv[.new]) memh(Rs+Ru<<#u2)=Rt // if (Pv) memh(Rs+Ru<<#u2)=Rt -let mayStore = 1, AddedComplexity = 10 in -def STrih_indexed_shl_cPt_V4 : STInst<(outs), +let AddedComplexity = 10, + isPredicated = 1 in +def STrih_indexed_shl_cPt_V4 : STInst2<(outs), (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4, IntRegs:$src5), "if ($src1) memh($src2+$src3<<#$src4) = $src5", @@ -1414,7 +2265,9 @@ def STrih_indexed_shl_cPt_V4 : STInst<(outs), Requires<[HasV4T]>; // if (Pv.new) memh(Rs+Ru<<#u2)=Rt -def STrih_indexed_shl_cdnPt_V4 : STInst<(outs), +let AddedComplexity = 10, + isPredicated = 1 in +def STrih_indexed_shl_cdnPt_V4 : STInst2<(outs), (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4, IntRegs:$src5), "if ($src1.new) memh($src2+$src3<<#$src4) = $src5", @@ -1422,8 +2275,9 @@ def STrih_indexed_shl_cdnPt_V4 : STInst<(outs), Requires<[HasV4T]>; // if (!Pv) memh(Rs+Ru<<#u2)=Rt -let mayStore = 1, AddedComplexity = 10 in -def STrih_indexed_shl_cNotPt_V4 : STInst<(outs), +let AddedComplexity = 10, + isPredicated = 1 in +def STrih_indexed_shl_cNotPt_V4 : STInst2<(outs), (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4, IntRegs:$src5), "if (!$src1) memh($src2+$src3<<#$src4) = $src5", @@ -1431,8 +2285,9 @@ def STrih_indexed_shl_cNotPt_V4 : STInst<(outs), Requires<[HasV4T]>; // if (!Pv.new) memh(Rs+Ru<<#u2)=Rt -let mayStore = 1, AddedComplexity = 10 in -def STrih_indexed_shl_cdnNotPt_V4 : STInst<(outs), +let AddedComplexity = 10, + isPredicated = 1 in +def STrih_indexed_shl_cdnNotPt_V4 : STInst2<(outs), (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4, IntRegs:$src5), "if (!$src1.new) memh($src2+$src3<<#$src4) = $src5", @@ -1445,8 +2300,9 @@ def STrih_indexed_shl_cdnNotPt_V4 : STInst<(outs), // if ([!]Pv[.new]) memh(Rx++#s4:1)=Rt // if (Pv) memh(Rx++#s4:1)=Rt // if (Pv.new) memh(Rx++#s4:1)=Rt -let mayStore = 1, hasCtrlDep = 1 in -def POST_SThri_cdnPt_V4 : STInstPI<(outs IntRegs:$dst), +let hasCtrlDep = 1, + isPredicated = 1 in +def POST_SThri_cdnPt_V4 : STInst2PI<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_1Imm:$offset), "if ($src1.new) memh($src3++#$offset) = $src2", [],"$src3 = $dst">, @@ -1454,8 +2310,9 @@ def POST_SThri_cdnPt_V4 : STInstPI<(outs IntRegs:$dst), // if (!Pv) 
memh(Rx++#s4:1)=Rt // if (!Pv.new) memh(Rx++#s4:1)=Rt -let mayStore = 1, hasCtrlDep = 1 in -def POST_SThri_cdnNotPt_V4 : STInstPI<(outs IntRegs:$dst), +let hasCtrlDep = 1, + isPredicated = 1 in +def POST_SThri_cdnNotPt_V4 : STInst2PI<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_1Imm:$offset), "if (!$src1.new) memh($src3++#$offset) = $src2", [],"$src3 = $dst">, @@ -1466,13 +2323,22 @@ def POST_SThri_cdnNotPt_V4 : STInstPI<(outs IntRegs:$dst), // memw(Re=#U6)=Rt // TODO: Needs to be implemented. -// memw(Rs+#s11:2)=Rt +// Store predicate: +let neverHasSideEffects = 1 in +def STriw_pred_V4 : STInst2<(outs), + (ins MEMri:$addr, PredRegs:$src1), + "Error; should not emit", + []>, + Requires<[HasV4T]>; + + // memw(Rs+#u6:2)=#S8 let AddedComplexity = 10, isPredicable = 1 in def STriw_imm_V4 : STInst<(outs), (ins IntRegs:$src1, u6_2Imm:$src2, s8Imm:$src3), "memw($src1+#$src2) = #$src3", - [(store s8ImmPred:$src3, (add IntRegs:$src1, u6_2ImmPred:$src2))]>, + [(store s8ImmPred:$src3, (add (i32 IntRegs:$src1), + u6_2ImmPred:$src2))]>, Requires<[HasV4T]>; // memw(Rs+Ru<<#u2)=Rt @@ -1480,8 +2346,9 @@ let AddedComplexity = 10, isPredicable = 1 in def STriw_indexed_shl_V4 : STInst<(outs), (ins IntRegs:$src1, IntRegs:$src2, u2Imm:$src3, IntRegs:$src4), "memw($src1+$src2<<#$src3) = $src4", - [(store IntRegs:$src4, (add IntRegs:$src1, - (shl IntRegs:$src2, u2ImmPred:$src3)))]>, + [(store (i32 IntRegs:$src4), (add (i32 IntRegs:$src1), + (shl (i32 IntRegs:$src2), + u2ImmPred:$src3)))]>, Requires<[HasV4T]>; // memw(Ru<<#u2+#U6)=Rt @@ -1489,8 +2356,9 @@ let AddedComplexity = 10 in def STriw_shl_V4 : STInst<(outs), (ins IntRegs:$src1, u2Imm:$src2, u6Imm:$src3, IntRegs:$src4), "memw($src1<<#$src2+#$src3) = $src4", - [(store IntRegs:$src4, (shl IntRegs:$src1, - (add u2ImmPred:$src2, u6ImmPred:$src3)))]>, + [(store (i32 IntRegs:$src4), + (add (shl (i32 IntRegs:$src1), u2ImmPred:$src2), + u6ImmPred:$src3))]>, Requires<[HasV4T]>; // memw(Rx++#s4:2)=Rt @@ -1502,37 +2370,39 @@ def STriw_shl_V4 : STInst<(outs), // Store word conditionally. -// if ([!]Pv[.new]) memw(#u6)=Rt -// TODO: Needs to be implemented. 
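What the conditional forms below do, as a minimal C sketch; pv stands in for the predicate register, and every name here is invented for illustration:

    #include <stdint.h>

    void cond_word_store(int pv, int32_t *rs, int32_t s6) {
      if (pv)  rs[4] = s6;   /* if (Pv)  memw(Rs+#u6:2) = #S6 */
      if (!pv) rs[4] = s6;   /* if (!Pv) memw(Rs+#u6:2) = #S6 */
    }

The cdn* variants perform the same guarded store but test a predicate computed earlier in the same packet (the ".new" in the assembler string), which appears to be why they carry no selection patterns here.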
// if ([!]Pv[.new]) memw(Rs+#u6:2)=#S6 // if (Pv) memw(Rs+#u6:2)=#S6 -let mayStore = 1, neverHasSideEffects = 1 in -def STriw_imm_cPt_V4 : STInst<(outs), +let neverHasSideEffects = 1, + isPredicated = 1 in +def STriw_imm_cPt_V4 : STInst2<(outs), (ins PredRegs:$src1, IntRegs:$src2, u6_2Imm:$src3, s6Imm:$src4), "if ($src1) memw($src2+#$src3) = #$src4", []>, Requires<[HasV4T]>; // if (Pv.new) memw(Rs+#u6:2)=#S6 -let mayStore = 1, neverHasSideEffects = 1 in -def STriw_imm_cdnPt_V4 : STInst<(outs), +let neverHasSideEffects = 1, + isPredicated = 1 in +def STriw_imm_cdnPt_V4 : STInst2<(outs), (ins PredRegs:$src1, IntRegs:$src2, u6_2Imm:$src3, s6Imm:$src4), "if ($src1.new) memw($src2+#$src3) = #$src4", []>, Requires<[HasV4T]>; // if (!Pv) memw(Rs+#u6:2)=#S6 -let mayStore = 1, neverHasSideEffects = 1 in -def STriw_imm_cNotPt_V4 : STInst<(outs), +let neverHasSideEffects = 1, + isPredicated = 1 in +def STriw_imm_cNotPt_V4 : STInst2<(outs), (ins PredRegs:$src1, IntRegs:$src2, u6_2Imm:$src3, s6Imm:$src4), "if (!$src1) memw($src2+#$src3) = #$src4", []>, Requires<[HasV4T]>; // if (!Pv.new) memw(Rs+#u6:2)=#S6 -let mayStore = 1, neverHasSideEffects = 1 in -def STriw_imm_cdnNotPt_V4 : STInst<(outs), +let neverHasSideEffects = 1, + isPredicated = 1 in +def STriw_imm_cdnNotPt_V4 : STInst2<(outs), (ins PredRegs:$src1, IntRegs:$src2, u6_2Imm:$src3, s6Imm:$src4), "if (!$src1.new) memw($src2+#$src3) = #$src4", []>, @@ -1541,8 +2411,9 @@ def STriw_imm_cdnNotPt_V4 : STInst<(outs), // if ([!]Pv[.new]) memw(Rs+#u6:2)=Rt // if (Pv) memw(Rs+#u6:2)=Rt // if (Pv.new) memw(Rs+#u6:2)=Rt -let mayStore = 1, neverHasSideEffects = 1 in -def STriw_cdnPt_V4 : STInst<(outs), +let neverHasSideEffects = 1, + isPredicated = 1 in +def STriw_cdnPt_V4 : STInst2<(outs), (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2), "if ($src1.new) memw($addr) = $src2", []>, @@ -1550,8 +2421,9 @@ def STriw_cdnPt_V4 : STInst<(outs), // if (!Pv) memw(Rs+#u6:2)=Rt // if (!Pv.new) memw(Rs+#u6:2)=Rt -let mayStore = 1, neverHasSideEffects = 1 in -def STriw_cdnNotPt_V4 : STInst<(outs), +let neverHasSideEffects = 1, + isPredicated = 1 in +def STriw_cdnNotPt_V4 : STInst2<(outs), (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2), "if (!$src1.new) memw($addr) = $src2", []>, @@ -1560,77 +2432,526 @@ def STriw_cdnNotPt_V4 : STInst<(outs), // if (Pv) memw(Rs+#u6:2)=Rt // if (!Pv) memw(Rs+#u6:2)=Rt // if (Pv.new) memw(Rs+#u6:2)=Rt -let mayStore = 1, neverHasSideEffects = 1 in -def STriw_indexed_cdnPt_V4 : STInst<(outs), +let neverHasSideEffects = 1, + isPredicated = 1 in +def STriw_indexed_cdnPt_V4 : STInst2<(outs), (ins PredRegs:$src1, IntRegs:$src2, u6_2Imm:$src3, IntRegs:$src4), "if ($src1.new) memw($src2+#$src3) = $src4", []>, Requires<[HasV4T]>; // if (!Pv.new) memw(Rs+#u6:2)=Rt -let mayStore = 1, neverHasSideEffects = 1 in -def STriw_indexed_cdnNotPt_V4 : STInst<(outs), +let neverHasSideEffects = 1, + isPredicated = 1 in +def STriw_indexed_cdnNotPt_V4 : STInst2<(outs), (ins PredRegs:$src1, IntRegs:$src2, u6_2Imm:$src3, IntRegs:$src4), "if (!$src1.new) memw($src2+#$src3) = $src4", []>, Requires<[HasV4T]>; -// if ([!]Pv[.new]) memw(Rs+Ru<<#u2)=Rt -// if (Pv) memw(Rs+Ru<<#u2)=Rt -let mayStore = 1, AddedComplexity = 10 in -def STriw_indexed_shl_cPt_V4 : STInst<(outs), - (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4, - IntRegs:$src5), - "if ($src1) memw($src2+$src3<<#$src4) = $src5", +// if ([!]Pv[.new]) memw(Rs+Ru<<#u2)=Rt +// if (Pv) memw(Rs+Ru<<#u2)=Rt +let AddedComplexity = 10, + isPredicated = 1 in +def STriw_indexed_shl_cPt_V4 : 
STInst2<(outs), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4, + IntRegs:$src5), + "if ($src1) memw($src2+$src3<<#$src4) = $src5", + []>, + Requires<[HasV4T]>; + +// if (Pv.new) memw(Rs+Ru<<#u2)=Rt +let AddedComplexity = 10, + isPredicated = 1 in +def STriw_indexed_shl_cdnPt_V4 : STInst2<(outs), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4, + IntRegs:$src5), + "if ($src1.new) memw($src2+$src3<<#$src4) = $src5", + []>, + Requires<[HasV4T]>; + +// if (!Pv) memw(Rs+Ru<<#u2)=Rt +let AddedComplexity = 10, + isPredicated = 1 in +def STriw_indexed_shl_cNotPt_V4 : STInst2<(outs), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4, + IntRegs:$src5), + "if (!$src1) memw($src2+$src3<<#$src4) = $src5", + []>, + Requires<[HasV4T]>; + +// if (!Pv.new) memw(Rs+Ru<<#u2)=Rt +let AddedComplexity = 10, + isPredicated = 1 in +def STriw_indexed_shl_cdnNotPt_V4 : STInst2<(outs), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4, + IntRegs:$src5), + "if (!$src1.new) memw($src2+$src3<<#$src4) = $src5", + []>, + Requires<[HasV4T]>; + +// if ([!]Pv[.new]) memw(Rx++#s4:2)=Rt +// if (Pv) memw(Rx++#s4:2)=Rt +// if (Pv.new) memw(Rx++#s4:2)=Rt +let hasCtrlDep = 1, + isPredicated = 1 in +def POST_STwri_cdnPt_V4 : STInst2PI<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_2Imm:$offset), + "if ($src1.new) memw($src3++#$offset) = $src2", + [],"$src3 = $dst">, + Requires<[HasV4T]>; + +// if (!Pv) memw(Rx++#s4:2)=Rt +// if (!Pv.new) memw(Rx++#s4:2)=Rt +let hasCtrlDep = 1, + isPredicated = 1 in +def POST_STwri_cdnNotPt_V4 : STInst2PI<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_2Imm:$offset), + "if (!$src1.new) memw($src3++#$offset) = $src2", + [],"$src3 = $dst">, + Requires<[HasV4T]>; + + +/// store to global address + +let isPredicable = 1, neverHasSideEffects = 1 in +def STrid_GP_V4 : STInst2<(outs), + (ins globaladdress:$global, u16Imm:$offset, DoubleRegs:$src), + "memd(#$global+$offset) = $src", + []>, + Requires<[HasV4T]>; + +let neverHasSideEffects = 1, isPredicated = 1 in +def STrid_GP_cPt_V4 : STInst2<(outs), + (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset, + DoubleRegs:$src2), + "if ($src1) memd(##$global+$offset) = $src2", + []>, + Requires<[HasV4T]>; + +let neverHasSideEffects = 1, isPredicated = 1 in +def STrid_GP_cNotPt_V4 : STInst2<(outs), + (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset, + DoubleRegs:$src2), + "if (!$src1) memd(##$global+$offset) = $src2", + []>, + Requires<[HasV4T]>; + +let neverHasSideEffects = 1, isPredicated = 1 in +def STrid_GP_cdnPt_V4 : STInst2<(outs), + (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset, + DoubleRegs:$src2), + "if ($src1.new) memd(##$global+$offset) = $src2", + []>, + Requires<[HasV4T]>; + +let neverHasSideEffects = 1, isPredicated = 1 in +def STrid_GP_cdnNotPt_V4 : STInst2<(outs), + (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset, + DoubleRegs:$src2), + "if (!$src1.new) memd(##$global+$offset) = $src2", + []>, + Requires<[HasV4T]>; + +let isPredicable = 1, neverHasSideEffects = 1 in +def STrib_GP_V4 : STInst2<(outs), + (ins globaladdress:$global, u16Imm:$offset, IntRegs:$src), + "memb(#$global+$offset) = $src", + []>, + Requires<[HasV4T]>; + +let neverHasSideEffects = 1, isPredicated = 1 in +def STrib_GP_cPt_V4 : STInst2<(outs), + (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset, + IntRegs:$src2), + "if ($src1) memb(##$global+$offset) = $src2", + []>, + Requires<[HasV4T]>; + +let 
neverHasSideEffects = 1, isPredicated = 1 in +def STrib_GP_cNotPt_V4 : STInst2<(outs), + (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset, + IntRegs:$src2), + "if (!$src1) memb(##$global+$offset) = $src2", + []>, + Requires<[HasV4T]>; + +let neverHasSideEffects = 1, isPredicated = 1 in +def STrib_GP_cdnPt_V4 : STInst2<(outs), + (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset, + IntRegs:$src2), + "if ($src1.new) memb(##$global+$offset) = $src2", + []>, + Requires<[HasV4T]>; + +let neverHasSideEffects = 1, isPredicated = 1 in +def STrib_GP_cdnNotPt_V4 : STInst2<(outs), + (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset, + IntRegs:$src2), + "if (!$src1.new) memb(##$global+$offset) = $src2", + []>, + Requires<[HasV4T]>; + +let isPredicable = 1, neverHasSideEffects = 1 in +def STrih_GP_V4 : STInst2<(outs), + (ins globaladdress:$global, u16Imm:$offset, IntRegs:$src), + "memh(#$global+$offset) = $src", + []>, + Requires<[HasV4T]>; + +let neverHasSideEffects = 1, isPredicated = 1 in +def STrih_GP_cPt_V4 : STInst2<(outs), + (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset, + IntRegs:$src2), + "if ($src1) memh(##$global+$offset) = $src2", + []>, + Requires<[HasV4T]>; + +let neverHasSideEffects = 1, isPredicated = 1 in +def STrih_GP_cNotPt_V4 : STInst2<(outs), + (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset, + IntRegs:$src2), + "if (!$src1) memh(##$global+$offset) = $src2", + []>, + Requires<[HasV4T]>; + +let neverHasSideEffects = 1, isPredicated = 1 in +def STrih_GP_cdnPt_V4 : STInst2<(outs), + (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset, + IntRegs:$src2), + "if ($src1.new) memh(##$global+$offset) = $src2", + []>, + Requires<[HasV4T]>; + +let neverHasSideEffects = 1, isPredicated = 1 in +def STrih_GP_cdnNotPt_V4 : STInst2<(outs), + (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset, + IntRegs:$src2), + "if (!$src1.new) memh(##$global+$offset) = $src2", + []>, + Requires<[HasV4T]>; + +let isPredicable = 1, neverHasSideEffects = 1 in +def STriw_GP_V4 : STInst2<(outs), + (ins globaladdress:$global, u16Imm:$offset, IntRegs:$src), + "memw(#$global+$offset) = $src", + []>, + Requires<[HasV4T]>; + +let neverHasSideEffects = 1, isPredicated = 1 in +def STriw_GP_cPt_V4 : STInst2<(outs), + (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset, + IntRegs:$src2), + "if ($src1) memw(##$global+$offset) = $src2", + []>, + Requires<[HasV4T]>; + +let neverHasSideEffects = 1, isPredicated = 1 in +def STriw_GP_cNotPt_V4 : STInst2<(outs), + (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset, + IntRegs:$src2), + "if (!$src1) memw(##$global+$offset) = $src2", + []>, + Requires<[HasV4T]>; + +let neverHasSideEffects = 1, isPredicated = 1 in +def STriw_GP_cdnPt_V4 : STInst2<(outs), + (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset, + IntRegs:$src2), + "if ($src1.new) memw(##$global+$offset) = $src2", + []>, + Requires<[HasV4T]>; + +let neverHasSideEffects = 1, isPredicated = 1 in +def STriw_GP_cdnNotPt_V4 : STInst2<(outs), + (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset, + IntRegs:$src2), + "if (!$src1.new) memw(##$global+$offset) = $src2", + []>, + Requires<[HasV4T]>; + +// memd(#global)=Rtt +let isPredicable = 1, neverHasSideEffects = 1 in +def STd_GP_V4 : STInst2<(outs), + (ins globaladdress:$global, DoubleRegs:$src), + "memd(#$global) = $src", []>, Requires<[HasV4T]>; -// if (Pv.new) memw(Rs+Ru<<#u2)=Rt -let mayStore = 1, AddedComplexity = 10 in -def STriw_indexed_shl_cdnPt_V4 : STInst<(outs), - (ins 
PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4, - IntRegs:$src5), - "if ($src1.new) memw($src2+$src3<<#$src4) = $src5", +// if (Pv) memd(##global) = Rtt +let neverHasSideEffects = 1, isPredicated = 1 in +def STd_GP_cPt_V4 : STInst2<(outs), + (ins PredRegs:$src1, globaladdress:$global, DoubleRegs:$src2), + "if ($src1) memd(##$global) = $src2", []>, Requires<[HasV4T]>; -// if (!Pv) memw(Rs+Ru<<#u2)=Rt -let mayStore = 1, AddedComplexity = 10 in -def STriw_indexed_shl_cNotPt_V4 : STInst<(outs), - (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4, - IntRegs:$src5), - "if (!$src1) memw($src2+$src3<<#$src4) = $src5", +// if (!Pv) memd(##global) = Rtt +let neverHasSideEffects = 1, isPredicated = 1 in +def STd_GP_cNotPt_V4 : STInst2<(outs), + (ins PredRegs:$src1, globaladdress:$global, DoubleRegs:$src2), + "if (!$src1) memd(##$global) = $src2", []>, - Requires<[HasV4T]>; + Requires<[HasV4T]>; -// if (!Pv.new) memw(Rs+Ru<<#u2)=Rt -let mayStore = 1, AddedComplexity = 10 in -def STriw_indexed_shl_cdnNotPt_V4 : STInst<(outs), - (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4, - IntRegs:$src5), - "if (!$src1.new) memw($src2+$src3<<#$src4) = $src5", +// if (Pv) memd(##global) = Rtt +let neverHasSideEffects = 1, isPredicated = 1 in +def STd_GP_cdnPt_V4 : STInst2<(outs), + (ins PredRegs:$src1, globaladdress:$global, DoubleRegs:$src2), + "if ($src1.new) memd(##$global) = $src2", + []>, + Requires<[HasV4T]>; + +// if (!Pv) memd(##global) = Rtt +let neverHasSideEffects = 1, isPredicated = 1 in +def STd_GP_cdnNotPt_V4 : STInst2<(outs), + (ins PredRegs:$src1, globaladdress:$global, DoubleRegs:$src2), + "if (!$src1.new) memd(##$global) = $src2", []>, Requires<[HasV4T]>; -// if ([!]Pv[.new]) memw(Rx++#s4:2)=Rt -// if (Pv) memw(Rx++#s4:2)=Rt -// if (Pv.new) memw(Rx++#s4:2)=Rt -let mayStore = 1, hasCtrlDep = 1 in -def POST_STwri_cdnPt_V4 : STInstPI<(outs IntRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_2Imm:$offset), - "if ($src1.new) memw($src3++#$offset) = $src2", - [],"$src3 = $dst">, +// memb(#global)=Rt +let isPredicable = 1, neverHasSideEffects = 1 in +def STb_GP_V4 : STInst2<(outs), + (ins globaladdress:$global, IntRegs:$src), + "memb(#$global) = $src", + []>, Requires<[HasV4T]>; -// if (!Pv) memw(Rx++#s4:2)=Rt -// if (!Pv.new) memw(Rx++#s4:2)=Rt -let mayStore = 1, hasCtrlDep = 1 in -def POST_STwri_cdnNotPt_V4 : STInstPI<(outs IntRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_2Imm:$offset), - "if (!$src1.new) memw($src3++#$offset) = $src2", - [],"$src3 = $dst">, +// if (Pv) memb(##global) = Rt +let neverHasSideEffects = 1, isPredicated = 1 in +def STb_GP_cPt_V4 : STInst2<(outs), + (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2), + "if ($src1) memb(##$global) = $src2", + []>, + Requires<[HasV4T]>; + +// if (!Pv) memb(##global) = Rt +let neverHasSideEffects = 1, isPredicated = 1 in +def STb_GP_cNotPt_V4 : STInst2<(outs), + (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2), + "if (!$src1) memb(##$global) = $src2", + []>, + Requires<[HasV4T]>; + +// if (Pv) memb(##global) = Rt +let neverHasSideEffects = 1, isPredicated = 1 in +def STb_GP_cdnPt_V4 : STInst2<(outs), + (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2), + "if ($src1.new) memb(##$global) = $src2", + []>, + Requires<[HasV4T]>; + +// if (!Pv) memb(##global) = Rt +let neverHasSideEffects = 1, isPredicated = 1 in +def STb_GP_cdnNotPt_V4 : STInst2<(outs), + (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2), + "if (!$src1.new) 
memb(##$global) = $src2", + []>, + Requires<[HasV4T]>; + +// memh(#global)=Rt +let isPredicable = 1, neverHasSideEffects = 1 in +def STh_GP_V4 : STInst2<(outs), + (ins globaladdress:$global, IntRegs:$src), + "memh(#$global) = $src", + []>, Requires<[HasV4T]>; +// if (Pv) memh(##global) = Rt +let neverHasSideEffects = 1, isPredicated = 1 in +def STh_GP_cPt_V4 : STInst2<(outs), + (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2), + "if ($src1) memh(##$global) = $src2", + []>, + Requires<[HasV4T]>; + +// if (!Pv) memh(##global) = Rt +let neverHasSideEffects = 1, isPredicated = 1 in +def STh_GP_cNotPt_V4 : STInst2<(outs), + (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2), + "if (!$src1) memh(##$global) = $src2", + []>, + Requires<[HasV4T]>; + +// if (Pv) memh(##global) = Rt +let neverHasSideEffects = 1, isPredicated = 1 in +def STh_GP_cdnPt_V4 : STInst2<(outs), + (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2), + "if ($src1.new) memh(##$global) = $src2", + []>, + Requires<[HasV4T]>; + +// if (!Pv) memh(##global) = Rt +let neverHasSideEffects = 1, isPredicated = 1 in +def STh_GP_cdnNotPt_V4 : STInst2<(outs), + (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2), + "if (!$src1.new) memh(##$global) = $src2", + []>, + Requires<[HasV4T]>; + +// memw(#global)=Rt +let isPredicable = 1, neverHasSideEffects = 1 in +def STw_GP_V4 : STInst2<(outs), + (ins globaladdress:$global, IntRegs:$src), + "memw(#$global) = $src", + []>, + Requires<[HasV4T]>; + +// if (Pv) memw(##global) = Rt +let neverHasSideEffects = 1, isPredicated = 1 in +def STw_GP_cPt_V4 : STInst2<(outs), + (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2), + "if ($src1) memw(##$global) = $src2", + []>, + Requires<[HasV4T]>; + +// if (!Pv) memw(##global) = Rt +let neverHasSideEffects = 1, isPredicated = 1 in +def STw_GP_cNotPt_V4 : STInst2<(outs), + (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2), + "if (!$src1) memw(##$global) = $src2", + []>, + Requires<[HasV4T]>; + +// if (Pv) memw(##global) = Rt +let neverHasSideEffects = 1, isPredicated = 1 in +def STw_GP_cdnPt_V4 : STInst2<(outs), + (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2), + "if ($src1.new) memw(##$global) = $src2", + []>, + Requires<[HasV4T]>; + +// if (!Pv) memw(##global) = Rt +let neverHasSideEffects = 1, isPredicated = 1 in +def STw_GP_cdnNotPt_V4 : STInst2<(outs), + (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2), + "if (!$src1.new) memw(##$global) = $src2", + []>, + Requires<[HasV4T]>; + +// 64 bit atomic store +def : Pat <(atomic_store_64 (HexagonCONST32_GP tglobaladdr:$global), + (i64 DoubleRegs:$src1)), + (STd_GP_V4 tglobaladdr:$global, (i64 DoubleRegs:$src1))>, + Requires<[HasV4T]>; + +// Map from store(globaladdress) -> memd(#foo) +let AddedComplexity = 100 in +def : Pat <(store (i64 DoubleRegs:$src1), + (HexagonCONST32_GP tglobaladdr:$global)), + (STd_GP_V4 tglobaladdr:$global, (i64 DoubleRegs:$src1))>, + Requires<[HasV4T]>; + +// 8 bit atomic store +def : Pat < (atomic_store_8 (HexagonCONST32_GP tglobaladdr:$global), + (i32 IntRegs:$src1)), + (STb_GP_V4 tglobaladdr:$global, (i32 IntRegs:$src1))>, + Requires<[HasV4T]>; + +// Map from store(globaladdress) -> memb(#foo) +let AddedComplexity = 100 in +def : Pat<(truncstorei8 (i32 IntRegs:$src1), + (HexagonCONST32_GP tglobaladdr:$global)), + (STb_GP_V4 tglobaladdr:$global, (i32 IntRegs:$src1))>, + Requires<[HasV4T]>; + +// Map from "i1 = constant<-1>; memw(CONST32(#foo)) = i1" +// to "r0 = 1; memw(#foo) = r0" +let AddedComplexity = 100 in +def 
: Pat<(store (i1 -1), (HexagonCONST32_GP tglobaladdr:$global)), + (STb_GP_V4 tglobaladdr:$global, (TFRI 1))>, + Requires<[HasV4T]>; + +def : Pat<(atomic_store_16 (HexagonCONST32_GP tglobaladdr:$global), + (i32 IntRegs:$src1)), + (STh_GP_V4 tglobaladdr:$global, (i32 IntRegs:$src1))>, + Requires<[HasV4T]>; + +// Map from store(globaladdress) -> memh(#foo) +let AddedComplexity = 100 in +def : Pat<(truncstorei16 (i32 IntRegs:$src1), + (HexagonCONST32_GP tglobaladdr:$global)), + (STh_GP_V4 tglobaladdr:$global, (i32 IntRegs:$src1))>, + Requires<[HasV4T]>; + +// 32 bit atomic store +def : Pat<(atomic_store_32 (HexagonCONST32_GP tglobaladdr:$global), + (i32 IntRegs:$src1)), + (STw_GP_V4 tglobaladdr:$global, (i32 IntRegs:$src1))>, + Requires<[HasV4T]>; + +// Map from store(globaladdress) -> memw(#foo) +let AddedComplexity = 100 in +def : Pat<(store (i32 IntRegs:$src1), (HexagonCONST32_GP tglobaladdr:$global)), + (STw_GP_V4 tglobaladdr:$global, (i32 IntRegs:$src1))>, + Requires<[HasV4T]>; + +def : Pat<(atomic_store_64 (add (HexagonCONST32_GP tglobaladdr:$global), + u16ImmPred:$offset), + (i64 DoubleRegs:$src1)), + (STrid_GP_V4 tglobaladdr:$global, u16ImmPred:$offset, + (i64 DoubleRegs:$src1))>, + Requires<[HasV4T]>; + +def : Pat<(atomic_store_32 (add (HexagonCONST32_GP tglobaladdr:$global), + u16ImmPred:$offset), + (i32 IntRegs:$src1)), + (STriw_GP_V4 tglobaladdr:$global, u16ImmPred:$offset, + (i32 IntRegs:$src1))>, + Requires<[HasV4T]>; + +def : Pat<(atomic_store_16 (add (HexagonCONST32_GP tglobaladdr:$global), + u16ImmPred:$offset), + (i32 IntRegs:$src1)), + (STrih_GP_V4 tglobaladdr:$global, u16ImmPred:$offset, + (i32 IntRegs:$src1))>, + Requires<[HasV4T]>; + +def : Pat<(atomic_store_8 (add (HexagonCONST32_GP tglobaladdr:$global), + u16ImmPred:$offset), + (i32 IntRegs:$src1)), + (STrib_GP_V4 tglobaladdr:$global, u16ImmPred:$offset, + (i32 IntRegs:$src1))>, + Requires<[HasV4T]>; + +// Map from store(globaladdress + x) -> memd(#foo + x) +let AddedComplexity = 100 in +def : Pat<(store (i64 DoubleRegs:$src1), + (add (HexagonCONST32_GP tglobaladdr:$global), + u16ImmPred:$offset)), + (STrid_GP_V4 tglobaladdr:$global, u16ImmPred:$offset, + (i64 DoubleRegs:$src1))>, + Requires<[HasV4T]>; + +// Map from store(globaladdress + x) -> memb(#foo + x) +let AddedComplexity = 100 in +def : Pat<(truncstorei8 (i32 IntRegs:$src1), + (add (HexagonCONST32_GP tglobaladdr:$global), + u16ImmPred:$offset)), + (STrib_GP_V4 tglobaladdr:$global, u16ImmPred:$offset, + (i32 IntRegs:$src1))>, + Requires<[HasV4T]>; + +// Map from store(globaladdress + x) -> memh(#foo + x) +let AddedComplexity = 100 in +def : Pat<(truncstorei16 (i32 IntRegs:$src1), + (add (HexagonCONST32_GP tglobaladdr:$global), + u16ImmPred:$offset)), + (STrih_GP_V4 tglobaladdr:$global, u16ImmPred:$offset, + (i32 IntRegs:$src1))>, + Requires<[HasV4T]>; + +// Map from store(globaladdress + x) -> memw(#foo + x) +let AddedComplexity = 100 in +def : Pat<(store (i32 IntRegs:$src1), + (add (HexagonCONST32_GP tglobaladdr:$global), + u16ImmPred:$offset)), + (STriw_GP_V4 tglobaladdr:$global, u16ImmPred:$offset, + (i32 IntRegs:$src1))>, + Requires<[HasV4T]>; + + //===----------------------------------------------------------------------=== // ST - @@ -1696,11 +3017,19 @@ def STrib_GP_nv_V4 : NVInst_V4<(outs), []>, Requires<[HasV4T]>; +// memb(#global)=Nt.new +let mayStore = 1, neverHasSideEffects = 1 in +def STb_GP_nv_V4 : NVInst_V4<(outs), + (ins globaladdress:$global, IntRegs:$src), + "memb(#$global) = $src.new", + []>, + Requires<[HasV4T]>; // Store new-value byte 
conditionally. // if ([!]Pv[.new]) memb(#u6)=Nt.new // if (Pv) memb(Rs+#u6:0)=Nt.new -let mayStore = 1, neverHasSideEffects = 1 in +let mayStore = 1, neverHasSideEffects = 1, + isPredicated = 1 in def STrib_cPt_nv_V4 : NVInst_V4<(outs), (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2), "if ($src1) memb($addr) = $src2.new", @@ -1708,7 +3037,8 @@ def STrib_cPt_nv_V4 : NVInst_V4<(outs), Requires<[HasV4T]>; // if (Pv.new) memb(Rs+#u6:0)=Nt.new -let mayStore = 1, neverHasSideEffects = 1 in +let mayStore = 1, neverHasSideEffects = 1, + isPredicated = 1 in def STrib_cdnPt_nv_V4 : NVInst_V4<(outs), (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2), "if ($src1.new) memb($addr) = $src2.new", @@ -1716,7 +3046,8 @@ def STrib_cdnPt_nv_V4 : NVInst_V4<(outs), Requires<[HasV4T]>; // if (!Pv) memb(Rs+#u6:0)=Nt.new -let mayStore = 1, neverHasSideEffects = 1 in +let mayStore = 1, neverHasSideEffects = 1, + isPredicated = 1 in def STrib_cNotPt_nv_V4 : NVInst_V4<(outs), (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2), "if (!$src1) memb($addr) = $src2.new", @@ -1724,7 +3055,8 @@ def STrib_cNotPt_nv_V4 : NVInst_V4<(outs), Requires<[HasV4T]>; // if (!Pv.new) memb(Rs+#u6:0)=Nt.new -let mayStore = 1, neverHasSideEffects = 1 in +let mayStore = 1, neverHasSideEffects = 1, + isPredicated = 1 in def STrib_cdnNotPt_nv_V4 : NVInst_V4<(outs), (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2), "if (!$src1.new) memb($addr) = $src2.new", @@ -1732,7 +3064,8 @@ def STrib_cdnNotPt_nv_V4 : NVInst_V4<(outs), Requires<[HasV4T]>; // if (Pv) memb(Rs+#u6:0)=Nt.new -let mayStore = 1, neverHasSideEffects = 1 in +let mayStore = 1, neverHasSideEffects = 1, + isPredicated = 1 in def STrib_indexed_cPt_nv_V4 : NVInst_V4<(outs), (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3, IntRegs:$src4), "if ($src1) memb($src2+#$src3) = $src4.new", @@ -1740,7 +3073,8 @@ def STrib_indexed_cPt_nv_V4 : NVInst_V4<(outs), Requires<[HasV4T]>; // if (Pv.new) memb(Rs+#u6:0)=Nt.new -let mayStore = 1, neverHasSideEffects = 1 in +let mayStore = 1, neverHasSideEffects = 1, + isPredicated = 1 in def STrib_indexed_cdnPt_nv_V4 : NVInst_V4<(outs), (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3, IntRegs:$src4), "if ($src1.new) memb($src2+#$src3) = $src4.new", @@ -1748,7 +3082,8 @@ def STrib_indexed_cdnPt_nv_V4 : NVInst_V4<(outs), Requires<[HasV4T]>; // if (!Pv) memb(Rs+#u6:0)=Nt.new -let mayStore = 1, neverHasSideEffects = 1 in +let mayStore = 1, neverHasSideEffects = 1, + isPredicated = 1 in def STrib_indexed_cNotPt_nv_V4 : NVInst_V4<(outs), (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3, IntRegs:$src4), "if (!$src1) memb($src2+#$src3) = $src4.new", @@ -1756,7 +3091,8 @@ def STrib_indexed_cNotPt_nv_V4 : NVInst_V4<(outs), Requires<[HasV4T]>; // if (!Pv.new) memb(Rs+#u6:0)=Nt.new -let mayStore = 1, neverHasSideEffects = 1 in +let mayStore = 1, neverHasSideEffects = 1, + isPredicated = 1 in def STrib_indexed_cdnNotPt_nv_V4 : NVInst_V4<(outs), (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3, IntRegs:$src4), "if (!$src1.new) memb($src2+#$src3) = $src4.new", @@ -1766,7 +3102,8 @@ def STrib_indexed_cdnNotPt_nv_V4 : NVInst_V4<(outs), // if ([!]Pv[.new]) memb(Rs+Ru<<#u2)=Nt.new // if (Pv) memb(Rs+Ru<<#u2)=Nt.new -let mayStore = 1, AddedComplexity = 10 in +let mayStore = 1, AddedComplexity = 10, + isPredicated = 1 in def STrib_indexed_shl_cPt_nv_V4 : NVInst_V4<(outs), (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4, IntRegs:$src5), @@ -1775,7 +3112,8 @@ def STrib_indexed_shl_cPt_nv_V4 : NVInst_V4<(outs), Requires<[HasV4T]>; // if (Pv.new) 
memb(Rs+Ru<<#u2)=Nt.new -let mayStore = 1, AddedComplexity = 10 in +let mayStore = 1, AddedComplexity = 10, + isPredicated = 1 in def STrib_indexed_shl_cdnPt_nv_V4 : NVInst_V4<(outs), (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4, IntRegs:$src5), @@ -1784,7 +3122,8 @@ def STrib_indexed_shl_cdnPt_nv_V4 : NVInst_V4<(outs), Requires<[HasV4T]>; // if (!Pv) memb(Rs+Ru<<#u2)=Nt.new -let mayStore = 1, AddedComplexity = 10 in +let mayStore = 1, AddedComplexity = 10, + isPredicated = 1 in def STrib_indexed_shl_cNotPt_nv_V4 : NVInst_V4<(outs), (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4, IntRegs:$src5), @@ -1793,7 +3132,8 @@ def STrib_indexed_shl_cNotPt_nv_V4 : NVInst_V4<(outs), Requires<[HasV4T]>; // if (!Pv.new) memb(Rs+Ru<<#u2)=Nt.new -let mayStore = 1, AddedComplexity = 10 in +let mayStore = 1, AddedComplexity = 10, + isPredicated = 1 in def STrib_indexed_shl_cdnNotPt_nv_V4 : NVInst_V4<(outs), (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4, IntRegs:$src5), @@ -1803,7 +3143,8 @@ def STrib_indexed_shl_cdnNotPt_nv_V4 : NVInst_V4<(outs), // if ([!]Pv[.new]) memb(Rx++#s4:0)=Nt.new // if (Pv) memb(Rx++#s4:0)=Nt.new -let mayStore = 1, hasCtrlDep = 1 in +let mayStore = 1, hasCtrlDep = 1, + isPredicated = 1 in def POST_STbri_cPt_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_0Imm:$offset), "if ($src1) memb($src3++#$offset) = $src2.new", @@ -1811,7 +3152,8 @@ def POST_STbri_cPt_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst), Requires<[HasV4T]>; // if (Pv.new) memb(Rx++#s4:0)=Nt.new -let mayStore = 1, hasCtrlDep = 1 in +let mayStore = 1, hasCtrlDep = 1, + isPredicated = 1 in def POST_STbri_cdnPt_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_0Imm:$offset), "if ($src1.new) memb($src3++#$offset) = $src2.new", @@ -1819,7 +3161,8 @@ def POST_STbri_cdnPt_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst), Requires<[HasV4T]>; // if (!Pv) memb(Rx++#s4:0)=Nt.new -let mayStore = 1, hasCtrlDep = 1 in +let mayStore = 1, hasCtrlDep = 1, + isPredicated = 1 in def POST_STbri_cNotPt_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_0Imm:$offset), "if (!$src1) memb($src3++#$offset) = $src2.new", @@ -1827,7 +3170,8 @@ def POST_STbri_cNotPt_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst), Requires<[HasV4T]>; // if (!Pv.new) memb(Rx++#s4:0)=Nt.new -let mayStore = 1, hasCtrlDep = 1 in +let mayStore = 1, hasCtrlDep = 1, + isPredicated = 1 in def POST_STbri_cdnNotPt_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_0Imm:$offset), "if (!$src1.new) memb($src3++#$offset) = $src2.new", @@ -1889,6 +3233,14 @@ def STrih_GP_nv_V4 : NVInst_V4<(outs), []>, Requires<[HasV4T]>; +// memh(#global)=Nt.new +let mayStore = 1, neverHasSideEffects = 1 in +def STh_GP_nv_V4 : NVInst_V4<(outs), + (ins globaladdress:$global, IntRegs:$src), + "memh(#$global) = $src.new", + []>, + Requires<[HasV4T]>; + // Store new-value halfword conditionally. 
@@ -1896,7 +3248,8 @@ def STrih_GP_nv_V4 : NVInst_V4<(outs), // if ([!]Pv[.new]) memh(Rs+#u6:1)=Nt.new // if (Pv) memh(Rs+#u6:1)=Nt.new -let mayStore = 1, neverHasSideEffects = 1 in +let mayStore = 1, neverHasSideEffects = 1, + isPredicated = 1 in def STrih_cPt_nv_V4 : NVInst_V4<(outs), (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2), "if ($src1) memh($addr) = $src2.new", @@ -1904,7 +3257,8 @@ def STrih_cPt_nv_V4 : NVInst_V4<(outs), Requires<[HasV4T]>; // if (Pv.new) memh(Rs+#u6:1)=Nt.new -let mayStore = 1, neverHasSideEffects = 1 in +let mayStore = 1, neverHasSideEffects = 1, + isPredicated = 1 in def STrih_cdnPt_nv_V4 : NVInst_V4<(outs), (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2), "if ($src1.new) memh($addr) = $src2.new", @@ -1912,7 +3266,8 @@ def STrih_cdnPt_nv_V4 : NVInst_V4<(outs), Requires<[HasV4T]>; // if (!Pv) memh(Rs+#u6:1)=Nt.new -let mayStore = 1, neverHasSideEffects = 1 in +let mayStore = 1, neverHasSideEffects = 1, + isPredicated = 1 in def STrih_cNotPt_nv_V4 : NVInst_V4<(outs), (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2), "if (!$src1) memh($addr) = $src2.new", @@ -1920,7 +3275,8 @@ def STrih_cNotPt_nv_V4 : NVInst_V4<(outs), Requires<[HasV4T]>; // if (!Pv.new) memh(Rs+#u6:1)=Nt.new -let mayStore = 1, neverHasSideEffects = 1 in +let mayStore = 1, neverHasSideEffects = 1, + isPredicated = 1 in def STrih_cdnNotPt_nv_V4 : NVInst_V4<(outs), (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2), "if (!$src1.new) memh($addr) = $src2.new", @@ -1928,7 +3284,8 @@ def STrih_cdnNotPt_nv_V4 : NVInst_V4<(outs), Requires<[HasV4T]>; // if (Pv) memh(Rs+#u6:1)=Nt.new -let mayStore = 1, neverHasSideEffects = 1 in +let mayStore = 1, neverHasSideEffects = 1, + isPredicated = 1 in def STrih_indexed_cPt_nv_V4 : NVInst_V4<(outs), (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3, IntRegs:$src4), "if ($src1) memh($src2+#$src3) = $src4.new", @@ -1936,7 +3293,8 @@ def STrih_indexed_cPt_nv_V4 : NVInst_V4<(outs), Requires<[HasV4T]>; // if (Pv.new) memh(Rs+#u6:1)=Nt.new -let mayStore = 1, neverHasSideEffects = 1 in +let mayStore = 1, neverHasSideEffects = 1, + isPredicated = 1 in def STrih_indexed_cdnPt_nv_V4 : NVInst_V4<(outs), (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3, IntRegs:$src4), "if ($src1.new) memh($src2+#$src3) = $src4.new", @@ -1944,7 +3302,8 @@ def STrih_indexed_cdnPt_nv_V4 : NVInst_V4<(outs), Requires<[HasV4T]>; // if (!Pv) memh(Rs+#u6:1)=Nt.new -let mayStore = 1, neverHasSideEffects = 1 in +let mayStore = 1, neverHasSideEffects = 1, + isPredicated = 1 in def STrih_indexed_cNotPt_nv_V4 : NVInst_V4<(outs), (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3, IntRegs:$src4), "if (!$src1) memh($src2+#$src3) = $src4.new", @@ -1952,7 +3311,8 @@ def STrih_indexed_cNotPt_nv_V4 : NVInst_V4<(outs), Requires<[HasV4T]>; // if (!Pv.new) memh(Rs+#u6:1)=Nt.new -let mayStore = 1, neverHasSideEffects = 1 in +let mayStore = 1, neverHasSideEffects = 1, + isPredicated = 1 in def STrih_indexed_cdnNotPt_nv_V4 : NVInst_V4<(outs), (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3, IntRegs:$src4), "if (!$src1.new) memh($src2+#$src3) = $src4.new", @@ -1961,7 +3321,8 @@ def STrih_indexed_cdnNotPt_nv_V4 : NVInst_V4<(outs), // if ([!]Pv[.new]) memh(Rs+Ru<<#u2)=Nt.new // if (Pv) memh(Rs+Ru<<#u2)=Nt.new -let mayStore = 1, AddedComplexity = 10 in +let mayStore = 1, AddedComplexity = 10, + isPredicated = 1 in def STrih_indexed_shl_cPt_nv_V4 : NVInst_V4<(outs), (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4, IntRegs:$src5), @@ -1970,7 +3331,8 @@ def STrih_indexed_shl_cPt_nv_V4 : NVInst_V4<(outs), 
Requires<[HasV4T]>; // if (Pv.new) memh(Rs+Ru<<#u2)=Nt.new -let mayStore = 1, AddedComplexity = 10 in +let mayStore = 1, AddedComplexity = 10, + isPredicated = 1 in def STrih_indexed_shl_cdnPt_nv_V4 : NVInst_V4<(outs), (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4, IntRegs:$src5), @@ -1979,7 +3341,8 @@ def STrih_indexed_shl_cdnPt_nv_V4 : NVInst_V4<(outs), Requires<[HasV4T]>; // if (!Pv) memh(Rs+Ru<<#u2)=Nt.new -let mayStore = 1, AddedComplexity = 10 in +let mayStore = 1, AddedComplexity = 10, + isPredicated = 1 in def STrih_indexed_shl_cNotPt_nv_V4 : NVInst_V4<(outs), (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4, IntRegs:$src5), @@ -1988,7 +3351,8 @@ def STrih_indexed_shl_cNotPt_nv_V4 : NVInst_V4<(outs), Requires<[HasV4T]>; // if (!Pv.new) memh(Rs+Ru<<#u2)=Nt.new -let mayStore = 1, AddedComplexity = 10 in +let mayStore = 1, AddedComplexity = 10, + isPredicated = 1 in def STrih_indexed_shl_cdnNotPt_nv_V4 : NVInst_V4<(outs), (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4, IntRegs:$src5), @@ -1998,7 +3362,8 @@ def STrih_indexed_shl_cdnNotPt_nv_V4 : NVInst_V4<(outs), // if ([!]Pv[.new]) memh(Rx++#s4:1)=Nt.new // if (Pv) memh(Rx++#s4:1)=Nt.new -let mayStore = 1, hasCtrlDep = 1 in +let mayStore = 1, hasCtrlDep = 1, + isPredicated = 1 in def POST_SThri_cPt_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_1Imm:$offset), "if ($src1) memh($src3++#$offset) = $src2.new", @@ -2006,7 +3371,8 @@ def POST_SThri_cPt_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst), Requires<[HasV4T]>; // if (Pv.new) memh(Rx++#s4:1)=Nt.new -let mayStore = 1, hasCtrlDep = 1 in +let mayStore = 1, hasCtrlDep = 1, + isPredicated = 1 in def POST_SThri_cdnPt_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_1Imm:$offset), "if ($src1.new) memh($src3++#$offset) = $src2.new", @@ -2014,7 +3380,8 @@ def POST_SThri_cdnPt_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst), Requires<[HasV4T]>; // if (!Pv) memh(Rx++#s4:1)=Nt.new -let mayStore = 1, hasCtrlDep = 1 in +let mayStore = 1, hasCtrlDep = 1, + isPredicated = 1 in def POST_SThri_cNotPt_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_1Imm:$offset), "if (!$src1) memh($src3++#$offset) = $src2.new", @@ -2022,7 +3389,8 @@ def POST_SThri_cNotPt_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst), Requires<[HasV4T]>; // if (!Pv.new) memh(Rx++#s4:1)=Nt.new -let mayStore = 1, hasCtrlDep = 1 in +let mayStore = 1, hasCtrlDep = 1, + isPredicated = 1 in def POST_SThri_cdnNotPt_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_1Imm:$offset), "if (!$src1.new) memh($src3++#$offset) = $src2.new", @@ -2085,6 +3453,12 @@ def STriw_GP_nv_V4 : NVInst_V4<(outs), []>, Requires<[HasV4T]>; +let mayStore = 1, neverHasSideEffects = 1 in +def STw_GP_nv_V4 : NVInst_V4<(outs), + (ins globaladdress:$global, IntRegs:$src), + "memw(#$global) = $src.new", + []>, + Requires<[HasV4T]>; // Store new-value word conditionally.
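Note that "cdn" and "nv" are independent markers in these names: cdn* means the predicate is .new (the condition was produced in the same packet), while *_nv means the stored data is .new. The defs below combine both, e.g. STriw_cdnPt_nv_V4's "if ($src1.new) memw($addr) = $src2.new". A sketch of both axes at once (illustrative C, invented names):

    #include <stdint.h>

    void both_new(int32_t a, int32_t b, int32_t *p, int32_t x, int32_t y) {
      int pv = (a == b);   /* predicate computed in this packet  */
      int32_t v = x + y;   /* store data computed in this packet */
      if (pv)
        *p = v;            /* if (p0.new) memw(Rs+#0) = Rt.new   */
    }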
@@ -2092,7 +3466,8 @@ def STriw_GP_nv_V4 : NVInst_V4<(outs), // if ([!]Pv[.new]) memw(Rs+#u6:2)=Nt.new // if (Pv) memw(Rs+#u6:2)=Nt.new -let mayStore = 1, neverHasSideEffects = 1 in +let mayStore = 1, neverHasSideEffects = 1, + isPredicated = 1 in def STriw_cPt_nv_V4 : NVInst_V4<(outs), (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2), "if ($src1) memw($addr) = $src2.new", @@ -2100,7 +3475,8 @@ def STriw_cPt_nv_V4 : NVInst_V4<(outs), Requires<[HasV4T]>; // if (Pv.new) memw(Rs+#u6:2)=Nt.new -let mayStore = 1, neverHasSideEffects = 1 in +let mayStore = 1, neverHasSideEffects = 1, + isPredicated = 1 in def STriw_cdnPt_nv_V4 : NVInst_V4<(outs), (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2), "if ($src1.new) memw($addr) = $src2.new", @@ -2108,7 +3484,8 @@ def STriw_cdnPt_nv_V4 : NVInst_V4<(outs), Requires<[HasV4T]>; // if (!Pv) memw(Rs+#u6:2)=Nt.new -let mayStore = 1, neverHasSideEffects = 1 in +let mayStore = 1, neverHasSideEffects = 1, + isPredicated = 1 in def STriw_cNotPt_nv_V4 : NVInst_V4<(outs), (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2), "if (!$src1) memw($addr) = $src2.new", @@ -2116,7 +3493,8 @@ def STriw_cNotPt_nv_V4 : NVInst_V4<(outs), Requires<[HasV4T]>; // if (!Pv.new) memw(Rs+#u6:2)=Nt.new -let mayStore = 1, neverHasSideEffects = 1 in +let mayStore = 1, neverHasSideEffects = 1, + isPredicated = 1 in def STriw_cdnNotPt_nv_V4 : NVInst_V4<(outs), (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2), "if (!$src1.new) memw($addr) = $src2.new", @@ -2124,7 +3502,8 @@ def STriw_cdnNotPt_nv_V4 : NVInst_V4<(outs), Requires<[HasV4T]>; // if (Pv) memw(Rs+#u6:2)=Nt.new -let mayStore = 1, neverHasSideEffects = 1 in +let mayStore = 1, neverHasSideEffects = 1, + isPredicated = 1 in def STriw_indexed_cPt_nv_V4 : NVInst_V4<(outs), (ins PredRegs:$src1, IntRegs:$src2, u6_2Imm:$src3, IntRegs:$src4), "if ($src1) memw($src2+#$src3) = $src4.new", @@ -2132,7 +3511,8 @@ def STriw_indexed_cPt_nv_V4 : NVInst_V4<(outs), Requires<[HasV4T]>; // if (Pv.new) memw(Rs+#u6:2)=Nt.new -let mayStore = 1, neverHasSideEffects = 1 in +let mayStore = 1, neverHasSideEffects = 1, + isPredicated = 1 in def STriw_indexed_cdnPt_nv_V4 : NVInst_V4<(outs), (ins PredRegs:$src1, IntRegs:$src2, u6_2Imm:$src3, IntRegs:$src4), "if ($src1.new) memw($src2+#$src3) = $src4.new", @@ -2140,7 +3520,8 @@ def STriw_indexed_cdnPt_nv_V4 : NVInst_V4<(outs), Requires<[HasV4T]>; // if (!Pv) memw(Rs+#u6:2)=Nt.new -let mayStore = 1, neverHasSideEffects = 1 in +let mayStore = 1, neverHasSideEffects = 1, + isPredicated = 1 in def STriw_indexed_cNotPt_nv_V4 : NVInst_V4<(outs), (ins PredRegs:$src1, IntRegs:$src2, u6_2Imm:$src3, IntRegs:$src4), "if (!$src1) memw($src2+#$src3) = $src4.new", @@ -2148,7 +3529,8 @@ def STriw_indexed_cNotPt_nv_V4 : NVInst_V4<(outs), Requires<[HasV4T]>; // if (!Pv.new) memw(Rs+#u6:2)=Nt.new -let mayStore = 1, neverHasSideEffects = 1 in +let mayStore = 1, neverHasSideEffects = 1, + isPredicated = 1 in def STriw_indexed_cdnNotPt_nv_V4 : NVInst_V4<(outs), (ins PredRegs:$src1, IntRegs:$src2, u6_2Imm:$src3, IntRegs:$src4), "if (!$src1.new) memw($src2+#$src3) = $src4.new", @@ -2158,7 +3540,8 @@ def STriw_indexed_cdnNotPt_nv_V4 : NVInst_V4<(outs), // if ([!]Pv[.new]) memw(Rs+Ru<<#u2)=Nt.new // if (Pv) memw(Rs+Ru<<#u2)=Nt.new -let mayStore = 1, AddedComplexity = 10 in +let mayStore = 1, AddedComplexity = 10, + isPredicated = 1 in def STriw_indexed_shl_cPt_nv_V4 : NVInst_V4<(outs), (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4, IntRegs:$src5), @@ -2167,7 +3550,8 @@ def STriw_indexed_shl_cPt_nv_V4 : NVInst_V4<(outs), 
Requires<[HasV4T]>; // if (Pv.new) memw(Rs+Ru<<#u2)=Nt.new -let mayStore = 1, AddedComplexity = 10 in +let mayStore = 1, AddedComplexity = 10, + isPredicated = 1 in def STriw_indexed_shl_cdnPt_nv_V4 : NVInst_V4<(outs), (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4, IntRegs:$src5), @@ -2175,58 +3559,257 @@ def STriw_indexed_shl_cdnPt_nv_V4 : NVInst_V4<(outs), []>, Requires<[HasV4T]>; -// if (!Pv) memw(Rs+Ru<<#u2)=Nt.new -let mayStore = 1, AddedComplexity = 10 in -def STriw_indexed_shl_cNotPt_nv_V4 : NVInst_V4<(outs), - (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4, - IntRegs:$src5), - "if (!$src1) memw($src2+$src3<<#$src4) = $src5.new", +// if (!Pv) memw(Rs+Ru<<#u2)=Nt.new +let mayStore = 1, AddedComplexity = 10, + isPredicated = 1 in +def STriw_indexed_shl_cNotPt_nv_V4 : NVInst_V4<(outs), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4, + IntRegs:$src5), + "if (!$src1) memw($src2+$src3<<#$src4) = $src5.new", + []>, + Requires<[HasV4T]>; + +// if (!Pv.new) memw(Rs+Ru<<#u2)=Nt.new +let mayStore = 1, AddedComplexity = 10, + isPredicated = 1 in +def STriw_indexed_shl_cdnNotPt_nv_V4 : NVInst_V4<(outs), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4, + IntRegs:$src5), + "if (!$src1.new) memw($src2+$src3<<#$src4) = $src5.new", + []>, + Requires<[HasV4T]>; + +// if ([!]Pv[.new]) memw(Rx++#s4:2)=Nt.new +// if (Pv) memw(Rx++#s4:2)=Nt.new +let mayStore = 1, hasCtrlDep = 1, + isPredicated = 1 in +def POST_STwri_cPt_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_2Imm:$offset), + "if ($src1) memw($src3++#$offset) = $src2.new", + [],"$src3 = $dst">, + Requires<[HasV4T]>; + +// if (Pv.new) memw(Rx++#s4:2)=Nt.new +let mayStore = 1, hasCtrlDep = 1, + isPredicated = 1 in +def POST_STwri_cdnPt_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_2Imm:$offset), + "if ($src1.new) memw($src3++#$offset) = $src2.new", + [],"$src3 = $dst">, + Requires<[HasV4T]>; + +// if (!Pv) memw(Rx++#s4:2)=Nt.new +let mayStore = 1, hasCtrlDep = 1, + isPredicated = 1 in +def POST_STwri_cNotPt_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_2Imm:$offset), + "if (!$src1) memw($src3++#$offset) = $src2.new", + [],"$src3 = $dst">, + Requires<[HasV4T]>; + +// if (!Pv.new) memw(Rx++#s4:2)=Nt.new +let mayStore = 1, hasCtrlDep = 1, + isPredicated = 1 in +def POST_STwri_cdnNotPt_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_2Imm:$offset), + "if (!$src1.new) memw($src3++#$offset) = $src2.new", + [],"$src3 = $dst">, + Requires<[HasV4T]>; + + + +// if (Pv) memb(##global) = Rt +let mayStore = 1, neverHasSideEffects = 1 in +def STb_GP_cPt_nv_V4 : NVInst_V4<(outs), + (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2), + "if ($src1) memb(##$global) = $src2.new", + []>, + Requires<[HasV4T]>; + +// if (!Pv) memb(##global) = Rt +let mayStore = 1, neverHasSideEffects = 1 in +def STb_GP_cNotPt_nv_V4 : NVInst_V4<(outs), + (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2), + "if (!$src1) memb(##$global) = $src2.new", + []>, + Requires<[HasV4T]>; + +// if (Pv) memb(##global) = Rt +let mayStore = 1, neverHasSideEffects = 1 in +def STb_GP_cdnPt_nv_V4 : NVInst_V4<(outs), + (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2), + "if ($src1.new) memb(##$global) = $src2.new", + []>, + Requires<[HasV4T]>; + +// if (!Pv) memb(##global) = Rt +let mayStore = 1, neverHasSideEffects = 
1 in +def STb_GP_cdnNotPt_nv_V4 : NVInst_V4<(outs), + (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2), + "if (!$src1.new) memb(##$global) = $src2.new", + []>, + Requires<[HasV4T]>; + +// if (Pv) memh(##global) = Rt +let mayStore = 1, neverHasSideEffects = 1 in +def STh_GP_cPt_nv_V4 : NVInst_V4<(outs), + (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2), + "if ($src1) memh(##$global) = $src2.new", + []>, + Requires<[HasV4T]>; + +// if (!Pv) memh(##global) = Rt +let mayStore = 1, neverHasSideEffects = 1 in +def STh_GP_cNotPt_nv_V4 : NVInst_V4<(outs), + (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2), + "if (!$src1) memh(##$global) = $src2.new", + []>, + Requires<[HasV4T]>; + +// if (Pv) memh(##global) = Rt +let mayStore = 1, neverHasSideEffects = 1 in +def STh_GP_cdnPt_nv_V4 : NVInst_V4<(outs), + (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2), + "if ($src1.new) memh(##$global) = $src2.new", + []>, + Requires<[HasV4T]>; + +// if (!Pv) memh(##global) = Rt +let mayStore = 1, neverHasSideEffects = 1 in +def STh_GP_cdnNotPt_nv_V4 : NVInst_V4<(outs), + (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2), + "if (!$src1.new) memh(##$global) = $src2.new", + []>, + Requires<[HasV4T]>; + +// if (Pv) memw(##global) = Rt +let mayStore = 1, neverHasSideEffects = 1 in +def STw_GP_cPt_nv_V4 : NVInst_V4<(outs), + (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2), + "if ($src1) memw(##$global) = $src2.new", + []>, + Requires<[HasV4T]>; + +// if (!Pv) memw(##global) = Rt +let mayStore = 1, neverHasSideEffects = 1 in +def STw_GP_cNotPt_nv_V4 : NVInst_V4<(outs), + (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2), + "if (!$src1) memw(##$global) = $src2.new", + []>, + Requires<[HasV4T]>; + +// if (Pv) memw(##global) = Rt +let mayStore = 1, neverHasSideEffects = 1 in +def STw_GP_cdnPt_nv_V4 : NVInst_V4<(outs), + (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2), + "if ($src1.new) memw(##$global) = $src2.new", + []>, + Requires<[HasV4T]>; + +// if (!Pv) memw(##global) = Rt +let mayStore = 1, neverHasSideEffects = 1 in +def STw_GP_cdnNotPt_nv_V4 : NVInst_V4<(outs), + (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2), + "if (!$src1.new) memw(##$global) = $src2.new", + []>, + Requires<[HasV4T]>; + +let mayStore = 1, neverHasSideEffects = 1 in +def STrib_GP_cPt_nv_V4 : NVInst_V4<(outs), + (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset, + IntRegs:$src2), + "if ($src1) memb(##$global+$offset) = $src2.new", + []>, + Requires<[HasV4T]>; + +let mayStore = 1, neverHasSideEffects = 1 in +def STrib_GP_cNotPt_nv_V4 : NVInst_V4<(outs), + (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset, + IntRegs:$src2), + "if (!$src1) memb(##$global+$offset) = $src2.new", + []>, + Requires<[HasV4T]>; + +let mayStore = 1, neverHasSideEffects = 1 in +def STrib_GP_cdnPt_nv_V4 : NVInst_V4<(outs), + (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset, + IntRegs:$src2), + "if ($src1.new) memb(##$global+$offset) = $src2.new", + []>, + Requires<[HasV4T]>; + +let mayStore = 1, neverHasSideEffects = 1 in +def STrib_GP_cdnNotPt_nv_V4 : NVInst_V4<(outs), + (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset, + IntRegs:$src2), + "if (!$src1.new) memb(##$global+$offset) = $src2.new", + []>, + Requires<[HasV4T]>; + +let mayStore = 1, neverHasSideEffects = 1 in +def STrih_GP_cPt_nv_V4 : NVInst_V4<(outs), + (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset, + IntRegs:$src2), + "if ($src1) memh(##$global+$offset) = $src2.new", + []>, 
+ Requires<[HasV4T]>; + +let mayStore = 1, neverHasSideEffects = 1 in +def STrih_GP_cNotPt_nv_V4 : NVInst_V4<(outs), + (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset, + IntRegs:$src2), + "if (!$src1) memh(##$global+$offset) = $src2.new", + []>, + Requires<[HasV4T]>; + +let mayStore = 1, neverHasSideEffects = 1 in +def STrih_GP_cdnPt_nv_V4 : NVInst_V4<(outs), + (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset, + IntRegs:$src2), + "if ($src1.new) memh(##$global+$offset) = $src2.new", []>, Requires<[HasV4T]>; -// if (!Pv.new) memw(Rs+Ru<<#u2)=Nt.new -let mayStore = 1, AddedComplexity = 10 in -def STriw_indexed_shl_cdnNotPt_nv_V4 : NVInst_V4<(outs), - (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4, - IntRegs:$src5), - "if (!$src1.new) memw($src2+$src3<<#$src4) = $src5.new", +let mayStore = 1, neverHasSideEffects = 1 in +def STrih_GP_cdnNotPt_nv_V4 : NVInst_V4<(outs), + (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset, + IntRegs:$src2), + "if (!$src1.new) memh(##$global+$offset) = $src2.new", []>, Requires<[HasV4T]>; -// if ([!]Pv[.new]) memw(Rx++#s4:2)=Nt.new -// if (Pv) memw(Rx++#s4:2)=Nt.new -let mayStore = 1, hasCtrlDep = 1 in -def POST_STwri_cPt_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_2Imm:$offset), - "if ($src1) memw($src3++#$offset) = $src2.new", - [],"$src3 = $dst">, +let mayStore = 1, neverHasSideEffects = 1 in +def STriw_GP_cPt_nv_V4 : NVInst_V4<(outs), + (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset, + IntRegs:$src2), + "if ($src1) memw(##$global+$offset) = $src2.new", + []>, Requires<[HasV4T]>; -// if (Pv.new) memw(Rx++#s4:2)=Nt.new -let mayStore = 1, hasCtrlDep = 1 in -def POST_STwri_cdnPt_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_2Imm:$offset), - "if ($src1.new) memw($src3++#$offset) = $src2.new", - [],"$src3 = $dst">, +let mayStore = 1, neverHasSideEffects = 1 in +def STriw_GP_cNotPt_nv_V4 : NVInst_V4<(outs), + (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset, + IntRegs:$src2), + "if (!$src1) memw(##$global+$offset) = $src2.new", + []>, Requires<[HasV4T]>; -// if (!Pv) memw(Rx++#s4:2)=Nt.new -let mayStore = 1, hasCtrlDep = 1 in -def POST_STwri_cNotPt_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_2Imm:$offset), - "if (!$src1) memw($src3++#$offset) = $src2.new", - [],"$src3 = $dst">, +let mayStore = 1, neverHasSideEffects = 1 in +def STriw_GP_cdnPt_nv_V4 : NVInst_V4<(outs), + (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset, + IntRegs:$src2), + "if ($src1.new) memw(##$global+$offset) = $src2.new", + []>, Requires<[HasV4T]>; -// if (!Pv.new) memw(Rx++#s4:2)=Nt.new -let mayStore = 1, hasCtrlDep = 1 in -def POST_STwri_cdnNotPt_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_2Imm:$offset), - "if (!$src1.new) memw($src3++#$offset) = $src2.new", - [],"$src3 = $dst">, +let mayStore = 1, neverHasSideEffects = 1 in +def STriw_GP_cdnNotPt_nv_V4 : NVInst_V4<(outs), + (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset, + IntRegs:$src2), + "if (!$src1.new) memw(##$global+$offset) = $src2.new", + []>, Requires<[HasV4T]>; - //===----------------------------------------------------------------------===// // NV/ST - //===----------------------------------------------------------------------===// @@ -2253,7 +3836,8 @@ multiclass NVJ_type_basic_reg { Requires<[HasV4T]>; } -multiclass 
NVJ_type_basic_2ndDotNew { +multiclass NVJ_type_basic_2ndDotNew { def _ie_nv_V4 : NVInst_V4<(outs), (ins IntRegs:$src1, IntRegs:$src2, brtarget:$offset), !strconcat("if (", !strconcat(NotStr, !strconcat(OpcStr, @@ -2307,7 +3891,8 @@ multiclass NVJ_type_basic_neg { Requires<[HasV4T]>; } -multiclass NVJ_type_basic_tstbit { +multiclass NVJ_type_basic_tstbit { def _ie_nv_V4 : NVInst_V4<(outs), (ins IntRegs:$src1, u1Imm:$src2, brtarget:$offset), !strconcat("if (", !strconcat(NotStr, !strconcat(OpcStr, @@ -2416,16 +4001,18 @@ let isBranch = 1, isTerminator=1, neverHasSideEffects = 1, Defs = [PC] in { def ADDr_ADDri_V4 : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2, s6Imm:$src3), "$dst = add($src1, add($src2, #$src3))", - [(set IntRegs:$dst, - (add IntRegs:$src1, (add IntRegs:$src2, s6ImmPred:$src3)))]>, + [(set (i32 IntRegs:$dst), + (add (i32 IntRegs:$src1), (add (i32 IntRegs:$src2), + s6ImmPred:$src3)))]>, Requires<[HasV4T]>; // Rd=add(Rs,sub(#s6,Ru)) def ADDr_SUBri_V4 : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, s6Imm:$src2, IntRegs:$src3), "$dst = add($src1, sub(#$src2, $src3))", - [(set IntRegs:$dst, - (add IntRegs:$src1, (sub s6ImmPred:$src2, IntRegs:$src3)))]>, + [(set (i32 IntRegs:$dst), + (add (i32 IntRegs:$src1), (sub s6ImmPred:$src2, + (i32 IntRegs:$src3))))]>, Requires<[HasV4T]>; // Generates the same instruction as ADDr_SUBri_V4 but matches different @@ -2434,8 +4021,9 @@ def ADDr_SUBri_V4 : MInst<(outs IntRegs:$dst), def ADDri_SUBr_V4 : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, s6Imm:$src2, IntRegs:$src3), "$dst = add($src1, sub(#$src2, $src3))", - [(set IntRegs:$dst, - (sub (add IntRegs:$src1, s6ImmPred:$src2), IntRegs:$src3))]>, + [(set (i32 IntRegs:$dst), + (sub (add (i32 IntRegs:$src1), s6ImmPred:$src2), + (i32 IntRegs:$src3)))]>, Requires<[HasV4T]>; @@ -2451,16 +4039,16 @@ def ADDri_SUBr_V4 : MInst<(outs IntRegs:$dst), def ANDd_NOTd_V4 : MInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2), "$dst = and($src1, ~$src2)", - [(set DoubleRegs:$dst, (and DoubleRegs:$src1, - (not DoubleRegs:$src2)))]>, + [(set (i64 DoubleRegs:$dst), (and (i64 DoubleRegs:$src1), + (not (i64 DoubleRegs:$src2))))]>, Requires<[HasV4T]>; // Rdd=or(Rtt,~Rss) def ORd_NOTd_V4 : MInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2), "$dst = or($src1, ~$src2)", - [(set DoubleRegs:$dst, - (or DoubleRegs:$src1, (not DoubleRegs:$src2)))]>, + [(set (i64 DoubleRegs:$dst), + (or (i64 DoubleRegs:$src1), (not (i64 DoubleRegs:$src2))))]>, Requires<[HasV4T]>; @@ -2469,8 +4057,9 @@ def ORd_NOTd_V4 : MInst<(outs DoubleRegs:$dst), def XORd_XORdd: MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3), "$dst ^= xor($src2, $src3)", - [(set DoubleRegs:$dst, - (xor DoubleRegs:$src1, (xor DoubleRegs:$src2, DoubleRegs:$src3)))], + [(set (i64 DoubleRegs:$dst), + (xor (i64 DoubleRegs:$src1), (xor (i64 DoubleRegs:$src2), + (i64 DoubleRegs:$src3))))], "$src1 = $dst">, Requires<[HasV4T]>; @@ -2480,8 +4069,9 @@ def XORd_XORdd: MInst_acc<(outs DoubleRegs:$dst), def ORr_ANDri_V4 : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs: $src2, s10Imm:$src3), "$dst = or($src1, and($src2, #$src3))", - [(set IntRegs:$dst, - (or IntRegs:$src1, (and IntRegs:$src2, s10ImmPred:$src3)))], + [(set (i32 IntRegs:$dst), + (or (i32 IntRegs:$src1), (and (i32 IntRegs:$src2), + s10ImmPred:$src3)))], "$src2 = $dst">, Requires<[HasV4T]>; @@ -2490,8 +4080,9 @@ def ORr_ANDri_V4 : MInst_acc<(outs IntRegs:$dst), def ANDr_ANDrr_V4 : MInst_acc<(outs 
IntRegs:$dst), (ins IntRegs:$src1, IntRegs: $src2, IntRegs:$src3), "$dst &= and($src2, $src3)", - [(set IntRegs:$dst, - (and IntRegs:$src1, (and IntRegs:$src2, IntRegs:$src3)))], + [(set (i32 IntRegs:$dst), + (and (i32 IntRegs:$src1), (and (i32 IntRegs:$src2), + (i32 IntRegs:$src3))))], "$src1 = $dst">, Requires<[HasV4T]>; @@ -2499,8 +4090,9 @@ def ANDr_ANDrr_V4 : MInst_acc<(outs IntRegs:$dst), def ORr_ANDrr_V4 : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs: $src2, IntRegs:$src3), "$dst |= and($src2, $src3)", - [(set IntRegs:$dst, - (or IntRegs:$src1, (and IntRegs:$src2, IntRegs:$src3)))], + [(set (i32 IntRegs:$dst), + (or (i32 IntRegs:$src1), (and (i32 IntRegs:$src2), + (i32 IntRegs:$src3))))], "$src1 = $dst">, Requires<[HasV4T]>; @@ -2508,8 +4100,9 @@ def ORr_ANDrr_V4 : MInst_acc<(outs IntRegs:$dst), def XORr_ANDrr_V4 : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs: $src2, IntRegs:$src3), "$dst ^= and($src2, $src3)", - [(set IntRegs:$dst, - (xor IntRegs:$src1, (and IntRegs:$src2, IntRegs:$src3)))], + [(set (i32 IntRegs:$dst), + (xor (i32 IntRegs:$src1), (and (i32 IntRegs:$src2), + (i32 IntRegs:$src3))))], "$src1 = $dst">, Requires<[HasV4T]>; @@ -2518,8 +4111,9 @@ def XORr_ANDrr_V4 : MInst_acc<(outs IntRegs:$dst), def ANDr_ANDr_NOTr_V4 : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs: $src2, IntRegs:$src3), "$dst &= and($src2, ~$src3)", - [(set IntRegs:$dst, - (and IntRegs:$src1, (and IntRegs:$src2, (not IntRegs:$src3))))], + [(set (i32 IntRegs:$dst), + (and (i32 IntRegs:$src1), (and (i32 IntRegs:$src2), + (not (i32 IntRegs:$src3)))))], "$src1 = $dst">, Requires<[HasV4T]>; @@ -2527,8 +4121,9 @@ def ANDr_ANDr_NOTr_V4 : MInst_acc<(outs IntRegs:$dst), def ORr_ANDr_NOTr_V4 : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs: $src2, IntRegs:$src3), "$dst |= and($src2, ~$src3)", - [(set IntRegs:$dst, - (or IntRegs:$src1, (and IntRegs:$src2, (not IntRegs:$src3))))], + [(set (i32 IntRegs:$dst), + (or (i32 IntRegs:$src1), (and (i32 IntRegs:$src2), + (not (i32 IntRegs:$src3)))))], "$src1 = $dst">, Requires<[HasV4T]>; @@ -2536,8 +4131,9 @@ def ORr_ANDr_NOTr_V4 : MInst_acc<(outs IntRegs:$dst), def XORr_ANDr_NOTr_V4 : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs: $src2, IntRegs:$src3), "$dst ^= and($src2, ~$src3)", - [(set IntRegs:$dst, - (xor IntRegs:$src1, (and IntRegs:$src2, (not IntRegs:$src3))))], + [(set (i32 IntRegs:$dst), + (xor (i32 IntRegs:$src1), (and (i32 IntRegs:$src2), + (not (i32 IntRegs:$src3)))))], "$src1 = $dst">, Requires<[HasV4T]>; @@ -2546,8 +4142,9 @@ def XORr_ANDr_NOTr_V4 : MInst_acc<(outs IntRegs:$dst), def ANDr_ORrr_V4 : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs: $src2, IntRegs:$src3), "$dst &= or($src2, $src3)", - [(set IntRegs:$dst, - (and IntRegs:$src1, (or IntRegs:$src2, IntRegs:$src3)))], + [(set (i32 IntRegs:$dst), + (and (i32 IntRegs:$src1), (or (i32 IntRegs:$src2), + (i32 IntRegs:$src3))))], "$src1 = $dst">, Requires<[HasV4T]>; @@ -2555,8 +4152,9 @@ def ANDr_ORrr_V4 : MInst_acc<(outs IntRegs:$dst), def ORr_ORrr_V4 : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs: $src2, IntRegs:$src3), "$dst |= or($src2, $src3)", - [(set IntRegs:$dst, - (or IntRegs:$src1, (or IntRegs:$src2, IntRegs:$src3)))], + [(set (i32 IntRegs:$dst), + (or (i32 IntRegs:$src1), (or (i32 IntRegs:$src2), + (i32 IntRegs:$src3))))], "$src1 = $dst">, Requires<[HasV4T]>; @@ -2564,8 +4162,9 @@ def ORr_ORrr_V4 : MInst_acc<(outs IntRegs:$dst), def XORr_ORrr_V4 : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$src1, 
IntRegs: $src2, IntRegs:$src3), "$dst ^= or($src2, $src3)", - [(set IntRegs:$dst, - (xor IntRegs:$src1, (or IntRegs:$src2, IntRegs:$src3)))], + [(set (i32 IntRegs:$dst), + (xor (i32 IntRegs:$src1), (or (i32 IntRegs:$src2), + (i32 IntRegs:$src3))))], "$src1 = $dst">, Requires<[HasV4T]>; @@ -2574,8 +4173,9 @@ def XORr_ORrr_V4 : MInst_acc<(outs IntRegs:$dst), def ANDr_XORrr_V4 : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs: $src2, IntRegs:$src3), "$dst &= xor($src2, $src3)", - [(set IntRegs:$dst, - (and IntRegs:$src1, (xor IntRegs:$src2, IntRegs:$src3)))], + [(set (i32 IntRegs:$dst), + (and (i32 IntRegs:$src1), (xor (i32 IntRegs:$src2), + (i32 IntRegs:$src3))))], "$src1 = $dst">, Requires<[HasV4T]>; @@ -2583,8 +4183,9 @@ def ANDr_XORrr_V4 : MInst_acc<(outs IntRegs:$dst), def ORr_XORrr_V4 : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs: $src2, IntRegs:$src3), "$dst |= xor($src2, $src3)", - [(set IntRegs:$dst, - (and IntRegs:$src1, (xor IntRegs:$src2, IntRegs:$src3)))], + [(set (i32 IntRegs:$dst), + (and (i32 IntRegs:$src1), (xor (i32 IntRegs:$src2), + (i32 IntRegs:$src3))))], "$src1 = $dst">, Requires<[HasV4T]>; @@ -2592,8 +4193,9 @@ def ORr_XORrr_V4 : MInst_acc<(outs IntRegs:$dst), def XORr_XORrr_V4 : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs: $src2, IntRegs:$src3), "$dst ^= xor($src2, $src3)", - [(set IntRegs:$dst, - (and IntRegs:$src1, (xor IntRegs:$src2, IntRegs:$src3)))], + [(set (i32 IntRegs:$dst), + (and (i32 IntRegs:$src1), (xor (i32 IntRegs:$src2), + (i32 IntRegs:$src3))))], "$src1 = $dst">, Requires<[HasV4T]>; @@ -2601,8 +4203,9 @@ def XORr_XORrr_V4 : MInst_acc<(outs IntRegs:$dst), def ORr_ANDri2_V4 : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs: $src2, s10Imm:$src3), "$dst |= and($src2, #$src3)", - [(set IntRegs:$dst, - (or IntRegs:$src1, (and IntRegs:$src2, s10ImmPred:$src3)))], + [(set (i32 IntRegs:$dst), + (or (i32 IntRegs:$src1), (and (i32 IntRegs:$src2), + s10ImmPred:$src3)))], "$src1 = $dst">, Requires<[HasV4T]>; @@ -2610,8 +4213,9 @@ def ORr_ANDri2_V4 : MInst_acc<(outs IntRegs:$dst), def ORr_ORri_V4 : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs: $src2, s10Imm:$src3), "$dst |= or($src2, #$src3)", - [(set IntRegs:$dst, - (or IntRegs:$src1, (and IntRegs:$src2, s10ImmPred:$src3)))], + [(set (i32 IntRegs:$dst), + (or (i32 IntRegs:$src1), (and (i32 IntRegs:$src2), + s10ImmPred:$src3)))], "$src1 = $dst">, Requires<[HasV4T]>; @@ -2663,8 +4267,9 @@ def ORr_ORri_V4 : MInst_acc<(outs IntRegs:$dst), def ADDi_MPYri_V4 : MInst<(outs IntRegs:$dst), (ins u6Imm:$src1, IntRegs:$src2, u6Imm:$src3), "$dst = add(#$src1, mpyi($src2, #$src3))", - [(set IntRegs:$dst, - (add (mul IntRegs:$src2, u6ImmPred:$src3), u6ImmPred:$src1))]>, + [(set (i32 IntRegs:$dst), + (add (mul (i32 IntRegs:$src2), u6ImmPred:$src3), + u6ImmPred:$src1))]>, Requires<[HasV4T]>; // Rd=add(#u6,mpyi(Rs,Rt)) @@ -2672,32 +4277,36 @@ def ADDi_MPYri_V4 : MInst<(outs IntRegs:$dst), def ADDi_MPYrr_V4 : MInst<(outs IntRegs:$dst), (ins u6Imm:$src1, IntRegs:$src2, IntRegs:$src3), "$dst = add(#$src1, mpyi($src2, $src3))", - [(set IntRegs:$dst, - (add (mul IntRegs:$src2, IntRegs:$src3), u6ImmPred:$src1))]>, + [(set (i32 IntRegs:$dst), + (add (mul (i32 IntRegs:$src2), (i32 IntRegs:$src3)), + u6ImmPred:$src1))]>, Requires<[HasV4T]>; // Rd=add(Ru,mpyi(#u6:2,Rs)) def ADDr_MPYir_V4 : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, u6Imm:$src2, IntRegs:$src3), "$dst = add($src1, mpyi(#$src2, $src3))", - [(set IntRegs:$dst, - (add IntRegs:$src1, (mul IntRegs:$src3, 
u6_2ImmPred:$src2)))]>, + [(set (i32 IntRegs:$dst), + (add (i32 IntRegs:$src1), (mul (i32 IntRegs:$src3), + u6_2ImmPred:$src2)))]>, Requires<[HasV4T]>; // Rd=add(Ru,mpyi(Rs,#u6)) def ADDr_MPYri_V4 : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2, u6Imm:$src3), "$dst = add($src1, mpyi($src2, #$src3))", - [(set IntRegs:$dst, - (add IntRegs:$src1, (mul IntRegs:$src2, u6ImmPred:$src3)))]>, + [(set (i32 IntRegs:$dst), + (add (i32 IntRegs:$src1), (mul (i32 IntRegs:$src2), + u6ImmPred:$src3)))]>, Requires<[HasV4T]>; // Rx=add(Ru,mpyi(Rx,Rs)) def ADDr_MPYrr_V4 : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2, IntRegs:$src3), "$dst = add($src1, mpyi($src2, $src3))", - [(set IntRegs:$dst, - (add IntRegs:$src1, (mul IntRegs:$src2, IntRegs:$src3)))], + [(set (i32 IntRegs:$dst), + (add (i32 IntRegs:$src1), (mul (i32 IntRegs:$src2), + (i32 IntRegs:$src3))))], "$src2 = $dst">, Requires<[HasV4T]>; @@ -2745,8 +4354,9 @@ def ADDr_MPYrr_V4 : MInst_acc<(outs IntRegs:$dst), def ADDi_ASLri_V4 : MInst_acc<(outs IntRegs:$dst), (ins u8Imm:$src1, IntRegs:$src2, u5Imm:$src3), "$dst = add(#$src1, asl($src2, #$src3))", - [(set IntRegs:$dst, - (add (shl IntRegs:$src2, u5ImmPred:$src3), u8ImmPred:$src1))], + [(set (i32 IntRegs:$dst), + (add (shl (i32 IntRegs:$src2), u5ImmPred:$src3), + u8ImmPred:$src1))], "$src2 = $dst">, Requires<[HasV4T]>; @@ -2754,8 +4364,9 @@ def ADDi_ASLri_V4 : MInst_acc<(outs IntRegs:$dst), def ADDi_LSRri_V4 : MInst_acc<(outs IntRegs:$dst), (ins u8Imm:$src1, IntRegs:$src2, u5Imm:$src3), "$dst = add(#$src1, lsr($src2, #$src3))", - [(set IntRegs:$dst, - (add (srl IntRegs:$src2, u5ImmPred:$src3), u8ImmPred:$src1))], + [(set (i32 IntRegs:$dst), + (add (srl (i32 IntRegs:$src2), u5ImmPred:$src3), + u8ImmPred:$src1))], "$src2 = $dst">, Requires<[HasV4T]>; @@ -2763,8 +4374,9 @@ def ADDi_LSRri_V4 : MInst_acc<(outs IntRegs:$dst), def SUBi_ASLri_V4 : MInst_acc<(outs IntRegs:$dst), (ins u8Imm:$src1, IntRegs:$src2, u5Imm:$src3), "$dst = sub(#$src1, asl($src2, #$src3))", - [(set IntRegs:$dst, - (sub (shl IntRegs:$src2, u5ImmPred:$src3), u8ImmPred:$src1))], + [(set (i32 IntRegs:$dst), + (sub (shl (i32 IntRegs:$src2), u5ImmPred:$src3), + u8ImmPred:$src1))], "$src2 = $dst">, Requires<[HasV4T]>; @@ -2772,8 +4384,9 @@ def SUBi_ASLri_V4 : MInst_acc<(outs IntRegs:$dst), def SUBi_LSRri_V4 : MInst_acc<(outs IntRegs:$dst), (ins u8Imm:$src1, IntRegs:$src2, u5Imm:$src3), "$dst = sub(#$src1, lsr($src2, #$src3))", - [(set IntRegs:$dst, - (sub (srl IntRegs:$src2, u5ImmPred:$src3), u8ImmPred:$src1))], + [(set (i32 IntRegs:$dst), + (sub (srl (i32 IntRegs:$src2), u5ImmPred:$src3), + u8ImmPred:$src1))], "$src2 = $dst">, Requires<[HasV4T]>; @@ -2783,8 +4396,9 @@ def SUBi_LSRri_V4 : MInst_acc<(outs IntRegs:$dst), def ANDi_ASLri_V4 : MInst_acc<(outs IntRegs:$dst), (ins u8Imm:$src1, IntRegs:$src2, u5Imm:$src3), "$dst = and(#$src1, asl($src2, #$src3))", - [(set IntRegs:$dst, - (and (shl IntRegs:$src2, u5ImmPred:$src3), u8ImmPred:$src1))], + [(set (i32 IntRegs:$dst), + (and (shl (i32 IntRegs:$src2), u5ImmPred:$src3), + u8ImmPred:$src1))], "$src2 = $dst">, Requires<[HasV4T]>; @@ -2792,26 +4406,31 @@ def ANDi_ASLri_V4 : MInst_acc<(outs IntRegs:$dst), def ANDi_LSRri_V4 : MInst_acc<(outs IntRegs:$dst), (ins u8Imm:$src1, IntRegs:$src2, u5Imm:$src3), "$dst = and(#$src1, lsr($src2, #$src3))", - [(set IntRegs:$dst, - (and (srl IntRegs:$src2, u5ImmPred:$src3), u8ImmPred:$src1))], + [(set (i32 IntRegs:$dst), + (and (srl (i32 IntRegs:$src2), u5ImmPred:$src3), + u8ImmPred:$src1))], "$src2 = $dst">, 
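// The recurring edit in this hunk wraps every register operand in an
// explicit value type, e.g. IntRegs:$src1 becomes (i32 IntRegs:$src1).
// A plausible reading: once V5 lets these same register classes hold
// f32 and f64 values, untyped patterns would no longer infer a unique
// type, so each integer pattern is pinned to i32 or i64 explicitly.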
Requires<[HasV4T]>; //Rx=or(#u8,asl(Rx,#U5)) +let AddedComplexity = 30 in def ORi_ASLri_V4 : MInst_acc<(outs IntRegs:$dst), (ins u8Imm:$src1, IntRegs:$src2, u5Imm:$src3), "$dst = or(#$src1, asl($src2, #$src3))", - [(set IntRegs:$dst, - (or (shl IntRegs:$src2, u5ImmPred:$src3), u8ImmPred:$src1))], + [(set (i32 IntRegs:$dst), + (or (shl (i32 IntRegs:$src2), u5ImmPred:$src3), + u8ImmPred:$src1))], "$src2 = $dst">, Requires<[HasV4T]>; //Rx=or(#u8,lsr(Rx,#U5)) +let AddedComplexity = 30 in def ORi_LSRri_V4 : MInst_acc<(outs IntRegs:$dst), (ins u8Imm:$src1, IntRegs:$src2, u5Imm:$src3), "$dst = or(#$src1, lsr($src2, #$src3))", - [(set IntRegs:$dst, - (or (srl IntRegs:$src2, u5ImmPred:$src3), u8ImmPred:$src1))], + [(set (i32 IntRegs:$dst), + (or (srl (i32 IntRegs:$src2), u5ImmPred:$src3), + u8ImmPred:$src1))], "$src2 = $dst">, Requires<[HasV4T]>; @@ -2820,7 +4439,8 @@ def ORi_LSRri_V4 : MInst_acc<(outs IntRegs:$dst), //Rd=lsl(#s6,Rt) def LSLi_V4 : MInst<(outs IntRegs:$dst), (ins s6Imm:$src1, IntRegs:$src2), "$dst = lsl(#$src1, $src2)", - [(set IntRegs:$dst, (shl s6ImmPred:$src1, IntRegs:$src2))]>, + [(set (i32 IntRegs:$dst), (shl s6ImmPred:$src1, + (i32 IntRegs:$src2)))]>, Requires<[HasV4T]>; @@ -2829,8 +4449,9 @@ def LSLi_V4 : MInst<(outs IntRegs:$dst), (ins s6Imm:$src1, IntRegs:$src2), def ASLd_rr_xor_V4 : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3), "$dst ^= asl($src2, $src3)", - [(set DoubleRegs:$dst, - (xor DoubleRegs:$src1, (shl DoubleRegs:$src2, IntRegs:$src3)))], + [(set (i64 DoubleRegs:$dst), + (xor (i64 DoubleRegs:$src1), (shl (i64 DoubleRegs:$src2), + (i32 IntRegs:$src3))))], "$src1 = $dst">, Requires<[HasV4T]>; @@ -2838,8 +4459,9 @@ def ASLd_rr_xor_V4 : MInst_acc<(outs DoubleRegs:$dst), def ASRd_rr_xor_V4 : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3), "$dst ^= asr($src2, $src3)", - [(set DoubleRegs:$dst, - (xor DoubleRegs:$src1, (sra DoubleRegs:$src2, IntRegs:$src3)))], + [(set (i64 DoubleRegs:$dst), + (xor (i64 DoubleRegs:$src1), (sra (i64 DoubleRegs:$src2), + (i32 IntRegs:$src3))))], "$src1 = $dst">, Requires<[HasV4T]>; @@ -2847,8 +4469,9 @@ def ASRd_rr_xor_V4 : MInst_acc<(outs DoubleRegs:$dst), def LSLd_rr_xor_V4 : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3), "$dst ^= lsl($src2, $src3)", - [(set DoubleRegs:$dst, - (xor DoubleRegs:$src1, (shl DoubleRegs:$src2, IntRegs:$src3)))], + [(set (i64 DoubleRegs:$dst), (xor (i64 DoubleRegs:$src1), + (shl (i64 DoubleRegs:$src2), + (i32 IntRegs:$src3))))], "$src1 = $dst">, Requires<[HasV4T]>; @@ -2856,8 +4479,9 @@ def LSLd_rr_xor_V4 : MInst_acc<(outs DoubleRegs:$dst), def LSRd_rr_xor_V4 : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3), "$dst ^= lsr($src2, $src3)", - [(set DoubleRegs:$dst, - (xor DoubleRegs:$src1, (srl DoubleRegs:$src2, IntRegs:$src3)))], + [(set (i64 DoubleRegs:$dst), + (xor (i64 DoubleRegs:$src1), (srl (i64 DoubleRegs:$src2), + (i32 IntRegs:$src3))))], "$src1 = $dst">, Requires<[HasV4T]>; @@ -2903,16 +4527,16 @@ let AddedComplexity = 30 in def MEMw_ADDSUBi_indexed_MEM_V4 : MEMInst_V4<(outs), (ins IntRegs:$base, u6_2Imm:$offset, m6Imm:$addend), "Error; should not emit", - [(store (add (load (add IntRegs:$base, u6_2ImmPred:$offset)), -m6ImmPred:$addend), - (add IntRegs:$base, u6_2ImmPred:$offset))]>, + [(store (add (load (add (i32 IntRegs:$base), u6_2ImmPred:$offset)), + m6ImmPred:$addend), + (add (i32 IntRegs:$base), u6_2ImmPred:$offset))]>, 
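// On the MEMw/MEMh/MEMb defs that follow: they model Hexagon's
// read-modify-write memory ops, memw(Rs+#u6:2) += Rt and friends,
// guarded by the UseMEMOP predicate. The ADDSUBi forms with the
// "Error; should not emit" asm string carry the real selection
// pattern for an m6Imm addend; the expectation, as the string
// suggests, is that a later pass rewrites each into the ADDi or SUBi
// form according to the immediate's sign. AddedComplexity = 30 makes
// all of these outrank the separate load, op, store patterns.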
Requires<[HasV4T, UseMEMOP]>; // memw(Rs+#u6:2) += #U5 let AddedComplexity = 30 in def MEMw_ADDi_indexed_MEM_V4 : MEMInst_V4<(outs), (ins IntRegs:$base, u6_2Imm:$offset, u5Imm:$addend), - "memw($base+#$offset) += $addend", + "memw($base+#$offset) += #$addend", []>, Requires<[HasV4T, UseMEMOP]>; @@ -2920,7 +4544,7 @@ def MEMw_ADDi_indexed_MEM_V4 : MEMInst_V4<(outs), let AddedComplexity = 30 in def MEMw_SUBi_indexed_MEM_V4 : MEMInst_V4<(outs), (ins IntRegs:$base, u6_2Imm:$offset, u5Imm:$subend), - "memw($base+#$offset) -= $subend", + "memw($base+#$offset) -= #$subend", []>, Requires<[HasV4T, UseMEMOP]>; @@ -2929,9 +4553,9 @@ let AddedComplexity = 30 in def MEMw_ADDr_indexed_MEM_V4 : MEMInst_V4<(outs), (ins IntRegs:$base, u6_2Imm:$offset, IntRegs:$addend), "memw($base+#$offset) += $addend", - [(store (add (load (add IntRegs:$base, u6_2ImmPred:$offset)), -IntRegs:$addend), - (add IntRegs:$base, u6_2ImmPred:$offset))]>, + [(store (add (load (add (i32 IntRegs:$base), u6_2ImmPred:$offset)), + (i32 IntRegs:$addend)), + (add (i32 IntRegs:$base), u6_2ImmPred:$offset))]>, Requires<[HasV4T, UseMEMOP]>; // memw(Rs+#u6:2) -= Rt @@ -2939,19 +4563,19 @@ let AddedComplexity = 30 in def MEMw_SUBr_indexed_MEM_V4 : MEMInst_V4<(outs), (ins IntRegs:$base, u6_2Imm:$offset, IntRegs:$subend), "memw($base+#$offset) -= $subend", - [(store (sub (load (add IntRegs:$base, u6_2ImmPred:$offset)), -IntRegs:$subend), - (add IntRegs:$base, u6_2ImmPred:$offset))]>, + [(store (sub (load (add (i32 IntRegs:$base), u6_2ImmPred:$offset)), + (i32 IntRegs:$subend)), + (add (i32 IntRegs:$base), u6_2ImmPred:$offset))]>, Requires<[HasV4T, UseMEMOP]>; // memw(Rs+#u6:2) &= Rt let AddedComplexity = 30 in def MEMw_ANDr_indexed_MEM_V4 : MEMInst_V4<(outs), (ins IntRegs:$base, u6_2Imm:$offset, IntRegs:$andend), - "memw($base+#$offset) += $andend", - [(store (and (load (add IntRegs:$base, u6_2ImmPred:$offset)), -IntRegs:$andend), - (add IntRegs:$base, u6_2ImmPred:$offset))]>, + "memw($base+#$offset) &= $andend", + [(store (and (load (add (i32 IntRegs:$base), u6_2ImmPred:$offset)), + (i32 IntRegs:$andend)), + (add (i32 IntRegs:$base), u6_2ImmPred:$offset))]>, Requires<[HasV4T, UseMEMOP]>; // memw(Rs+#u6:2) |= Rt @@ -2959,9 +4583,9 @@ let AddedComplexity = 30 in def MEMw_ORr_indexed_MEM_V4 : MEMInst_V4<(outs), (ins IntRegs:$base, u6_2Imm:$offset, IntRegs:$orend), "memw($base+#$offset) |= $orend", - [(store (or (load (add IntRegs:$base, u6_2ImmPred:$offset)), - IntRegs:$orend), - (add IntRegs:$base, u6_2ImmPred:$offset))]>, + [(store (or (load (add (i32 IntRegs:$base), u6_2ImmPred:$offset)), + (i32 IntRegs:$orend)), + (add (i32 IntRegs:$base), u6_2ImmPred:$offset))]>, Requires<[HasV4T, UseMEMOP]>; // MEMw_ADDSUBi_V4: @@ -2996,7 +4620,7 @@ let AddedComplexity = 30 in def MEMw_ADDr_MEM_V4 : MEMInst_V4<(outs), (ins MEMri:$addr, IntRegs:$addend), "memw($addr) += $addend", - [(store (add (load ADDRriU6_2:$addr), IntRegs:$addend), + [(store (add (load ADDRriU6_2:$addr), (i32 IntRegs:$addend)), ADDRriU6_2:$addr)]>, Requires<[HasV4T, UseMEMOP]>; @@ -3005,7 +4629,7 @@ let AddedComplexity = 30 in def MEMw_SUBr_MEM_V4 : MEMInst_V4<(outs), (ins MEMri:$addr, IntRegs:$subend), "memw($addr) -= $subend", - [(store (sub (load ADDRriU6_2:$addr), IntRegs:$subend), + [(store (sub (load ADDRriU6_2:$addr), (i32 IntRegs:$subend)), ADDRriU6_2:$addr)]>, Requires<[HasV4T, UseMEMOP]>; @@ -3014,7 +4638,7 @@ let AddedComplexity = 30 in def MEMw_ANDr_MEM_V4 : MEMInst_V4<(outs), (ins MEMri:$addr, IntRegs:$andend), "memw($addr) &= $andend", - [(store (and (load 
ADDRriU6_2:$addr), IntRegs:$andend), + [(store (and (load ADDRriU6_2:$addr), (i32 IntRegs:$andend)), ADDRriU6_2:$addr)]>, Requires<[HasV4T, UseMEMOP]>; @@ -3023,8 +4647,8 @@ let AddedComplexity = 30 in def MEMw_ORr_MEM_V4 : MEMInst_V4<(outs), (ins MEMri:$addr, IntRegs:$orend), "memw($addr) |= $orend", - [(store (or (load ADDRriU6_2:$addr), IntRegs:$orend), -ADDRriU6_2:$addr)]>, + [(store (or (load ADDRriU6_2:$addr), (i32 IntRegs:$orend)), + ADDRriU6_2:$addr)]>, Requires<[HasV4T, UseMEMOP]>; //===----------------------------------------------------------------------===// @@ -3060,10 +4684,10 @@ let AddedComplexity = 30 in def MEMh_ADDSUBi_indexed_MEM_V4 : MEMInst_V4<(outs), (ins IntRegs:$base, u6_1Imm:$offset, m6Imm:$addend), "Error; should not emit", - [(truncstorei16 (add (sextloadi16 (add IntRegs:$base, + [(truncstorei16 (add (sextloadi16 (add (i32 IntRegs:$base), u6_1ImmPred:$offset)), m6ImmPred:$addend), - (add IntRegs:$base, u6_1ImmPred:$offset))]>, + (add (i32 IntRegs:$base), u6_1ImmPred:$offset))]>, Requires<[HasV4T, UseMEMOP]>; // memh(Rs+#u6:1) += #U5 @@ -3087,10 +4711,10 @@ let AddedComplexity = 30 in def MEMh_ADDr_indexed_MEM_V4 : MEMInst_V4<(outs), (ins IntRegs:$base, u6_1Imm:$offset, IntRegs:$addend), "memh($base+#$offset) += $addend", - [(truncstorei16 (add (sextloadi16 (add IntRegs:$base, + [(truncstorei16 (add (sextloadi16 (add (i32 IntRegs:$base), u6_1ImmPred:$offset)), - IntRegs:$addend), - (add IntRegs:$base, u6_1ImmPred:$offset))]>, + (i32 IntRegs:$addend)), + (add (i32 IntRegs:$base), u6_1ImmPred:$offset))]>, Requires<[HasV4T, UseMEMOP]>; // memh(Rs+#u6:1) -= Rt @@ -3098,10 +4722,10 @@ let AddedComplexity = 30 in def MEMh_SUBr_indexed_MEM_V4 : MEMInst_V4<(outs), (ins IntRegs:$base, u6_1Imm:$offset, IntRegs:$subend), "memh($base+#$offset) -= $subend", - [(truncstorei16 (sub (sextloadi16 (add IntRegs:$base, + [(truncstorei16 (sub (sextloadi16 (add (i32 IntRegs:$base), u6_1ImmPred:$offset)), - IntRegs:$subend), - (add IntRegs:$base, u6_1ImmPred:$offset))]>, + (i32 IntRegs:$subend)), + (add (i32 IntRegs:$base), u6_1ImmPred:$offset))]>, Requires<[HasV4T, UseMEMOP]>; // memh(Rs+#u6:1) &= Rt @@ -3109,10 +4733,10 @@ let AddedComplexity = 30 in def MEMh_ANDr_indexed_MEM_V4 : MEMInst_V4<(outs), (ins IntRegs:$base, u6_1Imm:$offset, IntRegs:$andend), "memh($base+#$offset) += $andend", - [(truncstorei16 (and (sextloadi16 (add IntRegs:$base, + [(truncstorei16 (and (sextloadi16 (add (i32 IntRegs:$base), u6_1ImmPred:$offset)), - IntRegs:$andend), - (add IntRegs:$base, u6_1ImmPred:$offset))]>, + (i32 IntRegs:$andend)), + (add (i32 IntRegs:$base), u6_1ImmPred:$offset))]>, Requires<[HasV4T, UseMEMOP]>; // memh(Rs+#u6:1) |= Rt @@ -3120,10 +4744,10 @@ let AddedComplexity = 30 in def MEMh_ORr_indexed_MEM_V4 : MEMInst_V4<(outs), (ins IntRegs:$base, u6_1Imm:$offset, IntRegs:$orend), "memh($base+#$offset) |= $orend", - [(truncstorei16 (or (sextloadi16 (add IntRegs:$base, + [(truncstorei16 (or (sextloadi16 (add (i32 IntRegs:$base), u6_1ImmPred:$offset)), - IntRegs:$orend), - (add IntRegs:$base, u6_1ImmPred:$offset))]>, + (i32 IntRegs:$orend)), + (add (i32 IntRegs:$base), u6_1ImmPred:$offset))]>, Requires<[HasV4T, UseMEMOP]>; // MEMh_ADDSUBi_V4: @@ -3159,7 +4783,7 @@ def MEMh_ADDr_MEM_V4 : MEMInst_V4<(outs), (ins MEMri:$addr, IntRegs:$addend), "memh($addr) += $addend", [(truncstorei16 (add (sextloadi16 ADDRriU6_1:$addr), - IntRegs:$addend), ADDRriU6_1:$addr)]>, + (i32 IntRegs:$addend)), ADDRriU6_1:$addr)]>, Requires<[HasV4T, UseMEMOP]>; // memh(Rs+#u6:1) -= Rt @@ -3168,7 +4792,7 @@ def 
MEMh_SUBr_MEM_V4 : MEMInst_V4<(outs), (ins MEMri:$addr, IntRegs:$subend), "memh($addr) -= $subend", [(truncstorei16 (sub (sextloadi16 ADDRriU6_1:$addr), - IntRegs:$subend), ADDRriU6_1:$addr)]>, + (i32 IntRegs:$subend)), ADDRriU6_1:$addr)]>, Requires<[HasV4T, UseMEMOP]>; // memh(Rs+#u6:1) &= Rt @@ -3177,7 +4801,7 @@ def MEMh_ANDr_MEM_V4 : MEMInst_V4<(outs), (ins MEMri:$addr, IntRegs:$andend), "memh($addr) &= $andend", [(truncstorei16 (and (sextloadi16 ADDRriU6_1:$addr), - IntRegs:$andend), ADDRriU6_1:$addr)]>, + (i32 IntRegs:$andend)), ADDRriU6_1:$addr)]>, Requires<[HasV4T, UseMEMOP]>; // memh(Rs+#u6:1) |= Rt @@ -3186,7 +4810,7 @@ def MEMh_ORr_MEM_V4 : MEMInst_V4<(outs), (ins MEMri:$addr, IntRegs:$orend), "memh($addr) |= $orend", [(truncstorei16 (or (sextloadi16 ADDRriU6_1:$addr), - IntRegs:$orend), ADDRriU6_1:$addr)]>, + (i32 IntRegs:$orend)), ADDRriU6_1:$addr)]>, Requires<[HasV4T, UseMEMOP]>; @@ -3223,10 +4847,10 @@ let AddedComplexity = 30 in def MEMb_ADDSUBi_indexed_MEM_V4 : MEMInst_V4<(outs), (ins IntRegs:$base, u6_0Imm:$offset, m6Imm:$addend), "Error; should not emit", - [(truncstorei8 (add (sextloadi8 (add IntRegs:$base, + [(truncstorei8 (add (sextloadi8 (add (i32 IntRegs:$base), u6_0ImmPred:$offset)), m6ImmPred:$addend), - (add IntRegs:$base, u6_0ImmPred:$offset))]>, + (add (i32 IntRegs:$base), u6_0ImmPred:$offset))]>, Requires<[HasV4T, UseMEMOP]>; // memb(Rs+#u6:0) += #U5 @@ -3250,10 +4874,10 @@ let AddedComplexity = 30 in def MEMb_ADDr_indexed_MEM_V4 : MEMInst_V4<(outs), (ins IntRegs:$base, u6_0Imm:$offset, IntRegs:$addend), "memb($base+#$offset) += $addend", - [(truncstorei8 (add (sextloadi8 (add IntRegs:$base, + [(truncstorei8 (add (sextloadi8 (add (i32 IntRegs:$base), u6_0ImmPred:$offset)), - IntRegs:$addend), - (add IntRegs:$base, u6_0ImmPred:$offset))]>, + (i32 IntRegs:$addend)), + (add (i32 IntRegs:$base), u6_0ImmPred:$offset))]>, Requires<[HasV4T, UseMEMOP]>; // memb(Rs+#u6:0) -= Rt @@ -3261,10 +4885,10 @@ let AddedComplexity = 30 in def MEMb_SUBr_indexed_MEM_V4 : MEMInst_V4<(outs), (ins IntRegs:$base, u6_0Imm:$offset, IntRegs:$subend), "memb($base+#$offset) -= $subend", - [(truncstorei8 (sub (sextloadi8 (add IntRegs:$base, + [(truncstorei8 (sub (sextloadi8 (add (i32 IntRegs:$base), u6_0ImmPred:$offset)), - IntRegs:$subend), - (add IntRegs:$base, u6_0ImmPred:$offset))]>, + (i32 IntRegs:$subend)), + (add (i32 IntRegs:$base), u6_0ImmPred:$offset))]>, Requires<[HasV4T, UseMEMOP]>; // memb(Rs+#u6:0) &= Rt @@ -3272,10 +4896,10 @@ let AddedComplexity = 30 in def MEMb_ANDr_indexed_MEM_V4 : MEMInst_V4<(outs), (ins IntRegs:$base, u6_0Imm:$offset, IntRegs:$andend), "memb($base+#$offset) += $andend", - [(truncstorei8 (and (sextloadi8 (add IntRegs:$base, + [(truncstorei8 (and (sextloadi8 (add (i32 IntRegs:$base), u6_0ImmPred:$offset)), - IntRegs:$andend), - (add IntRegs:$base, u6_0ImmPred:$offset))]>, + (i32 IntRegs:$andend)), + (add (i32 IntRegs:$base), u6_0ImmPred:$offset))]>, Requires<[HasV4T, UseMEMOP]>; // memb(Rs+#u6:0) |= Rt @@ -3283,10 +4907,10 @@ let AddedComplexity = 30 in def MEMb_ORr_indexed_MEM_V4 : MEMInst_V4<(outs), (ins IntRegs:$base, u6_0Imm:$offset, IntRegs:$orend), "memb($base+#$offset) |= $orend", - [(truncstorei8 (or (sextloadi8 (add IntRegs:$base, + [(truncstorei8 (or (sextloadi8 (add (i32 IntRegs:$base), u6_0ImmPred:$offset)), - IntRegs:$orend), - (add IntRegs:$base, u6_0ImmPred:$offset))]>, + (i32 IntRegs:$orend)), + (add (i32 IntRegs:$base), u6_0ImmPred:$offset))]>, Requires<[HasV4T, UseMEMOP]>; // MEMb_ADDSUBi_V4: @@ -3322,7 +4946,7 @@ def MEMb_ADDr_MEM_V4 : 
MEMInst_V4<(outs), (ins MEMri:$addr, IntRegs:$addend), "memb($addr) += $addend", [(truncstorei8 (add (sextloadi8 ADDRriU6_0:$addr), - IntRegs:$addend), ADDRriU6_0:$addr)]>, + (i32 IntRegs:$addend)), ADDRriU6_0:$addr)]>, Requires<[HasV4T, UseMEMOP]>; // memb(Rs+#u6:0) -= Rt @@ -3331,7 +4955,7 @@ def MEMb_SUBr_MEM_V4 : MEMInst_V4<(outs), (ins MEMri:$addr, IntRegs:$subend), "memb($addr) -= $subend", [(truncstorei8 (sub (sextloadi8 ADDRriU6_0:$addr), - IntRegs:$subend), ADDRriU6_0:$addr)]>, + (i32 IntRegs:$subend)), ADDRriU6_0:$addr)]>, Requires<[HasV4T, UseMEMOP]>; // memb(Rs+#u6:0) &= Rt @@ -3340,7 +4964,7 @@ def MEMb_ANDr_MEM_V4 : MEMInst_V4<(outs), (ins MEMri:$addr, IntRegs:$andend), "memb($addr) &= $andend", [(truncstorei8 (and (sextloadi8 ADDRriU6_0:$addr), - IntRegs:$andend), ADDRriU6_0:$addr)]>, + (i32 IntRegs:$andend)), ADDRriU6_0:$addr)]>, Requires<[HasV4T, UseMEMOP]>; // memb(Rs+#u6:0) |= Rt @@ -3349,7 +4973,7 @@ def MEMb_ORr_MEM_V4 : MEMInst_V4<(outs), (ins MEMri:$addr, IntRegs:$orend), "memb($addr) |= $orend", [(truncstorei8 (or (sextloadi8 ADDRriU6_0:$addr), - IntRegs:$orend), ADDRriU6_0:$addr)]>, + (i32 IntRegs:$orend)), ADDRriU6_0:$addr)]>, Requires<[HasV4T, UseMEMOP]>; @@ -3364,13 +4988,16 @@ def MEMb_ORr_MEM_V4 : MEMInst_V4<(outs), // The implemented patterns are: EQ/GT/GTU. // Missing patterns are: GE/GEU/LT/LTU/LE/LEU. +// Following instruction is not being extended as it results into the +// incorrect code for negative numbers. // Pd=cmpb.eq(Rs,#u8) + let isCompare = 1 in def CMPbEQri_V4 : MInst<(outs PredRegs:$dst), (ins IntRegs:$src1, u8Imm:$src2), "$dst = cmpb.eq($src1, #$src2)", - [(set PredRegs:$dst, (seteq (and IntRegs:$src1, 255), - u8ImmPred:$src2))]>, + [(set (i1 PredRegs:$dst), + (seteq (and (i32 IntRegs:$src1), 255), u8ImmPred:$src2))]>, Requires<[HasV4T]>; // Pd=cmpb.eq(Rs,Rt) @@ -3378,10 +5005,9 @@ let isCompare = 1 in def CMPbEQrr_ubub_V4 : MInst<(outs PredRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), "$dst = cmpb.eq($src1, $src2)", - [(set PredRegs:$dst, (seteq (and (xor IntRegs:$src1, - IntRegs:$src2), - 255), - 0))]>, + [(set (i1 PredRegs:$dst), + (seteq (and (xor (i32 IntRegs:$src1), + (i32 IntRegs:$src2)), 255), 0))]>, Requires<[HasV4T]>; // Pd=cmpb.eq(Rs,Rt) @@ -3389,17 +5015,9 @@ let isCompare = 1 in def CMPbEQrr_sbsb_V4 : MInst<(outs PredRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), "$dst = cmpb.eq($src1, $src2)", - [(set PredRegs:$dst, (seteq (shl IntRegs:$src1, (i32 24)), - (shl IntRegs:$src2, (i32 24))))]>, - Requires<[HasV4T]>; - -// Pd=cmpb.gt(Rs,#s8) -let isCompare = 1 in -def CMPbGTri_V4 : MInst<(outs PredRegs:$dst), - (ins IntRegs:$src1, s32Imm:$src2), - "$dst = cmpb.gt($src1, #$src2)", - [(set PredRegs:$dst, (setgt (shl IntRegs:$src1, (i32 24)), - s32_24ImmPred:$src2))]>, + [(set (i1 PredRegs:$dst), + (seteq (shl (i32 IntRegs:$src1), (i32 24)), + (shl (i32 IntRegs:$src2), (i32 24))))]>, Requires<[HasV4T]>; // Pd=cmpb.gt(Rs,Rt) @@ -3407,8 +5025,9 @@ let isCompare = 1 in def CMPbGTrr_V4 : MInst<(outs PredRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), "$dst = cmpb.gt($src1, $src2)", - [(set PredRegs:$dst, (setgt (shl IntRegs:$src1, (i32 24)), - (shl IntRegs:$src2, (i32 24))))]>, + [(set (i1 PredRegs:$dst), + (setgt (shl (i32 IntRegs:$src1), (i32 24)), + (shl (i32 IntRegs:$src2), (i32 24))))]>, Requires<[HasV4T]>; // Pd=cmpb.gtu(Rs,#u7) @@ -3416,8 +5035,8 @@ let isCompare = 1 in def CMPbGTUri_V4 : MInst<(outs PredRegs:$dst), (ins IntRegs:$src1, u7Imm:$src2), "$dst = cmpb.gtu($src1, #$src2)", - [(set PredRegs:$dst, (setugt (and IntRegs:$src1, 
255), - u7ImmPred:$src2))]>, + [(set (i1 PredRegs:$dst), (setugt (and (i32 IntRegs:$src1), 255), + u7ImmPred:$src2))]>, Requires<[HasV4T]>; // Pd=cmpb.gtu(Rs,Rt) @@ -3425,18 +5044,21 @@ let isCompare = 1 in def CMPbGTUrr_V4 : MInst<(outs PredRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), "$dst = cmpb.gtu($src1, $src2)", - [(set PredRegs:$dst, (setugt (and IntRegs:$src1, 255), - (and IntRegs:$src2, 255)))]>, + [(set (i1 PredRegs:$dst), (setugt (and (i32 IntRegs:$src1), 255), + (and (i32 IntRegs:$src2), 255)))]>, Requires<[HasV4T]>; +// Following instruction is not being extended as it results into the incorrect +// code for negative numbers. + // Signed half compare(.eq) ri. // Pd=cmph.eq(Rs,#s8) let isCompare = 1 in def CMPhEQri_V4 : MInst<(outs PredRegs:$dst), - (ins IntRegs:$src1, u16Imm:$src2), + (ins IntRegs:$src1, s8Imm:$src2), "$dst = cmph.eq($src1, #$src2)", - [(set PredRegs:$dst, (seteq (and IntRegs:$src1, 65535), - u16_s8ImmPred:$src2))]>, + [(set (i1 PredRegs:$dst), (seteq (and (i32 IntRegs:$src1), 65535), + s8ImmPred:$src2))]>, Requires<[HasV4T]>; // Signed half compare(.eq) rr. @@ -3449,10 +5071,9 @@ let isCompare = 1 in def CMPhEQrr_xor_V4 : MInst<(outs PredRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), "$dst = cmph.eq($src1, $src2)", - [(set PredRegs:$dst, (seteq (and (xor IntRegs:$src1, - IntRegs:$src2), - 65535), - 0))]>, + [(set (i1 PredRegs:$dst), (seteq (and (xor (i32 IntRegs:$src1), + (i32 IntRegs:$src2)), + 65535), 0))]>, Requires<[HasV4T]>; // Signed half compare(.eq) rr. @@ -3465,19 +5086,25 @@ let isCompare = 1 in def CMPhEQrr_shl_V4 : MInst<(outs PredRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), "$dst = cmph.eq($src1, $src2)", - [(set PredRegs:$dst, (seteq (shl IntRegs:$src1, (i32 16)), - (shl IntRegs:$src2, (i32 16))))]>, + [(set (i1 PredRegs:$dst), + (seteq (shl (i32 IntRegs:$src1), (i32 16)), + (shl (i32 IntRegs:$src2), (i32 16))))]>, Requires<[HasV4T]>; +/* Incorrect Pattern -- immediate should be right shifted before being +used in the cmph.gt instruction. // Signed half compare(.gt) ri. // Pd=cmph.gt(Rs,#s8) + let isCompare = 1 in def CMPhGTri_V4 : MInst<(outs PredRegs:$dst), - (ins IntRegs:$src1, s32Imm:$src2), + (ins IntRegs:$src1, s8Imm:$src2), "$dst = cmph.gt($src1, #$src2)", - [(set PredRegs:$dst, (setgt (shl IntRegs:$src1, (i32 16)), - s32_16s8ImmPred:$src2))]>, + [(set (i1 PredRegs:$dst), + (setgt (shl (i32 IntRegs:$src1), (i32 16)), + s8ImmPred:$src2))]>, Requires<[HasV4T]>; +*/ // Signed half compare(.gt) rr. // Pd=cmph.gt(Rs,Rt) @@ -3485,8 +5112,9 @@ let isCompare = 1 in def CMPhGTrr_shl_V4 : MInst<(outs PredRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), "$dst = cmph.gt($src1, $src2)", - [(set PredRegs:$dst, (setgt (shl IntRegs:$src1, (i32 16)), - (shl IntRegs:$src2, (i32 16))))]>, + [(set (i1 PredRegs:$dst), + (setgt (shl (i32 IntRegs:$src1), (i32 16)), + (shl (i32 IntRegs:$src2), (i32 16))))]>, Requires<[HasV4T]>; // Unsigned half compare rr (.gtu). @@ -3495,8 +5123,9 @@ let isCompare = 1 in def CMPhGTUrr_V4 : MInst<(outs PredRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), "$dst = cmph.gtu($src1, $src2)", - [(set PredRegs:$dst, (setugt (and IntRegs:$src1, 65535), - (and IntRegs:$src2, 65535)))]>, + [(set (i1 PredRegs:$dst), + (setugt (and (i32 IntRegs:$src1), 65535), + (and (i32 IntRegs:$src2), 65535)))]>, Requires<[HasV4T]>; // Unsigned half compare ri (.gtu). 
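// Why these sub-word compares shift rather than mask, as a worked
// example with illustrative values. Take a signed byte compare of
// Rs = 0x00000080 (low byte -128) against Rt = 0x00000001 (low
// byte +1):
//   shl Rs, 24  ->  0x80000000, negative as i32
//   shl Rt, 24  ->  0x01000000, positive as i32
// so (setgt (shl Rs, 24), (shl Rt, 24)) correctly treats -128 < 1,
// while (and Rs, 255) would compare 128 > 1 and get the sign wrong.
// The and-with-255 form is therefore reserved for the unsigned (.gtu)
// and equality cases, the shift-by-24 form for signed bytes, and the
// shift-by-16 form for signed halfwords; it is also why the
// commented-out CMPhGTri pattern above says the immediate would need
// a right shift before it could be used.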
@@ -3505,8 +5134,8 @@ let isCompare = 1 in def CMPhGTUri_V4 : MInst<(outs PredRegs:$dst), (ins IntRegs:$src1, u7Imm:$src2), "$dst = cmph.gtu($src1, #$src2)", - [(set PredRegs:$dst, (setugt (and IntRegs:$src1, 65535), - u7ImmPred:$src2))]>, + [(set (i1 PredRegs:$dst), (setugt (and (i32 IntRegs:$src1), 65535), + u7ImmPred:$src2))]>, Requires<[HasV4T]>; //===----------------------------------------------------------------------===// @@ -3523,10 +5152,42 @@ let isReturn = 1, isTerminator = 1, isBarrier = 1, isPredicable = 1, Requires<[HasV4T]>; } +// Restore registers and dealloc return function call. +let isCall = 1, isBarrier = 1, isReturn = 1, isTerminator = 1, + Defs = [R29, R30, R31, PC] in { + def RESTORE_DEALLOC_RET_JMP_V4 : JInst<(outs), + (ins calltarget:$dst), + "jump $dst // Restore_and_dealloc_return", + []>, + Requires<[HasV4T]>; +} + +// Restore registers and dealloc frame before a tail call. +let isCall = 1, isBarrier = 1, + Defs = [R29, R30, R31, PC] in { + def RESTORE_DEALLOC_BEFORE_TAILCALL_V4 : JInst<(outs), + (ins calltarget:$dst), + "call $dst // Restore_and_dealloc_before_tailcall", + []>, + Requires<[HasV4T]>; +} + +// Save registers function call. +let isCall = 1, isBarrier = 1, + Uses = [R29, R31] in { + def SAVE_REGISTERS_CALL_V4 : JInst<(outs), + (ins calltarget:$dst), + "call $dst // Save_calle_saved_registers", + []>, + Requires<[HasV4T]>; +} + // if (Ps) dealloc_return let isReturn = 1, isTerminator = 1, - Defs = [R29, R30, R31, PC], Uses = [R29, R31], neverHasSideEffects = 1 in { - def DEALLOC_RET_cPt_V4 : NVInst_V4<(outs), (ins PredRegs:$src1, i32imm:$amt1), + Defs = [R29, R30, R31, PC], Uses = [R29, R31], neverHasSideEffects = 1, + isPredicated = 1 in { + def DEALLOC_RET_cPt_V4 : NVInst_V4<(outs), + (ins PredRegs:$src1, i32imm:$amt1), "if ($src1) dealloc_return", []>, Requires<[HasV4T]>; @@ -3534,7 +5195,8 @@ let isReturn = 1, isTerminator = 1, // if (!Ps) dealloc_return let isReturn = 1, isTerminator = 1, - Defs = [R29, R30, R31, PC], Uses = [R29, R31], neverHasSideEffects = 1 in { + Defs = [R29, R30, R31, PC], Uses = [R29, R31], neverHasSideEffects = 1, + isPredicated = 1 in { def DEALLOC_RET_cNotPt_V4 : NVInst_V4<(outs), (ins PredRegs:$src1, i32imm:$amt1), "if (!$src1) dealloc_return", @@ -3544,7 +5206,8 @@ let isReturn = 1, isTerminator = 1, // if (Ps.new) dealloc_return:nt let isReturn = 1, isTerminator = 1, - Defs = [R29, R30, R31, PC], Uses = [R29, R31], neverHasSideEffects = 1 in { + Defs = [R29, R30, R31, PC], Uses = [R29, R31], neverHasSideEffects = 1, + isPredicated = 1 in { def DEALLOC_RET_cdnPnt_V4 : NVInst_V4<(outs), (ins PredRegs:$src1, i32imm:$amt1), "if ($src1.new) dealloc_return:nt", @@ -3554,7 +5217,8 @@ let isReturn = 1, isTerminator = 1, // if (!Ps.new) dealloc_return:nt let isReturn = 1, isTerminator = 1, - Defs = [R29, R30, R31, PC], Uses = [R29, R31], neverHasSideEffects = 1 in { + Defs = [R29, R30, R31, PC], Uses = [R29, R31], neverHasSideEffects = 1, + isPredicated = 1 in { def DEALLOC_RET_cNotdnPnt_V4 : NVInst_V4<(outs), (ins PredRegs:$src1, i32imm:$amt1), "if (!$src1.new) dealloc_return:nt", @@ -3564,7 +5228,8 @@ let isReturn = 1, isTerminator = 1, // if (Ps.new) dealloc_return:t let isReturn = 1, isTerminator = 1, - Defs = [R29, R30, R31, PC], Uses = [R29, R31], neverHasSideEffects = 1 in { + Defs = [R29, R30, R31, PC], Uses = [R29, R31], neverHasSideEffects = 1, + isPredicated = 1 in { def DEALLOC_RET_cdnPt_V4 : NVInst_V4<(outs), (ins PredRegs:$src1, i32imm:$amt1), "if ($src1.new) dealloc_return:t", @@ -3574,10 +5239,539 @@ let 
isReturn = 1, isTerminator = 1, // if (!Ps.new) dealloc_return:nt let isReturn = 1, isTerminator = 1, - Defs = [R29, R30, R31, PC], Uses = [R29, R31], neverHasSideEffects = 1 in { + Defs = [R29, R30, R31, PC], Uses = [R29, R31], neverHasSideEffects = 1, + isPredicated = 1 in { def DEALLOC_RET_cNotdnPt_V4 : NVInst_V4<(outs), (ins PredRegs:$src1, i32imm:$amt1), "if (!$src1.new) dealloc_return:t", []>, Requires<[HasV4T]>; } + + +// Load/Store with absolute addressing mode +// memw(#u6)=Rt + +multiclass ST_abs { + let isPredicable = 1 in + def _abs_V4 : STInst2<(outs), + (ins globaladdress:$absaddr, IntRegs:$src), + !strconcat(OpcStr, "(##$absaddr) = $src"), + []>, + Requires<[HasV4T]>; + + let isPredicated = 1 in + def _abs_cPt_V4 : STInst2<(outs), + (ins PredRegs:$src1, globaladdress:$absaddr, IntRegs:$src2), + !strconcat("if ($src1)", + !strconcat(OpcStr, "(##$absaddr) = $src2")), + []>, + Requires<[HasV4T]>; + + let isPredicated = 1 in + def _abs_cNotPt_V4 : STInst2<(outs), + (ins PredRegs:$src1, globaladdress:$absaddr, IntRegs:$src2), + !strconcat("if (!$src1)", + !strconcat(OpcStr, "(##$absaddr) = $src2")), + []>, + Requires<[HasV4T]>; + + let isPredicated = 1 in + def _abs_cdnPt_V4 : STInst2<(outs), + (ins PredRegs:$src1, globaladdress:$absaddr, IntRegs:$src2), + !strconcat("if ($src1.new)", + !strconcat(OpcStr, "(##$absaddr) = $src2")), + []>, + Requires<[HasV4T]>; + + let isPredicated = 1 in + def _abs_cdnNotPt_V4 : STInst2<(outs), + (ins PredRegs:$src1, globaladdress:$absaddr, IntRegs:$src2), + !strconcat("if (!$src1.new)", + !strconcat(OpcStr, "(##$absaddr) = $src2")), + []>, + Requires<[HasV4T]>; + + def _abs_nv_V4 : STInst2<(outs), + (ins globaladdress:$absaddr, IntRegs:$src), + !strconcat(OpcStr, "(##$absaddr) = $src.new"), + []>, + Requires<[HasV4T]>; + + let isPredicated = 1 in + def _abs_cPt_nv_V4 : STInst2<(outs), + (ins PredRegs:$src1, globaladdress:$absaddr, IntRegs:$src2), + !strconcat("if ($src1)", + !strconcat(OpcStr, "(##$absaddr) = $src2.new")), + []>, + Requires<[HasV4T]>; + + let isPredicated = 1 in + def _abs_cNotPt_nv_V4 : STInst2<(outs), + (ins PredRegs:$src1, globaladdress:$absaddr, IntRegs:$src2), + !strconcat("if (!$src1)", + !strconcat(OpcStr, "(##$absaddr) = $src2.new")), + []>, + Requires<[HasV4T]>; + + let isPredicated = 1 in + def _abs_cdnPt_nv_V4 : STInst2<(outs), + (ins PredRegs:$src1, globaladdress:$absaddr, IntRegs:$src2), + !strconcat("if ($src1.new)", + !strconcat(OpcStr, "(##$absaddr) = $src2.new")), + []>, + Requires<[HasV4T]>; + + let isPredicated = 1 in + def _abs_cdnNotPt_nv_V4 : STInst2<(outs), + (ins PredRegs:$src1, globaladdress:$absaddr, IntRegs:$src2), + !strconcat("if (!$src1.new)", + !strconcat(OpcStr, "(##$absaddr) = $src2.new")), + []>, + Requires<[HasV4T]>; +} + +let AddedComplexity = 30, isPredicable = 1 in +def STrid_abs_V4 : STInst<(outs), + (ins globaladdress:$absaddr, DoubleRegs:$src), + "memd(##$absaddr) = $src", + [(store (i64 DoubleRegs:$src), + (HexagonCONST32 tglobaladdr:$absaddr))]>, + Requires<[HasV4T]>; + +let AddedComplexity = 30, isPredicated = 1 in +def STrid_abs_cPt_V4 : STInst2<(outs), + (ins PredRegs:$src1, globaladdress:$absaddr, DoubleRegs:$src2), + "if ($src1) memd(##$absaddr) = $src2", + []>, + Requires<[HasV4T]>; + +let AddedComplexity = 30, isPredicated = 1 in +def STrid_abs_cNotPt_V4 : STInst2<(outs), + (ins PredRegs:$src1, globaladdress:$absaddr, DoubleRegs:$src2), + "if (!$src1) memd(##$absaddr) = $src2", + []>, + Requires<[HasV4T]>; + +let AddedComplexity = 30, isPredicated = 1 in +def STrid_abs_cdnPt_V4 : 
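// The SAVE_REGISTERS_CALL_V4 and RESTORE_DEALLOC_* pseudos above
// appear to model calls into shared save/restore runtime routines:
// the prologue calls out to spill the callee-saved registers, and the
// epilogue either jumps through a routine that restores them and
// performs the dealloc_return, or restores them just before a tail
// call. Their Defs and Uses lists conservatively name the frame
// registers (R29-R31) such routines touch.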
STInst2<(outs), + (ins PredRegs:$src1, globaladdress:$absaddr, DoubleRegs:$src2), + "if ($src1.new) memd(##$absaddr) = $src2", + []>, + Requires<[HasV4T]>; + +let AddedComplexity = 30, isPredicated = 1 in +def STrid_abs_cdnNotPt_V4 : STInst2<(outs), + (ins PredRegs:$src1, globaladdress:$absaddr, DoubleRegs:$src2), + "if (!$src1.new) memd(##$absaddr) = $src2", + []>, + Requires<[HasV4T]>; + +defm STrib : ST_abs<"memb">; +defm STrih : ST_abs<"memh">; +defm STriw : ST_abs<"memw">; + +let Predicates = [HasV4T], AddedComplexity = 30 in +def : Pat<(truncstorei8 (i32 IntRegs:$src1), + (HexagonCONST32 tglobaladdr:$absaddr)), + (STrib_abs_V4 tglobaladdr: $absaddr, IntRegs: $src1)>; + +let Predicates = [HasV4T], AddedComplexity = 30 in +def : Pat<(truncstorei16 (i32 IntRegs:$src1), + (HexagonCONST32 tglobaladdr:$absaddr)), + (STrih_abs_V4 tglobaladdr: $absaddr, IntRegs: $src1)>; + +let Predicates = [HasV4T], AddedComplexity = 30 in +def : Pat<(store (i32 IntRegs:$src1), (HexagonCONST32 tglobaladdr:$absaddr)), + (STriw_abs_V4 tglobaladdr: $absaddr, IntRegs: $src1)>; + + +multiclass LD_abs { + let isPredicable = 1 in + def _abs_V4 : LDInst2<(outs IntRegs:$dst), + (ins globaladdress:$absaddr), + !strconcat("$dst = ", !strconcat(OpcStr, "(##$absaddr)")), + []>, + Requires<[HasV4T]>; + + let isPredicated = 1 in + def _abs_cPt_V4 : LDInst2<(outs IntRegs:$dst), + (ins PredRegs:$src1, globaladdress:$absaddr), + !strconcat("if ($src1) $dst = ", + !strconcat(OpcStr, "(##$absaddr)")), + []>, + Requires<[HasV4T]>; + + let isPredicated = 1 in + def _abs_cNotPt_V4 : LDInst2<(outs IntRegs:$dst), + (ins PredRegs:$src1, globaladdress:$absaddr), + !strconcat("if (!$src1) $dst = ", + !strconcat(OpcStr, "(##$absaddr)")), + []>, + Requires<[HasV4T]>; + + let isPredicated = 1 in + def _abs_cdnPt_V4 : LDInst2<(outs IntRegs:$dst), + (ins PredRegs:$src1, globaladdress:$absaddr), + !strconcat("if ($src1.new) $dst = ", + !strconcat(OpcStr, "(##$absaddr)")), + []>, + Requires<[HasV4T]>; + + let isPredicated = 1 in + def _abs_cdnNotPt_V4 : LDInst2<(outs IntRegs:$dst), + (ins PredRegs:$src1, globaladdress:$absaddr), + !strconcat("if (!$src1.new) $dst = ", + !strconcat(OpcStr, "(##$absaddr)")), + []>, + Requires<[HasV4T]>; +} + +let AddedComplexity = 30 in +def LDrid_abs_V4 : LDInst<(outs DoubleRegs:$dst), + (ins globaladdress:$absaddr), + "$dst = memd(##$absaddr)", + [(set (i64 DoubleRegs:$dst), + (load (HexagonCONST32 tglobaladdr:$absaddr)))]>, + Requires<[HasV4T]>; + +let AddedComplexity = 30, isPredicated = 1 in +def LDrid_abs_cPt_V4 : LDInst2<(outs DoubleRegs:$dst), + (ins PredRegs:$src1, globaladdress:$absaddr), + "if ($src1) $dst = memd(##$absaddr)", + []>, + Requires<[HasV4T]>; + +let AddedComplexity = 30, isPredicated = 1 in +def LDrid_abs_cNotPt_V4 : LDInst2<(outs DoubleRegs:$dst), + (ins PredRegs:$src1, globaladdress:$absaddr), + "if (!$src1) $dst = memd(##$absaddr)", + []>, + Requires<[HasV4T]>; + +let AddedComplexity = 30, isPredicated = 1 in +def LDrid_abs_cdnPt_V4 : LDInst2<(outs DoubleRegs:$dst), + (ins PredRegs:$src1, globaladdress:$absaddr), + "if ($src1.new) $dst = memd(##$absaddr)", + []>, + Requires<[HasV4T]>; + +let AddedComplexity = 30, isPredicated = 1 in +def LDrid_abs_cdnNotPt_V4 : LDInst2<(outs DoubleRegs:$dst), + (ins PredRegs:$src1, globaladdress:$absaddr), + "if (!$src1.new) $dst = memd(##$absaddr)", + []>, + Requires<[HasV4T]>; + +defm LDrib : LD_abs<"memb">; +defm LDriub : LD_abs<"memub">; +defm LDrih : LD_abs<"memh">; +defm LDriuh : LD_abs<"memuh">; +defm LDriw : LD_abs<"memw">; + + +let 
Predicates = [HasV4T], AddedComplexity = 30 in +def : Pat<(i32 (load (HexagonCONST32 tglobaladdr:$absaddr))), + (LDriw_abs_V4 tglobaladdr: $absaddr)>; + +let Predicates = [HasV4T], AddedComplexity=30 in +def : Pat<(i32 (sextloadi8 (HexagonCONST32 tglobaladdr:$absaddr))), + (LDrib_abs_V4 tglobaladdr:$absaddr)>; + +let Predicates = [HasV4T], AddedComplexity=30 in +def : Pat<(i32 (zextloadi8 (HexagonCONST32 tglobaladdr:$absaddr))), + (LDriub_abs_V4 tglobaladdr:$absaddr)>; + +let Predicates = [HasV4T], AddedComplexity=30 in +def : Pat<(i32 (sextloadi16 (HexagonCONST32 tglobaladdr:$absaddr))), + (LDrih_abs_V4 tglobaladdr:$absaddr)>; + +let Predicates = [HasV4T], AddedComplexity=30 in +def : Pat<(i32 (zextloadi16 (HexagonCONST32 tglobaladdr:$absaddr))), + (LDriuh_abs_V4 tglobaladdr:$absaddr)>; + +// Transfer global address into a register +let AddedComplexity=50, isMoveImm = 1, isReMaterializable = 1 in +def TFRI_V4 : ALU32_ri<(outs IntRegs:$dst), (ins globaladdress:$src1), + "$dst = ##$src1", + [(set IntRegs:$dst, (HexagonCONST32 tglobaladdr:$src1))]>, + Requires<[HasV4T]>; + +let AddedComplexity=50, neverHasSideEffects = 1, isPredicated = 1 in +def TFRI_cPt_V4 : ALU32_ri<(outs IntRegs:$dst), + (ins PredRegs:$src1, globaladdress:$src2), + "if($src1) $dst = ##$src2", + []>, + Requires<[HasV4T]>; + +let AddedComplexity=50, neverHasSideEffects = 1, isPredicated = 1 in +def TFRI_cNotPt_V4 : ALU32_ri<(outs IntRegs:$dst), + (ins PredRegs:$src1, globaladdress:$src2), + "if(!$src1) $dst = ##$src2", + []>, + Requires<[HasV4T]>; + +let AddedComplexity=50, neverHasSideEffects = 1, isPredicated = 1 in +def TFRI_cdnPt_V4 : ALU32_ri<(outs IntRegs:$dst), + (ins PredRegs:$src1, globaladdress:$src2), + "if($src1.new) $dst = ##$src2", + []>, + Requires<[HasV4T]>; + +let AddedComplexity=50, neverHasSideEffects = 1, isPredicated = 1 in +def TFRI_cdnNotPt_V4 : ALU32_ri<(outs IntRegs:$dst), + (ins PredRegs:$src1, globaladdress:$src2), + "if(!$src1.new) $dst = ##$src2", + []>, + Requires<[HasV4T]>; + +let AddedComplexity = 50, Predicates = [HasV4T] in +def : Pat<(HexagonCONST32_GP tglobaladdr:$src1), + (TFRI_V4 tglobaladdr:$src1)>; + + +// Load - Indirect with long offset: These instructions take global address +// as an operand +let AddedComplexity = 10 in +def LDrid_ind_lo_V4 : LDInst<(outs DoubleRegs:$dst), + (ins IntRegs:$src1, u2Imm:$src2, globaladdress:$offset), + "$dst=memd($src1<<#$src2+##$offset)", + [(set (i64 DoubleRegs:$dst), + (load (add (shl IntRegs:$src1, u2ImmPred:$src2), + (HexagonCONST32 tglobaladdr:$offset))))]>, + Requires<[HasV4T]>; + +let AddedComplexity = 10 in +multiclass LD_indirect_lo { + def _lo_V4 : LDInst<(outs IntRegs:$dst), + (ins IntRegs:$src1, u2Imm:$src2, globaladdress:$offset), + !strconcat("$dst = ", + !strconcat(OpcStr, "($src1<<#$src2+##$offset)")), + [(set IntRegs:$dst, + (i32 (OpNode (add (shl IntRegs:$src1, u2ImmPred:$src2), + (HexagonCONST32 tglobaladdr:$offset)))))]>, + Requires<[HasV4T]>; +} + +defm LDrib_ind : LD_indirect_lo<"memb", sextloadi8>; +defm LDriub_ind : LD_indirect_lo<"memub", zextloadi8>; +defm LDrih_ind : LD_indirect_lo<"memh", sextloadi16>; +defm LDriuh_ind : LD_indirect_lo<"memuh", zextloadi16>; +defm LDriw_ind : LD_indirect_lo<"memw", load>; + +// Store - Indirect with long offset: These instructions take global address +// as an operand +let AddedComplexity = 10 in +def STrid_ind_lo_V4 : STInst<(outs), + (ins IntRegs:$src1, u2Imm:$src2, globaladdress:$src3, + DoubleRegs:$src4), + "memd($src1<<#$src2+#$src3) = $src4", + [(store (i64 DoubleRegs:$src4), + 
(add (shl IntRegs:$src1, u2ImmPred:$src2), + (HexagonCONST32 tglobaladdr:$src3)))]>, + Requires<[HasV4T]>; + +let AddedComplexity = 10 in +multiclass ST_indirect_lo { + def _lo_V4 : STInst<(outs), + (ins IntRegs:$src1, u2Imm:$src2, globaladdress:$src3, + IntRegs:$src4), + !strconcat(OpcStr, "($src1<<#$src2+##$src3) = $src4"), + [(OpNode (i32 IntRegs:$src4), + (add (shl IntRegs:$src1, u2ImmPred:$src2), + (HexagonCONST32 tglobaladdr:$src3)))]>, + Requires<[HasV4T]>; +} + +defm STrib_ind : ST_indirect_lo<"memb", truncstorei8>; +defm STrih_ind : ST_indirect_lo<"memh", truncstorei16>; +defm STriw_ind : ST_indirect_lo<"memw", store>; + +// Store - absolute addressing mode: These instruction take constant +// value as the extended operand +multiclass ST_absimm { + let isPredicable = 1 in + def _abs_V4 : STInst2<(outs), + (ins u6Imm:$src1, IntRegs:$src2), + !strconcat(OpcStr, "(#$src1) = $src2"), + []>, + Requires<[HasV4T]>; + + let isPredicated = 1 in + def _abs_cPt_V4 : STInst2<(outs), + (ins PredRegs:$src1, u6Imm:$src2, IntRegs:$src3), + !strconcat("if ($src1)", !strconcat(OpcStr, "(#$src2) = $src3")), + []>, + Requires<[HasV4T]>; + + let isPredicated = 1 in + def _abs_cNotPt_V4 : STInst2<(outs), + (ins PredRegs:$src1, u6Imm:$src2, IntRegs:$src3), + !strconcat("if (!$src1)", !strconcat(OpcStr, "(#$src2) = $src3")), + []>, + Requires<[HasV4T]>; + + let isPredicated = 1 in + def _abs_cdnPt_V4 : STInst2<(outs), + (ins PredRegs:$src1, u6Imm:$src2, IntRegs:$src3), + !strconcat("if ($src1.new)", + !strconcat(OpcStr, "(#$src2) = $src3")), + []>, + Requires<[HasV4T]>; + + let isPredicated = 1 in + def _abs_cdnNotPt_V4 : STInst2<(outs), + (ins PredRegs:$src1, u6Imm:$src2, IntRegs:$src3), + !strconcat("if (!$src1.new)", + !strconcat(OpcStr, "(#$src2) = $src3")), + []>, + Requires<[HasV4T]>; + + def _abs_nv_V4 : STInst2<(outs), + (ins u6Imm:$src1, IntRegs:$src2), + !strconcat(OpcStr, "(#$src1) = $src2.new"), + []>, + Requires<[HasV4T]>; + + let isPredicated = 1 in + def _abs_cPt_nv_V4 : STInst2<(outs), + (ins PredRegs:$src1, u6Imm:$src2, IntRegs:$src3), + !strconcat("if ($src1)", + !strconcat(OpcStr, "(#$src2) = $src3.new")), + []>, + Requires<[HasV4T]>; + + let isPredicated = 1 in + def _abs_cNotPt_nv_V4 : STInst2<(outs), + (ins PredRegs:$src1, u6Imm:$src2, IntRegs:$src3), + !strconcat("if (!$src1)", + !strconcat(OpcStr, "(#$src2) = $src3.new")), + []>, + Requires<[HasV4T]>; + + let isPredicated = 1 in + def _abs_cdnPt_nv_V4 : STInst2<(outs), + (ins PredRegs:$src1, u6Imm:$src2, IntRegs:$src3), + !strconcat("if ($src1.new)", + !strconcat(OpcStr, "(#$src2) = $src3.new")), + []>, + Requires<[HasV4T]>; + + let isPredicated = 1 in + def _abs_cdnNotPt_nv_V4 : STInst2<(outs), + (ins PredRegs:$src1, u6Imm:$src2, IntRegs:$src3), + !strconcat("if (!$src1.new)", + !strconcat(OpcStr, "(#$src2) = $src3.new")), + []>, + Requires<[HasV4T]>; +} + +defm STrib_imm : ST_absimm<"memb">; +defm STrih_imm : ST_absimm<"memh">; +defm STriw_imm : ST_absimm<"memw">; + +let Predicates = [HasV4T], AddedComplexity = 30 in +def : Pat<(truncstorei8 (i32 IntRegs:$src1), u6ImmPred:$src2), + (STrib_imm_abs_V4 u6ImmPred:$src2, IntRegs: $src1)>; + +let Predicates = [HasV4T], AddedComplexity = 30 in +def : Pat<(truncstorei16 (i32 IntRegs:$src1), u6ImmPred:$src2), + (STrih_imm_abs_V4 u6ImmPred:$src2, IntRegs: $src1)>; + +let Predicates = [HasV4T], AddedComplexity = 30 in +def : Pat<(store (i32 IntRegs:$src1), u6ImmPred:$src2), + (STriw_imm_abs_V4 u6ImmPred:$src2, IntRegs: $src1)>; + + +// Load - absolute addressing mode: These instruction 
take constant +// value as the extended operand + +multiclass LD_absimm { + let isPredicable = 1 in + def _abs_V4 : LDInst2<(outs IntRegs:$dst), + (ins u6Imm:$src), + !strconcat("$dst = ", + !strconcat(OpcStr, "(#$src)")), + []>, + Requires<[HasV4T]>; + + let isPredicated = 1 in + def _abs_cPt_V4 : LDInst2<(outs IntRegs:$dst), + (ins PredRegs:$src1, u6Imm:$src2), + !strconcat("if ($src1) $dst = ", + !strconcat(OpcStr, "(#$src2)")), + []>, + Requires<[HasV4T]>; + + let isPredicated = 1 in + def _abs_cNotPt_V4 : LDInst2<(outs IntRegs:$dst), + (ins PredRegs:$src1, u6Imm:$src2), + !strconcat("if (!$src1) $dst = ", + !strconcat(OpcStr, "(#$src2)")), + []>, + Requires<[HasV4T]>; + + let isPredicated = 1 in + def _abs_cdnPt_V4 : LDInst2<(outs IntRegs:$dst), + (ins PredRegs:$src1, u6Imm:$src2), + !strconcat("if ($src1.new) $dst = ", + !strconcat(OpcStr, "(#$src2)")), + []>, + Requires<[HasV4T]>; + + let isPredicated = 1 in + def _abs_cdnNotPt_V4 : LDInst2<(outs IntRegs:$dst), + (ins PredRegs:$src1, u6Imm:$src2), + !strconcat("if (!$src1.new) $dst = ", + !strconcat(OpcStr, "(#$src2)")), + []>, + Requires<[HasV4T]>; +} + +defm LDrib_imm : LD_absimm<"memb">; +defm LDriub_imm : LD_absimm<"memub">; +defm LDrih_imm : LD_absimm<"memh">; +defm LDriuh_imm : LD_absimm<"memuh">; +defm LDriw_imm : LD_absimm<"memw">; + +let Predicates = [HasV4T], AddedComplexity = 30 in +def : Pat<(i32 (load u6ImmPred:$src)), + (LDriw_imm_abs_V4 u6ImmPred:$src)>; + +let Predicates = [HasV4T], AddedComplexity=30 in +def : Pat<(i32 (sextloadi8 u6ImmPred:$src)), + (LDrib_imm_abs_V4 u6ImmPred:$src)>; + +let Predicates = [HasV4T], AddedComplexity=30 in +def : Pat<(i32 (zextloadi8 u6ImmPred:$src)), + (LDriub_imm_abs_V4 u6ImmPred:$src)>; + +let Predicates = [HasV4T], AddedComplexity=30 in +def : Pat<(i32 (sextloadi16 u6ImmPred:$src)), + (LDrih_imm_abs_V4 u6ImmPred:$src)>; + +let Predicates = [HasV4T], AddedComplexity=30 in +def : Pat<(i32 (zextloadi16 u6ImmPred:$src)), + (LDriuh_imm_abs_V4 u6ImmPred:$src)>; + + +// Indexed store double word - global address. +// memw(Rs+#u6:2)=#S8 +let AddedComplexity = 10 in +def STriw_offset_ext_V4 : STInst<(outs), + (ins IntRegs:$src1, u6_2Imm:$src2, globaladdress:$src3), + "memw($src1+#$src2) = ##$src3", + [(store (HexagonCONST32 tglobaladdr:$src3), + (add IntRegs:$src1, u6_2ImmPred:$src2))]>, + Requires<[HasV4T]>; + + +// Indexed store double word - global address. 
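// Summary of the _imm families above: ST_absimm and LD_absimm treat a
// u6 immediate as an absolute address, and the accompanying Pat
// entries map ordinary loads and stores of such constant addresses
// onto them, with AddedComplexity = 30 so they beat the
// register-indirect forms whenever both match.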
+
+// Indexed store word - global address.
+// memw(Rs+#u6:2)=##global
+let AddedComplexity = 10 in
+def STriw_offset_ext_V4 : STInst<(outs),
+ (ins IntRegs:$src1, u6_2Imm:$src2, globaladdress:$src3),
+ "memw($src1+#$src2) = ##$src3",
+ [(store (HexagonCONST32 tglobaladdr:$src3),
+ (add IntRegs:$src1, u6_2ImmPred:$src2))]>,
+ Requires<[HasV4T]>;
+
+
+// Indexed store halfword - global address.
+// memh(Rs+#u6:1)=##global
+let AddedComplexity = 10 in
+def STrih_offset_ext_V4 : STInst<(outs),
+ (ins IntRegs:$src1, u6_1Imm:$src2, globaladdress:$src3),
+ "memh($src1+#$src2) = ##$src3",
+ [(truncstorei16 (HexagonCONST32 tglobaladdr:$src3),
+ (add IntRegs:$src1, u6_1ImmPred:$src2))]>,
+ Requires<[HasV4T]>;
diff --git a/lib/Target/Hexagon/HexagonInstrInfoV5.td b/lib/Target/Hexagon/HexagonInstrInfoV5.td
new file mode 100644
index 0000000..92d098c
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonInstrInfoV5.td
@@ -0,0 +1,626 @@
+def SDTHexagonFCONST32 : SDTypeProfile<1, 1, [
+ SDTCisVT<0, f32>,
+ SDTCisPtrTy<1>]>;
+def HexagonFCONST32 : SDNode<"HexagonISD::FCONST32", SDTHexagonFCONST32>;
+
+let isReMaterializable = 1, isMoveImm = 1 in
+def FCONST32_nsdata : LDInst<(outs IntRegs:$dst), (ins globaladdress:$global),
+ "$dst = CONST32(#$global)",
+ [(set (f32 IntRegs:$dst),
+ (HexagonFCONST32 tglobaladdr:$global))]>,
+ Requires<[HasV5T]>;
+
+let isReMaterializable = 1, isMoveImm = 1 in
+def CONST64_Float_Real : LDInst<(outs DoubleRegs:$dst), (ins f64imm:$src1),
+ "$dst = CONST64(#$src1)",
+ [(set DoubleRegs:$dst, fpimm:$src1)]>,
+ Requires<[HasV5T]>;
+
+let isReMaterializable = 1, isMoveImm = 1 in
+def CONST32_Float_Real : LDInst<(outs IntRegs:$dst), (ins f32imm:$src1),
+ "$dst = CONST32(#$src1)",
+ [(set IntRegs:$dst, fpimm:$src1)]>,
+ Requires<[HasV5T]>;
+
+// Transfer immediate float.
+// Only works with single precision fp values.
+// For double precision, use CONST64_Float_Real, as a 64-bit transfer
+// can only hold 40-bit values - 32 from const ext + 8 bit immediate.
+let isMoveImm = 1, isReMaterializable = 1, isPredicable = 1 in
+def TFRI_f : ALU32_ri<(outs IntRegs:$dst), (ins f32imm:$src1),
+ "$dst = ##$src1",
+ [(set IntRegs:$dst, fpimm:$src1)]>,
+ Requires<[HasV5T]>;
+
+def TFRI_cPt_f : ALU32_ri<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, f32imm:$src2),
+ "if ($src1) $dst = ##$src2",
+ []>,
+ Requires<[HasV5T]>;
+
+let isPredicated = 1 in
+def TFRI_cNotPt_f : ALU32_ri<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, f32imm:$src2),
+ "if (!$src1) $dst = ##$src2",
+ []>,
+ Requires<[HasV5T]>;
+
+// Convert single precision to double precision and vice-versa.
+def CONVERT_sf2df : ALU64_rr<(outs DoubleRegs:$dst), (ins IntRegs:$src),
+ "$dst = convert_sf2df($src)",
+ [(set DoubleRegs:$dst, (fextend IntRegs:$src))]>,
+ Requires<[HasV5T]>;
+
+def CONVERT_df2sf : ALU64_rr<(outs IntRegs:$dst), (ins DoubleRegs:$src),
+ "$dst = convert_df2sf($src)",
+ [(set IntRegs:$dst, (fround DoubleRegs:$src))]>,
+ Requires<[HasV5T]>;
+
+
+// Load.
+def LDrid_f : LDInst<(outs DoubleRegs:$dst),
+ (ins MEMri:$addr),
+ "$dst = memd($addr)",
+ [(set DoubleRegs:$dst, (f64 (load ADDRriS11_3:$addr)))]>,
+ Requires<[HasV5T]>;
+
+
+let AddedComplexity = 20 in
+def LDrid_indexed_f : LDInst<(outs DoubleRegs:$dst),
+ (ins IntRegs:$src1, s11_3Imm:$offset),
+ "$dst = memd($src1+#$offset)",
+ [(set DoubleRegs:$dst, (f64 (load (add IntRegs:$src1,
+ s11_3ImmPred:$offset))))]>,
+ Requires<[HasV5T]>;
+
+def LDriw_f : LDInst<(outs IntRegs:$dst),
+ (ins MEMri:$addr), "$dst = memw($addr)",
+ [(set IntRegs:$dst, (f32 (load ADDRriS11_2:$addr)))]>,
+ Requires<[HasV5T]>;
+
+
+let AddedComplexity = 20 in
+def LDriw_indexed_f : LDInst<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, s11_2Imm:$offset),
+ "$dst = memw($src1+#$offset)",
+ [(set IntRegs:$dst, (f32 (load (add IntRegs:$src1,
+ s11_2ImmPred:$offset))))]>,
+ Requires<[HasV5T]>;
+
+// Store.
+def STriw_f : STInst<(outs),
+ (ins MEMri:$addr, IntRegs:$src1),
+ "memw($addr) = $src1",
+ [(store (f32 IntRegs:$src1), ADDRriS11_2:$addr)]>,
+ Requires<[HasV5T]>;
+
+let AddedComplexity = 10 in
+def STriw_indexed_f : STInst<(outs),
+ (ins IntRegs:$src1, s11_2Imm:$src2, IntRegs:$src3),
+ "memw($src1+#$src2) = $src3",
+ [(store (f32 IntRegs:$src3),
+ (add IntRegs:$src1, s11_2ImmPred:$src2))]>,
+ Requires<[HasV5T]>;
+
+def STrid_f : STInst<(outs),
+ (ins MEMri:$addr, DoubleRegs:$src1),
+ "memd($addr) = $src1",
+ [(store (f64 DoubleRegs:$src1), ADDRriS11_2:$addr)]>,
+ Requires<[HasV5T]>;
+
+// Indexed store double word.
+let AddedComplexity = 10 in
+def STrid_indexed_f : STInst<(outs),
+ (ins IntRegs:$src1, s11_3Imm:$src2, DoubleRegs:$src3),
+ "memd($src1+#$src2) = $src3",
+ [(store (f64 DoubleRegs:$src3),
+ (add IntRegs:$src1, s11_3ImmPred:$src2))]>,
+ Requires<[HasV5T]>;
+
+
+// Add.
+let isCommutable = 1 in
+def fADD_rr : ALU64_rr<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst = sfadd($src1, $src2)",
+ [(set IntRegs:$dst, (fadd IntRegs:$src1, IntRegs:$src2))]>,
+ Requires<[HasV5T]>;
+
+let isCommutable = 1 in
+def fADD64_rr : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1,
+ DoubleRegs:$src2),
+ "$dst = dfadd($src1, $src2)",
+ [(set DoubleRegs:$dst, (fadd DoubleRegs:$src1,
+ DoubleRegs:$src2))]>,
+ Requires<[HasV5T]>;
+
+def fSUB_rr : ALU64_rr<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst = sfsub($src1, $src2)",
+ [(set IntRegs:$dst, (fsub IntRegs:$src1, IntRegs:$src2))]>,
+ Requires<[HasV5T]>;
+
+def fSUB64_rr : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1,
+ DoubleRegs:$src2),
+ "$dst = dfsub($src1, $src2)",
+ [(set DoubleRegs:$dst, (fsub DoubleRegs:$src1,
+ DoubleRegs:$src2))]>,
+ Requires<[HasV5T]>;
+
+let isCommutable = 1 in
+def fMUL_rr : ALU64_rr<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst = sfmpy($src1, $src2)",
+ [(set IntRegs:$dst, (fmul IntRegs:$src1, IntRegs:$src2))]>,
+ Requires<[HasV5T]>;
+
+let isCommutable = 1 in
+def fMUL64_rr : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1,
+ DoubleRegs:$src2),
+ "$dst = dfmpy($src1, $src2)",
+ [(set DoubleRegs:$dst, (fmul DoubleRegs:$src1,
+ DoubleRegs:$src2))]>,
+ Requires<[HasV5T]>;
+
+// Compare.
+let isCompare = 1 in {
+multiclass FCMP64_rr<string OpcStr, PatFrag OpNode> {
+ def _rr : ALU64_rr<(outs PredRegs:$dst), (ins DoubleRegs:$b, DoubleRegs:$c),
+ !strconcat("$dst = ", !strconcat(OpcStr, "($b, $c)")),
+ [(set PredRegs:$dst,
+ (OpNode (f64 DoubleRegs:$b), (f64 DoubleRegs:$c)))]>,
+ Requires<[HasV5T]>;
+}
+
+multiclass FCMP32_rr<string OpcStr, PatFrag OpNode> {
+ def _rr : ALU64_rr<(outs PredRegs:$dst), (ins IntRegs:$b, IntRegs:$c),
+ !strconcat("$dst = ", !strconcat(OpcStr, "($b, $c)")),
+ [(set PredRegs:$dst,
+ (OpNode (f32 IntRegs:$b), (f32 IntRegs:$c)))]>,
+ Requires<[HasV5T]>;
+}
+}
+
+defm FCMPOEQ64 : FCMP64_rr<"dfcmp.eq", setoeq>;
+defm FCMPUEQ64 : FCMP64_rr<"dfcmp.eq", setueq>;
+defm FCMPOGT64 : FCMP64_rr<"dfcmp.gt", setogt>;
+defm FCMPUGT64 : FCMP64_rr<"dfcmp.gt", setugt>;
+defm FCMPOGE64 : FCMP64_rr<"dfcmp.ge", setoge>;
+defm FCMPUGE64 : FCMP64_rr<"dfcmp.ge", setuge>;
+
+defm FCMPOEQ32 : FCMP32_rr<"sfcmp.eq", setoeq>;
+defm FCMPUEQ32 : FCMP32_rr<"sfcmp.eq", setueq>;
+defm FCMPOGT32 : FCMP32_rr<"sfcmp.gt", setogt>;
+defm FCMPUGT32 : FCMP32_rr<"sfcmp.gt", setugt>;
+defm FCMPOGE32 : FCMP32_rr<"sfcmp.ge", setoge>;
+defm FCMPUGE32 : FCMP32_rr<"sfcmp.ge", setuge>;
+
+// olt.
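+// (There is no sfcmp.lt/dfcmp.lt compare; a less-than is matched by swapping
+// the operands into the gt compare defined above, e.g. (setolt Rs, Rt) is
+// selected as FCMPOGT32_rr(Rt, Rs), as the patterns below do.)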
+def : Pat <(i1 (setolt (f32 IntRegs:$src1), (f32 IntRegs:$src2))), + (i1 (FCMPOGT32_rr IntRegs:$src2, IntRegs:$src1))>, + Requires<[HasV5T]>; + +def : Pat <(i1 (setolt (f32 IntRegs:$src1), (fpimm:$src2))), + (i1 (FCMPOGT32_rr (f32 (TFRI_f fpimm:$src2)), (f32 IntRegs:$src1)))>, + Requires<[HasV5T]>; + +def : Pat <(i1 (setolt (f64 DoubleRegs:$src1), (f64 DoubleRegs:$src2))), + (i1 (FCMPOGT64_rr DoubleRegs:$src2, DoubleRegs:$src1))>, + Requires<[HasV5T]>; + +def : Pat <(i1 (setolt (f64 DoubleRegs:$src1), (fpimm:$src2))), + (i1 (FCMPOGT64_rr (f64 (CONST64_Float_Real fpimm:$src2)), + (f64 DoubleRegs:$src1)))>, + Requires<[HasV5T]>; + +// gt. +def : Pat <(i1 (setugt (f64 DoubleRegs:$src1), (fpimm:$src2))), + (i1 (FCMPUGT64_rr (f64 DoubleRegs:$src1), + (f64 (CONST64_Float_Real fpimm:$src2))))>, + Requires<[HasV5T]>; + +def : Pat <(i1 (setugt (f32 IntRegs:$src1), (fpimm:$src2))), + (i1 (FCMPUGT32_rr (f32 IntRegs:$src1), (f32 (TFRI_f fpimm:$src2))))>, + Requires<[HasV5T]>; + +// ult. +def : Pat <(i1 (setult (f32 IntRegs:$src1), (f32 IntRegs:$src2))), + (i1 (FCMPUGT32_rr IntRegs:$src2, IntRegs:$src1))>, + Requires<[HasV5T]>; + +def : Pat <(i1 (setult (f32 IntRegs:$src1), (fpimm:$src2))), + (i1 (FCMPUGT32_rr (f32 (TFRI_f fpimm:$src2)), (f32 IntRegs:$src1)))>, + Requires<[HasV5T]>; + +def : Pat <(i1 (setult (f64 DoubleRegs:$src1), (f64 DoubleRegs:$src2))), + (i1 (FCMPUGT64_rr DoubleRegs:$src2, DoubleRegs:$src1))>, + Requires<[HasV5T]>; + +def : Pat <(i1 (setult (f64 DoubleRegs:$src1), (fpimm:$src2))), + (i1 (FCMPUGT64_rr (f64 (CONST64_Float_Real fpimm:$src2)), + (f64 DoubleRegs:$src1)))>, + Requires<[HasV5T]>; + +// le. +// rs <= rt -> rt >= rs. +def : Pat<(i1 (setole (f32 IntRegs:$src1), (f32 IntRegs:$src2))), + (i1 (FCMPOGE32_rr IntRegs:$src2, IntRegs:$src1))>, + Requires<[HasV5T]>; + +def : Pat<(i1 (setole (f32 IntRegs:$src1), (fpimm:$src2))), + (i1 (FCMPOGE32_rr (f32 (TFRI_f fpimm:$src2)), IntRegs:$src1))>, + Requires<[HasV5T]>; + + +// Rss <= Rtt -> Rtt >= Rss. +def : Pat<(i1 (setole (f64 DoubleRegs:$src1), (f64 DoubleRegs:$src2))), + (i1 (FCMPOGE64_rr DoubleRegs:$src2, DoubleRegs:$src1))>, + Requires<[HasV5T]>; + +def : Pat<(i1 (setole (f64 DoubleRegs:$src1), (fpimm:$src2))), + (i1 (FCMPOGE64_rr (f64 (CONST64_Float_Real fpimm:$src2)), + DoubleRegs:$src1))>, + Requires<[HasV5T]>; + +// rs <= rt -> rt >= rs. +def : Pat<(i1 (setule (f32 IntRegs:$src1), (f32 IntRegs:$src2))), + (i1 (FCMPUGE32_rr IntRegs:$src2, IntRegs:$src1))>, + Requires<[HasV5T]>; + +def : Pat<(i1 (setule (f32 IntRegs:$src1), (fpimm:$src2))), + (i1 (FCMPUGE32_rr (f32 (TFRI_f fpimm:$src2)), IntRegs:$src1))>, + Requires<[HasV5T]>; + +// Rss <= Rtt -> Rtt >= Rss. +def : Pat<(i1 (setule (f64 DoubleRegs:$src1), (f64 DoubleRegs:$src2))), + (i1 (FCMPUGE64_rr DoubleRegs:$src2, DoubleRegs:$src1))>, + Requires<[HasV5T]>; + +def : Pat<(i1 (setule (f64 DoubleRegs:$src1), (fpimm:$src2))), + (i1 (FCMPUGE64_rr (f64 (CONST64_Float_Real fpimm:$src2)), + DoubleRegs:$src1))>, + Requires<[HasV5T]>; + +// ne. 
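+// (Likewise there is no direct ne compare; the eq compare is generated and
+// the resulting predicate is inverted with NOT_p, e.g. (setone Rs, Rt)
+// becomes NOT_p(FCMPOEQ32_rr(Rs, Rt)), as in the patterns below.)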
+def : Pat<(i1 (setone (f32 IntRegs:$src1), (f32 IntRegs:$src2))), + (i1 (NOT_p (FCMPOEQ32_rr IntRegs:$src1, IntRegs:$src2)))>, + Requires<[HasV5T]>; + +def : Pat<(i1 (setone (f64 DoubleRegs:$src1), (f64 DoubleRegs:$src2))), + (i1 (NOT_p (FCMPOEQ64_rr DoubleRegs:$src1, DoubleRegs:$src2)))>, + Requires<[HasV5T]>; + +def : Pat<(i1 (setune (f32 IntRegs:$src1), (f32 IntRegs:$src2))), + (i1 (NOT_p (FCMPUEQ32_rr IntRegs:$src1, IntRegs:$src2)))>, + Requires<[HasV5T]>; + +def : Pat<(i1 (setune (f64 DoubleRegs:$src1), (f64 DoubleRegs:$src2))), + (i1 (NOT_p (FCMPUEQ64_rr DoubleRegs:$src1, DoubleRegs:$src2)))>, + Requires<[HasV5T]>; + +def : Pat<(i1 (setone (f32 IntRegs:$src1), (fpimm:$src2))), + (i1 (NOT_p (FCMPOEQ32_rr IntRegs:$src1, (f32 (TFRI_f fpimm:$src2)))))>, + Requires<[HasV5T]>; + +def : Pat<(i1 (setone (f64 DoubleRegs:$src1), (fpimm:$src2))), + (i1 (NOT_p (FCMPOEQ64_rr DoubleRegs:$src1, + (f64 (CONST64_Float_Real fpimm:$src2)))))>, + Requires<[HasV5T]>; + +def : Pat<(i1 (setune (f32 IntRegs:$src1), (fpimm:$src2))), + (i1 (NOT_p (FCMPUEQ32_rr IntRegs:$src1, (f32 (TFRI_f fpimm:$src2)))))>, + Requires<[HasV5T]>; + +def : Pat<(i1 (setune (f64 DoubleRegs:$src1), (fpimm:$src2))), + (i1 (NOT_p (FCMPUEQ64_rr DoubleRegs:$src1, + (f64 (CONST64_Float_Real fpimm:$src2)))))>, + Requires<[HasV5T]>; + +// Convert Integer to Floating Point. +def CONVERT_d2sf : ALU64_rr<(outs IntRegs:$dst), (ins DoubleRegs:$src), + "$dst = convert_d2sf($src)", + [(set (f32 IntRegs:$dst), (sint_to_fp (i64 DoubleRegs:$src)))]>, + Requires<[HasV5T]>; + +def CONVERT_ud2sf : ALU64_rr<(outs IntRegs:$dst), (ins DoubleRegs:$src), + "$dst = convert_ud2sf($src)", + [(set (f32 IntRegs:$dst), (uint_to_fp (i64 DoubleRegs:$src)))]>, + Requires<[HasV5T]>; + +def CONVERT_uw2sf : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src), + "$dst = convert_uw2sf($src)", + [(set (f32 IntRegs:$dst), (uint_to_fp (i32 IntRegs:$src)))]>, + Requires<[HasV5T]>; + +def CONVERT_w2sf : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src), + "$dst = convert_w2sf($src)", + [(set (f32 IntRegs:$dst), (sint_to_fp (i32 IntRegs:$src)))]>, + Requires<[HasV5T]>; + +def CONVERT_d2df : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src), + "$dst = convert_d2df($src)", + [(set (f64 DoubleRegs:$dst), (sint_to_fp (i64 DoubleRegs:$src)))]>, + Requires<[HasV5T]>; + +def CONVERT_ud2df : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src), + "$dst = convert_ud2df($src)", + [(set (f64 DoubleRegs:$dst), (uint_to_fp (i64 DoubleRegs:$src)))]>, + Requires<[HasV5T]>; + +def CONVERT_uw2df : ALU64_rr<(outs DoubleRegs:$dst), (ins IntRegs:$src), + "$dst = convert_uw2df($src)", + [(set (f64 DoubleRegs:$dst), (uint_to_fp (i32 IntRegs:$src)))]>, + Requires<[HasV5T]>; + +def CONVERT_w2df : ALU64_rr<(outs DoubleRegs:$dst), (ins IntRegs:$src), + "$dst = convert_w2df($src)", + [(set (f64 DoubleRegs:$dst), (sint_to_fp (i32 IntRegs:$src)))]>, + Requires<[HasV5T]>; + +// Convert Floating Point to Integer - default. 
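+// (The default conversions below use the truncating :chop variant; the
+// round-to-nearest variants further down are only selected under
+// IEEERndNearV5T.)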
+def CONVERT_df2uw : ALU64_rr<(outs IntRegs:$dst), (ins DoubleRegs:$src), + "$dst = convert_df2uw($src):chop", + [(set (i32 IntRegs:$dst), (fp_to_uint (f64 DoubleRegs:$src)))]>, + Requires<[HasV5T]>; + +def CONVERT_df2w : ALU64_rr<(outs IntRegs:$dst), (ins DoubleRegs:$src), + "$dst = convert_df2w($src):chop", + [(set (i32 IntRegs:$dst), (fp_to_sint (f64 DoubleRegs:$src)))]>, + Requires<[HasV5T]>; + +def CONVERT_sf2uw : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src), + "$dst = convert_sf2uw($src):chop", + [(set (i32 IntRegs:$dst), (fp_to_uint (f32 IntRegs:$src)))]>, + Requires<[HasV5T]>; + +def CONVERT_sf2w : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src), + "$dst = convert_sf2w($src):chop", + [(set (i32 IntRegs:$dst), (fp_to_sint (f32 IntRegs:$src)))]>, + Requires<[HasV5T]>; + +def CONVERT_df2d : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src), + "$dst = convert_df2d($src):chop", + [(set (i64 DoubleRegs:$dst), (fp_to_sint (f64 DoubleRegs:$src)))]>, + Requires<[HasV5T]>; + +def CONVERT_df2ud : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src), + "$dst = convert_df2ud($src):chop", + [(set (i64 DoubleRegs:$dst), (fp_to_uint (f64 DoubleRegs:$src)))]>, + Requires<[HasV5T]>; + +def CONVERT_sf2d : ALU64_rr<(outs DoubleRegs:$dst), (ins IntRegs:$src), + "$dst = convert_sf2d($src):chop", + [(set (i64 DoubleRegs:$dst), (fp_to_sint (f32 IntRegs:$src)))]>, + Requires<[HasV5T]>; + +def CONVERT_sf2ud : ALU64_rr<(outs DoubleRegs:$dst), (ins IntRegs:$src), + "$dst = convert_sf2ud($src):chop", + [(set (i64 DoubleRegs:$dst), (fp_to_uint (f32 IntRegs:$src)))]>, + Requires<[HasV5T]>; + +// Convert Floating Point to Integer: non-chopped. +let AddedComplexity = 20 in +def CONVERT_df2uw_nchop : ALU64_rr<(outs IntRegs:$dst), (ins DoubleRegs:$src), + "$dst = convert_df2uw($src)", + [(set (i32 IntRegs:$dst), (fp_to_uint (f64 DoubleRegs:$src)))]>, + Requires<[HasV5T, IEEERndNearV5T]>; + +let AddedComplexity = 20 in +def CONVERT_df2w_nchop : ALU64_rr<(outs IntRegs:$dst), (ins DoubleRegs:$src), + "$dst = convert_df2w($src)", + [(set (i32 IntRegs:$dst), (fp_to_sint (f64 DoubleRegs:$src)))]>, + Requires<[HasV5T, IEEERndNearV5T]>; + +let AddedComplexity = 20 in +def CONVERT_sf2uw_nchop : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src), + "$dst = convert_sf2uw($src)", + [(set (i32 IntRegs:$dst), (fp_to_uint (f32 IntRegs:$src)))]>, + Requires<[HasV5T, IEEERndNearV5T]>; + +let AddedComplexity = 20 in +def CONVERT_sf2w_nchop : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src), + "$dst = convert_sf2w($src)", + [(set (i32 IntRegs:$dst), (fp_to_sint (f32 IntRegs:$src)))]>, + Requires<[HasV5T, IEEERndNearV5T]>; + +let AddedComplexity = 20 in +def CONVERT_df2d_nchop : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src), + "$dst = convert_df2d($src)", + [(set (i64 DoubleRegs:$dst), (fp_to_sint (f64 DoubleRegs:$src)))]>, + Requires<[HasV5T, IEEERndNearV5T]>; + +let AddedComplexity = 20 in +def CONVERT_df2ud_nchop : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src), + "$dst = convert_df2ud($src)", + [(set (i64 DoubleRegs:$dst), (fp_to_uint (f64 DoubleRegs:$src)))]>, + Requires<[HasV5T, IEEERndNearV5T]>; + +let AddedComplexity = 20 in +def CONVERT_sf2d_nchop : ALU64_rr<(outs DoubleRegs:$dst), (ins IntRegs:$src), + "$dst = convert_sf2d($src)", + [(set (i64 DoubleRegs:$dst), (fp_to_sint (f32 IntRegs:$src)))]>, + Requires<[HasV5T, IEEERndNearV5T]>; + +let AddedComplexity = 20 in +def CONVERT_sf2ud_nchop : ALU64_rr<(outs DoubleRegs:$dst), (ins IntRegs:$src), + "$dst = convert_sf2ud($src)", + [(set (i64 
DoubleRegs:$dst), (fp_to_uint (f32 IntRegs:$src)))]>,
+ Requires<[HasV5T, IEEERndNearV5T]>;
+
+
+
+// Bitcast is different from [fp|sint|uint]_to_[sint|uint|fp].
+def : Pat <(i32 (bitconvert (f32 IntRegs:$src))),
+ (i32 (TFR IntRegs:$src))>,
+ Requires<[HasV5T]>;
+
+def : Pat <(f32 (bitconvert (i32 IntRegs:$src))),
+ (f32 (TFR IntRegs:$src))>,
+ Requires<[HasV5T]>;
+
+def : Pat <(i64 (bitconvert (f64 DoubleRegs:$src))),
+ (i64 (TFR64 DoubleRegs:$src))>,
+ Requires<[HasV5T]>;
+
+def : Pat <(f64 (bitconvert (i64 DoubleRegs:$src))),
+ (f64 (TFR64 DoubleRegs:$src))>,
+ Requires<[HasV5T]>;
+
+// Floating point fused multiply-add.
+def FMADD_dp : ALU64_acc<(outs DoubleRegs:$dst),
+ (ins DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
+ "$dst += dfmpy($src2, $src3)",
+ [(set (f64 DoubleRegs:$dst),
+ (fma DoubleRegs:$src2, DoubleRegs:$src3, DoubleRegs:$src1))],
+ "$src1 = $dst">,
+ Requires<[HasV5T]>;
+
+def FMADD_sp : ALU64_acc<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "$dst += sfmpy($src2, $src3)",
+ [(set (f32 IntRegs:$dst),
+ (fma IntRegs:$src2, IntRegs:$src3, IntRegs:$src1))],
+ "$src1 = $dst">,
+ Requires<[HasV5T]>;
+
+
+// Floating point max/min.
+let AddedComplexity = 100 in
+def FMAX_dp : ALU64_rr<(outs DoubleRegs:$dst),
+ (ins DoubleRegs:$src1, DoubleRegs:$src2),
+ "$dst = dfmax($src1, $src2)",
+ [(set DoubleRegs:$dst, (f64 (select (i1 (setolt DoubleRegs:$src2,
+ DoubleRegs:$src1)),
+ DoubleRegs:$src1,
+ DoubleRegs:$src2)))]>,
+ Requires<[HasV5T]>;
+
+let AddedComplexity = 100 in
+def FMAX_sp : ALU64_rr<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst = sfmax($src1, $src2)",
+ [(set IntRegs:$dst, (f32 (select (i1 (setolt IntRegs:$src2,
+ IntRegs:$src1)),
+ IntRegs:$src1,
+ IntRegs:$src2)))]>,
+ Requires<[HasV5T]>;
+
+let AddedComplexity = 100 in
+def FMIN_dp : ALU64_rr<(outs DoubleRegs:$dst),
+ (ins DoubleRegs:$src1, DoubleRegs:$src2),
+ "$dst = dfmin($src1, $src2)",
+ [(set DoubleRegs:$dst, (f64 (select (i1 (setogt DoubleRegs:$src2,
+ DoubleRegs:$src1)),
+ DoubleRegs:$src1,
+ DoubleRegs:$src2)))]>,
+ Requires<[HasV5T]>;
+
+let AddedComplexity = 100 in
+def FMIN_sp : ALU64_rr<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst = sfmin($src1, $src2)",
+ [(set IntRegs:$dst, (f32 (select (i1 (setogt IntRegs:$src2,
+ IntRegs:$src1)),
+ IntRegs:$src1,
+ IntRegs:$src2)))]>,
+ Requires<[HasV5T]>;
+
+// Pseudo instruction to encode a set of conditional transfers.
+// This instruction is used instead of a mux and trades off code size
+// for performance. We conduct this transformation optimistically in
+// the hope that these instructions get promoted to dot-new transfers.
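+// (Conceptually, e.g. a (select p0, Rs, Rt) of f32 type is kept as
+// TFR_condset_rr_f and later split into the predicated pair
+// "if (p0) Rd = Rs" / "if (!p0) Rd = Rt".)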
+let AddedComplexity = 100, isPredicated = 1 in +def TFR_condset_rr_f : ALU32_rr<(outs IntRegs:$dst), (ins PredRegs:$src1, + IntRegs:$src2, + IntRegs:$src3), + "Error; should not emit", + [(set IntRegs:$dst, (f32 (select PredRegs:$src1, + IntRegs:$src2, + IntRegs:$src3)))]>, + Requires<[HasV5T]>; + +let AddedComplexity = 100, isPredicated = 1 in +def TFR_condset_rr64_f : ALU32_rr<(outs DoubleRegs:$dst), (ins PredRegs:$src1, + DoubleRegs:$src2, + DoubleRegs:$src3), + "Error; should not emit", + [(set DoubleRegs:$dst, (f64 (select PredRegs:$src1, + DoubleRegs:$src2, + DoubleRegs:$src3)))]>, + Requires<[HasV5T]>; + + + +let AddedComplexity = 100, isPredicated = 1 in +def TFR_condset_ri_f : ALU32_rr<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, f32imm:$src3), + "Error; should not emit", + [(set IntRegs:$dst, + (f32 (select PredRegs:$src1, IntRegs:$src2, fpimm:$src3)))]>, + Requires<[HasV5T]>; + +let AddedComplexity = 100, isPredicated = 1 in +def TFR_condset_ir_f : ALU32_rr<(outs IntRegs:$dst), + (ins PredRegs:$src1, f32imm:$src2, IntRegs:$src3), + "Error; should not emit", + [(set IntRegs:$dst, + (f32 (select PredRegs:$src1, fpimm:$src2, IntRegs:$src3)))]>, + Requires<[HasV5T]>; + +let AddedComplexity = 100, isPredicated = 1 in +def TFR_condset_ii_f : ALU32_rr<(outs IntRegs:$dst), + (ins PredRegs:$src1, f32imm:$src2, f32imm:$src3), + "Error; should not emit", + [(set IntRegs:$dst, (f32 (select PredRegs:$src1, + fpimm:$src2, + fpimm:$src3)))]>, + Requires<[HasV5T]>; + + +def : Pat <(select (i1 (setult (f32 IntRegs:$src1), (f32 IntRegs:$src2))), + (f32 IntRegs:$src3), + (f32 IntRegs:$src4)), + (TFR_condset_rr_f (FCMPUGT32_rr IntRegs:$src2, IntRegs:$src1), IntRegs:$src4, + IntRegs:$src3)>, Requires<[HasV5T]>; + +def : Pat <(select (i1 (setult (f64 DoubleRegs:$src1), (f64 DoubleRegs:$src2))), + (f64 DoubleRegs:$src3), + (f64 DoubleRegs:$src4)), + (TFR_condset_rr64_f (FCMPUGT64_rr DoubleRegs:$src2, DoubleRegs:$src1), + DoubleRegs:$src4, DoubleRegs:$src3)>, Requires<[HasV5T]>; + +// Map from p0 = pnot(p0); r0 = mux(p0, #i, #j) => r0 = mux(p0, #j, #i). 
+def : Pat <(select (not PredRegs:$src1), fpimm:$src2, fpimm:$src3),
+ (TFR_condset_ii_f PredRegs:$src1, fpimm:$src3, fpimm:$src2)>;
+
+// Map from p0 = pnot(p0); r0 = select(p0, #i, r1)
+// => r0 = TFR_condset_ri(p0, r1, #i)
+def : Pat <(select (not PredRegs:$src1), fpimm:$src2, IntRegs:$src3),
+ (TFR_condset_ri_f PredRegs:$src1, IntRegs:$src3, fpimm:$src2)>;
+
+// Map from p0 = pnot(p0); r0 = mux(p0, r1, #i)
+// => r0 = TFR_condset_ir(p0, #i, r1)
+def : Pat <(select (not PredRegs:$src1), IntRegs:$src2, fpimm:$src3),
+ (TFR_condset_ir_f PredRegs:$src1, fpimm:$src3, IntRegs:$src2)>;
+
+def : Pat <(i32 (fp_to_sint (f64 DoubleRegs:$src1))),
+ (i32 (EXTRACT_SUBREG (i64 (CONVERT_df2d (f64 DoubleRegs:$src1))), subreg_loreg))>,
+ Requires<[HasV5T]>;
+
+def : Pat <(fabs (f32 IntRegs:$src1)),
+ (CLRBIT_31 (f32 IntRegs:$src1), 31)>,
+ Requires<[HasV5T]>;
+
+def : Pat <(fneg (f32 IntRegs:$src1)),
+ (TOGBIT_31 (f32 IntRegs:$src1), 31)>,
+ Requires<[HasV5T]>;
+
+/*
+def : Pat <(fabs (f64 DoubleRegs:$src1)),
+ (CLRBIT_31 (f32 (EXTRACT_SUBREG DoubleRegs:$src1, subreg_hireg)), 31)>,
+ Requires<[HasV5T]>;
+
+def : Pat <(fabs (f64 DoubleRegs:$src1)),
+ (CLRBIT_31 (f32 (EXTRACT_SUBREG DoubleRegs:$src1, subreg_hireg)), 31)>,
+ Requires<[HasV5T]>;
+ */
diff --git a/lib/Target/Hexagon/HexagonIntrinsics.td b/lib/Target/Hexagon/HexagonIntrinsics.td
index b15e293..99f59d5 100644
--- a/lib/Target/Hexagon/HexagonIntrinsics.td
+++ b/lib/Target/Hexagon/HexagonIntrinsics.td
@@ -551,13 +551,6 @@ class di_SInst_diu6u6
 [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, imm:$src2, imm:$src3))]>;
-class di_SInst_didisi<string opc, Intrinsic IntID>
- : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2,
- IntRegs:$src3),
- !strconcat("$dst = ", !strconcat(opc , "($src1, $src2, $src3)")),
- [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, DoubleRegs:$src2,
- IntRegs:$src3))]>;
-
 class di_SInst_didiqi<string opc, Intrinsic IntID>
 : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2,
 IntRegs:$src3),
@@ -818,6 +811,11 @@ class di_MInst_s8s8
 !strconcat("$dst = ", !strconcat(opc , "(#$src1, #$src2)")),
 [(set DoubleRegs:$dst, (IntID imm:$src1, imm:$src2))]>;
+class si_MInst_sis9<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, s9Imm:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>;
+
 class si_MInst_sisi<string opc, Intrinsic IntID>
 : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
 !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")),
@@ -952,6 +950,17 @@ class si_SInst_sisi_sat
 !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):sat")),
 [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+class si_SInst_didi_sat<string opc, Intrinsic IntID>
+ : SInst<(outs IntRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):sat")),
+ [(set IntRegs:$dst, (IntID DoubleRegs:$src1, DoubleRegs:$src2))]>;
+
+class si_SInst_disi_s1_rnd_sat<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins DoubleRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc ,
+ "($src1, $src2):<<1:rnd:sat")),
+ [(set IntRegs:$dst, (IntID DoubleRegs:$src1, IntRegs:$src2))]>;
+
 class si_MInst_sisi_s1_rnd_sat<string opc, Intrinsic IntID>
 : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
 !strconcat("$dst = ", !strconcat(opc ,
@@ -1612,6 +1621,18 @@ class di_MInst_dididi_acc_rnd_sat
 DoubleRegs:$src2))],
 "$dst2 = $dst">;
+class di_MInst_dididi_acc_s1<string opc, Intrinsic IntID>
+ : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2,
+ DoubleRegs:$src1,
+ DoubleRegs:$src2),
+ !strconcat("$dst += ",
+ !strconcat(opc , "($src1, $src2):<<1")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2,
+ DoubleRegs:$src1,
+ DoubleRegs:$src2))],
+ "$dst2 = $dst">;
+
+
 class di_MInst_dididi_acc_s1_sat<string opc, Intrinsic IntID>
 : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2,
 DoubleRegs:$src1,
@@ -1822,53 +1843,63 @@ class si_MInst_didi
 !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")),
 [(set IntRegs:$dst, (IntID DoubleRegs:$src1, DoubleRegs:$src2))]>;
+//
+// LDInst classes.
+//
+let mayLoad = 1, neverHasSideEffects = 1 in
+class di_LDInstPI_diu4<string opc, Intrinsic IntID>
+ : LDInstPI<(outs IntRegs:$dst, DoubleRegs:$dst2),
+ (ins IntRegs:$src1, IntRegs:$src2, CRRegs:$src3, s4Imm:$offset),
+ "$dst2 = memd($src1++#$offset:circ($src3))",
+ [],
+ "$src1 = $dst">;
 /********************************************************************
 * ALU32/ALU *
 *********************************************************************/
 // ALU32 / ALU / Add.
-def Hexagon_A2_add:
+def HEXAGON_A2_add:
 si_ALU32_sisi <"add", int_hexagon_A2_add>;
-def Hexagon_A2_addi:
+def HEXAGON_A2_addi:
 si_ALU32_sis16 <"add", int_hexagon_A2_addi>;
 
 // ALU32 / ALU / Logical operations.
-def Hexagon_A2_and:
+def HEXAGON_A2_and:
 si_ALU32_sisi <"and", int_hexagon_A2_and>;
-def Hexagon_A2_andir:
+def HEXAGON_A2_andir:
 si_ALU32_sis10 <"and", int_hexagon_A2_andir>;
-def Hexagon_A2_not:
+def HEXAGON_A2_not:
 si_ALU32_si <"not", int_hexagon_A2_not>;
-def Hexagon_A2_or:
+def HEXAGON_A2_or:
 si_ALU32_sisi <"or", int_hexagon_A2_or>;
-def Hexagon_A2_orir:
+def HEXAGON_A2_orir:
 si_ALU32_sis10 <"or", int_hexagon_A2_orir>;
-def Hexagon_A2_xor:
+def HEXAGON_A2_xor:
 si_ALU32_sisi <"xor", int_hexagon_A2_xor>;
 
 // ALU32 / ALU / Negate.
-def Hexagon_A2_neg:
+def HEXAGON_A2_neg:
 si_ALU32_si <"neg", int_hexagon_A2_neg>;
 
 // ALU32 / ALU / Subtract.
-def Hexagon_A2_sub:
+def HEXAGON_A2_sub:
 si_ALU32_sisi <"sub", int_hexagon_A2_sub>;
-def Hexagon_A2_subri:
+def HEXAGON_A2_subri:
 si_ALU32_s10si <"sub", int_hexagon_A2_subri>;
 
 // ALU32 / ALU / Transfer Immediate.
-def Hexagon_A2_tfril:
+def HEXAGON_A2_tfril:
 si_lo_ALU32_siu16 <"", int_hexagon_A2_tfril>;
-def Hexagon_A2_tfrih:
+def HEXAGON_A2_tfrih:
 si_hi_ALU32_siu16 <"", int_hexagon_A2_tfrih>;
-def Hexagon_A2_tfrsi:
+def HEXAGON_A2_tfrsi:
 si_ALU32_s16 <"", int_hexagon_A2_tfrsi>;
-def Hexagon_A2_tfrpi:
+def HEXAGON_A2_tfrpi:
 di_ALU32_s8 <"", int_hexagon_A2_tfrpi>;
 
 // ALU32 / ALU / Transfer Register.
-def Hexagon_A2_tfr:
+def HEXAGON_A2_tfr:
 si_ALU32_si_tfr <"", int_hexagon_A2_tfr>;
 
 /********************************************************************
@@ -1876,45 +1907,45 @@ def Hexagon_A2_tfr:
 *********************************************************************/
 
 // ALU32 / PERM / Combine.
-def Hexagon_A2_combinew:
+def HEXAGON_A2_combinew:
 di_ALU32_sisi <"combine", int_hexagon_A2_combinew>;
-def Hexagon_A2_combine_hh:
+def HEXAGON_A2_combine_hh:
 si_MInst_sisi_hh <"combine", int_hexagon_A2_combine_hh>;
-def Hexagon_A2_combine_lh:
+def HEXAGON_A2_combine_lh:
 si_MInst_sisi_lh <"combine", int_hexagon_A2_combine_lh>;
-def Hexagon_A2_combine_hl:
+def HEXAGON_A2_combine_hl:
 si_MInst_sisi_hl <"combine", int_hexagon_A2_combine_hl>;
-def Hexagon_A2_combine_ll:
+def HEXAGON_A2_combine_ll:
 si_MInst_sisi_ll <"combine", int_hexagon_A2_combine_ll>;
-def Hexagon_A2_combineii:
+def HEXAGON_A2_combineii:
 di_MInst_s8s8 <"combine", int_hexagon_A2_combineii>;
 
 // ALU32 / PERM / Mux.
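+// (Rd = mux(Pu, src1, src2) selects src1 when Pu is true and src2 otherwise;
+// the ri/ir/ii variants below take an s8 immediate in place of one or both
+// register operands.)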
-def Hexagon_C2_mux: +def HEXAGON_C2_mux: si_ALU32_qisisi <"mux", int_hexagon_C2_mux>; -def Hexagon_C2_muxri: +def HEXAGON_C2_muxri: si_ALU32_qis8si <"mux", int_hexagon_C2_muxri>; -def Hexagon_C2_muxir: +def HEXAGON_C2_muxir: si_ALU32_qisis8 <"mux", int_hexagon_C2_muxir>; -def Hexagon_C2_muxii: +def HEXAGON_C2_muxii: si_ALU32_qis8s8 <"mux", int_hexagon_C2_muxii>; // ALU32 / PERM / Shift halfword. -def Hexagon_A2_aslh: +def HEXAGON_A2_aslh: si_ALU32_si <"aslh", int_hexagon_A2_aslh>; -def Hexagon_A2_asrh: +def HEXAGON_A2_asrh: si_ALU32_si <"asrh", int_hexagon_A2_asrh>; def SI_to_SXTHI_asrh: si_ALU32_si <"asrh", int_hexagon_SI_to_SXTHI_asrh>; // ALU32 / PERM / Sign/zero extend. -def Hexagon_A2_sxth: +def HEXAGON_A2_sxth: si_ALU32_si <"sxth", int_hexagon_A2_sxth>; -def Hexagon_A2_sxtb: +def HEXAGON_A2_sxtb: si_ALU32_si <"sxtb", int_hexagon_A2_sxtb>; -def Hexagon_A2_zxth: +def HEXAGON_A2_zxth: si_ALU32_si <"zxth", int_hexagon_A2_zxth>; -def Hexagon_A2_zxtb: +def HEXAGON_A2_zxtb: si_ALU32_si <"zxtb", int_hexagon_A2_zxtb>; /******************************************************************** @@ -1922,25 +1953,25 @@ def Hexagon_A2_zxtb: *********************************************************************/ // ALU32 / PRED / Compare. -def Hexagon_C2_cmpeq: +def HEXAGON_C2_cmpeq: qi_ALU32_sisi <"cmp.eq", int_hexagon_C2_cmpeq>; -def Hexagon_C2_cmpeqi: +def HEXAGON_C2_cmpeqi: qi_ALU32_sis10 <"cmp.eq", int_hexagon_C2_cmpeqi>; -def Hexagon_C2_cmpgei: +def HEXAGON_C2_cmpgei: qi_ALU32_sis8 <"cmp.ge", int_hexagon_C2_cmpgei>; -def Hexagon_C2_cmpgeui: +def HEXAGON_C2_cmpgeui: qi_ALU32_siu8 <"cmp.geu", int_hexagon_C2_cmpgeui>; -def Hexagon_C2_cmpgt: +def HEXAGON_C2_cmpgt: qi_ALU32_sisi <"cmp.gt", int_hexagon_C2_cmpgt>; -def Hexagon_C2_cmpgti: +def HEXAGON_C2_cmpgti: qi_ALU32_sis10 <"cmp.gt", int_hexagon_C2_cmpgti>; -def Hexagon_C2_cmpgtu: +def HEXAGON_C2_cmpgtu: qi_ALU32_sisi <"cmp.gtu", int_hexagon_C2_cmpgtu>; -def Hexagon_C2_cmpgtui: +def HEXAGON_C2_cmpgtui: qi_ALU32_siu9 <"cmp.gtu", int_hexagon_C2_cmpgtui>; -def Hexagon_C2_cmplt: +def HEXAGON_C2_cmplt: qi_ALU32_sisi <"cmp.lt", int_hexagon_C2_cmplt>; -def Hexagon_C2_cmpltu: +def HEXAGON_C2_cmpltu: qi_ALU32_sisi <"cmp.ltu", int_hexagon_C2_cmpltu>; /******************************************************************** @@ -1949,27 +1980,27 @@ def Hexagon_C2_cmpltu: // ALU32 / VH / Vector add halfwords. // Rd32=vadd[u]h(Rs32,Rt32:sat] -def Hexagon_A2_svaddh: +def HEXAGON_A2_svaddh: si_ALU32_sisi <"vaddh", int_hexagon_A2_svaddh>; -def Hexagon_A2_svaddhs: +def HEXAGON_A2_svaddhs: si_ALU32_sisi_sat <"vaddh", int_hexagon_A2_svaddhs>; -def Hexagon_A2_svadduhs: +def HEXAGON_A2_svadduhs: si_ALU32_sisi_sat <"vadduh", int_hexagon_A2_svadduhs>; // ALU32 / VH / Vector average halfwords. -def Hexagon_A2_svavgh: +def HEXAGON_A2_svavgh: si_ALU32_sisi <"vavgh", int_hexagon_A2_svavgh>; -def Hexagon_A2_svavghs: +def HEXAGON_A2_svavghs: si_ALU32_sisi_rnd <"vavgh", int_hexagon_A2_svavghs>; -def Hexagon_A2_svnavgh: +def HEXAGON_A2_svnavgh: si_ALU32_sisi <"vnavgh", int_hexagon_A2_svnavgh>; // ALU32 / VH / Vector subtract halfwords. 
-def Hexagon_A2_svsubh: +def HEXAGON_A2_svsubh: si_ALU32_sisi <"vsubh", int_hexagon_A2_svsubh>; -def Hexagon_A2_svsubhs: +def HEXAGON_A2_svsubhs: si_ALU32_sisi_sat <"vsubh", int_hexagon_A2_svsubhs>; -def Hexagon_A2_svsubuhs: +def HEXAGON_A2_svsubuhs: si_ALU32_sisi_sat <"vsubuh", int_hexagon_A2_svsubuhs>; /******************************************************************** @@ -1977,109 +2008,109 @@ def Hexagon_A2_svsubuhs: *********************************************************************/ // ALU64 / ALU / Add. -def Hexagon_A2_addp: +def HEXAGON_A2_addp: di_ALU64_didi <"add", int_hexagon_A2_addp>; -def Hexagon_A2_addsat: +def HEXAGON_A2_addsat: si_ALU64_sisi_sat <"add", int_hexagon_A2_addsat>; // ALU64 / ALU / Add halfword. // Even though the definition says hl, it should be lh - //so DON'T change the class " si_ALU64_sisi_l16_lh " it inherits. -def Hexagon_A2_addh_l16_hl: +def HEXAGON_A2_addh_l16_hl: si_ALU64_sisi_l16_lh <"add", int_hexagon_A2_addh_l16_hl>; -def Hexagon_A2_addh_l16_ll: +def HEXAGON_A2_addh_l16_ll: si_ALU64_sisi_l16_ll <"add", int_hexagon_A2_addh_l16_ll>; -def Hexagon_A2_addh_l16_sat_hl: +def HEXAGON_A2_addh_l16_sat_hl: si_ALU64_sisi_l16_sat_lh <"add", int_hexagon_A2_addh_l16_sat_hl>; -def Hexagon_A2_addh_l16_sat_ll: +def HEXAGON_A2_addh_l16_sat_ll: si_ALU64_sisi_l16_sat_ll <"add", int_hexagon_A2_addh_l16_sat_ll>; -def Hexagon_A2_addh_h16_hh: +def HEXAGON_A2_addh_h16_hh: si_ALU64_sisi_h16_hh <"add", int_hexagon_A2_addh_h16_hh>; -def Hexagon_A2_addh_h16_hl: +def HEXAGON_A2_addh_h16_hl: si_ALU64_sisi_h16_hl <"add", int_hexagon_A2_addh_h16_hl>; -def Hexagon_A2_addh_h16_lh: +def HEXAGON_A2_addh_h16_lh: si_ALU64_sisi_h16_lh <"add", int_hexagon_A2_addh_h16_lh>; -def Hexagon_A2_addh_h16_ll: +def HEXAGON_A2_addh_h16_ll: si_ALU64_sisi_h16_ll <"add", int_hexagon_A2_addh_h16_ll>; -def Hexagon_A2_addh_h16_sat_hh: +def HEXAGON_A2_addh_h16_sat_hh: si_ALU64_sisi_h16_sat_hh <"add", int_hexagon_A2_addh_h16_sat_hh>; -def Hexagon_A2_addh_h16_sat_hl: +def HEXAGON_A2_addh_h16_sat_hl: si_ALU64_sisi_h16_sat_hl <"add", int_hexagon_A2_addh_h16_sat_hl>; -def Hexagon_A2_addh_h16_sat_lh: +def HEXAGON_A2_addh_h16_sat_lh: si_ALU64_sisi_h16_sat_lh <"add", int_hexagon_A2_addh_h16_sat_lh>; -def Hexagon_A2_addh_h16_sat_ll: +def HEXAGON_A2_addh_h16_sat_ll: si_ALU64_sisi_h16_sat_ll <"add", int_hexagon_A2_addh_h16_sat_ll>; // ALU64 / ALU / Compare. -def Hexagon_C2_cmpeqp: +def HEXAGON_C2_cmpeqp: qi_ALU64_didi <"cmp.eq", int_hexagon_C2_cmpeqp>; -def Hexagon_C2_cmpgtp: +def HEXAGON_C2_cmpgtp: qi_ALU64_didi <"cmp.gt", int_hexagon_C2_cmpgtp>; -def Hexagon_C2_cmpgtup: +def HEXAGON_C2_cmpgtup: qi_ALU64_didi <"cmp.gtu", int_hexagon_C2_cmpgtup>; // ALU64 / ALU / Logical operations. -def Hexagon_A2_andp: +def HEXAGON_A2_andp: di_ALU64_didi <"and", int_hexagon_A2_andp>; -def Hexagon_A2_orp: +def HEXAGON_A2_orp: di_ALU64_didi <"or", int_hexagon_A2_orp>; -def Hexagon_A2_xorp: +def HEXAGON_A2_xorp: di_ALU64_didi <"xor", int_hexagon_A2_xorp>; // ALU64 / ALU / Maximum. -def Hexagon_A2_max: +def HEXAGON_A2_max: si_ALU64_sisi <"max", int_hexagon_A2_max>; -def Hexagon_A2_maxu: +def HEXAGON_A2_maxu: si_ALU64_sisi <"maxu", int_hexagon_A2_maxu>; // ALU64 / ALU / Minimum. -def Hexagon_A2_min: +def HEXAGON_A2_min: si_ALU64_sisi <"min", int_hexagon_A2_min>; -def Hexagon_A2_minu: +def HEXAGON_A2_minu: si_ALU64_sisi <"minu", int_hexagon_A2_minu>; // ALU64 / ALU / Subtract. 
-def Hexagon_A2_subp: +def HEXAGON_A2_subp: di_ALU64_didi <"sub", int_hexagon_A2_subp>; -def Hexagon_A2_subsat: +def HEXAGON_A2_subsat: si_ALU64_sisi_sat <"sub", int_hexagon_A2_subsat>; // ALU64 / ALU / Subtract halfword. // Even though the definition says hl, it should be lh - //so DON'T change the class " si_ALU64_sisi_l16_lh " it inherits. -def Hexagon_A2_subh_l16_hl: +def HEXAGON_A2_subh_l16_hl: si_ALU64_sisi_l16_lh <"sub", int_hexagon_A2_subh_l16_hl>; -def Hexagon_A2_subh_l16_ll: +def HEXAGON_A2_subh_l16_ll: si_ALU64_sisi_l16_ll <"sub", int_hexagon_A2_subh_l16_ll>; -def Hexagon_A2_subh_l16_sat_hl: +def HEXAGON_A2_subh_l16_sat_hl: si_ALU64_sisi_l16_sat_lh <"sub", int_hexagon_A2_subh_l16_sat_hl>; -def Hexagon_A2_subh_l16_sat_ll: +def HEXAGON_A2_subh_l16_sat_ll: si_ALU64_sisi_l16_sat_ll <"sub", int_hexagon_A2_subh_l16_sat_ll>; -def Hexagon_A2_subh_h16_hh: +def HEXAGON_A2_subh_h16_hh: si_ALU64_sisi_h16_hh <"sub", int_hexagon_A2_subh_h16_hh>; -def Hexagon_A2_subh_h16_hl: +def HEXAGON_A2_subh_h16_hl: si_ALU64_sisi_h16_hl <"sub", int_hexagon_A2_subh_h16_hl>; -def Hexagon_A2_subh_h16_lh: +def HEXAGON_A2_subh_h16_lh: si_ALU64_sisi_h16_lh <"sub", int_hexagon_A2_subh_h16_lh>; -def Hexagon_A2_subh_h16_ll: +def HEXAGON_A2_subh_h16_ll: si_ALU64_sisi_h16_ll <"sub", int_hexagon_A2_subh_h16_ll>; -def Hexagon_A2_subh_h16_sat_hh: +def HEXAGON_A2_subh_h16_sat_hh: si_ALU64_sisi_h16_sat_hh <"sub", int_hexagon_A2_subh_h16_sat_hh>; -def Hexagon_A2_subh_h16_sat_hl: +def HEXAGON_A2_subh_h16_sat_hl: si_ALU64_sisi_h16_sat_hl <"sub", int_hexagon_A2_subh_h16_sat_hl>; -def Hexagon_A2_subh_h16_sat_lh: +def HEXAGON_A2_subh_h16_sat_lh: si_ALU64_sisi_h16_sat_lh <"sub", int_hexagon_A2_subh_h16_sat_lh>; -def Hexagon_A2_subh_h16_sat_ll: +def HEXAGON_A2_subh_h16_sat_ll: si_ALU64_sisi_h16_sat_ll <"sub", int_hexagon_A2_subh_h16_sat_ll>; // ALU64 / ALU / Transfer register. -def Hexagon_A2_tfrp: +def HEXAGON_A2_tfrp: di_ALU64_di <"", int_hexagon_A2_tfrp>; /******************************************************************** @@ -2087,7 +2118,7 @@ def Hexagon_A2_tfrp: *********************************************************************/ // ALU64 / BIT / Masked parity. -def Hexagon_S2_parityp: +def HEXAGON_S2_parityp: si_ALU64_didi <"parity", int_hexagon_S2_parityp>; /******************************************************************** @@ -2095,7 +2126,7 @@ def Hexagon_S2_parityp: *********************************************************************/ // ALU64 / PERM / Vector pack high and low halfwords. -def Hexagon_S2_packhl: +def HEXAGON_S2_packhl: di_ALU64_sisi <"packhl", int_hexagon_S2_packhl>; /******************************************************************** @@ -2103,37 +2134,37 @@ def Hexagon_S2_packhl: *********************************************************************/ // ALU64 / VB / Vector add unsigned bytes. -def Hexagon_A2_vaddub: +def HEXAGON_A2_vaddub: di_ALU64_didi <"vaddub", int_hexagon_A2_vaddub>; -def Hexagon_A2_vaddubs: +def HEXAGON_A2_vaddubs: di_ALU64_didi_sat <"vaddub", int_hexagon_A2_vaddubs>; // ALU64 / VB / Vector average unsigned bytes. -def Hexagon_A2_vavgub: +def HEXAGON_A2_vavgub: di_ALU64_didi <"vavgub", int_hexagon_A2_vavgub>; -def Hexagon_A2_vavgubr: +def HEXAGON_A2_vavgubr: di_ALU64_didi_rnd <"vavgub", int_hexagon_A2_vavgubr>; // ALU64 / VB / Vector compare unsigned bytes. 
-def Hexagon_A2_vcmpbeq: +def HEXAGON_A2_vcmpbeq: qi_ALU64_didi <"vcmpb.eq", int_hexagon_A2_vcmpbeq>; -def Hexagon_A2_vcmpbgtu: +def HEXAGON_A2_vcmpbgtu: qi_ALU64_didi <"vcmpb.gtu",int_hexagon_A2_vcmpbgtu>; // ALU64 / VB / Vector maximum/minimum unsigned bytes. -def Hexagon_A2_vmaxub: +def HEXAGON_A2_vmaxub: di_ALU64_didi <"vmaxub", int_hexagon_A2_vmaxub>; -def Hexagon_A2_vminub: +def HEXAGON_A2_vminub: di_ALU64_didi <"vminub", int_hexagon_A2_vminub>; // ALU64 / VB / Vector subtract unsigned bytes. -def Hexagon_A2_vsubub: +def HEXAGON_A2_vsubub: di_ALU64_didi <"vsubub", int_hexagon_A2_vsubub>; -def Hexagon_A2_vsububs: +def HEXAGON_A2_vsububs: di_ALU64_didi_sat <"vsubub", int_hexagon_A2_vsububs>; // ALU64 / VB / Vector mux. -def Hexagon_C2_vmux: +def HEXAGON_C2_vmux: di_ALU64_qididi <"vmux", int_hexagon_C2_vmux>; @@ -2143,58 +2174,58 @@ def Hexagon_C2_vmux: // ALU64 / VH / Vector add halfwords. // Rdd64=vadd[u]h(Rss64,Rtt64:sat] -def Hexagon_A2_vaddh: +def HEXAGON_A2_vaddh: di_ALU64_didi <"vaddh", int_hexagon_A2_vaddh>; -def Hexagon_A2_vaddhs: +def HEXAGON_A2_vaddhs: di_ALU64_didi_sat <"vaddh", int_hexagon_A2_vaddhs>; -def Hexagon_A2_vadduhs: +def HEXAGON_A2_vadduhs: di_ALU64_didi_sat <"vadduh", int_hexagon_A2_vadduhs>; // ALU64 / VH / Vector average halfwords. // Rdd64=v[n]avg[u]h(Rss64,Rtt64:rnd/:crnd][:sat] -def Hexagon_A2_vavgh: +def HEXAGON_A2_vavgh: di_ALU64_didi <"vavgh", int_hexagon_A2_vavgh>; -def Hexagon_A2_vavghcr: +def HEXAGON_A2_vavghcr: di_ALU64_didi_crnd <"vavgh", int_hexagon_A2_vavghcr>; -def Hexagon_A2_vavghr: +def HEXAGON_A2_vavghr: di_ALU64_didi_rnd <"vavgh", int_hexagon_A2_vavghr>; -def Hexagon_A2_vavguh: +def HEXAGON_A2_vavguh: di_ALU64_didi <"vavguh", int_hexagon_A2_vavguh>; -def Hexagon_A2_vavguhr: +def HEXAGON_A2_vavguhr: di_ALU64_didi_rnd <"vavguh", int_hexagon_A2_vavguhr>; -def Hexagon_A2_vnavgh: +def HEXAGON_A2_vnavgh: di_ALU64_didi <"vnavgh", int_hexagon_A2_vnavgh>; -def Hexagon_A2_vnavghcr: +def HEXAGON_A2_vnavghcr: di_ALU64_didi_crnd_sat <"vnavgh", int_hexagon_A2_vnavghcr>; -def Hexagon_A2_vnavghr: +def HEXAGON_A2_vnavghr: di_ALU64_didi_rnd_sat <"vnavgh", int_hexagon_A2_vnavghr>; // ALU64 / VH / Vector compare halfwords. -def Hexagon_A2_vcmpheq: +def HEXAGON_A2_vcmpheq: qi_ALU64_didi <"vcmph.eq", int_hexagon_A2_vcmpheq>; -def Hexagon_A2_vcmphgt: +def HEXAGON_A2_vcmphgt: qi_ALU64_didi <"vcmph.gt", int_hexagon_A2_vcmphgt>; -def Hexagon_A2_vcmphgtu: +def HEXAGON_A2_vcmphgtu: qi_ALU64_didi <"vcmph.gtu",int_hexagon_A2_vcmphgtu>; // ALU64 / VH / Vector maximum halfwords. -def Hexagon_A2_vmaxh: +def HEXAGON_A2_vmaxh: di_ALU64_didi <"vmaxh", int_hexagon_A2_vmaxh>; -def Hexagon_A2_vmaxuh: +def HEXAGON_A2_vmaxuh: di_ALU64_didi <"vmaxuh", int_hexagon_A2_vmaxuh>; // ALU64 / VH / Vector minimum halfwords. -def Hexagon_A2_vminh: +def HEXAGON_A2_vminh: di_ALU64_didi <"vminh", int_hexagon_A2_vminh>; -def Hexagon_A2_vminuh: +def HEXAGON_A2_vminuh: di_ALU64_didi <"vminuh", int_hexagon_A2_vminuh>; // ALU64 / VH / Vector subtract halfwords. -def Hexagon_A2_vsubh: +def HEXAGON_A2_vsubh: di_ALU64_didi <"vsubh", int_hexagon_A2_vsubh>; -def Hexagon_A2_vsubhs: +def HEXAGON_A2_vsubhs: di_ALU64_didi_sat <"vsubh", int_hexagon_A2_vsubhs>; -def Hexagon_A2_vsubuhs: +def HEXAGON_A2_vsubuhs: di_ALU64_didi_sat <"vsubuh", int_hexagon_A2_vsubuhs>; @@ -2204,53 +2235,53 @@ def Hexagon_A2_vsubuhs: // ALU64 / VW / Vector add words. 
// Rdd32=vaddw(Rss32,Rtt32)[:sat] -def Hexagon_A2_vaddw: +def HEXAGON_A2_vaddw: di_ALU64_didi <"vaddw", int_hexagon_A2_vaddw>; -def Hexagon_A2_vaddws: +def HEXAGON_A2_vaddws: di_ALU64_didi_sat <"vaddw", int_hexagon_A2_vaddws>; // ALU64 / VW / Vector average words. -def Hexagon_A2_vavguw: +def HEXAGON_A2_vavguw: di_ALU64_didi <"vavguw", int_hexagon_A2_vavguw>; -def Hexagon_A2_vavguwr: +def HEXAGON_A2_vavguwr: di_ALU64_didi_rnd <"vavguw", int_hexagon_A2_vavguwr>; -def Hexagon_A2_vavgw: +def HEXAGON_A2_vavgw: di_ALU64_didi <"vavgw", int_hexagon_A2_vavgw>; -def Hexagon_A2_vavgwcr: +def HEXAGON_A2_vavgwcr: di_ALU64_didi_crnd <"vavgw", int_hexagon_A2_vavgwcr>; -def Hexagon_A2_vavgwr: +def HEXAGON_A2_vavgwr: di_ALU64_didi_rnd <"vavgw", int_hexagon_A2_vavgwr>; -def Hexagon_A2_vnavgw: +def HEXAGON_A2_vnavgw: di_ALU64_didi <"vnavgw", int_hexagon_A2_vnavgw>; -def Hexagon_A2_vnavgwcr: +def HEXAGON_A2_vnavgwcr: di_ALU64_didi_crnd_sat <"vnavgw", int_hexagon_A2_vnavgwcr>; -def Hexagon_A2_vnavgwr: +def HEXAGON_A2_vnavgwr: di_ALU64_didi_rnd_sat <"vnavgw", int_hexagon_A2_vnavgwr>; // ALU64 / VW / Vector compare words. -def Hexagon_A2_vcmpweq: +def HEXAGON_A2_vcmpweq: qi_ALU64_didi <"vcmpw.eq", int_hexagon_A2_vcmpweq>; -def Hexagon_A2_vcmpwgt: +def HEXAGON_A2_vcmpwgt: qi_ALU64_didi <"vcmpw.gt", int_hexagon_A2_vcmpwgt>; -def Hexagon_A2_vcmpwgtu: +def HEXAGON_A2_vcmpwgtu: qi_ALU64_didi <"vcmpw.gtu",int_hexagon_A2_vcmpwgtu>; // ALU64 / VW / Vector maximum words. -def Hexagon_A2_vmaxw: +def HEXAGON_A2_vmaxw: di_ALU64_didi <"vmaxw", int_hexagon_A2_vmaxw>; -def Hexagon_A2_vmaxuw: +def HEXAGON_A2_vmaxuw: di_ALU64_didi <"vmaxuw", int_hexagon_A2_vmaxuw>; // ALU64 / VW / Vector minimum words. -def Hexagon_A2_vminw: +def HEXAGON_A2_vminw: di_ALU64_didi <"vminw", int_hexagon_A2_vminw>; -def Hexagon_A2_vminuw: +def HEXAGON_A2_vminuw: di_ALU64_didi <"vminuw", int_hexagon_A2_vminuw>; // ALU64 / VW / Vector subtract words. -def Hexagon_A2_vsubw: +def HEXAGON_A2_vsubw: di_ALU64_didi <"vsubw", int_hexagon_A2_vsubw>; -def Hexagon_A2_vsubws: +def HEXAGON_A2_vsubws: di_ALU64_didi_sat <"vsubw", int_hexagon_A2_vsubws>; @@ -2259,25 +2290,25 @@ def Hexagon_A2_vsubws: *********************************************************************/ // CR / Logical reductions on predicates. -def Hexagon_C2_all8: +def HEXAGON_C2_all8: qi_SInst_qi <"all8", int_hexagon_C2_all8>; -def Hexagon_C2_any8: +def HEXAGON_C2_any8: qi_SInst_qi <"any8", int_hexagon_C2_any8>; // CR / Logical operations on predicates. -def Hexagon_C2_pxfer_map: +def HEXAGON_C2_pxfer_map: qi_SInst_qi_pxfer <"", int_hexagon_C2_pxfer_map>; -def Hexagon_C2_and: +def HEXAGON_C2_and: qi_SInst_qiqi <"and", int_hexagon_C2_and>; -def Hexagon_C2_andn: +def HEXAGON_C2_andn: qi_SInst_qiqi_neg <"and", int_hexagon_C2_andn>; -def Hexagon_C2_not: +def HEXAGON_C2_not: qi_SInst_qi <"not", int_hexagon_C2_not>; -def Hexagon_C2_or: +def HEXAGON_C2_or: qi_SInst_qiqi <"or", int_hexagon_C2_or>; -def Hexagon_C2_orn: +def HEXAGON_C2_orn: qi_SInst_qiqi_neg <"or", int_hexagon_C2_orn>; -def Hexagon_C2_xor: +def HEXAGON_C2_xor: qi_SInst_qiqi <"xor", int_hexagon_C2_xor>; @@ -2286,27 +2317,27 @@ def Hexagon_C2_xor: *********************************************************************/ // MTYPE / ALU / Add and accumulate. 
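+// (The _acc classes below accumulate, Rx += op(...), while the _nac classes
+// subtract the result, Rx -= op(...), matching the acc/nac instruction
+// classes defined earlier in this file.)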
-def Hexagon_M2_acci: +def HEXAGON_M2_acci: si_MInst_sisisi_acc <"add", int_hexagon_M2_acci>; -def Hexagon_M2_accii: +def HEXAGON_M2_accii: si_MInst_sisis8_acc <"add", int_hexagon_M2_accii>; -def Hexagon_M2_nacci: +def HEXAGON_M2_nacci: si_MInst_sisisi_nac <"add", int_hexagon_M2_nacci>; -def Hexagon_M2_naccii: +def HEXAGON_M2_naccii: si_MInst_sisis8_nac <"add", int_hexagon_M2_naccii>; // MTYPE / ALU / Subtract and accumulate. -def Hexagon_M2_subacc: +def HEXAGON_M2_subacc: si_MInst_sisisi_acc <"sub", int_hexagon_M2_subacc>; // MTYPE / ALU / Vector absolute difference. -def Hexagon_M2_vabsdiffh: +def HEXAGON_M2_vabsdiffh: di_MInst_didi <"vabsdiffh",int_hexagon_M2_vabsdiffh>; -def Hexagon_M2_vabsdiffw: +def HEXAGON_M2_vabsdiffw: di_MInst_didi <"vabsdiffw",int_hexagon_M2_vabsdiffw>; // MTYPE / ALU / XOR and xor with destination. -def Hexagon_M2_xor_xacc: +def HEXAGON_M2_xor_xacc: si_MInst_sisisi_xacc <"xor", int_hexagon_M2_xor_xacc>; @@ -2316,91 +2347,91 @@ def Hexagon_M2_xor_xacc: // MTYPE / COMPLEX / Complex multiply. // Rdd[-+]=cmpy(Rs, Rt:<<1]:sat -def Hexagon_M2_cmpys_s1: +def HEXAGON_M2_cmpys_s1: di_MInst_sisi_s1_sat <"cmpy", int_hexagon_M2_cmpys_s1>; -def Hexagon_M2_cmpys_s0: +def HEXAGON_M2_cmpys_s0: di_MInst_sisi_sat <"cmpy", int_hexagon_M2_cmpys_s0>; -def Hexagon_M2_cmpysc_s1: +def HEXAGON_M2_cmpysc_s1: di_MInst_sisi_s1_sat_conj <"cmpy", int_hexagon_M2_cmpysc_s1>; -def Hexagon_M2_cmpysc_s0: +def HEXAGON_M2_cmpysc_s0: di_MInst_sisi_sat_conj <"cmpy", int_hexagon_M2_cmpysc_s0>; -def Hexagon_M2_cmacs_s1: +def HEXAGON_M2_cmacs_s1: di_MInst_disisi_acc_s1_sat <"cmpy", int_hexagon_M2_cmacs_s1>; -def Hexagon_M2_cmacs_s0: +def HEXAGON_M2_cmacs_s0: di_MInst_disisi_acc_sat <"cmpy", int_hexagon_M2_cmacs_s0>; -def Hexagon_M2_cmacsc_s1: +def HEXAGON_M2_cmacsc_s1: di_MInst_disisi_acc_s1_sat_conj <"cmpy", int_hexagon_M2_cmacsc_s1>; -def Hexagon_M2_cmacsc_s0: +def HEXAGON_M2_cmacsc_s0: di_MInst_disisi_acc_sat_conj <"cmpy", int_hexagon_M2_cmacsc_s0>; -def Hexagon_M2_cnacs_s1: +def HEXAGON_M2_cnacs_s1: di_MInst_disisi_nac_s1_sat <"cmpy", int_hexagon_M2_cnacs_s1>; -def Hexagon_M2_cnacs_s0: +def HEXAGON_M2_cnacs_s0: di_MInst_disisi_nac_sat <"cmpy", int_hexagon_M2_cnacs_s0>; -def Hexagon_M2_cnacsc_s1: +def HEXAGON_M2_cnacsc_s1: di_MInst_disisi_nac_s1_sat_conj <"cmpy", int_hexagon_M2_cnacsc_s1>; -def Hexagon_M2_cnacsc_s0: +def HEXAGON_M2_cnacsc_s0: di_MInst_disisi_nac_sat_conj <"cmpy", int_hexagon_M2_cnacsc_s0>; // MTYPE / COMPLEX / Complex multiply real or imaginary. -def Hexagon_M2_cmpyr_s0: +def HEXAGON_M2_cmpyr_s0: di_MInst_sisi <"cmpyr", int_hexagon_M2_cmpyr_s0>; -def Hexagon_M2_cmacr_s0: +def HEXAGON_M2_cmacr_s0: di_MInst_disisi_acc <"cmpyr", int_hexagon_M2_cmacr_s0>; -def Hexagon_M2_cmpyi_s0: +def HEXAGON_M2_cmpyi_s0: di_MInst_sisi <"cmpyi", int_hexagon_M2_cmpyi_s0>; -def Hexagon_M2_cmaci_s0: +def HEXAGON_M2_cmaci_s0: di_MInst_disisi_acc <"cmpyi", int_hexagon_M2_cmaci_s0>; // MTYPE / COMPLEX / Complex multiply with round and pack. // Rxx32+=cmpy(Rs32,[*]Rt32:<<1]:rnd:sat -def Hexagon_M2_cmpyrs_s0: +def HEXAGON_M2_cmpyrs_s0: si_MInst_sisi_rnd_sat <"cmpy", int_hexagon_M2_cmpyrs_s0>; -def Hexagon_M2_cmpyrs_s1: +def HEXAGON_M2_cmpyrs_s1: si_MInst_sisi_s1_rnd_sat <"cmpy", int_hexagon_M2_cmpyrs_s1>; -def Hexagon_M2_cmpyrsc_s0: +def HEXAGON_M2_cmpyrsc_s0: si_MInst_sisi_rnd_sat_conj <"cmpy", int_hexagon_M2_cmpyrsc_s0>; -def Hexagon_M2_cmpyrsc_s1: +def HEXAGON_M2_cmpyrsc_s1: si_MInst_sisi_s1_rnd_sat_conj <"cmpy", int_hexagon_M2_cmpyrsc_s1>; //MTYPE / COMPLEX / Vector complex multiply real or imaginary. 
-def Hexagon_M2_vcmpy_s0_sat_i:
+def HEXAGON_M2_vcmpy_s0_sat_i:
 di_MInst_didi_sat <"vcmpyi", int_hexagon_M2_vcmpy_s0_sat_i>;
-def Hexagon_M2_vcmpy_s1_sat_i:
+def HEXAGON_M2_vcmpy_s1_sat_i:
 di_MInst_didi_s1_sat <"vcmpyi", int_hexagon_M2_vcmpy_s1_sat_i>;
-def Hexagon_M2_vcmpy_s0_sat_r:
+def HEXAGON_M2_vcmpy_s0_sat_r:
 di_MInst_didi_sat <"vcmpyr", int_hexagon_M2_vcmpy_s0_sat_r>;
-def Hexagon_M2_vcmpy_s1_sat_r:
+def HEXAGON_M2_vcmpy_s1_sat_r:
 di_MInst_didi_s1_sat <"vcmpyr", int_hexagon_M2_vcmpy_s1_sat_r>;
-def Hexagon_M2_vcmac_s0_sat_i:
+def HEXAGON_M2_vcmac_s0_sat_i:
 di_MInst_dididi_acc_sat <"vcmpyi", int_hexagon_M2_vcmac_s0_sat_i>;
-def Hexagon_M2_vcmac_s0_sat_r:
+def HEXAGON_M2_vcmac_s0_sat_r:
 di_MInst_dididi_acc_sat <"vcmpyr", int_hexagon_M2_vcmac_s0_sat_r>;
 
 //MTYPE / COMPLEX / Vector reduce complex multiply real or imaginary.
-def Hexagon_M2_vrcmpyi_s0:
+def HEXAGON_M2_vrcmpyi_s0:
 di_MInst_didi <"vrcmpyi", int_hexagon_M2_vrcmpyi_s0>;
-def Hexagon_M2_vrcmpyr_s0:
+def HEXAGON_M2_vrcmpyr_s0:
 di_MInst_didi <"vrcmpyr", int_hexagon_M2_vrcmpyr_s0>;
-def Hexagon_M2_vrcmpyi_s0c:
+def HEXAGON_M2_vrcmpyi_s0c:
 di_MInst_didi_conj <"vrcmpyi", int_hexagon_M2_vrcmpyi_s0c>;
-def Hexagon_M2_vrcmpyr_s0c:
+def HEXAGON_M2_vrcmpyr_s0c:
 di_MInst_didi_conj <"vrcmpyr", int_hexagon_M2_vrcmpyr_s0c>;
-def Hexagon_M2_vrcmaci_s0:
+def HEXAGON_M2_vrcmaci_s0:
 di_MInst_dididi_acc <"vrcmpyi", int_hexagon_M2_vrcmaci_s0>;
-def Hexagon_M2_vrcmacr_s0:
+def HEXAGON_M2_vrcmacr_s0:
 di_MInst_dididi_acc <"vrcmpyr", int_hexagon_M2_vrcmacr_s0>;
-def Hexagon_M2_vrcmaci_s0c:
+def HEXAGON_M2_vrcmaci_s0c:
 di_MInst_dididi_acc_conj <"vrcmpyi", int_hexagon_M2_vrcmaci_s0c>;
-def Hexagon_M2_vrcmacr_s0c:
+def HEXAGON_M2_vrcmacr_s0c:
 di_MInst_dididi_acc_conj <"vrcmpyr", int_hexagon_M2_vrcmacr_s0c>;
@@ -2409,115 +2440,120 @@ def Hexagon_M2_vrcmacr_s0c:
 *********************************************************************/
 
 // MTYPE / MPYH / Multiply and use lower result.
-//def Hexagon_M2_mpysmi:
+//def HEXAGON_M2_mpysmi:
+//FIXME: Hexagon_M2_mpysmi should really be of the type si_MInst_sim9,
+// not si_MInst_sis9 - but for now, we will use s9.
+// def Hexagon_M2_mpysmi:
 // si_MInst_sim9 <"mpyi", int_hexagon_M2_mpysmi>;
-def Hexagon_M2_mpyi:
+def Hexagon_M2_mpysmi:
+ si_MInst_sis9 <"mpyi", int_hexagon_M2_mpysmi>;
+def HEXAGON_M2_mpyi:
 si_MInst_sisi <"mpyi", int_hexagon_M2_mpyi>;
-def Hexagon_M2_mpyui:
+def HEXAGON_M2_mpyui:
 si_MInst_sisi <"mpyui", int_hexagon_M2_mpyui>;
-def Hexagon_M2_macsip:
+def HEXAGON_M2_macsip:
 si_MInst_sisiu8_acc <"mpyi", int_hexagon_M2_macsip>;
-def Hexagon_M2_maci:
+def HEXAGON_M2_maci:
 si_MInst_sisisi_acc <"mpyi", int_hexagon_M2_maci>;
-def Hexagon_M2_macsin:
+def HEXAGON_M2_macsin:
 si_MInst_sisiu8_nac <"mpyi", int_hexagon_M2_macsin>;
 
 // MTYPE / MPYH / Multiply word by half (32x16).
//Rdd[+]=vmpywoh(Rss,Rtt)[:<<1][:rnd][:sat] //Rdd[+]=vmpyweh(Rss,Rtt)[:<<1][:rnd][:sat] -def Hexagon_M2_mmpyl_rs1: +def HEXAGON_M2_mmpyl_rs1: di_MInst_didi_s1_rnd_sat <"vmpyweh", int_hexagon_M2_mmpyl_rs1>; -def Hexagon_M2_mmpyl_s1: +def HEXAGON_M2_mmpyl_s1: di_MInst_didi_s1_sat <"vmpyweh", int_hexagon_M2_mmpyl_s1>; -def Hexagon_M2_mmpyl_rs0: +def HEXAGON_M2_mmpyl_rs0: di_MInst_didi_rnd_sat <"vmpyweh", int_hexagon_M2_mmpyl_rs0>; -def Hexagon_M2_mmpyl_s0: +def HEXAGON_M2_mmpyl_s0: di_MInst_didi_sat <"vmpyweh", int_hexagon_M2_mmpyl_s0>; -def Hexagon_M2_mmpyh_rs1: +def HEXAGON_M2_mmpyh_rs1: di_MInst_didi_s1_rnd_sat <"vmpywoh", int_hexagon_M2_mmpyh_rs1>; -def Hexagon_M2_mmpyh_s1: +def HEXAGON_M2_mmpyh_s1: di_MInst_didi_s1_sat <"vmpywoh", int_hexagon_M2_mmpyh_s1>; -def Hexagon_M2_mmpyh_rs0: +def HEXAGON_M2_mmpyh_rs0: di_MInst_didi_rnd_sat <"vmpywoh", int_hexagon_M2_mmpyh_rs0>; -def Hexagon_M2_mmpyh_s0: +def HEXAGON_M2_mmpyh_s0: di_MInst_didi_sat <"vmpywoh", int_hexagon_M2_mmpyh_s0>; -def Hexagon_M2_mmacls_rs1: +def HEXAGON_M2_mmacls_rs1: di_MInst_dididi_acc_s1_rnd_sat <"vmpyweh", int_hexagon_M2_mmacls_rs1>; -def Hexagon_M2_mmacls_s1: +def HEXAGON_M2_mmacls_s1: di_MInst_dididi_acc_s1_sat <"vmpyweh", int_hexagon_M2_mmacls_s1>; -def Hexagon_M2_mmacls_rs0: +def HEXAGON_M2_mmacls_rs0: di_MInst_dididi_acc_rnd_sat <"vmpyweh", int_hexagon_M2_mmacls_rs0>; -def Hexagon_M2_mmacls_s0: +def HEXAGON_M2_mmacls_s0: di_MInst_dididi_acc_sat <"vmpyweh", int_hexagon_M2_mmacls_s0>; -def Hexagon_M2_mmachs_rs1: +def HEXAGON_M2_mmachs_rs1: di_MInst_dididi_acc_s1_rnd_sat <"vmpywoh", int_hexagon_M2_mmachs_rs1>; -def Hexagon_M2_mmachs_s1: +def HEXAGON_M2_mmachs_s1: di_MInst_dididi_acc_s1_sat <"vmpywoh", int_hexagon_M2_mmachs_s1>; -def Hexagon_M2_mmachs_rs0: +def HEXAGON_M2_mmachs_rs0: di_MInst_dididi_acc_rnd_sat <"vmpywoh", int_hexagon_M2_mmachs_rs0>; -def Hexagon_M2_mmachs_s0: +def HEXAGON_M2_mmachs_s0: di_MInst_dididi_acc_sat <"vmpywoh", int_hexagon_M2_mmachs_s0>; // MTYPE / MPYH / Multiply word by unsigned half (32x16). 
//Rdd[+]=vmpywouh(Rss,Rtt)[:<<1][:rnd][:sat] //Rdd[+]=vmpyweuh(Rss,Rtt)[:<<1][:rnd][:sat] -def Hexagon_M2_mmpyul_rs1: +def HEXAGON_M2_mmpyul_rs1: di_MInst_didi_s1_rnd_sat <"vmpyweuh", int_hexagon_M2_mmpyul_rs1>; -def Hexagon_M2_mmpyul_s1: +def HEXAGON_M2_mmpyul_s1: di_MInst_didi_s1_sat <"vmpyweuh", int_hexagon_M2_mmpyul_s1>; -def Hexagon_M2_mmpyul_rs0: +def HEXAGON_M2_mmpyul_rs0: di_MInst_didi_rnd_sat <"vmpyweuh", int_hexagon_M2_mmpyul_rs0>; -def Hexagon_M2_mmpyul_s0: +def HEXAGON_M2_mmpyul_s0: di_MInst_didi_sat <"vmpyweuh", int_hexagon_M2_mmpyul_s0>; -def Hexagon_M2_mmpyuh_rs1: +def HEXAGON_M2_mmpyuh_rs1: di_MInst_didi_s1_rnd_sat <"vmpywouh", int_hexagon_M2_mmpyuh_rs1>; -def Hexagon_M2_mmpyuh_s1: +def HEXAGON_M2_mmpyuh_s1: di_MInst_didi_s1_sat <"vmpywouh", int_hexagon_M2_mmpyuh_s1>; -def Hexagon_M2_mmpyuh_rs0: +def HEXAGON_M2_mmpyuh_rs0: di_MInst_didi_rnd_sat <"vmpywouh", int_hexagon_M2_mmpyuh_rs0>; -def Hexagon_M2_mmpyuh_s0: +def HEXAGON_M2_mmpyuh_s0: di_MInst_didi_sat <"vmpywouh", int_hexagon_M2_mmpyuh_s0>; -def Hexagon_M2_mmaculs_rs1: +def HEXAGON_M2_mmaculs_rs1: di_MInst_dididi_acc_s1_rnd_sat <"vmpyweuh", int_hexagon_M2_mmaculs_rs1>; -def Hexagon_M2_mmaculs_s1: +def HEXAGON_M2_mmaculs_s1: di_MInst_dididi_acc_s1_sat <"vmpyweuh", int_hexagon_M2_mmaculs_s1>; -def Hexagon_M2_mmaculs_rs0: +def HEXAGON_M2_mmaculs_rs0: di_MInst_dididi_acc_rnd_sat <"vmpyweuh", int_hexagon_M2_mmaculs_rs0>; -def Hexagon_M2_mmaculs_s0: +def HEXAGON_M2_mmaculs_s0: di_MInst_dididi_acc_sat <"vmpyweuh", int_hexagon_M2_mmaculs_s0>; -def Hexagon_M2_mmacuhs_rs1: +def HEXAGON_M2_mmacuhs_rs1: di_MInst_dididi_acc_s1_rnd_sat <"vmpywouh", int_hexagon_M2_mmacuhs_rs1>; -def Hexagon_M2_mmacuhs_s1: +def HEXAGON_M2_mmacuhs_s1: di_MInst_dididi_acc_s1_sat <"vmpywouh", int_hexagon_M2_mmacuhs_s1>; -def Hexagon_M2_mmacuhs_rs0: +def HEXAGON_M2_mmacuhs_rs0: di_MInst_dididi_acc_rnd_sat <"vmpywouh", int_hexagon_M2_mmacuhs_rs0>; -def Hexagon_M2_mmacuhs_s0: +def HEXAGON_M2_mmacuhs_s0: di_MInst_dididi_acc_sat <"vmpywouh", int_hexagon_M2_mmacuhs_s0>; // MTYPE / MPYH / Multiply and use upper result. -def Hexagon_M2_hmmpyh_rs1: +def HEXAGON_M2_hmmpyh_rs1: si_MInst_sisi_h_s1_rnd_sat <"mpy", int_hexagon_M2_hmmpyh_rs1>; -def Hexagon_M2_hmmpyl_rs1: +def HEXAGON_M2_hmmpyl_rs1: si_MInst_sisi_l_s1_rnd_sat <"mpy", int_hexagon_M2_hmmpyl_rs1>; -def Hexagon_M2_mpy_up: +def HEXAGON_M2_mpy_up: si_MInst_sisi <"mpy", int_hexagon_M2_mpy_up>; -def Hexagon_M2_dpmpyss_rnd_s0: +def HEXAGON_M2_dpmpyss_rnd_s0: si_MInst_sisi_rnd <"mpy", int_hexagon_M2_dpmpyss_rnd_s0>; -def Hexagon_M2_mpyu_up: +def HEXAGON_M2_mpyu_up: si_MInst_sisi <"mpyu", int_hexagon_M2_mpyu_up>; // MTYPE / MPYH / Multiply and use full result. -def Hexagon_M2_dpmpyuu_s0: +def HEXAGON_M2_dpmpyuu_s0: di_MInst_sisi <"mpyu", int_hexagon_M2_dpmpyuu_s0>; -def Hexagon_M2_dpmpyuu_acc_s0: +def HEXAGON_M2_dpmpyuu_acc_s0: di_MInst_disisi_acc <"mpyu", int_hexagon_M2_dpmpyuu_acc_s0>; -def Hexagon_M2_dpmpyuu_nac_s0: +def HEXAGON_M2_dpmpyuu_nac_s0: di_MInst_disisi_nac <"mpyu", int_hexagon_M2_dpmpyuu_nac_s0>; -def Hexagon_M2_dpmpyss_s0: +def HEXAGON_M2_dpmpyss_s0: di_MInst_sisi <"mpy", int_hexagon_M2_dpmpyss_s0>; -def Hexagon_M2_dpmpyss_acc_s0: +def HEXAGON_M2_dpmpyss_acc_s0: di_MInst_disisi_acc <"mpy", int_hexagon_M2_dpmpyss_acc_s0>; -def Hexagon_M2_dpmpyss_nac_s0: +def HEXAGON_M2_dpmpyss_nac_s0: di_MInst_disisi_nac <"mpy", int_hexagon_M2_dpmpyss_nac_s0>; @@ -2528,334 +2564,334 @@ def Hexagon_M2_dpmpyss_nac_s0: // MTYPE / MPYS / Scalar 16x16 multiply signed. 
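+// (In the class names below, the hh/hl/lh/ll suffixes pick the high (.H) or
+// low (.L) halfword of the first and second source operands respectively,
+// e.g. the _hl forms compute mpy(Rs.H, Rt.L).)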
//Rd=mpy(Rs.[H|L],Rt.[H|L:<<0|:<<1]| // [:<<0[:rnd|:sat|:rnd:sat]|:<<1[:rnd|:sat|:rnd:sat]]] -def Hexagon_M2_mpy_hh_s0: +def HEXAGON_M2_mpy_hh_s0: si_MInst_sisi_hh <"mpy", int_hexagon_M2_mpy_hh_s0>; -def Hexagon_M2_mpy_hh_s1: +def HEXAGON_M2_mpy_hh_s1: si_MInst_sisi_hh_s1 <"mpy", int_hexagon_M2_mpy_hh_s1>; -def Hexagon_M2_mpy_rnd_hh_s1: +def HEXAGON_M2_mpy_rnd_hh_s1: si_MInst_sisi_rnd_hh_s1 <"mpy", int_hexagon_M2_mpy_rnd_hh_s1>; -def Hexagon_M2_mpy_sat_rnd_hh_s1: +def HEXAGON_M2_mpy_sat_rnd_hh_s1: si_MInst_sisi_sat_rnd_hh_s1 <"mpy", int_hexagon_M2_mpy_sat_rnd_hh_s1>; -def Hexagon_M2_mpy_sat_hh_s1: +def HEXAGON_M2_mpy_sat_hh_s1: si_MInst_sisi_sat_hh_s1 <"mpy", int_hexagon_M2_mpy_sat_hh_s1>; -def Hexagon_M2_mpy_rnd_hh_s0: +def HEXAGON_M2_mpy_rnd_hh_s0: si_MInst_sisi_rnd_hh <"mpy", int_hexagon_M2_mpy_rnd_hh_s0>; -def Hexagon_M2_mpy_sat_rnd_hh_s0: +def HEXAGON_M2_mpy_sat_rnd_hh_s0: si_MInst_sisi_sat_rnd_hh <"mpy", int_hexagon_M2_mpy_sat_rnd_hh_s0>; -def Hexagon_M2_mpy_sat_hh_s0: +def HEXAGON_M2_mpy_sat_hh_s0: si_MInst_sisi_sat_hh <"mpy", int_hexagon_M2_mpy_sat_hh_s0>; -def Hexagon_M2_mpy_hl_s0: +def HEXAGON_M2_mpy_hl_s0: si_MInst_sisi_hl <"mpy", int_hexagon_M2_mpy_hl_s0>; -def Hexagon_M2_mpy_hl_s1: +def HEXAGON_M2_mpy_hl_s1: si_MInst_sisi_hl_s1 <"mpy", int_hexagon_M2_mpy_hl_s1>; -def Hexagon_M2_mpy_rnd_hl_s1: +def HEXAGON_M2_mpy_rnd_hl_s1: si_MInst_sisi_rnd_hl_s1 <"mpy", int_hexagon_M2_mpy_rnd_hl_s1>; -def Hexagon_M2_mpy_sat_rnd_hl_s1: +def HEXAGON_M2_mpy_sat_rnd_hl_s1: si_MInst_sisi_sat_rnd_hl_s1 <"mpy", int_hexagon_M2_mpy_sat_rnd_hl_s1>; -def Hexagon_M2_mpy_sat_hl_s1: +def HEXAGON_M2_mpy_sat_hl_s1: si_MInst_sisi_sat_hl_s1 <"mpy", int_hexagon_M2_mpy_sat_hl_s1>; -def Hexagon_M2_mpy_rnd_hl_s0: +def HEXAGON_M2_mpy_rnd_hl_s0: si_MInst_sisi_rnd_hl <"mpy", int_hexagon_M2_mpy_rnd_hl_s0>; -def Hexagon_M2_mpy_sat_rnd_hl_s0: +def HEXAGON_M2_mpy_sat_rnd_hl_s0: si_MInst_sisi_sat_rnd_hl <"mpy", int_hexagon_M2_mpy_sat_rnd_hl_s0>; -def Hexagon_M2_mpy_sat_hl_s0: +def HEXAGON_M2_mpy_sat_hl_s0: si_MInst_sisi_sat_hl <"mpy", int_hexagon_M2_mpy_sat_hl_s0>; -def Hexagon_M2_mpy_lh_s0: +def HEXAGON_M2_mpy_lh_s0: si_MInst_sisi_lh <"mpy", int_hexagon_M2_mpy_lh_s0>; -def Hexagon_M2_mpy_lh_s1: +def HEXAGON_M2_mpy_lh_s1: si_MInst_sisi_lh_s1 <"mpy", int_hexagon_M2_mpy_lh_s1>; -def Hexagon_M2_mpy_rnd_lh_s1: +def HEXAGON_M2_mpy_rnd_lh_s1: si_MInst_sisi_rnd_lh_s1 <"mpy", int_hexagon_M2_mpy_rnd_lh_s1>; -def Hexagon_M2_mpy_sat_rnd_lh_s1: +def HEXAGON_M2_mpy_sat_rnd_lh_s1: si_MInst_sisi_sat_rnd_lh_s1 <"mpy", int_hexagon_M2_mpy_sat_rnd_lh_s1>; -def Hexagon_M2_mpy_sat_lh_s1: +def HEXAGON_M2_mpy_sat_lh_s1: si_MInst_sisi_sat_lh_s1 <"mpy", int_hexagon_M2_mpy_sat_lh_s1>; -def Hexagon_M2_mpy_rnd_lh_s0: +def HEXAGON_M2_mpy_rnd_lh_s0: si_MInst_sisi_rnd_lh <"mpy", int_hexagon_M2_mpy_rnd_lh_s0>; -def Hexagon_M2_mpy_sat_rnd_lh_s0: +def HEXAGON_M2_mpy_sat_rnd_lh_s0: si_MInst_sisi_sat_rnd_lh <"mpy", int_hexagon_M2_mpy_sat_rnd_lh_s0>; -def Hexagon_M2_mpy_sat_lh_s0: +def HEXAGON_M2_mpy_sat_lh_s0: si_MInst_sisi_sat_lh <"mpy", int_hexagon_M2_mpy_sat_lh_s0>; -def Hexagon_M2_mpy_ll_s0: +def HEXAGON_M2_mpy_ll_s0: si_MInst_sisi_ll <"mpy", int_hexagon_M2_mpy_ll_s0>; -def Hexagon_M2_mpy_ll_s1: +def HEXAGON_M2_mpy_ll_s1: si_MInst_sisi_ll_s1 <"mpy", int_hexagon_M2_mpy_ll_s1>; -def Hexagon_M2_mpy_rnd_ll_s1: +def HEXAGON_M2_mpy_rnd_ll_s1: si_MInst_sisi_rnd_ll_s1 <"mpy", int_hexagon_M2_mpy_rnd_ll_s1>; -def Hexagon_M2_mpy_sat_rnd_ll_s1: +def HEXAGON_M2_mpy_sat_rnd_ll_s1: si_MInst_sisi_sat_rnd_ll_s1 <"mpy", int_hexagon_M2_mpy_sat_rnd_ll_s1>; -def 
Hexagon_M2_mpy_sat_ll_s1: +def HEXAGON_M2_mpy_sat_ll_s1: si_MInst_sisi_sat_ll_s1 <"mpy", int_hexagon_M2_mpy_sat_ll_s1>; -def Hexagon_M2_mpy_rnd_ll_s0: +def HEXAGON_M2_mpy_rnd_ll_s0: si_MInst_sisi_rnd_ll <"mpy", int_hexagon_M2_mpy_rnd_ll_s0>; -def Hexagon_M2_mpy_sat_rnd_ll_s0: +def HEXAGON_M2_mpy_sat_rnd_ll_s0: si_MInst_sisi_sat_rnd_ll <"mpy", int_hexagon_M2_mpy_sat_rnd_ll_s0>; -def Hexagon_M2_mpy_sat_ll_s0: +def HEXAGON_M2_mpy_sat_ll_s0: si_MInst_sisi_sat_ll <"mpy", int_hexagon_M2_mpy_sat_ll_s0>; //Rdd=mpy(Rs.[H|L],Rt.[H|L])[[:<<0|:<<1]|[:<<0:rnd|:<<1:rnd]] -def Hexagon_M2_mpyd_hh_s0: +def HEXAGON_M2_mpyd_hh_s0: di_MInst_sisi_hh <"mpy", int_hexagon_M2_mpyd_hh_s0>; -def Hexagon_M2_mpyd_hh_s1: +def HEXAGON_M2_mpyd_hh_s1: di_MInst_sisi_hh_s1 <"mpy", int_hexagon_M2_mpyd_hh_s1>; -def Hexagon_M2_mpyd_rnd_hh_s1: +def HEXAGON_M2_mpyd_rnd_hh_s1: di_MInst_sisi_rnd_hh_s1 <"mpy", int_hexagon_M2_mpyd_rnd_hh_s1>; -def Hexagon_M2_mpyd_rnd_hh_s0: +def HEXAGON_M2_mpyd_rnd_hh_s0: di_MInst_sisi_rnd_hh <"mpy", int_hexagon_M2_mpyd_rnd_hh_s0>; -def Hexagon_M2_mpyd_hl_s0: +def HEXAGON_M2_mpyd_hl_s0: di_MInst_sisi_hl <"mpy", int_hexagon_M2_mpyd_hl_s0>; -def Hexagon_M2_mpyd_hl_s1: +def HEXAGON_M2_mpyd_hl_s1: di_MInst_sisi_hl_s1 <"mpy", int_hexagon_M2_mpyd_hl_s1>; -def Hexagon_M2_mpyd_rnd_hl_s1: +def HEXAGON_M2_mpyd_rnd_hl_s1: di_MInst_sisi_rnd_hl_s1 <"mpy", int_hexagon_M2_mpyd_rnd_hl_s1>; -def Hexagon_M2_mpyd_rnd_hl_s0: +def HEXAGON_M2_mpyd_rnd_hl_s0: di_MInst_sisi_rnd_hl <"mpy", int_hexagon_M2_mpyd_rnd_hl_s0>; -def Hexagon_M2_mpyd_lh_s0: +def HEXAGON_M2_mpyd_lh_s0: di_MInst_sisi_lh <"mpy", int_hexagon_M2_mpyd_lh_s0>; -def Hexagon_M2_mpyd_lh_s1: +def HEXAGON_M2_mpyd_lh_s1: di_MInst_sisi_lh_s1 <"mpy", int_hexagon_M2_mpyd_lh_s1>; -def Hexagon_M2_mpyd_rnd_lh_s1: +def HEXAGON_M2_mpyd_rnd_lh_s1: di_MInst_sisi_rnd_lh_s1 <"mpy", int_hexagon_M2_mpyd_rnd_lh_s1>; -def Hexagon_M2_mpyd_rnd_lh_s0: +def HEXAGON_M2_mpyd_rnd_lh_s0: di_MInst_sisi_rnd_lh <"mpy", int_hexagon_M2_mpyd_rnd_lh_s0>; -def Hexagon_M2_mpyd_ll_s0: +def HEXAGON_M2_mpyd_ll_s0: di_MInst_sisi_ll <"mpy", int_hexagon_M2_mpyd_ll_s0>; -def Hexagon_M2_mpyd_ll_s1: +def HEXAGON_M2_mpyd_ll_s1: di_MInst_sisi_ll_s1 <"mpy", int_hexagon_M2_mpyd_ll_s1>; -def Hexagon_M2_mpyd_rnd_ll_s1: +def HEXAGON_M2_mpyd_rnd_ll_s1: di_MInst_sisi_rnd_ll_s1 <"mpy", int_hexagon_M2_mpyd_rnd_ll_s1>; -def Hexagon_M2_mpyd_rnd_ll_s0: +def HEXAGON_M2_mpyd_rnd_ll_s0: di_MInst_sisi_rnd_ll <"mpy", int_hexagon_M2_mpyd_rnd_ll_s0>; //Rx+=mpy(Rs.[H|L],Rt.[H|L])[[[:<<0|:<<1]|[:<<0:sat|:<<1:sat]] -def Hexagon_M2_mpy_acc_hh_s0: +def HEXAGON_M2_mpy_acc_hh_s0: si_MInst_sisisi_acc_hh <"mpy", int_hexagon_M2_mpy_acc_hh_s0>; -def Hexagon_M2_mpy_acc_hh_s1: +def HEXAGON_M2_mpy_acc_hh_s1: si_MInst_sisisi_acc_hh_s1 <"mpy", int_hexagon_M2_mpy_acc_hh_s1>; -def Hexagon_M2_mpy_acc_sat_hh_s1: +def HEXAGON_M2_mpy_acc_sat_hh_s1: si_MInst_sisisi_acc_sat_hh_s1 <"mpy", int_hexagon_M2_mpy_acc_sat_hh_s1>; -def Hexagon_M2_mpy_acc_sat_hh_s0: +def HEXAGON_M2_mpy_acc_sat_hh_s0: si_MInst_sisisi_acc_sat_hh <"mpy", int_hexagon_M2_mpy_acc_sat_hh_s0>; -def Hexagon_M2_mpy_acc_hl_s0: +def HEXAGON_M2_mpy_acc_hl_s0: si_MInst_sisisi_acc_hl <"mpy", int_hexagon_M2_mpy_acc_hl_s0>; -def Hexagon_M2_mpy_acc_hl_s1: +def HEXAGON_M2_mpy_acc_hl_s1: si_MInst_sisisi_acc_hl_s1 <"mpy", int_hexagon_M2_mpy_acc_hl_s1>; -def Hexagon_M2_mpy_acc_sat_hl_s1: +def HEXAGON_M2_mpy_acc_sat_hl_s1: si_MInst_sisisi_acc_sat_hl_s1 <"mpy", int_hexagon_M2_mpy_acc_sat_hl_s1>; -def Hexagon_M2_mpy_acc_sat_hl_s0: +def HEXAGON_M2_mpy_acc_sat_hl_s0: si_MInst_sisisi_acc_sat_hl 
<"mpy", int_hexagon_M2_mpy_acc_sat_hl_s0>; -def Hexagon_M2_mpy_acc_lh_s0: +def HEXAGON_M2_mpy_acc_lh_s0: si_MInst_sisisi_acc_lh <"mpy", int_hexagon_M2_mpy_acc_lh_s0>; -def Hexagon_M2_mpy_acc_lh_s1: +def HEXAGON_M2_mpy_acc_lh_s1: si_MInst_sisisi_acc_lh_s1 <"mpy", int_hexagon_M2_mpy_acc_lh_s1>; -def Hexagon_M2_mpy_acc_sat_lh_s1: +def HEXAGON_M2_mpy_acc_sat_lh_s1: si_MInst_sisisi_acc_sat_lh_s1 <"mpy", int_hexagon_M2_mpy_acc_sat_lh_s1>; -def Hexagon_M2_mpy_acc_sat_lh_s0: +def HEXAGON_M2_mpy_acc_sat_lh_s0: si_MInst_sisisi_acc_sat_lh <"mpy", int_hexagon_M2_mpy_acc_sat_lh_s0>; -def Hexagon_M2_mpy_acc_ll_s0: +def HEXAGON_M2_mpy_acc_ll_s0: si_MInst_sisisi_acc_ll <"mpy", int_hexagon_M2_mpy_acc_ll_s0>; -def Hexagon_M2_mpy_acc_ll_s1: +def HEXAGON_M2_mpy_acc_ll_s1: si_MInst_sisisi_acc_ll_s1 <"mpy", int_hexagon_M2_mpy_acc_ll_s1>; -def Hexagon_M2_mpy_acc_sat_ll_s1: +def HEXAGON_M2_mpy_acc_sat_ll_s1: si_MInst_sisisi_acc_sat_ll_s1 <"mpy", int_hexagon_M2_mpy_acc_sat_ll_s1>; -def Hexagon_M2_mpy_acc_sat_ll_s0: +def HEXAGON_M2_mpy_acc_sat_ll_s0: si_MInst_sisisi_acc_sat_ll <"mpy", int_hexagon_M2_mpy_acc_sat_ll_s0>; //Rx-=mpy(Rs.[H|L],Rt.[H|L])[[[:<<0|:<<1]|[:<<0:sat|:<<1:sat]] -def Hexagon_M2_mpy_nac_hh_s0: +def HEXAGON_M2_mpy_nac_hh_s0: si_MInst_sisisi_nac_hh <"mpy", int_hexagon_M2_mpy_nac_hh_s0>; -def Hexagon_M2_mpy_nac_hh_s1: +def HEXAGON_M2_mpy_nac_hh_s1: si_MInst_sisisi_nac_hh_s1 <"mpy", int_hexagon_M2_mpy_nac_hh_s1>; -def Hexagon_M2_mpy_nac_sat_hh_s1: +def HEXAGON_M2_mpy_nac_sat_hh_s1: si_MInst_sisisi_nac_sat_hh_s1 <"mpy", int_hexagon_M2_mpy_nac_sat_hh_s1>; -def Hexagon_M2_mpy_nac_sat_hh_s0: +def HEXAGON_M2_mpy_nac_sat_hh_s0: si_MInst_sisisi_nac_sat_hh <"mpy", int_hexagon_M2_mpy_nac_sat_hh_s0>; -def Hexagon_M2_mpy_nac_hl_s0: +def HEXAGON_M2_mpy_nac_hl_s0: si_MInst_sisisi_nac_hl <"mpy", int_hexagon_M2_mpy_nac_hl_s0>; -def Hexagon_M2_mpy_nac_hl_s1: +def HEXAGON_M2_mpy_nac_hl_s1: si_MInst_sisisi_nac_hl_s1 <"mpy", int_hexagon_M2_mpy_nac_hl_s1>; -def Hexagon_M2_mpy_nac_sat_hl_s1: +def HEXAGON_M2_mpy_nac_sat_hl_s1: si_MInst_sisisi_nac_sat_hl_s1 <"mpy", int_hexagon_M2_mpy_nac_sat_hl_s1>; -def Hexagon_M2_mpy_nac_sat_hl_s0: +def HEXAGON_M2_mpy_nac_sat_hl_s0: si_MInst_sisisi_nac_sat_hl <"mpy", int_hexagon_M2_mpy_nac_sat_hl_s0>; -def Hexagon_M2_mpy_nac_lh_s0: +def HEXAGON_M2_mpy_nac_lh_s0: si_MInst_sisisi_nac_lh <"mpy", int_hexagon_M2_mpy_nac_lh_s0>; -def Hexagon_M2_mpy_nac_lh_s1: +def HEXAGON_M2_mpy_nac_lh_s1: si_MInst_sisisi_nac_lh_s1 <"mpy", int_hexagon_M2_mpy_nac_lh_s1>; -def Hexagon_M2_mpy_nac_sat_lh_s1: +def HEXAGON_M2_mpy_nac_sat_lh_s1: si_MInst_sisisi_nac_sat_lh_s1 <"mpy", int_hexagon_M2_mpy_nac_sat_lh_s1>; -def Hexagon_M2_mpy_nac_sat_lh_s0: +def HEXAGON_M2_mpy_nac_sat_lh_s0: si_MInst_sisisi_nac_sat_lh <"mpy", int_hexagon_M2_mpy_nac_sat_lh_s0>; -def Hexagon_M2_mpy_nac_ll_s0: +def HEXAGON_M2_mpy_nac_ll_s0: si_MInst_sisisi_nac_ll <"mpy", int_hexagon_M2_mpy_nac_ll_s0>; -def Hexagon_M2_mpy_nac_ll_s1: +def HEXAGON_M2_mpy_nac_ll_s1: si_MInst_sisisi_nac_ll_s1 <"mpy", int_hexagon_M2_mpy_nac_ll_s1>; -def Hexagon_M2_mpy_nac_sat_ll_s1: +def HEXAGON_M2_mpy_nac_sat_ll_s1: si_MInst_sisisi_nac_sat_ll_s1 <"mpy", int_hexagon_M2_mpy_nac_sat_ll_s1>; -def Hexagon_M2_mpy_nac_sat_ll_s0: +def HEXAGON_M2_mpy_nac_sat_ll_s0: si_MInst_sisisi_nac_sat_ll <"mpy", int_hexagon_M2_mpy_nac_sat_ll_s0>; //Rx+=mpy(Rs.[H|L],Rt.[H|L:<<0|:<<1] -def Hexagon_M2_mpyd_acc_hh_s0: +def HEXAGON_M2_mpyd_acc_hh_s0: di_MInst_disisi_acc_hh <"mpy", int_hexagon_M2_mpyd_acc_hh_s0>; -def Hexagon_M2_mpyd_acc_hh_s1: +def HEXAGON_M2_mpyd_acc_hh_s1: 
di_MInst_disisi_acc_hh_s1 <"mpy", int_hexagon_M2_mpyd_acc_hh_s1>; -def Hexagon_M2_mpyd_acc_hl_s0: +def HEXAGON_M2_mpyd_acc_hl_s0: di_MInst_disisi_acc_hl <"mpy", int_hexagon_M2_mpyd_acc_hl_s0>; -def Hexagon_M2_mpyd_acc_hl_s1: +def HEXAGON_M2_mpyd_acc_hl_s1: di_MInst_disisi_acc_hl_s1 <"mpy", int_hexagon_M2_mpyd_acc_hl_s1>; -def Hexagon_M2_mpyd_acc_lh_s0: +def HEXAGON_M2_mpyd_acc_lh_s0: di_MInst_disisi_acc_lh <"mpy", int_hexagon_M2_mpyd_acc_lh_s0>; -def Hexagon_M2_mpyd_acc_lh_s1: +def HEXAGON_M2_mpyd_acc_lh_s1: di_MInst_disisi_acc_lh_s1 <"mpy", int_hexagon_M2_mpyd_acc_lh_s1>; -def Hexagon_M2_mpyd_acc_ll_s0: +def HEXAGON_M2_mpyd_acc_ll_s0: di_MInst_disisi_acc_ll <"mpy", int_hexagon_M2_mpyd_acc_ll_s0>; -def Hexagon_M2_mpyd_acc_ll_s1: +def HEXAGON_M2_mpyd_acc_ll_s1: di_MInst_disisi_acc_ll_s1 <"mpy", int_hexagon_M2_mpyd_acc_ll_s1>; //Rx-=mpy(Rs.[H|L],Rt.[H|L:<<0|:<<1] -def Hexagon_M2_mpyd_nac_hh_s0: +def HEXAGON_M2_mpyd_nac_hh_s0: di_MInst_disisi_nac_hh <"mpy", int_hexagon_M2_mpyd_nac_hh_s0>; -def Hexagon_M2_mpyd_nac_hh_s1: +def HEXAGON_M2_mpyd_nac_hh_s1: di_MInst_disisi_nac_hh_s1 <"mpy", int_hexagon_M2_mpyd_nac_hh_s1>; -def Hexagon_M2_mpyd_nac_hl_s0: +def HEXAGON_M2_mpyd_nac_hl_s0: di_MInst_disisi_nac_hl <"mpy", int_hexagon_M2_mpyd_nac_hl_s0>; -def Hexagon_M2_mpyd_nac_hl_s1: +def HEXAGON_M2_mpyd_nac_hl_s1: di_MInst_disisi_nac_hl_s1 <"mpy", int_hexagon_M2_mpyd_nac_hl_s1>; -def Hexagon_M2_mpyd_nac_lh_s0: +def HEXAGON_M2_mpyd_nac_lh_s0: di_MInst_disisi_nac_lh <"mpy", int_hexagon_M2_mpyd_nac_lh_s0>; -def Hexagon_M2_mpyd_nac_lh_s1: +def HEXAGON_M2_mpyd_nac_lh_s1: di_MInst_disisi_nac_lh_s1 <"mpy", int_hexagon_M2_mpyd_nac_lh_s1>; -def Hexagon_M2_mpyd_nac_ll_s0: +def HEXAGON_M2_mpyd_nac_ll_s0: di_MInst_disisi_nac_ll <"mpy", int_hexagon_M2_mpyd_nac_ll_s0>; -def Hexagon_M2_mpyd_nac_ll_s1: +def HEXAGON_M2_mpyd_nac_ll_s1: di_MInst_disisi_nac_ll_s1 <"mpy", int_hexagon_M2_mpyd_nac_ll_s1>; // MTYPE / MPYS / Scalar 16x16 multiply unsigned. 
//Rd=mpyu(Rs.[H|L],Rt.[H|L])[:<<0|:<<1] -def Hexagon_M2_mpyu_hh_s0: +def HEXAGON_M2_mpyu_hh_s0: si_MInst_sisi_hh <"mpyu", int_hexagon_M2_mpyu_hh_s0>; -def Hexagon_M2_mpyu_hh_s1: +def HEXAGON_M2_mpyu_hh_s1: si_MInst_sisi_hh_s1 <"mpyu", int_hexagon_M2_mpyu_hh_s1>; -def Hexagon_M2_mpyu_hl_s0: +def HEXAGON_M2_mpyu_hl_s0: si_MInst_sisi_hl <"mpyu", int_hexagon_M2_mpyu_hl_s0>; -def Hexagon_M2_mpyu_hl_s1: +def HEXAGON_M2_mpyu_hl_s1: si_MInst_sisi_hl_s1 <"mpyu", int_hexagon_M2_mpyu_hl_s1>; -def Hexagon_M2_mpyu_lh_s0: +def HEXAGON_M2_mpyu_lh_s0: si_MInst_sisi_lh <"mpyu", int_hexagon_M2_mpyu_lh_s0>; -def Hexagon_M2_mpyu_lh_s1: +def HEXAGON_M2_mpyu_lh_s1: si_MInst_sisi_lh_s1 <"mpyu", int_hexagon_M2_mpyu_lh_s1>; -def Hexagon_M2_mpyu_ll_s0: +def HEXAGON_M2_mpyu_ll_s0: si_MInst_sisi_ll <"mpyu", int_hexagon_M2_mpyu_ll_s0>; -def Hexagon_M2_mpyu_ll_s1: +def HEXAGON_M2_mpyu_ll_s1: si_MInst_sisi_ll_s1 <"mpyu", int_hexagon_M2_mpyu_ll_s1>; //Rdd=mpyu(Rs.[H|L],Rt.[H|L])[:<<0|:<<1] -def Hexagon_M2_mpyud_hh_s0: +def HEXAGON_M2_mpyud_hh_s0: di_MInst_sisi_hh <"mpyu", int_hexagon_M2_mpyud_hh_s0>; -def Hexagon_M2_mpyud_hh_s1: +def HEXAGON_M2_mpyud_hh_s1: di_MInst_sisi_hh_s1 <"mpyu", int_hexagon_M2_mpyud_hh_s1>; -def Hexagon_M2_mpyud_hl_s0: +def HEXAGON_M2_mpyud_hl_s0: di_MInst_sisi_hl <"mpyu", int_hexagon_M2_mpyud_hl_s0>; -def Hexagon_M2_mpyud_hl_s1: +def HEXAGON_M2_mpyud_hl_s1: di_MInst_sisi_hl_s1 <"mpyu", int_hexagon_M2_mpyud_hl_s1>; -def Hexagon_M2_mpyud_lh_s0: +def HEXAGON_M2_mpyud_lh_s0: di_MInst_sisi_lh <"mpyu", int_hexagon_M2_mpyud_lh_s0>; -def Hexagon_M2_mpyud_lh_s1: +def HEXAGON_M2_mpyud_lh_s1: di_MInst_sisi_lh_s1 <"mpyu", int_hexagon_M2_mpyud_lh_s1>; -def Hexagon_M2_mpyud_ll_s0: +def HEXAGON_M2_mpyud_ll_s0: di_MInst_sisi_ll <"mpyu", int_hexagon_M2_mpyud_ll_s0>; -def Hexagon_M2_mpyud_ll_s1: +def HEXAGON_M2_mpyud_ll_s1: di_MInst_sisi_ll_s1 <"mpyu", int_hexagon_M2_mpyud_ll_s1>; //Rd+=mpyu(Rs.[H|L],Rt.[H|L])[:<<0|:<<1] -def Hexagon_M2_mpyu_acc_hh_s0: +def HEXAGON_M2_mpyu_acc_hh_s0: si_MInst_sisisi_acc_hh <"mpyu", int_hexagon_M2_mpyu_acc_hh_s0>; -def Hexagon_M2_mpyu_acc_hh_s1: +def HEXAGON_M2_mpyu_acc_hh_s1: si_MInst_sisisi_acc_hh_s1 <"mpyu", int_hexagon_M2_mpyu_acc_hh_s1>; -def Hexagon_M2_mpyu_acc_hl_s0: +def HEXAGON_M2_mpyu_acc_hl_s0: si_MInst_sisisi_acc_hl <"mpyu", int_hexagon_M2_mpyu_acc_hl_s0>; -def Hexagon_M2_mpyu_acc_hl_s1: +def HEXAGON_M2_mpyu_acc_hl_s1: si_MInst_sisisi_acc_hl_s1 <"mpyu", int_hexagon_M2_mpyu_acc_hl_s1>; -def Hexagon_M2_mpyu_acc_lh_s0: +def HEXAGON_M2_mpyu_acc_lh_s0: si_MInst_sisisi_acc_lh <"mpyu", int_hexagon_M2_mpyu_acc_lh_s0>; -def Hexagon_M2_mpyu_acc_lh_s1: +def HEXAGON_M2_mpyu_acc_lh_s1: si_MInst_sisisi_acc_lh_s1 <"mpyu", int_hexagon_M2_mpyu_acc_lh_s1>; -def Hexagon_M2_mpyu_acc_ll_s0: +def HEXAGON_M2_mpyu_acc_ll_s0: si_MInst_sisisi_acc_ll <"mpyu", int_hexagon_M2_mpyu_acc_ll_s0>; -def Hexagon_M2_mpyu_acc_ll_s1: +def HEXAGON_M2_mpyu_acc_ll_s1: si_MInst_sisisi_acc_ll_s1 <"mpyu", int_hexagon_M2_mpyu_acc_ll_s1>; //Rd+=mpyu(Rs.[H|L],Rt.[H|L])[:<<0|:<<1] -def Hexagon_M2_mpyu_nac_hh_s0: +def HEXAGON_M2_mpyu_nac_hh_s0: si_MInst_sisisi_nac_hh <"mpyu", int_hexagon_M2_mpyu_nac_hh_s0>; -def Hexagon_M2_mpyu_nac_hh_s1: +def HEXAGON_M2_mpyu_nac_hh_s1: si_MInst_sisisi_nac_hh_s1 <"mpyu", int_hexagon_M2_mpyu_nac_hh_s1>; -def Hexagon_M2_mpyu_nac_hl_s0: +def HEXAGON_M2_mpyu_nac_hl_s0: si_MInst_sisisi_nac_hl <"mpyu", int_hexagon_M2_mpyu_nac_hl_s0>; -def Hexagon_M2_mpyu_nac_hl_s1: +def HEXAGON_M2_mpyu_nac_hl_s1: si_MInst_sisisi_nac_hl_s1 <"mpyu", int_hexagon_M2_mpyu_nac_hl_s1>; -def 
Hexagon_M2_mpyu_nac_lh_s0: +def HEXAGON_M2_mpyu_nac_lh_s0: si_MInst_sisisi_nac_lh <"mpyu", int_hexagon_M2_mpyu_nac_lh_s0>; -def Hexagon_M2_mpyu_nac_lh_s1: +def HEXAGON_M2_mpyu_nac_lh_s1: si_MInst_sisisi_nac_lh_s1 <"mpyu", int_hexagon_M2_mpyu_nac_lh_s1>; -def Hexagon_M2_mpyu_nac_ll_s0: +def HEXAGON_M2_mpyu_nac_ll_s0: si_MInst_sisisi_nac_ll <"mpyu", int_hexagon_M2_mpyu_nac_ll_s0>; -def Hexagon_M2_mpyu_nac_ll_s1: +def HEXAGON_M2_mpyu_nac_ll_s1: si_MInst_sisisi_nac_ll_s1 <"mpyu", int_hexagon_M2_mpyu_nac_ll_s1>; //Rdd+=mpyu(Rs.[H|L],Rt.[H|L])[:<<0|:<<1] -def Hexagon_M2_mpyud_acc_hh_s0: +def HEXAGON_M2_mpyud_acc_hh_s0: di_MInst_disisi_acc_hh <"mpyu", int_hexagon_M2_mpyud_acc_hh_s0>; -def Hexagon_M2_mpyud_acc_hh_s1: +def HEXAGON_M2_mpyud_acc_hh_s1: di_MInst_disisi_acc_hh_s1 <"mpyu", int_hexagon_M2_mpyud_acc_hh_s1>; -def Hexagon_M2_mpyud_acc_hl_s0: +def HEXAGON_M2_mpyud_acc_hl_s0: di_MInst_disisi_acc_hl <"mpyu", int_hexagon_M2_mpyud_acc_hl_s0>; -def Hexagon_M2_mpyud_acc_hl_s1: +def HEXAGON_M2_mpyud_acc_hl_s1: di_MInst_disisi_acc_hl_s1 <"mpyu", int_hexagon_M2_mpyud_acc_hl_s1>; -def Hexagon_M2_mpyud_acc_lh_s0: +def HEXAGON_M2_mpyud_acc_lh_s0: di_MInst_disisi_acc_lh <"mpyu", int_hexagon_M2_mpyud_acc_lh_s0>; -def Hexagon_M2_mpyud_acc_lh_s1: +def HEXAGON_M2_mpyud_acc_lh_s1: di_MInst_disisi_acc_lh_s1 <"mpyu", int_hexagon_M2_mpyud_acc_lh_s1>; -def Hexagon_M2_mpyud_acc_ll_s0: +def HEXAGON_M2_mpyud_acc_ll_s0: di_MInst_disisi_acc_ll <"mpyu", int_hexagon_M2_mpyud_acc_ll_s0>; -def Hexagon_M2_mpyud_acc_ll_s1: +def HEXAGON_M2_mpyud_acc_ll_s1: di_MInst_disisi_acc_ll_s1 <"mpyu", int_hexagon_M2_mpyud_acc_ll_s1>; //Rdd-=mpyu(Rs.[H|L],Rt.[H|L])[:<<0|:<<1] -def Hexagon_M2_mpyud_nac_hh_s0: +def HEXAGON_M2_mpyud_nac_hh_s0: di_MInst_disisi_nac_hh <"mpyu", int_hexagon_M2_mpyud_nac_hh_s0>; -def Hexagon_M2_mpyud_nac_hh_s1: +def HEXAGON_M2_mpyud_nac_hh_s1: di_MInst_disisi_nac_hh_s1 <"mpyu", int_hexagon_M2_mpyud_nac_hh_s1>; -def Hexagon_M2_mpyud_nac_hl_s0: +def HEXAGON_M2_mpyud_nac_hl_s0: di_MInst_disisi_nac_hl <"mpyu", int_hexagon_M2_mpyud_nac_hl_s0>; -def Hexagon_M2_mpyud_nac_hl_s1: +def HEXAGON_M2_mpyud_nac_hl_s1: di_MInst_disisi_nac_hl_s1 <"mpyu", int_hexagon_M2_mpyud_nac_hl_s1>; -def Hexagon_M2_mpyud_nac_lh_s0: +def HEXAGON_M2_mpyud_nac_lh_s0: di_MInst_disisi_nac_lh <"mpyu", int_hexagon_M2_mpyud_nac_lh_s0>; -def Hexagon_M2_mpyud_nac_lh_s1: +def HEXAGON_M2_mpyud_nac_lh_s1: di_MInst_disisi_nac_lh_s1 <"mpyu", int_hexagon_M2_mpyud_nac_lh_s1>; -def Hexagon_M2_mpyud_nac_ll_s0: +def HEXAGON_M2_mpyud_nac_ll_s0: di_MInst_disisi_nac_ll <"mpyu", int_hexagon_M2_mpyud_nac_ll_s0>; -def Hexagon_M2_mpyud_nac_ll_s1: +def HEXAGON_M2_mpyud_nac_ll_s1: di_MInst_disisi_nac_ll_s1 <"mpyu", int_hexagon_M2_mpyud_nac_ll_s1>; @@ -2864,15 +2900,15 @@ def Hexagon_M2_mpyud_nac_ll_s1: *********************************************************************/ // MTYPE / VB / Vector reduce add unsigned bytes. -def Hexagon_A2_vraddub: +def HEXAGON_A2_vraddub: di_MInst_didi <"vraddub", int_hexagon_A2_vraddub>; -def Hexagon_A2_vraddub_acc: +def HEXAGON_A2_vraddub_acc: di_MInst_dididi_acc <"vraddub", int_hexagon_A2_vraddub_acc>; // MTYPE / VB / Vector sum of absolute differences unsigned bytes. 
-def Hexagon_A2_vrsadub: +def HEXAGON_A2_vrsadub: di_MInst_didi <"vrsadub", int_hexagon_A2_vrsadub>; -def Hexagon_A2_vrsadub_acc: +def HEXAGON_A2_vrsadub_acc: di_MInst_dididi_acc <"vrsadub", int_hexagon_A2_vrsadub_acc>; /******************************************************************** @@ -2880,56 +2916,56 @@ def Hexagon_A2_vrsadub_acc: *********************************************************************/ // MTYPE / VH / Vector dual multiply. -def Hexagon_M2_vdmpys_s1: +def HEXAGON_M2_vdmpys_s1: di_MInst_didi_s1_sat <"vdmpy", int_hexagon_M2_vdmpys_s1>; -def Hexagon_M2_vdmpys_s0: +def HEXAGON_M2_vdmpys_s0: di_MInst_didi_sat <"vdmpy", int_hexagon_M2_vdmpys_s0>; -def Hexagon_M2_vdmacs_s1: +def HEXAGON_M2_vdmacs_s1: di_MInst_dididi_acc_s1_sat <"vdmpy", int_hexagon_M2_vdmacs_s1>; -def Hexagon_M2_vdmacs_s0: +def HEXAGON_M2_vdmacs_s0: di_MInst_dididi_acc_sat <"vdmpy", int_hexagon_M2_vdmacs_s0>; // MTYPE / VH / Vector dual multiply with round and pack. -def Hexagon_M2_vdmpyrs_s0: +def HEXAGON_M2_vdmpyrs_s0: si_MInst_didi_rnd_sat <"vdmpy", int_hexagon_M2_vdmpyrs_s0>; -def Hexagon_M2_vdmpyrs_s1: +def HEXAGON_M2_vdmpyrs_s1: si_MInst_didi_s1_rnd_sat <"vdmpy", int_hexagon_M2_vdmpyrs_s1>; // MTYPE / VH / Vector multiply even halfwords. -def Hexagon_M2_vmpy2es_s1: +def HEXAGON_M2_vmpy2es_s1: di_MInst_didi_s1_sat <"vmpyeh", int_hexagon_M2_vmpy2es_s1>; -def Hexagon_M2_vmpy2es_s0: +def HEXAGON_M2_vmpy2es_s0: di_MInst_didi_sat <"vmpyeh", int_hexagon_M2_vmpy2es_s0>; -def Hexagon_M2_vmac2es: +def HEXAGON_M2_vmac2es: di_MInst_dididi_acc <"vmpyeh", int_hexagon_M2_vmac2es>; -def Hexagon_M2_vmac2es_s1: +def HEXAGON_M2_vmac2es_s1: di_MInst_dididi_acc_s1_sat <"vmpyeh", int_hexagon_M2_vmac2es_s1>; -def Hexagon_M2_vmac2es_s0: +def HEXAGON_M2_vmac2es_s0: di_MInst_dididi_acc_sat <"vmpyeh", int_hexagon_M2_vmac2es_s0>; // MTYPE / VH / Vector multiply halfwords. -def Hexagon_M2_vmpy2s_s0: +def HEXAGON_M2_vmpy2s_s0: di_MInst_sisi_sat <"vmpyh", int_hexagon_M2_vmpy2s_s0>; -def Hexagon_M2_vmpy2s_s1: +def HEXAGON_M2_vmpy2s_s1: di_MInst_sisi_s1_sat <"vmpyh", int_hexagon_M2_vmpy2s_s1>; -def Hexagon_M2_vmac2: +def HEXAGON_M2_vmac2: di_MInst_disisi_acc <"vmpyh", int_hexagon_M2_vmac2>; -def Hexagon_M2_vmac2s_s0: +def HEXAGON_M2_vmac2s_s0: di_MInst_disisi_acc_sat <"vmpyh", int_hexagon_M2_vmac2s_s0>; -def Hexagon_M2_vmac2s_s1: +def HEXAGON_M2_vmac2s_s1: di_MInst_disisi_acc_s1_sat <"vmpyh", int_hexagon_M2_vmac2s_s1>; // MTYPE / VH / Vector multiply halfwords with round and pack. -def Hexagon_M2_vmpy2s_s0pack: +def HEXAGON_M2_vmpy2s_s0pack: si_MInst_sisi_rnd_sat <"vmpyh", int_hexagon_M2_vmpy2s_s0pack>; -def Hexagon_M2_vmpy2s_s1pack: +def HEXAGON_M2_vmpy2s_s1pack: si_MInst_sisi_s1_rnd_sat <"vmpyh", int_hexagon_M2_vmpy2s_s1pack>; // MTYPE / VH / Vector reduce multiply halfwords. // Rxx32+=vrmpyh(Rss32,Rtt32) -def Hexagon_M2_vrmpy_s0: +def HEXAGON_M2_vrmpy_s0: di_MInst_didi <"vrmpyh", int_hexagon_M2_vrmpy_s0>; -def Hexagon_M2_vrmac_s0: +def HEXAGON_M2_vrmac_s0: di_MInst_dididi_acc <"vrmpyh", int_hexagon_M2_vrmac_s0>; @@ -2938,25 +2974,25 @@ def Hexagon_M2_vrmac_s0: *********************************************************************/ // STYPE / ALU / Absolute value. -def Hexagon_A2_abs: +def HEXAGON_A2_abs: si_SInst_si <"abs", int_hexagon_A2_abs>; -def Hexagon_A2_absp: +def HEXAGON_A2_absp: di_SInst_di <"abs", int_hexagon_A2_absp>; -def Hexagon_A2_abssat: +def HEXAGON_A2_abssat: si_SInst_si_sat <"abs", int_hexagon_A2_abssat>; // STYPE / ALU / Negate. 
-def Hexagon_A2_negp: +def HEXAGON_A2_negp: di_SInst_di <"neg", int_hexagon_A2_negp>; -def Hexagon_A2_negsat: +def HEXAGON_A2_negsat: si_SInst_si_sat <"neg", int_hexagon_A2_negsat>; // STYPE / ALU / Logical Not. -def Hexagon_A2_notp: +def HEXAGON_A2_notp: di_SInst_di <"not", int_hexagon_A2_notp>; // STYPE / ALU / Sign extend word to doubleword. -def Hexagon_A2_sxtw: +def HEXAGON_A2_sxtw: di_SInst_si <"sxtw", int_hexagon_A2_sxtw>; @@ -2965,88 +3001,88 @@ def Hexagon_A2_sxtw: *********************************************************************/ // STYPE / BIT / Count leading. -def Hexagon_S2_cl0: +def HEXAGON_S2_cl0: si_SInst_si <"cl0", int_hexagon_S2_cl0>; -def Hexagon_S2_cl0p: +def HEXAGON_S2_cl0p: si_SInst_di <"cl0", int_hexagon_S2_cl0p>; -def Hexagon_S2_cl1: +def HEXAGON_S2_cl1: si_SInst_si <"cl1", int_hexagon_S2_cl1>; -def Hexagon_S2_cl1p: +def HEXAGON_S2_cl1p: si_SInst_di <"cl1", int_hexagon_S2_cl1p>; -def Hexagon_S2_clb: +def HEXAGON_S2_clb: si_SInst_si <"clb", int_hexagon_S2_clb>; -def Hexagon_S2_clbp: +def HEXAGON_S2_clbp: si_SInst_di <"clb", int_hexagon_S2_clbp>; -def Hexagon_S2_clbnorm: +def HEXAGON_S2_clbnorm: si_SInst_si <"normamt", int_hexagon_S2_clbnorm>; // STYPE / BIT / Count trailing. -def Hexagon_S2_ct0: +def HEXAGON_S2_ct0: si_SInst_si <"ct0", int_hexagon_S2_ct0>; -def Hexagon_S2_ct1: +def HEXAGON_S2_ct1: si_SInst_si <"ct1", int_hexagon_S2_ct1>; // STYPE / BIT / Compare bit mask. -def HEXAGON_C2_bitsclr: +def Hexagon_C2_bitsclr: qi_SInst_sisi <"bitsclr", int_hexagon_C2_bitsclr>; -def HEXAGON_C2_bitsclri: +def Hexagon_C2_bitsclri: qi_SInst_siu6 <"bitsclr", int_hexagon_C2_bitsclri>; -def HEXAGON_C2_bitsset: +def Hexagon_C2_bitsset: qi_SInst_sisi <"bitsset", int_hexagon_C2_bitsset>; // STYPE / BIT / Extract unsigned. // Rd[d][32/64]=extractu(Rs[s],Rt[t],[imm]) -def Hexagon_S2_extractu: +def HEXAGON_S2_extractu: si_SInst_siu5u5 <"extractu",int_hexagon_S2_extractu>; -def Hexagon_S2_extractu_rp: +def HEXAGON_S2_extractu_rp: si_SInst_sidi <"extractu",int_hexagon_S2_extractu_rp>; -def Hexagon_S2_extractup: +def HEXAGON_S2_extractup: di_SInst_diu6u6 <"extractu",int_hexagon_S2_extractup>; -def Hexagon_S2_extractup_rp: +def HEXAGON_S2_extractup_rp: di_SInst_didi <"extractu",int_hexagon_S2_extractup_rp>; // STYPE / BIT / Insert bitfield. -def HEXAGON_S2_insert: +def Hexagon_S2_insert: si_SInst_sisiu5u5 <"insert", int_hexagon_S2_insert>; -def HEXAGON_S2_insert_rp: +def Hexagon_S2_insert_rp: si_SInst_sisidi <"insert", int_hexagon_S2_insert_rp>; -def HEXAGON_S2_insertp: +def Hexagon_S2_insertp: di_SInst_didiu6u6 <"insert", int_hexagon_S2_insertp>; -def HEXAGON_S2_insertp_rp: +def Hexagon_S2_insertp_rp: di_SInst_dididi <"insert", int_hexagon_S2_insertp_rp>; // STYPE / BIT / Interleave/deinterleave. -def HEXAGON_S2_interleave: +def Hexagon_S2_interleave: di_SInst_di <"interleave", int_hexagon_S2_interleave>; -def HEXAGON_S2_deinterleave: +def Hexagon_S2_deinterleave: di_SInst_di <"deinterleave", int_hexagon_S2_deinterleave>; // STYPE / BIT / Linear feedback-shift iteration. -def HEXAGON_S2_lfsp: +def Hexagon_S2_lfsp: di_SInst_didi <"lfs", int_hexagon_S2_lfsp>; // STYPE / BIT / Bit reverse. -def HEXAGON_S2_brev: +def Hexagon_S2_brev: si_SInst_si <"brev", int_hexagon_S2_brev>; // STYPE / BIT / Set/Clear/Toggle Bit.
-def Hexagon_S2_setbit_i: +def HEXAGON_S2_setbit_i: si_SInst_siu5 <"setbit", int_hexagon_S2_setbit_i>; -def Hexagon_S2_togglebit_i: +def HEXAGON_S2_togglebit_i: si_SInst_siu5 <"togglebit", int_hexagon_S2_togglebit_i>; -def Hexagon_S2_clrbit_i: +def HEXAGON_S2_clrbit_i: si_SInst_siu5 <"clrbit", int_hexagon_S2_clrbit_i>; -def Hexagon_S2_setbit_r: +def HEXAGON_S2_setbit_r: si_SInst_sisi <"setbit", int_hexagon_S2_setbit_r>; -def Hexagon_S2_togglebit_r: +def HEXAGON_S2_togglebit_r: si_SInst_sisi <"togglebit", int_hexagon_S2_togglebit_r>; -def Hexagon_S2_clrbit_r: +def HEXAGON_S2_clrbit_r: si_SInst_sisi <"clrbit", int_hexagon_S2_clrbit_r>; // STYPE / BIT / Test Bit. -def Hexagon_S2_tstbit_i: +def HEXAGON_S2_tstbit_i: qi_SInst_siu5 <"tstbit", int_hexagon_S2_tstbit_i>; -def Hexagon_S2_tstbit_r: +def HEXAGON_S2_tstbit_r: qi_SInst_sisi <"tstbit", int_hexagon_S2_tstbit_r>; @@ -3055,11 +3091,11 @@ def Hexagon_S2_tstbit_r: *********************************************************************/ // STYPE / COMPLEX / Vector Complex conjugate. -def Hexagon_A2_vconj: +def HEXAGON_A2_vconj: di_SInst_di_sat <"vconj", int_hexagon_A2_vconj>; // STYPE / COMPLEX / Vector Complex rotate. -def Hexagon_S2_vcrotate: +def HEXAGON_S2_vcrotate: di_SInst_disi <"vcrotate",int_hexagon_S2_vcrotate>; @@ -3068,102 +3104,102 @@ def Hexagon_S2_vcrotate: *********************************************************************/ // STYPE / PERM / Saturate. -def Hexagon_A2_sat: +def HEXAGON_A2_sat: si_SInst_di <"sat", int_hexagon_A2_sat>; -def Hexagon_A2_satb: +def HEXAGON_A2_satb: si_SInst_si <"satb", int_hexagon_A2_satb>; -def Hexagon_A2_sath: +def HEXAGON_A2_sath: si_SInst_si <"sath", int_hexagon_A2_sath>; -def Hexagon_A2_satub: +def HEXAGON_A2_satub: si_SInst_si <"satub", int_hexagon_A2_satub>; -def Hexagon_A2_satuh: +def HEXAGON_A2_satuh: si_SInst_si <"satuh", int_hexagon_A2_satuh>; // STYPE / PERM / Swizzle bytes. -def Hexagon_A2_swiz: +def HEXAGON_A2_swiz: si_SInst_si <"swiz", int_hexagon_A2_swiz>; // STYPE / PERM / Vector align. // Need custom lowering -def Hexagon_S2_valignib: +def HEXAGON_S2_valignib: di_SInst_didiu3 <"valignb", int_hexagon_S2_valignib>; -def Hexagon_S2_valignrb: +def HEXAGON_S2_valignrb: di_SInst_didiqi <"valignb", int_hexagon_S2_valignrb>; // STYPE / PERM / Vector round and pack. -def Hexagon_S2_vrndpackwh: +def HEXAGON_S2_vrndpackwh: si_SInst_di <"vrndwh", int_hexagon_S2_vrndpackwh>; -def Hexagon_S2_vrndpackwhs: +def HEXAGON_S2_vrndpackwhs: si_SInst_di_sat <"vrndwh", int_hexagon_S2_vrndpackwhs>; // STYPE / PERM / Vector saturate and pack. -def Hexagon_S2_svsathb: +def HEXAGON_S2_svsathb: si_SInst_si <"vsathb", int_hexagon_S2_svsathb>; -def Hexagon_S2_vsathb: +def HEXAGON_S2_vsathb: si_SInst_di <"vsathb", int_hexagon_S2_vsathb>; -def Hexagon_S2_svsathub: +def HEXAGON_S2_svsathub: si_SInst_si <"vsathub", int_hexagon_S2_svsathub>; -def Hexagon_S2_vsathub: +def HEXAGON_S2_vsathub: si_SInst_di <"vsathub", int_hexagon_S2_vsathub>; -def Hexagon_S2_vsatwh: +def HEXAGON_S2_vsatwh: si_SInst_di <"vsatwh", int_hexagon_S2_vsatwh>; -def Hexagon_S2_vsatwuh: +def HEXAGON_S2_vsatwuh: si_SInst_di <"vsatwuh", int_hexagon_S2_vsatwuh>; // STYPE / PERM / Vector saturate without pack. 
-def Hexagon_S2_vsathb_nopack: +def HEXAGON_S2_vsathb_nopack: di_SInst_di <"vsathb", int_hexagon_S2_vsathb_nopack>; -def Hexagon_S2_vsathub_nopack: +def HEXAGON_S2_vsathub_nopack: di_SInst_di <"vsathub", int_hexagon_S2_vsathub_nopack>; -def Hexagon_S2_vsatwh_nopack: +def HEXAGON_S2_vsatwh_nopack: di_SInst_di <"vsatwh", int_hexagon_S2_vsatwh_nopack>; -def Hexagon_S2_vsatwuh_nopack: +def HEXAGON_S2_vsatwuh_nopack: di_SInst_di <"vsatwuh", int_hexagon_S2_vsatwuh_nopack>; // STYPE / PERM / Vector shuffle. -def Hexagon_S2_shuffeb: +def HEXAGON_S2_shuffeb: di_SInst_didi <"shuffeb", int_hexagon_S2_shuffeb>; -def Hexagon_S2_shuffeh: +def HEXAGON_S2_shuffeh: di_SInst_didi <"shuffeh", int_hexagon_S2_shuffeh>; -def Hexagon_S2_shuffob: +def HEXAGON_S2_shuffob: di_SInst_didi <"shuffob", int_hexagon_S2_shuffob>; -def Hexagon_S2_shuffoh: +def HEXAGON_S2_shuffoh: di_SInst_didi <"shuffoh", int_hexagon_S2_shuffoh>; // STYPE / PERM / Vector splat bytes. -def Hexagon_S2_vsplatrb: +def HEXAGON_S2_vsplatrb: si_SInst_si <"vsplatb", int_hexagon_S2_vsplatrb>; // STYPE / PERM / Vector splat halfwords. -def Hexagon_S2_vsplatrh: +def HEXAGON_S2_vsplatrh: di_SInst_si <"vsplath", int_hexagon_S2_vsplatrh>; // STYPE / PERM / Vector splice. -def HEXAGON_S2_vsplicerb: +def Hexagon_S2_vsplicerb: di_SInst_didiqi <"vspliceb",int_hexagon_S2_vsplicerb>; -def HEXAGON_S2_vspliceib: +def Hexagon_S2_vspliceib: di_SInst_didiu3 <"vspliceb",int_hexagon_S2_vspliceib>; // STYPE / PERM / Sign extend. -def Hexagon_S2_vsxtbh: +def HEXAGON_S2_vsxtbh: di_SInst_si <"vsxtbh", int_hexagon_S2_vsxtbh>; -def Hexagon_S2_vsxthw: +def HEXAGON_S2_vsxthw: di_SInst_si <"vsxthw", int_hexagon_S2_vsxthw>; // STYPE / PERM / Truncate. -def Hexagon_S2_vtrunehb: +def HEXAGON_S2_vtrunehb: si_SInst_di <"vtrunehb",int_hexagon_S2_vtrunehb>; -def Hexagon_S2_vtrunohb: +def HEXAGON_S2_vtrunohb: si_SInst_di <"vtrunohb",int_hexagon_S2_vtrunohb>; -def Hexagon_S2_vtrunewh: +def HEXAGON_S2_vtrunewh: di_SInst_didi <"vtrunewh",int_hexagon_S2_vtrunewh>; -def Hexagon_S2_vtrunowh: +def HEXAGON_S2_vtrunowh: di_SInst_didi <"vtrunowh",int_hexagon_S2_vtrunowh>; // STYPE / PERM / Zero extend. -def Hexagon_S2_vzxtbh: +def HEXAGON_S2_vzxtbh: di_SInst_si <"vzxtbh", int_hexagon_S2_vzxtbh>; -def Hexagon_S2_vzxthw: +def HEXAGON_S2_vzxthw: di_SInst_si <"vzxthw", int_hexagon_S2_vzxthw>; @@ -3172,17 +3208,17 @@ def Hexagon_S2_vzxthw: *********************************************************************/ // STYPE / PRED / Mask generate from predicate. -def Hexagon_C2_mask: +def HEXAGON_C2_mask: di_SInst_qi <"mask", int_hexagon_C2_mask>; // STYPE / PRED / Predicate transfer. -def Hexagon_C2_tfrpr: +def HEXAGON_C2_tfrpr: si_SInst_qi <"", int_hexagon_C2_tfrpr>; -def Hexagon_C2_tfrrp: +def HEXAGON_C2_tfrrp: qi_SInst_si <"", int_hexagon_C2_tfrrp>; // STYPE / PRED / Viterbi pack even and odd predicate bits. -def Hexagon_C2_vitpack: +def HEXAGON_C2_vitpack: si_SInst_qiqi <"vitpack",int_hexagon_C2_vitpack>; @@ -3191,202 +3227,202 @@ def Hexagon_C2_vitpack: *********************************************************************/ // STYPE / SHIFT / Shift by immediate. 
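// Editorial note: in the shift sections below, _acc and _nac name the
// accumulating forms ("$dst += ..." and "$dst -= ..."), which tie the
// accumulator input to the output. A hedged sketch of one such class,
// assuming an SInst_acc format class analogous to the MInst_acc format used
// later in this import (the real definition lives earlier in this file):
class si_SInst_sisiu5_acc_Sketch<string opc, Intrinsic IntID>
  : SInst_acc<(outs IntRegs:$dst),
              (ins IntRegs:$dst2, IntRegs:$src1, u5Imm:$src2),
              !strconcat("$dst += ", !strconcat(opc , "($src1, #$src2)")),
              [(set IntRegs:$dst,
                    (IntID IntRegs:$dst2, IntRegs:$src1, imm:$src2))],
              "$dst2 = $dst">;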
-def Hexagon_S2_asl_i_r: +def HEXAGON_S2_asl_i_r: si_SInst_siu5 <"asl", int_hexagon_S2_asl_i_r>; -def Hexagon_S2_asr_i_r: +def HEXAGON_S2_asr_i_r: si_SInst_siu5 <"asr", int_hexagon_S2_asr_i_r>; -def Hexagon_S2_lsr_i_r: +def HEXAGON_S2_lsr_i_r: si_SInst_siu5 <"lsr", int_hexagon_S2_lsr_i_r>; -def Hexagon_S2_asl_i_p: +def HEXAGON_S2_asl_i_p: di_SInst_diu6 <"asl", int_hexagon_S2_asl_i_p>; -def Hexagon_S2_asr_i_p: +def HEXAGON_S2_asr_i_p: di_SInst_diu6 <"asr", int_hexagon_S2_asr_i_p>; -def Hexagon_S2_lsr_i_p: +def HEXAGON_S2_lsr_i_p: di_SInst_diu6 <"lsr", int_hexagon_S2_lsr_i_p>; // STYPE / SHIFT / Shift by immediate and accumulate. -def Hexagon_S2_asl_i_r_acc: +def HEXAGON_S2_asl_i_r_acc: si_SInst_sisiu5_acc <"asl", int_hexagon_S2_asl_i_r_acc>; -def Hexagon_S2_asr_i_r_acc: +def HEXAGON_S2_asr_i_r_acc: si_SInst_sisiu5_acc <"asr", int_hexagon_S2_asr_i_r_acc>; -def Hexagon_S2_lsr_i_r_acc: +def HEXAGON_S2_lsr_i_r_acc: si_SInst_sisiu5_acc <"lsr", int_hexagon_S2_lsr_i_r_acc>; -def Hexagon_S2_asl_i_r_nac: +def HEXAGON_S2_asl_i_r_nac: si_SInst_sisiu5_nac <"asl", int_hexagon_S2_asl_i_r_nac>; -def Hexagon_S2_asr_i_r_nac: +def HEXAGON_S2_asr_i_r_nac: si_SInst_sisiu5_nac <"asr", int_hexagon_S2_asr_i_r_nac>; -def Hexagon_S2_lsr_i_r_nac: +def HEXAGON_S2_lsr_i_r_nac: si_SInst_sisiu5_nac <"lsr", int_hexagon_S2_lsr_i_r_nac>; -def Hexagon_S2_asl_i_p_acc: +def HEXAGON_S2_asl_i_p_acc: di_SInst_didiu6_acc <"asl", int_hexagon_S2_asl_i_p_acc>; -def Hexagon_S2_asr_i_p_acc: +def HEXAGON_S2_asr_i_p_acc: di_SInst_didiu6_acc <"asr", int_hexagon_S2_asr_i_p_acc>; -def Hexagon_S2_lsr_i_p_acc: +def HEXAGON_S2_lsr_i_p_acc: di_SInst_didiu6_acc <"lsr", int_hexagon_S2_lsr_i_p_acc>; -def Hexagon_S2_asl_i_p_nac: +def HEXAGON_S2_asl_i_p_nac: di_SInst_didiu6_nac <"asl", int_hexagon_S2_asl_i_p_nac>; -def Hexagon_S2_asr_i_p_nac: +def HEXAGON_S2_asr_i_p_nac: di_SInst_didiu6_nac <"asr", int_hexagon_S2_asr_i_p_nac>; -def Hexagon_S2_lsr_i_p_nac: +def HEXAGON_S2_lsr_i_p_nac: di_SInst_didiu6_nac <"lsr", int_hexagon_S2_lsr_i_p_nac>; // STYPE / SHIFT / Shift by immediate and add. -def Hexagon_S2_addasl_rrri: +def HEXAGON_S2_addasl_rrri: si_SInst_sisiu3 <"addasl", int_hexagon_S2_addasl_rrri>; // STYPE / SHIFT / Shift by immediate and logical. 
-def Hexagon_S2_asl_i_r_and: +def HEXAGON_S2_asl_i_r_and: si_SInst_sisiu5_and <"asl", int_hexagon_S2_asl_i_r_and>; -def Hexagon_S2_asr_i_r_and: +def HEXAGON_S2_asr_i_r_and: si_SInst_sisiu5_and <"asr", int_hexagon_S2_asr_i_r_and>; -def Hexagon_S2_lsr_i_r_and: +def HEXAGON_S2_lsr_i_r_and: si_SInst_sisiu5_and <"lsr", int_hexagon_S2_lsr_i_r_and>; -def Hexagon_S2_asl_i_r_xacc: +def HEXAGON_S2_asl_i_r_xacc: si_SInst_sisiu5_xor <"asl", int_hexagon_S2_asl_i_r_xacc>; -def Hexagon_S2_lsr_i_r_xacc: +def HEXAGON_S2_lsr_i_r_xacc: si_SInst_sisiu5_xor <"lsr", int_hexagon_S2_lsr_i_r_xacc>; -def Hexagon_S2_asl_i_r_or: +def HEXAGON_S2_asl_i_r_or: si_SInst_sisiu5_or <"asl", int_hexagon_S2_asl_i_r_or>; -def Hexagon_S2_asr_i_r_or: +def HEXAGON_S2_asr_i_r_or: si_SInst_sisiu5_or <"asr", int_hexagon_S2_asr_i_r_or>; -def Hexagon_S2_lsr_i_r_or: +def HEXAGON_S2_lsr_i_r_or: si_SInst_sisiu5_or <"lsr", int_hexagon_S2_lsr_i_r_or>; -def Hexagon_S2_asl_i_p_and: +def HEXAGON_S2_asl_i_p_and: di_SInst_didiu6_and <"asl", int_hexagon_S2_asl_i_p_and>; -def Hexagon_S2_asr_i_p_and: +def HEXAGON_S2_asr_i_p_and: di_SInst_didiu6_and <"asr", int_hexagon_S2_asr_i_p_and>; -def Hexagon_S2_lsr_i_p_and: +def HEXAGON_S2_lsr_i_p_and: di_SInst_didiu6_and <"lsr", int_hexagon_S2_lsr_i_p_and>; -def Hexagon_S2_asl_i_p_xacc: +def HEXAGON_S2_asl_i_p_xacc: di_SInst_didiu6_xor <"asl", int_hexagon_S2_asl_i_p_xacc>; -def Hexagon_S2_lsr_i_p_xacc: +def HEXAGON_S2_lsr_i_p_xacc: di_SInst_didiu6_xor <"lsr", int_hexagon_S2_lsr_i_p_xacc>; -def Hexagon_S2_asl_i_p_or: +def HEXAGON_S2_asl_i_p_or: di_SInst_didiu6_or <"asl", int_hexagon_S2_asl_i_p_or>; -def Hexagon_S2_asr_i_p_or: +def HEXAGON_S2_asr_i_p_or: di_SInst_didiu6_or <"asr", int_hexagon_S2_asr_i_p_or>; -def Hexagon_S2_lsr_i_p_or: +def HEXAGON_S2_lsr_i_p_or: di_SInst_didiu6_or <"lsr", int_hexagon_S2_lsr_i_p_or>; // STYPE / SHIFT / Shift right by immediate with rounding. -def Hexagon_S2_asr_i_r_rnd: +def HEXAGON_S2_asr_i_r_rnd: si_SInst_siu5_rnd <"asr", int_hexagon_S2_asr_i_r_rnd>; -def Hexagon_S2_asr_i_r_rnd_goodsyntax: +def HEXAGON_S2_asr_i_r_rnd_goodsyntax: si_SInst_siu5 <"asrrnd", int_hexagon_S2_asr_i_r_rnd_goodsyntax>; // STYPE / SHIFT / Shift left by immediate with saturation. -def Hexagon_S2_asl_i_r_sat: +def HEXAGON_S2_asl_i_r_sat: si_SInst_sisi_sat <"asl", int_hexagon_S2_asl_i_r_sat>; // STYPE / SHIFT / Shift by register. -def Hexagon_S2_asl_r_r: +def HEXAGON_S2_asl_r_r: si_SInst_sisi <"asl", int_hexagon_S2_asl_r_r>; -def Hexagon_S2_asr_r_r: +def HEXAGON_S2_asr_r_r: si_SInst_sisi <"asr", int_hexagon_S2_asr_r_r>; -def Hexagon_S2_lsl_r_r: +def HEXAGON_S2_lsl_r_r: si_SInst_sisi <"lsl", int_hexagon_S2_lsl_r_r>; -def Hexagon_S2_lsr_r_r: +def HEXAGON_S2_lsr_r_r: si_SInst_sisi <"lsr", int_hexagon_S2_lsr_r_r>; -def Hexagon_S2_asl_r_p: +def HEXAGON_S2_asl_r_p: di_SInst_disi <"asl", int_hexagon_S2_asl_r_p>; -def Hexagon_S2_asr_r_p: +def HEXAGON_S2_asr_r_p: di_SInst_disi <"asr", int_hexagon_S2_asr_r_p>; -def Hexagon_S2_lsl_r_p: +def HEXAGON_S2_lsl_r_p: di_SInst_disi <"lsl", int_hexagon_S2_lsl_r_p>; -def Hexagon_S2_lsr_r_p: +def HEXAGON_S2_lsr_r_p: di_SInst_disi <"lsr", int_hexagon_S2_lsr_r_p>; // STYPE / SHIFT / Shift by register and accumulate. 
-def Hexagon_S2_asl_r_r_acc: +def HEXAGON_S2_asl_r_r_acc: si_SInst_sisisi_acc <"asl", int_hexagon_S2_asl_r_r_acc>; -def Hexagon_S2_asr_r_r_acc: +def HEXAGON_S2_asr_r_r_acc: si_SInst_sisisi_acc <"asr", int_hexagon_S2_asr_r_r_acc>; -def Hexagon_S2_lsl_r_r_acc: +def HEXAGON_S2_lsl_r_r_acc: si_SInst_sisisi_acc <"lsl", int_hexagon_S2_lsl_r_r_acc>; -def Hexagon_S2_lsr_r_r_acc: +def HEXAGON_S2_lsr_r_r_acc: si_SInst_sisisi_acc <"lsr", int_hexagon_S2_lsr_r_r_acc>; -def Hexagon_S2_asl_r_p_acc: +def HEXAGON_S2_asl_r_p_acc: di_SInst_didisi_acc <"asl", int_hexagon_S2_asl_r_p_acc>; -def Hexagon_S2_asr_r_p_acc: +def HEXAGON_S2_asr_r_p_acc: di_SInst_didisi_acc <"asr", int_hexagon_S2_asr_r_p_acc>; -def Hexagon_S2_lsl_r_p_acc: +def HEXAGON_S2_lsl_r_p_acc: di_SInst_didisi_acc <"lsl", int_hexagon_S2_lsl_r_p_acc>; -def Hexagon_S2_lsr_r_p_acc: +def HEXAGON_S2_lsr_r_p_acc: di_SInst_didisi_acc <"lsr", int_hexagon_S2_lsr_r_p_acc>; -def Hexagon_S2_asl_r_r_nac: +def HEXAGON_S2_asl_r_r_nac: si_SInst_sisisi_nac <"asl", int_hexagon_S2_asl_r_r_nac>; -def Hexagon_S2_asr_r_r_nac: +def HEXAGON_S2_asr_r_r_nac: si_SInst_sisisi_nac <"asr", int_hexagon_S2_asr_r_r_nac>; -def Hexagon_S2_lsl_r_r_nac: +def HEXAGON_S2_lsl_r_r_nac: si_SInst_sisisi_nac <"lsl", int_hexagon_S2_lsl_r_r_nac>; -def Hexagon_S2_lsr_r_r_nac: +def HEXAGON_S2_lsr_r_r_nac: si_SInst_sisisi_nac <"lsr", int_hexagon_S2_lsr_r_r_nac>; -def Hexagon_S2_asl_r_p_nac: +def HEXAGON_S2_asl_r_p_nac: di_SInst_didisi_nac <"asl", int_hexagon_S2_asl_r_p_nac>; -def Hexagon_S2_asr_r_p_nac: +def HEXAGON_S2_asr_r_p_nac: di_SInst_didisi_nac <"asr", int_hexagon_S2_asr_r_p_nac>; -def Hexagon_S2_lsl_r_p_nac: +def HEXAGON_S2_lsl_r_p_nac: di_SInst_didisi_nac <"lsl", int_hexagon_S2_lsl_r_p_nac>; -def Hexagon_S2_lsr_r_p_nac: +def HEXAGON_S2_lsr_r_p_nac: di_SInst_didisi_nac <"lsr", int_hexagon_S2_lsr_r_p_nac>; // STYPE / SHIFT / Shift by register and logical. 
-def Hexagon_S2_asl_r_r_and: +def HEXAGON_S2_asl_r_r_and: si_SInst_sisisi_and <"asl", int_hexagon_S2_asl_r_r_and>; -def Hexagon_S2_asr_r_r_and: +def HEXAGON_S2_asr_r_r_and: si_SInst_sisisi_and <"asr", int_hexagon_S2_asr_r_r_and>; -def Hexagon_S2_lsl_r_r_and: +def HEXAGON_S2_lsl_r_r_and: si_SInst_sisisi_and <"lsl", int_hexagon_S2_lsl_r_r_and>; -def Hexagon_S2_lsr_r_r_and: +def HEXAGON_S2_lsr_r_r_and: si_SInst_sisisi_and <"lsr", int_hexagon_S2_lsr_r_r_and>; -def Hexagon_S2_asl_r_r_or: +def HEXAGON_S2_asl_r_r_or: si_SInst_sisisi_or <"asl", int_hexagon_S2_asl_r_r_or>; -def Hexagon_S2_asr_r_r_or: +def HEXAGON_S2_asr_r_r_or: si_SInst_sisisi_or <"asr", int_hexagon_S2_asr_r_r_or>; -def Hexagon_S2_lsl_r_r_or: +def HEXAGON_S2_lsl_r_r_or: si_SInst_sisisi_or <"lsl", int_hexagon_S2_lsl_r_r_or>; -def Hexagon_S2_lsr_r_r_or: +def HEXAGON_S2_lsr_r_r_or: si_SInst_sisisi_or <"lsr", int_hexagon_S2_lsr_r_r_or>; -def Hexagon_S2_asl_r_p_and: +def HEXAGON_S2_asl_r_p_and: di_SInst_didisi_and <"asl", int_hexagon_S2_asl_r_p_and>; -def Hexagon_S2_asr_r_p_and: +def HEXAGON_S2_asr_r_p_and: di_SInst_didisi_and <"asr", int_hexagon_S2_asr_r_p_and>; -def Hexagon_S2_lsl_r_p_and: +def HEXAGON_S2_lsl_r_p_and: di_SInst_didisi_and <"lsl", int_hexagon_S2_lsl_r_p_and>; -def Hexagon_S2_lsr_r_p_and: +def HEXAGON_S2_lsr_r_p_and: di_SInst_didisi_and <"lsr", int_hexagon_S2_lsr_r_p_and>; -def Hexagon_S2_asl_r_p_or: +def HEXAGON_S2_asl_r_p_or: di_SInst_didisi_or <"asl", int_hexagon_S2_asl_r_p_or>; -def Hexagon_S2_asr_r_p_or: +def HEXAGON_S2_asr_r_p_or: di_SInst_didisi_or <"asr", int_hexagon_S2_asr_r_p_or>; -def Hexagon_S2_lsl_r_p_or: +def HEXAGON_S2_lsl_r_p_or: di_SInst_didisi_or <"lsl", int_hexagon_S2_lsl_r_p_or>; -def Hexagon_S2_lsr_r_p_or: +def HEXAGON_S2_lsr_r_p_or: di_SInst_didisi_or <"lsr", int_hexagon_S2_lsr_r_p_or>; // STYPE / SHIFT / Shift by register with saturation. -def Hexagon_S2_asl_r_r_sat: +def HEXAGON_S2_asl_r_r_sat: si_SInst_sisi_sat <"asl", int_hexagon_S2_asl_r_r_sat>; -def Hexagon_S2_asr_r_r_sat: +def HEXAGON_S2_asr_r_r_sat: si_SInst_sisi_sat <"asr", int_hexagon_S2_asr_r_r_sat>; // STYPE / SHIFT / Table Index. -def HEXAGON_S2_tableidxb_goodsyntax: +def Hexagon_S2_tableidxb_goodsyntax: si_MInst_sisiu4u5 <"tableidxb",int_hexagon_S2_tableidxb_goodsyntax>; -def HEXAGON_S2_tableidxd_goodsyntax: +def Hexagon_S2_tableidxd_goodsyntax: si_MInst_sisiu4u5 <"tableidxd",int_hexagon_S2_tableidxd_goodsyntax>; -def HEXAGON_S2_tableidxh_goodsyntax: +def Hexagon_S2_tableidxh_goodsyntax: si_MInst_sisiu4u5 <"tableidxh",int_hexagon_S2_tableidxh_goodsyntax>; -def HEXAGON_S2_tableidxw_goodsyntax: +def Hexagon_S2_tableidxw_goodsyntax: si_MInst_sisiu4u5 <"tableidxw",int_hexagon_S2_tableidxw_goodsyntax>; @@ -3396,29 +3432,29 @@ def HEXAGON_S2_tableidxw_goodsyntax: // STYPE / VH / Vector absolute value halfwords. // Rdd64=vabsh(Rss64) -def Hexagon_A2_vabsh: +def HEXAGON_A2_vabsh: di_SInst_di <"vabsh", int_hexagon_A2_vabsh>; -def Hexagon_A2_vabshsat: +def HEXAGON_A2_vabshsat: di_SInst_di_sat <"vabsh", int_hexagon_A2_vabshsat>; // STYPE / VH / Vector shift halfwords by immediate. // Rdd64=v[asl/asr/lsr]h(Rss64,Rt32) -def Hexagon_S2_asl_i_vh: +def HEXAGON_S2_asl_i_vh: di_SInst_disi <"vaslh", int_hexagon_S2_asl_i_vh>; -def Hexagon_S2_asr_i_vh: +def HEXAGON_S2_asr_i_vh: di_SInst_disi <"vasrh", int_hexagon_S2_asr_i_vh>; -def Hexagon_S2_lsr_i_vh: +def HEXAGON_S2_lsr_i_vh: di_SInst_disi <"vlsrh", int_hexagon_S2_lsr_i_vh>; // STYPE / VH / Vector shift halfwords by register. 
// Rdd64=v[asl/asr/lsl/lsr]w(Rss64,Rt32) -def Hexagon_S2_asl_r_vh: +def HEXAGON_S2_asl_r_vh: di_SInst_disi <"vaslh", int_hexagon_S2_asl_r_vh>; -def Hexagon_S2_asr_r_vh: +def HEXAGON_S2_asr_r_vh: di_SInst_disi <"vasrh", int_hexagon_S2_asr_r_vh>; -def Hexagon_S2_lsl_r_vh: +def HEXAGON_S2_lsl_r_vh: di_SInst_disi <"vlslh", int_hexagon_S2_lsl_r_vh>; -def Hexagon_S2_lsr_r_vh: +def HEXAGON_S2_lsr_r_vh: di_SInst_disi <"vlsrh", int_hexagon_S2_lsr_r_vh>; @@ -3427,36 +3463,41 @@ def Hexagon_S2_lsr_r_vh: *********************************************************************/ // STYPE / VW / Vector absolute value words. -def Hexagon_A2_vabsw: +def HEXAGON_A2_vabsw: di_SInst_di <"vabsw", int_hexagon_A2_vabsw>; -def Hexagon_A2_vabswsat: +def HEXAGON_A2_vabswsat: di_SInst_di_sat <"vabsw", int_hexagon_A2_vabswsat>; // STYPE / VW / Vector shift words by immediate. // Rdd64=v[asl/vsl]w(Rss64,Rt32) -def Hexagon_S2_asl_i_vw: +def HEXAGON_S2_asl_i_vw: di_SInst_disi <"vaslw", int_hexagon_S2_asl_i_vw>; -def Hexagon_S2_asr_i_vw: +def HEXAGON_S2_asr_i_vw: di_SInst_disi <"vasrw", int_hexagon_S2_asr_i_vw>; -def Hexagon_S2_lsr_i_vw: +def HEXAGON_S2_lsr_i_vw: di_SInst_disi <"vlsrw", int_hexagon_S2_lsr_i_vw>; // STYPE / VW / Vector shift words by register. // Rdd64=v[asl/vsl]w(Rss64,Rt32) -def Hexagon_S2_asl_r_vw: +def HEXAGON_S2_asl_r_vw: di_SInst_disi <"vaslw", int_hexagon_S2_asl_r_vw>; -def Hexagon_S2_asr_r_vw: +def HEXAGON_S2_asr_r_vw: di_SInst_disi <"vasrw", int_hexagon_S2_asr_r_vw>; -def Hexagon_S2_lsl_r_vw: +def HEXAGON_S2_lsl_r_vw: di_SInst_disi <"vlslw", int_hexagon_S2_lsl_r_vw>; -def Hexagon_S2_lsr_r_vw: +def HEXAGON_S2_lsr_r_vw: di_SInst_disi <"vlsrw", int_hexagon_S2_lsr_r_vw>; // STYPE / VW / Vector shift words with truncate and pack. -def Hexagon_S2_asr_r_svw_trun: +def HEXAGON_S2_asr_r_svw_trun: si_SInst_disi <"vasrw", int_hexagon_S2_asr_r_svw_trun>; -def Hexagon_S2_asr_i_svw_trun: +def HEXAGON_S2_asr_i_svw_trun: si_SInst_diu5 <"vasrw", int_hexagon_S2_asr_i_svw_trun>; +// LD / Circular loads. 
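// Editorial note: judging from the class name below (di_LDInstPI_diu4: a
// load-instruction format with post-increment, a 64-bit destination, a
// 64-bit source pair, and a u4 immediate), circ_ldd is a doubleword load
// whose address register is post-incremented with circular (modulo)
// addressing. This reading is an inference from the naming convention,
// not taken from the patch itself.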
+def HEXAGON_circ_ldd: + di_LDInstPI_diu4 <"circ_ldd", int_hexagon_circ_ldd>; + include "HexagonIntrinsicsV3.td" include "HexagonIntrinsicsV4.td" +include "HexagonIntrinsicsV5.td" diff --git a/lib/Target/Hexagon/HexagonIntrinsicsDerived.td b/lib/Target/Hexagon/HexagonIntrinsicsDerived.td index 68eaf68..2788101 100644 --- a/lib/Target/Hexagon/HexagonIntrinsicsDerived.td +++ b/lib/Target/Hexagon/HexagonIntrinsicsDerived.td @@ -12,18 +12,28 @@ // Optimized with intrinsics accumulates // def : Pat <(mul DoubleRegs:$src1, DoubleRegs:$src2), - (COMBINE_rr - (Hexagon_M2_maci - (Hexagon_M2_maci (EXTRACT_SUBREG (MPYU64 (EXTRACT_SUBREG DoubleRegs:$src1, subreg_loreg), - (EXTRACT_SUBREG DoubleRegs:$src2, subreg_loreg)), - subreg_hireg), - (EXTRACT_SUBREG DoubleRegs:$src1, subreg_loreg), - (EXTRACT_SUBREG DoubleRegs:$src2, subreg_hireg)), - (EXTRACT_SUBREG DoubleRegs:$src2, subreg_loreg), - (EXTRACT_SUBREG DoubleRegs:$src1, subreg_hireg)), - (EXTRACT_SUBREG (MPYU64 (EXTRACT_SUBREG DoubleRegs:$src1, subreg_loreg), - (EXTRACT_SUBREG DoubleRegs:$src2, subreg_loreg)), - subreg_loreg))>; + (i64 + (COMBINE_rr + (HEXAGON_M2_maci + (HEXAGON_M2_maci + (i32 + (EXTRACT_SUBREG + (i64 + (MPYU64 (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src1), + subreg_loreg)), + (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src2), + subreg_loreg)))), + subreg_hireg)), + (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src1), subreg_loreg)), + (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src2), subreg_hireg))), + (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src2), subreg_loreg)), + (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src1), subreg_hireg))), + (i32 + (EXTRACT_SUBREG + (i64 + (MPYU64 (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src1), subreg_loreg)), + (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src2), + subreg_loreg)))), subreg_loreg))))>; diff --git a/lib/Target/Hexagon/HexagonIntrinsicsV5.td b/lib/Target/Hexagon/HexagonIntrinsicsV5.td new file mode 100644 index 0000000..1d44b52 --- /dev/null +++ b/lib/Target/Hexagon/HexagonIntrinsicsV5.td @@ -0,0 +1,395 @@ +class sf_SInst_sf<string opc, Intrinsic IntID> + : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1), + !strconcat("$dst = ", !strconcat(opc , "($src1)")), + [(set IntRegs:$dst, (IntID IntRegs:$src1))]>; + +class si_SInst_sf<string opc, Intrinsic IntID> + : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1), + !strconcat("$dst = ", !strconcat(opc , "($src1)")), + [(set IntRegs:$dst, (IntID IntRegs:$src1))]>; + +class sf_SInst_si<string opc, Intrinsic IntID> + : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1), + !strconcat("$dst = ", !strconcat(opc , "($src1)")), + [(set IntRegs:$dst, (IntID IntRegs:$src1))]>; + +class sf_SInst_di<string opc, Intrinsic IntID> + : SInst<(outs IntRegs:$dst), (ins DoubleRegs:$src1), + !strconcat("$dst = ", !strconcat(opc , "($src1)")), + [(set IntRegs:$dst, (IntID DoubleRegs:$src1))]>; + +class sf_SInst_df<string opc, Intrinsic IntID> + : SInst<(outs IntRegs:$dst), (ins DoubleRegs:$src1), + !strconcat("$dst = ", !strconcat(opc , "($src1)")), + [(set IntRegs:$dst, (IntID DoubleRegs:$src1))]>; + +class si_SInst_df<string opc, Intrinsic IntID> + : SInst<(outs IntRegs:$dst), (ins DoubleRegs:$src1), + !strconcat("$dst = ", !strconcat(opc , "($src1)")), + [(set IntRegs:$dst, (IntID DoubleRegs:$src1))]>; + +class df_SInst_sf<string opc, Intrinsic IntID> + : SInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1), + !strconcat("$dst = ", !strconcat(opc , "($src1)")), + [(set DoubleRegs:$dst, (IntID IntRegs:$src1))]>; + +class di_SInst_sf<string opc, Intrinsic IntID> + : SInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1), + !strconcat("$dst = ", !strconcat(opc , "($src1)")), + [(set DoubleRegs:$dst, (IntID IntRegs:$src1))]>; + +class df_SInst_si<string opc, Intrinsic IntID> + : SInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1), + !strconcat("$dst = ", !strconcat(opc ,
"($src1)")), + [(set DoubleRegs:$dst, (IntID IntRegs:$src1))]>; + +class df_SInst_df + : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1), + !strconcat("$dst = ", !strconcat(opc , "($src1)")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1))]>; + +class di_SInst_df + : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1), + !strconcat("$dst = ", !strconcat(opc , "($src1)")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1))]>; + + +class df_SInst_di + : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1), + !strconcat("$dst = ", !strconcat(opc , "($src1)")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1))]>; + +class sf_MInst_sfsf + : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class df_MInst_dfdf + : MInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, DoubleRegs:$src2))]>; + +class qi_ALU64_dfdf + : ALU64_rr<(outs PredRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")), + [(set PredRegs:$dst, (IntID DoubleRegs:$src1, DoubleRegs:$src2))]>; + +class qi_ALU64_dfu5 + : ALU64_ri<(outs PredRegs:$dst), (ins DoubleRegs:$src1, u5Imm:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2)")), + [(set PredRegs:$dst, (IntID DoubleRegs:$src1, imm:$src2))]>; + + +class sf_MInst_sfsfsf_acc + : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2, + IntRegs:$dst2), + !strconcat("$dst += ", !strconcat(opc , + "($src1, $src2)")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, + IntRegs:$src2, IntRegs:$dst2))], + "$dst2 = $dst">; + +class sf_MInst_sfsfsf_nac + : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2, + IntRegs:$dst2), + !strconcat("$dst -= ", !strconcat(opc , + "($src1, $src2)")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, + IntRegs:$src2, IntRegs:$dst2))], + "$dst2 = $dst">; + + +class sf_MInst_sfsfsfsi_sc + : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2, IntRegs:$src3), + !strconcat("$dst += ", !strconcat(opc , + "($src1, $src2, $src3):scale")), + [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2, IntRegs:$src3))], + "$dst2 = $dst">; + +class sf_MInst_sfsfsf_acc_lib + : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2, + IntRegs:$dst2), + !strconcat("$dst += ", !strconcat(opc , + "($src1, $src2):lib")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, + IntRegs:$src2, IntRegs:$dst2))], + "$dst2 = $dst">; + +class sf_MInst_sfsfsf_nac_lib + : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2, + IntRegs:$dst2), + !strconcat("$dst -= ", !strconcat(opc , + "($src1, $src2):lib")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, + IntRegs:$src2, IntRegs:$dst2))], + "$dst2 = $dst">; + +class df_MInst_dfdfdf_acc + : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2, + DoubleRegs:$dst2), + !strconcat("$dst += ", !strconcat(opc , + "($src1, $src2)")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, + DoubleRegs:$src2, DoubleRegs:$dst2))], + "$dst2 = $dst">; + +class df_MInst_dfdfdf_nac + : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2, + DoubleRegs:$dst2), + !strconcat("$dst -= ", !strconcat(opc , + "($src1, $src2)")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, + DoubleRegs:$src2, 
DoubleRegs:$dst2))], + "$dst2 = $dst">; + + +class df_MInst_dfdfdfsi_sc<string opc, Intrinsic IntID> + : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1, + DoubleRegs:$src2, IntRegs:$src3), + !strconcat("$dst += ", !strconcat(opc , + "($src1, $src2, $src3):scale")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, DoubleRegs:$src1, + DoubleRegs:$src2, IntRegs:$src3))], + "$dst2 = $dst">; + +class df_MInst_dfdfdf_acc_lib<string opc, Intrinsic IntID> + : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2, + DoubleRegs:$dst2), + !strconcat("$dst += ", !strconcat(opc , + "($src1, $src2):lib")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, + DoubleRegs:$src2, DoubleRegs:$dst2))], + "$dst2 = $dst">; + +class df_MInst_dfdfdf_nac_lib<string opc, Intrinsic IntID> + : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2, + DoubleRegs:$dst2), + !strconcat("$dst -= ", !strconcat(opc , + "($src1, $src2):lib")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, + DoubleRegs:$src2, DoubleRegs:$dst2))], + "$dst2 = $dst">; + +class qi_SInst_sfsf<string opc, Intrinsic IntID> + : SInst<(outs PredRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")), + [(set PredRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class qi_SInst_sfu5<string opc, Intrinsic IntID> + : MInst<(outs PredRegs:$dst), (ins IntRegs:$src1, u5Imm:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2)")), + [(set PredRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>; + +class sf_ALU64_u10_pos<string opc, Intrinsic IntID> + : ALU64_ri<(outs IntRegs:$dst), (ins u10Imm:$src1), + !strconcat("$dst = ", !strconcat(opc , "#$src1):pos")), + [(set IntRegs:$dst, (IntID imm:$src1))]>; + +class sf_ALU64_u10_neg<string opc, Intrinsic IntID> + : ALU64_ri<(outs IntRegs:$dst), (ins u10Imm:$src1), + !strconcat("$dst = ", !strconcat(opc , "#$src1):neg")), + [(set IntRegs:$dst, (IntID imm:$src1))]>; + +class df_ALU64_u10_pos<string opc, Intrinsic IntID> + : ALU64_ri<(outs DoubleRegs:$dst), (ins u10Imm:$src1), + !strconcat("$dst = ", !strconcat(opc , "#$src1):pos")), + [(set DoubleRegs:$dst, (IntID imm:$src1))]>; + +class df_ALU64_u10_neg<string opc, Intrinsic IntID> + : ALU64_ri<(outs DoubleRegs:$dst), (ins u10Imm:$src1), + !strconcat("$dst = ", !strconcat(opc , "#$src1):neg")), + [(set DoubleRegs:$dst, (IntID imm:$src1))]>; + +class di_MInst_diu6<string opc, Intrinsic IntID> + : MInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, u6Imm:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2)")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, imm:$src2))]>; + +class di_MInst_diu4_rnd<string opc, Intrinsic IntID> + : MInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, u4Imm:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2):rnd")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, imm:$src2))]>; + +class si_MInst_diu4_rnd_sat<string opc, Intrinsic IntID> + : MInst<(outs IntRegs:$dst), (ins DoubleRegs:$src1, u4Imm:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2):rnd:sat")), + [(set IntRegs:$dst, (IntID DoubleRegs:$src1, imm:$src2))]>; + +class si_SInst_diu4_sat<string opc, Intrinsic IntID> + : SInst<(outs IntRegs:$dst), (ins DoubleRegs:$src1, u4Imm:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2):sat")), + [(set IntRegs:$dst, (IntID DoubleRegs:$src1, imm:$src2))]>; + + +def HEXAGON_C4_fastcorner9: + qi_SInst_qiqi <"fastcorner9", int_hexagon_C4_fastcorner9>; +def HEXAGON_C4_fastcorner9_not: + qi_SInst_qiqi <"!fastcorner9", int_hexagon_C4_fastcorner9_not>; +def HEXAGON_M5_vrmpybuu: + di_MInst_didi <"vrmpybu", int_hexagon_M5_vrmpybuu>; +def HEXAGON_M5_vrmacbuu: + di_MInst_dididi_acc <"vrmpybu", int_hexagon_M5_vrmacbuu>; +def HEXAGON_M5_vrmpybsu: + di_MInst_didi <"vrmpybsu", int_hexagon_M5_vrmpybsu>; +def HEXAGON_M5_vrmacbsu: +
di_MInst_dididi_acc <"vrmpybsu", int_hexagon_M5_vrmacbsu>; +def HEXAGON_M5_vmpybuu: + di_MInst_sisi <"vmpybu", int_hexagon_M5_vmpybuu>; +def HEXAGON_M5_vmpybsu: + di_MInst_sisi <"vmpybsu", int_hexagon_M5_vmpybsu>; +def HEXAGON_M5_vmacbuu: + di_MInst_disisi_acc <"vmpybu", int_hexagon_M5_vmacbuu>; +def HEXAGON_M5_vmacbsu: + di_MInst_disisi_acc <"vmpybsu", int_hexagon_M5_vmacbsu>; +def HEXAGON_M5_vdmpybsu: + di_MInst_didi_sat <"vdmpybsu", int_hexagon_M5_vdmpybsu>; +def HEXAGON_M5_vdmacbsu: + di_MInst_dididi_acc_sat <"vdmpybsu", int_hexagon_M5_vdmacbsu>; +def HEXAGON_A5_vaddhubs: + si_SInst_didi_sat <"vaddhub", int_hexagon_A5_vaddhubs>; +def HEXAGON_S5_popcountp: + si_SInst_di <"popcount", int_hexagon_S5_popcountp>; +def HEXAGON_S5_asrhub_rnd_sat_goodsyntax: + si_MInst_diu4_rnd_sat <"vasrhub", int_hexagon_S5_asrhub_rnd_sat_goodsyntax>; +def HEXAGON_S5_asrhub_sat: + si_SInst_diu4_sat <"vasrhub", int_hexagon_S5_asrhub_sat>; +def HEXAGON_S5_vasrhrnd_goodsyntax: + di_MInst_diu4_rnd <"vasrh", int_hexagon_S5_vasrhrnd_goodsyntax>; +def HEXAGON_S2_asr_i_p_rnd: + di_SInst_diu6 <"asr", int_hexagon_S2_asr_i_p_rnd>; +def HEXAGON_S2_asr_i_p_rnd_goodsyntax: + di_MInst_diu6 <"asrrnd", int_hexagon_S2_asr_i_p_rnd_goodsyntax>; +def HEXAGON_F2_sfadd: + sf_MInst_sfsf <"sfadd", int_hexagon_F2_sfadd>; +def HEXAGON_F2_sfsub: + sf_MInst_sfsf <"sfsub", int_hexagon_F2_sfsub>; +def HEXAGON_F2_sfmpy: + sf_MInst_sfsf <"sfmpy", int_hexagon_F2_sfmpy>; +def HEXAGON_F2_sffma: + sf_MInst_sfsfsf_acc <"sfmpy", int_hexagon_F2_sffma>; +def HEXAGON_F2_sffma_sc: + sf_MInst_sfsfsfsi_sc <"sfmpy", int_hexagon_F2_sffma_sc>; +def HEXAGON_F2_sffms: + sf_MInst_sfsfsf_nac <"sfmpy", int_hexagon_F2_sffms>; +def HEXAGON_F2_sffma_lib: + sf_MInst_sfsfsf_acc_lib <"sfmpy", int_hexagon_F2_sffma_lib>; +def HEXAGON_F2_sffms_lib: + sf_MInst_sfsfsf_nac_lib <"sfmpy", int_hexagon_F2_sffms_lib>; +def HEXAGON_F2_sfcmpeq: + qi_SInst_sfsf <"sfcmp.eq", int_hexagon_F2_sfcmpeq>; +def HEXAGON_F2_sfcmpgt: + qi_SInst_sfsf <"sfcmp.gt", int_hexagon_F2_sfcmpgt>; +def HEXAGON_F2_sfcmpge: + qi_SInst_sfsf <"sfcmp.ge", int_hexagon_F2_sfcmpge>; +def HEXAGON_F2_sfcmpuo: + qi_SInst_sfsf <"sfcmp.uo", int_hexagon_F2_sfcmpuo>; +def HEXAGON_F2_sfmax: + sf_MInst_sfsf <"sfmax", int_hexagon_F2_sfmax>; +def HEXAGON_F2_sfmin: + sf_MInst_sfsf <"sfmin", int_hexagon_F2_sfmin>; +def HEXAGON_F2_sfclass: + qi_SInst_sfu5 <"sfclass", int_hexagon_F2_sfclass>; +def HEXAGON_F2_sfimm_p: + sf_ALU64_u10_pos <"sfmake", int_hexagon_F2_sfimm_p>; +def HEXAGON_F2_sfimm_n: + sf_ALU64_u10_neg <"sfmake", int_hexagon_F2_sfimm_n>; +def HEXAGON_F2_sffixupn: + sf_MInst_sfsf <"sffixupn", int_hexagon_F2_sffixupn>; +def HEXAGON_F2_sffixupd: + sf_MInst_sfsf <"sffixupd", int_hexagon_F2_sffixupd>; +def HEXAGON_F2_sffixupr: + sf_SInst_sf <"sffixupr", int_hexagon_F2_sffixupr>; +def HEXAGON_F2_dfadd: + df_MInst_dfdf <"dfadd", int_hexagon_F2_dfadd>; +def HEXAGON_F2_dfsub: + df_MInst_dfdf <"dfsub", int_hexagon_F2_dfsub>; +def HEXAGON_F2_dfmpy: + df_MInst_dfdf <"dfmpy", int_hexagon_F2_dfmpy>; +def HEXAGON_F2_dffma: + df_MInst_dfdfdf_acc <"dfmpy", int_hexagon_F2_dffma>; +def HEXAGON_F2_dffms: + df_MInst_dfdfdf_nac <"dfmpy", int_hexagon_F2_dffms>; +def HEXAGON_F2_dffma_lib: + df_MInst_dfdfdf_acc_lib <"dfmpy", int_hexagon_F2_dffma_lib>; +def HEXAGON_F2_dffms_lib: + df_MInst_dfdfdf_nac_lib <"dfmpy", int_hexagon_F2_dffms_lib>; +def HEXAGON_F2_dffma_sc: + df_MInst_dfdfdfsi_sc <"dfmpy", int_hexagon_F2_dffma_sc>; +def HEXAGON_F2_dfmax: + df_MInst_dfdf <"dfmax", int_hexagon_F2_dfmax>; +def HEXAGON_F2_dfmin: + df_MInst_dfdf 
<"dfmin", int_hexagon_F2_dfmin>; +def HEXAGON_F2_dfcmpeq: + qi_ALU64_dfdf <"dfcmp.eq", int_hexagon_F2_dfcmpeq>; +def HEXAGON_F2_dfcmpgt: + qi_ALU64_dfdf <"dfcmp.gt", int_hexagon_F2_dfcmpgt>; +def HEXAGON_F2_dfcmpge: + qi_ALU64_dfdf <"dfcmp.ge", int_hexagon_F2_dfcmpge>; +def HEXAGON_F2_dfcmpuo: + qi_ALU64_dfdf <"dfcmp.uo", int_hexagon_F2_dfcmpuo>; +def HEXAGON_F2_dfclass: + qi_ALU64_dfu5 <"dfclass", int_hexagon_F2_dfclass>; +def HEXAGON_F2_dfimm_p: + df_ALU64_u10_pos <"dfmake", int_hexagon_F2_dfimm_p>; +def HEXAGON_F2_dfimm_n: + df_ALU64_u10_neg <"dfmake", int_hexagon_F2_dfimm_n>; +def HEXAGON_F2_dffixupn: + df_MInst_dfdf <"dffixupn", int_hexagon_F2_dffixupn>; +def HEXAGON_F2_dffixupd: + df_MInst_dfdf <"dffixupd", int_hexagon_F2_dffixupd>; +def HEXAGON_F2_dffixupr: + df_SInst_df <"dffixupr", int_hexagon_F2_dffixupr>; +def HEXAGON_F2_conv_sf2df: + df_SInst_sf <"convert_sf2df", int_hexagon_F2_conv_sf2df>; +def HEXAGON_F2_conv_df2sf: + sf_SInst_df <"convert_df2sf", int_hexagon_F2_conv_df2sf>; +def HEXAGON_F2_conv_uw2sf: + sf_SInst_si <"convert_uw2sf", int_hexagon_F2_conv_uw2sf>; +def HEXAGON_F2_conv_uw2df: + df_SInst_si <"convert_uw2df", int_hexagon_F2_conv_uw2df>; +def HEXAGON_F2_conv_w2sf: + sf_SInst_si <"convert_w2sf", int_hexagon_F2_conv_w2sf>; +def HEXAGON_F2_conv_w2df: + df_SInst_si <"convert_w2df", int_hexagon_F2_conv_w2df>; +def HEXAGON_F2_conv_ud2sf: + sf_SInst_di <"convert_ud2sf", int_hexagon_F2_conv_ud2sf>; +def HEXAGON_F2_conv_ud2df: + df_SInst_di <"convert_ud2df", int_hexagon_F2_conv_ud2df>; +def HEXAGON_F2_conv_d2sf: + sf_SInst_di <"convert_d2sf", int_hexagon_F2_conv_d2sf>; +def HEXAGON_F2_conv_d2df: + df_SInst_di <"convert_d2df", int_hexagon_F2_conv_d2df>; +def HEXAGON_F2_conv_sf2uw: + si_SInst_sf <"convert_sf2uw", int_hexagon_F2_conv_sf2uw>; +def HEXAGON_F2_conv_sf2w: + si_SInst_sf <"convert_sf2w", int_hexagon_F2_conv_sf2w>; +def HEXAGON_F2_conv_sf2ud: + di_SInst_sf <"convert_sf2ud", int_hexagon_F2_conv_sf2ud>; +def HEXAGON_F2_conv_sf2d: + di_SInst_sf <"convert_sf2d", int_hexagon_F2_conv_sf2d>; +def HEXAGON_F2_conv_df2uw: + si_SInst_df <"convert_df2uw", int_hexagon_F2_conv_df2uw>; +def HEXAGON_F2_conv_df2w: + si_SInst_df <"convert_df2w", int_hexagon_F2_conv_df2w>; +def HEXAGON_F2_conv_df2ud: + di_SInst_df <"convert_df2ud", int_hexagon_F2_conv_df2ud>; +def HEXAGON_F2_conv_df2d: + di_SInst_df <"convert_df2d", int_hexagon_F2_conv_df2d>; +def HEXAGON_F2_conv_sf2uw_chop: + si_SInst_sf <"convert_sf2uw", int_hexagon_F2_conv_sf2uw_chop>; +def HEXAGON_F2_conv_sf2w_chop: + si_SInst_sf <"convert_sf2w", int_hexagon_F2_conv_sf2w_chop>; +def HEXAGON_F2_conv_sf2ud_chop: + di_SInst_sf <"convert_sf2ud", int_hexagon_F2_conv_sf2ud_chop>; +def HEXAGON_F2_conv_sf2d_chop: + di_SInst_sf <"convert_sf2d", int_hexagon_F2_conv_sf2d_chop>; +def HEXAGON_F2_conv_df2uw_chop: + si_SInst_df <"convert_df2uw", int_hexagon_F2_conv_df2uw_chop>; +def HEXAGON_F2_conv_df2w_chop: + si_SInst_df <"convert_df2w", int_hexagon_F2_conv_df2w_chop>; +def HEXAGON_F2_conv_df2ud_chop: + di_SInst_df <"convert_df2ud", int_hexagon_F2_conv_df2ud_chop>; +def HEXAGON_F2_conv_df2d_chop: + di_SInst_df <"convert_df2d", int_hexagon_F2_conv_df2d_chop>; diff --git a/lib/Target/Hexagon/HexagonMCInst.h b/lib/Target/Hexagon/HexagonMCInst.h new file mode 100644 index 0000000..7a16c24 --- /dev/null +++ b/lib/Target/Hexagon/HexagonMCInst.h @@ -0,0 +1,41 @@ +//===- HexagonMCInst.h - Hexagon sub-class of MCInst ----------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open 
Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class extends MCInst to allow some VLIW annotation.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef HEXAGONMCINST_H
+#define HEXAGONMCINST_H
+
+#include "llvm/MC/MCInst.h"
+#include "llvm/CodeGen/MachineInstr.h"
+
+namespace llvm {
+  class HexagonMCInst: public MCInst {
+    // Packet start and end markers.
+    unsigned startPacket: 1, endPacket: 1;
+    const MachineInstr *MachineI;
+  public:
+    explicit HexagonMCInst(): MCInst(),
+                              startPacket(0), endPacket(0) {}
+
+    const MachineInstr* getMI() const { return MachineI; }
+
+    void setMI(const MachineInstr *MI) { MachineI = MI; }
+
+    bool isStartPacket() const { return (startPacket); }
+    bool isEndPacket() const { return (endPacket); }
+
+    void setStartPacket(bool yes) { startPacket = yes; }
+    void setEndPacket(bool yes) { endPacket = yes; }
+  };
+}
+
+#endif
diff --git a/lib/Target/Hexagon/HexagonMCInstLower.cpp b/lib/Target/Hexagon/HexagonMCInstLower.cpp
index fbb331b..70bddcc 100644
--- a/lib/Target/Hexagon/HexagonMCInstLower.cpp
+++ b/lib/Target/Hexagon/HexagonMCInstLower.cpp
@@ -49,7 +49,7 @@ void llvm::HexagonLowerToMC(const MachineInstr* MI, MCInst& MCI,
     switch (MO.getType()) {
     default:
       MI->dump();
-      assert(0 && "unknown operand type");
+      llvm_unreachable("unknown operand type");
     case MachineOperand::MO_Register:
       // Ignore all implicit register operands.
       if (MO.isImplicit()) continue;
diff --git a/lib/Target/Hexagon/HexagonNewValueJump.cpp b/lib/Target/Hexagon/HexagonNewValueJump.cpp
new file mode 100644
index 0000000..7ece408
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonNewValueJump.cpp
@@ -0,0 +1,647 @@
+//===----- HexagonNewValueJump.cpp - Hexagon Backend New Value Jump -------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements the NewValueJump pass for Hexagon.
+// Ideally, we should merge this as a Peephole pass prior to register
+// allocation, but because we have a spill in between the feeder and new value
+// jump instructions, we are forced to run it after register allocation.
+// Having said that, we should re-attempt to pull this earlier at some point
+// in the future.
+
+// The basic approach looks for a sequence of a predicated jump, the compare
+// instruction that generates the predicate, and the feeder to the predicate.
+// Once it finds all three, it collapses the compare and jump instructions
+// into a new-value jump instruction.
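+//
+// For example (an illustrative sketch; the registers and the label are
+// hypothetical), a sequence such as
+//    r21 = memub(r22+r24<<#0)
+//    p0 = cmp.eq(r21, #0)
+//    if (p0.new) jump:t .LBB29_45
+// is collapsed so that the compare and the jump issue together as a single
+// new-value jump that reads the feeder's result directly:
+//    r21 = memub(r22+r24<<#0)
+//    if (cmp.eq(r21.new, #0)) jump:t .LBB29_45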
+//
+//
+//===----------------------------------------------------------------------===//
+#define DEBUG_TYPE "hexagon-nvj"
+#include "llvm/PassSupport.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/ScheduleDAGInstrs.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/LiveVariables.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineFunctionAnalysis.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "Hexagon.h"
+#include "HexagonTargetMachine.h"
+#include "HexagonRegisterInfo.h"
+#include "HexagonSubtarget.h"
+#include "HexagonInstrInfo.h"
+#include "HexagonMachineFunctionInfo.h"
+
+#include <map>
+
+#include "llvm/Support/CommandLine.h"
+using namespace llvm;
+
+STATISTIC(NumNVJGenerated, "Number of New Value Jump Instructions created");
+
+static cl::opt<int>
+DbgNVJCount("nvj-count", cl::init(-1), cl::Hidden, cl::desc(
+  "Maximum number of predicated jumps to be converted to New Value Jump"));
+
+static cl::opt<bool> DisableNewValueJumps("disable-nvjump", cl::Hidden,
+    cl::ZeroOrMore, cl::init(false),
+    cl::desc("Disable New Value Jumps"));
+
+namespace {
+  struct HexagonNewValueJump : public MachineFunctionPass {
+    const HexagonInstrInfo *QII;
+    const HexagonRegisterInfo *QRI;
+
+  public:
+    static char ID;
+
+    HexagonNewValueJump() : MachineFunctionPass(ID) { }
+
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+      MachineFunctionPass::getAnalysisUsage(AU);
+    }
+
+    const char *getPassName() const {
+      return "Hexagon NewValueJump";
+    }
+
+    virtual bool runOnMachineFunction(MachineFunction &Fn);
+
+  private:
+
+  };
+
+} // end of anonymous namespace
+
+char HexagonNewValueJump::ID = 0;
+
+// We have identified that this II could be a feeder to an NVJ;
+// verify that it can be.
+static bool canBeFeederToNewValueJump(const HexagonInstrInfo *QII,
+                                      const TargetRegisterInfo *TRI,
+                                      MachineBasicBlock::iterator II,
+                                      MachineBasicBlock::iterator end,
+                                      MachineBasicBlock::iterator skip,
+                                      MachineFunction &MF) {
+
+  // A predicated instruction cannot be a feeder to an NVJ.
+  if (QII->isPredicated(II))
+    return false;
+
+  // Bail out if feederReg is a paired register (double regs in
+  // our case). One would think that we could check whether a given
+  // register cmpReg1 or cmpReg2 is a subregister of feederReg
+  // using -- if (QRI->isSubRegister(feederReg, cmpReg1)) -- logic
+  // before the callsite of this function.
+  // But we cannot, as it comes in the following fashion:
+  //    %D0 = Hexagon_S2_lsr_r_p %D0, %R2
+  //    %R0 = KILL %R0, %D0
+  //    %P0 = CMPEQri %R0, 0
+  // Hence, we need to check if it's a KILL instruction.
+  if (II->getOpcode() == TargetOpcode::KILL)
+    return false;
+
+
+  // Make sure there is no 'def' or 'use' of any of the uses of the
+  // feeder insn between its definition (this MI) and the jump, jmpInst,
+  // skipping the compare, cmpInst.
+  // Here's the example:
+  //    r21=memub(r22+r24<<#0)
+  //    p0 = cmp.eq(r21, #0)
+  //    r4=memub(r3+r21<<#0)
+  //    if (p0.new) jump:t .LBB29_45
+  // Without this check, it will be converted into
+  //    r4=memub(r3+r21<<#0)
+  //    r21=memub(r22+r24<<#0)
+  //    p0 = cmp.eq(r21, #0)
+  //    if (p0.new) jump:t .LBB29_45
+  // and would result in WAR hazards if converted to a New Value Jump.
+
+  for (unsigned i = 0; i < II->getNumOperands(); ++i) {
+    if (II->getOperand(i).isReg() &&
+        (II->getOperand(i).isUse() || II->getOperand(i).isDef())) {
+      MachineBasicBlock::iterator localII = II;
+      ++localII;
+      unsigned Reg = II->getOperand(i).getReg();
+      for (MachineBasicBlock::iterator localBegin = localII;
+           localBegin != end; ++localBegin) {
+        if (localBegin == skip) continue;
+        // Check for subregisters too.
+        if (localBegin->modifiesRegister(Reg, TRI) ||
+            localBegin->readsRegister(Reg, TRI))
+          return false;
+      }
+    }
+  }
+  return true;
+}
+
+// These are the common checks that need to be performed
+// to determine whether:
+// 1. the compare instruction can be moved before the jump.
+// 2. the feeder to the compare instruction can be moved before the jump.
+static bool commonChecksToProhibitNewValueJump(bool afterRA,
+                                          MachineBasicBlock::iterator MII) {
+
+  // If a store is in the path, bail out.
+  if (MII->getDesc().mayStore())
+    return false;
+
+  // If a call is in the path, bail out.
+  if (MII->getOpcode() == Hexagon::CALLv3)
+    return false;
+
+  // If NVJ is running prior to RA, do the following checks.
+  if (!afterRA) {
+    // The following Target Opcode instructions are spurious
+    // to new value jump. If they are in the path, bail out.
+    // KILL sets the kill flag on the opcode. It also sets up a
+    // single register out of a pair:
+    //    %D0 = Hexagon_S2_lsr_r_p %D0, %R2
+    //    %R0 = KILL %R0, %D0
+    //    %P0 = CMPEQri %R0, 0
+    // PHI can be anything after RA.
+    // COPY can rematerialize things in between feeder, compare and nvj.
+    if (MII->getOpcode() == TargetOpcode::KILL ||
+        MII->getOpcode() == TargetOpcode::PHI ||
+        MII->getOpcode() == TargetOpcode::COPY)
+      return false;
+
+    // The following pseudo Hexagon instructions set "use" and "def"
+    // of registers by individual passes in the backend. At this time,
+    // we don't know the scope of usage and definitions of these
+    // instructions.
+    if (MII->getOpcode() == Hexagon::TFR_condset_rr ||
+        MII->getOpcode() == Hexagon::TFR_condset_ii ||
+        MII->getOpcode() == Hexagon::TFR_condset_ri ||
+        MII->getOpcode() == Hexagon::TFR_condset_ir ||
+        MII->getOpcode() == Hexagon::LDriw_pred ||
+        MII->getOpcode() == Hexagon::STriw_pred)
+      return false;
+  }
+
+  return true;
+}
+
+static bool canCompareBeNewValueJump(const HexagonInstrInfo *QII,
+                                     const TargetRegisterInfo *TRI,
+                                     MachineBasicBlock::iterator II,
+                                     unsigned pReg,
+                                     bool secondReg,
+                                     bool optLocation,
+                                     MachineBasicBlock::iterator end,
+                                     MachineFunction &MF) {
+
+  MachineInstr *MI = II;
+
+  // If the second operand of the compare is an imm, make sure it's in the
+  // range specified by the arch.
+  if (!secondReg) {
+    int64_t v = MI->getOperand(2).getImm();
+    if (MI->getOpcode() == Hexagon::CMPGEri ||
+        (MI->getOpcode() == Hexagon::CMPGEUri && v > 0))
+      --v;
+
+    if (!(isUInt<5>(v) ||
+          ((MI->getOpcode() == Hexagon::CMPEQri ||
+            MI->getOpcode() == Hexagon::CMPGTri ||
+            MI->getOpcode() == Hexagon::CMPGEri) &&
+           (v == -1))))
+      return false;
+  }
+
+  unsigned cmpReg1, cmpOp2 = 0; // cmpOp2 assignment silences compiler warning.
+  cmpReg1 = MI->getOperand(1).getReg();
+
+  if (secondReg) {
+    cmpOp2 = MI->getOperand(2).getReg();
+
+    // Make sure that the second register is not defined by a COPY.
+    // At the machine code level, we don't need this, but if we decide
+    // to move new value jump prior to RA, we would need this.
+ MachineRegisterInfo &MRI = MF.getRegInfo(); + if (secondReg && !TargetRegisterInfo::isPhysicalRegister(cmpOp2)) { + MachineInstr *def = MRI.getVRegDef(cmpOp2); + if (def->getOpcode() == TargetOpcode::COPY) + return false; + } + } + + // Walk the instructions after the compare (predicate def) to the jump, + // and satisfy the following conditions. + ++II ; + for (MachineBasicBlock::iterator localII = II; localII != end; + ++localII) { + + // Check 1. + // If "common" checks fail, bail out. + if (!commonChecksToProhibitNewValueJump(optLocation, localII)) + return false; + + // Check 2. + // If there is a def or use of predicate (result of compare), bail out. + if (localII->modifiesRegister(pReg, TRI) || + localII->readsRegister(pReg, TRI)) + return false; + + // Check 3. + // If there is a def of any of the use of the compare (operands of compare), + // bail out. + // Eg. + // p0 = cmp.eq(r2, r0) + // r2 = r4 + // if (p0.new) jump:t .LBB28_3 + if (localII->modifiesRegister(cmpReg1, TRI) || + (secondReg && localII->modifiesRegister(cmpOp2, TRI))) + return false; + } + return true; +} + +// Given a compare operator, return a matching New Value Jump +// compare operator. Make sure that MI here is included in +// HexagonInstrInfo.cpp::isNewValueJumpCandidate +static unsigned getNewValueJumpOpcode(const MachineInstr *MI, int reg, + bool secondRegNewified) { + switch (MI->getOpcode()) { + case Hexagon::CMPEQrr: + return Hexagon::JMP_EQrrPt_nv_V4; + + case Hexagon::CMPEQri: { + if (reg >= 0) + return Hexagon::JMP_EQriPt_nv_V4; + else + return Hexagon::JMP_EQriPtneg_nv_V4; + } + + case Hexagon::CMPLTrr: + case Hexagon::CMPGTrr: { + if (secondRegNewified) + return Hexagon::JMP_GTrrdnPt_nv_V4; + else + return Hexagon::JMP_GTrrPt_nv_V4; + } + + case Hexagon::CMPGEri: { + if (reg >= 1) + return Hexagon::JMP_GTriPt_nv_V4; + else + return Hexagon::JMP_GTriPtneg_nv_V4; + } + + case Hexagon::CMPGTri: { + if (reg >= 0) + return Hexagon::JMP_GTriPt_nv_V4; + else + return Hexagon::JMP_GTriPtneg_nv_V4; + } + + case Hexagon::CMPLTUrr: + case Hexagon::CMPGTUrr: { + if (secondRegNewified) + return Hexagon::JMP_GTUrrdnPt_nv_V4; + else + return Hexagon::JMP_GTUrrPt_nv_V4; + } + + case Hexagon::CMPGTUri: + return Hexagon::JMP_GTUriPt_nv_V4; + + case Hexagon::CMPGEUri: { + if (reg == 0) + return Hexagon::JMP_EQrrPt_nv_V4; + else + return Hexagon::JMP_GTUriPt_nv_V4; + } + + default: + llvm_unreachable("Could not find matching New Value Jump instruction."); + } + // return *some value* to avoid compiler warning + return 0; +} + +bool HexagonNewValueJump::runOnMachineFunction(MachineFunction &MF) { + + DEBUG(dbgs() << "********** Hexagon New Value Jump **********\n" + << "********** Function: " + << MF.getFunction()->getName() << "\n"); + +#if 0 + // for now disable this, if we move NewValueJump before register + // allocation we need this information. 
+ LiveVariables &LVs = getAnalysis(); +#endif + + QII = static_cast(MF.getTarget().getInstrInfo()); + QRI = + static_cast(MF.getTarget().getRegisterInfo()); + + if (!QRI->Subtarget.hasV4TOps() || + DisableNewValueJumps) { + return false; + } + + int nvjCount = DbgNVJCount; + int nvjGenerated = 0; + + // Loop through all the bb's of the function + for (MachineFunction::iterator MBBb = MF.begin(), MBBe = MF.end(); + MBBb != MBBe; ++MBBb) { + MachineBasicBlock* MBB = MBBb; + + DEBUG(dbgs() << "** dumping bb ** " + << MBB->getNumber() << "\n"); + DEBUG(MBB->dump()); + DEBUG(dbgs() << "\n" << "********** dumping instr bottom up **********\n"); + bool foundJump = false; + bool foundCompare = false; + bool invertPredicate = false; + unsigned predReg = 0; // predicate reg of the jump. + unsigned cmpReg1 = 0; + int cmpOp2 = 0; + bool MO1IsKill = false; + bool MO2IsKill = false; + MachineBasicBlock::iterator jmpPos; + MachineBasicBlock::iterator cmpPos; + MachineInstr *cmpInstr = NULL, *jmpInstr = NULL; + MachineBasicBlock *jmpTarget = NULL; + bool afterRA = false; + bool isSecondOpReg = false; + bool isSecondOpNewified = false; + // Traverse the basic block - bottom up + for (MachineBasicBlock::iterator MII = MBB->end(), E = MBB->begin(); + MII != E;) { + MachineInstr *MI = --MII; + if (MI->isDebugValue()) { + continue; + } + + if ((nvjCount == 0) || (nvjCount > -1 && nvjCount <= nvjGenerated)) + break; + + DEBUG(dbgs() << "Instr: "; MI->dump(); dbgs() << "\n"); + + if (!foundJump && + (MI->getOpcode() == Hexagon::JMP_c || + MI->getOpcode() == Hexagon::JMP_cNot || + MI->getOpcode() == Hexagon::JMP_cdnPt || + MI->getOpcode() == Hexagon::JMP_cdnPnt || + MI->getOpcode() == Hexagon::JMP_cdnNotPt || + MI->getOpcode() == Hexagon::JMP_cdnNotPnt)) { + // This is where you would insert your compare and + // instr that feeds compare + jmpPos = MII; + jmpInstr = MI; + predReg = MI->getOperand(0).getReg(); + afterRA = TargetRegisterInfo::isPhysicalRegister(predReg); + + // If ifconverter had not messed up with the kill flags of the + // operands, the following check on the kill flag would suffice. + // if(!jmpInstr->getOperand(0).isKill()) break; + + // This predicate register is live out out of BB + // this would only work if we can actually use Live + // variable analysis on phy regs - but LLVM does not + // provide LV analysis on phys regs. + //if(LVs.isLiveOut(predReg, *MBB)) break; + + // Get all the successors of this block - which will always + // be 2. Check if the predicate register is live in in those + // successor. If yes, we can not delete the predicate - + // I am doing this only because LLVM does not provide LiveOut + // at the BB level. + bool predLive = false; + for (MachineBasicBlock::const_succ_iterator SI = MBB->succ_begin(), + SIE = MBB->succ_end(); SI != SIE; ++SI) { + MachineBasicBlock* succMBB = *SI; + if (succMBB->isLiveIn(predReg)) { + predLive = true; + } + } + if (predLive) + break; + + jmpTarget = MI->getOperand(1).getMBB(); + foundJump = true; + if (MI->getOpcode() == Hexagon::JMP_cNot || + MI->getOpcode() == Hexagon::JMP_cdnNotPt || + MI->getOpcode() == Hexagon::JMP_cdnNotPnt) { + invertPredicate = true; + } + continue; + } + + // No new value jump if there is a barrier. A barrier has to be in its + // own packet. A barrier has zero operands. We conservatively bail out + // here if we see any instruction with zero operands. 
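+      // (For instance, BARRIER matches this check: it takes no operands and
+      // must occupy a packet by itself, so the pass gives up rather than
+      // move a compare or feeder across it.)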
+ if (foundJump && MI->getNumOperands() == 0) + break; + + if (foundJump && + !foundCompare && + MI->getOperand(0).isReg() && + MI->getOperand(0).getReg() == predReg) { + + // Not all compares can be new value compare. Arch Spec: 7.6.1.1 + if (QII->isNewValueJumpCandidate(MI)) { + + assert((MI->getDesc().isCompare()) && + "Only compare instruction can be collapsed into New Value Jump"); + isSecondOpReg = MI->getOperand(2).isReg(); + + if (!canCompareBeNewValueJump(QII, QRI, MII, predReg, isSecondOpReg, + afterRA, jmpPos, MF)) + break; + + cmpInstr = MI; + cmpPos = MII; + foundCompare = true; + + // We need cmpReg1 and cmpOp2(imm or reg) while building + // new value jump instruction. + cmpReg1 = MI->getOperand(1).getReg(); + if (MI->getOperand(1).isKill()) + MO1IsKill = true; + + if (isSecondOpReg) { + cmpOp2 = MI->getOperand(2).getReg(); + if (MI->getOperand(2).isKill()) + MO2IsKill = true; + } else + cmpOp2 = MI->getOperand(2).getImm(); + continue; + } + } + + if (foundCompare && foundJump) { + + // If "common" checks fail, bail out on this BB. + if (!commonChecksToProhibitNewValueJump(afterRA, MII)) + break; + + bool foundFeeder = false; + MachineBasicBlock::iterator feederPos = MII; + if (MI->getOperand(0).isReg() && + MI->getOperand(0).isDef() && + (MI->getOperand(0).getReg() == cmpReg1 || + (isSecondOpReg && + MI->getOperand(0).getReg() == (unsigned) cmpOp2))) { + + unsigned feederReg = MI->getOperand(0).getReg(); + + // First try to see if we can get the feeder from the first operand + // of the compare. If we can not, and if secondOpReg is true + // (second operand of the compare is also register), try that one. + // TODO: Try to come up with some heuristic to figure out which + // feeder would benefit. + + if (feederReg == cmpReg1) { + if (!canBeFeederToNewValueJump(QII, QRI, MII, jmpPos, cmpPos, MF)) { + if (!isSecondOpReg) + break; + else + continue; + } else + foundFeeder = true; + } + + if (!foundFeeder && + isSecondOpReg && + feederReg == (unsigned) cmpOp2) + if (!canBeFeederToNewValueJump(QII, QRI, MII, jmpPos, cmpPos, MF)) + break; + + if (isSecondOpReg) { + // In case of CMPLT, or CMPLTU, or EQ with the second register + // to newify, swap the operands. + if (cmpInstr->getOpcode() == Hexagon::CMPLTrr || + cmpInstr->getOpcode() == Hexagon::CMPLTUrr || + (cmpInstr->getOpcode() == Hexagon::CMPEQrr && + feederReg == (unsigned) cmpOp2)) { + unsigned tmp = cmpReg1; + bool tmpIsKill = MO1IsKill; + cmpReg1 = cmpOp2; + MO1IsKill = MO2IsKill; + cmpOp2 = tmp; + MO2IsKill = tmpIsKill; + } + + // Now we have swapped the operands, all we need to check is, + // if the second operand (after swap) is the feeder. + // And if it is, make a note. + if (feederReg == (unsigned)cmpOp2) + isSecondOpNewified = true; + } + + // Now that we are moving feeder close the jump, + // make sure we are respecting the kill values of + // the operands of the feeder. 
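+          //
+          // An illustrative sketch (hypothetical registers): if the feeder
+          //    r2 = memw(r3+#0)
+          // is moved down past an instruction that currently kills r3, e.g.
+          //    r5 = add(r3, #1)
+          // the kill flag is cleared on that instruction and set on the
+          // feeder's own use of r3, since the feeder becomes the last user.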
+ + bool updatedIsKill = false; + for (unsigned i = 0; i < MI->getNumOperands(); i++) { + MachineOperand &MO = MI->getOperand(i); + if (MO.isReg() && MO.isUse()) { + unsigned feederReg = MO.getReg(); + for (MachineBasicBlock::iterator localII = feederPos, + end = jmpPos; localII != end; localII++) { + MachineInstr *localMI = localII; + for (unsigned j = 0; j < localMI->getNumOperands(); j++) { + MachineOperand &localMO = localMI->getOperand(j); + if (localMO.isReg() && localMO.isUse() && + localMO.isKill() && feederReg == localMO.getReg()) { + // We found that there is kill of a use register + // Set up a kill flag on the register + localMO.setIsKill(false); + MO.setIsKill(); + updatedIsKill = true; + break; + } + } + if (updatedIsKill) break; + } + } + if (updatedIsKill) break; + } + + MBB->splice(jmpPos, MI->getParent(), MI); + MBB->splice(jmpPos, MI->getParent(), cmpInstr); + DebugLoc dl = MI->getDebugLoc(); + MachineInstr *NewMI; + + assert((QII->isNewValueJumpCandidate(cmpInstr)) && + "This compare is not a New Value Jump candidate."); + unsigned opc = getNewValueJumpOpcode(cmpInstr, cmpOp2, + isSecondOpNewified); + if (invertPredicate) + opc = QII->getInvertedPredicatedOpcode(opc); + + // Manage the conversions from CMPGEUri to either CMPEQrr + // or CMPGTUri properly. See Arch spec for CMPGEUri instructions. + // This has to be after the getNewValueJumpOpcode function call as + // second operand of the compare could be modified in this logic. + if (cmpInstr->getOpcode() == Hexagon::CMPGEUri) { + if (cmpOp2 == 0) { + cmpOp2 = cmpReg1; + MO2IsKill = MO1IsKill; + isSecondOpReg = true; + } else + --cmpOp2; + } + + // Manage the conversions from CMPGEri to CMPGTUri properly. + // See Arch spec for CMPGEri instructions. + if (cmpInstr->getOpcode() == Hexagon::CMPGEri) + --cmpOp2; + + if (isSecondOpReg) { + NewMI = BuildMI(*MBB, jmpPos, dl, + QII->get(opc)) + .addReg(cmpReg1, getKillRegState(MO1IsKill)) + .addReg(cmpOp2, getKillRegState(MO2IsKill)) + .addMBB(jmpTarget); + } + else { + NewMI = BuildMI(*MBB, jmpPos, dl, + QII->get(opc)) + .addReg(cmpReg1, getKillRegState(MO1IsKill)) + .addImm(cmpOp2) + .addMBB(jmpTarget); + } + + assert(NewMI && "New Value Jump Instruction Not created!"); + if (cmpInstr->getOperand(0).isReg() && + cmpInstr->getOperand(0).isKill()) + cmpInstr->getOperand(0).setIsKill(false); + if (cmpInstr->getOperand(1).isReg() && + cmpInstr->getOperand(1).isKill()) + cmpInstr->getOperand(1).setIsKill(false); + cmpInstr->eraseFromParent(); + jmpInstr->eraseFromParent(); + ++nvjGenerated; + ++NumNVJGenerated; + break; + } + } + } + } + + return true; + +} + +FunctionPass *llvm::createHexagonNewValueJump() { + return new HexagonNewValueJump(); +} diff --git a/lib/Target/Hexagon/HexagonRegisterInfo.cpp b/lib/Target/Hexagon/HexagonRegisterInfo.cpp index 2a9de92..2c23674 100644 --- a/lib/Target/Hexagon/HexagonRegisterInfo.cpp +++ b/lib/Target/Hexagon/HexagonRegisterInfo.cpp @@ -63,6 +63,7 @@ const uint16_t* HexagonRegisterInfo::getCalleeSavedRegs(const MachineFunction return CalleeSavedRegsV2; case HexagonSubtarget::V3: case HexagonSubtarget::V4: + case HexagonSubtarget::V5: return CalleeSavedRegsV3; } llvm_unreachable("Callee saved registers requested for unknown architecture " @@ -109,6 +110,7 @@ HexagonRegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const { return CalleeSavedRegClassesV2; case HexagonSubtarget::V3: case HexagonSubtarget::V4: + case HexagonSubtarget::V5: return CalleeSavedRegClassesV3; } llvm_unreachable("Callee saved register classes 
requested for unknown " @@ -179,13 +181,15 @@ void HexagonRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, // r0 = add(r30, #10000) // r0 = memw(r0) if ( (MI.getOpcode() == Hexagon::LDriw) || - (MI.getOpcode() == Hexagon::LDrid) || - (MI.getOpcode() == Hexagon::LDrih) || - (MI.getOpcode() == Hexagon::LDriuh) || - (MI.getOpcode() == Hexagon::LDrib) || - (MI.getOpcode() == Hexagon::LDriub) ) { + (MI.getOpcode() == Hexagon::LDrid) || + (MI.getOpcode() == Hexagon::LDrih) || + (MI.getOpcode() == Hexagon::LDriuh) || + (MI.getOpcode() == Hexagon::LDrib) || + (MI.getOpcode() == Hexagon::LDriub) || + (MI.getOpcode() == Hexagon::LDriw_f) || + (MI.getOpcode() == Hexagon::LDrid_f)) { unsigned dstReg = (MI.getOpcode() == Hexagon::LDrid) ? - *getSubRegisters(MI.getOperand(0).getReg()) : + getSubReg(MI.getOperand(0).getReg(), Hexagon::subreg_loreg) : MI.getOperand(0).getReg(); // Check if offset can fit in addi. @@ -203,10 +207,13 @@ void HexagonRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, MI.getOperand(i).ChangeToRegister(dstReg, false, false, true); MI.getOperand(i+1).ChangeToImmediate(0); - } else if ((MI.getOpcode() == Hexagon::STriw) || + } else if ((MI.getOpcode() == Hexagon::STriw_indexed) || + (MI.getOpcode() == Hexagon::STriw) || (MI.getOpcode() == Hexagon::STrid) || (MI.getOpcode() == Hexagon::STrih) || - (MI.getOpcode() == Hexagon::STrib)) { + (MI.getOpcode() == Hexagon::STrib) || + (MI.getOpcode() == Hexagon::STrid_f) || + (MI.getOpcode() == Hexagon::STriw_f)) { // For stores, we need a reserved register. Change // memw(r30 + #10000) = r0 to: // diff --git a/lib/Target/Hexagon/HexagonRegisterInfo.h b/lib/Target/Hexagon/HexagonRegisterInfo.h index 6cf727b..85355ae 100644 --- a/lib/Target/Hexagon/HexagonRegisterInfo.h +++ b/lib/Target/Hexagon/HexagonRegisterInfo.h @@ -73,6 +73,10 @@ struct HexagonRegisterInfo : public HexagonGenRegisterInfo { return true; } + bool trackLivenessAfterRegAlloc(const MachineFunction &MF) const { + return true; + } + // Debug information queries. unsigned getRARegister() const; unsigned getFrameRegister(const MachineFunction &MF) const; diff --git a/lib/Target/Hexagon/HexagonRegisterInfo.td b/lib/Target/Hexagon/HexagonRegisterInfo.td index d44eae3..fe41fc3 100644 --- a/lib/Target/Hexagon/HexagonRegisterInfo.td +++ b/lib/Target/Hexagon/HexagonRegisterInfo.td @@ -131,6 +131,9 @@ let Namespace = "Hexagon" in { def SA1 : Rc<2, "sa1">, DwarfRegNum<[69]>; def LC1 : Rc<3, "lc1">, DwarfRegNum<[70]>; + def M0 : Rc<6, "m0">, DwarfRegNum<[71]>; + def M1 : Rc<7, "m1">, DwarfRegNum<[72]>; + def PC : Rc<9, "pc">, DwarfRegNum<[32]>; // is the Dwarf number correct? def GP : Rc<11, "gp">, DwarfRegNum<[33]>; // is the Dwarf number correct? } @@ -140,19 +143,15 @@ let Namespace = "Hexagon" in { // FIXME: the register order should be defined in terms of the preferred // allocation order... 
// -def IntRegs : RegisterClass<"Hexagon", [i32], 32, +def IntRegs : RegisterClass<"Hexagon", [i32,f32], 32, (add (sequence "R%u", 0, 9), (sequence "R%u", 12, 28), R10, R11, R29, R30, R31)> { } - - -def DoubleRegs : RegisterClass<"Hexagon", [i64], 64, +def DoubleRegs : RegisterClass<"Hexagon", [i64,f64], 64, (add (sequence "D%u", 0, 4), - (sequence "D%u", 6, 13), D5, D14, D15)> { - let SubRegClasses = [(IntRegs subreg_loreg, subreg_hireg)]; -} + (sequence "D%u", 6, 13), D5, D14, D15)>; def PredRegs : RegisterClass<"Hexagon", [i1], 32, (add (sequence "P%u", 0, 3))> @@ -162,6 +161,7 @@ def PredRegs : RegisterClass<"Hexagon", [i1], 32, (add (sequence "P%u", 0, 3))> def CRRegs : RegisterClass<"Hexagon", [i32], 32, (add (sequence "LC%u", 0, 1), - (sequence "SA%u", 0, 1), PC, GP)> { + (sequence "SA%u", 0, 1), + (sequence "M%u", 0, 1), PC, GP)> { let Size = 32; } diff --git a/lib/Target/Hexagon/HexagonRemoveSZExtArgs.cpp b/lib/Target/Hexagon/HexagonRemoveSZExtArgs.cpp index 66a00e1..2468f0b 100644 --- a/lib/Target/Hexagon/HexagonRemoveSZExtArgs.cpp +++ b/lib/Target/Hexagon/HexagonRemoveSZExtArgs.cpp @@ -1,4 +1,4 @@ -//===- HexagonRemoveExtendArgs.cpp - Remove unecessary argument sign extends =// +//===- HexagonRemoveExtendArgs.cpp - Remove unnecessary argument sign extends // // // The LLVM Compiler Infrastructure // diff --git a/lib/Target/Hexagon/HexagonSchedule.td b/lib/Target/Hexagon/HexagonSchedule.td index fbea445..d1076b8 100644 --- a/lib/Target/Hexagon/HexagonSchedule.td +++ b/lib/Target/Hexagon/HexagonSchedule.td @@ -13,7 +13,6 @@ def LSUNIT : FuncUnit; def MUNIT : FuncUnit; def SUNIT : FuncUnit; - // Itinerary classes def ALU32 : InstrItinClass; def ALU64 : InstrItinClass; @@ -24,23 +23,31 @@ def LD : InstrItinClass; def M : InstrItinClass; def ST : InstrItinClass; def S : InstrItinClass; +def SYS : InstrItinClass; +def MARKER : InstrItinClass; def PSEUDO : InstrItinClass; - def HexagonItineraries : - ProcessorItineraries<[LUNIT, LSUNIT, MUNIT, SUNIT], [], [ - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]> -]>; - + ProcessorItineraries<[LUNIT, LSUNIT, MUNIT, SUNIT], [], [ + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]> + ]>; + +def HexagonModel : SchedMachineModel { + // Max issue per cycle == bundle width. + let IssueWidth = 4; + let Itineraries = HexagonItineraries; +} //===----------------------------------------------------------------------===// // V4 Machine Info + diff --git a/lib/Target/Hexagon/HexagonScheduleV4.td b/lib/Target/Hexagon/HexagonScheduleV4.td index 4cf66fe..9b41126 100644 --- a/lib/Target/Hexagon/HexagonScheduleV4.td +++ b/lib/Target/Hexagon/HexagonScheduleV4.td @@ -23,7 +23,6 @@ // | SLOT3 | XTYPE ALU32 J CR | // |===========|==================================================| - // Functional Units. def SLOT0 : FuncUnit; def SLOT1 : FuncUnit; @@ -34,22 +33,32 @@ def SLOT3 : FuncUnit; def NV_V4 : InstrItinClass; def MEM_V4 : InstrItinClass; // ALU64/M/S Instruction classes of V2 are collectively knownn as XTYPE in V4. 
+def PREFIX : InstrItinClass; + +def HexagonItinerariesV4 : + ProcessorItineraries<[SLOT0, SLOT1, SLOT2, SLOT3], [], [ + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]> + ]>; -def HexagonItinerariesV4 : ProcessorItineraries< - [SLOT0, SLOT1, SLOT2, SLOT3], [], [ - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]> -]>; +def HexagonModelV4 : SchedMachineModel { + // Max issue per cycle == bundle width. + let IssueWidth = 4; + let Itineraries = HexagonItinerariesV4; +} //===----------------------------------------------------------------------===// // Hexagon V4 Resource Definitions - diff --git a/lib/Target/Hexagon/HexagonSplitTFRCondSets.cpp b/lib/Target/Hexagon/HexagonSplitTFRCondSets.cpp index d10c9f2..a81cd91 100644 --- a/lib/Target/Hexagon/HexagonSplitTFRCondSets.cpp +++ b/lib/Target/Hexagon/HexagonSplitTFRCondSets.cpp @@ -14,7 +14,7 @@ // {p0 = cmp.eq(r0,r1)} // {r3 = mux(p0,#1,#3)} // -// This requires two packets. If we use .new predicated immediate transfers, +// This requires two packets. If we use .new predicated immediate transfers, // then we can do this in a single packet, e.g.: // // {p0 = cmp.eq(r0,r1) @@ -81,40 +81,126 @@ bool HexagonSplitTFRCondSets::runOnMachineFunction(MachineFunction &Fn) { for (MachineBasicBlock::iterator MII = MBB->begin(); MII != MBB->end(); ++MII) { MachineInstr *MI = MII; - int Opc = MI->getOpcode(); - if (Opc == Hexagon::TFR_condset_rr) { - - int DestReg = MI->getOperand(0).getReg(); - int SrcReg1 = MI->getOperand(2).getReg(); - int SrcReg2 = MI->getOperand(3).getReg(); - - // Minor optimization: do not emit the predicated copy if the source and - // the destination is the same register - if (DestReg != SrcReg1) { - BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::TFR_cPt), - DestReg).addReg(MI->getOperand(1).getReg()).addReg(SrcReg1); + int Opc1, Opc2; + switch(MI->getOpcode()) { + case Hexagon::TFR_condset_rr: + case Hexagon::TFR_condset_rr_f: + case Hexagon::TFR_condset_rr64_f: { + int DestReg = MI->getOperand(0).getReg(); + int SrcReg1 = MI->getOperand(2).getReg(); + int SrcReg2 = MI->getOperand(3).getReg(); + + if (MI->getOpcode() == Hexagon::TFR_condset_rr || + MI->getOpcode() == Hexagon::TFR_condset_rr_f) { + Opc1 = Hexagon::TFR_cPt; + Opc2 = Hexagon::TFR_cNotPt; + } + else if (MI->getOpcode() == Hexagon::TFR_condset_rr64_f) { + Opc1 = Hexagon::TFR64_cPt; + Opc2 = Hexagon::TFR64_cNotPt; + } + + // Minor optimization: do not emit the predicated copy if the source + // and the destination is the same register. 
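+        // For example (illustrative), r3 = TFR_condset_rr(p0, r4, r5)
+        // expands into the pair
+        //    if (p0) r3 = r4
+        //    if (!p0) r3 = r5
+        // and either copy is dropped when its source is already r3.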
+ if (DestReg != SrcReg1) { + BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Opc1), + DestReg).addReg(MI->getOperand(1).getReg()).addReg(SrcReg1); + } + if (DestReg != SrcReg2) { + BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Opc2), + DestReg).addReg(MI->getOperand(1).getReg()).addReg(SrcReg2); + } + MII = MBB->erase(MI); + --MII; + break; } - if (DestReg != SrcReg2) { - BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::TFR_cNotPt), - DestReg).addReg(MI->getOperand(1).getReg()).addReg(SrcReg2); + case Hexagon::TFR_condset_ri: + case Hexagon::TFR_condset_ri_f: { + int DestReg = MI->getOperand(0).getReg(); + int SrcReg1 = MI->getOperand(2).getReg(); + + // Do not emit the predicated copy if the source and the destination + // is the same register. + if (DestReg != SrcReg1) { + BuildMI(*MBB, MII, MI->getDebugLoc(), + TII->get(Hexagon::TFR_cPt), DestReg). + addReg(MI->getOperand(1).getReg()).addReg(SrcReg1); + } + if (MI->getOpcode() == Hexagon::TFR_condset_ri ) { + BuildMI(*MBB, MII, MI->getDebugLoc(), + TII->get(Hexagon::TFRI_cNotPt), DestReg). + addReg(MI->getOperand(1).getReg()). + addImm(MI->getOperand(3).getImm()); + } else if (MI->getOpcode() == Hexagon::TFR_condset_ri_f ) { + BuildMI(*MBB, MII, MI->getDebugLoc(), + TII->get(Hexagon::TFRI_cNotPt_f), DestReg). + addReg(MI->getOperand(1).getReg()). + addFPImm(MI->getOperand(3).getFPImm()); + } + + MII = MBB->erase(MI); + --MII; + break; + } + case Hexagon::TFR_condset_ir: + case Hexagon::TFR_condset_ir_f: { + int DestReg = MI->getOperand(0).getReg(); + int SrcReg2 = MI->getOperand(3).getReg(); + + if (MI->getOpcode() == Hexagon::TFR_condset_ir ) { + BuildMI(*MBB, MII, MI->getDebugLoc(), + TII->get(Hexagon::TFRI_cPt), DestReg). + addReg(MI->getOperand(1).getReg()). + addImm(MI->getOperand(2).getImm()); + } else if (MI->getOpcode() == Hexagon::TFR_condset_ir_f ) { + BuildMI(*MBB, MII, MI->getDebugLoc(), + TII->get(Hexagon::TFRI_cPt_f), DestReg). + addReg(MI->getOperand(1).getReg()). + addFPImm(MI->getOperand(2).getFPImm()); + } + + // Do not emit the predicated copy if the source and + // the destination is the same register. + if (DestReg != SrcReg2) { + BuildMI(*MBB, MII, MI->getDebugLoc(), + TII->get(Hexagon::TFR_cNotPt), DestReg). + addReg(MI->getOperand(1).getReg()).addReg(SrcReg2); + } + MII = MBB->erase(MI); + --MII; + break; + } + case Hexagon::TFR_condset_ii: + case Hexagon::TFR_condset_ii_f: { + int DestReg = MI->getOperand(0).getReg(); + int SrcReg1 = MI->getOperand(1).getReg(); + + if (MI->getOpcode() == Hexagon::TFR_condset_ii ) { + int Immed1 = MI->getOperand(2).getImm(); + int Immed2 = MI->getOperand(3).getImm(); + BuildMI(*MBB, MII, MI->getDebugLoc(), + TII->get(Hexagon::TFRI_cPt), + DestReg).addReg(SrcReg1).addImm(Immed1); + BuildMI(*MBB, MII, MI->getDebugLoc(), + TII->get(Hexagon::TFRI_cNotPt), + DestReg).addReg(SrcReg1).addImm(Immed2); + } else if (MI->getOpcode() == Hexagon::TFR_condset_ii_f ) { + BuildMI(*MBB, MII, MI->getDebugLoc(), + TII->get(Hexagon::TFRI_cPt_f), DestReg). + addReg(SrcReg1). + addFPImm(MI->getOperand(2).getFPImm()); + BuildMI(*MBB, MII, MI->getDebugLoc(), + TII->get(Hexagon::TFRI_cNotPt_f), DestReg). + addReg(SrcReg1). 
+ addFPImm(MI->getOperand(3).getFPImm()); + } + MII = MBB->erase(MI); + --MII; + break; } - MII = MBB->erase(MI); - --MII; - } else if (Opc == Hexagon::TFR_condset_ii) { - int DestReg = MI->getOperand(0).getReg(); - int SrcReg1 = MI->getOperand(1).getReg(); - int Immed1 = MI->getOperand(2).getImm(); - int Immed2 = MI->getOperand(3).getImm(); - BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::TFRI_cPt), - DestReg).addReg(SrcReg1).addImm(Immed1); - BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::TFRI_cNotPt), - DestReg).addReg(SrcReg1).addImm(Immed2); - MII = MBB->erase(MI); - --MII; } } } - return true; } diff --git a/lib/Target/Hexagon/HexagonSubtarget.cpp b/lib/Target/Hexagon/HexagonSubtarget.cpp index 654d336..5d087db 100644 --- a/lib/Target/Hexagon/HexagonSubtarget.cpp +++ b/lib/Target/Hexagon/HexagonSubtarget.cpp @@ -13,6 +13,7 @@ #include "HexagonSubtarget.h" #include "Hexagon.h" +#include "HexagonRegisterInfo.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" using namespace llvm; @@ -29,11 +30,17 @@ static cl::opt EnableMemOps( "enable-hexagon-memops", cl::Hidden, cl::ZeroOrMore, cl::ValueDisallowed, - cl::desc("Generate V4 MEMOP in code generation for Hexagon target")); + cl::desc("Generate V4 memop instructions.")); + +static cl::opt +EnableIEEERndNear( + "enable-hexagon-ieee-rnd-near", + cl::Hidden, cl::ZeroOrMore, cl::init(false), + cl::desc("Generate non-chopped conversion from fp to int.")); HexagonSubtarget::HexagonSubtarget(StringRef TT, StringRef CPU, StringRef FS): HexagonGenSubtargetInfo(TT, CPU, FS), - HexagonArchVersion(V1), + HexagonArchVersion(V2), CPUString(CPU.str()) { ParseSubtargetFeatures(CPU, FS); @@ -45,18 +52,27 @@ HexagonSubtarget::HexagonSubtarget(StringRef TT, StringRef CPU, StringRef FS): break; case HexagonSubtarget::V4: break; + case HexagonSubtarget::V5: + break; default: - llvm_unreachable("Unknown Architecture Version."); + // If the programmer has not specified a Hexagon version, default + // to -mv4. + CPUString = "hexagonv4"; + HexagonArchVersion = HexagonSubtarget::V4; + break; } // Initialize scheduling itinerary for the specified CPU. InstrItins = getInstrItineraryForCPU(CPUString); - // Max issue per cycle == bundle width. 
- InstrItins.IssueWidth = 4; - if (EnableMemOps) UseMemOps = true; else UseMemOps = false; + + if (EnableIEEERndNear) + ModeIEEERndNear = true; + else + ModeIEEERndNear = false; } + diff --git a/lib/Target/Hexagon/HexagonSubtarget.h b/lib/Target/Hexagon/HexagonSubtarget.h index 3079086..5d9d6d8 100644 --- a/lib/Target/Hexagon/HexagonSubtarget.h +++ b/lib/Target/Hexagon/HexagonSubtarget.h @@ -22,16 +22,18 @@ #include "HexagonGenSubtargetInfo.inc" #define Hexagon_SMALL_DATA_THRESHOLD 8 +#define Hexagon_SLOTS 4 namespace llvm { class HexagonSubtarget : public HexagonGenSubtargetInfo { bool UseMemOps; + bool ModeIEEERndNear; public: enum HexagonArchEnum { - V1, V2, V3, V4 + V1, V2, V3, V4, V5 }; HexagonArchEnum HexagonArchVersion; @@ -55,7 +57,11 @@ public: bool hasV3TOps () const { return HexagonArchVersion >= V3; } bool hasV3TOpsOnly () const { return HexagonArchVersion == V3; } bool hasV4TOps () const { return HexagonArchVersion >= V4; } + bool hasV4TOpsOnly () const { return HexagonArchVersion == V4; } bool useMemOps () const { return HexagonArchVersion >= V4 && UseMemOps; } + bool hasV5TOps () const { return HexagonArchVersion >= V5; } + bool hasV5TOpsOnly () const { return HexagonArchVersion == V5; } + bool modeIEEERndNear () const { return ModeIEEERndNear; } bool isSubtargetV2() const { return HexagonArchVersion == V2;} const std::string &getCPUString () const { return CPUString; } diff --git a/lib/Target/Hexagon/HexagonTargetMachine.cpp b/lib/Target/Hexagon/HexagonTargetMachine.cpp index 55bbba7..a7b291f 100644 --- a/lib/Target/Hexagon/HexagonTargetMachine.cpp +++ b/lib/Target/Hexagon/HexagonTargetMachine.cpp @@ -55,7 +55,9 @@ HexagonTargetMachine::HexagonTargetMachine(const Target &T, StringRef TT, CodeModel::Model CM, CodeGenOpt::Level OL) : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL), - DataLayout("e-p:32:32:32-i64:64:64-i32:32:32-i16:16:16-i1:32:32-a0:0") , + DataLayout("e-p:32:32:32-" + "i64:64:64-i32:32:32-i16:16:16-i1:32:32-" + "f64:64:64-f32:32:32-a0:0-n32") , Subtarget(TT, CPU, FS), InstrInfo(Subtarget), TLInfo(*this), TSInfo(*this), FrameLowering(Subtarget), @@ -100,43 +102,47 @@ TargetPassConfig *HexagonTargetMachine::createPassConfig(PassManagerBase &PM) { } bool HexagonPassConfig::addInstSelector() { - PM->add(createHexagonRemoveExtendOps(getHexagonTargetMachine())); - PM->add(createHexagonISelDag(getHexagonTargetMachine())); - PM->add(createHexagonPeephole()); + addPass(createHexagonRemoveExtendOps(getHexagonTargetMachine())); + addPass(createHexagonISelDag(getHexagonTargetMachine())); + addPass(createHexagonPeephole()); return false; } bool HexagonPassConfig::addPreRegAlloc() { if (!DisableHardwareLoops) { - PM->add(createHexagonHardwareLoops()); + addPass(createHexagonHardwareLoops()); } - return false; } bool HexagonPassConfig::addPostRegAlloc() { - PM->add(createHexagonCFGOptimizer(getHexagonTargetMachine())); + addPass(createHexagonCFGOptimizer(getHexagonTargetMachine())); return true; } bool HexagonPassConfig::addPreSched2() { - addPass(IfConverterID); + addPass(&IfConverterID); return true; } bool HexagonPassConfig::addPreEmitPass() { if (!DisableHardwareLoops) { - PM->add(createHexagonFixupHwLoops()); + addPass(createHexagonFixupHwLoops()); } + addPass(createHexagonNewValueJump()); + // Expand Spill code for predicate registers. - PM->add(createHexagonExpandPredSpillCode(getHexagonTargetMachine())); + addPass(createHexagonExpandPredSpillCode(getHexagonTargetMachine())); // Split up TFRcondsets into conditional transfers. 
- PM->add(createHexagonSplitTFRCondSets(getHexagonTargetMachine())); + addPass(createHexagonSplitTFRCondSets(getHexagonTargetMachine())); + + // Create Packets. + addPass(createHexagonPacketizer()); return false; } diff --git a/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp b/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp new file mode 100644 index 0000000..a03ed03 --- /dev/null +++ b/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp @@ -0,0 +1,3646 @@ +//===----- HexagonPacketizer.cpp - vliw packetizer ---------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This implements a simple VLIW packetizer using DFA. The packetizer works on +// machine basic blocks. For each instruction I in BB, the packetizer consults +// the DFA to see if machine resources are available to execute I. If so, the +// packetizer checks if I depends on any instruction J in the current packet. +// If no dependency is found, I is added to current packet and machine resource +// is marked as taken. If any dependency is found, a target API call is made to +// prune the dependence. +// +//===----------------------------------------------------------------------===// +#define DEBUG_TYPE "packets" +#include "llvm/CodeGen/DFAPacketizer.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/ScheduleDAG.h" +#include "llvm/CodeGen/ScheduleDAGInstrs.h" +#include "llvm/CodeGen/LatencyPriorityQueue.h" +#include "llvm/CodeGen/SchedulerRegistry.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/MachineFunctionAnalysis.h" +#include "llvm/CodeGen/ScheduleHazardRecognizer.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/MC/MCInstrItineraries.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "Hexagon.h" +#include "HexagonTargetMachine.h" +#include "HexagonRegisterInfo.h" +#include "HexagonSubtarget.h" +#include "HexagonMachineFunctionInfo.h" + +#include + +using namespace llvm; + +namespace { + class HexagonPacketizer : public MachineFunctionPass { + + public: + static char ID; + HexagonPacketizer() : MachineFunctionPass(ID) {} + + void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); + AU.addRequired(); + AU.addPreserved(); + AU.addRequired(); + AU.addPreserved(); + MachineFunctionPass::getAnalysisUsage(AU); + } + + const char *getPassName() const { + return "Hexagon Packetizer"; + } + + bool runOnMachineFunction(MachineFunction &Fn); + }; + char HexagonPacketizer::ID = 0; + + class HexagonPacketizerList : public VLIWPacketizerList { + + private: + + // Has the instruction been promoted to a dot-new instruction. + bool PromotedToDotNew; + + // Has the instruction been glued to allocframe. + bool GlueAllocframeStore; + + // Has the feeder instruction been glued to new value jump. 
+ bool GlueToNewValueJump; + + // Check if there is a dependence between some instruction already in this + // packet and this instruction. + bool Dependence; + + // Only check for dependence if there are resources available to + // schedule this instruction. + bool FoundSequentialDependence; + + public: + // Ctor. + HexagonPacketizerList(MachineFunction &MF, MachineLoopInfo &MLI, + MachineDominatorTree &MDT); + + // initPacketizerState - initialize some internal flags. + void initPacketizerState(); + + // ignorePseudoInstruction - Ignore bundling of pseudo instructions. + bool ignorePseudoInstruction(MachineInstr *MI, MachineBasicBlock *MBB); + + // isSoloInstruction - return true if instruction MI can not be packetized + // with any other instruction, which means that MI itself is a packet. + bool isSoloInstruction(MachineInstr *MI); + + // isLegalToPacketizeTogether - Is it legal to packetize SUI and SUJ + // together. + bool isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ); + + // isLegalToPruneDependencies - Is it legal to prune dependece between SUI + // and SUJ. + bool isLegalToPruneDependencies(SUnit *SUI, SUnit *SUJ); + + MachineBasicBlock::iterator addToPacket(MachineInstr *MI); + private: + bool IsCallDependent(MachineInstr* MI, SDep::Kind DepType, unsigned DepReg); + bool PromoteToDotNew(MachineInstr* MI, SDep::Kind DepType, + MachineBasicBlock::iterator &MII, + const TargetRegisterClass* RC); + bool CanPromoteToDotNew(MachineInstr* MI, SUnit* PacketSU, + unsigned DepReg, + std::map MIToSUnit, + MachineBasicBlock::iterator &MII, + const TargetRegisterClass* RC); + bool CanPromoteToNewValue(MachineInstr* MI, SUnit* PacketSU, + unsigned DepReg, + std::map MIToSUnit, + MachineBasicBlock::iterator &MII); + bool CanPromoteToNewValueStore(MachineInstr* MI, MachineInstr* PacketMI, + unsigned DepReg, + std::map MIToSUnit); + bool DemoteToDotOld(MachineInstr* MI); + bool ArePredicatesComplements(MachineInstr* MI1, MachineInstr* MI2, + std::map MIToSUnit); + bool RestrictingDepExistInPacket(MachineInstr*, + unsigned, std::map ); + bool isNewifiable(MachineInstr* MI); + bool isCondInst(MachineInstr* MI); + bool IsNewifyStore (MachineInstr* MI); + bool tryAllocateResourcesForConstExt(MachineInstr* MI); + bool canReserveResourcesForConstExt(MachineInstr *MI); + void reserveResourcesForConstExt(MachineInstr* MI); + bool isNewValueInst(MachineInstr* MI); + bool isDotNewInst(MachineInstr* MI); + }; +} + +// HexagonPacketizerList Ctor. +HexagonPacketizerList::HexagonPacketizerList( + MachineFunction &MF, MachineLoopInfo &MLI,MachineDominatorTree &MDT) + : VLIWPacketizerList(MF, MLI, MDT, true){ +} + +bool HexagonPacketizer::runOnMachineFunction(MachineFunction &Fn) { + const TargetInstrInfo *TII = Fn.getTarget().getInstrInfo(); + MachineLoopInfo &MLI = getAnalysis(); + MachineDominatorTree &MDT = getAnalysis(); + + // Instantiate the packetizer. + HexagonPacketizerList Packetizer(Fn, MLI, MDT); + + // DFA state table should not be empty. + assert(Packetizer.getResourceTracker() && "Empty DFA table!"); + + // + // Loop over all basic blocks and remove KILL pseudo-instructions + // These instructions confuse the dependence analysis. Consider: + // D0 = ... (Insn 0) + // R0 = KILL R0, D0 (Insn 1) + // R0 = ... (Insn 2) + // Here, Insn 1 will result in the dependence graph not emitting an output + // dependence between Insn 0 and Insn 2. 
This can lead to incorrect
+  // packetization.
+  //
+  for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end();
+       MBB != MBBe; ++MBB) {
+    MachineBasicBlock::iterator End = MBB->end();
+    MachineBasicBlock::iterator MI = MBB->begin();
+    while (MI != End) {
+      if (MI->isKill()) {
+        MachineBasicBlock::iterator DeleteMI = MI;
+        ++MI;
+        MBB->erase(DeleteMI);
+        End = MBB->end();
+        continue;
+      }
+      ++MI;
+    }
+  }
+
+  // Loop over all of the basic blocks.
+  for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end();
+       MBB != MBBe; ++MBB) {
+    // Find scheduling regions and schedule / packetize each region.
+    unsigned RemainingCount = MBB->size();
+    for(MachineBasicBlock::iterator RegionEnd = MBB->end();
+        RegionEnd != MBB->begin();) {
+      // The next region starts above the previous region. Look backward in the
+      // instruction stream until we find the nearest boundary.
+      MachineBasicBlock::iterator I = RegionEnd;
+      for(;I != MBB->begin(); --I, --RemainingCount) {
+        if (TII->isSchedulingBoundary(llvm::prior(I), MBB, Fn))
+          break;
+      }
+      I = MBB->begin();
+
+      // Skip empty scheduling regions.
+      if (I == RegionEnd) {
+        RegionEnd = llvm::prior(RegionEnd);
+        --RemainingCount;
+        continue;
+      }
+      // Skip regions with one instruction.
+      if (I == llvm::prior(RegionEnd)) {
+        RegionEnd = llvm::prior(RegionEnd);
+        continue;
+      }
+
+      Packetizer.PacketizeMIs(MBB, I, RegionEnd);
+      RegionEnd = I;
+    }
+  }
+
+  return true;
+}
+
+
+static bool IsIndirectCall(MachineInstr* MI) {
+  return ((MI->getOpcode() == Hexagon::CALLR) ||
+          (MI->getOpcode() == Hexagon::CALLRv3));
+}
+
+// Reserve resources for a constant extender. Trigger an assertion if the
+// reservation fails.
+void HexagonPacketizerList::reserveResourcesForConstExt(MachineInstr* MI) {
+  const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII;
+  MachineInstr *PseudoMI = MI->getParent()->getParent()->CreateMachineInstr(
+                                  QII->get(Hexagon::IMMEXT), MI->getDebugLoc());
+
+  if (ResourceTracker->canReserveResources(PseudoMI)) {
+    ResourceTracker->reserveResources(PseudoMI);
+    MI->getParent()->getParent()->DeleteMachineInstr(PseudoMI);
+  } else {
+    MI->getParent()->getParent()->DeleteMachineInstr(PseudoMI);
+    llvm_unreachable("can not reserve resources for constant extender.");
+  }
+  return;
+}
+
+bool HexagonPacketizerList::canReserveResourcesForConstExt(MachineInstr *MI) {
+  const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII;
+  assert(QII->isExtended(MI) &&
+         "Should only be called for constant extended instructions");
+  MachineFunction *MF = MI->getParent()->getParent();
+  MachineInstr *PseudoMI = MF->CreateMachineInstr(QII->get(Hexagon::IMMEXT),
+                                                  MI->getDebugLoc());
+  bool CanReserve = ResourceTracker->canReserveResources(PseudoMI);
+  MF->DeleteMachineInstr(PseudoMI);
+  return CanReserve;
+}
+
+// Allocate resources (i.e. 4 bytes) for a constant extender. If this
+// succeeds, return true; otherwise, return false.
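+// (The probe is modeled by creating a throwaway IMMEXT pseudo instruction
+// and asking the DFA resource tracker whether it fits in the current
+// packet; the pseudo is deleted again on both paths.)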
+bool HexagonPacketizerList::tryAllocateResourcesForConstExt(MachineInstr* MI) { + const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII; + MachineInstr *PseudoMI = MI->getParent()->getParent()->CreateMachineInstr( + QII->get(Hexagon::IMMEXT), MI->getDebugLoc()); + + if (ResourceTracker->canReserveResources(PseudoMI)) { + ResourceTracker->reserveResources(PseudoMI); + MI->getParent()->getParent()->DeleteMachineInstr(PseudoMI); + return true; + } else { + MI->getParent()->getParent()->DeleteMachineInstr(PseudoMI); + return false; + } +} + + +bool HexagonPacketizerList::IsCallDependent(MachineInstr* MI, + SDep::Kind DepType, + unsigned DepReg) { + + const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII; + const HexagonRegisterInfo* QRI = + (const HexagonRegisterInfo *) TM.getRegisterInfo(); + + // Check for lr dependence + if (DepReg == QRI->getRARegister()) { + return true; + } + + if (QII->isDeallocRet(MI)) { + if (DepReg == QRI->getFrameRegister() || + DepReg == QRI->getStackRegister()) + return true; + } + + // Check if this is a predicate dependence + const TargetRegisterClass* RC = QRI->getMinimalPhysRegClass(DepReg); + if (RC == &Hexagon::PredRegsRegClass) { + return true; + } + + // + // Lastly check for an operand used in an indirect call + // If we had an attribute for checking if an instruction is an indirect call, + // then we could have avoided this relatively brittle implementation of + // IsIndirectCall() + // + // Assumes that the first operand of the CALLr is the function address + // + if (IsIndirectCall(MI) && (DepType == SDep::Data)) { + MachineOperand MO = MI->getOperand(0); + if (MO.isReg() && MO.isUse() && (MO.getReg() == DepReg)) { + return true; + } + } + + return false; +} + +static bool IsRegDependence(const SDep::Kind DepType) { + return (DepType == SDep::Data || DepType == SDep::Anti || + DepType == SDep::Output); +} + +static bool IsDirectJump(MachineInstr* MI) { + return (MI->getOpcode() == Hexagon::JMP); +} + +static bool IsSchedBarrier(MachineInstr* MI) { + switch (MI->getOpcode()) { + case Hexagon::BARRIER: + return true; + } + return false; +} + +static bool IsControlFlow(MachineInstr* MI) { + return (MI->getDesc().isTerminator() || MI->getDesc().isCall()); +} + +bool HexagonPacketizerList::isNewValueInst(MachineInstr* MI) { + const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII; + if (QII->isNewValueJump(MI)) + return true; + + if (QII->isNewValueStore(MI)) + return true; + + return false; +} + +// Function returns true if an instruction can be promoted to the new-value +// store. It will always return false for v2 and v3. +// It lists all the conditional and unconditional stores that can be promoted +// to the new-value stores. 
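+// For example (illustrative), in a packet such as
+//    { r2 = add(r1, #4)
+//      memw(r5+#0) = r2.new }
+// the store takes its data from the instruction issued alongside it in the
+// same packet.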
+ +bool HexagonPacketizerList::IsNewifyStore (MachineInstr* MI) { + const HexagonRegisterInfo* QRI = + (const HexagonRegisterInfo *) TM.getRegisterInfo(); + switch (MI->getOpcode()) + { + // store byte + case Hexagon::STrib: + case Hexagon::STrib_indexed: + case Hexagon::STrib_indexed_shl_V4: + case Hexagon::STrib_shl_V4: + case Hexagon::STrib_GP_V4: + case Hexagon::STb_GP_V4: + case Hexagon::POST_STbri: + case Hexagon::STrib_cPt: + case Hexagon::STrib_cdnPt_V4: + case Hexagon::STrib_cNotPt: + case Hexagon::STrib_cdnNotPt_V4: + case Hexagon::STrib_indexed_cPt: + case Hexagon::STrib_indexed_cdnPt_V4: + case Hexagon::STrib_indexed_cNotPt: + case Hexagon::STrib_indexed_cdnNotPt_V4: + case Hexagon::STrib_indexed_shl_cPt_V4: + case Hexagon::STrib_indexed_shl_cdnPt_V4: + case Hexagon::STrib_indexed_shl_cNotPt_V4: + case Hexagon::STrib_indexed_shl_cdnNotPt_V4: + case Hexagon::POST_STbri_cPt: + case Hexagon::POST_STbri_cdnPt_V4: + case Hexagon::POST_STbri_cNotPt: + case Hexagon::POST_STbri_cdnNotPt_V4: + case Hexagon::STb_GP_cPt_V4: + case Hexagon::STb_GP_cNotPt_V4: + case Hexagon::STb_GP_cdnPt_V4: + case Hexagon::STb_GP_cdnNotPt_V4: + case Hexagon::STrib_GP_cPt_V4: + case Hexagon::STrib_GP_cNotPt_V4: + case Hexagon::STrib_GP_cdnPt_V4: + case Hexagon::STrib_GP_cdnNotPt_V4: + + // store halfword + case Hexagon::STrih: + case Hexagon::STrih_indexed: + case Hexagon::STrih_indexed_shl_V4: + case Hexagon::STrih_shl_V4: + case Hexagon::STrih_GP_V4: + case Hexagon::STh_GP_V4: + case Hexagon::POST_SThri: + case Hexagon::STrih_cPt: + case Hexagon::STrih_cdnPt_V4: + case Hexagon::STrih_cNotPt: + case Hexagon::STrih_cdnNotPt_V4: + case Hexagon::STrih_indexed_cPt: + case Hexagon::STrih_indexed_cdnPt_V4: + case Hexagon::STrih_indexed_cNotPt: + case Hexagon::STrih_indexed_cdnNotPt_V4: + case Hexagon::STrih_indexed_shl_cPt_V4: + case Hexagon::STrih_indexed_shl_cdnPt_V4: + case Hexagon::STrih_indexed_shl_cNotPt_V4: + case Hexagon::STrih_indexed_shl_cdnNotPt_V4: + case Hexagon::POST_SThri_cPt: + case Hexagon::POST_SThri_cdnPt_V4: + case Hexagon::POST_SThri_cNotPt: + case Hexagon::POST_SThri_cdnNotPt_V4: + case Hexagon::STh_GP_cPt_V4: + case Hexagon::STh_GP_cNotPt_V4: + case Hexagon::STh_GP_cdnPt_V4: + case Hexagon::STh_GP_cdnNotPt_V4: + case Hexagon::STrih_GP_cPt_V4: + case Hexagon::STrih_GP_cNotPt_V4: + case Hexagon::STrih_GP_cdnPt_V4: + case Hexagon::STrih_GP_cdnNotPt_V4: + + // store word + case Hexagon::STriw: + case Hexagon::STriw_indexed: + case Hexagon::STriw_indexed_shl_V4: + case Hexagon::STriw_shl_V4: + case Hexagon::STriw_GP_V4: + case Hexagon::STw_GP_V4: + case Hexagon::POST_STwri: + case Hexagon::STriw_cPt: + case Hexagon::STriw_cdnPt_V4: + case Hexagon::STriw_cNotPt: + case Hexagon::STriw_cdnNotPt_V4: + case Hexagon::STriw_indexed_cPt: + case Hexagon::STriw_indexed_cdnPt_V4: + case Hexagon::STriw_indexed_cNotPt: + case Hexagon::STriw_indexed_cdnNotPt_V4: + case Hexagon::STriw_indexed_shl_cPt_V4: + case Hexagon::STriw_indexed_shl_cdnPt_V4: + case Hexagon::STriw_indexed_shl_cNotPt_V4: + case Hexagon::STriw_indexed_shl_cdnNotPt_V4: + case Hexagon::POST_STwri_cPt: + case Hexagon::POST_STwri_cdnPt_V4: + case Hexagon::POST_STwri_cNotPt: + case Hexagon::POST_STwri_cdnNotPt_V4: + case Hexagon::STw_GP_cPt_V4: + case Hexagon::STw_GP_cNotPt_V4: + case Hexagon::STw_GP_cdnPt_V4: + case Hexagon::STw_GP_cdnNotPt_V4: + case Hexagon::STriw_GP_cPt_V4: + case Hexagon::STriw_GP_cNotPt_V4: + case Hexagon::STriw_GP_cdnPt_V4: + case Hexagon::STriw_GP_cdnNotPt_V4: + return QRI->Subtarget.hasV4TOps(); + } + return false; +} 
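+
+// A note on the opcode names used in the mappings below: the "_nv_" infix
+// marks the new-value form of a store, while "_cdn" marks predication on a
+// .new predicate; the two compose, as in STrib_cdnPt_nv_V4.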
+ +static bool IsLoopN(MachineInstr *MI) { + return (MI->getOpcode() == Hexagon::LOOP0_i || + MI->getOpcode() == Hexagon::LOOP0_r); +} + +/// DoesModifyCalleeSavedReg - Returns true if the instruction modifies a +/// callee-saved register. +static bool DoesModifyCalleeSavedReg(MachineInstr *MI, + const TargetRegisterInfo *TRI) { + for (const uint16_t *CSR = TRI->getCalleeSavedRegs(); *CSR; ++CSR) { + unsigned CalleeSavedReg = *CSR; + if (MI->modifiesRegister(CalleeSavedReg, TRI)) + return true; + } + return false; +} + +// Return the new value instruction for a given store. +static int GetDotNewOp(const int opc) { + switch (opc) { + default: llvm_unreachable("Unknown .new type"); + // store new value byte + case Hexagon::STrib: + return Hexagon::STrib_nv_V4; + + case Hexagon::STrib_indexed: + return Hexagon::STrib_indexed_nv_V4; + + case Hexagon::STrib_indexed_shl_V4: + return Hexagon::STrib_indexed_shl_nv_V4; + + case Hexagon::STrib_shl_V4: + return Hexagon::STrib_shl_nv_V4; + + case Hexagon::STrib_GP_V4: + return Hexagon::STrib_GP_nv_V4; + + case Hexagon::STb_GP_V4: + return Hexagon::STb_GP_nv_V4; + + case Hexagon::POST_STbri: + return Hexagon::POST_STbri_nv_V4; + + case Hexagon::STrib_cPt: + return Hexagon::STrib_cPt_nv_V4; + + case Hexagon::STrib_cdnPt_V4: + return Hexagon::STrib_cdnPt_nv_V4; + + case Hexagon::STrib_cNotPt: + return Hexagon::STrib_cNotPt_nv_V4; + + case Hexagon::STrib_cdnNotPt_V4: + return Hexagon::STrib_cdnNotPt_nv_V4; + + case Hexagon::STrib_indexed_cPt: + return Hexagon::STrib_indexed_cPt_nv_V4; + + case Hexagon::STrib_indexed_cdnPt_V4: + return Hexagon::STrib_indexed_cdnPt_nv_V4; + + case Hexagon::STrib_indexed_cNotPt: + return Hexagon::STrib_indexed_cNotPt_nv_V4; + + case Hexagon::STrib_indexed_cdnNotPt_V4: + return Hexagon::STrib_indexed_cdnNotPt_nv_V4; + + case Hexagon::STrib_indexed_shl_cPt_V4: + return Hexagon::STrib_indexed_shl_cPt_nv_V4; + + case Hexagon::STrib_indexed_shl_cdnPt_V4: + return Hexagon::STrib_indexed_shl_cdnPt_nv_V4; + + case Hexagon::STrib_indexed_shl_cNotPt_V4: + return Hexagon::STrib_indexed_shl_cNotPt_nv_V4; + + case Hexagon::STrib_indexed_shl_cdnNotPt_V4: + return Hexagon::STrib_indexed_shl_cdnNotPt_nv_V4; + + case Hexagon::POST_STbri_cPt: + return Hexagon::POST_STbri_cPt_nv_V4; + + case Hexagon::POST_STbri_cdnPt_V4: + return Hexagon::POST_STbri_cdnPt_nv_V4; + + case Hexagon::POST_STbri_cNotPt: + return Hexagon::POST_STbri_cNotPt_nv_V4; + + case Hexagon::POST_STbri_cdnNotPt_V4: + return Hexagon::POST_STbri_cdnNotPt_nv_V4; + + case Hexagon::STb_GP_cPt_V4: + return Hexagon::STb_GP_cPt_nv_V4; + + case Hexagon::STb_GP_cNotPt_V4: + return Hexagon::STb_GP_cNotPt_nv_V4; + + case Hexagon::STb_GP_cdnPt_V4: + return Hexagon::STb_GP_cdnPt_nv_V4; + + case Hexagon::STb_GP_cdnNotPt_V4: + return Hexagon::STb_GP_cdnNotPt_nv_V4; + + case Hexagon::STrib_GP_cPt_V4: + return Hexagon::STrib_GP_cPt_nv_V4; + + case Hexagon::STrib_GP_cNotPt_V4: + return Hexagon::STrib_GP_cNotPt_nv_V4; + + case Hexagon::STrib_GP_cdnPt_V4: + return Hexagon::STrib_GP_cdnPt_nv_V4; + + case Hexagon::STrib_GP_cdnNotPt_V4: + return Hexagon::STrib_GP_cdnNotPt_nv_V4; + + // store new value halfword + case Hexagon::STrih: + return Hexagon::STrih_nv_V4; + + case Hexagon::STrih_indexed: + return Hexagon::STrih_indexed_nv_V4; + + case Hexagon::STrih_indexed_shl_V4: + return Hexagon::STrih_indexed_shl_nv_V4; + + case Hexagon::STrih_shl_V4: + return Hexagon::STrih_shl_nv_V4; + + case Hexagon::STrih_GP_V4: + return Hexagon::STrih_GP_nv_V4; + + case Hexagon::STh_GP_V4: + return 
Hexagon::STh_GP_nv_V4; + + case Hexagon::POST_SThri: + return Hexagon::POST_SThri_nv_V4; + + case Hexagon::STrih_cPt: + return Hexagon::STrih_cPt_nv_V4; + + case Hexagon::STrih_cdnPt_V4: + return Hexagon::STrih_cdnPt_nv_V4; + + case Hexagon::STrih_cNotPt: + return Hexagon::STrih_cNotPt_nv_V4; + + case Hexagon::STrih_cdnNotPt_V4: + return Hexagon::STrih_cdnNotPt_nv_V4; + + case Hexagon::STrih_indexed_cPt: + return Hexagon::STrih_indexed_cPt_nv_V4; + + case Hexagon::STrih_indexed_cdnPt_V4: + return Hexagon::STrih_indexed_cdnPt_nv_V4; + + case Hexagon::STrih_indexed_cNotPt: + return Hexagon::STrih_indexed_cNotPt_nv_V4; + + case Hexagon::STrih_indexed_cdnNotPt_V4: + return Hexagon::STrih_indexed_cdnNotPt_nv_V4; + + case Hexagon::STrih_indexed_shl_cPt_V4: + return Hexagon::STrih_indexed_shl_cPt_nv_V4; + + case Hexagon::STrih_indexed_shl_cdnPt_V4: + return Hexagon::STrih_indexed_shl_cdnPt_nv_V4; + + case Hexagon::STrih_indexed_shl_cNotPt_V4: + return Hexagon::STrih_indexed_shl_cNotPt_nv_V4; + + case Hexagon::STrih_indexed_shl_cdnNotPt_V4: + return Hexagon::STrih_indexed_shl_cdnNotPt_nv_V4; + + case Hexagon::POST_SThri_cPt: + return Hexagon::POST_SThri_cPt_nv_V4; + + case Hexagon::POST_SThri_cdnPt_V4: + return Hexagon::POST_SThri_cdnPt_nv_V4; + + case Hexagon::POST_SThri_cNotPt: + return Hexagon::POST_SThri_cNotPt_nv_V4; + + case Hexagon::POST_SThri_cdnNotPt_V4: + return Hexagon::POST_SThri_cdnNotPt_nv_V4; + + case Hexagon::STh_GP_cPt_V4: + return Hexagon::STh_GP_cPt_nv_V4; + + case Hexagon::STh_GP_cNotPt_V4: + return Hexagon::STh_GP_cNotPt_nv_V4; + + case Hexagon::STh_GP_cdnPt_V4: + return Hexagon::STh_GP_cdnPt_nv_V4; + + case Hexagon::STh_GP_cdnNotPt_V4: + return Hexagon::STh_GP_cdnNotPt_nv_V4; + + case Hexagon::STrih_GP_cPt_V4: + return Hexagon::STrih_GP_cPt_nv_V4; + + case Hexagon::STrih_GP_cNotPt_V4: + return Hexagon::STrih_GP_cNotPt_nv_V4; + + case Hexagon::STrih_GP_cdnPt_V4: + return Hexagon::STrih_GP_cdnPt_nv_V4; + + case Hexagon::STrih_GP_cdnNotPt_V4: + return Hexagon::STrih_GP_cdnNotPt_nv_V4; + + // store new value word + case Hexagon::STriw: + return Hexagon::STriw_nv_V4; + + case Hexagon::STriw_indexed: + return Hexagon::STriw_indexed_nv_V4; + + case Hexagon::STriw_indexed_shl_V4: + return Hexagon::STriw_indexed_shl_nv_V4; + + case Hexagon::STriw_shl_V4: + return Hexagon::STriw_shl_nv_V4; + + case Hexagon::STriw_GP_V4: + return Hexagon::STriw_GP_nv_V4; + + case Hexagon::STw_GP_V4: + return Hexagon::STw_GP_nv_V4; + + case Hexagon::POST_STwri: + return Hexagon::POST_STwri_nv_V4; + + case Hexagon::STriw_cPt: + return Hexagon::STriw_cPt_nv_V4; + + case Hexagon::STriw_cdnPt_V4: + return Hexagon::STriw_cdnPt_nv_V4; + + case Hexagon::STriw_cNotPt: + return Hexagon::STriw_cNotPt_nv_V4; + + case Hexagon::STriw_cdnNotPt_V4: + return Hexagon::STriw_cdnNotPt_nv_V4; + + case Hexagon::STriw_indexed_cPt: + return Hexagon::STriw_indexed_cPt_nv_V4; + + case Hexagon::STriw_indexed_cdnPt_V4: + return Hexagon::STriw_indexed_cdnPt_nv_V4; + + case Hexagon::STriw_indexed_cNotPt: + return Hexagon::STriw_indexed_cNotPt_nv_V4; + + case Hexagon::STriw_indexed_cdnNotPt_V4: + return Hexagon::STriw_indexed_cdnNotPt_nv_V4; + + case Hexagon::STriw_indexed_shl_cPt_V4: + return Hexagon::STriw_indexed_shl_cPt_nv_V4; + + case Hexagon::STriw_indexed_shl_cdnPt_V4: + return Hexagon::STriw_indexed_shl_cdnPt_nv_V4; + + case Hexagon::STriw_indexed_shl_cNotPt_V4: + return Hexagon::STriw_indexed_shl_cNotPt_nv_V4; + + case Hexagon::STriw_indexed_shl_cdnNotPt_V4: + return Hexagon::STriw_indexed_shl_cdnNotPt_nv_V4; + + case 
Hexagon::POST_STwri_cPt: + return Hexagon::POST_STwri_cPt_nv_V4; + + case Hexagon::POST_STwri_cdnPt_V4: + return Hexagon::POST_STwri_cdnPt_nv_V4; + + case Hexagon::POST_STwri_cNotPt: + return Hexagon::POST_STwri_cNotPt_nv_V4; + + case Hexagon::POST_STwri_cdnNotPt_V4: + return Hexagon::POST_STwri_cdnNotPt_nv_V4; + + case Hexagon::STw_GP_cPt_V4: + return Hexagon::STw_GP_cPt_nv_V4; + + case Hexagon::STw_GP_cNotPt_V4: + return Hexagon::STw_GP_cNotPt_nv_V4; + + case Hexagon::STw_GP_cdnPt_V4: + return Hexagon::STw_GP_cdnPt_nv_V4; + + case Hexagon::STw_GP_cdnNotPt_V4: + return Hexagon::STw_GP_cdnNotPt_nv_V4; + + case Hexagon::STriw_GP_cPt_V4: + return Hexagon::STriw_GP_cPt_nv_V4; + + case Hexagon::STriw_GP_cNotPt_V4: + return Hexagon::STriw_GP_cNotPt_nv_V4; + + case Hexagon::STriw_GP_cdnPt_V4: + return Hexagon::STriw_GP_cdnPt_nv_V4; + + case Hexagon::STriw_GP_cdnNotPt_V4: + return Hexagon::STriw_GP_cdnNotPt_nv_V4; + } +} + +// Return .new predicate version for an instruction +static int GetDotNewPredOp(const int opc) { + switch (opc) { + default: llvm_unreachable("Unknown .new type"); + // Conditional stores + // Store byte conditionally + case Hexagon::STrib_cPt : + return Hexagon::STrib_cdnPt_V4; + + case Hexagon::STrib_cNotPt : + return Hexagon::STrib_cdnNotPt_V4; + + case Hexagon::STrib_indexed_cPt : + return Hexagon::STrib_indexed_cdnPt_V4; + + case Hexagon::STrib_indexed_cNotPt : + return Hexagon::STrib_indexed_cdnNotPt_V4; + + case Hexagon::STrib_imm_cPt_V4 : + return Hexagon::STrib_imm_cdnPt_V4; + + case Hexagon::STrib_imm_cNotPt_V4 : + return Hexagon::STrib_imm_cdnNotPt_V4; + + case Hexagon::POST_STbri_cPt : + return Hexagon::POST_STbri_cdnPt_V4; + + case Hexagon::POST_STbri_cNotPt : + return Hexagon::POST_STbri_cdnNotPt_V4; + + case Hexagon::STrib_indexed_shl_cPt_V4 : + return Hexagon::STrib_indexed_shl_cdnPt_V4; + + case Hexagon::STrib_indexed_shl_cNotPt_V4 : + return Hexagon::STrib_indexed_shl_cdnNotPt_V4; + + case Hexagon::STb_GP_cPt_V4 : + return Hexagon::STb_GP_cdnPt_V4; + + case Hexagon::STb_GP_cNotPt_V4 : + return Hexagon::STb_GP_cdnNotPt_V4; + + case Hexagon::STrib_GP_cPt_V4 : + return Hexagon::STrib_GP_cdnPt_V4; + + case Hexagon::STrib_GP_cNotPt_V4 : + return Hexagon::STrib_GP_cdnNotPt_V4; + + // Store doubleword conditionally + case Hexagon::STrid_cPt : + return Hexagon::STrid_cdnPt_V4; + + case Hexagon::STrid_cNotPt : + return Hexagon::STrid_cdnNotPt_V4; + + case Hexagon::STrid_indexed_cPt : + return Hexagon::STrid_indexed_cdnPt_V4; + + case Hexagon::STrid_indexed_cNotPt : + return Hexagon::STrid_indexed_cdnNotPt_V4; + + case Hexagon::STrid_indexed_shl_cPt_V4 : + return Hexagon::STrid_indexed_shl_cdnPt_V4; + + case Hexagon::STrid_indexed_shl_cNotPt_V4 : + return Hexagon::STrid_indexed_shl_cdnNotPt_V4; + + case Hexagon::POST_STdri_cPt : + return Hexagon::POST_STdri_cdnPt_V4; + + case Hexagon::POST_STdri_cNotPt : + return Hexagon::POST_STdri_cdnNotPt_V4; + + case Hexagon::STd_GP_cPt_V4 : + return Hexagon::STd_GP_cdnPt_V4; + + case Hexagon::STd_GP_cNotPt_V4 : + return Hexagon::STd_GP_cdnNotPt_V4; + + case Hexagon::STrid_GP_cPt_V4 : + return Hexagon::STrid_GP_cdnPt_V4; + + case Hexagon::STrid_GP_cNotPt_V4 : + return Hexagon::STrid_GP_cdnNotPt_V4; + + // Store halfword conditionally + case Hexagon::STrih_cPt : + return Hexagon::STrih_cdnPt_V4; + + case Hexagon::STrih_cNotPt : + return Hexagon::STrih_cdnNotPt_V4; + + case Hexagon::STrih_indexed_cPt : + return Hexagon::STrih_indexed_cdnPt_V4; + + case Hexagon::STrih_indexed_cNotPt : + return Hexagon::STrih_indexed_cdnNotPt_V4; + + 
case Hexagon::STrih_imm_cPt_V4 :
+    return Hexagon::STrih_imm_cdnPt_V4;
+
+  case Hexagon::STrih_imm_cNotPt_V4 :
+    return Hexagon::STrih_imm_cdnNotPt_V4;
+
+  case Hexagon::STrih_indexed_shl_cPt_V4 :
+    return Hexagon::STrih_indexed_shl_cdnPt_V4;
+
+  case Hexagon::STrih_indexed_shl_cNotPt_V4 :
+    return Hexagon::STrih_indexed_shl_cdnNotPt_V4;
+
+  case Hexagon::POST_SThri_cPt :
+    return Hexagon::POST_SThri_cdnPt_V4;
+
+  case Hexagon::POST_SThri_cNotPt :
+    return Hexagon::POST_SThri_cdnNotPt_V4;
+
+  case Hexagon::STh_GP_cPt_V4 :
+    return Hexagon::STh_GP_cdnPt_V4;
+
+  case Hexagon::STh_GP_cNotPt_V4 :
+    return Hexagon::STh_GP_cdnNotPt_V4;
+
+  case Hexagon::STrih_GP_cPt_V4 :
+    return Hexagon::STrih_GP_cdnPt_V4;
+
+  case Hexagon::STrih_GP_cNotPt_V4 :
+    return Hexagon::STrih_GP_cdnNotPt_V4;
+
+  // Store word conditionally
+  case Hexagon::STriw_cPt :
+    return Hexagon::STriw_cdnPt_V4;
+
+  case Hexagon::STriw_cNotPt :
+    return Hexagon::STriw_cdnNotPt_V4;
+
+  case Hexagon::STriw_indexed_cPt :
+    return Hexagon::STriw_indexed_cdnPt_V4;
+
+  case Hexagon::STriw_indexed_cNotPt :
+    return Hexagon::STriw_indexed_cdnNotPt_V4;
+
+  case Hexagon::STriw_imm_cPt_V4 :
+    return Hexagon::STriw_imm_cdnPt_V4;
+
+  case Hexagon::STriw_imm_cNotPt_V4 :
+    return Hexagon::STriw_imm_cdnNotPt_V4;
+
+  case Hexagon::STriw_indexed_shl_cPt_V4 :
+    return Hexagon::STriw_indexed_shl_cdnPt_V4;
+
+  case Hexagon::STriw_indexed_shl_cNotPt_V4 :
+    return Hexagon::STriw_indexed_shl_cdnNotPt_V4;
+
+  case Hexagon::POST_STwri_cPt :
+    return Hexagon::POST_STwri_cdnPt_V4;
+
+  case Hexagon::POST_STwri_cNotPt :
+    return Hexagon::POST_STwri_cdnNotPt_V4;
+
+  case Hexagon::STw_GP_cPt_V4 :
+    return Hexagon::STw_GP_cdnPt_V4;
+
+  case Hexagon::STw_GP_cNotPt_V4 :
+    return Hexagon::STw_GP_cdnNotPt_V4;
+
+  case Hexagon::STriw_GP_cPt_V4 :
+    return Hexagon::STriw_GP_cdnPt_V4;
+
+  case Hexagon::STriw_GP_cNotPt_V4 :
+    return Hexagon::STriw_GP_cdnNotPt_V4;
+
+  // Conditional Jumps
+  case Hexagon::JMP_c:
+    return Hexagon::JMP_cdnPt;
+
+  case Hexagon::JMP_cNot:
+    return Hexagon::JMP_cdnNotPt;
+
+  case Hexagon::JMPR_cPt:
+    return Hexagon::JMPR_cdnPt_V3;
+
+  case Hexagon::JMPR_cNotPt:
+    return Hexagon::JMPR_cdnNotPt_V3;
+
+  // Conditional Transfers
+  case Hexagon::TFR_cPt:
+    return Hexagon::TFR_cdnPt;
+
+  case Hexagon::TFR_cNotPt:
+    return Hexagon::TFR_cdnNotPt;
+
+  case Hexagon::TFRI_cPt:
+    return Hexagon::TFRI_cdnPt;
+
+  case Hexagon::TFRI_cNotPt:
+    return Hexagon::TFRI_cdnNotPt;
+
+  // Load double word
+  case Hexagon::LDrid_cPt :
+    return Hexagon::LDrid_cdnPt;
+
+  case Hexagon::LDrid_cNotPt :
+    return Hexagon::LDrid_cdnNotPt;
+
+  case Hexagon::LDrid_indexed_cPt :
+    return Hexagon::LDrid_indexed_cdnPt;
+
+  case Hexagon::LDrid_indexed_cNotPt :
+    return Hexagon::LDrid_indexed_cdnNotPt;
+
+  case Hexagon::POST_LDrid_cPt :
+    return Hexagon::POST_LDrid_cdnPt_V4;
+
+  case Hexagon::POST_LDrid_cNotPt :
+    return Hexagon::POST_LDrid_cdnNotPt_V4;
+
+  // Load word
+  case Hexagon::LDriw_cPt :
+    return Hexagon::LDriw_cdnPt;
+
+  case Hexagon::LDriw_cNotPt :
+    return Hexagon::LDriw_cdnNotPt;
+
+  case Hexagon::LDriw_indexed_cPt :
+    return Hexagon::LDriw_indexed_cdnPt;
+
+  case Hexagon::LDriw_indexed_cNotPt :
+    return Hexagon::LDriw_indexed_cdnNotPt;
+
+  case Hexagon::POST_LDriw_cPt :
+    return Hexagon::POST_LDriw_cdnPt_V4;
+
+  case Hexagon::POST_LDriw_cNotPt :
+    return Hexagon::POST_LDriw_cdnNotPt_V4;
+
+  // Load halfword
+  case Hexagon::LDrih_cPt :
+    return Hexagon::LDrih_cdnPt;
+
+  case Hexagon::LDrih_cNotPt :
+    return Hexagon::LDrih_cdnNotPt;
+
+  case Hexagon::LDrih_indexed_cPt :
return Hexagon::LDrih_indexed_cdnPt; + + case Hexagon::LDrih_indexed_cNotPt : + return Hexagon::LDrih_indexed_cdnNotPt; + + case Hexagon::POST_LDrih_cPt : + return Hexagon::POST_LDrih_cdnPt_V4; + + case Hexagon::POST_LDrih_cNotPt : + return Hexagon::POST_LDrih_cdnNotPt_V4; + + // Load byte + case Hexagon::LDrib_cPt : + return Hexagon::LDrib_cdnPt; + + case Hexagon::LDrib_cNotPt : + return Hexagon::LDrib_cdnNotPt; + + case Hexagon::LDrib_indexed_cPt : + return Hexagon::LDrib_indexed_cdnPt; + + case Hexagon::LDrib_indexed_cNotPt : + return Hexagon::LDrib_indexed_cdnNotPt; + + case Hexagon::POST_LDrib_cPt : + return Hexagon::POST_LDrib_cdnPt_V4; + + case Hexagon::POST_LDrib_cNotPt : + return Hexagon::POST_LDrib_cdnNotPt_V4; + + // Load unsigned halfword + case Hexagon::LDriuh_cPt : + return Hexagon::LDriuh_cdnPt; + + case Hexagon::LDriuh_cNotPt : + return Hexagon::LDriuh_cdnNotPt; + + case Hexagon::LDriuh_indexed_cPt : + return Hexagon::LDriuh_indexed_cdnPt; + + case Hexagon::LDriuh_indexed_cNotPt : + return Hexagon::LDriuh_indexed_cdnNotPt; + + case Hexagon::POST_LDriuh_cPt : + return Hexagon::POST_LDriuh_cdnPt_V4; + + case Hexagon::POST_LDriuh_cNotPt : + return Hexagon::POST_LDriuh_cdnNotPt_V4; + + // Load unsigned byte + case Hexagon::LDriub_cPt : + return Hexagon::LDriub_cdnPt; + + case Hexagon::LDriub_cNotPt : + return Hexagon::LDriub_cdnNotPt; + + case Hexagon::LDriub_indexed_cPt : + return Hexagon::LDriub_indexed_cdnPt; + + case Hexagon::LDriub_indexed_cNotPt : + return Hexagon::LDriub_indexed_cdnNotPt; + + case Hexagon::POST_LDriub_cPt : + return Hexagon::POST_LDriub_cdnPt_V4; + + case Hexagon::POST_LDriub_cNotPt : + return Hexagon::POST_LDriub_cdnNotPt_V4; + + // V4 indexed+scaled load + + case Hexagon::LDrid_indexed_cPt_V4 : + return Hexagon::LDrid_indexed_cdnPt_V4; + + case Hexagon::LDrid_indexed_cNotPt_V4 : + return Hexagon::LDrid_indexed_cdnNotPt_V4; + + case Hexagon::LDrid_indexed_shl_cPt_V4 : + return Hexagon::LDrid_indexed_shl_cdnPt_V4; + + case Hexagon::LDrid_indexed_shl_cNotPt_V4 : + return Hexagon::LDrid_indexed_shl_cdnNotPt_V4; + + case Hexagon::LDrib_indexed_cPt_V4 : + return Hexagon::LDrib_indexed_cdnPt_V4; + + case Hexagon::LDrib_indexed_cNotPt_V4 : + return Hexagon::LDrib_indexed_cdnNotPt_V4; + + case Hexagon::LDrib_indexed_shl_cPt_V4 : + return Hexagon::LDrib_indexed_shl_cdnPt_V4; + + case Hexagon::LDrib_indexed_shl_cNotPt_V4 : + return Hexagon::LDrib_indexed_shl_cdnNotPt_V4; + + case Hexagon::LDriub_indexed_cPt_V4 : + return Hexagon::LDriub_indexed_cdnPt_V4; + + case Hexagon::LDriub_indexed_cNotPt_V4 : + return Hexagon::LDriub_indexed_cdnNotPt_V4; + + case Hexagon::LDriub_indexed_shl_cPt_V4 : + return Hexagon::LDriub_indexed_shl_cdnPt_V4; + + case Hexagon::LDriub_indexed_shl_cNotPt_V4 : + return Hexagon::LDriub_indexed_shl_cdnNotPt_V4; + + case Hexagon::LDrih_indexed_cPt_V4 : + return Hexagon::LDrih_indexed_cdnPt_V4; + + case Hexagon::LDrih_indexed_cNotPt_V4 : + return Hexagon::LDrih_indexed_cdnNotPt_V4; + + case Hexagon::LDrih_indexed_shl_cPt_V4 : + return Hexagon::LDrih_indexed_shl_cdnPt_V4; + + case Hexagon::LDrih_indexed_shl_cNotPt_V4 : + return Hexagon::LDrih_indexed_shl_cdnNotPt_V4; + + case Hexagon::LDriuh_indexed_cPt_V4 : + return Hexagon::LDriuh_indexed_cdnPt_V4; + + case Hexagon::LDriuh_indexed_cNotPt_V4 : + return Hexagon::LDriuh_indexed_cdnNotPt_V4; + + case Hexagon::LDriuh_indexed_shl_cPt_V4 : + return Hexagon::LDriuh_indexed_shl_cdnPt_V4; + + case Hexagon::LDriuh_indexed_shl_cNotPt_V4 : + return Hexagon::LDriuh_indexed_shl_cdnNotPt_V4; + + case 
Hexagon::LDriw_indexed_cPt_V4 : + return Hexagon::LDriw_indexed_cdnPt_V4; + + case Hexagon::LDriw_indexed_cNotPt_V4 : + return Hexagon::LDriw_indexed_cdnNotPt_V4; + + case Hexagon::LDriw_indexed_shl_cPt_V4 : + return Hexagon::LDriw_indexed_shl_cdnPt_V4; + + case Hexagon::LDriw_indexed_shl_cNotPt_V4 : + return Hexagon::LDriw_indexed_shl_cdnNotPt_V4; + + // V4 global address load + + case Hexagon::LDd_GP_cPt_V4: + return Hexagon::LDd_GP_cdnPt_V4; + + case Hexagon::LDd_GP_cNotPt_V4: + return Hexagon::LDd_GP_cdnNotPt_V4; + + case Hexagon::LDb_GP_cPt_V4: + return Hexagon::LDb_GP_cdnPt_V4; + + case Hexagon::LDb_GP_cNotPt_V4: + return Hexagon::LDb_GP_cdnNotPt_V4; + + case Hexagon::LDub_GP_cPt_V4: + return Hexagon::LDub_GP_cdnPt_V4; + + case Hexagon::LDub_GP_cNotPt_V4: + return Hexagon::LDub_GP_cdnNotPt_V4; + + case Hexagon::LDh_GP_cPt_V4: + return Hexagon::LDh_GP_cdnPt_V4; + + case Hexagon::LDh_GP_cNotPt_V4: + return Hexagon::LDh_GP_cdnNotPt_V4; + + case Hexagon::LDuh_GP_cPt_V4: + return Hexagon::LDuh_GP_cdnPt_V4; + + case Hexagon::LDuh_GP_cNotPt_V4: + return Hexagon::LDuh_GP_cdnNotPt_V4; + + case Hexagon::LDw_GP_cPt_V4: + return Hexagon::LDw_GP_cdnPt_V4; + + case Hexagon::LDw_GP_cNotPt_V4: + return Hexagon::LDw_GP_cdnNotPt_V4; + + case Hexagon::LDrid_GP_cPt_V4: + return Hexagon::LDrid_GP_cdnPt_V4; + + case Hexagon::LDrid_GP_cNotPt_V4: + return Hexagon::LDrid_GP_cdnNotPt_V4; + + case Hexagon::LDrib_GP_cPt_V4: + return Hexagon::LDrib_GP_cdnPt_V4; + + case Hexagon::LDrib_GP_cNotPt_V4: + return Hexagon::LDrib_GP_cdnNotPt_V4; + + case Hexagon::LDriub_GP_cPt_V4: + return Hexagon::LDriub_GP_cdnPt_V4; + + case Hexagon::LDriub_GP_cNotPt_V4: + return Hexagon::LDriub_GP_cdnNotPt_V4; + + case Hexagon::LDrih_GP_cPt_V4: + return Hexagon::LDrih_GP_cdnPt_V4; + + case Hexagon::LDrih_GP_cNotPt_V4: + return Hexagon::LDrih_GP_cdnNotPt_V4; + + case Hexagon::LDriuh_GP_cPt_V4: + return Hexagon::LDriuh_GP_cdnPt_V4; + + case Hexagon::LDriuh_GP_cNotPt_V4: + return Hexagon::LDriuh_GP_cdnNotPt_V4; + + case Hexagon::LDriw_GP_cPt_V4: + return Hexagon::LDriw_GP_cdnPt_V4; + + case Hexagon::LDriw_GP_cNotPt_V4: + return Hexagon::LDriw_GP_cdnNotPt_V4; + + // Conditional store new-value byte + case Hexagon::STrib_cPt_nv_V4 : + return Hexagon::STrib_cdnPt_nv_V4; + case Hexagon::STrib_cNotPt_nv_V4 : + return Hexagon::STrib_cdnNotPt_nv_V4; + + case Hexagon::STrib_indexed_cPt_nv_V4 : + return Hexagon::STrib_indexed_cdnPt_nv_V4; + case Hexagon::STrib_indexed_cNotPt_nv_V4 : + return Hexagon::STrib_indexed_cdnNotPt_nv_V4; + + case Hexagon::STrib_indexed_shl_cPt_nv_V4 : + return Hexagon::STrib_indexed_shl_cdnPt_nv_V4; + case Hexagon::STrib_indexed_shl_cNotPt_nv_V4 : + return Hexagon::STrib_indexed_shl_cdnNotPt_nv_V4; + + case Hexagon::POST_STbri_cPt_nv_V4 : + return Hexagon::POST_STbri_cdnPt_nv_V4; + case Hexagon::POST_STbri_cNotPt_nv_V4 : + return Hexagon::POST_STbri_cdnNotPt_nv_V4; + + case Hexagon::STb_GP_cPt_nv_V4 : + return Hexagon::STb_GP_cdnPt_nv_V4; + + case Hexagon::STb_GP_cNotPt_nv_V4 : + return Hexagon::STb_GP_cdnNotPt_nv_V4; + + case Hexagon::STrib_GP_cPt_nv_V4 : + return Hexagon::STrib_GP_cdnPt_nv_V4; + + case Hexagon::STrib_GP_cNotPt_nv_V4 : + return Hexagon::STrib_GP_cdnNotPt_nv_V4; + + // Conditional store new-value halfword + case Hexagon::STrih_cPt_nv_V4 : + return Hexagon::STrih_cdnPt_nv_V4; + case Hexagon::STrih_cNotPt_nv_V4 : + return Hexagon::STrih_cdnNotPt_nv_V4; + + case Hexagon::STrih_indexed_cPt_nv_V4 : + return Hexagon::STrih_indexed_cdnPt_nv_V4; + case Hexagon::STrih_indexed_cNotPt_nv_V4 : + return 
Hexagon::STrih_indexed_cdnNotPt_nv_V4; + + case Hexagon::STrih_indexed_shl_cPt_nv_V4 : + return Hexagon::STrih_indexed_shl_cdnPt_nv_V4; + case Hexagon::STrih_indexed_shl_cNotPt_nv_V4 : + return Hexagon::STrih_indexed_shl_cdnNotPt_nv_V4; + + case Hexagon::POST_SThri_cPt_nv_V4 : + return Hexagon::POST_SThri_cdnPt_nv_V4; + case Hexagon::POST_SThri_cNotPt_nv_V4 : + return Hexagon::POST_SThri_cdnNotPt_nv_V4; + + case Hexagon::STh_GP_cPt_nv_V4 : + return Hexagon::STh_GP_cdnPt_nv_V4; + + case Hexagon::STh_GP_cNotPt_nv_V4 : + return Hexagon::STh_GP_cdnNotPt_nv_V4; + + case Hexagon::STrih_GP_cPt_nv_V4 : + return Hexagon::STrih_GP_cdnPt_nv_V4; + + case Hexagon::STrih_GP_cNotPt_nv_V4 : + return Hexagon::STrih_GP_cdnNotPt_nv_V4; + + // Conditional store new-value word + case Hexagon::STriw_cPt_nv_V4 : + return Hexagon::STriw_cdnPt_nv_V4; + case Hexagon::STriw_cNotPt_nv_V4 : + return Hexagon::STriw_cdnNotPt_nv_V4; + + case Hexagon::STriw_indexed_cPt_nv_V4 : + return Hexagon::STriw_indexed_cdnPt_nv_V4; + case Hexagon::STriw_indexed_cNotPt_nv_V4 : + return Hexagon::STriw_indexed_cdnNotPt_nv_V4; + + case Hexagon::STriw_indexed_shl_cPt_nv_V4 : + return Hexagon::STriw_indexed_shl_cdnPt_nv_V4; + case Hexagon::STriw_indexed_shl_cNotPt_nv_V4 : + return Hexagon::STriw_indexed_shl_cdnNotPt_nv_V4; + + case Hexagon::POST_STwri_cPt_nv_V4 : + return Hexagon::POST_STwri_cdnPt_nv_V4; + case Hexagon::POST_STwri_cNotPt_nv_V4: + return Hexagon::POST_STwri_cdnNotPt_nv_V4; + + case Hexagon::STw_GP_cPt_nv_V4 : + return Hexagon::STw_GP_cdnPt_nv_V4; + + case Hexagon::STw_GP_cNotPt_nv_V4 : + return Hexagon::STw_GP_cdnNotPt_nv_V4; + + case Hexagon::STriw_GP_cPt_nv_V4 : + return Hexagon::STriw_GP_cdnPt_nv_V4; + + case Hexagon::STriw_GP_cNotPt_nv_V4 : + return Hexagon::STriw_GP_cdnNotPt_nv_V4; + + // Conditional add + case Hexagon::ADD_ri_cPt : + return Hexagon::ADD_ri_cdnPt; + case Hexagon::ADD_ri_cNotPt : + return Hexagon::ADD_ri_cdnNotPt; + + case Hexagon::ADD_rr_cPt : + return Hexagon::ADD_rr_cdnPt; + case Hexagon::ADD_rr_cNotPt : + return Hexagon::ADD_rr_cdnNotPt; + + // Conditional logical Operations + case Hexagon::XOR_rr_cPt : + return Hexagon::XOR_rr_cdnPt; + case Hexagon::XOR_rr_cNotPt : + return Hexagon::XOR_rr_cdnNotPt; + + case Hexagon::AND_rr_cPt : + return Hexagon::AND_rr_cdnPt; + case Hexagon::AND_rr_cNotPt : + return Hexagon::AND_rr_cdnNotPt; + + case Hexagon::OR_rr_cPt : + return Hexagon::OR_rr_cdnPt; + case Hexagon::OR_rr_cNotPt : + return Hexagon::OR_rr_cdnNotPt; + + // Conditional Subtract + case Hexagon::SUB_rr_cPt : + return Hexagon::SUB_rr_cdnPt; + case Hexagon::SUB_rr_cNotPt : + return Hexagon::SUB_rr_cdnNotPt; + + // Conditional combine + case Hexagon::COMBINE_rr_cPt : + return Hexagon::COMBINE_rr_cdnPt; + case Hexagon::COMBINE_rr_cNotPt : + return Hexagon::COMBINE_rr_cdnNotPt; + + case Hexagon::ASLH_cPt_V4 : + return Hexagon::ASLH_cdnPt_V4; + case Hexagon::ASLH_cNotPt_V4 : + return Hexagon::ASLH_cdnNotPt_V4; + + case Hexagon::ASRH_cPt_V4 : + return Hexagon::ASRH_cdnPt_V4; + case Hexagon::ASRH_cNotPt_V4 : + return Hexagon::ASRH_cdnNotPt_V4; + + case Hexagon::SXTB_cPt_V4 : + return Hexagon::SXTB_cdnPt_V4; + case Hexagon::SXTB_cNotPt_V4 : + return Hexagon::SXTB_cdnNotPt_V4; + + case Hexagon::SXTH_cPt_V4 : + return Hexagon::SXTH_cdnPt_V4; + case Hexagon::SXTH_cNotPt_V4 : + return Hexagon::SXTH_cdnNotPt_V4; + + case Hexagon::ZXTB_cPt_V4 : + return Hexagon::ZXTB_cdnPt_V4; + case Hexagon::ZXTB_cNotPt_V4 : + return Hexagon::ZXTB_cdnNotPt_V4; + + case Hexagon::ZXTH_cPt_V4 : + return Hexagon::ZXTH_cdnPt_V4; + 
case Hexagon::ZXTH_cNotPt_V4 :
+    return Hexagon::ZXTH_cdnNotPt_V4;
+  }
+}
+
+// Returns true if an instruction can be promoted to .new predicate
+// or new-value store.
+bool HexagonPacketizerList::isNewifiable(MachineInstr* MI) {
+  return isCondInst(MI) || IsNewifyStore(MI);
+}
+
+bool HexagonPacketizerList::isCondInst(MachineInstr* MI) {
+  const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII;
+  const MCInstrDesc& TID = MI->getDesc();
+  // Bug 5670: until that is fixed, the IsRegisterJump check stays disabled.
+  if (TID.isConditionalBranch() // && !IsRegisterJump(MI)) ||
+      || QII->isConditionalTransfer(MI)
+      || QII->isConditionalALU32(MI)
+      || QII->isConditionalLoad(MI)
+      || QII->isConditionalStore(MI)) {
+    return true;
+  }
+  return false;
+}
+
+
+// Promote an instruction to its .new form. At this point we have already
+// called CanPromoteToDotNew and made sure that it can *indeed* be promoted.
+bool HexagonPacketizerList::PromoteToDotNew(MachineInstr* MI,
+                      SDep::Kind DepType, MachineBasicBlock::iterator &MII,
+                      const TargetRegisterClass* RC) {
+
+  assert(DepType == SDep::Data);
+  const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII;
+
+  int NewOpcode;
+  if (RC == &Hexagon::PredRegsRegClass)
+    NewOpcode = GetDotNewPredOp(MI->getOpcode());
+  else
+    NewOpcode = GetDotNewOp(MI->getOpcode());
+  MI->setDesc(QII->get(NewOpcode));
+
+  return true;
+}
+
+// Returns the most basic instruction for the .new predicated instructions
+// and new-value stores. For example, all of the following instructions are
+// converted back to the same instruction:
+//   1) if (p0.new) memw(R0+#0) = R1.new  --->
+//   2) if (p0)     memw(R0+#0) = R1.new  ------->  if (p0) memw(R0+#0) = R1
+//   3) if (p0.new) memw(R0+#0) = R1      --->
+//
+// To understand why instruction 1 has to be translated back to its original
+// form, consider a packet with three instructions:
+//   { p0 = cmp.eq(R0,R1)
+//     if (p0.new) R2 = add(R3, R4)
+//     R5 = add (R3, R1)
+//   }
+//   if (p0) memw(R5+#0) = R2  <--- trying to include it in the previous packet
+//
+// This instruction can be part of the previous packet only if both p0 and R2
+// are promoted to .new values. The promotion happens in steps: first the
+// predicate register is promoted to .new, and in the next iteration R2 is
+// promoted. Therefore, if the dependence check fails (due to R5) during the
+// next iteration, the instruction must be converted back to its most basic
+// form.
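+
+// Illustrative sketch (editorial; a hypothetical sanity check, assuming
+// forward declarations of the file-local GetDotNewPredOp above and
+// GetDotOldOp below): for a plain predicated opcode, demotion undoes
+// promotion, which is what makes the speculative promote-then-repacketize
+// scheme described above safe.
+static void checkDotNewRoundTrip() {
+  const int Opc = Hexagon::STrih_cPt;       // a base conditional store
+  const int NewOpc = GetDotNewPredOp(Opc);  // -> Hexagon::STrih_cdnPt_V4
+  assert(GetDotOldOp(NewOpc) == Opc && "promotion must be reversible");
+}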
+ +static int GetDotOldOp(const int opc) { + switch (opc) { + default: llvm_unreachable("Unknown .old type"); + case Hexagon::TFR_cdnPt: + return Hexagon::TFR_cPt; + + case Hexagon::TFR_cdnNotPt: + return Hexagon::TFR_cNotPt; + + case Hexagon::TFRI_cdnPt: + return Hexagon::TFRI_cPt; + + case Hexagon::TFRI_cdnNotPt: + return Hexagon::TFRI_cNotPt; + + case Hexagon::JMP_cdnPt: + return Hexagon::JMP_c; + + case Hexagon::JMP_cdnNotPt: + return Hexagon::JMP_cNot; + + case Hexagon::JMPR_cdnPt_V3: + return Hexagon::JMPR_cPt; + + case Hexagon::JMPR_cdnNotPt_V3: + return Hexagon::JMPR_cNotPt; + + // Load double word + + case Hexagon::LDrid_cdnPt : + return Hexagon::LDrid_cPt; + + case Hexagon::LDrid_cdnNotPt : + return Hexagon::LDrid_cNotPt; + + case Hexagon::LDrid_indexed_cdnPt : + return Hexagon::LDrid_indexed_cPt; + + case Hexagon::LDrid_indexed_cdnNotPt : + return Hexagon::LDrid_indexed_cNotPt; + + case Hexagon::POST_LDrid_cdnPt_V4 : + return Hexagon::POST_LDrid_cPt; + + case Hexagon::POST_LDrid_cdnNotPt_V4 : + return Hexagon::POST_LDrid_cNotPt; + + // Load word + + case Hexagon::LDriw_cdnPt : + return Hexagon::LDriw_cPt; + + case Hexagon::LDriw_cdnNotPt : + return Hexagon::LDriw_cNotPt; + + case Hexagon::LDriw_indexed_cdnPt : + return Hexagon::LDriw_indexed_cPt; + + case Hexagon::LDriw_indexed_cdnNotPt : + return Hexagon::LDriw_indexed_cNotPt; + + case Hexagon::POST_LDriw_cdnPt_V4 : + return Hexagon::POST_LDriw_cPt; + + case Hexagon::POST_LDriw_cdnNotPt_V4 : + return Hexagon::POST_LDriw_cNotPt; + + // Load half + + case Hexagon::LDrih_cdnPt : + return Hexagon::LDrih_cPt; + + case Hexagon::LDrih_cdnNotPt : + return Hexagon::LDrih_cNotPt; + + case Hexagon::LDrih_indexed_cdnPt : + return Hexagon::LDrih_indexed_cPt; + + case Hexagon::LDrih_indexed_cdnNotPt : + return Hexagon::LDrih_indexed_cNotPt; + + case Hexagon::POST_LDrih_cdnPt_V4 : + return Hexagon::POST_LDrih_cPt; + + case Hexagon::POST_LDrih_cdnNotPt_V4 : + return Hexagon::POST_LDrih_cNotPt; + + // Load byte + + case Hexagon::LDrib_cdnPt : + return Hexagon::LDrib_cPt; + + case Hexagon::LDrib_cdnNotPt : + return Hexagon::LDrib_cNotPt; + + case Hexagon::LDrib_indexed_cdnPt : + return Hexagon::LDrib_indexed_cPt; + + case Hexagon::LDrib_indexed_cdnNotPt : + return Hexagon::LDrib_indexed_cNotPt; + + case Hexagon::POST_LDrib_cdnPt_V4 : + return Hexagon::POST_LDrib_cPt; + + case Hexagon::POST_LDrib_cdnNotPt_V4 : + return Hexagon::POST_LDrib_cNotPt; + + // Load unsigned half + + case Hexagon::LDriuh_cdnPt : + return Hexagon::LDriuh_cPt; + + case Hexagon::LDriuh_cdnNotPt : + return Hexagon::LDriuh_cNotPt; + + case Hexagon::LDriuh_indexed_cdnPt : + return Hexagon::LDriuh_indexed_cPt; + + case Hexagon::LDriuh_indexed_cdnNotPt : + return Hexagon::LDriuh_indexed_cNotPt; + + case Hexagon::POST_LDriuh_cdnPt_V4 : + return Hexagon::POST_LDriuh_cPt; + + case Hexagon::POST_LDriuh_cdnNotPt_V4 : + return Hexagon::POST_LDriuh_cNotPt; + + // Load unsigned byte + case Hexagon::LDriub_cdnPt : + return Hexagon::LDriub_cPt; + + case Hexagon::LDriub_cdnNotPt : + return Hexagon::LDriub_cNotPt; + + case Hexagon::LDriub_indexed_cdnPt : + return Hexagon::LDriub_indexed_cPt; + + case Hexagon::LDriub_indexed_cdnNotPt : + return Hexagon::LDriub_indexed_cNotPt; + + case Hexagon::POST_LDriub_cdnPt_V4 : + return Hexagon::POST_LDriub_cPt; + + case Hexagon::POST_LDriub_cdnNotPt_V4 : + return Hexagon::POST_LDriub_cNotPt; + + // V4 indexed+scaled Load + + case Hexagon::LDrid_indexed_cdnPt_V4 : + return Hexagon::LDrid_indexed_cPt_V4; + + case Hexagon::LDrid_indexed_cdnNotPt_V4 : + 
return Hexagon::LDrid_indexed_cNotPt_V4; + + case Hexagon::LDrid_indexed_shl_cdnPt_V4 : + return Hexagon::LDrid_indexed_shl_cPt_V4; + + case Hexagon::LDrid_indexed_shl_cdnNotPt_V4 : + return Hexagon::LDrid_indexed_shl_cNotPt_V4; + + case Hexagon::LDrib_indexed_cdnPt_V4 : + return Hexagon::LDrib_indexed_cPt_V4; + + case Hexagon::LDrib_indexed_cdnNotPt_V4 : + return Hexagon::LDrib_indexed_cNotPt_V4; + + case Hexagon::LDrib_indexed_shl_cdnPt_V4 : + return Hexagon::LDrib_indexed_shl_cPt_V4; + + case Hexagon::LDrib_indexed_shl_cdnNotPt_V4 : + return Hexagon::LDrib_indexed_shl_cNotPt_V4; + + case Hexagon::LDriub_indexed_cdnPt_V4 : + return Hexagon::LDriub_indexed_cPt_V4; + + case Hexagon::LDriub_indexed_cdnNotPt_V4 : + return Hexagon::LDriub_indexed_cNotPt_V4; + + case Hexagon::LDriub_indexed_shl_cdnPt_V4 : + return Hexagon::LDriub_indexed_shl_cPt_V4; + + case Hexagon::LDriub_indexed_shl_cdnNotPt_V4 : + return Hexagon::LDriub_indexed_shl_cNotPt_V4; + + case Hexagon::LDrih_indexed_cdnPt_V4 : + return Hexagon::LDrih_indexed_cPt_V4; + + case Hexagon::LDrih_indexed_cdnNotPt_V4 : + return Hexagon::LDrih_indexed_cNotPt_V4; + + case Hexagon::LDrih_indexed_shl_cdnPt_V4 : + return Hexagon::LDrih_indexed_shl_cPt_V4; + + case Hexagon::LDrih_indexed_shl_cdnNotPt_V4 : + return Hexagon::LDrih_indexed_shl_cNotPt_V4; + + case Hexagon::LDriuh_indexed_cdnPt_V4 : + return Hexagon::LDriuh_indexed_cPt_V4; + + case Hexagon::LDriuh_indexed_cdnNotPt_V4 : + return Hexagon::LDriuh_indexed_cNotPt_V4; + + case Hexagon::LDriuh_indexed_shl_cdnPt_V4 : + return Hexagon::LDriuh_indexed_shl_cPt_V4; + + case Hexagon::LDriuh_indexed_shl_cdnNotPt_V4 : + return Hexagon::LDriuh_indexed_shl_cNotPt_V4; + + case Hexagon::LDriw_indexed_cdnPt_V4 : + return Hexagon::LDriw_indexed_cPt_V4; + + case Hexagon::LDriw_indexed_cdnNotPt_V4 : + return Hexagon::LDriw_indexed_cNotPt_V4; + + case Hexagon::LDriw_indexed_shl_cdnPt_V4 : + return Hexagon::LDriw_indexed_shl_cPt_V4; + + case Hexagon::LDriw_indexed_shl_cdnNotPt_V4 : + return Hexagon::LDriw_indexed_shl_cNotPt_V4; + + // V4 global address load + + case Hexagon::LDd_GP_cdnPt_V4: + return Hexagon::LDd_GP_cPt_V4; + + case Hexagon::LDd_GP_cdnNotPt_V4: + return Hexagon::LDd_GP_cNotPt_V4; + + case Hexagon::LDb_GP_cdnPt_V4: + return Hexagon::LDb_GP_cPt_V4; + + case Hexagon::LDb_GP_cdnNotPt_V4: + return Hexagon::LDb_GP_cNotPt_V4; + + case Hexagon::LDub_GP_cdnPt_V4: + return Hexagon::LDub_GP_cPt_V4; + + case Hexagon::LDub_GP_cdnNotPt_V4: + return Hexagon::LDub_GP_cNotPt_V4; + + case Hexagon::LDh_GP_cdnPt_V4: + return Hexagon::LDh_GP_cPt_V4; + + case Hexagon::LDh_GP_cdnNotPt_V4: + return Hexagon::LDh_GP_cNotPt_V4; + + case Hexagon::LDuh_GP_cdnPt_V4: + return Hexagon::LDuh_GP_cPt_V4; + + case Hexagon::LDuh_GP_cdnNotPt_V4: + return Hexagon::LDuh_GP_cNotPt_V4; + + case Hexagon::LDw_GP_cdnPt_V4: + return Hexagon::LDw_GP_cPt_V4; + + case Hexagon::LDw_GP_cdnNotPt_V4: + return Hexagon::LDw_GP_cNotPt_V4; + + case Hexagon::LDrid_GP_cdnPt_V4: + return Hexagon::LDrid_GP_cPt_V4; + + case Hexagon::LDrid_GP_cdnNotPt_V4: + return Hexagon::LDrid_GP_cNotPt_V4; + + case Hexagon::LDrib_GP_cdnPt_V4: + return Hexagon::LDrib_GP_cPt_V4; + + case Hexagon::LDrib_GP_cdnNotPt_V4: + return Hexagon::LDrib_GP_cNotPt_V4; + + case Hexagon::LDriub_GP_cdnPt_V4: + return Hexagon::LDriub_GP_cPt_V4; + + case Hexagon::LDriub_GP_cdnNotPt_V4: + return Hexagon::LDriub_GP_cNotPt_V4; + + case Hexagon::LDrih_GP_cdnPt_V4: + return Hexagon::LDrih_GP_cPt_V4; + + case Hexagon::LDrih_GP_cdnNotPt_V4: + return Hexagon::LDrih_GP_cNotPt_V4; + + case 
Hexagon::LDriuh_GP_cdnPt_V4: + return Hexagon::LDriuh_GP_cPt_V4; + + case Hexagon::LDriuh_GP_cdnNotPt_V4: + return Hexagon::LDriuh_GP_cNotPt_V4; + + case Hexagon::LDriw_GP_cdnPt_V4: + return Hexagon::LDriw_GP_cPt_V4; + + case Hexagon::LDriw_GP_cdnNotPt_V4: + return Hexagon::LDriw_GP_cNotPt_V4; + + // Conditional add + + case Hexagon::ADD_ri_cdnPt : + return Hexagon::ADD_ri_cPt; + case Hexagon::ADD_ri_cdnNotPt : + return Hexagon::ADD_ri_cNotPt; + + case Hexagon::ADD_rr_cdnPt : + return Hexagon::ADD_rr_cPt; + case Hexagon::ADD_rr_cdnNotPt: + return Hexagon::ADD_rr_cNotPt; + + // Conditional logical Operations + + case Hexagon::XOR_rr_cdnPt : + return Hexagon::XOR_rr_cPt; + case Hexagon::XOR_rr_cdnNotPt : + return Hexagon::XOR_rr_cNotPt; + + case Hexagon::AND_rr_cdnPt : + return Hexagon::AND_rr_cPt; + case Hexagon::AND_rr_cdnNotPt : + return Hexagon::AND_rr_cNotPt; + + case Hexagon::OR_rr_cdnPt : + return Hexagon::OR_rr_cPt; + case Hexagon::OR_rr_cdnNotPt : + return Hexagon::OR_rr_cNotPt; + + // Conditional Subtract + + case Hexagon::SUB_rr_cdnPt : + return Hexagon::SUB_rr_cPt; + case Hexagon::SUB_rr_cdnNotPt : + return Hexagon::SUB_rr_cNotPt; + + // Conditional combine + + case Hexagon::COMBINE_rr_cdnPt : + return Hexagon::COMBINE_rr_cPt; + case Hexagon::COMBINE_rr_cdnNotPt : + return Hexagon::COMBINE_rr_cNotPt; + +// Conditional shift operations + + case Hexagon::ASLH_cdnPt_V4 : + return Hexagon::ASLH_cPt_V4; + case Hexagon::ASLH_cdnNotPt_V4 : + return Hexagon::ASLH_cNotPt_V4; + + case Hexagon::ASRH_cdnPt_V4 : + return Hexagon::ASRH_cPt_V4; + case Hexagon::ASRH_cdnNotPt_V4 : + return Hexagon::ASRH_cNotPt_V4; + + case Hexagon::SXTB_cdnPt_V4 : + return Hexagon::SXTB_cPt_V4; + case Hexagon::SXTB_cdnNotPt_V4 : + return Hexagon::SXTB_cNotPt_V4; + + case Hexagon::SXTH_cdnPt_V4 : + return Hexagon::SXTH_cPt_V4; + case Hexagon::SXTH_cdnNotPt_V4 : + return Hexagon::SXTH_cNotPt_V4; + + case Hexagon::ZXTB_cdnPt_V4 : + return Hexagon::ZXTB_cPt_V4; + case Hexagon::ZXTB_cdnNotPt_V4 : + return Hexagon::ZXTB_cNotPt_V4; + + case Hexagon::ZXTH_cdnPt_V4 : + return Hexagon::ZXTH_cPt_V4; + case Hexagon::ZXTH_cdnNotPt_V4 : + return Hexagon::ZXTH_cNotPt_V4; + + // Store byte + + case Hexagon::STrib_imm_cdnPt_V4 : + return Hexagon::STrib_imm_cPt_V4; + + case Hexagon::STrib_imm_cdnNotPt_V4 : + return Hexagon::STrib_imm_cNotPt_V4; + + case Hexagon::STrib_cdnPt_nv_V4 : + case Hexagon::STrib_cPt_nv_V4 : + case Hexagon::STrib_cdnPt_V4 : + return Hexagon::STrib_cPt; + + case Hexagon::STrib_cdnNotPt_nv_V4 : + case Hexagon::STrib_cNotPt_nv_V4 : + case Hexagon::STrib_cdnNotPt_V4 : + return Hexagon::STrib_cNotPt; + + case Hexagon::STrib_indexed_cdnPt_V4 : + case Hexagon::STrib_indexed_cPt_nv_V4 : + case Hexagon::STrib_indexed_cdnPt_nv_V4 : + return Hexagon::STrib_indexed_cPt; + + case Hexagon::STrib_indexed_cdnNotPt_V4 : + case Hexagon::STrib_indexed_cNotPt_nv_V4 : + case Hexagon::STrib_indexed_cdnNotPt_nv_V4 : + return Hexagon::STrib_indexed_cNotPt; + + case Hexagon::STrib_indexed_shl_cdnPt_nv_V4: + case Hexagon::STrib_indexed_shl_cPt_nv_V4 : + case Hexagon::STrib_indexed_shl_cdnPt_V4 : + return Hexagon::STrib_indexed_shl_cPt_V4; + + case Hexagon::STrib_indexed_shl_cdnNotPt_nv_V4: + case Hexagon::STrib_indexed_shl_cNotPt_nv_V4 : + case Hexagon::STrib_indexed_shl_cdnNotPt_V4 : + return Hexagon::STrib_indexed_shl_cNotPt_V4; + + case Hexagon::POST_STbri_cdnPt_nv_V4 : + case Hexagon::POST_STbri_cPt_nv_V4 : + case Hexagon::POST_STbri_cdnPt_V4 : + return Hexagon::POST_STbri_cPt; + + case Hexagon::POST_STbri_cdnNotPt_nv_V4 : + 
case Hexagon::POST_STbri_cNotPt_nv_V4: + case Hexagon::POST_STbri_cdnNotPt_V4 : + return Hexagon::POST_STbri_cNotPt; + + case Hexagon::STb_GP_cdnPt_nv_V4: + case Hexagon::STb_GP_cdnPt_V4: + case Hexagon::STb_GP_cPt_nv_V4: + return Hexagon::STb_GP_cPt_V4; + + case Hexagon::STb_GP_cdnNotPt_nv_V4: + case Hexagon::STb_GP_cdnNotPt_V4: + case Hexagon::STb_GP_cNotPt_nv_V4: + return Hexagon::STb_GP_cNotPt_V4; + + case Hexagon::STrib_GP_cdnPt_nv_V4: + case Hexagon::STrib_GP_cdnPt_V4: + case Hexagon::STrib_GP_cPt_nv_V4: + return Hexagon::STrib_GP_cPt_V4; + + case Hexagon::STrib_GP_cdnNotPt_nv_V4: + case Hexagon::STrib_GP_cdnNotPt_V4: + case Hexagon::STrib_GP_cNotPt_nv_V4: + return Hexagon::STrib_GP_cNotPt_V4; + + // Store new-value byte - unconditional + case Hexagon::STrib_nv_V4: + return Hexagon::STrib; + + case Hexagon::STrib_indexed_nv_V4: + return Hexagon::STrib_indexed; + + case Hexagon::STrib_indexed_shl_nv_V4: + return Hexagon::STrib_indexed_shl_V4; + + case Hexagon::STrib_shl_nv_V4: + return Hexagon::STrib_shl_V4; + + case Hexagon::STrib_GP_nv_V4: + return Hexagon::STrib_GP_V4; + + case Hexagon::STb_GP_nv_V4: + return Hexagon::STb_GP_V4; + + case Hexagon::POST_STbri_nv_V4: + return Hexagon::POST_STbri; + + // Store halfword + case Hexagon::STrih_imm_cdnPt_V4 : + return Hexagon::STrih_imm_cPt_V4; + + case Hexagon::STrih_imm_cdnNotPt_V4 : + return Hexagon::STrih_imm_cNotPt_V4; + + case Hexagon::STrih_cdnPt_nv_V4 : + case Hexagon::STrih_cPt_nv_V4 : + case Hexagon::STrih_cdnPt_V4 : + return Hexagon::STrih_cPt; + + case Hexagon::STrih_cdnNotPt_nv_V4 : + case Hexagon::STrih_cNotPt_nv_V4 : + case Hexagon::STrih_cdnNotPt_V4 : + return Hexagon::STrih_cNotPt; + + case Hexagon::STrih_indexed_cdnPt_nv_V4: + case Hexagon::STrih_indexed_cPt_nv_V4 : + case Hexagon::STrih_indexed_cdnPt_V4 : + return Hexagon::STrih_indexed_cPt; + + case Hexagon::STrih_indexed_cdnNotPt_nv_V4: + case Hexagon::STrih_indexed_cNotPt_nv_V4 : + case Hexagon::STrih_indexed_cdnNotPt_V4 : + return Hexagon::STrih_indexed_cNotPt; + + case Hexagon::STrih_indexed_shl_cdnPt_nv_V4 : + case Hexagon::STrih_indexed_shl_cPt_nv_V4 : + case Hexagon::STrih_indexed_shl_cdnPt_V4 : + return Hexagon::STrih_indexed_shl_cPt_V4; + + case Hexagon::STrih_indexed_shl_cdnNotPt_nv_V4 : + case Hexagon::STrih_indexed_shl_cNotPt_nv_V4 : + case Hexagon::STrih_indexed_shl_cdnNotPt_V4 : + return Hexagon::STrih_indexed_shl_cNotPt_V4; + + case Hexagon::POST_SThri_cdnPt_nv_V4 : + case Hexagon::POST_SThri_cPt_nv_V4 : + case Hexagon::POST_SThri_cdnPt_V4 : + return Hexagon::POST_SThri_cPt; + + case Hexagon::POST_SThri_cdnNotPt_nv_V4 : + case Hexagon::POST_SThri_cNotPt_nv_V4 : + case Hexagon::POST_SThri_cdnNotPt_V4 : + return Hexagon::POST_SThri_cNotPt; + + case Hexagon::STh_GP_cdnPt_nv_V4: + case Hexagon::STh_GP_cdnPt_V4: + case Hexagon::STh_GP_cPt_nv_V4: + return Hexagon::STh_GP_cPt_V4; + + case Hexagon::STh_GP_cdnNotPt_nv_V4: + case Hexagon::STh_GP_cdnNotPt_V4: + case Hexagon::STh_GP_cNotPt_nv_V4: + return Hexagon::STh_GP_cNotPt_V4; + + case Hexagon::STrih_GP_cdnPt_nv_V4: + case Hexagon::STrih_GP_cdnPt_V4: + case Hexagon::STrih_GP_cPt_nv_V4: + return Hexagon::STrih_GP_cPt_V4; + + case Hexagon::STrih_GP_cdnNotPt_nv_V4: + case Hexagon::STrih_GP_cdnNotPt_V4: + case Hexagon::STrih_GP_cNotPt_nv_V4: + return Hexagon::STrih_GP_cNotPt_V4; + + // Store new-value halfword - unconditional + + case Hexagon::STrih_nv_V4: + return Hexagon::STrih; + + case Hexagon::STrih_indexed_nv_V4: + return Hexagon::STrih_indexed; + + case Hexagon::STrih_indexed_shl_nv_V4: + return 
Hexagon::STrih_indexed_shl_V4; + + case Hexagon::STrih_shl_nv_V4: + return Hexagon::STrih_shl_V4; + + case Hexagon::STrih_GP_nv_V4: + return Hexagon::STrih_GP_V4; + + case Hexagon::STh_GP_nv_V4: + return Hexagon::STh_GP_V4; + + case Hexagon::POST_SThri_nv_V4: + return Hexagon::POST_SThri; + + // Store word + + case Hexagon::STriw_imm_cdnPt_V4 : + return Hexagon::STriw_imm_cPt_V4; + + case Hexagon::STriw_imm_cdnNotPt_V4 : + return Hexagon::STriw_imm_cNotPt_V4; + + case Hexagon::STriw_cdnPt_nv_V4 : + case Hexagon::STriw_cPt_nv_V4 : + case Hexagon::STriw_cdnPt_V4 : + return Hexagon::STriw_cPt; + + case Hexagon::STriw_cdnNotPt_nv_V4 : + case Hexagon::STriw_cNotPt_nv_V4 : + case Hexagon::STriw_cdnNotPt_V4 : + return Hexagon::STriw_cNotPt; + + case Hexagon::STriw_indexed_cdnPt_nv_V4 : + case Hexagon::STriw_indexed_cPt_nv_V4 : + case Hexagon::STriw_indexed_cdnPt_V4 : + return Hexagon::STriw_indexed_cPt; + + case Hexagon::STriw_indexed_cdnNotPt_nv_V4 : + case Hexagon::STriw_indexed_cNotPt_nv_V4 : + case Hexagon::STriw_indexed_cdnNotPt_V4 : + return Hexagon::STriw_indexed_cNotPt; + + case Hexagon::STriw_indexed_shl_cdnPt_nv_V4 : + case Hexagon::STriw_indexed_shl_cPt_nv_V4 : + case Hexagon::STriw_indexed_shl_cdnPt_V4 : + return Hexagon::STriw_indexed_shl_cPt_V4; + + case Hexagon::STriw_indexed_shl_cdnNotPt_nv_V4 : + case Hexagon::STriw_indexed_shl_cNotPt_nv_V4 : + case Hexagon::STriw_indexed_shl_cdnNotPt_V4 : + return Hexagon::STriw_indexed_shl_cNotPt_V4; + + case Hexagon::POST_STwri_cdnPt_nv_V4 : + case Hexagon::POST_STwri_cPt_nv_V4 : + case Hexagon::POST_STwri_cdnPt_V4 : + return Hexagon::POST_STwri_cPt; + + case Hexagon::POST_STwri_cdnNotPt_nv_V4 : + case Hexagon::POST_STwri_cNotPt_nv_V4 : + case Hexagon::POST_STwri_cdnNotPt_V4 : + return Hexagon::POST_STwri_cNotPt; + + case Hexagon::STw_GP_cdnPt_nv_V4: + case Hexagon::STw_GP_cdnPt_V4: + case Hexagon::STw_GP_cPt_nv_V4: + return Hexagon::STw_GP_cPt_V4; + + case Hexagon::STw_GP_cdnNotPt_nv_V4: + case Hexagon::STw_GP_cdnNotPt_V4: + case Hexagon::STw_GP_cNotPt_nv_V4: + return Hexagon::STw_GP_cNotPt_V4; + + case Hexagon::STriw_GP_cdnPt_nv_V4: + case Hexagon::STriw_GP_cdnPt_V4: + case Hexagon::STriw_GP_cPt_nv_V4: + return Hexagon::STriw_GP_cPt_V4; + + case Hexagon::STriw_GP_cdnNotPt_nv_V4: + case Hexagon::STriw_GP_cdnNotPt_V4: + case Hexagon::STriw_GP_cNotPt_nv_V4: + return Hexagon::STriw_GP_cNotPt_V4; + + // Store new-value word - unconditional + + case Hexagon::STriw_nv_V4: + return Hexagon::STriw; + + case Hexagon::STriw_indexed_nv_V4: + return Hexagon::STriw_indexed; + + case Hexagon::STriw_indexed_shl_nv_V4: + return Hexagon::STriw_indexed_shl_V4; + + case Hexagon::STriw_shl_nv_V4: + return Hexagon::STriw_shl_V4; + + case Hexagon::STriw_GP_nv_V4: + return Hexagon::STriw_GP_V4; + + case Hexagon::STw_GP_nv_V4: + return Hexagon::STw_GP_V4; + + case Hexagon::POST_STwri_nv_V4: + return Hexagon::POST_STwri; + + // Store doubleword + + case Hexagon::STrid_cdnPt_V4 : + return Hexagon::STrid_cPt; + + case Hexagon::STrid_cdnNotPt_V4 : + return Hexagon::STrid_cNotPt; + + case Hexagon::STrid_indexed_cdnPt_V4 : + return Hexagon::STrid_indexed_cPt; + + case Hexagon::STrid_indexed_cdnNotPt_V4 : + return Hexagon::STrid_indexed_cNotPt; + + case Hexagon::STrid_indexed_shl_cdnPt_V4 : + return Hexagon::STrid_indexed_shl_cPt_V4; + + case Hexagon::STrid_indexed_shl_cdnNotPt_V4 : + return Hexagon::STrid_indexed_shl_cNotPt_V4; + + case Hexagon::POST_STdri_cdnPt_V4 : + return Hexagon::POST_STdri_cPt; + + case Hexagon::POST_STdri_cdnNotPt_V4 : + return 
Hexagon::POST_STdri_cNotPt; + + case Hexagon::STd_GP_cdnPt_V4 : + return Hexagon::STd_GP_cPt_V4; + + case Hexagon::STd_GP_cdnNotPt_V4 : + return Hexagon::STd_GP_cNotPt_V4; + + case Hexagon::STrid_GP_cdnPt_V4 : + return Hexagon::STrid_GP_cPt_V4; + + case Hexagon::STrid_GP_cdnNotPt_V4 : + return Hexagon::STrid_GP_cNotPt_V4; + } +} + +bool HexagonPacketizerList::DemoteToDotOld(MachineInstr* MI) { + const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII; + int NewOpcode = GetDotOldOp(MI->getOpcode()); + MI->setDesc(QII->get(NewOpcode)); + return true; +} + +// Returns true if an instruction is predicated on p0 and false if it's +// predicated on !p0. + +static bool GetPredicateSense(MachineInstr* MI, + const HexagonInstrInfo *QII) { + + switch (MI->getOpcode()) { + default: llvm_unreachable("Unknown predicate sense of the instruction"); + case Hexagon::TFR_cPt: + case Hexagon::TFR_cdnPt: + case Hexagon::TFRI_cPt: + case Hexagon::TFRI_cdnPt: + case Hexagon::STrib_cPt : + case Hexagon::STrib_cdnPt_V4 : + case Hexagon::STrib_indexed_cPt : + case Hexagon::STrib_indexed_cdnPt_V4 : + case Hexagon::STrib_indexed_shl_cPt_V4 : + case Hexagon::STrib_indexed_shl_cdnPt_V4 : + case Hexagon::POST_STbri_cPt : + case Hexagon::POST_STbri_cdnPt_V4 : + case Hexagon::STrih_cPt : + case Hexagon::STrih_cdnPt_V4 : + case Hexagon::STrih_indexed_cPt : + case Hexagon::STrih_indexed_cdnPt_V4 : + case Hexagon::STrih_indexed_shl_cPt_V4 : + case Hexagon::STrih_indexed_shl_cdnPt_V4 : + case Hexagon::POST_SThri_cPt : + case Hexagon::POST_SThri_cdnPt_V4 : + case Hexagon::STriw_cPt : + case Hexagon::STriw_cdnPt_V4 : + case Hexagon::STriw_indexed_cPt : + case Hexagon::STriw_indexed_cdnPt_V4 : + case Hexagon::STriw_indexed_shl_cPt_V4 : + case Hexagon::STriw_indexed_shl_cdnPt_V4 : + case Hexagon::POST_STwri_cPt : + case Hexagon::POST_STwri_cdnPt_V4 : + case Hexagon::STrib_imm_cPt_V4 : + case Hexagon::STrib_imm_cdnPt_V4 : + case Hexagon::STrid_cPt : + case Hexagon::STrid_cdnPt_V4 : + case Hexagon::STrid_indexed_cPt : + case Hexagon::STrid_indexed_cdnPt_V4 : + case Hexagon::STrid_indexed_shl_cPt_V4 : + case Hexagon::STrid_indexed_shl_cdnPt_V4 : + case Hexagon::POST_STdri_cPt : + case Hexagon::POST_STdri_cdnPt_V4 : + case Hexagon::STrih_imm_cPt_V4 : + case Hexagon::STrih_imm_cdnPt_V4 : + case Hexagon::STriw_imm_cPt_V4 : + case Hexagon::STriw_imm_cdnPt_V4 : + case Hexagon::JMP_cdnPt : + case Hexagon::LDrid_cPt : + case Hexagon::LDrid_cdnPt : + case Hexagon::LDrid_indexed_cPt : + case Hexagon::LDrid_indexed_cdnPt : + case Hexagon::POST_LDrid_cPt : + case Hexagon::POST_LDrid_cdnPt_V4 : + case Hexagon::LDriw_cPt : + case Hexagon::LDriw_cdnPt : + case Hexagon::LDriw_indexed_cPt : + case Hexagon::LDriw_indexed_cdnPt : + case Hexagon::POST_LDriw_cPt : + case Hexagon::POST_LDriw_cdnPt_V4 : + case Hexagon::LDrih_cPt : + case Hexagon::LDrih_cdnPt : + case Hexagon::LDrih_indexed_cPt : + case Hexagon::LDrih_indexed_cdnPt : + case Hexagon::POST_LDrih_cPt : + case Hexagon::POST_LDrih_cdnPt_V4 : + case Hexagon::LDrib_cPt : + case Hexagon::LDrib_cdnPt : + case Hexagon::LDrib_indexed_cPt : + case Hexagon::LDrib_indexed_cdnPt : + case Hexagon::POST_LDrib_cPt : + case Hexagon::POST_LDrib_cdnPt_V4 : + case Hexagon::LDriuh_cPt : + case Hexagon::LDriuh_cdnPt : + case Hexagon::LDriuh_indexed_cPt : + case Hexagon::LDriuh_indexed_cdnPt : + case Hexagon::POST_LDriuh_cPt : + case Hexagon::POST_LDriuh_cdnPt_V4 : + case Hexagon::LDriub_cPt : + case Hexagon::LDriub_cdnPt : + case Hexagon::LDriub_indexed_cPt : + case Hexagon::LDriub_indexed_cdnPt : + case 
Hexagon::POST_LDriub_cPt : + case Hexagon::POST_LDriub_cdnPt_V4 : + case Hexagon::LDrid_indexed_cPt_V4 : + case Hexagon::LDrid_indexed_cdnPt_V4 : + case Hexagon::LDrid_indexed_shl_cPt_V4 : + case Hexagon::LDrid_indexed_shl_cdnPt_V4 : + case Hexagon::LDrib_indexed_cPt_V4 : + case Hexagon::LDrib_indexed_cdnPt_V4 : + case Hexagon::LDrib_indexed_shl_cPt_V4 : + case Hexagon::LDrib_indexed_shl_cdnPt_V4 : + case Hexagon::LDriub_indexed_cPt_V4 : + case Hexagon::LDriub_indexed_cdnPt_V4 : + case Hexagon::LDriub_indexed_shl_cPt_V4 : + case Hexagon::LDriub_indexed_shl_cdnPt_V4 : + case Hexagon::LDrih_indexed_cPt_V4 : + case Hexagon::LDrih_indexed_cdnPt_V4 : + case Hexagon::LDrih_indexed_shl_cPt_V4 : + case Hexagon::LDrih_indexed_shl_cdnPt_V4 : + case Hexagon::LDriuh_indexed_cPt_V4 : + case Hexagon::LDriuh_indexed_cdnPt_V4 : + case Hexagon::LDriuh_indexed_shl_cPt_V4 : + case Hexagon::LDriuh_indexed_shl_cdnPt_V4 : + case Hexagon::LDriw_indexed_cPt_V4 : + case Hexagon::LDriw_indexed_cdnPt_V4 : + case Hexagon::LDriw_indexed_shl_cPt_V4 : + case Hexagon::LDriw_indexed_shl_cdnPt_V4 : + case Hexagon::ADD_ri_cPt : + case Hexagon::ADD_ri_cdnPt : + case Hexagon::ADD_rr_cPt : + case Hexagon::ADD_rr_cdnPt : + case Hexagon::XOR_rr_cPt : + case Hexagon::XOR_rr_cdnPt : + case Hexagon::AND_rr_cPt : + case Hexagon::AND_rr_cdnPt : + case Hexagon::OR_rr_cPt : + case Hexagon::OR_rr_cdnPt : + case Hexagon::SUB_rr_cPt : + case Hexagon::SUB_rr_cdnPt : + case Hexagon::COMBINE_rr_cPt : + case Hexagon::COMBINE_rr_cdnPt : + case Hexagon::ASLH_cPt_V4 : + case Hexagon::ASLH_cdnPt_V4 : + case Hexagon::ASRH_cPt_V4 : + case Hexagon::ASRH_cdnPt_V4 : + case Hexagon::SXTB_cPt_V4 : + case Hexagon::SXTB_cdnPt_V4 : + case Hexagon::SXTH_cPt_V4 : + case Hexagon::SXTH_cdnPt_V4 : + case Hexagon::ZXTB_cPt_V4 : + case Hexagon::ZXTB_cdnPt_V4 : + case Hexagon::ZXTH_cPt_V4 : + case Hexagon::ZXTH_cdnPt_V4 : + case Hexagon::LDrid_GP_cPt_V4 : + case Hexagon::LDrib_GP_cPt_V4 : + case Hexagon::LDriub_GP_cPt_V4 : + case Hexagon::LDrih_GP_cPt_V4 : + case Hexagon::LDriuh_GP_cPt_V4 : + case Hexagon::LDriw_GP_cPt_V4 : + case Hexagon::LDd_GP_cPt_V4 : + case Hexagon::LDb_GP_cPt_V4 : + case Hexagon::LDub_GP_cPt_V4 : + case Hexagon::LDh_GP_cPt_V4 : + case Hexagon::LDuh_GP_cPt_V4 : + case Hexagon::LDw_GP_cPt_V4 : + case Hexagon::STrid_GP_cPt_V4 : + case Hexagon::STrib_GP_cPt_V4 : + case Hexagon::STrih_GP_cPt_V4 : + case Hexagon::STriw_GP_cPt_V4 : + case Hexagon::STd_GP_cPt_V4 : + case Hexagon::STb_GP_cPt_V4 : + case Hexagon::STh_GP_cPt_V4 : + case Hexagon::STw_GP_cPt_V4 : + case Hexagon::LDrid_GP_cdnPt_V4 : + case Hexagon::LDrib_GP_cdnPt_V4 : + case Hexagon::LDriub_GP_cdnPt_V4 : + case Hexagon::LDrih_GP_cdnPt_V4 : + case Hexagon::LDriuh_GP_cdnPt_V4 : + case Hexagon::LDriw_GP_cdnPt_V4 : + case Hexagon::LDd_GP_cdnPt_V4 : + case Hexagon::LDb_GP_cdnPt_V4 : + case Hexagon::LDub_GP_cdnPt_V4 : + case Hexagon::LDh_GP_cdnPt_V4 : + case Hexagon::LDuh_GP_cdnPt_V4 : + case Hexagon::LDw_GP_cdnPt_V4 : + case Hexagon::STrid_GP_cdnPt_V4 : + case Hexagon::STrib_GP_cdnPt_V4 : + case Hexagon::STrih_GP_cdnPt_V4 : + case Hexagon::STriw_GP_cdnPt_V4 : + case Hexagon::STd_GP_cdnPt_V4 : + case Hexagon::STb_GP_cdnPt_V4 : + case Hexagon::STh_GP_cdnPt_V4 : + case Hexagon::STw_GP_cdnPt_V4 : + return true; + + case Hexagon::TFR_cNotPt: + case Hexagon::TFR_cdnNotPt: + case Hexagon::TFRI_cNotPt: + case Hexagon::TFRI_cdnNotPt: + case Hexagon::STrib_cNotPt : + case Hexagon::STrib_cdnNotPt_V4 : + case Hexagon::STrib_indexed_cNotPt : + case Hexagon::STrib_indexed_cdnNotPt_V4 : + case 
Hexagon::STrib_indexed_shl_cNotPt_V4 : + case Hexagon::STrib_indexed_shl_cdnNotPt_V4 : + case Hexagon::POST_STbri_cNotPt : + case Hexagon::POST_STbri_cdnNotPt_V4 : + case Hexagon::STrih_cNotPt : + case Hexagon::STrih_cdnNotPt_V4 : + case Hexagon::STrih_indexed_cNotPt : + case Hexagon::STrih_indexed_cdnNotPt_V4 : + case Hexagon::STrih_indexed_shl_cNotPt_V4 : + case Hexagon::STrih_indexed_shl_cdnNotPt_V4 : + case Hexagon::POST_SThri_cNotPt : + case Hexagon::POST_SThri_cdnNotPt_V4 : + case Hexagon::STriw_cNotPt : + case Hexagon::STriw_cdnNotPt_V4 : + case Hexagon::STriw_indexed_cNotPt : + case Hexagon::STriw_indexed_cdnNotPt_V4 : + case Hexagon::STriw_indexed_shl_cNotPt_V4 : + case Hexagon::STriw_indexed_shl_cdnNotPt_V4 : + case Hexagon::POST_STwri_cNotPt : + case Hexagon::POST_STwri_cdnNotPt_V4 : + case Hexagon::STrib_imm_cNotPt_V4 : + case Hexagon::STrib_imm_cdnNotPt_V4 : + case Hexagon::STrid_cNotPt : + case Hexagon::STrid_cdnNotPt_V4 : + case Hexagon::STrid_indexed_cdnNotPt_V4 : + case Hexagon::STrid_indexed_cNotPt : + case Hexagon::STrid_indexed_shl_cNotPt_V4 : + case Hexagon::STrid_indexed_shl_cdnNotPt_V4 : + case Hexagon::POST_STdri_cNotPt : + case Hexagon::POST_STdri_cdnNotPt_V4 : + case Hexagon::STrih_imm_cNotPt_V4 : + case Hexagon::STrih_imm_cdnNotPt_V4 : + case Hexagon::STriw_imm_cNotPt_V4 : + case Hexagon::STriw_imm_cdnNotPt_V4 : + case Hexagon::JMP_cdnNotPt : + case Hexagon::LDrid_cNotPt : + case Hexagon::LDrid_cdnNotPt : + case Hexagon::LDrid_indexed_cNotPt : + case Hexagon::LDrid_indexed_cdnNotPt : + case Hexagon::POST_LDrid_cNotPt : + case Hexagon::POST_LDrid_cdnNotPt_V4 : + case Hexagon::LDriw_cNotPt : + case Hexagon::LDriw_cdnNotPt : + case Hexagon::LDriw_indexed_cNotPt : + case Hexagon::LDriw_indexed_cdnNotPt : + case Hexagon::POST_LDriw_cNotPt : + case Hexagon::POST_LDriw_cdnNotPt_V4 : + case Hexagon::LDrih_cNotPt : + case Hexagon::LDrih_cdnNotPt : + case Hexagon::LDrih_indexed_cNotPt : + case Hexagon::LDrih_indexed_cdnNotPt : + case Hexagon::POST_LDrih_cNotPt : + case Hexagon::POST_LDrih_cdnNotPt_V4 : + case Hexagon::LDrib_cNotPt : + case Hexagon::LDrib_cdnNotPt : + case Hexagon::LDrib_indexed_cNotPt : + case Hexagon::LDrib_indexed_cdnNotPt : + case Hexagon::POST_LDrib_cNotPt : + case Hexagon::POST_LDrib_cdnNotPt_V4 : + case Hexagon::LDriuh_cNotPt : + case Hexagon::LDriuh_cdnNotPt : + case Hexagon::LDriuh_indexed_cNotPt : + case Hexagon::LDriuh_indexed_cdnNotPt : + case Hexagon::POST_LDriuh_cNotPt : + case Hexagon::POST_LDriuh_cdnNotPt_V4 : + case Hexagon::LDriub_cNotPt : + case Hexagon::LDriub_cdnNotPt : + case Hexagon::LDriub_indexed_cNotPt : + case Hexagon::LDriub_indexed_cdnNotPt : + case Hexagon::POST_LDriub_cNotPt : + case Hexagon::POST_LDriub_cdnNotPt_V4 : + case Hexagon::LDrid_indexed_cNotPt_V4 : + case Hexagon::LDrid_indexed_cdnNotPt_V4 : + case Hexagon::LDrid_indexed_shl_cNotPt_V4 : + case Hexagon::LDrid_indexed_shl_cdnNotPt_V4 : + case Hexagon::LDrib_indexed_cNotPt_V4 : + case Hexagon::LDrib_indexed_cdnNotPt_V4 : + case Hexagon::LDrib_indexed_shl_cNotPt_V4 : + case Hexagon::LDrib_indexed_shl_cdnNotPt_V4 : + case Hexagon::LDriub_indexed_cNotPt_V4 : + case Hexagon::LDriub_indexed_cdnNotPt_V4 : + case Hexagon::LDriub_indexed_shl_cNotPt_V4 : + case Hexagon::LDriub_indexed_shl_cdnNotPt_V4 : + case Hexagon::LDrih_indexed_cNotPt_V4 : + case Hexagon::LDrih_indexed_cdnNotPt_V4 : + case Hexagon::LDrih_indexed_shl_cNotPt_V4 : + case Hexagon::LDrih_indexed_shl_cdnNotPt_V4 : + case Hexagon::LDriuh_indexed_cNotPt_V4 : + case Hexagon::LDriuh_indexed_cdnNotPt_V4 : + case 
Hexagon::LDriuh_indexed_shl_cNotPt_V4 : + case Hexagon::LDriuh_indexed_shl_cdnNotPt_V4 : + case Hexagon::LDriw_indexed_cNotPt_V4 : + case Hexagon::LDriw_indexed_cdnNotPt_V4 : + case Hexagon::LDriw_indexed_shl_cNotPt_V4 : + case Hexagon::LDriw_indexed_shl_cdnNotPt_V4 : + case Hexagon::ADD_ri_cNotPt : + case Hexagon::ADD_ri_cdnNotPt : + case Hexagon::ADD_rr_cNotPt : + case Hexagon::ADD_rr_cdnNotPt : + case Hexagon::XOR_rr_cNotPt : + case Hexagon::XOR_rr_cdnNotPt : + case Hexagon::AND_rr_cNotPt : + case Hexagon::AND_rr_cdnNotPt : + case Hexagon::OR_rr_cNotPt : + case Hexagon::OR_rr_cdnNotPt : + case Hexagon::SUB_rr_cNotPt : + case Hexagon::SUB_rr_cdnNotPt : + case Hexagon::COMBINE_rr_cNotPt : + case Hexagon::COMBINE_rr_cdnNotPt : + case Hexagon::ASLH_cNotPt_V4 : + case Hexagon::ASLH_cdnNotPt_V4 : + case Hexagon::ASRH_cNotPt_V4 : + case Hexagon::ASRH_cdnNotPt_V4 : + case Hexagon::SXTB_cNotPt_V4 : + case Hexagon::SXTB_cdnNotPt_V4 : + case Hexagon::SXTH_cNotPt_V4 : + case Hexagon::SXTH_cdnNotPt_V4 : + case Hexagon::ZXTB_cNotPt_V4 : + case Hexagon::ZXTB_cdnNotPt_V4 : + case Hexagon::ZXTH_cNotPt_V4 : + case Hexagon::ZXTH_cdnNotPt_V4 : + + case Hexagon::LDrid_GP_cNotPt_V4 : + case Hexagon::LDrib_GP_cNotPt_V4 : + case Hexagon::LDriub_GP_cNotPt_V4 : + case Hexagon::LDrih_GP_cNotPt_V4 : + case Hexagon::LDriuh_GP_cNotPt_V4 : + case Hexagon::LDriw_GP_cNotPt_V4 : + case Hexagon::LDd_GP_cNotPt_V4 : + case Hexagon::LDb_GP_cNotPt_V4 : + case Hexagon::LDub_GP_cNotPt_V4 : + case Hexagon::LDh_GP_cNotPt_V4 : + case Hexagon::LDuh_GP_cNotPt_V4 : + case Hexagon::LDw_GP_cNotPt_V4 : + case Hexagon::STrid_GP_cNotPt_V4 : + case Hexagon::STrib_GP_cNotPt_V4 : + case Hexagon::STrih_GP_cNotPt_V4 : + case Hexagon::STriw_GP_cNotPt_V4 : + case Hexagon::STd_GP_cNotPt_V4 : + case Hexagon::STb_GP_cNotPt_V4 : + case Hexagon::STh_GP_cNotPt_V4 : + case Hexagon::STw_GP_cNotPt_V4 : + case Hexagon::LDrid_GP_cdnNotPt_V4 : + case Hexagon::LDrib_GP_cdnNotPt_V4 : + case Hexagon::LDriub_GP_cdnNotPt_V4 : + case Hexagon::LDrih_GP_cdnNotPt_V4 : + case Hexagon::LDriuh_GP_cdnNotPt_V4 : + case Hexagon::LDriw_GP_cdnNotPt_V4 : + case Hexagon::LDd_GP_cdnNotPt_V4 : + case Hexagon::LDb_GP_cdnNotPt_V4 : + case Hexagon::LDub_GP_cdnNotPt_V4 : + case Hexagon::LDh_GP_cdnNotPt_V4 : + case Hexagon::LDuh_GP_cdnNotPt_V4 : + case Hexagon::LDw_GP_cdnNotPt_V4 : + case Hexagon::STrid_GP_cdnNotPt_V4 : + case Hexagon::STrib_GP_cdnNotPt_V4 : + case Hexagon::STrih_GP_cdnNotPt_V4 : + case Hexagon::STriw_GP_cdnNotPt_V4 : + case Hexagon::STd_GP_cdnNotPt_V4 : + case Hexagon::STb_GP_cdnNotPt_V4 : + case Hexagon::STh_GP_cdnNotPt_V4 : + case Hexagon::STw_GP_cdnNotPt_V4 : + return false; + } + // return *some value* to avoid compiler warning + return false; +} + +bool HexagonPacketizerList::isDotNewInst(MachineInstr* MI) { + if (isNewValueInst(MI)) + return true; + + switch (MI->getOpcode()) { + case Hexagon::TFR_cdnNotPt: + case Hexagon::TFR_cdnPt: + case Hexagon::TFRI_cdnNotPt: + case Hexagon::TFRI_cdnPt: + case Hexagon::LDrid_cdnPt : + case Hexagon::LDrid_cdnNotPt : + case Hexagon::LDrid_indexed_cdnPt : + case Hexagon::LDrid_indexed_cdnNotPt : + case Hexagon::POST_LDrid_cdnPt_V4 : + case Hexagon::POST_LDrid_cdnNotPt_V4 : + case Hexagon::LDriw_cdnPt : + case Hexagon::LDriw_cdnNotPt : + case Hexagon::LDriw_indexed_cdnPt : + case Hexagon::LDriw_indexed_cdnNotPt : + case Hexagon::POST_LDriw_cdnPt_V4 : + case Hexagon::POST_LDriw_cdnNotPt_V4 : + case Hexagon::LDrih_cdnPt : + case Hexagon::LDrih_cdnNotPt : + case Hexagon::LDrih_indexed_cdnPt : + case 
Hexagon::LDrih_indexed_cdnNotPt :
+  case Hexagon::POST_LDrih_cdnPt_V4 :
+  case Hexagon::POST_LDrih_cdnNotPt_V4 :
+  case Hexagon::LDrib_cdnPt :
+  case Hexagon::LDrib_cdnNotPt :
+  case Hexagon::LDrib_indexed_cdnPt :
+  case Hexagon::LDrib_indexed_cdnNotPt :
+  case Hexagon::POST_LDrib_cdnPt_V4 :
+  case Hexagon::POST_LDrib_cdnNotPt_V4 :
+  case Hexagon::LDriuh_cdnPt :
+  case Hexagon::LDriuh_cdnNotPt :
+  case Hexagon::LDriuh_indexed_cdnPt :
+  case Hexagon::LDriuh_indexed_cdnNotPt :
+  case Hexagon::POST_LDriuh_cdnPt_V4 :
+  case Hexagon::POST_LDriuh_cdnNotPt_V4 :
+  case Hexagon::LDriub_cdnPt :
+  case Hexagon::LDriub_cdnNotPt :
+  case Hexagon::LDriub_indexed_cdnPt :
+  case Hexagon::LDriub_indexed_cdnNotPt :
+  case Hexagon::POST_LDriub_cdnPt_V4 :
+  case Hexagon::POST_LDriub_cdnNotPt_V4 :
+
+  case Hexagon::LDrid_indexed_cdnPt_V4 :
+  case Hexagon::LDrid_indexed_cdnNotPt_V4 :
+  case Hexagon::LDrid_indexed_shl_cdnPt_V4 :
+  case Hexagon::LDrid_indexed_shl_cdnNotPt_V4 :
+  case Hexagon::LDrib_indexed_cdnPt_V4 :
+  case Hexagon::LDrib_indexed_cdnNotPt_V4 :
+  case Hexagon::LDrib_indexed_shl_cdnPt_V4 :
+  case Hexagon::LDrib_indexed_shl_cdnNotPt_V4 :
+  case Hexagon::LDriub_indexed_cdnPt_V4 :
+  case Hexagon::LDriub_indexed_cdnNotPt_V4 :
+  case Hexagon::LDriub_indexed_shl_cdnPt_V4 :
+  case Hexagon::LDriub_indexed_shl_cdnNotPt_V4 :
+  case Hexagon::LDrih_indexed_cdnPt_V4 :
+  case Hexagon::LDrih_indexed_cdnNotPt_V4 :
+  case Hexagon::LDrih_indexed_shl_cdnPt_V4 :
+  case Hexagon::LDrih_indexed_shl_cdnNotPt_V4 :
+  case Hexagon::LDriuh_indexed_cdnPt_V4 :
+  case Hexagon::LDriuh_indexed_cdnNotPt_V4 :
+  case Hexagon::LDriuh_indexed_shl_cdnPt_V4 :
+  case Hexagon::LDriuh_indexed_shl_cdnNotPt_V4 :
+  case Hexagon::LDriw_indexed_cdnPt_V4 :
+  case Hexagon::LDriw_indexed_cdnNotPt_V4 :
+  case Hexagon::LDriw_indexed_shl_cdnPt_V4 :
+  case Hexagon::LDriw_indexed_shl_cdnNotPt_V4 :
+
+  // Conditional add
+  case Hexagon::ADD_ri_cdnPt:
+  case Hexagon::ADD_ri_cdnNotPt:
+  case Hexagon::ADD_rr_cdnPt:
+  case Hexagon::ADD_rr_cdnNotPt:
+
+  // Conditional logical operations
+  case Hexagon::XOR_rr_cdnPt :
+  case Hexagon::XOR_rr_cdnNotPt :
+  case Hexagon::AND_rr_cdnPt :
+  case Hexagon::AND_rr_cdnNotPt :
+  case Hexagon::OR_rr_cdnPt :
+  case Hexagon::OR_rr_cdnNotPt :
+
+  // Conditional subtract
+  case Hexagon::SUB_rr_cdnPt :
+  case Hexagon::SUB_rr_cdnNotPt :
+
+  // Conditional combine
+  case Hexagon::COMBINE_rr_cdnPt :
+  case Hexagon::COMBINE_rr_cdnNotPt :
+
+  // Conditional shift operations
+  case Hexagon::ASLH_cdnPt_V4:
+  case Hexagon::ASLH_cdnNotPt_V4:
+  case Hexagon::ASRH_cdnPt_V4:
+  case Hexagon::ASRH_cdnNotPt_V4:
+  case Hexagon::SXTB_cdnPt_V4:
+  case Hexagon::SXTB_cdnNotPt_V4:
+  case Hexagon::SXTH_cdnPt_V4:
+  case Hexagon::SXTH_cdnNotPt_V4:
+  case Hexagon::ZXTB_cdnPt_V4:
+  case Hexagon::ZXTB_cdnNotPt_V4:
+  case Hexagon::ZXTH_cdnPt_V4:
+  case Hexagon::ZXTH_cdnNotPt_V4:
+
+  // Conditional stores
+  case Hexagon::STrib_imm_cdnPt_V4 :
+  case Hexagon::STrib_imm_cdnNotPt_V4 :
+  case Hexagon::STrib_cdnPt_V4 :
+  case Hexagon::STrib_cdnNotPt_V4 :
+  case Hexagon::STrib_indexed_cdnPt_V4 :
+  case Hexagon::STrib_indexed_cdnNotPt_V4 :
+  case Hexagon::POST_STbri_cdnPt_V4 :
+  case Hexagon::POST_STbri_cdnNotPt_V4 :
+  case Hexagon::STrib_indexed_shl_cdnPt_V4 :
+  case Hexagon::STrib_indexed_shl_cdnNotPt_V4 :
+
+  // Store doubleword conditionally
+  case Hexagon::STrid_indexed_cdnPt_V4 :
+  case Hexagon::STrid_indexed_cdnNotPt_V4 :
+  case Hexagon::STrid_indexed_shl_cdnPt_V4 :
+  case Hexagon::STrid_indexed_shl_cdnNotPt_V4 :
+  case Hexagon::POST_STdri_cdnPt_V4 :
Hexagon::POST_STdri_cdnNotPt_V4 : + + // Store halfword conditionally + case Hexagon::STrih_cdnPt_V4 : + case Hexagon::STrih_cdnNotPt_V4 : + case Hexagon::STrih_indexed_cdnPt_V4 : + case Hexagon::STrih_indexed_cdnNotPt_V4 : + case Hexagon::STrih_imm_cdnPt_V4 : + case Hexagon::STrih_imm_cdnNotPt_V4 : + case Hexagon::STrih_indexed_shl_cdnPt_V4 : + case Hexagon::STrih_indexed_shl_cdnNotPt_V4 : + case Hexagon::POST_SThri_cdnPt_V4 : + case Hexagon::POST_SThri_cdnNotPt_V4 : + + // Store word conditionally + case Hexagon::STriw_cdnPt_V4 : + case Hexagon::STriw_cdnNotPt_V4 : + case Hexagon::STriw_indexed_cdnPt_V4 : + case Hexagon::STriw_indexed_cdnNotPt_V4 : + case Hexagon::STriw_imm_cdnPt_V4 : + case Hexagon::STriw_imm_cdnNotPt_V4 : + case Hexagon::STriw_indexed_shl_cdnPt_V4 : + case Hexagon::STriw_indexed_shl_cdnNotPt_V4 : + case Hexagon::POST_STwri_cdnPt_V4 : + case Hexagon::POST_STwri_cdnNotPt_V4 : + + case Hexagon::LDd_GP_cdnPt_V4: + case Hexagon::LDd_GP_cdnNotPt_V4: + case Hexagon::LDb_GP_cdnPt_V4: + case Hexagon::LDb_GP_cdnNotPt_V4: + case Hexagon::LDub_GP_cdnPt_V4: + case Hexagon::LDub_GP_cdnNotPt_V4: + case Hexagon::LDh_GP_cdnPt_V4: + case Hexagon::LDh_GP_cdnNotPt_V4: + case Hexagon::LDuh_GP_cdnPt_V4: + case Hexagon::LDuh_GP_cdnNotPt_V4: + case Hexagon::LDw_GP_cdnPt_V4: + case Hexagon::LDw_GP_cdnNotPt_V4: + case Hexagon::LDrid_GP_cdnPt_V4: + case Hexagon::LDrid_GP_cdnNotPt_V4: + case Hexagon::LDrib_GP_cdnPt_V4: + case Hexagon::LDrib_GP_cdnNotPt_V4: + case Hexagon::LDriub_GP_cdnPt_V4: + case Hexagon::LDriub_GP_cdnNotPt_V4: + case Hexagon::LDrih_GP_cdnPt_V4: + case Hexagon::LDrih_GP_cdnNotPt_V4: + case Hexagon::LDriuh_GP_cdnPt_V4: + case Hexagon::LDriuh_GP_cdnNotPt_V4: + case Hexagon::LDriw_GP_cdnPt_V4: + case Hexagon::LDriw_GP_cdnNotPt_V4: + + case Hexagon::STrid_GP_cdnPt_V4: + case Hexagon::STrid_GP_cdnNotPt_V4: + case Hexagon::STrib_GP_cdnPt_V4: + case Hexagon::STrib_GP_cdnNotPt_V4: + case Hexagon::STrih_GP_cdnPt_V4: + case Hexagon::STrih_GP_cdnNotPt_V4: + case Hexagon::STriw_GP_cdnPt_V4: + case Hexagon::STriw_GP_cdnNotPt_V4: + case Hexagon::STd_GP_cdnPt_V4: + case Hexagon::STd_GP_cdnNotPt_V4: + case Hexagon::STb_GP_cdnPt_V4: + case Hexagon::STb_GP_cdnNotPt_V4: + case Hexagon::STh_GP_cdnPt_V4: + case Hexagon::STh_GP_cdnNotPt_V4: + case Hexagon::STw_GP_cdnPt_V4: + case Hexagon::STw_GP_cdnNotPt_V4: + return true; + } + return false; +} + +static MachineOperand& GetPostIncrementOperand(MachineInstr *MI, + const HexagonInstrInfo *QII) { + assert(QII->isPostIncrement(MI) && "Not a post increment operation."); +#ifndef NDEBUG + // Post Increment means duplicates. Use dense map to find duplicates in the + // list. Caution: DenseMap initializes with the minimum of 64 buckets, + // whereas there are at most 5 operands in the post increment. + DenseMap<unsigned, unsigned> DefRegsSet; + for(unsigned opNum = 0; opNum < MI->getNumOperands(); opNum++) + if (MI->getOperand(opNum).isReg() && + MI->getOperand(opNum).isDef()) { + DefRegsSet[MI->getOperand(opNum).getReg()] = 1; + } + + for(unsigned opNum = 0; opNum < MI->getNumOperands(); opNum++) + if (MI->getOperand(opNum).isReg() && + MI->getOperand(opNum).isUse()) { + if (DefRegsSet[MI->getOperand(opNum).getReg()]) { + return MI->getOperand(opNum); + } + } +#else + if (MI->getDesc().mayLoad()) { + // The 2nd operand is always the post increment operand in load.
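// A minimal standalone sketch (hypothetical names, not LLVM API) of the
// "register that is both defined and used" rule the NDEBUG block above
// checks: a post-increment access writes the updated address register back,
// so the address register appears once as a def and once as a use.
#include <cstdio>
#include <set>
#include <vector>

struct Operand { unsigned Reg; bool IsDef; bool IsUse; };

// Returns the index of the post-increment (address) use operand, or -1.
static int findPostIncOperand(const std::vector<Operand> &Ops) {
  std::set<unsigned> Defs;
  for (const Operand &Op : Ops)
    if (Op.IsDef) Defs.insert(Op.Reg);
  for (unsigned I = 0; I != Ops.size(); ++I)
    if (Ops[I].IsUse && Defs.count(Ops[I].Reg))
      return (int)I; // this register is both defined and used
  return -1;
}

int main() {
  // r0 = memw(r2++#4): r0 and r2 are defs, r2 is also a use -> index 2.
  std::vector<Operand> Ops = {{0, true, false}, {2, true, false},
                              {2, false, true}};
  std::printf("post-increment operand index: %d\n", findPostIncOperand(Ops));
  return 0;
}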
+ assert(MI->getOperand(1).isReg() && + "Post increment operand has to be a register."); + return (MI->getOperand(1)); + } + if (MI->getDesc().mayStore()) { + // The 1st operand is always the post increment operand in store. + assert(MI->getOperand(0).isReg() && + "Post increment operand has to be a register."); + return (MI->getOperand(0)); + } +#endif + // we should never come here. + llvm_unreachable("mayLoad or mayStore not set for Post Increment operation"); +} + +// get the value being stored +static MachineOperand& GetStoreValueOperand(MachineInstr *MI) { + // value being stored is always the last operand. + return (MI->getOperand(MI->getNumOperands()-1)); +} + +// can be new value store? +// The following restrictions are to be respected when converting a store into +// a new value store. +// 1. If an instruction uses auto-increment, its address register cannot +// be a new-value register. Arch Spec 5.4.2.1 +// 2. If an instruction uses absolute-set addressing mode, +// its address register cannot be a new-value register. +// Arch Spec 5.4.2.1. TODO: This is not enabled +// as absolute-set address mode patterns are not implemented. +// 3. If an instruction produces a 64-bit result, its registers cannot be used +// as new-value registers. Arch Spec 5.4.2.2. +// 4. If the instruction that sets a new-value register is conditional, then +// the instruction that uses the new-value register must also be conditional, +// and both must always have their predicates evaluate identically. +// Arch Spec 5.4.2.3. +// 5. There is an implied restriction that a packet cannot have another store +// if there is a new value store in the packet. Corollary: if there is +// already a store in a packet, there cannot be a new value store. +// Arch Spec: 3.4.4.2 +bool HexagonPacketizerList::CanPromoteToNewValueStore( MachineInstr *MI, + MachineInstr *PacketMI, unsigned DepReg, + std::map <MachineInstr*, SUnit*> MIToSUnit) +{ + // Make sure we are looking at the store + if (!IsNewifyStore(MI)) + return false; + + // Make sure there is a dependency and it can be new value'ed + if (GetStoreValueOperand(MI).isReg() && + GetStoreValueOperand(MI).getReg() != DepReg) + return false; + + const HexagonRegisterInfo* QRI = + (const HexagonRegisterInfo *) TM.getRegisterInfo(); + const MCInstrDesc& MCID = PacketMI->getDesc(); + // first operand is always the result + + const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII; + const TargetRegisterClass* PacketRC = QII->getRegClass(MCID, 0, QRI, MF); + + // if there is already a store in the packet, we cannot do a new value store + // Arch Spec 3.4.4.2. + for (std::vector<MachineInstr*>::iterator VI = CurrentPacketMIs.begin(), + VE = CurrentPacketMIs.end(); + (VI != VE); ++VI) { + SUnit* PacketSU = MIToSUnit[*VI]; + if (PacketSU->getInstr()->getDesc().mayStore() || + // if we have mayStore = 1 set on ALLOCFRAME and DEALLOCFRAME, + // then we don't need this + PacketSU->getInstr()->getOpcode() == Hexagon::ALLOCFRAME || + PacketSU->getInstr()->getOpcode() == Hexagon::DEALLOCFRAME) + return false; + } + + if (PacketRC == &Hexagon::DoubleRegsRegClass) { + // new value store constraint: double regs cannot feed into new value store + // arch spec section: 5.4.2.2 + return false; + } + + // Make sure it's NOT the post increment register that we are going to + // new value.
+ if (QII->isPostIncrement(MI) && + MI->getDesc().mayStore() && + GetPostIncrementOperand(MI, QII).getReg() == DepReg) { + return false; + } + + if (QII->isPostIncrement(PacketMI) && + PacketMI->getDesc().mayLoad() && + GetPostIncrementOperand(PacketMI, QII).getReg() == DepReg) { + // if source is post_inc, or absolute-set addressing, + // it cannot feed into a new value store + // r3 = memw(r2++#4) + // memw(r30 + #-1404) = r2.new -> cannot be a new value store + // arch spec section: 5.4.2.1 + return false; + } + + // If the source that feeds the store is predicated, the new value store must + // also be predicated. + if (QII->isPredicated(PacketMI)) { + if (!QII->isPredicated(MI)) + return false; + + // Check to make sure that they both will have their predicates + // evaluate identically + unsigned predRegNumSrc = 0; + unsigned predRegNumDst = 0; + const TargetRegisterClass* predRegClass = NULL; + + // Get predicate register used in the source instruction + for(unsigned opNum = 0; opNum < PacketMI->getNumOperands(); opNum++) { + if ( PacketMI->getOperand(opNum).isReg()) + predRegNumSrc = PacketMI->getOperand(opNum).getReg(); + predRegClass = QRI->getMinimalPhysRegClass(predRegNumSrc); + if (predRegClass == &Hexagon::PredRegsRegClass) { + break; + } + } + assert ((predRegClass == &Hexagon::PredRegsRegClass ) && + ("predicate register not found in a predicated PacketMI instruction")); + + // Get predicate register used in new-value store instruction + for(unsigned opNum = 0; opNum < MI->getNumOperands(); opNum++) { + if ( MI->getOperand(opNum).isReg()) + predRegNumDst = MI->getOperand(opNum).getReg(); + predRegClass = QRI->getMinimalPhysRegClass(predRegNumDst); + if (predRegClass == &Hexagon::PredRegsRegClass) { + break; + } + } + assert ((predRegClass == &Hexagon::PredRegsRegClass ) && + ("predicate register not found in a predicated MI instruction")); + + // New-value register producer and user (store) need to satisfy these + // constraints: + // 1) Both instructions should be predicated on the same register. + // 2) If producer of the new-value register is .new predicated then store + // should also be .new predicated and if producer is not .new predicated + // then store should not be .new predicated. + // 3) Both new-value register producer and user should have same predicate + // sense, i.e., either both should be negated or neither should be negated. + + if (( predRegNumDst != predRegNumSrc) || + isDotNewInst(PacketMI) != isDotNewInst(MI) || + GetPredicateSense(MI, QII) != GetPredicateSense(PacketMI, QII)) { + return false; + } + } + + // Make sure that other than the new-value register no other store instruction + // register has been modified in the same packet. Predicate registers can be + // modified, but they should not be modified between the producer and the store + // instruction as it will make them both conditional on different values. + // We already know this to be true for all the instructions before and + // including PacketMI. However, we need to perform the check for the + // remaining instructions in the packet. + + std::vector<MachineInstr*>::iterator VI; + std::vector<MachineInstr*>::iterator VE; + unsigned StartCheck = 0; + + for (VI=CurrentPacketMIs.begin(), VE = CurrentPacketMIs.end(); + (VI != VE); ++VI) { + SUnit* TempSU = MIToSUnit[*VI]; + MachineInstr* TempMI = TempSU->getInstr(); + + // The following condition is true for all the instructions until PacketMI is + // reached (StartCheck is set to 0 before the for loop). + // StartCheck flag is 1 for all the instructions after PacketMI.
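// A minimal standalone sketch (hypothetical Insn type, not LLVM API) of the
// scan performed below: starting just past the producer PacketMI, verify that
// no later packet member writes any register the store still reads, since
// that would make producer and store conditional on different values.
#include <vector>

struct Insn {
  std::vector<unsigned> Defs; // registers this instruction writes
  bool modifies(unsigned Reg) const {
    for (unsigned D : Defs)
      if (D == Reg) return true;
    return false;
  }
};

// Packet holds instructions in order; Producer is the index of the
// new-value producer; StoreUses are the registers the store reads.
static bool clobberedAfterProducer(const std::vector<Insn> &Packet,
                                   unsigned Producer,
                                   const std::vector<unsigned> &StoreUses) {
  for (unsigned I = Producer + 1; I < Packet.size(); ++I)
    for (unsigned Reg : StoreUses)
      if (Packet[I].modifies(Reg))
        return true;
  return false;
}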
+ if (TempMI != PacketMI && !StartCheck) // start processing only after + continue; // encountering PacketMI + + StartCheck = 1; + if (TempMI == PacketMI) // We don't want to check PacketMI for dependence + continue; + + for(unsigned opNum = 0; opNum < MI->getNumOperands(); opNum++) { + if (MI->getOperand(opNum).isReg() && + TempSU->getInstr()->modifiesRegister(MI->getOperand(opNum).getReg(), + QRI)) + return false; + } + } + + // Make sure that for non POST_INC stores: + // 1. The only use of reg is DepReg and no other registers. + // This handles V4 base+index registers. + // The following store cannot be dot new. + // E.g. r0 = add(r0, #3) + // memw(r1+r0<<#2) = r0 + if (!QII->isPostIncrement(MI) && + GetStoreValueOperand(MI).isReg() && + GetStoreValueOperand(MI).getReg() == DepReg) { + for(unsigned opNum = 0; opNum < MI->getNumOperands()-1; opNum++) { + if (MI->getOperand(opNum).isReg() && + MI->getOperand(opNum).getReg() == DepReg) { + return false; + } + } + // 2. If data definition is because of implicit definition of the register, + // do not newify the store. E.g. + // %R9 = ZXTH %R12, %D6, %R12 + // STrih_indexed %R8, 2, %R12; mem:ST2[%scevgep343] + for(unsigned opNum = 0; opNum < PacketMI->getNumOperands(); opNum++) { + if (PacketMI->getOperand(opNum).isReg() && + PacketMI->getOperand(opNum).getReg() == DepReg && + PacketMI->getOperand(opNum).isDef() && + PacketMI->getOperand(opNum).isImplicit()) { + return false; + } + } + } + + // Can be dot new store. + return true; +} + +// Can this MI be promoted to either a +// new value store or a new value jump? +bool HexagonPacketizerList::CanPromoteToNewValue( MachineInstr *MI, + SUnit *PacketSU, unsigned DepReg, + std::map <MachineInstr*, SUnit*> MIToSUnit, + MachineBasicBlock::iterator &MII) +{ + + const HexagonRegisterInfo* QRI = + (const HexagonRegisterInfo *) TM.getRegisterInfo(); + if (!QRI->Subtarget.hasV4TOps() || + !IsNewifyStore(MI)) + return false; + + MachineInstr *PacketMI = PacketSU->getInstr(); + + // Check to see if the store can be new value'ed. + if (CanPromoteToNewValueStore(MI, PacketMI, DepReg, MIToSUnit)) + return true; + + // Check to see if the compare/jump can be new value'ed. + // This is done as a pass on its own. Don't need to check it here. + return false; +} + +// Check to see if an instruction can be dot new +// There are three kinds. +// 1. dot new on predicate - V2/V3/V4 +// 2. dot new on stores NV/ST - V4 +// 3. dot new on jump NV/J - V4 -- This is generated in a pass. +bool HexagonPacketizerList::CanPromoteToDotNew( MachineInstr *MI, + SUnit *PacketSU, unsigned DepReg, + std::map <MachineInstr*, SUnit*> MIToSUnit, + MachineBasicBlock::iterator &MII, + const TargetRegisterClass* RC ) +{ + // already a dot new instruction + if (isDotNewInst(MI) && !IsNewifyStore(MI)) + return false; + + if (!isNewifiable(MI)) + return false; + + // predicate .new + if (RC == &Hexagon::PredRegsRegClass && isCondInst(MI)) + return true; + else if (RC != &Hexagon::PredRegsRegClass && + !IsNewifyStore(MI)) // MI is not a new-value store + return false; + else { + // Create a dot new machine instruction to see if resources can be + // allocated. If not, bail out now.
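// The "dry run" pattern used below, reduced to a standalone sketch (Tracker
// and the slot counts are hypothetical stand-ins, not the LLVM DFAPacketizer
// API): build a trial form, ask the resource tracker whether it would fit,
// and discard the trial without committing anything.
struct Tracker {
  int FreeSlots = 4; // a VLIW packet has a fixed number of slots
  bool canReserve(int Slots) const { return Slots <= FreeSlots; }
  void reserve(int Slots) { FreeSlots -= Slots; }
};

static bool dotNewFormFits(const Tracker &RT, int SlotsForDotNewForm) {
  // Probe only; the caller commits with reserve() after all checks pass.
  return RT.canReserve(SlotsForDotNewForm);
}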
+ const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII; + int NewOpcode = GetDotNewOp(MI->getOpcode()); + const MCInstrDesc &desc = QII->get(NewOpcode); + DebugLoc dl; + MachineInstr *NewMI = + MI->getParent()->getParent()->CreateMachineInstr(desc, dl); + bool ResourcesAvailable = ResourceTracker->canReserveResources(NewMI); + MI->getParent()->getParent()->DeleteMachineInstr(NewMI); + + if (!ResourcesAvailable) + return false; + + // New value store only; + // new value jump is generated in a separate pass. + if (!CanPromoteToNewValue(MI, PacketSU, DepReg, MIToSUnit, MII)) { + return false; + } + } + return true; +} + +// Go through the packet instructions and search for an anti dependency +// between them and DepReg from MI +// Consider this case: +// Trying to add +// a) %R1 = TFRI_cdNotPt %P3, 2 +// to this packet: +// { +// b) %P0 = OR_pp %P3, %P0 +// c) %P3 = TFR_PdRs %R23 +// d) %R1 = TFRI_cdnPt %P3, 4 +// } +// The P3 from a) and d) will be complements after +// a)'s P3 is converted to .new form +// Anti dep between c) and b) is irrelevant for this case +bool HexagonPacketizerList::RestrictingDepExistInPacket (MachineInstr* MI, + unsigned DepReg, + std::map <MachineInstr*, SUnit*> MIToSUnit) { + + const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII; + SUnit* PacketSUDep = MIToSUnit[MI]; + + for (std::vector<MachineInstr*>::iterator VIN = CurrentPacketMIs.begin(), + VEN = CurrentPacketMIs.end(); (VIN != VEN); ++VIN) { + + // We only care about dependencies on predicated instructions + if(!QII->isPredicated(*VIN)) continue; + + // Scheduling Unit for current insn in the packet + SUnit* PacketSU = MIToSUnit[*VIN]; + + // Look at dependencies between current members of the packet + // and the predicate-defining instruction MI. + // Make sure that the dependency is on the exact register + // we care about. + if (PacketSU->isSucc(PacketSUDep)) { + for (unsigned i = 0; i < PacketSU->Succs.size(); ++i) { + if ((PacketSU->Succs[i].getSUnit() == PacketSUDep) && + (PacketSU->Succs[i].getKind() == SDep::Anti) && + (PacketSU->Succs[i].getReg() == DepReg)) { + return true; + } + } + } + } + + return false; +} + + +// Given two predicated instructions, this function detects whether +// the predicates are complements. +bool HexagonPacketizerList::ArePredicatesComplements (MachineInstr* MI1, + MachineInstr* MI2, std::map <MachineInstr*, SUnit*> MIToSUnit) { + + const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII; + // Currently can only reason about conditional transfers + if (!QII->isConditionalTransfer(MI1) || !QII->isConditionalTransfer(MI2)) { + return false; + } + + // Scheduling unit for candidate + SUnit* SU = MIToSUnit[MI1]; + + // One corner case deals with the following scenario: + // Trying to add + // a) %R24 = TFR_cPt %P0, %R25 + // to this packet: + // + // { + // b) %R25 = TFR_cNotPt %P0, %R24 + // c) %P0 = CMPEQri %R26, 1 + // } + // + // On a general check a) and b) are complements, but + // the presence of c) will convert a) to .new form, and + // then it is not a complement. + // We attempt to detect this by analyzing existing + // dependencies in the packet + + // Analyze relationships between all existing members of the packet. + // Look for an anti dependency on the same predicate reg + // as used in the candidate + for (std::vector<MachineInstr*>::iterator VIN = CurrentPacketMIs.begin(), + VEN = CurrentPacketMIs.end(); (VIN != VEN); ++VIN) { + + // Scheduling Unit for current insn in the packet + SUnit* PacketSU = MIToSUnit[*VIN]; + + // If this instruction in the packet is succeeded by the candidate...
+ if (PacketSU->isSucc(SU)) { + for (unsigned i = 0; i < PacketSU->Succs.size(); ++i) { + // The corner case exists when there is a true data + // dependency between the candidate and one of the current + // packet members, this dep is on a predicate reg, and + // there already exists an anti dep on the same pred in + // the packet. + if (PacketSU->Succs[i].getSUnit() == SU && + Hexagon::PredRegsRegClass.contains( + PacketSU->Succs[i].getReg()) && + PacketSU->Succs[i].getKind() == SDep::Data && + // Here I know that *VIN is a predicate setting instruction + // with a true data dep to the candidate on the register + // we care about - c) in the above example. + // Now I need to see if there is an anti dependency + // from c) to any other instruction in the + // same packet on the pred reg of interest + RestrictingDepExistInPacket(*VIN,PacketSU->Succs[i].getReg(), + MIToSUnit)) { + return false; + } + } + } + + // If the above case does not apply, check the regular + // complement condition. + // Check that the predicate register is the same and + // that the predicate sense is different. + // We also need to differentiate .old vs. .new: + // !p0 is not complementary to p0.new + return ((MI1->getOperand(1).getReg() == MI2->getOperand(1).getReg()) && + (GetPredicateSense(MI1, QII) != GetPredicateSense(MI2, QII)) && + (isDotNewInst(MI1) == isDotNewInst(MI2))); +} + +// initPacketizerState - Initialize packetizer flags +void HexagonPacketizerList::initPacketizerState() { + + Dependence = false; + PromotedToDotNew = false; + GlueToNewValueJump = false; + GlueAllocframeStore = false; + FoundSequentialDependence = false; + + return; +} + +// ignorePseudoInstruction - Ignore bundling of pseudo instructions. +bool HexagonPacketizerList::ignorePseudoInstruction(MachineInstr *MI, + MachineBasicBlock *MBB) { + if (MI->isDebugValue()) + return true; + + // We must print out inline assembly + if (MI->isInlineAsm()) + return false; + + // We check if MI has any functional units mapped to it. + // If it doesn't, we ignore the instruction. + const MCInstrDesc& TID = MI->getDesc(); + unsigned SchedClass = TID.getSchedClass(); + const InstrStage* IS = + ResourceTracker->getInstrItins()->beginStage(SchedClass); + unsigned FuncUnits = IS->getUnits(); + return !FuncUnits; +} + +// isSoloInstruction - Returns true for instructions that must be +// scheduled in their own packet. +bool HexagonPacketizerList::isSoloInstruction(MachineInstr *MI) { + + if (MI->isInlineAsm()) + return true; + + if (MI->isEHLabel()) + return true; + + // From Hexagon V4 Programmer's Reference Manual 3.4.4 Grouping constraints: + // trap, pause, barrier, icinva, isync, and syncht are solo instructions. + // They must not be grouped with other instructions in a packet. + if (IsSchedBarrier(MI)) + return true; + + return false; +} + +// isLegalToPacketizeTogether: +// SUI is the current instruction that is outside of the current packet. +// SUJ is the current instruction inside the current packet against which +// SUI will be packetized.
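// The two predicate relations this file keeps testing, as a standalone
// sketch (PredInfo is a hypothetical stand-in, not LLVM API). A new-value
// producer and its store must be "compatible" (same register, same sense,
// same .old/.new form); two conditional transfers may share a packet when
// they are "complements" (same register and form, opposite sense).
struct PredInfo {
  unsigned Reg; // predicate register, e.g. p0..p3
  bool Negated; // predicate sense: true for "if (!p)"
  bool DotNew;  // whether the .new form of the predicate is used
};

static bool compatible(const PredInfo &A, const PredInfo &B) {
  return A.Reg == B.Reg && A.Negated == B.Negated && A.DotNew == B.DotNew;
}

static bool complements(const PredInfo &A, const PredInfo &B) {
  // Opposite sense, same register, same form: !p0 is not complementary
  // to p0.new, which is exactly the distinction drawn above.
  return A.Reg == B.Reg && A.Negated != B.Negated && A.DotNew == B.DotNew;
}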
+bool HexagonPacketizerList::isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) { + MachineInstr *I = SUI->getInstr(); + MachineInstr *J = SUJ->getInstr(); + assert(I && J && "Unable to packetize null instruction!"); + + const MCInstrDesc &MCIDI = I->getDesc(); + const MCInstrDesc &MCIDJ = J->getDesc(); + + MachineBasicBlock::iterator II = I; + + const unsigned FrameSize = MF.getFrameInfo()->getStackSize(); + const HexagonRegisterInfo* QRI = + (const HexagonRegisterInfo *) TM.getRegisterInfo(); + const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII; + + // Inline asm cannot go in the packet. + if (I->getOpcode() == Hexagon::INLINEASM) + llvm_unreachable("Should not meet inline asm here!"); + + if (isSoloInstruction(I)) + llvm_unreachable("Should not meet solo instr here!"); + + // A function call that saves callee-saved registers can only be in a packet + // with instructions that don't write to the callee-saved registers. + if ((QII->isSaveCalleeSavedRegsCall(I) && + DoesModifyCalleeSavedReg(J, QRI)) || + (QII->isSaveCalleeSavedRegsCall(J) && + DoesModifyCalleeSavedReg(I, QRI))) { + Dependence = true; + return false; + } + + // Two control flow instructions cannot go in the same packet. + if (IsControlFlow(I) && IsControlFlow(J)) { + Dependence = true; + return false; + } + + // A LoopN instruction cannot appear in the same packet as a jump or call. + if (IsLoopN(I) && ( IsDirectJump(J) + || MCIDJ.isCall() + || QII->isDeallocRet(J))) { + Dependence = true; + return false; + } + if (IsLoopN(J) && ( IsDirectJump(I) + || MCIDI.isCall() + || QII->isDeallocRet(I))) { + Dependence = true; + return false; + } + + // dealloc_return cannot appear in the same packet as a conditional or + // unconditional jump. + if (QII->isDeallocRet(I) && ( MCIDJ.isBranch() + || MCIDJ.isCall() + || MCIDJ.isBarrier())) { + Dependence = true; + return false; + } + + + // V4 allows dual stores, but does not allow the second store if the + // first store is not in SLOT0. New value store, new value jump, + // dealloc_return and memop always take SLOT0. + // Arch spec 3.4.4.2 + if (QRI->Subtarget.hasV4TOps()) { + + if (MCIDI.mayStore() && MCIDJ.mayStore() && isNewValueInst(J)) { + Dependence = true; + return false; + } + + if ( (QII->isMemOp(J) && MCIDI.mayStore()) + || (MCIDJ.mayStore() && QII->isMemOp(I)) + || (QII->isMemOp(J) && QII->isMemOp(I))) { + Dependence = true; + return false; + } + + // If dealloc_return + if (MCIDJ.mayStore() && QII->isDeallocRet(I)){ + Dependence = true; + return false; + } + + // If an instruction feeds a new value jump, glue it. + MachineBasicBlock::iterator NextMII = I; + ++NextMII; + MachineInstr *NextMI = NextMII; + + if (QII->isNewValueJump(NextMI)) { + + bool secondRegMatch = false; + bool maintainNewValueJump = false; + + if (NextMI->getOperand(1).isReg() && + I->getOperand(0).getReg() == NextMI->getOperand(1).getReg()) { + secondRegMatch = true; + maintainNewValueJump = true; + } + + if (!secondRegMatch && + I->getOperand(0).getReg() == NextMI->getOperand(0).getReg()) { + maintainNewValueJump = true; + } + + for (std::vector<MachineInstr*>::iterator + VI = CurrentPacketMIs.begin(), + VE = CurrentPacketMIs.end(); + (VI != VE && maintainNewValueJump); ++VI) { + SUnit* PacketSU = MIToSUnit[*VI]; + + // NVJ cannot be part of the dual jump - Arch Spec: section 7.8 + if (PacketSU->getInstr()->getDesc().isCall()) { + Dependence = true; + break; + } + // Validate + // 1. Packet does not have a store in it. + // 2.
If the first operand of the nvj is newified, and the second + // operand is also a reg, it (second reg) is not defined in + // the same packet. + // 3. If the second operand of the nvj is newified, (which means + // first operand is also a reg), first reg is not defined in + // the same packet. + if (PacketSU->getInstr()->getDesc().mayStore() || + PacketSU->getInstr()->getOpcode() == Hexagon::ALLOCFRAME || + // Check #2. + (!secondRegMatch && NextMI->getOperand(1).isReg() && + PacketSU->getInstr()->modifiesRegister( + NextMI->getOperand(1).getReg(), QRI)) || + // Check #3. + (secondRegMatch && + PacketSU->getInstr()->modifiesRegister( + NextMI->getOperand(0).getReg(), QRI))) { + Dependence = true; + break; + } + } + if (!Dependence) + GlueToNewValueJump = true; + else + return false; + } + } + + if (SUJ->isSucc(SUI)) { + for (unsigned i = 0; + (i < SUJ->Succs.size()) && !FoundSequentialDependence; + ++i) { + + if (SUJ->Succs[i].getSUnit() != SUI) { + continue; + } + + SDep::Kind DepType = SUJ->Succs[i].getKind(); + + // For direct calls: + // Ignore register dependences for call instructions for + // packetization purposes except for those due to r31 and + // predicate registers. + // + // For indirect calls: + // Same as direct calls + check for true dependences to the register + // used in the indirect call. + // + // We completely ignore Order dependences for call instructions + // + // For returns: + // Ignore register dependences for return instructions like jumpr, + // dealloc return unless we have dependencies on the explicit uses + // of the registers used by jumpr (like r31) or dealloc return + // (like r29 or r30). + // + // TODO: Currently, jumpr handles only the return of r31. So, the + // following logic (specifically IsCallDependent) is working fine. + // We need to enable jumpr for registers other than r31 and then, + // we need to rework the last part, where it handles indirect call + // of that (IsCallDependent) function. Bug 6216 is opened for this. + // + unsigned DepReg = 0; + const TargetRegisterClass* RC = NULL; + if (DepType == SDep::Data) { + DepReg = SUJ->Succs[i].getReg(); + RC = QRI->getMinimalPhysRegClass(DepReg); + } + if ((MCIDI.isCall() || MCIDI.isReturn()) && + (!IsRegDependence(DepType) || + !IsCallDependent(I, DepType, SUJ->Succs[i].getReg()))) { + /* do nothing */ + } + + // For instructions that can be promoted to dot-new, try to promote. + else if ((DepType == SDep::Data) && + CanPromoteToDotNew(I, SUJ, DepReg, MIToSUnit, II, RC) && + PromoteToDotNew(I, DepType, II, RC)) { + PromotedToDotNew = true; + /* do nothing */ + } + + else if ((DepType == SDep::Data) && + (QII->isNewValueJump(I))) { + /* do nothing */ + } + + // For predicated instructions, if the predicates are complements + // then there can be no dependence. + else if (QII->isPredicated(I) && + QII->isPredicated(J) && + ArePredicatesComplements(I, J, MIToSUnit)) { + /* do nothing */ + + } + else if (IsDirectJump(I) && + !MCIDJ.isBranch() && + !MCIDJ.isCall() && + (DepType == SDep::Order)) { + // Ignore Order dependences between unconditional direct branches + // and non-control-flow instructions + /* do nothing */ + } + else if (MCIDI.isConditionalBranch() && (DepType != SDep::Data) && + (DepType != SDep::Output)) { + // Ignore all dependences for jumps except for true and output + // dependences + /* do nothing */ + } + + // Ignore output dependences due to superregs.
We can + // write to two different subregisters of R1:0 for instance + // in the same cycle + // + + // + // If neither I nor J defines DepReg, then this is a + // superfluous output dependence. The dependence must be of the + // form: + // R0 = ... + // R1 = ... + // and there is an output dependence between the two instructions + // with + // DepReg = D0 + // We want to ignore these dependences. + // Ideally, the dependence constructor should annotate such + // dependences. We can then avoid this relatively expensive check. + // + else if (DepType == SDep::Output) { + // DepReg is the register that's responsible for the dependence. + unsigned DepReg = SUJ->Succs[i].getReg(); + + // Check if I or J really defines DepReg. + if (I->definesRegister(DepReg) || + J->definesRegister(DepReg)) { + FoundSequentialDependence = true; + break; + } + } + + // We ignore Order dependences for + // 1. Two loads unless they are volatile. + // 2. Two stores in V4 unless they are volatile. + else if ((DepType == SDep::Order) && + !I->hasVolatileMemoryRef() && + !J->hasVolatileMemoryRef()) { + if (QRI->Subtarget.hasV4TOps() && + // hexagonv4 allows dual store. + MCIDI.mayStore() && MCIDJ.mayStore()) { + /* do nothing */ + } + // store followed by store -- not OK on V2 + // store followed by load -- not OK on all (OK if addresses + // are not aliased) + // load followed by store -- OK on all + // load followed by load -- OK on all + else if ( !MCIDJ.mayStore()) { + /* do nothing */ + } + else { + FoundSequentialDependence = true; + break; + } + } + + // For V4, special case ALLOCFRAME. Even though there is a dependency + // between ALLOCFRAME and the subsequent store, allow it to be + // packetized in the same packet. This implies that the store is using + // the caller's SP. Hence, the offset needs to be updated accordingly. + else if (DepType == SDep::Data + && QRI->Subtarget.hasV4TOps() + && J->getOpcode() == Hexagon::ALLOCFRAME + && (I->getOpcode() == Hexagon::STrid + || I->getOpcode() == Hexagon::STriw + || I->getOpcode() == Hexagon::STrib) + && I->getOperand(0).getReg() == QRI->getStackRegister() + && QII->isValidOffset(I->getOpcode(), + I->getOperand(1).getImm() - + (FrameSize + HEXAGON_LRFP_SIZE))) + { + GlueAllocframeStore = true; + // Since this store is to be glued with allocframe in the same + // packet, it will use the SP of the previous stack frame, i.e. + // the caller's SP. Therefore, we need to recalculate the offset + // according to this change. + I->getOperand(1).setImm(I->getOperand(1).getImm() - + (FrameSize + HEXAGON_LRFP_SIZE)); + } + + // + // Skip over anti-dependences. Two instructions that are + // anti-dependent can share a packet. + // + else if (DepType != SDep::Anti) { + FoundSequentialDependence = true; + break; + } + } + + if (FoundSequentialDependence) { + Dependence = true; + return false; + } + } + + return true; +} + +// isLegalToPruneDependencies +bool HexagonPacketizerList::isLegalToPruneDependencies(SUnit *SUI, SUnit *SUJ) { + MachineInstr *I = SUI->getInstr(); + assert(I && SUJ->getInstr() && "Unable to packetize null instruction!"); + + const unsigned FrameSize = MF.getFrameInfo()->getStackSize(); + + if (Dependence) { + + // Check if the instruction was promoted to a dot-new. If so, demote it + // back into a dot-old. + if (PromotedToDotNew) { + DemoteToDotOld(I); + } + + // Check if the instruction (must be a store) was glued with an Allocframe + // instruction. If so, restore its offset to its original value, i.e. use + // the current SP instead of the caller's SP.
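// The offset rebasing done above and undone below, in isolation (the helper
// names are hypothetical; HEXAGON_LRFP_SIZE is assumed to be the 8 bytes of
// saved LR/FP). A store glued to allocframe executes before SP moves, so its
// offset is rewritten relative to the caller's SP and restored if gluing fails.
static int glueOffset(int Offset, int FrameSize, int LrFpSize) {
  return Offset - (FrameSize + LrFpSize); // rebase onto the caller's SP
}
static int unglueOffset(int Offset, int FrameSize, int LrFpSize) {
  return Offset + (FrameSize + LrFpSize); // back to the callee-relative offset
}
// Round trip: unglueOffset(glueOffset(X, F, 8), F, 8) == X for any X and F.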
+ if (GlueAllocframeStore) { + I->getOperand(1).setImm(I->getOperand(1).getImm() + + FrameSize + HEXAGON_LRFP_SIZE); + } + + return false; + } + return true; +} + +MachineBasicBlock::iterator +HexagonPacketizerList::addToPacket(MachineInstr *MI) { + + MachineBasicBlock::iterator MII = MI; + MachineBasicBlock *MBB = MI->getParent(); + + const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII; + + if (GlueToNewValueJump) { + + ++MII; + MachineInstr *nvjMI = MII; + assert(ResourceTracker->canReserveResources(MI)); + ResourceTracker->reserveResources(MI); + if (QII->isExtended(MI) && + !tryAllocateResourcesForConstExt(MI)) { + endPacket(MBB, MI); + ResourceTracker->reserveResources(MI); + assert(canReserveResourcesForConstExt(MI) && + "Ensure that there is a slot"); + reserveResourcesForConstExt(MI); + // Reserve resources for new value jump constant extender. + assert(canReserveResourcesForConstExt(MI) && + "Ensure that there is a slot"); + reserveResourcesForConstExt(nvjMI); + assert(ResourceTracker->canReserveResources(nvjMI) && + "Ensure that there is a slot"); + + } else if ( // Extended instruction takes two slots in the packet. + // Try to reserve and allocate 4 bytes in the current packet first. + (QII->isExtended(nvjMI) + && (!tryAllocateResourcesForConstExt(nvjMI) + || !ResourceTracker->canReserveResources(nvjMI))) + || // For non-extended instruction, no need to allocate extra 4 bytes. + (!QII->isExtended(nvjMI) && + !ResourceTracker->canReserveResources(nvjMI))) + { + endPacket(MBB, MI); + // A new and empty packet starts. + // We are sure that the resource requirements can be satisfied. + // Therefore, we do not need to call "canReserveResources" anymore. + ResourceTracker->reserveResources(MI); + if (QII->isExtended(nvjMI)) + reserveResourcesForConstExt(nvjMI); + } + // Here, we are sure that "reserveResources" would succeed. + ResourceTracker->reserveResources(nvjMI); + CurrentPacketMIs.push_back(MI); + CurrentPacketMIs.push_back(nvjMI); + } else { + if ( QII->isExtended(MI) + && ( !tryAllocateResourcesForConstExt(MI) + || !ResourceTracker->canReserveResources(MI))) + { + endPacket(MBB, MI); + // Check if the instruction was promoted to a dot-new. If so, demote it + // back into a dot-old + if (PromotedToDotNew) { + DemoteToDotOld(MI); + } + reserveResourcesForConstExt(MI); + } + // If "MI" is not an extended insn, + // the resource availability has already been checked.
+ ResourceTracker->reserveResources(MI); + CurrentPacketMIs.push_back(MI); + } + return MII; +} + +//===----------------------------------------------------------------------===// +// Public Constructor Functions +//===----------------------------------------------------------------------===// + +FunctionPass *llvm::createHexagonPacketizer() { + return new HexagonPacketizer(); +} + diff --git a/lib/Target/Hexagon/InstPrinter/HexagonInstPrinter.cpp b/lib/Target/Hexagon/InstPrinter/HexagonInstPrinter.cpp index 47384cd..035afe8 100644 --- a/lib/Target/Hexagon/InstPrinter/HexagonInstPrinter.cpp +++ b/lib/Target/Hexagon/InstPrinter/HexagonInstPrinter.cpp @@ -15,6 +15,7 @@ #include "Hexagon.h" #include "HexagonAsmPrinter.h" #include "HexagonInstPrinter.h" +#include "HexagonMCInst.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCExpr.h" @@ -37,20 +38,50 @@ StringRef HexagonInstPrinter::getRegName(unsigned RegNo) const { void HexagonInstPrinter::printInst(const MCInst *MI, raw_ostream &O, StringRef Annot) { + printInst((const HexagonMCInst*)(MI), O, Annot); +} + +void HexagonInstPrinter::printInst(const HexagonMCInst *MI, raw_ostream &O, + StringRef Annot) { const char packetPadding[] = " "; const char startPacket = '{', endPacket = '}'; // TODO: add outer HW loop when it's supported too. if (MI->getOpcode() == Hexagon::ENDLOOP0) { - MCInst Nop; + // Ending a hardware loop is different from ending a regular packet. + assert(MI->isEndPacket() && "Loop end must also end the packet"); + + if (MI->isStartPacket()) { + // There must be a packet to end a loop. + // FIXME: when shuffling is always run, this shouldn't be needed. + HexagonMCInst Nop; + StringRef NoAnnot; + + Nop.setOpcode (Hexagon::NOP); + Nop.setStartPacket (MI->isStartPacket()); + printInst (&Nop, O, NoAnnot); + } + + // Close the packet. + if (MI->isEndPacket()) + O << packetPadding << endPacket; - O << packetPadding << startPacket << '\n'; - Nop.setOpcode(Hexagon::NOP); - printInstruction(&Nop, O); - O << packetPadding << endPacket; + printInstruction(MI, O); + } + else { + // Prefix the insn opening the packet. + if (MI->isStartPacket()) + O << packetPadding << startPacket << '\n'; + + printInstruction(MI, O); + + // Suffix the insn closing the packet. + if (MI->isEndPacket()) + // Suffix the packet in a new line always, since the GNU assembler has + // issues with a closing brace on the same line as CONST{32,64}.
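// A standalone sketch of the packet bracketing implemented here (PacketInsn
// and printInsn are hypothetical, not the LLVM printer): instructions carry
// start/end packet markers, the opening brace goes on its own line before
// the first instruction, and the closing brace always goes on a new line.
#include <iostream>
#include <string>

struct PacketInsn { std::string Text; bool StartPacket; bool EndPacket; };

static void printInsn(std::ostream &OS, const PacketInsn &In) {
  const char *Pad = "      ";
  if (In.StartPacket)
    OS << Pad << "{\n";
  OS << Pad << In.Text;
  if (In.EndPacket)
    OS << '\n' << Pad << '}'; // closing brace on its own line
  OS << '\n';
}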
+ O << '\n' << packetPadding << endPacket; } - printInstruction(MI, O); printAnnotation(O, Annot); } @@ -65,22 +96,22 @@ void HexagonInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, } else if(MO.isImm()) { printImmOperand(MI, OpNo, O); } else { - assert(false && "Unknown operand"); + llvm_unreachable("Unknown operand"); } } -void HexagonInstPrinter::printImmOperand - (const MCInst *MI, unsigned OpNo, raw_ostream &O) const { +void HexagonInstPrinter::printImmOperand(const MCInst *MI, unsigned OpNo, + raw_ostream &O) const { O << MI->getOperand(OpNo).getImm(); } void HexagonInstPrinter::printExtOperand(const MCInst *MI, unsigned OpNo, - raw_ostream &O) const { + raw_ostream &O) const { O << MI->getOperand(OpNo).getImm(); } -void HexagonInstPrinter::printUnsignedImmOperand - (const MCInst *MI, unsigned OpNo, raw_ostream &O) const { +void HexagonInstPrinter::printUnsignedImmOperand(const MCInst *MI, + unsigned OpNo, raw_ostream &O) const { O << MI->getOperand(OpNo).getImm(); } @@ -89,13 +120,13 @@ void HexagonInstPrinter::printNegImmOperand(const MCInst *MI, unsigned OpNo, O << -MI->getOperand(OpNo).getImm(); } -void HexagonInstPrinter::printNOneImmOperand - (const MCInst *MI, unsigned OpNo, raw_ostream &O) const { +void HexagonInstPrinter::printNOneImmOperand(const MCInst *MI, unsigned OpNo, + raw_ostream &O) const { O << -1; } -void HexagonInstPrinter::printMEMriOperand - (const MCInst *MI, unsigned OpNo, raw_ostream &O) const { +void HexagonInstPrinter::printMEMriOperand(const MCInst *MI, unsigned OpNo, + raw_ostream &O) const { const MCOperand& MO0 = MI->getOperand(OpNo); const MCOperand& MO1 = MI->getOperand(OpNo + 1); @@ -103,8 +134,8 @@ void HexagonInstPrinter::printMEMriOperand O << " + #" << MO1.getImm(); } -void HexagonInstPrinter::printFrameIndexOperand - (const MCInst *MI, unsigned OpNo, raw_ostream &O) const { +void HexagonInstPrinter::printFrameIndexOperand(const MCInst *MI, unsigned OpNo, + raw_ostream &O) const { const MCOperand& MO0 = MI->getOperand(OpNo); const MCOperand& MO1 = MI->getOperand(OpNo + 1); diff --git a/lib/Target/Hexagon/InstPrinter/HexagonInstPrinter.h b/lib/Target/Hexagon/InstPrinter/HexagonInstPrinter.h index dad4334..902a323 100644 --- a/lib/Target/Hexagon/InstPrinter/HexagonInstPrinter.h +++ b/lib/Target/Hexagon/InstPrinter/HexagonInstPrinter.h @@ -14,6 +14,7 @@ #ifndef HEXAGONINSTPRINTER_H #define HEXAGONINSTPRINTER_H +#include "HexagonMCInst.h" #include "llvm/MC/MCInstPrinter.h" namespace llvm { @@ -25,6 +26,7 @@ namespace llvm { : MCInstPrinter(MAI, MII, MRI) {} virtual void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot); + void printInst(const HexagonMCInst *MI, raw_ostream &O, StringRef Annot); virtual StringRef getOpcodeName(unsigned Opcode) const; void printInstruction(const MCInst *MI, raw_ostream &O); StringRef getRegName(unsigned RegNo) const; @@ -33,16 +35,16 @@ namespace llvm { void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O) const; void printImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O) const; void printExtOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O) const; - void printUnsignedImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O) - const; + void printUnsignedImmOperand(const MCInst *MI, unsigned OpNo, + raw_ostream &O) const; void printNegImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O) const; void printNOneImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O) const; void printMEMriOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O) const; - void 
printFrameIndexOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O) - const; + void printFrameIndexOperand(const MCInst *MI, unsigned OpNo, + raw_ostream &O) const; void printBranchOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O) const; void printCallOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O) @@ -55,7 +57,8 @@ namespace llvm { const; void printJumpTable(const MCInst *MI, unsigned OpNo, raw_ostream &O) const; - void printConstantPool(const MCInst *MI, unsigned OpNo, raw_ostream &O) const; + void printConstantPool(const MCInst *MI, unsigned OpNo, + raw_ostream &O) const; void printSymbolHi(const MCInst *MI, unsigned OpNo, raw_ostream &O) const { printSymbol(MI, OpNo, O, true); } diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h b/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h index ed55c3c..7221e90 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h @@ -23,14 +23,41 @@ namespace llvm { /// instruction info tracks. /// namespace HexagonII { - // *** The code below must match HexagonInstrFormat*.td *** // + // Insn types. + // *** Must match HexagonInstrFormat*.td *** + enum Type { + TypePSEUDO = 0, + TypeALU32 = 1, + TypeCR = 2, + TypeJR = 3, + TypeJ = 4, + TypeLD = 5, + TypeST = 6, + TypeSYSTEM = 7, + TypeXTYPE = 8, + TypeMEMOP = 9, + TypeNV = 10, + TypePREFIX = 30, // Such as extenders. + TypeMARKER = 31 // Such as end of a HW loop. + }; + + + // MCInstrDesc TSFlags + // *** Must match HexagonInstrFormat*.td *** enum { + // This 5-bit field describes the insn type. + TypePos = 0, + TypeMask = 0x1f, + + // Solo instructions. + SoloPos = 5, + SoloMask = 0x1, // Predicated instructions. - PredicatedPos = 1, + PredicatedPos = 6, PredicatedMask = 0x1 }; diff --git a/lib/Target/LLVMBuild.txt b/lib/Target/LLVMBuild.txt index 8ec5673..8995080 100644 --- a/lib/Target/LLVMBuild.txt +++ b/lib/Target/LLVMBuild.txt @@ -16,7 +16,7 @@ ;===------------------------------------------------------------------------===; [common] -subdirectories = ARM CellSPU CppBackend Hexagon MBlaze MSP430 Mips PTX PowerPC Sparc X86 XCore +subdirectories = ARM CellSPU CppBackend Hexagon MBlaze MSP430 NVPTX Mips PowerPC Sparc X86 XCore ; This is a special group whose required libraries are extended (by llvm-build) ; with the best execution engine (the native JIT, if available, or the diff --git a/lib/Target/MBlaze/CMakeLists.txt b/lib/Target/MBlaze/CMakeLists.txt index bf1deef..6c3e8b6 100644 --- a/lib/Target/MBlaze/CMakeLists.txt +++ b/lib/Target/MBlaze/CMakeLists.txt @@ -30,6 +30,8 @@ add_llvm_target(MBlazeCodeGen MBlazeELFWriterInfo.cpp ) +add_dependencies(LLVMMBlazeCodeGen intrinsics_gen) + add_subdirectory(AsmParser) add_subdirectory(Disassembler) add_subdirectory(InstPrinter) diff --git a/lib/Target/MBlaze/MBlaze.td b/lib/Target/MBlaze/MBlaze.td index b4edff0..c288855 100644 --- a/lib/Target/MBlaze/MBlaze.td +++ b/lib/Target/MBlaze/MBlaze.td @@ -50,7 +50,7 @@ def FeatureSqrt : SubtargetFeature<"sqrt", "HasSqrt", "true", // MBlaze processors supported. 
//===----------------------------------------------------------------------===// -def : Processor<"mblaze", MBlazeGenericItineraries, []>; +def : Processor<"mblaze", NoItineraries, []>; def : Processor<"mblaze3", MBlazePipe3Itineraries, []>; def : Processor<"mblaze5", MBlazePipe5Itineraries, []>; diff --git a/lib/Target/MBlaze/MBlazeAsmPrinter.cpp b/lib/Target/MBlaze/MBlazeAsmPrinter.cpp index 55fffe3..e9f340f 100644 --- a/lib/Target/MBlaze/MBlazeAsmPrinter.cpp +++ b/lib/Target/MBlaze/MBlazeAsmPrinter.cpp @@ -135,7 +135,7 @@ void MBlazeAsmPrinter::printSavedRegsBitmask() { for (unsigned i = 0, e = CSI.size(); i != e; ++i) { unsigned Reg = CSI[i].getReg(); unsigned RegNum = getMBlazeRegisterNumbering(Reg); - if (MBlaze::GPRRegisterClass->contains(Reg)) + if (MBlaze::GPRRegClass.contains(Reg)) CPUBitmask |= (1 << RegNum); } @@ -187,7 +187,7 @@ void MBlazeAsmPrinter::EmitFunctionBodyEnd() { //===----------------------------------------------------------------------===// void MBlazeAsmPrinter::EmitInstruction(const MachineInstr *MI) { - MBlazeMCInstLower MCInstLowering(OutContext, *Mang, *this); + MBlazeMCInstLower MCInstLowering(OutContext, *this); MCInst TmpInst; MCInstLowering.Lower(MI, TmpInst); @@ -200,7 +200,13 @@ PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, unsigned AsmVariant,const char *ExtraCode, raw_ostream &O) { // Does this asm operand have a single letter operand modifier? if (ExtraCode && ExtraCode[0]) - return true; // Unknown modifier. + if (ExtraCode[1] != 0) return true; // Unknown modifier. + + switch (ExtraCode[0]) { + default: + // See if this is a generic print operand + return AsmPrinter::PrintAsmOperand(MI, OpNo, AsmVariant, ExtraCode, O); + } printOperand(MI, OpNo, O); return false; diff --git a/lib/Target/MBlaze/MBlazeISelLowering.cpp b/lib/Target/MBlaze/MBlazeISelLowering.cpp index edfc335..310c25e 100644 --- a/lib/Target/MBlaze/MBlazeISelLowering.cpp +++ b/lib/Target/MBlaze/MBlazeISelLowering.cpp @@ -62,9 +62,9 @@ MBlazeTargetLowering::MBlazeTargetLowering(MBlazeTargetMachine &TM) setBooleanVectorContents(ZeroOrOneBooleanContent); // FIXME: Is this correct? 
// Set up the register classes - addRegisterClass(MVT::i32, MBlaze::GPRRegisterClass); + addRegisterClass(MVT::i32, &MBlaze::GPRRegClass); if (Subtarget->hasFPU()) { - addRegisterClass(MVT::f32, MBlaze::GPRRegisterClass); + addRegisterClass(MVT::f32, &MBlaze::GPRRegClass); setOperationAction(ISD::ConstantFP, MVT::f32, Legal); } @@ -291,12 +291,12 @@ MBlazeTargetLowering::EmitCustomShift(MachineInstr *MI, loop->addSuccessor(finish); loop->addSuccessor(loop); - unsigned IAMT = R.createVirtualRegister(MBlaze::GPRRegisterClass); + unsigned IAMT = R.createVirtualRegister(&MBlaze::GPRRegClass); BuildMI(MBB, dl, TII->get(MBlaze::ANDI), IAMT) .addReg(MI->getOperand(2).getReg()) .addImm(31); - unsigned IVAL = R.createVirtualRegister(MBlaze::GPRRegisterClass); + unsigned IVAL = R.createVirtualRegister(&MBlaze::GPRRegClass); BuildMI(MBB, dl, TII->get(MBlaze::ADDIK), IVAL) .addReg(MI->getOperand(1).getReg()) .addImm(0); @@ -305,14 +305,14 @@ MBlazeTargetLowering::EmitCustomShift(MachineInstr *MI, .addReg(IAMT) .addMBB(finish); - unsigned DST = R.createVirtualRegister(MBlaze::GPRRegisterClass); - unsigned NDST = R.createVirtualRegister(MBlaze::GPRRegisterClass); + unsigned DST = R.createVirtualRegister(&MBlaze::GPRRegClass); + unsigned NDST = R.createVirtualRegister(&MBlaze::GPRRegClass); BuildMI(loop, dl, TII->get(MBlaze::PHI), DST) .addReg(IVAL).addMBB(MBB) .addReg(NDST).addMBB(loop); - unsigned SAMT = R.createVirtualRegister(MBlaze::GPRRegisterClass); - unsigned NAMT = R.createVirtualRegister(MBlaze::GPRRegisterClass); + unsigned SAMT = R.createVirtualRegister(&MBlaze::GPRRegClass); + unsigned NAMT = R.createVirtualRegister(&MBlaze::GPRRegClass); BuildMI(loop, dl, TII->get(MBlaze::PHI), SAMT) .addReg(IAMT).addMBB(MBB) .addReg(NAMT).addMBB(loop); @@ -500,7 +500,7 @@ MBlazeTargetLowering::EmitCustomAtomic(MachineInstr *MI, case MBlaze::LAN32: opcode = MBlaze::AND; break; } - finalReg = R.createVirtualRegister(MBlaze::GPRRegisterClass); + finalReg = R.createVirtualRegister(&MBlaze::GPRRegClass); start->addSuccessor(exit); start->addSuccessor(start); @@ -510,7 +510,7 @@ MBlazeTargetLowering::EmitCustomAtomic(MachineInstr *MI, if (MI->getOpcode() == MBlaze::LAN32) { unsigned tmp = finalReg; - finalReg = R.createVirtualRegister(MBlaze::GPRRegisterClass); + finalReg = R.createVirtualRegister(&MBlaze::GPRRegClass); BuildMI(start, dl, TII->get(MBlaze::XORI), finalReg) .addReg(tmp) .addImm(-1); @@ -528,7 +528,7 @@ MBlazeTargetLowering::EmitCustomAtomic(MachineInstr *MI, final->addSuccessor(exit); final->addSuccessor(start); - unsigned CMP = R.createVirtualRegister(MBlaze::GPRRegisterClass); + unsigned CMP = R.createVirtualRegister(&MBlaze::GPRRegClass); BuildMI(start, dl, TII->get(MBlaze::CMP), CMP) .addReg(MI->getOperand(0).getReg()) .addReg(MI->getOperand(2).getReg()); @@ -543,7 +543,7 @@ MBlazeTargetLowering::EmitCustomAtomic(MachineInstr *MI, } } - unsigned CHK = R.createVirtualRegister(MBlaze::GPRRegisterClass); + unsigned CHK = R.createVirtualRegister(&MBlaze::GPRRegClass); BuildMI(final, dl, TII->get(MBlaze::SWX)) .addReg(finalReg) .addReg(MI->getOperand(1).getReg()) @@ -681,13 +681,19 @@ static bool CC_MBlaze_AssignReg(unsigned &ValNo, MVT &ValVT, MVT &LocVT, /// (physical regs)/(stack frame), CALLSEQ_START and CALLSEQ_END are emitted. /// TODO: isVarArg, isTailCall. 
SDValue MBlazeTargetLowering:: -LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, - bool isVarArg, bool doesNotRet, bool &isTailCall, - const SmallVectorImpl &Outs, - const SmallVectorImpl &OutVals, - const SmallVectorImpl &Ins, - DebugLoc dl, SelectionDAG &DAG, +LowerCall(TargetLowering::CallLoweringInfo &CLI, SmallVectorImpl &InVals) const { + SelectionDAG &DAG = CLI.DAG; + DebugLoc &dl = CLI.DL; + SmallVector &Outs = CLI.Outs; + SmallVector &OutVals = CLI.OutVals; + SmallVector &Ins = CLI.Ins; + SDValue Chain = CLI.Chain; + SDValue Callee = CLI.Callee; + bool &isTailCall = CLI.IsTailCall; + CallingConv::ID CallConv = CLI.CallConv; + bool isVarArg = CLI.IsVarArg; + // MBlaze does not yet support tail call optimization isTailCall = false; @@ -702,7 +708,7 @@ LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, // Analyze operands of the call, assigning locations to each operand. SmallVector ArgLocs; CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), - getTargetMachine(), ArgLocs, *DAG.getContext()); + getTargetMachine(), ArgLocs, *DAG.getContext()); CCInfo.AnalyzeCallOperands(Outs, CC_MBlaze); // Get a count of how many bytes are to be pushed on the stack. @@ -841,7 +847,7 @@ LowerCallResult(SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, // Assign locations to each value returned by this call. SmallVector RVLocs; CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), - getTargetMachine(), RVLocs, *DAG.getContext()); + getTargetMachine(), RVLocs, *DAG.getContext()); CCInfo.AnalyzeCallResult(Ins, RetCC_MBlaze); @@ -884,7 +890,7 @@ LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, // Assign locations to all of the incoming arguments. SmallVector ArgLocs; CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), - getTargetMachine(), ArgLocs, *DAG.getContext()); + getTargetMachine(), ArgLocs, *DAG.getContext()); CCInfo.AnalyzeFormalArguments(Ins, CC_MBlaze); SDValue StackPtr; @@ -899,9 +905,9 @@ LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const TargetRegisterClass *RC; if (RegVT == MVT::i32) - RC = MBlaze::GPRRegisterClass; + RC = &MBlaze::GPRRegClass; else if (RegVT == MVT::f32) - RC = MBlaze::GPRRegisterClass; + RC = &MBlaze::GPRRegClass; else llvm_unreachable("RegVT not supported by LowerFormalArguments"); @@ -964,7 +970,7 @@ LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, StackPtr = DAG.getRegister(StackReg, getPointerTy()); // The last register argument that must be saved is MBlaze::R10 - const TargetRegisterClass *RC = MBlaze::GPRRegisterClass; + const TargetRegisterClass *RC = &MBlaze::GPRRegClass; unsigned Begin = getMBlazeRegisterNumbering(MBlaze::R5); unsigned Start = getMBlazeRegisterNumbering(ArgRegEnd+1); @@ -1016,7 +1022,7 @@ LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, // CCState - Info about the registers and stack slot. CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), - getTargetMachine(), RVLocs, *DAG.getContext()); + getTargetMachine(), RVLocs, *DAG.getContext()); // Analize return values. 
CCInfo.AnalyzeReturn(Outs, RetCC_MBlaze); @@ -1124,14 +1130,14 @@ getRegForInlineAsmConstraint(const std::string &Constraint, EVT VT) const { if (Constraint.size() == 1) { switch (Constraint[0]) { case 'r': - return std::make_pair(0U, MBlaze::GPRRegisterClass); + return std::make_pair(0U, &MBlaze::GPRRegClass); // TODO: These can't possibly be right, but match what was in // getRegClassForInlineAsmConstraint. case 'd': case 'y': case 'f': if (VT == MVT::f32) - return std::make_pair(0U, MBlaze::GPRRegisterClass); + return std::make_pair(0U, &MBlaze::GPRRegClass); } } return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT); diff --git a/lib/Target/MBlaze/MBlazeISelLowering.h b/lib/Target/MBlaze/MBlazeISelLowering.h index 6a79fc1..a01fab5 100644 --- a/lib/Target/MBlaze/MBlazeISelLowering.h +++ b/lib/Target/MBlaze/MBlazeISelLowering.h @@ -132,13 +132,7 @@ namespace llvm { SmallVectorImpl &InVals) const; virtual SDValue - LowerCall(SDValue Chain, SDValue Callee, - CallingConv::ID CallConv, bool isVarArg, - bool doesNotRet, bool &isTailCall, - const SmallVectorImpl &Outs, - const SmallVectorImpl &OutVals, - const SmallVectorImpl &Ins, - DebugLoc dl, SelectionDAG &DAG, + LowerCall(TargetLowering::CallLoweringInfo &CLI, SmallVectorImpl &InVals) const; virtual SDValue diff --git a/lib/Target/MBlaze/MBlazeInstrInfo.cpp b/lib/Target/MBlaze/MBlazeInstrInfo.cpp index db71434..b5025fc 100644 --- a/lib/Target/MBlaze/MBlazeInstrInfo.cpp +++ b/lib/Target/MBlaze/MBlazeInstrInfo.cpp @@ -287,7 +287,7 @@ unsigned MBlazeInstrInfo::getGlobalBaseReg(MachineFunction *MF) const { MachineRegisterInfo &RegInfo = MF->getRegInfo(); const TargetInstrInfo *TII = MF->getTarget().getInstrInfo(); - GlobalBaseReg = RegInfo.createVirtualRegister(MBlaze::GPRRegisterClass); + GlobalBaseReg = RegInfo.createVirtualRegister(&MBlaze::GPRRegClass); BuildMI(FirstMBB, MBBI, DebugLoc(), TII->get(TargetOpcode::COPY), GlobalBaseReg).addReg(MBlaze::R20); RegInfo.addLiveIn(MBlaze::R20); diff --git a/lib/Target/MBlaze/MBlazeInstrInfo.td b/lib/Target/MBlaze/MBlazeInstrInfo.td index 02a2157..139bf71 100644 --- a/lib/Target/MBlaze/MBlazeInstrInfo.td +++ b/lib/Target/MBlaze/MBlazeInstrInfo.td @@ -295,7 +295,7 @@ class BranchI op, bits<5> br, string instr_asm> : // Branch and Link Instructions //===----------------------------------------------------------------------===// class BranchL op, bits<5> br, bits<11> flags, string instr_asm> : - TA { let ra = br; @@ -303,7 +303,7 @@ class BranchL op, bits<5> br, bits<11> flags, string instr_asm> : } class BranchLI op, bits<5> br, string instr_asm> : - TB { let ra = br; diff --git a/lib/Target/MBlaze/MBlazeMCInstLower.h b/lib/Target/MBlaze/MBlazeMCInstLower.h index 7b97744..8ab2c9a 100644 --- a/lib/Target/MBlaze/MBlazeMCInstLower.h +++ b/lib/Target/MBlaze/MBlazeMCInstLower.h @@ -21,18 +21,16 @@ namespace llvm { class MachineInstr; class MachineModuleInfoMachO; class MachineOperand; - class Mangler; /// MBlazeMCInstLower - This class is used to lower an MachineInstr /// into an MCInst. 
class LLVM_LIBRARY_VISIBILITY MBlazeMCInstLower { MCContext &Ctx; - Mangler &Mang; AsmPrinter &Printer; public: - MBlazeMCInstLower(MCContext &ctx, Mangler &mang, AsmPrinter &printer) - : Ctx(ctx), Mang(mang), Printer(printer) {} + MBlazeMCInstLower(MCContext &ctx, AsmPrinter &printer) + : Ctx(ctx), Printer(printer) {} void Lower(const MachineInstr *MI, MCInst &OutMI) const; MCOperand LowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym) const; diff --git a/lib/Target/MBlaze/MBlazeSchedule.td b/lib/Target/MBlaze/MBlazeSchedule.td index 4a3ae5f..cd5691c 100644 --- a/lib/Target/MBlaze/MBlazeSchedule.td +++ b/lib/Target/MBlaze/MBlazeSchedule.td @@ -40,11 +40,6 @@ def IIC_WDC : InstrItinClass; def IIC_Pseudo : InstrItinClass; //===----------------------------------------------------------------------===// -// MBlaze generic instruction itineraries. -//===----------------------------------------------------------------------===// -def MBlazeGenericItineraries : ProcessorItineraries<[], [], []>; - -//===----------------------------------------------------------------------===// // MBlaze instruction itineraries for three stage pipeline. //===----------------------------------------------------------------------===// include "MBlazeSchedule3.td" diff --git a/lib/Target/MBlaze/MBlazeSubtarget.cpp b/lib/Target/MBlaze/MBlazeSubtarget.cpp index d12d142..dc2ad29 100644 --- a/lib/Target/MBlaze/MBlazeSubtarget.cpp +++ b/lib/Target/MBlaze/MBlazeSubtarget.cpp @@ -43,13 +43,6 @@ MBlazeSubtarget::MBlazeSubtarget(const std::string &TT, // Initialize scheduling itinerary for the specified CPU. InstrItins = getInstrItineraryForCPU(CPUName); - - // Compute the issue width of the MBlaze itineraries - computeIssueWidth(); -} - -void MBlazeSubtarget::computeIssueWidth() { - InstrItins.IssueWidth = 1; } bool MBlazeSubtarget:: diff --git a/lib/Target/MBlaze/MBlazeTargetMachine.cpp b/lib/Target/MBlaze/MBlazeTargetMachine.cpp index 62393d0..5f82f14 100644 --- a/lib/Target/MBlaze/MBlazeTargetMachine.cpp +++ b/lib/Target/MBlaze/MBlazeTargetMachine.cpp @@ -68,7 +68,7 @@ TargetPassConfig *MBlazeTargetMachine::createPassConfig(PassManagerBase &PM) { // Install an instruction selector pass using // the ISelDag to gen MBlaze code. bool MBlazePassConfig::addInstSelector() { - PM->add(createMBlazeISelDag(getMBlazeTargetMachine())); + addPass(createMBlazeISelDag(getMBlazeTargetMachine())); return false; } @@ -76,6 +76,6 @@ bool MBlazePassConfig::addInstSelector() { // machine code is emitted. return true if -print-machineinstrs should // print out the code after the passes. 
bool MBlazePassConfig::addPreEmitPass() { - PM->add(createMBlazeDelaySlotFillerPass(getMBlazeTargetMachine())); + addPass(createMBlazeDelaySlotFillerPass(getMBlazeTargetMachine())); return true; } diff --git a/lib/Target/MBlaze/MCTargetDesc/MBlazeMCCodeEmitter.cpp b/lib/Target/MBlaze/MCTargetDesc/MBlazeMCCodeEmitter.cpp index c9b1636..bfd11a0 100644 --- a/lib/Target/MBlaze/MCTargetDesc/MBlazeMCCodeEmitter.cpp +++ b/lib/Target/MBlaze/MCTargetDesc/MBlazeMCCodeEmitter.cpp @@ -98,6 +98,7 @@ public: MCCodeEmitter *llvm::createMBlazeMCCodeEmitter(const MCInstrInfo &MCII, + const MCRegisterInfo &MRI, const MCSubtargetInfo &STI, MCContext &Ctx) { return new MBlazeMCCodeEmitter(MCII, STI, Ctx); diff --git a/lib/Target/MBlaze/MCTargetDesc/MBlazeMCTargetDesc.h b/lib/Target/MBlaze/MCTargetDesc/MBlazeMCTargetDesc.h index ae82c32..7cc96c6 100644 --- a/lib/Target/MBlaze/MCTargetDesc/MBlazeMCTargetDesc.h +++ b/lib/Target/MBlaze/MCTargetDesc/MBlazeMCTargetDesc.h @@ -22,6 +22,7 @@ class MCContext; class MCCodeEmitter; class MCInstrInfo; class MCObjectWriter; +class MCRegisterInfo; class MCSubtargetInfo; class Target; class StringRef; @@ -30,6 +31,7 @@ class raw_ostream; extern Target TheMBlazeTarget; MCCodeEmitter *createMBlazeMCCodeEmitter(const MCInstrInfo &MCII, + const MCRegisterInfo &MRI, const MCSubtargetInfo &STI, MCContext &Ctx); diff --git a/lib/Target/MSP430/CMakeLists.txt b/lib/Target/MSP430/CMakeLists.txt index a8f9b52..f9ecaed 100644 --- a/lib/Target/MSP430/CMakeLists.txt +++ b/lib/Target/MSP430/CMakeLists.txt @@ -23,6 +23,8 @@ add_llvm_target(MSP430CodeGen MSP430MCInstLower.cpp ) +add_dependencies(LLVMMSP430CodeGen intrinsics_gen) + add_subdirectory(InstPrinter) add_subdirectory(TargetInfo) add_subdirectory(MCTargetDesc) diff --git a/lib/Target/MSP430/MSP430AsmPrinter.cpp b/lib/Target/MSP430/MSP430AsmPrinter.cpp index 1d1094b..86bc183c 100644 --- a/lib/Target/MSP430/MSP430AsmPrinter.cpp +++ b/lib/Target/MSP430/MSP430AsmPrinter.cpp @@ -154,7 +154,7 @@ bool MSP430AsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, //===----------------------------------------------------------------------===// void MSP430AsmPrinter::EmitInstruction(const MachineInstr *MI) { - MSP430MCInstLower MCInstLowering(OutContext, *Mang, *this); + MSP430MCInstLower MCInstLowering(OutContext, *this); MCInst TmpInst; MCInstLowering.Lower(MI, TmpInst); diff --git a/lib/Target/MSP430/MSP430ISelLowering.cpp b/lib/Target/MSP430/MSP430ISelLowering.cpp index 071a2f7..f8b7e14 100644 --- a/lib/Target/MSP430/MSP430ISelLowering.cpp +++ b/lib/Target/MSP430/MSP430ISelLowering.cpp @@ -59,13 +59,13 @@ HWMultMode("msp430-hwmult-mode", MSP430TargetLowering::MSP430TargetLowering(MSP430TargetMachine &tm) : TargetLowering(tm, new TargetLoweringObjectFileELF()), - Subtarget(*tm.getSubtargetImpl()), TM(tm) { + Subtarget(*tm.getSubtargetImpl()) { TD = getTargetData(); // Set up the register classes. 
- addRegisterClass(MVT::i8, MSP430::GR8RegisterClass); - addRegisterClass(MVT::i16, MSP430::GR16RegisterClass); + addRegisterClass(MVT::i8, &MSP430::GR8RegClass); + addRegisterClass(MVT::i16, &MSP430::GR16RegClass); // Compute derived properties from the register classes computeRegisterProperties(); @@ -226,9 +226,9 @@ getRegForInlineAsmConstraint(const std::string &Constraint, default: break; case 'r': // GENERAL_REGS if (VT == MVT::i8) - return std::make_pair(0U, MSP430::GR8RegisterClass); + return std::make_pair(0U, &MSP430::GR8RegClass); - return std::make_pair(0U, MSP430::GR16RegisterClass); + return std::make_pair(0U, &MSP430::GR16RegClass); } } @@ -266,14 +266,19 @@ MSP430TargetLowering::LowerFormalArguments(SDValue Chain, } SDValue -MSP430TargetLowering::LowerCall(SDValue Chain, SDValue Callee, - CallingConv::ID CallConv, bool isVarArg, - bool doesNotRet, bool &isTailCall, - const SmallVectorImpl &Outs, - const SmallVectorImpl &OutVals, - const SmallVectorImpl &Ins, - DebugLoc dl, SelectionDAG &DAG, +MSP430TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, SmallVectorImpl &InVals) const { + SelectionDAG &DAG = CLI.DAG; + DebugLoc &dl = CLI.DL; + SmallVector &Outs = CLI.Outs; + SmallVector &OutVals = CLI.OutVals; + SmallVector &Ins = CLI.Ins; + SDValue Chain = CLI.Chain; + SDValue Callee = CLI.Callee; + bool &isTailCall = CLI.IsTailCall; + CallingConv::ID CallConv = CLI.CallConv; + bool isVarArg = CLI.IsVarArg; + // MSP430 target does not yet support tail call optimization. isTailCall = false; @@ -310,7 +315,7 @@ MSP430TargetLowering::LowerCCCArguments(SDValue Chain, // Assign locations to all of the incoming arguments. SmallVector ArgLocs; CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), - getTargetMachine(), ArgLocs, *DAG.getContext()); + getTargetMachine(), ArgLocs, *DAG.getContext()); CCInfo.AnalyzeFormalArguments(Ins, CC_MSP430); assert(!isVarArg && "Varargs not supported yet"); @@ -330,8 +335,7 @@ MSP430TargetLowering::LowerCCCArguments(SDValue Chain, llvm_unreachable(0); } case MVT::i16: - unsigned VReg = - RegInfo.createVirtualRegister(MSP430::GR16RegisterClass); + unsigned VReg = RegInfo.createVirtualRegister(&MSP430::GR16RegClass); RegInfo.addLiveIn(VA.getLocReg(), VReg); SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, VReg, RegVT); @@ -391,7 +395,7 @@ MSP430TargetLowering::LowerReturn(SDValue Chain, // CCState - Info about the registers and stack slot. CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), - getTargetMachine(), RVLocs, *DAG.getContext()); + getTargetMachine(), RVLocs, *DAG.getContext()); // Analize return values. CCInfo.AnalyzeReturn(Outs, RetCC_MSP430); @@ -445,7 +449,7 @@ MSP430TargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee, // Analyze operands of the call, assigning locations to each operand. SmallVector ArgLocs; CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), - getTargetMachine(), ArgLocs, *DAG.getContext()); + getTargetMachine(), ArgLocs, *DAG.getContext()); CCInfo.AnalyzeCallOperands(Outs, CC_MSP430); @@ -568,7 +572,7 @@ MSP430TargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, // Assign locations to each value returned by this call. 
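(Editorial aside: the MSP430 LowerCall rewrite above is one instance of a release-wide interface change: the long LowerCall parameter list becomes a single TargetLowering::CallLoweringInfo argument. A sketch of the struct as the hunk unpacks it; the field names match the CLI.* uses above, but this is not the verbatim 3.1 header, which also carries return-type flags, the argument list, and the originating call site:

struct CallLoweringInfo {
  SelectionDAG &DAG;
  DebugLoc DL;
  SmallVector<ISD::OutputArg, 32> Outs;
  SmallVector<SDValue, 32> OutVals;
  SmallVector<ISD::InputArg, 32> Ins;
  SDValue Chain;
  SDValue Callee;
  bool IsTailCall;          // in-out: targets clear it when unsupported
  CallingConv::ID CallConv;
  bool IsVarArg;
};

Since targets now receive one struct, new call properties can be threaded through without touching every backend override, which is why the MSP430 body above shrinks to an unpacking prologue plus the old logic. The LowerCallResult hunk continues below.)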
SmallVector RVLocs; CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), - getTargetMachine(), RVLocs, *DAG.getContext()); + getTargetMachine(), RVLocs, *DAG.getContext()); CCInfo.AnalyzeCallResult(Ins, RetCC_MSP430); @@ -1024,27 +1028,27 @@ MSP430TargetLowering::EmitShiftInstr(MachineInstr *MI, default: llvm_unreachable("Invalid shift opcode!"); case MSP430::Shl8: Opc = MSP430::SHL8r1; - RC = MSP430::GR8RegisterClass; + RC = &MSP430::GR8RegClass; break; case MSP430::Shl16: Opc = MSP430::SHL16r1; - RC = MSP430::GR16RegisterClass; + RC = &MSP430::GR16RegClass; break; case MSP430::Sra8: Opc = MSP430::SAR8r1; - RC = MSP430::GR8RegisterClass; + RC = &MSP430::GR8RegClass; break; case MSP430::Sra16: Opc = MSP430::SAR16r1; - RC = MSP430::GR16RegisterClass; + RC = &MSP430::GR16RegClass; break; case MSP430::Srl8: Opc = MSP430::SAR8r1c; - RC = MSP430::GR8RegisterClass; + RC = &MSP430::GR8RegClass; break; case MSP430::Srl16: Opc = MSP430::SAR16r1c; - RC = MSP430::GR16RegisterClass; + RC = &MSP430::GR16RegClass; break; } @@ -1072,8 +1076,8 @@ MSP430TargetLowering::EmitShiftInstr(MachineInstr *MI, LoopBB->addSuccessor(RemBB); LoopBB->addSuccessor(LoopBB); - unsigned ShiftAmtReg = RI.createVirtualRegister(MSP430::GR8RegisterClass); - unsigned ShiftAmtReg2 = RI.createVirtualRegister(MSP430::GR8RegisterClass); + unsigned ShiftAmtReg = RI.createVirtualRegister(&MSP430::GR8RegClass); + unsigned ShiftAmtReg2 = RI.createVirtualRegister(&MSP430::GR8RegClass); unsigned ShiftReg = RI.createVirtualRegister(RC); unsigned ShiftReg2 = RI.createVirtualRegister(RC); unsigned ShiftAmtSrcReg = MI->getOperand(2).getReg(); diff --git a/lib/Target/MSP430/MSP430ISelLowering.h b/lib/Target/MSP430/MSP430ISelLowering.h index e372f00..d8ad02f 100644 --- a/lib/Target/MSP430/MSP430ISelLowering.h +++ b/lib/Target/MSP430/MSP430ISelLowering.h @@ -152,12 +152,7 @@ namespace llvm { DebugLoc dl, SelectionDAG &DAG, SmallVectorImpl &InVals) const; virtual SDValue - LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, - bool isVarArg, bool doesNotRet, bool &isTailCall, - const SmallVectorImpl &Outs, - const SmallVectorImpl &OutVals, - const SmallVectorImpl &Ins, - DebugLoc dl, SelectionDAG &DAG, + LowerCall(TargetLowering::CallLoweringInfo &CLI, SmallVectorImpl &InVals) const; virtual SDValue @@ -174,7 +169,6 @@ namespace llvm { SelectionDAG &DAG) const; const MSP430Subtarget &Subtarget; - const MSP430TargetMachine &TM; const TargetData *TD; }; } // namespace llvm diff --git a/lib/Target/MSP430/MSP430InstrInfo.cpp b/lib/Target/MSP430/MSP430InstrInfo.cpp index c03ba47..be332f0 100644 --- a/lib/Target/MSP430/MSP430InstrInfo.cpp +++ b/lib/Target/MSP430/MSP430InstrInfo.cpp @@ -29,7 +29,7 @@ using namespace llvm; MSP430InstrInfo::MSP430InstrInfo(MSP430TargetMachine &tm) : MSP430GenInstrInfo(MSP430::ADJCALLSTACKDOWN, MSP430::ADJCALLSTACKUP), - RI(tm, *this), TM(tm) {} + RI(tm, *this) {} void MSP430InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, diff --git a/lib/Target/MSP430/MSP430InstrInfo.h b/lib/Target/MSP430/MSP430InstrInfo.h index 04f339b..d79f992 100644 --- a/lib/Target/MSP430/MSP430InstrInfo.h +++ b/lib/Target/MSP430/MSP430InstrInfo.h @@ -42,7 +42,6 @@ namespace MSP430II { class MSP430InstrInfo : public MSP430GenInstrInfo { const MSP430RegisterInfo RI; - MSP430TargetMachine &TM; public: explicit MSP430InstrInfo(MSP430TargetMachine &TM); diff --git a/lib/Target/MSP430/MSP430InstrInfo.td b/lib/Target/MSP430/MSP430InstrInfo.td index 4348dd5..f003574 100644 --- 
a/lib/Target/MSP430/MSP430InstrInfo.td +++ b/lib/Target/MSP430/MSP430InstrInfo.td @@ -210,13 +210,13 @@ let isCall = 1 in let Defs = [R12W, R13W, R14W, R15W, SRW], Uses = [SPW] in { def CALLi : II16i<0x0, - (outs), (ins i16imm:$dst, variable_ops), + (outs), (ins i16imm:$dst), "call\t$dst", [(MSP430call imm:$dst)]>; def CALLr : II16r<0x0, - (outs), (ins GR16:$dst, variable_ops), + (outs), (ins GR16:$dst), "call\t$dst", [(MSP430call GR16:$dst)]>; def CALLm : II16m<0x0, - (outs), (ins memsrc:$dst, variable_ops), + (outs), (ins memsrc:$dst), "call\t${dst:mem}", [(MSP430call (load addr:$dst))]>; } diff --git a/lib/Target/MSP430/MSP430MCInstLower.h b/lib/Target/MSP430/MSP430MCInstLower.h index 24151e2..794aa56 100644 --- a/lib/Target/MSP430/MSP430MCInstLower.h +++ b/lib/Target/MSP430/MSP430MCInstLower.h @@ -21,18 +21,16 @@ namespace llvm { class MachineInstr; class MachineModuleInfoMachO; class MachineOperand; - class Mangler; /// MSP430MCInstLower - This class is used to lower an MachineInstr /// into an MCInst. class LLVM_LIBRARY_VISIBILITY MSP430MCInstLower { MCContext &Ctx; - Mangler &Mang; AsmPrinter &Printer; public: - MSP430MCInstLower(MCContext &ctx, Mangler &mang, AsmPrinter &printer) - : Ctx(ctx), Mang(mang), Printer(printer) {} + MSP430MCInstLower(MCContext &ctx, AsmPrinter &printer) + : Ctx(ctx), Printer(printer) {} void Lower(const MachineInstr *MI, MCInst &OutMI) const; MCOperand LowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym) const; diff --git a/lib/Target/MSP430/MSP430RegisterInfo.cpp b/lib/Target/MSP430/MSP430RegisterInfo.cpp index 51ec71a..aed46a2 100644 --- a/lib/Target/MSP430/MSP430RegisterInfo.cpp +++ b/lib/Target/MSP430/MSP430RegisterInfo.cpp @@ -96,7 +96,8 @@ BitVector MSP430RegisterInfo::getReservedRegs(const MachineFunction &MF) const { } const TargetRegisterClass * -MSP430RegisterInfo::getPointerRegClass(unsigned Kind) const { +MSP430RegisterInfo::getPointerRegClass(const MachineFunction &MF, unsigned Kind) + const { return &MSP430::GR16RegClass; } diff --git a/lib/Target/MSP430/MSP430RegisterInfo.h b/lib/Target/MSP430/MSP430RegisterInfo.h index 82ee499..9ee0a03 100644 --- a/lib/Target/MSP430/MSP430RegisterInfo.h +++ b/lib/Target/MSP430/MSP430RegisterInfo.h @@ -39,7 +39,8 @@ public: const uint16_t *getCalleeSavedRegs(const MachineFunction *MF = 0) const; BitVector getReservedRegs(const MachineFunction &MF) const; - const TargetRegisterClass* getPointerRegClass(unsigned Kind = 0) const; + const TargetRegisterClass* + getPointerRegClass(const MachineFunction &MF, unsigned Kind = 0) const; void eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, diff --git a/lib/Target/MSP430/MSP430RegisterInfo.td b/lib/Target/MSP430/MSP430RegisterInfo.td index 3f2eb8c..07619d0 100644 --- a/lib/Target/MSP430/MSP430RegisterInfo.td +++ b/lib/Target/MSP430/MSP430RegisterInfo.td @@ -78,8 +78,4 @@ def GR16 : RegisterClass<"MSP430", [i16], 16, // Frame pointer, sometimes allocable FPW, // Volatile, but not allocable - PCW, SPW, SRW, CGW)> -{ - let SubRegClasses = [(GR8 subreg_8bit)]; -} - + PCW, SPW, SRW, CGW)>; diff --git a/lib/Target/MSP430/MSP430TargetMachine.cpp b/lib/Target/MSP430/MSP430TargetMachine.cpp index 3acf96b..817001d 100644 --- a/lib/Target/MSP430/MSP430TargetMachine.cpp +++ b/lib/Target/MSP430/MSP430TargetMachine.cpp @@ -60,12 +60,12 @@ TargetPassConfig *MSP430TargetMachine::createPassConfig(PassManagerBase &PM) { bool MSP430PassConfig::addInstSelector() { // Install an instruction selector. 
-  PM->add(createMSP430ISelDag(getMSP430TargetMachine(), getOptLevel()));
+  addPass(createMSP430ISelDag(getMSP430TargetMachine(), getOptLevel()));
   return false;
 }
 
 bool MSP430PassConfig::addPreEmitPass() {
   // Must run branch selection immediately preceding the asm printer.
-  PM->add(createMSP430BranchSelectionPass());
+  addPass(createMSP430BranchSelectionPass());
   return false;
 }
diff --git a/lib/Target/Mips/AsmParser/CMakeLists.txt b/lib/Target/Mips/AsmParser/CMakeLists.txt
index ac21c25..6c7343b 100644
--- a/lib/Target/Mips/AsmParser/CMakeLists.txt
+++ b/lib/Target/Mips/AsmParser/CMakeLists.txt
@@ -1,6 +1,5 @@
-include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
-
 add_llvm_library(LLVMMipsAsmParser
   MipsAsmParser.cpp
   )
+add_dependencies(LLVMMipsAsmParser MipsCommonTableGen)
diff --git a/lib/Target/Mips/CMakeLists.txt b/lib/Target/Mips/CMakeLists.txt
index 0500c5d..aab8a01 100644
--- a/lib/Target/Mips/CMakeLists.txt
+++ b/lib/Target/Mips/CMakeLists.txt
@@ -13,26 +13,33 @@ tablegen(LLVM MipsGenEDInfo.inc -gen-enhanced-disassembly-info)
 add_public_tablegen_target(MipsCommonTableGen)
 
 add_llvm_target(MipsCodeGen
+  Mips16FrameLowering.cpp
+  Mips16InstrInfo.cpp
+  Mips16RegisterInfo.cpp
   MipsAnalyzeImmediate.cpp
   MipsAsmPrinter.cpp
   MipsCodeEmitter.cpp
   MipsDelaySlotFiller.cpp
-  MipsEmitGPRestore.cpp
-  MipsExpandPseudo.cpp
   MipsJITInfo.cpp
   MipsInstrInfo.cpp
   MipsISelDAGToDAG.cpp
   MipsISelLowering.cpp
   MipsFrameLowering.cpp
+  MipsLongBranch.cpp
   MipsMCInstLower.cpp
   MipsMachineFunction.cpp
   MipsRegisterInfo.cpp
+  MipsSEFrameLowering.cpp
+  MipsSEInstrInfo.cpp
+  MipsSERegisterInfo.cpp
   MipsSubtarget.cpp
   MipsTargetMachine.cpp
   MipsTargetObjectFile.cpp
   MipsSelectionDAGInfo.cpp
   )
 
+add_dependencies(LLVMMipsCodeGen intrinsics_gen)
+
 add_subdirectory(InstPrinter)
 add_subdirectory(Disassembler)
 add_subdirectory(TargetInfo)
diff --git a/lib/Target/Mips/Disassembler/MipsDisassembler.cpp b/lib/Target/Mips/Disassembler/MipsDisassembler.cpp
index 78dbc06..042b456 100644
--- a/lib/Target/Mips/Disassembler/MipsDisassembler.cpp
+++ b/lib/Target/Mips/Disassembler/MipsDisassembler.cpp
@@ -13,136 +13,87 @@
 #include "Mips.h"
 #include "MipsSubtarget.h"
+#include "MipsRegisterInfo.h"
 #include "llvm/MC/EDInstInfo.h"
 #include "llvm/MC/MCDisassembler.h"
 #include "llvm/Support/MemoryObject.h"
 #include "llvm/Support/TargetRegistry.h"
 #include "llvm/MC/MCSubtargetInfo.h"
 #include "llvm/MC/MCInst.h"
-#include "llvm/MC/MCRegisterInfo.h"
 #include "llvm/Support/MathExtras.h"
-
 #include "MipsGenEDInfo.inc"
 
 using namespace llvm;
 
 typedef MCDisassembler::DecodeStatus DecodeStatus;
 
-/// MipsDisassembler - a disasembler class for Mips32.
-class MipsDisassembler : public MCDisassembler {
+namespace {
+
+/// MipsDisassemblerBase - a disassembler class for Mips.
+class MipsDisassemblerBase : public MCDisassembler {
 public:
   /// Constructor - Initializes the disassembler.
   ///
-  MipsDisassembler(const MCSubtargetInfo &STI, bool bigEndian) :
-    MCDisassembler(STI), isBigEndian(bigEndian) {
-  }
-
-  ~MipsDisassembler() {
-  }
+  MipsDisassemblerBase(const MCSubtargetInfo &STI, const MCRegisterInfo *Info,
+                       bool bigEndian) :
+    MCDisassembler(STI), RegInfo(Info), isBigEndian(bigEndian) {}
 
-  /// getInstruction - See MCDisassembler.
-  DecodeStatus getInstruction(MCInst &instr,
-                              uint64_t &size,
-                              const MemoryObject &region,
-                              uint64_t address,
-                              raw_ostream &vStream,
-                              raw_ostream &cStream) const;
+  virtual ~MipsDisassemblerBase() {}
 
   /// getEDInfo - See MCDisassembler.
   const EDInstInfo *getEDInfo() const;
+
+  const MCRegisterInfo *getRegInfo() const { return RegInfo; }
+
 private:
+  const MCRegisterInfo *RegInfo;
+protected:
   bool isBigEndian;
 };
-
-/// Mips64Disassembler - a disasembler class for Mips64.
-class Mips64Disassembler : public MCDisassembler {
+/// MipsDisassembler - a disassembler class for Mips32.
+class MipsDisassembler : public MipsDisassemblerBase {
 public:
   /// Constructor - Initializes the disassembler.
   ///
-  Mips64Disassembler(const MCSubtargetInfo &STI, bool bigEndian) :
-    MCDisassembler(STI), isBigEndian(bigEndian) {
-  }
-
-  ~Mips64Disassembler() {
-  }
+  MipsDisassembler(const MCSubtargetInfo &STI, const MCRegisterInfo *Info,
+                   bool bigEndian) :
+    MipsDisassemblerBase(STI, Info, bigEndian) {}
 
   /// getInstruction - See MCDisassembler.
-  DecodeStatus getInstruction(MCInst &instr,
-                              uint64_t &size,
-                              const MemoryObject &region,
-                              uint64_t address,
-                              raw_ostream &vStream,
-                              raw_ostream &cStream) const;
-
-  /// getEDInfo - See MCDisassembler.
-  const EDInstInfo *getEDInfo() const;
-
-private:
-  bool isBigEndian;
+  virtual DecodeStatus getInstruction(MCInst &instr,
+                                      uint64_t &size,
+                                      const MemoryObject &region,
+                                      uint64_t address,
+                                      raw_ostream &vStream,
+                                      raw_ostream &cStream) const;
 };
 
-const EDInstInfo *MipsDisassembler::getEDInfo() const {
-  return instInfoMips;
-}
-
-const EDInstInfo *Mips64Disassembler::getEDInfo() const {
-  return instInfoMips;
-}
-
-// Decoder tables for Mips register
-static const unsigned CPURegsTable[] = {
-  Mips::ZERO, Mips::AT, Mips::V0, Mips::V1,
-  Mips::A0, Mips::A1, Mips::A2, Mips::A3,
-  Mips::T0, Mips::T1, Mips::T2, Mips::T3,
-  Mips::T4, Mips::T5, Mips::T6, Mips::T7,
-  Mips::S0, Mips::S1, Mips::S2, Mips::S3,
-  Mips::S4, Mips::S5, Mips::S6, Mips::S7,
-  Mips::T8, Mips::T9, Mips::K0, Mips::K1,
-  Mips::GP, Mips::SP, Mips::FP, Mips::RA
-};
-static const unsigned FGR32RegsTable[] = {
-  Mips::F0, Mips::F1, Mips::F2, Mips::F3,
-  Mips::F4, Mips::F5, Mips::F6, Mips::F7,
-  Mips::F8, Mips::F9, Mips::F10, Mips::F11,
-  Mips::F12, Mips::F13, Mips::F14, Mips::F15,
-  Mips::F16, Mips::F17, Mips::F18, Mips::F18,
-  Mips::F20, Mips::F21, Mips::F22, Mips::F23,
-  Mips::F24, Mips::F25, Mips::F26, Mips::F27,
-  Mips::F28, Mips::F29, Mips::F30, Mips::F31
-};
+/// Mips64Disassembler - a disassembler class for Mips64.
+class Mips64Disassembler : public MipsDisassemblerBase {
+public:
+  /// Constructor - Initializes the disassembler.
+  ///
+  Mips64Disassembler(const MCSubtargetInfo &STI, const MCRegisterInfo *Info,
+                     bool bigEndian) :
+    MipsDisassemblerBase(STI, Info, bigEndian) {}
 
-static const unsigned CPU64RegsTable[] = {
-  Mips::ZERO_64, Mips::AT_64, Mips::V0_64, Mips::V1_64,
-  Mips::A0_64, Mips::A1_64, Mips::A2_64, Mips::A3_64,
-  Mips::T0_64, Mips::T1_64, Mips::T2_64, Mips::T3_64,
-  Mips::T4_64, Mips::T5_64, Mips::T6_64, Mips::T7_64,
-  Mips::S0_64, Mips::S1_64, Mips::S2_64, Mips::S3_64,
-  Mips::S4_64, Mips::S5_64, Mips::S6_64, Mips::S7_64,
-  Mips::T8_64, Mips::T9_64, Mips::K0_64, Mips::K1_64,
-  Mips::GP_64, Mips::SP_64, Mips::FP_64, Mips::RA_64
+  /// getInstruction - See MCDisassembler.
+  virtual DecodeStatus getInstruction(MCInst &instr,
+                                      uint64_t &size,
+                                      const MemoryObject &region,
+                                      uint64_t address,
+                                      raw_ostream &vStream,
+                                      raw_ostream &cStream) const;
 };
 
-static const unsigned FGR64RegsTable[] = {
-  Mips::D0_64, Mips::D1_64, Mips::D2_64, Mips::D3_64,
-  Mips::D4_64, Mips::D5_64, Mips::D6_64, Mips::D7_64,
-  Mips::D8_64, Mips::D9_64, Mips::D10_64, Mips::D11_64,
-  Mips::D12_64, Mips::D13_64, Mips::D14_64, Mips::D15_64,
-  Mips::D16_64, Mips::D17_64, Mips::D18_64, Mips::D19_64,
-  Mips::D20_64, Mips::D21_64, Mips::D22_64, Mips::D23_64,
-  Mips::D24_64, Mips::D25_64, Mips::D26_64, Mips::D27_64,
-  Mips::D28_64, Mips::D29_64, Mips::D30_64, Mips::D31_64
-};
+} // end anonymous namespace
 
-static const unsigned AFGR64RegsTable[] = {
-  Mips::D0, Mips::D1, Mips::D2, Mips::D3,
-  Mips::D4, Mips::D5, Mips::D6, Mips::D7,
-  Mips::D8, Mips::D9, Mips::D10, Mips::D11,
-  Mips::D12, Mips::D13, Mips::D14, Mips::D15
-};
+const EDInstInfo *MipsDisassemblerBase::getEDInfo() const {
+  return instInfoMips;
+}
 
 // Forward declare these because the autogenerated code will reference them.
 // Definitions are further down.
@@ -239,25 +190,25 @@ extern Target TheMipselTarget, TheMipsTarget, TheMips64Target,
 
 static MCDisassembler *createMipsDisassembler(
                        const Target &T,
                        const MCSubtargetInfo &STI) {
-  return new MipsDisassembler(STI,true);
+  return new MipsDisassembler(STI, T.createMCRegInfo(""), true);
 }
 
 static MCDisassembler *createMipselDisassembler(
                        const Target &T,
                        const MCSubtargetInfo &STI) {
-  return new MipsDisassembler(STI,false);
+  return new MipsDisassembler(STI, T.createMCRegInfo(""), false);
 }
 
 static MCDisassembler *createMips64Disassembler(
                        const Target &T,
                        const MCSubtargetInfo &STI) {
-  return new Mips64Disassembler(STI,true);
+  return new Mips64Disassembler(STI, T.createMCRegInfo(""), true);
 }
 
 static MCDisassembler *createMips64elDisassembler(
                        const Target &T,
                        const MCSubtargetInfo &STI) {
-  return new Mips64Disassembler(STI, false);
+  return new Mips64Disassembler(STI, T.createMCRegInfo(""), false);
 }
 
 extern "C" void LLVMInitializeMipsDisassembler() {
@@ -362,6 +313,11 @@ Mips64Disassembler::getInstruction(MCInst &instr,
   return MCDisassembler::Fail;
 }
 
+static unsigned getReg(const void *D, unsigned RC, unsigned RegNo) {
+  const MipsDisassemblerBase *Dis = static_cast<const MipsDisassemblerBase*>(D);
+  return *(Dis->getRegInfo()->getRegClass(RC).begin() + RegNo);
+}
+
 static DecodeStatus DecodeCPU64RegsRegisterClass(MCInst &Inst,
                                                  unsigned RegNo,
                                                  uint64_t Address,
@@ -370,7 +326,8 @@ static DecodeStatus DecodeCPU64RegsRegisterClass(MCInst &Inst,
   if (RegNo > 31)
     return MCDisassembler::Fail;
 
-  Inst.addOperand(MCOperand::CreateReg(CPU64RegsTable[RegNo]));
+  unsigned Reg = getReg(Decoder, Mips::CPU64RegsRegClassID, RegNo);
+  Inst.addOperand(MCOperand::CreateReg(Reg));
   return MCDisassembler::Success;
 }
 
@@ -380,8 +337,8 @@ static DecodeStatus DecodeCPURegsRegisterClass(MCInst &Inst,
                                                const void *Decoder) {
   if (RegNo > 31)
     return MCDisassembler::Fail;
-
-  Inst.addOperand(MCOperand::CreateReg(CPURegsTable[RegNo]));
+  unsigned Reg = getReg(Decoder, Mips::CPURegsRegClassID, RegNo);
+  Inst.addOperand(MCOperand::CreateReg(Reg));
 
   return MCDisassembler::Success;
 }
 
@@ -392,7 +349,8 @@ static DecodeStatus DecodeFGR64RegisterClass(MCInst &Inst,
   if (RegNo > 31)
     return MCDisassembler::Fail;
 
-  Inst.addOperand(MCOperand::CreateReg(FGR64RegsTable[RegNo]));
+  unsigned Reg = getReg(Decoder, Mips::FGR64RegClassID, RegNo);
+  Inst.addOperand(MCOperand::CreateReg(Reg));
   return MCDisassembler::Success;
 }
 
@@ -403,7 +361,8 @@ static DecodeStatus DecodeFGR32RegisterClass(MCInst &Inst,
   if (RegNo > 31)
     return MCDisassembler::Fail;
 
-  Inst.addOperand(MCOperand::CreateReg(FGR32RegsTable[RegNo]));
+  unsigned Reg = getReg(Decoder, Mips::FGR32RegClassID, RegNo);
+  Inst.addOperand(MCOperand::CreateReg(Reg));
   return MCDisassembler::Success;
 }
 
@@ -420,15 +379,18 @@ static DecodeStatus DecodeMem(MCInst &Inst,
                               uint64_t Address,
                               const void *Decoder) {
   int Offset = SignExtend32<16>(Insn & 0xffff);
-  int Reg = (int)fieldFromInstruction32(Insn, 16, 5);
-  int Base = (int)fieldFromInstruction32(Insn, 21, 5);
+  unsigned Reg = fieldFromInstruction32(Insn, 16, 5);
+  unsigned Base = fieldFromInstruction32(Insn, 21, 5);
+
+  Reg = getReg(Decoder, Mips::CPURegsRegClassID, Reg);
+  Base = getReg(Decoder, Mips::CPURegsRegClassID, Base);
 
   if(Inst.getOpcode() == Mips::SC){
-    Inst.addOperand(MCOperand::CreateReg(CPURegsTable[Reg]));
+    Inst.addOperand(MCOperand::CreateReg(Reg));
   }
 
-  Inst.addOperand(MCOperand::CreateReg(CPURegsTable[Reg]));
-  Inst.addOperand(MCOperand::CreateReg(CPURegsTable[Base]));
+  Inst.addOperand(MCOperand::CreateReg(Reg));
+  Inst.addOperand(MCOperand::CreateReg(Base));
   Inst.addOperand(MCOperand::CreateImm(Offset));
 
   return MCDisassembler::Success;
@@ -439,11 +401,14 @@ static DecodeStatus DecodeFMem(MCInst &Inst,
                                uint64_t Address,
                                const void *Decoder) {
   int Offset = SignExtend32<16>(Insn & 0xffff);
-  int Reg = (int)fieldFromInstruction32(Insn, 16, 5);
-  int Base = (int)fieldFromInstruction32(Insn, 21, 5);
+  unsigned Reg = fieldFromInstruction32(Insn, 16, 5);
+  unsigned Base = fieldFromInstruction32(Insn, 21, 5);
 
-  Inst.addOperand(MCOperand::CreateReg(FGR64RegsTable[Reg]));
-  Inst.addOperand(MCOperand::CreateReg(CPURegsTable[Base]));
+  Reg = getReg(Decoder, Mips::FGR64RegClassID, Reg);
+  Base = getReg(Decoder, Mips::CPURegsRegClassID, Base);
+
+  Inst.addOperand(MCOperand::CreateReg(Reg));
+  Inst.addOperand(MCOperand::CreateReg(Base));
   Inst.addOperand(MCOperand::CreateImm(Offset));
 
   return MCDisassembler::Success;
@@ -474,10 +439,11 @@ static DecodeStatus DecodeAFGR64RegisterClass(MCInst &Inst,
                                               unsigned RegNo,
                                               uint64_t Address,
                                               const void *Decoder) {
-  if (RegNo > 31)
+  if (RegNo > 30 || RegNo % 2)
    return MCDisassembler::Fail;
 
-  Inst.addOperand(MCOperand::CreateReg(AFGR64RegsTable[RegNo]));
+  unsigned Reg = getReg(Decoder, Mips::AFGR64RegClassID, RegNo / 2);
+  Inst.addOperand(MCOperand::CreateReg(Reg));
   return MCDisassembler::Success;
 }
 
@@ -488,7 +454,7 @@ static DecodeStatus DecodeHWRegs64RegisterClass(MCInst &Inst,
   //Currently only hardware register 29 is supported
   if (RegNo != 29)
     return MCDisassembler::Fail;
-  Inst.addOperand(MCOperand::CreateReg(Mips::HWR29));
+  Inst.addOperand(MCOperand::CreateReg(Mips::HWR29_64));
   return MCDisassembler::Success;
 }
diff --git a/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp b/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp
index 6886b17..b38463d 100644
--- a/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp
+++ b/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp
@@ -13,6 +13,7 @@
 #define DEBUG_TYPE "asm-printer"
 #include "MipsInstPrinter.h"
+#include "MipsInstrInfo.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/MC/MCExpr.h"
 #include "llvm/MC/MCInst.h"
@@ -68,8 +69,25 @@ void MipsInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const {
 
 void MipsInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
                                 StringRef Annot) {
+  switch (MI->getOpcode()) {
+  default:
+    break;
+  case Mips::RDHWR:
+  case Mips::RDHWR64:
+    O << "\t.set\tpush\n";
+    O << "\t.set\tmips32r2\n";
+  }
+
   printInstruction(MI, O);
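(Editorial aside: two separate changes meet in the hunks here. First, printInst now brackets RDHWR/RDHWR64 with .set push / .set mips32r2 / .set pop; the TLS access sequence uses rdhwr even when the selected CPU predates MIPS32r2, and without the directives the assembler would reject it. Second, the decoders above stop indexing hand-maintained register arrays (one of which, the deleted FGR32 table, listed F18 twice where F19 belonged) and instead ask MCRegisterInfo, relying on each tablegen'd register class enumerating its members in the old table order. A sketch of the lookup with a hypothetical helper name, not an LLVM API:

static unsigned decodeGPR(const MCRegisterInfo &MRI, unsigned Encoded) {
  // Mirrors the getReg helper introduced above: the encoded 5-bit field
  // indexes straight into the register class's member list.
  const MCRegisterClass &RC = MRI.getRegClass(Mips::CPURegsRegClassID);
  return *(RC.begin() + Encoded); // Encoded == 29 yields Mips::SP
}

For AFGR64, 64-bit values living in even/odd pairs of 32-bit FPU registers, only even encodings name a pair, hence the new RegNo % 2 rejection and the RegNo / 2 index. The printInst hunk continues below.)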
printAnnotation(O, Annot); + + switch (MI->getOpcode()) { + default: + break; + case Mips::RDHWR: + case Mips::RDHWR64: + O << "\n\t.set\tpop"; + } } static void printExpr(const MCExpr *Expr, raw_ostream &OS) { @@ -108,6 +126,8 @@ static void printExpr(const MCExpr *Expr, raw_ostream &OS) { case MCSymbolRefExpr::VK_Mips_GOT_DISP: OS << "%got_disp("; break; case MCSymbolRefExpr::VK_Mips_GOT_PAGE: OS << "%got_page("; break; case MCSymbolRefExpr::VK_Mips_GOT_OFST: OS << "%got_ofst("; break; + case MCSymbolRefExpr::VK_Mips_HIGHER: OS << "%higher("; break; + case MCSymbolRefExpr::VK_Mips_HIGHEST: OS << "%highest("; break; } OS << SRE->getSymbol(); diff --git a/lib/Target/Mips/InstPrinter/MipsInstPrinter.h b/lib/Target/Mips/InstPrinter/MipsInstPrinter.h index 76b839b..3d8a6f9 100644 --- a/lib/Target/Mips/InstPrinter/MipsInstPrinter.h +++ b/lib/Target/Mips/InstPrinter/MipsInstPrinter.h @@ -16,7 +16,7 @@ #include "llvm/MC/MCInstPrinter.h" namespace llvm { -// These enumeration declarations were orignally in MipsInstrInfo.h but +// These enumeration declarations were originally in MipsInstrInfo.h but // had to be moved here to avoid circular dependencies between // LLVMMipsCodeGen and LLVMMipsAsmPrinter. namespace Mips { diff --git a/lib/Target/Mips/MCTargetDesc/Makefile b/lib/Target/Mips/MCTargetDesc/Makefile index 7fe2086..22a2721 100644 --- a/lib/Target/Mips/MCTargetDesc/Makefile +++ b/lib/Target/Mips/MCTargetDesc/Makefile @@ -14,3 +14,4 @@ LIBRARYNAME = LLVMMipsDesc CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. include $(LEVEL)/Makefile.common + diff --git a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp index 9b4caf6..18961fd 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp @@ -35,7 +35,13 @@ static unsigned adjustFixupValue(unsigned Kind, uint64_t Value) { return 0; case FK_GPRel_4: case FK_Data_4: + case FK_Data_8: case Mips::fixup_Mips_LO16: + case Mips::fixup_Mips_GPOFF_HI: + case Mips::fixup_Mips_GPOFF_LO: + case Mips::fixup_Mips_GOT_PAGE: + case Mips::fixup_Mips_GOT_OFST: + case Mips::fixup_Mips_GOT_DISP: break; case Mips::fixup_Mips_PC16: // So far we are only using this type for branches. @@ -54,9 +60,17 @@ static unsigned adjustFixupValue(unsigned Kind, uint64_t Value) { break; case Mips::fixup_Mips_HI16: case Mips::fixup_Mips_GOT_Local: - // Get the higher 16-bits. Also add 1 if bit 15 is 1. + // Get the 2nd 16-bits. Also add 1 if bit 15 is 1. Value = ((Value + 0x8000) >> 16) & 0xffff; break; + case Mips::fixup_Mips_HIGHER: + // Get the 3rd 16-bits. + Value = ((Value + 0x80008000LL) >> 32) & 0xffff; + break; + case Mips::fixup_Mips_HIGHEST: + // Get the 4th 16-bits. 
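  // (Editorial worked example for the HI16/HIGHER/HIGHEST cases above and
  //  below, taking Value = 0x123456789abcdef0:
  //    %lo      = low 16 bits                                = 0xdef0
  //    %hi      = ((Value + 0x8000) >> 16) & 0xffff          = 0x9abd
  //    %higher  = ((Value + 0x80008000) >> 32) & 0xffff      = 0x5679
  //    %highest = ((Value + 0x800080008000) >> 48) & 0xffff  = 0x1234
  //  Each rounding constant carries a one into a piece whenever the piece
  //  below it will later be sign-extended as negative, so a lui/daddiu/dsll
  //  chain reassembles the exact 64-bit constant.)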
+ Value = ((Value + 0x800080008000LL) >> 48) & 0xffff; + break; } return Value; @@ -74,7 +88,8 @@ public: :MCAsmBackend(), OSType(_OSType), IsLittle(_isLittle), Is64Bit(_is64Bit) {} MCObjectWriter *createObjectWriter(raw_ostream &OS) const { - return createMipsELFObjectWriter(OS, OSType, IsLittle, Is64Bit); + return createMipsELFObjectWriter(OS, + MCELFObjectTargetWriter::getOSABI(OSType), IsLittle, Is64Bit); } /// ApplyFixup - Apply the \arg Value for given \arg Fixup into the provided @@ -115,7 +130,8 @@ public: CurVal |= (uint64_t)((uint8_t)Data[Offset + Idx]) << (i*8); } - uint64_t Mask = ((uint64_t)(-1) >> (64 - getFixupKindInfo(Kind).TargetSize)); + uint64_t Mask = ((uint64_t)(-1) >> + (64 - getFixupKindInfo(Kind).TargetSize)); CurVal |= Value & Mask; // Write out the fixed up bytes back to the code/data bits. @@ -156,7 +172,14 @@ public: { "fixup_Mips_TLSLDM", 0, 16, 0 }, { "fixup_Mips_DTPREL_HI", 0, 16, 0 }, { "fixup_Mips_DTPREL_LO", 0, 16, 0 }, - { "fixup_Mips_Branch_PCRel", 0, 16, MCFixupKindInfo::FKF_IsPCRel } + { "fixup_Mips_Branch_PCRel", 0, 16, MCFixupKindInfo::FKF_IsPCRel }, + { "fixup_Mips_GPOFF_HI", 0, 16, 0 }, + { "fixup_Mips_GPOFF_LO", 0, 16, 0 }, + { "fixup_Mips_GOT_PAGE", 0, 16, 0 }, + { "fixup_Mips_GOT_OFST", 0, 16, 0 }, + { "fixup_Mips_GOT_DISP", 0, 16, 0 }, + { "fixup_Mips_HIGHER", 0, 16, 0 }, + { "fixup_Mips_HIGHEST", 0, 16, 0 } }; if (Kind < FirstTargetFixupKind) @@ -206,6 +229,14 @@ public: /// /// \return - True on success. bool writeNopData(uint64_t Count, MCObjectWriter *OW) const { + // Check for a less than instruction size number of bytes + // FIXME: 16 bit instructions are not handled yet here. + // We shouldn't be using a hard coded number for instruction size. + if (Count % 4) return false; + + uint64_t NumNops = Count / 4; + for (uint64_t i = 0; i != NumNops; ++i) + OW->Write32(0); return true; } }; // class MipsAsmBackend diff --git a/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h b/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h index fb1c5ce..234455e 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h +++ b/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h @@ -79,7 +79,12 @@ namespace MipsII { MO_GPOFF_LO, MO_GOT_DISP, MO_GOT_PAGE, - MO_GOT_OFST + MO_GOT_OFST, + + /// MO_HIGHER/HIGHEST - Represents the highest or higher half word of a + /// 64-bit symbol address. 
+ MO_HIGHER, + MO_HIGHEST }; enum { diff --git a/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp b/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp index 2091bec..8e84b3f 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp @@ -34,7 +34,7 @@ namespace { class MipsELFObjectWriter : public MCELFObjectTargetWriter { public: - MipsELFObjectWriter(bool _is64Bit, uint8_t OSABI); + MipsELFObjectWriter(bool _is64Bit, uint8_t OSABI, bool _isN64); virtual ~MipsELFObjectWriter(); @@ -52,9 +52,11 @@ namespace { }; } -MipsELFObjectWriter::MipsELFObjectWriter(bool _is64Bit, uint8_t OSABI) +MipsELFObjectWriter::MipsELFObjectWriter(bool _is64Bit, uint8_t OSABI, + bool _isN64) : MCELFObjectTargetWriter(_is64Bit, OSABI, ELF::EM_MIPS, - /*HasRelocationAddend*/ false) {} + /*HasRelocationAddend*/ false, + /*IsN64*/ _isN64) {} MipsELFObjectWriter::~MipsELFObjectWriter() {} @@ -101,6 +103,9 @@ unsigned MipsELFObjectWriter::GetRelocType(const MCValue &Target, case FK_Data_4: Type = ELF::R_MIPS_32; break; + case FK_Data_8: + Type = ELF::R_MIPS_64; + break; case FK_GPRel_4: Type = ELF::R_MIPS_GPREL32; break; @@ -148,8 +153,32 @@ unsigned MipsELFObjectWriter::GetRelocType(const MCValue &Target, case Mips::fixup_Mips_PC16: Type = ELF::R_MIPS_PC16; break; + case Mips::fixup_Mips_GOT_PAGE: + Type = ELF::R_MIPS_GOT_PAGE; + break; + case Mips::fixup_Mips_GOT_OFST: + Type = ELF::R_MIPS_GOT_OFST; + break; + case Mips::fixup_Mips_GOT_DISP: + Type = ELF::R_MIPS_GOT_DISP; + break; + case Mips::fixup_Mips_GPOFF_HI: + Type = setRType((unsigned)ELF::R_MIPS_GPREL16, Type); + Type = setRType2((unsigned)ELF::R_MIPS_SUB, Type); + Type = setRType3((unsigned)ELF::R_MIPS_HI16, Type); + break; + case Mips::fixup_Mips_GPOFF_LO: + Type = setRType((unsigned)ELF::R_MIPS_GPREL16, Type); + Type = setRType2((unsigned)ELF::R_MIPS_SUB, Type); + Type = setRType3((unsigned)ELF::R_MIPS_LO16, Type); + break; + case Mips::fixup_Mips_HIGHER: + Type = ELF::R_MIPS_HIGHER; + break; + case Mips::fixup_Mips_HIGHEST: + Type = ELF::R_MIPS_HIGHEST; + break; } - return Type; } @@ -184,10 +213,10 @@ static int CompareOffset(const RelEntry &R0, const RelEntry &R1) { void MipsELFObjectWriter::sortRelocs(const MCAssembler &Asm, std::vector &Relocs) { - // Call the defualt function first. Relocations are sorted in descending + // Call the default function first. Relocations are sorted in descending // order of r_offset. MCELFObjectTargetWriter::sortRelocs(Asm, Relocs); - + RelLs RelocLs; std::vector Unmatched; @@ -244,6 +273,7 @@ MCObjectWriter *llvm::createMipsELFObjectWriter(raw_ostream &OS, uint8_t OSABI, bool IsLittleEndian, bool Is64Bit) { - MCELFObjectTargetWriter *MOTW = new MipsELFObjectWriter(Is64Bit, OSABI); + MCELFObjectTargetWriter *MOTW = new MipsELFObjectWriter(Is64Bit, OSABI, + (Is64Bit) ? 
true : false);
   return createELFObjectWriter(MOTW, OS, IsLittleEndian);
 }
diff --git a/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h b/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h
index 9b76eda..77faec5 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h
+++ b/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h
@@ -95,6 +95,27 @@ namespace Mips {
   // PC relative branch fixup resulting in - R_MIPS_PC16
   fixup_Mips_Branch_PCRel,
 
+  // resulting in - R_MIPS_GPREL16/R_MIPS_SUB/R_MIPS_HI16
+  fixup_Mips_GPOFF_HI,
+
+  // resulting in - R_MIPS_GPREL16/R_MIPS_SUB/R_MIPS_LO16
+  fixup_Mips_GPOFF_LO,
+
+  // resulting in - R_MIPS_GOT_PAGE
+  fixup_Mips_GOT_PAGE,
+
+  // resulting in - R_MIPS_GOT_OFST
+  fixup_Mips_GOT_OFST,
+
+  // resulting in - R_MIPS_GOT_DISP
+  fixup_Mips_GOT_DISP,
+
+  // resulting in - R_MIPS_HIGHER
+  fixup_Mips_HIGHER,
+
+  // resulting in - R_MIPS_HIGHEST
+  fixup_Mips_HIGHEST,
+
   // Marker
   LastTargetFixupKind,
   NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind
diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
index 4ed2be0..8dab62d 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
@@ -91,6 +91,7 @@ public:
 } // namespace
 
 MCCodeEmitter *llvm::createMipsMCCodeEmitterEB(const MCInstrInfo &MCII,
+                                               const MCRegisterInfo &MRI,
                                                const MCSubtargetInfo &STI,
                                                MCContext &Ctx)
 {
@@ -98,6 +99,7 @@ MCCodeEmitter *llvm::createMipsMCCodeEmitterEB(const MCInstrInfo &MCII,
 }
 
 MCCodeEmitter *llvm::createMipsMCCodeEmitterEL(const MCInstrInfo &MCII,
+                                               const MCRegisterInfo &MRI,
                                                const MCSubtargetInfo &STI,
                                                MCContext &Ctx)
 {
@@ -179,7 +181,7 @@ getMachineOpValue(const MCInst &MI, const MCOperand &MO,
   } else if (MO.isFPImm()) {
     return static_cast<unsigned>(APFloat(MO.getFPImm())
         .bitcastToAPInt().getHiBits(32).getLimitedValue());
-  } 
+  }
 
   // MO must be an Expr.
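(Editorial aside on the relocation plumbing above: MipsELFObjectWriter now hands an IsN64 flag to MCELFObjectTargetWriter because the N64 ABI's relocation entries carry up to three operation codes that apply in sequence, which is what the setRType/setRType2/setRType3 chains for the %gp_rel fixups construct. A sketch of the packing those helpers imply, assuming the standard N64 r_type/r_type2/r_type3 byte layout; the helper name is hypothetical, not an LLVM API:

// Compose an N64 composite relocation type, e.g. for fixup_Mips_GPOFF_HI:
// R_MIPS_GPREL16, then R_MIPS_SUB, then R_MIPS_HI16.
static unsigned packN64RelType(unsigned Type1, unsigned Type2, unsigned Type3) {
  return (Type1 & 0xff)           // r_type:  first operation
       | ((Type2 & 0xff) << 8)    // r_type2: second operation
       | ((Type3 & 0xff) << 16);  // r_type3: third operation
}

On 32-bit MIPS each entry holds a single type, so the other new fixups map one-to-one, for example fixup_Mips_HIGHER to R_MIPS_HIGHER. The getMachineOpValue hunk continues below.)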
assert(MO.isExpr()); @@ -193,10 +195,27 @@ getMachineOpValue(const MCInst &MI, const MCOperand &MO, } assert (Kind == MCExpr::SymbolRef); - + Mips::Fixups FixupKind = Mips::Fixups(0); switch(cast(Expr)->getKind()) { + default: llvm_unreachable("Unknown fixup kind!"); + break; + case MCSymbolRefExpr::VK_Mips_GPOFF_HI : + FixupKind = Mips::fixup_Mips_GPOFF_HI; + break; + case MCSymbolRefExpr::VK_Mips_GPOFF_LO : + FixupKind = Mips::fixup_Mips_GPOFF_LO; + break; + case MCSymbolRefExpr::VK_Mips_GOT_PAGE : + FixupKind = Mips::fixup_Mips_GOT_PAGE; + break; + case MCSymbolRefExpr::VK_Mips_GOT_OFST : + FixupKind = Mips::fixup_Mips_GOT_OFST; + break; + case MCSymbolRefExpr::VK_Mips_GOT_DISP : + FixupKind = Mips::fixup_Mips_GOT_DISP; + break; case MCSymbolRefExpr::VK_Mips_GPREL: FixupKind = Mips::fixup_Mips_GPREL16; break; @@ -236,7 +255,11 @@ getMachineOpValue(const MCInst &MI, const MCOperand &MO, case MCSymbolRefExpr::VK_Mips_TPREL_LO: FixupKind = Mips::fixup_Mips_TPREL_LO; break; - default: + case MCSymbolRefExpr::VK_Mips_HIGHER: + FixupKind = Mips::fixup_Mips_HIGHER; + break; + case MCSymbolRefExpr::VK_Mips_HIGHEST: + FixupKind = Mips::fixup_Mips_HIGHEST; break; } // switch diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h b/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h index 547ccdd..bfcc2a2 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h +++ b/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h @@ -22,6 +22,7 @@ class MCCodeEmitter; class MCContext; class MCInstrInfo; class MCObjectWriter; +class MCRegisterInfo; class MCSubtargetInfo; class StringRef; class Target; @@ -33,9 +34,11 @@ extern Target TheMips64Target; extern Target TheMips64elTarget; MCCodeEmitter *createMipsMCCodeEmitterEB(const MCInstrInfo &MCII, + const MCRegisterInfo &MRI, const MCSubtargetInfo &STI, MCContext &Ctx); MCCodeEmitter *createMipsMCCodeEmitterEL(const MCInstrInfo &MCII, + const MCRegisterInfo &MRI, const MCSubtargetInfo &STI, MCContext &Ctx); diff --git a/lib/Target/Mips/Mips.h b/lib/Target/Mips/Mips.h index bafadc8..2963f7e 100644 --- a/lib/Target/Mips/Mips.h +++ b/lib/Target/Mips/Mips.h @@ -24,9 +24,7 @@ namespace llvm { FunctionPass *createMipsISelDag(MipsTargetMachine &TM); FunctionPass *createMipsDelaySlotFillerPass(MipsTargetMachine &TM); - FunctionPass *createMipsExpandPseudoPass(MipsTargetMachine &TM); - FunctionPass *createMipsEmitGPRestorePass(MipsTargetMachine &TM); - + FunctionPass *createMipsLongBranchPass(MipsTargetMachine &TM); FunctionPass *createMipsJITCodeEmitterPass(MipsTargetMachine &TM, JITCodeEmitter &JCE); diff --git a/lib/Target/Mips/Mips.td b/lib/Target/Mips/Mips.td index cbebe84..8548ae0 100644 --- a/lib/Target/Mips/Mips.td +++ b/lib/Target/Mips/Mips.td @@ -72,6 +72,9 @@ def FeatureMips64r2 : SubtargetFeature<"mips64r2", "MipsArchVersion", "Mips64r2", "Mips64r2 ISA Support", [FeatureMips64, FeatureMips32r2]>; +def FeatureMips16 : SubtargetFeature<"mips16", "InMips16Mode", "true", + "Mips16 mode">; + //===----------------------------------------------------------------------===// // Mips processors supported. 
//===----------------------------------------------------------------------===// @@ -83,6 +86,7 @@ def : Proc<"mips32", [FeatureMips32]>; def : Proc<"mips32r2", [FeatureMips32r2]>; def : Proc<"mips64", [FeatureMips64]>; def : Proc<"mips64r2", [FeatureMips64r2]>; +def : Proc<"mips16", [FeatureMips16]>; def MipsAsmWriter : AsmWriter { string AsmWriterClassName = "InstPrinter"; diff --git a/lib/Target/Mips/Mips16FrameLowering.cpp b/lib/Target/Mips/Mips16FrameLowering.cpp new file mode 100644 index 0000000..030042f --- /dev/null +++ b/lib/Target/Mips/Mips16FrameLowering.cpp @@ -0,0 +1,87 @@ +//===-- Mips16FrameLowering.cpp - Mips16 Frame Information ----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the Mips16 implementation of TargetFrameLowering class. +// +//===----------------------------------------------------------------------===// + +#include "Mips16FrameLowering.h" +#include "MipsInstrInfo.h" +#include "MCTargetDesc/MipsBaseInfo.h" +#include "llvm/Function.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/Support/CommandLine.h" + +using namespace llvm; + +void Mips16FrameLowering::emitPrologue(MachineFunction &MF) const { + MachineBasicBlock &MBB = MF.front(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + const MipsInstrInfo &TII = + *static_cast(MF.getTarget().getInstrInfo()); + MachineBasicBlock::iterator MBBI = MBB.begin(); + DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc(); + uint64_t StackSize = MFI->getStackSize(); + + // No need to allocate space on the stack. + if (StackSize == 0 && !MFI->adjustsStack()) return; + + // Adjust stack. + if (isInt<16>(-StackSize)) + BuildMI(MBB, MBBI, dl, TII.get(Mips::SaveRaF16)).addImm(StackSize); +} + +void Mips16FrameLowering::emitEpilogue(MachineFunction &MF, + MachineBasicBlock &MBB) const { + MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + const MipsInstrInfo &TII = + *static_cast(MF.getTarget().getInstrInfo()); + DebugLoc dl = MBBI->getDebugLoc(); + uint64_t StackSize = MFI->getStackSize(); + + if (!StackSize) + return; + + // Adjust stack. + if (isInt<16>(StackSize)) + // assumes stacksize multiple of 8 + BuildMI(MBB, MBBI, dl, TII.get(Mips::RestoreRaF16)).addImm(StackSize); +} + +bool Mips16FrameLowering:: +spillCalleeSavedRegisters(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + const std::vector &CSI, + const TargetRegisterInfo *TRI) const { + // FIXME: implement. + return true; +} + +bool +Mips16FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const { + // FIXME: implement. 
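  // (Editorial note, not part of the imported patch: answering true from
  //  hasReservedCallFrame tells prologue/epilogue insertion that space for
  //  outgoing call arguments is already folded into the fixed frame, so the
  //  ADJCALLSTACKDOWN/UP pseudos can simply be erased instead of being
  //  lowered to SP adjustments around every call; the FIXME above marks
  //  this as placeholder behavior for the Mips16 stub.)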
+ return true; +} + +void Mips16FrameLowering:: +processFunctionBeforeCalleeSavedScan(MachineFunction &MF, + RegScavenger *RS) const { +} + +const MipsFrameLowering * +llvm::createMips16FrameLowering(const MipsSubtarget &ST) { + return new Mips16FrameLowering(ST); +} diff --git a/lib/Target/Mips/Mips16FrameLowering.h b/lib/Target/Mips/Mips16FrameLowering.h new file mode 100644 index 0000000..25cc37b --- /dev/null +++ b/lib/Target/Mips/Mips16FrameLowering.h @@ -0,0 +1,43 @@ +//===-- Mips16FrameLowering.h - Mips16 frame lowering ----------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// +// +//===----------------------------------------------------------------------===// + +#ifndef MIPS16_FRAMEINFO_H +#define MIPS16_FRAMEINFO_H + +#include "MipsFrameLowering.h" + +namespace llvm { +class Mips16FrameLowering : public MipsFrameLowering { +public: + explicit Mips16FrameLowering(const MipsSubtarget &STI) + : MipsFrameLowering(STI) {} + + /// emitProlog/emitEpilog - These methods insert prolog and epilog code into + /// the function. + void emitPrologue(MachineFunction &MF) const; + void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const; + + bool spillCalleeSavedRegisters(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + const std::vector &CSI, + const TargetRegisterInfo *TRI) const; + + bool hasReservedCallFrame(const MachineFunction &MF) const; + + void processFunctionBeforeCalleeSavedScan(MachineFunction &MF, + RegScavenger *RS) const; +}; + +} // End llvm namespace + +#endif diff --git a/lib/Target/Mips/Mips16InstrFormats.td b/lib/Target/Mips/Mips16InstrFormats.td new file mode 100644 index 0000000..61602b6 --- /dev/null +++ b/lib/Target/Mips/Mips16InstrFormats.td @@ -0,0 +1,663 @@ +//===- Mips16InstrFormats.td - Mips Instruction Formats ----*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Describe MIPS instructions format +// +// CPU INSTRUCTION FORMATS +// +// funct or f Function field +// +// immediate 4-,5-,8- or 11-bit immediate, branch displacement, or +// or imm address displacement +// +// op 5-bit major operation code +// +// rx 3-bit source or destination register +// +// ry 3-bit source or destination register +// +// rz 3-bit source or destination register +// +// sa 3- or 5-bit shift amount +// +//===----------------------------------------------------------------------===// + +// Format specifies the encoding used by the instruction. This is part of the +// ad-hoc solution used to emit machine instruction encodings by our machine +// code emitter. 
+// +class Format16 val> { + bits<5> Value = val; +} + +def Pseudo16 : Format16<0>; +def FrmI16 : Format16<1>; +def FrmRI16 : Format16<2>; +def FrmRR16 : Format16<3>; +def FrmRRI16 : Format16<4>; +def FrmRRR16 : Format16<5>; +def FrmRRI_A16 : Format16<6>; +def FrmSHIFT16 : Format16<7>; +def FrmI8_TYPE16 : Format16<8>; +def FrmI8_MOVR3216 : Format16<9>; +def FrmI8_MOV32R16 : Format16<10>; +def FrmI8_SVRS16 : Format16<11>; +def FrmJAL16 : Format16<12>; +def FrmJALX16 : Format16<13>; +def FrmEXT_I16 : Format16<14>; +def FrmASMACRO16 : Format16<15>; +def FrmEXT_RI16 : Format16<16>; +def FrmEXT_RRI16 : Format16<17>; +def FrmEXT_RRI_A16 : Format16<18>; +def FrmEXT_SHIFT16 : Format16<19>; +def FrmEXT_I816 : Format16<20>; +def FrmEXT_I8_SVRS16 : Format16<21>; +def FrmOther16 : Format16<22>; // Instruction w/ a custom format + +// Base class for Mips 16 Format +// This class does not depend on the instruction size +// +class MipsInst16_Base pattern, + InstrItinClass itin, Format16 f>: Instruction +{ + Format16 Form = f; + + let Namespace = "Mips"; + + let OutOperandList = outs; + let InOperandList = ins; + + let AsmString = asmstr; + let Pattern = pattern; + let Itinerary = itin; + + // + // Attributes specific to Mips instructions... + // + bits<5> FormBits = Form.Value; + + // TSFlags layout should be kept in sync with MipsInstrInfo.h. + let TSFlags{4-0} = FormBits; + + let Predicates = [InMips16Mode]; +} + +// +// Generic Mips 16 Format +// +class MipsInst16 pattern, + InstrItinClass itin, Format16 f>: + MipsInst16_Base +{ + field bits<16> Inst; + bits<5> Opcode = 0; + + // Top 5 bits are the 'opcode' field + let Inst{15-11} = Opcode; +} + +// +// For 32 bit extended instruction forms. +// +class MipsInst16_32 pattern, + InstrItinClass itin, Format16 f>: + MipsInst16_Base +{ + field bits<32> Inst; + +} + +class MipsInst16_EXTEND pattern, + InstrItinClass itin, Format16 f>: + MipsInst16_32 +{ + let Inst{31-27} = 0b11110; +} + + + +// Mips Pseudo Instructions Format +class MipsPseudo16 pattern>: + MipsInst16 { + let isCodeGenOnly = 1; + let isPseudo = 1; +} + + +//===----------------------------------------------------------------------===// +// Format I instruction class in Mips : <|opcode|imm11|> +//===----------------------------------------------------------------------===// + +class FI16 op, dag outs, dag ins, string asmstr, list pattern, + InstrItinClass itin>: + MipsInst16 +{ + bits<11> imm11; + + let Opcode = op; + + let Inst{10-0} = imm11; +} + +//===----------------------------------------------------------------------===// +// Format RI instruction class in Mips : <|opcode|rx|imm8|> +//===----------------------------------------------------------------------===// + +class FRI16 op, dag outs, dag ins, string asmstr, + list pattern, InstrItinClass itin>: + MipsInst16 +{ + bits<3> rx; + bits<8> imm8; + + let Opcode = op; + + let Inst{10-8} = rx; + let Inst{7-0} = imm8; +} + +//===----------------------------------------------------------------------===// +// Format RR instruction class in Mips : <|opcode|rx|ry|funct|> +//===----------------------------------------------------------------------===// + +class FRR16 _funct, dag outs, dag ins, string asmstr, + list pattern, InstrItinClass itin>: + MipsInst16 +{ + bits<3> rx; + bits<3> ry; + bits<5> funct; + + let Opcode = 0b11101; + let funct = _funct; + + let Inst{10-8} = rx; + let Inst{7-5} = ry; + let Inst{4-0} = funct; +} + +// +// For conversion functions. 
+// +class FRR_SF16 _funct, bits<3> _subfunct, dag outs, dag ins, + string asmstr, list pattern, InstrItinClass itin>: + MipsInst16 +{ + bits<3> rx; + bits<3> subfunct; + bits<5> funct; + + let Opcode = 0b11101; // RR + let funct = _funct; + let subfunct = _subfunct; + + let Inst{10-8} = rx; + let Inst{7-5} = subfunct; + let Inst{4-0} = funct; +} + +// +// just used for breakpoint (hardware and software) instructions. +// +class FC16 _funct, dag outs, dag ins, string asmstr, + list pattern, InstrItinClass itin>: + MipsInst16 +{ + bits<6> _code; // code is a keyword in tablegen + bits<5> funct; + + let Opcode = 0b11101; // RR + let funct = _funct; + + let Inst{10-5} = _code; + let Inst{4-0} = funct; +} + +// +// J(AL)R(C) subformat +// +class FRR16_JALRC _nd, bits<1> _l, bits<1> r_a, + dag outs, dag ins, string asmstr, + list pattern, InstrItinClass itin>: + MipsInst16 +{ + bits<3> rx; + bits<1> nd; + bits<1> l; + bits<1> ra; + + let nd = _nd; + let l = _l; + let ra = r_a; + + let Opcode = 0b11101; + + let Inst{10-8} = rx; + let Inst{7} = nd; + let Inst{6} = l; + let Inst{5} = ra; + let Inst{4-0} = 0; +} + +//===----------------------------------------------------------------------===// +// Format RRI instruction class in Mips : <|opcode|rx|ry|imm5|> +//===----------------------------------------------------------------------===// + +class FRRI16 op, dag outs, dag ins, string asmstr, + list pattern, InstrItinClass itin>: + MipsInst16 +{ + bits<3> rx; + bits<3> ry; + bits<5> imm5; + + let Opcode = op; + + + let Inst{10-8} = rx; + let Inst{7-5} = ry; + let Inst{4-0} = imm5; +} + +//===----------------------------------------------------------------------===// +// Format RRR instruction class in Mips : <|opcode|rx|ry|rz|f|> +//===----------------------------------------------------------------------===// + +class FRRR16 _f, dag outs, dag ins, string asmstr, + list pattern, InstrItinClass itin>: + MipsInst16 +{ + bits<3> rx; + bits<3> ry; + bits<3> rz; + bits<2> f; + + let Opcode = 0b11100; + let f = _f; + + let Inst{10-8} = rx; + let Inst{7-5} = ry; + let Inst{4-2} = rz; + let Inst{1-0} = f; +} + +//===----------------------------------------------------------------------===// +// Format RRI-A instruction class in Mips : <|opcode|rx|ry|f|imm4|> +//===----------------------------------------------------------------------===// + +class FRRI_A16 _f, dag outs, dag ins, string asmstr, + list pattern, InstrItinClass itin>: + MipsInst16 +{ + bits<3> rx; + bits<3> ry; + bits<1> f; + bits<4> imm4; + + let Opcode = 0b01000; + let f = _f; + + let Inst{10-8} = rx; + let Inst{7-5} = ry; + let Inst{4} = f; + let Inst{3-0} = imm4; +} + +//===----------------------------------------------------------------------===// +// Format Shift instruction class in Mips : <|opcode|rx|ry|sa|f|> +//===----------------------------------------------------------------------===// + +class FSHIFT16 _f, dag outs, dag ins, string asmstr, + list pattern, InstrItinClass itin>: + MipsInst16 +{ + bits<3> rx; + bits<3> ry; + bits<3> sa; + bits<2> f; + + let Opcode = 0b00110; + let f = _f; + + let Inst{10-8} = rx; + let Inst{7-5} = ry; + let Inst{4-2} = sa; + let Inst{1-0} = f; +} + +//===----------------------------------------------------------------------===// +// Format i8 instruction class in Mips : <|opcode|funct|imm8> +//===----------------------------------------------------------------------===// + +class FI816 _func, dag outs, dag ins, string asmstr, + list pattern, InstrItinClass itin>: + MipsInst16 +{ + bits<3> func; + 
bits<8> imm8; + + let Opcode = 0b01100; + let func = _func; + + let Inst{10-8} = func; + let Inst{7-0} = imm8; +} + +//===----------------------------------------------------------------------===// +// Format i8_MOVR32 instruction class in Mips : <|opcode|func|ry|r32> +//===----------------------------------------------------------------------===// + +class FI8_MOVR3216 pattern, InstrItinClass itin>: + MipsInst16 +{ + + bits<4> ry; + bits<4> r32; + + let Opcode = 0b01100; + + let Inst{10-8} = 0b111; + let Inst{7-4} = ry; + let Inst{3-0} = r32; + +} + + + +//===----------------------------------------------------------------------===// +// Format i8_MOV32R instruction class in Mips : <|opcode|func|r32|rz> +//===----------------------------------------------------------------------===// + +class FI8_MOV32R16 pattern, InstrItinClass itin>: + MipsInst16 +{ + + bits<3> func; + bits<5> r32; + bits<3> rz; + + + let Opcode = 0b01100; + + let Inst{10-8} = 0b101; + let Inst{7-5} = r32{2-0}; + let Inst{4-3} = r32{4-3}; + let Inst{2-0} = rz; + +} + +//===----------------------------------------------------------------------===// +// Format i8_SVRS instruction class in Mips : +// <|opcode|svrs|s|ra|s0|s1|framesize> +//===----------------------------------------------------------------------===// + +class FI8_SVRS16 _s, dag outs, dag ins, string asmstr, + list pattern, InstrItinClass itin>: + MipsInst16 +{ + bits<1> s; + bits<1> ra = 0; + bits<1> s0 = 0; + bits<1> s1 = 0; + bits<4> framesize = 0; + + let s =_s; + let Opcode = 0b01100; + + let Inst{10-8} = 0b100; + let Inst{7} = s; + let Inst{6} = ra; + let Inst{5} = s0; + let Inst{4} = s1; + let Inst{3-0} = framesize; + +} + +//===----------------------------------------------------------------------===// +// Format JAL instruction class in Mips16 : +// <|opcode|svrs|s|ra|s0|s1|framesize> +//===----------------------------------------------------------------------===// + +class FJAL16 _X, dag outs, dag ins, string asmstr, + list pattern, InstrItinClass itin>: + MipsInst16_32 +{ + bits<1> X; + bits<26> imm26; + + + let X = _X; + + let Inst{31-27} = 0b00011; + let Inst{26} = X; + let Inst{25-21} = imm26{20-16}; + let Inst{20-16} = imm26{25-21}; + let Inst{15-0} = imm26{15-0}; + +} + +//===----------------------------------------------------------------------===// +// Format EXT-I instruction class in Mips16 : +// <|EXTEND|imm10:5|imm15:11|op|0|0|0|0|0|0|imm4:0> +//===----------------------------------------------------------------------===// + +class FEXT_I16 _eop, dag outs, dag ins, string asmstr, + list pattern, InstrItinClass itin>: + MipsInst16_EXTEND +{ + bits<16> imm16; + bits<5> eop; + + let eop = _eop; + + let Inst{26-21} = imm16{10-5}; + let Inst{20-16} = imm16{15-11}; + let Inst{15-11} = eop; + let Inst{10-5} = 0; + let Inst{4-0} = imm16{4-0}; + +} + +//===----------------------------------------------------------------------===// +// Format ASMACRO instruction class in Mips16 : +// +//===----------------------------------------------------------------------===// + +class FASMACRO16 pattern, InstrItinClass itin>: + MipsInst16_EXTEND +{ + bits<3> select; + bits<3> p4; + bits<5> p3; + bits<5> RRR = 0b11100; + bits<3> p2; + bits<3> p1; + bits<5> p0; + + + let Inst{26-24} = select; + let Inst{23-21} = p4; + let Inst{20-16} = p3; + let Inst{15-11} = RRR; + let Inst{10-8} = p2; + let Inst{7-5} = p1; + let Inst{4-0} = p0; + +} + + +//===----------------------------------------------------------------------===// +// Format EXT-RI instruction class in 
Mips16 : +// <|EXTEND|imm10:5|imm15:11|op|rx|0|0|0|imm4:0> +//===----------------------------------------------------------------------===// + +class FEXT_RI16 _op, dag outs, dag ins, string asmstr, + list pattern, InstrItinClass itin>: + MipsInst16_EXTEND +{ + bits<16> imm16; + bits<5> op; + bits<3> rx; + + let op = _op; + + let Inst{26-21} = imm16{10-5}; + let Inst{20-16} = imm16{15-11}; + let Inst{15-11} = op; + let Inst{10-8} = rx; + let Inst{7-5} = 0; + let Inst{4-0} = imm16{4-0}; + +} + +//===----------------------------------------------------------------------===// +// Format EXT-RRI instruction class in Mips16 : +// <|EXTEND|imm10:5|imm15:11|op|rx|ry|imm4:0> +//===----------------------------------------------------------------------===// + +class FEXT_RRI16 _op, dag outs, dag ins, string asmstr, + list pattern, InstrItinClass itin>: + MipsInst16_EXTEND +{ + bits<5> op; + bits<16> imm16; + bits<3> rx; + bits<3> ry; + + let op=_op; + + let Inst{26-21} = imm16{10-5}; + let Inst{20-16} = imm16{15-11}; + let Inst{15-11} = op; + let Inst{10-8} = rx; + let Inst{7-5} = ry; + let Inst{4-0} = imm16{4-0}; + +} + +//===----------------------------------------------------------------------===// +// Format EXT-RRI-A instruction class in Mips16 : +// <|EXTEND|imm10:4|imm14:11|RRI-A|rx|ry|f|imm3:0> +//===----------------------------------------------------------------------===// + +class FEXT_RRI_A16 _f, dag outs, dag ins, string asmstr, + list pattern, InstrItinClass itin>: + MipsInst16_EXTEND +{ + bits<15> imm15; + bits<3> rx; + bits<3> ry; + bits<1> f; + + let f = _f; + + let Inst{26-20} = imm15{10-4}; + let Inst{19-16} = imm15{14-11}; + let Inst{15-11} = 0b01000; + let Inst{10-8} = rx; + let Inst{7-5} = ry; + let Inst{4} = f; + let Inst{3-0} = imm15{3-0}; + +} + +//===----------------------------------------------------------------------===// +// Format EXT-SHIFT instruction class in Mips16 : +// <|EXTEND|sa 4:0|s5|0|SHIFT|rx|ry|0|f> +//===----------------------------------------------------------------------===// + +class FEXT_SHIFT16 _f, dag outs, dag ins, string asmstr, + list pattern, InstrItinClass itin>: + MipsInst16_EXTEND +{ + bits<6> sa6; + bits<3> rx; + bits<3> ry; + bits<2> f; + + let f = _f; + + let Inst{26-22} = sa6{4-0}; + let Inst{21} = sa6{5}; + let Inst{20-16} = 0; + let Inst{15-11} = 0b00110; + let Inst{10-8} = rx; + let Inst{7-5} = ry; + let Inst{4-2} = 0; + let Inst{1-0} = f; + +} + +//===----------------------------------------------------------------------===// +// Format EXT-I8 instruction class in Mips16 : +// <|EXTEND|imm10:5|imm15:11|I8|funct|0|imm4:0> +//===----------------------------------------------------------------------===// + +class FEXT_I816 _funct, dag outs, dag ins, string asmstr, + list pattern, InstrItinClass itin>: + MipsInst16_EXTEND +{ + bits<16> imm16; + bits<5> I8; + bits<3> funct; + + let funct = _funct; + let I8 = 0b0110; + + let Inst{26-21} = imm16{10-5}; + let Inst{20-16} = imm16{15-11}; + let Inst{15-11} = I8; + let Inst{10-8} = funct; + let Inst{7-5} = 0; + let Inst{4-0} = imm16{4-0}; + +} + +//===----------------------------------------------------------------------===// +// Format EXT-I8_SVRS instruction class in Mips16 : +// <|EXTEND|xsregs|framesize7:4|aregs|I8|SVRS|s|ra|s0|s1|framesize3:0> +//===----------------------------------------------------------------------===// + +class FEXT_I8_SVRS16 s_, dag outs, dag ins, string asmstr, + list pattern, InstrItinClass itin>: + MipsInst16_EXTEND +{ + bits<3> xsregs =0; + bits<8> framesize =0; + 
+  bits<3> aregs =0;
+  bits<5> I8 = 0b01100;
+  bits<3> SVRS = 0b100;
+  bits<1> s;
+  bits<1> ra = 0;
+  bits<1> s0 = 0;
+  bits<1> s1 = 0;
+
+  let s= s_;
+
+  let Inst{26-24} = xsregs;
+  let Inst{23-20} = framesize{7-4};
+  let Inst{19} = 0;
+  let Inst{18-16} = aregs;
+  let Inst{15-11} = I8;
+  let Inst{10-8} = SVRS;
+  let Inst{7} = s;
+  let Inst{6} = ra;
+  let Inst{5} = s0;
+  let Inst{4} = s1;
+  let Inst{3-0} = framesize{3-0};
+
+
+}
+
+
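[Editorial aside, not part of the imported patch.] The FEXT_RI16 layout above fully determines how an extended immediate is scattered across the 32-bit EXTEND word. A minimal C++ sketch of that packing, assuming the EXTEND major opcode 0b11110 in bits 31-27 from the MIPS16e spec (the MipsInst16_EXTEND base class is not shown in this hunk, so that value is an assumption):

#include <cstdint>
#include <cstdio>

// Pack the FEXT_RI16 fields; positions mirror the "let Inst{...}" lines.
static uint32_t encodeFEXT_RI16(unsigned op, unsigned rx, unsigned imm16) {
  uint32_t Inst = 0x1Eu << 27;           // assumed EXTEND major opcode
  Inst |= ((imm16 >> 5) & 0x3F) << 21;   // Inst{26-21} = imm16{10-5}
  Inst |= ((imm16 >> 11) & 0x1F) << 16;  // Inst{20-16} = imm16{15-11}
  Inst |= (op & 0x1F) << 11;             // Inst{15-11} = op
  Inst |= (rx & 0x7) << 8;               // Inst{10-8}  = rx
                                         // Inst{7-5}   = 0
  Inst |= imm16 & 0x1F;                  // Inst{4-0}   = imm16{4-0}
  return Inst;
}

int main() {
  // "addiu $rx, imm" uses op = 0b01001 (see AddiuRxImmX16 later in the patch).
  std::printf("0x%08x\n", encodeFEXT_RI16(0x09, 2, 100));
}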
diff --git a/lib/Target/Mips/Mips16InstrInfo.cpp b/lib/Target/Mips/Mips16InstrInfo.cpp
new file mode 100644
index 0000000..2bc286b
--- /dev/null
+++ b/lib/Target/Mips/Mips16InstrInfo.cpp
@@ -0,0 +1,132 @@
+//===-- Mips16InstrInfo.cpp - Mips16 Instruction Information --------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Mips16 implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Mips16InstrInfo.h"
+#include "MipsTargetMachine.h"
+#include "MipsMachineFunction.h"
+#include "InstPrinter/MipsInstPrinter.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringRef.h"
+
+using namespace llvm;
+
+Mips16InstrInfo::Mips16InstrInfo(MipsTargetMachine &tm)
+  : MipsInstrInfo(tm, /* FIXME: set mips16 unconditional br */ 0),
+    RI(*tm.getSubtargetImpl(), *this) {}
+
+const MipsRegisterInfo &Mips16InstrInfo::getRegisterInfo() const {
+  return RI;
+}
+
+/// isLoadFromStackSlot - If the specified machine instruction is a direct
+/// load from a stack slot, return the virtual or physical register number of
+/// the destination along with the FrameIndex of the loaded stack slot. If
+/// not, return 0. This predicate must return 0 if the instruction has
+/// any side effects other than loading from the stack slot.
+unsigned Mips16InstrInfo::
+isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const
+{
+  return 0;
+}
+
+/// isStoreToStackSlot - If the specified machine instruction is a direct
+/// store to a stack slot, return the virtual or physical register number of
+/// the source reg along with the FrameIndex of the loaded stack slot. If
+/// not, return 0. This predicate must return 0 if the instruction has
+/// any side effects other than storing to the stack slot.
+unsigned Mips16InstrInfo::
+isStoreToStackSlot(const MachineInstr *MI, int &FrameIndex) const
+{
+  return 0;
+}
+
+void Mips16InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
+                                  MachineBasicBlock::iterator I, DebugLoc DL,
+                                  unsigned DestReg, unsigned SrcReg,
+                                  bool KillSrc) const {
+  unsigned Opc = 0, ZeroReg = 0;
+
+  if (Mips::CPURegsRegClass.contains(DestReg)) { // Copy to CPU Reg.
+    if (Mips::CPURegsRegClass.contains(SrcReg))
+      Opc = Mips::Mov32R16;
+  }
+
+  assert(Opc && "Cannot copy registers");
+
+  MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opc));
+
+  if (DestReg)
+    MIB.addReg(DestReg, RegState::Define);
+
+  if (ZeroReg)
+    MIB.addReg(ZeroReg);
+
+  if (SrcReg)
+    MIB.addReg(SrcReg, getKillRegState(KillSrc));
+}
+
+void Mips16InstrInfo::
+storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+                    unsigned SrcReg, bool isKill, int FI,
+                    const TargetRegisterClass *RC,
+                    const TargetRegisterInfo *TRI) const {
+  assert(false && "Implement this function.");
+}
+
+void Mips16InstrInfo::
+loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+                     unsigned DestReg, int FI,
+                     const TargetRegisterClass *RC,
+                     const TargetRegisterInfo *TRI) const {
+  assert(false && "Implement this function.");
+}
+
+bool Mips16InstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
+  MachineBasicBlock &MBB = *MI->getParent();
+
+  switch(MI->getDesc().getOpcode()) {
+  default:
+    return false;
+  case Mips::RetRA16:
+    ExpandRetRA16(MBB, MI, Mips::JrRa16);
+    break;
+  }
+
+  MBB.erase(MI);
+  return true;
+}
+
+/// GetOppositeBranchOpc - Return the inverse of the specified
+/// opcode, e.g. turning BEQ to BNE.
+unsigned Mips16InstrInfo::GetOppositeBranchOpc(unsigned Opc) const {
+  assert(false && "Implement this function.");
+  return 0;
+}
+
+unsigned Mips16InstrInfo::GetAnalyzableBrOpc(unsigned Opc) const {
+  return 0;
+}
+
+void Mips16InstrInfo::ExpandRetRA16(MachineBasicBlock &MBB,
+                                    MachineBasicBlock::iterator I,
+                                    unsigned Opc) const {
+  BuildMI(MBB, I, I->getDebugLoc(), get(Opc));
+}
+
+const MipsInstrInfo *llvm::createMips16InstrInfo(MipsTargetMachine &TM) {
+  return new Mips16InstrInfo(TM);
+}
diff --git a/lib/Target/Mips/Mips16InstrInfo.h b/lib/Target/Mips/Mips16InstrInfo.h
new file mode 100644
index 0000000..260c5b6
--- /dev/null
+++ b/lib/Target/Mips/Mips16InstrInfo.h
@@ -0,0 +1,76 @@
+//===-- Mips16InstrInfo.h - Mips16 Instruction Information ------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Mips16 implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MIPS16INSTRUCTIONINFO_H
+#define MIPS16INSTRUCTIONINFO_H
+
+#include "MipsInstrInfo.h"
+#include "Mips16RegisterInfo.h"
+
+namespace llvm {
+
+class Mips16InstrInfo : public MipsInstrInfo {
+  const Mips16RegisterInfo RI;
+
+public:
+  explicit Mips16InstrInfo(MipsTargetMachine &TM);
+
+  virtual const MipsRegisterInfo &getRegisterInfo() const;
+
+  /// isLoadFromStackSlot - If the specified machine instruction is a direct
+  /// load from a stack slot, return the virtual or physical register number of
+  /// the destination along with the FrameIndex of the loaded stack slot. If
+  /// not, return 0. This predicate must return 0 if the instruction has
+  /// any side effects other than loading from the stack slot.
+  virtual unsigned isLoadFromStackSlot(const MachineInstr *MI,
+                                       int &FrameIndex) const;
+
+  /// isStoreToStackSlot - If the specified machine instruction is a direct
+  /// store to a stack slot, return the virtual or physical register number of
+  /// the source reg along with the FrameIndex of the loaded stack slot. If
+  /// not, return 0. This predicate must return 0 if the instruction has
+  /// any side effects other than storing to the stack slot.
+  virtual unsigned isStoreToStackSlot(const MachineInstr *MI,
+                                      int &FrameIndex) const;
+
+  virtual void copyPhysReg(MachineBasicBlock &MBB,
+                           MachineBasicBlock::iterator MI, DebugLoc DL,
+                           unsigned DestReg, unsigned SrcReg,
+                           bool KillSrc) const;
+
+  virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
+                                   MachineBasicBlock::iterator MBBI,
+                                   unsigned SrcReg, bool isKill, int FrameIndex,
+                                   const TargetRegisterClass *RC,
+                                   const TargetRegisterInfo *TRI) const;
+
+  virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
+                                    MachineBasicBlock::iterator MBBI,
+                                    unsigned DestReg, int FrameIndex,
+                                    const TargetRegisterClass *RC,
+                                    const TargetRegisterInfo *TRI) const;
+
+  virtual bool expandPostRAPseudo(MachineBasicBlock::iterator MI) const;
+
+  virtual unsigned GetOppositeBranchOpc(unsigned Opc) const;
+
+private:
+  virtual unsigned GetAnalyzableBrOpc(unsigned Opc) const;
+
+  void ExpandRetRA16(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+                     unsigned Opc) const;
+};
+
+}
+
+#endif
diff --git a/lib/Target/Mips/Mips16InstrInfo.td b/lib/Target/Mips/Mips16InstrInfo.td
new file mode 100644
index 0000000..94cf984
--- /dev/null
+++ b/lib/Target/Mips/Mips16InstrInfo.td
@@ -0,0 +1,419 @@
+//===- Mips16InstrInfo.td - Target Description for Mips16  -*- tablegen -*-=//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes Mips16 instructions.
+//
+//===----------------------------------------------------------------------===//
+
+//
+// RRR-type instruction format
+//
+
+class FRRR16_ins<bits<2> _f, string asmstr, InstrItinClass itin> :
+  FRRR16<_f, (outs CPU16Regs:$rz), (ins CPU16Regs:$rx, CPU16Regs:$ry),
+         !strconcat(asmstr, "\t$rz, $rx, $ry"), [], itin>;
+
+//
+// I8_MOV32R instruction format (used only by MOV32R instruction)
+//
+class FI8_MOV32R16_ins<string asmstr, InstrItinClass itin>:
+  FI8_MOV32R16<(outs CPURegs:$r32), (ins CPU16Regs:$rz),
+               !strconcat(asmstr, "\t$r32, $rz"), [], itin>;
+
+//
+// EXT-RI instruction format
+//
+
+class FEXT_RI16_ins_base<bits<5> _op, string asmstr, string asmstr2,
+                         InstrItinClass itin>:
+  FEXT_RI16<_op, (outs CPU16Regs:$rx), (ins simm16:$imm),
+            !strconcat(asmstr, asmstr2), [], itin>;
+
+class FEXT_RI16_ins<bits<5> _op, string asmstr,
+                    InstrItinClass itin>:
+  FEXT_RI16_ins_base<_op, asmstr, "\t$rx, $imm", itin>;
+
+class FEXT_RI16_PC_ins<bits<5> _op, string asmstr, InstrItinClass itin>:
+  FEXT_RI16_ins_base<_op, asmstr, "\t$rx, $$pc, $imm", itin>;
+
+
+class FEXT_2RI16_ins<bits<5> _op, string asmstr,
+                     InstrItinClass itin>:
+  FEXT_RI16<_op, (outs CPU16Regs:$rx), (ins CPU16Regs:$rx_, simm16:$imm),
+            !strconcat(asmstr, "\t$rx, $imm"), [], itin> {
+  let Constraints = "$rx_ = $rx";
+}
+
+
+//
+// RR-type instruction format
+//
+
+class FRR16_ins<bits<5> f, string asmstr, InstrItinClass itin> :
+  FRR16<f, (outs CPU16Regs:$rx), (ins CPU16Regs:$ry),
+        !strconcat(asmstr, "\t$rx, $ry"), [], itin> {
+}
+
+class FRxRxRy16_ins<bits<5> f, string asmstr,
+                    InstrItinClass itin> :
+  FRR16<f, (outs CPU16Regs:$rz), (ins CPU16Regs:$rx, CPU16Regs:$ry),
+        !strconcat(asmstr, "\t$rz, $ry"), [], itin> {
+  let Constraints = "$rx = $rz";
+}
+
+let rx=0 in
+class FRR16_JALRC_RA_only_ins<bits<1> nd_, bits<1> l_,
+                              string asmstr, InstrItinClass itin>:
+  FRR16_JALRC<nd_, l_, 1, (outs), (ins), !strconcat(asmstr, "\t $$ra"),
+              [], itin> ;
+
+//
+// EXT-RRI instruction format
+//
+
+class FEXT_RRI16_mem_ins<bits<5> op, string asmstr, Operand MemOpnd,
+                         InstrItinClass itin>:
+  FEXT_RRI16<op, (outs CPU16Regs:$ry), (ins MemOpnd:$addr),
+             !strconcat(asmstr, "\t$ry, $addr"), [], itin>;
+
+class FEXT_RRI16_mem2_ins<bits<5> op, string asmstr, Operand MemOpnd,
+                          InstrItinClass itin>:
+  FEXT_RRI16<op, (outs), (ins CPU16Regs:$ry, MemOpnd:$addr),
+             !strconcat(asmstr, "\t$ry, $addr"), [], itin>;
+
+//
+// EXT-SHIFT instruction format
+//
+class FEXT_SHIFT16_ins<bits<2> _f, string asmstr, InstrItinClass itin>:
+  FEXT_SHIFT16<_f, (outs CPU16Regs:$rx), (ins CPU16Regs:$ry, shamt:$sa),
+               !strconcat(asmstr, "\t$rx, $ry, $sa"), [], itin>;
+
+//
+// Address operand
+def mem16 : Operand<i32> {
+  let PrintMethod = "printMemOperand";
+  let MIOperandInfo = (ops CPU16Regs, simm16);
+  let EncoderMethod = "getMemEncoding";
+}
+
+//
+// Some general instruction class info
+//
+//
+
+class ArithLogic16Defs<bit isCom=0> {
+  bits<5> shamt = 0;
+  bit isCommutable = isCom;
+  bit isReMaterializable = 1;
+  bit neverHasSideEffects = 1;
+}
+
+//
+
+// Format: ADDIU rx, immediate MIPS16e
+// Purpose: Add Immediate Unsigned Word (2-Operand, Extended)
+// To add a constant to a 32-bit integer.
+//
+def AddiuRxImmX16: FEXT_RI16_ins<0b01001, "addiu", IIAlu>;
+
+def AddiuRxRxImmX16: FEXT_2RI16_ins<0b01001, "addiu", IIAlu>,
+  ArithLogic16Defs<0>;
+
+//
+
+// Format: ADDIU rx, pc, immediate MIPS16e
+// Purpose: Add Immediate Unsigned Word (3-Operand, PC-Relative, Extended)
+// To add a constant to the program counter.
+//
+def AddiuRxPcImmX16: FEXT_RI16_PC_ins<0b00001, "addiu", IIAlu>;
+//
+// Format: ADDU rz, rx, ry MIPS16e
+// Purpose: Add Unsigned Word (3-Operand)
+// To add 32-bit integers.
+//
+
+def AdduRxRyRz16: FRRR16_ins<01, "addu", IIAlu>, ArithLogic16Defs<1>;
+
+//
+// Format: AND rx, ry MIPS16e
+// Purpose: AND
+// To do a bitwise logical AND.
+
+def AndRxRxRy16: FRxRxRy16_ins<0b01100, "and", IIAlu>, ArithLogic16Defs<1>;
+
+//
+// Format: JR ra MIPS16e
+// Purpose: Jump Register Through Register ra
+// To execute a branch to the instruction address in the return
+// address register.
+//
+
+def JrRa16: FRR16_JALRC_RA_only_ins<0, 0, "jr", IIAlu>;
+
+//
+// Format: LB ry, offset(rx) MIPS16e
+// Purpose: Load Byte (Extended)
+// To load a byte from memory as a signed value.
+//
+def LbRxRyOffMemX16: FEXT_RRI16_mem_ins<0b10011, "lb", mem16, IIAlu>;
+
+//
+// Format: LBU ry, offset(rx) MIPS16e
+// Purpose: Load Byte Unsigned (Extended)
+// To load a byte from memory as an unsigned value.
+//
+def LbuRxRyOffMemX16: FEXT_RRI16_mem_ins<0b10100, "lbu", mem16, IIAlu>;
+
+//
+// Format: LH ry, offset(rx) MIPS16e
+// Purpose: Load Halfword signed (Extended)
+// To load a halfword from memory as a signed value.
+//
+def LhRxRyOffMemX16: FEXT_RRI16_mem_ins<0b10100, "lh", mem16, IIAlu>;
+
+//
+// Format: LHU ry, offset(rx) MIPS16e
+// Purpose: Load Halfword unsigned (Extended)
+// To load a halfword from memory as an unsigned value.
+//
+def LhuRxRyOffMemX16: FEXT_RRI16_mem_ins<0b10100, "lhu", mem16, IIAlu>;
+
+//
+// Format: LI rx, immediate MIPS16e
+// Purpose: Load Immediate (Extended)
+// To load a constant into a GPR.
+//
+def LiRxImmX16: FEXT_RI16_ins<0b01101, "li", IIAlu>;
+
+//
+// Format: LW ry, offset(rx) MIPS16e
+// Purpose: Load Word (Extended)
+// To load a word from memory as a signed value.
+//
+def LwRxRyOffMemX16: FEXT_RRI16_mem_ins<0b10011, "lw", mem16, IIAlu>;
+
+//
+// Format: MOVE r32, rz MIPS16e
+// Purpose: Move
+// To move the contents of a GPR to a GPR.
+//
+def Mov32R16: FI8_MOV32R16_ins<"move", IIAlu>;
+
+//
+// Format: NEG rx, ry MIPS16e
+// Purpose: Negate
+// To negate an integer value.
+//
+def NegRxRy16: FRR16_ins<0b11101, "neg", IIAlu>;
+
+//
+// Format: NOT rx, ry MIPS16e
+// Purpose: Not
+// To complement an integer value
+//
+def NotRxRy16: FRR16_ins<0b01111, "not", IIAlu>;
+
+//
+// Format: OR rx, ry MIPS16e
+// Purpose: Or
+// To do a bitwise logical OR.
+//
+def OrRxRxRy16: FRxRxRy16_ins<0b01101, "or", IIAlu>, ArithLogic16Defs<1>;
+
+//
+// Format: RESTORE {ra,}{s0/s1/s0-1,}{framesize}
+// (All args are optional) MIPS16e
+// Purpose: Restore Registers and Deallocate Stack Frame
+// To deallocate a stack frame before exit from a subroutine,
+// restoring return address and static registers, and adjusting
+// stack
+//
+
+// fixed form for restoring RA and the frame
+// for direct object emitter, encoding needs to be adjusted for the
+// frame size
+//
+let ra=1, s=0,s0=0,s1=0 in
+def RestoreRaF16:
+  FI8_SVRS16<0b1, (outs), (ins uimm16:$frame_size),
+             "restore \t$$ra, $frame_size", [], IILoad >;
+
+//
+// Format: SAVE {ra,}{s0/s1/s0-1,}{framesize} (All arguments are optional)
+// MIPS16e
+// Purpose: Save Registers and Set Up Stack Frame
+// To set up a stack frame on entry to a subroutine,
+// saving return address and static registers, and adjusting stack
+//
+let ra=1, s=1,s0=0,s1=0 in
+def SaveRaF16:
+  FI8_SVRS16<0b1, (outs), (ins uimm16:$frame_size),
+             "save \t$$ra, $frame_size", [], IILoad >;
+
+//
+// Format: SB ry, offset(rx) MIPS16e
+// Purpose: Store Byte (Extended)
+// To store a byte to memory.
+//
+def SbRxRyOffMemX16: FEXT_RRI16_mem2_ins<0b11000, "sb", mem16, IIAlu>;
+
+//
+// Format: SH ry, offset(rx) MIPS16e
+// Purpose: Store Halfword (Extended)
+// To store a halfword to memory.
+//
+def ShRxRyOffMemX16: FEXT_RRI16_mem2_ins<0b11001, "sh", mem16, IIAlu>;
+
+//
+// Format: SLL rx, ry, sa MIPS16e
+// Purpose: Shift Word Left Logical (Extended)
+// To execute a left-shift of a word by a fixed number of bits--0 to 31 bits.
+//
+def SllX16: FEXT_SHIFT16_ins<0b00, "sll", IIAlu>;
+
+//
+// Format: SLLV ry, rx MIPS16e
+// Purpose: Shift Word Left Logical Variable
+// To execute a left-shift of a word by a variable number of bits.
+//
+def SllvRxRy16 : FRxRxRy16_ins<0b00100, "sllv", IIAlu>;
+
+
+//
+// Format: SRAV ry, rx MIPS16e
+// Purpose: Shift Word Right Arithmetic Variable
+// To execute an arithmetic right-shift of a word by a variable
+// number of bits.
+//
+def SravRxRy16: FRxRxRy16_ins<0b00111, "srav", IIAlu>;
+
+
+//
+// Format: SRA rx, ry, sa MIPS16e
+// Purpose: Shift Word Right Arithmetic (Extended)
+// To execute an arithmetic right-shift of a word by a fixed
+// number of bits--1 to 8 bits.
+//
+def SraX16: FEXT_SHIFT16_ins<0b11, "sra", IIAlu>;
+
+
+//
+// Format: SRLV ry, rx MIPS16e
+// Purpose: Shift Word Right Logical Variable
+// To execute a logical right-shift of a word by a variable
+// number of bits.
+//
+def SrlvRxRy16: FRxRxRy16_ins<0b00110, "srlv", IIAlu>;
+
+
+//
+// Format: SRL rx, ry, sa MIPS16e
+// Purpose: Shift Word Right Logical (Extended)
+// To execute a logical right-shift of a word by a fixed
+// number of bits--1 to 31 bits.
+//
+def SrlX16: FEXT_SHIFT16_ins<0b10, "srl", IIAlu>;
+
+//
+// Format: SUBU rz, rx, ry MIPS16e
+// Purpose: Subtract Unsigned Word
+// To subtract 32-bit integers
+//
+def SubuRxRyRz16: FRRR16_ins<0b11, "subu", IIAlu>, ArithLogic16Defs<0>;
+
+//
+// Format: SW ry, offset(rx) MIPS16e
+// Purpose: Store Word (Extended)
+// To store a word to memory.
+//
+def SwRxRyOffMemX16: FEXT_RRI16_mem2_ins<0b11011, "sw", mem16, IIAlu>;
+
+//
+// Format: XOR rx, ry MIPS16e
+// Purpose: Xor
+// To do a bitwise logical XOR.
+//
+def XorRxRxRy16: FRxRxRy16_ins<0b01110, "xor", IIAlu>, ArithLogic16Defs<1>;
+
+class Mips16Pat<dag pattern, dag result> : Pat<pattern, result> {
+  let Predicates = [InMips16Mode];
+}
+
+// Unary Arith/Logic
+//
+class ArithLogicU_pat<PatFrag OpNode, Instruction I> :
+  Mips16Pat<(OpNode CPU16Regs:$r),
+            (I CPU16Regs:$r)>;
+
+def: ArithLogicU_pat<not, NotRxRy16>;
+def: ArithLogicU_pat<ineg, NegRxRy16>;
+
+class ArithLogic16_pat<SDNode OpNode, Instruction I> :
+  Mips16Pat<(OpNode CPU16Regs:$l, CPU16Regs:$r),
+            (I CPU16Regs:$l, CPU16Regs:$r)>;
+
+def: ArithLogic16_pat<add, AdduRxRyRz16>;
+def: ArithLogic16_pat<and, AndRxRxRy16>;
+def: ArithLogic16_pat<or, OrRxRxRy16>;
+def: ArithLogic16_pat<sub, SubuRxRyRz16>;
+def: ArithLogic16_pat<xor, XorRxRxRy16>;
+
+// Arithmetic and logical instructions with 2 register operands.
+
+class ArithLogicI16_pat<SDNode OpNode, PatFrag imm_type, Instruction I> :
+  Mips16Pat<(OpNode CPU16Regs:$in, imm_type:$imm),
+            (I CPU16Regs:$in, imm_type:$imm)>;
+
+def: ArithLogicI16_pat<add, immSExt16, AddiuRxRxImmX16>;
+def: ArithLogicI16_pat<shl, immZExt5, SllX16>;
+def: ArithLogicI16_pat<srl, immZExt5, SrlX16>;
+def: ArithLogicI16_pat<sra, immZExt5, SraX16>;
+
+class shift_rotate_reg16_pat<SDNode OpNode, Instruction I> :
+  Mips16Pat<(OpNode CPU16Regs:$r, CPU16Regs:$ra),
+            (I CPU16Regs:$r, CPU16Regs:$ra)>;
+
+def: shift_rotate_reg16_pat<shl, SllvRxRy16>;
+def: shift_rotate_reg16_pat<sra, SravRxRy16>;
+def: shift_rotate_reg16_pat<srl, SrlvRxRy16>;
+
+class LoadM16_pat<PatFrag OpNode, Instruction I> :
+  Mips16Pat<(OpNode addr:$addr), (I addr:$addr)>;
+
+def: LoadM16_pat<sextloadi8, LbRxRyOffMemX16>;
+def: LoadM16_pat<zextloadi8, LbuRxRyOffMemX16>;
+def: LoadM16_pat<sextloadi16_a, LhRxRyOffMemX16>;
+def: LoadM16_pat<zextloadi16_a, LhuRxRyOffMemX16>;
+def: LoadM16_pat<load_a, LwRxRyOffMemX16>;
+
+class StoreM16_pat<PatFrag OpNode, Instruction I> :
+  Mips16Pat<(OpNode CPU16Regs:$r, addr:$addr), (I CPU16Regs:$r, addr:$addr)>;
+
+def: StoreM16_pat<truncstorei8, SbRxRyOffMemX16>;
+def: StoreM16_pat<truncstorei16_a, ShRxRyOffMemX16>;
+def: StoreM16_pat<store_a, SwRxRyOffMemX16>;
+
+
+// Jump and Link (Call)
+let isCall=1, hasDelaySlot=1 in
+def JumpLinkReg16:
+  FRR16_JALRC<0, 0, 0, (outs), (ins CPU16Regs:$rs),
+              "jalr \t$rs", [(MipsJmpLink CPU16Regs:$rs)], IIBranch>;
+
+// Mips16 pseudos
+let isReturn=1, isTerminator=1, hasDelaySlot=1, isBarrier=1, hasCtrlDep=1,
+    hasExtraSrcRegAllocReq = 1 in
+def RetRA16 : MipsPseudo16<(outs), (ins), "", [(MipsRet)]>;
+
+// Small immediates
+def: Mips16Pat<(i32 immZExt16:$in), (LiRxImmX16 immZExt16:$in)>;
+
+def: Mips16Pat<(add CPU16Regs:$hi, (MipsLo tglobaladdr:$lo)),
+               (AddiuRxRxImmX16 CPU16Regs:$hi, tglobaladdr:$lo)>;
diff --git a/lib/Target/Mips/Mips16RegisterInfo.cpp b/lib/Target/Mips/Mips16RegisterInfo.cpp
new file mode 100644
index 0000000..c15d1bf
--- /dev/null
+++ b/lib/Target/Mips/Mips16RegisterInfo.cpp
@@ -0,0 +1,111 @@
+//===-- Mips16RegisterInfo.cpp - MIPS16 Register Information -== ----------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the MIPS16 implementation of the TargetRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Mips16RegisterInfo.h"
+#include "Mips.h"
+#include "MipsAnalyzeImmediate.h"
+#include "MipsInstrInfo.h"
+#include "MipsSubtarget.h"
+#include "MipsMachineFunction.h"
+#include "llvm/Constants.h"
+#include "llvm/DebugInfo.h"
+#include "llvm/Type.h"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/STLExtras.h"
+
+using namespace llvm;
+
+Mips16RegisterInfo::Mips16RegisterInfo(const MipsSubtarget &ST,
+                                       const TargetInstrInfo &TII)
+  : MipsRegisterInfo(ST, TII) {}
+
+// This function eliminates ADJCALLSTACKDOWN,
+// ADJCALLSTACKUP pseudo instructions
+void Mips16RegisterInfo::
+eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
+                              MachineBasicBlock::iterator I) const {
+  // Simply discard ADJCALLSTACKDOWN, ADJCALLSTACKUP instructions.
+  MBB.erase(I);
+}
+
+void Mips16RegisterInfo::eliminateFI(MachineBasicBlock::iterator II,
+                                     unsigned OpNo, int FrameIndex,
+                                     uint64_t StackSize,
+                                     int64_t SPOffset) const {
+  MachineInstr &MI = *II;
+  MachineFunction &MF = *MI.getParent()->getParent();
+  MachineFrameInfo *MFI = MF.getFrameInfo();
+  MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>();
+
+  const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
+  int MinCSFI = 0;
+  int MaxCSFI = -1;
+
+  if (CSI.size()) {
+    MinCSFI = CSI[0].getFrameIdx();
+    MaxCSFI = CSI[CSI.size() - 1].getFrameIdx();
+  }
+
+  // The following stack frame objects are always
+  // referenced relative to $sp:
+  //  1. Outgoing arguments.
+  //  2. Pointer to dynamically allocated stack space.
+  //  3. Locations for callee-saved registers.
+  // Everything else is referenced relative to whatever register
+  // getFrameRegister() returns.
+  unsigned FrameReg;
+
+  if (MipsFI->isOutArgFI(FrameIndex) ||
+      (FrameIndex >= MinCSFI && FrameIndex <= MaxCSFI))
+    FrameReg = Subtarget.isABI_N64() ? Mips::SP_64 : Mips::SP;
+  else
+    FrameReg = getFrameRegister(MF);
+
+  // Calculate final offset.
+  // - There is no need to change the offset if the frame object
+  //   is one of the
+  //   following: an outgoing argument, pointer to a dynamically allocated
+  //   stack space or a $gp restore location,
+  // - If the frame object is any of the following,
+  //   its offset must be adjusted
+  //   by adding the size of the stack:
+  //   incoming argument, callee-saved register location or local variable.
+  int64_t Offset;
+
+  if (MipsFI->isOutArgFI(FrameIndex))
+    Offset = SPOffset;
+  else
+    Offset = SPOffset + (int64_t)StackSize;
+
+  Offset += MI.getOperand(OpNo + 1).getImm();
+
+  DEBUG(errs() << "Offset : " << Offset << "\n" << "<--------->\n");
+
+  MI.getOperand(OpNo).ChangeToRegister(FrameReg, false);
+  MI.getOperand(OpNo + 1).ChangeToImmediate(Offset);
+
+
+}
diff --git a/lib/Target/Mips/Mips16RegisterInfo.h b/lib/Target/Mips/Mips16RegisterInfo.h
new file mode 100644
index 0000000..3f4b3a7
--- /dev/null
+++ b/lib/Target/Mips/Mips16RegisterInfo.h
@@ -0,0 +1,37 @@
+//===-- Mips16RegisterInfo.h - Mips16 Register Information ------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Mips16 implementation of the TargetRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MIPS16REGISTERINFO_H
+#define MIPS16REGISTERINFO_H
+
+#include "MipsRegisterInfo.h"
+
+namespace llvm {
+
+class Mips16RegisterInfo : public MipsRegisterInfo {
+public:
+  Mips16RegisterInfo(const MipsSubtarget &Subtarget,
+                     const TargetInstrInfo &TII);
+
+  void eliminateCallFramePseudoInstr(MachineFunction &MF,
+                                     MachineBasicBlock &MBB,
+                                     MachineBasicBlock::iterator I) const;
+private:
+  virtual void eliminateFI(MachineBasicBlock::iterator II, unsigned OpNo,
+                           int FrameIndex, uint64_t StackSize,
+                           int64_t SPOffset) const;
+};
+
+} // end namespace llvm
+
+#endif
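[Editorial aside, not part of the imported patch.] The offset rule in Mips16RegisterInfo::eliminateFI above is small enough to state as a standalone sketch: outgoing-argument objects keep their raw $sp-relative offset, everything else is biased by the frame size, and the operand's existing immediate is folded in.

#include <cstdint>
#include <cstdio>

// Mirror of the arithmetic in eliminateFI above.
static int64_t finalOffset(bool isOutArgFI, int64_t spOffset,
                           uint64_t stackSize, int64_t operandImm) {
  int64_t Offset = isOutArgFI ? spOffset : spOffset + (int64_t)stackSize;
  return Offset + operandImm;
}

int main() {
  // e.g. a local variable at SPOffset -8 in a 32-byte frame, immediate 0:
  std::printf("%lld\n", (long long)finalOffset(false, -8, 32, 0)); // 24
}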
diff --git a/lib/Target/Mips/Mips64InstrInfo.td b/lib/Target/Mips/Mips64InstrInfo.td
index 0382869..20fc178 100644
--- a/lib/Target/Mips/Mips64InstrInfo.td
+++ b/lib/Target/Mips/Mips64InstrInfo.td
@@ -49,21 +49,24 @@ class Div64<SDNode op, bits<6> func, string instr_asm, InstrItinClass itin>:
   Div<op, func, instr_asm, itin, CPU64Regs, [HI64, LO64]>;
 
 multiclass Atomic2Ops64<PatFrag Op, string Opstr> {
-  def #NAME# : Atomic2Ops<Op, Opstr, CPU64Regs, CPURegs>, Requires<[NotN64]>;
-  def _P8    : Atomic2Ops<Op, Opstr, CPU64Regs, CPU64Regs>, Requires<[IsN64]> {
+  def #NAME# : Atomic2Ops<Op, Opstr, CPU64Regs, CPURegs>,
+               Requires<[NotN64, HasStandardEncoding]>;
+  def _P8    : Atomic2Ops<Op, Opstr, CPU64Regs, CPU64Regs>,
+               Requires<[IsN64, HasStandardEncoding]> {
     let isCodeGenOnly = 1;
   }
 }
 
 multiclass AtomicCmpSwap64<PatFrag Op, string Width> {
-  def #NAME# : AtomicCmpSwap<Op, Width, CPU64Regs, CPURegs>, Requires<[NotN64]>;
+  def #NAME# : AtomicCmpSwap<Op, Width, CPU64Regs, CPURegs>,
+               Requires<[NotN64, HasStandardEncoding]>;
   def _P8    : AtomicCmpSwap<Op, Width, CPU64Regs, CPU64Regs>,
-               Requires<[IsN64]> {
+               Requires<[IsN64, HasStandardEncoding]> {
     let isCodeGenOnly = 1;
   }
 }
-let usesCustomInserter = 1, Predicates = [HasMips64],
+let usesCustomInserter = 1, Predicates = [HasMips64, HasStandardEncoding],
   DecoderNamespace = "Mips64" in {
   defm ATOMIC_LOAD_ADD_I64 : Atomic2Ops64<atomic_load_add_64, "load_add_64">;
   defm ATOMIC_LOAD_SUB_I64 : Atomic2Ops64<atomic_load_sub_64, "load_sub_64">;
@@ -106,9 +109,15 @@ def DSRA : shift_rotate_imm64<0x3b, 0x00, "dsra", sra>;
 def DSLLV : shift_rotate_reg<0x14, 0x00, "dsllv", shl, CPU64Regs>;
 def DSRLV : shift_rotate_reg<0x16, 0x00, "dsrlv", srl, CPU64Regs>;
 def DSRAV : shift_rotate_reg<0x17, 0x00, "dsrav", sra, CPU64Regs>;
+let Pattern = [] in {
+def DSLL32 : shift_rotate_imm64<0x3c, 0x00, "dsll32", shl>;
+def DSRL32 : shift_rotate_imm64<0x3e, 0x00, "dsrl32", srl>;
+def DSRA32 : shift_rotate_imm64<0x3f, 0x00, "dsra32", sra>;
+}
 }
 // Rotate Instructions
-let Predicates = [HasMips64r2], DecoderNamespace = "Mips64" in {
+let Predicates = [HasMips64r2, HasStandardEncoding],
+    DecoderNamespace = "Mips64" in {
 def DROTR : shift_rotate_imm64<0x3a, 0x01, "drotr", rotr>;
 def DROTRV : shift_rotate_reg<0x16, 0x01, "drotrv", rotr, CPU64Regs>;
 }
@@ -137,18 +146,34 @@ defm USW64 : StoreM64<0x2b, "usw", truncstorei32_u, 1>;
 defm ULD : LoadM64<0x37, "uld", load_u, 1>;
 defm USD : StoreM64<0x3f, "usd", store_u, 1>;
 
+/// load/store left/right
+let isCodeGenOnly = 1 in {
+  defm LWL64 : LoadLeftRightM64<0x22, "lwl", MipsLWL>;
+  defm LWR64 : LoadLeftRightM64<0x26, "lwr", MipsLWR>;
+  defm SWL64 : StoreLeftRightM64<0x2a, "swl", MipsSWL>;
+  defm SWR64 : StoreLeftRightM64<0x2e, "swr", MipsSWR>;
+}
+defm LDL : LoadLeftRightM64<0x1a, "ldl", MipsLDL>;
+defm LDR : LoadLeftRightM64<0x1b, "ldr", MipsLDR>;
+defm SDL : StoreLeftRightM64<0x2c, "sdl", MipsSDL>;
+defm SDR : StoreLeftRightM64<0x2d, "sdr", MipsSDR>;
+
 /// Load-linked, Store-conditional
-def LLD : LLBase<0x34, "lld", CPU64Regs, mem>, Requires<[NotN64]>;
-def LLD_P8 : LLBase<0x34, "lld", CPU64Regs, mem64>, Requires<[IsN64]> {
+def LLD : LLBase<0x34, "lld", CPU64Regs, mem>,
+          Requires<[NotN64, HasStandardEncoding]>;
+def LLD_P8 : LLBase<0x34, "lld", CPU64Regs, mem64>,
+             Requires<[IsN64, HasStandardEncoding]> {
   let isCodeGenOnly = 1;
 }
-def SCD : SCBase<0x3c, "scd", CPU64Regs, mem>, Requires<[NotN64]>;
-def SCD_P8 : SCBase<0x3c, "scd", CPU64Regs, mem64>, Requires<[IsN64]> {
+def SCD : SCBase<0x3c, "scd", CPU64Regs, mem>,
+          Requires<[NotN64, HasStandardEncoding]>;
+def SCD_P8 : SCBase<0x3c, "scd", CPU64Regs, mem64>,
+             Requires<[IsN64, HasStandardEncoding]> {
   let isCodeGenOnly = 1;
 }
 
 /// Jump and Branch Instructions
-def JR64 : JumpFR<0x00, 0x08, "jr", CPU64Regs>;
+def JR64 : IndirectBranch<CPU64Regs>;
 def BEQ64 : CBranch<0x04, "beq", seteq, CPU64Regs>;
 def BNE64 : CBranch<0x05, "bne", setne, CPU64Regs>;
 def BGEZ64 : CBranchZero<0x01, 1, "bgez", setge, CPU64Regs>;
@@ -183,74 +208,75 @@ def DCLO : CountLeading1<0x25, "dclo", CPU64Regs>;
 def DSBH : SubwordSwap<0x24, 0x2, "dsbh", CPU64Regs>;
 def DSHD : SubwordSwap<0x24, 0x5, "dshd", CPU64Regs>;
 
-def LEA_ADDiu64 : EffectiveAddress<"daddiu\t$rt, $addr", CPU64Regs, mem_ea_64>;
+def LEA_ADDiu64 : EffectiveAddress<0x19,"daddiu\t$rt, $addr", CPU64Regs, mem_ea_64>;
 }
 let Uses = [SP_64], DecoderNamespace = "Mips64" in
-def DynAlloc64 : EffectiveAddress<"daddiu\t$rt, $addr", CPU64Regs, mem_ea_64>,
-                   Requires<[IsN64]> {
-  let isCodeGenOnly = 1;
-}
+def DynAlloc64 : EffectiveAddress<0x19,"daddiu\t$rt, $addr", CPU64Regs, mem_ea_64>,
+                   Requires<[IsN64, HasStandardEncoding]>;
 let DecoderNamespace = "Mips64" in {
 def RDHWR64 : ReadHardware<CPU64Regs, HWRegs64>;
 
 def DEXT : ExtBase<3, "dext", CPU64Regs>;
 def DINS : InsBase<7, "dins", CPU64Regs>;
 
-def DSLL64_32 : FR<0x3c, 0x00, (outs CPU64Regs:$rd), (ins CPURegs:$rt),
-                   "dsll\t$rd, $rt, 32", [], IIAlu>;
-def SLL64_32 : FR<0x0, 0x00, (outs CPU64Regs:$rd), (ins CPURegs:$rt),
-                  "sll\t$rd, $rt, 0", [], IIAlu>;
-let isCodeGenOnly = 1 in
-def SLL64_64 : FR<0x0, 0x00, (outs CPU64Regs:$rd), (ins CPU64Regs:$rt),
-                  "sll\t$rd, $rt, 0", [], IIAlu>;
+let isCodeGenOnly = 1, rs = 0, shamt = 0 in {
+  def DSLL64_32 : FR<0x00, 0x3c, (outs CPU64Regs:$rd), (ins CPURegs:$rt),
+                     "dsll\t$rd, $rt, 32", [], IIAlu>;
+  def SLL64_32 : FR<0x0, 0x00, (outs CPU64Regs:$rd), (ins CPURegs:$rt),
+                    "sll\t$rd, $rt, 0", [], IIAlu>;
+  def SLL64_64 : FR<0x0, 0x00, (outs CPU64Regs:$rd), (ins CPU64Regs:$rt),
+                    "sll\t$rd, $rt, 0", [], IIAlu>;
+}
 }
 
 //===----------------------------------------------------------------------===//
 // Arbitrary patterns that map to one or more instructions
 //===----------------------------------------------------------------------===//
 
 // extended loads
-let Predicates = [NotN64] in {
-  def : Pat<(i64 (extloadi1 addr:$src)), (LB64 addr:$src)>;
-  def : Pat<(i64 (extloadi8 addr:$src)), (LB64 addr:$src)>;
-  def : Pat<(i64 (extloadi16_a addr:$src)), (LH64 addr:$src)>;
-  def : Pat<(i64 (extloadi16_u addr:$src)), (ULH64 addr:$src)>;
-  def : Pat<(i64 (extloadi32_a addr:$src)), (LW64 addr:$src)>;
-  def : Pat<(i64 (extloadi32_u addr:$src)), (ULW64 addr:$src)>;
-  def : Pat<(zextloadi32_u addr:$a), (DSRL (DSLL (ULW64 addr:$a), 32), 32)>;
+let Predicates = [NotN64, HasStandardEncoding] in {
+  def : MipsPat<(i64 (extloadi1 addr:$src)), (LB64 addr:$src)>;
+  def : MipsPat<(i64 (extloadi8 addr:$src)), (LB64 addr:$src)>;
+  def : MipsPat<(i64 (extloadi16_a addr:$src)), (LH64 addr:$src)>;
+  def : MipsPat<(i64 (extloadi16_u addr:$src)), (ULH64 addr:$src)>;
+  def : MipsPat<(i64 (extloadi32_a addr:$src)), (LW64 addr:$src)>;
+  def : MipsPat<(i64 (extloadi32_u addr:$src)), (ULW64 addr:$src)>;
  def : MipsPat<(zextloadi32_u addr:$a), (DSRL (DSLL (ULW64 addr:$a), 32), 32)>;
 }
-let Predicates = [IsN64] in {
-  def : Pat<(i64 (extloadi1 addr:$src)), (LB64_P8 addr:$src)>;
-  def : Pat<(i64 (extloadi8 addr:$src)), (LB64_P8 addr:$src)>;
-  def : Pat<(i64 (extloadi16_a addr:$src)), (LH64_P8 addr:$src)>;
-  def : Pat<(i64 (extloadi16_u addr:$src)), (ULH64_P8 addr:$src)>;
-  def : Pat<(i64 (extloadi32_a addr:$src)), (LW64_P8 addr:$src)>;
-  def : Pat<(i64 (extloadi32_u addr:$src)), (ULW64_P8 addr:$src)>;
-  def : Pat<(zextloadi32_u addr:$a), (DSRL (DSLL (ULW64_P8 addr:$a), 32), 32)>;
+let Predicates = [IsN64, HasStandardEncoding] in {
+  def : MipsPat<(i64 (extloadi1 addr:$src)), (LB64_P8 addr:$src)>;
+  def : MipsPat<(i64 (extloadi8 addr:$src)), (LB64_P8 addr:$src)>;
+  def : MipsPat<(i64 (extloadi16_a addr:$src)), (LH64_P8 addr:$src)>;
+  def : MipsPat<(i64 (extloadi16_u addr:$src)), (ULH64_P8 addr:$src)>;
+  def : MipsPat<(i64 (extloadi32_a addr:$src)), (LW64_P8 addr:$src)>;
+  def : MipsPat<(i64 (extloadi32_u addr:$src)), (ULW64_P8 addr:$src)>;
+  def : MipsPat<(zextloadi32_u addr:$a),
+                (DSRL (DSLL (ULW64_P8 addr:$a), 32), 32)>;
 }
 
 // hi/lo relocs
-def : Pat<(MipsHi tglobaladdr:$in), (LUi64 tglobaladdr:$in)>;
-def : Pat<(MipsHi tblockaddress:$in), (LUi64 tblockaddress:$in)>;
-def : Pat<(MipsHi tjumptable:$in), (LUi64 tjumptable:$in)>;
-def : Pat<(MipsHi tconstpool:$in), (LUi64 tconstpool:$in)>;
-def : Pat<(MipsHi tglobaltlsaddr:$in), (LUi64 tglobaltlsaddr:$in)>;
-
-def : Pat<(MipsLo tglobaladdr:$in), (DADDiu ZERO_64, tglobaladdr:$in)>;
-def : Pat<(MipsLo tblockaddress:$in), (DADDiu ZERO_64, tblockaddress:$in)>;
-def : Pat<(MipsLo tjumptable:$in), (DADDiu ZERO_64, tjumptable:$in)>;
-def : Pat<(MipsLo tconstpool:$in), (DADDiu ZERO_64, tconstpool:$in)>;
-def : Pat<(MipsLo tglobaltlsaddr:$in), (DADDiu ZERO_64, tglobaltlsaddr:$in)>;
-
-def : Pat<(add CPU64Regs:$hi, (MipsLo tglobaladdr:$lo)),
-          (DADDiu CPU64Regs:$hi, tglobaladdr:$lo)>;
-def : Pat<(add CPU64Regs:$hi, (MipsLo tblockaddress:$lo)),
-          (DADDiu CPU64Regs:$hi, tblockaddress:$lo)>;
-def : Pat<(add CPU64Regs:$hi, (MipsLo tjumptable:$lo)),
-          (DADDiu CPU64Regs:$hi, tjumptable:$lo)>;
-def : Pat<(add CPU64Regs:$hi, (MipsLo tconstpool:$lo)),
-          (DADDiu CPU64Regs:$hi, tconstpool:$lo)>;
-def : Pat<(add CPU64Regs:$hi, (MipsLo tglobaltlsaddr:$lo)),
-          (DADDiu CPU64Regs:$hi, tglobaltlsaddr:$lo)>;
+def : MipsPat<(MipsHi tglobaladdr:$in), (LUi64 tglobaladdr:$in)>;
+def : MipsPat<(MipsHi tblockaddress:$in), (LUi64 tblockaddress:$in)>;
+def : MipsPat<(MipsHi tjumptable:$in), (LUi64 tjumptable:$in)>;
+def : MipsPat<(MipsHi tconstpool:$in), (LUi64 tconstpool:$in)>;
+def : MipsPat<(MipsHi tglobaltlsaddr:$in), (LUi64 tglobaltlsaddr:$in)>;
+
+def : MipsPat<(MipsLo tglobaladdr:$in), (DADDiu ZERO_64, tglobaladdr:$in)>;
+def : MipsPat<(MipsLo tblockaddress:$in), (DADDiu ZERO_64, tblockaddress:$in)>;
+def : MipsPat<(MipsLo tjumptable:$in), (DADDiu ZERO_64, tjumptable:$in)>;
+def : MipsPat<(MipsLo tconstpool:$in), (DADDiu ZERO_64, tconstpool:$in)>;
+def : MipsPat<(MipsLo tglobaltlsaddr:$in),
+              (DADDiu ZERO_64, tglobaltlsaddr:$in)>;
+
+def : MipsPat<(add CPU64Regs:$hi, (MipsLo tglobaladdr:$lo)),
+              (DADDiu CPU64Regs:$hi, tglobaladdr:$lo)>;
+def : MipsPat<(add CPU64Regs:$hi, (MipsLo tblockaddress:$lo)),
+              (DADDiu CPU64Regs:$hi, tblockaddress:$lo)>;
+def : MipsPat<(add CPU64Regs:$hi, (MipsLo tjumptable:$lo)),
+              (DADDiu CPU64Regs:$hi, tjumptable:$lo)>;
+def : MipsPat<(add CPU64Regs:$hi, (MipsLo tconstpool:$lo)),
+              (DADDiu CPU64Regs:$hi, tconstpool:$lo)>;
+def : MipsPat<(add CPU64Regs:$hi, (MipsLo tglobaltlsaddr:$lo)),
+              (DADDiu CPU64Regs:$hi, tglobaltlsaddr:$lo)>;
 
 def : WrapperPat<tglobaladdr, DADDiu, CPU64Regs>;
 def : WrapperPat<tjumptable, DADDiu, CPU64Regs>;
@@ -270,19 +296,22 @@ defm : SetgePats<CPU64Regs, SLT64, SLTu64>;
 defm : SetgeImmPats<CPU64Regs, SLTi64, SLTiu64>;
 
 // select MipsDynAlloc
-def : Pat<(MipsDynAlloc addr:$f), (DynAlloc64 addr:$f)>, Requires<[IsN64]>;
+def : MipsPat<(MipsDynAlloc addr:$f), (DynAlloc64 addr:$f)>,
+      Requires<[IsN64, HasStandardEncoding]>;
 
 // truncate
-def : Pat<(i32 (trunc CPU64Regs:$src)),
-          (SLL (EXTRACT_SUBREG CPU64Regs:$src, sub_32), 0)>, Requires<[IsN64]>;
+def : MipsPat<(i32 (trunc CPU64Regs:$src)),
+              (SLL (EXTRACT_SUBREG CPU64Regs:$src, sub_32), 0)>,
+      Requires<[IsN64, HasStandardEncoding]>;
 
 // 32-to-64-bit extension
-def : Pat<(i64 (anyext CPURegs:$src)), (SLL64_32 CPURegs:$src)>;
-def : Pat<(i64 (zext CPURegs:$src)), (DSRL (DSLL64_32 CPURegs:$src), 32)>;
-def : Pat<(i64 (sext CPURegs:$src)), (SLL64_32 CPURegs:$src)>;
+def : MipsPat<(i64 (anyext CPURegs:$src)), (SLL64_32 CPURegs:$src)>;
+def : MipsPat<(i64 (zext CPURegs:$src)), (DSRL (DSLL64_32 CPURegs:$src), 32)>;
+def : MipsPat<(i64 (sext CPURegs:$src)), (SLL64_32 CPURegs:$src)>;
 
 // Sign extend in register
-def : Pat<(i64 (sext_inreg CPU64Regs:$src, i32)), (SLL64_64 CPU64Regs:$src)>;
+def : MipsPat<(i64 (sext_inreg CPU64Regs:$src, i32)),
+              (SLL64_64 CPU64Regs:$src)>;
 
-// bswap pattern
-def : Pat<(bswap CPU64Regs:$rt), (DSHD (DSBH CPU64Regs:$rt))>;
+// bswap MipsPattern
+def : MipsPat<(bswap CPU64Regs:$rt), (DSHD (DSBH CPU64Regs:$rt))>;
diff --git a/lib/Target/Mips/MipsAsmPrinter.cpp b/lib/Target/Mips/MipsAsmPrinter.cpp
index 8206cfc..00ff754 100644
--- a/lib/Target/Mips/MipsAsmPrinter.cpp
+++ b/lib/Target/Mips/MipsAsmPrinter.cpp
@@ -13,29 +13,29 @@
 //===----------------------------------------------------------------------===//
 
 #define DEBUG_TYPE "mips-asm-printer"
-#include "MipsAsmPrinter.h"
 #include "Mips.h"
+#include "MipsAsmPrinter.h"
 #include "MipsInstrInfo.h"
+#include "MipsMCInstLower.h"
 #include "InstPrinter/MipsInstPrinter.h"
 #include "MCTargetDesc/MipsBaseInfo.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/Twine.h"
-#include "llvm/Analysis/DebugInfo.h"
 #include "llvm/BasicBlock.h"
-#include "llvm/Instructions.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineConstantPool.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/InlineAsm.h"
 #include "llvm/Instructions.h"
-#include "llvm/MC/MCStreamer.h"
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCStreamer.h"
 #include "llvm/MC/MCSymbol.h"
-#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h" +#include "llvm/Support/TargetRegistry.h" #include "llvm/Target/Mangler.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetLoweringObjectFile.h" @@ -43,19 +43,6 @@ using namespace llvm; -void MipsAsmPrinter::EmitInstrWithMacroNoAT(const MachineInstr *MI) { - MCInst TmpInst; - - MCInstLowering.Lower(MI, TmpInst); - OutStreamer.EmitRawText(StringRef("\t.set\tmacro")); - if (MipsFI->getEmitNOAT()) - OutStreamer.EmitRawText(StringRef("\t.set\tat")); - OutStreamer.EmitInstruction(TmpInst); - if (MipsFI->getEmitNOAT()) - OutStreamer.EmitRawText(StringRef("\t.set\tnoat")); - OutStreamer.EmitRawText(StringRef("\t.set\tnomacro")); -} - bool MipsAsmPrinter::runOnMachineFunction(MachineFunction &MF) { MipsFI = MF.getInfo(); AsmPrinter::runOnMachineFunction(MF); @@ -71,84 +58,33 @@ void MipsAsmPrinter::EmitInstruction(const MachineInstr *MI) { return; } - unsigned Opc = MI->getOpcode(); - MCInst TmpInst0; - SmallVector MCInsts; - - switch (Opc) { - case Mips::ULW: - case Mips::ULH: - case Mips::ULHu: - case Mips::USW: - case Mips::USH: - case Mips::ULW_P8: - case Mips::ULH_P8: - case Mips::ULHu_P8: - case Mips::USW_P8: - case Mips::USH_P8: - case Mips::ULD: - case Mips::ULW64: - case Mips::ULH64: - case Mips::ULHu64: - case Mips::USD: - case Mips::USW64: - case Mips::USH64: - case Mips::ULD_P8: - case Mips::ULW64_P8: - case Mips::ULH64_P8: - case Mips::ULHu64_P8: - case Mips::USD_P8: - case Mips::USW64_P8: - case Mips::USH64_P8: { - if (OutStreamer.hasRawTextSupport()) { - EmitInstrWithMacroNoAT(MI); - return; - } - - MCInstLowering.LowerUnalignedLoadStore(MI, MCInsts); - for (SmallVector::iterator I = MCInsts.begin(); I - != MCInsts.end(); ++I) - OutStreamer.EmitInstruction(*I); - - return; - } - case Mips::CPRESTORE: { - const MachineOperand &MO = MI->getOperand(0); - assert(MO.isImm() && "CPRESTORE's operand must be an immediate."); - int64_t Offset = MO.getImm(); - - if (OutStreamer.hasRawTextSupport()) { - if (!isInt<16>(Offset)) { - EmitInstrWithMacroNoAT(MI); + // Direct object specific instruction lowering + if (!OutStreamer.hasRawTextSupport()) + switch (MI->getOpcode()) { + case Mips::DSLL: + case Mips::DSRL: + case Mips::DSRA: + assert(MI->getNumOperands() == 3 && + "Invalid no. 
+             "Invalid no. of machine operands for shift!");
+      assert(MI->getOperand(2).isImm());
+      int64_t Shift = MI->getOperand(2).getImm();
+      if (Shift > 31) {
+        MCInst TmpInst0;
+        MCInstLowering.LowerLargeShift(MI, TmpInst0, Shift - 32);
+        OutStreamer.EmitInstruction(TmpInst0);
         return;
       }
-    } else {
-      MCInstLowering.LowerCPRESTORE(Offset, MCInsts);
-
-      for (SmallVector<MCInst, 4>::iterator I = MCInsts.begin();
-           I != MCInsts.end(); ++I)
-        OutStreamer.EmitInstruction(*I);
-
-      return;
+      break;
     }
-    break;
-  }
-  case Mips::SETGP01: {
-    MCInstLowering.LowerSETGP01(MI, MCInsts);
-
-    for (SmallVector<MCInst, 4>::iterator I = MCInsts.begin();
-         I != MCInsts.end(); ++I)
-      OutStreamer.EmitInstruction(*I);
-
-    return;
-  }
-  default:
-    break;
-  }
+  MachineBasicBlock::const_instr_iterator I = MI;
+  MachineBasicBlock::const_instr_iterator E = MI->getParent()->instr_end();
 
-  MCInstLowering.Lower(MI, TmpInst0);
-  OutStreamer.EmitInstruction(TmpInst0);
+  do {
+    MCInst TmpInst0;
+    MCInstLowering.Lower(I++, TmpInst0);
+    OutStreamer.EmitInstruction(TmpInst0);
+  } while ((I != E) && I->isInsideBundle());
 }
 
 //===----------------------------------------------------------------------===//
@@ -197,9 +133,9 @@ void MipsAsmPrinter::printSavedRegsBitmask(raw_ostream &O) {
   const MachineFrameInfo *MFI = MF->getFrameInfo();
   const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
   // size of stack area to which FP callee-saved regs are saved.
-  unsigned CPURegSize = Mips::CPURegsRegisterClass->getSize();
-  unsigned FGR32RegSize = Mips::FGR32RegisterClass->getSize();
-  unsigned AFGR64RegSize = Mips::AFGR64RegisterClass->getSize();
+  unsigned CPURegSize = Mips::CPURegsRegClass.getSize();
+  unsigned FGR32RegSize = Mips::FGR32RegClass.getSize();
+  unsigned AFGR64RegSize = Mips::AFGR64RegClass.getSize();
   bool HasAFGR64Reg = false;
   unsigned CSFPRegsSize = 0;
   unsigned i, e = CSI.size();
@@ -207,11 +143,11 @@ void MipsAsmPrinter::printSavedRegsBitmask(raw_ostream &O) {
   // Set FPU Bitmask.
   for (i = 0; i != e; ++i) {
     unsigned Reg = CSI[i].getReg();
-    if (Mips::CPURegsRegisterClass->contains(Reg))
+    if (Mips::CPURegsRegClass.contains(Reg))
       break;
 
     unsigned RegNum = getMipsRegisterNumbering(Reg);
-    if (Mips::AFGR64RegisterClass->contains(Reg)) {
+    if (Mips::AFGR64RegClass.contains(Reg)) {
       FPUBitmask |= (3 << RegNum);
       CSFPRegsSize += AFGR64RegSize;
       HasAFGR64Reg = true;
@@ -283,8 +219,15 @@ const char *MipsAsmPrinter::getCurrentABIString() const {
 }
 
 void MipsAsmPrinter::EmitFunctionEntryLabel() {
-  if (OutStreamer.hasRawTextSupport())
+  if (OutStreamer.hasRawTextSupport()) {
+    if (Subtarget->inMips16Mode())
+      OutStreamer.EmitRawText(StringRef("\t.set\tmips16"));
+    else
+      OutStreamer.EmitRawText(StringRef("\t.set\tnomips16"));
+    // leave out until FSF available gas has micromips changes
+    // OutStreamer.EmitRawText(StringRef("\t.set\tnomicromips"));
     OutStreamer.EmitRawText("\t.ent\t" + Twine(CurrentFnSym->getName()));
+  }
   OutStreamer.EmitLabel(CurrentFnSym);
 }
 
@@ -295,10 +238,6 @@ void MipsAsmPrinter::EmitFunctionBodyStart() {
 
   emitFrameDirective();
 
-  bool EmitCPLoad = (MF->getTarget().getRelocationModel() == Reloc::PIC_) &&
-    Subtarget->isABI_O32() && MipsFI->globalBaseRegSet() &&
-    MipsFI->globalBaseRegFixed();
-
   if (OutStreamer.hasRawTextSupport()) {
     SmallString<128> Str;
     raw_svector_ostream OS(Str);
@@ -306,20 +245,9 @@ void MipsAsmPrinter::EmitFunctionBodyStart() {
     OutStreamer.EmitRawText(OS.str());
 
     OutStreamer.EmitRawText(StringRef("\t.set\tnoreorder"));
-
-    // Emit .cpload directive if needed.
-    if (EmitCPLoad)
-      OutStreamer.EmitRawText(StringRef("\t.cpload\t$25"));
-
     OutStreamer.EmitRawText(StringRef("\t.set\tnomacro"));
     if (MipsFI->getEmitNOAT())
       OutStreamer.EmitRawText(StringRef("\t.set\tnoat"));
-  } else if (EmitCPLoad) {
-    SmallVector<MCInst, 4> MCInsts;
-    MCInstLowering.LowerCPLOAD(MCInsts);
-    for (SmallVector<MCInst, 4>::iterator I = MCInsts.begin();
-         I != MCInsts.end(); ++I)
-      OutStreamer.EmitInstruction(*I);
   }
 }
 
@@ -382,14 +310,99 @@ bool MipsAsmPrinter::isBlockOnlyReachableByFallthrough(const MachineBasicBlock*
 }
 
 // Print out an operand for an inline asm expression.
-bool MipsAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+bool MipsAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
                                      unsigned AsmVariant,const char *ExtraCode,
                                      raw_ostream &O) {
   // Does this asm operand have a single letter operand modifier?
-  if (ExtraCode && ExtraCode[0])
-    return true; // Unknown modifier.
+  if (ExtraCode && ExtraCode[0]) {
+    if (ExtraCode[1] != 0) return true; // Unknown modifier.
 
-  printOperand(MI, OpNo, O);
+    const MachineOperand &MO = MI->getOperand(OpNum);
+    switch (ExtraCode[0]) {
+    default:
+      // See if this is a generic print operand
+      return AsmPrinter::PrintAsmOperand(MI,OpNum,AsmVariant,ExtraCode,O);
+    case 'X': // hex const int
+      if ((MO.getType()) != MachineOperand::MO_Immediate)
+        return true;
+      O << "0x" << StringRef(utohexstr(MO.getImm())).lower();
+      return false;
+    case 'x': // hex const int (low 16 bits)
+      if ((MO.getType()) != MachineOperand::MO_Immediate)
+        return true;
+      O << "0x" << StringRef(utohexstr(MO.getImm() & 0xffff)).lower();
+      return false;
+    case 'd': // decimal const int
+      if ((MO.getType()) != MachineOperand::MO_Immediate)
+        return true;
+      O << MO.getImm();
+      return false;
+    case 'm': // decimal const int minus 1
+      if ((MO.getType()) != MachineOperand::MO_Immediate)
+        return true;
+      O << MO.getImm() - 1;
+      return false;
+    case 'z': {
+      // $0 if zero, regular printing otherwise
+      if (MO.getType() != MachineOperand::MO_Immediate)
+        return true;
+      int64_t Val = MO.getImm();
+      if (Val)
+        O << Val;
+      else
+        O << "$0";
+      return false;
+    }
+    case 'D': // Second part of a double word register operand
+    case 'L': // Low order register of a double word register operand
+    case 'M': // High order register of a double word register operand
+      {
+        if (OpNum == 0)
+          return true;
+        const MachineOperand &FlagsOP = MI->getOperand(OpNum - 1);
+        if (!FlagsOP.isImm())
+          return true;
+        unsigned Flags = FlagsOP.getImm();
+        unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags);
+        // Number of registers represented by this operand. We are looking
+        // for 2 for 32 bit mode and 1 for 64 bit mode.
+        if (NumVals != 2) {
+          if (Subtarget->isGP64bit() && NumVals == 1 && MO.isReg()) {
+            unsigned Reg = MO.getReg();
+            O << '$' << MipsInstPrinter::getRegisterName(Reg);
+            return false;
+          }
+          return true;
+        }
+
+        unsigned RegOp = OpNum;
+        if (!Subtarget->isGP64bit()){
+          // Endianness reverses which register holds the high or low value
+          // between M and L.
+          switch(ExtraCode[0]) {
+          case 'M':
+            RegOp = (Subtarget->isLittle()) ? OpNum + 1 : OpNum;
+            break;
+          case 'L':
+            RegOp = (Subtarget->isLittle()) ? OpNum : OpNum + 1;
+            break;
+          case 'D': // Always the second part
+            RegOp = OpNum + 1;
+          }
+          if (RegOp >= MI->getNumOperands())
+            return true;
+          const MachineOperand &MO = MI->getOperand(RegOp);
+          if (!MO.isReg())
+            return true;
+          unsigned Reg = MO.getReg();
+          O << '$' << MipsInstPrinter::getRegisterName(Reg);
+          return false;
+        }
+      }
+    }
+  }
+
+  printOperand(MI, OpNum, O);
   return false;
 }
 
@@ -398,11 +411,12 @@ bool MipsAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
                                            const char *ExtraCode,
                                            raw_ostream &O) {
   if (ExtraCode && ExtraCode[0])
-     return true; // Unknown modifier.
+    return true; // Unknown modifier.
 
   const MachineOperand &MO = MI->getOperand(OpNum);
   assert(MO.isReg() && "unexpected inline asm memory operand");
   O << "0($" << MipsInstPrinter::getRegisterName(MO.getReg()) << ")";
+
   return false;
 }
 
@@ -450,7 +464,7 @@ void MipsAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
       break;
 
     case MachineOperand::MO_BlockAddress: {
-      MCSymbol* BA = GetBlockAddressSymbol(MO.getBlockAddress());
+      MCSymbol *BA = GetBlockAddressSymbol(MO.getBlockAddress());
       O << BA->getName();
       break;
     }
@@ -511,7 +525,7 @@ printMemOperandEA(const MachineInstr *MI, int opNum, raw_ostream &O) {
 void MipsAsmPrinter::
 printFCCOperand(const MachineInstr *MI, int opNum, raw_ostream &O,
                 const char *Modifier) {
-  const MachineOperand& MO = MI->getOperand(opNum);
+  const MachineOperand &MO = MI->getOperand(opNum);
   O << Mips::MipsFCCToString((Mips::CondCode)MO.getImm());
 }
 
diff --git a/lib/Target/Mips/MipsCallingConv.td b/lib/Target/Mips/MipsCallingConv.td
index 4b7e1d3..8aadefd 100644
--- a/lib/Target/Mips/MipsCallingConv.td
+++ b/lib/Target/Mips/MipsCallingConv.td
@@ -145,6 +145,58 @@ def RetCC_MipsEABI : CallingConv<[
 ]>;
 
 //===----------------------------------------------------------------------===//
+// Mips FastCC Calling Convention
+//===----------------------------------------------------------------------===//
+def CC_MipsO32_FastCC : CallingConv<[
+  // f64 arguments are passed in double-precision floating point registers.
+  CCIfType<[f64], CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7, D8, D9]>>,
+
+  // Stack parameter slots for f64 are 64-bit doublewords and 8-byte aligned.
+  CCIfType<[f64], CCAssignToStack<8, 8>>
+]>;
+
+def CC_MipsN_FastCC : CallingConv<[
+  // Integer arguments are passed in integer registers.
+  CCIfType<[i64], CCAssignToReg<[A0_64, A1_64, A2_64, A3_64, T0_64, T1_64,
+                                 T2_64, T3_64, T4_64, T5_64, T6_64, T7_64,
+                                 T8_64, V1_64]>>,
+
+  // f64 arguments are passed in double-precision floating point registers.
+  CCIfType<[f64], CCAssignToReg<[D0_64, D1_64, D2_64, D3_64, D4_64, D5_64,
+                                 D6_64, D7_64, D8_64, D9_64, D10_64, D11_64,
+                                 D12_64, D13_64, D14_64, D15_64, D16_64, D17_64,
+                                 D18_64, D19_64]>>,
+
+  // Stack parameter slots for i64 and f64 are 64-bit doublewords and
+  // 8-byte aligned.
+  CCIfType<[i64, f64], CCAssignToStack<8, 8>>
+]>;
+
+def CC_Mips_FastCC : CallingConv<[
+  // Handles byval parameters.
+  CCIfByVal<CCPassByVal<4, 4>>,
+
+  // Promote i8/i16 arguments to i32.
+  CCIfType<[i8, i16], CCPromoteToType<i32>>,
+
+  // Integer arguments are passed in integer registers. All scratch registers,
+  // except for AT, V0 and T9, are available to be used as argument registers.
+  CCIfType<[i32], CCAssignToReg<[A0, A1, A2, A3, T0, T1, T2, T3, T4, T5, T6,
+                                 T7, T8, V1]>>,
+
+  // f32 arguments are passed in single-precision floating point registers.
+  CCIfType<[f32], CCAssignToReg<[F0, F1, F2, F3, F4, F5, F6, F7, F8, F9, F10,
+                                 F11, F12, F13, F14, F15, F16, F17, F18, F19]>>,
+
+  // Stack parameter slots for i32 and f32 are 32-bit words and 4-byte aligned.
+  CCIfType<[i32, f32], CCAssignToStack<4, 4>>,
+
+  CCIfSubtarget<"isABI_EABI()", CCDelegateTo<CC_MipsEABI>>,
+  CCIfSubtarget<"isABI_O32()", CCDelegateTo<CC_MipsO32_FastCC>>,
+  CCDelegateTo<CC_MipsN_FastCC>
+]>;
+
+//===----------------------------------------------------------------------===//
 // Mips Calling Convention Dispatch
 //===----------------------------------------------------------------------===//
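[Editorial aside, not part of the imported patch.] The CC_Mips_FastCC table above is easy to trace by hand. A toy C++ model of its i32 leg, assuming the register order of the CCAssignToReg list and the CCAssignToStack<4, 4> overflow rule; the 16-argument count is arbitrary:

#include <cstdio>

int main() {
  // Integer arguments claim A0-A3 and T0-T8 plus V1 in order.
  static const char *const IntRegs[] = {"A0", "A1", "A2", "A3", "T0", "T1",
                                        "T2", "T3", "T4", "T5", "T6", "T7",
                                        "T8", "V1"};
  const unsigned NumIntRegs = sizeof(IntRegs) / sizeof(IntRegs[0]);
  unsigned NextReg = 0, StackOffset = 0;

  for (unsigned I = 0; I != 16; ++I) {
    if (NextReg < NumIntRegs)
      std::printf("arg%u -> $%s\n", I, IntRegs[NextReg++]);
    else {
      std::printf("arg%u -> %u($sp)\n", I, StackOffset);
      StackOffset += 4; // one 32-bit, 4-byte aligned slot per overflow arg
    }
  }
}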
diff --git a/lib/Target/Mips/MipsCodeEmitter.cpp b/lib/Target/Mips/MipsCodeEmitter.cpp
index 7d81902..cb7022b 100644
--- a/lib/Target/Mips/MipsCodeEmitter.cpp
+++ b/lib/Target/Mips/MipsCodeEmitter.cpp
@@ -145,8 +145,8 @@ bool MipsCodeEmitter::runOnMachineFunction(MachineFunction &MF) {
     for (MachineFunction::iterator MBB = MF.begin(), E = MF.end();
          MBB != E; ++MBB){
       MCE.StartMachineBasicBlock(MBB);
-      for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end();
-           I != E; ++I)
+      for (MachineBasicBlock::instr_iterator I = MBB->instr_begin(),
+           E = MBB->instr_end(); I != E; ++I)
         emitInstruction(*I);
     }
   } while (MCE.finishFunction(MF));
@@ -258,7 +258,7 @@ void MipsCodeEmitter::emitGlobalAddressUnaligned(const GlobalValue *GV,
 void MipsCodeEmitter::
 emitExternalSymbolAddress(const char *ES, unsigned Reloc) const {
   MCE.addRelocation(MachineRelocation::getExtSym(MCE.getCurrentPCOffset(),
-                                                 Reloc, ES, 0, 0, false));
+                                                 Reloc, ES, 0, 0));
 }
 
 void MipsCodeEmitter::emitConstPoolAddress(unsigned CPI, unsigned Reloc) const {
diff --git a/lib/Target/Mips/MipsCondMov.td b/lib/Target/Mips/MipsCondMov.td
index da33680..b12b1f2 100644
--- a/lib/Target/Mips/MipsCondMov.td
+++ b/lib/Target/Mips/MipsCondMov.td
@@ -61,41 +61,54 @@ multiclass MovzPats0<RegisterClass CRC, RegisterClass DRC,
                      Instruction MOVZInst, Instruction SLTOp,
                      Instruction SLTuOp, Instruction SLTiOp,
                      Instruction SLTiuOp> {
-  def : Pat<(select (i32 (setge CRC:$lhs, CRC:$rhs)), DRC:$T, DRC:$F),
-            (MOVZInst DRC:$T, (SLTOp CRC:$lhs, CRC:$rhs), DRC:$F)>;
-  def : Pat<(select (i32 (setuge CRC:$lhs, CRC:$rhs)), DRC:$T, DRC:$F),
-            (MOVZInst DRC:$T, (SLTuOp CRC:$lhs, CRC:$rhs), DRC:$F)>;
-  def : Pat<(select (i32 (setge CRC:$lhs, immSExt16:$rhs)), DRC:$T, DRC:$F),
-            (MOVZInst DRC:$T, (SLTiOp CRC:$lhs, immSExt16:$rhs), DRC:$F)>;
-  def : Pat<(select (i32 (setuge CRC:$lh, immSExt16:$rh)), DRC:$T, DRC:$F),
-            (MOVZInst DRC:$T, (SLTiuOp CRC:$lh, immSExt16:$rh), DRC:$F)>;
-  def : Pat<(select (i32 (setle CRC:$lhs, CRC:$rhs)), DRC:$T, DRC:$F),
-            (MOVZInst DRC:$T, (SLTOp CRC:$rhs, CRC:$lhs), DRC:$F)>;
-  def : Pat<(select (i32 (setule CRC:$lhs, CRC:$rhs)), DRC:$T, DRC:$F),
-            (MOVZInst DRC:$T, (SLTuOp CRC:$rhs, CRC:$lhs), DRC:$F)>;
+  def : MipsPat<(select (i32 (setge CRC:$lhs, CRC:$rhs)), DRC:$T, DRC:$F),
+                (MOVZInst DRC:$T, (SLTOp CRC:$lhs, CRC:$rhs), DRC:$F)>;
+  def : MipsPat<
+          (select (i32 (setuge CRC:$lhs, CRC:$rhs)), DRC:$T, DRC:$F),
+          (MOVZInst DRC:$T, (SLTuOp CRC:$lhs, CRC:$rhs), DRC:$F)>;
+  def : MipsPat<
+          (select (i32 (setge CRC:$lhs, immSExt16:$rhs)), DRC:$T, DRC:$F),
+          (MOVZInst DRC:$T, (SLTiOp CRC:$lhs, immSExt16:$rhs), DRC:$F)>;
+  def : MipsPat<
+          (select (i32 (setuge CRC:$lh, immSExt16:$rh)), DRC:$T, DRC:$F),
+          (MOVZInst DRC:$T, (SLTiuOp CRC:$lh, immSExt16:$rh), DRC:$F)>;
+  def : MipsPat<
+          (select (i32 (setle CRC:$lhs, CRC:$rhs)), DRC:$T, DRC:$F),
+          (MOVZInst DRC:$T, (SLTOp CRC:$rhs, CRC:$lhs), DRC:$F)>;
+  def : MipsPat<
+          (select (i32 (setule CRC:$lhs, CRC:$rhs)), DRC:$T, DRC:$F),
+          (MOVZInst DRC:$T, (SLTuOp CRC:$rhs, CRC:$lhs), DRC:$F)>;
 }
 
 multiclass MovzPats1<RegisterClass CRC, RegisterClass DRC,
                      Instruction MOVZInst, Instruction XOROp> {
-  def : Pat<(select (i32 (seteq CRC:$lhs, CRC:$rhs)), DRC:$T, DRC:$F),
-            (MOVZInst DRC:$T, (XOROp CRC:$lhs, CRC:$rhs), DRC:$F)>;
-  def : Pat<(select (i32 (seteq CRC:$lhs, 0)), DRC:$T, DRC:$F),
-            (MOVZInst DRC:$T, CRC:$lhs, DRC:$F)>;
+  def : MipsPat<(select (i32 (seteq CRC:$lhs, CRC:$rhs)), DRC:$T, DRC:$F),
+                (MOVZInst DRC:$T, (XOROp CRC:$lhs, CRC:$rhs), DRC:$F)>;
+  def : MipsPat<(select (i32 (seteq CRC:$lhs, 0)), DRC:$T, DRC:$F),
+                (MOVZInst DRC:$T, CRC:$lhs, DRC:$F)>;
+}
+
+multiclass MovzPats2<RegisterClass CRC, RegisterClass DRC,
+                     Instruction MOVZInst, Instruction XORiOp> {
+  def : MipsPat<
+          (select (i32 (seteq CRC:$lhs, immZExt16:$uimm16)), DRC:$T, DRC:$F),
+          (MOVZInst DRC:$T, (XORiOp CRC:$lhs, immZExt16:$uimm16), DRC:$F)>;
 }
 
 multiclass MovnPats<RegisterClass CRC, RegisterClass DRC, Instruction MOVNInst,
                     Instruction XOROp> {
-  def : Pat<(select (i32 (setne CRC:$lhs, CRC:$rhs)), DRC:$T, DRC:$F),
-            (MOVNInst DRC:$T, (XOROp CRC:$lhs, CRC:$rhs), DRC:$F)>;
-  def : Pat<(select CRC:$cond, DRC:$T, DRC:$F),
-            (MOVNInst DRC:$T, CRC:$cond, DRC:$F)>;
-  def : Pat<(select (i32 (setne CRC:$lhs, 0)),DRC:$T, DRC:$F),
-            (MOVNInst DRC:$T, CRC:$lhs, DRC:$F)>;
+  def : MipsPat<(select (i32 (setne CRC:$lhs, CRC:$rhs)), DRC:$T, DRC:$F),
+                (MOVNInst DRC:$T, (XOROp CRC:$lhs, CRC:$rhs), DRC:$F)>;
+  def : MipsPat<(select CRC:$cond, DRC:$T, DRC:$F),
+                (MOVNInst DRC:$T, CRC:$cond, DRC:$F)>;
+  def : MipsPat<(select (i32 (setne CRC:$lhs, 0)),DRC:$T, DRC:$F),
+                (MOVNInst DRC:$T, CRC:$lhs, DRC:$F)>;
 }
 
 // Instantiation of instructions.
 def MOVZ_I_I : CondMovIntInt<CPURegs, CPURegs, 0x0a, "movz">;
-let Predicates = [HasMips64],DecoderNamespace = "Mips64" in {
+let Predicates = [HasMips64, HasStandardEncoding],
+    DecoderNamespace = "Mips64" in {
   def MOVZ_I_I64 : CondMovIntInt<CPURegs, CPU64Regs, 0x0a, "movz">;
   def MOVZ_I64_I : CondMovIntInt<CPU64Regs, CPURegs, 0x0a, "movz"> {
     let isCodeGenOnly = 1;
@@ -106,7 +119,8 @@ let Predicates = [HasMips64, HasStandardEncoding],
 }
 
 def MOVN_I_I : CondMovIntInt<CPURegs, CPURegs, 0x0b, "movn">;
-let Predicates = [HasMips64],DecoderNamespace = "Mips64" in {
+let Predicates = [HasMips64, HasStandardEncoding],
+    DecoderNamespace = "Mips64" in {
  def MOVN_I_I64 : CondMovIntInt<CPURegs, CPU64Regs, 0x0b, "movn">;
  def MOVN_I64_I : CondMovIntInt<CPU64Regs, CPURegs, 0x0b, "movn"> {
    let isCodeGenOnly = 1;
@@ -118,21 +132,22 @@ let Predicates = [HasMips64, HasStandardEncoding],
 
 def MOVZ_I_S : CondMovIntFP<CPURegs, FGR32, 16, 18, "movz.s">;
 def MOVZ_I64_S : CondMovIntFP<CPU64Regs, FGR32, 16, 18, "movz.s">,
-                 Requires<[HasMips64]> {
+                 Requires<[HasMips64, HasStandardEncoding]> {
   let DecoderNamespace = "Mips64";
 }
 
 def MOVN_I_S : CondMovIntFP<CPURegs, FGR32, 16, 19, "movn.s">;
 def MOVN_I64_S : CondMovIntFP<CPU64Regs, FGR32, 16, 19, "movn.s">,
-                 Requires<[HasMips64]> {
+                 Requires<[HasMips64, HasStandardEncoding]> {
   let DecoderNamespace = "Mips64";
 }
 
-let Predicates = [NotFP64bit] in {
+let Predicates = [NotFP64bit, HasStandardEncoding] in {
   def MOVZ_I_D32 : CondMovIntFP<CPURegs, AFGR64, 17, 18, "movz.d">;
   def MOVN_I_D32 : CondMovIntFP<CPURegs, AFGR64, 17, 19, "movn.d">;
 }
-let Predicates = [IsFP64bit],DecoderNamespace = "Mips64" in {
+let Predicates = [IsFP64bit, HasStandardEncoding],
+    DecoderNamespace = "Mips64" in {
   def MOVZ_I_D64 : CondMovIntFP<CPURegs, FGR64, 17, 18, "movz.d">;
   def MOVZ_I64_D64 : CondMovIntFP<CPU64Regs, FGR64, 17, 18, "movz.d"> {
     let isCodeGenOnly = 1;
@@ -145,24 +160,25 @@ let Predicates = [IsFP64bit, HasStandardEncoding],
 
 def MOVT_I : CondMovFPInt<CPURegs, MipsCMovFP_T, 1, "movt">;
 def MOVT_I64 : CondMovFPInt<CPU64Regs, MipsCMovFP_T, 1, "movt">,
-               Requires<[HasMips64]> {
+               Requires<[HasMips64, HasStandardEncoding]> {
   let DecoderNamespace = "Mips64";
 }
 
 def MOVF_I : CondMovFPInt<CPURegs, MipsCMovFP_F, 0, "movf">;
 def MOVF_I64 : CondMovFPInt<CPU64Regs, MipsCMovFP_F, 0, "movf">,
-               Requires<[HasMips64]> {
+               Requires<[HasMips64, HasStandardEncoding]> {
   let DecoderNamespace = "Mips64";
 }
 
 def MOVT_S : CondMovFPFP<FGR32, MipsCMovFP_T, 16, 1, "movt.s">;
 def MOVF_S : CondMovFPFP<FGR32, MipsCMovFP_F, 16, 0, "movf.s">;
 
-let Predicates = [NotFP64bit] in {
+let Predicates = [NotFP64bit, HasStandardEncoding] in {
   def MOVT_D32 : CondMovFPFP<AFGR64, MipsCMovFP_T, 17, 1, "movt.d">;
   def MOVF_D32 : CondMovFPFP<AFGR64, MipsCMovFP_F, 17, 0, "movf.d">;
 }
-let Predicates = [IsFP64bit], DecoderNamespace = "Mips64" in {
+let Predicates = [IsFP64bit, HasStandardEncoding],
+    DecoderNamespace = "Mips64" in {
   def MOVT_D64 : CondMovFPFP<FGR64, MipsCMovFP_T, 17, 1, "movt.d">;
  def
 MOVF_D64 : CondMovFPFP<FGR64, MipsCMovFP_F, 17, 0, "movf.d">;
 }
 
@@ -170,7 +186,8 @@ let Predicates = [IsFP64bit], DecoderNamespace = "Mips64" in {
 // Instantiation of conditional move patterns.
 defm : MovzPats0<CPURegs, CPURegs, MOVZ_I_I, SLT, SLTu, SLTi, SLTiu>;
 defm : MovzPats1<CPURegs, CPURegs, MOVZ_I_I, XOR>;
-let Predicates = [HasMips64] in {
+defm : MovzPats2<CPURegs, CPURegs, MOVZ_I_I, XORi>;
+let Predicates = [HasMips64, HasStandardEncoding] in {
   defm : MovzPats0<CPURegs, CPU64Regs, MOVZ_I_I64, SLT, SLTu, SLTi, SLTiu>;
   defm : MovzPats0<CPU64Regs, CPURegs, MOVZ_I64_I, SLT64, SLTu64, SLTi64,
                    SLTiu64>;
@@ -179,10 +196,13 @@ let Predicates = [HasMips64] in {
   defm : MovzPats1<CPURegs, CPU64Regs, MOVZ_I_I64, XOR>;
   defm : MovzPats1<CPU64Regs, CPURegs, MOVZ_I64_I, XOR64>;
   defm : MovzPats1<CPU64Regs, CPU64Regs, MOVZ_I64_I64, XOR64>;
+  defm : MovzPats2<CPURegs, CPU64Regs, MOVZ_I_I64, XORi>;
+  defm : MovzPats2<CPU64Regs, CPURegs, MOVZ_I64_I, XORi64>;
+  defm : MovzPats2<CPU64Regs, CPU64Regs, MOVZ_I64_I64, XORi64>;
 }
 
 defm : MovnPats<CPURegs, CPURegs, MOVN_I_I, XOR>;
-let Predicates = [HasMips64] in {
+let Predicates = [HasMips64, HasStandardEncoding] in {
   defm : MovnPats<CPURegs, CPU64Regs, MOVN_I_I64, XOR>;
   defm : MovnPats<CPU64Regs, CPURegs, MOVN_I64_I, XOR64>;
   defm : MovnPats<CPU64Regs, CPU64Regs, MOVN_I64_I64, XOR64>;
@@ -191,19 +211,19 @@ let Predicates = [HasMips64, HasStandardEncoding] in {
 defm : MovzPats0<CPURegs, FGR32, MOVZ_I_S, SLT, SLTu, SLTi, SLTiu>;
 defm : MovzPats1<CPURegs, FGR32, MOVZ_I_S, XOR>;
 defm : MovnPats<CPURegs, FGR32, MOVN_I_S, XOR>;
-let Predicates = [HasMips64] in {
+let Predicates = [HasMips64, HasStandardEncoding] in {
   defm : MovzPats0<CPU64Regs, FGR32, MOVZ_I64_S, SLT64, SLTu64, SLTi64,
                    SLTiu64>;
   defm : MovzPats1<CPU64Regs, FGR32, MOVZ_I64_S, XOR64>;
   defm : MovnPats<CPU64Regs, FGR32, MOVN_I64_S, XOR64>;
 }
 
-let Predicates = [NotFP64bit] in {
+let Predicates = [NotFP64bit, HasStandardEncoding] in {
   defm : MovzPats0<CPURegs, AFGR64, MOVZ_I_D32, SLT, SLTu, SLTi, SLTiu>;
   defm : MovzPats1<CPURegs, AFGR64, MOVZ_I_D32, XOR>;
   defm : MovnPats<CPURegs, AFGR64, MOVN_I_D32, XOR>;
 }
-let Predicates = [IsFP64bit] in {
+let Predicates = [IsFP64bit, HasStandardEncoding] in {
   defm : MovzPats0<CPURegs, FGR64, MOVZ_I_D64, SLT, SLTu, SLTi, SLTiu>;
   defm : MovzPats0<CPU64Regs, FGR64, MOVZ_I64_D64, SLT64, SLTu64, SLTi64,
diff --git a/lib/Target/Mips/MipsDelaySlotFiller.cpp b/lib/Target/Mips/MipsDelaySlotFiller.cpp
index debf2f1..2bba8a3 100644
--- a/lib/Target/Mips/MipsDelaySlotFiller.cpp
+++ b/lib/Target/Mips/MipsDelaySlotFiller.cpp
@@ -36,12 +36,21 @@ static cl::opt<bool> EnableDelaySlotFiller(
   cl::desc("Fill the Mips delay slots useful instructions."),
   cl::Hidden);
 
+// This option can be used to silence complaints by machine verifier passes.
+static cl::opt<bool> SkipDelaySlotFiller(
+  "skip-mips-delay-filler",
+  cl::init(false),
+  cl::desc("Skip MIPS' delay slot filling pass."),
+  cl::Hidden);
+
 namespace {
   struct Filler : public MachineFunctionPass {
+    typedef MachineBasicBlock::instr_iterator InstrIter;
+    typedef MachineBasicBlock::reverse_instr_iterator ReverseInstrIter;
+
     TargetMachine &TM;
     const TargetInstrInfo *TII;
-    MachineBasicBlock::iterator LastFiller;
+    InstrIter LastFiller;
 
     static char ID;
     Filler(TargetMachine &tm)
@@ -53,6 +62,9 @@ namespace {
     bool runOnMachineBasicBlock(MachineBasicBlock &MBB);
     bool runOnMachineFunction(MachineFunction &F) {
+      if (SkipDelaySlotFiller)
+        return false;
+
       bool Changed = false;
       for (MachineFunction::iterator FI = F.begin(), FE = F.end();
            FI != FE; ++FI)
@@ -61,27 +73,27 @@ namespace {
     }
 
     bool isDelayFiller(MachineBasicBlock &MBB,
-                       MachineBasicBlock::iterator candidate);
+                       InstrIter candidate);
 
-    void insertCallUses(MachineBasicBlock::iterator MI,
-                        SmallSet<unsigned, 32>& RegDefs,
-                        SmallSet<unsigned, 32>& RegUses);
+    void insertCallUses(InstrIter MI,
+                        SmallSet<unsigned, 32> &RegDefs,
+                        SmallSet<unsigned, 32> &RegUses);
 
-    void insertDefsUses(MachineBasicBlock::iterator MI,
-                        SmallSet<unsigned, 32>& RegDefs,
-                        SmallSet<unsigned, 32>& RegUses);
+    void insertDefsUses(InstrIter MI,
+                        SmallSet<unsigned, 32> &RegDefs,
+                        SmallSet<unsigned, 32> &RegUses);
 
-    bool IsRegInSet(SmallSet<unsigned, 32>& RegSet,
+    bool IsRegInSet(SmallSet<unsigned, 32> &RegSet,
                     unsigned Reg);
 
-    bool delayHasHazard(MachineBasicBlock::iterator candidate,
+    bool delayHasHazard(InstrIter candidate,
                         bool &sawLoad, bool &sawStore,
                         SmallSet<unsigned, 32> &RegDefs,
                         SmallSet<unsigned, 32> &RegUses);
 
     bool
-    findDelayInstr(MachineBasicBlock &MBB, MachineBasicBlock::iterator slot,
-                   MachineBasicBlock::iterator &Filler);
+    findDelayInstr(MachineBasicBlock &MBB, InstrIter slot,
+                   InstrIter &Filler);
 
 
   };
@@ -93,14 +105,14 @@ namespace {
 bool Filler::
 runOnMachineBasicBlock(MachineBasicBlock &MBB) {
   bool Changed = false;
-  LastFiller = MBB.end();
+  LastFiller = MBB.instr_end();
(MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I) + for (InstrIter I = MBB.instr_begin(); I != MBB.instr_end(); ++I) if (I->hasDelaySlot()) { ++FilledSlots; Changed = true; - MachineBasicBlock::iterator D; + InstrIter D; if (EnableDelaySlotFiller && findDelayInstr(MBB, I, D)) { MBB.splice(llvm::next(I), &MBB, D); @@ -111,6 +123,10 @@ runOnMachineBasicBlock(MachineBasicBlock &MBB) { // Record the filler instruction that filled the delay slot. // The instruction after it will be visited in the next iteration. LastFiller = ++I; + + // Set InsideBundle bit so that the machine verifier doesn't expect this + // instruction to be a terminator. + LastFiller->setIsInsideBundle(); } return Changed; @@ -123,8 +139,8 @@ FunctionPass *llvm::createMipsDelaySlotFillerPass(MipsTargetMachine &tm) { } bool Filler::findDelayInstr(MachineBasicBlock &MBB, - MachineBasicBlock::iterator slot, - MachineBasicBlock::iterator &Filler) { + InstrIter slot, + InstrIter &Filler) { SmallSet RegDefs; SmallSet RegUses; @@ -133,13 +149,13 @@ bool Filler::findDelayInstr(MachineBasicBlock &MBB, bool sawLoad = false; bool sawStore = false; - for (MachineBasicBlock::reverse_iterator I(slot); I != MBB.rend(); ++I) { + for (ReverseInstrIter I(slot); I != MBB.instr_rend(); ++I) { // skip debug value if (I->isDebugValue()) continue; // Convert to forward iterator. - MachineBasicBlock::iterator FI(llvm::next(I).base()); + InstrIter FI(llvm::next(I).base()); if (I->hasUnmodeledSideEffects() || I->isInlineAsm() @@ -165,7 +181,7 @@ bool Filler::findDelayInstr(MachineBasicBlock &MBB, return false; } -bool Filler::delayHasHazard(MachineBasicBlock::iterator candidate, +bool Filler::delayHasHazard(InstrIter candidate, bool &sawLoad, bool &sawStore, SmallSet &RegDefs, SmallSet &RegUses) { @@ -213,9 +229,9 @@ bool Filler::delayHasHazard(MachineBasicBlock::iterator candidate, } // Insert Defs and Uses of MI into the sets RegDefs and RegUses. -void Filler::insertDefsUses(MachineBasicBlock::iterator MI, - SmallSet& RegDefs, - SmallSet& RegUses) { +void Filler::insertDefsUses(InstrIter MI, + SmallSet &RegDefs, + SmallSet &RegUses) { // If MI is a call or return, just examine the explicit non-variadic operands. MCInstrDesc MCID = MI->getDesc(); unsigned e = MI->isCall() || MI->isReturn() ? MCID.getNumOperands() : @@ -240,14 +256,11 @@ void Filler::insertDefsUses(MachineBasicBlock::iterator MI, } //returns true if the Reg or its alias is in the RegSet. -bool Filler::IsRegInSet(SmallSet& RegSet, unsigned Reg) { - if (RegSet.count(Reg)) - return true; - // check Aliased Registers - for (const uint16_t *Alias = TM.getRegisterInfo()->getAliasSet(Reg); - *Alias; ++Alias) - if (RegSet.count(*Alias)) +bool Filler::IsRegInSet(SmallSet &RegSet, unsigned Reg) { + // Check Reg and all aliased Registers. + for (MCRegAliasIterator AI(Reg, TM.getRegisterInfo(), true); + AI.isValid(); ++AI) + if (RegSet.count(*AI)) return true; - return false; } diff --git a/lib/Target/Mips/MipsEmitGPRestore.cpp b/lib/Target/Mips/MipsEmitGPRestore.cpp deleted file mode 100644 index 119d1a8..0000000 --- a/lib/Target/Mips/MipsEmitGPRestore.cpp +++ /dev/null @@ -1,97 +0,0 @@ -//===-- MipsEmitGPRestore.cpp - Emit GP Restore Instruction ---------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. 
-// -//===----------------------------------------------------------------------===// -// -// This pass emits instructions that restore $gp right -// after jalr instructions. -// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "emit-gp-restore" - -#include "Mips.h" -#include "MipsTargetMachine.h" -#include "MipsMachineFunction.h" -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/ADT/Statistic.h" - -using namespace llvm; - -namespace { - struct Inserter : public MachineFunctionPass { - - TargetMachine &TM; - const TargetInstrInfo *TII; - - static char ID; - Inserter(TargetMachine &tm) - : MachineFunctionPass(ID), TM(tm), TII(tm.getInstrInfo()) { } - - virtual const char *getPassName() const { - return "Mips Emit GP Restore"; - } - - bool runOnMachineFunction(MachineFunction &F); - }; - char Inserter::ID = 0; -} // end of anonymous namespace - -bool Inserter::runOnMachineFunction(MachineFunction &F) { - MipsFunctionInfo *MipsFI = F.getInfo(); - - if ((TM.getRelocationModel() != Reloc::PIC_) || - (!MipsFI->globalBaseRegFixed())) - return false; - - bool Changed = false; - int FI = MipsFI->getGPFI(); - - for (MachineFunction::iterator MFI = F.begin(), MFE = F.end(); - MFI != MFE; ++MFI) { - MachineBasicBlock& MBB = *MFI; - MachineBasicBlock::iterator I = MFI->begin(); - - // If MBB is a landing pad, insert instruction that restores $gp after - // EH_LABEL. - if (MBB.isLandingPad()) { - // Find EH_LABEL first. - for (; I->getOpcode() != TargetOpcode::EH_LABEL; ++I) ; - - // Insert lw. - ++I; - DebugLoc dl = I != MBB.end() ? I->getDebugLoc() : DebugLoc(); - BuildMI(MBB, I, dl, TII->get(Mips::LW), Mips::GP).addFrameIndex(FI) - .addImm(0); - Changed = true; - } - - while (I != MFI->end()) { - if (I->getOpcode() != Mips::JALR) { - ++I; - continue; - } - - DebugLoc dl = I->getDebugLoc(); - // emit lw $gp, ($gp save slot on stack) after jalr - BuildMI(MBB, ++I, dl, TII->get(Mips::LW), Mips::GP).addFrameIndex(FI) - .addImm(0); - Changed = true; - } - } - - return Changed; -} - -/// createMipsEmitGPRestorePass - Returns a pass that emits instructions that -/// restores $gp clobbered by jalr instructions. -FunctionPass *llvm::createMipsEmitGPRestorePass(MipsTargetMachine &tm) { - return new Inserter(tm); -} - diff --git a/lib/Target/Mips/MipsExpandPseudo.cpp b/lib/Target/Mips/MipsExpandPseudo.cpp deleted file mode 100644 index baeae97..0000000 --- a/lib/Target/Mips/MipsExpandPseudo.cpp +++ /dev/null @@ -1,123 +0,0 @@ -//===-- MipsExpandPseudo.cpp - Expand Pseudo Instructions ----------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This pass expands pseudo instructions into target instructions after register -// allocation but before post-RA scheduling. 
-// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "mips-expand-pseudo" - -#include "Mips.h" -#include "MipsTargetMachine.h" -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/ADT/Statistic.h" - -using namespace llvm; - -namespace { - struct MipsExpandPseudo : public MachineFunctionPass { - - TargetMachine &TM; - const TargetInstrInfo *TII; - - static char ID; - MipsExpandPseudo(TargetMachine &tm) - : MachineFunctionPass(ID), TM(tm), TII(tm.getInstrInfo()) { } - - virtual const char *getPassName() const { - return "Mips PseudoInstrs Expansion"; - } - - bool runOnMachineFunction(MachineFunction &F); - bool runOnMachineBasicBlock(MachineBasicBlock &MBB); - - private: - void ExpandBuildPairF64(MachineBasicBlock&, MachineBasicBlock::iterator); - void ExpandExtractElementF64(MachineBasicBlock&, - MachineBasicBlock::iterator); - }; - char MipsExpandPseudo::ID = 0; -} // end of anonymous namespace - -bool MipsExpandPseudo::runOnMachineFunction(MachineFunction& F) { - bool Changed = false; - - for (MachineFunction::iterator I = F.begin(); I != F.end(); ++I) - Changed |= runOnMachineBasicBlock(*I); - - return Changed; -} - -bool MipsExpandPseudo::runOnMachineBasicBlock(MachineBasicBlock& MBB) { - - bool Changed = false; - for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end();) { - const MCInstrDesc& MCid = I->getDesc(); - - switch(MCid.getOpcode()) { - default: - ++I; - continue; - case Mips::SETGP2: - // Convert "setgp2 $globalreg, $t9" to "addu $globalreg, $v0, $t9" - BuildMI(MBB, I, I->getDebugLoc(), TII->get(Mips::ADDu), - I->getOperand(0).getReg()) - .addReg(Mips::V0).addReg(I->getOperand(1).getReg()); - break; - case Mips::BuildPairF64: - ExpandBuildPairF64(MBB, I); - break; - case Mips::ExtractElementF64: - ExpandExtractElementF64(MBB, I); - break; - } - - // delete original instr - MBB.erase(I++); - Changed = true; - } - - return Changed; -} - -void MipsExpandPseudo::ExpandBuildPairF64(MachineBasicBlock& MBB, - MachineBasicBlock::iterator I) { - unsigned DstReg = I->getOperand(0).getReg(); - unsigned LoReg = I->getOperand(1).getReg(), HiReg = I->getOperand(2).getReg(); - const MCInstrDesc& Mtc1Tdd = TII->get(Mips::MTC1); - DebugLoc dl = I->getDebugLoc(); - const uint16_t* SubReg = - TM.getRegisterInfo()->getSubRegisters(DstReg); - - // mtc1 Lo, $fp - // mtc1 Hi, $fp + 1 - BuildMI(MBB, I, dl, Mtc1Tdd, *SubReg).addReg(LoReg); - BuildMI(MBB, I, dl, Mtc1Tdd, *(SubReg + 1)).addReg(HiReg); -} - -void MipsExpandPseudo::ExpandExtractElementF64(MachineBasicBlock& MBB, - MachineBasicBlock::iterator I) { - unsigned DstReg = I->getOperand(0).getReg(); - unsigned SrcReg = I->getOperand(1).getReg(); - unsigned N = I->getOperand(2).getImm(); - const MCInstrDesc& Mfc1Tdd = TII->get(Mips::MFC1); - DebugLoc dl = I->getDebugLoc(); - const uint16_t* SubReg = TM.getRegisterInfo()->getSubRegisters(SrcReg); - - BuildMI(MBB, I, dl, Mfc1Tdd, DstReg).addReg(*(SubReg + N)); -} - -/// createMipsMipsExpandPseudoPass - Returns a pass that expands pseudo -/// instrs into real instrs -FunctionPass *llvm::createMipsExpandPseudoPass(MipsTargetMachine &tm) { - return new MipsExpandPseudo(tm); -} diff --git a/lib/Target/Mips/MipsFrameLowering.cpp b/lib/Target/Mips/MipsFrameLowering.cpp index f8ea3d0..8c0474b 100644 --- a/lib/Target/Mips/MipsFrameLowering.cpp +++ b/lib/Target/Mips/MipsFrameLowering.cpp @@ -15,6 +15,7 @@ #include "MipsAnalyzeImmediate.h" 
#include "MipsInstrInfo.h" #include "MipsMachineFunction.h" +#include "MipsTargetMachine.h" #include "MCTargetDesc/MipsBaseInfo.h" #include "llvm/Function.h" #include "llvm/CodeGen/MachineFrameInfo.h" @@ -81,6 +82,14 @@ using namespace llvm; // //===----------------------------------------------------------------------===// +const MipsFrameLowering *MipsFrameLowering::create(MipsTargetMachine &TM, + const MipsSubtarget &ST) { + if (TM.getSubtargetImpl()->inMips16Mode()) + return llvm::createMips16FrameLowering(ST); + + return llvm::createMipsSEFrameLowering(ST); +} + // hasFP - Return true if the specified function should have a dedicated frame // pointer register. This is true if the function has variable sized allocas or // if frame pointer elimination is disabled. @@ -89,238 +98,3 @@ bool MipsFrameLowering::hasFP(const MachineFunction &MF) const { return MF.getTarget().Options.DisableFramePointerElim(MF) || MFI->hasVarSizedObjects() || MFI->isFrameAddressTaken(); } - -bool MipsFrameLowering::targetHandlesStackFrameRounding() const { - return true; -} - -// Build an instruction sequence to load an immediate that is too large to fit -// in 16-bit and add the result to Reg. -static void expandLargeImm(unsigned Reg, int64_t Imm, bool IsN64, - const MipsInstrInfo &TII, MachineBasicBlock& MBB, - MachineBasicBlock::iterator II, DebugLoc DL) { - unsigned LUi = IsN64 ? Mips::LUi64 : Mips::LUi; - unsigned ADDu = IsN64 ? Mips::DADDu : Mips::ADDu; - unsigned ZEROReg = IsN64 ? Mips::ZERO_64 : Mips::ZERO; - unsigned ATReg = IsN64 ? Mips::AT_64 : Mips::AT; - MipsAnalyzeImmediate AnalyzeImm; - const MipsAnalyzeImmediate::InstSeq &Seq = - AnalyzeImm.Analyze(Imm, IsN64 ? 64 : 32, false /* LastInstrIsADDiu */); - MipsAnalyzeImmediate::InstSeq::const_iterator Inst = Seq.begin(); - - // The first instruction can be a LUi, which is different from other - // instructions (ADDiu, ORI and SLL) in that it does not have a register - // operand. - if (Inst->Opc == LUi) - BuildMI(MBB, II, DL, TII.get(LUi), ATReg) - .addImm(SignExtend64<16>(Inst->ImmOpnd)); - else - BuildMI(MBB, II, DL, TII.get(Inst->Opc), ATReg).addReg(ZEROReg) - .addImm(SignExtend64<16>(Inst->ImmOpnd)); - - // Build the remaining instructions in Seq. - for (++Inst; Inst != Seq.end(); ++Inst) - BuildMI(MBB, II, DL, TII.get(Inst->Opc), ATReg).addReg(ATReg) - .addImm(SignExtend64<16>(Inst->ImmOpnd)); - - BuildMI(MBB, II, DL, TII.get(ADDu), Reg).addReg(Reg).addReg(ATReg); -} - -void MipsFrameLowering::emitPrologue(MachineFunction &MF) const { - MachineBasicBlock &MBB = MF.front(); - MachineFrameInfo *MFI = MF.getFrameInfo(); - MipsFunctionInfo *MipsFI = MF.getInfo(); - const MipsRegisterInfo *RegInfo = - static_cast(MF.getTarget().getRegisterInfo()); - const MipsInstrInfo &TII = - *static_cast(MF.getTarget().getInstrInfo()); - MachineBasicBlock::iterator MBBI = MBB.begin(); - DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc(); - bool isPIC = (MF.getTarget().getRelocationModel() == Reloc::PIC_); - unsigned SP = STI.isABI_N64() ? Mips::SP_64 : Mips::SP; - unsigned FP = STI.isABI_N64() ? Mips::FP_64 : Mips::FP; - unsigned ZERO = STI.isABI_N64() ? Mips::ZERO_64 : Mips::ZERO; - unsigned ADDu = STI.isABI_N64() ? Mips::DADDu : Mips::ADDu; - unsigned ADDiu = STI.isABI_N64() ? Mips::DADDiu : Mips::ADDiu; - - // First, compute final stack size. - unsigned RegSize = STI.isGP32bit() ? 4 : 8; - unsigned StackAlign = getStackAlignment(); - unsigned LocalVarAreaOffset = MipsFI->needGPSaveRestore() ? 
- (MFI->getObjectOffset(MipsFI->getGPFI()) + RegSize) : - MipsFI->getMaxCallFrameSize(); - uint64_t StackSize = RoundUpToAlignment(LocalVarAreaOffset, StackAlign) + - RoundUpToAlignment(MFI->getStackSize(), StackAlign); - - // Update stack size - MFI->setStackSize(StackSize); - - // Emit instructions that set the global base register if the target ABI is - // O32. - if (isPIC && MipsFI->globalBaseRegSet() && STI.isABI_O32() && - !MipsFI->globalBaseRegFixed()) { - // See MipsInstrInfo.td for explanation. - MachineBasicBlock *NewEntry = MF.CreateMachineBasicBlock(); - MF.insert(&MBB, NewEntry); - NewEntry->addSuccessor(&MBB); - - // Copy live in registers. - for (MachineBasicBlock::livein_iterator R = MBB.livein_begin(); - R != MBB.livein_end(); ++R) - NewEntry->addLiveIn(*R); - - BuildMI(*NewEntry, NewEntry->begin(), dl, TII.get(Mips:: SETGP01), - Mips::V0); - } - - // No need to allocate space on the stack. - if (StackSize == 0 && !MFI->adjustsStack()) return; - - MachineModuleInfo &MMI = MF.getMMI(); - std::vector &Moves = MMI.getFrameMoves(); - MachineLocation DstML, SrcML; - - // Adjust stack. - if (isInt<16>(-StackSize)) // addi sp, sp, (-stacksize) - BuildMI(MBB, MBBI, dl, TII.get(ADDiu), SP).addReg(SP).addImm(-StackSize); - else { // Expand immediate that doesn't fit in 16-bit. - MipsFI->setEmitNOAT(); - expandLargeImm(SP, -StackSize, STI.isABI_N64(), TII, MBB, MBBI, dl); - } - - // emit ".cfi_def_cfa_offset StackSize" - MCSymbol *AdjustSPLabel = MMI.getContext().CreateTempSymbol(); - BuildMI(MBB, MBBI, dl, - TII.get(TargetOpcode::PROLOG_LABEL)).addSym(AdjustSPLabel); - DstML = MachineLocation(MachineLocation::VirtualFP); - SrcML = MachineLocation(MachineLocation::VirtualFP, -StackSize); - Moves.push_back(MachineMove(AdjustSPLabel, DstML, SrcML)); - - const std::vector &CSI = MFI->getCalleeSavedInfo(); - - if (CSI.size()) { - // Find the instruction past the last instruction that saves a callee-saved - // register to the stack. - for (unsigned i = 0; i < CSI.size(); ++i) - ++MBBI; - - // Iterate over list of callee-saved registers and emit .cfi_offset - // directives. - MCSymbol *CSLabel = MMI.getContext().CreateTempSymbol(); - BuildMI(MBB, MBBI, dl, - TII.get(TargetOpcode::PROLOG_LABEL)).addSym(CSLabel); - - for (std::vector::const_iterator I = CSI.begin(), - E = CSI.end(); I != E; ++I) { - int64_t Offset = MFI->getObjectOffset(I->getFrameIdx()); - unsigned Reg = I->getReg(); - - // If Reg is a double precision register, emit two cfa_offsets, - // one for each of the paired single precision registers. - if (Mips::AFGR64RegisterClass->contains(Reg)) { - const uint16_t *SubRegs = RegInfo->getSubRegisters(Reg); - MachineLocation DstML0(MachineLocation::VirtualFP, Offset); - MachineLocation DstML1(MachineLocation::VirtualFP, Offset + 4); - MachineLocation SrcML0(*SubRegs); - MachineLocation SrcML1(*(SubRegs + 1)); - - if (!STI.isLittle()) - std::swap(SrcML0, SrcML1); - - Moves.push_back(MachineMove(CSLabel, DstML0, SrcML0)); - Moves.push_back(MachineMove(CSLabel, DstML1, SrcML1)); - } - else { - // Reg is either in CPURegs or FGR32. - DstML = MachineLocation(MachineLocation::VirtualFP, Offset); - SrcML = MachineLocation(Reg); - Moves.push_back(MachineMove(CSLabel, DstML, SrcML)); - } - } - } - - // if framepointer enabled, set it to point to the stack pointer. - if (hasFP(MF)) { - // Insert instruction "move $fp, $sp" at this location. 
- BuildMI(MBB, MBBI, dl, TII.get(ADDu), FP).addReg(SP).addReg(ZERO); - - // emit ".cfi_def_cfa_register $fp" - MCSymbol *SetFPLabel = MMI.getContext().CreateTempSymbol(); - BuildMI(MBB, MBBI, dl, - TII.get(TargetOpcode::PROLOG_LABEL)).addSym(SetFPLabel); - DstML = MachineLocation(FP); - SrcML = MachineLocation(MachineLocation::VirtualFP); - Moves.push_back(MachineMove(SetFPLabel, DstML, SrcML)); - } - - // Restore GP from the saved stack location - if (MipsFI->needGPSaveRestore()) { - unsigned Offset = MFI->getObjectOffset(MipsFI->getGPFI()); - BuildMI(MBB, MBBI, dl, TII.get(Mips::CPRESTORE)).addImm(Offset) - .addReg(Mips::GP); - } -} - -void MipsFrameLowering::emitEpilogue(MachineFunction &MF, - MachineBasicBlock &MBB) const { - MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr(); - MachineFrameInfo *MFI = MF.getFrameInfo(); - const MipsInstrInfo &TII = - *static_cast(MF.getTarget().getInstrInfo()); - DebugLoc dl = MBBI->getDebugLoc(); - unsigned SP = STI.isABI_N64() ? Mips::SP_64 : Mips::SP; - unsigned FP = STI.isABI_N64() ? Mips::FP_64 : Mips::FP; - unsigned ZERO = STI.isABI_N64() ? Mips::ZERO_64 : Mips::ZERO; - unsigned ADDu = STI.isABI_N64() ? Mips::DADDu : Mips::ADDu; - unsigned ADDiu = STI.isABI_N64() ? Mips::DADDiu : Mips::ADDiu; - - // if framepointer enabled, restore the stack pointer. - if (hasFP(MF)) { - // Find the first instruction that restores a callee-saved register. - MachineBasicBlock::iterator I = MBBI; - - for (unsigned i = 0; i < MFI->getCalleeSavedInfo().size(); ++i) - --I; - - // Insert instruction "move $sp, $fp" at this location. - BuildMI(MBB, I, dl, TII.get(ADDu), SP).addReg(FP).addReg(ZERO); - } - - // Get the number of bytes from FrameInfo - uint64_t StackSize = MFI->getStackSize(); - - if (!StackSize) - return; - - // Adjust stack. - if (isInt<16>(StackSize)) // addi sp, sp, (-stacksize) - BuildMI(MBB, MBBI, dl, TII.get(ADDiu), SP).addReg(SP).addImm(StackSize); - else // Expand immediate that doesn't fit in 16-bit. - expandLargeImm(SP, StackSize, STI.isABI_N64(), TII, MBB, MBBI, dl); -} - -void MipsFrameLowering:: -processFunctionBeforeCalleeSavedScan(MachineFunction &MF, - RegScavenger *RS) const { - MachineRegisterInfo& MRI = MF.getRegInfo(); - unsigned FP = STI.isABI_N64() ? Mips::FP_64 : Mips::FP; - - // FIXME: remove this code if register allocator can correctly mark - // $fp and $ra used or unused. - - // Mark $fp and $ra as used or unused. - if (hasFP(MF)) - MRI.setPhysRegUsed(FP); - - // The register allocator might determine $ra is used after seeing - // instruction "jr $ra", but we do not want PrologEpilogInserter to insert - // instructions to save/restore $ra unless there is a function call. - // To correct this, $ra is explicitly marked unused if there is no - // function call. - if (MF.getFrameInfo()->hasCalls()) - MRI.setPhysRegUsed(Mips::RA); - else { - MRI.setPhysRegUnused(Mips::RA); - MRI.setPhysRegUnused(Mips::RA_64); - } -} diff --git a/lib/Target/Mips/MipsFrameLowering.h b/lib/Target/Mips/MipsFrameLowering.h index bd1d89f..ed7b7fe 100644 --- a/lib/Target/Mips/MipsFrameLowering.h +++ b/lib/Target/Mips/MipsFrameLowering.h @@ -27,23 +27,19 @@ protected: public: explicit MipsFrameLowering(const MipsSubtarget &sti) - : TargetFrameLowering(StackGrowsDown, sti.hasMips64() ? 16 : 8, 0), - STI(sti) { - } + : TargetFrameLowering(StackGrowsDown, sti.hasMips64() ? 16 : 8, 0, + sti.hasMips64() ? 
16 : 8), STI(sti) {}
 
-  bool targetHandlesStackFrameRounding() const;
-
-  /// emitProlog/emitEpilog - These methods insert prolog and epilog code into
-  /// the function.
-  void emitPrologue(MachineFunction &MF) const;
-  void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+  static const MipsFrameLowering *create(MipsTargetMachine &TM,
+                                         const MipsSubtarget &ST);
 
   bool hasFP(const MachineFunction &MF) const;
-
-  void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
-                                            RegScavenger *RS) const;
 };
 
+/// Create MipsFrameLowering objects.
+const MipsFrameLowering *createMips16FrameLowering(const MipsSubtarget &ST);
+const MipsFrameLowering *createMipsSEFrameLowering(const MipsSubtarget &ST);
+
 } // End llvm namespace
 
 #endif
diff --git a/lib/Target/Mips/MipsISelDAGToDAG.cpp b/lib/Target/Mips/MipsISelDAGToDAG.cpp
index f0651c6..5a97c17 100644
--- a/lib/Target/Mips/MipsISelDAGToDAG.cpp
+++ b/lib/Target/Mips/MipsISelDAGToDAG.cpp
@@ -125,20 +125,19 @@ void MipsDAGToDAGISel::InitGlobalBaseReg(MachineFunction &MF) {
   MachineRegisterInfo &RegInfo = MF.getRegInfo();
   const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
   DebugLoc DL = I != MBB.end() ? I->getDebugLoc() : DebugLoc();
-  unsigned V0, V1, GlobalBaseReg = MipsFI->getGlobalBaseReg();
-  bool FixGlobalBaseReg = MipsFI->globalBaseRegFixed();
-
-  if (Subtarget.isABI_O32() && FixGlobalBaseReg)
-    // $gp is the global base register.
-    V0 = V1 = GlobalBaseReg;
-  else {
-    const TargetRegisterClass *RC;
-    RC = Subtarget.isABI_N64() ?
-      Mips::CPU64RegsRegisterClass : Mips::CPURegsRegisterClass;
-
-    V0 = RegInfo.createVirtualRegister(RC);
-    V1 = RegInfo.createVirtualRegister(RC);
-  }
+  unsigned V0, V1, V2, GlobalBaseReg = MipsFI->getGlobalBaseReg();
+  const TargetRegisterClass *RC;
+
+  if (Subtarget.isABI_N64())
+    RC = (const TargetRegisterClass*)&Mips::CPU64RegsRegClass;
+  else if (Subtarget.inMips16Mode())
+    RC = (const TargetRegisterClass*)&Mips::CPU16RegsRegClass;
+  else
+    RC = (const TargetRegisterClass*)&Mips::CPURegsRegClass;
+
+  V0 = RegInfo.createVirtualRegister(RC);
+  V1 = RegInfo.createVirtualRegister(RC);
+  V2 = RegInfo.createVirtualRegister(RC);
 
   if (Subtarget.isABI_N64()) {
     MF.getRegInfo().addLiveIn(Mips::T9_64);
@@ -150,10 +149,25 @@ void MipsDAGToDAGISel::InitGlobalBaseReg(MachineFunction &MF) {
     const GlobalValue *FName = MF.getFunction();
     BuildMI(MBB, I, DL, TII.get(Mips::LUi64), V0)
       .addGlobalAddress(FName, 0, MipsII::MO_GPOFF_HI);
-    BuildMI(MBB, I, DL, TII.get(Mips::DADDu), V1).addReg(V0).addReg(Mips::T9_64);
+    BuildMI(MBB, I, DL, TII.get(Mips::DADDu), V1).addReg(V0)
+      .addReg(Mips::T9_64);
     BuildMI(MBB, I, DL, TII.get(Mips::DADDiu), GlobalBaseReg).addReg(V1)
       .addGlobalAddress(FName, 0, MipsII::MO_GPOFF_LO);
-  } else if (MF.getTarget().getRelocationModel() == Reloc::Static) {
+    return;
+  }
+
+  if (Subtarget.inMips16Mode()) {
+    BuildMI(MBB, I, DL, TII.get(Mips::LiRxImmX16), V0)
+      .addExternalSymbol("_gp_disp", MipsII::MO_ABS_HI);
+    BuildMI(MBB, I, DL, TII.get(Mips::AddiuRxPcImmX16), V1)
+      .addExternalSymbol("_gp_disp", MipsII::MO_ABS_LO);
+    BuildMI(MBB, I, DL, TII.get(Mips::SllX16), V2).addReg(V0).addImm(16);
+    BuildMI(MBB, I, DL, TII.get(Mips::AdduRxRyRz16), GlobalBaseReg)
+      .addReg(V1).addReg(V2);
+    return;
+  }
+
+  if (MF.getTarget().getRelocationModel() == Reloc::Static) {
     // Set global register to __gnu_local_gp.
// // lui $v0, %hi(__gnu_local_gp) @@ -162,27 +176,48 @@ void MipsDAGToDAGISel::InitGlobalBaseReg(MachineFunction &MF) { .addExternalSymbol("__gnu_local_gp", MipsII::MO_ABS_HI); BuildMI(MBB, I, DL, TII.get(Mips::ADDiu), GlobalBaseReg).addReg(V0) .addExternalSymbol("__gnu_local_gp", MipsII::MO_ABS_LO); - } else { - MF.getRegInfo().addLiveIn(Mips::T9); - MBB.addLiveIn(Mips::T9); - - if (Subtarget.isABI_N32()) { - // lui $v0, %hi(%neg(%gp_rel(fname))) - // addu $v1, $v0, $t9 - // addiu $globalbasereg, $v1, %lo(%neg(%gp_rel(fname))) - const GlobalValue *FName = MF.getFunction(); - BuildMI(MBB, I, DL, TII.get(Mips::LUi), V0) - .addGlobalAddress(FName, 0, MipsII::MO_GPOFF_HI); - BuildMI(MBB, I, DL, TII.get(Mips::ADDu), V1).addReg(V0).addReg(Mips::T9); - BuildMI(MBB, I, DL, TII.get(Mips::ADDiu), GlobalBaseReg).addReg(V1) - .addGlobalAddress(FName, 0, MipsII::MO_GPOFF_LO); - } else if (!MipsFI->globalBaseRegFixed()) { - assert(Subtarget.isABI_O32()); - - BuildMI(MBB, I, DL, TII.get(Mips::SETGP2), GlobalBaseReg) - .addReg(Mips::T9); - } + return; + } + + MF.getRegInfo().addLiveIn(Mips::T9); + MBB.addLiveIn(Mips::T9); + + if (Subtarget.isABI_N32()) { + // lui $v0, %hi(%neg(%gp_rel(fname))) + // addu $v1, $v0, $t9 + // addiu $globalbasereg, $v1, %lo(%neg(%gp_rel(fname))) + const GlobalValue *FName = MF.getFunction(); + BuildMI(MBB, I, DL, TII.get(Mips::LUi), V0) + .addGlobalAddress(FName, 0, MipsII::MO_GPOFF_HI); + BuildMI(MBB, I, DL, TII.get(Mips::ADDu), V1).addReg(V0).addReg(Mips::T9); + BuildMI(MBB, I, DL, TII.get(Mips::ADDiu), GlobalBaseReg).addReg(V1) + .addGlobalAddress(FName, 0, MipsII::MO_GPOFF_LO); + return; } + + assert(Subtarget.isABI_O32()); + + // For O32 ABI, the following instruction sequence is emitted to initialize + // the global base register: + // + // 0. lui $2, %hi(_gp_disp) + // 1. addiu $2, $2, %lo(_gp_disp) + // 2. addu $globalbasereg, $2, $t9 + // + // We emit only the last instruction here. + // + // GNU linker requires that the first two instructions appear at the beginning + // of a function and no instructions be inserted before or between them. + // The two instructions are emitted during lowering to MC layer in order to + // avoid any reordering. + // + // Register $2 (Mips::V0) is added to the list of live-in registers to ensure + // the value instruction 1 (addiu) defines is valid when instruction 2 (addu) + // reads it. + MF.getRegInfo().addLiveIn(Mips::V0); + MBB.addLiveIn(Mips::V0); + BuildMI(MBB, I, DL, TII.get(Mips::ADDu), GlobalBaseReg) + .addReg(Mips::V0).addReg(Mips::T9); } bool MipsDAGToDAGISel::ReplaceUsesWithZeroReg(MachineRegisterInfo *MRI, @@ -207,12 +242,14 @@ bool MipsDAGToDAGISel::ReplaceUsesWithZeroReg(MachineRegisterInfo *MRI, // Replace uses with ZeroReg. for (MachineRegisterInfo::use_iterator U = MRI->use_begin(DstReg), - E = MRI->use_end(); U != E; ++U) { + E = MRI->use_end(); U != E;) { MachineOperand &MO = U.getOperand(); + unsigned OpNo = U.getOperandNo(); MachineInstr *MI = MO.getParent(); + ++U; // Do not replace if it is a phi's operand or is tied to def operand. - if (MI->isPHI() || MI->isRegTiedToDefOperand(U.getOperandNo())) + if (MI->isPHI() || MI->isRegTiedToDefOperand(OpNo) || MI->isPseudo()) continue; MO.setReg(ZeroReg); @@ -253,21 +290,6 @@ bool MipsDAGToDAGISel:: SelectAddr(SDNode *Parent, SDValue Addr, SDValue &Base, SDValue &Offset) { EVT ValTy = Addr.getValueType(); - // If Parent is an unaligned f32 load or store, select a (base + index) - // floating point load/store instruction (luxc1 or suxc1). 
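
(A side note on what SelectAddr is computing here: a MIPS lw/sw encodes only a signed 16-bit offset, so an address of the form base + C either keeps C in the offset field or folds it into the base register. A minimal standalone sketch of that split follows, with hypothetical helper names; it is an illustration, not code from this patch.)

#include <cstdint>
#include <utility>

// Hypothetical sketch of the (Base, Offset) split: keep the constant in
// the instruction's 16-bit signed offset field when it fits, otherwise
// fold it into the base and use offset 0.
static bool fitsSImm16(int64_t c) { return c >= -32768 && c <= 32767; }

static std::pair<int64_t, int64_t> splitAddress(int64_t base, int64_t c) {
  if (fitsSImm16(c))
    return {base, c};
  return {base + c, 0};
}
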
- const LSBaseSDNode* LS = 0; - - if (Parent && (LS = dyn_cast(Parent))) { - EVT VT = LS->getMemoryVT(); - - if (VT.getSizeInBits() / 8 > LS->getAlignment()) { - assert(TLI.allowsUnalignedMemoryAccesses(VT) && - "Unaligned loads/stores not supported for this type."); - if (VT == MVT::f32) - return false; - } - } - // if Address is FI, get the TargetFrameIndex. if (FrameIndexSDNode *FIN = dyn_cast(Addr)) { Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), ValTy); @@ -316,17 +338,20 @@ SelectAddr(SDNode *Parent, SDValue Addr, SDValue &Base, SDValue &Offset) { // lui $2, %hi($CPI1_0) // lwc1 $f0, %lo($CPI1_0)($2) if (Addr.getOperand(1).getOpcode() == MipsISD::Lo) { - SDValue LoVal = Addr.getOperand(1); - if (isa(LoVal.getOperand(0)) || - isa(LoVal.getOperand(0))) { + SDValue LoVal = Addr.getOperand(1), Opnd0 = LoVal.getOperand(0); + if (isa(Opnd0) || isa(Opnd0) || + isa(Opnd0)) { Base = Addr.getOperand(0); - Offset = LoVal.getOperand(0); + Offset = Opnd0; return true; } } // If an indexed floating point load/store can be emitted, return false. - if (LS && (LS->getMemoryVT() == MVT::f32 || LS->getMemoryVT() == MVT::f64) && + const LSBaseSDNode *LS = dyn_cast(Parent); + + if (LS && + (LS->getMemoryVT() == MVT::f32 || LS->getMemoryVT() == MVT::f64) && Subtarget.hasMips32r2Or64()) return false; } diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp index ace47ab..c5207c6 100644 --- a/lib/Target/Mips/MipsISelLowering.cpp +++ b/lib/Target/Mips/MipsISelLowering.cpp @@ -81,6 +81,14 @@ const char *MipsTargetLowering::getTargetNodeName(unsigned Opcode) const { case MipsISD::Sync: return "MipsISD::Sync"; case MipsISD::Ext: return "MipsISD::Ext"; case MipsISD::Ins: return "MipsISD::Ins"; + case MipsISD::LWL: return "MipsISD::LWL"; + case MipsISD::LWR: return "MipsISD::LWR"; + case MipsISD::SWL: return "MipsISD::SWL"; + case MipsISD::SWR: return "MipsISD::SWR"; + case MipsISD::LDL: return "MipsISD::LDL"; + case MipsISD::LDR: return "MipsISD::LDR"; + case MipsISD::SDL: return "MipsISD::SDL"; + case MipsISD::SDR: return "MipsISD::SDR"; default: return NULL; } } @@ -98,20 +106,25 @@ MipsTargetLowering(MipsTargetMachine &TM) setBooleanVectorContents(ZeroOrOneBooleanContent); // FIXME: Is this correct? 
// Set up the register classes - addRegisterClass(MVT::i32, Mips::CPURegsRegisterClass); + addRegisterClass(MVT::i32, &Mips::CPURegsRegClass); if (HasMips64) - addRegisterClass(MVT::i64, Mips::CPU64RegsRegisterClass); + addRegisterClass(MVT::i64, &Mips::CPU64RegsRegClass); + + if (Subtarget->inMips16Mode()) { + addRegisterClass(MVT::i32, &Mips::CPU16RegsRegClass); + addRegisterClass(MVT::i32, &Mips::CPURARegRegClass); + } if (!TM.Options.UseSoftFloat) { - addRegisterClass(MVT::f32, Mips::FGR32RegisterClass); + addRegisterClass(MVT::f32, &Mips::FGR32RegClass); // When dealing with single precision only, use libcalls if (!Subtarget->isSingleFloat()) { if (HasMips64) - addRegisterClass(MVT::f64, Mips::FGR64RegisterClass); + addRegisterClass(MVT::f64, &Mips::FGR64RegClass); else - addRegisterClass(MVT::f64, Mips::AFGR64RegisterClass); + addRegisterClass(MVT::f64, &Mips::AFGR64RegClass); } } @@ -139,15 +152,18 @@ MipsTargetLowering(MipsTargetMachine &TM) setOperationAction(ISD::SELECT, MVT::f32, Custom); setOperationAction(ISD::SELECT, MVT::f64, Custom); setOperationAction(ISD::SELECT, MVT::i32, Custom); + setOperationAction(ISD::SELECT_CC, MVT::f32, Custom); + setOperationAction(ISD::SELECT_CC, MVT::f64, Custom); setOperationAction(ISD::SETCC, MVT::f32, Custom); setOperationAction(ISD::SETCC, MVT::f64, Custom); setOperationAction(ISD::BRCOND, MVT::Other, Custom); - setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom); setOperationAction(ISD::VASTART, MVT::Other, Custom); setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom); setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom); setOperationAction(ISD::MEMBARRIER, MVT::Other, Custom); setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom); + setOperationAction(ISD::LOAD, MVT::i32, Custom); + setOperationAction(ISD::STORE, MVT::i32, Custom); if (!TM.Options.NoNaNsFPMath) { setOperationAction(ISD::FABS, MVT::f32, Custom); @@ -161,7 +177,14 @@ MipsTargetLowering(MipsTargetMachine &TM) setOperationAction(ISD::JumpTable, MVT::i64, Custom); setOperationAction(ISD::ConstantPool, MVT::i64, Custom); setOperationAction(ISD::SELECT, MVT::i64, Custom); - setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Custom); + setOperationAction(ISD::LOAD, MVT::i64, Custom); + setOperationAction(ISD::STORE, MVT::i64, Custom); + } + + if (!HasMips64) { + setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom); + setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom); + setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom); } setOperationAction(ISD::SDIV, MVT::i32, Expand); @@ -192,6 +215,8 @@ MipsTargetLowering(MipsTargetMachine &TM) setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Expand); setOperationAction(ISD::ROTL, MVT::i32, Expand); setOperationAction(ISD::ROTL, MVT::i64, Expand); + setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand); + setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Expand); if (!Subtarget->hasMips32r2()) setOperationAction(ISD::ROTR, MVT::i32, Expand); @@ -199,9 +224,6 @@ MipsTargetLowering(MipsTargetMachine &TM) if (!Subtarget->hasMips64r2()) setOperationAction(ISD::ROTR, MVT::i64, Expand); - setOperationAction(ISD::SHL_PARTS, MVT::i32, Expand); - setOperationAction(ISD::SRA_PARTS, MVT::i32, Expand); - setOperationAction(ISD::SRL_PARTS, MVT::i32, Expand); setOperationAction(ISD::FSIN, MVT::f32, Expand); setOperationAction(ISD::FSIN, MVT::f64, Expand); setOperationAction(ISD::FCOS, MVT::f32, Expand); @@ -243,9 +265,6 @@ MipsTargetLowering(MipsTargetMachine &TM) setInsertFencesForAtomic(true); - if 
(Subtarget->isSingleFloat()) - setOperationAction(ISD::SELECT_CC, MVT::f64, Expand); - if (!Subtarget->hasSEInReg()) { setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand); setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand); @@ -261,6 +280,13 @@ MipsTargetLowering(MipsTargetMachine &TM) setOperationAction(ISD::BSWAP, MVT::i64, Expand); } + if (HasMips64) { + setLoadExtAction(ISD::SEXTLOAD, MVT::i32, Custom); + setLoadExtAction(ISD::ZEXTLOAD, MVT::i32, Custom); + setLoadExtAction(ISD::EXTLOAD, MVT::i32, Custom); + setTruncStoreAction(MVT::i64, MVT::i32, Custom); + } + setTargetDAGCombine(ISD::ADDE); setTargetDAGCombine(ISD::SUBE); setTargetDAGCombine(ISD::SDIVREM); @@ -268,6 +294,7 @@ MipsTargetLowering(MipsTargetMachine &TM) setTargetDAGCombine(ISD::SELECT); setTargetDAGCombine(ISD::AND); setTargetDAGCombine(ISD::OR); + setTargetDAGCombine(ISD::ADD); setMinFunctionAlignment(HasMips64 ? 3 : 2); @@ -276,6 +303,8 @@ MipsTargetLowering(MipsTargetMachine &TM) setExceptionPointerRegister(IsN64 ? Mips::A0_64 : Mips::A0); setExceptionSelectorRegister(IsN64 ? Mips::A1_64 : Mips::A1); + + maxStoresPerMemcpy = 16; } bool MipsTargetLowering::allowsUnalignedMemoryAccesses(EVT VT) const { @@ -284,10 +313,7 @@ bool MipsTargetLowering::allowsUnalignedMemoryAccesses(EVT VT) const { switch (SVT) { case MVT::i64: case MVT::i32: - case MVT::i16: return true; - case MVT::f32: - return Subtarget->hasMips32r2Or64(); default: return false; } @@ -305,17 +331,17 @@ EVT MipsTargetLowering::getSetCCResultType(EVT VT) const { // Lo0: initial value of Lo register // Hi0: initial value of Hi register // Return true if pattern matching was successful. -static bool SelectMadd(SDNode* ADDENode, SelectionDAG* CurDAG) { +static bool SelectMadd(SDNode *ADDENode, SelectionDAG *CurDAG) { // ADDENode's second operand must be a flag output of an ADDC node in order // for the matching to be successful. - SDNode* ADDCNode = ADDENode->getOperand(2).getNode(); + SDNode *ADDCNode = ADDENode->getOperand(2).getNode(); if (ADDCNode->getOpcode() != ISD::ADDC) return false; SDValue MultHi = ADDENode->getOperand(0); SDValue MultLo = ADDCNode->getOperand(0); - SDNode* MultNode = MultHi.getNode(); + SDNode *MultNode = MultHi.getNode(); unsigned MultOpc = MultHi.getOpcode(); // MultHi and MultLo must be generated by the same node, @@ -378,17 +404,17 @@ static bool SelectMadd(SDNode* ADDENode, SelectionDAG* CurDAG) { // Lo0: initial value of Lo register // Hi0: initial value of Hi register // Return true if pattern matching was successful. -static bool SelectMsub(SDNode* SUBENode, SelectionDAG* CurDAG) { +static bool SelectMsub(SDNode *SUBENode, SelectionDAG *CurDAG) { // SUBENode's second operand must be a flag output of an SUBC node in order // for the matching to be successful. 
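
(The identity behind the SelectMadd/SelectMsub matches can be checked in isolation: keeping a 64-bit accumulator as 32-bit Hi/Lo halves and adding a 32x32->64 product into it, with the carry of the low add (ADDC) feeding the high add (ADDE), is exactly one multiply-accumulate, which is what the MIPS madd computes. A standalone host-C++ sketch with hypothetical names, not the DAG code itself:)

#include <cassert>
#include <cstdint>

struct HiLo { uint32_t hi, lo; };

// Add the 64-bit product rs*rt into an accumulator held as two 32-bit
// halves, propagating the carry from the low half into the high half.
static HiLo maddViaParts(HiLo acc, int32_t rs, int32_t rt) {
  uint64_t prod = (uint64_t)((int64_t)rs * (int64_t)rt);
  uint32_t mullo = (uint32_t)prod, mulhi = (uint32_t)(prod >> 32);
  uint32_t lo = acc.lo + mullo;         // the ADDC
  uint32_t carry = lo < acc.lo;         // its carry (glue) output
  uint32_t hi = acc.hi + mulhi + carry; // the ADDE
  return { hi, lo };
}

int main() {
  HiLo acc = { 0x00000001u, 0xFFFFFFF0u };
  HiLo r = maddViaParts(acc, 123456, -7890);
  int64_t expect = (int64_t)0x1FFFFFFF0LL + (int64_t)123456 * -7890;
  assert((int64_t)(((uint64_t)r.hi << 32) | r.lo) == expect);
  return 0;
}
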
- SDNode* SUBCNode = SUBENode->getOperand(2).getNode(); + SDNode *SUBCNode = SUBENode->getOperand(2).getNode(); if (SUBCNode->getOpcode() != ISD::SUBC) return false; SDValue MultHi = SUBENode->getOperand(1); SDValue MultLo = SUBCNode->getOperand(1); - SDNode* MultNode = MultHi.getNode(); + SDNode *MultNode = MultHi.getNode(); unsigned MultOpc = MultHi.getOpcode(); // MultHi and MultLo must be generated by the same node, @@ -443,9 +469,9 @@ static bool SelectMsub(SDNode* SUBENode, SelectionDAG* CurDAG) { return true; } -static SDValue PerformADDECombine(SDNode *N, SelectionDAG& DAG, +static SDValue PerformADDECombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, - const MipsSubtarget* Subtarget) { + const MipsSubtarget *Subtarget) { if (DCI.isBeforeLegalize()) return SDValue(); @@ -456,9 +482,9 @@ static SDValue PerformADDECombine(SDNode *N, SelectionDAG& DAG, return SDValue(); } -static SDValue PerformSUBECombine(SDNode *N, SelectionDAG& DAG, +static SDValue PerformSUBECombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, - const MipsSubtarget* Subtarget) { + const MipsSubtarget *Subtarget) { if (DCI.isBeforeLegalize()) return SDValue(); @@ -469,9 +495,9 @@ static SDValue PerformSUBECombine(SDNode *N, SelectionDAG& DAG, return SDValue(); } -static SDValue PerformDivRemCombine(SDNode *N, SelectionDAG& DAG, +static SDValue PerformDivRemCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, - const MipsSubtarget* Subtarget) { + const MipsSubtarget *Subtarget) { if (DCI.isBeforeLegalizeOps()) return SDValue(); @@ -546,7 +572,7 @@ static bool InvertFPCondCode(Mips::CondCode CC) { // Creates and returns an FPCmp node from a setcc node. // Returns Op if setcc is not a floating point comparison. -static SDValue CreateFPCmp(SelectionDAG& DAG, const SDValue& Op) { +static SDValue CreateFPCmp(SelectionDAG &DAG, const SDValue &Op) { // must be a SETCC node if (Op.getOpcode() != ISD::SETCC) return Op; @@ -568,7 +594,7 @@ static SDValue CreateFPCmp(SelectionDAG& DAG, const SDValue& Op) { } // Creates and returns a CMovFPT/F node. -static SDValue CreateCMovFP(SelectionDAG& DAG, SDValue Cond, SDValue True, +static SDValue CreateCMovFP(SelectionDAG &DAG, SDValue Cond, SDValue True, SDValue False, DebugLoc DL) { bool invert = InvertFPCondCode((Mips::CondCode) cast(Cond.getOperand(2)) @@ -578,9 +604,9 @@ static SDValue CreateCMovFP(SelectionDAG& DAG, SDValue Cond, SDValue True, True.getValueType(), True, False, Cond); } -static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG& DAG, +static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, - const MipsSubtarget* Subtarget) { + const MipsSubtarget *Subtarget) { if (DCI.isBeforeLegalizeOps()) return SDValue(); @@ -604,16 +630,16 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG& DAG, const DebugLoc DL = N->getDebugLoc(); ISD::CondCode CC = cast(SetCC.getOperand(2))->get(); SDValue True = N->getOperand(1); - + SetCC = DAG.getSetCC(DL, SetCC.getValueType(), SetCC.getOperand(0), SetCC.getOperand(1), ISD::getSetCCInverse(CC, true)); - + return DAG.getNode(ISD::SELECT, DL, FalseTy, SetCC, False, True); } -static SDValue PerformANDCombine(SDNode *N, SelectionDAG& DAG, +static SDValue PerformANDCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, - const MipsSubtarget* Subtarget) { + const MipsSubtarget *Subtarget) { // Pattern match EXT. 
// $dst = and ((sra or srl) $src , pos), (2**size - 1) // => ext $dst, $src, size, pos @@ -651,9 +677,9 @@ static SDValue PerformANDCombine(SDNode *N, SelectionDAG& DAG, DAG.getConstant(SMSize, MVT::i32)); } -static SDValue PerformORCombine(SDNode *N, SelectionDAG& DAG, +static SDValue PerformORCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, - const MipsSubtarget* Subtarget) { + const MipsSubtarget *Subtarget) { // Pattern match INS. // $dst = or (and $src1 , mask0), (and (shl $src, pos), mask1), // where mask1 = (2**size - 1) << pos, mask0 = ~mask1 @@ -705,6 +731,33 @@ static SDValue PerformORCombine(SDNode *N, SelectionDAG& DAG, DAG.getConstant(SMSize0, MVT::i32), And0.getOperand(0)); } +static SDValue PerformADDCombine(SDNode *N, SelectionDAG &DAG, + TargetLowering::DAGCombinerInfo &DCI, + const MipsSubtarget *Subtarget) { + // (add v0, (add v1, abs_lo(tjt))) => (add (add v0, v1), abs_lo(tjt)) + + if (DCI.isBeforeLegalizeOps()) + return SDValue(); + + SDValue Add = N->getOperand(1); + + if (Add.getOpcode() != ISD::ADD) + return SDValue(); + + SDValue Lo = Add.getOperand(1); + + if ((Lo.getOpcode() != MipsISD::Lo) || + (Lo.getOperand(0).getOpcode() != ISD::TargetJumpTable)) + return SDValue(); + + EVT ValTy = N->getValueType(0); + DebugLoc DL = N->getDebugLoc(); + + SDValue Add1 = DAG.getNode(ISD::ADD, DL, ValTy, N->getOperand(0), + Add.getOperand(0)); + return DAG.getNode(ISD::ADD, DL, ValTy, Add1, Lo); +} + SDValue MipsTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { SelectionDAG &DAG = DCI.DAG; @@ -720,11 +773,13 @@ SDValue MipsTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) case ISD::UDIVREM: return PerformDivRemCombine(N, DAG, DCI, Subtarget); case ISD::SELECT: - return PerformSELECTCombine(N, DAG, DCI, Subtarget); + return PerformSELECTCombine(N, DAG, DCI, Subtarget); case ISD::AND: return PerformANDCombine(N, DAG, DCI, Subtarget); case ISD::OR: return PerformORCombine(N, DAG, DCI, Subtarget); + case ISD::ADD: + return PerformADDCombine(N, DAG, DCI, Subtarget); } return SDValue(); @@ -737,19 +792,25 @@ LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::BRCOND: return LowerBRCOND(Op, DAG); case ISD::ConstantPool: return LowerConstantPool(Op, DAG); - case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG); case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG); case ISD::BlockAddress: return LowerBlockAddress(Op, DAG); case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG); case ISD::JumpTable: return LowerJumpTable(Op, DAG); case ISD::SELECT: return LowerSELECT(Op, DAG); + case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG); case ISD::SETCC: return LowerSETCC(Op, DAG); case ISD::VASTART: return LowerVASTART(Op, DAG); case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG); case ISD::FABS: return LowerFABS(Op, DAG); case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG); + case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG); case ISD::MEMBARRIER: return LowerMEMBARRIER(Op, DAG); case ISD::ATOMIC_FENCE: return LowerATOMIC_FENCE(Op, DAG); + case ISD::SHL_PARTS: return LowerShiftLeftParts(Op, DAG); + case ISD::SRA_PARTS: return LowerShiftRightParts(Op, DAG, true); + case ISD::SRL_PARTS: return LowerShiftRightParts(Op, DAG, false); + case ISD::LOAD: return LowerLOAD(Op, DAG); + case ISD::STORE: return LowerSTORE(Op, DAG); } return SDValue(); } @@ -784,7 +845,7 @@ static Mips::FPBranchCode GetFPBranchCodeFromCond(Mips::CondCode CC) { /* static MachineBasicBlock* 
ExpandCondMov(MachineInstr *MI, MachineBasicBlock *BB, DebugLoc dl, - const MipsSubtarget* Subtarget, + const MipsSubtarget *Subtarget, const TargetInstrInfo *TII, bool isFPCmp, unsigned Opc) { // There is no need to expand CMov instructions if target has @@ -1440,42 +1501,6 @@ MipsTargetLowering::EmitAtomicCmpSwapPartword(MachineInstr *MI, // Misc Lower Operation implementation //===----------------------------------------------------------------------===// SDValue MipsTargetLowering:: -LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const -{ - MachineFunction &MF = DAG.getMachineFunction(); - MipsFunctionInfo *MipsFI = MF.getInfo(); - unsigned SP = IsN64 ? Mips::SP_64 : Mips::SP; - - assert(getTargetMachine().getFrameLowering()->getStackAlignment() >= - cast(Op.getOperand(2).getNode())->getZExtValue() && - "Cannot lower if the alignment of the allocated space is larger than \ - that of the stack."); - - SDValue Chain = Op.getOperand(0); - SDValue Size = Op.getOperand(1); - DebugLoc dl = Op.getDebugLoc(); - - // Get a reference from Mips stack pointer - SDValue StackPointer = DAG.getCopyFromReg(Chain, dl, SP, getPointerTy()); - - // Subtract the dynamic size from the actual stack size to - // obtain the new stack size. - SDValue Sub = DAG.getNode(ISD::SUB, dl, getPointerTy(), StackPointer, Size); - - // The Sub result contains the new stack start address, so it - // must be placed in the stack pointer register. - Chain = DAG.getCopyToReg(StackPointer.getValue(1), dl, SP, Sub, SDValue()); - - // This node always has two return values: a new stack pointer - // value and a chain - SDVTList VTLs = DAG.getVTList(getPointerTy(), MVT::Other); - SDValue Ptr = DAG.getFrameIndex(MipsFI->getDynAllocFI(), getPointerTy()); - SDValue Ops[] = { Chain, Ptr, Chain.getValue(1) }; - - return DAG.getNode(MipsISD::DynAlloc, dl, VTLs, Ops, 3); -} - -SDValue MipsTargetLowering:: LowerBRCOND(SDValue Op, SelectionDAG &DAG) const { // The first operand is the chain, the second is the condition, the third is @@ -1512,6 +1537,19 @@ LowerSELECT(SDValue Op, SelectionDAG &DAG) const Op.getDebugLoc()); } +SDValue MipsTargetLowering:: +LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const +{ + DebugLoc DL = Op.getDebugLoc(); + EVT Ty = Op.getOperand(0).getValueType(); + SDValue Cond = DAG.getNode(ISD::SETCC, DL, getSetCCResultType(Ty), + Op.getOperand(0), Op.getOperand(1), + Op.getOperand(4)); + + return DAG.getNode(ISD::SELECT, DL, Op.getValueType(), Cond, Op.getOperand(2), + Op.getOperand(3)); +} + SDValue MipsTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { SDValue Cond = CreateFPCmp(DAG, Op); @@ -1614,10 +1652,13 @@ LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const const GlobalValue *GV = GA->getGlobal(); EVT PtrVT = getPointerTy(); - if (getTargetMachine().getRelocationModel() == Reloc::PIC_) { - // General Dynamic TLS Model - bool LocalDynamic = GV->hasInternalLinkage(); - unsigned Flag = LocalDynamic ? MipsII::MO_TLSLDM :MipsII::MO_TLSGD; + TLSModel::Model model = getTargetMachine().getTLSModel(GV); + + if (model == TLSModel::GeneralDynamic || model == TLSModel::LocalDynamic) { + // General Dynamic and Local Dynamic TLS Model. + unsigned Flag = (model == TLSModel::LocalDynamic) ? 
MipsII::MO_TLSLDM + : MipsII::MO_TLSGD; + SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, Flag); SDValue Argument = DAG.getNode(MipsISD::Wrapper, dl, PtrVT, GetGlobalReg(DAG, PtrVT), TGA); @@ -1632,16 +1673,16 @@ LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const Entry.Ty = PtrTy; Args.push_back(Entry); - std::pair CallResult = - LowerCallTo(DAG.getEntryNode(), PtrTy, + TargetLowering::CallLoweringInfo CLI(DAG.getEntryNode(), PtrTy, false, false, false, false, 0, CallingConv::C, /*isTailCall=*/false, /*doesNotRet=*/false, /*isReturnValueUsed=*/true, TlsGetAddr, Args, DAG, dl); + std::pair CallResult = LowerCallTo(CLI); SDValue Ret = CallResult.first; - if (!LocalDynamic) + if (model != TLSModel::LocalDynamic) return Ret; SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, @@ -1655,7 +1696,7 @@ LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const } SDValue Offset; - if (GV->isDeclaration()) { + if (model == TLSModel::InitialExec) { // Initial Exec TLS Model SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, MipsII::MO_GOTTPREL); @@ -1666,6 +1707,7 @@ LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const false, false, false, 0); } else { // Local Exec TLS Model + assert(model == TLSModel::LocalExec); SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, MipsII::MO_TPREL_HI); SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, @@ -1942,9 +1984,26 @@ LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const { return FrameAddr; } +SDValue MipsTargetLowering::LowerRETURNADDR(SDValue Op, + SelectionDAG &DAG) const { + // check the depth + assert((cast(Op.getOperand(0))->getZExtValue() == 0) && + "Return address can be determined only for current frame."); + + MachineFunction &MF = DAG.getMachineFunction(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + EVT VT = Op.getValueType(); + unsigned RA = IsN64 ? Mips::RA_64 : Mips::RA; + MFI->setReturnAddressIsTaken(true); + + // Return RA, which contains the return address. Mark it an implicit live-in. + unsigned Reg = MF.addLiveIn(RA, getRegClassFor(VT)); + return DAG.getCopyFromReg(DAG.getEntryNode(), Op.getDebugLoc(), Reg, VT); +} + // TODO: set SType according to the desired memory barrier behavior. SDValue -MipsTargetLowering::LowerMEMBARRIER(SDValue Op, SelectionDAG& DAG) const { +MipsTargetLowering::LowerMEMBARRIER(SDValue Op, SelectionDAG &DAG) const { unsigned SType = 0; DebugLoc dl = Op.getDebugLoc(); return DAG.getNode(MipsISD::Sync, dl, MVT::Other, Op.getOperand(0), @@ -1952,7 +2011,7 @@ MipsTargetLowering::LowerMEMBARRIER(SDValue Op, SelectionDAG& DAG) const { } SDValue MipsTargetLowering::LowerATOMIC_FENCE(SDValue Op, - SelectionDAG& DAG) const { + SelectionDAG &DAG) const { // FIXME: Need pseudo-fence for 'singlethread' fences // FIXME: Set SType for weaker fences where supported/appropriate. 
unsigned SType = 0; @@ -1961,6 +2020,210 @@ SDValue MipsTargetLowering::LowerATOMIC_FENCE(SDValue Op, DAG.getConstant(SType, MVT::i32)); } +SDValue MipsTargetLowering::LowerShiftLeftParts(SDValue Op, + SelectionDAG &DAG) const { + DebugLoc DL = Op.getDebugLoc(); + SDValue Lo = Op.getOperand(0), Hi = Op.getOperand(1); + SDValue Shamt = Op.getOperand(2); + + // if shamt < 32: + // lo = (shl lo, shamt) + // hi = (or (shl hi, shamt) (srl (srl lo, 1), ~shamt)) + // else: + // lo = 0 + // hi = (shl lo, shamt[4:0]) + SDValue Not = DAG.getNode(ISD::XOR, DL, MVT::i32, Shamt, + DAG.getConstant(-1, MVT::i32)); + SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, MVT::i32, Lo, + DAG.getConstant(1, MVT::i32)); + SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, MVT::i32, ShiftRight1Lo, + Not); + SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, MVT::i32, Hi, Shamt); + SDValue Or = DAG.getNode(ISD::OR, DL, MVT::i32, ShiftLeftHi, ShiftRightLo); + SDValue ShiftLeftLo = DAG.getNode(ISD::SHL, DL, MVT::i32, Lo, Shamt); + SDValue Cond = DAG.getNode(ISD::AND, DL, MVT::i32, Shamt, + DAG.getConstant(0x20, MVT::i32)); + Lo = DAG.getNode(ISD::SELECT, DL, MVT::i32, Cond, + DAG.getConstant(0, MVT::i32), ShiftLeftLo); + Hi = DAG.getNode(ISD::SELECT, DL, MVT::i32, Cond, ShiftLeftLo, Or); + + SDValue Ops[2] = {Lo, Hi}; + return DAG.getMergeValues(Ops, 2, DL); +} + +SDValue MipsTargetLowering::LowerShiftRightParts(SDValue Op, SelectionDAG &DAG, + bool IsSRA) const { + DebugLoc DL = Op.getDebugLoc(); + SDValue Lo = Op.getOperand(0), Hi = Op.getOperand(1); + SDValue Shamt = Op.getOperand(2); + + // if shamt < 32: + // lo = (or (shl (shl hi, 1), ~shamt) (srl lo, shamt)) + // if isSRA: + // hi = (sra hi, shamt) + // else: + // hi = (srl hi, shamt) + // else: + // if isSRA: + // lo = (sra hi, shamt[4:0]) + // hi = (sra hi, 31) + // else: + // lo = (srl hi, shamt[4:0]) + // hi = 0 + SDValue Not = DAG.getNode(ISD::XOR, DL, MVT::i32, Shamt, + DAG.getConstant(-1, MVT::i32)); + SDValue ShiftLeft1Hi = DAG.getNode(ISD::SHL, DL, MVT::i32, Hi, + DAG.getConstant(1, MVT::i32)); + SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, MVT::i32, ShiftLeft1Hi, Not); + SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, MVT::i32, Lo, Shamt); + SDValue Or = DAG.getNode(ISD::OR, DL, MVT::i32, ShiftLeftHi, ShiftRightLo); + SDValue ShiftRightHi = DAG.getNode(IsSRA ? ISD::SRA : ISD::SRL, DL, MVT::i32, + Hi, Shamt); + SDValue Cond = DAG.getNode(ISD::AND, DL, MVT::i32, Shamt, + DAG.getConstant(0x20, MVT::i32)); + SDValue Shift31 = DAG.getNode(ISD::SRA, DL, MVT::i32, Hi, + DAG.getConstant(31, MVT::i32)); + Lo = DAG.getNode(ISD::SELECT, DL, MVT::i32, Cond, ShiftRightHi, Or); + Hi = DAG.getNode(ISD::SELECT, DL, MVT::i32, Cond, + IsSRA ? Shift31 : DAG.getConstant(0, MVT::i32), + ShiftRightHi); + + SDValue Ops[2] = {Lo, Hi}; + return DAG.getMergeValues(Ops, 2, DL); +} + +static SDValue CreateLoadLR(unsigned Opc, SelectionDAG &DAG, LoadSDNode *LD, + SDValue Chain, SDValue Src, unsigned Offset) { + SDValue Ptr = LD->getBasePtr(); + EVT VT = LD->getValueType(0), MemVT = LD->getMemoryVT(); + EVT BasePtrVT = Ptr.getValueType(); + DebugLoc DL = LD->getDebugLoc(); + SDVTList VTList = DAG.getVTList(VT, MVT::Other); + + if (Offset) + Ptr = DAG.getNode(ISD::ADD, DL, BasePtrVT, Ptr, + DAG.getConstant(Offset, BasePtrVT)); + + SDValue Ops[] = { Chain, Ptr, Src }; + return DAG.getMemIntrinsicNode(Opc, DL, VTList, Ops, 3, MemVT, + LD->getMemOperand()); +} + +// Expand an unaligned 32 or 64-bit integer load node. 
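
(The expansion below relies on LWL/LWR reassembling an unaligned word. As a sanity check, here is a host-side model of the two instructions and of the (lwl (add ptr, 3)) / (lwr ptr) pairing; it is an illustrative sketch assuming a little-endian host, not code from this patch.)

#include <cassert>
#include <cstdint>
#include <cstdio>
#include <cstring>

// Aligned word containing byte address 'addr' (host assumed little-endian).
static uint32_t alignedWord(const uint8_t *mem, uint32_t addr) {
  uint32_t w;
  std::memcpy(&w, mem + (addr & ~3u), 4);
  return w;
}

// Little-endian LWR: bytes from addr to the end of its word fill the
// low-order bytes of rt; the remaining bytes of rt are preserved.
static uint32_t lwr(uint32_t rt, const uint8_t *mem, uint32_t addr) {
  uint32_t k = addr & 3;
  uint32_t w = alignedWord(mem, addr) >> (8 * k);
  uint32_t mask = (k == 0) ? 0xFFFFFFFFu : (0xFFFFFFFFu >> (8 * k));
  return (rt & ~mask) | (w & mask);
}

// Little-endian LWL: bytes from the start of the word up to addr fill the
// high-order bytes of rt; the remaining bytes of rt are preserved.
static uint32_t lwl(uint32_t rt, const uint8_t *mem, uint32_t addr) {
  uint32_t k = addr & 3;
  uint32_t w = alignedWord(mem, addr) << (8 * (3 - k));
  uint32_t mask = 0xFFFFFFFFu << (8 * (3 - k));
  return (rt & ~mask) | (w & mask);
}

int main() {
  uint8_t mem[8] = { 0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77 };
  uint32_t addr = 1; // deliberately unaligned
  // (lwr (lwl undef, addr + 3), addr): the little-endian pairing above.
  uint32_t v = lwr(lwl(0, mem, addr + 3), mem, addr);
  uint32_t expect;
  std::memcpy(&expect, mem + addr, 4);
  assert(v == expect);
  std::printf("0x%08x\n", (unsigned)v);
  return 0;
}
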
+SDValue MipsTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const { + LoadSDNode *LD = cast(Op); + EVT MemVT = LD->getMemoryVT(); + + // Return if load is aligned or if MemVT is neither i32 nor i64. + if ((LD->getAlignment() >= MemVT.getSizeInBits() / 8) || + ((MemVT != MVT::i32) && (MemVT != MVT::i64))) + return SDValue(); + + bool IsLittle = Subtarget->isLittle(); + EVT VT = Op.getValueType(); + ISD::LoadExtType ExtType = LD->getExtensionType(); + SDValue Chain = LD->getChain(), Undef = DAG.getUNDEF(VT); + + assert((VT == MVT::i32) || (VT == MVT::i64)); + + // Expand + // (set dst, (i64 (load baseptr))) + // to + // (set tmp, (ldl (add baseptr, 7), undef)) + // (set dst, (ldr baseptr, tmp)) + if ((VT == MVT::i64) && (ExtType == ISD::NON_EXTLOAD)) { + SDValue LDL = CreateLoadLR(MipsISD::LDL, DAG, LD, Chain, Undef, + IsLittle ? 7 : 0); + return CreateLoadLR(MipsISD::LDR, DAG, LD, LDL.getValue(1), LDL, + IsLittle ? 0 : 7); + } + + SDValue LWL = CreateLoadLR(MipsISD::LWL, DAG, LD, Chain, Undef, + IsLittle ? 3 : 0); + SDValue LWR = CreateLoadLR(MipsISD::LWR, DAG, LD, LWL.getValue(1), LWL, + IsLittle ? 0 : 3); + + // Expand + // (set dst, (i32 (load baseptr))) or + // (set dst, (i64 (sextload baseptr))) or + // (set dst, (i64 (extload baseptr))) + // to + // (set tmp, (lwl (add baseptr, 3), undef)) + // (set dst, (lwr baseptr, tmp)) + if ((VT == MVT::i32) || (ExtType == ISD::SEXTLOAD) || + (ExtType == ISD::EXTLOAD)) + return LWR; + + assert((VT == MVT::i64) && (ExtType == ISD::ZEXTLOAD)); + + // Expand + // (set dst, (i64 (zextload baseptr))) + // to + // (set tmp0, (lwl (add baseptr, 3), undef)) + // (set tmp1, (lwr baseptr, tmp0)) + // (set tmp2, (shl tmp1, 32)) + // (set dst, (srl tmp2, 32)) + DebugLoc DL = LD->getDebugLoc(); + SDValue Const32 = DAG.getConstant(32, MVT::i32); + SDValue SLL = DAG.getNode(ISD::SHL, DL, MVT::i64, LWR, Const32); + SDValue SRL = DAG.getNode(ISD::SRL, DL, MVT::i64, SLL, Const32); + SDValue Ops[] = { SRL, LWR.getValue(1) }; + return DAG.getMergeValues(Ops, 2, DL); +} + +static SDValue CreateStoreLR(unsigned Opc, SelectionDAG &DAG, StoreSDNode *SD, + SDValue Chain, unsigned Offset) { + SDValue Ptr = SD->getBasePtr(), Value = SD->getValue(); + EVT MemVT = SD->getMemoryVT(), BasePtrVT = Ptr.getValueType(); + DebugLoc DL = SD->getDebugLoc(); + SDVTList VTList = DAG.getVTList(MVT::Other); + + if (Offset) + Ptr = DAG.getNode(ISD::ADD, DL, BasePtrVT, Ptr, + DAG.getConstant(Offset, BasePtrVT)); + + SDValue Ops[] = { Chain, Value, Ptr }; + return DAG.getMemIntrinsicNode(Opc, DL, VTList, Ops, 3, MemVT, + SD->getMemOperand()); +} + +// Expand an unaligned 32 or 64-bit integer store node. +SDValue MipsTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const { + StoreSDNode *SD = cast(Op); + EVT MemVT = SD->getMemoryVT(); + + // Return if store is aligned or if MemVT is neither i32 nor i64. + if ((SD->getAlignment() >= MemVT.getSizeInBits() / 8) || + ((MemVT != MVT::i32) && (MemVT != MVT::i64))) + return SDValue(); + + bool IsLittle = Subtarget->isLittle(); + SDValue Value = SD->getValue(), Chain = SD->getChain(); + EVT VT = Value.getValueType(); + + // Expand + // (store val, baseptr) or + // (truncstore val, baseptr) + // to + // (swl val, (add baseptr, 3)) + // (swr val, baseptr) + if ((VT == MVT::i32) || SD->isTruncatingStore()) { + SDValue SWL = CreateStoreLR(MipsISD::SWL, DAG, SD, Chain, + IsLittle ? 3 : 0); + return CreateStoreLR(MipsISD::SWR, DAG, SD, SWL, IsLittle ? 
0 : 3); + } + + assert(VT == MVT::i64); + + // Expand + // (store val, baseptr) + // to + // (sdl val, (add baseptr, 7)) + // (sdr val, baseptr) + SDValue SDL = CreateStoreLR(MipsISD::SDL, DAG, SD, Chain, IsLittle ? 7 : 0); + return CreateStoreLR(MipsISD::SDR, DAG, SD, SDL, IsLittle ? 0 : 7); +} + //===----------------------------------------------------------------------===// // Calling Convention Implementation //===----------------------------------------------------------------------===// @@ -2153,11 +2416,11 @@ static unsigned getNextIntArgReg(unsigned Reg) { // Write ByVal Arg to arg registers and stack. static void -WriteByValArg(SDValue& ByValChain, SDValue Chain, DebugLoc dl, - SmallVector, 16>& RegsToPass, - SmallVector& MemOpChains, int& LastFI, +WriteByValArg(SDValue Chain, DebugLoc dl, + SmallVector, 16> &RegsToPass, + SmallVector &MemOpChains, SDValue StackPtr, MachineFrameInfo *MFI, SelectionDAG &DAG, SDValue Arg, - const CCValAssign &VA, const ISD::ArgFlagsTy& Flags, + const CCValAssign &VA, const ISD::ArgFlagsTy &Flags, MVT PtrType, bool isLittle) { unsigned LocMemOffset = VA.getLocMemOffset(); unsigned Offset = 0; @@ -2229,26 +2492,26 @@ WriteByValArg(SDValue& ByValChain, SDValue Chain, DebugLoc dl, return; } - // Create a fixed object on stack at offset LocMemOffset and copy - // remaining part of byval arg to it using memcpy. + // Copy remaining part of byval arg using memcpy. SDValue Src = DAG.getNode(ISD::ADD, dl, MVT::i32, Arg, DAG.getConstant(Offset, MVT::i32)); - LastFI = MFI->CreateFixedObject(RemainingSize, LocMemOffset, true); - SDValue Dst = DAG.getFrameIndex(LastFI, PtrType); - ByValChain = DAG.getMemcpy(ByValChain, dl, Dst, Src, - DAG.getConstant(RemainingSize, MVT::i32), - std::min(ByValAlign, (unsigned)4), - /*isVolatile=*/false, /*AlwaysInline=*/false, - MachinePointerInfo(0), MachinePointerInfo(0)); + SDValue Dst = DAG.getNode(ISD::ADD, dl, MVT::i32, StackPtr, + DAG.getIntPtrConstant(LocMemOffset)); + Chain = DAG.getMemcpy(Chain, dl, Dst, Src, + DAG.getConstant(RemainingSize, MVT::i32), + std::min(ByValAlign, (unsigned)4), + /*isVolatile=*/false, /*AlwaysInline=*/false, + MachinePointerInfo(0), MachinePointerInfo(0)); + MemOpChains.push_back(Chain); } // Copy Mips64 byVal arg to registers and stack. void static -PassByValArg64(SDValue& ByValChain, SDValue Chain, DebugLoc dl, - SmallVector, 16>& RegsToPass, - SmallVector& MemOpChains, int& LastFI, +PassByValArg64(SDValue Chain, DebugLoc dl, + SmallVector, 16> &RegsToPass, + SmallVector &MemOpChains, SDValue StackPtr, MachineFrameInfo *MFI, SelectionDAG &DAG, SDValue Arg, - const CCValAssign &VA, const ISD::ArgFlagsTy& Flags, + const CCValAssign &VA, const ISD::ArgFlagsTy &Flags, EVT PtrTy, bool isLittle) { unsigned ByValSize = Flags.getByValSize(); unsigned Alignment = std::min(Flags.getByValAlign(), (unsigned)8); @@ -2318,30 +2581,35 @@ PassByValArg64(SDValue& ByValChain, SDValue Chain, DebugLoc dl, assert(MemCpySize && "MemCpySize must not be zero."); - // Create a fixed object on stack at offset LocMemOffset and copy - // remainder of byval arg to it with memcpy. + // Copy remainder of byval arg to it with memcpy. 
SDValue Src = DAG.getNode(ISD::ADD, dl, PtrTy, Arg, DAG.getConstant(Offset, PtrTy)); - LastFI = MFI->CreateFixedObject(MemCpySize, LocMemOffset, true); - SDValue Dst = DAG.getFrameIndex(LastFI, PtrTy); - ByValChain = DAG.getMemcpy(ByValChain, dl, Dst, Src, - DAG.getConstant(MemCpySize, PtrTy), Alignment, - /*isVolatile=*/false, /*AlwaysInline=*/false, - MachinePointerInfo(0), MachinePointerInfo(0)); + SDValue Dst = DAG.getNode(ISD::ADD, dl, MVT::i64, StackPtr, + DAG.getIntPtrConstant(LocMemOffset)); + Chain = DAG.getMemcpy(Chain, dl, Dst, Src, + DAG.getConstant(MemCpySize, PtrTy), Alignment, + /*isVolatile=*/false, /*AlwaysInline=*/false, + MachinePointerInfo(0), MachinePointerInfo(0)); + MemOpChains.push_back(Chain); } /// LowerCall - functions arguments are copied from virtual regs to /// (physical regs)/(stack frame), CALLSEQ_START and CALLSEQ_END are emitted. /// TODO: isTailCall. SDValue -MipsTargetLowering::LowerCall(SDValue InChain, SDValue Callee, - CallingConv::ID CallConv, bool isVarArg, - bool doesNotRet, bool &isTailCall, - const SmallVectorImpl &Outs, - const SmallVectorImpl &OutVals, - const SmallVectorImpl &Ins, - DebugLoc dl, SelectionDAG &DAG, +MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, SmallVectorImpl &InVals) const { + SelectionDAG &DAG = CLI.DAG; + DebugLoc &dl = CLI.DL; + SmallVector &Outs = CLI.Outs; + SmallVector &OutVals = CLI.OutVals; + SmallVector &Ins = CLI.Ins; + SDValue Chain = CLI.Chain; + SDValue Callee = CLI.Callee; + bool &isTailCall = CLI.IsTailCall; + CallingConv::ID CallConv = CLI.CallConv; + bool isVarArg = CLI.IsVarArg; + // MIPs target does not yet support tail call optimization. isTailCall = false; @@ -2356,7 +2624,9 @@ MipsTargetLowering::LowerCall(SDValue InChain, SDValue Callee, CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), getTargetMachine(), ArgLocs, *DAG.getContext()); - if (IsO32) + if (CallConv == CallingConv::Fast) + CCInfo.AnalyzeCallOperands(Outs, CC_Mips_FastCC); + else if (IsO32) CCInfo.AnalyzeCallOperands(Outs, CC_MipsO32); else if (HasMips64) AnalyzeMips64CallOperands(CCInfo, Outs); @@ -2365,54 +2635,32 @@ MipsTargetLowering::LowerCall(SDValue InChain, SDValue Callee, // Get a count of how many bytes are to be pushed on the stack. unsigned NextStackOffset = CCInfo.getNextStackOffset(); - - // Chain is the output chain of the last Load/Store or CopyToReg node. - // ByValChain is the output chain of the last Memcpy node created for copying - // byval arguments to the stack. - SDValue Chain, CallSeqStart, ByValChain; - SDValue NextStackOffsetVal = DAG.getIntPtrConstant(NextStackOffset, true); - Chain = CallSeqStart = DAG.getCALLSEQ_START(InChain, NextStackOffsetVal); - ByValChain = InChain; - - // If this is the first call, create a stack frame object that points to - // a location to which .cprestore saves $gp. - if (IsO32 && IsPIC && MipsFI->globalBaseRegFixed() && !MipsFI->getGPFI()) - MipsFI->setGPFI(MFI->CreateFixedObject(4, 0, true)); - - // Get the frame index of the stack frame object that points to the location - // of dynamically allocated area on the stack. - int DynAllocFI = MipsFI->getDynAllocFI(); + unsigned StackAlignment = TFL->getStackAlignment(); + NextStackOffset = RoundUpToAlignment(NextStackOffset, StackAlignment); // Update size of the maximum argument space. // For O32, a minimum of four words (16 bytes) of argument space is // allocated. 
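Both of these adjustments, the alignment round-up a few lines above and the O32 minimum applied next, can be restated as one self-contained helper (a sketch assuming a power-of-two stack alignment, not the LLVM RoundUpToAlignment API):

#include <algorithm>
#include <cassert>
#include <cstdint>

// Model of the outgoing-argument-area sizing: round the offset computed by
// CCState up to the ABI stack alignment, then, for O32 under the normal
// calling convention, reserve at least the four-word ($a0-$a3) home area.
uint64_t nextStackOffset(uint64_t RawOffset, uint64_t StackAlign,
                         bool IsO32, bool IsFastCC) {
  assert(StackAlign && (StackAlign & (StackAlign - 1)) == 0 &&
         "stack alignment must be a power of two");
  uint64_t Offset = (RawOffset + StackAlign - 1) & ~(StackAlign - 1);
  if (IsO32 && !IsFastCC)
    Offset = std::max<uint64_t>(Offset, 16);
  return Offset;
}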
- if (IsO32) + if (IsO32 && (CallConv != CallingConv::Fast)) NextStackOffset = std::max(NextStackOffset, (unsigned)16); - unsigned MaxCallFrameSize = MipsFI->getMaxCallFrameSize(); - - if (MaxCallFrameSize < NextStackOffset) { - MipsFI->setMaxCallFrameSize(NextStackOffset); - - // Set the offsets relative to $sp of the $gp restore slot and dynamically - // allocated stack space. These offsets must be aligned to a boundary - // determined by the stack alignment of the ABI. - unsigned StackAlignment = TFL->getStackAlignment(); - NextStackOffset = (NextStackOffset + StackAlignment - 1) / - StackAlignment * StackAlignment; + // Chain is the output chain of the last Load/Store or CopyToReg node. + // ByValChain is the output chain of the last Memcpy node created for copying + // byval arguments to the stack. + SDValue NextStackOffsetVal = DAG.getIntPtrConstant(NextStackOffset, true); + Chain = DAG.getCALLSEQ_START(Chain, NextStackOffsetVal); - if (MipsFI->needGPSaveRestore()) - MFI->setObjectOffset(MipsFI->getGPFI(), NextStackOffset); + SDValue StackPtr = DAG.getCopyFromReg(Chain, dl, + IsN64 ? Mips::SP_64 : Mips::SP, + getPointerTy()); - MFI->setObjectOffset(DynAllocFI, NextStackOffset); - } + if (MipsFI->getMaxCallFrameSize() < NextStackOffset) + MipsFI->setMaxCallFrameSize(NextStackOffset); // With EABI is it possible to have 16 args on registers. SmallVector, 16> RegsToPass; SmallVector MemOpChains; - int FirstFI = -MFI->getNumFixedObjects() - 1, LastFI = 0; - // Walk the register/memloc assignments, inserting copies/loads. for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { SDValue Arg = OutVals[i]; @@ -2425,11 +2673,11 @@ MipsTargetLowering::LowerCall(SDValue InChain, SDValue Callee, assert(Flags.getByValSize() && "ByVal args of size 0 should have been ignored by front-end."); if (IsO32) - WriteByValArg(ByValChain, Chain, dl, RegsToPass, MemOpChains, LastFI, + WriteByValArg(Chain, dl, RegsToPass, MemOpChains, StackPtr, MFI, DAG, Arg, VA, Flags, getPointerTy(), Subtarget->isLittle()); else - PassByValArg64(ByValChain, Chain, dl, RegsToPass, MemOpChains, LastFI, + PassByValArg64(Chain, dl, RegsToPass, MemOpChains, StackPtr, MFI, DAG, Arg, VA, Flags, getPointerTy(), Subtarget->isLittle()); continue; @@ -2479,29 +2727,14 @@ MipsTargetLowering::LowerCall(SDValue InChain, SDValue Callee, // Register can't get to this point... assert(VA.isMemLoc()); - // Create the frame index object for this incoming parameter - LastFI = MFI->CreateFixedObject(ValVT.getSizeInBits()/8, - VA.getLocMemOffset(), true); - SDValue PtrOff = DAG.getFrameIndex(LastFI, getPointerTy()); - // emit ISD::STORE whichs stores the // parameter value to a stack Location + SDValue PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, + DAG.getIntPtrConstant(VA.getLocMemOffset())); MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo(), false, false, 0)); } - // Extend range of indices of frame objects for outgoing arguments that were - // created during this function call. Skip this step if no such objects were - // created. - if (LastFI) - MipsFI->extendOutArgFIRange(FirstFI, LastFI); - - // If a memcpy has been created to copy a byval arg to a stack, replace the - // chain input of CallSeqStart with ByValChain. - if (InChain != ByValChain) - DAG.UpdateNodeOperands(CallSeqStart.getNode(), ByValChain, - NextStackOffsetVal); - // Transform all store nodes into one single node because all store // nodes are independent of each other. 
if (!MemOpChains.empty()) @@ -2565,6 +2798,9 @@ MipsTargetLowering::LowerCall(SDValue InChain, SDValue Callee, } } + // T9 register operand. + SDValue T9; + // T9 should contain the address of the callee function if // -reloction-model=pic or it is an indirect call. if (IsPICCall || !GlobalOrExternal) { @@ -2572,7 +2808,19 @@ MipsTargetLowering::LowerCall(SDValue InChain, SDValue Callee, unsigned T9Reg = IsN64 ? Mips::T9_64 : Mips::T9; Chain = DAG.getCopyToReg(Chain, dl, T9Reg, Callee, SDValue(0, 0)); InFlag = Chain.getValue(1); - Callee = DAG.getRegister(T9Reg, getPointerTy()); + + if (Subtarget->inMips16Mode()) + T9 = DAG.getRegister(T9Reg, getPointerTy()); + else + Callee = DAG.getRegister(T9Reg, getPointerTy()); + } + + // Insert node "GP copy globalreg" before call to function. + // Lazy-binding stubs require GP to point to the GOT. + if (IsPICCall) { + unsigned GPReg = IsN64 ? Mips::GP_64 : Mips::GP; + EVT Ty = IsN64 ? MVT::i64 : MVT::i32; + RegsToPass.push_back(std::make_pair(GPReg, GetGlobalReg(DAG, Ty))); } // Build a sequence of copy-to-reg nodes chained together with token @@ -2600,6 +2848,10 @@ MipsTargetLowering::LowerCall(SDValue InChain, SDValue Callee, Ops.push_back(DAG.getRegister(RegsToPass[i].first, RegsToPass[i].second.getValueType())); + // Add T9 register operand. + if (T9.getNode()) + Ops.push_back(T9); + // Add a register mask operand representing the call-preserved registers. const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo(); const uint32_t *Mask = TRI->getCallPreservedMask(CallConv); @@ -2613,8 +2865,7 @@ MipsTargetLowering::LowerCall(SDValue InChain, SDValue Callee, InFlag = Chain.getValue(1); // Create the CALLSEQ_END node. - Chain = DAG.getCALLSEQ_END(Chain, - DAG.getIntPtrConstant(NextStackOffset, true), + Chain = DAG.getCALLSEQ_END(Chain, NextStackOffsetVal, DAG.getIntPtrConstant(0, true), InFlag); InFlag = Chain.getValue(1); @@ -2635,7 +2886,7 @@ MipsTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, // Assign locations to each value returned by this call. SmallVector RVLocs; CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), - getTargetMachine(), RVLocs, *DAG.getContext()); + getTargetMachine(), RVLocs, *DAG.getContext()); CCInfo.AnalyzeCallResult(Ins, RetCC_Mips); @@ -2654,9 +2905,9 @@ MipsTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, // Formal Arguments Calling Convention Implementation //===----------------------------------------------------------------------===// static void ReadByValArg(MachineFunction &MF, SDValue Chain, DebugLoc dl, - std::vector& OutChains, + std::vector &OutChains, SelectionDAG &DAG, unsigned NumWords, SDValue FIN, - const CCValAssign &VA, const ISD::ArgFlagsTy& Flags, + const CCValAssign &VA, const ISD::ArgFlagsTy &Flags, const Argument *FuncArg) { unsigned LocMem = VA.getLocMemOffset(); unsigned FirstWord = LocMem / 4; @@ -2668,7 +2919,7 @@ static void ReadByValArg(MachineFunction &MF, SDValue Chain, DebugLoc dl, break; unsigned SrcReg = O32IntRegs[CurWord]; - unsigned Reg = AddLiveIn(MF, SrcReg, Mips::CPURegsRegisterClass); + unsigned Reg = AddLiveIn(MF, SrcReg, &Mips::CPURegsRegClass); SDValue StorePtr = DAG.getNode(ISD::ADD, dl, MVT::i32, FIN, DAG.getConstant(i * 4, MVT::i32)); SDValue Store = DAG.getStore(Chain, dl, DAG.getRegister(Reg, MVT::i32), @@ -2681,8 +2932,8 @@ static void ReadByValArg(MachineFunction &MF, SDValue Chain, DebugLoc dl, // Create frame object on stack and copy registers used for byval passing to it. 
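ReadByValArg above walks O32's four integer argument registers; the Mips64 variant defined next does the same over eight 64-bit registers. Before that, it is worth pinning down the O32 mapping ReadByValArg implements: word index within the argument area decides register versus stack. A runnable standalone model (the offset and size are hypothetical):

#include <cstdio>

int main() {
  const char *O32IntRegs[4] = {"$a0", "$a1", "$a2", "$a3"};
  unsigned LocMemOffset = 8; // hypothetical: byval arg assigned offset 8
  unsigned NumWords = 3;     // hypothetical: 12-byte aggregate
  unsigned FirstWord = LocMemOffset / 4;
  for (unsigned I = 0; I < NumWords; ++I) {
    unsigned CurWord = FirstWord + I;
    if (CurWord < 4)
      std::printf("word %u arrives in %s\n", I, O32IntRegs[CurWord]);
    else
      std::printf("word %u arrives on the stack\n", I);
  }
  return 0;
}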
static unsigned CopyMips64ByValRegs(MachineFunction &MF, SDValue Chain, DebugLoc dl, - std::vector& OutChains, SelectionDAG &DAG, - const CCValAssign &VA, const ISD::ArgFlagsTy& Flags, + std::vector &OutChains, SelectionDAG &DAG, + const CCValAssign &VA, const ISD::ArgFlagsTy &Flags, MachineFrameInfo *MFI, bool IsRegLoc, SmallVectorImpl &InVals, MipsFunctionInfo *MipsFI, EVT PtrTy, const Argument *FuncArg) { @@ -2705,7 +2956,7 @@ CopyMips64ByValRegs(MachineFunction &MF, SDValue Chain, DebugLoc dl, // Copy arg registers. for (unsigned I = 0; (Reg != Mips64IntRegs + 8) && (I < NumRegs); ++Reg, ++I) { - unsigned VReg = AddLiveIn(MF, *Reg, Mips::CPU64RegsRegisterClass); + unsigned VReg = AddLiveIn(MF, *Reg, &Mips::CPU64RegsRegClass); SDValue StorePtr = DAG.getNode(ISD::ADD, dl, PtrTy, FIN, DAG.getConstant(I * 8, PtrTy)); SDValue Store = DAG.getStore(Chain, dl, DAG.getRegister(VReg, MVT::i64), @@ -2741,7 +2992,9 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain, CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), getTargetMachine(), ArgLocs, *DAG.getContext()); - if (IsO32) + if (CallConv == CallingConv::Fast) + CCInfo.AnalyzeFormalArguments(Ins, CC_Mips_FastCC); + else if (IsO32) CCInfo.AnalyzeFormalArguments(Ins, CC_MipsO32); else CCInfo.AnalyzeFormalArguments(Ins, CC_Mips); @@ -2781,13 +3034,13 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain, const TargetRegisterClass *RC; if (RegVT == MVT::i32) - RC = Mips::CPURegsRegisterClass; + RC = &Mips::CPURegsRegClass; else if (RegVT == MVT::i64) - RC = Mips::CPU64RegsRegisterClass; + RC = &Mips::CPU64RegsRegClass; else if (RegVT == MVT::f32) - RC = Mips::FGR32RegisterClass; + RC = &Mips::FGR32RegClass; else if (RegVT == MVT::f64) - RC = HasMips64 ? Mips::FGR64RegisterClass : Mips::AFGR64RegisterClass; + RC = HasMips64 ? &Mips::FGR64RegClass : &Mips::AFGR64RegClass; else llvm_unreachable("RegVT not supported by FormalArguments Lowering"); @@ -2861,8 +3114,9 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain, const uint16_t *ArgRegs = IsO32 ? O32IntRegs : Mips64IntRegs; unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs, NumOfRegs); int FirstRegSlotOffset = IsO32 ? 0 : -64 ; // offset of $a0's slot. - const TargetRegisterClass *RC - = IsO32 ? Mips::CPURegsRegisterClass : Mips::CPU64RegsRegisterClass; + const TargetRegisterClass *RC = IsO32 ? + (const TargetRegisterClass*)&Mips::CPURegsRegClass : + (const TargetRegisterClass*)&Mips::CPU64RegsRegClass; unsigned RegSize = RC->getSize(); int RegSlotOffset = FirstRegSlotOffset + Idx * RegSize; @@ -2926,7 +3180,7 @@ MipsTargetLowering::LowerReturn(SDValue Chain, // CCState - Info about the registers and stack slot. CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), - getTargetMachine(), RVLocs, *DAG.getContext()); + getTargetMachine(), RVLocs, *DAG.getContext()); // Analize return values. 
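Stepping back to the LowerFormalArguments hunk above: register classes are now taken by address (&Mips::CPURegsRegClass) rather than through the removed *RegisterClass globals, but the dispatch itself is unchanged and simple enough to restate standalone (strings as stand-ins for the class objects):

// Sketch of the RegVT -> register-class mapping used for formal arguments.
enum ValType { I32, I64, F32, F64 };

const char *regClassFor(ValType RegVT, bool HasMips64) {
  switch (RegVT) {
  case I32: return "CPURegs";
  case I64: return "CPU64Regs";
  case F32: return "FGR32";
  case F64: return HasMips64 ? "FGR64" : "AFGR64"; // AFGR64 = paired 32-bit FPRs
  }
  return 0; // not reached; mirrors llvm_unreachable in the source
}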
CCInfo.AnalyzeReturn(Outs, RetCC_Mips); @@ -2972,11 +3226,10 @@ MipsTargetLowering::LowerReturn(SDValue Chain, // Return on Mips is always a "jr $ra" if (Flag.getNode()) - return DAG.getNode(MipsISD::Ret, dl, MVT::Other, - Chain, DAG.getRegister(Mips::RA, MVT::i32), Flag); - else // Return Void - return DAG.getNode(MipsISD::Ret, dl, MVT::Other, - Chain, DAG.getRegister(Mips::RA, MVT::i32)); + return DAG.getNode(MipsISD::Ret, dl, MVT::Other, Chain, Flag); + + // Return Void + return DAG.getNode(MipsISD::Ret, dl, MVT::Other, Chain); } //===----------------------------------------------------------------------===// @@ -2995,13 +3248,19 @@ getConstraintType(const std::string &Constraint) const // unless generating MIPS16 code. // 'y' : Equivalent to r; retained for // backwards compatibility. - // 'f' : Floating Point registers. + // 'c' : A register suitable for use in an indirect + // jump. This will always be $25 for -mabicalls. + // 'l' : The lo register. 1 word storage. + // 'x' : The hilo register pair. Double word storage. if (Constraint.size() == 1) { switch (Constraint[0]) { default : break; case 'd': case 'y': case 'f': + case 'c': + case 'l': + case 'x': return C_RegisterClass; } } @@ -3035,6 +3294,22 @@ MipsTargetLowering::getSingleConstraintMatchWeight( if (type->isFloatTy()) weight = CW_Register; break; + case 'c': // $25 for indirect jumps + case 'l': // lo register + case 'x': // hilo register pair + if (type->isIntegerTy()) + weight = CW_SpecificReg; + break; + case 'I': // signed 16 bit immediate + case 'J': // integer zero + case 'K': // unsigned 16 bit immediate + case 'L': // signed 32 bit immediate where lower 16 bits are 0 + case 'N': // immediate in the range of -65535 to -1 (inclusive) + case 'O': // signed 15 bit immediate (+- 16383) + case 'P': // immediate in the range of 65535 to 1 (inclusive) + if (isa(CallOperandVal)) + weight = CW_Constant; + break; } return weight; } @@ -3050,30 +3325,152 @@ getRegForInlineAsmConstraint(const std::string &Constraint, EVT VT) const case 'd': // Address register. Same as 'r' unless generating MIPS16 code. case 'y': // Same as 'r'. Exists for compatibility. 
case 'r': - if (VT == MVT::i32) - return std::make_pair(0U, Mips::CPURegsRegisterClass); - assert(VT == MVT::i64 && "Unexpected type."); - return std::make_pair(0U, Mips::CPU64RegsRegisterClass); + if (VT == MVT::i32 || VT == MVT::i16 || VT == MVT::i8) + return std::make_pair(0U, &Mips::CPURegsRegClass); + if (VT == MVT::i64 && !HasMips64) + return std::make_pair(0U, &Mips::CPURegsRegClass); + if (VT == MVT::i64 && HasMips64) + return std::make_pair(0U, &Mips::CPU64RegsRegClass); + // This will generate an error message + return std::make_pair(0u, static_cast<const TargetRegisterClass*>(0)); case 'f': if (VT == MVT::f32) - return std::make_pair(0U, Mips::FGR32RegisterClass); + return std::make_pair(0U, &Mips::FGR32RegClass); if ((VT == MVT::f64) && (!Subtarget->isSingleFloat())) { if (Subtarget->isFP64bit()) - return std::make_pair(0U, Mips::FGR64RegisterClass); - else - return std::make_pair(0U, Mips::AFGR64RegisterClass); + return std::make_pair(0U, &Mips::FGR64RegClass); + return std::make_pair(0U, &Mips::AFGR64RegClass); } + break; + case 'c': // register suitable for indirect jump + if (VT == MVT::i32) + return std::make_pair((unsigned)Mips::T9, &Mips::CPURegsRegClass); + assert(VT == MVT::i64 && "Unexpected type."); + return std::make_pair((unsigned)Mips::T9_64, &Mips::CPU64RegsRegClass); + case 'l': // the lo register + if (VT == MVT::i32) + return std::make_pair((unsigned)Mips::LO, &Mips::HILORegClass); + return std::make_pair((unsigned)Mips::LO64, &Mips::HILO64RegClass); + case 'x': // the hilo register pair + // FIXME: Not triggering the use of both hi and lo + // This will generate an error message + return std::make_pair(0u, static_cast<const TargetRegisterClass*>(0)); } } return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT); } +/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops +/// vector. If it is invalid, don't add anything to Ops. +void MipsTargetLowering::LowerAsmOperandForConstraint(SDValue Op, + std::string &Constraint, + std::vector<SDValue> &Ops, + SelectionDAG &DAG) const { + SDValue Result(0, 0); + + // Only support length 1 constraints for now. 
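Each constraint letter in the switch that follows gates on an immediate range before a target constant is produced. Restated as plain, self-contained predicates (the source uses LLVM's isInt/isUInt templates; note that isInt<15> admits -16384, slightly wider than the "+- 16383" written in the earlier comment):

#include <cstdint>

bool fitsI(int64_t V) { return V >= -32768 && V <= 32767; }  // 'I' signed 16-bit
bool fitsJ(int64_t V) { return V == 0; }                     // 'J' integer zero
bool fitsK(uint64_t V) { return V <= 65535; }                // 'K' unsigned 16-bit
bool fitsL(int64_t V) {                                      // 'L' lui-loadable
  return V >= INT32_MIN && V <= INT32_MAX && (V & 0xffff) == 0;
}
bool fitsN(int64_t V) { return V >= -65535 && V <= -1; }     // 'N'
bool fitsO(int64_t V) { return V >= -16384 && V <= 16383; }  // 'O' signed 15-bit
bool fitsP(int64_t V) { return V >= 1 && V <= 65535; }       // 'P'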
+ if (Constraint.length() > 1) return; + + char ConstraintLetter = Constraint[0]; + switch (ConstraintLetter) { + default: break; // This will fall through to the generic implementation + case 'I': // Signed 16 bit constant + // If this fails, the parent routine will give an error + if (ConstantSDNode *C = dyn_cast(Op)) { + EVT Type = Op.getValueType(); + int64_t Val = C->getSExtValue(); + if (isInt<16>(Val)) { + Result = DAG.getTargetConstant(Val, Type); + break; + } + } + return; + case 'J': // integer zero + if (ConstantSDNode *C = dyn_cast(Op)) { + EVT Type = Op.getValueType(); + int64_t Val = C->getZExtValue(); + if (Val == 0) { + Result = DAG.getTargetConstant(0, Type); + break; + } + } + return; + case 'K': // unsigned 16 bit immediate + if (ConstantSDNode *C = dyn_cast(Op)) { + EVT Type = Op.getValueType(); + uint64_t Val = (uint64_t)C->getZExtValue(); + if (isUInt<16>(Val)) { + Result = DAG.getTargetConstant(Val, Type); + break; + } + } + return; + case 'L': // signed 32 bit immediate where lower 16 bits are 0 + if (ConstantSDNode *C = dyn_cast(Op)) { + EVT Type = Op.getValueType(); + int64_t Val = C->getSExtValue(); + if ((isInt<32>(Val)) && ((Val & 0xffff) == 0)){ + Result = DAG.getTargetConstant(Val, Type); + break; + } + } + return; + case 'N': // immediate in the range of -65535 to -1 (inclusive) + if (ConstantSDNode *C = dyn_cast(Op)) { + EVT Type = Op.getValueType(); + int64_t Val = C->getSExtValue(); + if ((Val >= -65535) && (Val <= -1)) { + Result = DAG.getTargetConstant(Val, Type); + break; + } + } + return; + case 'O': // signed 15 bit immediate + if (ConstantSDNode *C = dyn_cast(Op)) { + EVT Type = Op.getValueType(); + int64_t Val = C->getSExtValue(); + if ((isInt<15>(Val))) { + Result = DAG.getTargetConstant(Val, Type); + break; + } + } + return; + case 'P': // immediate in the range of 1 to 65535 (inclusive) + if (ConstantSDNode *C = dyn_cast(Op)) { + EVT Type = Op.getValueType(); + int64_t Val = C->getSExtValue(); + if ((Val <= 65535) && (Val >= 1)) { + Result = DAG.getTargetConstant(Val, Type); + break; + } + } + return; + } + + if (Result.getNode()) { + Ops.push_back(Result); + return; + } + + TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG); +} + bool MipsTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const { // The Mips target isn't yet aware of offsets. return false; } +EVT MipsTargetLowering::getOptimalMemOpType(uint64_t Size, unsigned DstAlign, + unsigned SrcAlign, bool IsZeroVal, + bool MemcpyStrSrc, + MachineFunction &MF) const { + if (Subtarget->hasMips64()) + return MVT::i64; + + return MVT::i32; +} + bool MipsTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const { if (VT != MVT::f32 && VT != MVT::f64) return false; diff --git a/lib/Target/Mips/MipsISelLowering.h b/lib/Target/Mips/MipsISelLowering.h index c36f40f..95ea8fa 100644 --- a/lib/Target/Mips/MipsISelLowering.h +++ b/lib/Target/Mips/MipsISelLowering.h @@ -79,7 +79,17 @@ namespace llvm { Sync, Ext, - Ins + Ins, + + // Load/Store Left/Right nodes. 
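These are the target-specific memory opcodes the new LowerLOAD/LowerSTORE expansions select. For intuition, here is a self-contained little-endian simulation of the lwl/lwr pairing (an illustration of the instruction semantics the expansion relies on, not the selection code itself; big-endian swaps the 0 and 3 offsets):

#include <cstdint>
#include <cstdio>

// Each of lwl/lwr touches only bytes inside the aligned word containing
// its effective address; together they assemble an unaligned 32-bit load.
static uint32_t alignedWord(const uint8_t *Mem, uint32_t Addr) {
  uint32_t A = Addr & ~3u;
  return (uint32_t)Mem[A] | (uint32_t)Mem[A + 1] << 8 |
         (uint32_t)Mem[A + 2] << 16 | (uint32_t)Mem[A + 3] << 24;
}

// lwr: bytes from Addr to the end of its aligned word, right-justified.
static uint32_t lwr(const uint8_t *Mem, uint32_t Addr, uint32_t Rt) {
  uint32_t Shift = (Addr & 3u) * 8;
  uint32_t Mask = 0xffffffffu >> Shift;
  return (Rt & ~Mask) | ((alignedWord(Mem, Addr) >> Shift) & Mask);
}

// lwl: bytes from the start of the aligned word up to Addr, left-justified.
static uint32_t lwl(const uint8_t *Mem, uint32_t Addr, uint32_t Rt) {
  uint32_t Shift = (3u - (Addr & 3u)) * 8;
  uint32_t Mask = 0xffffffffu << Shift;
  return (Rt & ~Mask) | ((alignedWord(Mem, Addr) << Shift) & Mask);
}

int main() {
  uint8_t Mem[8] = {0xaa, 0x11, 0x22, 0x33, 0x44, 0xbb, 0xcc, 0xdd};
  // (set tmp, (lwl (add baseptr, 3), undef)); (set dst, (lwr baseptr, tmp))
  uint32_t Tmp = lwl(Mem, 1 + 3, 0);
  std::printf("0x%08x\n", lwr(Mem, 1, Tmp)); // prints 0x44332211
  return 0;
}

The store-side swl/swr and the doubleword ldl/ldr/sdl/sdr variants partition bytes the same way, with 7 in place of 3.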
+ LWL = ISD::FIRST_TARGET_MEMORY_OPCODE, + LWR, + SWL, + SWR, + LDL, + LDR, + SDL, + SDR }; } @@ -122,19 +132,25 @@ namespace llvm { // Lower Operand specifics SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const; SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const; SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const; SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const; SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const; SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const; SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFABS(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const; SDValue LowerMEMBARRIER(SDValue Op, SelectionDAG& DAG) const; SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG& DAG) const; + SDValue LowerShiftLeftParts(SDValue Op, SelectionDAG& DAG) const; + SDValue LowerShiftRightParts(SDValue Op, SelectionDAG& DAG, + bool IsSRA) const; + SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const; virtual SDValue LowerFormalArguments(SDValue Chain, @@ -144,13 +160,7 @@ namespace llvm { SmallVectorImpl &InVals) const; virtual SDValue - LowerCall(SDValue Chain, SDValue Callee, - CallingConv::ID CallConv, bool isVarArg, - bool doesNotRet, bool &isTailCall, - const SmallVectorImpl &Outs, - const SmallVectorImpl &OutVals, - const SmallVectorImpl &Ins, - DebugLoc dl, SelectionDAG &DAG, + LowerCall(TargetLowering::CallLoweringInfo &CLI, SmallVectorImpl &InVals) const; virtual SDValue @@ -176,8 +186,22 @@ namespace llvm { getRegForInlineAsmConstraint(const std::string &Constraint, EVT VT) const; + /// LowerAsmOperandForConstraint - Lower the specified operand into the Ops + /// vector. If it is invalid, don't add anything to Ops. If hasMemory is + /// true it means one of the asm constraint of the inline asm instruction + /// being processed is 'm'. + virtual void LowerAsmOperandForConstraint(SDValue Op, + std::string &Constraint, + std::vector &Ops, + SelectionDAG &DAG) const; + virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const; + virtual EVT getOptimalMemOpType(uint64_t Size, unsigned DstAlign, + unsigned SrcAlign, bool IsZeroVal, + bool MemcpyStrSrc, + MachineFunction &MF) const; + /// isFPImmLegal - Returns true if the target can instruction select the /// specified FP immediate natively. If false, the legalizer will /// materialize the FP immediate as a load from a constant pool. diff --git a/lib/Target/Mips/MipsInstrFPU.td b/lib/Target/Mips/MipsInstrFPU.td index 14d8f1e..3e78c45 100644 --- a/lib/Target/Mips/MipsInstrFPU.td +++ b/lib/Target/Mips/MipsInstrFPU.td @@ -54,10 +54,14 @@ let PrintMethod = "printFCCOperand", DecoderMethod = "DecodeCondCode" in // Feature predicates. 
//===----------------------------------------------------------------------===// -def IsFP64bit : Predicate<"Subtarget.isFP64bit()">, AssemblerPredicate<"FeatureFP64Bit">; -def NotFP64bit : Predicate<"!Subtarget.isFP64bit()">, AssemblerPredicate<"!FeatureFP64Bit">; -def IsSingleFloat : Predicate<"Subtarget.isSingleFloat()">, AssemblerPredicate<"FeatureSingleFloat">; -def IsNotSingleFloat : Predicate<"!Subtarget.isSingleFloat()">, AssemblerPredicate<"!FeatureSingleFloat">; +def IsFP64bit : Predicate<"Subtarget.isFP64bit()">, + AssemblerPredicate<"FeatureFP64Bit">; +def NotFP64bit : Predicate<"!Subtarget.isFP64bit()">, + AssemblerPredicate<"!FeatureFP64Bit">; +def IsSingleFloat : Predicate<"Subtarget.isSingleFloat()">, + AssemblerPredicate<"FeatureSingleFloat">; +def IsNotSingleFloat : Predicate<"!Subtarget.isSingleFloat()">, + AssemblerPredicate<"!FeatureSingleFloat">; // FP immediate patterns. def fpimm0 : PatLeaf<(fpimm), [{ @@ -97,7 +101,7 @@ class FPStore op, string opstr, RegisterClass RC, Operand MemOpnd>: } // FP indexed load. class FPIdxLoad funct, string opstr, RegisterClass DRC, - RegisterClass PRC, PatFrag FOp>: + RegisterClass PRC, SDPatternOperator FOp = null_frag>: FFMemIdx { @@ -106,7 +110,7 @@ class FPIdxLoad funct, string opstr, RegisterClass DRC, // FP indexed store. class FPIdxStore funct, string opstr, RegisterClass DRC, - RegisterClass PRC, PatFrag FOp>: + RegisterClass PRC, SDPatternOperator FOp= null_frag>: FFMemIdx { @@ -117,15 +121,15 @@ class FPIdxStore funct, string opstr, RegisterClass DRC, multiclass FFR1_W_M funct, string opstr> { def _S : FFR1; def _D32 : FFR1, - Requires<[NotFP64bit]>; + Requires<[NotFP64bit, HasStandardEncoding]>; def _D64 : FFR1, - Requires<[IsFP64bit]> { + Requires<[IsFP64bit, HasStandardEncoding]> { let DecoderNamespace = "Mips64"; } } // Instructions that convert an FP value to 64-bit fixed point. 
-let Predicates = [IsFP64bit], DecoderNamespace = "Mips64" in +let Predicates = [IsFP64bit, HasStandardEncoding], DecoderNamespace = "Mips64" in multiclass FFR1_L_M funct, string opstr> { def _S : FFR1; def _D64 : FFR1; @@ -135,9 +139,9 @@ multiclass FFR1_L_M funct, string opstr> { multiclass FFR1P_M funct, string opstr, SDNode OpNode> { def _S : FFR1P; def _D32 : FFR1P, - Requires<[NotFP64bit]>; + Requires<[NotFP64bit, HasStandardEncoding]>; def _D64 : FFR1P, - Requires<[IsFP64bit]> { + Requires<[IsFP64bit, HasStandardEncoding]> { let DecoderNamespace = "Mips64"; } } @@ -146,9 +150,9 @@ multiclass FFR2P_M funct, string opstr, SDNode OpNode, bit isComm = 0> { let isCommutable = isComm in { def _S : FFR2P; def _D32 : FFR2P, - Requires<[NotFP64bit]>; + Requires<[NotFP64bit, HasStandardEncoding]>; def _D64 : FFR2P, - Requires<[IsFP64bit]> { + Requires<[IsFP64bit, HasStandardEncoding]> { let DecoderNamespace = "Mips64"; } } @@ -185,13 +189,13 @@ def CVT_S_W : FFR1<0x20, 20, "cvt", "s.w", FGR32, FGR32>; def CVT_L_S : FFR1<0x25, 16, "cvt", "l.s", FGR64, FGR32>; def CVT_L_D64: FFR1<0x25, 17, "cvt", "l.d", FGR64, FGR64>; -let Predicates = [NotFP64bit] in { +let Predicates = [NotFP64bit, HasStandardEncoding] in { def CVT_S_D32 : FFR1<0x20, 17, "cvt", "s.d", FGR32, AFGR64>; def CVT_D32_W : FFR1<0x21, 20, "cvt", "d.w", AFGR64, FGR32>; def CVT_D32_S : FFR1<0x21, 16, "cvt", "d.s", AFGR64, FGR32>; } -let Predicates = [IsFP64bit], DecoderNamespace = "Mips64" in { +let Predicates = [IsFP64bit, HasStandardEncoding], DecoderNamespace = "Mips64" in { def CVT_S_D64 : FFR1<0x20, 17, "cvt", "s.d", FGR32, FGR64>; def CVT_S_L : FFR1<0x20, 21, "cvt", "s.l", FGR32, FGR64>; def CVT_D64_W : FFR1<0x21, 20, "cvt", "d.w", FGR64, FGR32>; @@ -199,7 +203,7 @@ let Predicates = [IsFP64bit], DecoderNamespace = "Mips64" in { def CVT_D64_L : FFR1<0x21, 21, "cvt", "d.l", FGR64, FGR64>; } -let Predicates = [NoNaNsFPMath] in { +let Predicates = [NoNaNsFPMath, HasStandardEncoding] in { defm FABS : FFR1P_M<0x5, "abs", fabs>; defm FNEG : FFR1P_M<0x7, "neg", fneg>; } @@ -242,14 +246,14 @@ def DMTC1 : FFRGPR<0x05, (outs FGR64:$fs), (ins CPU64Regs:$rt), def FMOV_S : FFR1<0x6, 16, "mov", "s", FGR32, FGR32>; def FMOV_D32 : FFR1<0x6, 17, "mov", "d", AFGR64, AFGR64>, - Requires<[NotFP64bit]>; + Requires<[NotFP64bit, HasStandardEncoding]>; def FMOV_D64 : FFR1<0x6, 17, "mov", "d", FGR64, FGR64>, - Requires<[IsFP64bit]> { + Requires<[IsFP64bit, HasStandardEncoding]> { let DecoderNamespace = "Mips64"; } /// Floating Point Memory Instructions -let Predicates = [IsN64], DecoderNamespace = "Mips64" in { +let Predicates = [IsN64, HasStandardEncoding], DecoderNamespace = "Mips64" in { def LWC1_P8 : FPLoad<0x31, "lwc1", FGR32, mem64>; def SWC1_P8 : FPStore<0x39, "swc1", FGR32, mem64>; def LDC164_P8 : FPLoad<0x35, "ldc1", FGR64, mem64> { @@ -260,81 +264,91 @@ let Predicates = [IsN64], DecoderNamespace = "Mips64" in { } } -let Predicates = [NotN64] in { +let Predicates = [NotN64, HasStandardEncoding] in { def LWC1 : FPLoad<0x31, "lwc1", FGR32, mem>; def SWC1 : FPStore<0x39, "swc1", FGR32, mem>; } -let Predicates = [NotN64, HasMips64], DecoderNamespace = "Mips64" in { +let Predicates = [NotN64, HasMips64, HasStandardEncoding], + DecoderNamespace = "Mips64" in { def LDC164 : FPLoad<0x35, "ldc1", FGR64, mem>; def SDC164 : FPStore<0x3d, "sdc1", FGR64, mem>; } -let Predicates = [NotN64, NotMips64] in { +let Predicates = [NotN64, NotMips64, HasStandardEncoding] in { def LDC1 : FPLoad<0x35, "ldc1", AFGR64, mem>; def SDC1 : FPStore<0x3d, "sdc1", AFGR64, mem>; 
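The let blocks above spell the same 64-bit FP load three ways, and which definition is legal follows directly from the predicates. A one-function restatement (strings standing in for the opcode enums):

// Illustrative selector: AFGR64 LDC1 when there are no 64-bit FPRs,
// LDC164 with 64-bit FPRs under a 32-bit ABI, and the _P8 form under N64.
const char *fpLoad64Opc(bool HasMips64, bool IsN64) {
  if (!HasMips64) return "LDC1"; // paired 32-bit FPRs (AFGR64)
  return IsN64 ? "LDC164_P8" : "LDC164";
}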
} // Indexed loads and stores. -let Predicates = [HasMips32r2Or64] in { +let Predicates = [HasMips32r2Or64, HasStandardEncoding] in { def LWXC1 : FPIdxLoad<0x0, "lwxc1", FGR32, CPURegs, load_a>; - def LUXC1 : FPIdxLoad<0x5, "luxc1", FGR32, CPURegs, load_u>; def SWXC1 : FPIdxStore<0x8, "swxc1", FGR32, CPURegs, store_a>; - def SUXC1 : FPIdxStore<0xd, "suxc1", FGR32, CPURegs, store_u>; } -let Predicates = [HasMips32r2, NotMips64] in { +let Predicates = [HasMips32r2, NotMips64, HasStandardEncoding] in { def LDXC1 : FPIdxLoad<0x1, "ldxc1", AFGR64, CPURegs, load_a>; def SDXC1 : FPIdxStore<0x9, "sdxc1", AFGR64, CPURegs, store_a>; } -let Predicates = [HasMips64, NotN64], DecoderNamespace="Mips64" in { +let Predicates = [HasMips64, NotN64, HasStandardEncoding], DecoderNamespace="Mips64" in { def LDXC164 : FPIdxLoad<0x1, "ldxc1", FGR64, CPURegs, load_a>; def SDXC164 : FPIdxStore<0x9, "sdxc1", FGR64, CPURegs, store_a>; } // n64 -let Predicates = [IsN64], isCodeGenOnly=1 in { +let Predicates = [IsN64, HasStandardEncoding], isCodeGenOnly=1 in { def LWXC1_P8 : FPIdxLoad<0x0, "lwxc1", FGR32, CPU64Regs, load_a>; - def LUXC1_P8 : FPIdxLoad<0x5, "luxc1", FGR32, CPU64Regs, load_u>; def LDXC164_P8 : FPIdxLoad<0x1, "ldxc1", FGR64, CPU64Regs, load_a>; def SWXC1_P8 : FPIdxStore<0x8, "swxc1", FGR32, CPU64Regs, store_a>; - def SUXC1_P8 : FPIdxStore<0xd, "suxc1", FGR32, CPU64Regs, store_u>; def SDXC164_P8 : FPIdxStore<0x9, "sdxc1", FGR64, CPU64Regs, store_a>; } +// Load/store doubleword indexed unaligned. +let Predicates = [NotMips64, HasStandardEncoding] in { + def LUXC1 : FPIdxLoad<0x5, "luxc1", AFGR64, CPURegs>; + def SUXC1 : FPIdxStore<0xd, "suxc1", AFGR64, CPURegs>; +} + +let Predicates = [HasMips64, HasStandardEncoding], + DecoderNamespace="Mips64" in { + def LUXC164 : FPIdxLoad<0x5, "luxc1", FGR64, CPURegs>; + def SUXC164 : FPIdxStore<0xd, "suxc1", FGR64, CPURegs>; +} + /// Floating-point Aritmetic defm FADD : FFR2P_M<0x00, "add", fadd, 1>; defm FDIV : FFR2P_M<0x03, "div", fdiv>; defm FMUL : FFR2P_M<0x02, "mul", fmul, 1>; defm FSUB : FFR2P_M<0x01, "sub", fsub>; -let Predicates = [HasMips32r2] in { +let Predicates = [HasMips32r2, HasStandardEncoding] in { def MADD_S : FMADDSUB<0x4, 0, "madd", "s", fadd, FGR32>; def MSUB_S : FMADDSUB<0x5, 0, "msub", "s", fsub, FGR32>; } -let Predicates = [HasMips32r2, NoNaNsFPMath] in { +let Predicates = [HasMips32r2, NoNaNsFPMath, HasStandardEncoding] in { def NMADD_S : FNMADDSUB<0x6, 0, "nmadd", "s", fadd, FGR32>; def NMSUB_S : FNMADDSUB<0x7, 0, "nmsub", "s", fsub, FGR32>; } -let Predicates = [HasMips32r2, NotFP64bit] in { +let Predicates = [HasMips32r2, NotFP64bit, HasStandardEncoding] in { def MADD_D32 : FMADDSUB<0x4, 1, "madd", "d", fadd, AFGR64>; def MSUB_D32 : FMADDSUB<0x5, 1, "msub", "d", fsub, AFGR64>; } -let Predicates = [HasMips32r2, NotFP64bit, NoNaNsFPMath] in { +let Predicates = [HasMips32r2, NotFP64bit, NoNaNsFPMath, HasStandardEncoding] in { def NMADD_D32 : FNMADDSUB<0x6, 1, "nmadd", "d", fadd, AFGR64>; def NMSUB_D32 : FNMADDSUB<0x7, 1, "nmsub", "d", fsub, AFGR64>; } -let Predicates = [HasMips32r2, IsFP64bit], isCodeGenOnly=1 in { +let Predicates = [HasMips32r2, IsFP64bit, HasStandardEncoding], isCodeGenOnly=1 in { def MADD_D64 : FMADDSUB<0x4, 1, "madd", "d", fadd, FGR64>; def MSUB_D64 : FMADDSUB<0x5, 1, "msub", "d", fsub, FGR64>; } -let Predicates = [HasMips32r2, IsFP64bit, NoNaNsFPMath], isCodeGenOnly=1 in { +let Predicates = [HasMips32r2, IsFP64bit, NoNaNsFPMath, HasStandardEncoding], + isCodeGenOnly=1 in { def NMADD_D64 : FNMADDSUB<0x6, 1, "nmadd", "d", 
fadd, FGR64>; def NMSUB_D64 : FNMADDSUB<0x7, 1, "nmsub", "d", fsub, FGR64>; } @@ -391,8 +405,10 @@ class FCMP fmt, RegisterClass RC, string typestr> : /// Floating Point Compare let Defs=[FCR31] in { def FCMP_S32 : FCMP<0x10, FGR32, "s">; - def FCMP_D32 : FCMP<0x11, AFGR64, "d">, Requires<[NotFP64bit]>; - def FCMP_D64 : FCMP<0x11, FGR64, "d">, Requires<[IsFP64bit]> { + def FCMP_D32 : FCMP<0x11, AFGR64, "d">, + Requires<[NotFP64bit, HasStandardEncoding]>; + def FCMP_D64 : FCMP<0x11, FGR64, "d">, + Requires<[IsFP64bit, HasStandardEncoding]> { let DecoderNamespace = "Mips64"; } } @@ -400,69 +416,59 @@ let Defs=[FCR31] in { //===----------------------------------------------------------------------===// // Floating Point Pseudo-Instructions //===----------------------------------------------------------------------===// -def MOVCCRToCCR : MipsPseudo<(outs CCR:$dst), (ins CCR:$src), - "# MOVCCRToCCR", []>; +def MOVCCRToCCR : PseudoSE<(outs CCR:$dst), (ins CCR:$src), + "# MOVCCRToCCR", []>; // This pseudo instr gets expanded into 2 mtc1 instrs after register // allocation. def BuildPairF64 : - MipsPseudo<(outs AFGR64:$dst), - (ins CPURegs:$lo, CPURegs:$hi), "", - [(set AFGR64:$dst, (MipsBuildPairF64 CPURegs:$lo, CPURegs:$hi))]>; + PseudoSE<(outs AFGR64:$dst), + (ins CPURegs:$lo, CPURegs:$hi), "", + [(set AFGR64:$dst, (MipsBuildPairF64 CPURegs:$lo, CPURegs:$hi))]>; // This pseudo instr gets expanded into 2 mfc1 instrs after register // allocation. // if n is 0, lower part of src is extracted. // if n is 1, higher part of src is extracted. def ExtractElementF64 : - MipsPseudo<(outs CPURegs:$dst), - (ins AFGR64:$src, i32imm:$n), "", - [(set CPURegs:$dst, - (MipsExtractElementF64 AFGR64:$src, imm:$n))]>; + PseudoSE<(outs CPURegs:$dst), (ins AFGR64:$src, i32imm:$n), "", + [(set CPURegs:$dst, (MipsExtractElementF64 AFGR64:$src, imm:$n))]>; //===----------------------------------------------------------------------===// // Floating Point Patterns //===----------------------------------------------------------------------===// -def : Pat<(f32 fpimm0), (MTC1 ZERO)>; -def : Pat<(f32 fpimm0neg), (FNEG_S (MTC1 ZERO))>; - -def : Pat<(f32 (sint_to_fp CPURegs:$src)), (CVT_S_W (MTC1 CPURegs:$src))>; -def : Pat<(i32 (fp_to_sint FGR32:$src)), (MFC1 (TRUNC_W_S FGR32:$src))>; - -let Predicates = [NotFP64bit] in { - def : Pat<(f64 (sint_to_fp CPURegs:$src)), (CVT_D32_W (MTC1 CPURegs:$src))>; - def : Pat<(i32 (fp_to_sint AFGR64:$src)), (MFC1 (TRUNC_W_D32 AFGR64:$src))>; - def : Pat<(f32 (fround AFGR64:$src)), (CVT_S_D32 AFGR64:$src)>; - def : Pat<(f64 (fextend FGR32:$src)), (CVT_D32_S FGR32:$src)>; -} - -let Predicates = [IsFP64bit] in { - def : Pat<(f64 fpimm0), (DMTC1 ZERO_64)>; - def : Pat<(f64 fpimm0neg), (FNEG_D64 (DMTC1 ZERO_64))>; - - def : Pat<(f64 (sint_to_fp CPURegs:$src)), (CVT_D64_W (MTC1 CPURegs:$src))>; - def : Pat<(f32 (sint_to_fp CPU64Regs:$src)), - (CVT_S_L (DMTC1 CPU64Regs:$src))>; - def : Pat<(f64 (sint_to_fp CPU64Regs:$src)), - (CVT_D64_L (DMTC1 CPU64Regs:$src))>; - - def : Pat<(i32 (fp_to_sint FGR64:$src)), (MFC1 (TRUNC_W_D64 FGR64:$src))>; - def : Pat<(i64 (fp_to_sint FGR32:$src)), (DMFC1 (TRUNC_L_S FGR32:$src))>; - def : Pat<(i64 (fp_to_sint FGR64:$src)), (DMFC1 (TRUNC_L_D64 FGR64:$src))>; - - def : Pat<(f32 (fround FGR64:$src)), (CVT_S_D64 FGR64:$src)>; - def : Pat<(f64 (fextend FGR32:$src)), (CVT_D64_S FGR32:$src)>; -} - -// Patterns for unaligned floating point loads and stores. 
-let Predicates = [HasMips32r2Or64, NotN64] in { - def : Pat<(f32 (load_u CPURegs:$addr)), (LUXC1 CPURegs:$addr, ZERO)>; - def : Pat<(store_u FGR32:$src, CPURegs:$addr), - (SUXC1 FGR32:$src, CPURegs:$addr, ZERO)>; -} - -let Predicates = [IsN64] in { - def : Pat<(f32 (load_u CPU64Regs:$addr)), (LUXC1_P8 CPU64Regs:$addr, ZERO_64)>; - def : Pat<(store_u FGR32:$src, CPU64Regs:$addr), - (SUXC1_P8 FGR32:$src, CPU64Regs:$addr, ZERO_64)>; +def : MipsPat<(f32 fpimm0), (MTC1 ZERO)>; +def : MipsPat<(f32 fpimm0neg), (FNEG_S (MTC1 ZERO))>; + +def : MipsPat<(f32 (sint_to_fp CPURegs:$src)), (CVT_S_W (MTC1 CPURegs:$src))>; +def : MipsPat<(i32 (fp_to_sint FGR32:$src)), (MFC1 (TRUNC_W_S FGR32:$src))>; + +let Predicates = [NotFP64bit, HasStandardEncoding] in { + def : MipsPat<(f64 (sint_to_fp CPURegs:$src)), + (CVT_D32_W (MTC1 CPURegs:$src))>; + def : MipsPat<(i32 (fp_to_sint AFGR64:$src)), + (MFC1 (TRUNC_W_D32 AFGR64:$src))>; + def : MipsPat<(f32 (fround AFGR64:$src)), (CVT_S_D32 AFGR64:$src)>; + def : MipsPat<(f64 (fextend FGR32:$src)), (CVT_D32_S FGR32:$src)>; +} + +let Predicates = [IsFP64bit, HasStandardEncoding] in { + def : MipsPat<(f64 fpimm0), (DMTC1 ZERO_64)>; + def : MipsPat<(f64 fpimm0neg), (FNEG_D64 (DMTC1 ZERO_64))>; + + def : MipsPat<(f64 (sint_to_fp CPURegs:$src)), + (CVT_D64_W (MTC1 CPURegs:$src))>; + def : MipsPat<(f32 (sint_to_fp CPU64Regs:$src)), + (CVT_S_L (DMTC1 CPU64Regs:$src))>; + def : MipsPat<(f64 (sint_to_fp CPU64Regs:$src)), + (CVT_D64_L (DMTC1 CPU64Regs:$src))>; + + def : MipsPat<(i32 (fp_to_sint FGR64:$src)), + (MFC1 (TRUNC_W_D64 FGR64:$src))>; + def : MipsPat<(i64 (fp_to_sint FGR32:$src)), (DMFC1 (TRUNC_L_S FGR32:$src))>; + def : MipsPat<(i64 (fp_to_sint FGR64:$src)), + (DMFC1 (TRUNC_L_D64 FGR64:$src))>; + + def : MipsPat<(f32 (fround FGR64:$src)), (CVT_S_D64 FGR64:$src)>; + def : MipsPat<(f64 (fextend FGR32:$src)), (CVT_D64_S FGR32:$src)>; } diff --git a/lib/Target/Mips/MipsInstrFormats.td b/lib/Target/Mips/MipsInstrFormats.td index 841eba0..8feb853 100644 --- a/lib/Target/Mips/MipsInstrFormats.td +++ b/lib/Target/Mips/MipsInstrFormats.td @@ -72,20 +72,33 @@ class MipsInst pattern, field bits<32> SoftFail = 0; } +// Mips32/64 Instruction Format +class InstSE pattern, + InstrItinClass itin, Format f>: + MipsInst { + let Predicates = [HasStandardEncoding]; +} + // Mips Pseudo Instructions Format class MipsPseudo pattern>: - MipsInst { + MipsInst { let isCodeGenOnly = 1; let isPseudo = 1; } +// Mips32/64 Pseudo Instruction Format +class PseudoSE pattern>: + MipsPseudo { + let Predicates = [HasStandardEncoding]; +} + //===----------------------------------------------------------------------===// // Format R instruction class in Mips : <|opcode|rs|rt|rd|shamt|funct|> //===----------------------------------------------------------------------===// class FR op, bits<6> _funct, dag outs, dag ins, string asmstr, list pattern, InstrItinClass itin>: - MipsInst + InstSE { bits<5> rd; bits<5> rs; @@ -108,7 +121,7 @@ class FR op, bits<6> _funct, dag outs, dag ins, string asmstr, //===----------------------------------------------------------------------===// class FI op, dag outs, dag ins, string asmstr, list pattern, - InstrItinClass itin>: MipsInst + InstrItinClass itin>: InstSE { bits<5> rt; bits<5> rs; @@ -123,7 +136,7 @@ class FI op, dag outs, dag ins, string asmstr, list pattern, class BranchBase op, dag outs, dag ins, string asmstr, list pattern, InstrItinClass itin>: - MipsInst + InstSE { bits<5> rs; bits<5> rt; @@ -141,7 +154,7 @@ class BranchBase op, dag outs, dag ins, string 
asmstr, //===----------------------------------------------------------------------===// class FJ op, dag outs, dag ins, string asmstr, list pattern, - InstrItinClass itin>: MipsInst + InstrItinClass itin>: InstSE { bits<26> addr; @@ -169,7 +182,7 @@ class FJ op, dag outs, dag ins, string asmstr, list pattern, class FFR op, bits<6> _funct, bits<5> _fmt, dag outs, dag ins, string asmstr, list pattern> : - MipsInst + InstSE { bits<5> fd; bits<5> fs; @@ -193,7 +206,7 @@ class FFR op, bits<6> _funct, bits<5> _fmt, dag outs, dag ins, //===----------------------------------------------------------------------===// class FFI op, dag outs, dag ins, string asmstr, list pattern>: - MipsInst + InstSE { bits<5> ft; bits<5> base; @@ -211,7 +224,7 @@ class FFI op, dag outs, dag ins, string asmstr, list pattern>: //===----------------------------------------------------------------------===// class FCC _fmt, dag outs, dag ins, string asmstr, list pattern> : - MipsInst + InstSE { bits<5> fs; bits<5> ft; @@ -232,7 +245,7 @@ class FCC _fmt, dag outs, dag ins, string asmstr, list pattern> : class FCMOV _tf, dag outs, dag ins, string asmstr, list pattern> : - MipsInst + InstSE { bits<5> rd; bits<5> rs; @@ -253,7 +266,7 @@ class FCMOV _tf, dag outs, dag ins, string asmstr, class FFCMOV _fmt, bits<1> _tf, dag outs, dag ins, string asmstr, list pattern> : - MipsInst + InstSE { bits<5> fd; bits<5> fs; @@ -300,7 +313,7 @@ class FFR2P funct, bits<5> fmt, string opstr, // Floating point madd/msub/nmadd/nmsub. class FFMADDSUB funct, bits<3> fmt, dag outs, dag ins, string asmstr, list pattern> - : MipsInst { + : InstSE { bits<5> fd; bits<5> fr; bits<5> fs; @@ -318,7 +331,7 @@ class FFMADDSUB funct, bits<3> fmt, dag outs, dag ins, string asmstr, // FP indexed load/store instructions. class FFMemIdx funct, dag outs, dag ins, string asmstr, list pattern> : - MipsInst + InstSE { bits<5> base; bits<5> index; diff --git a/lib/Target/Mips/MipsInstrInfo.cpp b/lib/Target/Mips/MipsInstrInfo.cpp index a3a18bf..50e3eb5 100644 --- a/lib/Target/Mips/MipsInstrInfo.cpp +++ b/lib/Target/Mips/MipsInstrInfo.cpp @@ -11,6 +11,7 @@ // //===----------------------------------------------------------------------===// +#include "MipsAnalyzeImmediate.h" #include "MipsInstrInfo.h" #include "MipsTargetMachine.h" #include "MipsMachineFunction.h" @@ -26,67 +27,19 @@ using namespace llvm; -MipsInstrInfo::MipsInstrInfo(MipsTargetMachine &tm) +MipsInstrInfo::MipsInstrInfo(MipsTargetMachine &tm, unsigned UncondBr) : MipsGenInstrInfo(Mips::ADJCALLSTACKDOWN, Mips::ADJCALLSTACKUP), - TM(tm), IsN64(TM.getSubtarget().isABI_N64()), - RI(*TM.getSubtargetImpl(), *this), - UncondBrOpc(TM.getRelocationModel() == Reloc::PIC_ ? Mips::B : Mips::J) {} + TM(tm), UncondBrOpc(UncondBr) {} -const MipsRegisterInfo &MipsInstrInfo::getRegisterInfo() const { - return RI; -} - -static bool isZeroImm(const MachineOperand &op) { - return op.isImm() && op.getImm() == 0; -} - -/// isLoadFromStackSlot - If the specified machine instruction is a direct -/// load from a stack slot, return the virtual or physical register number of -/// the destination along with the FrameIndex of the loaded stack slot. If -/// not, return 0. This predicate must return 0 if the instruction has -/// any side effects other than loading from the stack slot. 
-unsigned MipsInstrInfo:: -isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const -{ - unsigned Opc = MI->getOpcode(); - - if ((Opc == Mips::LW) || (Opc == Mips::LW_P8) || (Opc == Mips::LD) || - (Opc == Mips::LD_P8) || (Opc == Mips::LWC1) || (Opc == Mips::LWC1_P8) || - (Opc == Mips::LDC1) || (Opc == Mips::LDC164) || - (Opc == Mips::LDC164_P8)) { - if ((MI->getOperand(1).isFI()) && // is a stack slot - (MI->getOperand(2).isImm()) && // the imm is zero - (isZeroImm(MI->getOperand(2)))) { - FrameIndex = MI->getOperand(1).getIndex(); - return MI->getOperand(0).getReg(); - } - } +const MipsInstrInfo *MipsInstrInfo::create(MipsTargetMachine &TM) { + if (TM.getSubtargetImpl()->inMips16Mode()) + return llvm::createMips16InstrInfo(TM); - return 0; + return llvm::createMipsSEInstrInfo(TM); } -/// isStoreToStackSlot - If the specified machine instruction is a direct -/// store to a stack slot, return the virtual or physical register number of -/// the source reg along with the FrameIndex of the loaded stack slot. If -/// not, return 0. This predicate must return 0 if the instruction has -/// any side effects other than storing to the stack slot. -unsigned MipsInstrInfo:: -isStoreToStackSlot(const MachineInstr *MI, int &FrameIndex) const -{ - unsigned Opc = MI->getOpcode(); - - if ((Opc == Mips::SW) || (Opc == Mips::SW_P8) || (Opc == Mips::SD) || - (Opc == Mips::SD_P8) || (Opc == Mips::SWC1) || (Opc == Mips::SWC1_P8) || - (Opc == Mips::SDC1) || (Opc == Mips::SDC164) || - (Opc == Mips::SDC164_P8)) { - if ((MI->getOperand(1).isFI()) && // is a stack slot - (MI->getOperand(2).isImm()) && // the imm is zero - (isZeroImm(MI->getOperand(2)))) { - FrameIndex = MI->getOperand(1).getIndex(); - return MI->getOperand(0).getReg(); - } - } - return 0; +bool MipsInstrInfo::isZeroImm(const MachineOperand &op) const { + return op.isImm() && op.getImm() == 0; } /// insertNoop - If data hazard condition is found insert the target nop @@ -98,78 +51,8 @@ insertNoop(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const BuildMI(MBB, MI, DL, get(Mips::NOP)); } -void MipsInstrInfo:: -copyPhysReg(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I, DebugLoc DL, - unsigned DestReg, unsigned SrcReg, - bool KillSrc) const { - unsigned Opc = 0, ZeroReg = 0; - - if (Mips::CPURegsRegClass.contains(DestReg)) { // Copy to CPU Reg. - if (Mips::CPURegsRegClass.contains(SrcReg)) - Opc = Mips::ADDu, ZeroReg = Mips::ZERO; - else if (Mips::CCRRegClass.contains(SrcReg)) - Opc = Mips::CFC1; - else if (Mips::FGR32RegClass.contains(SrcReg)) - Opc = Mips::MFC1; - else if (SrcReg == Mips::HI) - Opc = Mips::MFHI, SrcReg = 0; - else if (SrcReg == Mips::LO) - Opc = Mips::MFLO, SrcReg = 0; - } - else if (Mips::CPURegsRegClass.contains(SrcReg)) { // Copy from CPU Reg. - if (Mips::CCRRegClass.contains(DestReg)) - Opc = Mips::CTC1; - else if (Mips::FGR32RegClass.contains(DestReg)) - Opc = Mips::MTC1; - else if (DestReg == Mips::HI) - Opc = Mips::MTHI, DestReg = 0; - else if (DestReg == Mips::LO) - Opc = Mips::MTLO, DestReg = 0; - } - else if (Mips::FGR32RegClass.contains(DestReg, SrcReg)) - Opc = Mips::FMOV_S; - else if (Mips::AFGR64RegClass.contains(DestReg, SrcReg)) - Opc = Mips::FMOV_D32; - else if (Mips::FGR64RegClass.contains(DestReg, SrcReg)) - Opc = Mips::FMOV_D64; - else if (Mips::CCRRegClass.contains(DestReg, SrcReg)) - Opc = Mips::MOVCCRToCCR; - else if (Mips::CPU64RegsRegClass.contains(DestReg)) { // Copy to CPU64 Reg. 
- if (Mips::CPU64RegsRegClass.contains(SrcReg)) - Opc = Mips::DADDu, ZeroReg = Mips::ZERO_64; - else if (SrcReg == Mips::HI64) - Opc = Mips::MFHI64, SrcReg = 0; - else if (SrcReg == Mips::LO64) - Opc = Mips::MFLO64, SrcReg = 0; - else if (Mips::FGR64RegClass.contains(SrcReg)) - Opc = Mips::DMFC1; - } - else if (Mips::CPU64RegsRegClass.contains(SrcReg)) { // Copy from CPU64 Reg. - if (DestReg == Mips::HI64) - Opc = Mips::MTHI64, DestReg = 0; - else if (DestReg == Mips::LO64) - Opc = Mips::MTLO64, DestReg = 0; - else if (Mips::FGR64RegClass.contains(DestReg)) - Opc = Mips::DMTC1; - } - - assert(Opc && "Cannot copy registers"); - - MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opc)); - - if (DestReg) - MIB.addReg(DestReg, RegState::Define); - - if (ZeroReg) - MIB.addReg(ZeroReg); - - if (SrcReg) - MIB.addReg(SrcReg, getKillRegState(KillSrc)); -} - -static MachineMemOperand* GetMemOperand(MachineBasicBlock &MBB, int FI, - unsigned Flag) { +MachineMemOperand *MipsInstrInfo::GetMemOperand(MachineBasicBlock &MBB, int FI, + unsigned Flag) const { MachineFunction &MF = *MBB.getParent(); MachineFrameInfo &MFI = *MF.getFrameInfo(); unsigned Align = MFI.getObjectAlignment(FI); @@ -178,60 +61,6 @@ static MachineMemOperand* GetMemOperand(MachineBasicBlock &MBB, int FI, MFI.getObjectSize(FI), Align); } -void MipsInstrInfo:: -storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, - unsigned SrcReg, bool isKill, int FI, - const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const { - DebugLoc DL; - if (I != MBB.end()) DL = I->getDebugLoc(); - MachineMemOperand *MMO = GetMemOperand(MBB, FI, MachineMemOperand::MOStore); - - unsigned Opc = 0; - - if (RC == Mips::CPURegsRegisterClass) - Opc = IsN64 ? Mips::SW_P8 : Mips::SW; - else if (RC == Mips::CPU64RegsRegisterClass) - Opc = IsN64 ? Mips::SD_P8 : Mips::SD; - else if (RC == Mips::FGR32RegisterClass) - Opc = IsN64 ? Mips::SWC1_P8 : Mips::SWC1; - else if (RC == Mips::AFGR64RegisterClass) - Opc = Mips::SDC1; - else if (RC == Mips::FGR64RegisterClass) - Opc = IsN64 ? Mips::SDC164_P8 : Mips::SDC164; - - assert(Opc && "Register class not handled!"); - BuildMI(MBB, I, DL, get(Opc)).addReg(SrcReg, getKillRegState(isKill)) - .addFrameIndex(FI).addImm(0).addMemOperand(MMO); -} - -void MipsInstrInfo:: -loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, - unsigned DestReg, int FI, - const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const -{ - DebugLoc DL; - if (I != MBB.end()) DL = I->getDebugLoc(); - MachineMemOperand *MMO = GetMemOperand(MBB, FI, MachineMemOperand::MOLoad); - unsigned Opc = 0; - - if (RC == Mips::CPURegsRegisterClass) - Opc = IsN64 ? Mips::LW_P8 : Mips::LW; - else if (RC == Mips::CPU64RegsRegisterClass) - Opc = IsN64 ? Mips::LD_P8 : Mips::LD; - else if (RC == Mips::FGR32RegisterClass) - Opc = IsN64 ? Mips::LWC1_P8 : Mips::LWC1; - else if (RC == Mips::AFGR64RegisterClass) - Opc = Mips::LDC1; - else if (RC == Mips::FGR64RegisterClass) - Opc = IsN64 ? 
Mips::LDC164_P8 : Mips::LDC164; - - assert(Opc && "Register class not handled!"); - BuildMI(MBB, I, DL, get(Opc), DestReg).addFrameIndex(FI).addImm(0) - .addMemOperand(MMO); -} - MachineInstr* MipsInstrInfo::emitFrameIndexDebugValue(MachineFunction &MF, int FrameIx, uint64_t Offset, const MDNode *MDPtr, @@ -245,42 +74,9 @@ MipsInstrInfo::emitFrameIndexDebugValue(MachineFunction &MF, int FrameIx, // Branch Analysis //===----------------------------------------------------------------------===// -static unsigned GetAnalyzableBrOpc(unsigned Opc) { - return (Opc == Mips::BEQ || Opc == Mips::BNE || Opc == Mips::BGTZ || - Opc == Mips::BGEZ || Opc == Mips::BLTZ || Opc == Mips::BLEZ || - Opc == Mips::BEQ64 || Opc == Mips::BNE64 || Opc == Mips::BGTZ64 || - Opc == Mips::BGEZ64 || Opc == Mips::BLTZ64 || Opc == Mips::BLEZ64 || - Opc == Mips::BC1T || Opc == Mips::BC1F || Opc == Mips::B || - Opc == Mips::J) ? - Opc : 0; -} - -/// GetOppositeBranchOpc - Return the inverse of the specified -/// opcode, e.g. turning BEQ to BNE. -unsigned Mips::GetOppositeBranchOpc(unsigned Opc) -{ - switch (Opc) { - default: llvm_unreachable("Illegal opcode!"); - case Mips::BEQ: return Mips::BNE; - case Mips::BNE: return Mips::BEQ; - case Mips::BGTZ: return Mips::BLEZ; - case Mips::BGEZ: return Mips::BLTZ; - case Mips::BLTZ: return Mips::BGEZ; - case Mips::BLEZ: return Mips::BGTZ; - case Mips::BEQ64: return Mips::BNE64; - case Mips::BNE64: return Mips::BEQ64; - case Mips::BGTZ64: return Mips::BLEZ64; - case Mips::BGEZ64: return Mips::BLTZ64; - case Mips::BLTZ64: return Mips::BGEZ64; - case Mips::BLEZ64: return Mips::BGTZ64; - case Mips::BC1T: return Mips::BC1F; - case Mips::BC1F: return Mips::BC1T; - } -} - -static void AnalyzeCondBr(const MachineInstr* Inst, unsigned Opc, - MachineBasicBlock *&BB, - SmallVectorImpl& Cond) { +void MipsInstrInfo::AnalyzeCondBr(const MachineInstr *Inst, unsigned Opc, + MachineBasicBlock *&BB, + SmallVectorImpl &Cond) const { assert(GetAnalyzableBrOpc(Opc) && "Not an analyzable branch"); int NumOp = Inst->getNumExplicitOperands(); @@ -450,7 +246,62 @@ ReverseBranchCondition(SmallVectorImpl &Cond) const { assert( (Cond.size() && Cond.size() <= 3) && "Invalid Mips branch condition!"); - Cond[0].setImm(Mips::GetOppositeBranchOpc(Cond[0].getImm())); + Cond[0].setImm(GetOppositeBranchOpc(Cond[0].getImm())); return false; } +/// Return the number of bytes of code the specified instruction may be. +unsigned MipsInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const { + switch (MI->getOpcode()) { + default: + return MI->getDesc().getSize(); + case TargetOpcode::INLINEASM: { // Inline Asm: Variable size. + const MachineFunction *MF = MI->getParent()->getParent(); + const char *AsmStr = MI->getOperand(0).getSymbolName(); + return getInlineAsmLength(AsmStr, *MF->getTarget().getMCAsmInfo()); + } + } +} + +unsigned +llvm::Mips::loadImmediate(int64_t Imm, bool IsN64, const TargetInstrInfo &TII, + MachineBasicBlock& MBB, + MachineBasicBlock::iterator II, DebugLoc DL, + bool LastInstrIsADDiu, + MipsAnalyzeImmediate::Inst *LastInst) { + MipsAnalyzeImmediate AnalyzeImm; + unsigned Size = IsN64 ? 64 : 32; + unsigned LUi = IsN64 ? Mips::LUi64 : Mips::LUi; + unsigned ZEROReg = IsN64 ? Mips::ZERO_64 : Mips::ZERO; + unsigned ATReg = IsN64 ? 
Mips::AT_64 : Mips::AT; + + const MipsAnalyzeImmediate::InstSeq &Seq = + AnalyzeImm.Analyze(Imm, Size, LastInstrIsADDiu); + MipsAnalyzeImmediate::InstSeq::const_iterator Inst = Seq.begin(); + + if (LastInst && (Seq.size() == 1)) { + *LastInst = *Inst; + return 0; + } + + // The first instruction can be a LUi, which is different from other + // instructions (ADDiu, ORI and SLL) in that it does not have a register + // operand. + if (Inst->Opc == LUi) + BuildMI(MBB, II, DL, TII.get(LUi), ATReg) + .addImm(SignExtend64<16>(Inst->ImmOpnd)); + else + BuildMI(MBB, II, DL, TII.get(Inst->Opc), ATReg).addReg(ZEROReg) + .addImm(SignExtend64<16>(Inst->ImmOpnd)); + + // Build the remaining instructions in Seq. Skip the last instruction if + // LastInst is not 0. + for (++Inst; Inst != Seq.end() - !!LastInst; ++Inst) + BuildMI(MBB, II, DL, TII.get(Inst->Opc), ATReg).addReg(ATReg) + .addImm(SignExtend64<16>(Inst->ImmOpnd)); + + if (LastInst) + *LastInst = *Inst; + + return Seq.size() - !!LastInst; +} diff --git a/lib/Target/Mips/MipsInstrInfo.h b/lib/Target/Mips/MipsInstrInfo.h index 4be727d..7d56259 100644 --- a/lib/Target/Mips/MipsInstrInfo.h +++ b/lib/Target/Mips/MipsInstrInfo.h @@ -15,6 +15,7 @@ #define MIPSINSTRUCTIONINFO_H #include "Mips.h" +#include "MipsAnalyzeImmediate.h" #include "MipsRegisterInfo.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Target/TargetInstrInfo.h" @@ -24,87 +25,85 @@ namespace llvm { -namespace Mips { - /// GetOppositeBranchOpc - Return the inverse of the specified - /// opcode, e.g. turning BEQ to BNE. - unsigned GetOppositeBranchOpc(unsigned Opc); -} - class MipsInstrInfo : public MipsGenInstrInfo { +protected: MipsTargetMachine &TM; - bool IsN64; - const MipsRegisterInfo RI; unsigned UncondBrOpc; + public: - explicit MipsInstrInfo(MipsTargetMachine &TM); + explicit MipsInstrInfo(MipsTargetMachine &TM, unsigned UncondBrOpc); - /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As - /// such, whenever a client has an instance of instruction info, it should - /// always be able to get register info as well (through this method). - /// - virtual const MipsRegisterInfo &getRegisterInfo() const; - - /// isLoadFromStackSlot - If the specified machine instruction is a direct - /// load from a stack slot, return the virtual or physical register number of - /// the destination along with the FrameIndex of the loaded stack slot. If - /// not, return 0. This predicate must return 0 if the instruction has - /// any side effects other than loading from the stack slot. - virtual unsigned isLoadFromStackSlot(const MachineInstr *MI, - int &FrameIndex) const; - - /// isStoreToStackSlot - If the specified machine instruction is a direct - /// store to a stack slot, return the virtual or physical register number of - /// the source reg along with the FrameIndex of the loaded stack slot. If - /// not, return 0. This predicate must return 0 if the instruction has - /// any side effects other than storing to the stack slot. 
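The loadImmediate helper added above asks MipsAnalyzeImmediate for a shortest sequence and then emits it against $at. The common 32-bit shapes can be sketched standalone (a simplification: the real analysis also covers 64-bit values with interleaved SLLs, and can hand the final ADDiu back through LastInst):

#include <cstdint>
#include <cstdio>

// Print the sequence that materializes a 32-bit immediate in $at: one
// ADDiu when it fits in a signed 16-bit field, otherwise LUi of the high
// half, optionally followed by ORi of the low half.
void materialize32(int32_t Imm) {
  if (Imm >= -32768 && Imm <= 32767) {
    std::printf("addiu $at, $zero, %d\n", Imm);
    return;
  }
  uint32_t U = (uint32_t)Imm;
  std::printf("lui   $at, 0x%04x\n", U >> 16);
  if (U & 0xffff)
    std::printf("ori   $at, $at, 0x%04x\n", U & 0xffff);
}

int main() {
  materialize32(0x12345678); // lui 0x1234; ori 0x5678
  materialize32(-8);         // addiu $at, $zero, -8
  return 0;
}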
- virtual unsigned isStoreToStackSlot(const MachineInstr *MI, - int &FrameIndex) const; + static const MipsInstrInfo *create(MipsTargetMachine &TM); /// Branch Analysis virtual bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl &Cond, bool AllowModify) const; - virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const; -private: - void BuildCondBr(MachineBasicBlock &MBB, MachineBasicBlock *TBB, DebugLoc DL, - const SmallVectorImpl& Cond) const; + virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const; -public: virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, const SmallVectorImpl &Cond, DebugLoc DL) const; - virtual void copyPhysReg(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, DebugLoc DL, - unsigned DestReg, unsigned SrcReg, - bool KillSrc) const; - virtual void storeRegToStackSlot(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, - unsigned SrcReg, bool isKill, int FrameIndex, - const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const; - - virtual void loadRegFromStackSlot(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, - unsigned DestReg, int FrameIndex, - const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const; + + virtual + bool ReverseBranchCondition(SmallVectorImpl &Cond) const; virtual MachineInstr* emitFrameIndexDebugValue(MachineFunction &MF, int FrameIx, uint64_t Offset, const MDNode *MDPtr, DebugLoc DL) const; - virtual - bool ReverseBranchCondition(SmallVectorImpl &Cond) const; - /// Insert nop instruction when hazard condition is found virtual void insertNoop(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const; + + /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As + /// such, whenever a client has an instance of instruction info, it should + /// always be able to get register info as well (through this method). + /// + virtual const MipsRegisterInfo &getRegisterInfo() const = 0; + + virtual unsigned GetOppositeBranchOpc(unsigned Opc) const = 0; + + /// Return the number of bytes of code the specified instruction may be. + unsigned GetInstSizeInBytes(const MachineInstr *MI) const; + +protected: + bool isZeroImm(const MachineOperand &op) const; + + MachineMemOperand *GetMemOperand(MachineBasicBlock &MBB, int FI, + unsigned Flag) const; + +private: + virtual unsigned GetAnalyzableBrOpc(unsigned Opc) const = 0; + + void AnalyzeCondBr(const MachineInstr *Inst, unsigned Opc, + MachineBasicBlock *&BB, + SmallVectorImpl &Cond) const; + + void BuildCondBr(MachineBasicBlock &MBB, MachineBasicBlock *TBB, DebugLoc DL, + const SmallVectorImpl& Cond) const; }; +namespace Mips { + /// Emit a series of instructions to load an immediate. All instructions + /// except for the last one are emitted. The function returns the number of + /// MachineInstrs generated. The opcode-immediate pair of the last + /// instruction is returned in LastInst, if it is not 0. + unsigned + loadImmediate(int64_t Imm, bool IsN64, const TargetInstrInfo &TII, + MachineBasicBlock& MBB, MachineBasicBlock::iterator II, + DebugLoc DL, bool LastInstrIsADDiu, + MipsAnalyzeImmediate::Inst *LastInst); +} + +/// Create MipsInstrInfo objects. 
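The two creator functions declared next, together with the static create above, form a small factory: the subtarget mode, not the caller, picks the concrete MipsInstrInfo. A minimal standalone sketch of the shape (illustrative names, no LLVM types, C++03-style to match the era):

#include <cstdio>

// Abstract base with a static factory, mirroring MipsInstrInfo::create
// dispatching between the Mips16 and standard-encoding implementations.
struct InstrInfoModel {
  virtual ~InstrInfoModel() {}
  virtual const char *name() const = 0;
  static InstrInfoModel *create(bool InMips16Mode);
};

struct Mips16Model : InstrInfoModel {
  const char *name() const { return "Mips16InstrInfo"; }
};

struct SEModel : InstrInfoModel {
  const char *name() const { return "MipsSEInstrInfo"; }
};

InstrInfoModel *InstrInfoModel::create(bool InMips16Mode) {
  if (InMips16Mode)
    return new Mips16Model();
  return new SEModel();
}

int main() {
  InstrInfoModel *II = InstrInfoModel::create(false);
  std::printf("%s\n", II->name()); // MipsSEInstrInfo
  delete II;
  return 0;
}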
+const MipsInstrInfo *createMips16InstrInfo(MipsTargetMachine &TM); +const MipsInstrInfo *createMipsSEInstrInfo(MipsTargetMachine &TM); + } #endif diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td index 873d2bd..da15d4d 100644 --- a/lib/Target/Mips/MipsInstrInfo.td +++ b/lib/Target/Mips/MipsInstrInfo.td @@ -11,17 +11,11 @@ // //===----------------------------------------------------------------------===// -//===----------------------------------------------------------------------===// -// Instruction format superclass -//===----------------------------------------------------------------------===// - -include "MipsInstrFormats.td" //===----------------------------------------------------------------------===// // Mips profiles and nodes //===----------------------------------------------------------------------===// -def SDT_MipsRet : SDTypeProfile<0, 1, [SDTCisInt<0>]>; def SDT_MipsJmpLink : SDTypeProfile<0, 1, [SDTCisVT<0, iPTR>]>; def SDT_MipsCMov : SDTypeProfile<1, 4, [SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>, @@ -49,6 +43,10 @@ def SDT_Ins : SDTypeProfile<1, 4, [SDTCisInt<0>, SDTCisSameAs<0, 1>, SDTCisVT<2, i32>, SDTCisSameAs<2, 3>, SDTCisSameAs<0, 4>]>; +def SDTMipsLoadLR : SDTypeProfile<1, 2, + [SDTCisInt<0>, SDTCisPtrTy<1>, + SDTCisSameAs<0, 2>]>; + // Call def MipsJmpLink : SDNode<"MipsISD::JmpLink",SDT_MipsJmpLink, [SDNPHasChain, SDNPOutGlue, SDNPOptInGlue, @@ -72,8 +70,7 @@ def MipsTprelLo : SDNode<"MipsISD::TprelLo", SDTIntUnaryOp>; def MipsThreadPointer: SDNode<"MipsISD::ThreadPointer", SDT_MipsThreadPointer>; // Return -def MipsRet : SDNode<"MipsISD::Ret", SDT_MipsRet, [SDNPHasChain, - SDNPOptInGlue]>; +def MipsRet : SDNode<"MipsISD::Ret", SDTNone, [SDNPHasChain, SDNPOptInGlue]>; // These are target-independent nodes, but have target-specific formats. def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_MipsCallSeqStart, @@ -118,6 +115,23 @@ def MipsSync : SDNode<"MipsISD::Sync", SDT_Sync, [SDNPHasChain]>; def MipsExt : SDNode<"MipsISD::Ext", SDT_Ext>; def MipsIns : SDNode<"MipsISD::Ins", SDT_Ins>; +def MipsLWL : SDNode<"MipsISD::LWL", SDTMipsLoadLR, + [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; +def MipsLWR : SDNode<"MipsISD::LWR", SDTMipsLoadLR, + [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; +def MipsSWL : SDNode<"MipsISD::SWL", SDTStore, + [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>; +def MipsSWR : SDNode<"MipsISD::SWR", SDTStore, + [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>; +def MipsLDL : SDNode<"MipsISD::LDL", SDTMipsLoadLR, + [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; +def MipsLDR : SDNode<"MipsISD::LDR", SDTMipsLoadLR, + [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; +def MipsSDL : SDNode<"MipsISD::SDL", SDTStore, + [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>; +def MipsSDR : SDNode<"MipsISD::SDR", SDTStore, + [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>; + //===----------------------------------------------------------------------===// // Mips Instruction Predicate Definitions. 
//===----------------------------------------------------------------------===// @@ -145,12 +159,26 @@ def IsN64 : Predicate<"Subtarget.isABI_N64()">, AssemblerPredicate<"FeatureN64">; def NotN64 : Predicate<"!Subtarget.isABI_N64()">, AssemblerPredicate<"!FeatureN64">; +def InMips16Mode : Predicate<"Subtarget.inMips16Mode()">, + AssemblerPredicate<"FeatureMips16">; def RelocStatic : Predicate<"TM.getRelocationModel() == Reloc::Static">, AssemblerPredicate<"FeatureMips32">; def RelocPIC : Predicate<"TM.getRelocationModel() == Reloc::PIC_">, AssemblerPredicate<"FeatureMips32">; def NoNaNsFPMath : Predicate<"TM.Options.NoNaNsFPMath">, AssemblerPredicate<"FeatureMips32">; +def HasStandardEncoding : Predicate<"Subtarget.hasStandardEncoding()">, + AssemblerPredicate<"!FeatureMips16">; + +class MipsPat : Pat { + let Predicates = [HasStandardEncoding]; +} + +//===----------------------------------------------------------------------===// +// Instruction format superclass +//===----------------------------------------------------------------------===// + +include "MipsInstrFormats.td" //===----------------------------------------------------------------------===// // Mips Operand, Complex Patterns and Transformations Definitions. @@ -190,6 +218,7 @@ def mem : Operand { def mem64 : Operand { let PrintMethod = "printMemOperand"; let MIOperandInfo = (ops CPU64Regs, simm16_64); + let EncoderMethod = "getMemEncoding"; } def mem_ea : Operand { @@ -252,7 +281,8 @@ def immZExt5 : ImmLeaf; // Mips Address Mode! SDNode frameindex could possibily be a match // since load and store instructions from stack used it. -def addr : ComplexPattern; +def addr : + ComplexPattern; //===----------------------------------------------------------------------===// // Pattern fragment for load/store @@ -418,21 +448,13 @@ class StoreM op, string instr_asm, PatFrag OpNode, RegisterClass RC, let isPseudo = Pseudo; } -// Unaligned Memory Load/Store -let canFoldAsLoad = 1 in -class LoadUnAlign op, RegisterClass RC, Operand MemOpnd>: - FMem {} - -class StoreUnAlign op, RegisterClass RC, Operand MemOpnd>: - FMem {} - // 32-bit load. multiclass LoadM32 op, string instr_asm, PatFrag OpNode, bit Pseudo = 0> { def #NAME# : LoadM, - Requires<[NotN64]>; + Requires<[NotN64, HasStandardEncoding]>; def _P8 : LoadM, - Requires<[IsN64]> { + Requires<[IsN64, HasStandardEncoding]> { let DecoderNamespace = "Mips64"; let isCodeGenOnly = 1; } @@ -442,31 +464,21 @@ multiclass LoadM32 op, string instr_asm, PatFrag OpNode, multiclass LoadM64 op, string instr_asm, PatFrag OpNode, bit Pseudo = 0> { def #NAME# : LoadM, - Requires<[NotN64]>; + Requires<[NotN64, HasStandardEncoding]>; def _P8 : LoadM, - Requires<[IsN64]> { + Requires<[IsN64, HasStandardEncoding]> { let DecoderNamespace = "Mips64"; let isCodeGenOnly = 1; } } -// 32-bit load. -multiclass LoadUnAlign32 op> { - def #NAME# : LoadUnAlign, - Requires<[NotN64]>; - def _P8 : LoadUnAlign, - Requires<[IsN64]> { - let DecoderNamespace = "Mips64"; - let isCodeGenOnly = 1; - } -} // 32-bit store. 
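// As with the load multiclasses above, each store multiclass below pairs a
// standard def (NotN64) with a _P8 variant for N64's 64-bit pointers, and
// every Requires list now also carries HasStandardEncoding so that none of
// these encodings are selected when the subtarget is in Mips16 mode (see the
// InMips16Mode/HasStandardEncoding predicates defined above).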
multiclass StoreM32 op, string instr_asm, PatFrag OpNode, bit Pseudo = 0> { def #NAME# : StoreM, - Requires<[NotN64]>; + Requires<[NotN64, HasStandardEncoding]>; def _P8 : StoreM, - Requires<[IsN64]> { + Requires<[IsN64, HasStandardEncoding]> { let DecoderNamespace = "Mips64"; let isCodeGenOnly = 1; } @@ -476,20 +488,69 @@ multiclass StoreM32 op, string instr_asm, PatFrag OpNode, multiclass StoreM64 op, string instr_asm, PatFrag OpNode, bit Pseudo = 0> { def #NAME# : StoreM, - Requires<[NotN64]>; + Requires<[NotN64, HasStandardEncoding]>; def _P8 : StoreM, - Requires<[IsN64]> { + Requires<[IsN64, HasStandardEncoding]> { let DecoderNamespace = "Mips64"; let isCodeGenOnly = 1; } } -// 32-bit store. -multiclass StoreUnAlign32 op> { - def #NAME# : StoreUnAlign, - Requires<[NotN64]>; - def _P8 : StoreUnAlign, - Requires<[IsN64]> { +// Load/Store Left/Right +let canFoldAsLoad = 1 in +class LoadLeftRight op, string instr_asm, SDNode OpNode, + RegisterClass RC, Operand MemOpnd> : + FMem { + string Constraints = "$src = $rt"; +} + +class StoreLeftRight op, string instr_asm, SDNode OpNode, + RegisterClass RC, Operand MemOpnd>: + FMem; + +// 32-bit load left/right. +multiclass LoadLeftRightM32 op, string instr_asm, SDNode OpNode> { + def #NAME# : LoadLeftRight, + Requires<[NotN64, HasStandardEncoding]>; + def _P8 : LoadLeftRight, + Requires<[IsN64, HasStandardEncoding]> { + let DecoderNamespace = "Mips64"; + let isCodeGenOnly = 1; + } +} + +// 64-bit load left/right. +multiclass LoadLeftRightM64 op, string instr_asm, SDNode OpNode> { + def #NAME# : LoadLeftRight, + Requires<[NotN64, HasStandardEncoding]>; + def _P8 : LoadLeftRight, + Requires<[IsN64, HasStandardEncoding]> { + let DecoderNamespace = "Mips64"; + let isCodeGenOnly = 1; + } +} + +// 32-bit store left/right. +multiclass StoreLeftRightM32 op, string instr_asm, SDNode OpNode> { + def #NAME# : StoreLeftRight, + Requires<[NotN64, HasStandardEncoding]>; + def _P8 : StoreLeftRight, + Requires<[IsN64, HasStandardEncoding]> { + let DecoderNamespace = "Mips64"; + let isCodeGenOnly = 1; + } +} + +// 64-bit store left/right. 
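+// The left/right pairs above (and the 64-bit variants below) are the MIPS
+// primitives for unaligned word access: each instruction of a pair moves the
+// bytes of the word that lie on its side of the unaligned address, and the
+// hardware merges them into one register. On a little-endian target the pair
+// is "lwr $t, 0($p)" plus "lwl $t, 3($p)", and the combined effect is an
+// ordinary unaligned 32-bit load, roughly (illustrative C++ only):
+//
+//   uint32_t unalignedLoad32(const uint8_t *P) {
+//     uint32_t V;
+//     std::memcpy(&V, P, sizeof V); // lwr fills the low bytes, lwl the high
+//     return V;
+//   }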
+multiclass StoreLeftRightM64 op, string instr_asm, SDNode OpNode> { + def #NAME# : StoreLeftRight, + Requires<[NotN64, HasStandardEncoding]>; + def _P8 : StoreLeftRight, + Requires<[IsN64, HasStandardEncoding]> { let DecoderNamespace = "Mips64"; let isCodeGenOnly = 1; } @@ -503,6 +564,7 @@ class CBranch op, string instr_asm, PatFrag cond_op, RegisterClass RC>: let isBranch = 1; let isTerminator = 1; let hasDelaySlot = 1; + let Defs = [AT]; } class CBranchZero op, bits<5> _rt, string instr_asm, PatFrag cond_op, @@ -514,6 +576,7 @@ class CBranchZero op, bits<5> _rt, string instr_asm, PatFrag cond_op, let isBranch = 1; let isTerminator = 1; let hasDelaySlot = 1; + let Defs = [AT]; } // SetCC @@ -541,8 +604,9 @@ class JumpFJ op, string instr_asm>: let isTerminator=1; let isBarrier=1; let hasDelaySlot = 1; - let Predicates = [RelocStatic]; + let Predicates = [RelocStatic, HasStandardEncoding]; let DecoderMethod = "DecodeJumpTarget"; + let Defs = [AT]; } // Unconditional branch @@ -555,23 +619,37 @@ class UncondBranch op, string instr_asm>: let isTerminator = 1; let isBarrier = 1; let hasDelaySlot = 1; - let Predicates = [RelocPIC]; + let Predicates = [RelocPIC, HasStandardEncoding]; + let Defs = [AT]; } -let isBranch=1, isTerminator=1, isBarrier=1, rd=0, hasDelaySlot = 1, - isIndirectBranch = 1 in -class JumpFR op, bits<6> func, string instr_asm, RegisterClass RC>: - FR { +// Base class for indirect branch and return instruction classes. +let isTerminator=1, isBarrier=1, hasDelaySlot = 1 in +class JumpFR pattern>: + FR<0, 0x8, (outs), (ins RC:$rs), "jr\t$rs", pattern, IIBranch> { let rt = 0; let rd = 0; let shamt = 0; } +// Indirect branch +class IndirectBranch: JumpFR { + let isBranch = 1; + let isIndirectBranch = 1; +} + +// Return instruction +class RetBase: JumpFR { + let isReturn = 1; + let isCodeGenOnly = 1; + let hasCtrlDep = 1; + let hasExtraSrcRegAllocReq = 1; +} + // Jump and Link (Call) -let isCall=1, hasDelaySlot=1 in { +let isCall=1, hasDelaySlot=1, Defs = [RA] in { class JumpLink op, string instr_asm>: - FJ { let DecoderMethod = "DecodeJumpTarget"; @@ -579,7 +657,7 @@ let isCall=1, hasDelaySlot=1 in { class JumpLinkReg op, bits<6> func, string instr_asm, RegisterClass RC>: - FR { let rt = 0; let rd = 31; @@ -587,7 +665,7 @@ let isCall=1, hasDelaySlot=1 in { } class BranchLink _rt, RegisterClass RC>: - FI<0x1, (outs), (ins RC:$rs, brtarget:$imm16, variable_ops), + FI<0x1, (outs), (ins RC:$rs, brtarget:$imm16), !strconcat(instr_asm, "\t$rs, $imm16"), [], IIBranch> { let rt = _rt; } @@ -644,16 +722,18 @@ class MoveToLOHI func, string instr_asm, RegisterClass RC, let neverHasSideEffects = 1; } -class EffectiveAddress : - FMem<0x09, (outs RC:$rt), (ins Mem:$addr), - instr_asm, [(set RC:$rt, addr:$addr)], IIAlu>; +class EffectiveAddress opc, string instr_asm, RegisterClass RC, Operand Mem> : + FMem { + let isCodeGenOnly = 1; +} // Count Leading Ones/Zeros in Word class CountLeading0 func, string instr_asm, RegisterClass RC>: FR<0x1c, func, (outs RC:$rd), (ins RC:$rs), !strconcat(instr_asm, "\t$rd, $rs"), [(set RC:$rd, (ctlz RC:$rs))], IIAlu>, - Requires<[HasBitCount]> { + Requires<[HasBitCount, HasStandardEncoding]> { let shamt = 0; let rt = rd; } @@ -662,7 +742,7 @@ class CountLeading1 func, string instr_asm, RegisterClass RC>: FR<0x1c, func, (outs RC:$rd), (ins RC:$rs), !strconcat(instr_asm, "\t$rd, $rs"), [(set RC:$rd, (ctlz (not RC:$rs)))], IIAlu>, - Requires<[HasBitCount]> { + Requires<[HasBitCount, HasStandardEncoding]> { let shamt = 0; let rt = rd; } @@ -675,7 +755,7 @@ class 
SignExtInReg sa, string instr_asm, ValueType vt, [(set RC:$rd, (sext_inreg RC:$rt, vt))], NoItinerary> { let rs = 0; let shamt = sa; - let Predicates = [HasSEInReg]; + let Predicates = [HasSEInReg, HasStandardEncoding]; } // Subword Swap @@ -684,7 +764,7 @@ class SubwordSwap func, bits<5> sa, string instr_asm, RegisterClass RC>: !strconcat(instr_asm, "\t$rd, $rt"), [], NoItinerary> { let rs = 0; let shamt = sa; - let Predicates = [HasSwap]; + let Predicates = [HasSwap, HasStandardEncoding]; let neverHasSideEffects = 1; } @@ -705,7 +785,7 @@ class ExtBase _funct, string instr_asm, RegisterClass RC>: bits<5> sz; let rd = sz; let shamt = pos; - let Predicates = [HasMips32r2]; + let Predicates = [HasMips32r2, HasStandardEncoding]; } class InsBase _funct, string instr_asm, RegisterClass RC>: @@ -718,20 +798,22 @@ class InsBase _funct, string instr_asm, RegisterClass RC>: bits<5> sz; let rd = sz; let shamt = pos; - let Predicates = [HasMips32r2]; + let Predicates = [HasMips32r2, HasStandardEncoding]; let Constraints = "$src = $rt"; } // Atomic instructions with 2 source operands (ATOMIC_SWAP & ATOMIC_LOAD_*). class Atomic2Ops : - MipsPseudo<(outs DRC:$dst), (ins PRC:$ptr, DRC:$incr), - !strconcat("atomic_", Opstr, "\t$dst, $ptr, $incr"), - [(set DRC:$dst, (Op PRC:$ptr, DRC:$incr))]>; + PseudoSE<(outs DRC:$dst), (ins PRC:$ptr, DRC:$incr), + !strconcat("atomic_", Opstr, "\t$dst, $ptr, $incr"), + [(set DRC:$dst, (Op PRC:$ptr, DRC:$incr))]>; multiclass Atomic2Ops32 { - def #NAME# : Atomic2Ops, Requires<[NotN64]>; - def _P8 : Atomic2Ops, Requires<[IsN64]> { + def #NAME# : Atomic2Ops, + Requires<[NotN64, HasStandardEncoding]>; + def _P8 : Atomic2Ops, + Requires<[IsN64, HasStandardEncoding]> { let DecoderNamespace = "Mips64"; } } @@ -739,13 +821,15 @@ multiclass Atomic2Ops32 { // Atomic Compare & Swap. class AtomicCmpSwap : - MipsPseudo<(outs DRC:$dst), (ins PRC:$ptr, DRC:$cmp, DRC:$swap), - !strconcat("atomic_cmp_swap_", Width, "\t$dst, $ptr, $cmp, $swap"), - [(set DRC:$dst, (Op PRC:$ptr, DRC:$cmp, DRC:$swap))]>; + PseudoSE<(outs DRC:$dst), (ins PRC:$ptr, DRC:$cmp, DRC:$swap), + !strconcat("atomic_cmp_swap_", Width, "\t$dst, $ptr, $cmp, $swap"), + [(set DRC:$dst, (Op PRC:$ptr, DRC:$cmp, DRC:$swap))]>; multiclass AtomicCmpSwap32 { - def #NAME# : AtomicCmpSwap, Requires<[NotN64]>; - def _P8 : AtomicCmpSwap, Requires<[IsN64]> { + def #NAME# : AtomicCmpSwap, + Requires<[NotN64, HasStandardEncoding]>; + def _P8 : AtomicCmpSwap, + Requires<[IsN64, HasStandardEncoding]> { let DecoderNamespace = "Mips64"; } } @@ -767,12 +851,15 @@ class SCBase Opc, string opstring, RegisterClass RC, Operand Mem> : // Pseudo instructions //===----------------------------------------------------------------------===// -// As stack alignment is always done with addiu, we need a 16-bit immediate -let Defs = [SP], Uses = [SP] in { -def ADJCALLSTACKDOWN : MipsPseudo<(outs), (ins uimm16:$amt), +// Return RA. 
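+// RetRA (defined just below) is a pseudo matched by the bare MipsRet node; a
+// post-ISel expansion must rewrite it into the real return, i.e. RET (the
+// RetBase def later in this file, "jr $rs" with $rs = $ra). A sketch of such
+// an expansion; the helper name is hypothetical and the real expansion point
+// is not part of this hunk:
+//
+//   void expandRetRA(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+//                    const TargetInstrInfo &TII) {
+//     BuildMI(MBB, I, I->getDebugLoc(), TII.get(Mips::RET)).addReg(Mips::RA);
+//     MBB.erase(I); // drop the pseudo once the real return is in place
+//   }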
+let isReturn=1, isTerminator=1, hasDelaySlot=1, isBarrier=1, hasCtrlDep=1 in +def RetRA : PseudoSE<(outs), (ins), "", [(MipsRet)]>; + +let Defs = [SP], Uses = [SP], hasSideEffects = 1 in { +def ADJCALLSTACKDOWN : MipsPseudo<(outs), (ins i32imm:$amt), "!ADJCALLSTACKDOWN $amt", [(callseq_start timm:$amt)]>; -def ADJCALLSTACKUP : MipsPseudo<(outs), (ins uimm16:$amt1, uimm16:$amt2), +def ADJCALLSTACKUP : MipsPseudo<(outs), (ins i32imm:$amt1, i32imm:$amt2), "!ADJCALLSTACKUP $amt1", [(callseq_end timm:$amt1, timm:$amt2)]>; } @@ -782,31 +869,8 @@ def ADJCALLSTACKUP : MipsPseudo<(outs), (ins uimm16:$amt1, uimm16:$amt2), // are used, we have the same behavior, but get also a bunch of warnings // from the assembler. let neverHasSideEffects = 1 in -def CPRESTORE : MipsPseudo<(outs), (ins i32imm:$loc, CPURegs:$gp), - ".cprestore\t$loc", []>; - -// For O32 ABI & PIC & non-fixed global base register, the following instruction -// seqeunce is emitted to set the global base register: -// -// 0. lui $2, %hi(_gp_disp) -// 1. addiu $2, $2, %lo(_gp_disp) -// 2. addu $globalbasereg, $2, $t9 -// -// SETGP01 is emitted during Prologue/Epilogue insertion and then converted to -// instructions 0 and 1 in the sequence above during MC lowering. -// SETGP2 is emitted just before register allocation and converted to -// instruction 2 just prior to post-RA scheduling. -// -// These pseudo instructions are needed to ensure no instructions are inserted -// before or between instructions 0 and 1, which is a limitation imposed by -// GNU linker. - -let isTerminator = 1, isBarrier = 1 in -def SETGP01 : MipsPseudo<(outs CPURegs:$dst), (ins), "", []>; - -let neverHasSideEffects = 1 in -def SETGP2 : MipsPseudo<(outs CPURegs:$globalreg), (ins CPURegs:$picreg), "", - []>; +def CPRESTORE : PseudoSE<(outs), (ins i32imm:$loc, CPURegs:$gp), + ".cprestore\t$loc", []>; let usesCustomInserter = 1 in { defm ATOMIC_LOAD_ADD_I8 : Atomic2Ops32; @@ -876,7 +940,7 @@ def SRLV : shift_rotate_reg<0x06, 0x00, "srlv", srl, CPURegs>; def SRAV : shift_rotate_reg<0x07, 0x00, "srav", sra, CPURegs>; // Rotate Instructions -let Predicates = [HasMips32r2] in { +let Predicates = [HasMips32r2, HasStandardEncoding] in { def ROTR : shift_rotate_imm32<0x02, 0x01, "rotr", rotr>; def ROTRV : shift_rotate_reg<0x06, 0x01, "rotrv", rotr, CPURegs>; } @@ -899,15 +963,15 @@ defm ULW : LoadM32<0x23, "ulw", load_u, 1>; defm USH : StoreM32<0x29, "ush", truncstorei16_u, 1>; defm USW : StoreM32<0x2b, "usw", store_u, 1>; -/// Primitives for unaligned -defm LWL : LoadUnAlign32<0x22>; -defm LWR : LoadUnAlign32<0x26>; -defm SWL : StoreUnAlign32<0x2A>; -defm SWR : StoreUnAlign32<0x2E>; +/// load/store left/right +defm LWL : LoadLeftRightM32<0x22, "lwl", MipsLWL>; +defm LWR : LoadLeftRightM32<0x26, "lwr", MipsLWR>; +defm SWL : StoreLeftRightM32<0x2a, "swl", MipsSWL>; +defm SWR : StoreLeftRightM32<0x2e, "swr", MipsSWR>; let hasSideEffects = 1 in -def SYNC : MipsInst<(outs), (ins i32imm:$stype), "sync $stype", - [(MipsSync imm:$stype)], NoItinerary, FrmOther> +def SYNC : InstSE<(outs), (ins i32imm:$stype), "sync $stype", + [(MipsSync imm:$stype)], NoItinerary, FrmOther> { bits<5> stype; let Opcode = 0; @@ -917,19 +981,23 @@ def SYNC : MipsInst<(outs), (ins i32imm:$stype), "sync $stype", } /// Load-linked, Store-conditional -def LL : LLBase<0x30, "ll", CPURegs, mem>, Requires<[NotN64]>; -def LL_P8 : LLBase<0x30, "ll", CPURegs, mem64>, Requires<[IsN64]> { +def LL : LLBase<0x30, "ll", CPURegs, mem>, + Requires<[NotN64, HasStandardEncoding]>; +def LL_P8 : LLBase<0x30, "ll", 
CPURegs, mem64>, + Requires<[IsN64, HasStandardEncoding]> { let DecoderNamespace = "Mips64"; } -def SC : SCBase<0x38, "sc", CPURegs, mem>, Requires<[NotN64]>; -def SC_P8 : SCBase<0x38, "sc", CPURegs, mem64>, Requires<[IsN64]> { +def SC : SCBase<0x38, "sc", CPURegs, mem>, + Requires<[NotN64, HasStandardEncoding]>; +def SC_P8 : SCBase<0x38, "sc", CPURegs, mem64>, + Requires<[IsN64, HasStandardEncoding]> { let DecoderNamespace = "Mips64"; } /// Jump and Branch Instructions def J : JumpFJ<0x02, "j">; -def JR : JumpFR<0x00, 0x08, "jr", CPURegs>; +def JR : IndirectBranch; def B : UncondBranch<0x04, "b">; def BEQ : CBranch<0x04, "beq", seteq, CPURegs>; def BNE : CBranch<0x05, "bne", setne, CPURegs>; @@ -938,15 +1006,16 @@ def BGTZ : CBranchZero<0x07, 0, "bgtz", setgt, CPURegs>; def BLEZ : CBranchZero<0x06, 0, "blez", setle, CPURegs>; def BLTZ : CBranchZero<0x01, 0, "bltz", setlt, CPURegs>; +let rt = 0, rs = 0, isBranch = 1, isTerminator = 1, isBarrier = 1, + hasDelaySlot = 1, Defs = [RA] in +def BAL_BR: FI<0x1, (outs), (ins brtarget:$imm16), "bal\t$imm16", [], IIBranch>; + def JAL : JumpLink<0x03, "jal">; def JALR : JumpLinkReg<0x00, 0x09, "jalr", CPURegs>; def BGEZAL : BranchLink<"bgezal", 0x11, CPURegs>; def BLTZAL : BranchLink<"bltzal", 0x10, CPURegs>; -let isReturn=1, isTerminator=1, hasDelaySlot=1, isCodeGenOnly=1, - isBarrier=1, hasCtrlDep=1, rd=0, rt=0, shamt=0 in - def RET : FR <0x00, 0x08, (outs), (ins CPURegs:$target), - "jr\t$target", [(MipsRet CPURegs:$target)], IIBranch>; +def RET : RetBase; /// Multiply and Divide Instructions. def MULT : Mult32<0x18, "mult", IIImul>; @@ -978,17 +1047,13 @@ let addr=0 in // instructions. The same not happens for stack address copies, so an // add op with mem ComplexPattern is used and the stack address copy // can be matched. It's similar to Sparc LEA_ADDRi -def LEA_ADDiu : EffectiveAddress<"addiu\t$rt, $addr", CPURegs, mem_ea> { - let isCodeGenOnly = 1; -} +def LEA_ADDiu : EffectiveAddress<0x09,"addiu\t$rt, $addr", CPURegs, mem_ea>; // DynAlloc node points to dynamically allocated stack space. // $sp is added to the list of implicitly used registers to prevent dead code // elimination from removing instructions that modify $sp. let Uses = [SP] in -def DynAlloc : EffectiveAddress<"addiu\t$rt, $addr", CPURegs, mem_ea> { - let isCodeGenOnly = 1; -} +def DynAlloc : EffectiveAddress<0x09,"addiu\t$rt, $addr", CPURegs, mem_ea>; // MADD*/MSUB* def MADD : MArithR<0, "madd", MipsMAdd, 1>; @@ -999,7 +1064,7 @@ def MSUBU : MArithR<5, "msubu", MipsMSubu>; // MUL is a assembly macro in the current used ISAs. In recent ISA's // it is a real instruction. 
def MUL : ArithLogicR<0x1c, 0x02, "mul", mul, IIImul, CPURegs, 1>, - Requires<[HasMips32]>; + Requires<[HasMips32, HasStandardEncoding]>; def RDHWR : ReadHardware; @@ -1011,67 +1076,67 @@ def INS : InsBase<4, "ins", CPURegs>; //===----------------------------------------------------------------------===// // Small immediates -def : Pat<(i32 immSExt16:$in), - (ADDiu ZERO, imm:$in)>; -def : Pat<(i32 immZExt16:$in), - (ORi ZERO, imm:$in)>; -def : Pat<(i32 immLow16Zero:$in), - (LUi (HI16 imm:$in))>; +def : MipsPat<(i32 immSExt16:$in), + (ADDiu ZERO, imm:$in)>; +def : MipsPat<(i32 immZExt16:$in), + (ORi ZERO, imm:$in)>; +def : MipsPat<(i32 immLow16Zero:$in), + (LUi (HI16 imm:$in))>; // Arbitrary immediates -def : Pat<(i32 imm:$imm), +def : MipsPat<(i32 imm:$imm), (ORi (LUi (HI16 imm:$imm)), (LO16 imm:$imm))>; -// Carry patterns -def : Pat<(subc CPURegs:$lhs, CPURegs:$rhs), - (SUBu CPURegs:$lhs, CPURegs:$rhs)>; -def : Pat<(addc CPURegs:$lhs, CPURegs:$rhs), - (ADDu CPURegs:$lhs, CPURegs:$rhs)>; -def : Pat<(addc CPURegs:$src, immSExt16:$imm), - (ADDiu CPURegs:$src, imm:$imm)>; +// Carry MipsPatterns +def : MipsPat<(subc CPURegs:$lhs, CPURegs:$rhs), + (SUBu CPURegs:$lhs, CPURegs:$rhs)>; +def : MipsPat<(addc CPURegs:$lhs, CPURegs:$rhs), + (ADDu CPURegs:$lhs, CPURegs:$rhs)>; +def : MipsPat<(addc CPURegs:$src, immSExt16:$imm), + (ADDiu CPURegs:$src, imm:$imm)>; // Call -def : Pat<(MipsJmpLink (i32 tglobaladdr:$dst)), - (JAL tglobaladdr:$dst)>; -def : Pat<(MipsJmpLink (i32 texternalsym:$dst)), - (JAL texternalsym:$dst)>; -//def : Pat<(MipsJmpLink CPURegs:$dst), -// (JALR CPURegs:$dst)>; +def : MipsPat<(MipsJmpLink (i32 tglobaladdr:$dst)), + (JAL tglobaladdr:$dst)>; +def : MipsPat<(MipsJmpLink (i32 texternalsym:$dst)), + (JAL texternalsym:$dst)>; +//def : MipsPat<(MipsJmpLink CPURegs:$dst), +// (JALR CPURegs:$dst)>; // hi/lo relocs -def : Pat<(MipsHi tglobaladdr:$in), (LUi tglobaladdr:$in)>; -def : Pat<(MipsHi tblockaddress:$in), (LUi tblockaddress:$in)>; -def : Pat<(MipsHi tjumptable:$in), (LUi tjumptable:$in)>; -def : Pat<(MipsHi tconstpool:$in), (LUi tconstpool:$in)>; -def : Pat<(MipsHi tglobaltlsaddr:$in), (LUi tglobaltlsaddr:$in)>; - -def : Pat<(MipsLo tglobaladdr:$in), (ADDiu ZERO, tglobaladdr:$in)>; -def : Pat<(MipsLo tblockaddress:$in), (ADDiu ZERO, tblockaddress:$in)>; -def : Pat<(MipsLo tjumptable:$in), (ADDiu ZERO, tjumptable:$in)>; -def : Pat<(MipsLo tconstpool:$in), (ADDiu ZERO, tconstpool:$in)>; -def : Pat<(MipsLo tglobaltlsaddr:$in), (ADDiu ZERO, tglobaltlsaddr:$in)>; - -def : Pat<(add CPURegs:$hi, (MipsLo tglobaladdr:$lo)), - (ADDiu CPURegs:$hi, tglobaladdr:$lo)>; -def : Pat<(add CPURegs:$hi, (MipsLo tblockaddress:$lo)), - (ADDiu CPURegs:$hi, tblockaddress:$lo)>; -def : Pat<(add CPURegs:$hi, (MipsLo tjumptable:$lo)), - (ADDiu CPURegs:$hi, tjumptable:$lo)>; -def : Pat<(add CPURegs:$hi, (MipsLo tconstpool:$lo)), - (ADDiu CPURegs:$hi, tconstpool:$lo)>; -def : Pat<(add CPURegs:$hi, (MipsLo tglobaltlsaddr:$lo)), - (ADDiu CPURegs:$hi, tglobaltlsaddr:$lo)>; +def : MipsPat<(MipsHi tglobaladdr:$in), (LUi tglobaladdr:$in)>; +def : MipsPat<(MipsHi tblockaddress:$in), (LUi tblockaddress:$in)>; +def : MipsPat<(MipsHi tjumptable:$in), (LUi tjumptable:$in)>; +def : MipsPat<(MipsHi tconstpool:$in), (LUi tconstpool:$in)>; +def : MipsPat<(MipsHi tglobaltlsaddr:$in), (LUi tglobaltlsaddr:$in)>; + +def : MipsPat<(MipsLo tglobaladdr:$in), (ADDiu ZERO, tglobaladdr:$in)>; +def : MipsPat<(MipsLo tblockaddress:$in), (ADDiu ZERO, tblockaddress:$in)>; +def : MipsPat<(MipsLo tjumptable:$in), (ADDiu ZERO, 
tjumptable:$in)>; +def : MipsPat<(MipsLo tconstpool:$in), (ADDiu ZERO, tconstpool:$in)>; +def : MipsPat<(MipsLo tglobaltlsaddr:$in), (ADDiu ZERO, tglobaltlsaddr:$in)>; + +def : MipsPat<(add CPURegs:$hi, (MipsLo tglobaladdr:$lo)), + (ADDiu CPURegs:$hi, tglobaladdr:$lo)>; +def : MipsPat<(add CPURegs:$hi, (MipsLo tblockaddress:$lo)), + (ADDiu CPURegs:$hi, tblockaddress:$lo)>; +def : MipsPat<(add CPURegs:$hi, (MipsLo tjumptable:$lo)), + (ADDiu CPURegs:$hi, tjumptable:$lo)>; +def : MipsPat<(add CPURegs:$hi, (MipsLo tconstpool:$lo)), + (ADDiu CPURegs:$hi, tconstpool:$lo)>; +def : MipsPat<(add CPURegs:$hi, (MipsLo tglobaltlsaddr:$lo)), + (ADDiu CPURegs:$hi, tglobaltlsaddr:$lo)>; // gp_rel relocs -def : Pat<(add CPURegs:$gp, (MipsGPRel tglobaladdr:$in)), - (ADDiu CPURegs:$gp, tglobaladdr:$in)>; -def : Pat<(add CPURegs:$gp, (MipsGPRel tconstpool:$in)), - (ADDiu CPURegs:$gp, tconstpool:$in)>; +def : MipsPat<(add CPURegs:$gp, (MipsGPRel tglobaladdr:$in)), + (ADDiu CPURegs:$gp, tglobaladdr:$in)>; +def : MipsPat<(add CPURegs:$gp, (MipsGPRel tconstpool:$in)), + (ADDiu CPURegs:$gp, tconstpool:$in)>; // wrapper_pic class WrapperPat: - Pat<(MipsWrapper RC:$gp, node:$in), - (ADDiuOp RC:$gp, node:$in)>; + MipsPat<(MipsWrapper RC:$gp, node:$in), + (ADDiuOp RC:$gp, node:$in)>; def : WrapperPat; def : WrapperPat; @@ -1081,58 +1146,58 @@ def : WrapperPat; def : WrapperPat; // Mips does not have "not", so we expand our way -def : Pat<(not CPURegs:$in), - (NOR CPURegs:$in, ZERO)>; +def : MipsPat<(not CPURegs:$in), + (NOR CPURegs:$in, ZERO)>; // extended loads -let Predicates = [NotN64] in { - def : Pat<(i32 (extloadi1 addr:$src)), (LBu addr:$src)>; - def : Pat<(i32 (extloadi8 addr:$src)), (LBu addr:$src)>; - def : Pat<(i32 (extloadi16_a addr:$src)), (LHu addr:$src)>; - def : Pat<(i32 (extloadi16_u addr:$src)), (ULHu addr:$src)>; +let Predicates = [NotN64, HasStandardEncoding] in { + def : MipsPat<(i32 (extloadi1 addr:$src)), (LBu addr:$src)>; + def : MipsPat<(i32 (extloadi8 addr:$src)), (LBu addr:$src)>; + def : MipsPat<(i32 (extloadi16_a addr:$src)), (LHu addr:$src)>; + def : MipsPat<(i32 (extloadi16_u addr:$src)), (ULHu addr:$src)>; } -let Predicates = [IsN64] in { - def : Pat<(i32 (extloadi1 addr:$src)), (LBu_P8 addr:$src)>; - def : Pat<(i32 (extloadi8 addr:$src)), (LBu_P8 addr:$src)>; - def : Pat<(i32 (extloadi16_a addr:$src)), (LHu_P8 addr:$src)>; - def : Pat<(i32 (extloadi16_u addr:$src)), (ULHu_P8 addr:$src)>; +let Predicates = [IsN64, HasStandardEncoding] in { + def : MipsPat<(i32 (extloadi1 addr:$src)), (LBu_P8 addr:$src)>; + def : MipsPat<(i32 (extloadi8 addr:$src)), (LBu_P8 addr:$src)>; + def : MipsPat<(i32 (extloadi16_a addr:$src)), (LHu_P8 addr:$src)>; + def : MipsPat<(i32 (extloadi16_u addr:$src)), (ULHu_P8 addr:$src)>; } // peepholes -let Predicates = [NotN64] in { - def : Pat<(store_a (i32 0), addr:$dst), (SW ZERO, addr:$dst)>; - def : Pat<(store_u (i32 0), addr:$dst), (USW ZERO, addr:$dst)>; +let Predicates = [NotN64, HasStandardEncoding] in { + def : MipsPat<(store_a (i32 0), addr:$dst), (SW ZERO, addr:$dst)>; + def : MipsPat<(store_u (i32 0), addr:$dst), (USW ZERO, addr:$dst)>; } -let Predicates = [IsN64] in { - def : Pat<(store_a (i32 0), addr:$dst), (SW_P8 ZERO, addr:$dst)>; - def : Pat<(store_u (i32 0), addr:$dst), (USW_P8 ZERO, addr:$dst)>; +let Predicates = [IsN64, HasStandardEncoding] in { + def : MipsPat<(store_a (i32 0), addr:$dst), (SW_P8 ZERO, addr:$dst)>; + def : MipsPat<(store_u (i32 0), addr:$dst), (USW_P8 ZERO, addr:$dst)>; } // brcond patterns multiclass BrcondPats { -def : 
Pat<(brcond (i32 (setne RC:$lhs, 0)), bb:$dst), - (BNEOp RC:$lhs, ZEROReg, bb:$dst)>; -def : Pat<(brcond (i32 (seteq RC:$lhs, 0)), bb:$dst), - (BEQOp RC:$lhs, ZEROReg, bb:$dst)>; +def : MipsPat<(brcond (i32 (setne RC:$lhs, 0)), bb:$dst), + (BNEOp RC:$lhs, ZEROReg, bb:$dst)>; +def : MipsPat<(brcond (i32 (seteq RC:$lhs, 0)), bb:$dst), + (BEQOp RC:$lhs, ZEROReg, bb:$dst)>; -def : Pat<(brcond (i32 (setge RC:$lhs, RC:$rhs)), bb:$dst), - (BEQ (SLTOp RC:$lhs, RC:$rhs), ZERO, bb:$dst)>; -def : Pat<(brcond (i32 (setuge RC:$lhs, RC:$rhs)), bb:$dst), - (BEQ (SLTuOp RC:$lhs, RC:$rhs), ZERO, bb:$dst)>; -def : Pat<(brcond (i32 (setge RC:$lhs, immSExt16:$rhs)), bb:$dst), - (BEQ (SLTiOp RC:$lhs, immSExt16:$rhs), ZERO, bb:$dst)>; -def : Pat<(brcond (i32 (setuge RC:$lhs, immSExt16:$rhs)), bb:$dst), - (BEQ (SLTiuOp RC:$lhs, immSExt16:$rhs), ZERO, bb:$dst)>; +def : MipsPat<(brcond (i32 (setge RC:$lhs, RC:$rhs)), bb:$dst), + (BEQ (SLTOp RC:$lhs, RC:$rhs), ZERO, bb:$dst)>; +def : MipsPat<(brcond (i32 (setuge RC:$lhs, RC:$rhs)), bb:$dst), + (BEQ (SLTuOp RC:$lhs, RC:$rhs), ZERO, bb:$dst)>; +def : MipsPat<(brcond (i32 (setge RC:$lhs, immSExt16:$rhs)), bb:$dst), + (BEQ (SLTiOp RC:$lhs, immSExt16:$rhs), ZERO, bb:$dst)>; +def : MipsPat<(brcond (i32 (setuge RC:$lhs, immSExt16:$rhs)), bb:$dst), + (BEQ (SLTiuOp RC:$lhs, immSExt16:$rhs), ZERO, bb:$dst)>; -def : Pat<(brcond (i32 (setle RC:$lhs, RC:$rhs)), bb:$dst), - (BEQ (SLTOp RC:$rhs, RC:$lhs), ZERO, bb:$dst)>; -def : Pat<(brcond (i32 (setule RC:$lhs, RC:$rhs)), bb:$dst), - (BEQ (SLTuOp RC:$rhs, RC:$lhs), ZERO, bb:$dst)>; +def : MipsPat<(brcond (i32 (setle RC:$lhs, RC:$rhs)), bb:$dst), + (BEQ (SLTOp RC:$rhs, RC:$lhs), ZERO, bb:$dst)>; +def : MipsPat<(brcond (i32 (setule RC:$lhs, RC:$rhs)), bb:$dst), + (BEQ (SLTuOp RC:$rhs, RC:$lhs), ZERO, bb:$dst)>; -def : Pat<(brcond RC:$cond, bb:$dst), - (BNEOp RC:$cond, ZEROReg, bb:$dst)>; +def : MipsPat<(brcond RC:$cond, bb:$dst), + (BNEOp RC:$cond, ZEROReg, bb:$dst)>; } defm : BrcondPats; @@ -1140,39 +1205,39 @@ defm : BrcondPats; // setcc patterns multiclass SeteqPats { - def : Pat<(seteq RC:$lhs, RC:$rhs), - (SLTiuOp (XOROp RC:$lhs, RC:$rhs), 1)>; - def : Pat<(setne RC:$lhs, RC:$rhs), - (SLTuOp ZEROReg, (XOROp RC:$lhs, RC:$rhs))>; + def : MipsPat<(seteq RC:$lhs, RC:$rhs), + (SLTiuOp (XOROp RC:$lhs, RC:$rhs), 1)>; + def : MipsPat<(setne RC:$lhs, RC:$rhs), + (SLTuOp ZEROReg, (XOROp RC:$lhs, RC:$rhs))>; } multiclass SetlePats { - def : Pat<(setle RC:$lhs, RC:$rhs), - (XORi (SLTOp RC:$rhs, RC:$lhs), 1)>; - def : Pat<(setule RC:$lhs, RC:$rhs), - (XORi (SLTuOp RC:$rhs, RC:$lhs), 1)>; + def : MipsPat<(setle RC:$lhs, RC:$rhs), + (XORi (SLTOp RC:$rhs, RC:$lhs), 1)>; + def : MipsPat<(setule RC:$lhs, RC:$rhs), + (XORi (SLTuOp RC:$rhs, RC:$lhs), 1)>; } multiclass SetgtPats { - def : Pat<(setgt RC:$lhs, RC:$rhs), - (SLTOp RC:$rhs, RC:$lhs)>; - def : Pat<(setugt RC:$lhs, RC:$rhs), - (SLTuOp RC:$rhs, RC:$lhs)>; + def : MipsPat<(setgt RC:$lhs, RC:$rhs), + (SLTOp RC:$rhs, RC:$lhs)>; + def : MipsPat<(setugt RC:$lhs, RC:$rhs), + (SLTuOp RC:$rhs, RC:$lhs)>; } multiclass SetgePats { - def : Pat<(setge RC:$lhs, RC:$rhs), - (XORi (SLTOp RC:$lhs, RC:$rhs), 1)>; - def : Pat<(setuge RC:$lhs, RC:$rhs), - (XORi (SLTuOp RC:$lhs, RC:$rhs), 1)>; + def : MipsPat<(setge RC:$lhs, RC:$rhs), + (XORi (SLTOp RC:$lhs, RC:$rhs), 1)>; + def : MipsPat<(setuge RC:$lhs, RC:$rhs), + (XORi (SLTuOp RC:$lhs, RC:$rhs), 1)>; } multiclass SetgeImmPats { - def : Pat<(setge RC:$lhs, immSExt16:$rhs), - (XORi (SLTiOp RC:$lhs, immSExt16:$rhs), 1)>; - def : Pat<(setuge RC:$lhs, 
immSExt16:$rhs), - (XORi (SLTiuOp RC:$lhs, immSExt16:$rhs), 1)>; + def : MipsPat<(setge RC:$lhs, immSExt16:$rhs), + (XORi (SLTiOp RC:$lhs, immSExt16:$rhs), 1)>; + def : MipsPat<(setuge RC:$lhs, immSExt16:$rhs), + (XORi (SLTiuOp RC:$lhs, immSExt16:$rhs), 1)>; } defm : SeteqPats; @@ -1182,10 +1247,10 @@ defm : SetgePats; defm : SetgeImmPats; // select MipsDynAlloc -def : Pat<(MipsDynAlloc addr:$f), (DynAlloc addr:$f)>; +def : MipsPat<(MipsDynAlloc addr:$f), (DynAlloc addr:$f)>; // bswap pattern -def : Pat<(bswap CPURegs:$rt), (ROTR (WSBH CPURegs:$rt), 16)>; +def : MipsPat<(bswap CPURegs:$rt), (ROTR (WSBH CPURegs:$rt), 16)>; //===----------------------------------------------------------------------===// // Floating Point Support @@ -1195,3 +1260,8 @@ include "MipsInstrFPU.td" include "Mips64InstrInfo.td" include "MipsCondMov.td" +// +// Mips16 + +include "Mips16InstrFormats.td" +include "Mips16InstrInfo.td" diff --git a/lib/Target/Mips/MipsJITInfo.cpp b/lib/Target/Mips/MipsJITInfo.cpp index 76ca3e1..052046a 100644 --- a/lib/Target/Mips/MipsJITInfo.cpp +++ b/lib/Target/Mips/MipsJITInfo.cpp @@ -27,7 +27,52 @@ using namespace llvm; void MipsJITInfo::replaceMachineCodeForFunction(void *Old, void *New) { - report_fatal_error("MipsJITInfo::replaceMachineCodeForFunction"); + unsigned NewAddr = (intptr_t)New; + unsigned OldAddr = (intptr_t)Old; + const unsigned NopInstr = 0x0; + + // If the functions are in the same memory segment, insert PC-region branch. + if ((NewAddr & 0xF0000000) == ((OldAddr + 4) & 0xF0000000)) { + unsigned *OldInstruction = (unsigned *)Old; + *OldInstruction = 0x08000000; + unsigned JTargetAddr = NewAddr & 0x0FFFFFFC; + + JTargetAddr >>= 2; + *OldInstruction |= JTargetAddr; + + // Insert a NOP. + OldInstruction++; + *OldInstruction = NopInstr; + + sys::Memory::InvalidateInstructionCache(Old, 2 * 4); + } else { + // We need to clear hint bits from the instruction, in case it is 'jr ra'. + const unsigned HintMask = 0xFFFFF83F, ReturnSequence = 0x03e00008; + unsigned* CurrentInstr = (unsigned*)Old; + unsigned CurrInstrHintClear = (*CurrentInstr) & HintMask; + unsigned* NextInstr = CurrentInstr + 1; + unsigned NextInstrHintClear = (*NextInstr) & HintMask; + + // Do absolute jump if there are 2 or more instructions before return from + // the old function. 
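+    // For reference, the absolute-jump patch assembled below is, in words
+    // (mirroring LuiT0Instr, AddiuT0Instr, JrT0Instr and NopInstr):
+    //
+    //   0x3c080000   lui   $t0, %hi(NewAddr)   (immediate OR'ed in below)
+    //   0x25080000   addiu $t0, $t0, %lo(NewAddr)
+    //   0x01000008   jr    $t0
+    //   0x00000000   nop   (branch delay slot)
+    //
+    // The two-instruction check guards against stomping the old function's
+    // "jr $ra" return sequence with this four-word patch.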
+ if ((CurrInstrHintClear != ReturnSequence) && + (NextInstrHintClear != ReturnSequence)) { + const unsigned LuiT0Instr = 0x3c080000, AddiuT0Instr = 0x25080000; + const unsigned JrT0Instr = 0x01000008; + // lui t0, high 16 bit of the NewAddr + (*(CurrentInstr++)) = LuiT0Instr | ((NewAddr & 0xffff0000) >> 16); + // addiu t0, t0, low 16 bit of the NewAddr + (*(CurrentInstr++)) = AddiuT0Instr | (NewAddr & 0x0000ffff); + // jr t0 + (*(CurrentInstr++)) = JrT0Instr; + (*CurrentInstr) = NopInstr; + + sys::Memory::InvalidateInstructionCache(Old, 4 * 4); + } else { + // Unsupported case + report_fatal_error("MipsJITInfo::replaceMachineCodeForFunction"); + } + } } /// JITCompilerFunction - This contains the address of the JIT function used to @@ -154,8 +199,8 @@ TargetJITInfo::StubLayout MipsJITInfo::getStubLayout() { return Result; } -void *MipsJITInfo::emitFunctionStub(const Function* F, void *Fn, - JITCodeEmitter &JCE) { +void *MipsJITInfo::emitFunctionStub(const Function *F, void *Fn, + JITCodeEmitter &JCE) { JCE.emitAlignment(4); void *Addr = (void*) (JCE.getCurrentPCValue()); if (!sys::Memory::setRangeWritable(Addr, 16)) @@ -193,7 +238,7 @@ void *MipsJITInfo::emitFunctionStub(const Function* F, void *Fn, /// it must rewrite the code to contain the actual addresses of any /// referenced global symbols. void MipsJITInfo::relocate(void *Function, MachineRelocation *MR, - unsigned NumRelocs, unsigned char* GOTBase) { + unsigned NumRelocs, unsigned char *GOTBase) { for (unsigned i = 0; i != NumRelocs; ++i, ++MR) { void *RelocPos = (char*) Function + MR->getMachineCodeOffset(); diff --git a/lib/Target/Mips/MipsJITInfo.h b/lib/Target/Mips/MipsJITInfo.h index f4c4ae8..637a318 100644 --- a/lib/Target/Mips/MipsJITInfo.h +++ b/lib/Target/Mips/MipsJITInfo.h @@ -45,8 +45,8 @@ class MipsJITInfo : public TargetJITInfo { /// emitFunctionStub - Use the specified JITCodeEmitter object to emit a /// small native function that simply calls the function at the specified /// address. - virtual void *emitFunctionStub(const Function* F, void *Fn, - JITCodeEmitter &JCE); + virtual void *emitFunctionStub(const Function *F, void *Fn, + JITCodeEmitter &JCE); /// getLazyResolverFunction - Expose the lazy resolver to the JIT. virtual LazyResolverFn getLazyResolverFunction(JITCompilerFn); @@ -55,7 +55,7 @@ class MipsJITInfo : public TargetJITInfo { /// it must rewrite the code to contain the actual addresses of any /// referenced global symbols. virtual void relocate(void *Function, MachineRelocation *MR, - unsigned NumRelocs, unsigned char* GOTBase); + unsigned NumRelocs, unsigned char *GOTBase); /// Initialize - Initialize internal stage for the function being JITted. void Initialize(const MachineFunction &MF, bool isPIC) { diff --git a/lib/Target/Mips/MipsLongBranch.cpp b/lib/Target/Mips/MipsLongBranch.cpp new file mode 100644 index 0000000..f78203f --- /dev/null +++ b/lib/Target/Mips/MipsLongBranch.cpp @@ -0,0 +1,419 @@ +//===-- MipsLongBranch.cpp - Emit long branches ---------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass expands a branch or jump instruction into a long branch if its +// offset is too large to fit into its immediate field. 
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "mips-long-branch"
+
+#include "Mips.h"
+#include "MipsTargetMachine.h"
+#include "MCTargetDesc/MipsBaseInfo.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/Function.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+
+using namespace llvm;
+
+STATISTIC(LongBranches, "Number of long branches.");
+
+static cl::opt<bool> SkipLongBranch(
+  "skip-mips-long-branch",
+  cl::init(false),
+  cl::desc("MIPS: Skip long branch pass."),
+  cl::Hidden);
+
+static cl::opt<bool> ForceLongBranch(
+  "force-mips-long-branch",
+  cl::init(false),
+  cl::desc("MIPS: Expand all branches to long format."),
+  cl::Hidden);
+
+namespace {
+  typedef MachineBasicBlock::iterator Iter;
+  typedef MachineBasicBlock::reverse_iterator ReverseIter;
+
+  struct MBBInfo {
+    uint64_t Size;
+    bool HasLongBranch;
+    MachineInstr *Br;
+
+    MBBInfo() : Size(0), HasLongBranch(false), Br(0) {}
+  };
+
+  class MipsLongBranch : public MachineFunctionPass {
+
+  public:
+    static char ID;
+    MipsLongBranch(TargetMachine &tm)
+      : MachineFunctionPass(ID), TM(tm),
+        TII(static_cast<const MipsInstrInfo*>(tm.getInstrInfo())) {}
+
+    virtual const char *getPassName() const {
+      return "Mips Long Branch";
+    }
+
+    bool runOnMachineFunction(MachineFunction &F);
+
+  private:
+    void splitMBB(MachineBasicBlock *MBB);
+    void initMBBInfo();
+    int64_t computeOffset(const MachineInstr *Br);
+    void replaceBranch(MachineBasicBlock &MBB, Iter Br, DebugLoc DL,
+                       MachineBasicBlock *MBBOpnd);
+    void expandToLongBranch(MBBInfo &Info);
+
+    const TargetMachine &TM;
+    const MipsInstrInfo *TII;
+    MachineFunction *MF;
+    SmallVector<MBBInfo, 16> MBBInfos;
+  };
+
+  char MipsLongBranch::ID = 0;
+} // end of anonymous namespace
+
+/// createMipsLongBranchPass - Returns a pass that converts branches to long
+/// branches.
+FunctionPass *llvm::createMipsLongBranchPass(MipsTargetMachine &tm) {
+  return new MipsLongBranch(tm);
+}
+
+/// Iterate over list of Br's operands and search for a MachineBasicBlock
+/// operand.
+static MachineBasicBlock *getTargetMBB(const MachineInstr &Br) {
+  for (unsigned I = 0, E = Br.getDesc().getNumOperands(); I < E; ++I) {
+    const MachineOperand &MO = Br.getOperand(I);
+
+    if (MO.isMBB())
+      return MO.getMBB();
+  }
+
+  assert(false && "This instruction does not have an MBB operand.");
+  return 0;
+}
+
+// Traverse the list of instructions backwards until a non-debug instruction is
+// found or it reaches E.
+static ReverseIter getNonDebugInstr(ReverseIter B, ReverseIter E) {
+  for (; B != E; ++B)
+    if (!B->isDebugValue())
+      return B;
+
+  return E;
+}
+
+// Split MBB if it has two direct jumps/branches.
+void MipsLongBranch::splitMBB(MachineBasicBlock *MBB) {
+  ReverseIter End = MBB->rend();
+  ReverseIter LastBr = getNonDebugInstr(MBB->rbegin(), End);
+
+  // Return if MBB has no branch instructions.
+  if ((LastBr == End) ||
+      (!LastBr->isConditionalBranch() && !LastBr->isUnconditionalBranch()))
+    return;
+
+  ReverseIter FirstBr = getNonDebugInstr(llvm::next(LastBr), End);
+
+  // MBB has only one branch instruction if FirstBr is not a branch
+  // instruction.
+ if ((FirstBr == End) || + (!FirstBr->isConditionalBranch() && !FirstBr->isUnconditionalBranch())) + return; + + assert(!FirstBr->isIndirectBranch() && "Unexpected indirect branch found."); + + // Create a new MBB. Move instructions in MBB to the newly created MBB. + MachineBasicBlock *NewMBB = + MF->CreateMachineBasicBlock(MBB->getBasicBlock()); + + // Insert NewMBB and fix control flow. + MachineBasicBlock *Tgt = getTargetMBB(*FirstBr); + NewMBB->transferSuccessors(MBB); + NewMBB->removeSuccessor(Tgt); + MBB->addSuccessor(NewMBB); + MBB->addSuccessor(Tgt); + MF->insert(llvm::next(MachineFunction::iterator(MBB)), NewMBB); + + NewMBB->splice(NewMBB->end(), MBB, (++LastBr).base(), MBB->end()); +} + +// Fill MBBInfos. +void MipsLongBranch::initMBBInfo() { + // Split the MBBs if they have two branches. Each basic block should have at + // most one branch after this loop is executed. + for (MachineFunction::iterator I = MF->begin(), E = MF->end(); I != E;) + splitMBB(I++); + + MF->RenumberBlocks(); + MBBInfos.clear(); + MBBInfos.resize(MF->size()); + + for (unsigned I = 0, E = MBBInfos.size(); I < E; ++I) { + MachineBasicBlock *MBB = MF->getBlockNumbered(I); + + // Compute size of MBB. + for (MachineBasicBlock::instr_iterator MI = MBB->instr_begin(); + MI != MBB->instr_end(); ++MI) + MBBInfos[I].Size += TII->GetInstSizeInBytes(&*MI); + + // Search for MBB's branch instruction. + ReverseIter End = MBB->rend(); + ReverseIter Br = getNonDebugInstr(MBB->rbegin(), End); + + if ((Br != End) && !Br->isIndirectBranch() && + (Br->isConditionalBranch() || + (Br->isUnconditionalBranch() && + TM.getRelocationModel() == Reloc::PIC_))) + MBBInfos[I].Br = (++Br).base(); + } +} + +// Compute offset of branch in number of bytes. +int64_t MipsLongBranch::computeOffset(const MachineInstr *Br) { + int64_t Offset = 0; + int ThisMBB = Br->getParent()->getNumber(); + int TargetMBB = getTargetMBB(*Br)->getNumber(); + + // Compute offset of a forward branch. + if (ThisMBB < TargetMBB) { + for (int N = ThisMBB + 1; N < TargetMBB; ++N) + Offset += MBBInfos[N].Size; + + return Offset + 4; + } + + // Compute offset of a backward branch. + for (int N = ThisMBB; N >= TargetMBB; --N) + Offset += MBBInfos[N].Size; + + return -Offset + 4; +} + +// Replace Br with a branch which has the opposite condition code and a +// MachineBasicBlock operand MBBOpnd. +void MipsLongBranch::replaceBranch(MachineBasicBlock &MBB, Iter Br, + DebugLoc DL, MachineBasicBlock *MBBOpnd) { + unsigned NewOpc = TII->GetOppositeBranchOpc(Br->getOpcode()); + const MCInstrDesc &NewDesc = TII->get(NewOpc); + + MachineInstrBuilder MIB = BuildMI(MBB, Br, DL, NewDesc); + + for (unsigned I = 0, E = Br->getDesc().getNumOperands(); I < E; ++I) { + MachineOperand &MO = Br->getOperand(I); + + if (!MO.isReg()) { + assert(MO.isMBB() && "MBB operand expected."); + break; + } + + MIB.addReg(MO.getReg()); + } + + MIB.addMBB(MBBOpnd); + + Br->eraseFromParent(); +} + +// Expand branch instructions to long branches. 
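+// A quick worked example for computeOffset above, with hypothetical sizes:
+// if MBB #2 ends in a branch forward to MBB #5 and blocks #3 and #4 are 8
+// and 12 bytes, the offset is 8 + 12 + 4 = 24 bytes; runOnMachineFunction
+// later divides by 4 and tests isInt<16>, so a branch only becomes "long"
+// once its distance in words no longer fits the signed 16-bit immediate.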
+void MipsLongBranch::expandToLongBranch(MBBInfo &I) { + I.HasLongBranch = true; + + bool IsPIC = TM.getRelocationModel() == Reloc::PIC_; + unsigned ABI = TM.getSubtarget().getTargetABI(); + bool N64 = ABI == MipsSubtarget::N64; + + MachineBasicBlock::iterator Pos; + MachineBasicBlock *MBB = I.Br->getParent(), *TgtMBB = getTargetMBB(*I.Br); + DebugLoc DL = I.Br->getDebugLoc(); + const BasicBlock *BB = MBB->getBasicBlock(); + MachineFunction::iterator FallThroughMBB = ++MachineFunction::iterator(MBB); + MachineBasicBlock *LongBrMBB = MF->CreateMachineBasicBlock(BB); + + MF->insert(FallThroughMBB, LongBrMBB); + MBB->removeSuccessor(TgtMBB); + MBB->addSuccessor(LongBrMBB); + + if (IsPIC) { + // $longbr: + // addiu $sp, $sp, -regsize * 2 + // sw $ra, 0($sp) + // bal $baltgt + // sw $a3, regsize($sp) + // $baltgt: + // lui $a3, %hi($baltgt) + // lui $at, %hi($tgt) + // addiu $a3, $a3, %lo($baltgt) + // addiu $at, $at, %lo($tgt) + // subu $at, $at, $a3 + // addu $at, $ra, $at + // + // if n64: + // lui $a3, %highest($baltgt) + // lui $ra, %highest($tgt) + // addiu $a3, $a3, %higher($baltgt) + // addiu $ra, $ra, %higher($tgt) + // dsll $a3, $a3, 32 + // dsll $ra, $ra, 32 + // subu $at, $at, $a3 + // addu $at, $at, $ra + // + // lw $ra, 0($sp) + // lw $a3, regsize($sp) + // jr $at + // addiu $sp, $sp, regsize * 2 + // $fallthrough: + // + MF->getInfo()->setEmitNOAT(); + MachineBasicBlock *BalTgtMBB = MF->CreateMachineBasicBlock(BB); + MF->insert(FallThroughMBB, BalTgtMBB); + LongBrMBB->addSuccessor(BalTgtMBB); + BalTgtMBB->addSuccessor(TgtMBB); + + int RegSize = N64 ? 8 : 4; + unsigned AT = N64 ? Mips::AT_64 : Mips::AT; + unsigned A3 = N64 ? Mips::A3_64 : Mips::A3; + unsigned SP = N64 ? Mips::SP_64 : Mips::SP; + unsigned RA = N64 ? Mips::RA_64 : Mips::RA; + unsigned Load = N64 ? Mips::LD_P8 : Mips::LW; + unsigned Store = N64 ? Mips::SD_P8 : Mips::SW; + unsigned LUi = N64 ? Mips::LUi64 : Mips::LUi; + unsigned ADDiu = N64 ? Mips::DADDiu : Mips::ADDiu; + unsigned ADDu = N64 ? Mips::DADDu : Mips::ADDu; + unsigned SUBu = N64 ? Mips::SUBu : Mips::SUBu; + unsigned JR = N64 ? 
Mips::JR64 : Mips::JR; + + Pos = LongBrMBB->begin(); + + BuildMI(*LongBrMBB, Pos, DL, TII->get(ADDiu), SP).addReg(SP) + .addImm(-RegSize * 2); + BuildMI(*LongBrMBB, Pos, DL, TII->get(Store)).addReg(RA).addReg(SP) + .addImm(0); + BuildMI(*LongBrMBB, Pos, DL, TII->get(Mips::BAL_BR)).addMBB(BalTgtMBB); + BuildMI(*LongBrMBB, Pos, DL, TII->get(Store)).addReg(A3).addReg(SP) + .addImm(RegSize)->setIsInsideBundle(); + + Pos = BalTgtMBB->begin(); + + BuildMI(*BalTgtMBB, Pos, DL, TII->get(LUi), A3) + .addMBB(BalTgtMBB, MipsII::MO_ABS_HI); + BuildMI(*BalTgtMBB, Pos, DL, TII->get(LUi), AT) + .addMBB(TgtMBB, MipsII::MO_ABS_HI); + BuildMI(*BalTgtMBB, Pos, DL, TII->get(ADDiu), A3).addReg(A3) + .addMBB(BalTgtMBB, MipsII::MO_ABS_LO); + BuildMI(*BalTgtMBB, Pos, DL, TII->get(ADDiu), AT).addReg(AT) + .addMBB(TgtMBB, MipsII::MO_ABS_LO); + BuildMI(*BalTgtMBB, Pos, DL, TII->get(SUBu), AT).addReg(AT).addReg(A3); + BuildMI(*BalTgtMBB, Pos, DL, TII->get(ADDu), AT).addReg(RA).addReg(AT); + + if (N64) { + BuildMI(*BalTgtMBB, Pos, DL, TII->get(LUi), A3) + .addMBB(BalTgtMBB, MipsII::MO_HIGHEST); + BuildMI(*BalTgtMBB, Pos, DL, TII->get(LUi), RA) + .addMBB(TgtMBB, MipsII::MO_HIGHEST); + BuildMI(*BalTgtMBB, Pos, DL, TII->get(ADDiu), A3).addReg(A3) + .addMBB(BalTgtMBB, MipsII::MO_HIGHER); + BuildMI(*BalTgtMBB, Pos, DL, TII->get(ADDiu), RA).addReg(RA) + .addMBB(TgtMBB, MipsII::MO_HIGHER); + BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::DSLL), A3).addReg(A3) + .addImm(32); + BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::DSLL), RA).addReg(RA) + .addImm(32); + BuildMI(*BalTgtMBB, Pos, DL, TII->get(SUBu), AT).addReg(AT).addReg(A3); + BuildMI(*BalTgtMBB, Pos, DL, TII->get(ADDu), AT).addReg(AT).addReg(RA); + I.Size += 4 * 8; + } + + BuildMI(*BalTgtMBB, Pos, DL, TII->get(Load), RA).addReg(SP).addImm(0); + BuildMI(*BalTgtMBB, Pos, DL, TII->get(Load), A3).addReg(SP).addImm(RegSize); + BuildMI(*BalTgtMBB, Pos, DL, TII->get(JR)).addReg(AT); + BuildMI(*BalTgtMBB, Pos, DL, TII->get(ADDiu), SP).addReg(SP) + .addImm(RegSize * 2)->setIsInsideBundle(); + I.Size += 4 * 14; + } else { + // $longbr: + // j $tgt + // nop + // $fallthrough: + // + Pos = LongBrMBB->begin(); + LongBrMBB->addSuccessor(TgtMBB); + BuildMI(*LongBrMBB, Pos, DL, TII->get(Mips::J)).addMBB(TgtMBB); + BuildMI(*LongBrMBB, Pos, DL, TII->get(Mips::NOP))->setIsInsideBundle(); + I.Size += 4 * 2; + } + + if (I.Br->isUnconditionalBranch()) { + // Change branch destination. + assert(I.Br->getDesc().getNumOperands() == 1); + I.Br->RemoveOperand(0); + I.Br->addOperand(MachineOperand::CreateMBB(LongBrMBB)); + } else + // Change branch destination and reverse condition. 
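+    // e.g. "beq $a, $b, $tgt" becomes "bne $a, $b, $fallthrough" via
+    // GetOppositeBranchOpc, so control falls into $longbr exactly when the
+    // original condition held, and the long sequence then reaches $tgt.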
+ replaceBranch(*MBB, I.Br, DL, FallThroughMBB); +} + +static void emitGPDisp(MachineFunction &F, const MipsInstrInfo *TII) { + MachineBasicBlock &MBB = F.front(); + MachineBasicBlock::iterator I = MBB.begin(); + DebugLoc DL = MBB.findDebugLoc(MBB.begin()); + BuildMI(MBB, I, DL, TII->get(Mips::LUi), Mips::V0) + .addExternalSymbol("_gp_disp", MipsII::MO_ABS_HI); + BuildMI(MBB, I, DL, TII->get(Mips::ADDiu), Mips::V0) + .addReg(Mips::V0).addExternalSymbol("_gp_disp", MipsII::MO_ABS_LO); + MBB.removeLiveIn(Mips::V0); +} + +bool MipsLongBranch::runOnMachineFunction(MachineFunction &F) { + if ((TM.getRelocationModel() == Reloc::PIC_) && + TM.getSubtarget().isABI_O32() && + F.getInfo()->globalBaseRegSet()) + emitGPDisp(F, TII); + + if (SkipLongBranch) + return true; + + MF = &F; + initMBBInfo(); + + SmallVector::iterator I, E = MBBInfos.end(); + bool EverMadeChange = false, MadeChange = true; + + while (MadeChange) { + MadeChange = false; + + for (I = MBBInfos.begin(); I != E; ++I) { + // Skip if this MBB doesn't have a branch or the branch has already been + // converted to a long branch. + if (!I->Br || I->HasLongBranch) + continue; + + if (!ForceLongBranch) + // Check if offset fits into 16-bit immediate field of branches. + if (isInt<16>(computeOffset(I->Br) / 4)) + continue; + + expandToLongBranch(*I); + ++LongBranches; + EverMadeChange = MadeChange = true; + } + } + + if (EverMadeChange) + MF->RenumberBlocks(); + + return true; +} diff --git a/lib/Target/Mips/MipsMCInstLower.cpp b/lib/Target/Mips/MipsMCInstLower.cpp index 1597b93..d4c5e6d 100644 --- a/lib/Target/Mips/MipsMCInstLower.cpp +++ b/lib/Target/Mips/MipsMCInstLower.cpp @@ -29,7 +29,7 @@ using namespace llvm; MipsMCInstLower::MipsMCInstLower(MipsAsmPrinter &asmprinter) : AsmPrinter(asmprinter) {} -void MipsMCInstLower::Initialize(Mangler *M, MCContext* C) { +void MipsMCInstLower::Initialize(Mangler *M, MCContext *C) { Mang = M; Ctx = C; } @@ -61,6 +61,8 @@ MCOperand MipsMCInstLower::LowerSymbolOperand(const MachineOperand &MO, case MipsII::MO_GOT_DISP: Kind = MCSymbolRefExpr::VK_Mips_GOT_DISP; break; case MipsII::MO_GOT_PAGE: Kind = MCSymbolRefExpr::VK_Mips_GOT_PAGE; break; case MipsII::MO_GOT_OFST: Kind = MCSymbolRefExpr::VK_Mips_GOT_OFST; break; + case MipsII::MO_HIGHER: Kind = MCSymbolRefExpr::VK_Mips_HIGHER; break; + case MipsII::MO_HIGHEST: Kind = MCSymbolRefExpr::VK_Mips_HIGHEST; break; } switch (MOTy) { @@ -70,14 +72,17 @@ MCOperand MipsMCInstLower::LowerSymbolOperand(const MachineOperand &MO, case MachineOperand::MO_GlobalAddress: Symbol = Mang->getSymbol(MO.getGlobal()); + Offset += MO.getOffset(); break; case MachineOperand::MO_BlockAddress: Symbol = AsmPrinter.GetBlockAddressSymbol(MO.getBlockAddress()); + Offset += MO.getOffset(); break; case MachineOperand::MO_ExternalSymbol: Symbol = AsmPrinter.GetExternalSymbolSymbol(MO.getSymbolName()); + Offset += MO.getOffset(); break; case MachineOperand::MO_JumpTableIndex: @@ -86,8 +91,7 @@ MCOperand MipsMCInstLower::LowerSymbolOperand(const MachineOperand &MO, case MachineOperand::MO_ConstantPoolIndex: Symbol = AsmPrinter.GetCPISymbol(MO.getIndex()); - if (MO.getOffset()) - Offset += MO.getOffset(); + Offset += MO.getOffset(); break; default: @@ -103,71 +107,23 @@ MCOperand MipsMCInstLower::LowerSymbolOperand(const MachineOperand &MO, assert(Offset > 0); const MCConstantExpr *OffsetExpr = MCConstantExpr::Create(Offset, *Ctx); - const MCBinaryExpr *AddExpr = MCBinaryExpr::CreateAdd(MCSym, OffsetExpr, *Ctx); - return MCOperand::CreateExpr(AddExpr); + const MCBinaryExpr *Add = 
MCBinaryExpr::CreateAdd(MCSym, OffsetExpr, *Ctx); + return MCOperand::CreateExpr(Add); } -static void CreateMCInst(MCInst& Inst, unsigned Opc, const MCOperand& Opnd0, - const MCOperand& Opnd1, - const MCOperand& Opnd2 = MCOperand()) { +/* +static void CreateMCInst(MCInst& Inst, unsigned Opc, const MCOperand &Opnd0, + const MCOperand &Opnd1, + const MCOperand &Opnd2 = MCOperand()) { Inst.setOpcode(Opc); Inst.addOperand(Opnd0); Inst.addOperand(Opnd1); if (Opnd2.isValid()) Inst.addOperand(Opnd2); } +*/ -// Lower ".cpload $reg" to -// "lui $gp, %hi(_gp_disp)" -// "addiu $gp, $gp, %lo(_gp_disp)" -// "addu $gp, $gp, $t9" -void MipsMCInstLower::LowerCPLOAD(SmallVector& MCInsts) { - MCOperand GPReg = MCOperand::CreateReg(Mips::GP); - MCOperand T9Reg = MCOperand::CreateReg(Mips::T9); - StringRef SymName("_gp_disp"); - const MCSymbol *Sym = Ctx->GetOrCreateSymbol(SymName); - const MCSymbolRefExpr *MCSym; - - MCSym = MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_Mips_ABS_HI, *Ctx); - MCOperand SymHi = MCOperand::CreateExpr(MCSym); - MCSym = MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_Mips_ABS_LO, *Ctx); - MCOperand SymLo = MCOperand::CreateExpr(MCSym); - - MCInsts.resize(3); - - CreateMCInst(MCInsts[0], Mips::LUi, GPReg, SymHi); - CreateMCInst(MCInsts[1], Mips::ADDiu, GPReg, GPReg, SymLo); - CreateMCInst(MCInsts[2], Mips::ADDu, GPReg, GPReg, T9Reg); -} - -// Lower ".cprestore offset" to "sw $gp, offset($sp)". -void MipsMCInstLower::LowerCPRESTORE(int64_t Offset, - SmallVector& MCInsts) { - assert(isInt<32>(Offset) && (Offset >= 0) && - "Imm operand of .cprestore must be a non-negative 32-bit value."); - - MCOperand SPReg = MCOperand::CreateReg(Mips::SP), BaseReg = SPReg; - MCOperand GPReg = MCOperand::CreateReg(Mips::GP); - - if (!isInt<16>(Offset)) { - unsigned Hi = ((Offset + 0x8000) >> 16) & 0xffff; - Offset &= 0xffff; - MCOperand ATReg = MCOperand::CreateReg(Mips::AT); - BaseReg = ATReg; - - // lui at,hi - // addu at,at,sp - MCInsts.resize(2); - CreateMCInst(MCInsts[0], Mips::LUi, ATReg, MCOperand::CreateImm(Hi)); - CreateMCInst(MCInsts[1], Mips::ADDu, ATReg, ATReg, SPReg); - } - - MCInst Sw; - CreateMCInst(Sw, Mips::SW, GPReg, BaseReg, MCOperand::CreateImm(Offset)); - MCInsts.push_back(Sw); -} - -MCOperand MipsMCInstLower::LowerOperand(const MachineOperand& MO, +MCOperand MipsMCInstLower::LowerOperand(const MachineOperand &MO, unsigned offset) const { MachineOperandType MOTy = MO.getType(); @@ -205,139 +161,31 @@ void MipsMCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const { } } -void MipsMCInstLower::LowerUnalignedLoadStore(const MachineInstr *MI, - SmallVector& MCInsts) { - unsigned Opc = MI->getOpcode(); - MCInst Instr1, Instr2, Instr3, Move; - - bool TwoInstructions = false; - - assert(MI->getNumOperands() == 3); - assert(MI->getOperand(0).isReg()); - assert(MI->getOperand(1).isReg()); - - MCOperand Target = LowerOperand(MI->getOperand(0)); - MCOperand Base = LowerOperand(MI->getOperand(1)); - MCOperand ATReg = MCOperand::CreateReg(Mips::AT); - MCOperand ZeroReg = MCOperand::CreateReg(Mips::ZERO); - - MachineOperand UnLoweredName = MI->getOperand(2); - MCOperand Name = LowerOperand(UnLoweredName); - - Move.setOpcode(Mips::ADDu); - Move.addOperand(Target); - Move.addOperand(ATReg); - Move.addOperand(ZeroReg); - - switch (Opc) { - case Mips::ULW: { - // FIXME: only works for little endian right now - MCOperand AdjName = LowerOperand(UnLoweredName, 3); - if (Base.getReg() == (Target.getReg())) { - Instr1.setOpcode(Mips::LWL); - Instr1.addOperand(ATReg); - 
Instr1.addOperand(Base); - Instr1.addOperand(AdjName); - Instr2.setOpcode(Mips::LWR); - Instr2.addOperand(ATReg); - Instr2.addOperand(Base); - Instr2.addOperand(Name); - Instr3 = Move; - } else { - TwoInstructions = true; - Instr1.setOpcode(Mips::LWL); - Instr1.addOperand(Target); - Instr1.addOperand(Base); - Instr1.addOperand(AdjName); - Instr2.setOpcode(Mips::LWR); - Instr2.addOperand(Target); - Instr2.addOperand(Base); - Instr2.addOperand(Name); - } +// If the D instruction has a shift amount that is greater +// than 31 (checked in calling routine), lower it to a D32 instruction +void MipsMCInstLower::LowerLargeShift(const MachineInstr *MI, + MCInst& Inst, + int64_t Shift) { + // rt + Inst.addOperand(LowerOperand(MI->getOperand(0))); + // rd + Inst.addOperand(LowerOperand(MI->getOperand(1))); + // saminus32 + Inst.addOperand(MCOperand::CreateImm(Shift)); + + switch (MI->getOpcode()) { + default: + // Calling function is not synchronized + llvm_unreachable("Unexpected shift instruction"); break; - } - case Mips::ULHu: { - // FIXME: only works for little endian right now - MCOperand AdjName = LowerOperand(UnLoweredName, 1); - Instr1.setOpcode(Mips::LBu); - Instr1.addOperand(ATReg); - Instr1.addOperand(Base); - Instr1.addOperand(AdjName); - Instr2.setOpcode(Mips::LBu); - Instr2.addOperand(Target); - Instr2.addOperand(Base); - Instr2.addOperand(Name); - Instr3.setOpcode(Mips::INS); - Instr3.addOperand(Target); - Instr3.addOperand(ATReg); - Instr3.addOperand(MCOperand::CreateImm(0x8)); - Instr3.addOperand(MCOperand::CreateImm(0x18)); + case Mips::DSLL: + Inst.setOpcode(Mips::DSLL32); break; - } - - case Mips::USW: { - // FIXME: only works for little endian right now - assert (Base.getReg() != Target.getReg()); - TwoInstructions = true; - MCOperand AdjName = LowerOperand(UnLoweredName, 3); - Instr1.setOpcode(Mips::SWL); - Instr1.addOperand(Target); - Instr1.addOperand(Base); - Instr1.addOperand(AdjName); - Instr2.setOpcode(Mips::SWR); - Instr2.addOperand(Target); - Instr2.addOperand(Base); - Instr2.addOperand(Name); + case Mips::DSRL: + Inst.setOpcode(Mips::DSRL32); break; - } - case Mips::USH: { - MCOperand AdjName = LowerOperand(UnLoweredName, 1); - Instr1.setOpcode(Mips::SB); - Instr1.addOperand(Target); - Instr1.addOperand(Base); - Instr1.addOperand(Name); - Instr2.setOpcode(Mips::SRL); - Instr2.addOperand(ATReg); - Instr2.addOperand(Target); - Instr2.addOperand(MCOperand::CreateImm(8)); - Instr3.setOpcode(Mips::SB); - Instr3.addOperand(ATReg); - Instr3.addOperand(Base); - Instr3.addOperand(AdjName); + case Mips::DSRA: + Inst.setOpcode(Mips::DSRA32); break; } - default: - // FIXME: need to add others - llvm_unreachable("unaligned instruction not processed"); - } - - MCInsts.push_back(Instr1); - MCInsts.push_back(Instr2); - if (!TwoInstructions) MCInsts.push_back(Instr3); -} - -// Convert -// "setgp01 $reg" -// to -// "lui $reg, %hi(_gp_disp)" -// "addiu $reg, $reg, %lo(_gp_disp)" -void MipsMCInstLower::LowerSETGP01(const MachineInstr *MI, - SmallVector& MCInsts) { - const MachineOperand &MO = MI->getOperand(0); - assert(MO.isReg()); - MCOperand RegOpnd = MCOperand::CreateReg(MO.getReg()); - StringRef SymName("_gp_disp"); - const MCSymbol *Sym = Ctx->GetOrCreateSymbol(SymName); - const MCSymbolRefExpr *MCSym; - - MCSym = MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_Mips_ABS_HI, *Ctx); - MCOperand SymHi = MCOperand::CreateExpr(MCSym); - MCSym = MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_Mips_ABS_LO, *Ctx); - MCOperand SymLo = MCOperand::CreateExpr(MCSym); - - MCInsts.resize(2); - 
-  CreateMCInst(MCInsts[0], Mips::LUi, RegOpnd, SymHi);
-  CreateMCInst(MCInsts[1], Mips::ADDiu, RegOpnd, RegOpnd, SymLo);
 }
diff --git a/lib/Target/Mips/MipsMCInstLower.h b/lib/Target/Mips/MipsMCInstLower.h
index c1d007d..0abb996 100644
--- a/lib/Target/Mips/MipsMCInstLower.h
+++ b/lib/Target/Mips/MipsMCInstLower.h
@@ -31,13 +31,10 @@ class LLVM_LIBRARY_VISIBILITY MipsMCInstLower {
   MipsAsmPrinter &AsmPrinter;
 public:
   MipsMCInstLower(MipsAsmPrinter &asmprinter);
-  void Initialize(Mangler *mang, MCContext* C);
+  void Initialize(Mangler *mang, MCContext *C);
   void Lower(const MachineInstr *MI, MCInst &OutMI) const;
-  void LowerCPLOAD(SmallVector<MCInst, 4>& MCInsts);
-  void LowerCPRESTORE(int64_t Offset, SmallVector<MCInst, 4>& MCInsts);
-  void LowerUnalignedLoadStore(const MachineInstr *MI,
-                               SmallVector<MCInst, 4>& MCInsts);
-  void LowerSETGP01(const MachineInstr *MI, SmallVector<MCInst, 4>& MCInsts);
+  void LowerLargeShift(const MachineInstr *MI, MCInst &Inst, int64_t Shift);
+
 private:
   MCOperand LowerSymbolOperand(const MachineOperand &MO,
                                MachineOperandType MOTy, unsigned Offset) const;
diff --git a/lib/Target/Mips/MipsMachineFunction.cpp b/lib/Target/Mips/MipsMachineFunction.cpp
index b00c62b..362173e 100644
--- a/lib/Target/Mips/MipsMachineFunction.cpp
+++ b/lib/Target/Mips/MipsMachineFunction.cpp
@@ -22,10 +22,6 @@ static cl::opt<bool>
 FixGlobalBaseReg("mips-fix-global-base-reg", cl::Hidden, cl::init(true),
                  cl::desc("Always use $gp as the global base register."));
 
-bool MipsFunctionInfo::globalBaseRegFixed() const {
-  return FixGlobalBaseReg;
-}
-
 bool MipsFunctionInfo::globalBaseRegSet() const {
   return GlobalBaseReg;
 }
@@ -37,13 +33,13 @@ unsigned MipsFunctionInfo::getGlobalBaseReg() {
   const MipsSubtarget &ST = MF.getTarget().getSubtarget<MipsSubtarget>();
 
-  if (FixGlobalBaseReg) // $gp is the global base register.
-    return GlobalBaseReg = ST.isABI_N64() ? Mips::GP_64 : Mips::GP;
   const TargetRegisterClass *RC;
-  RC = ST.isABI_N64() ?
-    Mips::CPU64RegsRegisterClass : Mips::CPURegsRegisterClass;
-
+  if (ST.inMips16Mode())
+    RC=(const TargetRegisterClass*)&Mips::CPU16RegsRegClass;
+  else
+    RC = ST.isABI_N64() ?
+      (const TargetRegisterClass*)&Mips::CPU64RegsRegClass :
+      (const TargetRegisterClass*)&Mips::CPURegsRegClass;
   return GlobalBaseReg = MF.getRegInfo().createVirtualRegister(RC);
 }
diff --git a/lib/Target/Mips/MipsMachineFunction.h b/lib/Target/Mips/MipsMachineFunction.h
index 0fde55c..df3c4c0 100644
--- a/lib/Target/Mips/MipsMachineFunction.h
+++ b/lib/Target/Mips/MipsMachineFunction.h
@@ -14,8 +14,11 @@
 #ifndef MIPS_MACHINE_FUNCTION_INFO_H
 #define MIPS_MACHINE_FUNCTION_INFO_H
 
+#include "MipsSubtarget.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetMachine.h"
 #include <utility>
 
 namespace llvm {
@@ -45,8 +48,6 @@ class MipsFunctionInfo : public MachineFunctionInfo {
   // OutArgFIRange: Range of indices of all frame objects created during call to
   // LowerCall except for the frame object for restoring $gp.
   std::pair<int, int> InArgFIRange, OutArgFIRange;
-  int GPFI; // Index of the frame object for restoring $gp
-  mutable int DynAllocFI; // Frame index of dynamically allocated stack area.
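The LowerLargeShift hunk in MipsMCInstLower.cpp above is easiest to check against a concrete case: MIPS64 encodes shift amounts in a 5-bit field, so a 64-bit shift by 32..63 must use the *32 opcode variants, which implicitly add 32 to the encoded amount. That is why the immediate operand is named "saminus32", and why the caller is expected to have subtracted 32 from the amount before calling. A hypothetical lowering, with register numbers chosen only for illustration:

    dsll   $2, $3, 33      # MachineInstr form, shift amount 33 (> 31)
    dsll32 $2, $3, 1       # emitted MCInst: DSLL32 with saminus32 = 33 - 32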
unsigned MaxCallFrameSize; bool EmitNOAT; @@ -55,8 +56,7 @@ public: MipsFunctionInfo(MachineFunction& MF) : MF(MF), SRetReturnReg(0), GlobalBaseReg(0), VarArgsFrameIndex(0), InArgFIRange(std::make_pair(-1, 0)), - OutArgFIRange(std::make_pair(-1, 0)), GPFI(0), DynAllocFI(0), - MaxCallFrameSize(0), EmitNOAT(false) + OutArgFIRange(std::make_pair(-1, 0)), MaxCallFrameSize(0), EmitNOAT(false) {} bool isInArgFI(int FI) const { @@ -74,25 +74,9 @@ public: OutArgFIRange.second = LastFI; } - int getGPFI() const { return GPFI; } - void setGPFI(int FI) { GPFI = FI; } - bool needGPSaveRestore() const { return getGPFI(); } - bool isGPFI(int FI) const { return GPFI && GPFI == FI; } - - // The first call to this function creates a frame object for dynamically - // allocated stack area. - int getDynAllocFI() const { - if (!DynAllocFI) - DynAllocFI = MF.getFrameInfo()->CreateFixedObject(4, 0, true); - - return DynAllocFI; - } - bool isDynAllocFI(int FI) const { return DynAllocFI && DynAllocFI == FI; } - unsigned getSRetReturnReg() const { return SRetReturnReg; } void setSRetReturnReg(unsigned Reg) { SRetReturnReg = Reg; } - bool globalBaseRegFixed() const; bool globalBaseRegSet() const; unsigned getGlobalBaseReg(); diff --git a/lib/Target/Mips/MipsRegisterInfo.cpp b/lib/Target/Mips/MipsRegisterInfo.cpp index f30de44..ae6ae3a 100644 --- a/lib/Target/Mips/MipsRegisterInfo.cpp +++ b/lib/Target/Mips/MipsRegisterInfo.cpp @@ -16,9 +16,11 @@ #include "MipsRegisterInfo.h" #include "Mips.h" #include "MipsAnalyzeImmediate.h" +#include "MipsInstrInfo.h" #include "MipsSubtarget.h" #include "MipsMachineFunction.h" #include "llvm/Constants.h" +#include "llvm/DebugInfo.h" #include "llvm/Type.h" #include "llvm/Function.h" #include "llvm/CodeGen/ValueTypes.h" @@ -35,7 +37,6 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/STLExtras.h" -#include "llvm/Analysis/DebugInfo.h" #define GET_REGINFO_TARGET_DESC #include "MipsGenRegisterInfo.inc" @@ -54,8 +55,7 @@ unsigned MipsRegisterInfo::getPICCallReg() { return Mips::T9; } /// Mips Callee Saved Registers const uint16_t* MipsRegisterInfo:: -getCalleeSavedRegs(const MachineFunction *MF) const -{ +getCalleeSavedRegs(const MachineFunction *MF) const { if (Subtarget.isSingleFloat()) return CSR_SingleFloatOnly_SaveList; else if (!Subtarget.hasMips64()) @@ -64,12 +64,11 @@ getCalleeSavedRegs(const MachineFunction *MF) const return CSR_N32_SaveList; assert(Subtarget.isABI_N64()); - return CSR_N64_SaveList; + return CSR_N64_SaveList; } const uint32_t* -MipsRegisterInfo::getCallPreservedMask(CallingConv::ID) const -{ +MipsRegisterInfo::getCallPreservedMask(CallingConv::ID) const { if (Subtarget.isSingleFloat()) return CSR_SingleFloatOnly_RegMask; else if (!Subtarget.hasMips64()) @@ -78,23 +77,21 @@ MipsRegisterInfo::getCallPreservedMask(CallingConv::ID) const return CSR_N32_RegMask; assert(Subtarget.isABI_N64()); - return CSR_N64_RegMask; + return CSR_N64_RegMask; } BitVector MipsRegisterInfo:: getReservedRegs(const MachineFunction &MF) const { static const uint16_t ReservedCPURegs[] = { - Mips::ZERO, Mips::AT, Mips::K0, Mips::K1, - Mips::SP, Mips::FP, Mips::RA + Mips::ZERO, Mips::AT, Mips::K0, Mips::K1, Mips::SP }; static const uint16_t ReservedCPU64Regs[] = { - Mips::ZERO_64, Mips::AT_64, Mips::K0_64, Mips::K1_64, - Mips::SP_64, Mips::FP_64, Mips::RA_64 + Mips::ZERO_64, Mips::AT_64, Mips::K0_64, Mips::K1_64, Mips::SP_64 }; BitVector Reserved(getNumRegs()); - typedef TargetRegisterClass::iterator RegIter; + typedef 
TargetRegisterClass::const_iterator RegIter; for (unsigned I = 0; I < array_lengthof(ReservedCPURegs); ++I) Reserved.set(ReservedCPURegs[I]); @@ -104,31 +101,36 @@ getReservedRegs(const MachineFunction &MF) const { Reserved.set(ReservedCPU64Regs[I]); // Reserve all registers in AFGR64. - for (RegIter Reg = Mips::AFGR64RegisterClass->begin(); - Reg != Mips::AFGR64RegisterClass->end(); ++Reg) + for (RegIter Reg = Mips::AFGR64RegClass.begin(), + EReg = Mips::AFGR64RegClass.end(); Reg != EReg; ++Reg) Reserved.set(*Reg); - } - else { + } else { // Reserve all registers in CPU64Regs & FGR64. - for (RegIter Reg = Mips::CPU64RegsRegisterClass->begin(); - Reg != Mips::CPU64RegsRegisterClass->end(); ++Reg) + for (RegIter Reg = Mips::CPU64RegsRegClass.begin(), + EReg = Mips::CPU64RegsRegClass.end(); Reg != EReg; ++Reg) Reserved.set(*Reg); - for (RegIter Reg = Mips::FGR64RegisterClass->begin(); - Reg != Mips::FGR64RegisterClass->end(); ++Reg) + for (RegIter Reg = Mips::FGR64RegClass.begin(), + EReg = Mips::FGR64RegClass.end(); Reg != EReg; ++Reg) Reserved.set(*Reg); } - // If GP is dedicated as a global base register, reserve it. - if (MF.getInfo()->globalBaseRegFixed()) { - Reserved.set(Mips::GP); - Reserved.set(Mips::GP_64); + // Reserve FP if this function should have a dedicated frame pointer register. + if (MF.getTarget().getFrameLowering()->hasFP(MF)) { + Reserved.set(Mips::FP); + Reserved.set(Mips::FP_64); } // Reserve hardware registers. Reserved.set(Mips::HWR29); Reserved.set(Mips::HWR29_64); + // Reserve RA if in mips16 mode. + if (Subtarget.inMips16Mode()) { + Reserved.set(Mips::RA); + Reserved.set(Mips::RA_64); + } + return Reserved; } @@ -137,13 +139,9 @@ MipsRegisterInfo::requiresRegisterScavenging(const MachineFunction &MF) const { return true; } -// This function eliminate ADJCALLSTACKDOWN, -// ADJCALLSTACKUP pseudo instructions -void MipsRegisterInfo:: -eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, - MachineBasicBlock::iterator I) const { - // Simply discard ADJCALLSTACKDOWN, ADJCALLSTACKUP instructions. - MBB.erase(I); +bool +MipsRegisterInfo::trackLivenessAfterRegAlloc(const MachineFunction &MF) const { + return true; } // FrameIndex represent objects inside a abstract stack. @@ -154,8 +152,6 @@ eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, RegScavenger *RS) const { MachineInstr &MI = *II; MachineFunction &MF = *MI.getParent()->getParent(); - MachineFrameInfo *MFI = MF.getFrameInfo(); - MipsFunctionInfo *MipsFI = MF.getInfo(); unsigned i = 0; while (!MI.getOperand(i).isFI()) { @@ -175,88 +171,7 @@ eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, << "spOffset : " << spOffset << "\n" << "stackSize : " << stackSize << "\n"); - const std::vector &CSI = MFI->getCalleeSavedInfo(); - int MinCSFI = 0; - int MaxCSFI = -1; - - if (CSI.size()) { - MinCSFI = CSI[0].getFrameIdx(); - MaxCSFI = CSI[CSI.size() - 1].getFrameIdx(); - } - - // The following stack frame objects are always referenced relative to $sp: - // 1. Outgoing arguments. - // 2. Pointer to dynamically allocated stack space. - // 3. Locations for callee-saved registers. - // Everything else is referenced relative to whatever register - // getFrameRegister() returns. - unsigned FrameReg; - - if (MipsFI->isOutArgFI(FrameIndex) || MipsFI->isDynAllocFI(FrameIndex) || - (FrameIndex >= MinCSFI && FrameIndex <= MaxCSFI)) - FrameReg = Subtarget.isABI_N64() ? Mips::SP_64 : Mips::SP; - else - FrameReg = getFrameRegister(MF); - - // Calculate final offset. 
- // - There is no need to change the offset if the frame object is one of the - // following: an outgoing argument, pointer to a dynamically allocated - // stack space or a $gp restore location, - // - If the frame object is any of the following, its offset must be adjusted - // by adding the size of the stack: - // incoming argument, callee-saved register location or local variable. - int64_t Offset; - - if (MipsFI->isOutArgFI(FrameIndex) || MipsFI->isGPFI(FrameIndex) || - MipsFI->isDynAllocFI(FrameIndex)) - Offset = spOffset; - else - Offset = spOffset + (int64_t)stackSize; - - Offset += MI.getOperand(i+1).getImm(); - - DEBUG(errs() << "Offset : " << Offset << "\n" << "<--------->\n"); - - // If MI is not a debug value, make sure Offset fits in the 16-bit immediate - // field. - if (!MI.isDebugValue() && !isInt<16>(Offset)) { - MachineBasicBlock &MBB = *MI.getParent(); - DebugLoc DL = II->getDebugLoc(); - MipsAnalyzeImmediate AnalyzeImm; - unsigned Size = Subtarget.isABI_N64() ? 64 : 32; - unsigned LUi = Subtarget.isABI_N64() ? Mips::LUi64 : Mips::LUi; - unsigned ADDu = Subtarget.isABI_N64() ? Mips::DADDu : Mips::ADDu; - unsigned ZEROReg = Subtarget.isABI_N64() ? Mips::ZERO_64 : Mips::ZERO; - unsigned ATReg = Subtarget.isABI_N64() ? Mips::AT_64 : Mips::AT; - const MipsAnalyzeImmediate::InstSeq &Seq = - AnalyzeImm.Analyze(Offset, Size, true /* LastInstrIsADDiu */); - MipsAnalyzeImmediate::InstSeq::const_iterator Inst = Seq.begin(); - - MipsFI->setEmitNOAT(); - - // The first instruction can be a LUi, which is different from other - // instructions (ADDiu, ORI and SLL) in that it does not have a register - // operand. - if (Inst->Opc == LUi) - BuildMI(MBB, II, DL, TII.get(LUi), ATReg) - .addImm(SignExtend64<16>(Inst->ImmOpnd)); - else - BuildMI(MBB, II, DL, TII.get(Inst->Opc), ATReg).addReg(ZEROReg) - .addImm(SignExtend64<16>(Inst->ImmOpnd)); - - // Build the remaining instructions in Seq except for the last one. - for (++Inst; Inst != Seq.end() - 1; ++Inst) - BuildMI(MBB, II, DL, TII.get(Inst->Opc), ATReg).addReg(ATReg) - .addImm(SignExtend64<16>(Inst->ImmOpnd)); - - BuildMI(MBB, II, DL, TII.get(ADDu), ATReg).addReg(FrameReg).addReg(ATReg); - - FrameReg = ATReg; - Offset = SignExtend64<16>(Inst->ImmOpnd); - } - - MI.getOperand(i).ChangeToRegister(FrameReg, false); - MI.getOperand(i+1).ChangeToImmediate(Offset); + eliminateFI(MI, i, FrameIndex, stackSize, spOffset); } unsigned MipsRegisterInfo:: diff --git a/lib/Target/Mips/MipsRegisterInfo.h b/lib/Target/Mips/MipsRegisterInfo.h index 0716d29..9a05e94 100644 --- a/lib/Target/Mips/MipsRegisterInfo.h +++ b/lib/Target/Mips/MipsRegisterInfo.h @@ -25,10 +25,12 @@ class MipsSubtarget; class TargetInstrInfo; class Type; -struct MipsRegisterInfo : public MipsGenRegisterInfo { +class MipsRegisterInfo : public MipsGenRegisterInfo { +protected: const MipsSubtarget &Subtarget; const TargetInstrInfo &TII; +public: MipsRegisterInfo(const MipsSubtarget &Subtarget, const TargetInstrInfo &tii); /// getRegisterNumbering - Given the enum value for some register, e.g. @@ -42,16 +44,14 @@ struct MipsRegisterInfo : public MipsGenRegisterInfo { void adjustMipsStackFrame(MachineFunction &MF) const; /// Code Generation virtual methods... 
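The removed block above, the one-line eliminateFI(...) call that replaces it, and the pure virtual eliminateFI hook added at the bottom of this header diff together form a template-method split: the base class keeps the shared operand walk and debug output, while each register-info subclass supplies the actual frame-index rewrite (MipsSERegisterInfo.cpp later in this patch provides the real override). A toy, self-contained sketch of that shape, with LLVM types replaced by plain scalars and every name illustrative:

    #include <cstdio>

    // Shared walk lives in the base class; the subtarget-specific rewrite
    // is a private pure virtual hook, mirroring the new MipsRegisterInfo
    // layout introduced in this patch.
    class RegInfoBase {
    public:
      virtual ~RegInfoBase() {}
      void eliminateFrameIndex(int FrameIndex, unsigned long long StackSize,
                               long long SPOffset) {
        // ...the shared part (finding the FI operand, DEBUG output)...
        eliminateFI(FrameIndex, StackSize, SPOffset);
      }
    private:
      virtual void eliminateFI(int FrameIndex, unsigned long long StackSize,
                               long long SPOffset) = 0;
    };

    // Standard-encoding flavor, standing in for MipsSERegisterInfo.
    class SERegInfo : public RegInfoBase {
      virtual void eliminateFI(int FrameIndex, unsigned long long StackSize,
                               long long SPOffset) {
        // Local variables are addressed past the incoming stack area.
        std::printf("FI#%d -> offset %lld\n", FrameIndex,
                    SPOffset + (long long)StackSize);
      }
    };

    int main() {
      SERegInfo RI;
      RI.eliminateFrameIndex(2, 24, -8); // hypothetical local-variable slot
      return 0;
    }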
- const uint16_t *getCalleeSavedRegs(const MachineFunction* MF = 0) const; + const uint16_t *getCalleeSavedRegs(const MachineFunction *MF = 0) const; const uint32_t *getCallPreservedMask(CallingConv::ID) const; BitVector getReservedRegs(const MachineFunction &MF) const; virtual bool requiresRegisterScavenging(const MachineFunction &MF) const; - void eliminateCallFramePseudoInstr(MachineFunction &MF, - MachineBasicBlock &MBB, - MachineBasicBlock::iterator I) const; + virtual bool trackLivenessAfterRegAlloc(const MachineFunction &MF) const; /// Stack Frame Processing Methods void eliminateFrameIndex(MachineBasicBlock::iterator II, @@ -65,6 +65,11 @@ struct MipsRegisterInfo : public MipsGenRegisterInfo { /// Exception handling queries. unsigned getEHExceptionRegister() const; unsigned getEHHandlerRegister() const; + +private: + virtual void eliminateFI(MachineBasicBlock::iterator II, unsigned OpNo, + int FrameIndex, uint64_t StackSize, + int64_t SPOffset) const = 0; }; } // end namespace llvm diff --git a/lib/Target/Mips/MipsRegisterInfo.td b/lib/Target/Mips/MipsRegisterInfo.td index ce399a0..b255e42 100644 --- a/lib/Target/Mips/MipsRegisterInfo.td +++ b/lib/Target/Mips/MipsRegisterInfo.td @@ -70,8 +70,8 @@ class HWR num, string n> : MipsReg { let Namespace = "Mips" in { // General Purpose Registers - def ZERO : MipsGPRReg< 0, "ZERO">, DwarfRegNum<[0]>; - def AT : MipsGPRReg< 1, "AT">, DwarfRegNum<[1]>; + def ZERO : MipsGPRReg< 0, "zero">, DwarfRegNum<[0]>; + def AT : MipsGPRReg< 1, "at">, DwarfRegNum<[1]>; def V0 : MipsGPRReg< 2, "2">, DwarfRegNum<[2]>; def V1 : MipsGPRReg< 3, "3">, DwarfRegNum<[3]>; def A0 : MipsGPRReg< 4, "4">, DwarfRegNum<[4]>; @@ -98,14 +98,14 @@ let Namespace = "Mips" in { def T9 : MipsGPRReg< 25, "25">, DwarfRegNum<[25]>; def K0 : MipsGPRReg< 26, "26">, DwarfRegNum<[26]>; def K1 : MipsGPRReg< 27, "27">, DwarfRegNum<[27]>; - def GP : MipsGPRReg< 28, "GP">, DwarfRegNum<[28]>; - def SP : MipsGPRReg< 29, "SP">, DwarfRegNum<[29]>; - def FP : MipsGPRReg< 30, "FP">, DwarfRegNum<[30]>; - def RA : MipsGPRReg< 31, "RA">, DwarfRegNum<[31]>; + def GP : MipsGPRReg< 28, "gp">, DwarfRegNum<[28]>; + def SP : MipsGPRReg< 29, "sp">, DwarfRegNum<[29]>; + def FP : MipsGPRReg< 30, "fp">, DwarfRegNum<[30]>; + def RA : MipsGPRReg< 31, "ra">, DwarfRegNum<[31]>; // General Purpose 64-bit Registers - def ZERO_64 : Mips64GPRReg< 0, "ZERO", [ZERO]>, DwarfRegNum<[0]>; - def AT_64 : Mips64GPRReg< 1, "AT", [AT]>, DwarfRegNum<[1]>; + def ZERO_64 : Mips64GPRReg< 0, "zero", [ZERO]>, DwarfRegNum<[0]>; + def AT_64 : Mips64GPRReg< 1, "at", [AT]>, DwarfRegNum<[1]>; def V0_64 : Mips64GPRReg< 2, "2", [V0]>, DwarfRegNum<[2]>; def V1_64 : Mips64GPRReg< 3, "3", [V1]>, DwarfRegNum<[3]>; def A0_64 : Mips64GPRReg< 4, "4", [A0]>, DwarfRegNum<[4]>; @@ -132,97 +132,97 @@ let Namespace = "Mips" in { def T9_64 : Mips64GPRReg< 25, "25", [T9]>, DwarfRegNum<[25]>; def K0_64 : Mips64GPRReg< 26, "26", [K0]>, DwarfRegNum<[26]>; def K1_64 : Mips64GPRReg< 27, "27", [K1]>, DwarfRegNum<[27]>; - def GP_64 : Mips64GPRReg< 28, "GP", [GP]>, DwarfRegNum<[28]>; - def SP_64 : Mips64GPRReg< 29, "SP", [SP]>, DwarfRegNum<[29]>; - def FP_64 : Mips64GPRReg< 30, "FP", [FP]>, DwarfRegNum<[30]>; - def RA_64 : Mips64GPRReg< 31, "RA", [RA]>, DwarfRegNum<[31]>; + def GP_64 : Mips64GPRReg< 28, "gp", [GP]>, DwarfRegNum<[28]>; + def SP_64 : Mips64GPRReg< 29, "sp", [SP]>, DwarfRegNum<[29]>; + def FP_64 : Mips64GPRReg< 30, "fp", [FP]>, DwarfRegNum<[30]>; + def RA_64 : Mips64GPRReg< 31, "ra", [RA]>, DwarfRegNum<[31]>; /// Mips Single point precision 
FPU Registers - def F0 : FPR< 0, "F0">, DwarfRegNum<[32]>; - def F1 : FPR< 1, "F1">, DwarfRegNum<[33]>; - def F2 : FPR< 2, "F2">, DwarfRegNum<[34]>; - def F3 : FPR< 3, "F3">, DwarfRegNum<[35]>; - def F4 : FPR< 4, "F4">, DwarfRegNum<[36]>; - def F5 : FPR< 5, "F5">, DwarfRegNum<[37]>; - def F6 : FPR< 6, "F6">, DwarfRegNum<[38]>; - def F7 : FPR< 7, "F7">, DwarfRegNum<[39]>; - def F8 : FPR< 8, "F8">, DwarfRegNum<[40]>; - def F9 : FPR< 9, "F9">, DwarfRegNum<[41]>; - def F10 : FPR<10, "F10">, DwarfRegNum<[42]>; - def F11 : FPR<11, "F11">, DwarfRegNum<[43]>; - def F12 : FPR<12, "F12">, DwarfRegNum<[44]>; - def F13 : FPR<13, "F13">, DwarfRegNum<[45]>; - def F14 : FPR<14, "F14">, DwarfRegNum<[46]>; - def F15 : FPR<15, "F15">, DwarfRegNum<[47]>; - def F16 : FPR<16, "F16">, DwarfRegNum<[48]>; - def F17 : FPR<17, "F17">, DwarfRegNum<[49]>; - def F18 : FPR<18, "F18">, DwarfRegNum<[50]>; - def F19 : FPR<19, "F19">, DwarfRegNum<[51]>; - def F20 : FPR<20, "F20">, DwarfRegNum<[52]>; - def F21 : FPR<21, "F21">, DwarfRegNum<[53]>; - def F22 : FPR<22, "F22">, DwarfRegNum<[54]>; - def F23 : FPR<23, "F23">, DwarfRegNum<[55]>; - def F24 : FPR<24, "F24">, DwarfRegNum<[56]>; - def F25 : FPR<25, "F25">, DwarfRegNum<[57]>; - def F26 : FPR<26, "F26">, DwarfRegNum<[58]>; - def F27 : FPR<27, "F27">, DwarfRegNum<[59]>; - def F28 : FPR<28, "F28">, DwarfRegNum<[60]>; - def F29 : FPR<29, "F29">, DwarfRegNum<[61]>; - def F30 : FPR<30, "F30">, DwarfRegNum<[62]>; - def F31 : FPR<31, "F31">, DwarfRegNum<[63]>; + def F0 : FPR< 0, "f0">, DwarfRegNum<[32]>; + def F1 : FPR< 1, "f1">, DwarfRegNum<[33]>; + def F2 : FPR< 2, "f2">, DwarfRegNum<[34]>; + def F3 : FPR< 3, "f3">, DwarfRegNum<[35]>; + def F4 : FPR< 4, "f4">, DwarfRegNum<[36]>; + def F5 : FPR< 5, "f5">, DwarfRegNum<[37]>; + def F6 : FPR< 6, "f6">, DwarfRegNum<[38]>; + def F7 : FPR< 7, "f7">, DwarfRegNum<[39]>; + def F8 : FPR< 8, "f8">, DwarfRegNum<[40]>; + def F9 : FPR< 9, "f9">, DwarfRegNum<[41]>; + def F10 : FPR<10, "f10">, DwarfRegNum<[42]>; + def F11 : FPR<11, "f11">, DwarfRegNum<[43]>; + def F12 : FPR<12, "f12">, DwarfRegNum<[44]>; + def F13 : FPR<13, "f13">, DwarfRegNum<[45]>; + def F14 : FPR<14, "f14">, DwarfRegNum<[46]>; + def F15 : FPR<15, "f15">, DwarfRegNum<[47]>; + def F16 : FPR<16, "f16">, DwarfRegNum<[48]>; + def F17 : FPR<17, "f17">, DwarfRegNum<[49]>; + def F18 : FPR<18, "f18">, DwarfRegNum<[50]>; + def F19 : FPR<19, "f19">, DwarfRegNum<[51]>; + def F20 : FPR<20, "f20">, DwarfRegNum<[52]>; + def F21 : FPR<21, "f21">, DwarfRegNum<[53]>; + def F22 : FPR<22, "f22">, DwarfRegNum<[54]>; + def F23 : FPR<23, "f23">, DwarfRegNum<[55]>; + def F24 : FPR<24, "f24">, DwarfRegNum<[56]>; + def F25 : FPR<25, "f25">, DwarfRegNum<[57]>; + def F26 : FPR<26, "f26">, DwarfRegNum<[58]>; + def F27 : FPR<27, "f27">, DwarfRegNum<[59]>; + def F28 : FPR<28, "f28">, DwarfRegNum<[60]>; + def F29 : FPR<29, "f29">, DwarfRegNum<[61]>; + def F30 : FPR<30, "f30">, DwarfRegNum<[62]>; + def F31 : FPR<31, "f31">, DwarfRegNum<[63]>; /// Mips Double point precision FPU Registers (aliased /// with the single precision to hold 64 bit values) - def D0 : AFPR< 0, "F0", [F0, F1]>; - def D1 : AFPR< 2, "F2", [F2, F3]>; - def D2 : AFPR< 4, "F4", [F4, F5]>; - def D3 : AFPR< 6, "F6", [F6, F7]>; - def D4 : AFPR< 8, "F8", [F8, F9]>; - def D5 : AFPR<10, "F10", [F10, F11]>; - def D6 : AFPR<12, "F12", [F12, F13]>; - def D7 : AFPR<14, "F14", [F14, F15]>; - def D8 : AFPR<16, "F16", [F16, F17]>; - def D9 : AFPR<18, "F18", [F18, F19]>; - def D10 : AFPR<20, "F20", [F20, F21]>; - def D11 : AFPR<22, "F22", [F22, 
F23]>; - def D12 : AFPR<24, "F24", [F24, F25]>; - def D13 : AFPR<26, "F26", [F26, F27]>; - def D14 : AFPR<28, "F28", [F28, F29]>; - def D15 : AFPR<30, "F30", [F30, F31]>; + def D0 : AFPR< 0, "f0", [F0, F1]>; + def D1 : AFPR< 2, "f2", [F2, F3]>; + def D2 : AFPR< 4, "f4", [F4, F5]>; + def D3 : AFPR< 6, "f6", [F6, F7]>; + def D4 : AFPR< 8, "f8", [F8, F9]>; + def D5 : AFPR<10, "f10", [F10, F11]>; + def D6 : AFPR<12, "f12", [F12, F13]>; + def D7 : AFPR<14, "f14", [F14, F15]>; + def D8 : AFPR<16, "f16", [F16, F17]>; + def D9 : AFPR<18, "f18", [F18, F19]>; + def D10 : AFPR<20, "f20", [F20, F21]>; + def D11 : AFPR<22, "f22", [F22, F23]>; + def D12 : AFPR<24, "f24", [F24, F25]>; + def D13 : AFPR<26, "f26", [F26, F27]>; + def D14 : AFPR<28, "f28", [F28, F29]>; + def D15 : AFPR<30, "f30", [F30, F31]>; /// Mips Double point precision FPU Registers in MFP64 mode. - def D0_64 : AFPR64<0, "F0", [F0]>, DwarfRegNum<[32]>; - def D1_64 : AFPR64<1, "F1", [F1]>, DwarfRegNum<[33]>; - def D2_64 : AFPR64<2, "F2", [F2]>, DwarfRegNum<[34]>; - def D3_64 : AFPR64<3, "F3", [F3]>, DwarfRegNum<[35]>; - def D4_64 : AFPR64<4, "F4", [F4]>, DwarfRegNum<[36]>; - def D5_64 : AFPR64<5, "F5", [F5]>, DwarfRegNum<[37]>; - def D6_64 : AFPR64<6, "F6", [F6]>, DwarfRegNum<[38]>; - def D7_64 : AFPR64<7, "F7", [F7]>, DwarfRegNum<[39]>; - def D8_64 : AFPR64<8, "F8", [F8]>, DwarfRegNum<[40]>; - def D9_64 : AFPR64<9, "F9", [F9]>, DwarfRegNum<[41]>; - def D10_64 : AFPR64<10, "F10", [F10]>, DwarfRegNum<[42]>; - def D11_64 : AFPR64<11, "F11", [F11]>, DwarfRegNum<[43]>; - def D12_64 : AFPR64<12, "F12", [F12]>, DwarfRegNum<[44]>; - def D13_64 : AFPR64<13, "F13", [F13]>, DwarfRegNum<[45]>; - def D14_64 : AFPR64<14, "F14", [F14]>, DwarfRegNum<[46]>; - def D15_64 : AFPR64<15, "F15", [F15]>, DwarfRegNum<[47]>; - def D16_64 : AFPR64<16, "F16", [F16]>, DwarfRegNum<[48]>; - def D17_64 : AFPR64<17, "F17", [F17]>, DwarfRegNum<[49]>; - def D18_64 : AFPR64<18, "F18", [F18]>, DwarfRegNum<[50]>; - def D19_64 : AFPR64<19, "F19", [F19]>, DwarfRegNum<[51]>; - def D20_64 : AFPR64<20, "F20", [F20]>, DwarfRegNum<[52]>; - def D21_64 : AFPR64<21, "F21", [F21]>, DwarfRegNum<[53]>; - def D22_64 : AFPR64<22, "F22", [F22]>, DwarfRegNum<[54]>; - def D23_64 : AFPR64<23, "F23", [F23]>, DwarfRegNum<[55]>; - def D24_64 : AFPR64<24, "F24", [F24]>, DwarfRegNum<[56]>; - def D25_64 : AFPR64<25, "F25", [F25]>, DwarfRegNum<[57]>; - def D26_64 : AFPR64<26, "F26", [F26]>, DwarfRegNum<[58]>; - def D27_64 : AFPR64<27, "F27", [F27]>, DwarfRegNum<[59]>; - def D28_64 : AFPR64<28, "F28", [F28]>, DwarfRegNum<[60]>; - def D29_64 : AFPR64<29, "F29", [F29]>, DwarfRegNum<[61]>; - def D30_64 : AFPR64<30, "F30", [F30]>, DwarfRegNum<[62]>; - def D31_64 : AFPR64<31, "F31", [F31]>, DwarfRegNum<[63]>; + def D0_64 : AFPR64<0, "f0", [F0]>, DwarfRegNum<[32]>; + def D1_64 : AFPR64<1, "f1", [F1]>, DwarfRegNum<[33]>; + def D2_64 : AFPR64<2, "f2", [F2]>, DwarfRegNum<[34]>; + def D3_64 : AFPR64<3, "f3", [F3]>, DwarfRegNum<[35]>; + def D4_64 : AFPR64<4, "f4", [F4]>, DwarfRegNum<[36]>; + def D5_64 : AFPR64<5, "f5", [F5]>, DwarfRegNum<[37]>; + def D6_64 : AFPR64<6, "f6", [F6]>, DwarfRegNum<[38]>; + def D7_64 : AFPR64<7, "f7", [F7]>, DwarfRegNum<[39]>; + def D8_64 : AFPR64<8, "f8", [F8]>, DwarfRegNum<[40]>; + def D9_64 : AFPR64<9, "f9", [F9]>, DwarfRegNum<[41]>; + def D10_64 : AFPR64<10, "f10", [F10]>, DwarfRegNum<[42]>; + def D11_64 : AFPR64<11, "f11", [F11]>, DwarfRegNum<[43]>; + def D12_64 : AFPR64<12, "f12", [F12]>, DwarfRegNum<[44]>; + def D13_64 : AFPR64<13, "f13", [F13]>, DwarfRegNum<[45]>; + def 
D14_64 : AFPR64<14, "f14", [F14]>, DwarfRegNum<[46]>; + def D15_64 : AFPR64<15, "f15", [F15]>, DwarfRegNum<[47]>; + def D16_64 : AFPR64<16, "f16", [F16]>, DwarfRegNum<[48]>; + def D17_64 : AFPR64<17, "f17", [F17]>, DwarfRegNum<[49]>; + def D18_64 : AFPR64<18, "f18", [F18]>, DwarfRegNum<[50]>; + def D19_64 : AFPR64<19, "f19", [F19]>, DwarfRegNum<[51]>; + def D20_64 : AFPR64<20, "f20", [F20]>, DwarfRegNum<[52]>; + def D21_64 : AFPR64<21, "f21", [F21]>, DwarfRegNum<[53]>; + def D22_64 : AFPR64<22, "f22", [F22]>, DwarfRegNum<[54]>; + def D23_64 : AFPR64<23, "f23", [F23]>, DwarfRegNum<[55]>; + def D24_64 : AFPR64<24, "f24", [F24]>, DwarfRegNum<[56]>; + def D25_64 : AFPR64<25, "f25", [F25]>, DwarfRegNum<[57]>; + def D26_64 : AFPR64<26, "f26", [F26]>, DwarfRegNum<[58]>; + def D27_64 : AFPR64<27, "f27", [F27]>, DwarfRegNum<[59]>; + def D28_64 : AFPR64<28, "f28", [F28]>, DwarfRegNum<[60]>; + def D29_64 : AFPR64<29, "f29", [F29]>, DwarfRegNum<[61]>; + def D30_64 : AFPR64<30, "f30", [F30]>, DwarfRegNum<[62]>; + def D31_64 : AFPR64<31, "f31", [F31]>, DwarfRegNum<[63]>; // Hi/Lo registers def HI : Register<"hi">, DwarfRegNum<[64]>; @@ -236,6 +236,9 @@ let Namespace = "Mips" in { // Status flags register def FCR31 : Register<"31">; + // fcc0 register + def FCC0 : Register<"fcc0">; + // Hardware register $29 def HWR29 : Register<"29">; def HWR29_64 : Register<"29">; @@ -246,26 +249,41 @@ let Namespace = "Mips" in { //===----------------------------------------------------------------------===// def CPURegs : RegisterClass<"Mips", [i32], 32, (add + // Reserved + ZERO, AT, // Return Values and Arguments V0, V1, A0, A1, A2, A3, // Not preserved across procedure calls - T0, T1, T2, T3, T4, T5, T6, T7, T8, T9, + T0, T1, T2, T3, T4, T5, T6, T7, // Callee save S0, S1, S2, S3, S4, S5, S6, S7, + // Not preserved across procedure calls + T8, T9, // Reserved - ZERO, AT, K0, K1, GP, SP, FP, RA)>; + K0, K1, GP, SP, FP, RA)>; def CPU64Regs : RegisterClass<"Mips", [i64], 64, (add +// Reserved + ZERO_64, AT_64, // Return Values and Arguments V0_64, V1_64, A0_64, A1_64, A2_64, A3_64, // Not preserved across procedure calls - T0_64, T1_64, T2_64, T3_64, T4_64, T5_64, T6_64, T7_64, T8_64, T9_64, + T0_64, T1_64, T2_64, T3_64, T4_64, T5_64, T6_64, T7_64, // Callee save S0_64, S1_64, S2_64, S3_64, S4_64, S5_64, S6_64, S7_64, + // Not preserved across procedure calls + T8_64, T9_64, // Reserved - ZERO_64, AT_64, K0_64, K1_64, GP_64, SP_64, FP_64, RA_64)> { - let SubRegClasses = [(CPURegs sub_32)]; -} + K0_64, K1_64, GP_64, SP_64, FP_64, RA_64)>; + +def CPU16Regs : RegisterClass<"Mips", [i32], 32, (add + // Return Values and Arguments + V0, V1, A0, A1, A2, A3, + // Callee save + S0, S1)>; + +def CPURAReg : RegisterClass<"Mips", [i32], 32, (add RA)>; + // 64bit fp: // * FGR64 - 32 64-bit registers @@ -278,26 +296,24 @@ def FGR32 : RegisterClass<"Mips", [f32], 32, (sequence "F%u", 0, 31)>; def AFGR64 : RegisterClass<"Mips", [f64], 64, (add // Return Values and Arguments - D0, D1, D6, D7, + D0, D1, + // Not preserved across procedure calls + D2, D3, D4, D5, + // Return Values and Arguments + D6, D7, // Not preserved across procedure calls - D2, D3, D4, D5, D8, D9, + D8, D9, // Callee save - D10, D11, D12, D13, D14, D15)> { - let SubRegClasses = [(FGR32 sub_fpeven, sub_fpodd)]; -} + D10, D11, D12, D13, D14, D15)>; -def FGR64 : RegisterClass<"Mips", [f64], 64, (sequence "D%u_64", 0, 31)> { - let SubRegClasses = [(FGR32 sub_32)]; -} +def FGR64 : RegisterClass<"Mips", [f64], 64, (sequence "D%u_64", 0, 31)>; // Condition Register for 
floating point operations -def CCR : RegisterClass<"Mips", [i32], 32, (add FCR31)>; +def CCR : RegisterClass<"Mips", [i32], 32, (add FCR31,FCC0)>; // Hi/Lo Registers def HILO : RegisterClass<"Mips", [i32], 32, (add HI, LO)>; -def HILO64 : RegisterClass<"Mips", [i64], 64, (add HI64, LO64)> { - let SubRegClasses = [(HILO sub_32)]; -} +def HILO64 : RegisterClass<"Mips", [i64], 64, (add HI64, LO64)>; // Hardware registers def HWRegs : RegisterClass<"Mips", [i32], 32, (add HWR29)>; diff --git a/lib/Target/Mips/MipsSEFrameLowering.cpp b/lib/Target/Mips/MipsSEFrameLowering.cpp new file mode 100644 index 0000000..1c59847 --- /dev/null +++ b/lib/Target/Mips/MipsSEFrameLowering.cpp @@ -0,0 +1,210 @@ +//===-- MipsSEFrameLowering.cpp - Mips32/64 Frame Information -------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the Mips32/64 implementation of TargetFrameLowering class. +// +//===----------------------------------------------------------------------===// + +#include "MipsSEFrameLowering.h" +#include "MipsAnalyzeImmediate.h" +#include "MipsSEInstrInfo.h" +#include "MipsMachineFunction.h" +#include "MCTargetDesc/MipsBaseInfo.h" +#include "llvm/Function.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/Support/CommandLine.h" + +using namespace llvm; + +void MipsSEFrameLowering::emitPrologue(MachineFunction &MF) const { + MachineBasicBlock &MBB = MF.front(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + const MipsRegisterInfo *RegInfo = + static_cast(MF.getTarget().getRegisterInfo()); + const MipsSEInstrInfo &TII = + *static_cast(MF.getTarget().getInstrInfo()); + MachineBasicBlock::iterator MBBI = MBB.begin(); + DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc(); + unsigned SP = STI.isABI_N64() ? Mips::SP_64 : Mips::SP; + unsigned FP = STI.isABI_N64() ? Mips::FP_64 : Mips::FP; + unsigned ZERO = STI.isABI_N64() ? Mips::ZERO_64 : Mips::ZERO; + unsigned ADDu = STI.isABI_N64() ? Mips::DADDu : Mips::ADDu; + + // First, compute final stack size. + uint64_t StackSize = MFI->getStackSize(); + + // No need to allocate space on the stack. + if (StackSize == 0 && !MFI->adjustsStack()) return; + + MachineModuleInfo &MMI = MF.getMMI(); + std::vector &Moves = MMI.getFrameMoves(); + MachineLocation DstML, SrcML; + + // Adjust stack. + TII.adjustStackPtr(SP, -StackSize, MBB, MBBI); + + // emit ".cfi_def_cfa_offset StackSize" + MCSymbol *AdjustSPLabel = MMI.getContext().CreateTempSymbol(); + BuildMI(MBB, MBBI, dl, + TII.get(TargetOpcode::PROLOG_LABEL)).addSym(AdjustSPLabel); + DstML = MachineLocation(MachineLocation::VirtualFP); + SrcML = MachineLocation(MachineLocation::VirtualFP, -StackSize); + Moves.push_back(MachineMove(AdjustSPLabel, DstML, SrcML)); + + const std::vector &CSI = MFI->getCalleeSavedInfo(); + + if (CSI.size()) { + // Find the instruction past the last instruction that saves a callee-saved + // register to the stack. + for (unsigned i = 0; i < CSI.size(); ++i) + ++MBBI; + + // Iterate over list of callee-saved registers and emit .cfi_offset + // directives. 
+ MCSymbol *CSLabel = MMI.getContext().CreateTempSymbol(); + BuildMI(MBB, MBBI, dl, + TII.get(TargetOpcode::PROLOG_LABEL)).addSym(CSLabel); + + for (std::vector::const_iterator I = CSI.begin(), + E = CSI.end(); I != E; ++I) { + int64_t Offset = MFI->getObjectOffset(I->getFrameIdx()); + unsigned Reg = I->getReg(); + + // If Reg is a double precision register, emit two cfa_offsets, + // one for each of the paired single precision registers. + if (Mips::AFGR64RegClass.contains(Reg)) { + MachineLocation DstML0(MachineLocation::VirtualFP, Offset); + MachineLocation DstML1(MachineLocation::VirtualFP, Offset + 4); + MachineLocation SrcML0(RegInfo->getSubReg(Reg, Mips::sub_fpeven)); + MachineLocation SrcML1(RegInfo->getSubReg(Reg, Mips::sub_fpodd)); + + if (!STI.isLittle()) + std::swap(SrcML0, SrcML1); + + Moves.push_back(MachineMove(CSLabel, DstML0, SrcML0)); + Moves.push_back(MachineMove(CSLabel, DstML1, SrcML1)); + } else { + // Reg is either in CPURegs or FGR32. + DstML = MachineLocation(MachineLocation::VirtualFP, Offset); + SrcML = MachineLocation(Reg); + Moves.push_back(MachineMove(CSLabel, DstML, SrcML)); + } + } + } + + // if framepointer enabled, set it to point to the stack pointer. + if (hasFP(MF)) { + // Insert instruction "move $fp, $sp" at this location. + BuildMI(MBB, MBBI, dl, TII.get(ADDu), FP).addReg(SP).addReg(ZERO); + + // emit ".cfi_def_cfa_register $fp" + MCSymbol *SetFPLabel = MMI.getContext().CreateTempSymbol(); + BuildMI(MBB, MBBI, dl, + TII.get(TargetOpcode::PROLOG_LABEL)).addSym(SetFPLabel); + DstML = MachineLocation(FP); + SrcML = MachineLocation(MachineLocation::VirtualFP); + Moves.push_back(MachineMove(SetFPLabel, DstML, SrcML)); + } +} + +void MipsSEFrameLowering::emitEpilogue(MachineFunction &MF, + MachineBasicBlock &MBB) const { + MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + const MipsSEInstrInfo &TII = + *static_cast(MF.getTarget().getInstrInfo()); + DebugLoc dl = MBBI->getDebugLoc(); + unsigned SP = STI.isABI_N64() ? Mips::SP_64 : Mips::SP; + unsigned FP = STI.isABI_N64() ? Mips::FP_64 : Mips::FP; + unsigned ZERO = STI.isABI_N64() ? Mips::ZERO_64 : Mips::ZERO; + unsigned ADDu = STI.isABI_N64() ? Mips::DADDu : Mips::ADDu; + + // if framepointer enabled, restore the stack pointer. + if (hasFP(MF)) { + // Find the first instruction that restores a callee-saved register. + MachineBasicBlock::iterator I = MBBI; + + for (unsigned i = 0; i < MFI->getCalleeSavedInfo().size(); ++i) + --I; + + // Insert instruction "move $sp, $fp" at this location. + BuildMI(MBB, I, dl, TII.get(ADDu), SP).addReg(FP).addReg(ZERO); + } + + // Get the number of bytes from FrameInfo + uint64_t StackSize = MFI->getStackSize(); + + if (!StackSize) + return; + + // Adjust stack. + TII.adjustStackPtr(SP, StackSize, MBB, MBBI); +} + +bool MipsSEFrameLowering:: +spillCalleeSavedRegisters(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + const std::vector &CSI, + const TargetRegisterInfo *TRI) const { + MachineFunction *MF = MBB.getParent(); + MachineBasicBlock *EntryBlock = MF->begin(); + const TargetInstrInfo &TII = *MF->getTarget().getInstrInfo(); + + for (unsigned i = 0, e = CSI.size(); i != e; ++i) { + // Add the callee-saved register as live-in. Do not add if the register is + // RA and return address is taken, because it has already been added in + // method MipsTargetLowering::LowerRETURNADDR. + // It's killed at the spill, unless the register is RA and return address + // is taken. 
+ unsigned Reg = CSI[i].getReg(); + bool IsRAAndRetAddrIsTaken = (Reg == Mips::RA || Reg == Mips::RA_64) + && MF->getFrameInfo()->isReturnAddressTaken(); + if (!IsRAAndRetAddrIsTaken) + EntryBlock->addLiveIn(Reg); + + // Insert the spill to the stack frame. + bool IsKill = !IsRAAndRetAddrIsTaken; + const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); + TII.storeRegToStackSlot(*EntryBlock, MI, Reg, IsKill, + CSI[i].getFrameIdx(), RC, TRI); + } + + return true; +} + +bool +MipsSEFrameLowering::hasReservedCallFrame(const MachineFunction &MF) const { + const MachineFrameInfo *MFI = MF.getFrameInfo(); + + // Reserve call frame if the size of the maximum call frame fits into 16-bit + // immediate field and there are no variable sized objects on the stack. + return isInt<16>(MFI->getMaxCallFrameSize()) && !MFI->hasVarSizedObjects(); +} + +void MipsSEFrameLowering:: +processFunctionBeforeCalleeSavedScan(MachineFunction &MF, + RegScavenger *RS) const { + MachineRegisterInfo &MRI = MF.getRegInfo(); + unsigned FP = STI.isABI_N64() ? Mips::FP_64 : Mips::FP; + + // Mark $fp as used if function has dedicated frame pointer. + if (hasFP(MF)) + MRI.setPhysRegUsed(FP); +} + +const MipsFrameLowering * +llvm::createMipsSEFrameLowering(const MipsSubtarget &ST) { + return new MipsSEFrameLowering(ST); +} diff --git a/lib/Target/Mips/MipsSEFrameLowering.h b/lib/Target/Mips/MipsSEFrameLowering.h new file mode 100644 index 0000000..6481a0a --- /dev/null +++ b/lib/Target/Mips/MipsSEFrameLowering.h @@ -0,0 +1,44 @@ +//===-- MipsSEFrameLowering.h - Mips32/64 frame lowering --------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// +// +//===----------------------------------------------------------------------===// + +#ifndef MIPSSE_FRAMEINFO_H +#define MIPSSE_FRAMEINFO_H + +#include "MipsFrameLowering.h" + +namespace llvm { + +class MipsSEFrameLowering : public MipsFrameLowering { +public: + explicit MipsSEFrameLowering(const MipsSubtarget &STI) + : MipsFrameLowering(STI) {} + + /// emitProlog/emitEpilog - These methods insert prolog and epilog code into + /// the function. + void emitPrologue(MachineFunction &MF) const; + void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const; + + bool spillCalleeSavedRegisters(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + const std::vector &CSI, + const TargetRegisterInfo *TRI) const; + + bool hasReservedCallFrame(const MachineFunction &MF) const; + + void processFunctionBeforeCalleeSavedScan(MachineFunction &MF, + RegScavenger *RS) const; +}; + +} // End llvm namespace + +#endif diff --git a/lib/Target/Mips/MipsSEInstrInfo.cpp b/lib/Target/Mips/MipsSEInstrInfo.cpp new file mode 100644 index 0000000..eeb1de3 --- /dev/null +++ b/lib/Target/Mips/MipsSEInstrInfo.cpp @@ -0,0 +1,320 @@ +//===-- MipsSEInstrInfo.cpp - Mips32/64 Instruction Information -----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the Mips32/64 implementation of the TargetInstrInfo class. 
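("SE" is read here as the standard-encoding MIPS32/64 flavor, in contrast to the compressed mips16 encoding tracked by the new InMips16Mode subtarget flag; the MipsInstrInfo::create and MipsFrameLowering::create factories in MipsTargetMachine.cpp further down select between such implementations. This reading is an inference from the new hasStandardEncoding() accessor; the patch itself never expands the acronym.)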
+// +//===----------------------------------------------------------------------===// + +#include "MipsSEInstrInfo.h" +#include "MipsTargetMachine.h" +#include "MipsMachineFunction.h" +#include "InstPrinter/MipsInstPrinter.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/TargetRegistry.h" +#include "llvm/ADT/STLExtras.h" + +using namespace llvm; + +MipsSEInstrInfo::MipsSEInstrInfo(MipsTargetMachine &tm) + : MipsInstrInfo(tm, + tm.getRelocationModel() == Reloc::PIC_ ? Mips::B : Mips::J), + RI(*tm.getSubtargetImpl(), *this), + IsN64(tm.getSubtarget().isABI_N64()) {} + +const MipsRegisterInfo &MipsSEInstrInfo::getRegisterInfo() const { + return RI; +} + +/// isLoadFromStackSlot - If the specified machine instruction is a direct +/// load from a stack slot, return the virtual or physical register number of +/// the destination along with the FrameIndex of the loaded stack slot. If +/// not, return 0. This predicate must return 0 if the instruction has +/// any side effects other than loading from the stack slot. +unsigned MipsSEInstrInfo:: +isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const +{ + unsigned Opc = MI->getOpcode(); + + if ((Opc == Mips::LW) || (Opc == Mips::LW_P8) || (Opc == Mips::LD) || + (Opc == Mips::LD_P8) || (Opc == Mips::LWC1) || (Opc == Mips::LWC1_P8) || + (Opc == Mips::LDC1) || (Opc == Mips::LDC164) || + (Opc == Mips::LDC164_P8)) { + if ((MI->getOperand(1).isFI()) && // is a stack slot + (MI->getOperand(2).isImm()) && // the imm is zero + (isZeroImm(MI->getOperand(2)))) { + FrameIndex = MI->getOperand(1).getIndex(); + return MI->getOperand(0).getReg(); + } + } + + return 0; +} + +/// isStoreToStackSlot - If the specified machine instruction is a direct +/// store to a stack slot, return the virtual or physical register number of +/// the source reg along with the FrameIndex of the loaded stack slot. If +/// not, return 0. This predicate must return 0 if the instruction has +/// any side effects other than storing to the stack slot. +unsigned MipsSEInstrInfo:: +isStoreToStackSlot(const MachineInstr *MI, int &FrameIndex) const +{ + unsigned Opc = MI->getOpcode(); + + if ((Opc == Mips::SW) || (Opc == Mips::SW_P8) || (Opc == Mips::SD) || + (Opc == Mips::SD_P8) || (Opc == Mips::SWC1) || (Opc == Mips::SWC1_P8) || + (Opc == Mips::SDC1) || (Opc == Mips::SDC164) || + (Opc == Mips::SDC164_P8)) { + if ((MI->getOperand(1).isFI()) && // is a stack slot + (MI->getOperand(2).isImm()) && // the imm is zero + (isZeroImm(MI->getOperand(2)))) { + FrameIndex = MI->getOperand(1).getIndex(); + return MI->getOperand(0).getReg(); + } + } + return 0; +} + +void MipsSEInstrInfo::copyPhysReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, DebugLoc DL, + unsigned DestReg, unsigned SrcReg, + bool KillSrc) const { + unsigned Opc = 0, ZeroReg = 0; + + if (Mips::CPURegsRegClass.contains(DestReg)) { // Copy to CPU Reg. + if (Mips::CPURegsRegClass.contains(SrcReg)) + Opc = Mips::ADDu, ZeroReg = Mips::ZERO; + else if (Mips::CCRRegClass.contains(SrcReg)) + Opc = Mips::CFC1; + else if (Mips::FGR32RegClass.contains(SrcReg)) + Opc = Mips::MFC1; + else if (SrcReg == Mips::HI) + Opc = Mips::MFHI, SrcReg = 0; + else if (SrcReg == Mips::LO) + Opc = Mips::MFLO, SrcReg = 0; + } + else if (Mips::CPURegsRegClass.contains(SrcReg)) { // Copy from CPU Reg. 
+ if (Mips::CCRRegClass.contains(DestReg)) + Opc = Mips::CTC1; + else if (Mips::FGR32RegClass.contains(DestReg)) + Opc = Mips::MTC1; + else if (DestReg == Mips::HI) + Opc = Mips::MTHI, DestReg = 0; + else if (DestReg == Mips::LO) + Opc = Mips::MTLO, DestReg = 0; + } + else if (Mips::FGR32RegClass.contains(DestReg, SrcReg)) + Opc = Mips::FMOV_S; + else if (Mips::AFGR64RegClass.contains(DestReg, SrcReg)) + Opc = Mips::FMOV_D32; + else if (Mips::FGR64RegClass.contains(DestReg, SrcReg)) + Opc = Mips::FMOV_D64; + else if (Mips::CCRRegClass.contains(DestReg, SrcReg)) + Opc = Mips::MOVCCRToCCR; + else if (Mips::CPU64RegsRegClass.contains(DestReg)) { // Copy to CPU64 Reg. + if (Mips::CPU64RegsRegClass.contains(SrcReg)) + Opc = Mips::DADDu, ZeroReg = Mips::ZERO_64; + else if (SrcReg == Mips::HI64) + Opc = Mips::MFHI64, SrcReg = 0; + else if (SrcReg == Mips::LO64) + Opc = Mips::MFLO64, SrcReg = 0; + else if (Mips::FGR64RegClass.contains(SrcReg)) + Opc = Mips::DMFC1; + } + else if (Mips::CPU64RegsRegClass.contains(SrcReg)) { // Copy from CPU64 Reg. + if (DestReg == Mips::HI64) + Opc = Mips::MTHI64, DestReg = 0; + else if (DestReg == Mips::LO64) + Opc = Mips::MTLO64, DestReg = 0; + else if (Mips::FGR64RegClass.contains(DestReg)) + Opc = Mips::DMTC1; + } + + assert(Opc && "Cannot copy registers"); + + MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opc)); + + if (DestReg) + MIB.addReg(DestReg, RegState::Define); + + if (ZeroReg) + MIB.addReg(ZeroReg); + + if (SrcReg) + MIB.addReg(SrcReg, getKillRegState(KillSrc)); +} + +void MipsSEInstrInfo:: +storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, + unsigned SrcReg, bool isKill, int FI, + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const { + DebugLoc DL; + if (I != MBB.end()) DL = I->getDebugLoc(); + MachineMemOperand *MMO = GetMemOperand(MBB, FI, MachineMemOperand::MOStore); + + unsigned Opc = 0; + + if (Mips::CPURegsRegClass.hasSubClassEq(RC)) + Opc = IsN64 ? Mips::SW_P8 : Mips::SW; + else if (Mips::CPU64RegsRegClass.hasSubClassEq(RC)) + Opc = IsN64 ? Mips::SD_P8 : Mips::SD; + else if (Mips::FGR32RegClass.hasSubClassEq(RC)) + Opc = IsN64 ? Mips::SWC1_P8 : Mips::SWC1; + else if (Mips::AFGR64RegClass.hasSubClassEq(RC)) + Opc = Mips::SDC1; + else if (Mips::FGR64RegClass.hasSubClassEq(RC)) + Opc = IsN64 ? Mips::SDC164_P8 : Mips::SDC164; + + assert(Opc && "Register class not handled!"); + BuildMI(MBB, I, DL, get(Opc)).addReg(SrcReg, getKillRegState(isKill)) + .addFrameIndex(FI).addImm(0).addMemOperand(MMO); +} + +void MipsSEInstrInfo:: +loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, + unsigned DestReg, int FI, + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const +{ + DebugLoc DL; + if (I != MBB.end()) DL = I->getDebugLoc(); + MachineMemOperand *MMO = GetMemOperand(MBB, FI, MachineMemOperand::MOLoad); + unsigned Opc = 0; + + if (Mips::CPURegsRegClass.hasSubClassEq(RC)) + Opc = IsN64 ? Mips::LW_P8 : Mips::LW; + else if (Mips::CPU64RegsRegClass.hasSubClassEq(RC)) + Opc = IsN64 ? Mips::LD_P8 : Mips::LD; + else if (Mips::FGR32RegClass.hasSubClassEq(RC)) + Opc = IsN64 ? Mips::LWC1_P8 : Mips::LWC1; + else if (Mips::AFGR64RegClass.hasSubClassEq(RC)) + Opc = Mips::LDC1; + else if (Mips::FGR64RegClass.hasSubClassEq(RC)) + Opc = IsN64 ? 
Mips::LDC164_P8 : Mips::LDC164; + + assert(Opc && "Register class not handled!"); + BuildMI(MBB, I, DL, get(Opc), DestReg).addFrameIndex(FI).addImm(0) + .addMemOperand(MMO); +} + +bool MipsSEInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const { + MachineBasicBlock &MBB = *MI->getParent(); + + switch(MI->getDesc().getOpcode()) { + default: + return false; + case Mips::RetRA: + ExpandRetRA(MBB, MI, Mips::RET); + break; + case Mips::BuildPairF64: + ExpandBuildPairF64(MBB, MI); + break; + case Mips::ExtractElementF64: + ExpandExtractElementF64(MBB, MI); + break; + } + + MBB.erase(MI); + return true; +} + +/// GetOppositeBranchOpc - Return the inverse of the specified +/// opcode, e.g. turning BEQ to BNE. +unsigned MipsSEInstrInfo::GetOppositeBranchOpc(unsigned Opc) const { + switch (Opc) { + default: llvm_unreachable("Illegal opcode!"); + case Mips::BEQ: return Mips::BNE; + case Mips::BNE: return Mips::BEQ; + case Mips::BGTZ: return Mips::BLEZ; + case Mips::BGEZ: return Mips::BLTZ; + case Mips::BLTZ: return Mips::BGEZ; + case Mips::BLEZ: return Mips::BGTZ; + case Mips::BEQ64: return Mips::BNE64; + case Mips::BNE64: return Mips::BEQ64; + case Mips::BGTZ64: return Mips::BLEZ64; + case Mips::BGEZ64: return Mips::BLTZ64; + case Mips::BLTZ64: return Mips::BGEZ64; + case Mips::BLEZ64: return Mips::BGTZ64; + case Mips::BC1T: return Mips::BC1F; + case Mips::BC1F: return Mips::BC1T; + } +} + +/// Adjust SP by Amount bytes. +void MipsSEInstrInfo::adjustStackPtr(unsigned SP, int64_t Amount, + MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) const { + const MipsSubtarget &STI = TM.getSubtarget(); + DebugLoc DL = I != MBB.end() ? I->getDebugLoc() : DebugLoc(); + unsigned ADDu = STI.isABI_N64() ? Mips::DADDu : Mips::ADDu; + unsigned ADDiu = STI.isABI_N64() ? Mips::DADDiu : Mips::ADDiu; + + if (isInt<16>(Amount))// addi sp, sp, amount + BuildMI(MBB, I, DL, get(ADDiu), SP).addReg(SP).addImm(Amount); + else { // Expand immediate that doesn't fit in 16-bit. + unsigned ATReg = STI.isABI_N64() ? Mips::AT_64 : Mips::AT; + + MBB.getParent()->getInfo()->setEmitNOAT(); + Mips::loadImmediate(Amount, STI.isABI_N64(), *this, MBB, I, DL, false, 0); + BuildMI(MBB, I, DL, get(ADDu), SP).addReg(SP).addReg(ATReg); + } +} + +unsigned MipsSEInstrInfo::GetAnalyzableBrOpc(unsigned Opc) const { + return (Opc == Mips::BEQ || Opc == Mips::BNE || Opc == Mips::BGTZ || + Opc == Mips::BGEZ || Opc == Mips::BLTZ || Opc == Mips::BLEZ || + Opc == Mips::BEQ64 || Opc == Mips::BNE64 || Opc == Mips::BGTZ64 || + Opc == Mips::BGEZ64 || Opc == Mips::BLTZ64 || Opc == Mips::BLEZ64 || + Opc == Mips::BC1T || Opc == Mips::BC1F || Opc == Mips::B || + Opc == Mips::J) ? + Opc : 0; +} + +void MipsSEInstrInfo::ExpandRetRA(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, + unsigned Opc) const { + BuildMI(MBB, I, I->getDebugLoc(), get(Opc)).addReg(Mips::RA); +} + +void MipsSEInstrInfo::ExpandExtractElementF64(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) const { + unsigned DstReg = I->getOperand(0).getReg(); + unsigned SrcReg = I->getOperand(1).getReg(); + unsigned N = I->getOperand(2).getImm(); + const MCInstrDesc& Mfc1Tdd = get(Mips::MFC1); + DebugLoc dl = I->getDebugLoc(); + + assert(N < 2 && "Invalid immediate"); + unsigned SubIdx = N ? 
Mips::sub_fpodd : Mips::sub_fpeven; + unsigned SubReg = getRegisterInfo().getSubReg(SrcReg, SubIdx); + + BuildMI(MBB, I, dl, Mfc1Tdd, DstReg).addReg(SubReg); +} + +void MipsSEInstrInfo::ExpandBuildPairF64(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) const { + unsigned DstReg = I->getOperand(0).getReg(); + unsigned LoReg = I->getOperand(1).getReg(), HiReg = I->getOperand(2).getReg(); + const MCInstrDesc& Mtc1Tdd = get(Mips::MTC1); + DebugLoc dl = I->getDebugLoc(); + const TargetRegisterInfo &TRI = getRegisterInfo(); + + // mtc1 Lo, $fp + // mtc1 Hi, $fp + 1 + BuildMI(MBB, I, dl, Mtc1Tdd, TRI.getSubReg(DstReg, Mips::sub_fpeven)) + .addReg(LoReg); + BuildMI(MBB, I, dl, Mtc1Tdd, TRI.getSubReg(DstReg, Mips::sub_fpodd)) + .addReg(HiReg); +} + +const MipsInstrInfo *llvm::createMipsSEInstrInfo(MipsTargetMachine &TM) { + return new MipsSEInstrInfo(TM); +} diff --git a/lib/Target/Mips/MipsSEInstrInfo.h b/lib/Target/Mips/MipsSEInstrInfo.h new file mode 100644 index 0000000..346e74d --- /dev/null +++ b/lib/Target/Mips/MipsSEInstrInfo.h @@ -0,0 +1,86 @@ +//===-- MipsSEInstrInfo.h - Mips32/64 Instruction Information ---*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the Mips32/64 implementation of the TargetInstrInfo class. +// +//===----------------------------------------------------------------------===// + +#ifndef MIPSSEINSTRUCTIONINFO_H +#define MIPSSEINSTRUCTIONINFO_H + +#include "MipsInstrInfo.h" +#include "MipsAnalyzeImmediate.h" +#include "MipsSERegisterInfo.h" + +namespace llvm { + +class MipsSEInstrInfo : public MipsInstrInfo { + const MipsSERegisterInfo RI; + bool IsN64; + +public: + explicit MipsSEInstrInfo(MipsTargetMachine &TM); + + virtual const MipsRegisterInfo &getRegisterInfo() const; + + /// isLoadFromStackSlot - If the specified machine instruction is a direct + /// load from a stack slot, return the virtual or physical register number of + /// the destination along with the FrameIndex of the loaded stack slot. If + /// not, return 0. This predicate must return 0 if the instruction has + /// any side effects other than loading from the stack slot. + virtual unsigned isLoadFromStackSlot(const MachineInstr *MI, + int &FrameIndex) const; + + /// isStoreToStackSlot - If the specified machine instruction is a direct + /// store to a stack slot, return the virtual or physical register number of + /// the source reg along with the FrameIndex of the loaded stack slot. If + /// not, return 0. This predicate must return 0 if the instruction has + /// any side effects other than storing to the stack slot. 
+ virtual unsigned isStoreToStackSlot(const MachineInstr *MI, + int &FrameIndex) const; + + virtual void copyPhysReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, DebugLoc DL, + unsigned DestReg, unsigned SrcReg, + bool KillSrc) const; + + virtual void storeRegToStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + unsigned SrcReg, bool isKill, int FrameIndex, + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const; + + virtual void loadRegFromStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + unsigned DestReg, int FrameIndex, + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const; + + virtual bool expandPostRAPseudo(MachineBasicBlock::iterator MI) const; + + virtual unsigned GetOppositeBranchOpc(unsigned Opc) const; + + /// Adjust SP by Amount bytes. + void adjustStackPtr(unsigned SP, int64_t Amount, MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) const; + +private: + virtual unsigned GetAnalyzableBrOpc(unsigned Opc) const; + + void ExpandRetRA(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, + unsigned Opc) const; + void ExpandExtractElementF64(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) const; + void ExpandBuildPairF64(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) const; +}; + +} + +#endif diff --git a/lib/Target/Mips/MipsSERegisterInfo.cpp b/lib/Target/Mips/MipsSERegisterInfo.cpp new file mode 100644 index 0000000..043a1ef --- /dev/null +++ b/lib/Target/Mips/MipsSERegisterInfo.cpp @@ -0,0 +1,138 @@ +//===-- MipsSERegisterInfo.cpp - MIPS32/64 Register Information -== -------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the MIPS32/64 implementation of the TargetRegisterInfo +// class. 
+// +//===----------------------------------------------------------------------===// + +#include "MipsSERegisterInfo.h" +#include "Mips.h" +#include "MipsAnalyzeImmediate.h" +#include "MipsSEInstrInfo.h" +#include "MipsSubtarget.h" +#include "MipsMachineFunction.h" +#include "llvm/Constants.h" +#include "llvm/DebugInfo.h" +#include "llvm/Type.h" +#include "llvm/Function.h" +#include "llvm/CodeGen/ValueTypes.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/Target/TargetFrameLowering.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/STLExtras.h" + +using namespace llvm; + +MipsSERegisterInfo::MipsSERegisterInfo(const MipsSubtarget &ST, + const TargetInstrInfo &TII) + : MipsRegisterInfo(ST, TII) {} + +// This function eliminate ADJCALLSTACKDOWN, +// ADJCALLSTACKUP pseudo instructions +void MipsSERegisterInfo:: +eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) const { + const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); + + if (!TFI->hasReservedCallFrame(MF)) { + int64_t Amount = I->getOperand(0).getImm(); + + if (I->getOpcode() == Mips::ADJCALLSTACKDOWN) + Amount = -Amount; + + const MipsSEInstrInfo *II = static_cast(&TII); + unsigned SP = Subtarget.isABI_N64() ? Mips::SP_64 : Mips::SP; + + II->adjustStackPtr(SP, Amount, MBB, I); + } + + MBB.erase(I); +} + +void MipsSERegisterInfo::eliminateFI(MachineBasicBlock::iterator II, + unsigned OpNo, int FrameIndex, + uint64_t StackSize, + int64_t SPOffset) const { + MachineInstr &MI = *II; + MachineFunction &MF = *MI.getParent()->getParent(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + MipsFunctionInfo *MipsFI = MF.getInfo(); + + const std::vector &CSI = MFI->getCalleeSavedInfo(); + int MinCSFI = 0; + int MaxCSFI = -1; + + if (CSI.size()) { + MinCSFI = CSI[0].getFrameIdx(); + MaxCSFI = CSI[CSI.size() - 1].getFrameIdx(); + } + + // The following stack frame objects are always referenced relative to $sp: + // 1. Outgoing arguments. + // 2. Pointer to dynamically allocated stack space. + // 3. Locations for callee-saved registers. + // Everything else is referenced relative to whatever register + // getFrameRegister() returns. + unsigned FrameReg; + + if (MipsFI->isOutArgFI(FrameIndex) || + (FrameIndex >= MinCSFI && FrameIndex <= MaxCSFI)) + FrameReg = Subtarget.isABI_N64() ? Mips::SP_64 : Mips::SP; + else + FrameReg = getFrameRegister(MF); + + // Calculate final offset. + // - There is no need to change the offset if the frame object is one of the + // following: an outgoing argument, pointer to a dynamically allocated + // stack space or a $gp restore location, + // - If the frame object is any of the following, its offset must be adjusted + // by adding the size of the stack: + // incoming argument, callee-saved register location or local variable. + int64_t Offset; + + if (MipsFI->isOutArgFI(FrameIndex)) + Offset = SPOffset; + else + Offset = SPOffset + (int64_t)StackSize; + + Offset += MI.getOperand(OpNo + 1).getImm(); + + DEBUG(errs() << "Offset : " << Offset << "\n" << "<--------->\n"); + + // If MI is not a debug value, make sure Offset fits in the 16-bit immediate + // field. 
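  // For a concrete (illustrative) case, assuming the frame register is $fp:
  // an offset of 0x12344 does not fit in 16 bits, so the expansion below
  // would emit roughly
  //   lui   $at, 1           # materialize the high 16 bits of the offset
  //   addu  $at, $fp, $at    # add in the frame register
  // and fold the low half, 0x2344, back into the memory operand.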
+ if (!MI.isDebugValue() && !isInt<16>(Offset)) { + MachineBasicBlock &MBB = *MI.getParent(); + DebugLoc DL = II->getDebugLoc(); + unsigned ADDu = Subtarget.isABI_N64() ? Mips::DADDu : Mips::ADDu; + unsigned ATReg = Subtarget.isABI_N64() ? Mips::AT_64 : Mips::AT; + MipsAnalyzeImmediate::Inst LastInst(0, 0); + + MipsFI->setEmitNOAT(); + Mips::loadImmediate(Offset, Subtarget.isABI_N64(), TII, MBB, II, DL, true, + &LastInst); + BuildMI(MBB, II, DL, TII.get(ADDu), ATReg).addReg(FrameReg).addReg(ATReg); + + FrameReg = ATReg; + Offset = SignExtend64<16>(LastInst.ImmOpnd); + } + + MI.getOperand(OpNo).ChangeToRegister(FrameReg, false); + MI.getOperand(OpNo + 1).ChangeToImmediate(Offset); +} diff --git a/lib/Target/Mips/MipsSERegisterInfo.h b/lib/Target/Mips/MipsSERegisterInfo.h new file mode 100644 index 0000000..4b17b33 --- /dev/null +++ b/lib/Target/Mips/MipsSERegisterInfo.h @@ -0,0 +1,39 @@ +//===-- MipsSERegisterInfo.h - Mips32/64 Register Information ---*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the Mips32/64 implementation of the TargetRegisterInfo +// class. +// +//===----------------------------------------------------------------------===// + +#ifndef MIPSSEREGISTERINFO_H +#define MIPSSEREGISTERINFO_H + +#include "MipsRegisterInfo.h" + +namespace llvm { + +class MipsSERegisterInfo : public MipsRegisterInfo { +public: + MipsSERegisterInfo(const MipsSubtarget &Subtarget, + const TargetInstrInfo &TII); + + void eliminateCallFramePseudoInstr(MachineFunction &MF, + MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) const; + +private: + virtual void eliminateFI(MachineBasicBlock::iterator II, unsigned OpNo, + int FrameIndex, uint64_t StackSize, + int64_t SPOffset) const; +}; + +} // end namespace llvm + +#endif diff --git a/lib/Target/Mips/MipsSubtarget.cpp b/lib/Target/Mips/MipsSubtarget.cpp index 00347df..11ff809 100644 --- a/lib/Target/Mips/MipsSubtarget.cpp +++ b/lib/Target/Mips/MipsSubtarget.cpp @@ -30,7 +30,7 @@ MipsSubtarget::MipsSubtarget(const std::string &TT, const std::string &CPU, MipsArchVersion(Mips32), MipsABI(UnknownABI), IsLittle(little), IsSingleFloat(false), IsFP64bit(false), IsGP64bit(false), HasVFPU(false), IsLinux(true), HasSEInReg(false), HasCondMov(false), HasMulDivAdd(false), - HasMinMax(false), HasSwap(false), HasBitCount(false) + HasMinMax(false), HasSwap(false), HasBitCount(false), InMips16Mode(false) { std::string CPUName = CPU; if (CPUName.empty()) @@ -58,9 +58,9 @@ MipsSubtarget::MipsSubtarget(const std::string &TT, const std::string &CPU, bool MipsSubtarget::enablePostRAScheduler(CodeGenOpt::Level OptLevel, - TargetSubtargetInfo::AntiDepBreakMode& Mode, - RegClassVector& CriticalPathRCs) const { - Mode = TargetSubtargetInfo::ANTIDEP_CRITICAL; + TargetSubtargetInfo::AntiDepBreakMode &Mode, + RegClassVector &CriticalPathRCs) const { + Mode = TargetSubtargetInfo::ANTIDEP_NONE; CriticalPathRCs.clear(); CriticalPathRCs.push_back(hasMips64() ? &Mips::CPU64RegsRegClass : &Mips::CPURegsRegClass); diff --git a/lib/Target/Mips/MipsSubtarget.h b/lib/Target/Mips/MipsSubtarget.h index 7faf77b..3215c44 100644 --- a/lib/Target/Mips/MipsSubtarget.h +++ b/lib/Target/Mips/MipsSubtarget.h @@ -86,6 +86,9 @@ protected: // HasBitCount - Count leading '1' and '0' bits. 
bool HasBitCount;
 
+  // InMips16Mode -- can process Mips16 instructions
+  bool InMips16Mode;
+
   InstrItineraryData InstrItins;
 
 public:
@@ -124,8 +127,11 @@ public:
   bool isSingleFloat() const { return IsSingleFloat; }
   bool isNotSingleFloat() const { return !IsSingleFloat; }
   bool hasVFPU() const { return HasVFPU; }
+  bool inMips16Mode() const { return InMips16Mode; }
   bool isLinux() const { return IsLinux; }
 
+  bool hasStandardEncoding() const { return !inMips16Mode(); }
+
   /// Features related to the presence of specific instructions.
   bool hasSEInReg()   const { return HasSEInReg; }
   bool hasCondMov()   const { return HasCondMov; }
diff --git a/lib/Target/Mips/MipsTargetMachine.cpp b/lib/Target/Mips/MipsTargetMachine.cpp
index 858723b..03a024a 100644
--- a/lib/Target/Mips/MipsTargetMachine.cpp
+++ b/lib/Target/Mips/MipsTargetMachine.cpp
@@ -13,6 +13,8 @@
 #include "MipsTargetMachine.h"
 #include "Mips.h"
+#include "MipsFrameLowering.h"
+#include "MipsInstrInfo.h"
 #include "llvm/PassManager.h"
 #include "llvm/CodeGen/Passes.h"
 #include "llvm/Support/TargetRegistry.h"
@@ -22,8 +24,8 @@ extern "C" void LLVMInitializeMipsTarget() {
   // Register the target.
   RegisterTargetMachine<MipsebTargetMachine> X(TheMipsTarget);
   RegisterTargetMachine<MipselTargetMachine> Y(TheMipselTarget);
-  RegisterTargetMachine<Mips64ebTargetMachine> A(TheMips64Target);
-  RegisterTargetMachine<Mips64elTargetMachine> B(TheMips64elTarget);
+  RegisterTargetMachine<MipsebTargetMachine> A(TheMips64Target);
+  RegisterTargetMachine<MipselTargetMachine> B(TheMips64elTarget);
 }
 
 // DataLayout --> Big-endian, 32-bit pointer/ABI/alignment
@@ -48,8 +50,8 @@ MipsTargetMachine(const Target &T, StringRef TT,
         (Subtarget.isABI_N64() ?
          "E-p:64:64:64-i8:8:32-i16:16:32-i64:64:64-f128:128:128-n32" :
          "E-p:32:32:32-i8:8:32-i16:16:32-i64:64:64-n32")),
-    InstrInfo(*this),
-    FrameLowering(Subtarget),
+    InstrInfo(MipsInstrInfo::create(*this)),
+    FrameLowering(MipsFrameLowering::create(*this, Subtarget)),
     TLInfo(*this), TSInfo(*this), JITInfo() {
 }
 
@@ -71,24 +73,6 @@ MipselTargetMachine(const Target &T, StringRef TT,
                     CodeGenOpt::Level OL)
   : MipsTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {}
 
-void Mips64ebTargetMachine::anchor() { }
-
-Mips64ebTargetMachine::
-Mips64ebTargetMachine(const Target &T, StringRef TT,
-                      StringRef CPU, StringRef FS, const TargetOptions &Options,
-                      Reloc::Model RM, CodeModel::Model CM,
-                      CodeGenOpt::Level OL)
-  : MipsTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {}
-
-void Mips64elTargetMachine::anchor() { }
-
-Mips64elTargetMachine::
-Mips64elTargetMachine(const Target &T, StringRef TT,
-                      StringRef CPU, StringRef FS, const TargetOptions &Options,
-                      Reloc::Model RM, CodeModel::Model CM,
-                      CodeGenOpt::Level OL)
-  : MipsTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {}
-
 namespace {
 /// Mips Code Generator Pass Configuration Options.
 class MipsPassConfig : public TargetPassConfig {
@@ -105,8 +89,6 @@ public:
   }
 
   virtual bool addInstSelector();
-  virtual bool addPreRegAlloc();
-  virtual bool addPreSched2();
   virtual bool addPreEmitPass();
 };
 } // namespace
@@ -118,7 +100,7 @@ TargetPassConfig *MipsTargetMachine::createPassConfig(PassManagerBase &PM) {
 // Install an instruction selector pass using
 // the ISelDag to gen Mips code.
 bool MipsPassConfig::addInstSelector() {
-  PM->add(createMipsISelDag(getMipsTargetMachine()));
+  addPass(createMipsISelDag(getMipsTargetMachine()));
   return false;
 }
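These hunks move Mips over to the TargetPassConfig scheme: pass hooks are
overridden on a config class and passes are queued with addPass(...) rather
than pushed onto the raw PassManager from outside. A rough plain-C++ sketch of
that shape (names and the lambda-based "passes" are illustrative, not LLVM's
actual classes):

    #include <cstdio>
    #include <functional>
    #include <vector>

    // A base class exposes hook methods and a queue; a target subclass
    // overrides the hooks and calls addPass() instead of touching the
    // pass manager directly.
    struct PassConfig {
      std::vector<std::function<void()>> Pipeline;
      void addPass(std::function<void()> P) { Pipeline.push_back(std::move(P)); }
      virtual bool addInstSelector() { return false; }
      virtual bool addPreEmitPass()  { return false; }
      void build() { addInstSelector(); addPreEmitPass(); }
      virtual ~PassConfig() {}
    };

    struct MipsLikePassConfig : PassConfig {
      bool addInstSelector() override {
        addPass([] { std::puts("isel dag"); });
        return false;
      }
      bool addPreEmitPass() override {
        addPass([] { std::puts("delay slot filler"); });
        addPass([] { std::puts("long branch"); }); // skipped for mips16 below
        return true;
      }
    };

    int main() {
      MipsLikePassConfig Cfg;
      Cfg.build();
      for (auto &P : Cfg.Pipeline) P(); // run the queued "passes" in order
    }

@@ -126,20 +108,13 @@ bool MipsPassConfig::addInstSelector() {
 // machine code is emitted. return true if -print-machineinstrs should
 // print out the code after the passes.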
bool MipsPassConfig::addPreEmitPass() { - PM->add(createMipsDelaySlotFillerPass(getMipsTargetMachine())); - return true; -} + MipsTargetMachine &TM = getMipsTargetMachine(); + addPass(createMipsDelaySlotFillerPass(TM)); -bool MipsPassConfig::addPreRegAlloc() { - // Do not restore $gp if target is Mips64. - // In N32/64, $gp is a callee-saved register. - if (!getMipsSubtarget().hasMips64()) - PM->add(createMipsEmitGPRestorePass(getMipsTargetMachine())); - return true; -} + // NOTE: long branch has not been implemented for mips16. + if (TM.getSubtarget().hasStandardEncoding()) + addPass(createMipsLongBranchPass(TM)); -bool MipsPassConfig::addPreSched2() { - PM->add(createMipsExpandPseudoPass(getMipsTargetMachine())); return true; } diff --git a/lib/Target/Mips/MipsTargetMachine.h b/lib/Target/Mips/MipsTargetMachine.h index 80c00e8..21b49e6 100644 --- a/lib/Target/Mips/MipsTargetMachine.h +++ b/lib/Target/Mips/MipsTargetMachine.h @@ -25,56 +25,56 @@ #include "llvm/Target/TargetFrameLowering.h" namespace llvm { - class formatted_raw_ostream; +class formatted_raw_ostream; +class MipsRegisterInfo; + +class MipsTargetMachine : public LLVMTargetMachine { + MipsSubtarget Subtarget; + const TargetData DataLayout; // Calculates type size & alignment + const MipsInstrInfo *InstrInfo; + const MipsFrameLowering *FrameLowering; + MipsTargetLowering TLInfo; + MipsSelectionDAGInfo TSInfo; + MipsJITInfo JITInfo; - class MipsTargetMachine : public LLVMTargetMachine { - MipsSubtarget Subtarget; - const TargetData DataLayout; // Calculates type size & alignment - MipsInstrInfo InstrInfo; - MipsFrameLowering FrameLowering; - MipsTargetLowering TLInfo; - MipsSelectionDAGInfo TSInfo; - MipsJITInfo JITInfo; - - public: - MipsTargetMachine(const Target &T, StringRef TT, - StringRef CPU, StringRef FS, const TargetOptions &Options, - Reloc::Model RM, CodeModel::Model CM, - CodeGenOpt::Level OL, - bool isLittle); - - virtual const MipsInstrInfo *getInstrInfo() const - { return &InstrInfo; } - virtual const TargetFrameLowering *getFrameLowering() const - { return &FrameLowering; } - virtual const MipsSubtarget *getSubtargetImpl() const - { return &Subtarget; } - virtual const TargetData *getTargetData() const - { return &DataLayout;} - virtual MipsJITInfo *getJITInfo() - { return &JITInfo; } - - - virtual const MipsRegisterInfo *getRegisterInfo() const { - return &InstrInfo.getRegisterInfo(); - } - - virtual const MipsTargetLowering *getTargetLowering() const { - return &TLInfo; - } - - virtual const MipsSelectionDAGInfo* getSelectionDAGInfo() const { - return &TSInfo; - } - - // Pass Pipeline Configuration - virtual TargetPassConfig *createPassConfig(PassManagerBase &PM); - virtual bool addCodeEmitter(PassManagerBase &PM, - JITCodeEmitter &JCE); - - }; +public: + MipsTargetMachine(const Target &T, StringRef TT, + StringRef CPU, StringRef FS, const TargetOptions &Options, + Reloc::Model RM, CodeModel::Model CM, + CodeGenOpt::Level OL, + bool isLittle); + + virtual ~MipsTargetMachine() { delete InstrInfo; } + + virtual const MipsInstrInfo *getInstrInfo() const + { return InstrInfo; } + virtual const TargetFrameLowering *getFrameLowering() const + { return FrameLowering; } + virtual const MipsSubtarget *getSubtargetImpl() const + { return &Subtarget; } + virtual const TargetData *getTargetData() const + { return &DataLayout;} + virtual MipsJITInfo *getJITInfo() + { return &JITInfo; } + + virtual const MipsRegisterInfo *getRegisterInfo() const { + return &InstrInfo->getRegisterInfo(); + } + + virtual const 
MipsTargetLowering *getTargetLowering() const { + return &TLInfo; + } + + virtual const MipsSelectionDAGInfo* getSelectionDAGInfo() const { + return &TSInfo; + } + + // Pass Pipeline Configuration + virtual TargetPassConfig *createPassConfig(PassManagerBase &PM); + virtual bool addCodeEmitter(PassManagerBase &PM, JITCodeEmitter &JCE); +}; -/// MipsebTargetMachine - Mips32 big endian target machine. +/// MipsebTargetMachine - Mips32/64 big endian target machine. /// class MipsebTargetMachine : public MipsTargetMachine { virtual void anchor(); @@ -85,7 +85,7 @@ public: CodeGenOpt::Level OL); }; -/// MipselTargetMachine - Mips32 little endian target machine. +/// MipselTargetMachine - Mips32/64 little endian target machine. /// class MipselTargetMachine : public MipsTargetMachine { virtual void anchor(); @@ -96,29 +96,6 @@ public: CodeGenOpt::Level OL); }; -/// Mips64ebTargetMachine - Mips64 big endian target machine. -/// -class Mips64ebTargetMachine : public MipsTargetMachine { - virtual void anchor(); -public: - Mips64ebTargetMachine(const Target &T, StringRef TT, - StringRef CPU, StringRef FS, - const TargetOptions &Options, - Reloc::Model RM, CodeModel::Model CM, - CodeGenOpt::Level OL); -}; - -/// Mips64elTargetMachine - Mips64 little endian target machine. -/// -class Mips64elTargetMachine : public MipsTargetMachine { - virtual void anchor(); -public: - Mips64elTargetMachine(const Target &T, StringRef TT, - StringRef CPU, StringRef FS, - const TargetOptions &Options, - Reloc::Model RM, CodeModel::Model CM, - CodeGenOpt::Level OL); -}; } // End llvm namespace #endif diff --git a/lib/Target/NVPTX/CMakeLists.txt b/lib/Target/NVPTX/CMakeLists.txt new file mode 100644 index 0000000..7cb16b4 --- /dev/null +++ b/lib/Target/NVPTX/CMakeLists.txt @@ -0,0 +1,34 @@ +set(LLVM_TARGET_DEFINITIONS NVPTX.td) + + +tablegen(LLVM NVPTXGenRegisterInfo.inc -gen-register-info) +tablegen(LLVM NVPTXGenInstrInfo.inc -gen-instr-info) +tablegen(LLVM NVPTXGenAsmWriter.inc -gen-asm-writer) +tablegen(LLVM NVPTXGenDAGISel.inc -gen-dag-isel) +tablegen(LLVM NVPTXGenSubtargetInfo.inc -gen-subtarget) +add_public_tablegen_target(NVPTXCommonTableGen) + +set(NVPTXCodeGen_sources + NVPTXFrameLowering.cpp + NVPTXInstrInfo.cpp + NVPTXISelDAGToDAG.cpp + NVPTXISelLowering.cpp + NVPTXRegisterInfo.cpp + NVPTXSubtarget.cpp + NVPTXTargetMachine.cpp + NVPTXSplitBBatBar.cpp + NVPTXLowerAggrCopies.cpp + NVPTXutil.cpp + NVPTXAllocaHoisting.cpp + NVPTXAsmPrinter.cpp + NVPTXUtilities.cpp + VectorElementize.cpp + ) + +add_llvm_target(NVPTXCodeGen ${NVPTXCodeGen_sources}) + +add_dependencies(LLVMNVPTXCodeGen intrinsics_gen) + +add_subdirectory(TargetInfo) +add_subdirectory(InstPrinter) +add_subdirectory(MCTargetDesc) diff --git a/lib/Target/NVPTX/InstPrinter/CMakeLists.txt b/lib/Target/NVPTX/InstPrinter/CMakeLists.txt new file mode 100644 index 0000000..ae4c751 --- /dev/null +++ b/lib/Target/NVPTX/InstPrinter/CMakeLists.txt @@ -0,0 +1,7 @@ +include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. 
) + +add_llvm_library(LLVMNVPTXAsmPrinter + NVPTXInstPrinter.cpp + ) + +add_dependencies(LLVMNVPTXAsmPrinter NVPTXCommonTableGen) diff --git a/lib/Target/NVPTX/InstPrinter/LLVMBuild.txt b/lib/Target/NVPTX/InstPrinter/LLVMBuild.txt new file mode 100644 index 0000000..032b573 --- /dev/null +++ b/lib/Target/NVPTX/InstPrinter/LLVMBuild.txt @@ -0,0 +1,23 @@ +;===- ./lib/Target/NVPTX/InstPrinter/LLVMBuild.txt -------------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. +; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = Library +name = NVPTXAsmPrinter +parent = NVPTX +required_libraries = MC Support +add_to_library_groups = NVPTX diff --git a/lib/Target/NVPTX/InstPrinter/Makefile b/lib/Target/NVPTX/InstPrinter/Makefile new file mode 100644 index 0000000..7b78654 --- /dev/null +++ b/lib/Target/NVPTX/InstPrinter/Makefile @@ -0,0 +1,15 @@ +##===- lib/Target/NVPTX/AsmPrinter/Makefile ----------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## +LEVEL = ../../../.. +LIBRARYNAME = LLVMNVPTXAsmPrinter + +# Hack: we need to include 'main' ptx target directory to grab private headers +CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. + +include $(LEVEL)/Makefile.common diff --git a/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.cpp b/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.cpp new file mode 100644 index 0000000..10051c7 --- /dev/null +++ b/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.cpp @@ -0,0 +1 @@ +// Placeholder diff --git a/lib/Target/NVPTX/LLVMBuild.txt b/lib/Target/NVPTX/LLVMBuild.txt new file mode 100644 index 0000000..e2d6ed2 --- /dev/null +++ b/lib/Target/NVPTX/LLVMBuild.txt @@ -0,0 +1,32 @@ +;===- ./lib/Target/NVPTX/LLVMBuild.txt -------------------------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. 
+; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[common] +subdirectories = InstPrinter MCTargetDesc TargetInfo + +[component_0] +type = TargetGroup +name = NVPTX +parent = Target +has_asmprinter = 1 + +[component_1] +type = Library +name = NVPTXCodeGen +parent = NVPTX +required_libraries = Analysis AsmPrinter CodeGen Core MC NVPTXDesc NVPTXInfo SelectionDAG Support Target TransformUtils +add_to_library_groups = NVPTX diff --git a/lib/Target/NVPTX/MCTargetDesc/CMakeLists.txt b/lib/Target/NVPTX/MCTargetDesc/CMakeLists.txt new file mode 100644 index 0000000..a030d9f --- /dev/null +++ b/lib/Target/NVPTX/MCTargetDesc/CMakeLists.txt @@ -0,0 +1,9 @@ +add_llvm_library(LLVMNVPTXDesc + NVPTXMCAsmInfo.cpp + NVPTXMCTargetDesc.cpp + ) + +add_dependencies(LLVMNVPTXDesc NVPTXCommonTableGen) + +# Hack: we need to include 'main' target directory to grab private headers +#include_directories(${CMAKE_CURRENT_SOURCE_DIR}/.. ${CMAKE_CURRENT_BINARY_DIR}/..) diff --git a/lib/Target/NVPTX/MCTargetDesc/LLVMBuild.txt b/lib/Target/NVPTX/MCTargetDesc/LLVMBuild.txt new file mode 100644 index 0000000..01a051a --- /dev/null +++ b/lib/Target/NVPTX/MCTargetDesc/LLVMBuild.txt @@ -0,0 +1,23 @@ +;===- ./lib/Target/NVPTX/MCTargetDesc/LLVMBuild.txt ------------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. +; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = Library +name = NVPTXDesc +parent = NVPTX +required_libraries = MC NVPTXAsmPrinter NVPTXInfo Support +add_to_library_groups = NVPTX diff --git a/lib/Target/NVPTX/MCTargetDesc/Makefile b/lib/Target/NVPTX/MCTargetDesc/Makefile new file mode 100644 index 0000000..31d06cb --- /dev/null +++ b/lib/Target/NVPTX/MCTargetDesc/Makefile @@ -0,0 +1,16 @@ +##===- lib/Target/NVPTX/TargetDesc/Makefile ----------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## + +LEVEL = ../../../.. +LIBRARYNAME = LLVMNVPTXDesc + +# Hack: we need to include 'main' target directory to grab private headers +CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. + +include $(LEVEL)/Makefile.common diff --git a/lib/Target/NVPTX/MCTargetDesc/NVPTXBaseInfo.h b/lib/Target/NVPTX/MCTargetDesc/NVPTXBaseInfo.h new file mode 100644 index 0000000..4545838 --- /dev/null +++ b/lib/Target/NVPTX/MCTargetDesc/NVPTXBaseInfo.h @@ -0,0 +1,88 @@ +//===-- NVPTXBaseInfo.h - Top-level definitions for NVPTX -------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This file contains small standalone helper functions and enum definitions for +// the NVPTX target useful for the compiler back-end and the MC libraries. +// As such, it deliberately does not include references to LLVM core +// code gen types, passes, etc.. +// +//===----------------------------------------------------------------------===// + +#ifndef NVPTXBASEINFO_H +#define NVPTXBASEINFO_H + +namespace llvm { + +enum AddressSpace { + ADDRESS_SPACE_GENERIC = 0, + ADDRESS_SPACE_GLOBAL = 1, + ADDRESS_SPACE_CONST_NOT_GEN = 2, // Not part of generic space + ADDRESS_SPACE_SHARED = 3, + ADDRESS_SPACE_CONST = 4, + ADDRESS_SPACE_LOCAL = 5, + + // NVVM Internal + ADDRESS_SPACE_PARAM = 101 +}; + +enum PropertyAnnotation { + PROPERTY_MAXNTID_X = 0, + PROPERTY_MAXNTID_Y, + PROPERTY_MAXNTID_Z, + PROPERTY_REQNTID_X, + PROPERTY_REQNTID_Y, + PROPERTY_REQNTID_Z, + PROPERTY_MINNCTAPERSM, + PROPERTY_ISTEXTURE, + PROPERTY_ISSURFACE, + PROPERTY_ISSAMPLER, + PROPERTY_ISREADONLY_IMAGE_PARAM, + PROPERTY_ISWRITEONLY_IMAGE_PARAM, + PROPERTY_ISKERNEL_FUNCTION, + PROPERTY_ALIGN, + + // last property + PROPERTY_LAST +}; + +const unsigned AnnotationNameLen = 8; // length of each annotation name +const char +PropertyAnnotationNames[PROPERTY_LAST + 1][AnnotationNameLen + 1] = { + "maxntidx", // PROPERTY_MAXNTID_X + "maxntidy", // PROPERTY_MAXNTID_Y + "maxntidz", // PROPERTY_MAXNTID_Z + "reqntidx", // PROPERTY_REQNTID_X + "reqntidy", // PROPERTY_REQNTID_Y + "reqntidz", // PROPERTY_REQNTID_Z + "minctasm", // PROPERTY_MINNCTAPERSM + "texture", // PROPERTY_ISTEXTURE + "surface", // PROPERTY_ISSURFACE + "sampler", // PROPERTY_ISSAMPLER + "rdoimage", // PROPERTY_ISREADONLY_IMAGE_PARAM + "wroimage", // PROPERTY_ISWRITEONLY_IMAGE_PARAM + "kernel", // PROPERTY_ISKERNEL_FUNCTION + "align", // PROPERTY_ALIGN + + // last property + "proplast", // PROPERTY_LAST +}; + +// name of named metadata used for global annotations +#if defined(__GNUC__) +// As this is declared to be static but some of the .cpp files that +// include NVVM.h do not use this array, gcc gives a warning when +// compiling those .cpp files, hence __attribute__((unused)). +__attribute__((unused)) +#endif +static const char* NamedMDForAnnotations = "nvvm.annotations"; + +} + + +#endif diff --git a/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp new file mode 100644 index 0000000..1d41665 --- /dev/null +++ b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp @@ -0,0 +1,63 @@ +//===-- NVPTXMCAsmInfo.cpp - NVPTX asm properties -------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the declarations of the NVPTXMCAsmInfo properties. 
+// +//===----------------------------------------------------------------------===// + +#include "NVPTXMCAsmInfo.h" +#include "llvm/ADT/Triple.h" +#include "llvm/Support/CommandLine.h" + +using namespace llvm; + +bool CompileForDebugging; + +// -debug-compile - Command line option to inform opt and llc passes to +// compile for debugging +static cl::opt +Debug("debug-compile", cl::desc("Compile for debugging"), cl::Hidden, + cl::location(CompileForDebugging), + cl::init(false)); + +void NVPTXMCAsmInfo::anchor() { } + +NVPTXMCAsmInfo::NVPTXMCAsmInfo(const Target &T, const StringRef &TT) { + Triple TheTriple(TT); + if (TheTriple.getArch() == Triple::nvptx64) + PointerSize = 8; + + CommentString = "//"; + + PrivateGlobalPrefix = "$L__"; + + AllowPeriodsInName = false; + + HasSetDirective = false; + + HasSingleParameterDotFile = false; + + InlineAsmStart = " inline asm"; + InlineAsmEnd = " inline asm"; + + SupportsDebugInformation = CompileForDebugging; + HasDotTypeDotSizeDirective = false; + + Data8bitsDirective = " .b8 "; + Data16bitsDirective = " .b16 "; + Data32bitsDirective = " .b32 "; + Data64bitsDirective = " .b64 "; + PrivateGlobalPrefix = ""; + ZeroDirective = " .b8"; + AsciiDirective = " .b8"; + AscizDirective = " .b8"; + + // @TODO: Can we just disable this? + GlobalDirective = "\t// .globl\t"; +} diff --git a/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.h b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.h new file mode 100644 index 0000000..82097da --- /dev/null +++ b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.h @@ -0,0 +1,30 @@ +//===-- NVPTXMCAsmInfo.h - NVPTX asm properties ----------------*- C++ -*--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the declaration of the NVPTXMCAsmInfo class. +// +//===----------------------------------------------------------------------===// + +#ifndef NVPTX_MCASM_INFO_H +#define NVPTX_MCASM_INFO_H + +#include "llvm/MC/MCAsmInfo.h" + +namespace llvm { +class Target; +class StringRef; + +class NVPTXMCAsmInfo : public MCAsmInfo { + virtual void anchor(); +public: + explicit NVPTXMCAsmInfo(const Target &T, const StringRef &TT); +}; +} // namespace llvm + +#endif // NVPTX_MCASM_INFO_H diff --git a/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp new file mode 100644 index 0000000..44aa01c --- /dev/null +++ b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp @@ -0,0 +1,91 @@ +//===-- NVPTXMCTargetDesc.cpp - NVPTX Target Descriptions -------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file provides NVPTX specific target descriptions. 
+// +//===----------------------------------------------------------------------===// + +#include "NVPTXMCTargetDesc.h" +#include "NVPTXMCAsmInfo.h" +#include "llvm/MC/MCCodeGenInfo.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/Support/TargetRegistry.h" + +#define GET_INSTRINFO_MC_DESC +#include "NVPTXGenInstrInfo.inc" + +#define GET_SUBTARGETINFO_MC_DESC +#include "NVPTXGenSubtargetInfo.inc" + +#define GET_REGINFO_MC_DESC +#include "NVPTXGenRegisterInfo.inc" + + +using namespace llvm; + +static MCInstrInfo *createNVPTXMCInstrInfo() { + MCInstrInfo *X = new MCInstrInfo(); + InitNVPTXMCInstrInfo(X); + return X; +} + +static MCRegisterInfo *createNVPTXMCRegisterInfo(StringRef TT) { + MCRegisterInfo *X = new MCRegisterInfo(); + // PTX does not have a return address register. + InitNVPTXMCRegisterInfo(X, 0); + return X; +} + +static MCSubtargetInfo *createNVPTXMCSubtargetInfo(StringRef TT, StringRef CPU, + StringRef FS) { + MCSubtargetInfo *X = new MCSubtargetInfo(); + InitNVPTXMCSubtargetInfo(X, TT, CPU, FS); + return X; +} + +static MCCodeGenInfo *createNVPTXMCCodeGenInfo(StringRef TT, Reloc::Model RM, + CodeModel::Model CM, + CodeGenOpt::Level OL) { + MCCodeGenInfo *X = new MCCodeGenInfo(); + X->InitMCCodeGenInfo(RM, CM, OL); + return X; +} + + +// Force static initialization. +extern "C" void LLVMInitializeNVPTXTargetMC() { + // Register the MC asm info. + RegisterMCAsmInfo X(TheNVPTXTarget32); + RegisterMCAsmInfo Y(TheNVPTXTarget64); + + // Register the MC codegen info. + TargetRegistry::RegisterMCCodeGenInfo(TheNVPTXTarget32, + createNVPTXMCCodeGenInfo); + TargetRegistry::RegisterMCCodeGenInfo(TheNVPTXTarget64, + createNVPTXMCCodeGenInfo); + + // Register the MC instruction info. + TargetRegistry::RegisterMCInstrInfo(TheNVPTXTarget32, createNVPTXMCInstrInfo); + TargetRegistry::RegisterMCInstrInfo(TheNVPTXTarget64, createNVPTXMCInstrInfo); + + // Register the MC register info. + TargetRegistry::RegisterMCRegInfo(TheNVPTXTarget32, + createNVPTXMCRegisterInfo); + TargetRegistry::RegisterMCRegInfo(TheNVPTXTarget64, + createNVPTXMCRegisterInfo); + + // Register the MC subtarget info. + TargetRegistry::RegisterMCSubtargetInfo(TheNVPTXTarget32, + createNVPTXMCSubtargetInfo); + TargetRegistry::RegisterMCSubtargetInfo(TheNVPTXTarget64, + createNVPTXMCSubtargetInfo); + +} diff --git a/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.h b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.h new file mode 100644 index 0000000..af95c76 --- /dev/null +++ b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.h @@ -0,0 +1,36 @@ +//===-- NVPTXMCTargetDesc.h - NVPTX Target Descriptions ---------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file provides NVPTX specific target descriptions. +// +//===----------------------------------------------------------------------===// + +#ifndef NVPTXMCTARGETDESC_H +#define NVPTXMCTARGETDESC_H + +namespace llvm { +class Target; + +extern Target TheNVPTXTarget32; +extern Target TheNVPTXTarget64; + +} // End llvm namespace + +// Defines symbolic names for PTX registers. +#define GET_REGINFO_ENUM +#include "NVPTXGenRegisterInfo.inc" + +// Defines symbolic names for the PTX instructions. 
+#define GET_INSTRINFO_ENUM +#include "NVPTXGenInstrInfo.inc" + +#define GET_SUBTARGETINFO_ENUM +#include "NVPTXGenSubtargetInfo.inc" + +#endif diff --git a/lib/Target/NVPTX/Makefile b/lib/Target/NVPTX/Makefile new file mode 100644 index 0000000..8db20eb --- /dev/null +++ b/lib/Target/NVPTX/Makefile @@ -0,0 +1,23 @@ +##===- lib/Target/NVPTX/Makefile ---------------------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## + +LEVEL = ../../.. +LIBRARYNAME = LLVMNVPTXCodeGen +TARGET = NVPTX + +# Make sure that tblgen is run, first thing. +BUILT_SOURCES = NVPTXGenAsmWriter.inc \ + NVPTXGenDAGISel.inc \ + NVPTXGenInstrInfo.inc \ + NVPTXGenRegisterInfo.inc \ + NVPTXGenSubtargetInfo.inc + +DIRS = InstPrinter TargetInfo MCTargetDesc + +include $(LEVEL)/Makefile.common diff --git a/lib/Target/NVPTX/ManagedStringPool.h b/lib/Target/NVPTX/ManagedStringPool.h new file mode 100644 index 0000000..b568488 --- /dev/null +++ b/lib/Target/NVPTX/ManagedStringPool.h @@ -0,0 +1,49 @@ +//===-- ManagedStringPool.h - Managed String Pool ---------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// The strings allocated from a managed string pool are owned by the string +// pool and will be deleted together with the managed string pool. +// +//===----------------------------------------------------------------------===// + + +#ifndef LLVM_SUPPORT_MANAGED_STRING_H +#define LLVM_SUPPORT_MANAGED_STRING_H + +#include "llvm/ADT/SmallVector.h" +#include + +namespace llvm { + +/// ManagedStringPool - The strings allocated from a managed string pool are +/// owned by the string pool and will be deleted together with the managed +/// string pool. +class ManagedStringPool { + SmallVector Pool; + +public: + ManagedStringPool() {} + ~ManagedStringPool() { + SmallVector::iterator Current = Pool.begin(); + while (Current != Pool.end()) { + delete *Current; + Current++; + } + } + + std::string *getManagedString(const char *S) { + std::string *Str = new std::string(S); + Pool.push_back(Str); + return Str; + } +}; + +} + +#endif diff --git a/lib/Target/NVPTX/NVPTX.h b/lib/Target/NVPTX/NVPTX.h new file mode 100644 index 0000000..a8d082a --- /dev/null +++ b/lib/Target/NVPTX/NVPTX.h @@ -0,0 +1,137 @@ +//===-- NVPTX.h - Top-level interface for NVPTX representation --*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the entry points for global functions defined in +// the LLVM NVPTX back-end. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TARGET_NVPTX_H +#define LLVM_TARGET_NVPTX_H + +#include "llvm/Value.h" +#include "llvm/Module.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Target/TargetMachine.h" +#include "MCTargetDesc/NVPTXBaseInfo.h" +#include +#include + +namespace llvm { +class NVPTXTargetMachine; +class FunctionPass; +class formatted_raw_ostream; + +namespace NVPTXCC { +enum CondCodes { + EQ, + NE, + LT, + LE, + GT, + GE +}; +} + +inline static const char *NVPTXCondCodeToString(NVPTXCC::CondCodes CC) { + switch (CC) { + case NVPTXCC::NE: return "ne"; + case NVPTXCC::EQ: return "eq"; + case NVPTXCC::LT: return "lt"; + case NVPTXCC::LE: return "le"; + case NVPTXCC::GT: return "gt"; + case NVPTXCC::GE: return "ge"; + } + llvm_unreachable("Unknown condition code"); +} + +FunctionPass *createNVPTXISelDag(NVPTXTargetMachine &TM, + llvm::CodeGenOpt::Level OptLevel); +FunctionPass *createVectorElementizePass(NVPTXTargetMachine &); +FunctionPass *createLowerStructArgsPass(NVPTXTargetMachine &); +FunctionPass *createNVPTXReMatPass(NVPTXTargetMachine &); +FunctionPass *createNVPTXReMatBlockPass(NVPTXTargetMachine &); + +bool isImageOrSamplerVal(const Value *, const Module *); + +extern Target TheNVPTXTarget32; +extern Target TheNVPTXTarget64; + +namespace NVPTX +{ +enum DrvInterface { + NVCL, + CUDA, + TEST +}; + +// A field inside TSFlags needs a shift and a mask. The usage is +// always as follows : +// ((TSFlags & fieldMask) >> fieldShift) +// The enum keeps the mask, the shift, and all valid values of the +// field in one place. +enum VecInstType { + VecInstTypeShift = 0, + VecInstTypeMask = 0xF, + + VecNOP = 0, + VecLoad = 1, + VecStore = 2, + VecBuild = 3, + VecShuffle = 4, + VecExtract = 5, + VecInsert = 6, + VecDest = 7, + VecOther = 15 +}; + +enum SimpleMove { + SimpleMoveMask = 0x10, + SimpleMoveShift = 4 +}; +enum LoadStore { + isLoadMask = 0x20, + isLoadShift = 5, + isStoreMask = 0x40, + isStoreShift = 6 +}; + +namespace PTXLdStInstCode { +enum AddressSpace{ + GENERIC = 0, + GLOBAL = 1, + CONSTANT = 2, + SHARED = 3, + PARAM = 4, + LOCAL = 5 +}; +enum FromType { + Unsigned = 0, + Signed, + Float +}; +enum VecType { + Scalar = 1, + V2 = 2, + V4 = 4 +}; +} +} +} // end namespace llvm; + +// Defines symbolic names for NVPTX registers. This defines a mapping from +// register name to register number. +#define GET_REGINFO_ENUM +#include "NVPTXGenRegisterInfo.inc" + +// Defines symbolic names for the NVPTX instructions. +#define GET_INSTRINFO_ENUM +#include "NVPTXGenInstrInfo.inc" + +#endif diff --git a/lib/Target/NVPTX/NVPTX.td b/lib/Target/NVPTX/NVPTX.td new file mode 100644 index 0000000..ae7710e --- /dev/null +++ b/lib/Target/NVPTX/NVPTX.td @@ -0,0 +1,44 @@ +//===- NVPTX.td - Describe the NVPTX Target Machine -----------*- tblgen -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// This is the top level entry point for the NVPTX target. 
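+//===----------------------------------------------------------------------===//

Earlier in this hunk, NVPTX.h documents the TSFlags convention used by these
enums: each field is defined by a mask and a shift and is always read as
((TSFlags & fieldMask) >> fieldShift). A tiny standalone check of that idiom,
using constants that mirror NVPTX::VecInstType and NVPTX::SimpleMove (plain
C++, illustration only):

    #include <cassert>
    #include <cstdint>

    // Mirrors NVPTX::VecInstType / NVPTX::SimpleMove from NVPTX.h above.
    enum { VecInstTypeShift = 0, VecInstTypeMask = 0xF, VecLoad = 1 };
    enum { SimpleMoveMask = 0x10, SimpleMoveShift = 4 };

    int main() {
      // Pack: a vector-load instruction that is also flagged as a simple move.
      uint64_t TSFlags = (uint64_t(VecLoad) << VecInstTypeShift) |
                         (uint64_t(1) << SimpleMoveShift);

      // Unpack with the documented idiom: ((TSFlags & fieldMask) >> fieldShift).
      assert(((TSFlags & VecInstTypeMask) >> VecInstTypeShift) == VecLoad);
      assert(((TSFlags & SimpleMoveMask) >> SimpleMoveShift) == 1);
      return 0;
    }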
+
+//===----------------------------------------------------------------------===//
+// Target-independent interfaces
+//===----------------------------------------------------------------------===//
+
+include "llvm/Target/Target.td"
+
+include "NVPTXRegisterInfo.td"
+include "NVPTXInstrInfo.td"
+
+//===----------------------------------------------------------------------===//
+// Subtarget Features.
+// - We use the SM version number instead of explicit feature table.
+// - Need at least one feature to avoid generating zero sized array by
+//   TableGen in NVPTXGenSubtarget.inc.
+//===----------------------------------------------------------------------===//
+def FeatureDummy : SubtargetFeature<"dummy", "dummy", "true", "">;
+
+//===----------------------------------------------------------------------===//
+// NVPTX supported processors.
+//===----------------------------------------------------------------------===//
+
+class Proc<string Name, list<SubtargetFeature> Features>
+ : Processor<Name, NoItineraries, Features>;
+
+def : Proc<"sm_10", [FeatureDummy]>;
+
+
+def NVPTXInstrInfo : InstrInfo {
+}
+
+def NVPTX : Target {
+  let InstructionSet = NVPTXInstrInfo;
+}
diff --git a/lib/Target/NVPTX/NVPTXAllocaHoisting.cpp b/lib/Target/NVPTX/NVPTXAllocaHoisting.cpp
new file mode 100644
index 0000000..668c393
--- /dev/null
+++ b/lib/Target/NVPTX/NVPTXAllocaHoisting.cpp
@@ -0,0 +1,48 @@
+//===-- AllocaHoisting.cpp - Hoist allocas to the entry block --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Hoist the alloca instructions in the non-entry blocks to the entry blocks.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Function.h"
+#include "llvm/Instructions.h"
+#include "llvm/Constants.h"
+#include "NVPTXAllocaHoisting.h"
+
+namespace llvm {
+
+bool NVPTXAllocaHoisting::runOnFunction(Function &function) {
+  bool functionModified = false;
+  Function::iterator I = function.begin();
+  TerminatorInst *firstTerminatorInst = (I++)->getTerminator();
+
+  for (Function::iterator E = function.end(); I != E; ++I) {
+    for (BasicBlock::iterator BI = I->begin(), BE = I->end(); BI != BE;) {
+      AllocaInst *allocaInst = dyn_cast<AllocaInst>(BI++);
+      if (allocaInst && isa<ConstantInt>(allocaInst->getArraySize())) {
+        allocaInst->moveBefore(firstTerminatorInst);
+        functionModified = true;
+      }
+    }
+  }
+
+  return functionModified;
+}
+
+char NVPTXAllocaHoisting::ID = 1;
+RegisterPass<NVPTXAllocaHoisting> X("alloca-hoisting",
+                                    "Hoisting alloca instructions in non-entry "
+                                    "blocks to the entry block");
+
+FunctionPass *createAllocaHoisting() {
+  return new NVPTXAllocaHoisting();
+}
+
+} // end namespace llvm
diff --git a/lib/Target/NVPTX/NVPTXAllocaHoisting.h b/lib/Target/NVPTX/NVPTXAllocaHoisting.h
new file mode 100644
index 0000000..24b3bd5
--- /dev/null
+++ b/lib/Target/NVPTX/NVPTXAllocaHoisting.h
@@ -0,0 +1,49 @@
+//===-- AllocaHoisting.h - Hoist allocas to the entry block -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Hoist the alloca instructions in the non-entry blocks to the entry blocks.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef NVPTX_ALLOCA_HOISTING_H_
+#define NVPTX_ALLOCA_HOISTING_H_
+
+#include "llvm/CodeGen/MachineFunctionAnalysis.h"
+#include "llvm/Pass.h"
+#include "llvm/Target/TargetData.h"
+
+namespace llvm {
+
+class FunctionPass;
+class Function;
+
+// Hoisting the alloca instructions in the non-entry blocks to the entry
+// block.
+class NVPTXAllocaHoisting : public FunctionPass {
+public:
+  static char ID; // Pass ID
+  NVPTXAllocaHoisting() : FunctionPass(ID) {}
+
+  void getAnalysisUsage(AnalysisUsage &AU) const {
+    AU.addRequired<TargetData>();
+    AU.addPreserved<MachineFunctionAnalysis>();
+  }
+
+  virtual const char *getPassName() const {
+    return "NVPTX specific alloca hoisting";
+  }
+
+  virtual bool runOnFunction(Function &function);
+};
+
+extern FunctionPass *createAllocaHoisting();
+
+} // end namespace llvm
+
+#endif // NVPTX_ALLOCA_HOISTING_H_
diff --git a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
new file mode 100644
index 0000000..f2b9616
--- /dev/null
+++ b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
@@ -0,0 +1,2064 @@
+//===-- NVPTXAsmPrinter.cpp - NVPTX LLVM assembly writer ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a printer that converts from our internal representation
+// of machine-dependent LLVM code to NVPTX assembly language.
+//
+//===----------------------------------------------------------------------===//
+
+#include "NVPTXAsmPrinter.h"
+#include "NVPTX.h"
+#include "NVPTXInstrInfo.h"
+#include "NVPTXTargetMachine.h"
+#include "NVPTXRegisterInfo.h"
+#include "NVPTXUtilities.h"
+#include "MCTargetDesc/NVPTXMCAsmInfo.h"
+#include "NVPTXNumRegisters.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/DebugInfo.h"
+#include "llvm/Function.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/Module.h"
+#include "llvm/CodeGen/Analysis.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/FormattedStream.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Support/TimeValue.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Assembly/Writer.h"
+#include "cl_common_defines.h"
+#include <sstream>
+using namespace llvm;
+
+
+#include "NVPTXGenAsmWriter.inc"
+
+bool RegAllocNilUsed = true;
+
+#define DEPOTNAME "__local_depot"
+
+static cl::opt<bool>
+EmitLineNumbers("nvptx-emit-line-numbers",
+                cl::desc("NVPTX Specific: Emit Line numbers even without -G"),
+                cl::init(true));
+
+namespace llvm {
+bool InterleaveSrcInPtx = false;
+}
+
+static cl::opt<bool, true> InterleaveSrc("nvptx-emit-src",
+                                         cl::ZeroOrMore,
+                                         cl::desc("NVPTX Specific: Emit source line in ptx file"),
+                                         cl::location(llvm::InterleaveSrcInPtx));
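InterleaveSrc above uses the cl::location mechanism: the option's storage is
an ordinary namespace-level bool (InterleaveSrcInPtx), so other translation
units can test the flag without seeing the option object itself. A minimal
self-contained sketch of the same pattern (hypothetical option name, assuming
only LLVM's CommandLine library):

    #include "llvm/Support/CommandLine.h"
    using namespace llvm;

    // Storage lives outside the option so other files can declare
    // `extern bool MyFeatureEnabled;` and read it directly.
    bool MyFeatureEnabled;

    static cl::opt<bool, true>   // second argument: external storage
    MyFeature("my-feature", cl::desc("Enable the hypothetical feature"),
              cl::location(MyFeatureEnabled), cl::init(false));

    int main(int argc, char **argv) {
      cl::ParseCommandLineOptions(argc, argv);
      return MyFeatureEnabled ? 0 : 1;
    }

+
+
+
+
+// @TODO: This is a copy from AsmPrinter.cpp.  The function is static, so we
+// cannot just link to the existing version.
+/// LowerConstant - Lower the specified LLVM Constant to an MCExpr.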
+/// +using namespace nvptx; +const MCExpr *nvptx::LowerConstant(const Constant *CV, AsmPrinter &AP) { + MCContext &Ctx = AP.OutContext; + + if (CV->isNullValue() || isa(CV)) + return MCConstantExpr::Create(0, Ctx); + + if (const ConstantInt *CI = dyn_cast(CV)) + return MCConstantExpr::Create(CI->getZExtValue(), Ctx); + + if (const GlobalValue *GV = dyn_cast(CV)) + return MCSymbolRefExpr::Create(AP.Mang->getSymbol(GV), Ctx); + + if (const BlockAddress *BA = dyn_cast(CV)) + return MCSymbolRefExpr::Create(AP.GetBlockAddressSymbol(BA), Ctx); + + const ConstantExpr *CE = dyn_cast(CV); + if (CE == 0) + llvm_unreachable("Unknown constant value to lower!"); + + + switch (CE->getOpcode()) { + default: + // If the code isn't optimized, there may be outstanding folding + // opportunities. Attempt to fold the expression using TargetData as a + // last resort before giving up. + if (Constant *C = + ConstantFoldConstantExpression(CE, AP.TM.getTargetData())) + if (C != CE) + return LowerConstant(C, AP); + + // Otherwise report the problem to the user. + { + std::string S; + raw_string_ostream OS(S); + OS << "Unsupported expression in static initializer: "; + WriteAsOperand(OS, CE, /*PrintType=*/false, + !AP.MF ? 0 : AP.MF->getFunction()->getParent()); + report_fatal_error(OS.str()); + } + case Instruction::GetElementPtr: { + const TargetData &TD = *AP.TM.getTargetData(); + // Generate a symbolic expression for the byte address + const Constant *PtrVal = CE->getOperand(0); + SmallVector IdxVec(CE->op_begin()+1, CE->op_end()); + int64_t Offset = TD.getIndexedOffset(PtrVal->getType(), IdxVec); + + const MCExpr *Base = LowerConstant(CE->getOperand(0), AP); + if (Offset == 0) + return Base; + + // Truncate/sext the offset to the pointer size. + if (TD.getPointerSizeInBits() != 64) { + int SExtAmount = 64-TD.getPointerSizeInBits(); + Offset = (Offset << SExtAmount) >> SExtAmount; + } + + return MCBinaryExpr::CreateAdd(Base, MCConstantExpr::Create(Offset, Ctx), + Ctx); + } + + case Instruction::Trunc: + // We emit the value and depend on the assembler to truncate the generated + // expression properly. This is important for differences between + // blockaddress labels. Since the two labels are in the same function, it + // is reasonable to treat their delta as a 32-bit value. + // FALL THROUGH. + case Instruction::BitCast: + return LowerConstant(CE->getOperand(0), AP); + + case Instruction::IntToPtr: { + const TargetData &TD = *AP.TM.getTargetData(); + // Handle casts to pointers by changing them into casts to the appropriate + // integer type. This promotes constant folding and simplifies this code. + Constant *Op = CE->getOperand(0); + Op = ConstantExpr::getIntegerCast(Op, TD.getIntPtrType(CV->getContext()), + false/*ZExt*/); + return LowerConstant(Op, AP); + } + + case Instruction::PtrToInt: { + const TargetData &TD = *AP.TM.getTargetData(); + // Support only foldable casts to/from pointers that can be eliminated by + // changing the pointer to the appropriately sized integer type. + Constant *Op = CE->getOperand(0); + Type *Ty = CE->getType(); + + const MCExpr *OpExpr = LowerConstant(Op, AP); + + // We can emit the pointer value into this slot if the slot is an + // integer slot equal to the size of the pointer. + if (TD.getTypeAllocSize(Ty) == TD.getTypeAllocSize(Op->getType())) + return OpExpr; + + // Otherwise the pointer is smaller than the resultant integer, mask off + // the high bits so we are sure to get a proper truncation if the input is + // a constant expr. 
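The mask built just below is ~0ULL >> (64 - InBits), i.e. InBits ones in the
low bits; the GetElementPtr case earlier in this function uses the
complementary (Offset << S) >> S idiom to truncate an offset while keeping its
sign. Both are quick to sanity-check in isolation (plain C++):

    #include <cassert>
    #include <cstdint>

    int main() {
      // Low-bit mask used by the PtrToInt case: InBits ones in the low bits.
      unsigned InBits = 32;
      uint64_t Mask = ~0ULL >> (64 - InBits);
      assert(Mask == 0xFFFFFFFFULL);

      // Sign-extension idiom from the GetElementPtr case: truncate a 64-bit
      // byte offset to 32 bits while preserving its sign. The cast through
      // uint64_t avoids undefined behaviour when left-shifting a negative.
      int64_t Offset = -8;
      int SExtAmount = 64 - 32;
      int64_t Truncated =
          static_cast<int64_t>(static_cast<uint64_t>(Offset) << SExtAmount) >>
          SExtAmount;
      assert(Truncated == -8);
      return 0;
    }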
+ unsigned InBits = TD.getTypeAllocSizeInBits(Op->getType()); + const MCExpr *MaskExpr = MCConstantExpr::Create(~0ULL >> (64-InBits), Ctx); + return MCBinaryExpr::CreateAnd(OpExpr, MaskExpr, Ctx); + } + + // The MC library also has a right-shift operator, but it isn't consistently + // signed or unsigned between different targets. + case Instruction::Add: + case Instruction::Sub: + case Instruction::Mul: + case Instruction::SDiv: + case Instruction::SRem: + case Instruction::Shl: + case Instruction::And: + case Instruction::Or: + case Instruction::Xor: { + const MCExpr *LHS = LowerConstant(CE->getOperand(0), AP); + const MCExpr *RHS = LowerConstant(CE->getOperand(1), AP); + switch (CE->getOpcode()) { + default: llvm_unreachable("Unknown binary operator constant cast expr"); + case Instruction::Add: return MCBinaryExpr::CreateAdd(LHS, RHS, Ctx); + case Instruction::Sub: return MCBinaryExpr::CreateSub(LHS, RHS, Ctx); + case Instruction::Mul: return MCBinaryExpr::CreateMul(LHS, RHS, Ctx); + case Instruction::SDiv: return MCBinaryExpr::CreateDiv(LHS, RHS, Ctx); + case Instruction::SRem: return MCBinaryExpr::CreateMod(LHS, RHS, Ctx); + case Instruction::Shl: return MCBinaryExpr::CreateShl(LHS, RHS, Ctx); + case Instruction::And: return MCBinaryExpr::CreateAnd(LHS, RHS, Ctx); + case Instruction::Or: return MCBinaryExpr::CreateOr (LHS, RHS, Ctx); + case Instruction::Xor: return MCBinaryExpr::CreateXor(LHS, RHS, Ctx); + } + } + } +} + + +void NVPTXAsmPrinter::emitLineNumberAsDotLoc(const MachineInstr &MI) +{ + if (!EmitLineNumbers) + return; + if (ignoreLoc(MI)) + return; + + DebugLoc curLoc = MI.getDebugLoc(); + + if (prevDebugLoc.isUnknown() && curLoc.isUnknown()) + return; + + if (prevDebugLoc == curLoc) + return; + + prevDebugLoc = curLoc; + + if (curLoc.isUnknown()) + return; + + + const MachineFunction *MF = MI.getParent()->getParent(); + //const TargetMachine &TM = MF->getTarget(); + + const LLVMContext &ctx = MF->getFunction()->getContext(); + DIScope Scope(curLoc.getScope(ctx)); + + if (!Scope.Verify()) + return; + + StringRef fileName(Scope.getFilename()); + StringRef dirName(Scope.getDirectory()); + SmallString<128> FullPathName = dirName; + if (!dirName.empty() && !sys::path::is_absolute(fileName)) { + sys::path::append(FullPathName, fileName); + fileName = FullPathName.str(); + } + + if (filenameMap.find(fileName.str()) == filenameMap.end()) + return; + + + // Emit the line from the source file. 
+  if (llvm::InterleaveSrcInPtx)
+    this->emitSrcInText(fileName.str(), curLoc.getLine());
+
+  std::stringstream temp;
+  temp << "\t.loc " << filenameMap[fileName.str()]
+       << " " << curLoc.getLine() << " " << curLoc.getCol();
+  OutStreamer.EmitRawText(Twine(temp.str().c_str()));
+}
+
+void NVPTXAsmPrinter::EmitInstruction(const MachineInstr *MI) {
+  SmallString<128> Str;
+  raw_svector_ostream OS(Str);
+  if (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA)
+    emitLineNumberAsDotLoc(*MI);
+  printInstruction(MI, OS);
+  OutStreamer.EmitRawText(OS.str());
+}
+
+void NVPTXAsmPrinter::printReturnValStr(const Function *F,
+                                        raw_ostream &O)
+{
+  const TargetData *TD = TM.getTargetData();
+  const TargetLowering *TLI = TM.getTargetLowering();
+
+  Type *Ty = F->getReturnType();
+
+  bool isABI = (nvptxSubtarget.getSmVersion() >= 20);
+
+  if (Ty->getTypeID() == Type::VoidTyID)
+    return;
+
+  O << " (";
+
+  if (isABI) {
+    if (Ty->isPrimitiveType() || Ty->isIntegerTy()) {
+      unsigned size = 0;
+      if (const IntegerType *ITy = dyn_cast<IntegerType>(Ty)) {
+        size = ITy->getBitWidth();
+        if (size < 32) size = 32;
+      } else {
+        assert(Ty->isFloatingPointTy() &&
+               "Floating point type expected here");
+        size = Ty->getPrimitiveSizeInBits();
+      }
+
+      O << ".param .b" << size << " func_retval0";
+    }
+    else if (isa<PointerType>(Ty)) {
+      O << ".param .b" << TLI->getPointerTy().getSizeInBits()
+        << " func_retval0";
+    } else {
+      if ((Ty->getTypeID() == Type::StructTyID) ||
+          isa<VectorType>(Ty)) {
+        SmallVector<EVT, 16> vtparts;
+        ComputeValueVTs(*TLI, Ty, vtparts);
+        unsigned totalsz = 0;
+        for (unsigned i=0,e=vtparts.size(); i!=e; ++i) {
+          unsigned elems = 1;
+          EVT elemtype = vtparts[i];
+          if (vtparts[i].isVector()) {
+            elems = vtparts[i].getVectorNumElements();
+            elemtype = vtparts[i].getVectorElementType();
+          }
+          for (unsigned j=0, je=elems; j!=je; ++j) {
+            unsigned sz = elemtype.getSizeInBits();
+            if (elemtype.isInteger() && (sz < 8)) sz = 8;
+            totalsz += sz/8;
+          }
+        }
+        unsigned retAlignment = 0;
+        if (!llvm::getAlign(*F, 0, retAlignment))
+          retAlignment = TD->getABITypeAlignment(Ty);
+        O << ".param .align "
+          << retAlignment
+          << " .b8 func_retval0["
+          << totalsz << "]";
+      } else
+        assert(false &&
+               "Unknown return type");
+    }
+  } else {
+    SmallVector<EVT, 16> vtparts;
+    ComputeValueVTs(*TLI, Ty, vtparts);
+    unsigned idx = 0;
+    for (unsigned i=0,e=vtparts.size(); i!=e; ++i) {
+      unsigned elems = 1;
+      EVT elemtype = vtparts[i];
+      if (vtparts[i].isVector()) {
+        elems = vtparts[i].getVectorNumElements();
+        elemtype = vtparts[i].getVectorElementType();
+      }
+
+      for (unsigned j=0, je=elems; j!=je; ++j) {
+        unsigned sz = elemtype.getSizeInBits();
+        if (elemtype.isInteger() && (sz < 32)) sz = 32;
+        O << ".reg .b" << sz << " func_retval" << idx;
+        if (j < je - 1) O << ", ";
+        ++idx;
+      }
+      if (i < e - 1)
+        O << ", ";
+    }
+  }
+  O << ") ";
+}
+
+void NVPTXAsmPrinter::printReturnValStr(const MachineFunction &MF,
+                                        raw_ostream &O) {
+  const Function *F = MF.getFunction();
+  printReturnValStr(F, O);
+}
+
+void NVPTXAsmPrinter::EmitFunctionEntryLabel() {
+  SmallString<128> Str;
+  raw_svector_ostream O(Str);
+
+  // Set up
+  MRI = &MF->getRegInfo();
+  F = MF->getFunction();
+  emitLinkageDirective(F,O);
+  if (llvm::isKernelFunction(*F))
+    O << ".entry ";
+  else {
+    O << ".func ";
+    printReturnValStr(*MF, O);
+  }
+
+  O << *CurrentFnSym;
+
+  emitFunctionParamList(*MF, O);
+
+  if (llvm::isKernelFunction(*F))
+    emitKernelFunctionDirectives(*F, O);
+
+  OutStreamer.EmitRawText(O.str());
+
+  prevDebugLoc = DebugLoc();
+}
+
+void NVPTXAsmPrinter::EmitFunctionBodyStart() {
+  const TargetRegisterInfo &TRI = *TM.getRegisterInfo();
+  unsigned numRegClasses = TRI.getNumRegClasses();
+  VRidGlobal2LocalMap = new std::map<unsigned, unsigned>[numRegClasses+1];
+  OutStreamer.EmitRawText(StringRef("{\n"));
+  setAndEmitFunctionVirtualRegisters(*MF);
+
+  SmallString<128> Str;
+  raw_svector_ostream O(Str);
+  emitDemotedVars(MF->getFunction(), O);
+ OutStreamer.EmitRawText(O.str()); +} + +void NVPTXAsmPrinter::EmitFunctionBodyEnd() { + OutStreamer.EmitRawText(StringRef("}\n")); + delete []VRidGlobal2LocalMap; +} + + +void +NVPTXAsmPrinter::emitKernelFunctionDirectives(const Function& F, + raw_ostream &O) const { + // If the NVVM IR has some of reqntid* specified, then output + // the reqntid directive, and set the unspecified ones to 1. + // If none of reqntid* is specified, don't output reqntid directive. + unsigned reqntidx, reqntidy, reqntidz; + bool specified = false; + if (llvm::getReqNTIDx(F, reqntidx) == false) reqntidx = 1; + else specified = true; + if (llvm::getReqNTIDy(F, reqntidy) == false) reqntidy = 1; + else specified = true; + if (llvm::getReqNTIDz(F, reqntidz) == false) reqntidz = 1; + else specified = true; + + if (specified) + O << ".reqntid " << reqntidx << ", " + << reqntidy << ", " << reqntidz << "\n"; + + // If the NVVM IR has some of maxntid* specified, then output + // the maxntid directive, and set the unspecified ones to 1. + // If none of maxntid* is specified, don't output maxntid directive. + unsigned maxntidx, maxntidy, maxntidz; + specified = false; + if (llvm::getMaxNTIDx(F, maxntidx) == false) maxntidx = 1; + else specified = true; + if (llvm::getMaxNTIDy(F, maxntidy) == false) maxntidy = 1; + else specified = true; + if (llvm::getMaxNTIDz(F, maxntidz) == false) maxntidz = 1; + else specified = true; + + if (specified) + O << ".maxntid " << maxntidx << ", " + << maxntidy << ", " << maxntidz << "\n"; + + unsigned mincta; + if (llvm::getMinCTASm(F, mincta)) + O << ".minnctapersm " << mincta << "\n"; +} + +void +NVPTXAsmPrinter::getVirtualRegisterName(unsigned vr, bool isVec, + raw_ostream &O) { + const TargetRegisterClass * RC = MRI->getRegClass(vr); + unsigned id = RC->getID(); + + std::map ®map = VRidGlobal2LocalMap[id]; + unsigned mapped_vr = regmap[vr]; + + if (!isVec) { + O << getNVPTXRegClassStr(RC) << mapped_vr; + return; + } + // Vector virtual register + if (getNVPTXVectorSize(RC) == 4) + O << "{" + << getNVPTXRegClassStr(RC) << mapped_vr << "_0, " + << getNVPTXRegClassStr(RC) << mapped_vr << "_1, " + << getNVPTXRegClassStr(RC) << mapped_vr << "_2, " + << getNVPTXRegClassStr(RC) << mapped_vr << "_3" + << "}"; + else if (getNVPTXVectorSize(RC) == 2) + O << "{" + << getNVPTXRegClassStr(RC) << mapped_vr << "_0, " + << getNVPTXRegClassStr(RC) << mapped_vr << "_1" + << "}"; + else + llvm_unreachable("Unsupported vector size"); +} + +void +NVPTXAsmPrinter::emitVirtualRegister(unsigned int vr, bool isVec, + raw_ostream &O) { + getVirtualRegisterName(vr, isVec, O); +} + +void NVPTXAsmPrinter::printVecModifiedImmediate(const MachineOperand &MO, + const char *Modifier, + raw_ostream &O) { + static const char vecelem[] = {'0', '1', '2', '3', '0', '1', '2', '3'}; + int Imm = (int)MO.getImm(); + if(0 == strcmp(Modifier, "vecelem")) + O << "_" << vecelem[Imm]; + else if(0 == strcmp(Modifier, "vecv4comm1")) { + if((Imm < 0) || (Imm > 3)) + O << "//"; + } + else if(0 == strcmp(Modifier, "vecv4comm2")) { + if((Imm < 4) || (Imm > 7)) + O << "//"; + } + else if(0 == strcmp(Modifier, "vecv4pos")) { + if(Imm < 0) Imm = 0; + O << "_" << vecelem[Imm%4]; + } + else if(0 == strcmp(Modifier, "vecv2comm1")) { + if((Imm < 0) || (Imm > 1)) + O << "//"; + } + else if(0 == strcmp(Modifier, "vecv2comm2")) { + if((Imm < 2) || (Imm > 3)) + O << "//"; + } + else if(0 == strcmp(Modifier, "vecv2pos")) { + if(Imm < 0) Imm = 0; + O << "_" << vecelem[Imm%2]; + } + else + llvm_unreachable("Unknown Modifier on immediate operand"); +} 
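In the functions above, getVirtualRegisterName renumbers LLVM's global
virtual-register ids densely per register class (via VRidGlobal2LocalMap) and
expands vector registers into brace-enclosed component lists. The string shape
it produces can be sketched standalone (plain C++; the prefixes and ids here
are illustrative, not the backend's actual register-class names):

    #include <cstdio>
    #include <map>
    #include <string>

    // Illustrative re-implementation of the printing shape only: a per-class
    // map renumbers virtual registers densely, and vector registers print as
    // a "{%v2_0, %v2_1}" style component list.
    static std::string regName(const char *Prefix, unsigned VReg,
                               std::map<unsigned, unsigned> &ClassMap,
                               unsigned VecSize) {
      unsigned Next = static_cast<unsigned>(ClassMap.size()) + 1;
      unsigned Local = ClassMap.insert(std::make_pair(VReg, Next)).first->second;
      if (VecSize == 0)
        return std::string(Prefix) + std::to_string(Local);
      std::string S = "{";
      for (unsigned i = 0; i != VecSize; ++i)
        S += std::string(i ? ", " : "") + Prefix + std::to_string(Local) +
             "_" + std::to_string(i);
      return S + "}";
    }

    int main() {
      std::map<unsigned, unsigned> FloatRegs;
      std::printf("%s\n", regName("%f", 40123, FloatRegs, 0).c_str());
      // -> %f1
      std::printf("%s\n", regName("%vf", 40124, FloatRegs, 2).c_str());
      // -> {%vf2_0, %vf2_1}, matching the two-element case above
    }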
+
+void NVPTXAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
+                                   raw_ostream &O, const char *Modifier) {
+  const MachineOperand &MO = MI->getOperand(opNum);
+  switch (MO.getType()) {
+  case MachineOperand::MO_Register:
+    if (TargetRegisterInfo::isPhysicalRegister(MO.getReg())) {
+      if (MO.getReg() == NVPTX::VRDepot)
+        O << DEPOTNAME << getFunctionNumber();
+      else
+        O << getRegisterName(MO.getReg());
+    } else {
+      if (!Modifier)
+        emitVirtualRegister(MO.getReg(), false, O);
+      else {
+        if (strcmp(Modifier, "vecfull") == 0)
+          emitVirtualRegister(MO.getReg(), true, O);
+        else
+          llvm_unreachable(
+                 "Don't know how to handle the modifier on virtual register.");
+      }
+    }
+    return;
+
+  case MachineOperand::MO_Immediate:
+    if (!Modifier)
+      O << MO.getImm();
+    else if (strstr(Modifier, "vec") == Modifier)
+      printVecModifiedImmediate(MO, Modifier, O);
+    else
+      llvm_unreachable("Don't know how to handle modifier on immediate operand");
+    return;
+
+  case MachineOperand::MO_FPImmediate:
+    printFPConstant(MO.getFPImm(), O);
+    break;
+
+  case MachineOperand::MO_GlobalAddress:
+    O << *Mang->getSymbol(MO.getGlobal());
+    break;
+
+  case MachineOperand::MO_ExternalSymbol: {
+    const char *symbname = MO.getSymbolName();
+    if (strstr(symbname, ".PARAM") == symbname) {
+      unsigned index;
+      sscanf(symbname+6, "%u[];", &index);
+      printParamName(index, O);
+    }
+    else if (strstr(symbname, ".HLPPARAM") == symbname) {
+      unsigned index;
+      sscanf(symbname+9, "%u[];", &index);
+      O << *CurrentFnSym << "_param_" << index << "_offset";
+    }
+    else
+      O << symbname;
+    break;
+  }
+
+  case MachineOperand::MO_MachineBasicBlock:
+    O << *MO.getMBB()->getSymbol();
+    return;
+
+  default:
+    llvm_unreachable("Operand type not supported.");
+  }
+}
+
+void NVPTXAsmPrinter::
+printImplicitDef(const MachineInstr *MI, raw_ostream &O) const {
+#ifndef __OPTIMIZE__
+  O << "\t// Implicit def :";
+  //printOperand(MI, 0);
+  O << "\n";
+#endif
+}
+
+void NVPTXAsmPrinter::printMemOperand(const MachineInstr *MI, int opNum,
+                                      raw_ostream &O, const char *Modifier) {
+  printOperand(MI, opNum, O);
+
+  if (Modifier && !strcmp(Modifier, "add")) {
+    O << ", ";
+    printOperand(MI, opNum+1, O);
+  } else {
+    if (MI->getOperand(opNum+1).isImm() &&
+        MI->getOperand(opNum+1).getImm() == 0)
+      return; // don't print ',0' or '+0'
+    O << "+";
+    printOperand(MI, opNum+1, O);
+  }
+}
+
+void NVPTXAsmPrinter::printLdStCode(const MachineInstr *MI, int opNum,
+                                    raw_ostream &O, const char *Modifier)
+{
+  if (Modifier) {
+    const MachineOperand &MO = MI->getOperand(opNum);
+    int Imm = (int)MO.getImm();
+    if (!strcmp(Modifier, "volatile")) {
+      if (Imm)
+        O << ".volatile";
+    } else if (!strcmp(Modifier, "addsp")) {
+      switch (Imm) {
+      case NVPTX::PTXLdStInstCode::GLOBAL:   O << ".global"; break;
+      case NVPTX::PTXLdStInstCode::SHARED:   O << ".shared"; break;
+      case NVPTX::PTXLdStInstCode::LOCAL:    O << ".local";  break;
+      case NVPTX::PTXLdStInstCode::PARAM:    O << ".param";  break;
+      case NVPTX::PTXLdStInstCode::CONSTANT: O << ".const";  break;
+      case NVPTX::PTXLdStInstCode::GENERIC:
+        if (!nvptxSubtarget.hasGenericLdSt())
+          O << ".global";
+        break;
+      default:
+        llvm_unreachable("wrong value");
+      }
+    }
+    else if (!strcmp(Modifier, "sign")) {
+      if (Imm == NVPTX::PTXLdStInstCode::Signed)
+        O << "s";
+      else if (Imm == NVPTX::PTXLdStInstCode::Unsigned)
+        O << "u";
+      else
+        O << "f";
+    }
+    else if (!strcmp(Modifier, "vec")) {
+      if (Imm == NVPTX::PTXLdStInstCode::V2)
+        O << ".v2";
+      else if (Imm == NVPTX::PTXLdStInstCode::V4)
+        O << ".v4";
+    }
+    else
+      llvm_unreachable("unknown modifier");
+  }
+  else
+    llvm_unreachable("unknown modifier");
+}
+
+void NVPTXAsmPrinter::emitDeclaration(const Function *F, raw_ostream &O) {
+
+  emitLinkageDirective(F, O);
+  if (llvm::isKernelFunction(*F))
+    O << ".entry ";
+  else
+    O << ".func ";
+  printReturnValStr(F, O);
+  O << *CurrentFnSym << "\n";
+  emitFunctionParamList(F, O);
+  O << ";\n";
+}
+
+static bool usedInGlobalVarDef(const Constant *C)
+{
+  if (!C)
+    return false;
+
+  if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(C)) {
+    if (GV->getName().str() == "llvm.used")
+      return false;
+    return true;
+  }
+
+  for (Value::const_use_iterator ui=C->use_begin(), ue=C->use_end();
+       ui!=ue; ++ui) {
+    const Constant *C = dyn_cast<Constant>(*ui);
+    if (usedInGlobalVarDef(C))
+      return true;
+  }
+  return false;
+}
+
+static bool usedInOneFunc(const User *U, Function const *&oneFunc)
+{
+  if (const GlobalVariable *othergv = dyn_cast<GlobalVariable>(U)) {
+    if (othergv->getName().str() == "llvm.used")
+      return true;
+  }
+
+  if (const Instruction *instr = dyn_cast<Instruction>(U)) {
+    if (instr->getParent() && instr->getParent()->getParent()) {
+      const Function *curFunc = instr->getParent()->getParent();
+      if (oneFunc && (curFunc != oneFunc))
+        return false;
+      oneFunc = curFunc;
+      return true;
+    }
+    else
+      return false;
+  }
+
+  if (const MDNode *md = dyn_cast<MDNode>(U))
+    if (md->hasName() && ((md->getName().str() == "llvm.dbg.gv") ||
+                          (md->getName().str() == "llvm.dbg.sp")))
+      return true;
+
+
+  for (User::const_use_iterator ui=U->use_begin(), ue=U->use_end();
+       ui!=ue; ++ui) {
+    if (usedInOneFunc(*ui, oneFunc) == false)
+      return false;
+  }
+  return true;
+}
+
+/* Find out if a global variable can be demoted to local scope.
+ * Currently, this is valid for CUDA shared variables, which have local
+ * scope and global lifetime. So the conditions to check are :
+ * 1. Is the global variable in shared address space?
+ * 2. Does it have internal linkage?
+ * 3. Is the global variable referenced only in one function?
+ */
+static bool canDemoteGlobalVar(const GlobalVariable *gv, Function const *&f) {
+  if (gv->hasInternalLinkage() == false)
+    return false;
+  const PointerType *Pty = gv->getType();
+  if (Pty->getAddressSpace() != llvm::ADDRESS_SPACE_SHARED)
+    return false;
+
+  const Function *oneFunc = 0;
+
+  bool flag = usedInOneFunc(gv, oneFunc);
+  if (flag == false)
+    return false;
+  if (!oneFunc)
+    return false;
+  f = oneFunc;
+  return true;
+}
+
+static bool useFuncSeen(const Constant *C,
+                        llvm::DenseMap<const Function *, bool> &seenMap) {
+  for (Value::const_use_iterator ui=C->use_begin(), ue=C->use_end();
+       ui!=ue; ++ui) {
+    if (const Constant *cu = dyn_cast<Constant>(*ui)) {
+      if (useFuncSeen(cu, seenMap))
+        return true;
+    } else if (const Instruction *I = dyn_cast<Instruction>(*ui)) {
+      const BasicBlock *bb = I->getParent();
+      if (!bb) continue;
+      const Function *caller = bb->getParent();
+      if (!caller) continue;
+      if (seenMap.find(caller) != seenMap.end())
+        return true;
+    }
+  }
+  return false;
+}
+
+void NVPTXAsmPrinter::emitDeclarations(Module &M, raw_ostream &O) {
+  llvm::DenseMap<const Function *, bool> seenMap;
+  for (Module::const_iterator FI=M.begin(), FE=M.end();
+       FI!=FE; ++FI) {
+    const Function *F = FI;
+
+    if (F->isDeclaration()) {
+      if (F->use_empty())
+        continue;
+      if (F->getIntrinsicID())
+        continue;
+      CurrentFnSym = Mang->getSymbol(F);
+      emitDeclaration(F, O);
+      continue;
+    }
+    for (Value::const_use_iterator iter=F->use_begin(),
+         iterEnd=F->use_end(); iter!=iterEnd; ++iter) {
+      if (const Constant *C = dyn_cast<Constant>(*iter)) {
+        if (usedInGlobalVarDef(C)) {
+          // The use is in the initialization of a global variable
+          // that is a function pointer, so print a declaration
+          // for the original function
+          CurrentFnSym = Mang->getSymbol(F);
+          emitDeclaration(F, O);
+          break;
+        }
+        // Emit a declaration of this function if the function that
+        // uses this constant expr has already been seen.
+        if (useFuncSeen(C, seenMap)) {
+          CurrentFnSym = Mang->getSymbol(F);
+          emitDeclaration(F, O);
+          break;
+        }
+      }
+
+      if (!isa<Instruction>(*iter)) continue;
+      const Instruction *instr = cast<Instruction>(*iter);
+      const BasicBlock *bb = instr->getParent();
+      if (!bb) continue;
+      const Function *caller = bb->getParent();
+      if (!caller) continue;
+
+      // If a caller has already been seen, then the caller is
+      // appearing in the module before the callee, so print out
+      // a declaration for the callee.
+      if (seenMap.find(caller) != seenMap.end()) {
+        CurrentFnSym = Mang->getSymbol(F);
+        emitDeclaration(F, O);
+        break;
+      }
+    }
+    seenMap[F] = true;
+  }
+}
+
+void NVPTXAsmPrinter::recordAndEmitFilenames(Module &M) {
+  DebugInfoFinder DbgFinder;
+  DbgFinder.processModule(M);
+
+  unsigned i=1;
+  for (DebugInfoFinder::iterator I = DbgFinder.compile_unit_begin(),
+       E = DbgFinder.compile_unit_end(); I != E; ++I) {
+    DICompileUnit DIUnit(*I);
+    StringRef Filename(DIUnit.getFilename());
+    StringRef Dirname(DIUnit.getDirectory());
+    SmallString<128> FullPathName = Dirname;
+    if (!Dirname.empty() && !sys::path::is_absolute(Filename)) {
+      sys::path::append(FullPathName, Filename);
+      Filename = FullPathName.str();
+    }
+    if (filenameMap.find(Filename.str()) != filenameMap.end())
+      continue;
+    filenameMap[Filename.str()] = i;
+    OutStreamer.EmitDwarfFileDirective(i, "", Filename.str());
+    ++i;
+  }
+
+  for (DebugInfoFinder::iterator I = DbgFinder.subprogram_begin(),
+       E = DbgFinder.subprogram_end(); I != E; ++I) {
+    DISubprogram SP(*I);
+    StringRef Filename(SP.getFilename());
+    StringRef Dirname(SP.getDirectory());
+    SmallString<128> FullPathName = Dirname;
+    if (!Dirname.empty() && !sys::path::is_absolute(Filename)) {
+      sys::path::append(FullPathName, Filename);
+      Filename = FullPathName.str();
+    }
+    if (filenameMap.find(Filename.str()) != filenameMap.end())
+      continue;
+    filenameMap[Filename.str()] = i;
+    ++i;
+  }
+}
+
+bool NVPTXAsmPrinter::doInitialization(Module &M) {
+
+  SmallString<128> Str1;
+  raw_svector_ostream OS1(Str1);
+
+  MMI = getAnalysisIfAvailable<MachineModuleInfo>();
+  MMI->AnalyzeModule(M);
+
+  // We need to call the parent's one explicitly.
+  //bool Result = AsmPrinter::doInitialization(M);
+
+  // Initialize TargetLoweringObjectFile.
+  const_cast<TargetLoweringObjectFile&>(getObjFileLowering())
+          .Initialize(OutContext, TM);
+
+  Mang = new Mangler(OutContext, *TM.getTargetData());
+
+  // Emit header before any dwarf directives are emitted below.
+  emitHeader(M, OS1);
+  OutStreamer.EmitRawText(OS1.str());
+
+
+  // Already commented out
+  //bool Result = AsmPrinter::doInitialization(M);
+
+
+  if (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA)
+    recordAndEmitFilenames(M);
+
+  SmallString<128> Str2;
+  raw_svector_ostream OS2(Str2);
+
+  emitDeclarations(M, OS2);
+
+  // Print out module-level global variables here.
+  for (Module::global_iterator I = M.global_begin(), E = M.global_end();
+       I != E; ++I)
+    printModuleLevelGV(I, OS2);
+
+  OS2 << '\n';
+
+  OutStreamer.EmitRawText(OS2.str());
+  return false;  // success
+}
+
+void NVPTXAsmPrinter::emitHeader(Module &M, raw_ostream &O) {
+  O << "//\n";
+  O << "// Generated by LLVM NVPTX Back-End\n";
+  O << "//\n";
+  O << "\n";
+
+  O << ".version 3.0\n";
+
+  O << ".target ";
+  O << nvptxSubtarget.getTargetName();
+
+  if (nvptxSubtarget.getDrvInterface() == NVPTX::NVCL)
+    O << ", texmode_independent";
+  if (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA) {
+    if (!nvptxSubtarget.hasDouble())
+      O << ", map_f64_to_f32";
+  }
+
+  if (MAI->doesSupportDebugInformation())
+    O << ", debug";
+
+  O << "\n";
+
+  O << ".address_size ";
+  if (nvptxSubtarget.is64Bit())
+    O << "64";
+  else
+    O << "32";
+  O << "\n";
+
+  O << "\n";
+}
+
+bool NVPTXAsmPrinter::doFinalization(Module &M) {
+  // XXX Temporarily remove global variables so that doFinalization() will not
+  // emit them again (global variables are emitted at beginning).
+ + Module::GlobalListType &global_list = M.getGlobalList(); + int i, n = global_list.size(); + GlobalVariable **gv_array = new GlobalVariable* [n]; + + // first, back-up GlobalVariable in gv_array + i = 0; + for (Module::global_iterator I = global_list.begin(), E = global_list.end(); + I != E; ++I) + gv_array[i++] = &*I; + + // second, empty global_list + while (!global_list.empty()) + global_list.remove(global_list.begin()); + + // call doFinalization + bool ret = AsmPrinter::doFinalization(M); + + // now we restore global variables + for (i = 0; i < n; i ++) + global_list.insert(global_list.end(), gv_array[i]); + + delete[] gv_array; + return ret; + + + //bool Result = AsmPrinter::doFinalization(M); + // Instead of calling the parents doFinalization, we may + // clone parents doFinalization and customize here. + // Currently, we if NVISA out the EmitGlobals() in + // parent's doFinalization, which is too intrusive. + // + // Same for the doInitialization. + //return Result; +} + +// This function emits appropriate linkage directives for +// functions and global variables. +// +// extern function declaration -> .extern +// extern function definition -> .visible +// external global variable with init -> .visible +// external without init -> .extern +// appending -> not allowed, assert. + +void NVPTXAsmPrinter::emitLinkageDirective(const GlobalValue* V, raw_ostream &O) +{ + if (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA) { + if (V->hasExternalLinkage()) { + if (isa(V)) { + const GlobalVariable *GVar = cast(V); + if (GVar) { + if (GVar->hasInitializer()) + O << ".visible "; + else + O << ".extern "; + } + } else if (V->isDeclaration()) + O << ".extern "; + else + O << ".visible "; + } else if (V->hasAppendingLinkage()) { + std::string msg; + msg.append("Error: "); + msg.append("Symbol "); + if (V->hasName()) + msg.append(V->getName().str()); + msg.append("has unsupported appending linkage type"); + llvm_unreachable(msg.c_str()); + } + } +} + + +void NVPTXAsmPrinter::printModuleLevelGV(GlobalVariable* GVar, raw_ostream &O, + bool processDemoted) { + + // Skip meta data + if (GVar->hasSection()) { + if (GVar->getSection() == "llvm.metadata") + return; + } + + const TargetData *TD = TM.getTargetData(); + + // GlobalVariables are always constant pointers themselves. + const PointerType *PTy = GVar->getType(); + Type *ETy = PTy->getElementType(); + + if (GVar->hasExternalLinkage()) { + if (GVar->hasInitializer()) + O << ".visible "; + else + O << ".extern "; + } + + if (llvm::isTexture(*GVar)) { + O << ".global .texref " << llvm::getTextureName(*GVar) << ";\n"; + return; + } + + if (llvm::isSurface(*GVar)) { + O << ".global .surfref " << llvm::getSurfaceName(*GVar) << ";\n"; + return; + } + + if (GVar->isDeclaration()) { + // (extern) declarations, no definition or initializer + // Currently the only known declaration is for an automatic __local + // (.shared) promoted to global. 
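+    // For instance (illustrative, the variable name is made up): a demoted
+    // __shared__ declaration printed here has the form
+    //   .shared .align 4 .b8 reduce_buf[256];
+    // i.e. address space, alignment and a byte-array type but no
+    // initializer, which is exactly what emitPTXGlobalVariable() produces.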
+ emitPTXGlobalVariable(GVar, O); + O << ";\n"; + return; + } + + if (llvm::isSampler(*GVar)) { + O << ".global .samplerref " << llvm::getSamplerName(*GVar); + + Constant *Initializer = NULL; + if (GVar->hasInitializer()) + Initializer = GVar->getInitializer(); + ConstantInt *CI = NULL; + if (Initializer) + CI = dyn_cast(Initializer); + if (CI) { + unsigned sample=CI->getZExtValue(); + + O << " = { "; + + for (int i =0, addr=((sample & __CLK_ADDRESS_MASK ) >> + __CLK_ADDRESS_BASE) ; i < 3 ; i++) { + O << "addr_mode_" << i << " = "; + switch (addr) { + case 0: O << "wrap"; break; + case 1: O << "clamp_to_border"; break; + case 2: O << "clamp_to_edge"; break; + case 3: O << "wrap"; break; + case 4: O << "mirror"; break; + } + O <<", "; + } + O << "filter_mode = "; + switch (( sample & __CLK_FILTER_MASK ) >> __CLK_FILTER_BASE ) { + case 0: O << "nearest"; break; + case 1: O << "linear"; break; + case 2: assert ( 0 && "Anisotropic filtering is not supported"); + default: O << "nearest"; break; + } + if (!(( sample &__CLK_NORMALIZED_MASK ) >> __CLK_NORMALIZED_BASE)) { + O << ", force_unnormalized_coords = 1"; + } + O << " }"; + } + + O << ";\n"; + return; + } + + if (GVar->hasPrivateLinkage()) { + + if (!strncmp(GVar->getName().data(), "unrollpragma", 12)) + return; + + // FIXME - need better way (e.g. Metadata) to avoid generating this global + if (!strncmp(GVar->getName().data(), "filename", 8)) + return; + if (GVar->use_empty()) + return; + } + + const Function *demotedFunc = 0; + if (!processDemoted && canDemoteGlobalVar(GVar, demotedFunc)) { + O << "// " << GVar->getName().str() << " has been demoted\n"; + if (localDecls.find(demotedFunc) != localDecls.end()) + localDecls[demotedFunc].push_back(GVar); + else { + std::vector temp; + temp.push_back(GVar); + localDecls[demotedFunc] = temp; + } + return; + } + + O << "."; + emitPTXAddressSpace(PTy->getAddressSpace(), O); + if (GVar->getAlignment() == 0) + O << " .align " << (int) TD->getPrefTypeAlignment(ETy); + else + O << " .align " << GVar->getAlignment(); + + + if (ETy->isPrimitiveType() || ETy->isIntegerTy() || isa(ETy)) { + O << " ."; + O << getPTXFundamentalTypeStr(ETy, false); + O << " "; + O << *Mang->getSymbol(GVar); + + // Ptx allows variable initilization only for constant and global state + // spaces. + if (((PTy->getAddressSpace() == llvm::ADDRESS_SPACE_GLOBAL) || + (PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST_NOT_GEN) || + (PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST)) + && GVar->hasInitializer()) { + Constant *Initializer = GVar->getInitializer(); + if (!Initializer->isNullValue()) { + O << " = " ; + printScalarConstant(Initializer, O); + } + } + } else { + unsigned int ElementSize =0; + + // Although PTX has direct support for struct type and array type and + // LLVM IR is very similar to PTX, the LLVM CodeGen does not support for + // targets that support these high level field accesses. Structs, arrays + // and vectors are lowered into arrays of bytes. + switch (ETy->getTypeID()) { + case Type::StructTyID: + case Type::ArrayTyID: + case Type::VectorTyID: + ElementSize = TD->getTypeStoreSize(ETy); + // Ptx allows variable initilization only for constant and + // global state spaces. 
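+    // Illustrative sketch (name made up, assuming little-endian byte
+    // layout): an i32[2] global initialized with {1, 2} prints as
+    //   .global .align 4 .b8 g[8] = {1, 0, 0, 0, 2, 0, 0, 0};
+    // whereas the same array in the .shared state space is emitted without
+    // an initializer, as enforced by the address-space check below.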
+ if (((PTy->getAddressSpace() == llvm::ADDRESS_SPACE_GLOBAL) || + (PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST_NOT_GEN) || + (PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST)) + && GVar->hasInitializer()) { + Constant *Initializer = GVar->getInitializer(); + if (!isa(Initializer) && + !Initializer->isNullValue()) { + AggBuffer aggBuffer(ElementSize, O, *this); + bufferAggregateConstant(Initializer, &aggBuffer); + if (aggBuffer.numSymbols) { + if (nvptxSubtarget.is64Bit()) { + O << " .u64 " << *Mang->getSymbol(GVar) <<"[" ; + O << ElementSize/8; + } + else { + O << " .u32 " << *Mang->getSymbol(GVar) <<"[" ; + O << ElementSize/4; + } + O << "]"; + } + else { + O << " .b8 " << *Mang->getSymbol(GVar) <<"[" ; + O << ElementSize; + O << "]"; + } + O << " = {" ; + aggBuffer.print(); + O << "}"; + } + else { + O << " .b8 " << *Mang->getSymbol(GVar) ; + if (ElementSize) { + O <<"[" ; + O << ElementSize; + O << "]"; + } + } + } + else { + O << " .b8 " << *Mang->getSymbol(GVar); + if (ElementSize) { + O <<"[" ; + O << ElementSize; + O << "]"; + } + } + break; + default: + assert( 0 && "type not supported yet"); + } + + } + O << ";\n"; +} + +void NVPTXAsmPrinter::emitDemotedVars(const Function *f, raw_ostream &O) { + if (localDecls.find(f) == localDecls.end()) + return; + + std::vector &gvars = localDecls[f]; + + for (unsigned i=0, e=gvars.size(); i!=e; ++i) { + O << "\t// demoted variable\n\t"; + printModuleLevelGV(gvars[i], O, true); + } +} + +void NVPTXAsmPrinter::emitPTXAddressSpace(unsigned int AddressSpace, + raw_ostream &O) const { + switch (AddressSpace) { + case llvm::ADDRESS_SPACE_LOCAL: + O << "local" ; + break; + case llvm::ADDRESS_SPACE_GLOBAL: + O << "global" ; + break; + case llvm::ADDRESS_SPACE_CONST: + // This logic should be consistent with that in + // getCodeAddrSpace() (NVPTXISelDATToDAT.cpp) + if (nvptxSubtarget.hasGenericLdSt()) + O << "global" ; + else + O << "const" ; + break; + case llvm::ADDRESS_SPACE_CONST_NOT_GEN: + O << "const" ; + break; + case llvm::ADDRESS_SPACE_SHARED: + O << "shared" ; + break; + default: + llvm_unreachable("unexpected address space"); + } +} + +std::string NVPTXAsmPrinter::getPTXFundamentalTypeStr(const Type *Ty, + bool useB4PTR) const { + switch (Ty->getTypeID()) { + default: + llvm_unreachable("unexpected type"); + break; + case Type::IntegerTyID: { + unsigned NumBits = cast(Ty)->getBitWidth(); + if (NumBits == 1) + return "pred"; + else if (NumBits <= 64) { + std::string name = "u"; + return name + utostr(NumBits); + } else { + llvm_unreachable("Integer too large"); + break; + } + break; + } + case Type::FloatTyID: + return "f32"; + case Type::DoubleTyID: + return "f64"; + case Type::PointerTyID: + if (nvptxSubtarget.is64Bit()) + if (useB4PTR) return "b64"; + else return "u64"; + else + if (useB4PTR) return "b32"; + else return "u32"; + } + llvm_unreachable("unexpected type"); + return NULL; +} + +void NVPTXAsmPrinter::emitPTXGlobalVariable(const GlobalVariable* GVar, + raw_ostream &O) { + + const TargetData *TD = TM.getTargetData(); + + // GlobalVariables are always constant pointers themselves. 
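+  // That is, the Value of a GlobalVariable is its address, so the PTX
+  // declaration below is derived from the pointee type ETy, while the
+  // pointer type PTy only contributes the address space.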
+  const PointerType *PTy = GVar->getType();
+  Type *ETy = PTy->getElementType();
+
+  O << ".";
+  emitPTXAddressSpace(PTy->getAddressSpace(), O);
+  if (GVar->getAlignment() == 0)
+    O << " .align " << (int) TD->getPrefTypeAlignment(ETy);
+  else
+    O << " .align " << GVar->getAlignment();
+
+  if (ETy->isPrimitiveType() || ETy->isIntegerTy() || isa<PointerType>(ETy)) {
+    O << " .";
+    O << getPTXFundamentalTypeStr(ETy);
+    O << " ";
+    O << *Mang->getSymbol(GVar);
+    return;
+  }
+
+  int64_t ElementSize = 0;
+
+  // Although PTX has direct support for struct and array types and LLVM IR
+  // is very similar to PTX, LLVM CodeGen does not support these high-level
+  // field accesses for any target. Structs and arrays are lowered into
+  // arrays of bytes.
+  switch (ETy->getTypeID()) {
+  case Type::StructTyID:
+  case Type::ArrayTyID:
+  case Type::VectorTyID:
+    ElementSize = TD->getTypeStoreSize(ETy);
+    O << " .b8 " << *Mang->getSymbol(GVar) << "[";
+    if (ElementSize) {
+      O << itostr(ElementSize);
+    }
+    O << "]";
+    break;
+  default:
+    assert(0 && "type not supported yet");
+  }
+  return;
+}
+
+
+static unsigned int
+getOpenCLAlignment(const TargetData *TD,
+                   Type *Ty) {
+  if (Ty->isPrimitiveType() || Ty->isIntegerTy() || isa<PointerType>(Ty))
+    return TD->getPrefTypeAlignment(Ty);
+
+  const ArrayType *ATy = dyn_cast<ArrayType>(Ty);
+  if (ATy)
+    return getOpenCLAlignment(TD, ATy->getElementType());
+
+  const VectorType *VTy = dyn_cast<VectorType>(Ty);
+  if (VTy) {
+    Type *ETy = VTy->getElementType();
+    unsigned int numE = VTy->getNumElements();
+    unsigned int alignE = TD->getPrefTypeAlignment(ETy);
+    if (numE == 3)
+      return 4*alignE;
+    else
+      return numE*alignE;
+  }
+
+  const StructType *STy = dyn_cast<StructType>(Ty);
+  if (STy) {
+    unsigned int alignStruct = 1;
+    // Go through each element of the struct and find the
+    // largest alignment.
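+    // Worked example (hypothetical type): for struct { char c; float3 v; }
+    // the loop below yields max(1, 16) = 16, because the 3-element vector
+    // branch above rounds float3 up to 4 * align(float) = 16.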
+ for (unsigned i=0, e=STy->getNumElements(); i != e; i++) { + Type *ETy = STy->getElementType(i); + unsigned int align = getOpenCLAlignment(TD, ETy); + if (align > alignStruct) + alignStruct = align; + } + return alignStruct; + } + + const FunctionType *FTy = dyn_cast(Ty); + if (FTy) + return TD->getPointerPrefAlignment(); + return TD->getPrefTypeAlignment(Ty); +} + +void NVPTXAsmPrinter::printParamName(Function::const_arg_iterator I, + int paramIndex, raw_ostream &O) { + if ((nvptxSubtarget.getDrvInterface() == NVPTX::NVCL) || + (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA)) + O << *CurrentFnSym << "_param_" << paramIndex; + else { + std::string argName = I->getName(); + const char *p = argName.c_str(); + while (*p) { + if (*p == '.') + O << "_"; + else + O << *p; + p++; + } + } +} + +void NVPTXAsmPrinter::printParamName(int paramIndex, raw_ostream &O) { + Function::const_arg_iterator I, E; + int i = 0; + + if ((nvptxSubtarget.getDrvInterface() == NVPTX::NVCL) || + (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA)) { + O << *CurrentFnSym << "_param_" << paramIndex; + return; + } + + for (I = F->arg_begin(), E = F->arg_end(); I != E; ++I, i++) { + if (i==paramIndex) { + printParamName(I, paramIndex, O); + return; + } + } + llvm_unreachable("paramIndex out of bound"); +} + +void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, + raw_ostream &O) { + const TargetData *TD = TM.getTargetData(); + const AttrListPtr &PAL = F->getAttributes(); + const TargetLowering *TLI = TM.getTargetLowering(); + Function::const_arg_iterator I, E; + unsigned paramIndex = 0; + bool first = true; + bool isKernelFunc = llvm::isKernelFunction(*F); + bool isABI = (nvptxSubtarget.getSmVersion() >= 20); + MVT thePointerTy = TLI->getPointerTy(); + + O << "(\n"; + + for (I = F->arg_begin(), E = F->arg_end(); I != E; ++I, paramIndex++) { + const Type *Ty = I->getType(); + + if (!first) + O << ",\n"; + + first = false; + + // Handle image/sampler parameters + if (llvm::isSampler(*I) || llvm::isImage(*I)) { + if (llvm::isImage(*I)) { + std::string sname = I->getName(); + if (llvm::isImageWriteOnly(*I)) + O << "\t.param .surfref " << *CurrentFnSym << "_param_" << paramIndex; + else // Default image is read_only + O << "\t.param .texref " << *CurrentFnSym << "_param_" << paramIndex; + } + else // Should be llvm::isSampler(*I) + O << "\t.param .samplerref " << *CurrentFnSym << "_param_" + << paramIndex; + continue; + } + + if (PAL.paramHasAttr(paramIndex+1, Attribute::ByVal) == false) { + // Just a scalar + const PointerType *PTy = dyn_cast(Ty); + if (isKernelFunc) { + if (PTy) { + // Special handling for pointer arguments to kernel + O << "\t.param .u" << thePointerTy.getSizeInBits() << " "; + + if (nvptxSubtarget.getDrvInterface() != NVPTX::CUDA) { + Type *ETy = PTy->getElementType(); + int addrSpace = PTy->getAddressSpace(); + switch(addrSpace) { + default: + O << ".ptr "; + break; + case llvm::ADDRESS_SPACE_CONST_NOT_GEN: + O << ".ptr .const "; + break; + case llvm::ADDRESS_SPACE_SHARED: + O << ".ptr .shared "; + break; + case llvm::ADDRESS_SPACE_GLOBAL: + case llvm::ADDRESS_SPACE_CONST: + O << ".ptr .global "; + break; + } + O << ".align " << (int)getOpenCLAlignment(TD, ETy) << " "; + } + printParamName(I, paramIndex, O); + continue; + } + + // non-pointer scalar to kernel func + O << "\t.param ." 
+ << getPTXFundamentalTypeStr(Ty) << " "; + printParamName(I, paramIndex, O); + continue; + } + // Non-kernel function, just print .param .b for ABI + // and .reg .b for non ABY + unsigned sz = 0; + if (isa(Ty)) { + sz = cast(Ty)->getBitWidth(); + if (sz < 32) sz = 32; + } + else if (isa(Ty)) + sz = thePointerTy.getSizeInBits(); + else + sz = Ty->getPrimitiveSizeInBits(); + if (isABI) + O << "\t.param .b" << sz << " "; + else + O << "\t.reg .b" << sz << " "; + printParamName(I, paramIndex, O); + continue; + } + + // param has byVal attribute. So should be a pointer + const PointerType *PTy = dyn_cast(Ty); + assert(PTy && + "Param with byval attribute should be a pointer type"); + Type *ETy = PTy->getElementType(); + + if (isABI || isKernelFunc) { + // Just print .param .b8 .align .param[size]; + // = PAL.getparamalignment + // size = typeallocsize of element type + unsigned align = PAL.getParamAlignment(paramIndex+1); + unsigned sz = TD->getTypeAllocSize(ETy); + O << "\t.param .align " << align + << " .b8 "; + printParamName(I, paramIndex, O); + O << "[" << sz << "]"; + continue; + } else { + // Split the ETy into constituent parts and + // print .param .b for each part. + // Further, if a part is vector, print the above for + // each vector element. + SmallVector vtparts; + ComputeValueVTs(*TLI, ETy, vtparts); + for (unsigned i=0,e=vtparts.size(); i!=e; ++i) { + unsigned elems = 1; + EVT elemtype = vtparts[i]; + if (vtparts[i].isVector()) { + elems = vtparts[i].getVectorNumElements(); + elemtype = vtparts[i].getVectorElementType(); + } + + for (unsigned j=0,je=elems; j!=je; ++j) { + unsigned sz = elemtype.getSizeInBits(); + if (elemtype.isInteger() && (sz < 32)) sz = 32; + O << "\t.reg .b" << sz << " "; + printParamName(I, paramIndex, O); + if (j Str; + raw_svector_ostream O(Str); + + // Map the global virtual register number to a register class specific + // virtual register number starting from 1 with that class. + const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo(); + //unsigned numRegClasses = TRI->getNumRegClasses(); + + // Emit the Fake Stack Object + const MachineFrameInfo *MFI = MF.getFrameInfo(); + int NumBytes = (int) MFI->getStackSize(); + if (NumBytes) { + O << "\t.local .align " << MFI->getMaxAlignment() << " .b8 \t" + << DEPOTNAME + << getFunctionNumber() << "[" << NumBytes << "];\n"; + if (nvptxSubtarget.is64Bit()) { + O << "\t.reg .b64 \t%SP;\n"; + O << "\t.reg .b64 \t%SPL;\n"; + } + else { + O << "\t.reg .b32 \t%SP;\n"; + O << "\t.reg .b32 \t%SPL;\n"; + } + } + + // Go through all virtual registers to establish the mapping between the + // global virtual + // register number and the per class virtual register number. + // We use the per class virtual register number in the ptx output. 
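+  // Illustrative sketch: if %vreg5 and %vreg9 are the first two virtual
+  // registers of the 32-bit integer register class, they receive the local
+  // numbers 1 and 2 (regmap stores n+1 below) and are later printed as
+  // "%r1" and "%r2", matching the "%r<N>" declaration emitted further down.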
+  unsigned int numVRs = MRI->getNumVirtRegs();
+  for (unsigned i=0; i< numVRs; i++) {
+    unsigned int vr = TRI->index2VirtReg(i);
+    const TargetRegisterClass *RC = MRI->getRegClass(vr);
+    std::map<unsigned, unsigned> &regmap = VRidGlobal2LocalMap[RC->getID()];
+    int n = regmap.size();
+    regmap.insert(std::make_pair(vr, n+1));
+  }
+
+  // Emit register declarations
+  // @TODO: Extract out the real register usage
+  O << "\t.reg .pred %p<" << NVPTXNumRegisters << ">;\n";
+  O << "\t.reg .s16 %rc<" << NVPTXNumRegisters << ">;\n";
+  O << "\t.reg .s16 %rs<" << NVPTXNumRegisters << ">;\n";
+  O << "\t.reg .s32 %r<" << NVPTXNumRegisters << ">;\n";
+  O << "\t.reg .s64 %rl<" << NVPTXNumRegisters << ">;\n";
+  O << "\t.reg .f32 %f<" << NVPTXNumRegisters << ">;\n";
+  O << "\t.reg .f64 %fl<" << NVPTXNumRegisters << ">;\n";
+
+  // Emit declaration of the virtual registers or 'physical' registers for
+  // each register class
+  //for (unsigned i=0; i< numRegClasses; i++) {
+  //  std::map<unsigned, unsigned> &regmap = VRidGlobal2LocalMap[i];
+  //  const TargetRegisterClass *RC = TRI->getRegClass(i);
+  //  std::string rcname = getNVPTXRegClassName(RC);
+  //  std::string rcStr = getNVPTXRegClassStr(RC);
+  //  //int n = regmap.size();
+  //  if (!isNVPTXVectorRegClass(RC)) {
+  //    O << "\t.reg " << rcname << " \t" << rcStr << "<"
+  //      << NVPTXNumRegisters << ">;\n";
+  //  }
+
+  // Only declare those registers that may be used. And do not emit vector
+  // registers as
+  // they are all elementized to scalar registers.
+  //if (n && !isNVPTXVectorRegClass(RC)) {
+  //  if (RegAllocNilUsed) {
+  //    O << "\t.reg " << rcname << " \t" << rcStr << "<" << (n+1)
+  //      << ">;\n";
+  //  }
+  //  else {
+  //    O << "\t.reg " << rcname << " \t" << StrToUpper(rcStr)
+  //      << "<" << 32 << ">;\n";
+  //  }
+  //}
+  //}
+
+  OutStreamer.EmitRawText(O.str());
+}
+
+
+void NVPTXAsmPrinter::printFPConstant(const ConstantFP *Fp, raw_ostream &O) {
+  APFloat APF = APFloat(Fp->getValueAPF());  // make a copy
+  bool ignored;
+  unsigned int numHex;
+  const char *lead;
+
+  if (Fp->getType()->getTypeID() == Type::FloatTyID) {
+    numHex = 8;
+    lead = "0f";
+    APF.convert(APFloat::IEEEsingle, APFloat::rmNearestTiesToEven,
+                &ignored);
+  } else if (Fp->getType()->getTypeID() == Type::DoubleTyID) {
+    numHex = 16;
+    lead = "0d";
+    APF.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven,
+                &ignored);
+  } else
+    llvm_unreachable("unsupported fp type");
+
+  APInt API = APF.bitcastToAPInt();
+  std::string hexstr(utohexstr(API.getZExtValue()));
+  O << lead;
+  if (hexstr.length() < numHex)
+    O << std::string(numHex - hexstr.length(), '0');
+  O << utohexstr(API.getZExtValue());
+}
+
+void NVPTXAsmPrinter::printScalarConstant(Constant *CPV, raw_ostream &O) {
+  if (ConstantInt *CI = dyn_cast<ConstantInt>(CPV)) {
+    O << CI->getValue();
+    return;
+  }
+  if (ConstantFP *CFP = dyn_cast<ConstantFP>(CPV)) {
+    printFPConstant(CFP, O);
+    return;
+  }
+  if (isa<ConstantPointerNull>(CPV)) {
+    O << "0";
+    return;
+  }
+  if (GlobalValue *GVar = dyn_cast<GlobalValue>(CPV)) {
+    O << *Mang->getSymbol(GVar);
+    return;
+  }
+  if (ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
+    Value *v = Cexpr->stripPointerCasts();
+    if (GlobalValue *GVar = dyn_cast<GlobalValue>(v)) {
+      O << *Mang->getSymbol(GVar);
+      return;
+    } else {
+      O << *LowerConstant(CPV, *this);
+      return;
+    }
+  }
+  llvm_unreachable("Not scalar type found in printScalarConstant()");
+}
+
+
+void NVPTXAsmPrinter::bufferLEByte(Constant *CPV, int Bytes,
+                                   AggBuffer *aggBuffer) {
+
+  const TargetData *TD = TM.getTargetData();
+
+  if (isa<UndefValue>(CPV) || CPV->isNullValue()) {
+    int s = TD->getTypeAllocSize(CPV->getType());
+    if (s < Bytes)
+      s = Bytes;
+    aggBuffer->addZeros(s);
+    return;
+  }
+
+  unsigned char *ptr;
+  switch (CPV->getType()->getTypeID()) {
+
+  case Type::IntegerTyID: {
+    const Type *ETy = CPV->getType();
+    if (ETy == Type::getInt8Ty(CPV->getContext())) {
+      unsigned char c =
+          (unsigned char)(dyn_cast<ConstantInt>(CPV))->getZExtValue();
+      ptr = &c;
+      aggBuffer->addBytes(ptr, 1, Bytes);
+    } else if (ETy == Type::getInt16Ty(CPV->getContext())) {
+      short int16 =
+          (short)(dyn_cast<ConstantInt>(CPV))->getZExtValue();
+      ptr = (unsigned char*)&int16;
+      aggBuffer->addBytes(ptr, 2, Bytes);
+    } else if (ETy == Type::getInt32Ty(CPV->getContext())) {
+      if (ConstantInt *constInt = dyn_cast<ConstantInt>(CPV)) {
+        int int32 = (int)(constInt->getZExtValue());
+        ptr = (unsigned char*)&int32;
+        aggBuffer->addBytes(ptr, 4, Bytes);
+        break;
+      } else if (ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
+        if (ConstantInt *constInt =
+                dyn_cast<ConstantInt>(ConstantFoldConstantExpression(
+                    Cexpr, TD))) {
+          int int32 = (int)(constInt->getZExtValue());
+          ptr = (unsigned char*)&int32;
+          aggBuffer->addBytes(ptr, 4, Bytes);
+          break;
+        }
+        if (Cexpr->getOpcode() == Instruction::PtrToInt) {
+          Value *v = Cexpr->getOperand(0)->stripPointerCasts();
+          aggBuffer->addSymbol(v);
+          aggBuffer->addZeros(4);
+          break;
+        }
+      }
+      llvm_unreachable("unsupported integer const type");
+    } else if (ETy == Type::getInt64Ty(CPV->getContext())) {
+      if (ConstantInt *constInt = dyn_cast<ConstantInt>(CPV)) {
+        long long int64 = (long long)(constInt->getZExtValue());
+        ptr = (unsigned char*)&int64;
+        aggBuffer->addBytes(ptr, 8, Bytes);
+        break;
+      } else if (ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
+        if (ConstantInt *constInt = dyn_cast<ConstantInt>(
+                ConstantFoldConstantExpression(Cexpr, TD))) {
+          long long int64 = (long long)(constInt->getZExtValue());
+          ptr = (unsigned char*)&int64;
+          aggBuffer->addBytes(ptr, 8, Bytes);
+          break;
+        }
+        if (Cexpr->getOpcode() == Instruction::PtrToInt) {
+          Value *v = Cexpr->getOperand(0)->stripPointerCasts();
+          aggBuffer->addSymbol(v);
+          aggBuffer->addZeros(8);
+          break;
+        }
+      }
+      llvm_unreachable("unsupported integer const type");
+    } else
+      llvm_unreachable("unsupported integer const type");
+    break;
+  }
+  case Type::FloatTyID:
+  case Type::DoubleTyID: {
+    ConstantFP *CFP = dyn_cast<ConstantFP>(CPV);
+    const Type* Ty = CFP->getType();
+    if (Ty == Type::getFloatTy(CPV->getContext())) {
+      float float32 = (float)CFP->getValueAPF().convertToFloat();
+      ptr = (unsigned char*)&float32;
+      aggBuffer->addBytes(ptr, 4, Bytes);
+    } else if (Ty == Type::getDoubleTy(CPV->getContext())) {
+      double float64 = CFP->getValueAPF().convertToDouble();
+      ptr = (unsigned char*)&float64;
+      aggBuffer->addBytes(ptr, 8, Bytes);
+    }
+    else {
+      llvm_unreachable("unsupported fp const type");
+    }
+    break;
+  }
+  case Type::PointerTyID: {
+    if (GlobalValue *GVar = dyn_cast<GlobalValue>(CPV)) {
+      aggBuffer->addSymbol(GVar);
+    }
+    else if (ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
+      Value *v = Cexpr->stripPointerCasts();
+      aggBuffer->addSymbol(v);
+    }
+    unsigned int s = TD->getTypeAllocSize(CPV->getType());
+    aggBuffer->addZeros(s);
+    break;
+  }
+
+  case Type::ArrayTyID:
+  case Type::VectorTyID:
+  case Type::StructTyID: {
+    if (isa<ConstantArray>(CPV) || isa<ConstantVector>(CPV) ||
+        isa<ConstantStruct>(CPV)) {
+      int ElementSize = TD->getTypeAllocSize(CPV->getType());
+      bufferAggregateConstant(CPV, aggBuffer);
+      if (Bytes > ElementSize)
+        aggBuffer->addZeros(Bytes-ElementSize);
+    }
+    else if (isa<ConstantAggregateZero>(CPV))
+      aggBuffer->addZeros(Bytes);
+    else
+      llvm_unreachable("Unexpected Constant type");
+    break;
+  }
+
+  default:
+    llvm_unreachable("unsupported type");
+  }
+}
+
+void NVPTXAsmPrinter::bufferAggregateConstant(Constant *CPV,
+                                              AggBuffer *aggBuffer) {
+  const TargetData *TD = TM.getTargetData();
+  int Bytes;
+
+  // Old constants
+  if (isa<ConstantArray>(CPV) || isa<ConstantVector>(CPV)) {
+    if (CPV->getNumOperands())
+      for (unsigned i = 0, e = CPV->getNumOperands(); i != e; ++i)
+        bufferLEByte(cast<Constant>(CPV->getOperand(i)), 0, aggBuffer);
+    return;
+  }
+
+  if (const ConstantDataSequential *CDS =
+          dyn_cast<ConstantDataSequential>(CPV)) {
+    if (CDS->getNumElements())
+      for (unsigned i = 0; i < CDS->getNumElements(); ++i)
+        bufferLEByte(cast<Constant>(CDS->getElementAsConstant(i)), 0,
+                     aggBuffer);
+    return;
+  }
+
+
+  if (isa<ConstantStruct>(CPV)) {
+    if (CPV->getNumOperands()) {
+      StructType *ST = cast<StructType>(CPV->getType());
+      for (unsigned i = 0, e = CPV->getNumOperands(); i != e; ++i) {
+        if (i == (e - 1))
+          Bytes = TD->getStructLayout(ST)->getElementOffset(0) +
+                  TD->getTypeAllocSize(ST)
+                  - TD->getStructLayout(ST)->getElementOffset(i);
+        else
+          Bytes = TD->getStructLayout(ST)->getElementOffset(i+1) -
+                  TD->getStructLayout(ST)->getElementOffset(i);
+        bufferLEByte(cast<Constant>(CPV->getOperand(i)), Bytes,
+                     aggBuffer);
+      }
+    }
+    return;
+  }
+  llvm_unreachable("unsupported constant type in printAggregateConstant()");
+}
+
+// buildTypeNameMap - Run through symbol table looking for type names.
+//
+
+
+bool NVPTXAsmPrinter::isImageType(const Type *Ty) {
+
+  std::map<const Type *, std::string>::iterator PI = TypeNameMap.find(Ty);
+
+  if (PI != TypeNameMap.end() &&
+      (!PI->second.compare("struct._image1d_t") ||
+       !PI->second.compare("struct._image2d_t") ||
+       !PI->second.compare("struct._image3d_t")))
+    return true;
+
+  return false;
+}
+
+/// PrintAsmOperand - Print out an operand for an inline asm expression.
+///
+bool NVPTXAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+                                      unsigned AsmVariant,
+                                      const char *ExtraCode,
+                                      raw_ostream &O) {
+  if (ExtraCode && ExtraCode[0]) {
+    if (ExtraCode[1] != 0) return true;  // Unknown modifier.
+
+    switch (ExtraCode[0]) {
+    default:
+      // See if this is a generic print operand
+      return AsmPrinter::PrintAsmOperand(MI, OpNo, AsmVariant, ExtraCode, O);
+    case 'r':
+      break;
+    }
+  }
+
+  printOperand(MI, OpNo, O);
+
+  return false;
+}
+
+bool NVPTXAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
+                                            unsigned OpNo,
+                                            unsigned AsmVariant,
+                                            const char *ExtraCode,
+                                            raw_ostream &O) {
+  if (ExtraCode && ExtraCode[0])
+    return true;  // Unknown modifier
+
+  O << '[';
+  printMemOperand(MI, OpNo, O);
+  O << ']';
+
+  return false;
+}
+
+bool NVPTXAsmPrinter::ignoreLoc(const MachineInstr &MI)
+{
+  switch(MI.getOpcode()) {
+  default:
+    return false;
+  case NVPTX::CallArgBeginInst:  case NVPTX::CallArgEndInst0:
+  case NVPTX::CallArgEndInst1:   case NVPTX::CallArgF32:
+  case NVPTX::CallArgF64:        case NVPTX::CallArgI16:
+  case NVPTX::CallArgI32:        case NVPTX::CallArgI32imm:
+  case NVPTX::CallArgI64:        case NVPTX::CallArgI8:
+  case NVPTX::CallArgParam:      case NVPTX::CallVoidInst:
+  case NVPTX::CallVoidInstReg:   case NVPTX::Callseq_End:
+  case NVPTX::CallVoidInstReg64:
+  case NVPTX::DeclareParamInst:  case NVPTX::DeclareRetMemInst:
+  case NVPTX::DeclareRetRegInst: case NVPTX::DeclareRetScalarInst:
+  case NVPTX::DeclareScalarParamInst: case NVPTX::DeclareScalarRegInst:
+  case NVPTX::StoreParamF32:     case NVPTX::StoreParamF64:
+  case NVPTX::StoreParamI16:     case NVPTX::StoreParamI32:
+  case NVPTX::StoreParamI64:     case NVPTX::StoreParamI8:
+  case NVPTX::StoreParamS32I8:   case NVPTX::StoreParamU32I8:
+  case NVPTX::StoreParamS32I16:  case NVPTX::StoreParamU32I16:
+  case NVPTX::StoreParamScalar2F32: case NVPTX::StoreParamScalar2F64:
+  case NVPTX::StoreParamScalar2I16: case NVPTX::StoreParamScalar2I32:
+  case NVPTX::StoreParamScalar2I64: case NVPTX::StoreParamScalar2I8:
+  case 
NVPTX::StoreParamScalar4F32: case NVPTX::StoreParamScalar4I16: + case NVPTX::StoreParamScalar4I32: case NVPTX::StoreParamScalar4I8: + case NVPTX::StoreParamV2F32: case NVPTX::StoreParamV2F64: + case NVPTX::StoreParamV2I16: case NVPTX::StoreParamV2I32: + case NVPTX::StoreParamV2I64: case NVPTX::StoreParamV2I8: + case NVPTX::StoreParamV4F32: case NVPTX::StoreParamV4I16: + case NVPTX::StoreParamV4I32: case NVPTX::StoreParamV4I8: + case NVPTX::StoreRetvalF32: case NVPTX::StoreRetvalF64: + case NVPTX::StoreRetvalI16: case NVPTX::StoreRetvalI32: + case NVPTX::StoreRetvalI64: case NVPTX::StoreRetvalI8: + case NVPTX::StoreRetvalScalar2F32: case NVPTX::StoreRetvalScalar2F64: + case NVPTX::StoreRetvalScalar2I16: case NVPTX::StoreRetvalScalar2I32: + case NVPTX::StoreRetvalScalar2I64: case NVPTX::StoreRetvalScalar2I8: + case NVPTX::StoreRetvalScalar4F32: case NVPTX::StoreRetvalScalar4I16: + case NVPTX::StoreRetvalScalar4I32: case NVPTX::StoreRetvalScalar4I8: + case NVPTX::StoreRetvalV2F32: case NVPTX::StoreRetvalV2F64: + case NVPTX::StoreRetvalV2I16: case NVPTX::StoreRetvalV2I32: + case NVPTX::StoreRetvalV2I64: case NVPTX::StoreRetvalV2I8: + case NVPTX::StoreRetvalV4F32: case NVPTX::StoreRetvalV4I16: + case NVPTX::StoreRetvalV4I32: case NVPTX::StoreRetvalV4I8: + case NVPTX::LastCallArgF32: case NVPTX::LastCallArgF64: + case NVPTX::LastCallArgI16: case NVPTX::LastCallArgI32: + case NVPTX::LastCallArgI32imm: case NVPTX::LastCallArgI64: + case NVPTX::LastCallArgI8: case NVPTX::LastCallArgParam: + case NVPTX::LoadParamMemF32: case NVPTX::LoadParamMemF64: + case NVPTX::LoadParamMemI16: case NVPTX::LoadParamMemI32: + case NVPTX::LoadParamMemI64: case NVPTX::LoadParamMemI8: + case NVPTX::LoadParamRegF32: case NVPTX::LoadParamRegF64: + case NVPTX::LoadParamRegI16: case NVPTX::LoadParamRegI32: + case NVPTX::LoadParamRegI64: case NVPTX::LoadParamRegI8: + case NVPTX::LoadParamScalar2F32: case NVPTX::LoadParamScalar2F64: + case NVPTX::LoadParamScalar2I16: case NVPTX::LoadParamScalar2I32: + case NVPTX::LoadParamScalar2I64: case NVPTX::LoadParamScalar2I8: + case NVPTX::LoadParamScalar4F32: case NVPTX::LoadParamScalar4I16: + case NVPTX::LoadParamScalar4I32: case NVPTX::LoadParamScalar4I8: + case NVPTX::LoadParamV2F32: case NVPTX::LoadParamV2F64: + case NVPTX::LoadParamV2I16: case NVPTX::LoadParamV2I32: + case NVPTX::LoadParamV2I64: case NVPTX::LoadParamV2I8: + case NVPTX::LoadParamV4F32: case NVPTX::LoadParamV4I16: + case NVPTX::LoadParamV4I32: case NVPTX::LoadParamV4I8: + case NVPTX::PrototypeInst: case NVPTX::DBG_VALUE: + return true; + } + return false; +} + +// Force static initialization. 
+extern "C" void LLVMInitializeNVPTXBackendAsmPrinter() { + RegisterAsmPrinter X(TheNVPTXTarget32); + RegisterAsmPrinter Y(TheNVPTXTarget64); +} + + +void NVPTXAsmPrinter::emitSrcInText(StringRef filename, unsigned line) { + std::stringstream temp; + LineReader * reader = this->getReader(filename.str()); + temp << "\n//"; + temp << filename.str(); + temp << ":"; + temp << line; + temp << " "; + temp << reader->readLine(line); + temp << "\n"; + this->OutStreamer.EmitRawText(Twine(temp.str())); +} + + +LineReader *NVPTXAsmPrinter::getReader(std::string filename) { + if (reader == NULL) { + reader = new LineReader(filename); + } + + if (reader->fileName() != filename) { + delete reader; + reader = new LineReader(filename); + } + + return reader; +} + + +std::string +LineReader::readLine(unsigned lineNum) { + if (lineNum < theCurLine) { + theCurLine = 0; + fstr.seekg(0,std::ios::beg); + } + while (theCurLine < lineNum) { + fstr.getline(buff,500); + theCurLine++; + } + return buff; +} + +// Force static initialization. +extern "C" void LLVMInitializeNVPTXAsmPrinter() { + RegisterAsmPrinter X(TheNVPTXTarget32); + RegisterAsmPrinter Y(TheNVPTXTarget64); +} diff --git a/lib/Target/NVPTX/NVPTXAsmPrinter.h b/lib/Target/NVPTX/NVPTXAsmPrinter.h new file mode 100644 index 0000000..6488b14 --- /dev/null +++ b/lib/Target/NVPTX/NVPTXAsmPrinter.h @@ -0,0 +1,315 @@ +//===-- NVPTXAsmPrinter.h - NVPTX LLVM assembly writer --------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains a printer that converts from our internal representation +// of machine-dependent LLVM code to NVPTX assembly language. +// +//===----------------------------------------------------------------------===// + +#ifndef NVPTXASMPRINTER_H +#define NVPTXASMPRINTER_H + +#include "NVPTX.h" +#include "NVPTXTargetMachine.h" +#include "NVPTXSubtarget.h" +#include "llvm/Function.h" +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/Support/FormattedStream.h" +#include "llvm/Target/Mangler.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/StringExtras.h" +#include + +// The ptx syntax and format is very different from that usually seem in a .s +// file, +// therefore we are not able to use the MCAsmStreamer interface here. +// +// We are handcrafting the output method here. +// +// A better approach is to clone the MCAsmStreamer to a MCPTXAsmStreamer +// (subclass of MCStreamer). + +// This is defined in AsmPrinter.cpp. +// Used to process the constant expressions in initializers. 
+namespace nvptx {
+const llvm::MCExpr *LowerConstant(const llvm::Constant *CV,
+                                  llvm::AsmPrinter &AP);
+}
+
+namespace llvm {
+
+class LineReader {
+private:
+  unsigned theCurLine;
+  std::ifstream fstr;
+  char buff[512];
+  std::string theFileName;
+  SmallVector<unsigned, 32> lineOffset;
+public:
+  LineReader(std::string filename) {
+    theCurLine = 0;
+    fstr.open(filename.c_str());
+    theFileName = filename;
+  }
+  std::string fileName() { return theFileName; }
+  ~LineReader() {
+    fstr.close();
+  }
+  std::string readLine(unsigned line);
+};
+
+
+
+class LLVM_LIBRARY_VISIBILITY NVPTXAsmPrinter : public AsmPrinter {
+
+
+  class AggBuffer {
+    // Used to buffer the emitted string for initializing global
+    // aggregates.
+    //
+    // Normally an aggregate (array, vector or structure) is emitted
+    // as a u8[]. However, if one element/field of the aggregate
+    // is a non-NULL address, then the aggregate is emitted as u32[]
+    // or u64[].
+    //
+    // We first layout the aggregate in 'buffer' in bytes, except for
+    // those symbol addresses. For the i-th symbol address in the
+    // aggregate, its corresponding 4-byte or 8-byte elements in 'buffer'
+    // are filled with 0s. symbolPosInBuffer[i-1] records its position
+    // in 'buffer', and Symbols[i-1] records the Value*.
+    //
+    // Once we have this AggBuffer setup, we can choose how to print
+    // it out.
+  public:
+    unsigned size;          // size of the buffer in bytes
+    unsigned char *buffer;  // the buffer
+    unsigned numSymbols;    // number of symbol addresses
+    SmallVector<unsigned, 4> symbolPosInBuffer;
+    SmallVector<Value *, 4> Symbols;
+
+  private:
+    unsigned curpos;
+    raw_ostream &O;
+    NVPTXAsmPrinter &AP;
+
+  public:
+    AggBuffer(unsigned _size, raw_ostream &_O, NVPTXAsmPrinter &_AP)
+        :O(_O),AP(_AP) {
+      buffer = new unsigned char[_size];
+      size = _size;
+      curpos = 0;
+      numSymbols = 0;
+    }
+    ~AggBuffer() {
+      delete [] buffer;
+    }
+    unsigned addBytes(unsigned char *Ptr, int Num, int Bytes) {
+      assert((curpos+Num) <= size);
+      assert((curpos+Bytes) <= size);
+      for (int i = 0; i < Num; ++i) {
+        buffer[curpos] = Ptr[i];
+        curpos++;
+      }
+      for (int i = Num; i < Bytes; ++i) {
+        buffer[curpos] = 0;
+        curpos++;
+      }
+      return curpos;
+    }
+    unsigned addZeros(int Num) {
+      assert((curpos+Num) <= size);
+      for (int i = 0; i < Num; ++i) {
+        buffer[curpos] = 0;
+        curpos++;
+      }
+      return curpos;
+    }
+    void addSymbol(Value *GVar) {
+      symbolPosInBuffer.push_back(curpos);
+      Symbols.push_back(GVar);
+      numSymbols++;
+    }
+    void print() {
+      if (numSymbols == 0) {
+        // print out in bytes
+        for (unsigned i = 0; i < size; i++) {
+          if (i)
+            O << ", ";
+          O << (unsigned int)buffer[i];
+        }
+      } else {
+        // print out in 4-byte or 8-byte elements
+        unsigned int pos = 0;
+        unsigned int nSym = 0;
+        unsigned int nextSymbolPos = symbolPosInBuffer[0];
+        unsigned int nBytes = 4;
+        if (AP.nvptxSubtarget.is64Bit())
+          nBytes = 8;
+        for (pos = 0; pos < size; pos += nBytes) {
+          if (pos)
+            O << ", ";
+          if (pos == nextSymbolPos) {
+            Value *v = Symbols[nSym];
+            if (GlobalValue *GVar = dyn_cast<GlobalValue>(v)) {
+              MCSymbol *Name = AP.Mang->getSymbol(GVar);
+              O << *Name;
+            }
+            else if (ConstantExpr *Cexpr =
+                         dyn_cast<ConstantExpr>(v)) {
+              O << *nvptx::LowerConstant(Cexpr, AP);
+            } else
+              llvm_unreachable("symbol type unknown");
+            nSym++;
+            if (nSym >= numSymbols)
+              nextSymbolPos = size+1;
+            else
+              nextSymbolPos = symbolPosInBuffer[nSym];
+          } else
+            if (nBytes == 4)
+              O << *(unsigned int*)(buffer+pos);
+            else
+              O << *(unsigned long long*)(buffer+pos);
+        }
+      }
+    }
+  };
+
+  friend class AggBuffer;
+
+  virtual void emitSrcInText(StringRef filename, unsigned line);
+
+private :
+  virtual const char *getPassName() const {
+    return "NVPTX Assembly Printer";
+  }
+
+  const Function *F;
+  std::string CurrentFnName;
+
+  void EmitFunctionEntryLabel();
+  void EmitFunctionBodyStart();
+  void EmitFunctionBodyEnd();
+
+  void EmitInstruction(const MachineInstr *);
+
+  void EmitAlignment(unsigned NumBits, const GlobalValue *GV = 0) const {}
+
+  void printGlobalVariable(const GlobalVariable *GVar);
+  void printOperand(const MachineInstr *MI, int opNum, raw_ostream &O,
+                    const char *Modifier=0);
+  void printLdStCode(const MachineInstr *MI, int opNum, raw_ostream &O,
+                     const char *Modifier=0);
+  void printVecModifiedImmediate(const MachineOperand &MO,
+                                 const char *Modifier, raw_ostream &O);
+  void printMemOperand(const MachineInstr *MI, int opNum, raw_ostream &O,
+                       const char *Modifier=0);
+  void printImplicitDef(const MachineInstr *MI, raw_ostream &O) const;
+  // definition autogenerated.
+  void printInstruction(const MachineInstr *MI, raw_ostream &O);
+  void printModuleLevelGV(GlobalVariable* GVar, raw_ostream &O,
+                          bool=false);
+  void printParamName(int paramIndex, raw_ostream &O);
+  void printParamName(Function::const_arg_iterator I, int paramIndex,
+                      raw_ostream &O);
+  void emitHeader(Module &M, raw_ostream &O);
+  void emitKernelFunctionDirectives(const Function& F,
+                                    raw_ostream &O) const;
+  void emitVirtualRegister(unsigned int vr, bool isVec, raw_ostream &O);
+  void emitFunctionExternParamList(const MachineFunction &MF);
+  void emitFunctionParamList(const Function *, raw_ostream &O);
+  void emitFunctionParamList(const MachineFunction &MF, raw_ostream &O);
+  void setAndEmitFunctionVirtualRegisters(const MachineFunction &MF);
+  void emitFunctionTempData(const MachineFunction &MF,
+                            unsigned &FrameSize);
+  bool isImageType(const Type *Ty);
+  bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+                       unsigned AsmVariant, const char *ExtraCode,
+                       raw_ostream &);
+  bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
+                             unsigned AsmVariant, const char *ExtraCode,
+                             raw_ostream &);
+  void printReturnValStr(const Function *, raw_ostream &O);
+  void printReturnValStr(const MachineFunction &MF, raw_ostream &O);
+
+protected:
+  bool doInitialization(Module &M);
+  bool doFinalization(Module &M);
+
+private:
+  std::string CurrentBankselLabelInBasicBlock;
+
+  // This is specific per MachineFunction.
+  const MachineRegisterInfo *MRI;
+  // The contents are specific for each
+  // MachineFunction. But the size of the
+  // array is not.
+  std::map<unsigned, unsigned> *VRidGlobal2LocalMap;
+  // cache the subtarget here.
+  const NVPTXSubtarget &nvptxSubtarget;
+  // Build the map between type name and ID based on module's type
+  // symbol table.
+  std::map<const Type *, std::string> TypeNameMap;
+
+  // List of variables demoted to a function scope.
+  std::map<const Function *, std::vector<GlobalVariable *> > localDecls;
+
+  // To record filename to ID mapping
+  std::map<std::string, unsigned> filenameMap;
+  void recordAndEmitFilenames(Module &);
+
+  void emitPTXGlobalVariable(const GlobalVariable *GVar, raw_ostream &O);
+  void emitPTXAddressSpace(unsigned int AddressSpace,
+                           raw_ostream &O) const;
+  std::string getPTXFundamentalTypeStr(const Type *Ty, bool=true) const;
+  void printScalarConstant(Constant *CPV, raw_ostream &O);
+  void printFPConstant(const ConstantFP *Fp, raw_ostream &O);
+  void bufferLEByte(Constant *CPV, int Bytes, AggBuffer *aggBuffer);
+  void bufferAggregateConstant(Constant *CV, AggBuffer *aggBuffer);
+
+  void printOperandProper(const MachineOperand &MO);
+
+  void emitLinkageDirective(const GlobalValue* V, raw_ostream &O);
+  void emitDeclarations(Module &, raw_ostream &O);
+  void emitDeclaration(const Function *, raw_ostream &O);
+
+  static const char *getRegisterName(unsigned RegNo);
+  void emitDemotedVars(const Function *, raw_ostream &);
+
+  LineReader *reader;
+  LineReader *getReader(std::string);
+public:
+  NVPTXAsmPrinter(TargetMachine &TM,
+                  MCStreamer &Streamer)
+  : AsmPrinter(TM, Streamer),
+    nvptxSubtarget(TM.getSubtarget<NVPTXSubtarget>()) {
+    CurrentBankselLabelInBasicBlock = "";
+    VRidGlobal2LocalMap = NULL;
+    reader = NULL;
+  }
+
+  ~NVPTXAsmPrinter() {
+    if (reader)
+      delete reader;
+  }
+
+  bool ignoreLoc(const MachineInstr &);
+
+  virtual void getVirtualRegisterName(unsigned, bool, raw_ostream &);
+
+  DebugLoc prevDebugLoc;
+  void emitLineNumberAsDotLoc(const MachineInstr &);
+};
+} // end of namespace
+
+#endif
diff --git a/lib/Target/NVPTX/NVPTXFrameLowering.cpp b/lib/Target/NVPTX/NVPTXFrameLowering.cpp
new file mode 100644
index 0000000..a9abc00
--- /dev/null
+++ b/lib/Target/NVPTX/NVPTXFrameLowering.cpp
@@ -0,0 +1,76 @@
+//=======- NVPTXFrameLowering.cpp - NVPTX Frame Information ---*- C++ -*-=====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the NVPTX implementation of TargetFrameLowering class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "NVPTXFrameLowering.h"
+#include "NVPTX.h"
+#include "NVPTXRegisterInfo.h"
+#include "NVPTXSubtarget.h"
+#include "NVPTXTargetMachine.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/MC/MachineLocation.h"
+#include "llvm/Target/TargetInstrInfo.h"
+
+using namespace llvm;
+
+bool NVPTXFrameLowering::hasFP(const MachineFunction &MF) const {
+  return true;
+}
+
+void NVPTXFrameLowering::emitPrologue(MachineFunction &MF) const {
+  if (MF.getFrameInfo()->hasStackObjects()) {
+    MachineBasicBlock &MBB = MF.front();
+    // Insert "mov.u32 %SP, %Depot"
+    MachineBasicBlock::iterator MBBI = MBB.begin();
+    // This instruction really occurs before the first instruction
+    // in the BB, so give it no debug location.
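+    // The resulting PTX is, schematically (64-bit, generic-addressing case;
+    // the exact depot symbol name is emitted elsewhere):
+    //   mov.u64        %SPL, %depot;
+    //   cvta.local.u64 %SP, %SPL;
+    // On targets without generic load/store, only a plain mov into %SP
+    // is emitted.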
+ DebugLoc dl = DebugLoc(); + + if (tm.getSubtargetImpl()->hasGenericLdSt()) { + // mov %SPL, %depot; + // cvta.local %SP, %SPL; + if (is64bit) { + MachineInstr *MI = BuildMI(MBB, MBBI, dl, + tm.getInstrInfo()->get(NVPTX::cvta_local_yes_64), + NVPTX::VRFrame).addReg(NVPTX::VRFrameLocal); + BuildMI(MBB, MI, dl, + tm.getInstrInfo()->get(NVPTX::IMOV64rr), NVPTX::VRFrameLocal) + .addReg(NVPTX::VRDepot); + } else { + MachineInstr *MI = BuildMI(MBB, MBBI, dl, + tm.getInstrInfo()->get(NVPTX::cvta_local_yes), + NVPTX::VRFrame).addReg(NVPTX::VRFrameLocal); + BuildMI(MBB, MI, dl, + tm.getInstrInfo()->get(NVPTX::IMOV32rr), NVPTX::VRFrameLocal) + .addReg(NVPTX::VRDepot); + } + } + else { + // mov %SP, %depot; + if (is64bit) + BuildMI(MBB, MBBI, dl, + tm.getInstrInfo()->get(NVPTX::IMOV64rr), NVPTX::VRFrame) + .addReg(NVPTX::VRDepot); + else + BuildMI(MBB, MBBI, dl, + tm.getInstrInfo()->get(NVPTX::IMOV32rr), NVPTX::VRFrame) + .addReg(NVPTX::VRDepot); + } + } +} + +void NVPTXFrameLowering::emitEpilogue(MachineFunction &MF, + MachineBasicBlock &MBB) const { +} diff --git a/lib/Target/NVPTX/NVPTXFrameLowering.h b/lib/Target/NVPTX/NVPTXFrameLowering.h new file mode 100644 index 0000000..ee87b39 --- /dev/null +++ b/lib/Target/NVPTX/NVPTXFrameLowering.h @@ -0,0 +1,40 @@ +//===--- NVPTXFrameLowering.h - Define frame lowering for NVPTX -*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// +// +//===----------------------------------------------------------------------===// + +#ifndef NVPTX_FRAMELOWERING_H +#define NVPTX_FRAMELOWERING_H + +#include "llvm/Target/TargetFrameLowering.h" + + +namespace llvm { +class NVPTXTargetMachine; + +class NVPTXFrameLowering : public TargetFrameLowering { + NVPTXTargetMachine &tm; + bool is64bit; + +public: + explicit NVPTXFrameLowering(NVPTXTargetMachine &_tm, bool _is64bit) + : TargetFrameLowering(TargetFrameLowering::StackGrowsUp, 8, 0), + tm(_tm), is64bit(_is64bit) {} + + virtual bool hasFP(const MachineFunction &MF) const; + virtual void emitPrologue(MachineFunction &MF) const; + virtual void emitEpilogue(MachineFunction &MF, + MachineBasicBlock &MBB) const; +}; + +} // End llvm namespace + +#endif diff --git a/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp new file mode 100644 index 0000000..4e92f0e --- /dev/null +++ b/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp @@ -0,0 +1,683 @@ +//===-- NVPTXISelDAGToDAG.cpp - A dag to dag inst selector for NVPTX ------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines an instruction selector for the NVPTX target. 
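+// Loads and stores are selected by hand below because their PTX encoding
+// carries the address space, volatility, vector width and value type as
+// explicit operands; everything else falls through to the TableGen'erated
+// SelectCode().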
+// +//===----------------------------------------------------------------------===// + + +#include "llvm/Instructions.h" +#include "llvm/Support/raw_ostream.h" +#include "NVPTXISelDAGToDAG.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Target/TargetIntrinsicInfo.h" +#include "llvm/GlobalValue.h" + +#undef DEBUG_TYPE +#define DEBUG_TYPE "nvptx-isel" + +using namespace llvm; + + +static cl::opt +UseFMADInstruction("nvptx-mad-enable", + cl::ZeroOrMore, + cl::desc("NVPTX Specific: Enable generating FMAD instructions"), + cl::init(false)); + +static cl::opt +FMAContractLevel("nvptx-fma-level", + cl::ZeroOrMore, + cl::desc("NVPTX Specific: FMA contraction (0: don't do it" + " 1: do it 2: do it aggressively"), + cl::init(2)); + + +static cl::opt +UsePrecDivF32("nvptx-prec-divf32", + cl::ZeroOrMore, + cl::desc("NVPTX Specifies: 0 use div.approx, 1 use div.full, 2 use" + " IEEE Compliant F32 div.rnd if avaiable."), + cl::init(2)); + +/// createNVPTXISelDag - This pass converts a legalized DAG into a +/// NVPTX-specific DAG, ready for instruction scheduling. +FunctionPass *llvm::createNVPTXISelDag(NVPTXTargetMachine &TM, + llvm::CodeGenOpt::Level OptLevel) { + return new NVPTXDAGToDAGISel(TM, OptLevel); +} + + +NVPTXDAGToDAGISel::NVPTXDAGToDAGISel(NVPTXTargetMachine &tm, + CodeGenOpt::Level OptLevel) +: SelectionDAGISel(tm, OptLevel), + Subtarget(tm.getSubtarget()) +{ + // Always do fma.f32 fpcontract if the target supports the instruction. + // Always do fma.f64 fpcontract if the target supports the instruction. + // Do mad.f32 is nvptx-mad-enable is specified and the target does not + // support fma.f32. + + doFMADF32 = (OptLevel > 0) && UseFMADInstruction && !Subtarget.hasFMAF32(); + doFMAF32 = (OptLevel > 0) && Subtarget.hasFMAF32() && + (FMAContractLevel>=1); + doFMAF64 = (OptLevel > 0) && Subtarget.hasFMAF64() && + (FMAContractLevel>=1); + doFMAF32AGG = (OptLevel > 0) && Subtarget.hasFMAF32() && + (FMAContractLevel==2); + doFMAF64AGG = (OptLevel > 0) && Subtarget.hasFMAF64() && + (FMAContractLevel==2); + + allowFMA = (FMAContractLevel >= 1) || UseFMADInstruction; + + UseF32FTZ = false; + + doMulWide = (OptLevel > 0); + + // Decide how to translate f32 div + do_DIVF32_PREC = UsePrecDivF32; + // sm less than sm_20 does not support div.rnd. Use div.full. + if (do_DIVF32_PREC == 2 && !Subtarget.reqPTX20()) + do_DIVF32_PREC = 1; + +} + +/// Select - Select instructions not customized! Used for +/// expanded, promoted and normal instructions. +SDNode* NVPTXDAGToDAGISel::Select(SDNode *N) { + + if (N->isMachineOpcode()) + return NULL; // Already selected. 
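+  // Only LOAD and STORE get custom selection here (see the note in the
+  // file header); all other opcodes fall through to SelectCode() below.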
+ + SDNode *ResNode = NULL; + switch (N->getOpcode()) { + case ISD::LOAD: + ResNode = SelectLoad(N); + break; + case ISD::STORE: + ResNode = SelectStore(N); + break; + } + if (ResNode) + return ResNode; + return SelectCode(N); +} + + +static unsigned int +getCodeAddrSpace(MemSDNode *N, const NVPTXSubtarget &Subtarget) +{ + const Value *Src = N->getSrcValue(); + if (!Src) + return NVPTX::PTXLdStInstCode::LOCAL; + + if (const PointerType *PT = dyn_cast(Src->getType())) { + switch (PT->getAddressSpace()) { + case llvm::ADDRESS_SPACE_LOCAL: return NVPTX::PTXLdStInstCode::LOCAL; + case llvm::ADDRESS_SPACE_GLOBAL: return NVPTX::PTXLdStInstCode::GLOBAL; + case llvm::ADDRESS_SPACE_SHARED: return NVPTX::PTXLdStInstCode::SHARED; + case llvm::ADDRESS_SPACE_CONST_NOT_GEN: + return NVPTX::PTXLdStInstCode::CONSTANT; + case llvm::ADDRESS_SPACE_GENERIC: return NVPTX::PTXLdStInstCode::GENERIC; + case llvm::ADDRESS_SPACE_PARAM: return NVPTX::PTXLdStInstCode::PARAM; + case llvm::ADDRESS_SPACE_CONST: + // If the arch supports generic address space, translate it to GLOBAL + // for correctness. + // If the arch does not support generic address space, then the arch + // does not really support ADDRESS_SPACE_CONST, translate it to + // to CONSTANT for better performance. + if (Subtarget.hasGenericLdSt()) + return NVPTX::PTXLdStInstCode::GLOBAL; + else + return NVPTX::PTXLdStInstCode::CONSTANT; + default: break; + } + } + return NVPTX::PTXLdStInstCode::LOCAL; +} + + +SDNode* NVPTXDAGToDAGISel::SelectLoad(SDNode *N) { + DebugLoc dl = N->getDebugLoc(); + LoadSDNode *LD = cast(N); + EVT LoadedVT = LD->getMemoryVT(); + SDNode *NVPTXLD= NULL; + + // do not support pre/post inc/dec + if (LD->isIndexed()) + return NULL; + + if (!LoadedVT.isSimple()) + return NULL; + + // Address Space Setting + unsigned int codeAddrSpace = getCodeAddrSpace(LD, Subtarget); + + // Volatile Setting + // - .volatile is only availalble for .global and .shared + bool isVolatile = LD->isVolatile(); + if (codeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL && + codeAddrSpace != NVPTX::PTXLdStInstCode::SHARED && + codeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC) + isVolatile = false; + + // Vector Setting + MVT SimpleVT = LoadedVT.getSimpleVT(); + unsigned vecType = NVPTX::PTXLdStInstCode::Scalar; + if (SimpleVT.isVector()) { + unsigned num = SimpleVT.getVectorNumElements(); + if (num == 2) + vecType = NVPTX::PTXLdStInstCode::V2; + else if (num == 4) + vecType = NVPTX::PTXLdStInstCode::V4; + else + return NULL; + } + + // Type Setting: fromType + fromTypeWidth + // + // Sign : ISD::SEXTLOAD + // Unsign : ISD::ZEXTLOAD, ISD::NON_EXTLOAD or ISD::EXTLOAD and the + // type is integer + // Float : ISD::NON_EXTLOAD or ISD::EXTLOAD and the type is float + MVT ScalarVT = SimpleVT.getScalarType(); + unsigned fromTypeWidth = ScalarVT.getSizeInBits(); + unsigned int fromType; + if ((LD->getExtensionType() == ISD::SEXTLOAD)) + fromType = NVPTX::PTXLdStInstCode::Signed; + else if (ScalarVT.isFloatingPoint()) + fromType = NVPTX::PTXLdStInstCode::Float; + else + fromType = NVPTX::PTXLdStInstCode::Unsigned; + + // Create the machine instruction DAG + SDValue Chain = N->getOperand(0); + SDValue N1 = N->getOperand(1); + SDValue Addr; + SDValue Offset, Base; + unsigned Opcode; + MVT::SimpleValueType TargetVT = LD->getValueType(0).getSimpleVT().SimpleTy; + + if (SelectDirectAddr(N1, Addr)) { + switch (TargetVT) { + case MVT::i8: Opcode = NVPTX::LD_i8_avar; break; + case MVT::i16: Opcode = NVPTX::LD_i16_avar; break; + case MVT::i32: Opcode = NVPTX::LD_i32_avar; break; + 
case MVT::i64: Opcode = NVPTX::LD_i64_avar; break; + case MVT::f32: Opcode = NVPTX::LD_f32_avar; break; + case MVT::f64: Opcode = NVPTX::LD_f64_avar; break; + case MVT::v2i8: Opcode = NVPTX::LD_v2i8_avar; break; + case MVT::v2i16: Opcode = NVPTX::LD_v2i16_avar; break; + case MVT::v2i32: Opcode = NVPTX::LD_v2i32_avar; break; + case MVT::v2i64: Opcode = NVPTX::LD_v2i64_avar; break; + case MVT::v2f32: Opcode = NVPTX::LD_v2f32_avar; break; + case MVT::v2f64: Opcode = NVPTX::LD_v2f64_avar; break; + case MVT::v4i8: Opcode = NVPTX::LD_v4i8_avar; break; + case MVT::v4i16: Opcode = NVPTX::LD_v4i16_avar; break; + case MVT::v4i32: Opcode = NVPTX::LD_v4i32_avar; break; + case MVT::v4f32: Opcode = NVPTX::LD_v4f32_avar; break; + default: return NULL; + } + SDValue Ops[] = { getI32Imm(isVolatile), + getI32Imm(codeAddrSpace), + getI32Imm(vecType), + getI32Imm(fromType), + getI32Imm(fromTypeWidth), + Addr, Chain }; + NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, + MVT::Other, Ops, 7); + } else if (Subtarget.is64Bit()? + SelectADDRsi64(N1.getNode(), N1, Base, Offset): + SelectADDRsi(N1.getNode(), N1, Base, Offset)) { + switch (TargetVT) { + case MVT::i8: Opcode = NVPTX::LD_i8_asi; break; + case MVT::i16: Opcode = NVPTX::LD_i16_asi; break; + case MVT::i32: Opcode = NVPTX::LD_i32_asi; break; + case MVT::i64: Opcode = NVPTX::LD_i64_asi; break; + case MVT::f32: Opcode = NVPTX::LD_f32_asi; break; + case MVT::f64: Opcode = NVPTX::LD_f64_asi; break; + case MVT::v2i8: Opcode = NVPTX::LD_v2i8_asi; break; + case MVT::v2i16: Opcode = NVPTX::LD_v2i16_asi; break; + case MVT::v2i32: Opcode = NVPTX::LD_v2i32_asi; break; + case MVT::v2i64: Opcode = NVPTX::LD_v2i64_asi; break; + case MVT::v2f32: Opcode = NVPTX::LD_v2f32_asi; break; + case MVT::v2f64: Opcode = NVPTX::LD_v2f64_asi; break; + case MVT::v4i8: Opcode = NVPTX::LD_v4i8_asi; break; + case MVT::v4i16: Opcode = NVPTX::LD_v4i16_asi; break; + case MVT::v4i32: Opcode = NVPTX::LD_v4i32_asi; break; + case MVT::v4f32: Opcode = NVPTX::LD_v4f32_asi; break; + default: return NULL; + } + SDValue Ops[] = { getI32Imm(isVolatile), + getI32Imm(codeAddrSpace), + getI32Imm(vecType), + getI32Imm(fromType), + getI32Imm(fromTypeWidth), + Base, Offset, Chain }; + NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, + MVT::Other, Ops, 8); + } else if (Subtarget.is64Bit()? 
+                    SelectADDRri64(N1.getNode(), N1, Base, Offset):
+                    SelectADDRri(N1.getNode(), N1, Base, Offset)) {
+    switch (TargetVT) {
+    case MVT::i8:    Opcode = NVPTX::LD_i8_ari; break;
+    case MVT::i16:   Opcode = NVPTX::LD_i16_ari; break;
+    case MVT::i32:   Opcode = NVPTX::LD_i32_ari; break;
+    case MVT::i64:   Opcode = NVPTX::LD_i64_ari; break;
+    case MVT::f32:   Opcode = NVPTX::LD_f32_ari; break;
+    case MVT::f64:   Opcode = NVPTX::LD_f64_ari; break;
+    case MVT::v2i8:  Opcode = NVPTX::LD_v2i8_ari; break;
+    case MVT::v2i16: Opcode = NVPTX::LD_v2i16_ari; break;
+    case MVT::v2i32: Opcode = NVPTX::LD_v2i32_ari; break;
+    case MVT::v2i64: Opcode = NVPTX::LD_v2i64_ari; break;
+    case MVT::v2f32: Opcode = NVPTX::LD_v2f32_ari; break;
+    case MVT::v2f64: Opcode = NVPTX::LD_v2f64_ari; break;
+    case MVT::v4i8:  Opcode = NVPTX::LD_v4i8_ari; break;
+    case MVT::v4i16: Opcode = NVPTX::LD_v4i16_ari; break;
+    case MVT::v4i32: Opcode = NVPTX::LD_v4i32_ari; break;
+    case MVT::v4f32: Opcode = NVPTX::LD_v4f32_ari; break;
+    default: return NULL;
+    }
+    SDValue Ops[] = { getI32Imm(isVolatile),
+                      getI32Imm(codeAddrSpace),
+                      getI32Imm(vecType),
+                      getI32Imm(fromType),
+                      getI32Imm(fromTypeWidth),
+                      Base, Offset, Chain };
+    NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT,
+                                     MVT::Other, Ops, 8);
+  }
+  else {
+    switch (TargetVT) {
+    case MVT::i8:    Opcode = NVPTX::LD_i8_areg; break;
+    case MVT::i16:   Opcode = NVPTX::LD_i16_areg; break;
+    case MVT::i32:   Opcode = NVPTX::LD_i32_areg; break;
+    case MVT::i64:   Opcode = NVPTX::LD_i64_areg; break;
+    case MVT::f32:   Opcode = NVPTX::LD_f32_areg; break;
+    case MVT::f64:   Opcode = NVPTX::LD_f64_areg; break;
+    case MVT::v2i8:  Opcode = NVPTX::LD_v2i8_areg; break;
+    case MVT::v2i16: Opcode = NVPTX::LD_v2i16_areg; break;
+    case MVT::v2i32: Opcode = NVPTX::LD_v2i32_areg; break;
+    case MVT::v2i64: Opcode = NVPTX::LD_v2i64_areg; break;
+    case MVT::v2f32: Opcode = NVPTX::LD_v2f32_areg; break;
+    case MVT::v2f64: Opcode = NVPTX::LD_v2f64_areg; break;
+    case MVT::v4i8:  Opcode = NVPTX::LD_v4i8_areg; break;
+    case MVT::v4i16: Opcode = NVPTX::LD_v4i16_areg; break;
+    case MVT::v4i32: Opcode = NVPTX::LD_v4i32_areg; break;
+    case MVT::v4f32: Opcode = NVPTX::LD_v4f32_areg; break;
+    default: return NULL;
+    }
+    SDValue Ops[] = { getI32Imm(isVolatile),
+                      getI32Imm(codeAddrSpace),
+                      getI32Imm(vecType),
+                      getI32Imm(fromType),
+                      getI32Imm(fromTypeWidth),
+                      N1, Chain };
+    NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT,
+                                     MVT::Other, Ops, 7);
+  }
+
+  if (NVPTXLD != NULL) {
+    MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
+    MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
+    cast<MachineSDNode>(NVPTXLD)->setMemRefs(MemRefs0, MemRefs0 + 1);
+  }
+
+  return NVPTXLD;
+}
+
+SDNode* NVPTXDAGToDAGISel::SelectStore(SDNode *N) {
+  DebugLoc dl = N->getDebugLoc();
+  StoreSDNode *ST = cast<StoreSDNode>(N);
+  EVT StoreVT = ST->getMemoryVT();
+  SDNode *NVPTXST = NULL;
+
+  // do not support pre/post inc/dec
+  if (ST->isIndexed())
+    return NULL;
+
+  if (!StoreVT.isSimple())
+    return NULL;
+
+  // Address Space Setting
+  unsigned int codeAddrSpace = getCodeAddrSpace(ST, Subtarget);
+
+  // Volatile Setting
+  // - .volatile is only available for .global and .shared
+  bool isVolatile = ST->isVolatile();
+  if (codeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
+      codeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
+      codeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
+    isVolatile = false;
+
+  // Vector Setting
+  MVT SimpleVT = StoreVT.getSimpleVT();
+  unsigned vecType = NVPTX::PTXLdStInstCode::Scalar;
+  if (SimpleVT.isVector()) {
+    unsigned num =
SimpleVT.getVectorNumElements(); + if (num == 2) + vecType = NVPTX::PTXLdStInstCode::V2; + else if (num == 4) + vecType = NVPTX::PTXLdStInstCode::V4; + else + return NULL; + } + + // Type Setting: toType + toTypeWidth + // - for integer type, always use 'u' + // + MVT ScalarVT = SimpleVT.getScalarType(); + unsigned toTypeWidth = ScalarVT.getSizeInBits(); + unsigned int toType; + if (ScalarVT.isFloatingPoint()) + toType = NVPTX::PTXLdStInstCode::Float; + else + toType = NVPTX::PTXLdStInstCode::Unsigned; + + // Create the machine instruction DAG + SDValue Chain = N->getOperand(0); + SDValue N1 = N->getOperand(1); + SDValue N2 = N->getOperand(2); + SDValue Addr; + SDValue Offset, Base; + unsigned Opcode; + MVT::SimpleValueType SourceVT = + N1.getNode()->getValueType(0).getSimpleVT().SimpleTy; + + if (SelectDirectAddr(N2, Addr)) { + switch (SourceVT) { + case MVT::i8: Opcode = NVPTX::ST_i8_avar; break; + case MVT::i16: Opcode = NVPTX::ST_i16_avar; break; + case MVT::i32: Opcode = NVPTX::ST_i32_avar; break; + case MVT::i64: Opcode = NVPTX::ST_i64_avar; break; + case MVT::f32: Opcode = NVPTX::ST_f32_avar; break; + case MVT::f64: Opcode = NVPTX::ST_f64_avar; break; + case MVT::v2i8: Opcode = NVPTX::ST_v2i8_avar; break; + case MVT::v2i16: Opcode = NVPTX::ST_v2i16_avar; break; + case MVT::v2i32: Opcode = NVPTX::ST_v2i32_avar; break; + case MVT::v2i64: Opcode = NVPTX::ST_v2i64_avar; break; + case MVT::v2f32: Opcode = NVPTX::ST_v2f32_avar; break; + case MVT::v2f64: Opcode = NVPTX::ST_v2f64_avar; break; + case MVT::v4i8: Opcode = NVPTX::ST_v4i8_avar; break; + case MVT::v4i16: Opcode = NVPTX::ST_v4i16_avar; break; + case MVT::v4i32: Opcode = NVPTX::ST_v4i32_avar; break; + case MVT::v4f32: Opcode = NVPTX::ST_v4f32_avar; break; + default: return NULL; + } + SDValue Ops[] = { N1, + getI32Imm(isVolatile), + getI32Imm(codeAddrSpace), + getI32Imm(vecType), + getI32Imm(toType), + getI32Imm(toTypeWidth), + Addr, Chain }; + NVPTXST = CurDAG->getMachineNode(Opcode, dl, + MVT::Other, Ops, 8); + } else if (Subtarget.is64Bit()? + SelectADDRsi64(N2.getNode(), N2, Base, Offset): + SelectADDRsi(N2.getNode(), N2, Base, Offset)) { + switch (SourceVT) { + case MVT::i8: Opcode = NVPTX::ST_i8_asi; break; + case MVT::i16: Opcode = NVPTX::ST_i16_asi; break; + case MVT::i32: Opcode = NVPTX::ST_i32_asi; break; + case MVT::i64: Opcode = NVPTX::ST_i64_asi; break; + case MVT::f32: Opcode = NVPTX::ST_f32_asi; break; + case MVT::f64: Opcode = NVPTX::ST_f64_asi; break; + case MVT::v2i8: Opcode = NVPTX::ST_v2i8_asi; break; + case MVT::v2i16: Opcode = NVPTX::ST_v2i16_asi; break; + case MVT::v2i32: Opcode = NVPTX::ST_v2i32_asi; break; + case MVT::v2i64: Opcode = NVPTX::ST_v2i64_asi; break; + case MVT::v2f32: Opcode = NVPTX::ST_v2f32_asi; break; + case MVT::v2f64: Opcode = NVPTX::ST_v2f64_asi; break; + case MVT::v4i8: Opcode = NVPTX::ST_v4i8_asi; break; + case MVT::v4i16: Opcode = NVPTX::ST_v4i16_asi; break; + case MVT::v4i32: Opcode = NVPTX::ST_v4i32_asi; break; + case MVT::v4f32: Opcode = NVPTX::ST_v4f32_asi; break; + default: return NULL; + } + SDValue Ops[] = { N1, + getI32Imm(isVolatile), + getI32Imm(codeAddrSpace), + getI32Imm(vecType), + getI32Imm(toType), + getI32Imm(toTypeWidth), + Base, Offset, Chain }; + NVPTXST = CurDAG->getMachineNode(Opcode, dl, + MVT::Other, Ops, 9); + } else if (Subtarget.is64Bit()? 
+                    SelectADDRri64(N2.getNode(), N2, Base, Offset):
+                    SelectADDRri(N2.getNode(), N2, Base, Offset)) {
+    switch (SourceVT) {
+    case MVT::i8:    Opcode = NVPTX::ST_i8_ari; break;
+    case MVT::i16:   Opcode = NVPTX::ST_i16_ari; break;
+    case MVT::i32:   Opcode = NVPTX::ST_i32_ari; break;
+    case MVT::i64:   Opcode = NVPTX::ST_i64_ari; break;
+    case MVT::f32:   Opcode = NVPTX::ST_f32_ari; break;
+    case MVT::f64:   Opcode = NVPTX::ST_f64_ari; break;
+    case MVT::v2i8:  Opcode = NVPTX::ST_v2i8_ari; break;
+    case MVT::v2i16: Opcode = NVPTX::ST_v2i16_ari; break;
+    case MVT::v2i32: Opcode = NVPTX::ST_v2i32_ari; break;
+    case MVT::v2i64: Opcode = NVPTX::ST_v2i64_ari; break;
+    case MVT::v2f32: Opcode = NVPTX::ST_v2f32_ari; break;
+    case MVT::v2f64: Opcode = NVPTX::ST_v2f64_ari; break;
+    case MVT::v4i8:  Opcode = NVPTX::ST_v4i8_ari; break;
+    case MVT::v4i16: Opcode = NVPTX::ST_v4i16_ari; break;
+    case MVT::v4i32: Opcode = NVPTX::ST_v4i32_ari; break;
+    case MVT::v4f32: Opcode = NVPTX::ST_v4f32_ari; break;
+    default: return NULL;
+    }
+    SDValue Ops[] = { N1,
+                      getI32Imm(isVolatile),
+                      getI32Imm(codeAddrSpace),
+                      getI32Imm(vecType),
+                      getI32Imm(toType),
+                      getI32Imm(toTypeWidth),
+                      Base, Offset, Chain };
+    NVPTXST = CurDAG->getMachineNode(Opcode, dl,
+                                     MVT::Other, Ops, 9);
+  } else {
+    switch (SourceVT) {
+    case MVT::i8:    Opcode = NVPTX::ST_i8_areg; break;
+    case MVT::i16:   Opcode = NVPTX::ST_i16_areg; break;
+    case MVT::i32:   Opcode = NVPTX::ST_i32_areg; break;
+    case MVT::i64:   Opcode = NVPTX::ST_i64_areg; break;
+    case MVT::f32:   Opcode = NVPTX::ST_f32_areg; break;
+    case MVT::f64:   Opcode = NVPTX::ST_f64_areg; break;
+    case MVT::v2i8:  Opcode = NVPTX::ST_v2i8_areg; break;
+    case MVT::v2i16: Opcode = NVPTX::ST_v2i16_areg; break;
+    case MVT::v2i32: Opcode = NVPTX::ST_v2i32_areg; break;
+    case MVT::v2i64: Opcode = NVPTX::ST_v2i64_areg; break;
+    case MVT::v2f32: Opcode = NVPTX::ST_v2f32_areg; break;
+    case MVT::v2f64: Opcode = NVPTX::ST_v2f64_areg; break;
+    case MVT::v4i8:  Opcode = NVPTX::ST_v4i8_areg; break;
+    case MVT::v4i16: Opcode = NVPTX::ST_v4i16_areg; break;
+    case MVT::v4i32: Opcode = NVPTX::ST_v4i32_areg; break;
+    case MVT::v4f32: Opcode = NVPTX::ST_v4f32_areg; break;
+    default: return NULL;
+    }
+    SDValue Ops[] = { N1,
+                      getI32Imm(isVolatile),
+                      getI32Imm(codeAddrSpace),
+                      getI32Imm(vecType),
+                      getI32Imm(toType),
+                      getI32Imm(toTypeWidth),
+                      N2, Chain };
+    NVPTXST = CurDAG->getMachineNode(Opcode, dl,
+                                     MVT::Other, Ops, 8);
+  }
+
+  if (NVPTXST != NULL) {
+    MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
+    MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
+    cast<MachineSDNode>(NVPTXST)->setMemRefs(MemRefs0, MemRefs0 + 1);
+  }
+
+  return NVPTXST;
+}
+
+// SelectDirectAddr - Match a direct address for DAG.
+// A direct address could be a globaladdress or externalsymbol.
+bool NVPTXDAGToDAGISel::SelectDirectAddr(SDValue N, SDValue &Address) {
+  // Return true if TGA or ES.
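+  // Also looks through NVPTXISD::Wrapper, and through the
+  // nvvm.ptr.gen.to.param intrinsic when it wraps a MoveParam, so that
+  // parameter symbols can be matched as direct addresses too.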
+  if (N.getOpcode() == ISD::TargetGlobalAddress
+      || N.getOpcode() == ISD::TargetExternalSymbol) {
+    Address = N;
+    return true;
+  }
+  if (N.getOpcode() == NVPTXISD::Wrapper) {
+    Address = N.getOperand(0);
+    return true;
+  }
+  if (N.getOpcode() == ISD::INTRINSIC_WO_CHAIN) {
+    unsigned IID = cast<ConstantSDNode>(N.getOperand(0))->getZExtValue();
+    if (IID == Intrinsic::nvvm_ptr_gen_to_param)
+      if (N.getOperand(1).getOpcode() == NVPTXISD::MoveParam)
+        return (SelectDirectAddr(N.getOperand(1).getOperand(0), Address));
+  }
+  return false;
+}
+
+// symbol+offset
+bool NVPTXDAGToDAGISel::SelectADDRsi_imp(SDNode *OpNode, SDValue Addr,
+                                         SDValue &Base, SDValue &Offset,
+                                         MVT mvt) {
+  if (Addr.getOpcode() == ISD::ADD) {
+    if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
+      SDValue base = Addr.getOperand(0);
+      if (SelectDirectAddr(base, Base)) {
+        Offset = CurDAG->getTargetConstant(CN->getZExtValue(), mvt);
+        return true;
+      }
+    }
+  }
+  return false;
+}
+
+// symbol+offset
+bool NVPTXDAGToDAGISel::SelectADDRsi(SDNode *OpNode, SDValue Addr,
+                                     SDValue &Base, SDValue &Offset) {
+  return SelectADDRsi_imp(OpNode, Addr, Base, Offset, MVT::i32);
+}
+
+// symbol+offset
+bool NVPTXDAGToDAGISel::SelectADDRsi64(SDNode *OpNode, SDValue Addr,
+                                       SDValue &Base, SDValue &Offset) {
+  return SelectADDRsi_imp(OpNode, Addr, Base, Offset, MVT::i64);
+}
+
+// register+offset
+bool NVPTXDAGToDAGISel::SelectADDRri_imp(SDNode *OpNode, SDValue Addr,
+                                         SDValue &Base, SDValue &Offset,
+                                         MVT mvt) {
+  if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
+    Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), mvt);
+    Offset = CurDAG->getTargetConstant(0, mvt);
+    return true;
+  }
+  if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
+      Addr.getOpcode() == ISD::TargetGlobalAddress)
+    return false;  // direct calls.
+
+  if (Addr.getOpcode() == ISD::ADD) {
+    if (SelectDirectAddr(Addr.getOperand(0), Addr)) {
+      return false;
+    }
+    if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
+      if (FrameIndexSDNode *FIN =
+          dyn_cast<FrameIndexSDNode>(Addr.getOperand(0)))
+        // Constant offset from frame ref.
+        Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), mvt);
+      else
+        Base = Addr.getOperand(0);
+      Offset = CurDAG->getTargetConstant(CN->getZExtValue(), mvt);
+      return true;
+    }
+  }
+  return false;
+}
+
+// register+offset
+bool NVPTXDAGToDAGISel::SelectADDRri(SDNode *OpNode, SDValue Addr,
+                                     SDValue &Base, SDValue &Offset) {
+  return SelectADDRri_imp(OpNode, Addr, Base, Offset, MVT::i32);
+}
+
+// register+offset
+bool NVPTXDAGToDAGISel::SelectADDRri64(SDNode *OpNode, SDValue Addr,
+                                       SDValue &Base, SDValue &Offset) {
+  return SelectADDRri_imp(OpNode, Addr, Base, Offset, MVT::i64);
+}
+
+bool NVPTXDAGToDAGISel::ChkMemSDNodeAddressSpace(SDNode *N,
+                                                 unsigned int spN) const {
+  const Value *Src = NULL;
+  // Even though MemIntrinsicSDNode is a subclass of MemSDNode,
+  // the classof() for MemSDNode does not include MemIntrinsicSDNode
+  // (See SelectionDAGNodes.h). So we need to check for both.
+  if (MemSDNode *mN = dyn_cast<MemSDNode>(N)) {
+    Src = mN->getSrcValue();
+  }
+  else if (MemSDNode *mN = dyn_cast<MemIntrinsicSDNode>(N)) {
+    Src = mN->getSrcValue();
+  }
+  if (!Src)
+    return false;
+  if (const PointerType *PT = dyn_cast<PointerType>(Src->getType()))
+    return (PT->getAddressSpace() == spN);
+  return false;
+}
+
+/// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
+/// inline asm expressions.
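+/// Returns true on failure; on success the selected base/offset operands
+/// for the 'm' constraint are appended to OutOps.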
+bool NVPTXDAGToDAGISel::SelectInlineAsmMemoryOperand(const SDValue &Op,
+                                                     char ConstraintCode,
+                                                     std::vector<SDValue> &OutOps) {
+  SDValue Op0, Op1;
+  switch (ConstraintCode) {
+  default: return true;
+  case 'm':   // memory
+    if (SelectDirectAddr(Op, Op0)) {
+      OutOps.push_back(Op0);
+      OutOps.push_back(CurDAG->getTargetConstant(0, MVT::i32));
+      return false;
+    }
+    if (SelectADDRri(Op.getNode(), Op, Op0, Op1)) {
+      OutOps.push_back(Op0);
+      OutOps.push_back(Op1);
+      return false;
+    }
+    break;
+  }
+  return true;
+}
+
+// Return true if N is an undef or a constant.
+// If N was undef, return a (i8imm 0) in Retval
+// If N was imm, convert it to i8imm and return in Retval
+// Note: The convert to i8imm is required, otherwise the
+// pattern matcher inserts a bunch of IMOVi8rr to convert
+// the imm to i8imm, and this causes instruction selection
+// to fail.
+bool NVPTXDAGToDAGISel::UndefOrImm(SDValue Op, SDValue N,
+                                   SDValue &Retval) {
+  if (!(N.getOpcode() == ISD::UNDEF) &&
+      !(N.getOpcode() == ISD::Constant))
+    return false;
+
+  if (N.getOpcode() == ISD::UNDEF)
+    Retval = CurDAG->getTargetConstant(0, MVT::i8);
+  else {
+    ConstantSDNode *cn = cast<ConstantSDNode>(N.getNode());
+    unsigned retval = cn->getZExtValue();
+    Retval = CurDAG->getTargetConstant(retval, MVT::i8);
+  }
+  return true;
+}
diff --git a/lib/Target/NVPTX/NVPTXISelDAGToDAG.h b/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
new file mode 100644
index 0000000..ccd69b29
--- /dev/null
+++ b/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
@@ -0,0 +1,105 @@
+//===-- NVPTXISelDAGToDAG.h - A dag to dag inst selector for NVPTX --------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines an instruction selector for the NVPTX target.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "nvptx-isel"
+
+#include "NVPTX.h"
+#include "NVPTXISelLowering.h"
+#include "NVPTXRegisterInfo.h"
+#include "NVPTXTargetMachine.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Intrinsics.h"
+using namespace llvm;
+
+namespace {
+
+class LLVM_LIBRARY_VISIBILITY NVPTXDAGToDAGISel : public SelectionDAGISel {
+
+  // If true, generate corresponding FPCONTRACT. This is
+  // language dependent (i.e. CUDA and OpenCL work differently).
+  bool doFMADF32;
+  bool doFMAF64;
+  bool doFMAF32;
+  bool doFMAF64AGG;
+  bool doFMAF32AGG;
+  bool allowFMA;
+
+  // 0: use div.approx
+  // 1: use div.full
+  // 2: For sm_20 and later, IEEE-compliant div.rnd.f32 can be generated;
+  //    Otherwise, use div.full
+  int do_DIVF32_PREC;
+
+  // If true, add .ftz to f32 instructions.
+  // This is only meaningful for sm_20 and later, as the default
+  // is not ftz.
+  // For sm earlier than sm_20, f32 denorms are always ftz by the
+  // hardware.
+  // We always add the .ftz modifier regardless of the sm value
+  // when UseF32FTZ is true.
+  bool UseF32FTZ;
+
+  // If true, generate mul.wide from sext and mul
+  bool doMulWide;
+
+public:
+  explicit NVPTXDAGToDAGISel(NVPTXTargetMachine &tm,
+                             CodeGenOpt::Level OptLevel);
+
+  // Pass Name
+  virtual const char *getPassName() const {
+    return "NVPTX DAG->DAG Pattern Instruction Selection";
+  }
+
+  const NVPTXSubtarget &Subtarget;
+
+  virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op,
+                                            char ConstraintCode,
+                                            std::vector<SDValue> &OutOps);
+private:
+  // Include the pieces autogenerated from the target description.
+#include "NVPTXGenDAGISel.inc"
+
+  SDNode *Select(SDNode *N);
+  SDNode* SelectLoad(SDNode *N);
+  SDNode* SelectStore(SDNode *N);
+
+  inline SDValue getI32Imm(unsigned Imm) {
+    return CurDAG->getTargetConstant(Imm, MVT::i32);
+  }
+
+  // Match direct address complex pattern.
+  bool SelectDirectAddr(SDValue N, SDValue &Address);
+
+  bool SelectADDRri_imp(SDNode *OpNode, SDValue Addr, SDValue &Base,
+                        SDValue &Offset, MVT mvt);
+  bool SelectADDRri(SDNode *OpNode, SDValue Addr, SDValue &Base,
+                    SDValue &Offset);
+  bool SelectADDRri64(SDNode *OpNode, SDValue Addr, SDValue &Base,
+                      SDValue &Offset);
+
+  bool SelectADDRsi_imp(SDNode *OpNode, SDValue Addr, SDValue &Base,
+                        SDValue &Offset, MVT mvt);
+  bool SelectADDRsi(SDNode *OpNode, SDValue Addr, SDValue &Base,
+                    SDValue &Offset);
+  bool SelectADDRsi64(SDNode *OpNode, SDValue Addr, SDValue &Base,
+                      SDValue &Offset);
+
+
+  bool ChkMemSDNodeAddressSpace(SDNode *N, unsigned int spN) const;
+
+  bool UndefOrImm(SDValue Op, SDValue N, SDValue &Retval);
+
+};
+}
diff --git a/lib/Target/NVPTX/NVPTXISelLowering.cpp b/lib/Target/NVPTX/NVPTXISelLowering.cpp
new file mode 100644
index 0000000..6ea10ea
--- /dev/null
+++ b/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -0,0 +1,1291 @@
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interfaces that NVPTX uses to lower LLVM code into a
+// selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+
+#include "NVPTX.h"
+#include "NVPTXISelLowering.h"
+#include "NVPTXTargetMachine.h"
+#include "NVPTXTargetObjectFile.h"
+#include "NVPTXUtilities.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/GlobalValue.h"
+#include "llvm/Module.h"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/Analysis.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include "llvm/MC/MCSectionELF.h"
+#include <sstream>
+
+#undef DEBUG_TYPE
+#define DEBUG_TYPE "nvptx-lower"
+
+using namespace llvm;
+
+static unsigned int uniqueCallSite = 0;
+
+static cl::opt<bool>
+RetainVectorOperands("nvptx-codegen-vectors",
+  cl::desc("NVPTX Specific: Retain LLVM's vectors and generate PTX vectors"),
+  cl::init(true));
+
+static cl::opt<bool>
+sched4reg("nvptx-sched4reg",
+  cl::desc("NVPTX Specific: schedule for register pressure"),
+  cl::init(false));
+
+// NVPTXTargetLowering Constructor.
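+// Registers the legal value types and records which operations are
+// Legal, Expand, or Custom, which drives legalization for this target.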
+NVPTXTargetLowering::NVPTXTargetLowering(NVPTXTargetMachine &TM)
+: TargetLowering(TM, new NVPTXTargetObjectFile()),
+  nvTM(&TM),
+  nvptxSubtarget(TM.getSubtarget<NVPTXSubtarget>()) {
+
+  // always lower memset, memcpy, and memmove intrinsics to load/store
+  // instructions, rather
+  // than generating calls to memset, memcpy or memmove.
+  maxStoresPerMemset = (unsigned)0xFFFFFFFF;
+  maxStoresPerMemcpy = (unsigned)0xFFFFFFFF;
+  maxStoresPerMemmove = (unsigned)0xFFFFFFFF;
+
+  setBooleanContents(ZeroOrNegativeOneBooleanContent);
+
+  // Jump is Expensive. Don't create extra control flow for 'and', 'or'
+  // condition branches.
+  setJumpIsExpensive(true);
+
+  // By default, use the Source scheduling
+  if (sched4reg)
+    setSchedulingPreference(Sched::RegPressure);
+  else
+    setSchedulingPreference(Sched::Source);
+
+  addRegisterClass(MVT::i1, &NVPTX::Int1RegsRegClass);
+  addRegisterClass(MVT::i8, &NVPTX::Int8RegsRegClass);
+  addRegisterClass(MVT::i16, &NVPTX::Int16RegsRegClass);
+  addRegisterClass(MVT::i32, &NVPTX::Int32RegsRegClass);
+  addRegisterClass(MVT::i64, &NVPTX::Int64RegsRegClass);
+  addRegisterClass(MVT::f32, &NVPTX::Float32RegsRegClass);
+  addRegisterClass(MVT::f64, &NVPTX::Float64RegsRegClass);
+
+  if (RetainVectorOperands) {
+    addRegisterClass(MVT::v2f32, &NVPTX::V2F32RegsRegClass);
+    addRegisterClass(MVT::v4f32, &NVPTX::V4F32RegsRegClass);
+    addRegisterClass(MVT::v2i32, &NVPTX::V2I32RegsRegClass);
+    addRegisterClass(MVT::v4i32, &NVPTX::V4I32RegsRegClass);
+    addRegisterClass(MVT::v2f64, &NVPTX::V2F64RegsRegClass);
+    addRegisterClass(MVT::v2i64, &NVPTX::V2I64RegsRegClass);
+    addRegisterClass(MVT::v2i16, &NVPTX::V2I16RegsRegClass);
+    addRegisterClass(MVT::v4i16, &NVPTX::V4I16RegsRegClass);
+    addRegisterClass(MVT::v2i8, &NVPTX::V2I8RegsRegClass);
+    addRegisterClass(MVT::v4i8, &NVPTX::V4I8RegsRegClass);
+
+    setOperationAction(ISD::BUILD_VECTOR, MVT::v4i32, Custom);
+    setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
+    setOperationAction(ISD::BUILD_VECTOR, MVT::v4i16, Custom);
+    setOperationAction(ISD::BUILD_VECTOR, MVT::v4i8, Custom);
+    setOperationAction(ISD::BUILD_VECTOR, MVT::v2i64, Custom);
+    setOperationAction(ISD::BUILD_VECTOR, MVT::v2f64, Custom);
+    setOperationAction(ISD::BUILD_VECTOR, MVT::v2i32, Custom);
+    setOperationAction(ISD::BUILD_VECTOR, MVT::v2f32, Custom);
+    setOperationAction(ISD::BUILD_VECTOR, MVT::v2i16, Custom);
+    setOperationAction(ISD::BUILD_VECTOR, MVT::v2i8, Custom);
+
+    setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v4i32, Custom);
+    setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v4f32, Custom);
+    setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v4i16, Custom);
+    setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v4i8, Custom);
+    setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2i64, Custom);
+    setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2f64, Custom);
+    setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2i32, Custom);
+    setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2f32, Custom);
+    setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2i16, Custom);
+    setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2i8, Custom);
+  }
+
+  // Operations not directly supported by NVPTX.
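+  // Marking them Expand lets the legalizer rewrite them in terms of
+  // operations PTX does provide (e.g. SIGN_EXTEND_INREG becomes shifts).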
+  setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
+  setOperationAction(ISD::BR_CC, MVT::Other, Expand);
+  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i64, Expand);
+  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Expand);
+  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
+  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand);
+  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
+
+  if (nvptxSubtarget.hasROT64()) {
+    setOperationAction(ISD::ROTL, MVT::i64, Legal);
+    setOperationAction(ISD::ROTR, MVT::i64, Legal);
+  }
+  else {
+    setOperationAction(ISD::ROTL, MVT::i64, Expand);
+    setOperationAction(ISD::ROTR, MVT::i64, Expand);
+  }
+  if (nvptxSubtarget.hasROT32()) {
+    setOperationAction(ISD::ROTL, MVT::i32, Legal);
+    setOperationAction(ISD::ROTR, MVT::i32, Legal);
+  }
+  else {
+    setOperationAction(ISD::ROTL, MVT::i32, Expand);
+    setOperationAction(ISD::ROTR, MVT::i32, Expand);
+  }
+
+  setOperationAction(ISD::ROTL, MVT::i16, Expand);
+  setOperationAction(ISD::ROTR, MVT::i16, Expand);
+  setOperationAction(ISD::ROTL, MVT::i8, Expand);
+  setOperationAction(ISD::ROTR, MVT::i8, Expand);
+  setOperationAction(ISD::BSWAP, MVT::i16, Expand);
+  setOperationAction(ISD::BSWAP, MVT::i32, Expand);
+  setOperationAction(ISD::BSWAP, MVT::i64, Expand);
+
+  // Indirect branch is not supported.
+  // This also disables Jump Table creation.
+  setOperationAction(ISD::BR_JT, MVT::Other, Expand);
+  setOperationAction(ISD::BRIND, MVT::Other, Expand);
+
+  setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
+  setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
+
+  // We want to legalize constant related memmove and memcpy
+  // intrinsics.
+  setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
+
+  // Turn FP extload into load/fextend
+  setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
+  // Turn FP truncstore into trunc + store.
+  setTruncStoreAction(MVT::f64, MVT::f32, Expand);
+
+  // PTX does not support load / store predicate registers
+  setOperationAction(ISD::LOAD, MVT::i1, Expand);
+  setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
+  setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);
+  setOperationAction(ISD::STORE, MVT::i1, Expand);
+  setTruncStoreAction(MVT::i64, MVT::i1, Expand);
+  setTruncStoreAction(MVT::i32, MVT::i1, Expand);
+  setTruncStoreAction(MVT::i16, MVT::i1, Expand);
+  setTruncStoreAction(MVT::i8, MVT::i1, Expand);
+
+  // This is legal in NVPTX
+  setOperationAction(ISD::ConstantFP, MVT::f64, Legal);
+  setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
+
+  // TRAP can be lowered to PTX trap
+  setOperationAction(ISD::TRAP, MVT::Other, Legal);
+
+  // By default, CONCAT_VECTORS is implemented via store/load
+  // through stack. It is slow and uses local memory. We need
+  // to custom-lower them.
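+  // (See LowerCONCAT_VECTORS below, which rebuilds the result with
+  // extract_vector_elt/build_vector nodes instead.)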
+ setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i32 , Custom); + setOperationAction(ISD::CONCAT_VECTORS, MVT::v4f32 , Custom); + setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i16 , Custom); + setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i8 , Custom); + setOperationAction(ISD::CONCAT_VECTORS, MVT::v2i64 , Custom); + setOperationAction(ISD::CONCAT_VECTORS, MVT::v2f64 , Custom); + setOperationAction(ISD::CONCAT_VECTORS, MVT::v2i32 , Custom); + setOperationAction(ISD::CONCAT_VECTORS, MVT::v2f32 , Custom); + setOperationAction(ISD::CONCAT_VECTORS, MVT::v2i16 , Custom); + setOperationAction(ISD::CONCAT_VECTORS, MVT::v2i8 , Custom); + + // Expand vector int to float and float to int conversions + // - For SINT_TO_FP and UINT_TO_FP, the src type + // (Node->getOperand(0).getValueType()) + // is used to determine the action, while for FP_TO_UINT and FP_TO_SINT, + // the dest type (Node->getValueType(0)) is used. + // + // See VectorLegalizer::LegalizeOp() (LegalizeVectorOps.cpp) for the vector + // case, and + // SelectionDAGLegalize::LegalizeOp() (LegalizeDAG.cpp) for the scalar case. + // + // That is why v4i32 or v2i32 are used here. + // + // The expansion for vectors happens in VectorLegalizer::LegalizeOp() + // (LegalizeVectorOps.cpp). + setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Expand); + setOperationAction(ISD::SINT_TO_FP, MVT::v2i32, Expand); + setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Expand); + setOperationAction(ISD::UINT_TO_FP, MVT::v2i32, Expand); + setOperationAction(ISD::FP_TO_SINT, MVT::v2i32, Expand); + setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Expand); + setOperationAction(ISD::FP_TO_UINT, MVT::v2i32, Expand); + setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Expand); + + // Now deduce the information based on the above mentioned + // actions + computeRegisterProperties(); +} + + +const char *NVPTXTargetLowering::getTargetNodeName(unsigned Opcode) const { + switch (Opcode) { + default: return 0; + case NVPTXISD::CALL: return "NVPTXISD::CALL"; + case NVPTXISD::RET_FLAG: return "NVPTXISD::RET_FLAG"; + case NVPTXISD::Wrapper: return "NVPTXISD::Wrapper"; + case NVPTXISD::NVBuiltin: return "NVPTXISD::NVBuiltin"; + case NVPTXISD::DeclareParam: return "NVPTXISD::DeclareParam"; + case NVPTXISD::DeclareScalarParam: + return "NVPTXISD::DeclareScalarParam"; + case NVPTXISD::DeclareRet: return "NVPTXISD::DeclareRet"; + case NVPTXISD::DeclareRetParam: return "NVPTXISD::DeclareRetParam"; + case NVPTXISD::PrintCall: return "NVPTXISD::PrintCall"; + case NVPTXISD::LoadParam: return "NVPTXISD::LoadParam"; + case NVPTXISD::StoreParam: return "NVPTXISD::StoreParam"; + case NVPTXISD::StoreParamS32: return "NVPTXISD::StoreParamS32"; + case NVPTXISD::StoreParamU32: return "NVPTXISD::StoreParamU32"; + case NVPTXISD::MoveToParam: return "NVPTXISD::MoveToParam"; + case NVPTXISD::CallArgBegin: return "NVPTXISD::CallArgBegin"; + case NVPTXISD::CallArg: return "NVPTXISD::CallArg"; + case NVPTXISD::LastCallArg: return "NVPTXISD::LastCallArg"; + case NVPTXISD::CallArgEnd: return "NVPTXISD::CallArgEnd"; + case NVPTXISD::CallVoid: return "NVPTXISD::CallVoid"; + case NVPTXISD::CallVal: return "NVPTXISD::CallVal"; + case NVPTXISD::CallSymbol: return "NVPTXISD::CallSymbol"; + case NVPTXISD::Prototype: return "NVPTXISD::Prototype"; + case NVPTXISD::MoveParam: return "NVPTXISD::MoveParam"; + case NVPTXISD::MoveRetval: return "NVPTXISD::MoveRetval"; + case NVPTXISD::MoveToRetval: return "NVPTXISD::MoveToRetval"; + case NVPTXISD::StoreRetval: return "NVPTXISD::StoreRetval"; + case 
NVPTXISD::PseudoUseParam: return "NVPTXISD::PseudoUseParam"; + case NVPTXISD::RETURN: return "NVPTXISD::RETURN"; + case NVPTXISD::CallSeqBegin: return "NVPTXISD::CallSeqBegin"; + case NVPTXISD::CallSeqEnd: return "NVPTXISD::CallSeqEnd"; + } +} + + +SDValue +NVPTXTargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const { + DebugLoc dl = Op.getDebugLoc(); + const GlobalValue *GV = cast(Op)->getGlobal(); + Op = DAG.getTargetGlobalAddress(GV, dl, getPointerTy()); + return DAG.getNode(NVPTXISD::Wrapper, dl, getPointerTy(), Op); +} + +std::string NVPTXTargetLowering::getPrototype(Type *retTy, + const ArgListTy &Args, + const SmallVectorImpl &Outs, + unsigned retAlignment) const { + + bool isABI = (nvptxSubtarget.getSmVersion() >= 20); + + std::stringstream O; + O << "prototype_" << uniqueCallSite << " : .callprototype "; + + if (retTy->getTypeID() == Type::VoidTyID) + O << "()"; + else { + O << "("; + if (isABI) { + if (retTy->isPrimitiveType() || retTy->isIntegerTy()) { + unsigned size = 0; + if (const IntegerType *ITy = dyn_cast(retTy)) { + size = ITy->getBitWidth(); + if (size < 32) size = 32; + } + else { + assert(retTy->isFloatingPointTy() && + "Floating point type expected here"); + size = retTy->getPrimitiveSizeInBits(); + } + + O << ".param .b" << size << " _"; + } + else if (isa(retTy)) + O << ".param .b" << getPointerTy().getSizeInBits() + << " _"; + else { + if ((retTy->getTypeID() == Type::StructTyID) || + isa(retTy)) { + SmallVector vtparts; + ComputeValueVTs(*this, retTy, vtparts); + unsigned totalsz = 0; + for (unsigned i=0,e=vtparts.size(); i!=e; ++i) { + unsigned elems = 1; + EVT elemtype = vtparts[i]; + if (vtparts[i].isVector()) { + elems = vtparts[i].getVectorNumElements(); + elemtype = vtparts[i].getVectorElementType(); + } + for (unsigned j=0, je=elems; j!=je; ++j) { + unsigned sz = elemtype.getSizeInBits(); + if (elemtype.isInteger() && (sz < 8)) sz = 8; + totalsz += sz/8; + } + } + O << ".param .align " + << retAlignment + << " .b8 _[" + << totalsz << "]"; + } + else { + assert(false && + "Unknown return type"); + } + } + } + else { + SmallVector vtparts; + ComputeValueVTs(*this, retTy, vtparts); + unsigned idx = 0; + for (unsigned i=0,e=vtparts.size(); i!=e; ++i) { + unsigned elems = 1; + EVT elemtype = vtparts[i]; + if (vtparts[i].isVector()) { + elems = vtparts[i].getVectorNumElements(); + elemtype = vtparts[i].getVectorElementType(); + } + + for (unsigned j=0, je=elems; j!=je; ++j) { + unsigned sz = elemtype.getSizeInBits(); + if (elemtype.isInteger() && (sz < 32)) sz = 32; + O << ".reg .b" << sz << " _"; + if (j(Ty)) { + sz = cast(Ty)->getBitWidth(); + if (sz < 32) sz = 32; + } + else if (isa(Ty)) + sz = thePointerTy.getSizeInBits(); + else + sz = Ty->getPrimitiveSizeInBits(); + if (isABI) + O << ".param .b" << sz << " "; + else + O << ".reg .b" << sz << " "; + O << "_"; + continue; + } + const PointerType *PTy = dyn_cast(Ty); + assert(PTy && + "Param with byval attribute should be a pointer type"); + Type *ETy = PTy->getElementType(); + + if (isABI) { + unsigned align = Outs[i].Flags.getByValAlign(); + unsigned sz = getTargetData()->getTypeAllocSize(ETy); + O << ".param .align " << align + << " .b8 "; + O << "_"; + O << "[" << sz << "]"; + continue; + } + else { + SmallVector vtparts; + ComputeValueVTs(*this, ETy, vtparts); + for (unsigned i=0,e=vtparts.size(); i!=e; ++i) { + unsigned elems = 1; + EVT elemtype = vtparts[i]; + if (vtparts[i].isVector()) { + elems = vtparts[i].getVectorNumElements(); + elemtype = vtparts[i].getVectorElementType(); + } + 
+ for (unsigned j=0,je=elems; j!=je; ++j) { + unsigned sz = elemtype.getSizeInBits(); + if (elemtype.isInteger() && (sz < 32)) sz = 32; + O << ".reg .b" << sz << " "; + O << "_"; + if (j &InVals) const { + SelectionDAG &DAG = CLI.DAG; + DebugLoc &dl = CLI.DL; + SmallVector &Outs = CLI.Outs; + SmallVector &OutVals = CLI.OutVals; + SmallVector &Ins = CLI.Ins; + SDValue Chain = CLI.Chain; + SDValue Callee = CLI.Callee; + bool &isTailCall = CLI.IsTailCall; + ArgListTy &Args = CLI.Args; + Type *retTy = CLI.RetTy; + ImmutableCallSite *CS = CLI.CS; + + bool isABI = (nvptxSubtarget.getSmVersion() >= 20); + + SDValue tempChain = Chain; + Chain = DAG.getCALLSEQ_START(Chain, + DAG.getIntPtrConstant(uniqueCallSite, true)); + SDValue InFlag = Chain.getValue(1); + + assert((Outs.size() == Args.size()) && + "Unexpected number of arguments to function call"); + unsigned paramCount = 0; + // Declare the .params or .reg need to pass values + // to the function + for (unsigned i=0, e=Outs.size(); i!=e; ++i) { + EVT VT = Outs[i].VT; + + if (Outs[i].Flags.isByVal() == false) { + // Plain scalar + // for ABI, declare .param .b .param; + // for nonABI, declare .reg .b .param; + unsigned isReg = 1; + if (isABI) + isReg = 0; + unsigned sz = VT.getSizeInBits(); + if (VT.isInteger() && (sz < 32)) sz = 32; + SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue); + SDValue DeclareParamOps[] = { Chain, + DAG.getConstant(paramCount, MVT::i32), + DAG.getConstant(sz, MVT::i32), + DAG.getConstant(isReg, MVT::i32), + InFlag }; + Chain = DAG.getNode(NVPTXISD::DeclareScalarParam, dl, DeclareParamVTs, + DeclareParamOps, 5); + InFlag = Chain.getValue(1); + SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue); + SDValue CopyParamOps[] = { Chain, DAG.getConstant(paramCount, MVT::i32), + DAG.getConstant(0, MVT::i32), OutVals[i], InFlag }; + + unsigned opcode = NVPTXISD::StoreParam; + if (isReg) + opcode = NVPTXISD::MoveToParam; + else { + if (Outs[i].Flags.isZExt()) + opcode = NVPTXISD::StoreParamU32; + else if (Outs[i].Flags.isSExt()) + opcode = NVPTXISD::StoreParamS32; + } + Chain = DAG.getNode(opcode, dl, CopyParamVTs, CopyParamOps, 5); + + InFlag = Chain.getValue(1); + ++paramCount; + continue; + } + // struct or vector + SmallVector vtparts; + const PointerType *PTy = dyn_cast(Args[i].Ty); + assert(PTy && + "Type of a byval parameter should be pointer"); + ComputeValueVTs(*this, PTy->getElementType(), vtparts); + + if (isABI) { + // declare .param .align 16 .b8 .param[]; + unsigned sz = Outs[i].Flags.getByValSize(); + SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue); + // The ByValAlign in the Outs[i].Flags is alway set at this point, so we + // don't need to + // worry about natural alignment or not. 
See TargetLowering::LowerCallTo() + SDValue DeclareParamOps[] = { Chain, + DAG.getConstant(Outs[i].Flags.getByValAlign(), MVT::i32), + DAG.getConstant(paramCount, MVT::i32), + DAG.getConstant(sz, MVT::i32), + InFlag }; + Chain = DAG.getNode(NVPTXISD::DeclareParam, dl, DeclareParamVTs, + DeclareParamOps, 5); + InFlag = Chain.getValue(1); + unsigned curOffset = 0; + for (unsigned j=0,je=vtparts.size(); j!=je; ++j) { + unsigned elems = 1; + EVT elemtype = vtparts[j]; + if (vtparts[j].isVector()) { + elems = vtparts[j].getVectorNumElements(); + elemtype = vtparts[j].getVectorElementType(); + } + for (unsigned k=0,ke=elems; k!=ke; ++k) { + unsigned sz = elemtype.getSizeInBits(); + if (elemtype.isInteger() && (sz < 8)) sz = 8; + SDValue srcAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(), + OutVals[i], + DAG.getConstant(curOffset, + getPointerTy())); + SDValue theVal = DAG.getLoad(elemtype, dl, tempChain, srcAddr, + MachinePointerInfo(), false, false, false, 0); + SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue); + SDValue CopyParamOps[] = { Chain, DAG.getConstant(paramCount, + MVT::i32), + DAG.getConstant(curOffset, MVT::i32), + theVal, InFlag }; + Chain = DAG.getNode(NVPTXISD::StoreParam, dl, CopyParamVTs, + CopyParamOps, 5); + InFlag = Chain.getValue(1); + curOffset += sz/8; + } + } + ++paramCount; + continue; + } + // Non-abi, struct or vector + // Declare a bunch or .reg .b .param + unsigned curOffset = 0; + for (unsigned j=0,je=vtparts.size(); j!=je; ++j) { + unsigned elems = 1; + EVT elemtype = vtparts[j]; + if (vtparts[j].isVector()) { + elems = vtparts[j].getVectorNumElements(); + elemtype = vtparts[j].getVectorElementType(); + } + for (unsigned k=0,ke=elems; k!=ke; ++k) { + unsigned sz = elemtype.getSizeInBits(); + if (elemtype.isInteger() && (sz < 32)) sz = 32; + SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue); + SDValue DeclareParamOps[] = { Chain, DAG.getConstant(paramCount, + MVT::i32), + DAG.getConstant(sz, MVT::i32), + DAG.getConstant(1, MVT::i32), + InFlag }; + Chain = DAG.getNode(NVPTXISD::DeclareScalarParam, dl, DeclareParamVTs, + DeclareParamOps, 5); + InFlag = Chain.getValue(1); + SDValue srcAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(), OutVals[i], + DAG.getConstant(curOffset, + getPointerTy())); + SDValue theVal = DAG.getLoad(elemtype, dl, tempChain, srcAddr, + MachinePointerInfo(), false, false, false, 0); + SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue); + SDValue CopyParamOps[] = { Chain, DAG.getConstant(paramCount, MVT::i32), + DAG.getConstant(0, MVT::i32), theVal, + InFlag }; + Chain = DAG.getNode(NVPTXISD::MoveToParam, dl, CopyParamVTs, + CopyParamOps, 5); + InFlag = Chain.getValue(1); + ++paramCount; + } + } + } + + GlobalAddressSDNode *Func = dyn_cast(Callee.getNode()); + unsigned retAlignment = 0; + + // Handle Result + unsigned retCount = 0; + if (Ins.size() > 0) { + SmallVector resvtparts; + ComputeValueVTs(*this, retTy, resvtparts); + + // Declare one .param .align 16 .b8 func_retval0[] for ABI or + // individual .reg .b func_retval<0..> for non ABI + unsigned resultsz = 0; + for (unsigned i=0,e=resvtparts.size(); i!=e; ++i) { + unsigned elems = 1; + EVT elemtype = resvtparts[i]; + if (resvtparts[i].isVector()) { + elems = resvtparts[i].getVectorNumElements(); + elemtype = resvtparts[i].getVectorElementType(); + } + for (unsigned j=0,je=elems; j!=je; ++j) { + unsigned sz = elemtype.getSizeInBits(); + if (isABI == false) { + if (elemtype.isInteger() && (sz < 32)) sz = 32; + } + else { + if (elemtype.isInteger() && (sz < 
8)) sz = 8; + } + if (isABI == false) { + SDVTList DeclareRetVTs = DAG.getVTList(MVT::Other, MVT::Glue); + SDValue DeclareRetOps[] = { Chain, DAG.getConstant(2, MVT::i32), + DAG.getConstant(sz, MVT::i32), + DAG.getConstant(retCount, MVT::i32), + InFlag }; + Chain = DAG.getNode(NVPTXISD::DeclareRet, dl, DeclareRetVTs, + DeclareRetOps, 5); + InFlag = Chain.getValue(1); + ++retCount; + } + resultsz += sz; + } + } + if (isABI) { + if (retTy->isPrimitiveType() || retTy->isIntegerTy() || + retTy->isPointerTy() ) { + // Scalar needs to be at least 32bit wide + if (resultsz < 32) + resultsz = 32; + SDVTList DeclareRetVTs = DAG.getVTList(MVT::Other, MVT::Glue); + SDValue DeclareRetOps[] = { Chain, DAG.getConstant(1, MVT::i32), + DAG.getConstant(resultsz, MVT::i32), + DAG.getConstant(0, MVT::i32), InFlag }; + Chain = DAG.getNode(NVPTXISD::DeclareRet, dl, DeclareRetVTs, + DeclareRetOps, 5); + InFlag = Chain.getValue(1); + } + else { + if (Func) { // direct call + if (!llvm::getAlign(*(CS->getCalledFunction()), 0, retAlignment)) + retAlignment = getTargetData()->getABITypeAlignment(retTy); + } else { // indirect call + const CallInst *CallI = dyn_cast(CS->getInstruction()); + if (!llvm::getAlign(*CallI, 0, retAlignment)) + retAlignment = getTargetData()->getABITypeAlignment(retTy); + } + SDVTList DeclareRetVTs = DAG.getVTList(MVT::Other, MVT::Glue); + SDValue DeclareRetOps[] = { Chain, DAG.getConstant(retAlignment, + MVT::i32), + DAG.getConstant(resultsz/8, MVT::i32), + DAG.getConstant(0, MVT::i32), InFlag }; + Chain = DAG.getNode(NVPTXISD::DeclareRetParam, dl, DeclareRetVTs, + DeclareRetOps, 5); + InFlag = Chain.getValue(1); + } + } + } + + if (!Func) { + // This is indirect function call case : PTX requires a prototype of the + // form + // proto_0 : .callprototype(.param .b32 _) _ (.param .b32 _); + // to be emitted, and the label has to used as the last arg of call + // instruction. + // The prototype is embedded in a string and put as the operand for an + // INLINEASM SDNode. + SDVTList InlineAsmVTs = DAG.getVTList(MVT::Other, MVT::Glue); + std::string proto_string = getPrototype(retTy, Args, Outs, retAlignment); + const char *asmstr = nvTM->getManagedStrPool()-> + getManagedString(proto_string.c_str())->c_str(); + SDValue InlineAsmOps[] = { Chain, + DAG.getTargetExternalSymbol(asmstr, + getPointerTy()), + DAG.getMDNode(0), + DAG.getTargetConstant(0, MVT::i32), InFlag }; + Chain = DAG.getNode(ISD::INLINEASM, dl, InlineAsmVTs, InlineAsmOps, 5); + InFlag = Chain.getValue(1); + } + // Op to just print "call" + SDVTList PrintCallVTs = DAG.getVTList(MVT::Other, MVT::Glue); + SDValue PrintCallOps[] = { Chain, + DAG.getConstant(isABI ? ((Ins.size()==0) ? 
0 : 1) + : retCount, MVT::i32), + InFlag }; + Chain = DAG.getNode(Func?(NVPTXISD::PrintCallUni):(NVPTXISD::PrintCall), dl, + PrintCallVTs, PrintCallOps, 3); + InFlag = Chain.getValue(1); + + // Ops to print out the function name + SDVTList CallVoidVTs = DAG.getVTList(MVT::Other, MVT::Glue); + SDValue CallVoidOps[] = { Chain, Callee, InFlag }; + Chain = DAG.getNode(NVPTXISD::CallVoid, dl, CallVoidVTs, CallVoidOps, 3); + InFlag = Chain.getValue(1); + + // Ops to print out the param list + SDVTList CallArgBeginVTs = DAG.getVTList(MVT::Other, MVT::Glue); + SDValue CallArgBeginOps[] = { Chain, InFlag }; + Chain = DAG.getNode(NVPTXISD::CallArgBegin, dl, CallArgBeginVTs, + CallArgBeginOps, 2); + InFlag = Chain.getValue(1); + + for (unsigned i=0, e=paramCount; i!=e; ++i) { + unsigned opcode; + if (i==(e-1)) + opcode = NVPTXISD::LastCallArg; + else + opcode = NVPTXISD::CallArg; + SDVTList CallArgVTs = DAG.getVTList(MVT::Other, MVT::Glue); + SDValue CallArgOps[] = { Chain, DAG.getConstant(1, MVT::i32), + DAG.getConstant(i, MVT::i32), + InFlag }; + Chain = DAG.getNode(opcode, dl, CallArgVTs, CallArgOps, 4); + InFlag = Chain.getValue(1); + } + SDVTList CallArgEndVTs = DAG.getVTList(MVT::Other, MVT::Glue); + SDValue CallArgEndOps[] = { Chain, + DAG.getConstant(Func ? 1 : 0, MVT::i32), + InFlag }; + Chain = DAG.getNode(NVPTXISD::CallArgEnd, dl, CallArgEndVTs, CallArgEndOps, + 3); + InFlag = Chain.getValue(1); + + if (!Func) { + SDVTList PrototypeVTs = DAG.getVTList(MVT::Other, MVT::Glue); + SDValue PrototypeOps[] = { Chain, + DAG.getConstant(uniqueCallSite, MVT::i32), + InFlag }; + Chain = DAG.getNode(NVPTXISD::Prototype, dl, PrototypeVTs, PrototypeOps, 3); + InFlag = Chain.getValue(1); + } + + // Generate loads from param memory/moves from registers for result + if (Ins.size() > 0) { + if (isABI) { + unsigned resoffset = 0; + for (unsigned i=0,e=Ins.size(); i!=e; ++i) { + unsigned sz = Ins[i].VT.getSizeInBits(); + if (Ins[i].VT.isInteger() && (sz < 8)) sz = 8; + std::vector LoadRetVTs; + LoadRetVTs.push_back(Ins[i].VT); + LoadRetVTs.push_back(MVT::Other); LoadRetVTs.push_back(MVT::Glue); + std::vector LoadRetOps; + LoadRetOps.push_back(Chain); + LoadRetOps.push_back(DAG.getConstant(1, MVT::i32)); + LoadRetOps.push_back(DAG.getConstant(resoffset, MVT::i32)); + LoadRetOps.push_back(InFlag); + SDValue retval = DAG.getNode(NVPTXISD::LoadParam, dl, LoadRetVTs, + &LoadRetOps[0], LoadRetOps.size()); + Chain = retval.getValue(1); + InFlag = retval.getValue(2); + InVals.push_back(retval); + resoffset += sz/8; + } + } + else { + SmallVector resvtparts; + ComputeValueVTs(*this, retTy, resvtparts); + + assert(Ins.size() == resvtparts.size() && + "Unexpected number of return values in non-ABI case"); + unsigned paramNum = 0; + for (unsigned i=0,e=Ins.size(); i!=e; ++i) { + assert(EVT(Ins[i].VT) == resvtparts[i] && + "Unexpected EVT type in non-ABI case"); + unsigned numelems = 1; + EVT elemtype = Ins[i].VT; + if (Ins[i].VT.isVector()) { + numelems = Ins[i].VT.getVectorNumElements(); + elemtype = Ins[i].VT.getVectorElementType(); + } + std::vector tempRetVals; + for (unsigned j=0; j MoveRetVTs; + MoveRetVTs.push_back(elemtype); + MoveRetVTs.push_back(MVT::Other); MoveRetVTs.push_back(MVT::Glue); + std::vector MoveRetOps; + MoveRetOps.push_back(Chain); + MoveRetOps.push_back(DAG.getConstant(0, MVT::i32)); + MoveRetOps.push_back(DAG.getConstant(paramNum, MVT::i32)); + MoveRetOps.push_back(InFlag); + SDValue retval = DAG.getNode(NVPTXISD::LoadParam, dl, MoveRetVTs, + &MoveRetOps[0], MoveRetOps.size()); + Chain = 
retval.getValue(1); + InFlag = retval.getValue(2); + tempRetVals.push_back(retval); + ++paramNum; + } + if (Ins[i].VT.isVector()) + InVals.push_back(DAG.getNode(ISD::BUILD_VECTOR, dl, Ins[i].VT, + &tempRetVals[0], tempRetVals.size())); + else + InVals.push_back(tempRetVals[0]); + } + } + } + Chain = DAG.getCALLSEQ_END(Chain, + DAG.getIntPtrConstant(uniqueCallSite, true), + DAG.getIntPtrConstant(uniqueCallSite+1, true), + InFlag); + uniqueCallSite++; + + // set isTailCall to false for now, until we figure out how to express + // tail call optimization in PTX + isTailCall = false; + return Chain; +} + +// By default CONCAT_VECTORS is lowered by ExpandVectorBuildThroughStack() +// (see LegalizeDAG.cpp). This is slow and uses local memory. +// We use extract/insert/build vector just as what LegalizeOp() does in llvm 2.5 +SDValue NVPTXTargetLowering:: +LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const { + SDNode *Node = Op.getNode(); + DebugLoc dl = Node->getDebugLoc(); + SmallVector Ops; + unsigned NumOperands = Node->getNumOperands(); + for (unsigned i=0; i < NumOperands; ++i) { + SDValue SubOp = Node->getOperand(i); + EVT VVT = SubOp.getNode()->getValueType(0); + EVT EltVT = VVT.getVectorElementType(); + unsigned NumSubElem = VVT.getVectorNumElements(); + for (unsigned j=0; j < NumSubElem; ++j) { + Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, SubOp, + DAG.getIntPtrConstant(j))); + } + } + return DAG.getNode(ISD::BUILD_VECTOR, dl, Node->getValueType(0), + &Ops[0], Ops.size()); +} + +SDValue NVPTXTargetLowering:: +LowerOperation(SDValue Op, SelectionDAG &DAG) const { + switch (Op.getOpcode()) { + case ISD::RETURNADDR: return SDValue(); + case ISD::FRAMEADDR: return SDValue(); + case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG); + case ISD::INTRINSIC_W_CHAIN: return Op; + case ISD::BUILD_VECTOR: + case ISD::EXTRACT_SUBVECTOR: + return Op; + case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG); + default: + llvm_unreachable("Custom lowering not defined for operation"); + } +} + +SDValue +NVPTXTargetLowering::getExtSymb(SelectionDAG &DAG, const char *inname, int idx, + EVT v) const { + std::string *name = nvTM->getManagedStrPool()->getManagedString(inname); + std::stringstream suffix; + suffix << idx; + *name += suffix.str(); + return DAG.getTargetExternalSymbol(name->c_str(), v); +} + +SDValue +NVPTXTargetLowering::getParamSymbol(SelectionDAG &DAG, int idx, EVT v) const { + return getExtSymb(DAG, ".PARAM", idx, v); +} + +SDValue +NVPTXTargetLowering::getParamHelpSymbol(SelectionDAG &DAG, int idx) { + return getExtSymb(DAG, ".HLPPARAM", idx); +} + +// Check to see if the kernel argument is image*_t or sampler_t + +bool llvm::isImageOrSamplerVal(const Value *arg, const Module *context) { + static const char *const specialTypes[] = { + "struct._image2d_t", + "struct._image3d_t", + "struct._sampler_t" + }; + + const Type *Ty = arg->getType(); + const PointerType *PTy = dyn_cast(Ty); + + if (!PTy) + return false; + + if (!context) + return false; + + const StructType *STy = dyn_cast(PTy->getElementType()); + const std::string TypeName = STy ? 
STy->getName() : ""; + + for (int i = 0, e = array_lengthof(specialTypes); i != e; ++i) + if (TypeName == specialTypes[i]) + return true; + + return false; +} + +SDValue +NVPTXTargetLowering::LowerFormalArguments(SDValue Chain, + CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl &Ins, + DebugLoc dl, SelectionDAG &DAG, + SmallVectorImpl &InVals) const { + MachineFunction &MF = DAG.getMachineFunction(); + const TargetData *TD = getTargetData(); + + const Function *F = MF.getFunction(); + const AttrListPtr &PAL = F->getAttributes(); + + SDValue Root = DAG.getRoot(); + std::vector OutChains; + + bool isKernel = llvm::isKernelFunction(*F); + bool isABI = (nvptxSubtarget.getSmVersion() >= 20); + + std::vector argTypes; + std::vector theArgs; + for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end(); + I != E; ++I) { + theArgs.push_back(I); + argTypes.push_back(I->getType()); + } + assert(argTypes.size() == Ins.size() && + "Ins types and function types did not match"); + + int idx = 0; + for (unsigned i=0, e=Ins.size(); i!=e; ++i, ++idx) { + Type *Ty = argTypes[i]; + EVT ObjectVT = getValueType(Ty); + assert(ObjectVT == Ins[i].VT && + "Ins type did not match function type"); + + // If the kernel argument is image*_t or sampler_t, convert it to + // a i32 constant holding the parameter position. This can later + // matched in the AsmPrinter to output the correct mangled name. + if (isImageOrSamplerVal(theArgs[i], + (theArgs[i]->getParent() ? + theArgs[i]->getParent()->getParent() : 0))) { + assert(isKernel && "Only kernels can have image/sampler params"); + InVals.push_back(DAG.getConstant(i+1, MVT::i32)); + continue; + } + + if (theArgs[i]->use_empty()) { + // argument is dead + InVals.push_back(DAG.getNode(ISD::UNDEF, dl, ObjectVT)); + continue; + } + + // In the following cases, assign a node order of "idx+1" + // to newly created nodes. The SDNOdes for params have to + // appear in the same order as their order of appearance + // in the original function. "idx+1" holds that order. + if (PAL.paramHasAttr(i+1, Attribute::ByVal) == false) { + // A plain scalar. + if (isABI || isKernel) { + // If ABI, load from the param symbol + SDValue Arg = getParamSymbol(DAG, idx); + Value *srcValue = new Argument(PointerType::get(ObjectVT.getTypeForEVT( + F->getContext()), + llvm::ADDRESS_SPACE_PARAM)); + SDValue p = DAG.getLoad(ObjectVT, dl, Root, Arg, + MachinePointerInfo(srcValue), false, false, + false, + TD->getABITypeAlignment(ObjectVT.getTypeForEVT( + F->getContext()))); + if (p.getNode()) + DAG.AssignOrdering(p.getNode(), idx+1); + InVals.push_back(p); + } + else { + // If no ABI, just move the param symbol + SDValue Arg = getParamSymbol(DAG, idx, ObjectVT); + SDValue p = DAG.getNode(NVPTXISD::MoveParam, dl, ObjectVT, Arg); + if (p.getNode()) + DAG.AssignOrdering(p.getNode(), idx+1); + InVals.push_back(p); + } + continue; + } + + // Param has ByVal attribute + if (isABI || isKernel) { + // Return MoveParam(param symbol). + // Ideally, the param symbol can be returned directly, + // but when SDNode builder decides to use it in a CopyToReg(), + // machine instruction fails because TargetExternalSymbol + // (not lowered) is target dependent, and CopyToReg assumes + // the source is lowered. 
+ SDValue Arg = getParamSymbol(DAG, idx, getPointerTy()); + SDValue p = DAG.getNode(NVPTXISD::MoveParam, dl, ObjectVT, Arg); + if (p.getNode()) + DAG.AssignOrdering(p.getNode(), idx+1); + if (isKernel) + InVals.push_back(p); + else { + SDValue p2 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, ObjectVT, + DAG.getConstant(Intrinsic::nvvm_ptr_local_to_gen, MVT::i32), + p); + InVals.push_back(p2); + } + } else { + // Have to move a set of param symbols to registers and + // store them locally and return the local pointer in InVals + const PointerType *elemPtrType = dyn_cast(argTypes[i]); + assert(elemPtrType && + "Byval parameter should be a pointer type"); + Type *elemType = elemPtrType->getElementType(); + // Compute the constituent parts + SmallVector vtparts; + SmallVector offsets; + ComputeValueVTs(*this, elemType, vtparts, &offsets, 0); + unsigned totalsize = 0; + for (unsigned j=0, je=vtparts.size(); j!=je; ++j) + totalsize += vtparts[j].getStoreSizeInBits(); + SDValue localcopy = DAG.getFrameIndex(MF.getFrameInfo()-> + CreateStackObject(totalsize/8, 16, false), + getPointerTy()); + unsigned sizesofar = 0; + std::vector theChains; + for (unsigned j=0, je=vtparts.size(); j!=je; ++j) { + unsigned numElems = 1; + if (vtparts[j].isVector()) numElems = vtparts[j].getVectorNumElements(); + for (unsigned k=0, ke=numElems; k!=ke; ++k) { + EVT tmpvt = vtparts[j]; + if (tmpvt.isVector()) tmpvt = tmpvt.getVectorElementType(); + SDValue arg = DAG.getNode(NVPTXISD::MoveParam, dl, tmpvt, + getParamSymbol(DAG, idx, tmpvt)); + SDValue addr = DAG.getNode(ISD::ADD, dl, getPointerTy(), localcopy, + DAG.getConstant(sizesofar, getPointerTy())); + theChains.push_back(DAG.getStore(Chain, dl, arg, addr, + MachinePointerInfo(), false, false, 0)); + sizesofar += tmpvt.getStoreSizeInBits()/8; + ++idx; + } + } + --idx; + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &theChains[0], + theChains.size()); + InVals.push_back(localcopy); + } + } + + // Clang will check explicit VarArg and issue error if any. However, Clang + // will let code with + // implicit var arg like f() pass. + // We treat this case as if the arg list is empty. + //if (F.isVarArg()) { + // assert(0 && "VarArg not supported yet!"); + //} + + if (!OutChains.empty()) + DAG.setRoot(DAG.getNode(ISD::TokenFactor, dl, MVT::Other, + &OutChains[0], OutChains.size())); + + return Chain; +} + +SDValue +NVPTXTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, + bool isVarArg, + const SmallVectorImpl &Outs, + const SmallVectorImpl &OutVals, + DebugLoc dl, SelectionDAG &DAG) const { + + bool isABI = (nvptxSubtarget.getSmVersion() >= 20); + + unsigned sizesofar = 0; + unsigned idx = 0; + for (unsigned i=0, e=Outs.size(); i!=e; ++i) { + SDValue theVal = OutVals[i]; + EVT theValType = theVal.getValueType(); + unsigned numElems = 1; + if (theValType.isVector()) numElems = theValType.getVectorNumElements(); + for (unsigned j=0,je=numElems; j!=je; ++j) { + SDValue tmpval = theVal; + if (theValType.isVector()) + tmpval = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, + theValType.getVectorElementType(), + tmpval, DAG.getIntPtrConstant(j)); + Chain = DAG.getNode(isABI ? NVPTXISD::StoreRetval :NVPTXISD::MoveToRetval, + dl, MVT::Other, + Chain, + DAG.getConstant(isABI ? 
sizesofar : idx, MVT::i32),
+                          tmpval);
+      if (theValType.isVector())
+        sizesofar += theValType.getVectorElementType().getStoreSizeInBits()/8;
+      else
+        sizesofar += theValType.getStoreSizeInBits()/8;
+      ++idx;
+    }
+  }
+
+  return DAG.getNode(NVPTXISD::RET_FLAG, dl, MVT::Other, Chain);
+}
+
+void
+NVPTXTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
+                                                  std::string &Constraint,
+                                                  std::vector<SDValue> &Ops,
+                                                  SelectionDAG &DAG) const
+{
+  if (Constraint.length() > 1)
+    return;
+  else
+    TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
+}
+
+// NVPTX supports vectors of legal types of any length in intrinsics, because
+// the NVPTX-specific type legalizer will legalize them to the PTX supported
+// length.
+bool
+NVPTXTargetLowering::isTypeSupportedInIntrinsic(MVT VT) const {
+  if (isTypeLegal(VT))
+    return true;
+  if (VT.isVector()) {
+    MVT eVT = VT.getVectorElementType();
+    if (isTypeLegal(eVT))
+      return true;
+  }
+  return false;
+}
+
+
+// llvm.ptx.memcpy.const and llvm.ptx.memmove.const need to be modeled as
+// TgtMemIntrinsic because we need the information that is only available in
+// the "Value" type of the destination pointer. In particular, the address
+// space information.
+bool
+NVPTXTargetLowering::getTgtMemIntrinsic(IntrinsicInfo& Info, const CallInst &I,
+                                        unsigned Intrinsic) const {
+  switch (Intrinsic) {
+  default:
+    return false;
+
+  case Intrinsic::nvvm_atomic_load_add_f32:
+    Info.opc = ISD::INTRINSIC_W_CHAIN;
+    Info.memVT = MVT::f32;
+    Info.ptrVal = I.getArgOperand(0);
+    Info.offset = 0;
+    Info.vol = 0;
+    Info.readMem = true;
+    Info.writeMem = true;
+    Info.align = 0;
+    return true;
+
+  case Intrinsic::nvvm_atomic_load_inc_32:
+  case Intrinsic::nvvm_atomic_load_dec_32:
+    Info.opc = ISD::INTRINSIC_W_CHAIN;
+    Info.memVT = MVT::i32;
+    Info.ptrVal = I.getArgOperand(0);
+    Info.offset = 0;
+    Info.vol = 0;
+    Info.readMem = true;
+    Info.writeMem = true;
+    Info.align = 0;
+    return true;
+
+  case Intrinsic::nvvm_ldu_global_i:
+  case Intrinsic::nvvm_ldu_global_f:
+  case Intrinsic::nvvm_ldu_global_p:
+
+    Info.opc = ISD::INTRINSIC_W_CHAIN;
+    if (Intrinsic == Intrinsic::nvvm_ldu_global_i)
+      Info.memVT = MVT::i32;
+    else if (Intrinsic == Intrinsic::nvvm_ldu_global_p)
+      Info.memVT = getPointerTy();
+    else
+      Info.memVT = MVT::f32;
+    Info.ptrVal = I.getArgOperand(0);
+    Info.offset = 0;
+    Info.vol = 0;
+    Info.readMem = true;
+    Info.writeMem = false;
+    Info.align = 0;
+    return true;
+
+  }
+  return false;
+}
+
+/// isLegalAddressingMode - Return true if the addressing mode represented
+/// by AM is legal for this target, for a load/store of the specified type.
+/// Used to guide target specific optimizations, like loop strength reduction
+/// (LoopStrengthReduce.cpp) and memory optimization for address mode
+/// (CodeGenPrepare.cpp)
+bool
+NVPTXTargetLowering::isLegalAddressingMode(const AddrMode &AM,
+                                           Type *Ty) const {
+
+  // AddrMode - This represents an addressing mode of:
+  //   BaseGV + BaseOffs + BaseReg + Scale*ScaleReg
+  //
+  // The legal address modes are
+  // - [avar]
+  // - [areg]
+  // - [areg+immoff]
+  // - [immAddr]
+
+  if (AM.BaseGV) {
+    if (AM.BaseOffs || AM.HasBaseReg || AM.Scale)
+      return false;
+    return true;
+  }
+
+  switch (AM.Scale) {
+  case 0:  // "r", "r+i" or "i" is allowed
+    break;
+  case 1:
+    if (AM.HasBaseReg)  // "r+r+i" or "r+r" is not allowed.
+      return false;
+    // Otherwise we have r+i.
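+    // (Editorial note, not in the original source: for example, a PTX access
+    // such as
+    //     ld.global.f32 %f0, [%rd1+8];
+    // uses the [areg+immoff] form accepted here -- one base register plus a
+    // constant offset, with no scaled index register.)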
+    break;
+  default:
+    // No scale > 1 is allowed
+    return false;
+  }
+  return true;
+}
+
+//===----------------------------------------------------------------------===//
+// NVPTX Inline Assembly Support
+//===----------------------------------------------------------------------===//
+
+/// getConstraintType - Given a constraint letter, return the type of
+/// constraint it is for this target.
+NVPTXTargetLowering::ConstraintType
+NVPTXTargetLowering::getConstraintType(const std::string &Constraint) const {
+  if (Constraint.size() == 1) {
+    switch (Constraint[0]) {
+    default:
+      break;
+    case 'r':
+    case 'h':
+    case 'c':
+    case 'l':
+    case 'f':
+    case 'd':
+    case '0':
+    case 'N':
+      return C_RegisterClass;
+    }
+  }
+  return TargetLowering::getConstraintType(Constraint);
+}
+
+
+std::pair<unsigned, const TargetRegisterClass *>
+NVPTXTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
+                                                  EVT VT) const {
+  if (Constraint.size() == 1) {
+    switch (Constraint[0]) {
+    case 'c':
+      return std::make_pair(0U, &NVPTX::Int8RegsRegClass);
+    case 'h':
+      return std::make_pair(0U, &NVPTX::Int16RegsRegClass);
+    case 'r':
+      return std::make_pair(0U, &NVPTX::Int32RegsRegClass);
+    case 'l':
+    case 'N':
+      return std::make_pair(0U, &NVPTX::Int64RegsRegClass);
+    case 'f':
+      return std::make_pair(0U, &NVPTX::Float32RegsRegClass);
+    case 'd':
+      return std::make_pair(0U, &NVPTX::Float64RegsRegClass);
+    }
+  }
+  return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
+}
+
+
+
+/// getFunctionAlignment - Return the Log2 alignment of this function.
+unsigned NVPTXTargetLowering::getFunctionAlignment(const Function *) const {
+  return 4;
+}
diff --git a/lib/Target/NVPTX/NVPTXISelLowering.h b/lib/Target/NVPTX/NVPTXISelLowering.h
new file mode 100644
index 0000000..86246e6
--- /dev/null
+++ b/lib/Target/NVPTX/NVPTXISelLowering.h
@@ -0,0 +1,144 @@
+//===-- NVPTXISelLowering.h - NVPTX DAG Lowering Interface ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interfaces that NVPTX uses to lower LLVM code into a
+// selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef NVPTXISELLOWERING_H
+#define NVPTXISELLOWERING_H
+
+#include "NVPTX.h"
+#include "NVPTXSubtarget.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/Target/TargetLowering.h"
+
+namespace llvm {
+namespace NVPTXISD {
+enum NodeType {
+  // Start the numbering from where ISD NodeType finishes.
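+  // (Editorial note, not in the original source: these are the NVPTX-specific
+  // pseudo-opcodes referenced by the lowering code above, e.g.
+  // NVPTXISD::MoveParam in LowerFormalArguments and NVPTXISD::StoreRetval /
+  // NVPTXISD::MoveToRetval in LowerReturn.)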
+  FIRST_NUMBER = ISD::BUILTIN_OP_END,
+  Wrapper,
+  CALL,
+  RET_FLAG,
+  LOAD_PARAM,
+  NVBuiltin,
+  DeclareParam,
+  DeclareScalarParam,
+  DeclareRetParam,
+  DeclareRet,
+  DeclareScalarRet,
+  LoadParam,
+  StoreParam,
+  StoreParamS32, // to sext and store a <32-bit value, not used currently
+  StoreParamU32, // to zext and store a <32-bit value, not used currently
+  MoveToParam,
+  PrintCall,
+  PrintCallUni,
+  CallArgBegin,
+  CallArg,
+  LastCallArg,
+  CallArgEnd,
+  CallVoid,
+  CallVal,
+  CallSymbol,
+  Prototype,
+  MoveParam,
+  MoveRetval,
+  MoveToRetval,
+  StoreRetval,
+  PseudoUseParam,
+  RETURN,
+  CallSeqBegin,
+  CallSeqEnd,
+  Dummy
+};
+}
+
+//===--------------------------------------------------------------------===//
+// TargetLowering Implementation
+//===--------------------------------------------------------------------===//
+class NVPTXTargetLowering : public TargetLowering {
+public:
+  explicit NVPTXTargetLowering(NVPTXTargetMachine &TM);
+  virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
+
+  SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
+  SDValue LowerGlobalAddress(const GlobalValue *GV, int64_t Offset,
+                             SelectionDAG &DAG) const;
+
+  virtual const char *getTargetNodeName(unsigned Opcode) const;
+
+  bool isTypeSupportedInIntrinsic(MVT VT) const;
+
+  bool getTgtMemIntrinsic(IntrinsicInfo& Info, const CallInst &I,
+                          unsigned Intrinsic) const;
+
+  /// isLegalAddressingMode - Return true if the addressing mode represented
+  /// by AM is legal for this target, for a load/store of the specified type.
+  /// Used to guide target specific optimizations, like loop strength
+  /// reduction (LoopStrengthReduce.cpp) and memory optimization for
+  /// address mode (CodeGenPrepare.cpp)
+  virtual bool isLegalAddressingMode(const AddrMode &AM, Type *Ty) const;
+
+  /// getFunctionAlignment - Return the Log2 alignment of this function.
+  virtual unsigned getFunctionAlignment(const Function *F) const;
+
+  virtual EVT getSetCCResultType(EVT VT) const {
+    return MVT::i1;
+  }
+
+  ConstraintType getConstraintType(const std::string &Constraint) const;
+  std::pair<unsigned, const TargetRegisterClass *>
+  getRegForInlineAsmConstraint(const std::string &Constraint, EVT VT) const;
+
+  virtual SDValue
+  LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
+                       const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl,
+                       SelectionDAG &DAG,
+                       SmallVectorImpl<SDValue> &InVals) const;
+
+  virtual SDValue
+  LowerCall(CallLoweringInfo &CLI, SmallVectorImpl<SDValue> &InVals) const;
+
+  std::string getPrototype(Type *, const ArgListTy &,
+                           const SmallVectorImpl<ISD::OutputArg> &,
+                           unsigned retAlignment) const;
+
+  virtual SDValue
+  LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
+              const SmallVectorImpl<ISD::OutputArg> &Outs,
+              const SmallVectorImpl<SDValue> &OutVals, DebugLoc dl,
+              SelectionDAG &DAG) const;
+
+  virtual void LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint,
+                                            std::vector<SDValue> &Ops,
+                                            SelectionDAG &DAG) const;
+
+  NVPTXTargetMachine *nvTM;
+
+  // PTX always uses 32-bit shift amounts
+  virtual MVT getShiftAmountTy(EVT LHSTy) const {
+    return MVT::i32;
+  }
+
+private:
+  const NVPTXSubtarget &nvptxSubtarget;  // cache the subtarget here
+
+  SDValue getExtSymb(SelectionDAG &DAG, const char *name, int idx,
+                     EVT = MVT::i32) const;
+  SDValue getParamSymbol(SelectionDAG &DAG, int idx, EVT = MVT::i32) const;
+  SDValue getParamHelpSymbol(SelectionDAG &DAG, int idx);
+
+  SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const;
+};
+} // namespace llvm
+
+#endif // NVPTXISELLOWERING_H
diff --git a/lib/Target/NVPTX/NVPTXInstrFormats.td b/lib/Target/NVPTX/NVPTXInstrFormats.td
new file mode 100644
index 0000000..f11f1b8
--- /dev/null
+++ b/lib/Target/NVPTX/NVPTXInstrFormats.td
@@ -0,0 +1,43 @@
+//===- NVPTXInstrFormats.td - NVPTX Instruction Formats -------*- tblgen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Describe NVPTX instructions format
+//
+//===----------------------------------------------------------------------===//
+
+// Vector instruction type enum
+class VecInstTypeEnum<bits<4> val> {
+  bits<4> Value=val;
+}
+def VecNOP : VecInstTypeEnum<0>;
+
+// Generic NVPTX Format
+
+class NVPTXInst<dag outs, dag ins, string asmstr, list<dag> pattern>
+  : Instruction {
+  field bits<14> Inst;
+
+  let Namespace = "NVPTX";
+  dag OutOperandList = outs;
+  dag InOperandList = ins;
+  let AsmString = asmstr;
+  let Pattern = pattern;
+
+  // TSFlagFields
+  bits<4> VecInstType = VecNOP.Value;
+  bit IsSimpleMove = 0;
+  bit IsLoad = 0;
+  bit IsStore = 0;
+
+  let TSFlags{3-0} = VecInstType;
+  let TSFlags{4-4} = IsSimpleMove;
+  let TSFlags{5-5} = IsLoad;
+  let TSFlags{6-6} = IsStore;
+}
diff --git a/lib/Target/NVPTX/NVPTXInstrInfo.cpp b/lib/Target/NVPTX/NVPTXInstrInfo.cpp
new file mode 100644
index 0000000..cd50deb
--- /dev/null
+++ b/lib/Target/NVPTX/NVPTXInstrInfo.cpp
@@ -0,0 +1,326 @@
+//===- NVPTXInstrInfo.cpp - NVPTX Instruction Information -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+// +//===----------------------------------------------------------------------===// +// +// This file contains the NVPTX implementation of the TargetInstrInfo class. +// +//===----------------------------------------------------------------------===// + +#include "NVPTX.h" +#include "NVPTXInstrInfo.h" +#include "NVPTXTargetMachine.h" +#define GET_INSTRINFO_CTOR +#include "NVPTXGenInstrInfo.inc" +#include "llvm/Function.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include + + +using namespace llvm; + +// FIXME: Add the subtarget support on this constructor. +NVPTXInstrInfo::NVPTXInstrInfo(NVPTXTargetMachine &tm) +: NVPTXGenInstrInfo(), + TM(tm), + RegInfo(*this, *TM.getSubtargetImpl()) {} + + +void NVPTXInstrInfo::copyPhysReg (MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, DebugLoc DL, + unsigned DestReg, unsigned SrcReg, + bool KillSrc) const { + if (NVPTX::Int32RegsRegClass.contains(DestReg) && + NVPTX::Int32RegsRegClass.contains(SrcReg)) + BuildMI(MBB, I, DL, get(NVPTX::IMOV32rr), DestReg) + .addReg(SrcReg, getKillRegState(KillSrc)); + else if (NVPTX::Int8RegsRegClass.contains(DestReg) && + NVPTX::Int8RegsRegClass.contains(SrcReg)) + BuildMI(MBB, I, DL, get(NVPTX::IMOV8rr), DestReg) + .addReg(SrcReg, getKillRegState(KillSrc)); + else if (NVPTX::Int1RegsRegClass.contains(DestReg) && + NVPTX::Int1RegsRegClass.contains(SrcReg)) + BuildMI(MBB, I, DL, get(NVPTX::IMOV1rr), DestReg) + .addReg(SrcReg, getKillRegState(KillSrc)); + else if (NVPTX::Float32RegsRegClass.contains(DestReg) && + NVPTX::Float32RegsRegClass.contains(SrcReg)) + BuildMI(MBB, I, DL, get(NVPTX::FMOV32rr), DestReg) + .addReg(SrcReg, getKillRegState(KillSrc)); + else if (NVPTX::Int16RegsRegClass.contains(DestReg) && + NVPTX::Int16RegsRegClass.contains(SrcReg)) + BuildMI(MBB, I, DL, get(NVPTX::IMOV16rr), DestReg) + .addReg(SrcReg, getKillRegState(KillSrc)); + else if (NVPTX::Int64RegsRegClass.contains(DestReg) && + NVPTX::Int64RegsRegClass.contains(SrcReg)) + BuildMI(MBB, I, DL, get(NVPTX::IMOV64rr), DestReg) + .addReg(SrcReg, getKillRegState(KillSrc)); + else if (NVPTX::Float64RegsRegClass.contains(DestReg) && + NVPTX::Float64RegsRegClass.contains(SrcReg)) + BuildMI(MBB, I, DL, get(NVPTX::FMOV64rr), DestReg) + .addReg(SrcReg, getKillRegState(KillSrc)); + else if (NVPTX::V4F32RegsRegClass.contains(DestReg) && + NVPTX::V4F32RegsRegClass.contains(SrcReg)) + BuildMI(MBB, I, DL, get(NVPTX::V4f32Mov), DestReg) + .addReg(SrcReg, getKillRegState(KillSrc)); + else if (NVPTX::V4I32RegsRegClass.contains(DestReg) && + NVPTX::V4I32RegsRegClass.contains(SrcReg)) + BuildMI(MBB, I, DL, get(NVPTX::V4i32Mov), DestReg) + .addReg(SrcReg, getKillRegState(KillSrc)); + else if (NVPTX::V2F32RegsRegClass.contains(DestReg) && + NVPTX::V2F32RegsRegClass.contains(SrcReg)) + BuildMI(MBB, I, DL, get(NVPTX::V2f32Mov), DestReg) + .addReg(SrcReg, getKillRegState(KillSrc)); + else if (NVPTX::V2I32RegsRegClass.contains(DestReg) && + NVPTX::V2I32RegsRegClass.contains(SrcReg)) + BuildMI(MBB, I, DL, get(NVPTX::V2i32Mov), DestReg) + .addReg(SrcReg, getKillRegState(KillSrc)); + else if (NVPTX::V4I8RegsRegClass.contains(DestReg) && + NVPTX::V4I8RegsRegClass.contains(SrcReg)) + BuildMI(MBB, I, DL, get(NVPTX::V4i8Mov), DestReg) + .addReg(SrcReg, getKillRegState(KillSrc)); + else if (NVPTX::V2I8RegsRegClass.contains(DestReg) && + NVPTX::V2I8RegsRegClass.contains(SrcReg)) + BuildMI(MBB, I, DL, get(NVPTX::V2i8Mov), DestReg) + 
.addReg(SrcReg, getKillRegState(KillSrc)); + else if (NVPTX::V4I16RegsRegClass.contains(DestReg) && + NVPTX::V4I16RegsRegClass.contains(SrcReg)) + BuildMI(MBB, I, DL, get(NVPTX::V4i16Mov), DestReg) + .addReg(SrcReg, getKillRegState(KillSrc)); + else if (NVPTX::V2I16RegsRegClass.contains(DestReg) && + NVPTX::V2I16RegsRegClass.contains(SrcReg)) + BuildMI(MBB, I, DL, get(NVPTX::V2i16Mov), DestReg) + .addReg(SrcReg, getKillRegState(KillSrc)); + else if (NVPTX::V2I64RegsRegClass.contains(DestReg) && + NVPTX::V2I64RegsRegClass.contains(SrcReg)) + BuildMI(MBB, I, DL, get(NVPTX::V2i64Mov), DestReg) + .addReg(SrcReg, getKillRegState(KillSrc)); + else if (NVPTX::V2F64RegsRegClass.contains(DestReg) && + NVPTX::V2F64RegsRegClass.contains(SrcReg)) + BuildMI(MBB, I, DL, get(NVPTX::V2f64Mov), DestReg) + .addReg(SrcReg, getKillRegState(KillSrc)); + else { + llvm_unreachable("Don't know how to copy a register"); + } +} + +bool NVPTXInstrInfo::isMoveInstr(const MachineInstr &MI, + unsigned &SrcReg, + unsigned &DestReg) const { + // Look for the appropriate part of TSFlags + bool isMove = false; + + unsigned TSFlags = (MI.getDesc().TSFlags & NVPTX::SimpleMoveMask) >> + NVPTX::SimpleMoveShift; + isMove = (TSFlags == 1); + + if (isMove) { + MachineOperand dest = MI.getOperand(0); + MachineOperand src = MI.getOperand(1); + assert(dest.isReg() && "dest of a movrr is not a reg"); + assert(src.isReg() && "src of a movrr is not a reg"); + + SrcReg = src.getReg(); + DestReg = dest.getReg(); + return true; + } + + return false; +} + +bool NVPTXInstrInfo::isReadSpecialReg(MachineInstr &MI) const +{ + switch (MI.getOpcode()) { + default: return false; + case NVPTX::INT_PTX_SREG_NTID_X: + case NVPTX::INT_PTX_SREG_NTID_Y: + case NVPTX::INT_PTX_SREG_NTID_Z: + case NVPTX::INT_PTX_SREG_TID_X: + case NVPTX::INT_PTX_SREG_TID_Y: + case NVPTX::INT_PTX_SREG_TID_Z: + case NVPTX::INT_PTX_SREG_CTAID_X: + case NVPTX::INT_PTX_SREG_CTAID_Y: + case NVPTX::INT_PTX_SREG_CTAID_Z: + case NVPTX::INT_PTX_SREG_NCTAID_X: + case NVPTX::INT_PTX_SREG_NCTAID_Y: + case NVPTX::INT_PTX_SREG_NCTAID_Z: + case NVPTX::INT_PTX_SREG_WARPSIZE: + return true; + } +} + + +bool NVPTXInstrInfo::isLoadInstr(const MachineInstr &MI, + unsigned &AddrSpace) const { + bool isLoad = false; + unsigned TSFlags = (MI.getDesc().TSFlags & NVPTX::isLoadMask) >> + NVPTX::isLoadShift; + isLoad = (TSFlags == 1); + if (isLoad) + AddrSpace = getLdStCodeAddrSpace(MI); + return isLoad; +} + +bool NVPTXInstrInfo::isStoreInstr(const MachineInstr &MI, + unsigned &AddrSpace) const { + bool isStore = false; + unsigned TSFlags = (MI.getDesc().TSFlags & NVPTX::isStoreMask) >> + NVPTX::isStoreShift; + isStore = (TSFlags == 1); + if (isStore) + AddrSpace = getLdStCodeAddrSpace(MI); + return isStore; +} + + +bool NVPTXInstrInfo::CanTailMerge(const MachineInstr *MI) const { + unsigned addrspace = 0; + if (MI->getOpcode() == NVPTX::INT_CUDA_SYNCTHREADS) + return false; + if (isLoadInstr(*MI, addrspace)) + if (addrspace == NVPTX::PTXLdStInstCode::SHARED) + return false; + if (isStoreInstr(*MI, addrspace)) + if (addrspace == NVPTX::PTXLdStInstCode::SHARED) + return false; + return true; +} + + +/// AnalyzeBranch - Analyze the branching code at the end of MBB, returning +/// true if it cannot be understood (e.g. it's a switch dispatch or isn't +/// implemented for a target). Upon success, this returns false and returns +/// with the following information in various cases: +/// +/// 1. 
If this block ends with no branches (it just falls through to its succ)
+///    just return false, leaving TBB/FBB null.
+/// 2. If this block ends with only an unconditional branch, it sets TBB to be
+///    the destination block.
+/// 3. If this block ends with a conditional branch and it falls through to
+///    a successor block, it sets TBB to be the branch destination block and a
+///    list of operands that evaluate the condition. These
+///    operands can be passed to other TargetInstrInfo methods to create new
+///    branches.
+/// 4. If this block ends with a conditional branch and an unconditional
+///    block, it returns the 'true' destination in TBB, the 'false' destination
+///    in FBB, and a list of operands that evaluate the condition. These
+///    operands can be passed to other TargetInstrInfo methods to create new
+///    branches.
+///
+/// Note that RemoveBranch and InsertBranch must be implemented to support
+/// cases where this method returns success.
+///
+bool NVPTXInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
+                                   MachineBasicBlock *&TBB,
+                                   MachineBasicBlock *&FBB,
+                                   SmallVectorImpl<MachineOperand> &Cond,
+                                   bool AllowModify) const {
+  // If the block has no terminators, it just falls into the block after it.
+  MachineBasicBlock::iterator I = MBB.end();
+  if (I == MBB.begin() || !isUnpredicatedTerminator(--I))
+    return false;
+
+  // Get the last instruction in the block.
+  MachineInstr *LastInst = I;
+
+  // If there is only one terminator instruction, process it.
+  if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) {
+    if (LastInst->getOpcode() == NVPTX::GOTO) {
+      TBB = LastInst->getOperand(0).getMBB();
+      return false;
+    } else if (LastInst->getOpcode() == NVPTX::CBranch) {
+      // Block ends with fall-through condbranch.
+      TBB = LastInst->getOperand(1).getMBB();
+      Cond.push_back(LastInst->getOperand(0));
+      return false;
+    }
+    // Otherwise, don't know what this is.
+    return true;
+  }
+
+  // Get the instruction before it if it's a terminator.
+  MachineInstr *SecondLastInst = I;
+
+  // If there are three terminators, we don't know what sort of block this is.
+  if (SecondLastInst && I != MBB.begin() &&
+      isUnpredicatedTerminator(--I))
+    return true;
+
+  // If the block ends with NVPTX::GOTO and NVPTX::CBranch, handle it.
+  if (SecondLastInst->getOpcode() == NVPTX::CBranch &&
+      LastInst->getOpcode() == NVPTX::GOTO) {
+    TBB = SecondLastInst->getOperand(1).getMBB();
+    Cond.push_back(SecondLastInst->getOperand(0));
+    FBB = LastInst->getOperand(0).getMBB();
+    return false;
+  }
+
+  // If the block ends with two NVPTX::GOTOs, handle it. The second one is not
+  // executed, so remove it.
+  if (SecondLastInst->getOpcode() == NVPTX::GOTO &&
+      LastInst->getOpcode() == NVPTX::GOTO) {
+    TBB = SecondLastInst->getOperand(0).getMBB();
+    I = LastInst;
+    if (AllowModify)
+      I->eraseFromParent();
+    return false;
+  }
+
+  // Otherwise, can't handle this.
+  return true;
+}
+
+unsigned NVPTXInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
+  MachineBasicBlock::iterator I = MBB.end();
+  if (I == MBB.begin()) return 0;
+  --I;
+  if (I->getOpcode() != NVPTX::GOTO && I->getOpcode() != NVPTX::CBranch)
+    return 0;
+
+  // Remove the branch.
+  I->eraseFromParent();
+
+  I = MBB.end();
+
+  if (I == MBB.begin()) return 1;
+  --I;
+  if (I->getOpcode() != NVPTX::CBranch)
+    return 1;
+
+  // Remove the branch.
+  I->eraseFromParent();
+  return 2;
+}
+
+unsigned
+NVPTXInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+                             MachineBasicBlock *FBB,
+                             const SmallVectorImpl<MachineOperand> &Cond,
+                             DebugLoc DL) const {
+  // Shouldn't be a fall through.
+  assert(TBB && "InsertBranch must not be told to insert a fallthrough");
+  assert((Cond.size() == 1 || Cond.size() == 0) &&
+         "NVPTX branch conditions have two components!");
+
+  // One-way branch.
+  if (FBB == 0) {
+    if (Cond.empty())   // Unconditional branch
+      BuildMI(&MBB, DL, get(NVPTX::GOTO)).addMBB(TBB);
+    else                // Conditional branch
+      BuildMI(&MBB, DL, get(NVPTX::CBranch))
+        .addReg(Cond[0].getReg()).addMBB(TBB);
+    return 1;
+  }
+
+  // Two-way Conditional Branch.
+  BuildMI(&MBB, DL, get(NVPTX::CBranch))
+    .addReg(Cond[0].getReg()).addMBB(TBB);
+  BuildMI(&MBB, DL, get(NVPTX::GOTO)).addMBB(FBB);
+  return 2;
+}
diff --git a/lib/Target/NVPTX/NVPTXInstrInfo.h b/lib/Target/NVPTX/NVPTXInstrInfo.h
new file mode 100644
index 0000000..7b8e218
--- /dev/null
+++ b/lib/Target/NVPTX/NVPTXInstrInfo.h
@@ -0,0 +1,83 @@
+//===- NVPTXInstrInfo.h - NVPTX Instruction Information ---------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the NVPTX implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef NVPTXINSTRUCTIONINFO_H
+#define NVPTXINSTRUCTIONINFO_H
+
+#include "NVPTX.h"
+#include "NVPTXRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+
+#define GET_INSTRINFO_HEADER
+#include "NVPTXGenInstrInfo.inc"
+
+namespace llvm {
+
+class NVPTXInstrInfo : public NVPTXGenInstrInfo
+{
+  NVPTXTargetMachine &TM;
+  const NVPTXRegisterInfo RegInfo;
+public:
+  explicit NVPTXInstrInfo(NVPTXTargetMachine &TM);
+
+  virtual const NVPTXRegisterInfo &getRegisterInfo() const { return RegInfo; }
+
+  /* The following virtual functions are used in register allocation.
+   * They are not implemented because the existing interface and the logic
+   * at the caller side do not work for the elementized vector load and store.
+   *
+   * virtual unsigned isLoadFromStackSlot(const MachineInstr *MI,
+   *                                      int &FrameIndex) const;
+   * virtual unsigned isStoreToStackSlot(const MachineInstr *MI,
+   *                                     int &FrameIndex) const;
+   * virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
+   *                                  MachineBasicBlock::iterator MBBI,
+   *                                  unsigned SrcReg, bool isKill, int FrameIndex,
+   *                                  const TargetRegisterClass *RC) const;
+   * virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
+   *                                   MachineBasicBlock::iterator MBBI,
+   *                                   unsigned DestReg, int FrameIndex,
+   *                                   const TargetRegisterClass *RC) const;
+   */
+
+  virtual void copyPhysReg(MachineBasicBlock &MBB,
+                           MachineBasicBlock::iterator I, DebugLoc DL,
+                           unsigned DestReg, unsigned SrcReg,
+                           bool KillSrc) const;
+  virtual bool isMoveInstr(const MachineInstr &MI,
+                           unsigned &SrcReg,
+                           unsigned &DestReg) const;
+  bool isLoadInstr(const MachineInstr &MI, unsigned &AddrSpace) const;
+  bool isStoreInstr(const MachineInstr &MI, unsigned &AddrSpace) const;
+  bool isReadSpecialReg(MachineInstr &MI) const;
+
+  virtual bool CanTailMerge(const MachineInstr *MI) const;
+  // Branch analysis.
+  virtual bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
+                             MachineBasicBlock *&FBB,
+                             SmallVectorImpl<MachineOperand> &Cond,
+                             bool AllowModify) const;
+  virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const;
+  virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+                                MachineBasicBlock *FBB,
+                                const SmallVectorImpl<MachineOperand> &Cond,
+                                DebugLoc DL) const;
+  unsigned getLdStCodeAddrSpace(const MachineInstr &MI) const {
+    return MI.getOperand(2).getImm();
+  }
+
+};
+
+} // namespace llvm
+
+#endif
diff --git a/lib/Target/NVPTX/NVPTXInstrInfo.td b/lib/Target/NVPTX/NVPTXInstrInfo.td
new file mode 100644
index 0000000..8a410b8
--- /dev/null
+++ b/lib/Target/NVPTX/NVPTXInstrInfo.td
@@ -0,0 +1,2837 @@
+//===- NVPTXInstrInfo.td - NVPTX Instruction defs -------------*- tblgen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the PTX instructions in TableGen format.
+//
+//===----------------------------------------------------------------------===//
+
+include "NVPTXInstrFormats.td"
+
+// A NOP instruction
+def NOP : NVPTXInst<(outs), (ins), "", []>;
+
+// List of vector specific properties
+def isVecLD      : VecInstTypeEnum<1>;
+def isVecST      : VecInstTypeEnum<2>;
+def isVecBuild   : VecInstTypeEnum<3>;
+def isVecShuffle : VecInstTypeEnum<4>;
+def isVecExtract : VecInstTypeEnum<5>;
+def isVecInsert  : VecInstTypeEnum<6>;
+def isVecDest    : VecInstTypeEnum<7>;
+def isVecOther   : VecInstTypeEnum<15>;
+
+//===----------------------------------------------------------------------===//
+// NVPTX Operand Definitions.
+//===----------------------------------------------------------------------===//
+
+def brtarget : Operand<OtherVT>;
+
+//===----------------------------------------------------------------------===//
+// NVPTX Instruction Predicate Definitions
+//===----------------------------------------------------------------------===//
+
+
+def hasAtomRedG32 : Predicate<"Subtarget.hasAtomRedG32()">;
+def hasAtomRedS32 : Predicate<"Subtarget.hasAtomRedS32()">;
+def hasAtomRedGen32 : Predicate<"Subtarget.hasAtomRedGen32()">;
+def useAtomRedG32forGen32 :
+  Predicate<"!Subtarget.hasAtomRedGen32() && Subtarget.hasAtomRedG32()">;
+def hasBrkPt : Predicate<"Subtarget.hasBrkPt()">;
+def hasAtomRedG64 : Predicate<"Subtarget.hasAtomRedG64()">;
+def hasAtomRedS64 : Predicate<"Subtarget.hasAtomRedS64()">;
+def hasAtomRedGen64 : Predicate<"Subtarget.hasAtomRedGen64()">;
+def useAtomRedG64forGen64 :
+  Predicate<"!Subtarget.hasAtomRedGen64() && Subtarget.hasAtomRedG64()">;
+def hasAtomAddF32 : Predicate<"Subtarget.hasAtomAddF32()">;
+def hasVote : Predicate<"Subtarget.hasVote()">;
+def hasDouble : Predicate<"Subtarget.hasDouble()">;
+def reqPTX20 : Predicate<"Subtarget.reqPTX20()">;
+def hasLDU : Predicate<"Subtarget.hasLDU()">;
+def hasGenericLdSt : Predicate<"Subtarget.hasGenericLdSt()">;
+
+def doF32FTZ : Predicate<"UseF32FTZ">;
+
+def doFMAF32 : Predicate<"doFMAF32">;
+def doFMAF32_ftz : Predicate<"(doFMAF32 && UseF32FTZ)">;
+def doFMAF32AGG : Predicate<"doFMAF32AGG">;
+def doFMAF32AGG_ftz : Predicate<"(doFMAF32AGG && UseF32FTZ)">;
+def doFMAF64 : Predicate<"doFMAF64">;
+def doFMAF64AGG : Predicate<"doFMAF64AGG">;
+def doFMADF32 : Predicate<"doFMADF32">;
+def doFMADF32_ftz : Predicate<"(doFMADF32 && UseF32FTZ)">;
+
+def doMulWide : Predicate<"doMulWide">;
+
+def allowFMA : Predicate<"allowFMA">;
+def allowFMA_ftz : Predicate<"(allowFMA && UseF32FTZ)">;
+
+def do_DIVF32_APPROX : Predicate<"do_DIVF32_PREC==0">;
+def do_DIVF32_FULL : Predicate<"do_DIVF32_PREC==1">;
+
+def hasHWROT32 : Predicate<"Subtarget.hasHWROT32()">;
+
+def true : Predicate<"1">;
+
+//===----------------------------------------------------------------------===//
+// Special Handling for 8-bit Operands and Operations
+//
+// PTX supports 8-bit signed and unsigned types, but does not support 8-bit
+// operations (like add, shift, etc) except for ld/st/cvt. SASS does not have
+// 8-bit registers.
+//
+// PTX ld, st and cvt instructions permit source and destination data operands
+// to be wider than the instruction-type size, so that narrow values may be
+// loaded, stored, and converted using regular-width registers.
+//
+// So in PTX generation, we
+// - always use 16-bit registers in place of 8-bit registers.
+//   (8-bit variables should stay as 8-bit as they represent memory layout.)
+// - for the following 8-bit operations, we sign-ext/zero-ext the 8-bit values
+//   before the operation
+//   . div
+//   . rem
+//   . neg (sign)
+//   . set, setp
+//   . shr
+//
+// We are patching the operations by inserting the cvt instructions in the
+// asm strings of the affected instructions.
+//
+// Since vector operations, except for ld/st, are eventually elementized, we
+// do not need to special-handle the vector 8-bit operations.
+//
+//
+//===----------------------------------------------------------------------===//
+
+// Generate string block like
+// {
+//   .reg .s16 %temp1;
+//   .reg .s16 %temp2;
+//   cvt.s16.s8 %temp1, %a;
+//   cvt.s16.s8 %temp2, %b;
+//   opc.s16 %dst, %temp1, %temp2;
+// }
+// when OpcStr=opc.s TypeStr=s16 CVTStr=cvt.s16.s8
+class Handle_i8rr<string OpcStr, string TypeStr, string CVTStr> {
+  string s = !strconcat("{{\n\t",
+             !strconcat(".reg .", !strconcat(TypeStr,
+             !strconcat(" \t%temp1;\n\t",
+             !strconcat(".reg .", !strconcat(TypeStr,
+             !strconcat(" \t%temp2;\n\t",
+             !strconcat(CVTStr, !strconcat(" \t%temp1, $a;\n\t",
+             !strconcat(CVTStr, !strconcat(" \t%temp2, $b;\n\t",
+             !strconcat(OpcStr, "16 \t$dst, %temp1, %temp2;\n\t}}"))))))))))));
+}
+
+// Generate string block like
+// {
+//   .reg .s16 %temp1;
+//   .reg .s16 %temp2;
+//   cvt.s16.s8 %temp1, %a;
+//   mov.b16 %temp2, %b;
+//   cvt.s16.s8 %temp2, %temp2;
+//   opc.s16 %dst, %temp1, %temp2;
+// }
+// when OpcStr=opc.s TypeStr=s16 CVTStr=cvt.s16.s8
+class Handle_i8ri<string OpcStr, string TypeStr, string CVTStr> {
+  string s = !strconcat("{{\n\t",
+             !strconcat(".reg .", !strconcat(TypeStr,
+             !strconcat(" \t%temp1;\n\t",
+             !strconcat(".reg .",
+             !strconcat(TypeStr, !strconcat(" \t%temp2;\n\t",
+             !strconcat(CVTStr, !strconcat(" \t%temp1, $a;\n\t",
+             !strconcat("mov.b16 \t%temp2, $b;\n\t",
+             !strconcat(CVTStr, !strconcat(" \t%temp2, %temp2;\n\t",
+             !strconcat(OpcStr, "16 \t$dst, %temp1, %temp2;\n\t}}")))))))))))));
+}
+
+// Generate string block like
+// {
+//   .reg .s16 %temp1;
+//   .reg .s16 %temp2;
+//   mov.b16 %temp1, %a;
+//   cvt.s16.s8 %temp1, %temp1;
+//   cvt.s16.s8 %temp2, %b;
+//   opc.s16 %dst, %temp1, %temp2;
+// }
+// when OpcStr=opc.s TypeStr=s16 CVTStr=cvt.s16.s8
+class Handle_i8ir<string OpcStr, string TypeStr, string CVTStr> {
+  string s = !strconcat("{{\n\t",
+             !strconcat(".reg .", !strconcat(TypeStr,
+             !strconcat(" \t%temp1;\n\t",
+             !strconcat(".reg .", !strconcat(TypeStr,
+             !strconcat(" \t%temp2;\n\t",
+             !strconcat("mov.b16 \t%temp1, $a;\n\t",
+             !strconcat(CVTStr, !strconcat(" \t%temp1, %temp1;\n\t",
+             !strconcat(CVTStr, !strconcat(" \t%temp2, $b;\n\t",
+             !strconcat(OpcStr, "16 \t$dst, %temp1, %temp2;\n\t}}")))))))))))));
+}
+
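+// For illustration (editorial note, not part of the original source): with
+// OpcStr="div.s", TypeStr="s16", CVTStr="cvt.s16.s8" -- the arguments the
+// SDIV definitions below pass in -- Handle_i8rr.s expands to the PTX block
+// {
+//   .reg .s16 %temp1;
+//   .reg .s16 %temp2;
+//   cvt.s16.s8 %temp1, $a;
+//   cvt.s16.s8 %temp2, $b;
+//   div.s16 %dst, %temp1, %temp2;
+// }
+// i.e. both 8-bit operands are sign-extended into 16-bit temporaries and the
+// operation is performed at 16-bit width, as described above.
+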
+
+//===----------------------------------------------------------------------===//
+// Some Common Instruction Class Templates
+//===----------------------------------------------------------------------===//
+
+multiclass I3<string OpcStr, SDNode OpNode> {
+  def i64rr : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, Int64Regs:$b),
+                        !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
+                        [(set Int64Regs:$dst, (OpNode Int64Regs:$a,
+                                                      Int64Regs:$b))]>;
+  def i64ri : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, i64imm:$b),
+                        !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
+                        [(set Int64Regs:$dst, (OpNode Int64Regs:$a, imm:$b))]>;
+  def i32rr : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b),
+                        !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
+                        [(set Int32Regs:$dst, (OpNode Int32Regs:$a,
+                                                      Int32Regs:$b))]>;
+  def i32ri : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
+                        !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
+                        [(set Int32Regs:$dst, (OpNode Int32Regs:$a, imm:$b))]>;
+  def i16rr : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, Int16Regs:$b),
+                        !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
+                        [(set Int16Regs:$dst, (OpNode Int16Regs:$a,
+                                                      Int16Regs:$b))]>;
+  def i16ri : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, i16imm:$b),
+                        !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
+                        [(set Int16Regs:$dst, (OpNode Int16Regs:$a, (imm):$b))]>;
+  def i8rr : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, Int8Regs:$b),
+                       !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
+                       [(set Int8Regs:$dst, (OpNode Int8Regs:$a, Int8Regs:$b))]>;
+  def i8ri : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, i8imm:$b),
+                       !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
+                       [(set Int8Regs:$dst, (OpNode Int8Regs:$a, (imm):$b))]>;
+}
+
+multiclass I3_i8<string OpcStr, SDNode OpNode, string TypeStr, string CVTStr> {
+  def i64rr : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, Int64Regs:$b),
+                        !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
+                        [(set Int64Regs:$dst, (OpNode Int64Regs:$a,
+                                                      Int64Regs:$b))]>;
+  def i64ri : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, i64imm:$b),
+                        !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
+                        [(set Int64Regs:$dst, (OpNode Int64Regs:$a, imm:$b))]>;
+  def i32rr : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b),
+                        !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
+                        [(set Int32Regs:$dst, (OpNode Int32Regs:$a,
+                                                      Int32Regs:$b))]>;
+  def i32ri : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
+                        !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
+                        [(set Int32Regs:$dst, (OpNode Int32Regs:$a, imm:$b))]>;
+  def i16rr : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, Int16Regs:$b),
+                        !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
+                        [(set Int16Regs:$dst, (OpNode Int16Regs:$a,
+                                                      Int16Regs:$b))]>;
+  def i16ri : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, i16imm:$b),
+                        !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
+                        [(set Int16Regs:$dst, (OpNode Int16Regs:$a, (imm):$b))]>;
+  def i8rr : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, Int8Regs:$b),
+                       Handle_i8rr<OpcStr, TypeStr, CVTStr>.s,
+                       [(set Int8Regs:$dst, (OpNode Int8Regs:$a, Int8Regs:$b))]>;
+  def i8ri : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, i8imm:$b),
+                       Handle_i8ri<OpcStr, TypeStr, CVTStr>.s,
+                       [(set Int8Regs:$dst, (OpNode Int8Regs:$a, (imm):$b))]>;
+}
+
+multiclass I3_noi8<string OpcStr, SDNode OpNode> {
+  def i64rr : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, Int64Regs:$b),
+                        !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
+                        [(set Int64Regs:$dst, (OpNode Int64Regs:$a,
+                                                      Int64Regs:$b))]>;
+  def i64ri : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, i64imm:$b),
+                        !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
+                        [(set Int64Regs:$dst, (OpNode Int64Regs:$a, imm:$b))]>;
+  def i32rr : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b),
+                        !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
+                        [(set Int32Regs:$dst, (OpNode Int32Regs:$a,
+                                                      Int32Regs:$b))]>;
+  def i32ri : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
+                        !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
+                        [(set Int32Regs:$dst, (OpNode Int32Regs:$a, imm:$b))]>;
+  def i16rr : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, Int16Regs:$b),
+                        !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
+                        [(set Int16Regs:$dst, (OpNode Int16Regs:$a,
+                                                      Int16Regs:$b))]>;
+  def i16ri : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, i16imm:$b),
+                        !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
+                        [(set Int16Regs:$dst, (OpNode Int16Regs:$a, (imm):$b))]>;
+}
+
+multiclass ADD_SUB_INT_32<string OpcStr, SDNode OpNode> {
+  def i32rr : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a,
+                                                    Int32Regs:$b),
+                        !strconcat(OpcStr, ".s32 \t$dst, $a, $b;"),
+                        [(set Int32Regs:$dst, (OpNode Int32Regs:$a,
+                                                      Int32Regs:$b))]>;
+  def i32ri : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
+                        !strconcat(OpcStr, ".s32 \t$dst, $a, $b;"),
+                        [(set Int32Regs:$dst, (OpNode Int32Regs:$a, imm:$b))]>;
+}
+
+multiclass F3<string OpcStr, SDNode OpNode> {
+  def f64rr : NVPTXInst<(outs Float64Regs:$dst),
+                        (ins Float64Regs:$a, Float64Regs:$b),
+                        !strconcat(OpcStr, ".f64 \t$dst, $a, $b;"),
+                        [(set Float64Regs:$dst,
+                          (OpNode Float64Regs:$a, Float64Regs:$b))]>,
+              Requires<[allowFMA]>;
+  def f64ri : NVPTXInst<(outs Float64Regs:$dst),
+                        (ins Float64Regs:$a, f64imm:$b),
+                        !strconcat(OpcStr, ".f64 \t$dst, $a, $b;"),
+                        [(set Float64Regs:$dst,
+                          (OpNode Float64Regs:$a, fpimm:$b))]>,
+              Requires<[allowFMA]>;
+  def f32rr_ftz : NVPTXInst<(outs Float32Regs:$dst),
+                            (ins Float32Regs:$a, Float32Regs:$b),
+                            !strconcat(OpcStr, ".ftz.f32 \t$dst, $a, $b;"),
+                            [(set Float32Regs:$dst,
+                              (OpNode Float32Regs:$a, Float32Regs:$b))]>,
+                  Requires<[allowFMA_ftz]>;
+  def f32ri_ftz : NVPTXInst<(outs Float32Regs:$dst),
+                            (ins Float32Regs:$a, f32imm:$b),
+                            !strconcat(OpcStr, ".ftz.f32 \t$dst, $a, $b;"),
+                            [(set Float32Regs:$dst,
+                              (OpNode Float32Regs:$a, fpimm:$b))]>,
+                  Requires<[allowFMA_ftz]>;
+  def f32rr : NVPTXInst<(outs Float32Regs:$dst),
+                        (ins Float32Regs:$a, Float32Regs:$b),
+                        !strconcat(OpcStr, ".f32 \t$dst, $a, $b;"),
+                        [(set Float32Regs:$dst,
+                          (OpNode Float32Regs:$a, Float32Regs:$b))]>,
+              Requires<[allowFMA]>;
+  def f32ri : NVPTXInst<(outs Float32Regs:$dst),
+                        (ins Float32Regs:$a, f32imm:$b),
+                        !strconcat(OpcStr, ".f32 \t$dst, $a, $b;"),
+                        [(set Float32Regs:$dst,
+                          (OpNode Float32Regs:$a, fpimm:$b))]>,
+              Requires<[allowFMA]>;
+}
+
+multiclass F3_rn<string OpcStr, SDNode OpNode> {
+  def f64rr : NVPTXInst<(outs Float64Regs:$dst),
+                        (ins Float64Regs:$a, Float64Regs:$b),
+                        !strconcat(OpcStr, ".rn.f64 \t$dst, $a, $b;"),
+                        [(set Float64Regs:$dst,
+                          (OpNode Float64Regs:$a, Float64Regs:$b))]>;
+  def f64ri : NVPTXInst<(outs Float64Regs:$dst),
+                        (ins Float64Regs:$a, f64imm:$b),
+                        !strconcat(OpcStr, ".rn.f64 \t$dst, $a, $b;"),
+                        [(set Float64Regs:$dst,
+                          (OpNode Float64Regs:$a, fpimm:$b))]>;
+  def f32rr_ftz : NVPTXInst<(outs Float32Regs:$dst),
+                            (ins Float32Regs:$a, Float32Regs:$b),
+                            !strconcat(OpcStr, ".rn.ftz.f32 \t$dst, $a, $b;"),
+                            [(set Float32Regs:$dst,
+                              (OpNode Float32Regs:$a, Float32Regs:$b))]>,
+                  Requires<[doF32FTZ]>;
+  def f32ri_ftz : NVPTXInst<(outs Float32Regs:$dst),
+                            (ins Float32Regs:$a, f32imm:$b),
+                            !strconcat(OpcStr, ".rn.ftz.f32 \t$dst, $a, $b;"),
+                            [(set Float32Regs:$dst,
+                              (OpNode Float32Regs:$a, fpimm:$b))]>,
+                  Requires<[doF32FTZ]>;
+  def f32rr : NVPTXInst<(outs Float32Regs:$dst),
+                        (ins Float32Regs:$a, Float32Regs:$b),
!strconcat(OpcStr, ".rn.f32 \t$dst, $a, $b;"), + [(set Float32Regs:$dst, + (OpNode Float32Regs:$a, Float32Regs:$b))]>; + def f32ri : NVPTXInst<(outs Float32Regs:$dst), + (ins Float32Regs:$a, f32imm:$b), + !strconcat(OpcStr, ".rn.f32 \t$dst, $a, $b;"), + [(set Float32Regs:$dst, + (OpNode Float32Regs:$a, fpimm:$b))]>; +} + +multiclass F2 { + def f64 : NVPTXInst<(outs Float64Regs:$dst), (ins Float64Regs:$a), + !strconcat(OpcStr, ".f64 \t$dst, $a;"), + [(set Float64Regs:$dst, (OpNode Float64Regs:$a))]>; + def f32_ftz : NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$a), + !strconcat(OpcStr, ".ftz.f32 \t$dst, $a;"), + [(set Float32Regs:$dst, (OpNode Float32Regs:$a))]>, + Requires<[doF32FTZ]>; + def f32 : NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$a), + !strconcat(OpcStr, ".f32 \t$dst, $a;"), + [(set Float32Regs:$dst, (OpNode Float32Regs:$a))]>; +} + +//===----------------------------------------------------------------------===// +// NVPTX Instructions. +//===----------------------------------------------------------------------===// + +//----------------------------------- +// Integer Arithmetic +//----------------------------------- + +multiclass ADD_SUB_i1 { + def _rr: NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$a, Int1Regs:$b), + "xor.pred \t$dst, $a, $b;", + [(set Int1Regs:$dst, (OpNode Int1Regs:$a, Int1Regs:$b))]>; + def _ri: NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$a, i1imm:$b), + "xor.pred \t$dst, $a, $b;", + [(set Int1Regs:$dst, (OpNode Int1Regs:$a, (imm):$b))]>; +} + +defm ADD_i1 : ADD_SUB_i1; +defm SUB_i1 : ADD_SUB_i1; + + +defm ADD : I3<"add.s", add>; +defm SUB : I3<"sub.s", sub>; + +defm ADDCC : ADD_SUB_INT_32<"add.cc", addc>; +defm SUBCC : ADD_SUB_INT_32<"sub.cc", subc>; + +defm ADDCCC : ADD_SUB_INT_32<"addc.cc", adde>; +defm SUBCCC : ADD_SUB_INT_32<"subc.cc", sube>; + +//mul.wide PTX instruction +def SInt32Const : PatLeaf<(imm), [{ + const APInt &v = N->getAPIntValue(); + if (v.isSignedIntN(32)) + return true; + return false; +}]>; + +def UInt32Const : PatLeaf<(imm), [{ + const APInt &v = N->getAPIntValue(); + if (v.isIntN(32)) + return true; + return false; +}]>; + +def SInt16Const : PatLeaf<(imm), [{ + const APInt &v = N->getAPIntValue(); + if (v.isSignedIntN(16)) + return true; + return false; +}]>; + +def UInt16Const : PatLeaf<(imm), [{ + const APInt &v = N->getAPIntValue(); + if (v.isIntN(16)) + return true; + return false; +}]>; + +def Int5Const : PatLeaf<(imm), [{ + const APInt &v = N->getAPIntValue(); + // Check if 0 <= v < 32 + // Only then the result from (x << v) will be i32 + if (v.sge(0) && v.slt(32)) + return true; + return false; +}]>; + +def Int4Const : PatLeaf<(imm), [{ + const APInt &v = N->getAPIntValue(); + // Check if 0 <= v < 16 + // Only then the result from (x << v) will be i16 + if (v.sge(0) && v.slt(16)) + return true; + return false; +}]>; + +def SHL2MUL32 : SDNodeXFormgetAPIntValue(); + APInt temp(32, 1); + return CurDAG->getTargetConstant(temp.shl(v), MVT::i32); +}]>; + +def SHL2MUL16 : SDNodeXFormgetAPIntValue(); + APInt temp(16, 1); + return CurDAG->getTargetConstant(temp.shl(v), MVT::i16); +}]>; + +def MULWIDES64 : NVPTXInst<(outs Int64Regs:$dst), + (ins Int32Regs:$a, Int32Regs:$b), + "mul.wide.s32 \t$dst, $a, $b;", []>; +def MULWIDES64Imm : NVPTXInst<(outs Int64Regs:$dst), + (ins Int32Regs:$a, i64imm:$b), + "mul.wide.s32 \t$dst, $a, $b;", []>; + +def MULWIDEU64 : NVPTXInst<(outs Int64Regs:$dst), + (ins Int32Regs:$a, Int32Regs:$b), + "mul.wide.u32 \t$dst, $a, $b;", []>; +def MULWIDEU64Imm : NVPTXInst<(outs Int64Regs:$dst), + 
(ins Int32Regs:$a, i64imm:$b), + "mul.wide.u32 \t$dst, $a, $b;", []>; + +def MULWIDES32 : NVPTXInst<(outs Int32Regs:$dst), + (ins Int16Regs:$a, Int16Regs:$b), + "mul.wide.s16 \t$dst, $a, $b;", []>; +def MULWIDES32Imm : NVPTXInst<(outs Int32Regs:$dst), + (ins Int16Regs:$a, i32imm:$b), + "mul.wide.s16 \t$dst, $a, $b;", []>; + +def MULWIDEU32 : NVPTXInst<(outs Int32Regs:$dst), + (ins Int16Regs:$a, Int16Regs:$b), + "mul.wide.u16 \t$dst, $a, $b;", []>; +def MULWIDEU32Imm : NVPTXInst<(outs Int32Regs:$dst), + (ins Int16Regs:$a, i32imm:$b), + "mul.wide.u16 \t$dst, $a, $b;", []>; + +def : Pat<(shl (sext Int32Regs:$a), (i32 Int5Const:$b)), + (MULWIDES64Imm Int32Regs:$a, (SHL2MUL32 node:$b))>, + Requires<[doMulWide]>; +def : Pat<(shl (zext Int32Regs:$a), (i32 Int5Const:$b)), + (MULWIDEU64Imm Int32Regs:$a, (SHL2MUL32 node:$b))>, + Requires<[doMulWide]>; + +def : Pat<(shl (sext Int16Regs:$a), (i16 Int4Const:$b)), + (MULWIDES32Imm Int16Regs:$a, (SHL2MUL16 node:$b))>, + Requires<[doMulWide]>; +def : Pat<(shl (zext Int16Regs:$a), (i16 Int4Const:$b)), + (MULWIDEU32Imm Int16Regs:$a, (SHL2MUL16 node:$b))>, + Requires<[doMulWide]>; + +def : Pat<(mul (sext Int32Regs:$a), (sext Int32Regs:$b)), + (MULWIDES64 Int32Regs:$a, Int32Regs:$b)>, + Requires<[doMulWide]>; +def : Pat<(mul (sext Int32Regs:$a), (i64 SInt32Const:$b)), + (MULWIDES64Imm Int32Regs:$a, (i64 SInt32Const:$b))>, + Requires<[doMulWide]>; + +def : Pat<(mul (zext Int32Regs:$a), (zext Int32Regs:$b)), + (MULWIDEU64 Int32Regs:$a, Int32Regs:$b)>, Requires<[doMulWide]>; +def : Pat<(mul (zext Int32Regs:$a), (i64 UInt32Const:$b)), + (MULWIDEU64Imm Int32Regs:$a, (i64 UInt32Const:$b))>, + Requires<[doMulWide]>; + +def : Pat<(mul (sext Int16Regs:$a), (sext Int16Regs:$b)), + (MULWIDES32 Int16Regs:$a, Int16Regs:$b)>, Requires<[doMulWide]>; +def : Pat<(mul (sext Int16Regs:$a), (i32 SInt16Const:$b)), + (MULWIDES32Imm Int16Regs:$a, (i32 SInt16Const:$b))>, + Requires<[doMulWide]>; + +def : Pat<(mul (zext Int16Regs:$a), (zext Int16Regs:$b)), + (MULWIDEU32 Int16Regs:$a, Int16Regs:$b)>, Requires<[doMulWide]>; +def : Pat<(mul (zext Int16Regs:$a), (i32 UInt16Const:$b)), + (MULWIDEU32Imm Int16Regs:$a, (i32 UInt16Const:$b))>, + Requires<[doMulWide]>; + +defm MULT : I3<"mul.lo.s", mul>; + +defm MULTHS : I3_noi8<"mul.hi.s", mulhs>; +defm MULTHU : I3_noi8<"mul.hi.u", mulhu>; +def MULTHSi8rr : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, Int8Regs:$b), + !strconcat("{{ \n\t", + !strconcat(".reg \t.s16 temp1; \n\t", + !strconcat(".reg \t.s16 temp2; \n\t", + !strconcat("cvt.s16.s8 \ttemp1, $a; \n\t", + !strconcat("cvt.s16.s8 \ttemp2, $b; \n\t", + !strconcat("mul.lo.s16 \t$dst, temp1, temp2; \n\t", + !strconcat("shr.s16 \t$dst, $dst, 8; \n\t", + !strconcat("}}", "")))))))), + [(set Int8Regs:$dst, (mulhs Int8Regs:$a, Int8Regs:$b))]>; +def MULTHSi8ri : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, i8imm:$b), + !strconcat("{{ \n\t", + !strconcat(".reg \t.s16 temp1; \n\t", + !strconcat(".reg \t.s16 temp2; \n\t", + !strconcat("cvt.s16.s8 \ttemp1, $a; \n\t", + !strconcat("mov.b16 \ttemp2, $b; \n\t", + !strconcat("cvt.s16.s8 \ttemp2, temp2; \n\t", + !strconcat("mul.lo.s16 \t$dst, temp1, temp2; \n\t", + !strconcat("shr.s16 \t$dst, $dst, 8; \n\t", + !strconcat("}}", ""))))))))), + [(set Int8Regs:$dst, (mulhs Int8Regs:$a, imm:$b))]>; +def MULTHUi8rr : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, Int8Regs:$b), + !strconcat("{{ \n\t", + !strconcat(".reg \t.u16 temp1; \n\t", + !strconcat(".reg \t.u16 temp2; \n\t", + !strconcat("cvt.u16.u8 \ttemp1, $a; \n\t", + 
!strconcat("cvt.u16.u8 \ttemp2, $b; \n\t", + !strconcat("mul.lo.u16 \t$dst, temp1, temp2; \n\t", + !strconcat("shr.u16 \t$dst, $dst, 8; \n\t", + !strconcat("}}", "")))))))), + [(set Int8Regs:$dst, (mulhu Int8Regs:$a, Int8Regs:$b))]>; +def MULTHUi8ri : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, i8imm:$b), + !strconcat("{{ \n\t", + !strconcat(".reg \t.u16 temp1; \n\t", + !strconcat(".reg \t.u16 temp2; \n\t", + !strconcat("cvt.u16.u8 \ttemp1, $a; \n\t", + !strconcat("mov.b16 \ttemp2, $b; \n\t", + !strconcat("cvt.u16.u8 \ttemp2, temp2; \n\t", + !strconcat("mul.lo.u16 \t$dst, temp1, temp2; \n\t", + !strconcat("shr.u16 \t$dst, $dst, 8; \n\t", + !strconcat("}}", ""))))))))), + [(set Int8Regs:$dst, (mulhu Int8Regs:$a, imm:$b))]>; + + +defm SDIV : I3_i8<"div.s", sdiv, "s16", "cvt.s16.s8">; +defm UDIV : I3_i8<"div.u", udiv, "u16", "cvt.u16.u8">; + +defm SREM : I3_i8<"rem.s", srem, "s16", "cvt.s16.s8">; +// The ri version will not be selected as DAGCombiner::visitSREM will lower it. +defm UREM : I3_i8<"rem.u", urem, "u16", "cvt.u16.u8">; +// The ri version will not be selected as DAGCombiner::visitUREM will lower it. + +def MAD8rrr : NVPTXInst<(outs Int8Regs:$dst), + (ins Int8Regs:$a, Int8Regs:$b, Int8Regs:$c), + "mad.lo.s16 \t$dst, $a, $b, $c;", + [(set Int8Regs:$dst, (add (mul Int8Regs:$a, Int8Regs:$b), + Int8Regs:$c))]>; +def MAD8rri : NVPTXInst<(outs Int8Regs:$dst), + (ins Int8Regs:$a, Int8Regs:$b, i8imm:$c), + "mad.lo.s16 \t$dst, $a, $b, $c;", + [(set Int8Regs:$dst, (add (mul Int8Regs:$a, Int8Regs:$b), + imm:$c))]>; +def MAD8rir : NVPTXInst<(outs Int8Regs:$dst), + (ins Int8Regs:$a, i8imm:$b, Int8Regs:$c), + "mad.lo.s16 \t$dst, $a, $b, $c;", + [(set Int8Regs:$dst, (add (mul Int8Regs:$a, imm:$b), + Int8Regs:$c))]>; +def MAD8rii : NVPTXInst<(outs Int8Regs:$dst), + (ins Int8Regs:$a, i8imm:$b, i8imm:$c), + "mad.lo.s16 \t$dst, $a, $b, $c;", + [(set Int8Regs:$dst, (add (mul Int8Regs:$a, imm:$b), + imm:$c))]>; + +def MAD16rrr : NVPTXInst<(outs Int16Regs:$dst), + (ins Int16Regs:$a, Int16Regs:$b, Int16Regs:$c), + "mad.lo.s16 \t$dst, $a, $b, $c;", + [(set Int16Regs:$dst, (add + (mul Int16Regs:$a, Int16Regs:$b), Int16Regs:$c))]>; +def MAD16rri : NVPTXInst<(outs Int16Regs:$dst), + (ins Int16Regs:$a, Int16Regs:$b, i16imm:$c), + "mad.lo.s16 \t$dst, $a, $b, $c;", + [(set Int16Regs:$dst, (add + (mul Int16Regs:$a, Int16Regs:$b), imm:$c))]>; +def MAD16rir : NVPTXInst<(outs Int16Regs:$dst), + (ins Int16Regs:$a, i16imm:$b, Int16Regs:$c), + "mad.lo.s16 \t$dst, $a, $b, $c;", + [(set Int16Regs:$dst, (add + (mul Int16Regs:$a, imm:$b), Int16Regs:$c))]>; +def MAD16rii : NVPTXInst<(outs Int16Regs:$dst), + (ins Int16Regs:$a, i16imm:$b, i16imm:$c), + "mad.lo.s16 \t$dst, $a, $b, $c;", + [(set Int16Regs:$dst, (add (mul Int16Regs:$a, imm:$b), + imm:$c))]>; + +def MAD32rrr : NVPTXInst<(outs Int32Regs:$dst), + (ins Int32Regs:$a, Int32Regs:$b, Int32Regs:$c), + "mad.lo.s32 \t$dst, $a, $b, $c;", + [(set Int32Regs:$dst, (add + (mul Int32Regs:$a, Int32Regs:$b), Int32Regs:$c))]>; +def MAD32rri : NVPTXInst<(outs Int32Regs:$dst), + (ins Int32Regs:$a, Int32Regs:$b, i32imm:$c), + "mad.lo.s32 \t$dst, $a, $b, $c;", + [(set Int32Regs:$dst, (add + (mul Int32Regs:$a, Int32Regs:$b), imm:$c))]>; +def MAD32rir : NVPTXInst<(outs Int32Regs:$dst), + (ins Int32Regs:$a, i32imm:$b, Int32Regs:$c), + "mad.lo.s32 \t$dst, $a, $b, $c;", + [(set Int32Regs:$dst, (add + (mul Int32Regs:$a, imm:$b), Int32Regs:$c))]>; +def MAD32rii : NVPTXInst<(outs Int32Regs:$dst), + (ins Int32Regs:$a, i32imm:$b, i32imm:$c), + "mad.lo.s32 \t$dst, $a, $b, $c;", + [(set 
Int32Regs:$dst, (add
+                           (mul Int32Regs:$a, imm:$b), imm:$c))]>;
+
+def MAD64rrr : NVPTXInst<(outs Int64Regs:$dst),
+                         (ins Int64Regs:$a, Int64Regs:$b, Int64Regs:$c),
+                         "mad.lo.s64 \t$dst, $a, $b, $c;",
+                         [(set Int64Regs:$dst, (add
+                           (mul Int64Regs:$a, Int64Regs:$b), Int64Regs:$c))]>;
+def MAD64rri : NVPTXInst<(outs Int64Regs:$dst),
+                         (ins Int64Regs:$a, Int64Regs:$b, i64imm:$c),
+                         "mad.lo.s64 \t$dst, $a, $b, $c;",
+                         [(set Int64Regs:$dst, (add
+                           (mul Int64Regs:$a, Int64Regs:$b), imm:$c))]>;
+def MAD64rir : NVPTXInst<(outs Int64Regs:$dst),
+                         (ins Int64Regs:$a, i64imm:$b, Int64Regs:$c),
+                         "mad.lo.s64 \t$dst, $a, $b, $c;",
+                         [(set Int64Regs:$dst, (add
+                           (mul Int64Regs:$a, imm:$b), Int64Regs:$c))]>;
+def MAD64rii : NVPTXInst<(outs Int64Regs:$dst),
+                         (ins Int64Regs:$a, i64imm:$b, i64imm:$c),
+                         "mad.lo.s64 \t$dst, $a, $b, $c;",
+                         [(set Int64Regs:$dst, (add
+                           (mul Int64Regs:$a, imm:$b), imm:$c))]>;
+
+
+def INEG8 : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$src),
+                      !strconcat("cvt.s16.s8 \t$dst, $src;\n\t",
+                                 "neg.s16 \t$dst, $dst;"),
+                      [(set Int8Regs:$dst, (ineg Int8Regs:$src))]>;
+def INEG16 : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$src),
+                       "neg.s16 \t$dst, $src;",
+                       [(set Int16Regs:$dst, (ineg Int16Regs:$src))]>;
+def INEG32 : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src),
+                       "neg.s32 \t$dst, $src;",
+                       [(set Int32Regs:$dst, (ineg Int32Regs:$src))]>;
+def INEG64 : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src),
+                       "neg.s64 \t$dst, $src;",
+                       [(set Int64Regs:$dst, (ineg Int64Regs:$src))]>;
+
+//-----------------------------------
+// Floating Point Arithmetic
+//-----------------------------------
+
+// Constant 1.0f
+def FloatConst1 : PatLeaf<(fpimm), [{
+  if (&(N->getValueAPF().getSemantics()) != &llvm::APFloat::IEEEsingle)
+    return false;
+  float f = (float)N->getValueAPF().convertToFloat();
+  return (f==1.0f);
+}]>;
+// Constant (double)1.0
+def DoubleConst1 : PatLeaf<(fpimm), [{
+  if (&(N->getValueAPF().getSemantics()) != &llvm::APFloat::IEEEdouble)
+    return false;
+  double d = (double)N->getValueAPF().convertToDouble();
+  return (d==1.0);
+}]>;
+
+defm FADD : F3<"add", fadd>;
+defm FSUB : F3<"sub", fsub>;
+defm FMUL : F3<"mul", fmul>;
+
+defm FADD_rn : F3_rn<"add", fadd>;
+defm FSUB_rn : F3_rn<"sub", fsub>;
+defm FMUL_rn : F3_rn<"mul", fmul>;
+
+defm FABS : F2<"abs", fabs>;
+defm FNEG : F2<"neg", fneg>;
+defm FSQRT : F2<"sqrt.rn", fsqrt>;
+
+//
+// F64 division
+//
+def FDIV641r : NVPTXInst<(outs Float64Regs:$dst),
+                         (ins f64imm:$a, Float64Regs:$b),
+                         "rcp.rn.f64 \t$dst, $b;",
+                         [(set Float64Regs:$dst,
+                           (fdiv DoubleConst1:$a, Float64Regs:$b))]>;
+def FDIV64rr : NVPTXInst<(outs Float64Regs:$dst),
+                         (ins Float64Regs:$a, Float64Regs:$b),
+                         "div.rn.f64 \t$dst, $a, $b;",
+                         [(set Float64Regs:$dst,
+                           (fdiv Float64Regs:$a, Float64Regs:$b))]>;
+def FDIV64ri : NVPTXInst<(outs Float64Regs:$dst),
+                         (ins Float64Regs:$a, f64imm:$b),
+                         "div.rn.f64 \t$dst, $a, $b;",
+                         [(set Float64Regs:$dst,
+                           (fdiv Float64Regs:$a, fpimm:$b))]>;
+
+//
+// F32 Approximate reciprocal
+//
+def FDIV321r_ftz : NVPTXInst<(outs Float32Regs:$dst),
+                             (ins f32imm:$a, Float32Regs:$b),
+                             "rcp.approx.ftz.f32 \t$dst, $b;",
+                             [(set Float32Regs:$dst,
+                               (fdiv FloatConst1:$a, Float32Regs:$b))]>,
+                   Requires<[do_DIVF32_APPROX, doF32FTZ]>;
+def FDIV321r : NVPTXInst<(outs Float32Regs:$dst),
+                         (ins f32imm:$a, Float32Regs:$b),
+                         "rcp.approx.f32 \t$dst, $b;",
+                         [(set Float32Regs:$dst,
+                           (fdiv FloatConst1:$a, Float32Regs:$b))]>,
+               Requires<[do_DIVF32_APPROX]>;
+//
+// F32 Approximate division
+//
+def FDIV32approxrr_ftz :
NVPTXInst<(outs Float32Regs:$dst),
+          (ins Float32Regs:$a, Float32Regs:$b),
+          "div.approx.ftz.f32 \t$dst, $a, $b;",
+          [(set Float32Regs:$dst,
+            (fdiv Float32Regs:$a, Float32Regs:$b))]>,
+  Requires<[do_DIVF32_APPROX, doF32FTZ]>;
+def FDIV32approxrr : NVPTXInst<(outs Float32Regs:$dst),
+                               (ins Float32Regs:$a, Float32Regs:$b),
+                               "div.approx.f32 \t$dst, $a, $b;",
+                               [(set Float32Regs:$dst,
+                                 (fdiv Float32Regs:$a, Float32Regs:$b))]>,
+                     Requires<[do_DIVF32_APPROX]>;
+//
+// F32 Semi-accurate reciprocal
+//
+// rcp.approx gives the same result as div.full(1.0f, a) and is faster.
+//
+def FDIV321r_approx_ftz : NVPTXInst<(outs Float32Regs:$dst),
+                                    (ins f32imm:$a, Float32Regs:$b),
+                                    "rcp.approx.ftz.f32 \t$dst, $b;",
+                                    [(set Float32Regs:$dst,
+                                      (fdiv FloatConst1:$a, Float32Regs:$b))]>,
+                          Requires<[do_DIVF32_FULL, doF32FTZ]>;
+def FDIV321r_approx : NVPTXInst<(outs Float32Regs:$dst),
+                                (ins f32imm:$a, Float32Regs:$b),
+                                "rcp.approx.f32 \t$dst, $b;",
+                                [(set Float32Regs:$dst,
+                                  (fdiv FloatConst1:$a, Float32Regs:$b))]>,
+                      Requires<[do_DIVF32_FULL]>;
+//
+// F32 Semi-accurate division
+//
+def FDIV32rr_ftz : NVPTXInst<(outs Float32Regs:$dst),
+                             (ins Float32Regs:$a, Float32Regs:$b),
+                             "div.full.ftz.f32 \t$dst, $a, $b;",
+                             [(set Float32Regs:$dst,
+                               (fdiv Float32Regs:$a, Float32Regs:$b))]>,
+                   Requires<[do_DIVF32_FULL, doF32FTZ]>;
+def FDIV32ri_ftz : NVPTXInst<(outs Float32Regs:$dst),
+                             (ins Float32Regs:$a, f32imm:$b),
+                             "div.full.ftz.f32 \t$dst, $a, $b;",
+                             [(set Float32Regs:$dst,
+                               (fdiv Float32Regs:$a, fpimm:$b))]>,
+                   Requires<[do_DIVF32_FULL, doF32FTZ]>;
+def FDIV32rr : NVPTXInst<(outs Float32Regs:$dst),
+                         (ins Float32Regs:$a, Float32Regs:$b),
+                         "div.full.f32 \t$dst, $a, $b;",
+                         [(set Float32Regs:$dst,
+                           (fdiv Float32Regs:$a, Float32Regs:$b))]>,
+               Requires<[do_DIVF32_FULL]>;
+def FDIV32ri : NVPTXInst<(outs Float32Regs:$dst),
+                         (ins Float32Regs:$a, f32imm:$b),
+                         "div.full.f32 \t$dst, $a, $b;",
+                         [(set Float32Regs:$dst,
+                           (fdiv Float32Regs:$a, fpimm:$b))]>,
+               Requires<[do_DIVF32_FULL]>;
+//
+// F32 Accurate reciprocal
+//
+def FDIV321r_prec_ftz : NVPTXInst<(outs Float32Regs:$dst),
+                                  (ins f32imm:$a, Float32Regs:$b),
+                                  "rcp.rn.ftz.f32 \t$dst, $b;",
+                                  [(set Float32Regs:$dst,
+                                    (fdiv FloatConst1:$a, Float32Regs:$b))]>,
+                        Requires<[reqPTX20, doF32FTZ]>;
+def FDIV321r_prec : NVPTXInst<(outs Float32Regs:$dst),
+                              (ins f32imm:$a, Float32Regs:$b),
+                              "rcp.rn.f32 \t$dst, $b;",
+                              [(set Float32Regs:$dst,
+                                (fdiv FloatConst1:$a, Float32Regs:$b))]>,
+                    Requires<[reqPTX20]>;
+//
+// F32 Accurate division
+//
+def FDIV32rr_prec_ftz : NVPTXInst<(outs Float32Regs:$dst),
+                                  (ins Float32Regs:$a, Float32Regs:$b),
+                                  "div.rn.ftz.f32 \t$dst, $a, $b;",
+                                  [(set Float32Regs:$dst,
+                                    (fdiv Float32Regs:$a, Float32Regs:$b))]>,
+                        Requires<[doF32FTZ, reqPTX20]>;
+def FDIV32ri_prec_ftz : NVPTXInst<(outs Float32Regs:$dst),
+                                  (ins Float32Regs:$a, f32imm:$b),
+                                  "div.rn.ftz.f32 \t$dst, $a, $b;",
+                                  [(set Float32Regs:$dst,
+                                    (fdiv Float32Regs:$a, fpimm:$b))]>,
+                        Requires<[doF32FTZ, reqPTX20]>;
+def FDIV32rr_prec : NVPTXInst<(outs Float32Regs:$dst),
+                              (ins Float32Regs:$a, Float32Regs:$b),
+                              "div.rn.f32 \t$dst, $a, $b;",
+                              [(set Float32Regs:$dst,
+                                (fdiv Float32Regs:$a, Float32Regs:$b))]>,
+                    Requires<[reqPTX20]>;
+def FDIV32ri_prec : NVPTXInst<(outs Float32Regs:$dst),
+                              (ins Float32Regs:$a, f32imm:$b),
+                              "div.rn.f32 \t$dst, $a, $b;",
+                              [(set Float32Regs:$dst,
+                                (fdiv Float32Regs:$a, fpimm:$b))]>,
+                    Requires<[reqPTX20]>;
+
+
+multiclass FPCONTRACT32<string OpcStr, Predicate Pred> {
+  def rrr : NVPTXInst<(outs Float32Regs:$dst),
+                      (ins Float32Regs:$a, Float32Regs:$b, Float32Regs:$c),
+
+
+multiclass FPCONTRACT32<string OpcStr, Predicate Pred> {
+  def rrr : NVPTXInst<(outs Float32Regs:$dst),
+      (ins Float32Regs:$a, Float32Regs:$b, Float32Regs:$c),
+      !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
+      [(set Float32Regs:$dst, (fadd
+        (fmul Float32Regs:$a, Float32Regs:$b),
+        Float32Regs:$c))]>, Requires<[Pred]>;
+  // This is to work around (WAR) a weird bug in Tablegen that does not
+  // automatically generate the following permuted rule rrr2 from the above
+  // rrr. So we explicitly add it here. This happens to FMA32 only.
+  // See the comments at FMAD32 and FMA32 for more information.
+  def rrr2 : NVPTXInst<(outs Float32Regs:$dst),
+      (ins Float32Regs:$a, Float32Regs:$b, Float32Regs:$c),
+      !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
+      [(set Float32Regs:$dst, (fadd Float32Regs:$c,
+        (fmul Float32Regs:$a, Float32Regs:$b)))]>,
+      Requires<[Pred]>;
+  def rri : NVPTXInst<(outs Float32Regs:$dst),
+      (ins Float32Regs:$a, Float32Regs:$b, f32imm:$c),
+      !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
+      [(set Float32Regs:$dst, (fadd
+        (fmul Float32Regs:$a, Float32Regs:$b), fpimm:$c))]>,
+      Requires<[Pred]>;
+  def rir : NVPTXInst<(outs Float32Regs:$dst),
+      (ins Float32Regs:$a, f32imm:$b, Float32Regs:$c),
+      !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
+      [(set Float32Regs:$dst, (fadd
+        (fmul Float32Regs:$a, fpimm:$b), Float32Regs:$c))]>,
+      Requires<[Pred]>;
+  def rii : NVPTXInst<(outs Float32Regs:$dst),
+      (ins Float32Regs:$a, f32imm:$b, f32imm:$c),
+      !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
+      [(set Float32Regs:$dst, (fadd
+        (fmul Float32Regs:$a, fpimm:$b), fpimm:$c))]>,
+      Requires<[Pred]>;
+}
+
+multiclass FPCONTRACT64<string OpcStr, Predicate Pred> {
+  def rrr : NVPTXInst<(outs Float64Regs:$dst),
+      (ins Float64Regs:$a, Float64Regs:$b, Float64Regs:$c),
+      !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
+      [(set Float64Regs:$dst, (fadd
+        (fmul Float64Regs:$a, Float64Regs:$b),
+        Float64Regs:$c))]>, Requires<[Pred]>;
+  def rri : NVPTXInst<(outs Float64Regs:$dst),
+      (ins Float64Regs:$a, Float64Regs:$b, f64imm:$c),
+      !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
+      [(set Float64Regs:$dst, (fadd (fmul Float64Regs:$a,
+        Float64Regs:$b), fpimm:$c))]>, Requires<[Pred]>;
+  def rir : NVPTXInst<(outs Float64Regs:$dst),
+      (ins Float64Regs:$a, f64imm:$b, Float64Regs:$c),
+      !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
+      [(set Float64Regs:$dst, (fadd
+        (fmul Float64Regs:$a, fpimm:$b), Float64Regs:$c))]>,
+      Requires<[Pred]>;
+  def rii : NVPTXInst<(outs Float64Regs:$dst),
+      (ins Float64Regs:$a, f64imm:$b, f64imm:$c),
+      !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
+      [(set Float64Regs:$dst, (fadd
+        (fmul Float64Regs:$a, fpimm:$b), fpimm:$c))]>,
+      Requires<[Pred]>;
+}
+
+// Due to an unknown reason (most likely a bug in tablegen), tablegen does not
+// automatically generate the rrr2 rule from
+// the rrr rule (see FPCONTRACT32) for FMA32, though it does for FMAD32.
+// If we reverse the order of the following two lines, then the rrr2 rule will
+// be generated for FMA32, but not for rrr.
+// Therefore, we manually write the rrr2 rule in FPCONTRACT32.
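
Numerically, contracting (fadd (fmul a, b), c) into a single instruction is
not a no-op: fma.rn rounds once where separate mul/add round twice (and
mad.f32 on older targets is an unfused approximation with its own behavior).
A quick host-side demonstration of the single- vs double-rounding difference
(compile with -ffp-contract=off so the compiler does not contract the plain
expression itself):

    #include <cmath>
    #include <cstdio>

    int main() {
      float a = 1.0f + 1.0f / 4096.0f;       // 1 + 2^-12, exactly representable
      float twice = a * a - 1.0f;            // product rounded, then subtract
      float once  = std::fmaf(a, a, -1.0f);  // fused: one rounding at the end
      printf("twice=%.9g once=%.9g\n", twice, once);  // last bits differ
      return 0;
    }
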
+defm FMAD32_ftz : FPCONTRACT32<"mad.ftz.f32", doFMADF32_ftz>;
+defm FMAD32 : FPCONTRACT32<"mad.f32", doFMADF32>;
+defm FMA32_ftz : FPCONTRACT32<"fma.rn.ftz.f32", doFMAF32_ftz>;
+defm FMA32 : FPCONTRACT32<"fma.rn.f32", doFMAF32>;
+defm FMA64 : FPCONTRACT64<"fma.rn.f64", doFMAF64>;
+
+// b*c-a => fmad(b, c, -a)
+multiclass FPCONTRACT32_SUB_PAT_MAD<NVPTXInst Inst, Predicate Pred> {
+  def : Pat<(fsub (fmul Float32Regs:$b, Float32Regs:$c), Float32Regs:$a),
+        (Inst Float32Regs:$b, Float32Regs:$c, (FNEGf32 Float32Regs:$a))>,
+        Requires<[Pred]>;
+}
+
+// a-b*c => fmad(-b,c, a)
+// - legal because a-b*c <=> a+(-b*c) <=> a+(-b)*c
+// b*c-a => fmad(b, c, -a)
+// - legal because b*c-a <=> b*c+(-a)
+multiclass FPCONTRACT32_SUB_PAT<NVPTXInst Inst, Predicate Pred> {
+  def : Pat<(fsub Float32Regs:$a, (fmul Float32Regs:$b, Float32Regs:$c)),
+        (Inst (FNEGf32 Float32Regs:$b), Float32Regs:$c, Float32Regs:$a)>,
+        Requires<[Pred]>;
+  def : Pat<(fsub (fmul Float32Regs:$b, Float32Regs:$c), Float32Regs:$a),
+        (Inst Float32Regs:$b, Float32Regs:$c, (FNEGf32 Float32Regs:$a))>,
+        Requires<[Pred]>;
+}
+
+// a-b*c => fmad(-b,c, a)
+// b*c-a => fmad(b, c, -a)
+multiclass FPCONTRACT64_SUB_PAT<NVPTXInst Inst, Predicate Pred> {
+  def : Pat<(fsub Float64Regs:$a, (fmul Float64Regs:$b, Float64Regs:$c)),
+        (Inst (FNEGf64 Float64Regs:$b), Float64Regs:$c, Float64Regs:$a)>,
+        Requires<[Pred]>;
+
+  def : Pat<(fsub (fmul Float64Regs:$b, Float64Regs:$c), Float64Regs:$a),
+        (Inst Float64Regs:$b, Float64Regs:$c, (FNEGf64 Float64Regs:$a))>,
+        Requires<[Pred]>;
+}
+
+defm FMAF32ext_ftz : FPCONTRACT32_SUB_PAT<FMA32_ftzrrr, doFMAF32AGG_ftz>;
+defm FMAF32ext : FPCONTRACT32_SUB_PAT<FMA32rrr, doFMAF32AGG>;
+defm FMADF32ext_ftz : FPCONTRACT32_SUB_PAT_MAD<FMAD32_ftzrrr, doFMADF32_ftz>;
+defm FMADF32ext : FPCONTRACT32_SUB_PAT_MAD<FMAD32rrr, doFMADF32>;
+defm FMAF64ext : FPCONTRACT64_SUB_PAT<FMA64rrr, doFMAF64AGG>;
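
The *_SUB_PAT multiclasses encode the algebraic identities a - b*c =
(-b)*c + a and b*c - a = b*c + (-a), so an fsub whose operand is a product
can still be funneled into the three-input mad/fma forms by negating one
input. A host-side check of the rewrites (the identities are exact in real
arithmetic; the fused results differ from the separate mul/sub only in the
final rounding):

    #include <cmath>
    #include <cstdio>

    int main() {
      double a = 1.0, b = 3.0, c = 0.1;
      printf("a - b*c     = %.17g\n", a - b * c);
      printf("fma(-b,c,a) = %.17g\n", std::fma(-b, c, a));  // a - b*c rewrite
      printf("b*c - a     = %.17g\n", b * c - a);
      printf("fma(b,c,-a) = %.17g\n", std::fma(b, c, -a));  // b*c - a rewrite
      return 0;
    }
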
+
+def SINF: NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$src),
+    "sin.approx.f32 \t$dst, $src;",
+    [(set Float32Regs:$dst, (fsin Float32Regs:$src))]>;
+def COSF: NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$src),
+    "cos.approx.f32 \t$dst, $src;",
+    [(set Float32Regs:$dst, (fcos Float32Regs:$src))]>;
+
+//-----------------------------------
+// Logical Arithmetic
+//-----------------------------------
+
+multiclass LOG_FORMAT<string OpcStr, SDNode OpNode> {
+  def b1rr: NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$a, Int1Regs:$b),
+      !strconcat(OpcStr, ".pred \t$dst, $a, $b;"),
+      [(set Int1Regs:$dst, (OpNode Int1Regs:$a, Int1Regs:$b))]>;
+  def b1ri: NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$a, i1imm:$b),
+      !strconcat(OpcStr, ".pred \t$dst, $a, $b;"),
+      [(set Int1Regs:$dst, (OpNode Int1Regs:$a, imm:$b))]>;
+  def b8rr: NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, Int8Regs:$b),
+      !strconcat(OpcStr, ".b16 \t$dst, $a, $b;"),
+      [(set Int8Regs:$dst, (OpNode Int8Regs:$a, Int8Regs:$b))]>;
+  def b8ri: NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, i8imm:$b),
+      !strconcat(OpcStr, ".b16 \t$dst, $a, $b;"),
+      [(set Int8Regs:$dst, (OpNode Int8Regs:$a, imm:$b))]>;
+  def b16rr: NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, Int16Regs:$b),
+      !strconcat(OpcStr, ".b16 \t$dst, $a, $b;"),
+      [(set Int16Regs:$dst, (OpNode Int16Regs:$a,
+        Int16Regs:$b))]>;
+  def b16ri: NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, i16imm:$b),
+      !strconcat(OpcStr, ".b16 \t$dst, $a, $b;"),
+      [(set Int16Regs:$dst, (OpNode Int16Regs:$a, imm:$b))]>;
+  def b32rr: NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b),
+      !strconcat(OpcStr, ".b32 \t$dst, $a, $b;"),
+      [(set Int32Regs:$dst, (OpNode Int32Regs:$a,
+        Int32Regs:$b))]>;
+  def b32ri: NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
+      !strconcat(OpcStr, ".b32 \t$dst, $a, $b;"),
+      [(set Int32Regs:$dst, (OpNode Int32Regs:$a, imm:$b))]>;
+  def b64rr: NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, Int64Regs:$b),
+      !strconcat(OpcStr, ".b64 \t$dst, $a, $b;"),
+      [(set Int64Regs:$dst, (OpNode Int64Regs:$a,
+        Int64Regs:$b))]>;
+  def b64ri: NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, i64imm:$b),
+      !strconcat(OpcStr, ".b64 \t$dst, $a, $b;"),
+      [(set Int64Regs:$dst, (OpNode Int64Regs:$a, imm:$b))]>;
+}
+
+defm OR : LOG_FORMAT<"or", or>;
+defm AND : LOG_FORMAT<"and", and>;
+defm XOR : LOG_FORMAT<"xor", xor>;
+
+def NOT1: NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$src),
+    "not.pred \t$dst, $src;",
+    [(set Int1Regs:$dst, (not Int1Regs:$src))]>;
+def NOT8: NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$src),
+    "not.b16 \t$dst, $src;",
+    [(set Int8Regs:$dst, (not Int8Regs:$src))]>;
+def NOT16: NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$src),
+    "not.b16 \t$dst, $src;",
+    [(set Int16Regs:$dst, (not Int16Regs:$src))]>;
+def NOT32: NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src),
+    "not.b32 \t$dst, $src;",
+    [(set Int32Regs:$dst, (not Int32Regs:$src))]>;
+def NOT64: NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src),
+    "not.b64 \t$dst, $src;",
+    [(set Int64Regs:$dst, (not Int64Regs:$src))]>;
+
+// For shifts, the second src operand must be a 32-bit value.
+multiclass LSHIFT_FORMAT<string OpcStr, SDNode OpNode> {
+  def i64rr : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a,
+      Int32Regs:$b),
+      !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
+      [(set Int64Regs:$dst, (OpNode Int64Regs:$a,
+        Int32Regs:$b))]>;
+  def i64ri : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, i32imm:$b),
+      !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
+      [(set Int64Regs:$dst, (OpNode Int64Regs:$a,
+        (i32 imm:$b)))]>;
+  def i32rr : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a,
+      Int32Regs:$b),
+      !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
+      [(set Int32Regs:$dst, (OpNode Int32Regs:$a,
+        Int32Regs:$b))]>;
+  def i32ri : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
+      !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
+      [(set Int32Regs:$dst, (OpNode Int32Regs:$a,
+        (i32 imm:$b)))]>;
+  def i32ii : NVPTXInst<(outs Int32Regs:$dst), (ins i32imm:$a, i32imm:$b),
+      !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
+      [(set Int32Regs:$dst, (OpNode (i32 imm:$a),
+        (i32 imm:$b)))]>;
+  def i16rr : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a,
+      Int32Regs:$b),
+      !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
+      [(set Int16Regs:$dst, (OpNode Int16Regs:$a,
+        Int32Regs:$b))]>;
+  def i16ri : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, i32imm:$b),
+      !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
+      [(set Int16Regs:$dst, (OpNode Int16Regs:$a,
+        (i32 imm:$b)))]>;
+  def i8rr : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, Int32Regs:$b),
+      !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
+      [(set Int8Regs:$dst, (OpNode Int8Regs:$a,
+        Int32Regs:$b))]>;
+  def i8ri : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, i32imm:$b),
+      !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
+      [(set Int8Regs:$dst, (OpNode Int8Regs:$a,
+        (i32 imm:$b)))]>;
+}
+
+defm SHL : LSHIFT_FORMAT<"shl.b", shl>;
+
+// For shifts, the second src operand must be a 32-bit value.
+// An extra cvt is needed for the 8-bit case.
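
Right shifts are where the 8-bit case needs care: i8 values live in 16-bit
registers, so before an sra/srl the register must be re-extended
(cvt.s16.s8 or cvt.u16.u8), or stale upper bits of the wider register would
shift into the result. The multiclass below takes a cvt string for exactly
that purpose; in host C++ terms (two's complement assumed):

    #include <cstdint>
    #include <cstdio>

    int main() {
      uint16_t reg = 0xFF80;        // an i8 value 0x80 held in a 16-bit register
      int16_t  s = (int8_t)reg;     // cvt.s16.s8: sign-extend, s == -128
      uint16_t u = (uint8_t)reg;    // cvt.u16.u8: zero-extend, u == 128
      printf("sra: %d\n", s >> 2);  // arithmetic shift-right: -32
      printf("srl: %d\n", u >> 2);  // logical shift-right: 32
      return 0;
    }
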
+multiclass RSHIFT_FORMAT { + def i64rr : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, + Int32Regs:$b), + !strconcat(OpcStr, "64 \t$dst, $a, $b;"), + [(set Int64Regs:$dst, (OpNode Int64Regs:$a, + Int32Regs:$b))]>; + def i64ri : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, i32imm:$b), + !strconcat(OpcStr, "64 \t$dst, $a, $b;"), + [(set Int64Regs:$dst, (OpNode Int64Regs:$a, + (i32 imm:$b)))]>; + def i32rr : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, + Int32Regs:$b), + !strconcat(OpcStr, "32 \t$dst, $a, $b;"), + [(set Int32Regs:$dst, (OpNode Int32Regs:$a, + Int32Regs:$b))]>; + def i32ri : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b), + !strconcat(OpcStr, "32 \t$dst, $a, $b;"), + [(set Int32Regs:$dst, (OpNode Int32Regs:$a, + (i32 imm:$b)))]>; + def i32ii : NVPTXInst<(outs Int32Regs:$dst), (ins i32imm:$a, i32imm:$b), + !strconcat(OpcStr, "32 \t$dst, $a, $b;"), + [(set Int32Regs:$dst, (OpNode (i32 imm:$a), + (i32 imm:$b)))]>; + def i16rr : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, + Int32Regs:$b), + !strconcat(OpcStr, "16 \t$dst, $a, $b;"), + [(set Int16Regs:$dst, (OpNode Int16Regs:$a, + Int32Regs:$b))]>; + def i16ri : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, i32imm:$b), + !strconcat(OpcStr, "16 \t$dst, $a, $b;"), + [(set Int16Regs:$dst, (OpNode Int16Regs:$a, + (i32 imm:$b)))]>; + def i8rr : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, Int32Regs:$b), + !strconcat(CVTStr, !strconcat(" \t$dst, $a;\n\t", + !strconcat(OpcStr, "16 \t$dst, $dst, $b;"))), + [(set Int8Regs:$dst, (OpNode Int8Regs:$a, + Int32Regs:$b))]>; + def i8ri : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, i32imm:$b), + !strconcat(CVTStr, !strconcat(" \t$dst, $a;\n\t", + !strconcat(OpcStr, "16 \t$dst, $dst, $b;"))), + [(set Int8Regs:$dst, (OpNode Int8Regs:$a, + (i32 imm:$b)))]>; +} + +defm SRA : RSHIFT_FORMAT<"shr.s", sra, "cvt.s16.s8">; +defm SRL : RSHIFT_FORMAT<"shr.u", srl, "cvt.u16.u8">; + +// 32bit +def ROT32imm_sw : NVPTXInst<(outs Int32Regs:$dst), + (ins Int32Regs:$src, i32imm:$amt1, i32imm:$amt2), + !strconcat("{{\n\t", + !strconcat(".reg .b32 %lhs;\n\t", + !strconcat(".reg .b32 %rhs;\n\t", + !strconcat("shl.b32 \t%lhs, $src, $amt1;\n\t", + !strconcat("shr.b32 \t%rhs, $src, $amt2;\n\t", + !strconcat("add.u32 \t$dst, %lhs, %rhs;\n\t", + !strconcat("}}", ""))))))), + []>; + +def SUB_FRM_32 : SDNodeXFormgetTargetConstant(32-N->getZExtValue(), MVT::i32); +}]>; + +def : Pat<(rotl Int32Regs:$src, (i32 imm:$amt)), + (ROT32imm_sw Int32Regs:$src, imm:$amt, (SUB_FRM_32 node:$amt))>; +def : Pat<(rotr Int32Regs:$src, (i32 imm:$amt)), + (ROT32imm_sw Int32Regs:$src, (SUB_FRM_32 node:$amt), imm:$amt)>; + +def ROTL32reg_sw : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src, + Int32Regs:$amt), + !strconcat("{{\n\t", + !strconcat(".reg .b32 %lhs;\n\t", + !strconcat(".reg .b32 %rhs;\n\t", + !strconcat(".reg .b32 %amt2;\n\t", + !strconcat("shl.b32 \t%lhs, $src, $amt;\n\t", + !strconcat("sub.s32 \t%amt2, 32, $amt;\n\t", + !strconcat("shr.b32 \t%rhs, $src, %amt2;\n\t", + !strconcat("add.u32 \t$dst, %lhs, %rhs;\n\t", + !strconcat("}}", ""))))))))), + [(set Int32Regs:$dst, (rotl Int32Regs:$src, Int32Regs:$amt))]>; + +def ROTR32reg_sw : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src, + Int32Regs:$amt), + !strconcat("{{\n\t", + !strconcat(".reg .b32 %lhs;\n\t", + !strconcat(".reg .b32 %rhs;\n\t", + !strconcat(".reg .b32 %amt2;\n\t", + !strconcat("shr.b32 \t%lhs, $src, $amt;\n\t", + !strconcat("sub.s32 \t%amt2, 32, $amt;\n\t", + !strconcat("shl.b32 \t%rhs, $src, 
%amt2;\n\t", + !strconcat("add.u32 \t$dst, %lhs, %rhs;\n\t", + !strconcat("}}", ""))))))))), + [(set Int32Regs:$dst, (rotr Int32Regs:$src, Int32Regs:$amt))]>; + +// 64bit +def ROT64imm_sw : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src, + i32imm:$amt1, i32imm:$amt2), + !strconcat("{{\n\t", + !strconcat(".reg .b64 %lhs;\n\t", + !strconcat(".reg .b64 %rhs;\n\t", + !strconcat("shl.b64 \t%lhs, $src, $amt1;\n\t", + !strconcat("shr.b64 \t%rhs, $src, $amt2;\n\t", + !strconcat("add.u64 \t$dst, %lhs, %rhs;\n\t", + !strconcat("}}", ""))))))), + []>; + +def SUB_FRM_64 : SDNodeXFormgetTargetConstant(64-N->getZExtValue(), MVT::i32); +}]>; + +def : Pat<(rotl Int64Regs:$src, (i32 imm:$amt)), + (ROT64imm_sw Int64Regs:$src, imm:$amt, (SUB_FRM_64 node:$amt))>; +def : Pat<(rotr Int64Regs:$src, (i32 imm:$amt)), + (ROT64imm_sw Int64Regs:$src, (SUB_FRM_64 node:$amt), imm:$amt)>; + +def ROTL64reg_sw : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src, + Int32Regs:$amt), + !strconcat("{{\n\t", + !strconcat(".reg .b64 %lhs;\n\t", + !strconcat(".reg .b64 %rhs;\n\t", + !strconcat(".reg .u32 %amt2;\n\t", + !strconcat("shl.b64 \t%lhs, $src, $amt;\n\t", + !strconcat("sub.u32 \t%amt2, 64, $amt;\n\t", + !strconcat("shr.b64 \t%rhs, $src, %amt2;\n\t", + !strconcat("add.u64 \t$dst, %lhs, %rhs;\n\t", + !strconcat("}}", ""))))))))), + [(set Int64Regs:$dst, (rotl Int64Regs:$src, Int32Regs:$amt))]>; + +def ROTR64reg_sw : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src, + Int32Regs:$amt), + !strconcat("{{\n\t", + !strconcat(".reg .b64 %lhs;\n\t", + !strconcat(".reg .b64 %rhs;\n\t", + !strconcat(".reg .u32 %amt2;\n\t", + !strconcat("shr.b64 \t%lhs, $src, $amt;\n\t", + !strconcat("sub.u32 \t%amt2, 64, $amt;\n\t", + !strconcat("shl.b64 \t%rhs, $src, %amt2;\n\t", + !strconcat("add.u64 \t$dst, %lhs, %rhs;\n\t", + !strconcat("}}", ""))))))))), + [(set Int64Regs:$dst, (rotr Int64Regs:$src, Int32Regs:$amt))]>; + + +//----------------------------------- +// Data Movement (Load / Store, Move) +//----------------------------------- + +def ADDRri : ComplexPattern; +def ADDRri64 : ComplexPattern; + +def MEMri : Operand { + let PrintMethod = "printMemOperand"; + let MIOperandInfo = (ops Int32Regs, i32imm); +} +def MEMri64 : Operand { + let PrintMethod = "printMemOperand"; + let MIOperandInfo = (ops Int64Regs, i64imm); +} + +def imem : Operand { + let PrintMethod = "printOperand"; +} + +def imemAny : Operand { + let PrintMethod = "printOperand"; +} + +def LdStCode : Operand { + let PrintMethod = "printLdStCode"; +} + +def SDTWrapper : SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>, SDTCisPtrTy<0>]>; +def Wrapper : SDNode<"NVPTXISD::Wrapper", SDTWrapper>; + +def MOV_ADDR : NVPTXInst<(outs Int32Regs:$dst), (ins imem:$a), + "mov.u32 \t$dst, $a;", + [(set Int32Regs:$dst, (Wrapper tglobaladdr:$a))]>; + +def MOV_ADDR64 : NVPTXInst<(outs Int64Regs:$dst), (ins imem:$a), + "mov.u64 \t$dst, $a;", + [(set Int64Regs:$dst, (Wrapper tglobaladdr:$a))]>; + +// copyPhysreg is hard-coded in NVPTXInstrInfo.cpp +let IsSimpleMove=1 in { +def IMOV1rr: NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$sss), + "mov.pred \t$dst, $sss;", []>; +def IMOV8rr: NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$sss), + "mov.u16 \t$dst, $sss;", []>; +def IMOV16rr: NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$sss), + "mov.u16 \t$dst, $sss;", []>; +def IMOV32rr: NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$sss), + "mov.u32 \t$dst, $sss;", []>; +def IMOV64rr: NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$sss), + "mov.u64 \t$dst, $sss;", []>; + +def FMOV32rr: 
NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$src), + "mov.f32 \t$dst, $src;", []>; +def FMOV64rr: NVPTXInst<(outs Float64Regs:$dst), (ins Float64Regs:$src), + "mov.f64 \t$dst, $src;", []>; +} +def IMOV1ri: NVPTXInst<(outs Int1Regs:$dst), (ins i1imm:$src), + "mov.pred \t$dst, $src;", + [(set Int1Regs:$dst, imm:$src)]>; +def IMOV8ri: NVPTXInst<(outs Int8Regs:$dst), (ins i8imm:$src), + "mov.u16 \t$dst, $src;", + [(set Int8Regs:$dst, imm:$src)]>; +def IMOV16ri: NVPTXInst<(outs Int16Regs:$dst), (ins i16imm:$src), + "mov.u16 \t$dst, $src;", + [(set Int16Regs:$dst, imm:$src)]>; +def IMOV32ri: NVPTXInst<(outs Int32Regs:$dst), (ins i32imm:$src), + "mov.u32 \t$dst, $src;", + [(set Int32Regs:$dst, imm:$src)]>; +def IMOV64i: NVPTXInst<(outs Int64Regs:$dst), (ins i64imm:$src), + "mov.u64 \t$dst, $src;", + [(set Int64Regs:$dst, imm:$src)]>; + +def FMOV32ri: NVPTXInst<(outs Float32Regs:$dst), (ins f32imm:$src), + "mov.f32 \t$dst, $src;", + [(set Float32Regs:$dst, fpimm:$src)]>; +def FMOV64ri: NVPTXInst<(outs Float64Regs:$dst), (ins f64imm:$src), + "mov.f64 \t$dst, $src;", + [(set Float64Regs:$dst, fpimm:$src)]>; + +def : Pat<(i32 (Wrapper texternalsym:$dst)), (IMOV32ri texternalsym:$dst)>; + +//---- Copy Frame Index ---- +def LEA_ADDRi : NVPTXInst<(outs Int32Regs:$dst), (ins MEMri:$addr), + "add.u32 \t$dst, ${addr:add};", + [(set Int32Regs:$dst, ADDRri:$addr)]>; +def LEA_ADDRi64 : NVPTXInst<(outs Int64Regs:$dst), (ins MEMri64:$addr), + "add.u64 \t$dst, ${addr:add};", + [(set Int64Regs:$dst, ADDRri64:$addr)]>; + +//----------------------------------- +// Comparison and Selection +//----------------------------------- + +// Generate string block like +// { +// .reg .pred p; +// setp.gt.s16 p, %a, %b; +// selp.s16 %dst, -1, 0, p; +// } +// when OpcStr=setp.gt.s sz1=16 sz2=16 d=%dst a=%a b=%b +class Set_Str { + string t1 = "{{\n\t.reg .pred p;\n\t"; + string t2 = !strconcat(t1 , OpcStr); + string t3 = !strconcat(t2 , sz1); + string t4 = !strconcat(t3 , " \tp, "); + string t5 = !strconcat(t4 , a); + string t6 = !strconcat(t5 , ", "); + string t7 = !strconcat(t6 , b); + string t8 = !strconcat(t7 , ";\n\tselp.s"); + string t9 = !strconcat(t8 , sz2); + string t10 = !strconcat(t9, " \t"); + string t11 = !strconcat(t10, d); + string s = !strconcat(t11, ", -1, 0, p;\n\t}}"); +} + +// Generate string block like +// { +// .reg .pred p; +// .reg .s16 %temp1; +// .reg .s16 %temp2; +// cvt.s16.s8 %temp1, %a; +// cvt s16.s8 %temp1, %b; +// setp.gt.s16 p, %temp1, %temp2; +// selp.s16 %dst, -1, 0, p; +// } +// when OpcStr=setp.gt.s d=%dst a=%a b=%b type=s16 cvt=cvt.s16.s8 +class Set_Stri8 { + string t1 = "{{\n\t.reg .pred p;\n\t"; + string t2 = !strconcat(t1, ".reg ."); + string t3 = !strconcat(t2, type); + string t4 = !strconcat(t3, " %temp1;\n\t"); + string t5 = !strconcat(t4, ".reg ."); + string t6 = !strconcat(t5, type); + string t7 = !strconcat(t6, " %temp2;\n\t"); + string t8 = !strconcat(t7, cvt); + string t9 = !strconcat(t8, " \t%temp1, "); + string t10 = !strconcat(t9, a); + string t11 = !strconcat(t10, ";\n\t"); + string t12 = !strconcat(t11, cvt); + string t13 = !strconcat(t12, " \t%temp2, "); + string t14 = !strconcat(t13, b); + string t15 = !strconcat(t14, ";\n\t"); + string t16 = !strconcat(t15, OpcStr); + string t17 = !strconcat(t16, "16"); + string t18 = !strconcat(t17, " \tp, %temp1, %temp2;\n\t"); + string t19 = !strconcat(t18, "selp.s16 \t"); + string t20 = !strconcat(t19, d); + string s = !strconcat(t20, ", -1, 0, p;\n\t}}"); +} + +multiclass ISET_FORMAT { + def i8rr_toi8: NVPTXInst<(outs 
Int8Regs:$dst), (ins Int8Regs:$a, Int8Regs:$b), + Set_Stri8.s, + []>; + def i16rr_toi16: NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, + Int16Regs:$b), + Set_Str.s, + []>; + def i32rr_toi32: NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, + Int32Regs:$b), + Set_Str.s, + []>; + def i64rr_toi64: NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, + Int64Regs:$b), + Set_Str.s, + []>; + + def i8rr_p: NVPTXInst<(outs Int1Regs:$dst), (ins Int8Regs:$a, Int8Regs:$b), + Handle_i8rr.s, + [(set Int1Regs:$dst, (OpNode Int8Regs:$a, Int8Regs:$b))]>; + def i8ri_p: NVPTXInst<(outs Int1Regs:$dst), (ins Int8Regs:$a, i8imm:$b), + Handle_i8ri.s, + [(set Int1Regs:$dst, (OpNode Int8Regs:$a, imm:$b))]>; + def i8ir_p: NVPTXInst<(outs Int1Regs:$dst), (ins i8imm:$a, Int8Regs:$b), + Handle_i8ir.s, + [(set Int1Regs:$dst, (OpNode imm:$a, Int8Regs:$b))]>; + def i16rr_p: NVPTXInst<(outs Int1Regs:$dst), (ins Int16Regs:$a, Int16Regs:$b), + !strconcat(OpcStr, "16 \t$dst, $a, $b;"), + [(set Int1Regs:$dst, (OpNode Int16Regs:$a, Int16Regs:$b))]>; + def i16ri_p: NVPTXInst<(outs Int1Regs:$dst), (ins Int16Regs:$a, i16imm:$b), + !strconcat(OpcStr, "16 \t$dst, $a, $b;"), + [(set Int1Regs:$dst, (OpNode Int16Regs:$a, imm:$b))]>; + def i16ir_p: NVPTXInst<(outs Int1Regs:$dst), (ins i16imm:$a, Int16Regs:$b), + !strconcat(OpcStr, "16 \t$dst, $a, $b;"), + [(set Int1Regs:$dst, (OpNode imm:$a, Int16Regs:$b))]>; + def i32rr_p: NVPTXInst<(outs Int1Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b), + !strconcat(OpcStr, "32 \t$dst, $a, $b;"), + [(set Int1Regs:$dst, (OpNode Int32Regs:$a, Int32Regs:$b))]>; + def i32ri_p: NVPTXInst<(outs Int1Regs:$dst), (ins Int32Regs:$a, i32imm:$b), + !strconcat(OpcStr, "32 \t$dst, $a, $b;"), + [(set Int1Regs:$dst, (OpNode Int32Regs:$a, imm:$b))]>; + def i32ir_p: NVPTXInst<(outs Int1Regs:$dst), (ins i32imm:$a, Int32Regs:$b), + !strconcat(OpcStr, "32 \t$dst, $a, $b;"), + [(set Int1Regs:$dst, (OpNode imm:$a, Int32Regs:$b))]>; + def i64rr_p: NVPTXInst<(outs Int1Regs:$dst), (ins Int64Regs:$a, Int64Regs:$b), + !strconcat(OpcStr, "64 \t$dst, $a, $b;"), + [(set Int1Regs:$dst, (OpNode Int64Regs:$a, Int64Regs:$b))]>; + def i64ri_p: NVPTXInst<(outs Int1Regs:$dst), (ins Int64Regs:$a, i64imm:$b), + !strconcat(OpcStr, "64 \t$dst, $a, $b;"), + [(set Int1Regs:$dst, (OpNode Int64Regs:$a, imm:$b))]>; + def i64ir_p: NVPTXInst<(outs Int1Regs:$dst), (ins i64imm:$a, Int64Regs:$b), + !strconcat(OpcStr, "64 \t$dst, $a, $b;"), + [(set Int1Regs:$dst, (OpNode imm:$a, Int64Regs:$b))]>; + + def i8rr_u32: NVPTXInst<(outs Int32Regs:$dst), (ins Int8Regs:$a, Int8Regs:$b), + Handle_i8rr.s, + [(set Int32Regs:$dst, (OpNode Int8Regs:$a, Int8Regs:$b))]>; + def i8ri_u32: NVPTXInst<(outs Int32Regs:$dst), (ins Int8Regs:$a, i8imm:$b), + Handle_i8ri.s, + [(set Int32Regs:$dst, (OpNode Int8Regs:$a, imm:$b))]>; + def i8ir_u32: NVPTXInst<(outs Int32Regs:$dst), (ins i8imm:$a, Int8Regs:$b), + Handle_i8ir.s, + [(set Int32Regs:$dst, (OpNode imm:$a, Int8Regs:$b))]>; + def i16rr_u32: NVPTXInst<(outs Int32Regs:$dst), (ins Int16Regs:$a, + Int16Regs:$b), + !strconcat(OpcStr_u32, "16 \t$dst, $a, $b;"), + [(set Int32Regs:$dst, (OpNode Int16Regs:$a, Int16Regs:$b))]>; + def i16ri_u32: NVPTXInst<(outs Int32Regs:$dst), (ins Int16Regs:$a, i16imm:$b), + !strconcat(OpcStr_u32, "16 \t$dst, $a, $b;"), + [(set Int32Regs:$dst, (OpNode Int16Regs:$a, imm:$b))]>; + def i16ir_u32: NVPTXInst<(outs Int32Regs:$dst), (ins i16imm:$a, Int16Regs:$b), + !strconcat(OpcStr_u32, "16 \t$dst, $a, $b;"), + [(set Int32Regs:$dst, (OpNode imm:$a, Int16Regs:$b))]>; + def i32rr_u32: 
NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, + Int32Regs:$b), + !strconcat(OpcStr_u32, "32 \t$dst, $a, $b;"), + [(set Int32Regs:$dst, (OpNode Int32Regs:$a, Int32Regs:$b))]>; + def i32ri_u32: NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b), + !strconcat(OpcStr_u32, "32 \t$dst, $a, $b;"), + [(set Int32Regs:$dst, (OpNode Int32Regs:$a, imm:$b))]>; + def i32ir_u32: NVPTXInst<(outs Int32Regs:$dst), (ins i32imm:$a, Int32Regs:$b), + !strconcat(OpcStr_u32, "32 \t$dst, $a, $b;"), + [(set Int32Regs:$dst, (OpNode imm:$a, Int32Regs:$b))]>; + def i64rr_u32: NVPTXInst<(outs Int32Regs:$dst), (ins Int64Regs:$a, + Int64Regs:$b), + !strconcat(OpcStr_u32, "64 \t$dst, $a, $b;"), + [(set Int32Regs:$dst, (OpNode Int64Regs:$a, Int64Regs:$b))]>; + def i64ri_u32: NVPTXInst<(outs Int32Regs:$dst), (ins Int64Regs:$a, i64imm:$b), + !strconcat(OpcStr_u32, "64 \t$dst, $a, $b;"), + [(set Int32Regs:$dst, (OpNode Int64Regs:$a, imm:$b))]>; + def i64ir_u32: NVPTXInst<(outs Int32Regs:$dst), (ins i64imm:$a, Int64Regs:$b), + !strconcat(OpcStr_u32, "64 \t$dst, $a, $b;"), + [(set Int32Regs:$dst, (OpNode imm:$a, Int64Regs:$b))]>; +} + +multiclass FSET_FORMAT { + def f32rr_toi32_ftz: NVPTXInst<(outs Int32Regs:$dst), (ins Float32Regs:$a, + Float32Regs:$b), + Set_Str.s, + []>, Requires<[doF32FTZ]>; + def f32rr_toi32: NVPTXInst<(outs Int32Regs:$dst), (ins Float32Regs:$a, + Float32Regs:$b), + Set_Str.s, + []>; + def f64rr_toi64: NVPTXInst<(outs Int64Regs:$dst), (ins Float64Regs:$a, + Float64Regs:$b), + Set_Str.s, + []>; + def f64rr_toi32: NVPTXInst<(outs Int32Regs:$dst), (ins Float64Regs:$a, + Float64Regs:$b), + Set_Str.s, + []>; + + def f32rr_p_ftz: NVPTXInst<(outs Int1Regs:$dst), (ins Float32Regs:$a + , Float32Regs:$b), + !strconcat(OpcStr, "ftz.f32 \t$dst, $a, $b;"), + [(set Int1Regs:$dst, (OpNode Float32Regs:$a, Float32Regs:$b))]> + , Requires<[doF32FTZ]>; + def f32rr_p: NVPTXInst<(outs Int1Regs:$dst), + (ins Float32Regs:$a, Float32Regs:$b), + !strconcat(OpcStr, "f32 \t$dst, $a, $b;"), + [(set Int1Regs:$dst, (OpNode Float32Regs:$a, Float32Regs:$b))]>; + def f32ri_p_ftz: NVPTXInst<(outs Int1Regs:$dst), + (ins Float32Regs:$a, f32imm:$b), + !strconcat(OpcStr, "ftz.f32 \t$dst, $a, $b;"), + [(set Int1Regs:$dst, (OpNode Float32Regs:$a, fpimm:$b))]>, + Requires<[doF32FTZ]>; + def f32ri_p: NVPTXInst<(outs Int1Regs:$dst), (ins Float32Regs:$a, f32imm:$b), + !strconcat(OpcStr, "f32 \t$dst, $a, $b;"), + [(set Int1Regs:$dst, (OpNode Float32Regs:$a, fpimm:$b))]>; + def f32ir_p_ftz: NVPTXInst<(outs Int1Regs:$dst), + (ins f32imm:$a, Float32Regs:$b), + !strconcat(OpcStr, "ftz.f32 \t$dst, $a, $b;"), + [(set Int1Regs:$dst, (OpNode fpimm:$a, Float32Regs:$b))]>, + Requires<[doF32FTZ]>; + def f32ir_p: NVPTXInst<(outs Int1Regs:$dst), (ins f32imm:$a, Float32Regs:$b), + !strconcat(OpcStr, "f32 \t$dst, $a, $b;"), + [(set Int1Regs:$dst, (OpNode fpimm:$a, Float32Regs:$b))]>; + def f64rr_p: NVPTXInst<(outs Int1Regs:$dst), + (ins Float64Regs:$a, Float64Regs:$b), + !strconcat(OpcStr, "f64 \t$dst, $a, $b;"), + [(set Int1Regs:$dst, (OpNode Float64Regs:$a, Float64Regs:$b))]>; + def f64ri_p: NVPTXInst<(outs Int1Regs:$dst), (ins Float64Regs:$a, f64imm:$b), + !strconcat(OpcStr, "f64 \t$dst, $a, $b;"), + [(set Int1Regs:$dst, (OpNode Float64Regs:$a, fpimm:$b))]>; + def f64ir_p: NVPTXInst<(outs Int1Regs:$dst), (ins f64imm:$a, Float64Regs:$b), + !strconcat(OpcStr, "f64 \t$dst, $a, $b;"), + [(set Int1Regs:$dst, (OpNode fpimm:$a, Float64Regs:$b))]>; + + def f32rr_u32_ftz: NVPTXInst<(outs Int32Regs:$dst), + (ins Float32Regs:$a, Float32Regs:$b), + 
!strconcat(OpcStr_u32, "ftz.f32 \t$dst, $a, $b;"), + [(set Int32Regs:$dst, (OpNode Float32Regs:$a, Float32Regs:$b))]>; + def f32rr_u32: NVPTXInst<(outs Int32Regs:$dst), + (ins Float32Regs:$a, Float32Regs:$b), + !strconcat(OpcStr_u32, "f32 \t$dst, $a, $b;"), + [(set Int32Regs:$dst, (OpNode Float32Regs:$a, Float32Regs:$b))]>; + def f32ri_u32_ftz: NVPTXInst<(outs Int32Regs:$dst), + (ins Float32Regs:$a, f32imm:$b), + !strconcat(OpcStr_u32, "ftz.f32 \t$dst, $a, $b;"), + [(set Int32Regs:$dst, (OpNode Float32Regs:$a, fpimm:$b))]>; + def f32ri_u32: NVPTXInst<(outs Int32Regs:$dst), + (ins Float32Regs:$a, f32imm:$b), + !strconcat(OpcStr_u32, "f32 \t$dst, $a, $b;"), + [(set Int32Regs:$dst, (OpNode Float32Regs:$a, fpimm:$b))]>; + def f32ir_u32_ftz: NVPTXInst<(outs Int32Regs:$dst), + (ins f32imm:$a, Float32Regs:$b), + !strconcat(OpcStr_u32, "ftz.f32 \t$dst, $a, $b;"), + [(set Int32Regs:$dst, (OpNode fpimm:$a, Float32Regs:$b))]>; + def f32ir_u32: NVPTXInst<(outs Int32Regs:$dst), + (ins f32imm:$a, Float32Regs:$b), + !strconcat(OpcStr_u32, "f32 \t$dst, $a, $b;"), + [(set Int32Regs:$dst, (OpNode fpimm:$a, Float32Regs:$b))]>; + def f64rr_u32: NVPTXInst<(outs Int32Regs:$dst), + (ins Float64Regs:$a, Float64Regs:$b), + !strconcat(OpcStr_u32, "f64 \t$dst, $a, $b;"), + [(set Int32Regs:$dst, (OpNode Float64Regs:$a, Float64Regs:$b))]>; + def f64ri_u32: NVPTXInst<(outs Int32Regs:$dst), + (ins Float64Regs:$a, f64imm:$b), + !strconcat(OpcStr_u32, "f64 \t$dst, $a, $b;"), + [(set Int32Regs:$dst, (OpNode Float64Regs:$a, fpimm:$b))]>; + def f64ir_u32: NVPTXInst<(outs Int32Regs:$dst), + (ins f64imm:$a, Float64Regs:$b), + !strconcat(OpcStr_u32, "f64 \t$dst, $a, $b;"), + [(set Int32Regs:$dst, (OpNode fpimm:$a, Float64Regs:$b))]>; +} + +defm ISetSGT +: ISET_FORMAT<"setp.gt.s", "set.gt.u32.s", setgt, "s16", "cvt.s16.s8">; +defm ISetUGT +: ISET_FORMAT<"setp.gt.u", "set.gt.u32.u", setugt, "u16", "cvt.u16.u8">; +defm ISetSLT +: ISET_FORMAT<"setp.lt.s", "set.lt.u32.s", setlt, "s16", "cvt.s16.s8">; +defm ISetULT +: ISET_FORMAT<"setp.lt.u", "set.lt.u32.u", setult, "u16", "cvt.u16.u8">; +defm ISetSGE +: ISET_FORMAT<"setp.ge.s", "set.ge.u32.s", setge, "s16", "cvt.s16.s8">; +defm ISetUGE +: ISET_FORMAT<"setp.ge.u", "set.ge.u32.u", setuge, "u16", "cvt.u16.u8">; +defm ISetSLE +: ISET_FORMAT<"setp.le.s", "set.le.u32.s", setle, "s16", "cvt.s16.s8">; +defm ISetULE +: ISET_FORMAT<"setp.le.u", "set.le.u32.u", setule, "u16", "cvt.u16.u8">; +defm ISetSEQ +: ISET_FORMAT<"setp.eq.s", "set.eq.u32.s", seteq, "s16", "cvt.s16.s8">; +defm ISetUEQ +: ISET_FORMAT<"setp.eq.u", "set.eq.u32.u", setueq, "u16", "cvt.u16.u8">; +defm ISetSNE +: ISET_FORMAT<"setp.ne.s", "set.ne.u32.s", setne, "s16", "cvt.s16.s8">; +defm ISetUNE +: ISET_FORMAT<"setp.ne.u", "set.ne.u32.u", setune, "u16", "cvt.u16.u8">; + +def ISetSNEi1rr_p : NVPTXInst<(outs Int1Regs:$dst), + (ins Int1Regs:$a, Int1Regs:$b), + "xor.pred \t$dst, $a, $b;", + [(set Int1Regs:$dst, (setne Int1Regs:$a, Int1Regs:$b))]>; +def ISetUNEi1rr_p : NVPTXInst<(outs Int1Regs:$dst), + (ins Int1Regs:$a, Int1Regs:$b), + "xor.pred \t$dst, $a, $b;", + [(set Int1Regs:$dst, (setune Int1Regs:$a, Int1Regs:$b))]>; +def ISetSEQi1rr_p : NVPTXInst<(outs Int1Regs:$dst), + (ins Int1Regs:$a, Int1Regs:$b), + !strconcat("{{\n\t", + !strconcat(".reg .pred temp;\n\t", + !strconcat("xor.pred \ttemp, $a, $b;\n\t", + !strconcat("not.pred \t$dst, temp;\n\t}}","")))), + [(set Int1Regs:$dst, (seteq Int1Regs:$a, Int1Regs:$b))]>; +def ISetUEQi1rr_p : NVPTXInst<(outs Int1Regs:$dst), + (ins Int1Regs:$a, Int1Regs:$b), + !strconcat("{{\n\t", + 
!strconcat(".reg .pred temp;\n\t", + !strconcat("xor.pred \ttemp, $a, $b;\n\t", + !strconcat("not.pred \t$dst, temp;\n\t}}","")))), + [(set Int1Regs:$dst, (setueq Int1Regs:$a, Int1Regs:$b))]>; + +// Compare 2 i1's and produce a u32 +def ISETSNEi1rr_u32 : NVPTXInst<(outs Int32Regs:$dst), + (ins Int1Regs:$a, Int1Regs:$b), + !strconcat("{{\n\t", + !strconcat(".reg .pred temp;\n\t", + !strconcat("xor.pred \ttemp, $a, $b;\n\t", + !strconcat("selp.u32 \t$dst, -1, 0, temp;", "\n\t}}")))), + [(set Int32Regs:$dst, (setne Int1Regs:$a, Int1Regs:$b))]>; +def ISETSEQi1rr_u32 : NVPTXInst<(outs Int32Regs:$dst), + (ins Int1Regs:$a, Int1Regs:$b), + !strconcat("{{\n\t", + !strconcat(".reg .pred temp;\n\t", + !strconcat("xor.pred \ttemp, $a, $b;\n\t", + !strconcat("selp.u32 \t$dst, 0, -1, temp;", "\n\t}}")))), + [(set Int32Regs:$dst, (seteq Int1Regs:$a, Int1Regs:$b))]>; + +defm FSetGT : FSET_FORMAT<"setp.gt.", "set.gt.u32.", setogt>; +defm FSetLT : FSET_FORMAT<"setp.lt.", "set.lt.u32.", setolt>; +defm FSetGE : FSET_FORMAT<"setp.ge.", "set.ge.u32.", setoge>; +defm FSetLE : FSET_FORMAT<"setp.le.", "set.le.u32.", setole>; +defm FSetEQ : FSET_FORMAT<"setp.eq.", "set.eq.u32.", setoeq>; +defm FSetNE : FSET_FORMAT<"setp.ne.", "set.ne.u32.", setone>; + +defm FSetUGT : FSET_FORMAT<"setp.gtu.", "set.gtu.u32.", setugt>; +defm FSetULT : FSET_FORMAT<"setp.ltu.", "set.ltu.u32.",setult>; +defm FSetUGE : FSET_FORMAT<"setp.geu.", "set.geu.u32.",setuge>; +defm FSetULE : FSET_FORMAT<"setp.leu.", "set.leu.u32.",setule>; +defm FSetUEQ : FSET_FORMAT<"setp.equ.", "set.equ.u32.",setueq>; +defm FSetUNE : FSET_FORMAT<"setp.neu.", "set.neu.u32.",setune>; + +defm FSetNUM : FSET_FORMAT<"setp.num.", "set.num.u32.",seto>; +defm FSetNAN : FSET_FORMAT<"setp.nan.", "set.nan.u32.",setuo>; + +def SELECTi1rr : Pat<(i1 (select Int1Regs:$p, Int1Regs:$a, Int1Regs:$b)), + (ORb1rr (ANDb1rr Int1Regs:$p, Int1Regs:$a), + (ANDb1rr (NOT1 Int1Regs:$p), Int1Regs:$b))>; +def SELECTi8rr : NVPTXInst<(outs Int8Regs:$dst), + (ins Int8Regs:$a, Int8Regs:$b, Int1Regs:$p), + "selp.b16 \t$dst, $a, $b, $p;", + [(set Int8Regs:$dst, (select Int1Regs:$p, Int8Regs:$a, Int8Regs:$b))]>; +def SELECTi8ri : NVPTXInst<(outs Int8Regs:$dst), + (ins Int8Regs:$a, i8imm:$b, Int1Regs:$p), + "selp.b16 \t$dst, $a, $b, $p;", + [(set Int8Regs:$dst, (select Int1Regs:$p, Int8Regs:$a, imm:$b))]>; +def SELECTi8ir : NVPTXInst<(outs Int8Regs:$dst), + (ins i8imm:$a, Int8Regs:$b, Int1Regs:$p), + "selp.b16 \t$dst, $a, $b, $p;", + [(set Int8Regs:$dst, (select Int1Regs:$p, imm:$a, Int8Regs:$b))]>; +def SELECTi8ii : NVPTXInst<(outs Int8Regs:$dst), + (ins i8imm:$a, i8imm:$b, Int1Regs:$p), + "selp.b16 \t$dst, $a, $b, $p;", + [(set Int8Regs:$dst, (select Int1Regs:$p, imm:$a, imm:$b))]>; + +def SELECTi16rr : NVPTXInst<(outs Int16Regs:$dst), + (ins Int16Regs:$a, Int16Regs:$b, Int1Regs:$p), + "selp.b16 \t$dst, $a, $b, $p;", + [(set Int16Regs:$dst, (select Int1Regs:$p, Int16Regs:$a, Int16Regs:$b))]>; +def SELECTi16ri : NVPTXInst<(outs Int16Regs:$dst), + (ins Int16Regs:$a, i16imm:$b, Int1Regs:$p), + "selp.b16 \t$dst, $a, $b, $p;", + [(set Int16Regs:$dst, (select Int1Regs:$p, Int16Regs:$a, imm:$b))]>; +def SELECTi16ir : NVPTXInst<(outs Int16Regs:$dst), + (ins i16imm:$a, Int16Regs:$b, Int1Regs:$p), + "selp.b16 \t$dst, $a, $b, $p;", + [(set Int16Regs:$dst, (select Int1Regs:$p, imm:$a, Int16Regs:$b))]>; +def SELECTi16ii : NVPTXInst<(outs Int16Regs:$dst), + (ins i16imm:$a, i16imm:$b, Int1Regs:$p), + "selp.b16 \t$dst, $a, $b, $p;", + [(set Int16Regs:$dst, (select Int1Regs:$p, imm:$a, imm:$b))]>; + +def 
SELECTi32rr : NVPTXInst<(outs Int32Regs:$dst), + (ins Int32Regs:$a, Int32Regs:$b, Int1Regs:$p), + "selp.b32 \t$dst, $a, $b, $p;", + [(set Int32Regs:$dst, (select Int1Regs:$p, Int32Regs:$a, Int32Regs:$b))]>; +def SELECTi32ri : NVPTXInst<(outs Int32Regs:$dst), + (ins Int32Regs:$a, i32imm:$b, Int1Regs:$p), + "selp.b32 \t$dst, $a, $b, $p;", + [(set Int32Regs:$dst, (select Int1Regs:$p, Int32Regs:$a, imm:$b))]>; +def SELECTi32ir : NVPTXInst<(outs Int32Regs:$dst), + (ins i32imm:$a, Int32Regs:$b, Int1Regs:$p), + "selp.b32 \t$dst, $a, $b, $p;", + [(set Int32Regs:$dst, (select Int1Regs:$p, imm:$a, Int32Regs:$b))]>; +def SELECTi32ii : NVPTXInst<(outs Int32Regs:$dst), + (ins i32imm:$a, i32imm:$b, Int1Regs:$p), + "selp.b32 \t$dst, $a, $b, $p;", + [(set Int32Regs:$dst, (select Int1Regs:$p, imm:$a, imm:$b))]>; + +def SELECTi64rr : NVPTXInst<(outs Int64Regs:$dst), + (ins Int64Regs:$a, Int64Regs:$b, Int1Regs:$p), + "selp.b64 \t$dst, $a, $b, $p;", + [(set Int64Regs:$dst, (select Int1Regs:$p, Int64Regs:$a, Int64Regs:$b))]>; +def SELECTi64ri : NVPTXInst<(outs Int64Regs:$dst), + (ins Int64Regs:$a, i64imm:$b, Int1Regs:$p), + "selp.b64 \t$dst, $a, $b, $p;", + [(set Int64Regs:$dst, (select Int1Regs:$p, Int64Regs:$a, imm:$b))]>; +def SELECTi64ir : NVPTXInst<(outs Int64Regs:$dst), + (ins i64imm:$a, Int64Regs:$b, Int1Regs:$p), + "selp.b64 \t$dst, $a, $b, $p;", + [(set Int64Regs:$dst, (select Int1Regs:$p, imm:$a, Int64Regs:$b))]>; +def SELECTi64ii : NVPTXInst<(outs Int64Regs:$dst), + (ins i64imm:$a, i64imm:$b, Int1Regs:$p), + "selp.b64 \t$dst, $a, $b, $p;", + [(set Int64Regs:$dst, (select Int1Regs:$p, imm:$a, imm:$b))]>; + +def SELECTf32rr : NVPTXInst<(outs Float32Regs:$dst), + (ins Float32Regs:$a, Float32Regs:$b, Int1Regs:$p), + "selp.f32 \t$dst, $a, $b, $p;", + [(set Float32Regs:$dst, + (select Int1Regs:$p, Float32Regs:$a, Float32Regs:$b))]>; +def SELECTf32ri : NVPTXInst<(outs Float32Regs:$dst), + (ins Float32Regs:$a, f32imm:$b, Int1Regs:$p), + "selp.f32 \t$dst, $a, $b, $p;", + [(set Float32Regs:$dst, (select Int1Regs:$p, Float32Regs:$a, fpimm:$b))]>; +def SELECTf32ir : NVPTXInst<(outs Float32Regs:$dst), + (ins f32imm:$a, Float32Regs:$b, Int1Regs:$p), + "selp.f32 \t$dst, $a, $b, $p;", + [(set Float32Regs:$dst, (select Int1Regs:$p, fpimm:$a, Float32Regs:$b))]>; +def SELECTf32ii : NVPTXInst<(outs Float32Regs:$dst), + (ins f32imm:$a, f32imm:$b, Int1Regs:$p), + "selp.f32 \t$dst, $a, $b, $p;", + [(set Float32Regs:$dst, (select Int1Regs:$p, fpimm:$a, fpimm:$b))]>; + +def SELECTf64rr : NVPTXInst<(outs Float64Regs:$dst), + (ins Float64Regs:$a, Float64Regs:$b, Int1Regs:$p), + "selp.f64 \t$dst, $a, $b, $p;", + [(set Float64Regs:$dst, + (select Int1Regs:$p, Float64Regs:$a, Float64Regs:$b))]>; +def SELECTf64ri : NVPTXInst<(outs Float64Regs:$dst), + (ins Float64Regs:$a, f64imm:$b, Int1Regs:$p), + "selp.f64 \t$dst, $a, $b, $p;", + [(set Float64Regs:$dst, (select Int1Regs:$p, Float64Regs:$a, fpimm:$b))]>; +def SELECTf64ir : NVPTXInst<(outs Float64Regs:$dst), + (ins f64imm:$a, Float64Regs:$b, Int1Regs:$p), + "selp.f64 \t$dst, $a, $b, $p;", + [(set Float64Regs:$dst, (select Int1Regs:$p, fpimm:$a, Float64Regs:$b))]>; +def SELECTf64ii : NVPTXInst<(outs Float64Regs:$dst), + (ins f64imm:$a, f64imm:$b, Int1Regs:$p), + "selp.f64 \t $dst, $a, $b, $p;", + [(set Float64Regs:$dst, (select Int1Regs:$p, fpimm:$a, fpimm:$b))]>; + +//def ld_param : SDNode<"NVPTXISD::LOAD_PARAM", SDTLoad, +// [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; + +def SDTDeclareParamProfile : SDTypeProfile<0, 3, [SDTCisInt<0>, SDTCisInt<1>, + SDTCisInt<2>]>; +def 
SDTDeclareScalarParamProfile : SDTypeProfile<0, 3, [SDTCisInt<0>, + SDTCisInt<1>, SDTCisInt<2>]>; +def SDTLoadParamProfile : SDTypeProfile<1, 2, [SDTCisInt<1>, SDTCisInt<2>]>; +def SDTPrintCallProfile : SDTypeProfile<0, 1, [SDTCisInt<0>]>; +def SDTPrintCallUniProfile : SDTypeProfile<0, 1, [SDTCisInt<0>]>; +def SDTStoreParamProfile : SDTypeProfile<0, 3, [SDTCisInt<0>, SDTCisInt<1>]>; +def SDTStoreParam32Profile : SDTypeProfile<0, 3, [SDTCisInt<0>, SDTCisInt<1>]>; +def SDTCallArgProfile : SDTypeProfile<0, 2, [SDTCisInt<0>]>; +def SDTCallArgMarkProfile : SDTypeProfile<0, 0, []>; +def SDTCallVoidProfile : SDTypeProfile<0, 1, []>; +def SDTCallValProfile : SDTypeProfile<1, 0, []>; +def SDTMoveParamProfile : SDTypeProfile<1, 1, []>; +def SDTMoveRetvalProfile : SDTypeProfile<0, 1, []>; +def SDTStoreRetvalProfile : SDTypeProfile<0, 2, [SDTCisInt<0>]>; +def SDTPseudoUseParamProfile : SDTypeProfile<0, 1, []>; + +def DeclareParam : SDNode<"NVPTXISD::DeclareParam", SDTDeclareParamProfile, + [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>; +def DeclareScalarParam : SDNode<"NVPTXISD::DeclareScalarParam", + SDTDeclareScalarParamProfile, + [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>; +def DeclareRetParam : SDNode<"NVPTXISD::DeclareRetParam", + SDTDeclareParamProfile, + [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>; +def DeclareRet : SDNode<"NVPTXISD::DeclareRet", SDTDeclareScalarParamProfile, + [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>; +def LoadParam : SDNode<"NVPTXISD::LoadParam", SDTLoadParamProfile, + [SDNPHasChain, SDNPMayLoad, SDNPOutGlue, SDNPInGlue]>; +def PrintCall : SDNode<"NVPTXISD::PrintCall", SDTPrintCallProfile, + [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>; +def PrintCallUni : SDNode<"NVPTXISD::PrintCallUni", SDTPrintCallUniProfile, + [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>; +def StoreParam : SDNode<"NVPTXISD::StoreParam", SDTStoreParamProfile, + [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>; +def StoreParamU32 : SDNode<"NVPTXISD::StoreParamU32", SDTStoreParam32Profile, + [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>; +def StoreParamS32 : SDNode<"NVPTXISD::StoreParamS32", SDTStoreParam32Profile, + [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>; +def MoveToParam : SDNode<"NVPTXISD::MoveToParam", SDTStoreParamProfile, + [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>; +def CallArgBegin : SDNode<"NVPTXISD::CallArgBegin", SDTCallArgMarkProfile, + [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>; +def CallArg : SDNode<"NVPTXISD::CallArg", SDTCallArgProfile, + [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>; +def LastCallArg : SDNode<"NVPTXISD::LastCallArg", SDTCallArgProfile, + [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>; +def CallArgEnd : SDNode<"NVPTXISD::CallArgEnd", SDTCallVoidProfile, + [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>; +def CallVoid : SDNode<"NVPTXISD::CallVoid", SDTCallVoidProfile, + [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>; +def Prototype : SDNode<"NVPTXISD::Prototype", SDTCallVoidProfile, + [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>; +def CallVal : SDNode<"NVPTXISD::CallVal", SDTCallValProfile, + [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>; +def MoveParam : SDNode<"NVPTXISD::MoveParam", SDTMoveParamProfile, + []>; +def MoveRetval : SDNode<"NVPTXISD::MoveRetval", SDTMoveRetvalProfile, + [SDNPHasChain, SDNPSideEffect]>; +def StoreRetval : SDNode<"NVPTXISD::StoreRetval", 
SDTStoreRetvalProfile, + [SDNPHasChain, SDNPSideEffect]>; +def MoveToRetval : SDNode<"NVPTXISD::MoveToRetval", SDTStoreRetvalProfile, + [SDNPHasChain, SDNPSideEffect]>; +def PseudoUseParam : SDNode<"NVPTXISD::PseudoUseParam", + SDTPseudoUseParamProfile, + [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>; +def RETURNNode : SDNode<"NVPTXISD::RETURN", SDTCallArgMarkProfile, + [SDNPHasChain, SDNPSideEffect]>; + +class LoadParamMemInst : + NVPTXInst<(outs regclass:$dst), (ins i32imm:$b), + !strconcat(!strconcat("ld.param", opstr), + "\t$dst, [retval0+$b];"), + [(set regclass:$dst, (LoadParam (i32 1), (i32 imm:$b)))]>; + +class LoadParamRegInst : + NVPTXInst<(outs regclass:$dst), (ins i32imm:$b), + !strconcat(!strconcat("mov", opstr), + "\t$dst, retval$b;"), + [(set regclass:$dst, (LoadParam (i32 0), (i32 imm:$b)))]>; + +class StoreParamInst : + NVPTXInst<(outs), (ins regclass:$val, i32imm:$a, i32imm:$b), + !strconcat(!strconcat("st.param", opstr), + "\t[param$a+$b], $val;"), + [(StoreParam (i32 imm:$a), (i32 imm:$b), regclass:$val)]>; + +class MoveToParamInst : + NVPTXInst<(outs), (ins regclass:$val, i32imm:$a, i32imm:$b), + !strconcat(!strconcat("mov", opstr), + "\tparam$a, $val;"), + [(MoveToParam (i32 imm:$a), (i32 imm:$b), regclass:$val)]>; + +class StoreRetvalInst : + NVPTXInst<(outs), (ins regclass:$val, i32imm:$a), + !strconcat(!strconcat("st.param", opstr), + "\t[func_retval0+$a], $val;"), + [(StoreRetval (i32 imm:$a), regclass:$val)]>; + +class MoveToRetvalInst : + NVPTXInst<(outs), (ins i32imm:$num, regclass:$val), + !strconcat(!strconcat("mov", opstr), + "\tfunc_retval$num, $val;"), + [(MoveToRetval (i32 imm:$num), regclass:$val)]>; + +class MoveRetvalInst : + NVPTXInst<(outs), (ins regclass:$val), + !strconcat(!strconcat("mov", opstr), + "\tfunc_retval0, $val;"), + [(MoveRetval regclass:$val)]>; + +def PrintCallRetInst1 : NVPTXInst<(outs), (ins), +"call (retval0), ", + [(PrintCall (i32 1))]>; +def PrintCallRetInst2 : NVPTXInst<(outs), (ins), +"call (retval0, retval1), ", + [(PrintCall (i32 2))]>; +def PrintCallRetInst3 : NVPTXInst<(outs), (ins), +"call (retval0, retval1, retval2), ", + [(PrintCall (i32 3))]>; +def PrintCallRetInst4 : NVPTXInst<(outs), (ins), +"call (retval0, retval1, retval2, retval3), ", + [(PrintCall (i32 4))]>; +def PrintCallRetInst5 : NVPTXInst<(outs), (ins), +"call (retval0, retval1, retval2, retval3, retval4), ", + [(PrintCall (i32 5))]>; +def PrintCallRetInst6 : NVPTXInst<(outs), (ins), +"call (retval0, retval1, retval2, retval3, retval4, retval5), ", + [(PrintCall (i32 6))]>; +def PrintCallRetInst7 : NVPTXInst<(outs), (ins), +"call (retval0, retval1, retval2, retval3, retval4, retval5, retval6), ", + [(PrintCall (i32 7))]>; +def PrintCallRetInst8 : NVPTXInst<(outs), (ins), +!strconcat("call (retval0, retval1, retval2, retval3, retval4", + ", retval5, retval6, retval7), "), + [(PrintCall (i32 8))]>; + +def PrintCallNoRetInst : NVPTXInst<(outs), (ins), "call ", + [(PrintCall (i32 0))]>; + +def PrintCallUniRetInst1 : NVPTXInst<(outs), (ins), +"call.uni (retval0), ", + [(PrintCallUni (i32 1))]>; +def PrintCallUniRetInst2 : NVPTXInst<(outs), (ins), +"call.uni (retval0, retval1), ", + [(PrintCallUni (i32 2))]>; +def PrintCallUniRetInst3 : NVPTXInst<(outs), (ins), +"call.uni (retval0, retval1, retval2), ", + [(PrintCallUni (i32 3))]>; +def PrintCallUniRetInst4 : NVPTXInst<(outs), (ins), +"call.uni (retval0, retval1, retval2, retval3), ", + [(PrintCallUni (i32 4))]>; +def PrintCallUniRetInst5 : NVPTXInst<(outs), (ins), +"call.uni (retval0, retval1, retval2, 
retval3, retval4), ", + [(PrintCallUni (i32 5))]>; +def PrintCallUniRetInst6 : NVPTXInst<(outs), (ins), +"call.uni (retval0, retval1, retval2, retval3, retval4, retval5), ", + [(PrintCallUni (i32 6))]>; +def PrintCallUniRetInst7 : NVPTXInst<(outs), (ins), +"call.uni (retval0, retval1, retval2, retval3, retval4, retval5, retval6), ", + [(PrintCallUni (i32 7))]>; +def PrintCallUniRetInst8 : NVPTXInst<(outs), (ins), +!strconcat("call.uni (retval0, retval1, retval2, retval3, retval4", + ", retval5, retval6, retval7), "), + [(PrintCallUni (i32 8))]>; + +def PrintCallUniNoRetInst : NVPTXInst<(outs), (ins), "call.uni ", + [(PrintCallUni (i32 0))]>; + +def LoadParamMemI64 : LoadParamMemInst; +def LoadParamMemI32 : LoadParamMemInst; +def LoadParamMemI16 : LoadParamMemInst; +def LoadParamMemI8 : LoadParamMemInst; + +//def LoadParamMemI16 : NVPTXInst<(outs Int16Regs:$dst), (ins i32imm:$b), +// !strconcat("ld.param.b32\ttemp_param_reg, [retval0+$b];\n\t", +// "cvt.u16.u32\t$dst, temp_param_reg;"), +// [(set Int16Regs:$dst, (LoadParam (i32 1), (i32 imm:$b)))]>; +//def LoadParamMemI8 : NVPTXInst<(outs Int8Regs:$dst), (ins i32imm:$b), +// !strconcat("ld.param.b32\ttemp_param_reg, [retval0+$b];\n\t", +// "cvt.u16.u32\t$dst, temp_param_reg;"), +// [(set Int8Regs:$dst, (LoadParam (i32 1), (i32 imm:$b)))]>; + +def LoadParamMemF32 : LoadParamMemInst; +def LoadParamMemF64 : LoadParamMemInst; + +def LoadParamRegI64 : LoadParamRegInst; +def LoadParamRegI32 : LoadParamRegInst; +def LoadParamRegI16 : NVPTXInst<(outs Int16Regs:$dst), (ins i32imm:$b), + "cvt.u16.u32\t$dst, retval$b;", + [(set Int16Regs:$dst, + (LoadParam (i32 0), (i32 imm:$b)))]>; +def LoadParamRegI8 : NVPTXInst<(outs Int8Regs:$dst), (ins i32imm:$b), + "cvt.u16.u32\t$dst, retval$b;", + [(set Int8Regs:$dst, + (LoadParam (i32 0), (i32 imm:$b)))]>; + +def LoadParamRegF32 : LoadParamRegInst; +def LoadParamRegF64 : LoadParamRegInst; + +def StoreParamI64 : StoreParamInst; +def StoreParamI32 : StoreParamInst; + +def StoreParamI16 : NVPTXInst<(outs), + (ins Int16Regs:$val, i32imm:$a, i32imm:$b), + "st.param.b16\t[param$a+$b], $val;", + [(StoreParam (i32 imm:$a), (i32 imm:$b), Int16Regs:$val)]>; + +def StoreParamI8 : NVPTXInst<(outs), + (ins Int8Regs:$val, i32imm:$a, i32imm:$b), + "st.param.b8\t[param$a+$b], $val;", + [(StoreParam + (i32 imm:$a), (i32 imm:$b), Int8Regs:$val)]>; + +def StoreParamS32I16 : NVPTXInst<(outs), + (ins Int16Regs:$val, i32imm:$a, i32imm:$b), + !strconcat("cvt.s32.s16\ttemp_param_reg, $val;\n\t", + "st.param.b32\t[param$a+$b], temp_param_reg;"), + [(StoreParamS32 (i32 imm:$a), (i32 imm:$b), Int16Regs:$val)]>; +def StoreParamU32I16 : NVPTXInst<(outs), + (ins Int16Regs:$val, i32imm:$a, i32imm:$b), + !strconcat("cvt.u32.u16\ttemp_param_reg, $val;\n\t", + "st.param.b32\t[param$a+$b], temp_param_reg;"), + [(StoreParamU32 (i32 imm:$a), (i32 imm:$b), Int16Regs:$val)]>; + +def StoreParamU32I8 : NVPTXInst<(outs), + (ins Int8Regs:$val, i32imm:$a, i32imm:$b), + !strconcat("cvt.u32.u8\ttemp_param_reg, $val;\n\t", + "st.param.b32\t[param$a+$b], temp_param_reg;"), + [(StoreParamU32 (i32 imm:$a), (i32 imm:$b), Int8Regs:$val)]>; +def StoreParamS32I8 : NVPTXInst<(outs), + (ins Int8Regs:$val, i32imm:$a, i32imm:$b), + !strconcat("cvt.s32.s8\ttemp_param_reg, $val;\n\t", + "st.param.b32\t[param$a+$b], temp_param_reg;"), + [(StoreParamS32 (i32 imm:$a), (i32 imm:$b), Int8Regs:$val)]>; + +def StoreParamF32 : StoreParamInst; +def StoreParamF64 : StoreParamInst; + +def MoveToParamI64 : MoveToParamInst; +def MoveToParamI32 : MoveToParamInst; +def MoveToParamF64 
: MoveToParamInst; +def MoveToParamF32 : MoveToParamInst; +def MoveToParamI16 : NVPTXInst<(outs), + (ins Int16Regs:$val, i32imm:$a, i32imm:$b), + !strconcat("cvt.u32.u16\ttemp_param_reg, $val;\n\t", + "mov.b32\tparam$a, temp_param_reg;"), + [(MoveToParam (i32 imm:$a), (i32 imm:$b), Int16Regs:$val)]>; +def MoveToParamI8 : NVPTXInst<(outs), + (ins Int8Regs:$val, i32imm:$a, i32imm:$b), + !strconcat("cvt.u32.u16\ttemp_param_reg, $val;\n\t", + "mov.b32\tparam$a, temp_param_reg;"), + [(MoveToParam (i32 imm:$a), (i32 imm:$b), Int8Regs:$val)]>; + +def StoreRetvalI64 : StoreRetvalInst; +def StoreRetvalI32 : StoreRetvalInst; +def StoreRetvalI16 : StoreRetvalInst; +def StoreRetvalI8 : StoreRetvalInst; + +//def StoreRetvalI16 : NVPTXInst<(outs), (ins Int16Regs:$val, i32imm:$a), +// !strconcat("\{\n\t", +// !strconcat(".reg .b32 temp_retval_reg;\n\t", +// !strconcat("cvt.u32.u16\ttemp_retval_reg, $val;\n\t", +// "st.param.b32\t[func_retval0+$a], temp_retval_reg;\n\t\}"))), +// [(StoreRetval (i32 imm:$a), Int16Regs:$val)]>; +//def StoreRetvalI8 : NVPTXInst<(outs), (ins Int8Regs:$val, i32imm:$a), +// !strconcat("\{\n\t", +// !strconcat(".reg .b32 temp_retval_reg;\n\t", +// !strconcat("cvt.u32.u16\ttemp_retval_reg, $val;\n\t", +// "st.param.b32\t[func_retval0+$a], temp_retval_reg;\n\t\}"))), +// [(StoreRetval (i32 imm:$a), Int8Regs:$val)]>; + +def StoreRetvalF64 : StoreRetvalInst; +def StoreRetvalF32 : StoreRetvalInst; + +def MoveRetvalI64 : MoveRetvalInst; +def MoveRetvalI32 : MoveRetvalInst; +def MoveRetvalI16 : MoveRetvalInst; +def MoveRetvalI8 : MoveRetvalInst; +def MoveRetvalF64 : MoveRetvalInst; +def MoveRetvalF32 : MoveRetvalInst; + +def MoveToRetvalI64 : MoveToRetvalInst; +def MoveToRetvalI32 : MoveToRetvalInst; +def MoveToRetvalF64 : MoveToRetvalInst; +def MoveToRetvalF32 : MoveToRetvalInst; +def MoveToRetvalI16 : NVPTXInst<(outs), (ins i32imm:$num, Int16Regs:$val), + "cvt.u32.u16\tfunc_retval$num, $val;", + [(MoveToRetval (i32 imm:$num), Int16Regs:$val)]>; +def MoveToRetvalI8 : NVPTXInst<(outs), (ins i32imm:$num, Int8Regs:$val), + "cvt.u32.u16\tfunc_retval$num, $val;", + [(MoveToRetval (i32 imm:$num), Int8Regs:$val)]>; + +def CallArgBeginInst : NVPTXInst<(outs), (ins), "(", [(CallArgBegin)]>; +def CallArgEndInst1 : NVPTXInst<(outs), (ins), ");", [(CallArgEnd (i32 1))]>; +def CallArgEndInst0 : NVPTXInst<(outs), (ins), ")", [(CallArgEnd (i32 0))]>; +def RETURNInst : NVPTXInst<(outs), (ins), "ret;", [(RETURNNode)]>; + +class CallArgInst : + NVPTXInst<(outs), (ins regclass:$a), "$a, ", + [(CallArg (i32 0), regclass:$a)]>; + +class LastCallArgInst : + NVPTXInst<(outs), (ins regclass:$a), "$a", + [(LastCallArg (i32 0), regclass:$a)]>; + +def CallArgI64 : CallArgInst; +def CallArgI32 : CallArgInst; +def CallArgI16 : CallArgInst; +def CallArgI8 : CallArgInst; + +def CallArgF64 : CallArgInst; +def CallArgF32 : CallArgInst; + +def LastCallArgI64 : LastCallArgInst; +def LastCallArgI32 : LastCallArgInst; +def LastCallArgI16 : LastCallArgInst; +def LastCallArgI8 : LastCallArgInst; + +def LastCallArgF64 : LastCallArgInst; +def LastCallArgF32 : LastCallArgInst; + +def CallArgI32imm : NVPTXInst<(outs), (ins i32imm:$a), "$a, ", + [(CallArg (i32 0), (i32 imm:$a))]>; +def LastCallArgI32imm : NVPTXInst<(outs), (ins i32imm:$a), "$a", + [(LastCallArg (i32 0), (i32 imm:$a))]>; + +def CallArgParam : NVPTXInst<(outs), (ins i32imm:$a), "param$a, ", + [(CallArg (i32 1), (i32 imm:$a))]>; +def LastCallArgParam : NVPTXInst<(outs), (ins i32imm:$a), "param$a", + [(LastCallArg (i32 1), (i32 imm:$a))]>; + +def CallVoidInst : 
NVPTXInst<(outs), (ins imem:$addr), + "$addr, ", + [(CallVoid (Wrapper tglobaladdr:$addr))]>; +def CallVoidInstReg : NVPTXInst<(outs), (ins Int32Regs:$addr), + "$addr, ", + [(CallVoid Int32Regs:$addr)]>; +def CallVoidInstReg64 : NVPTXInst<(outs), (ins Int64Regs:$addr), + "$addr, ", + [(CallVoid Int64Regs:$addr)]>; +def PrototypeInst : NVPTXInst<(outs), (ins i32imm:$val), + ", prototype_$val;", + [(Prototype (i32 imm:$val))]>; + +def DeclareRetMemInst : NVPTXInst<(outs), + (ins i32imm:$align, i32imm:$size, i32imm:$num), + ".param .align $align .b8 retval$num[$size];", + [(DeclareRetParam (i32 imm:$align), (i32 imm:$size), (i32 imm:$num))]>; +def DeclareRetScalarInst : NVPTXInst<(outs), (ins i32imm:$size, i32imm:$num), + ".param .b$size retval$num;", + [(DeclareRet (i32 1), (i32 imm:$size), (i32 imm:$num))]>; +def DeclareRetRegInst : NVPTXInst<(outs), (ins i32imm:$size, i32imm:$num), + ".reg .b$size retval$num;", + [(DeclareRet (i32 2), (i32 imm:$size), (i32 imm:$num))]>; + +def DeclareParamInst : NVPTXInst<(outs), + (ins i32imm:$align, i32imm:$a, i32imm:$size), + ".param .align $align .b8 param$a[$size];", + [(DeclareParam (i32 imm:$align), (i32 imm:$a), (i32 imm:$size))]>; +def DeclareScalarParamInst : NVPTXInst<(outs), (ins i32imm:$a, i32imm:$size), + ".param .b$size param$a;", + [(DeclareScalarParam (i32 imm:$a), (i32 imm:$size), (i32 0))]>; +def DeclareScalarRegInst : NVPTXInst<(outs), (ins i32imm:$a, i32imm:$size), + ".reg .b$size param$a;", + [(DeclareScalarParam (i32 imm:$a), (i32 imm:$size), (i32 1))]>; + +class MoveParamInst : + NVPTXInst<(outs regclass:$dst), (ins regclass:$src), + !strconcat(!strconcat("mov", asmstr), "\t$dst, $src;"), + [(set regclass:$dst, (MoveParam regclass:$src))]>; + +def MoveParamI64 : MoveParamInst; +def MoveParamI32 : MoveParamInst; +def MoveParamI16 : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$src), + "cvt.u16.u32\t$dst, $src;", + [(set Int16Regs:$dst, (MoveParam Int16Regs:$src))]>; +def MoveParamI8 : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$src), + "cvt.u16.u32\t$dst, $src;", + [(set Int8Regs:$dst, (MoveParam Int8Regs:$src))]>; +def MoveParamF64 : MoveParamInst; +def MoveParamF32 : MoveParamInst; + +class PseudoUseParamInst : + NVPTXInst<(outs), (ins regclass:$src), + "// Pseudo use of $src", + [(PseudoUseParam regclass:$src)]>; + +def PseudoUseParamI64 : PseudoUseParamInst; +def PseudoUseParamI32 : PseudoUseParamInst; +def PseudoUseParamI16 : PseudoUseParamInst; +def PseudoUseParamI8 : PseudoUseParamInst; +def PseudoUseParamF64 : PseudoUseParamInst; +def PseudoUseParamF32 : PseudoUseParamInst; + + +// +// Load / Store Handling +// +multiclass LD { + def _avar : NVPTXInst<(outs regclass:$dst), + (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, + i32imm:$fromWidth, imem:$addr), +!strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}", + "$fromWidth \t$dst, [$addr];"), []>; + def _areg : NVPTXInst<(outs regclass:$dst), + (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, + i32imm:$fromWidth, Int32Regs:$addr), +!strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}", + "$fromWidth \t$dst, [$addr];"), []>; + def _ari : NVPTXInst<(outs regclass:$dst), + (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, + i32imm:$fromWidth, Int32Regs:$addr, i32imm:$offset), +!strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}", + "$fromWidth \t$dst, [$addr+$offset];"), []>; + def _asi : NVPTXInst<(outs regclass:$dst), + (ins LdStCode:$isVol, LdStCode:$addsp, 
LdStCode:$Vec, LdStCode:$Sign, + i32imm:$fromWidth, imem:$addr, i32imm:$offset), +!strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}", + "$fromWidth \t$dst, [$addr+$offset];"), []>; +} + +let mayLoad=1, neverHasSideEffects=1 in { +defm LD_i8 : LD; +defm LD_i16 : LD; +defm LD_i32 : LD; +defm LD_i64 : LD; +defm LD_f32 : LD; +defm LD_f64 : LD; +} + +let VecInstType=isVecLD.Value, mayLoad=1, neverHasSideEffects=1 in { +defm LD_v2i8 : LD; +defm LD_v4i8 : LD; +defm LD_v2i16 : LD; +defm LD_v4i16 : LD; +defm LD_v2i32 : LD; +defm LD_v4i32 : LD; +defm LD_v2f32 : LD; +defm LD_v4f32 : LD; +defm LD_v2i64 : LD; +defm LD_v2f64 : LD; +} + +multiclass ST { + def _avar : NVPTXInst<(outs), + (ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, + LdStCode:$Sign, i32imm:$toWidth, imem:$addr), +!strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth", + " \t[$addr], $src;"), []>; + def _areg : NVPTXInst<(outs), + (ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, + LdStCode:$Sign, i32imm:$toWidth, Int32Regs:$addr), +!strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth", + " \t[$addr], $src;"), []>; + def _ari : NVPTXInst<(outs), + (ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, + LdStCode:$Sign, i32imm:$toWidth, Int32Regs:$addr, i32imm:$offset), +!strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth", + " \t[$addr+$offset], $src;"), []>; + def _asi : NVPTXInst<(outs), + (ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, + LdStCode:$Sign, i32imm:$toWidth, imem:$addr, i32imm:$offset), +!strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth", + " \t[$addr+$offset], $src;"), []>; +} + +let mayStore=1, neverHasSideEffects=1 in { +defm ST_i8 : ST; +defm ST_i16 : ST; +defm ST_i32 : ST; +defm ST_i64 : ST; +defm ST_f32 : ST; +defm ST_f64 : ST; +} + +let VecInstType=isVecST.Value, mayStore=1, neverHasSideEffects=1 in { +defm ST_v2i8 : ST; +defm ST_v4i8 : ST; +defm ST_v2i16 : ST; +defm ST_v4i16 : ST; +defm ST_v2i32 : ST; +defm ST_v4i32 : ST; +defm ST_v2f32 : ST; +defm ST_v4f32 : ST; +defm ST_v2i64 : ST; +defm ST_v2f64 : ST; +} + +// The following is used only in and after vector elementizations. +// Vector elementization happens at the machine instruction level, so the +// following instruction +// never appears in the DAG. 
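
What the vector forms buy is bandwidth: an ld.v4 issues one aligned 16-byte
transaction instead of four scalar loads. A host-side analogue of the v4 f32
case (illustrative only; PTX additionally requires the access to be aligned
to the full vector size):

    #include <cstdio>
    #include <cstring>

    struct alignas(16) Float4 { float x, y, z, w; };

    int main() {
      float buf[4] = {1.f, 2.f, 3.f, 4.f};
      Float4 v;
      std::memcpy(&v, buf, sizeof v);  // one wide copy, like ld.v4.f32
      printf("%g %g %g %g\n", v.x, v.y, v.z, v.w);
      return 0;
    }

The LD_VEC/ST_VEC multiclasses below spell out the v2 and v4 addressing
forms that this machine-level vectorization emits.
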
+multiclass LD_VEC { + def _v2_avar : NVPTXInst<(outs regclass:$dst1, regclass:$dst2), + (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, + i32imm:$fromWidth, imem:$addr), + !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}", + "$fromWidth \t{{$dst1, $dst2}}, [$addr];"), []>; + def _v2_areg : NVPTXInst<(outs regclass:$dst1, regclass:$dst2), + (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, + i32imm:$fromWidth, Int32Regs:$addr), + !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}", + "$fromWidth \t{{$dst1, $dst2}}, [$addr];"), []>; + def _v2_ari : NVPTXInst<(outs regclass:$dst1, regclass:$dst2), + (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, + i32imm:$fromWidth, Int32Regs:$addr, i32imm:$offset), + !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}", + "$fromWidth \t{{$dst1, $dst2}}, [$addr+$offset];"), []>; + def _v2_asi : NVPTXInst<(outs regclass:$dst1, regclass:$dst2), + (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, + i32imm:$fromWidth, imem:$addr, i32imm:$offset), + !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}", + "$fromWidth \t{{$dst1, $dst2}}, [$addr+$offset];"), []>; + def _v4_avar : NVPTXInst<(outs regclass:$dst1, regclass:$dst2, + regclass:$dst3, regclass:$dst4), + (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, + i32imm:$fromWidth, imem:$addr), + !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}", + "$fromWidth \t{{$dst1, $dst2, $dst3, $dst4}}, [$addr];"), []>; + def _v4_areg : NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3, + regclass:$dst4), + (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, + i32imm:$fromWidth, Int32Regs:$addr), + !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}", + "$fromWidth \t{{$dst1, $dst2, $dst3, $dst4}}, [$addr];"), []>; + def _v4_ari : NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3, + regclass:$dst4), + (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, + i32imm:$fromWidth, Int32Regs:$addr, i32imm:$offset), + !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}", + "$fromWidth \t{{$dst1, $dst2, $dst3, $dst4}}, [$addr+$offset];"), + []>; + def _v4_asi : NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3, + regclass:$dst4), + (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, + i32imm:$fromWidth, imem:$addr, i32imm:$offset), + !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}", + "$fromWidth \t{{$dst1, $dst2, $dst3, $dst4}}, [$addr+$offset];"), + []>; +} +let mayLoad=1, neverHasSideEffects=1 in { +defm LDV_i8 : LD_VEC; +defm LDV_i16 : LD_VEC; +defm LDV_i32 : LD_VEC; +defm LDV_i64 : LD_VEC; +defm LDV_f32 : LD_VEC; +defm LDV_f64 : LD_VEC; +} + +multiclass ST_VEC { + def _v2_avar : NVPTXInst<(outs), + (ins regclass:$src1, regclass:$src2, LdStCode:$isVol, LdStCode:$addsp, + LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, imem:$addr), + !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}", + "$fromWidth \t[$addr], {{$src1, $src2}};"), []>; + def _v2_areg : NVPTXInst<(outs), + (ins regclass:$src1, regclass:$src2, LdStCode:$isVol, LdStCode:$addsp, + LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, Int32Regs:$addr), + !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}", + "$fromWidth \t[$addr], {{$src1, $src2}};"), []>; + def _v2_ari : NVPTXInst<(outs), + (ins 
regclass:$src1, regclass:$src2, LdStCode:$isVol, LdStCode:$addsp, + LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, Int32Regs:$addr, + i32imm:$offset), + !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}", + "$fromWidth \t[$addr+$offset], {{$src1, $src2}};"), []>; + def _v2_asi : NVPTXInst<(outs), + (ins regclass:$src1, regclass:$src2, LdStCode:$isVol, LdStCode:$addsp, + LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, imem:$addr, + i32imm:$offset), + !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}", + "$fromWidth \t[$addr+$offset], {{$src1, $src2}};"), []>; + def _v4_avar : NVPTXInst<(outs), + (ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4, + LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, + i32imm:$fromWidth, imem:$addr), + !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}", + "$fromWidth \t[$addr], {{$src1, $src2, $src3, $src4}};"), []>; + def _v4_areg : NVPTXInst<(outs), + (ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4, + LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, + i32imm:$fromWidth, Int32Regs:$addr), + !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}", + "$fromWidth \t[$addr], {{$src1, $src2, $src3, $src4}};"), []>; + def _v4_ari : NVPTXInst<(outs), + (ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4, + LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, + i32imm:$fromWidth, Int32Regs:$addr, i32imm:$offset), + !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}", + "$fromWidth \t[$addr+$offset], {{$src1, $src2, $src3, $src4}};"), + []>; + def _v4_asi : NVPTXInst<(outs), + (ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4, + LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, + i32imm:$fromWidth, imem:$addr, i32imm:$offset), + !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}", + "$fromWidth \t[$addr+$offset], {{$src1, $src2, $src3, $src4}};"), + []>; +} +let mayStore=1, neverHasSideEffects=1 in { +defm STV_i8 : ST_VEC; +defm STV_i16 : ST_VEC; +defm STV_i32 : ST_VEC; +defm STV_i64 : ST_VEC; +defm STV_f32 : ST_VEC; +defm STV_f64 : ST_VEC; +} + + +//---- Conversion ---- + +multiclass CVT_INT_TO_FP { +// FIXME: need to add f16 support +// def CVTf16i8 : +// NVPTXInst<(outs Float16Regs:$d), (ins Int8Regs:$a), +// !strconcat(!strconcat("cvt.rn.f16.", OpStr), "8 \t$d, $a;"), +// [(set Float16Regs:$d, (OpNode Int8Regs:$a))]>; +// def CVTf16i16 : +// NVPTXInst<(outs Float16Regs:$d), (ins Int16Regs:$a), +// !strconcat(!strconcat("cvt.rn.f16.", OpStr), "16 \t$d, $a;"), +// [(set Float16Regs:$d, (OpNode Int16Regs:$a))]>; +// def CVTf16i32 : +// NVPTXInst<(outs Float16Regs:$d), (ins Int32Regs:$a), +// !strconcat(!strconcat("cvt.rn.f16.", OpStr), "32 \t$d, $a;"), +// [(set Float16Regs:$d, (OpNode Int32Regs:$a))]>; +// def CVTf16i64: +// NVPTXInst<(outs Float16Regs:$d), (ins Int64Regs:$a), +// !strconcat(!strconcat("cvt.rn.f32.", OpStr), "64 \t$d, $a;"), +// [(set Float32Regs:$d, (OpNode Int64Regs:$a))]>; + + def CVTf32i1 : + NVPTXInst<(outs Float32Regs:$d), (ins Int1Regs:$a), + "selp.f32 \t$d, 1.0, 0.0, $a;", + [(set Float32Regs:$d, (OpNode Int1Regs:$a))]>; + def CVTf32i8 : + NVPTXInst<(outs Float32Regs:$d), (ins Int8Regs:$a), + !strconcat(!strconcat("cvt.rn.f32.", OpStr), "8 \t$d, $a;"), + [(set Float32Regs:$d, (OpNode Int8Regs:$a))]>; + def CVTf32i16 : + NVPTXInst<(outs Float32Regs:$d), (ins Int16Regs:$a), + 
!strconcat(!strconcat("cvt.rn.f32.", OpStr), "16 \t$d, $a;"), + [(set Float32Regs:$d, (OpNode Int16Regs:$a))]>; + def CVTf32i32 : + NVPTXInst<(outs Float32Regs:$d), (ins Int32Regs:$a), + !strconcat(!strconcat("cvt.rn.f32.", OpStr), "32 \t$d, $a;"), + [(set Float32Regs:$d, (OpNode Int32Regs:$a))]>; + def CVTf32i64: + NVPTXInst<(outs Float32Regs:$d), (ins Int64Regs:$a), + !strconcat(!strconcat("cvt.rn.f32.", OpStr), "64 \t$d, $a;"), + [(set Float32Regs:$d, (OpNode Int64Regs:$a))]>; + + def CVTf64i1 : + NVPTXInst<(outs Float64Regs:$d), (ins Int1Regs:$a), + "selp.f64 \t$d, 1.0, 0.0, $a;", + [(set Float64Regs:$d, (OpNode Int1Regs:$a))]>; + def CVTf64i8 : + NVPTXInst<(outs Float64Regs:$d), (ins Int8Regs:$a), + !strconcat(!strconcat("cvt.rn.f64.", OpStr), "8 \t$d, $a;"), + [(set Float64Regs:$d, (OpNode Int8Regs:$a))]>; + def CVTf64i16 : + NVPTXInst<(outs Float64Regs:$d), (ins Int16Regs:$a), + !strconcat(!strconcat("cvt.rn.f64.", OpStr), "16 \t$d, $a;"), + [(set Float64Regs:$d, (OpNode Int16Regs:$a))]>; + def CVTf64i32 : + NVPTXInst<(outs Float64Regs:$d), (ins Int32Regs:$a), + !strconcat(!strconcat("cvt.rn.f64.", OpStr), "32 \t$d, $a;"), + [(set Float64Regs:$d, (OpNode Int32Regs:$a))]>; + def CVTf64i64: + NVPTXInst<(outs Float64Regs:$d), (ins Int64Regs:$a), + !strconcat(!strconcat("cvt.rn.f64.", OpStr), "64 \t$d, $a;"), + [(set Float64Regs:$d, (OpNode Int64Regs:$a))]>; +} + +defm Sint_to_fp : CVT_INT_TO_FP <"s", sint_to_fp>; +defm Uint_to_fp : CVT_INT_TO_FP <"u", uint_to_fp>; + +multiclass CVT_FP_TO_INT { +// FIXME: need to add f16 support +// def CVTi8f16: +// NVPTXInst<(outs Int8Regs:$d), (ins Float16Regs:$a), +// !strconcat(!strconcat("cvt.rzi.", OpStr), "8.f16 $d, $a;"), +// [(set Int8Regs:$d, (OpNode Float16Regs:$a))]>; + def CVTi8f32_ftz: + NVPTXInst<(outs Int8Regs:$d), (ins Float32Regs:$a), + !strconcat(!strconcat("cvt.rzi.ftz.", OpStr), "16.f32 \t$d, $a;"), + [(set Int8Regs:$d, (OpNode Float32Regs:$a))]>, Requires<[doF32FTZ]>; + def CVTi8f32: + NVPTXInst<(outs Int8Regs:$d), (ins Float32Regs:$a), + !strconcat(!strconcat("cvt.rzi.", OpStr), "16.f32 \t$d, $a;"), + [(set Int8Regs:$d, (OpNode Float32Regs:$a))]>; + def CVTi8f64: + NVPTXInst<(outs Int8Regs:$d), (ins Float64Regs:$a), + !strconcat(!strconcat("cvt.rzi.", OpStr), "16.f64 \t$d, $a;"), + [(set Int8Regs:$d, (OpNode Float64Regs:$a))]>; + +// FIXME: need to add f16 support +// def CVTi16f16: +// NVPTXInst<(outs Int16Regs:$d), (ins Float16Regs:$a), +// !strconcat(!strconcat("cvt.rzi.", OpStr), "16.f16 \t$d, $a;"), +// [(set Int16Regs:$d, (OpNode Float16Regs:$a))]>; + def CVTi16f32_ftz: + NVPTXInst<(outs Int16Regs:$d), (ins Float32Regs:$a), + !strconcat(!strconcat("cvt.rzi.ftz.", OpStr), "16.f32 \t$d, $a;"), + [(set Int16Regs:$d, (OpNode Float32Regs:$a))]>, Requires<[doF32FTZ]>; + def CVTi16f32: + NVPTXInst<(outs Int16Regs:$d), (ins Float32Regs:$a), + !strconcat(!strconcat("cvt.rzi.", OpStr), "16.f32 \t$d, $a;"), + [(set Int16Regs:$d, (OpNode Float32Regs:$a))]>; + def CVTi16f64: + NVPTXInst<(outs Int16Regs:$d), (ins Float64Regs:$a), + !strconcat(!strconcat("cvt.rzi.", OpStr), "16.f64 \t$d, $a;"), + [(set Int16Regs:$d, (OpNode Float64Regs:$a))]>; + +// FIXME: need to add f16 support +// def CVTi32f16: def CVTi32f16: +// NVPTXInst<(outs Int32Regs:$d), (ins Float16Regs:$a), +// !strconcat(!strconcat("cvt.rzi.", OpStr), "32.f16 \t$d, $a;"), +// [(set Int32Regs:$d, (OpNode Float16Regs:$a))]>; + def CVTi32f32_ftz: + NVPTXInst<(outs Int32Regs:$d), (ins Float32Regs:$a), + !strconcat(!strconcat("cvt.rzi.ftz.", OpStr), "32.f32 \t$d, $a;"), + [(set 
Int32Regs:$d, (OpNode Float32Regs:$a))]>, Requires<[doF32FTZ]>; + def CVTi32f32: + NVPTXInst<(outs Int32Regs:$d), (ins Float32Regs:$a), + !strconcat(!strconcat("cvt.rzi.", OpStr), "32.f32 \t$d, $a;"), + [(set Int32Regs:$d, (OpNode Float32Regs:$a))]>; + def CVTi32f64: + NVPTXInst<(outs Int32Regs:$d), (ins Float64Regs:$a), + !strconcat(!strconcat("cvt.rzi.", OpStr), "32.f64 \t$d, $a;"), + [(set Int32Regs:$d, (OpNode Float64Regs:$a))]>; + +// FIXME: need to add f16 support +// def CVTi64f16: +// NVPTXInst<(outs Int64Regs:$d), (ins Float16Regs:$a), +// !strconcat(!strconcat("cvt.rzi.", OpStr), "64.f16 \t$d, $a;"), +// [(set Int64Regs:$d, (OpNode Float16Regs:$a))]>; + def CVTi64f32_ftz: + NVPTXInst<(outs Int64Regs:$d), (ins Float32Regs:$a), + !strconcat(!strconcat("cvt.rzi.ftz.", OpStr), "64.f32 \t$d, $a;"), + [(set Int64Regs:$d, (OpNode Float32Regs:$a))]>, Requires<[doF32FTZ]>; + def CVTi64f32: + NVPTXInst<(outs Int64Regs:$d), (ins Float32Regs:$a), + !strconcat(!strconcat("cvt.rzi.", OpStr), "64.f32 \t$d, $a;"), + [(set Int64Regs:$d, (OpNode Float32Regs:$a))]>; + def CVTi64f64: + NVPTXInst<(outs Int64Regs:$d), (ins Float64Regs:$a), + !strconcat(!strconcat("cvt.rzi.", OpStr), "64.f64 \t$d, $a;"), + [(set Int64Regs:$d, (OpNode Float64Regs:$a))]>; +} + +defm Fp_to_sint : CVT_FP_TO_INT <"s", fp_to_sint>; +defm Fp_to_uint : CVT_FP_TO_INT <"u", fp_to_uint>; + +multiclass INT_EXTEND_UNSIGNED_1 { + def ext1to8: + NVPTXInst<(outs Int8Regs:$d), (ins Int1Regs:$a), + "selp.u16 \t$d, 1, 0, $a;", + [(set Int8Regs:$d, (OpNode Int1Regs:$a))]>; + def ext1to16: + NVPTXInst<(outs Int16Regs:$d), (ins Int1Regs:$a), + "selp.u16 \t$d, 1, 0, $a;", + [(set Int16Regs:$d, (OpNode Int1Regs:$a))]>; + def ext1to32: + NVPTXInst<(outs Int32Regs:$d), (ins Int1Regs:$a), + "selp.u32 \t$d, 1, 0, $a;", + [(set Int32Regs:$d, (OpNode Int1Regs:$a))]>; + def ext1to64: + NVPTXInst<(outs Int64Regs:$d), (ins Int1Regs:$a), + "selp.u64 \t$d, 1, 0, $a;", + [(set Int64Regs:$d, (OpNode Int1Regs:$a))]>; +} + +multiclass INT_EXTEND_SIGNED_1 { + def ext1to8: + NVPTXInst<(outs Int8Regs:$d), (ins Int1Regs:$a), + "selp.s16 \t$d, -1, 0, $a;", + [(set Int8Regs:$d, (OpNode Int1Regs:$a))]>; + def ext1to16: + NVPTXInst<(outs Int16Regs:$d), (ins Int1Regs:$a), + "selp.s16 \t$d, -1, 0, $a;", + [(set Int16Regs:$d, (OpNode Int1Regs:$a))]>; + def ext1to32: + NVPTXInst<(outs Int32Regs:$d), (ins Int1Regs:$a), + "selp.s32 \t$d, -1, 0, $a;", + [(set Int32Regs:$d, (OpNode Int1Regs:$a))]>; + def ext1to64: + NVPTXInst<(outs Int64Regs:$d), (ins Int1Regs:$a), + "selp.s64 \t$d, -1, 0, $a;", + [(set Int64Regs:$d, (OpNode Int1Regs:$a))]>; +} + +multiclass INT_EXTEND { + // All Int8Regs are emitted as 16-bit registers in ptx. + // And there is no selp.u8 in ptx.
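+ // For example, the ext1to8 definitions above therefore select into a
+ // 16-bit register; the emitted PTX looks roughly like (operand names
+ // illustrative, not actual output):
+ //   selp.u16 %rs1, 1, 0, %p1;
+ // i.e. an i8 result is materialized with a 16-bit selp, since selp.u8
+ // does not exist.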
+ def ext8to16: + NVPTXInst<(outs Int16Regs:$d), (ins Int8Regs:$a), + !strconcat("cvt.", !strconcat(OpStr, !strconcat("16.", + !strconcat(OpStr, "8 \t$d, $a;")))), + [(set Int16Regs:$d, (OpNode Int8Regs:$a))]>; + def ext8to32: + NVPTXInst<(outs Int32Regs:$d), (ins Int8Regs:$a), + !strconcat("cvt.", !strconcat(OpStr, !strconcat("32.", + !strconcat(OpStr, "8 \t$d, $a;")))), + [(set Int32Regs:$d, (OpNode Int8Regs:$a))]>; + def ext8to64: + NVPTXInst<(outs Int64Regs:$d), (ins Int8Regs:$a), + !strconcat("cvt.", !strconcat(OpStr, !strconcat("64.", + !strconcat(OpStr, "8 \t$d, $a;")))), + [(set Int64Regs:$d, (OpNode Int8Regs:$a))]>; + def ext16to32: + NVPTXInst<(outs Int32Regs:$d), (ins Int16Regs:$a), + !strconcat("cvt.", !strconcat(OpStr, !strconcat("32.", + !strconcat(OpStr, "16 \t$d, $a;")))), + [(set Int32Regs:$d, (OpNode Int16Regs:$a))]>; + def ext16to64: + NVPTXInst<(outs Int64Regs:$d), (ins Int16Regs:$a), + !strconcat("cvt.", !strconcat(OpStr, !strconcat("64.", + !strconcat(OpStr, "16 \t$d, $a;")))), + [(set Int64Regs:$d, (OpNode Int16Regs:$a))]>; + def ext32to64: + NVPTXInst<(outs Int64Regs:$d), (ins Int32Regs:$a), + !strconcat("cvt.", !strconcat(OpStr, !strconcat("64.", + !strconcat(OpStr, "32 \t$d, $a;")))), + [(set Int64Regs:$d, (OpNode Int32Regs:$a))]>; +} + +defm Sint_extend_1 : INT_EXTEND_SIGNED_1; +defm Zint_extend_1 : INT_EXTEND_UNSIGNED_1; +defm Aint_extend_1 : INT_EXTEND_UNSIGNED_1; + +defm Sint_extend : INT_EXTEND <"s", sext>; +defm Zint_extend : INT_EXTEND <"u", zext>; +defm Aint_extend : INT_EXTEND <"u", anyext>; + +class TRUNC_to1_asm { + string s = !strconcat("{{\n\t", + !strconcat(".reg ", + !strconcat(sz, + !strconcat(" temp;\n\t", + !strconcat("and", + !strconcat(sz, + !strconcat("\t temp, $a, 1;\n\t", + !strconcat("setp", + !strconcat(sz, ".eq \t $d, temp, 1;\n\t}}"))))))))); +} + +def TRUNC_64to32 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a), + "cvt.u32.u64 \t$d, $a;", + [(set Int32Regs:$d, (trunc Int64Regs:$a))]>; +def TRUNC_64to16 : NVPTXInst<(outs Int16Regs:$d), (ins Int64Regs:$a), + "cvt.u16.u64 \t$d, $a;", + [(set Int16Regs:$d, (trunc Int64Regs:$a))]>; +def TRUNC_64to8 : NVPTXInst<(outs Int8Regs:$d), (ins Int64Regs:$a), + "cvt.u8.u64 \t$d, $a;", + [(set Int8Regs:$d, (trunc Int64Regs:$a))]>; +def TRUNC_32to16 : NVPTXInst<(outs Int16Regs:$d), (ins Int32Regs:$a), + "cvt.u16.u32 \t$d, $a;", + [(set Int16Regs:$d, (trunc Int32Regs:$a))]>; +def TRUNC_32to8 : NVPTXInst<(outs Int8Regs:$d), (ins Int32Regs:$a), + "cvt.u8.u32 \t$d, $a;", + [(set Int8Regs:$d, (trunc Int32Regs:$a))]>; +def TRUNC_16to8 : NVPTXInst<(outs Int8Regs:$d), (ins Int16Regs:$a), + "cvt.u8.u16 \t$d, $a;", + [(set Int8Regs:$d, (trunc Int16Regs:$a))]>; +def TRUNC_64to1 : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a), + TRUNC_to1_asm<".b64">.s, + [(set Int1Regs:$d, (trunc Int64Regs:$a))]>; +def TRUNC_32to1 : NVPTXInst<(outs Int1Regs:$d), (ins Int32Regs:$a), + TRUNC_to1_asm<".b32">.s, + [(set Int1Regs:$d, (trunc Int32Regs:$a))]>; +def TRUNC_16to1 : NVPTXInst<(outs Int1Regs:$d), (ins Int16Regs:$a), + TRUNC_to1_asm<".b16">.s, + [(set Int1Regs:$d, (trunc Int16Regs:$a))]>; +def TRUNC_8to1 : NVPTXInst<(outs Int1Regs:$d), (ins Int8Regs:$a), + TRUNC_to1_asm<".b16">.s, + [(set Int1Regs:$d, (trunc Int8Regs:$a))]>; + +// Select instructions +def : Pat<(select Int32Regs:$pred, Int8Regs:$a, Int8Regs:$b), + (SELECTi8rr Int8Regs:$a, Int8Regs:$b, (TRUNC_32to1 Int32Regs:$pred))>; +def : Pat<(select Int32Regs:$pred, Int16Regs:$a, Int16Regs:$b), + (SELECTi16rr Int16Regs:$a, Int16Regs:$b, + (TRUNC_32to1 
Int32Regs:$pred))>; +def : Pat<(select Int32Regs:$pred, Int32Regs:$a, Int32Regs:$b), + (SELECTi32rr Int32Regs:$a, Int32Regs:$b, + (TRUNC_32to1 Int32Regs:$pred))>; +def : Pat<(select Int32Regs:$pred, Int64Regs:$a, Int64Regs:$b), + (SELECTi64rr Int64Regs:$a, Int64Regs:$b, + (TRUNC_32to1 Int32Regs:$pred))>; +def : Pat<(select Int32Regs:$pred, Float32Regs:$a, Float32Regs:$b), + (SELECTf32rr Float32Regs:$a, Float32Regs:$b, + (TRUNC_32to1 Int32Regs:$pred))>; +def : Pat<(select Int32Regs:$pred, Float64Regs:$a, Float64Regs:$b), + (SELECTf64rr Float64Regs:$a, Float64Regs:$b, + (TRUNC_32to1 Int32Regs:$pred))>; + +class F_BITCONVERT : + NVPTXInst<(outs regclassOut:$d), (ins regclassIn:$a), + !strconcat("mov.b", !strconcat(SzStr, " \t $d, $a;")), + [(set regclassOut:$d, (bitconvert regclassIn:$a))]>; + +def BITCONVERT_32_I2F : F_BITCONVERT<"32", Int32Regs, Float32Regs>; +def BITCONVERT_32_F2I : F_BITCONVERT<"32", Float32Regs, Int32Regs>; +def BITCONVERT_64_I2F : F_BITCONVERT<"64", Int64Regs, Float64Regs>; +def BITCONVERT_64_F2I : F_BITCONVERT<"64", Float64Regs, Int64Regs>; + +// pack a set of smaller int registers to a larger int register +def V4I8toI32 : NVPTXInst<(outs Int32Regs:$d), + (ins Int8Regs:$s1, Int8Regs:$s2, + Int8Regs:$s3, Int8Regs:$s4), + !strconcat("{{\n\t.reg .b8\t%t<4>;", + !strconcat("\n\tcvt.u8.u8\t%t0, $s1;", + !strconcat("\n\tcvt.u8.u8\t%t1, $s2;", + !strconcat("\n\tcvt.u8.u8\t%t2, $s3;", + !strconcat("\n\tcvt.u8.u8\t%t3, $s4;", + "\n\tmov.b32\t$d, {%t0, %t1, %t2, %t3};\n\t}}"))))), + []>; +def V4I16toI64 : NVPTXInst<(outs Int64Regs:$d), + (ins Int16Regs:$s1, Int16Regs:$s2, + Int16Regs:$s3, Int16Regs:$s4), + "mov.b64\t$d, {{$s1, $s2, $s3, $s4}};", + []>; +def V2I8toI16 : NVPTXInst<(outs Int16Regs:$d), + (ins Int8Regs:$s1, Int8Regs:$s2), + !strconcat("{{\n\t.reg .b8\t%t<2>;", + !strconcat("\n\tcvt.u8.u8\t%t0, $s1;", + !strconcat("\n\tcvt.u8.u8\t%t1, $s2;", + "\n\tmov.b16\t$d, {%t0, %t1};\n\t}}"))), + []>; +def V2I16toI32 : NVPTXInst<(outs Int32Regs:$d), + (ins Int16Regs:$s1, Int16Regs:$s2), + "mov.b32\t$d, {{$s1, $s2}};", + []>; +def V2I32toI64 : NVPTXInst<(outs Int64Regs:$d), + (ins Int32Regs:$s1, Int32Regs:$s2), + "mov.b64\t$d, {{$s1, $s2}};", + []>; +def V2F32toF64 : NVPTXInst<(outs Float64Regs:$d), + (ins Float32Regs:$s1, Float32Regs:$s2), + "mov.b64\t$d, {{$s1, $s2}};", + []>; + +// unpack a larger int register to a set of smaller int registers +def I32toV4I8 : NVPTXInst<(outs Int8Regs:$d1, Int8Regs:$d2, + Int8Regs:$d3, Int8Regs:$d4), + (ins Int32Regs:$s), + !strconcat("{{\n\t.reg .b8\t%t<4>;", + !strconcat("\n\tmov.b32\t{%t0, %t1, %t2, %t3}, $s;", + !strconcat("\n\tcvt.u8.u8\t$d1, %t0;", + !strconcat("\n\tcvt.u8.u8\t$d2, %t1;", + !strconcat("\n\tcvt.u8.u8\t$d3, %t2;", + "\n\tcvt.u8.u8\t$d4, %t3;\n\t}}"))))), + []>; +def I64toV4I16 : NVPTXInst<(outs Int16Regs:$d1, Int16Regs:$d2, + Int16Regs:$d3, Int16Regs:$d4), + (ins Int64Regs:$s), + "mov.b64\t{{$d1, $d2, $d3, $d4}}, $s;", + []>; +def I16toV2I8 : NVPTXInst<(outs Int8Regs:$d1, Int8Regs:$d2), + (ins Int16Regs:$s), + !strconcat("{{\n\t.reg .b8\t%t<2>;", + !strconcat("\n\tmov.b16\t{%t0, %t1}, $s;", + !strconcat("\n\tcvt.u8.u8\t$d1, %t0;", + "\n\tcvt.u8.u8\t$d2, %t1;\n\t}}"))), + []>; +def I32toV2I16 : NVPTXInst<(outs Int16Regs:$d1, Int16Regs:$d2), + (ins Int32Regs:$s), + "mov.b32\t{{$d1, $d2}}, $s;", + []>; +def I64toV2I32 : NVPTXInst<(outs Int32Regs:$d1, Int32Regs:$d2), + (ins Int64Regs:$s), + "mov.b64\t{{$d1, $d2}}, $s;", + []>; +def F64toV2F32 : NVPTXInst<(outs Float32Regs:$d1, Float32Regs:$d2), + (ins Float64Regs:$s), + 
"mov.b64\t{{$d1, $d2}}, $s;", + []>; + +def FPRound_ftz : NVPTXInst<(outs Float32Regs:$d), (ins Float64Regs:$a), + "cvt.rn.ftz.f32.f64 \t$d, $a;", + [(set Float32Regs:$d, (fround Float64Regs:$a))]>, Requires<[doF32FTZ]>; + +def FPRound : NVPTXInst<(outs Float32Regs:$d), (ins Float64Regs:$a), + "cvt.rn.f32.f64 \t$d, $a;", + [(set Float32Regs:$d, (fround Float64Regs:$a))]>; + +def FPExtend_ftz : NVPTXInst<(outs Float64Regs:$d), (ins Float32Regs:$a), + "cvt.ftz.f64.f32 \t$d, $a;", + [(set Float64Regs:$d, (fextend Float32Regs:$a))]>, Requires<[doF32FTZ]>; + +def FPExtend : NVPTXInst<(outs Float64Regs:$d), (ins Float32Regs:$a), + "cvt.f64.f32 \t$d, $a;", + [(set Float64Regs:$d, (fextend Float32Regs:$a))]>; + +def retflag : SDNode<"NVPTXISD::RET_FLAG", SDTNone, + [SDNPHasChain, SDNPOptInGlue]>; + +//----------------------------------- +// Control-flow +//----------------------------------- + +let isTerminator=1 in { + let isReturn=1, isBarrier=1 in + def Return : NVPTXInst<(outs), (ins), "ret;", [(retflag)]>; + + let isBranch=1 in + def CBranch : NVPTXInst<(outs), (ins Int1Regs:$a, brtarget:$target), + "@$a bra \t$target;", + [(brcond Int1Regs:$a, bb:$target)]>; + let isBranch=1 in + def CBranchOther : NVPTXInst<(outs), (ins Int1Regs:$a, brtarget:$target), + "@!$a bra \t$target;", + []>; + + let isBranch=1, isBarrier=1 in + def GOTO : NVPTXInst<(outs), (ins brtarget:$target), + "bra.uni \t$target;", + [(br bb:$target)]>; +} + +def : Pat<(brcond Int32Regs:$a, bb:$target), (CBranch + (ISetUNEi32ri_p Int32Regs:$a, 0), bb:$target)>; + +// SelectionDAGBuilder::visitSWitchCase() will invert the condition of a +// conditional branch if +// the target block is the next block so that the code can fall through to the +// target block. +// The invertion is done by 'xor condition, 1', which will be translated to +// (setne condition, -1). +// Since ptx supports '@!pred bra target', we should use it. +def : Pat<(brcond (i1 (setne Int1Regs:$a, -1)), bb:$target), + (CBranchOther Int1Regs:$a, bb:$target)>; + +// Call +def SDT_NVPTXCallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i32> ]>; +def SDT_NVPTXCallSeqEnd : SDCallSeqEnd<[ SDTCisVT<0, i32>, + SDTCisVT<1, i32> ]>; + +def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_NVPTXCallSeqStart, + [SDNPHasChain, SDNPOutGlue, SDNPSideEffect]>; +def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_NVPTXCallSeqEnd, + [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, + SDNPSideEffect]>; + +def SDT_NVPTXCall : SDTypeProfile<0, 1, [SDTCisVT<0, i32>]>; +def call : SDNode<"NVPTXISD::CALL", SDT_NVPTXCall, + [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>; +def calltarget : Operand; +let isCall=1 in { + def CALL : NVPTXInst<(outs), (ins calltarget:$dst), + "call \t$dst, (1);", []>; +} + +def : Pat<(call tglobaladdr:$dst), + (CALL tglobaladdr:$dst)>; +def : Pat<(call texternalsym:$dst), + (CALL texternalsym:$dst)>; + +// Pseudo instructions. +class Pseudo pattern> + : NVPTXInst; + +// @TODO: We use some tricks here to emit curly braces. Can we clean this up +// a bit without TableGen modifications? 
+def Callseq_Start : NVPTXInst<(outs), (ins i32imm:$amt), + "// Callseq Start $amt\n\t{{\n\t.reg .b32 temp_param_reg;\n\t// }}", + [(callseq_start timm:$amt)]>; +def Callseq_End : NVPTXInst<(outs), (ins i32imm:$amt1, i32imm:$amt2), + "\n\t//{{\n\t}}// Callseq End $amt1", + [(callseq_end timm:$amt1, timm:$amt2)]>; + +// trap instruction + +def trapinst : NVPTXInst<(outs), (ins), + "trap;", + [(trap)]>; + +include "NVPTXVector.td" + +include "NVPTXIntrinsics.td" + + +//----------------------------------- +// Notes +//----------------------------------- +// BSWAP is currently expanded. The following would be a more efficient +// expansion: +// - for < sm_20, use vector scalar mov, as Tesla supports native 16-bit +// registers +// - for sm_20, use prmt (use vector scalar mov to get the pack and +// unpack). sm_20 supports native 32-bit registers, but not native 16-bit +// registers. diff --git a/lib/Target/NVPTX/NVPTXIntrinsics.td b/lib/Target/NVPTX/NVPTXIntrinsics.td new file mode 100644 index 0000000..028a94b --- /dev/null +++ b/lib/Target/NVPTX/NVPTXIntrinsics.td @@ -0,0 +1,1675 @@ +//===- NVPTXIntrinsics.td - PTX Intrinsics Instructions -------*- tblgen -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +def immFloat0 : PatLeaf<(fpimm), [{ + float f = (float)N->getValueAPF().convertToFloat(); + return (f==0.0f); +}]>; + +def immFloat1 : PatLeaf<(fpimm), [{ + float f = (float)N->getValueAPF().convertToFloat(); + return (f==1.0f); +}]>; + +def immDouble0 : PatLeaf<(fpimm), [{ + double d = (double)N->getValueAPF().convertToDouble(); + return (d==0.0); +}]>; + +def immDouble1 : PatLeaf<(fpimm), [{ + double d = (double)N->getValueAPF().convertToDouble(); + return (d==1.0); +}]>; + + + +//----------------------------------- +// Synchronization Functions +//----------------------------------- +def INT_CUDA_SYNCTHREADS : NVPTXInst<(outs), (ins), + "bar.sync \t0;", + [(int_cuda_syncthreads)]>; +def INT_BARRIER0 : NVPTXInst<(outs), (ins), + "bar.sync \t0;", + [(int_nvvm_barrier0)]>; +def INT_BARRIER0_POPC : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred), + !strconcat("{{ \n\t", + !strconcat(".reg .pred \t%p1; \n\t", + !strconcat("setp.ne.u32 \t%p1, $pred, 0; \n\t", + !strconcat("bar.red.popc.u32 \t$dst, 0, %p1; \n\t", + !strconcat("}}", ""))))), + [(set Int32Regs:$dst, (int_nvvm_barrier0_popc Int32Regs:$pred))]>; +def INT_BARRIER0_AND : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred), + !strconcat("{{ \n\t", + !strconcat(".reg .pred \t%p1; \n\t", + !strconcat(".reg .pred \t%p2; \n\t", + !strconcat("setp.ne.u32 \t%p1, $pred, 0; \n\t", + !strconcat("bar.red.and.pred \t%p2, 0, %p1; \n\t", + !strconcat("selp.u32 \t$dst, 1, 0, %p2; \n\t", + !strconcat("}}", ""))))))), + [(set Int32Regs:$dst, (int_nvvm_barrier0_and Int32Regs:$pred))]>; +def INT_BARRIER0_OR : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred), + !strconcat("{{ \n\t", + !strconcat(".reg .pred \t%p1; \n\t", + !strconcat(".reg .pred \t%p2; \n\t", + !strconcat("setp.ne.u32 \t%p1, $pred, 0; \n\t", + !strconcat("bar.red.or.pred \t%p2, 0, %p1; \n\t", + !strconcat("selp.u32 \t$dst, 1, 0, %p2; \n\t", + !strconcat("}}", ""))))))), + [(set Int32Regs:$dst, (int_nvvm_barrier0_or Int32Regs:$pred))]>; + + +//----------------------------------- +// Explicit Memory Fence Functions +//----------------------------------- +class MEMBAR : + NVPTXInst<(outs), (ins), + 
StrOp, [(IntOP)]>; + +def INT_MEMBAR_CTA : MEMBAR<"membar.cta;", int_nvvm_membar_cta>; +def INT_MEMBAR_GL : MEMBAR<"membar.gl;", int_nvvm_membar_gl>; +def INT_MEMBAR_SYS : MEMBAR<"membar.sys;", int_nvvm_membar_sys>; + + +//----------------------------------- +// Math Functions +//----------------------------------- + +// Map min(1.0, max(0.0, x)) to sat(x) +multiclass SAT { + + // fmin(1.0, fmax(0.0, x)) => sat(x) + def SAT11 : NVPTXInst<(outs regclass:$dst), + (ins fimm:$srcf0, fimm:$srcf1, regclass:$src), + OpStr, + [(set regclass:$dst, (IntMinOp f1:$srcf0 , + (IntMaxOp f0:$srcf1, regclass:$src)))]>; + + // fmin(1.0, fmax(x, 0.0)) => sat(x) + def SAT12 : NVPTXInst<(outs regclass:$dst), + (ins fimm:$srcf0, fimm:$srcf1, regclass:$src), + OpStr, + [(set regclass:$dst, (IntMinOp f1:$srcf0 , + (IntMaxOp regclass:$src, f0:$srcf1)))]>; + + // fmin(fmax(0.0, x), 1.0) => sat(x) + def SAT13 : NVPTXInst<(outs regclass:$dst), + (ins fimm:$srcf0, fimm:$srcf1, regclass:$src), + OpStr, + [(set regclass:$dst, (IntMinOp + (IntMaxOp f0:$srcf0, regclass:$src), f1:$srcf1))]>; + + // fmin(fmax(x, 0.0), 1.0) => sat(x) + def SAT14 : NVPTXInst<(outs regclass:$dst), + (ins fimm:$srcf0, fimm:$srcf1, regclass:$src), + OpStr, + [(set regclass:$dst, (IntMinOp + (IntMaxOp regclass:$src, f0:$srcf0), f1:$srcf1))]>; + +} +// Note that max(0.0, min(x, 1.0)) cannot be mapped to sat(x) because when x +// is NaN, max(0.0, min(x, 1.0)) is 1.0 while sat(x) is 0. +// Same story for fmax, fmin. + +defm SAT_fmin_fmax_f : SAT; +defm SAT_fmin_fmax_d : SAT; + + +// We need a full string for OpcStr here because we need to deal with cases +// like INT_PTX_RECIP. +class F_MATH_1 + : NVPTXInst<(outs target_regclass:$dst), (ins src_regclass:$src0), + OpcStr, + [(set target_regclass:$dst, (IntOP src_regclass:$src0))]>; + +// We need a full string for OpcStr here because we need to deal with cases +// like INT_PTX_NATIVE_POWR_F.
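+// (For instance, INT_NVVM_MIN_I below instantiates F_MATH_2 with the
+// complete asm string "min.s32 \t$dst, $src0, $src1;"; keeping OpcStr a
+// full string is what lets one class cover every rounding/ftz spelling.)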
+class F_MATH_2 + : NVPTXInst<(outs t_regclass:$dst), + (ins s0_regclass:$src0, s1_regclass:$src1), + OpcStr, + [(set t_regclass:$dst, (IntOP s0_regclass:$src0, s1_regclass:$src1))]>; + +class F_MATH_3 + : NVPTXInst<(outs t_regclass:$dst), + (ins s0_regclass:$src0, s1_regclass:$src1, s2_regclass:$src2), + OpcStr, + [(set t_regclass:$dst, + (IntOP s0_regclass:$src0, s1_regclass:$src1, s2_regclass:$src2))]>; + +// +// MISC +// + +def INT_NVVM_CLZ_I : F_MATH_1<"clz.b32 \t$dst, $src0;", Int32Regs, Int32Regs, + int_nvvm_clz_i>; +def INT_NVVM_CLZ_LL : F_MATH_1<"clz.b64 \t$dst, $src0;", Int32Regs, Int64Regs, + int_nvvm_clz_ll>; + +def INT_NVVM_POPC_I : F_MATH_1<"popc.b32 \t$dst, $src0;", Int32Regs, Int32Regs, + int_nvvm_popc_i>; +def INT_NVVM_POPC_LL : F_MATH_1<"popc.b64 \t$dst, $src0;", Int32Regs, Int64Regs, + int_nvvm_popc_ll>; + +def INT_NVVM_PRMT : F_MATH_3<"prmt.b32 \t$dst, $src0, $src1, $src2;", Int32Regs, + Int32Regs, Int32Regs, Int32Regs, int_nvvm_prmt>; + +// +// Min Max +// + +def INT_NVVM_MIN_I : F_MATH_2<"min.s32 \t$dst, $src0, $src1;", Int32Regs, + Int32Regs, Int32Regs, int_nvvm_min_i>; +def INT_NVVM_MIN_UI : F_MATH_2<"min.u32 \t$dst, $src0, $src1;", Int32Regs, + Int32Regs, Int32Regs, int_nvvm_min_ui>; + +def INT_NVVM_MIN_LL : F_MATH_2<"min.s64 \t$dst, $src0, $src1;", Int64Regs, + Int64Regs, Int64Regs, int_nvvm_min_ll>; +def INT_NVVM_MIN_ULL : F_MATH_2<"min.u64 \t$dst, $src0, $src1;", Int64Regs, + Int64Regs, Int64Regs, int_nvvm_min_ull>; + +def INT_NVVM_MAX_I : F_MATH_2<"max.s32 \t$dst, $src0, $src1;", Int32Regs, + Int32Regs, Int32Regs, int_nvvm_max_i>; +def INT_NVVM_MAX_UI : F_MATH_2<"max.u32 \t$dst, $src0, $src1;", Int32Regs, + Int32Regs, Int32Regs, int_nvvm_max_ui>; + +def INT_NVVM_MAX_LL : F_MATH_2<"max.s64 \t$dst, $src0, $src1;", Int64Regs, + Int64Regs, Int64Regs, int_nvvm_max_ll>; +def INT_NVVM_MAX_ULL : F_MATH_2<"max.u64 \t$dst, $src0, $src1;", Int64Regs, + Int64Regs, Int64Regs, int_nvvm_max_ull>; + +def INT_NVVM_FMIN_F : F_MATH_2<"min.f32 \t$dst, $src0, $src1;", Float32Regs, + Float32Regs, Float32Regs, int_nvvm_fmin_f>; +def INT_NVVM_FMIN_FTZ_F : F_MATH_2<"min.ftz.f32 \t$dst, $src0, $src1;", + Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmin_ftz_f>; + +def INT_NVVM_FMAX_F : F_MATH_2<"max.f32 \t$dst, $src0, $src1;", Float32Regs, + Float32Regs, Float32Regs, int_nvvm_fmax_f>; +def INT_NVVM_FMAX_FTZ_F : F_MATH_2<"max.ftz.f32 \t$dst, $src0, $src1;", + Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmax_ftz_f>; + +def INT_NVVM_FMIN_D : F_MATH_2<"min.f64 \t$dst, $src0, $src1;", Float64Regs, + Float64Regs, Float64Regs, int_nvvm_fmin_d>; +def INT_NVVM_FMAX_D : F_MATH_2<"max.f64 \t$dst, $src0, $src1;", Float64Regs, + Float64Regs, Float64Regs, int_nvvm_fmax_d>; + +// +// Multiplication +// + +def INT_NVVM_MULHI_I : F_MATH_2<"mul.hi.s32 \t$dst, $src0, $src1;", Int32Regs, + Int32Regs, Int32Regs, int_nvvm_mulhi_i>; +def INT_NVVM_MULHI_UI : F_MATH_2<"mul.hi.u32 \t$dst, $src0, $src1;", Int32Regs, + Int32Regs, Int32Regs, int_nvvm_mulhi_ui>; + +def INT_NVVM_MULHI_LL : F_MATH_2<"mul.hi.s64 \t$dst, $src0, $src1;", Int64Regs, + Int64Regs, Int64Regs, int_nvvm_mulhi_ll>; +def INT_NVVM_MULHI_ULL : F_MATH_2<"mul.hi.u64 \t$dst, $src0, $src1;", Int64Regs, + Int64Regs, Int64Regs, int_nvvm_mulhi_ull>; + +def INT_NVVM_MUL_RN_FTZ_F : F_MATH_2<"mul.rn.ftz.f32 \t$dst, $src0, $src1;", + Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rn_ftz_f>; +def INT_NVVM_MUL_RN_F : F_MATH_2<"mul.rn.f32 \t$dst, $src0, $src1;", + Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rn_f>; +def INT_NVVM_MUL_RZ_FTZ_F 
: F_MATH_2<"mul.rz.ftz.f32 \t$dst, $src0, $src1;", + Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rz_ftz_f>; +def INT_NVVM_MUL_RZ_F : F_MATH_2<"mul.rz.f32 \t$dst, $src0, $src1;", + Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rz_f>; +def INT_NVVM_MUL_RM_FTZ_F : F_MATH_2<"mul.rm.ftz.f32 \t$dst, $src0, $src1;", + Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rm_ftz_f>; +def INT_NVVM_MUL_RM_F : F_MATH_2<"mul.rm.f32 \t$dst, $src0, $src1;", + Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rm_f>; +def INT_NVVM_MUL_RP_FTZ_F : F_MATH_2<"mul.rp.ftz.f32 \t$dst, $src0, $src1;", + Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rp_ftz_f>; +def INT_NVVM_MUL_RP_F : F_MATH_2<"mul.rp.f32 \t$dst, $src0, $src1;", + Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rp_f>; + +def INT_NVVM_MUL_RN_D : F_MATH_2<"mul.rn.f64 \t$dst, $src0, $src1;", + Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rn_d>; +def INT_NVVM_MUL_RZ_D : F_MATH_2<"mul.rz.f64 \t$dst, $src0, $src1;", + Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rz_d>; +def INT_NVVM_MUL_RM_D : F_MATH_2<"mul.rm.f64 \t$dst, $src0, $src1;", + Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rm_d>; +def INT_NVVM_MUL_RP_D : F_MATH_2<"mul.rp.f64 \t$dst, $src0, $src1;", + Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rp_d>; + +def INT_NVVM_MUL24_I : F_MATH_2<"mul24.lo.s32 \t$dst, $src0, $src1;", + Int32Regs, Int32Regs, Int32Regs, int_nvvm_mul24_i>; +def INT_NVVM_MUL24_UI : F_MATH_2<"mul24.lo.u32 \t$dst, $src0, $src1;", + Int32Regs, Int32Regs, Int32Regs, int_nvvm_mul24_ui>; + +// +// Div +// + +def INT_NVVM_DIV_APPROX_FTZ_F + : F_MATH_2<"div.approx.ftz.f32 \t$dst, $src0, $src1;", Float32Regs, + Float32Regs, Float32Regs, int_nvvm_div_approx_ftz_f>; +def INT_NVVM_DIV_APPROX_F : F_MATH_2<"div.approx.f32 \t$dst, $src0, $src1;", + Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_approx_f>; + +def INT_NVVM_DIV_RN_FTZ_F : F_MATH_2<"div.rn.ftz.f32 \t$dst, $src0, $src1;", + Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rn_ftz_f>; +def INT_NVVM_DIV_RN_F : F_MATH_2<"div.rn.f32 \t$dst, $src0, $src1;", + Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rn_f>; +def INT_NVVM_DIV_RZ_FTZ_F : F_MATH_2<"div.rz.ftz.f32 \t$dst, $src0, $src1;", + Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rz_ftz_f>; +def INT_NVVM_DIV_RZ_F : F_MATH_2<"div.rz.f32 \t$dst, $src0, $src1;", + Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rz_f>; +def INT_NVVM_DIV_RM_FTZ_F : F_MATH_2<"div.rm.ftz.f32 \t$dst, $src0, $src1;", + Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rm_ftz_f>; +def INT_NVVM_DIV_RM_F : F_MATH_2<"div.rm.f32 \t$dst, $src0, $src1;", + Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rm_f>; +def INT_NVVM_DIV_RP_FTZ_F : F_MATH_2<"div.rp.ftz.f32 \t$dst, $src0, $src1;", + Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rp_ftz_f>; +def INT_NVVM_DIV_RP_F : F_MATH_2<"div.rp.f32 \t$dst, $src0, $src1;", + Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rp_f>; + +def INT_NVVM_DIV_RN_D : F_MATH_2<"div.rn.f64 \t$dst, $src0, $src1;", + Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rn_d>; +def INT_NVVM_DIV_RZ_D : F_MATH_2<"div.rz.f64 \t$dst, $src0, $src1;", + Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rz_d>; +def INT_NVVM_DIV_RM_D : F_MATH_2<"div.rm.f64 \t$dst, $src0, $src1;", + Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rm_d>; +def INT_NVVM_DIV_RP_D : F_MATH_2<"div.rp.f64 \t$dst, $src0, $src1;", + Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rp_d>; + +// +// Brev +// + +def 
INT_NVVM_BREV32 : F_MATH_1<"brev.b32 \t$dst, $src0;", Int32Regs, Int32Regs, + int_nvvm_brev32>; +def INT_NVVM_BREV64 : F_MATH_1<"brev.b64 \t$dst, $src0;", Int64Regs, Int64Regs, + int_nvvm_brev64>; + +// +// Sad +// + +def INT_NVVM_SAD_I : F_MATH_3<"sad.s32 \t$dst, $src0, $src1, $src2;", + Int32Regs, Int32Regs, Int32Regs, Int32Regs, int_nvvm_sad_i>; +def INT_NVVM_SAD_UI : F_MATH_3<"sad.u32 \t$dst, $src0, $src1, $src2;", + Int32Regs, Int32Regs, Int32Regs, Int32Regs, int_nvvm_sad_ui>; + +// +// Floor Ceil +// + +def INT_NVVM_FLOOR_FTZ_F : F_MATH_1<"cvt.rmi.ftz.f32.f32 \t$dst, $src0;", + Float32Regs, Float32Regs, int_nvvm_floor_ftz_f>; +def INT_NVVM_FLOOR_F : F_MATH_1<"cvt.rmi.f32.f32 \t$dst, $src0;", + Float32Regs, Float32Regs, int_nvvm_floor_f>; +def INT_NVVM_FLOOR_D : F_MATH_1<"cvt.rmi.f64.f64 \t$dst, $src0;", + Float64Regs, Float64Regs, int_nvvm_floor_d>; + +def INT_NVVM_CEIL_FTZ_F : F_MATH_1<"cvt.rpi.ftz.f32.f32 \t$dst, $src0;", + Float32Regs, Float32Regs, int_nvvm_ceil_ftz_f>; +def INT_NVVM_CEIL_F : F_MATH_1<"cvt.rpi.f32.f32 \t$dst, $src0;", + Float32Regs, Float32Regs, int_nvvm_ceil_f>; +def INT_NVVM_CEIL_D : F_MATH_1<"cvt.rpi.f64.f64 \t$dst, $src0;", + Float64Regs, Float64Regs, int_nvvm_ceil_d>; + +// +// Abs +// + +def INT_NVVM_ABS_I : F_MATH_1<"abs.s32 \t$dst, $src0;", Int32Regs, Int32Regs, + int_nvvm_abs_i>; +def INT_NVVM_ABS_LL : F_MATH_1<"abs.s64 \t$dst, $src0;", Int64Regs, Int64Regs, + int_nvvm_abs_ll>; + +def INT_NVVM_FABS_FTZ_F : F_MATH_1<"abs.ftz.f32 \t$dst, $src0;", Float32Regs, + Float32Regs, int_nvvm_fabs_ftz_f>; +def INT_NVVM_FABS_F : F_MATH_1<"abs.f32 \t$dst, $src0;", Float32Regs, + Float32Regs, int_nvvm_fabs_f>; + +def INT_NVVM_FABS_D : F_MATH_1<"abs.f64 \t$dst, $src0;", Float64Regs, + Float64Regs, int_nvvm_fabs_d>; + +// +// Round +// + +def INT_NVVM_ROUND_FTZ_F : F_MATH_1<"cvt.rni.ftz.f32.f32 \t$dst, $src0;", + Float32Regs, Float32Regs, int_nvvm_round_ftz_f>; +def INT_NVVM_ROUND_F : F_MATH_1<"cvt.rni.f32.f32 \t$dst, $src0;", Float32Regs, + Float32Regs, int_nvvm_round_f>; + +def INT_NVVM_ROUND_D : F_MATH_1<"cvt.rni.f64.f64 \t$dst, $src0;", Float64Regs, + Float64Regs, int_nvvm_round_d>; + +// +// Trunc +// + +def INT_NVVM_TRUNC_FTZ_F : F_MATH_1<"cvt.rzi.ftz.f32.f32 \t$dst, $src0;", + Float32Regs, Float32Regs, int_nvvm_trunc_ftz_f>; +def INT_NVVM_TRUNC_F : F_MATH_1<"cvt.rzi.f32.f32 \t$dst, $src0;", Float32Regs, + Float32Regs, int_nvvm_trunc_f>; + +def INT_NVVM_TRUNC_D : F_MATH_1<"cvt.rzi.f64.f64 \t$dst, $src0;", Float64Regs, + Float64Regs, int_nvvm_trunc_d>; + +// +// Saturate +// + +def INT_NVVM_SATURATE_FTZ_F : F_MATH_1<"cvt.sat.ftz.f32.f32 \t$dst, $src0;", + Float32Regs, Float32Regs, int_nvvm_saturate_ftz_f>; +def INT_NVVM_SATURATE_F : F_MATH_1<"cvt.sat.f32.f32 \t$dst, $src0;", + Float32Regs, Float32Regs, int_nvvm_saturate_f>; + +def INT_NVVM_SATURATE_D : F_MATH_1<"cvt.sat.f64.f64 \t$dst, $src0;", + Float64Regs, Float64Regs, int_nvvm_saturate_d>; + +// +// Exp2 Log2 +// + +def INT_NVVM_EX2_APPROX_FTZ_F : F_MATH_1<"ex2.approx.ftz.f32 \t$dst, $src0;", + Float32Regs, Float32Regs, int_nvvm_ex2_approx_ftz_f>; +def INT_NVVM_EX2_APPROX_F : F_MATH_1<"ex2.approx.f32 \t$dst, $src0;", + Float32Regs, Float32Regs, int_nvvm_ex2_approx_f>; +def INT_NVVM_EX2_APPROX_D : F_MATH_1<"ex2.approx.f64 \t$dst, $src0;", + Float64Regs, Float64Regs, int_nvvm_ex2_approx_d>; + +def INT_NVVM_LG2_APPROX_FTZ_F : F_MATH_1<"lg2.approx.ftz.f32 \t$dst, $src0;", + Float32Regs, Float32Regs, int_nvvm_lg2_approx_ftz_f>; +def INT_NVVM_LG2_APPROX_F : F_MATH_1<"lg2.approx.f32 \t$dst, $src0;", + Float32Regs, 
Float32Regs, int_nvvm_lg2_approx_f>; +def INT_NVVM_LG2_APPROX_D : F_MATH_1<"lg2.approx.f64 \t$dst, $src0;", + Float64Regs, Float64Regs, int_nvvm_lg2_approx_d>; + +// +// Sin Cos +// + +def INT_NVVM_SIN_APPROX_FTZ_F : F_MATH_1<"sin.approx.ftz.f32 \t$dst, $src0;", + Float32Regs, Float32Regs, int_nvvm_sin_approx_ftz_f>; +def INT_NVVM_SIN_APPROX_F : F_MATH_1<"sin.approx.f32 \t$dst, $src0;", + Float32Regs, Float32Regs, int_nvvm_sin_approx_f>; + +def INT_NVVM_COS_APPROX_FTZ_F : F_MATH_1<"cos.approx.ftz.f32 \t$dst, $src0;", + Float32Regs, Float32Regs, int_nvvm_cos_approx_ftz_f>; +def INT_NVVM_COS_APPROX_F : F_MATH_1<"cos.approx.f32 \t$dst, $src0;", + Float32Regs, Float32Regs, int_nvvm_cos_approx_f>; + +// +// Fma +// + +def INT_NVVM_FMA_RN_FTZ_F + : F_MATH_3<"fma.rn.ftz.f32 \t$dst, $src0, $src1, $src2;", Float32Regs, + Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rn_ftz_f>; +def INT_NVVM_FMA_RN_F : F_MATH_3<"fma.rn.f32 \t$dst, $src0, $src1, $src2;", + Float32Regs, Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rn_f>; +def INT_NVVM_FMA_RZ_FTZ_F + : F_MATH_3<"fma.rz.ftz.f32 \t$dst, $src0, $src1, $src2;", Float32Regs, + Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rz_ftz_f>; +def INT_NVVM_FMA_RZ_F : F_MATH_3<"fma.rz.f32 \t$dst, $src0, $src1, $src2;", + Float32Regs, Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rz_f>; +def INT_NVVM_FMA_RM_FTZ_F + : F_MATH_3<"fma.rm.ftz.f32 \t$dst, $src0, $src1, $src2;", Float32Regs, + Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rm_ftz_f>; +def INT_NVVM_FMA_RM_F : F_MATH_3<"fma.rm.f32 \t$dst, $src0, $src1, $src2;", + Float32Regs, Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rm_f>; +def INT_NVVM_FMA_RP_FTZ_F + : F_MATH_3<"fma.rp.ftz.f32 \t$dst, $src0, $src1, $src2;", Float32Regs, + Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rp_ftz_f>; +def INT_NVVM_FMA_RP_F : F_MATH_3<"fma.rp.f32 \t$dst, $src0, $src1, $src2;", + Float32Regs, Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rp_f>; + +def INT_NVVM_FMA_RN_D : F_MATH_3<"fma.rn.f64 \t$dst, $src0, $src1, $src2;", + Float64Regs, Float64Regs, Float64Regs, Float64Regs, int_nvvm_fma_rn_d>; +def INT_NVVM_FMA_RZ_D : F_MATH_3<"fma.rz.f64 \t$dst, $src0, $src1, $src2;", + Float64Regs, Float64Regs, Float64Regs, Float64Regs, int_nvvm_fma_rz_d>; +def INT_NVVM_FMA_RM_D : F_MATH_3<"fma.rm.f64 \t$dst, $src0, $src1, $src2;", + Float64Regs, Float64Regs, Float64Regs, Float64Regs, int_nvvm_fma_rm_d>; +def INT_NVVM_FMA_RP_D : F_MATH_3<"fma.rp.f64 \t$dst, $src0, $src1, $src2;", + Float64Regs, Float64Regs, Float64Regs, Float64Regs, int_nvvm_fma_rp_d>; + +// +// Rcp +// + +def INT_NVVM_RCP_RN_FTZ_F : F_MATH_1<"rcp.rn.ftz.f32 \t$dst, $src0;", + Float32Regs, Float32Regs, int_nvvm_rcp_rn_ftz_f>; +def INT_NVVM_RCP_RN_F : F_MATH_1<"rcp.rn.f32 \t$dst, $src0;", + Float32Regs, Float32Regs, int_nvvm_rcp_rn_f>; +def INT_NVVM_RCP_RZ_FTZ_F : F_MATH_1<"rcp.rz.ftz.f32 \t$dst, $src0;", + Float32Regs, Float32Regs, int_nvvm_rcp_rz_ftz_f>; +def INT_NVVM_RCP_RZ_F : F_MATH_1<"rcp.rz.f32 \t$dst, $src0;", + Float32Regs, Float32Regs, int_nvvm_rcp_rz_f>; +def INT_NVVM_RCP_RM_FTZ_F : F_MATH_1<"rcp.rm.ftz.f32 \t$dst, $src0;", + Float32Regs, Float32Regs, int_nvvm_rcp_rm_ftz_f>; +def INT_NVVM_RCP_RM_F : F_MATH_1<"rcp.rm.f32 \t$dst, $src0;", + Float32Regs, Float32Regs, int_nvvm_rcp_rm_f>; +def INT_NVVM_RCP_RP_FTZ_F : F_MATH_1<"rcp.rp.ftz.f32 \t$dst, $src0;", + Float32Regs, Float32Regs, int_nvvm_rcp_rp_ftz_f>; +def INT_NVVM_RCP_RP_F : F_MATH_1<"rcp.rp.f32 \t$dst, $src0;", + Float32Regs, Float32Regs, int_nvvm_rcp_rp_f>; + +def 
INT_NVVM_RCP_RN_D : F_MATH_1<"rcp.rn.f64 \t$dst, $src0;", Float64Regs, + Float64Regs, int_nvvm_rcp_rn_d>; +def INT_NVVM_RCP_RZ_D : F_MATH_1<"rcp.rz.f64 \t$dst, $src0;", Float64Regs, + Float64Regs, int_nvvm_rcp_rz_d>; +def INT_NVVM_RCP_RM_D : F_MATH_1<"rcp.rm.f64 \t$dst, $src0;", Float64Regs, + Float64Regs, int_nvvm_rcp_rm_d>; +def INT_NVVM_RCP_RP_D : F_MATH_1<"rcp.rp.f64 \t$dst, $src0;", Float64Regs, + Float64Regs, int_nvvm_rcp_rp_d>; + +def INT_NVVM_RCP_APPROX_FTZ_D : F_MATH_1<"rcp.approx.ftz.f64 \t$dst, $src0;", + Float64Regs, Float64Regs, int_nvvm_rcp_approx_ftz_d>; + +// +// Sqrt +// + +def INT_NVVM_SQRT_RN_FTZ_F : F_MATH_1<"sqrt.rn.ftz.f32 \t$dst, $src0;", + Float32Regs, Float32Regs, int_nvvm_sqrt_rn_ftz_f>; +def INT_NVVM_SQRT_RN_F : F_MATH_1<"sqrt.rn.f32 \t$dst, $src0;", Float32Regs, + Float32Regs, int_nvvm_sqrt_rn_f>; +def INT_NVVM_SQRT_RZ_FTZ_F : F_MATH_1<"sqrt.rz.ftz.f32 \t$dst, $src0;", + Float32Regs, Float32Regs, int_nvvm_sqrt_rz_ftz_f>; +def INT_NVVM_SQRT_RZ_F : F_MATH_1<"sqrt.rz.f32 \t$dst, $src0;", Float32Regs, + Float32Regs, int_nvvm_sqrt_rz_f>; +def INT_NVVM_SQRT_RM_FTZ_F : F_MATH_1<"sqrt.rm.ftz.f32 \t$dst, $src0;", + Float32Regs, Float32Regs, int_nvvm_sqrt_rm_ftz_f>; +def INT_NVVM_SQRT_RM_F : F_MATH_1<"sqrt.rm.f32 \t$dst, $src0;", Float32Regs, + Float32Regs, int_nvvm_sqrt_rm_f>; +def INT_NVVM_SQRT_RP_FTZ_F : F_MATH_1<"sqrt.rp.ftz.f32 \t$dst, $src0;", + Float32Regs, Float32Regs, int_nvvm_sqrt_rp_ftz_f>; +def INT_NVVM_SQRT_RP_F : F_MATH_1<"sqrt.rp.f32 \t$dst, $src0;", Float32Regs, + Float32Regs, int_nvvm_sqrt_rp_f>; +def INT_NVVM_SQRT_APPROX_FTZ_F : F_MATH_1<"sqrt.approx.ftz.f32 \t$dst, $src0;", + Float32Regs, Float32Regs, int_nvvm_sqrt_approx_ftz_f>; +def INT_NVVM_SQRT_APPROX_F : F_MATH_1<"sqrt.approx.f32 \t$dst, $src0;", + Float32Regs, Float32Regs, int_nvvm_sqrt_approx_f>; + +def INT_NVVM_SQRT_RN_D : F_MATH_1<"sqrt.rn.f64 \t$dst, $src0;", Float64Regs, + Float64Regs, int_nvvm_sqrt_rn_d>; +def INT_NVVM_SQRT_RZ_D : F_MATH_1<"sqrt.rz.f64 \t$dst, $src0;", Float64Regs, + Float64Regs, int_nvvm_sqrt_rz_d>; +def INT_NVVM_SQRT_RM_D : F_MATH_1<"sqrt.rm.f64 \t$dst, $src0;", Float64Regs, + Float64Regs, int_nvvm_sqrt_rm_d>; +def INT_NVVM_SQRT_RP_D : F_MATH_1<"sqrt.rp.f64 \t$dst, $src0;", Float64Regs, + Float64Regs, int_nvvm_sqrt_rp_d>; + +// +// Rsqrt +// + +def INT_NVVM_RSQRT_APPROX_FTZ_F + : F_MATH_1<"rsqrt.approx.ftz.f32 \t$dst, $src0;", Float32Regs, Float32Regs, + int_nvvm_rsqrt_approx_ftz_f>; +def INT_NVVM_RSQRT_APPROX_F : F_MATH_1<"rsqrt.approx.f32 \t$dst, $src0;", + Float32Regs, Float32Regs, int_nvvm_rsqrt_approx_f>; +def INT_NVVM_RSQRT_APPROX_D : F_MATH_1<"rsqrt.approx.f64 \t$dst, $src0;", + Float64Regs, Float64Regs, int_nvvm_rsqrt_approx_d>; + +// +// Add +// + +def INT_NVVM_ADD_RN_FTZ_F : F_MATH_2<"add.rn.ftz.f32 \t$dst, $src0, $src1;", + Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rn_ftz_f>; +def INT_NVVM_ADD_RN_F : F_MATH_2<"add.rn.f32 \t$dst, $src0, $src1;", + Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rn_f>; +def INT_NVVM_ADD_RZ_FTZ_F : F_MATH_2<"add.rz.ftz.f32 \t$dst, $src0, $src1;", + Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rz_ftz_f>; +def INT_NVVM_ADD_RZ_F : F_MATH_2<"add.rz.f32 \t$dst, $src0, $src1;", + Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rz_f>; +def INT_NVVM_ADD_RM_FTZ_F : F_MATH_2<"add.rm.ftz.f32 \t$dst, $src0, $src1;", + Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rm_ftz_f>; +def INT_NVVM_ADD_RM_F : F_MATH_2<"add.rm.f32 \t$dst, $src0, $src1;", + Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rm_f>; +def 
INT_NVVM_ADD_RP_FTZ_F : F_MATH_2<"add.rp.ftz.f32 \t$dst, $src0, $src1;", + Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rp_ftz_f>; +def INT_NVVM_ADD_RP_F : F_MATH_2<"add.rp.f32 \t$dst, $src0, $src1;", + Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rp_f>; + +def INT_NVVM_ADD_RN_D : F_MATH_2<"add.rn.f64 \t$dst, $src0, $src1;", + Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rn_d>; +def INT_NVVM_ADD_RZ_D : F_MATH_2<"add.rz.f64 \t$dst, $src0, $src1;", + Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rz_d>; +def INT_NVVM_ADD_RM_D : F_MATH_2<"add.rm.f64 \t$dst, $src0, $src1;", + Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rm_d>; +def INT_NVVM_ADD_RP_D : F_MATH_2<"add.rp.f64 \t$dst, $src0, $src1;", + Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rp_d>; + +// +// Convert +// + +def INT_NVVM_D2F_RN_FTZ : F_MATH_1<"cvt.rn.ftz.f32.f64 \t$dst, $src0;", + Float32Regs, Float64Regs, int_nvvm_d2f_rn_ftz>; +def INT_NVVM_D2F_RN : F_MATH_1<"cvt.rn.f32.f64 \t$dst, $src0;", + Float32Regs, Float64Regs, int_nvvm_d2f_rn>; +def INT_NVVM_D2F_RZ_FTZ : F_MATH_1<"cvt.rz.ftz.f32.f64 \t$dst, $src0;", + Float32Regs, Float64Regs, int_nvvm_d2f_rz_ftz>; +def INT_NVVM_D2F_RZ : F_MATH_1<"cvt.rz.f32.f64 \t$dst, $src0;", + Float32Regs, Float64Regs, int_nvvm_d2f_rz>; +def INT_NVVM_D2F_RM_FTZ : F_MATH_1<"cvt.rm.ftz.f32.f64 \t$dst, $src0;", + Float32Regs, Float64Regs, int_nvvm_d2f_rm_ftz>; +def INT_NVVM_D2F_RM : F_MATH_1<"cvt.rm.f32.f64 \t$dst, $src0;", + Float32Regs, Float64Regs, int_nvvm_d2f_rm>; +def INT_NVVM_D2F_RP_FTZ : F_MATH_1<"cvt.rp.ftz.f32.f64 \t$dst, $src0;", + Float32Regs, Float64Regs, int_nvvm_d2f_rp_ftz>; +def INT_NVVM_D2F_RP : F_MATH_1<"cvt.rp.f32.f64 \t$dst, $src0;", + Float32Regs, Float64Regs, int_nvvm_d2f_rp>; + +def INT_NVVM_D2I_RN : F_MATH_1<"cvt.rni.s32.f64 \t$dst, $src0;", + Int32Regs, Float64Regs, int_nvvm_d2i_rn>; +def INT_NVVM_D2I_RZ : F_MATH_1<"cvt.rzi.s32.f64 \t$dst, $src0;", + Int32Regs, Float64Regs, int_nvvm_d2i_rz>; +def INT_NVVM_D2I_RM : F_MATH_1<"cvt.rmi.s32.f64 \t$dst, $src0;", + Int32Regs, Float64Regs, int_nvvm_d2i_rm>; +def INT_NVVM_D2I_RP : F_MATH_1<"cvt.rpi.s32.f64 \t$dst, $src0;", + Int32Regs, Float64Regs, int_nvvm_d2i_rp>; + +def INT_NVVM_D2UI_RN : F_MATH_1<"cvt.rni.u32.f64 \t$dst, $src0;", + Int32Regs, Float64Regs, int_nvvm_d2ui_rn>; +def INT_NVVM_D2UI_RZ : F_MATH_1<"cvt.rzi.u32.f64 \t$dst, $src0;", + Int32Regs, Float64Regs, int_nvvm_d2ui_rz>; +def INT_NVVM_D2UI_RM : F_MATH_1<"cvt.rmi.u32.f64 \t$dst, $src0;", + Int32Regs, Float64Regs, int_nvvm_d2ui_rm>; +def INT_NVVM_D2UI_RP : F_MATH_1<"cvt.rpi.u32.f64 \t$dst, $src0;", + Int32Regs, Float64Regs, int_nvvm_d2ui_rp>; + +def INT_NVVM_I2D_RN : F_MATH_1<"cvt.rn.f64.s32 \t$dst, $src0;", + Float64Regs, Int32Regs, int_nvvm_i2d_rn>; +def INT_NVVM_I2D_RZ : F_MATH_1<"cvt.rz.f64.s32 \t$dst, $src0;", + Float64Regs, Int32Regs, int_nvvm_i2d_rz>; +def INT_NVVM_I2D_RM : F_MATH_1<"cvt.rm.f64.s32 \t$dst, $src0;", + Float64Regs, Int32Regs, int_nvvm_i2d_rm>; +def INT_NVVM_I2D_RP : F_MATH_1<"cvt.rp.f64.s32 \t$dst, $src0;", + Float64Regs, Int32Regs, int_nvvm_i2d_rp>; + +def INT_NVVM_UI2D_RN : F_MATH_1<"cvt.rn.f64.u32 \t$dst, $src0;", + Float64Regs, Int32Regs, int_nvvm_ui2d_rn>; +def INT_NVVM_UI2D_RZ : F_MATH_1<"cvt.rz.f64.u32 \t$dst, $src0;", + Float64Regs, Int32Regs, int_nvvm_ui2d_rz>; +def INT_NVVM_UI2D_RM : F_MATH_1<"cvt.rm.f64.u32 \t$dst, $src0;", + Float64Regs, Int32Regs, int_nvvm_ui2d_rm>; +def INT_NVVM_UI2D_RP : F_MATH_1<"cvt.rp.f64.u32 \t$dst, $src0;", + Float64Regs, Int32Regs, int_nvvm_ui2d_rp>; + +def 
INT_NVVM_F2I_RN_FTZ : F_MATH_1<"cvt.rni.ftz.s32.f32 \t$dst, $src0;", + Int32Regs, Float32Regs, int_nvvm_f2i_rn_ftz>; +def INT_NVVM_F2I_RN : F_MATH_1<"cvt.rni.s32.f32 \t$dst, $src0;", Int32Regs, + Float32Regs, int_nvvm_f2i_rn>; +def INT_NVVM_F2I_RZ_FTZ : F_MATH_1<"cvt.rzi.ftz.s32.f32 \t$dst, $src0;", + Int32Regs, Float32Regs, int_nvvm_f2i_rz_ftz>; +def INT_NVVM_F2I_RZ : F_MATH_1<"cvt.rzi.s32.f32 \t$dst, $src0;", Int32Regs, + Float32Regs, int_nvvm_f2i_rz>; +def INT_NVVM_F2I_RM_FTZ : F_MATH_1<"cvt.rmi.ftz.s32.f32 \t$dst, $src0;", + Int32Regs, Float32Regs, int_nvvm_f2i_rm_ftz>; +def INT_NVVM_F2I_RM : F_MATH_1<"cvt.rmi.s32.f32 \t$dst, $src0;", Int32Regs, + Float32Regs, int_nvvm_f2i_rm>; +def INT_NVVM_F2I_RP_FTZ : F_MATH_1<"cvt.rpi.ftz.s32.f32 \t$dst, $src0;", + Int32Regs, Float32Regs, int_nvvm_f2i_rp_ftz>; +def INT_NVVM_F2I_RP : F_MATH_1<"cvt.rpi.s32.f32 \t$dst, $src0;", Int32Regs, + Float32Regs, int_nvvm_f2i_rp>; + +def INT_NVVM_F2UI_RN_FTZ : F_MATH_1<"cvt.rni.ftz.u32.f32 \t$dst, $src0;", + Int32Regs, Float32Regs, int_nvvm_f2ui_rn_ftz>; +def INT_NVVM_F2UI_RN : F_MATH_1<"cvt.rni.u32.f32 \t$dst, $src0;", Int32Regs, + Float32Regs, int_nvvm_f2ui_rn>; +def INT_NVVM_F2UI_RZ_FTZ : F_MATH_1<"cvt.rzi.ftz.u32.f32 \t$dst, $src0;", + Int32Regs, Float32Regs, int_nvvm_f2ui_rz_ftz>; +def INT_NVVM_F2UI_RZ : F_MATH_1<"cvt.rzi.u32.f32 \t$dst, $src0;", Int32Regs, + Float32Regs, int_nvvm_f2ui_rz>; +def INT_NVVM_F2UI_RM_FTZ : F_MATH_1<"cvt.rmi.ftz.u32.f32 \t$dst, $src0;", + Int32Regs, Float32Regs, int_nvvm_f2ui_rm_ftz>; +def INT_NVVM_F2UI_RM : F_MATH_1<"cvt.rmi.u32.f32 \t$dst, $src0;", Int32Regs, + Float32Regs, int_nvvm_f2ui_rm>; +def INT_NVVM_F2UI_RP_FTZ : F_MATH_1<"cvt.rpi.ftz.u32.f32 \t$dst, $src0;", + Int32Regs, Float32Regs, int_nvvm_f2ui_rp_ftz>; +def INT_NVVM_F2UI_RP : F_MATH_1<"cvt.rpi.u32.f32 \t$dst, $src0;", Int32Regs, + Float32Regs, int_nvvm_f2ui_rp>; + +def INT_NVVM_I2F_RN : F_MATH_1<"cvt.rn.f32.s32 \t$dst, $src0;", Float32Regs, + Int32Regs, int_nvvm_i2f_rn>; +def INT_NVVM_I2F_RZ : F_MATH_1<"cvt.rz.f32.s32 \t$dst, $src0;", Float32Regs, + Int32Regs, int_nvvm_i2f_rz>; +def INT_NVVM_I2F_RM : F_MATH_1<"cvt.rm.f32.s32 \t$dst, $src0;", Float32Regs, + Int32Regs, int_nvvm_i2f_rm>; +def INT_NVVM_I2F_RP : F_MATH_1<"cvt.rp.f32.s32 \t$dst, $src0;", Float32Regs, + Int32Regs, int_nvvm_i2f_rp>; + +def INT_NVVM_UI2F_RN : F_MATH_1<"cvt.rn.f32.u32 \t$dst, $src0;", Float32Regs, + Int32Regs, int_nvvm_ui2f_rn>; +def INT_NVVM_UI2F_RZ : F_MATH_1<"cvt.rz.f32.u32 \t$dst, $src0;", Float32Regs, + Int32Regs, int_nvvm_ui2f_rz>; +def INT_NVVM_UI2F_RM : F_MATH_1<"cvt.rm.f32.u32 \t$dst, $src0;", Float32Regs, + Int32Regs, int_nvvm_ui2f_rm>; +def INT_NVVM_UI2F_RP : F_MATH_1<"cvt.rp.f32.u32 \t$dst, $src0;", Float32Regs, + Int32Regs, int_nvvm_ui2f_rp>; + +def INT_NVVM_LOHI_I2D : F_MATH_2<"mov.b64 \t$dst, {{$src0, $src1}};", + Float64Regs, Int32Regs, Int32Regs, int_nvvm_lohi_i2d>; + +def INT_NVVM_D2I_LO : F_MATH_1; +def INT_NVVM_D2I_HI : F_MATH_1; + +def INT_NVVM_F2LL_RN_FTZ : F_MATH_1<"cvt.rni.ftz.s64.f32 \t$dst, $src0;", + Int64Regs, Float32Regs, int_nvvm_f2ll_rn_ftz>; +def INT_NVVM_F2LL_RN : F_MATH_1<"cvt.rni.s64.f32 \t$dst, $src0;", Int64Regs, + Float32Regs, int_nvvm_f2ll_rn>; +def INT_NVVM_F2LL_RZ_FTZ : F_MATH_1<"cvt.rzi.ftz.s64.f32 \t$dst, $src0;", + Int64Regs, Float32Regs, int_nvvm_f2ll_rz_ftz>; +def INT_NVVM_F2LL_RZ : F_MATH_1<"cvt.rzi.s64.f32 \t$dst, $src0;", Int64Regs, + Float32Regs, int_nvvm_f2ll_rz>; +def INT_NVVM_F2LL_RM_FTZ : F_MATH_1<"cvt.rmi.ftz.s64.f32 \t$dst, $src0;", + Int64Regs, Float32Regs, int_nvvm_f2ll_rm_ftz>; +def 
INT_NVVM_F2LL_RM : F_MATH_1<"cvt.rmi.s64.f32 \t$dst, $src0;", Int64Regs, + Float32Regs, int_nvvm_f2ll_rm>; +def INT_NVVM_F2LL_RP_FTZ : F_MATH_1<"cvt.rpi.ftz.s64.f32 \t$dst, $src0;", + Int64Regs, Float32Regs, int_nvvm_f2ll_rp_ftz>; +def INT_NVVM_F2LL_RP : F_MATH_1<"cvt.rpi.s64.f32 \t$dst, $src0;", Int64Regs, + Float32Regs, int_nvvm_f2ll_rp>; + +def INT_NVVM_F2ULL_RN_FTZ : F_MATH_1<"cvt.rni.ftz.u64.f32 \t$dst, $src0;", + Int64Regs, Float32Regs, int_nvvm_f2ull_rn_ftz>; +def INT_NVVM_F2ULL_RN : F_MATH_1<"cvt.rni.u64.f32 \t$dst, $src0;", Int64Regs, + Float32Regs, int_nvvm_f2ull_rn>; +def INT_NVVM_F2ULL_RZ_FTZ : F_MATH_1<"cvt.rzi.ftz.u64.f32 \t$dst, $src0;", + Int64Regs, Float32Regs, int_nvvm_f2ull_rz_ftz>; +def INT_NVVM_F2ULL_RZ : F_MATH_1<"cvt.rzi.u64.f32 \t$dst, $src0;", Int64Regs, + Float32Regs, int_nvvm_f2ull_rz>; +def INT_NVVM_F2ULL_RM_FTZ : F_MATH_1<"cvt.rmi.ftz.u64.f32 \t$dst, $src0;", + Int64Regs, Float32Regs, int_nvvm_f2ull_rm_ftz>; +def INT_NVVM_F2ULL_RM : F_MATH_1<"cvt.rmi.u64.f32 \t$dst, $src0;", Int64Regs, + Float32Regs, int_nvvm_f2ull_rm>; +def INT_NVVM_F2ULL_RP_FTZ : F_MATH_1<"cvt.rpi.ftz.u64.f32 \t$dst, $src0;", + Int64Regs, Float32Regs, int_nvvm_f2ull_rp_ftz>; +def INT_NVVM_F2ULL_RP : F_MATH_1<"cvt.rpi.u64.f32 \t$dst, $src0;", Int64Regs, + Float32Regs, int_nvvm_f2ull_rp>; + +def INT_NVVM_D2LL_RN : F_MATH_1<"cvt.rni.s64.f64 \t$dst, $src0;", Int64Regs, + Float64Regs, int_nvvm_d2ll_rn>; +def INT_NVVM_D2LL_RZ : F_MATH_1<"cvt.rzi.s64.f64 \t$dst, $src0;", Int64Regs, + Float64Regs, int_nvvm_d2ll_rz>; +def INT_NVVM_D2LL_RM : F_MATH_1<"cvt.rmi.s64.f64 \t$dst, $src0;", Int64Regs, + Float64Regs, int_nvvm_d2ll_rm>; +def INT_NVVM_D2LL_RP : F_MATH_1<"cvt.rpi.s64.f64 \t$dst, $src0;", Int64Regs, + Float64Regs, int_nvvm_d2ll_rp>; + +def INT_NVVM_D2ULL_RN : F_MATH_1<"cvt.rni.u64.f64 \t$dst, $src0;", Int64Regs, + Float64Regs, int_nvvm_d2ull_rn>; +def INT_NVVM_D2ULL_RZ : F_MATH_1<"cvt.rzi.u64.f64 \t$dst, $src0;", Int64Regs, + Float64Regs, int_nvvm_d2ull_rz>; +def INT_NVVM_D2ULL_RM : F_MATH_1<"cvt.rmi.u64.f64 \t$dst, $src0;", Int64Regs, + Float64Regs, int_nvvm_d2ull_rm>; +def INT_NVVM_D2ULL_RP : F_MATH_1<"cvt.rpi.u64.f64 \t$dst, $src0;", Int64Regs, + Float64Regs, int_nvvm_d2ull_rp>; + +def INT_NVVM_LL2F_RN : F_MATH_1<"cvt.rn.f32.s64 \t$dst, $src0;", Float32Regs, + Int64Regs, int_nvvm_ll2f_rn>; +def INT_NVVM_LL2F_RZ : F_MATH_1<"cvt.rz.f32.s64 \t$dst, $src0;", Float32Regs, + Int64Regs, int_nvvm_ll2f_rz>; +def INT_NVVM_LL2F_RM : F_MATH_1<"cvt.rm.f32.s64 \t$dst, $src0;", Float32Regs, + Int64Regs, int_nvvm_ll2f_rm>; +def INT_NVVM_LL2F_RP : F_MATH_1<"cvt.rp.f32.s64 \t$dst, $src0;", Float32Regs, + Int64Regs, int_nvvm_ll2f_rp>; +def INT_NVVM_ULL2F_RN : F_MATH_1<"cvt.rn.f32.u64 \t$dst, $src0;", Float32Regs, + Int64Regs, int_nvvm_ull2f_rn>; +def INT_NVVM_ULL2F_RZ : F_MATH_1<"cvt.rz.f32.u64 \t$dst, $src0;", Float32Regs, + Int64Regs, int_nvvm_ull2f_rz>; +def INT_NVVM_ULL2F_RM : F_MATH_1<"cvt.rm.f32.u64 \t$dst, $src0;", Float32Regs, + Int64Regs, int_nvvm_ull2f_rm>; +def INT_NVVM_ULL2F_RP : F_MATH_1<"cvt.rp.f32.u64 \t$dst, $src0;", Float32Regs, + Int64Regs, int_nvvm_ull2f_rp>; + +def INT_NVVM_LL2D_RN : F_MATH_1<"cvt.rn.f64.s64 \t$dst, $src0;", Float64Regs, + Int64Regs, int_nvvm_ll2d_rn>; +def INT_NVVM_LL2D_RZ : F_MATH_1<"cvt.rz.f64.s64 \t$dst, $src0;", Float64Regs, + Int64Regs, int_nvvm_ll2d_rz>; +def INT_NVVM_LL2D_RM : F_MATH_1<"cvt.rm.f64.s64 \t$dst, $src0;", Float64Regs, + Int64Regs, int_nvvm_ll2d_rm>; +def INT_NVVM_LL2D_RP : F_MATH_1<"cvt.rp.f64.s64 \t$dst, $src0;", Float64Regs, + Int64Regs, 
int_nvvm_ll2d_rp>; +def INT_NVVM_ULL2D_RN : F_MATH_1<"cvt.rn.f64.u64 \t$dst, $src0;", Float64Regs, + Int64Regs, int_nvvm_ull2d_rn>; +def INT_NVVM_ULL2D_RZ : F_MATH_1<"cvt.rz.f64.u64 \t$dst, $src0;", Float64Regs, + Int64Regs, int_nvvm_ull2d_rz>; +def INT_NVVM_ULL2D_RM : F_MATH_1<"cvt.rm.f64.u64 \t$dst, $src0;", Float64Regs, + Int64Regs, int_nvvm_ull2d_rm>; +def INT_NVVM_ULL2D_RP : F_MATH_1<"cvt.rp.f64.u64 \t$dst, $src0;", Float64Regs, + Int64Regs, int_nvvm_ull2d_rp>; + +def INT_NVVM_F2H_RN_FTZ : F_MATH_1; +def INT_NVVM_F2H_RN : F_MATH_1; + +def INT_NVVM_H2F : F_MATH_1; + +// +// Bitcast +// + +def INT_NVVM_BITCAST_F2I : F_MATH_1<"mov.b32 \t$dst, $src0;", Int32Regs, + Float32Regs, int_nvvm_bitcast_f2i>; +def INT_NVVM_BITCAST_I2F : F_MATH_1<"mov.b32 \t$dst, $src0;", Float32Regs, + Int32Regs, int_nvvm_bitcast_i2f>; + +def INT_NVVM_BITCAST_LL2D : F_MATH_1<"mov.b64 \t$dst, $src0;", Float64Regs, + Int64Regs, int_nvvm_bitcast_ll2d>; +def INT_NVVM_BITCAST_D2LL : F_MATH_1<"mov.b64 \t$dst, $src0;", Int64Regs, + Float64Regs, int_nvvm_bitcast_d2ll>; + +//----------------------------------- +// Atomic Functions +//----------------------------------- + +class ATOMIC_GLOBAL_CHK + : PatFrag; +class ATOMIC_SHARED_CHK + : PatFrag; +class ATOMIC_GENERIC_CHK + : PatFrag; + +multiclass F_ATOMIC_2_imp { + def reg : NVPTXInst<(outs regclass:$dst), (ins ptrclass:$addr, regclass:$b), + !strconcat("atom", + !strconcat(SpaceStr, + !strconcat(OpcStr, + !strconcat(TypeStr, + !strconcat(" \t$dst, [$addr], $b;", ""))))), + [(set regclass:$dst, (IntOp ptrclass:$addr, regclass:$b))]>, + Requires<[Pred]>; + def imm : NVPTXInst<(outs regclass:$dst), (ins ptrclass:$addr, IMMType:$b), + !strconcat("atom", + !strconcat(SpaceStr, + !strconcat(OpcStr, + !strconcat(TypeStr, + !strconcat(" \t$dst, [$addr], $b;", ""))))), + [(set regclass:$dst, (IntOp ptrclass:$addr, IMM:$b))]>, + Requires<[Pred]>; +} +multiclass F_ATOMIC_2 { + defm p32 : F_ATOMIC_2_imp; + defm p64 : F_ATOMIC_2_imp; +} + +// has 2 operands, neg the second one +multiclass F_ATOMIC_2_NEG_imp { + def reg : NVPTXInst<(outs regclass:$dst), (ins ptrclass:$addr, regclass:$b), + !strconcat("{{ \n\t", + !strconcat(".reg \t.s", + !strconcat(TypeStr, + !strconcat(" temp; \n\t", + !strconcat("neg.s", + !strconcat(TypeStr, + !strconcat(" \ttemp, $b; \n\t", + !strconcat("atom", + !strconcat(SpaceStr, + !strconcat(OpcStr, + !strconcat(".u", + !strconcat(TypeStr, + !strconcat(" \t$dst, [$addr], temp; \n\t", + !strconcat("}}", "")))))))))))))), + [(set regclass:$dst, (IntOp ptrclass:$addr, regclass:$b))]>, + Requires<[Pred]>; +} +multiclass F_ATOMIC_2_NEG { + defm p32: F_ATOMIC_2_NEG_imp ; + defm p64: F_ATOMIC_2_NEG_imp ; +} + +// has 3 operands +multiclass F_ATOMIC_3_imp { + def reg : NVPTXInst<(outs regclass:$dst), + (ins ptrclass:$addr, regclass:$b, regclass:$c), + !strconcat("atom", + !strconcat(SpaceStr, + !strconcat(OpcStr, + !strconcat(TypeStr, + !strconcat(" \t$dst, [$addr], $b, $c;", ""))))), + [(set regclass:$dst, + (IntOp ptrclass:$addr, regclass:$b, regclass:$c))]>, + Requires<[Pred]>; + def imm1 : NVPTXInst<(outs regclass:$dst), + (ins ptrclass:$addr, IMMType:$b, regclass:$c), + !strconcat("atom", + !strconcat(SpaceStr, + !strconcat(OpcStr, + !strconcat(TypeStr, + !strconcat(" \t$dst, [$addr], $b, $c;", ""))))), + [(set regclass:$dst, (IntOp ptrclass:$addr, imm:$b, regclass:$c))]>, + Requires<[Pred]>; + def imm2 : NVPTXInst<(outs regclass:$dst), + (ins ptrclass:$addr, regclass:$b, IMMType:$c), + !strconcat("atom", + !strconcat(SpaceStr, + !strconcat(OpcStr, + 
!strconcat(TypeStr, + !strconcat(" \t$dst, [$addr], $b, $c;", ""))))), + [(set regclass:$dst, (IntOp ptrclass:$addr, regclass:$b, imm:$c))]>, + Requires<[Pred]>; + def imm3 : NVPTXInst<(outs regclass:$dst), + (ins ptrclass:$addr, IMMType:$b, IMMType:$c), + !strconcat("atom", + !strconcat(SpaceStr, + !strconcat(OpcStr, + !strconcat(TypeStr, + !strconcat(" \t$dst, [$addr], $b, $c;", ""))))), + [(set regclass:$dst, (IntOp ptrclass:$addr, imm:$b, imm:$c))]>, + Requires<[Pred]>; +} +multiclass F_ATOMIC_3 { + defm p32 : F_ATOMIC_3_imp; + defm p64 : F_ATOMIC_3_imp; +} + +// atom_add + +def atomic_load_add_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b), + (atomic_load_add_32 node:$a, node:$b)>; +def atomic_load_add_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b), + (atomic_load_add_32 node:$a, node:$b)>; +def atomic_load_add_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b), + (atomic_load_add_32 node:$a, node:$b)>; +def atomic_load_add_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b), + (atomic_load_add_64 node:$a, node:$b)>; +def atomic_load_add_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b), + (atomic_load_add_64 node:$a, node:$b)>; +def atomic_load_add_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b), + (atomic_load_add_64 node:$a, node:$b)>; +def atomic_load_add_f32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b), + (int_nvvm_atomic_load_add_f32 node:$a, node:$b)>; +def atomic_load_add_f32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b), + (int_nvvm_atomic_load_add_f32 node:$a, node:$b)>; +def atomic_load_add_f32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b), + (int_nvvm_atomic_load_add_f32 node:$a, node:$b)>; + +defm INT_PTX_ATOM_ADD_G_32 : F_ATOMIC_2; +defm INT_PTX_ATOM_ADD_S_32 : F_ATOMIC_2; +defm INT_PTX_ATOM_ADD_GEN_32 : F_ATOMIC_2; +defm INT_PTX_ATOM_ADD_GEN_32_USE_G : F_ATOMIC_2; + +defm INT_PTX_ATOM_ADD_G_64 : F_ATOMIC_2; +defm INT_PTX_ATOM_ADD_S_64 : F_ATOMIC_2; +defm INT_PTX_ATOM_ADD_GEN_64 : F_ATOMIC_2; +defm INT_PTX_ATOM_ADD_GEN_64_USE_G : F_ATOMIC_2; + +defm INT_PTX_ATOM_ADD_G_F32 : F_ATOMIC_2; +defm INT_PTX_ATOM_ADD_S_F32 : F_ATOMIC_2; +defm INT_PTX_ATOM_ADD_GEN_F32 : F_ATOMIC_2; + +// atom_sub + +def atomic_load_sub_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b), + (atomic_load_sub_32 node:$a, node:$b)>; +def atomic_load_sub_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b), + (atomic_load_sub_32 node:$a, node:$b)>; +def atomic_load_sub_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b), + (atomic_load_sub_32 node:$a, node:$b)>; +def atomic_load_sub_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b), + (atomic_load_sub_64 node:$a, node:$b)>; +def atomic_load_sub_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b), + (atomic_load_sub_64 node:$a, node:$b)>; +def atomic_load_sub_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b), + (atomic_load_sub_64 node:$a, node:$b)>; + +defm INT_PTX_ATOM_SUB_G_32 : F_ATOMIC_2_NEG; +defm INT_PTX_ATOM_SUB_G_64 : F_ATOMIC_2_NEG; +defm INT_PTX_ATOM_SUB_GEN_32 : F_ATOMIC_2_NEG; +defm INT_PTX_ATOM_SUB_GEN_32_USE_G : F_ATOMIC_2_NEG; +defm INT_PTX_ATOM_SUB_S_32 : F_ATOMIC_2_NEG; +defm INT_PTX_ATOM_SUB_S_64 : F_ATOMIC_2_NEG; +defm INT_PTX_ATOM_SUB_GEN_64 : F_ATOMIC_2_NEG; +defm INT_PTX_ATOM_SUB_GEN_64_USE_G : F_ATOMIC_2_NEG; + +// atom_swap + +def atomic_swap_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b), + (atomic_swap_32 node:$a, node:$b)>; +def atomic_swap_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b), + (atomic_swap_32 node:$a, node:$b)>; +def atomic_swap_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b), + (atomic_swap_32 node:$a, node:$b)>; +def atomic_swap_64_g: 
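Every atomic operation is instantiated once per state space: the
ATOMIC_GLOBAL_CHK, ATOMIC_SHARED_CHK and ATOMIC_GENERIC_CHK PatFrags test the
address space of the pointer operand, so a single IR-level atomicrmw selects
to atom.global.*, atom.shared.* or the generic atom.* form. Note also that
PTX has no atomic subtract; the F_ATOMIC_2_NEG expansion above negates $b
into a temporary and applies the additive atomic to it. A hedged sketch of
IR that the global add pattern is meant to match (addrspace(1) for the
global space and the Monotonic ordering are illustrative assumptions):

    #include "llvm/IRBuilder.h"
    #include "llvm/Instructions.h"
    using namespace llvm;

    // Sketch: an atomicrmw add through an addrspace(1) pointer should match
    // atomic_load_add_32_g; an addrspace(0) pointer falls through to the
    // "gen" variants instead.
    static Value *emitGlobalAtomicAdd(IRBuilder<> &B, Value *GlobalPtr,
                                      Value *Inc) {
      // GlobalPtr: i32 addrspace(1)*, Inc: i32
      return B.CreateAtomicRMW(AtomicRMWInst::Add, GlobalPtr, Inc, Monotonic);
    }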
ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b), + (atomic_swap_64 node:$a, node:$b)>; +def atomic_swap_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b), + (atomic_swap_64 node:$a, node:$b)>; +def atomic_swap_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b), + (atomic_swap_64 node:$a, node:$b)>; + +defm INT_PTX_ATOM_SWAP_G_32 : F_ATOMIC_2; +defm INT_PTX_ATOM_SWAP_S_32 : F_ATOMIC_2; +defm INT_PTX_ATOM_SWAP_GEN_32 : F_ATOMIC_2; +defm INT_PTX_ATOM_SWAP_GEN_32_USE_G : F_ATOMIC_2; +defm INT_PTX_ATOM_SWAP_G_64 : F_ATOMIC_2; +defm INT_PTX_ATOM_SWAP_S_64 : F_ATOMIC_2; +defm INT_PTX_ATOM_SWAP_GEN_64 : F_ATOMIC_2; +defm INT_PTX_ATOM_SWAP_GEN_64_USE_G : F_ATOMIC_2; + +// atom_max + +def atomic_load_max_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b) + , (atomic_load_max_32 node:$a, node:$b)>; +def atomic_load_max_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b), + (atomic_load_max_32 node:$a, node:$b)>; +def atomic_load_max_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b), + (atomic_load_max_32 node:$a, node:$b)>; +def atomic_load_umax_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b), + (atomic_load_umax_32 node:$a, node:$b)>; +def atomic_load_umax_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b), + (atomic_load_umax_32 node:$a, node:$b)>; +def atomic_load_umax_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b), + (atomic_load_umax_32 node:$a, node:$b)>; + +defm INT_PTX_ATOM_LOAD_MAX_G_32 : F_ATOMIC_2; +defm INT_PTX_ATOM_LOAD_MAX_S_32 : F_ATOMIC_2; +defm INT_PTX_ATOM_LOAD_MAX_GEN_32 : F_ATOMIC_2; +defm INT_PTX_ATOM_LOAD_MAX_GEN_32_USE_G : F_ATOMIC_2; +defm INT_PTX_ATOM_LOAD_UMAX_G_32 : F_ATOMIC_2; +defm INT_PTX_ATOM_LOAD_UMAX_S_32 : F_ATOMIC_2; +defm INT_PTX_ATOM_LOAD_UMAX_GEN_32 : F_ATOMIC_2; +defm INT_PTX_ATOM_LOAD_UMAX_GEN_32_USE_G : F_ATOMIC_2; + +// atom_min + +def atomic_load_min_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b), + (atomic_load_min_32 node:$a, node:$b)>; +def atomic_load_min_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b), + (atomic_load_min_32 node:$a, node:$b)>; +def atomic_load_min_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b), + (atomic_load_min_32 node:$a, node:$b)>; +def atomic_load_umin_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b), + (atomic_load_umin_32 node:$a, node:$b)>; +def atomic_load_umin_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b), + (atomic_load_umin_32 node:$a, node:$b)>; +def atomic_load_umin_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b), + (atomic_load_umin_32 node:$a, node:$b)>; + +defm INT_PTX_ATOM_LOAD_MIN_G_32 : F_ATOMIC_2; +defm INT_PTX_ATOM_LOAD_MIN_S_32 : F_ATOMIC_2; +defm INT_PTX_ATOM_LOAD_MIN_GEN_32 : F_ATOMIC_2; +defm INT_PTX_ATOM_LOAD_MIN_GEN_32_USE_G : F_ATOMIC_2; +defm INT_PTX_ATOM_LOAD_UMIN_G_32 : F_ATOMIC_2; +defm INT_PTX_ATOM_LOAD_UMIN_S_32 : F_ATOMIC_2; +defm INT_PTX_ATOM_LOAD_UMIN_GEN_32 : F_ATOMIC_2; +defm INT_PTX_ATOM_LOAD_UMIN_GEN_32_USE_G : F_ATOMIC_2; + +// atom_inc atom_dec + +def atomic_load_inc_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b), + (int_nvvm_atomic_load_inc_32 node:$a, node:$b)>; +def atomic_load_inc_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b), + (int_nvvm_atomic_load_inc_32 node:$a, node:$b)>; +def atomic_load_inc_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b), + (int_nvvm_atomic_load_inc_32 node:$a, node:$b)>; +def atomic_load_dec_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b), + (int_nvvm_atomic_load_dec_32 node:$a, node:$b)>; +def atomic_load_dec_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b), + (int_nvvm_atomic_load_dec_32 node:$a, node:$b)>; +def atomic_load_dec_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b), + 
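The max/min families exist in signed and unsigned pairs
(atomic_load_max_32_* versus atomic_load_umax_32_*), which is what chooses
between the .s32 and .u32 atom encodings. In IR terms the signedness comes
from the atomicrmw opcode, not the operand type; a sketch under the same
assumptions as the previous example:

    #include "llvm/IRBuilder.h"
    #include "llvm/Instructions.h"
    using namespace llvm;

    // Sketch: Max matches the atomic_load_max_32_* patterns, UMax the
    // atomic_load_umax_32_* ones.
    static Value *emitAtomicMax(IRBuilder<> &B, Value *Ptr, Value *V,
                                bool IsSigned) {
      return B.CreateAtomicRMW(IsSigned ? AtomicRMWInst::Max
                                        : AtomicRMWInst::UMax,
                               Ptr, V, Monotonic);
    }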
(int_nvvm_atomic_load_dec_32 node:$a, node:$b)>; + +defm INT_PTX_ATOM_INC_G_32 : F_ATOMIC_2; +defm INT_PTX_ATOM_INC_S_32 : F_ATOMIC_2; +defm INT_PTX_ATOM_INC_GEN_32 : F_ATOMIC_2; +defm INT_PTX_ATOM_INC_GEN_32_USE_G : F_ATOMIC_2; +defm INT_PTX_ATOM_DEC_G_32 : F_ATOMIC_2; +defm INT_PTX_ATOM_DEC_S_32 : F_ATOMIC_2; +defm INT_PTX_ATOM_DEC_GEN_32 : F_ATOMIC_2; +defm INT_PTX_ATOM_DEC_GEN_32_USE_G : F_ATOMIC_2; + +// atom_and + +def atomic_load_and_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b), + (atomic_load_and_32 node:$a, node:$b)>; +def atomic_load_and_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b), + (atomic_load_and_32 node:$a, node:$b)>; +def atomic_load_and_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b), + (atomic_load_and_32 node:$a, node:$b)>; + +defm INT_PTX_ATOM_AND_G_32 : F_ATOMIC_2; +defm INT_PTX_ATOM_AND_S_32 : F_ATOMIC_2; +defm INT_PTX_ATOM_AND_GEN_32 : F_ATOMIC_2; +defm INT_PTX_ATOM_AND_GEN_32_USE_G : F_ATOMIC_2; + +// atom_or + +def atomic_load_or_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b), + (atomic_load_or_32 node:$a, node:$b)>; +def atomic_load_or_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b), + (atomic_load_or_32 node:$a, node:$b)>; +def atomic_load_or_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b), + (atomic_load_or_32 node:$a, node:$b)>; + +defm INT_PTX_ATOM_OR_G_32 : F_ATOMIC_2; +defm INT_PTX_ATOM_OR_GEN_32 : F_ATOMIC_2; +defm INT_PTX_ATOM_OR_GEN_32_USE_G : F_ATOMIC_2; +defm INT_PTX_ATOM_OR_S_32 : F_ATOMIC_2; + +// atom_xor + +def atomic_load_xor_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b), + (atomic_load_xor_32 node:$a, node:$b)>; +def atomic_load_xor_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b), + (atomic_load_xor_32 node:$a, node:$b)>; +def atomic_load_xor_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b), + (atomic_load_xor_32 node:$a, node:$b)>; + +defm INT_PTX_ATOM_XOR_G_32 : F_ATOMIC_2; +defm INT_PTX_ATOM_XOR_S_32 : F_ATOMIC_2; +defm INT_PTX_ATOM_XOR_GEN_32 : F_ATOMIC_2; +defm INT_PTX_ATOM_XOR_GEN_32_USE_G : F_ATOMIC_2; + +// atom_cas + +def atomic_cmp_swap_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b, node:$c), + (atomic_cmp_swap_32 node:$a, node:$b, node:$c)>; +def atomic_cmp_swap_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b, node:$c), + (atomic_cmp_swap_32 node:$a, node:$b, node:$c)>; +def atomic_cmp_swap_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b, node:$c), + (atomic_cmp_swap_32 node:$a, node:$b, node:$c)>; +def atomic_cmp_swap_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b, node:$c), + (atomic_cmp_swap_64 node:$a, node:$b, node:$c)>; +def atomic_cmp_swap_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b, node:$c), + (atomic_cmp_swap_64 node:$a, node:$b, node:$c)>; +def atomic_cmp_swap_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b, node:$c), + (atomic_cmp_swap_64 node:$a, node:$b, node:$c)>; + +defm INT_PTX_ATOM_CAS_G_32 : F_ATOMIC_3; +defm INT_PTX_ATOM_CAS_S_32 : F_ATOMIC_3; +defm INT_PTX_ATOM_CAS_GEN_32 : F_ATOMIC_3; +defm INT_PTX_ATOM_CAS_GEN_32_USE_G : F_ATOMIC_3; +defm INT_PTX_ATOM_CAS_G_64 : F_ATOMIC_3; +defm INT_PTX_ATOM_CAS_S_64 : F_ATOMIC_3; +defm INT_PTX_ATOM_CAS_GEN_64 : F_ATOMIC_3; +defm INT_PTX_ATOM_CAS_GEN_64_USE_G : F_ATOMIC_3; + + +//----------------------------------- +// Read Special Registers +//----------------------------------- +class F_SREG : + NVPTXInst<(outs regclassOut:$dst), (ins), + OpStr, + [(set regclassOut:$dst, (IntOp))]>; + +def INT_PTX_SREG_TID_X : F_SREG<"mov.u32 \t$dst, %tid.x;", Int32Regs, + int_nvvm_read_ptx_sreg_tid_x>; +def INT_PTX_SREG_TID_Y : F_SREG<"mov.u32 \t$dst, %tid.y;", Int32Regs, + 
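The F_SREG class exposes the PTX special registers (%tid, %ntid, %ctaid,
%nctaid, WARP_SZ) through mov.u32 reads. Their canonical use is computing a
global thread index; a sketch, again assuming the int_nvvm_* records map
onto Intrinsic::nvvm_* enum values:

    #include "llvm/IRBuilder.h"
    #include "llvm/Intrinsics.h"
    #include "llvm/Module.h"
    using namespace llvm;

    // Sketch: gtid.x = tid.x + ctaid.x * ntid.x, built from the special
    // register intrinsics defined in this block.
    static Value *emitGlobalThreadIdX(Module *M, IRBuilder<> &B) {
      Value *Tid = B.CreateCall(
          Intrinsic::getDeclaration(M, Intrinsic::nvvm_read_ptx_sreg_tid_x));
      Value *CtaId = B.CreateCall(
          Intrinsic::getDeclaration(M, Intrinsic::nvvm_read_ptx_sreg_ctaid_x));
      Value *NTid = B.CreateCall(
          Intrinsic::getDeclaration(M, Intrinsic::nvvm_read_ptx_sreg_ntid_x));
      return B.CreateAdd(Tid, B.CreateMul(CtaId, NTid), "gtid.x");
    }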
int_nvvm_read_ptx_sreg_tid_y>; +def INT_PTX_SREG_TID_Z : F_SREG<"mov.u32 \t$dst, %tid.z;", Int32Regs, + int_nvvm_read_ptx_sreg_tid_z>; + +def INT_PTX_SREG_NTID_X : F_SREG<"mov.u32 \t$dst, %ntid.x;", Int32Regs, + int_nvvm_read_ptx_sreg_ntid_x>; +def INT_PTX_SREG_NTID_Y : F_SREG<"mov.u32 \t$dst, %ntid.y;", Int32Regs, + int_nvvm_read_ptx_sreg_ntid_y>; +def INT_PTX_SREG_NTID_Z : F_SREG<"mov.u32 \t$dst, %ntid.z;", Int32Regs, + int_nvvm_read_ptx_sreg_ntid_z>; + +def INT_PTX_SREG_CTAID_X : F_SREG<"mov.u32 \t$dst, %ctaid.x;", Int32Regs, + int_nvvm_read_ptx_sreg_ctaid_x>; +def INT_PTX_SREG_CTAID_Y : F_SREG<"mov.u32 \t$dst, %ctaid.y;", Int32Regs, + int_nvvm_read_ptx_sreg_ctaid_y>; +def INT_PTX_SREG_CTAID_Z : F_SREG<"mov.u32 \t$dst, %ctaid.z;", Int32Regs, + int_nvvm_read_ptx_sreg_ctaid_z>; + +def INT_PTX_SREG_NCTAID_X : F_SREG<"mov.u32 \t$dst, %nctaid.x;", Int32Regs, + int_nvvm_read_ptx_sreg_nctaid_x>; +def INT_PTX_SREG_NCTAID_Y : F_SREG<"mov.u32 \t$dst, %nctaid.y;", Int32Regs, + int_nvvm_read_ptx_sreg_nctaid_y>; +def INT_PTX_SREG_NCTAID_Z : F_SREG<"mov.u32 \t$dst, %nctaid.z;", Int32Regs, + int_nvvm_read_ptx_sreg_nctaid_z>; + +def INT_PTX_SREG_WARPSIZE : F_SREG<"mov.u32 \t$dst, WARP_SZ;", Int32Regs, + int_nvvm_read_ptx_sreg_warpsize>; + + +//----------------------------------- +// Support for ldu on sm_20 or later +//----------------------------------- + +// Scalar +// @TODO: Revisit this, Changed imemAny to imem +multiclass LDU_G { + def areg: NVPTXInst<(outs regclass:$result), (ins Int32Regs:$src), + !strconcat("ldu.global.", TyStr), + [(set regclass:$result, (IntOp Int32Regs:$src))]>, Requires<[hasLDU]>; + def areg64: NVPTXInst<(outs regclass:$result), (ins Int64Regs:$src), + !strconcat("ldu.global.", TyStr), + [(set regclass:$result, (IntOp Int64Regs:$src))]>, Requires<[hasLDU]>; + def avar: NVPTXInst<(outs regclass:$result), (ins imem:$src), + !strconcat("ldu.global.", TyStr), + [(set regclass:$result, (IntOp (Wrapper tglobaladdr:$src)))]>, + Requires<[hasLDU]>; + def ari : NVPTXInst<(outs regclass:$result), (ins MEMri:$src), + !strconcat("ldu.global.", TyStr), + [(set regclass:$result, (IntOp ADDRri:$src))]>, Requires<[hasLDU]>; + def ari64 : NVPTXInst<(outs regclass:$result), (ins MEMri64:$src), + !strconcat("ldu.global.", TyStr), + [(set regclass:$result, (IntOp ADDRri64:$src))]>, Requires<[hasLDU]>; +} + +defm INT_PTX_LDU_GLOBAL_i8 : LDU_G<"u8 \t$result, [$src];", Int8Regs, +int_nvvm_ldu_global_i>; +defm INT_PTX_LDU_GLOBAL_i16 : LDU_G<"u16 \t$result, [$src];", Int16Regs, +int_nvvm_ldu_global_i>; +defm INT_PTX_LDU_GLOBAL_i32 : LDU_G<"u32 \t$result, [$src];", Int32Regs, +int_nvvm_ldu_global_i>; +defm INT_PTX_LDU_GLOBAL_i64 : LDU_G<"u64 \t$result, [$src];", Int64Regs, +int_nvvm_ldu_global_i>; +defm INT_PTX_LDU_GLOBAL_f32 : LDU_G<"f32 \t$result, [$src];", Float32Regs, +int_nvvm_ldu_global_f>; +defm INT_PTX_LDU_GLOBAL_f64 : LDU_G<"f64 \t$result, [$src];", Float64Regs, +int_nvvm_ldu_global_f>; +defm INT_PTX_LDU_GLOBAL_p32 : LDU_G<"u32 \t$result, [$src];", Int32Regs, +int_nvvm_ldu_global_p>; +defm INT_PTX_LDU_GLOBAL_p64 : LDU_G<"u64 \t$result, [$src];", Int64Regs, +int_nvvm_ldu_global_p>; + +// vector + +// Elementized vector ldu +multiclass VLDU_G_ELE_V2 { + def _32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2), + (ins Int32Regs:$src), + !strconcat("ldu.global.", TyStr), []>; + def _64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2), + (ins Int64Regs:$src), + !strconcat("ldu.global.", TyStr), []>; +} + +multiclass VLDU_G_ELE_V4 { + def _32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, 
regclass:$dst3, + regclass:$dst4), (ins Int32Regs:$src), + !strconcat("ldu.global.", TyStr), []>; + def _64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3, + regclass:$dst4), (ins Int64Regs:$src), + !strconcat("ldu.global.", TyStr), []>; +} + +defm INT_PTX_LDU_G_v2i8_ELE + : VLDU_G_ELE_V2<"v2.u8 \t{{$dst1, $dst2}}, [$src];", Int8Regs>; +defm INT_PTX_LDU_G_v2i16_ELE + : VLDU_G_ELE_V2<"v2.u16 \t{{$dst1, $dst2}}, [$src];", Int16Regs>; +defm INT_PTX_LDU_G_v2i32_ELE + : VLDU_G_ELE_V2<"v2.u32 \t{{$dst1, $dst2}}, [$src];", Int32Regs>; +defm INT_PTX_LDU_G_v2f32_ELE + : VLDU_G_ELE_V2<"v2.f32 \t{{$dst1, $dst2}}, [$src];", Float32Regs>; +defm INT_PTX_LDU_G_v2i64_ELE + : VLDU_G_ELE_V2<"v2.u64 \t{{$dst1, $dst2}}, [$src];", Int64Regs>; +defm INT_PTX_LDU_G_v2f64_ELE + : VLDU_G_ELE_V2<"v2.f64 \t{{$dst1, $dst2}}, [$src];", Float64Regs>; +defm INT_PTX_LDU_G_v4i8_ELE + : VLDU_G_ELE_V4<"v4.u8 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int8Regs>; +defm INT_PTX_LDU_G_v4i16_ELE + : VLDU_G_ELE_V4<"v4.u16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", + Int16Regs>; +defm INT_PTX_LDU_G_v4i32_ELE + : VLDU_G_ELE_V4<"v4.u32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", + Int32Regs>; +defm INT_PTX_LDU_G_v4f32_ELE + : VLDU_G_ELE_V4<"v4.f32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", + Float32Regs>; + +// Vector ldu +multiclass VLDU_G { + def _32: NVPTXVecInst<(outs regclass:$result), (ins Int32Regs:$src), + !strconcat("ldu.global.", TyStr), + [(set regclass:$result, (IntOp Int32Regs:$src))], eleInst>, + Requires<[hasLDU]>; + def _64: NVPTXVecInst<(outs regclass:$result), (ins Int64Regs:$src), + !strconcat("ldu.global.", TyStr), + [(set regclass:$result, (IntOp Int64Regs:$src))], eleInst64>, + Requires<[hasLDU]>; +} + +let VecInstType=isVecLD.Value in { +defm INT_PTX_LDU_G_v2i8 : VLDU_G<"v2.u8 \t${result:vecfull}, [$src];", + V2I8Regs, int_nvvm_ldu_global_i, INT_PTX_LDU_G_v2i8_ELE_32, + INT_PTX_LDU_G_v2i8_ELE_64>; +defm INT_PTX_LDU_G_v4i8 : VLDU_G<"v4.u8 \t${result:vecfull}, [$src];", + V4I8Regs, int_nvvm_ldu_global_i, INT_PTX_LDU_G_v4i8_ELE_32, + INT_PTX_LDU_G_v4i8_ELE_64>; +defm INT_PTX_LDU_G_v2i16 : VLDU_G<"v2.u16 \t${result:vecfull}, [$src];", + V2I16Regs, int_nvvm_ldu_global_i, INT_PTX_LDU_G_v2i16_ELE_32, + INT_PTX_LDU_G_v2i16_ELE_64>; +defm INT_PTX_LDU_G_v4i16 : VLDU_G<"v4.u16 \t${result:vecfull}, [$src];", + V4I16Regs, int_nvvm_ldu_global_i, INT_PTX_LDU_G_v4i16_ELE_32, + INT_PTX_LDU_G_v4i16_ELE_64>; +defm INT_PTX_LDU_G_v2i32 : VLDU_G<"v2.u32 \t${result:vecfull}, [$src];", + V2I32Regs, int_nvvm_ldu_global_i, INT_PTX_LDU_G_v2i32_ELE_32, + INT_PTX_LDU_G_v2i32_ELE_64>; +defm INT_PTX_LDU_G_v4i32 : VLDU_G<"v4.u32 \t${result:vecfull}, [$src];", + V4I32Regs, int_nvvm_ldu_global_i, INT_PTX_LDU_G_v4i32_ELE_32, + INT_PTX_LDU_G_v4i32_ELE_64>; +defm INT_PTX_LDU_G_v2f32 : VLDU_G<"v2.f32 \t${result:vecfull}, [$src];", + V2F32Regs, int_nvvm_ldu_global_f, INT_PTX_LDU_G_v2f32_ELE_32, + INT_PTX_LDU_G_v2f32_ELE_64>; +defm INT_PTX_LDU_G_v4f32 : VLDU_G<"v4.f32 \t${result:vecfull}, [$src];", + V4F32Regs, int_nvvm_ldu_global_f, INT_PTX_LDU_G_v4f32_ELE_32, + INT_PTX_LDU_G_v4f32_ELE_64>; +defm INT_PTX_LDU_G_v2i64 : VLDU_G<"v2.u64 \t${result:vecfull}, [$src];", + V2I64Regs, int_nvvm_ldu_global_i, INT_PTX_LDU_G_v2i64_ELE_32, + INT_PTX_LDU_G_v2i64_ELE_64>; +defm INT_PTX_LDU_G_v2f64 : VLDU_G<"v2.f64 \t${result:vecfull}, [$src];", + V2F64Regs, int_nvvm_ldu_global_f, INT_PTX_LDU_G_v2f64_ELE_32, + INT_PTX_LDU_G_v2f64_ELE_64>; +} + + + +multiclass NG_TO_G { + def _yes : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src), + 
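The ldu.global forms above load through the read-only data cache and require
sm_20 or later (the hasLDU predicate); the *_ELE multiclasses exist so a
vector ldu can be expressed element-wise. The intrinsic is overloaded on the
loaded type and the pointer, so the exact overload used below is an
assumption for illustration only:

    #include "llvm/DerivedTypes.h"
    #include "llvm/IRBuilder.h"
    #include "llvm/Intrinsics.h"
    #include "llvm/Module.h"
    using namespace llvm;

    // Sketch: an i32 ldu from a global pointer, aiming at
    // INT_PTX_LDU_GLOBAL_i32 ("ldu.global.u32 \t$result, [$src];").
    static Value *emitLduI32(Module *M, IRBuilder<> &B, Value *GlobalPtr) {
      Type *Tys[] = { Type::getInt32Ty(M->getContext()),
                      GlobalPtr->getType() };
      Function *Ldu =
          Intrinsic::getDeclaration(M, Intrinsic::nvvm_ldu_global_i, Tys);
      return B.CreateCall(Ldu, GlobalPtr, "ldu");
    }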
!strconcat("cvta.", !strconcat(Str, ".u32 \t$result, $src;")), + [(set Int32Regs:$result, (Intrin Int32Regs:$src))]>, + Requires<[hasGenericLdSt]>; + def _yes_64 : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src), + !strconcat("cvta.", !strconcat(Str, ".u64 \t$result, $src;")), + [(set Int64Regs:$result, (Intrin Int64Regs:$src))]>, + Requires<[hasGenericLdSt]>; + +// @TODO: Are these actually needed? I believe global addresses will be copied +// to register values anyway. + /*def __addr_yes : NVPTXInst<(outs Int32Regs:$result), (ins imemAny:$src), + !strconcat("cvta.", !strconcat(Str, ".u32 \t$result, $src;")), + [(set Int32Regs:$result, (Intrin (Wrapper tglobaladdr:$src)))]>, + Requires<[hasGenericLdSt]>; + def __addr_yes_64 : NVPTXInst<(outs Int64Regs:$result), (ins imemAny:$src), + !strconcat("cvta.", !strconcat(Str, ".u64 \t$result, $src;")), + [(set Int64Regs:$result, (Intrin (Wrapper tglobaladdr:$src)))]>, + Requires<[hasGenericLdSt]>;*/ + + def _no : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src), + "mov.u32 \t$result, $src;", + [(set Int32Regs:$result, (Intrin Int32Regs:$src))]>; + def _no_64 : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src), + "mov.u64 \t$result, $src;", + [(set Int64Regs:$result, (Intrin Int64Regs:$src))]>; + +// @TODO: Are these actually needed? I believe global addresses will be copied +// to register values anyway. + /*def _addr_no : NVPTXInst<(outs Int32Regs:$result), (ins imem:$src), + "mov.u32 \t$result, $src;", + [(set Int32Regs:$result, (Intrin (Wrapper tglobaladdr:$src)))]>; + def _addr_no_64 : NVPTXInst<(outs Int64Regs:$result), (ins imem:$src), + "mov.u64 \t$result, $src;", + [(set Int64Regs:$result, (Intrin (Wrapper tglobaladdr:$src)))]>;*/ +} + +multiclass G_TO_NG { + def _yes : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src), + !strconcat("cvta.to.", !strconcat(Str, ".u32 \t$result, $src;")), + [(set Int32Regs:$result, (Intrin Int32Regs:$src))]>, + Requires<[hasGenericLdSt]>; + def _yes_64 : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src), + !strconcat("cvta.to.", !strconcat(Str, ".u64 \t$result, $src;")), + [(set Int64Regs:$result, (Intrin Int64Regs:$src))]>, + Requires<[hasGenericLdSt]>; + def _no : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src), + "mov.u32 \t$result, $src;", + [(set Int32Regs:$result, (Intrin Int32Regs:$src))]>; + def _no_64 : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src), + "mov.u64 \t$result, $src;", + [(set Int64Regs:$result, (Intrin Int64Regs:$src))]>; +} + +defm cvta_local : NG_TO_G<"local", int_nvvm_ptr_local_to_gen>; +defm cvta_shared : NG_TO_G<"shared", int_nvvm_ptr_shared_to_gen>; +defm cvta_global : NG_TO_G<"global", int_nvvm_ptr_global_to_gen>; + +defm cvta_to_local : G_TO_NG<"local", int_nvvm_ptr_gen_to_local>; +defm cvta_to_shared : G_TO_NG<"shared", int_nvvm_ptr_gen_to_shared>; +defm cvta_to_global : G_TO_NG<"global", int_nvvm_ptr_gen_to_global>; + +def cvta_const : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src), + "mov.u32 \t$result, $src;", + [(set Int32Regs:$result, (int_nvvm_ptr_constant_to_gen Int32Regs:$src))]>; +def cvta_const_64 : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src), + "mov.u64 \t$result, $src;", + [(set Int64Regs:$result, (int_nvvm_ptr_constant_to_gen Int64Regs:$src))]>; + + + +// @TODO: Revisit this. There is a type +// contradiction between iPTRAny and iPTR for the def. 
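NG_TO_G and G_TO_NG wrap the cvta and cvta.to instructions that convert
pointers between a specific state space (local/shared/global) and the
generic space. On subtargets without generic load/store the _no variants
show the conversion collapsing to a plain mov, since addressing then happens
in the specific spaces directly. A sketch of driving one of them, with the
intrinsic overload types once more an assumption:

    #include "llvm/IRBuilder.h"
    #include "llvm/Intrinsics.h"
    #include "llvm/Module.h"
    using namespace llvm;

    // Sketch: convert an i8 addrspace(1)* (global) pointer to a generic i8*;
    // with generic load/store available this selects cvta.global.u32/u64.
    static Value *emitGlobalToGeneric(Module *M, IRBuilder<> &B, Value *GPtr) {
      Type *Tys[] = { Type::getInt8PtrTy(M->getContext(), 0),
                      GPtr->getType() };
      Function *Cvta =
          Intrinsic::getDeclaration(M, Intrinsic::nvvm_ptr_global_to_gen, Tys);
      return B.CreateCall(Cvta, GPtr, "gen.ptr");
    }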
+/*def cvta_const_addr : NVPTXInst<(outs Int32Regs:$result), (ins imemAny:$src), + "mov.u32 \t$result, $src;", + [(set Int32Regs:$result, (int_nvvm_ptr_constant_to_gen + (Wrapper tglobaladdr:$src)))]>; +def cvta_const_addr_64 : NVPTXInst<(outs Int64Regs:$result), (ins imemAny:$src), + "mov.u64 \t$result, $src;", + [(set Int64Regs:$result, (int_nvvm_ptr_constant_to_gen + (Wrapper tglobaladdr:$src)))]>;*/ + + +def cvta_to_const : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src), + "mov.u32 \t$result, $src;", + [(set Int32Regs:$result, (int_nvvm_ptr_gen_to_constant Int32Regs:$src))]>; +def cvta_to_const_64 : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src), + "mov.u64 \t$result, $src;", + [(set Int64Regs:$result, (int_nvvm_ptr_gen_to_constant Int64Regs:$src))]>; + + +// nvvm.ptr.gen.to.param +def nvvm_ptr_gen_to_param : NVPTXInst<(outs Int32Regs:$result), + (ins Int32Regs:$src), + "mov.u32 \t$result, $src;", + [(set Int32Regs:$result, + (int_nvvm_ptr_gen_to_param Int32Regs:$src))]>; +def nvvm_ptr_gen_to_param_64 : NVPTXInst<(outs Int64Regs:$result), + (ins Int64Regs:$src), + "mov.u64 \t$result, $src;", + [(set Int64Regs:$result, + (int_nvvm_ptr_gen_to_param Int64Regs:$src))]>; + + +// nvvm.move intrinsicc +def nvvm_move_i8 : NVPTXInst<(outs Int8Regs:$r), (ins Int8Regs:$s), + "mov.b16 \t$r, $s;", + [(set Int8Regs:$r, + (int_nvvm_move_i8 Int8Regs:$s))]>; +def nvvm_move_i16 : NVPTXInst<(outs Int16Regs:$r), (ins Int16Regs:$s), + "mov.b16 \t$r, $s;", + [(set Int16Regs:$r, + (int_nvvm_move_i16 Int16Regs:$s))]>; +def nvvm_move_i32 : NVPTXInst<(outs Int32Regs:$r), (ins Int32Regs:$s), + "mov.b32 \t$r, $s;", + [(set Int32Regs:$r, + (int_nvvm_move_i32 Int32Regs:$s))]>; +def nvvm_move_i64 : NVPTXInst<(outs Int64Regs:$r), (ins Int64Regs:$s), + "mov.b64 \t$r, $s;", + [(set Int64Regs:$r, + (int_nvvm_move_i64 Int64Regs:$s))]>; +def nvvm_move_float : NVPTXInst<(outs Float32Regs:$r), (ins Float32Regs:$s), + "mov.f32 \t$r, $s;", + [(set Float32Regs:$r, + (int_nvvm_move_float Float32Regs:$s))]>; +def nvvm_move_double : NVPTXInst<(outs Float64Regs:$r), (ins Float64Regs:$s), + "mov.f64 \t$r, $s;", + [(set Float64Regs:$r, + (int_nvvm_move_double Float64Regs:$s))]>; +def nvvm_move_ptr32 : NVPTXInst<(outs Int32Regs:$r), (ins Int32Regs:$s), + "mov.u32 \t$r, $s;", + [(set Int32Regs:$r, + (int_nvvm_move_ptr Int32Regs:$s))]>; +def nvvm_move_ptr64 : NVPTXInst<(outs Int64Regs:$r), (ins Int64Regs:$s), + "mov.u64 \t$r, $s;", + [(set Int64Regs:$r, + (int_nvvm_move_ptr Int64Regs:$s))]>; + +// @TODO: Are these actually needed, or will we always just see symbols +// copied to registers first? +/*def nvvm_move_sym32 : NVPTXInst<(outs Int32Regs:$r), (ins imem:$s), + "mov.u32 \t$r, $s;", + [(set Int32Regs:$r, + (int_nvvm_move_ptr texternalsym:$s))]>; +def nvvm_move_sym64 : NVPTXInst<(outs Int64Regs:$r), (ins imem:$s), + "mov.u64 \t$r, $s;", + [(set Int64Regs:$r, + (int_nvvm_move_ptr texternalsym:$s))]>;*/ + + +// MoveParam %r1, param +// ptr_local_to_gen %r2, %r1 +// ptr_gen_to_local %r3, %r2 +// -> +// mov %r1, param + +// @TODO: Revisit this. There is a type +// contradiction between iPTRAny and iPTR for the addr defs, so the move_sym +// instructions are not currently defined. However, we can use the ptr +// variants and the asm printer will do the right thing. 
+def : Pat<(i64 (int_nvvm_ptr_gen_to_local (int_nvvm_ptr_local_to_gen + (MoveParam texternalsym:$src)))), + (nvvm_move_ptr64 texternalsym:$src)>; +def : Pat<(i32 (int_nvvm_ptr_gen_to_local (int_nvvm_ptr_local_to_gen + (MoveParam texternalsym:$src)))), + (nvvm_move_ptr32 texternalsym:$src)>; + + +//----------------------------------- +// Compiler Error Warn +// - Just ignore them in codegen +//----------------------------------- + +def INT_NVVM_COMPILER_WARN_32 : NVPTXInst<(outs), (ins Int32Regs:$a), + "// llvm.nvvm.compiler.warn()", + [(int_nvvm_compiler_warn Int32Regs:$a)]>; +def INT_NVVM_COMPILER_WARN_64 : NVPTXInst<(outs), (ins Int64Regs:$a), + "// llvm.nvvm.compiler.warn()", + [(int_nvvm_compiler_warn Int64Regs:$a)]>; +def INT_NVVM_COMPILER_ERROR_32 : NVPTXInst<(outs), (ins Int32Regs:$a), + "// llvm.nvvm.compiler.error()", + [(int_nvvm_compiler_error Int32Regs:$a)]>; +def INT_NVVM_COMPILER_ERROR_64 : NVPTXInst<(outs), (ins Int64Regs:$a), + "// llvm.nvvm.compiler.error()", + [(int_nvvm_compiler_error Int64Regs:$a)]>; + + + +//===-- Old PTX Back-end Intrinsics ---------------------------------------===// + +// These intrinsics are handled to retain compatibility with the old backend. + +// PTX Special Purpose Register Accessor Intrinsics + +class PTX_READ_SPECIAL_REGISTER_R64 + : NVPTXInst<(outs Int64Regs:$d), (ins), + !strconcat(!strconcat("mov.u64\t$d, %", regname), ";"), + [(set Int64Regs:$d, (intop))]>; + +class PTX_READ_SPECIAL_REGISTER_R32 + : NVPTXInst<(outs Int32Regs:$d), (ins), + !strconcat(!strconcat("mov.u32\t$d, %", regname), ";"), + [(set Int32Regs:$d, (intop))]>; + +// TODO Add read vector-version of special registers + +def PTX_READ_TID_X : PTX_READ_SPECIAL_REGISTER_R32<"tid.x", + int_ptx_read_tid_x>; +def PTX_READ_TID_Y : PTX_READ_SPECIAL_REGISTER_R32<"tid.y", + int_ptx_read_tid_y>; +def PTX_READ_TID_Z : PTX_READ_SPECIAL_REGISTER_R32<"tid.z", + int_ptx_read_tid_z>; +def PTX_READ_TID_W : PTX_READ_SPECIAL_REGISTER_R32<"tid.w", + int_ptx_read_tid_w>; + +def PTX_READ_NTID_X : PTX_READ_SPECIAL_REGISTER_R32<"ntid.x", + int_ptx_read_ntid_x>; +def PTX_READ_NTID_Y : PTX_READ_SPECIAL_REGISTER_R32<"ntid.y", + int_ptx_read_ntid_y>; +def PTX_READ_NTID_Z : PTX_READ_SPECIAL_REGISTER_R32<"ntid.z", + int_ptx_read_ntid_z>; +def PTX_READ_NTID_W : PTX_READ_SPECIAL_REGISTER_R32<"ntid.w", + int_ptx_read_ntid_w>; + +def PTX_READ_LANEID : PTX_READ_SPECIAL_REGISTER_R32<"laneid", + int_ptx_read_laneid>; +def PTX_READ_WARPID : PTX_READ_SPECIAL_REGISTER_R32<"warpid", + int_ptx_read_warpid>; +def PTX_READ_NWARPID : PTX_READ_SPECIAL_REGISTER_R32<"nwarpid", + int_ptx_read_nwarpid>; + +def PTX_READ_CTAID_X : PTX_READ_SPECIAL_REGISTER_R32<"ctaid.x", + int_ptx_read_ctaid_x>; +def PTX_READ_CTAID_Y : PTX_READ_SPECIAL_REGISTER_R32<"ctaid.y", + int_ptx_read_ctaid_y>; +def PTX_READ_CTAID_Z : PTX_READ_SPECIAL_REGISTER_R32<"ctaid.z", + int_ptx_read_ctaid_z>; +def PTX_READ_CTAID_W : PTX_READ_SPECIAL_REGISTER_R32<"ctaid.w", + int_ptx_read_ctaid_w>; + +def PTX_READ_NCTAID_X : PTX_READ_SPECIAL_REGISTER_R32<"nctaid.x", + int_ptx_read_nctaid_x>; +def PTX_READ_NCTAID_Y : PTX_READ_SPECIAL_REGISTER_R32<"nctaid.y", + int_ptx_read_nctaid_y>; +def PTX_READ_NCTAID_Z : PTX_READ_SPECIAL_REGISTER_R32<"nctaid.z", + int_ptx_read_nctaid_z>; +def PTX_READ_NCTAID_W : PTX_READ_SPECIAL_REGISTER_R32<"nctaid.w", + int_ptx_read_nctaid_w>; + +def PTX_READ_SMID : PTX_READ_SPECIAL_REGISTER_R32<"smid", + int_ptx_read_smid>; +def PTX_READ_NSMID : PTX_READ_SPECIAL_REGISTER_R32<"nsmid", + int_ptx_read_nsmid>; +def PTX_READ_GRIDID : 
PTX_READ_SPECIAL_REGISTER_R32<"gridid", + int_ptx_read_gridid>; + +def PTX_READ_LANEMASK_EQ + : PTX_READ_SPECIAL_REGISTER_R32<"lanemask_eq", int_ptx_read_lanemask_eq>; +def PTX_READ_LANEMASK_LE + : PTX_READ_SPECIAL_REGISTER_R32<"lanemask_le", int_ptx_read_lanemask_le>; +def PTX_READ_LANEMASK_LT + : PTX_READ_SPECIAL_REGISTER_R32<"lanemask_lt", int_ptx_read_lanemask_lt>; +def PTX_READ_LANEMASK_GE + : PTX_READ_SPECIAL_REGISTER_R32<"lanemask_ge", int_ptx_read_lanemask_ge>; +def PTX_READ_LANEMASK_GT + : PTX_READ_SPECIAL_REGISTER_R32<"lanemask_gt", int_ptx_read_lanemask_gt>; + +def PTX_READ_CLOCK + : PTX_READ_SPECIAL_REGISTER_R32<"clock", int_ptx_read_clock>; +def PTX_READ_CLOCK64 + : PTX_READ_SPECIAL_REGISTER_R64<"clock64", int_ptx_read_clock64>; + +def PTX_READ_PM0 : PTX_READ_SPECIAL_REGISTER_R32<"pm0", int_ptx_read_pm0>; +def PTX_READ_PM1 : PTX_READ_SPECIAL_REGISTER_R32<"pm1", int_ptx_read_pm1>; +def PTX_READ_PM2 : PTX_READ_SPECIAL_REGISTER_R32<"pm2", int_ptx_read_pm2>; +def PTX_READ_PM3 : PTX_READ_SPECIAL_REGISTER_R32<"pm3", int_ptx_read_pm3>; + +// PTX Parallel Synchronization and Communication Intrinsics + +def PTX_BAR_SYNC : NVPTXInst<(outs), (ins i32imm:$i), "bar.sync\t$i;", + [(int_ptx_bar_sync imm:$i)]>; diff --git a/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp b/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp new file mode 100644 index 0000000..56b2372 --- /dev/null +++ b/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp @@ -0,0 +1,208 @@ +//===- NVPTXLowerAggrCopies.cpp - ------------------------------*- C++ -*--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// Lower aggregate copies, memset, memcpy, memmov intrinsics into loops when +// the size is large or is not a compile-time constant. +// +//===----------------------------------------------------------------------===// + +#include "NVPTXLowerAggrCopies.h" +#include "llvm/Constants.h" +#include "llvm/Function.h" +#include "llvm/IRBuilder.h" +#include "llvm/Instructions.h" +#include "llvm/IntrinsicInst.h" +#include "llvm/Intrinsics.h" +#include "llvm/LLVMContext.h" +#include "llvm/Module.h" +#include "llvm/Support/InstIterator.h" +#include "llvm/Target/TargetData.h" + +using namespace llvm; + +namespace llvm { +FunctionPass *createLowerAggrCopies(); +} + +char NVPTXLowerAggrCopies::ID = 0; + +// Lower MemTransferInst or load-store pair to loop +static void convertTransferToLoop(Instruction *splitAt, Value *srcAddr, + Value *dstAddr, Value *len, + //unsigned numLoads, + bool srcVolatile, bool dstVolatile, + LLVMContext &Context, Function &F) { + Type *indType = len->getType(); + + BasicBlock *origBB = splitAt->getParent(); + BasicBlock *newBB = splitAt->getParent()->splitBasicBlock(splitAt, "split"); + BasicBlock *loopBB = BasicBlock::Create(Context, "loadstoreloop", &F, newBB); + + origBB->getTerminator()->setSuccessor(0, loopBB); + IRBuilder<> builder(origBB, origBB->getTerminator()); + + // srcAddr and dstAddr are expected to be pointer types, + // so no check is made here. 
+  unsigned srcAS =
+      dyn_cast<PointerType>(srcAddr->getType())->getAddressSpace();
+  unsigned dstAS =
+      dyn_cast<PointerType>(dstAddr->getType())->getAddressSpace();
+
+  // Cast pointers to (char *)
+  srcAddr = builder.CreateBitCast(srcAddr, Type::getInt8PtrTy(Context, srcAS));
+  dstAddr = builder.CreateBitCast(dstAddr, Type::getInt8PtrTy(Context, dstAS));
+
+  IRBuilder<> loop(loopBB);
+  // The loop index (ind) is a phi node.
+  PHINode *ind = loop.CreatePHI(indType, 0);
+  // Incoming value for ind is 0
+  ind->addIncoming(ConstantInt::get(indType, 0), origBB);
+
+  // load from srcAddr+ind
+  Value *val = loop.CreateLoad(loop.CreateGEP(srcAddr, ind), srcVolatile);
+  // store at dstAddr+ind
+  loop.CreateStore(val, loop.CreateGEP(dstAddr, ind), dstVolatile);
+
+  // The value for ind coming from backedge is (ind + 1)
+  Value *newind = loop.CreateAdd(ind, ConstantInt::get(indType, 1));
+  ind->addIncoming(newind, loopBB);
+
+  loop.CreateCondBr(loop.CreateICmpULT(newind, len), loopBB, newBB);
+}
+
+// Lower MemSetInst to loop
+static void convertMemSetToLoop(Instruction *splitAt, Value *dstAddr,
+                                Value *len, Value *val, LLVMContext &Context,
+                                Function &F) {
+  BasicBlock *origBB = splitAt->getParent();
+  BasicBlock *newBB = splitAt->getParent()->splitBasicBlock(splitAt, "split");
+  BasicBlock *loopBB = BasicBlock::Create(Context, "loadstoreloop", &F, newBB);
+
+  origBB->getTerminator()->setSuccessor(0, loopBB);
+  IRBuilder<> builder(origBB, origBB->getTerminator());
+
+  unsigned dstAS =
+      dyn_cast<PointerType>(dstAddr->getType())->getAddressSpace();
+
+  // Cast pointer to the type of value getting stored
+  dstAddr = builder.CreateBitCast(dstAddr,
+                                  PointerType::get(val->getType(), dstAS));
+
+  IRBuilder<> loop(loopBB);
+  PHINode *ind = loop.CreatePHI(len->getType(), 0);
+  ind->addIncoming(ConstantInt::get(len->getType(), 0), origBB);
+
+  loop.CreateStore(val, loop.CreateGEP(dstAddr, ind), false);
+
+  Value *newind = loop.CreateAdd(ind, ConstantInt::get(len->getType(), 1));
+  ind->addIncoming(newind, loopBB);
+
+  loop.CreateCondBr(loop.CreateICmpULT(newind, len), loopBB, newBB);
+}
+
+bool NVPTXLowerAggrCopies::runOnFunction(Function &F) {
+  SmallVector<LoadInst *, 4> aggrLoads;
+  SmallVector<MemTransferInst *, 4> aggrMemcpys;
+  SmallVector<MemSetInst *, 4> aggrMemsets;
+
+  TargetData *TD = &getAnalysis<TargetData>();
+  LLVMContext &Context = F.getParent()->getContext();
+
+  //
+  // Collect all the aggrLoads, aggrMemcpys and aggrMemsets.
+  //
+  //const BasicBlock *firstBB = &F.front(); // first BB in F
+  for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE; ++BI) {
+    //BasicBlock *bb = BI;
+    for (BasicBlock::iterator II = BI->begin(), IE = BI->end(); II != IE;
+         ++II) {
+      if (LoadInst *load = dyn_cast<LoadInst>(II)) {
+
+        if (load->hasOneUse() == false) continue;
+
+        if (TD->getTypeStoreSize(load->getType()) < MaxAggrCopySize) continue;
+
+        User *use = *(load->use_begin());
+        if (StoreInst *store = dyn_cast<StoreInst>(use)) {
+          if (store->getOperand(0) != load) //getValueOperand
+            continue;
+          aggrLoads.push_back(load);
+        }
+      } else if (MemTransferInst *intr = dyn_cast<MemTransferInst>(II)) {
+        Value *len = intr->getLength();
+        // If the number of elements being copied is greater
+        // than MaxAggrCopySize, lower it to a loop
+        if (ConstantInt *len_int = dyn_cast<ConstantInt>(len)) {
+          if (len_int->getZExtValue() >= MaxAggrCopySize) {
+            aggrMemcpys.push_back(intr);
+          }
+        } else {
+          // turn variable length memcpy/memmove into loop
+          aggrMemcpys.push_back(intr);
+        }
+      } else if (MemSetInst *memsetintr = dyn_cast<MemSetInst>(II)) {
+        Value *len = memsetintr->getLength();
+        if (ConstantInt *len_int = dyn_cast<ConstantInt>(len)) {
+          if (len_int->getZExtValue() >= MaxAggrCopySize) {
+            aggrMemsets.push_back(memsetintr);
+          }
+        } else {
+          // turn variable length memset into loop
+          aggrMemsets.push_back(memsetintr);
+        }
+      }
+    }
+  }
+  if ((aggrLoads.size() == 0) && (aggrMemcpys.size() == 0)
+      && (aggrMemsets.size() == 0)) return false;
+
+  //
+  // Do the transformation of an aggr load/copy/set to a loop
+  //
+  for (unsigned i = 0, e = aggrLoads.size(); i != e; ++i) {
+    LoadInst *load = aggrLoads[i];
+    StoreInst *store = dyn_cast<StoreInst>(*load->use_begin());
+    Value *srcAddr = load->getOperand(0);
+    Value *dstAddr = store->getOperand(1);
+    unsigned numLoads = TD->getTypeStoreSize(load->getType());
+    Value *len = ConstantInt::get(Type::getInt32Ty(Context), numLoads);
+
+    convertTransferToLoop(store, srcAddr, dstAddr, len, load->isVolatile(),
+                          store->isVolatile(), Context, F);
+
+    store->eraseFromParent();
+    load->eraseFromParent();
+  }
+
+  for (unsigned i = 0, e = aggrMemcpys.size(); i != e; ++i) {
+    MemTransferInst *cpy = aggrMemcpys[i];
+    Value *len = cpy->getLength();
+    // llvm 2.7 version of memcpy does not have volatile
+    // operand yet. So always making it non-volatile
+    // optimistically, so that we don't see unnecessary
+    // st.volatile in ptx
+    convertTransferToLoop(cpy, cpy->getSource(), cpy->getDest(), len, false,
+                          false, Context, F);
+    cpy->eraseFromParent();
+  }
+
+  for (unsigned i = 0, e = aggrMemsets.size(); i != e; ++i) {
+    MemSetInst *memsetinst = aggrMemsets[i];
+    Value *len = memsetinst->getLength();
+    Value *val = memsetinst->getValue();
+    convertMemSetToLoop(memsetinst, memsetinst->getDest(), len, val, Context,
+                        F);
+    memsetinst->eraseFromParent();
+  }
+
+  return true;
+}
+
+FunctionPass *llvm::createLowerAggrCopies() {
+  return new NVPTXLowerAggrCopies();
+}
diff --git a/lib/Target/NVPTX/NVPTXLowerAggrCopies.h b/lib/Target/NVPTX/NVPTXLowerAggrCopies.h
new file mode 100644
index 0000000..ac7f150
--- /dev/null
+++ b/lib/Target/NVPTX/NVPTXLowerAggrCopies.h
@@ -0,0 +1,47 @@
+//===-- llvm/lib/Target/NVPTX/NVPTXLowerAggrCopies.h ------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+// +//===----------------------------------------------------------------------===// +// +// This file contains the declaration of the NVIDIA specific lowering of +// aggregate copies +// +//===----------------------------------------------------------------------===// + +#ifndef NVPTX_LOWER_AGGR_COPIES_H +#define NVPTX_LOWER_AGGR_COPIES_H + +#include "llvm/Pass.h" +#include "llvm/CodeGen/MachineFunctionAnalysis.h" +#include "llvm/Target/TargetData.h" + +namespace llvm { + +// actual analysis class, which is a functionpass +struct NVPTXLowerAggrCopies : public FunctionPass { + static char ID; + + NVPTXLowerAggrCopies() : FunctionPass(ID) {} + + void getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired(); + AU.addPreserved(); + } + + virtual bool runOnFunction(Function &F); + + static const unsigned MaxAggrCopySize = 128; + + virtual const char *getPassName() const { + return "Lower aggregate copies/intrinsics into loops"; + } +}; + +extern FunctionPass *createLowerAggrCopies(); +} + +#endif diff --git a/lib/Target/NVPTX/NVPTXNumRegisters.h b/lib/Target/NVPTX/NVPTXNumRegisters.h new file mode 100644 index 0000000..b4a4dbc --- /dev/null +++ b/lib/Target/NVPTX/NVPTXNumRegisters.h @@ -0,0 +1,20 @@ + +//===-- NVPTXNumRegisters.h - PTX Register Info ---------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef NVPTX_NUM_REGISTERS_H +#define NVPTX_NUM_REGISTERS_H + +namespace llvm { + +const unsigned NVPTXNumRegisters = 396; + +} + +#endif diff --git a/lib/Target/NVPTX/NVPTXRegisterInfo.cpp b/lib/Target/NVPTX/NVPTXRegisterInfo.cpp new file mode 100644 index 0000000..e3cd46f --- /dev/null +++ b/lib/Target/NVPTX/NVPTXRegisterInfo.cpp @@ -0,0 +1,325 @@ +//===- NVPTXRegisterInfo.cpp - NVPTX Register Information -----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the NVPTX implementation of the TargetRegisterInfo class. 
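Looking back at NVPTXLowerAggrCopies: PTX code has no library memcpy or
memset to call into, so copies that are large or of runtime-variable length
must become explicit loops before selection. Stripped of the IR plumbing,
convertTransferToLoop emits nothing more than a byte-wise copy with a
bottom-tested unsigned compare; this C++ analogue (not the pass's actual
output, just its shape) makes the control flow plain:

    // The phi node "ind" in the "loadstoreloop" block corresponds to the
    // induction variable; newind = ind + 1 feeds both the phi backedge and
    // the icmp ult exit test.
    static void aggrCopyLoop(char *dst, const char *src, unsigned long len) {
      for (unsigned long ind = 0; ind < len; ++ind)
        dst[ind] = src[ind];
    }

A client pipeline would schedule the pass through the createLowerAggrCopies
factory declared in the header above.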
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "nvptx-reg-info" + +#include "NVPTX.h" +#include "NVPTXRegisterInfo.h" +#include "NVPTXSubtarget.h" +#include "llvm/ADT/BitVector.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/MC/MachineLocation.h" +#include "llvm/Target/TargetInstrInfo.h" + + +using namespace llvm; + +namespace llvm +{ +std::string getNVPTXRegClassName (TargetRegisterClass const *RC) { + if (RC == &NVPTX::Float32RegsRegClass) { + return ".f32"; + } + if (RC == &NVPTX::Float64RegsRegClass) { + return ".f64"; + } + else if (RC == &NVPTX::Int64RegsRegClass) { + return ".s64"; + } + else if (RC == &NVPTX::Int32RegsRegClass) { + return ".s32"; + } + else if (RC == &NVPTX::Int16RegsRegClass) { + return ".s16"; + } + // Int8Regs become 16-bit registers in PTX + else if (RC == &NVPTX::Int8RegsRegClass) { + return ".s16"; + } + else if (RC == &NVPTX::Int1RegsRegClass) { + return ".pred"; + } + else if (RC == &NVPTX::SpecialRegsRegClass) { + return "!Special!"; + } + else if (RC == &NVPTX::V2F32RegsRegClass) { + return ".v2.f32"; + } + else if (RC == &NVPTX::V4F32RegsRegClass) { + return ".v4.f32"; + } + else if (RC == &NVPTX::V2I32RegsRegClass) { + return ".v2.s32"; + } + else if (RC == &NVPTX::V4I32RegsRegClass) { + return ".v4.s32"; + } + else if (RC == &NVPTX::V2F64RegsRegClass) { + return ".v2.f64"; + } + else if (RC == &NVPTX::V2I64RegsRegClass) { + return ".v2.s64"; + } + else if (RC == &NVPTX::V2I16RegsRegClass) { + return ".v2.s16"; + } + else if (RC == &NVPTX::V4I16RegsRegClass) { + return ".v4.s16"; + } + else if (RC == &NVPTX::V2I8RegsRegClass) { + return ".v2.s16"; + } + else if (RC == &NVPTX::V4I8RegsRegClass) { + return ".v4.s16"; + } + else { + return "INTERNAL"; + } + return ""; +} + +std::string getNVPTXRegClassStr (TargetRegisterClass const *RC) { + if (RC == &NVPTX::Float32RegsRegClass) { + return "%f"; + } + if (RC == &NVPTX::Float64RegsRegClass) { + return "%fd"; + } + else if (RC == &NVPTX::Int64RegsRegClass) { + return "%rd"; + } + else if (RC == &NVPTX::Int32RegsRegClass) { + return "%r"; + } + else if (RC == &NVPTX::Int16RegsRegClass) { + return "%rs"; + } + else if (RC == &NVPTX::Int8RegsRegClass) { + return "%rc"; + } + else if (RC == &NVPTX::Int1RegsRegClass) { + return "%p"; + } + else if (RC == &NVPTX::SpecialRegsRegClass) { + return "!Special!"; + } + else if (RC == &NVPTX::V2F32RegsRegClass) { + return "%v2f"; + } + else if (RC == &NVPTX::V4F32RegsRegClass) { + return "%v4f"; + } + else if (RC == &NVPTX::V2I32RegsRegClass) { + return "%v2r"; + } + else if (RC == &NVPTX::V4I32RegsRegClass) { + return "%v4r"; + } + else if (RC == &NVPTX::V2F64RegsRegClass) { + return "%v2fd"; + } + else if (RC == &NVPTX::V2I64RegsRegClass) { + return "%v2rd"; + } + else if (RC == &NVPTX::V2I16RegsRegClass) { + return "%v2s"; + } + else if (RC == &NVPTX::V4I16RegsRegClass) { + return "%v4rs"; + } + else if (RC == &NVPTX::V2I8RegsRegClass) { + return "%v2rc"; + } + else if (RC == &NVPTX::V4I8RegsRegClass) { + return "%v4rc"; + } + else { + return "INTERNAL"; + } + return ""; +} + +bool isNVPTXVectorRegClass(TargetRegisterClass const *RC) { + if (RC->getID() == NVPTX::V2F32RegsRegClassID) + return true; + if (RC->getID() == NVPTX::V2F64RegsRegClassID) + return true; + if (RC->getID() == NVPTX::V2I16RegsRegClassID) + return true; + if (RC->getID() == NVPTX::V2I32RegsRegClassID) + return true; + if (RC->getID() 
== NVPTX::V2I64RegsRegClassID) + return true; + if (RC->getID() == NVPTX::V2I8RegsRegClassID) + return true; + if (RC->getID() == NVPTX::V4F32RegsRegClassID) + return true; + if (RC->getID() == NVPTX::V4I16RegsRegClassID) + return true; + if (RC->getID() == NVPTX::V4I32RegsRegClassID) + return true; + if (RC->getID() == NVPTX::V4I8RegsRegClassID) + return true; + return false; +} + +std::string getNVPTXElemClassName(TargetRegisterClass const *RC) { + if (RC->getID() == NVPTX::V2F32RegsRegClassID) + return getNVPTXRegClassName(&NVPTX::Float32RegsRegClass); + if (RC->getID() == NVPTX::V2F64RegsRegClassID) + return getNVPTXRegClassName(&NVPTX::Float64RegsRegClass); + if (RC->getID() == NVPTX::V2I16RegsRegClassID) + return getNVPTXRegClassName(&NVPTX::Int16RegsRegClass); + if (RC->getID() == NVPTX::V2I32RegsRegClassID) + return getNVPTXRegClassName(&NVPTX::Int32RegsRegClass); + if (RC->getID() == NVPTX::V2I64RegsRegClassID) + return getNVPTXRegClassName(&NVPTX::Int64RegsRegClass); + if (RC->getID() == NVPTX::V2I8RegsRegClassID) + return getNVPTXRegClassName(&NVPTX::Int8RegsRegClass); + if (RC->getID() == NVPTX::V4F32RegsRegClassID) + return getNVPTXRegClassName(&NVPTX::Float32RegsRegClass); + if (RC->getID() == NVPTX::V4I16RegsRegClassID) + return getNVPTXRegClassName(&NVPTX::Int16RegsRegClass); + if (RC->getID() == NVPTX::V4I32RegsRegClassID) + return getNVPTXRegClassName(&NVPTX::Int32RegsRegClass); + if (RC->getID() == NVPTX::V4I8RegsRegClassID) + return getNVPTXRegClassName(&NVPTX::Int8RegsRegClass); + llvm_unreachable("Not a vector register class"); +} + +const TargetRegisterClass *getNVPTXElemClass(TargetRegisterClass const *RC) { + if (RC->getID() == NVPTX::V2F32RegsRegClassID) + return (&NVPTX::Float32RegsRegClass); + if (RC->getID() == NVPTX::V2F64RegsRegClassID) + return (&NVPTX::Float64RegsRegClass); + if (RC->getID() == NVPTX::V2I16RegsRegClassID) + return (&NVPTX::Int16RegsRegClass); + if (RC->getID() == NVPTX::V2I32RegsRegClassID) + return (&NVPTX::Int32RegsRegClass); + if (RC->getID() == NVPTX::V2I64RegsRegClassID) + return (&NVPTX::Int64RegsRegClass); + if (RC->getID() == NVPTX::V2I8RegsRegClassID) + return (&NVPTX::Int8RegsRegClass); + if (RC->getID() == NVPTX::V4F32RegsRegClassID) + return (&NVPTX::Float32RegsRegClass); + if (RC->getID() == NVPTX::V4I16RegsRegClassID) + return (&NVPTX::Int16RegsRegClass); + if (RC->getID() == NVPTX::V4I32RegsRegClassID) + return (&NVPTX::Int32RegsRegClass); + if (RC->getID() == NVPTX::V4I8RegsRegClassID) + return (&NVPTX::Int8RegsRegClass); + llvm_unreachable("Not a vector register class"); +} + +int getNVPTXVectorSize(TargetRegisterClass const *RC) { + if (RC->getID() == NVPTX::V2F32RegsRegClassID) + return 2; + if (RC->getID() == NVPTX::V2F64RegsRegClassID) + return 2; + if (RC->getID() == NVPTX::V2I16RegsRegClassID) + return 2; + if (RC->getID() == NVPTX::V2I32RegsRegClassID) + return 2; + if (RC->getID() == NVPTX::V2I64RegsRegClassID) + return 2; + if (RC->getID() == NVPTX::V2I8RegsRegClassID) + return 2; + if (RC->getID() == NVPTX::V4F32RegsRegClassID) + return 4; + if (RC->getID() == NVPTX::V4I16RegsRegClassID) + return 4; + if (RC->getID() == NVPTX::V4I32RegsRegClassID) + return 4; + if (RC->getID() == NVPTX::V4I8RegsRegClassID) + return 4; + llvm_unreachable("Not a vector register class"); +} +} + +NVPTXRegisterInfo::NVPTXRegisterInfo(const TargetInstrInfo &tii, + const NVPTXSubtarget &st) + : NVPTXGenRegisterInfo(0), + Is64Bit(st.is64Bit()) {} + +#define GET_REGINFO_TARGET_DESC +#include "NVPTXGenRegisterInfo.inc" + +/// NVPTX Callee 
Saved Registers +const uint16_t* NVPTXRegisterInfo:: +getCalleeSavedRegs(const MachineFunction *MF) const { + static const uint16_t CalleeSavedRegs[] = { 0 }; + return CalleeSavedRegs; +} + +// NVPTX Callee Saved Reg Classes +const TargetRegisterClass* const* +NVPTXRegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const { + static const TargetRegisterClass * const CalleeSavedRegClasses[] = { 0 }; + return CalleeSavedRegClasses; +} + +BitVector NVPTXRegisterInfo::getReservedRegs(const MachineFunction &MF) const { + BitVector Reserved(getNumRegs()); + return Reserved; +} + +void NVPTXRegisterInfo:: +eliminateFrameIndex(MachineBasicBlock::iterator II, + int SPAdj, + RegScavenger *RS) const { + assert(SPAdj == 0 && "Unexpected"); + + unsigned i = 0; + MachineInstr &MI = *II; + while (!MI.getOperand(i).isFI()) { + ++i; + assert(i < MI.getNumOperands() && + "Instr doesn't have FrameIndex operand!"); + } + + int FrameIndex = MI.getOperand(i).getIndex(); + + MachineFunction &MF = *MI.getParent()->getParent(); + int Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex) + + MI.getOperand(i+1).getImm(); + + // Using I0 as the frame pointer + MI.getOperand(i).ChangeToRegister(NVPTX::VRFrame, false); + MI.getOperand(i+1).ChangeToImmediate(Offset); +} + + +int NVPTXRegisterInfo:: +getDwarfRegNum(unsigned RegNum, bool isEH) const { + return 0; +} + +unsigned NVPTXRegisterInfo::getFrameRegister(const MachineFunction &MF) const { + return NVPTX::VRFrame; +} + +unsigned NVPTXRegisterInfo::getRARegister() const { + return 0; +} + +// This function eliminates ADJCALLSTACKDOWN, +// ADJCALLSTACKUP pseudo instructions +void NVPTXRegisterInfo:: +eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) const { + // Simply discard ADJCALLSTACKDOWN, + // ADJCALLSTACKUP instructions. + MBB.erase(I); +} diff --git a/lib/Target/NVPTX/NVPTXRegisterInfo.h b/lib/Target/NVPTX/NVPTXRegisterInfo.h new file mode 100644 index 0000000..5951783 --- /dev/null +++ b/lib/Target/NVPTX/NVPTXRegisterInfo.h @@ -0,0 +1,92 @@ +//===- NVPTXRegisterInfo.h - NVPTX Register Information Impl ----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the NVPTX implementation of the TargetRegisterInfo class. +// +//===----------------------------------------------------------------------===// + +#ifndef NVPTXREGISTERINFO_H +#define NVPTXREGISTERINFO_H + +#include "ManagedStringPool.h" +#include "llvm/Target/TargetRegisterInfo.h" + + +#define GET_REGINFO_HEADER +#include "NVPTXGenRegisterInfo.inc" +#include "llvm/Target/TargetRegisterInfo.h" +#include + +namespace llvm { + +// Forward Declarations. 
+class TargetInstrInfo; +class NVPTXSubtarget; + +class NVPTXRegisterInfo : public NVPTXGenRegisterInfo { +private: + bool Is64Bit; + // Hold Strings that can be free'd all together with NVPTXRegisterInfo + ManagedStringPool ManagedStrPool; + +public: + NVPTXRegisterInfo(const TargetInstrInfo &tii, + const NVPTXSubtarget &st); + + + //------------------------------------------------------ + // Pure virtual functions from TargetRegisterInfo + //------------------------------------------------------ + + // NVPTX callee saved registers + virtual const uint16_t* + getCalleeSavedRegs(const MachineFunction *MF = 0) const; + + // NVPTX callee saved register classes + virtual const TargetRegisterClass* const * + getCalleeSavedRegClasses(const MachineFunction *MF) const; + + virtual BitVector getReservedRegs(const MachineFunction &MF) const; + + virtual void eliminateFrameIndex(MachineBasicBlock::iterator MI, + int SPAdj, + RegScavenger *RS=NULL) const; + + void eliminateCallFramePseudoInstr(MachineFunction &MF, + MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) const; + + virtual int getDwarfRegNum(unsigned RegNum, bool isEH) const; + virtual unsigned getFrameRegister(const MachineFunction &MF) const; + virtual unsigned getRARegister() const; + + ManagedStringPool *getStrPool() const { + return const_cast(&ManagedStrPool); + } + + const char *getName(unsigned RegNo) const { + std::stringstream O; + O << "reg" << RegNo; + return getStrPool()->getManagedString(O.str().c_str())->c_str(); + } + +}; + + +std::string getNVPTXRegClassName (const TargetRegisterClass *RC); +std::string getNVPTXRegClassStr (const TargetRegisterClass *RC); +bool isNVPTXVectorRegClass (const TargetRegisterClass *RC); +std::string getNVPTXElemClassName (const TargetRegisterClass *RC); +int getNVPTXVectorSize (const TargetRegisterClass *RC); +const TargetRegisterClass *getNVPTXElemClass(const TargetRegisterClass *RC); + +} // end namespace llvm + + +#endif diff --git a/lib/Target/NVPTX/NVPTXRegisterInfo.td b/lib/Target/NVPTX/NVPTXRegisterInfo.td new file mode 100644 index 0000000..ba15825 --- /dev/null +++ b/lib/Target/NVPTX/NVPTXRegisterInfo.td @@ -0,0 +1,108 @@ +//===-- NVPTXRegisterInfo.td - NVPTX Register defs ---------*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Declarations that describe the PTX register file +//===----------------------------------------------------------------------===// + +class NVPTXReg : Register { + let Namespace = "NVPTX"; +} + +class NVPTXRegClass regTypes, int alignment, dag regList> + : RegisterClass <"NVPTX", regTypes, alignment, regList>; + +//===----------------------------------------------------------------------===// +// Registers +//===----------------------------------------------------------------------===// + +// Special Registers used as stack pointer +def VRFrame : NVPTXReg<"%SP">; +def VRFrameLocal : NVPTXReg<"%SPL">; + +// Special Registers used as the stack +def VRDepot : NVPTXReg<"%Depot">; + +foreach i = 0-395 in { + def P#i : NVPTXReg<"%p"#i>; // Predicate + def RC#i : NVPTXReg<"%rc"#i>; // 8-bit + def RS#i : NVPTXReg<"%rs"#i>; // 16-bit + def R#i : NVPTXReg<"%r"#i>; // 32-bit + def RL#i : NVPTXReg<"%rl"#i>; // 64-bit + def F#i : NVPTXReg<"%f"#i>; // 32-bit float + def FL#i : NVPTXReg<"%fl"#i>; // 64-bit float + // Vectors + foreach s = [ "2b8", "2b16", "2b32", "2b64", "4b8", "4b16", "4b32" ] in + def v#s#_#i : NVPTXReg<"%v"#s#"_"#i>; + + // Arguments + def ia#i : NVPTXReg<"%ia"#i>; + def la#i : NVPTXReg<"%la"#i>; + def fa#i : NVPTXReg<"%fa"#i>; + def da#i : NVPTXReg<"%da"#i>; +} + +//===----------------------------------------------------------------------===// +// Register classes +//===----------------------------------------------------------------------===// +def Int1Regs : NVPTXRegClass<[i1], 8, (add (sequence "P%u", 0, 395))>; +def Int8Regs : NVPTXRegClass<[i8], 8, (add (sequence "RC%u", 0, 395))>; +def Int16Regs : NVPTXRegClass<[i16], 16, (add (sequence "RS%u", 0, 395))>; +def Int32Regs : NVPTXRegClass<[i32], 32, (add (sequence "R%u", 0, 395))>; +def Int64Regs : NVPTXRegClass<[i64], 64, (add (sequence "RL%u", 0, 395))>; +def Float32Regs : NVPTXRegClass<[f32], 32, (add (sequence "F%u", 0, 395))>; +def Float64Regs : NVPTXRegClass<[f64], 64, (add (sequence "FL%u", 0, 395))>; +def Int32ArgRegs : NVPTXRegClass<[i32], 32, (add (sequence "ia%u", 0, 395))>; +def Int64ArgRegs : NVPTXRegClass<[i64], 64, (add (sequence "la%u", 0, 395))>; +def Float32ArgRegs : NVPTXRegClass<[f32], 32, (add (sequence "fa%u", 0, 395))>; +def Float64ArgRegs : NVPTXRegClass<[f64], 64, (add (sequence "da%u", 0, 395))>; + +// Read NVPTXRegisterInfo.cpp to see how VRFrame and VRDepot are used. 
+def SpecialRegs : NVPTXRegClass<[i32], 32, (add VRFrame, VRDepot)>;
+
+class NVPTXVecRegClass<list<ValueType> regTypes, int alignment, dag regList,
+                       NVPTXRegClass sClass,
+                       int e,
+                       string n>
+  : NVPTXRegClass<regTypes, alignment, regList>
+{
+  NVPTXRegClass scalarClass=sClass;
+  int elems=e;
+  string name=n;
+}
+def V2F32Regs
+  : NVPTXVecRegClass<[v2f32], 64, (add (sequence "v2b32_%u", 0, 395)),
+                     Float32Regs, 2, ".v2.f32">;
+def V4F32Regs
+  : NVPTXVecRegClass<[v4f32], 128, (add (sequence "v4b32_%u", 0, 395)),
+                     Float32Regs, 4, ".v4.f32">;
+def V2I32Regs
+  : NVPTXVecRegClass<[v2i32], 64, (add (sequence "v2b32_%u", 0, 395)),
+                     Int32Regs, 2, ".v2.u32">;
+def V4I32Regs
+  : NVPTXVecRegClass<[v4i32], 128, (add (sequence "v4b32_%u", 0, 395)),
+                     Int32Regs, 4, ".v4.u32">;
+def V2F64Regs
+  : NVPTXVecRegClass<[v2f64], 128, (add (sequence "v2b64_%u", 0, 395)),
+                     Float64Regs, 2, ".v2.f64">;
+def V2I64Regs
+  : NVPTXVecRegClass<[v2i64], 128, (add (sequence "v2b64_%u", 0, 395)),
+                     Int64Regs, 2, ".v2.u64">;
+def V2I16Regs
+  : NVPTXVecRegClass<[v2i16], 32, (add (sequence "v2b16_%u", 0, 395)),
+                     Int16Regs, 2, ".v2.u16">;
+def V4I16Regs
+  : NVPTXVecRegClass<[v4i16], 64, (add (sequence "v4b16_%u", 0, 395)),
+                     Int16Regs, 4, ".v4.u16">;
+def V2I8Regs
+  : NVPTXVecRegClass<[v2i8], 16, (add (sequence "v2b8_%u", 0, 395)),
+                     Int8Regs, 2, ".v2.u8">;
+def V4I8Regs
+  : NVPTXVecRegClass<[v4i8], 32, (add (sequence "v4b8_%u", 0, 395)),
+                     Int8Regs, 4, ".v4.u8">;
diff --git a/lib/Target/NVPTX/NVPTXSection.h b/lib/Target/NVPTX/NVPTXSection.h
new file mode 100644
index 0000000..f1ca466
--- /dev/null
+++ b/lib/Target/NVPTX/NVPTXSection.h
@@ -0,0 +1,45 @@
+//===- NVPTXSection.h - NVPTX-specific section representation -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the NVPTXSection class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_NVPTXSECTION_H
+#define LLVM_NVPTXSECTION_H
+
+#include "llvm/MC/MCSection.h"
+#include "llvm/GlobalVariable.h"
+#include <string>
+
+namespace llvm {
+/// NVPTXSection - Represents a section in PTX
+/// PTX does not have sections. We create this class in order to use
+/// the ASMPrint interface.
+///
+class NVPTXSection : public MCSection {
+
+public:
+  NVPTXSection(SectionVariant V, SectionKind K) : MCSection(V, K) {}
+  ~NVPTXSection() {}
+
+  /// Override this as NVPTX has its own way of printing switching
+  /// to a section.
+  virtual void PrintSwitchToSection(const MCAsmInfo &MAI,
+                                    raw_ostream &OS) const {}
+
+  /// Base address of PTX sections is zero.
+  virtual bool isBaseAddressKnownZero() const { return true; }
+  virtual bool UseCodeAlign() const { return false; }
+  virtual bool isVirtualSection() const { return false; }
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/NVPTX/NVPTXSplitBBatBar.cpp b/lib/Target/NVPTX/NVPTXSplitBBatBar.cpp
new file mode 100644
index 0000000..2836cad
--- /dev/null
+++ b/lib/Target/NVPTX/NVPTXSplitBBatBar.cpp
@@ -0,0 +1,77 @@
+//===- NVPTXSplitBBatBar.cpp - Split BB at Barrier  --*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// Split basic blocks so that a basic block that contains a barrier instruction
+// only contains the barrier instruction.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Function.h"
+#include "llvm/Instructions.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Support/InstIterator.h"
+#include "NVPTXUtilities.h"
+#include "NVPTXSplitBBatBar.h"
+
+using namespace llvm;
+
+namespace llvm {
+FunctionPass *createSplitBBatBarPass();
+}
+
+char NVPTXSplitBBatBar::ID = 0;
+
+bool NVPTXSplitBBatBar::runOnFunction(Function &F) {
+
+  SmallVector<Instruction *, 4> SplitPoints;
+  bool changed = false;
+
+  // Collect all the split points in SplitPoints
+  for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE; ++BI) {
+    BasicBlock::iterator IB = BI->begin();
+    BasicBlock::iterator II = IB;
+    BasicBlock::iterator IE = BI->end();
+
+    // Skip the first instruction. No splitting is needed at this
+    // point even if it is a bar.
+    while (II != IE) {
+      if (IntrinsicInst *inst = dyn_cast<IntrinsicInst>(II)) {
+        Intrinsic::ID id = inst->getIntrinsicID();
+        // If this is a barrier, split at this instruction
+        // and the next instruction.
+        if (llvm::isBarrierIntrinsic(id)) {
+          if (II != IB)
+            SplitPoints.push_back(II);
+          II++;
+          if ((II != IE) && (!II->isTerminator())) {
+            SplitPoints.push_back(II);
+            II++;
+          }
+          continue;
+        }
+      }
+      II++;
+    }
+  }
+
+  for (unsigned i = 0; i != SplitPoints.size(); i++) {
+    changed = true;
+    Instruction *inst = SplitPoints[i];
+    inst->getParent()->splitBasicBlock(inst, "bar_split");
+  }
+
+  return changed;
+}
+
+// This interface will most likely not be necessary, because this pass will
+// not be invoked by the driver, but will be used as a prerequisite to
+// another pass.
+FunctionPass *llvm::createSplitBBatBarPass() {
+  return new NVPTXSplitBBatBar();
+}
diff --git a/lib/Target/NVPTX/NVPTXSplitBBatBar.h b/lib/Target/NVPTX/NVPTXSplitBBatBar.h
new file mode 100644
index 0000000..9e4d5a0
--- /dev/null
+++ b/lib/Target/NVPTX/NVPTXSplitBBatBar.h
@@ -0,0 +1,41 @@
+//===-- llvm/lib/Target/NVPTX/NVPTXSplitBBatBar.h ---------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the NVIDIA-specific declarations
+// for splitting basic blocks at barrier instructions.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef NVPTX_SPLIT_BB_AT_BAR_H
+#define NVPTX_SPLIT_BB_AT_BAR_H
+
+#include "llvm/Pass.h"
+#include "llvm/CodeGen/MachineFunctionAnalysis.h"
+
+namespace llvm {
+
+// The actual analysis class, which is a FunctionPass
+struct NVPTXSplitBBatBar : public FunctionPass {
+  static char ID;
+
+  NVPTXSplitBBatBar() : FunctionPass(ID) {}
+  void getAnalysisUsage(AnalysisUsage &AU) const {
+    AU.addPreserved<MachineFunctionAnalysis>();
+  }
+  virtual bool runOnFunction(Function &F);
+
+  virtual const char *getPassName() const {
+    return "Split basic blocks at barrier";
+  }
+};
+
+extern FunctionPass *createSplitBBatBarPass();
+}
+
+#endif //NVPTX_SPLIT_BB_AT_BAR_H
diff --git a/lib/Target/NVPTX/NVPTXSubtarget.cpp b/lib/Target/NVPTX/NVPTXSubtarget.cpp
new file mode 100644
index 0000000..6aadd43
--- /dev/null
+++ b/lib/Target/NVPTX/NVPTXSubtarget.cpp
@@ -0,0 +1,57 @@
+//===- NVPTXSubtarget.cpp - NVPTX Subtarget Information -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the NVPTX specific subclass of TargetSubtarget.
+//
+//===----------------------------------------------------------------------===//
+
+#include "NVPTXSubtarget.h"
+#define GET_SUBTARGETINFO_ENUM
+#define GET_SUBTARGETINFO_TARGET_DESC
+#define GET_SUBTARGETINFO_CTOR
+#include "NVPTXGenSubtargetInfo.inc"
+
+using namespace llvm;
+
+// Select Driver Interface
+#include "llvm/Support/CommandLine.h"
+namespace {
+cl::opt<NVPTX::DrvInterface>
+DriverInterface(cl::desc("Choose driver interface:"),
+                cl::values(
+                    clEnumValN(NVPTX::NVCL, "drvnvcl", "Nvidia OpenCL driver"),
+                    clEnumValN(NVPTX::CUDA, "drvcuda", "Nvidia CUDA driver"),
+                    clEnumValN(NVPTX::TEST, "drvtest", "Plain Test"),
+                    clEnumValEnd),
+                cl::init(NVPTX::NVCL));
+}
+
+NVPTXSubtarget::NVPTXSubtarget(const std::string &TT, const std::string &CPU,
+                               const std::string &FS, bool is64Bit)
+:NVPTXGenSubtargetInfo(TT, "", FS), // Don't pass CPU to subtarget,
+                                    // because we don't register all
+                                    // nvptx targets.
+ Is64Bit(is64Bit) {
+
+  drvInterface = DriverInterface;
+
+  // Provide the default CPU if none was given
+  std::string defCPU = "sm_10";
+
+  // Pick the target name from the CPU if one was given, else use the default
+  if (FS.empty() && CPU.empty())
+    TargetName = defCPU;
+  else if (!CPU.empty())
+    TargetName = CPU;
+  else
+    llvm_unreachable("we are not using FeatureStr");
+
+  // Set up the SmVersion
+  SmVersion = atoi(TargetName.c_str()+3);
+}
diff --git a/lib/Target/NVPTX/NVPTXSubtarget.h b/lib/Target/NVPTX/NVPTXSubtarget.h
new file mode 100644
index 0000000..8f2a629
--- /dev/null
+++ b/lib/Target/NVPTX/NVPTXSubtarget.h
@@ -0,0 +1,92 @@
+//=====-- NVPTXSubtarget.h - Define Subtarget for the NVPTX ---*- C++ -*--====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the NVPTX specific subclass of TargetSubtarget.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef NVPTXSUBTARGET_H
+#define NVPTXSUBTARGET_H
+
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include "NVPTX.h"
+
+#define GET_SUBTARGETINFO_HEADER
+#include "NVPTXGenSubtargetInfo.inc"
+
+#include <string>
+
+namespace llvm {
+
+class NVPTXSubtarget : public NVPTXGenSubtargetInfo {
+
+  unsigned int SmVersion;
+  std::string TargetName;
+  NVPTX::DrvInterface drvInterface;
+  bool dummy; // For the 'dummy' feature, see NVPTX.td
+  bool Is64Bit;
+
+public:
+  /// This constructor initializes the data members to match that
+  /// of the specified module.
+  ///
+  NVPTXSubtarget(const std::string &TT, const std::string &CPU,
+                 const std::string &FS, bool is64Bit);
+
+  bool hasBrkPt() const { return SmVersion >= 11; }
+  bool hasAtomRedG32() const { return SmVersion >= 11; }
+  bool hasAtomRedS32() const { return SmVersion >= 12; }
+  bool hasAtomRedG64() const { return SmVersion >= 12; }
+  bool hasAtomRedS64() const { return SmVersion >= 20; }
+  bool hasAtomRedGen32() const { return SmVersion >= 20; }
+  bool hasAtomRedGen64() const { return SmVersion >= 20; }
+  bool hasAtomAddF32() const { return SmVersion >= 20; }
+  bool hasVote() const { return SmVersion >= 12; }
+  bool hasDouble() const { return SmVersion >= 13; }
+  bool reqPTX20() const { return SmVersion >= 20; }
+  bool hasF32FTZ() const { return SmVersion >= 20; }
+  bool hasFMAF32() const { return SmVersion >= 20; }
+  bool hasFMAF64() const { return SmVersion >= 13; }
+  bool hasLDU() const { return SmVersion >= 20; }
+  bool hasGenericLdSt() const { return SmVersion >= 20; }
+  inline bool hasHWROT32() const { return false; }
+  inline bool hasSWROT32() const { return true; }
+  inline bool hasROT32() const { return hasHWROT32() || hasSWROT32(); }
+  inline bool hasROT64() const { return SmVersion >= 20; }
+
+
+  bool is64Bit() const { return Is64Bit; }
+
+  unsigned int getSmVersion() const { return SmVersion; }
+  NVPTX::DrvInterface getDrvInterface() const { return drvInterface; }
+  std::string getTargetName() const { return TargetName; }
+
+  void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
+
+  std::string getDataLayout() const {
+    const char *p;
+    if (is64Bit())
+      p = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-"
+          "f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-"
+          "n16:32:64";
+    else
+      p = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-"
+          "f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-"
+          "n16:32:64";
+
+    return std::string(p);
+  }
+
+};
+
+} // End llvm namespace
+
+#endif // NVPTXSUBTARGET_H
diff --git a/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/lib/Target/NVPTX/NVPTXTargetMachine.cpp
new file mode 100644
index 0000000..433f415
--- /dev/null
+++ b/lib/Target/NVPTX/NVPTXTargetMachine.cpp
@@ -0,0 +1,133 @@
+//===-- NVPTXTargetMachine.cpp - Define TargetMachine for NVPTX -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Top-level implementation for the NVPTX target.
+//
+//===----------------------------------------------------------------------===//
+
+#include "NVPTXTargetMachine.h"
+#include "NVPTX.h"
+#include "NVPTXSplitBBatBar.h"
+#include "NVPTXLowerAggrCopies.h"
+#include "MCTargetDesc/NVPTXMCAsmInfo.h"
+#include "NVPTXAllocaHoisting.h"
+#include "llvm/PassManager.h"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Analysis/Verifier.h"
+#include "llvm/Assembly/PrintModulePass.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineFunctionAnalysis.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/FormattedStream.h"
+#include "llvm/Support/TargetRegistry.h"
+
+
+using namespace llvm;
+
+
+extern "C" void LLVMInitializeNVPTXTarget() {
+  // Register the target.
+  RegisterTargetMachine<NVPTXTargetMachine32> X(TheNVPTXTarget32);
+  RegisterTargetMachine<NVPTXTargetMachine64> Y(TheNVPTXTarget64);
+
+  RegisterMCAsmInfo<NVPTXMCAsmInfo> A(TheNVPTXTarget32);
+  RegisterMCAsmInfo<NVPTXMCAsmInfo> B(TheNVPTXTarget64);
+
+}
+
+NVPTXTargetMachine::NVPTXTargetMachine(const Target &T,
+                                       StringRef TT,
+                                       StringRef CPU,
+                                       StringRef FS,
+                                       const TargetOptions& Options,
+                                       Reloc::Model RM,
+                                       CodeModel::Model CM,
+                                       CodeGenOpt::Level OL,
+                                       bool is64bit)
+: LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
+  Subtarget(TT, CPU, FS, is64bit),
+  DataLayout(Subtarget.getDataLayout()),
+  InstrInfo(*this), TLInfo(*this), TSInfo(*this), FrameLowering(*this,is64bit)
+/*FrameInfo(TargetFrameInfo::StackGrowsUp, 8, 0)*/ {
+}
+
+
+
+void NVPTXTargetMachine32::anchor() {}
+
+NVPTXTargetMachine32::NVPTXTargetMachine32(const Target &T, StringRef TT,
+                                           StringRef CPU, StringRef FS,
+                                           const TargetOptions &Options,
+                                           Reloc::Model RM, CodeModel::Model CM,
+                                           CodeGenOpt::Level OL)
+: NVPTXTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {
+}
+
+void NVPTXTargetMachine64::anchor() {}
+
+NVPTXTargetMachine64::NVPTXTargetMachine64(const Target &T, StringRef TT,
+                                           StringRef CPU, StringRef FS,
+                                           const TargetOptions &Options,
+                                           Reloc::Model RM, CodeModel::Model CM,
+                                           CodeGenOpt::Level OL)
+: NVPTXTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {
+}
+
+
+namespace llvm {
+class NVPTXPassConfig : public TargetPassConfig {
+public:
+  NVPTXPassConfig(NVPTXTargetMachine *TM, PassManagerBase &PM)
+    : TargetPassConfig(TM, PM) {}
+
+  NVPTXTargetMachine &getNVPTXTargetMachine() const {
+    return getTM<NVPTXTargetMachine>();
+  }
+
+  virtual bool addInstSelector();
+  virtual bool addPreRegAlloc();
+};
+}
+
+TargetPassConfig *NVPTXTargetMachine::createPassConfig(PassManagerBase &PM) {
+  NVPTXPassConfig *PassConfig = new NVPTXPassConfig(this, PM);
+  return PassConfig;
+}
+
+bool NVPTXPassConfig::addInstSelector() {
+  addPass(createLowerAggrCopies());
+  addPass(createSplitBBatBarPass());
+  addPass(createAllocaHoisting());
+  addPass(createNVPTXISelDag(getNVPTXTargetMachine(), getOptLevel()));
+
+  addPass(createVectorElementizePass(getNVPTXTargetMachine()));
+  return false;
+}
+
+bool NVPTXPassConfig::addPreRegAlloc() {
+  return false;
+}
diff --git a/lib/Target/NVPTX/NVPTXTargetMachine.h b/lib/Target/NVPTX/NVPTXTargetMachine.h
new file mode 100644
index 0000000..b3f9cac
--- /dev/null
+++ b/lib/Target/NVPTX/NVPTXTargetMachine.h
@@ -0,0 +1,125 @@
+//===-- NVPTXTargetMachine.h - Define TargetMachine for NVPTX ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the NVPTX specific subclass of TargetMachine.
+//
+//===----------------------------------------------------------------------===//
+
+
+#ifndef NVPTX_TARGETMACHINE_H
+#define NVPTX_TARGETMACHINE_H
+
+#include "NVPTXInstrInfo.h"
+#include "NVPTXISelLowering.h"
+#include "NVPTXRegisterInfo.h"
+#include "NVPTXSubtarget.h"
+#include "NVPTXFrameLowering.h"
+#include "ManagedStringPool.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetSelectionDAGInfo.h"
+
+namespace llvm {
+
+/// NVPTXTargetMachine
+///
+class NVPTXTargetMachine : public LLVMTargetMachine {
+  NVPTXSubtarget Subtarget;
+  const TargetData DataLayout; // Calculates type size & alignment
+  NVPTXInstrInfo InstrInfo;
+  NVPTXTargetLowering TLInfo;
+  TargetSelectionDAGInfo TSInfo;
+
+  // NVPTX does not have any call stack frame, but need a NVPTX specific
+  // FrameLowering class because TargetFrameLowering is abstract.
+  NVPTXFrameLowering FrameLowering;
+
+  // Hold Strings that can be free'd all together with NVPTXTargetMachine
+  ManagedStringPool ManagedStrPool;
+
+  //bool addCommonCodeGenPasses(PassManagerBase &, CodeGenOpt::Level,
+  //                            bool DisableVerify, MCContext *&OutCtx);
+
+public:
+  NVPTXTargetMachine(const Target &T, StringRef TT, StringRef CPU,
+                     StringRef FS, const TargetOptions &Options,
+                     Reloc::Model RM, CodeModel::Model CM,
+                     CodeGenOpt::Level OP,
+                     bool is64bit);
+
+  virtual const TargetFrameLowering *getFrameLowering() const {
+    return &FrameLowering;
+  }
+  virtual const NVPTXInstrInfo *getInstrInfo() const { return &InstrInfo; }
+  virtual const TargetData *getTargetData() const { return &DataLayout;}
+  virtual const NVPTXSubtarget *getSubtargetImpl() const { return &Subtarget;}
+
+  virtual const NVPTXRegisterInfo *getRegisterInfo() const {
+    return &(InstrInfo.getRegisterInfo());
+  }
+
+  virtual NVPTXTargetLowering *getTargetLowering() const {
+    return const_cast<NVPTXTargetLowering*>(&TLInfo);
+  }
+
+  virtual const TargetSelectionDAGInfo *getSelectionDAGInfo() const {
+    return &TSInfo;
+  }
+
+  //virtual bool addInstSelector(PassManagerBase &PM,
+  //                             CodeGenOpt::Level OptLevel);
+
+  //virtual bool addPreRegAlloc(PassManagerBase &, CodeGenOpt::Level);
+
+  ManagedStringPool *getManagedStrPool() const {
+    return const_cast<ManagedStringPool *>(&ManagedStrPool);
+  }
+
+  virtual TargetPassConfig *createPassConfig(PassManagerBase &PM);
+
+  // Emission of machine code through JITCodeEmitter is not supported.
+  virtual bool addPassesToEmitMachineCode(PassManagerBase &,
+                                          JITCodeEmitter &,
+                                          bool = true) {
+    return true;
+  }
+
+  // Emission of machine code through MCJIT is not supported.
+  virtual bool addPassesToEmitMC(PassManagerBase &,
+                                 MCContext *&,
+                                 raw_ostream &,
+                                 bool = true) {
+    return true;
+  }
+
+}; // NVPTXTargetMachine.
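+
+// For illustration only, a minimal sketch (not taken from the imported
+// sources) of how a client might reach one of the machines declared below
+// through the registry, assuming the usual LLVM 3.1 flow; the "nvptx64"
+// triple and the "sm_20" CPU name here are placeholder choices:
+//
+//   std::string Err;
+//   const Target *T = TargetRegistry::lookupTarget("nvptx64", Err);
+//   if (!T) report_fatal_error("NVPTX target not registered: " + Err);
+//   TargetOptions Opts;
+//   OwningPtr<TargetMachine> TM(
+//       T->createTargetMachine("nvptx64", "sm_20", "", Opts,
+//                              Reloc::Default, CodeModel::Default,
+//                              CodeGenOpt::Default));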
+
+class NVPTXTargetMachine32 : public NVPTXTargetMachine {
+  virtual void anchor();
+public:
+  NVPTXTargetMachine32(const Target &T, StringRef TT, StringRef CPU,
+                       StringRef FS, const TargetOptions &Options,
+                       Reloc::Model RM, CodeModel::Model CM,
+                       CodeGenOpt::Level OL);
+};
+
+class NVPTXTargetMachine64 : public NVPTXTargetMachine {
+  virtual void anchor();
+public:
+  NVPTXTargetMachine64(const Target &T, StringRef TT, StringRef CPU,
+                       StringRef FS, const TargetOptions &Options,
+                       Reloc::Model RM, CodeModel::Model CM,
+                       CodeGenOpt::Level OL);
+};
+
+
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/NVPTX/NVPTXTargetObjectFile.h b/lib/Target/NVPTX/NVPTXTargetObjectFile.h
new file mode 100644
index 0000000..b5698a2
--- /dev/null
+++ b/lib/Target/NVPTX/NVPTXTargetObjectFile.h
@@ -0,0 +1,105 @@
+//===-- NVPTXTargetObjectFile.h - NVPTX Object Info -------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_NVPTX_TARGETOBJECTFILE_H
+#define LLVM_TARGET_NVPTX_TARGETOBJECTFILE_H
+
+#include "NVPTXSection.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include <string>
+
+namespace llvm {
+class GlobalVariable;
+class Module;
+
+class NVPTXTargetObjectFile : public TargetLoweringObjectFile {
+
+public:
+  NVPTXTargetObjectFile() {}
+  ~NVPTXTargetObjectFile() {
+    delete TextSection;
+    delete DataSection;
+    delete BSSSection;
+    delete ReadOnlySection;
+
+    delete StaticCtorSection;
+    delete StaticDtorSection;
+    delete LSDASection;
+    delete EHFrameSection;
+    delete DwarfAbbrevSection;
+    delete DwarfInfoSection;
+    delete DwarfLineSection;
+    delete DwarfFrameSection;
+    delete DwarfPubTypesSection;
+    delete DwarfDebugInlineSection;
+    delete DwarfStrSection;
+    delete DwarfLocSection;
+    delete DwarfARangesSection;
+    delete DwarfRangesSection;
+    delete DwarfMacroInfoSection;
+  }
+
+  virtual void Initialize(MCContext &ctx, const TargetMachine &TM) {
+    TextSection = new NVPTXSection(MCSection::SV_ELF,
+                                   SectionKind::getText());
+    DataSection = new NVPTXSection(MCSection::SV_ELF,
+                                   SectionKind::getDataRel());
+    BSSSection = new NVPTXSection(MCSection::SV_ELF,
+                                  SectionKind::getBSS());
+    ReadOnlySection = new NVPTXSection(MCSection::SV_ELF,
+                                       SectionKind::getReadOnly());
+
+    StaticCtorSection = new NVPTXSection(MCSection::SV_ELF,
+                                         SectionKind::getMetadata());
+    StaticDtorSection = new NVPTXSection(MCSection::SV_ELF,
+                                         SectionKind::getMetadata());
+    LSDASection = new NVPTXSection(MCSection::SV_ELF,
+                                   SectionKind::getMetadata());
+    EHFrameSection = new NVPTXSection(MCSection::SV_ELF,
+                                      SectionKind::getMetadata());
+    DwarfAbbrevSection = new NVPTXSection(MCSection::SV_ELF,
+                                          SectionKind::getMetadata());
+    DwarfInfoSection = new NVPTXSection(MCSection::SV_ELF,
+                                        SectionKind::getMetadata());
+    DwarfLineSection = new NVPTXSection(MCSection::SV_ELF,
+                                        SectionKind::getMetadata());
+    DwarfFrameSection = new NVPTXSection(MCSection::SV_ELF,
+                                         SectionKind::getMetadata());
+    DwarfPubTypesSection = new NVPTXSection(MCSection::SV_ELF,
+                                            SectionKind::getMetadata());
+    DwarfDebugInlineSection = new NVPTXSection(MCSection::SV_ELF,
+                                               SectionKind::getMetadata());
+    DwarfStrSection = new NVPTXSection(MCSection::SV_ELF,
+                                       SectionKind::getMetadata());
+    DwarfLocSection = new NVPTXSection(MCSection::SV_ELF,
+                                       SectionKind::getMetadata());
+    DwarfARangesSection = new NVPTXSection(MCSection::SV_ELF,
+                                           SectionKind::getMetadata());
+    DwarfRangesSection = new NVPTXSection(MCSection::SV_ELF,
+                                          SectionKind::getMetadata());
+    DwarfMacroInfoSection = new NVPTXSection(MCSection::SV_ELF,
+                                             SectionKind::getMetadata());
+  }
+
+  virtual const MCSection *getSectionForConstant(SectionKind Kind) const {
+    return ReadOnlySection;
+  }
+
+  virtual const MCSection *
+  getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind,
+                           Mangler *Mang,
+                           const TargetMachine &TM) const {
+    return DataSection;
+  }
+
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/NVPTX/NVPTXUtilities.cpp b/lib/Target/NVPTX/NVPTXUtilities.cpp
new file mode 100644
index 0000000..3f52251
--- /dev/null
+++ b/lib/Target/NVPTX/NVPTXUtilities.cpp
@@ -0,0 +1,514 @@
+//===- NVPTXUtilities.cpp - Utility Functions -----------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains miscellaneous utility functions
+//===----------------------------------------------------------------------===//
+
+#include "NVPTXUtilities.h"
+#include "NVPTX.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/Function.h"
+#include "llvm/Module.h"
+#include "llvm/Constants.h"
+#include "llvm/Operator.h"
+#include <algorithm>
+#include <cstring>
+#include <map>
+#include <string>
+#include <vector>
+//#include <iostream>
+#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/InstIterator.h"
+
+using namespace llvm;
+
+typedef std::map<std::string, std::vector<unsigned> > key_val_pair_t;
+typedef std::map<const GlobalValue *, key_val_pair_t> global_val_annot_t;
+typedef std::map<const Module *, global_val_annot_t> per_module_annot_t;
+
+ManagedStatic<per_module_annot_t> annotationCache;
+
+
+static void cacheAnnotationFromMD(const MDNode *md, key_val_pair_t &retval) {
+  assert(md && "Invalid mdnode for annotation");
+  assert((md->getNumOperands() % 2) == 1 && "Invalid number of operands");
+  // start index = 1, to skip the global variable key
+  // increment = 2, to skip the value of each property-value pair
+  for (unsigned i = 1, e = md->getNumOperands(); i != e; i += 2) {
+    // property
+    const MDString *prop = dyn_cast<MDString>(md->getOperand(i));
+    assert(prop && "Annotation property not a string");
+
+    // value
+    ConstantInt *Val = dyn_cast<ConstantInt>(md->getOperand(i+1));
+    assert(Val && "Value operand not a constant int");
+
+    std::string keyname = prop->getString().str();
+    if (retval.find(keyname) != retval.end())
+      retval[keyname].push_back(Val->getZExtValue());
+    else {
+      std::vector<unsigned> tmp;
+      tmp.push_back(Val->getZExtValue());
+      retval[keyname] = tmp;
+    }
+  }
+}
+
+static void cacheAnnotationFromMD(const Module *m, const GlobalValue *gv) {
+  NamedMDNode *NMD = m->getNamedMetadata(llvm::NamedMDForAnnotations);
+  if (!NMD)
+    return;
+  key_val_pair_t tmp;
+  for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) {
+    const MDNode *elem = NMD->getOperand(i);
+
+    Value *entity = elem->getOperand(0);
+    // entity may be null due to DCE
+    if (!entity)
+      continue;
+    if (entity != gv)
+      continue;
+
+    // accumulate annotations for entity in tmp
+    cacheAnnotationFromMD(elem, tmp);
+  }
+
+  if (tmp.empty()) // no annotations for this gv
+    return;
+
+  if ((*annotationCache).find(m) != (*annotationCache).end())
+    (*annotationCache)[m][gv] = tmp;
+  else {
+    global_val_annot_t tmp1;
+    tmp1[gv] = tmp;
+    (*annotationCache)[m] = tmp1;
+  }
+}
+
+bool llvm::findOneNVVMAnnotation(const GlobalValue *gv, std::string prop,
+                                 unsigned &retval) {
+  const Module *m = gv->getParent();
+  if ((*annotationCache).find(m) == (*annotationCache).end())
+    cacheAnnotationFromMD(m, gv);
+  else if ((*annotationCache)[m].find(gv) == (*annotationCache)[m].end())
+    cacheAnnotationFromMD(m, gv);
+  if ((*annotationCache)[m][gv].find(prop) == (*annotationCache)[m][gv].end())
+    return false;
+  retval = (*annotationCache)[m][gv][prop][0];
+  return true;
+}
+
+bool llvm::findAllNVVMAnnotation(const GlobalValue *gv, std::string prop,
+                                 std::vector<unsigned> &retval) {
+  const Module *m = gv->getParent();
+  if ((*annotationCache).find(m) == (*annotationCache).end())
+    cacheAnnotationFromMD(m, gv);
+  else if ((*annotationCache)[m].find(gv) == (*annotationCache)[m].end())
+    cacheAnnotationFromMD(m, gv);
+  if ((*annotationCache)[m][gv].find(prop) == (*annotationCache)[m][gv].end())
+    return false;
+  retval = (*annotationCache)[m][gv][prop];
+  return true;
+}
+
+bool llvm::isTexture(const llvm::Value &val) {
+  if (const GlobalValue *gv = dyn_cast<GlobalValue>(&val)) {
+    unsigned annot;
+    if (llvm::findOneNVVMAnnotation(gv,
+        llvm::PropertyAnnotationNames[llvm::PROPERTY_ISTEXTURE],
+                                    annot)) {
+      assert((annot == 1) && "Unexpected annotation on a texture symbol");
+      return true;
+    }
+  }
+  return false;
+}
+
+bool llvm::isSurface(const llvm::Value &val) {
+  if (const GlobalValue *gv = dyn_cast<GlobalValue>(&val)) {
+    unsigned annot;
+    if (llvm::findOneNVVMAnnotation(gv,
+        llvm::PropertyAnnotationNames[llvm::PROPERTY_ISSURFACE],
+                                    annot)) {
+      assert((annot == 1) && "Unexpected annotation on a surface symbol");
+      return true;
+    }
+  }
+  return false;
+}
+
+bool llvm::isSampler(const llvm::Value &val) {
+  if (const GlobalValue *gv = dyn_cast<GlobalValue>(&val)) {
+    unsigned annot;
+    if (llvm::findOneNVVMAnnotation(gv,
+        llvm::PropertyAnnotationNames[llvm::PROPERTY_ISSAMPLER],
+                                    annot)) {
+      assert((annot == 1) && "Unexpected annotation on a sampler symbol");
+      return true;
+    }
+  }
+  if (const Argument *arg = dyn_cast<Argument>(&val)) {
+    const Function *func = arg->getParent();
+    std::vector<unsigned> annot;
+    if (llvm::findAllNVVMAnnotation(func,
+        llvm::PropertyAnnotationNames[llvm::PROPERTY_ISSAMPLER],
+                                    annot)) {
+      if (std::find(annot.begin(), annot.end(), arg->getArgNo()) != annot.end())
+        return true;
+    }
+  }
+  return false;
+}
+
+bool llvm::isImageReadOnly(const llvm::Value &val) {
+  if (const Argument *arg = dyn_cast<Argument>(&val)) {
+    const Function *func = arg->getParent();
+    std::vector<unsigned> annot;
+    if (llvm::findAllNVVMAnnotation(func,
+        llvm::PropertyAnnotationNames[llvm::PROPERTY_ISREADONLY_IMAGE_PARAM],
+                                    annot)) {
+      if (std::find(annot.begin(), annot.end(), arg->getArgNo()) != annot.end())
+        return true;
+    }
+  }
+  return false;
+}
+
+bool llvm::isImageWriteOnly(const llvm::Value &val) {
+  if (const Argument *arg = dyn_cast<Argument>(&val)) {
+    const Function *func = arg->getParent();
+    std::vector<unsigned> annot;
+    if (llvm::findAllNVVMAnnotation(func,
+        llvm::PropertyAnnotationNames[llvm::PROPERTY_ISWRITEONLY_IMAGE_PARAM],
+                                    annot)) {
+      if (std::find(annot.begin(), annot.end(), arg->getArgNo()) != annot.end())
+        return true;
+    }
+  }
+  return false;
+}
+
+bool llvm::isImage(const llvm::Value &val) {
+  return llvm::isImageReadOnly(val) || llvm::isImageWriteOnly(val);
+}
+
+std::string llvm::getTextureName(const llvm::Value &val) {
+  assert(val.hasName() && "Found texture variable with no name");
+  return val.getName();
+}
+
+std::string llvm::getSurfaceName(const llvm::Value &val) {
+  assert(val.hasName() && "Found surface variable with no name");
+  return val.getName();
+}
+
+std::string llvm::getSamplerName(const llvm::Value &val) {
+  assert(val.hasName() && "Found sampler variable with no name");
+  return val.getName();
+}
+
+bool llvm::getMaxNTIDx(const Function &F, unsigned &x) {
+  return (llvm::findOneNVVMAnnotation(&F,
+          llvm::PropertyAnnotationNames[llvm::PROPERTY_MAXNTID_X],
+                                      x));
+}
+
+bool llvm::getMaxNTIDy(const Function &F, unsigned &y) {
+  return (llvm::findOneNVVMAnnotation(&F,
+          llvm::PropertyAnnotationNames[llvm::PROPERTY_MAXNTID_Y],
+                                      y));
+}
+
+bool llvm::getMaxNTIDz(const Function &F, unsigned &z) {
+  return (llvm::findOneNVVMAnnotation(&F,
+          llvm::PropertyAnnotationNames[llvm::PROPERTY_MAXNTID_Z],
+                                      z));
+}
+
+bool llvm::getReqNTIDx(const Function &F, unsigned &x) {
+  return (llvm::findOneNVVMAnnotation(&F,
+          llvm::PropertyAnnotationNames[llvm::PROPERTY_REQNTID_X],
+                                      x));
+}
+
+bool llvm::getReqNTIDy(const Function &F, unsigned &y) {
+  return (llvm::findOneNVVMAnnotation(&F,
+          llvm::PropertyAnnotationNames[llvm::PROPERTY_REQNTID_Y],
+                                      y));
+}
+
+bool llvm::getReqNTIDz(const Function &F, unsigned &z) {
+  return (llvm::findOneNVVMAnnotation(&F,
+          llvm::PropertyAnnotationNames[llvm::PROPERTY_REQNTID_Z],
+                                      z));
+}
+
+bool llvm::getMinCTASm(const Function &F, unsigned &x) {
+  return (llvm::findOneNVVMAnnotation(&F,
+          llvm::PropertyAnnotationNames[llvm::PROPERTY_MINNCTAPERSM],
+                                      x));
+}
+
+bool llvm::isKernelFunction(const Function &F) {
+  unsigned x = 0;
+  bool retval = llvm::findOneNVVMAnnotation(&F,
+      llvm::PropertyAnnotationNames[llvm::PROPERTY_ISKERNEL_FUNCTION],
+                                            x);
+  if (retval == false) {
+    // There is no NVVM metadata, check the calling convention
+    if (F.getCallingConv() == llvm::CallingConv::PTX_Kernel)
+      return true;
+    else
+      return false;
+  }
+  return (x==1);
+}
+
+bool llvm::getAlign(const Function &F, unsigned index, unsigned &align) {
+  std::vector<unsigned> Vs;
+  bool retval = llvm::findAllNVVMAnnotation(&F,
+      llvm::PropertyAnnotationNames[llvm::PROPERTY_ALIGN],
+                                            Vs);
+  if (retval == false)
+    return false;
+  for (int i=0, e=Vs.size(); i<e; i++) {
+    unsigned v = Vs[i];
+    if ( (v >> 16) == index ) {
+      align = v & 0xFFFF;
+      return true;
+    }
+  }
+  return false;
+}
+
+bool llvm::getAlign(const CallInst &I, unsigned index, unsigned &align) {
+  if (MDNode *alignNode = I.getMetadata("callalign")) {
+    for (int i=0, n = alignNode->getNumOperands();
+         i<n; i++) {
+      if (const ConstantInt *CI =
+              dyn_cast<ConstantInt>(alignNode->getOperand(i))) {
+        unsigned v = CI->getZExtValue();
+        if ( (v>>16) == index ) {
+          align = v & 0xFFFF;
+          return true;
+        }
+        if ( (v>>16) > index ) {
+          return false;
+        }
+      }
+    }
+  }
+  return false;
+}
+
+bool llvm::isBarrierIntrinsic(Intrinsic::ID id) {
+  if ((id == Intrinsic::nvvm_barrier0) ||
+      (id == Intrinsic::nvvm_barrier0_popc) ||
+      (id == Intrinsic::nvvm_barrier0_and) ||
+      (id == Intrinsic::nvvm_barrier0_or) ||
+      (id == Intrinsic::cuda_syncthreads))
+    return true;
+  return false;
+}
+
+// Interface for checking all memory space transfer related intrinsics
+bool llvm::isMemorySpaceTransferIntrinsic(Intrinsic::ID id) {
+  if (id == Intrinsic::nvvm_ptr_local_to_gen ||
+      id == Intrinsic::nvvm_ptr_shared_to_gen ||
+      id == Intrinsic::nvvm_ptr_global_to_gen ||
+      id == Intrinsic::nvvm_ptr_constant_to_gen ||
+      id == Intrinsic::nvvm_ptr_gen_to_global ||
+      id == Intrinsic::nvvm_ptr_gen_to_shared ||
+      id == Intrinsic::nvvm_ptr_gen_to_local ||
+      id == Intrinsic::nvvm_ptr_gen_to_constant ||
+      id == Intrinsic::nvvm_ptr_gen_to_param) {
+    return true;
+  }
+
+  return false;
+}
+
+// Consider several special intrinsics when stripping pointer casts, and
+// provide an option to ignore GEP indices and find only the base address,
+// which can be used in simple alias disambiguation.
+const Value *llvm::skipPointerTransfer(const Value *V,
+                                       bool ignore_GEP_indices) {
+  V = V->stripPointerCasts();
+  while (true) {
+    if (const IntrinsicInst *IS = dyn_cast<IntrinsicInst>(V)) {
+      if (isMemorySpaceTransferIntrinsic(IS->getIntrinsicID())) {
+        V = IS->getArgOperand(0)->stripPointerCasts();
+        continue;
+      }
+    } else if (ignore_GEP_indices)
+      if (const GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
+        V = GEP->getPointerOperand()->stripPointerCasts();
+        continue;
+      }
+    break;
+  }
+  return V;
+}
+
+// Consider several special intrinsics when stripping pointer casts:
+// - ignore GEP indices and find only the base address, and
+// - track PHINodes,
+// which can be used in simple alias disambiguation.
+const Value *llvm::skipPointerTransfer(const Value *V,
+                                       std::set<const Value *> &processed) {
+  if (processed.find(V) != processed.end())
+    return NULL;
+  processed.insert(V);
+
+  const Value *V2 = V->stripPointerCasts();
+  if (V2 != V && processed.find(V2) != processed.end())
+    return NULL;
+  processed.insert(V2);
+
+  V = V2;
+
+  while (true) {
+    if (const IntrinsicInst *IS = dyn_cast<IntrinsicInst>(V)) {
+      if (isMemorySpaceTransferIntrinsic(IS->getIntrinsicID())) {
+        V = IS->getArgOperand(0)->stripPointerCasts();
+        continue;
+      }
+    } else if (const GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
+      V = GEP->getPointerOperand()->stripPointerCasts();
+      continue;
+    } else if (const PHINode *PN = dyn_cast<PHINode>(V)) {
+      if (V != V2 && processed.find(V) != processed.end())
+        return NULL;
+      processed.insert(PN);
+      const Value *common = 0;
+      for (unsigned i = 0; i != PN->getNumIncomingValues(); ++i) {
+        const Value *pv = PN->getIncomingValue(i);
+        const Value *base = skipPointerTransfer(pv, processed);
+        if (base) {
+          if (common == 0)
+            common = base;
+          else if (common != base)
+            return PN;
+        }
+      }
+      if (common == 0)
+        return PN;
+      V = common;
+    }
+    break;
+  }
+  return V;
+}
+
+
+// The following are some useful utilities for debugging
+
+BasicBlock *llvm::getParentBlock(Value *v) {
+  if (BasicBlock *B = dyn_cast<BasicBlock>(v))
+    return B;
+
+  if (Instruction *I = dyn_cast<Instruction>(v))
+    return I->getParent();
+
+  return 0;
+}
+
+Function *llvm::getParentFunction(Value *v) {
+  if (Function *F = dyn_cast<Function>(v))
+    return F;
+
+  if (Instruction *I = dyn_cast<Instruction>(v))
+    return I->getParent()->getParent();
+
+  if (BasicBlock *B = dyn_cast<BasicBlock>(v))
+    return B->getParent();
+
+  return 0;
+}
+
+// Dump a block by name
+void llvm::dumpBlock(Value *v, char *blockName) {
+  Function *F = getParentFunction(v);
+  if (F == 0)
+    return;
+
+  for (Function::iterator it = F->begin(), ie = F->end(); it != ie; ++it) {
+    BasicBlock *B = it;
+    if (strcmp(B->getName().data(), blockName) == 0) {
+      B->dump();
+      return;
+    }
+  }
+}
+
+// Find an instruction by name
+Instruction *llvm::getInst(Value *base, char *instName) {
+  Function *F = getParentFunction(base);
+  if (F == 0)
+    return 0;
+
+  for (inst_iterator it = inst_begin(F), ie = inst_end(F); it != ie; ++it) {
+    Instruction *I = &*it;
+    if (strcmp(I->getName().data(), instName) == 0) {
+      return I;
+    }
+  }
+
+  return 0;
+}
+
+// Dump an instruction by name
+void llvm::dumpInst(Value *base, char *instName) {
+  Instruction *I = getInst(base, instName);
+  if (I)
+    I->dump();
+}
+
+// Dump an instruction and all dependent instructions
+void llvm::dumpInstRec(Value *v, std::set<Instruction *> *visited) {
+  if (Instruction *I = dyn_cast<Instruction>(v)) {
+
+    if (visited->find(I) != visited->end())
+      return;
+
+    visited->insert(I);
+
+    for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i)
+      dumpInstRec(I->getOperand(i), visited);
+
+    I->dump();
+  }
+}
+
+// Dump an instruction and all dependent instructions
+void llvm::dumpInstRec(Value *v) {
+  std::set<Instruction *> visited;
+
+  //BasicBlock *B = getParentBlock(v);
+
+  dumpInstRec(v, &visited);
+}
+
+// Dump the parent for Instruction, block or function
+void llvm::dumpParent(Value *v) {
+  if (Instruction *I = dyn_cast<Instruction>(v)) {
+    I->getParent()->dump();
+    return;
+  }
+
+  if (BasicBlock *B = dyn_cast<BasicBlock>(v)) {
+    B->getParent()->dump();
+    return;
+  }
+
+  if (Function *F = dyn_cast<Function>(v)) {
+    F->getParent()->dump();
+    return;
+  }
+}
diff --git a/lib/Target/NVPTX/NVPTXUtilities.h b/lib/Target/NVPTX/NVPTXUtilities.h
new file mode 100644
index 0000000..fe6ad55
--- /dev/null
+++ b/lib/Target/NVPTX/NVPTXUtilities.h
@@ -0,0 +1,94 @@
+//===-- NVPTXUtilities - Utilities -----------------------------*- C++ -*-====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declaration of the NVVM specific utility functions.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef NVPTXUTILITIES_H
+#define NVPTXUTILITIES_H
+
+#include "llvm/Value.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/Function.h"
+#include "llvm/IntrinsicInst.h"
+#include <cstdarg>
+#include <set>
+#include <string>
+#include <vector>
+
+namespace llvm
+{
+
+#define NVCL_IMAGE2D_READONLY_FUNCNAME "__is_image2D_readonly"
+#define NVCL_IMAGE3D_READONLY_FUNCNAME "__is_image3D_readonly"
+
+bool findOneNVVMAnnotation(const llvm::GlobalValue *, std::string, unsigned &);
+bool findAllNVVMAnnotation(const llvm::GlobalValue *, std::string,
+                           std::vector<unsigned> &);
+
+bool isTexture(const llvm::Value &);
+bool isSurface(const llvm::Value &);
+bool isSampler(const llvm::Value &);
+bool isImage(const llvm::Value &);
+bool isImageReadOnly(const llvm::Value &);
+bool isImageWriteOnly(const llvm::Value &);
+
+std::string getTextureName(const llvm::Value &);
+std::string getSurfaceName(const llvm::Value &);
+std::string getSamplerName(const llvm::Value &);
+
+bool getMaxNTIDx(const llvm::Function &, unsigned &);
+bool getMaxNTIDy(const llvm::Function &, unsigned &);
+bool getMaxNTIDz(const llvm::Function &, unsigned &);
+
+bool getReqNTIDx(const llvm::Function &, unsigned &);
+bool getReqNTIDy(const llvm::Function &, unsigned &);
+bool getReqNTIDz(const llvm::Function &, unsigned &);
+
+bool getMinCTASm(const llvm::Function &, unsigned &);
+bool isKernelFunction(const llvm::Function &);
+
+bool getAlign(const llvm::Function &, unsigned index, unsigned &);
+bool getAlign(const llvm::CallInst &, unsigned index, unsigned &);
+
+bool isBarrierIntrinsic(llvm::Intrinsic::ID);
+
+/// make_vector - Helper function which is useful for building temporary
+/// vectors to pass into type construction of CallInst ctors. This turns a
+/// null terminated list of pointers (or other value types) into a real live
+/// vector.
+///
+template<typename T>
+inline std::vector<T> make_vector(T A, ...) {
+  va_list Args;
+  va_start(Args, A);
+  std::vector<T> Result;
+  Result.push_back(A);
+  while (T Val = va_arg(Args, T))
+    Result.push_back(Val);
+  va_end(Args);
+  return Result;
+}
+
+bool isMemorySpaceTransferIntrinsic(Intrinsic::ID id);
+const Value *skipPointerTransfer(const Value *V, bool ignore_GEP_indices);
+const Value *skipPointerTransfer(const Value *V,
+                                 std::set<const Value *> &processed);
+BasicBlock *getParentBlock(Value *v);
+Function *getParentFunction(Value *v);
+void dumpBlock(Value *v, char *blockName);
+Instruction *getInst(Value *base, char *instName);
+void dumpInst(Value *base, char *instName);
+void dumpInstRec(Value *v, std::set<Instruction *> *visited);
+void dumpInstRec(Value *v);
+void dumpParent(Value *v);
+
+}
+
+#endif
diff --git a/lib/Target/NVPTX/NVPTXVector.td b/lib/Target/NVPTX/NVPTXVector.td
new file mode 100644
index 0000000..775df19
--- /dev/null
+++ b/lib/Target/NVPTX/NVPTXVector.td
@@ -0,0 +1,1481 @@
+//===- NVPTXVector.td - NVPTX Vector Specific Instruction defs -*- tblgen-*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//-----------------------------------
+// Vector Specific
+//-----------------------------------
+
+//
+// All vector instructions derive from NVPTXVecInst
+//
+
+class NVPTXVecInst<dag outs, dag ins, string asmstr, list<dag> pattern,
+                   NVPTXInst sInst=NOP>
+  : NVPTXInst<outs, ins, asmstr, pattern> {
+  NVPTXInst scalarInst=sInst;
+}
+
+let isAsCheapAsAMove=1, VecInstType=isVecExtract.Value in {
+// Extract v2i16
+def V2i16Extract : NVPTXVecInst<(outs Int16Regs:$dst),
+                                (ins V2I16Regs:$src, i8imm:$c),
+                                "mov.u16 \t$dst, $src${c:vecelem};",
+                                [(set Int16Regs:$dst, (vector_extract
+                                  (v2i16 V2I16Regs:$src), imm:$c))],
+                                IMOV16rr>;
+
+// Extract v4i16
+def V4i16Extract : NVPTXVecInst<(outs Int16Regs:$dst),
+                                (ins V4I16Regs:$src, i8imm:$c),
+                                "mov.u16 \t$dst, $src${c:vecelem};",
+                                [(set Int16Regs:$dst, (vector_extract
+                                  (v4i16 V4I16Regs:$src), imm:$c))],
+                                IMOV16rr>;
+
+// Extract v2i8
+def V2i8Extract : NVPTXVecInst<(outs Int8Regs:$dst),
+                               (ins V2I8Regs:$src, i8imm:$c),
+                               "mov.u16 \t$dst, $src${c:vecelem};",
+                               [(set Int8Regs:$dst, (vector_extract
+                                 (v2i8 V2I8Regs:$src), imm:$c))],
+                               IMOV8rr>;
+
+// Extract v4i8
+def V4i8Extract : NVPTXVecInst<(outs Int8Regs:$dst),
+                               (ins V4I8Regs:$src, i8imm:$c),
+                               "mov.u16 \t$dst, $src${c:vecelem};",
+                               [(set Int8Regs:$dst, (vector_extract
+                                 (v4i8 V4I8Regs:$src), imm:$c))],
+                               IMOV8rr>;
+
+// Extract v2i32
+def V2i32Extract : NVPTXVecInst<(outs Int32Regs:$dst),
+                                (ins V2I32Regs:$src, i8imm:$c),
+                                "mov.u32 \t$dst, $src${c:vecelem};",
+                                [(set Int32Regs:$dst, (vector_extract
+                                  (v2i32 V2I32Regs:$src), imm:$c))],
+                                IMOV32rr>;
+
+// Extract v2f32
+def V2f32Extract : NVPTXVecInst<(outs Float32Regs:$dst),
+                                (ins V2F32Regs:$src, i8imm:$c),
+                                "mov.f32 \t$dst, $src${c:vecelem};",
+                                [(set Float32Regs:$dst, (vector_extract
+                                  (v2f32 V2F32Regs:$src), imm:$c))],
+                                FMOV32rr>;
+
+// Extract v2i64
+def V2i64Extract : NVPTXVecInst<(outs Int64Regs:$dst),
+                                (ins V2I64Regs:$src, i8imm:$c),
+                                "mov.u64 \t$dst, $src${c:vecelem};",
+                                [(set Int64Regs:$dst, (vector_extract
+                                  (v2i64 V2I64Regs:$src), imm:$c))],
+                                IMOV64rr>;
+
+// Extract v2f64
+def V2f64Extract : NVPTXVecInst<(outs Float64Regs:$dst),
+                                (ins V2F64Regs:$src, i8imm:$c),
+                                "mov.f64 \t$dst, $src${c:vecelem};",
+                                [(set Float64Regs:$dst, (vector_extract
+                                  (v2f64 V2F64Regs:$src), imm:$c))],
+                                FMOV64rr>;
+
+// Extract v4i32
+def V4i32Extract : NVPTXVecInst<(outs
Int32Regs:$dst), + (ins V4I32Regs:$src, i8imm:$c), + "mov.u32 \t$dst, $src${c:vecelem};", + [(set Int32Regs:$dst, (vector_extract + (v4i32 V4I32Regs:$src), imm:$c))], + IMOV32rr>; + +// Extract v4f32 +def V4f32Extract : NVPTXVecInst<(outs Float32Regs:$dst), + (ins V4F32Regs:$src, i8imm:$c), + "mov.f32 \t$dst, $src${c:vecelem};", + [(set Float32Regs:$dst, (vector_extract + (v4f32 V4F32Regs:$src), imm:$c))], + FMOV32rr>; +} + +let isAsCheapAsAMove=1, VecInstType=isVecInsert.Value in { +// Insert v2i8 +def V2i8Insert : NVPTXVecInst<(outs V2I8Regs:$dst), + (ins V2I8Regs:$src, Int8Regs:$val, i8imm:$c), + "mov.v2.u16 \t${dst:vecfull}, ${src:vecfull};" + "\n\tmov.u16 \t$dst${c:vecelem}, $val;", + [(set V2I8Regs:$dst, + (vector_insert V2I8Regs:$src, Int8Regs:$val, imm:$c))], + IMOV8rr>; + +// Insert v4i8 +def V4i8Insert : NVPTXVecInst<(outs V4I8Regs:$dst), + (ins V4I8Regs:$src, Int8Regs:$val, i8imm:$c), + "mov.v4.u16 \t${dst:vecfull}, ${src:vecfull};" + "\n\tmov.u16 \t$dst${c:vecelem}, $val;", + [(set V4I8Regs:$dst, + (vector_insert V4I8Regs:$src, Int8Regs:$val, imm:$c))], + IMOV8rr>; + +// Insert v2i16 +def V2i16Insert : NVPTXVecInst<(outs V2I16Regs:$dst), + (ins V2I16Regs:$src, Int16Regs:$val, i8imm:$c), + "mov.v2.u16 \t${dst:vecfull}, ${src:vecfull};" + "\n\tmov.u16 \t$dst${c:vecelem}, $val;", + [(set V2I16Regs:$dst, + (vector_insert V2I16Regs:$src, Int16Regs:$val, imm:$c))], + IMOV16rr>; + +// Insert v4i16 +def V4i16Insert : NVPTXVecInst<(outs V4I16Regs:$dst), + (ins V4I16Regs:$src, Int16Regs:$val, i8imm:$c), + "mov.v4.u16 \t${dst:vecfull}, ${src:vecfull};" + "\n\tmov.u16 \t$dst${c:vecelem}, $val;", + [(set V4I16Regs:$dst, + (vector_insert V4I16Regs:$src, Int16Regs:$val, imm:$c))], + IMOV16rr>; + +// Insert v2i32 +def V2i32Insert : NVPTXVecInst<(outs V2I32Regs:$dst), + (ins V2I32Regs:$src, Int32Regs:$val, i8imm:$c), + "mov.v2.u32 \t${dst:vecfull}, ${src:vecfull};" + "\n\tmov.u32 \t$dst${c:vecelem}, $val;", + [(set V2I32Regs:$dst, + (vector_insert V2I32Regs:$src, Int32Regs:$val, imm:$c))], + IMOV32rr>; + +// Insert v2f32 +def V2f32Insert : NVPTXVecInst<(outs V2F32Regs:$dst), + (ins V2F32Regs:$src, Float32Regs:$val, i8imm:$c), + "mov.v2.f32 \t${dst:vecfull}, ${src:vecfull};" + "\n\tmov.f32 \t$dst${c:vecelem}, $val;", + [(set V2F32Regs:$dst, + (vector_insert V2F32Regs:$src, Float32Regs:$val, imm:$c))], + FMOV32rr>; + +// Insert v2i64 +def V2i64Insert : NVPTXVecInst<(outs V2I64Regs:$dst), + (ins V2I64Regs:$src, Int64Regs:$val, i8imm:$c), + "mov.v2.u64 \t${dst:vecfull}, ${src:vecfull};" + "\n\tmov.u64 \t$dst${c:vecelem}, $val;", + [(set V2I64Regs:$dst, + (vector_insert V2I64Regs:$src, Int64Regs:$val, imm:$c))], + IMOV64rr>; + +// Insert v2f64 +def V2f64Insert : NVPTXVecInst<(outs V2F64Regs:$dst), + (ins V2F64Regs:$src, Float64Regs:$val, i8imm:$c), + "mov.v2.f64 \t${dst:vecfull}, ${src:vecfull};" + "\n\tmov.f64 \t$dst${c:vecelem}, $val;", + [(set V2F64Regs:$dst, + (vector_insert V2F64Regs:$src, Float64Regs:$val, imm:$c))], + FMOV64rr>; + +// Insert v4i32 +def V4i32Insert : NVPTXVecInst<(outs V4I32Regs:$dst), + (ins V4I32Regs:$src, Int32Regs:$val, i8imm:$c), + "mov.v4.u32 \t${dst:vecfull}, ${src:vecfull};" + "\n\tmov.u32 \t$dst${c:vecelem}, $val;", + [(set V4I32Regs:$dst, + (vector_insert V4I32Regs:$src, Int32Regs:$val, imm:$c))], + IMOV32rr>; + +// Insert v4f32 +def V4f32Insert : NVPTXVecInst<(outs V4F32Regs:$dst), + (ins V4F32Regs:$src, Float32Regs:$val, i8imm:$c), + "mov.v4.f32 \t${dst:vecfull}, ${src:vecfull};" + "\n\tmov.f32 \t$dst${c:vecelem}, $val;", + [(set V4F32Regs:$dst, + (vector_insert 
V4F32Regs:$src, Float32Regs:$val, imm:$c))], + FMOV32rr>; +} + +class BinOpAsmString { + string s = c; +} + +class V4AsmStr : BinOpAsmString< + !strconcat(!strconcat(!strconcat(!strconcat( + !strconcat(!strconcat(!strconcat( + opcode, " \t${dst}_0, ${a}_0, ${b}_0;\n\t"), + opcode), " \t${dst}_1, ${a}_1, ${b}_1;\n\t"), + opcode), " \t${dst}_2, ${a}_2, ${b}_2;\n\t"), + opcode), " \t${dst}_3, ${a}_3, ${b}_3;")>; + +class V2AsmStr : BinOpAsmString< + !strconcat(!strconcat(!strconcat( + opcode, " \t${dst}_0, ${a}_0, ${b}_0;\n\t"), + opcode), " \t${dst}_1, ${a}_1, ${b}_1;")>; + +class V4MADStr : BinOpAsmString< + !strconcat(!strconcat(!strconcat(!strconcat( + !strconcat(!strconcat(!strconcat( + opcode, " \t${dst}_0, ${a}_0, ${b}_0, ${c}_0;\n\t"), + opcode), " \t${dst}_1, ${a}_1, ${b}_1, ${c}_1;\n\t"), + opcode), " \t${dst}_2, ${a}_2, ${b}_2, ${c}_2;\n\t"), + opcode), " \t${dst}_3, ${a}_3, ${b}_3, ${c}_3;")>; + +class V2MADStr : BinOpAsmString< + !strconcat(!strconcat(!strconcat( + opcode, " \t${dst}_0, ${a}_0, ${b}_0, ${c}_0;\n\t"), + opcode), " \t${dst}_1, ${a}_1, ${b}_1, ${c}_1;")>; + +class V4UnaryStr : BinOpAsmString< + !strconcat(!strconcat(!strconcat(!strconcat( + !strconcat(!strconcat(!strconcat( + opcode, " \t${dst}_0, ${a}_0;\n\t"), + opcode), " \t${dst}_1, ${a}_1;\n\t"), + opcode), " \t${dst}_2, ${a}_2;\n\t"), + opcode), " \t${dst}_3, ${a}_3;")>; + +class V2UnaryStr : BinOpAsmString< + !strconcat(!strconcat(!strconcat( + opcode, " \t${dst}_0, ${a}_0;\n\t"), + opcode), " \t${dst}_1, ${a}_1;")>; + +class VecBinaryOp : + NVPTXVecInst<(outs regclass:$dst), (ins regclass:$a, regclass:$b), + asmstr.s, + [(set regclass:$dst, (OpNode regclass:$a, regclass:$b))], + sInst>; + +class VecShiftOp : + NVPTXVecInst<(outs regclass1:$dst), (ins regclass1:$a, regclass2:$b), + asmstr.s, + [(set regclass1:$dst, (OpNode regclass1:$a, regclass2:$b))], + sInst>; + +class VecUnaryOp : + NVPTXVecInst<(outs regclass:$dst), (ins regclass:$a), + asmstr.s, + [(set regclass:$dst, (OpNode regclass:$a))], sInst>; + +multiclass IntBinVOp { + def V2I64 : VecBinaryOp, OpNode, V2I64Regs, + i64op>; + def V4I32 : VecBinaryOp, OpNode, V4I32Regs, + i32op>; + def V2I32 : VecBinaryOp, OpNode, V2I32Regs, + i32op>; + def V4I16 : VecBinaryOp, OpNode, V4I16Regs, + i16op>; + def V2I16 : VecBinaryOp, OpNode, V2I16Regs, + i16op>; + def V4I8 : VecBinaryOp, OpNode, V4I8Regs, + i8op>; + def V2I8 : VecBinaryOp, OpNode, V2I8Regs, + i8op>; +} + +multiclass FloatBinVOp { + def V2F64 : VecBinaryOp, OpNode, + V2F64Regs, f64>; + def V4F32_ftz : VecBinaryOp, OpNode, + V4F32Regs, f32_ftz>, Requires<[doF32FTZ]>; + def V2F32_ftz : VecBinaryOp, OpNode, + V2F32Regs, f32_ftz>, Requires<[doF32FTZ]>; + def V4F32 : VecBinaryOp, OpNode, + V4F32Regs, f32>; + def V2F32 : VecBinaryOp, OpNode, + V2F32Regs, f32>; +} + +multiclass IntUnaryVOp { + def V2I64 : VecUnaryOp, OpNode, + V2I64Regs, i64op>; + def V4I32 : VecUnaryOp, OpNode, + V4I32Regs, i32op>; + def V2I32 : VecUnaryOp, OpNode, + V2I32Regs, i32op>; + def V4I16 : VecUnaryOp, OpNode, + V4I16Regs, i16op>; + def V2I16 : VecUnaryOp, OpNode, + V2I16Regs, i16op>; + def V4I8 : VecUnaryOp, OpNode, + V4I8Regs, i8op>; + def V2I8 : VecUnaryOp, OpNode, + V2I8Regs, i8op>; +} + + +// Integer Arithmetic +let VecInstType=isVecOther.Value in { +defm VAdd : IntBinVOp<"add.s", add, ADDi64rr, ADDi32rr, ADDi16rr, ADDi8rr>; +defm VSub : IntBinVOp<"sub.s", sub, SUBi64rr, SUBi32rr, SUBi16rr, SUBi8rr>; + +def AddCCV4I32 : VecBinaryOp, addc, V4I32Regs, + ADDCCi32rr>; +def AddCCV2I32 : VecBinaryOp, addc, V2I32Regs, + ADDCCi32rr>; 
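+
+// For illustration (an editorial sketch, not taken from the imported
+// sources): assuming V2AsmStr is parameterized by a single opcode string, as
+// its uses here suggest, the !strconcat chains above make an instantiation
+// such as V2AsmStr<"add.s32"> expand to one scalar PTX op per element:
+//
+//   add.s32 \t${dst}_0, ${a}_0, ${b}_0;
+//   add.s32 \t${dst}_1, ${a}_1, ${b}_1;
+//
+// so each two- or four-wide vector instruction is printed as a short run of
+// scalar instructions over the _0.._3 sub-registers.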
+def SubCCV4I32 : VecBinaryOp, subc, V4I32Regs, + SUBCCi32rr>; +def SubCCV2I32 : VecBinaryOp, subc, V2I32Regs, + SUBCCi32rr>; +def AddCCCV4I32 : VecBinaryOp, adde, V4I32Regs, + ADDCCCi32rr>; +def AddCCCV2I32 : VecBinaryOp, adde, V2I32Regs, + ADDCCCi32rr>; +def SubCCCV4I32 : VecBinaryOp, sube, V4I32Regs, + SUBCCCi32rr>; +def SubCCCV2I32 : VecBinaryOp, sube, V2I32Regs, + SUBCCCi32rr>; + +def ShiftLV2I64 : VecShiftOp, shl, V2I64Regs, V2I32Regs, + SHLi64rr>; +def ShiftLV2I32 : VecShiftOp, shl, V2I32Regs, V2I32Regs, + SHLi32rr>; +def ShiftLV4I32 : VecShiftOp, shl, V4I32Regs, V4I32Regs, + SHLi32rr>; +def ShiftLV2I16 : VecShiftOp, shl, V2I16Regs, V2I32Regs, + SHLi16rr>; +def ShiftLV4I16 : VecShiftOp, shl, V4I16Regs, V4I32Regs, + SHLi16rr>; +def ShiftLV2I8 : VecShiftOp, shl, V2I8Regs, V2I32Regs, + SHLi8rr>; +def ShiftLV4I8 : VecShiftOp, shl, V4I8Regs, V4I32Regs, + SHLi8rr>; +} + +// cvt to v*i32, helpers for shift +class CVTtoVeci32 : + NVPTXVecInst<(outs outclass:$d), (ins inclass:$s), asmstr, [], sInst>; + +class VecCVTStrHelper { + string s=!strconcat(op, !strconcat("\t", + !strconcat(dest, !strconcat(", ", !strconcat(src, ";"))))); +} + +class Vec2CVTStr { + string s=!strconcat(VecCVTStrHelper.s, + !strconcat("\n\t", VecCVTStrHelper.s)); +} + +class Vec4CVTStr { + string s=!strconcat(VecCVTStrHelper.s, + !strconcat("\n\t", + !strconcat(VecCVTStrHelper.s, + !strconcat("\n\t", + !strconcat(VecCVTStrHelper.s, + !strconcat("\n\t", VecCVTStrHelper.s)))))); +} + +let VecInstType=isVecOther.Value in { +def CVTv2i8tov2i32 : CVTtoVeci32.s, Zint_extendext8to32>; +def CVTv2i16tov2i32 : CVTtoVeci32.s, Zint_extendext16to32>; +def CVTv4i8tov4i32 : CVTtoVeci32.s, Zint_extendext8to32>; +def CVTv4i16tov4i32 : CVTtoVeci32.s, Zint_extendext16to32>; +def CVTv2i64tov2i32 : CVTtoVeci32.s, TRUNC_64to32>; +} + +def : Pat<(shl V2I16Regs:$src1, V2I16Regs:$src2), + (ShiftLV2I16 V2I16Regs:$src1, (CVTv2i16tov2i32 V2I16Regs:$src2))>; +def : Pat<(shl V2I8Regs:$src1, V2I8Regs:$src2), + (ShiftLV2I8 V2I8Regs:$src1, (CVTv2i8tov2i32 V2I8Regs:$src2))>; +def : Pat<(shl V2I64Regs:$src1, V2I64Regs:$src2), + (ShiftLV2I64 V2I64Regs:$src1, (CVTv2i64tov2i32 V2I64Regs:$src2))>; + +def : Pat<(shl V4I16Regs:$src1, V4I16Regs:$src2), + (ShiftLV4I16 V4I16Regs:$src1, (CVTv4i16tov4i32 V4I16Regs:$src2))>; +def : Pat<(shl V4I8Regs:$src1, V4I8Regs:$src2), + (ShiftLV4I8 V4I8Regs:$src1, (CVTv4i8tov4i32 V4I8Regs:$src2))>; + +let VecInstType=isVecOther.Value in { +def ShiftRAV2I64 : VecShiftOp, sra, V2I64Regs, V2I32Regs, + SRAi64rr>; +def ShiftRAV2I32 : VecShiftOp, sra, V2I32Regs, V2I32Regs, + SRAi32rr>; +def ShiftRAV4I32 : VecShiftOp, sra, V4I32Regs, V4I32Regs, + SRAi32rr>; +def ShiftRAV2I16 : VecShiftOp, sra, V2I16Regs, V2I32Regs, + SRAi16rr>; +def ShiftRAV4I16 : VecShiftOp, sra, V4I16Regs, V4I32Regs, + SRAi16rr>; +def ShiftRAV2I8 : VecShiftOp, sra, V2I8Regs, V2I32Regs, + SRAi8rr>; +def ShiftRAV4I8 : VecShiftOp, sra, V4I8Regs, V4I32Regs, + SRAi8rr>; + +def ShiftRLV2I64 : VecShiftOp, srl, V2I64Regs, V2I32Regs, + SRLi64rr>; +def ShiftRLV2I32 : VecShiftOp, srl, V2I32Regs, V2I32Regs, + SRLi32rr>; +def ShiftRLV4I32 : VecShiftOp, srl, V4I32Regs, V4I32Regs, + SRLi32rr>; +def ShiftRLV2I16 : VecShiftOp, srl, V2I16Regs, V2I32Regs, + SRLi16rr>; +def ShiftRLV4I16 : VecShiftOp, srl, V4I16Regs, V4I32Regs, + SRLi16rr>; +def ShiftRLV2I8 : VecShiftOp, srl, V2I8Regs, V2I32Regs, + SRLi8rr>; +def ShiftRLV4I8 : VecShiftOp, srl, V4I8Regs, V4I32Regs, + SRLi8rr>; + +defm VMult : IntBinVOp<"mul.lo.s", mul, MULTi64rr, MULTi32rr, MULTi16rr, + MULTi8rr>; +defm VMultHS : 
IntBinVOp<"mul.hi.s", mulhs, MULTHSi64rr, MULTHSi32rr, + MULTHSi16rr, + MULTHSi8rr>; +defm VMultHU : IntBinVOp<"mul.hi.u", mulhu, MULTHUi64rr, MULTHUi32rr, + MULTHUi16rr, + MULTHUi8rr>; +defm VSDiv : IntBinVOp<"div.s", sdiv, SDIVi64rr, SDIVi32rr, SDIVi16rr, + SDIVi8rr>; +defm VUDiv : IntBinVOp<"div.u", udiv, UDIVi64rr, UDIVi32rr, UDIVi16rr, + UDIVi8rr>; +defm VSRem : IntBinVOp<"rem.s", srem, SREMi64rr, SREMi32rr, SREMi16rr, + SREMi8rr>; +defm VURem : IntBinVOp<"rem.u", urem, UREMi64rr, UREMi32rr, UREMi16rr, + UREMi8rr>; +} + +def : Pat<(sra V2I16Regs:$src1, V2I16Regs:$src2), + (ShiftRAV2I16 V2I16Regs:$src1, (CVTv2i16tov2i32 V2I16Regs:$src2))>; +def : Pat<(sra V2I8Regs:$src1, V2I8Regs:$src2), + (ShiftRAV2I8 V2I8Regs:$src1, (CVTv2i8tov2i32 V2I8Regs:$src2))>; +def : Pat<(sra V2I64Regs:$src1, V2I64Regs:$src2), + (ShiftRAV2I64 V2I64Regs:$src1, (CVTv2i64tov2i32 V2I64Regs:$src2))>; + +def : Pat<(sra V4I16Regs:$src1, V4I16Regs:$src2), + (ShiftRAV4I16 V4I16Regs:$src1, (CVTv4i16tov4i32 V4I16Regs:$src2))>; +def : Pat<(sra V4I8Regs:$src1, V4I8Regs:$src2), + (ShiftRAV4I8 V4I8Regs:$src1, (CVTv4i8tov4i32 V4I8Regs:$src2))>; + +def : Pat<(srl V2I16Regs:$src1, V2I16Regs:$src2), + (ShiftRLV2I16 V2I16Regs:$src1, (CVTv2i16tov2i32 V2I16Regs:$src2))>; +def : Pat<(srl V2I8Regs:$src1, V2I8Regs:$src2), + (ShiftRLV2I8 V2I8Regs:$src1, (CVTv2i8tov2i32 V2I8Regs:$src2))>; +def : Pat<(srl V2I64Regs:$src1, V2I64Regs:$src2), + (ShiftRLV2I64 V2I64Regs:$src1, (CVTv2i64tov2i32 V2I64Regs:$src2))>; + +def : Pat<(srl V4I16Regs:$src1, V4I16Regs:$src2), + (ShiftRLV4I16 V4I16Regs:$src1, (CVTv4i16tov4i32 V4I16Regs:$src2))>; +def : Pat<(srl V4I8Regs:$src1, V4I8Regs:$src2), + (ShiftRLV4I8 V4I8Regs:$src1, (CVTv4i8tov4i32 V4I8Regs:$src2))>; + +multiclass VMAD { + def V4 : NVPTXVecInst<(outs regclassv4:$dst), + (ins regclassv4:$a, regclassv4:$b, regclassv4:$c), + V4MADStr.s, + [(set regclassv4:$dst, + (an (mn regclassv4:$a, regclassv4:$b), regclassv4:$c))], + sop>, + Requires<[Pred]>; + def V2 : NVPTXVecInst<(outs regclassv2:$dst), + (ins regclassv2:$a, regclassv2:$b, regclassv2:$c), + V2MADStr.s, + [(set regclassv2:$dst, + (an (mn regclassv2:$a, regclassv2:$b), regclassv2:$c))], + sop>, + Requires<[Pred]>; +} + +multiclass VMADV2Only { + def V2 : NVPTXVecInst<(outs regclass:$dst), + (ins regclass:$a, regclass:$b, regclass:$c), + V2MADStr.s, + [(set regclass:$dst, (add + (mul regclass:$a, regclass:$b), regclass:$c))], sop>, + Requires<[Pred]>; +} +multiclass VFMADV2Only { + def V2 : NVPTXVecInst<(outs regclass:$dst), + (ins regclass:$a, regclass:$b, regclass:$c), + V2MADStr.s, + [(set regclass:$dst, (fadd + (fmul regclass:$a, regclass:$b), regclass:$c))], sop>, + Requires<[Pred]>; +} + +let VecInstType=isVecOther.Value in { +defm I8MAD : VMAD<"mad.lo.s16", V4I8Regs, V2I8Regs, add, mul, MAD8rrr, true>; +defm I16MAD : VMAD<"mad.lo.s16", V4I16Regs, V2I16Regs, add, mul, MAD16rrr, + true>; +defm I32MAD : VMAD<"mad.lo.s32", V4I32Regs, V2I32Regs, add, mul, MAD32rrr, + true>; +defm I64MAD : VMADV2Only<"mad.lo.s64", V2I64Regs, MAD64rrr, true>; + +defm VNeg : IntUnaryVOp<"neg.s", ineg, INEG64, INEG32, INEG16, INEG8>; + +defm VAddf : FloatBinVOp<"add.", fadd, FADDf64rr, FADDf32rr, FADDf32rr_ftz>; +defm VSubf : FloatBinVOp<"sub.", fsub, FSUBf64rr, FSUBf32rr, FSUBf32rr_ftz>; +defm VMulf : FloatBinVOp<"mul.", fmul, FMULf64rr, FMULf32rr, FMULf32rr_ftz>; + +defm F32MAD_ftz : VMAD<"mad.ftz.f32", V4F32Regs, V2F32Regs, fadd, fmul, + FMAD32_ftzrrr, doFMADF32_ftz>; +defm F32FMA_ftz : VMAD<"fma.rn.ftz.f32", V4F32Regs, V2F32Regs, fadd, fmul, + FMA32_ftzrrr, 
doFMAF32_ftz>; +defm F32MAD : VMAD<"mad.f32", V4F32Regs, V2F32Regs, fadd, fmul, FMAD32rrr, + doFMADF32>; +defm F32FMA : VMAD<"fma.rn.f32", V4F32Regs, V2F32Regs, fadd, fmul, FMA32rrr, + doFMAF32>; +defm F64FMA : VFMADV2Only<"fma.rn.f64", V2F64Regs, FMA64rrr, doFMAF64>; +} + +let VecInstType=isVecOther.Value in { +def V4F32Div_prec_ftz : VecBinaryOp, fdiv, V4F32Regs, + FDIV32rr_prec_ftz>, Requires<[doF32FTZ, reqPTX20]>; +def V2F32Div_prec_ftz : VecBinaryOp, fdiv, V2F32Regs, + FDIV32rr_prec_ftz>, Requires<[doF32FTZ, reqPTX20]>; +def V4F32Div_prec : VecBinaryOp, fdiv, V4F32Regs, + FDIV32rr_prec>, Requires<[reqPTX20]>; +def V2F32Div_prec : VecBinaryOp, fdiv, V2F32Regs, + FDIV32rr_prec>, Requires<[reqPTX20]>; +def V2F32Div_ftz : VecBinaryOp, fdiv, V2F32Regs, + FDIV32rr_ftz>, Requires<[doF32FTZ]>; +def V4F32Div_ftz : VecBinaryOp, fdiv, V4F32Regs, + FDIV32rr_ftz>, Requires<[doF32FTZ]>; +def V2F32Div : VecBinaryOp, fdiv, V2F32Regs, FDIV32rr>; +def V4F32Div : VecBinaryOp, fdiv, V4F32Regs, FDIV32rr>; +def V2F64Div : VecBinaryOp, fdiv, V2F64Regs, FDIV64rr>; +} + +def fnegpat : PatFrag<(ops node:$in), (fneg node:$in)>; + +let VecInstType=isVecOther.Value in { +def VNegv2f32_ftz : VecUnaryOp, fnegpat, V2F32Regs, + FNEGf32_ftz>, Requires<[doF32FTZ]>; +def VNegv4f32_ftz : VecUnaryOp, fnegpat, V4F32Regs, + FNEGf32_ftz>, Requires<[doF32FTZ]>; +def VNegv2f32 : VecUnaryOp, fnegpat, V2F32Regs, FNEGf32>; +def VNegv4f32 : VecUnaryOp, fnegpat, V4F32Regs, FNEGf32>; +def VNegv2f64 : VecUnaryOp, fnegpat, V2F64Regs, FNEGf64>; + +// Logical Arithmetic +defm VAnd : IntBinVOp<"and.b", and, ANDb64rr, ANDb32rr, ANDb16rr, ANDb8rr>; +defm VOr : IntBinVOp<"or.b", or, ORb64rr, ORb32rr, ORb16rr, ORb8rr>; +defm VXor : IntBinVOp<"xor.b", xor, XORb64rr, XORb32rr, XORb16rr, XORb8rr>; + +defm VNot : IntUnaryVOp<"not.b", not, NOT64, NOT32, NOT16, NOT8>; +} + + +multiclass V2FPCONTRACT32_SUB_PAT { + def : Pat<(fsub V2F32Regs:$a, (fmul V2F32Regs:$b, V2F32Regs:$c)), + (Inst (VNegv2f32 V2F32Regs:$b), V2F32Regs:$c, V2F32Regs:$a)>, + Requires<[Pred]>; + + def : Pat<(fsub (fmul V2F32Regs:$a, V2F32Regs:$b), V2F32Regs:$c), + (Inst V2F32Regs:$a, V2F32Regs:$b, (VNegv2f32 V2F32Regs:$c))>, + Requires<[Pred]>; +} + +defm V2FMAF32ext_ftz : V2FPCONTRACT32_SUB_PAT; +defm V2FMADF32ext_ftz : V2FPCONTRACT32_SUB_PAT; +defm V2FMAF32ext : V2FPCONTRACT32_SUB_PAT; +defm V2FMADF32ext : V2FPCONTRACT32_SUB_PAT; + +multiclass V4FPCONTRACT32_SUB_PAT { + def : Pat<(fsub V4F32Regs:$a, (fmul V4F32Regs:$b, V4F32Regs:$c)), + (Inst (VNegv4f32 V4F32Regs:$b), V4F32Regs:$c, V4F32Regs:$a)>, + Requires<[Pred]>; + + def : Pat<(fsub (fmul V4F32Regs:$a, V4F32Regs:$b), V4F32Regs:$c), + (Inst V4F32Regs:$a, V4F32Regs:$b, (VNegv4f32 V4F32Regs:$c))>, + Requires<[Pred]>; +} + +defm V4FMAF32ext_ftz : V4FPCONTRACT32_SUB_PAT; +defm V4FMADF32ext_ftz : V4FPCONTRACT32_SUB_PAT; +defm V4FMAF32ext : V4FPCONTRACT32_SUB_PAT; +defm V4FMADF32ext : V4FPCONTRACT32_SUB_PAT; + +multiclass V2FPCONTRACT64_SUB_PAT { + def : Pat<(fsub V2F64Regs:$a, (fmul V2F64Regs:$b, V2F64Regs:$c)), + (Inst (VNegv2f64 V2F64Regs:$b), V2F64Regs:$c, V2F64Regs:$a)>, + Requires<[Pred]>; + + def : Pat<(fsub (fmul V2F64Regs:$a, V2F64Regs:$b), V2F64Regs:$c), + (Inst V2F64Regs:$a, V2F64Regs:$b, (VNegv2f64 V2F64Regs:$c))>, + Requires<[Pred]>; +} + +defm V2FMAF64ext : V2FPCONTRACT64_SUB_PAT; + +class VecModStr +{ + string t1 = !strconcat("${c", elem); + string t2 = !strconcat(t1, ":vecv"); + string t3 = !strconcat(t2, vecsize); + string t4 = !strconcat(t3, extra); + string t5 = !strconcat(t4, l); + string s = !strconcat(t5, 
"}"); +} +class ShuffleOneLine +{ + string t1 = VecModStr.s; + string t2 = !strconcat(t1, "mov."); + string t3 = !strconcat(t2, type); + string t4 = !strconcat(t3, " \t${dst}_"); + string t5 = !strconcat(t4, elem); + string t6 = !strconcat(t5, ", $src1"); + string t7 = !strconcat(t6, VecModStr.s); + string t8 = !strconcat(t7, ";\n\t"); + string t9 = !strconcat(t8, VecModStr.s); + string t10 = !strconcat(t9, "mov."); + string t11 = !strconcat(t10, type); + string t12 = !strconcat(t11, " \t${dst}_"); + string t13 = !strconcat(t12, elem); + string t14 = !strconcat(t13, ", $src2"); + string t15 = !strconcat(t14, VecModStr.s); + string s = !strconcat(t15, ";"); +} +class ShuffleAsmStr2 +{ + string t1 = ShuffleOneLine<"2", "0", type>.s; + string t2 = !strconcat(t1, "\n\t"); + string s = !strconcat(t2, ShuffleOneLine<"2", "1", type>.s); +} +class ShuffleAsmStr4 +{ + string t1 = ShuffleOneLine<"4", "0", type>.s; + string t2 = !strconcat(t1, "\n\t"); + string t3 = !strconcat(t2, ShuffleOneLine<"4", "1", type>.s); + string t4 = !strconcat(t3, "\n\t"); + string t5 = !strconcat(t4, ShuffleOneLine<"4", "2", type>.s); + string t6 = !strconcat(t5, "\n\t"); + string s = !strconcat(t6, ShuffleOneLine<"4", "3", type>.s); +} + +let neverHasSideEffects=1, VecInstType=isVecShuffle.Value in { +def VecShuffle_v4f32 : NVPTXVecInst<(outs V4F32Regs:$dst), + (ins V4F32Regs:$src1, V4F32Regs:$src2, + i8imm:$c0, i8imm:$c1, i8imm:$c2, i8imm:$c3), + !strconcat("//Mov $dst, $src1, $src2, $c0, $c1, $c2, $c3;\n\t", + ShuffleAsmStr4<"f32">.s), + [], FMOV32rr>; + +def VecShuffle_v4i32 : NVPTXVecInst<(outs V4I32Regs:$dst), + (ins V4I32Regs:$src1, V4I32Regs:$src2, + i8imm:$c0, i8imm:$c1, i8imm:$c2, i8imm:$c3), + !strconcat("//Mov $dst, $src1, $src2, $c0, $c1, $c2, $c3;\n\t", + ShuffleAsmStr4<"u32">.s), + [], IMOV32rr>; + +def VecShuffle_v4i16 : NVPTXVecInst<(outs V4I16Regs:$dst), + (ins V4I16Regs:$src1, V4I16Regs:$src2, + i8imm:$c0, i8imm:$c1, i8imm:$c2, i8imm:$c3), + !strconcat("//Mov $dst, $src1, $src2, $c0, $c1, $c2, $c3;\n\t", + ShuffleAsmStr4<"u16">.s), + [], IMOV16rr>; + +def VecShuffle_v4i8 : NVPTXVecInst<(outs V4I8Regs:$dst), + (ins V4I8Regs:$src1, V4I8Regs:$src2, + i8imm:$c0, i8imm:$c1, i8imm:$c2, i8imm:$c3), + !strconcat("//Mov $dst, $src1, $src2, $c0, $c1, $c2, $c3;\n\t", + ShuffleAsmStr4<"u16">.s), + [], IMOV8rr>; + +def VecShuffle_v2f32 : NVPTXVecInst<(outs V2F32Regs:$dst), + (ins V2F32Regs:$src1, V2F32Regs:$src2, + i8imm:$c0, i8imm:$c1), + !strconcat("//Mov $dst, $src1, $src2, $c0, $c1;\n\t", + ShuffleAsmStr2<"f32">.s), + [], FMOV32rr>; + +def VecShuffle_v2i32 : NVPTXVecInst<(outs V2I32Regs:$dst), + (ins V2I32Regs:$src1, V2I32Regs:$src2, + i8imm:$c0, i8imm:$c1), + !strconcat("//Mov $dst, $src1, $src2, $c0, $c1;\n\t", + ShuffleAsmStr2<"u32">.s), + [], IMOV32rr>; + +def VecShuffle_v2i8 : NVPTXVecInst<(outs V2I8Regs:$dst), + (ins V2I8Regs:$src1, V2I8Regs:$src2, + i8imm:$c0, i8imm:$c1), + !strconcat("//Mov $dst, $src1, $src2, $c0, $c1;\n\t", + ShuffleAsmStr2<"u16">.s), + [], IMOV8rr>; + +def VecShuffle_v2i16 : NVPTXVecInst<(outs V2I16Regs:$dst), + (ins V2I16Regs:$src1, V2I16Regs:$src2, + i8imm:$c0, i8imm:$c1), + !strconcat("//Mov $dst, $src1, $src2, $c0, $c1;\n\t", + ShuffleAsmStr2<"u16">.s), + [], IMOV16rr>; + +def VecShuffle_v2f64 : NVPTXVecInst<(outs V2F64Regs:$dst), + (ins V2F64Regs:$src1, V2F64Regs:$src2, + i8imm:$c0, i8imm:$c1), + !strconcat("//Mov $dst, $src1, $src2, $c0, $c1;\n\t", + ShuffleAsmStr2<"f64">.s), + [], FMOV64rr>; + +def VecShuffle_v2i64 : NVPTXVecInst<(outs V2I64Regs:$dst), + (ins V2I64Regs:$src1, 
V2I64Regs:$src2, + i8imm:$c0, i8imm:$c1), + !strconcat("//Mov $dst, $src1, $src2, $c0, $c1;\n\t", + ShuffleAsmStr2<"u64">.s), + [], IMOV64rr>; +} + +def ShuffleMask0 : SDNodeXForm(N); + return CurDAG->getTargetConstant(SVOp->getMaskElt(0), MVT::i32); +}]>; +def ShuffleMask1 : SDNodeXForm(N); + return CurDAG->getTargetConstant(SVOp->getMaskElt(1), MVT::i32); +}]>; +def ShuffleMask2 : SDNodeXForm(N); + return CurDAG->getTargetConstant(SVOp->getMaskElt(2), MVT::i32); +}]>; +def ShuffleMask3 : SDNodeXForm(N); + return CurDAG->getTargetConstant(SVOp->getMaskElt(3), MVT::i32); +}]>; + +// The spurious call is here to silence a compiler warning about N being +// unused. +def vec_shuf : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), + [{ N->getGluedNode(); return true; }]>; + +def : Pat<(v2f64 (vec_shuf:$op V2F64Regs:$src1, V2F64Regs:$src2)), + (VecShuffle_v2f64 V2F64Regs:$src1, V2F64Regs:$src2, + (ShuffleMask0 node:$op), (ShuffleMask1 node:$op))>; + +def : Pat<(v4f32 (vec_shuf:$op V4F32Regs:$src1, V4F32Regs:$src2)), + (VecShuffle_v4f32 V4F32Regs:$src1, V4F32Regs:$src2, + (ShuffleMask0 node:$op), (ShuffleMask1 node:$op), + (ShuffleMask2 node:$op), (ShuffleMask3 node:$op))>; + +def : Pat<(v2f32 (vec_shuf:$op V2F32Regs:$src1, V2F32Regs:$src2)), + (VecShuffle_v2f32 V2F32Regs:$src1, V2F32Regs:$src2, + (ShuffleMask0 node:$op), (ShuffleMask1 node:$op))>; + +def : Pat<(v2i64 (vec_shuf:$op V2I64Regs:$src1, V2I64Regs:$src2)), + (VecShuffle_v2i64 V2I64Regs:$src1, V2I64Regs:$src2, + (ShuffleMask0 node:$op), (ShuffleMask1 node:$op))>; + +def : Pat<(v4i32 (vec_shuf:$op V4I32Regs:$src1, V4I32Regs:$src2)), + (VecShuffle_v4i32 V4I32Regs:$src1, V4I32Regs:$src2, + (ShuffleMask0 node:$op), (ShuffleMask1 node:$op), + (ShuffleMask2 node:$op), (ShuffleMask3 node:$op))>; + +def : Pat<(v2i32 (vec_shuf:$op V2I32Regs:$src1, V2I32Regs:$src2)), + (VecShuffle_v2i32 V2I32Regs:$src1, V2I32Regs:$src2, + (ShuffleMask0 node:$op), (ShuffleMask1 node:$op))>; + +def : Pat<(v4i16 (vec_shuf:$op V4I16Regs:$src1, V4I16Regs:$src2)), + (VecShuffle_v4i16 V4I16Regs:$src1, V4I16Regs:$src2, + (ShuffleMask0 node:$op), (ShuffleMask1 node:$op), + (ShuffleMask2 node:$op), (ShuffleMask3 node:$op))>; + +def : Pat<(v2i16 (vec_shuf:$op V2I16Regs:$src1, V2I16Regs:$src2)), + (VecShuffle_v2i16 V2I16Regs:$src1, V2I16Regs:$src2, + (ShuffleMask0 node:$op), (ShuffleMask1 node:$op))>; + +def : Pat<(v4i8 (vec_shuf:$op V4I8Regs:$src1, V4I8Regs:$src2)), + (VecShuffle_v4i8 V4I8Regs:$src1, V4I8Regs:$src2, + (ShuffleMask0 node:$op), (ShuffleMask1 node:$op), + (ShuffleMask2 node:$op), (ShuffleMask3 node:$op))>; + +def : Pat<(v2i8 (vec_shuf:$op V2I8Regs:$src1, V2I8Regs:$src2)), + (VecShuffle_v2i8 V2I8Regs:$src1, V2I8Regs:$src2, + (ShuffleMask0 node:$op), (ShuffleMask1 node:$op))>; + +class Build_Vector2 + : NVPTXVecInst<(outs vclass:$dst), + (ins sclass:$a1, sclass:$a2), + !strconcat(asmstr, "\t${dst:vecfull}, {{$a1, $a2}};"), + [(set vclass:$dst, (build_vector sclass:$a1, sclass:$a2))], + si>; +class Build_Vector4 + : NVPTXVecInst<(outs vclass:$dst), + (ins sclass:$a1, sclass:$a2, sclass:$a3, sclass:$a4), + !strconcat(asmstr, "\t${dst:vecfull}, {{$a1, $a2, $a3, $a4}};"), + [(set vclass:$dst, + (build_vector sclass:$a1, sclass:$a2, + sclass:$a3, sclass:$a4))], si>; + +let isAsCheapAsAMove=1, VecInstType=isVecBuild.Value in { +def Build_Vector2_f32 : Build_Vector2<"mov.v2.f32", V2F32Regs, Float32Regs, + FMOV32rr>; +def Build_Vector2_f64 : Build_Vector2<"mov.v2.f64", V2F64Regs, Float64Regs, + FMOV64rr>; + +def Build_Vector2_i32 : 
Build_Vector2<"mov.v2.u32", V2I32Regs, Int32Regs, + IMOV32rr>; +def Build_Vector2_i64 : Build_Vector2<"mov.v2.u64", V2I64Regs, Int64Regs, + IMOV64rr>; +def Build_Vector2_i16 : Build_Vector2<"mov.v2.u16", V2I16Regs, Int16Regs, + IMOV16rr>; +def Build_Vector2_i8 : Build_Vector2<"mov.v2.u16", V2I8Regs, Int8Regs, + IMOV8rr>; + +def Build_Vector4_f32 : Build_Vector4<"mov.v4.f32", V4F32Regs, Float32Regs, + FMOV32rr>; + +def Build_Vector4_i32 : Build_Vector4<"mov.v4.u32", V4I32Regs, Int32Regs, + IMOV32rr>; +def Build_Vector4_i16 : Build_Vector4<"mov.v4.u16", V4I16Regs, Int16Regs, + IMOV16rr>; +def Build_Vector4_i8 : Build_Vector4<"mov.v4.u16", V4I8Regs, Int8Regs, + IMOV8rr>; +} + +class Vec_Move + : NVPTXVecInst<(outs vclass:$dst), (ins vclass:$src), + !strconcat(asmstr, "\t${dst:vecfull}, ${src:vecfull};"), + [], sop>; + +let isAsCheapAsAMove=1, neverHasSideEffects=1, IsSimpleMove=1, + VecInstType=isVecOther.Value in { +def V4f32Mov : Vec_Move<"mov.v4.f32", V4F32Regs, FMOV32rr>; +def V2f32Mov : Vec_Move<"mov.v2.f32", V2F32Regs, FMOV32rr>; + +def V4i32Mov : Vec_Move<"mov.v4.u32", V4I32Regs, IMOV32rr>; +def V2i32Mov : Vec_Move<"mov.v2.u32", V2I32Regs, IMOV32rr>; + +def V4i16Mov : Vec_Move<"mov.v4.u16", V4I16Regs, IMOV16rr>; +def V2i16Mov : Vec_Move<"mov.v2.u16", V2I16Regs, IMOV16rr>; + +def V4i8Mov : Vec_Move<"mov.v4.u16", V4I8Regs, IMOV8rr>; +def V2i8Mov : Vec_Move<"mov.v2.u16", V2I8Regs, IMOV8rr>; + +def V2f64Mov : Vec_Move<"mov.v2.f64", V2F64Regs, FMOV64rr>; +def V2i64Mov : Vec_Move<"mov.v2.u64", V2I64Regs, IMOV64rr>; +} + +// extract subvector patterns +def extract_subvec : SDNode<"ISD::EXTRACT_SUBVECTOR", + SDTypeProfile<1, 2, [SDTCisPtrTy<2>]>>; + +def : Pat<(v2f32 (extract_subvec V4F32Regs:$src, 0)), + (Build_Vector2_f32 (V4f32Extract V4F32Regs:$src, 0), + (V4f32Extract V4F32Regs:$src, 1))>; +def : Pat<(v2f32 (extract_subvec V4F32Regs:$src, 2)), + (Build_Vector2_f32 (V4f32Extract V4F32Regs:$src, 2), + (V4f32Extract V4F32Regs:$src, 3))>; +def : Pat<(v2i32 (extract_subvec V4I32Regs:$src, 0)), + (Build_Vector2_i32 (V4i32Extract V4I32Regs:$src, 0), + (V4i32Extract V4I32Regs:$src, 1))>; +def : Pat<(v2i32 (extract_subvec V4I32Regs:$src, 2)), + (Build_Vector2_i32 (V4i32Extract V4I32Regs:$src, 2), + (V4i32Extract V4I32Regs:$src, 3))>; +def : Pat<(v2i16 (extract_subvec V4I16Regs:$src, 0)), + (Build_Vector2_i16 (V4i16Extract V4I16Regs:$src, 0), + (V4i16Extract V4I16Regs:$src, 1))>; +def : Pat<(v2i16 (extract_subvec V4I16Regs:$src, 2)), + (Build_Vector2_i16 (V4i16Extract V4I16Regs:$src, 2), + (V4i16Extract V4I16Regs:$src, 3))>; +def : Pat<(v2i8 (extract_subvec V4I8Regs:$src, 0)), + (Build_Vector2_i8 (V4i8Extract V4I8Regs:$src, 0), + (V4i8Extract V4I8Regs:$src, 1))>; +def : Pat<(v2i8 (extract_subvec V4I8Regs:$src, 2)), + (Build_Vector2_i8 (V4i8Extract V4I8Regs:$src, 2), + (V4i8Extract V4I8Regs:$src, 3))>; + +// Select instructions +class Select_OneLine { + string t1 = !strconcat("selp.", type); + string t2 = !strconcat(t1, " \t${dst}_"); + string t3 = !strconcat(t2, pos); + string t4 = !strconcat(t3, ", ${src1}_"); + string t5 = !strconcat(t4, pos); + string t6 = !strconcat(t5, ", ${src2}_"); + string t7 = !strconcat(t6, pos); + string s = !strconcat(t7, ", $p;"); +} + +class Select_Str2 { + string t1 = Select_OneLine.s; + string t2 = !strconcat(t1, "\n\t"); + string s = !strconcat(t2, Select_OneLine.s); +} + +class Select_Str4 { + string t1 = Select_OneLine.s; + string t2 = !strconcat(t1, "\n\t"); + string t3 = !strconcat(t2, Select_OneLine.s); + string t4 = !strconcat(t3, "\n\t"); + string t5 = 
!strconcat(t4, Select_OneLine.s); + string t6 = !strconcat(t5, "\n\t"); + string s = !strconcat(t6, Select_OneLine.s); + +} + +class Vec_Select + : NVPTXVecInst<(outs vclass:$dst), + (ins vclass:$src1, vclass:$src2, Int1Regs:$p), + asmstr, + [(set vclass:$dst, (select Int1Regs:$p, vclass:$src1, + vclass:$src2))], + sop>; + +let VecInstType=isVecOther.Value in { +def V2I64_Select : Vec_Select.s, SELECTi64rr>; +def V4I32_Select : Vec_Select.s, SELECTi32rr>; +def V2I32_Select : Vec_Select.s, SELECTi32rr>; +def V4I16_Select : Vec_Select.s, SELECTi16rr>; +def V2I16_Select : Vec_Select.s, SELECTi16rr>; +def V4I8_Select : Vec_Select.s, SELECTi8rr>; +def V2I8_Select : Vec_Select.s, SELECTi8rr>; + +def V2F64_Select : Vec_Select.s, SELECTf64rr>; +def V4F32_Select : Vec_Select.s, SELECTf32rr>; +def V2F32_Select : Vec_Select.s, SELECTf32rr>; +} + +// Comparison instructions + +// setcc convenience fragments. +def vsetoeq : PatFrag<(ops node:$lhs, node:$rhs), + (setcc node:$lhs, node:$rhs, SETOEQ)>; +def vsetogt : PatFrag<(ops node:$lhs, node:$rhs), + (setcc node:$lhs, node:$rhs, SETOGT)>; +def vsetoge : PatFrag<(ops node:$lhs, node:$rhs), + (setcc node:$lhs, node:$rhs, SETOGE)>; +def vsetolt : PatFrag<(ops node:$lhs, node:$rhs), + (setcc node:$lhs, node:$rhs, SETOLT)>; +def vsetole : PatFrag<(ops node:$lhs, node:$rhs), + (setcc node:$lhs, node:$rhs, SETOLE)>; +def vsetone : PatFrag<(ops node:$lhs, node:$rhs), + (setcc node:$lhs, node:$rhs, SETONE)>; +def vseto : PatFrag<(ops node:$lhs, node:$rhs), + (setcc node:$lhs, node:$rhs, SETO)>; +def vsetuo : PatFrag<(ops node:$lhs, node:$rhs), + (setcc node:$lhs, node:$rhs, SETUO)>; +def vsetueq : PatFrag<(ops node:$lhs, node:$rhs), + (setcc node:$lhs, node:$rhs, SETUEQ)>; +def vsetugt : PatFrag<(ops node:$lhs, node:$rhs), + (setcc node:$lhs, node:$rhs, SETUGT)>; +def vsetuge : PatFrag<(ops node:$lhs, node:$rhs), + (setcc node:$lhs, node:$rhs, SETUGE)>; +def vsetult : PatFrag<(ops node:$lhs, node:$rhs), + (setcc node:$lhs, node:$rhs, SETULT)>; +def vsetule : PatFrag<(ops node:$lhs, node:$rhs), + (setcc node:$lhs, node:$rhs, SETULE)>; +def vsetune : PatFrag<(ops node:$lhs, node:$rhs), + (setcc node:$lhs, node:$rhs, SETUNE)>; +def vseteq : PatFrag<(ops node:$lhs, node:$rhs), + (setcc node:$lhs, node:$rhs, SETEQ)>; +def vsetgt : PatFrag<(ops node:$lhs, node:$rhs), + (setcc node:$lhs, node:$rhs, SETGT)>; +def vsetge : PatFrag<(ops node:$lhs, node:$rhs), + (setcc node:$lhs, node:$rhs, SETGE)>; +def vsetlt : PatFrag<(ops node:$lhs, node:$rhs), + (setcc node:$lhs, node:$rhs, SETLT)>; +def vsetle : PatFrag<(ops node:$lhs, node:$rhs), + (setcc node:$lhs, node:$rhs, SETLE)>; +def vsetne : PatFrag<(ops node:$lhs, node:$rhs), + (setcc node:$lhs, node:$rhs, SETNE)>; + +class Vec_Compare + : NVPTXVecInst<(outs outrclass:$dst), + (ins inrclass:$a, inrclass:$b), + "Unsupported", + [(set outrclass:$dst, (op inrclass:$a, inrclass:$b))], + sop>; + +multiclass Vec_Compare_All +{ + def V2I8 : Vec_Compare; + def V4I8 : Vec_Compare; + def V2I16 : Vec_Compare; + def V4I16 : Vec_Compare; + def V2I32 : Vec_Compare; + def V4I32 : Vec_Compare; + def V2I64 : Vec_Compare; +} + +let VecInstType=isVecOther.Value in { + defm VecSGT : Vec_Compare_All; + defm VecUGT : Vec_Compare_All; + defm VecSLT : Vec_Compare_All; + defm VecULT : Vec_Compare_All; + defm VecSGE : Vec_Compare_All; + defm VecUGE : Vec_Compare_All; + defm VecSLE : Vec_Compare_All; + defm VecULE : Vec_Compare_All; + defm VecSEQ : Vec_Compare_All; + defm VecUEQ : Vec_Compare_All; + defm VecSNE : Vec_Compare_All; + defm VecUNE : 
Vec_Compare_All; +} + +multiclass FVec_Compare_All +{ + def V2F32 : Vec_Compare; + def V4F32 : Vec_Compare; + def V2F64 : Vec_Compare; +} + +let VecInstType=isVecOther.Value in { + defm FVecGT : FVec_Compare_All; + defm FVecLT : FVec_Compare_All; + defm FVecGE : FVec_Compare_All; + defm FVecLE : FVec_Compare_All; + defm FVecEQ : FVec_Compare_All; + defm FVecNE : FVec_Compare_All; + + defm FVecUGT : FVec_Compare_All; + defm FVecULT : FVec_Compare_All; + defm FVecUGE : FVec_Compare_All; + defm FVecULE : FVec_Compare_All; + defm FVecUEQ : FVec_Compare_All; + defm FVecUNE : FVec_Compare_All; + + defm FVecNUM : FVec_Compare_All; + defm FVecNAN : FVec_Compare_All; +} + +class LoadParamScalar4Inst : + NVPTXInst<(outs regclass:$d1, regclass:$d2, regclass:$d3, regclass:$d4), + (ins i32imm:$a, i32imm:$b), + !strconcat(!strconcat("ld.param", opstr), + "\t{{$d1, $d2, $d3, $d4}}, [retval0+$b];"), []>; + +class LoadParamScalar2Inst : + NVPTXInst<(outs regclass:$d1, regclass:$d2), + (ins i32imm:$a, i32imm:$b), + !strconcat(!strconcat("ld.param", opstr), + "\t{{$d1, $d2}}, [retval0+$b];"), []>; + + +class StoreParamScalar4Inst : + NVPTXInst<(outs), + (ins regclass:$s1, regclass:$s2, regclass:$s3, regclass:$s4, + i32imm:$a, i32imm:$b), + !strconcat(!strconcat("st.param", opstr), + "\t[param$a+$b], {{$s1, $s2, $s3, $s4}};"), []>; + +class StoreParamScalar2Inst : + NVPTXInst<(outs), + (ins regclass:$s1, regclass:$s2, i32imm:$a, i32imm:$b), + !strconcat(!strconcat("st.param", opstr), + "\t[param$a+$b], {{$s1, $s2}};"), []>; + +class StoreRetvalScalar4Inst : + NVPTXInst<(outs), + (ins regclass:$s1, regclass:$s2, regclass:$s3, regclass:$s4, + i32imm:$a), + !strconcat(!strconcat("st.param", opstr), + "\t[func_retval+$a], {{$s1, $s2, $s3, $s4}};"), []>; + +class StoreRetvalScalar2Inst : + NVPTXInst<(outs), + (ins regclass:$s1, regclass:$s2, i32imm:$a), + !strconcat(!strconcat("st.param", opstr), + "\t[func_retval+$a], {{$s1, $s2}};"), []>; + +def LoadParamScalar4I32 : LoadParamScalar4Inst; +def LoadParamScalar4I16 : LoadParamScalar4Inst; +def LoadParamScalar4I8 : LoadParamScalar4Inst; + +def LoadParamScalar2I64 : LoadParamScalar2Inst; +def LoadParamScalar2I32 : LoadParamScalar2Inst; +def LoadParamScalar2I16 : LoadParamScalar2Inst; +def LoadParamScalar2I8 : LoadParamScalar2Inst; + +def LoadParamScalar4F32 : LoadParamScalar4Inst; +def LoadParamScalar2F32 : LoadParamScalar2Inst; +def LoadParamScalar2F64 : LoadParamScalar2Inst; + +def StoreParamScalar4I32 : StoreParamScalar4Inst; +def StoreParamScalar4I16 : StoreParamScalar4Inst; +def StoreParamScalar4I8 : StoreParamScalar4Inst; + +def StoreParamScalar2I64 : StoreParamScalar2Inst; +def StoreParamScalar2I32 : StoreParamScalar2Inst; +def StoreParamScalar2I16 : StoreParamScalar2Inst; +def StoreParamScalar2I8 : StoreParamScalar2Inst; + +def StoreParamScalar4F32 : StoreParamScalar4Inst; +def StoreParamScalar2F32 : StoreParamScalar2Inst; +def StoreParamScalar2F64 : StoreParamScalar2Inst; + +def StoreRetvalScalar4I32 : StoreRetvalScalar4Inst; +def StoreRetvalScalar4I16 : StoreRetvalScalar4Inst; +def StoreRetvalScalar4I8 : StoreRetvalScalar4Inst; + +def StoreRetvalScalar2I64 : StoreRetvalScalar2Inst; +def StoreRetvalScalar2I32 : StoreRetvalScalar2Inst; +def StoreRetvalScalar2I16 : StoreRetvalScalar2Inst; +def StoreRetvalScalar2I8 : StoreRetvalScalar2Inst; + +def StoreRetvalScalar4F32 : StoreRetvalScalar4Inst; +def StoreRetvalScalar2F32 : StoreRetvalScalar2Inst; +def StoreRetvalScalar2F64 : StoreRetvalScalar2Inst; + +class LoadParamVecInst: + NVPTXVecInst<(outs 
regclass:$dst), (ins i32imm:$a, i32imm:$b), + "loadparam : $dst <- [$a, $b]", + [(set regclass:$dst, (LoadParam (i32 imm:$a), (i32 imm:$b)))], + sop>; + +class StoreParamVecInst + : NVPTXVecInst<(outs), (ins regclass:$val, i32imm:$a, i32imm:$b), + "storeparam : [$a, $b] <- $val", + [(StoreParam (i32 imm:$a), (i32 imm:$b), regclass:$val)], sop>; + +class StoreRetvalVecInst + : NVPTXVecInst<(outs), (ins regclass:$val, i32imm:$a), + "storeretval : retval[$a] <- $val", + [(StoreRetval (i32 imm:$a), regclass:$val)], sop>; + +let VecInstType=isVecLD.Value in { +def LoadParamV4I32 : LoadParamVecInst; +def LoadParamV4I16 : LoadParamVecInst; +def LoadParamV4I8 : LoadParamVecInst; + +def LoadParamV2I64 : LoadParamVecInst; +def LoadParamV2I32 : LoadParamVecInst; +def LoadParamV2I16 : LoadParamVecInst; +def LoadParamV2I8 : LoadParamVecInst; + +def LoadParamV4F32 : LoadParamVecInst; +def LoadParamV2F32 : LoadParamVecInst; +def LoadParamV2F64 : LoadParamVecInst; +} + +let VecInstType=isVecST.Value in { +def StoreParamV4I32 : StoreParamVecInst; +def StoreParamV4I16 : StoreParamVecInst; +def StoreParamV4I8 : StoreParamVecInst; + +def StoreParamV2I64 : StoreParamVecInst; +def StoreParamV2I32 : StoreParamVecInst; +def StoreParamV2I16 : StoreParamVecInst; +def StoreParamV2I8 : StoreParamVecInst; + +def StoreParamV4F32 : StoreParamVecInst; +def StoreParamV2F32 : StoreParamVecInst; +def StoreParamV2F64 : StoreParamVecInst; + +def StoreRetvalV4I32 : StoreRetvalVecInst; +def StoreRetvalV4I16 : StoreRetvalVecInst; +def StoreRetvalV4I8 : StoreRetvalVecInst; + +def StoreRetvalV2I64 : StoreRetvalVecInst; +def StoreRetvalV2I32 : StoreRetvalVecInst; +def StoreRetvalV2I16 : StoreRetvalVecInst; +def StoreRetvalV2I8 : StoreRetvalVecInst; + +def StoreRetvalV4F32 : StoreRetvalVecInst; +def StoreRetvalV2F32 : StoreRetvalVecInst; +def StoreRetvalV2F64 : StoreRetvalVecInst; + +} + + +// Int vector to int scalar bit convert +// v4i8 -> i32 +def : Pat<(i32 (bitconvert V4I8Regs:$s)), + (V4I8toI32 (V4i8Extract V4I8Regs:$s,0), (V4i8Extract V4I8Regs:$s,1), + (V4i8Extract V4I8Regs:$s,2), (V4i8Extract V4I8Regs:$s,3))>; +// v4i16 -> i64 +def : Pat<(i64 (bitconvert V4I16Regs:$s)), + (V4I16toI64 (V4i16Extract V4I16Regs:$s,0), + (V4i16Extract V4I16Regs:$s,1), + (V4i16Extract V4I16Regs:$s,2), + (V4i16Extract V4I16Regs:$s,3))>; +// v2i8 -> i16 +def : Pat<(i16 (bitconvert V2I8Regs:$s)), + (V2I8toI16 (V2i8Extract V2I8Regs:$s,0), (V2i8Extract V2I8Regs:$s,1))>; +// v2i16 -> i32 +def : Pat<(i32 (bitconvert V2I16Regs:$s)), + (V2I16toI32 (V2i16Extract V2I16Regs:$s,0), + (V2i16Extract V2I16Regs:$s,1))>; +// v2i32 -> i64 +def : Pat<(i64 (bitconvert V2I32Regs:$s)), + (V2I32toI64 (V2i32Extract V2I32Regs:$s,0), + (V2i32Extract V2I32Regs:$s,1))>; + +// Int scalar to int vector bit convert +let VecInstType=isVecDest.Value in { +// i32 -> v4i8 +def VecI32toV4I8 : NVPTXVecInst<(outs V4I8Regs:$d), (ins Int32Regs:$s), + "Error!", + [(set V4I8Regs:$d, (bitconvert Int32Regs:$s))], + I32toV4I8>; +// i64 -> v4i16 +def VecI64toV4I16 : NVPTXVecInst<(outs V4I16Regs:$d), (ins Int64Regs:$s), + "Error!", + [(set V4I16Regs:$d, (bitconvert Int64Regs:$s))], + I64toV4I16>; +// i16 -> v2i8 +def VecI16toV2I8 : NVPTXVecInst<(outs V2I8Regs:$d), (ins Int16Regs:$s), + "Error!", + [(set V2I8Regs:$d, (bitconvert Int16Regs:$s))], + I16toV2I8>; +// i32 -> v2i16 +def VecI32toV2I16 : NVPTXVecInst<(outs V2I16Regs:$d), (ins Int32Regs:$s), + "Error!", + [(set V2I16Regs:$d, (bitconvert Int32Regs:$s))], + I32toV2I16>; +// i64 -> v2i32 +def VecI64toV2I32 : NVPTXVecInst<(outs 
V2I32Regs:$d), (ins Int64Regs:$s), + "Error!", + [(set V2I32Regs:$d, (bitconvert Int64Regs:$s))], + I64toV2I32>; +} + +// Int vector to int vector bit convert +// v4i8 -> v2i16 +def : Pat<(v2i16 (bitconvert V4I8Regs:$s)), + (VecI32toV2I16 + (V4I8toI32 (V4i8Extract V4I8Regs:$s,0), (V4i8Extract V4I8Regs:$s,1), + (V4i8Extract V4I8Regs:$s,2), (V4i8Extract V4I8Regs:$s,3)))>; +// v4i16 -> v2i32 +def : Pat<(v2i32 (bitconvert V4I16Regs:$s)), + (VecI64toV2I32 + (V4I16toI64 (V4i16Extract V4I16Regs:$s,0), (V4i16Extract V4I16Regs:$s,1), + (V4i16Extract V4I16Regs:$s,2), (V4i16Extract V4I16Regs:$s,3)))>; +// v2i16 -> v4i8 +def : Pat<(v4i8 (bitconvert V2I16Regs:$s)), + (VecI32toV4I8 + (V2I16toI32 (V2i16Extract V2I16Regs:$s,0), (V2i16Extract V2I16Regs:$s,1)))>; +// v2i32 -> v4i16 +def : Pat<(v4i16 (bitconvert V2I32Regs:$s)), + (VecI64toV4I16 + (V2I32toI64 (V2i32Extract V2I32Regs:$s,0), (V2i32Extract V2I32Regs:$s,1)))>; +// v2i64 -> v4i32 +def : Pat<(v4i32 (bitconvert V2I64Regs:$s)), + (Build_Vector4_i32 + (V2i32Extract (VecI64toV2I32 (V2i64Extract V2I64Regs:$s, 0)), 0), + (V2i32Extract (VecI64toV2I32 (V2i64Extract V2I64Regs:$s, 0)), 1), + (V2i32Extract (VecI64toV2I32 (V2i64Extract V2I64Regs:$s, 1)), 0), + (V2i32Extract (VecI64toV2I32 (V2i64Extract V2I64Regs:$s, 1)), 1))>; +// v4i32 -> v2i64 +def : Pat<(v2i64 (bitconvert V4I32Regs:$s)), + (Build_Vector2_i64 + (V2I32toI64 (V4i32Extract V4I32Regs:$s,0), (V4i32Extract V4I32Regs:$s,1)), + (V2I32toI64 (V4i32Extract V4I32Regs:$s,2), (V4i32Extract V4I32Regs:$s,3)))>; + +// Fp scalar to fp vector convert +// f64 -> v2f32 +let VecInstType=isVecDest.Value in { +def VecF64toV2F32 : NVPTXVecInst<(outs V2F32Regs:$d), (ins Float64Regs:$s), + "Error!", + [(set V2F32Regs:$d, (bitconvert Float64Regs:$s))], + F64toV2F32>; +} + +// Fp vector to fp scalar convert +// v2f32 -> f64 +def : Pat<(f64 (bitconvert V2F32Regs:$s)), + (V2F32toF64 (V2f32Extract V2F32Regs:$s,0), (V2f32Extract V2F32Regs:$s,1))>; + +// Fp scalar to int vector convert +// f32 -> v4i8 +def : Pat<(v4i8 (bitconvert Float32Regs:$s)), + (VecI32toV4I8 (BITCONVERT_32_F2I Float32Regs:$s))>; +// f32 -> v2i16 +def : Pat<(v2i16 (bitconvert Float32Regs:$s)), + (VecI32toV2I16 (BITCONVERT_32_F2I Float32Regs:$s))>; +// f64 -> v4i16 +def : Pat<(v4i16 (bitconvert Float64Regs:$s)), + (VecI64toV4I16 (BITCONVERT_64_F2I Float64Regs:$s))>; +// f64 -> v2i32 +def : Pat<(v2i32 (bitconvert Float64Regs:$s)), + (VecI64toV2I32 (BITCONVERT_64_F2I Float64Regs:$s))>; + +// Int vector to fp scalar convert +// v4i8 -> f32 +def : Pat<(f32 (bitconvert V4I8Regs:$s)), + (BITCONVERT_32_I2F + (V4I8toI32 (V4i8Extract V4I8Regs:$s,0), (V4i8Extract V4I8Regs:$s,1), + (V4i8Extract V4I8Regs:$s,2), (V4i8Extract V4I8Regs:$s,3)))>; +// v4i16 -> f64 +def : Pat<(f64 (bitconvert V4I16Regs:$s)), + (BITCONVERT_64_I2F + (V4I16toI64 (V4i16Extract V4I16Regs:$s,0), (V4i16Extract V4I16Regs:$s,1), + (V4i16Extract V4I16Regs:$s,2), (V4i16Extract V4I16Regs:$s,3)))>; +// v2i16 -> f32 +def : Pat<(f32 (bitconvert V2I16Regs:$s)), + (BITCONVERT_32_I2F + (V2I16toI32 (V2i16Extract V2I16Regs:$s,0), (V2i16Extract V2I16Regs:$s,1)))>; +// v2i32 -> f64 +def : Pat<(f64 (bitconvert V2I32Regs:$s)), + (BITCONVERT_64_I2F + (V2I32toI64 (V2i32Extract V2I32Regs:$s,0), (V2i32Extract V2I32Regs:$s,1)))>; + +// Int scalar to fp vector convert +// i64 -> v2f32 +def : Pat<(v2f32 (bitconvert Int64Regs:$s)), + (VecF64toV2F32 (BITCONVERT_64_I2F Int64Regs:$s))>; + +// Fp vector to int scalar convert +// v2f32 -> i64 +def : Pat<(i64 (bitconvert V2F32Regs:$s)), + (BITCONVERT_64_F2I + (V2F32toF64 
(V2f32Extract V2F32Regs:$s,0), (V2f32Extract V2F32Regs:$s,1)))>; + +// Int vector to fp vector convert +// v2i64 -> v4f32 +def : Pat<(v4f32 (bitconvert V2I64Regs:$s)), + (Build_Vector4_f32 + (BITCONVERT_32_I2F (V2i32Extract (VecI64toV2I32 + (V2i64Extract V2I64Regs:$s, 0)), 0)), + (BITCONVERT_32_I2F (V2i32Extract (VecI64toV2I32 + (V2i64Extract V2I64Regs:$s, 0)), 1)), + (BITCONVERT_32_I2F (V2i32Extract (VecI64toV2I32 + (V2i64Extract V2I64Regs:$s, 1)), 0)), + (BITCONVERT_32_I2F (V2i32Extract (VecI64toV2I32 + (V2i64Extract V2I64Regs:$s, 1)), 1)))>; +// v2i64 -> v2f64 +def : Pat<(v2f64 (bitconvert V2I64Regs:$s)), + (Build_Vector2_f64 + (BITCONVERT_64_I2F (V2i64Extract V2I64Regs:$s,0)), + (BITCONVERT_64_I2F (V2i64Extract V2I64Regs:$s,1)))>; +// v2i32 -> v2f32 +def : Pat<(v2f32 (bitconvert V2I32Regs:$s)), + (Build_Vector2_f32 + (BITCONVERT_32_I2F (V2i32Extract V2I32Regs:$s,0)), + (BITCONVERT_32_I2F (V2i32Extract V2I32Regs:$s,1)))>; +// v4i32 -> v2f64 +def : Pat<(v2f64 (bitconvert V4I32Regs:$s)), + (Build_Vector2_f64 + (BITCONVERT_64_I2F (V2I32toI64 (V4i32Extract V4I32Regs:$s,0), + (V4i32Extract V4I32Regs:$s,1))), + (BITCONVERT_64_I2F (V2I32toI64 (V4i32Extract V4I32Regs:$s,2), + (V4i32Extract V4I32Regs:$s,3))))>; +// v4i32 -> v4f32 +def : Pat<(v4f32 (bitconvert V4I32Regs:$s)), + (Build_Vector4_f32 + (BITCONVERT_32_I2F (V4i32Extract V4I32Regs:$s,0)), + (BITCONVERT_32_I2F (V4i32Extract V4I32Regs:$s,1)), + (BITCONVERT_32_I2F (V4i32Extract V4I32Regs:$s,2)), + (BITCONVERT_32_I2F (V4i32Extract V4I32Regs:$s,3)))>; +// v4i16 -> v2f32 +def : Pat<(v2f32 (bitconvert V4I16Regs:$s)), + (VecF64toV2F32 (BITCONVERT_64_I2F + (V4I16toI64 (V4i16Extract V4I16Regs:$s,0), + (V4i16Extract V4I16Regs:$s,1), + (V4i16Extract V4I16Regs:$s,2), + (V4i16Extract V4I16Regs:$s,3))))>; + +// Fp vector to int vector convert +// v2i64 <- v4f32 +def : Pat<(v2i64 (bitconvert V4F32Regs:$s)), + (Build_Vector2_i64 + (BITCONVERT_64_F2I (V2F32toF64 (V4f32Extract V4F32Regs:$s,0), + (V4f32Extract V4F32Regs:$s,1))), + (BITCONVERT_64_F2I (V2F32toF64 (V4f32Extract V4F32Regs:$s,2), + (V4f32Extract V4F32Regs:$s,3))))>; +// v2i64 <- v2f64 +def : Pat<(v2i64 (bitconvert V2F64Regs:$s)), + (Build_Vector2_i64 + (BITCONVERT_64_F2I (V2f64Extract V2F64Regs:$s,0)), + (BITCONVERT_64_F2I (V2f64Extract V2F64Regs:$s,1)))>; +// v2i32 <- v2f32 +def : Pat<(v2i32 (bitconvert V2F32Regs:$s)), + (Build_Vector2_i32 + (BITCONVERT_32_F2I (V2f32Extract V2F32Regs:$s,0)), + (BITCONVERT_32_F2I (V2f32Extract V2F32Regs:$s,1)))>; +// v4i32 <- v2f64 +def : Pat<(v4i32 (bitconvert V2F64Regs:$s)), + (Build_Vector4_i32 + (BITCONVERT_32_F2I (V2f32Extract (VecF64toV2F32 + (V2f64Extract V2F64Regs:$s, 0)), 0)), + (BITCONVERT_32_F2I (V2f32Extract (VecF64toV2F32 + (V2f64Extract V2F64Regs:$s, 0)), 1)), + (BITCONVERT_32_F2I (V2f32Extract (VecF64toV2F32 + (V2f64Extract V2F64Regs:$s, 1)), 0)), + (BITCONVERT_32_F2I (V2f32Extract (VecF64toV2F32 + (V2f64Extract V2F64Regs:$s, 1)), 1)))>; +// v4i32 <- v4f32 +def : Pat<(v4i32 (bitconvert V4F32Regs:$s)), + (Build_Vector4_i32 + (BITCONVERT_32_F2I (V4f32Extract V4F32Regs:$s,0)), + (BITCONVERT_32_F2I (V4f32Extract V4F32Regs:$s,1)), + (BITCONVERT_32_F2I (V4f32Extract V4F32Regs:$s,2)), + (BITCONVERT_32_F2I (V4f32Extract V4F32Regs:$s,3)))>; +// v4i16 <- v2f32 +def : Pat<(v4i16 (bitconvert V2F32Regs:$s)), + (VecI64toV4I16 (BITCONVERT_64_F2I + (V2F32toF64 (V2f32Extract V2F32Regs:$s,0), + (V2f32Extract V2F32Regs:$s,1))))>; diff --git a/lib/Target/NVPTX/NVPTXutil.cpp b/lib/Target/NVPTX/NVPTXutil.cpp new file mode 100644 index 0000000..6a0e532 --- /dev/null 
+++ b/lib/Target/NVPTX/NVPTXutil.cpp @@ -0,0 +1,92 @@ +//===-- NVPTXutil.cpp - Functions exported to CodeGen --*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the functions that can be used in CodeGen. +// +//===----------------------------------------------------------------------===// + +#include "NVPTXutil.h" +#include "NVPTX.h" + +using namespace llvm; + +namespace llvm { + +bool isParamLoad(const MachineInstr *MI) +{ + if ((MI->getOpcode() != NVPTX::LD_i32_avar) && + (MI->getOpcode() != NVPTX::LD_i64_avar)) + return false; + if (MI->getOperand(2).isImm() == false) + return false; + if (MI->getOperand(2).getImm() != NVPTX::PTXLdStInstCode::PARAM) + return false; + return true; +} + +#define DATA_MASK 0x7f +#define DIGIT_WIDTH 7 +#define MORE_BYTES 0x80 + +static int encode_leb128(uint64_t val, int *nbytes, + char *space, int splen) +{ + char *a; + char *end = space + splen; + + a = space; + do { + unsigned char uc; + + if (a >= end) + return 1; + uc = val & DATA_MASK; + val >>= DIGIT_WIDTH; + if (val != 0) + uc |= MORE_BYTES; + *a = uc; + a++; + } while (val); + *nbytes = a - space; + return 0; +} + +#undef DATA_MASK +#undef DIGIT_WIDTH +#undef MORE_BYTES + +uint64_t encode_leb128(const char *str) +{ + union { uint64_t x; char a[8]; } temp64; + + temp64.x = 0; + + for (unsigned i=0,e=strlen(str); i!=e; ++i) + temp64.a[i] = str[e-1-i]; + + char encoded[16]; + int nbytes; + + int retval = encode_leb128(temp64.x, &nbytes, encoded, 16); + + (void)retval; + assert(retval == 0 && + "Encoding to leb128 failed"); + + assert(nbytes <= 8 && + "Cannot support register names with leb128 encoding > 8 bytes"); + + temp64.x = 0; + for (int i=0; i X(TheNVPTXTarget32, "nvptx", + "NVIDIA PTX 32-bit"); + RegisterTarget Y(TheNVPTXTarget64, "nvptx64", + "NVIDIA PTX 64-bit"); +} diff --git a/lib/Target/NVPTX/VectorElementize.cpp b/lib/Target/NVPTX/VectorElementize.cpp new file mode 100644 index 0000000..8043e2d --- /dev/null +++ b/lib/Target/NVPTX/VectorElementize.cpp @@ -0,0 +1,1248 @@ +//===-- VectorElementize.cpp - Remove unreachable blocks for codegen --===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass converts operations on vector types to operations on their +// element types. +// +// For generic binary and unary vector instructions, the conversion is simple. +// Suppose we have +// av = bv Vop cv +// where av, bv, and cv are vector virtual registers, and Vop is a vector op. +// This gets converted to the following : +// a1 = b1 Sop c1 +// a2 = b2 Sop c2 +// +// VectorToScalarMap maintains the vector vreg to scalar vreg mapping. +// For the above example, the map will look as follows: +// av => [a1, a2] +// bv => [b1, b2] +// +// In addition, initVectorInfo creates the following opcode->opcode map. +// Vop => Sop +// OtherVop => OtherSop +// ... +// +// For vector specific instructions like vecbuild, vecshuffle etc, the +// conversion is different. Look at comments near the functions with +// prefix createVec<...>. 
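The per-element rewrite described above is mechanical enough to show in miniature. What follows is a minimal, self-contained sketch of the vreg bookkeeping (the "av => [a1, a2]" map); VRegMap, getScalars, and scalarizeBinOp are illustrative names invented for this sketch, not APIs from this patch.

#include <cstdio>
#include <map>
#include <vector>

using VReg = unsigned;

// av => [a1, a2], in the spirit of the pass's VectorToScalarMap.
static std::map<VReg, std::vector<VReg>> VRegMap;
static VReg NextVReg = 100;

// Lazily create one scalar vreg per element of a vector vreg.
static const std::vector<VReg> &getScalars(VReg V, unsigned N) {
  std::vector<VReg> &Elems = VRegMap[V];
  if (Elems.empty())
    for (unsigned I = 0; I != N; ++I)
      Elems.push_back(NextVReg++);
  return Elems;
}

// av = bv Vop cv  -->  ai = bi Sop ci, one scalar op per element.
static void scalarizeBinOp(VReg A, VReg B, VReg C, unsigned N) {
  const std::vector<VReg> &As = getScalars(A, N);
  const std::vector<VReg> &Bs = getScalars(B, N);
  const std::vector<VReg> &Cs = getScalars(C, N);
  for (unsigned I = 0; I != N; ++I)
    std::printf("r%u = r%u Sop r%u\n", As[I], Bs[I], Cs[I]);
}

int main() {
  scalarizeBinOp(/*av=*/1, /*bv=*/2, /*cv=*/3, /*elements=*/2);
  return 0;
}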
+// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/Passes.h" +#include "llvm/Constant.h" +#include "llvm/Instructions.h" +#include "llvm/Function.h" +#include "llvm/Pass.h" +#include "llvm/Type.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/Support/CFG.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/ADT/DepthFirstIterator.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "NVPTX.h" +#include "NVPTXTargetMachine.h" + +using namespace llvm; + +namespace { + +class LLVM_LIBRARY_VISIBILITY VectorElementize : public MachineFunctionPass { + virtual bool runOnMachineFunction(MachineFunction &F); + + NVPTXTargetMachine &TM; + MachineRegisterInfo *MRI; + const NVPTXRegisterInfo *RegInfo; + const NVPTXInstrInfo *InstrInfo; + + llvm::DenseMap + RegClassMap; + llvm::DenseMap SimpleMoveMap; + + llvm::DenseMap > VectorToScalarMap; + + bool isVectorInstr(MachineInstr *); + + SmallVector getScalarRegisters(unsigned); + unsigned getScalarVersion(unsigned); + unsigned getScalarVersion(MachineInstr *); + + bool isVectorRegister(unsigned); + const TargetRegisterClass *getScalarRegClass(const TargetRegisterClass *RC); + unsigned numCopiesNeeded(MachineInstr *); + + void createLoadCopy(MachineFunction&, MachineInstr *, + std::vector&); + void createStoreCopy(MachineFunction&, MachineInstr *, + std::vector&); + + void createVecDest(MachineFunction&, MachineInstr *, + std::vector&); + + void createCopies(MachineFunction&, MachineInstr *, + std::vector&); + + unsigned copyProp(MachineFunction&); + unsigned removeDeadMoves(MachineFunction&); + + void elementize(MachineFunction&); + + bool isSimpleMove(MachineInstr *); + + void createVecShuffle(MachineFunction& F, MachineInstr *Instr, + std::vector& copies); + + void createVecExtract(MachineFunction& F, MachineInstr *Instr, + std::vector& copies); + + void createVecInsert(MachineFunction& F, MachineInstr *Instr, + std::vector& copies); + + void createVecBuild(MachineFunction& F, MachineInstr *Instr, + std::vector& copies); + +public: + + static char ID; // Pass identification, replacement for typeid + VectorElementize(NVPTXTargetMachine &tm) + : MachineFunctionPass(ID), TM(tm) {} + + virtual const char *getPassName() const { + return "Convert LLVM vector types to their element types"; + } +}; + +char VectorElementize::ID = 1; +} + +static cl::opt +RemoveRedundantMoves("nvptx-remove-redundant-moves", + cl::desc("NVPTX: Remove redundant moves introduced by vector lowering"), + cl::init(true)); + +#define VECINST(x) ((((x)->getDesc().TSFlags) & NVPTX::VecInstTypeMask) \ + >> NVPTX::VecInstTypeShift) +#define ISVECINST(x) (VECINST(x) != NVPTX::VecNOP) +#define ISVECLOAD(x) (VECINST(x) == NVPTX::VecLoad) +#define ISVECSTORE(x) (VECINST(x) == NVPTX::VecStore) +#define ISVECBUILD(x) (VECINST(x) == NVPTX::VecBuild) +#define ISVECSHUFFLE(x) (VECINST(x) == NVPTX::VecShuffle) +#define ISVECEXTRACT(x) (VECINST(x) == NVPTX::VecExtract) +#define ISVECINSERT(x) (VECINST(x) == NVPTX::VecInsert) +#define ISVECDEST(x) (VECINST(x) == NVPTX::VecDest) + +bool VectorElementize::isSimpleMove(MachineInstr *mi) { + if (mi->isCopy()) + return true; + unsigned TSFlags = (mi->getDesc().TSFlags & NVPTX::SimpleMoveMask) + >> NVPTX::SimpleMoveShift; + return (TSFlags == 1); +} + +bool 
VectorElementize::isVectorInstr(MachineInstr *mi) { + if ((mi->getOpcode() == NVPTX::PHI) || + (mi->getOpcode() == NVPTX::IMPLICIT_DEF) || mi->isCopy()) { + MachineOperand dest = mi->getOperand(0); + return isVectorRegister(dest.getReg()); + } + return ISVECINST(mi); +} + +unsigned VectorElementize::getScalarVersion(MachineInstr *mi) { + return getScalarVersion(mi->getOpcode()); +} + +///============================================================================= +///Instr is assumed to be a vector instruction. For most vector instructions, +///the size of the destination vector register gives the number of scalar copies +///needed. For VecStore, size of getOperand(1) gives the number of scalar copies +///needed. For VecExtract, the dest is a scalar. So getOperand(1) gives the +///number of scalar copies needed. +///============================================================================= +unsigned VectorElementize::numCopiesNeeded(MachineInstr *Instr) { + unsigned numDefs=0; + unsigned def; + for (unsigned i=0, e=Instr->getNumOperands(); i!=e; ++i) { + MachineOperand oper = Instr->getOperand(i); + + if (!oper.isReg()) continue; + if (!oper.isDef()) continue; + def = i; + numDefs++; + } + assert((numDefs <= 1) && "Only 0 or 1 defs supported"); + + if (numDefs == 1) { + unsigned regnum = Instr->getOperand(def).getReg(); + if (ISVECEXTRACT(Instr)) + regnum = Instr->getOperand(1).getReg(); + return getNVPTXVectorSize(MRI->getRegClass(regnum)); + } + else if (numDefs == 0) { + assert(ISVECSTORE(Instr) + && "Only 0 def instruction supported is vector store"); + + unsigned regnum = Instr->getOperand(0).getReg(); + return getNVPTXVectorSize(MRI->getRegClass(regnum)); + } + return 1; +} + +const TargetRegisterClass *VectorElementize:: +getScalarRegClass(const TargetRegisterClass *RC) { + assert(isNVPTXVectorRegClass(RC) && + "Not a vector register class"); + return getNVPTXElemClass(RC); +} + +bool VectorElementize::isVectorRegister(unsigned reg) { + const TargetRegisterClass *RC=MRI->getRegClass(reg); + return isNVPTXVectorRegClass(RC); +} + +///============================================================================= +///For every vector register 'v' that is not already in the VectorToScalarMap, +///create n scalar registers of the corresponding element type, where n +///is 2 or 4 (getNVPTXVectorSize) and add it VectorToScalarMap. +///============================================================================= +SmallVector VectorElementize::getScalarRegisters(unsigned regnum) { + assert(isVectorRegister(regnum) && "Expecting a vector register here"); + // Create the scalar registers and put them in the map, if not already there. + if (VectorToScalarMap.find(regnum) == VectorToScalarMap.end()) { + const TargetRegisterClass *vecClass = MRI->getRegClass(regnum); + const TargetRegisterClass *scalarClass = getScalarRegClass(vecClass); + + SmallVector temp; + + for (unsigned i=0, e=getNVPTXVectorSize(vecClass); i!=e; ++i) + temp.push_back(MRI->createVirtualRegister(scalarClass)); + + VectorToScalarMap[regnum] = temp; + } + return VectorToScalarMap[regnum]; +} + +///============================================================================= +///For a vector load of the form +///va <= ldv2 [addr] +///the following multi output instruction is created : +///[v1, v2] <= LD [addr] +///Look at NVPTXVector.td for the definitions of multi output loads. 
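As a concrete illustration of the multi-output rewrite just described, here is a sketch under assumed toy operand shapes (not the pass's real MachineInstr plumbing): the single vector def is replaced by one scalar def per element, and the remaining operands keep their order.

#include <cstdio>
#include <string>
#include <vector>

// Toy instruction: Ops[0] is the (vector) def, the rest are uses.
struct Inst { std::vector<std::string> Ops; };

// va <= ldv2 [addr]  -->  [va_0, va_1] <= LD [addr]
static Inst expandVecLoad(const Inst &Vec, unsigned N) {
  Inst Out;
  for (unsigned I = 0; I != N; ++I)        // scalar defs come first
    Out.Ops.push_back(Vec.Ops[0] + "_" + std::to_string(I));
  Out.Ops.insert(Out.Ops.end(), Vec.Ops.begin() + 1, Vec.Ops.end());
  return Out;
}

int main() {
  Inst Ld{{"va", "[addr]"}};
  Inst Scalar = expandVecLoad(Ld, 2);
  for (const std::string &Op : Scalar.Ops)
    std::printf("%s ", Op.c_str());
  std::printf("\n");                       // prints: va_0 va_1 [addr]
  return 0;
}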
+///============================================================================= +void VectorElementize::createLoadCopy(MachineFunction& F, MachineInstr *Instr, + std::vector& copies) { + copies.push_back(F.CloneMachineInstr(Instr)); + + MachineInstr *copy=copies[0]; + copy->setDesc(InstrInfo->get(getScalarVersion(copy))); + + // Remove the dest, that should be a vector operand. + MachineOperand dest = copy->getOperand(0); + unsigned regnum = dest.getReg(); + + SmallVector scalarRegs = getScalarRegisters(regnum); + copy->RemoveOperand(0); + + std::vector otherOperands; + for (unsigned i=0, e=copy->getNumOperands(); i!=e; ++i) + otherOperands.push_back(copy->getOperand(i)); + + for (unsigned i=0, e=copy->getNumOperands(); i!=e; ++i) + copy->RemoveOperand(0); + + for (unsigned i=0, e=scalarRegs.size(); i!=e; ++i) { + copy->addOperand(MachineOperand::CreateReg(scalarRegs[i], true)); + } + + for (unsigned i=0, e=otherOperands.size(); i!=e; ++i) + copy->addOperand(otherOperands[i]); + +} + +///============================================================================= +///For a vector store of the form +///stv2 va, [addr] +///the following multi input instruction is created : +///ST v1, v2, [addr] +///Look at NVPTXVector.td for the definitions of multi input stores. +///============================================================================= +void VectorElementize::createStoreCopy(MachineFunction& F, MachineInstr *Instr, + std::vector& copies) { + copies.push_back(F.CloneMachineInstr(Instr)); + + MachineInstr *copy=copies[0]; + copy->setDesc(InstrInfo->get(getScalarVersion(copy))); + + MachineOperand src = copy->getOperand(0); + unsigned regnum = src.getReg(); + + SmallVector scalarRegs = getScalarRegisters(regnum); + copy->RemoveOperand(0); + + std::vector otherOperands; + for (unsigned i=0, e=copy->getNumOperands(); i!=e; ++i) + otherOperands.push_back(copy->getOperand(i)); + + for (unsigned i=0, e=copy->getNumOperands(); i!=e; ++i) + copy->RemoveOperand(0); + + for (unsigned i=0, e=scalarRegs.size(); i!=e; ++i) + copy->addOperand(MachineOperand::CreateReg(scalarRegs[i], false)); + + for (unsigned i=0, e=otherOperands.size(); i!=e; ++i) + copy->addOperand(otherOperands[i]); +} + +///============================================================================= +///va <= shufflev2 vb, vc, , +///gets converted to 2 moves into a1 and a2. The source of the moves depend on +///i1 and i2. i1, i2 can belong to the set {0, 1, 2, 3} for shufflev2. For +///shufflev4 the set is {0,..7}. 
For example, if i1=3, i2=0, the move +///instructions will be +///a1 <= c2 +///a2 <= b1 +///============================================================================= +void VectorElementize::createVecShuffle(MachineFunction& F, MachineInstr *Instr, + std::vector& copies) { + unsigned numcopies=numCopiesNeeded(Instr); + + unsigned destregnum = Instr->getOperand(0).getReg(); + unsigned src1regnum = Instr->getOperand(1).getReg(); + unsigned src2regnum = Instr->getOperand(2).getReg(); + + SmallVector dest = getScalarRegisters(destregnum); + SmallVector src1 = getScalarRegisters(src1regnum); + SmallVector src2 = getScalarRegisters(src2regnum); + + DebugLoc DL = Instr->getDebugLoc(); + + for (unsigned i=0; iget(getScalarVersion(Instr)), dest[i]); + MachineOperand which=Instr->getOperand(3+i); + assert(which.isImm() && "Shuffle operand not a constant"); + + int src=which.getImm(); + int elem=src%numcopies; + + if (which.getImm() < numcopies) + copy->addOperand(MachineOperand::CreateReg(src1[elem], false)); + else + copy->addOperand(MachineOperand::CreateReg(src2[elem], false)); + copies.push_back(copy); + } +} + +///============================================================================= +///a <= extractv2 va, +///gets turned into a simple move to the scalar register a. The source depends +///on i1. +///============================================================================= +void VectorElementize::createVecExtract(MachineFunction& F, MachineInstr *Instr, + std::vector& copies) { + unsigned srcregnum = Instr->getOperand(1).getReg(); + + SmallVector src = getScalarRegisters(srcregnum); + + MachineOperand which = Instr->getOperand(2); + assert(which.isImm() && "Extract operand not a constant"); + + DebugLoc DL = Instr->getDebugLoc(); + + MachineInstr *copy = BuildMI(F, DL, InstrInfo->get(getScalarVersion(Instr)), + Instr->getOperand(0).getReg()); + copy->addOperand(MachineOperand::CreateReg(src[which.getImm()], false)); + + copies.push_back(copy); +} + +///============================================================================= +///va <= vecinsertv2 vb, c, +///This instruction copies all elements of vb to va, except the 'i1'th element. +///The scalar value c becomes the 'i1'th element of va. +///This gets translated to 2 (4 for vecinsertv4) moves. 
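A sketch of that lane-by-lane expansion (names are hypothetical; the real code builds MachineInstrs via BuildMI): every lane is a plain register move except the inserted one, which takes the scalar value.

#include <cstdio>
#include <string>
#include <vector>

// va <= vecinsertv2 vb, c, i1: copy every lane of vb, substituting c at i1.
static std::vector<std::string> expandInsert(unsigned N, unsigned Elem) {
  std::vector<std::string> Moves;
  for (unsigned I = 0; I != N; ++I) {
    std::string Src = (I == Elem) ? std::string("c")
                                  : "b" + std::to_string(I);
    Moves.push_back("a" + std::to_string(I) + " <= " + Src);
  }
  return Moves;
}

int main() {
  for (const std::string &M : expandInsert(/*N=*/2, /*Elem=*/1))
    std::printf("%s\n", M.c_str());   // a0 <= b0, then a1 <= c
  return 0;
}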
+///============================================================================= +void VectorElementize::createVecInsert(MachineFunction& F, MachineInstr *Instr, + std::vector& copies) { + unsigned numcopies=numCopiesNeeded(Instr); + + unsigned destregnum = Instr->getOperand(0).getReg(); + unsigned srcregnum = Instr->getOperand(1).getReg(); + + SmallVector dest = getScalarRegisters(destregnum); + SmallVector src = getScalarRegisters(srcregnum); + + MachineOperand which=Instr->getOperand(3); + assert(which.isImm() && "Insert operand not a constant"); + unsigned int elem=which.getImm(); + + DebugLoc DL = Instr->getDebugLoc(); + + for (unsigned i=0; iget(getScalarVersion(Instr)), dest[i]); + + if (i != elem) + copy->addOperand(MachineOperand::CreateReg(src[i], false)); + else + copy->addOperand(Instr->getOperand(2)); + + copies.push_back(copy); + } + +} + +///============================================================================= +///va <= buildv2 b1, b2 +///gets translated to +///a1 <= b1 +///a2 <= b2 +///============================================================================= +void VectorElementize::createVecBuild(MachineFunction& F, MachineInstr *Instr, + std::vector& copies) { + unsigned numcopies=numCopiesNeeded(Instr); + + unsigned destregnum = Instr->getOperand(0).getReg(); + + SmallVector dest = getScalarRegisters(destregnum); + + DebugLoc DL = Instr->getDebugLoc(); + + for (unsigned i=0; iget(getScalarVersion(Instr)), dest[i]); + + copy->addOperand(Instr->getOperand(1+i)); + + copies.push_back(copy); + } + +} + +///============================================================================= +///For a tex inst of the form +///va <= op [scalar operands] +///the following multi output instruction is created : +///[v1, v2] <= op' [scalar operands] +///============================================================================= +void VectorElementize::createVecDest(MachineFunction& F, MachineInstr *Instr, + std::vector& copies) { + copies.push_back(F.CloneMachineInstr(Instr)); + + MachineInstr *copy=copies[0]; + copy->setDesc(InstrInfo->get(getScalarVersion(copy))); + + // Remove the dest, that should be a vector operand. + MachineOperand dest = copy->getOperand(0); + unsigned regnum = dest.getReg(); + + SmallVector scalarRegs = getScalarRegisters(regnum); + copy->RemoveOperand(0); + + std::vector otherOperands; + for (unsigned i=0, e=copy->getNumOperands(); i!=e; ++i) + otherOperands.push_back(copy->getOperand(i)); + + for (unsigned i=0, e=copy->getNumOperands(); i!=e; ++i) + copy->RemoveOperand(0); + + for (unsigned i=0, e=scalarRegs.size(); i!=e; ++i) + copy->addOperand(MachineOperand::CreateReg(scalarRegs[i], true)); + + for (unsigned i=0, e=otherOperands.size(); i!=e; ++i) + copy->addOperand(otherOperands[i]); +} + +///============================================================================= +///Look at the vector instruction type and dispatch to the createVec<...> +///function that creates the scalar copies. 
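For ordinary vector arithmetic there is no special-case builder: the instruction is cloned once per lane, and in copy i every vector register operand is swapped for its i-th scalar while immediates and other operands are kept verbatim. A minimal sketch under assumed, toy operand shapes:

#include <cstdio>
#include <map>
#include <string>
#include <vector>

// Toy vector->scalar vreg map; "av" and "bv" stand in for vector vregs.
static std::map<std::string, std::vector<std::string>> Scalars = {
    {"av", {"a0", "a1"}}, {"bv", {"b0", "b1"}}};

// Clone the operand list once per lane; in copy I, every vector register
// operand is replaced by its I-th scalar.
static std::vector<std::vector<std::string>>
cloneLanes(const std::vector<std::string> &Ops, unsigned N) {
  std::vector<std::vector<std::string>> Copies(N, Ops);
  for (unsigned I = 0; I != N; ++I)
    for (std::string &Op : Copies[I]) {
      auto It = Scalars.find(Op);
      if (It != Scalars.end())
        Op = It->second[I];
    }
  return Copies;
}

int main() {
  // av = bv Vop 7  -->  a0 = b0 Sop 7 ; a1 = b1 Sop 7
  for (const std::vector<std::string> &Copy : cloneLanes({"av", "bv", "7"}, 2)) {
    for (const std::string &Op : Copy)
      std::printf("%s ", Op.c_str());
    std::printf("\n");
  }
  return 0;
}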
+///============================================================================= +void VectorElementize::createCopies(MachineFunction& F, MachineInstr *Instr, + std::vector& copies) { + if (ISVECLOAD(Instr)) { + createLoadCopy(F, Instr, copies); + return; + } + if (ISVECSTORE(Instr)) { + createStoreCopy(F, Instr, copies); + return; + } + if (ISVECSHUFFLE(Instr)) { + createVecShuffle(F, Instr, copies); + return; + } + if (ISVECEXTRACT(Instr)) { + createVecExtract(F, Instr, copies); + return; + } + if (ISVECINSERT(Instr)) { + createVecInsert(F, Instr, copies); + return; + } + if (ISVECDEST(Instr)) { + createVecDest(F, Instr, copies); + return; + } + if (ISVECBUILD(Instr)) { + createVecBuild(F, Instr, copies); + return; + } + + unsigned numcopies=numCopiesNeeded(Instr); + + for (unsigned i=0; i allOperands; + std::vector isDef; + + for (unsigned j=0, e=copy->getNumOperands(); j!=e; ++j) { + MachineOperand oper = copy->getOperand(j); + allOperands.push_back(oper); + if (oper.isReg()) + isDef.push_back(oper.isDef()); + else + isDef.push_back(false); + } + + for (unsigned j=0, e=copy->getNumOperands(); j!=e; ++j) + copy->RemoveOperand(0); + + copy->setDesc(InstrInfo->get(getScalarVersion(Instr))); + + for (unsigned j=0, e=allOperands.size(); j!=e; ++j) { + MachineOperand oper=allOperands[j]; + if (oper.isReg()) { + unsigned regnum = oper.getReg(); + if (isVectorRegister(regnum)) { + + SmallVector scalarRegs = getScalarRegisters(regnum); + copy->addOperand(MachineOperand::CreateReg(scalarRegs[i], isDef[j])); + } + else + copy->addOperand(oper); + } + else + copy->addOperand(oper); + } + } +} + +///============================================================================= +///Scan through all basic blocks, looking for vector instructions. +///For each vector instruction I, insert the scalar copies before I, and +///add I into toRemove vector. Finally remove all instructions in toRemove. +///============================================================================= +void VectorElementize::elementize(MachineFunction &F) { + for (MachineFunction::reverse_iterator BI=F.rbegin(), BE=F.rend(); + BI!=BE; ++BI) { + MachineBasicBlock *BB = &*BI; + + std::vector copies; + std::vector toRemove; + + for (MachineBasicBlock::iterator II=BB->begin(), IE=BB->end(); + II!=IE; ++II) { + MachineInstr *Instr = &*II; + + if (!isVectorInstr(Instr)) + continue; + + copies.clear(); + createCopies(F, Instr, copies); + for (unsigned i=0, e=copies.size(); i!=e; ++i) + BB->insert(II, copies[i]); + + assert((copies.size() > 0) && "Problem in createCopies"); + toRemove.push_back(Instr); + } + for (unsigned i=0, e=toRemove.size(); i!=e; ++i) + F.DeleteMachineInstr(toRemove[i]->getParent()->remove(toRemove[i])); + } +} + +///============================================================================= +///a <= b +///... +///... +///x <= op(a, ...) +///gets converted to +/// +///x <= op(b, ...) +///The original move is still present. This works on SSA form machine code. +///Note that a <= b should be a simple vreg-to-vreg move instruction. +///TBD : I didn't find a function that can do replaceOperand, so I remove +///all operands and add all of them again, replacing the one while adding. 
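The transformation itself is a one-step use rewrite: if the sole (SSA) definition of an operand is a simple vreg-to-vreg move, the use is redirected to the move's source, and the move is left for removeDeadMoves to clean up. A hedged sketch with a toy def map (MoveDef is invented for illustration):

#include <cstdio>
#include <map>
#include <string>
#include <vector>

// vreg -> source of its defining simple move, if any. SSA guarantees a
// single definition, which is what makes the rewrite safe. (Toy names.)
static std::map<std::string, std::string> MoveDef = {{"a", "b"}};

// x <= op(a, ...) becomes x <= op(b, ...); the move a <= b stays in place.
static void copyProp(std::vector<std::string> &Uses) {
  for (std::string &U : Uses) {
    auto It = MoveDef.find(U);
    if (It != MoveDef.end())
      U = It->second;
  }
}

int main() {
  std::vector<std::string> Ops = {"a", "c"};
  copyProp(Ops);
  std::printf("x <= op(%s, %s)\n", Ops[0].c_str(), Ops[1].c_str());
  return 0;
}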
+///============================================================================= +unsigned VectorElementize::copyProp(MachineFunction &F) { + unsigned numReplacements = 0; + + for (MachineFunction::reverse_iterator BI=F.rbegin(), BE=F.rend(); BI!=BE; + ++BI) { + MachineBasicBlock *BB = &*BI; + + for (MachineBasicBlock::iterator II=BB->begin(), IE=BB->end(); II!=IE; + ++II) { + MachineInstr *Instr = &*II; + + // Don't do copy propagation on PHI as it will cause unnecessary + // live range overlap. + if ((Instr->getOpcode() == TargetOpcode::PHI) || + (Instr->getOpcode() == TargetOpcode::DBG_VALUE)) + continue; + + bool needsReplacement = false; + + for (unsigned i=0, e=Instr->getNumOperands(); i!=e; ++i) { + MachineOperand oper = Instr->getOperand(i); + if (!oper.isReg()) continue; + if (oper.isDef()) continue; + if (!RegInfo->isVirtualRegister(oper.getReg())) continue; + + MachineInstr *defInstr = MRI->getVRegDef(oper.getReg()); + + if (!defInstr) continue; + + if (!isSimpleMove(defInstr)) continue; + + MachineOperand defSrc = defInstr->getOperand(1); + if (!defSrc.isReg()) continue; + if (!RegInfo->isVirtualRegister(defSrc.getReg())) continue; + + needsReplacement = true; + + } + if (!needsReplacement) continue; + + numReplacements++; + + std::vector operands; + + for (unsigned i=0, e=Instr->getNumOperands(); i!=e; ++i) { + MachineOperand oper = Instr->getOperand(i); + bool flag = false; + do { + if (!(oper.isReg())) + break; + if (oper.isDef()) + break; + if (!(RegInfo->isVirtualRegister(oper.getReg()))) + break; + MachineInstr *defInstr = MRI->getVRegDef(oper.getReg()); + if (!(isSimpleMove(defInstr))) + break; + MachineOperand defSrc = defInstr->getOperand(1); + if (!(defSrc.isReg())) + break; + if (!(RegInfo->isVirtualRegister(defSrc.getReg()))) + break; + operands.push_back(defSrc); + flag = true; + } while (0); + if (flag == false) + operands.push_back(oper); + } + + for (unsigned i=0, e=Instr->getNumOperands(); i!=e; ++i) + Instr->RemoveOperand(0); + for (unsigned i=0, e=operands.size(); i!=e; ++i) + Instr->addOperand(operands[i]); + + } + } + return numReplacements; +} + +///============================================================================= +///Look for simple vreg-to-vreg instructions whose use_empty() is true, add +///them to deadMoves vector. Then remove all instructions in deadMoves. +///============================================================================= +unsigned VectorElementize::removeDeadMoves(MachineFunction &F) { + std::vector deadMoves; + for (MachineFunction::reverse_iterator BI=F.rbegin(), BE=F.rend(); BI!=BE; + ++BI) { + MachineBasicBlock *BB = &*BI; + + for (MachineBasicBlock::iterator II=BB->begin(), IE=BB->end(); II!=IE; + ++II) { + MachineInstr *Instr = &*II; + + if (!isSimpleMove(Instr)) continue; + + MachineOperand dest = Instr->getOperand(0); + assert(dest.isReg() && "dest of move not a register"); + assert(RegInfo->isVirtualRegister(dest.getReg()) && + "dest of move not a virtual register"); + + if (MRI->use_empty(dest.getReg())) { + deadMoves.push_back(Instr); + } + } + } + + for (unsigned i=0, e=deadMoves.size(); i!=e; ++i) + F.DeleteMachineInstr(deadMoves[i]->getParent()->remove(deadMoves[i])); + + return deadMoves.size(); +} + +///============================================================================= +///Main function for this pass. 
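The function below elementizes and then, when RemoveRedundantMoves is set, alternates copyProp and removeDeadMoves until copyProp makes no replacements. The control shape, sketched with placeholder change counters standing in for the real passes:

#include <cstdio>

// Placeholders standing in for the real copyProp / removeDeadMoves; each
// returns how many instructions it changed on this round.
static unsigned copyProp() {
  static unsigned PendingReplacements = 2;
  return PendingReplacements ? PendingReplacements-- : 0;
}
static unsigned removeDeadMoves() { return 1; }

int main() {
  // Iterate to a fixpoint: stop once copy propagation rewrites nothing.
  while (unsigned Replaced = copyProp())
    std::printf("propagated %u use(s), removed %u dead move(s)\n",
                Replaced, removeDeadMoves());
  return 0;
}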
+///============================================================================= +bool VectorElementize::runOnMachineFunction(MachineFunction &F) { + MRI = &F.getRegInfo(); + + RegInfo = TM.getRegisterInfo(); + InstrInfo = TM.getInstrInfo(); + + VectorToScalarMap.clear(); + + elementize(F); + + if (RemoveRedundantMoves) + while (1) { + if (copyProp(F) == 0) break; + removeDeadMoves(F); + } + + return true; +} + +FunctionPass *llvm::createVectorElementizePass(NVPTXTargetMachine &tm) { + return new VectorElementize(tm); +} + +unsigned VectorElementize::getScalarVersion(unsigned opcode) { + if (opcode == NVPTX::PHI) + return opcode; + if (opcode == NVPTX::IMPLICIT_DEF) + return opcode; + switch(opcode) { + default: llvm_unreachable("Scalar version not set, fix NVPTXVector.td"); + case TargetOpcode::COPY: return TargetOpcode::COPY; + case NVPTX::AddCCCV2I32: return NVPTX::ADDCCCi32rr; + case NVPTX::AddCCCV4I32: return NVPTX::ADDCCCi32rr; + case NVPTX::AddCCV2I32: return NVPTX::ADDCCi32rr; + case NVPTX::AddCCV4I32: return NVPTX::ADDCCi32rr; + case NVPTX::Build_Vector2_f32: return NVPTX::FMOV32rr; + case NVPTX::Build_Vector2_f64: return NVPTX::FMOV64rr; + case NVPTX::Build_Vector2_i16: return NVPTX::IMOV16rr; + case NVPTX::Build_Vector2_i32: return NVPTX::IMOV32rr; + case NVPTX::Build_Vector2_i64: return NVPTX::IMOV64rr; + case NVPTX::Build_Vector2_i8: return NVPTX::IMOV8rr; + case NVPTX::Build_Vector4_f32: return NVPTX::FMOV32rr; + case NVPTX::Build_Vector4_i16: return NVPTX::IMOV16rr; + case NVPTX::Build_Vector4_i32: return NVPTX::IMOV32rr; + case NVPTX::Build_Vector4_i8: return NVPTX::IMOV8rr; + case NVPTX::CVTv2i16tov2i32: return NVPTX::Zint_extendext16to32; + case NVPTX::CVTv2i64tov2i32: return NVPTX::TRUNC_64to32; + case NVPTX::CVTv2i8tov2i32: return NVPTX::Zint_extendext8to32; + case NVPTX::CVTv4i16tov4i32: return NVPTX::Zint_extendext16to32; + case NVPTX::CVTv4i8tov4i32: return NVPTX::Zint_extendext8to32; + case NVPTX::F32MAD_ftzV2: return NVPTX::FMAD32_ftzrrr; + case NVPTX::F32MADV2: return NVPTX::FMAD32rrr; + case NVPTX::F32MAD_ftzV4: return NVPTX::FMAD32_ftzrrr; + case NVPTX::F32MADV4: return NVPTX::FMAD32rrr; + case NVPTX::F32FMA_ftzV2: return NVPTX::FMA32_ftzrrr; + case NVPTX::F32FMAV2: return NVPTX::FMA32rrr; + case NVPTX::F32FMA_ftzV4: return NVPTX::FMA32_ftzrrr; + case NVPTX::F32FMAV4: return NVPTX::FMA32rrr; + case NVPTX::F64FMAV2: return NVPTX::FMA64rrr; + case NVPTX::FVecEQV2F32: return NVPTX::FSetEQf32rr_toi32; + case NVPTX::FVecEQV2F64: return NVPTX::FSetEQf64rr_toi64; + case NVPTX::FVecEQV4F32: return NVPTX::FSetEQf32rr_toi32; + case NVPTX::FVecGEV2F32: return NVPTX::FSetGEf32rr_toi32; + case NVPTX::FVecGEV2F64: return NVPTX::FSetGEf64rr_toi64; + case NVPTX::FVecGEV4F32: return NVPTX::FSetGEf32rr_toi32; + case NVPTX::FVecGTV2F32: return NVPTX::FSetGTf32rr_toi32; + case NVPTX::FVecGTV2F64: return NVPTX::FSetGTf64rr_toi64; + case NVPTX::FVecGTV4F32: return NVPTX::FSetGTf32rr_toi32; + case NVPTX::FVecLEV2F32: return NVPTX::FSetLEf32rr_toi32; + case NVPTX::FVecLEV2F64: return NVPTX::FSetLEf64rr_toi64; + case NVPTX::FVecLEV4F32: return NVPTX::FSetLEf32rr_toi32; + case NVPTX::FVecLTV2F32: return NVPTX::FSetLTf32rr_toi32; + case NVPTX::FVecLTV2F64: return NVPTX::FSetLTf64rr_toi64; + case NVPTX::FVecLTV4F32: return NVPTX::FSetLTf32rr_toi32; + case NVPTX::FVecNANV2F32: return NVPTX::FSetNANf32rr_toi32; + case NVPTX::FVecNANV2F64: return NVPTX::FSetNANf64rr_toi64; + case NVPTX::FVecNANV4F32: return NVPTX::FSetNANf32rr_toi32; + case NVPTX::FVecNEV2F32: return 
NVPTX::FSetNEf32rr_toi32; + case NVPTX::FVecNEV2F64: return NVPTX::FSetNEf64rr_toi64; + case NVPTX::FVecNEV4F32: return NVPTX::FSetNEf32rr_toi32; + case NVPTX::FVecNUMV2F32: return NVPTX::FSetNUMf32rr_toi32; + case NVPTX::FVecNUMV2F64: return NVPTX::FSetNUMf64rr_toi64; + case NVPTX::FVecNUMV4F32: return NVPTX::FSetNUMf32rr_toi32; + case NVPTX::FVecUEQV2F32: return NVPTX::FSetUEQf32rr_toi32; + case NVPTX::FVecUEQV2F64: return NVPTX::FSetUEQf64rr_toi64; + case NVPTX::FVecUEQV4F32: return NVPTX::FSetUEQf32rr_toi32; + case NVPTX::FVecUGEV2F32: return NVPTX::FSetUGEf32rr_toi32; + case NVPTX::FVecUGEV2F64: return NVPTX::FSetUGEf64rr_toi64; + case NVPTX::FVecUGEV4F32: return NVPTX::FSetUGEf32rr_toi32; + case NVPTX::FVecUGTV2F32: return NVPTX::FSetUGTf32rr_toi32; + case NVPTX::FVecUGTV2F64: return NVPTX::FSetUGTf64rr_toi64; + case NVPTX::FVecUGTV4F32: return NVPTX::FSetUGTf32rr_toi32; + case NVPTX::FVecULEV2F32: return NVPTX::FSetULEf32rr_toi32; + case NVPTX::FVecULEV2F64: return NVPTX::FSetULEf64rr_toi64; + case NVPTX::FVecULEV4F32: return NVPTX::FSetULEf32rr_toi32; + case NVPTX::FVecULTV2F32: return NVPTX::FSetULTf32rr_toi32; + case NVPTX::FVecULTV2F64: return NVPTX::FSetULTf64rr_toi64; + case NVPTX::FVecULTV4F32: return NVPTX::FSetULTf32rr_toi32; + case NVPTX::FVecUNEV2F32: return NVPTX::FSetUNEf32rr_toi32; + case NVPTX::FVecUNEV2F64: return NVPTX::FSetUNEf64rr_toi64; + case NVPTX::FVecUNEV4F32: return NVPTX::FSetUNEf32rr_toi32; + case NVPTX::I16MADV2: return NVPTX::MAD16rrr; + case NVPTX::I16MADV4: return NVPTX::MAD16rrr; + case NVPTX::I32MADV2: return NVPTX::MAD32rrr; + case NVPTX::I32MADV4: return NVPTX::MAD32rrr; + case NVPTX::I64MADV2: return NVPTX::MAD64rrr; + case NVPTX::I8MADV2: return NVPTX::MAD8rrr; + case NVPTX::I8MADV4: return NVPTX::MAD8rrr; + case NVPTX::ShiftLV2I16: return NVPTX::SHLi16rr; + case NVPTX::ShiftLV2I32: return NVPTX::SHLi32rr; + case NVPTX::ShiftLV2I64: return NVPTX::SHLi64rr; + case NVPTX::ShiftLV2I8: return NVPTX::SHLi8rr; + case NVPTX::ShiftLV4I16: return NVPTX::SHLi16rr; + case NVPTX::ShiftLV4I32: return NVPTX::SHLi32rr; + case NVPTX::ShiftLV4I8: return NVPTX::SHLi8rr; + case NVPTX::ShiftRAV2I16: return NVPTX::SRAi16rr; + case NVPTX::ShiftRAV2I32: return NVPTX::SRAi32rr; + case NVPTX::ShiftRAV2I64: return NVPTX::SRAi64rr; + case NVPTX::ShiftRAV2I8: return NVPTX::SRAi8rr; + case NVPTX::ShiftRAV4I16: return NVPTX::SRAi16rr; + case NVPTX::ShiftRAV4I32: return NVPTX::SRAi32rr; + case NVPTX::ShiftRAV4I8: return NVPTX::SRAi8rr; + case NVPTX::ShiftRLV2I16: return NVPTX::SRLi16rr; + case NVPTX::ShiftRLV2I32: return NVPTX::SRLi32rr; + case NVPTX::ShiftRLV2I64: return NVPTX::SRLi64rr; + case NVPTX::ShiftRLV2I8: return NVPTX::SRLi8rr; + case NVPTX::ShiftRLV4I16: return NVPTX::SRLi16rr; + case NVPTX::ShiftRLV4I32: return NVPTX::SRLi32rr; + case NVPTX::ShiftRLV4I8: return NVPTX::SRLi8rr; + case NVPTX::SubCCCV2I32: return NVPTX::SUBCCCi32rr; + case NVPTX::SubCCCV4I32: return NVPTX::SUBCCCi32rr; + case NVPTX::SubCCV2I32: return NVPTX::SUBCCi32rr; + case NVPTX::SubCCV4I32: return NVPTX::SUBCCi32rr; + case NVPTX::V2F32Div_prec_ftz: return NVPTX::FDIV32rr_prec_ftz; + case NVPTX::V2F32Div_prec: return NVPTX::FDIV32rr_prec; + case NVPTX::V2F32Div_ftz: return NVPTX::FDIV32rr_ftz; + case NVPTX::V2F32Div: return NVPTX::FDIV32rr; + case NVPTX::V2F32_Select: return NVPTX::SELECTf32rr; + case NVPTX::V2F64Div: return NVPTX::FDIV64rr; + case NVPTX::V2F64_Select: return NVPTX::SELECTf64rr; + case NVPTX::V2I16_Select: return NVPTX::SELECTi16rr; + case NVPTX::V2I32_Select: return 
NVPTX::SELECTi32rr; + case NVPTX::V2I64_Select: return NVPTX::SELECTi64rr; + case NVPTX::V2I8_Select: return NVPTX::SELECTi8rr; + case NVPTX::V2f32Extract: return NVPTX::FMOV32rr; + case NVPTX::V2f32Insert: return NVPTX::FMOV32rr; + case NVPTX::V2f32Mov: return NVPTX::FMOV32rr; + case NVPTX::V2f64Extract: return NVPTX::FMOV64rr; + case NVPTX::V2f64Insert: return NVPTX::FMOV64rr; + case NVPTX::V2f64Mov: return NVPTX::FMOV64rr; + case NVPTX::V2i16Extract: return NVPTX::IMOV16rr; + case NVPTX::V2i16Insert: return NVPTX::IMOV16rr; + case NVPTX::V2i16Mov: return NVPTX::IMOV16rr; + case NVPTX::V2i32Extract: return NVPTX::IMOV32rr; + case NVPTX::V2i32Insert: return NVPTX::IMOV32rr; + case NVPTX::V2i32Mov: return NVPTX::IMOV32rr; + case NVPTX::V2i64Extract: return NVPTX::IMOV64rr; + case NVPTX::V2i64Insert: return NVPTX::IMOV64rr; + case NVPTX::V2i64Mov: return NVPTX::IMOV64rr; + case NVPTX::V2i8Extract: return NVPTX::IMOV8rr; + case NVPTX::V2i8Insert: return NVPTX::IMOV8rr; + case NVPTX::V2i8Mov: return NVPTX::IMOV8rr; + case NVPTX::V4F32Div_prec_ftz: return NVPTX::FDIV32rr_prec_ftz; + case NVPTX::V4F32Div_prec: return NVPTX::FDIV32rr_prec; + case NVPTX::V4F32Div_ftz: return NVPTX::FDIV32rr_ftz; + case NVPTX::V4F32Div: return NVPTX::FDIV32rr; + case NVPTX::V4F32_Select: return NVPTX::SELECTf32rr; + case NVPTX::V4I16_Select: return NVPTX::SELECTi16rr; + case NVPTX::V4I32_Select: return NVPTX::SELECTi32rr; + case NVPTX::V4I8_Select: return NVPTX::SELECTi8rr; + case NVPTX::V4f32Extract: return NVPTX::FMOV32rr; + case NVPTX::V4f32Insert: return NVPTX::FMOV32rr; + case NVPTX::V4f32Mov: return NVPTX::FMOV32rr; + case NVPTX::V4i16Extract: return NVPTX::IMOV16rr; + case NVPTX::V4i16Insert: return NVPTX::IMOV16rr; + case NVPTX::V4i16Mov: return NVPTX::IMOV16rr; + case NVPTX::V4i32Extract: return NVPTX::IMOV32rr; + case NVPTX::V4i32Insert: return NVPTX::IMOV32rr; + case NVPTX::V4i32Mov: return NVPTX::IMOV32rr; + case NVPTX::V4i8Extract: return NVPTX::IMOV8rr; + case NVPTX::V4i8Insert: return NVPTX::IMOV8rr; + case NVPTX::V4i8Mov: return NVPTX::IMOV8rr; + case NVPTX::VAddV2I16: return NVPTX::ADDi16rr; + case NVPTX::VAddV2I32: return NVPTX::ADDi32rr; + case NVPTX::VAddV2I64: return NVPTX::ADDi64rr; + case NVPTX::VAddV2I8: return NVPTX::ADDi8rr; + case NVPTX::VAddV4I16: return NVPTX::ADDi16rr; + case NVPTX::VAddV4I32: return NVPTX::ADDi32rr; + case NVPTX::VAddV4I8: return NVPTX::ADDi8rr; + case NVPTX::VAddfV2F32: return NVPTX::FADDf32rr; + case NVPTX::VAddfV2F32_ftz: return NVPTX::FADDf32rr_ftz; + case NVPTX::VAddfV2F64: return NVPTX::FADDf64rr; + case NVPTX::VAddfV4F32: return NVPTX::FADDf32rr; + case NVPTX::VAddfV4F32_ftz: return NVPTX::FADDf32rr_ftz; + case NVPTX::VAndV2I16: return NVPTX::ANDb16rr; + case NVPTX::VAndV2I32: return NVPTX::ANDb32rr; + case NVPTX::VAndV2I64: return NVPTX::ANDb64rr; + case NVPTX::VAndV2I8: return NVPTX::ANDb8rr; + case NVPTX::VAndV4I16: return NVPTX::ANDb16rr; + case NVPTX::VAndV4I32: return NVPTX::ANDb32rr; + case NVPTX::VAndV4I8: return NVPTX::ANDb8rr; + case NVPTX::VMulfV2F32_ftz: return NVPTX::FMULf32rr_ftz; + case NVPTX::VMulfV2F32: return NVPTX::FMULf32rr; + case NVPTX::VMulfV2F64: return NVPTX::FMULf64rr; + case NVPTX::VMulfV4F32_ftz: return NVPTX::FMULf32rr_ftz; + case NVPTX::VMulfV4F32: return NVPTX::FMULf32rr; + case NVPTX::VMultHSV2I16: return NVPTX::MULTHSi16rr; + case NVPTX::VMultHSV2I32: return NVPTX::MULTHSi32rr; + case NVPTX::VMultHSV2I64: return NVPTX::MULTHSi64rr; + case NVPTX::VMultHSV2I8: return NVPTX::MULTHSi8rr; + case NVPTX::VMultHSV4I16: return 
NVPTX::MULTHSi16rr; + case NVPTX::VMultHSV4I32: return NVPTX::MULTHSi32rr; + case NVPTX::VMultHSV4I8: return NVPTX::MULTHSi8rr; + case NVPTX::VMultHUV2I16: return NVPTX::MULTHUi16rr; + case NVPTX::VMultHUV2I32: return NVPTX::MULTHUi32rr; + case NVPTX::VMultHUV2I64: return NVPTX::MULTHUi64rr; + case NVPTX::VMultHUV2I8: return NVPTX::MULTHUi8rr; + case NVPTX::VMultHUV4I16: return NVPTX::MULTHUi16rr; + case NVPTX::VMultHUV4I32: return NVPTX::MULTHUi32rr; + case NVPTX::VMultHUV4I8: return NVPTX::MULTHUi8rr; + case NVPTX::VMultV2I16: return NVPTX::MULTi16rr; + case NVPTX::VMultV2I32: return NVPTX::MULTi32rr; + case NVPTX::VMultV2I64: return NVPTX::MULTi64rr; + case NVPTX::VMultV2I8: return NVPTX::MULTi8rr; + case NVPTX::VMultV4I16: return NVPTX::MULTi16rr; + case NVPTX::VMultV4I32: return NVPTX::MULTi32rr; + case NVPTX::VMultV4I8: return NVPTX::MULTi8rr; + case NVPTX::VNegV2I16: return NVPTX::INEG16; + case NVPTX::VNegV2I32: return NVPTX::INEG32; + case NVPTX::VNegV2I64: return NVPTX::INEG64; + case NVPTX::VNegV2I8: return NVPTX::INEG8; + case NVPTX::VNegV4I16: return NVPTX::INEG16; + case NVPTX::VNegV4I32: return NVPTX::INEG32; + case NVPTX::VNegV4I8: return NVPTX::INEG8; + case NVPTX::VNegv2f32: return NVPTX::FNEGf32; + case NVPTX::VNegv2f32_ftz: return NVPTX::FNEGf32_ftz; + case NVPTX::VNegv2f64: return NVPTX::FNEGf64; + case NVPTX::VNegv4f32: return NVPTX::FNEGf32; + case NVPTX::VNegv4f32_ftz: return NVPTX::FNEGf32_ftz; + case NVPTX::VNotV2I16: return NVPTX::NOT16; + case NVPTX::VNotV2I32: return NVPTX::NOT32; + case NVPTX::VNotV2I64: return NVPTX::NOT64; + case NVPTX::VNotV2I8: return NVPTX::NOT8; + case NVPTX::VNotV4I16: return NVPTX::NOT16; + case NVPTX::VNotV4I32: return NVPTX::NOT32; + case NVPTX::VNotV4I8: return NVPTX::NOT8; + case NVPTX::VOrV2I16: return NVPTX::ORb16rr; + case NVPTX::VOrV2I32: return NVPTX::ORb32rr; + case NVPTX::VOrV2I64: return NVPTX::ORb64rr; + case NVPTX::VOrV2I8: return NVPTX::ORb8rr; + case NVPTX::VOrV4I16: return NVPTX::ORb16rr; + case NVPTX::VOrV4I32: return NVPTX::ORb32rr; + case NVPTX::VOrV4I8: return NVPTX::ORb8rr; + case NVPTX::VSDivV2I16: return NVPTX::SDIVi16rr; + case NVPTX::VSDivV2I32: return NVPTX::SDIVi32rr; + case NVPTX::VSDivV2I64: return NVPTX::SDIVi64rr; + case NVPTX::VSDivV2I8: return NVPTX::SDIVi8rr; + case NVPTX::VSDivV4I16: return NVPTX::SDIVi16rr; + case NVPTX::VSDivV4I32: return NVPTX::SDIVi32rr; + case NVPTX::VSDivV4I8: return NVPTX::SDIVi8rr; + case NVPTX::VSRemV2I16: return NVPTX::SREMi16rr; + case NVPTX::VSRemV2I32: return NVPTX::SREMi32rr; + case NVPTX::VSRemV2I64: return NVPTX::SREMi64rr; + case NVPTX::VSRemV2I8: return NVPTX::SREMi8rr; + case NVPTX::VSRemV4I16: return NVPTX::SREMi16rr; + case NVPTX::VSRemV4I32: return NVPTX::SREMi32rr; + case NVPTX::VSRemV4I8: return NVPTX::SREMi8rr; + case NVPTX::VSubV2I16: return NVPTX::SUBi16rr; + case NVPTX::VSubV2I32: return NVPTX::SUBi32rr; + case NVPTX::VSubV2I64: return NVPTX::SUBi64rr; + case NVPTX::VSubV2I8: return NVPTX::SUBi8rr; + case NVPTX::VSubV4I16: return NVPTX::SUBi16rr; + case NVPTX::VSubV4I32: return NVPTX::SUBi32rr; + case NVPTX::VSubV4I8: return NVPTX::SUBi8rr; + case NVPTX::VSubfV2F32_ftz: return NVPTX::FSUBf32rr_ftz; + case NVPTX::VSubfV2F32: return NVPTX::FSUBf32rr; + case NVPTX::VSubfV2F64: return NVPTX::FSUBf64rr; + case NVPTX::VSubfV4F32_ftz: return NVPTX::FSUBf32rr_ftz; + case NVPTX::VSubfV4F32: return NVPTX::FSUBf32rr; + case NVPTX::VUDivV2I16: return NVPTX::UDIVi16rr; + case NVPTX::VUDivV2I32: return NVPTX::UDIVi32rr; + case NVPTX::VUDivV2I64: return NVPTX::UDIVi64rr; 
+ case NVPTX::VUDivV2I8: return NVPTX::UDIVi8rr; + case NVPTX::VUDivV4I16: return NVPTX::UDIVi16rr; + case NVPTX::VUDivV4I32: return NVPTX::UDIVi32rr; + case NVPTX::VUDivV4I8: return NVPTX::UDIVi8rr; + case NVPTX::VURemV2I16: return NVPTX::UREMi16rr; + case NVPTX::VURemV2I32: return NVPTX::UREMi32rr; + case NVPTX::VURemV2I64: return NVPTX::UREMi64rr; + case NVPTX::VURemV2I8: return NVPTX::UREMi8rr; + case NVPTX::VURemV4I16: return NVPTX::UREMi16rr; + case NVPTX::VURemV4I32: return NVPTX::UREMi32rr; + case NVPTX::VURemV4I8: return NVPTX::UREMi8rr; + case NVPTX::VXorV2I16: return NVPTX::XORb16rr; + case NVPTX::VXorV2I32: return NVPTX::XORb32rr; + case NVPTX::VXorV2I64: return NVPTX::XORb64rr; + case NVPTX::VXorV2I8: return NVPTX::XORb8rr; + case NVPTX::VXorV4I16: return NVPTX::XORb16rr; + case NVPTX::VXorV4I32: return NVPTX::XORb32rr; + case NVPTX::VXorV4I8: return NVPTX::XORb8rr; + case NVPTX::VecSEQV2I16: return NVPTX::ISetSEQi16rr_toi16; + case NVPTX::VecSEQV2I32: return NVPTX::ISetSEQi32rr_toi32; + case NVPTX::VecSEQV2I64: return NVPTX::ISetSEQi64rr_toi64; + case NVPTX::VecSEQV2I8: return NVPTX::ISetSEQi8rr_toi8; + case NVPTX::VecSEQV4I16: return NVPTX::ISetSEQi16rr_toi16; + case NVPTX::VecSEQV4I32: return NVPTX::ISetSEQi32rr_toi32; + case NVPTX::VecSEQV4I8: return NVPTX::ISetSEQi8rr_toi8; + case NVPTX::VecSGEV2I16: return NVPTX::ISetSGEi16rr_toi16; + case NVPTX::VecSGEV2I32: return NVPTX::ISetSGEi32rr_toi32; + case NVPTX::VecSGEV2I64: return NVPTX::ISetSGEi64rr_toi64; + case NVPTX::VecSGEV2I8: return NVPTX::ISetSGEi8rr_toi8; + case NVPTX::VecSGEV4I16: return NVPTX::ISetSGEi16rr_toi16; + case NVPTX::VecSGEV4I32: return NVPTX::ISetSGEi32rr_toi32; + case NVPTX::VecSGEV4I8: return NVPTX::ISetSGEi8rr_toi8; + case NVPTX::VecSGTV2I16: return NVPTX::ISetSGTi16rr_toi16; + case NVPTX::VecSGTV2I32: return NVPTX::ISetSGTi32rr_toi32; + case NVPTX::VecSGTV2I64: return NVPTX::ISetSGTi64rr_toi64; + case NVPTX::VecSGTV2I8: return NVPTX::ISetSGTi8rr_toi8; + case NVPTX::VecSGTV4I16: return NVPTX::ISetSGTi16rr_toi16; + case NVPTX::VecSGTV4I32: return NVPTX::ISetSGTi32rr_toi32; + case NVPTX::VecSGTV4I8: return NVPTX::ISetSGTi8rr_toi8; + case NVPTX::VecSLEV2I16: return NVPTX::ISetSLEi16rr_toi16; + case NVPTX::VecSLEV2I32: return NVPTX::ISetSLEi32rr_toi32; + case NVPTX::VecSLEV2I64: return NVPTX::ISetSLEi64rr_toi64; + case NVPTX::VecSLEV2I8: return NVPTX::ISetSLEi8rr_toi8; + case NVPTX::VecSLEV4I16: return NVPTX::ISetSLEi16rr_toi16; + case NVPTX::VecSLEV4I32: return NVPTX::ISetSLEi32rr_toi32; + case NVPTX::VecSLEV4I8: return NVPTX::ISetSLEi8rr_toi8; + case NVPTX::VecSLTV2I16: return NVPTX::ISetSLTi16rr_toi16; + case NVPTX::VecSLTV2I32: return NVPTX::ISetSLTi32rr_toi32; + case NVPTX::VecSLTV2I64: return NVPTX::ISetSLTi64rr_toi64; + case NVPTX::VecSLTV2I8: return NVPTX::ISetSLTi8rr_toi8; + case NVPTX::VecSLTV4I16: return NVPTX::ISetSLTi16rr_toi16; + case NVPTX::VecSLTV4I32: return NVPTX::ISetSLTi32rr_toi32; + case NVPTX::VecSLTV4I8: return NVPTX::ISetSLTi8rr_toi8; + case NVPTX::VecSNEV2I16: return NVPTX::ISetSNEi16rr_toi16; + case NVPTX::VecSNEV2I32: return NVPTX::ISetSNEi32rr_toi32; + case NVPTX::VecSNEV2I64: return NVPTX::ISetSNEi64rr_toi64; + case NVPTX::VecSNEV2I8: return NVPTX::ISetSNEi8rr_toi8; + case NVPTX::VecSNEV4I16: return NVPTX::ISetSNEi16rr_toi16; + case NVPTX::VecSNEV4I32: return NVPTX::ISetSNEi32rr_toi32; + case NVPTX::VecSNEV4I8: return NVPTX::ISetSNEi8rr_toi8; + case NVPTX::VecShuffle_v2f32: return NVPTX::FMOV32rr; + case NVPTX::VecShuffle_v2f64: return NVPTX::FMOV64rr; + case 
NVPTX::VecShuffle_v2i16: return NVPTX::IMOV16rr; + case NVPTX::VecShuffle_v2i32: return NVPTX::IMOV32rr; + case NVPTX::VecShuffle_v2i64: return NVPTX::IMOV64rr; + case NVPTX::VecShuffle_v2i8: return NVPTX::IMOV8rr; + case NVPTX::VecShuffle_v4f32: return NVPTX::FMOV32rr; + case NVPTX::VecShuffle_v4i16: return NVPTX::IMOV16rr; + case NVPTX::VecShuffle_v4i32: return NVPTX::IMOV32rr; + case NVPTX::VecShuffle_v4i8: return NVPTX::IMOV8rr; + case NVPTX::VecUEQV2I16: return NVPTX::ISetUEQi16rr_toi16; + case NVPTX::VecUEQV2I32: return NVPTX::ISetUEQi32rr_toi32; + case NVPTX::VecUEQV2I64: return NVPTX::ISetUEQi64rr_toi64; + case NVPTX::VecUEQV2I8: return NVPTX::ISetUEQi8rr_toi8; + case NVPTX::VecUEQV4I16: return NVPTX::ISetUEQi16rr_toi16; + case NVPTX::VecUEQV4I32: return NVPTX::ISetUEQi32rr_toi32; + case NVPTX::VecUEQV4I8: return NVPTX::ISetUEQi8rr_toi8; + case NVPTX::VecUGEV2I16: return NVPTX::ISetUGEi16rr_toi16; + case NVPTX::VecUGEV2I32: return NVPTX::ISetUGEi32rr_toi32; + case NVPTX::VecUGEV2I64: return NVPTX::ISetUGEi64rr_toi64; + case NVPTX::VecUGEV2I8: return NVPTX::ISetUGEi8rr_toi8; + case NVPTX::VecUGEV4I16: return NVPTX::ISetUGEi16rr_toi16; + case NVPTX::VecUGEV4I32: return NVPTX::ISetUGEi32rr_toi32; + case NVPTX::VecUGEV4I8: return NVPTX::ISetUGEi8rr_toi8; + case NVPTX::VecUGTV2I16: return NVPTX::ISetUGTi16rr_toi16; + case NVPTX::VecUGTV2I32: return NVPTX::ISetUGTi32rr_toi32; + case NVPTX::VecUGTV2I64: return NVPTX::ISetUGTi64rr_toi64; + case NVPTX::VecUGTV2I8: return NVPTX::ISetUGTi8rr_toi8; + case NVPTX::VecUGTV4I16: return NVPTX::ISetUGTi16rr_toi16; + case NVPTX::VecUGTV4I32: return NVPTX::ISetUGTi32rr_toi32; + case NVPTX::VecUGTV4I8: return NVPTX::ISetUGTi8rr_toi8; + case NVPTX::VecULEV2I16: return NVPTX::ISetULEi16rr_toi16; + case NVPTX::VecULEV2I32: return NVPTX::ISetULEi32rr_toi32; + case NVPTX::VecULEV2I64: return NVPTX::ISetULEi64rr_toi64; + case NVPTX::VecULEV2I8: return NVPTX::ISetULEi8rr_toi8; + case NVPTX::VecULEV4I16: return NVPTX::ISetULEi16rr_toi16; + case NVPTX::VecULEV4I32: return NVPTX::ISetULEi32rr_toi32; + case NVPTX::VecULEV4I8: return NVPTX::ISetULEi8rr_toi8; + case NVPTX::VecULTV2I16: return NVPTX::ISetULTi16rr_toi16; + case NVPTX::VecULTV2I32: return NVPTX::ISetULTi32rr_toi32; + case NVPTX::VecULTV2I64: return NVPTX::ISetULTi64rr_toi64; + case NVPTX::VecULTV2I8: return NVPTX::ISetULTi8rr_toi8; + case NVPTX::VecULTV4I16: return NVPTX::ISetULTi16rr_toi16; + case NVPTX::VecULTV4I32: return NVPTX::ISetULTi32rr_toi32; + case NVPTX::VecULTV4I8: return NVPTX::ISetULTi8rr_toi8; + case NVPTX::VecUNEV2I16: return NVPTX::ISetUNEi16rr_toi16; + case NVPTX::VecUNEV2I32: return NVPTX::ISetUNEi32rr_toi32; + case NVPTX::VecUNEV2I64: return NVPTX::ISetUNEi64rr_toi64; + case NVPTX::VecUNEV2I8: return NVPTX::ISetUNEi8rr_toi8; + case NVPTX::VecUNEV4I16: return NVPTX::ISetUNEi16rr_toi16; + case NVPTX::VecUNEV4I32: return NVPTX::ISetUNEi32rr_toi32; + case NVPTX::VecUNEV4I8: return NVPTX::ISetUNEi8rr_toi8; + case NVPTX::INT_PTX_LDU_G_v2i8_32: return NVPTX::INT_PTX_LDU_G_v2i8_ELE_32; + case NVPTX::INT_PTX_LDU_G_v4i8_32: return NVPTX::INT_PTX_LDU_G_v4i8_ELE_32; + case NVPTX::INT_PTX_LDU_G_v2i16_32: return NVPTX::INT_PTX_LDU_G_v2i16_ELE_32; + case NVPTX::INT_PTX_LDU_G_v4i16_32: return NVPTX::INT_PTX_LDU_G_v4i16_ELE_32; + case NVPTX::INT_PTX_LDU_G_v2i32_32: return NVPTX::INT_PTX_LDU_G_v2i32_ELE_32; + case NVPTX::INT_PTX_LDU_G_v4i32_32: return NVPTX::INT_PTX_LDU_G_v4i32_ELE_32; + case NVPTX::INT_PTX_LDU_G_v2f32_32: return NVPTX::INT_PTX_LDU_G_v2f32_ELE_32; + case 
NVPTX::INT_PTX_LDU_G_v4f32_32: return NVPTX::INT_PTX_LDU_G_v4f32_ELE_32; + case NVPTX::INT_PTX_LDU_G_v2i64_32: return NVPTX::INT_PTX_LDU_G_v2i64_ELE_32; + case NVPTX::INT_PTX_LDU_G_v2f64_32: return NVPTX::INT_PTX_LDU_G_v2f64_ELE_32; + case NVPTX::INT_PTX_LDU_G_v2i8_64: return NVPTX::INT_PTX_LDU_G_v2i8_ELE_64; + case NVPTX::INT_PTX_LDU_G_v4i8_64: return NVPTX::INT_PTX_LDU_G_v4i8_ELE_64; + case NVPTX::INT_PTX_LDU_G_v2i16_64: return NVPTX::INT_PTX_LDU_G_v2i16_ELE_64; + case NVPTX::INT_PTX_LDU_G_v4i16_64: return NVPTX::INT_PTX_LDU_G_v4i16_ELE_64; + case NVPTX::INT_PTX_LDU_G_v2i32_64: return NVPTX::INT_PTX_LDU_G_v2i32_ELE_64; + case NVPTX::INT_PTX_LDU_G_v4i32_64: return NVPTX::INT_PTX_LDU_G_v4i32_ELE_64; + case NVPTX::INT_PTX_LDU_G_v2f32_64: return NVPTX::INT_PTX_LDU_G_v2f32_ELE_64; + case NVPTX::INT_PTX_LDU_G_v4f32_64: return NVPTX::INT_PTX_LDU_G_v4f32_ELE_64; + case NVPTX::INT_PTX_LDU_G_v2i64_64: return NVPTX::INT_PTX_LDU_G_v2i64_ELE_64; + case NVPTX::INT_PTX_LDU_G_v2f64_64: return NVPTX::INT_PTX_LDU_G_v2f64_ELE_64; + + case NVPTX::LoadParamV4I32: return NVPTX::LoadParamScalar4I32; + case NVPTX::LoadParamV4I16: return NVPTX::LoadParamScalar4I16; + case NVPTX::LoadParamV4I8: return NVPTX::LoadParamScalar4I8; + case NVPTX::LoadParamV2I64: return NVPTX::LoadParamScalar2I64; + case NVPTX::LoadParamV2I32: return NVPTX::LoadParamScalar2I32; + case NVPTX::LoadParamV2I16: return NVPTX::LoadParamScalar2I16; + case NVPTX::LoadParamV2I8: return NVPTX::LoadParamScalar2I8; + case NVPTX::LoadParamV4F32: return NVPTX::LoadParamScalar4F32; + case NVPTX::LoadParamV2F32: return NVPTX::LoadParamScalar2F32; + case NVPTX::LoadParamV2F64: return NVPTX::LoadParamScalar2F64; + case NVPTX::StoreParamV4I32: return NVPTX::StoreParamScalar4I32; + case NVPTX::StoreParamV4I16: return NVPTX::StoreParamScalar4I16; + case NVPTX::StoreParamV4I8: return NVPTX::StoreParamScalar4I8; + case NVPTX::StoreParamV2I64: return NVPTX::StoreParamScalar2I64; + case NVPTX::StoreParamV2I32: return NVPTX::StoreParamScalar2I32; + case NVPTX::StoreParamV2I16: return NVPTX::StoreParamScalar2I16; + case NVPTX::StoreParamV2I8: return NVPTX::StoreParamScalar2I8; + case NVPTX::StoreParamV4F32: return NVPTX::StoreParamScalar4F32; + case NVPTX::StoreParamV2F32: return NVPTX::StoreParamScalar2F32; + case NVPTX::StoreParamV2F64: return NVPTX::StoreParamScalar2F64; + case NVPTX::StoreRetvalV4I32: return NVPTX::StoreRetvalScalar4I32; + case NVPTX::StoreRetvalV4I16: return NVPTX::StoreRetvalScalar4I16; + case NVPTX::StoreRetvalV4I8: return NVPTX::StoreRetvalScalar4I8; + case NVPTX::StoreRetvalV2I64: return NVPTX::StoreRetvalScalar2I64; + case NVPTX::StoreRetvalV2I32: return NVPTX::StoreRetvalScalar2I32; + case NVPTX::StoreRetvalV2I16: return NVPTX::StoreRetvalScalar2I16; + case NVPTX::StoreRetvalV2I8: return NVPTX::StoreRetvalScalar2I8; + case NVPTX::StoreRetvalV4F32: return NVPTX::StoreRetvalScalar4F32; + case NVPTX::StoreRetvalV2F32: return NVPTX::StoreRetvalScalar2F32; + case NVPTX::StoreRetvalV2F64: return NVPTX::StoreRetvalScalar2F64; + case NVPTX::VecI32toV4I8: return NVPTX::I32toV4I8; + case NVPTX::VecI64toV4I16: return NVPTX::I64toV4I16; + case NVPTX::VecI16toV2I8: return NVPTX::I16toV2I8; + case NVPTX::VecI32toV2I16: return NVPTX::I32toV2I16; + case NVPTX::VecI64toV2I32: return NVPTX::I64toV2I32; + case NVPTX::VecF64toV2F32: return NVPTX::F64toV2F32; + + case NVPTX::LD_v2i8_avar: return NVPTX::LDV_i8_v2_avar; + case NVPTX::LD_v2i8_areg: return NVPTX::LDV_i8_v2_areg; + case NVPTX::LD_v2i8_ari: return NVPTX::LDV_i8_v2_ari; + case 
NVPTX::LD_v2i8_asi: return NVPTX::LDV_i8_v2_asi; + case NVPTX::LD_v4i8_avar: return NVPTX::LDV_i8_v4_avar; + case NVPTX::LD_v4i8_areg: return NVPTX::LDV_i8_v4_areg; + case NVPTX::LD_v4i8_ari: return NVPTX::LDV_i8_v4_ari; + case NVPTX::LD_v4i8_asi: return NVPTX::LDV_i8_v4_asi; + + case NVPTX::LD_v2i16_avar: return NVPTX::LDV_i16_v2_avar; + case NVPTX::LD_v2i16_areg: return NVPTX::LDV_i16_v2_areg; + case NVPTX::LD_v2i16_ari: return NVPTX::LDV_i16_v2_ari; + case NVPTX::LD_v2i16_asi: return NVPTX::LDV_i16_v2_asi; + case NVPTX::LD_v4i16_avar: return NVPTX::LDV_i16_v4_avar; + case NVPTX::LD_v4i16_areg: return NVPTX::LDV_i16_v4_areg; + case NVPTX::LD_v4i16_ari: return NVPTX::LDV_i16_v4_ari; + case NVPTX::LD_v4i16_asi: return NVPTX::LDV_i16_v4_asi; + + case NVPTX::LD_v2i32_avar: return NVPTX::LDV_i32_v2_avar; + case NVPTX::LD_v2i32_areg: return NVPTX::LDV_i32_v2_areg; + case NVPTX::LD_v2i32_ari: return NVPTX::LDV_i32_v2_ari; + case NVPTX::LD_v2i32_asi: return NVPTX::LDV_i32_v2_asi; + case NVPTX::LD_v4i32_avar: return NVPTX::LDV_i32_v4_avar; + case NVPTX::LD_v4i32_areg: return NVPTX::LDV_i32_v4_areg; + case NVPTX::LD_v4i32_ari: return NVPTX::LDV_i32_v4_ari; + case NVPTX::LD_v4i32_asi: return NVPTX::LDV_i32_v4_asi; + + case NVPTX::LD_v2f32_avar: return NVPTX::LDV_f32_v2_avar; + case NVPTX::LD_v2f32_areg: return NVPTX::LDV_f32_v2_areg; + case NVPTX::LD_v2f32_ari: return NVPTX::LDV_f32_v2_ari; + case NVPTX::LD_v2f32_asi: return NVPTX::LDV_f32_v2_asi; + case NVPTX::LD_v4f32_avar: return NVPTX::LDV_f32_v4_avar; + case NVPTX::LD_v4f32_areg: return NVPTX::LDV_f32_v4_areg; + case NVPTX::LD_v4f32_ari: return NVPTX::LDV_f32_v4_ari; + case NVPTX::LD_v4f32_asi: return NVPTX::LDV_f32_v4_asi; + + case NVPTX::LD_v2i64_avar: return NVPTX::LDV_i64_v2_avar; + case NVPTX::LD_v2i64_areg: return NVPTX::LDV_i64_v2_areg; + case NVPTX::LD_v2i64_ari: return NVPTX::LDV_i64_v2_ari; + case NVPTX::LD_v2i64_asi: return NVPTX::LDV_i64_v2_asi; + case NVPTX::LD_v2f64_avar: return NVPTX::LDV_f64_v2_avar; + case NVPTX::LD_v2f64_areg: return NVPTX::LDV_f64_v2_areg; + case NVPTX::LD_v2f64_ari: return NVPTX::LDV_f64_v2_ari; + case NVPTX::LD_v2f64_asi: return NVPTX::LDV_f64_v2_asi; + + case NVPTX::ST_v2i8_avar: return NVPTX::STV_i8_v2_avar; + case NVPTX::ST_v2i8_areg: return NVPTX::STV_i8_v2_areg; + case NVPTX::ST_v2i8_ari: return NVPTX::STV_i8_v2_ari; + case NVPTX::ST_v2i8_asi: return NVPTX::STV_i8_v2_asi; + case NVPTX::ST_v4i8_avar: return NVPTX::STV_i8_v4_avar; + case NVPTX::ST_v4i8_areg: return NVPTX::STV_i8_v4_areg; + case NVPTX::ST_v4i8_ari: return NVPTX::STV_i8_v4_ari; + case NVPTX::ST_v4i8_asi: return NVPTX::STV_i8_v4_asi; + + case NVPTX::ST_v2i16_avar: return NVPTX::STV_i16_v2_avar; + case NVPTX::ST_v2i16_areg: return NVPTX::STV_i16_v2_areg; + case NVPTX::ST_v2i16_ari: return NVPTX::STV_i16_v2_ari; + case NVPTX::ST_v2i16_asi: return NVPTX::STV_i16_v2_asi; + case NVPTX::ST_v4i16_avar: return NVPTX::STV_i16_v4_avar; + case NVPTX::ST_v4i16_areg: return NVPTX::STV_i16_v4_areg; + case NVPTX::ST_v4i16_ari: return NVPTX::STV_i16_v4_ari; + case NVPTX::ST_v4i16_asi: return NVPTX::STV_i16_v4_asi; + + case NVPTX::ST_v2i32_avar: return NVPTX::STV_i32_v2_avar; + case NVPTX::ST_v2i32_areg: return NVPTX::STV_i32_v2_areg; + case NVPTX::ST_v2i32_ari: return NVPTX::STV_i32_v2_ari; + case NVPTX::ST_v2i32_asi: return NVPTX::STV_i32_v2_asi; + case NVPTX::ST_v4i32_avar: return NVPTX::STV_i32_v4_avar; + case NVPTX::ST_v4i32_areg: return NVPTX::STV_i32_v4_areg; + case NVPTX::ST_v4i32_ari: return NVPTX::STV_i32_v4_ari; + case NVPTX::ST_v4i32_asi: 
return NVPTX::STV_i32_v4_asi;
+
+  case NVPTX::ST_v2f32_avar: return NVPTX::STV_f32_v2_avar;
+  case NVPTX::ST_v2f32_areg: return NVPTX::STV_f32_v2_areg;
+  case NVPTX::ST_v2f32_ari: return NVPTX::STV_f32_v2_ari;
+  case NVPTX::ST_v2f32_asi: return NVPTX::STV_f32_v2_asi;
+  case NVPTX::ST_v4f32_avar: return NVPTX::STV_f32_v4_avar;
+  case NVPTX::ST_v4f32_areg: return NVPTX::STV_f32_v4_areg;
+  case NVPTX::ST_v4f32_ari: return NVPTX::STV_f32_v4_ari;
+  case NVPTX::ST_v4f32_asi: return NVPTX::STV_f32_v4_asi;
+
+  case NVPTX::ST_v2i64_avar: return NVPTX::STV_i64_v2_avar;
+  case NVPTX::ST_v2i64_areg: return NVPTX::STV_i64_v2_areg;
+  case NVPTX::ST_v2i64_ari: return NVPTX::STV_i64_v2_ari;
+  case NVPTX::ST_v2i64_asi: return NVPTX::STV_i64_v2_asi;
+  case NVPTX::ST_v2f64_avar: return NVPTX::STV_f64_v2_avar;
+  case NVPTX::ST_v2f64_areg: return NVPTX::STV_f64_v2_areg;
+  case NVPTX::ST_v2f64_ari: return NVPTX::STV_f64_v2_ari;
+  case NVPTX::ST_v2f64_asi: return NVPTX::STV_f64_v2_asi;
+  }
+  return 0;
+}
diff --git a/lib/Target/NVPTX/cl_common_defines.h b/lib/Target/NVPTX/cl_common_defines.h
new file mode 100644
index 0000000..a7347ef
--- /dev/null
+++ b/lib/Target/NVPTX/cl_common_defines.h
@@ -0,0 +1,125 @@
+#ifndef __CL_COMMON_DEFINES_H__
+#define __CL_COMMON_DEFINES_H__
+// This file includes defines that are common to both kernel code and
+// the NVPTX back-end.
+
+//
+// Common defines for Image intrinsics
+// Channel order
+enum {
+  CLK_R = 0x10B0,
+  CLK_A = 0x10B1,
+  CLK_RG = 0x10B2,
+  CLK_RA = 0x10B3,
+  CLK_RGB = 0x10B4,
+  CLK_RGBA = 0x10B5,
+  CLK_BGRA = 0x10B6,
+  CLK_ARGB = 0x10B7,
+
+#if (__NV_CL_C_VERSION == __NV_CL_C_VERSION_1_0)
+  CLK_xRGB = 0x10B7,
+#endif
+
+  CLK_INTENSITY = 0x10B8,
+  CLK_LUMINANCE = 0x10B9
+
+#if (__NV_CL_C_VERSION >= __NV_CL_C_VERSION_1_1)
+  ,
+  CLK_Rx = 0x10BA,
+  CLK_RGx = 0x10BB,
+  CLK_RGBx = 0x10BC
+#endif
+};
+
+
+typedef enum clk_channel_type {
+  // valid formats for float return types
+  CLK_SNORM_INT8 = 0x10D0, // four channel RGBA snorm8
+  CLK_SNORM_INT16 = 0x10D1, // four channel RGBA snorm16
+  CLK_UNORM_INT8 = 0x10D2, // four channel RGBA unorm8
+  CLK_UNORM_INT16 = 0x10D3, // four channel RGBA unorm16
+  CLK_HALF_FLOAT = 0x10DD, // four channel RGBA half
+  CLK_FLOAT = 0x10DE, // four channel RGBA float
+
+#if (__NV_CL_C_VERSION >= __NV_CL_C_VERSION_1_1)
+  CLK_UNORM_SHORT_565 = 0x10D4,
+  CLK_UNORM_SHORT_555 = 0x10D5,
+  CLK_UNORM_INT_101010 = 0x10D6,
+#endif
+
+  // valid only for integer return types
+  CLK_SIGNED_INT8 = 0x10D7,
+  CLK_SIGNED_INT16 = 0x10D8,
+  CLK_SIGNED_INT32 = 0x10D9,
+  CLK_UNSIGNED_INT8 = 0x10DA,
+  CLK_UNSIGNED_INT16 = 0x10DB,
+  CLK_UNSIGNED_INT32 = 0x10DC,
+
+  // CI SPI for CPU
+  __CLK_UNORM_INT8888, // four channel ARGB unorm8
+  __CLK_UNORM_INT8888R, // four channel BGRA unorm8
+
+  __CLK_VALID_IMAGE_TYPE_COUNT,
+  __CLK_INVALID_IMAGE_TYPE = __CLK_VALID_IMAGE_TYPE_COUNT,
+  __CLK_VALID_IMAGE_TYPE_MASK_BITS = 4, // number of bits required to
+                                        // represent any image type
+  __CLK_VALID_IMAGE_TYPE_MASK = ( 1 << __CLK_VALID_IMAGE_TYPE_MASK_BITS ) - 1
+} clk_channel_type;
+
+typedef enum clk_sampler_type {
+  __CLK_ADDRESS_BASE = 0,
+  CLK_ADDRESS_NONE = 0 << __CLK_ADDRESS_BASE,
+  CLK_ADDRESS_CLAMP = 1 << __CLK_ADDRESS_BASE,
+  CLK_ADDRESS_CLAMP_TO_EDGE = 2 << __CLK_ADDRESS_BASE,
+  CLK_ADDRESS_REPEAT = 3 << __CLK_ADDRESS_BASE,
+  CLK_ADDRESS_MIRROR = 4 << __CLK_ADDRESS_BASE,
+
+#if (__NV_CL_C_VERSION >= __NV_CL_C_VERSION_1_1)
+  CLK_ADDRESS_MIRRORED_REPEAT = CLK_ADDRESS_MIRROR,
+#endif
+  __CLK_ADDRESS_MASK = CLK_ADDRESS_NONE | CLK_ADDRESS_CLAMP |
+                       CLK_ADDRESS_CLAMP_TO_EDGE |
+                       CLK_ADDRESS_REPEAT | CLK_ADDRESS_MIRROR,
+  __CLK_ADDRESS_BITS = 3, // number of bits required to
+                          // represent address info
+
+  __CLK_NORMALIZED_BASE = __CLK_ADDRESS_BITS,
+  CLK_NORMALIZED_COORDS_FALSE = 0,
+  CLK_NORMALIZED_COORDS_TRUE = 1 << __CLK_NORMALIZED_BASE,
+  __CLK_NORMALIZED_MASK = CLK_NORMALIZED_COORDS_FALSE |
+                          CLK_NORMALIZED_COORDS_TRUE,
+  __CLK_NORMALIZED_BITS = 1, // number of bits required to
+                             // represent normalization
+
+  __CLK_FILTER_BASE = __CLK_NORMALIZED_BASE +
+                      __CLK_NORMALIZED_BITS,
+  CLK_FILTER_NEAREST = 0 << __CLK_FILTER_BASE,
+  CLK_FILTER_LINEAR = 1 << __CLK_FILTER_BASE,
+  CLK_FILTER_ANISOTROPIC = 2 << __CLK_FILTER_BASE,
+  __CLK_FILTER_MASK = CLK_FILTER_NEAREST | CLK_FILTER_LINEAR |
+                      CLK_FILTER_ANISOTROPIC,
+  __CLK_FILTER_BITS = 2, // number of bits required to
+                         // represent filter info
+
+  __CLK_MIP_BASE = __CLK_FILTER_BASE + __CLK_FILTER_BITS,
+  CLK_MIP_NEAREST = 0 << __CLK_MIP_BASE,
+  CLK_MIP_LINEAR = 1 << __CLK_MIP_BASE,
+  CLK_MIP_ANISOTROPIC = 2 << __CLK_MIP_BASE,
+  __CLK_MIP_MASK = CLK_MIP_NEAREST | CLK_MIP_LINEAR |
+                   CLK_MIP_ANISOTROPIC,
+  __CLK_MIP_BITS = 2,
+
+  __CLK_SAMPLER_BITS = __CLK_MIP_BASE + __CLK_MIP_BITS,
+  __CLK_SAMPLER_MASK = __CLK_MIP_MASK | __CLK_FILTER_MASK |
+                       __CLK_NORMALIZED_MASK | __CLK_ADDRESS_MASK,
+
+  __CLK_ANISOTROPIC_RATIO_BITS = 5,
+  __CLK_ANISOTROPIC_RATIO_MASK = (int) 0x80000000 >>
+                                 (__CLK_ANISOTROPIC_RATIO_BITS-1)
+} clk_sampler_type;
+
+// Memory synchronization
+#define CLK_LOCAL_MEM_FENCE (1 << 0)
+#define CLK_GLOBAL_MEM_FENCE (1 << 1)
+
+#endif // __CL_COMMON_DEFINES_H__
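
Since the sampler state above is a packed bitfield (address mode in bits 0-2, the normalized-coordinates flag in bit 3, filter mode in bits 4-5, mip filter in bits 6-7), a small self-contained check can make the layout concrete. This is an editor's sketch, not part of the import; it assumes the header above is saved as cl_common_defines.h on the include path and compiles stand-alone (the __NV_CL_C_VERSION guards evaluate with undefined macros).

#include <cassert>
#include "cl_common_defines.h"

int main() {
  // Compose a sampler word field by field, then pull each field back out
  // with the masks defined in the header.
  int sampler = CLK_ADDRESS_CLAMP            /* 1 << 0 */
              | CLK_NORMALIZED_COORDS_TRUE   /* 1 << 3 */
              | CLK_FILTER_LINEAR;           /* 1 << 4 */

  assert((sampler & __CLK_ADDRESS_MASK) == CLK_ADDRESS_CLAMP);
  assert((sampler & __CLK_NORMALIZED_MASK) == CLK_NORMALIZED_COORDS_TRUE);
  assert((sampler & __CLK_FILTER_MASK) == CLK_FILTER_LINEAR);
  assert(sampler == 0x19);
  return 0;
}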
diff --git a/lib/Target/NVPTX/gen-register-defs.py b/lib/Target/NVPTX/gen-register-defs.py
new file mode 100644
index 0000000..ed06668
--- /dev/null
+++ b/lib/Target/NVPTX/gen-register-defs.py
@@ -0,0 +1,202 @@
+#!/usr/bin/env python
+
+num_regs = 396
+
+outFile = open('NVPTXRegisterInfo.td', 'w')
+
+outFile.write('''
+//===-- NVPTXRegisterInfo.td - NVPTX Register defs ---------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Declarations that describe the PTX register file
+//===----------------------------------------------------------------------===//
+
+class NVPTXReg<string n> : Register<n> {
+  let Namespace = "NVPTX";
+}
+
+class NVPTXRegClass<list<ValueType> regTypes, int alignment, dag regList>
+  : RegisterClass <"NVPTX", regTypes, alignment, regList>;
+
+//===----------------------------------------------------------------------===//
+// Registers
+//===----------------------------------------------------------------------===//
+
+// Special Registers used as stack pointer
+def VRFrame : NVPTXReg<"%SP">;
+def VRFrameLocal : NVPTXReg<"%SPL">;
+
+// Special Registers used as the stack
+def VRDepot : NVPTXReg<"%Depot">;
+''')
+
+# Predicates
+outFile.write('''
+//===--- Predicate --------------------------------------------------------===//
+''')
+for i in range(0, num_regs):
+  outFile.write('def P%d : NVPTXReg<"%%p%d">;\n' % (i, i))
+
+# Int8
+outFile.write('''
+//===--- 8-bit ------------------------------------------------------------===//
+''')
+for i in range(0, num_regs):
+  outFile.write('def RC%d : NVPTXReg<"%%rc%d">;\n' % (i, i))
+
+# Int16
+outFile.write('''
+//===--- 16-bit -----------------------------------------------------------===//
+''')
+for i in range(0, num_regs):
+  outFile.write('def RS%d : NVPTXReg<"%%rs%d">;\n' % (i, i))
+
+# Int32
+outFile.write('''
+//===--- 32-bit -----------------------------------------------------------===//
+''')
+for i in range(0, num_regs):
+  outFile.write('def R%d : NVPTXReg<"%%r%d">;\n' % (i, i))
+
+# Int64
+outFile.write('''
+//===--- 64-bit -----------------------------------------------------------===//
+''')
+for i in range(0, num_regs):
+  outFile.write('def RL%d : NVPTXReg<"%%rl%d">;\n' % (i, i))
+
+# F32
+outFile.write('''
+//===--- 32-bit float -----------------------------------------------------===//
+''')
+for i in range(0, num_regs):
+  outFile.write('def F%d : NVPTXReg<"%%f%d">;\n' % (i, i))
+
+# F64
+outFile.write('''
+//===--- 64-bit float -----------------------------------------------------===//
+''')
+for i in range(0, num_regs):
+  outFile.write('def FL%d : NVPTXReg<"%%fl%d">;\n' % (i, i))
+
+# Vector registers
+outFile.write('''
+//===--- Vector -----------------------------------------------------------===//
+''')
+for i in range(0, num_regs):
+  outFile.write('def v2b8_%d : NVPTXReg<"%%v2b8_%d">;\n' % (i, i))
+for i in range(0, num_regs):
+  outFile.write('def v2b16_%d : NVPTXReg<"%%v2b16_%d">;\n' % (i, i))
+for i in range(0, num_regs):
+  outFile.write('def v2b32_%d : NVPTXReg<"%%v2b32_%d">;\n' % (i, i))
+for i in range(0, num_regs):
+  outFile.write('def v2b64_%d : NVPTXReg<"%%v2b64_%d">;\n' % (i, i))
+
+for i in range(0, num_regs):
+  outFile.write('def v4b8_%d : NVPTXReg<"%%v4b8_%d">;\n' % (i, i))
+for i in range(0, num_regs):
+  outFile.write('def v4b16_%d : NVPTXReg<"%%v4b16_%d">;\n' % (i, i))
+for i in range(0, num_regs):
+  outFile.write('def v4b32_%d : NVPTXReg<"%%v4b32_%d">;\n' % (i, i))
+
+# Argument registers
+outFile.write('''
+//===--- Arguments --------------------------------------------------------===//
+''')
+for i in range(0, num_regs):
+  outFile.write('def ia%d : NVPTXReg<"%%ia%d">;\n' % (i, i))
+for i in range(0, num_regs):
+  outFile.write('def la%d : NVPTXReg<"%%la%d">;\n' % (i, i))
+for i in range(0, num_regs):
+  outFile.write('def fa%d : NVPTXReg<"%%fa%d">;\n' % (i, i))
+for i in range(0, num_regs):
+  outFile.write('def da%d : NVPTXReg<"%%da%d">;\n' % (i, i))
+
+outFile.write('''
+//===----------------------------------------------------------------------===//
+// Register classes
+//===----------------------------------------------------------------------===//
+''')
+
+outFile.write('def Int1Regs : NVPTXRegClass<[i1], 8, (add (sequence "P%%u", 0, %d))>;\n' % (num_regs-1))
+outFile.write('def Int8Regs : NVPTXRegClass<[i8], 8, (add (sequence "RC%%u", 0, %d))>;\n' % (num_regs-1))
+outFile.write('def Int16Regs : NVPTXRegClass<[i16], 16, (add (sequence "RS%%u", 0, %d))>;\n' % (num_regs-1))
+outFile.write('def Int32Regs : NVPTXRegClass<[i32], 32, (add (sequence "R%%u", 0, %d))>;\n' % (num_regs-1))
+outFile.write('def Int64Regs : NVPTXRegClass<[i64], 64, (add (sequence "RL%%u", 0, %d))>;\n' % (num_regs-1))
+
+outFile.write('def Float32Regs : NVPTXRegClass<[f32], 32, (add (sequence "F%%u", 0, %d))>;\n' % (num_regs-1))
+outFile.write('def Float64Regs : NVPTXRegClass<[f64], 64, (add (sequence "FL%%u", 0, %d))>;\n' % (num_regs-1))
+
+outFile.write('def Int32ArgRegs : NVPTXRegClass<[i32], 32, (add (sequence "ia%%u", 0, %d))>;\n' % (num_regs-1))
+outFile.write('def Int64ArgRegs : NVPTXRegClass<[i64], 64, (add (sequence "la%%u", 0, %d))>;\n' % (num_regs-1))
+outFile.write('def Float32ArgRegs : NVPTXRegClass<[f32], 32, (add (sequence "fa%%u", 0, %d))>;\n' % (num_regs-1))
+outFile.write('def Float64ArgRegs : NVPTXRegClass<[f64], 64, (add (sequence "da%%u", 0, %d))>;\n' % (num_regs-1))
+
+outFile.write('''
+// Read NVPTXRegisterInfo.cpp to see how VRFrame and VRDepot are used.
+def SpecialRegs : NVPTXRegClass<[i32], 32, (add VRFrame, VRDepot)>;
+''')
+
+outFile.write('''
+class NVPTXVecRegClass<list<ValueType> regTypes, int alignment, dag regList,
+                       NVPTXRegClass sClass,
+                       int e,
+                       string n>
+  : NVPTXRegClass<regTypes, alignment, regList>
+{
+  NVPTXRegClass scalarClass=sClass;
+  int elems=e;
+  string name=n;
+}
+''')
+
+
+outFile.write('def V2F32Regs\n  : NVPTXVecRegClass<[v2f32], 64, (add (sequence "v2b32_%%u", 0, %d)),\n                     Float32Regs, 2, ".v2.f32">;\n' % (num_regs-1))
+outFile.write('def V4F32Regs\n  : NVPTXVecRegClass<[v4f32], 128, (add (sequence "v4b32_%%u", 0, %d)),\n                     Float32Regs, 4, ".v4.f32">;\n' % (num_regs-1))
+
+outFile.write('def V2I32Regs\n  : NVPTXVecRegClass<[v2i32], 64, (add (sequence "v2b32_%%u", 0, %d)),\n                     Int32Regs, 2, ".v2.u32">;\n' % (num_regs-1))
+outFile.write('def V4I32Regs\n  : NVPTXVecRegClass<[v4i32], 128, (add (sequence "v4b32_%%u", 0, %d)),\n                     Int32Regs, 4, ".v4.u32">;\n' % (num_regs-1))
+
+outFile.write('def V2F64Regs\n  : NVPTXVecRegClass<[v2f64], 128, (add (sequence "v2b64_%%u", 0, %d)),\n                     Float64Regs, 2, ".v2.f64">;\n' % (num_regs-1))
+outFile.write('def V2I64Regs\n  : NVPTXVecRegClass<[v2i64], 128, (add (sequence "v2b64_%%u", 0, %d)),\n                     Int64Regs, 2, ".v2.u64">;\n' % (num_regs-1))
+
+outFile.write('def V2I16Regs\n  : NVPTXVecRegClass<[v2i16], 32, (add (sequence "v2b16_%%u", 0, %d)),\n                     Int16Regs, 2, ".v2.u16">;\n' % (num_regs-1))
+outFile.write('def V4I16Regs\n  : NVPTXVecRegClass<[v4i16], 64, (add (sequence "v4b16_%%u", 0, %d)),\n                     Int16Regs, 4, ".v4.u16">;\n' % (num_regs-1))
+
+outFile.write('def V2I8Regs\n  : NVPTXVecRegClass<[v2i8], 16, (add (sequence "v2b8_%%u", 0, %d)),\n                     Int8Regs, 2, ".v2.u8">;\n' % (num_regs-1))
+outFile.write('def V4I8Regs\n  : NVPTXVecRegClass<[v4i8], 32, (add (sequence "v4b8_%%u", 0, %d)),\n                     Int8Regs, 4, ".v4.u8">;\n' % (num_regs-1))
+
+outFile.close()
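
With the default num_regs = 396 above, each write expands its format string once per register index (the %% escapes become literal % signs), so the generated NVPTXRegisterInfo.td contains entries of the following shape. This is an illustrative excerpt reconstructed from the format strings, not copied from a generated file:

def R0 : NVPTXReg<"%r0">;
def Int32Regs : NVPTXRegClass<[i32], 32, (add (sequence "R%u", 0, 395))>;
def V2F32Regs
  : NVPTXVecRegClass<[v2f32], 64, (add (sequence "v2b32_%u", 0, 395)),
                     Float32Regs, 2, ".v2.f32">;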
+
+
+outFile = open('NVPTXNumRegisters.h', 'w')
+outFile.write('''
+//===-- NVPTXNumRegisters.h - PTX Register Info ---------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef NVPTX_NUM_REGISTERS_H
+#define NVPTX_NUM_REGISTERS_H
+
+namespace llvm {
+
+const unsigned NVPTXNumRegisters = %d;
+
+}
+
+#endif
+''' % num_regs)
+
+outFile.close()
diff --git a/lib/Target/PTX/CMakeLists.txt b/lib/Target/PTX/CMakeLists.txt
deleted file mode 100644
index a3be342..0000000
--- a/lib/Target/PTX/CMakeLists.txt
+++ /dev/null
@@ -1,32 +0,0 @@
-set(LLVM_TARGET_DEFINITIONS PTX.td)
-
-tablegen(LLVM PTXGenAsmWriter.inc -gen-asm-writer)
-tablegen(LLVM PTXGenDAGISel.inc -gen-dag-isel)
-tablegen(LLVM PTXGenInstrInfo.inc -gen-instr-info)
-tablegen(LLVM PTXGenRegisterInfo.inc -gen-register-info)
-tablegen(LLVM PTXGenSubtargetInfo.inc -gen-subtarget)
-add_public_tablegen_target(PTXCommonTableGen)
-
-add_llvm_target(PTXCodeGen
-  PTXAsmPrinter.cpp
-  PTXISelDAGToDAG.cpp
-  PTXISelLowering.cpp
-  PTXInstrInfo.cpp
-  PTXFPRoundingModePass.cpp
-  PTXFrameLowering.cpp
-  PTXMCAsmStreamer.cpp
-  PTXMCInstLower.cpp
-  PTXMFInfoExtract.cpp
-  PTXMachineFunctionInfo.cpp
-  PTXParamManager.cpp
-  PTXRegAlloc.cpp
-  PTXRegisterInfo.cpp
-  PTXSelectionDAGInfo.cpp
-  PTXSubtarget.cpp
-  PTXTargetMachine.cpp
-  )
-
-add_subdirectory(TargetInfo)
-add_subdirectory(InstPrinter)
-add_subdirectory(MCTargetDesc)
-
diff --git a/lib/Target/PTX/InstPrinter/CMakeLists.txt b/lib/Target/PTX/InstPrinter/CMakeLists.txt
deleted file mode 100644
index b252893..0000000
--- a/lib/Target/PTX/InstPrinter/CMakeLists.txt
+++ /dev/null
@@ -1,8 +0,0 @@
-include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
-
-add_llvm_library(LLVMPTXAsmPrinter
-  PTXInstPrinter.cpp
-  )
-
-add_dependencies(LLVMPTXAsmPrinter PTXCommonTableGen)
-
diff --git a/lib/Target/PTX/InstPrinter/LLVMBuild.txt b/lib/Target/PTX/InstPrinter/LLVMBuild.txt
deleted file mode 100644
index af5d200..0000000
--- a/lib/Target/PTX/InstPrinter/LLVMBuild.txt
+++ /dev/null
@@ -1,23 +0,0 @@
-;===- ./lib/Target/PTX/InstPrinter/LLVMBuild.txt ---------------*- Conf -*--===;
-;
-; The LLVM Compiler Infrastructure
-;
-; This file is distributed under the University of Illinois Open Source
-; License. See LICENSE.TXT for details.
-;
-;===------------------------------------------------------------------------===;
-;
-; This is an LLVMBuild description file for the components in this subdirectory.
-;
-; For more information on the LLVMBuild system, please see:
-;
-;   http://llvm.org/docs/LLVMBuild.html
-;
-;===------------------------------------------------------------------------===;
-
-[component_0]
-type = Library
-name = PTXAsmPrinter
-parent = PTX
-required_libraries = MC Support
-add_to_library_groups = PTX
diff --git a/lib/Target/PTX/InstPrinter/Makefile b/lib/Target/PTX/InstPrinter/Makefile
deleted file mode 100644
index 0ccfe44..0000000
--- a/lib/Target/PTX/InstPrinter/Makefile
+++ /dev/null
@@ -1,16 +0,0 @@
-##===- lib/Target/PTX/AsmPrinter/Makefile ------------------*- Makefile -*-===##
-#
-# The LLVM Compiler Infrastructure
-#
-# This file is distributed under the University of Illinois Open Source
-# License. See LICENSE.TXT for details.
-#
-##===----------------------------------------------------------------------===##
-LEVEL = ../../../..
-LIBRARYNAME = LLVMPTXAsmPrinter - -# Hack: we need to include 'main' ptx target directory to grab private headers -CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. - -include $(LEVEL)/Makefile.common - diff --git a/lib/Target/PTX/InstPrinter/PTXInstPrinter.cpp b/lib/Target/PTX/InstPrinter/PTXInstPrinter.cpp deleted file mode 100644 index 1830213..0000000 --- a/lib/Target/PTX/InstPrinter/PTXInstPrinter.cpp +++ /dev/null @@ -1,249 +0,0 @@ -//===-- PTXInstPrinter.cpp - Convert PTX MCInst to assembly syntax --------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This class prints a PTX MCInst to a .ptx file. -// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "asm-printer" -#include "PTXInstPrinter.h" -#include "MCTargetDesc/PTXBaseInfo.h" -#include "llvm/MC/MCAsmInfo.h" -#include "llvm/MC/MCExpr.h" -#include "llvm/MC/MCInst.h" -#include "llvm/MC/MCSymbol.h" -#include "llvm/MC/MCInstrInfo.h" -#include "llvm/ADT/APFloat.h" -#include "llvm/ADT/StringExtras.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/raw_ostream.h" -using namespace llvm; - -#include "PTXGenAsmWriter.inc" - -PTXInstPrinter::PTXInstPrinter(const MCAsmInfo &MAI, - const MCInstrInfo &MII, - const MCRegisterInfo &MRI, - const MCSubtargetInfo &STI) : - MCInstPrinter(MAI, MII, MRI) { - // Initialize the set of available features. - setAvailableFeatures(STI.getFeatureBits()); -} - -void PTXInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const { - // Decode the register number into type and offset - unsigned RegSpace = RegNo & 0x7; - unsigned RegType = (RegNo >> 3) & 0x7; - unsigned RegOffset = RegNo >> 6; - - // Print the register - OS << "%"; - - switch (RegSpace) { - default: - llvm_unreachable("Unknown register space!"); - case PTXRegisterSpace::Reg: - switch (RegType) { - default: - llvm_unreachable("Unknown register type!"); - case PTXRegisterType::Pred: - OS << "p"; - break; - case PTXRegisterType::B16: - OS << "rh"; - break; - case PTXRegisterType::B32: - OS << "r"; - break; - case PTXRegisterType::B64: - OS << "rd"; - break; - case PTXRegisterType::F32: - OS << "f"; - break; - case PTXRegisterType::F64: - OS << "fd"; - break; - } - break; - case PTXRegisterSpace::Return: - OS << "ret"; - break; - case PTXRegisterSpace::Argument: - OS << "arg"; - break; - } - - OS << RegOffset; -} - -void PTXInstPrinter::printInst(const MCInst *MI, raw_ostream &O, - StringRef Annot) { - printPredicate(MI, O); - switch (MI->getOpcode()) { - default: - printInstruction(MI, O); - break; - case PTX::CALL: - printCall(MI, O); - } - O << ";"; - printAnnotation(O, Annot); -} - -void PTXInstPrinter::printPredicate(const MCInst *MI, raw_ostream &O) { - // The last two operands are the predicate operands - int RegIndex; - int OpIndex; - - if (MI->getOpcode() == PTX::CALL) { - RegIndex = 0; - OpIndex = 1; - } else { - RegIndex = MI->getNumOperands()-2; - OpIndex = MI->getNumOperands()-1; - } - - int PredOp = MI->getOperand(OpIndex).getImm(); - if (PredOp == PTXPredicate::None) - return; - - if (PredOp == PTXPredicate::Negate) - O << '!'; - else - O << '@'; - - printOperand(MI, RegIndex, O); -} - -void PTXInstPrinter::printCall(const MCInst *MI, raw_ostream &O) { - O << "\tcall.uni\t"; - // The first two operands are the predicate slot - unsigned Index = 
2; - unsigned NumRets = MI->getOperand(Index++).getImm(); - - if (NumRets > 0) { - O << "("; - printOperand(MI, Index++, O); - for (unsigned i = 1; i < NumRets; ++i) { - O << ", "; - printOperand(MI, Index++, O); - } - O << "), "; - } - - const MCExpr* Expr = MI->getOperand(Index++).getExpr(); - unsigned NumArgs = MI->getOperand(Index++).getImm(); - - // if the function call is to printf or puts, change to vprintf - if (const MCSymbolRefExpr *SymRefExpr = dyn_cast(Expr)) { - const MCSymbol &Sym = SymRefExpr->getSymbol(); - if (Sym.getName() == "printf" || Sym.getName() == "puts") { - O << "vprintf"; - } else { - O << Sym.getName(); - } - } else { - O << *Expr; - } - - O << ", ("; - - if (NumArgs > 0) { - printOperand(MI, Index++, O); - for (unsigned i = 1; i < NumArgs; ++i) { - O << ", "; - printOperand(MI, Index++, O); - } - } - O << ")"; -} - -void PTXInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, - raw_ostream &O) { - const MCOperand &Op = MI->getOperand(OpNo); - if (Op.isImm()) { - O << Op.getImm(); - } else if (Op.isFPImm()) { - double Imm = Op.getFPImm(); - APFloat FPImm(Imm); - APInt FPIntImm = FPImm.bitcastToAPInt(); - O << "0D"; - // PTX requires us to output the full 64 bits, even if the number is zero - if (FPIntImm.getZExtValue() > 0) { - O << FPIntImm.toString(16, false); - } else { - O << "0000000000000000"; - } - } else if (Op.isReg()) { - printRegName(O, Op.getReg()); - } else { - assert(Op.isExpr() && "unknown operand kind in printOperand"); - const MCExpr *Expr = Op.getExpr(); - if (const MCSymbolRefExpr *SymRefExpr = dyn_cast(Expr)) { - const MCSymbol &Sym = SymRefExpr->getSymbol(); - O << Sym.getName(); - } else { - O << *Op.getExpr(); - } - } -} - -void PTXInstPrinter::printMemOperand(const MCInst *MI, unsigned OpNo, - raw_ostream &O) { - // By definition, operand OpNo+1 is an i32imm - const MCOperand &Op2 = MI->getOperand(OpNo+1); - printOperand(MI, OpNo, O); - if (Op2.getImm() == 0) - return; // don't print "+0" - O << "+" << Op2.getImm(); -} - -void PTXInstPrinter::printRoundingMode(const MCInst *MI, unsigned OpNo, - raw_ostream &O) { - const MCOperand &Op = MI->getOperand(OpNo); - assert (Op.isImm() && "Rounding modes must be immediate values"); - switch (Op.getImm()) { - default: - llvm_unreachable("Unknown rounding mode!"); - case PTXRoundingMode::RndDefault: - llvm_unreachable("FP rounding-mode pass did not handle instruction!"); - case PTXRoundingMode::RndNone: - // Do not print anything. - break; - case PTXRoundingMode::RndNearestEven: - O << ".rn"; - break; - case PTXRoundingMode::RndTowardsZero: - O << ".rz"; - break; - case PTXRoundingMode::RndNegInf: - O << ".rm"; - break; - case PTXRoundingMode::RndPosInf: - O << ".rp"; - break; - case PTXRoundingMode::RndApprox: - O << ".approx"; - break; - case PTXRoundingMode::RndNearestEvenInt: - O << ".rni"; - break; - case PTXRoundingMode::RndTowardsZeroInt: - O << ".rzi"; - break; - case PTXRoundingMode::RndNegInfInt: - O << ".rmi"; - break; - case PTXRoundingMode::RndPosInfInt: - O << ".rpi"; - break; - } -} - diff --git a/lib/Target/PTX/InstPrinter/PTXInstPrinter.h b/lib/Target/PTX/InstPrinter/PTXInstPrinter.h deleted file mode 100644 index ea4d504..0000000 --- a/lib/Target/PTX/InstPrinter/PTXInstPrinter.h +++ /dev/null @@ -1,45 +0,0 @@ -//===- PTXInstPrinter.h - Convert PTX MCInst to assembly syntax -*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. 
-// -//===----------------------------------------------------------------------===// -// -// This class prints n PTX MCInst to a .ptx file. -// -//===----------------------------------------------------------------------===// - -#ifndef PTXINSTPRINTER_H -#define PTXINSTPRINTER_H - -#include "llvm/MC/MCInstPrinter.h" -#include "llvm/MC/MCSubtargetInfo.h" - -namespace llvm { - -class MCOperand; - -class PTXInstPrinter : public MCInstPrinter { -public: - PTXInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII, - const MCRegisterInfo &MRI, const MCSubtargetInfo &STI); - - virtual void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot); - virtual void printRegName(raw_ostream &OS, unsigned RegNo) const; - - // Autogenerated by tblgen. - void printInstruction(const MCInst *MI, raw_ostream &O); - static const char *getRegisterName(unsigned RegNo); - - void printPredicate(const MCInst *MI, raw_ostream &O); - void printCall(const MCInst *MI, raw_ostream &O); - void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); - void printMemOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); - void printRoundingMode(const MCInst *MI, unsigned OpNo, raw_ostream &O); -}; -} - -#endif - diff --git a/lib/Target/PTX/LLVMBuild.txt b/lib/Target/PTX/LLVMBuild.txt deleted file mode 100644 index 15a1eb5..0000000 --- a/lib/Target/PTX/LLVMBuild.txt +++ /dev/null @@ -1,32 +0,0 @@ -;===- ./lib/Target/PTX/LLVMBuild.txt ---------------------------*- Conf -*--===; -; -; The LLVM Compiler Infrastructure -; -; This file is distributed under the University of Illinois Open Source -; License. See LICENSE.TXT for details. -; -;===------------------------------------------------------------------------===; -; -; This is an LLVMBuild description file for the components in this subdirectory. -; -; For more information on the LLVMBuild system, please see: -; -; http://llvm.org/docs/LLVMBuild.html -; -;===------------------------------------------------------------------------===; - -[common] -subdirectories = InstPrinter MCTargetDesc TargetInfo - -[component_0] -type = TargetGroup -name = PTX -parent = Target -has_asmprinter = 1 - -[component_1] -type = Library -name = PTXCodeGen -parent = PTX -required_libraries = Analysis AsmPrinter CodeGen Core MC PTXDesc PTXInfo SelectionDAG Support Target TransformUtils -add_to_library_groups = PTX diff --git a/lib/Target/PTX/MCTargetDesc/CMakeLists.txt b/lib/Target/PTX/MCTargetDesc/CMakeLists.txt deleted file mode 100644 index d1fd74c..0000000 --- a/lib/Target/PTX/MCTargetDesc/CMakeLists.txt +++ /dev/null @@ -1,6 +0,0 @@ -add_llvm_library(LLVMPTXDesc - PTXMCTargetDesc.cpp - PTXMCAsmInfo.cpp - ) - -add_dependencies(LLVMPTXDesc PTXCommonTableGen) diff --git a/lib/Target/PTX/MCTargetDesc/LLVMBuild.txt b/lib/Target/PTX/MCTargetDesc/LLVMBuild.txt deleted file mode 100644 index 19b80c5..0000000 --- a/lib/Target/PTX/MCTargetDesc/LLVMBuild.txt +++ /dev/null @@ -1,23 +0,0 @@ -;===- ./lib/Target/PTX/MCTargetDesc/LLVMBuild.txt --------------*- Conf -*--===; -; -; The LLVM Compiler Infrastructure -; -; This file is distributed under the University of Illinois Open Source -; License. See LICENSE.TXT for details. -; -;===------------------------------------------------------------------------===; -; -; This is an LLVMBuild description file for the components in this subdirectory. 
-; -; For more information on the LLVMBuild system, please see: -; -; http://llvm.org/docs/LLVMBuild.html -; -;===------------------------------------------------------------------------===; - -[component_0] -type = Library -name = PTXDesc -parent = PTX -required_libraries = MC PTXAsmPrinter PTXInfo Support -add_to_library_groups = PTX diff --git a/lib/Target/PTX/MCTargetDesc/Makefile b/lib/Target/PTX/MCTargetDesc/Makefile deleted file mode 100644 index 35f5a7b..0000000 --- a/lib/Target/PTX/MCTargetDesc/Makefile +++ /dev/null @@ -1,16 +0,0 @@ -##===- lib/Target/PTX/TargetDesc/Makefile ------------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -##===----------------------------------------------------------------------===## - -LEVEL = ../../../.. -LIBRARYNAME = LLVMPTXDesc - -# Hack: we need to include 'main' target directory to grab private headers -CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. - -include $(LEVEL)/Makefile.common diff --git a/lib/Target/PTX/MCTargetDesc/PTXBaseInfo.h b/lib/Target/PTX/MCTargetDesc/PTXBaseInfo.h deleted file mode 100644 index a3e0f32..0000000 --- a/lib/Target/PTX/MCTargetDesc/PTXBaseInfo.h +++ /dev/null @@ -1,134 +0,0 @@ -//===-- PTXBaseInfo.h - Top level definitions for PTX -------- --*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains small standalone helper functions and enum definitions for -// the PTX target useful for the compiler back-end and the MC libraries. -// As such, it deliberately does not include references to LLVM core -// code gen types, passes, etc.. -// -//===----------------------------------------------------------------------===// - -#ifndef PTXBASEINFO_H -#define PTXBASEINFO_H - -#include "PTXMCTargetDesc.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/raw_ostream.h" - -namespace llvm { - namespace PTXStateSpace { - enum { - Global = 0, // default to global state space - Constant = 1, - Local = 2, - Parameter = 3, - Shared = 4 - }; - } // namespace PTXStateSpace - - namespace PTXPredicate { - enum { - Normal = 0, - Negate = 1, - None = 2 - }; - } // namespace PTXPredicate - - /// Namespace to hold all target-specific flags. 
- namespace PTXRoundingMode { - // Instruction Flags - enum { - // Rounding Mode Flags - RndMask = 15, - RndDefault = 0, // --- - RndNone = 1, // - RndNearestEven = 2, // .rn - RndTowardsZero = 3, // .rz - RndNegInf = 4, // .rm - RndPosInf = 5, // .rp - RndApprox = 6, // .approx - RndNearestEvenInt = 7, // .rni - RndTowardsZeroInt = 8, // .rzi - RndNegInfInt = 9, // .rmi - RndPosInfInt = 10 // .rpi - }; - } // namespace PTXII - - namespace PTXRegisterType { - // Register type encoded in MCOperands - enum { - Pred = 0, - B16, - B32, - B64, - F32, - F64 - }; - } // namespace PTXRegisterType - - namespace PTXRegisterSpace { - // Register space encoded in MCOperands - enum { - Reg = 0, - Local, - Param, - Argument, - Return - }; - } - - inline static void decodeRegisterName(raw_ostream &OS, - unsigned EncodedReg) { - OS << "%"; - - unsigned RegSpace = EncodedReg & 0x7; - unsigned RegType = (EncodedReg >> 3) & 0x7; - unsigned RegOffset = EncodedReg >> 6; - - switch (RegSpace) { - default: - llvm_unreachable("Unknown register space!"); - case PTXRegisterSpace::Reg: - switch (RegType) { - default: - llvm_unreachable("Unknown register type!"); - case PTXRegisterType::Pred: - OS << "p"; - break; - case PTXRegisterType::B16: - OS << "rh"; - break; - case PTXRegisterType::B32: - OS << "r"; - break; - case PTXRegisterType::B64: - OS << "rd"; - break; - case PTXRegisterType::F32: - OS << "f"; - break; - case PTXRegisterType::F64: - OS << "fd"; - break; - } - break; - case PTXRegisterSpace::Return: - OS << "ret"; - break; - case PTXRegisterSpace::Argument: - OS << "arg"; - break; - } - - OS << RegOffset; - } -} // namespace llvm - -#endif - diff --git a/lib/Target/PTX/MCTargetDesc/PTXMCAsmInfo.cpp b/lib/Target/PTX/MCTargetDesc/PTXMCAsmInfo.cpp deleted file mode 100644 index cdfbc80..0000000 --- a/lib/Target/PTX/MCTargetDesc/PTXMCAsmInfo.cpp +++ /dev/null @@ -1,37 +0,0 @@ -//===-- PTXMCAsmInfo.cpp - PTX asm properties -----------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains the declarations of the PTXMCAsmInfo properties. -// -//===----------------------------------------------------------------------===// - -#include "PTXMCAsmInfo.h" -#include "llvm/ADT/Triple.h" - -using namespace llvm; - -void PTXMCAsmInfo::anchor() { } - -PTXMCAsmInfo::PTXMCAsmInfo(const Target &T, const StringRef &TT) { - Triple TheTriple(TT); - if (TheTriple.getArch() == Triple::ptx64) - PointerSize = 8; - - CommentString = "//"; - - PrivateGlobalPrefix = "$L__"; - - AllowPeriodsInName = false; - - HasSetDirective = false; - - HasDotTypeDotSizeDirective = false; - - HasSingleParameterDotFile = false; -} diff --git a/lib/Target/PTX/MCTargetDesc/PTXMCAsmInfo.h b/lib/Target/PTX/MCTargetDesc/PTXMCAsmInfo.h deleted file mode 100644 index 32ca069..0000000 --- a/lib/Target/PTX/MCTargetDesc/PTXMCAsmInfo.h +++ /dev/null @@ -1,30 +0,0 @@ -//===-- PTXMCAsmInfo.h - PTX asm properties --------------------*- C++ -*--===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains the declaration of the PTXMCAsmInfo class. 
-// -//===----------------------------------------------------------------------===// - -#ifndef PTX_MCASM_INFO_H -#define PTX_MCASM_INFO_H - -#include "llvm/MC/MCAsmInfo.h" - -namespace llvm { - class Target; - class StringRef; - - class PTXMCAsmInfo : public MCAsmInfo { - virtual void anchor(); - public: - explicit PTXMCAsmInfo(const Target &T, const StringRef &TT); - }; -} // namespace llvm - -#endif // PTX_MCASM_INFO_H diff --git a/lib/Target/PTX/MCTargetDesc/PTXMCTargetDesc.cpp b/lib/Target/PTX/MCTargetDesc/PTXMCTargetDesc.cpp deleted file mode 100644 index 08fb970..0000000 --- a/lib/Target/PTX/MCTargetDesc/PTXMCTargetDesc.cpp +++ /dev/null @@ -1,98 +0,0 @@ -//===-- PTXMCTargetDesc.cpp - PTX Target Descriptions ---------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file provides PTX specific target descriptions. -// -//===----------------------------------------------------------------------===// - -#include "PTXMCTargetDesc.h" -#include "PTXMCAsmInfo.h" -#include "InstPrinter/PTXInstPrinter.h" -#include "llvm/MC/MCCodeGenInfo.h" -#include "llvm/MC/MCInstrInfo.h" -#include "llvm/MC/MCRegisterInfo.h" -#include "llvm/MC/MCSubtargetInfo.h" -#include "llvm/Support/TargetRegistry.h" - -#define GET_INSTRINFO_MC_DESC -#include "PTXGenInstrInfo.inc" - -#define GET_SUBTARGETINFO_MC_DESC -#include "PTXGenSubtargetInfo.inc" - -#define GET_REGINFO_MC_DESC -#include "PTXGenRegisterInfo.inc" - -using namespace llvm; - -static MCInstrInfo *createPTXMCInstrInfo() { - MCInstrInfo *X = new MCInstrInfo(); - InitPTXMCInstrInfo(X); - return X; -} - -static MCRegisterInfo *createPTXMCRegisterInfo(StringRef TT) { - MCRegisterInfo *X = new MCRegisterInfo(); - // PTX does not have a return address register. - InitPTXMCRegisterInfo(X, 0); - return X; -} - -static MCSubtargetInfo *createPTXMCSubtargetInfo(StringRef TT, StringRef CPU, - StringRef FS) { - MCSubtargetInfo *X = new MCSubtargetInfo(); - InitPTXMCSubtargetInfo(X, TT, CPU, FS); - return X; -} - -static MCCodeGenInfo *createPTXMCCodeGenInfo(StringRef TT, Reloc::Model RM, - CodeModel::Model CM, - CodeGenOpt::Level OL) { - MCCodeGenInfo *X = new MCCodeGenInfo(); - X->InitMCCodeGenInfo(RM, CM, OL); - return X; -} - -static MCInstPrinter *createPTXMCInstPrinter(const Target &T, - unsigned SyntaxVariant, - const MCAsmInfo &MAI, - const MCInstrInfo &MII, - const MCRegisterInfo &MRI, - const MCSubtargetInfo &STI) { - assert(SyntaxVariant == 0 && "We only have one syntax variant"); - return new PTXInstPrinter(MAI, MII, MRI, STI); -} - -extern "C" void LLVMInitializePTXTargetMC() { - // Register the MC asm info. - RegisterMCAsmInfo X(ThePTX32Target); - RegisterMCAsmInfo Y(ThePTX64Target); - - // Register the MC codegen info. - TargetRegistry::RegisterMCCodeGenInfo(ThePTX32Target, createPTXMCCodeGenInfo); - TargetRegistry::RegisterMCCodeGenInfo(ThePTX64Target, createPTXMCCodeGenInfo); - - // Register the MC instruction info. - TargetRegistry::RegisterMCInstrInfo(ThePTX32Target, createPTXMCInstrInfo); - TargetRegistry::RegisterMCInstrInfo(ThePTX64Target, createPTXMCInstrInfo); - - // Register the MC register info. - TargetRegistry::RegisterMCRegInfo(ThePTX32Target, createPTXMCRegisterInfo); - TargetRegistry::RegisterMCRegInfo(ThePTX64Target, createPTXMCRegisterInfo); - - // Register the MC subtarget info. 
- TargetRegistry::RegisterMCSubtargetInfo(ThePTX32Target, - createPTXMCSubtargetInfo); - TargetRegistry::RegisterMCSubtargetInfo(ThePTX64Target, - createPTXMCSubtargetInfo); - - // Register the MCInstPrinter. - TargetRegistry::RegisterMCInstPrinter(ThePTX32Target, createPTXMCInstPrinter); - TargetRegistry::RegisterMCInstPrinter(ThePTX64Target, createPTXMCInstPrinter); -} diff --git a/lib/Target/PTX/MCTargetDesc/PTXMCTargetDesc.h b/lib/Target/PTX/MCTargetDesc/PTXMCTargetDesc.h deleted file mode 100644 index 542638a..0000000 --- a/lib/Target/PTX/MCTargetDesc/PTXMCTargetDesc.h +++ /dev/null @@ -1,36 +0,0 @@ -//===-- PTXMCTargetDesc.h - PTX Target Descriptions ------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file provides PTX specific target descriptions. -// -//===----------------------------------------------------------------------===// - -#ifndef PTXMCTARGETDESC_H -#define PTXMCTARGETDESC_H - -namespace llvm { -class Target; - -extern Target ThePTX32Target; -extern Target ThePTX64Target; - -} // End llvm namespace - -// Defines symbolic names for PTX registers. -#define GET_REGINFO_ENUM -#include "PTXGenRegisterInfo.inc" - -// Defines symbolic names for the PTX instructions. -#define GET_INSTRINFO_ENUM -#include "PTXGenInstrInfo.inc" - -#define GET_SUBTARGETINFO_ENUM -#include "PTXGenSubtargetInfo.inc" - -#endif diff --git a/lib/Target/PTX/Makefile b/lib/Target/PTX/Makefile deleted file mode 100644 index fa09634..0000000 --- a/lib/Target/PTX/Makefile +++ /dev/null @@ -1,23 +0,0 @@ -##===- lib/Target/PTX/Makefile -----------------------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -##===----------------------------------------------------------------------===## - -LEVEL = ../../.. -LIBRARYNAME = LLVMPTXCodeGen -TARGET = PTX - -# Make sure that tblgen is run, first thing. -BUILT_SOURCES = PTXGenAsmWriter.inc \ - PTXGenDAGISel.inc \ - PTXGenInstrInfo.inc \ - PTXGenRegisterInfo.inc \ - PTXGenSubtargetInfo.inc - -DIRS = InstPrinter TargetInfo MCTargetDesc - -include $(LEVEL)/Makefile.common diff --git a/lib/Target/PTX/PTX.h b/lib/Target/PTX/PTX.h deleted file mode 100644 index ffb92cb..0000000 --- a/lib/Target/PTX/PTX.h +++ /dev/null @@ -1,43 +0,0 @@ -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains the entry points for global functions defined in the LLVM -// PTX back-end. 
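// Editorial sketch, not part of this patch: once LLVMInitializePTXTargetMC
// (above) has run, a client can retrieve the registered MC objects from the
// TargetRegistry by triple. The helper name and the example triple are
// assumptions for illustration; error handling is elided.
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/Support/TargetRegistry.h"

llvm::MCInstrInfo *getPTXInstrInfoForTriple(const std::string &TripleStr) {
  std::string Err;
  const llvm::Target *T = llvm::TargetRegistry::lookupTarget(TripleStr, Err);
  return T ? T->createMCInstrInfo() : 0; // e.g. TripleStr == "ptx32--"
}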
-// -//===----------------------------------------------------------------------===// - -#ifndef PTX_H -#define PTX_H - -#include "MCTargetDesc/PTXBaseInfo.h" -#include "llvm/Target/TargetMachine.h" - -namespace llvm { - class MachineInstr; - class MCInst; - class PTXAsmPrinter; - class PTXTargetMachine; - class FunctionPass; - - FunctionPass *createPTXISelDag(PTXTargetMachine &TM, - CodeGenOpt::Level OptLevel); - - FunctionPass *createPTXMFInfoExtract(PTXTargetMachine &TM, - CodeGenOpt::Level OptLevel); - - FunctionPass *createPTXFPRoundingModePass(PTXTargetMachine &TM, - CodeGenOpt::Level OptLevel); - - FunctionPass *createPTXRegisterAllocator(); - - void LowerPTXMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI, - PTXAsmPrinter &AP); - -} // namespace llvm; - -#endif // PTX_H diff --git a/lib/Target/PTX/PTX.td b/lib/Target/PTX/PTX.td deleted file mode 100644 index 994a68e..0000000 --- a/lib/Target/PTX/PTX.td +++ /dev/null @@ -1,141 +0,0 @@ -//===-- PTX.td - Describe the PTX Target Machine -----------*- tablegen -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// This is the top level entry point for the PTX target. -//===----------------------------------------------------------------------===// - -//===----------------------------------------------------------------------===// -// Target-independent interfaces -//===----------------------------------------------------------------------===// - -include "llvm/Target/Target.td" - -//===----------------------------------------------------------------------===// -// Subtarget Features -//===----------------------------------------------------------------------===// - -//===- Architectural Features ---------------------------------------------===// - -def FeatureDouble : SubtargetFeature<"double", "SupportsDouble", "true", - "Do not demote .f64 to .f32">; - -def FeatureNoFMA : SubtargetFeature<"no-fma","SupportsFMA", "false", - "Disable Fused-Multiply Add">; - -//===- PTX Version --------------------------------------------------------===// - -def FeaturePTX20 : SubtargetFeature<"ptx20", "PTXVersion", "PTX_VERSION_2_0", - "Use PTX Language Version 2.0">; - -def FeaturePTX21 : SubtargetFeature<"ptx21", "PTXVersion", "PTX_VERSION_2_1", - "Use PTX Language Version 2.1">; - -def FeaturePTX22 : SubtargetFeature<"ptx22", "PTXVersion", "PTX_VERSION_2_2", - "Use PTX Language Version 2.2">; - -def FeaturePTX23 : SubtargetFeature<"ptx23", "PTXVersion", "PTX_VERSION_2_3", - "Use PTX Language Version 2.3">; - -//===- PTX Target ---------------------------------------------------------===// - -def FeatureSM10 : SubtargetFeature<"sm10", "PTXTarget", "PTX_SM_1_0", - "Use Shader Model 1.0">; -def FeatureSM11 : SubtargetFeature<"sm11", "PTXTarget", "PTX_SM_1_1", - "Use Shader Model 1.1">; -def FeatureSM12 : SubtargetFeature<"sm12", "PTXTarget", "PTX_SM_1_2", - "Use Shader Model 1.2">; -def FeatureSM13 : SubtargetFeature<"sm13", "PTXTarget", "PTX_SM_1_3", - "Use Shader Model 1.3">; -def FeatureSM20 : SubtargetFeature<"sm20", "PTXTarget", "PTX_SM_2_0", - "Use Shader Model 2.0", [FeatureDouble]>; -def FeatureSM21 : SubtargetFeature<"sm21", "PTXTarget", "PTX_SM_2_1", - "Use Shader Model 2.1", [FeatureDouble]>; -def FeatureSM22 : SubtargetFeature<"sm22", "PTXTarget", "PTX_SM_2_2", - "Use Shader Model 2.2", [FeatureDouble]>; -def FeatureSM23 : 
SubtargetFeature<"sm23", "PTXTarget", "PTX_SM_2_3", - "Use Shader Model 2.3", [FeatureDouble]>; - -def FeatureCOMPUTE10 : SubtargetFeature<"compute10", "PTXTarget", - "PTX_COMPUTE_1_0", - "Use Compute Compatibility 1.0">; -def FeatureCOMPUTE11 : SubtargetFeature<"compute11", "PTXTarget", - "PTX_COMPUTE_1_1", - "Use Compute Compatibility 1.1">; -def FeatureCOMPUTE12 : SubtargetFeature<"compute12", "PTXTarget", - "PTX_COMPUTE_1_2", - "Use Compute Compatibility 1.2">; -def FeatureCOMPUTE13 : SubtargetFeature<"compute13", "PTXTarget", - "PTX_COMPUTE_1_3", - "Use Compute Compatibility 1.3">; -def FeatureCOMPUTE20 : SubtargetFeature<"compute20", "PTXTarget", - "PTX_COMPUTE_2_0", - "Use Compute Compatibility 2.0", - [FeatureDouble]>; - -//===----------------------------------------------------------------------===// -// PTX supported processors -//===----------------------------------------------------------------------===// - -class Proc Features> - : Processor; - -def : Proc<"generic", []>; - -// Processor definitions for compute/shader models -def : Proc<"compute_10", [FeatureCOMPUTE10]>; -def : Proc<"compute_11", [FeatureCOMPUTE11]>; -def : Proc<"compute_12", [FeatureCOMPUTE12]>; -def : Proc<"compute_13", [FeatureCOMPUTE13]>; -def : Proc<"compute_20", [FeatureCOMPUTE20]>; -def : Proc<"sm_10", [FeatureSM10]>; -def : Proc<"sm_11", [FeatureSM11]>; -def : Proc<"sm_12", [FeatureSM12]>; -def : Proc<"sm_13", [FeatureSM13]>; -def : Proc<"sm_20", [FeatureSM20]>; -def : Proc<"sm_21", [FeatureSM21]>; -def : Proc<"sm_22", [FeatureSM22]>; -def : Proc<"sm_23", [FeatureSM23]>; - -// Processor definitions for common GPU architectures -def : Proc<"g80", [FeatureSM10]>; -def : Proc<"gt200", [FeatureSM13]>; -def : Proc<"gf100", [FeatureSM20, FeatureDouble]>; -def : Proc<"fermi", [FeatureSM20, FeatureDouble]>; - -//===----------------------------------------------------------------------===// -// Register File Description -//===----------------------------------------------------------------------===// - -include "PTXRegisterInfo.td" - -//===----------------------------------------------------------------------===// -// Instruction Descriptions -//===----------------------------------------------------------------------===// - -include "PTXInstrInfo.td" - -def PTXInstrInfo : InstrInfo; - -//===----------------------------------------------------------------------===// -// Assembly printer -//===----------------------------------------------------------------------===// -// PTX uses the MC printer for asm output, so make sure the TableGen -// AsmWriter bits get associated with the correct class. -def PTXAsmWriter : AsmWriter { - string AsmWriterClassName = "InstPrinter"; - bit isMCAsmWriter = 1; -} - -//===----------------------------------------------------------------------===// -// Target Declaration -//===----------------------------------------------------------------------===// - -def PTX : Target { - let InstructionSet = PTXInstrInfo; - let AssemblyWriters = [PTXAsmWriter]; -} diff --git a/lib/Target/PTX/PTXAsmPrinter.cpp b/lib/Target/PTX/PTXAsmPrinter.cpp deleted file mode 100644 index 0b6ac7b..0000000 --- a/lib/Target/PTX/PTXAsmPrinter.cpp +++ /dev/null @@ -1,561 +0,0 @@ -//===-- PTXAsmPrinter.cpp - PTX LLVM assembly writer ----------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. 
-// -//===----------------------------------------------------------------------===// -// -// This file contains a printer that converts from our internal representation -// of machine-dependent LLVM code to PTX assembly language. -// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "ptx-asm-printer" - -#include "PTXAsmPrinter.h" -#include "PTX.h" -#include "PTXMachineFunctionInfo.h" -#include "PTXParamManager.h" -#include "PTXRegisterInfo.h" -#include "PTXTargetMachine.h" -#include "llvm/Argument.h" -#include "llvm/DerivedTypes.h" -#include "llvm/Function.h" -#include "llvm/Module.h" -#include "llvm/ADT/SmallString.h" -#include "llvm/ADT/Twine.h" -#include "llvm/Analysis/DebugInfo.h" -#include "llvm/CodeGen/AsmPrinter.h" -#include "llvm/CodeGen/MachineFrameInfo.h" -#include "llvm/CodeGen/MachineInstr.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/MC/MCContext.h" -#include "llvm/MC/MCExpr.h" -#include "llvm/MC/MCInst.h" -#include "llvm/MC/MCStreamer.h" -#include "llvm/MC/MCSymbol.h" -#include "llvm/Target/Mangler.h" -#include "llvm/Target/TargetLoweringObjectFile.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/MathExtras.h" -#include "llvm/Support/Path.h" -#include "llvm/Support/TargetRegistry.h" -#include "llvm/Support/raw_ostream.h" - -using namespace llvm; - -static const char PARAM_PREFIX[] = "__param_"; -static const char RETURN_PREFIX[] = "__ret_"; - -static const char *getRegisterTypeName(unsigned RegType) { - switch (RegType) { - default: - llvm_unreachable("Unknown register type"); - case PTXRegisterType::Pred: - return ".pred"; - case PTXRegisterType::B16: - return ".b16"; - case PTXRegisterType::B32: - return ".b32"; - case PTXRegisterType::B64: - return ".b64"; - case PTXRegisterType::F32: - return ".f32"; - case PTXRegisterType::F64: - return ".f64"; - } -} - -static const char *getStateSpaceName(unsigned addressSpace) { - switch (addressSpace) { - default: llvm_unreachable("Unknown state space"); - case PTXStateSpace::Global: return "global"; - case PTXStateSpace::Constant: return "const"; - case PTXStateSpace::Local: return "local"; - case PTXStateSpace::Parameter: return "param"; - case PTXStateSpace::Shared: return "shared"; - } -} - -static const char *getTypeName(Type* type) { - while (true) { - switch (type->getTypeID()) { - default: llvm_unreachable("Unknown type"); - case Type::FloatTyID: return ".f32"; - case Type::DoubleTyID: return ".f64"; - case Type::IntegerTyID: - switch (type->getPrimitiveSizeInBits()) { - default: llvm_unreachable("Unknown integer bit-width"); - case 16: return ".u16"; - case 32: return ".u32"; - case 64: return ".u64"; - } - case Type::ArrayTyID: - case Type::PointerTyID: - type = dyn_cast(type)->getElementType(); - break; - } - } - return NULL; -} - -bool PTXAsmPrinter::doFinalization(Module &M) { - // XXX Temproarily remove global variables so that doFinalization() will not - // emit them again (global variables are emitted at beginning). 
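// Editorial worked example for getTypeName above (a reading of the code, not
// text from the patch):
//   i32             -> ".u32"  (integers are printed as unsigned PTX types)
//   float           -> ".f32"
//   [16 x double]*  -> ".f64"  (pointer and array layers are peeled until a
//                               primitive element type remains)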
- - Module::GlobalListType &global_list = M.getGlobalList(); - int i, n = global_list.size(); - GlobalVariable **gv_array = new GlobalVariable* [n]; - - // first, back-up GlobalVariable in gv_array - i = 0; - for (Module::global_iterator I = global_list.begin(), E = global_list.end(); - I != E; ++I) - gv_array[i++] = &*I; - - // second, empty global_list - while (!global_list.empty()) - global_list.remove(global_list.begin()); - - // call doFinalization - bool ret = AsmPrinter::doFinalization(M); - - // now we restore global variables - for (i = 0; i < n; i ++) - global_list.insert(global_list.end(), gv_array[i]); - - delete[] gv_array; - return ret; -} - -void PTXAsmPrinter::EmitStartOfAsmFile(Module &M) -{ - const PTXSubtarget& ST = TM.getSubtarget(); - - // Emit the PTX .version and .target attributes - OutStreamer.EmitRawText(Twine("\t.version ") + ST.getPTXVersionString()); - OutStreamer.EmitRawText(Twine("\t.target ") + ST.getTargetString() + - (ST.supportsDouble() ? "" - : ", map_f64_to_f32")); - // .address_size directive is optional, but it must immediately follow - // the .target directive if present within a module - if (ST.supportsPTX23()) { - const char *addrSize = ST.is64Bit() ? "64" : "32"; - OutStreamer.EmitRawText(Twine("\t.address_size ") + addrSize); - } - - OutStreamer.AddBlankLine(); - - // Define any .file directives - DebugInfoFinder DbgFinder; - DbgFinder.processModule(M); - - for (DebugInfoFinder::iterator I = DbgFinder.compile_unit_begin(), - E = DbgFinder.compile_unit_end(); I != E; ++I) { - DICompileUnit DIUnit(*I); - StringRef FN = DIUnit.getFilename(); - StringRef Dir = DIUnit.getDirectory(); - GetOrCreateSourceID(FN, Dir); - } - - OutStreamer.AddBlankLine(); - - // declare external functions - for (Module::const_iterator i = M.begin(), e = M.end(); - i != e; ++i) - EmitFunctionDeclaration(i); - - // declare global variables - for (Module::const_global_iterator i = M.global_begin(), e = M.global_end(); - i != e; ++i) - EmitVariableDeclaration(i); -} - -void PTXAsmPrinter::EmitFunctionBodyStart() { - OutStreamer.EmitRawText(Twine("{")); - - const PTXMachineFunctionInfo *MFI = MF->getInfo(); - const PTXParamManager &PM = MFI->getParamManager(); - - // Print register definitions - SmallString<128> regDefs; - raw_svector_ostream os(regDefs); - unsigned numRegs; - - // pred - numRegs = MFI->countRegisters(PTXRegisterType::Pred, PTXRegisterSpace::Reg); - if(numRegs > 0) - os << "\t.reg .pred %p<" << numRegs << ">;\n"; - - // i16 - numRegs = MFI->countRegisters(PTXRegisterType::B16, PTXRegisterSpace::Reg); - if(numRegs > 0) - os << "\t.reg .b16 %rh<" << numRegs << ">;\n"; - - // i32 - numRegs = MFI->countRegisters(PTXRegisterType::B32, PTXRegisterSpace::Reg); - if(numRegs > 0) - os << "\t.reg .b32 %r<" << numRegs << ">;\n"; - - // i64 - numRegs = MFI->countRegisters(PTXRegisterType::B64, PTXRegisterSpace::Reg); - if(numRegs > 0) - os << "\t.reg .b64 %rd<" << numRegs << ">;\n"; - - // f32 - numRegs = MFI->countRegisters(PTXRegisterType::F32, PTXRegisterSpace::Reg); - if(numRegs > 0) - os << "\t.reg .f32 %f<" << numRegs << ">;\n"; - - // f64 - numRegs = MFI->countRegisters(PTXRegisterType::F64, PTXRegisterSpace::Reg); - if(numRegs > 0) - os << "\t.reg .f64 %fd<" << numRegs << ">;\n"; - - // Local params - for (PTXParamManager::param_iterator i = PM.local_begin(), e = PM.local_end(); - i != e; ++i) - os << "\t.param .b" << PM.getParamSize(*i) << ' ' << PM.getParamName(*i) - << ";\n"; - - OutStreamer.EmitRawText(os.str()); - - - const MachineFrameInfo* FrameInfo = 
MF->getFrameInfo(); - DEBUG(dbgs() << "Have " << FrameInfo->getNumObjects() - << " frame object(s)\n"); - for (unsigned i = 0, e = FrameInfo->getNumObjects(); i != e; ++i) { - DEBUG(dbgs() << "Size of object: " << FrameInfo->getObjectSize(i) << "\n"); - if (FrameInfo->getObjectSize(i) > 0) { - OutStreamer.EmitRawText("\t.local .align " + - Twine(FrameInfo->getObjectAlignment(i)) + - " .b8 __local" + - Twine(i) + - "[" + - Twine(FrameInfo->getObjectSize(i)) + - "];"); - } - } - - //unsigned Index = 1; - // Print parameter passing params - //for (PTXMachineFunctionInfo::param_iterator - // i = MFI->paramBegin(), e = MFI->paramEnd(); i != e; ++i) { - // std::string def = "\t.param .b"; - // def += utostr(*i); - // def += " __ret_"; - // def += utostr(Index); - // Index++; - // def += ";"; - // OutStreamer.EmitRawText(Twine(def)); - //} -} - -void PTXAsmPrinter::EmitFunctionBodyEnd() { - OutStreamer.EmitRawText(Twine("}")); -} - -void PTXAsmPrinter::EmitInstruction(const MachineInstr *MI) { - MCInst TmpInst; - LowerPTXMachineInstrToMCInst(MI, TmpInst, *this); - OutStreamer.EmitInstruction(TmpInst); -} - -void PTXAsmPrinter::EmitVariableDeclaration(const GlobalVariable *gv) { - // Check to see if this is a special global used by LLVM, if so, emit it. - if (EmitSpecialLLVMGlobal(gv)) - return; - - MCSymbol *gvsym = Mang->getSymbol(gv); - - assert(gvsym->isUndefined() && "Cannot define a symbol twice!"); - - SmallString<128> decl; - raw_svector_ostream os(decl); - - // check if it is defined in some other translation unit - if (gv->isDeclaration()) - os << ".extern "; - - // state space: e.g., .global - os << '.' << getStateSpaceName(gv->getType()->getAddressSpace()) << ' '; - - // alignment (optional) - unsigned alignment = gv->getAlignment(); - if (alignment != 0) - os << ".align " << gv->getAlignment() << ' '; - - - if (PointerType::classof(gv->getType())) { - PointerType* pointerTy = dyn_cast(gv->getType()); - Type* elementTy = pointerTy->getElementType(); - - if (elementTy->isArrayTy()) { - assert(elementTy->isArrayTy() && "Only pointers to arrays are supported"); - - ArrayType* arrayTy = dyn_cast(elementTy); - elementTy = arrayTy->getElementType(); - - unsigned numElements = arrayTy->getNumElements(); - - while (elementTy->isArrayTy()) { - arrayTy = dyn_cast(elementTy); - elementTy = arrayTy->getElementType(); - - numElements *= arrayTy->getNumElements(); - } - - // FIXME: isPrimitiveType() == false for i16? - assert(elementTy->isSingleValueType() && - "Non-primitive types are not handled"); - - // Find the size of the element in bits - unsigned elementSize = elementTy->getPrimitiveSizeInBits(); - - os << ".b" << elementSize << ' ' << gvsym->getName() - << '[' << numElements << ']'; - } else { - os << ".b8" << gvsym->getName() << "[]"; - } - - // handle string constants (assume ConstantArray means string) - if (gv->hasInitializer()) { - const Constant *C = gv->getInitializer(); - if (const ConstantArray *CA = dyn_cast(C)) { - os << " = {"; - - for (unsigned i = 0, e = C->getNumOperands(); i != e; ++i) { - if (i > 0) - os << ','; - - os << "0x"; - os.write_hex(cast(CA->getOperand(i))->getZExtValue()); - } - - os << '}'; - } - } - } else { - // Note: this is currently the fall-through case and most likely generates - // incorrect code. 
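// Editorial illustration, not output captured from this code: for a function
// that EmitFunctionBodyStart above counts as using one predicate, three b32
// registers, and a single 16-byte, 4-byte-aligned stack object, the emitted
// preamble would look roughly like:
//     {
//     .reg .pred %p<1>;
//     .reg .b32 %r<3>;
//     .local .align 4 .b8 __local0[16];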
- os << getTypeName(gv->getType()) << ' ' << gvsym->getName(); - - if (isa(gv->getType()) || isa(gv->getType())) - os << "[]"; - } - - os << ';'; - - OutStreamer.EmitRawText(os.str()); - OutStreamer.AddBlankLine(); -} - -void PTXAsmPrinter::EmitFunctionEntryLabel() { - // The function label could have already been emitted if two symbols end up - // conflicting due to asm renaming. Detect this and emit an error. - if (!CurrentFnSym->isUndefined()) - report_fatal_error("'" + Twine(CurrentFnSym->getName()) + - "' label emitted multiple times to assembly file"); - - const PTXMachineFunctionInfo *MFI = MF->getInfo(); - const PTXParamManager &PM = MFI->getParamManager(); - const bool isKernel = MFI->isKernel(); - const PTXSubtarget& ST = TM.getSubtarget(); - - SmallString<128> decl; - raw_svector_ostream os(decl); - os << (isKernel ? ".entry" : ".func"); - - if (!isKernel) { - os << " ("; - if (ST.useParamSpaceForDeviceArgs()) { - for (PTXParamManager::param_iterator i = PM.ret_begin(), e = PM.ret_end(), - b = i; i != e; ++i) { - if (i != b) - os << ", "; - - os << ".param .b" << PM.getParamSize(*i) << ' ' << PM.getParamName(*i); - } - } else { - for (PTXMachineFunctionInfo::reg_iterator - i = MFI->retreg_begin(), e = MFI->retreg_end(), b = i; - i != e; ++i) { - if (i != b) - os << ", "; - - os << ".reg " << getRegisterTypeName(MFI->getRegisterType(*i)) << ' ' - << MFI->getRegisterName(*i); - } - } - os << ')'; - } - - // Print function name - os << ' ' << CurrentFnSym->getName() << " ("; - - const Function *F = MF->getFunction(); - - // Print parameters - if (isKernel || ST.useParamSpaceForDeviceArgs()) { - /*for (PTXParamManager::param_iterator i = PM.arg_begin(), e = PM.arg_end(), - b = i; i != e; ++i) { - if (i != b) - os << ", "; - - os << ".param .b" << PM.getParamSize(*i) << ' ' << PM.getParamName(*i); - }*/ - int Counter = 1; - for (Function::const_arg_iterator i = F->arg_begin(), e = F->arg_end(), - b = i; i != e; ++i) { - if (i != b) - os << ", "; - const Type *ArgType = (*i).getType(); - os << ".param .b"; - if (ArgType->isPointerTy()) { - if (ST.is64Bit()) - os << "64"; - else - os << "32"; - } else { - os << ArgType->getPrimitiveSizeInBits(); - } - if (ArgType->isPointerTy() && ST.emitPtrAttribute()) { - const PointerType *PtrType = dyn_cast(ArgType); - os << " .ptr"; - switch (PtrType->getAddressSpace()) { - default: - llvm_unreachable("Unknown address space in argument"); - case PTXStateSpace::Global: - os << " .global"; - break; - case PTXStateSpace::Shared: - os << " .shared"; - break; - } - } - os << " __param_" << Counter++; - } - } else { - for (PTXMachineFunctionInfo::reg_iterator - i = MFI->argreg_begin(), e = MFI->argreg_end(), b = i; - i != e; ++i) { - if (i != b) - os << ", "; - - os << ".reg " << getRegisterTypeName(MFI->getRegisterType(*i)) << ' ' - << MFI->getRegisterName(*i); - } - } - os << ')'; - - OutStreamer.EmitRawText(os.str()); -} - -void PTXAsmPrinter::EmitFunctionDeclaration(const Function* func) -{ - const PTXSubtarget& ST = TM.getSubtarget(); - - std::string decl = ""; - - // hard-coded emission of extern vprintf function - - if (func->getName() == "printf" || func->getName() == "puts") { - decl += ".extern .func (.param .b32 __param_1) vprintf (.param .b"; - if (ST.is64Bit()) - decl += "64"; - else - decl += "32"; - decl += " __param_2, .param .b"; - if (ST.is64Bit()) - decl += "64"; - else - decl += "32"; - decl += " __param_3)\n"; - } - - OutStreamer.EmitRawText(Twine(decl)); -} - -unsigned PTXAsmPrinter::GetOrCreateSourceID(StringRef FileName, - 
StringRef DirName) { - // If FE did not provide a file name, then assume stdin. - if (FileName.empty()) - return GetOrCreateSourceID("", StringRef()); - - // MCStream expects full path name as filename. - if (!DirName.empty() && !sys::path::is_absolute(FileName)) { - SmallString<128> FullPathName = DirName; - sys::path::append(FullPathName, FileName); - // Here FullPathName will be copied into StringMap by GetOrCreateSourceID. - return GetOrCreateSourceID(StringRef(FullPathName), StringRef()); - } - - StringMapEntry &Entry = SourceIdMap.GetOrCreateValue(FileName); - if (Entry.getValue()) - return Entry.getValue(); - - unsigned SrcId = SourceIdMap.size(); - Entry.setValue(SrcId); - - // Print out a .file directive to specify files for .loc directives. - OutStreamer.EmitDwarfFileDirective(SrcId, "", Entry.getKey()); - - return SrcId; -} - -MCOperand PTXAsmPrinter::GetSymbolRef(const MachineOperand &MO, - const MCSymbol *Symbol) { - const MCExpr *Expr; - Expr = MCSymbolRefExpr::Create(Symbol, MCSymbolRefExpr::VK_None, OutContext); - return MCOperand::CreateExpr(Expr); -} - -MCOperand PTXAsmPrinter::lowerOperand(const MachineOperand &MO) { - MCOperand MCOp; - const PTXMachineFunctionInfo *MFI = MF->getInfo(); - unsigned EncodedReg; - switch (MO.getType()) { - default: - llvm_unreachable("Unknown operand type"); - case MachineOperand::MO_Register: - if (MO.getReg() > 0) { - // Encode the register - EncodedReg = MFI->getEncodedRegister(MO.getReg()); - } else { - EncodedReg = 0; - } - MCOp = MCOperand::CreateReg(EncodedReg); - break; - case MachineOperand::MO_Immediate: - MCOp = MCOperand::CreateImm(MO.getImm()); - break; - case MachineOperand::MO_MachineBasicBlock: - MCOp = MCOperand::CreateExpr(MCSymbolRefExpr::Create( - MO.getMBB()->getSymbol(), OutContext)); - break; - case MachineOperand::MO_GlobalAddress: - MCOp = GetSymbolRef(MO, Mang->getSymbol(MO.getGlobal())); - break; - case MachineOperand::MO_ExternalSymbol: - MCOp = GetSymbolRef(MO, GetExternalSymbolSymbol(MO.getSymbolName())); - break; - case MachineOperand::MO_FPImmediate: - APFloat Val = MO.getFPImm()->getValueAPF(); - bool ignored; - Val.convert(APFloat::IEEEdouble, APFloat::rmTowardZero, &ignored); - MCOp = MCOperand::CreateFPImm(Val.convertToDouble()); - break; - } - - return MCOp; -} - -// Force static initialization. -extern "C" void LLVMInitializePTXAsmPrinter() { - RegisterAsmPrinter X(ThePTX32Target); - RegisterAsmPrinter Y(ThePTX64Target); -} diff --git a/lib/Target/PTX/PTXAsmPrinter.h b/lib/Target/PTX/PTXAsmPrinter.h deleted file mode 100644 index 74c8d58..0000000 --- a/lib/Target/PTX/PTXAsmPrinter.h +++ /dev/null @@ -1,57 +0,0 @@ -//===-- PTXAsmPrinter.h - Print machine code to a PTX file ------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// PTX Assembly printer class. 
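// Editorial sketch of the numbering idiom in GetOrCreateSourceID above
// (the helper name is hypothetical). Ids start at 1, and a stored value of 0
// means "not assigned yet", which is why size() can double as the next id:
static unsigned getOrAssignId(llvm::StringMap<unsigned> &Ids,
                              llvm::StringRef Name) {
  llvm::StringMapEntry<unsigned> &Entry = Ids.GetOrCreateValue(Name);
  if (!Entry.getValue())
    Entry.setValue(Ids.size()); // size() already counts the new entry
  return Entry.getValue();
}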
-// -//===----------------------------------------------------------------------===// - -#ifndef PTXASMPRINTER_H -#define PTXASMPRINTER_H - -#include "PTX.h" -#include "PTXTargetMachine.h" -#include "llvm/ADT/StringMap.h" -#include "llvm/CodeGen/AsmPrinter.h" -#include "llvm/Support/Compiler.h" - -namespace llvm { - -class MCOperand; - -class LLVM_LIBRARY_VISIBILITY PTXAsmPrinter : public AsmPrinter { -public: - explicit PTXAsmPrinter(TargetMachine &TM, MCStreamer &Streamer) - : AsmPrinter(TM, Streamer) {} - - const char *getPassName() const { return "PTX Assembly Printer"; } - - bool doFinalization(Module &M); - - virtual void EmitStartOfAsmFile(Module &M); - virtual void EmitFunctionBodyStart(); - virtual void EmitFunctionBodyEnd(); - virtual void EmitFunctionEntryLabel(); - virtual void EmitInstruction(const MachineInstr *MI); - - unsigned GetOrCreateSourceID(StringRef FileName, - StringRef DirName); - - MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol); - MCOperand lowerOperand(const MachineOperand &MO); - -private: - void EmitVariableDeclaration(const GlobalVariable *gv); - void EmitFunctionDeclaration(const Function* func); - - StringMap SourceIdMap; -}; // class PTXAsmPrinter -} // namespace llvm - -#endif - diff --git a/lib/Target/PTX/PTXFPRoundingModePass.cpp b/lib/Target/PTX/PTXFPRoundingModePass.cpp deleted file mode 100644 index a21d172..0000000 --- a/lib/Target/PTX/PTXFPRoundingModePass.cpp +++ /dev/null @@ -1,181 +0,0 @@ -//===-- PTXFPRoundingModePass.cpp - Assign rounding modes pass ------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file defines a machine function pass that sets appropriate FP rounding -// modes for all relevant instructions. -// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "ptx-fp-rounding-mode" - -#include "PTX.h" -#include "PTXTargetMachine.h" -#include "llvm/ADT/DenseMap.h" -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/raw_ostream.h" - -using namespace llvm; - -// NOTE: PTXFPRoundingModePass should be executed just before emission. - -namespace { - /// PTXFPRoundingModePass - Pass to assign appropriate FP rounding modes to - /// all FP instructions. Essentially, this pass just looks for all FP - /// instructions that have a rounding mode set to RndDefault, and sets an - /// appropriate rounding mode based on the target device. 
- /// - class PTXFPRoundingModePass : public MachineFunctionPass { - private: - static char ID; - - typedef std::pair RndModeDesc; - - PTXTargetMachine& TargetMachine; - DenseMap Instrs; - - public: - PTXFPRoundingModePass(PTXTargetMachine &TM, CodeGenOpt::Level OptLevel) - : MachineFunctionPass(ID), - TargetMachine(TM) { - initializeMap(); - } - - virtual bool runOnMachineFunction(MachineFunction &MF); - - virtual const char *getPassName() const { - return "PTX FP Rounding Mode Pass"; - } - - private: - - void initializeMap(); - void processInstruction(MachineInstr &MI); - }; // class PTXFPRoundingModePass -} // end anonymous namespace - -using namespace llvm; - -char PTXFPRoundingModePass::ID = 0; - -bool PTXFPRoundingModePass::runOnMachineFunction(MachineFunction &MF) { - // Look at each basic block - for (MachineFunction::iterator bbi = MF.begin(), bbe = MF.end(); bbi != bbe; - ++bbi) { - MachineBasicBlock &MBB = *bbi; - // Look at each instruction - for (MachineBasicBlock::iterator ii = MBB.begin(), ie = MBB.end(); - ii != ie; ++ii) { - MachineInstr &MI = *ii; - processInstruction(MI); - } - } - return false; -} - -void PTXFPRoundingModePass::initializeMap() { - using namespace PTXRoundingMode; - const PTXSubtarget& ST = TargetMachine.getSubtarget(); - - // Build a map of default rounding mode for all instructions that need a - // rounding mode. - Instrs[PTX::FADDrr32] = std::make_pair(1U, (unsigned)RndNearestEven); - Instrs[PTX::FADDri32] = std::make_pair(1U, (unsigned)RndNearestEven); - Instrs[PTX::FADDrr64] = std::make_pair(1U, (unsigned)RndNearestEven); - Instrs[PTX::FADDri64] = std::make_pair(1U, (unsigned)RndNearestEven); - Instrs[PTX::FSUBrr32] = std::make_pair(1U, (unsigned)RndNearestEven); - Instrs[PTX::FSUBri32] = std::make_pair(1U, (unsigned)RndNearestEven); - Instrs[PTX::FSUBrr64] = std::make_pair(1U, (unsigned)RndNearestEven); - Instrs[PTX::FSUBri64] = std::make_pair(1U, (unsigned)RndNearestEven); - Instrs[PTX::FMULrr32] = std::make_pair(1U, (unsigned)RndNearestEven); - Instrs[PTX::FMULri32] = std::make_pair(1U, (unsigned)RndNearestEven); - Instrs[PTX::FMULrr64] = std::make_pair(1U, (unsigned)RndNearestEven); - Instrs[PTX::FMULri64] = std::make_pair(1U, (unsigned)RndNearestEven); - - Instrs[PTX::FNEGrr32] = std::make_pair(1U, (unsigned)RndNone); - Instrs[PTX::FNEGri32] = std::make_pair(1U, (unsigned)RndNone); - Instrs[PTX::FNEGrr64] = std::make_pair(1U, (unsigned)RndNone); - Instrs[PTX::FNEGri64] = std::make_pair(1U, (unsigned)RndNone); - - unsigned FDivRndMode = ST.fdivNeedsRoundingMode() ? RndNearestEven : RndNone; - Instrs[PTX::FDIVrr32] = std::make_pair(1U, FDivRndMode); - Instrs[PTX::FDIVri32] = std::make_pair(1U, FDivRndMode); - Instrs[PTX::FDIVrr64] = std::make_pair(1U, FDivRndMode); - Instrs[PTX::FDIVri64] = std::make_pair(1U, FDivRndMode); - - unsigned FMADRndMode = ST.fmadNeedsRoundingMode() ? 
RndNearestEven : RndNone; - Instrs[PTX::FMADrrr32] = std::make_pair(1U, FMADRndMode); - Instrs[PTX::FMADrri32] = std::make_pair(1U, FMADRndMode); - Instrs[PTX::FMADrii32] = std::make_pair(1U, FMADRndMode); - Instrs[PTX::FMADrrr64] = std::make_pair(1U, FMADRndMode); - Instrs[PTX::FMADrri64] = std::make_pair(1U, FMADRndMode); - Instrs[PTX::FMADrii64] = std::make_pair(1U, FMADRndMode); - - Instrs[PTX::FSQRTrr32] = std::make_pair(1U, (unsigned)RndNearestEven); - Instrs[PTX::FSQRTri32] = std::make_pair(1U, (unsigned)RndNearestEven); - Instrs[PTX::FSQRTrr64] = std::make_pair(1U, (unsigned)RndNearestEven); - Instrs[PTX::FSQRTri64] = std::make_pair(1U, (unsigned)RndNearestEven); - - Instrs[PTX::FSINrr32] = std::make_pair(1U, (unsigned)RndApprox); - Instrs[PTX::FSINri32] = std::make_pair(1U, (unsigned)RndApprox); - Instrs[PTX::FSINrr64] = std::make_pair(1U, (unsigned)RndApprox); - Instrs[PTX::FSINri64] = std::make_pair(1U, (unsigned)RndApprox); - Instrs[PTX::FCOSrr32] = std::make_pair(1U, (unsigned)RndApprox); - Instrs[PTX::FCOSri32] = std::make_pair(1U, (unsigned)RndApprox); - Instrs[PTX::FCOSrr64] = std::make_pair(1U, (unsigned)RndApprox); - Instrs[PTX::FCOSri64] = std::make_pair(1U, (unsigned)RndApprox); - - Instrs[PTX::CVTu16f32] = std::make_pair(1U, (unsigned)RndTowardsZeroInt); - Instrs[PTX::CVTs16f32] = std::make_pair(1U, (unsigned)RndTowardsZeroInt); - Instrs[PTX::CVTu16f64] = std::make_pair(1U, (unsigned)RndTowardsZeroInt); - Instrs[PTX::CVTs16f64] = std::make_pair(1U, (unsigned)RndTowardsZeroInt); - Instrs[PTX::CVTu32f32] = std::make_pair(1U, (unsigned)RndTowardsZeroInt); - Instrs[PTX::CVTs32f32] = std::make_pair(1U, (unsigned)RndTowardsZeroInt); - Instrs[PTX::CVTu32f64] = std::make_pair(1U, (unsigned)RndTowardsZeroInt); - Instrs[PTX::CVTs32f64] = std::make_pair(1U, (unsigned)RndTowardsZeroInt); - Instrs[PTX::CVTu64f32] = std::make_pair(1U, (unsigned)RndTowardsZeroInt); - Instrs[PTX::CVTs64f32] = std::make_pair(1U, (unsigned)RndTowardsZeroInt); - Instrs[PTX::CVTu64f64] = std::make_pair(1U, (unsigned)RndTowardsZeroInt); - Instrs[PTX::CVTs64f64] = std::make_pair(1U, (unsigned)RndTowardsZeroInt); - - Instrs[PTX::CVTf32u16] = std::make_pair(1U, (unsigned)RndNearestEven); - Instrs[PTX::CVTf32s16] = std::make_pair(1U, (unsigned)RndNearestEven); - Instrs[PTX::CVTf32u32] = std::make_pair(1U, (unsigned)RndNearestEven); - Instrs[PTX::CVTf32s32] = std::make_pair(1U, (unsigned)RndNearestEven); - Instrs[PTX::CVTf32u64] = std::make_pair(1U, (unsigned)RndNearestEven); - Instrs[PTX::CVTf32s64] = std::make_pair(1U, (unsigned)RndNearestEven); - Instrs[PTX::CVTf32f64] = std::make_pair(1U, (unsigned)RndNearestEven); - Instrs[PTX::CVTf64u16] = std::make_pair(1U, (unsigned)RndNearestEven); - Instrs[PTX::CVTf64s16] = std::make_pair(1U, (unsigned)RndNearestEven); - Instrs[PTX::CVTf64u32] = std::make_pair(1U, (unsigned)RndNearestEven); - Instrs[PTX::CVTf64s32] = std::make_pair(1U, (unsigned)RndNearestEven); - Instrs[PTX::CVTf64u64] = std::make_pair(1U, (unsigned)RndNearestEven); - Instrs[PTX::CVTf64s64] = std::make_pair(1U, (unsigned)RndNearestEven); -} - -void PTXFPRoundingModePass::processInstruction(MachineInstr &MI) { - // Is this an instruction that needs a rounding mode? 
- if (Instrs.count(MI.getOpcode())) { - const RndModeDesc &Desc = Instrs[MI.getOpcode()]; - // Get the rounding mode operand - MachineOperand &Op = MI.getOperand(Desc.first); - // Update the rounding mode if needed - if (Op.getImm() == PTXRoundingMode::RndDefault) { - Op.setImm(Desc.second); - } - } -} - -FunctionPass *llvm::createPTXFPRoundingModePass(PTXTargetMachine &TM, - CodeGenOpt::Level OptLevel) { - return new PTXFPRoundingModePass(TM, OptLevel); -} - diff --git a/lib/Target/PTX/PTXFrameLowering.cpp b/lib/Target/PTX/PTXFrameLowering.cpp deleted file mode 100644 index e6e268e..0000000 --- a/lib/Target/PTX/PTXFrameLowering.cpp +++ /dev/null @@ -1,24 +0,0 @@ -//===-- PTXFrameLowering.cpp - PTX Frame Information ----------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains the PTX implementation of TargetFrameLowering class. -// -//===----------------------------------------------------------------------===// - -#include "PTXFrameLowering.h" -#include "llvm/CodeGen/MachineFunction.h" - -using namespace llvm; - -void PTXFrameLowering::emitPrologue(MachineFunction &MF) const { -} - -void PTXFrameLowering::emitEpilogue(MachineFunction &MF, - MachineBasicBlock &MBB) const { -} diff --git a/lib/Target/PTX/PTXFrameLowering.h b/lib/Target/PTX/PTXFrameLowering.h deleted file mode 100644 index 831e818..0000000 --- a/lib/Target/PTX/PTXFrameLowering.h +++ /dev/null @@ -1,44 +0,0 @@ -//===-- PTXFrameLowering.h - Define frame lowering for PTX -----*- C++ -*--===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// -// -//===----------------------------------------------------------------------===// - -#ifndef PTX_FRAMEINFO_H -#define PTX_FRAMEINFO_H - -#include "PTX.h" -#include "PTXSubtarget.h" -#include "llvm/Target/TargetFrameLowering.h" - -namespace llvm { - class PTXSubtarget; - -class PTXFrameLowering : public TargetFrameLowering { -protected: - const PTXSubtarget &STI; - -public: - explicit PTXFrameLowering(const PTXSubtarget &sti) - : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, 2, -2), - STI(sti) { - } - - /// emitProlog/emitEpilog - These methods insert prolog and epilog code into - /// the function. - void emitPrologue(MachineFunction &MF) const; - void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const; - - bool hasFP(const MachineFunction &MF) const { return false; } -}; - -} // End llvm namespace - -#endif diff --git a/lib/Target/PTX/PTXISelDAGToDAG.cpp b/lib/Target/PTX/PTXISelDAGToDAG.cpp deleted file mode 100644 index 5c7ee29..0000000 --- a/lib/Target/PTX/PTXISelDAGToDAG.cpp +++ /dev/null @@ -1,356 +0,0 @@ -//===-- PTXISelDAGToDAG.cpp - A dag to dag inst selector for PTX ----------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file defines an instruction selector for the PTX target. 
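// Editorial note on the rounding-mode pass above (illustrative): an FADDrr32
// whose rounding-mode operand is still RndDefault has that operand rewritten
// to RndNearestEven, so the printed instruction carries the .rn suffix from
// the PTXRoundingMode table, e.g. add.rn.f32 rather than a bare add.f32.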
-// -//===----------------------------------------------------------------------===// - -#include "PTX.h" -#include "PTXMachineFunctionInfo.h" -#include "PTXTargetMachine.h" -#include "llvm/ADT/StringExtras.h" -#include "llvm/CodeGen/SelectionDAGISel.h" -#include "llvm/DerivedTypes.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/raw_ostream.h" - -using namespace llvm; - -namespace { -// PTXDAGToDAGISel - PTX specific code to select PTX machine -// instructions for SelectionDAG operations. -class PTXDAGToDAGISel : public SelectionDAGISel { - public: - PTXDAGToDAGISel(PTXTargetMachine &TM, CodeGenOpt::Level OptLevel); - - virtual const char *getPassName() const { - return "PTX DAG->DAG Pattern Instruction Selection"; - } - - SDNode *Select(SDNode *Node); - - // Complex Pattern Selectors. - bool SelectADDRrr(SDValue &Addr, SDValue &R1, SDValue &R2); - bool SelectADDRri(SDValue &Addr, SDValue &Base, SDValue &Offset); - bool SelectADDRii(SDValue &Addr, SDValue &Base, SDValue &Offset); - bool SelectADDRlocal(SDValue &Addr, SDValue &Base, SDValue &Offset); - - // Include the pieces auto'gened from the target description -#include "PTXGenDAGISel.inc" - - private: - // We need this only because we can't match intruction BRAdp - // pattern (PTXbrcond bb:$d, ...) in PTXInstrInfo.td - SDNode *SelectBRCOND(SDNode *Node); - - SDNode *SelectREADPARAM(SDNode *Node); - SDNode *SelectWRITEPARAM(SDNode *Node); - SDNode *SelectFrameIndex(SDNode *Node); - - bool isImm(const SDValue &operand); - bool SelectImm(const SDValue &operand, SDValue &imm); - - const PTXSubtarget& getSubtarget() const; -}; // class PTXDAGToDAGISel -} // namespace - -// createPTXISelDag - This pass converts a legalized DAG into a -// PTX-specific DAG, ready for instruction scheduling -FunctionPass *llvm::createPTXISelDag(PTXTargetMachine &TM, - CodeGenOpt::Level OptLevel) { - return new PTXDAGToDAGISel(TM, OptLevel); -} - -PTXDAGToDAGISel::PTXDAGToDAGISel(PTXTargetMachine &TM, - CodeGenOpt::Level OptLevel) - : SelectionDAGISel(TM, OptLevel) {} - -SDNode *PTXDAGToDAGISel::Select(SDNode *Node) { - switch (Node->getOpcode()) { - case ISD::BRCOND: - return SelectBRCOND(Node); - case PTXISD::READ_PARAM: - return SelectREADPARAM(Node); - case PTXISD::WRITE_PARAM: - return SelectWRITEPARAM(Node); - case ISD::FrameIndex: - return SelectFrameIndex(Node); - default: - return SelectCode(Node); - } -} - -SDNode *PTXDAGToDAGISel::SelectBRCOND(SDNode *Node) { - assert(Node->getNumOperands() >= 3); - - SDValue Chain = Node->getOperand(0); - SDValue Pred = Node->getOperand(1); - SDValue Target = Node->getOperand(2); // branch target - SDValue PredOp = CurDAG->getTargetConstant(PTXPredicate::Normal, MVT::i32); - DebugLoc dl = Node->getDebugLoc(); - - assert(Target.getOpcode() == ISD::BasicBlock); - assert(Pred.getValueType() == MVT::i1); - - // Emit BRAdp - SDValue Ops[] = { Target, Pred, PredOp, Chain }; - return CurDAG->getMachineNode(PTX::BRAdp, dl, MVT::Other, Ops, 4); -} - -SDNode *PTXDAGToDAGISel::SelectREADPARAM(SDNode *Node) { - SDValue Chain = Node->getOperand(0); - SDValue Index = Node->getOperand(1); - - int OpCode; - - // Get the type of parameter we are reading - EVT VT = Node->getValueType(0); - assert(VT.isSimple() && "READ_PARAM only implemented for MVT types"); - - MVT Type = VT.getSimpleVT(); - - if (Type == MVT::i1) - OpCode = PTX::READPARAMPRED; - else if (Type == MVT::i16) - OpCode = PTX::READPARAMI16; - else if (Type == MVT::i32) - OpCode = PTX::READPARAMI32; - else if (Type == MVT::i64) - OpCode = PTX::READPARAMI64; - 
else if (Type == MVT::f32) - OpCode = PTX::READPARAMF32; - else { - assert(Type == MVT::f64 && "Unexpected type!"); - OpCode = PTX::READPARAMF64; - } - - SDValue Pred = CurDAG->getRegister(PTX::NoRegister, MVT::i1); - SDValue PredOp = CurDAG->getTargetConstant(PTXPredicate::None, MVT::i32); - DebugLoc dl = Node->getDebugLoc(); - - SDValue Ops[] = { Index, Pred, PredOp, Chain }; - return CurDAG->getMachineNode(OpCode, dl, VT, Ops, 4); -} - -SDNode *PTXDAGToDAGISel::SelectWRITEPARAM(SDNode *Node) { - - SDValue Chain = Node->getOperand(0); - SDValue Value = Node->getOperand(1); - - int OpCode; - - //Node->dumpr(CurDAG); - - // Get the type of parameter we are writing - EVT VT = Value->getValueType(0); - assert(VT.isSimple() && "WRITE_PARAM only implemented for MVT types"); - - MVT Type = VT.getSimpleVT(); - - if (Type == MVT::i1) - OpCode = PTX::WRITEPARAMPRED; - else if (Type == MVT::i16) - OpCode = PTX::WRITEPARAMI16; - else if (Type == MVT::i32) - OpCode = PTX::WRITEPARAMI32; - else if (Type == MVT::i64) - OpCode = PTX::WRITEPARAMI64; - else if (Type == MVT::f32) - OpCode = PTX::WRITEPARAMF32; - else if (Type == MVT::f64) - OpCode = PTX::WRITEPARAMF64; - else - llvm_unreachable("Invalid type in SelectWRITEPARAM"); - - SDValue Pred = CurDAG->getRegister(PTX::NoRegister, MVT::i1); - SDValue PredOp = CurDAG->getTargetConstant(PTXPredicate::None, MVT::i32); - DebugLoc dl = Node->getDebugLoc(); - - SDValue Ops[] = { Value, Pred, PredOp, Chain }; - SDNode* Ret = CurDAG->getMachineNode(OpCode, dl, MVT::Other, Ops, 4); - - //dbgs() << "SelectWRITEPARAM produced:\n\t"; - //Ret->dumpr(CurDAG); - - return Ret; -} - -SDNode *PTXDAGToDAGISel::SelectFrameIndex(SDNode *Node) { - int FI = cast(Node)->getIndex(); - //dbgs() << "Selecting FrameIndex at index " << FI << "\n"; - //SDValue TFI = CurDAG->getTargetFrameIndex(FI, Node->getValueType(0)); - - PTXMachineFunctionInfo *MFI = MF->getInfo(); - - SDValue FrameSymbol = CurDAG->getTargetExternalSymbol(MFI->getFrameSymbol(FI), - Node->getValueType(0)); - - return FrameSymbol.getNode(); -} - -// Match memory operand of the form [reg+reg] -bool PTXDAGToDAGISel::SelectADDRrr(SDValue &Addr, SDValue &R1, SDValue &R2) { - if (Addr.getOpcode() != ISD::ADD || Addr.getNumOperands() < 2 || - isImm(Addr.getOperand(0)) || isImm(Addr.getOperand(1))) - return false; - - assert(Addr.getValueType().isSimple() && "Type must be simple"); - - R1 = Addr; - R2 = CurDAG->getTargetConstant(0, Addr.getValueType().getSimpleVT()); - - return true; -} - -// Match memory operand of the form [reg], [imm+reg], and [reg+imm] -bool PTXDAGToDAGISel::SelectADDRri(SDValue &Addr, SDValue &Base, - SDValue &Offset) { - // FrameIndex addresses are handled separately - //errs() << "SelectADDRri: "; - //Addr.getNode()->dumpr(); - if (isa(Addr)) { - //errs() << "Failure\n"; - return false; - } - - if (CurDAG->isBaseWithConstantOffset(Addr)) { - Base = Addr.getOperand(0); - if (isa(Base)) { - //errs() << "Failure\n"; - return false; - } - ConstantSDNode *CN = dyn_cast(Addr.getOperand(1)); - Offset = CurDAG->getTargetConstant(CN->getZExtValue(), MVT::i32); - //errs() << "Success\n"; - return true; - } - - /*if (Addr.getNumOperands() == 1) { - Base = Addr; - Offset = CurDAG->getTargetConstant(0, Addr.getValueType().getSimpleVT()); - errs() << "Success\n"; - return true; - }*/ - - //errs() << "SelectADDRri fails on: "; - //Addr.getNode()->dumpr(); - - if (isImm(Addr)) { - //errs() << "Failure\n"; - return false; - } - - Base = Addr; - Offset = CurDAG->getTargetConstant(0, 
Addr.getValueType().getSimpleVT()); - - //errs() << "Success\n"; - return true; - - /*if (Addr.getOpcode() != ISD::ADD) { - // let SelectADDRii handle the [imm] case - if (isImm(Addr)) - return false; - // it is [reg] - - assert(Addr.getValueType().isSimple() && "Type must be simple"); - Base = Addr; - Offset = CurDAG->getTargetConstant(0, Addr.getValueType().getSimpleVT()); - - return true; - } - - if (Addr.getNumOperands() < 2) - return false; - - // let SelectADDRii handle the [imm+imm] case - if (isImm(Addr.getOperand(0)) && isImm(Addr.getOperand(1))) - return false; - - // try [reg+imm] and [imm+reg] - for (int i = 0; i < 2; i ++) - if (SelectImm(Addr.getOperand(1-i), Offset)) { - Base = Addr.getOperand(i); - return true; - } - - // neither [reg+imm] nor [imm+reg] - return false;*/ -} - -// Match memory operand of the form [imm+imm] and [imm] -bool PTXDAGToDAGISel::SelectADDRii(SDValue &Addr, SDValue &Base, - SDValue &Offset) { - // is [imm+imm]? - if (Addr.getOpcode() == ISD::ADD) { - return SelectImm(Addr.getOperand(0), Base) && - SelectImm(Addr.getOperand(1), Offset); - } - - // is [imm]? - if (SelectImm(Addr, Base)) { - assert(Addr.getValueType().isSimple() && "Type must be simple"); - - Offset = CurDAG->getTargetConstant(0, Addr.getValueType().getSimpleVT()); - - return true; - } - - return false; -} - -// Match memory operand of the form [reg], [imm+reg], and [reg+imm] -bool PTXDAGToDAGISel::SelectADDRlocal(SDValue &Addr, SDValue &Base, - SDValue &Offset) { - //errs() << "SelectADDRlocal: "; - //Addr.getNode()->dumpr(); - if (isa(Addr)) { - Base = Addr; - Offset = CurDAG->getTargetConstant(0, Addr.getValueType().getSimpleVT()); - //errs() << "Success\n"; - return true; - } - - if (CurDAG->isBaseWithConstantOffset(Addr)) { - Base = Addr.getOperand(0); - if (!isa(Base)) { - //errs() << "Failure\n"; - return false; - } - ConstantSDNode *CN = dyn_cast(Addr.getOperand(1)); - Offset = CurDAG->getTargetConstant(CN->getZExtValue(), MVT::i32); - //errs() << "Offset: "; - //Offset.getNode()->dumpr(); - //errs() << "Success\n"; - return true; - } - - //errs() << "Failure\n"; - return false; -} - -bool PTXDAGToDAGISel::isImm(const SDValue &operand) { - return ConstantSDNode::classof(operand.getNode()); -} - -bool PTXDAGToDAGISel::SelectImm(const SDValue &operand, SDValue &imm) { - SDNode *node = operand.getNode(); - if (!ConstantSDNode::classof(node)) - return false; - - ConstantSDNode *CN = cast(node); - imm = CurDAG->getTargetConstant(*CN->getConstantIntValue(), - operand.getValueType()); - return true; -} - -const PTXSubtarget& PTXDAGToDAGISel::getSubtarget() const -{ - return TM.getSubtarget(); -} - diff --git a/lib/Target/PTX/PTXISelLowering.cpp b/lib/Target/PTX/PTXISelLowering.cpp deleted file mode 100644 index ef4455b..0000000 --- a/lib/Target/PTX/PTXISelLowering.cpp +++ /dev/null @@ -1,522 +0,0 @@ -//===-- PTXISelLowering.cpp - PTX DAG Lowering Implementation -------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements the PTXTargetLowering class. 
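// Editorial summary of the complex-pattern matchers above (a reading of the
// code, not text from the patch):
//   [reg]          -> SelectADDRri    Base = reg, Offset = 0
//   [reg+imm]      -> SelectADDRri    Base = reg, Offset = imm
//   [imm]          -> SelectADDRii    Base = imm, Offset = 0
//   [imm+imm]      -> SelectADDRii    Base = imm, Offset = imm
//   frame indexes  -> SelectADDRlocal ([FI] or [FI+imm], local state space)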
-// -//===----------------------------------------------------------------------===// - -#include "PTXISelLowering.h" -#include "PTX.h" -#include "PTXMachineFunctionInfo.h" -#include "PTXRegisterInfo.h" -#include "PTXSubtarget.h" -#include "llvm/Function.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/CodeGen/CallingConvLower.h" -#include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineFrameInfo.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/SelectionDAG.h" -#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/raw_ostream.h" - -using namespace llvm; - -//===----------------------------------------------------------------------===// -// TargetLowering Implementation -//===----------------------------------------------------------------------===// - -PTXTargetLowering::PTXTargetLowering(TargetMachine &TM) - : TargetLowering(TM, new TargetLoweringObjectFileELF()) { - // Set up the register classes. - addRegisterClass(MVT::i1, PTX::RegPredRegisterClass); - addRegisterClass(MVT::i16, PTX::RegI16RegisterClass); - addRegisterClass(MVT::i32, PTX::RegI32RegisterClass); - addRegisterClass(MVT::i64, PTX::RegI64RegisterClass); - addRegisterClass(MVT::f32, PTX::RegF32RegisterClass); - addRegisterClass(MVT::f64, PTX::RegF64RegisterClass); - - setBooleanContents(ZeroOrOneBooleanContent); - setBooleanVectorContents(ZeroOrOneBooleanContent); // FIXME: Is this correct? - setMinFunctionAlignment(2); - - // Let LLVM use loads/stores for all mem* operations - maxStoresPerMemcpy = 4096; - maxStoresPerMemmove = 4096; - maxStoresPerMemset = 4096; - - //////////////////////////////////// - /////////// Expansion ////////////// - //////////////////////////////////// - - // (any/zero/sign) extload => load + (any/zero/sign) extend - - setLoadExtAction(ISD::EXTLOAD, MVT::i16, Expand); - setLoadExtAction(ISD::ZEXTLOAD, MVT::i16, Expand); - setLoadExtAction(ISD::SEXTLOAD, MVT::i16, Expand); - - // f32 extload => load + fextend - - setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand); - - // f64 truncstore => trunc + store - - setTruncStoreAction(MVT::f64, MVT::f32, Expand); - - // sign_extend_inreg => sign_extend - - setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); - - // br_cc => brcond - - setOperationAction(ISD::BR_CC, MVT::Other, Expand); - - // select_cc => setcc - - setOperationAction(ISD::SELECT_CC, MVT::Other, Expand); - setOperationAction(ISD::SELECT_CC, MVT::f32, Expand); - setOperationAction(ISD::SELECT_CC, MVT::f64, Expand); - - //////////////////////////////////// - //////////// Legal ///////////////// - //////////////////////////////////// - - setOperationAction(ISD::ConstantFP, MVT::f32, Legal); - setOperationAction(ISD::ConstantFP, MVT::f64, Legal); - - //////////////////////////////////// - //////////// Custom //////////////// - //////////////////////////////////// - - // customise setcc to use bitwise logic if possible - - //setOperationAction(ISD::SETCC, MVT::i1, Custom); - setOperationAction(ISD::SETCC, MVT::i1, Legal); - - // customize translation of memory addresses - - setOperationAction(ISD::GlobalAddress, MVT::i32, Custom); - setOperationAction(ISD::GlobalAddress, MVT::i64, Custom); - - // Compute derived properties from the register classes - computeRegisterProperties(); -} - -EVT PTXTargetLowering::getSetCCResultType(EVT VT) const { - return MVT::i1; -} - -SDValue PTXTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { - switch (Op.getOpcode()) { - default: 
- llvm_unreachable("Unimplemented operand"); - case ISD::SETCC: - return LowerSETCC(Op, DAG); - case ISD::GlobalAddress: - return LowerGlobalAddress(Op, DAG); - } -} - -const char *PTXTargetLowering::getTargetNodeName(unsigned Opcode) const { - switch (Opcode) { - default: - llvm_unreachable("Unknown opcode"); - case PTXISD::COPY_ADDRESS: - return "PTXISD::COPY_ADDRESS"; - case PTXISD::LOAD_PARAM: - return "PTXISD::LOAD_PARAM"; - case PTXISD::STORE_PARAM: - return "PTXISD::STORE_PARAM"; - case PTXISD::READ_PARAM: - return "PTXISD::READ_PARAM"; - case PTXISD::WRITE_PARAM: - return "PTXISD::WRITE_PARAM"; - case PTXISD::EXIT: - return "PTXISD::EXIT"; - case PTXISD::RET: - return "PTXISD::RET"; - case PTXISD::CALL: - return "PTXISD::CALL"; - } -} - -//===----------------------------------------------------------------------===// -// Custom Lower Operation -//===----------------------------------------------------------------------===// - -SDValue PTXTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { - assert(Op.getValueType() == MVT::i1 && "SetCC type must be 1-bit integer"); - SDValue Op0 = Op.getOperand(0); - SDValue Op1 = Op.getOperand(1); - SDValue Op2 = Op.getOperand(2); - DebugLoc dl = Op.getDebugLoc(); - //ISD::CondCode CC = cast(Op.getOperand(2))->get(); - - // Look for X == 0, X == 1, X != 0, or X != 1 - // We can simplify these to bitwise logic - - //if (Op1.getOpcode() == ISD::Constant && - // (cast(Op1)->getZExtValue() == 1 || - // cast(Op1)->isNullValue()) && - // (CC == ISD::SETEQ || CC == ISD::SETNE)) { - // - // return DAG.getNode(ISD::AND, dl, MVT::i1, Op0, Op1); - //} - - //ConstantSDNode* COp1 = cast(Op1); - //if(COp1 && COp1->getZExtValue() == 1) { - // if(CC == ISD::SETNE) { - // return DAG.getNode(PTX::XORripreds, dl, MVT::i1, Op0); - // } - //} - - llvm_unreachable("setcc was not matched by a pattern!"); - - return DAG.getNode(ISD::SETCC, dl, MVT::i1, Op0, Op1, Op2); -} - -SDValue PTXTargetLowering:: -LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const { - EVT PtrVT = getPointerTy(); - DebugLoc dl = Op.getDebugLoc(); - const GlobalValue *GV = cast(Op)->getGlobal(); - - assert(PtrVT.isSimple() && "Pointer must be to primitive type."); - - SDValue targetGlobal = DAG.getTargetGlobalAddress(GV, dl, PtrVT); - SDValue movInstr = DAG.getNode(PTXISD::COPY_ADDRESS, - dl, - PtrVT.getSimpleVT(), - targetGlobal); - - return movInstr; -} - -//===----------------------------------------------------------------------===// -// Calling Convention Implementation -//===----------------------------------------------------------------------===// - -SDValue PTXTargetLowering:: - LowerFormalArguments(SDValue Chain, - CallingConv::ID CallConv, - bool isVarArg, - const SmallVectorImpl &Ins, - DebugLoc dl, - SelectionDAG &DAG, - SmallVectorImpl &InVals) const { - if (isVarArg) llvm_unreachable("PTX does not support varargs"); - - MachineFunction &MF = DAG.getMachineFunction(); - const PTXSubtarget& ST = getTargetMachine().getSubtarget(); - PTXMachineFunctionInfo *MFI = MF.getInfo(); - PTXParamManager &PM = MFI->getParamManager(); - - switch (CallConv) { - default: - llvm_unreachable("Unsupported calling convention"); - case CallingConv::PTX_Kernel: - MFI->setKernel(true); - break; - case CallingConv::PTX_Device: - MFI->setKernel(false); - break; - } - - // We do one of two things here: - // IsKernel || SM >= 2.0 -> Use param space for arguments - // SM < 2.0 -> Use registers for arguments - if (MFI->isKernel() || ST.useParamSpaceForDeviceArgs()) { - // We just need to emit the 
proper LOAD_PARAM ISDs - for (unsigned i = 0, e = Ins.size(); i != e; ++i) { - assert((!MFI->isKernel() || Ins[i].VT != MVT::i1) && - "Kernels cannot take pred operands"); - - unsigned ParamSize = Ins[i].VT.getStoreSizeInBits(); - unsigned Param = PM.addArgumentParam(ParamSize); - const std::string &ParamName = PM.getParamName(Param); - SDValue ParamValue = DAG.getTargetExternalSymbol(ParamName.c_str(), - MVT::Other); - SDValue ArgValue = DAG.getNode(PTXISD::LOAD_PARAM, dl, Ins[i].VT, Chain, - ParamValue); - InVals.push_back(ArgValue); - } - } - else { - for (unsigned i = 0, e = Ins.size(); i != e; ++i) { - EVT RegVT = Ins[i].VT; - const TargetRegisterClass* TRC = getRegClassFor(RegVT); - unsigned RegType; - - // Determine which register class we need - if (RegVT == MVT::i1) - RegType = PTXRegisterType::Pred; - else if (RegVT == MVT::i16) - RegType = PTXRegisterType::B16; - else if (RegVT == MVT::i32) - RegType = PTXRegisterType::B32; - else if (RegVT == MVT::i64) - RegType = PTXRegisterType::B64; - else if (RegVT == MVT::f32) - RegType = PTXRegisterType::F32; - else if (RegVT == MVT::f64) - RegType = PTXRegisterType::F64; - else - llvm_unreachable("Unknown parameter type"); - - // Use a unique index in the instruction to prevent instruction folding. - // Yes, this is a hack. - SDValue Index = DAG.getTargetConstant(i, MVT::i32); - unsigned Reg = MF.getRegInfo().createVirtualRegister(TRC); - SDValue ArgValue = DAG.getNode(PTXISD::READ_PARAM, dl, RegVT, Chain, - Index); - - InVals.push_back(ArgValue); - - MFI->addRegister(Reg, RegType, PTXRegisterSpace::Argument); - } - } - - return Chain; -} - -SDValue PTXTargetLowering:: - LowerReturn(SDValue Chain, - CallingConv::ID CallConv, - bool isVarArg, - const SmallVectorImpl &Outs, - const SmallVectorImpl &OutVals, - DebugLoc dl, - SelectionDAG &DAG) const { - if (isVarArg) llvm_unreachable("PTX does not support varargs"); - - switch (CallConv) { - default: - llvm_unreachable("Unsupported calling convention."); - case CallingConv::PTX_Kernel: - assert(Outs.size() == 0 && "Kernel must return void."); - return DAG.getNode(PTXISD::EXIT, dl, MVT::Other, Chain); - case CallingConv::PTX_Device: - assert(Outs.size() <= 1 && "Can at most return one value."); - break; - } - - MachineFunction& MF = DAG.getMachineFunction(); - PTXMachineFunctionInfo *MFI = MF.getInfo(); - PTXParamManager &PM = MFI->getParamManager(); - - SDValue Flag; - const PTXSubtarget& ST = getTargetMachine().getSubtarget(); - - if (ST.useParamSpaceForDeviceArgs()) { - assert(Outs.size() < 2 && "Device functions can return at most one value"); - - if (Outs.size() == 1) { - unsigned ParamSize = OutVals[0].getValueType().getSizeInBits(); - unsigned Param = PM.addReturnParam(ParamSize); - const std::string &ParamName = PM.getParamName(Param); - SDValue ParamValue = DAG.getTargetExternalSymbol(ParamName.c_str(), - MVT::Other); - Chain = DAG.getNode(PTXISD::STORE_PARAM, dl, MVT::Other, Chain, - ParamValue, OutVals[0]); - } - } else { - for (unsigned i = 0, e = Outs.size(); i != e; ++i) { - EVT RegVT = Outs[i].VT; - const TargetRegisterClass* TRC; - unsigned RegType; - - // Determine which register class we need - if (RegVT == MVT::i1) { - TRC = PTX::RegPredRegisterClass; - RegType = PTXRegisterType::Pred; - } - else if (RegVT == MVT::i16) { - TRC = PTX::RegI16RegisterClass; - RegType = PTXRegisterType::B16; - } - else if (RegVT == MVT::i32) { - TRC = PTX::RegI32RegisterClass; - RegType = PTXRegisterType::B32; - } - else if (RegVT == MVT::i64) { - TRC = PTX::RegI64RegisterClass; - RegType = 
PTXRegisterType::B64; - } - else if (RegVT == MVT::f32) { - TRC = PTX::RegF32RegisterClass; - RegType = PTXRegisterType::F32; - } - else if (RegVT == MVT::f64) { - TRC = PTX::RegF64RegisterClass; - RegType = PTXRegisterType::F64; - } - else { - llvm_unreachable("Unknown parameter type"); - } - - unsigned Reg = MF.getRegInfo().createVirtualRegister(TRC); - - SDValue Copy = DAG.getCopyToReg(Chain, dl, Reg, OutVals[i]/*, Flag*/); - SDValue OutReg = DAG.getRegister(Reg, RegVT); - - Chain = DAG.getNode(PTXISD::WRITE_PARAM, dl, MVT::Other, Copy, OutReg); - - MFI->addRegister(Reg, RegType, PTXRegisterSpace::Return); - } - } - - if (Flag.getNode() == 0) { - return DAG.getNode(PTXISD::RET, dl, MVT::Other, Chain); - } - else { - return DAG.getNode(PTXISD::RET, dl, MVT::Other, Chain, Flag); - } -} - -SDValue -PTXTargetLowering::LowerCall(SDValue Chain, SDValue Callee, - CallingConv::ID CallConv, bool isVarArg, - bool doesNotRet, bool &isTailCall, - const SmallVectorImpl &Outs, - const SmallVectorImpl &OutVals, - const SmallVectorImpl &Ins, - DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl &InVals) const { - - MachineFunction& MF = DAG.getMachineFunction(); - PTXMachineFunctionInfo *PTXMFI = MF.getInfo(); - PTXParamManager &PM = PTXMFI->getParamManager(); - MachineFrameInfo *MFI = MF.getFrameInfo(); - - assert(getTargetMachine().getSubtarget().callsAreHandled() && - "Calls are not handled for the target device"); - - // Identify the callee function - const GlobalValue *GV = cast(Callee)->getGlobal(); - const Function *function = cast(GV); - - // allow non-device calls only for printf - bool isPrintf = function->getName() == "printf" || function->getName() == "puts"; - - assert((isPrintf || function->getCallingConv() == CallingConv::PTX_Device) && - "PTX function calls must be to PTX device functions"); - - unsigned outSize = isPrintf ? 
2 : Outs.size();
-
-  std::vector<SDValue> Ops;
-  // The layout of the ops will be [Chain, #Ins, Ins, Callee, #Outs, Outs]
-  Ops.resize(outSize + Ins.size() + 4);
-
-  Ops[0] = Chain;
-
-  // Identify the callee function
-  Callee = DAG.getTargetGlobalAddress(GV, dl, getPointerTy());
-  Ops[Ins.size()+2] = Callee;
-
-  // #Outs
-  Ops[Ins.size()+3] = DAG.getTargetConstant(outSize, MVT::i32);
-
-  if (isPrintf) {
-    // first argument is the address of the global string variable in memory
-    unsigned Param0 = PM.addLocalParam(getPointerTy().getSizeInBits());
-    SDValue ParamValue0 = DAG.getTargetExternalSymbol(PM.getParamName(Param0).c_str(),
-                                                      MVT::Other);
-    Chain = DAG.getNode(PTXISD::STORE_PARAM, dl, MVT::Other, Chain,
-                        ParamValue0, OutVals[0]);
-    Ops[Ins.size()+4] = ParamValue0;
-
-    // alignment is the maximum size of any variadic argument, in bits
-    unsigned alignment = 0;
-    for (unsigned i = 1; i < OutVals.size(); ++i) {
-      alignment = std::max(alignment,
-                           OutVals[i].getValueType().getSizeInBits());
-    }
-
-    // size is the alignment multiplied by the number of variadic arguments
-    // (the format string at OutVals[0] is not counted)
-    unsigned size = alignment * (OutVals.size() - 1);
-
-    // second argument is the address of the stack object (unless there are
-    // no variadic arguments)
-    unsigned Param1 = PM.addLocalParam(getPointerTy().getSizeInBits());
-    SDValue ParamValue1 = DAG.getTargetExternalSymbol(PM.getParamName(Param1).c_str(),
-                                                      MVT::Other);
-    Ops[Ins.size()+5] = ParamValue1;
-
-    if (size > 0)
-    {
-      // create a local stack object to store the arguments
-      unsigned StackObject = MFI->CreateStackObject(size / 8, alignment / 8, false);
-      SDValue FrameIndex = DAG.getFrameIndex(StackObject, getPointerTy());
-
-      // store each of the arguments to the stack in turn
-      for (unsigned int i = 1; i != OutVals.size(); i++) {
-        SDValue FrameAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(), FrameIndex, DAG.getTargetConstant((i - 1) * 8, getPointerTy()));
-        Chain = DAG.getStore(Chain, dl, OutVals[i], FrameAddr,
-                             MachinePointerInfo(),
-                             false, false, 0);
-      }
-
-      // copy the address of the local frame index to get the address in non-local space
-      SDValue genericAddr = DAG.getNode(PTXISD::COPY_ADDRESS, dl, getPointerTy(), FrameIndex);
-
-      // store this address in the second argument
-      Chain = DAG.getNode(PTXISD::STORE_PARAM, dl, MVT::Other, Chain, ParamValue1, genericAddr);
-    }
-  }
-  else
-  {
-    // Generate STORE_PARAM nodes for each function argument. In PTX, function
-    // arguments are explicitly stored into .param variables and passed as
-    // arguments. There is no register/stack-based calling convention in PTX.
-    for (unsigned i = 0; i != OutVals.size(); ++i) {
-      unsigned Size = OutVals[i].getValueType().getSizeInBits();
-      unsigned Param = PM.addLocalParam(Size);
-      const std::string &ParamName = PM.getParamName(Param);
-      SDValue ParamValue = DAG.getTargetExternalSymbol(ParamName.c_str(),
-                                                       MVT::Other);
-      Chain = DAG.getNode(PTXISD::STORE_PARAM, dl, MVT::Other, Chain,
-                          ParamValue, OutVals[i]);
-      Ops[i+Ins.size()+4] = ParamValue;
-    }
-  }
-
-  std::vector<SDValue> InParams;
-
-  // Generate list of .param variables to hold the return value(s).
-  Ops[1] = DAG.getTargetConstant(Ins.size(), MVT::i32);
-  for (unsigned i = 0; i < Ins.size(); ++i) {
-    unsigned Size = Ins[i].VT.getStoreSizeInBits();
-    unsigned Param = PM.addLocalParam(Size);
-    const std::string &ParamName = PM.getParamName(Param);
-    SDValue ParamValue = DAG.getTargetExternalSymbol(ParamName.c_str(),
-                                                     MVT::Other);
-    Ops[i+2] = ParamValue;
-    InParams.push_back(ParamValue);
-  }
-
-  Ops[0] = Chain;
-
-  // Create the CALL node.
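Before the CALL node is assembled below, it is worth checking the printf
buffer arithmetic above with concrete numbers. The following standalone C++
sketch (not part of the diff; argument sizes are hypothetical) reruns the
alignment/size computation for a call such as printf("%d %f\n", i, d), whose
variadic arguments are one i32 and one f64:

    #include <algorithm>
    #include <cstdio>
    #include <vector>

    int main() {
      // OutVals[0] would be the format-string pointer; the variadic
      // arguments are an i32 and an f64, given in bits as getSizeInBits().
      std::vector<unsigned> ArgBits = {32, 64};

      unsigned alignment = 0; // max size of any variadic argument
      for (unsigned Bits : ArgBits)
        alignment = std::max(alignment, Bits);

      unsigned size = alignment * ArgBits.size(); // buffer size, in bits
      std::printf("stack object: %u bytes, %u-byte aligned\n",
                  size / 8, alignment / 8);

      // The lowering stores argument i at byte offset (i - 1) * 8, i.e. one
      // fixed 64-bit slot per argument, while the object size above scales
      // with the largest argument, so the two only agree when the largest
      // argument is 64 bits wide.
      for (unsigned i = 1; i != ArgBits.size() + 1; ++i)
        std::printf("arg %u -> byte offset %u\n", i, (i - 1) * 8);
      return 0;
    }

For this example the object is 16 bytes with stores at offsets 0 and 8, which
matches the fixed stride; with two i32 arguments the object would shrink to 8
bytes while the second store still lands at offset 8, which is worth keeping
in mind when reading the code above. With the operand vector complete, the
CALL node is created next.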
- Chain = DAG.getNode(PTXISD::CALL, dl, MVT::Other, &Ops[0], Ops.size()); - - // Create the LOAD_PARAM nodes that retrieve the function return value(s). - for (unsigned i = 0; i < Ins.size(); ++i) { - SDValue Load = DAG.getNode(PTXISD::LOAD_PARAM, dl, Ins[i].VT, Chain, - InParams[i]); - InVals.push_back(Load); - } - - return Chain; -} - -unsigned PTXTargetLowering::getNumRegisters(LLVMContext &Context, EVT VT) { - // All arguments consist of one "register," regardless of the type. - return 1; -} - diff --git a/lib/Target/PTX/PTXISelLowering.h b/lib/Target/PTX/PTXISelLowering.h deleted file mode 100644 index 33220f4..0000000 --- a/lib/Target/PTX/PTXISelLowering.h +++ /dev/null @@ -1,82 +0,0 @@ -//===-- PTXISelLowering.h - PTX DAG Lowering Interface ----------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file defines the interfaces that PTX uses to lower LLVM code into a -// selection DAG. -// -//===----------------------------------------------------------------------===// - -#ifndef PTX_ISEL_LOWERING_H -#define PTX_ISEL_LOWERING_H - -#include "llvm/Target/TargetLowering.h" - -namespace llvm { - -namespace PTXISD { - enum NodeType { - FIRST_NUMBER = ISD::BUILTIN_OP_END, - LOAD_PARAM, - STORE_PARAM, - READ_PARAM, - WRITE_PARAM, - EXIT, - RET, - COPY_ADDRESS, - CALL - }; -} // namespace PTXISD - -class PTXTargetLowering : public TargetLowering { - public: - explicit PTXTargetLowering(TargetMachine &TM); - - virtual const char *getTargetNodeName(unsigned Opcode) const; - - virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const; - - virtual SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const; - - virtual SDValue - LowerFormalArguments(SDValue Chain, - CallingConv::ID CallConv, - bool isVarArg, - const SmallVectorImpl &Ins, - DebugLoc dl, - SelectionDAG &DAG, - SmallVectorImpl &InVals) const; - - virtual SDValue - LowerReturn(SDValue Chain, - CallingConv::ID CallConv, - bool isVarArg, - const SmallVectorImpl &Outs, - const SmallVectorImpl &OutVals, - DebugLoc dl, - SelectionDAG &DAG) const; - - virtual SDValue - LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, - bool isVarArg, bool doesNotRet, bool &isTailCall, - const SmallVectorImpl &Outs, - const SmallVectorImpl &OutVals, - const SmallVectorImpl &Ins, - DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl &InVals) const; - - virtual EVT getSetCCResultType(EVT VT) const; - - virtual unsigned getNumRegisters(LLVMContext &Context, EVT VT); - - private: - SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const; -}; // class PTXTargetLowering -} // namespace llvm - -#endif // PTX_ISEL_LOWERING_H diff --git a/lib/Target/PTX/PTXInstrFormats.td b/lib/Target/PTX/PTXInstrFormats.td deleted file mode 100644 index 267e834..0000000 --- a/lib/Target/PTX/PTXInstrFormats.td +++ /dev/null @@ -1,51 +0,0 @@ -//===-- PTXInstrFormats.td - PTX Instruction Formats -------*- tablegen -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. 
-//
-//===----------------------------------------------------------------------===//
-
-
-// Rounding Mode Specifier
-/*class RoundingMode<bits<3> val> {
-  bits<3> Value = val;
-}
-
-def RndDefault : RoundingMode<0>;
-def RndNearestEven : RoundingMode<1>;
-def RndNearestZero : RoundingMode<2>;
-def RndNegInf : RoundingMode<3>;
-def RndPosInf : RoundingMode<4>;
-def RndApprox : RoundingMode<5>;*/
-
-
-// Rounding Mode Operand
-def RndMode : Operand<i32> {
-  let PrintMethod = "printRoundingMode";
-}
-
-def RndDefault : PatLeaf<(i32 0)>;
-
-// PTX Predicate operand, default to (0, 0) = (zero-reg, none).
-// Leave PrintMethod empty; predicate printing is defined elsewhere.
-def pred : PredicateOperand<OtherVT, (ops RegPred, i32imm),
-                            (ops (i1 zero_reg), (i32 0))>;
-
-def RndModeOperand : Operand<OtherVT> {
-  let MIOperandInfo = (ops i32imm);
-}
-
-// Instruction Types
-let Namespace = "PTX" in {
-
-  class InstPTX<dag oops, dag iops, string asmstr, list<dag> pattern>
-    : Instruction {
-      dag OutOperandList = oops;
-      dag InOperandList = !con(iops, (ins pred:$_p));
-      let AsmString = asmstr; // Predicate printing is defined elsewhere.
-      let Pattern = pattern;
-      let isPredicable = 1;
-  }
-}
diff --git a/lib/Target/PTX/PTXInstrInfo.cpp b/lib/Target/PTX/PTXInstrInfo.cpp
deleted file mode 100644
index 443cd54..0000000
--- a/lib/Target/PTX/PTXInstrInfo.cpp
+++ /dev/null
@@ -1,359 +0,0 @@
-//===-- PTXInstrInfo.cpp - PTX Instruction Information --------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the PTX implementation of the TargetInstrInfo class.
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "ptx-instrinfo"
-
-#include "PTXInstrInfo.h"
-#include "PTX.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/SelectionDAG.h"
-#include "llvm/CodeGen/SelectionDAGNodes.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/TargetRegistry.h"
-#include "llvm/Support/raw_ostream.h"
-
-#define GET_INSTRINFO_CTOR
-#include "PTXGenInstrInfo.inc"
-
-using namespace llvm;
-
-PTXInstrInfo::PTXInstrInfo(PTXTargetMachine &_TM)
-  : PTXGenInstrInfo(),
-    RI(_TM, *this), TM(_TM) {}
-
-static const struct map_entry {
-  const TargetRegisterClass *cls;
-  const int opcode;
-} map[] = {
-  { &PTX::RegI16RegClass, PTX::MOVU16rr },
-  { &PTX::RegI32RegClass, PTX::MOVU32rr },
-  { &PTX::RegI64RegClass, PTX::MOVU64rr },
-  { &PTX::RegF32RegClass, PTX::MOVF32rr },
-  { &PTX::RegF64RegClass, PTX::MOVF64rr },
-  { &PTX::RegPredRegClass, PTX::MOVPREDrr }
-};
-
-void PTXInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
-                               MachineBasicBlock::iterator I, DebugLoc DL,
-                               unsigned DstReg, unsigned SrcReg,
-                               bool KillSrc) const {
-
-  const MachineRegisterInfo& MRI = MBB.getParent()->getRegInfo();
-  //assert(MRI.getRegClass(SrcReg) == MRI.getRegClass(DstReg) &&
-  //       "Invalid register copy between two register classes");
-
-  for (int i = 0, e = sizeof(map)/sizeof(map[0]); i != e; ++i) {
-    if (map[i].cls == MRI.getRegClass(DstReg)) {
-      const MCInstrDesc &MCID = get(map[i].opcode);
-      MachineInstr *MI = BuildMI(MBB, I, DL, MCID, DstReg).
-        addReg(SrcReg, getKillRegState(KillSrc));
-      AddDefaultPredicate(MI);
-      return;
-    }
-  }
-
-  llvm_unreachable("Impossible reg-to-reg copy");
-}
-
-bool PTXInstrInfo::copyRegToReg(MachineBasicBlock &MBB,
-                                MachineBasicBlock::iterator I,
-                                unsigned DstReg, unsigned SrcReg,
-                                const TargetRegisterClass *DstRC,
-                                const TargetRegisterClass *SrcRC,
-                                DebugLoc DL) const {
-  if (DstRC != SrcRC)
-    return false;
-
-  for (int i = 0, e = sizeof(map)/sizeof(map[0]); i != e; ++i)
-    if (DstRC == map[i].cls) {
-      const MCInstrDesc &MCID = get(map[i].opcode);
-      MachineInstr *MI = BuildMI(MBB, I, DL, MCID, DstReg).addReg(SrcReg);
-      AddDefaultPredicate(MI);
-      return true;
-    }
-
-  return false;
-}
-
-bool PTXInstrInfo::isMoveInstr(const MachineInstr& MI,
-                               unsigned &SrcReg, unsigned &DstReg,
-                               unsigned &SrcSubIdx, unsigned &DstSubIdx) const {
-  switch (MI.getOpcode()) {
-    default:
-      return false;
-    case PTX::MOVU16rr:
-    case PTX::MOVU32rr:
-    case PTX::MOVU64rr:
-    case PTX::MOVF32rr:
-    case PTX::MOVF64rr:
-    case PTX::MOVPREDrr:
-      assert(MI.getNumOperands() >= 2 &&
-             MI.getOperand(0).isReg() && MI.getOperand(1).isReg() &&
-             "Invalid register-register move instruction");
-      SrcSubIdx = DstSubIdx = 0; // No sub-registers
-      DstReg = MI.getOperand(0).getReg();
-      SrcReg = MI.getOperand(1).getReg();
-      return true;
-  }
-}
-
-// predicate support
-
-bool PTXInstrInfo::isPredicated(const MachineInstr *MI) const {
-  int i = MI->findFirstPredOperandIdx();
-  return i != -1 && MI->getOperand(i).getReg() != PTX::NoRegister;
-}
-
-bool PTXInstrInfo::isUnpredicatedTerminator(const MachineInstr *MI) const {
-  return !isPredicated(MI) && MI->isTerminator();
-}
-
-bool PTXInstrInfo::
-PredicateInstruction(MachineInstr *MI,
-                     const SmallVectorImpl<MachineOperand> &Pred) const {
-  if (Pred.size() < 2)
-    llvm_unreachable("fewer than 2 predicate operands provided");
-
-  int i = MI->findFirstPredOperandIdx();
-  if (i == -1)
-    llvm_unreachable("missing predicate operand");
-
-  MI->getOperand(i).setReg(Pred[0].getReg());
-  MI->getOperand(i+1).setImm(Pred[1].getImm());
-
-  return true;
-}
-
-bool PTXInstrInfo::
-SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
                  const SmallVectorImpl<MachineOperand> &Pred2) const {
-  const MachineOperand &PredReg1 = Pred1[0];
-  const MachineOperand &PredReg2 = Pred2[0];
-  if (PredReg1.getReg() != PredReg2.getReg())
-    return false;
-
-  const MachineOperand &PredOp1 = Pred1[1];
-  const MachineOperand &PredOp2 = Pred2[1];
-  if (PredOp1.getImm() != PredOp2.getImm())
-    return false;
-
-  return true;
-}
-
-bool PTXInstrInfo::
-DefinesPredicate(MachineInstr *MI,
-                 std::vector<MachineOperand> &Pred) const {
-  // If an instruction sets a predicate register, it defines a predicate.
-
-  // TODO: support the 5-operand format of the setp instruction
-
-  if (MI->getNumOperands() < 1)
-    return false;
-
-  const MachineOperand &MO = MI->getOperand(0);
-
-  if (!MO.isReg() || RI.getRegClass(MO.getReg()) != &PTX::RegPredRegClass)
-    return false;
-
-  Pred.push_back(MO);
-  Pred.push_back(MachineOperand::CreateImm(PTXPredicate::None));
-  return true;
-}
-
-// branch support
-
-bool PTXInstrInfo::
-AnalyzeBranch(MachineBasicBlock &MBB,
-              MachineBasicBlock *&TBB,
-              MachineBasicBlock *&FBB,
-              SmallVectorImpl<MachineOperand> &Cond,
-              bool AllowModify) const {
-  // TODO: implement the cases where AllowModify is true
-
-  if (MBB.empty())
-    return true;
-
-  MachineBasicBlock::iterator iter = MBB.end();
-  const MachineInstr& instLast1 = *--iter;
-  // for the special case where MBB has only 1 instruction
-  const bool IsSizeOne = MBB.size() == 1;
-  // if IsSizeOne is true, *--iter is invalid, so we put a dummy value in
-  // instLast2 since it is still read below
-  const MachineInstr& instLast2 = IsSizeOne ? instLast1 : *--iter;
-
-  DEBUG(dbgs() << "\n");
-  DEBUG(dbgs() << "AnalyzeBranch: opcode: " << instLast1.getOpcode() << "\n");
-  DEBUG(dbgs() << "AnalyzeBranch: MBB: " << MBB.getName().str() << "\n");
-  DEBUG(dbgs() << "AnalyzeBranch: TBB: " << TBB << "\n");
-  DEBUG(dbgs() << "AnalyzeBranch: FBB: " << FBB << "\n");
-
-  // this block ends with no branches
-  if (!IsAnyKindOfBranch(instLast1)) {
-    DEBUG(dbgs() << "AnalyzeBranch: ends with no branch\n");
-    return false;
-  }
-
-  // this block ends with only an unconditional branch
-  if (instLast1.isUnconditionalBranch() &&
-      // when IsSizeOne is true, it "absorbs" the evaluation of instLast2
-      (IsSizeOne || !IsAnyKindOfBranch(instLast2))) {
-    DEBUG(dbgs() << "AnalyzeBranch: ends with only uncond branch\n");
-    TBB = GetBranchTarget(instLast1);
-    return false;
-  }
-
-  // this block ends with a conditional branch and
-  // it falls through to a successor block
-  if (instLast1.isConditionalBranch() &&
-      IsAnySuccessorAlsoLayoutSuccessor(MBB)) {
-    DEBUG(dbgs() << "AnalyzeBranch: ends with cond branch and fall through\n");
-    TBB = GetBranchTarget(instLast1);
-    int i = instLast1.findFirstPredOperandIdx();
-    Cond.push_back(instLast1.getOperand(i));
-    Cond.push_back(instLast1.getOperand(i+1));
-    return false;
-  }
-
-  // when IsSizeOne is true, we are done
-  if (IsSizeOne)
-    return true;
-
-  // this block ends with a conditional branch
-  // followed by an unconditional branch
-  if (instLast2.isConditionalBranch() &&
-      instLast1.isUnconditionalBranch()) {
-    DEBUG(dbgs() << "AnalyzeBranch: ends with cond and uncond branch\n");
-    TBB = GetBranchTarget(instLast2);
-    FBB = GetBranchTarget(instLast1);
-    int i = instLast2.findFirstPredOperandIdx();
-    Cond.push_back(instLast2.getOperand(i));
-    Cond.push_back(instLast2.getOperand(i+1));
-    return false;
-  }
-
-  // branch cannot be understood
-  DEBUG(dbgs() << "AnalyzeBranch: cannot be understood\n");
-  return true;
-}
-
-unsigned PTXInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
-  unsigned count = 0;
-  while (!MBB.empty())
-    if (IsAnyKindOfBranch(MBB.back())) {
-      MBB.pop_back();
-      ++count;
-    } else
-      break;
-  DEBUG(dbgs() << "RemoveBranch: MBB: " << MBB.getName().str() << "\n");
-  DEBUG(dbgs() << "RemoveBranch: remove " << count << " branch inst\n");
-  return count;
-}
-
-unsigned PTXInstrInfo::
-InsertBranch(MachineBasicBlock &MBB,
-             MachineBasicBlock *TBB,
-             MachineBasicBlock *FBB,
-             const SmallVectorImpl<MachineOperand> &Cond,
-             DebugLoc DL) const {
-  DEBUG(dbgs() << "InsertBranch: MBB: " << MBB.getName().str() << "\n");
-  DEBUG(if (TBB)
dbgs() << "InsertBranch: TBB: " << TBB->getName().str() - << "\n"; - else dbgs() << "InsertBranch: TBB: (NULL)\n"); - DEBUG(if (FBB) dbgs() << "InsertBranch: FBB: " << FBB->getName().str() - << "\n"; - else dbgs() << "InsertBranch: FBB: (NULL)\n"); - DEBUG(dbgs() << "InsertBranch: Cond size: " << Cond.size() << "\n"); - - assert(TBB && "TBB is NULL"); - - if (FBB) { - BuildMI(&MBB, DL, get(PTX::BRAdp)) - .addMBB(TBB).addReg(Cond[0].getReg()).addImm(Cond[1].getImm()); - BuildMI(&MBB, DL, get(PTX::BRAd)) - .addMBB(FBB).addReg(PTX::NoRegister).addImm(PTXPredicate::None); - return 2; - } else if (Cond.size()) { - BuildMI(&MBB, DL, get(PTX::BRAdp)) - .addMBB(TBB).addReg(Cond[0].getReg()).addImm(Cond[1].getImm()); - return 1; - } else { - BuildMI(&MBB, DL, get(PTX::BRAd)) - .addMBB(TBB).addReg(PTX::NoRegister).addImm(PTXPredicate::None); - return 1; - } -} - -// Memory operand folding for spills -void PTXInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MII, - unsigned SrcReg, bool isKill, int FrameIdx, - const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const { - llvm_unreachable("storeRegToStackSlot should not be called for PTX"); -} - -void PTXInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MII, - unsigned DestReg, int FrameIdx, - const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const { - llvm_unreachable("loadRegFromStackSlot should not be called for PTX"); -} - -// static helper routines - -MachineSDNode *PTXInstrInfo:: -GetPTXMachineNode(SelectionDAG *DAG, unsigned Opcode, - DebugLoc dl, EVT VT, SDValue Op1) { - SDValue predReg = DAG->getRegister(PTX::NoRegister, MVT::i1); - SDValue predOp = DAG->getTargetConstant(PTXPredicate::None, MVT::i32); - SDValue ops[] = { Op1, predReg, predOp }; - return DAG->getMachineNode(Opcode, dl, VT, ops, array_lengthof(ops)); -} - -MachineSDNode *PTXInstrInfo:: -GetPTXMachineNode(SelectionDAG *DAG, unsigned Opcode, - DebugLoc dl, EVT VT, SDValue Op1, SDValue Op2) { - SDValue predReg = DAG->getRegister(PTX::NoRegister, MVT::i1); - SDValue predOp = DAG->getTargetConstant(PTXPredicate::None, MVT::i32); - SDValue ops[] = { Op1, Op2, predReg, predOp }; - return DAG->getMachineNode(Opcode, dl, VT, ops, array_lengthof(ops)); -} - -void PTXInstrInfo::AddDefaultPredicate(MachineInstr *MI) { - if (MI->findFirstPredOperandIdx() == -1) { - MI->addOperand(MachineOperand::CreateReg(PTX::NoRegister, /*IsDef=*/false)); - MI->addOperand(MachineOperand::CreateImm(PTXPredicate::None)); - } -} - -bool PTXInstrInfo::IsAnyKindOfBranch(const MachineInstr& inst) { - return inst.isTerminator() || inst.isBranch() || inst.isIndirectBranch(); -} - -bool PTXInstrInfo:: -IsAnySuccessorAlsoLayoutSuccessor(const MachineBasicBlock& MBB) { - for (MachineBasicBlock::const_succ_iterator - i = MBB.succ_begin(), e = MBB.succ_end(); i != e; ++i) - if (MBB.isLayoutSuccessor((const MachineBasicBlock*) &*i)) - return true; - return false; -} - -MachineBasicBlock *PTXInstrInfo::GetBranchTarget(const MachineInstr& inst) { - // FIXME So far all branch instructions put destination in 1st operand - const MachineOperand& target = inst.getOperand(0); - assert(target.isMBB() && "FIXME: detect branch target operand"); - return target.getMBB(); -} diff --git a/lib/Target/PTX/PTXInstrInfo.h b/lib/Target/PTX/PTXInstrInfo.h deleted file mode 100644 index fba89c0..0000000 --- a/lib/Target/PTX/PTXInstrInfo.h +++ /dev/null @@ -1,133 +0,0 @@ -//===-- PTXInstrInfo.h - PTX Instruction Information ------------*- C++ 
-*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains the PTX implementation of the TargetInstrInfo class. -// -//===----------------------------------------------------------------------===// - -#ifndef PTX_INSTR_INFO_H -#define PTX_INSTR_INFO_H - -#include "PTXRegisterInfo.h" -#include "llvm/Target/TargetInstrInfo.h" - -#define GET_INSTRINFO_HEADER -#include "PTXGenInstrInfo.inc" - -namespace llvm { -class PTXTargetMachine; - -class MachineSDNode; -class SDValue; -class SelectionDAG; - -class PTXInstrInfo : public PTXGenInstrInfo { -private: - const PTXRegisterInfo RI; - PTXTargetMachine &TM; - -public: - explicit PTXInstrInfo(PTXTargetMachine &_TM); - - virtual const PTXRegisterInfo &getRegisterInfo() const { return RI; } - - virtual void copyPhysReg(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I, DebugLoc DL, - unsigned DstReg, unsigned SrcReg, - bool KillSrc) const; - - virtual bool copyRegToReg(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I, - unsigned DstReg, unsigned SrcReg, - const TargetRegisterClass *DstRC, - const TargetRegisterClass *SrcRC, - DebugLoc DL) const; - - virtual bool isMoveInstr(const MachineInstr& MI, - unsigned &SrcReg, unsigned &DstReg, - unsigned &SrcSubIdx, unsigned &DstSubIdx) const; - - // predicate support - - virtual bool isPredicated(const MachineInstr *MI) const; - - virtual bool isUnpredicatedTerminator(const MachineInstr *MI) const; - - virtual - bool PredicateInstruction(MachineInstr *MI, - const SmallVectorImpl &Pred) const; - - virtual - bool SubsumesPredicate(const SmallVectorImpl &Pred1, - const SmallVectorImpl &Pred2) const; - - virtual bool DefinesPredicate(MachineInstr *MI, - std::vector &Pred) const; - - // PTX is fully-predicable - virtual bool isPredicable(MachineInstr *MI) const { return true; } - - // branch support - - virtual bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, - MachineBasicBlock *&FBB, - SmallVectorImpl &Cond, - bool AllowModify = false) const; - - virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const; - - virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, - MachineBasicBlock *FBB, - const SmallVectorImpl &Cond, - DebugLoc DL) const; - - // Memory operand folding for spills - // TODO: Implement this eventually and get rid of storeRegToStackSlot and - // loadRegFromStackSlot. Doing so will get rid of the "stack" registers - // we currently use to spill, though I doubt the overall effect on ptxas - // output will be large. I have yet to see a case where ptxas is unable - // to see through the "stack" register usage and hence generates - // efficient code anyway. 
- // virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF, - // MachineInstr* MI, - // const SmallVectorImpl &Ops, - // int FrameIndex) const; - - virtual void storeRegToStackSlot(MachineBasicBlock& MBB, - MachineBasicBlock::iterator MII, - unsigned SrcReg, bool isKill, int FrameIndex, - const TargetRegisterClass* RC, - const TargetRegisterInfo* TRI) const; - virtual void loadRegFromStackSlot(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MII, - unsigned DestReg, int FrameIdx, - const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const; - - // static helper routines - - static MachineSDNode *GetPTXMachineNode(SelectionDAG *DAG, unsigned Opcode, - DebugLoc dl, EVT VT, - SDValue Op1); - - static MachineSDNode *GetPTXMachineNode(SelectionDAG *DAG, unsigned Opcode, - DebugLoc dl, EVT VT, - SDValue Op1, SDValue Op2); - - static void AddDefaultPredicate(MachineInstr *MI); - - static bool IsAnyKindOfBranch(const MachineInstr& inst); - - static bool IsAnySuccessorAlsoLayoutSuccessor(const MachineBasicBlock& MBB); - - static MachineBasicBlock *GetBranchTarget(const MachineInstr& inst); -}; // class PTXInstrInfo -} // namespace llvm - -#endif // PTX_INSTR_INFO_H diff --git a/lib/Target/PTX/PTXInstrInfo.td b/lib/Target/PTX/PTXInstrInfo.td deleted file mode 100644 index bead428..0000000 --- a/lib/Target/PTX/PTXInstrInfo.td +++ /dev/null @@ -1,1031 +0,0 @@ -//===-- PTXInstrInfo.td - PTX Instruction defs --------------*- tablegen-*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file describes the PTX instructions in TableGen format. 
-// -//===----------------------------------------------------------------------===// - -//===----------------------------------------------------------------------===// -// Instruction format superclass -//===----------------------------------------------------------------------===// - -include "PTXInstrFormats.td" - -//===----------------------------------------------------------------------===// -// Code Generation Predicates -//===----------------------------------------------------------------------===// - -// Shader Model Support -def FDivNeedsRoundingMode : Predicate<"getSubtarget().fdivNeedsRoundingMode()">; -def FDivNoRoundingMode : Predicate<"!getSubtarget().fdivNeedsRoundingMode()">; -def FMadNeedsRoundingMode : Predicate<"getSubtarget().fmadNeedsRoundingMode()">; -def FMadNoRoundingMode : Predicate<"!getSubtarget().fmadNeedsRoundingMode()">; - -// PTX Version Support -def SupportsPTX21 : Predicate<"getSubtarget().supportsPTX21()">; -def DoesNotSupportPTX21 : Predicate<"!getSubtarget().supportsPTX21()">; -def SupportsPTX22 : Predicate<"getSubtarget().supportsPTX22()">; -def DoesNotSupportPTX22 : Predicate<"!getSubtarget().supportsPTX22()">; -def SupportsPTX23 : Predicate<"getSubtarget().supportsPTX23()">; -def DoesNotSupportPTX23 : Predicate<"!getSubtarget().supportsPTX23()">; - -// Fused-Multiply Add -def SupportsFMA : Predicate<"getSubtarget().supportsFMA()">; -def DoesNotSupportFMA : Predicate<"!getSubtarget().supportsFMA()">; - - - -// def SDT_PTXCallSeqStart : SDCallSeqStart<[SDTCisVT<0, i32>]>; -// def SDT_PTXCallSeqEnd : SDCallSeqEnd<[SDTCisVT<0, i32>, SDTCisVT<1, i32>]>; - -// def PTXcallseq_start : SDNode<"ISD::CALLSEQ_START", SDT_PTXCallSeqStart, -// [SDNPHasChain, SDNPOutGlue]>; -// def PTXcallseq_end : SDNode<"ISD::CALLSEQ_END", SDT_PTXCallSeqEnd, -// [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>; - -def PTXcall : SDNode<"PTXISD::CALL", SDTNone, - [SDNPHasChain, SDNPVariadic, SDNPOptInGlue, SDNPOutGlue]>; - - -// Branch & call targets have OtherVT type. -def brtarget : Operand; -def calltarget : Operand; - -//===----------------------------------------------------------------------===// -// PTX Specific Node Definitions -//===----------------------------------------------------------------------===// - -// PTX allow generic 3-reg shifts like shl r0, r1, r2 -def PTXshl : SDNode<"ISD::SHL", SDTIntBinOp>; -def PTXsrl : SDNode<"ISD::SRL", SDTIntBinOp>; -def PTXsra : SDNode<"ISD::SRA", SDTIntBinOp>; - -def PTXexit - : SDNode<"PTXISD::EXIT", SDTNone, [SDNPHasChain]>; -def PTXret - : SDNode<"PTXISD::RET", SDTNone, - [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; -def PTXcopyaddress - : SDNode<"PTXISD::COPY_ADDRESS", SDTypeProfile<1, 1, []>, []>; - - - -//===----------------------------------------------------------------------===// -// Instruction Class Templates -//===----------------------------------------------------------------------===// - -// For floating-point instructions, we cannot just embed the pattern into the -// instruction definition since we need to muck around with the rounding mode, -// and I do not know how to insert constants into instructions directly from -// pattern matches. 
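The workaround, visible in the "Instruction Selection Patterns" section near
the end of this file, is to leave the instruction patterns empty and add
separate Pat<> entries that pass RndDefault explicitly as the first operand.
The C++ fragment below expresses the same idea imperatively; it is a sketch
only, assuming the LLVM 3.1 SelectionDAG API and the FADDrr32 opcode that
PTX_FLOAT_3OP generates, not code recovered from the deleted backend:

    #include "PTX.h"                       // PTX::FADDrr32 (TableGen output)
    #include "llvm/CodeGen/SelectionDAG.h"
    using namespace llvm;

    // Emit FADDrr32 with an explicit default rounding-mode immediate. This
    // mirrors "def : Pat<(f32 (fadd ...)), (FADDrr32 RndDefault, ...)>":
    // the constant operand comes from the pattern (or here, from C++), not
    // from the instruction definition itself.
    static SDValue emitFAddDefaultRounding(SelectionDAG &DAG, DebugLoc dl,
                                           SDValue A, SDValue B) {
      SDValue Rnd = DAG.getTargetConstant(0 /* RndDefault */, MVT::i32);
      SDNode *N = DAG.getMachineNode(PTX::FADDrr32, dl, MVT::f32, Rnd, A, B);
      return SDValue(N, 0);
    }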
- -//===- Floating-Point Instructions - 2 Operand Form -----------------------===// -multiclass PTX_FLOAT_2OP { - def rr32 : InstPTX<(outs RegF32:$d), - (ins RndMode:$r, RegF32:$a), - !strconcat(opcstr, "$r.f32\t$d, $a"), []>; - def ri32 : InstPTX<(outs RegF32:$d), - (ins RndMode:$r, f32imm:$a), - !strconcat(opcstr, "$r.f32\t$d, $a"), []>; - def rr64 : InstPTX<(outs RegF64:$d), - (ins RndMode:$r, RegF64:$a), - !strconcat(opcstr, "$r.f64\t$d, $a"), []>; - def ri64 : InstPTX<(outs RegF64:$d), - (ins RndMode:$r, f64imm:$a), - !strconcat(opcstr, "$r.f64\t$d, $a"), []>; -} - -//===- Floating-Point Instructions - 3 Operand Form -----------------------===// -multiclass PTX_FLOAT_3OP { - def rr32 : InstPTX<(outs RegF32:$d), - (ins RndMode:$r, RegF32:$a, RegF32:$b), - !strconcat(opcstr, "$r.f32\t$d, $a, $b"), []>; - def ri32 : InstPTX<(outs RegF32:$d), - (ins RndMode:$r, RegF32:$a, f32imm:$b), - !strconcat(opcstr, "$r.f32\t$d, $a, $b"), []>; - def rr64 : InstPTX<(outs RegF64:$d), - (ins RndMode:$r, RegF64:$a, RegF64:$b), - !strconcat(opcstr, "$r.f64\t$d, $a, $b"), []>; - def ri64 : InstPTX<(outs RegF64:$d), - (ins RndMode:$r, RegF64:$a, f64imm:$b), - !strconcat(opcstr, "$r.f64\t$d, $a, $b"), []>; -} - -//===- Floating-Point Instructions - 4 Operand Form -----------------------===// -multiclass PTX_FLOAT_4OP { - def rrr32 : InstPTX<(outs RegF32:$d), - (ins RndMode:$r, RegF32:$a, RegF32:$b, RegF32:$c), - !strconcat(opcstr, "$r.f32\t$d, $a, $b, $c"), []>; - def rri32 : InstPTX<(outs RegF32:$d), - (ins RndMode:$r, RegF32:$a, RegF32:$b, f32imm:$c), - !strconcat(opcstr, "$r.f32\t$d, $a, $b, $c"), []>; - def rii32 : InstPTX<(outs RegF32:$d), - (ins RndMode:$r, RegF32:$a, f32imm:$b, f32imm:$c), - !strconcat(opcstr, "$r.f32\t$d, $a, $b, $c"), []>; - def rrr64 : InstPTX<(outs RegF64:$d), - (ins RndMode:$r, RegF64:$a, RegF64:$b, RegF64:$c), - !strconcat(opcstr, "$r.f64\t$d, $a, $b, $c"), []>; - def rri64 : InstPTX<(outs RegF64:$d), - (ins RndMode:$r, RegF64:$a, RegF64:$b, f64imm:$c), - !strconcat(opcstr, "$r.f64\t$d, $a, $b, $c"), []>; - def rii64 : InstPTX<(outs RegF64:$d), - (ins RndMode:$r, RegF64:$a, f64imm:$b, f64imm:$c), - !strconcat(opcstr, "$r.f64\t$d, $a, $b, $c"), []>; -} - -//===- Integer Instructions - 3 Operand Form ------------------------------===// -multiclass PTX_INT3 { - def rr16 : InstPTX<(outs RegI16:$d), - (ins RegI16:$a, RegI16:$b), - !strconcat(opcstr, ".u16\t$d, $a, $b"), - [(set RegI16:$d, (opnode RegI16:$a, RegI16:$b))]>; - def ri16 : InstPTX<(outs RegI16:$d), - (ins RegI16:$a, i16imm:$b), - !strconcat(opcstr, ".u16\t$d, $a, $b"), - [(set RegI16:$d, (opnode RegI16:$a, imm:$b))]>; - def rr32 : InstPTX<(outs RegI32:$d), - (ins RegI32:$a, RegI32:$b), - !strconcat(opcstr, ".u32\t$d, $a, $b"), - [(set RegI32:$d, (opnode RegI32:$a, RegI32:$b))]>; - def ri32 : InstPTX<(outs RegI32:$d), - (ins RegI32:$a, i32imm:$b), - !strconcat(opcstr, ".u32\t$d, $a, $b"), - [(set RegI32:$d, (opnode RegI32:$a, imm:$b))]>; - def rr64 : InstPTX<(outs RegI64:$d), - (ins RegI64:$a, RegI64:$b), - !strconcat(opcstr, ".u64\t$d, $a, $b"), - [(set RegI64:$d, (opnode RegI64:$a, RegI64:$b))]>; - def ri64 : InstPTX<(outs RegI64:$d), - (ins RegI64:$a, i64imm:$b), - !strconcat(opcstr, ".u64\t$d, $a, $b"), - [(set RegI64:$d, (opnode RegI64:$a, imm:$b))]>; -} - -//===- Integer Instructions - 3 Operand Form (Signed) ---------------------===// -multiclass PTX_INT3_SIGNED { - def rr16 : InstPTX<(outs RegI16:$d), - (ins RegI16:$a, RegI16:$b), - !strconcat(opcstr, ".s16\t$d, $a, $b"), - [(set RegI16:$d, (opnode RegI16:$a, 
RegI16:$b))]>; - def ri16 : InstPTX<(outs RegI16:$d), - (ins RegI16:$a, i16imm:$b), - !strconcat(opcstr, ".s16\t$d, $a, $b"), - [(set RegI16:$d, (opnode RegI16:$a, imm:$b))]>; - def rr32 : InstPTX<(outs RegI32:$d), - (ins RegI32:$a, RegI32:$b), - !strconcat(opcstr, ".s32\t$d, $a, $b"), - [(set RegI32:$d, (opnode RegI32:$a, RegI32:$b))]>; - def ri32 : InstPTX<(outs RegI32:$d), - (ins RegI32:$a, i32imm:$b), - !strconcat(opcstr, ".s32\t$d, $a, $b"), - [(set RegI32:$d, (opnode RegI32:$a, imm:$b))]>; - def rr64 : InstPTX<(outs RegI64:$d), - (ins RegI64:$a, RegI64:$b), - !strconcat(opcstr, ".s64\t$d, $a, $b"), - [(set RegI64:$d, (opnode RegI64:$a, RegI64:$b))]>; - def ri64 : InstPTX<(outs RegI64:$d), - (ins RegI64:$a, i64imm:$b), - !strconcat(opcstr, ".s64\t$d, $a, $b"), - [(set RegI64:$d, (opnode RegI64:$a, imm:$b))]>; -} - -//===- Bitwise Logic Instructions - 3 Operand Form ------------------------===// -multiclass PTX_LOGIC { - def ripreds : InstPTX<(outs RegPred:$d), - (ins RegPred:$a, i1imm:$b), - !strconcat(opcstr, ".pred\t$d, $a, $b"), - [(set RegPred:$d, (opnode RegPred:$a, imm:$b))]>; - def rrpreds : InstPTX<(outs RegPred:$d), - (ins RegPred:$a, RegPred:$b), - !strconcat(opcstr, ".pred\t$d, $a, $b"), - [(set RegPred:$d, (opnode RegPred:$a, RegPred:$b))]>; - def rr16 : InstPTX<(outs RegI16:$d), - (ins RegI16:$a, RegI16:$b), - !strconcat(opcstr, ".b16\t$d, $a, $b"), - [(set RegI16:$d, (opnode RegI16:$a, RegI16:$b))]>; - def ri16 : InstPTX<(outs RegI16:$d), - (ins RegI16:$a, i16imm:$b), - !strconcat(opcstr, ".b16\t$d, $a, $b"), - [(set RegI16:$d, (opnode RegI16:$a, imm:$b))]>; - def rr32 : InstPTX<(outs RegI32:$d), - (ins RegI32:$a, RegI32:$b), - !strconcat(opcstr, ".b32\t$d, $a, $b"), - [(set RegI32:$d, (opnode RegI32:$a, RegI32:$b))]>; - def ri32 : InstPTX<(outs RegI32:$d), - (ins RegI32:$a, i32imm:$b), - !strconcat(opcstr, ".b32\t$d, $a, $b"), - [(set RegI32:$d, (opnode RegI32:$a, imm:$b))]>; - def rr64 : InstPTX<(outs RegI64:$d), - (ins RegI64:$a, RegI64:$b), - !strconcat(opcstr, ".b64\t$d, $a, $b"), - [(set RegI64:$d, (opnode RegI64:$a, RegI64:$b))]>; - def ri64 : InstPTX<(outs RegI64:$d), - (ins RegI64:$a, i64imm:$b), - !strconcat(opcstr, ".b64\t$d, $a, $b"), - [(set RegI64:$d, (opnode RegI64:$a, imm:$b))]>; -} - -//===- Integer Shift Instructions - 3 Operand Form ------------------------===// -multiclass PTX_INT3ntnc { - def rr16 : InstPTX<(outs RegI16:$d), - (ins RegI16:$a, RegI16:$b), - !strconcat(opcstr, "16\t$d, $a, $b"), - [(set RegI16:$d, (opnode RegI16:$a, RegI16:$b))]>; - def rr32 : InstPTX<(outs RegI32:$d), - (ins RegI32:$a, RegI32:$b), - !strconcat(opcstr, "32\t$d, $a, $b"), - [(set RegI32:$d, (opnode RegI32:$a, RegI32:$b))]>; - def rr64 : InstPTX<(outs RegI64:$d), - (ins RegI64:$a, RegI64:$b), - !strconcat(opcstr, "64\t$d, $a, $b"), - [(set RegI64:$d, (opnode RegI64:$a, RegI64:$b))]>; - def ri16 : InstPTX<(outs RegI16:$d), - (ins RegI16:$a, i16imm:$b), - !strconcat(opcstr, "16\t$d, $a, $b"), - [(set RegI16:$d, (opnode RegI16:$a, imm:$b))]>; - def ri32 : InstPTX<(outs RegI32:$d), - (ins RegI32:$a, i32imm:$b), - !strconcat(opcstr, "32\t$d, $a, $b"), - [(set RegI32:$d, (opnode RegI32:$a, imm:$b))]>; - def ri64 : InstPTX<(outs RegI64:$d), - (ins RegI64:$a, i64imm:$b), - !strconcat(opcstr, "64\t$d, $a, $b"), - [(set RegI64:$d, (opnode RegI64:$a, imm:$b))]>; - def ir16 : InstPTX<(outs RegI16:$d), - (ins i16imm:$a, RegI16:$b), - !strconcat(opcstr, "16\t$d, $a, $b"), - [(set RegI16:$d, (opnode imm:$a, RegI16:$b))]>; - def ir32 : InstPTX<(outs RegI32:$d), - (ins i32imm:$a, 
RegI32:$b), - !strconcat(opcstr, "32\t$d, $a, $b"), - [(set RegI32:$d, (opnode imm:$a, RegI32:$b))]>; - def ir64 : InstPTX<(outs RegI64:$d), - (ins i64imm:$a, RegI64:$b), - !strconcat(opcstr, "64\t$d, $a, $b"), - [(set RegI64:$d, (opnode imm:$a, RegI64:$b))]>; -} - -//===- Set Predicate Instructions (Int) - 3/4 Operand Forms ---------------===// -multiclass PTX_SETP_I { - // TODO support 5-operand format: p|q, a, b, c - - def rr - : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b), - !strconcat("setp.", cmpstr, ".", regclsname, "\t$p, $a, $b"), - [(set RegPred:$p, (setcc RC:$a, RC:$b, cmp))]>; - def ri - : InstPTX<(outs RegPred:$p), (ins RC:$a, immcls:$b), - !strconcat("setp.", cmpstr, ".", regclsname, "\t$p, $a, $b"), - [(set RegPred:$p, (setcc RC:$a, imm:$b, cmp))]>; - - def rr_and_r - : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c), - !strconcat("setp.", cmpstr, ".and.", regclsname, - "\t$p, $a, $b, $c"), - [(set RegPred:$p, (and (setcc RC:$a, RC:$b, cmp), RegPred:$c))]>; - def ri_and_r - : InstPTX<(outs RegPred:$p), (ins RC:$a, immcls:$b, RegPred:$c), - !strconcat("setp.", cmpstr, ".and.", regclsname, - "\t$p, $a, $b, $c"), - [(set RegPred:$p, (and (setcc RC:$a, imm:$b, cmp), - RegPred:$c))]>; - def rr_or_r - : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c), - !strconcat("setp.", cmpstr, ".or.", regclsname, - "\t$p, $a, $b, $c"), - [(set RegPred:$p, (or (setcc RC:$a, RC:$b, cmp), RegPred:$c))]>; - def ri_or_r - : InstPTX<(outs RegPred:$p), (ins RC:$a, immcls:$b, RegPred:$c), - !strconcat("setp.", cmpstr, ".or.", regclsname, - "\t$p, $a, $b, $c"), - [(set RegPred:$p, (or (setcc RC:$a, imm:$b, cmp), RegPred:$c))]>; - def rr_xor_r - : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c), - !strconcat("setp.", cmpstr, ".xor.", regclsname, - "\t$p, $a, $b, $c"), - [(set RegPred:$p, (xor (setcc RC:$a, RC:$b, cmp), RegPred:$c))]>; - def ri_xor_r - : InstPTX<(outs RegPred:$p), (ins RC:$a, immcls:$b, RegPred:$c), - !strconcat("setp.", cmpstr, ".xor.", regclsname, - "\t$p, $a, $b, $c"), - [(set RegPred:$p, (xor (setcc RC:$a, imm:$b, cmp), - RegPred:$c))]>; - - def rr_and_not_r - : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c), - !strconcat("setp.", cmpstr, ".and.", regclsname, - "\t$p, $a, $b, !$c"), - [(set RegPred:$p, (and (setcc RC:$a, RC:$b, cmp), - (not RegPred:$c)))]>; - def ri_and_not_r - : InstPTX<(outs RegPred:$p), (ins RC:$a, immcls:$b, RegPred:$c), - !strconcat("setp.", cmpstr, ".and.", regclsname, - "\t$p, $a, $b, !$c"), - [(set RegPred:$p, (and (setcc RC:$a, imm:$b, cmp), - (not RegPred:$c)))]>; - def rr_or_not_r - : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c), - !strconcat("setp.", cmpstr, ".or.", regclsname, - "\t$p, $a, $b, !$c"), - [(set RegPred:$p, (or (setcc RC:$a, RC:$b, cmp), - (not RegPred:$c)))]>; - def ri_or_not_r - : InstPTX<(outs RegPred:$p), (ins RC:$a, immcls:$b, RegPred:$c), - !strconcat("setp.", cmpstr, ".or.", regclsname, - "\t$p, $a, $b, !$c"), - [(set RegPred:$p, (or (setcc RC:$a, imm:$b, cmp), - (not RegPred:$c)))]>; - def rr_xor_not_r - : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c), - !strconcat("setp.", cmpstr, ".xor.", regclsname, - "\t$p, $a, $b, !$c"), - [(set RegPred:$p, (xor (setcc RC:$a, RC:$b, cmp), - (not RegPred:$c)))]>; - def ri_xor_not_r - : InstPTX<(outs RegPred:$p), (ins RC:$a, immcls:$b, RegPred:$c), - !strconcat("setp.", cmpstr, ".xor.", regclsname, - "\t$p, $a, $b, !$c"), - [(set RegPred:$p, (xor (setcc RC:$a, imm:$b, cmp), - (not RegPred:$c)))]>; -} - -//===- Set Predicate 
Instructions (FP) - 3/4 Operand Form -----------------===// -multiclass PTX_SETP_FP { - // TODO support 5-operand format: p|q, a, b, c - - def rr_u - : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b), - !strconcat("setp.", cmpstr, "u.", regclsname, "\t$p, $a, $b"), - [(set RegPred:$p, (setcc RC:$a, RC:$b, ucmp))]>; - def rr_o - : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b), - !strconcat("setp.", cmpstr, ".", regclsname, "\t$p, $a, $b"), - [(set RegPred:$p, (setcc RC:$a, RC:$b, ocmp))]>; - - def ri_u - : InstPTX<(outs RegPred:$p), (ins RC:$a, immcls:$b), - !strconcat("setp.", cmpstr, "u.", regclsname, "\t$p, $a, $b"), - [(set RegPred:$p, (setcc RC:$a, fpimm:$b, ucmp))]>; - def ri_o - : InstPTX<(outs RegPred:$p), (ins RC:$a, immcls:$b), - !strconcat("setp.", cmpstr, ".", regclsname, "\t$p, $a, $b"), - [(set RegPred:$p, (setcc RC:$a, fpimm:$b, ocmp))]>; - - def rr_and_r_u - : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c), - !strconcat("setp.", cmpstr, "u.and.", regclsname, - "\t$p, $a, $b, $c"), - [(set RegPred:$p, (and (setcc RC:$a, RC:$b, ucmp), - RegPred:$c))]>; - def rr_and_r_o - : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c), - !strconcat("setp.", cmpstr, ".and.", regclsname, - "\t$p, $a, $b, $c"), - [(set RegPred:$p, (and (setcc RC:$a, RC:$b, ocmp), - RegPred:$c))]>; - - def rr_or_r_u - : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c), - !strconcat("setp.", cmpstr, "u.or.", regclsname, - "\t$p, $a, $b, $c"), - [(set RegPred:$p, (or (setcc RC:$a, RC:$b, ucmp), RegPred:$c))]>; - def rr_or_r_o - : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c), - !strconcat("setp.", cmpstr, ".or.", regclsname, - "\t$p, $a, $b, $c"), - [(set RegPred:$p, (or (setcc RC:$a, RC:$b, ocmp), RegPred:$c))]>; - - def rr_xor_r_u - : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c), - !strconcat("setp.", cmpstr, "u.xor.", regclsname, - "\t$p, $a, $b, $c"), - [(set RegPred:$p, (xor (setcc RC:$a, RC:$b, ucmp), - RegPred:$c))]>; - def rr_xor_r_o - : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c), - !strconcat("setp.", cmpstr, ".xor.", regclsname, - "\t$p, $a, $b, $c"), - [(set RegPred:$p, (xor (setcc RC:$a, RC:$b, ocmp), - RegPred:$c))]>; - - def rr_and_not_r_u - : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c), - !strconcat("setp.", cmpstr, "u.and.", regclsname, - "\t$p, $a, $b, !$c"), - [(set RegPred:$p, (and (setcc RC:$a, RC:$b, ucmp), - (not RegPred:$c)))]>; - def rr_and_not_r_o - : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c), - !strconcat("setp.", cmpstr, ".and.", regclsname, - "\t$p, $a, $b, !$c"), - [(set RegPred:$p, (and (setcc RC:$a, RC:$b, ocmp), - (not RegPred:$c)))]>; - - def rr_or_not_r_u - : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c), - !strconcat("setp.", cmpstr, "u.or.", regclsname, - "\t$p, $a, $b, !$c"), - [(set RegPred:$p, (or (setcc RC:$a, RC:$b, ucmp), - (not RegPred:$c)))]>; - def rr_or_not_r_o - : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c), - !strconcat("setp.", cmpstr, ".or.", regclsname, - "\t$p, $a, $b, !$c"), - [(set RegPred:$p, (or (setcc RC:$a, RC:$b, ocmp), - (not RegPred:$c)))]>; - - def rr_xor_not_r_u - : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c), - !strconcat("setp.", cmpstr, "u.xor.", regclsname, - "\t$p, $a, $b, !$c"), - [(set RegPred:$p, (xor (setcc RC:$a, RC:$b, ucmp), - (not RegPred:$c)))]>; - def rr_xor_not_r_o - : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c), - !strconcat("setp.", cmpstr, ".xor.", regclsname, - "\t$p, $a, $b, !$c"), 
- [(set RegPred:$p, (xor (setcc RC:$a, RC:$b, ocmp), - (not RegPred:$c)))]>; -} - -//===- Select Predicate Instructions - 4 Operand Form ---------------------===// -multiclass PTX_SELP { - def rr - : InstPTX<(outs RC:$r), (ins RegPred:$a, RC:$b, RC:$c), - !strconcat("selp.", regclsname, "\t$r, $b, $c, $a"), - [(set RC:$r, (select RegPred:$a, RC:$b, RC:$c))]>; - def ri - : InstPTX<(outs RC:$r), (ins RegPred:$a, RC:$b, immcls:$c), - !strconcat("selp.", regclsname, "\t$r, $b, $c, $a"), - [(set RC:$r, (select RegPred:$a, RC:$b, immnode:$c))]>; - def ii - : InstPTX<(outs RC:$r), (ins RegPred:$a, immcls:$b, immcls:$c), - !strconcat("selp.", regclsname, "\t$r, $b, $c, $a"), - [(set RC:$r, (select RegPred:$a, immnode:$b, immnode:$c))]>; -} - - - -//===----------------------------------------------------------------------===// -// Instructions -//===----------------------------------------------------------------------===// - -///===- Integer Arithmetic Instructions -----------------------------------===// - -defm ADD : PTX_INT3<"add", add>; -defm SUB : PTX_INT3<"sub", sub>; -defm MUL : PTX_INT3<"mul.lo", mul>; // FIXME: Allow 32x32 -> 64 multiplies -defm DIV : PTX_INT3<"div", udiv>; -defm SDIV : PTX_INT3_SIGNED<"div", sdiv>; -defm REM : PTX_INT3<"rem", urem>; - -///===- Floating-Point Arithmetic Instructions ----------------------------===// - -// FNEG -defm FNEG : PTX_FLOAT_2OP<"neg">; - -// Standard Binary Operations -defm FADD : PTX_FLOAT_3OP<"add">; -defm FSUB : PTX_FLOAT_3OP<"sub">; -defm FMUL : PTX_FLOAT_3OP<"mul">; -defm FDIV : PTX_FLOAT_3OP<"div">; - -// Multi-operation hybrid instructions -defm FMAD : PTX_FLOAT_4OP<"mad">, Requires<[SupportsFMA]>; - - -///===- Floating-Point Intrinsic Instructions -----------------------------===// - -// SQRT -def FSQRTrr32 : InstPTX<(outs RegF32:$d), (ins RndMode:$r, RegF32:$a), - "sqrt$r.f32\t$d, $a", []>; -def FSQRTri32 : InstPTX<(outs RegF32:$d), (ins RndMode:$r, f32imm:$a), - "sqrt$r.f32\t$d, $a", []>; -def FSQRTrr64 : InstPTX<(outs RegF64:$d), (ins RndMode:$r, RegF64:$a), - "sqrt$r.f64\t$d, $a", []>; -def FSQRTri64 : InstPTX<(outs RegF64:$d), (ins RndMode:$r, f64imm:$a), - "sqrt$r.f64\t$d, $a", []>; - -// SIN -def FSINrr32 : InstPTX<(outs RegF32:$d), (ins RndMode:$r, RegF32:$a), - "sin$r.f32\t$d, $a", []>; -def FSINri32 : InstPTX<(outs RegF32:$d), (ins RndMode:$r, f32imm:$a), - "sin$r.f32\t$d, $a", []>; -def FSINrr64 : InstPTX<(outs RegF64:$d), (ins RndMode:$r, RegF64:$a), - "sin$r.f64\t$d, $a", []>; -def FSINri64 : InstPTX<(outs RegF64:$d), (ins RndMode:$r, f64imm:$a), - "sin$r.f64\t$d, $a", []>; - -// COS -def FCOSrr32 : InstPTX<(outs RegF32:$d), (ins RndMode:$r, RegF32:$a), - "cos$r.f32\t$d, $a", []>; -def FCOSri32 : InstPTX<(outs RegF32:$d), (ins RndMode:$r, f32imm:$a), - "cos$r.f32\t$d, $a", []>; -def FCOSrr64 : InstPTX<(outs RegF64:$d), (ins RndMode:$r, RegF64:$a), - "cos$r.f64\t$d, $a", []>; -def FCOSri64 : InstPTX<(outs RegF64:$d), (ins RndMode:$r, f64imm:$a), - "cos$r.f64\t$d, $a", []>; - - - - -///===- Comparison and Selection Instructions -----------------------------===// - -// .setp - -// Compare u16 - -defm SETPEQu16 : PTX_SETP_I; -defm SETPNEu16 : PTX_SETP_I; -defm SETPLTu16 : PTX_SETP_I; -defm SETPLEu16 : PTX_SETP_I; -defm SETPGTu16 : PTX_SETP_I; -defm SETPGEu16 : PTX_SETP_I; -defm SETPLTs16 : PTX_SETP_I; -defm SETPLEs16 : PTX_SETP_I; -defm SETPGTs16 : PTX_SETP_I; -defm SETPGEs16 : PTX_SETP_I; - -// Compare u32 - -defm SETPEQu32 : PTX_SETP_I; -defm SETPNEu32 : PTX_SETP_I; -defm SETPLTu32 : PTX_SETP_I; -defm SETPLEu32 : PTX_SETP_I; 
-defm SETPGTu32 : PTX_SETP_I; -defm SETPGEu32 : PTX_SETP_I; -defm SETPLTs32 : PTX_SETP_I; -defm SETPLEs32 : PTX_SETP_I; -defm SETPGTs32 : PTX_SETP_I; -defm SETPGEs32 : PTX_SETP_I; - -// Compare u64 - -defm SETPEQu64 : PTX_SETP_I; -defm SETPNEu64 : PTX_SETP_I; -defm SETPLTu64 : PTX_SETP_I; -defm SETPLEu64 : PTX_SETP_I; -defm SETPGTu64 : PTX_SETP_I; -defm SETPGEu64 : PTX_SETP_I; -defm SETPLTs64 : PTX_SETP_I; -defm SETPLEs64 : PTX_SETP_I; -defm SETPGTs64 : PTX_SETP_I; -defm SETPGEs64 : PTX_SETP_I; - -// Compare f32 - -defm SETPEQf32 : PTX_SETP_FP; -defm SETPNEf32 : PTX_SETP_FP; -defm SETPLTf32 : PTX_SETP_FP; -defm SETPLEf32 : PTX_SETP_FP; -defm SETPGTf32 : PTX_SETP_FP; -defm SETPGEf32 : PTX_SETP_FP; - -// Compare f64 - -defm SETPEQf64 : PTX_SETP_FP; -defm SETPNEf64 : PTX_SETP_FP; -defm SETPLTf64 : PTX_SETP_FP; -defm SETPLEf64 : PTX_SETP_FP; -defm SETPGTf64 : PTX_SETP_FP; -defm SETPGEf64 : PTX_SETP_FP; - -// .selp - -defm SELPi16 : PTX_SELP; -defm SELPi32 : PTX_SELP; -defm SELPi64 : PTX_SELP; -defm SELPf32 : PTX_SELP; -defm SELPf64 : PTX_SELP; - -///===- Logic and Shift Instructions --------------------------------------===// - -defm SHL : PTX_INT3ntnc<"shl.b", PTXshl>; -defm SRL : PTX_INT3ntnc<"shr.u", PTXsrl>; -defm SRA : PTX_INT3ntnc<"shr.s", PTXsra>; - -defm AND : PTX_LOGIC<"and", and>; -defm OR : PTX_LOGIC<"or", or>; -defm XOR : PTX_LOGIC<"xor", xor>; - -///===- Data Movement and Conversion Instructions -------------------------===// - -// any_extend -// Implement the anyext instruction in terms of the PTX cvt instructions. -//def : Pat<(i32 (anyext RegI16:$a)), (CVT_u32_u16 RegI16:$a)>; -//def : Pat<(i64 (anyext RegI16:$a)), (CVT_u64_u16 RegI16:$a)>; -//def : Pat<(i64 (anyext RegI32:$a)), (CVT_u64_u32 RegI32:$a)>; - -// bitconvert -// These instructions implement the bit-wise conversion between integer and -// floating-point types. 
-def MOVi32f32 - : InstPTX<(outs RegI32:$d), (ins RegF32:$a), "mov.b32\t$d, $a", []>; -def MOVf32i32 - : InstPTX<(outs RegF32:$d), (ins RegI32:$a), "mov.b32\t$d, $a", []>; -def MOVi64f64 - : InstPTX<(outs RegI64:$d), (ins RegF64:$a), "mov.b64\t$d, $a", []>; -def MOVf64i64 - : InstPTX<(outs RegF64:$d), (ins RegI64:$a), "mov.b64\t$d, $a", []>; - -let neverHasSideEffects = 1 in { - def MOVPREDrr - : InstPTX<(outs RegPred:$d), (ins RegPred:$a), "mov.pred\t$d, $a", []>; - def MOVU16rr - : InstPTX<(outs RegI16:$d), (ins RegI16:$a), "mov.u16\t$d, $a", []>; - def MOVU32rr - : InstPTX<(outs RegI32:$d), (ins RegI32:$a), "mov.u32\t$d, $a", []>; - def MOVU64rr - : InstPTX<(outs RegI64:$d), (ins RegI64:$a), "mov.u64\t$d, $a", []>; - def MOVF32rr - : InstPTX<(outs RegF32:$d), (ins RegF32:$a), "mov.f32\t$d, $a", []>; - def MOVF64rr - : InstPTX<(outs RegF64:$d), (ins RegF64:$a), "mov.f64\t$d, $a", []>; -} - -let isReMaterializable = 1, isAsCheapAsAMove = 1 in { - def MOVPREDri - : InstPTX<(outs RegPred:$d), (ins i1imm:$a), "mov.pred\t$d, $a", - [(set RegPred:$d, imm:$a)]>; - def MOVU16ri - : InstPTX<(outs RegI16:$d), (ins i16imm:$a), "mov.u16\t$d, $a", - [(set RegI16:$d, imm:$a)]>; - def MOVU32ri - : InstPTX<(outs RegI32:$d), (ins i32imm:$a), "mov.u32\t$d, $a", - [(set RegI32:$d, imm:$a)]>; - def MOVU64ri - : InstPTX<(outs RegI64:$d), (ins i64imm:$a), "mov.u64\t$d, $a", - [(set RegI64:$d, imm:$a)]>; - def MOVF32ri - : InstPTX<(outs RegF32:$d), (ins f32imm:$a), "mov.f32\t$d, $a", - [(set RegF32:$d, fpimm:$a)]>; - def MOVF64ri - : InstPTX<(outs RegF64:$d), (ins f64imm:$a), "mov.f64\t$d, $a", - [(set RegF64:$d, fpimm:$a)]>; -} - -let isReMaterializable = 1, isAsCheapAsAMove = 1 in { - def MOVaddr32 - : InstPTX<(outs RegI32:$d), (ins i32imm:$a), "mov.u32\t$d, $a", - [(set RegI32:$d, (PTXcopyaddress tglobaladdr:$a))]>; - def MOVaddr64 - : InstPTX<(outs RegI64:$d), (ins i64imm:$a), "mov.u64\t$d, $a", - [(set RegI64:$d, (PTXcopyaddress tglobaladdr:$a))]>; - def MOVframe32 - : InstPTX<(outs RegI32:$d), (ins i32imm:$a), "cvta.local.u32\t$d, $a", - [(set RegI32:$d, (PTXcopyaddress frameindex:$a))]>; - def MOVframe64 - : InstPTX<(outs RegI64:$d), (ins i64imm:$a), "cvta.local.u64\t$d, $a", - [(set RegI64:$d, (PTXcopyaddress frameindex:$a))]>; -} - -// PTX cvt instructions -// Note all of these may actually be used, we just define all possible patterns -// here (that make sense). -// FIXME: Can we collapse this somehow into a multiclass def? 
- -// To i16 -def CVTu16u32 - : InstPTX<(outs RegI16:$d), (ins RegI32:$a), "cvt.u16.u32\t$d, $a", []>; -def CVTu16u64 - : InstPTX<(outs RegI16:$d), (ins RegI64:$a), "cvt.u16.u64\t$d, $a", []>; -def CVTu16f32 - : InstPTX<(outs RegI16:$d), (ins RndMode:$r, RegF32:$a), - "cvt$r.u16.f32\t$d, $a", []>; -def CVTs16f32 - : InstPTX<(outs RegI16:$d), (ins RndMode:$r, RegF32:$a), - "cvt$r.s16.f32\t$d, $a", []>; -def CVTu16f64 - : InstPTX<(outs RegI16:$d), (ins RndMode:$r, RegF64:$a), - "cvt$r.u16.f64\t$d, $a", []>; -def CVTs16f64 - : InstPTX<(outs RegI16:$d), (ins RndMode:$r, RegF64:$a), - "cvt$r.s16.f64\t$d, $a", []>; - -// To i32 -def CVTu32u16 - : InstPTX<(outs RegI32:$d), (ins RegI16:$a), "cvt.u32.u16\t$d, $a", []>; -def CVTs32s16 - : InstPTX<(outs RegI32:$d), (ins RegI16:$a), "cvt.s32.s16\t$d, $a", []>; -def CVTu32u64 - : InstPTX<(outs RegI32:$d), (ins RegI64:$a), "cvt.u32.u64\t$d, $a", []>; -def CVTu32f32 - : InstPTX<(outs RegI32:$d), (ins RndMode:$r, RegF32:$a), - "cvt$r.u32.f32\t$d, $a", []>; -def CVTs32f32 - : InstPTX<(outs RegI32:$d), (ins RndMode:$r, RegF32:$a), - "cvt$r.s32.f32\t$d, $a", []>; -def CVTu32f64 - : InstPTX<(outs RegI32:$d), (ins RndMode:$r, RegF64:$a), - "cvt$r.u32.f64\t$d, $a", []>; -def CVTs32f64 - : InstPTX<(outs RegI32:$d), (ins RndMode:$r, RegF64:$a), - "cvt$r.s32.f64\t$d, $a", []>; - -// To i64 -def CVTu64u16 - : InstPTX<(outs RegI64:$d), (ins RegI16:$a), "cvt.u64.u16\t$d, $a", []>; -def CVTs64s16 - : InstPTX<(outs RegI64:$d), (ins RegI16:$a), "cvt.s64.s16\t$d, $a", []>; -def CVTu64u32 - : InstPTX<(outs RegI64:$d), (ins RegI32:$a), "cvt.u64.u32\t$d, $a", []>; -def CVTs64s32 - : InstPTX<(outs RegI64:$d), (ins RegI32:$a), "cvt.s64.s32\t$d, $a", []>; -def CVTu64f32 - : InstPTX<(outs RegI64:$d), (ins RndMode:$r, RegF32:$a), - "cvt$r.u64.f32\t$d, $a", []>; -def CVTs64f32 - : InstPTX<(outs RegI64:$d), (ins RndMode:$r, RegF32:$a), - "cvt$r.s64.f32\t$d, $a", []>; -def CVTu64f64 - : InstPTX<(outs RegI64:$d), (ins RndMode:$r, RegF64:$a), - "cvt$r.u64.f64\t$d, $a", []>; -def CVTs64f64 - : InstPTX<(outs RegI64:$d), (ins RndMode:$r, RegF64:$a), - "cvt$r.s64.f64\t$d, $a", []>; - -// To f32 -def CVTf32u16 - : InstPTX<(outs RegF32:$d), (ins RndMode:$r, RegI16:$a), - "cvt$r.f32.u16\t$d, $a", []>; -def CVTf32s16 - : InstPTX<(outs RegF32:$d), (ins RndMode:$r, RegI16:$a), - "cvt$r.f32.s16\t$d, $a", []>; -def CVTf32u32 - : InstPTX<(outs RegF32:$d), (ins RndMode:$r, RegI32:$a), - "cvt$r.f32.u32\t$d, $a", []>; -def CVTf32s32 - : InstPTX<(outs RegF32:$d), (ins RndMode:$r, RegI32:$a), - "cvt$r.f32.s32\t$d, $a", []>; -def CVTf32u64 - : InstPTX<(outs RegF32:$d), (ins RndMode:$r, RegI64:$a), - "cvt$r.f32.u64\t$d, $a", []>; -def CVTf32s64 - : InstPTX<(outs RegF32:$d), (ins RndMode:$r, RegI64:$a), - "cvt$r.f32.s64\t$d, $a", []>; -def CVTf32f64 - : InstPTX<(outs RegF32:$d), (ins RndMode:$r, RegF64:$a), - "cvt$r.f32.f64\t$d, $a", []>; - -// To f64 -def CVTf64u16 - : InstPTX<(outs RegF64:$d), (ins RndMode:$r, RegI16:$a), - "cvt$r.f64.u16\t$d, $a", []>; -def CVTf64s16 - : InstPTX<(outs RegF64:$d), (ins RndMode:$r, RegI16:$a), - "cvt$r.f64.s16\t$d, $a", []>; -def CVTf64u32 - : InstPTX<(outs RegF64:$d), (ins RndMode:$r, RegI32:$a), - "cvt$r.f64.u32\t$d, $a", []>; -def CVTf64s32 - : InstPTX<(outs RegF64:$d), (ins RndMode:$r, RegI32:$a), - "cvt$r.f64.s32\t$d, $a", []>; -def CVTf64u64 - : InstPTX<(outs RegF64:$d), (ins RndMode:$r, RegI64:$a), - "cvt$r.f64.u64\t$d, $a", []>; -def CVTf64s64 - : InstPTX<(outs RegF64:$d), (ins RndMode:$r, RegI64:$a), - "cvt$r.f64.s64\t$d, $a", []>; -def CVTf64f32 - : 
-  : InstPTX<(outs RegF64:$d), (ins RegF32:$a), "cvt.f64.f32\t$d, $a", []>;
-
-///===- Control Flow Instructions -----------------------------------------===//
-
-let isBranch = 1, isTerminator = 1, isBarrier = 1 in {
-  def BRAd
-    : InstPTX<(outs), (ins brtarget:$d), "bra\t$d", [(br bb:$d)]>;
-}
-
-let isBranch = 1, isTerminator = 1 in {
-  // FIXME: The pattern part is blank because I cannot (or do not yet know
-  // how to) use the first operand of PredicateOperand (a RegPred register) here
-  // When this is revisited, make sure to also look at LowerSETCC and try to
-  // fold it into negated predicates, if possible.
-  def BRAdp
-    : InstPTX<(outs), (ins brtarget:$d), "bra\t$d",
-              [/*(brcond pred:$_p, bb:$d)*/]>;
-}
-
-let isReturn = 1, isTerminator = 1, isBarrier = 1 in {
-  def EXIT : InstPTX<(outs), (ins), "exit", [(PTXexit)]>;
-  def RET  : InstPTX<(outs), (ins), "ret",  [(PTXret)]>;
-}
-
-let hasSideEffects = 1 in {
-  def CALL : InstPTX<(outs), (ins), "call", [(PTXcall)]>;
-}
-
-///===- Parameter Passing Pseudo-Instructions -----------------------------===//
-
-def READPARAMPRED : InstPTX<(outs RegPred:$a), (ins i32imm:$b),
-                            "mov.pred\t$a, %arg$b", []>;
-def READPARAMI16  : InstPTX<(outs RegI16:$a), (ins i32imm:$b),
-                            "mov.b16\t$a, %arg$b", []>;
-def READPARAMI32  : InstPTX<(outs RegI32:$a), (ins i32imm:$b),
-                            "mov.b32\t$a, %arg$b", []>;
-def READPARAMI64  : InstPTX<(outs RegI64:$a), (ins i32imm:$b),
-                            "mov.b64\t$a, %arg$b", []>;
-def READPARAMF32  : InstPTX<(outs RegF32:$a), (ins i32imm:$b),
-                            "mov.f32\t$a, %arg$b", []>;
-def READPARAMF64  : InstPTX<(outs RegF64:$a), (ins i32imm:$b),
-                            "mov.f64\t$a, %arg$b", []>;
-
-def WRITEPARAMPRED : InstPTX<(outs), (ins RegPred:$a), "//w", []>;
-def WRITEPARAMI16  : InstPTX<(outs), (ins RegI16:$a), "//w", []>;
-def WRITEPARAMI32  : InstPTX<(outs), (ins RegI32:$a), "//w", []>;
-def WRITEPARAMI64  : InstPTX<(outs), (ins RegI64:$a), "//w", []>;
-def WRITEPARAMF32  : InstPTX<(outs), (ins RegF32:$a), "//w", []>;
-def WRITEPARAMF64  : InstPTX<(outs), (ins RegF64:$a), "//w", []>;
-
-
-//===----------------------------------------------------------------------===//
-// Instruction Selection Patterns
-//===----------------------------------------------------------------------===//
-
-// FADD
-def : Pat<(f32 (fadd RegF32:$a, RegF32:$b)),
-          (FADDrr32 RndDefault, RegF32:$a, RegF32:$b)>;
-def : Pat<(f32 (fadd RegF32:$a, fpimm:$b)),
-          (FADDri32 RndDefault, RegF32:$a, fpimm:$b)>;
-def : Pat<(f64 (fadd RegF64:$a, RegF64:$b)),
-          (FADDrr64 RndDefault, RegF64:$a, RegF64:$b)>;
-def : Pat<(f64 (fadd RegF64:$a, fpimm:$b)),
-          (FADDri64 RndDefault, RegF64:$a, fpimm:$b)>;
-
-// FSUB
-def : Pat<(f32 (fsub RegF32:$a, RegF32:$b)),
-          (FSUBrr32 RndDefault, RegF32:$a, RegF32:$b)>;
-def : Pat<(f32 (fsub RegF32:$a, fpimm:$b)),
-          (FSUBri32 RndDefault, RegF32:$a, fpimm:$b)>;
-def : Pat<(f64 (fsub RegF64:$a, RegF64:$b)),
-          (FSUBrr64 RndDefault, RegF64:$a, RegF64:$b)>;
-def : Pat<(f64 (fsub RegF64:$a, fpimm:$b)),
-          (FSUBri64 RndDefault, RegF64:$a, fpimm:$b)>;
-
-// FMUL
-def : Pat<(f32 (fmul RegF32:$a, RegF32:$b)),
-          (FMULrr32 RndDefault, RegF32:$a, RegF32:$b)>;
-def : Pat<(f32 (fmul RegF32:$a, fpimm:$b)),
-          (FMULri32 RndDefault, RegF32:$a, fpimm:$b)>;
-def : Pat<(f64 (fmul RegF64:$a, RegF64:$b)),
-          (FMULrr64 RndDefault, RegF64:$a, RegF64:$b)>;
-def : Pat<(f64 (fmul RegF64:$a, fpimm:$b)),
-          (FMULri64 RndDefault, RegF64:$a, fpimm:$b)>;
-
-// FDIV
-def : Pat<(f32 (fdiv RegF32:$a, RegF32:$b)),
-          (FDIVrr32 RndDefault, RegF32:$a, RegF32:$b)>;
-def : Pat<(f32 (fdiv RegF32:$a, fpimm:$b)),
-          (FDIVri32 RndDefault, RegF32:$a, fpimm:$b)>;
-def : Pat<(f64 (fdiv RegF64:$a, RegF64:$b)),
-          (FDIVrr64 RndDefault, RegF64:$a, RegF64:$b)>;
-def : Pat<(f64 (fdiv RegF64:$a, fpimm:$b)),
-          (FDIVri64 RndDefault, RegF64:$a, fpimm:$b)>;
-
-// FMUL+FADD
-def : Pat<(f32 (fadd (fmul RegF32:$a, RegF32:$b), RegF32:$c)),
-          (FMADrrr32 RndDefault, RegF32:$a, RegF32:$b, RegF32:$c)>,
-      Requires<[SupportsFMA]>;
-def : Pat<(f32 (fadd (fmul RegF32:$a, RegF32:$b), fpimm:$c)),
-          (FMADrri32 RndDefault, RegF32:$a, RegF32:$b, fpimm:$c)>,
-      Requires<[SupportsFMA]>;
-def : Pat<(f32 (fadd (fmul RegF32:$a, fpimm:$b), fpimm:$c)),
-          (FMADrrr32 RndDefault, RegF32:$a, fpimm:$b, fpimm:$c)>,
-      Requires<[SupportsFMA]>;
-def : Pat<(f32 (fadd (fmul RegF32:$a, RegF32:$b), fpimm:$c)),
-          (FMADrri32 RndDefault, RegF32:$a, RegF32:$b, fpimm:$c)>,
-      Requires<[SupportsFMA]>;
-def : Pat<(f64 (fadd (fmul RegF64:$a, RegF64:$b), RegF64:$c)),
-          (FMADrrr64 RndDefault, RegF64:$a, RegF64:$b, RegF64:$c)>,
-      Requires<[SupportsFMA]>;
-def : Pat<(f64 (fadd (fmul RegF64:$a, RegF64:$b), fpimm:$c)),
-          (FMADrri64 RndDefault, RegF64:$a, RegF64:$b, fpimm:$c)>,
-      Requires<[SupportsFMA]>;
-def : Pat<(f64 (fadd (fmul RegF64:$a, fpimm:$b), fpimm:$c)),
-          (FMADrri64 RndDefault, RegF64:$a, fpimm:$b, fpimm:$c)>,
-      Requires<[SupportsFMA]>;
-
-// FNEG
-def : Pat<(f32 (fneg RegF32:$a)), (FNEGrr32 RndDefault, RegF32:$a)>;
-def : Pat<(f32 (fneg fpimm:$a)),  (FNEGri32 RndDefault, fpimm:$a)>;
-def : Pat<(f64 (fneg RegF64:$a)), (FNEGrr64 RndDefault, RegF64:$a)>;
-def : Pat<(f64 (fneg fpimm:$a)),  (FNEGri64 RndDefault, fpimm:$a)>;
-
-// FSQRT
-def : Pat<(f32 (fsqrt RegF32:$a)), (FSQRTrr32 RndDefault, RegF32:$a)>;
-def : Pat<(f32 (fsqrt fpimm:$a)),  (FSQRTri32 RndDefault, fpimm:$a)>;
-def : Pat<(f64 (fsqrt RegF64:$a)), (FSQRTrr64 RndDefault, RegF64:$a)>;
-def : Pat<(f64 (fsqrt fpimm:$a)),  (FSQRTri64 RndDefault, fpimm:$a)>;
-
-// FSIN
-def : Pat<(f32 (fsin RegF32:$a)), (FSINrr32 RndDefault, RegF32:$a)>;
-def : Pat<(f32 (fsin fpimm:$a)),  (FSINri32 RndDefault, fpimm:$a)>;
-def : Pat<(f64 (fsin RegF64:$a)), (FSINrr64 RndDefault, RegF64:$a)>;
-def : Pat<(f64 (fsin fpimm:$a)),  (FSINri64 RndDefault, fpimm:$a)>;
-
-// FCOS
-def : Pat<(f32 (fcos RegF32:$a)), (FCOSrr32 RndDefault, RegF32:$a)>;
-def : Pat<(f32 (fcos fpimm:$a)),  (FCOSri32 RndDefault, fpimm:$a)>;
-def : Pat<(f64 (fcos RegF64:$a)), (FCOSrr64 RndDefault, RegF64:$a)>;
-def : Pat<(f64 (fcos fpimm:$a)),  (FCOSri64 RndDefault, fpimm:$a)>;
-
-// Type conversion notes:
-// - PTX does not directly support converting a predicate to a value, so we
-//   use a select instruction to select either 0 or 1 (integer or fp) based
-//   on the truth value of the predicate.
-// - PTX does not directly support converting to a predicate type, so we fake it
-//   by performing a greater-than test between the value and zero. This follows
-//   the C convention that any non-zero value is equivalent to 'true'.
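The two conversion rules described above are easy to state in scalar code. A
small C++ sketch of the semantics the patterns below implement (illustrative
only, not backend code):

    #include <cstdint>

    // Value -> predicate: PTX has no direct conversion, so the patterns
    // below emit an unsigned greater-than-zero compare (setp.gt), matching
    // the C rule that any non-zero value is true.
    static bool valueToPred(uint32_t V) { return V > 0; }

    // Predicate -> value: likewise done with a select (selp) between two
    // immediates; sign extension selects an all-ones mask instead of 1.
    static uint32_t predToValueZext(bool P) { return P ? 1u : 0u; }
    static uint32_t predToValueSext(bool P) { return P ? 0xFFFFFFFFu : 0u; }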
-
-// Conversion to pred
-def : Pat<(i1 (trunc RegI16:$a)), (SETPGTu16ri RegI16:$a, 0)>;
-def : Pat<(i1 (trunc RegI32:$a)), (SETPGTu32ri RegI32:$a, 0)>;
-def : Pat<(i1 (trunc RegI64:$a)), (SETPGTu64ri RegI64:$a, 0)>;
-def : Pat<(i1 (fp_to_uint RegF32:$a)), (SETPGTu32ri (MOVi32f32 RegF32:$a), 0)>;
-def : Pat<(i1 (fp_to_uint RegF64:$a)), (SETPGTu64ri (MOVi64f64 RegF64:$a), 0)>;
-
-// Conversion to u16
-def : Pat<(i16 (anyext RegPred:$a)), (SELPi16ii RegPred:$a, 1, 0)>;
-def : Pat<(i16 (sext RegPred:$a)),   (SELPi16ii RegPred:$a, 0xFFFF, 0)>;
-def : Pat<(i16 (zext RegPred:$a)),   (SELPi16ii RegPred:$a, 1, 0)>;
-def : Pat<(i16 (trunc RegI32:$a)), (CVTu16u32 RegI32:$a)>;
-def : Pat<(i16 (trunc RegI64:$a)), (CVTu16u64 RegI64:$a)>;
-def : Pat<(i16 (fp_to_uint RegF32:$a)), (CVTu16f32 RndDefault, RegF32:$a)>;
-def : Pat<(i16 (fp_to_sint RegF32:$a)), (CVTs16f32 RndDefault, RegF32:$a)>;
-def : Pat<(i16 (fp_to_uint RegF64:$a)), (CVTu16f64 RndDefault, RegF64:$a)>;
-def : Pat<(i16 (fp_to_sint RegF64:$a)), (CVTs16f64 RndDefault, RegF64:$a)>;
-
-// Conversion to u32
-def : Pat<(i32 (anyext RegPred:$a)), (SELPi32ii RegPred:$a, 1, 0)>;
-def : Pat<(i32 (sext RegPred:$a)),   (SELPi32ii RegPred:$a, 0xFFFFFFFF, 0)>;
-def : Pat<(i32 (zext RegPred:$a)),   (SELPi32ii RegPred:$a, 1, 0)>;
-def : Pat<(i32 (anyext RegI16:$a)), (CVTu32u16 RegI16:$a)>;
-def : Pat<(i32 (sext RegI16:$a)),   (CVTs32s16 RegI16:$a)>;
-def : Pat<(i32 (zext RegI16:$a)),   (CVTu32u16 RegI16:$a)>;
-def : Pat<(i32 (trunc RegI64:$a)), (CVTu32u64 RegI64:$a)>;
-def : Pat<(i32 (fp_to_uint RegF32:$a)), (CVTu32f32 RndDefault, RegF32:$a)>;
-def : Pat<(i32 (fp_to_sint RegF32:$a)), (CVTs32f32 RndDefault, RegF32:$a)>;
-def : Pat<(i32 (fp_to_uint RegF64:$a)), (CVTu32f64 RndDefault, RegF64:$a)>;
-def : Pat<(i32 (fp_to_sint RegF64:$a)), (CVTs32f64 RndDefault, RegF64:$a)>;
-def : Pat<(i32 (bitconvert RegF32:$a)), (MOVi32f32 RegF32:$a)>;
-
-// Conversion to u64
-def : Pat<(i64 (anyext RegPred:$a)), (SELPi64ii RegPred:$a, 1, 0)>;
-def : Pat<(i64 (sext RegPred:$a)),   (SELPi64ii RegPred:$a,
-                                                0xFFFFFFFFFFFFFFFF, 0)>;
-def : Pat<(i64 (zext RegPred:$a)),   (SELPi64ii RegPred:$a, 1, 0)>;
-def : Pat<(i64 (anyext RegI16:$a)), (CVTu64u16 RegI16:$a)>;
-def : Pat<(i64 (sext RegI16:$a)),   (CVTs64s16 RegI16:$a)>;
-def : Pat<(i64 (zext RegI16:$a)),   (CVTu64u16 RegI16:$a)>;
-def : Pat<(i64 (anyext RegI32:$a)), (CVTu64u32 RegI32:$a)>;
-def : Pat<(i64 (sext RegI32:$a)),   (CVTs64s32 RegI32:$a)>;
-def : Pat<(i64 (zext RegI32:$a)),   (CVTu64u32 RegI32:$a)>;
-def : Pat<(i64 (fp_to_uint RegF32:$a)), (CVTu64f32 RndDefault, RegF32:$a)>;
-def : Pat<(i64 (fp_to_sint RegF32:$a)), (CVTs64f32 RndDefault, RegF32:$a)>;
-def : Pat<(i64 (fp_to_uint RegF64:$a)), (CVTu64f64 RndDefault, RegF64:$a)>;
-def : Pat<(i64 (fp_to_sint RegF64:$a)), (CVTs64f64 RndDefault, RegF64:$a)>;
-def : Pat<(i64 (bitconvert RegF64:$a)), (MOVi64f64 RegF64:$a)>;
-
-// Conversion to f32
-def : Pat<(f32 (uint_to_fp RegPred:$a)), (SELPf32rr RegPred:$a,
-          (MOVf32i32 0x3F800000), (MOVf32i32 0))>;
-def : Pat<(f32 (uint_to_fp RegI16:$a)), (CVTf32u16 RndDefault, RegI16:$a)>;
-def : Pat<(f32 (sint_to_fp RegI16:$a)), (CVTf32s16 RndDefault, RegI16:$a)>;
-def : Pat<(f32 (uint_to_fp RegI32:$a)), (CVTf32u32 RndDefault, RegI32:$a)>;
-def : Pat<(f32 (sint_to_fp RegI32:$a)), (CVTf32s32 RndDefault, RegI32:$a)>;
-def : Pat<(f32 (uint_to_fp RegI64:$a)), (CVTf32u64 RndDefault, RegI64:$a)>;
-def : Pat<(f32 (sint_to_fp RegI64:$a)), (CVTf32s64 RndDefault, RegI64:$a)>;
-def : Pat<(f32 (fround RegF64:$a)), (CVTf32f64 RndDefault, RegF64:$a)>;
-def : Pat<(f32 (bitconvert RegI32:$a)), (MOVf32i32 RegI32:$a)>;
-
-// Conversion to f64
-def : Pat<(f64 (uint_to_fp RegPred:$a)), (SELPf64rr RegPred:$a,
-          (MOVf64i64 0x3F80000000000000), (MOVf64i64 0))>;
-def : Pat<(f64 (uint_to_fp RegI16:$a)), (CVTf64u16 RndDefault, RegI16:$a)>;
-def : Pat<(f64 (sint_to_fp RegI16:$a)), (CVTf64s16 RndDefault, RegI16:$a)>;
-def : Pat<(f64 (uint_to_fp RegI32:$a)), (CVTf64u32 RndDefault, RegI32:$a)>;
-def : Pat<(f64 (sint_to_fp RegI32:$a)), (CVTf64s32 RndDefault, RegI32:$a)>;
-def : Pat<(f64 (uint_to_fp RegI64:$a)), (CVTf64u64 RndDefault, RegI64:$a)>;
-def : Pat<(f64 (sint_to_fp RegI64:$a)), (CVTf64s64 RndDefault, RegI64:$a)>;
-def : Pat<(f64 (fextend RegF32:$a)), (CVTf64f32 RegF32:$a)>;
-def : Pat<(f64 (bitconvert RegI64:$a)), (MOVf64i64 RegI64:$a)>;
-
-// setcc - predicate inversion for branch conditions
-def : Pat<(i1 (setcc RegPred:$a, imm:$b, SETNE)),
-          (XORripreds RegPred:$a, imm:$b)>;
-
-///===- Intrinsic Instructions --------------------------------------------===//
-include "PTXIntrinsicInstrInfo.td"
-
-///===- Load/Store Instructions -------------------------------------------===//
-include "PTXInstrLoadStore.td"
-
diff --git a/lib/Target/PTX/PTXInstrLoadStore.td b/lib/Target/PTX/PTXInstrLoadStore.td
deleted file mode 100644
index 7a62684..0000000
--- a/lib/Target/PTX/PTXInstrLoadStore.td
+++ /dev/null
@@ -1,278 +0,0 @@
-//===- PTXInstrLoadStore.td - PTX Load/Store Instruction Defs -*- tablegen-*-=//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file describes the PTX load/store instructions in TableGen format.
-//
-//===----------------------------------------------------------------------===//
-
-
-// Addressing Predicates
-// We have to differentiate between 32- and 64-bit pointer types
-def Use32BitAddresses : Predicate<"!getSubtarget().is64Bit()">;
-def Use64BitAddresses : Predicate<"getSubtarget().is64Bit()">;
-
-//===----------------------------------------------------------------------===//
-// Pattern Fragments for Loads/Stores
-//===----------------------------------------------------------------------===//
-
-def load_global : PatFrag<(ops node:$ptr), (load node:$ptr), [{
-  const Value *Src;
-  const PointerType *PT;
-  if ((Src = cast(N)->getSrcValue()) &&
-      (PT = dyn_cast(Src->getType())))
-    return PT->getAddressSpace() == PTXStateSpace::Global;
-  return false;
-}]>;
-
-def load_constant : PatFrag<(ops node:$ptr), (load node:$ptr), [{
-  const Value *Src;
-  const PointerType *PT;
-  if ((Src = cast(N)->getSrcValue()) &&
-      (PT = dyn_cast(Src->getType())))
-    return PT->getAddressSpace() == PTXStateSpace::Constant;
-  return false;
-}]>;
-
-def load_shared : PatFrag<(ops node:$ptr), (load node:$ptr), [{
-  const Value *Src;
-  const PointerType *PT;
-  if ((Src = cast(N)->getSrcValue()) &&
-      (PT = dyn_cast(Src->getType())))
-    return PT->getAddressSpace() == PTXStateSpace::Shared;
-  return false;
-}]>;
-
-def store_global
-  : PatFrag<(ops node:$d, node:$ptr), (store node:$d, node:$ptr), [{
-  const Value *Src;
-  const PointerType *PT;
-  if ((Src = cast(N)->getSrcValue()) &&
-      (PT = dyn_cast(Src->getType())))
-    return PT->getAddressSpace() == PTXStateSpace::Global;
-  return false;
-}]>;
-
-def store_shared
-  : PatFrag<(ops node:$d, node:$ptr), (store node:$d, node:$ptr), [{
-  const Value *Src;
-  const PointerType *PT;
-  if ((Src = cast(N)->getSrcValue()) &&
-      (PT = dyn_cast(Src->getType())))
-    return PT->getAddressSpace() == PTXStateSpace::Shared;
-  return false;
-}]>;
-
-// Addressing modes.
-def ADDRrr32 : ComplexPattern;
-def ADDRrr64 : ComplexPattern;
-def ADDRri32 : ComplexPattern;
-def ADDRri64 : ComplexPattern;
-def ADDRii32 : ComplexPattern;
-def ADDRii64 : ComplexPattern;
-def ADDRlocal32 : ComplexPattern;
-def ADDRlocal64 : ComplexPattern;
-
-// Address operands
-def MEMri32 : Operand {
-  let PrintMethod = "printMemOperand";
-  let MIOperandInfo = (ops RegI32, i32imm);
-}
-def MEMri64 : Operand {
-  let PrintMethod = "printMemOperand";
-  let MIOperandInfo = (ops RegI64, i64imm);
-}
-def LOCALri32 : Operand {
-  let PrintMethod = "printMemOperand";
-  let MIOperandInfo = (ops i32imm, i32imm);
-}
-def LOCALri64 : Operand {
-  let PrintMethod = "printMemOperand";
-  let MIOperandInfo = (ops i64imm, i64imm);
-}
-def MEMii32 : Operand {
-  let PrintMethod = "printMemOperand";
-  let MIOperandInfo = (ops i32imm, i32imm);
-}
-def MEMii64 : Operand {
-  let PrintMethod = "printMemOperand";
-  let MIOperandInfo = (ops i64imm, i64imm);
-}
-// The operand here does not correspond to an actual address, so we
-// can use i32 in 64-bit address modes.
-def MEMpi : Operand {
-  let PrintMethod = "printParamOperand";
-  let MIOperandInfo = (ops i32imm);
-}
-def MEMret : Operand {
-  let PrintMethod = "printReturnOperand";
-  let MIOperandInfo = (ops i32imm);
-}
-
-
-// Load/store .param space
-def PTXloadparam
-  : SDNode<"PTXISD::LOAD_PARAM", SDTypeProfile<1, 1, [SDTCisPtrTy<1>]>,
-           [SDNPHasChain, SDNPOutGlue, SDNPOptInGlue]>;
-def PTXstoreparam
-  : SDNode<"PTXISD::STORE_PARAM", SDTypeProfile<0, 2, [SDTCisVT<0, i32>]>,
-           [SDNPHasChain, SDNPOutGlue, SDNPOptInGlue]>;
-
-def PTXreadparam
-  : SDNode<"PTXISD::READ_PARAM", SDTypeProfile<1, 1, [SDTCisVT<1, i32>]>,
-           [SDNPHasChain, SDNPOutGlue, SDNPOptInGlue]>;
-def PTXwriteparam
-  : SDNode<"PTXISD::WRITE_PARAM", SDTypeProfile<0, 1, []>,
-           [SDNPHasChain, SDNPOutGlue, SDNPOptInGlue]>;
-
-
-
-//===----------------------------------------------------------------------===//
-// Classes for loads/stores
-//===----------------------------------------------------------------------===//
-multiclass PTX_LD {
-  def rr32 : InstPTX<(outs RC:$d),
-                     (ins MEMri32:$a),
-                     !strconcat(opstr, !strconcat(typestr, "\t$d, [$a]")),
-                     [(set RC:$d, (pat_load ADDRrr32:$a))]>,
-             Requires<[Use32BitAddresses]>;
-  def rr64 : InstPTX<(outs RC:$d),
-                     (ins MEMri64:$a),
-                     !strconcat(opstr, !strconcat(typestr, "\t$d, [$a]")),
-                     [(set RC:$d, (pat_load ADDRrr64:$a))]>,
-             Requires<[Use64BitAddresses]>;
-  def ri32 : InstPTX<(outs RC:$d),
-                     (ins MEMri32:$a),
-                     !strconcat(opstr, !strconcat(typestr, "\t$d, [$a]")),
-                     [(set RC:$d, (pat_load ADDRri32:$a))]>,
-             Requires<[Use32BitAddresses]>;
-  def ri64 : InstPTX<(outs RC:$d),
-                     (ins MEMri64:$a),
-                     !strconcat(opstr, !strconcat(typestr, "\t$d, [$a]")),
-                     [(set RC:$d, (pat_load ADDRri64:$a))]>,
-             Requires<[Use64BitAddresses]>;
-  def ii32 : InstPTX<(outs RC:$d),
-                     (ins MEMii32:$a),
-                     !strconcat(opstr, !strconcat(typestr, "\t$d, [$a]")),
-                     [(set RC:$d, (pat_load ADDRii32:$a))]>,
-             Requires<[Use32BitAddresses]>;
-  def ii64 : InstPTX<(outs RC:$d),
-                     (ins MEMii64:$a),
-                     !strconcat(opstr, !strconcat(typestr, "\t$d, [$a]")),
-                     [(set RC:$d, (pat_load ADDRii64:$a))]>,
-             Requires<[Use64BitAddresses]>;
-}
-
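For readers skimming the TableGen: the nested !strconcat calls in PTX_LD above
(and in PTX_ST below) simply paste the opcode, the type suffix, and the operand
template together. A C++ sketch of the resulting assembly string, with assumed
example inputs:

    #include <string>

    // For OpStr = "ld.global" and TypeStr = ".u16" this yields
    // "ld.global.u16\t$d, [$a]", matching the multiclass expansion.
    static std::string buildLoadAsmString(const std::string &OpStr,
                                          const std::string &TypeStr) {
      return OpStr + TypeStr + "\t$d, [$a]";
    }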
-multiclass PTX_ST {
-  def rr32 : InstPTX<(outs),
-                     (ins RC:$d, MEMri32:$a),
-                     !strconcat(opstr, !strconcat(typestr, "\t[$a], $d")),
-                     [(pat_store RC:$d, ADDRrr32:$a)]>,
-             Requires<[Use32BitAddresses]>;
-  def rr64 : InstPTX<(outs),
-                     (ins RC:$d, MEMri64:$a),
-                     !strconcat(opstr, !strconcat(typestr, "\t[$a], $d")),
-                     [(pat_store RC:$d, ADDRrr64:$a)]>,
-             Requires<[Use64BitAddresses]>;
-  def ri32 : InstPTX<(outs),
-                     (ins RC:$d, MEMri32:$a),
-                     !strconcat(opstr, !strconcat(typestr, "\t[$a], $d")),
-                     [(pat_store RC:$d, ADDRri32:$a)]>,
-             Requires<[Use32BitAddresses]>;
-  def ri64 : InstPTX<(outs),
-                     (ins RC:$d, MEMri64:$a),
-                     !strconcat(opstr, !strconcat(typestr, "\t[$a], $d")),
-                     [(pat_store RC:$d, ADDRri64:$a)]>,
-             Requires<[Use64BitAddresses]>;
-  def ii32 : InstPTX<(outs),
-                     (ins RC:$d, MEMii32:$a),
-                     !strconcat(opstr, !strconcat(typestr, "\t[$a], $d")),
-                     [(pat_store RC:$d, ADDRii32:$a)]>,
-             Requires<[Use32BitAddresses]>;
-  def ii64 : InstPTX<(outs),
-                     (ins RC:$d, MEMii64:$a),
-                     !strconcat(opstr, !strconcat(typestr, "\t[$a], $d")),
-                     [(pat_store RC:$d, ADDRii64:$a)]>,
-             Requires<[Use64BitAddresses]>;
-}
-
-multiclass PTX_LOCAL_LD_ST {
-  def LDri32 : InstPTX<(outs RC:$d), (ins LOCALri32:$a),
-                       !strconcat("ld.local", !strconcat(typestr, "\t$d, [$a]")),
-                       [(set RC:$d, (load_global ADDRlocal32:$a))]>;
-  def LDri64 : InstPTX<(outs RC:$d), (ins LOCALri64:$a),
-                       !strconcat("ld.local", !strconcat(typestr, "\t$d, [$a]")),
-                       [(set RC:$d, (load_global ADDRlocal64:$a))]>;
-  def STri32 : InstPTX<(outs), (ins RC:$d, LOCALri32:$a),
-                       !strconcat("st.local", !strconcat(typestr, "\t[$a], $d")),
-                       [(store_global RC:$d, ADDRlocal32:$a)]>;
-  def STri64 : InstPTX<(outs), (ins RC:$d, LOCALri64:$a),
-                       !strconcat("st.local", !strconcat(typestr, "\t[$a], $d")),
-                       [(store_global RC:$d, ADDRlocal64:$a)]>;
-}
-
-multiclass PTX_PARAM_LD_ST {
-  let hasSideEffects = 1 in {
-    def LDpi : InstPTX<(outs RC:$d), (ins i32imm:$a),
-                       !strconcat("ld.param", !strconcat(typestr, "\t$d, [$a]")),
-                       [(set RC:$d, (PTXloadparam texternalsym:$a))]>;
-    def STpi : InstPTX<(outs), (ins i32imm:$d, RC:$a),
-                       !strconcat("st.param", !strconcat(typestr, "\t[$d], $a")),
-                       [(PTXstoreparam texternalsym:$d, RC:$a)]>;
-  }
-}
-
-multiclass PTX_LD_ALL {
-  defm u16 : PTX_LD;
-  defm u32 : PTX_LD;
-  defm u64 : PTX_LD;
-  defm f32 : PTX_LD;
-  defm f64 : PTX_LD;
-}
-
-multiclass PTX_ST_ALL {
-  defm u16 : PTX_ST;
-  defm u32 : PTX_ST;
-  defm u64 : PTX_ST;
-  defm f32 : PTX_ST;
-  defm f64 : PTX_ST;
-}
-
-
-
-//===----------------------------------------------------------------------===//
-// Instruction definitions for loads/stores
-//===----------------------------------------------------------------------===//
-
-// Global/shared stores
-defm STg : PTX_ST_ALL<"st.global", store_global>;
-defm STs : PTX_ST_ALL<"st.shared", store_shared>;
-
-// Global/shared/constant loads
-defm LDg : PTX_LD_ALL<"ld.global", load_global>;
-defm LDc : PTX_LD_ALL<"ld.const",  load_constant>;
-defm LDs : PTX_LD_ALL<"ld.shared", load_shared>;
-
-// Param loads/stores
-defm PARAMPRED : PTX_PARAM_LD_ST<".pred", RegPred>;
-defm PARAMU16  : PTX_PARAM_LD_ST<".u16", RegI16>;
-defm PARAMU32  : PTX_PARAM_LD_ST<".u32", RegI32>;
-defm PARAMU64  : PTX_PARAM_LD_ST<".u64", RegI64>;
-defm PARAMF32  : PTX_PARAM_LD_ST<".f32", RegF32>;
-defm PARAMF64  : PTX_PARAM_LD_ST<".f64", RegF64>;
-
-// Local loads/stores
-defm LOCALPRED : PTX_LOCAL_LD_ST<".pred", RegPred>;
-defm LOCALU16  : PTX_LOCAL_LD_ST<".u16", RegI16>;
-defm LOCALU32  : PTX_LOCAL_LD_ST<".u32", RegI32>;
-defm LOCALU64  : PTX_LOCAL_LD_ST<".u64", RegI64>;
-defm LOCALF32  : PTX_LOCAL_LD_ST<".f32", RegF32>;
-defm LOCALF64  : PTX_LOCAL_LD_ST<".f64", RegF64>;
-
diff --git a/lib/Target/PTX/PTXIntrinsicInstrInfo.td b/lib/Target/PTX/PTXIntrinsicInstrInfo.td
deleted file mode 100644
index 3416f1c..0000000
--- a/lib/Target/PTX/PTXIntrinsicInstrInfo.td
+++ /dev/null
@@ -1,110 +0,0 @@
-//===-- PTXIntrinsicInstrInfo.td - Defines PTX intrinsics --*- tablegen -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines all of the PTX-specific intrinsic instructions.
-//
-//===----------------------------------------------------------------------===//
-
-// PTX Special Purpose Register Accessor Intrinsics
-
-class PTX_READ_SPECIAL_REGISTER_R64
-  : InstPTX<(outs RegI64:$d), (ins),
-            !strconcat("mov.u64\t$d, %", regname),
-            [(set RegI64:$d, (intop))]>;
-
-class PTX_READ_SPECIAL_REGISTER_R32
-  : InstPTX<(outs RegI32:$d), (ins),
-            !strconcat("mov.u32\t$d, %", regname),
-            [(set RegI32:$d, (intop))]>;
-
-// TODO Add read vector-version of special registers
-
-//def PTX_READ_TID_R64 : PTX_READ_SPECIAL_REGISTER_R64<"tid",
-//                                                     int_ptx_read_tid_r64>;
-def PTX_READ_TID_X : PTX_READ_SPECIAL_REGISTER_R32<"tid.x",
-                                                   int_ptx_read_tid_x>;
-def PTX_READ_TID_Y : PTX_READ_SPECIAL_REGISTER_R32<"tid.y",
-                                                   int_ptx_read_tid_y>;
-def PTX_READ_TID_Z : PTX_READ_SPECIAL_REGISTER_R32<"tid.z",
-                                                   int_ptx_read_tid_z>;
-def PTX_READ_TID_W : PTX_READ_SPECIAL_REGISTER_R32<"tid.w",
-                                                   int_ptx_read_tid_w>;
-
-//def PTX_READ_NTID_R64 : PTX_READ_SPECIAL_REGISTER_R64<"ntid",
-//                                                      int_ptx_read_ntid_r64>;
-def PTX_READ_NTID_X : PTX_READ_SPECIAL_REGISTER_R32<"ntid.x",
-                                                    int_ptx_read_ntid_x>;
-def PTX_READ_NTID_Y : PTX_READ_SPECIAL_REGISTER_R32<"ntid.y",
-                                                    int_ptx_read_ntid_y>;
-def PTX_READ_NTID_Z : PTX_READ_SPECIAL_REGISTER_R32<"ntid.z",
-                                                    int_ptx_read_ntid_z>;
-def PTX_READ_NTID_W : PTX_READ_SPECIAL_REGISTER_R32<"ntid.w",
-                                                    int_ptx_read_ntid_w>;
-
-def PTX_READ_LANEID  : PTX_READ_SPECIAL_REGISTER_R32<"laneid",
-                                                     int_ptx_read_laneid>;
-def PTX_READ_WARPID  : PTX_READ_SPECIAL_REGISTER_R32<"warpid",
-                                                     int_ptx_read_warpid>;
-def PTX_READ_NWARPID : PTX_READ_SPECIAL_REGISTER_R32<"nwarpid",
                                                      int_ptx_read_nwarpid>;
-
-//def PTX_READ_CTAID_R64 :
-//PTX_READ_SPECIAL_REGISTER_R64<"ctaid", int_ptx_read_ctaid_r64>;
-def PTX_READ_CTAID_X : PTX_READ_SPECIAL_REGISTER_R32<"ctaid.x",
-                                                     int_ptx_read_ctaid_x>;
-def PTX_READ_CTAID_Y : PTX_READ_SPECIAL_REGISTER_R32<"ctaid.y",
-                                                     int_ptx_read_ctaid_y>;
-def PTX_READ_CTAID_Z : PTX_READ_SPECIAL_REGISTER_R32<"ctaid.z",
-                                                     int_ptx_read_ctaid_z>;
-def PTX_READ_CTAID_W : PTX_READ_SPECIAL_REGISTER_R32<"ctaid.w",
-                                                     int_ptx_read_ctaid_w>;
-
-//def PTX_READ_NCTAID_R64 :
-//PTX_READ_SPECIAL_REGISTER_R64<"nctaid", int_ptx_read_nctaid_r64>;
-def PTX_READ_NCTAID_X : PTX_READ_SPECIAL_REGISTER_R32<"nctaid.x",
-                                                      int_ptx_read_nctaid_x>;
-def PTX_READ_NCTAID_Y : PTX_READ_SPECIAL_REGISTER_R32<"nctaid.y",
-                                                      int_ptx_read_nctaid_y>;
-def PTX_READ_NCTAID_Z : PTX_READ_SPECIAL_REGISTER_R32<"nctaid.z",
-                                                      int_ptx_read_nctaid_z>;
-def PTX_READ_NCTAID_W : PTX_READ_SPECIAL_REGISTER_R32<"nctaid.w",
-                                                      int_ptx_read_nctaid_w>;
-
-def PTX_READ_SMID   : PTX_READ_SPECIAL_REGISTER_R32<"smid",
-                                                    int_ptx_read_smid>;
-def PTX_READ_NSMID  : PTX_READ_SPECIAL_REGISTER_R32<"nsmid",
-                                                    int_ptx_read_nsmid>;
-def PTX_READ_GRIDID : PTX_READ_SPECIAL_REGISTER_R32<"gridid",
-                                                    int_ptx_read_gridid>;
-
-def PTX_READ_LANEMASK_EQ
-  : PTX_READ_SPECIAL_REGISTER_R32<"lanemask_eq", int_ptx_read_lanemask_eq>;
-def PTX_READ_LANEMASK_LE
-  : PTX_READ_SPECIAL_REGISTER_R32<"lanemask_le", int_ptx_read_lanemask_le>;
-def PTX_READ_LANEMASK_LT
-  : PTX_READ_SPECIAL_REGISTER_R32<"lanemask_lt", int_ptx_read_lanemask_lt>;
-def PTX_READ_LANEMASK_GE
-  : PTX_READ_SPECIAL_REGISTER_R32<"lanemask_ge", int_ptx_read_lanemask_ge>;
-def PTX_READ_LANEMASK_GT
-  : PTX_READ_SPECIAL_REGISTER_R32<"lanemask_gt", int_ptx_read_lanemask_gt>;
-
-def PTX_READ_CLOCK
-  : PTX_READ_SPECIAL_REGISTER_R32<"clock", int_ptx_read_clock>;
-def PTX_READ_CLOCK64
-  : PTX_READ_SPECIAL_REGISTER_R64<"clock64", int_ptx_read_clock64>;
-
-def PTX_READ_PM0 : PTX_READ_SPECIAL_REGISTER_R32<"pm0", int_ptx_read_pm0>;
-def PTX_READ_PM1 : PTX_READ_SPECIAL_REGISTER_R32<"pm1", int_ptx_read_pm1>;
-def PTX_READ_PM2 : PTX_READ_SPECIAL_REGISTER_R32<"pm2", int_ptx_read_pm2>;
-def PTX_READ_PM3 : PTX_READ_SPECIAL_REGISTER_R32<"pm3", int_ptx_read_pm3>;
-
-// PTX Parallel Synchronization and Communication Intrinsics
-
-def PTX_BAR_SYNC : InstPTX<(outs), (ins i32imm:$i), "bar.sync\t$i",
-                           [(int_ptx_bar_sync imm:$i)]>;
diff --git a/lib/Target/PTX/PTXMCAsmStreamer.cpp b/lib/Target/PTX/PTXMCAsmStreamer.cpp
deleted file mode 100644
index 3ed67a6..0000000
--- a/lib/Target/PTX/PTXMCAsmStreamer.cpp
+++ /dev/null
@@ -1,556 +0,0 @@
-//===-- PTXMCAsmStreamer.cpp - PTX Text Assembly Output -------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/ADT/OwningPtr.h"
-#include "llvm/ADT/SmallString.h"
-#include "llvm/ADT/Twine.h"
-#include "llvm/MC/MCAsmInfo.h"
-#include "llvm/MC/MCCodeEmitter.h"
-#include "llvm/MC/MCContext.h"
-#include "llvm/MC/MCExpr.h"
-#include "llvm/MC/MCInst.h"
-#include "llvm/MC/MCInstPrinter.h"
-#include "llvm/MC/MCStreamer.h"
-#include "llvm/MC/MCSymbol.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/MathExtras.h"
-#include "llvm/Support/Format.h"
-#include "llvm/Support/FormattedStream.h"
-#include "llvm/Support/PathV2.h"
-#include "llvm/Support/raw_ostream.h"
-
-using namespace llvm;
-
-namespace {
-class PTXMCAsmStreamer : public MCStreamer {
-  formatted_raw_ostream &OS;
-  const MCAsmInfo &MAI;
-  OwningPtr InstPrinter;
-  OwningPtr Emitter;
-
-  SmallString<128> CommentToEmit;
-  raw_svector_ostream CommentStream;
-
-  unsigned IsVerboseAsm : 1;
-  unsigned ShowInst : 1;
-
-public:
-  PTXMCAsmStreamer(MCContext &Context,
-                   formatted_raw_ostream &os,
-                   bool isVerboseAsm, bool useLoc,
-                   MCInstPrinter *printer,
-                   MCCodeEmitter *emitter,
-                   bool showInst)
-    : MCStreamer(Context), OS(os), MAI(Context.getAsmInfo()),
-      InstPrinter(printer), Emitter(emitter), CommentStream(CommentToEmit),
-      IsVerboseAsm(isVerboseAsm),
-      ShowInst(showInst) {
-    if (InstPrinter && IsVerboseAsm)
-      InstPrinter->setCommentStream(CommentStream);
-  }
-
-  ~PTXMCAsmStreamer() {}
-
-  inline void EmitEOL() {
-    // If we don't have any comments, just emit a \n.
-    if (!IsVerboseAsm) {
-      OS << '\n';
-      return;
-    }
-    EmitCommentsAndEOL();
-  }
-  void EmitCommentsAndEOL();
-
-  /// isVerboseAsm - Return true if this streamer supports verbose assembly at
-  /// all.
-  virtual bool isVerboseAsm() const { return IsVerboseAsm; }
-
-  /// hasRawTextSupport - We support EmitRawText.
-  virtual bool hasRawTextSupport() const { return true; }
-
-  /// AddComment - Add a comment that can be emitted to the generated .s
-  /// file if applicable as a QoI issue to make the output of the compiler
-  /// more readable.  This only affects the MCAsmStreamer, and only when
-  /// verbose assembly output is enabled.
-  virtual void AddComment(const Twine &T);
-
-  /// AddEncodingComment - Add a comment showing the encoding of an instruction.
-  virtual void AddEncodingComment(const MCInst &Inst);
-
-  /// GetCommentOS - Return a raw_ostream that comments can be written to.
-  /// Unlike AddComment, you are required to terminate comments with \n if you
-  /// use this method.
-  virtual raw_ostream &GetCommentOS() {
-    if (!IsVerboseAsm)
-      return nulls();  // Discard comments unless in verbose asm mode.
-    return CommentStream;
-  }
-
-  /// AddBlankLine - Emit a blank line to a .s file to pretty it up.
-  virtual void AddBlankLine() {
-    EmitEOL();
-  }
-
-  /// @name MCStreamer Interface
-  /// @{
-
-  virtual void ChangeSection(const MCSection *Section);
-  virtual void InitSections() { /* PTX does not use sections */ }
-
-  virtual void EmitLabel(MCSymbol *Symbol);
-
-  virtual void EmitAssemblerFlag(MCAssemblerFlag Flag);
-
-  virtual void EmitThumbFunc(MCSymbol *Func);
-
-  virtual void EmitAssignment(MCSymbol *Symbol, const MCExpr *Value);
-
-  virtual void EmitWeakReference(MCSymbol *Alias, const MCSymbol *Symbol);
-
-  virtual void EmitDwarfAdvanceLineAddr(int64_t LineDelta,
-                                        const MCSymbol *LastLabel,
-                                        const MCSymbol *Label,
-                                        unsigned PointerSize);
-
-  virtual void EmitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute);
-
-  virtual void EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue);
-  virtual void BeginCOFFSymbolDef(const MCSymbol *Symbol);
-  virtual void EmitCOFFSymbolStorageClass(int StorageClass);
-  virtual void EmitCOFFSymbolType(int Type);
-  virtual void EndCOFFSymbolDef();
-  virtual void EmitELFSize(MCSymbol *Symbol, const MCExpr *Value);
-  virtual void EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
-                                unsigned ByteAlignment);
-
-  /// EmitLocalCommonSymbol - Emit a local common (.lcomm) symbol.
-  ///
-  /// @param Symbol - The common symbol to emit.
-  /// @param Size - The size of the common symbol.
-  /// @param ByteAlignment - The alignment of the common symbol in bytes.
-  virtual void EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size,
-                                     unsigned ByteAlignment);
-
-  virtual void EmitZerofill(const MCSection *Section, MCSymbol *Symbol = 0,
-                            unsigned Size = 0, unsigned ByteAlignment = 0);
-
-  virtual void EmitTBSSSymbol(const MCSection *Section, MCSymbol *Symbol,
-                              uint64_t Size, unsigned ByteAlignment = 0);
-
-  virtual void EmitBytes(StringRef Data, unsigned AddrSpace);
-
-  virtual void EmitValueImpl(const MCExpr *Value, unsigned Size,
-                             unsigned AddrSpace);
-  virtual void EmitULEB128Value(const MCExpr *Value);
-  virtual void EmitSLEB128Value(const MCExpr *Value);
-  virtual void EmitGPRel32Value(const MCExpr *Value);
-
-
-  virtual void EmitFill(uint64_t NumBytes, uint8_t FillValue,
-                        unsigned AddrSpace);
-
-  virtual void EmitValueToAlignment(unsigned ByteAlignment, int64_t Value = 0,
-                                    unsigned ValueSize = 1,
-                                    unsigned MaxBytesToEmit = 0);
-
-  virtual void EmitCodeAlignment(unsigned ByteAlignment,
-                                 unsigned MaxBytesToEmit = 0);
-
-  virtual bool EmitValueToOffset(const MCExpr *Offset,
-                                 unsigned char Value = 0);
-
-  virtual void EmitFileDirective(StringRef Filename);
-  virtual bool EmitDwarfFileDirective(unsigned FileNo, StringRef Directory,
-                                      StringRef Filename);
-
-  virtual void EmitInstruction(const MCInst &Inst);
-
-  /// EmitRawText - If this file is backed by an assembly streamer, this dumps
-  /// the specified string in the output .s file.  This capability is
-  /// indicated by the hasRawTextSupport() predicate.
-  virtual void EmitRawText(StringRef String);
-
-  virtual void FinishImpl();
-
-  /// @}
-
-}; // class PTXMCAsmStreamer
-
-}
-
-/// TODO: Add appropriate implementation of Emit*() methods when needed
-
-void PTXMCAsmStreamer::AddComment(const Twine &T) {
-  if (!IsVerboseAsm) return;
-
-  // Make sure that CommentStream is flushed.
-  CommentStream.flush();
-
-  T.toVector(CommentToEmit);
-  // Each comment goes on its own line.
-  CommentToEmit.push_back('\n');
-
-  // Tell the comment stream that the vector changed underneath it.
-  CommentStream.resync();
-}
-
-void PTXMCAsmStreamer::EmitCommentsAndEOL() {
-  if (CommentToEmit.empty() && CommentStream.GetNumBytesInBuffer() == 0) {
-    OS << '\n';
-    return;
-  }
-
-  CommentStream.flush();
-  StringRef Comments = CommentToEmit.str();
-
-  assert(Comments.back() == '\n' &&
-         "Comment array not newline terminated");
-  do {
-    // Emit a line of comments.
-    OS.PadToColumn(MAI.getCommentColumn());
-    size_t Position = Comments.find('\n');
-    OS << MAI.getCommentString() << ' ' << Comments.substr(0, Position) << '\n';
-
-    Comments = Comments.substr(Position+1);
-  } while (!Comments.empty());
-
-  CommentToEmit.clear();
-  // Tell the comment stream that the vector changed underneath it.
-  CommentStream.resync();
-}
-
-static inline int64_t truncateToSize(int64_t Value, unsigned Bytes) {
-  assert(Bytes && "Invalid size!");
-  return Value & ((uint64_t) (int64_t) -1 >> (64 - Bytes * 8));
-}
-
-void PTXMCAsmStreamer::ChangeSection(const MCSection *Section) {
-  assert(Section && "Cannot switch to a null section!");
-}
-
-void PTXMCAsmStreamer::EmitLabel(MCSymbol *Symbol) {
-  assert(Symbol->isUndefined() && "Cannot define a symbol twice!");
-  assert(!Symbol->isVariable() && "Cannot emit a variable symbol!");
-  assert(getCurrentSection() && "Cannot emit before setting section!");
-
-  OS << *Symbol << MAI.getLabelSuffix();
-  EmitEOL();
-  Symbol->setSection(*getCurrentSection());
-}
-
-void PTXMCAsmStreamer::EmitAssemblerFlag(MCAssemblerFlag Flag) {}
-
-void PTXMCAsmStreamer::EmitThumbFunc(MCSymbol *Func) {}
-
-void PTXMCAsmStreamer::EmitAssignment(MCSymbol *Symbol, const MCExpr *Value) {
-  OS << *Symbol << " = " << *Value;
-  EmitEOL();
-
-  // FIXME: Lift context changes into super class.
-  Symbol->setVariableValue(Value);
-}
-
-void PTXMCAsmStreamer::EmitWeakReference(MCSymbol *Alias,
-                                         const MCSymbol *Symbol) {
-  OS << ".weakref " << *Alias << ", " << *Symbol;
-  EmitEOL();
-}
-
-void PTXMCAsmStreamer::EmitDwarfAdvanceLineAddr(int64_t LineDelta,
-                                                const MCSymbol *LastLabel,
-                                                const MCSymbol *Label,
-                                                unsigned PointerSize) {
-  report_fatal_error("Unimplemented.");
-}
-
-void PTXMCAsmStreamer::EmitSymbolAttribute(MCSymbol *Symbol,
-                                           MCSymbolAttr Attribute) {}
-
-void PTXMCAsmStreamer::EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue) {}
-
-void PTXMCAsmStreamer::BeginCOFFSymbolDef(const MCSymbol *Symbol) {}
-
-void PTXMCAsmStreamer::EmitCOFFSymbolStorageClass (int StorageClass) {}
-
-void PTXMCAsmStreamer::EmitCOFFSymbolType (int Type) {}
-
-void PTXMCAsmStreamer::EndCOFFSymbolDef() {}
-
-void PTXMCAsmStreamer::EmitELFSize(MCSymbol *Symbol, const MCExpr *Value) {}
-
-void PTXMCAsmStreamer::EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
-                                        unsigned ByteAlignment) {}
-
-void PTXMCAsmStreamer::EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size,
-                                             unsigned ByteAlignment) {}
-
-void PTXMCAsmStreamer::EmitZerofill(const MCSection *Section, MCSymbol *Symbol,
-                                    unsigned Size, unsigned ByteAlignment) {}
-
-void PTXMCAsmStreamer::EmitTBSSSymbol(const MCSection *Section,
-                                      MCSymbol *Symbol,
-                                      uint64_t Size, unsigned ByteAlignment) {}
-
-static inline char toOctal(int X) { return (X&7)+'0'; }
-
-static void PrintQuotedString(StringRef Data, raw_ostream &OS) {
-  OS << '"';
-
-  for (unsigned i = 0, e = Data.size(); i != e; ++i) {
-    unsigned char C = Data[i];
-    if (C == '"' || C == '\\') {
-      OS << '\\' << (char)C;
-      continue;
-    }
-
-    if (isprint((unsigned char)C)) {
-      OS << (char)C;
-      continue;
-    }
-
-    switch (C) {
-    case '\b': OS << "\\b"; break;
-    case '\f': OS << "\\f"; break;
-    case '\n': OS << "\\n"; break;
-    case '\r': OS << "\\r"; break;
-    case '\t': OS << "\\t"; break;
-    default:
-      OS << '\\';
-      OS << toOctal(C >> 6);
-      OS << toOctal(C >> 3);
-      OS << toOctal(C >> 0);
-      break;
-    }
-  }
-
-  OS << '"';
-}
-
-void PTXMCAsmStreamer::EmitBytes(StringRef Data, unsigned AddrSpace) {
-  assert(getCurrentSection() && "Cannot emit contents before setting section!");
-  if (Data.empty()) return;
-
-  if (Data.size() == 1) {
-    OS << MAI.getData8bitsDirective(AddrSpace);
-    OS << (unsigned)(unsigned char)Data[0];
-    EmitEOL();
-    return;
-  }
-
-  // If the data ends with 0 and the target supports .asciz, use it, otherwise
-  // use .ascii
-  if (MAI.getAscizDirective() && Data.back() == 0) {
-    OS << MAI.getAscizDirective();
-    Data = Data.substr(0, Data.size()-1);
-  } else {
-    OS << MAI.getAsciiDirective();
-  }
-
-  OS << ' ';
-  PrintQuotedString(Data, OS);
-  EmitEOL();
-}
-
-void PTXMCAsmStreamer::EmitValueImpl(const MCExpr *Value, unsigned Size,
-                                     unsigned AddrSpace) {
-  assert(getCurrentSection() && "Cannot emit contents before setting section!");
-  const char *Directive = 0;
-  switch (Size) {
-  default: break;
-  case 1: Directive = MAI.getData8bitsDirective(AddrSpace); break;
-  case 2: Directive = MAI.getData16bitsDirective(AddrSpace); break;
-  case 4: Directive = MAI.getData32bitsDirective(AddrSpace); break;
-  case 8:
-    Directive = MAI.getData64bitsDirective(AddrSpace);
-    // If the target doesn't support 64-bit data, emit as two 32-bit halves.
-    if (Directive) break;
-    int64_t IntValue;
-    if (!Value->EvaluateAsAbsolute(IntValue))
-      report_fatal_error("Don't know how to emit this value.");
-    if (getContext().getAsmInfo().isLittleEndian()) {
-      EmitIntValue((uint32_t)(IntValue >> 0 ), 4, AddrSpace);
-      EmitIntValue((uint32_t)(IntValue >> 32), 4, AddrSpace);
-    } else {
-      EmitIntValue((uint32_t)(IntValue >> 32), 4, AddrSpace);
-      EmitIntValue((uint32_t)(IntValue >> 0 ), 4, AddrSpace);
-    }
-    return;
-  }
-
-  assert(Directive && "Invalid size for machine code value!");
-  OS << Directive << *Value;
-  EmitEOL();
-}
-
-void PTXMCAsmStreamer::EmitULEB128Value(const MCExpr *Value) {
-  assert(MAI.hasLEB128() && "Cannot print a .uleb");
-  OS << ".uleb128 " << *Value;
-  EmitEOL();
-}
-
-void PTXMCAsmStreamer::EmitSLEB128Value(const MCExpr *Value) {
-  assert(MAI.hasLEB128() && "Cannot print a .sleb");
-  OS << ".sleb128 " << *Value;
-  EmitEOL();
-}
-
-void PTXMCAsmStreamer::EmitGPRel32Value(const MCExpr *Value) {
-  assert(MAI.getGPRel32Directive() != 0);
-  OS << MAI.getGPRel32Directive() << *Value;
-  EmitEOL();
-}
-
-
-/// EmitFill - Emit NumBytes bytes worth of the value specified by
-/// FillValue.  This implements directives such as '.space'.
-void PTXMCAsmStreamer::EmitFill(uint64_t NumBytes, uint8_t FillValue,
-                                unsigned AddrSpace) {
-  if (NumBytes == 0) return;
-
-  if (AddrSpace == 0)
-    if (const char *ZeroDirective = MAI.getZeroDirective()) {
-      OS << ZeroDirective << NumBytes;
-      if (FillValue != 0)
-        OS << ',' << (int)FillValue;
-      EmitEOL();
-      return;
-    }
-
-  // Emit a byte at a time.
-  MCStreamer::EmitFill(NumBytes, FillValue, AddrSpace);
-}
-
-void PTXMCAsmStreamer::EmitValueToAlignment(unsigned ByteAlignment,
-                                            int64_t Value,
-                                            unsigned ValueSize,
-                                            unsigned MaxBytesToEmit) {
-  // Some assemblers don't support non-power of two alignments, so we always
-  // emit alignments as a power of two if possible.
-  if (isPowerOf2_32(ByteAlignment)) {
-    switch (ValueSize) {
-    default: llvm_unreachable("Invalid size for machine code value!");
-    case 1: OS << MAI.getAlignDirective(); break;
-    // FIXME: use MAI for this!
-    case 2: OS << ".p2alignw "; break;
-    case 4: OS << ".p2alignl "; break;
-    case 8: llvm_unreachable("Unsupported alignment size!");
-    }
-
-    if (MAI.getAlignmentIsInBytes())
-      OS << ByteAlignment;
-    else
-      OS << Log2_32(ByteAlignment);
-
-    if (Value || MaxBytesToEmit) {
-      OS << ", 0x";
-      OS.write_hex(truncateToSize(Value, ValueSize));
-
-      if (MaxBytesToEmit)
-        OS << ", " << MaxBytesToEmit;
-    }
-    EmitEOL();
-    return;
-  }
-
-  // Non-power of two alignment.  This is not widely supported by assemblers.
-  // FIXME: Parameterize this based on MAI.
-  switch (ValueSize) {
-  default: llvm_unreachable("Invalid size for machine code value!");
-  case 1: OS << ".balign"; break;
-  case 2: OS << ".balignw"; break;
-  case 4: OS << ".balignl"; break;
-  case 8: llvm_unreachable("Unsupported alignment size!");
-  }
-
-  OS << ' ' << ByteAlignment;
-  OS << ", " << truncateToSize(Value, ValueSize);
-  if (MaxBytesToEmit)
-    OS << ", " << MaxBytesToEmit;
-  EmitEOL();
-}
-
-void PTXMCAsmStreamer::EmitCodeAlignment(unsigned ByteAlignment,
-                                         unsigned MaxBytesToEmit) {}
-
-bool PTXMCAsmStreamer::EmitValueToOffset(const MCExpr *Offset,
-                                         unsigned char Value) {return false;}
-
-
-void PTXMCAsmStreamer::EmitFileDirective(StringRef Filename) {
-  assert(MAI.hasSingleParameterDotFile());
-  OS << "\t.file\t";
-  PrintQuotedString(Filename, OS);
-  EmitEOL();
-}
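The case-8 fallback in EmitValueImpl above orders the two 32-bit halves by
target endianness. That word-ordering logic in isolation, as a hedged C++
sketch (illustrative names, not backend code):

    #include <cstdint>
    #include <utility>

    // Returns the two 32-bit words in emission order: low word first on
    // little-endian targets, high word first on big-endian ones, exactly as
    // the EmitIntValue calls in the case-8 branch are ordered.
    static std::pair<uint32_t, uint32_t>
    splitFor64BitEmission(int64_t V, bool LittleEndian) {
      uint32_t Lo = static_cast<uint32_t>(V >> 0);
      uint32_t Hi = static_cast<uint32_t>(V >> 32);
      return LittleEndian ? std::make_pair(Lo, Hi) : std::make_pair(Hi, Lo);
    }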
-
-// FIXME: should we inherit from MCAsmStreamer?
-bool PTXMCAsmStreamer::EmitDwarfFileDirective(unsigned FileNo,
-                                              StringRef Directory,
-                                              StringRef Filename) {
-  if (!Directory.empty()) {
-    if (sys::path::is_absolute(Filename))
-      return EmitDwarfFileDirective(FileNo, "", Filename);
-    SmallString<128> FullPathName = Directory;
-    sys::path::append(FullPathName, Filename);
-    return EmitDwarfFileDirective(FileNo, "", FullPathName);
-  }
-
-  OS << "\t.file\t" << FileNo << ' ';
-  PrintQuotedString(Filename, OS);
-  EmitEOL();
-  return this->MCStreamer::EmitDwarfFileDirective(FileNo, Directory, Filename);
-}
-
-void PTXMCAsmStreamer::AddEncodingComment(const MCInst &Inst) {}
-
-void PTXMCAsmStreamer::EmitInstruction(const MCInst &Inst) {
-  assert(getCurrentSection() && "Cannot emit contents before setting section!");
-
-  // Show the encoding in a comment if we have a code emitter.
-  if (Emitter)
-    AddEncodingComment(Inst);
-
-  // Show the MCInst if enabled.
-  if (ShowInst) {
-    Inst.dump_pretty(GetCommentOS(), &MAI, InstPrinter.get(), "\n ");
-    GetCommentOS() << "\n";
-  }
-
-  // If we have an AsmPrinter, use that to print, otherwise print the MCInst.
-  if (InstPrinter)
-    InstPrinter->printInst(&Inst, OS, "");
-  else
-    Inst.print(OS, &MAI);
-  EmitEOL();
-}
-
-/// EmitRawText - If this file is backed by an assembly streamer, this dumps
-/// the specified string in the output .s file.  This capability is
-/// indicated by the hasRawTextSupport() predicate.
-void PTXMCAsmStreamer::EmitRawText(StringRef String) {
-  if (!String.empty() && String.back() == '\n')
-    String = String.substr(0, String.size()-1);
-  OS << String;
-  EmitEOL();
-}
-
-void PTXMCAsmStreamer::FinishImpl() {}
-
-namespace llvm {
-  MCStreamer *createPTXAsmStreamer(MCContext &Context,
-                                   formatted_raw_ostream &OS,
-                                   bool isVerboseAsm, bool useLoc, bool useCFI,
-                                   bool useDwarfDirectory,
-                                   MCInstPrinter *IP,
-                                   MCCodeEmitter *CE, MCAsmBackend *MAB,
-                                   bool ShowInst) {
-    return new PTXMCAsmStreamer(Context, OS, isVerboseAsm, useLoc,
-                                IP, CE, ShowInst);
-  }
-}
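One streamer detail worth spelling out: EmitCommentsAndEOL (defined earlier in
this file) drains the buffered comment string one line at a time, padding each
line out to the comment column and prefixing the target's comment string. A
rough standalone C++ sketch, where PadToColumn is approximated with a printf
field width and all names are illustrative:

    #include <cstdio>
    #include <string>

    // Assumes Comments is newline-terminated, as the streamer asserts;
    // each chunk is padded to CommentColumn and prefixed with
    // CommentString ("//" in PTX assembly).
    static void emitComments(std::string Comments, unsigned CommentColumn,
                             const char *CommentString, FILE *Out) {
      while (!Comments.empty()) {
        std::size_t Pos = Comments.find('\n');
        std::fprintf(Out, "%*s%s %s\n", (int)CommentColumn, "", CommentString,
                     Comments.substr(0, Pos).c_str());
        Comments.erase(0, Pos + 1);
      }
    }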
diff --git a/lib/Target/PTX/PTXMCInstLower.cpp b/lib/Target/PTX/PTXMCInstLower.cpp
deleted file mode 100644
index 142e639..0000000
--- a/lib/Target/PTX/PTXMCInstLower.cpp
+++ /dev/null
@@ -1,32 +0,0 @@
-//===-- PTXMCInstLower.cpp - Convert PTX MachineInstr to an MCInst --------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains code to lower PTX MachineInstrs to their corresponding
-// MCInst records.
-//
-//===----------------------------------------------------------------------===//
-
-#include "PTX.h"
-#include "PTXAsmPrinter.h"
-#include "llvm/Constants.h"
-#include "llvm/CodeGen/MachineBasicBlock.h"
-#include "llvm/MC/MCExpr.h"
-#include "llvm/MC/MCInst.h"
-#include "llvm/Target/Mangler.h"
-
-void llvm::LowerPTXMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
-                                        PTXAsmPrinter &AP) {
-  OutMI.setOpcode(MI->getOpcode());
-  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
-    const MachineOperand &MO = MI->getOperand(i);
-    MCOperand MCOp;
-    OutMI.addOperand(AP.lowerOperand(MO));
-  }
-}
-
diff --git a/lib/Target/PTX/PTXMFInfoExtract.cpp b/lib/Target/PTX/PTXMFInfoExtract.cpp
deleted file mode 100644
index 172a0e0..0000000
--- a/lib/Target/PTX/PTXMFInfoExtract.cpp
+++ /dev/null
@@ -1,85 +0,0 @@
-//===-- PTXMFInfoExtract.cpp - Extract PTX machine function info ----------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines an information extractor for PTX machine functions.
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "ptx-mf-info-extract"
-
-#include "PTX.h"
-#include "PTXTargetMachine.h"
-#include "PTXMachineFunctionInfo.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/raw_ostream.h"
-
-using namespace llvm;
-
-// NOTE: PTXMFInfoExtract must after register allocation!
-
-namespace {
-  /// PTXMFInfoExtract - PTX specific code to extract of PTX machine
-  /// function information for PTXAsmPrinter
-  ///
-  class PTXMFInfoExtract : public MachineFunctionPass {
-  private:
-    static char ID;
-
-  public:
-    PTXMFInfoExtract(PTXTargetMachine &TM, CodeGenOpt::Level OptLevel)
-      : MachineFunctionPass(ID) {}
-
-    virtual bool runOnMachineFunction(MachineFunction &MF);
-
-    virtual const char *getPassName() const {
-      return "PTX Machine Function Info Extractor";
-    }
-  }; // class PTXMFInfoExtract
-} // end anonymous namespace
-
-using namespace llvm;
-
-char PTXMFInfoExtract::ID = 0;
-
-bool PTXMFInfoExtract::runOnMachineFunction(MachineFunction &MF) {
-  PTXMachineFunctionInfo *MFI = MF.getInfo();
-  MachineRegisterInfo &MRI = MF.getRegInfo();
-
-  // Generate list of all virtual registers used in this function
-  for (unsigned i = 0; i < MRI.getNumVirtRegs(); ++i) {
-    unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
-    const TargetRegisterClass *TRC = MRI.getRegClass(Reg);
-    unsigned RegType;
-    if (TRC == PTX::RegPredRegisterClass)
-      RegType = PTXRegisterType::Pred;
-    else if (TRC == PTX::RegI16RegisterClass)
-      RegType = PTXRegisterType::B16;
-    else if (TRC == PTX::RegI32RegisterClass)
-      RegType = PTXRegisterType::B32;
-    else if (TRC == PTX::RegI64RegisterClass)
-      RegType = PTXRegisterType::B64;
-    else if (TRC == PTX::RegF32RegisterClass)
-      RegType = PTXRegisterType::F32;
-    else if (TRC == PTX::RegF64RegisterClass)
-      RegType = PTXRegisterType::F64;
-    else
-      llvm_unreachable("Unkown register class.");
-    MFI->addRegister(Reg, RegType, PTXRegisterSpace::Reg);
-  }
-
-  return false;
-}
-
-FunctionPass *llvm::createPTXMFInfoExtract(PTXTargetMachine &TM,
-                                           CodeGenOpt::Level OptLevel) {
-  return new PTXMFInfoExtract(TM, OptLevel);
-}
diff --git a/lib/Target/PTX/PTXMachineFunctionInfo.cpp b/lib/Target/PTX/PTXMachineFunctionInfo.cpp
deleted file mode 100644
index 60acfc7..0000000
--- a/lib/Target/PTX/PTXMachineFunctionInfo.cpp
+++ /dev/null
@@ -1,14 +0,0 @@
-//===-- PTXMachineFuctionInfo.cpp - PTX machine function info -------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "PTXMachineFunctionInfo.h"
-
-using namespace llvm;
-
-void PTXMachineFunctionInfo::anchor() { }
diff --git a/lib/Target/PTX/PTXMachineFunctionInfo.h b/lib/Target/PTX/PTXMachineFunctionInfo.h
deleted file mode 100644
index bb7574c..0000000
--- a/lib/Target/PTX/PTXMachineFunctionInfo.h
+++ /dev/null
@@ -1,202 +0,0 @@
-//===-- PTXMachineFuctionInfo.h - PTX machine function info ------*- C++ -*-==//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file declares PTX-specific per-machine-function information.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef PTX_MACHINE_FUNCTION_INFO_H
-#define PTX_MACHINE_FUNCTION_INFO_H
-
-#include "PTX.h"
-#include "PTXParamManager.h"
-#include "PTXRegisterInfo.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/DenseSet.h"
-#include "llvm/ADT/StringExtras.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
-
-namespace llvm {
-
-/// PTXMachineFunctionInfo - This class is derived from MachineFunction and
-/// contains private PTX target-specific information for each MachineFunction.
-///
-class PTXMachineFunctionInfo : public MachineFunctionInfo {
-  virtual void anchor();
-  bool IsKernel;
-  DenseSet RegArgs;
-  DenseSet RegRets;
-
-  typedef DenseMap FrameMap;
-
-  FrameMap FrameSymbols;
-
-  struct RegisterInfo {
-    unsigned Reg;
-    unsigned Type;
-    unsigned Space;
-    unsigned Offset;
-    unsigned Encoded;
-  };
-
-  typedef DenseMap RegisterInfoMap;
-
-  RegisterInfoMap RegInfo;
-
-  PTXParamManager ParamManager;
-
-public:
-  typedef DenseSet::const_iterator reg_iterator;
-
-  PTXMachineFunctionInfo(MachineFunction &MF)
-    : IsKernel(false) {
-  }
-
-  /// getParamManager - Returns the PTXParamManager instance for this function.
-  PTXParamManager& getParamManager() { return ParamManager; }
-  const PTXParamManager& getParamManager() const { return ParamManager; }
-
-  /// setKernel/isKernel - Gets/sets a flag that indicates if this function is
-  /// a PTX kernel function.
-  void setKernel(bool _IsKernel=true) { IsKernel = _IsKernel; }
-  bool isKernel() const { return IsKernel; }
-
-  /// argreg_begin/argreg_end - Returns iterators to the set of registers
-  /// containing function arguments.
-  reg_iterator argreg_begin() const { return RegArgs.begin(); }
-  reg_iterator argreg_end()   const { return RegArgs.end(); }
-
-  /// retreg_begin/retreg_end - Returns iterators to the set of registers
-  /// containing the function return values.
-  reg_iterator retreg_begin() const { return RegRets.begin(); }
-  reg_iterator retreg_end()   const { return RegRets.end(); }
-
-  /// addRegister - Adds a virtual register to the set of all used registers
-  void addRegister(unsigned Reg, unsigned RegType, unsigned RegSpace) {
-    if (!RegInfo.count(Reg)) {
-      RegisterInfo Info;
-      Info.Reg = Reg;
-      Info.Type = RegType;
-      Info.Space = RegSpace;
-
-      // Determine register offset
-      Info.Offset = 0;
-      for(RegisterInfoMap::const_iterator i = RegInfo.begin(),
-          e = RegInfo.end(); i != e; ++i) {
-        const RegisterInfo& RI = i->second;
-        if (RI.Space == RegSpace)
-          if (RI.Space != PTXRegisterSpace::Reg || RI.Type == Info.Type)
-            Info.Offset++;
-      }
-
-      // Encode the register data into a single register number
-      Info.Encoded = (Info.Offset << 6) | (Info.Type << 3) | Info.Space;
-
-      RegInfo[Reg] = Info;
-
-      if (RegSpace == PTXRegisterSpace::Argument)
-        RegArgs.insert(Reg);
-      else if (RegSpace == PTXRegisterSpace::Return)
-        RegRets.insert(Reg);
-    }
-  }
-
-  /// countRegisters - Returns the number of registers of the given type and
-  /// space.
-  unsigned countRegisters(unsigned RegType, unsigned RegSpace) const {
-    unsigned Count = 0;
-    for(RegisterInfoMap::const_iterator i = RegInfo.begin(), e = RegInfo.end();
-        i != e; ++i) {
-      const RegisterInfo& RI = i->second;
-      if (RI.Type == RegType && RI.Space == RegSpace)
-        Count++;
-    }
-    return Count;
-  }
-
-  /// getEncodedRegister - Returns the encoded value of the register.
-  unsigned getEncodedRegister(unsigned Reg) const {
-    return RegInfo.lookup(Reg).Encoded;
-  }
-
-  /// addRetReg - Adds a register to the set of return-value registers.
-  void addRetReg(unsigned Reg) {
-    if (!RegRets.count(Reg)) {
-      RegRets.insert(Reg);
-    }
-  }
-
-  /// addArgReg - Adds a register to the set of function argument registers.
-  void addArgReg(unsigned Reg) {
-    RegArgs.insert(Reg);
-  }
-
-  /// getRegisterName - Returns the name of the specified virtual register. This
-  /// name is used during PTX emission.
-  std::string getRegisterName(unsigned Reg) const {
-    if (RegInfo.count(Reg)) {
-      const RegisterInfo& RI = RegInfo.lookup(Reg);
-      std::string Name;
-      raw_string_ostream NameStr(Name);
-      decodeRegisterName(NameStr, RI.Encoded);
-      NameStr.flush();
-      return Name;
-    }
-    else if (Reg == PTX::NoRegister)
-      return "%noreg";
-    else
-      llvm_unreachable("Register not in register name map");
-  }
-
-  /// getEncodedRegisterName - Returns the name of the encoded register.
-  std::string getEncodedRegisterName(unsigned EncodedReg) const {
-    std::string Name;
-    raw_string_ostream NameStr(Name);
-    decodeRegisterName(NameStr, EncodedReg);
-    NameStr.flush();
-    return Name;
-  }
-
-  /// getRegisterType - Returns the type of the specified virtual register.
-  unsigned getRegisterType(unsigned Reg) const {
-    if (RegInfo.count(Reg))
-      return RegInfo.lookup(Reg).Type;
-    else
-      llvm_unreachable("Unknown register");
-  }
-
-  /// getOffsetForRegister - Returns the offset of the virtual register
-  unsigned getOffsetForRegister(unsigned Reg) const {
-    if (RegInfo.count(Reg))
-      return RegInfo.lookup(Reg).Offset;
-    else
-      return 0;
-  }
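addRegister above packs three fields into one number: the register space in
bits 0-2, the type in bits 3-5, and the per-class offset in the remaining high
bits. A C++ sketch of the inverse mapping, valid only under the assumption
that the Type and Space enum values stay below 8:

    #include <cstdint>

    struct DecodedReg {
      unsigned Offset, Type, Space;
    };

    // Inverse of Encoded = (Offset << 6) | (Type << 3) | Space.
    static DecodedReg decodeRegister(unsigned Encoded) {
      DecodedReg D;
      D.Space  = Encoded & 0x7;         // bits 0-2
      D.Type   = (Encoded >> 3) & 0x7;  // bits 3-5
      D.Offset = Encoded >> 6;          // remaining high bits
      return D;
    }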
-
-  /// getFrameSymbol - Returns the symbol name for the given FrameIndex.
-  const char* getFrameSymbol(int FrameIndex) {
-    if (FrameSymbols.count(FrameIndex)) {
-      return FrameSymbols.lookup(FrameIndex).c_str();
-    } else {
-      std::string Name = "__local";
-      Name += utostr(FrameIndex);
-      // The whole point of caching this name is to ensure the pointer we pass
-      // to any getExternalSymbol() calls will remain valid for the lifetime of
-      // the back-end instance. This is to work around an issue in SelectionDAG
-      // where symbol names are expected to be life-long strings.
-      FrameSymbols[FrameIndex] = Name;
-      return FrameSymbols[FrameIndex].c_str();
-    }
-  }
-}; // class PTXMachineFunctionInfo
-} // namespace llvm
-
-#endif // PTX_MACHINE_FUNCTION_INFO_H
diff --git a/lib/Target/PTX/PTXParamManager.cpp b/lib/Target/PTX/PTXParamManager.cpp
deleted file mode 100644
index cc1cc71..0000000
--- a/lib/Target/PTX/PTXParamManager.cpp
+++ /dev/null
@@ -1,73 +0,0 @@
-//===-- PTXParamManager.cpp - Manager for .param variables ----------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the PTXParamManager class.
-//
-//===----------------------------------------------------------------------===//
-
-#include "PTXParamManager.h"
-#include "PTX.h"
-#include "llvm/ADT/StringExtras.h"
-
-using namespace llvm;
-
-PTXParamManager::PTXParamManager() {
-}
-
-unsigned PTXParamManager::addArgumentParam(unsigned Size) {
-  PTXParam Param;
-  Param.Type = PTX_PARAM_TYPE_ARGUMENT;
-  Param.Size = Size;
-
-  std::string Name;
-  Name = "__param_";
-  Name += utostr(ArgumentParams.size()+1);
-  Param.Name = Name;
-
-  unsigned Index = AllParams.size();
-  AllParams[Index] = Param;
-  ArgumentParams.push_back(Index);
-
-  return Index;
-}
-
-unsigned PTXParamManager::addReturnParam(unsigned Size) {
-  PTXParam Param;
-  Param.Type = PTX_PARAM_TYPE_RETURN;
-  Param.Size = Size;
-
-  std::string Name;
-  Name = "__ret_";
-  Name += utostr(ReturnParams.size()+1);
-  Param.Name = Name;
-
-  unsigned Index = AllParams.size();
-  AllParams[Index] = Param;
-  ReturnParams.push_back(Index);
-
-  return Index;
-}
-
-unsigned PTXParamManager::addLocalParam(unsigned Size) {
-  PTXParam Param;
-  Param.Type = PTX_PARAM_TYPE_LOCAL;
-  Param.Size = Size;
-
-  std::string Name;
-  Name = "__localparam_";
-  Name += utostr(LocalParams.size()+1);
-  Param.Name = Name;
-
-  unsigned Index = AllParams.size();
-  AllParams[Index] = Param;
-  LocalParams.push_back(Index);
-
-  return Index;
-}
-
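The add*Param methods above all follow the same 1-based naming scheme per
kind: "__param_1", "__ret_1", "__localparam_1", and so on. A one-function C++
sketch of that rule (illustrative, not backend code):

    #include <string>

    // Mirrors Name = Prefix; Name += utostr(Params.size()+1) in the methods
    // above, e.g. paramName("__param_", 0) == "__param_1".
    static std::string paramName(const char *Prefix, std::size_t CountSoFar) {
      return std::string(Prefix) + std::to_string(CountSoFar + 1);
    }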
diff --git a/lib/Target/PTX/PTXParamManager.h b/lib/Target/PTX/PTXParamManager.h
deleted file mode 100644
index 92e7728..0000000
--- a/lib/Target/PTX/PTXParamManager.h
+++ /dev/null
@@ -1,87 +0,0 @@
-//===-- PTXParamManager.h - Manager for .param variables --------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines the PTXParamManager class, which manages all defined .param
-// variables for a particular function.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef PTX_PARAM_MANAGER_H
-#define PTX_PARAM_MANAGER_H
-
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/SmallVector.h"
-#include
-
-namespace llvm {
-
-/// PTXParamManager - This class manages all .param variables defined for a
-/// particular function.
-class PTXParamManager {
-private:
-
-  /// PTXParamType - Type of a .param variable
-  enum PTXParamType {
-    PTX_PARAM_TYPE_ARGUMENT,
-    PTX_PARAM_TYPE_RETURN,
-    PTX_PARAM_TYPE_LOCAL
-  };
-
-  /// PTXParam - Definition of a PTX .param variable
-  struct PTXParam {
-    PTXParamType Type;
-    unsigned     Size;
-    std::string  Name;
-  };
-
-  DenseMap AllParams;
-  SmallVector ArgumentParams;
-  SmallVector ReturnParams;
-  SmallVector LocalParams;
-
-public:
-
-  typedef SmallVector::const_iterator param_iterator;
-
-  PTXParamManager();
-
-  param_iterator arg_begin() const { return ArgumentParams.begin(); }
-  param_iterator arg_end() const { return ArgumentParams.end(); }
-  param_iterator ret_begin() const { return ReturnParams.begin(); }
-  param_iterator ret_end() const { return ReturnParams.end(); }
-  param_iterator local_begin() const { return LocalParams.begin(); }
-  param_iterator local_end() const { return LocalParams.end(); }
-
-  /// addArgumentParam - Returns a new .param used as an argument.
-  unsigned addArgumentParam(unsigned Size);
-
-  /// addReturnParam - Returns a new .param used as a return argument.
-  unsigned addReturnParam(unsigned Size);
-
-  /// addLocalParam - Returns a new .param used as a local .param variable.
-  unsigned addLocalParam(unsigned Size);
-
-  /// getParamName - Returns the name of the parameter as a string.
-  const std::string &getParamName(unsigned Param) const {
-    assert(AllParams.count(Param) == 1 && "Param has not been defined!");
-    return AllParams.find(Param)->second.Name;
-  }
-
-  /// getParamSize - Returns the size of the parameter in bits.
-  unsigned getParamSize(unsigned Param) const {
-    assert(AllParams.count(Param) == 1 && "Param has not been defined!");
-    return AllParams.find(Param)->second.Size;
-  }
-
-};
-
-}
-
-#endif
-
diff --git a/lib/Target/PTX/PTXRegAlloc.cpp b/lib/Target/PTX/PTXRegAlloc.cpp
deleted file mode 100644
index 7fd5375..0000000
--- a/lib/Target/PTX/PTXRegAlloc.cpp
+++ /dev/null
@@ -1,53 +0,0 @@
-//===-- PTXRegAlloc.cpp - PTX Register Allocator --------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains a register allocator for PTX code.
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "ptx-reg-alloc"
-
-#include "PTX.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/RegAllocRegistry.h"
-
-using namespace llvm;
-
-namespace {
-  // Special register allocator for PTX.
-  class PTXRegAlloc : public MachineFunctionPass {
-  public:
-    static char ID;
-    PTXRegAlloc() : MachineFunctionPass(ID) {}
-
-    virtual const char* getPassName() const {
-      return "PTX Register Allocator";
-    }
-
-    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
-      AU.setPreservesCFG();
-      MachineFunctionPass::getAnalysisUsage(AU);
-    }
-
-    virtual bool runOnMachineFunction(MachineFunction &MF) {
-      // We do not actually do anything (at least not yet).
- return false; - } - }; - - char PTXRegAlloc::ID = 0; - - static RegisterRegAlloc - ptxRegAlloc("ptx", "PTX register allocator", createPTXRegisterAllocator); -} - -FunctionPass *llvm::createPTXRegisterAllocator() { - return new PTXRegAlloc(); -} - diff --git a/lib/Target/PTX/PTXRegisterInfo.cpp b/lib/Target/PTX/PTXRegisterInfo.cpp deleted file mode 100644 index b6ffd38..0000000 --- a/lib/Target/PTX/PTXRegisterInfo.cpp +++ /dev/null @@ -1,38 +0,0 @@ -//===-- PTXRegisterInfo.cpp - PTX Register Information --------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains the PTX implementation of the TargetRegisterInfo class. -// -//===----------------------------------------------------------------------===// - -#include "PTXRegisterInfo.h" -#include "PTX.h" -#include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/raw_ostream.h" - -#define GET_REGINFO_TARGET_DESC -#include "PTXGenRegisterInfo.inc" - -using namespace llvm; - -PTXRegisterInfo::PTXRegisterInfo(PTXTargetMachine &TM, - const TargetInstrInfo &tii) - // PTX does not have a return address register. - : PTXGenRegisterInfo(0), TII(tii) { -} - -void PTXRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator /*II*/, - int /*SPAdj*/, - RegScavenger * /*RS*/) const { - llvm_unreachable("FrameIndex should have been previously eliminated!"); -} diff --git a/lib/Target/PTX/PTXRegisterInfo.h b/lib/Target/PTX/PTXRegisterInfo.h deleted file mode 100644 index 5614ce7..0000000 --- a/lib/Target/PTX/PTXRegisterInfo.h +++ /dev/null @@ -1,56 +0,0 @@ -//===-- PTXRegisterInfo.h - PTX Register Information Impl -------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains the PTX implementation of the MRegisterInfo class. 
-// -//===----------------------------------------------------------------------===// - -#ifndef PTX_REGISTER_INFO_H -#define PTX_REGISTER_INFO_H - -#include "llvm/Support/ErrorHandling.h" -#include "llvm/ADT/BitVector.h" - -#define GET_REGINFO_HEADER -#include "PTXGenRegisterInfo.inc" - -namespace llvm { -class PTXTargetMachine; -class MachineFunction; - -struct PTXRegisterInfo : public PTXGenRegisterInfo { -private: - const TargetInstrInfo &TII; - -public: - PTXRegisterInfo(PTXTargetMachine &TM, - const TargetInstrInfo &tii); - - virtual const uint16_t - *getCalleeSavedRegs(const MachineFunction *MF = 0) const { - static const uint16_t CalleeSavedRegs[] = { 0 }; - return CalleeSavedRegs; // save nothing - } - - virtual BitVector getReservedRegs(const MachineFunction &MF) const { - BitVector Reserved(getNumRegs()); - return Reserved; // reserve no regs - } - - virtual void eliminateFrameIndex(MachineBasicBlock::iterator II, - int SPAdj, - RegScavenger *RS = NULL) const; - - virtual unsigned getFrameRegister(const MachineFunction &MF) const { - llvm_unreachable("PTX does not have a frame register"); - } -}; // struct PTXRegisterInfo -} // namespace llvm - -#endif // PTX_REGISTER_INFO_H diff --git a/lib/Target/PTX/PTXRegisterInfo.td b/lib/Target/PTX/PTXRegisterInfo.td deleted file mode 100644 index e8b262e..0000000 --- a/lib/Target/PTX/PTXRegisterInfo.td +++ /dev/null @@ -1,36 +0,0 @@ -//===-- PTXRegisterInfo.td - PTX Register defs -------------*- tablegen -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -//===----------------------------------------------------------------------===// -// Declarations that describe the PTX register file -//===----------------------------------------------------------------------===// - -class PTXReg : Register { - let Namespace = "PTX"; -} - -//===----------------------------------------------------------------------===// -// Registers -//===----------------------------------------------------------------------===// - -// The generated register info code throws warnings for empty register classes -// (e.g. zero-length arrays), so we use a dummy register here just to prevent -// these warnings. -def DUMMY_REG : PTXReg<"R0">; - -//===----------------------------------------------------------------------===// -// Register classes -//===----------------------------------------------------------------------===// -def RegPred : RegisterClass<"PTX", [i1], 8, (add DUMMY_REG)>; -def RegI16 : RegisterClass<"PTX", [i16], 16, (add DUMMY_REG)>; -def RegI32 : RegisterClass<"PTX", [i32], 32, (add DUMMY_REG)>; -def RegI64 : RegisterClass<"PTX", [i64], 64, (add DUMMY_REG)>; -def RegF32 : RegisterClass<"PTX", [f32], 32, (add DUMMY_REG)>; -def RegF64 : RegisterClass<"PTX", [f64], 64, (add DUMMY_REG)>; - diff --git a/lib/Target/PTX/PTXSelectionDAGInfo.cpp b/lib/Target/PTX/PTXSelectionDAGInfo.cpp deleted file mode 100644 index a116fab..0000000 --- a/lib/Target/PTX/PTXSelectionDAGInfo.cpp +++ /dev/null @@ -1,150 +0,0 @@ -//===-- PTXSelectionDAGInfo.cpp - PTX SelectionDAG Info -------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. 
-// -//===----------------------------------------------------------------------===// -// -// This file implements the PTXSelectionDAGInfo class. -// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "ptx-selectiondag-info" -#include "PTXTargetMachine.h" -#include "llvm/DerivedTypes.h" -#include "llvm/CodeGen/SelectionDAG.h" -using namespace llvm; - -PTXSelectionDAGInfo::PTXSelectionDAGInfo(const TargetMachine &TM) - : TargetSelectionDAGInfo(TM), - Subtarget(&TM.getSubtarget()) { -} - -PTXSelectionDAGInfo::~PTXSelectionDAGInfo() { -} - -SDValue -PTXSelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl, - SDValue Chain, - SDValue Dst, SDValue Src, - SDValue Size, unsigned Align, - bool isVolatile, bool AlwaysInline, - MachinePointerInfo DstPtrInfo, - MachinePointerInfo SrcPtrInfo) const { - // Do repeated 4-byte loads and stores. To be improved. - // This requires 4-byte alignment. - if ((Align & 3) != 0) - return SDValue(); - // This requires the copy size to be a constant, preferably - // within a subtarget-specific limit. - ConstantSDNode *ConstantSize = dyn_cast(Size); - if (!ConstantSize) - return SDValue(); - uint64_t SizeVal = ConstantSize->getZExtValue(); - // Always inline memcpys. In PTX, we do not have a C library that provides - // a memcpy function. - //if (!AlwaysInline) - // return SDValue(); - - unsigned BytesLeft = SizeVal & 3; - unsigned NumMemOps = SizeVal >> 2; - unsigned EmittedNumMemOps = 0; - EVT VT = MVT::i32; - unsigned VTSize = 4; - unsigned i = 0; - const unsigned MAX_LOADS_IN_LDM = 6; - SDValue TFOps[MAX_LOADS_IN_LDM]; - SDValue Loads[MAX_LOADS_IN_LDM]; - uint64_t SrcOff = 0, DstOff = 0; - EVT PointerType = Subtarget->is64Bit() ? MVT::i64 : MVT::i32; - - // Emit up to MAX_LOADS_IN_LDM loads, then a TokenFactor barrier, then the - // same number of stores. The loads and stores will get combined into - // ldm/stm later on. - while (EmittedNumMemOps < NumMemOps) { - for (i = 0; - i < MAX_LOADS_IN_LDM && EmittedNumMemOps + i < NumMemOps; ++i) { - Loads[i] = DAG.getLoad(VT, dl, Chain, - DAG.getNode(ISD::ADD, dl, PointerType, Src, - DAG.getConstant(SrcOff, PointerType)), - SrcPtrInfo.getWithOffset(SrcOff), isVolatile, - false, false, 0); - TFOps[i] = Loads[i].getValue(1); - SrcOff += VTSize; - } - Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i); - - for (i = 0; - i < MAX_LOADS_IN_LDM && EmittedNumMemOps + i < NumMemOps; ++i) { - TFOps[i] = DAG.getStore(Chain, dl, Loads[i], - DAG.getNode(ISD::ADD, dl, PointerType, Dst, - DAG.getConstant(DstOff, PointerType)), - DstPtrInfo.getWithOffset(DstOff), - isVolatile, false, 0); - DstOff += VTSize; - } - Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i); - - EmittedNumMemOps += i; - } - - if (BytesLeft == 0) - return Chain; - - // Issue loads / stores for the trailing (1 - 3) bytes. 
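// Worked example (a sketch, not from the original source): for a 4-byte
// aligned memcpy with SizeVal == 11, NumMemOps == 11 >> 2 == 2, so the loop
// above copies 8 bytes as two i32 load/store pairs; BytesLeft == 11 & 3 == 3,
// so the code below emits one i16 and then one i8 load/store for the tail.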
- unsigned BytesLeftSave = BytesLeft; - i = 0; - while (BytesLeft) { - if (BytesLeft >= 2) { - VT = MVT::i16; - VTSize = 2; - } else { - VT = MVT::i8; - VTSize = 1; - } - - Loads[i] = DAG.getLoad(VT, dl, Chain, - DAG.getNode(ISD::ADD, dl, PointerType, Src, - DAG.getConstant(SrcOff, PointerType)), - SrcPtrInfo.getWithOffset(SrcOff), false, false, - false, 0); - TFOps[i] = Loads[i].getValue(1); - ++i; - SrcOff += VTSize; - BytesLeft -= VTSize; - } - Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i); - - i = 0; - BytesLeft = BytesLeftSave; - while (BytesLeft) { - if (BytesLeft >= 2) { - VT = MVT::i16; - VTSize = 2; - } else { - VT = MVT::i8; - VTSize = 1; - } - - TFOps[i] = DAG.getStore(Chain, dl, Loads[i], - DAG.getNode(ISD::ADD, dl, PointerType, Dst, - DAG.getConstant(DstOff, PointerType)), - DstPtrInfo.getWithOffset(DstOff), false, false, 0); - ++i; - DstOff += VTSize; - BytesLeft -= VTSize; - } - return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i); -} - -SDValue PTXSelectionDAGInfo:: -EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl, - SDValue Chain, SDValue Dst, - SDValue Src, SDValue Size, - unsigned Align, bool isVolatile, - MachinePointerInfo DstPtrInfo) const { - llvm_unreachable("memset lowering not implemented for PTX yet"); -} - diff --git a/lib/Target/PTX/PTXSelectionDAGInfo.h b/lib/Target/PTX/PTXSelectionDAGInfo.h deleted file mode 100644 index e0c7167..0000000 --- a/lib/Target/PTX/PTXSelectionDAGInfo.h +++ /dev/null @@ -1,53 +0,0 @@ -//===-- PTXSelectionDAGInfo.h - PTX SelectionDAG Info -----------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file defines the PTX subclass for TargetSelectionDAGInfo. -// -//===----------------------------------------------------------------------===// - -#ifndef PTXSELECTIONDAGINFO_H -#define PTXSELECTIONDAGINFO_H - -#include "llvm/Target/TargetSelectionDAGInfo.h" - -namespace llvm { - -/// PTXSelectionDAGInfo - TargetSelectionDAGInfo sub-class for the PTX target. -/// At the moment, this is mostly just a copy of ARMSelectionDAGInfo. -class PTXSelectionDAGInfo : public TargetSelectionDAGInfo { - /// Subtarget - Keep a pointer to the PTXSubtarget around so that we can - /// make the right decision when generating code for different targets. - const PTXSubtarget *Subtarget; - -public: - explicit PTXSelectionDAGInfo(const TargetMachine &TM); - ~PTXSelectionDAGInfo(); - - virtual - SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl, - SDValue Chain, - SDValue Dst, SDValue Src, - SDValue Size, unsigned Align, - bool isVolatile, bool AlwaysInline, - MachinePointerInfo DstPtrInfo, - MachinePointerInfo SrcPtrInfo) const; - - virtual - SDValue EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl, - SDValue Chain, - SDValue Op1, SDValue Op2, - SDValue Op3, unsigned Align, - bool isVolatile, - MachinePointerInfo DstPtrInfo) const; -}; - -} - -#endif - diff --git a/lib/Target/PTX/PTXSubtarget.cpp b/lib/Target/PTX/PTXSubtarget.cpp deleted file mode 100644 index 454f64e..0000000 --- a/lib/Target/PTX/PTXSubtarget.cpp +++ /dev/null @@ -1,68 +0,0 @@ -//===-- PTXSubtarget.cpp - PTX Subtarget Information ----------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. 
See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements the PTX specific subclass of TargetSubtargetInfo. -// -//===----------------------------------------------------------------------===// - -#include "PTXSubtarget.h" -#include "PTX.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/TargetRegistry.h" - -#define GET_SUBTARGETINFO_TARGET_DESC -#define GET_SUBTARGETINFO_CTOR -#include "PTXGenSubtargetInfo.inc" - -using namespace llvm; - -void PTXSubtarget::anchor() { } - -PTXSubtarget::PTXSubtarget(const std::string &TT, const std::string &CPU, - const std::string &FS, bool is64Bit) - : PTXGenSubtargetInfo(TT, CPU, FS), - PTXTarget(PTX_COMPUTE_1_0), - PTXVersion(PTX_VERSION_2_0), - SupportsDouble(false), - SupportsFMA(true), - Is64Bit(is64Bit) { - std::string TARGET = CPU; - if (TARGET.empty()) - TARGET = "generic"; - ParseSubtargetFeatures(TARGET, FS); -} - -std::string PTXSubtarget::getTargetString() const { - switch(PTXTarget) { - default: llvm_unreachable("Unknown PTX target"); - case PTX_SM_1_0: return "sm_10"; - case PTX_SM_1_1: return "sm_11"; - case PTX_SM_1_2: return "sm_12"; - case PTX_SM_1_3: return "sm_13"; - case PTX_SM_2_0: return "sm_20"; - case PTX_SM_2_1: return "sm_21"; - case PTX_SM_2_2: return "sm_22"; - case PTX_SM_2_3: return "sm_23"; - case PTX_COMPUTE_1_0: return "compute_10"; - case PTX_COMPUTE_1_1: return "compute_11"; - case PTX_COMPUTE_1_2: return "compute_12"; - case PTX_COMPUTE_1_3: return "compute_13"; - case PTX_COMPUTE_2_0: return "compute_20"; - } -} - -std::string PTXSubtarget::getPTXVersionString() const { - switch(PTXVersion) { - case PTX_VERSION_2_0: return "2.0"; - case PTX_VERSION_2_1: return "2.1"; - case PTX_VERSION_2_2: return "2.2"; - case PTX_VERSION_2_3: return "2.3"; - } - llvm_unreachable("Invalid PTX version"); -} diff --git a/lib/Target/PTX/PTXSubtarget.h b/lib/Target/PTX/PTXSubtarget.h deleted file mode 100644 index ce93fef..0000000 --- a/lib/Target/PTX/PTXSubtarget.h +++ /dev/null @@ -1,131 +0,0 @@ -//===-- PTXSubtarget.h - Define Subtarget for the PTX -----------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file declares the PTX specific subclass of TargetSubtargetInfo. -// -//===----------------------------------------------------------------------===// - -#ifndef PTX_SUBTARGET_H -#define PTX_SUBTARGET_H - -#include "llvm/Target/TargetSubtargetInfo.h" - -#define GET_SUBTARGETINFO_HEADER -#include "PTXGenSubtargetInfo.inc" - -namespace llvm { -class StringRef; - - class PTXSubtarget : public PTXGenSubtargetInfo { - virtual void anchor(); - public: - - /** - * Enumeration of Shader Models supported by the back-end. 
- */ - enum PTXTargetEnum { - PTX_COMPUTE_1_0, /*< Compute Compatibility 1.0 */ - PTX_COMPUTE_1_1, /*< Compute Compatibility 1.1 */ - PTX_COMPUTE_1_2, /*< Compute Compatibility 1.2 */ - PTX_COMPUTE_1_3, /*< Compute Compatibility 1.3 */ - PTX_COMPUTE_2_0, /*< Compute Compatibility 2.0 */ - PTX_LAST_COMPUTE, - - PTX_SM_1_0, /*< Shader Model 1.0 */ - PTX_SM_1_1, /*< Shader Model 1.1 */ - PTX_SM_1_2, /*< Shader Model 1.2 */ - PTX_SM_1_3, /*< Shader Model 1.3 */ - PTX_SM_2_0, /*< Shader Model 2.0 */ - PTX_SM_2_1, /*< Shader Model 2.1 */ - PTX_SM_2_2, /*< Shader Model 2.2 */ - PTX_SM_2_3, /*< Shader Model 2.3 */ - PTX_LAST_SM - }; - - /** - * Enumeration of PTX versions supported by the back-end. - * - * Currently, PTX 2.0 is the minimum supported version. - */ - enum PTXVersionEnum { - PTX_VERSION_2_0, /*< PTX Version 2.0 */ - PTX_VERSION_2_1, /*< PTX Version 2.1 */ - PTX_VERSION_2_2, /*< PTX Version 2.2 */ - PTX_VERSION_2_3 /*< PTX Version 2.3 */ - }; - - private: - - /// Shader Model supported on the target GPU. - PTXTargetEnum PTXTarget; - - /// PTX Language Version. - PTXVersionEnum PTXVersion; - - // The native .f64 type is supported on the hardware. - bool SupportsDouble; - - // Support the fused-multiply add (FMA) and multiply-add (MAD) - // instructions - bool SupportsFMA; - - // Use .u64 instead of .u32 for addresses. - bool Is64Bit; - - public: - - PTXSubtarget(const std::string &TT, const std::string &CPU, - const std::string &FS, bool is64Bit); - - // Target architecture accessors - std::string getTargetString() const; - - std::string getPTXVersionString() const; - - bool supportsDouble() const { return SupportsDouble; } - - bool is64Bit() const { return Is64Bit; } - - bool supportsFMA() const { return SupportsFMA; } - - bool supportsPTX21() const { return PTXVersion >= PTX_VERSION_2_1; } - - bool supportsPTX22() const { return PTXVersion >= PTX_VERSION_2_2; } - - bool supportsPTX23() const { return PTXVersion >= PTX_VERSION_2_3; } - - bool fdivNeedsRoundingMode() const { - return (PTXTarget >= PTX_SM_1_3 && PTXTarget < PTX_LAST_SM) || - (PTXTarget >= PTX_COMPUTE_1_3 && PTXTarget < PTX_LAST_COMPUTE); - } - - bool fmadNeedsRoundingMode() const { - return (PTXTarget >= PTX_SM_1_3 && PTXTarget < PTX_LAST_SM) || - (PTXTarget >= PTX_COMPUTE_1_3 && PTXTarget < PTX_LAST_COMPUTE); - } - - bool useParamSpaceForDeviceArgs() const { - return (PTXTarget >= PTX_SM_2_0 && PTXTarget < PTX_LAST_SM) || - (PTXTarget >= PTX_COMPUTE_2_0 && PTXTarget < PTX_LAST_COMPUTE); - } - - bool callsAreHandled() const { - return (PTXTarget >= PTX_SM_2_0 && PTXTarget < PTX_LAST_SM) || - (PTXTarget >= PTX_COMPUTE_2_0 && PTXTarget < PTX_LAST_COMPUTE); - } - - bool emitPtrAttribute() const { - return PTXVersion >= PTX_VERSION_2_2; - } - - void ParseSubtargetFeatures(StringRef CPU, StringRef FS); - }; // class PTXSubtarget -} // namespace llvm - -#endif // PTX_SUBTARGET_H diff --git a/lib/Target/PTX/PTXTargetMachine.cpp b/lib/Target/PTX/PTXTargetMachine.cpp deleted file mode 100644 index 97b8de1..0000000 --- a/lib/Target/PTX/PTXTargetMachine.cpp +++ /dev/null @@ -1,165 +0,0 @@ -//===-- PTXTargetMachine.cpp - Define TargetMachine for PTX ---------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// Top-level implementation for the PTX target. 
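// Illustrative use of the PTXSubtarget predicates declared above (a sketch,
// not part of the original file): code generation consults the shader model
// to decide how instructions must be emitted, e.g.
//
//   if (ST.fdivNeedsRoundingMode())   // true for sm_13+/compute_13+ above
//     /* emit fdiv with an explicit rounding mode, e.g. div.rn.f32 */;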
-// -//===----------------------------------------------------------------------===// - -#include "PTXTargetMachine.h" -#include "PTX.h" -#include "llvm/PassManager.h" -#include "llvm/Analysis/Passes.h" -#include "llvm/Analysis/Verifier.h" -#include "llvm/Assembly/PrintModulePass.h" -#include "llvm/CodeGen/AsmPrinter.h" -#include "llvm/CodeGen/MachineFunctionAnalysis.h" -#include "llvm/CodeGen/MachineModuleInfo.h" -#include "llvm/CodeGen/Passes.h" -#include "llvm/MC/MCAsmInfo.h" -#include "llvm/MC/MCInstrInfo.h" -#include "llvm/MC/MCStreamer.h" -#include "llvm/MC/MCSubtargetInfo.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/TargetRegistry.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetData.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetLowering.h" -#include "llvm/Target/TargetLoweringObjectFile.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetOptions.h" -#include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Target/TargetSubtargetInfo.h" -#include "llvm/Transforms/Scalar.h" - - -using namespace llvm; - -namespace llvm { - MCStreamer *createPTXAsmStreamer(MCContext &Ctx, formatted_raw_ostream &OS, - bool isVerboseAsm, bool useLoc, - bool useCFI, bool useDwarfDirectory, - MCInstPrinter *InstPrint, - MCCodeEmitter *CE, - MCAsmBackend *MAB, - bool ShowInst); -} - -extern "C" void LLVMInitializePTXTarget() { - - RegisterTargetMachine X(ThePTX32Target); - RegisterTargetMachine Y(ThePTX64Target); - - TargetRegistry::RegisterAsmStreamer(ThePTX32Target, createPTXAsmStreamer); - TargetRegistry::RegisterAsmStreamer(ThePTX64Target, createPTXAsmStreamer); -} - -namespace { - const char* DataLayout32 = - "e-p:32:32-i64:32:32-f64:32:32-v128:32:128-v64:32:64-n32:64"; - const char* DataLayout64 = - "e-p:64:64-i64:32:32-f64:32:32-v128:32:128-v64:32:64-n32:64"; -} - -// DataLayout and FrameLowering are filled with dummy data -PTXTargetMachine::PTXTargetMachine(const Target &T, - StringRef TT, StringRef CPU, StringRef FS, - const TargetOptions &Options, - Reloc::Model RM, CodeModel::Model CM, - CodeGenOpt::Level OL, - bool is64Bit) - : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL), - DataLayout(is64Bit ? DataLayout64 : DataLayout32), - Subtarget(TT, CPU, FS, is64Bit), - FrameLowering(Subtarget), - InstrInfo(*this), - TSInfo(*this), - TLInfo(*this) { -} - -void PTX32TargetMachine::anchor() { } - -PTX32TargetMachine::PTX32TargetMachine(const Target &T, StringRef TT, - StringRef CPU, StringRef FS, - const TargetOptions &Options, - Reloc::Model RM, CodeModel::Model CM, - CodeGenOpt::Level OL) - : PTXTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) { -} - -void PTX64TargetMachine::anchor() { } - -PTX64TargetMachine::PTX64TargetMachine(const Target &T, StringRef TT, - StringRef CPU, StringRef FS, - const TargetOptions &Options, - Reloc::Model RM, CodeModel::Model CM, - CodeGenOpt::Level OL) - : PTXTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) { -} - -namespace llvm { -/// PTX Code Generator Pass Configuration Options. 
-class PTXPassConfig : public TargetPassConfig {
-public:
-  PTXPassConfig(PTXTargetMachine *TM, PassManagerBase &PM)
-    : TargetPassConfig(TM, PM) {}
-
-  PTXTargetMachine &getPTXTargetMachine() const {
-    return getTM<PTXTargetMachine>();
-  }
-
-  bool addInstSelector();
-  FunctionPass *createTargetRegisterAllocator(bool);
-  void addOptimizedRegAlloc(FunctionPass *RegAllocPass);
-  bool addPostRegAlloc();
-  void addMachineLateOptimization();
-  bool addPreEmitPass();
-};
-} // namespace
-
-TargetPassConfig *PTXTargetMachine::createPassConfig(PassManagerBase &PM) {
-  PTXPassConfig *PassConfig = new PTXPassConfig(this, PM);
-  PassConfig->disablePass(PrologEpilogCodeInserterID);
-  return PassConfig;
-}
-
-bool PTXPassConfig::addInstSelector() {
-  PM->add(createPTXISelDag(getPTXTargetMachine(), getOptLevel()));
-  return false;
-}
-
-FunctionPass *PTXPassConfig::createTargetRegisterAllocator(bool /*Optimized*/) {
-  return createPTXRegisterAllocator();
-}
-
-// Modify the optimized compilation path to bypass optimized register allocation.
-void PTXPassConfig::addOptimizedRegAlloc(FunctionPass *RegAllocPass) {
-  addFastRegAlloc(RegAllocPass);
-}
-
-bool PTXPassConfig::addPostRegAlloc() {
-  // PTXMFInfoExtract must be run after register allocation!
-  //PM->add(createPTXMFInfoExtract(getPTXTargetMachine()));
-  return false;
-}
-
-/// Add passes that optimize machine instructions after register allocation.
-void PTXPassConfig::addMachineLateOptimization() {
-  if (addPass(BranchFolderPassID) != &NoPassID)
-    printAndVerify("After BranchFolding");
-
-  if (addPass(TailDuplicateID) != &NoPassID)
-    printAndVerify("After TailDuplicate");
-}
-
-bool PTXPassConfig::addPreEmitPass() {
-  PM->add(createPTXMFInfoExtract(getPTXTargetMachine(), getOptLevel()));
-  PM->add(createPTXFPRoundingModePass(getPTXTargetMachine(), getOptLevel()));
-  return true;
-}
diff --git a/lib/Target/PTX/PTXTargetMachine.h b/lib/Target/PTX/PTXTargetMachine.h
deleted file mode 100644
index 278d155..0000000
--- a/lib/Target/PTX/PTXTargetMachine.h
+++ /dev/null
@@ -1,104 +0,0 @@
-//===-- PTXTargetMachine.h - Define TargetMachine for PTX -------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file declares the PTX specific subclass of TargetMachine.
-// -//===----------------------------------------------------------------------===// - -#ifndef PTX_TARGET_MACHINE_H -#define PTX_TARGET_MACHINE_H - -#include "PTXISelLowering.h" -#include "PTXInstrInfo.h" -#include "PTXFrameLowering.h" -#include "PTXSelectionDAGInfo.h" -#include "PTXSubtarget.h" -#include "llvm/Target/TargetData.h" -#include "llvm/Target/TargetFrameLowering.h" -#include "llvm/Target/TargetMachine.h" - -namespace llvm { -class PTXTargetMachine : public LLVMTargetMachine { - private: - const TargetData DataLayout; - PTXSubtarget Subtarget; // has to be initialized before FrameLowering - PTXFrameLowering FrameLowering; - PTXInstrInfo InstrInfo; - PTXSelectionDAGInfo TSInfo; - PTXTargetLowering TLInfo; - - public: - PTXTargetMachine(const Target &T, StringRef TT, - StringRef CPU, StringRef FS, const TargetOptions &Options, - Reloc::Model RM, CodeModel::Model CM, - CodeGenOpt::Level OL, - bool is64Bit); - - virtual const TargetData *getTargetData() const { return &DataLayout; } - - virtual const TargetFrameLowering *getFrameLowering() const { - return &FrameLowering; - } - - virtual const PTXInstrInfo *getInstrInfo() const { return &InstrInfo; } - virtual const TargetRegisterInfo *getRegisterInfo() const { - return &InstrInfo.getRegisterInfo(); } - - virtual const PTXTargetLowering *getTargetLowering() const { - return &TLInfo; } - - virtual const PTXSelectionDAGInfo* getSelectionDAGInfo() const { - return &TSInfo; - } - - virtual const PTXSubtarget *getSubtargetImpl() const { return &Subtarget; } - - // Emission of machine code through JITCodeEmitter is not supported. - virtual bool addPassesToEmitMachineCode(PassManagerBase &, - JITCodeEmitter &, - bool = true) { - return true; - } - - // Emission of machine code through MCJIT is not supported. - virtual bool addPassesToEmitMC(PassManagerBase &, - MCContext *&, - raw_ostream &, - bool = true) { - return true; - } - - // Pass Pipeline Configuration - virtual TargetPassConfig *createPassConfig(PassManagerBase &PM); -}; // class PTXTargetMachine - - -class PTX32TargetMachine : public PTXTargetMachine { - virtual void anchor(); -public: - - PTX32TargetMachine(const Target &T, StringRef TT, - StringRef CPU, StringRef FS, const TargetOptions &Options, - Reloc::Model RM, CodeModel::Model CM, - CodeGenOpt::Level OL); -}; // class PTX32TargetMachine - -class PTX64TargetMachine : public PTXTargetMachine { - virtual void anchor(); -public: - - PTX64TargetMachine(const Target &T, StringRef TT, - StringRef CPU, StringRef FS, const TargetOptions &Options, - Reloc::Model RM, CodeModel::Model CM, - CodeGenOpt::Level OL); -}; // class PTX32TargetMachine - -} // namespace llvm - -#endif // PTX_TARGET_MACHINE_H diff --git a/lib/Target/PTX/TargetInfo/CMakeLists.txt b/lib/Target/PTX/TargetInfo/CMakeLists.txt deleted file mode 100644 index d9a5da3..0000000 --- a/lib/Target/PTX/TargetInfo/CMakeLists.txt +++ /dev/null @@ -1,7 +0,0 @@ -include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. ) - -add_llvm_library(LLVMPTXInfo - PTXTargetInfo.cpp - ) - -add_dependencies(LLVMPTXInfo PTXCommonTableGen) diff --git a/lib/Target/PTX/TargetInfo/LLVMBuild.txt b/lib/Target/PTX/TargetInfo/LLVMBuild.txt deleted file mode 100644 index 2cc30c4..0000000 --- a/lib/Target/PTX/TargetInfo/LLVMBuild.txt +++ /dev/null @@ -1,23 +0,0 @@ -;===- ./lib/Target/PTX/TargetInfo/LLVMBuild.txt ----------------*- Conf -*--===; -; -; The LLVM Compiler Infrastructure -; -; This file is distributed under the University of Illinois Open Source -; License. 
See LICENSE.TXT for details.
-;
-;===------------------------------------------------------------------------===;
-;
-; This is an LLVMBuild description file for the components in this subdirectory.
-;
-; For more information on the LLVMBuild system, please see:
-;
-;   http://llvm.org/docs/LLVMBuild.html
-;
-;===------------------------------------------------------------------------===;
-
-[component_0]
-type = Library
-name = PTXInfo
-parent = PTX
-required_libraries = MC Support Target
-add_to_library_groups = PTX
diff --git a/lib/Target/PTX/TargetInfo/Makefile b/lib/Target/PTX/TargetInfo/Makefile
deleted file mode 100644
index 8619785..0000000
--- a/lib/Target/PTX/TargetInfo/Makefile
+++ /dev/null
@@ -1,15 +0,0 @@
-##===- lib/Target/PTX/TargetInfo/Makefile ------------------*- Makefile -*-===##
-#
-# The LLVM Compiler Infrastructure
-#
-# This file is distributed under the University of Illinois Open Source
-# License. See LICENSE.TXT for details.
-#
-##===----------------------------------------------------------------------===##
-LEVEL = ../../../..
-LIBRARYNAME = LLVMPTXInfo
-
-# Hack: we need to include 'main' target directory to grab private headers
-CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
-
-include $(LEVEL)/Makefile.common
diff --git a/lib/Target/PTX/TargetInfo/PTXTargetInfo.cpp b/lib/Target/PTX/TargetInfo/PTXTargetInfo.cpp
deleted file mode 100644
index 09a2735..0000000
--- a/lib/Target/PTX/TargetInfo/PTXTargetInfo.cpp
+++ /dev/null
@@ -1,25 +0,0 @@
-//===-- PTXTargetInfo.cpp - PTX Target Implementation ---------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "PTX.h"
-#include "llvm/Module.h"
-#include "llvm/Support/TargetRegistry.h"
-
-using namespace llvm;
-
-Target llvm::ThePTX32Target;
-Target llvm::ThePTX64Target;
-
-extern "C" void LLVMInitializePTXTargetInfo() {
-  // see llvm/ADT/Triple.h
-  RegisterTarget<Triple::ptx32> X32(ThePTX32Target, "ptx32",
-                                    "PTX (32-bit) [Experimental]");
-  RegisterTarget<Triple::ptx64> X64(ThePTX64Target, "ptx64",
-                                    "PTX (64-bit) [Experimental]");
-}
diff --git a/lib/Target/PowerPC/CMakeLists.txt b/lib/Target/PowerPC/CMakeLists.txt
index bcd8bd2..192d18d 100644
--- a/lib/Target/PowerPC/CMakeLists.txt
+++ b/lib/Target/PowerPC/CMakeLists.txt
@@ -14,6 +14,7 @@ add_llvm_target(PowerPCCodeGen
   PPCAsmPrinter.cpp
   PPCBranchSelector.cpp
   PPCCodeEmitter.cpp
+  PPCCTRLoops.cpp
   PPCHazardRecognizers.cpp
   PPCInstrInfo.cpp
   PPCISelDAGToDAG.cpp
@@ -28,6 +29,8 @@ add_llvm_target(PowerPCCodeGen
   PPCSelectionDAGInfo.cpp
   )
 
+add_dependencies(LLVMPowerPCCodeGen intrinsics_gen)
+
 add_subdirectory(InstPrinter)
 add_subdirectory(TargetInfo)
 add_subdirectory(MCTargetDesc)
diff --git a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
index 61d23ce..d175e3e 100644
--- a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
+++ b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
@@ -86,8 +86,33 @@ void PPCInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
 
 void PPCInstPrinter::printPredicateOperand(const MCInst *MI, unsigned OpNo,
                                            raw_ostream &O, const char *Modifier) {
-  assert(Modifier && "Must specify 'cc' or 'reg' as predicate op modifier!");
   unsigned Code = MI->getOperand(OpNo).getImm();
+  if (!Modifier) {
+    unsigned CCReg = MI->getOperand(OpNo+1).getReg();
+    unsigned RegNo;
+    switch (CCReg) {
+    default:
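+      // Worked example (illustrative): for a predicate operand encoded as
+      // Code == ((1 << 5) | 12) used with CR6, BI == 1 and RegNo == 6 below,
+      // so this prints the CR bit number 4*6 + 1 == 25.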
llvm_unreachable("Unknown CR register"); + case PPC::CR0: RegNo = 0; break; + case PPC::CR1: RegNo = 1; break; + case PPC::CR2: RegNo = 2; break; + case PPC::CR3: RegNo = 3; break; + case PPC::CR4: RegNo = 4; break; + case PPC::CR5: RegNo = 5; break; + case PPC::CR6: RegNo = 6; break; + case PPC::CR7: RegNo = 7; break; + } + + // Print the CR bit number. The Code is ((BI << 5) | BO) for a + // BCC, but we must have the positive form here (BO == 12) + unsigned BI = Code >> 5; + assert((Code & 0xF) == 12 && + "BO in predicate bit must have the positive form"); + + unsigned Value = 4*RegNo + BI; + O << Value; + return; + } + if (StringRef(Modifier) == "cc") { switch ((PPC::Predicate)Code) { case PPC::PRED_ALWAYS: return; // Don't print anything for always. diff --git a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h index 73fd534..8f1e211 100644 --- a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h +++ b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h @@ -42,7 +42,7 @@ public: void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); void printPredicateOperand(const MCInst *MI, unsigned OpNo, - raw_ostream &O, const char *Modifier); + raw_ostream &O, const char *Modifier = 0); void printS5ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp index 5a6827f..f652422 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp @@ -77,6 +77,7 @@ public: } // end anonymous namespace MCCodeEmitter *llvm::createPPCMCCodeEmitter(const MCInstrInfo &MCII, + const MCRegisterInfo &MRI, const MCSubtargetInfo &STI, MCContext &Ctx) { return new PPCMCCodeEmitter(MCII, STI, Ctx); diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h index b7fa064..7162e15 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h +++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h @@ -22,6 +22,7 @@ class MCCodeEmitter; class MCContext; class MCInstrInfo; class MCObjectWriter; +class MCRegisterInfo; class MCSubtargetInfo; class Target; class StringRef; @@ -31,6 +32,7 @@ extern Target ThePPC32Target; extern Target ThePPC64Target; MCCodeEmitter *createPPCMCCodeEmitter(const MCInstrInfo &MCII, + const MCRegisterInfo &MRI, const MCSubtargetInfo &STI, MCContext &Ctx); diff --git a/lib/Target/PowerPC/PPC.h b/lib/Target/PowerPC/PPC.h index 24a7178..9103e12 100644 --- a/lib/Target/PowerPC/PPC.h +++ b/lib/Target/PowerPC/PPC.h @@ -30,6 +30,7 @@ namespace llvm { class AsmPrinter; class MCInst; + FunctionPass *createPPCCTRLoops(); FunctionPass *createPPCBranchSelectionPass(); FunctionPass *createPPCISelDag(PPCTargetMachine &TM); FunctionPass *createPPCJITCodeEmitterPass(PPCTargetMachine &TM, @@ -50,21 +51,27 @@ namespace llvm { /// and jumps to external functions on Tiger and earlier. MO_DARWIN_STUB = 1, - /// MO_LO16, MO_HA16 - lo16(symbol) and ha16(symbol) - MO_LO16 = 4, MO_HA16 = 8, - /// MO_PIC_FLAG - If this bit is set, the symbol reference is relative to /// the function's picbase, e.g. lo16(symbol-picbase). - MO_PIC_FLAG = 16, + MO_PIC_FLAG = 4, /// MO_NLP_FLAG - If this bit is set, the symbol reference is actually to /// the non_lazy_ptr for the global, e.g. lo16(symbol$non_lazy_ptr-picbase). 
- MO_NLP_FLAG = 32, + MO_NLP_FLAG = 8, /// MO_NLP_HIDDEN_FLAG - If this bit is set, the symbol reference is to a /// symbol with hidden visibility. This causes a different kind of /// non-lazy-pointer to be generated. - MO_NLP_HIDDEN_FLAG = 64 + MO_NLP_HIDDEN_FLAG = 16, + + /// The next are not flags but distinct values. + MO_ACCESS_MASK = 224, + + /// MO_LO16, MO_HA16 - lo16(symbol) and ha16(symbol) + MO_LO16 = 32, MO_HA16 = 64, + + MO_TPREL16_HA = 96, + MO_TPREL16_LO = 128 }; } // end namespace PPCII diff --git a/lib/Target/PowerPC/PPC.td b/lib/Target/PowerPC/PPC.td index c554d39..b7f1688 100644 --- a/lib/Target/PowerPC/PPC.td +++ b/lib/Target/PowerPC/PPC.td @@ -35,6 +35,8 @@ def Directive970 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_970", "">; def Directive32 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_32", "">; def Directive64 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_64", "">; def DirectiveA2 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_A2", "">; +def DirectivePwr6: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR6", "">; +def DirectivePwr7: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR7", "">; def Feature64Bit : SubtargetFeature<"64bit","Has64BitSupport", "true", "Enable 64-bit instructions">; @@ -42,12 +44,14 @@ def Feature64BitRegs : SubtargetFeature<"64bitregs","Use64BitRegs", "true", "Enable 64-bit registers usage for ppc32 [beta]">; def FeatureAltivec : SubtargetFeature<"altivec","HasAltivec", "true", "Enable Altivec instructions">; -def FeatureGPUL : SubtargetFeature<"gpul","IsGigaProcessor", "true", - "Enable GPUL instructions">; +def FeatureMFOCRF : SubtargetFeature<"mfocrf","HasMFOCRF", "true", + "Enable the MFOCRF instruction">; def FeatureFSqrt : SubtargetFeature<"fsqrt","HasFSQRT", "true", "Enable the fsqrt instruction">; def FeatureSTFIWX : SubtargetFeature<"stfiwx","HasSTFIWX", "true", "Enable the stfiwx instruction">; +def FeatureISEL : SubtargetFeature<"isel","HasISEL", "true", + "Enable the isel instruction">; def FeatureBookE : SubtargetFeature<"booke", "IsBookE", "true", "Enable Book E instructions">; @@ -64,8 +68,10 @@ include "PPCInstrInfo.td" // def : Processor<"generic", G3Itineraries, [Directive32]>; -def : Processor<"440", PPC440Itineraries, [Directive440, FeatureBookE]>; -def : Processor<"450", PPC440Itineraries, [Directive440, FeatureBookE]>; +def : Processor<"440", PPC440Itineraries, [Directive440, FeatureISEL, + FeatureBookE]>; +def : Processor<"450", PPC440Itineraries, [Directive440, FeatureISEL, + FeatureBookE]>; def : Processor<"601", G3Itineraries, [Directive601]>; def : Processor<"602", G3Itineraries, [Directive602]>; def : Processor<"603", G3Itineraries, [Directive603]>; @@ -74,28 +80,37 @@ def : Processor<"603ev", G3Itineraries, [Directive603]>; def : Processor<"604", G3Itineraries, [Directive604]>; def : Processor<"604e", G3Itineraries, [Directive604]>; def : Processor<"620", G3Itineraries, [Directive620]>; -def : Processor<"g3", G3Itineraries, [Directive7400]>; +def : Processor<"750", G4Itineraries, [Directive750]>; +def : Processor<"g3", G3Itineraries, [Directive750]>; def : Processor<"7400", G4Itineraries, [Directive7400, FeatureAltivec]>; def : Processor<"g4", G4Itineraries, [Directive7400, FeatureAltivec]>; def : Processor<"7450", G4PlusItineraries, [Directive7400, FeatureAltivec]>; -def : Processor<"g4+", G4PlusItineraries, [Directive750, FeatureAltivec]>; -def : Processor<"750", G4Itineraries, [Directive750, FeatureAltivec]>; +def : Processor<"g4+", G4PlusItineraries, [Directive7400, 
FeatureAltivec]>; def : Processor<"970", G5Itineraries, [Directive970, FeatureAltivec, - FeatureGPUL, FeatureFSqrt, FeatureSTFIWX, + FeatureMFOCRF, FeatureFSqrt, FeatureSTFIWX, Feature64Bit /*, Feature64BitRegs */]>; def : Processor<"g5", G5Itineraries, [Directive970, FeatureAltivec, - FeatureGPUL, FeatureFSqrt, FeatureSTFIWX, + FeatureMFOCRF, FeatureFSqrt, FeatureSTFIWX, Feature64Bit /*, Feature64BitRegs */]>; -def : Processor<"a2", PPCA2Itineraries, [DirectiveA2, FeatureBookE, - FeatureFSqrt, FeatureSTFIWX, - Feature64Bit - /*, Feature64BitRegs */]>; +def : Processor<"a2", PPCA2Itineraries, [DirectiveA2, FeatureBookE, + FeatureMFOCRF, FeatureFSqrt, + FeatureSTFIWX, FeatureISEL, + Feature64Bit + /*, Feature64BitRegs */]>; +def : Processor<"pwr6", G5Itineraries, + [DirectivePwr6, FeatureAltivec, + FeatureMFOCRF, FeatureFSqrt, FeatureSTFIWX, + Feature64Bit /*, Feature64BitRegs */]>; +def : Processor<"pwr7", G5Itineraries, + [DirectivePwr7, FeatureAltivec, + FeatureMFOCRF, FeatureFSqrt, FeatureSTFIWX, + FeatureISEL, Feature64Bit /*, Feature64BitRegs */]>; def : Processor<"ppc", G3Itineraries, [Directive32]>; def : Processor<"ppc64", G5Itineraries, [Directive64, FeatureAltivec, - FeatureGPUL, FeatureFSqrt, FeatureSTFIWX, + FeatureMFOCRF, FeatureFSqrt, FeatureSTFIWX, Feature64Bit /*, Feature64BitRegs */]>; diff --git a/lib/Target/PowerPC/PPCAsmPrinter.cpp b/lib/Target/PowerPC/PPCAsmPrinter.cpp index fb7aa71..f76b89c 100644 --- a/lib/Target/PowerPC/PPCAsmPrinter.cpp +++ b/lib/Target/PowerPC/PPCAsmPrinter.cpp @@ -22,8 +22,8 @@ #include "PPCSubtarget.h" #include "InstPrinter/PPCInstPrinter.h" #include "MCTargetDesc/PPCPredicates.h" -#include "llvm/Analysis/DebugInfo.h" #include "llvm/Constants.h" +#include "llvm/DebugInfo.h" #include "llvm/DerivedTypes.h" #include "llvm/Module.h" #include "llvm/Assembly/Writer.h" @@ -248,7 +248,9 @@ bool PPCAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, if (ExtraCode[1] != 0) return true; // Unknown modifier. switch (ExtraCode[0]) { - default: return true; // Unknown modifier. + default: + // See if this is a generic print operand + return AsmPrinter::PrintAsmOperand(MI, OpNo, AsmVariant, ExtraCode, O); case 'c': // Don't print "$" before a global var name or constant. break; // PPC never has a prefix. case 'L': // Write second word of DImode reference. @@ -451,11 +453,13 @@ void PPCDarwinAsmPrinter::EmitStartOfAsmFile(Module &M) { "ppc750", "ppc970", "ppcA2", + "power6", + "power7", "ppc64" }; unsigned Directive = Subtarget.getDarwinDirective(); - if (Subtarget.isGigaProcessor() && Directive < PPC::DIR_970) + if (Subtarget.hasMFOCRF() && Directive < PPC::DIR_970) Directive = PPC::DIR_970; if (Subtarget.hasAltivec() && Directive < PPC::DIR_7400) Directive = PPC::DIR_7400; diff --git a/lib/Target/PowerPC/PPCBranchSelector.cpp b/lib/Target/PowerPC/PPCBranchSelector.cpp index 5f775e1..21a0fb2 100644 --- a/lib/Target/PowerPC/PPCBranchSelector.cpp +++ b/lib/Target/PowerPC/PPCBranchSelector.cpp @@ -135,21 +135,33 @@ bool PPCBSel::runOnMachineFunction(MachineFunction &Fn) { MBBStartOffset += 4; continue; } - + // Otherwise, we have to expand it to a long branch. - // The BCC operands are: - // 0. PPC branch predicate - // 1. CR register - // 2. Target MBB - PPC::Predicate Pred = (PPC::Predicate)I->getOperand(0).getImm(); - unsigned CRReg = I->getOperand(1).getReg(); - MachineInstr *OldBranch = I; DebugLoc dl = OldBranch->getDebugLoc(); - - // Jump over the uncond branch inst (i.e. $PC+8) on opposite condition. 
-      BuildMI(MBB, I, dl, TII->get(PPC::BCC))
-        .addImm(PPC::InvertPredicate(Pred)).addReg(CRReg).addImm(2);
+
+      if (I->getOpcode() == PPC::BCC) {
+        // The BCC operands are:
+        // 0. PPC branch predicate
+        // 1. CR register
+        // 2. Target MBB
+        PPC::Predicate Pred = (PPC::Predicate)I->getOperand(0).getImm();
+        unsigned CRReg = I->getOperand(1).getReg();
+
+        // Jump over the uncond branch inst (i.e. $PC+8) on opposite condition.
+        BuildMI(MBB, I, dl, TII->get(PPC::BCC))
+          .addImm(PPC::InvertPredicate(Pred)).addReg(CRReg).addImm(2);
+      } else if (I->getOpcode() == PPC::BDNZ) {
+        BuildMI(MBB, I, dl, TII->get(PPC::BDZ)).addImm(2);
+      } else if (I->getOpcode() == PPC::BDNZ8) {
+        BuildMI(MBB, I, dl, TII->get(PPC::BDZ8)).addImm(2);
+      } else if (I->getOpcode() == PPC::BDZ) {
+        BuildMI(MBB, I, dl, TII->get(PPC::BDNZ)).addImm(2);
+      } else if (I->getOpcode() == PPC::BDZ8) {
+        BuildMI(MBB, I, dl, TII->get(PPC::BDNZ8)).addImm(2);
+      } else {
+        llvm_unreachable("Unhandled branch type!");
+      }
 
       // Uncond branch to the real destination.
       I = BuildMI(MBB, I, dl, TII->get(PPC::B)).addMBB(Dest);
diff --git a/lib/Target/PowerPC/PPCCTRLoops.cpp b/lib/Target/PowerPC/PPCCTRLoops.cpp
new file mode 100644
index 0000000..2a2abb1
--- /dev/null
+++ b/lib/Target/PowerPC/PPCCTRLoops.cpp
@@ -0,0 +1,724 @@
+//===-- PPCCTRLoops.cpp - Identify and generate CTR loops -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass identifies loops where we can generate the PPC branch instructions
+// that decrement and test the count register (CTR) (bdnz and friends).
+// This pass is based on the HexagonHardwareLoops pass.
+//
+// The pattern that defines the induction variable can change depending on
+// prior optimizations. For example, the IndVarSimplify phase run by 'opt'
+// normalizes induction variables, and the Loop Strength Reduction pass
+// run by 'llc' may also make changes to the induction variable.
+// The pattern detected by this phase is due to running Strength Reduction.
+//
+// Criteria for CTR loops:
+//  - Countable loops (w/ ind. var for a trip count)
+//  - Assumes loops are normalized by IndVarSimplify
+//  - Try inner-most loops first
+//  - No nested CTR loops.
+//  - No function calls in loops.
+//
+// Note: As with unconverted loops, PPCBranchSelector must be run after this
+// pass in order to convert long-displacement jumps into jump pairs.
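+//
+// For illustration (a sketch, not taken from the original source), a
+// countable loop such as
+//
+//   for (int i = 1024; i != 0; --i) { /* straight-line body, no calls */ }
+//
+// is rewritten so that the trip count lives in CTR:
+//
+//   li    r3, 1024
+//   mtctr r3             ; load the trip count into CTR
+// loop:
+//   ...                  ; loop body
+//   bdnz  loop           ; decrement CTR and branch while it is non-zero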
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "ctrloops" +#include "PPC.h" +#include "PPCTargetMachine.h" +#include "MCTargetDesc/PPCPredicates.h" +#include "llvm/Constants.h" +#include "llvm/PassSupport.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/RegisterScavenging.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" +#include + +using namespace llvm; + +STATISTIC(NumCTRLoops, "Number of loops converted to CTR loops"); + +namespace { + class CountValue; + struct PPCCTRLoops : public MachineFunctionPass { + MachineLoopInfo *MLI; + MachineRegisterInfo *MRI; + const TargetInstrInfo *TII; + + public: + static char ID; // Pass identification, replacement for typeid + + PPCCTRLoops() : MachineFunctionPass(ID) {} + + virtual bool runOnMachineFunction(MachineFunction &MF); + + const char *getPassName() const { return "PPC CTR Loops"; } + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); + AU.addRequired(); + AU.addPreserved(); + AU.addRequired(); + AU.addPreserved(); + MachineFunctionPass::getAnalysisUsage(AU); + } + + private: + /// getCanonicalInductionVariable - Check to see if the loop has a canonical + /// induction variable. + /// Should be defined in MachineLoop. Based upon version in class Loop. + void getCanonicalInductionVariable(MachineLoop *L, + SmallVector &IVars, + SmallVector &IOps) const; + + /// getTripCount - Return a loop-invariant LLVM register indicating the + /// number of times the loop will be executed. If the trip-count cannot + /// be determined, this return null. + CountValue *getTripCount(MachineLoop *L, + SmallVector &OldInsts) const; + + /// isInductionOperation - Return true if the instruction matches the + /// pattern for an opertion that defines an induction variable. + bool isInductionOperation(const MachineInstr *MI, unsigned IVReg) const; + + /// isInvalidOperation - Return true if the instruction is not valid within + /// a CTR loop. + bool isInvalidLoopOperation(const MachineInstr *MI) const; + + /// containsInavlidInstruction - Return true if the loop contains an + /// instruction that inhibits using the CTR loop. + bool containsInvalidInstruction(MachineLoop *L) const; + + /// converToCTRLoop - Given a loop, check if we can convert it to a + /// CTR loop. If so, then perform the conversion and return true. + bool convertToCTRLoop(MachineLoop *L); + + /// isDead - Return true if the instruction is now dead. + bool isDead(const MachineInstr *MI, + SmallVector &DeadPhis) const; + + /// removeIfDead - Remove the instruction if it is now dead. + void removeIfDead(MachineInstr *MI); + }; + + char PPCCTRLoops::ID = 0; + + + // CountValue class - Abstraction for a trip count of a loop. A + // smaller vesrsion of the MachineOperand class without the concerns + // of changing the operand representation. 
+  class CountValue {
+  public:
+    enum CountValueType {
+      CV_Register,
+      CV_Immediate
+    };
+  private:
+    CountValueType Kind;
+    union Values {
+      unsigned RegNum;
+      int64_t  ImmVal;
+      Values(unsigned r) : RegNum(r) {}
+      Values(int64_t i)  : ImmVal(i) {}
+    } Contents;
+    bool isNegative;
+
+  public:
+    CountValue(unsigned r, bool neg) : Kind(CV_Register), Contents(r),
+                                       isNegative(neg) {}
+    explicit CountValue(int64_t i) : Kind(CV_Immediate), Contents(i),
+                                     isNegative(i < 0) {}
+    CountValueType getType() const { return Kind; }
+    bool isReg() const { return Kind == CV_Register; }
+    bool isImm() const { return Kind == CV_Immediate; }
+    bool isNeg() const { return isNegative; }
+
+    unsigned getReg() const {
+      assert(isReg() && "Wrong CountValue accessor");
+      return Contents.RegNum;
+    }
+    void setReg(unsigned Val) {
+      Contents.RegNum = Val;
+    }
+    int64_t getImm() const {
+      assert(isImm() && "Wrong CountValue accessor");
+      if (isNegative) {
+        return -Contents.ImmVal;
+      }
+      return Contents.ImmVal;
+    }
+    void setImm(int64_t Val) {
+      Contents.ImmVal = Val;
+    }
+
+    void print(raw_ostream &OS, const TargetMachine *TM = 0) const {
+      if (isReg()) { OS << PrintReg(getReg()); }
+      if (isImm()) { OS << getImm(); }
+    }
+  };
+} // end anonymous namespace
+
+
+/// isCompareEqualsImm - Returns true if the instruction is a compare equals
+/// instruction with an immediate operand.
+static bool isCompareEqualsImm(const MachineInstr *MI, bool &SignedCmp) {
+  if (MI->getOpcode() == PPC::CMPWI || MI->getOpcode() == PPC::CMPDI) {
+    SignedCmp = true;
+    return true;
+  } else if (MI->getOpcode() == PPC::CMPLWI || MI->getOpcode() == PPC::CMPLDI) {
+    SignedCmp = false;
+    return true;
+  }
+
+  return false;
+}
+
+
+/// createPPCCTRLoops - Factory for creating
+/// the CTR loop phase.
+FunctionPass *llvm::createPPCCTRLoops() {
+  return new PPCCTRLoops();
+}
+
+
+bool PPCCTRLoops::runOnMachineFunction(MachineFunction &MF) {
+  DEBUG(dbgs() << "********* PPC CTR Loops *********\n");
+
+  bool Changed = false;
+
+  // get the loop information
+  MLI = &getAnalysis<MachineLoopInfo>();
+  // get the register information
+  MRI = &MF.getRegInfo();
+  // the target specific instruction info.
+  TII = MF.getTarget().getInstrInfo();
+
+  for (MachineLoopInfo::iterator I = MLI->begin(), E = MLI->end();
+       I != E; ++I) {
+    MachineLoop *L = *I;
+    if (!L->getParentLoop()) {
+      Changed |= convertToCTRLoop(L);
+    }
+  }
+
+  return Changed;
+}
+
+/// getCanonicalInductionVariable - Check to see if the loop has a canonical
+/// induction variable. We check for a simple recurrence pattern - an
+/// integer recurrence that decrements by one each time through the loop and
+/// ends at zero. If so, return the phi node that corresponds to it.
+///
+/// Based upon the similar code in LoopInfo except this code is specific to
+/// the machine.
+/// This method assumes that the IndVarSimplify pass has been run by 'opt'.
+///
+void
+PPCCTRLoops::getCanonicalInductionVariable(MachineLoop *L,
+                                  SmallVector<MachineInstr *, 4> &IVars,
+                                  SmallVector<MachineInstr *, 4> &IOps) const {
+  MachineBasicBlock *TopMBB = L->getTopBlock();
+  MachineBasicBlock::pred_iterator PI = TopMBB->pred_begin();
+  assert(PI != TopMBB->pred_end() &&
+         "Loop must have more than one incoming edge!");
+  MachineBasicBlock *Backedge = *PI++;
+  if (PI == TopMBB->pred_end()) return;  // dead loop
+  MachineBasicBlock *Incoming = *PI++;
+  if (PI != TopMBB->pred_end()) return;  // multiple backedges?
+
+  // make sure there is one incoming and one backedge and determine which
+  // is which.
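+  // The shape being matched (an illustrative sketch): in the loop header,
+  //   %iv   = PHI %init, <preheader>, %next, <latch>
+  // where the backedge value is produced by an induction operation such as
+  //   %next = ADDI %iv, -1
+  // isInductionOperation() below accepts exactly this ADDI/ADDI8 form.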
+  if (L->contains(Incoming)) {
+    if (L->contains(Backedge))
+      return;
+    std::swap(Incoming, Backedge);
+  } else if (!L->contains(Backedge))
+    return;
+
+  // Loop over all of the PHI nodes, looking for a canonical induction
+  // variable:
+  //  - The PHI node is "reg1 = PHI reg2, BB1, reg3, BB2".
+  //  - The recurrence comes from the backedge.
+  //  - the definition is an induction operation.
+  for (MachineBasicBlock::iterator I = TopMBB->begin(), E = TopMBB->end();
+       I != E && I->isPHI(); ++I) {
+    MachineInstr *MPhi = &*I;
+    unsigned DefReg = MPhi->getOperand(0).getReg();
+    for (unsigned i = 1; i != MPhi->getNumOperands(); i += 2) {
+      // Check each operand for the value from the backedge.
+      MachineBasicBlock *MBB = MPhi->getOperand(i+1).getMBB();
+      if (L->contains(MBB)) { // operand comes from the backedge
+        // Check if the definition is an induction operation.
+        MachineInstr *DI = MRI->getVRegDef(MPhi->getOperand(i).getReg());
+        if (isInductionOperation(DI, DefReg)) {
+          IOps.push_back(DI);
+          IVars.push_back(MPhi);
+        }
+      }
+    }
+  }
+  return;
+}
+
+/// getTripCount - Return a loop-invariant LLVM value indicating the
+/// number of times the loop will be executed. The trip count can
+/// be either a register or a constant value. If the trip-count
+/// cannot be determined, this returns null.
+///
+/// We find the trip count from the phi instruction that defines the
+/// induction variable. We follow the links to the CMP instruction
+/// to get the trip count.
+///
+/// Based upon getTripCount in LoopInfo.
+///
+CountValue *PPCCTRLoops::getTripCount(MachineLoop *L,
+                          SmallVector<MachineInstr *, 2> &OldInsts) const {
+  MachineBasicBlock *LastMBB = L->getExitingBlock();
+  // Don't generate a CTR loop if the loop has more than one exit.
+  if (LastMBB == 0)
+    return 0;
+
+  MachineBasicBlock::iterator LastI = LastMBB->getFirstTerminator();
+  if (LastI->getOpcode() != PPC::BCC)
+    return 0;
+
+  // We need to make sure that this compare is defining the condition
+  // register actually used by the terminating branch.
+
+  unsigned PredReg = LastI->getOperand(1).getReg();
+  DEBUG(dbgs() << "Examining loop with first terminator: " << *LastI);
+
+  unsigned PredCond = LastI->getOperand(0).getImm();
+  if (PredCond != PPC::PRED_EQ && PredCond != PPC::PRED_NE)
+    return 0;
+
+  // Check that the loop has an induction variable.
+  SmallVector<MachineInstr *, 4> IVars, IOps;
+  getCanonicalInductionVariable(L, IVars, IOps);
+  for (unsigned i = 0; i < IVars.size(); ++i) {
+    MachineInstr *IOp = IOps[i];
+    MachineInstr *IV_Inst = IVars[i];
+
+    // Canonical loops will end with a 'cmpwi/cmpdi cr, IV, Imm',
+    //  if Imm is 0, get the count from the PHI opnd
+    //  if Imm is -M, then M is the count
+    //  Otherwise, Imm is the count
+    MachineOperand *IV_Opnd;
+    const MachineOperand *InitialValue;
+    if (!L->contains(IV_Inst->getOperand(2).getMBB())) {
+      InitialValue = &IV_Inst->getOperand(1);
+      IV_Opnd = &IV_Inst->getOperand(3);
+    } else {
+      InitialValue = &IV_Inst->getOperand(3);
+      IV_Opnd = &IV_Inst->getOperand(1);
+    }
+
+    DEBUG(dbgs() << "Considering:\n");
+    DEBUG(dbgs() << "  induction operation: " << *IOp);
+    DEBUG(dbgs() << "  induction variable: " << *IV_Inst);
+    DEBUG(dbgs() << "  initial value: " << *InitialValue << "\n");
+
+    // Look for the cmp instruction to determine if we
+    // can get a useful trip count. The trip count can
+    // be either a register or an immediate. The location
+    // of the value depends upon the type (reg or imm).
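+    // Worked example (illustrative): if the initial value is loaded by
+    //   li r, 10                  => start == 10
+    // and the exit compare is 'cmpwi cr, IV, 110' with an induction step
+    // iv_value == 2, the code below computes
+    //   count == (110 - 10) / 2 == 50 iterations.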
+    for (MachineRegisterInfo::reg_iterator
+         RI = MRI->reg_begin(IV_Opnd->getReg()), RE = MRI->reg_end();
+         RI != RE; ++RI) {
+      IV_Opnd = &RI.getOperand();
+      bool SignedCmp;
+      MachineInstr *MI = IV_Opnd->getParent();
+      if (L->contains(MI) && isCompareEqualsImm(MI, SignedCmp) &&
+          MI->getOperand(0).getReg() == PredReg) {
+
+        OldInsts.push_back(MI);
+        OldInsts.push_back(IOp);
+
+        DEBUG(dbgs() << "  compare: " << *MI);
+
+        const MachineOperand &MO = MI->getOperand(2);
+        assert(MO.isImm() && "IV Cmp Operand should be an immediate");
+
+        int64_t ImmVal;
+        if (SignedCmp)
+          ImmVal = (short) MO.getImm();
+        else
+          ImmVal = MO.getImm();
+
+        const MachineInstr *IV_DefInstr = MRI->getVRegDef(IV_Opnd->getReg());
+        assert(L->contains(IV_DefInstr->getParent()) &&
+               "IV definition should occur in loop");
+        int64_t iv_value = (short) IV_DefInstr->getOperand(2).getImm();
+
+        assert(InitialValue->isReg() && "Expecting register for init value");
+        unsigned InitialValueReg = InitialValue->getReg();
+
+        const MachineInstr *DefInstr = MRI->getVRegDef(InitialValueReg);
+
+        // Here we need to look for an immediate load (an li or lis/ori pair).
+        if (DefInstr && (DefInstr->getOpcode() == PPC::ORI8 ||
+                         DefInstr->getOpcode() == PPC::ORI)) {
+          int64_t start = (short) DefInstr->getOperand(2).getImm();
+          const MachineInstr *DefInstr2 =
+            MRI->getVRegDef(DefInstr->getOperand(0).getReg());
+          if (DefInstr2 && (DefInstr2->getOpcode() == PPC::LIS8 ||
+                            DefInstr2->getOpcode() == PPC::LIS)) {
+            DEBUG(dbgs() << "  initial constant: " << *DefInstr);
+            DEBUG(dbgs() << "  initial constant: " << *DefInstr2);
+
+            start |= int64_t(short(DefInstr2->getOperand(1).getImm())) << 16;
+
+            int64_t count = ImmVal - start;
+            if ((count % iv_value) != 0) {
+              return 0;
+            }
+            return new CountValue(count/iv_value);
+          }
+        } else if (DefInstr && (DefInstr->getOpcode() == PPC::LI8 ||
+                                DefInstr->getOpcode() == PPC::LI)) {
+          DEBUG(dbgs() << "  initial constant: " << *DefInstr);
+
+          int64_t count =
+            ImmVal - int64_t(short(DefInstr->getOperand(1).getImm()));
+          if ((count % iv_value) != 0) {
+            return 0;
+          }
+          return new CountValue(count/iv_value);
+        } else if (iv_value == 1 || iv_value == -1) {
+          // We can't determine a constant starting value.
+          if (ImmVal == 0) {
+            return new CountValue(InitialValueReg, iv_value > 0);
+          }
+          // FIXME: handle non-zero end value.
+        }
+        // FIXME: handle non-unit increments (we might not want to introduce
+        // division but we can handle some 2^n cases with shifts).
+
+      }
+    }
+  }
+  return 0;
+}
+
+/// isInductionOperation - Return true if the operation matches the
+/// pattern that defines an induction variable:
+///    addi iv, c
+///
+bool
+PPCCTRLoops::isInductionOperation(const MachineInstr *MI,
+                                  unsigned IVReg) const {
+  return ((MI->getOpcode() == PPC::ADDI || MI->getOpcode() == PPC::ADDI8) &&
+          MI->getOperand(1).isReg() && // could be a frame index instead
+          MI->getOperand(1).getReg() == IVReg);
+}
+
+/// isInvalidLoopOperation - Return true if the operation is invalid within
+/// a CTR loop.
+bool
+PPCCTRLoops::isInvalidLoopOperation(const MachineInstr *MI) const {
+
+  // A call is not allowed because the callee may use a CTR loop.
+  if (MI->getDesc().isCall()) {
+    return true;
+  }
+  // Check if the instruction defines a CTR loop register
+  // (this will also catch nested CTR loops).
+  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+    const MachineOperand &MO = MI->getOperand(i);
+    if (MO.isReg() && MO.isDef() &&
+        (MO.getReg() == PPC::CTR || MO.getReg() == PPC::CTR8)) {
+      return true;
+    }
+  }
+  return false;
+}
+
+/// containsInvalidInstruction - Return true if the loop contains
+/// an instruction that inhibits the use of the CTR loop function.
+///
+bool PPCCTRLoops::containsInvalidInstruction(MachineLoop *L) const {
+  const std::vector<MachineBasicBlock *> Blocks = L->getBlocks();
+  for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
+    MachineBasicBlock *MBB = Blocks[i];
+    for (MachineBasicBlock::iterator
+         MII = MBB->begin(), E = MBB->end(); MII != E; ++MII) {
+      const MachineInstr *MI = &*MII;
+      if (isInvalidLoopOperation(MI)) {
+        return true;
+      }
+    }
+  }
+  return false;
+}
+
+/// isDead - Return true if the instruction is dead.
+/// (This was essentially copied from DeadMachineInstructionElim::isDead, but
+/// with the special cases for inline asm, physical registers and instructions
+/// with side effects removed.)
+bool PPCCTRLoops::isDead(const MachineInstr *MI,
+                         SmallVector &DeadPhis) const {
+  // Examine each operand.
+  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+    const MachineOperand &MO = MI->getOperand(i);
+    if (MO.isReg() && MO.isDef()) {
+      unsigned Reg = MO.getReg();
+      if (!MRI->use_nodbg_empty(Reg)) {
+        // This instruction has users, but if the only user is the phi node
+        // for the parent block, and the only use of that phi node is this
+        // instruction, then this instruction is dead: both it and the phi
+        // node can be removed.
+        MachineRegisterInfo::use_iterator I = MRI->use_begin(Reg);
+        if (llvm::next(I) == MRI->use_end() &&
+            I.getOperand().getParent()->isPHI()) {
+          MachineInstr *OnePhi = I.getOperand().getParent();
+
+          for (unsigned j = 0, f = OnePhi->getNumOperands(); j != f; ++j) {
+            const MachineOperand &OPO = OnePhi->getOperand(j);
+            if (OPO.isReg() && OPO.isDef()) {
+              unsigned OPReg = OPO.getReg();
+
+              MachineRegisterInfo::use_iterator nextJ;
+              for (MachineRegisterInfo::use_iterator J = MRI->use_begin(OPReg),
+                   E = MRI->use_end(); J != E; J = nextJ) {
+                nextJ = llvm::next(J);
+                MachineOperand &Use = J.getOperand();
+                MachineInstr *UseMI = Use.getParent();
+
+                if (MI != UseMI) {
+                  // The phi node has a user that is not MI, bail...
+                  return false;
+                }
+              }
+            }
+          }
+
+          DeadPhis.push_back(OnePhi);
+        } else {
+          // This def has a non-debug use.  Don't delete the instruction!
+          return false;
+        }
+      }
+    }
+  }
+
+  // If there are no defs with uses, the instruction is dead.
+  return true;
+}
+
+void PPCCTRLoops::removeIfDead(MachineInstr *MI) {
+  // This procedure was essentially copied from DeadMachineInstructionElim.
+
+  SmallVector DeadPhis;
+  if (isDead(MI, DeadPhis)) {
+    DEBUG(dbgs() << "CTR looping will remove: " << *MI);
+
+    // It is possible that some DBG_VALUE instructions refer to this
+    // instruction.  Examine each def operand for such references;
+    // if found, mark the DBG_VALUE as undef (but don't delete it).
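Before the operand scan that follows, here is a minimal, self-contained sketch (hypothetical types, not part of the patch) of the use-graph shape isDead tolerates:

    #include <vector>

    struct Inst {
      std::vector<Inst *> Users; // users of this instruction's defs
      bool IsPHI;
    };

    // "Dead" if there are no users, or if the only user is a phi whose only
    // user is the instruction itself: the add <-> phi self-cycle left behind
    // once the loop branch consults CTR instead of the induction variable.
    static bool isDeadWithPhiCycle(const Inst *I) {
      if (I->Users.empty())
        return true;
      if (I->Users.size() == 1 && I->Users[0]->IsPHI) {
        const Inst *Phi = I->Users[0];
        return Phi->Users.size() == 1 && Phi->Users[0] == I;
      }
      return false;
    }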
+    for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+      const MachineOperand &MO = MI->getOperand(i);
+      if (!MO.isReg() || !MO.isDef())
+        continue;
+      unsigned Reg = MO.getReg();
+      MachineRegisterInfo::use_iterator nextI;
+      for (MachineRegisterInfo::use_iterator I = MRI->use_begin(Reg),
+           E = MRI->use_end(); I != E; I = nextI) {
+        nextI = llvm::next(I);  // I is invalidated by the setReg
+        MachineOperand &Use = I.getOperand();
+        MachineInstr *UseMI = Use.getParent();
+        if (UseMI == MI)
+          continue;
+        if (Use.isDebug()) // this might also be an instr -> phi -> instr case
+                           // which can also be removed.
+          UseMI->getOperand(0).setReg(0U);
+      }
+    }
+
+    MI->eraseFromParent();
+    for (unsigned i = 0; i < DeadPhis.size(); ++i) {
+      DeadPhis[i]->eraseFromParent();
+    }
+  }
+}
+
+/// convertToCTRLoop - Check if the loop is a candidate for
+/// converting to a CTR loop.  If so, then perform the
+/// transformation.
+///
+/// This function works on innermost loops first.  A loop can
+/// be converted if it is a counting loop whose trip count is
+/// either a register value or an immediate.
+///
+/// The code makes several assumptions about the representation
+/// of the loop in LLVM.
+bool PPCCTRLoops::convertToCTRLoop(MachineLoop *L) {
+  bool Changed = false;
+  // Process nested loops first.
+  for (MachineLoop::iterator I = L->begin(), E = L->end(); I != E; ++I) {
+    Changed |= convertToCTRLoop(*I);
+  }
+  // If a nested loop has been converted, then we can't convert this loop.
+  if (Changed) {
+    return Changed;
+  }
+
+  SmallVector OldInsts;
+  // Are we able to determine the trip count for the loop?
+  CountValue *TripCount = getTripCount(L, OldInsts);
+  if (TripCount == 0) {
+    DEBUG(dbgs() << "failed to get trip count!\n");
+    return false;
+  }
+  // Does the loop contain any invalid instructions?
+  if (containsInvalidInstruction(L)) {
+    return false;
+  }
+  MachineBasicBlock *Preheader = L->getLoopPreheader();
+  // No preheader means there's no place for the loop instr.
+  if (Preheader == 0) {
+    return false;
+  }
+  MachineBasicBlock::iterator InsertPos = Preheader->getFirstTerminator();
+
+  DebugLoc dl;
+  if (InsertPos != Preheader->end())
+    dl = InsertPos->getDebugLoc();
+
+  MachineBasicBlock *LastMBB = L->getExitingBlock();
+  // Don't generate a CTR loop if the loop has more than one exit.
+  if (LastMBB == 0) {
+    return false;
+  }
+  MachineBasicBlock::iterator LastI = LastMBB->getFirstTerminator();
+
+  // Determine the loop start.
+  MachineBasicBlock *LoopStart = L->getTopBlock();
+  if (L->getLoopLatch() != LastMBB) {
+    // When the exit and latch are not the same, use the latch block as the
+    // start.  The loop start address is used only after the 1st iteration,
+    // and the loop latch may contain instrs. that need to be executed after
+    // the 1st iter.
+    LoopStart = L->getLoopLatch();
+    // Make sure the latch is a successor of the exit, otherwise it won't work.
+    if (!LastMBB->isSuccessor(LoopStart)) {
+      return false;
+    }
+  }
+
+  // Convert the loop to a CTR loop.
+  DEBUG(dbgs() << "Change to CTR loop at "; L->dump());
+
+  MachineFunction *MF = LastMBB->getParent();
+  const PPCSubtarget &Subtarget = MF->getTarget().getSubtarget<PPCSubtarget>();
+  bool isPPC64 = Subtarget.isPPC64();
+
+  const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
+  const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
+  const TargetRegisterClass *RC = isPPC64 ? G8RC : GPRC;
+
+  unsigned CountReg;
+  if (TripCount->isReg()) {
+    // Create a copy of the loop count register.
+    const TargetRegisterClass *SrcRC =
+      MF->getRegInfo().getRegClass(TripCount->getReg());
+    CountReg = MF->getRegInfo().createVirtualRegister(RC);
+    unsigned CopyOp = (isPPC64 && SrcRC == GPRC) ?
+                        (unsigned) PPC::EXTSW_32_64 :
+                        (unsigned) TargetOpcode::COPY;
+    BuildMI(*Preheader, InsertPos, dl,
+            TII->get(CopyOp), CountReg).addReg(TripCount->getReg());
+    if (TripCount->isNeg()) {
+      unsigned CountReg1 = CountReg;
+      CountReg = MF->getRegInfo().createVirtualRegister(RC);
+      BuildMI(*Preheader, InsertPos, dl,
+              TII->get(isPPC64 ? PPC::NEG8 : PPC::NEG),
+              CountReg).addReg(CountReg1);
+    }
+  } else {
+    assert(TripCount->isImm() && "Expecting immediate value for trip count");
+    // Put the trip count in a register for transfer into the count register.
+
+    int64_t CountImm = TripCount->getImm();
+    assert(!TripCount->isNeg() && "Constant trip count must be positive");
+
+    CountReg = MF->getRegInfo().createVirtualRegister(RC);
+    if (CountImm > 0xFFFF) {
+      BuildMI(*Preheader, InsertPos, dl,
+              TII->get(isPPC64 ? PPC::LIS8 : PPC::LIS),
+              CountReg).addImm(CountImm >> 16);
+      unsigned CountReg1 = CountReg;
+      CountReg = MF->getRegInfo().createVirtualRegister(RC);
+      BuildMI(*Preheader, InsertPos, dl,
+              TII->get(isPPC64 ? PPC::ORI8 : PPC::ORI),
+              CountReg).addReg(CountReg1).addImm(CountImm & 0xFFFF);
+    } else {
+      BuildMI(*Preheader, InsertPos, dl,
+              TII->get(isPPC64 ? PPC::LI8 : PPC::LI),
+              CountReg).addImm(CountImm);
+    }
+  }
+
+  // Add the mtctr instruction to the beginning of the loop.
+  BuildMI(*Preheader, InsertPos, dl,
+          TII->get(isPPC64 ? PPC::MTCTR8 : PPC::MTCTR)).addReg(CountReg,
+            TripCount->isImm() ? RegState::Kill : 0);
+
+  // Make sure the loop start always has a reference in the CFG.  We need
+  // to create a BlockAddress operand to get this mechanism to work; both
+  // the MachineBasicBlock and BasicBlock objects need the flag set.
+  LoopStart->setHasAddressTaken();
+  // This line is needed to set the hasAddressTaken flag on the BasicBlock
+  // object.
+  BlockAddress::get(const_cast<BasicBlock *>(LoopStart->getBasicBlock()));
+
+  // Replace the loop branch with a bdnz instruction.
+  dl = LastI->getDebugLoc();
+  const std::vector<MachineBasicBlock *> Blocks = L->getBlocks();
+  for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
+    MachineBasicBlock *MBB = Blocks[i];
+    if (MBB != Preheader)
+      MBB->addLiveIn(isPPC64 ? PPC::CTR8 : PPC::CTR);
+  }
+
+  // The loop ends with either:
+  //   - a conditional branch followed by an unconditional branch, or
+  //   - a conditional branch to the loop start.
+  assert(LastI->getOpcode() == PPC::BCC &&
+         "loop end must start with a BCC instruction");
+  // Either the BCC branches to the beginning of the loop, or it
+  // branches out of the loop and there is an unconditional branch
+  // to the start of the loop.
+  MachineBasicBlock *BranchTarget = LastI->getOperand(2).getMBB();
+  BuildMI(*LastMBB, LastI, dl,
+          TII->get((BranchTarget == LoopStart) ?
+                   (isPPC64 ? PPC::BDNZ8 : PPC::BDNZ) :
+                   (isPPC64 ? PPC::BDZ8 : PPC::BDZ))).addMBB(BranchTarget);
+
+  // The old conditional branch is now redundant; just delete it.
+  DEBUG(dbgs() << "Removing old branch: " << *LastI);
+  LastMBB->erase(LastI);
+
+  delete TripCount;
+
+  // The induction operation (add) and the comparison (cmpwi) may now be
+  // unneeded.  If these are unneeded, then remove them.
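Before those removals (which follow below), a quick aside on the lis/ori materialization above: lis installs the high halfword and ori ORs in the low one, so any 32-bit trip count reassembles exactly (ignoring the sign extension lis additionally performs on 64-bit registers). A hypothetical round-trip check, not part of the patch:

    #include <cstdint>

    // Round-trip the split used when CountImm > 0xFFFF.
    static bool splitRoundTrips(uint32_t CountImm) {
      uint32_t Hi = CountImm >> 16;     // lis CountReg, Hi
      uint32_t Lo = CountImm & 0xFFFF;  // ori CountReg, CountReg, Lo
      return ((Hi << 16) | Lo) == CountImm;
    }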
+ for (unsigned i = 0; i < OldInsts.size(); ++i) + removeIfDead(OldInsts[i]); + + ++NumCTRLoops; + return true; +} + diff --git a/lib/Target/PowerPC/PPCFrameLowering.cpp b/lib/Target/PowerPC/PPCFrameLowering.cpp index b77a80b..c24afa9 100644 --- a/lib/Target/PowerPC/PPCFrameLowering.cpp +++ b/lib/Target/PowerPC/PPCFrameLowering.cpp @@ -330,6 +330,8 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const { BuildMI(MBB, MBBI, dl, TII.get(PPC::MFLR), PPC::R0); if (HasFP) + // FIXME: On PPC32 SVR4, FPOffset is negative and access to negative + // offsets of R1 is not allowed. BuildMI(MBB, MBBI, dl, TII.get(PPC::STW)) .addReg(PPC::R31) .addImm(FPOffset) @@ -366,9 +368,9 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const { BuildMI(MBB, MBBI, dl, TII.get(PPC::SUBFIC) ,PPC::R0) .addReg(PPC::R0, RegState::Kill) .addImm(NegFrameSize); - BuildMI(MBB, MBBI, dl, TII.get(PPC::STWUX)) + BuildMI(MBB, MBBI, dl, TII.get(PPC::STWUX), PPC::R1) .addReg(PPC::R1, RegState::Kill) - .addReg(PPC::R1, RegState::Define) + .addReg(PPC::R1) .addReg(PPC::R0); } else if (isInt<16>(NegFrameSize)) { BuildMI(MBB, MBBI, dl, TII.get(PPC::STWU), PPC::R1) @@ -381,9 +383,9 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const { BuildMI(MBB, MBBI, dl, TII.get(PPC::ORI), PPC::R0) .addReg(PPC::R0, RegState::Kill) .addImm(NegFrameSize & 0xFFFF); - BuildMI(MBB, MBBI, dl, TII.get(PPC::STWUX)) + BuildMI(MBB, MBBI, dl, TII.get(PPC::STWUX), PPC::R1) .addReg(PPC::R1, RegState::Kill) - .addReg(PPC::R1, RegState::Define) + .addReg(PPC::R1) .addReg(PPC::R0); } } else { // PPC64. @@ -399,9 +401,9 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const { BuildMI(MBB, MBBI, dl, TII.get(PPC::SUBFIC8), PPC::X0) .addReg(PPC::X0) .addImm(NegFrameSize); - BuildMI(MBB, MBBI, dl, TII.get(PPC::STDUX)) + BuildMI(MBB, MBBI, dl, TII.get(PPC::STDUX), PPC::X1) .addReg(PPC::X1, RegState::Kill) - .addReg(PPC::X1, RegState::Define) + .addReg(PPC::X1) .addReg(PPC::X0); } else if (isInt<16>(NegFrameSize)) { BuildMI(MBB, MBBI, dl, TII.get(PPC::STDU), PPC::X1) @@ -414,9 +416,9 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const { BuildMI(MBB, MBBI, dl, TII.get(PPC::ORI8), PPC::X0) .addReg(PPC::X0, RegState::Kill) .addImm(NegFrameSize & 0xFFFF); - BuildMI(MBB, MBBI, dl, TII.get(PPC::STDUX)) + BuildMI(MBB, MBBI, dl, TII.get(PPC::STDUX), PPC::X1) .addReg(PPC::X1, RegState::Kill) - .addReg(PPC::X1, RegState::Define) + .addReg(PPC::X1) .addReg(PPC::X0); } } @@ -492,7 +494,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const { // This is a bit of a hack: CR2LT, CR2GT, CR2EQ and CR2UN are just // subregisters of CR2. We just need to emit a move of CR2. 
- if (PPC::CRBITRCRegisterClass->contains(Reg)) + if (PPC::CRBITRCRegClass.contains(Reg)) continue; MachineLocation CSDst(MachineLocation::VirtualFP, Offset); @@ -817,7 +819,7 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF) for (unsigned i = 0, e = CSI.size(); i != e; ++i) { unsigned Reg = CSI[i].getReg(); - if (PPC::GPRCRegisterClass->contains(Reg)) { + if (PPC::GPRCRegClass.contains(Reg)) { HasGPSaveArea = true; GPRegs.push_back(CSI[i]); @@ -825,7 +827,7 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF) if (Reg < MinGPR) { MinGPR = Reg; } - } else if (PPC::G8RCRegisterClass->contains(Reg)) { + } else if (PPC::G8RCRegClass.contains(Reg)) { HasG8SaveArea = true; G8Regs.push_back(CSI[i]); @@ -833,7 +835,7 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF) if (Reg < MinG8R) { MinG8R = Reg; } - } else if (PPC::F8RCRegisterClass->contains(Reg)) { + } else if (PPC::F8RCRegClass.contains(Reg)) { HasFPSaveArea = true; FPRegs.push_back(CSI[i]); @@ -842,12 +844,12 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF) MinFPR = Reg; } // FIXME SVR4: Disable CR save area for now. - } else if (PPC::CRBITRCRegisterClass->contains(Reg) - || PPC::CRRCRegisterClass->contains(Reg)) { + } else if (PPC::CRBITRCRegClass.contains(Reg) || + PPC::CRRCRegClass.contains(Reg)) { // HasCRSaveArea = true; - } else if (PPC::VRSAVERCRegisterClass->contains(Reg)) { + } else if (PPC::VRSAVERCRegClass.contains(Reg)) { HasVRSAVESaveArea = true; - } else if (PPC::VRRCRegisterClass->contains(Reg)) { + } else if (PPC::VRRCRegClass.contains(Reg)) { HasVRSaveArea = true; VRegs.push_back(CSI[i]); @@ -932,8 +934,8 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF) for (unsigned i = 0, e = CSI.size(); i != e; ++i) { unsigned Reg = CSI[i].getReg(); - if (PPC::CRBITRCRegisterClass->contains(Reg) || - PPC::CRRCRegisterClass->contains(Reg)) { + if (PPC::CRBITRCRegClass.contains(Reg) || + PPC::CRRCRegClass.contains(Reg)) { int FI = CSI[i].getFrameIdx(); FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI)); @@ -950,7 +952,7 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF) for (unsigned i = 0, e = CSI.size(); i != e; ++i) { unsigned Reg = CSI[i].getReg(); - if (PPC::VRSAVERCRegisterClass->contains(Reg)) { + if (PPC::VRSAVERCRegClass.contains(Reg)) { int FI = CSI[i].getFrameIdx(); FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI)); diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp index 5a04888..a00f686 100644 --- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -111,6 +111,23 @@ namespace { /// immediate field. Because preinc imms have already been validated, just /// accept it. bool SelectAddrImmOffs(SDValue N, SDValue &Out) const { + if (isa(N) || N.getOpcode() == PPCISD::Lo || + N.getOpcode() == ISD::TargetGlobalAddress) { + Out = N; + return true; + } + + return false; + } + + /// SelectAddrIdxOffs - Return true if the operand is valid for a preinc + /// index field. Because preinc imms have already been validated, just + /// accept it. 
+ bool SelectAddrIdxOffs(SDValue N, SDValue &Out) const { + if (isa(N) || N.getOpcode() == PPCISD::Lo || + N.getOpcode() == ISD::TargetGlobalAddress) + return false; + Out = N; return true; } @@ -238,11 +255,11 @@ SDNode *PPCDAGToDAGISel::getGlobalBaseReg() { DebugLoc dl; if (PPCLowering.getPointerTy() == MVT::i32) { - GlobalBaseReg = RegInfo->createVirtualRegister(PPC::GPRCRegisterClass); + GlobalBaseReg = RegInfo->createVirtualRegister(&PPC::GPRCRegClass); BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MovePCtoLR)); BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR), GlobalBaseReg); } else { - GlobalBaseReg = RegInfo->createVirtualRegister(PPC::G8RCRegisterClass); + GlobalBaseReg = RegInfo->createVirtualRegister(&PPC::G8RCRegClass); BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MovePCtoLR8)); BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR8), GlobalBaseReg); } @@ -697,7 +714,7 @@ SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) { CCReg = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, CR7Reg, CCReg, InFlag).getValue(1); - if (PPCSubTarget.isGigaProcessor() && OtherCondIdx == -1) + if (PPCSubTarget.hasMFOCRF() && OtherCondIdx == -1) IntCR = SDValue(CurDAG->getMachineNode(PPC::MFOCRF, dl, MVT::i32, CR7Reg, CCReg), 0); else @@ -833,7 +850,7 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { case PPCISD::MFCR: { SDValue InFlag = N->getOperand(1); // Use MFOCRF if supported. - if (PPCSubTarget.isGigaProcessor()) + if (PPCSubTarget.hasMFOCRF()) return CurDAG->getMachineNode(PPC::MFOCRF, dl, MVT::i32, N->getOperand(0), InFlag); else @@ -915,12 +932,44 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { SDValue Chain = LD->getChain(); SDValue Base = LD->getBasePtr(); SDValue Ops[] = { Offset, Base, Chain }; - // FIXME: PPC64 return CurDAG->getMachineNode(Opcode, dl, LD->getValueType(0), PPCLowering.getPointerTy(), MVT::Other, Ops, 3); } else { - llvm_unreachable("R+R preindex loads not supported yet!"); + unsigned Opcode; + bool isSExt = LD->getExtensionType() == ISD::SEXTLOAD; + if (LD->getValueType(0) != MVT::i64) { + // Handle PPC32 integer and normal FP loads. + assert((!isSExt || LoadedVT == MVT::i16) && "Invalid sext update load"); + switch (LoadedVT.getSimpleVT().SimpleTy) { + default: llvm_unreachable("Invalid PPC load type!"); + case MVT::f64: Opcode = PPC::LFDUX; break; + case MVT::f32: Opcode = PPC::LFSUX; break; + case MVT::i32: Opcode = PPC::LWZUX; break; + case MVT::i16: Opcode = isSExt ? PPC::LHAUX : PPC::LHZUX; break; + case MVT::i1: + case MVT::i8: Opcode = PPC::LBZUX; break; + } + } else { + assert(LD->getValueType(0) == MVT::i64 && "Unknown load result type!"); + assert((!isSExt || LoadedVT == MVT::i16 || LoadedVT == MVT::i32) && + "Invalid sext update load"); + switch (LoadedVT.getSimpleVT().SimpleTy) { + default: llvm_unreachable("Invalid PPC load type!"); + case MVT::i64: Opcode = PPC::LDUX; break; + case MVT::i32: Opcode = isSExt ? PPC::LWAUX : PPC::LWZUX8; break; + case MVT::i16: Opcode = isSExt ? 
PPC::LHAUX8 : PPC::LHZUX8; break; + case MVT::i1: + case MVT::i8: Opcode = PPC::LBZUX8; break; + } + } + + SDValue Chain = LD->getChain(); + SDValue Base = LD->getBasePtr(); + SDValue Ops[] = { Offset, Base, Chain }; + return CurDAG->getMachineNode(Opcode, dl, LD->getValueType(0), + PPCLowering.getPointerTy(), + MVT::Other, Ops, 3); } } diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index 3b24951..aa819ee 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -51,9 +51,11 @@ static bool CC_PPC_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT, ISD::ArgFlagsTy &ArgFlags, CCState &State); -static cl::opt EnablePPCPreinc("enable-ppc-preinc", -cl::desc("enable preincrement load/store generation on PPC (experimental)"), - cl::Hidden); +static cl::opt DisablePPCPreinc("disable-ppc-preinc", +cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden); + +static cl::opt DisableILPPref("disable-ppc-ilp-pref", +cl::desc("disable setting the node scheduling preference to ILP on PPC"), cl::Hidden); static TargetLoweringObjectFile *CreateTLOF(const PPCTargetMachine &TM) { if (TM.getSubtargetImpl()->isDarwin()) @@ -64,6 +66,7 @@ static TargetLoweringObjectFile *CreateTLOF(const PPCTargetMachine &TM) { PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) : TargetLowering(TM, CreateTLOF(TM)), PPCSubTarget(*TM.getSubtargetImpl()) { + const PPCSubtarget *Subtarget = &TM.getSubtarget(); setPow2DivIsCheap(); @@ -73,12 +76,13 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) // On PPC32/64, arguments smaller than 4/8 bytes are extended, so all // arguments are at least 4/8 bytes aligned. - setMinStackArgumentAlignment(TM.getSubtarget().isPPC64() ? 8:4); + bool isPPC64 = Subtarget->isPPC64(); + setMinStackArgumentAlignment(isPPC64 ? 8:4); // Set up the register classes. 
- addRegisterClass(MVT::i32, PPC::GPRCRegisterClass); - addRegisterClass(MVT::f32, PPC::F4RCRegisterClass); - addRegisterClass(MVT::f64, PPC::F8RCRegisterClass); + addRegisterClass(MVT::i32, &PPC::GPRCRegClass); + addRegisterClass(MVT::f32, &PPC::F4RCRegClass); + addRegisterClass(MVT::f64, &PPC::F8RCRegClass); // PowerPC has an i16 but no i8 (or i1) SEXTLOAD setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote); @@ -130,17 +134,17 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) setOperationAction(ISD::FCOS , MVT::f64, Expand); setOperationAction(ISD::FREM , MVT::f64, Expand); setOperationAction(ISD::FPOW , MVT::f64, Expand); - setOperationAction(ISD::FMA , MVT::f64, Expand); + setOperationAction(ISD::FMA , MVT::f64, Legal); setOperationAction(ISD::FSIN , MVT::f32, Expand); setOperationAction(ISD::FCOS , MVT::f32, Expand); setOperationAction(ISD::FREM , MVT::f32, Expand); setOperationAction(ISD::FPOW , MVT::f32, Expand); - setOperationAction(ISD::FMA , MVT::f32, Expand); + setOperationAction(ISD::FMA , MVT::f32, Legal); setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom); // If we're enabling GP optimizations, use hardware square root - if (!TM.getSubtarget().hasFSQRT()) { + if (!Subtarget->hasFSQRT()) { setOperationAction(ISD::FSQRT, MVT::f64, Expand); setOperationAction(ISD::FSQRT, MVT::f32, Expand); } @@ -226,8 +230,8 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) // VASTART needs to be custom lowered to use the VarArgsFrameIndex setOperationAction(ISD::VASTART , MVT::Other, Custom); - if (TM.getSubtarget().isSVR4ABI()) { - if (TM.getSubtarget().isPPC64()) { + if (Subtarget->isSVR4ABI()) { + if (isPPC64) { // VAARG always uses double-word chunks, so promote anything smaller. setOperationAction(ISD::VAARG, MVT::i1, Promote); AddPromotedToType (ISD::VAARG, MVT::i1, MVT::i64); @@ -271,7 +275,7 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) setCondCodeAction(ISD::SETONE, MVT::f32, Expand); setCondCodeAction(ISD::SETONE, MVT::f64, Expand); - if (TM.getSubtarget().has64BitSupport()) { + if (Subtarget->has64BitSupport()) { // They also have instructions for converting between i64 and fp. setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom); setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand); @@ -290,9 +294,9 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand); } - if (TM.getSubtarget().use64BitRegs()) { + if (Subtarget->use64BitRegs()) { // 64-bit PowerPC implementations can support i64 types directly - addRegisterClass(MVT::i64, PPC::G8RCRegisterClass); + addRegisterClass(MVT::i64, &PPC::G8RCRegClass); // BUILD_PAIR can't be handled natively, and should be expanded to shl/or setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand); // 64-bit PowerPC wants to expand i128 shifts itself. @@ -306,7 +310,7 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom); } - if (TM.getSubtarget().hasAltivec()) { + if (Subtarget->hasAltivec()) { // First set operation action for all vector types to expand. Then we // will selectively turn on ones that can be effectively codegen'd. 
for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE; @@ -370,12 +374,13 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) setOperationAction(ISD::SELECT, MVT::v4i32, Expand); setOperationAction(ISD::STORE , MVT::v4i32, Legal); - addRegisterClass(MVT::v4f32, PPC::VRRCRegisterClass); - addRegisterClass(MVT::v4i32, PPC::VRRCRegisterClass); - addRegisterClass(MVT::v8i16, PPC::VRRCRegisterClass); - addRegisterClass(MVT::v16i8, PPC::VRRCRegisterClass); + addRegisterClass(MVT::v4f32, &PPC::VRRCRegClass); + addRegisterClass(MVT::v4i32, &PPC::VRRCRegClass); + addRegisterClass(MVT::v8i16, &PPC::VRRCRegClass); + addRegisterClass(MVT::v16i8, &PPC::VRRCRegClass); setOperationAction(ISD::MUL, MVT::v4f32, Legal); + setOperationAction(ISD::FMA, MVT::v4f32, Legal); setOperationAction(ISD::MUL, MVT::v4i32, Custom); setOperationAction(ISD::MUL, MVT::v8i16, Custom); setOperationAction(ISD::MUL, MVT::v16i8, Custom); @@ -389,8 +394,10 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom); } - if (TM.getSubtarget().has64BitSupport()) + if (Subtarget->has64BitSupport()) { setOperationAction(ISD::PREFETCH, MVT::Other, Legal); + setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Legal); + } setOperationAction(ISD::ATOMIC_LOAD, MVT::i32, Expand); setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Expand); @@ -398,7 +405,7 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) setBooleanContents(ZeroOrOneBooleanContent); setBooleanVectorContents(ZeroOrOneBooleanContent); // FIXME: Is this correct? - if (TM.getSubtarget().isPPC64()) { + if (isPPC64) { setStackPointerRegisterToSaveRestore(PPC::X1); setExceptionPointerRegister(PPC::X3); setExceptionSelectorRegister(PPC::X4); @@ -415,7 +422,7 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) setTargetDAGCombine(ISD::BSWAP); // Darwin long double math library functions have $LDBL128 appended. - if (TM.getSubtarget().isDarwin()) { + if (Subtarget->isDarwin()) { setLibcallName(RTLIB::COS_PPCF128, "cosl$LDBL128"); setLibcallName(RTLIB::POW_PPCF128, "powl$LDBL128"); setLibcallName(RTLIB::REM_PPCF128, "fmodl$LDBL128"); @@ -432,6 +439,11 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) if (PPCSubTarget.isDarwin()) setPrefFunctionAlignment(4); + if (isPPC64 && Subtarget->isJITCodeModel()) + // Temporary workaround for the inability of PPC64 JIT to handle jump + // tables. + setSupportJumpTables(false); + setInsertFencesForAtomic(true); setSchedulingPreference(Sched::Hybrid); @@ -902,10 +914,11 @@ bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp, return true; // [r+i] } else if (N.getOperand(1).getOpcode() == PPCISD::Lo) { // Match LOAD (ADD (X, Lo(G))). - assert(!cast(N.getOperand(1).getOperand(1))->getZExtValue() + assert(!cast(N.getOperand(1).getOperand(1))->getZExtValue() && "Cannot handle constant offsets yet!"); Disp = N.getOperand(1).getOperand(0); // The global address. 
assert(Disp.getOpcode() == ISD::TargetGlobalAddress || + Disp.getOpcode() == ISD::TargetGlobalTLSAddress || Disp.getOpcode() == ISD::TargetConstantPool || Disp.getOpcode() == ISD::TargetJumpTable); Base = N.getOperand(0); @@ -1006,7 +1019,7 @@ bool PPCTargetLowering::SelectAddressRegImmShift(SDValue N, SDValue &Disp, if (N.getOpcode() == ISD::ADD) { short imm = 0; if (isIntS16Immediate(N.getOperand(1), imm) && (imm & 3) == 0) { - Disp = DAG.getTargetConstant(((int)imm & 0xFFFF) >> 2, MVT::i32); + Disp = DAG.getTargetConstant(((int)imm & 0xFFFF) >> 2, MVT::i32); if (FrameIndexSDNode *FI = dyn_cast(N.getOperand(0))) { Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType()); } else { @@ -1015,7 +1028,7 @@ bool PPCTargetLowering::SelectAddressRegImmShift(SDValue N, SDValue &Disp, return true; // [r+i] } else if (N.getOperand(1).getOpcode() == PPCISD::Lo) { // Match LOAD (ADD (X, Lo(G))). - assert(!cast(N.getOperand(1).getOperand(1))->getZExtValue() + assert(!cast(N.getOperand(1).getOperand(1))->getZExtValue() && "Cannot handle constant offsets yet!"); Disp = N.getOperand(1).getOperand(0); // The global address. assert(Disp.getOpcode() == ISD::TargetGlobalAddress || @@ -1084,8 +1097,7 @@ bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const { - // Disabled by default for now. - if (!EnablePPCPreinc) return false; + if (DisablePPCPreinc) return false; SDValue Ptr; EVT VT; @@ -1103,7 +1115,10 @@ bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base, if (VT.isVector()) return false; - // TODO: Check reg+reg first. + if (SelectAddressRegReg(Ptr, Offset, Base, DAG)) { + AM = ISD::PRE_INC; + return true; + } // LDU/STU use reg+imm*4, others use reg+imm. if (VT != MVT::i64) { @@ -1222,6 +1237,30 @@ SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op, return LowerLabelRef(TgtBAHi, TgtBALo, isPIC, DAG); } +SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op, + SelectionDAG &DAG) const { + + GlobalAddressSDNode *GA = cast(Op); + DebugLoc dl = GA->getDebugLoc(); + const GlobalValue *GV = GA->getGlobal(); + EVT PtrVT = getPointerTy(); + bool is64bit = PPCSubTarget.isPPC64(); + + TLSModel::Model model = getTargetMachine().getTLSModel(GV); + + SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, + PPCII::MO_TPREL16_HA); + SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, + PPCII::MO_TPREL16_LO); + + if (model != TLSModel::LocalExec) + llvm_unreachable("only local-exec TLS mode supported"); + SDValue TLSReg = DAG.getRegister(is64bit ? PPC::X13 : PPC::R2, + is64bit ? 
MVT::i64 : MVT::i32); + SDValue Hi = DAG.getNode(PPCISD::Hi, dl, PtrVT, TGAHi, TLSReg); + return DAG.getNode(PPCISD::Lo, dl, PtrVT, TGALo, Hi); +} + SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const { EVT PtrVT = Op.getValueType(); @@ -1440,13 +1479,16 @@ SDValue PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op, Entry.Node = Nest; Args.push_back(Entry); // Lower to a call to __trampoline_setup(Trmp, TrampSize, FPtr, ctx_reg) - std::pair CallResult = - LowerCallTo(Chain, Type::getVoidTy(*DAG.getContext()), - false, false, false, false, 0, CallingConv::C, + TargetLowering::CallLoweringInfo CLI(Chain, + Type::getVoidTy(*DAG.getContext()), + false, false, false, false, 0, + CallingConv::C, /*isTailCall=*/false, - /*doesNotRet=*/false, /*isReturnValueUsed=*/true, + /*doesNotRet=*/false, + /*isReturnValueUsed=*/true, DAG.getExternalSymbol("__trampoline_setup", PtrVT), Args, DAG, dl); + std::pair CallResult = LowerCallTo(CLI); return CallResult.second; } @@ -1702,7 +1744,7 @@ PPCTargetLowering::LowerFormalArguments_SVR4( // Assign locations to all of the incoming arguments. SmallVector ArgLocs; CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), - getTargetMachine(), ArgLocs, *DAG.getContext()); + getTargetMachine(), ArgLocs, *DAG.getContext()); // Reserve space for the linkage area on the stack. CCInfo.AllocateStack(PPCFrameLowering::getLinkageSize(false, false), PtrByteSize); @@ -1721,19 +1763,19 @@ PPCTargetLowering::LowerFormalArguments_SVR4( default: llvm_unreachable("ValVT not supported by formal arguments Lowering"); case MVT::i32: - RC = PPC::GPRCRegisterClass; + RC = &PPC::GPRCRegClass; break; case MVT::f32: - RC = PPC::F4RCRegisterClass; + RC = &PPC::F4RCRegClass; break; case MVT::f64: - RC = PPC::F8RCRegisterClass; + RC = &PPC::F8RCRegClass; break; case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v4f32: - RC = PPC::VRRCRegisterClass; + RC = &PPC::VRRCRegClass; break; } @@ -1763,7 +1805,7 @@ PPCTargetLowering::LowerFormalArguments_SVR4( // caller's stack frame, right above the parameter list area. SmallVector ByValArgLocs; CCState CCByValInfo(CallConv, isVarArg, DAG.getMachineFunction(), - getTargetMachine(), ByValArgLocs, *DAG.getContext()); + getTargetMachine(), ByValArgLocs, *DAG.getContext()); // Reserve stack space for the allocations in CCInfo. CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrByteSize); @@ -2743,7 +2785,7 @@ PPCTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, SmallVector RVLocs; CCState CCRetInfo(CallConv, isVarArg, DAG.getMachineFunction(), - getTargetMachine(), RVLocs, *DAG.getContext()); + getTargetMachine(), RVLocs, *DAG.getContext()); CCRetInfo.AnalyzeCallResult(Ins, RetCC_PPC); // Copy all of the result registers out of their specified physreg. 
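The local-exec TLS lowering added above materializes a thread-pointer-relative address with an addis/addi pair. A minimal sketch of the assumed @tprel@ha / @tprel@l arithmetic follows; localExecAddress is a hypothetical helper, not part of the patch:

    #include <cstdint>

    // addis Rd, R13, x@tprel@ha  then  addi Rd, Rd, x@tprel@l.
    // addi sign-extends the low halfword, so @ha pre-biases the high half.
    static char *localExecAddress(char *ThreadPtr, int32_t TPRel) {
      int32_t Ha = (int32_t)(((int64_t)TPRel + 0x8000) >> 16); // x@tprel@ha
      int32_t Lo = (int16_t)TPRel;                             // x@tprel@l
      return ThreadPtr + ((int64_t)Ha << 16) + Lo;  // == ThreadPtr + TPRel
    }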
@@ -2800,7 +2842,7 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, DebugLoc dl, if (DAG.getMachineFunction().getRegInfo().liveout_empty()) { SmallVector RVLocs; CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), - getTargetMachine(), RVLocs, *DAG.getContext()); + getTargetMachine(), RVLocs, *DAG.getContext()); CCInfo.AnalyzeCallResult(Ins, RetCC_PPC); for (unsigned i = 0; i != RVLocs.size(); ++i) DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg()); @@ -2864,14 +2906,19 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, DebugLoc dl, } SDValue -PPCTargetLowering::LowerCall(SDValue Chain, SDValue Callee, - CallingConv::ID CallConv, bool isVarArg, - bool doesNotRet, bool &isTailCall, - const SmallVectorImpl &Outs, - const SmallVectorImpl &OutVals, - const SmallVectorImpl &Ins, - DebugLoc dl, SelectionDAG &DAG, +PPCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, SmallVectorImpl &InVals) const { + SelectionDAG &DAG = CLI.DAG; + DebugLoc &dl = CLI.DL; + SmallVector &Outs = CLI.Outs; + SmallVector &OutVals = CLI.OutVals; + SmallVector &Ins = CLI.Ins; + SDValue Chain = CLI.Chain; + SDValue Callee = CLI.Callee; + bool &isTailCall = CLI.IsTailCall; + CallingConv::ID CallConv = CLI.CallConv; + bool isVarArg = CLI.IsVarArg; + if (isTailCall) isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, isVarArg, Ins, DAG); @@ -2921,7 +2968,7 @@ PPCTargetLowering::LowerCall_SVR4(SDValue Chain, SDValue Callee, // Assign locations to all of the outgoing arguments. SmallVector ArgLocs; CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), - getTargetMachine(), ArgLocs, *DAG.getContext()); + getTargetMachine(), ArgLocs, *DAG.getContext()); // Reserve space for the linkage area on the stack. CCInfo.AllocateStack(PPCFrameLowering::getLinkageSize(false, false), PtrByteSize); @@ -2961,7 +3008,7 @@ PPCTargetLowering::LowerCall_SVR4(SDValue Chain, SDValue Callee, // Assign locations to all of the outgoing aggregate by value arguments. SmallVector ByValArgLocs; CCState CCByValInfo(CallConv, isVarArg, DAG.getMachineFunction(), - getTargetMachine(), ByValArgLocs, *DAG.getContext()); + getTargetMachine(), ByValArgLocs, *DAG.getContext()); // Reserve stack space for the allocations in CCInfo. 
CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrByteSize); @@ -3485,7 +3532,7 @@ PPCTargetLowering::LowerReturn(SDValue Chain, SmallVector RVLocs; CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), - getTargetMachine(), RVLocs, *DAG.getContext()); + getTargetMachine(), RVLocs, *DAG.getContext()); CCInfo.AnalyzeReturn(Outs, RetCC_PPC); // If this is the first return lowered for this function, add the regs to the @@ -4559,7 +4606,7 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::ConstantPool: return LowerConstantPool(Op, DAG); case ISD::BlockAddress: return LowerBlockAddress(Op, DAG); case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG); - case ISD::GlobalTLSAddress: llvm_unreachable("TLS not implemented for PPC"); + case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG); case ISD::JumpTable: return LowerJumpTable(Op, DAG); case ISD::SETCC: return LowerSETCC(Op, DAG); case ISD::INIT_TRAMPOLINE: return LowerINIT_TRAMPOLINE(Op, DAG); @@ -4899,11 +4946,37 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, MachineFunction *F = BB->getParent(); - if (MI->getOpcode() == PPC::SELECT_CC_I4 || - MI->getOpcode() == PPC::SELECT_CC_I8 || - MI->getOpcode() == PPC::SELECT_CC_F4 || - MI->getOpcode() == PPC::SELECT_CC_F8 || - MI->getOpcode() == PPC::SELECT_CC_VRRC) { + if (PPCSubTarget.hasISEL() && (MI->getOpcode() == PPC::SELECT_CC_I4 || + MI->getOpcode() == PPC::SELECT_CC_I8)) { + unsigned OpCode = MI->getOpcode() == PPC::SELECT_CC_I8 ? + PPC::ISEL8 : PPC::ISEL; + unsigned SelectPred = MI->getOperand(4).getImm(); + DebugLoc dl = MI->getDebugLoc(); + + // The SelectPred is ((BI << 5) | BO) for a BCC + unsigned BO = SelectPred & 0xF; + assert((BO == 12 || BO == 4) && "invalid predicate BO field for isel"); + + unsigned TrueOpNo, FalseOpNo; + if (BO == 12) { + TrueOpNo = 2; + FalseOpNo = 3; + } else { + TrueOpNo = 3; + FalseOpNo = 2; + SelectPred = PPC::InvertPredicate((PPC::Predicate)SelectPred); + } + + BuildMI(*BB, MI, dl, TII->get(OpCode), MI->getOperand(0).getReg()) + .addReg(MI->getOperand(TrueOpNo).getReg()) + .addReg(MI->getOperand(FalseOpNo).getReg()) + .addImm(SelectPred).addReg(MI->getOperand(1).getReg()); + } else if (MI->getOpcode() == PPC::SELECT_CC_I4 || + MI->getOpcode() == PPC::SELECT_CC_I8 || + MI->getOpcode() == PPC::SELECT_CC_F4 || + MI->getOpcode() == PPC::SELECT_CC_F8 || + MI->getOpcode() == PPC::SELECT_CC_VRRC) { + // The incoming instruction knows the destination vreg to set, the // condition code register to branch on, the true/false values to @@ -5612,18 +5685,18 @@ PPCTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, case 'b': // R1-R31 case 'r': // R0-R31 if (VT == MVT::i64 && PPCSubTarget.isPPC64()) - return std::make_pair(0U, PPC::G8RCRegisterClass); - return std::make_pair(0U, PPC::GPRCRegisterClass); + return std::make_pair(0U, &PPC::G8RCRegClass); + return std::make_pair(0U, &PPC::GPRCRegClass); case 'f': if (VT == MVT::f32) - return std::make_pair(0U, PPC::F4RCRegisterClass); - else if (VT == MVT::f64) - return std::make_pair(0U, PPC::F8RCRegisterClass); + return std::make_pair(0U, &PPC::F4RCRegClass); + if (VT == MVT::f64) + return std::make_pair(0U, &PPC::F8RCRegClass); break; case 'v': - return std::make_pair(0U, PPC::VRRCRegisterClass); + return std::make_pair(0U, &PPC::VRRCRegClass); case 'y': // crrc - return std::make_pair(0U, PPC::CRRCRegisterClass); + return std::make_pair(0U, &PPC::CRRCRegClass); } } @@ -5839,11 +5912,30 @@ EVT 
PPCTargetLowering::getOptimalMemOpType(uint64_t Size, } } +/// isFMAFasterThanMulAndAdd - Return true if an FMA operation is faster than +/// a pair of mul and add instructions. fmuladd intrinsics will be expanded to +/// FMAs when this method returns true (and FMAs are legal), otherwise fmuladd +/// is expanded to mul + add. +bool PPCTargetLowering::isFMAFasterThanMulAndAdd(EVT VT) const { + if (!VT.isSimple()) + return false; + + switch (VT.getSimpleVT().SimpleTy) { + case MVT::f32: + case MVT::f64: + case MVT::v4f32: + return true; + default: + break; + } + + return false; +} + Sched::Preference PPCTargetLowering::getSchedulingPreference(SDNode *N) const { - unsigned Directive = PPCSubTarget.getDarwinDirective(); - if (Directive == PPC::DIR_440 || Directive == PPC::DIR_A2) - return Sched::ILP; + if (DisableILPPref) + return TargetLowering::getSchedulingPreference(N); - return TargetLowering::getSchedulingPreference(N); + return Sched::ILP; } diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h index 18eb072..b0a013b 100644 --- a/lib/Target/PowerPC/PPCISelLowering.h +++ b/lib/Target/PowerPC/PPCISelLowering.h @@ -366,6 +366,12 @@ namespace llvm { bool IsZeroVal, bool MemcpyStrSrc, MachineFunction &MF) const; + /// isFMAFasterThanMulAndAdd - Return true if an FMA operation is faster than + /// a pair of mul and add instructions. fmuladd intrinsics will be expanded to + /// FMAs when this method returns true (and FMAs are legal), otherwise fmuladd + /// is expanded to mul + add. + virtual bool isFMAFasterThanMulAndAdd(EVT VT) const; + private: SDValue getFramePointerFrameIndex(SelectionDAG & DAG) const; SDValue getReturnAddrFrameIndex(SelectionDAG & DAG) const; @@ -389,6 +395,7 @@ namespace llvm { SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const; SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const; SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const; SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const; SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const; @@ -439,12 +446,7 @@ namespace llvm { SmallVectorImpl &InVals) const; virtual SDValue - LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, - bool isVarArg, bool doesNotRet, bool &isTailCall, - const SmallVectorImpl &Outs, - const SmallVectorImpl &OutVals, - const SmallVectorImpl &Ins, - DebugLoc dl, SelectionDAG &DAG, + LowerCall(TargetLowering::CallLoweringInfo &CLI, SmallVectorImpl &InVals) const; virtual bool diff --git a/lib/Target/PowerPC/PPCInstr64Bit.td b/lib/Target/PowerPC/PPCInstr64Bit.td index 7f67a41..39778a5 100644 --- a/lib/Target/PowerPC/PPCInstr64Bit.td +++ b/lib/Target/PowerPC/PPCInstr64Bit.td @@ -68,15 +68,15 @@ let isCall = 1, PPC970_Unit = 7, Defs = [LR8] in { // Convenient aliases for call instructions let Uses = [RM] in { def BL8_Darwin : IForm<18, 0, 1, - (outs), (ins calltarget:$func, variable_ops), + (outs), (ins calltarget:$func), "bl $func", BrB, []>; // See Pat patterns below. 
     def BLA8_Darwin : IForm<18, 1, 1,
-                            (outs), (ins aaddr:$func, variable_ops),
+                            (outs), (ins aaddr:$func),
                             "bla $func", BrB, [(PPCcall_Darwin (i64 imm:$func))]>;
   }
   let Uses = [CTR8, RM] in {
     def BCTRL8_Darwin : XLForm_2_ext<19, 528, 20, 0, 1,
-                                     (outs), (ins variable_ops),
+                                     (outs), (ins),
                                      "bctrl", BrB, [(PPCbctrl_Darwin)]>,
                         Requires<[In64BitMode]>;
   }
@@ -88,27 +88,27 @@ let isCall = 1, PPC970_Unit = 7, Defs = [LR8] in {
   // Convenient aliases for call instructions
   let Uses = [RM] in {
     def BL8_ELF  : IForm<18, 0, 1,
-                         (outs), (ins calltarget:$func, variable_ops),
+                         (outs), (ins calltarget:$func),
                          "bl $func", BrB, []>;  // See Pat patterns below.
 
     let isCodeGenOnly = 1 in
     def BL8_NOP_ELF : IForm_and_DForm_4_zero<18, 0, 1, 24,
-                          (outs), (ins calltarget:$func, variable_ops),
+                          (outs), (ins calltarget:$func),
                           "bl $func\n\tnop", BrB, []>;
 
     def BLA8_ELF : IForm<18, 1, 1,
-                         (outs), (ins aaddr:$func, variable_ops),
+                         (outs), (ins aaddr:$func),
                          "bla $func", BrB, [(PPCcall_SVR4 (i64 imm:$func))]>;
 
     let isCodeGenOnly = 1 in
     def BLA8_NOP_ELF : IForm_and_DForm_4_zero<18, 1, 1, 24,
-                           (outs), (ins aaddr:$func, variable_ops),
+                           (outs), (ins aaddr:$func),
                            "bla $func\n\tnop", BrB, [(PPCcall_nop_SVR4 (i64 imm:$func))]>;
   }
   let Uses = [X11, CTR8, RM] in {
     def BCTRL8_ELF : XLForm_2_ext<19, 528, 20, 0, 1,
-                                  (outs), (ins variable_ops),
+                                  (outs), (ins),
                                   "bctrl", BrB, [(PPCbctrl_SVR4)]>,
                      Requires<[In64BitMode]>;
   }
@@ -180,17 +180,17 @@ def STDCX : XForm_1<31, 214, (outs), (ins G8RC:$rS, memrr:$dst),
 let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [RM] in
 def TCRETURNdi8 :Pseudo< (outs),
-                         (ins calltarget:$dst, i32imm:$offset, variable_ops),
+                         (ins calltarget:$dst, i32imm:$offset),
                  "#TC_RETURNd8 $dst $offset",
                  []>;
 
 let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [RM] in
-def TCRETURNai8 :Pseudo<(outs), (ins aaddr:$func, i32imm:$offset, variable_ops),
+def TCRETURNai8 :Pseudo<(outs), (ins aaddr:$func, i32imm:$offset),
                  "#TC_RETURNa8 $func $offset",
                  [(PPCtc_return (i64 imm:$func), imm:$offset)]>;
 
 let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [RM] in
-def TCRETURNri8 : Pseudo<(outs), (ins CTRRC8:$dst, i32imm:$offset, variable_ops),
+def TCRETURNri8 : Pseudo<(outs), (ins CTRRC8:$dst, i32imm:$offset),
                  "#TC_RETURNr8 $dst $offset",
                  []>;
 
@@ -229,6 +229,15 @@ def : Pat<(PPCtc_return (i64 texternalsym:$dst), imm:$imm),
 def : Pat<(PPCtc_return CTRRC8:$dst, imm:$imm),
           (TCRETURNri8 CTRRC8:$dst, imm:$imm)>;
 
+let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7 in {
+  let Defs = [CTR8], Uses = [CTR8] in {
+    def BDZ8  : IForm_ext<16, 18, 0, 0, (outs), (ins condbrtarget:$dst),
+                          "bdz $dst", BrB, []>;
+    def BDNZ8 : IForm_ext<16, 16, 0, 0, (outs), (ins condbrtarget:$dst),
+                          "bdnz $dst", BrB, []>;
+  }
+}
+
 // 64-bit CR instructions
 def MTCRF8 : XFXForm_5<31, 144, (outs crbitm:$FXM), (ins G8RC:$rS),
                        "mtcrf $FXM, $rS", BrMCRX>,
@@ -256,6 +265,15 @@ def MTCTR8 : XFXForm_7_ext<31, 467, 9, (outs), (ins G8RC:$rS),
              PPC970_DGroup_First, PPC970_Unit_FXU;
 }
 
+let Pattern = [(set G8RC:$rT, readcyclecounter)] in
+def MFTB8 : XFXForm_1_ext<31, 339, 268, (outs G8RC:$rT), (ins),
+                          "mfspr $rT, 268", SprMFTB>,
+            PPC970_DGroup_First, PPC970_Unit_FXU;
+// Note that encoding mftb using mfspr is now the preferred form,
+// and has been since at least ISA v2.03. The mftb instruction has
+// now been phased out. Using mfspr, however, is known not to work on
+// the POWER3.
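The note above says the time base should be read as SPR 268, exactly as MFTB8 encodes it. A minimal user-level sketch with GCC-style inline asm, assuming a 64-bit PowerPC target where a single mfspr reads the whole time base (not part of the patch):

    // Read the 64-bit time base the way MFTB8 does: mfspr rT, 268.
    static inline unsigned long readTimeBase() {
      unsigned long TB;
      asm volatile("mfspr %0, 268" : "=r"(TB));
      return TB;
    }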
+ let Defs = [X1], Uses = [X1] in def DYNALLOC8 : Pseudo<(outs G8RC:$result), (ins G8RC:$negsize, memri:$fpsi),"", [(set G8RC:$result, @@ -278,45 +296,37 @@ def MFLR8 : XFXForm_1_ext<31, 339, 8, (outs G8RC:$rT), (ins), let PPC970_Unit = 1 in { // FXU Operations. -// Copies, extends, truncates. -def OR4To8 : XForm_6<31, 444, (outs G8RC:$rA), (ins GPRC:$rS, GPRC:$rB), - "or $rA, $rS, $rB", IntGeneral, - []>; -def OR8To4 : XForm_6<31, 444, (outs GPRC:$rA), (ins G8RC:$rS, G8RC:$rB), - "or $rA, $rS, $rB", IntGeneral, - []>; - def LI8 : DForm_2_r0<14, (outs G8RC:$rD), (ins symbolLo64:$imm), - "li $rD, $imm", IntGeneral, + "li $rD, $imm", IntSimple, [(set G8RC:$rD, immSExt16:$imm)]>; def LIS8 : DForm_2_r0<15, (outs G8RC:$rD), (ins symbolHi64:$imm), - "lis $rD, $imm", IntGeneral, + "lis $rD, $imm", IntSimple, [(set G8RC:$rD, imm16ShiftedSExt:$imm)]>; // Logical ops. def NAND8: XForm_6<31, 476, (outs G8RC:$rA), (ins G8RC:$rS, G8RC:$rB), - "nand $rA, $rS, $rB", IntGeneral, + "nand $rA, $rS, $rB", IntSimple, [(set G8RC:$rA, (not (and G8RC:$rS, G8RC:$rB)))]>; def AND8 : XForm_6<31, 28, (outs G8RC:$rA), (ins G8RC:$rS, G8RC:$rB), - "and $rA, $rS, $rB", IntGeneral, + "and $rA, $rS, $rB", IntSimple, [(set G8RC:$rA, (and G8RC:$rS, G8RC:$rB))]>; def ANDC8: XForm_6<31, 60, (outs G8RC:$rA), (ins G8RC:$rS, G8RC:$rB), - "andc $rA, $rS, $rB", IntGeneral, + "andc $rA, $rS, $rB", IntSimple, [(set G8RC:$rA, (and G8RC:$rS, (not G8RC:$rB)))]>; def OR8 : XForm_6<31, 444, (outs G8RC:$rA), (ins G8RC:$rS, G8RC:$rB), - "or $rA, $rS, $rB", IntGeneral, + "or $rA, $rS, $rB", IntSimple, [(set G8RC:$rA, (or G8RC:$rS, G8RC:$rB))]>; def NOR8 : XForm_6<31, 124, (outs G8RC:$rA), (ins G8RC:$rS, G8RC:$rB), - "nor $rA, $rS, $rB", IntGeneral, + "nor $rA, $rS, $rB", IntSimple, [(set G8RC:$rA, (not (or G8RC:$rS, G8RC:$rB)))]>; def ORC8 : XForm_6<31, 412, (outs G8RC:$rA), (ins G8RC:$rS, G8RC:$rB), - "orc $rA, $rS, $rB", IntGeneral, + "orc $rA, $rS, $rB", IntSimple, [(set G8RC:$rA, (or G8RC:$rS, (not G8RC:$rB)))]>; def EQV8 : XForm_6<31, 284, (outs G8RC:$rA), (ins G8RC:$rS, G8RC:$rB), - "eqv $rA, $rS, $rB", IntGeneral, + "eqv $rA, $rS, $rB", IntSimple, [(set G8RC:$rA, (not (xor G8RC:$rS, G8RC:$rB)))]>; def XOR8 : XForm_6<31, 316, (outs G8RC:$rA), (ins G8RC:$rS, G8RC:$rB), - "xor $rA, $rS, $rB", IntGeneral, + "xor $rA, $rS, $rB", IntSimple, [(set G8RC:$rA, (xor G8RC:$rS, G8RC:$rB))]>; // Logical ops with immediate. 
@@ -329,20 +339,20 @@ def ANDISo8 : DForm_4<29, (outs G8RC:$dst), (ins G8RC:$src1, u16imm:$src2), [(set G8RC:$dst, (and G8RC:$src1,imm16ShiftedZExt:$src2))]>, isDOT; def ORI8 : DForm_4<24, (outs G8RC:$dst), (ins G8RC:$src1, u16imm:$src2), - "ori $dst, $src1, $src2", IntGeneral, + "ori $dst, $src1, $src2", IntSimple, [(set G8RC:$dst, (or G8RC:$src1, immZExt16:$src2))]>; def ORIS8 : DForm_4<25, (outs G8RC:$dst), (ins G8RC:$src1, u16imm:$src2), - "oris $dst, $src1, $src2", IntGeneral, + "oris $dst, $src1, $src2", IntSimple, [(set G8RC:$dst, (or G8RC:$src1, imm16ShiftedZExt:$src2))]>; def XORI8 : DForm_4<26, (outs G8RC:$dst), (ins G8RC:$src1, u16imm:$src2), - "xori $dst, $src1, $src2", IntGeneral, + "xori $dst, $src1, $src2", IntSimple, [(set G8RC:$dst, (xor G8RC:$src1, immZExt16:$src2))]>; def XORIS8 : DForm_4<27, (outs G8RC:$dst), (ins G8RC:$src1, u16imm:$src2), - "xoris $dst, $src1, $src2", IntGeneral, + "xoris $dst, $src1, $src2", IntSimple, [(set G8RC:$dst, (xor G8RC:$src1, imm16ShiftedZExt:$src2))]>; def ADD8 : XOForm_1<31, 266, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB), - "add $rT, $rA, $rB", IntGeneral, + "add $rT, $rA, $rB", IntSimple, [(set G8RC:$rT, (add G8RC:$rA, G8RC:$rB))]>; let Defs = [CARRY] in { @@ -355,10 +365,13 @@ def ADDIC8 : DForm_2<12, (outs G8RC:$rD), (ins G8RC:$rA, s16imm64:$imm), [(set G8RC:$rD, (addc G8RC:$rA, immSExt16:$imm))]>; } def ADDI8 : DForm_2<14, (outs G8RC:$rD), (ins G8RC:$rA, s16imm64:$imm), - "addi $rD, $rA, $imm", IntGeneral, + "addi $rD, $rA, $imm", IntSimple, + [(set G8RC:$rD, (add G8RC:$rA, immSExt16:$imm))]>; +def ADDI8L : DForm_2<14, (outs G8RC:$rD), (ins G8RC:$rA, symbolLo64:$imm), + "addi $rD, $rA, $imm", IntSimple, [(set G8RC:$rD, (add G8RC:$rA, immSExt16:$imm))]>; def ADDIS8 : DForm_2<15, (outs G8RC:$rD), (ins G8RC:$rA, symbolHi64:$imm), - "addis $rD, $rA, $imm", IntGeneral, + "addis $rD, $rA, $imm", IntSimple, [(set G8RC:$rD, (add G8RC:$rA, imm16ShiftedSExt:$imm))]>; let Defs = [CARRY] in { @@ -374,7 +387,7 @@ def SUBF8 : XOForm_1<31, 40, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB), "subf $rT, $rA, $rB", IntGeneral, [(set G8RC:$rT, (sub G8RC:$rB, G8RC:$rA))]>; def NEG8 : XOForm_3<31, 104, 0, (outs G8RC:$rT), (ins G8RC:$rA), - "neg $rT, $rA", IntGeneral, + "neg $rT, $rA", IntSimple, [(set G8RC:$rT, (ineg G8RC:$rA))]>; let Uses = [CARRY], Defs = [CARRY] in { def ADDE8 : XOForm_1<31, 138, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB), @@ -427,21 +440,21 @@ def SRAD : XForm_6<31, 794, (outs G8RC:$rA), (ins G8RC:$rS, GPRC:$rB), } def EXTSB8 : XForm_11<31, 954, (outs G8RC:$rA), (ins G8RC:$rS), - "extsb $rA, $rS", IntGeneral, + "extsb $rA, $rS", IntSimple, [(set G8RC:$rA, (sext_inreg G8RC:$rS, i8))]>; def EXTSH8 : XForm_11<31, 922, (outs G8RC:$rA), (ins G8RC:$rS), - "extsh $rA, $rS", IntGeneral, + "extsh $rA, $rS", IntSimple, [(set G8RC:$rA, (sext_inreg G8RC:$rS, i16))]>; def EXTSW : XForm_11<31, 986, (outs G8RC:$rA), (ins G8RC:$rS), - "extsw $rA, $rS", IntGeneral, + "extsw $rA, $rS", IntSimple, [(set G8RC:$rA, (sext_inreg G8RC:$rS, i32))]>, isPPC64; /// EXTSW_32 - Just like EXTSW, but works on '32-bit' registers. 
def EXTSW_32 : XForm_11<31, 986, (outs GPRC:$rA), (ins GPRC:$rS), - "extsw $rA, $rS", IntGeneral, + "extsw $rA, $rS", IntSimple, [(set GPRC:$rA, (PPCextsw_32 GPRC:$rS))]>, isPPC64; def EXTSW_32_64 : XForm_11<31, 986, (outs G8RC:$rA), (ins GPRC:$rS), - "extsw $rA, $rS", IntGeneral, + "extsw $rA, $rS", IntSimple, [(set G8RC:$rA, (sext GPRC:$rS))]>, isPPC64; let Defs = [CARRY] in { @@ -493,6 +506,10 @@ def RLWINM8 : MForm_2<21, "rlwinm $rA, $rS, $SH, $MB, $ME", IntGeneral, []>; +def ISEL8 : AForm_1<31, 15, + (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB, pred:$cond), + "isel $rT, $rA, $rB, $cond", IntGeneral, + []>; } // End FXU Operations. @@ -529,6 +546,16 @@ def LHAU8 : DForm_1a<43, (outs G8RC:$rD, ptr_rc:$ea_result), (ins symbolLo:$disp NoEncode<"$ea_result">; // NO LWAU! +def LHAUX8 : XForm_1<31, 375, (outs G8RC:$rD, ptr_rc:$ea_result), + (ins memrr:$addr), + "lhaux $rD, $addr", LdStLoad, + []>, RegConstraint<"$addr.offreg = $ea_result">, + NoEncode<"$ea_result">; +def LWAUX : XForm_1<31, 375, (outs G8RC:$rD, ptr_rc:$ea_result), + (ins memrr:$addr), + "lwaux $rD, $addr", LdStLoad, + []>, RegConstraint<"$addr.offreg = $ea_result">, + NoEncode<"$ea_result">, isPPC64; } // Zero extending loads. @@ -568,6 +595,22 @@ def LWZU8 : DForm_1<33, (outs G8RC:$rD, ptr_rc:$ea_result), (ins memri:$addr), "lwzu $rD, $addr", LdStLoad, []>, RegConstraint<"$addr.reg = $ea_result">, NoEncode<"$ea_result">; + +def LBZUX8 : XForm_1<31, 119, (outs G8RC:$rD, ptr_rc:$ea_result), + (ins memrr:$addr), + "lbzux $rD, $addr", LdStLoad, + []>, RegConstraint<"$addr.offreg = $ea_result">, + NoEncode<"$ea_result">; +def LHZUX8 : XForm_1<31, 331, (outs G8RC:$rD, ptr_rc:$ea_result), + (ins memrr:$addr), + "lhzux $rD, $addr", LdStLoad, + []>, RegConstraint<"$addr.offreg = $ea_result">, + NoEncode<"$ea_result">; +def LWZUX8 : XForm_1<31, 55, (outs G8RC:$rD, ptr_rc:$ea_result), + (ins memrr:$addr), + "lwzux $rD, $addr", LdStLoad, + []>, RegConstraint<"$addr.offreg = $ea_result">, + NoEncode<"$ea_result">; } } @@ -603,6 +646,11 @@ def LDU : DSForm_1<58, 1, (outs G8RC:$rD, ptr_rc:$ea_result), (ins memrix:$addr []>, RegConstraint<"$addr.reg = $ea_result">, isPPC64, NoEncode<"$ea_result">; +def LDUX : XForm_1<31, 53, (outs G8RC:$rD, ptr_rc:$ea_result), + (ins memrr:$addr), + "ldux $rD, $addr", LdStLoad, + []>, RegConstraint<"$addr.offreg = $ea_result">, + NoEncode<"$ea_result">, isPPC64; } def : Pat<(PPCload ixaddr:$src), @@ -660,6 +708,14 @@ def STHU8 : DForm_1a<45, (outs ptr_rc:$ea_res), (ins G8RC:$rS, iaddroff:$ptroff))]>, RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">; +def STWU8 : DForm_1a<37, (outs ptr_rc:$ea_res), (ins G8RC:$rS, + symbolLo:$ptroff, ptr_rc:$ptrreg), + "stwu $rS, $ptroff($ptrreg)", LdStStore, + [(set ptr_rc:$ea_res, + (pre_truncsti32 G8RC:$rS, ptr_rc:$ptrreg, + iaddroff:$ptroff))]>, + RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">; + def STDU : DSForm_1a<62, 1, (outs ptr_rc:$ea_res), (ins G8RC:$rS, s16immX4:$ptroff, ptr_rc:$ptrreg), "stdu $rS, $ptroff($ptrreg)", LdStSTD, @@ -668,10 +724,41 @@ def STDU : DSForm_1a<62, 1, (outs ptr_rc:$ea_res), (ins G8RC:$rS, RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">, isPPC64; -let mayStore = 1 in -def STDUX : XForm_8<31, 181, (outs), (ins G8RC:$rS, memrr:$dst), - "stdux $rS, $dst", LdStSTD, - []>, isPPC64; + +def STBUX8 : XForm_8<31, 247, (outs ptr_rc:$ea_res), + (ins G8RC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg), + "stbux $rS, $ptroff, $ptrreg", LdStStore, + [(set ptr_rc:$ea_res, + (pre_truncsti8 G8RC:$rS, + ptr_rc:$ptrreg, xaddroff:$ptroff))]>, + 
RegConstraint<"$ptroff = $ea_res">, NoEncode<"$ea_res">, + PPC970_DGroup_Cracked; + +def STHUX8 : XForm_8<31, 439, (outs ptr_rc:$ea_res), + (ins G8RC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg), + "sthux $rS, $ptroff, $ptrreg", LdStStore, + [(set ptr_rc:$ea_res, + (pre_truncsti16 G8RC:$rS, + ptr_rc:$ptrreg, xaddroff:$ptroff))]>, + RegConstraint<"$ptroff = $ea_res">, NoEncode<"$ea_res">, + PPC970_DGroup_Cracked; + +def STWUX8 : XForm_8<31, 183, (outs ptr_rc:$ea_res), + (ins G8RC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg), + "stwux $rS, $ptroff, $ptrreg", LdStStore, + [(set ptr_rc:$ea_res, + (pre_truncsti32 G8RC:$rS, + ptr_rc:$ptrreg, xaddroff:$ptroff))]>, + RegConstraint<"$ptroff = $ea_res">, NoEncode<"$ea_res">, + PPC970_DGroup_Cracked; + +def STDUX : XForm_8<31, 181, (outs ptr_rc:$ea_res), + (ins G8RC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg), + "stdux $rS, $ptroff, $ptrreg", LdStStore, + [(set ptr_rc:$ea_res, + (pre_store G8RC:$rS, ptr_rc:$ptrreg, xaddroff:$ptroff))]>, + RegConstraint<"$ptroff = $ea_res">, NoEncode<"$ea_res">, + PPC970_DGroup_Cracked, isPPC64; // STD_32/STDX_32 - Just like STD/STDX, but uses a '32-bit' input register. def STD_32 : DSForm_1<62, 0, (outs), (ins GPRC:$rT, memrix:$dst), @@ -706,11 +793,12 @@ def FCTIDZ : XForm_26<63, 815, (outs F8RC:$frD), (ins F8RC:$frB), // Extensions and truncates to/from 32-bit regs. def : Pat<(i64 (zext GPRC:$in)), - (RLDICL (OR4To8 GPRC:$in, GPRC:$in), 0, 32)>; + (RLDICL (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPRC:$in, sub_32), + 0, 32)>; def : Pat<(i64 (anyext GPRC:$in)), - (OR4To8 GPRC:$in, GPRC:$in)>; + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPRC:$in, sub_32)>; def : Pat<(i32 (trunc G8RC:$in)), - (OR8To4 G8RC:$in, G8RC:$in)>; + (EXTRACT_SUBREG G8RC:$in, sub_32)>; // Extending loads with i64 targets. def : Pat<(zextloadi1 iaddr:$src), @@ -765,6 +853,10 @@ def : Pat<(PPChi tjumptable:$in , 0), (LIS8 tjumptable:$in)>; def : Pat<(PPClo tjumptable:$in , 0), (LI8 tjumptable:$in)>; def : Pat<(PPChi tblockaddress:$in, 0), (LIS8 tblockaddress:$in)>; def : Pat<(PPClo tblockaddress:$in, 0), (LI8 tblockaddress:$in)>; +def : Pat<(PPChi tglobaltlsaddr:$g, G8RC:$in), + (ADDIS8 G8RC:$in, tglobaltlsaddr:$g)>; +def : Pat<(PPClo tglobaltlsaddr:$g, G8RC:$in), + (ADDI8L G8RC:$in, tglobaltlsaddr:$g)>; def : Pat<(add G8RC:$in, (PPChi tglobaladdr:$g, 0)), (ADDIS8 G8RC:$in, tglobaladdr:$g)>; def : Pat<(add G8RC:$in, (PPChi tconstpool:$g, 0)), diff --git a/lib/Target/PowerPC/PPCInstrAltivec.td b/lib/Target/PowerPC/PPCInstrAltivec.td index 6c0f3d3..b0b8423 100644 --- a/lib/Target/PowerPC/PPCInstrAltivec.td +++ b/lib/Target/PowerPC/PPCInstrAltivec.td @@ -274,15 +274,11 @@ let PPC970_Unit = 5 in { // VALU Operations. // VA-Form instructions. 3-input AltiVec ops. 
 def VMADDFP : VAForm_1<46, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vC, VRRC:$vB),
                        "vmaddfp $vD, $vA, $vC, $vB", VecFP,
-                       [(set VRRC:$vD, (fadd (fmul VRRC:$vA, VRRC:$vC),
-                                             VRRC:$vB))]>,
-                       Requires<[FPContractions]>;
+                       [(set VRRC:$vD, (fma VRRC:$vA, VRRC:$vC, VRRC:$vB))]>;
 def VNMSUBFP: VAForm_1<47, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vC, VRRC:$vB),
                        "vnmsubfp $vD, $vA, $vC, $vB", VecFP,
-                       [(set VRRC:$vD, (fsub V_immneg0,
-                                             (fsub (fmul VRRC:$vA, VRRC:$vC),
-                                                   VRRC:$vB)))]>,
-                       Requires<[FPContractions]>;
+                       [(set VRRC:$vD, (fneg (fma VRRC:$vA, VRRC:$vC,
+                                                  (fneg VRRC:$vB))))]>;
 
 def VMHADDSHS  : VA1a_Int<32, "vmhaddshs", int_ppc_altivec_vmhaddshs>;
 def VMHRADDSHS : VA1a_Int<33, "vmhraddshs", int_ppc_altivec_vmhraddshs>;
diff --git a/lib/Target/PowerPC/PPCInstrFormats.td b/lib/Target/PowerPC/PPCInstrFormats.td
index d8e4b2b..a41a027 100644
--- a/lib/Target/PowerPC/PPCInstrFormats.td
+++ b/lib/Target/PowerPC/PPCInstrFormats.td
@@ -94,6 +94,12 @@ class IForm<bits<6> opcode, bit aa, bit lk, dag OOL, dag IOL, string asmstr,
   let Inst{31} = lk;
 }
 
+class IForm_ext<bits<6> opcode, bits<5> bo, bit aa, bit lk, dag OOL, dag IOL,
+                string asmstr, InstrItinClass itin, list<dag> pattern>
+  : IForm<opcode, aa, lk, OOL, IOL, asmstr, itin, pattern> {
+  let LI{0-4} = bo;
+}
+
 // 1.7.2 B-Form
 class BForm<bits<6> opcode, bit aa, bit lk, dag OOL, dag IOL, string asmstr>
   : I<opcode, OOL, IOL, asmstr, BrB> {
diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp
index b45ada9..47f09dc 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -40,6 +40,10 @@ extern cl::opt<bool> DisablePPC64RS;
 
 using namespace llvm;
 
+static cl::
+opt<bool> DisableCTRLoopAnal("disable-ppc-ctrloop-analysis", cl::Hidden,
+            cl::desc("Disable analysis for CTR loops"));
+
 PPCInstrInfo::PPCInstrInfo(PPCTargetMachine &tm)
   : PPCGenInstrInfo(PPC::ADJCALLSTACKDOWN, PPC::ADJCALLSTACKUP),
     TM(tm), RI(*TM.getSubtargetImpl(), *this) {}
@@ -75,6 +79,22 @@ ScheduleHazardRecognizer *PPCInstrInfo::CreateTargetPostRAHazardRecognizer(
   return new PPCScoreboardHazardRecognizer(II, DAG);
 }
 
+
+// Detect 32 -> 64-bit extensions where we may reuse the low sub-register.
+bool PPCInstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
+                                         unsigned &SrcReg, unsigned &DstReg,
+                                         unsigned &SubIdx) const {
+  switch (MI.getOpcode()) {
+  default: return false;
+  case PPC::EXTSW:
+  case PPC::EXTSW_32_64:
+    SrcReg = MI.getOperand(1).getReg();
+    DstReg = MI.getOperand(0).getReg();
+    SubIdx = PPC::sub_32;
+    return true;
+  }
+}
+
 unsigned PPCInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
                                            int &FrameIndex) const {
   switch (MI->getOpcode()) {
@@ -186,10 +206,14 @@ void PPCInstrInfo::insertNoop(MachineBasicBlock &MBB,
 
 // Branch analysis.
+// Note: If the condition register is set to CTR or CTR8 then this is a
+// BDNZ (imm == 1) or BDZ (imm == 0) branch.
 bool PPCInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB,
                                  MachineBasicBlock *&FBB,
                                  SmallVectorImpl<MachineOperand> &Cond,
                                  bool AllowModify) const {
+  bool isPPC64 = TM.getSubtargetImpl()->isPPC64();
+
   // If the block has no terminators, it just falls into the block after it.
MachineBasicBlock::iterator I = MBB.end(); if (I == MBB.begin()) @@ -221,7 +245,30 @@ bool PPCInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB, Cond.push_back(LastInst->getOperand(0)); Cond.push_back(LastInst->getOperand(1)); return false; + } else if (LastInst->getOpcode() == PPC::BDNZ8 || + LastInst->getOpcode() == PPC::BDNZ) { + if (!LastInst->getOperand(0).isMBB()) + return true; + if (DisableCTRLoopAnal) + return true; + TBB = LastInst->getOperand(0).getMBB(); + Cond.push_back(MachineOperand::CreateImm(1)); + Cond.push_back(MachineOperand::CreateReg(isPPC64 ? PPC::CTR8 : PPC::CTR, + true)); + return false; + } else if (LastInst->getOpcode() == PPC::BDZ8 || + LastInst->getOpcode() == PPC::BDZ) { + if (!LastInst->getOperand(0).isMBB()) + return true; + if (DisableCTRLoopAnal) + return true; + TBB = LastInst->getOperand(0).getMBB(); + Cond.push_back(MachineOperand::CreateImm(0)); + Cond.push_back(MachineOperand::CreateReg(isPPC64 ? PPC::CTR8 : PPC::CTR, + true)); + return false; } + // Otherwise, don't know what this is. return true; } @@ -245,6 +292,34 @@ bool PPCInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB, Cond.push_back(SecondLastInst->getOperand(1)); FBB = LastInst->getOperand(0).getMBB(); return false; + } else if ((SecondLastInst->getOpcode() == PPC::BDNZ8 || + SecondLastInst->getOpcode() == PPC::BDNZ) && + LastInst->getOpcode() == PPC::B) { + if (!SecondLastInst->getOperand(0).isMBB() || + !LastInst->getOperand(0).isMBB()) + return true; + if (DisableCTRLoopAnal) + return true; + TBB = SecondLastInst->getOperand(0).getMBB(); + Cond.push_back(MachineOperand::CreateImm(1)); + Cond.push_back(MachineOperand::CreateReg(isPPC64 ? PPC::CTR8 : PPC::CTR, + true)); + FBB = LastInst->getOperand(0).getMBB(); + return false; + } else if ((SecondLastInst->getOpcode() == PPC::BDZ8 || + SecondLastInst->getOpcode() == PPC::BDZ) && + LastInst->getOpcode() == PPC::B) { + if (!SecondLastInst->getOperand(0).isMBB() || + !LastInst->getOperand(0).isMBB()) + return true; + if (DisableCTRLoopAnal) + return true; + TBB = SecondLastInst->getOperand(0).getMBB(); + Cond.push_back(MachineOperand::CreateImm(0)); + Cond.push_back(MachineOperand::CreateReg(isPPC64 ? PPC::CTR8 : PPC::CTR, + true)); + FBB = LastInst->getOperand(0).getMBB(); + return false; } // If the block ends with two PPC:Bs, handle it. The second one is not @@ -273,7 +348,9 @@ unsigned PPCInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const { return 0; --I; } - if (I->getOpcode() != PPC::B && I->getOpcode() != PPC::BCC) + if (I->getOpcode() != PPC::B && I->getOpcode() != PPC::BCC && + I->getOpcode() != PPC::BDNZ8 && I->getOpcode() != PPC::BDNZ && + I->getOpcode() != PPC::BDZ8 && I->getOpcode() != PPC::BDZ) return 0; // Remove the branch. @@ -283,7 +360,9 @@ unsigned PPCInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const { if (I == MBB.begin()) return 1; --I; - if (I->getOpcode() != PPC::BCC) + if (I->getOpcode() != PPC::BCC && + I->getOpcode() != PPC::BDNZ8 && I->getOpcode() != PPC::BDNZ && + I->getOpcode() != PPC::BDZ8 && I->getOpcode() != PPC::BDZ) return 1; // Remove the branch. @@ -301,10 +380,16 @@ PPCInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, assert((Cond.size() == 2 || Cond.size() == 0) && "PPC branch conditions have two components!"); + bool isPPC64 = TM.getSubtargetImpl()->isPPC64(); + // One-way branch. 
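  // (A null FBB requests a single terminator: an unconditional B, or one
  //  conditional branch -- BCC or BDNZ/BDZ -- that otherwise falls through.)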
if (FBB == 0) { if (Cond.empty()) // Unconditional branch BuildMI(&MBB, DL, get(PPC::B)).addMBB(TBB); + else if (Cond[1].getReg() == PPC::CTR || Cond[1].getReg() == PPC::CTR8) + BuildMI(&MBB, DL, get(Cond[0].getImm() ? + (isPPC64 ? PPC::BDNZ8 : PPC::BDNZ) : + (isPPC64 ? PPC::BDZ8 : PPC::BDZ))).addMBB(TBB); else // Conditional branch BuildMI(&MBB, DL, get(PPC::BCC)) .addImm(Cond[0].getImm()).addReg(Cond[1].getReg()).addMBB(TBB); @@ -312,8 +397,13 @@ PPCInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, } // Two-way Conditional Branch. - BuildMI(&MBB, DL, get(PPC::BCC)) - .addImm(Cond[0].getImm()).addReg(Cond[1].getReg()).addMBB(TBB); + if (Cond[1].getReg() == PPC::CTR || Cond[1].getReg() == PPC::CTR8) + BuildMI(&MBB, DL, get(Cond[0].getImm() ? + (isPPC64 ? PPC::BDNZ8 : PPC::BDNZ) : + (isPPC64 ? PPC::BDZ8 : PPC::BDZ))).addMBB(TBB); + else + BuildMI(&MBB, DL, get(PPC::BCC)) + .addImm(Cond[0].getImm()).addReg(Cond[1].getReg()).addMBB(TBB); BuildMI(&MBB, DL, get(PPC::B)).addMBB(FBB); return 2; } @@ -354,7 +444,7 @@ PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF, const TargetRegisterClass *RC, SmallVectorImpl &NewMIs) const{ DebugLoc DL; - if (PPC::GPRCRegisterClass->hasSubClassEq(RC)) { + if (PPC::GPRCRegClass.hasSubClassEq(RC)) { if (SrcReg != PPC::LR) { NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STW)) .addReg(SrcReg, @@ -370,7 +460,7 @@ PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF, getKillRegState(isKill)), FrameIdx)); } - } else if (PPC::G8RCRegisterClass->hasSubClassEq(RC)) { + } else if (PPC::G8RCRegClass.hasSubClassEq(RC)) { if (SrcReg != PPC::LR8) { NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STD)) .addReg(SrcReg, @@ -386,17 +476,17 @@ PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF, getKillRegState(isKill)), FrameIdx)); } - } else if (PPC::F8RCRegisterClass->hasSubClassEq(RC)) { + } else if (PPC::F8RCRegClass.hasSubClassEq(RC)) { NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STFD)) .addReg(SrcReg, getKillRegState(isKill)), FrameIdx)); - } else if (PPC::F4RCRegisterClass->hasSubClassEq(RC)) { + } else if (PPC::F4RCRegClass.hasSubClassEq(RC)) { NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STFS)) .addReg(SrcReg, getKillRegState(isKill)), FrameIdx)); - } else if (PPC::CRRCRegisterClass->hasSubClassEq(RC)) { + } else if (PPC::CRRCRegClass.hasSubClassEq(RC)) { if ((!DisablePPC32RS && !TM.getSubtargetImpl()->isPPC64()) || (!DisablePPC64RS && TM.getSubtargetImpl()->isPPC64())) { NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::SPILL_CR)) @@ -438,7 +528,7 @@ PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF, getKillRegState(isKill)), FrameIdx)); } - } else if (PPC::CRBITRCRegisterClass->hasSubClassEq(RC)) { + } else if (PPC::CRBITRCRegClass.hasSubClassEq(RC)) { // FIXME: We use CRi here because there is no mtcrf on a bit. Since the // backend currently only uses CR1EQ as an individual bit, this should // not cause any bug. If we need other uses of CR bits, the following @@ -470,9 +560,9 @@ PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF, Reg = PPC::CR7; return StoreRegToStackSlot(MF, Reg, isKill, FrameIdx, - PPC::CRRCRegisterClass, NewMIs); + &PPC::CRRCRegClass, NewMIs); - } else if (PPC::VRRCRegisterClass->hasSubClassEq(RC)) { + } else if (PPC::VRRCRegClass.hasSubClassEq(RC)) { // We don't have indexed addressing for vector loads. 
Emit: // R0 = ADDI FI# // STVX VAL, 0, R0 @@ -522,7 +612,7 @@ PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL, unsigned DestReg, int FrameIdx, const TargetRegisterClass *RC, SmallVectorImpl &NewMIs)const{ - if (PPC::GPRCRegisterClass->hasSubClassEq(RC)) { + if (PPC::GPRCRegClass.hasSubClassEq(RC)) { if (DestReg != PPC::LR) { NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LWZ), DestReg), FrameIdx)); @@ -531,7 +621,7 @@ PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL, PPC::R11), FrameIdx)); NewMIs.push_back(BuildMI(MF, DL, get(PPC::MTLR)).addReg(PPC::R11)); } - } else if (PPC::G8RCRegisterClass->hasSubClassEq(RC)) { + } else if (PPC::G8RCRegClass.hasSubClassEq(RC)) { if (DestReg != PPC::LR8) { NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LD), DestReg), FrameIdx)); @@ -540,13 +630,13 @@ PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL, PPC::X11), FrameIdx)); NewMIs.push_back(BuildMI(MF, DL, get(PPC::MTLR8)).addReg(PPC::X11)); } - } else if (PPC::F8RCRegisterClass->hasSubClassEq(RC)) { + } else if (PPC::F8RCRegClass.hasSubClassEq(RC)) { NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LFD), DestReg), FrameIdx)); - } else if (PPC::F4RCRegisterClass->hasSubClassEq(RC)) { + } else if (PPC::F4RCRegClass.hasSubClassEq(RC)) { NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LFS), DestReg), FrameIdx)); - } else if (PPC::CRRCRegisterClass->hasSubClassEq(RC)) { + } else if (PPC::CRRCRegClass.hasSubClassEq(RC)) { if ((!DisablePPC32RS && !TM.getSubtargetImpl()->isPPC64()) || (!DisablePPC64RS && TM.getSubtargetImpl()->isPPC64())) { NewMIs.push_back(addFrameReference(BuildMI(MF, DL, @@ -578,7 +668,7 @@ PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL, PPC::MTCRF8 : PPC::MTCRF), DestReg) .addReg(ScratchReg)); } - } else if (PPC::CRBITRCRegisterClass->hasSubClassEq(RC)) { + } else if (PPC::CRBITRCRegClass.hasSubClassEq(RC)) { unsigned Reg = 0; if (DestReg == PPC::CR0LT || DestReg == PPC::CR0GT || @@ -607,9 +697,9 @@ PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL, Reg = PPC::CR7; return LoadRegFromStackSlot(MF, DL, Reg, FrameIdx, - PPC::CRRCRegisterClass, NewMIs); + &PPC::CRRCRegClass, NewMIs); - } else if (PPC::VRRCRegisterClass->hasSubClassEq(RC)) { + } else if (PPC::VRRCRegClass.hasSubClassEq(RC)) { // We don't have indexed addressing for vector loads. Emit: // R0 = ADDI FI# // Dest = LVX 0, R0 @@ -665,8 +755,11 @@ PPCInstrInfo::emitFrameIndexDebugValue(MachineFunction &MF, bool PPCInstrInfo:: ReverseBranchCondition(SmallVectorImpl &Cond) const { assert(Cond.size() == 2 && "Invalid PPC branch opcode!"); - // Leave the CR# the same, but invert the condition. - Cond[0].setImm(PPC::InvertPredicate((PPC::Predicate)Cond[0].getImm())); + if (Cond[1].getReg() == PPC::CTR8 || Cond[1].getReg() == PPC::CTR) + Cond[0].setImm(Cond[0].getImm() == 0 ? 1 : 0); + else + // Leave the CR# the same, but invert the condition. 
+ Cond[0].setImm(PPC::InvertPredicate((PPC::Predicate)Cond[0].getImm())); return false; } diff --git a/lib/Target/PowerPC/PPCInstrInfo.h b/lib/Target/PowerPC/PPCInstrInfo.h index 7d49aa1..374213e 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.h +++ b/lib/Target/PowerPC/PPCInstrInfo.h @@ -92,6 +92,9 @@ public: CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II, const ScheduleDAG *DAG) const; + bool isCoalescableExtInstr(const MachineInstr &MI, + unsigned &SrcReg, unsigned &DstReg, + unsigned &SubIdx) const; unsigned isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const; unsigned isStoreToStackSlot(const MachineInstr *MI, diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td index 748486c..f57f0c9 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.td +++ b/lib/Target/PowerPC/PPCInstrInfo.td @@ -323,7 +323,7 @@ def memri : Operand { } def memrr : Operand { let PrintMethod = "printMemRegReg"; - let MIOperandInfo = (ops ptr_rc, ptr_rc); + let MIOperandInfo = (ops ptr_rc:$offreg, ptr_rc:$ptrreg); } def memrix : Operand { // memri where the imm is shifted 2 bits. let PrintMethod = "printMemRegImmShifted"; @@ -349,10 +349,10 @@ def ixaddr : ComplexPattern; // "std" /// This is just the offset part of iaddr, used for preinc. def iaddroff : ComplexPattern; +def xaddroff : ComplexPattern; //===----------------------------------------------------------------------===// // PowerPC Instruction Predicate Definitions. -def FPContractions : Predicate<"!TM.Options.NoExcessFPPrecision">; def In32BitMode : Predicate<"!PPCSubTarget.isPPC64()">; def In64BitMode : Predicate<"PPCSubTarget.isPPC64()">; def IsBookE : Predicate<"PPCSubTarget.isBookE()">; @@ -438,6 +438,13 @@ let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7 in { def BCC : BForm<16, 0, 0, (outs), (ins pred:$cond, condbrtarget:$dst), "b${cond:cc} ${cond:reg}, $dst" /*[(PPCcondbranch CRRC:$crS, imm:$opc, bb:$dst)]*/>; + + let Defs = [CTR], Uses = [CTR] in { + def BDZ : IForm_ext<16, 18, 0, 0, (outs), (ins condbrtarget:$dst), + "bdz $dst", BrB, []>; + def BDNZ : IForm_ext<16, 16, 0, 0, (outs), (ins condbrtarget:$dst), + "bdnz $dst", BrB, []>; + } } // Darwin ABI Calls. @@ -445,15 +452,15 @@ let isCall = 1, PPC970_Unit = 7, Defs = [LR] in { // Convenient aliases for call instructions let Uses = [RM] in { def BL_Darwin : IForm<18, 0, 1, - (outs), (ins calltarget:$func, variable_ops), + (outs), (ins calltarget:$func), "bl $func", BrB, []>; // See Pat patterns below. def BLA_Darwin : IForm<18, 1, 1, - (outs), (ins aaddr:$func, variable_ops), + (outs), (ins aaddr:$func), "bla $func", BrB, [(PPCcall_Darwin (i32 imm:$func))]>; } let Uses = [CTR, RM] in { def BCTRL_Darwin : XLForm_2_ext<19, 528, 20, 0, 1, - (outs), (ins variable_ops), + (outs), (ins), "bctrl", BrB, [(PPCbctrl_Darwin)]>, Requires<[In32BitMode]>; } @@ -464,16 +471,16 @@ let isCall = 1, PPC970_Unit = 7, Defs = [LR] in { // Convenient aliases for call instructions let Uses = [RM] in { def BL_SVR4 : IForm<18, 0, 1, - (outs), (ins calltarget:$func, variable_ops), + (outs), (ins calltarget:$func), "bl $func", BrB, []>; // See Pat patterns below. 
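// variable_ops has been dropped from the (ins ...) dags of the call and
// tail-call definitions throughout this file; the extra call operands are
// presumably attached during call lowering rather than by the instruction
// description itself.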
def BLA_SVR4 : IForm<18, 1, 1, - (outs), (ins aaddr:$func, variable_ops), + (outs), (ins aaddr:$func), "bla $func", BrB, [(PPCcall_SVR4 (i32 imm:$func))]>; } let Uses = [CTR, RM] in { def BCTRL_SVR4 : XLForm_2_ext<19, 528, 20, 0, 1, - (outs), (ins variable_ops), + (outs), (ins), "bctrl", BrB, [(PPCbctrl_SVR4)]>, Requires<[In32BitMode]>; } @@ -482,18 +489,18 @@ let isCall = 1, PPC970_Unit = 7, Defs = [LR] in { let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [RM] in def TCRETURNdi :Pseudo< (outs), - (ins calltarget:$dst, i32imm:$offset, variable_ops), + (ins calltarget:$dst, i32imm:$offset), "#TC_RETURNd $dst $offset", []>; let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [RM] in -def TCRETURNai :Pseudo<(outs), (ins aaddr:$func, i32imm:$offset, variable_ops), +def TCRETURNai :Pseudo<(outs), (ins aaddr:$func, i32imm:$offset), "#TC_RETURNa $func $offset", [(PPCtc_return (i32 imm:$func), imm:$offset)]>; let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [RM] in -def TCRETURNri : Pseudo<(outs), (ins CTRRC:$dst, i32imm:$offset, variable_ops), +def TCRETURNri : Pseudo<(outs), (ins CTRRC:$dst, i32imm:$offset), "#TC_RETURNr $dst $offset", []>; @@ -704,6 +711,44 @@ def LFDU : DForm_1<51, (outs F8RC:$rD, ptr_rc:$ea_result), (ins memri:$addr), "lfd $rD, $addr", LdStLFD, []>, RegConstraint<"$addr.reg = $ea_result">, NoEncode<"$ea_result">; + + +// Indexed (r+r) Loads with Update (preinc). +def LBZUX : XForm_1<31, 119, (outs GPRC:$rD, ptr_rc:$ea_result), + (ins memrr:$addr), + "lbzux $rD, $addr", LdStLoad, + []>, RegConstraint<"$addr.offreg = $ea_result">, + NoEncode<"$ea_result">; + +def LHAUX : XForm_1<31, 375, (outs GPRC:$rD, ptr_rc:$ea_result), + (ins memrr:$addr), + "lhaux $rD, $addr", LdStLoad, + []>, RegConstraint<"$addr.offreg = $ea_result">, + NoEncode<"$ea_result">; + +def LHZUX : XForm_1<31, 331, (outs GPRC:$rD, ptr_rc:$ea_result), + (ins memrr:$addr), + "lhzux $rD, $addr", LdStLoad, + []>, RegConstraint<"$addr.offreg = $ea_result">, + NoEncode<"$ea_result">; + +def LWZUX : XForm_1<31, 55, (outs GPRC:$rD, ptr_rc:$ea_result), + (ins memrr:$addr), + "lwzux $rD, $addr", LdStLoad, + []>, RegConstraint<"$addr.offreg = $ea_result">, + NoEncode<"$ea_result">; + +def LFSUX : XForm_1<31, 567, (outs F4RC:$rD, ptr_rc:$ea_result), + (ins memrr:$addr), + "lfsux $rD, $addr", LdStLoad, + []>, RegConstraint<"$addr.offreg = $ea_result">, + NoEncode<"$ea_result">; + +def LFDUX : XForm_1<31, 631, (outs F8RC:$rD, ptr_rc:$ea_result), + (ins memrr:$addr), + "lfdux $rD, $addr", LdStLoad, + []>, RegConstraint<"$addr.offreg = $ea_result">, + NoEncode<"$ea_result">; } } @@ -815,12 +860,49 @@ def STWX : XForm_8<31, 151, (outs), (ins GPRC:$rS, memrr:$dst), "stwx $rS, $dst", LdStStore, [(store GPRC:$rS, xaddr:$dst)]>, PPC970_DGroup_Cracked; - -let mayStore = 1 in { -def STWUX : XForm_8<31, 183, (outs), (ins GPRC:$rS, GPRC:$rA, GPRC:$rB), - "stwux $rS, $rA, $rB", LdStStore, - []>; -} + +def STBUX : XForm_8<31, 247, (outs ptr_rc:$ea_res), + (ins GPRC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg), + "stbux $rS, $ptroff, $ptrreg", LdStStore, + [(set ptr_rc:$ea_res, + (pre_truncsti8 GPRC:$rS, + ptr_rc:$ptrreg, xaddroff:$ptroff))]>, + RegConstraint<"$ptroff = $ea_res">, NoEncode<"$ea_res">, + PPC970_DGroup_Cracked; + +def STHUX : XForm_8<31, 439, (outs ptr_rc:$ea_res), + (ins GPRC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg), + "sthux $rS, $ptroff, $ptrreg", LdStStore, + [(set ptr_rc:$ea_res, + (pre_truncsti16 GPRC:$rS, + ptr_rc:$ptrreg, xaddroff:$ptroff))]>, + RegConstraint<"$ptroff 
= $ea_res">, NoEncode<"$ea_res">, + PPC970_DGroup_Cracked; + +def STWUX : XForm_8<31, 183, (outs ptr_rc:$ea_res), + (ins GPRC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg), + "stwux $rS, $ptroff, $ptrreg", LdStStore, + [(set ptr_rc:$ea_res, + (pre_store GPRC:$rS, ptr_rc:$ptrreg, xaddroff:$ptroff))]>, + RegConstraint<"$ptroff = $ea_res">, NoEncode<"$ea_res">, + PPC970_DGroup_Cracked; + +def STFSUX : XForm_8<31, 695, (outs ptr_rc:$ea_res), + (ins F4RC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg), + "stfsux $rS, $ptroff, $ptrreg", LdStStore, + [(set ptr_rc:$ea_res, + (pre_store F4RC:$rS, ptr_rc:$ptrreg, xaddroff:$ptroff))]>, + RegConstraint<"$ptroff = $ea_res">, NoEncode<"$ea_res">, + PPC970_DGroup_Cracked; + +def STFDUX : XForm_8<31, 759, (outs ptr_rc:$ea_res), + (ins F8RC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg), + "stfdux $rS, $ptroff, $ptrreg", LdStStore, + [(set ptr_rc:$ea_res, + (pre_store F8RC:$rS, ptr_rc:$ptrreg, xaddroff:$ptroff))]>, + RegConstraint<"$ptroff = $ea_res">, NoEncode<"$ea_res">, + PPC970_DGroup_Cracked; + def STHBRX: XForm_8<31, 918, (outs), (ins GPRC:$rS, memrr:$dst), "sthbrx $rS, $dst", LdStStore, [(PPCstbrx GPRC:$rS, xoaddr:$dst, i16)]>, @@ -852,7 +934,10 @@ def SYNC : XForm_24_sync<31, 598, (outs), (ins), let PPC970_Unit = 1 in { // FXU Operations. def ADDI : DForm_2<14, (outs GPRC:$rD), (ins GPRC:$rA, s16imm:$imm), - "addi $rD, $rA, $imm", IntGeneral, + "addi $rD, $rA, $imm", IntSimple, + [(set GPRC:$rD, (add GPRC:$rA, immSExt16:$imm))]>; +def ADDIL : DForm_2<14, (outs GPRC:$rD), (ins GPRC:$rA, symbolLo:$imm), + "addi $rD, $rA, $imm", IntSimple, [(set GPRC:$rD, (add GPRC:$rA, immSExt16:$imm))]>; let Defs = [CARRY] in { def ADDIC : DForm_2<12, (outs GPRC:$rD), (ins GPRC:$rA, s16imm:$imm), @@ -864,7 +949,7 @@ def ADDICo : DForm_2<13, (outs GPRC:$rD), (ins GPRC:$rA, s16imm:$imm), []>; } def ADDIS : DForm_2<15, (outs GPRC:$rD), (ins GPRC:$rA, symbolHi:$imm), - "addis $rD, $rA, $imm", IntGeneral, + "addis $rD, $rA, $imm", IntSimple, [(set GPRC:$rD, (add GPRC:$rA, imm16ShiftedSExt:$imm))]>; def LA : DForm_2<14, (outs GPRC:$rD), (ins GPRC:$rA, symbolLo:$sym), "la $rD, $sym($rA)", IntGeneral, @@ -881,10 +966,10 @@ def SUBFIC : DForm_2< 8, (outs GPRC:$rD), (ins GPRC:$rA, s16imm:$imm), let isReMaterializable = 1 in { def LI : DForm_2_r0<14, (outs GPRC:$rD), (ins symbolLo:$imm), - "li $rD, $imm", IntGeneral, + "li $rD, $imm", IntSimple, [(set GPRC:$rD, immSExt16:$imm)]>; def LIS : DForm_2_r0<15, (outs GPRC:$rD), (ins symbolHi:$imm), - "lis $rD, $imm", IntGeneral, + "lis $rD, $imm", IntSimple, [(set GPRC:$rD, imm16ShiftedSExt:$imm)]>; } } @@ -899,18 +984,18 @@ def ANDISo : DForm_4<29, (outs GPRC:$dst), (ins GPRC:$src1, u16imm:$src2), [(set GPRC:$dst, (and GPRC:$src1,imm16ShiftedZExt:$src2))]>, isDOT; def ORI : DForm_4<24, (outs GPRC:$dst), (ins GPRC:$src1, u16imm:$src2), - "ori $dst, $src1, $src2", IntGeneral, + "ori $dst, $src1, $src2", IntSimple, [(set GPRC:$dst, (or GPRC:$src1, immZExt16:$src2))]>; def ORIS : DForm_4<25, (outs GPRC:$dst), (ins GPRC:$src1, u16imm:$src2), - "oris $dst, $src1, $src2", IntGeneral, + "oris $dst, $src1, $src2", IntSimple, [(set GPRC:$dst, (or GPRC:$src1, imm16ShiftedZExt:$src2))]>; def XORI : DForm_4<26, (outs GPRC:$dst), (ins GPRC:$src1, u16imm:$src2), - "xori $dst, $src1, $src2", IntGeneral, + "xori $dst, $src1, $src2", IntSimple, [(set GPRC:$dst, (xor GPRC:$src1, immZExt16:$src2))]>; def XORIS : DForm_4<27, (outs GPRC:$dst), (ins GPRC:$src1, u16imm:$src2), - "xoris $dst, $src1, $src2", IntGeneral, + "xoris $dst, $src1, $src2", IntSimple, [(set GPRC:$dst, (xor 
GPRC:$src1,imm16ShiftedZExt:$src2))]>; -def NOP : DForm_4_zero<24, (outs), (ins), "nop", IntGeneral, +def NOP : DForm_4_zero<24, (outs), (ins), "nop", IntSimple, []>; def CMPWI : DForm_5_ext<11, (outs CRRC:$crD), (ins GPRC:$rA, s16imm:$imm), "cmpwi $crD, $rA, $imm", IntCompare>; @@ -921,28 +1006,28 @@ def CMPLWI : DForm_6_ext<10, (outs CRRC:$dst), (ins GPRC:$src1, u16imm:$src2), let PPC970_Unit = 1 in { // FXU Operations. def NAND : XForm_6<31, 476, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB), - "nand $rA, $rS, $rB", IntGeneral, + "nand $rA, $rS, $rB", IntSimple, [(set GPRC:$rA, (not (and GPRC:$rS, GPRC:$rB)))]>; def AND : XForm_6<31, 28, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB), - "and $rA, $rS, $rB", IntGeneral, + "and $rA, $rS, $rB", IntSimple, [(set GPRC:$rA, (and GPRC:$rS, GPRC:$rB))]>; def ANDC : XForm_6<31, 60, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB), - "andc $rA, $rS, $rB", IntGeneral, + "andc $rA, $rS, $rB", IntSimple, [(set GPRC:$rA, (and GPRC:$rS, (not GPRC:$rB)))]>; def OR : XForm_6<31, 444, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB), - "or $rA, $rS, $rB", IntGeneral, + "or $rA, $rS, $rB", IntSimple, [(set GPRC:$rA, (or GPRC:$rS, GPRC:$rB))]>; def NOR : XForm_6<31, 124, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB), - "nor $rA, $rS, $rB", IntGeneral, + "nor $rA, $rS, $rB", IntSimple, [(set GPRC:$rA, (not (or GPRC:$rS, GPRC:$rB)))]>; def ORC : XForm_6<31, 412, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB), - "orc $rA, $rS, $rB", IntGeneral, + "orc $rA, $rS, $rB", IntSimple, [(set GPRC:$rA, (or GPRC:$rS, (not GPRC:$rB)))]>; def EQV : XForm_6<31, 284, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB), - "eqv $rA, $rS, $rB", IntGeneral, + "eqv $rA, $rS, $rB", IntSimple, [(set GPRC:$rA, (not (xor GPRC:$rS, GPRC:$rB)))]>; def XOR : XForm_6<31, 316, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB), - "xor $rA, $rS, $rB", IntGeneral, + "xor $rA, $rS, $rB", IntSimple, [(set GPRC:$rA, (xor GPRC:$rS, GPRC:$rB))]>; def SLW : XForm_6<31, 24, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB), "slw $rA, $rS, $rB", IntGeneral, @@ -967,10 +1052,10 @@ def CNTLZW : XForm_11<31, 26, (outs GPRC:$rA), (ins GPRC:$rS), "cntlzw $rA, $rS", IntGeneral, [(set GPRC:$rA, (ctlz GPRC:$rS))]>; def EXTSB : XForm_11<31, 954, (outs GPRC:$rA), (ins GPRC:$rS), - "extsb $rA, $rS", IntGeneral, + "extsb $rA, $rS", IntSimple, [(set GPRC:$rA, (sext_inreg GPRC:$rS, i8))]>; def EXTSH : XForm_11<31, 922, (outs GPRC:$rA), (ins GPRC:$rS), - "extsh $rA, $rS", IntGeneral, + "extsh $rA, $rS", IntSimple, [(set GPRC:$rA, (sext_inreg GPRC:$rS, i16))]>; def CMPW : XForm_16_ext<31, 0, (outs CRRC:$crD), (ins GPRC:$rA, GPRC:$rB), @@ -1115,7 +1200,7 @@ def MFCR : XFXForm_3<31, 19, (outs GPRC:$rT), (ins), PPC970_MicroCode, PPC970_Unit_CRU; def MFOCRF: XFXForm_5a<31, 19, (outs GPRC:$rT), (ins crbitm:$FXM), - "mfcr $rT, $FXM", SprMFCR>, + "mfocrf $rT, $FXM", SprMFCR>, PPC970_DGroup_First, PPC970_Unit_CRU; // Instructions to manipulate FPSCR. Only long double handling uses these. @@ -1159,7 +1244,7 @@ let PPC970_Unit = 1 in { // FXU Operations. // XO-Form instructions. 
Arithmetic instructions that can set overflow bit // def ADD4 : XOForm_1<31, 266, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB), - "add $rT, $rA, $rB", IntGeneral, + "add $rT, $rA, $rB", IntSimple, [(set GPRC:$rT, (add GPRC:$rA, GPRC:$rB))]>; let Defs = [CARRY] in { def ADDC : XOForm_1<31, 10, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB), @@ -1194,7 +1279,7 @@ def SUBFC : XOForm_1<31, 8, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB), PPC970_DGroup_Cracked; } def NEG : XOForm_3<31, 104, 0, (outs GPRC:$rT), (ins GPRC:$rA), - "neg $rT, $rA", IntGeneral, + "neg $rT, $rA", IntSimple, [(set GPRC:$rT, (ineg GPRC:$rA))]>; let Uses = [CARRY], Defs = [CARRY] in { def ADDE : XOForm_1<31, 138, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB), @@ -1226,51 +1311,43 @@ let Uses = [RM] in { def FMADD : AForm_1<63, 29, (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRC, F8RC:$FRB), "fmadd $FRT, $FRA, $FRC, $FRB", FPFused, - [(set F8RC:$FRT, (fadd (fmul F8RC:$FRA, F8RC:$FRC), - F8RC:$FRB))]>, - Requires<[FPContractions]>; + [(set F8RC:$FRT, + (fma F8RC:$FRA, F8RC:$FRC, F8RC:$FRB))]>; def FMADDS : AForm_1<59, 29, (outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRC, F4RC:$FRB), "fmadds $FRT, $FRA, $FRC, $FRB", FPGeneral, - [(set F4RC:$FRT, (fadd (fmul F4RC:$FRA, F4RC:$FRC), - F4RC:$FRB))]>, - Requires<[FPContractions]>; + [(set F4RC:$FRT, + (fma F4RC:$FRA, F4RC:$FRC, F4RC:$FRB))]>; def FMSUB : AForm_1<63, 28, (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRC, F8RC:$FRB), "fmsub $FRT, $FRA, $FRC, $FRB", FPFused, - [(set F8RC:$FRT, (fsub (fmul F8RC:$FRA, F8RC:$FRC), - F8RC:$FRB))]>, - Requires<[FPContractions]>; + [(set F8RC:$FRT, + (fma F8RC:$FRA, F8RC:$FRC, (fneg F8RC:$FRB)))]>; def FMSUBS : AForm_1<59, 28, (outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRC, F4RC:$FRB), "fmsubs $FRT, $FRA, $FRC, $FRB", FPGeneral, - [(set F4RC:$FRT, (fsub (fmul F4RC:$FRA, F4RC:$FRC), - F4RC:$FRB))]>, - Requires<[FPContractions]>; + [(set F4RC:$FRT, + (fma F4RC:$FRA, F4RC:$FRC, (fneg F4RC:$FRB)))]>; def FNMADD : AForm_1<63, 31, (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRC, F8RC:$FRB), "fnmadd $FRT, $FRA, $FRC, $FRB", FPFused, - [(set F8RC:$FRT, (fneg (fadd (fmul F8RC:$FRA, F8RC:$FRC), - F8RC:$FRB)))]>, - Requires<[FPContractions]>; + [(set F8RC:$FRT, + (fneg (fma F8RC:$FRA, F8RC:$FRC, F8RC:$FRB)))]>; def FNMADDS : AForm_1<59, 31, (outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRC, F4RC:$FRB), "fnmadds $FRT, $FRA, $FRC, $FRB", FPGeneral, - [(set F4RC:$FRT, (fneg (fadd (fmul F4RC:$FRA, F4RC:$FRC), - F4RC:$FRB)))]>, - Requires<[FPContractions]>; + [(set F4RC:$FRT, + (fneg (fma F4RC:$FRA, F4RC:$FRC, F4RC:$FRB)))]>; def FNMSUB : AForm_1<63, 30, (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRC, F8RC:$FRB), "fnmsub $FRT, $FRA, $FRC, $FRB", FPFused, - [(set F8RC:$FRT, (fneg (fsub (fmul F8RC:$FRA, F8RC:$FRC), - F8RC:$FRB)))]>, - Requires<[FPContractions]>; + [(set F8RC:$FRT, (fneg (fma F8RC:$FRA, F8RC:$FRC, + (fneg F8RC:$FRB))))]>; def FNMSUBS : AForm_1<59, 30, (outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRC, F4RC:$FRB), "fnmsubs $FRT, $FRA, $FRC, $FRB", FPGeneral, - [(set F4RC:$FRT, (fneg (fsub (fmul F4RC:$FRA, F4RC:$FRC), - F4RC:$FRB)))]>, - Requires<[FPContractions]>; + [(set F4RC:$FRT, (fneg (fma F4RC:$FRA, F4RC:$FRC, + (fneg F4RC:$FRB))))]>; } // FSEL is artificially split into 4 and 8-byte forms for the result. To avoid // having 4 of these, force the comparison to always be an 8-byte double (code @@ -1321,6 +1398,13 @@ let Uses = [RM] in { } let PPC970_Unit = 1 in { // FXU Operations. 
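// isel copies $rA into $rT when the predicate bit is set, and $rB otherwise;
// no selection pattern is attached yet, so it is only created explicitly
// (cf. the HasISEL subtarget flag added below).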
+ def ISEL : AForm_1<31, 15, + (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB, pred:$cond), + "isel $rT, $rA, $rB, $cond", IntGeneral, + []>; +} + +let PPC970_Unit = 1 in { // FXU Operations. // M-Form instructions. rotate and mask instructions. // let isCommutable = 1 in { @@ -1418,6 +1502,10 @@ def : Pat<(PPChi tjumptable:$in, 0), (LIS tjumptable:$in)>; def : Pat<(PPClo tjumptable:$in, 0), (LI tjumptable:$in)>; def : Pat<(PPChi tblockaddress:$in, 0), (LIS tblockaddress:$in)>; def : Pat<(PPClo tblockaddress:$in, 0), (LI tblockaddress:$in)>; +def : Pat<(PPChi tglobaltlsaddr:$g, GPRC:$in), + (ADDIS GPRC:$in, tglobaltlsaddr:$g)>; +def : Pat<(PPClo tglobaltlsaddr:$g, GPRC:$in), + (ADDIL GPRC:$in, tglobaltlsaddr:$g)>; def : Pat<(add GPRC:$in, (PPChi tglobaladdr:$g, 0)), (ADDIS GPRC:$in, tglobaladdr:$g)>; def : Pat<(add GPRC:$in, (PPChi tconstpool:$g, 0)), @@ -1427,14 +1515,6 @@ def : Pat<(add GPRC:$in, (PPChi tjumptable:$g, 0)), def : Pat<(add GPRC:$in, (PPChi tblockaddress:$g, 0)), (ADDIS GPRC:$in, tblockaddress:$g)>; -// Fused negative multiply subtract, alternate pattern -def : Pat<(fsub F8RC:$B, (fmul F8RC:$A, F8RC:$C)), - (FNMSUB F8RC:$A, F8RC:$C, F8RC:$B)>, - Requires<[FPContractions]>; -def : Pat<(fsub F4RC:$B, (fmul F4RC:$A, F4RC:$C)), - (FNMSUBS F4RC:$A, F4RC:$C, F4RC:$B)>, - Requires<[FPContractions]>; - // Standard shifts. These are represented separately from the real shifts above // so that we can distinguish between shifts that allow 5-bit and 6-bit shift // amounts. diff --git a/lib/Target/PowerPC/PPCJITInfo.cpp b/lib/Target/PowerPC/PPCJITInfo.cpp index a6528c0..aba2739 100644 --- a/lib/Target/PowerPC/PPCJITInfo.cpp +++ b/lib/Target/PowerPC/PPCJITInfo.cpp @@ -210,7 +210,7 @@ asm( ".text\n" ".align 2\n" ".globl PPC64CompilationCallback\n" - ".section \".opd\",\"aw\"\n" + ".section \".opd\",\"aw\",@progbits\n" ".align 3\n" "PPC64CompilationCallback:\n" ".quad .L.PPC64CompilationCallback,.TOC.@tocbase,0\n" diff --git a/lib/Target/PowerPC/PPCMCInstLower.cpp b/lib/Target/PowerPC/PPCMCInstLower.cpp index 276edcb..19ec993 100644 --- a/lib/Target/PowerPC/PPCMCInstLower.cpp +++ b/lib/Target/PowerPC/PPCMCInstLower.cpp @@ -99,10 +99,22 @@ static MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol, MCContext &Ctx = Printer.OutContext; MCSymbolRefExpr::VariantKind RefKind = MCSymbolRefExpr::VK_None; - if (MO.getTargetFlags() & PPCII::MO_LO16) - RefKind = isDarwin ? MCSymbolRefExpr::VK_PPC_DARWIN_LO16 : MCSymbolRefExpr::VK_PPC_GAS_LO16; - else if (MO.getTargetFlags() & PPCII::MO_HA16) - RefKind = isDarwin ? MCSymbolRefExpr::VK_PPC_DARWIN_HA16 : MCSymbolRefExpr::VK_PPC_GAS_HA16; + unsigned access = MO.getTargetFlags() & PPCII::MO_ACCESS_MASK; + + switch (access) { + case PPCII::MO_HA16: RefKind = isDarwin ? + MCSymbolRefExpr::VK_PPC_DARWIN_HA16 : + MCSymbolRefExpr::VK_PPC_GAS_HA16; + break; + case PPCII::MO_LO16: RefKind = isDarwin ? + MCSymbolRefExpr::VK_PPC_DARWIN_LO16 : + MCSymbolRefExpr::VK_PPC_GAS_LO16; + break; + case PPCII::MO_TPREL16_HA: RefKind = MCSymbolRefExpr::VK_PPC_TPREL16_HA; + break; + case PPCII::MO_TPREL16_LO: RefKind = MCSymbolRefExpr::VK_PPC_TPREL16_LO; + break; + } // FIXME: This isn't right, but we don't have a good way to express this in // the MC Level, see below. 
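// (MO_ACCESS_MASK isolates the hi16/lo16/TPREL bits of the target flags so a
//  single switch can pick the matching MCSymbolRefExpr variant; the TPREL
//  kinds are the new thread-local-storage relocations.)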
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp index ef13571..ab8bf1f 100644 --- a/lib/Target/PowerPC/PPCRegisterInfo.cpp +++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp @@ -89,10 +89,17 @@ PPCRegisterInfo::PPCRegisterInfo(const PPCSubtarget &ST, ImmToIdxMap[PPC::ADDI8] = PPC::ADD8; ImmToIdxMap[PPC::STD_32] = PPC::STDX_32; } +bool +PPCRegisterInfo::trackLivenessAfterRegAlloc(const MachineFunction &MF) const { + return requiresRegisterScavenging(MF); +} + + /// getPointerRegClass - Return the register class to use to hold pointers. /// This is used for addressing modes. const TargetRegisterClass * -PPCRegisterInfo::getPointerRegClass(unsigned Kind) const { +PPCRegisterInfo::getPointerRegClass(const MachineFunction &MF, unsigned Kind) + const { if (Subtarget.isPPC64()) return &PPC::G8RCRegClass; return &PPC::GPRCRegClass; @@ -192,6 +199,20 @@ PPCRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC, } } +bool +PPCRegisterInfo::avoidWriteAfterWrite(const TargetRegisterClass *RC) const { + switch (RC->getID()) { + case PPC::G8RCRegClassID: + case PPC::GPRCRegClassID: + case PPC::F8RCRegClassID: + case PPC::F4RCRegClassID: + case PPC::VRRCRegClassID: + return true; + default: + return false; + } +} + //===----------------------------------------------------------------------===// // Stack Frame Processing methods //===----------------------------------------------------------------------===// @@ -321,14 +342,14 @@ void PPCRegisterInfo::lowerDynamicAlloc(MachineBasicBlock::iterator II, // address of new allocated space. if (LP64) { if (requiresRegisterScavenging(MF)) // FIXME (64-bit): Use "true" part. - BuildMI(MBB, II, dl, TII.get(PPC::STDUX)) + BuildMI(MBB, II, dl, TII.get(PPC::STDUX), PPC::X1) .addReg(Reg, RegState::Kill) - .addReg(PPC::X1, RegState::Define) + .addReg(PPC::X1) .addReg(MI.getOperand(1).getReg()); else - BuildMI(MBB, II, dl, TII.get(PPC::STDUX)) + BuildMI(MBB, II, dl, TII.get(PPC::STDUX), PPC::X1) .addReg(PPC::X0, RegState::Kill) - .addReg(PPC::X1, RegState::Define) + .addReg(PPC::X1) .addReg(MI.getOperand(1).getReg()); if (!MI.getOperand(1).isKill()) @@ -342,9 +363,9 @@ void PPCRegisterInfo::lowerDynamicAlloc(MachineBasicBlock::iterator II, .addImm(maxCallFrameSize) .addReg(MI.getOperand(1).getReg(), RegState::ImplicitKill); } else { - BuildMI(MBB, II, dl, TII.get(PPC::STWUX)) + BuildMI(MBB, II, dl, TII.get(PPC::STWUX), PPC::R1) .addReg(Reg, RegState::Kill) - .addReg(PPC::R1, RegState::Define) + .addReg(PPC::R1) .addReg(MI.getOperand(1).getReg()); if (!MI.getOperand(1).isKill()) diff --git a/lib/Target/PowerPC/PPCRegisterInfo.h b/lib/Target/PowerPC/PPCRegisterInfo.h index b1e6a72..152c36d 100644 --- a/lib/Target/PowerPC/PPCRegisterInfo.h +++ b/lib/Target/PowerPC/PPCRegisterInfo.h @@ -35,7 +35,8 @@ public: /// getPointerRegClass - Return the register class to use to hold pointers. /// This is used for addressing modes. - virtual const TargetRegisterClass *getPointerRegClass(unsigned Kind=0) const; + virtual const TargetRegisterClass * + getPointerRegClass(const MachineFunction &MF, unsigned Kind=0) const; unsigned getRegPressureLimit(const TargetRegisterClass *RC, MachineFunction &MF) const; @@ -46,10 +47,14 @@ public: BitVector getReservedRegs(const MachineFunction &MF) const; + virtual bool avoidWriteAfterWrite(const TargetRegisterClass *RC) const; + /// requiresRegisterScavenging - We require a register scavenger. /// FIXME (64-bit): Should be inlined. 
bool requiresRegisterScavenging(const MachineFunction &MF) const; + bool trackLivenessAfterRegAlloc(const MachineFunction &MF) const; + void eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const; diff --git a/lib/Target/PowerPC/PPCRegisterInfo.td b/lib/Target/PowerPC/PPCRegisterInfo.td index 0e55313..5ca3876 100644 --- a/lib/Target/PowerPC/PPCRegisterInfo.td +++ b/lib/Target/PowerPC/PPCRegisterInfo.td @@ -314,12 +314,18 @@ def CRBITRC : RegisterClass<"PPC", [i32], 32, } def CRRC : RegisterClass<"PPC", [i32], 32, (add CR0, CR1, CR5, CR6, - CR7, CR2, CR3, CR4)> { - let SubRegClasses = [(CRBITRC sub_lt, sub_gt, sub_eq, sub_un)]; + CR7, CR2, CR3, CR4)>; + +// The CTR registers are not allocatable because they're used by the +// decrement-and-branch instructions, and thus need to stay live across +// multiple basic blocks. +def CTRRC : RegisterClass<"PPC", [i32], 32, (add CTR)> { + let isAllocatable = 0; +} +def CTRRC8 : RegisterClass<"PPC", [i64], 64, (add CTR8)> { + let isAllocatable = 0; } -def CTRRC : RegisterClass<"PPC", [i32], 32, (add CTR)>; -def CTRRC8 : RegisterClass<"PPC", [i64], 64, (add CTR8)>; def VRSAVERC : RegisterClass<"PPC", [i32], 32, (add VRSAVE)>; def CARRYRC : RegisterClass<"PPC", [i32], 32, (add CARRY)> { let CopyCost = -1; diff --git a/lib/Target/PowerPC/PPCSchedule.td b/lib/Target/PowerPC/PPCSchedule.td index 8c0a858..6a6ccb9 100644 --- a/lib/Target/PowerPC/PPCSchedule.td +++ b/lib/Target/PowerPC/PPCSchedule.td @@ -25,6 +25,7 @@ def VFPU : FuncUnit; // vector floating point unit //===----------------------------------------------------------------------===// // Instruction Itinerary classes used for PowerPC // +def IntSimple : InstrItinClass; def IntGeneral : InstrItinClass; def IntCompare : InstrItinClass; def IntDivD : InstrItinClass; @@ -117,17 +118,17 @@ include "PPCScheduleA2.td" // // opcode itinerary class // ====== =============== -// add IntGeneral +// add IntSimple // addc IntGeneral // adde IntGeneral -// addi IntGeneral +// addi IntSimple // addic IntGeneral // addic. IntGeneral -// addis IntGeneral +// addis IntSimple // addme IntGeneral // addze IntGeneral -// and IntGeneral -// andc IntGeneral +// and IntSimple +// andc IntSimple // andi. IntGeneral // andis. 
IntGeneral // b BrB @@ -165,10 +166,10 @@ include "PPCScheduleA2.td" // eciwx LdStLoad // ecowx LdStLoad // eieio LdStLoad -// eqv IntGeneral -// extsb IntGeneral -// extsh IntGeneral -// extsw IntRotateD +// eqv IntSimple +// extsb IntSimple +// extsh IntSimple +// extsw IntSimple // fabs FPGeneral // fadd FPGeneral // fadds FPGeneral @@ -280,13 +281,13 @@ include "PPCScheduleA2.td" // mulld IntMulHD // mulli IntMulLI // mullw IntMulHW -// nand IntGeneral -// neg IntGeneral -// nor IntGeneral -// or IntGeneral -// orc IntGeneral -// ori IntGeneral -// oris IntGeneral +// nand IntSimple +// neg IntSimple +// nor IntSimple +// or IntSimple +// orc IntSimple +// ori IntSimple +// oris IntSimple // rfi SprRFI // rfid IntRFID // rldcl IntRotateD @@ -502,7 +503,7 @@ include "PPCScheduleA2.td" // vupklsb VecPerm // vupklsh VecPerm // vxor VecGeneral -// xor IntGeneral -// xori IntGeneral -// xoris IntGeneral +// xor IntSimple +// xori IntSimple +// xoris IntSimple // diff --git a/lib/Target/PowerPC/PPCSchedule440.td b/lib/Target/PowerPC/PPCSchedule440.td index 419faea..cd0fb70 100644 --- a/lib/Target/PowerPC/PPCSchedule440.td +++ b/lib/Target/PowerPC/PPCSchedule440.td @@ -108,6 +108,15 @@ def PPC440Itineraries : ProcessorItineraries< IRACC, IEXE1, IEXE2, IWB, LRACC, JEXE1, JEXE2, JWB, AGEN, CRD, LWB, FEXE1, FEXE2, FEXE3, FEXE4, FEXE5, FEXE6, FWB, LWARX_Hold], [GPR_Bypass, FPR_Bypass], [ + InstrItinData, + InstrStage<1, [PDCD1, PDCD2]>, + InstrStage<1, [DISS1, DISS2]>, + InstrStage<1, [IRACC, LRACC]>, + InstrStage<1, [IEXE1, JEXE1]>, + InstrStage<1, [IEXE2, JEXE2]>, + InstrStage<1, [IWB, JWB]>], + [6, 4, 4], + [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, InstrItinData, InstrStage<1, [PDCD1, PDCD2]>, InstrStage<1, [DISS1, DISS2]>, @@ -373,26 +382,6 @@ def PPC440Itineraries : ProcessorItineraries< InstrStage<1, [LWB]>], [8, 5], [NoBypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [PDCD1, PDCD2]>, - InstrStage<1, [DISS1, DISS2]>, - InstrStage<1, [LRACC]>, - InstrStage<1, [AGEN]>, - InstrStage<1, [CRD]>, - InstrStage<2, [LWB]>], - [8, 5], - [NoBypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [PDCD1, PDCD2]>, - InstrStage<1, [DISS1]>, - InstrStage<1, [IRACC], 0>, - InstrStage<4, [LWARX_Hold], 0>, - InstrStage<1, [LRACC]>, - InstrStage<1, [AGEN]>, - InstrStage<1, [CRD]>, - InstrStage<1, [LWB]>], - [8, 5], - [NoBypass, GPR_Bypass]>, InstrItinData, InstrStage<1, [PDCD1, PDCD2]>, InstrStage<1, [DISS1]>, diff --git a/lib/Target/PowerPC/PPCScheduleA2.td b/lib/Target/PowerPC/PPCScheduleA2.td index 857ba40..4d4a5d0 100644 --- a/lib/Target/PowerPC/PPCScheduleA2.td +++ b/lib/Target/PowerPC/PPCScheduleA2.td @@ -60,6 +60,17 @@ def PPCA2Itineraries : ProcessorItineraries< IU5, IU6, RF0, XRF1, XEX1, XEX2, XEX3, XEX4, XEX5, XEX6, FRF1, FEX1, FEX2, FEX3, FEX4, FEX5, FEX6], [CR_Bypass, GPR_Bypass, FPR_Bypass], [ + InstrItinData, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], + [10, 7, 7], + [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, InstrItinData, InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, @@ -159,6 +170,17 @@ def PPCA2Itineraries : ProcessorItineraries< InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], [10, 7, 7], [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + 
InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], + [10, 7, 7], + [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, InstrItinData, InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, @@ -181,6 +203,17 @@ def PPCA2Itineraries : ProcessorItineraries< InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], [10, 7, 7], [GPR_Bypass, GPR_Bypass]>, + InstrItinData, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], + [10, 7, 7], + [GPR_Bypass, GPR_Bypass]>, InstrItinData, InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, @@ -269,6 +302,17 @@ def PPCA2Itineraries : ProcessorItineraries< InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], [14, 7], [GPR_Bypass, GPR_Bypass]>, + InstrItinData, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], + [14, 7], + [GPR_Bypass, GPR_Bypass]>, InstrItinData, InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, @@ -379,28 +423,6 @@ def PPCA2Itineraries : ProcessorItineraries< InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], [26, 7], [NoBypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, - IU4_4, IU4_5, IU4_6, IU4_7]>, - InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, - InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, - InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, - InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, - InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], - [13, 7], - [GPR_Bypass, GPR_Bypass]>, - InstrItinData, - InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, - IU4_4, IU4_5, IU4_6, IU4_7]>, - InstrStage<1, [IU5]>, InstrStage<13, [IU6]>, - InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, - InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, - InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, - InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], - [26, 7], - [NoBypass, GPR_Bypass]>, InstrItinData, InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, diff --git a/lib/Target/PowerPC/PPCScheduleG3.td b/lib/Target/PowerPC/PPCScheduleG3.td index bc926f7..61e89ed 100644 --- a/lib/Target/PowerPC/PPCScheduleG3.td +++ b/lib/Target/PowerPC/PPCScheduleG3.td @@ -14,6 +14,7 @@ def G3Itineraries : ProcessorItineraries< [IU1, IU2, FPU1, BPU, SRU, SLU], [], [ + InstrItinData]>, InstrItinData]>, InstrItinData]>, InstrItinData]>, diff --git a/lib/Target/PowerPC/PPCScheduleG4.td b/lib/Target/PowerPC/PPCScheduleG4.td index f7ec1e0..e19ddfa 100644 --- a/lib/Target/PowerPC/PPCScheduleG4.td +++ b/lib/Target/PowerPC/PPCScheduleG4.td @@ -13,6 +13,7 @@ def G4Itineraries : ProcessorItineraries< [IU1, IU2, SLU, SRU, BPU, FPU1, VIU1, VIU2, VPU, VFPU], [], [ + InstrItinData]>, InstrItinData]>, InstrItinData]>, InstrItinData]>, diff --git a/lib/Target/PowerPC/PPCScheduleG4Plus.td b/lib/Target/PowerPC/PPCScheduleG4Plus.td index 37ebfc5..e7446cb 100644 --- a/lib/Target/PowerPC/PPCScheduleG4Plus.td +++ b/lib/Target/PowerPC/PPCScheduleG4Plus.td @@ -16,6 +16,7 @@ def IU4 : FuncUnit; // integer unit 4 (7450 simple) def G4PlusItineraries : 
ProcessorItineraries< [IU1, IU2, IU3, IU4, BPU, SLU, FPU1, VFPU, VIU1, VIU2, VPU], [], [ + InstrItinData]>, InstrItinData]>, InstrItinData]>, InstrItinData]>, diff --git a/lib/Target/PowerPC/PPCScheduleG5.td b/lib/Target/PowerPC/PPCScheduleG5.td index d1e40ce..1371499 100644 --- a/lib/Target/PowerPC/PPCScheduleG5.td +++ b/lib/Target/PowerPC/PPCScheduleG5.td @@ -13,6 +13,7 @@ def G5Itineraries : ProcessorItineraries< [IU1, IU2, SLU, BPU, FPU1, FPU2, VFPU, VIU1, VIU2, VPU], [], [ + InstrItinData]>, InstrItinData]>, InstrItinData]>, InstrItinData]>, diff --git a/lib/Target/PowerPC/PPCSubtarget.cpp b/lib/Target/PowerPC/PPCSubtarget.cpp index f405b47..bb193ac 100644 --- a/lib/Target/PowerPC/PPCSubtarget.cpp +++ b/lib/Target/PowerPC/PPCSubtarget.cpp @@ -16,6 +16,7 @@ #include "PPC.h" #include "llvm/GlobalValue.h" #include "llvm/Target/TargetMachine.h" +#include "llvm/Support/Host.h" #include "llvm/Support/TargetRegistry.h" #include @@ -25,56 +26,19 @@ using namespace llvm; -#if defined(__APPLE__) -#include -#include -#include -#include - -/// GetCurrentPowerPCFeatures - Returns the current CPUs features. -static const char *GetCurrentPowerPCCPU() { - host_basic_info_data_t hostInfo; - mach_msg_type_number_t infoCount; - - infoCount = HOST_BASIC_INFO_COUNT; - host_info(mach_host_self(), HOST_BASIC_INFO, (host_info_t)&hostInfo, - &infoCount); - - if (hostInfo.cpu_type != CPU_TYPE_POWERPC) return "generic"; - - switch(hostInfo.cpu_subtype) { - case CPU_SUBTYPE_POWERPC_601: return "601"; - case CPU_SUBTYPE_POWERPC_602: return "602"; - case CPU_SUBTYPE_POWERPC_603: return "603"; - case CPU_SUBTYPE_POWERPC_603e: return "603e"; - case CPU_SUBTYPE_POWERPC_603ev: return "603ev"; - case CPU_SUBTYPE_POWERPC_604: return "604"; - case CPU_SUBTYPE_POWERPC_604e: return "604e"; - case CPU_SUBTYPE_POWERPC_620: return "620"; - case CPU_SUBTYPE_POWERPC_750: return "750"; - case CPU_SUBTYPE_POWERPC_7400: return "7400"; - case CPU_SUBTYPE_POWERPC_7450: return "7450"; - case CPU_SUBTYPE_POWERPC_970: return "970"; - default: ; - } - - return "generic"; -} -#endif - - PPCSubtarget::PPCSubtarget(const std::string &TT, const std::string &CPU, const std::string &FS, bool is64Bit) : PPCGenSubtargetInfo(TT, CPU, FS) , StackAlignment(16) , DarwinDirective(PPC::DIR_NONE) - , IsGigaProcessor(false) + , HasMFOCRF(false) , Has64BitSupport(false) , Use64BitRegs(false) , IsPPC64(is64Bit) , HasAltivec(false) , HasFSQRT(false) , HasSTFIWX(false) + , HasISEL(false) , IsBookE(false) , HasLazyResolverStubs(false) , IsJITCodeModel(false) @@ -84,9 +48,10 @@ PPCSubtarget::PPCSubtarget(const std::string &TT, const std::string &CPU, std::string CPUName = CPU; if (CPUName.empty()) CPUName = "generic"; -#if defined(__APPLE__) +#if (defined(__APPLE__) || defined(__linux__)) && \ + (defined(__ppc__) || defined(__powerpc__)) if (CPUName == "generic") - CPUName = GetCurrentPowerPCCPU(); + CPUName = sys::getHostCPUName(); #endif // Parse features string. @@ -146,10 +111,14 @@ bool PPCSubtarget::enablePostRAScheduler( CodeGenOpt::Level OptLevel, TargetSubtargetInfo::AntiDepBreakMode& Mode, RegClassVector& CriticalPathRCs) const { - if (DarwinDirective == PPC::DIR_440 || DarwinDirective == PPC::DIR_A2) - Mode = TargetSubtargetInfo::ANTIDEP_ALL; - else - Mode = TargetSubtargetInfo::ANTIDEP_CRITICAL; + // FIXME: It would be best to use TargetSubtargetInfo::ANTIDEP_ALL here, + // but we can't because we can't reassign the cr registers. 
There is a + // dependence between the cr register and the RLWINM instruction used + // to extract its value which the anti-dependency breaker can't currently + // see. Maybe we should make a late-expanded pseudo to encode this dependency. + // (the relevant code is in PPCDAGToDAGISel::SelectSETCC) + + Mode = TargetSubtargetInfo::ANTIDEP_CRITICAL; CriticalPathRCs.clear(); @@ -157,6 +126,9 @@ bool PPCSubtarget::enablePostRAScheduler( CriticalPathRCs.push_back(&PPC::G8RCRegClass); else CriticalPathRCs.push_back(&PPC::GPRCRegClass); + + CriticalPathRCs.push_back(&PPC::F8RCRegClass); + CriticalPathRCs.push_back(&PPC::VRRCRegClass); return OptLevel >= CodeGenOpt::Default; } diff --git a/lib/Target/PowerPC/PPCSubtarget.h b/lib/Target/PowerPC/PPCSubtarget.h index a275029..0207c83 100644 --- a/lib/Target/PowerPC/PPCSubtarget.h +++ b/lib/Target/PowerPC/PPCSubtarget.h @@ -41,6 +41,8 @@ namespace PPC { DIR_750, DIR_970, DIR_A2, + DIR_PWR6, + DIR_PWR7, DIR_64 }; } @@ -61,13 +63,14 @@ protected: unsigned DarwinDirective; /// Used by the ISel to turn in optimizations for POWER4-derived architectures - bool IsGigaProcessor; + bool HasMFOCRF; bool Has64BitSupport; bool Use64BitRegs; bool IsPPC64; bool HasAltivec; bool HasFSQRT; bool HasSTFIWX; + bool HasISEL; bool IsBookE; bool HasLazyResolverStubs; bool IsJITCodeModel; @@ -138,7 +141,8 @@ public: bool hasFSQRT() const { return HasFSQRT; } bool hasSTFIWX() const { return HasSTFIWX; } bool hasAltivec() const { return HasAltivec; } - bool isGigaProcessor() const { return IsGigaProcessor; } + bool hasMFOCRF() const { return HasMFOCRF; } + bool hasISEL() const { return HasISEL; } bool isBookE() const { return IsBookE; } const Triple &getTargetTriple() const { return TargetTriple; } diff --git a/lib/Target/PowerPC/PPCTargetMachine.cpp b/lib/Target/PowerPC/PPCTargetMachine.cpp index 50f3db8..9805112 100644 --- a/lib/Target/PowerPC/PPCTargetMachine.cpp +++ b/lib/Target/PowerPC/PPCTargetMachine.cpp @@ -17,10 +17,15 @@ #include "llvm/MC/MCStreamer.h" #include "llvm/CodeGen/Passes.h" #include "llvm/Target/TargetOptions.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/FormattedStream.h" #include "llvm/Support/TargetRegistry.h" using namespace llvm; +static cl:: +opt DisableCTRLoops("disable-ppc-ctrloops", cl::Hidden, + cl::desc("Disable CTR loops for PPC")); + extern "C" void LLVMInitializePowerPCTarget() { // Register the targets RegisterTargetMachine A(ThePPC32Target); @@ -81,41 +86,37 @@ public: return getTM(); } + virtual bool addPreRegAlloc(); virtual bool addInstSelector(); virtual bool addPreEmitPass(); }; } // namespace TargetPassConfig *PPCTargetMachine::createPassConfig(PassManagerBase &PM) { - TargetPassConfig *PassConfig = new PPCPassConfig(this, PM); + return new PPCPassConfig(this, PM); +} - // Override this for PowerPC. Tail merging happily breaks up instruction issue - // groups, which typically degrades performance. - PassConfig->setEnableTailMerge(false); +bool PPCPassConfig::addPreRegAlloc() { + if (!DisableCTRLoops && getOptLevel() != CodeGenOpt::None) + addPass(createPPCCTRLoops()); - return PassConfig; + return false; } bool PPCPassConfig::addInstSelector() { // Install an instruction selector. - PM->add(createPPCISelDag(getPPCTargetMachine())); + addPass(createPPCISelDag(getPPCTargetMachine())); return false; } bool PPCPassConfig::addPreEmitPass() { // Must run branch selection immediately preceding the asm printer. 
- PM->add(createPPCBranchSelectionPass()); + addPass(createPPCBranchSelectionPass()); return false; } bool PPCTargetMachine::addCodeEmitter(PassManagerBase &PM, JITCodeEmitter &JCE) { - // FIXME: This should be moved to TargetJITInfo!! - if (Subtarget.isPPC64()) - // Temporary workaround for the inability of PPC64 JIT to handle jump - // tables. - Options.DisableJumpTables = true; - // Inform the subtarget that we are in JIT mode. FIXME: does this break macho // writing? Subtarget.SetJITMode(); diff --git a/lib/Target/PowerPC/README.txt b/lib/Target/PowerPC/README.txt index 349cd89..b6763aa 100644 --- a/lib/Target/PowerPC/README.txt +++ b/lib/Target/PowerPC/README.txt @@ -2,7 +2,6 @@ TODO: * gpr0 allocation -* implement do-loop -> bdnz transform * lmw/stmw pass a la arm load store optimizer for prolog/epilog ===-------------------------------------------------------------------------=== diff --git a/lib/Target/PowerPC/TargetInfo/Makefile b/lib/Target/PowerPC/TargetInfo/Makefile index a101aa4..2d0560d 100644 --- a/lib/Target/PowerPC/TargetInfo/Makefile +++ b/lib/Target/PowerPC/TargetInfo/Makefile @@ -10,6 +10,6 @@ LEVEL = ../../../.. LIBRARYNAME = LLVMPowerPCInfo # Hack: we need to include 'main' target directory to grab private headers -CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. +override CPPFLAGS += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. include $(LEVEL)/Makefile.common diff --git a/lib/Target/README.txt b/lib/Target/README.txt index 093255e..cbfa4cf 100644 --- a/lib/Target/README.txt +++ b/lib/Target/README.txt @@ -964,6 +964,12 @@ optimized with "clang -emit-llvm-bc | opt -std-compile-opts". //===---------------------------------------------------------------------===// +unsigned f(unsigned x) { return ((x & 7) + 1) & 15; } +The & 15 part should be optimized away, it doesn't change the result. Currently +not optimized with "clang -emit-llvm-bc | opt -std-compile-opts". + +//===---------------------------------------------------------------------===// + This was noticed in the entryblock for grokdeclarator in 403.gcc: %tmp = icmp eq i32 %decl_context, 4 diff --git a/lib/Target/Sparc/CMakeLists.txt b/lib/Target/Sparc/CMakeLists.txt index ae4af0f..efb10db 100644 --- a/lib/Target/Sparc/CMakeLists.txt +++ b/lib/Target/Sparc/CMakeLists.txt @@ -23,5 +23,7 @@ add_llvm_target(SparcCodeGen SparcSelectionDAGInfo.cpp ) +add_dependencies(LLVMSparcCodeGen intrinsics_gen) + add_subdirectory(TargetInfo) add_subdirectory(MCTargetDesc) diff --git a/lib/Target/Sparc/DelaySlotFiller.cpp b/lib/Target/Sparc/DelaySlotFiller.cpp index 883aa3a..7bf8c3f 100644 --- a/lib/Target/Sparc/DelaySlotFiller.cpp +++ b/lib/Target/Sparc/DelaySlotFiller.cpp @@ -279,14 +279,11 @@ void Filler::insertDefsUses(MachineBasicBlock::iterator MI, //returns true if the Reg or its alias is in the RegSet. bool Filler::IsRegInSet(SmallSet& RegSet, unsigned Reg) { - if (RegSet.count(Reg)) - return true; - // check Aliased Registers - for (const uint16_t *Alias = TM.getRegisterInfo()->getAliasSet(Reg); - *Alias; ++ Alias) - if (RegSet.count(*Alias)) + // Check Reg and all aliased Registers. 
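  // (Passing true as the third argument makes MCRegAliasIterator visit Reg
  //  itself as well, so the separate RegSet.count(Reg) check is no longer
  //  needed.)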
+ for (MCRegAliasIterator AI(Reg, TM.getRegisterInfo(), true); + AI.isValid(); ++AI) + if (RegSet.count(*AI)) return true; - return false; } diff --git a/lib/Target/Sparc/SparcAsmPrinter.cpp b/lib/Target/Sparc/SparcAsmPrinter.cpp index c14b3d4..2554862 100644 --- a/lib/Target/Sparc/SparcAsmPrinter.cpp +++ b/lib/Target/Sparc/SparcAsmPrinter.cpp @@ -187,7 +187,9 @@ bool SparcAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, if (ExtraCode[1] != 0) return true; // Unknown modifier. switch (ExtraCode[0]) { - default: return true; // Unknown modifier. + default: + // See if this is a generic print operand + return AsmPrinter::PrintAsmOperand(MI, OpNo, AsmVariant, ExtraCode, O); case 'r': break; } diff --git a/lib/Target/Sparc/SparcFrameLowering.h b/lib/Target/Sparc/SparcFrameLowering.h index 210705e..6b593c9 100644 --- a/lib/Target/Sparc/SparcFrameLowering.h +++ b/lib/Target/Sparc/SparcFrameLowering.h @@ -22,10 +22,9 @@ namespace llvm { class SparcSubtarget; class SparcFrameLowering : public TargetFrameLowering { - const SparcSubtarget &STI; public: - explicit SparcFrameLowering(const SparcSubtarget &sti) - : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, 8, 0), STI(sti) { + explicit SparcFrameLowering(const SparcSubtarget &/*sti*/) + : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, 8, 0) { } /// emitProlog/emitEpilog - These methods insert prolog and epilog code into diff --git a/lib/Target/Sparc/SparcISelLowering.cpp b/lib/Target/Sparc/SparcISelLowering.cpp index c3e6f16..79f7ebd 100644 --- a/lib/Target/Sparc/SparcISelLowering.cpp +++ b/lib/Target/Sparc/SparcISelLowering.cpp @@ -90,7 +90,7 @@ SparcTargetLowering::LowerReturn(SDValue Chain, // CCState - Info about the registers and stack slot. CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), - DAG.getTarget(), RVLocs, *DAG.getContext()); + DAG.getTarget(), RVLocs, *DAG.getContext()); // Analize return values. CCInfo.AnalyzeReturn(Outs, RetCC_Sparc32); @@ -160,7 +160,7 @@ SparcTargetLowering::LowerFormalArguments(SDValue Chain, // Assign locations to all of the incoming arguments. SmallVector ArgLocs; CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), - getTargetMachine(), ArgLocs, *DAG.getContext()); + getTargetMachine(), ArgLocs, *DAG.getContext()); CCInfo.AnalyzeFormalArguments(Ins, CC_Sparc32); const unsigned StackOffset = 92; @@ -345,21 +345,26 @@ SparcTargetLowering::LowerFormalArguments(SDValue Chain, } SDValue -SparcTargetLowering::LowerCall(SDValue Chain, SDValue Callee, - CallingConv::ID CallConv, bool isVarArg, - bool doesNotRet, bool &isTailCall, - const SmallVectorImpl &Outs, - const SmallVectorImpl &OutVals, - const SmallVectorImpl &Ins, - DebugLoc dl, SelectionDAG &DAG, +SparcTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, SmallVectorImpl &InVals) const { + SelectionDAG &DAG = CLI.DAG; + DebugLoc &dl = CLI.DL; + SmallVector &Outs = CLI.Outs; + SmallVector &OutVals = CLI.OutVals; + SmallVector &Ins = CLI.Ins; + SDValue Chain = CLI.Chain; + SDValue Callee = CLI.Callee; + bool &isTailCall = CLI.IsTailCall; + CallingConv::ID CallConv = CLI.CallConv; + bool isVarArg = CLI.IsVarArg; + // Sparc target does not yet support tail call optimization. isTailCall = false; // Analyze operands of the call, assigning locations to each operand. 
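  // (CCState walks Outs and assigns each argument a register or stack slot
  //  according to the CC_Sparc32 calling-convention table.)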
SmallVector ArgLocs; CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), - DAG.getTarget(), ArgLocs, *DAG.getContext()); + DAG.getTarget(), ArgLocs, *DAG.getContext()); CCInfo.AnalyzeCallOperands(Outs, CC_Sparc32); // Get the size of the outgoing arguments stack space requirement. @@ -590,7 +595,7 @@ SparcTargetLowering::LowerCall(SDValue Chain, SDValue Callee, // Assign locations to each value returned by this call. SmallVector RVLocs; CCState RVInfo(CallConv, isVarArg, DAG.getMachineFunction(), - DAG.getTarget(), RVLocs, *DAG.getContext()); + DAG.getTarget(), RVLocs, *DAG.getContext()); RVInfo.AnalyzeCallResult(Ins, RetCC_Sparc32); @@ -689,9 +694,9 @@ SparcTargetLowering::SparcTargetLowering(TargetMachine &TM) : TargetLowering(TM, new TargetLoweringObjectFileELF()) { // Set up the register classes. - addRegisterClass(MVT::i32, SP::IntRegsRegisterClass); - addRegisterClass(MVT::f32, SP::FPRegsRegisterClass); - addRegisterClass(MVT::f64, SP::DFPRegsRegisterClass); + addRegisterClass(MVT::i32, &SP::IntRegsRegClass); + addRegisterClass(MVT::f32, &SP::FPRegsRegClass); + addRegisterClass(MVT::f64, &SP::DFPRegsRegClass); // Turn FP extload into load/fextend setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand); @@ -1259,7 +1264,7 @@ SparcTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, if (Constraint.size() == 1) { switch (Constraint[0]) { case 'r': - return std::make_pair(0U, SP::IntRegsRegisterClass); + return std::make_pair(0U, &SP::IntRegsRegClass); } } diff --git a/lib/Target/Sparc/SparcISelLowering.h b/lib/Target/Sparc/SparcISelLowering.h index cf43048..09148ea 100644 --- a/lib/Target/Sparc/SparcISelLowering.h +++ b/lib/Target/Sparc/SparcISelLowering.h @@ -76,12 +76,7 @@ namespace llvm { SmallVectorImpl &InVals) const; virtual SDValue - LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, - bool isVarArg, bool doesNotRet, bool &isTailCall, - const SmallVectorImpl &Outs, - const SmallVectorImpl &OutVals, - const SmallVectorImpl &Ins, - DebugLoc dl, SelectionDAG &DAG, + LowerCall(TargetLowering::CallLoweringInfo &CLI, SmallVectorImpl &InVals) const; virtual SDValue diff --git a/lib/Target/Sparc/SparcInstrInfo.cpp b/lib/Target/Sparc/SparcInstrInfo.cpp index faff468..f8674d0 100644 --- a/lib/Target/Sparc/SparcInstrInfo.cpp +++ b/lib/Target/Sparc/SparcInstrInfo.cpp @@ -303,13 +303,13 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, if (I != MBB.end()) DL = I->getDebugLoc(); // On the order of operands here: think "[FrameIdx + 0] = SrcReg". 
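  // (SP::IntRegsRegClass and friends are now referenced as register-class
  //  objects rather than through the old *RegisterClass pointers.)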
- if (RC == SP::IntRegsRegisterClass) + if (RC == &SP::IntRegsRegClass) BuildMI(MBB, I, DL, get(SP::STri)).addFrameIndex(FI).addImm(0) .addReg(SrcReg, getKillRegState(isKill)); - else if (RC == SP::FPRegsRegisterClass) + else if (RC == &SP::FPRegsRegClass) BuildMI(MBB, I, DL, get(SP::STFri)).addFrameIndex(FI).addImm(0) .addReg(SrcReg, getKillRegState(isKill)); - else if (RC == SP::DFPRegsRegisterClass) + else if (RC == &SP::DFPRegsRegClass) BuildMI(MBB, I, DL, get(SP::STDFri)).addFrameIndex(FI).addImm(0) .addReg(SrcReg, getKillRegState(isKill)); else @@ -324,11 +324,11 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, DebugLoc DL; if (I != MBB.end()) DL = I->getDebugLoc(); - if (RC == SP::IntRegsRegisterClass) + if (RC == &SP::IntRegsRegClass) BuildMI(MBB, I, DL, get(SP::LDri), DestReg).addFrameIndex(FI).addImm(0); - else if (RC == SP::FPRegsRegisterClass) + else if (RC == &SP::FPRegsRegClass) BuildMI(MBB, I, DL, get(SP::LDFri), DestReg).addFrameIndex(FI).addImm(0); - else if (RC == SP::DFPRegsRegisterClass) + else if (RC == &SP::DFPRegsRegClass) BuildMI(MBB, I, DL, get(SP::LDDFri), DestReg).addFrameIndex(FI).addImm(0); else llvm_unreachable("Can't load this register from stack slot"); diff --git a/lib/Target/Sparc/SparcRegisterInfo.cpp b/lib/Target/Sparc/SparcRegisterInfo.cpp index 6357468..ff8d3c5 100644 --- a/lib/Target/Sparc/SparcRegisterInfo.cpp +++ b/lib/Target/Sparc/SparcRegisterInfo.cpp @@ -109,9 +109,6 @@ SparcRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, } } -void SparcRegisterInfo:: -processFunctionBeforeFrameFinalized(MachineFunction &MF) const {} - unsigned SparcRegisterInfo::getFrameRegister(const MachineFunction &MF) const { return SP::I6; } diff --git a/lib/Target/Sparc/SparcTargetMachine.cpp b/lib/Target/Sparc/SparcTargetMachine.cpp index cc25307..9ee12ed 100644 --- a/lib/Target/Sparc/SparcTargetMachine.cpp +++ b/lib/Target/Sparc/SparcTargetMachine.cpp @@ -34,7 +34,8 @@ SparcTargetMachine::SparcTargetMachine(const Target &T, StringRef TT, : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL), Subtarget(TT, CPU, FS, is64bit), DataLayout(Subtarget.getDataLayout()), - TLInfo(*this), TSInfo(*this), InstrInfo(Subtarget), + InstrInfo(Subtarget), + TLInfo(*this), TSInfo(*this), FrameLowering(Subtarget) { } @@ -59,7 +60,7 @@ TargetPassConfig *SparcTargetMachine::createPassConfig(PassManagerBase &PM) { } bool SparcPassConfig::addInstSelector() { - PM->add(createSparcISelDag(getSparcTargetMachine())); + addPass(createSparcISelDag(getSparcTargetMachine())); return false; } @@ -67,8 +68,8 @@ bool SparcPassConfig::addInstSelector() { /// passes immediately before machine code is emitted. This should return /// true if -print-machineinstrs should print out the code after the passes. 
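Before addPreEmitPass continues below, a note on the reordering above: C++ initializes non-static data members in declaration order, never in mem-initializer-list order, so listing InstrInfo ahead of TLInfo here, together with moving the member itself in the SparcTargetMachine.h hunk further down, keeps the two orders consistent (and silences -Wreorder). A self-contained illustration with toy members:

#include <iostream>

// Members are initialized in declaration order: A first, then B.
// Keeping the mem-initializer list in the same order lets B depend on A.
struct Demo {
  int A;
  int B;
  Demo() : A(1), B(A + 1) {}
};

int main() { std::cout << Demo().B << '\n'; }  // prints: 2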
bool SparcPassConfig::addPreEmitPass(){ - PM->add(createSparcFPMoverPass(getSparcTargetMachine())); - PM->add(createSparcDelaySlotFillerPass(getSparcTargetMachine())); + addPass(createSparcFPMoverPass(getSparcTargetMachine())); + addPass(createSparcDelaySlotFillerPass(getSparcTargetMachine())); return true; } diff --git a/lib/Target/Sparc/SparcTargetMachine.h b/lib/Target/Sparc/SparcTargetMachine.h index b203dfa..b2cc624 100644 --- a/lib/Target/Sparc/SparcTargetMachine.h +++ b/lib/Target/Sparc/SparcTargetMachine.h @@ -28,9 +28,9 @@ namespace llvm { class SparcTargetMachine : public LLVMTargetMachine { SparcSubtarget Subtarget; const TargetData DataLayout; // Calculates type size & alignment + SparcInstrInfo InstrInfo; SparcTargetLowering TLInfo; SparcSelectionDAGInfo TSInfo; - SparcInstrInfo InstrInfo; SparcFrameLowering FrameLowering; public: SparcTargetMachine(const Target &T, StringRef TT, diff --git a/lib/Target/TargetData.cpp b/lib/Target/TargetData.cpp index acb7476..cc6dc1e 100644 --- a/lib/Target/TargetData.cpp +++ b/lib/Target/TargetData.cpp @@ -117,8 +117,8 @@ TargetAlignElem::operator==(const TargetAlignElem &rhs) const { && TypeBitWidth == rhs.TypeBitWidth); } -const TargetAlignElem TargetData::InvalidAlignmentElem = - TargetAlignElem::get((AlignTypeEnum) -1, 0, 0, 0); +const TargetAlignElem +TargetData::InvalidAlignmentElem = { (AlignTypeEnum)0xFF, 0, 0, 0 }; //===----------------------------------------------------------------------===// // TargetData Class Implementation diff --git a/lib/Target/TargetInstrInfo.cpp b/lib/Target/TargetInstrInfo.cpp index 440f9ad..f1d1d07 100644 --- a/lib/Target/TargetInstrInfo.cpp +++ b/lib/Target/TargetInstrInfo.cpp @@ -21,20 +21,25 @@ using namespace llvm; //===----------------------------------------------------------------------===// // TargetInstrInfo -//===----------------------------------------------------------------------===// +// +// Methods that depend on CodeGen are implemented in +// TargetInstrInfoImpl.cpp. Invoking them without linking libCodeGen raises a +// link error. +// ===----------------------------------------------------------------------===// TargetInstrInfo::~TargetInstrInfo() { } const TargetRegisterClass* TargetInstrInfo::getRegClass(const MCInstrDesc &MCID, unsigned OpNum, - const TargetRegisterInfo *TRI) const { + const TargetRegisterInfo *TRI, + const MachineFunction &MF) const { if (OpNum >= MCID.getNumOperands()) return 0; short RegClass = MCID.OpInfo[OpNum].RegClass; if (MCID.OpInfo[OpNum].isLookupPtrRegClass()) - return TRI->getPointerRegClass(RegClass); + return TRI->getPointerRegClass(MF, RegClass); // Instructions like INSERT_SUBREG do not have fixed register classes. if (RegClass < 0) @@ -44,54 +49,6 @@ TargetInstrInfo::getRegClass(const MCInstrDesc &MCID, unsigned OpNum, return TRI->getRegClass(RegClass); } -unsigned -TargetInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData, - const MachineInstr *MI) const { - if (!ItinData || ItinData->isEmpty()) - return 1; - - unsigned Class = MI->getDesc().getSchedClass(); - unsigned UOps = ItinData->Itineraries[Class].NumMicroOps; - if (UOps) - return UOps; - - // The # of u-ops is dynamically determined. The specific target should - // override this function to return the right number. 
- return 1; -} - -int -TargetInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, - const MachineInstr *DefMI, unsigned DefIdx, - const MachineInstr *UseMI, unsigned UseIdx) const { - if (!ItinData || ItinData->isEmpty()) - return -1; - - unsigned DefClass = DefMI->getDesc().getSchedClass(); - unsigned UseClass = UseMI->getDesc().getSchedClass(); - return ItinData->getOperandLatency(DefClass, DefIdx, UseClass, UseIdx); -} - -int TargetInstrInfo::getInstrLatency(const InstrItineraryData *ItinData, - const MachineInstr *MI, - unsigned *PredCost) const { - if (!ItinData || ItinData->isEmpty()) - return 1; - - return ItinData->getStageLatency(MI->getDesc().getSchedClass()); -} - -bool TargetInstrInfo::hasLowDefLatency(const InstrItineraryData *ItinData, - const MachineInstr *DefMI, - unsigned DefIdx) const { - if (!ItinData || ItinData->isEmpty()) - return false; - - unsigned DefClass = DefMI->getDesc().getSchedClass(); - int DefCycle = ItinData->getOperandCycle(DefClass, DefIdx); - return (DefCycle != -1 && DefCycle <= 1); -} - /// insertNoop - Insert a noop into the instruction stream at the specified /// point. void TargetInstrInfo::insertNoop(MachineBasicBlock &MBB, @@ -99,7 +56,6 @@ void TargetInstrInfo::insertNoop(MachineBasicBlock &MBB, llvm_unreachable("Target didn't implement insertNoop!"); } - /// Measure the specified inline asm to determine an approximation of its /// length. /// Comments (which run till the next SeparatorString or newline) do not diff --git a/lib/Target/TargetLibraryInfo.cpp b/lib/Target/TargetLibraryInfo.cpp index ec95ad4..8e215a7 100644 --- a/lib/Target/TargetLibraryInfo.cpp +++ b/lib/Target/TargetLibraryInfo.cpp @@ -24,64 +24,72 @@ void TargetLibraryInfo::anchor() { } const char* TargetLibraryInfo::StandardNames[LibFunc::NumLibFuncs] = { + "__cxa_atexit", + "__cxa_guard_abort", + "__cxa_guard_acquire", + "__cxa_guard_release", + "__memcpy_chk", "acos", - "acosl", "acosf", + "acosl", "asin", - "asinl", "asinf", + "asinl", "atan", - "atanl", - "atanf", "atan2", - "atan2l", "atan2f", + "atan2l", + "atanf", + "atanl", "ceil", - "ceill", "ceilf", + "ceill", "copysign", "copysignf", "copysignl", "cos", - "cosl", "cosf", "cosh", - "coshl", "coshf", + "coshl", + "cosl", "exp", - "expl", - "expf", "exp2", - "exp2l", "exp2f", + "exp2l", + "expf", + "expl", "expm1", - "expm1l", "expm1f", + "expm1l", "fabs", - "fabsl", "fabsf", + "fabsl", + "fiprintf", "floor", - "floorl", "floorf", - "fiprintf", + "floorl", "fmod", - "fmodl", "fmodf", + "fmodl", + "fputc", "fputs", "fwrite", "iprintf", "log", - "logl", - "logf", - "log2", - "log2l", - "log2f", "log10", - "log10l", "log10f", + "log10l", "log1p", - "log1pl", "log1pf", + "log1pl", + "log2", + "log2f", + "log2l", + "logf", + "logl", + "memchr", + "memcmp", "memcpy", "memmove", "memset", @@ -92,6 +100,8 @@ const char* TargetLibraryInfo::StandardNames[LibFunc::NumLibFuncs] = "pow", "powf", "powl", + "putchar", + "puts", "rint", "rintf", "rintl", @@ -99,36 +109,48 @@ const char* TargetLibraryInfo::StandardNames[LibFunc::NumLibFuncs] = "roundf", "roundl", "sin", - "sinl", "sinf", "sinh", - "sinhl", "sinhf", + "sinhl", + "sinl", "siprintf", "sqrt", - "sqrtl", "sqrtf", + "sqrtl", + "strcat", + "strchr", + "strcpy", + "strlen", + "strncat", + "strncmp", + "strncpy", + "strnlen", "tan", - "tanl", "tanf", "tanh", - "tanhl", "tanhf", + "tanhl", + "tanl", "trunc", "truncf", - "truncl", - "__cxa_atexit", - "__cxa_guard_abort", - "__cxa_guard_acquire", - "__cxa_guard_release" + "truncl" }; /// initialize - Initialize the set 
of available library functions based on the /// specified target triple. This should be carefully written so that a missing /// target triple gets a sane set of defaults. -static void initialize(TargetLibraryInfo &TLI, const Triple &T) { +static void initialize(TargetLibraryInfo &TLI, const Triple &T, + const char **StandardNames) { initializeTargetLibraryInfoPass(*PassRegistry::getPassRegistry()); +#ifndef NDEBUG + // Verify that the StandardNames array is in alphabetical order. + for (unsigned F = 1; F < LibFunc::NumLibFuncs; ++F) { + if (strcmp(StandardNames[F-1], StandardNames[F]) >= 0) + llvm_unreachable("TargetLibraryInfo function names must be sorted"); + } +#endif // !NDEBUG // memset_pattern16 is only available on iOS 3.0 and Mac OS/X 10.5 and later. if (T.isMacOSX()) { @@ -240,14 +262,14 @@ TargetLibraryInfo::TargetLibraryInfo() : ImmutablePass(ID) { // Default to everything being available. memset(AvailableArray, -1, sizeof(AvailableArray)); - initialize(*this, Triple()); + initialize(*this, Triple(), StandardNames); } TargetLibraryInfo::TargetLibraryInfo(const Triple &T) : ImmutablePass(ID) { // Default to everything being available. memset(AvailableArray, -1, sizeof(AvailableArray)); - initialize(*this, T); + initialize(*this, T, StandardNames); } TargetLibraryInfo::TargetLibraryInfo(const TargetLibraryInfo &TLI) @@ -256,6 +278,17 @@ TargetLibraryInfo::TargetLibraryInfo(const TargetLibraryInfo &TLI) CustomNames = TLI.CustomNames; } +bool TargetLibraryInfo::getLibFunc(StringRef funcName, + LibFunc::Func &F) const { + const char **Start = &StandardNames[0]; + const char **End = &StandardNames[LibFunc::NumLibFuncs]; + const char **I = std::lower_bound(Start, End, funcName); + if (I != End && *I == funcName) { + F = (LibFunc::Func)(I - Start); + return true; + } + return false; +} /// disableAllFunctions - This disables all builtins, which is used for options /// like -fno-builtin. diff --git a/lib/Target/TargetLoweringObjectFile.cpp b/lib/Target/TargetLoweringObjectFile.cpp index 2570e0d..b74a0bd 100644 --- a/lib/Target/TargetLoweringObjectFile.cpp +++ b/lib/Target/TargetLoweringObjectFile.cpp @@ -152,7 +152,7 @@ SectionKind TargetLoweringObjectFile::getKindForGlobal(const GlobalValue *GV, // a mergable string section, or general .data if it contains relocations. if (GVar->isConstant()) { // If the initializer for the global contains something that requires a - // relocation, then we may have to drop this into a wriable data section + // relocation, then we may have to drop this into a writable data section // even though it is marked const. switch (C->getRelocationInfo()) { case Constant::NoRelocation: diff --git a/lib/Target/TargetMachine.cpp b/lib/Target/TargetMachine.cpp index b9b2526..3825719 100644 --- a/lib/Target/TargetMachine.cpp +++ b/lib/Target/TargetMachine.cpp @@ -11,7 +11,9 @@ // //===----------------------------------------------------------------------===// +#include "llvm/GlobalAlias.h" #include "llvm/GlobalValue.h" +#include "llvm/GlobalVariable.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCCodeGenInfo.h" #include "llvm/Target/TargetMachine.h" @@ -75,25 +77,58 @@ CodeModel::Model TargetMachine::getCodeModel() const { return CodeGenInfo->getCodeModel(); } +/// Get the IR-specified TLS model for Var. 
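Before the TLS helper just documented is defined below, a note on the getLibFunc added above: it binary-searches StandardNames with std::lower_bound, which is only correct because the table was re-sorted alphabetically earlier in this patch, an invariant initialize() now checks under !NDEBUG. A self-contained sketch of the same technique on a toy table (placeholder names, C++11 lambda for brevity):

#include <algorithm>
#include <cstring>
#include <iostream>

// Toy stand-in for StandardNames: must stay sorted for lower_bound.
static const char *const Names[] = {"cos", "exp", "log", "memcpy", "sin"};
static const unsigned NumNames = sizeof(Names) / sizeof(Names[0]);

// Returns the index of Name, or -1 if absent; O(log n) over the table.
static int lookup(const char *Name) {
  const char *const *End = Names + NumNames;
  const char *const *I = std::lower_bound(
      Names, End, Name,
      [](const char *A, const char *B) { return std::strcmp(A, B) < 0; });
  if (I != End && std::strcmp(*I, Name) == 0)
    return int(I - Names);
  return -1;
}

int main() { std::cout << lookup("log") << ' ' << lookup("pow") << '\n'; }  // 2 -1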
+static TLSModel::Model getSelectedTLSModel(const GlobalVariable *Var) { + switch (Var->getThreadLocalMode()) { + case GlobalVariable::NotThreadLocal: + llvm_unreachable("getSelectedTLSModel for non-TLS variable"); + break; + case GlobalVariable::GeneralDynamicTLSModel: + return TLSModel::GeneralDynamic; + case GlobalVariable::LocalDynamicTLSModel: + return TLSModel::LocalDynamic; + case GlobalVariable::InitialExecTLSModel: + return TLSModel::InitialExec; + case GlobalVariable::LocalExecTLSModel: + return TLSModel::LocalExec; + } + llvm_unreachable("invalid TLS model"); +} + TLSModel::Model TargetMachine::getTLSModel(const GlobalValue *GV) const { - bool isLocal = GV->hasLocalLinkage(); - bool isDeclaration = GV->isDeclaration(); + // If GV is an alias then use the aliasee for determining + // thread-localness. + if (const GlobalAlias *GA = dyn_cast(GV)) + GV = GA->resolveAliasedGlobal(false); + const GlobalVariable *Var = cast(GV); + + bool isLocal = Var->hasLocalLinkage(); + bool isDeclaration = Var->isDeclaration(); + bool isPIC = getRelocationModel() == Reloc::PIC_; + bool isPIE = Options.PositionIndependentExecutable; // FIXME: what should we do for protected and internal visibility? // For variables, is internal different from hidden? - bool isHidden = GV->hasHiddenVisibility(); + bool isHidden = Var->hasHiddenVisibility(); - if (getRelocationModel() == Reloc::PIC_ && - !Options.PositionIndependentExecutable) { + TLSModel::Model Model; + if (isPIC && !isPIE) { if (isLocal || isHidden) - return TLSModel::LocalDynamic; + Model = TLSModel::LocalDynamic; else - return TLSModel::GeneralDynamic; + Model = TLSModel::GeneralDynamic; } else { if (!isDeclaration || isHidden) - return TLSModel::LocalExec; + Model = TLSModel::LocalExec; else - return TLSModel::InitialExec; + Model = TLSModel::InitialExec; } + + // If the user specified a more specific model, use that. + TLSModel::Model SelectedModel = getSelectedTLSModel(Var); + if (SelectedModel > Model) + return SelectedModel; + + return Model; } /// getOptLevel - Returns the optimization level: None, Less, @@ -127,4 +162,3 @@ void TargetMachine::setFunctionSections(bool V) { void TargetMachine::setDataSections(bool V) { DataSections = V; } - diff --git a/lib/Target/TargetRegisterInfo.cpp b/lib/Target/TargetRegisterInfo.cpp index 1716423..2395f2b 100644 --- a/lib/Target/TargetRegisterInfo.cpp +++ b/lib/Target/TargetRegisterInfo.cpp @@ -46,6 +46,50 @@ void PrintReg::print(raw_ostream &OS) const { } } +void PrintRegUnit::print(raw_ostream &OS) const { + // Generic printout when TRI is missing. + if (!TRI) { + OS << "Unit~" << Unit; + return; + } + + // Check for invalid register units. + if (Unit >= TRI->getNumRegUnits()) { + OS << "BadUnit~" << Unit; + return; + } + + // Normal units have at least one root. + MCRegUnitRootIterator Roots(Unit, TRI); + assert(Roots.isValid() && "Unit has no roots."); + OS << TRI->getName(*Roots); + for (++Roots; Roots.isValid(); ++Roots) + OS << '~' << TRI->getName(*Roots); +} + +/// getAllocatableClass - Return the maximal subclass of the given register +/// class that is allocatable, or NULL.
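The rewritten getTLSModel above derives a baseline model from the relocation settings and then lets the IR-specified model override it; SelectedModel > Model is a legal tightening because TLSModel's enumerators run from most to least general. A reduced, self-contained sketch of that decision (the enum is re-declared locally to mirror the ordering; the getAllocatableClass implementation documented just above follows after this note):

#include <iostream>

// Ordered weakest to strongest, matching llvm::TLSModel, so a plain >
// always picks the more specific (more constrained) model.
enum Model { GeneralDynamic, LocalDynamic, InitialExec, LocalExec };

static Model pickTLSModel(bool isPIC, bool isPIE, bool isLocal, bool isHidden,
                          bool isDeclaration, bool hasSelected, Model selected) {
  Model M;
  if (isPIC && !isPIE)
    M = (isLocal || isHidden) ? LocalDynamic : GeneralDynamic;
  else
    M = (!isDeclaration || isHidden) ? LocalExec : InitialExec;
  // An explicit IR-level model may only tighten the choice, never loosen it.
  if (hasSelected && selected > M)
    return selected;
  return M;
}

int main() {
  // PIC, external declaration, no explicit model: general-dynamic (0).
  std::cout << pickTLSModel(true, false, false, false, true, false,
                            GeneralDynamic) << '\n';
  // Same, but the IR asked for initial-exec: the stronger model wins (2).
  std::cout << pickTLSModel(true, false, false, false, true, true,
                            InitialExec) << '\n';
}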
+const TargetRegisterClass * +TargetRegisterInfo::getAllocatableClass(const TargetRegisterClass *RC) const { + if (!RC || RC->isAllocatable()) + return RC; + + const unsigned *SubClass = RC->getSubClassMask(); + for (unsigned Base = 0, BaseE = getNumRegClasses(); + Base < BaseE; Base += 32) { + unsigned Idx = Base; + for (unsigned Mask = *SubClass++; Mask; Mask >>= 1) { + unsigned Offset = CountTrailingZeros_32(Mask); + const TargetRegisterClass *SubRC = getRegClass(Idx + Offset); + if (SubRC->isAllocatable()) + return SubRC; + Mask >>= Offset; + Idx += Offset + 1; + } + } + return NULL; +} + /// getMinimalPhysRegClass - Returns the Register Class of a physical /// register of the given type, picking the most sub register class of /// the right type that contains this physreg. @@ -71,6 +115,7 @@ TargetRegisterInfo::getMinimalPhysRegClass(unsigned reg, EVT VT) const { /// registers for the specific register class. static void getAllocatableSetForRC(const MachineFunction &MF, const TargetRegisterClass *RC, BitVector &R){ + assert(RC->isAllocatable() && "invalid for nonallocatable sets"); ArrayRef Order = RC->getRawAllocationOrder(MF); for (unsigned i = 0; i != Order.size(); ++i) R.set(Order[i]); @@ -80,7 +125,10 @@ BitVector TargetRegisterInfo::getAllocatableSet(const MachineFunction &MF, const TargetRegisterClass *RC) const { BitVector Allocatable(getNumRegs()); if (RC) { - getAllocatableSetForRC(MF, RC, Allocatable); + // A register class with no allocatable subclass returns an empty set. + const TargetRegisterClass *SubClass = getAllocatableClass(RC); + if (SubClass) + getAllocatableSetForRC(MF, SubClass, Allocatable); } else { for (TargetRegisterInfo::regclass_iterator I = regclass_begin(), E = regclass_end(); I != E; ++I) @@ -95,6 +143,16 @@ BitVector TargetRegisterInfo::getAllocatableSet(const MachineFunction &MF, return Allocatable; } +static inline +const TargetRegisterClass *firstCommonClass(const uint32_t *A, + const uint32_t *B, + const TargetRegisterInfo *TRI) { + for (unsigned I = 0, E = TRI->getNumRegClasses(); I < E; I += 32) + if (unsigned Common = *A++ & *B++) + return TRI->getRegClass(I + CountTrailingZeros_32(Common)); + return 0; +} + const TargetRegisterClass * TargetRegisterInfo::getCommonSubClass(const TargetRegisterClass *A, const TargetRegisterClass *B) const { @@ -106,15 +164,83 @@ TargetRegisterInfo::getCommonSubClass(const TargetRegisterClass *A, // Register classes are ordered topologically, so the largest common // sub-class is the common sub-class with the smallest ID. - const unsigned *SubA = A->getSubClassMask(); - const unsigned *SubB = B->getSubClassMask(); + return firstCommonClass(A->getSubClassMask(), B->getSubClassMask(), this); - // We could start the search from max(A.ID, B.ID), but we are only going to - // execute 2-3 iterations anyway. - for (unsigned Base = 0, BaseE = getNumRegClasses(); Base < BaseE; Base += 32) - if (unsigned Common = *SubA++ & *SubB++) - return getRegClass(Base + CountTrailingZeros_32(Common)); +const TargetRegisterClass * +TargetRegisterInfo::getMatchingSuperRegClass(const TargetRegisterClass *A, + const TargetRegisterClass *B, + unsigned Idx) const { + assert(A && B && "Missing register class"); + assert(Idx && "Bad sub-register index"); + + // Find Idx in the list of super-register indices. + for (SuperRegClassIterator RCI(B, this); RCI.isValid(); ++RCI) + if (RCI.getSubReg() == Idx) + // The bit mask contains all register classes that are projected into B + // by Idx. Find a class that is also a sub-class of A.
+ return firstCommonClass(RCI.getMask(), A->getSubClassMask(), this); + return 0; +} - // No common sub-class exists. - return NULL; +const TargetRegisterClass *TargetRegisterInfo:: +getCommonSuperRegClass(const TargetRegisterClass *RCA, unsigned SubA, + const TargetRegisterClass *RCB, unsigned SubB, + unsigned &PreA, unsigned &PreB) const { + assert(RCA && SubA && RCB && SubB && "Invalid arguments"); + + // Search all pairs of sub-register indices that project into RCA and RCB + // respectively. This is quadratic, but usually the sets are very small. On + // most targets like X86, there will only be a single sub-register index + // (e.g., sub_16bit projecting into GR16). + // + // The worst case is a register class like DPR on ARM. + // We have indices dsub_0..dsub_7 projecting into that class. + // + // It is very common that one register class is a sub-register of the other. + // Arrange for RCA to be the larger register so the answer will be found in + // the first iteration. This makes the search linear for the most common + // case. + const TargetRegisterClass *BestRC = 0; + unsigned *BestPreA = &PreA; + unsigned *BestPreB = &PreB; + if (RCA->getSize() < RCB->getSize()) { + std::swap(RCA, RCB); + std::swap(SubA, SubB); + std::swap(BestPreA, BestPreB); + } + + // Also terminate the search once we have found a register class as small as + // RCA. + unsigned MinSize = RCA->getSize(); + + for (SuperRegClassIterator IA(RCA, this, true); IA.isValid(); ++IA) { + unsigned FinalA = composeSubRegIndices(IA.getSubReg(), SubA); + for (SuperRegClassIterator IB(RCB, this, true); IB.isValid(); ++IB) { + // Check if a common super-register class exists for this index pair. + const TargetRegisterClass *RC = + firstCommonClass(IA.getMask(), IB.getMask(), this); + if (!RC || RC->getSize() < MinSize) + continue; + + // The indexes must compose identically: PreA+SubA == PreB+SubB. + unsigned FinalB = composeSubRegIndices(IB.getSubReg(), SubB); + if (FinalA != FinalB) + continue; + + // Is RC a better candidate than BestRC? + if (BestRC && RC->getSize() >= BestRC->getSize()) + continue; + + // Yes, RC is the smallest super-register seen so far. + BestRC = RC; + *BestPreA = IA.getSubReg(); + *BestPreB = IB.getSubReg(); + + // Bail early if we reached MinSize. We won't find a better candidate. + if (BestRC->getSize() == MinSize) + return BestRC; + } + } + return BestRC; } diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp index 08c732c..fbbaa9500 100644 --- a/lib/Target/X86/AsmParser/X86AsmParser.cpp +++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -65,6 +65,10 @@ private: SmallVectorImpl &Operands, MCStreamer &Out); + bool MatchInstruction(SMLoc IDLoc, + SmallVectorImpl &Operands, + SmallVectorImpl &MCInsts); + /// isSrcOp - Returns true if operand is either (%rsi) or %ds:%(rsi) /// in 64bit mode or (%esi) or %es:(%esi) in 32bit mode.
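The TargetRegisterInfo.cpp hunks above all funnel into firstCommonClass: the two class bit masks are ANDed 32 bits at a time and the lowest common set bit wins, which by the topological ordering of class IDs is the largest common class; getCommonSuperRegClass then wraps that scan in the quadratic index-pair search its comment block explains. The scan in isolation, self-contained (__builtin_ctz is the GCC/Clang stand-in for LLVM's CountTrailingZeros_32), before the X86 asm parser diff continues below:

#include <cstdint>
#include <iostream>

// Index of the lowest bit set in both mask arrays, or -1 if none.
// One bit per register class, scanned in ascending class-ID order.
static int firstCommonBit(const uint32_t *A, const uint32_t *B,
                          unsigned NumBits) {
  for (unsigned I = 0; I < NumBits; I += 32)
    if (uint32_t Common = *A++ & *B++)
      return int(I + __builtin_ctz(Common));
  return -1;
}

int main() {
  uint32_t A[] = {0x0, 0x14};  // "classes" 34 and 36
  uint32_t B[] = {0x0, 0x30};  // "classes" 36 and 37
  std::cout << firstCommonBit(A, B, 64) << '\n';  // prints: 36
}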
bool isSrcOp(X86Operand &Op); @@ -117,7 +121,7 @@ static unsigned MatchRegisterName(StringRef Name); /// } -static bool isImmSExti16i8Value(uint64_t Value) { +static bool isImmSExti16i8Value(uint64_t Value) { return (( Value <= 0x000000000000007FULL)|| (0x000000000000FF80ULL <= Value && Value <= 0x000000000000FFFFULL)|| (0xFFFFFFFFFFFFFF80ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL)); @@ -135,12 +139,12 @@ static bool isImmZExtu32u8Value(uint64_t Value) { static bool isImmSExti64i8Value(uint64_t Value) { return (( Value <= 0x000000000000007FULL)|| - (0xFFFFFFFFFFFFFF80ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL)); + (0xFFFFFFFFFFFFFF80ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL)); } static bool isImmSExti64i32Value(uint64_t Value) { return (( Value <= 0x000000007FFFFFFFULL)|| - (0xFFFFFFFF80000000ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL)); + (0xFFFFFFFF80000000ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL)); } namespace { @@ -187,7 +191,7 @@ struct X86Operand : public MCParsedAsmOperand { SMLoc getStartLoc() const { return StartLoc; } /// getEndLoc - Get the location of the last token of this operand. SMLoc getEndLoc() const { return EndLoc; } - + SMRange getLocRange() const { return SMRange(StartLoc, EndLoc); } virtual void print(raw_ostream &OS) const {} @@ -309,28 +313,45 @@ struct X86Operand : public MCParsedAsmOperand { } bool isMem() const { return Kind == Memory; } - bool isMem8() const { + bool isMem8() const { return Kind == Memory && (!Mem.Size || Mem.Size == 8); } - bool isMem16() const { + bool isMem16() const { return Kind == Memory && (!Mem.Size || Mem.Size == 16); } - bool isMem32() const { + bool isMem32() const { return Kind == Memory && (!Mem.Size || Mem.Size == 32); } - bool isMem64() const { + bool isMem64() const { return Kind == Memory && (!Mem.Size || Mem.Size == 64); } - bool isMem80() const { + bool isMem80() const { return Kind == Memory && (!Mem.Size || Mem.Size == 80); } - bool isMem128() const { + bool isMem128() const { return Kind == Memory && (!Mem.Size || Mem.Size == 128); } - bool isMem256() const { + bool isMem256() const { return Kind == Memory && (!Mem.Size || Mem.Size == 256); } + bool isMemVX32() const { + return Kind == Memory && (!Mem.Size || Mem.Size == 32) && + getMemIndexReg() >= X86::XMM0 && getMemIndexReg() <= X86::XMM15; + } + bool isMemVY32() const { + return Kind == Memory && (!Mem.Size || Mem.Size == 32) && + getMemIndexReg() >= X86::YMM0 && getMemIndexReg() <= X86::YMM15; + } + bool isMemVX64() const { + return Kind == Memory && (!Mem.Size || Mem.Size == 64) && + getMemIndexReg() >= X86::XMM0 && getMemIndexReg() <= X86::XMM15; + } + bool isMemVY64() const { + return Kind == Memory && (!Mem.Size || Mem.Size == 64) && + getMemIndexReg() >= X86::YMM0 && getMemIndexReg() <= X86::YMM15; + } + bool isAbsMem() const { return Kind == Memory && !getMemSegReg() && !getMemBaseReg() && !getMemIndexReg() && getMemScale() == 1; @@ -356,26 +377,38 @@ struct X86Operand : public MCParsedAsmOperand { addExpr(Inst, getImm()); } - void addMem8Operands(MCInst &Inst, unsigned N) const { - addMemOperands(Inst, N); + void addMem8Operands(MCInst &Inst, unsigned N) const { + addMemOperands(Inst, N); + } + void addMem16Operands(MCInst &Inst, unsigned N) const { + addMemOperands(Inst, N); + } + void addMem32Operands(MCInst &Inst, unsigned N) const { + addMemOperands(Inst, N); + } + void addMem64Operands(MCInst &Inst, unsigned N) const { + addMemOperands(Inst, N); } - void addMem16Operands(MCInst &Inst, unsigned N) const { - addMemOperands(Inst, N); + 
void addMem80Operands(MCInst &Inst, unsigned N) const { + addMemOperands(Inst, N); } - void addMem32Operands(MCInst &Inst, unsigned N) const { - addMemOperands(Inst, N); + void addMem128Operands(MCInst &Inst, unsigned N) const { + addMemOperands(Inst, N); } - void addMem64Operands(MCInst &Inst, unsigned N) const { - addMemOperands(Inst, N); + void addMem256Operands(MCInst &Inst, unsigned N) const { + addMemOperands(Inst, N); } - void addMem80Operands(MCInst &Inst, unsigned N) const { - addMemOperands(Inst, N); + void addMemVX32Operands(MCInst &Inst, unsigned N) const { + addMemOperands(Inst, N); } - void addMem128Operands(MCInst &Inst, unsigned N) const { - addMemOperands(Inst, N); + void addMemVY32Operands(MCInst &Inst, unsigned N) const { + addMemOperands(Inst, N); } - void addMem256Operands(MCInst &Inst, unsigned N) const { - addMemOperands(Inst, N); + void addMemVX64Operands(MCInst &Inst, unsigned N) const { + addMemOperands(Inst, N); + } + void addMemVY64Operands(MCInst &Inst, unsigned N) const { + addMemOperands(Inst, N); } void addMemOperands(MCInst &Inst, unsigned N) const { @@ -467,7 +500,7 @@ bool X86AsmParser::isSrcOp(X86Operand &Op) { bool X86AsmParser::isDstOp(X86Operand &Op) { unsigned basereg = is64BitMode() ? X86::RDI : X86::EDI; - return Op.isMem() && + return Op.isMem() && (Op.Mem.SegReg == 0 || Op.Mem.SegReg == X86::ES) && isa(Op.Mem.Disp) && cast(Op.Mem.Disp)->getValue() == 0 && @@ -611,7 +644,7 @@ X86Operand *X86AsmParser::ParseIntelBracExpression(unsigned SegReg, if (getLexer().isNot(AsmToken::LBrac)) return ErrorOperand(Start, "Expected '[' token!"); Parser.Lex(); - + if (getLexer().is(AsmToken::Identifier)) { // Parse BaseReg if (ParseRegister(BaseReg, Start, End)) { @@ -638,11 +671,11 @@ X86Operand *X86AsmParser::ParseIntelBracExpression(unsigned SegReg, // Handle '[' Scale*IndexReg ']' Parser.Lex(); SMLoc IdxRegLoc = Parser.getTok().getLoc(); - if (ParseRegister(IndexReg, IdxRegLoc, End)) - return ErrorOperand(IdxRegLoc, "Expected register"); + if (ParseRegister(IndexReg, IdxRegLoc, End)) + return ErrorOperand(IdxRegLoc, "Expected register"); Scale = Val; } else - return ErrorOperand(Loc, "Unepxeted token"); + return ErrorOperand(Loc, "Unexpected token"); } if (getLexer().is(AsmToken::Plus) || getLexer().is(AsmToken::Minus)) { @@ -655,8 +688,8 @@ X86Operand *X86AsmParser::ParseIntelBracExpression(unsigned SegReg, if (getLexer().is(AsmToken::Star)) { Parser.Lex(); SMLoc IdxRegLoc = Parser.getTok().getLoc(); - if (ParseRegister(IndexReg, IdxRegLoc, End)) - return ErrorOperand(IdxRegLoc, "Expected register"); + if (ParseRegister(IndexReg, IdxRegLoc, End)) + return ErrorOperand(IdxRegLoc, "Expected register"); Scale = Val; } else if (getLexer().is(AsmToken::RBrac)) { const MCExpr *ValExpr = MCConstantExpr::Create(Val, getContext()); @@ -668,7 +701,7 @@ X86Operand *X86AsmParser::ParseIntelBracExpression(unsigned SegReg, End = Parser.getTok().getLoc(); if (!IndexReg) ParseRegister(IndexReg, Start, End); - else if (getParser().ParseExpression(Disp, End)) return 0; + else if (getParser().ParseExpression(Disp, End)) return 0; } } @@ -881,7 +914,7 @@ X86Operand *X86AsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) { if (getParser().ParseAbsoluteExpression(ScaleVal)){ Error(Loc, "expected scale expression"); return 0; - } + } // Validate the scale amount. 
if (ScaleVal != 1 && ScaleVal != 2 && ScaleVal != 4 && ScaleVal != 8){ @@ -916,15 +949,18 @@ X86Operand *X86AsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) { // If we have both a base register and an index register make sure they are // both 64-bit or 32-bit registers. + // To support VSIB, IndexReg can be 128-bit or 256-bit registers. if (BaseReg != 0 && IndexReg != 0) { if (X86MCRegisterClasses[X86::GR64RegClassID].contains(BaseReg) && - !X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg) && + (X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) || + X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg)) && IndexReg != X86::RIZ) { Error(IndexLoc, "index register is 32-bit, but base register is 64-bit"); return 0; } if (X86MCRegisterClasses[X86::GR32RegClassID].contains(BaseReg) && - !X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg) && + (X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) || + X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg)) && IndexReg != X86::EIZ){ Error(IndexLoc, "index register is 64-bit, but base register is 32-bit"); return 0; @@ -944,7 +980,7 @@ ParseInstruction(StringRef Name, SMLoc NameLoc, if (PatchedName.startswith("set") && PatchedName.endswith("b") && PatchedName != "setb" && PatchedName != "setnb") PatchedName = PatchedName.substr(0, Name.size()-1); - + // FIXME: Hack to recognize cmp{ss,sd,ps,pd}. const MCExpr *ExtraImmOp = 0; if ((PatchedName.startswith("cmp") || PatchedName.startswith("vcmp")) && @@ -1204,20 +1240,20 @@ ParseInstruction(StringRef Name, SMLoc NameLoc, // Intel syntax X86Operand *Op1 = static_cast(Operands[2]); if (Op1->isImm() && isa(Op1->getImm()) && - cast(Op1->getImm())->getValue() == 1) { - delete Operands[2]; - Operands.pop_back(); + cast(Op1->getImm())->getValue() == 1) { + delete Operands[2]; + Operands.pop_back(); } } else { X86Operand *Op1 = static_cast(Operands[1]); if (Op1->isImm() && isa(Op1->getImm()) && - cast(Op1->getImm())->getValue() == 1) { - delete Operands[1]; - Operands.erase(Operands.begin() + 1); + cast(Op1->getImm())->getValue() == 1) { - delete Operands[1]; + Operands.erase(Operands.begin() + 1); } } } - + // Transforms "int $3" into "int3" as a size optimization. We can't write an // instalias with an immediate operand yet. if (Name == "int" && Operands.size() == 2) { @@ -1476,6 +1512,18 @@ bool X86AsmParser:: MatchAndEmitInstruction(SMLoc IDLoc, SmallVectorImpl &Operands, MCStreamer &Out) { + SmallVector Insts; + bool Error = MatchInstruction(IDLoc, Operands, Insts); + if (!Error) + for (unsigned i = 0, e = Insts.size(); i != e; ++i) + Out.EmitInstruction(Insts[i]); + return Error; +} + +bool X86AsmParser:: +MatchInstruction(SMLoc IDLoc, + SmallVectorImpl &Operands, + SmallVectorImpl &MCInsts) { assert(!Operands.empty() && "Unexpected empty operand list!"); X86Operand *Op = static_cast(Operands[0]); assert(Op->isToken() && "Leading operand should always be a mnemonic!"); @@ -1491,7 +1539,7 @@ MatchAndEmitInstruction(SMLoc IDLoc, MCInst Inst; Inst.setOpcode(X86::WAIT); Inst.setLoc(IDLoc); - Out.EmitInstruction(Inst); + MCInsts.push_back(Inst); const char *Repl = StringSwitch(Op->getToken()) @@ -1520,12 +1568,12 @@ MatchAndEmitInstruction(SMLoc IDLoc, case Match_Success: // Some instructions need post-processing to, for example, tweak which // encoding is selected. Loop on it while changes happen so the - // individual transformations can chain off each other. + // individual transformations can chain off each other.
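The MatchAndEmitInstruction split above is a buffering pattern: MatchInstruction may produce several MCInsts (the fwait-prefixed mnemonics push a WAIT first), and nothing reaches the streamer unless the whole match succeeds. A toy, self-contained version of the shape, with strings standing in for MCInst and the streamer, before the match loop continues below:

#include <iostream>
#include <string>
#include <vector>

// Match step: may buffer several instructions into Out; returns true on
// error, false on success, mirroring the parser's convention above.
static bool match(const std::string &Mnemonic, std::vector<std::string> &Out) {
  if (Mnemonic == "fstsw") {  // toy stand-in for the fwait special case
    Out.push_back("wait");
    Out.push_back("fnstsw");
    return false;
  }
  return true;  // unknown mnemonic
}

// Emit step: only streams the buffered instructions if matching succeeded.
static bool matchAndEmit(const std::string &Mnemonic) {
  std::vector<std::string> Insts;
  bool Error = match(Mnemonic, Insts);
  if (!Error)
    for (size_t I = 0, E = Insts.size(); I != E; ++I)
      std::cout << Insts[I] << '\n';
  return Error;
}

int main() { return matchAndEmit("fstsw") ? 1 : 0; }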
while (processInstruction(Inst, Operands)) ; Inst.setLoc(IDLoc); - Out.EmitInstruction(Inst); + MCInsts.push_back(Inst); return false; case Match_MissingFeature: Error(IDLoc, "instruction requires a CPU feature not currently enabled"); @@ -1558,12 +1606,12 @@ MatchAndEmitInstruction(SMLoc IDLoc, // Otherwise, we assume that this may be an integer instruction, which comes // in 8/16/32/64-bit forms using the b,w,l,q suffixes respectively. const char *Suffixes = Base[0] != 'f' ? "bwlq" : "slt\0"; - + // Check for the various suffix matches. Tmp[Base.size()] = Suffixes[0]; unsigned ErrorInfoIgnore; unsigned Match1, Match2, Match3, Match4; - + Match1 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore); Tmp[Base.size()] = Suffixes[1]; Match2 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore); @@ -1583,7 +1631,7 @@ MatchAndEmitInstruction(SMLoc IDLoc, (Match3 == Match_Success) + (Match4 == Match_Success); if (NumSuccessfulMatches == 1) { Inst.setLoc(IDLoc); - Out.EmitInstruction(Inst); + MCInsts.push_back(Inst); return false; } @@ -1673,10 +1721,10 @@ bool X86AsmParser::ParseDirective(AsmToken DirectiveID) { getParser().setAssemblerDialect(1); if (getLexer().isNot(AsmToken::EndOfStatement)) { if(Parser.getTok().getString() == "noprefix") { - // FIXME : Handle noprefix - Parser.Lex(); + // FIXME : Handle noprefix + Parser.Lex(); } else - return true; + return true; } return false; } @@ -1691,19 +1739,19 @@ bool X86AsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) { const MCExpr *Value; if (getParser().ParseExpression(Value)) return true; - + getParser().getStreamer().EmitValue(Value, Size, 0 /*addrspace*/); - + if (getLexer().is(AsmToken::EndOfStatement)) break; - + // FIXME: Improve diagnostic. if (getLexer().isNot(AsmToken::Comma)) return Error(L, "unexpected token in directive"); Parser.Lex(); } } - + Parser.Lex(); return false; } diff --git a/lib/Target/X86/CMakeLists.txt b/lib/Target/X86/CMakeLists.txt index f612e23..b886d46 100644 --- a/lib/Target/X86/CMakeLists.txt +++ b/lib/Target/X86/CMakeLists.txt @@ -52,6 +52,8 @@ endif() add_llvm_target(X86CodeGen ${sources}) +add_dependencies(LLVMX86CodeGen intrinsics_gen) + add_subdirectory(AsmParser) add_subdirectory(Disassembler) add_subdirectory(InstPrinter) diff --git a/lib/Target/X86/Disassembler/X86Disassembler.cpp b/lib/Target/X86/Disassembler/X86Disassembler.cpp index 8278bde..5039887 100644 --- a/lib/Target/X86/Disassembler/X86Disassembler.cpp +++ b/lib/Target/X86/Disassembler/X86Disassembler.cpp @@ -322,7 +322,12 @@ static void translateImmediate(MCInst &mcInst, uint64_t immediate, OperandType type = (OperandType)operand.type; + bool isBranch = false; + uint64_t pcrel = 0; if (type == TYPE_RELv) { + isBranch = true; + pcrel = insn.startLocation + + insn.immediateOffset + insn.immediateSize; switch (insn.displacementSize) { default: break; @@ -351,15 +356,15 @@ static void translateImmediate(MCInst &mcInst, uint64_t immediate, // Special case those X86 instructions that use the imm8 as a set of // bits, bit count, etc. and are not sign-extended.
if (Opcode != X86::BLENDPSrri && Opcode != X86::BLENDPDrri && - Opcode != X86::PBLENDWrri && Opcode != X86::MPSADBWrri && - Opcode != X86::DPPSrri && Opcode != X86::DPPDrri && - Opcode != X86::INSERTPSrr && Opcode != X86::VBLENDPSYrri && - Opcode != X86::VBLENDPSYrmi && Opcode != X86::VBLENDPDYrri && - Opcode != X86::VBLENDPDYrmi && Opcode != X86::VPBLENDWrri && - Opcode != X86::VMPSADBWrri && Opcode != X86::VDPPSYrri && - Opcode != X86::VDPPSYrmi && Opcode != X86::VDPPDrri && - Opcode != X86::VINSERTPSrr) - type = TYPE_MOFFS8; + Opcode != X86::PBLENDWrri && Opcode != X86::MPSADBWrri && + Opcode != X86::DPPSrri && Opcode != X86::DPPDrri && + Opcode != X86::INSERTPSrr && Opcode != X86::VBLENDPSYrri && + Opcode != X86::VBLENDPSYrmi && Opcode != X86::VBLENDPDYrri && + Opcode != X86::VBLENDPDYrmi && Opcode != X86::VPBLENDWrri && + Opcode != X86::VMPSADBWrri && Opcode != X86::VDPPSYrri && + Opcode != X86::VDPPSYrmi && Opcode != X86::VDPPDrri && + Opcode != X86::VINSERTPSrr) + type = TYPE_MOFFS8; break; case ENCODING_IW: type = TYPE_MOFFS16; @@ -373,8 +378,6 @@ static void translateImmediate(MCInst &mcInst, uint64_t immediate, } } - bool isBranch = false; - uint64_t pcrel = 0; switch (type) { case TYPE_XMM128: mcInst.addOperand(MCOperand::CreateReg(X86::XMM0 + (immediate >> 4))); @@ -495,7 +498,38 @@ static bool translateRMMemory(MCInst &mcInst, InternalInstruction &insn, } else { baseReg = MCOperand::CreateReg(0); } - + + // Check whether we are handling VSIB addressing mode for GATHER. + // If sibIndex was set to SIB_INDEX_NONE, index offset is 4 and + // we should use SIB_INDEX_XMM4|YMM4 for VSIB. + // I don't see a way to get the correct IndexReg in readSIB: + // We can tell whether it is VSIB or SIB after instruction ID is decoded, + // but instruction ID may not be decoded yet when calling readSIB. + uint32_t Opcode = mcInst.getOpcode(); + bool IndexIs128 = (Opcode == X86::VGATHERDPDrm || + Opcode == X86::VGATHERDPDYrm || + Opcode == X86::VGATHERQPDrm || + Opcode == X86::VGATHERDPSrm || + Opcode == X86::VGATHERQPSrm || + Opcode == X86::VPGATHERDQrm || + Opcode == X86::VPGATHERDQYrm || + Opcode == X86::VPGATHERQQrm || + Opcode == X86::VPGATHERDDrm || + Opcode == X86::VPGATHERQDrm); + bool IndexIs256 = (Opcode == X86::VGATHERQPDYrm || + Opcode == X86::VGATHERDPSYrm || + Opcode == X86::VGATHERQPSYrm || + Opcode == X86::VPGATHERQQYrm || + Opcode == X86::VPGATHERDDYrm || + Opcode == X86::VPGATHERQDYrm); + if (IndexIs128 || IndexIs256) { + unsigned IndexOffset = insn.sibIndex - + (insn.addressSize == 8 ? SIB_INDEX_RAX:SIB_INDEX_EAX); + SIBIndex IndexBase = IndexIs256 ? SIB_INDEX_YMM0 : SIB_INDEX_XMM0; + insn.sibIndex = (SIBIndex)(IndexBase + + (insn.sibIndex == SIB_INDEX_NONE ? 
4 : IndexOffset)); + } + if (insn.sibIndex != SIB_INDEX_NONE) { switch (insn.sibIndex) { default: @@ -506,6 +540,8 @@ static bool translateRMMemory(MCInst &mcInst, InternalInstruction &insn, indexReg = MCOperand::CreateReg(X86::x); break; EA_BASES_32BIT EA_BASES_64BIT + REGS_XMM + REGS_YMM #undef ENTRY } } else { @@ -726,8 +762,7 @@ static bool translateOperand(MCInst &mcInst, const OperandSpecifier &operand, translateRegister(mcInst, insn.vvvv); return false; case ENCODING_DUP: - return translateOperand(mcInst, - insn.spec->operands[operand.type - TYPE_DUP0], + return translateOperand(mcInst, insn.operands[operand.type - TYPE_DUP0], insn, Dis); } } @@ -753,8 +788,8 @@ static bool translateInstruction(MCInst &mcInst, insn.numImmediatesTranslated = 0; for (index = 0; index < X86_MAX_OPERANDS; ++index) { - if (insn.spec->operands[index].encoding != ENCODING_NONE) { - if (translateOperand(mcInst, insn.spec->operands[index], insn, Dis)) { + if (insn.operands[index].encoding != ENCODING_NONE) { + if (translateOperand(mcInst, insn.operands[index], insn, Dis)) { return true; } } diff --git a/lib/Target/X86/Disassembler/X86Disassembler.h b/lib/Target/X86/Disassembler/X86Disassembler.h index c11f51c..0dbfa26 100644 --- a/lib/Target/X86/Disassembler/X86Disassembler.h +++ b/lib/Target/X86/Disassembler/X86Disassembler.h @@ -20,7 +20,7 @@ // 2. Read the opcode, and determine what kind of opcode it is. The // disassembler distinguishes four kinds of opcodes, which are enumerated in // OpcodeType (X86DisassemblerDecoderCommon.h): one-byte (0xnn), two-byte -// (0x0f 0xnn), three-byte-38 (0x0f 0x38 0xnn), or three-byte-3a +// (0x0f 0xnn), three-byte-38 (0x0f 0x38 0xnn), or three-byte-3a // (0x0f 0x3a 0xnn). Mandatory prefixes are treated as part of the context. // // 3. Depending on the opcode type, look in one of four ClassDecision structures @@ -74,8 +74,8 @@ #ifndef X86DISASSEMBLER_H #define X86DISASSEMBLER_H -#define INSTRUCTION_SPECIFIER_FIELDS \ - const char* name; +#define INSTRUCTION_SPECIFIER_FIELDS \ + uint16_t operands; #define INSTRUCTION_IDS \ unsigned instructionIDs; @@ -88,7 +88,7 @@ #include "llvm/MC/MCDisassembler.h" namespace llvm { - + class MCInst; class MCInstrInfo; class MCSubtargetInfo; @@ -96,7 +96,7 @@ class MemoryObject; class raw_ostream; struct EDInstInfo; - + namespace X86Disassembler { /// X86GenericDisassembler - Generic disassembler for all X86 platforms. 
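The header change above (INSTRUCTION_SPECIFIER_FIELDS shrinking to a uint16_t operands index) and the decoder edits in the next file are two halves of one table-compression scheme: each InstructionSpecifier now stores an index into a shared, deduplicated x86OperandSets array instead of embedding its own operand array, so every insn->spec->operands[i] access gains one indirection. A self-contained toy of the layout (names and sizes invented for illustration):

#include <cstdint>
#include <iostream>

struct OperandSpec { uint8_t encoding, type; };

// Shared operand-set table: many opcodes point at the same row, which is
// what shrinks the per-opcode tables.
static const OperandSpec OperandSets[][2] = {
  {{0, 0}, {0, 0}},  // set 0: no operands
  {{1, 7}, {2, 7}},  // set 1: reg, rm (toy encodings)
};

struct InstrSpec { uint16_t operands; };  // an index, not an embedded array

static const InstrSpec Instrs[] = {{1}, {0}};

int main() {
  const InstrSpec &Spec = Instrs[0];
  const OperandSpec *Ops = OperandSets[Spec.operands];  // the new indirection
  std::cout << int(Ops[0].encoding) << ' ' << int(Ops[1].encoding) << '\n';  // 1 2
}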
diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c index 6020877..0c92912 100644 --- a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c +++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c @@ -1495,14 +1495,14 @@ static int readOperands(struct InternalInstruction* insn) { needVVVV = hasVVVV && (insn->vvvv != 0); for (index = 0; index < X86_MAX_OPERANDS; ++index) { - switch (insn->spec->operands[index].encoding) { + switch (x86OperandSets[insn->spec->operands][index].encoding) { case ENCODING_NONE: break; case ENCODING_REG: case ENCODING_RM: if (readModRM(insn)) return -1; - if (fixupReg(insn, &insn->spec->operands[index])) + if (fixupReg(insn, &x86OperandSets[insn->spec->operands][index])) return -1; break; case ENCODING_CB: @@ -1524,14 +1524,14 @@ static int readOperands(struct InternalInstruction* insn) { } if (readImmediate(insn, 1)) return -1; - if (insn->spec->operands[index].type == TYPE_IMM3 && + if (x86OperandSets[insn->spec->operands][index].type == TYPE_IMM3 && insn->immediates[insn->numImmediatesConsumed - 1] > 7) return -1; - if (insn->spec->operands[index].type == TYPE_IMM5 && + if (x86OperandSets[insn->spec->operands][index].type == TYPE_IMM5 && insn->immediates[insn->numImmediatesConsumed - 1] > 31) return -1; - if (insn->spec->operands[index].type == TYPE_XMM128 || - insn->spec->operands[index].type == TYPE_XMM256) + if (x86OperandSets[insn->spec->operands][index].type == TYPE_XMM128 || + x86OperandSets[insn->spec->operands][index].type == TYPE_XMM256) sawRegImm = 1; break; case ENCODING_IW: @@ -1582,7 +1582,7 @@ static int readOperands(struct InternalInstruction* insn) { needVVVV = 0; /* Mark that we have found a VVVV operand. */ if (!hasVVVV) return -1; - if (fixupReg(insn, &insn->spec->operands[index])) + if (fixupReg(insn, &x86OperandSets[insn->spec->operands][index])) return -1; break; case ENCODING_DUP: @@ -1644,6 +1644,8 @@ int decodeInstruction(struct InternalInstruction* insn, insn->instructionID == 0 || readOperands(insn)) return -1; + + insn->operands = &x86OperandSets[insn->spec->operands][0]; insn->length = insn->readerCursor - insn->startLocation; diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h index fae309b..797703f 100644 --- a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h +++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h @@ -19,17 +19,18 @@ #ifdef __cplusplus extern "C" { #endif - -#define INSTRUCTION_SPECIFIER_FIELDS + +#define INSTRUCTION_SPECIFIER_FIELDS \ + uint16_t operands; #define INSTRUCTION_IDS \ unsigned instructionIDs; #include "X86DisassemblerDecoderCommon.h" - + #undef INSTRUCTION_SPECIFIER_FIELDS #undef INSTRUCTION_IDS - + /* * Accessor functions for various fields of an Intel instruction */ @@ -43,7 +44,7 @@ extern "C" { #define rFromREX(rex) (((rex) & 0x4) >> 2) #define xFromREX(rex) (((rex) & 0x2) >> 1) #define bFromREX(rex) ((rex) & 0x1) - + #define rFromVEX2of3(vex) (((~(vex)) & 0x80) >> 7) #define xFromVEX2of3(vex) (((~(vex)) & 0x40) >> 6) #define bFromVEX2of3(vex) (((~(vex)) & 0x20) >> 5) @@ -237,7 +238,7 @@ extern "C" { ENTRY(YMM13) \ ENTRY(YMM14) \ ENTRY(YMM15) - + #define REGS_SEGMENT \ ENTRY(ES) \ ENTRY(CS) \ @@ -245,7 +246,7 @@ extern "C" { ENTRY(DS) \ ENTRY(FS) \ ENTRY(GS) - + #define REGS_DEBUG \ ENTRY(DR0) \ ENTRY(DR1) \ @@ -266,12 +267,12 @@ extern "C" { ENTRY(CR6) \ ENTRY(CR7) \ ENTRY(CR8) - + #define ALL_EA_BASES \ EA_BASES_16BIT \ EA_BASES_32BIT \ 
EA_BASES_64BIT - + #define ALL_SIB_BASES \ REGS_32BIT \ REGS_64BIT @@ -290,7 +291,7 @@ extern "C" { ENTRY(RIP) /* - * EABase - All possible values of the base field for effective-address + * EABase - All possible values of the base field for effective-address * computations, a.k.a. the Mod and R/M fields of the ModR/M byte. We * distinguish between bases (EA_BASE_*) and registers that just happen to be * referred to when Mod == 0b11 (EA_REG_*). @@ -305,20 +306,23 @@ typedef enum { #undef ENTRY EA_max } EABase; - -/* + +/* * SIBIndex - All possible values of the SIB index field. * Borrows entries from ALL_EA_BASES with the special case that * sib is synonymous with NONE. + * Vector SIB: index can be XMM or YMM. */ typedef enum { SIB_INDEX_NONE, #define ENTRY(x) SIB_INDEX_##x, ALL_EA_BASES + REGS_XMM + REGS_YMM #undef ENTRY SIB_INDEX_max } SIBIndex; - + /* * SIBBase - All possible values of the SIB base field. */ @@ -350,7 +354,7 @@ typedef enum { #undef ENTRY MODRM_REG_max } Reg; - + /* * SegmentOverride - All possible segment overrides. */ @@ -364,7 +368,7 @@ typedef enum { SEG_OVERRIDE_GS, SEG_OVERRIDE_max } SegmentOverride; - + /* * VEXLeadingOpcodeByte - Possible values for the VEX.m-mmmm field */ @@ -428,16 +432,16 @@ struct InternalInstruction { void* dlogArg; /* General instruction information */ - + /* The mode to disassemble for (64-bit, protected, real) */ DisassemblerMode mode; /* The start of the instruction, usable with the reader */ uint64_t startLocation; /* The length of the instruction, in bytes */ size_t length; - + /* Prefix state */ - + /* 1 if the prefix byte corresponding to the entry is present; 0 if not */ uint8_t prefixPresent[0x100]; /* contains the location (for use with the reader) of the prefix byte */ @@ -453,7 +457,7 @@ struct InternalInstruction { uint64_t necessaryPrefixLocation; /* The segment override type */ SegmentOverride segmentOverride; - + /* Sizes of various critical pieces of data, in bytes */ uint8_t registerSize; uint8_t addressSize; @@ -464,9 +468,9 @@ struct InternalInstruction { needed to find relocation entries for adding symbolic operands */ uint8_t displacementOffset; uint8_t immediateOffset; - + /* opcode state */ - + /* The value of the two-byte escape prefix (usually 0x0f) */ uint8_t twoByteEscape; /* The value of the three-byte escape prefix (usually 0x38 or 0x3a) */ @@ -475,16 +479,16 @@ struct InternalInstruction { uint8_t opcode; /* The ModR/M byte of the instruction, if it is an opcode extension */ uint8_t modRMExtension; - + /* decode state */ - + /* The type of opcode, used for indexing into the array of decode tables */ OpcodeType opcodeType; /* The instruction ID, extracted from the decode table */ uint16_t instructionID; /* The specifier for the instruction, from the instruction info table */ const struct InstructionSpecifier *spec; - + /* state for additional bytes, consumed during operand decode. 
Pattern: consumed___ indicates that the byte was already consumed and does not need to be consumed again */ @@ -492,12 +496,12 @@ struct InternalInstruction { /* The VEX.vvvv field, which contains a third register operand for some AVX instructions */ Reg vvvv; - + /* The ModR/M byte, which contains most register operands and some portion of all memory operands */ BOOL consumedModRM; uint8_t modRM; - + /* The SIB byte, used for more complex 32- or 64-bit memory operands */ BOOL consumedSIB; uint8_t sib; @@ -505,19 +509,19 @@ struct InternalInstruction { /* The displacement, used for memory operands */ BOOL consumedDisplacement; int32_t displacement; - + /* Immediates. There can be two in some cases */ uint8_t numImmediatesConsumed; uint8_t numImmediatesTranslated; uint64_t immediates[2]; - + /* A register or immediate operand encoded into the opcode */ BOOL consumedOpcodeModifier; uint8_t opcodeModifier; Reg opcodeRegister; - + /* Portions of the ModR/M byte */ - + /* These fields determine the allowable values for the ModR/M fields, which depend on operand and address widths */ EABase eaBaseBase; @@ -530,11 +534,13 @@ struct InternalInstruction { EADisplacement eaDisplacement; /* The reg field always encodes a register */ Reg reg; - + /* SIB state */ SIBIndex sibIndex; uint8_t sibScale; SIBBase sibBase; + + const struct OperandSpecifier *operands; }; /* decodeInstruction - Decode one instruction and store the decoding results in @@ -568,15 +574,15 @@ int decodeInstruction(struct InternalInstruction* insn, * @param line - The line number that printed the debug message. * @param s - The message to print. */ - + void x86DisassemblerDebug(const char *file, unsigned line, const char *s); const char *x86DisassemblerGetInstrName(unsigned Opcode, void *mii); -#ifdef __cplusplus +#ifdef __cplusplus } #endif - + #endif diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h b/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h index 13e1136..b0a0e1e 100644 --- a/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h +++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h @@ -119,7 +119,7 @@ enum attributeBits { ENUM_ENTRY(IC_VEX_L_W_OPSIZE, 5, "requires VEX, L, W and OpSize") -#define ENUM_ENTRY(n, r, d) n, +#define ENUM_ENTRY(n, r, d) n, typedef enum { INSTRUCTION_CONTEXTS IC_max @@ -148,11 +148,11 @@ typedef enum { * If a ModR/M byte is not required, "required" is left unset, and the values * for each instructionID are identical. */ - + typedef uint16_t InstrUID; /* - * ModRMDecisionType - describes the type of ModR/M decision, allowing the + * ModRMDecisionType - describes the type of ModR/M decision, allowing the * consumer to determine the number of entries in it. * * MODRM_ONEENTRY - No matter what the value of the ModR/M byte is, the decoded @@ -172,7 +172,7 @@ typedef uint16_t InstrUID; ENUM_ENTRY(MODRM_SPLITREG) \ ENUM_ENTRY(MODRM_FULL) -#define ENUM_ENTRY(n) n, +#define ENUM_ENTRY(n) n, typedef enum { MODRMTYPES MODRM_max @@ -180,13 +180,13 @@ typedef enum { #undef ENUM_ENTRY /* - * ModRMDecision - Specifies whether a ModR/M byte is needed and (if so) which + * ModRMDecision - Specifies whether a ModR/M byte is needed and (if so) which * instruction each possible value of the ModR/M byte corresponds to. Once * this information is known, we have narrowed down to a single instruction. */ struct ModRMDecision { uint8_t modrm_type; - + /* The macro below must be defined wherever this file is included. 
*/ INSTRUCTION_IDS }; @@ -210,7 +210,7 @@ struct ContextDecision { struct OpcodeDecision opcodeDecisions[IC_max]; }; -/* +/* * Physical encodings of instruction operands. */ @@ -244,14 +244,14 @@ struct ContextDecision { ENUM_ENTRY(ENCODING_DUP, "Duplicate of another operand; ID is encoded " \ "in type") -#define ENUM_ENTRY(n, d) n, +#define ENUM_ENTRY(n, d) n, typedef enum { ENCODINGS ENCODING_max } OperandEncoding; #undef ENUM_ENTRY -/* +/* * Semantic interpretations of instruction operands. */ @@ -332,14 +332,14 @@ struct ContextDecision { ENUM_ENTRY(TYPE_DUP4, "operand 4") \ ENUM_ENTRY(TYPE_M512, "512-bit FPU/MMX/XMM/MXCSR state") -#define ENUM_ENTRY(n, d) n, +#define ENUM_ENTRY(n, d) n, typedef enum { TYPES TYPE_max } OperandType; #undef ENUM_ENTRY -/* +/* * OperandSpecifier - The specification for how to extract and interpret one * operand. */ @@ -374,8 +374,7 @@ typedef enum { struct InstructionSpecifier { uint8_t modifierType; uint8_t modifierBase; - struct OperandSpecifier operands[X86_MAX_OPERANDS]; - + /* The macro below must be defined wherever this file is included. */ INSTRUCTION_SPECIFIER_FIELDS }; diff --git a/lib/Target/X86/InstPrinter/X86InstComments.cpp b/lib/Target/X86/InstPrinter/X86InstComments.cpp index f532019..64ac5e6 100644 --- a/lib/Target/X86/InstPrinter/X86InstComments.cpp +++ b/lib/Target/X86/InstPrinter/X86InstComments.cpp @@ -96,7 +96,17 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS, case X86::PSHUFHWmi: case X86::VPSHUFHWmi: DestName = getRegName(MI->getOperand(0).getReg()); - DecodePSHUFHWMask(MI->getOperand(MI->getNumOperands()-1).getImm(), + DecodePSHUFHWMask(MVT::v8i16, + MI->getOperand(MI->getNumOperands()-1).getImm(), + ShuffleMask); + break; + case X86::VPSHUFHWYri: + Src1Name = getRegName(MI->getOperand(1).getReg()); + // FALL THROUGH. + case X86::VPSHUFHWYmi: + DestName = getRegName(MI->getOperand(0).getReg()); + DecodePSHUFHWMask(MVT::v16i16, + MI->getOperand(MI->getNumOperands()-1).getImm(), ShuffleMask); break; case X86::PSHUFLWri: @@ -106,7 +116,17 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS, case X86::PSHUFLWmi: case X86::VPSHUFLWmi: DestName = getRegName(MI->getOperand(0).getReg()); - DecodePSHUFLWMask(MI->getOperand(MI->getNumOperands()-1).getImm(), + DecodePSHUFLWMask(MVT::v8i16, + MI->getOperand(MI->getNumOperands()-1).getImm(), + ShuffleMask); + break; + case X86::VPSHUFLWYri: + Src1Name = getRegName(MI->getOperand(1).getReg()); + // FALL THROUGH. + case X86::VPSHUFLWYmi: + DestName = getRegName(MI->getOperand(0).getReg()); + DecodePSHUFLWMask(MVT::v16i16, + MI->getOperand(MI->getNumOperands()-1).getImm(), ShuffleMask); break; @@ -487,6 +507,16 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS, Src1Name = getRegName(MI->getOperand(1).getReg()); DestName = getRegName(MI->getOperand(0).getReg()); break; + case X86::VPERMQYri: + case X86::VPERMPDYri: + Src1Name = getRegName(MI->getOperand(1).getReg()); + // FALL THROUGH. + case X86::VPERMQYmi: + case X86::VPERMPDYmi: + DecodeVPERMMask(MI->getOperand(MI->getNumOperands()-1).getImm(), + ShuffleMask); + DestName = getRegName(MI->getOperand(0).getReg()); + break; } diff --git a/lib/Target/X86/MCTargetDesc/X86BaseInfo.h b/lib/Target/X86/MCTargetDesc/X86BaseInfo.h index a0bb6dc..db597fb 100644 --- a/lib/Target/X86/MCTargetDesc/X86BaseInfo.h +++ b/lib/Target/X86/MCTargetDesc/X86BaseInfo.h @@ -94,40 +94,83 @@ namespace X86II { MO_PLT, /// MO_TLSGD - On a symbol operand this indicates that the immediate is - /// some TLS offset. 
+ /// the offset of the GOT entry with the TLS index structure that contains + /// the module number and variable offset for the symbol. Used in the + /// general dynamic TLS access model. /// /// See 'ELF Handling for Thread-Local Storage' for more details. /// SYMBOL_LABEL @TLSGD MO_TLSGD, + /// MO_TLSLD - On a symbol operand this indicates that the immediate is + /// the offset of the GOT entry with the TLS index for the module that + /// contains the symbol. When this index is passed to a call to + /// __tls_get_addr, the function will return the base address of the TLS + /// block for the symbol. Used in the x86-64 local dynamic TLS access model. + /// + /// See 'ELF Handling for Thread-Local Storage' for more details. + /// SYMBOL_LABEL @TLSLD + MO_TLSLD, + + /// MO_TLSLDM - On a symbol operand this indicates that the immediate is + /// the offset of the GOT entry with the TLS index for the module that + /// contains the symbol. When this index is passed to a call to + /// ___tls_get_addr, the function will return the base address of the TLS + /// block for the symbol. Used in the IA32 local dynamic TLS access model. + /// + /// See 'ELF Handling for Thread-Local Storage' for more details. + /// SYMBOL_LABEL @TLSLDM + MO_TLSLDM, + /// MO_GOTTPOFF - On a symbol operand this indicates that the immediate is - /// some TLS offset. + /// the offset of the GOT entry with the thread-pointer offset for the + /// symbol. Used in the x86-64 initial exec TLS access model. /// /// See 'ELF Handling for Thread-Local Storage' for more details. /// SYMBOL_LABEL @GOTTPOFF MO_GOTTPOFF, /// MO_INDNTPOFF - On a symbol operand this indicates that the immediate is - /// some TLS offset. + /// the absolute address of the GOT entry with the negative thread-pointer + /// offset for the symbol. Used in the non-PIC IA32 initial exec TLS access + /// model. /// /// See 'ELF Handling for Thread-Local Storage' for more details. /// SYMBOL_LABEL @INDNTPOFF MO_INDNTPOFF, /// MO_TPOFF - On a symbol operand this indicates that the immediate is - /// some TLS offset. + /// the thread-pointer offset for the symbol. Used in the x86-64 local + /// exec TLS access model. /// /// See 'ELF Handling for Thread-Local Storage' for more details. /// SYMBOL_LABEL @TPOFF MO_TPOFF, + /// MO_DTPOFF - On a symbol operand this indicates that the immediate is + /// the offset of the GOT entry with the TLS offset of the symbol. Used + /// in the local dynamic TLS access model. + /// + /// See 'ELF Handling for Thread-Local Storage' for more details. + /// SYMBOL_LABEL @DTPOFF + MO_DTPOFF, + /// MO_NTPOFF - On a symbol operand this indicates that the immediate is - /// some TLS offset. + /// the negative thread-pointer offset for the symbol. Used in the IA32 + /// local exec TLS access model. /// /// See 'ELF Handling for Thread-Local Storage' for more details. /// SYMBOL_LABEL @NTPOFF MO_NTPOFF, + /// MO_GOTNTPOFF - On a symbol operand this indicates that the immediate is + /// the offset of the GOT entry with the negative thread-pointer offset for + /// the symbol. Used in the PIC IA32 initial exec TLS access model. + /// + /// See 'ELF Handling for Thread-Local Storage' for more details. + /// SYMBOL_LABEL @GOTNTPOFF + MO_GOTNTPOFF, + /// MO_DLLIMPORT - On a symbol operand "FOO", this indicates that the /// reference is actually to the "__imp_FOO" symbol. This is used for /// dllimport linkage on windows.
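The expanded documentation above ties each operand flag to a concrete access model and pointer width. Restated as a lookup for quick reference (informational sketch only; the flag/modifier/model pairings are taken from the doc comments above, and the enum is a local stand-in for the real X86II values):

#include <iostream>

enum Flag { TLSGD, TLSLD, TLSLDM, GOTTPOFF, INDNTPOFF, GOTNTPOFF,
            TPOFF, NTPOFF, DTPOFF };

// Assembly modifier and access model for each flag, per the comments above.
static const char *describe(Flag F) {
  switch (F) {
  case TLSGD:     return "@TLSGD - general dynamic";
  case TLSLD:     return "@TLSLD - local dynamic, x86-64";
  case TLSLDM:    return "@TLSLDM - local dynamic, IA32";
  case GOTTPOFF:  return "@GOTTPOFF - initial exec, x86-64";
  case INDNTPOFF: return "@INDNTPOFF - initial exec, non-PIC IA32";
  case GOTNTPOFF: return "@GOTNTPOFF - initial exec, PIC IA32";
  case TPOFF:     return "@TPOFF - local exec, x86-64";
  case NTPOFF:    return "@NTPOFF - local exec, IA32";
  case DTPOFF:    return "@DTPOFF - local dynamic offset";
  }
  return "";
}

int main() { std::cout << describe(TLSLDM) << '\n'; }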
@@ -438,17 +481,17 @@ namespace X86II { // getBaseOpcodeFor - This function returns the "base" X86 opcode for the // specified machine instruction. // - static inline unsigned char getBaseOpcodeFor(uint64_t TSFlags) { + inline unsigned char getBaseOpcodeFor(uint64_t TSFlags) { return TSFlags >> X86II::OpcodeShift; } - static inline bool hasImm(uint64_t TSFlags) { + inline bool hasImm(uint64_t TSFlags) { return (TSFlags & X86II::ImmMask) != 0; } /// getSizeOfImm - Decode the "size of immediate" field from the TSFlags field /// of the specified instruction. - static inline unsigned getSizeOfImm(uint64_t TSFlags) { + inline unsigned getSizeOfImm(uint64_t TSFlags) { switch (TSFlags & X86II::ImmMask) { default: llvm_unreachable("Unknown immediate size"); case X86II::Imm8: @@ -463,7 +506,7 @@ namespace X86II { /// isImmPCRel - Return true if the immediate of the specified instruction's /// TSFlags indicates that it is pc relative. - static inline unsigned isImmPCRel(uint64_t TSFlags) { + inline unsigned isImmPCRel(uint64_t TSFlags) { switch (TSFlags & X86II::ImmMask) { default: llvm_unreachable("Unknown immediate size"); case X86II::Imm8PCRel: @@ -486,9 +529,11 @@ namespace X86II { /// is duplicated in the MCInst (e.g. "EAX = addl EAX, [mem]") it is only /// counted as one operand. /// - static inline int getMemoryOperandNo(uint64_t TSFlags, unsigned Opcode) { + inline int getMemoryOperandNo(uint64_t TSFlags, unsigned Opcode) { switch (TSFlags & X86II::FormMask) { - case X86II::MRMInitReg: llvm_unreachable("FIXME: Remove this form"); + case X86II::MRMInitReg: + // FIXME: Remove this form. + return -1; default: llvm_unreachable("Unknown FormMask value in getMemoryOperandNo!"); case X86II::Pseudo: case X86II::RawFrm: @@ -546,7 +591,7 @@ namespace X86II { /// isX86_64ExtendedReg - Is the MachineOperand a x86-64 extended (r8 or /// higher) register? e.g. r8, xmm8, xmm13, etc. 
- static inline bool isX86_64ExtendedReg(unsigned RegNo) { + inline bool isX86_64ExtendedReg(unsigned RegNo) { switch (RegNo) { default: break; case X86::R8: case X86::R9: case X86::R10: case X86::R11: @@ -568,7 +613,7 @@ namespace X86II { return false; } - static inline bool isX86_64NonExtLowByteReg(unsigned reg) { + inline bool isX86_64NonExtLowByteReg(unsigned reg) { return (reg == X86::SPL || reg == X86::BPL || reg == X86::SIL || reg == X86::DIL); } diff --git a/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp b/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp index afa545c..b0acd7d 100644 --- a/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp +++ b/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp @@ -35,19 +35,6 @@ AsmWriterFlavor("x86-asm-syntax", cl::init(ATT), clEnumValEnd)); -static const char *const x86_asm_table[] = { - "{si}", "S", - "{di}", "D", - "{ax}", "a", - "{cx}", "c", - "{memory}", "memory", - "{flags}", "", - "{dirflag}", "", - "{fpsr}", "", - "{fpcr}", "", - "{cc}", "cc", - 0,0}; - void X86MCAsmInfoDarwin::anchor() { } X86MCAsmInfoDarwin::X86MCAsmInfoDarwin(const Triple &T) { @@ -55,7 +42,6 @@ X86MCAsmInfoDarwin::X86MCAsmInfoDarwin(const Triple &T) { if (is64Bit) PointerSize = 8; - AsmTransCBE = x86_asm_table; AssemblerDialect = AsmWriterFlavor; TextAlignFillValue = 0x90; @@ -88,7 +74,6 @@ X86ELFMCAsmInfo::X86ELFMCAsmInfo(const Triple &T) { if (T.getArch() == Triple::x86_64) PointerSize = 8; - AsmTransCBE = x86_asm_table; AssemblerDialect = AsmWriterFlavor; TextAlignFillValue = 0x90; @@ -106,9 +91,10 @@ X86ELFMCAsmInfo::X86ELFMCAsmInfo(const Triple &T) { // Exceptions handling ExceptionsType = ExceptionHandling::DwarfCFI; - // OpenBSD has buggy support for .quad in 32-bit mode, just split into two - // .words. - if (T.getOS() == Triple::OpenBSD && T.getArch() == Triple::x86) + // OpenBSD and Bitrig have buggy support for .quad in 32-bit mode, just split + // into two .words. 
+ if ((T.getOS() == Triple::OpenBSD || T.getOS() == Triple::Bitrig) && + T.getArch() == Triple::x86) Data64bitsDirective = 0; } @@ -137,7 +123,6 @@ X86MCAsmInfoMicrosoft::X86MCAsmInfoMicrosoft(const Triple &Triple) { PrivateGlobalPrefix = ".L"; } - AsmTransCBE = x86_asm_table; AssemblerDialect = AsmWriterFlavor; TextAlignFillValue = 0x90; @@ -151,7 +136,6 @@ X86MCAsmInfoGNUCOFF::X86MCAsmInfoGNUCOFF(const Triple &Triple) { PrivateGlobalPrefix = ".L"; } - AsmTransCBE = x86_asm_table; AssemblerDialect = AsmWriterFlavor; TextAlignFillValue = 0x90; diff --git a/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp b/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp index 80990e5..4a38324 100644 --- a/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp +++ b/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp @@ -139,6 +139,7 @@ public: MCCodeEmitter *llvm::createX86MCCodeEmitter(const MCInstrInfo &MCII, + const MCRegisterInfo &MRI, const MCSubtargetInfo &STI, MCContext &Ctx) { return new X86MCCodeEmitter(MCII, STI, Ctx); @@ -569,7 +570,17 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, } // Classify VEX_B, VEX_4V, VEX_R, VEX_X + unsigned NumOps = Desc.getNumOperands(); unsigned CurOp = 0; + if (NumOps > 1 && Desc.getOperandConstraint(1, MCOI::TIED_TO) == 0) + ++CurOp; + else if (NumOps > 3 && Desc.getOperandConstraint(2, MCOI::TIED_TO) == 0) { + assert(Desc.getOperandConstraint(NumOps - 1, MCOI::TIED_TO) == 1); + // Special case for GATHER with 2 TIED_TO operands + // Skip the first 2 operands: dst, mask_wb + CurOp += 2; + } + switch (TSFlags & X86II::FormMask) { case X86II::MRMInitReg: llvm_unreachable("FIXME: Remove this!"); case X86II::MRMDestMem: { @@ -602,11 +613,11 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, // FMA4: // dst(ModR/M.reg), src1(VEX_4V), src2(ModR/M), src3(VEX_I8IMM) // dst(ModR/M.reg), src1(VEX_4V), src2(VEX_I8IMM), src3(ModR/M), - if (X86II::isX86_64ExtendedReg(MI.getOperand(0).getReg())) + if (X86II::isX86_64ExtendedReg(MI.getOperand(CurOp++).getReg())) VEX_R = 0x0; if (HasVEX_4V) - VEX_4V = getVEXRegisterEncoding(MI, 1); + VEX_4V = getVEXRegisterEncoding(MI, CurOp); if (X86II::isX86_64ExtendedReg( MI.getOperand(MemOperand+X86::AddrBaseReg).getReg())) @@ -616,7 +627,12 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, VEX_X = 0x0; if (HasVEX_4VOp3) - VEX_4V = getVEXRegisterEncoding(MI, X86::AddrNumOperands+1); + // Instruction format for 4VOp3: + // src1(ModR/M), MemAddr, src3(VEX_4V) + // CurOp points to start of the MemoryOperand, + // it skips TIED_TO operands if exist, then increments past src1. + // CurOp + X86::AddrNumOperands will point to src3. + VEX_4V = getVEXRegisterEncoding(MI, CurOp+X86::AddrNumOperands); break; case X86II::MRM0m: case X86II::MRM1m: case X86II::MRM2m: case X86II::MRM3m: @@ -961,11 +977,14 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS, // FIXME: This should be handled during MCInst lowering. unsigned NumOps = Desc.getNumOperands(); unsigned CurOp = 0; - if (NumOps > 1 && Desc.getOperandConstraint(1, MCOI::TIED_TO) != -1) + if (NumOps > 1 && Desc.getOperandConstraint(1, MCOI::TIED_TO) == 0) ++CurOp; - else if (NumOps > 2 && Desc.getOperandConstraint(NumOps-1, MCOI::TIED_TO)== 0) - // Skip the last source operand that is tied_to the dest reg. e.g. 
LXADD32 - --NumOps; + else if (NumOps > 3 && Desc.getOperandConstraint(2, MCOI::TIED_TO) == 0) { + assert(Desc.getOperandConstraint(NumOps - 1, MCOI::TIED_TO) == 1); + // Special case for GATHER with 2 TIED_TO operands + // Skip the first 2 operands: dst, mask_wb + CurOp += 2; + } // Keep track of the current byte being emitted. unsigned CurByte = 0; @@ -1037,7 +1056,7 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS, SrcRegNum = CurOp + X86::AddrNumOperands; if (HasVEX_4V) // Skip 1st src (which is encoded in VEX_VVVV) - SrcRegNum++; + ++SrcRegNum; EmitMemModRMByte(MI, CurOp, GetX86RegNum(MI.getOperand(SrcRegNum)), @@ -1050,15 +1069,15 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS, SrcRegNum = CurOp + 1; if (HasVEX_4V) // Skip 1st src (which is encoded in VEX_VVVV) - SrcRegNum++; + ++SrcRegNum; - if(HasMemOp4) // Skip 2nd src (which is encoded in I8IMM) - SrcRegNum++; + if (HasMemOp4) // Skip 2nd src (which is encoded in I8IMM) + ++SrcRegNum; EmitRegModRMByte(MI.getOperand(SrcRegNum), GetX86RegNum(MI.getOperand(CurOp)), CurByte, OS); - // 2 operands skipped with HasMemOp4, comensate accordingly + // 2 operands skipped with HasMemOp4, compensate accordingly CurOp = HasMemOp4 ? SrcRegNum : SrcRegNum + 1; if (HasVEX_4VOp3) ++CurOp; @@ -1071,7 +1090,7 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS, ++AddrOperands; ++FirstMemOp; // Skip the register source (which is encoded in VEX_VVVV). } - if(HasMemOp4) // Skip second register source (encoded in I8IMM) + if (HasMemOp4) // Skip second register source (encoded in I8IMM) ++FirstMemOp; EmitByte(BaseOpcode, CurByte, OS); @@ -1089,7 +1108,7 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS, case X86II::MRM4r: case X86II::MRM5r: case X86II::MRM6r: case X86II::MRM7r: if (HasVEX_4V) // Skip the register dst (which is encoded in VEX_VVVV). - CurOp++; + ++CurOp; EmitByte(BaseOpcode, CurByte, OS); EmitRegModRMByte(MI.getOperand(CurOp++), (TSFlags & X86II::FormMask)-X86II::MRM0r, @@ -1100,7 +1119,7 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS, case X86II::MRM4m: case X86II::MRM5m: case X86II::MRM6m: case X86II::MRM7m: if (HasVEX_4V) // Skip the register dst (which is encoded in VEX_VVVV). - CurOp++; + ++CurOp; EmitByte(BaseOpcode, CurByte, OS); EmitMemModRMByte(MI, CurOp, (TSFlags & X86II::FormMask)-X86II::MRM0m, TSFlags, CurByte, OS, Fixups); @@ -1149,22 +1168,23 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS, } // If there is a remaining operand, it must be a trailing immediate. Emit it - // according to the right size for the instruction. - if (CurOp != NumOps) { + // according to the right size for the instruction. Some instructions + // (SSE4a extrq and insertq) have two trailing immediates. + while (CurOp != NumOps && NumOps - CurOp <= 2) { // The last source register of a 4 operand instruction in AVX is encoded // in bits[7:4] of a immediate byte. if ((TSFlags >> X86II::VEXShift) & X86II::VEX_I8IMM) { const MCOperand &MO = MI.getOperand(HasMemOp4 ? MemOp4_I8IMMOperand - : CurOp); - CurOp++; - bool IsExtReg = X86II::isX86_64ExtendedReg(MO.getReg()); - unsigned RegNum = (IsExtReg ? 
(1 << 7) : 0); - RegNum |= GetX86RegNum(MO) << 4; + : CurOp); + ++CurOp; + unsigned RegNum = GetX86RegNum(MO) << 4; + if (X86II::isX86_64ExtendedReg(MO.getReg())) + RegNum |= 1 << 7; // If there is an additional 5th operand it must be an immediate, which // is encoded in bits[3:0] - if(CurOp != NumOps) { + if (CurOp != NumOps) { const MCOperand &MIMM = MI.getOperand(CurOp++); - if(MIMM.isImm()) { + if (MIMM.isImm()) { unsigned Val = MIMM.getImm(); assert(Val < 16 && "Immediate operand value out of range"); RegNum |= Val; diff --git a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h index 9896cbe..4650069 100644 --- a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h +++ b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h @@ -76,6 +76,7 @@ namespace X86_MC { } MCCodeEmitter *createX86MCCodeEmitter(const MCInstrInfo &MCII, + const MCRegisterInfo &MRI, const MCSubtargetInfo &STI, MCContext &Ctx); diff --git a/lib/Target/X86/Utils/X86ShuffleDecode.cpp b/lib/Target/X86/Utils/X86ShuffleDecode.cpp index a802333..8b87c1f 100644 --- a/lib/Target/X86/Utils/X86ShuffleDecode.cpp +++ b/lib/Target/X86/Utils/X86ShuffleDecode.cpp @@ -64,13 +64,13 @@ void DecodeMOVLHPSMask(unsigned NElts, SmallVectorImpl &ShuffleMask) { /// DecodePSHUFMask - This decodes the shuffle masks for pshufd, and vpermilp*. /// VT indicates the type of the vector allowing it to handle different /// datatypes and vector widths. -void DecodePSHUFMask(EVT VT, unsigned Imm, SmallVectorImpl &ShuffleMask) { +void DecodePSHUFMask(MVT VT, unsigned Imm, SmallVectorImpl &ShuffleMask) { unsigned NumElts = VT.getVectorNumElements(); unsigned NumLanes = VT.getSizeInBits() / 128; unsigned NumLaneElts = NumElts / NumLanes; - int NewImm = Imm; + unsigned NewImm = Imm; for (unsigned l = 0; l != NumElts; l += NumLaneElts) { for (unsigned i = 0; i != NumLaneElts; ++i) { ShuffleMask.push_back(NewImm % NumLaneElts + l); @@ -80,48 +80,55 @@ void DecodePSHUFMask(EVT VT, unsigned Imm, SmallVectorImpl &ShuffleMask) { } } -void DecodePSHUFHWMask(unsigned Imm, SmallVectorImpl &ShuffleMask) { - ShuffleMask.push_back(0); - ShuffleMask.push_back(1); - ShuffleMask.push_back(2); - ShuffleMask.push_back(3); - for (unsigned i = 0; i != 4; ++i) { - ShuffleMask.push_back(4+(Imm & 3)); - Imm >>= 2; +void DecodePSHUFHWMask(MVT VT, unsigned Imm, + SmallVectorImpl &ShuffleMask) { + unsigned NumElts = VT.getVectorNumElements(); + + for (unsigned l = 0; l != NumElts; l += 8) { + unsigned NewImm = Imm; + for (unsigned i = 0, e = 4; i != e; ++i) { + ShuffleMask.push_back(l + i); + } + for (unsigned i = 4, e = 8; i != e; ++i) { + ShuffleMask.push_back(l + 4 + (NewImm & 3)); + NewImm >>= 2; + } } } -void DecodePSHUFLWMask(unsigned Imm, SmallVectorImpl &ShuffleMask) { - for (unsigned i = 0; i != 4; ++i) { - ShuffleMask.push_back((Imm & 3)); - Imm >>= 2; +void DecodePSHUFLWMask(MVT VT, unsigned Imm, + SmallVectorImpl &ShuffleMask) { + unsigned NumElts = VT.getVectorNumElements(); + + for (unsigned l = 0; l != NumElts; l += 8) { + unsigned NewImm = Imm; + for (unsigned i = 0, e = 4; i != e; ++i) { + ShuffleMask.push_back(l + (NewImm & 3)); + NewImm >>= 2; + } + for (unsigned i = 4, e = 8; i != e; ++i) { + ShuffleMask.push_back(l + i); + } } - ShuffleMask.push_back(4); - ShuffleMask.push_back(5); - ShuffleMask.push_back(6); - ShuffleMask.push_back(7); } /// DecodeSHUFPMask - This decodes the shuffle masks for shufp*. VT indicates /// the type of the vector allowing it to handle different datatypes and vector /// widths. 
-void DecodeSHUFPMask(EVT VT, unsigned Imm, SmallVectorImpl &ShuffleMask) {
+void DecodeSHUFPMask(MVT VT, unsigned Imm, SmallVectorImpl &ShuffleMask) {
   unsigned NumElts = VT.getVectorNumElements();
 
   unsigned NumLanes = VT.getSizeInBits() / 128;
   unsigned NumLaneElts = NumElts / NumLanes;
 
-  int NewImm = Imm;
+  unsigned NewImm = Imm;
   for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
-    // Part that reads from dest.
-    for (unsigned i = 0; i != NumLaneElts/2; ++i) {
-      ShuffleMask.push_back(NewImm % NumLaneElts + l);
-      NewImm /= NumLaneElts;
-    }
-    // Part that reads from src.
-    for (unsigned i = 0; i != NumLaneElts/2; ++i) {
-      ShuffleMask.push_back(NewImm % NumLaneElts + NumElts + l);
-      NewImm /= NumLaneElts;
+    // each half of a lane comes from a different source
+    for (unsigned s = 0; s != NumElts*2; s += NumElts) {
+      for (unsigned i = 0; i != NumLaneElts/2; ++i) {
+        ShuffleMask.push_back(NewImm % NumLaneElts + s + l);
+        NewImm /= NumLaneElts;
+      }
     }
     if (NumLaneElts == 4) NewImm = Imm; // reload imm
   }
@@ -130,7 +137,7 @@ void DecodeSHUFPMask(EVT VT, unsigned Imm, SmallVectorImpl &ShuffleMask) {
 /// DecodeUNPCKHMask - This decodes the shuffle masks for unpckhps/unpckhpd
 /// and punpckh*. VT indicates the type of the vector allowing it to handle
 /// different datatypes and vector widths.
-void DecodeUNPCKHMask(EVT VT, SmallVectorImpl &ShuffleMask) {
+void DecodeUNPCKHMask(MVT VT, SmallVectorImpl &ShuffleMask) {
   unsigned NumElts = VT.getVectorNumElements();
 
   // Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate
@@ -150,7 +157,7 @@ void DecodeUNPCKHMask(EVT VT, SmallVectorImpl &ShuffleMask) {
 /// DecodeUNPCKLMask - This decodes the shuffle masks for unpcklps/unpcklpd
 /// and punpckl*. VT indicates the type of the vector allowing it to handle
 /// different datatypes and vector widths.
-void DecodeUNPCKLMask(EVT VT, SmallVectorImpl &ShuffleMask) {
+void DecodeUNPCKLMask(MVT VT, SmallVectorImpl &ShuffleMask) {
   unsigned NumElts = VT.getVectorNumElements();
 
   // Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate
@@ -167,19 +174,26 @@ void DecodeUNPCKLMask(EVT VT, SmallVectorImpl &ShuffleMask) {
   }
 }
 
-void DecodeVPERM2X128Mask(EVT VT, unsigned Imm,
+void DecodeVPERM2X128Mask(MVT VT, unsigned Imm,
                           SmallVectorImpl &ShuffleMask) {
   if (Imm & 0x88)
     return; // Not a shuffle
 
   unsigned HalfSize = VT.getVectorNumElements()/2;
-  unsigned FstHalfBegin = (Imm & 0x3) * HalfSize;
-  unsigned SndHalfBegin = ((Imm >> 4) & 0x3) * HalfSize;
-  for (int i = FstHalfBegin, e = FstHalfBegin+HalfSize; i != e; ++i)
-    ShuffleMask.push_back(i);
-  for (int i = SndHalfBegin, e = SndHalfBegin+HalfSize; i != e; ++i)
-    ShuffleMask.push_back(i);
+  for (unsigned l = 0; l != 2; ++l) {
+    unsigned HalfBegin = ((Imm >> (l*4)) & 0x3) * HalfSize;
+    for (unsigned i = HalfBegin, e = HalfBegin+HalfSize; i != e; ++i)
+      ShuffleMask.push_back(i);
+  }
+}
+
+/// DecodeVPERMMask - this decodes the shuffle masks for VPERMQ/VPERMPD.
+/// No VT provided since it only works on 256-bit, 4 element vectors.
+void DecodeVPERMMask(unsigned Imm, SmallVectorImpl &ShuffleMask) { + for (unsigned i = 0; i != 4; ++i) { + ShuffleMask.push_back((Imm >> (2*i)) & 3); + } } } // llvm namespace diff --git a/lib/Target/X86/Utils/X86ShuffleDecode.h b/lib/Target/X86/Utils/X86ShuffleDecode.h index 5b8c6ef..70d8171 100644 --- a/lib/Target/X86/Utils/X86ShuffleDecode.h +++ b/lib/Target/X86/Utils/X86ShuffleDecode.h @@ -35,31 +35,35 @@ void DecodeMOVHLPSMask(unsigned NElts, SmallVectorImpl &ShuffleMask); // <0,2> or <0,1,4,5> void DecodeMOVLHPSMask(unsigned NElts, SmallVectorImpl &ShuffleMask); -void DecodePSHUFMask(EVT VT, unsigned Imm, SmallVectorImpl &ShuffleMask); +void DecodePSHUFMask(MVT VT, unsigned Imm, SmallVectorImpl &ShuffleMask); -void DecodePSHUFHWMask(unsigned Imm, SmallVectorImpl &ShuffleMask); +void DecodePSHUFHWMask(MVT VT, unsigned Imm, SmallVectorImpl &ShuffleMask); -void DecodePSHUFLWMask(unsigned Imm, SmallVectorImpl &ShuffleMask); +void DecodePSHUFLWMask(MVT, unsigned Imm, SmallVectorImpl &ShuffleMask); /// DecodeSHUFPMask - This decodes the shuffle masks for shufp*. VT indicates /// the type of the vector allowing it to handle different datatypes and vector /// widths. -void DecodeSHUFPMask(EVT VT, unsigned Imm, SmallVectorImpl &ShuffleMask); +void DecodeSHUFPMask(MVT VT, unsigned Imm, SmallVectorImpl &ShuffleMask); /// DecodeUNPCKHMask - This decodes the shuffle masks for unpckhps/unpckhpd /// and punpckh*. VT indicates the type of the vector allowing it to handle /// different datatypes and vector widths. -void DecodeUNPCKHMask(EVT VT, SmallVectorImpl &ShuffleMask); +void DecodeUNPCKHMask(MVT VT, SmallVectorImpl &ShuffleMask); /// DecodeUNPCKLMask - This decodes the shuffle masks for unpcklps/unpcklpd /// and punpckl*. VT indicates the type of the vector allowing it to handle /// different datatypes and vector widths. -void DecodeUNPCKLMask(EVT VT, SmallVectorImpl &ShuffleMask); +void DecodeUNPCKLMask(MVT VT, SmallVectorImpl &ShuffleMask); -void DecodeVPERM2X128Mask(EVT VT, unsigned Imm, +void DecodeVPERM2X128Mask(MVT VT, unsigned Imm, SmallVectorImpl &ShuffleMask); +/// DecodeVPERMMask - this decodes the shuffle masks for VPERMQ/VPERMPD. +/// No VT provided since it only works on 256-bit, 4 element vectors. +void DecodeVPERMMask(unsigned Imm, SmallVectorImpl &ShuffleMask); + } // llvm namespace #endif diff --git a/lib/Target/X86/X86.h b/lib/Target/X86/X86.h index ecc7b59..dce5b4d 100644 --- a/lib/Target/X86/X86.h +++ b/lib/Target/X86/X86.h @@ -26,7 +26,7 @@ class FunctionPass; class JITCodeEmitter; class X86TargetMachine; -/// createX86ISelDag - This pass converts a legalized DAG into a +/// createX86ISelDag - This pass converts a legalized DAG into a /// X86-specific DAG, ready for instruction scheduling. /// FunctionPass *createX86ISelDag(X86TargetMachine &TM, @@ -36,6 +36,11 @@ FunctionPass *createX86ISelDag(X86TargetMachine &TM, /// register for PIC on x86-32. FunctionPass* createGlobalBaseRegPass(); +/// createCleanupLocalDynamicTLSPass() - This pass combines multiple accesses +/// to local-dynamic TLS variables so that the TLS base address for the module +/// is only fetched once per execution path through the function. +FunctionPass *createCleanupLocalDynamicTLSPass(); + /// createX86FloatingPointStackifierPass - This function returns a pass which /// converts floating point register references and pseudo instructions into /// floating point stack references and physical instructions. 
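Before the target definition changes below: DecodeVPERMMask simply pulls four 2-bit source selectors out of the VPERMQ/VPERMPD immediate. A minimal standalone restatement with a worked immediate, using std::vector in place of SmallVectorImpl so it builds without the LLVM headers (the lowercase name marks it as an illustration, not the imported code):

#include <cstdio>
#include <vector>

// Bits [2i+1:2i] of the immediate select which 64-bit source element lands
// in destination element i, mirroring DecodeVPERMMask above.
static void decodeVPERMMask(unsigned Imm, std::vector<int> &ShuffleMask) {
  for (unsigned i = 0; i != 4; ++i)
    ShuffleMask.push_back((Imm >> (2 * i)) & 3);
}

int main() {
  std::vector<int> Mask;
  decodeVPERMMask(0x1B, Mask); // 0x1B == 0b00011011 -> selectors 3, 2, 1, 0
  for (unsigned i = 0; i != Mask.size(); ++i)
    std::printf("%d ", Mask[i]); // prints: 3 2 1 0 (full element reversal)
  std::printf("\n");
  return 0;
}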
diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td index b6591d4..6c1a816 100644 --- a/lib/Target/X86/X86.td +++ b/lib/Target/X86/X86.td @@ -86,21 +86,24 @@ def FeatureAVX : SubtargetFeature<"avx", "X86SSELevel", "AVX", def FeatureAVX2 : SubtargetFeature<"avx2", "X86SSELevel", "AVX2", "Enable AVX2 instructions", [FeatureAVX]>; -def FeatureCLMUL : SubtargetFeature<"clmul", "HasCLMUL", "true", - "Enable carry-less multiplication instructions">; -def FeatureFMA3 : SubtargetFeature<"fma3", "HasFMA3", "true", +def FeaturePCLMUL : SubtargetFeature<"pclmul", "HasPCLMUL", "true", + "Enable packed carry-less multiplication instructions", + [FeatureSSE2]>; +def FeatureFMA : SubtargetFeature<"fma", "HasFMA", "true", "Enable three-operand fused multiple-add", [FeatureAVX]>; def FeatureFMA4 : SubtargetFeature<"fma4", "HasFMA4", "true", "Enable four-operand fused multiple-add", - [FeatureAVX]>; + [FeatureAVX, FeatureSSE4A]>; def FeatureXOP : SubtargetFeature<"xop", "HasXOP", "true", - "Enable XOP instructions">; + "Enable XOP instructions", + [FeatureAVX, FeatureSSE4A]>; def FeatureVectorUAMem : SubtargetFeature<"vector-unaligned-mem", "HasVectorUAMem", "true", "Allow unaligned memory operands on vector/SIMD instructions">; def FeatureAES : SubtargetFeature<"aes", "HasAES", "true", - "Enable AES instructions">; + "Enable AES instructions", + [FeatureSSE2]>; def FeatureMOVBE : SubtargetFeature<"movbe", "HasMOVBE", "true", "Support MOVBE instruction">; def FeatureRDRAND : SubtargetFeature<"rdrand", "HasRDRAND", "true", @@ -128,10 +131,10 @@ def ProcIntelAtom : SubtargetFeature<"atom", "X86ProcFamily", "IntelAtom", "Intel Atom processors">; class Proc Features> - : Processor; + : ProcessorModel; class AtomProc Features> - : Processor; + : ProcessorModel; def : Proc<"generic", []>; def : Proc<"i386", []>; @@ -169,25 +172,23 @@ def : Proc<"nehalem", [FeatureSSE42, FeatureCMPXCHG16B, // Westmere is the corei3/i5/i7 path from nehalem to sandybridge def : Proc<"westmere", [FeatureSSE42, FeatureCMPXCHG16B, FeatureSlowBTMem, FeatureFastUAMem, - FeaturePOPCNT, FeatureAES, FeatureCLMUL]>; + FeaturePOPCNT, FeatureAES, FeaturePCLMUL]>; // Sandy Bridge // SSE is not listed here since llvm treats AVX as a reimplementation of SSE, // rather than a superset. -// FIXME: Disabling AVX for now since it's not ready. -def : Proc<"corei7-avx", [FeatureSSE42, FeatureCMPXCHG16B, FeaturePOPCNT, - FeatureAES, FeatureCLMUL]>; +def : Proc<"corei7-avx", [FeatureAVX, FeatureCMPXCHG16B, FeaturePOPCNT, + FeatureAES, FeaturePCLMUL]>; // Ivy Bridge -def : Proc<"core-avx-i", [FeatureSSE42, FeatureCMPXCHG16B, FeaturePOPCNT, - FeatureAES, FeatureCLMUL, +def : Proc<"core-avx-i", [FeatureAVX, FeatureCMPXCHG16B, FeaturePOPCNT, + FeatureAES, FeaturePCLMUL, FeatureRDRAND, FeatureF16C, FeatureFSGSBase]>; // Haswell -// FIXME: Disabling AVX/AVX2/FMA3 for now since it's not ready. 
-def : Proc<"core-avx2", [FeatureSSE42, FeatureCMPXCHG16B, FeaturePOPCNT, - FeatureAES, FeatureCLMUL, FeatureRDRAND, +def : Proc<"core-avx2", [FeatureAVX2, FeatureCMPXCHG16B, FeaturePOPCNT, + FeatureAES, FeaturePCLMUL, FeatureRDRAND, FeatureF16C, FeatureFSGSBase, FeatureMOVBE, FeatureLZCNT, FeatureBMI, - FeatureBMI2]>; + FeatureBMI2, FeatureFMA]>; def : Proc<"k6", [FeatureMMX]>; def : Proc<"k6-2", [Feature3DNow]>; @@ -211,21 +212,20 @@ def : Proc<"opteron-sse3", [FeatureSSE3, Feature3DNowA, FeatureCMPXCHG16B, FeatureSlowBTMem]>; def : Proc<"athlon64-sse3", [FeatureSSE3, Feature3DNowA, FeatureCMPXCHG16B, FeatureSlowBTMem]>; -def : Proc<"amdfam10", [FeatureSSE3, FeatureSSE4A, +def : Proc<"amdfam10", [FeatureSSE4A, Feature3DNowA, FeatureCMPXCHG16B, FeatureLZCNT, FeaturePOPCNT, FeatureSlowBTMem]>; // Bobcat def : Proc<"btver1", [FeatureSSSE3, FeatureSSE4A, FeatureCMPXCHG16B, FeatureLZCNT, FeaturePOPCNT]>; -// FIXME: Disabling AVX/FMA4 for now since it's not ready. // Bulldozer -def : Proc<"bdver1", [FeatureSSE42, FeatureSSE4A, FeatureCMPXCHG16B, - FeatureAES, FeatureCLMUL, - FeatureXOP, FeatureLZCNT, FeaturePOPCNT]>; +def : Proc<"bdver1", [FeatureXOP, FeatureFMA4, FeatureCMPXCHG16B, + FeatureAES, FeaturePCLMUL, + FeatureLZCNT, FeaturePOPCNT]>; // Enhanced Bulldozer -def : Proc<"bdver2", [FeatureSSE42, FeatureSSE4A, FeatureCMPXCHG16B, - FeatureAES, FeatureCLMUL, - FeatureXOP, FeatureF16C, FeatureLZCNT, +def : Proc<"bdver2", [FeatureXOP, FeatureFMA4, FeatureCMPXCHG16B, + FeatureAES, FeaturePCLMUL, + FeatureF16C, FeatureLZCNT, FeaturePOPCNT, FeatureBMI]>; def : Proc<"winchip-c6", [FeatureMMX]>; diff --git a/lib/Target/X86/X86AsmPrinter.cpp b/lib/Target/X86/X86AsmPrinter.cpp index 7db7ccb..db71e27 100644 --- a/lib/Target/X86/X86AsmPrinter.cpp +++ b/lib/Target/X86/X86AsmPrinter.cpp @@ -20,10 +20,10 @@ #include "X86TargetMachine.h" #include "InstPrinter/X86ATTInstPrinter.h" #include "llvm/CallingConv.h" +#include "llvm/DebugInfo.h" #include "llvm/DerivedTypes.h" #include "llvm/Module.h" #include "llvm/Type.h" -#include "llvm/Analysis/DebugInfo.h" #include "llvm/Assembly/Writer.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" @@ -186,10 +186,14 @@ void X86AsmPrinter::printSymbolOperand(const MachineOperand &MO, O << '-' << *MF->getPICBaseSymbol(); break; case X86II::MO_TLSGD: O << "@TLSGD"; break; + case X86II::MO_TLSLD: O << "@TLSLD"; break; + case X86II::MO_TLSLDM: O << "@TLSLDM"; break; case X86II::MO_GOTTPOFF: O << "@GOTTPOFF"; break; case X86II::MO_INDNTPOFF: O << "@INDNTPOFF"; break; case X86II::MO_TPOFF: O << "@TPOFF"; break; + case X86II::MO_DTPOFF: O << "@DTPOFF"; break; case X86II::MO_NTPOFF: O << "@NTPOFF"; break; + case X86II::MO_GOTNTPOFF: O << "@GOTNTPOFF"; break; case X86II::MO_GOTPCREL: O << "@GOTPCREL"; break; case X86II::MO_GOT: O << "@GOT"; break; case X86II::MO_GOTOFF: O << "@GOTOFF"; break; @@ -403,7 +407,9 @@ bool X86AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, const MachineOperand &MO = MI->getOperand(OpNo); switch (ExtraCode[0]) { - default: return true; // Unknown modifier. + default: + // See if this is a generic print operand + return AsmPrinter::PrintAsmOperand(MI, OpNo, AsmVariant, ExtraCode, O); case 'a': // This is an address. Currently only 'i' and 'r' are expected. 
if (MO.isImm()) { O << MO.getImm(); diff --git a/lib/Target/X86/X86AsmPrinter.h b/lib/Target/X86/X86AsmPrinter.h index a6ed9ba..35386cd 100644 --- a/lib/Target/X86/X86AsmPrinter.h +++ b/lib/Target/X86/X86AsmPrinter.h @@ -37,15 +37,15 @@ class LLVM_LIBRARY_VISIBILITY X86AsmPrinter : public AsmPrinter { virtual const char *getPassName() const { return "X86 AT&T-Style Assembly Printer"; } - + const X86Subtarget &getSubtarget() const { return *Subtarget; } virtual void EmitStartOfAsmFile(Module &M); virtual void EmitEndOfAsmFile(Module &M); - + virtual void EmitInstruction(const MachineInstr *MI); - + void printSymbolOperand(const MachineOperand &MO, raw_ostream &O); // These methods are used by the tablegen'erated instruction printer. @@ -71,7 +71,7 @@ class LLVM_LIBRARY_VISIBILITY X86AsmPrinter : public AsmPrinter { void printPICLabel(const MachineInstr *MI, unsigned Op, raw_ostream &O); bool runOnMachineFunction(MachineFunction &F); - + void PrintDebugValueComment(const MachineInstr *MI, raw_ostream &OS); MachineLocation getDebugValueLocation(const MachineInstr *MI) const; diff --git a/lib/Target/X86/X86COFFMachineModuleInfo.cpp b/lib/Target/X86/X86COFFMachineModuleInfo.cpp index e01ff41..6a6125b 100644 --- a/lib/Target/X86/X86COFFMachineModuleInfo.cpp +++ b/lib/Target/X86/X86COFFMachineModuleInfo.cpp @@ -17,4 +17,3 @@ using namespace llvm; X86COFFMachineModuleInfo::~X86COFFMachineModuleInfo() { } - diff --git a/lib/Target/X86/X86COFFMachineModuleInfo.h b/lib/Target/X86/X86COFFMachineModuleInfo.h index 0cec95a..471eb31 100644 --- a/lib/Target/X86/X86COFFMachineModuleInfo.h +++ b/lib/Target/X86/X86COFFMachineModuleInfo.h @@ -1,4 +1,4 @@ -//===-- X86COFFMachineModuleInfo.h - X86 COFF MMI Impl ----------*- C++ -*-===// +//===-- X86coffmachinemoduleinfo.h - X86 COFF MMI Impl ----------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -33,7 +33,7 @@ public: void addExternalFunction(MCSymbol* Symbol) { Externals.insert(Symbol); } - + typedef DenseSet::const_iterator externals_iterator; externals_iterator externals_begin() const { return Externals.begin(); } externals_iterator externals_end() const { return Externals.end(); } diff --git a/lib/Target/X86/X86CallingConv.td b/lib/Target/X86/X86CallingConv.td index d148989..a6d2709 100644 --- a/lib/Target/X86/X86CallingConv.td +++ b/lib/Target/X86/X86CallingConv.td @@ -29,10 +29,13 @@ def RetCC_X86Common : CallingConv<[ // up in AX and AH, which overlap. Front-ends wishing to conform to the ABI // for functions that return two i8 values are currently expected to pack the // values into an i16 (which uses AX, and thus AL:AH). - CCIfType<[i8] , CCAssignToReg<[AL, DL]>>, - CCIfType<[i16], CCAssignToReg<[AX, DX]>>, - CCIfType<[i32], CCAssignToReg<[EAX, EDX]>>, - CCIfType<[i64], CCAssignToReg<[RAX, RDX]>>, + // + // For code that doesn't care about the ABI, we allow returning more than two + // integer values in registers. + CCIfType<[i8] , CCAssignToReg<[AL, DL, CL]>>, + CCIfType<[i16], CCAssignToReg<[AX, DX, CX]>>, + CCIfType<[i32], CCAssignToReg<[EAX, EDX, ECX]>>, + CCIfType<[i64], CCAssignToReg<[RAX, RDX, RCX]>>, // Vector types are returned in XMM0 and XMM1, when they fit. XMM2 and XMM3 // can only be used by ABI non-compliant code. If the target doesn't have XMM @@ -413,7 +416,7 @@ def CC_X86 : CallingConv<[ // Callee-saved Registers. 
//===----------------------------------------------------------------------===// -def CSR_Ghc : CalleeSavedRegs<(add)>; +def CSR_NoRegs : CalleeSavedRegs<(add)>; def CSR_32 : CalleeSavedRegs<(add ESI, EDI, EBX, EBP)>; def CSR_64 : CalleeSavedRegs<(add RBX, R12, R13, R14, R15, RBP)>; diff --git a/lib/Target/X86/X86CodeEmitter.cpp b/lib/Target/X86/X86CodeEmitter.cpp index ee3de9a..d705049 100644 --- a/lib/Target/X86/X86CodeEmitter.cpp +++ b/lib/Target/X86/X86CodeEmitter.cpp @@ -53,12 +53,12 @@ namespace { public: static char ID; explicit Emitter(X86TargetMachine &tm, CodeEmitter &mce) - : MachineFunctionPass(ID), II(0), TD(0), TM(tm), + : MachineFunctionPass(ID), II(0), TD(0), TM(tm), MCE(mce), PICBaseOffset(0), Is64BitMode(false), IsPIC(TM.getRelocationModel() == Reloc::PIC_) {} Emitter(X86TargetMachine &tm, CodeEmitter &mce, const X86InstrInfo &ii, const TargetData &td, bool is64) - : MachineFunctionPass(ID), II(&ii), TD(&td), TM(tm), + : MachineFunctionPass(ID), II(&ii), TD(&td), TM(tm), MCE(mce), PICBaseOffset(0), Is64BitMode(is64), IsPIC(TM.getRelocationModel() == Reloc::PIC_) {} @@ -68,8 +68,20 @@ namespace { return "X86 Machine Code Emitter"; } + void emitOpcodePrefix(uint64_t TSFlags, int MemOperand, + const MachineInstr &MI, + const MCInstrDesc *Desc) const; + + void emitVEXOpcodePrefix(uint64_t TSFlags, int MemOperand, + const MachineInstr &MI, + const MCInstrDesc *Desc) const; + + void emitSegmentOverridePrefix(uint64_t TSFlags, + int MemOperand, + const MachineInstr &MI) const; + void emitInstruction(MachineInstr &MI, const MCInstrDesc *Desc); - + void getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); AU.addRequired(); @@ -115,17 +127,17 @@ template bool Emitter::runOnMachineFunction(MachineFunction &MF) { MMI = &getAnalysis(); MCE.setModuleInfo(MMI); - + II = TM.getInstrInfo(); TD = TM.getTargetData(); Is64BitMode = TM.getSubtarget().is64Bit(); IsPIC = TM.getRelocationModel() == Reloc::PIC_; - + do { - DEBUG(dbgs() << "JITTing function '" + DEBUG(dbgs() << "JITTing function '" << MF.getFunction()->getName() << "'\n"); MCE.startFunction(MF); - for (MachineFunction::iterator MBB = MF.begin(), E = MF.end(); + for (MachineFunction::iterator MBB = MF.begin(), E = MF.end(); MBB != E; ++MBB) { MCE.StartMachineBasicBlock(MBB); for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); @@ -149,18 +161,18 @@ bool Emitter::runOnMachineFunction(MachineFunction &MF) { static unsigned determineREX(const MachineInstr &MI) { unsigned REX = 0; const MCInstrDesc &Desc = MI.getDesc(); - + // Pseudo instructions do not need REX prefix byte. if ((Desc.TSFlags & X86II::FormMask) == X86II::Pseudo) return 0; if (Desc.TSFlags & X86II::REX_W) REX |= 1 << 3; - + unsigned NumOps = Desc.getNumOperands(); if (NumOps) { bool isTwoAddr = NumOps > 1 && - Desc.getOperandConstraint(1, MCOI::TIED_TO) != -1; - + Desc.getOperandConstraint(1, MCOI::TIED_TO) != -1; + // If it accesses SPL, BPL, SIL, or DIL, then it requires a 0x40 REX prefix. unsigned i = isTwoAddr ? 1 : 0; for (unsigned e = NumOps; i != e; ++i) { @@ -171,7 +183,7 @@ static unsigned determineREX(const MachineInstr &MI) { REX |= 0x40; } } - + switch (Desc.TSFlags & X86II::FormMask) { case X86II::MRMInitReg: if (X86InstrInfo::isX86_64ExtendedReg(MI.getOperand(0))) @@ -362,7 +374,7 @@ void Emitter::emitRegModRMByte(unsigned RegOpcodeFld) { } template -void Emitter::emitSIBByte(unsigned SS, +void Emitter::emitSIBByte(unsigned SS, unsigned Index, unsigned Base) { // SIB byte is in the same format as the ModRMByte... 
@@ -378,8 +390,8 @@ void Emitter::emitConstant(uint64_t Val, unsigned Size) {
   }
 }
 
-/// isDisp8 - Return true if this signed displacement fits in a 8-bit 
-/// sign-extended field. 
+/// isDisp8 - Return true if this signed displacement fits in an 8-bit
+/// sign-extended field.
 static bool isDisp8(int Value) {
   return Value == (signed char)Value;
 }
@@ -388,10 +400,10 @@ static bool gvNeedsNonLazyPtr(const MachineOperand &GVOp,
                               const TargetMachine &TM) {
   // For Darwin-64, simulate the linktime GOT by using the same non-lazy-pointer
   // mechanism as 32-bit mode.
-  if (TM.getSubtarget().is64Bit() && 
+  if (TM.getSubtarget().is64Bit() &&
       !TM.getSubtarget().isTargetDarwin())
     return false;
-  
+
   // Return true if this is a reference to a stub containing the address of the
   // global, not the global itself.
   return isGlobalStubReference(GVOp.getTargetFlags());
@@ -417,7 +429,7 @@ void Emitter::emitDisplacementField(const MachineOperand *RelocOp,
   if (RelocOp->isGlobal()) {
     // In 64-bit static small code model, we could potentially emit absolute.
     // But it's probably not beneficial. If the MCE supports using RIP directly
-    // do it, otherwise fallback to absolute (this is determined by IsPCRel). 
+    // do it, otherwise fallback to absolute (this is determined by IsPCRel).
     //  89 05 00 00 00 00     mov    %eax,0(%rip)  # PC-relative
     //  89 04 25 00 00 00 00  mov    %eax,0x0      # Absolute
     bool Indirect = gvNeedsNonLazyPtr(*RelocOp, TM);
@@ -441,7 +453,7 @@ void Emitter::emitMemModRMByte(const MachineInstr &MI,
   const MachineOperand &Op3 = MI.getOperand(Op+3);
   int DispVal = 0;
   const MachineOperand *DispForReloc = 0;
-  
+
   // Figure out what sort of displacement we have to handle here.
   if (Op3.isGlobal()) {
     DispForReloc = &Op3;
@@ -469,7 +481,7 @@ void Emitter::emitMemModRMByte(const MachineInstr &MI,
   const MachineOperand &IndexReg = MI.getOperand(Op+2);
 
   unsigned BaseReg = Base.getReg();
-  
+
   // Handle %rip relative addressing.
   if (BaseReg == X86::RIP ||
       (Is64BitMode && DispForReloc)) {     // [disp32+RIP] in X86-64 mode
@@ -486,7 +498,7 @@ void Emitter::emitMemModRMByte(const MachineInstr &MI,
   bool IsPCRel = MCE.earlyResolveAddresses() ? true : false;
 
   // Is a SIB byte needed?
-  // If no BaseReg, issue a RIP relative instruction only if the MCE can 
+  // If no BaseReg, issue a RIP relative instruction only if the MCE can
   // resolve addresses on-the-fly, otherwise use SIB (Intel Manual 2A, table
   // 2-7) and absolute references.
   unsigned BaseRegNo = -1U;
@@ -494,7 +506,7 @@ void Emitter::emitMemModRMByte(const MachineInstr &MI,
     BaseRegNo = X86_MC::getX86RegNum(BaseReg);
 
   if (// The SIB byte must be used if there is an index register.
-      IndexReg.getReg() == 0 && 
+      IndexReg.getReg() == 0 &&
       // The SIB byte must be used if the base is ESP/RSP/R12, all of which
       // encode to an R/M value of 4, which indicates that a SIB byte is
       // present.
@@ -508,7 +520,7 @@ void Emitter::emitMemModRMByte(const MachineInstr &MI,
       emitDisplacementField(DispForReloc, DispVal, PCAdj, true);
       return;
     }
-    
+
     // If the base is not EBP/ESP and there is no displacement, use simple
     // indirect register encoding, this handles addresses like [EAX]. The
     // encoding for [EBP] with no displacement means [disp32] so we handle it
@@ -517,20 +529,20 @@ void Emitter::emitMemModRMByte(const MachineInstr &MI,
       MCE.emitByte(ModRMByte(0, RegOpcodeField, BaseRegNo));
       return;
     }
-    
+
     // Otherwise, if the displacement fits in a byte, encode as [REG+disp8].
if (!DispForReloc && isDisp8(DispVal)) { MCE.emitByte(ModRMByte(1, RegOpcodeField, BaseRegNo)); emitConstant(DispVal, 1); return; } - + // Otherwise, emit the most general non-SIB encoding: [REG+disp32] MCE.emitByte(ModRMByte(2, RegOpcodeField, BaseRegNo)); emitDisplacementField(DispForReloc, DispVal, PCAdj, IsPCRel); return; } - + // Otherwise we need a SIB byte, so start by outputting the ModR/M byte first. assert(IndexReg.getReg() != X86::ESP && IndexReg.getReg() != X86::RSP && "Cannot use ESP as index reg!"); @@ -563,7 +575,7 @@ void Emitter::emitMemModRMByte(const MachineInstr &MI, unsigned SS = SSTable[Scale.getImm()]; if (BaseReg == 0) { - // Handle the SIB byte for the case where there is no base, see Intel + // Handle the SIB byte for the case where there is no base, see Intel // Manual 2A, table 2-7. The displacement has already been output. unsigned IndexRegNo; if (IndexReg.getReg()) @@ -596,94 +608,116 @@ static const MCInstrDesc *UpdateOp(MachineInstr &MI, const X86InstrInfo *II, return Desc; } -template -void Emitter::emitInstruction(MachineInstr &MI, - const MCInstrDesc *Desc) { - DEBUG(dbgs() << MI); - - // If this is a pseudo instruction, lower it. - switch (Desc->getOpcode()) { - case X86::ADD16rr_DB: Desc = UpdateOp(MI, II, X86::OR16rr); break; - case X86::ADD32rr_DB: Desc = UpdateOp(MI, II, X86::OR32rr); break; - case X86::ADD64rr_DB: Desc = UpdateOp(MI, II, X86::OR64rr); break; - case X86::ADD16ri_DB: Desc = UpdateOp(MI, II, X86::OR16ri); break; - case X86::ADD32ri_DB: Desc = UpdateOp(MI, II, X86::OR32ri); break; - case X86::ADD64ri32_DB: Desc = UpdateOp(MI, II, X86::OR64ri32); break; - case X86::ADD16ri8_DB: Desc = UpdateOp(MI, II, X86::OR16ri8); break; - case X86::ADD32ri8_DB: Desc = UpdateOp(MI, II, X86::OR32ri8); break; - case X86::ADD64ri8_DB: Desc = UpdateOp(MI, II, X86::OR64ri8); break; - case X86::ACQUIRE_MOV8rm: Desc = UpdateOp(MI, II, X86::MOV8rm); break; - case X86::ACQUIRE_MOV16rm: Desc = UpdateOp(MI, II, X86::MOV16rm); break; - case X86::ACQUIRE_MOV32rm: Desc = UpdateOp(MI, II, X86::MOV32rm); break; - case X86::ACQUIRE_MOV64rm: Desc = UpdateOp(MI, II, X86::MOV64rm); break; - case X86::RELEASE_MOV8mr: Desc = UpdateOp(MI, II, X86::MOV8mr); break; - case X86::RELEASE_MOV16mr: Desc = UpdateOp(MI, II, X86::MOV16mr); break; - case X86::RELEASE_MOV32mr: Desc = UpdateOp(MI, II, X86::MOV32mr); break; - case X86::RELEASE_MOV64mr: Desc = UpdateOp(MI, II, X86::MOV64mr); break; - } - +/// Is16BitMemOperand - Return true if the specified instruction has +/// a 16-bit memory operand. Op specifies the operand # of the memoperand. +static bool Is16BitMemOperand(const MachineInstr &MI, unsigned Op) { + const MachineOperand &BaseReg = MI.getOperand(Op+X86::AddrBaseReg); + const MachineOperand &IndexReg = MI.getOperand(Op+X86::AddrIndexReg); + + if ((BaseReg.getReg() != 0 && + X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg.getReg())) || + (IndexReg.getReg() != 0 && + X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg.getReg()))) + return true; + return false; +} - MCE.processDebugLoc(MI.getDebugLoc(), true); +/// Is32BitMemOperand - Return true if the specified instruction has +/// a 32-bit memory operand. Op specifies the operand # of the memoperand. 
+static bool Is32BitMemOperand(const MachineInstr &MI, unsigned Op) { + const MachineOperand &BaseReg = MI.getOperand(Op+X86::AddrBaseReg); + const MachineOperand &IndexReg = MI.getOperand(Op+X86::AddrIndexReg); + + if ((BaseReg.getReg() != 0 && + X86MCRegisterClasses[X86::GR32RegClassID].contains(BaseReg.getReg())) || + (IndexReg.getReg() != 0 && + X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg.getReg()))) + return true; + return false; +} - unsigned Opcode = Desc->Opcode; +/// Is64BitMemOperand - Return true if the specified instruction has +/// a 64-bit memory operand. Op specifies the operand # of the memoperand. +#ifndef NDEBUG +static bool Is64BitMemOperand(const MachineInstr &MI, unsigned Op) { + const MachineOperand &BaseReg = MI.getOperand(Op+X86::AddrBaseReg); + const MachineOperand &IndexReg = MI.getOperand(Op+X86::AddrIndexReg); + + if ((BaseReg.getReg() != 0 && + X86MCRegisterClasses[X86::GR64RegClassID].contains(BaseReg.getReg())) || + (IndexReg.getReg() != 0 && + X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg.getReg()))) + return true; + return false; +} +#endif +template +void Emitter::emitOpcodePrefix(uint64_t TSFlags, + int MemOperand, + const MachineInstr &MI, + const MCInstrDesc *Desc) const { // Emit the lock opcode prefix as needed. if (Desc->TSFlags & X86II::LOCK) MCE.emitByte(0xF0); // Emit segment override opcode prefix as needed. - switch (Desc->TSFlags & X86II::SegOvrMask) { - case X86II::FS: - MCE.emitByte(0x64); - break; - case X86II::GS: - MCE.emitByte(0x65); - break; - default: llvm_unreachable("Invalid segment!"); - case 0: break; // No segment override! - } + emitSegmentOverridePrefix(TSFlags, MemOperand, MI); // Emit the repeat opcode prefix as needed. if ((Desc->TSFlags & X86II::Op0Mask) == X86II::REP) MCE.emitByte(0xF3); - // Emit the operand size opcode prefix as needed. - if (Desc->TSFlags & X86II::OpSize) - MCE.emitByte(0x66); - // Emit the address size opcode prefix as needed. - if (Desc->TSFlags & X86II::AdSize) + bool need_address_override; + if (TSFlags & X86II::AdSize) { + need_address_override = true; + } else if (MemOperand == -1) { + need_address_override = false; + } else if (Is64BitMode) { + assert(!Is16BitMemOperand(MI, MemOperand)); + need_address_override = Is32BitMemOperand(MI, MemOperand); + } else { + assert(!Is64BitMemOperand(MI, MemOperand)); + need_address_override = Is16BitMemOperand(MI, MemOperand); + } + + if (need_address_override) MCE.emitByte(0x67); + // Emit the operand size opcode prefix as needed. + if (TSFlags & X86II::OpSize) + MCE.emitByte(0x66); + bool Need0FPrefix = false; switch (Desc->TSFlags & X86II::Op0Mask) { - case X86II::TB: // Two-byte opcode prefix - case X86II::T8: // 0F 38 - case X86II::TA: // 0F 3A - case X86II::A6: // 0F A6 - case X86II::A7: // 0F A7 - Need0FPrefix = true; - break; - case X86II::REP: break; // already handled. - case X86II::T8XS: // F3 0F 38 - case X86II::XS: // F3 0F - MCE.emitByte(0xF3); - Need0FPrefix = true; - break; - case X86II::T8XD: // F2 0F 38 - case X86II::TAXD: // F2 0F 3A - case X86II::XD: // F2 0F - MCE.emitByte(0xF2); - Need0FPrefix = true; - break; - case X86II::D8: case X86II::D9: case X86II::DA: case X86II::DB: - case X86II::DC: case X86II::DD: case X86II::DE: case X86II::DF: - MCE.emitByte(0xD8+ - (((Desc->TSFlags & X86II::Op0Mask)-X86II::D8) - >> X86II::Op0Shift)); - break; // Two-byte opcode prefix - default: llvm_unreachable("Invalid prefix!"); - case 0: break; // No prefix! 
+    case X86II::TB:   // Two-byte opcode prefix
+    case X86II::T8:   // 0F 38
+    case X86II::TA:   // 0F 3A
+    case X86II::A6:   // 0F A6
+    case X86II::A7:   // 0F A7
+      Need0FPrefix = true;
+      break;
+    case X86II::REP: break; // already handled.
+    case X86II::T8XS: // F3 0F 38
+    case X86II::XS:   // F3 0F
+      MCE.emitByte(0xF3);
+      Need0FPrefix = true;
+      break;
+    case X86II::T8XD: // F2 0F 38
+    case X86II::TAXD: // F2 0F 3A
+    case X86II::XD:   // F2 0F
+      MCE.emitByte(0xF2);
+      Need0FPrefix = true;
+      break;
+    case X86II::D8: case X86II::D9: case X86II::DA: case X86II::DB:
+    case X86II::DC: case X86II::DD: case X86II::DE: case X86II::DF:
+      MCE.emitByte(0xD8+
+                   (((Desc->TSFlags & X86II::Op0Mask)-X86II::D8)
+                    >> X86II::Op0Shift));
+      break; // Two-byte opcode prefix
+    default: llvm_unreachable("Invalid prefix!");
+    case 0: break;  // No prefix!
   }
 
   // Handle REX prefix.
@@ -697,50 +731,446 @@ void Emitter::emitInstruction(MachineInstr &MI,
     MCE.emitByte(0x0F);
 
     switch (Desc->TSFlags & X86II::Op0Mask) {
-    case X86II::T8XD:  // F2 0F 38
-    case X86II::T8XS:  // F3 0F 38
-    case X86II::T8:    // 0F 38
-      MCE.emitByte(0x38);
-      break;
-    case X86II::TAXD:  // F2 0F 38
-    case X86II::TA:    // 0F 3A
-      MCE.emitByte(0x3A);
-      break;
-    case X86II::A6:    // 0F A6
-      MCE.emitByte(0xA6);
-      break;
-    case X86II::A7:    // 0F A7
-      MCE.emitByte(0xA7);
-      break;
+    case X86II::T8XD:  // F2 0F 38
+    case X86II::T8XS:  // F3 0F 38
+    case X86II::T8:    // 0F 38
+      MCE.emitByte(0x38);
+      break;
+    case X86II::TAXD:  // F2 0F 38
+    case X86II::TA:    // 0F 3A
+      MCE.emitByte(0x3A);
+      break;
+    case X86II::A6:    // 0F A6
+      MCE.emitByte(0xA6);
+      break;
+    case X86II::A7:    // 0F A7
+      MCE.emitByte(0xA7);
+      break;
+    }
 }
+
+// On regular x86, both XMM0-XMM7 and XMM8-XMM15 are encoded in the range
+// 0-7 and the difference between the 2 groups is given by the REX prefix.
+// In the VEX prefix, registers are seen sequentially from 0-15 and encoded
+// in 1's complement form, example:
+//
+//  ModRM field => XMM9 => 1
+//  VEX.VVVV    => XMM9 => ~9
+//
+// See table 4-35 of Intel AVX Programming Reference for details.
+static unsigned char getVEXRegisterEncoding(const MachineInstr &MI,
+                                            unsigned OpNum) {
+  unsigned SrcReg = MI.getOperand(OpNum).getReg();
+  unsigned SrcRegNum = X86_MC::getX86RegNum(MI.getOperand(OpNum).getReg());
+  if (X86II::isX86_64ExtendedReg(SrcReg))
+    SrcRegNum |= 8;
+
+  // The registers represented through VEX_VVVV should
+  // be encoded in 1's complement form.
+  return (~SrcRegNum) & 0xf;
+}
+
+/// EmitSegmentOverridePrefix - Emit segment override opcode prefix as needed
+template
+void Emitter::emitSegmentOverridePrefix(uint64_t TSFlags,
+                                        int MemOperand,
+                                        const MachineInstr &MI) const {
+  switch (TSFlags & X86II::SegOvrMask) {
+  default: llvm_unreachable("Invalid segment!");
+  case 0:
+    // No segment override, check for explicit one on memory operand.
+    if (MemOperand != -1) {   // If the instruction has a memory operand.
+      switch (MI.getOperand(MemOperand+X86::AddrSegmentReg).getReg()) {
+      default: llvm_unreachable("Unknown segment register!");
+      case 0: break;
+      case X86::CS: MCE.emitByte(0x2E); break;
+      case X86::SS: MCE.emitByte(0x36); break;
+      case X86::DS: MCE.emitByte(0x3E); break;
+      case X86::ES: MCE.emitByte(0x26); break;
+      case X86::FS: MCE.emitByte(0x64); break;
+      case X86::GS: MCE.emitByte(0x65); break;
+      }
+    }
+    break;
+  case X86II::FS:
+    MCE.emitByte(0x64);
+    break;
+  case X86II::GS:
+    MCE.emitByte(0x65);
+    break;
+  }
+}
+
+template
+void Emitter::emitVEXOpcodePrefix(uint64_t TSFlags,
+                                  int MemOperand,
+                                  const MachineInstr &MI,
+                                  const MCInstrDesc *Desc) const {
+  bool HasVEX_4V = (TSFlags >> X86II::VEXShift) & X86II::VEX_4V;
+  bool HasVEX_4VOp3 = (TSFlags >> X86II::VEXShift) & X86II::VEX_4VOp3;
+
+  // VEX_R: opcode extension equivalent to REX.R in
+  // 1's complement (inverted) form
+  //
+  //  1: Same as REX_R=0 (must be 1 in 32-bit mode)
+  //  0: Same as REX_R=1 (64 bit mode only)
+  //
+  unsigned char VEX_R = 0x1;
+
+  // VEX_X: equivalent to REX.X, only used when a
+  // register is used for index in SIB Byte.
+  //
+  //  1: Same as REX.X=0 (must be 1 in 32-bit mode)
+  //  0: Same as REX.X=1 (64-bit mode only)
+  unsigned char VEX_X = 0x1;
+
+  // VEX_B:
+  //
+  //  1: Same as REX_B=0 (ignored in 32-bit mode)
+  //  0: Same as REX_B=1 (64 bit mode only)
+  //
+  unsigned char VEX_B = 0x1;
+
+  // VEX_W: opcode specific (use like REX.W, or used for
+  // opcode extension, or ignored, depending on the opcode byte)
+  unsigned char VEX_W = 0;
+
+  // XOP: Use XOP prefix byte 0x8f instead of VEX.
+  unsigned char XOP = 0;
+
+  // VEX_5M (VEX m-mmmmm field):
+  //
+  //  0b00000: Reserved for future use
+  //  0b00001: implied 0F leading opcode
+  //  0b00010: implied 0F 38 leading opcode bytes
+  //  0b00011: implied 0F 3A leading opcode bytes
+  //  0b00100-0b11111: Reserved for future use
+  //  0b01000: XOP map select - 08h instructions with imm byte
+  //  0b01001: XOP map select - 09h instructions with no imm byte
+  unsigned char VEX_5M = 0x1;
+
+  // VEX_4V (VEX vvvv field): a register specifier
+  // (in 1's complement form) or 1111 if unused.
+  unsigned char VEX_4V = 0xf;
+
+  // VEX_L (Vector Length):
+  //
+  //  0: scalar or 128-bit vector
+  //  1: 256-bit vector
+  //
+  unsigned char VEX_L = 0;
+
+  // VEX_PP: opcode extension providing equivalent
+  // functionality of a SIMD prefix
+  //
+  //  0b00: None
+  //  0b01: 66
+  //  0b10: F3
+  //  0b11: F2
+  //
+  unsigned char VEX_PP = 0;
+
+  // Encode the operand size opcode prefix as needed.
+ if (TSFlags & X86II::OpSize) + VEX_PP = 0x01; + + if ((TSFlags >> X86II::VEXShift) & X86II::VEX_W) + VEX_W = 1; + + if ((TSFlags >> X86II::VEXShift) & X86II::XOP) + XOP = 1; + + if ((TSFlags >> X86II::VEXShift) & X86II::VEX_L) + VEX_L = 1; + + switch (TSFlags & X86II::Op0Mask) { + default: llvm_unreachable("Invalid prefix!"); + case X86II::T8: // 0F 38 + VEX_5M = 0x2; + break; + case X86II::TA: // 0F 3A + VEX_5M = 0x3; + break; + case X86II::T8XS: // F3 0F 38 + VEX_PP = 0x2; + VEX_5M = 0x2; + break; + case X86II::T8XD: // F2 0F 38 + VEX_PP = 0x3; + VEX_5M = 0x2; + break; + case X86II::TAXD: // F2 0F 3A + VEX_PP = 0x3; + VEX_5M = 0x3; + break; + case X86II::XS: // F3 0F + VEX_PP = 0x2; + break; + case X86II::XD: // F2 0F + VEX_PP = 0x3; + break; + case X86II::XOP8: + VEX_5M = 0x8; + break; + case X86II::XOP9: + VEX_5M = 0x9; + break; + case X86II::A6: // Bypass: Not used by VEX + case X86II::A7: // Bypass: Not used by VEX + case X86II::TB: // Bypass: Not used by VEX + case 0: + break; // No prefix! + } + + + // Set the vector length to 256-bit if YMM0-YMM15 is used + for (unsigned i = 0; i != MI.getNumOperands(); ++i) { + if (!MI.getOperand(i).isReg()) + continue; + if (MI.getOperand(i).isImplicit()) + continue; + unsigned SrcReg = MI.getOperand(i).getReg(); + if (SrcReg >= X86::YMM0 && SrcReg <= X86::YMM15) + VEX_L = 1; + } + + // Classify VEX_B, VEX_4V, VEX_R, VEX_X + unsigned NumOps = Desc->getNumOperands(); + unsigned CurOp = 0; + if (NumOps > 1 && Desc->getOperandConstraint(1, MCOI::TIED_TO) == 0) + ++CurOp; + else if (NumOps > 3 && Desc->getOperandConstraint(2, MCOI::TIED_TO) == 0) { + assert(Desc->getOperandConstraint(NumOps - 1, MCOI::TIED_TO) == 1); + // Special case for GATHER with 2 TIED_TO operands + // Skip the first 2 operands: dst, mask_wb + CurOp += 2; + } + + switch (TSFlags & X86II::FormMask) { + case X86II::MRMInitReg: + // Duplicate register. 
+ if (X86II::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg())) + VEX_R = 0x0; + + if (HasVEX_4V) + VEX_4V = getVEXRegisterEncoding(MI, CurOp); + if (X86II::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg())) + VEX_B = 0x0; + if (HasVEX_4VOp3) + VEX_4V = getVEXRegisterEncoding(MI, CurOp); + break; + case X86II::MRMDestMem: { + // MRMDestMem instructions forms: + // MemAddr, src1(ModR/M) + // MemAddr, src1(VEX_4V), src2(ModR/M) + // MemAddr, src1(ModR/M), imm8 + // + if (X86II::isX86_64ExtendedReg(MI.getOperand(X86::AddrBaseReg).getReg())) + VEX_B = 0x0; + if (X86II::isX86_64ExtendedReg(MI.getOperand(X86::AddrIndexReg).getReg())) + VEX_X = 0x0; + + CurOp = X86::AddrNumOperands; + if (HasVEX_4V) + VEX_4V = getVEXRegisterEncoding(MI, CurOp++); + + const MachineOperand &MO = MI.getOperand(CurOp); + if (MO.isReg() && X86II::isX86_64ExtendedReg(MO.getReg())) + VEX_R = 0x0; + break; + } + case X86II::MRMSrcMem: + // MRMSrcMem instructions forms: + // src1(ModR/M), MemAddr + // src1(ModR/M), src2(VEX_4V), MemAddr + // src1(ModR/M), MemAddr, imm8 + // src1(ModR/M), MemAddr, src2(VEX_I8IMM) + // + // FMA4: + // dst(ModR/M.reg), src1(VEX_4V), src2(ModR/M), src3(VEX_I8IMM) + // dst(ModR/M.reg), src1(VEX_4V), src2(VEX_I8IMM), src3(ModR/M), + if (X86II::isX86_64ExtendedReg(MI.getOperand(0).getReg())) + VEX_R = 0x0; + + if (HasVEX_4V) + VEX_4V = getVEXRegisterEncoding(MI, 1); + + if (X86II::isX86_64ExtendedReg( + MI.getOperand(MemOperand+X86::AddrBaseReg).getReg())) + VEX_B = 0x0; + if (X86II::isX86_64ExtendedReg( + MI.getOperand(MemOperand+X86::AddrIndexReg).getReg())) + VEX_X = 0x0; + + if (HasVEX_4VOp3) + VEX_4V = getVEXRegisterEncoding(MI, X86::AddrNumOperands+1); + break; + case X86II::MRM0m: case X86II::MRM1m: + case X86II::MRM2m: case X86II::MRM3m: + case X86II::MRM4m: case X86II::MRM5m: + case X86II::MRM6m: case X86II::MRM7m: { + // MRM[0-9]m instructions forms: + // MemAddr + // src1(VEX_4V), MemAddr + if (HasVEX_4V) + VEX_4V = getVEXRegisterEncoding(MI, 0); + + if (X86II::isX86_64ExtendedReg( + MI.getOperand(MemOperand+X86::AddrBaseReg).getReg())) + VEX_B = 0x0; + if (X86II::isX86_64ExtendedReg( + MI.getOperand(MemOperand+X86::AddrIndexReg).getReg())) + VEX_X = 0x0; + break; + } + case X86II::MRMSrcReg: + // MRMSrcReg instructions forms: + // dst(ModR/M), src1(VEX_4V), src2(ModR/M), src3(VEX_I8IMM) + // dst(ModR/M), src1(ModR/M) + // dst(ModR/M), src1(ModR/M), imm8 + // + if (X86II::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg())) + VEX_R = 0x0; + CurOp++; + + if (HasVEX_4V) + VEX_4V = getVEXRegisterEncoding(MI, CurOp++); + if (X86II::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg())) + VEX_B = 0x0; + CurOp++; + if (HasVEX_4VOp3) + VEX_4V = getVEXRegisterEncoding(MI, CurOp); + break; + case X86II::MRMDestReg: + // MRMDestReg instructions forms: + // dst(ModR/M), src(ModR/M) + // dst(ModR/M), src(ModR/M), imm8 + if (X86II::isX86_64ExtendedReg(MI.getOperand(0).getReg())) + VEX_B = 0x0; + if (X86II::isX86_64ExtendedReg(MI.getOperand(1).getReg())) + VEX_R = 0x0; + break; + case X86II::MRM0r: case X86II::MRM1r: + case X86II::MRM2r: case X86II::MRM3r: + case X86II::MRM4r: case X86II::MRM5r: + case X86II::MRM6r: case X86II::MRM7r: + // MRM0r-MRM7r instructions forms: + // dst(VEX_4V), src(ModR/M), imm8 + VEX_4V = getVEXRegisterEncoding(MI, 0); + if (X86II::isX86_64ExtendedReg(MI.getOperand(1).getReg())) + VEX_B = 0x0; + break; + default: // RawFrm + break; + } + + // Emit segment override opcode prefix as needed. 
+ emitSegmentOverridePrefix(TSFlags, MemOperand, MI); + + // VEX opcode prefix can have 2 or 3 bytes + // + // 3 bytes: + // +-----+ +--------------+ +-------------------+ + // | C4h | | RXB | m-mmmm | | W | vvvv | L | pp | + // +-----+ +--------------+ +-------------------+ + // 2 bytes: + // +-----+ +-------------------+ + // | C5h | | R | vvvv | L | pp | + // +-----+ +-------------------+ + // + unsigned char LastByte = VEX_PP | (VEX_L << 2) | (VEX_4V << 3); + + if (VEX_B && VEX_X && !VEX_W && !XOP && (VEX_5M == 1)) { // 2 byte VEX prefix + MCE.emitByte(0xC5); + MCE.emitByte(LastByte | (VEX_R << 7)); + return; + } + + // 3 byte VEX prefix + MCE.emitByte(XOP ? 0x8F : 0xC4); + MCE.emitByte(VEX_R << 7 | VEX_X << 6 | VEX_B << 5 | VEX_5M); + MCE.emitByte(LastByte | (VEX_W << 7)); +} + +template +void Emitter::emitInstruction(MachineInstr &MI, + const MCInstrDesc *Desc) { + DEBUG(dbgs() << MI); + + // If this is a pseudo instruction, lower it. + switch (Desc->getOpcode()) { + case X86::ADD16rr_DB: Desc = UpdateOp(MI, II, X86::OR16rr); break; + case X86::ADD32rr_DB: Desc = UpdateOp(MI, II, X86::OR32rr); break; + case X86::ADD64rr_DB: Desc = UpdateOp(MI, II, X86::OR64rr); break; + case X86::ADD16ri_DB: Desc = UpdateOp(MI, II, X86::OR16ri); break; + case X86::ADD32ri_DB: Desc = UpdateOp(MI, II, X86::OR32ri); break; + case X86::ADD64ri32_DB: Desc = UpdateOp(MI, II, X86::OR64ri32); break; + case X86::ADD16ri8_DB: Desc = UpdateOp(MI, II, X86::OR16ri8); break; + case X86::ADD32ri8_DB: Desc = UpdateOp(MI, II, X86::OR32ri8); break; + case X86::ADD64ri8_DB: Desc = UpdateOp(MI, II, X86::OR64ri8); break; + case X86::ACQUIRE_MOV8rm: Desc = UpdateOp(MI, II, X86::MOV8rm); break; + case X86::ACQUIRE_MOV16rm: Desc = UpdateOp(MI, II, X86::MOV16rm); break; + case X86::ACQUIRE_MOV32rm: Desc = UpdateOp(MI, II, X86::MOV32rm); break; + case X86::ACQUIRE_MOV64rm: Desc = UpdateOp(MI, II, X86::MOV64rm); break; + case X86::RELEASE_MOV8mr: Desc = UpdateOp(MI, II, X86::MOV8mr); break; + case X86::RELEASE_MOV16mr: Desc = UpdateOp(MI, II, X86::MOV16mr); break; + case X86::RELEASE_MOV32mr: Desc = UpdateOp(MI, II, X86::MOV32mr); break; + case X86::RELEASE_MOV64mr: Desc = UpdateOp(MI, II, X86::MOV64mr); break; } + + MCE.processDebugLoc(MI.getDebugLoc(), true); + + unsigned Opcode = Desc->Opcode; + // If this is a two-address instruction, skip one of the register operands. unsigned NumOps = Desc->getNumOperands(); unsigned CurOp = 0; - if (NumOps > 1 && Desc->getOperandConstraint(1, MCOI::TIED_TO) != -1) + if (NumOps > 1 && Desc->getOperandConstraint(1, MCOI::TIED_TO) == 0) ++CurOp; - else if (NumOps > 2 && Desc->getOperandConstraint(NumOps-1,MCOI::TIED_TO)== 0) - // Skip the last source operand that is tied_to the dest reg. e.g. LXADD32 - --NumOps; + else if (NumOps > 3 && Desc->getOperandConstraint(2, MCOI::TIED_TO) == 0) { + assert(Desc->getOperandConstraint(NumOps - 1, MCOI::TIED_TO) == 1); + // Special case for GATHER with 2 TIED_TO operands + // Skip the first 2 operands: dst, mask_wb + CurOp += 2; + } + + uint64_t TSFlags = Desc->TSFlags; + + // Is this instruction encoded using the AVX VEX prefix? + bool HasVEXPrefix = (TSFlags >> X86II::VEXShift) & X86II::VEX; + // It uses the VEX.VVVV field? + bool HasVEX_4V = (TSFlags >> X86II::VEXShift) & X86II::VEX_4V; + bool HasVEX_4VOp3 = (TSFlags >> X86II::VEXShift) & X86II::VEX_4VOp3; + bool HasMemOp4 = (TSFlags >> X86II::VEXShift) & X86II::MemOp4; + const unsigned MemOp4_I8IMMOperand = 2; + + // Determine where the memory operand starts, if present. 
+ int MemoryOperand = X86II::getMemoryOperandNo(TSFlags, Opcode); + if (MemoryOperand != -1) MemoryOperand += CurOp; + + if (!HasVEXPrefix) + emitOpcodePrefix(TSFlags, MemoryOperand, MI, Desc); + else + emitVEXOpcodePrefix(TSFlags, MemoryOperand, MI, Desc); unsigned char BaseOpcode = X86II::getBaseOpcodeFor(Desc->TSFlags); - switch (Desc->TSFlags & X86II::FormMask) { + switch (TSFlags & X86II::FormMask) { default: llvm_unreachable("Unknown FormMask value in X86 MachineCodeEmitter!"); case X86II::Pseudo: // Remember the current PC offset, this is the PIC relocation // base address. switch (Opcode) { - default: + default: llvm_unreachable("pseudo instructions should be removed before code" " emission"); - break; // Do nothing for Int_MemBarrier - it's just a comment. Add a debug // to make it slightly easier to see. case X86::Int_MemBarrier: DEBUG(dbgs() << "#MEMBARRIER\n"); break; - + case TargetOpcode::INLINEASM: // We allow inline assembler nodes with empty bodies - they can // implicitly define registers, which is ok for JIT. @@ -752,7 +1182,7 @@ void Emitter::emitInstruction(MachineInstr &MI, case TargetOpcode::EH_LABEL: MCE.emitLabel(MI.getOperand(0).getMCSymbol()); break; - + case TargetOpcode::IMPLICIT_DEF: case TargetOpcode::KILL: break; @@ -774,7 +1204,7 @@ void Emitter::emitInstruction(MachineInstr &MI, if (CurOp == NumOps) break; - + const MachineOperand &MO = MI.getOperand(CurOp++); DEBUG(dbgs() << "RawFrm CurOp " << CurOp << "\n"); @@ -787,13 +1217,13 @@ void Emitter::emitInstruction(MachineInstr &MI, emitPCRelativeBlockAddress(MO.getMBB()); break; } - + if (MO.isGlobal()) { emitGlobalAddress(MO.getGlobal(), X86::reloc_pcrel_word, MO.getOffset(), 0); break; } - + if (MO.isSymbol()) { emitExternalSymbolAddress(MO.getSymbolName(), X86::reloc_pcrel_word); break; @@ -804,7 +1234,7 @@ void Emitter::emitInstruction(MachineInstr &MI, emitJumpTableAddress(MO.getIndex(), X86::reloc_pcrel_word); break; } - + assert(MO.isImm() && "Unknown RawFrm operand!"); if (Opcode == X86::CALLpcrel32 || Opcode == X86::CALL64pcrel32) { // Fix up immediate operand for pc relative calls. @@ -815,21 +1245,21 @@ void Emitter::emitInstruction(MachineInstr &MI, emitConstant(MO.getImm(), X86II::getSizeOfImm(Desc->TSFlags)); break; } - + case X86II::AddRegFrm: { MCE.emitByte(BaseOpcode + X86_MC::getX86RegNum(MI.getOperand(CurOp++).getReg())); - + if (CurOp == NumOps) break; - + const MachineOperand &MO1 = MI.getOperand(CurOp++); unsigned Size = X86II::getSizeOfImm(Desc->TSFlags); if (MO1.isImm()) { emitConstant(MO1.getImm(), Size); break; } - + unsigned rt = Is64BitMode ? X86::reloc_pcrel_word : (IsPIC ? 
X86::reloc_picrel_word : X86::reloc_absolute_word); if (Opcode == X86::MOV64ri64i32) @@ -855,46 +1285,57 @@ void Emitter::emitInstruction(MachineInstr &MI, emitRegModRMByte(MI.getOperand(CurOp).getReg(), X86_MC::getX86RegNum(MI.getOperand(CurOp+1).getReg())); CurOp += 2; - if (CurOp != NumOps) - emitConstant(MI.getOperand(CurOp++).getImm(), - X86II::getSizeOfImm(Desc->TSFlags)); break; } case X86II::MRMDestMem: { MCE.emitByte(BaseOpcode); + + unsigned SrcRegNum = CurOp + X86::AddrNumOperands; + if (HasVEX_4V) // Skip 1st src (which is encoded in VEX_VVVV) + SrcRegNum++; emitMemModRMByte(MI, CurOp, - X86_MC::getX86RegNum(MI.getOperand(CurOp + X86::AddrNumOperands) - .getReg())); - CurOp += X86::AddrNumOperands + 1; - if (CurOp != NumOps) - emitConstant(MI.getOperand(CurOp++).getImm(), - X86II::getSizeOfImm(Desc->TSFlags)); + X86_MC::getX86RegNum(MI.getOperand(SrcRegNum).getReg())); + CurOp = SrcRegNum + 1; break; } - case X86II::MRMSrcReg: + case X86II::MRMSrcReg: { MCE.emitByte(BaseOpcode); - emitRegModRMByte(MI.getOperand(CurOp+1).getReg(), + + unsigned SrcRegNum = CurOp+1; + if (HasVEX_4V) // Skip 1st src (which is encoded in VEX_VVVV) + ++SrcRegNum; + + if (HasMemOp4) // Skip 2nd src (which is encoded in I8IMM) + ++SrcRegNum; + + emitRegModRMByte(MI.getOperand(SrcRegNum).getReg(), X86_MC::getX86RegNum(MI.getOperand(CurOp).getReg())); - CurOp += 2; - if (CurOp != NumOps) - emitConstant(MI.getOperand(CurOp++).getImm(), - X86II::getSizeOfImm(Desc->TSFlags)); + // 2 operands skipped with HasMemOp4, compensate accordingly + CurOp = HasMemOp4 ? SrcRegNum : SrcRegNum + 1; + if (HasVEX_4VOp3) + ++CurOp; break; - + } case X86II::MRMSrcMem: { int AddrOperands = X86::AddrNumOperands; + unsigned FirstMemOp = CurOp+1; + if (HasVEX_4V) { + ++AddrOperands; + ++FirstMemOp; // Skip the register source (which is encoded in VEX_VVVV). + } + if (HasMemOp4) // Skip second register source (encoded in I8IMM) + ++FirstMemOp; + + MCE.emitByte(BaseOpcode); intptr_t PCAdj = (CurOp + AddrOperands + 1 != NumOps) ? X86II::getSizeOfImm(Desc->TSFlags) : 0; - - MCE.emitByte(BaseOpcode); - emitMemModRMByte(MI, CurOp+1, + emitMemModRMByte(MI, FirstMemOp, X86_MC::getX86RegNum(MI.getOperand(CurOp).getReg()),PCAdj); CurOp += AddrOperands + 1; - if (CurOp != NumOps) - emitConstant(MI.getOperand(CurOp++).getImm(), - X86II::getSizeOfImm(Desc->TSFlags)); + if (HasVEX_4VOp3) + ++CurOp; break; } @@ -902,20 +1343,22 @@ void Emitter::emitInstruction(MachineInstr &MI, case X86II::MRM2r: case X86II::MRM3r: case X86II::MRM4r: case X86II::MRM5r: case X86II::MRM6r: case X86II::MRM7r: { + if (HasVEX_4V) // Skip the register dst (which is encoded in VEX_VVVV). + ++CurOp; MCE.emitByte(BaseOpcode); emitRegModRMByte(MI.getOperand(CurOp++).getReg(), (Desc->TSFlags & X86II::FormMask)-X86II::MRM0r); if (CurOp == NumOps) break; - + const MachineOperand &MO1 = MI.getOperand(CurOp++); unsigned Size = X86II::getSizeOfImm(Desc->TSFlags); if (MO1.isImm()) { emitConstant(MO1.getImm(), Size); break; } - + unsigned rt = Is64BitMode ? X86::reloc_pcrel_word : (IsPIC ? X86::reloc_picrel_word : X86::reloc_absolute_word); if (Opcode == X86::MOV64ri32) @@ -937,8 +1380,10 @@ void Emitter::emitInstruction(MachineInstr &MI, case X86II::MRM2m: case X86II::MRM3m: case X86II::MRM4m: case X86II::MRM5m: case X86II::MRM6m: case X86II::MRM7m: { + if (HasVEX_4V) // Skip the register dst (which is encoded in VEX_VVVV). + ++CurOp; intptr_t PCAdj = (CurOp + X86::AddrNumOperands != NumOps) ? - (MI.getOperand(CurOp+X86::AddrNumOperands).isImm() ? 
+ (MI.getOperand(CurOp+X86::AddrNumOperands).isImm() ? X86II::getSizeOfImm(Desc->TSFlags) : 4) : 0; MCE.emitByte(BaseOpcode); @@ -948,14 +1393,14 @@ void Emitter::emitInstruction(MachineInstr &MI, if (CurOp == NumOps) break; - + const MachineOperand &MO = MI.getOperand(CurOp++); unsigned Size = X86II::getSizeOfImm(Desc->TSFlags); if (MO.isImm()) { emitConstant(MO.getImm(), Size); break; } - + unsigned rt = Is64BitMode ? X86::reloc_pcrel_word : (IsPIC ? X86::reloc_picrel_word : X86::reloc_absolute_word); if (Opcode == X86::MOV64mi32) @@ -980,7 +1425,7 @@ void Emitter::emitInstruction(MachineInstr &MI, X86_MC::getX86RegNum(MI.getOperand(CurOp).getReg())); ++CurOp; break; - + case X86II::MRM_C1: MCE.emitByte(BaseOpcode); MCE.emitByte(0xC1); @@ -1003,6 +1448,33 @@ void Emitter::emitInstruction(MachineInstr &MI, break; } + while (CurOp != NumOps && NumOps - CurOp <= 2) { + // The last source register of a 4 operand instruction in AVX is encoded + // in bits[7:4] of a immediate byte. + if ((TSFlags >> X86II::VEXShift) & X86II::VEX_I8IMM) { + const MachineOperand &MO = MI.getOperand(HasMemOp4 ? MemOp4_I8IMMOperand + : CurOp); + ++CurOp; + unsigned RegNum = X86_MC::getX86RegNum(MO.getReg()) << 4; + if (X86II::isX86_64ExtendedReg(MO.getReg())) + RegNum |= 1 << 7; + // If there is an additional 5th operand it must be an immediate, which + // is encoded in bits[3:0] + if (CurOp != NumOps) { + const MachineOperand &MIMM = MI.getOperand(CurOp++); + if (MIMM.isImm()) { + unsigned Val = MIMM.getImm(); + assert(Val < 16 && "Immediate operand value out of range"); + RegNum |= Val; + } + } + emitConstant(RegNum, 1); + } else { + emitConstant(MI.getOperand(CurOp++).getImm(), + X86II::getSizeOfImm(Desc->TSFlags)); + } + } + if (!MI.isVariadic() && CurOp != NumOps) { #ifndef NDEBUG dbgs() << "Cannot encode all operands of: " << MI << "\n"; diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp index 69752c5..e5952aa 100644 --- a/lib/Target/X86/X86FastISel.cpp +++ b/lib/Target/X86/X86FastISel.cpp @@ -57,7 +57,9 @@ class X86FastISel : public FastISel { bool X86ScalarSSEf32; public: - explicit X86FastISel(FunctionLoweringInfo &funcInfo) : FastISel(funcInfo) { + explicit X86FastISel(FunctionLoweringInfo &funcInfo, + const TargetLibraryInfo *libInfo) + : FastISel(funcInfo, libInfo) { Subtarget = &TM.getSubtarget(); StackPtr = Subtarget->is64Bit() ? X86::RSP : X86::ESP; X86ScalarSSEf64 = Subtarget->hasSSE2(); @@ -155,9 +157,9 @@ bool X86FastISel::isTypeLegal(Type *Ty, MVT &VT, bool AllowI1) { // For now, require SSE/SSE2 for performing floating-point operations, // since x87 requires additional work. if (VT == MVT::f64 && !X86ScalarSSEf64) - return false; + return false; if (VT == MVT::f32 && !X86ScalarSSEf32) - return false; + return false; // Similarly, no f80 support yet. if (VT == MVT::f80) return false; @@ -183,37 +185,37 @@ bool X86FastISel::X86FastEmitLoad(EVT VT, const X86AddressMode &AM, case MVT::i1: case MVT::i8: Opc = X86::MOV8rm; - RC = X86::GR8RegisterClass; + RC = &X86::GR8RegClass; break; case MVT::i16: Opc = X86::MOV16rm; - RC = X86::GR16RegisterClass; + RC = &X86::GR16RegClass; break; case MVT::i32: Opc = X86::MOV32rm; - RC = X86::GR32RegisterClass; + RC = &X86::GR32RegClass; break; case MVT::i64: // Must be in x86-64 mode. Opc = X86::MOV64rm; - RC = X86::GR64RegisterClass; + RC = &X86::GR64RegClass; break; case MVT::f32: if (X86ScalarSSEf32) { Opc = Subtarget->hasAVX() ? 
X86::VMOVSSrm : X86::MOVSSrm; - RC = X86::FR32RegisterClass; + RC = &X86::FR32RegClass; } else { Opc = X86::LD_Fp32m; - RC = X86::RFP32RegisterClass; + RC = &X86::RFP32RegClass; } break; case MVT::f64: if (X86ScalarSSEf64) { Opc = Subtarget->hasAVX() ? X86::VMOVSDrm : X86::MOVSDrm; - RC = X86::FR64RegisterClass; + RC = &X86::FR64RegClass; } else { Opc = X86::LD_Fp64m; - RC = X86::RFP64RegisterClass; + RC = &X86::RFP64RegClass; } break; case MVT::f80: @@ -240,7 +242,7 @@ X86FastISel::X86FastEmitStore(EVT VT, unsigned Val, const X86AddressMode &AM) { default: return false; case MVT::i1: { // Mask out all but lowest bit. - unsigned AndResult = createResultReg(X86::GR8RegisterClass); + unsigned AndResult = createResultReg(&X86::GR8RegClass); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::AND8ri), AndResult).addReg(Val).addImm(1); Val = AndResult; @@ -547,13 +549,13 @@ bool X86FastISel::X86SelectAddress(const Value *V, X86AddressMode &AM) { if (TLI.getPointerTy() == MVT::i64) { Opc = X86::MOV64rm; - RC = X86::GR64RegisterClass; + RC = &X86::GR64RegClass; if (Subtarget->isPICStyleRIPRel()) StubAM.Base.Reg = X86::RIP; } else { Opc = X86::MOV32rm; - RC = X86::GR32RegisterClass; + RC = &X86::GR32RegClass; } LoadReg = createResultReg(RC); @@ -743,7 +745,7 @@ bool X86FastISel::X86SelectRet(const Instruction *I) { // Analyze operands of the call, assigning locations to each operand. SmallVector ValLocs; CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, TM, ValLocs, - I->getContext()); + I->getContext()); CCInfo.AnalyzeReturn(Outs, RetCC_X86); const Value *RV = Ret->getOperand(0); @@ -1258,7 +1260,7 @@ bool X86FastISel::X86SelectFPExt(const Instruction *I) { if (V->getType()->isFloatTy()) { unsigned OpReg = getRegForValue(V); if (OpReg == 0) return false; - unsigned ResultReg = createResultReg(X86::FR64RegisterClass); + unsigned ResultReg = createResultReg(&X86::FR64RegClass); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::CVTSS2SDrr), ResultReg) .addReg(OpReg); @@ -1277,7 +1279,7 @@ bool X86FastISel::X86SelectFPTrunc(const Instruction *I) { if (V->getType()->isDoubleTy()) { unsigned OpReg = getRegForValue(V); if (OpReg == 0) return false; - unsigned ResultReg = createResultReg(X86::FR32RegisterClass); + unsigned ResultReg = createResultReg(&X86::FR32RegClass); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::CVTSD2SSrr), ResultReg) .addReg(OpReg); @@ -1314,8 +1316,9 @@ bool X86FastISel::X86SelectTrunc(const Instruction *I) { if (!Subtarget->is64Bit()) { // If we're on x86-32; we can't extract an i8 from a general register. // First issue a copy to GR16_ABCD or GR32_ABCD. - const TargetRegisterClass *CopyRC = (SrcVT == MVT::i16) - ? X86::GR16_ABCDRegisterClass : X86::GR32_ABCDRegisterClass; + const TargetRegisterClass *CopyRC = (SrcVT == MVT::i16) ? + (const TargetRegisterClass*)&X86::GR16_ABCDRegClass : + (const TargetRegisterClass*)&X86::GR32_ABCDRegClass; unsigned CopyReg = createResultReg(CopyRC); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), CopyReg).addReg(InputReg); @@ -1423,7 +1426,7 @@ bool X86FastISel::X86VisitIntrinsicCall(const IntrinsicInst &I) { return DoSelectCall(&I, "memset"); } case Intrinsic::stackprotector: { - // Emit code inline code to store the stack guard onto the stack. + // Emit code to store the stack guard onto the stack. EVT PtrTy = TLI.getPointerTy(); const Value *Op1 = I.getArgOperand(0); // The guard's value. 
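
The X86FastISel hunks above repeatedly map a simple value type to an (opcode,
register class) pair, and the patch consistently replaces the exported
X86::FR32RegisterClass-style pointers with the addresses of the TableGen'd
X86::FR32RegClass objects, presumably tracking an LLVM 3.1-era API change. As
a stand-alone sketch of the shape of that dispatch (plain C++, not the LLVM
API; every name below is illustrative only):

    // Illustrative only: mirrors the MVT -> (opcode, register class)
    // dispatch in X86FastEmitLoad, without any LLVM dependencies.
    #include <cstdio>

    enum SimpleVT { i8, i16, i32, i64, f32_sse, f32_x87 };

    struct RegClass { const char *Name; };
    static const RegClass GR8   = { "GR8"   };
    static const RegClass GR16  = { "GR16"  };
    static const RegClass GR32  = { "GR32"  };
    static const RegClass GR64  = { "GR64"  };
    static const RegClass FR32  = { "FR32"  };
    static const RegClass RFP32 = { "RFP32" };

    // Returns false for types this (hypothetical) fast path cannot handle.
    static bool pickLoad(SimpleVT VT, const char *&Opc, const RegClass *&RC) {
      switch (VT) {
      case i8:      Opc = "MOV8rm";   RC = &GR8;   return true;
      case i16:     Opc = "MOV16rm";  RC = &GR16;  return true;
      case i32:     Opc = "MOV32rm";  RC = &GR32;  return true;
      case i64:     Opc = "MOV64rm";  RC = &GR64;  return true; // x86-64 only
      case f32_sse: Opc = "MOVSSrm";  RC = &FR32;  return true;
      case f32_x87: Opc = "LD_Fp32m"; RC = &RFP32; return true;
      }
      return false;
    }

    int main() {
      const char *Opc; const RegClass *RC;
      if (pickLoad(f32_sse, Opc, RC))
        std::printf("%s into %s\n", Opc, RC->Name); // MOVSSrm into FR32
    }

Taking the address of a static class object instead of dereferencing an
exported pointer keeps the fallible-selection pattern intact while removing
one level of indirection.
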
@@ -1484,7 +1487,7 @@ bool X86FastISel::X86VisitIntrinsicCall(const IntrinsicInst &I) { return false; // The call to CreateRegs builds two sequential registers, to store the - // both the the returned values. + // both the returned values. unsigned ResultReg = FuncInfo.CreateRegs(I.getType()); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(OpC), ResultReg) .addReg(Reg1).addReg(Reg2); @@ -1515,6 +1518,22 @@ bool X86FastISel::X86SelectCall(const Instruction *I) { return DoSelectCall(I, 0); } +static unsigned computeBytesPoppedByCallee(const X86Subtarget &Subtarget, + const ImmutableCallSite &CS) { + if (Subtarget.is64Bit()) + return 0; + if (Subtarget.isTargetWindows()) + return 0; + CallingConv::ID CC = CS.getCallingConv(); + if (CC == CallingConv::Fast || CC == CallingConv::GHC) + return 0; + if (!CS.paramHasAttr(1, Attribute::StructRet)) + return 0; + if (CS.paramHasAttr(1, Attribute::InReg)) + return 0; + return 4; +} + // Select either a call, or an llvm.memcpy/memmove/memset intrinsic bool X86FastISel::DoSelectCall(const Instruction *I, const char *MemIntName) { const CallInst *CI = cast(I); @@ -1548,12 +1567,11 @@ bool X86FastISel::DoSelectCall(const Instruction *I, const char *MemIntName) { // Check whether the function can return without sret-demotion. SmallVector Outs; - SmallVector Offsets; GetReturnInfo(I->getType(), CS.getAttributes().getRetAttributes(), - Outs, TLI, &Offsets); + Outs, TLI); bool CanLowerReturn = TLI.CanLowerReturn(CS.getCallingConv(), - *FuncInfo.MF, FTy->isVarArg(), - Outs, FTy->getContext()); + *FuncInfo.MF, FTy->isVarArg(), + Outs, FTy->getContext()); if (!CanLowerReturn) return false; @@ -1667,7 +1685,7 @@ bool X86FastISel::DoSelectCall(const Instruction *I, const char *MemIntName) { // Analyze operands of the call, assigning locations to each operand. SmallVector ArgLocs; CCState CCInfo(CC, isVarArg, *FuncInfo.MF, TM, ArgLocs, - I->getParent()->getContext()); + I->getParent()->getContext()); // Allocate shadow area for Win64 if (Subtarget->isTargetWin64()) @@ -1693,7 +1711,6 @@ bool X86FastISel::DoSelectCall(const Instruction *I, const char *MemIntName) { // Promote the value if needed. switch (VA.getLocInfo()) { - default: llvm_unreachable("Unknown loc info!"); case CCValAssign::Full: break; case CCValAssign::SExt: { assert(VA.getLocVT().isInteger() && !VA.getLocVT().isVector() && @@ -1737,6 +1754,14 @@ bool X86FastISel::DoSelectCall(const Instruction *I, const char *MemIntName) { ArgVT = VA.getLocVT(); break; } + case CCValAssign::VExt: + // VExt has not been implemented, so this should be impossible to reach + // for now. However, fallback to Selection DAG isel once implemented. + return false; + case CCValAssign::Indirect: + // FIXME: Indirect doesn't need extending, but fast-isel doesn't fully + // support this. + return false; } if (VA.isRegLoc()) { @@ -1838,27 +1863,24 @@ bool X86FastISel::DoSelectCall(const Instruction *I, const char *MemIntName) { MIB.addGlobalAddress(GV, 0, OpFlags); } + // Add a register mask with the call-preserved registers. + // Proper defs for return values will be added by setPhysRegsDeadExcept(). + MIB.addRegMask(TRI.getCallPreservedMask(CS.getCallingConv())); + // Add an implicit use GOT pointer in EBX. if (Subtarget->isPICStyleGOT()) - MIB.addReg(X86::EBX); + MIB.addReg(X86::EBX, RegState::Implicit); if (Subtarget->is64Bit() && isVarArg && !Subtarget->isTargetWin64()) - MIB.addReg(X86::AL); + MIB.addReg(X86::AL, RegState::Implicit); // Add implicit physical register uses to the call. 
for (unsigned i = 0, e = RegArgs.size(); i != e; ++i) - MIB.addReg(RegArgs[i]); - - // Add a register mask with the call-preserved registers. - // Proper defs for return values will be added by setPhysRegsDeadExcept(). - MIB.addRegMask(TRI.getCallPreservedMask(CS.getCallingConv())); + MIB.addReg(RegArgs[i], RegState::Implicit); // Issue CALLSEQ_END unsigned AdjStackUp = TII.getCallFrameDestroyOpcode(); - unsigned NumBytesCallee = 0; - if (!Subtarget->is64Bit() && !Subtarget->isTargetWindows() && - CS.paramHasAttr(1, Attribute::StructRet)) - NumBytesCallee = 4; + const unsigned NumBytesCallee = computeBytesPoppedByCallee(*Subtarget, CS); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(AdjStackUp)) .addImm(NumBytes).addImm(NumBytesCallee); @@ -1889,7 +1911,7 @@ bool X86FastISel::DoSelectCall(const Instruction *I, const char *MemIntName) { SmallVector UsedRegs; SmallVector RVLocs; CCState CCRetInfo(CC, false, *FuncInfo.MF, TM, RVLocs, - I->getParent()->getContext()); + I->getParent()->getContext()); unsigned ResultReg = FuncInfo.CreateRegs(I->getType()); CCRetInfo.AnalyzeCallResult(Ins, RetCC_X86); for (unsigned i = 0; i != RVLocs.size(); ++i) { @@ -1903,7 +1925,7 @@ bool X86FastISel::DoSelectCall(const Instruction *I, const char *MemIntName) { RVLocs[i].getLocReg() == X86::ST1)) { if (isScalarFPTypeInSSEReg(RVLocs[i].getValVT())) { CopyVT = MVT::f80; - CopyReg = createResultReg(X86::RFP80RegisterClass); + CopyReg = createResultReg(&X86::RFP80RegClass); } BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::FpPOP_RETVAL), CopyReg); @@ -2001,37 +2023,37 @@ unsigned X86FastISel::TargetMaterializeConstant(const Constant *C) { default: return false; case MVT::i8: Opc = X86::MOV8rm; - RC = X86::GR8RegisterClass; + RC = &X86::GR8RegClass; break; case MVT::i16: Opc = X86::MOV16rm; - RC = X86::GR16RegisterClass; + RC = &X86::GR16RegClass; break; case MVT::i32: Opc = X86::MOV32rm; - RC = X86::GR32RegisterClass; + RC = &X86::GR32RegClass; break; case MVT::i64: // Must be in x86-64 mode. Opc = X86::MOV64rm; - RC = X86::GR64RegisterClass; + RC = &X86::GR64RegClass; break; case MVT::f32: if (X86ScalarSSEf32) { Opc = Subtarget->hasAVX() ? X86::VMOVSSrm : X86::MOVSSrm; - RC = X86::FR32RegisterClass; + RC = &X86::FR32RegClass; } else { Opc = X86::LD_Fp32m; - RC = X86::RFP32RegisterClass; + RC = &X86::RFP32RegClass; } break; case MVT::f64: if (X86ScalarSSEf64) { Opc = Subtarget->hasAVX() ? X86::VMOVSDrm : X86::MOVSDrm; - RC = X86::FR64RegisterClass; + RC = &X86::FR64RegClass; } else { Opc = X86::LD_Fp64m; - RC = X86::RFP64RegisterClass; + RC = &X86::RFP64RegClass; } break; case MVT::f80: @@ -2120,28 +2142,28 @@ unsigned X86FastISel::TargetMaterializeFloatZero(const ConstantFP *CF) { unsigned Opc = 0; const TargetRegisterClass *RC = NULL; switch (VT.SimpleTy) { - default: return false; - case MVT::f32: - if (X86ScalarSSEf32) { - Opc = X86::FsFLD0SS; - RC = X86::FR32RegisterClass; - } else { - Opc = X86::LD_Fp032; - RC = X86::RFP32RegisterClass; - } - break; - case MVT::f64: - if (X86ScalarSSEf64) { - Opc = X86::FsFLD0SD; - RC = X86::FR64RegisterClass; - } else { - Opc = X86::LD_Fp064; - RC = X86::RFP64RegisterClass; - } - break; - case MVT::f80: - // No f80 support yet. 
- return false; + default: return false; + case MVT::f32: + if (X86ScalarSSEf32) { + Opc = X86::FsFLD0SS; + RC = &X86::FR32RegClass; + } else { + Opc = X86::LD_Fp032; + RC = &X86::RFP32RegClass; + } + break; + case MVT::f64: + if (X86ScalarSSEf64) { + Opc = X86::FsFLD0SD; + RC = &X86::FR64RegClass; + } else { + Opc = X86::LD_Fp064; + RC = &X86::RFP64RegClass; + } + break; + case MVT::f80: + // No f80 support yet. + return false; } unsigned ResultReg = createResultReg(RC); @@ -2160,7 +2182,7 @@ bool X86FastISel::TryToFoldLoad(MachineInstr *MI, unsigned OpNo, if (!X86SelectAddress(LI->getOperand(0), AM)) return false; - X86InstrInfo &XII = (X86InstrInfo&)TII; + const X86InstrInfo &XII = (const X86InstrInfo&)TII; unsigned Size = TD.getTypeAllocSize(LI->getType()); unsigned Alignment = LI->getAlignment(); @@ -2179,7 +2201,8 @@ bool X86FastISel::TryToFoldLoad(MachineInstr *MI, unsigned OpNo, namespace llvm { - FastISel *X86::createFastISel(FunctionLoweringInfo &funcInfo) { - return new X86FastISel(funcInfo); + FastISel *X86::createFastISel(FunctionLoweringInfo &funcInfo, + const TargetLibraryInfo *libInfo) { + return new X86FastISel(funcInfo, libInfo); } } diff --git a/lib/Target/X86/X86FloatingPoint.cpp b/lib/Target/X86/X86FloatingPoint.cpp index ed1707d..955c75a 100644 --- a/lib/Target/X86/X86FloatingPoint.cpp +++ b/lib/Target/X86/X86FloatingPoint.cpp @@ -130,7 +130,7 @@ namespace { // The hardware keeps track of how many FP registers are live, so we have // to model that exactly. Usually, each live register corresponds to an // FP register, but when dealing with calls, returns, and inline - // assembly, it is sometimes neccesary to have live scratch registers. + // assembly, it is sometimes necessary to have live scratch registers. unsigned Stack[8]; // FP Registers in each stack slot... unsigned StackTop; // The current top of the FP stack. @@ -971,7 +971,7 @@ void FPS::handleZeroArgFP(MachineBasicBlock::iterator &I) { // Change from the pseudo instruction to the concrete instruction. MI->RemoveOperand(0); // Remove the explicit ST(0) operand MI->setDesc(TII->get(getConcreteOpcode(MI->getOpcode()))); - + // Result gets pushed on the stack. pushReg(DestReg); } @@ -1015,7 +1015,7 @@ void FPS::handleOneArgFP(MachineBasicBlock::iterator &I) { } else { moveToTop(Reg, I); // Move to the top of the stack... } - + // Convert from the pseudo instruction to the concrete instruction. MI->RemoveOperand(NumOps-1); // Remove explicit ST(0) operand MI->setDesc(TII->get(getConcreteOpcode(MI->getOpcode()))); @@ -1297,7 +1297,7 @@ void FPS::handleCondMovFP(MachineBasicBlock::iterator &I) { MI->RemoveOperand(1); MI->getOperand(0).setReg(getSTReg(Op1)); MI->setDesc(TII->get(getConcreteOpcode(MI->getOpcode()))); - + // If we kill the second operand, make sure to pop it from the stack. if (Op0 != Op1 && KillsOp1) { // Get this value off of the register stack. @@ -1714,38 +1714,38 @@ void FPS::handleSpecialFP(MachineBasicBlock::iterator &I) { // Assert that the top of stack contains the right FP register. assert(StackTop == 1 && FirstFPRegOp == getStackEntry(0) && "Top of stack not the right register for RET!"); - + // Ok, everything is good, mark the value as not being on the stack // anymore so that our assertion about the stack being empty at end of // block doesn't fire. StackTop = 0; return; } - + // Otherwise, we are returning two values: // 2) If returning the same value for both, we only have one thing in the FP // stack. 
Consider: RET FP1, FP1 if (StackTop == 1) { assert(FirstFPRegOp == SecondFPRegOp && FirstFPRegOp == getStackEntry(0)&& "Stack misconfiguration for RET!"); - + // Duplicate the TOS so that we return it twice. Just pick some other FPx // register to hold it. unsigned NewReg = getScratchReg(); duplicateToTop(FirstFPRegOp, NewReg, MI); FirstFPRegOp = NewReg; } - + /// Okay we know we have two different FPx operands now: assert(StackTop == 2 && "Must have two values live!"); - + /// 3) If SecondFPRegOp is currently in ST(0) and FirstFPRegOp is currently /// in ST(1). In this case, emit an fxch. if (getStackEntry(0) == SecondFPRegOp) { assert(getStackEntry(1) == FirstFPRegOp && "Unknown regs live"); moveToTop(FirstFPRegOp, MI); } - + /// 4) Finally, FirstFPRegOp must be in ST(0) and SecondFPRegOp must be in /// ST(1). Just remove both from our understanding of the stack and return. assert(getStackEntry(0) == FirstFPRegOp && "Unknown regs live"); diff --git a/lib/Target/X86/X86FrameLowering.cpp b/lib/Target/X86/X86FrameLowering.cpp index 000e375..2238688 100644 --- a/lib/Target/X86/X86FrameLowering.cpp +++ b/lib/Target/X86/X86FrameLowering.cpp @@ -45,14 +45,14 @@ bool X86FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const { bool X86FrameLowering::hasFP(const MachineFunction &MF) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); const MachineModuleInfo &MMI = MF.getMMI(); - const TargetRegisterInfo *RI = TM.getRegisterInfo(); + const TargetRegisterInfo *RegInfo = TM.getRegisterInfo(); return (MF.getTarget().Options.DisableFramePointerElim(MF) || - RI->needsStackRealignment(MF) || + RegInfo->needsStackRealignment(MF) || MFI->hasVarSizedObjects() || MFI->isFrameAddressTaken() || MF.getInfo()->getForceFramePointer() || - MMI.callsUnwindInit()); + MMI.callsUnwindInit() || MMI.callsEHReturn()); } static unsigned getSUBriOpcode(unsigned is64Bit, int64_t Imm) { @@ -125,8 +125,8 @@ static unsigned findDeadCallerSavedReg(MachineBasicBlock &MBB, unsigned Reg = MO.getReg(); if (!Reg) continue; - for (const uint16_t *AsI = TRI.getOverlaps(Reg); *AsI; ++AsI) - Uses.insert(*AsI); + for (MCRegAliasIterator AI(Reg, &TRI, true); AI.isValid(); ++AI) + Uses.insert(*AI); } const uint16_t *CS = Is64Bit ? CallerSavedRegs64Bit : CallerSavedRegs32Bit; @@ -369,7 +369,7 @@ void X86FrameLowering::emitCalleeSavedFrameMoves(MachineFunction &MF, /// getCompactUnwindRegNum - Get the compact unwind number for a given /// register. The number corresponds to the enum lists in /// compact_unwind_encoding.h. -static int getCompactUnwindRegNum(const unsigned *CURegs, unsigned Reg) { +static int getCompactUnwindRegNum(const uint16_t *CURegs, unsigned Reg) { for (int Idx = 1; *CURegs; ++CURegs, ++Idx) if (*CURegs == Reg) return Idx; @@ -398,13 +398,13 @@ encodeCompactUnwindRegistersWithoutFrame(unsigned SavedRegs[CU_NUM_SAVED_REGS], // 4 3 // 5 3 // - static const unsigned CU32BitRegs[] = { + static const uint16_t CU32BitRegs[] = { X86::EBX, X86::ECX, X86::EDX, X86::EDI, X86::ESI, X86::EBP, 0 }; - static const unsigned CU64BitRegs[] = { + static const uint16_t CU64BitRegs[] = { X86::RBX, X86::R12, X86::R13, X86::R14, X86::R15, X86::RBP, 0 }; - const unsigned *CURegs = (Is64Bit ? CU64BitRegs : CU32BitRegs); + const uint16_t *CURegs = (Is64Bit ? 
CU64BitRegs : CU32BitRegs); for (unsigned i = 0; i != CU_NUM_SAVED_REGS; ++i) { int CUReg = getCompactUnwindRegNum(CURegs, SavedRegs[i]); @@ -466,13 +466,13 @@ encodeCompactUnwindRegistersWithoutFrame(unsigned SavedRegs[CU_NUM_SAVED_REGS], static uint32_t encodeCompactUnwindRegistersWithFrame(unsigned SavedRegs[CU_NUM_SAVED_REGS], bool Is64Bit) { - static const unsigned CU32BitRegs[] = { + static const uint16_t CU32BitRegs[] = { X86::EBX, X86::ECX, X86::EDX, X86::EDI, X86::ESI, X86::EBP, 0 }; - static const unsigned CU64BitRegs[] = { + static const uint16_t CU64BitRegs[] = { X86::RBX, X86::R12, X86::R13, X86::R14, X86::R15, X86::RBP, 0 }; - const unsigned *CURegs = (Is64Bit ? CU64BitRegs : CU32BitRegs); + const uint16_t *CURegs = (Is64Bit ? CU64BitRegs : CU32BitRegs); // Encode the registers in the order they were saved, 3-bits per register. The // registers are numbered from 1 to CU_NUM_SAVED_REGS. @@ -650,6 +650,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const { unsigned SlotSize = RegInfo->getSlotSize(); unsigned FramePtr = RegInfo->getFrameRegister(MF); unsigned StackPtr = RegInfo->getStackRegister(); + unsigned BasePtr = RegInfo->getBaseRegister(); DebugLoc DL; // If we're forcing a stack realignment we can't rely on just the frame @@ -721,10 +722,14 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const { if (HasFP) { // Calculate required stack adjustment. uint64_t FrameSize = StackSize - SlotSize; - if (RegInfo->needsStackRealignment(MF)) - FrameSize = (FrameSize + MaxAlign - 1) / MaxAlign * MaxAlign; - - NumBytes = FrameSize - X86FI->getCalleeSavedFrameSize(); + if (RegInfo->needsStackRealignment(MF)) { + // Callee-saved registers are pushed on stack before the stack + // is realigned. + FrameSize -= X86FI->getCalleeSavedFrameSize(); + NumBytes = (FrameSize + MaxAlign - 1) / MaxAlign * MaxAlign; + } else { + NumBytes = FrameSize - X86FI->getCalleeSavedFrameSize(); + } // Get the offset of the stack slot for the EBP register, which is // guaranteed to be the last slot by processFunctionBeforeFrameFinalized. @@ -781,19 +786,6 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const { for (MachineFunction::iterator I = llvm::next(MF.begin()), E = MF.end(); I != E; ++I) I->addLiveIn(FramePtr); - - // Realign stack - if (RegInfo->needsStackRealignment(MF)) { - MachineInstr *MI = - BuildMI(MBB, MBBI, DL, - TII.get(Is64Bit ? X86::AND64ri32 : X86::AND32ri), StackPtr) - .addReg(StackPtr) - .addImm(-MaxAlign) - .setMIFlag(MachineInstr::FrameSetup); - - // The EFLAGS implicit def is dead. - MI->getOperand(3).setIsDead(); - } } else { NumBytes = StackSize - X86FI->getCalleeSavedFrameSize(); } @@ -823,6 +815,27 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const { } } + // Realign stack after we pushed callee-saved registers (so that we'll be + // able to calculate their offsets from the frame pointer). + + // NOTE: We push the registers before realigning the stack, so + // vector callee-saved (xmm) registers may be saved w/o proper + // alignment in this way. However, currently these regs are saved in + // stack slots (see X86FrameLowering::spillCalleeSavedRegisters()), so + // this shouldn't be a problem. + if (RegInfo->needsStackRealignment(MF)) { + assert(HasFP && "There should be a frame pointer if stack is realigned."); + MachineInstr *MI = + BuildMI(MBB, MBBI, DL, + TII.get(Is64Bit ? 
X86::AND64ri32 : X86::AND32ri), StackPtr) + .addReg(StackPtr) + .addImm(-MaxAlign) + .setMIFlag(MachineInstr::FrameSetup); + + // The EFLAGS implicit def is dead. + MI->getOperand(3).setIsDead(); + } + DL = MBB.findDebugLoc(MBBI); // If there is an SUB32ri of ESP immediately before this instruction, merge @@ -913,6 +926,18 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const { emitSPUpdate(MBB, MBBI, StackPtr, -(int64_t)NumBytes, Is64Bit, UseLEA, TII, *RegInfo); + // If we need a base pointer, set it up here. It's whatever the value + // of the stack pointer is at this point. Any variable size objects + // will be allocated after this, so we can still use the base pointer + // to reference locals. + if (RegInfo->hasBasePointer(MF)) { + // Update the frame pointer with the current stack pointer. + unsigned Opc = Is64Bit ? X86::MOV64rr : X86::MOV32rr; + BuildMI(MBB, MBBI, DL, TII.get(Opc), BasePtr) + .addReg(StackPtr) + .setMIFlag(MachineInstr::FrameSetup); + } + if (( (!HasFP && NumBytes) || PushedRegs) && needsFrameMoves) { // Mark end of stack pointer adjustment. MCSymbol *Label = MMI.getContext().CreateTempSymbol(); @@ -997,10 +1022,14 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF, if (hasFP(MF)) { // Calculate required stack adjustment. uint64_t FrameSize = StackSize - SlotSize; - if (RegInfo->needsStackRealignment(MF)) - FrameSize = (FrameSize + MaxAlign - 1)/MaxAlign*MaxAlign; - - NumBytes = FrameSize - CSSize; + if (RegInfo->needsStackRealignment(MF)) { + // Callee-saved registers were pushed on stack before the stack + // was realigned. + FrameSize -= CSSize; + NumBytes = (FrameSize + MaxAlign - 1) / MaxAlign * MaxAlign; + } else { + NumBytes = FrameSize - CSSize; + } // Pop EBP. BuildMI(MBB, MBBI, DL, @@ -1010,7 +1039,6 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF, } // Skip the callee-saved pop instructions. - MachineBasicBlock::iterator LastCSPop = MBBI; while (MBBI != MBB.begin()) { MachineBasicBlock::iterator PI = prior(MBBI); unsigned Opc = PI->getOpcode(); @@ -1021,6 +1049,7 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF, --MBBI; } + MachineBasicBlock::iterator FirstCSPop = MBBI; DL = MBBI->getDebugLoc(); @@ -1032,28 +1061,16 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF, // If dynamic alloca is used, then reset esp to point to the last callee-saved // slot before popping them off! Same applies for the case, when stack was // realigned. - if (RegInfo->needsStackRealignment(MF)) { - // We cannot use LEA here, because stack pointer was realigned. We need to - // deallocate local frame back. - if (CSSize) { - emitSPUpdate(MBB, MBBI, StackPtr, NumBytes, Is64Bit, UseLEA, TII, - *RegInfo); - MBBI = prior(LastCSPop); - } - - BuildMI(MBB, MBBI, DL, - TII.get(Is64Bit ? X86::MOV64rr : X86::MOV32rr), - StackPtr).addReg(FramePtr); - } else if (MFI->hasVarSizedObjects()) { - if (CSSize) { - unsigned Opc = Is64Bit ? X86::LEA64r : X86::LEA32r; - MachineInstr *MI = - addRegOffset(BuildMI(MF, DL, TII.get(Opc), StackPtr), - FramePtr, false, -CSSize); - MBB.insert(MBBI, MI); + if (RegInfo->needsStackRealignment(MF) || MFI->hasVarSizedObjects()) { + if (RegInfo->needsStackRealignment(MF)) + MBBI = FirstCSPop; + if (CSSize != 0) { + unsigned Opc = getLEArOpcode(Is64Bit); + addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr), + FramePtr, false, -CSSize); } else { - BuildMI(MBB, MBBI, DL, - TII.get(Is64Bit ? X86::MOV64rr : X86::MOV32rr), StackPtr) + unsigned Opc = (Is64Bit ? 
X86::MOV64rr : X86::MOV32rr); + BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr) .addReg(FramePtr); } } else if (NumBytes) { @@ -1124,8 +1141,7 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF, } MachineInstr *NewMI = prior(MBBI); - for (unsigned i = 2, e = MBBI->getNumOperands(); i != e; ++i) - NewMI->addOperand(MBBI->getOperand(i)); + NewMI->copyImplicitOps(MBBI); // Delete the pseudo instruction TCRETURN. MBB.erase(MBBI); @@ -1142,16 +1158,25 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF, } int X86FrameLowering::getFrameIndexOffset(const MachineFunction &MF, int FI) const { - const X86RegisterInfo *RI = + const X86RegisterInfo *RegInfo = static_cast(MF.getTarget().getRegisterInfo()); const MachineFrameInfo *MFI = MF.getFrameInfo(); int Offset = MFI->getObjectOffset(FI) - getOffsetOfLocalArea(); uint64_t StackSize = MFI->getStackSize(); - if (RI->needsStackRealignment(MF)) { + if (RegInfo->hasBasePointer(MF)) { + assert (hasFP(MF) && "VLAs and dynamic stack realign, but no FP?!"); if (FI < 0) { // Skip the saved EBP. - Offset += RI->getSlotSize(); + return Offset + RegInfo->getSlotSize(); + } else { + assert((-(Offset + StackSize)) % MFI->getObjectAlignment(FI) == 0); + return Offset + StackSize; + } + } else if (RegInfo->needsStackRealignment(MF)) { + if (FI < 0) { + // Skip the saved EBP. + return Offset + RegInfo->getSlotSize(); } else { assert((-(Offset + StackSize)) % MFI->getObjectAlignment(FI) == 0); return Offset + StackSize; @@ -1162,7 +1187,7 @@ int X86FrameLowering::getFrameIndexOffset(const MachineFunction &MF, int FI) con return Offset + StackSize; // Skip the saved EBP. - Offset += RI->getSlotSize(); + Offset += RegInfo->getSlotSize(); // Skip the RETADDR move area const X86MachineFunctionInfo *X86FI = MF.getInfo(); @@ -1174,6 +1199,22 @@ int X86FrameLowering::getFrameIndexOffset(const MachineFunction &MF, int FI) con return Offset; } +int X86FrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI, + unsigned &FrameReg) const { + const X86RegisterInfo *RegInfo = + static_cast(MF.getTarget().getRegisterInfo()); + // We can't calculate offset from frame pointer if the stack is realigned, + // so enforce usage of stack/base pointer. The base pointer is used when we + // have dynamic allocas in addition to dynamic realignment. + if (RegInfo->hasBasePointer(MF)) + FrameReg = RegInfo->getBaseRegister(); + else if (RegInfo->needsStackRealignment(MF)) + FrameReg = RegInfo->getStackRegister(); + else + FrameReg = RegInfo->getFrameRegister(MF); + return getFrameIndexOffset(MF, FI); +} + bool X86FrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const std::vector &CSI, @@ -1307,6 +1348,10 @@ X86FrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, "Slot for EBP register must be last in order to be found!"); (void)FrameIdx; } + + // Spill the BasePtr if it's used. 
+ if (RegInfo->hasBasePointer(MF)) + MF.getRegInfo().setPhysRegUsed(RegInfo->getBaseRegister()); } static bool diff --git a/lib/Target/X86/X86FrameLowering.h b/lib/Target/X86/X86FrameLowering.h index d55a497..dc515dc 100644 --- a/lib/Target/X86/X86FrameLowering.h +++ b/lib/Target/X86/X86FrameLowering.h @@ -60,6 +60,8 @@ public: bool hasReservedCallFrame(const MachineFunction &MF) const; int getFrameIndexOffset(const MachineFunction &MF, int FI) const; + int getFrameIndexReference(const MachineFunction &MF, int FI, + unsigned &FrameReg) const; uint32_t getCompactUnwindEncoding(MachineFunction &MF) const; }; diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp index 8e2b1d6..27195b4 100644 --- a/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -60,7 +60,7 @@ namespace { int Base_FrameIndex; unsigned Scale; - SDValue IndexReg; + SDValue IndexReg; int32_t Disp; SDValue Segment; const GlobalValue *GV; @@ -80,11 +80,11 @@ namespace { bool hasSymbolicDisplacement() const { return GV != 0 || CP != 0 || ES != 0 || JT != -1 || BlockAddr != 0; } - + bool hasBaseOrIndexReg() const { return IndexReg.getNode() != 0 || Base_Reg.getNode() != 0; } - + /// isRIPRelative - Return true if this addressing mode is already RIP /// relative. bool isRIPRelative() const { @@ -94,7 +94,7 @@ namespace { return RegNode->getReg() == X86::RIP; return false; } - + void setBaseReg(SDValue Reg) { BaseType = RegBase; Base_Reg = Reg; @@ -104,7 +104,7 @@ namespace { dbgs() << "X86ISelAddressMode " << this << '\n'; dbgs() << "Base_Reg "; if (Base_Reg.getNode() != 0) - Base_Reg.getNode()->dump(); + Base_Reg.getNode()->dump(); else dbgs() << "nul"; dbgs() << " Base.FrameIndex " << Base_FrameIndex << '\n' @@ -113,7 +113,7 @@ namespace { if (IndexReg.getNode() != 0) IndexReg.getNode()->dump(); else - dbgs() << "nul"; + dbgs() << "nul"; dbgs() << " Disp " << Disp << '\n' << "GV "; if (GV) @@ -187,6 +187,7 @@ namespace { private: SDNode *Select(SDNode *N); + SDNode *SelectGather(SDNode *N, unsigned Opc); SDNode *SelectAtomic64(SDNode *Node, unsigned Opc); SDNode *SelectAtomicLoadAdd(SDNode *Node, EVT NVT); SDNode *SelectAtomicLoadArith(SDNode *Node, EVT NVT); @@ -212,21 +213,21 @@ namespace { SDValue &Index, SDValue &Disp, SDValue &Segment, SDValue &NodeWithChain); - + bool TryFoldLoad(SDNode *P, SDValue N, SDValue &Base, SDValue &Scale, SDValue &Index, SDValue &Disp, SDValue &Segment); - + /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for /// inline asm expressions. virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode, std::vector &OutOps); - + void EmitSpecialCodeForMain(MachineBasicBlock *BB, MachineFrameInfo *MFI); - inline void getAddressOperands(X86ISelAddressMode &AM, SDValue &Base, + inline void getAddressOperands(X86ISelAddressMode &AM, SDValue &Base, SDValue &Scale, SDValue &Index, SDValue &Disp, SDValue &Segment) { Base = (AM.BaseType == X86ISelAddressMode::FrameIndexBase) ? @@ -425,7 +426,7 @@ static bool isCalleeLoad(SDValue Callee, SDValue &Chain, bool HasCallSeq) { void X86DAGToDAGISel::PreprocessISelDAG() { // OptForSize is used in pattern predicates that isel is matching. OptForSize = MF->getFunction()->hasFnAttr(Attribute::OptimizeForSize); - + for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(), E = CurDAG->allnodes_end(); I != E; ) { SDNode *N = I++; // Preincrement iterator to avoid invalidation issues. 
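
The getFrameIndexReference hunk above encodes a three-way policy: once the
prologue realigns the stack with an AND on ESP/RSP, the distance from the
frame pointer to the locals is no longer a compile-time constant, so locals
must be addressed off the stack pointer; and when dynamic allocas also move
the stack pointer at run time, the newly introduced base pointer is used
instead. A minimal sketch of that decision (illustrative names, not the LLVM
API):

    // Hedged sketch of the frame-register policy added by the patch above.
    enum FrameRegKind { BasePointer, StackPointer, FramePointer };

    FrameRegKind pickFrameReg(bool HasBasePointer, bool NeedsRealignment) {
      // Dynamic allocas + realignment: SP moves at run time too, so use
      // the dedicated base register captured in the prologue.
      if (HasBasePointer)
        return BasePointer;
      // Realigned stack: FP-relative offsets are unknown after the AND,
      // but SP-relative offsets are fixed.
      if (NeedsRealignment)
        return StackPointer;
      // Default: address locals off the frame pointer.
      return FramePointer;
    }

This is also why the prologue hunks assert HasFP when realigning: the frame
pointer is the only stable anchor left for the incoming arguments.
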
@@ -461,7 +462,7 @@ void X86DAGToDAGISel::PreprocessISelDAG() { ++NumLoadMoved; continue; } - + // Lower fpround and fpextend nodes that target the FP stack to be store and // load to the stack. This is a gross hack. We would like to simply mark // these as being illegal, but when we do that, legalize produces these when @@ -472,7 +473,7 @@ void X86DAGToDAGISel::PreprocessISelDAG() { // FIXME: This should only happen when not compiled with -O0. if (N->getOpcode() != ISD::FP_ROUND && N->getOpcode() != ISD::FP_EXTEND) continue; - + EVT SrcVT = N->getOperand(0).getValueType(); EVT DstVT = N->getValueType(0); @@ -495,7 +496,7 @@ void X86DAGToDAGISel::PreprocessISelDAG() { if (N->getConstantOperandVal(1)) continue; } - + // Here we could have an FP stack truncation or an FPStack <-> SSE convert. // FPStack has extload and truncstore. SSE can fold direct loads into other // operations. Based on this, decide what we want to do. @@ -504,10 +505,10 @@ void X86DAGToDAGISel::PreprocessISelDAG() { MemVT = DstVT; // FP_ROUND must use DstVT, we can't do a 'trunc load'. else MemVT = SrcIsSSE ? SrcVT : DstVT; - + SDValue MemTmp = CurDAG->CreateStackTemporary(MemVT); DebugLoc dl = N->getDebugLoc(); - + // FIXME: optimize the case where the src/dest is a load or store? SDValue Store = CurDAG->getTruncStore(CurDAG->getEntryNode(), dl, N->getOperand(0), @@ -523,12 +524,12 @@ void X86DAGToDAGISel::PreprocessISelDAG() { // To avoid invalidating 'I', back it up to the convert node. --I; CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Result); - + // Now that we did that, the node is dead. Increment the iterator to the // next node to process, then delete N. ++I; CurDAG->DeleteNode(N); - } + } } @@ -583,7 +584,7 @@ bool X86DAGToDAGISel::FoldOffsetIntoAddress(uint64_t Offset, bool X86DAGToDAGISel::MatchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM){ SDValue Address = N->getOperand(1); - + // load gs:0 -> GS segment register. // load fs:0 -> FS segment register. // @@ -592,7 +593,7 @@ bool X86DAGToDAGISel::MatchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM){ // For more information see http://people.redhat.com/drepper/tls.pdf if (ConstantSDNode *C = dyn_cast(Address)) if (C->getSExtValue() == 0 && AM.Segment.getNode() == 0 && - Subtarget->isTargetELF()) + Subtarget->isTargetLinux()) switch (N->getPointerInfo().getAddrSpace()) { case 256: AM.Segment = CurDAG->getRegister(X86::GS, MVT::i16); @@ -601,7 +602,7 @@ bool X86DAGToDAGISel::MatchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM){ AM.Segment = CurDAG->getRegister(X86::FS, MVT::i16); return false; } - + return true; } @@ -991,7 +992,7 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, case ISD::SHL: if (AM.IndexReg.getNode() != 0 || AM.Scale != 1) break; - + if (ConstantSDNode *CN = dyn_cast(N.getNode()->getOperand(1))) { unsigned Val = CN->getZExtValue(); @@ -1166,7 +1167,7 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, !MatchAddressRecursively(Handle.getValue().getOperand(1), AM, Depth+1)) return false; AM = Backup; - + // Try again after commuting the operands. 
if (!MatchAddressRecursively(Handle.getValue().getOperand(1), AM, Depth+1)&& !MatchAddressRecursively(Handle.getValue().getOperand(0), AM, Depth+1)) @@ -1202,7 +1203,7 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, AM = Backup; } break; - + case ISD::AND: { // Perform some heroic transforms on an and of a constant-count shift // with a constant to enable use of the scaled offset field. @@ -1274,7 +1275,7 @@ bool X86DAGToDAGISel::SelectAddr(SDNode *Parent, SDValue N, SDValue &Base, SDValue &Scale, SDValue &Index, SDValue &Disp, SDValue &Segment) { X86ISelAddressMode AM; - + if (Parent && // This list of opcodes are all the nodes that have an "addr:$ptr" operand // that are not a MemSDNode, and thus don't have proper addrspace info. @@ -1289,7 +1290,7 @@ bool X86DAGToDAGISel::SelectAddr(SDNode *Parent, SDValue N, SDValue &Base, if (AddrSpace == 257) AM.Segment = CurDAG->getRegister(X86::FS, MVT::i16); } - + if (MatchAddress(N, AM)) return false; @@ -1335,7 +1336,7 @@ bool X86DAGToDAGISel::SelectScalarSSELoad(SDNode *Root, // elements. This is a vector shuffle from the zero vector. if (N.getOpcode() == X86ISD::VZEXT_MOVL && N.getNode()->hasOneUse() && // Check to see if the top elements are all zeros (or bitcast of zeros). - N.getOperand(0).getOpcode() == ISD::SCALAR_TO_VECTOR && + N.getOperand(0).getOpcode() == ISD::SCALAR_TO_VECTOR && N.getOperand(0).getNode()->hasOneUse() && ISD::isNON_EXTLoad(N.getOperand(0).getOperand(0).getNode()) && N.getOperand(0).getOperand(0).hasOneUse() && @@ -1410,7 +1411,7 @@ bool X86DAGToDAGISel::SelectLEAAddr(SDValue N, // If it isn't worth using an LEA, reject it. if (Complexity <= 2) return false; - + getAddressOperands(AM, Base, Scale, Index, Disp, Segment); return true; } @@ -1421,7 +1422,7 @@ bool X86DAGToDAGISel::SelectTLSADDRAddr(SDValue N, SDValue &Base, SDValue &Disp, SDValue &Segment) { assert(N.getOpcode() == ISD::TargetGlobalTLSAddress); const GlobalAddressSDNode *GA = cast(N); - + X86ISelAddressMode AM; AM.GV = GA->getGlobal(); AM.Disp += GA->getOffset(); @@ -1434,7 +1435,7 @@ bool X86DAGToDAGISel::SelectTLSADDRAddr(SDValue N, SDValue &Base, } else { AM.IndexReg = CurDAG->getRegister(0, MVT::i64); } - + getAddressOperands(AM, Base, Scale, Index, Disp, Segment); return true; } @@ -1448,7 +1449,7 @@ bool X86DAGToDAGISel::TryFoldLoad(SDNode *P, SDValue N, !IsProfitableToFold(N, P, P) || !IsLegalToFold(N, P, P, OptLevel)) return false; - + return SelectAddr(N.getNode(), N.getOperand(1), Base, Scale, Index, Disp, Segment); } @@ -1699,7 +1700,7 @@ static const uint16_t AtomicOpcTbl[AtomicOpcEnd][AtomicSzEnd] = { SDNode *X86DAGToDAGISel::SelectAtomicLoadArith(SDNode *Node, EVT NVT) { if (Node->hasAnyUseOfValue(0)) return 0; - + // Optimize common patterns for __sync_or_and_fetch and similar arith // operations where the result is not used. This allows us to use the "lock" // version of the arithmetic instruction. 
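
SelectAtomicLoadArith, continued below, fires only when the node has no use
of value 0, i.e. the fetched result is dead. An atomic read-modify-write that
does not have to produce the old value can be a single lock-prefixed memory
instruction instead of a compare-exchange loop. The same effect is visible
from portable C++; the codegen notes in the comments are the typical x86
outcome, not something this patch guarantees:

    #include <atomic>

    // Old value discarded: on x86 this typically becomes a single
    // `lock or` to memory rather than a cmpxchg retry loop.
    void set_flag(std::atomic<unsigned> &flags, unsigned bit) {
      flags.fetch_or(bit, std::memory_order_seq_cst);
    }

    // Old value used: the RMW must now return the prior contents, which
    // for or/and/xor generally requires a cmpxchg retry loop (add can
    // instead use `lock xadd`).
    unsigned set_flag_and_get(std::atomic<unsigned> &flags, unsigned bit) {
      return flags.fetch_or(bit, std::memory_order_seq_cst);
    }
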
@@ -1726,14 +1727,14 @@ SDNode *X86DAGToDAGISel::SelectAtomicLoadArith(SDNode *Node, EVT NVT) { default: return 0; } - + bool isCN = false; ConstantSDNode *CN = dyn_cast(Val); if (CN && (int32_t)CN->getSExtValue() == CN->getSExtValue()) { isCN = true; Val = CurDAG->getTargetConstant(CN->getSExtValue(), NVT); } - + unsigned Opc = 0; switch (NVT.getSimpleVT().SimpleTy) { default: return 0; @@ -1771,7 +1772,7 @@ SDNode *X86DAGToDAGISel::SelectAtomicLoadArith(SDNode *Node, EVT NVT) { } break; } - + assert(Opc != 0 && "Invalid arith lock transform!"); DebugLoc dl = Node->getDebugLoc(); @@ -1851,7 +1852,7 @@ static bool HasNoSignedComparisonUses(SDNode *N) { /// isLoadIncOrDecStore - Check whether or not the chain ending in StoreNode /// is suitable for doing the {load; increment or decrement; store} to modify /// transformation. -static bool isLoadIncOrDecStore(StoreSDNode *StoreNode, unsigned Opc, +static bool isLoadIncOrDecStore(StoreSDNode *StoreNode, unsigned Opc, SDValue StoredVal, SelectionDAG *CurDAG, LoadSDNode* &LoadNode, SDValue &InputChain) { @@ -1875,15 +1876,15 @@ static bool isLoadIncOrDecStore(StoreSDNode *StoreNode, unsigned Opc, // Return LoadNode by reference. LoadNode = cast(Load); // is the size of the value one that we can handle? (i.e. 64, 32, 16, or 8) - EVT LdVT = LoadNode->getMemoryVT(); - if (LdVT != MVT::i64 && LdVT != MVT::i32 && LdVT != MVT::i16 && + EVT LdVT = LoadNode->getMemoryVT(); + if (LdVT != MVT::i64 && LdVT != MVT::i32 && LdVT != MVT::i16 && LdVT != MVT::i8) return false; // Is store the only read of the loaded value? if (!Load.hasOneUse()) return false; - + // Is the address of the store the same as the load? if (LoadNode->getBasePtr() != StoreNode->getBasePtr() || LoadNode->getOffset() != StoreNode->getOffset()) @@ -1905,6 +1906,20 @@ static bool isLoadIncOrDecStore(StoreSDNode *StoreNode, unsigned Opc, ChainCheck = true; continue; } + + // Make sure using Op as part of the chain would not cause a cycle here. + // In theory, we could check whether the chain node is a predecessor of + // the load. But that can be very expensive. Instead visit the uses and + // make sure they all have smaller node id than the load. + int LoadId = LoadNode->getNodeId(); + for (SDNode::use_iterator UI = Op.getNode()->use_begin(), + UE = UI->use_end(); UI != UE; ++UI) { + if (UI.getUse().getResNo() != 0) + continue; + if (UI->getNodeId() > LoadId) + return false; + } + ChainOps.push_back(Op); } @@ -1938,12 +1953,44 @@ static unsigned getFusedLdStOpcode(EVT &LdVT, unsigned Opc) { llvm_unreachable("unrecognized size for LdVT"); } +/// SelectGather - Customized ISel for GATHER operations. 
+/// +SDNode *X86DAGToDAGISel::SelectGather(SDNode *Node, unsigned Opc) { + // Operands of Gather: VSrc, Base, VIdx, VMask, Scale + SDValue Chain = Node->getOperand(0); + SDValue VSrc = Node->getOperand(2); + SDValue Base = Node->getOperand(3); + SDValue VIdx = Node->getOperand(4); + SDValue VMask = Node->getOperand(5); + ConstantSDNode *Scale = dyn_cast(Node->getOperand(6)); + if (!Scale) + return 0; + + SDVTList VTs = CurDAG->getVTList(VSrc.getValueType(), VSrc.getValueType(), + MVT::Other); + + // Memory Operands: Base, Scale, Index, Disp, Segment + SDValue Disp = CurDAG->getTargetConstant(0, MVT::i32); + SDValue Segment = CurDAG->getRegister(0, MVT::i32); + const SDValue Ops[] = { VSrc, Base, getI8Imm(Scale->getSExtValue()), VIdx, + Disp, Segment, VMask, Chain}; + SDNode *ResNode = CurDAG->getMachineNode(Opc, Node->getDebugLoc(), + VTs, Ops, array_lengthof(Ops)); + // Node has 2 outputs: VDst and MVT::Other. + // ResNode has 3 outputs: VDst, VMask_wb, and MVT::Other. + // We replace VDst of Node with VDst of ResNode, and Other of Node with Other + // of ResNode. + ReplaceUses(SDValue(Node, 0), SDValue(ResNode, 0)); + ReplaceUses(SDValue(Node, 1), SDValue(ResNode, 2)); + return ResNode; +} + SDNode *X86DAGToDAGISel::Select(SDNode *Node) { EVT NVT = Node->getValueType(0); unsigned Opc, MOpc; unsigned Opcode = Node->getOpcode(); DebugLoc dl = Node->getDebugLoc(); - + DEBUG(dbgs() << "Selecting: "; Node->dump(CurDAG); dbgs() << '\n'); if (Node->isMachineOpcode()) { @@ -1953,23 +2000,82 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { switch (Opcode) { default: break; + case ISD::INTRINSIC_W_CHAIN: { + unsigned IntNo = cast(Node->getOperand(1))->getZExtValue(); + switch (IntNo) { + default: break; + case Intrinsic::x86_avx2_gather_d_pd: + case Intrinsic::x86_avx2_gather_d_pd_256: + case Intrinsic::x86_avx2_gather_q_pd: + case Intrinsic::x86_avx2_gather_q_pd_256: + case Intrinsic::x86_avx2_gather_d_ps: + case Intrinsic::x86_avx2_gather_d_ps_256: + case Intrinsic::x86_avx2_gather_q_ps: + case Intrinsic::x86_avx2_gather_q_ps_256: + case Intrinsic::x86_avx2_gather_d_q: + case Intrinsic::x86_avx2_gather_d_q_256: + case Intrinsic::x86_avx2_gather_q_q: + case Intrinsic::x86_avx2_gather_q_q_256: + case Intrinsic::x86_avx2_gather_d_d: + case Intrinsic::x86_avx2_gather_d_d_256: + case Intrinsic::x86_avx2_gather_q_d: + case Intrinsic::x86_avx2_gather_q_d_256: { + unsigned Opc; + switch (IntNo) { + default: llvm_unreachable("Impossible intrinsic"); + case Intrinsic::x86_avx2_gather_d_pd: Opc = X86::VGATHERDPDrm; break; + case Intrinsic::x86_avx2_gather_d_pd_256: Opc = X86::VGATHERDPDYrm; break; + case Intrinsic::x86_avx2_gather_q_pd: Opc = X86::VGATHERQPDrm; break; + case Intrinsic::x86_avx2_gather_q_pd_256: Opc = X86::VGATHERQPDYrm; break; + case Intrinsic::x86_avx2_gather_d_ps: Opc = X86::VGATHERDPSrm; break; + case Intrinsic::x86_avx2_gather_d_ps_256: Opc = X86::VGATHERDPSYrm; break; + case Intrinsic::x86_avx2_gather_q_ps: Opc = X86::VGATHERQPSrm; break; + case Intrinsic::x86_avx2_gather_q_ps_256: Opc = X86::VGATHERQPSYrm; break; + case Intrinsic::x86_avx2_gather_d_q: Opc = X86::VPGATHERDQrm; break; + case Intrinsic::x86_avx2_gather_d_q_256: Opc = X86::VPGATHERDQYrm; break; + case Intrinsic::x86_avx2_gather_q_q: Opc = X86::VPGATHERQQrm; break; + case Intrinsic::x86_avx2_gather_q_q_256: Opc = X86::VPGATHERQQYrm; break; + case Intrinsic::x86_avx2_gather_d_d: Opc = X86::VPGATHERDDrm; break; + case Intrinsic::x86_avx2_gather_d_d_256: Opc = X86::VPGATHERDDYrm; break; + case 
Intrinsic::x86_avx2_gather_q_d: Opc = X86::VPGATHERQDrm; break; + case Intrinsic::x86_avx2_gather_q_d_256: Opc = X86::VPGATHERQDYrm; break; + } + SDNode *RetVal = SelectGather(Node, Opc); + if (RetVal) + // We already called ReplaceUses inside SelectGather. + return NULL; + break; + } + } + break; + } case X86ISD::GlobalBaseReg: return getGlobalBaseReg(); + case X86ISD::ATOMOR64_DAG: - return SelectAtomic64(Node, X86::ATOMOR6432); case X86ISD::ATOMXOR64_DAG: - return SelectAtomic64(Node, X86::ATOMXOR6432); case X86ISD::ATOMADD64_DAG: - return SelectAtomic64(Node, X86::ATOMADD6432); case X86ISD::ATOMSUB64_DAG: - return SelectAtomic64(Node, X86::ATOMSUB6432); case X86ISD::ATOMNAND64_DAG: - return SelectAtomic64(Node, X86::ATOMNAND6432); case X86ISD::ATOMAND64_DAG: - return SelectAtomic64(Node, X86::ATOMAND6432); - case X86ISD::ATOMSWAP64_DAG: - return SelectAtomic64(Node, X86::ATOMSWAP6432); + case X86ISD::ATOMSWAP64_DAG: { + unsigned Opc; + switch (Opcode) { + default: llvm_unreachable("Impossible opcode"); + case X86ISD::ATOMOR64_DAG: Opc = X86::ATOMOR6432; break; + case X86ISD::ATOMXOR64_DAG: Opc = X86::ATOMXOR6432; break; + case X86ISD::ATOMADD64_DAG: Opc = X86::ATOMADD6432; break; + case X86ISD::ATOMSUB64_DAG: Opc = X86::ATOMSUB6432; break; + case X86ISD::ATOMNAND64_DAG: Opc = X86::ATOMNAND6432; break; + case X86ISD::ATOMAND64_DAG: Opc = X86::ATOMAND6432; break; + case X86ISD::ATOMSWAP64_DAG: Opc = X86::ATOMSWAP6432; break; + } + SDNode *RetVal = SelectAtomic64(Node, Opc); + if (RetVal) + return RetVal; + break; + } case ISD::ATOMIC_LOAD_ADD: { SDNode *RetVal = SelectAtomicLoadAdd(Node, NVT); @@ -2013,7 +2119,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { if (Opcode != ISD::AND && ((Val >> ShlVal) << ShlVal) != Val) break; - unsigned ShlOp, Op = 0; + unsigned ShlOp, Op; EVT CstVT = NVT; // Check the minimum bitwidth for the new constant. @@ -2036,6 +2142,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { ShlOp = X86::SHL32ri; switch (Opcode) { + default: llvm_unreachable("Impossible opcode"); case ISD::AND: Op = X86::AND32ri8; break; case ISD::OR: Op = X86::OR32ri8; break; case ISD::XOR: Op = X86::XOR32ri8; break; @@ -2046,6 +2153,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { ShlOp = X86::SHL64ri; switch (Opcode) { + default: llvm_unreachable("Impossible opcode"); case ISD::AND: Op = CstVT==MVT::i8? X86::AND64ri8 : X86::AND64ri32; break; case ISD::OR: Op = CstVT==MVT::i8? X86::OR64ri8 : X86::OR64ri32; break; case ISD::XOR: Op = CstVT==MVT::i8? 
X86::XOR64ri8 : X86::XOR64ri32; break; @@ -2062,7 +2170,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { case X86ISD::UMUL: { SDValue N0 = Node->getOperand(0); SDValue N1 = Node->getOperand(1); - + unsigned LoReg; switch (NVT.getSimpleVT().SimpleTy) { default: llvm_unreachable("Unsupported VT!"); @@ -2071,20 +2179,20 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { case MVT::i32: LoReg = X86::EAX; Opc = X86::MUL32r; break; case MVT::i64: LoReg = X86::RAX; Opc = X86::MUL64r; break; } - + SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, LoReg, N0, SDValue()).getValue(1); - + SDVTList VTs = CurDAG->getVTList(NVT, NVT, MVT::i32); SDValue Ops[] = {N1, InFlag}; SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops, 2); - + ReplaceUses(SDValue(Node, 0), SDValue(CNode, 0)); ReplaceUses(SDValue(Node, 1), SDValue(CNode, 1)); ReplaceUses(SDValue(Node, 2), SDValue(CNode, 2)); return NULL; } - + case ISD::SMUL_LOHI: case ISD::UMUL_LOHI: { SDValue N0 = Node->getOperand(0); @@ -2128,7 +2236,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { } SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, LoReg, - N0, SDValue()).getValue(1); + N0, SDValue()).getValue(1); if (foldedLoad) { SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0), @@ -2168,7 +2276,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { // Copy the low half of the result, if it is needed. if (!SDValue(Node, 0).use_empty()) { SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, - LoReg, NVT, InFlag); + LoReg, NVT, InFlag); InFlag = Result.getValue(2); ReplaceUses(SDValue(Node, 0), Result); DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG); dbgs() << '\n'); @@ -2181,7 +2289,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { ReplaceUses(SDValue(Node, 1), Result); DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG); dbgs() << '\n'); } - + return NULL; } @@ -2332,7 +2440,12 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { return NULL; } - case X86ISD::CMP: { + case X86ISD::CMP: + case X86ISD::SUB: { + // Sometimes a SUB is used to perform comparison. + if (Opcode == X86ISD::SUB && Node->hasAnyUseOfValue(0)) + // This node is not a CMP. + break; SDValue N0 = Node->getOperand(0); SDValue N1 = Node->getOperand(1); @@ -2449,7 +2562,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { // a simple increment or decrement through memory of that value, if the // uses of the modified value and its address are suitable. // The DEC64m tablegen pattern is currently not able to match the case where - // the EFLAGS on the original DEC are used. (This also applies to + // the EFLAGS on the original DEC are used. (This also applies to // {INC,DEC}X{64,32,16,8}.) // We'll need to improve tablegen to allow flags to be transferred from a // node in the pattern to the result node. 
probably with a new keyword @@ -2481,7 +2594,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { MemOp[0] = StoreNode->getMemOperand(); MemOp[1] = LoadNode->getMemOperand(); const SDValue Ops[] = { Base, Scale, Index, Disp, Segment, InputChain }; - EVT LdVT = LoadNode->getMemoryVT(); + EVT LdVT = LoadNode->getMemoryVT(); unsigned newOpc = getFusedLdStOpcode(LdVT, Opc); MachineSDNode *Result = CurDAG->getMachineNode(newOpc, Node->getDebugLoc(), @@ -2494,6 +2607,85 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { return Result; } + + // FIXME: Custom handling because TableGen doesn't support multiple implicit + // defs in an instruction pattern + case X86ISD::PCMPESTRI: { + SDValue N0 = Node->getOperand(0); + SDValue N1 = Node->getOperand(1); + SDValue N2 = Node->getOperand(2); + SDValue N3 = Node->getOperand(3); + SDValue N4 = Node->getOperand(4); + + // Make sure last argument is a constant + ConstantSDNode *Cst = dyn_cast(N4); + if (!Cst) + break; + + uint64_t Imm = Cst->getZExtValue(); + + SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, + X86::EAX, N1, SDValue()).getValue(1); + InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, X86::EDX, + N3, InFlag).getValue(1); + + SDValue Ops[] = { N0, N2, getI8Imm(Imm), InFlag }; + unsigned Opc = Subtarget->hasAVX() ? X86::VPCMPESTRIrr : + X86::PCMPESTRIrr; + InFlag = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Glue, Ops, + array_lengthof(Ops)), 0); + + if (!SDValue(Node, 0).use_empty()) { + SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, + X86::ECX, NVT, InFlag); + InFlag = Result.getValue(2); + ReplaceUses(SDValue(Node, 0), Result); + } + if (!SDValue(Node, 1).use_empty()) { + SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, + X86::EFLAGS, NVT, InFlag); + InFlag = Result.getValue(2); + ReplaceUses(SDValue(Node, 1), Result); + } + + return NULL; + } + + // FIXME: Custom handling because TableGen doesn't support multiple implicit + // defs in an instruction pattern + case X86ISD::PCMPISTRI: { + SDValue N0 = Node->getOperand(0); + SDValue N1 = Node->getOperand(1); + SDValue N2 = Node->getOperand(2); + + // Make sure last argument is a constant + ConstantSDNode *Cst = dyn_cast(N2); + if (!Cst) + break; + + uint64_t Imm = Cst->getZExtValue(); + + SDValue Ops[] = { N0, N1, getI8Imm(Imm) }; + unsigned Opc = Subtarget->hasAVX() ? X86::VPCMPISTRIrr : + X86::PCMPISTRIrr; + SDValue InFlag = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Glue, Ops, + array_lengthof(Ops)), 0); + + if (!SDValue(Node, 0).use_empty()) { + SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, + X86::ECX, NVT, InFlag); + InFlag = Result.getValue(2); + ReplaceUses(SDValue(Node, 0), Result); + } + if (!SDValue(Node, 1).use_empty()) { + SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, + X86::EFLAGS, NVT, InFlag); + InFlag = Result.getValue(2); + ReplaceUses(SDValue(Node, 1), Result); + } + + return NULL; + } } SDNode *ResNode = SelectCode(Node); @@ -2521,7 +2713,7 @@ SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode, return true; break; } - + OutOps.push_back(Op0); OutOps.push_back(Op1); OutOps.push_back(Op2); @@ -2530,7 +2722,7 @@ SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode, return false; } -/// createX86ISelDag - This pass converts a legalized DAG into a +/// createX86ISelDag - This pass converts a legalized DAG into a /// X86-specific DAG, ready for instruction scheduling. 
/// FunctionPass *llvm::createX86ISelDag(X86TargetMachine &TM, diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 04299f3..ea66a61 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -49,6 +49,7 @@ #include "llvm/Support/MathExtras.h" #include "llvm/Target/TargetOptions.h" #include +#include using namespace llvm; STATISTIC(NumTailCalls, "Number of tail calls"); @@ -62,41 +63,33 @@ static SDValue getMOVL(SelectionDAG &DAG, DebugLoc dl, EVT VT, SDValue V1, /// simple subregister reference. Idx is an index in the 128 bits we /// want. It need not be aligned to a 128-bit bounday. That makes /// lowering EXTRACT_VECTOR_ELT operations easier. -static SDValue Extract128BitVector(SDValue Vec, - SDValue Idx, - SelectionDAG &DAG, - DebugLoc dl) { +static SDValue Extract128BitVector(SDValue Vec, unsigned IdxVal, + SelectionDAG &DAG, DebugLoc dl) { EVT VT = Vec.getValueType(); - assert(VT.getSizeInBits() == 256 && "Unexpected vector size!"); + assert(VT.is256BitVector() && "Unexpected vector size!"); EVT ElVT = VT.getVectorElementType(); - int Factor = VT.getSizeInBits()/128; + unsigned Factor = VT.getSizeInBits()/128; EVT ResultVT = EVT::getVectorVT(*DAG.getContext(), ElVT, VT.getVectorNumElements()/Factor); // Extract from UNDEF is UNDEF. if (Vec.getOpcode() == ISD::UNDEF) - return DAG.getNode(ISD::UNDEF, dl, ResultVT); - - if (isa(Idx)) { - unsigned IdxVal = cast(Idx)->getZExtValue(); - - // Extract the relevant 128 bits. Generate an EXTRACT_SUBVECTOR - // we can match to VEXTRACTF128. - unsigned ElemsPerChunk = 128 / ElVT.getSizeInBits(); + return DAG.getUNDEF(ResultVT); - // This is the index of the first element of the 128-bit chunk - // we want. - unsigned NormalizedIdxVal = (((IdxVal * ElVT.getSizeInBits()) / 128) - * ElemsPerChunk); + // Extract the relevant 128 bits. Generate an EXTRACT_SUBVECTOR + // we can match to VEXTRACTF128. + unsigned ElemsPerChunk = 128 / ElVT.getSizeInBits(); - SDValue VecIdx = DAG.getConstant(NormalizedIdxVal, MVT::i32); - SDValue Result = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ResultVT, Vec, - VecIdx); + // This is the index of the first element of the 128-bit chunk + // we want. + unsigned NormalizedIdxVal = (((IdxVal * ElVT.getSizeInBits()) / 128) + * ElemsPerChunk); - return Result; - } + SDValue VecIdx = DAG.getConstant(NormalizedIdxVal, MVT::i32); + SDValue Result = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ResultVT, Vec, + VecIdx); - return SDValue(); + return Result; } /// Generate a DAG to put 128-bits into a vector > 128 bits. This @@ -104,34 +97,41 @@ static SDValue Extract128BitVector(SDValue Vec, /// simple superregister reference. Idx is an index in the 128 bits /// we want. It need not be aligned to a 128-bit bounday. That makes /// lowering INSERT_VECTOR_ELT operations easier. -static SDValue Insert128BitVector(SDValue Result, - SDValue Vec, - SDValue Idx, - SelectionDAG &DAG, +static SDValue Insert128BitVector(SDValue Result, SDValue Vec, + unsigned IdxVal, SelectionDAG &DAG, DebugLoc dl) { - if (isa(Idx)) { - EVT VT = Vec.getValueType(); - assert(VT.getSizeInBits() == 128 && "Unexpected vector size!"); + // Inserting UNDEF is Result + if (Vec.getOpcode() == ISD::UNDEF) + return Result; - EVT ElVT = VT.getVectorElementType(); - unsigned IdxVal = cast(Idx)->getZExtValue(); - EVT ResultVT = Result.getValueType(); + EVT VT = Vec.getValueType(); + assert(VT.is128BitVector() && "Unexpected vector size!"); - // Insert the relevant 128 bits. 
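Editor's sketch (not part of this patch): the chunk-index normalization used by
Extract128BitVector and Insert128BitVector above, pulled out as a standalone,
runnable snippet. Only the arithmetic comes from the hunk; the helper name and
the sample element widths are mine.

#include <cassert>

// Round an element index down to the first element of its 128-bit chunk.
static unsigned Normalize128BitIndex(unsigned IdxVal, unsigned EltBits) {
  unsigned ElemsPerChunk = 128 / EltBits;        // e.g. 4 for 32-bit elements
  return ((IdxVal * EltBits) / 128) * ElemsPerChunk;
}

int main() {
  assert(Normalize128BitIndex(3, 32) == 0);      // v8i32: lanes 0-3 -> chunk 0
  assert(Normalize128BitIndex(4, 32) == 4);      // v8i32: lanes 4-7 -> chunk 4
  assert(Normalize128BitIndex(9, 16) == 8);      // v16i16: chunks of 8
  return 0;
}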
- unsigned ElemsPerChunk = 128/ElVT.getSizeInBits(); + EVT ElVT = VT.getVectorElementType(); + EVT ResultVT = Result.getValueType(); - // This is the index of the first element of the 128-bit chunk - // we want. - unsigned NormalizedIdxVal = (((IdxVal * ElVT.getSizeInBits())/128) - * ElemsPerChunk); + // Insert the relevant 128 bits. + unsigned ElemsPerChunk = 128/ElVT.getSizeInBits(); - SDValue VecIdx = DAG.getConstant(NormalizedIdxVal, MVT::i32); - Result = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResultVT, Result, Vec, - VecIdx); - return Result; - } + // This is the index of the first element of the 128-bit chunk + // we want. + unsigned NormalizedIdxVal = (((IdxVal * ElVT.getSizeInBits())/128) + * ElemsPerChunk); - return SDValue(); + SDValue VecIdx = DAG.getConstant(NormalizedIdxVal, MVT::i32); + return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResultVT, Result, Vec, + VecIdx); +} + +/// Concat two 128-bit vectors into a 256 bit vector using VINSERTF128 +/// instructions. This is used because creating CONCAT_VECTOR nodes of +/// BUILD_VECTORS returns a larger BUILD_VECTOR while we're trying to lower +/// large BUILD_VECTORS. +static SDValue Concat128BitVectors(SDValue V1, SDValue V2, EVT VT, + unsigned NumElems, SelectionDAG &DAG, + DebugLoc dl) { + SDValue V = Insert128BitVector(DAG.getUNDEF(VT), V1, 0, DAG, dl); + return Insert128BitVector(V, V2, NumElems/2, DAG, dl); } static TargetLoweringObjectFile *createTLOF(X86TargetMachine &TM) { @@ -140,10 +140,12 @@ static TargetLoweringObjectFile *createTLOF(X86TargetMachine &TM) { if (Subtarget->isTargetEnvMacho()) { if (is64Bit) - return new X8664_MachoTargetObjectFile(); + return new X86_64MachoTargetObjectFile(); return new TargetLoweringObjectFileMachO(); } + if (Subtarget->isTargetLinux()) + return new X86LinuxTargetObjectFile(); if (Subtarget->isTargetELF()) return new TargetLoweringObjectFileELF(); if (Subtarget->isTargetCOFF() && !Subtarget->isTargetEnvMacho()) @@ -162,7 +164,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) TD = getTargetData(); // Set up the TargetLowering object. - static MVT IntVTs[] = { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }; + static const MVT IntVTs[] = { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }; // X86 is weird, it always uses i8 for shift amounts and setcc results. setBooleanContents(ZeroOrOneBooleanContent); @@ -171,11 +173,11 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) // For 64-bit since we have so many registers use the ILP scheduler, for // 32-bit code use the register pressure specific scheduling. - // For 32 bit Atom, use Hybrid (register pressure + latency) scheduling. - if (Subtarget->is64Bit()) + // For Atom, always use ILP scheduling. + if (Subtarget->isAtom()) + setSchedulingPreference(Sched::ILP); + else if (Subtarget->is64Bit()) setSchedulingPreference(Sched::ILP); - else if (Subtarget->isAtom()) - setSchedulingPreference(Sched::Hybrid); else setSchedulingPreference(Sched::RegPressure); setStackPointerRegisterToSaveRestore(X86StackPtr); @@ -215,11 +217,11 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) } // Set up the register classes. 
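Editor's sketch (not part of this patch): Concat128BitVectors, added above,
builds a 256-bit vector with two subvector inserts into UNDEF -- V1 at element
0 and V2 at element NumElems/2. An array model, with -1 standing in for
undefined lanes.

#include <cassert>

int main() {
  const int NumElems = 8;                        // e.g. v8i32 from two v4i32
  const int V1[4] = {0, 1, 2, 3}, V2[4] = {4, 5, 6, 7};
  int V[8] = {-1, -1, -1, -1, -1, -1, -1, -1};   // the UNDEF result vector

  for (int i = 0; i != 4; ++i) V[i] = V1[i];                 // insert at 0
  for (int i = 0; i != 4; ++i) V[NumElems / 2 + i] = V2[i];  // insert at 4

  for (int i = 0; i != 8; ++i)
    assert(V[i] == i);                           // lanes 0-7 filled in order
  return 0;
}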
-  addRegisterClass(MVT::i8, X86::GR8RegisterClass);
-  addRegisterClass(MVT::i16, X86::GR16RegisterClass);
-  addRegisterClass(MVT::i32, X86::GR32RegisterClass);
+  addRegisterClass(MVT::i8, &X86::GR8RegClass);
+  addRegisterClass(MVT::i16, &X86::GR16RegClass);
+  addRegisterClass(MVT::i32, &X86::GR32RegClass);
   if (Subtarget->is64Bit())
-    addRegisterClass(MVT::i64, X86::GR64RegisterClass);
+    addRegisterClass(MVT::i64, &X86::GR64RegClass);

   setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
@@ -345,7 +347,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
   // (low) operations are left as Legal, as there are single-result
   // instructions for this in x86. Using the two-result multiply instructions
   // when both high and low results are needed must be arranged by dagcombine.
-  for (unsigned i = 0, e = 4; i != e; ++i) {
+  for (unsigned i = 0; i != array_lengthof(IntVTs); ++i) {
     MVT VT = IntVTs[i];
     setOperationAction(ISD::MULHS, VT, Expand);
     setOperationAction(ISD::MULHU, VT, Expand);
@@ -492,7 +494,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
   setShouldFoldAtomicFences(true);

   // Expand certain atomics
-  for (unsigned i = 0, e = 4; i != e; ++i) {
+  for (unsigned i = 0; i != array_lengthof(IntVTs); ++i) {
     MVT VT = IntVTs[i];
     setOperationAction(ISD::ATOMIC_CMP_SWAP, VT, Custom);
     setOperationAction(ISD::ATOMIC_LOAD_SUB, VT, Custom);
@@ -567,8 +569,8 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
   if (!TM.Options.UseSoftFloat && X86ScalarSSEf64) {
     // f32 and f64 use SSE.
     // Set up the FP register classes.
-    addRegisterClass(MVT::f32, X86::FR32RegisterClass);
-    addRegisterClass(MVT::f64, X86::FR64RegisterClass);
+    addRegisterClass(MVT::f32, &X86::FR32RegClass);
+    addRegisterClass(MVT::f64, &X86::FR64RegClass);

     // Use ANDPD to simulate FABS.
     setOperationAction(ISD::FABS , MVT::f64, Custom);
@@ -599,8 +601,8 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
   } else if (!TM.Options.UseSoftFloat && X86ScalarSSEf32) {
     // Use SSE for f32, x87 for f64.
     // Set up the FP register classes.
-    addRegisterClass(MVT::f32, X86::FR32RegisterClass);
-    addRegisterClass(MVT::f64, X86::RFP64RegisterClass);
+    addRegisterClass(MVT::f32, &X86::FR32RegClass);
+    addRegisterClass(MVT::f64, &X86::RFP64RegClass);

     // Use ANDPS to simulate FABS.
     setOperationAction(ISD::FABS , MVT::f32, Custom);
@@ -632,8 +634,8 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
   } else if (!TM.Options.UseSoftFloat) {
     // f32 and f64 in x87.
     // Set up the FP register classes.
-    addRegisterClass(MVT::f64, X86::RFP64RegisterClass);
-    addRegisterClass(MVT::f32, X86::RFP32RegisterClass);
+    addRegisterClass(MVT::f64, &X86::RFP64RegClass);
+    addRegisterClass(MVT::f32, &X86::RFP32RegClass);

     setOperationAction(ISD::UNDEF, MVT::f64, Expand);
     setOperationAction(ISD::UNDEF, MVT::f32, Expand);
@@ -660,7 +662,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)

   // Long double always uses X87.
   if (!TM.Options.UseSoftFloat) {
-    addRegisterClass(MVT::f80, X86::RFP80RegisterClass);
+    addRegisterClass(MVT::f80, &X86::RFP80RegClass);
     setOperationAction(ISD::UNDEF, MVT::f80, Expand);
     setOperationAction(ISD::FCOPYSIGN, MVT::f80, Expand);
     {
@@ -705,8 +707,8 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
   // First set operation action for all vector types to either promote
   // (for widening) or expand (for scalarization). Then we will selectively
   // turn on ones that can be effectively codegen'd.
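Editor's sketch (not part of this patch): the array_lengthof idiom the two
loops above switch to, so the bound follows the IntVTs table instead of a
hard-coded 4. A minimal standalone version of the usual reference-to-array
template; LLVM ships its own equivalent helper.

#include <cassert>
#include <cstddef>

template <typename T, std::size_t N>
std::size_t array_lengthof(T (&)[N]) { return N; }  // N deduced at compile time

int main() {
  static const int IntVTs[] = { 8, 16, 32, 64 };    // stand-in for the MVT table
  assert(array_lengthof(IntVTs) == 4);              // tracks added entries
  return 0;
}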
- for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE; - VT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++VT) { + for (int VT = MVT::FIRST_VECTOR_VALUETYPE; + VT <= MVT::LAST_VECTOR_VALUETYPE; ++VT) { setOperationAction(ISD::ADD , (MVT::SimpleValueType)VT, Expand); setOperationAction(ISD::SUB , (MVT::SimpleValueType)VT, Expand); setOperationAction(ISD::FADD, (MVT::SimpleValueType)VT, Expand); @@ -729,6 +731,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::FSIN, (MVT::SimpleValueType)VT, Expand); setOperationAction(ISD::FCOS, (MVT::SimpleValueType)VT, Expand); setOperationAction(ISD::FREM, (MVT::SimpleValueType)VT, Expand); + setOperationAction(ISD::FMA, (MVT::SimpleValueType)VT, Expand); setOperationAction(ISD::FPOWI, (MVT::SimpleValueType)VT, Expand); setOperationAction(ISD::FSQRT, (MVT::SimpleValueType)VT, Expand); setOperationAction(ISD::FCOPYSIGN, (MVT::SimpleValueType)VT, Expand); @@ -764,8 +767,8 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::ZERO_EXTEND, (MVT::SimpleValueType)VT, Expand); setOperationAction(ISD::ANY_EXTEND, (MVT::SimpleValueType)VT, Expand); setOperationAction(ISD::VSELECT, (MVT::SimpleValueType)VT, Expand); - for (unsigned InnerVT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE; - InnerVT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++InnerVT) + for (int InnerVT = MVT::FIRST_VECTOR_VALUETYPE; + InnerVT <= MVT::LAST_VECTOR_VALUETYPE; ++InnerVT) setTruncStoreAction((MVT::SimpleValueType)VT, (MVT::SimpleValueType)InnerVT, Expand); setLoadExtAction(ISD::SEXTLOAD, (MVT::SimpleValueType)VT, Expand); @@ -776,7 +779,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) // FIXME: In order to prevent SSE instructions being expanded to MMX ones // with -msoft-float, disable use of MMX as well. if (!TM.Options.UseSoftFloat && Subtarget->hasMMX()) { - addRegisterClass(MVT::x86mmx, X86::VR64RegisterClass); + addRegisterClass(MVT::x86mmx, &X86::VR64RegClass); // No operations on x86mmx supported, everything uses intrinsics. } @@ -813,7 +816,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::BITCAST, MVT::v1i64, Expand); if (!TM.Options.UseSoftFloat && Subtarget->hasSSE1()) { - addRegisterClass(MVT::v4f32, X86::VR128RegisterClass); + addRegisterClass(MVT::v4f32, &X86::VR128RegClass); setOperationAction(ISD::FADD, MVT::v4f32, Legal); setOperationAction(ISD::FSUB, MVT::v4f32, Legal); @@ -826,18 +829,17 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4f32, Custom); setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom); setOperationAction(ISD::SELECT, MVT::v4f32, Custom); - setOperationAction(ISD::SETCC, MVT::v4f32, Custom); } if (!TM.Options.UseSoftFloat && Subtarget->hasSSE2()) { - addRegisterClass(MVT::v2f64, X86::VR128RegisterClass); + addRegisterClass(MVT::v2f64, &X86::VR128RegClass); // FIXME: Unfortunately -soft-float and -no-implicit-float means XMM // registers cannot be used even for integer operations. 
- addRegisterClass(MVT::v16i8, X86::VR128RegisterClass); - addRegisterClass(MVT::v8i16, X86::VR128RegisterClass); - addRegisterClass(MVT::v4i32, X86::VR128RegisterClass); - addRegisterClass(MVT::v2i64, X86::VR128RegisterClass); + addRegisterClass(MVT::v16i8, &X86::VR128RegClass); + addRegisterClass(MVT::v8i16, &X86::VR128RegClass); + addRegisterClass(MVT::v4i32, &X86::VR128RegClass); + addRegisterClass(MVT::v2i64, &X86::VR128RegClass); setOperationAction(ISD::ADD, MVT::v16i8, Legal); setOperationAction(ISD::ADD, MVT::v8i16, Legal); @@ -867,27 +869,18 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom); setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom); - setOperationAction(ISD::CONCAT_VECTORS, MVT::v2f64, Custom); - setOperationAction(ISD::CONCAT_VECTORS, MVT::v2i64, Custom); - setOperationAction(ISD::CONCAT_VECTORS, MVT::v16i8, Custom); - setOperationAction(ISD::CONCAT_VECTORS, MVT::v8i16, Custom); - setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i32, Custom); - // Custom lower build_vector, vector_shuffle, and extract_vector_elt. - for (unsigned i = (unsigned)MVT::v16i8; i != (unsigned)MVT::v2i64; ++i) { - EVT VT = (MVT::SimpleValueType)i; + for (int i = MVT::v16i8; i != MVT::v2i64; ++i) { + MVT VT = (MVT::SimpleValueType)i; // Do not attempt to custom lower non-power-of-2 vectors if (!isPowerOf2_32(VT.getVectorNumElements())) continue; // Do not attempt to custom lower non-128-bit vectors if (!VT.is128BitVector()) continue; - setOperationAction(ISD::BUILD_VECTOR, - VT.getSimpleVT().SimpleTy, Custom); - setOperationAction(ISD::VECTOR_SHUFFLE, - VT.getSimpleVT().SimpleTy, Custom); - setOperationAction(ISD::EXTRACT_VECTOR_ELT, - VT.getSimpleVT().SimpleTy, Custom); + setOperationAction(ISD::BUILD_VECTOR, VT, Custom); + setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); + setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); } setOperationAction(ISD::BUILD_VECTOR, MVT::v2f64, Custom); @@ -903,24 +896,23 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) } // Promote v16i8, v8i16, v4i32 load, select, and, or, xor to v2i64. 
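Editor's sketch (not part of this patch): why the promotion performed by the
loop that follows is sound for the bitwise operations -- AND, OR and XOR do not
care about element boundaries, so a v16i8 AND can be carried out as a v2i64
AND on the same bits. A plain C++ stand-in using byte arrays.

#include <cassert>
#include <cstdint>
#include <cstring>

int main() {
  uint8_t A[16], B[16], ByteWise[16], Promoted[16];
  for (int i = 0; i != 16; ++i) {
    A[i] = uint8_t(i * 7);
    B[i] = uint8_t(0xA5 ^ i);
  }

  for (int i = 0; i != 16; ++i)                  // the v16i8 AND
    ByteWise[i] = A[i] & B[i];

  uint64_t A64[2], B64[2], R64[2];               // the same bits as v2i64
  std::memcpy(A64, A, 16);
  std::memcpy(B64, B, 16);
  R64[0] = A64[0] & B64[0];
  R64[1] = A64[1] & B64[1];
  std::memcpy(Promoted, R64, 16);

  assert(std::memcmp(ByteWise, Promoted, 16) == 0);  // identical results
  return 0;
}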
- for (unsigned i = (unsigned)MVT::v16i8; i != (unsigned)MVT::v2i64; i++) { - MVT::SimpleValueType SVT = (MVT::SimpleValueType)i; - EVT VT = SVT; + for (int i = MVT::v16i8; i != MVT::v2i64; ++i) { + MVT VT = (MVT::SimpleValueType)i; // Do not attempt to promote non-128-bit vectors if (!VT.is128BitVector()) continue; - setOperationAction(ISD::AND, SVT, Promote); - AddPromotedToType (ISD::AND, SVT, MVT::v2i64); - setOperationAction(ISD::OR, SVT, Promote); - AddPromotedToType (ISD::OR, SVT, MVT::v2i64); - setOperationAction(ISD::XOR, SVT, Promote); - AddPromotedToType (ISD::XOR, SVT, MVT::v2i64); - setOperationAction(ISD::LOAD, SVT, Promote); - AddPromotedToType (ISD::LOAD, SVT, MVT::v2i64); - setOperationAction(ISD::SELECT, SVT, Promote); - AddPromotedToType (ISD::SELECT, SVT, MVT::v2i64); + setOperationAction(ISD::AND, VT, Promote); + AddPromotedToType (ISD::AND, VT, MVT::v2i64); + setOperationAction(ISD::OR, VT, Promote); + AddPromotedToType (ISD::OR, VT, MVT::v2i64); + setOperationAction(ISD::XOR, VT, Promote); + AddPromotedToType (ISD::XOR, VT, MVT::v2i64); + setOperationAction(ISD::LOAD, VT, Promote); + AddPromotedToType (ISD::LOAD, VT, MVT::v2i64); + setOperationAction(ISD::SELECT, VT, Promote); + AddPromotedToType (ISD::SELECT, VT, MVT::v2i64); } setTruncStoreAction(MVT::f64, MVT::f32, Expand); @@ -1007,16 +999,13 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) } } - if (Subtarget->hasSSE42()) - setOperationAction(ISD::SETCC, MVT::v2i64, Custom); - if (!TM.Options.UseSoftFloat && Subtarget->hasAVX()) { - addRegisterClass(MVT::v32i8, X86::VR256RegisterClass); - addRegisterClass(MVT::v16i16, X86::VR256RegisterClass); - addRegisterClass(MVT::v8i32, X86::VR256RegisterClass); - addRegisterClass(MVT::v8f32, X86::VR256RegisterClass); - addRegisterClass(MVT::v4i64, X86::VR256RegisterClass); - addRegisterClass(MVT::v4f64, X86::VR256RegisterClass); + addRegisterClass(MVT::v32i8, &X86::VR256RegClass); + addRegisterClass(MVT::v16i16, &X86::VR256RegClass); + addRegisterClass(MVT::v8i32, &X86::VR256RegClass); + addRegisterClass(MVT::v8f32, &X86::VR256RegClass); + addRegisterClass(MVT::v4i64, &X86::VR256RegClass); + addRegisterClass(MVT::v4f64, &X86::VR256RegClass); setOperationAction(ISD::LOAD, MVT::v8f32, Legal); setOperationAction(ISD::LOAD, MVT::v4f64, Legal); @@ -1040,13 +1029,6 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::SINT_TO_FP, MVT::v8i32, Legal); setOperationAction(ISD::FP_ROUND, MVT::v4f32, Legal); - setOperationAction(ISD::CONCAT_VECTORS, MVT::v4f64, Custom); - setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i64, Custom); - setOperationAction(ISD::CONCAT_VECTORS, MVT::v8f32, Custom); - setOperationAction(ISD::CONCAT_VECTORS, MVT::v8i32, Custom); - setOperationAction(ISD::CONCAT_VECTORS, MVT::v32i8, Custom); - setOperationAction(ISD::CONCAT_VECTORS, MVT::v16i16, Custom); - setOperationAction(ISD::SRL, MVT::v16i16, Custom); setOperationAction(ISD::SRL, MVT::v32i8, Custom); @@ -1070,6 +1052,15 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::VSELECT, MVT::v8i32, Legal); setOperationAction(ISD::VSELECT, MVT::v8f32, Legal); + if (Subtarget->hasFMA()) { + setOperationAction(ISD::FMA, MVT::v8f32, Custom); + setOperationAction(ISD::FMA, MVT::v4f64, Custom); + setOperationAction(ISD::FMA, MVT::v4f32, Custom); + setOperationAction(ISD::FMA, MVT::v2f64, Custom); + setOperationAction(ISD::FMA, MVT::f32, Custom); + setOperationAction(ISD::FMA, MVT::f64, Custom); + } + if (Subtarget->hasAVX2()) { 
setOperationAction(ISD::ADD, MVT::v4i64, Legal); setOperationAction(ISD::ADD, MVT::v8i32, Legal); @@ -1121,60 +1112,60 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) } // Custom lower several nodes for 256-bit types. - for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE; - i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) { - MVT::SimpleValueType SVT = (MVT::SimpleValueType)i; - EVT VT = SVT; + for (int i = MVT::FIRST_VECTOR_VALUETYPE; + i <= MVT::LAST_VECTOR_VALUETYPE; ++i) { + MVT VT = (MVT::SimpleValueType)i; // Extract subvector is special because the value type // (result) is 128-bit but the source is 256-bit wide. if (VT.is128BitVector()) - setOperationAction(ISD::EXTRACT_SUBVECTOR, SVT, Custom); + setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom); // Do not attempt to custom lower other non-256-bit vectors if (!VT.is256BitVector()) continue; - setOperationAction(ISD::BUILD_VECTOR, SVT, Custom); - setOperationAction(ISD::VECTOR_SHUFFLE, SVT, Custom); - setOperationAction(ISD::INSERT_VECTOR_ELT, SVT, Custom); - setOperationAction(ISD::EXTRACT_VECTOR_ELT, SVT, Custom); - setOperationAction(ISD::SCALAR_TO_VECTOR, SVT, Custom); - setOperationAction(ISD::INSERT_SUBVECTOR, SVT, Custom); + setOperationAction(ISD::BUILD_VECTOR, VT, Custom); + setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); + setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom); + setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); + setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom); + setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom); + setOperationAction(ISD::CONCAT_VECTORS, VT, Custom); } // Promote v32i8, v16i16, v8i32 select, and, or, xor to v4i64. - for (unsigned i = (unsigned)MVT::v32i8; i != (unsigned)MVT::v4i64; ++i) { - MVT::SimpleValueType SVT = (MVT::SimpleValueType)i; - EVT VT = SVT; + for (int i = MVT::v32i8; i != MVT::v4i64; ++i) { + MVT VT = (MVT::SimpleValueType)i; // Do not attempt to promote non-256-bit vectors if (!VT.is256BitVector()) continue; - setOperationAction(ISD::AND, SVT, Promote); - AddPromotedToType (ISD::AND, SVT, MVT::v4i64); - setOperationAction(ISD::OR, SVT, Promote); - AddPromotedToType (ISD::OR, SVT, MVT::v4i64); - setOperationAction(ISD::XOR, SVT, Promote); - AddPromotedToType (ISD::XOR, SVT, MVT::v4i64); - setOperationAction(ISD::LOAD, SVT, Promote); - AddPromotedToType (ISD::LOAD, SVT, MVT::v4i64); - setOperationAction(ISD::SELECT, SVT, Promote); - AddPromotedToType (ISD::SELECT, SVT, MVT::v4i64); + setOperationAction(ISD::AND, VT, Promote); + AddPromotedToType (ISD::AND, VT, MVT::v4i64); + setOperationAction(ISD::OR, VT, Promote); + AddPromotedToType (ISD::OR, VT, MVT::v4i64); + setOperationAction(ISD::XOR, VT, Promote); + AddPromotedToType (ISD::XOR, VT, MVT::v4i64); + setOperationAction(ISD::LOAD, VT, Promote); + AddPromotedToType (ISD::LOAD, VT, MVT::v4i64); + setOperationAction(ISD::SELECT, VT, Promote); + AddPromotedToType (ISD::SELECT, VT, MVT::v4i64); } } // SIGN_EXTEND_INREGs are evaluated by the extend type. Handle the expansion // of this type with custom code. - for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE; - VT != (unsigned)MVT::LAST_VECTOR_VALUETYPE; VT++) { + for (int VT = MVT::FIRST_VECTOR_VALUETYPE; + VT != MVT::LAST_VECTOR_VALUETYPE; VT++) { setOperationAction(ISD::SIGN_EXTEND_INREG, (MVT::SimpleValueType)VT, Custom); } // We want to custom lower some of our intrinsics. 
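Editor's sketch (not part of this patch): the SIGN_EXTEND_INREG operation whose
expansion is custom-handled above -- re-sign-extending a narrow value held in a
wider register, in place. A standalone i8-in-i32 version; the helper name is
mine, and the int8_t narrowing relies on the usual two's-complement wrap-around
of mainstream compilers.

#include <cassert>
#include <cstdint>

static int32_t signExtendInReg8(int32_t X) {
  return int32_t(int8_t(X & 0xFF));  // keep the low 8 bits, replicate the sign
}

int main() {
  assert(signExtendInReg8(0x000000FF) == -1);    // 0xFF re-read as signed i8
  assert(signExtendInReg8(0x0000007F) == 127);   // positive values unchanged
  return 0;
}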
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); + setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom); // Only custom-lower 64-bit SADDO and friends on 64-bit because we don't @@ -1218,17 +1209,21 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setTargetDAGCombine(ISD::ADD); setTargetDAGCombine(ISD::FADD); setTargetDAGCombine(ISD::FSUB); + setTargetDAGCombine(ISD::FMA); setTargetDAGCombine(ISD::SUB); setTargetDAGCombine(ISD::LOAD); setTargetDAGCombine(ISD::STORE); setTargetDAGCombine(ISD::ZERO_EXTEND); + setTargetDAGCombine(ISD::ANY_EXTEND); setTargetDAGCombine(ISD::SIGN_EXTEND); setTargetDAGCombine(ISD::TRUNCATE); + setTargetDAGCombine(ISD::UINT_TO_FP); setTargetDAGCombine(ISD::SINT_TO_FP); + setTargetDAGCombine(ISD::SETCC); + setTargetDAGCombine(ISD::FP_TO_SINT); if (Subtarget->is64Bit()) setTargetDAGCombine(ISD::MUL); - if (Subtarget->hasBMI()) - setTargetDAGCombine(ISD::XOR); + setTargetDAGCombine(ISD::XOR); computeRegisterProperties(); @@ -1243,6 +1238,9 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setPrefLoopAlignment(4); // 2^4 bytes. benefitFromCodePlacementOpt = true; + // Predictable cmov don't hurt on atom because it's in-order. + predictableSelectIsExpensive = !Subtarget->isAtom(); + setPrefFunctionAlignment(4); // 2^4 bytes. } @@ -1276,7 +1274,6 @@ static void getMaxByValAlign(Type *Ty, unsigned &MaxAlign) { break; } } - return; } /// getByValTypeAlignment - Return the desired alignment for ByVal aggregate @@ -1411,18 +1408,19 @@ X86TargetLowering::findRepresentativeClass(EVT VT) const{ default: return TargetLowering::findRepresentativeClass(VT); case MVT::i8: case MVT::i16: case MVT::i32: case MVT::i64: - RRC = (Subtarget->is64Bit() - ? X86::GR64RegisterClass : X86::GR32RegisterClass); + RRC = Subtarget->is64Bit() ? + (const TargetRegisterClass*)&X86::GR64RegClass : + (const TargetRegisterClass*)&X86::GR32RegClass; break; case MVT::x86mmx: - RRC = X86::VR64RegisterClass; + RRC = &X86::VR64RegClass; break; case MVT::f32: case MVT::f64: case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64: case MVT::v4f32: case MVT::v2f64: case MVT::v32i8: case MVT::v8i32: case MVT::v4i64: case MVT::v8f32: case MVT::v4f64: - RRC = X86::VR128RegisterClass; + RRC = &X86::VR128RegClass; break; } return std::make_pair(RRC, Cost); @@ -1457,7 +1455,7 @@ bool X86TargetLowering::getStackCookieLocation(unsigned &AddressSpace, bool X86TargetLowering::CanLowerReturn(CallingConv::ID CallConv, - MachineFunction &MF, bool isVarArg, + MachineFunction &MF, bool isVarArg, const SmallVectorImpl &Outs, LLVMContext &Context) const { SmallVector RVLocs; @@ -1501,6 +1499,16 @@ X86TargetLowering::LowerReturn(SDValue Chain, SDValue ValToCopy = OutVals[i]; EVT ValVT = ValToCopy.getValueType(); + // Promote values to the appropriate types + if (VA.getLocInfo() == CCValAssign::SExt) + ValToCopy = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), ValToCopy); + else if (VA.getLocInfo() == CCValAssign::ZExt) + ValToCopy = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), ValToCopy); + else if (VA.getLocInfo() == CCValAssign::AExt) + ValToCopy = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), ValToCopy); + else if (VA.getLocInfo() == CCValAssign::BCvt) + ValToCopy = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), ValToCopy); + // If this is x86-64, and we disabled SSE, we can't return FP values, // or SSE or MMX vectors. 
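Editor's sketch (not part of this patch): the difference between the
CCValAssign::SExt and CCValAssign::ZExt promotions that the LowerReturn hunk
above now applies before copying a return value into its location register,
shown on a plain i8-to-i32 widening.

#include <cassert>
#include <cstdint>

int main() {
  int8_t V = -1;                            // bit pattern 0xFF as an i8
  int32_t SExt = int32_t(V);                // CCValAssign::SExt
  uint32_t ZExt = uint32_t(uint8_t(V));     // CCValAssign::ZExt
  assert(SExt == -1);                       // sign bit replicated: 0xFFFFFFFF
  assert(ZExt == 0xFF);                     // high bits cleared
  // AExt leaves the high bits unspecified; BCvt only reinterprets the bits.
  return 0;
}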
if ((ValVT == MVT::f32 || ValVT == MVT::f64 || @@ -1638,7 +1646,7 @@ X86TargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, SmallVector RVLocs; bool Is64Bit = Subtarget->is64Bit(); CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), - getTargetMachine(), RVLocs, *DAG.getContext()); + getTargetMachine(), RVLocs, *DAG.getContext()); CCInfo.AnalyzeCallResult(Ins, RetCC_X86); // Copy all of the result registers out of their specified physreg. @@ -1655,7 +1663,7 @@ X86TargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, SDValue Val; // If this is a call to a function that returns an fp value on the floating - // point stack, we must guarantee the the value is popped from the stack, so + // point stack, we must guarantee the value is popped from the stack, so // a CopyFromReg is not good enough - the copy instruction may be eliminated // if the return value is not used. We use the FpPOP_RETVAL instruction // instead. @@ -1699,21 +1707,37 @@ X86TargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, /// CallIsStructReturn - Determines whether a call uses struct return /// semantics. -static bool CallIsStructReturn(const SmallVectorImpl &Outs) { +enum StructReturnType { + NotStructReturn, + RegStructReturn, + StackStructReturn +}; +static StructReturnType +callIsStructReturn(const SmallVectorImpl &Outs) { if (Outs.empty()) - return false; + return NotStructReturn; - return Outs[0].Flags.isSRet(); + const ISD::ArgFlagsTy &Flags = Outs[0].Flags; + if (!Flags.isSRet()) + return NotStructReturn; + if (Flags.isInReg()) + return RegStructReturn; + return StackStructReturn; } /// ArgsAreStructReturn - Determines whether a function uses struct /// return semantics. -static bool -ArgsAreStructReturn(const SmallVectorImpl &Ins) { +static StructReturnType +argsAreStructReturn(const SmallVectorImpl &Ins) { if (Ins.empty()) - return false; + return NotStructReturn; - return Ins[0].Flags.isSRet(); + const ISD::ArgFlagsTy &Flags = Ins[0].Flags; + if (!Flags.isSRet()) + return NotStructReturn; + if (Flags.isInReg()) + return RegStructReturn; + return StackStructReturn; } /// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified @@ -1850,19 +1874,19 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, EVT RegVT = VA.getLocVT(); const TargetRegisterClass *RC; if (RegVT == MVT::i32) - RC = X86::GR32RegisterClass; + RC = &X86::GR32RegClass; else if (Is64Bit && RegVT == MVT::i64) - RC = X86::GR64RegisterClass; + RC = &X86::GR64RegClass; else if (RegVT == MVT::f32) - RC = X86::FR32RegisterClass; + RC = &X86::FR32RegClass; else if (RegVT == MVT::f64) - RC = X86::FR64RegisterClass; - else if (RegVT.isVector() && RegVT.getSizeInBits() == 256) - RC = X86::VR256RegisterClass; - else if (RegVT.isVector() && RegVT.getSizeInBits() == 128) - RC = X86::VR128RegisterClass; + RC = &X86::FR64RegClass; + else if (RegVT.is256BitVector()) + RC = &X86::VR256RegClass; + else if (RegVT.is128BitVector()) + RC = &X86::VR128RegClass; else if (RegVT == MVT::x86mmx) - RC = X86::VR64RegisterClass; + RC = &X86::VR64RegClass; else llvm_unreachable("Unknown argument type!"); @@ -2004,7 +2028,7 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, SDValue FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), RSFIN, DAG.getIntPtrConstant(Offset)); unsigned VReg = MF.addLiveIn(GPR64ArgRegs[NumIntRegs], - X86::GR64RegisterClass); + &X86::GR64RegClass); SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64); SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, @@ -2020,7 
+2044,7 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, SmallVector SaveXMMOps; SaveXMMOps.push_back(Chain); - unsigned AL = MF.addLiveIn(X86::AL, X86::GR8RegisterClass); + unsigned AL = MF.addLiveIn(X86::AL, &X86::GR8RegClass); SDValue ALVal = DAG.getCopyFromReg(DAG.getEntryNode(), dl, AL, MVT::i8); SaveXMMOps.push_back(ALVal); @@ -2031,7 +2055,7 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, for (; NumXMMRegs != TotalNumXMMRegs; ++NumXMMRegs) { unsigned VReg = MF.addLiveIn(XMMArgRegs64Bit[NumXMMRegs], - X86::VR128RegisterClass); + &X86::VR128RegClass); SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::v4f32); SaveXMMOps.push_back(Val); } @@ -2054,7 +2078,7 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, FuncInfo->setBytesToPopOnReturn(0); // Callee pops nothing. // If this is an sret function, the return should pop the hidden pointer. if (!Is64Bit && !IsTailCallConvention(CallConv) && !IsWindows && - ArgsAreStructReturn(Ins)) + argsAreStructReturn(Ins) == StackStructReturn) FuncInfo->setBytesToPopOnReturn(4); } @@ -2127,19 +2151,24 @@ EmitTailCallStoreRetAddr(SelectionDAG & DAG, MachineFunction &MF, } SDValue -X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee, - CallingConv::ID CallConv, bool isVarArg, - bool doesNotRet, bool &isTailCall, - const SmallVectorImpl &Outs, - const SmallVectorImpl &OutVals, - const SmallVectorImpl &Ins, - DebugLoc dl, SelectionDAG &DAG, +X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, SmallVectorImpl &InVals) const { + SelectionDAG &DAG = CLI.DAG; + DebugLoc &dl = CLI.DL; + SmallVector &Outs = CLI.Outs; + SmallVector &OutVals = CLI.OutVals; + SmallVector &Ins = CLI.Ins; + SDValue Chain = CLI.Chain; + SDValue Callee = CLI.Callee; + CallingConv::ID CallConv = CLI.CallConv; + bool &isTailCall = CLI.IsTailCall; + bool isVarArg = CLI.IsVarArg; + MachineFunction &MF = DAG.getMachineFunction(); bool Is64Bit = Subtarget->is64Bit(); bool IsWin64 = Subtarget->isTargetWin64(); bool IsWindows = Subtarget->isTargetWindows(); - bool IsStructRet = CallIsStructReturn(Outs); + StructReturnType SR = callIsStructReturn(Outs); bool IsSibcall = false; if (MF.getTarget().Options.DisableTailCalls) @@ -2148,8 +2177,9 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee, if (isTailCall) { // Check if it's really possible to do a tail call. isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, - isVarArg, IsStructRet, MF.getFunction()->hasStructRetAttr(), - Outs, OutVals, Ins, DAG); + isVarArg, SR != NotStructReturn, + MF.getFunction()->hasStructRetAttr(), + Outs, OutVals, Ins, DAG); // Sibcalls are automatically detected tailcalls which do not require // ABI changes. @@ -2231,7 +2261,7 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee, Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, RegVT, Arg); break; case CCValAssign::AExt: - if (RegVT.isVector() && RegVT.getSizeInBits() == 128) { + if (RegVT.is128BitVector()) { // Special case: passing MMX values in XMM registers. Arg = DAG.getNode(ISD::BITCAST, dl, MVT::i64, Arg); Arg = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, Arg); @@ -2282,27 +2312,12 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee, Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &MemOpChains[0], MemOpChains.size()); - // Build a sequence of copy-to-reg nodes chained together with token chain - // and flag operands which copy the outgoing args into registers. 
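Editor's sketch (not part of this patch): the three-way sret classification
introduced above, mirrored with a hypothetical Flags stand-in for
ISD::ArgFlagsTy. Only StackStructReturn should leave the callee popping the
hidden pointer, which is what the setBytesToPopOnReturn check now tests.

#include <cassert>

enum StructReturnType { NotStructReturn, RegStructReturn, StackStructReturn };

struct Flags { bool SRet; bool InReg; };    // hypothetical stand-in

static StructReturnType classify(const Flags *FirstArg) {
  if (!FirstArg || !FirstArg->SRet)
    return NotStructReturn;                 // empty list or no sret attribute
  return FirstArg->InReg ? RegStructReturn : StackStructReturn;
}

int main() {
  const Flags Plain = { true, false }, InReg = { true, true };
  assert(classify(0) == NotStructReturn);
  assert(classify(&InReg) == RegStructReturn);   // sret passed in a register
  assert(classify(&Plain) == StackStructReturn); // callee pops the pointer
  return 0;
}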
- SDValue InFlag; - // Tail call byval lowering might overwrite argument registers so in case of - // tail call optimization the copies to registers are lowered later. - if (!isTailCall) - for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { - Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first, - RegsToPass[i].second, InFlag); - InFlag = Chain.getValue(1); - } - if (Subtarget->isPICStyleGOT()) { // ELF / PIC requires GOT in the EBX register before function calls via PLT // GOT pointer. if (!isTailCall) { - Chain = DAG.getCopyToReg(Chain, dl, X86::EBX, - DAG.getNode(X86ISD::GlobalBaseReg, - DebugLoc(), getPointerTy()), - InFlag); - InFlag = Chain.getValue(1); + RegsToPass.push_back(std::make_pair(unsigned(X86::EBX), + DAG.getNode(X86ISD::GlobalBaseReg, DebugLoc(), getPointerTy()))); } else { // If we are tail calling and generating PIC/GOT style code load the // address of the callee into ECX. The value in ecx is used as target of @@ -2340,12 +2355,10 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee, assert((Subtarget->hasSSE1() || !NumXMMRegs) && "SSE registers cannot be used when SSE is disabled"); - Chain = DAG.getCopyToReg(Chain, dl, X86::AL, - DAG.getConstant(NumXMMRegs, MVT::i8), InFlag); - InFlag = Chain.getValue(1); + RegsToPass.push_back(std::make_pair(unsigned(X86::AL), + DAG.getConstant(NumXMMRegs, MVT::i8))); } - // For tail calls lower the arguments to the 'real' stack slot. if (isTailCall) { // Force all the incoming stack arguments to be loaded from the stack @@ -2359,8 +2372,6 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee, SmallVector MemOpChains2; SDValue FIN; int FI = 0; - // Do not flag preceding copytoreg stuff together with the following stuff. - InFlag = SDValue(); if (getTargetMachine().Options.GuaranteedTailCallOpt) { for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { CCValAssign &VA = ArgLocs[i]; @@ -2400,19 +2411,20 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee, Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &MemOpChains2[0], MemOpChains2.size()); - // Copy arguments to their registers. - for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { - Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first, - RegsToPass[i].second, InFlag); - InFlag = Chain.getValue(1); - } - InFlag =SDValue(); - // Store the return address to the appropriate stack slot. Chain = EmitTailCallStoreRetAddr(DAG, MF, Chain, RetAddrFrIdx, Is64Bit, FPDiff, dl); } + // Build a sequence of copy-to-reg nodes chained together with token chain + // and flag operands which copy the outgoing args into registers. + SDValue InFlag; + for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { + Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first, + RegsToPass[i].second, InFlag); + InFlag = Chain.getValue(1); + } + if (getTargetMachine().getCodeModel() == CodeModel::Large) { assert(Is64Bit && "Large code model is only legal in 64-bit mode."); // In the 64-bit large code model, we have to make all calls @@ -2514,14 +2526,6 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee, Ops.push_back(DAG.getRegister(RegsToPass[i].first, RegsToPass[i].second.getValueType())); - // Add an implicit use GOT pointer in EBX. - if (!isTailCall && Subtarget->isPICStyleGOT()) - Ops.push_back(DAG.getRegister(X86::EBX, getPointerTy())); - - // Add an implicit use of AL for non-Windows x86 64-bit vararg functions. 
- if (Is64Bit && isVarArg && !IsWin64) - Ops.push_back(DAG.getRegister(X86::AL, MVT::i8)); - // Add a register mask operand representing the call-preserved registers. const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo(); const uint32_t *Mask = TRI->getCallPreservedMask(CallConv); @@ -2551,7 +2555,7 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee, getTargetMachine().Options.GuaranteedTailCallOpt)) NumBytesForCalleeToPush = NumBytes; // Callee pops everything else if (!Is64Bit && !IsTailCallConvention(CallConv) && !IsWindows && - IsStructRet) + SR == StackStructReturn) // If this is a call to a struct-return function, the callee // pops the hidden struct pointer, so we have to push it back. // This is common for Darwin/X86, Linux & Mingw32 targets. @@ -2743,7 +2747,7 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, SmallVector ArgLocs; CCState CCInfo(CalleeCC, isVarArg, DAG.getMachineFunction(), - getTargetMachine(), ArgLocs, *DAG.getContext()); + getTargetMachine(), ArgLocs, *DAG.getContext()); CCInfo.AnalyzeCallOperands(Outs, CC_X86); for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) @@ -2764,7 +2768,7 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, if (Unused) { SmallVector RVLocs; CCState CCInfo(CalleeCC, false, DAG.getMachineFunction(), - getTargetMachine(), RVLocs, *DAG.getContext()); + getTargetMachine(), RVLocs, *DAG.getContext()); CCInfo.AnalyzeCallResult(Ins, RetCC_X86); for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) { CCValAssign &VA = RVLocs[i]; @@ -2778,12 +2782,12 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, if (!CCMatch) { SmallVector RVLocs1; CCState CCInfo1(CalleeCC, false, DAG.getMachineFunction(), - getTargetMachine(), RVLocs1, *DAG.getContext()); + getTargetMachine(), RVLocs1, *DAG.getContext()); CCInfo1.AnalyzeCallResult(Ins, RetCC_X86); SmallVector RVLocs2; CCState CCInfo2(CallerCC, false, DAG.getMachineFunction(), - getTargetMachine(), RVLocs2, *DAG.getContext()); + getTargetMachine(), RVLocs2, *DAG.getContext()); CCInfo2.AnalyzeCallResult(Ins, RetCC_X86); if (RVLocs1.size() != RVLocs2.size()) @@ -2810,7 +2814,7 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, // argument is passed on the stack. SmallVector ArgLocs; CCState CCInfo(CalleeCC, isVarArg, DAG.getMachineFunction(), - getTargetMachine(), ArgLocs, *DAG.getContext()); + getTargetMachine(), ArgLocs, *DAG.getContext()); // Allocate shadow area for Win64 if (Subtarget->isTargetWin64()) { @@ -2872,8 +2876,9 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, } FastISel * -X86TargetLowering::createFastISel(FunctionLoweringInfo &funcInfo) const { - return X86::createFastISel(funcInfo); +X86TargetLowering::createFastISel(FunctionLoweringInfo &funcInfo, + const TargetLibraryInfo *libInfo) const { + return X86::createFastISel(funcInfo, libInfo); } @@ -2911,6 +2916,7 @@ static bool isTargetShuffle(unsigned Opcode) { case X86ISD::UNPCKH: case X86ISD::VPERMILP: case X86ISD::VPERM2X128: + case X86ISD::VPERMI: return true; } } @@ -3051,10 +3057,12 @@ static unsigned TranslateX86CC(ISD::CondCode SetCCOpcode, bool isFP, // X > -1 -> X == 0, jump !sign. RHS = DAG.getConstant(0, RHS.getValueType()); return X86::COND_NS; - } else if (SetCCOpcode == ISD::SETLT && RHSC->isNullValue()) { + } + if (SetCCOpcode == ISD::SETLT && RHSC->isNullValue()) { // X < 0 -> X == 0, jump on sign. 
return X86::COND_S; - } else if (SetCCOpcode == ISD::SETLT && RHSC->getZExtValue() == 1) { + } + if (SetCCOpcode == ISD::SETLT && RHSC->getZExtValue() == 1) { // X < 1 -> X <= 0 RHS = DAG.getConstant(0, RHS.getValueType()); return X86::COND_LE; @@ -3170,12 +3178,12 @@ static bool isUndefOrEqual(int Val, int CmpVal) { return false; } -/// isSequentialOrUndefInRange - Return true if every element in Mask, begining +/// isSequentialOrUndefInRange - Return true if every element in Mask, beginning /// from position Pos and ending in Pos+Size, falls within the specified /// sequential range (L, L+Pos]. or is undef. static bool isSequentialOrUndefInRange(ArrayRef Mask, - int Pos, int Size, int Low) { - for (int i = Pos, e = Pos+Size; i != e; ++i, ++Low) + unsigned Pos, unsigned Size, int Low) { + for (unsigned i = Pos, e = Pos+Size; i != e; ++i, ++Low) if (!isUndefOrEqual(Mask[i], Low)) return false; return true; @@ -3194,8 +3202,8 @@ static bool isPSHUFDMask(ArrayRef Mask, EVT VT) { /// isPSHUFHWMask - Return true if the node specifies a shuffle of elements that /// is suitable for input to PSHUFHW. -static bool isPSHUFHWMask(ArrayRef Mask, EVT VT) { - if (VT != MVT::v8i16) +static bool isPSHUFHWMask(ArrayRef Mask, EVT VT, bool HasAVX2) { + if (VT != MVT::v8i16 && (!HasAVX2 || VT != MVT::v16i16)) return false; // Lower quadword copied in order or undef. @@ -3204,16 +3212,27 @@ static bool isPSHUFHWMask(ArrayRef Mask, EVT VT) { // Upper quadword shuffled. for (unsigned i = 4; i != 8; ++i) - if (Mask[i] >= 0 && (Mask[i] < 4 || Mask[i] > 7)) + if (!isUndefOrInRange(Mask[i], 4, 8)) return false; + if (VT == MVT::v16i16) { + // Lower quadword copied in order or undef. + if (!isSequentialOrUndefInRange(Mask, 8, 4, 8)) + return false; + + // Upper quadword shuffled. + for (unsigned i = 12; i != 16; ++i) + if (!isUndefOrInRange(Mask[i], 12, 16)) + return false; + } + return true; } /// isPSHUFLWMask - Return true if the node specifies a shuffle of elements that /// is suitable for input to PSHUFLW. -static bool isPSHUFLWMask(ArrayRef Mask, EVT VT) { - if (VT != MVT::v8i16) +static bool isPSHUFLWMask(ArrayRef Mask, EVT VT, bool HasAVX2) { + if (VT != MVT::v8i16 && (!HasAVX2 || VT != MVT::v16i16)) return false; // Upper quadword copied in order. @@ -3222,9 +3241,20 @@ static bool isPSHUFLWMask(ArrayRef Mask, EVT VT) { // Lower quadword shuffled. for (unsigned i = 0; i != 4; ++i) - if (Mask[i] >= 4) + if (!isUndefOrInRange(Mask[i], 0, 4)) + return false; + + if (VT == MVT::v16i16) { + // Upper quadword copied in order. + if (!isSequentialOrUndefInRange(Mask, 12, 4, 12)) return false; + // Lower quadword shuffled. + for (unsigned i = 8; i != 12; ++i) + if (!isUndefOrInRange(Mask[i], 8, 12)) + return false; + } + return true; } @@ -3374,11 +3404,11 @@ static bool isSHUFPMask(ArrayRef Mask, EVT VT, bool HasAVX, /// isMOVHLPSMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a shuffle of elements that is suitable for input to MOVHLPS. static bool isMOVHLPSMask(ArrayRef Mask, EVT VT) { - unsigned NumElems = VT.getVectorNumElements(); - - if (VT.getSizeInBits() != 128) + if (!VT.is128BitVector()) return false; + unsigned NumElems = VT.getVectorNumElements(); + if (NumElems != 4) return false; @@ -3393,11 +3423,11 @@ static bool isMOVHLPSMask(ArrayRef Mask, EVT VT) { /// of vector_shuffle v, v, <2, 3, 2, 3>, i.e. 
vector_shuffle v, undef, /// <2, 3, 2, 3> static bool isMOVHLPS_v_undef_Mask(ArrayRef Mask, EVT VT) { - unsigned NumElems = VT.getVectorNumElements(); - - if (VT.getSizeInBits() != 128) + if (!VT.is128BitVector()) return false; + unsigned NumElems = VT.getVectorNumElements(); + if (NumElems != 4) return false; @@ -3410,7 +3440,7 @@ static bool isMOVHLPS_v_undef_Mask(ArrayRef Mask, EVT VT) { /// isMOVLPMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a shuffle of elements that is suitable for input to MOVLP{S|D}. static bool isMOVLPMask(ArrayRef Mask, EVT VT) { - if (VT.getSizeInBits() != 128) + if (!VT.is128BitVector()) return false; unsigned NumElems = VT.getVectorNumElements(); @@ -3418,11 +3448,11 @@ static bool isMOVLPMask(ArrayRef Mask, EVT VT) { if (NumElems != 2 && NumElems != 4) return false; - for (unsigned i = 0; i != NumElems/2; ++i) + for (unsigned i = 0, e = NumElems/2; i != e; ++i) if (!isUndefOrEqual(Mask[i], i + NumElems)) return false; - for (unsigned i = NumElems/2; i != NumElems; ++i) + for (unsigned i = NumElems/2, e = NumElems; i != e; ++i) if (!isUndefOrEqual(Mask[i], i)) return false; @@ -3432,23 +3462,71 @@ static bool isMOVLPMask(ArrayRef Mask, EVT VT) { /// isMOVLHPSMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a shuffle of elements that is suitable for input to MOVLHPS. static bool isMOVLHPSMask(ArrayRef Mask, EVT VT) { + if (!VT.is128BitVector()) + return false; + unsigned NumElems = VT.getVectorNumElements(); - if ((NumElems != 2 && NumElems != 4) - || VT.getSizeInBits() > 128) + if (NumElems != 2 && NumElems != 4) return false; - for (unsigned i = 0; i != NumElems/2; ++i) + for (unsigned i = 0, e = NumElems/2; i != e; ++i) if (!isUndefOrEqual(Mask[i], i)) return false; - for (unsigned i = 0; i != NumElems/2; ++i) - if (!isUndefOrEqual(Mask[i + NumElems/2], i + NumElems)) + for (unsigned i = 0, e = NumElems/2; i != e; ++i) + if (!isUndefOrEqual(Mask[i + e], i + NumElems)) return false; return true; } +// +// Some special combinations that can be optimized. +// +static +SDValue Compact8x32ShuffleNode(ShuffleVectorSDNode *SVOp, + SelectionDAG &DAG) { + EVT VT = SVOp->getValueType(0); + DebugLoc dl = SVOp->getDebugLoc(); + + if (VT != MVT::v8i32 && VT != MVT::v8f32) + return SDValue(); + + ArrayRef Mask = SVOp->getMask(); + + // These are the special masks that may be optimized. 
+ static const int MaskToOptimizeEven[] = {0, 8, 2, 10, 4, 12, 6, 14}; + static const int MaskToOptimizeOdd[] = {1, 9, 3, 11, 5, 13, 7, 15}; + bool MatchEvenMask = true; + bool MatchOddMask = true; + for (int i=0; i<8; ++i) { + if (!isUndefOrEqual(Mask[i], MaskToOptimizeEven[i])) + MatchEvenMask = false; + if (!isUndefOrEqual(Mask[i], MaskToOptimizeOdd[i])) + MatchOddMask = false; + } + static const int CompactionMaskEven[] = {0, 2, -1, -1, 4, 6, -1, -1}; + static const int CompactionMaskOdd [] = {1, 3, -1, -1, 5, 7, -1, -1}; + + const int *CompactionMask; + if (MatchEvenMask) + CompactionMask = CompactionMaskEven; + else if (MatchOddMask) + CompactionMask = CompactionMaskOdd; + else + return SDValue(); + + SDValue UndefNode = DAG.getNode(ISD::UNDEF, dl, VT); + + SDValue Op0 = DAG.getVectorShuffle(VT, dl, SVOp->getOperand(0), + UndefNode, CompactionMask); + SDValue Op1 = DAG.getVectorShuffle(VT, dl, SVOp->getOperand(1), + UndefNode, CompactionMask); + static const int UnpackMask[] = {0, 8, 1, 9, 4, 12, 5, 13}; + return DAG.getVectorShuffle(VT, dl, Op0, Op1, UnpackMask); +} + /// isUNPCKLMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a shuffle of elements that is suitable for input to UNPCKL. static bool isUNPCKLMask(ArrayRef Mask, EVT VT, @@ -3606,7 +3684,7 @@ static bool isUNPCKH_v_undef_Mask(ArrayRef Mask, EVT VT, bool HasAVX2) { static bool isMOVLMask(ArrayRef Mask, EVT VT) { if (VT.getVectorElementType().getSizeInBits() < 32) return false; - if (VT.getSizeInBits() == 256) + if (!VT.is128BitVector()) return false; unsigned NumElts = VT.getVectorNumElements(); @@ -3628,7 +3706,7 @@ static bool isMOVLMask(ArrayRef Mask, EVT VT) { /// The first half comes from the second half of V1 and the second half from the /// the second half of V2. static bool isVPERM2X128Mask(ArrayRef Mask, EVT VT, bool HasAVX) { - if (!HasAVX || VT.getSizeInBits() != 256) + if (!HasAVX || !VT.is256BitVector()) return false; // The shuffle result is divided into half A and half B. In total the two @@ -3720,9 +3798,10 @@ static bool isVPERMILPMask(ArrayRef Mask, EVT VT, bool HasAVX) { /// element of vector 2 and the other elements to come from vector 1 in order. static bool isCommutedMOVLMask(ArrayRef Mask, EVT VT, bool V2IsSplat = false, bool V2IsUndef = false) { - unsigned NumOps = VT.getVectorNumElements(); - if (VT.getSizeInBits() == 256) + if (!VT.is128BitVector()) return false; + + unsigned NumOps = VT.getVectorNumElements(); if (NumOps != 2 && NumOps != 4 && NumOps != 8 && NumOps != 16) return false; @@ -3788,9 +3867,11 @@ static bool isMOVSLDUPMask(ArrayRef Mask, EVT VT, /// specifies a shuffle of elements that is suitable for input to 256-bit /// version of MOVDDUP. static bool isMOVDDUPYMask(ArrayRef Mask, EVT VT, bool HasAVX) { - unsigned NumElts = VT.getVectorNumElements(); + if (!HasAVX || !VT.is256BitVector()) + return false; - if (!HasAVX || VT.getSizeInBits() != 256 || NumElts != 4) + unsigned NumElts = VT.getVectorNumElements(); + if (NumElts != 4) return false; for (unsigned i = 0; i != NumElts/2; ++i) @@ -3806,7 +3887,7 @@ static bool isMOVDDUPYMask(ArrayRef Mask, EVT VT, bool HasAVX) { /// specifies a shuffle of elements that is suitable for input to 128-bit /// version of MOVDDUP. 
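Editor's sketch (not part of this patch): the mask test at the heart of
Compact8x32ShuffleNode above. An element of -1 plays the role of undef and
matches anything, mirroring isUndefOrEqual.

#include <cassert>

static bool matchesMask(const int Mask[8], const int Want[8]) {
  for (int i = 0; i != 8; ++i)
    if (Mask[i] != -1 && Mask[i] != Want[i])   // undef matches anything
      return false;
  return true;
}

int main() {
  static const int Even[8] = {0, 8, 2, 10, 4, 12, 6, 14};
  static const int Odd[8]  = {1, 9, 3, 11, 5, 13, 7, 15};
  const int M[8] = {0, 8, -1, 10, 4, -1, 6, 14}; // undefs still match Even
  assert(matchesMask(M, Even));
  assert(!matchesMask(M, Odd));
  return 0;
}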
static bool isMOVDDUPMask(ArrayRef Mask, EVT VT) { - if (VT.getSizeInBits() != 128) + if (!VT.is128BitVector()) return false; unsigned e = VT.getVectorNumElements() / 2; @@ -3880,9 +3961,8 @@ static unsigned getShuffleSHUFImmediate(ShuffleVectorSDNode *N) { for (unsigned i = 0; i != NumElts; ++i) { int Elt = N->getMaskElt(i); if (Elt < 0) continue; - Elt %= NumLaneElts; - unsigned ShAmt = i << Shift; - if (ShAmt >= 8) ShAmt -= 8; + Elt &= NumLaneElts - 1; + unsigned ShAmt = (i << Shift) % 8; Mask |= Elt << ShAmt; } @@ -3892,30 +3972,48 @@ static unsigned getShuffleSHUFImmediate(ShuffleVectorSDNode *N) { /// getShufflePSHUFHWImmediate - Return the appropriate immediate to shuffle /// the specified VECTOR_SHUFFLE mask with the PSHUFHW instruction. static unsigned getShufflePSHUFHWImmediate(ShuffleVectorSDNode *N) { + EVT VT = N->getValueType(0); + + assert((VT == MVT::v8i16 || VT == MVT::v16i16) && + "Unsupported vector type for PSHUFHW"); + + unsigned NumElts = VT.getVectorNumElements(); + unsigned Mask = 0; - // 8 nodes, but we only care about the last 4. - for (unsigned i = 7; i >= 4; --i) { - int Val = N->getMaskElt(i); - if (Val >= 0) - Mask |= (Val - 4); - if (i != 4) - Mask <<= 2; + for (unsigned l = 0; l != NumElts; l += 8) { + // 8 nodes per lane, but we only care about the last 4. + for (unsigned i = 0; i < 4; ++i) { + int Elt = N->getMaskElt(l+i+4); + if (Elt < 0) continue; + Elt &= 0x3; // only 2-bits. + Mask |= Elt << (i * 2); + } } + return Mask; } /// getShufflePSHUFLWImmediate - Return the appropriate immediate to shuffle /// the specified VECTOR_SHUFFLE mask with the PSHUFLW instruction. static unsigned getShufflePSHUFLWImmediate(ShuffleVectorSDNode *N) { + EVT VT = N->getValueType(0); + + assert((VT == MVT::v8i16 || VT == MVT::v16i16) && + "Unsupported vector type for PSHUFHW"); + + unsigned NumElts = VT.getVectorNumElements(); + unsigned Mask = 0; - // 8 nodes, but we only care about the first 4. - for (int i = 3; i >= 0; --i) { - int Val = N->getMaskElt(i); - if (Val >= 0) - Mask |= Val; - if (i != 0) - Mask <<= 2; + for (unsigned l = 0; l != NumElts; l += 8) { + // 8 nodes per lane, but we only care about the first 4. + for (unsigned i = 0; i < 4; ++i) { + int Elt = N->getMaskElt(l+i); + if (Elt < 0) continue; + Elt &= 0x3; // only 2-bits + Mask |= Elt << (i * 2); + } } + return Mask; } @@ -4016,13 +4114,14 @@ static SDValue CommuteVectorShuffle(ShuffleVectorSDNode *SVOp, SmallVector MaskVec; for (unsigned i = 0; i != NumElems; ++i) { - int idx = SVOp->getMaskElt(i); - if (idx < 0) - MaskVec.push_back(idx); - else if (idx < (int)NumElems) - MaskVec.push_back(idx + NumElems); - else - MaskVec.push_back(idx - NumElems); + int Idx = SVOp->getMaskElt(i); + if (Idx >= 0) { + if (Idx < (int)NumElems) + Idx += NumElems; + else + Idx -= NumElems; + } + MaskVec.push_back(Idx); } return DAG.getVectorShuffle(VT, SVOp->getDebugLoc(), SVOp->getOperand(1), SVOp->getOperand(0), &MaskVec[0]); @@ -4033,7 +4132,7 @@ static SDValue CommuteVectorShuffle(ShuffleVectorSDNode *SVOp, /// V1 (and in order), and the upper half elements should come from the upper /// half of V2 (and in order). static bool ShouldXformToMOVHLPS(ArrayRef Mask, EVT VT) { - if (VT.getSizeInBits() != 128) + if (!VT.is128BitVector()) return false; if (VT.getVectorNumElements() != 4) return false; @@ -4090,7 +4189,7 @@ static bool WillBeConstantPoolLoad(SDNode *N) { /// MOVLP, it must be either a vector load or a scalar load to vector. 
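Editor's sketch (not part of this patch): the lane-wise immediate packing that
the rewritten getShufflePSHUFHWImmediate above performs. Every 128-bit lane
must contribute the same 8-bit value, since the instruction applies a single
immediate to all lanes; undef (-1) elements contribute no bits. The function
name is mine, and NumElts is 8 for v8i16 or 16 for v16i16.

#include <cassert>

static unsigned pshufhwImm(const int *MaskElts, unsigned NumElts) {
  unsigned Mask = 0;
  for (unsigned l = 0; l != NumElts; l += 8)     // per 128-bit lane
    for (unsigned i = 0; i != 4; ++i) {          // upper 4 elements of the lane
      int Elt = MaskElts[l + i + 4];
      if (Elt < 0) continue;                     // undef
      Mask |= unsigned(Elt & 0x3) << (i * 2);    // 2 bits per element
    }
  return Mask;
}

int main() {
  // <0,1,2,3,7,6,5,4>: reverse the upper quadword -> 0b00011011 == 0x1B.
  static const int M[8] = {0, 1, 2, 3, 7, 6, 5, 4};
  assert(pshufhwImm(M, 8) == 0x1B);
  return 0;
}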
static bool ShouldXformToMOVLP(SDNode *V1, SDNode *V2, ArrayRef Mask, EVT VT) { - if (VT.getSizeInBits() != 128) + if (!VT.is128BitVector()) return false; if (!ISD::isNON_EXTLoad(V1) && !isScalarLoadToVector(V1)) @@ -4107,7 +4206,7 @@ static bool ShouldXformToMOVLP(SDNode *V1, SDNode *V2, for (unsigned i = 0, e = NumElems/2; i != e; ++i) if (!isUndefOrEqual(Mask[i], i)) return false; - for (unsigned i = NumElems/2; i != NumElems; ++i) + for (unsigned i = NumElems/2, e = NumElems; i != e; ++i) if (!isUndefOrEqual(Mask[i], i+NumElems)) return false; return true; @@ -4159,11 +4258,12 @@ static bool isZeroShuffle(ShuffleVectorSDNode *N) { static SDValue getZeroVector(EVT VT, const X86Subtarget *Subtarget, SelectionDAG &DAG, DebugLoc dl) { assert(VT.isVector() && "Expected a vector type"); + unsigned Size = VT.getSizeInBits(); // Always build SSE zero vectors as <4 x i32> bitcasted // to their dest type. This ensures they get CSE'd. SDValue Vec; - if (VT.getSizeInBits() == 128) { // SSE + if (Size == 128) { // SSE if (Subtarget->hasSSE2()) { // SSE2 SDValue Cst = DAG.getTargetConstant(0, MVT::i32); Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Cst, Cst, Cst, Cst); @@ -4171,7 +4271,7 @@ static SDValue getZeroVector(EVT VT, const X86Subtarget *Subtarget, SDValue Cst = DAG.getTargetConstantFP(+0.0, MVT::f32); Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4f32, Cst, Cst, Cst, Cst); } - } else if (VT.getSizeInBits() == 256) { // AVX + } else if (Size == 256) { // AVX if (Subtarget->hasAVX2()) { // AVX2 SDValue Cst = DAG.getTargetConstant(0, MVT::i32); SDValue Ops[] = { Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst }; @@ -4183,7 +4283,9 @@ static SDValue getZeroVector(EVT VT, const X86Subtarget *Subtarget, SDValue Ops[] = { Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst }; Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8f32, Ops, 8); } - } + } else + llvm_unreachable("Unexpected vector type"); + return DAG.getNode(ISD::BITCAST, dl, VT, Vec); } @@ -4194,25 +4296,22 @@ static SDValue getZeroVector(EVT VT, const X86Subtarget *Subtarget, static SDValue getOnesVector(EVT VT, bool HasAVX2, SelectionDAG &DAG, DebugLoc dl) { assert(VT.isVector() && "Expected a vector type"); - assert((VT.is128BitVector() || VT.is256BitVector()) - && "Expected a 128-bit or 256-bit vector type"); + unsigned Size = VT.getSizeInBits(); SDValue Cst = DAG.getTargetConstant(~0U, MVT::i32); SDValue Vec; - if (VT.getSizeInBits() == 256) { + if (Size == 256) { if (HasAVX2) { // AVX2 SDValue Ops[] = { Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst }; Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i32, Ops, 8); } else { // AVX Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Cst, Cst, Cst, Cst); - SDValue InsV = Insert128BitVector(DAG.getNode(ISD::UNDEF, dl, MVT::v8i32), - Vec, DAG.getConstant(0, MVT::i32), DAG, dl); - Vec = Insert128BitVector(InsV, Vec, - DAG.getConstant(4 /* NumElems/2 */, MVT::i32), DAG, dl); + Vec = Concat128BitVectors(Vec, Vec, MVT::v8i32, 8, DAG, dl); } - } else { + } else if (Size == 128) { Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Cst, Cst, Cst, Cst); - } + } else + llvm_unreachable("Unexpected vector type"); return DAG.getNode(ISD::BITCAST, dl, VT, Vec); } @@ -4255,9 +4354,8 @@ static SDValue getUnpackl(SelectionDAG &DAG, DebugLoc dl, EVT VT, SDValue V1, static SDValue getUnpackh(SelectionDAG &DAG, DebugLoc dl, EVT VT, SDValue V1, SDValue V2) { unsigned NumElems = VT.getVectorNumElements(); - unsigned Half = NumElems/2; SmallVector Mask; - for (unsigned i = 0; i != Half; ++i) { + for (unsigned i = 0, Half = 
NumElems/2; i != Half; ++i) { Mask.push_back(i + Half); Mask.push_back(i + NumElems + Half); } @@ -4289,15 +4387,14 @@ static SDValue PromoteSplati8i16(SDValue V, SelectionDAG &DAG, int &EltNo) { static SDValue getLegalSplat(SelectionDAG &DAG, SDValue V, int EltNo) { EVT VT = V.getValueType(); DebugLoc dl = V.getDebugLoc(); - assert((VT.getSizeInBits() == 128 || VT.getSizeInBits() == 256) - && "Vector size not supported"); + unsigned Size = VT.getSizeInBits(); - if (VT.getSizeInBits() == 128) { + if (Size == 128) { V = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, V); int SplatMask[4] = { EltNo, EltNo, EltNo, EltNo }; V = DAG.getVectorShuffle(MVT::v4f32, dl, V, DAG.getUNDEF(MVT::v4f32), &SplatMask[0]); - } else { + } else if (Size == 256) { // To use VPERMILPS to splat scalars, the second half of indices must // refer to the higher part, which is a duplication of the lower one, // because VPERMILPS can only handle in-lane permutations. @@ -4307,7 +4404,8 @@ static SDValue getLegalSplat(SelectionDAG &DAG, SDValue V, int EltNo) { V = DAG.getNode(ISD::BITCAST, dl, MVT::v8f32, V); V = DAG.getVectorShuffle(MVT::v8f32, dl, V, DAG.getUNDEF(MVT::v8f32), &SplatMask[0]); - } + } else + llvm_unreachable("Vector size not supported"); return DAG.getNode(ISD::BITCAST, dl, VT, V); } @@ -4328,9 +4426,8 @@ static SDValue PromoteSplat(ShuffleVectorSDNode *SV, SelectionDAG &DAG) { // Extract the 128-bit part containing the splat element and update // the splat element index when it refers to the higher register. if (Size == 256) { - unsigned Idx = (EltNo >= NumElems/2) ? NumElems/2 : 0; - V1 = Extract128BitVector(V1, DAG.getConstant(Idx, MVT::i32), DAG, dl); - if (Idx > 0) + V1 = Extract128BitVector(V1, EltNo, DAG, dl); + if (EltNo >= NumElems/2) EltNo -= NumElems/2; } @@ -4346,10 +4443,7 @@ static SDValue PromoteSplat(ShuffleVectorSDNode *SV, SelectionDAG &DAG) { // into the low and high part. This is necessary because we want // to use VPERM* to shuffle the vectors if (Size == 256) { - SDValue InsV = Insert128BitVector(DAG.getUNDEF(SrcVT), V1, - DAG.getConstant(0, MVT::i32), DAG, dl); - V1 = Insert128BitVector(InsV, V1, - DAG.getConstant(NumElems/2, MVT::i32), DAG, dl); + V1 = DAG.getNode(ISD::CONCAT_VECTORS, dl, SrcVT, V1, V1); } return getLegalSplat(DAG, V1, EltNo); @@ -4377,7 +4471,7 @@ static SDValue getShuffleVectorZeroOrUndef(SDValue V2, unsigned Idx, /// getTargetShuffleMask - Calculates the shuffle mask corresponding to the /// target specific opcode. Returns true if the Mask could be calculated. /// Sets IsUnary to true if it only uses one source.
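The PromoteSplat hunk above now hands Extract128BitVector the raw element index: the helper pulls out whichever 128-bit half contains it, and the index is then rebased into that half. A standalone sketch of the same arithmetic, assuming exactly two 128-bit lanes (splitSplatIndex is a hypothetical name):

    #include <cassert>

    struct LaneSplit { unsigned Lane; unsigned EltInLane; };

    // Pick the 128-bit half holding EltNo and rebase the index into it.
    static LaneSplit splitSplatIndex(unsigned EltNo, unsigned NumElems) {
      assert(NumElems % 2 == 0 && "expected an even element count");
      unsigned Half = NumElems / 2;
      LaneSplit S;
      S.Lane = EltNo >= Half ? 1 : 0;
      S.EltInLane = EltNo - S.Lane * Half;
      return S;
    }

For v8f32, element 5 lives in lane 1 at in-lane index 1, matching Extract128BitVector(V1, 5, ...) followed by EltNo -= NumElems/2.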
-static bool getTargetShuffleMask(SDNode *N, EVT VT, +static bool getTargetShuffleMask(SDNode *N, MVT VT, SmallVectorImpl &Mask, bool &IsUnary) { unsigned NumElems = VT.getVectorNumElements(); SDValue ImmN; @@ -4408,12 +4502,17 @@ static bool getTargetShuffleMask(SDNode *N, EVT VT, break; case X86ISD::PSHUFHW: ImmN = N->getOperand(N->getNumOperands()-1); - DecodePSHUFHWMask(cast(ImmN)->getZExtValue(), Mask); + DecodePSHUFHWMask(VT, cast(ImmN)->getZExtValue(), Mask); IsUnary = true; break; case X86ISD::PSHUFLW: ImmN = N->getOperand(N->getNumOperands()-1); - DecodePSHUFLWMask(cast(ImmN)->getZExtValue(), Mask); + DecodePSHUFLWMask(VT, cast(ImmN)->getZExtValue(), Mask); + IsUnary = true; + break; + case X86ISD::VPERMI: + ImmN = N->getOperand(N->getNumOperands()-1); + DecodeVPERMMask(cast(ImmN)->getZExtValue(), Mask); IsUnary = true; break; case X86ISD::MOVSS: @@ -4473,20 +4572,21 @@ static SDValue getShuffleScalarElt(SDNode *N, unsigned Index, SelectionDAG &DAG, // Recurse into target specific vector shuffles to find scalars. if (isTargetShuffle(Opcode)) { - unsigned NumElems = VT.getVectorNumElements(); + MVT ShufVT = V.getValueType().getSimpleVT(); + unsigned NumElems = ShufVT.getVectorNumElements(); SmallVector ShuffleMask; SDValue ImmN; bool IsUnary; - if (!getTargetShuffleMask(N, VT, ShuffleMask, IsUnary)) + if (!getTargetShuffleMask(N, ShufVT, ShuffleMask, IsUnary)) return SDValue(); int Elt = ShuffleMask[Index]; if (Elt < 0) - return DAG.getUNDEF(VT.getVectorElementType()); + return DAG.getUNDEF(ShufVT.getVectorElementType()); SDValue NewV = (Elt < (int)NumElems) ? N->getOperand(0) - : N->getOperand(1); + : N->getOperand(1); return getShuffleScalarElt(NewV.getNode(), Elt % NumElems, DAG, Depth+1); } @@ -4631,7 +4731,7 @@ static bool isVectorShift(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG, bool &isLeft, SDValue &ShVal, unsigned &ShAmt) { // Although the logic below support any bitwidth size, there are no // shift instructions which handle more than 128-bit vectors. - if (SVOp->getValueType(0).getSizeInBits() > 128) + if (!SVOp->getValueType(0).is128BitVector()) return false; if (isVectorShiftLeft(SVOp, DAG, isLeft, ShVal, ShAmt) || @@ -4726,7 +4826,7 @@ static SDValue LowerBuildVectorv8i16(SDValue Op, unsigned NonZeros, static SDValue getVShift(bool isLeft, EVT VT, SDValue SrcOp, unsigned NumBits, SelectionDAG &DAG, const TargetLowering &TLI, DebugLoc dl) { - assert(VT.getSizeInBits() == 128 && "Unknown type for VShift"); + assert(VT.is128BitVector() && "Unknown type for VShift"); EVT ShVT = MVT::v2i64; unsigned Opc = isLeft ? 
X86ISD::VSHLDQ : X86ISD::VSRLDQ; SrcOp = DAG.getNode(ISD::BITCAST, dl, ShVT, SrcOp); @@ -4794,7 +4894,7 @@ X86TargetLowering::LowerAsSplatVectorLoad(SDValue SrcOp, EVT VT, DebugLoc dl, Ptr,DAG.getConstant(StartOffset, Ptr.getValueType())); int EltNo = (Offset - StartOffset) >> 2; - int NumElems = VT.getVectorNumElements(); + unsigned NumElems = VT.getVectorNumElements(); EVT NVT = EVT::getVectorVT(*DAG.getContext(), PVT, NumElems); SDValue V1 = DAG.getLoad(NVT, dl, Chain, Ptr, @@ -4802,7 +4902,7 @@ X86TargetLowering::LowerAsSplatVectorLoad(SDValue SrcOp, EVT VT, DebugLoc dl, false, false, false, 0); SmallVector Mask; - for (int i = 0; i < NumElems; ++i) + for (unsigned i = 0; i != NumElems; ++i) Mask.push_back(EltNo); return DAG.getVectorShuffle(NVT, dl, V1, DAG.getUNDEF(NVT), &Mask[0]); @@ -4866,8 +4966,9 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, SmallVectorImpl &Elts, LDBase->getPointerInfo(), LDBase->isVolatile(), LDBase->isNonTemporal(), LDBase->isInvariant(), LDBase->getAlignment()); - } else if (NumElems == 4 && LastLoadedElt == 1 && - DAG.getTargetLoweringInfo().isTypeLegal(MVT::v2i64)) { + } + if (NumElems == 4 && LastLoadedElt == 1 && + DAG.getTargetLoweringInfo().isTypeLegal(MVT::v2i64)) { SDVTList Tys = DAG.getVTList(MVT::v2i64, MVT::Other); SDValue Ops[] = { LDBase->getChain(), LDBase->getBasePtr() }; SDValue ResNode = @@ -4896,6 +4997,9 @@ X86TargetLowering::LowerVectorBroadcast(SDValue &Op, SelectionDAG &DAG) const { EVT VT = Op.getValueType(); DebugLoc dl = Op.getDebugLoc(); + assert((VT.is128BitVector() || VT.is256BitVector()) && + "Unsupported vector type for broadcast."); + SDValue Ld; bool ConstSplatVal; @@ -4930,8 +5034,17 @@ X86TargetLowering::LowerVectorBroadcast(SDValue &Op, SelectionDAG &DAG) const { return SDValue(); SDValue Sc = Op.getOperand(0); - if (Sc.getOpcode() != ISD::SCALAR_TO_VECTOR) - return SDValue(); + if (Sc.getOpcode() != ISD::SCALAR_TO_VECTOR && + Sc.getOpcode() != ISD::BUILD_VECTOR) { + + if (!Subtarget->hasAVX2()) + return SDValue(); + + // Use the register form of the broadcast instruction available on AVX2. + if (VT.is256BitVector()) + Sc = Extract128BitVector(Sc, 0, DAG, dl); + return DAG.getNode(X86ISD::VBROADCAST, dl, VT, Sc); + } Ld = Sc.getOperand(0); ConstSplatVal = (Ld.getOpcode() == ISD::Constant || @@ -4946,8 +5059,7 @@ X86TargetLowering::LowerVectorBroadcast(SDValue &Op, SelectionDAG &DAG) const { } } - bool Is256 = VT.getSizeInBits() == 256; - bool Is128 = VT.getSizeInBits() == 128; + bool Is256 = VT.is256BitVector(); // Handle the broadcasting a single constant scalar from the constant pool // into a vector. 
On Sandybridge it is still better to load a constant vector @@ -4957,9 +5069,7 @@ X86TargetLowering::LowerVectorBroadcast(SDValue &Op, SelectionDAG &DAG) const { assert(!CVT.isVector() && "Must not broadcast a vector type"); unsigned ScalarSize = CVT.getSizeInBits(); - if ((Is256 && (ScalarSize == 32 || ScalarSize == 64)) || - (Is128 && (ScalarSize == 32))) { - + if (ScalarSize == 32 || (Is256 && ScalarSize == 64)) { const Constant *C = 0; if (ConstantSDNode *CI = dyn_cast(Ld)) C = CI->getConstantIntValue(); @@ -4971,40 +5081,32 @@ X86TargetLowering::LowerVectorBroadcast(SDValue &Op, SelectionDAG &DAG) const { SDValue CP = DAG.getConstantPool(C, getPointerTy()); unsigned Alignment = cast(CP)->getAlignment(); Ld = DAG.getLoad(CVT, dl, DAG.getEntryNode(), CP, - MachinePointerInfo::getConstantPool(), - false, false, false, Alignment); + MachinePointerInfo::getConstantPool(), + false, false, false, Alignment); return DAG.getNode(X86ISD::VBROADCAST, dl, VT, Ld); } } - // The scalar source must be a normal load. - if (!ISD::isNormalLoad(Ld.getNode())) - return SDValue(); - - // Reject loads that have uses of the chain result - if (Ld->hasAnyUseOfValue(1)) - return SDValue(); - + bool IsLoad = ISD::isNormalLoad(Ld.getNode()); unsigned ScalarSize = Ld.getValueType().getSizeInBits(); - // VBroadcast to YMM - if (Is256 && (ScalarSize == 32 || ScalarSize == 64)) + // Handle AVX2 in-register broadcasts. + if (!IsLoad && Subtarget->hasAVX2() && + (ScalarSize == 32 || (Is256 && ScalarSize == 64))) return DAG.getNode(X86ISD::VBROADCAST, dl, VT, Ld); - // VBroadcast to XMM - if (Is128 && (ScalarSize == 32)) + // The scalar source must be a normal load. + if (!IsLoad) + return SDValue(); + + if (ScalarSize == 32 || (Is256 && ScalarSize == 64)) return DAG.getNode(X86ISD::VBROADCAST, dl, VT, Ld); // The integer check is needed for the 64-bit into 128-bit so it doesn't match - // double since there is vbroadcastsd xmm + // double since there is no vbroadcastsd xmm if (Subtarget->hasAVX2() && Ld.getValueType().isInteger()) { - // VBroadcast to YMM - if (Is256 && (ScalarSize == 8 || ScalarSize == 16)) - return DAG.getNode(X86ISD::VBROADCAST, dl, VT, Ld); - - // VBroadcast to XMM - if (Is128 && (ScalarSize == 8 || ScalarSize == 16 || ScalarSize == 64)) + if (ScalarSize == 8 || ScalarSize == 16 || ScalarSize == 64) return DAG.getNode(X86ISD::VBROADCAST, dl, VT, Ld); } @@ -5102,8 +5204,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { Mask.push_back(Idx); for (unsigned i = 1; i != VecElts; ++i) Mask.push_back(i); - Item = DAG.getVectorShuffle(VecVT, dl, Item, - DAG.getUNDEF(Item.getValueType()), + Item = DAG.getVectorShuffle(VecVT, dl, Item, DAG.getUNDEF(VecVT), &Mask[0]); } return DAG.getNode(ISD::BITCAST, dl, VT, Item); @@ -5120,12 +5221,12 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { if (ExtVT == MVT::i32 || ExtVT == MVT::f32 || ExtVT == MVT::f64 || (ExtVT == MVT::i64 && Subtarget->is64Bit())) { - if (VT.getSizeInBits() == 256) { + if (VT.is256BitVector()) { SDValue ZeroVec = getZeroVector(VT, Subtarget, DAG, dl); return DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, ZeroVec, Item, DAG.getIntPtrConstant(0)); } - assert(VT.getSizeInBits() == 128 && "Expected an SSE value type!"); + assert(VT.is128BitVector() && "Expected an SSE value type!"); Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Item); // Turn it into a MOVL (i.e. movss, movsd, or movd) to a zero vector. 
return getShuffleVectorZeroOrUndef(Item, 0, true, Subtarget, DAG); @@ -5134,12 +5235,11 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { if (ExtVT == MVT::i16 || ExtVT == MVT::i8) { Item = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Item); Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32, Item); - if (VT.getSizeInBits() == 256) { + if (VT.is256BitVector()) { SDValue ZeroVec = getZeroVector(MVT::v8i32, Subtarget, DAG, dl); - Item = Insert128BitVector(ZeroVec, Item, DAG.getConstant(0, MVT::i32), - DAG, dl); + Item = Insert128BitVector(ZeroVec, Item, 0, DAG, dl); } else { - assert(VT.getSizeInBits() == 128 && "Expected an SSE value type!"); + assert(VT.is128BitVector() && "Expected an SSE value type!"); Item = getShuffleVectorZeroOrUndef(Item, 0, true, Subtarget, DAG); } return DAG.getNode(ISD::BITCAST, dl, VT, Item); @@ -5171,7 +5271,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { // Turn it into a shuffle of zero and zero-extended scalar to vector. Item = getShuffleVectorZeroOrUndef(Item, 0, NumZero > 0, Subtarget, DAG); SmallVector MaskVec; - for (unsigned i = 0; i < NumElems; i++) + for (unsigned i = 0; i != NumElems; ++i) MaskVec.push_back(i == Idx ? 0 : 1); return DAG.getVectorShuffle(VT, dl, Item, DAG.getUNDEF(VT), &MaskVec[0]); } @@ -5199,7 +5299,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { // For AVX-length vectors, build the individual 128-bit pieces and use // shuffles to put them in place. - if (VT.getSizeInBits() == 256) { + if (VT.is256BitVector()) { SmallVector V; for (unsigned i = 0; i != NumElems; ++i) V.push_back(Op.getOperand(i)); @@ -5212,10 +5312,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { NumElems/2); // Recreate the wider vector with the lower and upper part. - SDValue Vec = Insert128BitVector(DAG.getNode(ISD::UNDEF, dl, VT), Lower, - DAG.getConstant(0, MVT::i32), DAG, dl); - return Insert128BitVector(Vec, Upper, DAG.getConstant(NumElems/2, MVT::i32), - DAG, dl); + return Concat128BitVectors(Lower, Upper, VT, NumElems, DAG, dl); } // Let legalizer expand 2-wide build_vectors. @@ -5283,7 +5380,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { return DAG.getVectorShuffle(VT, dl, V[0], V[1], &MaskVec[0]); } - if (Values.size() > 1 && VT.getSizeInBits() == 128) { + if (Values.size() > 1 && VT.is128BitVector()) { // Check for a build vector of consecutive loads. for (unsigned i = 0; i < NumElems; ++i) V[i] = Op.getOperand(i); @@ -5344,62 +5441,24 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { return SDValue(); } -// LowerMMXCONCAT_VECTORS - We support concatenate two MMX registers and place -// them in a MMX register. This is better than doing a stack convert. 
-static SDValue LowerMMXCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) { - DebugLoc dl = Op.getDebugLoc(); - EVT ResVT = Op.getValueType(); - - assert(ResVT == MVT::v2i64 || ResVT == MVT::v4i32 || - ResVT == MVT::v8i16 || ResVT == MVT::v16i8); - int Mask[2]; - SDValue InVec = DAG.getNode(ISD::BITCAST,dl, MVT::v1i64, Op.getOperand(0)); - SDValue VecOp = DAG.getNode(X86ISD::MOVQ2DQ, dl, MVT::v2i64, InVec); - InVec = Op.getOperand(1); - if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR) { - unsigned NumElts = ResVT.getVectorNumElements(); - VecOp = DAG.getNode(ISD::BITCAST, dl, ResVT, VecOp); - VecOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, ResVT, VecOp, - InVec.getOperand(0), DAG.getIntPtrConstant(NumElts/2+1)); - } else { - InVec = DAG.getNode(ISD::BITCAST, dl, MVT::v1i64, InVec); - SDValue VecOp2 = DAG.getNode(X86ISD::MOVQ2DQ, dl, MVT::v2i64, InVec); - Mask[0] = 0; Mask[1] = 2; - VecOp = DAG.getVectorShuffle(MVT::v2i64, dl, VecOp, VecOp2, Mask); - } - return DAG.getNode(ISD::BITCAST, dl, ResVT, VecOp); -} - // LowerAVXCONCAT_VECTORS - 256-bit AVX can use the vinsertf128 instruction // to create 256-bit vectors from two other 128-bit ones. static SDValue LowerAVXCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) { DebugLoc dl = Op.getDebugLoc(); EVT ResVT = Op.getValueType(); - assert(ResVT.getSizeInBits() == 256 && "Value type must be 256-bit wide"); + assert(ResVT.is256BitVector() && "Value type must be 256-bit wide"); SDValue V1 = Op.getOperand(0); SDValue V2 = Op.getOperand(1); unsigned NumElems = ResVT.getVectorNumElements(); - SDValue V = Insert128BitVector(DAG.getNode(ISD::UNDEF, dl, ResVT), V1, - DAG.getConstant(0, MVT::i32), DAG, dl); - return Insert128BitVector(V, V2, DAG.getConstant(NumElems/2, MVT::i32), - DAG, dl); + return Concat128BitVectors(V1, V2, ResVT, NumElems, DAG, dl); } SDValue X86TargetLowering::LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const { - EVT ResVT = Op.getValueType(); - assert(Op.getNumOperands() == 2); - assert((ResVT.getSizeInBits() == 128 || ResVT.getSizeInBits() == 256) && - "Unsupported CONCAT_VECTORS for value type"); - - // We support concatenate two MMX registers and place them in a MMX register. - // This is better than doing a stack convert. - if (ResVT.is128BitVector()) - return LowerMMXCONCAT_VECTORS(Op, DAG); // 256-bit AVX can use the vinsertf128 instruction to create 256-bit vectors // from two other 128-bit ones. @@ -5407,75 +5466,64 @@ X86TargetLowering::LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const { } // Try to lower a shuffle node into a simple blend instruction. 
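The blend lowering in the hunk below reduces to computing a single immediate: bit i is set when result lane i keeps the first operand's element i (or is undef), stays clear when the lane takes the second operand's element i, and any cross-lane move disqualifies the blend. A simplified standalone sketch of that test, mirroring the bit convention used below (matchBlendMask is a made-up name):

    #include <cstdint>
    #include <vector>

    static bool matchBlendMask(const std::vector<int> &Mask,
                               uint32_t &MaskVals) {
      MaskVals = 0;
      unsigned e = Mask.size();          // assumed <= 32 lanes
      for (unsigned i = 0; i != e; ++i) {
        int EltIdx = Mask[i];
        if (EltIdx == (int)i || EltIdx < 0)
          MaskVals |= 1u << i;           // lane kept from V1 (or undef)
        else if (EltIdx != (int)(i + e))
          return false;                  // element moves across lanes
        // else: lane taken from V2's lane i, bit stays 0
      }
      return true;
    }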
-static SDValue LowerVECTOR_SHUFFLEtoBlend(SDValue Op, +static SDValue LowerVECTOR_SHUFFLEtoBlend(ShuffleVectorSDNode *SVOp, const X86Subtarget *Subtarget, SelectionDAG &DAG) { - ShuffleVectorSDNode *SVOp = cast(Op); SDValue V1 = SVOp->getOperand(0); SDValue V2 = SVOp->getOperand(1); DebugLoc dl = SVOp->getDebugLoc(); - EVT VT = Op.getValueType(); - EVT InVT = V1.getValueType(); - int MaskSize = VT.getVectorNumElements(); - int InSize = InVT.getVectorNumElements(); + MVT VT = SVOp->getValueType(0).getSimpleVT(); + unsigned NumElems = VT.getVectorNumElements(); if (!Subtarget->hasSSE41()) return SDValue(); - if (MaskSize != InSize) - return SDValue(); - - int ISDNo = 0; + unsigned ISDNo = 0; MVT OpTy; - switch (VT.getSimpleVT().SimpleTy) { + switch (VT.SimpleTy) { default: return SDValue(); case MVT::v8i16: - ISDNo = X86ISD::BLENDPW; - OpTy = MVT::v8i16; - break; + ISDNo = X86ISD::BLENDPW; + OpTy = MVT::v8i16; + break; case MVT::v4i32: case MVT::v4f32: - ISDNo = X86ISD::BLENDPS; - OpTy = MVT::v4f32; - break; + ISDNo = X86ISD::BLENDPS; + OpTy = MVT::v4f32; + break; case MVT::v2i64: case MVT::v2f64: - ISDNo = X86ISD::BLENDPD; - OpTy = MVT::v2f64; - break; + ISDNo = X86ISD::BLENDPD; + OpTy = MVT::v2f64; + break; case MVT::v8i32: case MVT::v8f32: - if (!Subtarget->hasAVX()) - return SDValue(); - ISDNo = X86ISD::BLENDPS; - OpTy = MVT::v8f32; - break; + if (!Subtarget->hasAVX()) + return SDValue(); + ISDNo = X86ISD::BLENDPS; + OpTy = MVT::v8f32; + break; case MVT::v4i64: case MVT::v4f64: - if (!Subtarget->hasAVX()) - return SDValue(); - ISDNo = X86ISD::BLENDPD; - OpTy = MVT::v4f64; - break; - case MVT::v16i16: - if (!Subtarget->hasAVX2()) - return SDValue(); - ISDNo = X86ISD::BLENDPW; - OpTy = MVT::v16i16; - break; + if (!Subtarget->hasAVX()) + return SDValue(); + ISDNo = X86ISD::BLENDPD; + OpTy = MVT::v4f64; + break; } assert(ISDNo && "Invalid Op Number"); unsigned MaskVals = 0; - for (int i = 0; i < MaskSize; ++i) { + for (unsigned i = 0; i != NumElems; ++i) { int EltIdx = SVOp->getMaskElt(i); - if (EltIdx == i || EltIdx == -1) + if (EltIdx == (int)i || EltIdx < 0) MaskVals |= (1<= 16)) { - pshufbMask.push_back(DAG.getConstant(0x80, MVT::i8)); - pshufbMask.push_back(DAG.getConstant(0x80, MVT::i8)); - continue; - } - pshufbMask.push_back(DAG.getConstant(EltIdx, MVT::i8)); - pshufbMask.push_back(DAG.getConstant(EltIdx+1, MVT::i8)); + int Idx0 = (TwoInputs && (EltIdx >= 16)) ? 0x80 : EltIdx; + int Idx1 = (TwoInputs && (EltIdx >= 16)) ? 0x80 : EltIdx+1; + pshufbMask.push_back(DAG.getConstant(Idx0, MVT::i8)); + pshufbMask.push_back(DAG.getConstant(Idx1, MVT::i8)); } V1 = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, V1); V1 = DAG.getNode(X86ISD::PSHUFB, dl, MVT::v16i8, V1, @@ -5649,13 +5694,10 @@ X86TargetLowering::LowerVECTOR_SHUFFLEv8i16(SDValue Op, pshufbMask.clear(); for (unsigned i = 0; i != 8; ++i) { int EltIdx = MaskVals[i] * 2; - if (EltIdx < 16) { - pshufbMask.push_back(DAG.getConstant(0x80, MVT::i8)); - pshufbMask.push_back(DAG.getConstant(0x80, MVT::i8)); - continue; - } - pshufbMask.push_back(DAG.getConstant(EltIdx - 16, MVT::i8)); - pshufbMask.push_back(DAG.getConstant(EltIdx - 15, MVT::i8)); + int Idx0 = (EltIdx < 16) ? 0x80 : EltIdx - 16; + int Idx1 = (EltIdx < 16) ? 
0x80 : EltIdx - 15; + pshufbMask.push_back(DAG.getConstant(Idx0, MVT::i8)); + pshufbMask.push_back(DAG.getConstant(Idx1, MVT::i8)); } V2 = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, V2); V2 = DAG.getNode(X86ISD::PSHUFB, dl, MVT::v16i8, V2, @@ -5731,10 +5773,10 @@ X86TargetLowering::LowerVECTOR_SHUFFLEv8i16(SDValue Op, int EltIdx = MaskVals[i]; if (EltIdx < 0) continue; - SDValue ExtOp = (EltIdx < 8) - ? DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i16, V1, - DAG.getIntPtrConstant(EltIdx)) - : DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i16, V2, + SDValue ExtOp = (EltIdx < 8) ? + DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i16, V1, + DAG.getIntPtrConstant(EltIdx)) : + DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i16, V2, DAG.getIntPtrConstant(EltIdx - 8)); NewV = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v8i16, NewV, ExtOp, DAG.getIntPtrConstant(i)); @@ -5755,21 +5797,11 @@ SDValue LowerVECTOR_SHUFFLEv16i8(ShuffleVectorSDNode *SVOp, DebugLoc dl = SVOp->getDebugLoc(); ArrayRef MaskVals = SVOp->getMask(); + bool V2IsUndef = V2.getOpcode() == ISD::UNDEF; + // If we have SSSE3, case 1 is generated when all result bytes come from // one of the inputs. Otherwise, case 2 is generated. If no SSSE3 is // present, fall back to case 3. - // FIXME: kill V2Only once shuffles are canonizalized by getNode. - bool V1Only = true; - bool V2Only = true; - for (unsigned i = 0; i < 16; ++i) { - int EltIdx = MaskVals[i]; - if (EltIdx < 0) - continue; - if (EltIdx < 16) - V2Only = false; - else - V1Only = false; - } // If SSSE3, use 1 pshufb instruction per vector with elements in the result. if (TLI.getSubtarget()->hasSSSE3()) { @@ -5781,23 +5813,16 @@ SDValue LowerVECTOR_SHUFFLEv16i8(ShuffleVectorSDNode *SVOp, // Otherwise, we have elements from both input vectors, and must zero out // elements that come from V2 in the first mask, and V1 in the second mask // so that we can OR them together. - bool TwoInputs = !(V1Only || V2Only); for (unsigned i = 0; i != 16; ++i) { int EltIdx = MaskVals[i]; - if (EltIdx < 0 || (TwoInputs && EltIdx >= 16)) { - pshufbMask.push_back(DAG.getConstant(0x80, MVT::i8)); - continue; - } + if (EltIdx < 0 || EltIdx >= 16) + EltIdx = 0x80; pshufbMask.push_back(DAG.getConstant(EltIdx, MVT::i8)); } - // If all the elements are from V2, assign it to V1 and return after - // building the first pshufb. - if (V2Only) - V1 = V2; V1 = DAG.getNode(X86ISD::PSHUFB, dl, MVT::v16i8, V1, DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i8, &pshufbMask[0], 16)); - if (!TwoInputs) + if (V2IsUndef) return V1; // Calculate the shuffle mask for the second input, shuffle it, and @@ -5805,11 +5830,8 @@ SDValue LowerVECTOR_SHUFFLEv16i8(ShuffleVectorSDNode *SVOp, pshufbMask.clear(); for (unsigned i = 0; i != 16; ++i) { int EltIdx = MaskVals[i]; - if (EltIdx < 16) { - pshufbMask.push_back(DAG.getConstant(0x80, MVT::i8)); - continue; - } - pshufbMask.push_back(DAG.getConstant(EltIdx - 16, MVT::i8)); + EltIdx = (EltIdx < 16) ? 0x80 : EltIdx - 16; + pshufbMask.push_back(DAG.getConstant(EltIdx, MVT::i8)); } V2 = DAG.getNode(X86ISD::PSHUFB, dl, MVT::v16i8, V2, DAG.getNode(ISD::BUILD_VECTOR, dl, @@ -5822,7 +5844,7 @@ SDValue LowerVECTOR_SHUFFLEv16i8(ShuffleVectorSDNode *SVOp, // the 16 different words that comprise the two doublequadword input vectors. V1 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1); V2 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V2); - SDValue NewV = V2Only ? 
V2 : V1; + SDValue NewV = V1; for (int i = 0; i != 8; ++i) { int Elt0 = MaskVals[i*2]; int Elt1 = MaskVals[i*2+1]; @@ -5832,9 +5854,7 @@ SDValue LowerVECTOR_SHUFFLEv16i8(ShuffleVectorSDNode *SVOp, continue; // This word of the result is already in the correct place, skip it. - if (V1Only && (Elt0 == i*2) && (Elt1 == i*2+1)) - continue; - if (V2Only && (Elt0 == i*2+16) && (Elt1 == i*2+17)) + if ((Elt0 == i*2) && (Elt1 == i*2+1)) continue; SDValue Elt0Src = Elt0 < 16 ? V1 : V2; @@ -5896,41 +5916,37 @@ SDValue LowerVECTOR_SHUFFLEv16i8(ShuffleVectorSDNode *SVOp, static SDValue RewriteAsNarrowerShuffle(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG, DebugLoc dl) { - EVT VT = SVOp->getValueType(0); - SDValue V1 = SVOp->getOperand(0); - SDValue V2 = SVOp->getOperand(1); + MVT VT = SVOp->getValueType(0).getSimpleVT(); unsigned NumElems = VT.getVectorNumElements(); - unsigned NewWidth = (NumElems == 4) ? 2 : 4; - EVT NewVT; - switch (VT.getSimpleVT().SimpleTy) { + MVT NewVT; + unsigned Scale; + switch (VT.SimpleTy) { default: llvm_unreachable("Unexpected!"); - case MVT::v4f32: NewVT = MVT::v2f64; break; - case MVT::v4i32: NewVT = MVT::v2i64; break; - case MVT::v8i16: NewVT = MVT::v4i32; break; - case MVT::v16i8: NewVT = MVT::v4i32; break; + case MVT::v4f32: NewVT = MVT::v2f64; Scale = 2; break; + case MVT::v4i32: NewVT = MVT::v2i64; Scale = 2; break; + case MVT::v8i16: NewVT = MVT::v4i32; Scale = 2; break; + case MVT::v16i8: NewVT = MVT::v4i32; Scale = 4; break; + case MVT::v16i16: NewVT = MVT::v8i32; Scale = 2; break; + case MVT::v32i8: NewVT = MVT::v8i32; Scale = 4; break; } - int Scale = NumElems / NewWidth; SmallVector<int, 8> MaskVec; - for (unsigned i = 0; i < NumElems; i += Scale) { + for (unsigned i = 0; i != NumElems; i += Scale) { int StartIdx = -1; - for (int j = 0; j < Scale; ++j) { + for (unsigned j = 0; j != Scale; ++j) { int EltIdx = SVOp->getMaskElt(i+j); if (EltIdx < 0) continue; - if (StartIdx == -1) - StartIdx = EltIdx - (EltIdx % Scale); - if (EltIdx != StartIdx + j) + if (StartIdx < 0) + StartIdx = (EltIdx / Scale); + if (EltIdx != (int)(StartIdx*Scale + j)) return SDValue(); } - if (StartIdx == -1) - MaskVec.push_back(-1); - else - MaskVec.push_back(StartIdx / Scale); + MaskVec.push_back(StartIdx); } - V1 = DAG.getNode(ISD::BITCAST, dl, NewVT, V1); - V2 = DAG.getNode(ISD::BITCAST, dl, NewVT, V2); + SDValue V1 = DAG.getNode(ISD::BITCAST, dl, NewVT, SVOp->getOperand(0)); + SDValue V2 = DAG.getNode(ISD::BITCAST, dl, NewVT, SVOp->getOperand(1)); return DAG.getVectorShuffle(NewVT, dl, V1, V2, &MaskVec[0]); } @@ -5973,6 +5989,11 @@ static SDValue getVZextMovL(EVT VT, EVT OpVT, /// which could not be matched by any known target specific shuffle static SDValue LowerVECTOR_SHUFFLE_256(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG) { + + SDValue NewOp = Compact8x32ShuffleNode(SVOp, DAG); + if (NewOp.getNode()) + return NewOp; + EVT VT = SVOp->getValueType(0); unsigned NumElems = VT.getVectorNumElements(); @@ -5981,14 +6002,15 @@ LowerVECTOR_SHUFFLE_256(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG) { DebugLoc dl = SVOp->getDebugLoc(); MVT EltVT = VT.getVectorElementType().getSimpleVT(); EVT NVT = MVT::getVectorVT(EltVT, NumLaneElems); - SDValue Shufs[2]; + SDValue Output[2]; SmallVector Mask; for (unsigned l = 0; l < 2; ++l) { // Build a shuffle mask for the output, discovering on the fly which // input vectors to use as shuffle operands (recorded in InputUsed). // If building a suitable shuffle vector proves too hard, then bail - // out with useBuildVector set. + // out with UseBuildVector set.
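The per-lane loop below treats the two inputs as four 128-bit halves and tries to express each output lane as a shuffle of at most two of them. A simplified standalone model of that bookkeeping, under the assumption of 128-bit lanes (buildLaneMask is a made-up name):

    #include <vector>

    // Rewrite one output lane of a wide mask as a two-input in-lane shuffle.
    // Returns false when a third input half would be needed -- the case the
    // real code handles by falling back to a BUILD_VECTOR (UseBuildVector).
    static bool buildLaneMask(const std::vector<int> &WideMask,
                              unsigned LaneStart, unsigned NumLaneElems,
                              int InputUsed[2], std::vector<int> &LaneMask) {
      InputUsed[0] = InputUsed[1] = -1;
      LaneMask.clear();
      for (unsigned i = 0; i != NumLaneElems; ++i) {
        int Idx = WideMask[LaneStart + i];
        if (Idx < 0) { LaneMask.push_back(-1); continue; }
        unsigned Input = Idx / NumLaneElems;   // which 128-bit half: 0..3
        unsigned OpNo;
        for (OpNo = 0; OpNo != 2; ++OpNo) {
          if (InputUsed[OpNo] == (int)Input) break;  // already an operand
          if (InputUsed[OpNo] < 0) { InputUsed[OpNo] = (int)Input; break; }
        }
        if (OpNo == 2)
          return false;                        // more than two halves used
        unsigned InLane = unsigned(Idx) % NumLaneElems;
        LaneMask.push_back(int(InLane + OpNo * NumLaneElems));
      }
      return true;
    }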
+ bool UseBuildVector = false; int InputUsed[2] = { -1, -1 }; // Not yet discovered. unsigned LaneStart = l * NumLaneElems; for (unsigned i = 0; i != NumLaneElems; ++i) { @@ -6020,38 +6042,61 @@ LowerVECTOR_SHUFFLE_256(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG) { } if (OpNo >= array_lengthof(InputUsed)) { - // More than two input vectors used! Give up. - return SDValue(); + // More than two input vectors used! Give up on trying to create a + // shuffle vector. Insert all elements into a BUILD_VECTOR instead. + UseBuildVector = true; + break; } // Add the mask index for the new shuffle vector. Mask.push_back(Idx + OpNo * NumLaneElems); } - if (InputUsed[0] < 0) { + if (UseBuildVector) { + SmallVector SVOps; + for (unsigned i = 0; i != NumLaneElems; ++i) { + // The mask element. This indexes into the input. + int Idx = SVOp->getMaskElt(i+LaneStart); + if (Idx < 0) { + SVOps.push_back(DAG.getUNDEF(EltVT)); + continue; + } + + // The input vector this mask element indexes into. + int Input = Idx / NumElems; + + // Turn the index into an offset from the start of the input vector. + Idx -= Input * NumElems; + + // Extract the vector element by hand. + SVOps.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, + SVOp->getOperand(Input), + DAG.getIntPtrConstant(Idx))); + } + + // Construct the output using a BUILD_VECTOR. + Output[l] = DAG.getNode(ISD::BUILD_VECTOR, dl, NVT, &SVOps[0], + SVOps.size()); + } else if (InputUsed[0] < 0) { // No input vectors were used! The result is undefined. - Shufs[l] = DAG.getUNDEF(NVT); + Output[l] = DAG.getUNDEF(NVT); } else { SDValue Op0 = Extract128BitVector(SVOp->getOperand(InputUsed[0] / 2), - DAG.getConstant((InputUsed[0] % 2) * NumLaneElems, MVT::i32), - DAG, dl); + (InputUsed[0] % 2) * NumLaneElems, + DAG, dl); // If only one input was used, use an undefined vector for the other. SDValue Op1 = (InputUsed[1] < 0) ? DAG.getUNDEF(NVT) : Extract128BitVector(SVOp->getOperand(InputUsed[1] / 2), - DAG.getConstant((InputUsed[1] % 2) * NumLaneElems, MVT::i32), - DAG, dl); + (InputUsed[1] % 2) * NumLaneElems, DAG, dl); // At least one input vector was used. Create a new shuffle vector. - Shufs[l] = DAG.getVectorShuffle(NVT, dl, Op0, Op1, &Mask[0]); + Output[l] = DAG.getVectorShuffle(NVT, dl, Op0, Op1, &Mask[0]); } Mask.clear(); } // Concatenate the result back - SDValue V = Insert128BitVector(DAG.getNode(ISD::UNDEF, dl, VT), Shufs[0], - DAG.getConstant(0, MVT::i32), DAG, dl); - return Insert128BitVector(V, Shufs[1],DAG.getConstant(NumLaneElems, MVT::i32), - DAG, dl); + return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, Output[0], Output[1]); } /// LowerVECTOR_SHUFFLE_128v4 - Handle all 128-bit wide vectors with @@ -6063,7 +6108,7 @@ LowerVECTOR_SHUFFLE_128v4(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG) { DebugLoc dl = SVOp->getDebugLoc(); EVT VT = SVOp->getValueType(0); - assert(VT.getSizeInBits() == 128 && "Unsupported vector size"); + assert(VT.is128BitVector() && "Unsupported vector size"); std::pair Locs[4]; int Mask1[] = { -1, -1, -1, -1 }; @@ -6107,7 +6152,9 @@ LowerVECTOR_SHUFFLE_128v4(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG) { } return DAG.getVectorShuffle(VT, dl, V1, V1, &Mask2[0]); - } else if (NumLo == 3 || NumHi == 3) { + } + + if (NumLo == 3 || NumHi == 3) { // Otherwise, we must have three elements from one vector, call it X, and // one element from the other, call it Y. 
First, use a shufps to build an // intermediate vector with the one element from Y and the element from X @@ -6143,17 +6190,17 @@ LowerVECTOR_SHUFFLE_128v4(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG) { Mask1[2] = HiIndex & 1 ? 6 : 4; Mask1[3] = HiIndex & 1 ? 4 : 6; return DAG.getVectorShuffle(VT, dl, V1, V2, &Mask1[0]); - } else { - Mask1[0] = HiIndex & 1 ? 2 : 0; - Mask1[1] = HiIndex & 1 ? 0 : 2; - Mask1[2] = PermMask[2]; - Mask1[3] = PermMask[3]; - if (Mask1[2] >= 0) - Mask1[2] += 4; - if (Mask1[3] >= 0) - Mask1[3] += 4; - return DAG.getVectorShuffle(VT, dl, V2, V1, &Mask1[0]); } + + Mask1[0] = HiIndex & 1 ? 2 : 0; + Mask1[1] = HiIndex & 1 ? 0 : 2; + Mask1[2] = PermMask[2]; + Mask1[3] = PermMask[3]; + if (Mask1[2] >= 0) + Mask1[2] += 4; + if (Mask1[3] >= 0) + Mask1[3] += 4; + return DAG.getVectorShuffle(VT, dl, V2, V1, &Mask1[0]); } // Break it into (shuffle shuffle_hi, shuffle_lo). @@ -6302,7 +6349,7 @@ SDValue getMOVLP(SDValue &Op, DebugLoc &dl, SelectionDAG &DAG, bool HasSSE2) { return getTargetShuffleNode(X86ISD::MOVLPD, dl, VT, V1, V2, DAG); if (NumElems == 4) - // If we don't care about the second element, procede to use movss. + // If we don't care about the second element, proceed to use movss. if (SVOp->getMaskElt(1) != -1) return getTargetShuffleNode(X86ISD::MOVLPS, dl, VT, V1, V2, DAG); } @@ -6360,7 +6407,8 @@ X86TargetLowering::NormalizeVectorShuffle(SDValue Op, SelectionDAG &DAG) const { // If the shuffle can be profitably rewritten as a narrower shuffle, then // do it! - if (VT == MVT::v8i16 || VT == MVT::v16i8) { + if (VT == MVT::v8i16 || VT == MVT::v16i8 || + VT == MVT::v16i16 || VT == MVT::v32i8) { SDValue NewOp = RewriteAsNarrowerShuffle(SVOp, DAG, dl); if (NewOp.getNode()) return DAG.getNode(ISD::BITCAST, dl, VT, NewOp); @@ -6564,11 +6612,10 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { // new vector_shuffle with the corrected mask.p SmallVector NewMask(M.begin(), M.end()); NormalizeMask(NewMask, NumElems); - if (isUNPCKLMask(NewMask, VT, HasAVX2, true)) { + if (isUNPCKLMask(NewMask, VT, HasAVX2, true)) return getTargetShuffleNode(X86ISD::UNPCKL, dl, VT, V1, V2, DAG); - } else if (isUNPCKHMask(NewMask, VT, HasAVX2, true)) { + if (isUNPCKHMask(NewMask, VT, HasAVX2, true)) return getTargetShuffleNode(X86ISD::UNPCKH, dl, VT, V1, V2, DAG); - } } if (Commuted) { @@ -6605,12 +6652,12 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { return getTargetShuffleNode(X86ISD::UNPCKL, dl, VT, V1, V1, DAG); } - if (isPSHUFHWMask(M, VT)) + if (isPSHUFHWMask(M, VT, HasAVX2)) return getTargetShuffleNode(X86ISD::PSHUFHW, dl, VT, V1, getShufflePSHUFHWImmediate(SVOp), DAG); - if (isPSHUFLWMask(M, VT)) + if (isPSHUFLWMask(M, VT, HasAVX2)) return getTargetShuffleNode(X86ISD::PSHUFLW, dl, VT, V1, getShufflePSHUFLWImmediate(SVOp), DAG); @@ -6647,7 +6694,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { return getTargetShuffleNode(X86ISD::VPERM2X128, dl, VT, V1, V2, getShuffleVPERM2X128Immediate(SVOp), DAG); - SDValue BlendOp = LowerVECTOR_SHUFFLEtoBlend(Op, Subtarget, DAG); + SDValue BlendOp = LowerVECTOR_SHUFFLEtoBlend(SVOp, Subtarget, DAG); if (BlendOp.getNode()) return BlendOp; @@ -6689,7 +6736,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { // Handle all 128-bit wide vectors with 4 elements, and match them with // several different shuffle types. 
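For reference, the legality test inside RewriteAsNarrowerShuffle above (now extended to v16i16 and v32i8) can be modeled in isolation: each group of Scale narrow mask entries must cover exactly one wide element, in order. A standalone sketch under that assumption (widenShuffleMask is a made-up name):

    #include <vector>

    static bool widenShuffleMask(const std::vector<int> &Mask, unsigned Scale,
                                 std::vector<int> &WideMask) {
      WideMask.clear();
      for (unsigned i = 0, e = Mask.size(); i != e; i += Scale) {
        int StartIdx = -1;
        for (unsigned j = 0; j != Scale; ++j) {
          int EltIdx = Mask[i + j];
          if (EltIdx < 0)
            continue;                          // undef matches anything
          if (StartIdx < 0)
            StartIdx = EltIdx / Scale;         // wide element for this group
          if (EltIdx != (int)(StartIdx * Scale + j))
            return false;                      // group is not one wide element
        }
        WideMask.push_back(StartIdx);          // -1 if the group is all-undef
      }
      return true;
    }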
- if (NumElems == 4 && VT.getSizeInBits() == 128) + if (NumElems == 4 && VT.is128BitVector()) return LowerVECTOR_SHUFFLE_128v4(SVOp, DAG); // Handle general 256-bit shuffles @@ -6705,7 +6752,7 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT_SSE4(SDValue Op, EVT VT = Op.getValueType(); DebugLoc dl = Op.getDebugLoc(); - if (Op.getOperand(0).getValueType().getSizeInBits() != 128) + if (!Op.getOperand(0).getValueType().is128BitVector()) return SDValue(); if (VT.getSizeInBits() == 8) { @@ -6714,7 +6761,9 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT_SSE4(SDValue Op, SDValue Assert = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Extract, DAG.getValueType(VT)); return DAG.getNode(ISD::TRUNCATE, dl, VT, Assert); - } else if (VT.getSizeInBits() == 16) { + } + + if (VT.getSizeInBits() == 16) { unsigned Idx = cast(Op.getOperand(1))->getZExtValue(); // If Idx is 0, it's cheaper to do a move instead of a pextrw. if (Idx == 0) @@ -6729,7 +6778,9 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT_SSE4(SDValue Op, SDValue Assert = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Extract, DAG.getValueType(VT)); return DAG.getNode(ISD::TRUNCATE, dl, VT, Assert); - } else if (VT == MVT::f32) { + } + + if (VT == MVT::f32) { // EXTRACTPS outputs to a GPR32 register which will require a movd to copy // the result back to FR32 register. It's only worth matching if the // result has a single use which is a store or a bitcast to i32. And in @@ -6749,7 +6800,9 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT_SSE4(SDValue Op, Op.getOperand(0)), Op.getOperand(1)); return DAG.getNode(ISD::BITCAST, dl, MVT::f32, Extract); - } else if (VT == MVT::i32 || VT == MVT::i64) { + } + + if (VT == MVT::i32 || VT == MVT::i64) { // ExtractPS/pextrq works with constant index. if (isa(Op.getOperand(1))) return Op; @@ -6769,22 +6822,22 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, // If this is a 256-bit vector result, first extract the 128-bit vector and // then extract the element from the 128-bit vector. - if (VecVT.getSizeInBits() == 256) { + if (VecVT.is256BitVector()) { DebugLoc dl = Op.getNode()->getDebugLoc(); unsigned NumElems = VecVT.getVectorNumElements(); SDValue Idx = Op.getOperand(1); unsigned IdxVal = cast(Idx)->getZExtValue(); // Get the 128-bit vector. - bool Upper = IdxVal >= NumElems/2; - Vec = Extract128BitVector(Vec, - DAG.getConstant(Upper ? NumElems/2 : 0, MVT::i32), DAG, dl); + Vec = Extract128BitVector(Vec, IdxVal, DAG, dl); + if (IdxVal >= NumElems/2) + IdxVal -= NumElems/2; return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, Op.getValueType(), Vec, - Upper ? 
DAG.getConstant(IdxVal-NumElems/2, MVT::i32) : Idx); + DAG.getConstant(IdxVal, MVT::i32)); } - assert(Vec.getValueSizeInBits() <= 128 && "Unexpected vector length"); + assert(VecVT.is128BitVector() && "Unexpected vector length"); if (Subtarget->hasSSE41()) { SDValue Res = LowerEXTRACT_VECTOR_ELT_SSE4(Op, DAG); @@ -6811,7 +6864,9 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, SDValue Assert = DAG.getNode(ISD::AssertZext, dl, EltVT, Extract, DAG.getValueType(VT)); return DAG.getNode(ISD::TRUNCATE, dl, VT, Assert); - } else if (VT.getSizeInBits() == 32) { + } + + if (VT.getSizeInBits() == 32) { unsigned Idx = cast(Op.getOperand(1))->getZExtValue(); if (Idx == 0) return Op; @@ -6823,7 +6878,9 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, DAG.getUNDEF(VVT), Mask); return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, Vec, DAG.getIntPtrConstant(0)); - } else if (VT.getSizeInBits() == 64) { + } + + if (VT.getSizeInBits() == 64) { // FIXME: .td only matches this for <2 x f64>, not <2 x i64> on 32b // FIXME: seems like this should be unnecessary if mov{h,l}pd were taught // to match extract_elt for f64. @@ -6856,7 +6913,7 @@ X86TargetLowering::LowerINSERT_VECTOR_ELT_SSE4(SDValue Op, SDValue N1 = Op.getOperand(1); SDValue N2 = Op.getOperand(2); - if (VT.getSizeInBits() == 256) + if (!VT.is128BitVector()) return SDValue(); if ((EltVT.getSizeInBits() == 8 || EltVT.getSizeInBits() == 16) && @@ -6876,7 +6933,9 @@ X86TargetLowering::LowerINSERT_VECTOR_ELT_SSE4(SDValue Op, if (N2.getValueType() != MVT::i32) N2 = DAG.getIntPtrConstant(cast(N2)->getZExtValue()); return DAG.getNode(Opc, dl, VT, N0, N1, N2); - } else if (EltVT == MVT::f32 && isa(N2)) { + } + + if (EltVT == MVT::f32 && isa(N2)) { // Bits [7:6] of the constant are the source select. This will always be // zero here. The DAG Combiner may combine an extract_elt index into these // bits. For example (insert (extract, 3), 2) could be matched by putting @@ -6889,8 +6948,9 @@ X86TargetLowering::LowerINSERT_VECTOR_ELT_SSE4(SDValue Op, // Create this as a scalar to vector.. N1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4f32, N1); return DAG.getNode(X86ISD::INSERTPS, dl, VT, N0, N1, N2); - } else if ((EltVT == MVT::i32 || EltVT == MVT::i64) && - isa(N2)) { + } + + if ((EltVT == MVT::i32 || EltVT == MVT::i64) && isa(N2)) { // PINSR* works with constant index. return Op; } @@ -6909,23 +6969,22 @@ X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const { // If this is a 256-bit vector result, first extract the 128-bit vector, // insert the element into the extracted half and then place it back. - if (VT.getSizeInBits() == 256) { + if (VT.is256BitVector()) { if (!isa(N2)) return SDValue(); // Get the desired 128-bit vector half. unsigned NumElems = VT.getVectorNumElements(); unsigned IdxVal = cast(N2)->getZExtValue(); - bool Upper = IdxVal >= NumElems/2; - SDValue Ins128Idx = DAG.getConstant(Upper ? NumElems/2 : 0, MVT::i32); - SDValue V = Extract128BitVector(N0, Ins128Idx, DAG, dl); + SDValue V = Extract128BitVector(N0, IdxVal, DAG, dl); // Insert the element into the desired half. - V = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, V.getValueType(), V, - N1, Upper ? DAG.getConstant(IdxVal-NumElems/2, MVT::i32) : N2); + bool Upper = IdxVal >= NumElems/2; + V = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, V.getValueType(), V, N1, + DAG.getConstant(Upper ? 
IdxVal-NumElems/2 : IdxVal, MVT::i32)); // Insert the changed part back to the 256-bit vector - return Insert128BitVector(N0, V, Ins128Idx, DAG, dl); + return Insert128BitVector(N0, V, IdxVal, DAG, dl); } if (Subtarget->hasSSE41()) @@ -6954,7 +7013,7 @@ X86TargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const { // If this is a 256-bit vector result, first insert into a 128-bit // vector and then insert into the 256-bit vector. - if (OpVT.getSizeInBits() > 128) { + if (!OpVT.is128BitVector()) { // Insert into a 128-bit vector. EVT VT128 = EVT::getVectorVT(*Context, OpVT.getVectorElementType(), @@ -6963,19 +7022,16 @@ X86TargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const { Op = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT128, Op.getOperand(0)); // Insert the 128-bit vector. - return Insert128BitVector(DAG.getNode(ISD::UNDEF, dl, OpVT), Op, - DAG.getConstant(0, MVT::i32), - DAG, dl); + return Insert128BitVector(DAG.getUNDEF(OpVT), Op, 0, DAG, dl); } - if (Op.getValueType() == MVT::v1i64 && + if (OpVT == MVT::v1i64 && Op.getOperand(0).getValueType() == MVT::i64) return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v1i64, Op.getOperand(0)); SDValue AnyExt = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, Op.getOperand(0)); - assert(Op.getValueType().getSimpleVT().getSizeInBits() == 128 && - "Expected an SSE type!"); - return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), + assert(OpVT.is128BitVector() && "Expected an SSE type!"); + return DAG.getNode(ISD::BITCAST, dl, OpVT, DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32,AnyExt)); } @@ -6989,9 +7045,11 @@ X86TargetLowering::LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const { SDValue Vec = Op.getNode()->getOperand(0); SDValue Idx = Op.getNode()->getOperand(1); - if (Op.getNode()->getValueType(0).getSizeInBits() == 128 - && Vec.getNode()->getValueType(0).getSizeInBits() == 256) { - return Extract128BitVector(Vec, Idx, DAG, dl); + if (Op.getNode()->getValueType(0).is128BitVector() && + Vec.getNode()->getValueType(0).is256BitVector() && + isa(Idx)) { + unsigned IdxVal = cast(Idx)->getZExtValue(); + return Extract128BitVector(Vec, IdxVal, DAG, dl); } } return SDValue(); @@ -7008,9 +7066,11 @@ X86TargetLowering::LowerINSERT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const { SDValue SubVec = Op.getNode()->getOperand(1); SDValue Idx = Op.getNode()->getOperand(2); - if (Op.getNode()->getValueType(0).getSizeInBits() == 256 - && SubVec.getNode()->getValueType(0).getSizeInBits() == 128) { - return Insert128BitVector(Vec, SubVec, Idx, DAG, dl); + if (Op.getNode()->getValueType(0).is256BitVector() && + SubVec.getNode()->getValueType(0).is128BitVector() && + isa(Idx)) { + unsigned IdxVal = cast(Idx)->getZExtValue(); + return Insert128BitVector(Vec, SubVec, IdxVal, DAG, dl); } } return SDValue(); @@ -7219,7 +7279,7 @@ X86TargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const { static SDValue GetTLSADDR(SelectionDAG &DAG, SDValue Chain, GlobalAddressSDNode *GA, SDValue *InFlag, const EVT PtrVT, unsigned ReturnReg, - unsigned char OperandFlags) { + unsigned char OperandFlags, bool LocalDynamic = false) { MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); DebugLoc dl = GA->getDebugLoc(); @@ -7227,12 +7287,16 @@ GetTLSADDR(SelectionDAG &DAG, SDValue Chain, GlobalAddressSDNode *GA, GA->getValueType(0), GA->getOffset(), OperandFlags); + + X86ISD::NodeType CallType = LocalDynamic ? 
X86ISD::TLSBASEADDR + : X86ISD::TLSADDR; + if (InFlag) { SDValue Ops[] = { Chain, TGA, *InFlag }; - Chain = DAG.getNode(X86ISD::TLSADDR, dl, NodeTys, Ops, 3); + Chain = DAG.getNode(CallType, dl, NodeTys, Ops, 3); } else { SDValue Ops[] = { Chain, TGA }; - Chain = DAG.getNode(X86ISD::TLSADDR, dl, NodeTys, Ops, 2); + Chain = DAG.getNode(CallType, dl, NodeTys, Ops, 2); } // TLSADDR will be codegen'ed as call. Inform MFI that function has calls. @@ -7264,47 +7328,96 @@ LowerToTLSGeneralDynamicModel64(GlobalAddressSDNode *GA, SelectionDAG &DAG, X86::RAX, X86II::MO_TLSGD); } -// Lower ISD::GlobalTLSAddress using the "initial exec" (for no-pic) or -// "local exec" model. -static SDValue LowerToTLSExecModel(GlobalAddressSDNode *GA, SelectionDAG &DAG, - const EVT PtrVT, TLSModel::Model model, - bool is64Bit) { +static SDValue LowerToTLSLocalDynamicModel(GlobalAddressSDNode *GA, + SelectionDAG &DAG, + const EVT PtrVT, + bool is64Bit) { DebugLoc dl = GA->getDebugLoc(); - // Get the Thread Pointer, which is %gs:0 (32-bit) or %fs:0 (64-bit). - Value *Ptr = Constant::getNullValue(Type::getInt8PtrTy(*DAG.getContext(), - is64Bit ? 257 : 256)); - - SDValue ThreadPointer = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), - DAG.getIntPtrConstant(0), - MachinePointerInfo(Ptr), - false, false, false, 0); + // Get the start address of the TLS block for this module. + X86MachineFunctionInfo* MFI = DAG.getMachineFunction() + .getInfo(); + MFI->incNumLocalDynamicTLSAccesses(); - unsigned char OperandFlags = 0; + SDValue Base; + if (is64Bit) { + Base = GetTLSADDR(DAG, DAG.getEntryNode(), GA, NULL, PtrVT, X86::RAX, + X86II::MO_TLSLD, /*LocalDynamic=*/true); + } else { + SDValue InFlag; + SDValue Chain = DAG.getCopyToReg(DAG.getEntryNode(), dl, X86::EBX, + DAG.getNode(X86ISD::GlobalBaseReg, DebugLoc(), PtrVT), InFlag); + InFlag = Chain.getValue(1); + Base = GetTLSADDR(DAG, Chain, GA, &InFlag, PtrVT, X86::EAX, + X86II::MO_TLSLDM, /*LocalDynamic=*/true); + } + + // Note: the CleanupLocalDynamicTLSPass will remove redundant computations + // of Base. + + // Build x@dtpoff. + unsigned char OperandFlags = X86II::MO_DTPOFF; + unsigned WrapperKind = X86ISD::Wrapper; + SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), dl, + GA->getValueType(0), + GA->getOffset(), OperandFlags); + SDValue Offset = DAG.getNode(WrapperKind, dl, PtrVT, TGA); + + // Add x@dtpoff with the base. + return DAG.getNode(ISD::ADD, dl, PtrVT, Offset, Base); +} + +// Lower ISD::GlobalTLSAddress using the "initial exec" or "local exec" model. +static SDValue LowerToTLSExecModel(GlobalAddressSDNode *GA, SelectionDAG &DAG, + const EVT PtrVT, TLSModel::Model model, + bool is64Bit, bool isPIC) { + DebugLoc dl = GA->getDebugLoc(); + + // Get the Thread Pointer, which is %gs:0 (32-bit) or %fs:0 (64-bit). + Value *Ptr = Constant::getNullValue(Type::getInt8PtrTy(*DAG.getContext(), + is64Bit ? 257 : 256)); + + SDValue ThreadPointer = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), + DAG.getIntPtrConstant(0), + MachinePointerInfo(Ptr), + false, false, false, 0); + + unsigned char OperandFlags = 0; // Most TLS accesses are not RIP relative, even on x86-64. One exception is // initialexec. unsigned WrapperKind = X86ISD::Wrapper; if (model == TLSModel::LocalExec) { OperandFlags = is64Bit ? 
X86II::MO_TPOFF : X86II::MO_NTPOFF; - } else if (is64Bit) { - assert(model == TLSModel::InitialExec); - OperandFlags = X86II::MO_GOTTPOFF; - WrapperKind = X86ISD::WrapperRIP; + } else if (model == TLSModel::InitialExec) { + if (is64Bit) { + OperandFlags = X86II::MO_GOTTPOFF; + WrapperKind = X86ISD::WrapperRIP; + } else { + OperandFlags = isPIC ? X86II::MO_GOTNTPOFF : X86II::MO_INDNTPOFF; + } } else { - assert(model == TLSModel::InitialExec); - OperandFlags = X86II::MO_INDNTPOFF; + llvm_unreachable("Unexpected model"); } - // emit "addl x@ntpoff,%eax" (local exec) or "addl x@indntpoff,%eax" (initial - // exec) + // emit "addl x@ntpoff,%eax" (local exec) + // or "addl x@indntpoff,%eax" (initial exec) + // or "addl x@gotntpoff(%ebx) ,%eax" (initial exec, 32-bit pic) SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), dl, GA->getValueType(0), GA->getOffset(), OperandFlags); SDValue Offset = DAG.getNode(WrapperKind, dl, PtrVT, TGA); - if (model == TLSModel::InitialExec) + if (model == TLSModel::InitialExec) { + if (isPIC && !is64Bit) { + Offset = DAG.getNode(ISD::ADD, dl, PtrVT, + DAG.getNode(X86ISD::GlobalBaseReg, DebugLoc(), PtrVT), + Offset); + } + Offset = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Offset, - MachinePointerInfo::getGOT(), false, false, false, 0); + MachinePointerInfo::getGOT(), false, false, false, + 0); + } // The address of the thread local variable is the add of the thread // pointer with the offset of the variable. @@ -7318,29 +7431,26 @@ X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const { const GlobalValue *GV = GA->getGlobal(); if (Subtarget->isTargetELF()) { - // TODO: implement the "local dynamic" model - // TODO: implement the "initial exec"model for pic executables - - // If GV is an alias then use the aliasee for determining - // thread-localness. - if (const GlobalAlias *GA = dyn_cast(GV)) - GV = GA->resolveAliasedGlobal(false); - TLSModel::Model model = getTargetMachine().getTLSModel(GV); switch (model) { case TLSModel::GeneralDynamic: - case TLSModel::LocalDynamic: // not implemented if (Subtarget->is64Bit()) return LowerToTLSGeneralDynamicModel64(GA, DAG, getPointerTy()); return LowerToTLSGeneralDynamicModel32(GA, DAG, getPointerTy()); - + case TLSModel::LocalDynamic: + return LowerToTLSLocalDynamicModel(GA, DAG, getPointerTy(), + Subtarget->is64Bit()); case TLSModel::InitialExec: case TLSModel::LocalExec: return LowerToTLSExecModel(GA, DAG, getPointerTy(), model, - Subtarget->is64Bit()); + Subtarget->is64Bit(), + getTargetMachine().getRelocationModel() == Reloc::PIC_); } - } else if (Subtarget->isTargetDarwin()) { + llvm_unreachable("Unknown TLS model."); + } + + if (Subtarget->isTargetDarwin()) { // Darwin only has one model of TLS. Lower to that. unsigned char OpFlag = 0; unsigned WrapperKind = Subtarget->isPICStyleRIPRel() ? @@ -7383,7 +7493,9 @@ X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const { unsigned Reg = Subtarget->is64Bit() ? 
X86::RAX : X86::EAX; return DAG.getCopyFromReg(Chain, DL, Reg, getPointerTy(), Chain.getValue(1)); - } else if (Subtarget->isTargetWindows()) { + } + + if (Subtarget->isTargetWindows()) { // Just use the implicit TLS architecture // Need to generate something similar to: // mov rdx, qword [gs:abs 58H]; Load pointer to ThreadLocalStorage @@ -7429,7 +7541,7 @@ X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const { false, false, false, 0); SDValue Scale = DAG.getConstant(Log2_64_Ceil(TD->getPointerSize()), - getPointerTy()); + getPointerTy()); IDX = DAG.getNode(ISD::SHL, dl, getPointerTy(), IDX, Scale); SDValue res = DAG.getNode(ISD::ADD, dl, getPointerTy(), ThreadPointer, IDX); @@ -7600,9 +7712,9 @@ SDValue X86TargetLowering::LowerUINT_TO_FP_i64(SDValue Op, punpckldq (c0), %xmm0 // c0: (uint4){ 0x43300000U, 0x45300000U, 0U, 0U } subpd (c1), %xmm0 // c1: (double2){ 0x1.0p52, 0x1.0p52 * 0x1.0p32 } #ifdef __SSE3__ - haddpd %xmm0, %xmm0 + haddpd %xmm0, %xmm0 #else - pshufd $0x4e, %xmm0, %xmm1 + pshufd $0x4e, %xmm0, %xmm1 addpd %xmm1, %xmm0 #endif */ @@ -7693,12 +7805,11 @@ SDValue X86TargetLowering::LowerUINT_TO_FP_i32(SDValue Op, // Handle final rounding. EVT DestVT = Op.getValueType(); - if (DestVT.bitsLT(MVT::f64)) { + if (DestVT.bitsLT(MVT::f64)) return DAG.getNode(ISD::FP_ROUND, dl, DestVT, Sub, DAG.getIntPtrConstant(0)); - } else if (DestVT.bitsGT(MVT::f64)) { + if (DestVT.bitsGT(MVT::f64)) return DAG.getNode(ISD::FP_EXTEND, dl, DestVT, Sub); - } // Handle final rounding. return Sub; @@ -7719,10 +7830,9 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op, EVT DstVT = Op.getValueType(); if (SrcVT == MVT::i64 && DstVT == MVT::f64 && X86ScalarSSEf64) return LowerUINT_TO_FP_i64(Op, DAG); - else if (SrcVT == MVT::i32 && X86ScalarSSEf64) + if (SrcVT == MVT::i32 && X86ScalarSSEf64) return LowerUINT_TO_FP_i32(Op, DAG); - else if (Subtarget->is64Bit() && - SrcVT == MVT::i64 && DstVT == MVT::f32) + if (Subtarget->is64Bit() && SrcVT == MVT::i64 && DstVT == MVT::f32) return SDValue(); // Make a 64-bit buffer, and use it to build an FILD. @@ -7899,9 +8009,9 @@ SDValue X86TargetLowering::LowerFP_TO_SINT(SDValue Op, return DAG.getLoad(Op.getValueType(), Op.getDebugLoc(), FIST, StackSlot, MachinePointerInfo(), false, false, false, 0); - else - // The node is the result. - return FIST; + + // The node is the result. + return FIST; } SDValue X86TargetLowering::LowerFP_TO_UINT(SDValue Op, @@ -7916,9 +8026,9 @@ SDValue X86TargetLowering::LowerFP_TO_UINT(SDValue Op, return DAG.getLoad(Op.getValueType(), Op.getDebugLoc(), FIST, StackSlot, MachinePointerInfo(), false, false, false, 0); - else - // The node is the result. - return FIST; + + // The node is the result. + return FIST; } SDValue X86TargetLowering::LowerFABS(SDValue Op, @@ -7931,7 +8041,7 @@ SDValue X86TargetLowering::LowerFABS(SDValue Op, EltVT = VT.getVectorElementType(); Constant *C; if (EltVT == MVT::f64) { - C = ConstantVector::getSplat(2, + C = ConstantVector::getSplat(2, ConstantFP::get(*Context, APFloat(APInt(64, ~(1ULL << 63))))); } else { C = ConstantVector::getSplat(4, @@ -7965,15 +8075,15 @@ SDValue X86TargetLowering::LowerFNEG(SDValue Op, SelectionDAG &DAG) const { MachinePointerInfo::getConstantPool(), false, false, false, 16); if (VT.isVector()) { - MVT XORVT = VT.getSizeInBits() == 128 ? MVT::v2i64 : MVT::v4i64; + MVT XORVT = VT.is128BitVector() ? 
MVT::v2i64 : MVT::v4i64; return DAG.getNode(ISD::BITCAST, dl, VT, DAG.getNode(ISD::XOR, dl, XORVT, - DAG.getNode(ISD::BITCAST, dl, XORVT, - Op.getOperand(0)), - DAG.getNode(ISD::BITCAST, dl, XORVT, Mask))); - } else { - return DAG.getNode(X86ISD::FXOR, dl, VT, Op.getOperand(0), Mask); + DAG.getNode(ISD::BITCAST, dl, XORVT, + Op.getOperand(0)), + DAG.getNode(ISD::BITCAST, dl, XORVT, Mask))); } + + return DAG.getNode(X86ISD::FXOR, dl, VT, Op.getOperand(0), Mask); } SDValue X86TargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const { @@ -8172,7 +8282,9 @@ SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC, // Otherwise use a regular EFLAGS-setting instruction. switch (Op.getNode()->getOpcode()) { default: llvm_unreachable("unexpected operator!"); - case ISD::SUB: Opcode = X86ISD::SUB; break; + case ISD::SUB: + Opcode = X86ISD::SUB; + break; case ISD::OR: Opcode = X86ISD::OR; break; case ISD::XOR: Opcode = X86ISD::XOR; break; case ISD::AND: Opcode = X86ISD::AND; break; @@ -8198,6 +8310,14 @@ SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC, return DAG.getNode(X86ISD::CMP, dl, MVT::i32, Op, DAG.getConstant(0, Op.getValueType())); + if (Opcode == X86ISD::CMP) { + SDValue New = DAG.getNode(Opcode, dl, MVT::i32, Op.getOperand(0), + Op.getOperand(1)); + // We can't replace usage of SUB with CMP. + // The SUB node will be removed later because there is no use of it. + return SDValue(New.getNode(), 0); + } + SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32); SmallVector Ops; for (unsigned i = 0; i != NumOperands; ++i) @@ -8217,9 +8337,41 @@ SDValue X86TargetLowering::EmitCmp(SDValue Op0, SDValue Op1, unsigned X86CC, return EmitTest(Op0, X86CC, DAG); DebugLoc dl = Op0.getDebugLoc(); + if ((Op0.getValueType() == MVT::i8 || Op0.getValueType() == MVT::i16 || + Op0.getValueType() == MVT::i32 || Op0.getValueType() == MVT::i64)) { + // Use SUB instead of CMP to enable CSE between SUB and CMP. + SDVTList VTs = DAG.getVTList(Op0.getValueType(), MVT::i32); + SDValue Sub = DAG.getNode(X86ISD::SUB, dl, VTs, + Op0, Op1); + return SDValue(Sub.getNode(), 1); + } return DAG.getNode(X86ISD::CMP, dl, MVT::i32, Op0, Op1); } +/// Convert a comparison if required by the subtarget. +SDValue X86TargetLowering::ConvertCmpIfNecessary(SDValue Cmp, + SelectionDAG &DAG) const { + // If the subtarget does not support the FUCOMI instruction, floating-point + // comparisons have to be converted. + if (Subtarget->hasCMov() || + Cmp.getOpcode() != X86ISD::CMP || + !Cmp.getOperand(0).getValueType().isFloatingPoint() || + !Cmp.getOperand(1).getValueType().isFloatingPoint()) + return Cmp; + + // The instruction selector will select an FUCOM instruction instead of + // FUCOMI, which writes the comparison result to FPSW instead of EFLAGS. Hence + // build an SDNode sequence that transfers the result from FPSW into EFLAGS: + // (X86sahf (trunc (srl (X86fp_stsw (trunc (X86cmp ...)), 8)))) + DebugLoc dl = Cmp.getDebugLoc(); + SDValue TruncFPSW = DAG.getNode(ISD::TRUNCATE, dl, MVT::i16, Cmp); + SDValue FNStSW = DAG.getNode(X86ISD::FNSTSW16r, dl, MVT::i16, TruncFPSW); + SDValue Srl = DAG.getNode(ISD::SRL, dl, MVT::i16, FNStSW, + DAG.getConstant(8, MVT::i8)); + SDValue TruncSrl = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, Srl); + return DAG.getNode(X86ISD::SAHF, dl, MVT::i32, TruncSrl); +} + /// LowerToBT - Result of 'and' is compared against zero. Turn it into a BT node /// if it's possible. 
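The FNSTSW/SAHF sequence built by ConvertCmpIfNecessary above works because SAHF copies AH into the low EFLAGS byte, so the x87 condition codes land where the branch conditions expect them. A scalar model of that bit shuffling, assuming the usual FPSW layout (C0 = bit 8, C2 = bit 10, C3 = bit 14):

    #include <cstdint>

    static uint8_t fpswToEflagsLowByte(uint16_t FPSW) {
      uint8_t AH = uint8_t(FPSW >> 8);   // the srl-by-8 plus truncate above
      uint8_t EFlags = 0;
      if (AH & 0x01) EFlags |= 0x01;     // C0 -> CF (bit 0)
      if (AH & 0x04) EFlags |= 0x04;     // C2 -> PF (bit 2)
      if (AH & 0x40) EFlags |= 0x40;     // C3 -> ZF (bit 6)
      return EFlags;
    }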
SDValue X86TargetLowering::LowerToBT(SDValue And, ISD::CondCode CC, @@ -8341,6 +8493,7 @@ SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { return SDValue(); SDValue EFLAGS = EmitCmp(Op0, Op1, X86CC, DAG); + EFLAGS = ConvertCmpIfNecessary(EFLAGS, DAG); return DAG.getNode(X86ISD::SETCC, dl, MVT::i8, DAG.getConstant(X86CC, MVT::i8), EFLAGS); } @@ -8350,24 +8503,22 @@ SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { static SDValue Lower256IntVSETCC(SDValue Op, SelectionDAG &DAG) { EVT VT = Op.getValueType(); - assert(VT.getSizeInBits() == 256 && Op.getOpcode() == ISD::SETCC && + assert(VT.is256BitVector() && Op.getOpcode() == ISD::SETCC && "Unsupported value type for operation"); - int NumElems = VT.getVectorNumElements(); + unsigned NumElems = VT.getVectorNumElements(); DebugLoc dl = Op.getDebugLoc(); SDValue CC = Op.getOperand(2); - SDValue Idx0 = DAG.getConstant(0, MVT::i32); - SDValue Idx1 = DAG.getConstant(NumElems/2, MVT::i32); // Extract the LHS vectors SDValue LHS = Op.getOperand(0); - SDValue LHS1 = Extract128BitVector(LHS, Idx0, DAG, dl); - SDValue LHS2 = Extract128BitVector(LHS, Idx1, DAG, dl); + SDValue LHS1 = Extract128BitVector(LHS, 0, DAG, dl); + SDValue LHS2 = Extract128BitVector(LHS, NumElems/2, DAG, dl); // Extract the RHS vectors SDValue RHS = Op.getOperand(1); - SDValue RHS1 = Extract128BitVector(RHS, Idx0, DAG, dl); - SDValue RHS2 = Extract128BitVector(RHS, Idx1, DAG, dl); + SDValue RHS1 = Extract128BitVector(RHS, 0, DAG, dl); + SDValue RHS2 = Extract128BitVector(RHS, NumElems/2, DAG, dl); // Issue the operation on the smaller types and concatenate the result back MVT EltVT = VT.getVectorElementType().getSimpleVT(); @@ -8389,10 +8540,12 @@ SDValue X86TargetLowering::LowerVSETCC(SDValue Op, SelectionDAG &DAG) const { DebugLoc dl = Op.getDebugLoc(); if (isFP) { - unsigned SSECC = 8; +#ifndef NDEBUG EVT EltVT = Op0.getValueType().getVectorElementType(); - assert(EltVT == MVT::f32 || EltVT == MVT::f64); (void)EltVT; + assert(EltVT == MVT::f32 || EltVT == MVT::f64); +#endif + unsigned SSECC; bool Swap = false; // SSE Condition code mapping: @@ -8405,7 +8558,7 @@ SDValue X86TargetLowering::LowerVSETCC(SDValue Op, SelectionDAG &DAG) const { // 6 - NLE // 7 - ORD switch (SetCCOpcode) { - default: break; + default: llvm_unreachable("Unexpected SETCC condition"); case ISD::SETOEQ: case ISD::SETEQ: SSECC = 0; break; case ISD::SETOGT: @@ -8419,33 +8572,33 @@ SDValue X86TargetLowering::LowerVSETCC(SDValue Op, SelectionDAG &DAG) const { case ISD::SETUO: SSECC = 3; break; case ISD::SETUNE: case ISD::SETNE: SSECC = 4; break; - case ISD::SETULE: Swap = true; + case ISD::SETULE: Swap = true; // Fallthrough case ISD::SETUGE: SSECC = 5; break; - case ISD::SETULT: Swap = true; + case ISD::SETULT: Swap = true; // Fallthrough case ISD::SETUGT: SSECC = 6; break; case ISD::SETO: SSECC = 7; break; + case ISD::SETUEQ: + case ISD::SETONE: SSECC = 8; break; } if (Swap) std::swap(Op0, Op1); // In the two special cases we can't handle, emit two comparisons. 
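In scalar terms, the two expansions emitted below are ueq(a,b) = unord(a,b) | eq(a,b) (cmpps predicates 3 and 0, combined with OR) and one(a,b) = ord(a,b) & neq(a,b) (predicates 7 and 4, combined with AND). A small model of the intended semantics:

    #include <cmath>

    static bool ueq(double a, double b) {   // SETUEQ
      bool unord = std::isnan(a) || std::isnan(b);
      return unord || a == b;
    }

    static bool one(double a, double b) {   // SETONE
      bool ord = !std::isnan(a) && !std::isnan(b);
      return ord && a != b;
    }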
if (SSECC == 8) { + unsigned CC0, CC1; + unsigned CombineOpc; if (SetCCOpcode == ISD::SETUEQ) { - SDValue UNORD, EQ; - UNORD = DAG.getNode(X86ISD::CMPP, dl, VT, Op0, Op1, - DAG.getConstant(3, MVT::i8)); - EQ = DAG.getNode(X86ISD::CMPP, dl, VT, Op0, Op1, - DAG.getConstant(0, MVT::i8)); - return DAG.getNode(ISD::OR, dl, VT, UNORD, EQ); - } else if (SetCCOpcode == ISD::SETONE) { - SDValue ORD, NEQ; - ORD = DAG.getNode(X86ISD::CMPP, dl, VT, Op0, Op1, - DAG.getConstant(7, MVT::i8)); - NEQ = DAG.getNode(X86ISD::CMPP, dl, VT, Op0, Op1, - DAG.getConstant(4, MVT::i8)); - return DAG.getNode(ISD::AND, dl, VT, ORD, NEQ); + CC0 = 3; CC1 = 0; CombineOpc = ISD::OR; + } else { + assert(SetCCOpcode == ISD::SETONE); + CC0 = 7; CC1 = 4; CombineOpc = ISD::AND; } - llvm_unreachable("Illegal FP comparison"); + + SDValue Cmp0 = DAG.getNode(X86ISD::CMPP, dl, VT, Op0, Op1, + DAG.getConstant(CC0, MVT::i8)); + SDValue Cmp1 = DAG.getNode(X86ISD::CMPP, dl, VT, Op0, Op1, + DAG.getConstant(CC1, MVT::i8)); + return DAG.getNode(CombineOpc, dl, VT, Cmp0, Cmp1); } // Handle all other FP comparisons here. return DAG.getNode(X86ISD::CMPP, dl, VT, Op0, Op1, @@ -8453,17 +8606,17 @@ SDValue X86TargetLowering::LowerVSETCC(SDValue Op, SelectionDAG &DAG) const { } // Break 256-bit integer vector compare into smaller ones. - if (VT.getSizeInBits() == 256 && !Subtarget->hasAVX2()) + if (VT.is256BitVector() && !Subtarget->hasAVX2()) return Lower256IntVSETCC(Op, DAG); // We are handling one of the integer comparisons here. Since SSE only has // GT and EQ comparisons for integer, swapping operands and multiple // operations may be required for some comparisons. - unsigned Opc = 0; + unsigned Opc; bool Swap = false, Invert = false, FlipSigns = false; switch (SetCCOpcode) { - default: break; + default: llvm_unreachable("Unexpected SETCC condition"); case ISD::SETNE: Invert = true; case ISD::SETEQ: Opc = X86ISD::PCMPEQ; break; case ISD::SETLT: Swap = true; @@ -8480,10 +8633,12 @@ SDValue X86TargetLowering::LowerVSETCC(SDValue Op, SelectionDAG &DAG) const { // Check that the operation in question is available (most are plain SSE2, // but PCMPGTQ and PCMPEQQ have different requirements). - if (Opc == X86ISD::PCMPGT && VT == MVT::v2i64 && !Subtarget->hasSSE42()) - return SDValue(); - if (Opc == X86ISD::PCMPEQ && VT == MVT::v2i64 && !Subtarget->hasSSE41()) - return SDValue(); + if (VT == MVT::v2i64) { + if (Opc == X86ISD::PCMPGT && !Subtarget->hasSSE42()) + return SDValue(); + if (Opc == X86ISD::PCMPEQ && !Subtarget->hasSSE41()) + return SDValue(); + } // Since SSE has no unsigned integer comparisons, we need to flip the sign // bits of the inputs before performing those operations. @@ -8510,7 +8665,8 @@ SDValue X86TargetLowering::LowerVSETCC(SDValue Op, SelectionDAG &DAG) const { // isX86LogicalCmp - Return true if opcode is a X86 logical comparison. 
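// Two identities in this hunk are easy to verify in scalar form. First,
// SETUEQ/SETONE have no single CMPP predicate, so they are assembled from
// immediates (3,0) combined with OR and (7,4) combined with AND. Second,
// SSE integer compares are signed only, so unsigned compares flip the sign
// bit of both operands first ("FlipSigns"). Illustrative, not LLVM code:
#include <cmath>
#include <cstdint>

static bool ueq(double a, double b) {   // UNORD (imm 3) | EQ (imm 0)
  return std::isnan(a) || std::isnan(b) || a == b;
}
static bool one(double a, double b) {   // ORD (imm 7) & NEQ (imm 4)
  return !std::isnan(a) && !std::isnan(b) && a != b;
}

// Unsigned a > b iff signed (a ^ 0x80000000) > (b ^ 0x80000000).
static bool ugt(uint32_t a, uint32_t b) {
  return int32_t(a ^ 0x80000000u) > int32_t(b ^ 0x80000000u);
}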
static bool isX86LogicalCmp(SDValue Op) { unsigned Opc = Op.getNode()->getOpcode(); - if (Opc == X86ISD::CMP || Opc == X86ISD::COMI || Opc == X86ISD::UCOMI) + if (Opc == X86ISD::CMP || Opc == X86ISD::COMI || Opc == X86ISD::UCOMI || + Opc == X86ISD::SAHF) return true; if (Op.getResNo() == 1 && (Opc == X86ISD::ADD || @@ -8542,6 +8698,16 @@ static bool isAllOnes(SDValue V) { return C && C->isAllOnesValue(); } +static bool isTruncWithZeroHighBitsInput(SDValue V, SelectionDAG &DAG) { + if (V.getOpcode() != ISD::TRUNCATE) + return false; + + SDValue VOp0 = V.getOperand(0); + unsigned InBits = VOp0.getValueSizeInBits(); + unsigned Bits = V.getValueSizeInBits(); + return DAG.MaskedValueIsZero(VOp0, APInt::getHighBitsSet(InBits,InBits-Bits)); +} + SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { bool addTest = true; SDValue Cond = Op.getOperand(0); @@ -8572,8 +8738,25 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { SDValue Y = isAllOnes(Op2) ? Op1 : Op2; SDValue CmpOp0 = Cmp.getOperand(0); + // Apply further optimizations for special cases + // (select (x != 0), -1, 0) -> neg & sbb + // (select (x == 0), 0, -1) -> neg & sbb + if (ConstantSDNode *YC = dyn_cast(Y)) + if (YC->isNullValue() && + (isAllOnes(Op1) == (CondCode == X86::COND_NE))) { + SDVTList VTs = DAG.getVTList(CmpOp0.getValueType(), MVT::i32); + SDValue Neg = DAG.getNode(X86ISD::SUB, DL, VTs, + DAG.getConstant(0, CmpOp0.getValueType()), + CmpOp0); + SDValue Res = DAG.getNode(X86ISD::SETCC_CARRY, DL, Op.getValueType(), + DAG.getConstant(X86::COND_B, MVT::i8), + SDValue(Neg.getNode(), 1)); + return Res; + } + Cmp = DAG.getNode(X86ISD::CMP, DL, MVT::i32, CmpOp0, DAG.getConstant(1, CmpOp0.getValueType())); + Cmp = ConvertCmpIfNecessary(Cmp, DAG); SDValue Res = // Res = 0 or -1. DAG.getNode(X86ISD::SETCC_CARRY, DL, Op.getValueType(), @@ -8654,9 +8837,9 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { } if (addTest) { - // Look pass the truncate. - if (Cond.getOpcode() == ISD::TRUNCATE) - Cond = Cond.getOperand(0); + // Look pass the truncate if the high bits are known zero. + if (isTruncWithZeroHighBitsInput(Cond, DAG)) + Cond = Cond.getOperand(0); // We know the result of AND is compared against zero. Try to match // it to BT. @@ -8679,7 +8862,8 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { // a < b ? 0 : -1 -> RES = setcc_carry // a >= b ? -1 : 0 -> RES = setcc_carry // a >= b ? 
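// The new LowerSELECT special case rewrites (select (x != 0), -1, 0) as NEG
// plus SETCC_CARRY (an SBB of a register with itself): NEG sets CF exactly
// when x is non-zero, and subtracting that borrow from zero smears the flag
// across every bit. A branchless scalar model, two's complement assumed:
#include <cstdint>

static int32_t allOnesIfNonZero(uint32_t x) {
  uint32_t cf = (x != 0);   // CF after NEG x: 0 - x borrows iff x != 0
  return int32_t(0u - cf);  // SBB r,r computes -CF: 0 or 0xFFFFFFFF
}
// (select (x == 0), 0, -1) yields the same value; the code keys the rewrite
// off COND_E/COND_NE plus which operand is the all-ones constant.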
0 : -1 -> RES = ~setcc_carry - if (Cond.getOpcode() == X86ISD::CMP) { + if (Cond.getOpcode() == X86ISD::SUB) { + Cond = ConvertCmpIfNecessary(Cond, DAG); unsigned CondCode = cast(CC)->getZExtValue(); if ((CondCode == X86::COND_AE || CondCode == X86::COND_B) && @@ -8918,6 +9102,7 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const { SDValue Cmp = DAG.getNode(X86ISD::CMP, dl, MVT::i32, Cond.getOperand(0), Cond.getOperand(1)); + Cmp = ConvertCmpIfNecessary(Cmp, DAG); CC = DAG.getConstant(X86::COND_NE, MVT::i8); Chain = DAG.getNode(X86ISD::BRCOND, dl, Op.getValueType(), Chain, Dest, CC, Cmp); @@ -8947,6 +9132,7 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const { SDValue Cmp = DAG.getNode(X86ISD::CMP, dl, MVT::i32, Cond.getOperand(0), Cond.getOperand(1)); + Cmp = ConvertCmpIfNecessary(Cmp, DAG); CC = DAG.getConstant(X86::COND_NE, MVT::i8); Chain = DAG.getNode(X86ISD::BRCOND, dl, Op.getValueType(), Chain, Dest, CC, Cmp); @@ -8960,9 +9146,9 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const { } if (addTest) { - // Look pass the truncate. - if (Cond.getOpcode() == ISD::TRUNCATE) - Cond = Cond.getOperand(0); + // Look pass the truncate if the high bits are known zero. + if (isTruncWithZeroHighBitsInput(Cond, DAG)) + Cond = Cond.getOperand(0); // We know the result of AND is compared against zero. Try to match // it to BT. @@ -8980,6 +9166,7 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const { CC = DAG.getConstant(X86::COND_NE, MVT::i8); Cond = EmitTest(Cond, X86::COND_NE, DAG); } + Cond = ConvertCmpIfNecessary(Cond, DAG); return DAG.getNode(X86ISD::BRCOND, dl, Op.getValueType(), Chain, Dest, CC, Cond); } @@ -9018,7 +9205,7 @@ X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, const Function *F = MF.getFunction(); for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end(); - I != E; I++) + I != E; ++I) if (I->hasNestAttr()) report_fatal_error("Cannot use segmented stacks with functions that " "have nested arguments."); @@ -9201,12 +9388,15 @@ static SDValue getTargetVShiftNode(unsigned Opc, DebugLoc dl, EVT VT, assert(ShAmt.getValueType() == MVT::i32 && "ShAmt is not i32"); if (isa(ShAmt)) { + // Constant may be a TargetConstant. Use a regular constant. + uint32_t ShiftAmt = cast(ShAmt)->getZExtValue(); switch (Opc) { default: llvm_unreachable("Unknown target vector shift node"); case X86ISD::VSHLI: case X86ISD::VSRLI: case X86ISD::VSRAI: - return DAG.getNode(Opc, dl, VT, SrcOp, ShAmt); + return DAG.getNode(Opc, dl, VT, SrcOp, + DAG.getConstant(ShiftAmt, MVT::i32)); } } @@ -9223,10 +9413,15 @@ static SDValue getTargetVShiftNode(unsigned Opc, DebugLoc dl, EVT VT, SDValue ShOps[4]; ShOps[0] = ShAmt; ShOps[1] = DAG.getConstant(0, MVT::i32); - ShOps[2] = DAG.getUNDEF(MVT::i32); - ShOps[3] = DAG.getUNDEF(MVT::i32); + ShOps[2] = ShOps[3] = DAG.getUNDEF(MVT::i32); ShAmt = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, &ShOps[0], 4); - ShAmt = DAG.getNode(ISD::BITCAST, dl, VT, ShAmt); + + // The return type has to be a 128-bit type with the same element + // type as the input type. 
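// getTargetVShiftNode materializes a non-constant scalar count in lane 0 of
// a 128-bit vector (BUILD_VECTOR {amt, 0, undef, undef}) because PSLL/PSRL/
// PSRA read their count from the low 64 bits of an XMM register. A scalar
// model of the resulting whole-vector shift, including the zeroing when the
// count is out of range (a sketch, not the DAG construction):
#include <array>
#include <cstdint>

static std::array<uint32_t, 4> psrld(std::array<uint32_t, 4> v, uint32_t amt) {
  const std::array<uint32_t, 4> shAmtVec = { amt, 0, 0, 0 }; // amt in lane 0
  const uint32_t count = shAmtVec[0];        // hardware reads the low quadword
  for (uint32_t &lane : v)
    lane = count < 32 ? (lane >> count) : 0; // counts >= width shift out to 0
  return v;
}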
+ MVT EltVT = VT.getVectorElementType().getSimpleVT(); + EVT ShVT = MVT::getVectorVT(EltVT, 128/EltVT.getSizeInBits()); + + ShAmt = DAG.getNode(ISD::BITCAST, dl, ShVT, ShAmt); return DAG.getNode(Opc, dl, VT, SrcOp, ShAmt); } @@ -9261,8 +9456,8 @@ X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const case Intrinsic::x86_sse2_ucomigt_sd: case Intrinsic::x86_sse2_ucomige_sd: case Intrinsic::x86_sse2_ucomineq_sd: { - unsigned Opc = 0; - ISD::CondCode CC = ISD::SETCC_INVALID; + unsigned Opc; + ISD::CondCode CC; switch (IntNo) { default: llvm_unreachable("Impossible intrinsic"); // Can't reach here. case Intrinsic::x86_sse_comieq_ss: @@ -9336,245 +9531,102 @@ X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const DAG.getConstant(X86CC, MVT::i8), Cond); return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC); } - // XOP comparison intrinsics - case Intrinsic::x86_xop_vpcomltb: - case Intrinsic::x86_xop_vpcomltw: - case Intrinsic::x86_xop_vpcomltd: - case Intrinsic::x86_xop_vpcomltq: - case Intrinsic::x86_xop_vpcomltub: - case Intrinsic::x86_xop_vpcomltuw: - case Intrinsic::x86_xop_vpcomltud: - case Intrinsic::x86_xop_vpcomltuq: - case Intrinsic::x86_xop_vpcomleb: - case Intrinsic::x86_xop_vpcomlew: - case Intrinsic::x86_xop_vpcomled: - case Intrinsic::x86_xop_vpcomleq: - case Intrinsic::x86_xop_vpcomleub: - case Intrinsic::x86_xop_vpcomleuw: - case Intrinsic::x86_xop_vpcomleud: - case Intrinsic::x86_xop_vpcomleuq: - case Intrinsic::x86_xop_vpcomgtb: - case Intrinsic::x86_xop_vpcomgtw: - case Intrinsic::x86_xop_vpcomgtd: - case Intrinsic::x86_xop_vpcomgtq: - case Intrinsic::x86_xop_vpcomgtub: - case Intrinsic::x86_xop_vpcomgtuw: - case Intrinsic::x86_xop_vpcomgtud: - case Intrinsic::x86_xop_vpcomgtuq: - case Intrinsic::x86_xop_vpcomgeb: - case Intrinsic::x86_xop_vpcomgew: - case Intrinsic::x86_xop_vpcomged: - case Intrinsic::x86_xop_vpcomgeq: - case Intrinsic::x86_xop_vpcomgeub: - case Intrinsic::x86_xop_vpcomgeuw: - case Intrinsic::x86_xop_vpcomgeud: - case Intrinsic::x86_xop_vpcomgeuq: - case Intrinsic::x86_xop_vpcomeqb: - case Intrinsic::x86_xop_vpcomeqw: - case Intrinsic::x86_xop_vpcomeqd: - case Intrinsic::x86_xop_vpcomeqq: - case Intrinsic::x86_xop_vpcomequb: - case Intrinsic::x86_xop_vpcomequw: - case Intrinsic::x86_xop_vpcomequd: - case Intrinsic::x86_xop_vpcomequq: - case Intrinsic::x86_xop_vpcomneb: - case Intrinsic::x86_xop_vpcomnew: - case Intrinsic::x86_xop_vpcomned: - case Intrinsic::x86_xop_vpcomneq: - case Intrinsic::x86_xop_vpcomneub: - case Intrinsic::x86_xop_vpcomneuw: - case Intrinsic::x86_xop_vpcomneud: - case Intrinsic::x86_xop_vpcomneuq: - case Intrinsic::x86_xop_vpcomfalseb: - case Intrinsic::x86_xop_vpcomfalsew: - case Intrinsic::x86_xop_vpcomfalsed: - case Intrinsic::x86_xop_vpcomfalseq: - case Intrinsic::x86_xop_vpcomfalseub: - case Intrinsic::x86_xop_vpcomfalseuw: - case Intrinsic::x86_xop_vpcomfalseud: - case Intrinsic::x86_xop_vpcomfalseuq: - case Intrinsic::x86_xop_vpcomtrueb: - case Intrinsic::x86_xop_vpcomtruew: - case Intrinsic::x86_xop_vpcomtrued: - case Intrinsic::x86_xop_vpcomtrueq: - case Intrinsic::x86_xop_vpcomtrueub: - case Intrinsic::x86_xop_vpcomtrueuw: - case Intrinsic::x86_xop_vpcomtrueud: - case Intrinsic::x86_xop_vpcomtrueuq: { - unsigned CC = 0; - unsigned Opc = 0; - - switch (IntNo) { - default: llvm_unreachable("Impossible intrinsic"); // Can't reach here. 
- case Intrinsic::x86_xop_vpcomltb: - case Intrinsic::x86_xop_vpcomltw: - case Intrinsic::x86_xop_vpcomltd: - case Intrinsic::x86_xop_vpcomltq: - CC = 0; - Opc = X86ISD::VPCOM; - break; - case Intrinsic::x86_xop_vpcomltub: - case Intrinsic::x86_xop_vpcomltuw: - case Intrinsic::x86_xop_vpcomltud: - case Intrinsic::x86_xop_vpcomltuq: - CC = 0; - Opc = X86ISD::VPCOMU; - break; - case Intrinsic::x86_xop_vpcomleb: - case Intrinsic::x86_xop_vpcomlew: - case Intrinsic::x86_xop_vpcomled: - case Intrinsic::x86_xop_vpcomleq: - CC = 1; - Opc = X86ISD::VPCOM; - break; - case Intrinsic::x86_xop_vpcomleub: - case Intrinsic::x86_xop_vpcomleuw: - case Intrinsic::x86_xop_vpcomleud: - case Intrinsic::x86_xop_vpcomleuq: - CC = 1; - Opc = X86ISD::VPCOMU; - break; - case Intrinsic::x86_xop_vpcomgtb: - case Intrinsic::x86_xop_vpcomgtw: - case Intrinsic::x86_xop_vpcomgtd: - case Intrinsic::x86_xop_vpcomgtq: - CC = 2; - Opc = X86ISD::VPCOM; - break; - case Intrinsic::x86_xop_vpcomgtub: - case Intrinsic::x86_xop_vpcomgtuw: - case Intrinsic::x86_xop_vpcomgtud: - case Intrinsic::x86_xop_vpcomgtuq: - CC = 2; - Opc = X86ISD::VPCOMU; - break; - case Intrinsic::x86_xop_vpcomgeb: - case Intrinsic::x86_xop_vpcomgew: - case Intrinsic::x86_xop_vpcomged: - case Intrinsic::x86_xop_vpcomgeq: - CC = 3; - Opc = X86ISD::VPCOM; - break; - case Intrinsic::x86_xop_vpcomgeub: - case Intrinsic::x86_xop_vpcomgeuw: - case Intrinsic::x86_xop_vpcomgeud: - case Intrinsic::x86_xop_vpcomgeuq: - CC = 3; - Opc = X86ISD::VPCOMU; - break; - case Intrinsic::x86_xop_vpcomeqb: - case Intrinsic::x86_xop_vpcomeqw: - case Intrinsic::x86_xop_vpcomeqd: - case Intrinsic::x86_xop_vpcomeqq: - CC = 4; - Opc = X86ISD::VPCOM; - break; - case Intrinsic::x86_xop_vpcomequb: - case Intrinsic::x86_xop_vpcomequw: - case Intrinsic::x86_xop_vpcomequd: - case Intrinsic::x86_xop_vpcomequq: - CC = 4; - Opc = X86ISD::VPCOMU; - break; - case Intrinsic::x86_xop_vpcomneb: - case Intrinsic::x86_xop_vpcomnew: - case Intrinsic::x86_xop_vpcomned: - case Intrinsic::x86_xop_vpcomneq: - CC = 5; - Opc = X86ISD::VPCOM; - break; - case Intrinsic::x86_xop_vpcomneub: - case Intrinsic::x86_xop_vpcomneuw: - case Intrinsic::x86_xop_vpcomneud: - case Intrinsic::x86_xop_vpcomneuq: - CC = 5; - Opc = X86ISD::VPCOMU; - break; - case Intrinsic::x86_xop_vpcomfalseb: - case Intrinsic::x86_xop_vpcomfalsew: - case Intrinsic::x86_xop_vpcomfalsed: - case Intrinsic::x86_xop_vpcomfalseq: - CC = 6; - Opc = X86ISD::VPCOM; - break; - case Intrinsic::x86_xop_vpcomfalseub: - case Intrinsic::x86_xop_vpcomfalseuw: - case Intrinsic::x86_xop_vpcomfalseud: - case Intrinsic::x86_xop_vpcomfalseuq: - CC = 6; - Opc = X86ISD::VPCOMU; - break; - case Intrinsic::x86_xop_vpcomtrueb: - case Intrinsic::x86_xop_vpcomtruew: - case Intrinsic::x86_xop_vpcomtrued: - case Intrinsic::x86_xop_vpcomtrueq: - CC = 7; - Opc = X86ISD::VPCOM; - break; - case Intrinsic::x86_xop_vpcomtrueub: - case Intrinsic::x86_xop_vpcomtrueuw: - case Intrinsic::x86_xop_vpcomtrueud: - case Intrinsic::x86_xop_vpcomtrueuq: - CC = 7; - Opc = X86ISD::VPCOMU; - break; - } - - SDValue LHS = Op.getOperand(1); - SDValue RHS = Op.getOperand(2); - return DAG.getNode(Opc, dl, Op.getValueType(), LHS, RHS, - DAG.getConstant(CC, MVT::i8)); - } // Arithmetic intrinsics. 
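// For reference, the block removed above encoded XOP's comparison predicate
// as an immediate 0..7 and picked X86ISD::VPCOM or VPCOMU for the signed and
// unsigned intrinsic families. The mapping it implemented:
enum VPComPredicate {
  VPCOM_LT = 0, VPCOM_LE = 1, VPCOM_GT    = 2, VPCOM_GE   = 3,
  VPCOM_EQ = 4, VPCOM_NE = 5, VPCOM_FALSE = 6, VPCOM_TRUE = 7
};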
case Intrinsic::x86_sse2_pmulu_dq: case Intrinsic::x86_avx2_pmulu_dq: return DAG.getNode(X86ISD::PMULUDQ, dl, Op.getValueType(), Op.getOperand(1), Op.getOperand(2)); + + // SSE3/AVX horizontal add/sub intrinsics case Intrinsic::x86_sse3_hadd_ps: case Intrinsic::x86_sse3_hadd_pd: case Intrinsic::x86_avx_hadd_ps_256: case Intrinsic::x86_avx_hadd_pd_256: - return DAG.getNode(X86ISD::FHADD, dl, Op.getValueType(), - Op.getOperand(1), Op.getOperand(2)); case Intrinsic::x86_sse3_hsub_ps: case Intrinsic::x86_sse3_hsub_pd: case Intrinsic::x86_avx_hsub_ps_256: case Intrinsic::x86_avx_hsub_pd_256: - return DAG.getNode(X86ISD::FHSUB, dl, Op.getValueType(), - Op.getOperand(1), Op.getOperand(2)); case Intrinsic::x86_ssse3_phadd_w_128: case Intrinsic::x86_ssse3_phadd_d_128: case Intrinsic::x86_avx2_phadd_w: case Intrinsic::x86_avx2_phadd_d: - return DAG.getNode(X86ISD::HADD, dl, Op.getValueType(), - Op.getOperand(1), Op.getOperand(2)); case Intrinsic::x86_ssse3_phsub_w_128: case Intrinsic::x86_ssse3_phsub_d_128: case Intrinsic::x86_avx2_phsub_w: - case Intrinsic::x86_avx2_phsub_d: - return DAG.getNode(X86ISD::HSUB, dl, Op.getValueType(), + case Intrinsic::x86_avx2_phsub_d: { + unsigned Opcode; + switch (IntNo) { + default: llvm_unreachable("Impossible intrinsic"); // Can't reach here. + case Intrinsic::x86_sse3_hadd_ps: + case Intrinsic::x86_sse3_hadd_pd: + case Intrinsic::x86_avx_hadd_ps_256: + case Intrinsic::x86_avx_hadd_pd_256: + Opcode = X86ISD::FHADD; + break; + case Intrinsic::x86_sse3_hsub_ps: + case Intrinsic::x86_sse3_hsub_pd: + case Intrinsic::x86_avx_hsub_ps_256: + case Intrinsic::x86_avx_hsub_pd_256: + Opcode = X86ISD::FHSUB; + break; + case Intrinsic::x86_ssse3_phadd_w_128: + case Intrinsic::x86_ssse3_phadd_d_128: + case Intrinsic::x86_avx2_phadd_w: + case Intrinsic::x86_avx2_phadd_d: + Opcode = X86ISD::HADD; + break; + case Intrinsic::x86_ssse3_phsub_w_128: + case Intrinsic::x86_ssse3_phsub_d_128: + case Intrinsic::x86_avx2_phsub_w: + case Intrinsic::x86_avx2_phsub_d: + Opcode = X86ISD::HSUB; + break; + } + return DAG.getNode(Opcode, dl, Op.getValueType(), Op.getOperand(1), Op.getOperand(2)); + } + + // AVX2 variable shift intrinsics case Intrinsic::x86_avx2_psllv_d: case Intrinsic::x86_avx2_psllv_q: case Intrinsic::x86_avx2_psllv_d_256: case Intrinsic::x86_avx2_psllv_q_256: - return DAG.getNode(ISD::SHL, dl, Op.getValueType(), - Op.getOperand(1), Op.getOperand(2)); case Intrinsic::x86_avx2_psrlv_d: case Intrinsic::x86_avx2_psrlv_q: case Intrinsic::x86_avx2_psrlv_d_256: case Intrinsic::x86_avx2_psrlv_q_256: - return DAG.getNode(ISD::SRL, dl, Op.getValueType(), - Op.getOperand(1), Op.getOperand(2)); case Intrinsic::x86_avx2_psrav_d: - case Intrinsic::x86_avx2_psrav_d_256: - return DAG.getNode(ISD::SRA, dl, Op.getValueType(), - Op.getOperand(1), Op.getOperand(2)); + case Intrinsic::x86_avx2_psrav_d_256: { + unsigned Opcode; + switch (IntNo) { + default: llvm_unreachable("Impossible intrinsic"); // Can't reach here. 
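// The horizontal add/sub cases were folded into one switch that only picks
// an opcode; the operation itself is pairwise within each source operand.
// A scalar model of HADDPS (the FHADD case; FHSUB subtracts instead):
#include <array>

static std::array<float, 4> haddps(const std::array<float, 4> &a,
                                   const std::array<float, 4> &b) {
  return { a[0] + a[1], a[2] + a[3], b[0] + b[1], b[2] + b[3] };
}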
+ case Intrinsic::x86_avx2_psllv_d: + case Intrinsic::x86_avx2_psllv_q: + case Intrinsic::x86_avx2_psllv_d_256: + case Intrinsic::x86_avx2_psllv_q_256: + Opcode = ISD::SHL; + break; + case Intrinsic::x86_avx2_psrlv_d: + case Intrinsic::x86_avx2_psrlv_q: + case Intrinsic::x86_avx2_psrlv_d_256: + case Intrinsic::x86_avx2_psrlv_q_256: + Opcode = ISD::SRL; + break; + case Intrinsic::x86_avx2_psrav_d: + case Intrinsic::x86_avx2_psrav_d_256: + Opcode = ISD::SRA; + break; + } + return DAG.getNode(Opcode, dl, Op.getValueType(), + Op.getOperand(1), Op.getOperand(2)); + } + case Intrinsic::x86_ssse3_pshuf_b_128: case Intrinsic::x86_avx2_pshuf_b: return DAG.getNode(X86ISD::PSHUFB, dl, Op.getValueType(), Op.getOperand(1), Op.getOperand(2)); + case Intrinsic::x86_ssse3_psign_b_128: case Intrinsic::x86_ssse3_psign_w_128: case Intrinsic::x86_ssse3_psign_d_128: @@ -9583,15 +9635,18 @@ X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const case Intrinsic::x86_avx2_psign_d: return DAG.getNode(X86ISD::PSIGN, dl, Op.getValueType(), Op.getOperand(1), Op.getOperand(2)); + case Intrinsic::x86_sse41_insertps: return DAG.getNode(X86ISD::INSERTPS, dl, Op.getValueType(), Op.getOperand(1), Op.getOperand(2), Op.getOperand(3)); + case Intrinsic::x86_avx_vperm2f128_ps_256: case Intrinsic::x86_avx_vperm2f128_pd_256: case Intrinsic::x86_avx_vperm2f128_si_256: case Intrinsic::x86_avx2_vperm2i128: return DAG.getNode(X86ISD::VPERM2X128, dl, Op.getValueType(), Op.getOperand(1), Op.getOperand(2), Op.getOperand(3)); + case Intrinsic::x86_avx2_permd: case Intrinsic::x86_avx2_permps: // Operands intentionally swapped. Mask is last operand to intrinsic, @@ -9621,7 +9676,7 @@ X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const case Intrinsic::x86_avx_vtestc_pd_256: case Intrinsic::x86_avx_vtestnzc_pd_256: { bool IsTestPacked = false; - unsigned X86CC = 0; + unsigned X86CC; switch (IntNo) { default: llvm_unreachable("Bad fallthrough in Intrinsic lowering."); case Intrinsic::x86_avx_vtestz_ps: @@ -9672,44 +9727,93 @@ X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const case Intrinsic::x86_avx2_psll_w: case Intrinsic::x86_avx2_psll_d: case Intrinsic::x86_avx2_psll_q: - return DAG.getNode(X86ISD::VSHL, dl, Op.getValueType(), - Op.getOperand(1), Op.getOperand(2)); case Intrinsic::x86_sse2_psrl_w: case Intrinsic::x86_sse2_psrl_d: case Intrinsic::x86_sse2_psrl_q: case Intrinsic::x86_avx2_psrl_w: case Intrinsic::x86_avx2_psrl_d: case Intrinsic::x86_avx2_psrl_q: - return DAG.getNode(X86ISD::VSRL, dl, Op.getValueType(), - Op.getOperand(1), Op.getOperand(2)); case Intrinsic::x86_sse2_psra_w: case Intrinsic::x86_sse2_psra_d: case Intrinsic::x86_avx2_psra_w: - case Intrinsic::x86_avx2_psra_d: - return DAG.getNode(X86ISD::VSRA, dl, Op.getValueType(), + case Intrinsic::x86_avx2_psra_d: { + unsigned Opcode; + switch (IntNo) { + default: llvm_unreachable("Impossible intrinsic"); // Can't reach here. 
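// The AVX2 psllv/psrlv/psrav cases map directly onto generic ISD::SHL/SRL/
// SRA because the instructions are true per-lane variable shifts. A scalar
// model of their documented semantics (logical shifts produce 0 for counts
// >= the element width; arithmetic shifts saturate to the sign fill):
#include <array>
#include <cstdint>

static std::array<uint32_t, 4> psllvd(std::array<uint32_t, 4> v,
                                      const std::array<uint32_t, 4> &cnt) {
  for (int i = 0; i != 4; ++i)
    v[i] = cnt[i] < 32 ? (v[i] << cnt[i]) : 0; // each lane uses its own count
  return v;
}

static std::array<int32_t, 4> psravd(std::array<int32_t, 4> v,
                                     const std::array<uint32_t, 4> &cnt) {
  for (int i = 0; i != 4; ++i)
    v[i] >>= (cnt[i] < 32 ? cnt[i] : 31);      // sign-fill on large counts
  return v;
}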
+ case Intrinsic::x86_sse2_psll_w: + case Intrinsic::x86_sse2_psll_d: + case Intrinsic::x86_sse2_psll_q: + case Intrinsic::x86_avx2_psll_w: + case Intrinsic::x86_avx2_psll_d: + case Intrinsic::x86_avx2_psll_q: + Opcode = X86ISD::VSHL; + break; + case Intrinsic::x86_sse2_psrl_w: + case Intrinsic::x86_sse2_psrl_d: + case Intrinsic::x86_sse2_psrl_q: + case Intrinsic::x86_avx2_psrl_w: + case Intrinsic::x86_avx2_psrl_d: + case Intrinsic::x86_avx2_psrl_q: + Opcode = X86ISD::VSRL; + break; + case Intrinsic::x86_sse2_psra_w: + case Intrinsic::x86_sse2_psra_d: + case Intrinsic::x86_avx2_psra_w: + case Intrinsic::x86_avx2_psra_d: + Opcode = X86ISD::VSRA; + break; + } + return DAG.getNode(Opcode, dl, Op.getValueType(), Op.getOperand(1), Op.getOperand(2)); + } + + // SSE/AVX immediate shift intrinsics case Intrinsic::x86_sse2_pslli_w: case Intrinsic::x86_sse2_pslli_d: case Intrinsic::x86_sse2_pslli_q: case Intrinsic::x86_avx2_pslli_w: case Intrinsic::x86_avx2_pslli_d: case Intrinsic::x86_avx2_pslli_q: - return getTargetVShiftNode(X86ISD::VSHLI, dl, Op.getValueType(), - Op.getOperand(1), Op.getOperand(2), DAG); case Intrinsic::x86_sse2_psrli_w: case Intrinsic::x86_sse2_psrli_d: case Intrinsic::x86_sse2_psrli_q: case Intrinsic::x86_avx2_psrli_w: case Intrinsic::x86_avx2_psrli_d: case Intrinsic::x86_avx2_psrli_q: - return getTargetVShiftNode(X86ISD::VSRLI, dl, Op.getValueType(), - Op.getOperand(1), Op.getOperand(2), DAG); case Intrinsic::x86_sse2_psrai_w: case Intrinsic::x86_sse2_psrai_d: case Intrinsic::x86_avx2_psrai_w: - case Intrinsic::x86_avx2_psrai_d: - return getTargetVShiftNode(X86ISD::VSRAI, dl, Op.getValueType(), + case Intrinsic::x86_avx2_psrai_d: { + unsigned Opcode; + switch (IntNo) { + default: llvm_unreachable("Impossible intrinsic"); // Can't reach here. + case Intrinsic::x86_sse2_pslli_w: + case Intrinsic::x86_sse2_pslli_d: + case Intrinsic::x86_sse2_pslli_q: + case Intrinsic::x86_avx2_pslli_w: + case Intrinsic::x86_avx2_pslli_d: + case Intrinsic::x86_avx2_pslli_q: + Opcode = X86ISD::VSHLI; + break; + case Intrinsic::x86_sse2_psrli_w: + case Intrinsic::x86_sse2_psrli_d: + case Intrinsic::x86_sse2_psrli_q: + case Intrinsic::x86_avx2_psrli_w: + case Intrinsic::x86_avx2_psrli_d: + case Intrinsic::x86_avx2_psrli_q: + Opcode = X86ISD::VSRLI; + break; + case Intrinsic::x86_sse2_psrai_w: + case Intrinsic::x86_sse2_psrai_d: + case Intrinsic::x86_avx2_psrai_w: + case Intrinsic::x86_avx2_psrai_d: + Opcode = X86ISD::VSRAI; + break; + } + return getTargetVShiftNode(Opcode, dl, Op.getValueType(), Op.getOperand(1), Op.getOperand(2), DAG); + } + // Fix vector shift instructions where the last operand is a non-immediate // i32 value. case Intrinsic::x86_mmx_pslli_w: @@ -9724,8 +9828,9 @@ X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const if (isa(ShAmt)) return SDValue(); - unsigned NewIntNo = 0; + unsigned NewIntNo; switch (IntNo) { + default: llvm_unreachable("Impossible intrinsic"); // Can't reach here. case Intrinsic::x86_mmx_pslli_w: NewIntNo = Intrinsic::x86_mmx_psll_w; break; @@ -9750,7 +9855,6 @@ X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const case Intrinsic::x86_mmx_psrai_d: NewIntNo = Intrinsic::x86_mmx_psra_d; break; - default: llvm_unreachable("Impossible intrinsic"); // Can't reach here. 
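// A pattern repeated throughout these hunks: "unsigned Opcode;" is left
// deliberately uninitialized and the switch gains "default:
// llvm_unreachable(...)". That documents the case list as exhaustive and
// lets the compiler flag any path that could reach the use unassigned. A
// minimal standalone version of the idiom (std::abort standing in for
// llvm_unreachable):
#include <cstdlib>

static unsigned pickOpcode(int kind) {
  unsigned Opcode;         // deliberately uninitialized
  switch (kind) {
  default: std::abort();   // cannot fall through without assigning
  case 0: Opcode = 100; break;
  case 1: Opcode = 200; break;
  }
  return Opcode;           // every path reaching here set Opcode
}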
} // The vector shift intrinsics with scalars uses 32b shift amounts but @@ -9766,6 +9870,116 @@ X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const DAG.getConstant(NewIntNo, MVT::i32), Op.getOperand(1), ShAmt); } + case Intrinsic::x86_sse42_pcmpistria128: + case Intrinsic::x86_sse42_pcmpestria128: + case Intrinsic::x86_sse42_pcmpistric128: + case Intrinsic::x86_sse42_pcmpestric128: + case Intrinsic::x86_sse42_pcmpistrio128: + case Intrinsic::x86_sse42_pcmpestrio128: + case Intrinsic::x86_sse42_pcmpistris128: + case Intrinsic::x86_sse42_pcmpestris128: + case Intrinsic::x86_sse42_pcmpistriz128: + case Intrinsic::x86_sse42_pcmpestriz128: { + unsigned Opcode; + unsigned X86CC; + switch (IntNo) { + default: llvm_unreachable("Impossible intrinsic"); // Can't reach here. + case Intrinsic::x86_sse42_pcmpistria128: + Opcode = X86ISD::PCMPISTRI; + X86CC = X86::COND_A; + break; + case Intrinsic::x86_sse42_pcmpestria128: + Opcode = X86ISD::PCMPESTRI; + X86CC = X86::COND_A; + break; + case Intrinsic::x86_sse42_pcmpistric128: + Opcode = X86ISD::PCMPISTRI; + X86CC = X86::COND_B; + break; + case Intrinsic::x86_sse42_pcmpestric128: + Opcode = X86ISD::PCMPESTRI; + X86CC = X86::COND_B; + break; + case Intrinsic::x86_sse42_pcmpistrio128: + Opcode = X86ISD::PCMPISTRI; + X86CC = X86::COND_O; + break; + case Intrinsic::x86_sse42_pcmpestrio128: + Opcode = X86ISD::PCMPESTRI; + X86CC = X86::COND_O; + break; + case Intrinsic::x86_sse42_pcmpistris128: + Opcode = X86ISD::PCMPISTRI; + X86CC = X86::COND_S; + break; + case Intrinsic::x86_sse42_pcmpestris128: + Opcode = X86ISD::PCMPESTRI; + X86CC = X86::COND_S; + break; + case Intrinsic::x86_sse42_pcmpistriz128: + Opcode = X86ISD::PCMPISTRI; + X86CC = X86::COND_E; + break; + case Intrinsic::x86_sse42_pcmpestriz128: + Opcode = X86ISD::PCMPESTRI; + X86CC = X86::COND_E; + break; + } + SmallVector NewOps; + NewOps.append(Op->op_begin()+1, Op->op_end()); + SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32); + SDValue PCMP = DAG.getNode(Opcode, dl, VTs, NewOps.data(), NewOps.size()); + SDValue SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8, + DAG.getConstant(X86CC, MVT::i8), + SDValue(PCMP.getNode(), 1)); + return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC); + } + + case Intrinsic::x86_sse42_pcmpistri128: + case Intrinsic::x86_sse42_pcmpestri128: { + unsigned Opcode; + if (IntNo == Intrinsic::x86_sse42_pcmpistri128) + Opcode = X86ISD::PCMPISTRI; + else + Opcode = X86ISD::PCMPESTRI; + + SmallVector NewOps; + NewOps.append(Op->op_begin()+1, Op->op_end()); + SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32); + return DAG.getNode(Opcode, dl, VTs, NewOps.data(), NewOps.size()); + } + } +} + +SDValue +X86TargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const { + DebugLoc dl = Op.getDebugLoc(); + unsigned IntNo = cast(Op.getOperand(1))->getZExtValue(); + switch (IntNo) { + default: return SDValue(); // Don't custom lower most intrinsics. + + // RDRAND intrinsics. + case Intrinsic::x86_rdrand_16: + case Intrinsic::x86_rdrand_32: + case Intrinsic::x86_rdrand_64: { + // Emit the node with the right value type. + SDVTList VTs = DAG.getVTList(Op->getValueType(0), MVT::Glue, MVT::Other); + SDValue Result = DAG.getNode(X86ISD::RDRAND, dl, VTs, Op.getOperand(0)); + + // If the value returned by RDRAND was valid (CF=1), return 1. Otherwise + // return the value from Rand, which is always 0, casted to i32. 
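// All ten pcmp[ie]str[acosz]128 cases added above share one shape: emit
// PCMPISTRI or PCMPESTRI with an extra i32 flags result, then read a single
// EFLAGS bit via SETCC. A compact restatement of the (suffix, condition)
// table the switch encodes -- the type and field names are illustrative:
enum StrCond { CondA, CondB, CondO, CondS, CondE };
struct PcmpStrCase { char suffix; bool explicitLengths; StrCond cc; };
static const PcmpStrCase PcmpStrTable[] = {
  { 'a', false, CondA }, { 'a', true, CondA }, // above: CF==0 and ZF==0
  { 'c', false, CondB }, { 'c', true, CondB }, // carry
  { 'o', false, CondO }, { 'o', true, CondO }, // overflow
  { 's', false, CondS }, { 's', true, CondS }, // sign
  { 'z', false, CondE }, { 'z', true, CondE }, // zero
};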
+ SDValue Ops[] = { DAG.getZExtOrTrunc(Result, dl, Op->getValueType(1)), + DAG.getConstant(1, Op->getValueType(1)), + DAG.getConstant(X86::COND_B, MVT::i32), + SDValue(Result.getNode(), 1) }; + SDValue isValid = DAG.getNode(X86ISD::CMOV, dl, + DAG.getVTList(Op->getValueType(1), MVT::Glue), + Ops, 4); + + // Return { result, isValid, chain }. + return DAG.getNode(ISD::MERGE_VALUES, dl, Op->getVTList(), Result, isValid, + SDValue(Result.getNode(), 2)); + } } } @@ -9816,7 +10030,6 @@ SDValue X86TargetLowering::LowerFRAME_TO_ARGS_OFFSET(SDValue Op, } SDValue X86TargetLowering::LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const { - MachineFunction &MF = DAG.getMachineFunction(); SDValue Chain = Op.getOperand(0); SDValue Offset = Op.getOperand(1); SDValue Handler = Op.getOperand(2); @@ -9833,7 +10046,6 @@ SDValue X86TargetLowering::LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const { Chain = DAG.getStore(Chain, dl, Handler, StoreAddr, MachinePointerInfo(), false, false, 0); Chain = DAG.getCopyToReg(Chain, dl, StoreAddrReg, StoreAddr); - MF.getRegInfo().addLiveOut(StoreAddrReg); return DAG.getNode(X86ISD::EH_RETURN, dl, MVT::Other, @@ -10149,23 +10361,21 @@ SDValue X86TargetLowering::LowerCTTZ(SDValue Op, SelectionDAG &DAG) const { static SDValue Lower256IntArith(SDValue Op, SelectionDAG &DAG) { EVT VT = Op.getValueType(); - assert(VT.getSizeInBits() == 256 && VT.isInteger() && + assert(VT.is256BitVector() && VT.isInteger() && "Unsupported value type for operation"); - int NumElems = VT.getVectorNumElements(); + unsigned NumElems = VT.getVectorNumElements(); DebugLoc dl = Op.getDebugLoc(); - SDValue Idx0 = DAG.getConstant(0, MVT::i32); - SDValue Idx1 = DAG.getConstant(NumElems/2, MVT::i32); // Extract the LHS vectors SDValue LHS = Op.getOperand(0); - SDValue LHS1 = Extract128BitVector(LHS, Idx0, DAG, dl); - SDValue LHS2 = Extract128BitVector(LHS, Idx1, DAG, dl); + SDValue LHS1 = Extract128BitVector(LHS, 0, DAG, dl); + SDValue LHS2 = Extract128BitVector(LHS, NumElems/2, DAG, dl); // Extract the RHS vectors SDValue RHS = Op.getOperand(1); - SDValue RHS1 = Extract128BitVector(RHS, Idx0, DAG, dl); - SDValue RHS2 = Extract128BitVector(RHS, Idx1, DAG, dl); + SDValue RHS1 = Extract128BitVector(RHS, 0, DAG, dl); + SDValue RHS2 = Extract128BitVector(RHS, NumElems/2, DAG, dl); MVT EltVT = VT.getVectorElementType().getSimpleVT(); EVT NewVT = MVT::getVectorVT(EltVT, NumElems/2); @@ -10176,14 +10386,14 @@ static SDValue Lower256IntArith(SDValue Op, SelectionDAG &DAG) { } SDValue X86TargetLowering::LowerADD(SDValue Op, SelectionDAG &DAG) const { - assert(Op.getValueType().getSizeInBits() == 256 && + assert(Op.getValueType().is256BitVector() && Op.getValueType().isInteger() && "Only handle AVX 256-bit vector integer operation"); return Lower256IntArith(Op, DAG); } SDValue X86TargetLowering::LowerSUB(SDValue Op, SelectionDAG &DAG) const { - assert(Op.getValueType().getSizeInBits() == 256 && + assert(Op.getValueType().is256BitVector() && Op.getValueType().isInteger() && "Only handle AVX 256-bit vector integer operation"); return Lower256IntArith(Op, DAG); @@ -10193,7 +10403,7 @@ SDValue X86TargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const { EVT VT = Op.getValueType(); // Decompose 256-bit ops into smaller 128-bit ops. 
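// The RDRAND lowering encodes the hardware contract: CF is set only when the
// instruction actually delivered entropy, and the CMOV assembled above turns
// that flag into the "isValid" result. The same contract is reachable from
// source through the compiler builtin; a sketch of the usual retry loop
// (assumes an RDRAND-capable target, e.g. built with -mrdrnd):
#include <immintrin.h>

static bool getRandom32(unsigned int &out) {
  for (int tries = 0; tries != 10; ++tries)
    if (_rdrand32_step(&out)) // returns 1 iff the CPU set CF
      return true;
  return false;               // transient underflow; caller should fall back
}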
- if (VT.getSizeInBits() == 256 && !Subtarget->hasAVX2()) + if (VT.is256BitVector() && !Subtarget->hasAVX2()) return Lower256IntArith(Op, DAG); assert((VT == MVT::v2i64 || VT == MVT::v4i64) && @@ -10310,6 +10520,7 @@ SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) const { Res = DAG.getNode(ISD::SUB, dl, VT, Res, Mask); return Res; } + llvm_unreachable("Unknown shift opcode."); } if (Subtarget->hasAVX2() && VT == MVT::v32i8) { @@ -10353,6 +10564,7 @@ SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) const { Res = DAG.getNode(ISD::SUB, dl, VT, Res, Mask); return Res; } + llvm_unreachable("Unknown shift opcode."); } } } @@ -10421,15 +10633,14 @@ SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) const { } // Decompose 256-bit shifts into smaller 128-bit shifts. - if (VT.getSizeInBits() == 256) { + if (VT.is256BitVector()) { unsigned NumElems = VT.getVectorNumElements(); MVT EltVT = VT.getVectorElementType().getSimpleVT(); EVT NewVT = MVT::getVectorVT(EltVT, NumElems/2); // Extract the two vectors - SDValue V1 = Extract128BitVector(R, DAG.getConstant(0, MVT::i32), DAG, dl); - SDValue V2 = Extract128BitVector(R, DAG.getConstant(NumElems/2, MVT::i32), - DAG, dl); + SDValue V1 = Extract128BitVector(R, 0, DAG, dl); + SDValue V2 = Extract128BitVector(R, NumElems/2, DAG, dl); // Recreate the shift amount vectors SDValue Amt1, Amt2; @@ -10448,9 +10659,8 @@ SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) const { &Amt2Csts[0], NumElems/2); } else { // Variable shift amount - Amt1 = Extract128BitVector(Amt, DAG.getConstant(0, MVT::i32), DAG, dl); - Amt2 = Extract128BitVector(Amt, DAG.getConstant(NumElems/2, MVT::i32), - DAG, dl); + Amt1 = Extract128BitVector(Amt, 0, DAG, dl); + Amt2 = Extract128BitVector(Amt, NumElems/2, DAG, dl); } // Issue new vector shifts for the smaller types @@ -10560,20 +10770,18 @@ SDValue X86TargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op, return SDValue(); if (!Subtarget->hasAVX2()) { // needs to be split - int NumElems = VT.getVectorNumElements(); - SDValue Idx0 = DAG.getConstant(0, MVT::i32); - SDValue Idx1 = DAG.getConstant(NumElems/2, MVT::i32); + unsigned NumElems = VT.getVectorNumElements(); // Extract the LHS vectors SDValue LHS = Op.getOperand(0); - SDValue LHS1 = Extract128BitVector(LHS, Idx0, DAG, dl); - SDValue LHS2 = Extract128BitVector(LHS, Idx1, DAG, dl); + SDValue LHS1 = Extract128BitVector(LHS, 0, DAG, dl); + SDValue LHS2 = Extract128BitVector(LHS, NumElems/2, DAG, dl); MVT EltVT = VT.getVectorElementType().getSimpleVT(); EVT NewVT = MVT::getVectorVT(EltVT, NumElems/2); EVT ExtraEltVT = ExtraVT.getVectorElementType(); - int ExtraNumElems = ExtraVT.getVectorNumElements(); + unsigned ExtraNumElems = ExtraVT.getVectorNumElements(); ExtraVT = EVT::getVectorVT(*DAG.getContext(), ExtraEltVT, ExtraNumElems/2); SDValue Extra = DAG.getValueType(ExtraVT); @@ -10859,6 +11067,7 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::VAARG: return LowerVAARG(Op, DAG); case ISD::VACOPY: return LowerVACOPY(Op, DAG); case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG); + case ISD::INTRINSIC_W_CHAIN: return LowerINTRINSIC_W_CHAIN(Op, DAG); case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG); case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG); case ISD::FRAME_TO_ARGS_OFFSET: @@ -11013,7 +11222,7 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N, Regs64bit ? 
X86::RBX : X86::EBX, swapInL, cpInH.getValue(1)); swapInH = DAG.getCopyToReg(swapInL.getValue(0), dl, - Regs64bit ? X86::RCX : X86::ECX, + Regs64bit ? X86::RCX : X86::ECX, swapInH, swapInL.getValue(1)); SDValue Ops[] = { swapInH.getValue(0), N->getOperand(1), @@ -11118,10 +11327,12 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::FRSQRT: return "X86ISD::FRSQRT"; case X86ISD::FRCP: return "X86ISD::FRCP"; case X86ISD::TLSADDR: return "X86ISD::TLSADDR"; + case X86ISD::TLSBASEADDR: return "X86ISD::TLSBASEADDR"; case X86ISD::TLSCALL: return "X86ISD::TLSCALL"; case X86ISD::EH_RETURN: return "X86ISD::EH_RETURN"; case X86ISD::TC_RETURN: return "X86ISD::TC_RETURN"; case X86ISD::FNSTCW16m: return "X86ISD::FNSTCW16m"; + case X86ISD::FNSTSW16r: return "X86ISD::FNSTSW16r"; case X86ISD::LCMPXCHG_DAG: return "X86ISD::LCMPXCHG_DAG"; case X86ISD::LCMPXCHG8_DAG: return "X86ISD::LCMPXCHG8_DAG"; case X86ISD::ATOMADD64_DAG: return "X86ISD::ATOMADD64_DAG"; @@ -11190,6 +11401,14 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::MEMBARRIER: return "X86ISD::MEMBARRIER"; case X86ISD::SEG_ALLOCA: return "X86ISD::SEG_ALLOCA"; case X86ISD::WIN_FTOL: return "X86ISD::WIN_FTOL"; + case X86ISD::SAHF: return "X86ISD::SAHF"; + case X86ISD::RDRAND: return "X86ISD::RDRAND"; + case X86ISD::FMADD: return "X86ISD::FMADD"; + case X86ISD::FMSUB: return "X86ISD::FMSUB"; + case X86ISD::FNMADD: return "X86ISD::FNMADD"; + case X86ISD::FNMSUB: return "X86ISD::FNMSUB"; + case X86ISD::FMADDSUB: return "X86ISD::FMADDSUB"; + case X86ISD::FMSUBADD: return "X86ISD::FMSUBADD"; } } @@ -11258,6 +11477,15 @@ bool X86TargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const { return true; } +bool X86TargetLowering::isLegalICmpImmediate(int64_t Imm) const { + return Imm == (int32_t)Imm; +} + +bool X86TargetLowering::isLegalAddImmediate(int64_t Imm) const { + // Can also use sub to handle negated immediates. + return Imm == (int32_t)Imm; +} + bool X86TargetLowering::isTruncateFree(EVT VT1, EVT VT2) const { if (!VT1.isInteger() || !VT2.isInteger()) return false; @@ -11300,8 +11528,8 @@ X86TargetLowering::isShuffleMaskLegal(const SmallVectorImpl &M, isMOVLMask(M, VT) || isSHUFPMask(M, VT, Subtarget->hasAVX()) || isPSHUFDMask(M, VT) || - isPSHUFHWMask(M, VT) || - isPSHUFLWMask(M, VT) || + isPSHUFHWMask(M, VT, Subtarget->hasAVX2()) || + isPSHUFLWMask(M, VT, Subtarget->hasAVX2()) || isPALIGNRMask(M, VT, Subtarget) || isUNPCKLMask(M, VT, Subtarget->hasAVX2()) || isUNPCKHMask(M, VT, Subtarget->hasAVX2()) || @@ -11316,7 +11544,7 @@ X86TargetLowering::isVectorClearMaskLegal(const SmallVectorImpl &Mask, // FIXME: This collection of masks seems suspect. 
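// The new isLegalICmpImmediate/isLegalAddImmediate hooks both reduce to
// "does the value survive a round trip through int32?", since x86 cmp/add
// encode at most a sign-extended 32-bit immediate (and, per the comment, a
// negated immediate can ride on sub). Standalone form of the check:
#include <cstdint>

static bool fitsInSExt32(int64_t imm) {
  return imm == int64_t(int32_t(imm)); // true iff imm is in [-2^31, 2^31)
}
// fitsInSExt32(-1)        -> true
// fitsInSExt32(1LL << 31) -> false (needs movabs into a register first)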
if (NumElts == 2) return true; - if (NumElts == 4 && VT.getSizeInBits() == 128) { + if (NumElts == 4 && VT.is128BitVector()) { return (isMOVLMask(Mask, VT) || isCommutedMOVLMask(Mask, VT, true) || isSHUFPMask(Mask, VT, Subtarget->hasAVX()) || @@ -11460,7 +11688,7 @@ X86TargetLowering::EmitAtomicBit6432WithCustomInserter(MachineInstr *bInstr, // result in out1, out2 // fallthrough -->nextMBB - const TargetRegisterClass *RC = X86::GR32RegisterClass; + const TargetRegisterClass *RC = &X86::GR32RegClass; const unsigned LoadOpc = X86::MOV32rm; const unsigned NotOpc = X86::NOT32r; const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); @@ -11662,7 +11890,7 @@ X86TargetLowering::EmitAtomicMinMaxWithCustomInserter(MachineInstr *mInstr, int lastAddrIndx = X86::AddrNumOperands - 1; // [0,3] int valArgIndx = lastAddrIndx + 1; - unsigned t1 = F->getRegInfo().createVirtualRegister(X86::GR32RegisterClass); + unsigned t1 = F->getRegInfo().createVirtualRegister(&X86::GR32RegClass); MachineInstrBuilder MIB = BuildMI(newMBB, dl, TII->get(X86::MOV32rm), t1); for (int i=0; i <= lastAddrIndx; ++i) (*MIB).addOperand(*argOpers[i]); @@ -11672,7 +11900,7 @@ X86TargetLowering::EmitAtomicMinMaxWithCustomInserter(MachineInstr *mInstr, argOpers[valArgIndx]->isImm()) && "invalid operand"); - unsigned t2 = F->getRegInfo().createVirtualRegister(X86::GR32RegisterClass); + unsigned t2 = F->getRegInfo().createVirtualRegister(&X86::GR32RegClass); if (argOpers[valArgIndx]->isReg()) MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), t2); else @@ -11687,7 +11915,7 @@ X86TargetLowering::EmitAtomicMinMaxWithCustomInserter(MachineInstr *mInstr, MIB.addReg(t2); // Generate movc - unsigned t3 = F->getRegInfo().createVirtualRegister(X86::GR32RegisterClass); + unsigned t3 = F->getRegInfo().createVirtualRegister(&X86::GR32RegClass); MIB = BuildMI(newMBB, dl, TII->get(cmovOpc),t3); MIB.addReg(t2); MIB.addReg(t1); @@ -11742,8 +11970,7 @@ X86TargetLowering::EmitPCMP(MachineInstr *MI, MachineBasicBlock *BB, MIB.addOperand(Op); } BuildMI(*BB, MI, dl, - TII->get(Subtarget->hasAVX() ? X86::VMOVAPSrr : X86::MOVAPSrr), - MI->getOperand(0).getReg()) + TII->get(TargetOpcode::COPY), MI->getOperand(0).getReg()) .addReg(X86::XMM0); MI->eraseFromParent(); @@ -11776,24 +12003,6 @@ X86TargetLowering::EmitMonitor(MachineInstr *MI, MachineBasicBlock *BB) const { } MachineBasicBlock * -X86TargetLowering::EmitMwait(MachineInstr *MI, MachineBasicBlock *BB) const { - DebugLoc dl = MI->getDebugLoc(); - const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); - - // First arg in ECX, the second in EAX. - BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), X86::ECX) - .addReg(MI->getOperand(0).getReg()); - BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), X86::EAX) - .addReg(MI->getOperand(1).getReg()); - - // The instruction doesn't actually take any operands though. - BuildMI(*BB, MI, dl, TII->get(X86::MWAITrr)); - - MI->eraseFromParent(); // The pseudo is gone now. 
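// EmitAtomicMinMaxWithCustomInserter expands ATOMMIN32 and friends into a
// load / compare / CMOV / LCMPXCHG retry loop at the MachineInstr level.
// The same shape in portable C++, as a reference for the value computed
// (a sketch of the semantics, not the MI sequence itself):
#include <atomic>
#include <cstdint>

static int32_t atomicMin32(std::atomic<int32_t> &addr, int32_t val) {
  int32_t old = addr.load();
  // The CMOV picks min(old, val); LCMPXCHG retries if another thread wrote.
  while (!addr.compare_exchange_weak(old, val < old ? val : old)) {
    // 'old' was refreshed by the failed exchange; the min is recomputed.
  }
  return old;
}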
- return BB; -} - -MachineBasicBlock * X86TargetLowering::EmitVAARG64WithCustomInserter( MachineInstr *MI, MachineBasicBlock *MBB) const { @@ -12306,8 +12515,9 @@ X86TargetLowering::EmitLoweredSegAlloca(MachineInstr *MI, MachineBasicBlock *BB, BuildMI(mallocMBB, DL, TII->get(X86::MOV64rr), X86::RDI) .addReg(sizeVReg); BuildMI(mallocMBB, DL, TII->get(X86::CALL64pcrel32)) - .addExternalSymbol("__morestack_allocate_stack_space").addReg(X86::RDI) + .addExternalSymbol("__morestack_allocate_stack_space") .addRegMask(RegMask) + .addReg(X86::RDI, RegState::Implicit) .addReg(X86::RAX, RegState::ImplicitDefine); } else { BuildMI(mallocMBB, DL, TII->get(X86::SUB32ri), physSPReg).addReg(physSPReg) @@ -12517,7 +12727,7 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, // Load the old value of the high byte of the control word... unsigned OldCW = - F->getRegInfo().createVirtualRegister(X86::GR16RegisterClass); + F->getRegInfo().createVirtualRegister(&X86::GR16RegClass); addFrameReference(BuildMI(*BB, MI, DL, TII->get(X86::MOV16rm), OldCW), CWFrameIdx); @@ -12596,8 +12806,6 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, // Thread synchronization. case X86::MONITOR: return EmitMonitor(MI, BB); - case X86::MWAIT: - return EmitMwait(MI, BB); // Atomic Lowering. case X86::ATOMAND32: @@ -12605,25 +12813,25 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, X86::AND32ri, X86::MOV32rm, X86::LCMPXCHG32, X86::NOT32r, X86::EAX, - X86::GR32RegisterClass); + &X86::GR32RegClass); case X86::ATOMOR32: return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::OR32rr, X86::OR32ri, X86::MOV32rm, X86::LCMPXCHG32, X86::NOT32r, X86::EAX, - X86::GR32RegisterClass); + &X86::GR32RegClass); case X86::ATOMXOR32: return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::XOR32rr, X86::XOR32ri, X86::MOV32rm, X86::LCMPXCHG32, X86::NOT32r, X86::EAX, - X86::GR32RegisterClass); + &X86::GR32RegClass); case X86::ATOMNAND32: return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND32rr, X86::AND32ri, X86::MOV32rm, X86::LCMPXCHG32, X86::NOT32r, X86::EAX, - X86::GR32RegisterClass, true); + &X86::GR32RegClass, true); case X86::ATOMMIN32: return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVL32rr); case X86::ATOMMAX32: @@ -12638,25 +12846,25 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, X86::AND16ri, X86::MOV16rm, X86::LCMPXCHG16, X86::NOT16r, X86::AX, - X86::GR16RegisterClass); + &X86::GR16RegClass); case X86::ATOMOR16: return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::OR16rr, X86::OR16ri, X86::MOV16rm, X86::LCMPXCHG16, X86::NOT16r, X86::AX, - X86::GR16RegisterClass); + &X86::GR16RegClass); case X86::ATOMXOR16: return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::XOR16rr, X86::XOR16ri, X86::MOV16rm, X86::LCMPXCHG16, X86::NOT16r, X86::AX, - X86::GR16RegisterClass); + &X86::GR16RegClass); case X86::ATOMNAND16: return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND16rr, X86::AND16ri, X86::MOV16rm, X86::LCMPXCHG16, X86::NOT16r, X86::AX, - X86::GR16RegisterClass, true); + &X86::GR16RegClass, true); case X86::ATOMMIN16: return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVL16rr); case X86::ATOMMAX16: @@ -12671,25 +12879,25 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, X86::AND8ri, X86::MOV8rm, X86::LCMPXCHG8, X86::NOT8r, X86::AL, - X86::GR8RegisterClass); + &X86::GR8RegClass); case X86::ATOMOR8: return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::OR8rr, X86::OR8ri, X86::MOV8rm, X86::LCMPXCHG8, X86::NOT8r, X86::AL, - 
X86::GR8RegisterClass); + &X86::GR8RegClass); case X86::ATOMXOR8: return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::XOR8rr, X86::XOR8ri, X86::MOV8rm, X86::LCMPXCHG8, X86::NOT8r, X86::AL, - X86::GR8RegisterClass); + &X86::GR8RegClass); case X86::ATOMNAND8: return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND8rr, X86::AND8ri, X86::MOV8rm, X86::LCMPXCHG8, X86::NOT8r, X86::AL, - X86::GR8RegisterClass, true); + &X86::GR8RegClass, true); // FIXME: There are no CMOV8 instructions; MIN/MAX need some other way. // This group is for 64-bit host. case X86::ATOMAND64: @@ -12697,25 +12905,25 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, X86::AND64ri32, X86::MOV64rm, X86::LCMPXCHG64, X86::NOT64r, X86::RAX, - X86::GR64RegisterClass); + &X86::GR64RegClass); case X86::ATOMOR64: return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::OR64rr, X86::OR64ri32, X86::MOV64rm, X86::LCMPXCHG64, X86::NOT64r, X86::RAX, - X86::GR64RegisterClass); + &X86::GR64RegClass); case X86::ATOMXOR64: return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::XOR64rr, X86::XOR64ri32, X86::MOV64rm, X86::LCMPXCHG64, X86::NOT64r, X86::RAX, - X86::GR64RegisterClass); + &X86::GR64RegClass); case X86::ATOMNAND64: return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND64rr, X86::AND64ri32, X86::MOV64rm, X86::LCMPXCHG64, X86::NOT64r, X86::RAX, - X86::GR64RegisterClass, true); + &X86::GR64RegClass, true); case X86::ATOMMIN64: return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVL64rr); case X86::ATOMMAX64: @@ -12870,10 +13078,10 @@ bool X86TargetLowering::isGAPlusOffset(SDNode *N, /// inserting the result into the low part of a new 256-bit vector static bool isShuffleHigh128VectorInsertLow(ShuffleVectorSDNode *SVOp) { EVT VT = SVOp->getValueType(0); - int NumElems = VT.getVectorNumElements(); + unsigned NumElems = VT.getVectorNumElements(); // vector_shuffle <4, 5, 6, 7, u, u, u, u> or <2, 3, u, u> - for (int i = 0, j = NumElems/2; i < NumElems/2; ++i, ++j) + for (unsigned i = 0, j = NumElems/2; i != NumElems/2; ++i, ++j) if (!isUndefOrEqual(SVOp->getMaskElt(i), j) || SVOp->getMaskElt(j) >= 0) return false; @@ -12886,10 +13094,10 @@ static bool isShuffleHigh128VectorInsertLow(ShuffleVectorSDNode *SVOp) { /// inserting the result into the high part of a new 256-bit vector static bool isShuffleLow128VectorInsertHigh(ShuffleVectorSDNode *SVOp) { EVT VT = SVOp->getValueType(0); - int NumElems = VT.getVectorNumElements(); + unsigned NumElems = VT.getVectorNumElements(); // vector_shuffle or - for (int i = NumElems/2, j = 0; i < NumElems; ++i, ++j) + for (unsigned i = NumElems/2, j = 0; i != NumElems; ++i, ++j) if (!isUndefOrEqual(SVOp->getMaskElt(i), j) || SVOp->getMaskElt(j) >= 0) return false; @@ -12906,7 +13114,7 @@ static SDValue PerformShuffleCombine256(SDNode *N, SelectionDAG &DAG, SDValue V1 = SVOp->getOperand(0); SDValue V2 = SVOp->getOperand(1); EVT VT = SVOp->getValueType(0); - int NumElems = VT.getVectorNumElements(); + unsigned NumElems = VT.getVectorNumElements(); if (V1.getOpcode() == ISD::CONCAT_VECTORS && V2.getOpcode() == ISD::CONCAT_VECTORS) { @@ -12931,30 +13139,31 @@ static SDValue PerformShuffleCombine256(SDNode *N, SelectionDAG &DAG, // To match the shuffle mask, the first half of the mask should // be exactly the first vector, and all the rest a splat with the // first element of the second one. 
- for (int i = 0; i < NumElems/2; ++i) + for (unsigned i = 0; i != NumElems/2; ++i) if (!isUndefOrEqual(SVOp->getMaskElt(i), i) || !isUndefOrEqual(SVOp->getMaskElt(i+NumElems/2), NumElems)) return SDValue(); // If V1 is coming from a vector load then just fold to a VZEXT_LOAD. if (LoadSDNode *Ld = dyn_cast(V1.getOperand(0))) { - SDVTList Tys = DAG.getVTList(MVT::v4i64, MVT::Other); - SDValue Ops[] = { Ld->getChain(), Ld->getBasePtr() }; - SDValue ResNode = - DAG.getMemIntrinsicNode(X86ISD::VZEXT_LOAD, dl, Tys, Ops, 2, - Ld->getMemoryVT(), - Ld->getPointerInfo(), - Ld->getAlignment(), - false/*isVolatile*/, true/*ReadMem*/, - false/*WriteMem*/); - return DAG.getNode(ISD::BITCAST, dl, VT, ResNode); - } + if (Ld->hasNUsesOfValue(1, 0)) { + SDVTList Tys = DAG.getVTList(MVT::v4i64, MVT::Other); + SDValue Ops[] = { Ld->getChain(), Ld->getBasePtr() }; + SDValue ResNode = + DAG.getMemIntrinsicNode(X86ISD::VZEXT_LOAD, dl, Tys, Ops, 2, + Ld->getMemoryVT(), + Ld->getPointerInfo(), + Ld->getAlignment(), + false/*isVolatile*/, true/*ReadMem*/, + false/*WriteMem*/); + return DAG.getNode(ISD::BITCAST, dl, VT, ResNode); + } + } // Emit a zeroed vector and insert the desired subvector on its // first half. SDValue Zeros = getZeroVector(VT, Subtarget, DAG, dl); - SDValue InsV = Insert128BitVector(Zeros, V1.getOperand(0), - DAG.getConstant(0, MVT::i32), DAG, dl); + SDValue InsV = Insert128BitVector(Zeros, V1.getOperand(0), 0, DAG, dl); return DCI.CombineTo(N, InsV); } @@ -12964,18 +13173,15 @@ static SDValue PerformShuffleCombine256(SDNode *N, SelectionDAG &DAG, // vector_shuffle <4, 5, 6, 7, u, u, u, u> or <2, 3, u, u> if (isShuffleHigh128VectorInsertLow(SVOp)) { - SDValue V = Extract128BitVector(V1, DAG.getConstant(NumElems/2, MVT::i32), - DAG, dl); - SDValue InsV = Insert128BitVector(DAG.getNode(ISD::UNDEF, dl, VT), - V, DAG.getConstant(0, MVT::i32), DAG, dl); + SDValue V = Extract128BitVector(V1, NumElems/2, DAG, dl); + SDValue InsV = Insert128BitVector(DAG.getUNDEF(VT), V, 0, DAG, dl); return DCI.CombineTo(N, InsV); } // vector_shuffle or if (isShuffleLow128VectorInsertHigh(SVOp)) { - SDValue V = Extract128BitVector(V1, DAG.getConstant(0, MVT::i32), DAG, dl); - SDValue InsV = Insert128BitVector(DAG.getNode(ISD::UNDEF, dl, VT), - V, DAG.getConstant(NumElems/2, MVT::i32), DAG, dl); + SDValue V = Extract128BitVector(V1, 0, DAG, dl); + SDValue InsV = Insert128BitVector(DAG.getUNDEF(VT), V, NumElems/2, DAG, dl); return DCI.CombineTo(N, InsV); } @@ -12995,12 +13201,12 @@ static SDValue PerformShuffleCombine(SDNode *N, SelectionDAG &DAG, return SDValue(); // Combine 256-bit vector shuffles. This is only profitable when in AVX mode - if (Subtarget->hasAVX() && VT.getSizeInBits() == 256 && + if (Subtarget->hasAVX() && VT.is256BitVector() && N->getOpcode() == ISD::VECTOR_SHUFFLE) return PerformShuffleCombine256(N, DAG, DCI, Subtarget); // Only handle 128 wide vector from here on. - if (VT.getSizeInBits() != 128) + if (!VT.is128BitVector()) return SDValue(); // Combine a vector_shuffle that is equal to build_vector load1, load2, load3, @@ -13014,16 +13220,17 @@ static SDValue PerformShuffleCombine(SDNode *N, SelectionDAG &DAG, } -/// PerformTruncateCombine - Converts truncate operation to +/// DCI, PerformTruncateCombine - Converts truncate operation to /// a sequence of vector shuffle operations. 
/// It is possible when we truncate 256-bit vector to 128-bit vector -SDValue X86TargetLowering::PerformTruncateCombine(SDNode *N, SelectionDAG &DAG, +SDValue X86TargetLowering::PerformTruncateCombine(SDNode *N, SelectionDAG &DAG, DAGCombinerInfo &DCI) const { if (!DCI.isBeforeLegalizeOps()) return SDValue(); - if (!Subtarget->hasAVX()) return SDValue(); + if (!Subtarget->hasAVX()) + return SDValue(); EVT VT = N->getValueType(0); SDValue Op = N->getOperand(0); @@ -13032,55 +13239,102 @@ SDValue X86TargetLowering::PerformTruncateCombine(SDNode *N, SelectionDAG &DAG, if ((VT == MVT::v4i32) && (OpVT == MVT::v4i64)) { + if (Subtarget->hasAVX2()) { + // AVX2: v4i64 -> v4i32 + + // VPERMD + static const int ShufMask[] = {0, 2, 4, 6, -1, -1, -1, -1}; + + Op = DAG.getNode(ISD::BITCAST, dl, MVT::v8i32, Op); + Op = DAG.getVectorShuffle(MVT::v8i32, dl, Op, DAG.getUNDEF(MVT::v8i32), + ShufMask); + + return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, Op, + DAG.getIntPtrConstant(0)); + } + + // AVX: v4i64 -> v4i32 SDValue OpLo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v2i64, Op, - DAG.getIntPtrConstant(0)); + DAG.getIntPtrConstant(0)); SDValue OpHi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v2i64, Op, - DAG.getIntPtrConstant(2)); + DAG.getIntPtrConstant(2)); OpLo = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, OpLo); OpHi = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, OpHi); // PSHUFD - int ShufMask1[] = {0, 2, 0, 0}; + static const int ShufMask1[] = {0, 2, 0, 0}; - OpLo = DAG.getVectorShuffle(VT, dl, OpLo, DAG.getUNDEF(VT), - ShufMask1); - OpHi = DAG.getVectorShuffle(VT, dl, OpHi, DAG.getUNDEF(VT), - ShufMask1); + SDValue Undef = DAG.getUNDEF(VT); + OpLo = DAG.getVectorShuffle(VT, dl, OpLo, Undef, ShufMask1); + OpHi = DAG.getVectorShuffle(VT, dl, OpHi, Undef, ShufMask1); // MOVLHPS - int ShufMask2[] = {0, 1, 4, 5}; + static const int ShufMask2[] = {0, 1, 4, 5}; return DAG.getVectorShuffle(VT, dl, OpLo, OpHi, ShufMask2); } + if ((VT == MVT::v8i16) && (OpVT == MVT::v8i32)) { + if (Subtarget->hasAVX2()) { + // AVX2: v8i32 -> v8i16 + + Op = DAG.getNode(ISD::BITCAST, dl, MVT::v32i8, Op); + + // PSHUFB + SmallVector pshufbMask; + for (unsigned i = 0; i < 2; ++i) { + pshufbMask.push_back(DAG.getConstant(0x0, MVT::i8)); + pshufbMask.push_back(DAG.getConstant(0x1, MVT::i8)); + pshufbMask.push_back(DAG.getConstant(0x4, MVT::i8)); + pshufbMask.push_back(DAG.getConstant(0x5, MVT::i8)); + pshufbMask.push_back(DAG.getConstant(0x8, MVT::i8)); + pshufbMask.push_back(DAG.getConstant(0x9, MVT::i8)); + pshufbMask.push_back(DAG.getConstant(0xc, MVT::i8)); + pshufbMask.push_back(DAG.getConstant(0xd, MVT::i8)); + for (unsigned j = 0; j < 8; ++j) + pshufbMask.push_back(DAG.getConstant(0x80, MVT::i8)); + } + SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v32i8, + &pshufbMask[0], 32); + Op = DAG.getNode(X86ISD::PSHUFB, dl, MVT::v32i8, Op, BV); + + Op = DAG.getNode(ISD::BITCAST, dl, MVT::v4i64, Op); + + static const int ShufMask[] = {0, 2, -1, -1}; + Op = DAG.getVectorShuffle(MVT::v4i64, dl, Op, DAG.getUNDEF(MVT::v4i64), + &ShufMask[0]); + + Op = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v2i64, Op, + DAG.getIntPtrConstant(0)); + + return DAG.getNode(ISD::BITCAST, dl, VT, Op); + } + SDValue OpLo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i32, Op, - DAG.getIntPtrConstant(0)); + DAG.getIntPtrConstant(0)); SDValue OpHi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i32, Op, - DAG.getIntPtrConstant(4)); + DAG.getIntPtrConstant(4)); OpLo = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpLo); OpHi = 
DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpHi); // PSHUFB - int ShufMask1[] = {0, 1, 4, 5, 8, 9, 12, 13, - -1, -1, -1, -1, -1, -1, -1, -1}; + static const int ShufMask1[] = {0, 1, 4, 5, 8, 9, 12, 13, + -1, -1, -1, -1, -1, -1, -1, -1}; - OpLo = DAG.getVectorShuffle(MVT::v16i8, dl, OpLo, - DAG.getUNDEF(MVT::v16i8), - ShufMask1); - OpHi = DAG.getVectorShuffle(MVT::v16i8, dl, OpHi, - DAG.getUNDEF(MVT::v16i8), - ShufMask1); + SDValue Undef = DAG.getUNDEF(MVT::v16i8); + OpLo = DAG.getVectorShuffle(MVT::v16i8, dl, OpLo, Undef, ShufMask1); + OpHi = DAG.getVectorShuffle(MVT::v16i8, dl, OpHi, Undef, ShufMask1); OpLo = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, OpLo); OpHi = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, OpHi); // MOVLHPS - int ShufMask2[] = {0, 1, 4, 5}; + static const int ShufMask2[] = {0, 1, 4, 5}; SDValue res = DAG.getVectorShuffle(MVT::v4i32, dl, OpLo, OpHi, ShufMask2); return DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, res); @@ -13127,7 +13381,8 @@ static SDValue XFormVExtractWithShuffleIntoLoad(SDNode *N, SelectionDAG &DAG, SmallVector ShuffleMask; bool UnaryShuffle; - if (!getTargetShuffleMask(InVec.getNode(), VT, ShuffleMask, UnaryShuffle)) + if (!getTargetShuffleMask(InVec.getNode(), VT.getSimpleVT(), ShuffleMask, + UnaryShuffle)) return SDValue(); // Select the input vector, guarding against out of range extract vector. @@ -13276,8 +13531,6 @@ static SDValue PerformEXTRACT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG, static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget *Subtarget) { - - DebugLoc DL = N->getDebugLoc(); SDValue Cond = N->getOperand(0); // Get the LHS/RHS of the select. @@ -13559,9 +13812,13 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG, // to simplify previous instructions. const TargetLowering &TLI = DAG.getTargetLoweringInfo(); if (N->getOpcode() == ISD::VSELECT && DCI.isBeforeLegalizeOps() && - !DCI.isBeforeLegalize() && - TLI.isOperationLegal(ISD::VSELECT, VT)) { + !DCI.isBeforeLegalize() && TLI.isOperationLegal(ISD::VSELECT, VT)) { unsigned BitWidth = Cond.getValueType().getScalarType().getSizeInBits(); + + // Don't optimize vector selects that map to mask-registers. + if (BitWidth == 1) + return SDValue(); + assert(BitWidth >= 8 && BitWidth <= 64 && "Invalid mask size"); APInt DemandedMask = APInt::getHighBitsSet(BitWidth, 1); @@ -13576,6 +13833,88 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG, return SDValue(); } +// Check whether a boolean test is testing a boolean value generated by +// X86ISD::SETCC. If so, return the operand of that SETCC and proper condition +// code. +// +// Simplify the following patterns: +// (Op (CMP (SETCC Cond EFLAGS) 1) EQ) or +// (Op (CMP (SETCC Cond EFLAGS) 0) NEQ) +// to (Op EFLAGS Cond) +// +// (Op (CMP (SETCC Cond EFLAGS) 0) EQ) or +// (Op (CMP (SETCC Cond EFLAGS) 1) NEQ) +// to (Op EFLAGS !Cond) +// +// where Op could be BRCOND or CMOV. +// +static SDValue BoolTestSetCCCombine(SDValue Cmp, X86::CondCode &CC) { + // Quit if not CMP and SUB with its value result used. + if (Cmp.getOpcode() != X86ISD::CMP && + (Cmp.getOpcode() != X86ISD::SUB || Cmp.getNode()->hasAnyUseOfValue(0))) + return SDValue(); + + // Quit if not used as a boolean value. + if (CC != X86::COND_E && CC != X86::COND_NE) + return SDValue(); + + // Check CMP operands. One of them should be 0 or 1 and the other should be + // an SetCC or extended from it. 
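// The AVX path of PerformTruncateCombine is easiest to verify lane by lane:
// the low 32 bits of each 64-bit element are already the truncated value,
// PSHUFD mask {0,2,0,0} gathers the even lanes of each 128-bit half, and the
// MOVLHPS-style mask {0,1,4,5} concatenates the two low halves. A scalar
// model of the v4i64 -> v4i32 case (little-endian lane numbering assumed):
#include <array>
#include <cstdint>

static std::array<uint32_t, 4> truncV4I64(const std::array<uint64_t, 4> &v) {
  std::array<uint32_t, 8> w;                 // v4i64 viewed as v8i32
  for (int i = 0; i != 4; ++i) {
    w[2 * i]     = uint32_t(v[i]);           // low half == truncated lane
    w[2 * i + 1] = uint32_t(v[i] >> 32);
  }
  std::array<uint32_t, 4> lo = { w[0], w[2], w[0], w[0] }; // PSHUFD {0,2,0,0}
  std::array<uint32_t, 4> hi = { w[4], w[6], w[4], w[4] };
  return { lo[0], lo[1], hi[0], hi[1] };     // MOVLHPS {0,1,4,5}
}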
+  SDValue Op1 = Cmp.getOperand(0);
+  SDValue Op2 = Cmp.getOperand(1);
+
+  SDValue SetCC;
+  const ConstantSDNode* C = 0;
+  bool needOppositeCond = (CC == X86::COND_E);
+
+  if ((C = dyn_cast<ConstantSDNode>(Op1)))
+    SetCC = Op2;
+  else if ((C = dyn_cast<ConstantSDNode>(Op2)))
+    SetCC = Op1;
+  else // Quit if neither operand is a constant.
+    return SDValue();
+
+  if (C->getZExtValue() == 1)
+    needOppositeCond = !needOppositeCond;
+  else if (C->getZExtValue() != 0)
+    // Quit if the constant is neither 0 nor 1.
+    return SDValue();
+
+  // Skip 'zext' node.
+  if (SetCC.getOpcode() == ISD::ZERO_EXTEND)
+    SetCC = SetCC.getOperand(0);
+
+  // Quit if not SETCC.
+  // FIXME: So far we only handle boolean values generated by SETCC. If there
+  // are other ways to generate boolean values, we need to handle them here
+  // as well.
+  if (SetCC.getOpcode() != X86ISD::SETCC)
+    return SDValue();
+
+  // Set the condition code or the opposite one if necessary.
+  CC = X86::CondCode(SetCC.getConstantOperandVal(0));
+  if (needOppositeCond)
+    CC = X86::GetOppositeBranchCondition(CC);
+
+  return SetCC.getOperand(1);
+}
+
+static bool IsValidFCMOVCondition(X86::CondCode CC) {
+  switch (CC) {
+  default:
+    return false;
+  case X86::COND_B:
+  case X86::COND_BE:
+  case X86::COND_E:
+  case X86::COND_P:
+  case X86::COND_AE:
+  case X86::COND_A:
+  case X86::COND_NE:
+  case X86::COND_NP:
+    return true;
+  }
+}
+
 /// Optimize X86ISD::CMOV [LHS, RHS, CONDCODE (e.g. X86::COND_NE), CONDVAL]
 static SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG,
                                   TargetLowering::DAGCombinerInfo &DCI) {
@@ -13589,6 +13928,7 @@ static SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG,
   SDValue TrueOp = N->getOperand(1);
   X86::CondCode CC = (X86::CondCode)N->getConstantOperandVal(2);
   SDValue Cond = N->getOperand(3);
+
   if (CC == X86::COND_E || CC == X86::COND_NE) {
     switch (Cond.getOpcode()) {
     default: break;
@@ -13600,6 +13940,18 @@ static SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG,
     }
   }
 
+  SDValue Flags;
+
+  Flags = BoolTestSetCCCombine(Cond, CC);
+  if (Flags.getNode() &&
+      // Extra check as FCMOV only supports a subset of the X86 condition codes.
+      (FalseOp.getValueType() != MVT::f80 || IsValidFCMOVCondition(CC))) {
+    SDValue Ops[] = { FalseOp, TrueOp,
+                      DAG.getConstant(CC, MVT::i8), Flags };
+    return DAG.getNode(X86ISD::CMOV, DL, N->getVTList(),
+                       Ops, array_lengthof(Ops));
+  }
+
   // If this is a select between two integer constants, try to do some
   // optimizations. Note that the operands are ordered the opposite of SELECT
   // operands.
@@ -14022,7 +14374,7 @@ static bool CanFoldXORWithAllOnes(const SDNode *N) {
   // Sometimes the operand may come from a insert_subvector building a 256-bit
   // allones vector
-  if (VT.getSizeInBits() == 256 &&
+  if (VT.is256BitVector() &&
       N->getOpcode() == ISD::INSERT_SUBVECTOR) {
     SDValue V1 = N->getOperand(0);
     SDValue V2 = N->getOperand(1);
@@ -14260,6 +14612,41 @@ static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG,
   return SDValue();
 }
 
+// Generate NEG and CMOV for integer abs.
+static SDValue performIntegerAbsCombine(SDNode *N, SelectionDAG &DAG) {
+  EVT VT = N->getValueType(0);
+
+  // Since X86 does not have CMOV for 8-bit integers, we don't convert
+  // 8-bit integer abs to NEG and CMOV.
+  if (VT.isInteger() && VT.getSizeInBits() == 8)
+    return SDValue();
+
+  SDValue N0 = N->getOperand(0);
+  SDValue N1 = N->getOperand(1);
+  DebugLoc DL = N->getDebugLoc();
+
+  // Check for the pattern XOR(ADD(X,Y), Y) where Y is SRA(X, size(X)-1)
+  // and change it to SUB and CMOV.
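
The matched IR is the classic branchless absolute value. A minimal sketch for 32-bit values, assuming an arithmetic right shift for signed types as x86 provides (absViaSignMask is an illustrative name; INT32_MIN overflows here just as it would in the original pattern):

    #include <cassert>
    #include <cstdint>

    static int32_t absViaSignMask(int32_t x) {
      int32_t y = x >> 31;   // SRA(X, size(X)-1): 0 if x >= 0, -1 if x < 0
      return (x + y) ^ y;    // XOR(ADD(X, Y), Y)
    }

    int main() {
      assert(absViaSignMask(5)  == 5);
      assert(absViaSignMask(-5) == 5);  // y = -1: (-5 + -1) ^ -1 == -6 ^ -1 == 5
      assert(absViaSignMask(0)  == 0);
      return 0;
    }

The code below rewrites that three-node sequence as SUB(0, X), whose EFLAGS result feeds a CMOVGE selecting X for non-negative inputs and the negated value otherwise.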
+ if (VT.isInteger() && N->getOpcode() == ISD::XOR && + N0.getOpcode() == ISD::ADD && + N0.getOperand(1) == N1 && + N1.getOpcode() == ISD::SRA && + N1.getOperand(0) == N0.getOperand(0)) + if (ConstantSDNode *Y1C = dyn_cast(N1.getOperand(1))) + if (Y1C->getAPIntValue() == VT.getSizeInBits()-1) { + // Generate SUB & CMOV. + SDValue Neg = DAG.getNode(X86ISD::SUB, DL, DAG.getVTList(VT, MVT::i32), + DAG.getConstant(0, VT), N0.getOperand(0)); + + SDValue Ops[] = { N0.getOperand(0), Neg, + DAG.getConstant(X86::COND_GE, MVT::i8), + SDValue(Neg.getNode(), 1) }; + return DAG.getNode(X86ISD::CMOV, DL, DAG.getVTList(VT, MVT::Glue), + Ops, array_lengthof(Ops)); + } + return SDValue(); +} + // PerformXorCombine - Attempts to turn XOR nodes into BLSMSK nodes static SDValue PerformXorCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, @@ -14267,6 +14654,16 @@ static SDValue PerformXorCombine(SDNode *N, SelectionDAG &DAG, if (DCI.isBeforeLegalizeOps()) return SDValue(); + if (Subtarget->hasCMov()) { + SDValue RV = performIntegerAbsCombine(N, DAG); + if (RV.getNode()) + return RV; + } + + // Try forming BMI if it is available. + if (!Subtarget->hasBMI()) + return SDValue(); + EVT VT = N->getValueType(0); if (VT != MVT::i32 && VT != MVT::i64) @@ -14292,7 +14689,8 @@ static SDValue PerformXorCombine(SDNode *N, SelectionDAG &DAG, /// PerformLOADCombine - Do target-specific dag combines on LOAD nodes. static SDValue PerformLOADCombine(SDNode *N, SelectionDAG &DAG, - const X86Subtarget *Subtarget) { + TargetLowering::DAGCombinerInfo &DCI, + const X86Subtarget *Subtarget) { LoadSDNode *Ld = cast(N); EVT RegVT = Ld->getValueType(0); EVT MemVT = Ld->getMemoryVT(); @@ -14314,63 +14712,94 @@ static SDValue PerformLOADCombine(SDNode *N, SelectionDAG &DAG, unsigned RegSz = RegVT.getSizeInBits(); unsigned MemSz = MemVT.getSizeInBits(); assert(RegSz > MemSz && "Register size must be greater than the mem size"); - // All sizes must be a power of two - if (!isPowerOf2_32(RegSz * MemSz * NumElems)) return SDValue(); - // Attempt to load the original value using a single load op. - // Find a scalar type which is equal to the loaded word size. + // All sizes must be a power of two. + if (!isPowerOf2_32(RegSz * MemSz * NumElems)) + return SDValue(); + + // Attempt to load the original value using scalar loads. + // Find the largest scalar type that divides the total loaded size. MVT SclrLoadTy = MVT::i8; for (unsigned tp = MVT::FIRST_INTEGER_VALUETYPE; tp < MVT::LAST_INTEGER_VALUETYPE; ++tp) { MVT Tp = (MVT::SimpleValueType)tp; - if (TLI.isTypeLegal(Tp) && Tp.getSizeInBits() == MemSz) { + if (TLI.isTypeLegal(Tp) && ((MemSz % Tp.getSizeInBits()) == 0)) { SclrLoadTy = Tp; - break; } } - // Proceed if a load word is found. - if (SclrLoadTy.getSizeInBits() != MemSz) return SDValue(); + // On 32bit systems, we can't save 64bit integers. Try bitcasting to F64. + if (TLI.isTypeLegal(MVT::f64) && SclrLoadTy.getSizeInBits() < 64 && + (64 <= MemSz)) + SclrLoadTy = MVT::f64; + // Calculate the number of scalar loads that we need to perform + // in order to load our vector from memory. + unsigned NumLoads = MemSz / SclrLoadTy.getSizeInBits(); + + // Represent our vector as a sequence of elements which are the + // largest scalar that we can load. EVT LoadUnitVecVT = EVT::getVectorVT(*DAG.getContext(), SclrLoadTy, RegSz/SclrLoadTy.getSizeInBits()); + // Represent the data using the same element type that is stored in + // memory. In practice, we ''widen'' MemVT. 
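
Since the break statement is gone, the selection loop above keeps the largest legal integer type whose width divides the loaded size. A rough model of that arithmetic, assuming only i8 through i64 are legal (pickScalarLoadBits is an illustrative helper, not LLVM API):

    // Returns the scalar width used per load; NumLoads = MemSz / width.
    // E.g. a v4i8 load (MemSz = 32) takes one i32 load, while a 48-bit
    // value would take three i16 loads.
    static unsigned pickScalarLoadBits(unsigned MemSz) {
      unsigned Best = 8;
      for (unsigned Bits = 8; Bits <= 64; Bits *= 2)
        if (MemSz % Bits == 0)
          Best = Bits;                 // keep the largest divisor found
      return Best;
    }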
EVT WideVecVT = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), RegSz/MemVT.getScalarType().getSizeInBits()); - // Can't shuffle using an illegal type. - if (!TLI.isTypeLegal(WideVecVT)) return SDValue(); - // Perform a single load. - SDValue ScalarLoad = DAG.getLoad(SclrLoadTy, dl, Ld->getChain(), - Ld->getBasePtr(), - Ld->getPointerInfo(), Ld->isVolatile(), - Ld->isNonTemporal(), Ld->isInvariant(), - Ld->getAlignment()); + assert(WideVecVT.getSizeInBits() == LoadUnitVecVT.getSizeInBits() && + "Invalid vector type"); + + // We can't shuffle using an illegal type. + if (!TLI.isTypeLegal(WideVecVT)) + return SDValue(); - // Insert the word loaded into a vector. - SDValue ScalarInVector = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, - LoadUnitVecVT, ScalarLoad); + SmallVector Chains; + SDValue Ptr = Ld->getBasePtr(); + SDValue Increment = DAG.getConstant(SclrLoadTy.getSizeInBits()/8, + TLI.getPointerTy()); + SDValue Res = DAG.getUNDEF(LoadUnitVecVT); + + for (unsigned i = 0; i < NumLoads; ++i) { + // Perform a single load. + SDValue ScalarLoad = DAG.getLoad(SclrLoadTy, dl, Ld->getChain(), + Ptr, Ld->getPointerInfo(), + Ld->isVolatile(), Ld->isNonTemporal(), + Ld->isInvariant(), Ld->getAlignment()); + Chains.push_back(ScalarLoad.getValue(1)); + // Create the first element type using SCALAR_TO_VECTOR in order to avoid + // another round of DAGCombining. + if (i == 0) + Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, LoadUnitVecVT, ScalarLoad); + else + Res = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, LoadUnitVecVT, Res, + ScalarLoad, DAG.getIntPtrConstant(i)); + + Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment); + } + + SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &Chains[0], + Chains.size()); // Bitcast the loaded value to a vector of the original element type, in // the size of the target vector type. - SDValue SlicedVec = DAG.getNode(ISD::BITCAST, dl, WideVecVT, - ScalarInVector); + SDValue SlicedVec = DAG.getNode(ISD::BITCAST, dl, WideVecVT, Res); unsigned SizeRatio = RegSz/MemSz; // Redistribute the loaded elements into the different locations. SmallVector ShuffleVec(NumElems * SizeRatio, -1); - for (unsigned i = 0; i < NumElems; i++) ShuffleVec[i*SizeRatio] = i; + for (unsigned i = 0; i != NumElems; ++i) + ShuffleVec[i*SizeRatio] = i; SDValue Shuff = DAG.getVectorShuffle(WideVecVT, dl, SlicedVec, - DAG.getUNDEF(SlicedVec.getValueType()), - ShuffleVec.data()); + DAG.getUNDEF(WideVecVT), + &ShuffleVec[0]); // Bitcast to the requested type. Shuff = DAG.getNode(ISD::BITCAST, dl, RegVT, Shuff); // Replace the original load with the new sequence // and return the new chain. - DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Shuff); - return SDValue(ScalarLoad.getNode(), 1); + return DCI.CombineTo(N, Shuff, TF, true); } return SDValue(); @@ -14387,13 +14816,12 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG, const TargetLowering &TLI = DAG.getTargetLoweringInfo(); // If we are saving a concatenation of two XMM registers, perform two stores. - // This is better in Sandy Bridge cause one 256-bit mem op is done via two - // 128-bit ones. If in the future the cost becomes only one memory access the - // first version would be better. - if (VT.getSizeInBits() == 256 && - StoredVal.getNode()->getOpcode() == ISD::CONCAT_VECTORS && - StoredVal.getNumOperands() == 2) { - + // On Sandy Bridge, 256-bit memory operations are executed by two + // 128-bit ports. However, on Haswell it is better to issue a single 256-bit + // memory operation. 
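
The load rewrite just shown ends by scattering the loaded elements with ShuffleVec[i*SizeRatio] = i; the truncating-store path in the hunk below applies the inverse mask, ShuffleVec[i] = i*SizeRatio, to pack the live elements to the bottom. A worked instance of the load side for an extending v4i8 -> v4i32 load (RegSz = 128, MemSz = 32, so SizeRatio = 4), assuming x86's little-endian lane layout:

    #include <cstdio>

    int main() {
      const unsigned NumElems = 4, SizeRatio = 4;
      int ShuffleVec[NumElems * SizeRatio];
      for (unsigned i = 0; i != NumElems * SizeRatio; ++i)
        ShuffleVec[i] = -1;                 // -1 marks an undef lane
      for (unsigned i = 0; i != NumElems; ++i)
        ShuffleVec[i * SizeRatio] = i;      // byte i -> low byte of element i
      for (int m : ShuffleVec)
        printf("%d ", m);  // prints: 0 -1 -1 -1 1 -1 -1 -1 2 -1 -1 -1 3 -1 -1 -1
      printf("\n");
      return 0;
    }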
+ if (VT.is256BitVector() && !Subtarget->hasAVX2() && + StoredVal.getNode()->getOpcode() == ISD::CONCAT_VECTORS && + StoredVal.getNumOperands() == 2) { SDValue Value0 = StoredVal.getOperand(0); SDValue Value1 = StoredVal.getOperand(1); @@ -14438,14 +14866,16 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG, SDValue WideVec = DAG.getNode(ISD::BITCAST, dl, WideVecVT, St->getValue()); SmallVector ShuffleVec(NumElems * SizeRatio, -1); - for (unsigned i = 0; i < NumElems; i++ ) ShuffleVec[i] = i * SizeRatio; + for (unsigned i = 0; i != NumElems; ++i) + ShuffleVec[i] = i * SizeRatio; - // Can't shuffle using an illegal type - if (!TLI.isTypeLegal(WideVecVT)) return SDValue(); + // Can't shuffle using an illegal type. + if (!TLI.isTypeLegal(WideVecVT)) + return SDValue(); SDValue Shuff = DAG.getVectorShuffle(WideVecVT, dl, WideVec, - DAG.getUNDEF(WideVec.getValueType()), - ShuffleVec.data()); + DAG.getUNDEF(WideVecVT), + &ShuffleVec[0]); // At this point all of the data is stored at the bottom of the // register. We now need to save it to mem. @@ -14454,13 +14884,18 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG, for (unsigned tp = MVT::FIRST_INTEGER_VALUETYPE; tp < MVT::LAST_INTEGER_VALUETYPE; ++tp) { MVT Tp = (MVT::SimpleValueType)tp; - if (TLI.isTypeLegal(Tp) && StoreType.getSizeInBits() < NumElems * ToSz) + if (TLI.isTypeLegal(Tp) && Tp.getSizeInBits() <= NumElems * ToSz) StoreType = Tp; } + // On 32bit systems, we can't save 64bit integers. Try bitcasting to F64. + if (TLI.isTypeLegal(MVT::f64) && StoreType.getSizeInBits() < 64 && + (64 <= NumElems * ToSz)) + StoreType = MVT::f64; + // Bitcast the original vector into a vector of store-size units EVT StoreVecVT = EVT::getVectorVT(*DAG.getContext(), - StoreType, VT.getSizeInBits()/EVT(StoreType).getSizeInBits()); + StoreType, VT.getSizeInBits()/StoreType.getSizeInBits()); assert(StoreVecVT.getSizeInBits() == VT.getSizeInBits()); SDValue ShuffWide = DAG.getNode(ISD::BITCAST, dl, StoreVecVT, Shuff); SmallVector Chains; @@ -14469,7 +14904,7 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG, SDValue Ptr = St->getBasePtr(); // Perform one or more big stores into memory. 
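
With the live elements packed low, the loop at the start of the next hunk issues the minimal number of wide scalar stores. For a v8i32 -> v8i16 truncating store (ToSz = 16, NumElems = 8) on a target where i64 is legal, the selection above settles on i64, so the store count is (16 * 8) / 64 = 2, with the pointer advanced by 8 bytes between stores. The count in one line (numBigStores is an illustrative helper):

    static unsigned numBigStores(unsigned ToSz, unsigned NumElems,
                                 unsigned StoreBits) {
      return (ToSz * NumElems) / StoreBits;   // e.g. (16 * 8) / 64 == 2
    }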
- for (unsigned i = 0; i < (ToSz*NumElems)/StoreType.getSizeInBits() ; i++) { + for (unsigned i=0, e=(ToSz*NumElems)/StoreType.getSizeInBits(); i!=e; ++i) { SDValue SubVec = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, StoreType, ShuffWide, DAG.getIntPtrConstant(i)); @@ -14818,18 +15253,9 @@ static SDValue PerformSExtCombine(SDNode *N, SelectionDAG &DAG, if (!DCI.isBeforeLegalizeOps()) return SDValue(); - if (!Subtarget->hasAVX()) + if (!Subtarget->hasAVX()) return SDValue(); - // Optimize vectors in AVX mode - // Sign extend v8i16 to v8i32 and - // v4i32 to v4i64 - // - // Divide input vector into two parts - // for v4i32 the shuffle mask will be { 0, 1, -1, -1} {2, 3, -1, -1} - // use vpmovsx instruction to extend v4i32 -> v2i64; v8i16 -> v4i32 - // concat the vectors to original VT - EVT VT = N->getValueType(0); SDValue Op = N->getOperand(0); EVT OpVT = Op.getValueType(); @@ -14838,23 +15264,37 @@ static SDValue PerformSExtCombine(SDNode *N, SelectionDAG &DAG, if ((VT == MVT::v4i64 && OpVT == MVT::v4i32) || (VT == MVT::v8i32 && OpVT == MVT::v8i16)) { + if (Subtarget->hasAVX2()) + return DAG.getNode(X86ISD::VSEXT_MOVL, dl, VT, Op); + + // Optimize vectors in AVX mode + // Sign extend v8i16 to v8i32 and + // v4i32 to v4i64 + // + // Divide input vector into two parts + // for v4i32 the shuffle mask will be { 0, 1, -1, -1} {2, 3, -1, -1} + // use vpmovsx instruction to extend v4i32 -> v2i64; v8i16 -> v4i32 + // concat the vectors to original VT + unsigned NumElems = OpVT.getVectorNumElements(); + SDValue Undef = DAG.getUNDEF(OpVT); + SmallVector ShufMask1(NumElems, -1); - for (unsigned i = 0; i < NumElems/2; i++) ShufMask1[i] = i; + for (unsigned i = 0; i != NumElems/2; ++i) + ShufMask1[i] = i; - SDValue OpLo = DAG.getVectorShuffle(OpVT, dl, Op, DAG.getUNDEF(OpVT), - ShufMask1.data()); + SDValue OpLo = DAG.getVectorShuffle(OpVT, dl, Op, Undef, &ShufMask1[0]); SmallVector ShufMask2(NumElems, -1); - for (unsigned i = 0; i < NumElems/2; i++) ShufMask2[i] = i + NumElems/2; + for (unsigned i = 0; i != NumElems/2; ++i) + ShufMask2[i] = i + NumElems/2; - SDValue OpHi = DAG.getVectorShuffle(OpVT, dl, Op, DAG.getUNDEF(OpVT), - ShufMask2.data()); + SDValue OpHi = DAG.getVectorShuffle(OpVT, dl, Op, Undef, &ShufMask2[0]); - EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), VT.getScalarType(), + EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), VT.getScalarType(), VT.getVectorNumElements()/2); - OpLo = DAG.getNode(X86ISD::VSEXT_MOVL, dl, HalfVT, OpLo); + OpLo = DAG.getNode(X86ISD::VSEXT_MOVL, dl, HalfVT, OpLo); OpHi = DAG.getNode(X86ISD::VSEXT_MOVL, dl, HalfVT, OpHi); return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, OpLo, OpHi); @@ -14862,7 +15302,42 @@ static SDValue PerformSExtCombine(SDNode *N, SelectionDAG &DAG, return SDValue(); } +static SDValue PerformFMACombine(SDNode *N, SelectionDAG &DAG, + const X86Subtarget* Subtarget) { + DebugLoc dl = N->getDebugLoc(); + EVT VT = N->getValueType(0); + + EVT ScalarVT = VT.getScalarType(); + if ((ScalarVT != MVT::f32 && ScalarVT != MVT::f64) || !Subtarget->hasFMA()) + return SDValue(); + + SDValue A = N->getOperand(0); + SDValue B = N->getOperand(1); + SDValue C = N->getOperand(2); + + bool NegA = (A.getOpcode() == ISD::FNEG); + bool NegB = (B.getOpcode() == ISD::FNEG); + bool NegC = (C.getOpcode() == ISD::FNEG); + + // Negative multiplication when NegA xor NegB + bool NegMul = (NegA != NegB); + if (NegA) + A = A.getOperand(0); + if (NegB) + B = B.getOperand(0); + if (NegC) + C = C.getOperand(0); + + unsigned Opcode; + if (!NegMul) + Opcode = (!NegC)? 
X86ISD::FMADD : X86ISD::FMSUB; + else + Opcode = (!NegC)? X86ISD::FNMADD : X86ISD::FNMSUB; + return DAG.getNode(Opcode, dl, VT, A, B, C); +} + static SDValue PerformZExtCombine(SDNode *N, SelectionDAG &DAG, + TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget *Subtarget) { // (i32 zext (and (i8 x86isd::setcc_carry), 1)) -> // (and (i32 x86isd::setcc_carry), 1) @@ -14887,6 +15362,7 @@ static SDValue PerformZExtCombine(SDNode *N, SelectionDAG &DAG, N00.getOperand(0), N00.getOperand(1)), DAG.getConstant(1, VT)); } + // Optimize vectors in AVX mode: // // v8i16 -> v8i32 @@ -14899,50 +15375,139 @@ static SDValue PerformZExtCombine(SDNode *N, SelectionDAG &DAG, // Use vpunpckhdq for 4 upper elements v4i32 -> v2i64. // Concat upper and lower parts. // - if (Subtarget->hasAVX()) { + if (!DCI.isBeforeLegalizeOps()) + return SDValue(); + + if (!Subtarget->hasAVX()) + return SDValue(); - if (((VT == MVT::v8i32) && (OpVT == MVT::v8i16)) || + if (((VT == MVT::v8i32) && (OpVT == MVT::v8i16)) || ((VT == MVT::v4i64) && (OpVT == MVT::v4i32))) { - SDValue ZeroVec = getZeroVector(OpVT, Subtarget, DAG, dl); - SDValue OpLo = getTargetShuffleNode(X86ISD::UNPCKL, dl, OpVT, N0, ZeroVec, DAG); - SDValue OpHi = getTargetShuffleNode(X86ISD::UNPCKH, dl, OpVT, N0, ZeroVec, DAG); + if (Subtarget->hasAVX2()) + return DAG.getNode(X86ISD::VZEXT_MOVL, dl, VT, N0); - EVT HVT = EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(), - VT.getVectorNumElements()/2); + SDValue ZeroVec = getZeroVector(OpVT, Subtarget, DAG, dl); + SDValue OpLo = getUnpackl(DAG, dl, OpVT, N0, ZeroVec); + SDValue OpHi = getUnpackh(DAG, dl, OpVT, N0, ZeroVec); - OpLo = DAG.getNode(ISD::BITCAST, dl, HVT, OpLo); - OpHi = DAG.getNode(ISD::BITCAST, dl, HVT, OpHi); + EVT HVT = EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(), + VT.getVectorNumElements()/2); - return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, OpLo, OpHi); - } + OpLo = DAG.getNode(ISD::BITCAST, dl, HVT, OpLo); + OpHi = DAG.getNode(ISD::BITCAST, dl, HVT, OpHi); + + return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, OpLo, OpHi); } + return SDValue(); +} + +// Optimize x == -y --> x+y == 0 +// x != -y --> x+y != 0 +static SDValue PerformISDSETCCCombine(SDNode *N, SelectionDAG &DAG) { + ISD::CondCode CC = cast(N->getOperand(2))->get(); + SDValue LHS = N->getOperand(0); + SDValue RHS = N->getOperand(1); + if ((CC == ISD::SETNE || CC == ISD::SETEQ) && LHS.getOpcode() == ISD::SUB) + if (ConstantSDNode *C = dyn_cast(LHS.getOperand(0))) + if (C->getAPIntValue() == 0 && LHS.hasOneUse()) { + SDValue addV = DAG.getNode(ISD::ADD, N->getDebugLoc(), + LHS.getValueType(), RHS, LHS.getOperand(1)); + return DAG.getSetCC(N->getDebugLoc(), N->getValueType(0), + addV, DAG.getConstant(0, addV.getValueType()), CC); + } + if ((CC == ISD::SETNE || CC == ISD::SETEQ) && RHS.getOpcode() == ISD::SUB) + if (ConstantSDNode *C = dyn_cast(RHS.getOperand(0))) + if (C->getAPIntValue() == 0 && RHS.hasOneUse()) { + SDValue addV = DAG.getNode(ISD::ADD, N->getDebugLoc(), + RHS.getValueType(), LHS, RHS.getOperand(1)); + return DAG.getSetCC(N->getDebugLoc(), N->getValueType(0), + addV, DAG.getConstant(0, addV.getValueType()), CC); + } return SDValue(); } // Optimize RES = X86ISD::SETCC CONDCODE, EFLAG_INPUT static SDValue PerformSETCCCombine(SDNode *N, SelectionDAG &DAG) { - unsigned X86CC = N->getConstantOperandVal(0); - SDValue EFLAG = N->getOperand(1); DebugLoc DL = N->getDebugLoc(); + X86::CondCode CC = X86::CondCode(N->getConstantOperandVal(0)); + SDValue EFLAGS = N->getOperand(1); // Materialize "setb 
reg" as "sbb reg,reg", since it can be extended without // a zext and produces an all-ones bit which is more useful than 0/1 in some // cases. - if (X86CC == X86::COND_B) + if (CC == X86::COND_B) return DAG.getNode(ISD::AND, DL, MVT::i8, DAG.getNode(X86ISD::SETCC_CARRY, DL, MVT::i8, - DAG.getConstant(X86CC, MVT::i8), EFLAG), + DAG.getConstant(CC, MVT::i8), EFLAGS), DAG.getConstant(1, MVT::i8)); + SDValue Flags; + + Flags = BoolTestSetCCCombine(EFLAGS, CC); + if (Flags.getNode()) { + SDValue Cond = DAG.getConstant(CC, MVT::i8); + return DAG.getNode(X86ISD::SETCC, DL, N->getVTList(), Cond, Flags); + } + + return SDValue(); +} + +// Optimize branch condition evaluation. +// +static SDValue PerformBrCondCombine(SDNode *N, SelectionDAG &DAG, + TargetLowering::DAGCombinerInfo &DCI, + const X86Subtarget *Subtarget) { + DebugLoc DL = N->getDebugLoc(); + SDValue Chain = N->getOperand(0); + SDValue Dest = N->getOperand(1); + SDValue EFLAGS = N->getOperand(3); + X86::CondCode CC = X86::CondCode(N->getConstantOperandVal(2)); + + SDValue Flags; + + Flags = BoolTestSetCCCombine(EFLAGS, CC); + if (Flags.getNode()) { + SDValue Cond = DAG.getConstant(CC, MVT::i8); + return DAG.getNode(X86ISD::BRCOND, DL, N->getVTList(), Chain, Dest, Cond, + Flags); + } + + return SDValue(); +} + +static SDValue PerformUINT_TO_FPCombine(SDNode *N, SelectionDAG &DAG) { + SDValue Op0 = N->getOperand(0); + EVT InVT = Op0->getValueType(0); + + // UINT_TO_FP(v4i8) -> SINT_TO_FP(ZEXT(v4i8 to v4i32)) + if (InVT == MVT::v8i8 || InVT == MVT::v4i8) { + DebugLoc dl = N->getDebugLoc(); + MVT DstVT = InVT == MVT::v4i8 ? MVT::v4i32 : MVT::v8i32; + SDValue P = DAG.getNode(ISD::ZERO_EXTEND, dl, DstVT, Op0); + // Notice that we use SINT_TO_FP because we know that the high bits + // are zero and SINT_TO_FP is better supported by the hardware. + return DAG.getNode(ISD::SINT_TO_FP, dl, N->getValueType(0), P); + } + return SDValue(); } static SDValue PerformSINT_TO_FPCombine(SDNode *N, SelectionDAG &DAG, const X86TargetLowering *XTLI) { SDValue Op0 = N->getOperand(0); + EVT InVT = Op0->getValueType(0); + + // SINT_TO_FP(v4i8) -> SINT_TO_FP(SEXT(v4i8 to v4i32)) + if (InVT == MVT::v8i8 || InVT == MVT::v4i8) { + DebugLoc dl = N->getDebugLoc(); + MVT DstVT = InVT == MVT::v4i8 ? MVT::v4i32 : MVT::v8i32; + SDValue P = DAG.getNode(ISD::SIGN_EXTEND, dl, DstVT, Op0); + return DAG.getNode(ISD::SINT_TO_FP, dl, N->getValueType(0), P); + } + // Transform (SINT_TO_FP (i64 ...)) into an x87 operation if we have // a 32-bit target where SSE doesn't support i64->FP operations. if (Op0.getOpcode() == ISD::LOAD) { @@ -14961,6 +15526,20 @@ static SDValue PerformSINT_TO_FPCombine(SDNode *N, SelectionDAG &DAG, return SDValue(); } +static SDValue PerformFP_TO_SINTCombine(SDNode *N, SelectionDAG &DAG) { + EVT VT = N->getValueType(0); + + // v4i8 = FP_TO_SINT() -> v4i8 = TRUNCATE (V4i32 = FP_TO_SINT() + if (VT == MVT::v8i8 || VT == MVT::v4i8) { + DebugLoc dl = N->getDebugLoc(); + MVT DstVT = VT == MVT::v4i8 ? 
MVT::v4i32 : MVT::v8i32; + SDValue I = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, N->getOperand(0)); + return DAG.getNode(ISD::TRUNCATE, dl, VT, I); + } + + return SDValue(); +} + // Optimize RES, EFLAGS = X86ISD::ADC LHS, RHS, EFLAGS static SDValue PerformADCCombine(SDNode *N, SelectionDAG &DAG, X86TargetLowering::DAGCombinerInfo &DCI) { @@ -15095,9 +15674,11 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, case ISD::AND: return PerformAndCombine(N, DAG, DCI, Subtarget); case ISD::OR: return PerformOrCombine(N, DAG, DCI, Subtarget); case ISD::XOR: return PerformXorCombine(N, DAG, DCI, Subtarget); - case ISD::LOAD: return PerformLOADCombine(N, DAG, Subtarget); + case ISD::LOAD: return PerformLOADCombine(N, DAG, DCI, Subtarget); case ISD::STORE: return PerformSTORECombine(N, DAG, Subtarget); + case ISD::UINT_TO_FP: return PerformUINT_TO_FPCombine(N, DAG); case ISD::SINT_TO_FP: return PerformSINT_TO_FPCombine(N, DAG, this); + case ISD::FP_TO_SINT: return PerformFP_TO_SINTCombine(N, DAG); case ISD::FADD: return PerformFADDCombine(N, DAG, Subtarget); case ISD::FSUB: return PerformFSUBCombine(N, DAG, Subtarget); case X86ISD::FXOR: @@ -15105,10 +15686,13 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, case X86ISD::FAND: return PerformFANDCombine(N, DAG); case X86ISD::BT: return PerformBTCombine(N, DAG, DCI); case X86ISD::VZEXT_MOVL: return PerformVZEXT_MOVLCombine(N, DAG); - case ISD::ZERO_EXTEND: return PerformZExtCombine(N, DAG, Subtarget); + case ISD::ANY_EXTEND: + case ISD::ZERO_EXTEND: return PerformZExtCombine(N, DAG, DCI, Subtarget); case ISD::SIGN_EXTEND: return PerformSExtCombine(N, DAG, DCI, Subtarget); case ISD::TRUNCATE: return PerformTruncateCombine(N, DAG, DCI); + case ISD::SETCC: return PerformISDSETCCCombine(N, DAG); case X86ISD::SETCC: return PerformSETCCCombine(N, DAG); + case X86ISD::BRCOND: return PerformBrCondCombine(N, DAG, DCI, Subtarget); case X86ISD::SHUFP: // Handle all target specific shuffles case X86ISD::PALIGN: case X86ISD::UNPCKH: @@ -15123,6 +15707,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, case X86ISD::VPERMILP: case X86ISD::VPERM2X128: case ISD::VECTOR_SHUFFLE: return PerformShuffleCombine(N, DAG, DCI,Subtarget); + case ISD::FMA: return PerformFMACombine(N, DAG, Subtarget); } return SDValue(); @@ -15652,55 +16237,55 @@ X86TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, // in the normal allocation? case 'q': // GENERAL_REGS in 64-bit mode, Q_REGS in 32-bit mode. 
if (Subtarget->is64Bit()) { - if (VT == MVT::i32 || VT == MVT::f32) - return std::make_pair(0U, X86::GR32RegisterClass); - else if (VT == MVT::i16) - return std::make_pair(0U, X86::GR16RegisterClass); - else if (VT == MVT::i8 || VT == MVT::i1) - return std::make_pair(0U, X86::GR8RegisterClass); - else if (VT == MVT::i64 || VT == MVT::f64) - return std::make_pair(0U, X86::GR64RegisterClass); - break; + if (VT == MVT::i32 || VT == MVT::f32) + return std::make_pair(0U, &X86::GR32RegClass); + if (VT == MVT::i16) + return std::make_pair(0U, &X86::GR16RegClass); + if (VT == MVT::i8 || VT == MVT::i1) + return std::make_pair(0U, &X86::GR8RegClass); + if (VT == MVT::i64 || VT == MVT::f64) + return std::make_pair(0U, &X86::GR64RegClass); + break; } // 32-bit fallthrough case 'Q': // Q_REGS if (VT == MVT::i32 || VT == MVT::f32) - return std::make_pair(0U, X86::GR32_ABCDRegisterClass); - else if (VT == MVT::i16) - return std::make_pair(0U, X86::GR16_ABCDRegisterClass); - else if (VT == MVT::i8 || VT == MVT::i1) - return std::make_pair(0U, X86::GR8_ABCD_LRegisterClass); - else if (VT == MVT::i64) - return std::make_pair(0U, X86::GR64_ABCDRegisterClass); + return std::make_pair(0U, &X86::GR32_ABCDRegClass); + if (VT == MVT::i16) + return std::make_pair(0U, &X86::GR16_ABCDRegClass); + if (VT == MVT::i8 || VT == MVT::i1) + return std::make_pair(0U, &X86::GR8_ABCD_LRegClass); + if (VT == MVT::i64) + return std::make_pair(0U, &X86::GR64_ABCDRegClass); break; case 'r': // GENERAL_REGS case 'l': // INDEX_REGS if (VT == MVT::i8 || VT == MVT::i1) - return std::make_pair(0U, X86::GR8RegisterClass); + return std::make_pair(0U, &X86::GR8RegClass); if (VT == MVT::i16) - return std::make_pair(0U, X86::GR16RegisterClass); + return std::make_pair(0U, &X86::GR16RegClass); if (VT == MVT::i32 || VT == MVT::f32 || !Subtarget->is64Bit()) - return std::make_pair(0U, X86::GR32RegisterClass); - return std::make_pair(0U, X86::GR64RegisterClass); + return std::make_pair(0U, &X86::GR32RegClass); + return std::make_pair(0U, &X86::GR64RegClass); case 'R': // LEGACY_REGS if (VT == MVT::i8 || VT == MVT::i1) - return std::make_pair(0U, X86::GR8_NOREXRegisterClass); + return std::make_pair(0U, &X86::GR8_NOREXRegClass); if (VT == MVT::i16) - return std::make_pair(0U, X86::GR16_NOREXRegisterClass); + return std::make_pair(0U, &X86::GR16_NOREXRegClass); if (VT == MVT::i32 || !Subtarget->is64Bit()) - return std::make_pair(0U, X86::GR32_NOREXRegisterClass); - return std::make_pair(0U, X86::GR64_NOREXRegisterClass); + return std::make_pair(0U, &X86::GR32_NOREXRegClass); + return std::make_pair(0U, &X86::GR64_NOREXRegClass); case 'f': // FP Stack registers. // If SSE is enabled for this VT, use f80 to ensure the isel moves the // value to the correct fpstack register class. if (VT == MVT::f32 && !isScalarFPTypeInSSEReg(VT)) - return std::make_pair(0U, X86::RFP32RegisterClass); + return std::make_pair(0U, &X86::RFP32RegClass); if (VT == MVT::f64 && !isScalarFPTypeInSSEReg(VT)) - return std::make_pair(0U, X86::RFP64RegisterClass); - return std::make_pair(0U, X86::RFP80RegisterClass); + return std::make_pair(0U, &X86::RFP64RegClass); + return std::make_pair(0U, &X86::RFP80RegClass); case 'y': // MMX_REGS if MMX allowed. if (!Subtarget->hasMMX()) break; - return std::make_pair(0U, X86::VR64RegisterClass); + return std::make_pair(0U, &X86::VR64RegClass); case 'Y': // SSE_REGS if SSE2 allowed if (!Subtarget->hasSSE2()) break; // FALL THROUGH. 
@@ -15712,10 +16297,10 @@ X86TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, // Scalar SSE types. case MVT::f32: case MVT::i32: - return std::make_pair(0U, X86::FR32RegisterClass); + return std::make_pair(0U, &X86::FR32RegClass); case MVT::f64: case MVT::i64: - return std::make_pair(0U, X86::FR64RegisterClass); + return std::make_pair(0U, &X86::FR64RegClass); // Vector types. case MVT::v16i8: case MVT::v8i16: @@ -15723,7 +16308,7 @@ X86TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, case MVT::v2i64: case MVT::v4f32: case MVT::v2f64: - return std::make_pair(0U, X86::VR128RegisterClass); + return std::make_pair(0U, &X86::VR128RegClass); // AVX types. case MVT::v32i8: case MVT::v16i16: @@ -15731,8 +16316,7 @@ X86TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, case MVT::v4i64: case MVT::v8f32: case MVT::v4f64: - return std::make_pair(0U, X86::VR256RegisterClass); - + return std::make_pair(0U, &X86::VR256RegClass); } break; } @@ -15755,28 +16339,28 @@ X86TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, Constraint[6] == '}') { Res.first = X86::ST0+Constraint[4]-'0'; - Res.second = X86::RFP80RegisterClass; + Res.second = &X86::RFP80RegClass; return Res; } // GCC allows "st(0)" to be called just plain "st". if (StringRef("{st}").equals_lower(Constraint)) { Res.first = X86::ST0; - Res.second = X86::RFP80RegisterClass; + Res.second = &X86::RFP80RegClass; return Res; } // flags -> EFLAGS if (StringRef("{flags}").equals_lower(Constraint)) { Res.first = X86::EFLAGS; - Res.second = X86::CCRRegisterClass; + Res.second = &X86::CCRRegClass; return Res; } // 'A' means EAX + EDX. if (Constraint == "A") { Res.first = X86::EAX; - Res.second = X86::GR32_ADRegisterClass; + Res.second = &X86::GR32_ADRegClass; return Res; } return Res; @@ -15792,7 +16376,7 @@ X86TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, // 16-bit register pieces "ax","dx","cx","bx","si","di","bp","sp". If we // really want an 8-bit or 32-bit register, map to the appropriate register // class and return the appropriate register. - if (Res.second == X86::GR16RegisterClass) { + if (Res.second == &X86::GR16RegClass) { if (VT == MVT::i8) { unsigned DestReg = 0; switch (Res.first) { @@ -15804,7 +16388,7 @@ X86TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, } if (DestReg) { Res.first = DestReg; - Res.second = X86::GR8RegisterClass; + Res.second = &X86::GR8RegClass; } } else if (VT == MVT::i32) { unsigned DestReg = 0; @@ -15821,7 +16405,7 @@ X86TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, } if (DestReg) { Res.first = DestReg; - Res.second = X86::GR32RegisterClass; + Res.second = &X86::GR32RegClass; } } else if (VT == MVT::i64) { unsigned DestReg = 0; @@ -15838,22 +16422,25 @@ X86TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, } if (DestReg) { Res.first = DestReg; - Res.second = X86::GR64RegisterClass; + Res.second = &X86::GR64RegClass; } } - } else if (Res.second == X86::FR32RegisterClass || - Res.second == X86::FR64RegisterClass || - Res.second == X86::VR128RegisterClass) { + } else if (Res.second == &X86::FR32RegClass || + Res.second == &X86::FR64RegClass || + Res.second == &X86::VR128RegClass) { // Handle references to XMM physical registers that got mapped into the // wrong class. 
This can happen with constraints like {xmm0} where the // target independent register mapper will just pick the first match it can // find, ignoring the required type. - if (VT == MVT::f32) - Res.second = X86::FR32RegisterClass; - else if (VT == MVT::f64) - Res.second = X86::FR64RegisterClass; - else if (X86::VR128RegisterClass->hasType(VT)) - Res.second = X86::VR128RegisterClass; + + if (VT == MVT::f32 || VT == MVT::i32) + Res.second = &X86::FR32RegClass; + else if (VT == MVT::f64 || VT == MVT::i64) + Res.second = &X86::FR64RegClass; + else if (X86::VR128RegClass.hasType(VT)) + Res.second = &X86::VR128RegClass; + else if (X86::VR256RegClass.hasType(VT)) + Res.second = &X86::VR256RegClass; } return Res; diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index 09116e8..9123ebd 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -137,10 +137,6 @@ namespace llvm { /// relative displacements. WrapperRIP, - /// MOVQ2DQ - Copies a 64-bit value from an MMX vector to the low word - /// of an XMM vector, with the high word zero filled. - MOVQ2DQ, - /// MOVDQ2Q - Copies a 64-bit value from the low word of an XMM vector /// to an MMX vector. If you think this is too close to the previous /// mnemonic, so do I; blame Intel. @@ -207,6 +203,10 @@ namespace llvm { // TLSADDR - Thread Local Storage. TLSADDR, + // TLSBASEADDR - Thread Local Storage. A call to get the start address + // of the TLS block for the current module. + TLSBASEADDR, + // TLSCALL - Thread Local Storage. When calling to an OS provided // thunk at the address from an earlier relocation. TLSCALL, @@ -242,9 +242,6 @@ namespace llvm { // PCMP* - Vector integer comparisons. PCMPEQ, PCMPGT, - // VPCOM, VPCOMU - XOP Vector integer comparisons. - VPCOM, VPCOMU, - // ADD, SUB, SMUL, etc. - Arithmetic operations with FLAGS results. ADD, SUB, ADC, SBB, SMUL, INC, DEC, OR, XOR, AND, @@ -293,6 +290,14 @@ namespace llvm { // PMULUDQ - Vector multiply packed unsigned doubleword integers PMULUDQ, + // FMA nodes + FMADD, + FNMADD, + FMSUB, + FNMSUB, + FMADDSUB, + FMSUBADD, + // VASTART_SAVE_XMM_REGS - Save xmm argument registers to the stack, // according to %al. An operator is needed so that this can be expanded // with control flow. @@ -315,6 +320,19 @@ namespace llvm { SFENCE, LFENCE, + // FNSTSW16r - Store FP status word into i16 register. + FNSTSW16r, + + // SAHF - Store contents of %ah into %eflags. + SAHF, + + // RDRAND - Get a random integer and indicate whether it is valid in CF. + RDRAND, + + // PCMP*STRI + PCMPISTRI, + PCMPESTRI, + // ATOMADD64_DAG, ATOMSUB64_DAG, ATOMOR64_DAG, ATOMAND64_DAG, // ATOMXOR64_DAG, ATOMNAND64_DAG, ATOMSWAP64_DAG - // Atomic 64-bit binary operations. @@ -558,6 +576,18 @@ namespace llvm { /// by AM is legal for this target, for a load/store of the specified type. virtual bool isLegalAddressingMode(const AddrMode &AM, Type *Ty)const; + /// isLegalICmpImmediate - Return true if the specified immediate is legal + /// icmp immediate, that is the target has icmp instructions which can + /// compare a register against the immediate without having to materialize + /// the immediate into a register. + virtual bool isLegalICmpImmediate(int64_t Imm) const; + + /// isLegalAddImmediate - Return true if the specified immediate is legal + /// add immediate, that is the target has add instructions which can + /// add a register and the immediate without having to materialize + /// the immediate into a register. 
+ virtual bool isLegalAddImmediate(int64_t Imm) const; + /// isTruncateFree - Return true if it's free to truncate a value of /// type Ty1 to type Ty2. e.g. On x86 it's free to truncate a i32 value in /// register EAX to i16 by referencing its sub-register AX. @@ -575,6 +605,12 @@ namespace llvm { virtual bool isZExtFree(Type *Ty1, Type *Ty2) const; virtual bool isZExtFree(EVT VT1, EVT VT2) const; + /// isFMAFasterThanMulAndAdd - Return true if an FMA operation is faster than + /// a pair of mul and add instructions. fmuladd intrinsics will be expanded to + /// FMAs when this method returns true (and FMAs are legal), otherwise fmuladd + /// is expanded to mul + add. + virtual bool isFMAFasterThanMulAndAdd(EVT) const { return true; } + /// isNarrowingProfitable - Return true if it's profitable to narrow /// operations of type VT1 to VT2. e.g. on x86, it's profitable to narrow /// from i32 to i8 but not from i32 to i16. @@ -634,7 +670,8 @@ namespace llvm { /// createFastISel - This method returns a target specific FastISel object, /// or null if the target does not support "fast" ISel. - virtual FastISel *createFastISel(FunctionLoweringInfo &funcInfo) const; + virtual FastISel *createFastISel(FunctionLoweringInfo &funcInfo, + const TargetLibraryInfo *libInfo) const; /// getStackCookieLocation - Return true if the target stores stack /// protector cookies at a fixed offset in some non-standard address @@ -761,6 +798,7 @@ namespace llvm { SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const; SDValue LowerVACOPY(SDValue Op, SelectionDAG &DAG) const; SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const; SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFRAME_TO_ARGS_OFFSET(SDValue Op, SelectionDAG &DAG) const; @@ -797,12 +835,7 @@ namespace llvm { DebugLoc dl, SelectionDAG &DAG, SmallVectorImpl &InVals) const; virtual SDValue - LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, - bool isVarArg, bool doesNotRet, bool &isTailCall, - const SmallVectorImpl &Outs, - const SmallVectorImpl &OutVals, - const SmallVectorImpl &Ins, - DebugLoc dl, SelectionDAG &DAG, + LowerCall(CallLoweringInfo &CLI, SmallVectorImpl &InVals) const; virtual SDValue @@ -822,9 +855,9 @@ namespace llvm { virtual bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF, - bool isVarArg, - const SmallVectorImpl &Outs, - LLVMContext &Context) const; + bool isVarArg, + const SmallVectorImpl &Outs, + LLVMContext &Context) const; void ReplaceATOMIC_BINARY_64(SDNode *N, SmallVectorImpl &Results, SelectionDAG &DAG, unsigned NewOp) const; @@ -909,10 +942,14 @@ namespace llvm { /// equivalent, for use with the given x86 condition code. SDValue EmitCmp(SDValue Op0, SDValue Op1, unsigned X86CC, SelectionDAG &DAG) const; + + /// Convert a comparison if required by the subtarget. 
+ SDValue ConvertCmpIfNecessary(SDValue Cmp, SelectionDAG &DAG) const; }; namespace X86 { - FastISel *createFastISel(FunctionLoweringInfo &funcInfo); + FastISel *createFastISel(FunctionLoweringInfo &funcInfo, + const TargetLibraryInfo *libInfo); } } diff --git a/lib/Target/X86/X86InstrArithmetic.td b/lib/Target/X86/X86InstrArithmetic.td index 0eee083..f790611 100644 --- a/lib/Target/X86/X86InstrArithmetic.td +++ b/lib/Target/X86/X86InstrArithmetic.td @@ -1132,8 +1132,10 @@ defm XOR : ArithBinOp_RF<0x30, 0x32, 0x34, "xor", MRM6r, MRM6m, X86xor_flag, xor, 1, 0>; defm ADD : ArithBinOp_RF<0x00, 0x02, 0x04, "add", MRM0r, MRM0m, X86add_flag, add, 1, 1>; +let isCompare = 1 in { defm SUB : ArithBinOp_RF<0x28, 0x2A, 0x2C, "sub", MRM5r, MRM5m, X86sub_flag, sub, 0, 0>; +} // Arithmetic. let Uses = [EFLAGS] in { @@ -1143,7 +1145,9 @@ let Uses = [EFLAGS] in { 0, 0>; } +let isCompare = 1 in { defm CMP : ArithBinOp_F<0x38, 0x3A, 0x3C, "cmp", MRM7r, MRM7m, X86cmp, 0, 0>; +} //===----------------------------------------------------------------------===// @@ -1154,7 +1158,7 @@ defm CMP : ArithBinOp_F<0x38, 0x3A, 0x3C, "cmp", MRM7r, MRM7m, X86cmp, 0, 0>; def X86testpat : PatFrag<(ops node:$lhs, node:$rhs), (X86cmp (and_su node:$lhs, node:$rhs), 0)>; -let Defs = [EFLAGS] in { +let isCompare = 1, Defs = [EFLAGS] in { let isCommutable = 1 in { def TEST8rr : BinOpRR_F<0x84, "test", Xi8 , X86testpat, MRMSrcReg>; def TEST16rr : BinOpRR_F<0x84, "test", Xi16, X86testpat, MRMSrcReg>; diff --git a/lib/Target/X86/X86InstrBuilder.h b/lib/Target/X86/X86InstrBuilder.h index fa1d676..aaef4a4 100644 --- a/lib/Target/X86/X86InstrBuilder.h +++ b/lib/Target/X86/X86InstrBuilder.h @@ -55,11 +55,11 @@ struct X86AddressMode { : BaseType(RegBase), Scale(1), IndexReg(0), Disp(0), GV(0), GVOpFlags(0) { Base.Reg = 0; } - - + + void getFullAddress(SmallVectorImpl &MO) { assert(Scale == 1 || Scale == 2 || Scale == 4 || Scale == 8); - + if (BaseType == X86AddressMode::RegBase) MO.push_back(MachineOperand::CreateReg(Base.Reg, false, false, false, false, false, 0, false)); @@ -67,16 +67,16 @@ struct X86AddressMode { assert(BaseType == X86AddressMode::FrameIndexBase); MO.push_back(MachineOperand::CreateFI(Base.FrameIndex)); } - + MO.push_back(MachineOperand::CreateImm(Scale)); MO.push_back(MachineOperand::CreateReg(IndexReg, false, false, false, false, false, 0, false)); - + if (GV) MO.push_back(MachineOperand::CreateGA(GV, Disp, GVOpFlags)); else MO.push_back(MachineOperand::CreateImm(Disp)); - + MO.push_back(MachineOperand::CreateReg(0, false, false, false, false, false, 0, false)); } @@ -122,7 +122,7 @@ static inline const MachineInstrBuilder & addFullAddress(const MachineInstrBuilder &MIB, const X86AddressMode &AM) { assert(AM.Scale == 1 || AM.Scale == 2 || AM.Scale == 4 || AM.Scale == 8); - + if (AM.BaseType == X86AddressMode::RegBase) MIB.addReg(AM.Base.Reg); else { @@ -135,7 +135,7 @@ addFullAddress(const MachineInstrBuilder &MIB, MIB.addGlobalAddress(AM.GV, AM.Disp, AM.GVOpFlags); else MIB.addImm(AM.Disp); - + return MIB.addReg(0); } diff --git a/lib/Target/X86/X86InstrCompiler.td b/lib/Target/X86/X86InstrCompiler.td index 6f9e849..99c2b8f 100644 --- a/lib/Target/X86/X86InstrCompiler.td +++ b/lib/Target/X86/X86InstrCompiler.td @@ -375,11 +375,16 @@ let Defs = [EAX, ECX, EDX, FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0, MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7, XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS], - Uses = [ESP] in + Uses = [ESP] in { def TLS_addr32 : I<0, Pseudo, 
(outs), (ins i32mem:$sym), "# TLS_addr32", [(X86tlsaddr tls32addr:$sym)]>, Requires<[In32BitMode]>; +def TLS_base_addr32 : I<0, Pseudo, (outs), (ins i32mem:$sym), + "# TLS_base_addr32", + [(X86tlsbaseaddr tls32baseaddr:$sym)]>, + Requires<[In32BitMode]>; +} // All calls clobber the non-callee saved registers. RSP is marked as // a use to prevent stack-pointer assignments that appear immediately @@ -389,11 +394,16 @@ let Defs = [RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11, MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7, XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS], - Uses = [RSP] in + Uses = [RSP] in { def TLS_addr64 : I<0, Pseudo, (outs), (ins i64mem:$sym), "# TLS_addr64", [(X86tlsaddr tls64addr:$sym)]>, Requires<[In64BitMode]>; +def TLS_base_addr64 : I<0, Pseudo, (outs), (ins i64mem:$sym), + "# TLS_base_addr64", + [(X86tlsbaseaddr tls64baseaddr:$sym)]>, + Requires<[In64BitMode]>; +} // Darwin TLS Support // For i386, the address of the thunk is passed on the stack, on return the @@ -1008,8 +1018,8 @@ def : Pat<(X86call (i64 texternalsym:$dst)), (CALL64pcrel32 texternalsym:$dst)>; // tailcall stuff -def : Pat<(X86tcret GR32_TC:$dst, imm:$off), - (TCRETURNri GR32_TC:$dst, imm:$off)>, +def : Pat<(X86tcret ptr_rc_tailcall:$dst, imm:$off), + (TCRETURNri ptr_rc_tailcall:$dst, imm:$off)>, Requires<[In32BitMode]>; // FIXME: This is disabled for 32-bit PIC mode because the global base @@ -1623,6 +1633,12 @@ def : Pat<(sub GR16:$src1, i16immSExt8:$src2), def : Pat<(sub GR32:$src1, i32immSExt8:$src2), (SUB32ri8 GR32:$src1, i32immSExt8:$src2)>; +// sub 0, reg +def : Pat<(X86sub_flag 0, GR8 :$src), (NEG8r GR8 :$src)>; +def : Pat<(X86sub_flag 0, GR16:$src), (NEG16r GR16:$src)>; +def : Pat<(X86sub_flag 0, GR32:$src), (NEG32r GR32:$src)>; +def : Pat<(X86sub_flag 0, GR64:$src), (NEG64r GR64:$src)>; + // mul reg, reg def : Pat<(mul GR16:$src1, GR16:$src2), (IMUL16rr GR16:$src1, GR16:$src2)>; diff --git a/lib/Target/X86/X86InstrControl.td b/lib/Target/X86/X86InstrControl.td index bf11fde..b0c27c8 100644 --- a/lib/Target/X86/X86InstrControl.td +++ b/lib/Target/X86/X86InstrControl.td @@ -18,16 +18,16 @@ // Return instructions. let isTerminator = 1, isReturn = 1, isBarrier = 1, hasCtrlDep = 1, FPForm = SpecialFP in { - def RET : I <0xC3, RawFrm, (outs), (ins variable_ops), + def RET : I <0xC3, RawFrm, (outs), (ins), "ret", [(X86retflag 0)], IIC_RET>; - def RETW : I <0xC3, RawFrm, (outs), (ins variable_ops), + def RETW : I <0xC3, RawFrm, (outs), (ins), "ret{w}", [], IIC_RET>, OpSize; - def RETI : Ii16<0xC2, RawFrm, (outs), (ins i16imm:$amt, variable_ops), + def RETI : Ii16<0xC2, RawFrm, (outs), (ins i16imm:$amt), "ret\t$amt", [(X86retflag timm:$amt)], IIC_RET_IMM>; - def RETIW : Ii16<0xC2, RawFrm, (outs), (ins i16imm:$amt, variable_ops), + def RETIW : Ii16<0xC2, RawFrm, (outs), (ins i16imm:$amt), "ret{w}\t$amt", [], IIC_RET_IMM>, OpSize; def LRETL : I <0xCB, RawFrm, (outs), (ins), @@ -148,12 +148,12 @@ let isCall = 1 in // registers are added manually. 
let Uses = [ESP] in { def CALLpcrel32 : Ii32PCRel<0xE8, RawFrm, - (outs), (ins i32imm_pcrel:$dst,variable_ops), + (outs), (ins i32imm_pcrel:$dst), "call{l}\t$dst", [], IIC_CALL_RI>, Requires<[In32BitMode]>; - def CALL32r : I<0xFF, MRM2r, (outs), (ins GR32:$dst, variable_ops), + def CALL32r : I<0xFF, MRM2r, (outs), (ins GR32:$dst), "call{l}\t{*}$dst", [(X86call GR32:$dst)], IIC_CALL_RI>, Requires<[In32BitMode]>; - def CALL32m : I<0xFF, MRM2m, (outs), (ins i32mem:$dst, variable_ops), + def CALL32m : I<0xFF, MRM2m, (outs), (ins i32mem:$dst), "call{l}\t{*}$dst", [(X86call (loadi32 addr:$dst))], IIC_CALL_MEM>, Requires<[In32BitMode]>; @@ -174,7 +174,7 @@ let isCall = 1 in // callw for 16 bit code for the assembler. let isAsmParserOnly = 1 in def CALLpcrel16 : Ii16PCRel<0xE8, RawFrm, - (outs), (ins i16imm_pcrel:$dst, variable_ops), + (outs), (ins i16imm_pcrel:$dst), "callw\t$dst", []>, OpSize; } @@ -185,23 +185,23 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, isCodeGenOnly = 1 in let Uses = [ESP] in { def TCRETURNdi : PseudoI<(outs), - (ins i32imm_pcrel:$dst, i32imm:$offset, variable_ops), []>; + (ins i32imm_pcrel:$dst, i32imm:$offset), []>; def TCRETURNri : PseudoI<(outs), - (ins GR32_TC:$dst, i32imm:$offset, variable_ops), []>; + (ins ptr_rc_tailcall:$dst, i32imm:$offset), []>; let mayLoad = 1 in def TCRETURNmi : PseudoI<(outs), - (ins i32mem_TC:$dst, i32imm:$offset, variable_ops), []>; + (ins i32mem_TC:$dst, i32imm:$offset), []>; // FIXME: The should be pseudo instructions that are lowered when going to // mcinst. def TAILJMPd : Ii32PCRel<0xE9, RawFrm, (outs), - (ins i32imm_pcrel:$dst, variable_ops), + (ins i32imm_pcrel:$dst), "jmp\t$dst # TAILCALL", [], IIC_JMP_REL>; - def TAILJMPr : I<0xFF, MRM4r, (outs), (ins GR32_TC:$dst, variable_ops), + def TAILJMPr : I<0xFF, MRM4r, (outs), (ins ptr_rc_tailcall:$dst), "", [], IIC_JMP_REG>; // FIXME: Remove encoding when JIT is dead. let mayLoad = 1 in - def TAILJMPm : I<0xFF, MRM4m, (outs), (ins i32mem_TC:$dst, variable_ops), + def TAILJMPm : I<0xFF, MRM4m, (outs), (ins i32mem_TC:$dst), "jmp{l}\t{*}$dst # TAILCALL", [], IIC_JMP_MEM>; } @@ -218,14 +218,14 @@ let isCall = 1, Uses = [RSP] in { // that the offset between an arbitrary immediate and the call will fit in // the 32-bit pcrel field that we have. 
def CALL64pcrel32 : Ii32PCRel<0xE8, RawFrm, - (outs), (ins i64i32imm_pcrel:$dst, variable_ops), + (outs), (ins i64i32imm_pcrel:$dst), "call{q}\t$dst", [], IIC_CALL_RI>, Requires<[In64BitMode]>; - def CALL64r : I<0xFF, MRM2r, (outs), (ins GR64:$dst, variable_ops), + def CALL64r : I<0xFF, MRM2r, (outs), (ins GR64:$dst), "call{q}\t{*}$dst", [(X86call GR64:$dst)], IIC_CALL_RI>, Requires<[In64BitMode]>; - def CALL64m : I<0xFF, MRM2m, (outs), (ins i64mem:$dst, variable_ops), + def CALL64m : I<0xFF, MRM2m, (outs), (ins i64mem:$dst), "call{q}\t{*}$dst", [(X86call (loadi64 addr:$dst))], IIC_CALL_MEM>, Requires<[In64BitMode]>; @@ -240,7 +240,7 @@ let isCall = 1, isCodeGenOnly = 1 in let Defs = [RAX, R10, R11, RSP, EFLAGS], Uses = [RSP] in { def W64ALLOCA : Ii32PCRel<0xE8, RawFrm, - (outs), (ins i64i32imm_pcrel:$dst, variable_ops), + (outs), (ins i64i32imm_pcrel:$dst), "call{q}\t$dst", [], IIC_CALL_RI>, Requires<[IsWin64]>; } @@ -250,21 +250,21 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, let Uses = [RSP], usesCustomInserter = 1 in { def TCRETURNdi64 : PseudoI<(outs), - (ins i64i32imm_pcrel:$dst, i32imm:$offset, variable_ops), + (ins i64i32imm_pcrel:$dst, i32imm:$offset), []>; def TCRETURNri64 : PseudoI<(outs), - (ins ptr_rc_tailcall:$dst, i32imm:$offset, variable_ops), []>; + (ins ptr_rc_tailcall:$dst, i32imm:$offset), []>; let mayLoad = 1 in def TCRETURNmi64 : PseudoI<(outs), - (ins i64mem_TC:$dst, i32imm:$offset, variable_ops), []>; + (ins i64mem_TC:$dst, i32imm:$offset), []>; def TAILJMPd64 : Ii32PCRel<0xE9, RawFrm, (outs), - (ins i64i32imm_pcrel:$dst, variable_ops), + (ins i64i32imm_pcrel:$dst), "jmp\t$dst # TAILCALL", [], IIC_JMP_REL>; - def TAILJMPr64 : I<0xFF, MRM4r, (outs), (ins ptr_rc_tailcall:$dst, variable_ops), + def TAILJMPr64 : I<0xFF, MRM4r, (outs), (ins ptr_rc_tailcall:$dst), "jmp{q}\t{*}$dst # TAILCALL", [], IIC_JMP_MEM>; let mayLoad = 1 in - def TAILJMPm64 : I<0xFF, MRM4m, (outs), (ins i64mem_TC:$dst, variable_ops), + def TAILJMPm64 : I<0xFF, MRM4m, (outs), (ins i64mem_TC:$dst), "jmp{q}\t{*}$dst # TAILCALL", [], IIC_JMP_MEM>; } diff --git a/lib/Target/X86/X86InstrExtension.td b/lib/Target/X86/X86InstrExtension.td index 0d5490a..2eb454d 100644 --- a/lib/Target/X86/X86InstrExtension.td +++ b/lib/Target/X86/X86InstrExtension.td @@ -39,12 +39,15 @@ let neverHasSideEffects = 1 in { // Sign/Zero extenders +let neverHasSideEffects = 1 in { def MOVSX16rr8 : I<0xBE, MRMSrcReg, (outs GR16:$dst), (ins GR8:$src), "movs{bw|x}\t{$src, $dst|$dst, $src}", [], IIC_MOVSX_R16_R8>, TB, OpSize; +let mayLoad = 1 in def MOVSX16rm8 : I<0xBE, MRMSrcMem, (outs GR16:$dst), (ins i8mem:$src), "movs{bw|x}\t{$src, $dst|$dst, $src}", [], IIC_MOVSX_R16_M8>, TB, OpSize; +} // neverHasSideEffects = 1 def MOVSX32rr8 : I<0xBE, MRMSrcReg, (outs GR32:$dst), (ins GR8:$src), "movs{bl|x}\t{$src, $dst|$dst, $src}", [(set GR32:$dst, (sext GR8:$src))], IIC_MOVSX>, TB; @@ -59,12 +62,15 @@ def MOVSX32rm16: I<0xBF, MRMSrcMem, (outs GR32:$dst), (ins i16mem:$src), [(set GR32:$dst, (sextloadi32i16 addr:$src))], IIC_MOVSX>, TB; +let neverHasSideEffects = 1 in { def MOVZX16rr8 : I<0xB6, MRMSrcReg, (outs GR16:$dst), (ins GR8:$src), "movz{bw|x}\t{$src, $dst|$dst, $src}", [], IIC_MOVZX_R16_R8>, TB, OpSize; +let mayLoad = 1 in def MOVZX16rm8 : I<0xB6, MRMSrcMem, (outs GR16:$dst), (ins i8mem:$src), "movz{bw|x}\t{$src, $dst|$dst, $src}", [], IIC_MOVZX_R16_M8>, TB, OpSize; +} // neverHasSideEffects = 1 def MOVZX32rr8 : I<0xB6, MRMSrcReg, (outs GR32:$dst), (ins GR8 :$src), "movz{bl|x}\t{$src, $dst|$dst, $src}", [(set 
GR32:$dst, (zext GR8:$src))], IIC_MOVZX>, TB; @@ -82,6 +88,7 @@ def MOVZX32rm16: I<0xB7, MRMSrcMem, (outs GR32:$dst), (ins i16mem:$src), // These are the same as the regular MOVZX32rr8 and MOVZX32rm8 // except that they use GR32_NOREX for the output operand register class // instead of GR32. This allows them to operate on h registers on x86-64. +let neverHasSideEffects = 1, isCodeGenOnly = 1 in { def MOVZX32_NOREXrr8 : I<0xB6, MRMSrcReg, (outs GR32_NOREX:$dst), (ins GR8_NOREX:$src), "movz{bl|x}\t{$src, $dst|$dst, $src}", @@ -91,6 +98,7 @@ def MOVZX32_NOREXrm8 : I<0xB6, MRMSrcMem, (outs GR32_NOREX:$dst), (ins i8mem_NOREX:$src), "movz{bl|x}\t{$src, $dst|$dst, $src}", [], IIC_MOVZX>, TB; +} // MOVSX64rr8 always has a REX prefix and it has an 8-bit register // operand, which makes it a rare instruction with an 8-bit register diff --git a/lib/Target/X86/X86InstrFMA.td b/lib/Target/X86/X86InstrFMA.td index d57937b..265b4bb 100644 --- a/lib/Target/X86/X86InstrFMA.td +++ b/lib/Target/X86/X86InstrFMA.td @@ -15,83 +15,245 @@ // FMA3 - Intel 3 operand Fused Multiply-Add instructions //===----------------------------------------------------------------------===// +let Constraints = "$src1 = $dst" in { multiclass fma3p_rm opc, string OpcodeStr> { +let neverHasSideEffects = 1 in { def r : FMA3; + (ins VR128:$src1, VR128:$src2, VR128:$src3), + !strconcat(OpcodeStr, + "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), []>; + let mayLoad = 1 in def m : FMA3; + (ins VR128:$src1, VR128:$src2, f128mem:$src3), + !strconcat(OpcodeStr, + "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), []>; def rY : FMA3; + (ins VR256:$src1, VR256:$src2, VR256:$src3), + !strconcat(OpcodeStr, + "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), []>; + let mayLoad = 1 in def mY : FMA3; + (ins VR256:$src1, VR256:$src2, f256mem:$src3), + !strconcat(OpcodeStr, + "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), []>; +} // neverHasSideEffects = 1 } +// Intrinsic for 213 pattern +multiclass fma3p_rm_int opc, string OpcodeStr, + PatFrag MemFrag128, PatFrag MemFrag256, + Intrinsic Int128, Intrinsic Int256, SDNode Op213, + ValueType OpVT128, ValueType OpVT256> { + def r_Int : FMA3; + + def r : FMA3; + + def m_Int : FMA3; + + def m : FMA3; + + + def rY_Int : FMA3; + + def rY : FMA3; + + def mY_Int : FMA3; + + def mY : FMA3; +} +} // Constraints = "$src1 = $dst" + multiclass fma3p_forms opc132, bits<8> opc213, bits<8> opc231, - string OpcodeStr, string PackTy> { - defm r132 : fma3p_rm; - defm r213 : fma3p_rm; - defm r231 : fma3p_rm; + string OpcodeStr, string PackTy, + PatFrag MemFrag128, PatFrag MemFrag256, + Intrinsic Int128, Intrinsic Int256, SDNode Op, + ValueType OpTy128, ValueType OpTy256> { + defm r213 : fma3p_rm_int ; + defm r132 : fma3p_rm ; + defm r231 : fma3p_rm ; } // Fused Multiply-Add let ExeDomain = SSEPackedSingle in { - defm VFMADDPS : fma3p_forms<0x98, 0xA8, 0xB8, "vfmadd", "ps">; - defm VFMSUBPS : fma3p_forms<0x9A, 0xAA, 0xBA, "vfmsub", "ps">; - defm VFMADDSUBPS : fma3p_forms<0x96, 0xA6, 0xB6, "vfmaddsub", "ps">; - defm VFMSUBADDPS : fma3p_forms<0x97, 0xA7, 0xB7, "vfmsubadd", "ps">; + defm VFMADDPS : fma3p_forms<0x98, 0xA8, 0xB8, "vfmadd", "ps", memopv4f32, + memopv8f32, int_x86_fma_vfmadd_ps, + int_x86_fma_vfmadd_ps_256, X86Fmadd, + v4f32, v8f32>; + defm VFMSUBPS : fma3p_forms<0x9A, 0xAA, 0xBA, "vfmsub", "ps", memopv4f32, + memopv8f32, int_x86_fma_vfmsub_ps, + int_x86_fma_vfmsub_ps_256, X86Fmsub, + v4f32, v8f32>; + defm VFMADDSUBPS : fma3p_forms<0x96, 0xA6, 0xB6, "vfmaddsub", "ps", + memopv4f32, memopv8f32, + int_x86_fma_vfmaddsub_ps, + 
int_x86_fma_vfmaddsub_ps_256, X86Fmaddsub, + v4f32, v8f32>; + defm VFMSUBADDPS : fma3p_forms<0x97, 0xA7, 0xB7, "vfmsubadd", "ps", + memopv4f32, memopv8f32, + int_x86_fma_vfmsubadd_ps, + int_x86_fma_vfmaddsub_ps_256, X86Fmsubadd, + v4f32, v8f32>; } let ExeDomain = SSEPackedDouble in { - defm VFMADDPD : fma3p_forms<0x98, 0xA8, 0xB8, "vfmadd", "pd">, VEX_W; - defm VFMSUBPD : fma3p_forms<0x9A, 0xAA, 0xBA, "vfmsub", "pd">, VEX_W; - defm VFMADDSUBPD : fma3p_forms<0x96, 0xA6, 0xB6, "vfmaddsub", "pd">, VEX_W; - defm VFMSUBADDPD : fma3p_forms<0x97, 0xA7, 0xB7, "vfmsubadd", "pd">, VEX_W; + defm VFMADDPD : fma3p_forms<0x98, 0xA8, 0xB8, "vfmadd", "pd", memopv2f64, + memopv4f64, int_x86_fma_vfmadd_pd, + int_x86_fma_vfmadd_pd_256, X86Fmadd, v2f64, + v4f64>, VEX_W; + defm VFMSUBPD : fma3p_forms<0x9A, 0xAA, 0xBA, "vfmsub", "pd", memopv2f64, + memopv4f64, int_x86_fma_vfmsub_pd, + int_x86_fma_vfmsub_pd_256, X86Fmsub, v2f64, + v4f64>, VEX_W; + defm VFMADDSUBPD : fma3p_forms<0x96, 0xA6, 0xB6, "vfmaddsub", "pd", + memopv2f64, memopv4f64, + int_x86_fma_vfmaddsub_pd, + int_x86_fma_vfmaddsub_pd_256, X86Fmaddsub, + v2f64, v4f64>, VEX_W; + defm VFMSUBADDPD : fma3p_forms<0x97, 0xA7, 0xB7, "vfmsubadd", "pd", + memopv2f64, memopv4f64, + int_x86_fma_vfmsubadd_pd, + int_x86_fma_vfmsubadd_pd_256, X86Fmsubadd, + v2f64, v4f64>, VEX_W; } // Fused Negative Multiply-Add let ExeDomain = SSEPackedSingle in { - defm VFNMADDPS : fma3p_forms<0x9C, 0xAC, 0xBC, "vfnmadd", "ps">; - defm VFNMSUBPS : fma3p_forms<0x9E, 0xAE, 0xBE, "vfnmsub", "ps">; + defm VFNMADDPS : fma3p_forms<0x9C, 0xAC, 0xBC, "vfnmadd", "ps", memopv4f32, + memopv8f32, int_x86_fma_vfnmadd_ps, + int_x86_fma_vfnmadd_ps_256, X86Fnmadd, v4f32, + v8f32>; + defm VFNMSUBPS : fma3p_forms<0x9E, 0xAE, 0xBE, "vfnmsub", "ps", memopv4f32, + memopv8f32, int_x86_fma_vfnmsub_ps, + int_x86_fma_vfnmsub_ps_256, X86Fnmsub, v4f32, + v8f32>; } let ExeDomain = SSEPackedDouble in { - defm VFNMADDPD : fma3p_forms<0x9C, 0xAC, 0xBC, "vfnmadd", "pd">, VEX_W; - defm VFNMSUBPD : fma3p_forms<0x9E, 0xAE, 0xBE, "vfnmsub", "pd">, VEX_W; + defm VFNMADDPD : fma3p_forms<0x9C, 0xAC, 0xBC, "vfnmadd", "pd", memopv2f64, + memopv4f64, int_x86_fma_vfnmadd_pd, + int_x86_fma_vfnmadd_pd_256, X86Fnmadd, v2f64, + v4f64>, VEX_W; + defm VFNMSUBPD : fma3p_forms<0x9E, 0xAE, 0xBE, "vfnmsub", "pd", + memopv2f64, + memopv4f64, int_x86_fma_vfnmsub_pd, + int_x86_fma_vfnmsub_pd_256, X86Fnmsub, v2f64, + v4f64>, VEX_W; } -multiclass fma3s_rm opc, string OpcodeStr, X86MemOperand x86memop> { - def r : FMA3; - def m : FMA3; +let Constraints = "$src1 = $dst" in { +multiclass fma3s_rm opc, string OpcodeStr, X86MemOperand x86memop, + RegisterClass RC> { +let neverHasSideEffects = 1 in { + def r : FMA3; + let mayLoad = 1 in + def m : FMA3; +} // neverHasSideEffects = 1 } +multiclass fma3s_rm_int opc, string OpcodeStr, Operand memop, + ComplexPattern mem_cpat, Intrinsic IntId, + RegisterClass RC, SDNode OpNode, ValueType OpVT> { + def r_Int : FMA3; + def m_Int : FMA3; + def r : FMA3; + let mayLoad = 1 in + def m : FMA3; +} +} // Constraints = "$src1 = $dst" + multiclass fma3s_forms opc132, bits<8> opc213, bits<8> opc231, - string OpcodeStr> { - defm SSr132 : fma3s_rm; - defm SSr213 : fma3s_rm; - defm SSr231 : fma3s_rm; - defm SDr132 : fma3s_rm, VEX_W; - defm SDr213 : fma3s_rm, VEX_W; - defm SDr231 : fma3s_rm, VEX_W; + string OpStr, Intrinsic IntF32, Intrinsic IntF64, + SDNode OpNode> { + defm SSr132 : fma3s_rm; + defm SSr231 : fma3s_rm; + defm SDr132 : fma3s_rm, + VEX_W; + defm SDr231 : fma3s_rm, + VEX_W; + defm SSr213 : fma3s_rm_int 
; + defm SDr213 : fma3s_rm_int , VEX_W; } -defm VFMADD : fma3s_forms<0x99, 0xA9, 0xB9, "vfmadd">, VEX_LIG; -defm VFMSUB : fma3s_forms<0x9B, 0xAB, 0xBB, "vfmsub">, VEX_LIG; +defm VFMADD : fma3s_forms<0x99, 0xA9, 0xB9, "vfmadd", int_x86_fma_vfmadd_ss, + int_x86_fma_vfmadd_sd, X86Fmadd>, VEX_LIG; +defm VFMSUB : fma3s_forms<0x9B, 0xAB, 0xBB, "vfmsub", int_x86_fma_vfmsub_ss, + int_x86_fma_vfmsub_sd, X86Fmsub>, VEX_LIG; + +defm VFNMADD : fma3s_forms<0x9D, 0xAD, 0xBD, "vfnmadd", int_x86_fma_vfnmadd_ss, + int_x86_fma_vfnmadd_sd, X86Fnmadd>, VEX_LIG; +defm VFNMSUB : fma3s_forms<0x9F, 0xAF, 0xBF, "vfnmsub", int_x86_fma_vfnmsub_ss, + int_x86_fma_vfnmsub_sd, X86Fnmsub>, VEX_LIG; -defm VFNMADD : fma3s_forms<0x9D, 0xAD, 0xBD, "vfnmadd">, VEX_LIG; -defm VFNMSUB : fma3s_forms<0x9F, 0xAF, 0xBF, "vfnmsub">, VEX_LIG; //===----------------------------------------------------------------------===// // FMA4 - AMD 4 operand Fused Multiply-Add instructions @@ -178,43 +340,47 @@ let isCodeGenOnly = 1 in { } // isCodeGenOnly = 1 } +let Predicates = [HasFMA4] in { + defm VFMADDSS4 : fma4s<0x6A, "vfmaddss", ssmem, sse_load_f32, - int_x86_fma4_vfmadd_ss>; + int_x86_fma_vfmadd_ss>; defm VFMADDSD4 : fma4s<0x6B, "vfmaddsd", sdmem, sse_load_f64, - int_x86_fma4_vfmadd_sd>; -defm VFMADDPS4 : fma4p<0x68, "vfmaddps", int_x86_fma4_vfmadd_ps, - int_x86_fma4_vfmadd_ps_256, memopv4f32, memopv8f32>; -defm VFMADDPD4 : fma4p<0x69, "vfmaddpd", int_x86_fma4_vfmadd_pd, - int_x86_fma4_vfmadd_pd_256, memopv2f64, memopv4f64>; + int_x86_fma_vfmadd_sd>; +defm VFMADDPS4 : fma4p<0x68, "vfmaddps", int_x86_fma_vfmadd_ps, + int_x86_fma_vfmadd_ps_256, memopv4f32, memopv8f32>; +defm VFMADDPD4 : fma4p<0x69, "vfmaddpd", int_x86_fma_vfmadd_pd, + int_x86_fma_vfmadd_pd_256, memopv2f64, memopv4f64>; defm VFMSUBSS4 : fma4s<0x6E, "vfmsubss", ssmem, sse_load_f32, - int_x86_fma4_vfmsub_ss>; + int_x86_fma_vfmsub_ss>; defm VFMSUBSD4 : fma4s<0x6F, "vfmsubsd", sdmem, sse_load_f64, - int_x86_fma4_vfmsub_sd>; -defm VFMSUBPS4 : fma4p<0x6C, "vfmsubps", int_x86_fma4_vfmsub_ps, - int_x86_fma4_vfmsub_ps_256, memopv4f32, memopv8f32>; -defm VFMSUBPD4 : fma4p<0x6D, "vfmsubpd", int_x86_fma4_vfmsub_pd, - int_x86_fma4_vfmsub_pd_256, memopv2f64, memopv4f64>; + int_x86_fma_vfmsub_sd>; +defm VFMSUBPS4 : fma4p<0x6C, "vfmsubps", int_x86_fma_vfmsub_ps, + int_x86_fma_vfmsub_ps_256, memopv4f32, memopv8f32>; +defm VFMSUBPD4 : fma4p<0x6D, "vfmsubpd", int_x86_fma_vfmsub_pd, + int_x86_fma_vfmsub_pd_256, memopv2f64, memopv4f64>; defm VFNMADDSS4 : fma4s<0x7A, "vfnmaddss", ssmem, sse_load_f32, - int_x86_fma4_vfnmadd_ss>; + int_x86_fma_vfnmadd_ss>; defm VFNMADDSD4 : fma4s<0x7B, "vfnmaddsd", sdmem, sse_load_f64, - int_x86_fma4_vfnmadd_sd>; -defm VFNMADDPS4 : fma4p<0x78, "vfnmaddps", int_x86_fma4_vfnmadd_ps, - int_x86_fma4_vfnmadd_ps_256, memopv4f32, memopv8f32>; -defm VFNMADDPD4 : fma4p<0x79, "vfnmaddpd", int_x86_fma4_vfnmadd_pd, - int_x86_fma4_vfnmadd_pd_256, memopv2f64, memopv4f64>; + int_x86_fma_vfnmadd_sd>; +defm VFNMADDPS4 : fma4p<0x78, "vfnmaddps", int_x86_fma_vfnmadd_ps, + int_x86_fma_vfnmadd_ps_256, memopv4f32, memopv8f32>; +defm VFNMADDPD4 : fma4p<0x79, "vfnmaddpd", int_x86_fma_vfnmadd_pd, + int_x86_fma_vfnmadd_pd_256, memopv2f64, memopv4f64>; defm VFNMSUBSS4 : fma4s<0x7E, "vfnmsubss", ssmem, sse_load_f32, - int_x86_fma4_vfnmsub_ss>; + int_x86_fma_vfnmsub_ss>; defm VFNMSUBSD4 : fma4s<0x7F, "vfnmsubsd", sdmem, sse_load_f64, - int_x86_fma4_vfnmsub_sd>; -defm VFNMSUBPS4 : fma4p<0x7C, "vfnmsubps", int_x86_fma4_vfnmsub_ps, - int_x86_fma4_vfnmsub_ps_256, memopv4f32, memopv8f32>; -defm 
VFNMSUBPD4 : fma4p<0x7D, "vfnmsubpd", int_x86_fma4_vfnmsub_pd, - int_x86_fma4_vfnmsub_pd_256, memopv2f64, memopv4f64>; -defm VFMADDSUBPS4 : fma4p<0x5C, "vfmaddsubps", int_x86_fma4_vfmaddsub_ps, - int_x86_fma4_vfmaddsub_ps_256, memopv4f32, memopv8f32>; -defm VFMADDSUBPD4 : fma4p<0x5D, "vfmaddsubpd", int_x86_fma4_vfmaddsub_pd, - int_x86_fma4_vfmaddsub_pd_256, memopv2f64, memopv4f64>; -defm VFMSUBADDPS4 : fma4p<0x5E, "vfmsubaddps", int_x86_fma4_vfmsubadd_ps, - int_x86_fma4_vfmsubadd_ps_256, memopv4f32, memopv8f32>; -defm VFMSUBADDPD4 : fma4p<0x5F, "vfmsubaddpd", int_x86_fma4_vfmsubadd_pd, - int_x86_fma4_vfmsubadd_pd_256, memopv2f64, memopv4f64>; + int_x86_fma_vfnmsub_sd>; +defm VFNMSUBPS4 : fma4p<0x7C, "vfnmsubps", int_x86_fma_vfnmsub_ps, + int_x86_fma_vfnmsub_ps_256, memopv4f32, memopv8f32>; +defm VFNMSUBPD4 : fma4p<0x7D, "vfnmsubpd", int_x86_fma_vfnmsub_pd, + int_x86_fma_vfnmsub_pd_256, memopv2f64, memopv4f64>; +defm VFMADDSUBPS4 : fma4p<0x5C, "vfmaddsubps", int_x86_fma_vfmaddsub_ps, + int_x86_fma_vfmaddsub_ps_256, memopv4f32, memopv8f32>; +defm VFMADDSUBPD4 : fma4p<0x5D, "vfmaddsubpd", int_x86_fma_vfmaddsub_pd, + int_x86_fma_vfmaddsub_pd_256, memopv2f64, memopv4f64>; +defm VFMSUBADDPS4 : fma4p<0x5E, "vfmsubaddps", int_x86_fma_vfmsubadd_ps, + int_x86_fma_vfmsubadd_ps_256, memopv4f32, memopv8f32>; +defm VFMSUBADDPD4 : fma4p<0x5F, "vfmsubaddpd", int_x86_fma_vfmsubadd_pd, + int_x86_fma_vfmsubadd_pd_256, memopv2f64, memopv4f64>; +} // HasFMA4 + diff --git a/lib/Target/X86/X86InstrFPStack.td b/lib/Target/X86/X86InstrFPStack.td index a13887e..568726e 100644 --- a/lib/Target/X86/X86InstrFPStack.td +++ b/lib/Target/X86/X86InstrFPStack.td @@ -27,6 +27,7 @@ def SDTX86Fst : SDTypeProfile<0, 3, [SDTCisFP<0>, SDTCisVT<2, OtherVT>]>; def SDTX86Fild : SDTypeProfile<1, 2, [SDTCisFP<0>, SDTCisPtrTy<1>, SDTCisVT<2, OtherVT>]>; +def SDTX86Fnstsw : SDTypeProfile<1, 1, [SDTCisVT<0, i16>, SDTCisVT<1, i16>]>; def SDTX86FpToIMem : SDTypeProfile<0, 2, [SDTCisFP<0>, SDTCisPtrTy<1>]>; def SDTX86CwdStore : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>; @@ -41,6 +42,7 @@ def X86fild : SDNode<"X86ISD::FILD", SDTX86Fild, def X86fildflag : SDNode<"X86ISD::FILD_FLAG", SDTX86Fild, [SDNPHasChain, SDNPOutGlue, SDNPMayLoad, SDNPMemOperand]>; +def X86fp_stsw : SDNode<"X86ISD::FNSTSW16r", SDTX86Fnstsw>; def X86fp_to_i16mem : SDNode<"X86ISD::FP_TO_INT16_IN_MEM", SDTX86FpToIMem, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>; def X86fp_to_i32mem : SDNode<"X86ISD::FP_TO_INT32_IN_MEM", SDTX86FpToIMem, @@ -203,6 +205,7 @@ def _FI32m : FPI<0xDA, fp, (outs), (ins i32mem:$src), } } +let Defs = [FPSW] in { defm ADD : FPBinary_rr; defm SUB : FPBinary_rr; defm MUL : FPBinary_rr; @@ -213,6 +216,7 @@ defm SUBR: FPBinary; defm MUL : FPBinary; defm DIV : FPBinary; defm DIVR: FPBinary; +} class FPST0rInst o, string asm> : FPI, D8; @@ -257,6 +261,7 @@ def _Fp80 : FpI_<(outs RFP80:$dst), (ins RFP80:$src), OneArgFPRW, def _F : FPI, D9; } +let Defs = [FPSW] in { defm CHS : FPUnary; defm ABS : FPUnary; defm SQRT: FPUnary; @@ -269,6 +274,7 @@ def TST_Fp64 : FpIf64<(outs), (ins RFP64:$src), OneArgFP, []>; def TST_Fp80 : FpI_<(outs), (ins RFP80:$src), OneArgFP, []>; } def TST_F : FPI<0xE4, RawFrm, (outs), (ins), "ftst">, D9; +} // Defs = [FPSW] // Versions of FP instructions that take a single memory operand. Added for the // disassembler; remove as they are included with patterns elsewhere. 
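For reference, the 132/213/231 suffixes threaded through the FMA3 multiclasses earlier in this patch encode operand roles: the digits name which sources feed the multiply and which one is added, with the result always written to the first operand. A minimal C++ sketch of the scalar semantics, assuming the usual Intel FMA3 convention (the helper names are illustrative only, not identifiers from the patch):

#include <cmath>

// vfmadd132ss: dst = dst * src3 + src2
static double fmadd132(double dst, double src2, double src3) {
  return std::fma(dst, src3, src2);
}
// vfmadd213ss: dst = src2 * dst + src3
static double fmadd213(double dst, double src2, double src3) {
  return std::fma(src2, dst, src3);
}
// vfmadd231ss: dst = src2 * src3 + dst
static double fmadd231(double dst, double src2, double src3) {
  return std::fma(src2, src3, dst);
}

The negated variants follow the same operand orderings with the product negated, VFNMADD computing -(a*b) + c and VFNMSUB computing -(a*b) - c, which is why the defm groups differ only in opcode bytes, intrinsics, and the SDNode they select.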
@@ -316,6 +322,7 @@ multiclass FPCMov { Requires<[HasCMov]>; } +let Defs = [FPSW] in { let Uses = [EFLAGS], Constraints = "$src1 = $dst" in { defm CMOVB : FPCMov; defm CMOVBE : FPCMov; @@ -416,24 +423,40 @@ def IST_Fp64m80 : FpI_<(outs), (ins i64mem:$op, RFP80:$src), OneArgFP, []>; } let mayLoad = 1 in { -def LD_F32m : FPI<0xD9, MRM0m, (outs), (ins f32mem:$src), "fld{s}\t$src">; -def LD_F64m : FPI<0xDD, MRM0m, (outs), (ins f64mem:$src), "fld{l}\t$src">; -def LD_F80m : FPI<0xDB, MRM5m, (outs), (ins f80mem:$src), "fld{t}\t$src">; -def ILD_F16m : FPI<0xDF, MRM0m, (outs), (ins i16mem:$src), "fild{s}\t$src">; -def ILD_F32m : FPI<0xDB, MRM0m, (outs), (ins i32mem:$src), "fild{l}\t$src">; -def ILD_F64m : FPI<0xDF, MRM5m, (outs), (ins i64mem:$src), "fild{ll}\t$src">; +def LD_F32m : FPI<0xD9, MRM0m, (outs), (ins f32mem:$src), "fld{s}\t$src", + IIC_FLD>; +def LD_F64m : FPI<0xDD, MRM0m, (outs), (ins f64mem:$src), "fld{l}\t$src", + IIC_FLD>; +def LD_F80m : FPI<0xDB, MRM5m, (outs), (ins f80mem:$src), "fld{t}\t$src", + IIC_FLD80>; +def ILD_F16m : FPI<0xDF, MRM0m, (outs), (ins i16mem:$src), "fild{s}\t$src", + IIC_FILD>; +def ILD_F32m : FPI<0xDB, MRM0m, (outs), (ins i32mem:$src), "fild{l}\t$src", + IIC_FILD>; +def ILD_F64m : FPI<0xDF, MRM5m, (outs), (ins i64mem:$src), "fild{ll}\t$src", + IIC_FILD>; } let mayStore = 1 in { -def ST_F32m : FPI<0xD9, MRM2m, (outs), (ins f32mem:$dst), "fst{s}\t$dst">; -def ST_F64m : FPI<0xDD, MRM2m, (outs), (ins f64mem:$dst), "fst{l}\t$dst">; -def ST_FP32m : FPI<0xD9, MRM3m, (outs), (ins f32mem:$dst), "fstp{s}\t$dst">; -def ST_FP64m : FPI<0xDD, MRM3m, (outs), (ins f64mem:$dst), "fstp{l}\t$dst">; -def ST_FP80m : FPI<0xDB, MRM7m, (outs), (ins f80mem:$dst), "fstp{t}\t$dst">; -def IST_F16m : FPI<0xDF, MRM2m, (outs), (ins i16mem:$dst), "fist{s}\t$dst">; -def IST_F32m : FPI<0xDB, MRM2m, (outs), (ins i32mem:$dst), "fist{l}\t$dst">; -def IST_FP16m : FPI<0xDF, MRM3m, (outs), (ins i16mem:$dst), "fistp{s}\t$dst">; -def IST_FP32m : FPI<0xDB, MRM3m, (outs), (ins i32mem:$dst), "fistp{l}\t$dst">; -def IST_FP64m : FPI<0xDF, MRM7m, (outs), (ins i64mem:$dst), "fistp{ll}\t$dst">; +def ST_F32m : FPI<0xD9, MRM2m, (outs), (ins f32mem:$dst), "fst{s}\t$dst", + IIC_FST>; +def ST_F64m : FPI<0xDD, MRM2m, (outs), (ins f64mem:$dst), "fst{l}\t$dst", + IIC_FST>; +def ST_FP32m : FPI<0xD9, MRM3m, (outs), (ins f32mem:$dst), "fstp{s}\t$dst", + IIC_FST>; +def ST_FP64m : FPI<0xDD, MRM3m, (outs), (ins f64mem:$dst), "fstp{l}\t$dst", + IIC_FST>; +def ST_FP80m : FPI<0xDB, MRM7m, (outs), (ins f80mem:$dst), "fstp{t}\t$dst", + IIC_FST80>; +def IST_F16m : FPI<0xDF, MRM2m, (outs), (ins i16mem:$dst), "fist{s}\t$dst", + IIC_FIST>; +def IST_F32m : FPI<0xDB, MRM2m, (outs), (ins i32mem:$dst), "fist{l}\t$dst", + IIC_FIST>; +def IST_FP16m : FPI<0xDF, MRM3m, (outs), (ins i16mem:$dst), "fistp{s}\t$dst", + IIC_FIST>; +def IST_FP32m : FPI<0xDB, MRM3m, (outs), (ins i32mem:$dst), "fistp{l}\t$dst", + IIC_FIST>; +def IST_FP64m : FPI<0xDF, MRM7m, (outs), (ins i64mem:$dst), "fistp{ll}\t$dst", + IIC_FIST>; } // FISTTP requires SSE3 even though it's a FPStack op. 
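A note on the integer stores above: the IST_* (fist/fistp) forms round according to the rounding-control field of the x87 control word, while the ISTT_* (fisttp) forms guarded by HasSSE3 always truncate toward zero. A small standalone C++ illustration of the difference (not part of the patch):

#include <cmath>
#include <cstdio>

int main() {
  double x = 2.7;
  long long fist_like = std::llrint(x); // honors the current rounding mode: 3
  long long fisttp_like = (long long)x; // always truncates toward zero: 2
  std::printf("fist: %lld, fisttp: %lld\n", fist_like, fisttp_like);
  return 0;
}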
@@ -459,17 +482,23 @@ def ISTT_Fp64m80 : FpI_<(outs), (ins i64mem:$op, RFP80:$src), OneArgFP, } // Predicates = [HasSSE3] let mayStore = 1 in { -def ISTT_FP16m : FPI<0xDF, MRM1m, (outs), (ins i16mem:$dst), "fisttp{s}\t$dst">; -def ISTT_FP32m : FPI<0xDB, MRM1m, (outs), (ins i32mem:$dst), "fisttp{l}\t$dst">; +def ISTT_FP16m : FPI<0xDF, MRM1m, (outs), (ins i16mem:$dst), "fisttp{s}\t$dst", + IIC_FST>; +def ISTT_FP32m : FPI<0xDB, MRM1m, (outs), (ins i32mem:$dst), "fisttp{l}\t$dst", + IIC_FST>; def ISTT_FP64m : FPI<0xDD, MRM1m, (outs), (ins i64mem:$dst), - "fisttp{ll}\t$dst">; + "fisttp{ll}\t$dst", IIC_FST>; } // FP Stack manipulation instructions. -def LD_Frr : FPI<0xC0, AddRegFrm, (outs), (ins RST:$op), "fld\t$op">, D9; -def ST_Frr : FPI<0xD0, AddRegFrm, (outs), (ins RST:$op), "fst\t$op">, DD; -def ST_FPrr : FPI<0xD8, AddRegFrm, (outs), (ins RST:$op), "fstp\t$op">, DD; -def XCH_F : FPI<0xC8, AddRegFrm, (outs), (ins RST:$op), "fxch\t$op">, D9; +def LD_Frr : FPI<0xC0, AddRegFrm, (outs), (ins RST:$op), "fld\t$op", + IIC_FLD>, D9; +def ST_Frr : FPI<0xD0, AddRegFrm, (outs), (ins RST:$op), "fst\t$op", + IIC_FST>, DD; +def ST_FPrr : FPI<0xD8, AddRegFrm, (outs), (ins RST:$op), "fstp\t$op", + IIC_FST>, DD; +def XCH_F : FPI<0xC8, AddRegFrm, (outs), (ins RST:$op), "fxch\t$op", + IIC_FXCH>, D9; // Floating point constant loads. let isReMaterializable = 1 in { @@ -487,20 +516,21 @@ def LD_Fp180 : FpI_<(outs RFP80:$dst), (ins), ZeroArgFP, [(set RFP80:$dst, fpimm1)]>; } -def LD_F0 : FPI<0xEE, RawFrm, (outs), (ins), "fldz">, D9; -def LD_F1 : FPI<0xE8, RawFrm, (outs), (ins), "fld1">, D9; +def LD_F0 : FPI<0xEE, RawFrm, (outs), (ins), "fldz", IIC_FLDZ>, D9; +def LD_F1 : FPI<0xE8, RawFrm, (outs), (ins), "fld1", IIC_FIST>, D9; // Floating point compares. -let Defs = [EFLAGS] in { def UCOM_Fpr32 : FpIf32<(outs), (ins RFP32:$lhs, RFP32:$rhs), CompareFP, - []>; // FPSW = cmp ST(0) with ST(i) + [(set FPSW, (trunc (X86cmp RFP32:$lhs, RFP32:$rhs)))]>; def UCOM_Fpr64 : FpIf64<(outs), (ins RFP64:$lhs, RFP64:$rhs), CompareFP, - []>; // FPSW = cmp ST(0) with ST(i) + [(set FPSW, (trunc (X86cmp RFP64:$lhs, RFP64:$rhs)))]>; def UCOM_Fpr80 : FpI_ <(outs), (ins RFP80:$lhs, RFP80:$rhs), CompareFP, - []>; // FPSW = cmp ST(0) with ST(i) - + [(set FPSW, (trunc (X86cmp RFP80:$lhs, RFP80:$rhs)))]>; +} // Defs = [FPSW] + // CC = ST(0) cmp ST(i) +let Defs = [EFLAGS, FPSW] in { def UCOM_FpIr32: FpIf32<(outs), (ins RFP32:$lhs, RFP32:$rhs), CompareFP, [(set EFLAGS, (X86cmp RFP32:$lhs, RFP32:$rhs))]>; def UCOM_FpIr64: FpIf64<(outs), (ins RFP64:$lhs, RFP64:$rhs), CompareFP, @@ -509,85 +539,94 @@ def UCOM_FpIr80: FpI_<(outs), (ins RFP80:$lhs, RFP80:$rhs), CompareFP, [(set EFLAGS, (X86cmp RFP80:$lhs, RFP80:$rhs))]>; } -let Defs = [EFLAGS], Uses = [ST0] in { +let Defs = [FPSW], Uses = [ST0] in { def UCOM_Fr : FPI<0xE0, AddRegFrm, // FPSW = cmp ST(0) with ST(i) (outs), (ins RST:$reg), - "fucom\t$reg">, DD; + "fucom\t$reg", IIC_FUCOM>, DD; def UCOM_FPr : FPI<0xE8, AddRegFrm, // FPSW = cmp ST(0) with ST(i), pop (outs), (ins RST:$reg), - "fucomp\t$reg">, DD; + "fucomp\t$reg", IIC_FUCOM>, DD; def UCOM_FPPr : FPI<0xE9, RawFrm, // cmp ST(0) with ST(1), pop, pop (outs), (ins), - "fucompp">, DA; + "fucompp", IIC_FUCOM>, DA; +} +let Defs = [EFLAGS, FPSW], Uses = [ST0] in { def UCOM_FIr : FPI<0xE8, AddRegFrm, // CC = cmp ST(0) with ST(i) (outs), (ins RST:$reg), - "fucomi\t$reg">, DB; + "fucomi\t$reg", IIC_FUCOMI>, DB; def UCOM_FIPr : FPI<0xE8, AddRegFrm, // CC = cmp ST(0) with ST(i), pop (outs), (ins RST:$reg), - "fucompi\t$reg">, DF; + "fucompi\t$reg", 
IIC_FUCOMI>, DF; } +let Defs = [EFLAGS, FPSW], Uses = [ST0] in { def COM_FIr : FPI<0xF0, AddRegFrm, (outs), (ins RST:$reg), - "fcomi\t$reg">, DB; + "fcomi\t$reg", IIC_FCOMI>, DB; def COM_FIPr : FPI<0xF0, AddRegFrm, (outs), (ins RST:$reg), - "fcompi\t$reg">, DF; + "fcompi\t$reg", IIC_FCOMI>, DF; +} // Floating point flag ops. -let Defs = [AX] in -def FNSTSW8r : I<0xE0, RawFrm, // AX = fp flags - (outs), (ins), "fnstsw %ax", []>, DF; +let Defs = [AX], Uses = [FPSW] in +def FNSTSW16r : I<0xE0, RawFrm, // AX = fp flags + (outs), (ins), "fnstsw %ax", + [(set AX, (X86fp_stsw FPSW))], IIC_FNSTSW>, DF; def FNSTCW16m : I<0xD9, MRM7m, // [mem16] = X87 control word (outs), (ins i16mem:$dst), "fnstcw\t$dst", - [(X86fp_cwd_get16 addr:$dst)]>; + [(X86fp_cwd_get16 addr:$dst)], IIC_FNSTCW>; let mayLoad = 1 in def FLDCW16m : I<0xD9, MRM5m, // X87 control word = [mem16] - (outs), (ins i16mem:$dst), "fldcw\t$dst", []>; + (outs), (ins i16mem:$dst), "fldcw\t$dst", [], IIC_FLDCW>; // FPU control instructions -def FNINIT : I<0xE3, RawFrm, (outs), (ins), "fninit", []>, DB; +let Defs = [FPSW] in +def FNINIT : I<0xE3, RawFrm, (outs), (ins), "fninit", [], IIC_FNINIT>, DB; def FFREE : FPI<0xC0, AddRegFrm, (outs), (ins RST:$reg), - "ffree\t$reg">, DD; + "ffree\t$reg", IIC_FFREE>, DD; // Clear exceptions -def FNCLEX : I<0xE2, RawFrm, (outs), (ins), "fnclex", []>, DB; +let Defs = [FPSW] in +def FNCLEX : I<0xE2, RawFrm, (outs), (ins), "fnclex", [], IIC_FNCLEX>, DB; // Operandless floating-point instructions for the disassembler. -def WAIT : I<0x9B, RawFrm, (outs), (ins), "wait", []>; - -def FNOP : I<0xD0, RawFrm, (outs), (ins), "fnop", []>, D9; -def FXAM : I<0xE5, RawFrm, (outs), (ins), "fxam", []>, D9; -def FLDL2T : I<0xE9, RawFrm, (outs), (ins), "fldl2t", []>, D9; -def FLDL2E : I<0xEA, RawFrm, (outs), (ins), "fldl2e", []>, D9; -def FLDPI : I<0xEB, RawFrm, (outs), (ins), "fldpi", []>, D9; -def FLDLG2 : I<0xEC, RawFrm, (outs), (ins), "fldlg2", []>, D9; -def FLDLN2 : I<0xED, RawFrm, (outs), (ins), "fldln2", []>, D9; -def F2XM1 : I<0xF0, RawFrm, (outs), (ins), "f2xm1", []>, D9; -def FYL2X : I<0xF1, RawFrm, (outs), (ins), "fyl2x", []>, D9; -def FPTAN : I<0xF2, RawFrm, (outs), (ins), "fptan", []>, D9; -def FPATAN : I<0xF3, RawFrm, (outs), (ins), "fpatan", []>, D9; -def FXTRACT : I<0xF4, RawFrm, (outs), (ins), "fxtract", []>, D9; -def FPREM1 : I<0xF5, RawFrm, (outs), (ins), "fprem1", []>, D9; -def FDECSTP : I<0xF6, RawFrm, (outs), (ins), "fdecstp", []>, D9; -def FINCSTP : I<0xF7, RawFrm, (outs), (ins), "fincstp", []>, D9; -def FPREM : I<0xF8, RawFrm, (outs), (ins), "fprem", []>, D9; -def FYL2XP1 : I<0xF9, RawFrm, (outs), (ins), "fyl2xp1", []>, D9; -def FSINCOS : I<0xFB, RawFrm, (outs), (ins), "fsincos", []>, D9; -def FRNDINT : I<0xFC, RawFrm, (outs), (ins), "frndint", []>, D9; -def FSCALE : I<0xFD, RawFrm, (outs), (ins), "fscale", []>, D9; -def FCOMPP : I<0xD9, RawFrm, (outs), (ins), "fcompp", []>, DE; +def WAIT : I<0x9B, RawFrm, (outs), (ins), "wait", [], IIC_WAIT>; + +def FNOP : I<0xD0, RawFrm, (outs), (ins), "fnop", [], IIC_FNOP>, D9; +def FXAM : I<0xE5, RawFrm, (outs), (ins), "fxam", [], IIC_FXAM>, D9; +def FLDL2T : I<0xE9, RawFrm, (outs), (ins), "fldl2t", [], IIC_FLDL>, D9; +def FLDL2E : I<0xEA, RawFrm, (outs), (ins), "fldl2e", [], IIC_FLDL>, D9; +def FLDPI : I<0xEB, RawFrm, (outs), (ins), "fldpi", [], IIC_FLDL>, D9; +def FLDLG2 : I<0xEC, RawFrm, (outs), (ins), "fldlg2", [], IIC_FLDL>, D9; +def FLDLN2 : I<0xED, RawFrm, (outs), (ins), "fldln2", [], IIC_FLDL>, D9; +def F2XM1 : I<0xF0, RawFrm, (outs), (ins), "f2xm1", [], IIC_F2XM1>,
D9; +def FYL2X : I<0xF1, RawFrm, (outs), (ins), "fyl2x", [], IIC_FYL2X>, D9; +def FPTAN : I<0xF2, RawFrm, (outs), (ins), "fptan", [], IIC_FPTAN>, D9; +def FPATAN : I<0xF3, RawFrm, (outs), (ins), "fpatan", [], IIC_FPATAN>, D9; +def FXTRACT : I<0xF4, RawFrm, (outs), (ins), "fxtract", [], IIC_FXTRACT>, D9; +def FPREM1 : I<0xF5, RawFrm, (outs), (ins), "fprem1", [], IIC_FPREM1>, D9; +def FDECSTP : I<0xF6, RawFrm, (outs), (ins), "fdecstp", [], IIC_FPSTP>, D9; +def FINCSTP : I<0xF7, RawFrm, (outs), (ins), "fincstp", [], IIC_FPSTP>, D9; +def FPREM : I<0xF8, RawFrm, (outs), (ins), "fprem", [], IIC_FPREM>, D9; +def FYL2XP1 : I<0xF9, RawFrm, (outs), (ins), "fyl2xp1", [], IIC_FYL2XP1>, D9; +def FSINCOS : I<0xFB, RawFrm, (outs), (ins), "fsincos", [], IIC_FSINCOS>, D9; +def FRNDINT : I<0xFC, RawFrm, (outs), (ins), "frndint", [], IIC_FRNDINT>, D9; +def FSCALE : I<0xFD, RawFrm, (outs), (ins), "fscale", [], IIC_FSCALE>, D9; +def FCOMPP : I<0xD9, RawFrm, (outs), (ins), "fcompp", [], IIC_FCOMPP>, DE; def FXSAVE : I<0xAE, MRM0m, (outs opaque512mem:$dst), (ins), - "fxsave\t$dst", []>, TB; + "fxsave\t$dst", [], IIC_FXSAVE>, TB; def FXSAVE64 : I<0xAE, MRM0m, (outs opaque512mem:$dst), (ins), - "fxsaveq\t$dst", []>, TB, REX_W, Requires<[In64BitMode]>; + "fxsaveq\t$dst", [], IIC_FXSAVE>, TB, REX_W, + Requires<[In64BitMode]>; def FXRSTOR : I<0xAE, MRM1m, (outs), (ins opaque512mem:$src), - "fxrstor\t$src", []>, TB; + "fxrstor\t$src", [], IIC_FXRSTOR>, TB; def FXRSTOR64 : I<0xAE, MRM1m, (outs), (ins opaque512mem:$src), - "fxrstorq\t$src", []>, TB, REX_W, Requires<[In64BitMode]>; + "fxrstorq\t$src", [], IIC_FXRSTOR>, TB, REX_W, + Requires<[In64BitMode]>; //===----------------------------------------------------------------------===// // Non-Instruction Patterns diff --git a/lib/Target/X86/X86InstrFormats.td b/lib/Target/X86/X86InstrFormats.td index b387090..81b4f81 100644 --- a/lib/Target/X86/X86InstrFormats.td +++ b/lib/Target/X86/X86InstrFormats.td @@ -255,8 +255,9 @@ class Ii32PCRel o, Format f, dag outs, dag ins, string asm, // FPStack Instruction Templates: // FPI - Floating Point Instruction template. -class FPI o, Format F, dag outs, dag ins, string asm> - : I {} +class FPI o, Format F, dag outs, dag ins, string asm, + InstrItinClass itin = IIC_DEFAULT> + : I {} // FpI_ - Floating Point Pseudo Instruction template. Not Predicated. class FpI_ pattern, @@ -365,6 +366,7 @@ class VPSI o, Format F, dag outs, dag ins, string asm, // // SDI - SSE2 instructions with XD prefix. // SDIi8 - SSE2 instructions with ImmT == Imm8 and XD prefix. +// S2SI - SSE2 instructions with XS prefix. // SSDIi8 - SSE2 instructions with ImmT == Imm8 and XS prefix. // PDI - SSE2 instructions with TB and OpSize prefixes. // PDIi8 - SSE2 instructions with ImmT == Imm8 and TB and OpSize prefixes. 
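The instruction-format template changes in this hunk and the ones that follow all use the same backward-compatible pattern: an InstrItinClass parameter is appended with an IIC_DEFAULT default, so existing defs keep working while updated ones can carry scheduling information. The same idea expressed as plain C++ (a sketch only; the toy enum mirrors a couple of X86Schedule.td names but is not the real definition):

// Appending a defaulted parameter leaves old call sites valid.
enum InstrItinClass { IIC_DEFAULT, IIC_FLD, IIC_FST };

struct FPI {
  unsigned Opcode;
  const char *Asm;
  InstrItinClass Itin;
  FPI(unsigned Opcode, const char *Asm, InstrItinClass Itin = IIC_DEFAULT)
      : Opcode(Opcode), Asm(Asm), Itin(Itin) {}
};

static FPI OldStyle(0xD9, "fld");          // still compiles: Itin == IIC_DEFAULT
static FPI NewStyle(0xD9, "fld", IIC_FLD); // scheduling-aware definition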
@@ -377,8 +379,11 @@ class SDI o, Format F, dag outs, dag ins, string asm, class SDIi8 o, Format F, dag outs, dag ins, string asm, list pattern, InstrItinClass itin = IIC_DEFAULT> : Ii8, XD, Requires<[HasSSE2]>; -class SSDIi8 o, Format F, dag outs, dag ins, string asm, - list pattern> +class S2SI o, Format F, dag outs, dag ins, string asm, + list pattern, InstrItinClass itin = IIC_DEFAULT> + : I, XS, Requires<[HasSSE2]>; +class S2SIi8 o, Format F, dag outs, dag ins, string asm, + list pattern, InstrItinClass itin = IIC_DEFAULT> : Ii8, XS, Requires<[HasSSE2]>; class PDI o, Format F, dag outs, dag ins, string asm, list pattern, InstrItinClass itin = IIC_DEFAULT> @@ -392,6 +397,10 @@ class VSDI o, Format F, dag outs, dag ins, string asm, list pattern, InstrItinClass itin = IIC_DEFAULT> : I, XD, Requires<[HasAVX]>; +class VS2SI o, Format F, dag outs, dag ins, string asm, + list pattern, InstrItinClass itin = IIC_DEFAULT> + : I, XS, + Requires<[HasAVX]>; class VPDI o, Format F, dag outs, dag ins, string asm, list pattern, InstrItinClass itin = IIC_DEFAULT> : I, TB, @@ -503,29 +512,29 @@ class AVX2AIi8 o, Format F, dag outs, dag ins, string asm, class AES8I o, Format F, dag outs, dag ins, string asm, listpattern, InstrItinClass itin = IIC_DEFAULT> : I, T8, - Requires<[HasSSE2, HasAES]>; + Requires<[HasAES]>; class AESAI o, Format F, dag outs, dag ins, string asm, list pattern, InstrItinClass itin = IIC_DEFAULT> : Ii8, TA, - Requires<[HasSSE2, HasAES]>; + Requires<[HasAES]>; -// CLMUL Instruction Templates -class CLMULIi8 o, Format F, dag outs, dag ins, string asm, +// PCLMUL Instruction Templates +class PCLMULIi8 o, Format F, dag outs, dag ins, string asm, listpattern, InstrItinClass itin = IIC_DEFAULT> : Ii8, TA, - OpSize, Requires<[HasSSE2, HasCLMUL]>; + OpSize, Requires<[HasPCLMUL]>; -class AVXCLMULIi8 o, Format F, dag outs, dag ins, string asm, +class AVXPCLMULIi8 o, Format F, dag outs, dag ins, string asm, listpattern, InstrItinClass itin = IIC_DEFAULT> : Ii8, TA, - OpSize, VEX_4V, Requires<[HasAVX, HasCLMUL]>; + OpSize, VEX_4V, Requires<[HasAVX, HasPCLMUL]>; // FMA3 Instruction Templates class FMA3 o, Format F, dag outs, dag ins, string asm, listpattern, InstrItinClass itin = IIC_DEFAULT> : I, T8, - OpSize, VEX_4V, Requires<[HasFMA3]>; + OpSize, VEX_4V, Requires<[HasFMA]>; // FMA4 Instruction Templates class FMA4 o, Format F, dag outs, dag ins, string asm, diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td index 35801e4..d13167b 100644 --- a/lib/Target/X86/X86InstrFragmentsSIMD.td +++ b/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -71,9 +71,14 @@ def X86insrtps : SDNode<"X86ISD::INSERTPS", SDTCisVT<2, v4f32>, SDTCisPtrTy<3>]>>; def X86vzmovl : SDNode<"X86ISD::VZEXT_MOVL", SDTypeProfile<1, 1, [SDTCisSameAs<0,1>]>>; + +def X86vzmovly : SDNode<"X86ISD::VZEXT_MOVL", + SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>, + SDTCisOpSmallerThanOp<1, 0> ]>>; + def X86vsmovl : SDNode<"X86ISD::VSEXT_MOVL", SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisInt<1>, SDTCisInt<0>]>>; - + def X86vzload : SDNode<"X86ISD::VZEXT_LOAD", SDTLoad, [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; def X86vshldq : SDNode<"X86ISD::VSHLDQ", SDTIntShiftOp>; @@ -102,13 +107,6 @@ def SDTX86CmpPTest : SDTypeProfile<1, 2, [SDTCisVT<0, i32>, def X86ptest : SDNode<"X86ISD::PTEST", SDTX86CmpPTest>; def X86testp : SDNode<"X86ISD::TESTP", SDTX86CmpPTest>; -def X86vpcom : SDNode<"X86ISD::VPCOM", - SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>, - SDTCisSameAs<0,2>, SDTCisVT<3, i8>]>>; -def 
X86vpcomu : SDNode<"X86ISD::VPCOMU", - SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>, - SDTCisSameAs<0,2>, SDTCisVT<3, i8>]>>; - def X86pmuludq : SDNode<"X86ISD::PMULUDQ", SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>, SDTCisSameAs<1,2>]>>; @@ -127,7 +125,10 @@ def SDTShuff3OpI : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>, def SDTVBroadcast : SDTypeProfile<1, 1, [SDTCisVec<0>]>; def SDTBlend : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>, -SDTCisSameAs<1,2>, SDTCisVT<3, i32>]>; + SDTCisSameAs<1,2>, SDTCisVT<3, i32>]>; + +def SDTFma : SDTypeProfile<1, 3, [SDTCisSameAs<0,1>, + SDTCisSameAs<1,2>, SDTCisSameAs<1,3>]>; def X86PAlign : SDNode<"X86ISD::PALIGN", SDTShuff3OpI>; @@ -162,9 +163,26 @@ def X86VPerm2x128 : SDNode<"X86ISD::VPERM2X128", SDTShuff3OpI>; def X86VBroadcast : SDNode<"X86ISD::VBROADCAST", SDTVBroadcast>; -def X86Blendpw : SDNode<"X86ISD::BLENDPW", SDTBlend>; -def X86Blendps : SDNode<"X86ISD::BLENDPS", SDTBlend>; -def X86Blendpd : SDNode<"X86ISD::BLENDPD", SDTBlend>; +def X86Blendpw : SDNode<"X86ISD::BLENDPW", SDTBlend>; +def X86Blendps : SDNode<"X86ISD::BLENDPS", SDTBlend>; +def X86Blendpd : SDNode<"X86ISD::BLENDPD", SDTBlend>; +def X86Fmadd : SDNode<"X86ISD::FMADD", SDTFma>; +def X86Fnmadd : SDNode<"X86ISD::FNMADD", SDTFma>; +def X86Fmsub : SDNode<"X86ISD::FMSUB", SDTFma>; +def X86Fnmsub : SDNode<"X86ISD::FNMSUB", SDTFma>; +def X86Fmaddsub : SDNode<"X86ISD::FMADDSUB", SDTFma>; +def X86Fmsubadd : SDNode<"X86ISD::FMSUBADD", SDTFma>; + +def SDT_PCMPISTRI : SDTypeProfile<2, 3, [SDTCisVT<0, i32>, SDTCisVT<1, i32>, + SDTCisVT<2, v16i8>, SDTCisVT<3, v16i8>, + SDTCisVT<4, i8>]>; +def SDT_PCMPESTRI : SDTypeProfile<2, 5, [SDTCisVT<0, i32>, SDTCisVT<1, i32>, + SDTCisVT<2, v16i8>, SDTCisVT<3, i32>, + SDTCisVT<4, v16i8>, SDTCisVT<5, i32>, + SDTCisVT<6, i8>]>; + +def X86pcmpistri : SDNode<"X86ISD::PCMPISTRI", SDT_PCMPISTRI>; +def X86pcmpestri : SDNode<"X86ISD::PCMPESTRI", SDT_PCMPESTRI>; //===----------------------------------------------------------------------===// // SSE Complex Patterns @@ -304,7 +322,7 @@ def nontemporalstore : PatFrag<(ops node:$val, node:$ptr), }]>; def alignednontemporalstore : PatFrag<(ops node:$val, node:$ptr), - (st node:$val, node:$ptr), [{ + (st node:$val, node:$ptr), [{ if (StoreSDNode *ST = dyn_cast(N)) return ST->isNonTemporal() && !ST->isTruncatingStore() && ST->getAddressingMode() == ISD::UNINDEXED && @@ -313,7 +331,7 @@ def alignednontemporalstore : PatFrag<(ops node:$val, node:$ptr), def unalignednontemporalstore : PatFrag<(ops node:$val, node:$ptr), - (st node:$val, node:$ptr), [{ + (st node:$val, node:$ptr), [{ if (StoreSDNode *ST = dyn_cast(N)) return ST->isNonTemporal() && ST->getAlignment() < 16; diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index b12c1db..cca04e5 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -21,6 +21,7 @@ #include "llvm/LLVMContext.h" #include "llvm/ADT/STLExtras.h" #include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" @@ -54,38 +55,39 @@ ReMatPICStubLoad("remat-pic-stub-load", enum { // Select which memory operand is being unfolded. - // (stored in bits 0 - 7) + // (stored in bits 0 - 3) TB_INDEX_0 = 0, TB_INDEX_1 = 1, TB_INDEX_2 = 2, - TB_INDEX_MASK = 0xff, - - // Minimum alignment required for load/store. - // Used for RegOp->MemOp conversion.
- // (stored in bits 8 - 15) - TB_ALIGN_SHIFT = 8, - TB_ALIGN_NONE = 0 << TB_ALIGN_SHIFT, - TB_ALIGN_16 = 16 << TB_ALIGN_SHIFT, - TB_ALIGN_32 = 32 << TB_ALIGN_SHIFT, - TB_ALIGN_MASK = 0xff << TB_ALIGN_SHIFT, + TB_INDEX_3 = 3, + TB_INDEX_MASK = 0xf, // Do not insert the reverse map (MemOp -> RegOp) into the table. // This may be needed because there is a many -> one mapping. - TB_NO_REVERSE = 1 << 16, + TB_NO_REVERSE = 1 << 4, // Do not insert the forward map (RegOp -> MemOp) into the table. // This is needed for Native Client, which prohibits branch // instructions from using a memory operand. - TB_NO_FORWARD = 1 << 17, + TB_NO_FORWARD = 1 << 5, - TB_FOLDED_LOAD = 1 << 18, - TB_FOLDED_STORE = 1 << 19 + TB_FOLDED_LOAD = 1 << 6, + TB_FOLDED_STORE = 1 << 7, + + // Minimum alignment required for load/store. + // Used for RegOp->MemOp conversion. + // (stored in bits 8 - 15) + TB_ALIGN_SHIFT = 8, + TB_ALIGN_NONE = 0 << TB_ALIGN_SHIFT, + TB_ALIGN_16 = 16 << TB_ALIGN_SHIFT, + TB_ALIGN_32 = 32 << TB_ALIGN_SHIFT, + TB_ALIGN_MASK = 0xff << TB_ALIGN_SHIFT }; struct X86OpTblEntry { uint16_t RegOp; uint16_t MemOp; - uint32_t Flags; + uint16_t Flags; }; X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) @@ -408,20 +410,10 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::IMUL64rri8, X86::IMUL64rmi8, 0 }, { X86::Int_COMISDrr, X86::Int_COMISDrm, 0 }, { X86::Int_COMISSrr, X86::Int_COMISSrm, 0 }, - { X86::Int_CVTDQ2PDrr, X86::Int_CVTDQ2PDrm, TB_ALIGN_16 }, - { X86::Int_CVTDQ2PSrr, X86::Int_CVTDQ2PSrm, TB_ALIGN_16 }, - { X86::Int_CVTPD2DQrr, X86::Int_CVTPD2DQrm, TB_ALIGN_16 }, - { X86::Int_CVTPD2PSrr, X86::Int_CVTPD2PSrm, TB_ALIGN_16 }, - { X86::Int_CVTPS2DQrr, X86::Int_CVTPS2DQrm, TB_ALIGN_16 }, - { X86::Int_CVTPS2PDrr, X86::Int_CVTPS2PDrm, 0 }, { X86::CVTSD2SI64rr, X86::CVTSD2SI64rm, 0 }, { X86::CVTSD2SIrr, X86::CVTSD2SIrm, 0 }, - { X86::Int_CVTSD2SSrr, X86::Int_CVTSD2SSrm, 0 }, - { X86::Int_CVTSI2SD64rr,X86::Int_CVTSI2SD64rm, 0 }, - { X86::Int_CVTSI2SDrr, X86::Int_CVTSI2SDrm, 0 }, - { X86::Int_CVTSI2SS64rr,X86::Int_CVTSI2SS64rm, 0 }, - { X86::Int_CVTSI2SSrr, X86::Int_CVTSI2SSrm, 0 }, - { X86::Int_CVTSS2SDrr, X86::Int_CVTSS2SDrm, 0 }, + { X86::CVTSS2SI64rr, X86::CVTSS2SI64rm, 0 }, + { X86::CVTSS2SIrr, X86::CVTSS2SIrm, 0 }, { X86::CVTTPD2DQrr, X86::CVTTPD2DQrm, TB_ALIGN_16 }, { X86::CVTTPS2DQrr, X86::CVTTPS2DQrm, TB_ALIGN_16 }, { X86::Int_CVTTSD2SI64rr,X86::Int_CVTTSD2SI64rm, 0 }, @@ -492,14 +484,20 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) // AVX 128-bit versions of foldable instructions { X86::Int_VCOMISDrr, X86::Int_VCOMISDrm, 0 }, { X86::Int_VCOMISSrr, X86::Int_VCOMISSrm, 0 }, - { X86::Int_VCVTDQ2PDrr, X86::Int_VCVTDQ2PDrm, TB_ALIGN_16 }, - { X86::Int_VCVTDQ2PSrr, X86::Int_VCVTDQ2PSrm, TB_ALIGN_16 }, - { X86::Int_VCVTPD2DQrr, X86::Int_VCVTPD2DQrm, TB_ALIGN_16 }, - { X86::Int_VCVTPD2PSrr, X86::Int_VCVTPD2PSrm, TB_ALIGN_16 }, - { X86::Int_VCVTPS2DQrr, X86::Int_VCVTPS2DQrm, TB_ALIGN_16 }, - { X86::Int_VCVTPS2PDrr, X86::Int_VCVTPS2PDrm, 0 }, { X86::Int_VUCOMISDrr, X86::Int_VUCOMISDrm, 0 }, { X86::Int_VUCOMISSrr, X86::Int_VUCOMISSrm, 0 }, + { X86::VCVTTSD2SI64rr, X86::VCVTTSD2SI64rm, 0 }, + { X86::Int_VCVTTSD2SI64rr,X86::Int_VCVTTSD2SI64rm,0 }, + { X86::VCVTTSD2SIrr, X86::VCVTTSD2SIrm, 0 }, + { X86::Int_VCVTTSD2SIrr,X86::Int_VCVTTSD2SIrm, 0 }, + { X86::VCVTTSS2SI64rr, X86::VCVTTSS2SI64rm, 0 }, + { X86::Int_VCVTTSS2SI64rr,X86::Int_VCVTTSS2SI64rm,0 }, + { X86::VCVTTSS2SIrr, X86::VCVTTSS2SIrm, 0 }, + { X86::Int_VCVTTSS2SIrr,X86::Int_VCVTTSS2SIrm, 0 }, + { X86::VCVTSD2SI64rr, X86::VCVTSD2SI64rm, 
0 }, + { X86::VCVTSD2SIrr, X86::VCVTSD2SIrm, 0 }, + { X86::VCVTSS2SI64rr, X86::VCVTSS2SI64rm, 0 }, + { X86::VCVTSS2SIrr, X86::VCVTSS2SIrm, 0 }, { X86::FsVMOVAPDrr, X86::VMOVSDrm, TB_NO_REVERSE }, { X86::FsVMOVAPSrr, X86::VMOVSSrm, TB_NO_REVERSE }, { X86::VMOV64toPQIrr, X86::VMOVQI2PQIrm, 0 }, @@ -535,6 +533,8 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::VSQRTPSr_Int, X86::VSQRTPSm_Int, TB_ALIGN_16 }, { X86::VUCOMISDrr, X86::VUCOMISDrm, 0 }, { X86::VUCOMISSrr, X86::VUCOMISSrm, 0 }, + { X86::VBROADCASTSSrr, X86::VBROADCASTSSrm, TB_NO_REVERSE }, + // AVX 256-bit foldable instructions { X86::VMOVAPDYrr, X86::VMOVAPDYrm, TB_ALIGN_32 }, { X86::VMOVAPSYrr, X86::VMOVAPSYrm, TB_ALIGN_32 }, @@ -543,6 +543,7 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::VMOVUPSYrr, X86::VMOVUPSYrm, 0 }, { X86::VPERMILPDYri, X86::VPERMILPDYmi, TB_ALIGN_32 }, { X86::VPERMILPSYri, X86::VPERMILPSYmi, TB_ALIGN_32 }, + // AVX2 foldable instructions { X86::VPABSBrr256, X86::VPABSBrm256, TB_ALIGN_32 }, { X86::VPABSDrr256, X86::VPABSDrm256, TB_ALIGN_32 }, @@ -558,6 +559,8 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::VSQRTPDYr_Int, X86::VSQRTPDYm_Int, TB_ALIGN_32 }, { X86::VSQRTPSYr, X86::VSQRTPSYm, TB_ALIGN_32 }, { X86::VSQRTPSYr_Int, X86::VSQRTPSYm_Int, TB_ALIGN_32 }, + { X86::VBROADCASTSSYrr, X86::VBROADCASTSSYrm, TB_NO_REVERSE }, + { X86::VBROADCASTSDYrr, X86::VBROADCASTSDYrm, TB_NO_REVERSE }, }; for (unsigned i = 0, e = array_lengthof(OpTbl1); i != e; ++i) { @@ -671,6 +674,12 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::IMUL64rr, X86::IMUL64rm, 0 }, { X86::Int_CMPSDrr, X86::Int_CMPSDrm, 0 }, { X86::Int_CMPSSrr, X86::Int_CMPSSrm, 0 }, + { X86::Int_CVTSD2SSrr, X86::Int_CVTSD2SSrm, 0 }, + { X86::Int_CVTSI2SD64rr,X86::Int_CVTSI2SD64rm, 0 }, + { X86::Int_CVTSI2SDrr, X86::Int_CVTSI2SDrm, 0 }, + { X86::Int_CVTSI2SS64rr,X86::Int_CVTSI2SS64rm, 0 }, + { X86::Int_CVTSI2SSrr, X86::Int_CVTSI2SSrm, 0 }, + { X86::Int_CVTSS2SDrr, X86::Int_CVTSS2SDrm, 0 }, { X86::MAXPDrr, X86::MAXPDrm, TB_ALIGN_16 }, { X86::MAXPDrr_Int, X86::MAXPDrm_Int, TB_ALIGN_16 }, { X86::MAXPSrr, X86::MAXPSrm, TB_ALIGN_16 }, @@ -808,17 +817,7 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::Int_VCVTSI2SSrr, X86::Int_VCVTSI2SSrm, 0 }, { X86::VCVTSS2SDrr, X86::VCVTSS2SDrm, 0 }, { X86::Int_VCVTSS2SDrr, X86::Int_VCVTSS2SDrm, 0 }, - { X86::VCVTTSD2SI64rr, X86::VCVTTSD2SI64rm, 0 }, - { X86::Int_VCVTTSD2SI64rr,X86::Int_VCVTTSD2SI64rm, 0 }, - { X86::VCVTTSD2SIrr, X86::VCVTTSD2SIrm, 0 }, - { X86::Int_VCVTTSD2SIrr, X86::Int_VCVTTSD2SIrm, 0 }, - { X86::VCVTTSS2SI64rr, X86::VCVTTSS2SI64rm, 0 }, - { X86::Int_VCVTTSS2SI64rr,X86::Int_VCVTTSS2SI64rm, 0 }, - { X86::VCVTTSS2SIrr, X86::VCVTTSS2SIrm, 0 }, - { X86::Int_VCVTTSS2SIrr, X86::Int_VCVTTSS2SIrm, 0 }, - { X86::VCVTSD2SI64rr, X86::VCVTSD2SI64rm, 0 }, - { X86::VCVTSD2SIrr, X86::VCVTSD2SIrm, 0 }, - { X86::VCVTTPD2DQrr, X86::VCVTTPD2DQrm, TB_ALIGN_16 }, + { X86::VCVTTPD2DQrr, X86::VCVTTPD2DQXrm, TB_ALIGN_16 }, { X86::VCVTTPS2DQrr, X86::VCVTTPS2DQrm, TB_ALIGN_16 }, { X86::VRSQRTSSr, X86::VRSQRTSSm, 0 }, { X86::VSQRTSDr, X86::VSQRTSDm, 0 }, @@ -1122,6 +1121,158 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) // Index 2, folded load Flags | TB_INDEX_2 | TB_FOLDED_LOAD); } + + static const X86OpTblEntry OpTbl3[] = { + // FMA foldable instructions + { X86::VFMADDSSr231r, X86::VFMADDSSr231m, 0 }, + { X86::VFMADDSDr231r, X86::VFMADDSDr231m, 0 }, + { X86::VFMADDSSr132r, X86::VFMADDSSr132m, 0 }, + { X86::VFMADDSDr132r, X86::VFMADDSDr132m, 0 }, + { X86::VFMADDSSr213r, 
X86::VFMADDSSr213m, 0 }, + { X86::VFMADDSDr213r, X86::VFMADDSDr213m, 0 }, + { X86::VFMADDSSr213r_Int, X86::VFMADDSSr213m_Int, 0 }, + { X86::VFMADDSDr213r_Int, X86::VFMADDSDr213m_Int, 0 }, + + { X86::VFMADDPSr231r, X86::VFMADDPSr231m, TB_ALIGN_16 }, + { X86::VFMADDPDr231r, X86::VFMADDPDr231m, TB_ALIGN_16 }, + { X86::VFMADDPSr132r, X86::VFMADDPSr132m, TB_ALIGN_16 }, + { X86::VFMADDPDr132r, X86::VFMADDPDr132m, TB_ALIGN_16 }, + { X86::VFMADDPSr213r, X86::VFMADDPSr213m, TB_ALIGN_16 }, + { X86::VFMADDPDr213r, X86::VFMADDPDr213m, TB_ALIGN_16 }, + { X86::VFMADDPSr231rY, X86::VFMADDPSr231mY, TB_ALIGN_32 }, + { X86::VFMADDPDr231rY, X86::VFMADDPDr231mY, TB_ALIGN_32 }, + { X86::VFMADDPSr132rY, X86::VFMADDPSr132mY, TB_ALIGN_32 }, + { X86::VFMADDPDr132rY, X86::VFMADDPDr132mY, TB_ALIGN_32 }, + { X86::VFMADDPSr213rY, X86::VFMADDPSr213mY, TB_ALIGN_32 }, + { X86::VFMADDPDr213rY, X86::VFMADDPDr213mY, TB_ALIGN_32 }, + { X86::VFMADDPSr213r_Int, X86::VFMADDPSr213m_Int, TB_ALIGN_16 }, + { X86::VFMADDPDr213r_Int, X86::VFMADDPDr213m_Int, TB_ALIGN_16 }, + { X86::VFMADDPSr213rY_Int, X86::VFMADDPSr213mY_Int, TB_ALIGN_32 }, + { X86::VFMADDPDr213rY_Int, X86::VFMADDPDr213mY_Int, TB_ALIGN_32 }, + + { X86::VFNMADDSSr231r, X86::VFNMADDSSr231m, 0 }, + { X86::VFNMADDSDr231r, X86::VFNMADDSDr231m, 0 }, + { X86::VFNMADDSSr132r, X86::VFNMADDSSr132m, 0 }, + { X86::VFNMADDSDr132r, X86::VFNMADDSDr132m, 0 }, + { X86::VFNMADDSSr213r, X86::VFNMADDSSr213m, 0 }, + { X86::VFNMADDSDr213r, X86::VFNMADDSDr213m, 0 }, + { X86::VFNMADDSSr213r_Int, X86::VFNMADDSSr213m_Int, 0 }, + { X86::VFNMADDSDr213r_Int, X86::VFNMADDSDr213m_Int, 0 }, + + { X86::VFNMADDPSr231r, X86::VFNMADDPSr231m, TB_ALIGN_16 }, + { X86::VFNMADDPDr231r, X86::VFNMADDPDr231m, TB_ALIGN_16 }, + { X86::VFNMADDPSr132r, X86::VFNMADDPSr132m, TB_ALIGN_16 }, + { X86::VFNMADDPDr132r, X86::VFNMADDPDr132m, TB_ALIGN_16 }, + { X86::VFNMADDPSr213r, X86::VFNMADDPSr213m, TB_ALIGN_16 }, + { X86::VFNMADDPDr213r, X86::VFNMADDPDr213m, TB_ALIGN_16 }, + { X86::VFNMADDPSr231rY, X86::VFNMADDPSr231mY, TB_ALIGN_32 }, + { X86::VFNMADDPDr231rY, X86::VFNMADDPDr231mY, TB_ALIGN_32 }, + { X86::VFNMADDPSr132rY, X86::VFNMADDPSr132mY, TB_ALIGN_32 }, + { X86::VFNMADDPDr132rY, X86::VFNMADDPDr132mY, TB_ALIGN_32 }, + { X86::VFNMADDPSr213rY, X86::VFNMADDPSr213mY, TB_ALIGN_32 }, + { X86::VFNMADDPDr213rY, X86::VFNMADDPDr213mY, TB_ALIGN_32 }, + { X86::VFNMADDPSr213r_Int, X86::VFNMADDPSr213m_Int, TB_ALIGN_16 }, + { X86::VFNMADDPDr213r_Int, X86::VFNMADDPDr213m_Int, TB_ALIGN_16 }, + { X86::VFNMADDPSr213rY_Int, X86::VFNMADDPSr213mY_Int, TB_ALIGN_32 }, + { X86::VFNMADDPDr213rY_Int, X86::VFNMADDPDr213mY_Int, TB_ALIGN_32 }, + + { X86::VFMSUBSSr231r, X86::VFMSUBSSr231m, 0 }, + { X86::VFMSUBSDr231r, X86::VFMSUBSDr231m, 0 }, + { X86::VFMSUBSSr132r, X86::VFMSUBSSr132m, 0 }, + { X86::VFMSUBSDr132r, X86::VFMSUBSDr132m, 0 }, + { X86::VFMSUBSSr213r, X86::VFMSUBSSr213m, 0 }, + { X86::VFMSUBSDr213r, X86::VFMSUBSDr213m, 0 }, + { X86::VFMSUBSSr213r_Int, X86::VFMSUBSSr213m_Int, 0 }, + { X86::VFMSUBSDr213r_Int, X86::VFMSUBSDr213m_Int, 0 }, + + { X86::VFMSUBPSr231r, X86::VFMSUBPSr231m, TB_ALIGN_16 }, + { X86::VFMSUBPDr231r, X86::VFMSUBPDr231m, TB_ALIGN_16 }, + { X86::VFMSUBPSr132r, X86::VFMSUBPSr132m, TB_ALIGN_16 }, + { X86::VFMSUBPDr132r, X86::VFMSUBPDr132m, TB_ALIGN_16 }, + { X86::VFMSUBPSr213r, X86::VFMSUBPSr213m, TB_ALIGN_16 }, + { X86::VFMSUBPDr213r, X86::VFMSUBPDr213m, TB_ALIGN_16 }, + { X86::VFMSUBPSr231rY, X86::VFMSUBPSr231mY, TB_ALIGN_32 }, + { X86::VFMSUBPDr231rY, X86::VFMSUBPDr231mY, TB_ALIGN_32 }, + { X86::VFMSUBPSr132rY, 
X86::VFMSUBPSr132mY, TB_ALIGN_32 }, + { X86::VFMSUBPDr132rY, X86::VFMSUBPDr132mY, TB_ALIGN_32 }, + { X86::VFMSUBPSr213rY, X86::VFMSUBPSr213mY, TB_ALIGN_32 }, + { X86::VFMSUBPDr213rY, X86::VFMSUBPDr213mY, TB_ALIGN_32 }, + { X86::VFMSUBPSr213r_Int, X86::VFMSUBPSr213m_Int, TB_ALIGN_16 }, + { X86::VFMSUBPDr213r_Int, X86::VFMSUBPDr213m_Int, TB_ALIGN_16 }, + { X86::VFMSUBPSr213rY_Int, X86::VFMSUBPSr213mY_Int, TB_ALIGN_32 }, + { X86::VFMSUBPDr213rY_Int, X86::VFMSUBPDr213mY_Int, TB_ALIGN_32 }, + + { X86::VFNMSUBSSr231r, X86::VFNMSUBSSr231m, 0 }, + { X86::VFNMSUBSDr231r, X86::VFNMSUBSDr231m, 0 }, + { X86::VFNMSUBSSr132r, X86::VFNMSUBSSr132m, 0 }, + { X86::VFNMSUBSDr132r, X86::VFNMSUBSDr132m, 0 }, + { X86::VFNMSUBSSr213r, X86::VFNMSUBSSr213m, 0 }, + { X86::VFNMSUBSDr213r, X86::VFNMSUBSDr213m, 0 }, + { X86::VFNMSUBSSr213r_Int, X86::VFNMSUBSSr213m_Int, 0 }, + { X86::VFNMSUBSDr213r_Int, X86::VFNMSUBSDr213m_Int, 0 }, + + { X86::VFNMSUBPSr231r, X86::VFNMSUBPSr231m, TB_ALIGN_16 }, + { X86::VFNMSUBPDr231r, X86::VFNMSUBPDr231m, TB_ALIGN_16 }, + { X86::VFNMSUBPSr132r, X86::VFNMSUBPSr132m, TB_ALIGN_16 }, + { X86::VFNMSUBPDr132r, X86::VFNMSUBPDr132m, TB_ALIGN_16 }, + { X86::VFNMSUBPSr213r, X86::VFNMSUBPSr213m, TB_ALIGN_16 }, + { X86::VFNMSUBPDr213r, X86::VFNMSUBPDr213m, TB_ALIGN_16 }, + { X86::VFNMSUBPSr231rY, X86::VFNMSUBPSr231mY, TB_ALIGN_32 }, + { X86::VFNMSUBPDr231rY, X86::VFNMSUBPDr231mY, TB_ALIGN_32 }, + { X86::VFNMSUBPSr132rY, X86::VFNMSUBPSr132mY, TB_ALIGN_32 }, + { X86::VFNMSUBPDr132rY, X86::VFNMSUBPDr132mY, TB_ALIGN_32 }, + { X86::VFNMSUBPSr213rY, X86::VFNMSUBPSr213mY, TB_ALIGN_32 }, + { X86::VFNMSUBPDr213rY, X86::VFNMSUBPDr213mY, TB_ALIGN_32 }, + { X86::VFNMSUBPSr213r_Int, X86::VFNMSUBPSr213m_Int, TB_ALIGN_16 }, + { X86::VFNMSUBPDr213r_Int, X86::VFNMSUBPDr213m_Int, TB_ALIGN_16 }, + { X86::VFNMSUBPSr213rY_Int, X86::VFNMSUBPSr213mY_Int, TB_ALIGN_32 }, + { X86::VFNMSUBPDr213rY_Int, X86::VFNMSUBPDr213mY_Int, TB_ALIGN_32 }, + + { X86::VFMADDSUBPSr231r, X86::VFMADDSUBPSr231m, TB_ALIGN_16 }, + { X86::VFMADDSUBPDr231r, X86::VFMADDSUBPDr231m, TB_ALIGN_16 }, + { X86::VFMADDSUBPSr132r, X86::VFMADDSUBPSr132m, TB_ALIGN_16 }, + { X86::VFMADDSUBPDr132r, X86::VFMADDSUBPDr132m, TB_ALIGN_16 }, + { X86::VFMADDSUBPSr213r, X86::VFMADDSUBPSr213m, TB_ALIGN_16 }, + { X86::VFMADDSUBPDr213r, X86::VFMADDSUBPDr213m, TB_ALIGN_16 }, + { X86::VFMADDSUBPSr231rY, X86::VFMADDSUBPSr231mY, TB_ALIGN_32 }, + { X86::VFMADDSUBPDr231rY, X86::VFMADDSUBPDr231mY, TB_ALIGN_32 }, + { X86::VFMADDSUBPSr132rY, X86::VFMADDSUBPSr132mY, TB_ALIGN_32 }, + { X86::VFMADDSUBPDr132rY, X86::VFMADDSUBPDr132mY, TB_ALIGN_32 }, + { X86::VFMADDSUBPSr213rY, X86::VFMADDSUBPSr213mY, TB_ALIGN_32 }, + { X86::VFMADDSUBPDr213rY, X86::VFMADDSUBPDr213mY, TB_ALIGN_32 }, + { X86::VFMADDSUBPSr213r_Int, X86::VFMADDSUBPSr213m_Int, TB_ALIGN_16 }, + { X86::VFMADDSUBPDr213r_Int, X86::VFMADDSUBPDr213m_Int, TB_ALIGN_16 }, + { X86::VFMADDSUBPSr213rY_Int, X86::VFMADDSUBPSr213mY_Int, TB_ALIGN_32 }, + { X86::VFMADDSUBPDr213rY_Int, X86::VFMADDSUBPDr213mY_Int, TB_ALIGN_32 }, + + { X86::VFMSUBADDPSr231r, X86::VFMSUBADDPSr231m, TB_ALIGN_16 }, + { X86::VFMSUBADDPDr231r, X86::VFMSUBADDPDr231m, TB_ALIGN_16 }, + { X86::VFMSUBADDPSr132r, X86::VFMSUBADDPSr132m, TB_ALIGN_16 }, + { X86::VFMSUBADDPDr132r, X86::VFMSUBADDPDr132m, TB_ALIGN_16 }, + { X86::VFMSUBADDPSr213r, X86::VFMSUBADDPSr213m, TB_ALIGN_16 }, + { X86::VFMSUBADDPDr213r, X86::VFMSUBADDPDr213m, TB_ALIGN_16 }, + { X86::VFMSUBADDPSr231rY, X86::VFMSUBADDPSr231mY, TB_ALIGN_32 }, + { X86::VFMSUBADDPDr231rY, X86::VFMSUBADDPDr231mY, 
TB_ALIGN_32 }, + { X86::VFMSUBADDPSr132rY, X86::VFMSUBADDPSr132mY, TB_ALIGN_32 }, + { X86::VFMSUBADDPDr132rY, X86::VFMSUBADDPDr132mY, TB_ALIGN_32 }, + { X86::VFMSUBADDPSr213rY, X86::VFMSUBADDPSr213mY, TB_ALIGN_32 }, + { X86::VFMSUBADDPDr213rY, X86::VFMSUBADDPDr213mY, TB_ALIGN_32 }, + { X86::VFMSUBADDPSr213r_Int, X86::VFMSUBADDPSr213m_Int, TB_ALIGN_16 }, + { X86::VFMSUBADDPDr213r_Int, X86::VFMSUBADDPDr213m_Int, TB_ALIGN_16 }, + { X86::VFMSUBADDPSr213rY_Int, X86::VFMSUBADDPSr213mY_Int, TB_ALIGN_32 }, + { X86::VFMSUBADDPDr213rY_Int, X86::VFMSUBADDPDr213mY_Int, TB_ALIGN_32 }, + }; + + for (unsigned i = 0, e = array_lengthof(OpTbl3); i != e; ++i) { + unsigned RegOp = OpTbl3[i].RegOp; + unsigned MemOp = OpTbl3[i].MemOp; + unsigned Flags = OpTbl3[i].Flags; + AddTableEntry(RegOp2MemOpTable3, MemOp2RegOpTable, + RegOp, MemOp, + // Index 3, folded load + Flags | TB_INDEX_3 | TB_FOLDED_LOAD); + } + } void @@ -1312,6 +1463,9 @@ unsigned X86InstrInfo::isStoreToStackSlotPostFE(const MachineInstr *MI, /// regIsPICBase - Return true if register is PIC base (i.e., defined by /// X86::MOVPC32r). static bool regIsPICBase(unsigned BaseReg, const MachineRegisterInfo &MRI) { + // Don't waste compile time scanning use-def chains of physregs. + if (!TargetRegisterInfo::isVirtualRegister(BaseReg)) + return false; bool isPICBase = false; for (MachineRegisterInfo::def_iterator I = MRI.def_begin(BaseReg), E = MRI.def_end(); I != E; ++I) { @@ -1369,16 +1523,7 @@ X86InstrInfo::isReallyTriviallyReMaterializable(const MachineInstr *MI, return false; const MachineFunction &MF = *MI->getParent()->getParent(); const MachineRegisterInfo &MRI = MF.getRegInfo(); - bool isPICBase = false; - for (MachineRegisterInfo::def_iterator I = MRI.def_begin(BaseReg), - E = MRI.def_end(); I != E; ++I) { - MachineInstr *DefMI = I.getOperand().getParent(); - if (DefMI->getOpcode() != X86::MOVPC32r) - return false; - assert(!isPICBase && "More than one PIC base?"); - isPICBase = true; - } - return isPICBase; + return regIsPICBase(BaseReg, MRI); } return false; } @@ -1782,12 +1927,13 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, assert(MI->getNumOperands() >= 2 && "Unknown inc instruction!"); unsigned Opc = MIOpc == X86::INC64r ? X86::LEA64r : (is64Bit ? X86::LEA64_32r : X86::LEA32r); + const TargetRegisterClass *RC = MIOpc == X86::INC64r ? + (const TargetRegisterClass*)&X86::GR64_NOSPRegClass : + (const TargetRegisterClass*)&X86::GR32_NOSPRegClass; // LEA can't handle RSP. if (TargetRegisterInfo::isVirtualRegister(Src) && - !MF.getRegInfo().constrainRegClass(Src, - MIOpc == X86::INC64r ? X86::GR64_NOSPRegisterClass : - X86::GR32_NOSPRegisterClass)) + !MF.getRegInfo().constrainRegClass(Src, RC)) return 0; NewMI = addRegOffset(BuildMI(MF, MI->getDebugLoc(), get(Opc)) @@ -1812,11 +1958,12 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, assert(MI->getNumOperands() >= 2 && "Unknown dec instruction!"); unsigned Opc = MIOpc == X86::DEC64r ? X86::LEA64r : (is64Bit ? X86::LEA64_32r : X86::LEA32r); + const TargetRegisterClass *RC = MIOpc == X86::DEC64r ? + (const TargetRegisterClass*)&X86::GR64_NOSPRegClass : + (const TargetRegisterClass*)&X86::GR32_NOSPRegClass; // LEA can't handle RSP. if (TargetRegisterInfo::isVirtualRegister(Src) && - !MF.getRegInfo().constrainRegClass(Src, - MIOpc == X86::DEC64r ?
X86::GR64_NOSPRegisterClass : - X86::GR32_NOSPRegisterClass)) + !MF.getRegInfo().constrainRegClass(Src, RC)) return 0; NewMI = addRegOffset(BuildMI(MF, MI->getDebugLoc(), get(Opc)) @@ -1844,10 +1991,10 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, const TargetRegisterClass *RC; if (MIOpc == X86::ADD64rr || MIOpc == X86::ADD64rr_DB) { Opc = X86::LEA64r; - RC = X86::GR64_NOSPRegisterClass; + RC = &X86::GR64_NOSPRegClass; } else { Opc = is64Bit ? X86::LEA64_32r : X86::LEA32r; - RC = X86::GR32_NOSPRegisterClass; + RC = &X86::GR32_NOSPRegClass; } @@ -1863,6 +2010,13 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, .addReg(Dest, RegState::Define | getDeadRegState(isDead)), Src, isKill, Src2, isKill2); + + // Preserve undefness of the operands. + bool isUndef = MI->getOperand(1).isUndef(); + bool isUndef2 = MI->getOperand(2).isUndef(); + NewMI->getOperand(1).setIsUndef(isUndef); + NewMI->getOperand(3).setIsUndef(isUndef2); + if (LV && isKill2) LV->replaceKillInstruction(Src2, MI, NewMI); break; @@ -2079,7 +2233,7 @@ X86InstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const { } } -static X86::CondCode GetCondFromBranchOpc(unsigned BrOpc) { +static X86::CondCode getCondFromBranchOpc(unsigned BrOpc) { switch (BrOpc) { default: return X86::COND_INVALID; case X86::JE_4: return X86::COND_E; @@ -2101,6 +2255,84 @@ static X86::CondCode GetCondFromBranchOpc(unsigned BrOpc) { } } +/// getCondFromSETOpc - return condition code of a SET opcode. +static X86::CondCode getCondFromSETOpc(unsigned Opc) { + switch (Opc) { + default: return X86::COND_INVALID; + case X86::SETAr: case X86::SETAm: return X86::COND_A; + case X86::SETAEr: case X86::SETAEm: return X86::COND_AE; + case X86::SETBr: case X86::SETBm: return X86::COND_B; + case X86::SETBEr: case X86::SETBEm: return X86::COND_BE; + case X86::SETEr: case X86::SETEm: return X86::COND_E; + case X86::SETGr: case X86::SETGm: return X86::COND_G; + case X86::SETGEr: case X86::SETGEm: return X86::COND_GE; + case X86::SETLr: case X86::SETLm: return X86::COND_L; + case X86::SETLEr: case X86::SETLEm: return X86::COND_LE; + case X86::SETNEr: case X86::SETNEm: return X86::COND_NE; + case X86::SETNOr: case X86::SETNOm: return X86::COND_NO; + case X86::SETNPr: case X86::SETNPm: return X86::COND_NP; + case X86::SETNSr: case X86::SETNSm: return X86::COND_NS; + case X86::SETOr: case X86::SETOm: return X86::COND_O; + case X86::SETPr: case X86::SETPm: return X86::COND_P; + case X86::SETSr: case X86::SETSm: return X86::COND_S; + } +} + +/// getCondFromCmovOpc - return condition code of a CMov opcode. 
+static X86::CondCode getCondFromCMovOpc(unsigned Opc) { + switch (Opc) { + default: return X86::COND_INVALID; + case X86::CMOVA16rm: case X86::CMOVA16rr: case X86::CMOVA32rm: + case X86::CMOVA32rr: case X86::CMOVA64rm: case X86::CMOVA64rr: + return X86::COND_A; + case X86::CMOVAE16rm: case X86::CMOVAE16rr: case X86::CMOVAE32rm: + case X86::CMOVAE32rr: case X86::CMOVAE64rm: case X86::CMOVAE64rr: + return X86::COND_AE; + case X86::CMOVB16rm: case X86::CMOVB16rr: case X86::CMOVB32rm: + case X86::CMOVB32rr: case X86::CMOVB64rm: case X86::CMOVB64rr: + return X86::COND_B; + case X86::CMOVBE16rm: case X86::CMOVBE16rr: case X86::CMOVBE32rm: + case X86::CMOVBE32rr: case X86::CMOVBE64rm: case X86::CMOVBE64rr: + return X86::COND_BE; + case X86::CMOVE16rm: case X86::CMOVE16rr: case X86::CMOVE32rm: + case X86::CMOVE32rr: case X86::CMOVE64rm: case X86::CMOVE64rr: + return X86::COND_E; + case X86::CMOVG16rm: case X86::CMOVG16rr: case X86::CMOVG32rm: + case X86::CMOVG32rr: case X86::CMOVG64rm: case X86::CMOVG64rr: + return X86::COND_G; + case X86::CMOVGE16rm: case X86::CMOVGE16rr: case X86::CMOVGE32rm: + case X86::CMOVGE32rr: case X86::CMOVGE64rm: case X86::CMOVGE64rr: + return X86::COND_GE; + case X86::CMOVL16rm: case X86::CMOVL16rr: case X86::CMOVL32rm: + case X86::CMOVL32rr: case X86::CMOVL64rm: case X86::CMOVL64rr: + return X86::COND_L; + case X86::CMOVLE16rm: case X86::CMOVLE16rr: case X86::CMOVLE32rm: + case X86::CMOVLE32rr: case X86::CMOVLE64rm: case X86::CMOVLE64rr: + return X86::COND_LE; + case X86::CMOVNE16rm: case X86::CMOVNE16rr: case X86::CMOVNE32rm: + case X86::CMOVNE32rr: case X86::CMOVNE64rm: case X86::CMOVNE64rr: + return X86::COND_NE; + case X86::CMOVNO16rm: case X86::CMOVNO16rr: case X86::CMOVNO32rm: + case X86::CMOVNO32rr: case X86::CMOVNO64rm: case X86::CMOVNO64rr: + return X86::COND_NO; + case X86::CMOVNP16rm: case X86::CMOVNP16rr: case X86::CMOVNP32rm: + case X86::CMOVNP32rr: case X86::CMOVNP64rm: case X86::CMOVNP64rr: + return X86::COND_NP; + case X86::CMOVNS16rm: case X86::CMOVNS16rr: case X86::CMOVNS32rm: + case X86::CMOVNS32rr: case X86::CMOVNS64rm: case X86::CMOVNS64rr: + return X86::COND_NS; + case X86::CMOVO16rm: case X86::CMOVO16rr: case X86::CMOVO32rm: + case X86::CMOVO32rr: case X86::CMOVO64rm: case X86::CMOVO64rr: + return X86::COND_O; + case X86::CMOVP16rm: case X86::CMOVP16rr: case X86::CMOVP32rm: + case X86::CMOVP32rr: case X86::CMOVP64rm: case X86::CMOVP64rr: + return X86::COND_P; + case X86::CMOVS16rm: case X86::CMOVS16rr: case X86::CMOVS32rm: + case X86::CMOVS32rr: case X86::CMOVS64rm: case X86::CMOVS64rr: + return X86::COND_S; + } +} + unsigned X86::GetCondBranchFromCond(X86::CondCode CC) { switch (CC) { default: llvm_unreachable("Illegal condition code!"); @@ -2147,6 +2379,101 @@ X86::CondCode X86::GetOppositeBranchCondition(X86::CondCode CC) { } } +/// getSwappedCondition - assume the flags are set by MI(a,b), return +/// the condition code if we modify the instructions such that flags are +/// set by MI(b,a). 
+static X86::CondCode getSwappedCondition(X86::CondCode CC) { + switch (CC) { + default: return X86::COND_INVALID; + case X86::COND_E: return X86::COND_E; + case X86::COND_NE: return X86::COND_NE; + case X86::COND_L: return X86::COND_G; + case X86::COND_LE: return X86::COND_GE; + case X86::COND_G: return X86::COND_L; + case X86::COND_GE: return X86::COND_LE; + case X86::COND_B: return X86::COND_A; + case X86::COND_BE: return X86::COND_AE; + case X86::COND_A: return X86::COND_B; + case X86::COND_AE: return X86::COND_BE; + } +} + +/// getSETFromCond - Return a set opcode for the given condition and +/// whether it has memory operand. +static unsigned getSETFromCond(X86::CondCode CC, + bool HasMemoryOperand) { + static const unsigned Opc[16][2] = { + { X86::SETAr, X86::SETAm }, + { X86::SETAEr, X86::SETAEm }, + { X86::SETBr, X86::SETBm }, + { X86::SETBEr, X86::SETBEm }, + { X86::SETEr, X86::SETEm }, + { X86::SETGr, X86::SETGm }, + { X86::SETGEr, X86::SETGEm }, + { X86::SETLr, X86::SETLm }, + { X86::SETLEr, X86::SETLEm }, + { X86::SETNEr, X86::SETNEm }, + { X86::SETNOr, X86::SETNOm }, + { X86::SETNPr, X86::SETNPm }, + { X86::SETNSr, X86::SETNSm }, + { X86::SETOr, X86::SETOm }, + { X86::SETPr, X86::SETPm }, + { X86::SETSr, X86::SETSm } + }; + + assert(CC < 16 && "Can only handle standard cond codes"); + return Opc[CC][HasMemoryOperand ? 1 : 0]; +} + +/// getCMovFromCond - Return a cmov opcode for the given condition, +/// register size in bytes, and operand type. +static unsigned getCMovFromCond(X86::CondCode CC, unsigned RegBytes, + bool HasMemoryOperand) { + static const unsigned Opc[32][3] = { + { X86::CMOVA16rr, X86::CMOVA32rr, X86::CMOVA64rr }, + { X86::CMOVAE16rr, X86::CMOVAE32rr, X86::CMOVAE64rr }, + { X86::CMOVB16rr, X86::CMOVB32rr, X86::CMOVB64rr }, + { X86::CMOVBE16rr, X86::CMOVBE32rr, X86::CMOVBE64rr }, + { X86::CMOVE16rr, X86::CMOVE32rr, X86::CMOVE64rr }, + { X86::CMOVG16rr, X86::CMOVG32rr, X86::CMOVG64rr }, + { X86::CMOVGE16rr, X86::CMOVGE32rr, X86::CMOVGE64rr }, + { X86::CMOVL16rr, X86::CMOVL32rr, X86::CMOVL64rr }, + { X86::CMOVLE16rr, X86::CMOVLE32rr, X86::CMOVLE64rr }, + { X86::CMOVNE16rr, X86::CMOVNE32rr, X86::CMOVNE64rr }, + { X86::CMOVNO16rr, X86::CMOVNO32rr, X86::CMOVNO64rr }, + { X86::CMOVNP16rr, X86::CMOVNP32rr, X86::CMOVNP64rr }, + { X86::CMOVNS16rr, X86::CMOVNS32rr, X86::CMOVNS64rr }, + { X86::CMOVO16rr, X86::CMOVO32rr, X86::CMOVO64rr }, + { X86::CMOVP16rr, X86::CMOVP32rr, X86::CMOVP64rr }, + { X86::CMOVS16rr, X86::CMOVS32rr, X86::CMOVS64rr }, + { X86::CMOVA16rm, X86::CMOVA32rm, X86::CMOVA64rm }, + { X86::CMOVAE16rm, X86::CMOVAE32rm, X86::CMOVAE64rm }, + { X86::CMOVB16rm, X86::CMOVB32rm, X86::CMOVB64rm }, + { X86::CMOVBE16rm, X86::CMOVBE32rm, X86::CMOVBE64rm }, + { X86::CMOVE16rm, X86::CMOVE32rm, X86::CMOVE64rm }, + { X86::CMOVG16rm, X86::CMOVG32rm, X86::CMOVG64rm }, + { X86::CMOVGE16rm, X86::CMOVGE32rm, X86::CMOVGE64rm }, + { X86::CMOVL16rm, X86::CMOVL32rm, X86::CMOVL64rm }, + { X86::CMOVLE16rm, X86::CMOVLE32rm, X86::CMOVLE64rm }, + { X86::CMOVNE16rm, X86::CMOVNE32rm, X86::CMOVNE64rm }, + { X86::CMOVNO16rm, X86::CMOVNO32rm, X86::CMOVNO64rm }, + { X86::CMOVNP16rm, X86::CMOVNP32rm, X86::CMOVNP64rm }, + { X86::CMOVNS16rm, X86::CMOVNS32rm, X86::CMOVNS64rm }, + { X86::CMOVO16rm, X86::CMOVO32rm, X86::CMOVO64rm }, + { X86::CMOVP16rm, X86::CMOVP32rm, X86::CMOVP64rm }, + { X86::CMOVS16rm, X86::CMOVS32rm, X86::CMOVS64rm } + }; + + assert(CC < 16 && "Can only handle standard cond codes"); + unsigned Idx = HasMemoryOperand ? 
16+CC : CC; + switch(RegBytes) { + default: llvm_unreachable("Illegal register size!"); + case 2: return Opc[Idx][0]; + case 4: return Opc[Idx][1]; + case 8: return Opc[Idx][2]; + } +} + bool X86InstrInfo::isUnpredicatedTerminator(const MachineInstr *MI) const { if (!MI->isTerminator()) return false; @@ -2213,7 +2540,7 @@ bool X86InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, } // Handle conditional branches. - X86::CondCode BranchCode = GetCondFromBranchOpc(I->getOpcode()); + X86::CondCode BranchCode = getCondFromBranchOpc(I->getOpcode()); if (BranchCode == X86::COND_INVALID) return true; // Can't handle indirect branch. @@ -2311,7 +2638,7 @@ unsigned X86InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const { if (I->isDebugValue()) continue; if (I->getOpcode() != X86::JMP_4 && - GetCondFromBranchOpc(I->getOpcode()) == X86::COND_INVALID) + getCondFromBranchOpc(I->getOpcode()) == X86::COND_INVALID) break; // Remove the branch. I->eraseFromParent(); @@ -2371,6 +2698,56 @@ X86InstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, return Count; } +bool X86InstrInfo:: +canInsertSelect(const MachineBasicBlock &MBB, + const SmallVectorImpl &Cond, + unsigned TrueReg, unsigned FalseReg, + int &CondCycles, int &TrueCycles, int &FalseCycles) const { + // Not all subtargets have cmov instructions. + if (!TM.getSubtarget().hasCMov()) + return false; + if (Cond.size() != 1) + return false; + // We cannot do the composite conditions, at least not in SSA form. + if ((X86::CondCode)Cond[0].getImm() > X86::COND_S) + return false; + + // Check register classes. + const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); + const TargetRegisterClass *RC = + RI.getCommonSubClass(MRI.getRegClass(TrueReg), MRI.getRegClass(FalseReg)); + if (!RC) + return false; + + // We have cmov instructions for 16, 32, and 64 bit general purpose registers. + if (X86::GR16RegClass.hasSubClassEq(RC) || + X86::GR32RegClass.hasSubClassEq(RC) || + X86::GR64RegClass.hasSubClassEq(RC)) { + // This latency applies to Pentium M, Merom, Wolfdale, Nehalem, and Sandy + // Bridge. Probably Ivy Bridge as well. + CondCycles = 2; + TrueCycles = 2; + FalseCycles = 2; + return true; + } + + // Can't do vectors. + return false; +} + +void X86InstrInfo::insertSelect(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, DebugLoc DL, + unsigned DstReg, + const SmallVectorImpl &Cond, + unsigned TrueReg, unsigned FalseReg) const { + MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); + assert(Cond.size() == 1 && "Invalid Cond array"); + unsigned Opc = getCMovFromCond((X86::CondCode)Cond[0].getImm(), + MRI.getRegClass(DstReg)->getSize(), + false/*HasMemoryOperand*/); + BuildMI(MBB, I, DL, get(Opc), DstReg).addReg(FalseReg).addReg(TrueReg); +} + /// isHReg - Test if the given register is a physical h register. static bool isHReg(unsigned Reg) { return X86::GR8_ABCD_HRegClass.contains(Reg); @@ -2637,6 +3014,464 @@ void X86InstrInfo::loadRegFromAddr(MachineFunction &MF, unsigned DestReg, NewMIs.push_back(MIB); } +bool X86InstrInfo:: +analyzeCompare(const MachineInstr *MI, unsigned &SrcReg, unsigned &SrcReg2, + int &CmpMask, int &CmpValue) const { + switch (MI->getOpcode()) { + default: break; + case X86::CMP64ri32: + case X86::CMP64ri8: + case X86::CMP32ri: + case X86::CMP32ri8: + case X86::CMP16ri: + case X86::CMP16ri8: + case X86::CMP8ri: + SrcReg = MI->getOperand(0).getReg(); + SrcReg2 = 0; + CmpMask = ~0; + CmpValue = MI->getOperand(1).getImm(); + return true; + // A SUB can be used to perform comparison. 
+ case X86::SUB64rm: + case X86::SUB32rm: + case X86::SUB16rm: + case X86::SUB8rm: + SrcReg = MI->getOperand(1).getReg(); + SrcReg2 = 0; + CmpMask = ~0; + CmpValue = 0; + return true; + case X86::SUB64rr: + case X86::SUB32rr: + case X86::SUB16rr: + case X86::SUB8rr: + SrcReg = MI->getOperand(1).getReg(); + SrcReg2 = MI->getOperand(2).getReg(); + CmpMask = ~0; + CmpValue = 0; + return true; + case X86::SUB64ri32: + case X86::SUB64ri8: + case X86::SUB32ri: + case X86::SUB32ri8: + case X86::SUB16ri: + case X86::SUB16ri8: + case X86::SUB8ri: + SrcReg = MI->getOperand(1).getReg(); + SrcReg2 = 0; + CmpMask = ~0; + CmpValue = MI->getOperand(2).getImm(); + return true; + case X86::CMP64rr: + case X86::CMP32rr: + case X86::CMP16rr: + case X86::CMP8rr: + SrcReg = MI->getOperand(0).getReg(); + SrcReg2 = MI->getOperand(1).getReg(); + CmpMask = ~0; + CmpValue = 0; + return true; + case X86::TEST8rr: + case X86::TEST16rr: + case X86::TEST32rr: + case X86::TEST64rr: + SrcReg = MI->getOperand(0).getReg(); + if (MI->getOperand(1).getReg() != SrcReg) return false; + // Compare against zero. + SrcReg2 = 0; + CmpMask = ~0; + CmpValue = 0; + return true; + } + return false; +} + +/// isRedundantFlagInstr - check whether the first instruction, whose only +/// purpose is to update flags, can be made redundant. +/// CMPrr can be made redundant by SUBrr if the operands are the same. +/// This function can be extended later on. +/// SrcReg, SrcRegs: register operands for FlagI. +/// ImmValue: immediate for FlagI if it takes an immediate. +inline static bool isRedundantFlagInstr(MachineInstr *FlagI, unsigned SrcReg, + unsigned SrcReg2, int ImmValue, + MachineInstr *OI) { + if (((FlagI->getOpcode() == X86::CMP64rr && + OI->getOpcode() == X86::SUB64rr) || + (FlagI->getOpcode() == X86::CMP32rr && + OI->getOpcode() == X86::SUB32rr)|| + (FlagI->getOpcode() == X86::CMP16rr && + OI->getOpcode() == X86::SUB16rr)|| + (FlagI->getOpcode() == X86::CMP8rr && + OI->getOpcode() == X86::SUB8rr)) && + ((OI->getOperand(1).getReg() == SrcReg && + OI->getOperand(2).getReg() == SrcReg2) || + (OI->getOperand(1).getReg() == SrcReg2 && + OI->getOperand(2).getReg() == SrcReg))) + return true; + + if (((FlagI->getOpcode() == X86::CMP64ri32 && + OI->getOpcode() == X86::SUB64ri32) || + (FlagI->getOpcode() == X86::CMP64ri8 && + OI->getOpcode() == X86::SUB64ri8) || + (FlagI->getOpcode() == X86::CMP32ri && + OI->getOpcode() == X86::SUB32ri) || + (FlagI->getOpcode() == X86::CMP32ri8 && + OI->getOpcode() == X86::SUB32ri8) || + (FlagI->getOpcode() == X86::CMP16ri && + OI->getOpcode() == X86::SUB16ri) || + (FlagI->getOpcode() == X86::CMP16ri8 && + OI->getOpcode() == X86::SUB16ri8) || + (FlagI->getOpcode() == X86::CMP8ri && + OI->getOpcode() == X86::SUB8ri)) && + OI->getOperand(1).getReg() == SrcReg && + OI->getOperand(2).getImm() == ImmValue) + return true; + return false; +} + +/// isDefConvertible - check whether the definition can be converted +/// to remove a comparison against zero. 
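+/// For example, in the sequence "andl $7, %eax; testl %eax, %eax", the AND
+/// already sets ZF and SF according to its result (and clears OF and CF), so
+/// the TEST against zero is removable once the AND is marked as defining
+/// EFLAGS.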
+inline static bool isDefConvertible(MachineInstr *MI) { + switch (MI->getOpcode()) { + default: return false; + case X86::SUB64ri32: case X86::SUB64ri8: case X86::SUB32ri: + case X86::SUB32ri8: case X86::SUB16ri: case X86::SUB16ri8: + case X86::SUB8ri: case X86::SUB64rr: case X86::SUB32rr: + case X86::SUB16rr: case X86::SUB8rr: case X86::SUB64rm: + case X86::SUB32rm: case X86::SUB16rm: case X86::SUB8rm: + case X86::ADD64ri32: case X86::ADD64ri8: case X86::ADD32ri: + case X86::ADD32ri8: case X86::ADD16ri: case X86::ADD16ri8: + case X86::ADD8ri: case X86::ADD64rr: case X86::ADD32rr: + case X86::ADD16rr: case X86::ADD8rr: case X86::ADD64rm: + case X86::ADD32rm: case X86::ADD16rm: case X86::ADD8rm: + case X86::AND64ri32: case X86::AND64ri8: case X86::AND32ri: + case X86::AND32ri8: case X86::AND16ri: case X86::AND16ri8: + case X86::AND8ri: case X86::AND64rr: case X86::AND32rr: + case X86::AND16rr: case X86::AND8rr: case X86::AND64rm: + case X86::AND32rm: case X86::AND16rm: case X86::AND8rm: + case X86::XOR64ri32: case X86::XOR64ri8: case X86::XOR32ri: + case X86::XOR32ri8: case X86::XOR16ri: case X86::XOR16ri8: + case X86::XOR8ri: case X86::XOR64rr: case X86::XOR32rr: + case X86::XOR16rr: case X86::XOR8rr: case X86::XOR64rm: + case X86::XOR32rm: case X86::XOR16rm: case X86::XOR8rm: + case X86::OR64ri32: case X86::OR64ri8: case X86::OR32ri: + case X86::OR32ri8: case X86::OR16ri: case X86::OR16ri8: + case X86::OR8ri: case X86::OR64rr: case X86::OR32rr: + case X86::OR16rr: case X86::OR8rr: case X86::OR64rm: + case X86::OR32rm: case X86::OR16rm: case X86::OR8rm: + return true; + } +} + +/// optimizeCompareInstr - Check if there exists an earlier instruction that +/// operates on the same source operands and sets flags in the same way as +/// Compare; remove Compare if possible. +bool X86InstrInfo:: +optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2, + int CmpMask, int CmpValue, + const MachineRegisterInfo *MRI) const { + // Check whether we can replace SUB with CMP. + unsigned NewOpcode = 0; + switch (CmpInstr->getOpcode()) { + default: break; + case X86::SUB64ri32: + case X86::SUB64ri8: + case X86::SUB32ri: + case X86::SUB32ri8: + case X86::SUB16ri: + case X86::SUB16ri8: + case X86::SUB8ri: + case X86::SUB64rm: + case X86::SUB32rm: + case X86::SUB16rm: + case X86::SUB8rm: + case X86::SUB64rr: + case X86::SUB32rr: + case X86::SUB16rr: + case X86::SUB8rr: { + if (!MRI->use_nodbg_empty(CmpInstr->getOperand(0).getReg())) + return false; + // There is no use of the destination register, we can replace SUB with CMP. 
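+    // e.g. a SUB32rr whose result register has no non-debug uses becomes a
+    // CMP32rr over the same operands: the flag behavior is unchanged and the
+    // dead definition disappears.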
+ switch (CmpInstr->getOpcode()) { + default: llvm_unreachable(0); + case X86::SUB64rm: NewOpcode = X86::CMP64rm; break; + case X86::SUB32rm: NewOpcode = X86::CMP32rm; break; + case X86::SUB16rm: NewOpcode = X86::CMP16rm; break; + case X86::SUB8rm: NewOpcode = X86::CMP8rm; break; + case X86::SUB64rr: NewOpcode = X86::CMP64rr; break; + case X86::SUB32rr: NewOpcode = X86::CMP32rr; break; + case X86::SUB16rr: NewOpcode = X86::CMP16rr; break; + case X86::SUB8rr: NewOpcode = X86::CMP8rr; break; + case X86::SUB64ri32: NewOpcode = X86::CMP64ri32; break; + case X86::SUB64ri8: NewOpcode = X86::CMP64ri8; break; + case X86::SUB32ri: NewOpcode = X86::CMP32ri; break; + case X86::SUB32ri8: NewOpcode = X86::CMP32ri8; break; + case X86::SUB16ri: NewOpcode = X86::CMP16ri; break; + case X86::SUB16ri8: NewOpcode = X86::CMP16ri8; break; + case X86::SUB8ri: NewOpcode = X86::CMP8ri; break; + } + CmpInstr->setDesc(get(NewOpcode)); + CmpInstr->RemoveOperand(0); + // Fall through to optimize Cmp if Cmp is CMPrr or CMPri. + if (NewOpcode == X86::CMP64rm || NewOpcode == X86::CMP32rm || + NewOpcode == X86::CMP16rm || NewOpcode == X86::CMP8rm) + return false; + } + } + + // Get the unique definition of SrcReg. + MachineInstr *MI = MRI->getUniqueVRegDef(SrcReg); + if (!MI) return false; + + // CmpInstr is the first instruction of the BB. + MachineBasicBlock::iterator I = CmpInstr, Def = MI; + + // If we are comparing against zero, check whether we can use MI to update + // EFLAGS. If MI is not in the same BB as CmpInstr, do not optimize. + bool IsCmpZero = (SrcReg2 == 0 && CmpValue == 0); + if (IsCmpZero && (MI->getParent() != CmpInstr->getParent() || + !isDefConvertible(MI))) + return false; + + // We are searching for an earlier instruction that can make CmpInstr + // redundant and that instruction will be saved in Sub. + MachineInstr *Sub = NULL; + const TargetRegisterInfo *TRI = &getRegisterInfo(); + + // We iterate backward, starting from the instruction before CmpInstr and + // stop when reaching the definition of a source register or done with the BB. + // RI points to the instruction before CmpInstr. + // If the definition is in this basic block, RE points to the definition; + // otherwise, RE is the rend of the basic block. + MachineBasicBlock::reverse_iterator + RI = MachineBasicBlock::reverse_iterator(I), + RE = CmpInstr->getParent() == MI->getParent() ? + MachineBasicBlock::reverse_iterator(++Def) /* points to MI */ : + CmpInstr->getParent()->rend(); + MachineInstr *Movr0Inst = 0; + for (; RI != RE; ++RI) { + MachineInstr *Instr = &*RI; + // Check whether CmpInstr can be made redundant by the current instruction. + if (!IsCmpZero && + isRedundantFlagInstr(CmpInstr, SrcReg, SrcReg2, CmpValue, Instr)) { + Sub = Instr; + break; + } + + if (Instr->modifiesRegister(X86::EFLAGS, TRI) || + Instr->readsRegister(X86::EFLAGS, TRI)) { + // This instruction modifies or uses EFLAGS. + + // MOV32r0 etc. are implemented with xor which clobbers condition code. + // They are safe to move up, if the definition to EFLAGS is dead and + // earlier instructions do not read or write EFLAGS. + if (!Movr0Inst && (Instr->getOpcode() == X86::MOV8r0 || + Instr->getOpcode() == X86::MOV16r0 || + Instr->getOpcode() == X86::MOV32r0 || + Instr->getOpcode() == X86::MOV64r0) && + Instr->registerDefIsDead(X86::EFLAGS, TRI)) { + Movr0Inst = Instr; + continue; + } + + // We can't remove CmpInstr. + return false; + } + } + + // Return false if no candidates exist. 
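+  // (For a compare against zero, MI -- the flag-setting definition found
+  // above -- is the candidate; otherwise we need the SUB located by the
+  // backward scan.)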
+ if (!IsCmpZero && !Sub) + return false; + + bool IsSwapped = (SrcReg2 != 0 && Sub->getOperand(1).getReg() == SrcReg2 && + Sub->getOperand(2).getReg() == SrcReg); + + // Scan forward from the instruction after CmpInstr for uses of EFLAGS. + // It is safe to remove CmpInstr if EFLAGS is redefined or killed. + // If we are done with the basic block, we need to check whether EFLAGS is + // live-out. + bool IsSafe = false; + SmallVector, 4> OpsToUpdate; + MachineBasicBlock::iterator E = CmpInstr->getParent()->end(); + for (++I; I != E; ++I) { + const MachineInstr &Instr = *I; + bool ModifyEFLAGS = Instr.modifiesRegister(X86::EFLAGS, TRI); + bool UseEFLAGS = Instr.readsRegister(X86::EFLAGS, TRI); + // We should check the usage if this instruction uses and updates EFLAGS. + if (!UseEFLAGS && ModifyEFLAGS) { + // It is safe to remove CmpInstr if EFLAGS is updated again. + IsSafe = true; + break; + } + if (!UseEFLAGS && !ModifyEFLAGS) + continue; + + // EFLAGS is used by this instruction. + X86::CondCode OldCC; + bool OpcIsSET = false; + if (IsCmpZero || IsSwapped) { + // We decode the condition code from opcode. + if (Instr.isBranch()) + OldCC = getCondFromBranchOpc(Instr.getOpcode()); + else { + OldCC = getCondFromSETOpc(Instr.getOpcode()); + if (OldCC != X86::COND_INVALID) + OpcIsSET = true; + else + OldCC = getCondFromCMovOpc(Instr.getOpcode()); + } + if (OldCC == X86::COND_INVALID) return false; + } + if (IsCmpZero) { + switch (OldCC) { + default: break; + case X86::COND_A: case X86::COND_AE: + case X86::COND_B: case X86::COND_BE: + case X86::COND_G: case X86::COND_GE: + case X86::COND_L: case X86::COND_LE: + case X86::COND_O: case X86::COND_NO: + // CF and OF are used, we can't perform this optimization. + return false; + } + } else if (IsSwapped) { + // If we have SUB(r1, r2) and CMP(r2, r1), the condition code needs + // to be changed from r2 > r1 to r1 < r2, from r2 < r1 to r1 > r2, etc. + // We swap the condition code and synthesize the new opcode. + X86::CondCode NewCC = getSwappedCondition(OldCC); + if (NewCC == X86::COND_INVALID) return false; + + // Synthesize the new opcode. + bool HasMemoryOperand = Instr.hasOneMemOperand(); + unsigned NewOpc; + if (Instr.isBranch()) + NewOpc = GetCondBranchFromCond(NewCC); + else if(OpcIsSET) + NewOpc = getSETFromCond(NewCC, HasMemoryOperand); + else { + unsigned DstReg = Instr.getOperand(0).getReg(); + NewOpc = getCMovFromCond(NewCC, MRI->getRegClass(DstReg)->getSize(), + HasMemoryOperand); + } + + // Push the MachineInstr to OpsToUpdate. + // If it is safe to remove CmpInstr, the condition code of these + // instructions will be modified. + OpsToUpdate.push_back(std::make_pair(&*I, NewOpc)); + } + if (ModifyEFLAGS || Instr.killsRegister(X86::EFLAGS, TRI)) { + // It is safe to remove CmpInstr if EFLAGS is updated again or killed. + IsSafe = true; + break; + } + } + + // If EFLAGS is not killed nor re-defined, we should check whether it is + // live-out. If it is live-out, do not optimize. + if ((IsCmpZero || IsSwapped) && !IsSafe) { + MachineBasicBlock *MBB = CmpInstr->getParent(); + for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(), + SE = MBB->succ_end(); SI != SE; ++SI) + if ((*SI)->isLiveIn(X86::EFLAGS)) + return false; + } + + // The instruction to be updated is either Sub or MI. + Sub = IsCmpZero ? MI : Sub; + // Move Movr0Inst to the place right before Sub. 
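+  // MOV32r0 and friends are lowered to XOR, which clobbers EFLAGS, so the
+  // zeroing must be hoisted above the instruction whose flags we now rely on.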
+ if (Movr0Inst) { + Sub->getParent()->remove(Movr0Inst); + Sub->getParent()->insert(MachineBasicBlock::iterator(Sub), Movr0Inst); + } + + // Make sure Sub instruction defines EFLAGS. + assert(Sub->getNumOperands() >= 2 && + Sub->getOperand(Sub->getNumOperands()-1).isReg() && + Sub->getOperand(Sub->getNumOperands()-1).getReg() == X86::EFLAGS && + "EFLAGS should be the last operand of SUB, ADD, OR, XOR, AND"); + Sub->getOperand(Sub->getNumOperands()-1).setIsDef(true); + CmpInstr->eraseFromParent(); + + // Modify the condition code of instructions in OpsToUpdate. + for (unsigned i = 0, e = OpsToUpdate.size(); i < e; i++) + OpsToUpdate[i].first->setDesc(get(OpsToUpdate[i].second)); + return true; +} + +/// optimizeLoadInstr - Try to remove the load by folding it to a register +/// operand at the use. We fold the load instructions if load defines a virtual +/// register, the virtual register is used once in the same BB, and the +/// instructions in-between do not load or store, and have no side effects. +MachineInstr* X86InstrInfo:: +optimizeLoadInstr(MachineInstr *MI, const MachineRegisterInfo *MRI, + unsigned &FoldAsLoadDefReg, + MachineInstr *&DefMI) const { + if (FoldAsLoadDefReg == 0) + return 0; + // To be conservative, if there exists another load, clear the load candidate. + if (MI->mayLoad()) { + FoldAsLoadDefReg = 0; + return 0; + } + + // Check whether we can move DefMI here. + DefMI = MRI->getVRegDef(FoldAsLoadDefReg); + assert(DefMI); + bool SawStore = false; + if (!DefMI->isSafeToMove(this, 0, SawStore)) + return 0; + + // We try to commute MI if possible. + unsigned IdxEnd = (MI->isCommutable()) ? 2 : 1; + for (unsigned Idx = 0; Idx < IdxEnd; Idx++) { + // Collect information about virtual register operands of MI. + unsigned SrcOperandId = 0; + bool FoundSrcOperand = false; + for (unsigned i = 0, e = MI->getDesc().getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg()) + continue; + unsigned Reg = MO.getReg(); + if (Reg != FoldAsLoadDefReg) + continue; + // Do not fold if we have a subreg use or a def or multiple uses. + if (MO.getSubReg() || MO.isDef() || FoundSrcOperand) + return 0; + + SrcOperandId = i; + FoundSrcOperand = true; + } + if (!FoundSrcOperand) return 0; + + // Check whether we can fold the def into SrcOperandId. + SmallVector Ops; + Ops.push_back(SrcOperandId); + MachineInstr *FoldMI = foldMemoryOperand(MI, Ops, DefMI); + if (FoldMI) { + FoldAsLoadDefReg = 0; + return FoldMI; + } + + if (Idx == 1) { + // MI was changed but it didn't help, commute it back! + commuteInstruction(MI, false); + return 0; + } + + // Check whether we can commute MI and enable folding. + if (MI->isCommutable()) { + MachineInstr *NewMI = commuteInstruction(MI, false); + // Unable to commute. + if (!NewMI) return 0; + if (NewMI != MI) { + // New instruction. It doesn't need to be kept. + NewMI->eraseFromParent(); + return 0; + } + } + } + return 0; +} + /// Expand2AddrUndef - Expand a single-def pseudo instruction to a two-addr /// instruction with two undef reads of the register being defined. This is /// used for mapping: @@ -2795,6 +3630,8 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, OpcodeTablePtr = &RegOp2MemOpTable1; } else if (i == 2) { OpcodeTablePtr = &RegOp2MemOpTable2; + } else if (i == 3) { + OpcodeTablePtr = &RegOp2MemOpTable3; } // If table selected... 
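The hunk above extends foldMemoryOperandImpl's table selection to a fourth
map, RegOp2MemOpTable3, chosen when the operand being folded is operand
index 3. The lookup itself is just an opcode-keyed map from a register-form
opcode to its memory form plus fold flags. A minimal standalone sketch of
that pattern follows; it uses plain std::map and made-up opcode values
(FoldTable, ADD32rr, ADD32rm are illustrative placeholders, not LLVM's API):

#include <cstdio>
#include <map>
#include <utility>

// One table per foldable operand index; key is the register-form opcode,
// value is (memory-form opcode, fold flags).
typedef std::map<unsigned, std::pair<unsigned, unsigned> > FoldTable;

enum { ADD32rr = 1, ADD32rm = 2 }; // placeholder opcode values, not LLVM's

int main() {
  FoldTable RegOp2MemOpTable2; // in the patch, this folds operand index 2
  RegOp2MemOpTable2[ADD32rr] = std::make_pair(unsigned(ADD32rm), 0u);

  // Folding: look up the register opcode and switch to the memory form.
  FoldTable::const_iterator I = RegOp2MemOpTable2.find(ADD32rr);
  if (I != RegOp2MemOpTable2.end())
    std::printf("fold opcode %u -> memory form %u\n",
                unsigned(ADD32rr), I->second.first);
  return 0;
}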
@@ -2809,7 +3646,7 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, return NULL; bool NarrowToMOV32rm = false; if (Size) { - unsigned RCSize = getRegClass(MI->getDesc(), i, &RI)->getSize(); + unsigned RCSize = getRegClass(MI->getDesc(), i, &RI, MF)->getSize(); if (Size < RCSize) { // Check if it's safe to fold the load. If the size of the object is // narrower than the load width, then it's not. @@ -3202,7 +4039,7 @@ bool X86InstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI, UnfoldStore &= FoldedStore; const MCInstrDesc &MCID = get(Opc); - const TargetRegisterClass *RC = getRegClass(MCID, Index, &RI); + const TargetRegisterClass *RC = getRegClass(MCID, Index, &RI, MF); if (!MI->hasOneMemOperand() && RC == &X86::VR128RegClass && !TM.getSubtarget().isUnalignedMemAccessFast()) @@ -3297,7 +4134,7 @@ bool X86InstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI, // Emit the store instruction. if (UnfoldStore) { - const TargetRegisterClass *DstRC = getRegClass(MCID, 0, &RI); + const TargetRegisterClass *DstRC = getRegClass(MCID, 0, &RI, MF); std::pair MMOs = MF.extractStoreMemRefs(MI->memoperands_begin(), @@ -3323,7 +4160,8 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N, bool FoldedLoad = I->second.second & TB_FOLDED_LOAD; bool FoldedStore = I->second.second & TB_FOLDED_STORE; const MCInstrDesc &MCID = get(Opc); - const TargetRegisterClass *RC = getRegClass(MCID, Index, &RI); + MachineFunction &MF = DAG.getMachineFunction(); + const TargetRegisterClass *RC = getRegClass(MCID, Index, &RI, MF); unsigned NumDefs = MCID.NumDefs; std::vector AddrOps; std::vector BeforeOps; @@ -3344,7 +4182,6 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N, // Emit the load instruction. SDNode *Load = 0; - MachineFunction &MF = DAG.getMachineFunction(); if (FoldedLoad) { EVT VT = *RC->vt_begin(); std::pair VTs; const TargetRegisterClass *DstRC = 0; if (MCID.getNumDefs() > 0) { - DstRC = getRegClass(MCID, 0, &RI); + DstRC = getRegClass(MCID, 0, &RI, MF); VTs.push_back(*DstRC->vt_begin()); } for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) { @@ -3625,7 +4462,7 @@ unsigned X86InstrInfo::getGlobalBaseReg(MachineFunction *MF) const { // Create the register. The code to initialize it is inserted // later, by the CGBR pass (below). MachineRegisterInfo &RegInfo = MF->getRegInfo(); - GlobalBaseReg = RegInfo.createVirtualRegister(X86::GR32RegisterClass); + GlobalBaseReg = RegInfo.createVirtualRegister(&X86::GR32_NOSPRegClass); X86FI->setGlobalBaseReg(GlobalBaseReg); return GlobalBaseReg; } @@ -3835,7 +4672,7 @@ namespace { unsigned PC; if (TM->getSubtarget().isPICStyleGOT()) - PC = RegInfo.createVirtualRegister(X86::GR32RegisterClass); + PC = RegInfo.createVirtualRegister(&X86::GR32RegClass); else PC = GlobalBaseReg; @@ -3869,3 +4706,117 @@ namespace { char CGBR::ID = 0; FunctionPass* llvm::createGlobalBaseRegPass() { return new CGBR(); } + +namespace { + struct LDTLSCleanup : public MachineFunctionPass { + static char ID; + LDTLSCleanup() : MachineFunctionPass(ID) {} + + virtual bool runOnMachineFunction(MachineFunction &MF) { + X86MachineFunctionInfo* MFI = MF.getInfo(); + if (MFI->getNumLocalDynamicTLSAccesses() < 2) { + // No point folding accesses if there isn't at least two. + return false; + } + + MachineDominatorTree *DT = &getAnalysis(); + return VisitNode(DT->getRootNode(), 0); + } + + // Visit the dominator subtree rooted at Node in pre-order. 
+ // If TLSBaseAddrReg is non-null, then use that to replace any + // TLS_base_addr instructions. Otherwise, create the register + // when the first such instruction is seen, and then use it + // as we encounter more instructions. + bool VisitNode(MachineDomTreeNode *Node, unsigned TLSBaseAddrReg) { + MachineBasicBlock *BB = Node->getBlock(); + bool Changed = false; + + // Traverse the current block. + for (MachineBasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; + ++I) { + switch (I->getOpcode()) { + case X86::TLS_base_addr32: + case X86::TLS_base_addr64: + if (TLSBaseAddrReg) + I = ReplaceTLSBaseAddrCall(I, TLSBaseAddrReg); + else + I = SetRegister(I, &TLSBaseAddrReg); + Changed = true; + break; + default: + break; + } + } + + // Visit the children of this block in the dominator tree. + for (MachineDomTreeNode::iterator I = Node->begin(), E = Node->end(); + I != E; ++I) { + Changed |= VisitNode(*I, TLSBaseAddrReg); + } + + return Changed; + } + + // Replace the TLS_base_addr instruction I with a copy from + // TLSBaseAddrReg, returning the new instruction. + MachineInstr *ReplaceTLSBaseAddrCall(MachineInstr *I, + unsigned TLSBaseAddrReg) { + MachineFunction *MF = I->getParent()->getParent(); + const X86TargetMachine *TM = + static_cast(&MF->getTarget()); + const bool is64Bit = TM->getSubtarget().is64Bit(); + const X86InstrInfo *TII = TM->getInstrInfo(); + + // Insert a Copy from TLSBaseAddrReg to RAX/EAX. + MachineInstr *Copy = BuildMI(*I->getParent(), I, I->getDebugLoc(), + TII->get(TargetOpcode::COPY), + is64Bit ? X86::RAX : X86::EAX) + .addReg(TLSBaseAddrReg); + + // Erase the TLS_base_addr instruction. + I->eraseFromParent(); + + return Copy; + } + + // Create a virtal register in *TLSBaseAddrReg, and populate it by + // inserting a copy instruction after I. Returns the new instruction. + MachineInstr *SetRegister(MachineInstr *I, unsigned *TLSBaseAddrReg) { + MachineFunction *MF = I->getParent()->getParent(); + const X86TargetMachine *TM = + static_cast(&MF->getTarget()); + const bool is64Bit = TM->getSubtarget().is64Bit(); + const X86InstrInfo *TII = TM->getInstrInfo(); + + // Create a virtual register for the TLS base address. + MachineRegisterInfo &RegInfo = MF->getRegInfo(); + *TLSBaseAddrReg = RegInfo.createVirtualRegister(is64Bit + ? &X86::GR64RegClass + : &X86::GR32RegClass); + + // Insert a copy from RAX/EAX to TLSBaseAddrReg. + MachineInstr *Next = I->getNextNode(); + MachineInstr *Copy = BuildMI(*I->getParent(), Next, I->getDebugLoc(), + TII->get(TargetOpcode::COPY), + *TLSBaseAddrReg) + .addReg(is64Bit ? X86::RAX : X86::EAX); + + return Copy; + } + + virtual const char *getPassName() const { + return "Local Dynamic TLS Access Clean-up"; + } + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); + AU.addRequired(); + MachineFunctionPass::getAnalysisUsage(AU); + } + }; +} + +char LDTLSCleanup::ID = 0; +FunctionPass* +llvm::createCleanupLocalDynamicTLSPass() { return new LDTLSCleanup(); } diff --git a/lib/Target/X86/X86InstrInfo.h b/lib/Target/X86/X86InstrInfo.h index b23d756..b6f69af 100644 --- a/lib/Target/X86/X86InstrInfo.h +++ b/lib/Target/X86/X86InstrInfo.h @@ -128,8 +128,8 @@ class X86InstrInfo : public X86GenInstrInfo { X86TargetMachine &TM; const X86RegisterInfo RI; - /// RegOp2MemOpTable2Addr, RegOp2MemOpTable0, RegOp2MemOpTable1, - /// RegOp2MemOpTable2 - Load / store folding opcode maps. 
+ /// RegOp2MemOpTable3Addr, RegOp2MemOpTable0, RegOp2MemOpTable1, + /// RegOp2MemOpTable2, RegOp2MemOpTable3 - Load / store folding opcode maps. /// typedef DenseMap > RegOp2MemOpTableType; @@ -137,6 +137,7 @@ class X86InstrInfo : public X86GenInstrInfo { RegOp2MemOpTableType RegOp2MemOpTable0; RegOp2MemOpTableType RegOp2MemOpTable1; RegOp2MemOpTableType RegOp2MemOpTable2; + RegOp2MemOpTableType RegOp2MemOpTable3; /// MemOp2RegOpTable - Load / store unfolding opcode map. /// @@ -144,9 +145,9 @@ class X86InstrInfo : public X86GenInstrInfo { std::pair > MemOp2RegOpTableType; MemOp2RegOpTableType MemOp2RegOpTable; - void AddTableEntry(RegOp2MemOpTableType &R2MTable, - MemOp2RegOpTableType &M2RTable, - unsigned RegOp, unsigned MemOp, unsigned Flags); + static void AddTableEntry(RegOp2MemOpTableType &R2MTable, + MemOp2RegOpTableType &M2RTable, + unsigned RegOp, unsigned MemOp, unsigned Flags); public: explicit X86InstrInfo(X86TargetMachine &tm); @@ -218,6 +219,14 @@ public: MachineBasicBlock *FBB, const SmallVectorImpl &Cond, DebugLoc DL) const; + virtual bool canInsertSelect(const MachineBasicBlock&, + const SmallVectorImpl &Cond, + unsigned, unsigned, int&, int&, int&) const; + virtual void insertSelect(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, DebugLoc DL, + unsigned DstReg, + const SmallVectorImpl &Cond, + unsigned TrueReg, unsigned FalseReg) const; virtual void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, DebugLoc DL, unsigned DestReg, unsigned SrcReg, @@ -363,6 +372,33 @@ public: const MachineInstr *DefMI, unsigned DefIdx, const MachineInstr *UseMI, unsigned UseIdx) const; + /// analyzeCompare - For a comparison instruction, return the source registers + /// in SrcReg and SrcReg2 if having two register operands, and the value it + /// compares against in CmpValue. Return true if the comparison instruction + /// can be analyzed. + virtual bool analyzeCompare(const MachineInstr *MI, unsigned &SrcReg, + unsigned &SrcReg2, + int &CmpMask, int &CmpValue) const; + + /// optimizeCompareInstr - Check if there exists an earlier instruction that + /// operates on the same source operands and sets flags in the same way as + /// Compare; remove Compare if possible. + virtual bool optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, + unsigned SrcReg2, int CmpMask, int CmpValue, + const MachineRegisterInfo *MRI) const; + + /// optimizeLoadInstr - Try to remove the load by folding it to a register + /// operand at the use. We fold the load instructions if and only if the + /// def and use are in the same BB. We only look at one load and see + /// whether it can be folded into MI. FoldAsLoadDefReg is the virtual register + /// defined by the load we are trying to fold. DefMI returns the machine + /// instruction that defines FoldAsLoadDefReg, and the function returns + /// the machine instruction generated due to folding. 
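+  /// For example, a MOV32rm that defines a vreg with a single use in an
+  /// ADD32rr in the same block can be folded into an ADD32rm, deleting the
+  /// standalone load.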
+ virtual MachineInstr* optimizeLoadInstr(MachineInstr *MI, + const MachineRegisterInfo *MRI, + unsigned &FoldAsLoadDefReg, + MachineInstr *&DefMI) const; + private: MachineInstr * convertToThreeAddressWithLEA(unsigned MIOpc, MachineFunction::iterator &MFI, diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index 6a25312..d293156 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -63,6 +63,10 @@ def SDTX86SetCC_C : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisVT<1, i8>, SDTCisVT<2, i32>]>; +def SDTX86sahf : SDTypeProfile<1, 1, [SDTCisVT<0, i32>, SDTCisVT<1, i8>]>; + +def SDTX86rdrand : SDTypeProfile<2, 0, [SDTCisInt<0>, SDTCisVT<1, i32>]>; + def SDTX86cas : SDTypeProfile<0, 3, [SDTCisPtrTy<0>, SDTCisInt<1>, SDTCisVT<2, i8>]>; def SDTX86caspair : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>; @@ -95,6 +99,8 @@ def SDTX86Wrapper : SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>, SDTCisPtrTy<0>]>; def SDT_X86TLSADDR : SDTypeProfile<0, 1, [SDTCisInt<0>]>; +def SDT_X86TLSBASEADDR : SDTypeProfile<0, 1, [SDTCisInt<0>]>; + def SDT_X86TLSCALL : SDTypeProfile<0, 1, [SDTCisInt<0>]>; def SDT_X86SEG_ALLOCA : SDTypeProfile<1, 1, [SDTCisVT<0, iPTR>, SDTCisVT<1, iPTR>]>; @@ -131,6 +137,11 @@ def X86brcond : SDNode<"X86ISD::BRCOND", SDTX86BrCond, def X86setcc : SDNode<"X86ISD::SETCC", SDTX86SetCC>; def X86setcc_c : SDNode<"X86ISD::SETCC_CARRY", SDTX86SetCC_C>; +def X86sahf : SDNode<"X86ISD::SAHF", SDTX86sahf>; + +def X86rdrand : SDNode<"X86ISD::RDRAND", SDTX86rdrand, + [SDNPHasChain, SDNPSideEffect]>; + def X86cas : SDNode<"X86ISD::LCMPXCHG_DAG", SDTX86cas, [SDNPHasChain, SDNPInGlue, SDNPOutGlue, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>; @@ -199,6 +210,9 @@ def X86WrapperRIP : SDNode<"X86ISD::WrapperRIP", SDTX86Wrapper>; def X86tlsaddr : SDNode<"X86ISD::TLSADDR", SDT_X86TLSADDR, [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>; +def X86tlsbaseaddr : SDNode<"X86ISD::TLSBASEADDR", SDT_X86TLSBASEADDR, + [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>; + def X86ehret : SDNode<"X86ISD::EH_RETURN", SDT_X86EHRET, [SDNPHasChain]>; @@ -278,6 +292,20 @@ def X86Mem256AsmOperand : AsmOperandClass { let Name = "Mem256"; let PredicateMethod = "isMem256"; } +// Gather mem operands +def X86MemVX32Operand : AsmOperandClass { + let Name = "MemVX32"; let PredicateMethod = "isMemVX32"; +} +def X86MemVY32Operand : AsmOperandClass { + let Name = "MemVY32"; let PredicateMethod = "isMemVY32"; +} +def X86MemVX64Operand : AsmOperandClass { + let Name = "MemVX64"; let PredicateMethod = "isMemVX64"; +} +def X86MemVY64Operand : AsmOperandClass { + let Name = "MemVY64"; let PredicateMethod = "isMemVY64"; +} + def X86AbsMemAsmOperand : AsmOperandClass { let Name = "AbsMem"; let SuperClasses = [X86MemAsmOperand]; @@ -316,6 +344,20 @@ def f128mem : X86MemOperand<"printf128mem"> { let ParserMatchClass = X86Mem128AsmOperand; } def f256mem : X86MemOperand<"printf256mem">{ let ParserMatchClass = X86Mem256AsmOperand; } + +// Gather mem operands +def vx32mem : X86MemOperand<"printi32mem">{ + let MIOperandInfo = (ops ptr_rc, i8imm, VR128, i32imm, i8imm); + let ParserMatchClass = X86MemVX32Operand; } +def vy32mem : X86MemOperand<"printi32mem">{ + let MIOperandInfo = (ops ptr_rc, i8imm, VR256, i32imm, i8imm); + let ParserMatchClass = X86MemVY32Operand; } +def vx64mem : X86MemOperand<"printi64mem">{ + let MIOperandInfo = (ops ptr_rc, i8imm, VR128, i32imm, i8imm); + let ParserMatchClass = X86MemVX64Operand; } +def vy64mem : X86MemOperand<"printi64mem">{ + let MIOperandInfo = (ops ptr_rc, i8imm, VR256, i32imm, i8imm); 
+ let ParserMatchClass = X86MemVY64Operand; } } // A version of i8mem for use on x86-64 that uses GR64_NOREX instead of @@ -328,7 +370,7 @@ def i8mem_NOREX : Operand { } // GPRs available for tailcall. -// It represents GR64_TC or GR64_TCW64. +// It represents GR32_TC, GR64_TC or GR64_TCW64. def ptr_rc_tailcall : PointerLikeRegClass<2>; // Special i32mem for addresses of load folding tail calls. These are not @@ -336,7 +378,8 @@ def ptr_rc_tailcall : PointerLikeRegClass<2>; // after callee-saved register are popped. def i32mem_TC : Operand { let PrintMethod = "printi32mem"; - let MIOperandInfo = (ops GR32_TC, i8imm, GR32_TC, i32imm, i8imm); + let MIOperandInfo = (ops ptr_rc_tailcall, i8imm, ptr_rc_tailcall, + i32imm, i8imm); let ParserMatchClass = X86Mem32AsmOperand; let OperandType = "OPERAND_MEMORY"; } @@ -487,6 +530,9 @@ def lea32addr : ComplexPattern; +def tls32baseaddr : ComplexPattern; + def lea64addr : ComplexPattern; @@ -494,6 +540,9 @@ def lea64addr : ComplexPattern; +def tls64baseaddr : ComplexPattern; + //===----------------------------------------------------------------------===// // X86 Instruction Predicate Definitions. def HasCMov : Predicate<"Subtarget->hasCMov()">; @@ -514,8 +563,8 @@ def HasAVX2 : Predicate<"Subtarget->hasAVX2()">; def HasPOPCNT : Predicate<"Subtarget->hasPOPCNT()">; def HasAES : Predicate<"Subtarget->hasAES()">; -def HasCLMUL : Predicate<"Subtarget->hasCLMUL()">; -def HasFMA3 : Predicate<"Subtarget->hasFMA3()">; +def HasPCLMUL : Predicate<"Subtarget->hasPCLMUL()">; +def HasFMA : Predicate<"Subtarget->hasFMA()">; def HasFMA4 : Predicate<"Subtarget->hasFMA4()">; def HasXOP : Predicate<"Subtarget->hasXOP()">; def HasMOVBE : Predicate<"Subtarget->hasMOVBE()">; @@ -680,25 +729,27 @@ def trunc_su : PatFrag<(ops node:$src), (trunc node:$src), [{ // Nop let neverHasSideEffects = 1 in { - def NOOP : I<0x90, RawFrm, (outs), (ins), "nop", []>; + def NOOP : I<0x90, RawFrm, (outs), (ins), "nop", [], IIC_NOP>; def NOOPW : I<0x1f, MRM0m, (outs), (ins i16mem:$zero), - "nop{w}\t$zero", []>, TB, OpSize; + "nop{w}\t$zero", [], IIC_NOP>, TB, OpSize; def NOOPL : I<0x1f, MRM0m, (outs), (ins i32mem:$zero), - "nop{l}\t$zero", []>, TB; + "nop{l}\t$zero", [], IIC_NOP>, TB; } // Constructing a stack frame. def ENTER : Ii16<0xC8, RawFrmImm8, (outs), (ins i16imm:$len, i8imm:$lvl), - "enter\t$len, $lvl", []>; + "enter\t$len, $lvl", [], IIC_ENTER>; let Defs = [EBP, ESP], Uses = [EBP, ESP], mayLoad = 1, neverHasSideEffects=1 in def LEAVE : I<0xC9, RawFrm, - (outs), (ins), "leave", []>, Requires<[In32BitMode]>; + (outs), (ins), "leave", [], IIC_LEAVE>, + Requires<[In32BitMode]>; let Defs = [RBP,RSP], Uses = [RBP,RSP], mayLoad = 1, neverHasSideEffects = 1 in def LEAVE64 : I<0xC9, RawFrm, - (outs), (ins), "leave", []>, Requires<[In64BitMode]>; + (outs), (ins), "leave", [], IIC_LEAVE>, + Requires<[In64BitMode]>; //===----------------------------------------------------------------------===// // Miscellaneous Instructions. 
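From here to the end of this file the change is uniformly mechanical: every
instruction def gains a scheduling itinerary class as a new trailing template
argument, after the (often empty) pattern list. For instance, the nop
definition above went from

  def NOOP : I<0x90, RawFrm, (outs), (ins), "nop", []>;

to

  def NOOP : I<0x90, RawFrm, (outs), (ins), "nop", [], IIC_NOP>;

so subtarget itineraries can assign latencies per IIC_* class instead of
leaving these instructions unmodeled.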
@@ -706,41 +757,49 @@ def LEAVE64 : I<0xC9, RawFrm, let Defs = [ESP], Uses = [ESP], neverHasSideEffects=1 in { let mayLoad = 1 in { -def POP16r : I<0x58, AddRegFrm, (outs GR16:$reg), (ins), "pop{w}\t$reg", []>, - OpSize; -def POP32r : I<0x58, AddRegFrm, (outs GR32:$reg), (ins), "pop{l}\t$reg", []>; -def POP16rmr: I<0x8F, MRM0r, (outs GR16:$reg), (ins), "pop{w}\t$reg", []>, - OpSize; -def POP16rmm: I<0x8F, MRM0m, (outs i16mem:$dst), (ins), "pop{w}\t$dst", []>, - OpSize; -def POP32rmr: I<0x8F, MRM0r, (outs GR32:$reg), (ins), "pop{l}\t$reg", []>; -def POP32rmm: I<0x8F, MRM0m, (outs i32mem:$dst), (ins), "pop{l}\t$dst", []>; - -def POPF16 : I<0x9D, RawFrm, (outs), (ins), "popf{w}", []>, OpSize; -def POPF32 : I<0x9D, RawFrm, (outs), (ins), "popf{l|d}", []>, +def POP16r : I<0x58, AddRegFrm, (outs GR16:$reg), (ins), "pop{w}\t$reg", [], + IIC_POP_REG16>, OpSize; +def POP32r : I<0x58, AddRegFrm, (outs GR32:$reg), (ins), "pop{l}\t$reg", [], + IIC_POP_REG>; +def POP16rmr: I<0x8F, MRM0r, (outs GR16:$reg), (ins), "pop{w}\t$reg", [], + IIC_POP_REG>, OpSize; +def POP16rmm: I<0x8F, MRM0m, (outs i16mem:$dst), (ins), "pop{w}\t$dst", [], + IIC_POP_MEM>, OpSize; +def POP32rmr: I<0x8F, MRM0r, (outs GR32:$reg), (ins), "pop{l}\t$reg", [], + IIC_POP_REG>; +def POP32rmm: I<0x8F, MRM0m, (outs i32mem:$dst), (ins), "pop{l}\t$dst", [], + IIC_POP_MEM>; + +def POPF16 : I<0x9D, RawFrm, (outs), (ins), "popf{w}", [], IIC_POP_F>, OpSize; +def POPF32 : I<0x9D, RawFrm, (outs), (ins), "popf{l|d}", [], IIC_POP_FD>, Requires<[In32BitMode]>; } let mayStore = 1 in { -def PUSH16r : I<0x50, AddRegFrm, (outs), (ins GR16:$reg), "push{w}\t$reg",[]>, - OpSize; -def PUSH32r : I<0x50, AddRegFrm, (outs), (ins GR32:$reg), "push{l}\t$reg",[]>; -def PUSH16rmr: I<0xFF, MRM6r, (outs), (ins GR16:$reg), "push{w}\t$reg",[]>, - OpSize; -def PUSH16rmm: I<0xFF, MRM6m, (outs), (ins i16mem:$src), "push{w}\t$src",[]>, +def PUSH16r : I<0x50, AddRegFrm, (outs), (ins GR16:$reg), "push{w}\t$reg",[], + IIC_PUSH_REG>, OpSize; +def PUSH32r : I<0x50, AddRegFrm, (outs), (ins GR32:$reg), "push{l}\t$reg",[], + IIC_PUSH_REG>; +def PUSH16rmr: I<0xFF, MRM6r, (outs), (ins GR16:$reg), "push{w}\t$reg",[], + IIC_PUSH_REG>, OpSize; +def PUSH16rmm: I<0xFF, MRM6m, (outs), (ins i16mem:$src), "push{w}\t$src",[], + IIC_PUSH_MEM>, OpSize; -def PUSH32rmr: I<0xFF, MRM6r, (outs), (ins GR32:$reg), "push{l}\t$reg",[]>; -def PUSH32rmm: I<0xFF, MRM6m, (outs), (ins i32mem:$src), "push{l}\t$src",[]>; +def PUSH32rmr: I<0xFF, MRM6r, (outs), (ins GR32:$reg), "push{l}\t$reg",[], + IIC_PUSH_REG>; +def PUSH32rmm: I<0xFF, MRM6m, (outs), (ins i32mem:$src), "push{l}\t$src",[], + IIC_PUSH_MEM>; def PUSHi8 : Ii8<0x6a, RawFrm, (outs), (ins i32i8imm:$imm), - "push{l}\t$imm", []>; + "push{l}\t$imm", [], IIC_PUSH_IMM>; def PUSHi16 : Ii16<0x68, RawFrm, (outs), (ins i16imm:$imm), - "push{w}\t$imm", []>, OpSize; + "push{w}\t$imm", [], IIC_PUSH_IMM>, OpSize; def PUSHi32 : Ii32<0x68, RawFrm, (outs), (ins i32imm:$imm), - "push{l}\t$imm", []>; + "push{l}\t$imm", [], IIC_PUSH_IMM>; -def PUSHF16 : I<0x9C, RawFrm, (outs), (ins), "pushf{w}", []>, OpSize; -def PUSHF32 : I<0x9C, RawFrm, (outs), (ins), "pushf{l|d}", []>, +def PUSHF16 : I<0x9C, RawFrm, (outs), (ins), "pushf{w}", [], IIC_PUSH_F>, + OpSize; +def PUSHF32 : I<0x9C, RawFrm, (outs), (ins), "pushf{l|d}", [], IIC_PUSH_F>, Requires<[In32BitMode]>; } @@ -749,44 +808,48 @@ def PUSHF32 : I<0x9C, RawFrm, (outs), (ins), "pushf{l|d}", []>, let Defs = [RSP], Uses = [RSP], neverHasSideEffects=1 in { let mayLoad = 1 in { def POP64r : I<0x58, AddRegFrm, - (outs 
GR64:$reg), (ins), "pop{q}\t$reg", []>; -def POP64rmr: I<0x8F, MRM0r, (outs GR64:$reg), (ins), "pop{q}\t$reg", []>; -def POP64rmm: I<0x8F, MRM0m, (outs i64mem:$dst), (ins), "pop{q}\t$dst", []>; + (outs GR64:$reg), (ins), "pop{q}\t$reg", [], IIC_POP_REG>; +def POP64rmr: I<0x8F, MRM0r, (outs GR64:$reg), (ins), "pop{q}\t$reg", [], + IIC_POP_REG>; +def POP64rmm: I<0x8F, MRM0m, (outs i64mem:$dst), (ins), "pop{q}\t$dst", [], + IIC_POP_MEM>; } let mayStore = 1 in { def PUSH64r : I<0x50, AddRegFrm, - (outs), (ins GR64:$reg), "push{q}\t$reg", []>; -def PUSH64rmr: I<0xFF, MRM6r, (outs), (ins GR64:$reg), "push{q}\t$reg", []>; -def PUSH64rmm: I<0xFF, MRM6m, (outs), (ins i64mem:$src), "push{q}\t$src", []>; + (outs), (ins GR64:$reg), "push{q}\t$reg", [], IIC_PUSH_REG>; +def PUSH64rmr: I<0xFF, MRM6r, (outs), (ins GR64:$reg), "push{q}\t$reg", [], + IIC_PUSH_REG>; +def PUSH64rmm: I<0xFF, MRM6m, (outs), (ins i64mem:$src), "push{q}\t$src", [], + IIC_PUSH_MEM>; } } let Defs = [RSP], Uses = [RSP], neverHasSideEffects = 1, mayStore = 1 in { def PUSH64i8 : Ii8<0x6a, RawFrm, (outs), (ins i64i8imm:$imm), - "push{q}\t$imm", []>; + "push{q}\t$imm", [], IIC_PUSH_IMM>; def PUSH64i16 : Ii16<0x68, RawFrm, (outs), (ins i16imm:$imm), - "push{q}\t$imm", []>; + "push{q}\t$imm", [], IIC_PUSH_IMM>; def PUSH64i32 : Ii32<0x68, RawFrm, (outs), (ins i64i32imm:$imm), - "push{q}\t$imm", []>; + "push{q}\t$imm", [], IIC_PUSH_IMM>; } let Defs = [RSP, EFLAGS], Uses = [RSP], mayLoad = 1, neverHasSideEffects=1 in -def POPF64 : I<0x9D, RawFrm, (outs), (ins), "popfq", []>, +def POPF64 : I<0x9D, RawFrm, (outs), (ins), "popfq", [], IIC_POP_FD>, Requires<[In64BitMode]>; let Defs = [RSP], Uses = [RSP, EFLAGS], mayStore = 1, neverHasSideEffects=1 in -def PUSHF64 : I<0x9C, RawFrm, (outs), (ins), "pushfq", []>, +def PUSHF64 : I<0x9C, RawFrm, (outs), (ins), "pushfq", [], IIC_PUSH_F>, Requires<[In64BitMode]>; let Defs = [EDI, ESI, EBP, EBX, EDX, ECX, EAX, ESP], Uses = [ESP], mayLoad=1, neverHasSideEffects=1 in { -def POPA32 : I<0x61, RawFrm, (outs), (ins), "popa{l}", []>, +def POPA32 : I<0x61, RawFrm, (outs), (ins), "popa{l}", [], IIC_POP_A>, Requires<[In32BitMode]>; } let Defs = [ESP], Uses = [EDI, ESI, EBP, EBX, EDX, ECX, EAX, ESP], mayStore=1, neverHasSideEffects=1 in { -def PUSHA32 : I<0x60, RawFrm, (outs), (ins), "pusha{l}", []>, +def PUSHA32 : I<0x60, RawFrm, (outs), (ins), "pusha{l}", [], IIC_PUSH_A>, Requires<[In32BitMode]>; } @@ -794,84 +857,92 @@ let Constraints = "$src = $dst" in { // GR32 = bswap GR32 def BSWAP32r : I<0xC8, AddRegFrm, (outs GR32:$dst), (ins GR32:$src), "bswap{l}\t$dst", - [(set GR32:$dst, (bswap GR32:$src))]>, TB; + [(set GR32:$dst, (bswap GR32:$src))], IIC_BSWAP>, TB; def BSWAP64r : RI<0xC8, AddRegFrm, (outs GR64:$dst), (ins GR64:$src), "bswap{q}\t$dst", - [(set GR64:$dst, (bswap GR64:$src))]>, TB; + [(set GR64:$dst, (bswap GR64:$src))], IIC_BSWAP>, TB; } // Constraints = "$src = $dst" // Bit scan instructions. 
let Defs = [EFLAGS] in { def BSF16rr : I<0xBC, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src), "bsf{w}\t{$src, $dst|$dst, $src}", - [(set GR16:$dst, EFLAGS, (X86bsf GR16:$src))]>, TB, OpSize; + [(set GR16:$dst, EFLAGS, (X86bsf GR16:$src))], + IIC_BSF>, TB, OpSize; def BSF16rm : I<0xBC, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src), "bsf{w}\t{$src, $dst|$dst, $src}", - [(set GR16:$dst, EFLAGS, (X86bsf (loadi16 addr:$src)))]>, TB, - OpSize; + [(set GR16:$dst, EFLAGS, (X86bsf (loadi16 addr:$src)))], + IIC_BSF>, TB, OpSize; def BSF32rr : I<0xBC, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src), "bsf{l}\t{$src, $dst|$dst, $src}", - [(set GR32:$dst, EFLAGS, (X86bsf GR32:$src))]>, TB; + [(set GR32:$dst, EFLAGS, (X86bsf GR32:$src))], IIC_BSF>, TB; def BSF32rm : I<0xBC, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src), "bsf{l}\t{$src, $dst|$dst, $src}", - [(set GR32:$dst, EFLAGS, (X86bsf (loadi32 addr:$src)))]>, TB; + [(set GR32:$dst, EFLAGS, (X86bsf (loadi32 addr:$src)))], + IIC_BSF>, TB; def BSF64rr : RI<0xBC, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src), "bsf{q}\t{$src, $dst|$dst, $src}", - [(set GR64:$dst, EFLAGS, (X86bsf GR64:$src))]>, TB; + [(set GR64:$dst, EFLAGS, (X86bsf GR64:$src))], + IIC_BSF>, TB; def BSF64rm : RI<0xBC, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src), "bsf{q}\t{$src, $dst|$dst, $src}", - [(set GR64:$dst, EFLAGS, (X86bsf (loadi64 addr:$src)))]>, TB; + [(set GR64:$dst, EFLAGS, (X86bsf (loadi64 addr:$src)))], + IIC_BSF>, TB; def BSR16rr : I<0xBD, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src), "bsr{w}\t{$src, $dst|$dst, $src}", - [(set GR16:$dst, EFLAGS, (X86bsr GR16:$src))]>, TB, OpSize; + [(set GR16:$dst, EFLAGS, (X86bsr GR16:$src))], IIC_BSR>, + TB, OpSize; def BSR16rm : I<0xBD, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src), "bsr{w}\t{$src, $dst|$dst, $src}", - [(set GR16:$dst, EFLAGS, (X86bsr (loadi16 addr:$src)))]>, TB, + [(set GR16:$dst, EFLAGS, (X86bsr (loadi16 addr:$src)))], + IIC_BSR>, TB, OpSize; def BSR32rr : I<0xBD, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src), "bsr{l}\t{$src, $dst|$dst, $src}", - [(set GR32:$dst, EFLAGS, (X86bsr GR32:$src))]>, TB; + [(set GR32:$dst, EFLAGS, (X86bsr GR32:$src))], IIC_BSR>, TB; def BSR32rm : I<0xBD, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src), "bsr{l}\t{$src, $dst|$dst, $src}", - [(set GR32:$dst, EFLAGS, (X86bsr (loadi32 addr:$src)))]>, TB; + [(set GR32:$dst, EFLAGS, (X86bsr (loadi32 addr:$src)))], + IIC_BSR>, TB; def BSR64rr : RI<0xBD, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src), "bsr{q}\t{$src, $dst|$dst, $src}", - [(set GR64:$dst, EFLAGS, (X86bsr GR64:$src))]>, TB; + [(set GR64:$dst, EFLAGS, (X86bsr GR64:$src))], IIC_BSR>, TB; def BSR64rm : RI<0xBD, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src), "bsr{q}\t{$src, $dst|$dst, $src}", - [(set GR64:$dst, EFLAGS, (X86bsr (loadi64 addr:$src)))]>, TB; + [(set GR64:$dst, EFLAGS, (X86bsr (loadi64 addr:$src)))], + IIC_BSR>, TB; } // Defs = [EFLAGS] // These uses the DF flag in the EFLAGS register to inc or dec EDI and ESI let Defs = [EDI,ESI], Uses = [EDI,ESI,EFLAGS] in { -def MOVSB : I<0xA4, RawFrm, (outs), (ins), "movsb", []>; -def MOVSW : I<0xA5, RawFrm, (outs), (ins), "movsw", []>, OpSize; -def MOVSD : I<0xA5, RawFrm, (outs), (ins), "movs{l|d}", []>; -def MOVSQ : RI<0xA5, RawFrm, (outs), (ins), "movsq", []>; +def MOVSB : I<0xA4, RawFrm, (outs), (ins), "movsb", [], IIC_MOVS>; +def MOVSW : I<0xA5, RawFrm, (outs), (ins), "movsw", [], IIC_MOVS>, OpSize; +def MOVSD : I<0xA5, RawFrm, (outs), (ins), "movs{l|d}", [], IIC_MOVS>; +def MOVSQ : RI<0xA5, RawFrm, (outs), (ins), "movsq", [], IIC_MOVS>; 
} // These uses the DF flag in the EFLAGS register to inc or dec EDI and ESI let Defs = [EDI], Uses = [AL,EDI,EFLAGS] in -def STOSB : I<0xAA, RawFrm, (outs), (ins), "stosb", []>; +def STOSB : I<0xAA, RawFrm, (outs), (ins), "stosb", [], IIC_STOS>; let Defs = [EDI], Uses = [AX,EDI,EFLAGS] in -def STOSW : I<0xAB, RawFrm, (outs), (ins), "stosw", []>, OpSize; +def STOSW : I<0xAB, RawFrm, (outs), (ins), "stosw", [], IIC_STOS>, OpSize; let Defs = [EDI], Uses = [EAX,EDI,EFLAGS] in -def STOSD : I<0xAB, RawFrm, (outs), (ins), "stos{l|d}", []>; +def STOSD : I<0xAB, RawFrm, (outs), (ins), "stos{l|d}", [], IIC_STOS>; let Defs = [RCX,RDI], Uses = [RAX,RCX,RDI,EFLAGS] in -def STOSQ : RI<0xAB, RawFrm, (outs), (ins), "stosq", []>; +def STOSQ : RI<0xAB, RawFrm, (outs), (ins), "stosq", [], IIC_STOS>; -def SCAS8 : I<0xAE, RawFrm, (outs), (ins), "scasb", []>; -def SCAS16 : I<0xAF, RawFrm, (outs), (ins), "scasw", []>, OpSize; -def SCAS32 : I<0xAF, RawFrm, (outs), (ins), "scas{l|d}", []>; -def SCAS64 : RI<0xAF, RawFrm, (outs), (ins), "scasq", []>; +def SCAS8 : I<0xAE, RawFrm, (outs), (ins), "scasb", [], IIC_SCAS>; +def SCAS16 : I<0xAF, RawFrm, (outs), (ins), "scasw", [], IIC_SCAS>, OpSize; +def SCAS32 : I<0xAF, RawFrm, (outs), (ins), "scas{l|d}", [], IIC_SCAS>; +def SCAS64 : RI<0xAF, RawFrm, (outs), (ins), "scasq", [], IIC_SCAS>; -def CMPS8 : I<0xA6, RawFrm, (outs), (ins), "cmpsb", []>; -def CMPS16 : I<0xA7, RawFrm, (outs), (ins), "cmpsw", []>, OpSize; -def CMPS32 : I<0xA7, RawFrm, (outs), (ins), "cmps{l|d}", []>; -def CMPS64 : RI<0xA7, RawFrm, (outs), (ins), "cmpsq", []>; +def CMPS8 : I<0xA6, RawFrm, (outs), (ins), "cmpsb", [], IIC_CMPS>; +def CMPS16 : I<0xA7, RawFrm, (outs), (ins), "cmpsw", [], IIC_CMPS>, OpSize; +def CMPS32 : I<0xA7, RawFrm, (outs), (ins), "cmps{l|d}", [], IIC_CMPS>; +def CMPS64 : RI<0xA7, RawFrm, (outs), (ins), "cmpsq", [], IIC_CMPS>; //===----------------------------------------------------------------------===// @@ -880,64 +951,64 @@ def CMPS64 : RI<0xA7, RawFrm, (outs), (ins), "cmpsq", []>; let neverHasSideEffects = 1 in { def MOV8rr : I<0x88, MRMDestReg, (outs GR8 :$dst), (ins GR8 :$src), - "mov{b}\t{$src, $dst|$dst, $src}", []>; + "mov{b}\t{$src, $dst|$dst, $src}", [], IIC_MOV>; def MOV16rr : I<0x89, MRMDestReg, (outs GR16:$dst), (ins GR16:$src), - "mov{w}\t{$src, $dst|$dst, $src}", []>, OpSize; + "mov{w}\t{$src, $dst|$dst, $src}", [], IIC_MOV>, OpSize; def MOV32rr : I<0x89, MRMDestReg, (outs GR32:$dst), (ins GR32:$src), - "mov{l}\t{$src, $dst|$dst, $src}", []>; + "mov{l}\t{$src, $dst|$dst, $src}", [], IIC_MOV>; def MOV64rr : RI<0x89, MRMDestReg, (outs GR64:$dst), (ins GR64:$src), - "mov{q}\t{$src, $dst|$dst, $src}", []>; + "mov{q}\t{$src, $dst|$dst, $src}", [], IIC_MOV>; } let isReMaterializable = 1, isAsCheapAsAMove = 1 in { def MOV8ri : Ii8 <0xB0, AddRegFrm, (outs GR8 :$dst), (ins i8imm :$src), "mov{b}\t{$src, $dst|$dst, $src}", - [(set GR8:$dst, imm:$src)]>; + [(set GR8:$dst, imm:$src)], IIC_MOV>; def MOV16ri : Ii16<0xB8, AddRegFrm, (outs GR16:$dst), (ins i16imm:$src), "mov{w}\t{$src, $dst|$dst, $src}", - [(set GR16:$dst, imm:$src)]>, OpSize; + [(set GR16:$dst, imm:$src)], IIC_MOV>, OpSize; def MOV32ri : Ii32<0xB8, AddRegFrm, (outs GR32:$dst), (ins i32imm:$src), "mov{l}\t{$src, $dst|$dst, $src}", - [(set GR32:$dst, imm:$src)]>; + [(set GR32:$dst, imm:$src)], IIC_MOV>; def MOV64ri : RIi64<0xB8, AddRegFrm, (outs GR64:$dst), (ins i64imm:$src), "movabs{q}\t{$src, $dst|$dst, $src}", - [(set GR64:$dst, imm:$src)]>; + [(set GR64:$dst, imm:$src)], IIC_MOV>; def MOV64ri32 : RIi32<0xC7, 
MRM0r, (outs GR64:$dst), (ins i64i32imm:$src), "mov{q}\t{$src, $dst|$dst, $src}", - [(set GR64:$dst, i64immSExt32:$src)]>; + [(set GR64:$dst, i64immSExt32:$src)], IIC_MOV>; } def MOV8mi : Ii8 <0xC6, MRM0m, (outs), (ins i8mem :$dst, i8imm :$src), "mov{b}\t{$src, $dst|$dst, $src}", - [(store (i8 imm:$src), addr:$dst)]>; + [(store (i8 imm:$src), addr:$dst)], IIC_MOV_MEM>; def MOV16mi : Ii16<0xC7, MRM0m, (outs), (ins i16mem:$dst, i16imm:$src), "mov{w}\t{$src, $dst|$dst, $src}", - [(store (i16 imm:$src), addr:$dst)]>, OpSize; + [(store (i16 imm:$src), addr:$dst)], IIC_MOV_MEM>, OpSize; def MOV32mi : Ii32<0xC7, MRM0m, (outs), (ins i32mem:$dst, i32imm:$src), "mov{l}\t{$src, $dst|$dst, $src}", - [(store (i32 imm:$src), addr:$dst)]>; + [(store (i32 imm:$src), addr:$dst)], IIC_MOV_MEM>; def MOV64mi32 : RIi32<0xC7, MRM0m, (outs), (ins i64mem:$dst, i64i32imm:$src), "mov{q}\t{$src, $dst|$dst, $src}", - [(store i64immSExt32:$src, addr:$dst)]>; + [(store i64immSExt32:$src, addr:$dst)], IIC_MOV_MEM>; /// moffs8, moffs16 and moffs32 versions of moves. The immediate is a /// 32-bit offset from the PC. These are only valid in x86-32 mode. def MOV8o8a : Ii32 <0xA0, RawFrm, (outs), (ins offset8:$src), - "mov{b}\t{$src, %al|AL, $src}", []>, + "mov{b}\t{$src, %al|AL, $src}", [], IIC_MOV_MEM>, Requires<[In32BitMode]>; def MOV16o16a : Ii32 <0xA1, RawFrm, (outs), (ins offset16:$src), - "mov{w}\t{$src, %ax|AL, $src}", []>, OpSize, + "mov{w}\t{$src, %ax|AL, $src}", [], IIC_MOV_MEM>, OpSize, Requires<[In32BitMode]>; def MOV32o32a : Ii32 <0xA1, RawFrm, (outs), (ins offset32:$src), - "mov{l}\t{$src, %eax|EAX, $src}", []>, + "mov{l}\t{$src, %eax|EAX, $src}", [], IIC_MOV_MEM>, Requires<[In32BitMode]>; def MOV8ao8 : Ii32 <0xA2, RawFrm, (outs offset8:$dst), (ins), - "mov{b}\t{%al, $dst|$dst, AL}", []>, + "mov{b}\t{%al, $dst|$dst, AL}", [], IIC_MOV_MEM>, Requires<[In32BitMode]>; def MOV16ao16 : Ii32 <0xA3, RawFrm, (outs offset16:$dst), (ins), - "mov{w}\t{%ax, $dst|$dst, AL}", []>, OpSize, + "mov{w}\t{%ax, $dst|$dst, AL}", [], IIC_MOV_MEM>, OpSize, Requires<[In32BitMode]>; def MOV32ao32 : Ii32 <0xA3, RawFrm, (outs offset32:$dst), (ins), - "mov{l}\t{%eax, $dst|$dst, EAX}", []>, + "mov{l}\t{%eax, $dst|$dst, EAX}", [], IIC_MOV_MEM>, Requires<[In32BitMode]>; // FIXME: These definitions are utterly broken @@ -958,42 +1029,42 @@ def MOV64ao64 : RIi32<0xA3, RawFrm, (outs offset64:$dst), (ins), let isCodeGenOnly = 1 in { def MOV8rr_REV : I<0x8A, MRMSrcReg, (outs GR8:$dst), (ins GR8:$src), - "mov{b}\t{$src, $dst|$dst, $src}", []>; + "mov{b}\t{$src, $dst|$dst, $src}", [], IIC_MOV>; def MOV16rr_REV : I<0x8B, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src), - "mov{w}\t{$src, $dst|$dst, $src}", []>, OpSize; + "mov{w}\t{$src, $dst|$dst, $src}", [], IIC_MOV>, OpSize; def MOV32rr_REV : I<0x8B, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src), - "mov{l}\t{$src, $dst|$dst, $src}", []>; + "mov{l}\t{$src, $dst|$dst, $src}", [], IIC_MOV>; def MOV64rr_REV : RI<0x8B, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src), - "mov{q}\t{$src, $dst|$dst, $src}", []>; + "mov{q}\t{$src, $dst|$dst, $src}", [], IIC_MOV>; } let canFoldAsLoad = 1, isReMaterializable = 1 in { def MOV8rm : I<0x8A, MRMSrcMem, (outs GR8 :$dst), (ins i8mem :$src), "mov{b}\t{$src, $dst|$dst, $src}", - [(set GR8:$dst, (loadi8 addr:$src))]>; + [(set GR8:$dst, (loadi8 addr:$src))], IIC_MOV_MEM>; def MOV16rm : I<0x8B, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src), "mov{w}\t{$src, $dst|$dst, $src}", - [(set GR16:$dst, (loadi16 addr:$src))]>, OpSize; + [(set GR16:$dst, (loadi16 addr:$src))], 
IIC_MOV_MEM>, OpSize; def MOV32rm : I<0x8B, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src), "mov{l}\t{$src, $dst|$dst, $src}", - [(set GR32:$dst, (loadi32 addr:$src))]>; + [(set GR32:$dst, (loadi32 addr:$src))], IIC_MOV_MEM>; def MOV64rm : RI<0x8B, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src), "mov{q}\t{$src, $dst|$dst, $src}", - [(set GR64:$dst, (load addr:$src))]>; + [(set GR64:$dst, (load addr:$src))], IIC_MOV_MEM>; } def MOV8mr : I<0x88, MRMDestMem, (outs), (ins i8mem :$dst, GR8 :$src), "mov{b}\t{$src, $dst|$dst, $src}", - [(store GR8:$src, addr:$dst)]>; + [(store GR8:$src, addr:$dst)], IIC_MOV_MEM>; def MOV16mr : I<0x89, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src), "mov{w}\t{$src, $dst|$dst, $src}", - [(store GR16:$src, addr:$dst)]>, OpSize; + [(store GR16:$src, addr:$dst)], IIC_MOV_MEM>, OpSize; def MOV32mr : I<0x89, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src), "mov{l}\t{$src, $dst|$dst, $src}", - [(store GR32:$src, addr:$dst)]>; + [(store GR32:$src, addr:$dst)], IIC_MOV_MEM>; def MOV64mr : RI<0x89, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src), "mov{q}\t{$src, $dst|$dst, $src}", - [(store GR64:$src, addr:$dst)]>; + [(store GR64:$src, addr:$dst)], IIC_MOV_MEM>; // Versions of MOV8rr, MOV8mr, and MOV8rm that use i8mem_NOREX and GR8_NOREX so // that they can be used for copying and storing h registers, which can't be @@ -1002,24 +1073,28 @@ let isCodeGenOnly = 1 in { let neverHasSideEffects = 1 in def MOV8rr_NOREX : I<0x88, MRMDestReg, (outs GR8_NOREX:$dst), (ins GR8_NOREX:$src), - "mov{b}\t{$src, $dst|$dst, $src} # NOREX", []>; + "mov{b}\t{$src, $dst|$dst, $src} # NOREX", [], IIC_MOV>; let mayStore = 1 in def MOV8mr_NOREX : I<0x88, MRMDestMem, (outs), (ins i8mem_NOREX:$dst, GR8_NOREX:$src), - "mov{b}\t{$src, $dst|$dst, $src} # NOREX", []>; + "mov{b}\t{$src, $dst|$dst, $src} # NOREX", [], + IIC_MOV_MEM>; let mayLoad = 1, neverHasSideEffects = 1, canFoldAsLoad = 1, isReMaterializable = 1 in def MOV8rm_NOREX : I<0x8A, MRMSrcMem, (outs GR8_NOREX:$dst), (ins i8mem_NOREX:$src), - "mov{b}\t{$src, $dst|$dst, $src} # NOREX", []>; + "mov{b}\t{$src, $dst|$dst, $src} # NOREX", [], + IIC_MOV_MEM>; } // Condition code ops, incl. set if equal/not equal/... 
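// SAHF loads AH into the low byte of EFLAGS (SF, ZF, AF, PF, CF); the hunk
// below replaces its opaque empty pattern with an explicit X86sahf node so
// instruction selection can model the EFLAGS definition directly.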
-let Defs = [EFLAGS], Uses = [AH], neverHasSideEffects = 1 in -def SAHF : I<0x9E, RawFrm, (outs), (ins), "sahf", []>; // flags = AH +let Defs = [EFLAGS], Uses = [AH] in +def SAHF : I<0x9E, RawFrm, (outs), (ins), "sahf", + [(set EFLAGS, (X86sahf AH))], IIC_AHF>; let Defs = [AH], Uses = [EFLAGS], neverHasSideEffects = 1 in -def LAHF : I<0x9F, RawFrm, (outs), (ins), "lahf", []>; // AH = flags +def LAHF : I<0x9F, RawFrm, (outs), (ins), "lahf", [], + IIC_AHF>; // AH = flags //===----------------------------------------------------------------------===// @@ -1028,13 +1103,14 @@ def LAHF : I<0x9F, RawFrm, (outs), (ins), "lahf", []>; // AH = flags let Defs = [EFLAGS] in { def BT16rr : I<0xA3, MRMDestReg, (outs), (ins GR16:$src1, GR16:$src2), "bt{w}\t{$src2, $src1|$src1, $src2}", - [(set EFLAGS, (X86bt GR16:$src1, GR16:$src2))]>, OpSize, TB; + [(set EFLAGS, (X86bt GR16:$src1, GR16:$src2))], IIC_BT_RR>, + OpSize, TB; def BT32rr : I<0xA3, MRMDestReg, (outs), (ins GR32:$src1, GR32:$src2), "bt{l}\t{$src2, $src1|$src1, $src2}", - [(set EFLAGS, (X86bt GR32:$src1, GR32:$src2))]>, TB; + [(set EFLAGS, (X86bt GR32:$src1, GR32:$src2))], IIC_BT_RR>, TB; def BT64rr : RI<0xA3, MRMDestReg, (outs), (ins GR64:$src1, GR64:$src2), "bt{q}\t{$src2, $src1|$src1, $src2}", - [(set EFLAGS, (X86bt GR64:$src1, GR64:$src2))]>, TB; + [(set EFLAGS, (X86bt GR64:$src1, GR64:$src2))], IIC_BT_RR>, TB; // Unlike with the register+register form, the memory+register form of the // bt instruction does not ignore the high bits of the index. From ISel's @@ -1045,31 +1121,33 @@ def BT16mr : I<0xA3, MRMDestMem, (outs), (ins i16mem:$src1, GR16:$src2), "bt{w}\t{$src2, $src1|$src1, $src2}", // [(X86bt (loadi16 addr:$src1), GR16:$src2), // (implicit EFLAGS)] - [] + [], IIC_BT_MR >, OpSize, TB, Requires<[FastBTMem]>; def BT32mr : I<0xA3, MRMDestMem, (outs), (ins i32mem:$src1, GR32:$src2), "bt{l}\t{$src2, $src1|$src1, $src2}", // [(X86bt (loadi32 addr:$src1), GR32:$src2), // (implicit EFLAGS)] - [] + [], IIC_BT_MR >, TB, Requires<[FastBTMem]>; def BT64mr : RI<0xA3, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2), "bt{q}\t{$src2, $src1|$src1, $src2}", // [(X86bt (loadi64 addr:$src1), GR64:$src2), // (implicit EFLAGS)] - [] + [], IIC_BT_MR >, TB; def BT16ri8 : Ii8<0xBA, MRM4r, (outs), (ins GR16:$src1, i16i8imm:$src2), "bt{w}\t{$src2, $src1|$src1, $src2}", - [(set EFLAGS, (X86bt GR16:$src1, i16immSExt8:$src2))]>, - OpSize, TB; + [(set EFLAGS, (X86bt GR16:$src1, i16immSExt8:$src2))], + IIC_BT_RI>, OpSize, TB; def BT32ri8 : Ii8<0xBA, MRM4r, (outs), (ins GR32:$src1, i32i8imm:$src2), "bt{l}\t{$src2, $src1|$src1, $src2}", - [(set EFLAGS, (X86bt GR32:$src1, i32immSExt8:$src2))]>, TB; + [(set EFLAGS, (X86bt GR32:$src1, i32immSExt8:$src2))], + IIC_BT_RI>, TB; def BT64ri8 : RIi8<0xBA, MRM4r, (outs), (ins GR64:$src1, i64i8imm:$src2), "bt{q}\t{$src2, $src1|$src1, $src2}", - [(set EFLAGS, (X86bt GR64:$src1, i64immSExt8:$src2))]>, TB; + [(set EFLAGS, (X86bt GR64:$src1, i64immSExt8:$src2))], + IIC_BT_RI>, TB; // Note that these instructions don't need FastBTMem because that // only applies when the other operand is in a register. 
When it's @@ -1077,91 +1155,103 @@ def BT64ri8 : RIi8<0xBA, MRM4r, (outs), (ins GR64:$src1, i64i8imm:$src2), def BT16mi8 : Ii8<0xBA, MRM4m, (outs), (ins i16mem:$src1, i16i8imm:$src2), "bt{w}\t{$src2, $src1|$src1, $src2}", [(set EFLAGS, (X86bt (loadi16 addr:$src1), i16immSExt8:$src2)) - ]>, OpSize, TB; + ], IIC_BT_MI>, OpSize, TB; def BT32mi8 : Ii8<0xBA, MRM4m, (outs), (ins i32mem:$src1, i32i8imm:$src2), "bt{l}\t{$src2, $src1|$src1, $src2}", [(set EFLAGS, (X86bt (loadi32 addr:$src1), i32immSExt8:$src2)) - ]>, TB; + ], IIC_BT_MI>, TB; def BT64mi8 : RIi8<0xBA, MRM4m, (outs), (ins i64mem:$src1, i64i8imm:$src2), "bt{q}\t{$src2, $src1|$src1, $src2}", [(set EFLAGS, (X86bt (loadi64 addr:$src1), - i64immSExt8:$src2))]>, TB; + i64immSExt8:$src2))], IIC_BT_MI>, TB; def BTC16rr : I<0xBB, MRMDestReg, (outs), (ins GR16:$src1, GR16:$src2), - "btc{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize, TB; + "btc{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RR>, + OpSize, TB; def BTC32rr : I<0xBB, MRMDestReg, (outs), (ins GR32:$src1, GR32:$src2), - "btc{l}\t{$src2, $src1|$src1, $src2}", []>, TB; + "btc{l}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RR>, TB; def BTC64rr : RI<0xBB, MRMDestReg, (outs), (ins GR64:$src1, GR64:$src2), - "btc{q}\t{$src2, $src1|$src1, $src2}", []>, TB; + "btc{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RR>, TB; def BTC16mr : I<0xBB, MRMDestMem, (outs), (ins i16mem:$src1, GR16:$src2), - "btc{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize, TB; + "btc{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MR>, + OpSize, TB; def BTC32mr : I<0xBB, MRMDestMem, (outs), (ins i32mem:$src1, GR32:$src2), - "btc{l}\t{$src2, $src1|$src1, $src2}", []>, TB; + "btc{l}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MR>, TB; def BTC64mr : RI<0xBB, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2), - "btc{q}\t{$src2, $src1|$src1, $src2}", []>, TB; + "btc{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MR>, TB; def BTC16ri8 : Ii8<0xBA, MRM7r, (outs), (ins GR16:$src1, i16i8imm:$src2), - "btc{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize, TB; + "btc{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RI>, + OpSize, TB; def BTC32ri8 : Ii8<0xBA, MRM7r, (outs), (ins GR32:$src1, i32i8imm:$src2), - "btc{l}\t{$src2, $src1|$src1, $src2}", []>, TB; + "btc{l}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RI>, TB; def BTC64ri8 : RIi8<0xBA, MRM7r, (outs), (ins GR64:$src1, i64i8imm:$src2), - "btc{q}\t{$src2, $src1|$src1, $src2}", []>, TB; + "btc{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RI>, TB; def BTC16mi8 : Ii8<0xBA, MRM7m, (outs), (ins i16mem:$src1, i16i8imm:$src2), - "btc{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize, TB; + "btc{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MI>, + OpSize, TB; def BTC32mi8 : Ii8<0xBA, MRM7m, (outs), (ins i32mem:$src1, i32i8imm:$src2), - "btc{l}\t{$src2, $src1|$src1, $src2}", []>, TB; + "btc{l}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MI>, TB; def BTC64mi8 : RIi8<0xBA, MRM7m, (outs), (ins i64mem:$src1, i64i8imm:$src2), - "btc{q}\t{$src2, $src1|$src1, $src2}", []>, TB; + "btc{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MI>, TB; def BTR16rr : I<0xB3, MRMDestReg, (outs), (ins GR16:$src1, GR16:$src2), - "btr{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize, TB; + "btr{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RR>, + OpSize, TB; def BTR32rr : I<0xB3, MRMDestReg, (outs), (ins GR32:$src1, GR32:$src2), - "btr{l}\t{$src2, $src1|$src1, $src2}", []>, TB; + "btr{l}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RR>, TB; def BTR64rr : RI<0xB3, MRMDestReg, (outs), (ins GR64:$src1, GR64:$src2), "btr{q}\t{$src2, 
$src1|$src1, $src2}", []>, TB; def BTR16mr : I<0xB3, MRMDestMem, (outs), (ins i16mem:$src1, GR16:$src2), - "btr{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize, TB; + "btr{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MR>, + OpSize, TB; def BTR32mr : I<0xB3, MRMDestMem, (outs), (ins i32mem:$src1, GR32:$src2), - "btr{l}\t{$src2, $src1|$src1, $src2}", []>, TB; + "btr{l}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MR>, TB; def BTR64mr : RI<0xB3, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2), - "btr{q}\t{$src2, $src1|$src1, $src2}", []>, TB; + "btr{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MR>, TB; def BTR16ri8 : Ii8<0xBA, MRM6r, (outs), (ins GR16:$src1, i16i8imm:$src2), - "btr{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize, TB; + "btr{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RI>, + OpSize, TB; def BTR32ri8 : Ii8<0xBA, MRM6r, (outs), (ins GR32:$src1, i32i8imm:$src2), - "btr{l}\t{$src2, $src1|$src1, $src2}", []>, TB; + "btr{l}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RI>, TB; def BTR64ri8 : RIi8<0xBA, MRM6r, (outs), (ins GR64:$src1, i64i8imm:$src2), - "btr{q}\t{$src2, $src1|$src1, $src2}", []>, TB; + "btr{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RI>, TB; def BTR16mi8 : Ii8<0xBA, MRM6m, (outs), (ins i16mem:$src1, i16i8imm:$src2), - "btr{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize, TB; + "btr{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MI>, + OpSize, TB; def BTR32mi8 : Ii8<0xBA, MRM6m, (outs), (ins i32mem:$src1, i32i8imm:$src2), - "btr{l}\t{$src2, $src1|$src1, $src2}", []>, TB; + "btr{l}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MI>, TB; def BTR64mi8 : RIi8<0xBA, MRM6m, (outs), (ins i64mem:$src1, i64i8imm:$src2), - "btr{q}\t{$src2, $src1|$src1, $src2}", []>, TB; + "btr{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MI>, TB; def BTS16rr : I<0xAB, MRMDestReg, (outs), (ins GR16:$src1, GR16:$src2), - "bts{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize, TB; + "bts{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RR>, + OpSize, TB; def BTS32rr : I<0xAB, MRMDestReg, (outs), (ins GR32:$src1, GR32:$src2), - "bts{l}\t{$src2, $src1|$src1, $src2}", []>, TB; + "bts{l}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RR>, TB; def BTS64rr : RI<0xAB, MRMDestReg, (outs), (ins GR64:$src1, GR64:$src2), - "bts{q}\t{$src2, $src1|$src1, $src2}", []>, TB; + "bts{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RR>, TB; def BTS16mr : I<0xAB, MRMDestMem, (outs), (ins i16mem:$src1, GR16:$src2), - "bts{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize, TB; + "bts{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MR>, + OpSize, TB; def BTS32mr : I<0xAB, MRMDestMem, (outs), (ins i32mem:$src1, GR32:$src2), - "bts{l}\t{$src2, $src1|$src1, $src2}", []>, TB; + "bts{l}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MR>, TB; def BTS64mr : RI<0xAB, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2), - "bts{q}\t{$src2, $src1|$src1, $src2}", []>, TB; + "bts{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MR>, TB; def BTS16ri8 : Ii8<0xBA, MRM5r, (outs), (ins GR16:$src1, i16i8imm:$src2), - "bts{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize, TB; + "bts{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RI>, + OpSize, TB; def BTS32ri8 : Ii8<0xBA, MRM5r, (outs), (ins GR32:$src1, i32i8imm:$src2), - "bts{l}\t{$src2, $src1|$src1, $src2}", []>, TB; + "bts{l}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RI>, TB; def BTS64ri8 : RIi8<0xBA, MRM5r, (outs), (ins GR64:$src1, i64i8imm:$src2), - "bts{q}\t{$src2, $src1|$src1, $src2}", []>, TB; + "bts{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RI>, TB; def BTS16mi8 : Ii8<0xBA, MRM5m, (outs), (ins i16mem:$src1, 
i16i8imm:$src2), - "bts{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize, TB; + "bts{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MI>, + OpSize, TB; def BTS32mi8 : Ii8<0xBA, MRM5m, (outs), (ins i32mem:$src1, i32i8imm:$src2), - "bts{l}\t{$src2, $src1|$src1, $src2}", []>, TB; + "bts{l}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MI>, TB; def BTS64mi8 : RIi8<0xBA, MRM5m, (outs), (ins i64mem:$src1, i64i8imm:$src2), - "bts{q}\t{$src2, $src1|$src1, $src2}", []>, TB; + "bts{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MI>, TB; } // Defs = [EFLAGS] @@ -1175,89 +1265,106 @@ def BTS64mi8 : RIi8<0xBA, MRM5m, (outs), (ins i64mem:$src1, i64i8imm:$src2), let Constraints = "$val = $dst" in { def XCHG8rm : I<0x86, MRMSrcMem, (outs GR8:$dst), (ins GR8:$val, i8mem:$ptr), "xchg{b}\t{$val, $ptr|$ptr, $val}", - [(set GR8:$dst, (atomic_swap_8 addr:$ptr, GR8:$val))]>; + [(set GR8:$dst, (atomic_swap_8 addr:$ptr, GR8:$val))], + IIC_XCHG_MEM>; def XCHG16rm : I<0x87, MRMSrcMem, (outs GR16:$dst),(ins GR16:$val, i16mem:$ptr), "xchg{w}\t{$val, $ptr|$ptr, $val}", - [(set GR16:$dst, (atomic_swap_16 addr:$ptr, GR16:$val))]>, + [(set GR16:$dst, (atomic_swap_16 addr:$ptr, GR16:$val))], + IIC_XCHG_MEM>, OpSize; def XCHG32rm : I<0x87, MRMSrcMem, (outs GR32:$dst),(ins GR32:$val, i32mem:$ptr), "xchg{l}\t{$val, $ptr|$ptr, $val}", - [(set GR32:$dst, (atomic_swap_32 addr:$ptr, GR32:$val))]>; + [(set GR32:$dst, (atomic_swap_32 addr:$ptr, GR32:$val))], + IIC_XCHG_MEM>; def XCHG64rm : RI<0x87, MRMSrcMem, (outs GR64:$dst),(ins GR64:$val,i64mem:$ptr), "xchg{q}\t{$val, $ptr|$ptr, $val}", - [(set GR64:$dst, (atomic_swap_64 addr:$ptr, GR64:$val))]>; + [(set GR64:$dst, (atomic_swap_64 addr:$ptr, GR64:$val))], + IIC_XCHG_MEM>; def XCHG8rr : I<0x86, MRMSrcReg, (outs GR8:$dst), (ins GR8:$val, GR8:$src), - "xchg{b}\t{$val, $src|$src, $val}", []>; + "xchg{b}\t{$val, $src|$src, $val}", [], IIC_XCHG_REG>; def XCHG16rr : I<0x87, MRMSrcReg, (outs GR16:$dst), (ins GR16:$val, GR16:$src), - "xchg{w}\t{$val, $src|$src, $val}", []>, OpSize; + "xchg{w}\t{$val, $src|$src, $val}", [], IIC_XCHG_REG>, OpSize; def XCHG32rr : I<0x87, MRMSrcReg, (outs GR32:$dst), (ins GR32:$val, GR32:$src), - "xchg{l}\t{$val, $src|$src, $val}", []>; + "xchg{l}\t{$val, $src|$src, $val}", [], IIC_XCHG_REG>; def XCHG64rr : RI<0x87, MRMSrcReg, (outs GR64:$dst), (ins GR64:$val,GR64:$src), - "xchg{q}\t{$val, $src|$src, $val}", []>; + "xchg{q}\t{$val, $src|$src, $val}", [], IIC_XCHG_REG>; } def XCHG16ar : I<0x90, AddRegFrm, (outs), (ins GR16:$src), - "xchg{w}\t{$src, %ax|AX, $src}", []>, OpSize; + "xchg{w}\t{$src, %ax|AX, $src}", [], IIC_XCHG_REG>, OpSize; def XCHG32ar : I<0x90, AddRegFrm, (outs), (ins GR32:$src), - "xchg{l}\t{$src, %eax|EAX, $src}", []>, Requires<[In32BitMode]>; + "xchg{l}\t{$src, %eax|EAX, $src}", [], IIC_XCHG_REG>, + Requires<[In32BitMode]>; // Uses GR32_NOAX in 64-bit mode to prevent encoding using the 0x90 NOP encoding. // xchg %eax, %eax needs to clear upper 32-bits of RAX so is not a NOP. 
def XCHG32ar64 : I<0x90, AddRegFrm, (outs), (ins GR32_NOAX:$src), - "xchg{l}\t{$src, %eax|EAX, $src}", []>, Requires<[In64BitMode]>; + "xchg{l}\t{$src, %eax|EAX, $src}", [], IIC_XCHG_REG>, + Requires<[In64BitMode]>; def XCHG64ar : RI<0x90, AddRegFrm, (outs), (ins GR64:$src), - "xchg{q}\t{$src, %rax|RAX, $src}", []>; + "xchg{q}\t{$src, %rax|RAX, $src}", [], IIC_XCHG_REG>; def XADD8rr : I<0xC0, MRMDestReg, (outs GR8:$dst), (ins GR8:$src), - "xadd{b}\t{$src, $dst|$dst, $src}", []>, TB; + "xadd{b}\t{$src, $dst|$dst, $src}", [], IIC_XADD_REG>, TB; def XADD16rr : I<0xC1, MRMDestReg, (outs GR16:$dst), (ins GR16:$src), - "xadd{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize; + "xadd{w}\t{$src, $dst|$dst, $src}", [], IIC_XADD_REG>, TB, + OpSize; def XADD32rr : I<0xC1, MRMDestReg, (outs GR32:$dst), (ins GR32:$src), - "xadd{l}\t{$src, $dst|$dst, $src}", []>, TB; + "xadd{l}\t{$src, $dst|$dst, $src}", [], IIC_XADD_REG>, TB; def XADD64rr : RI<0xC1, MRMDestReg, (outs GR64:$dst), (ins GR64:$src), - "xadd{q}\t{$src, $dst|$dst, $src}", []>, TB; + "xadd{q}\t{$src, $dst|$dst, $src}", [], IIC_XADD_REG>, TB; let mayLoad = 1, mayStore = 1 in { def XADD8rm : I<0xC0, MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src), - "xadd{b}\t{$src, $dst|$dst, $src}", []>, TB; + "xadd{b}\t{$src, $dst|$dst, $src}", [], IIC_XADD_MEM>, TB; def XADD16rm : I<0xC1, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src), - "xadd{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize; + "xadd{w}\t{$src, $dst|$dst, $src}", [], IIC_XADD_MEM>, TB, + OpSize; def XADD32rm : I<0xC1, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src), - "xadd{l}\t{$src, $dst|$dst, $src}", []>, TB; + "xadd{l}\t{$src, $dst|$dst, $src}", [], IIC_XADD_MEM>, TB; def XADD64rm : RI<0xC1, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src), - "xadd{q}\t{$src, $dst|$dst, $src}", []>, TB; + "xadd{q}\t{$src, $dst|$dst, $src}", [], IIC_XADD_MEM>, TB; } def CMPXCHG8rr : I<0xB0, MRMDestReg, (outs GR8:$dst), (ins GR8:$src), - "cmpxchg{b}\t{$src, $dst|$dst, $src}", []>, TB; + "cmpxchg{b}\t{$src, $dst|$dst, $src}", [], + IIC_CMPXCHG_REG8>, TB; def CMPXCHG16rr : I<0xB1, MRMDestReg, (outs GR16:$dst), (ins GR16:$src), - "cmpxchg{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize; + "cmpxchg{w}\t{$src, $dst|$dst, $src}", [], + IIC_CMPXCHG_REG>, TB, OpSize; def CMPXCHG32rr : I<0xB1, MRMDestReg, (outs GR32:$dst), (ins GR32:$src), - "cmpxchg{l}\t{$src, $dst|$dst, $src}", []>, TB; + "cmpxchg{l}\t{$src, $dst|$dst, $src}", [], + IIC_CMPXCHG_REG>, TB; def CMPXCHG64rr : RI<0xB1, MRMDestReg, (outs GR64:$dst), (ins GR64:$src), - "cmpxchg{q}\t{$src, $dst|$dst, $src}", []>, TB; + "cmpxchg{q}\t{$src, $dst|$dst, $src}", [], + IIC_CMPXCHG_REG>, TB; let mayLoad = 1, mayStore = 1 in { def CMPXCHG8rm : I<0xB0, MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src), - "cmpxchg{b}\t{$src, $dst|$dst, $src}", []>, TB; + "cmpxchg{b}\t{$src, $dst|$dst, $src}", [], + IIC_CMPXCHG_MEM8>, TB; def CMPXCHG16rm : I<0xB1, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src), - "cmpxchg{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize; + "cmpxchg{w}\t{$src, $dst|$dst, $src}", [], + IIC_CMPXCHG_MEM>, TB, OpSize; def CMPXCHG32rm : I<0xB1, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src), - "cmpxchg{l}\t{$src, $dst|$dst, $src}", []>, TB; + "cmpxchg{l}\t{$src, $dst|$dst, $src}", [], + IIC_CMPXCHG_MEM>, TB; def CMPXCHG64rm : RI<0xB1, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src), - "cmpxchg{q}\t{$src, $dst|$dst, $src}", []>, TB; + "cmpxchg{q}\t{$src, $dst|$dst, $src}", [], + IIC_CMPXCHG_MEM>, TB; } let Defs = [EAX, EDX, EFLAGS], Uses = [EAX, EBX, ECX, EDX] 
in def CMPXCHG8B : I<0xC7, MRM1m, (outs), (ins i64mem:$dst), - "cmpxchg8b\t$dst", []>, TB; + "cmpxchg8b\t$dst", [], IIC_CMPXCHG_8B>, TB; let Defs = [RAX, RDX, EFLAGS], Uses = [RAX, RBX, RCX, RDX] in def CMPXCHG16B : RI<0xC7, MRM1m, (outs), (ins i128mem:$dst), - "cmpxchg16b\t$dst", []>, TB, Requires<[HasCmpxchg16b]>; + "cmpxchg16b\t$dst", [], IIC_CMPXCHG_16B>, + TB, Requires<[HasCmpxchg16b]>; @@ -1281,69 +1388,75 @@ def REPNE_PREFIX : I<0xF2, RawFrm, (outs), (ins), "repne", []>; // String manipulation instructions -def LODSB : I<0xAC, RawFrm, (outs), (ins), "lodsb", []>; -def LODSW : I<0xAD, RawFrm, (outs), (ins), "lodsw", []>, OpSize; -def LODSD : I<0xAD, RawFrm, (outs), (ins), "lods{l|d}", []>; -def LODSQ : RI<0xAD, RawFrm, (outs), (ins), "lodsq", []>; +def LODSB : I<0xAC, RawFrm, (outs), (ins), "lodsb", [], IIC_LODS>; +def LODSW : I<0xAD, RawFrm, (outs), (ins), "lodsw", [], IIC_LODS>, OpSize; +def LODSD : I<0xAD, RawFrm, (outs), (ins), "lods{l|d}", [], IIC_LODS>; +def LODSQ : RI<0xAD, RawFrm, (outs), (ins), "lodsq", [], IIC_LODS>; -def OUTSB : I<0x6E, RawFrm, (outs), (ins), "outsb", []>; -def OUTSW : I<0x6F, RawFrm, (outs), (ins), "outsw", []>, OpSize; -def OUTSD : I<0x6F, RawFrm, (outs), (ins), "outs{l|d}", []>; +def OUTSB : I<0x6E, RawFrm, (outs), (ins), "outsb", [], IIC_OUTS>; +def OUTSW : I<0x6F, RawFrm, (outs), (ins), "outsw", [], IIC_OUTS>, OpSize; +def OUTSD : I<0x6F, RawFrm, (outs), (ins), "outs{l|d}", [], IIC_OUTS>; // Flag instructions -def CLC : I<0xF8, RawFrm, (outs), (ins), "clc", []>; -def STC : I<0xF9, RawFrm, (outs), (ins), "stc", []>; -def CLI : I<0xFA, RawFrm, (outs), (ins), "cli", []>; -def STI : I<0xFB, RawFrm, (outs), (ins), "sti", []>; -def CLD : I<0xFC, RawFrm, (outs), (ins), "cld", []>; -def STD : I<0xFD, RawFrm, (outs), (ins), "std", []>; -def CMC : I<0xF5, RawFrm, (outs), (ins), "cmc", []>; +def CLC : I<0xF8, RawFrm, (outs), (ins), "clc", [], IIC_CLC>; +def STC : I<0xF9, RawFrm, (outs), (ins), "stc", [], IIC_STC>; +def CLI : I<0xFA, RawFrm, (outs), (ins), "cli", [], IIC_CLI>; +def STI : I<0xFB, RawFrm, (outs), (ins), "sti", [], IIC_STI>; +def CLD : I<0xFC, RawFrm, (outs), (ins), "cld", [], IIC_CLD>; +def STD : I<0xFD, RawFrm, (outs), (ins), "std", [], IIC_STD>; +def CMC : I<0xF5, RawFrm, (outs), (ins), "cmc", [], IIC_CMC>; -def CLTS : I<0x06, RawFrm, (outs), (ins), "clts", []>, TB; +def CLTS : I<0x06, RawFrm, (outs), (ins), "clts", [], IIC_CLTS>, TB; // Table lookup instructions -def XLAT : I<0xD7, RawFrm, (outs), (ins), "xlatb", []>; +def XLAT : I<0xD7, RawFrm, (outs), (ins), "xlatb", [], IIC_XLAT>; // ASCII Adjust After Addition // sets AL, AH and CF and AF of EFLAGS and uses AL and AF of EFLAGS -def AAA : I<0x37, RawFrm, (outs), (ins), "aaa", []>, Requires<[In32BitMode]>; +def AAA : I<0x37, RawFrm, (outs), (ins), "aaa", [], IIC_AAA>, + Requires<[In32BitMode]>; // ASCII Adjust AX Before Division // sets AL, AH and EFLAGS and uses AL and AH def AAD8i8 : Ii8<0xD5, RawFrm, (outs), (ins i8imm:$src), - "aad\t$src", []>, Requires<[In32BitMode]>; + "aad\t$src", [], IIC_AAD>, Requires<[In32BitMode]>; // ASCII Adjust AX After Multiply // sets AL, AH and EFLAGS and uses AL def AAM8i8 : Ii8<0xD4, RawFrm, (outs), (ins i8imm:$src), - "aam\t$src", []>, Requires<[In32BitMode]>; + "aam\t$src", [], IIC_AAM>, Requires<[In32BitMode]>; // ASCII Adjust AL After Subtraction - sets // sets AL, AH and CF and AF of EFLAGS and uses AL and AF of EFLAGS -def AAS : I<0x3F, RawFrm, (outs), (ins), "aas", []>, Requires<[In32BitMode]>; +def AAS : I<0x3F, RawFrm, (outs), (ins), "aas", [], 
IIC_AAS>, + Requires<[In32BitMode]>; // Decimal Adjust AL after Addition // sets AL, CF and AF of EFLAGS and uses AL, CF and AF of EFLAGS -def DAA : I<0x27, RawFrm, (outs), (ins), "daa", []>, Requires<[In32BitMode]>; +def DAA : I<0x27, RawFrm, (outs), (ins), "daa", [], IIC_DAA>, + Requires<[In32BitMode]>; // Decimal Adjust AL after Subtraction // sets AL, CF and AF of EFLAGS and uses AL, CF and AF of EFLAGS -def DAS : I<0x2F, RawFrm, (outs), (ins), "das", []>, Requires<[In32BitMode]>; +def DAS : I<0x2F, RawFrm, (outs), (ins), "das", [], IIC_DAS>, + Requires<[In32BitMode]>; // Check Array Index Against Bounds def BOUNDS16rm : I<0x62, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src), - "bound\t{$src, $dst|$dst, $src}", []>, OpSize, + "bound\t{$src, $dst|$dst, $src}", [], IIC_BOUND>, OpSize, Requires<[In32BitMode]>; def BOUNDS32rm : I<0x62, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src), - "bound\t{$src, $dst|$dst, $src}", []>, + "bound\t{$src, $dst|$dst, $src}", [], IIC_BOUND>, Requires<[In32BitMode]>; // Adjust RPL Field of Segment Selector def ARPL16rr : I<0x63, MRMDestReg, (outs GR16:$src), (ins GR16:$dst), - "arpl\t{$src, $dst|$dst, $src}", []>, Requires<[In32BitMode]>; + "arpl\t{$src, $dst|$dst, $src}", [], IIC_ARPL_REG>, + Requires<[In32BitMode]>; def ARPL16mr : I<0x63, MRMSrcMem, (outs GR16:$src), (ins i16mem:$dst), - "arpl\t{$src, $dst|$dst, $src}", []>, Requires<[In32BitMode]>; + "arpl\t{$src, $dst|$dst, $src}", [], IIC_ARPL_MEM>, + Requires<[In32BitMode]>; //===----------------------------------------------------------------------===// // MOVBE Instructions @@ -1351,22 +1464,28 @@ def ARPL16mr : I<0x63, MRMSrcMem, (outs GR16:$src), (ins i16mem:$dst), let Predicates = [HasMOVBE] in { def MOVBE16rm : I<0xF0, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src), "movbe{w}\t{$src, $dst|$dst, $src}", - [(set GR16:$dst, (bswap (loadi16 addr:$src)))]>, OpSize, T8; + [(set GR16:$dst, (bswap (loadi16 addr:$src)))], IIC_MOVBE>, + OpSize, T8; def MOVBE32rm : I<0xF0, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src), "movbe{l}\t{$src, $dst|$dst, $src}", - [(set GR32:$dst, (bswap (loadi32 addr:$src)))]>, T8; + [(set GR32:$dst, (bswap (loadi32 addr:$src)))], IIC_MOVBE>, + T8; def MOVBE64rm : RI<0xF0, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src), "movbe{q}\t{$src, $dst|$dst, $src}", - [(set GR64:$dst, (bswap (loadi64 addr:$src)))]>, T8; + [(set GR64:$dst, (bswap (loadi64 addr:$src)))], IIC_MOVBE>, + T8; def MOVBE16mr : I<0xF1, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src), "movbe{w}\t{$src, $dst|$dst, $src}", - [(store (bswap GR16:$src), addr:$dst)]>, OpSize, T8; + [(store (bswap GR16:$src), addr:$dst)], IIC_MOVBE>, + OpSize, T8; def MOVBE32mr : I<0xF1, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src), "movbe{l}\t{$src, $dst|$dst, $src}", - [(store (bswap GR32:$src), addr:$dst)]>, T8; + [(store (bswap GR32:$src), addr:$dst)], IIC_MOVBE>, + T8; def MOVBE64mr : RI<0xF1, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src), "movbe{q}\t{$src, $dst|$dst, $src}", - [(store (bswap GR64:$src), addr:$dst)]>, T8; + [(store (bswap GR64:$src), addr:$dst)], IIC_MOVBE>, + T8; } //===----------------------------------------------------------------------===// @@ -1374,11 +1493,14 @@ let Predicates = [HasMOVBE] in { // let Predicates = [HasRDRAND], Defs = [EFLAGS] in { def RDRAND16r : I<0xC7, MRM6r, (outs GR16:$dst), (ins), - "rdrand{w}\t$dst", []>, OpSize, TB; + "rdrand{w}\t$dst", + [(set GR16:$dst, EFLAGS, (X86rdrand))]>, OpSize, TB; def RDRAND32r : I<0xC7, MRM6r, (outs GR32:$dst), (ins), - "rdrand{l}\t$dst", []>, TB; + 
"rdrand{l}\t$dst", + [(set GR32:$dst, EFLAGS, (X86rdrand))]>, TB; def RDRAND64r : RI<0xC7, MRM6r, (outs GR64:$dst), (ins), - "rdrand{q}\t$dst", []>, TB; + "rdrand{q}\t$dst", + [(set GR64:$dst, EFLAGS, (X86rdrand))]>, TB; } //===----------------------------------------------------------------------===// @@ -1774,9 +1896,9 @@ def : InstAlias<"fdivp %st(0), $op", (DIVR_FPrST0 RST:$op)>; def : InstAlias<"fdivrp %st(0), $op", (DIV_FPrST0 RST:$op)>; // We accept "fnstsw %eax" even though it only writes %ax. -def : InstAlias<"fnstsw %eax", (FNSTSW8r)>; -def : InstAlias<"fnstsw %al" , (FNSTSW8r)>; -def : InstAlias<"fnstsw" , (FNSTSW8r)>; +def : InstAlias<"fnstsw %eax", (FNSTSW16r)>; +def : InstAlias<"fnstsw %al" , (FNSTSW16r)>; +def : InstAlias<"fnstsw" , (FNSTSW16r)>; // lcall and ljmp aliases. This seems to be an odd mapping in 64-bit mode, but // this is compatible with what GAS does. diff --git a/lib/Target/X86/X86InstrMMX.td b/lib/Target/X86/X86InstrMMX.td index 63f96b6..c8f40bb 100644 --- a/lib/Target/X86/X86InstrMMX.td +++ b/lib/Target/X86/X86InstrMMX.td @@ -20,71 +20,130 @@ // MMX Multiclasses //===----------------------------------------------------------------------===// +def MMX_INTALU_ITINS : OpndItins< + IIC_MMX_ALU_RR, IIC_MMX_ALU_RM +>; + +def MMX_INTALUQ_ITINS : OpndItins< + IIC_MMX_ALUQ_RR, IIC_MMX_ALUQ_RM +>; + +def MMX_PHADDSUBW : OpndItins< + IIC_MMX_PHADDSUBW_RR, IIC_MMX_PHADDSUBW_RM +>; + +def MMX_PHADDSUBD : OpndItins< + IIC_MMX_PHADDSUBD_RR, IIC_MMX_PHADDSUBD_RM +>; + +def MMX_PMUL_ITINS : OpndItins< + IIC_MMX_PMUL, IIC_MMX_PMUL +>; + +def MMX_PSADBW_ITINS : OpndItins< + IIC_MMX_PSADBW, IIC_MMX_PSADBW +>; + +def MMX_MISC_FUNC_ITINS : OpndItins< + IIC_MMX_MISC_FUNC_MEM, IIC_MMX_MISC_FUNC_REG +>; + +def MMX_SHIFT_ITINS : ShiftOpndItins< + IIC_MMX_SHIFT_RR, IIC_MMX_SHIFT_RM, IIC_MMX_SHIFT_RI +>; + +def MMX_UNPCK_H_ITINS : OpndItins< + IIC_MMX_UNPCK_H_RR, IIC_MMX_UNPCK_H_RM +>; + +def MMX_UNPCK_L_ITINS : OpndItins< + IIC_MMX_UNPCK_L, IIC_MMX_UNPCK_L +>; + +def MMX_PCK_ITINS : OpndItins< + IIC_MMX_PCK_RR, IIC_MMX_PCK_RM +>; + +def MMX_PSHUF_ITINS : OpndItins< + IIC_MMX_PSHUF, IIC_MMX_PSHUF +>; + +def MMX_CVT_PD_ITINS : OpndItins< + IIC_MMX_CVT_PD_RR, IIC_MMX_CVT_PD_RM +>; + +def MMX_CVT_PS_ITINS : OpndItins< + IIC_MMX_CVT_PS_RR, IIC_MMX_CVT_PS_RM +>; + let Constraints = "$src1 = $dst" in { // MMXI_binop_rm_int - Simple MMX binary operator based on intrinsic. // When this is cleaned up, remove the FIXME from X86RecognizableInstr.cpp. multiclass MMXI_binop_rm_int opc, string OpcodeStr, Intrinsic IntId, - bit Commutable = 0> { + OpndItins itins, bit Commutable = 0> { def irr : MMXI { + [(set VR64:$dst, (IntId VR64:$src1, VR64:$src2))], itins.rr> { let isCommutable = Commutable; } def irm : MMXI; + (bitconvert (load_mmx addr:$src2))))], + itins.rm>; } multiclass MMXI_binop_rmi_int opc, bits<8> opc2, Format ImmForm, string OpcodeStr, Intrinsic IntId, - Intrinsic IntId2> { + Intrinsic IntId2, ShiftOpndItins itins> { def rr : MMXI; + [(set VR64:$dst, (IntId VR64:$src1, VR64:$src2))], itins.rr>; def rm : MMXI; + (bitconvert (load_mmx addr:$src2))))], + itins.rm>; def ri : MMXIi8; + [(set VR64:$dst, (IntId2 VR64:$src1, (i32 imm:$src2)))], itins.ri>; } } /// Unary MMX instructions requiring SSSE3. 
multiclass SS3I_unop_rm_int_mm opc, string OpcodeStr, - Intrinsic IntId64> { + Intrinsic IntId64, OpndItins itins> { def rr64 : SS38I; + [(set VR64:$dst, (IntId64 VR64:$src))], itins.rr>; def rm64 : SS38I; + (IntId64 (bitconvert (memopmmx addr:$src))))], + itins.rm>; } /// Binary MMX instructions requiring SSSE3. let ImmT = NoImm, Constraints = "$src1 = $dst" in { multiclass SS3I_binop_rm_int_mm opc, string OpcodeStr, - Intrinsic IntId64> { + Intrinsic IntId64, OpndItins itins> { let isCommutable = 0 in def rr64 : SS38I; + [(set VR64:$dst, (IntId64 VR64:$src1, VR64:$src2))], itins.rr>; def rm64 : SS38I; + (bitconvert (memopmmx addr:$src2))))], itins.rm>; } } @@ -103,13 +162,13 @@ multiclass ssse3_palign_mm { multiclass sse12_cvt_pint opc, RegisterClass SrcRC, RegisterClass DstRC, Intrinsic Int, X86MemOperand x86memop, PatFrag ld_frag, - string asm, Domain d> { + string asm, OpndItins itins, Domain d> { def irr : PI; + itins.rr, d>; def irm : PI; + itins.rm, d>; } multiclass sse12_cvt_pint_3addr opc, RegisterClass SrcRC, @@ -139,22 +198,24 @@ def MMX_EMMS : MMXI<0x77, RawFrm, (outs), (ins), "emms", def MMX_MOVD64rr : MMXI<0x6E, MRMSrcReg, (outs VR64:$dst), (ins GR32:$src), "movd\t{$src, $dst|$dst, $src}", [(set VR64:$dst, - (x86mmx (scalar_to_vector GR32:$src)))]>; + (x86mmx (scalar_to_vector GR32:$src)))], + IIC_MMX_MOV_MM_RM>; let canFoldAsLoad = 1 in def MMX_MOVD64rm : MMXI<0x6E, MRMSrcMem, (outs VR64:$dst), (ins i32mem:$src), "movd\t{$src, $dst|$dst, $src}", - [(set VR64:$dst, - (x86mmx (scalar_to_vector (loadi32 addr:$src))))]>; + [(set VR64:$dst, + (x86mmx (scalar_to_vector (loadi32 addr:$src))))], + IIC_MMX_MOV_MM_RM>; let mayStore = 1 in def MMX_MOVD64mr : MMXI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, VR64:$src), - "movd\t{$src, $dst|$dst, $src}", []>; + "movd\t{$src, $dst|$dst, $src}", [], IIC_MMX_MOV_MM_RM>; def MMX_MOVD64grr : MMXI<0x7E, MRMDestReg, (outs), (ins GR32:$dst, VR64:$src), - "movd\t{$src, $dst|$dst, $src}", []>; + "movd\t{$src, $dst|$dst, $src}", [], IIC_MMX_MOV_REG_MM>; let neverHasSideEffects = 1 in def MMX_MOVD64to64rr : MMXRI<0x6E, MRMSrcReg, (outs VR64:$dst), (ins GR64:$src), "movd\t{$src, $dst|$dst, $src}", - []>; + [], IIC_MMX_MOV_MM_RM>; // These are 64 bit moves, but since the OS X assembler doesn't // recognize a register-register movq, we write them as @@ -163,197 +224,276 @@ def MMX_MOVD64from64rr : MMXRI<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR64:$src), "movd\t{$src, $dst|$dst, $src}", [(set GR64:$dst, - (bitconvert VR64:$src))]>; + (bitconvert VR64:$src))], IIC_MMX_MOV_REG_MM>; def MMX_MOVD64rrv164 : MMXRI<0x6E, MRMSrcReg, (outs VR64:$dst), (ins GR64:$src), "movd\t{$src, $dst|$dst, $src}", [(set VR64:$dst, - (bitconvert GR64:$src))]>; + (bitconvert GR64:$src))], IIC_MMX_MOV_MM_RM>; let neverHasSideEffects = 1 in def MMX_MOVQ64rr : MMXI<0x6F, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src), - "movq\t{$src, $dst|$dst, $src}", []>; + "movq\t{$src, $dst|$dst, $src}", [], + IIC_MMX_MOVQ_RR>; let canFoldAsLoad = 1 in def MMX_MOVQ64rm : MMXI<0x6F, MRMSrcMem, (outs VR64:$dst), (ins i64mem:$src), "movq\t{$src, $dst|$dst, $src}", - [(set VR64:$dst, (load_mmx addr:$src))]>; + [(set VR64:$dst, (load_mmx addr:$src))], + IIC_MMX_MOVQ_RM>; def MMX_MOVQ64mr : MMXI<0x7F, MRMDestMem, (outs), (ins i64mem:$dst, VR64:$src), "movq\t{$src, $dst|$dst, $src}", - [(store (x86mmx VR64:$src), addr:$dst)]>; + [(store (x86mmx VR64:$src), addr:$dst)], + IIC_MMX_MOVQ_RM>; def MMX_MOVDQ2Qrr : SDIi8<0xD6, MRMSrcReg, (outs VR64:$dst), (ins VR128:$src), "movdq2q\t{$src, $dst|$dst, 
$src}", [(set VR64:$dst, (x86mmx (bitconvert (i64 (vector_extract (v2i64 VR128:$src), - (iPTR 0))))))]>; + (iPTR 0))))))], + IIC_MMX_MOVQ_RR>; -def MMX_MOVQ2DQrr : SSDIi8<0xD6, MRMSrcReg, (outs VR128:$dst), +def MMX_MOVQ2DQrr : S2SIi8<0xD6, MRMSrcReg, (outs VR128:$dst), (ins VR64:$src), "movq2dq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (v2i64 (scalar_to_vector - (i64 (bitconvert (x86mmx VR64:$src))))))]>; + (i64 (bitconvert (x86mmx VR64:$src))))))], + IIC_MMX_MOVQ_RR>; let neverHasSideEffects = 1 in -def MMX_MOVQ2FR64rr: SSDIi8<0xD6, MRMSrcReg, (outs FR64:$dst), - (ins VR64:$src), "movq2dq\t{$src, $dst|$dst, $src}", []>; +def MMX_MOVQ2FR64rr: S2SIi8<0xD6, MRMSrcReg, (outs FR64:$dst), + (ins VR64:$src), "movq2dq\t{$src, $dst|$dst, $src}", [], + IIC_MMX_MOVQ_RR>; def MMX_MOVFR642Qrr: SDIi8<0xD6, MRMSrcReg, (outs VR64:$dst), - (ins FR64:$src), "movdq2q\t{$src, $dst|$dst, $src}", []>; + (ins FR64:$src), "movdq2q\t{$src, $dst|$dst, $src}", [], + IIC_MMX_MOVQ_RR>; def MMX_MOVNTQmr : MMXI<0xE7, MRMDestMem, (outs), (ins i64mem:$dst, VR64:$src), "movntq\t{$src, $dst|$dst, $src}", - [(int_x86_mmx_movnt_dq addr:$dst, VR64:$src)]>; + [(int_x86_mmx_movnt_dq addr:$dst, VR64:$src)], + IIC_MMX_MOVQ_RM>; let AddedComplexity = 15 in // movd to MMX register zero-extends def MMX_MOVZDI2PDIrr : MMXI<0x6E, MRMSrcReg, (outs VR64:$dst), (ins GR32:$src), "movd\t{$src, $dst|$dst, $src}", [(set VR64:$dst, - (x86mmx (X86vzmovl (x86mmx (scalar_to_vector GR32:$src)))))]>; + (x86mmx (X86vzmovl (x86mmx (scalar_to_vector GR32:$src)))))], + IIC_MMX_MOV_MM_RM>; let AddedComplexity = 20 in def MMX_MOVZDI2PDIrm : MMXI<0x6E, MRMSrcMem, (outs VR64:$dst), (ins i32mem:$src), "movd\t{$src, $dst|$dst, $src}", [(set VR64:$dst, (x86mmx (X86vzmovl (x86mmx - (scalar_to_vector (loadi32 addr:$src))))))]>; + (scalar_to_vector (loadi32 addr:$src))))))], + IIC_MMX_MOV_MM_RM>; // Arithmetic Instructions -defm MMX_PABSB : SS3I_unop_rm_int_mm<0x1C, "pabsb", int_x86_ssse3_pabs_b>; -defm MMX_PABSW : SS3I_unop_rm_int_mm<0x1D, "pabsw", int_x86_ssse3_pabs_w>; -defm MMX_PABSD : SS3I_unop_rm_int_mm<0x1E, "pabsd", int_x86_ssse3_pabs_d>; +defm MMX_PABSB : SS3I_unop_rm_int_mm<0x1C, "pabsb", int_x86_ssse3_pabs_b, + MMX_INTALU_ITINS>; +defm MMX_PABSW : SS3I_unop_rm_int_mm<0x1D, "pabsw", int_x86_ssse3_pabs_w, + MMX_INTALU_ITINS>; +defm MMX_PABSD : SS3I_unop_rm_int_mm<0x1E, "pabsd", int_x86_ssse3_pabs_d, + MMX_INTALU_ITINS>; // -- Addition -defm MMX_PADDB : MMXI_binop_rm_int<0xFC, "paddb", int_x86_mmx_padd_b, 1>; -defm MMX_PADDW : MMXI_binop_rm_int<0xFD, "paddw", int_x86_mmx_padd_w, 1>; -defm MMX_PADDD : MMXI_binop_rm_int<0xFE, "paddd", int_x86_mmx_padd_d, 1>; -defm MMX_PADDQ : MMXI_binop_rm_int<0xD4, "paddq", int_x86_mmx_padd_q, 1>; -defm MMX_PADDSB : MMXI_binop_rm_int<0xEC, "paddsb" , int_x86_mmx_padds_b, 1>; -defm MMX_PADDSW : MMXI_binop_rm_int<0xED, "paddsw" , int_x86_mmx_padds_w, 1>; - -defm MMX_PADDUSB : MMXI_binop_rm_int<0xDC, "paddusb", int_x86_mmx_paddus_b, 1>; -defm MMX_PADDUSW : MMXI_binop_rm_int<0xDD, "paddusw", int_x86_mmx_paddus_w, 1>; - -defm MMX_PHADDW : SS3I_binop_rm_int_mm<0x01, "phaddw", int_x86_ssse3_phadd_w>; -defm MMX_PHADD : SS3I_binop_rm_int_mm<0x02, "phaddd", int_x86_ssse3_phadd_d>; -defm MMX_PHADDSW : SS3I_binop_rm_int_mm<0x03, "phaddsw",int_x86_ssse3_phadd_sw>; +defm MMX_PADDB : MMXI_binop_rm_int<0xFC, "paddb", int_x86_mmx_padd_b, + MMX_INTALU_ITINS, 1>; +defm MMX_PADDW : MMXI_binop_rm_int<0xFD, "paddw", int_x86_mmx_padd_w, + MMX_INTALU_ITINS, 1>; +defm MMX_PADDD : MMXI_binop_rm_int<0xFE, "paddd", int_x86_mmx_padd_d, + 
MMX_INTALU_ITINS, 1>; +defm MMX_PADDQ : MMXI_binop_rm_int<0xD4, "paddq", int_x86_mmx_padd_q, + MMX_INTALUQ_ITINS, 1>; +defm MMX_PADDSB : MMXI_binop_rm_int<0xEC, "paddsb" , int_x86_mmx_padds_b, + MMX_INTALU_ITINS, 1>; +defm MMX_PADDSW : MMXI_binop_rm_int<0xED, "paddsw" , int_x86_mmx_padds_w, + MMX_INTALU_ITINS, 1>; + +defm MMX_PADDUSB : MMXI_binop_rm_int<0xDC, "paddusb", int_x86_mmx_paddus_b, + MMX_INTALU_ITINS, 1>; +defm MMX_PADDUSW : MMXI_binop_rm_int<0xDD, "paddusw", int_x86_mmx_paddus_w, + MMX_INTALU_ITINS, 1>; + +defm MMX_PHADDW : SS3I_binop_rm_int_mm<0x01, "phaddw", int_x86_ssse3_phadd_w, + MMX_PHADDSUBW>; +defm MMX_PHADD : SS3I_binop_rm_int_mm<0x02, "phaddd", int_x86_ssse3_phadd_d, + MMX_PHADDSUBD>; +defm MMX_PHADDSW : SS3I_binop_rm_int_mm<0x03, "phaddsw",int_x86_ssse3_phadd_sw, + MMX_PHADDSUBW>; // -- Subtraction -defm MMX_PSUBB : MMXI_binop_rm_int<0xF8, "psubb", int_x86_mmx_psub_b>; -defm MMX_PSUBW : MMXI_binop_rm_int<0xF9, "psubw", int_x86_mmx_psub_w>; -defm MMX_PSUBD : MMXI_binop_rm_int<0xFA, "psubd", int_x86_mmx_psub_d>; -defm MMX_PSUBQ : MMXI_binop_rm_int<0xFB, "psubq", int_x86_mmx_psub_q>; - -defm MMX_PSUBSB : MMXI_binop_rm_int<0xE8, "psubsb" , int_x86_mmx_psubs_b>; -defm MMX_PSUBSW : MMXI_binop_rm_int<0xE9, "psubsw" , int_x86_mmx_psubs_w>; - -defm MMX_PSUBUSB : MMXI_binop_rm_int<0xD8, "psubusb", int_x86_mmx_psubus_b>; -defm MMX_PSUBUSW : MMXI_binop_rm_int<0xD9, "psubusw", int_x86_mmx_psubus_w>; - -defm MMX_PHSUBW : SS3I_binop_rm_int_mm<0x05, "phsubw", int_x86_ssse3_phsub_w>; -defm MMX_PHSUBD : SS3I_binop_rm_int_mm<0x06, "phsubd", int_x86_ssse3_phsub_d>; -defm MMX_PHSUBSW : SS3I_binop_rm_int_mm<0x07, "phsubsw",int_x86_ssse3_phsub_sw>; +defm MMX_PSUBB : MMXI_binop_rm_int<0xF8, "psubb", int_x86_mmx_psub_b, + MMX_INTALU_ITINS>; +defm MMX_PSUBW : MMXI_binop_rm_int<0xF9, "psubw", int_x86_mmx_psub_w, + MMX_INTALU_ITINS, 1>; +defm MMX_PSUBD : MMXI_binop_rm_int<0xFA, "psubd", int_x86_mmx_psub_d, + MMX_INTALU_ITINS, 1>; +defm MMX_PSUBQ : MMXI_binop_rm_int<0xFB, "psubq", int_x86_mmx_psub_q, + MMX_INTALUQ_ITINS, 1>; + +defm MMX_PSUBSB : MMXI_binop_rm_int<0xE8, "psubsb" , int_x86_mmx_psubs_b, + MMX_INTALU_ITINS, 1>; +defm MMX_PSUBSW : MMXI_binop_rm_int<0xE9, "psubsw" , int_x86_mmx_psubs_w, + MMX_INTALU_ITINS, 1>; + +defm MMX_PSUBUSB : MMXI_binop_rm_int<0xD8, "psubusb", int_x86_mmx_psubus_b, + MMX_INTALU_ITINS, 1>; +defm MMX_PSUBUSW : MMXI_binop_rm_int<0xD9, "psubusw", int_x86_mmx_psubus_w, + MMX_INTALU_ITINS, 1>; + +defm MMX_PHSUBW : SS3I_binop_rm_int_mm<0x05, "phsubw", int_x86_ssse3_phsub_w, + MMX_PHADDSUBW>; +defm MMX_PHSUBD : SS3I_binop_rm_int_mm<0x06, "phsubd", int_x86_ssse3_phsub_d, + MMX_PHADDSUBD>; +defm MMX_PHSUBSW : SS3I_binop_rm_int_mm<0x07, "phsubsw",int_x86_ssse3_phsub_sw, + MMX_PHADDSUBW>; // -- Multiplication -defm MMX_PMULLW : MMXI_binop_rm_int<0xD5, "pmullw", int_x86_mmx_pmull_w, 1>; - -defm MMX_PMULHW : MMXI_binop_rm_int<0xE5, "pmulhw", int_x86_mmx_pmulh_w, 1>; -defm MMX_PMULHUW : MMXI_binop_rm_int<0xE4, "pmulhuw", int_x86_mmx_pmulhu_w, 1>; -defm MMX_PMULUDQ : MMXI_binop_rm_int<0xF4, "pmuludq", int_x86_mmx_pmulu_dq, 1>; +defm MMX_PMULLW : MMXI_binop_rm_int<0xD5, "pmullw", int_x86_mmx_pmull_w, + MMX_PMUL_ITINS, 1>; + +defm MMX_PMULHW : MMXI_binop_rm_int<0xE5, "pmulhw", int_x86_mmx_pmulh_w, + MMX_PMUL_ITINS, 1>; +defm MMX_PMULHUW : MMXI_binop_rm_int<0xE4, "pmulhuw", int_x86_mmx_pmulhu_w, + MMX_PMUL_ITINS, 1>; +defm MMX_PMULUDQ : MMXI_binop_rm_int<0xF4, "pmuludq", int_x86_mmx_pmulu_dq, + MMX_PMUL_ITINS, 1>; let isCommutable = 1 in defm MMX_PMULHRSW : 
SS3I_binop_rm_int_mm<0x0B, "pmulhrsw", - int_x86_ssse3_pmul_hr_sw>; + int_x86_ssse3_pmul_hr_sw, MMX_PMUL_ITINS>; // -- Miscellanea -defm MMX_PMADDWD : MMXI_binop_rm_int<0xF5, "pmaddwd", int_x86_mmx_pmadd_wd, 1>; +defm MMX_PMADDWD : MMXI_binop_rm_int<0xF5, "pmaddwd", int_x86_mmx_pmadd_wd, + MMX_PMUL_ITINS, 1>; defm MMX_PMADDUBSW : SS3I_binop_rm_int_mm<0x04, "pmaddubsw", - int_x86_ssse3_pmadd_ub_sw>; -defm MMX_PAVGB : MMXI_binop_rm_int<0xE0, "pavgb", int_x86_mmx_pavg_b, 1>; -defm MMX_PAVGW : MMXI_binop_rm_int<0xE3, "pavgw", int_x86_mmx_pavg_w, 1>; - -defm MMX_PMINUB : MMXI_binop_rm_int<0xDA, "pminub", int_x86_mmx_pminu_b, 1>; -defm MMX_PMINSW : MMXI_binop_rm_int<0xEA, "pminsw", int_x86_mmx_pmins_w, 1>; - -defm MMX_PMAXUB : MMXI_binop_rm_int<0xDE, "pmaxub", int_x86_mmx_pmaxu_b, 1>; -defm MMX_PMAXSW : MMXI_binop_rm_int<0xEE, "pmaxsw", int_x86_mmx_pmaxs_w, 1>; - -defm MMX_PSADBW : MMXI_binop_rm_int<0xF6, "psadbw", int_x86_mmx_psad_bw, 1>; - -defm MMX_PSIGNB : SS3I_binop_rm_int_mm<0x08, "psignb", int_x86_ssse3_psign_b>; -defm MMX_PSIGNW : SS3I_binop_rm_int_mm<0x09, "psignw", int_x86_ssse3_psign_w>; -defm MMX_PSIGND : SS3I_binop_rm_int_mm<0x0A, "psignd", int_x86_ssse3_psign_d>; + int_x86_ssse3_pmadd_ub_sw, MMX_PMUL_ITINS>; +defm MMX_PAVGB : MMXI_binop_rm_int<0xE0, "pavgb", int_x86_mmx_pavg_b, + MMX_MISC_FUNC_ITINS, 1>; +defm MMX_PAVGW : MMXI_binop_rm_int<0xE3, "pavgw", int_x86_mmx_pavg_w, + MMX_MISC_FUNC_ITINS, 1>; + +defm MMX_PMINUB : MMXI_binop_rm_int<0xDA, "pminub", int_x86_mmx_pminu_b, + MMX_MISC_FUNC_ITINS, 1>; +defm MMX_PMINSW : MMXI_binop_rm_int<0xEA, "pminsw", int_x86_mmx_pmins_w, + MMX_MISC_FUNC_ITINS, 1>; + +defm MMX_PMAXUB : MMXI_binop_rm_int<0xDE, "pmaxub", int_x86_mmx_pmaxu_b, + MMX_MISC_FUNC_ITINS, 1>; +defm MMX_PMAXSW : MMXI_binop_rm_int<0xEE, "pmaxsw", int_x86_mmx_pmaxs_w, + MMX_MISC_FUNC_ITINS, 1>; + +defm MMX_PSADBW : MMXI_binop_rm_int<0xF6, "psadbw", int_x86_mmx_psad_bw, + MMX_PSADBW_ITINS, 1>; + +defm MMX_PSIGNB : SS3I_binop_rm_int_mm<0x08, "psignb", int_x86_ssse3_psign_b, + MMX_MISC_FUNC_ITINS>; +defm MMX_PSIGNW : SS3I_binop_rm_int_mm<0x09, "psignw", int_x86_ssse3_psign_w, + MMX_MISC_FUNC_ITINS>; +defm MMX_PSIGND : SS3I_binop_rm_int_mm<0x0A, "psignd", int_x86_ssse3_psign_d, + MMX_MISC_FUNC_ITINS>; let Constraints = "$src1 = $dst" in defm MMX_PALIGN : ssse3_palign_mm<"palignr", int_x86_mmx_palignr_b>; // Logical Instructions -defm MMX_PAND : MMXI_binop_rm_int<0xDB, "pand", int_x86_mmx_pand, 1>; -defm MMX_POR : MMXI_binop_rm_int<0xEB, "por" , int_x86_mmx_por, 1>; -defm MMX_PXOR : MMXI_binop_rm_int<0xEF, "pxor", int_x86_mmx_pxor, 1>; -defm MMX_PANDN : MMXI_binop_rm_int<0xDF, "pandn", int_x86_mmx_pandn>; +defm MMX_PAND : MMXI_binop_rm_int<0xDB, "pand", int_x86_mmx_pand, + MMX_INTALU_ITINS, 1>; +defm MMX_POR : MMXI_binop_rm_int<0xEB, "por" , int_x86_mmx_por, + MMX_INTALU_ITINS, 1>; +defm MMX_PXOR : MMXI_binop_rm_int<0xEF, "pxor", int_x86_mmx_pxor, + MMX_INTALU_ITINS, 1>; +defm MMX_PANDN : MMXI_binop_rm_int<0xDF, "pandn", int_x86_mmx_pandn, + MMX_INTALU_ITINS>; // Shift Instructions defm MMX_PSRLW : MMXI_binop_rmi_int<0xD1, 0x71, MRM2r, "psrlw", - int_x86_mmx_psrl_w, int_x86_mmx_psrli_w>; + int_x86_mmx_psrl_w, int_x86_mmx_psrli_w, + MMX_SHIFT_ITINS>; defm MMX_PSRLD : MMXI_binop_rmi_int<0xD2, 0x72, MRM2r, "psrld", - int_x86_mmx_psrl_d, int_x86_mmx_psrli_d>; + int_x86_mmx_psrl_d, int_x86_mmx_psrli_d, + MMX_SHIFT_ITINS>; defm MMX_PSRLQ : MMXI_binop_rmi_int<0xD3, 0x73, MRM2r, "psrlq", - int_x86_mmx_psrl_q, int_x86_mmx_psrli_q>; + int_x86_mmx_psrl_q, int_x86_mmx_psrli_q, + 
MMX_SHIFT_ITINS>; defm MMX_PSLLW : MMXI_binop_rmi_int<0xF1, 0x71, MRM6r, "psllw", - int_x86_mmx_psll_w, int_x86_mmx_pslli_w>; + int_x86_mmx_psll_w, int_x86_mmx_pslli_w, + MMX_SHIFT_ITINS>; defm MMX_PSLLD : MMXI_binop_rmi_int<0xF2, 0x72, MRM6r, "pslld", - int_x86_mmx_psll_d, int_x86_mmx_pslli_d>; + int_x86_mmx_psll_d, int_x86_mmx_pslli_d, + MMX_SHIFT_ITINS>; defm MMX_PSLLQ : MMXI_binop_rmi_int<0xF3, 0x73, MRM6r, "psllq", - int_x86_mmx_psll_q, int_x86_mmx_pslli_q>; + int_x86_mmx_psll_q, int_x86_mmx_pslli_q, + MMX_SHIFT_ITINS>; defm MMX_PSRAW : MMXI_binop_rmi_int<0xE1, 0x71, MRM4r, "psraw", - int_x86_mmx_psra_w, int_x86_mmx_psrai_w>; + int_x86_mmx_psra_w, int_x86_mmx_psrai_w, + MMX_SHIFT_ITINS>; defm MMX_PSRAD : MMXI_binop_rmi_int<0xE2, 0x72, MRM4r, "psrad", - int_x86_mmx_psra_d, int_x86_mmx_psrai_d>; + int_x86_mmx_psra_d, int_x86_mmx_psrai_d, + MMX_SHIFT_ITINS>; // Comparison Instructions -defm MMX_PCMPEQB : MMXI_binop_rm_int<0x74, "pcmpeqb", int_x86_mmx_pcmpeq_b>; -defm MMX_PCMPEQW : MMXI_binop_rm_int<0x75, "pcmpeqw", int_x86_mmx_pcmpeq_w>; -defm MMX_PCMPEQD : MMXI_binop_rm_int<0x76, "pcmpeqd", int_x86_mmx_pcmpeq_d>; - -defm MMX_PCMPGTB : MMXI_binop_rm_int<0x64, "pcmpgtb", int_x86_mmx_pcmpgt_b>; -defm MMX_PCMPGTW : MMXI_binop_rm_int<0x65, "pcmpgtw", int_x86_mmx_pcmpgt_w>; -defm MMX_PCMPGTD : MMXI_binop_rm_int<0x66, "pcmpgtd", int_x86_mmx_pcmpgt_d>; +defm MMX_PCMPEQB : MMXI_binop_rm_int<0x74, "pcmpeqb", int_x86_mmx_pcmpeq_b, + MMX_INTALU_ITINS>; +defm MMX_PCMPEQW : MMXI_binop_rm_int<0x75, "pcmpeqw", int_x86_mmx_pcmpeq_w, + MMX_INTALU_ITINS>; +defm MMX_PCMPEQD : MMXI_binop_rm_int<0x76, "pcmpeqd", int_x86_mmx_pcmpeq_d, + MMX_INTALU_ITINS>; + +defm MMX_PCMPGTB : MMXI_binop_rm_int<0x64, "pcmpgtb", int_x86_mmx_pcmpgt_b, + MMX_INTALU_ITINS>; +defm MMX_PCMPGTW : MMXI_binop_rm_int<0x65, "pcmpgtw", int_x86_mmx_pcmpgt_w, + MMX_INTALU_ITINS>; +defm MMX_PCMPGTD : MMXI_binop_rm_int<0x66, "pcmpgtd", int_x86_mmx_pcmpgt_d, + MMX_INTALU_ITINS>; // -- Unpack Instructions defm MMX_PUNPCKHBW : MMXI_binop_rm_int<0x68, "punpckhbw", - int_x86_mmx_punpckhbw>; + int_x86_mmx_punpckhbw, + MMX_UNPCK_H_ITINS>; defm MMX_PUNPCKHWD : MMXI_binop_rm_int<0x69, "punpckhwd", - int_x86_mmx_punpckhwd>; + int_x86_mmx_punpckhwd, + MMX_UNPCK_H_ITINS>; defm MMX_PUNPCKHDQ : MMXI_binop_rm_int<0x6A, "punpckhdq", - int_x86_mmx_punpckhdq>; + int_x86_mmx_punpckhdq, + MMX_UNPCK_H_ITINS>; defm MMX_PUNPCKLBW : MMXI_binop_rm_int<0x60, "punpcklbw", - int_x86_mmx_punpcklbw>; + int_x86_mmx_punpcklbw, + MMX_UNPCK_L_ITINS>; defm MMX_PUNPCKLWD : MMXI_binop_rm_int<0x61, "punpcklwd", - int_x86_mmx_punpcklwd>; + int_x86_mmx_punpcklwd, + MMX_UNPCK_L_ITINS>; defm MMX_PUNPCKLDQ : MMXI_binop_rm_int<0x62, "punpckldq", - int_x86_mmx_punpckldq>; + int_x86_mmx_punpckldq, + MMX_UNPCK_L_ITINS>; // -- Pack Instructions -defm MMX_PACKSSWB : MMXI_binop_rm_int<0x63, "packsswb", int_x86_mmx_packsswb>; -defm MMX_PACKSSDW : MMXI_binop_rm_int<0x6B, "packssdw", int_x86_mmx_packssdw>; -defm MMX_PACKUSWB : MMXI_binop_rm_int<0x67, "packuswb", int_x86_mmx_packuswb>; +defm MMX_PACKSSWB : MMXI_binop_rm_int<0x63, "packsswb", int_x86_mmx_packsswb, + MMX_PCK_ITINS>; +defm MMX_PACKSSDW : MMXI_binop_rm_int<0x6B, "packssdw", int_x86_mmx_packssdw, + MMX_PCK_ITINS>; +defm MMX_PACKUSWB : MMXI_binop_rm_int<0x67, "packuswb", int_x86_mmx_packuswb, + MMX_PCK_ITINS>; // -- Shuffle Instructions -defm MMX_PSHUFB : SS3I_binop_rm_int_mm<0x00, "pshufb", int_x86_ssse3_pshuf_b>; +defm MMX_PSHUFB : SS3I_binop_rm_int_mm<0x00, "pshufb", int_x86_ssse3_pshuf_b, + MMX_PSHUF_ITINS>; def MMX_PSHUFWri : 
MMXIi8<0x70, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src1, i8imm:$src2), "pshufw\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set VR64:$dst, - (int_x86_sse_pshuf_w VR64:$src1, imm:$src2))]>; + (int_x86_sse_pshuf_w VR64:$src1, imm:$src2))], + IIC_MMX_PSHUF>; def MMX_PSHUFWmi : MMXIi8<0x70, MRMSrcMem, (outs VR64:$dst), (ins i64mem:$src1, i8imm:$src2), "pshufw\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set VR64:$dst, (int_x86_sse_pshuf_w (load_mmx addr:$src1), - imm:$src2))]>; - + imm:$src2))], + IIC_MMX_PSHUF>; @@ -361,24 +501,24 @@ def MMX_PSHUFWmi : MMXIi8<0x70, MRMSrcMem, // -- Conversion Instructions defm MMX_CVTPS2PI : sse12_cvt_pint<0x2D, VR128, VR64, int_x86_sse_cvtps2pi, f64mem, load, "cvtps2pi\t{$src, $dst|$dst, $src}", - SSEPackedSingle>, TB; + MMX_CVT_PS_ITINS, SSEPackedSingle>, TB; defm MMX_CVTPD2PI : sse12_cvt_pint<0x2D, VR128, VR64, int_x86_sse_cvtpd2pi, f128mem, memop, "cvtpd2pi\t{$src, $dst|$dst, $src}", - SSEPackedDouble>, TB, OpSize; + MMX_CVT_PD_ITINS, SSEPackedDouble>, TB, OpSize; defm MMX_CVTTPS2PI : sse12_cvt_pint<0x2C, VR128, VR64, int_x86_sse_cvttps2pi, f64mem, load, "cvttps2pi\t{$src, $dst|$dst, $src}", - SSEPackedSingle>, TB; + MMX_CVT_PS_ITINS, SSEPackedSingle>, TB; defm MMX_CVTTPD2PI : sse12_cvt_pint<0x2C, VR128, VR64, int_x86_sse_cvttpd2pi, f128mem, memop, "cvttpd2pi\t{$src, $dst|$dst, $src}", - SSEPackedDouble>, TB, OpSize; + MMX_CVT_PD_ITINS, SSEPackedDouble>, TB, OpSize; defm MMX_CVTPI2PD : sse12_cvt_pint<0x2A, VR64, VR128, int_x86_sse_cvtpi2pd, i64mem, load, "cvtpi2pd\t{$src, $dst|$dst, $src}", - SSEPackedDouble>, TB, OpSize; + MMX_CVT_PD_ITINS, SSEPackedDouble>, TB, OpSize; let Constraints = "$src1 = $dst" in { defm MMX_CVTPI2PS : sse12_cvt_pint_3addr<0x2A, VR64, VR128, int_x86_sse_cvtpi2ps, i64mem, load, "cvtpi2ps\t{$src2, $dst|$dst, $src2}", - SSEPackedSingle>, TB; + SSEPackedSingle>, TB; } // Extract / Insert @@ -386,14 +526,16 @@ def MMX_PEXTRWirri: MMXIi8<0xC5, MRMSrcReg, (outs GR32:$dst), (ins VR64:$src1, i32i8imm:$src2), "pextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set GR32:$dst, (int_x86_mmx_pextr_w VR64:$src1, - (iPTR imm:$src2)))]>; + (iPTR imm:$src2)))], + IIC_MMX_PEXTR>; let Constraints = "$src1 = $dst" in { def MMX_PINSRWirri : MMXIi8<0xC4, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src1, GR32:$src2, i32i8imm:$src3), "pinsrw\t{$src3, $src2, $dst|$dst, $src2, $src3}", [(set VR64:$dst, (int_x86_mmx_pinsr_w VR64:$src1, - GR32:$src2, (iPTR imm:$src3)))]>; + GR32:$src2, (iPTR imm:$src3)))], + IIC_MMX_PINSRW>; def MMX_PINSRWirmi : MMXIi8<0xC4, MRMSrcMem, (outs VR64:$dst), @@ -401,7 +543,8 @@ let Constraints = "$src1 = $dst" in { "pinsrw\t{$src3, $src2, $dst|$dst, $src2, $src3}", [(set VR64:$dst, (int_x86_mmx_pinsr_w VR64:$src1, (i32 (anyext (loadi16 addr:$src2))), - (iPTR imm:$src3)))]>; + (iPTR imm:$src3)))], + IIC_MMX_PINSRW>; } // Mask creation @@ -411,20 +554,6 @@ def MMX_PMOVMSKBrr : MMXI<0xD7, MRMSrcReg, (outs GR32:$dst), (ins VR64:$src), (int_x86_mmx_pmovmskb VR64:$src))]>; -// MMX to XMM for vector types -def MMX_X86movq2dq : SDNode<"X86ISD::MOVQ2DQ", SDTypeProfile<1, 1, - [SDTCisVT<0, v2i64>, SDTCisVT<1, x86mmx>]>>; - -def : Pat<(v2i64 (MMX_X86movq2dq VR64:$src)), - (v2i64 (MMX_MOVQ2DQrr VR64:$src))>; - -def : Pat<(v2i64 (MMX_X86movq2dq (load_mmx addr:$src))), - (v2i64 (MOVQI2PQIrm addr:$src))>; - -def : Pat<(v2i64 (MMX_X86movq2dq - (x86mmx (scalar_to_vector (loadi32 addr:$src))))), - (v2i64 (MOVDI2PDIrm addr:$src))>; - // Low word of XMM to MMX. 
def MMX_X86movdq2q : SDNode<"X86ISD::MOVDQ2Q", SDTypeProfile<1, 1, [SDTCisVT<0, x86mmx>, SDTCisVT<1, v2i64>]>>; @@ -439,11 +568,13 @@ def : Pat<(x86mmx (MMX_X86movdq2q (loadv2i64 addr:$src))), let Uses = [EDI] in def MMX_MASKMOVQ : MMXI<0xF7, MRMSrcReg, (outs), (ins VR64:$src, VR64:$mask), "maskmovq\t{$mask, $src|$src, $mask}", - [(int_x86_mmx_maskmovq VR64:$src, VR64:$mask, EDI)]>; + [(int_x86_mmx_maskmovq VR64:$src, VR64:$mask, EDI)], + IIC_MMX_MASKMOV>; let Uses = [RDI] in def MMX_MASKMOVQ64: MMXI64<0xF7, MRMSrcReg, (outs), (ins VR64:$src, VR64:$mask), "maskmovq\t{$mask, $src|$src, $mask}", - [(int_x86_mmx_maskmovq VR64:$src, VR64:$mask, RDI)]>; + [(int_x86_mmx_maskmovq VR64:$src, VR64:$mask, RDI)], + IIC_MMX_MASKMOV>; // 64-bit bit convert. def : Pat<(x86mmx (bitconvert (i64 GR64:$src))), diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 65e3c1e..e4c35b9 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -245,9 +245,9 @@ multiclass sse12_fp_packed_int opc, string OpcodeStr, RegisterClass RC, // A vector extract of the first f32/f64 position is a subregister copy def : Pat<(f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))), - (f32 (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>; + (COPY_TO_REGCLASS (v4f32 VR128:$src), FR32)>; def : Pat<(f64 (vector_extract (v2f64 VR128:$src), (iPTR 0))), - (f64 (EXTRACT_SUBREG (v2f64 VR128:$src), sub_sd))>; + (COPY_TO_REGCLASS (v2f64 VR128:$src), FR64)>; // A 128-bit subvector extract from the first 256-bit vector position // is a subregister copy that needs no instruction. @@ -283,14 +283,14 @@ def : Pat<(insert_subvector undef, (v16i8 VR128:$src), (i32 0)), // Implicitly promote a 32-bit scalar to a vector. def : Pat<(v4f32 (scalar_to_vector FR32:$src)), - (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FR32:$src, sub_ss)>; + (COPY_TO_REGCLASS FR32:$src, VR128)>; def : Pat<(v8f32 (scalar_to_vector FR32:$src)), - (INSERT_SUBREG (v8f32 (IMPLICIT_DEF)), FR32:$src, sub_ss)>; + (COPY_TO_REGCLASS FR32:$src, VR128)>; // Implicitly promote a 64-bit scalar to a vector. def : Pat<(v2f64 (scalar_to_vector FR64:$src)), - (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FR64:$src, sub_sd)>; + (COPY_TO_REGCLASS FR64:$src, VR128)>; def : Pat<(v4f64 (scalar_to_vector FR64:$src)), - (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)), FR64:$src, sub_sd)>; + (COPY_TO_REGCLASS FR64:$src, VR128)>; // Bitcasts between 128-bit vector types. Return the original type since // no instruction is needed for the conversion @@ -562,59 +562,57 @@ let Predicates = [HasAVX] in { def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector FR32:$src)))), (VMOVSSrr (v4f32 (V_SET0)), FR32:$src)>; def : Pat<(v4f32 (X86vzmovl (v4f32 VR128:$src))), - (VMOVSSrr (v4f32 (V_SET0)), - (f32 (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss)))>; + (VMOVSSrr (v4f32 (V_SET0)), (COPY_TO_REGCLASS VR128:$src, FR32))>; def : Pat<(v4i32 (X86vzmovl (v4i32 VR128:$src))), - (VMOVSSrr (v4i32 (V_SET0)), - (EXTRACT_SUBREG (v4i32 VR128:$src), sub_ss))>; + (VMOVSSrr (v4i32 (V_SET0)), (COPY_TO_REGCLASS VR128:$src, FR32))>; def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector FR64:$src)))), (VMOVSDrr (v2f64 (V_SET0)), FR64:$src)>; // Move low f32 and clear high bits. 
def : Pat<(v8f32 (X86vzmovl (v8f32 VR256:$src))), (SUBREG_TO_REG (i32 0), - (VMOVSSrr (v4f32 (V_SET0)), - (EXTRACT_SUBREG (v8f32 VR256:$src), sub_ss)), sub_xmm)>; + (VMOVSSrr (v4f32 (V_SET0)), + (EXTRACT_SUBREG (v8f32 VR256:$src), sub_xmm)), sub_xmm)>; def : Pat<(v8i32 (X86vzmovl (v8i32 VR256:$src))), (SUBREG_TO_REG (i32 0), - (VMOVSSrr (v4i32 (V_SET0)), - (EXTRACT_SUBREG (v8i32 VR256:$src), sub_ss)), sub_xmm)>; + (VMOVSSrr (v4i32 (V_SET0)), + (EXTRACT_SUBREG (v8i32 VR256:$src), sub_xmm)), sub_xmm)>; } let AddedComplexity = 20 in { // MOVSSrm zeros the high parts of the register; represent this // with SUBREG_TO_REG. The AVX versions also write: DST[255:128] <- 0 def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector (loadf32 addr:$src))))), - (SUBREG_TO_REG (i32 0), (VMOVSSrm addr:$src), sub_ss)>; + (COPY_TO_REGCLASS (VMOVSSrm addr:$src), VR128)>; def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))), - (SUBREG_TO_REG (i32 0), (VMOVSSrm addr:$src), sub_ss)>; + (COPY_TO_REGCLASS (VMOVSSrm addr:$src), VR128)>; def : Pat<(v4f32 (X86vzmovl (loadv4f32 addr:$src))), - (SUBREG_TO_REG (i32 0), (VMOVSSrm addr:$src), sub_ss)>; + (COPY_TO_REGCLASS (VMOVSSrm addr:$src), VR128)>; // MOVSDrm zeros the high parts of the register; represent this // with SUBREG_TO_REG. The AVX versions also write: DST[255:128] <- 0 def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector (loadf64 addr:$src))))), - (SUBREG_TO_REG (i64 0), (VMOVSDrm addr:$src), sub_sd)>; + (COPY_TO_REGCLASS (VMOVSDrm addr:$src), VR128)>; def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))), - (SUBREG_TO_REG (i64 0), (VMOVSDrm addr:$src), sub_sd)>; + (COPY_TO_REGCLASS (VMOVSDrm addr:$src), VR128)>; def : Pat<(v2f64 (X86vzmovl (loadv2f64 addr:$src))), - (SUBREG_TO_REG (i64 0), (VMOVSDrm addr:$src), sub_sd)>; + (COPY_TO_REGCLASS (VMOVSDrm addr:$src), VR128)>; def : Pat<(v2f64 (X86vzmovl (bc_v2f64 (loadv4f32 addr:$src)))), - (SUBREG_TO_REG (i64 0), (VMOVSDrm addr:$src), sub_sd)>; + (COPY_TO_REGCLASS (VMOVSDrm addr:$src), VR128)>; def : Pat<(v2f64 (X86vzload addr:$src)), - (SUBREG_TO_REG (i64 0), (VMOVSDrm addr:$src), sub_sd)>; + (COPY_TO_REGCLASS (VMOVSDrm addr:$src), VR128)>; // Represent the same patterns above but in the form they appear for // 256-bit types def : Pat<(v8i32 (X86vzmovl (insert_subvector undef, (v4i32 (scalar_to_vector (loadi32 addr:$src))), (i32 0)))), - (SUBREG_TO_REG (i32 0), (VMOVSSrm addr:$src), sub_ss)>; + (SUBREG_TO_REG (i32 0), (VMOVSSrm addr:$src), sub_xmm)>; def : Pat<(v8f32 (X86vzmovl (insert_subvector undef, (v4f32 (scalar_to_vector (loadf32 addr:$src))), (i32 0)))), - (SUBREG_TO_REG (i32 0), (VMOVSSrm addr:$src), sub_ss)>; + (SUBREG_TO_REG (i32 0), (VMOVSSrm addr:$src), sub_xmm)>; def : Pat<(v4f64 (X86vzmovl (insert_subvector undef, (v2f64 (scalar_to_vector (loadf64 addr:$src))), (i32 0)))), - (SUBREG_TO_REG (i32 0), (VMOVSDrm addr:$src), sub_sd)>; + (SUBREG_TO_REG (i32 0), (VMOVSDrm addr:$src), sub_xmm)>; } def : Pat<(v8f32 (X86vzmovl (insert_subvector undef, (v4f32 (scalar_to_vector FR32:$src)), (i32 0)))), @@ -628,70 +626,68 @@ let Predicates = [HasAVX] in { sub_xmm)>; def : Pat<(v4i64 (X86vzmovl (insert_subvector undef, (v2i64 (scalar_to_vector (loadi64 addr:$src))), (i32 0)))), - (SUBREG_TO_REG (i64 0), (VMOVSDrm addr:$src), sub_sd)>; + (SUBREG_TO_REG (i64 0), (VMOVSDrm addr:$src), sub_xmm)>; // Move low f64 and clear high bits. 
def : Pat<(v4f64 (X86vzmovl (v4f64 VR256:$src))), (SUBREG_TO_REG (i32 0), - (VMOVSDrr (v2f64 (V_SET0)), - (EXTRACT_SUBREG (v4f64 VR256:$src), sub_sd)), sub_xmm)>; + (VMOVSDrr (v2f64 (V_SET0)), + (EXTRACT_SUBREG (v4f64 VR256:$src), sub_xmm)), sub_xmm)>; def : Pat<(v4i64 (X86vzmovl (v4i64 VR256:$src))), (SUBREG_TO_REG (i32 0), - (VMOVSDrr (v2i64 (V_SET0)), - (EXTRACT_SUBREG (v4i64 VR256:$src), sub_sd)), sub_xmm)>; + (VMOVSDrr (v2i64 (V_SET0)), + (EXTRACT_SUBREG (v4i64 VR256:$src), sub_xmm)), sub_xmm)>; // Extract and store. def : Pat<(store (f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))), addr:$dst), - (VMOVSSmr addr:$dst, - (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>; + (VMOVSSmr addr:$dst, (COPY_TO_REGCLASS (v4f32 VR128:$src), FR32))>; def : Pat<(store (f64 (vector_extract (v2f64 VR128:$src), (iPTR 0))), addr:$dst), - (VMOVSDmr addr:$dst, - (EXTRACT_SUBREG (v2f64 VR128:$src), sub_sd))>; + (VMOVSDmr addr:$dst, (COPY_TO_REGCLASS (v2f64 VR128:$src), FR64))>; // Shuffle with VMOVSS def : Pat<(v4i32 (X86Movss VR128:$src1, VR128:$src2)), (VMOVSSrr (v4i32 VR128:$src1), - (EXTRACT_SUBREG (v4i32 VR128:$src2), sub_ss))>; + (COPY_TO_REGCLASS (v4i32 VR128:$src2), FR32))>; def : Pat<(v4f32 (X86Movss VR128:$src1, VR128:$src2)), (VMOVSSrr (v4f32 VR128:$src1), - (EXTRACT_SUBREG (v4f32 VR128:$src2), sub_ss))>; + (COPY_TO_REGCLASS (v4f32 VR128:$src2), FR32))>; // 256-bit variants def : Pat<(v8i32 (X86Movss VR256:$src1, VR256:$src2)), (SUBREG_TO_REG (i32 0), - (VMOVSSrr (EXTRACT_SUBREG (v8i32 VR256:$src1), sub_ss), - (EXTRACT_SUBREG (v8i32 VR256:$src2), sub_ss)), sub_xmm)>; + (VMOVSSrr (EXTRACT_SUBREG (v8i32 VR256:$src1), sub_xmm), + (EXTRACT_SUBREG (v8i32 VR256:$src2), sub_xmm)), + sub_xmm)>; def : Pat<(v8f32 (X86Movss VR256:$src1, VR256:$src2)), (SUBREG_TO_REG (i32 0), - (VMOVSSrr (EXTRACT_SUBREG (v8f32 VR256:$src1), sub_ss), - (EXTRACT_SUBREG (v8f32 VR256:$src2), sub_ss)), sub_xmm)>; + (VMOVSSrr (EXTRACT_SUBREG (v8f32 VR256:$src1), sub_xmm), + (EXTRACT_SUBREG (v8f32 VR256:$src2), sub_xmm)), + sub_xmm)>; // Shuffle with VMOVSD def : Pat<(v2i64 (X86Movsd VR128:$src1, VR128:$src2)), - (VMOVSDrr (v2i64 VR128:$src1), - (EXTRACT_SUBREG (v2i64 VR128:$src2), sub_sd))>; + (VMOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>; def : Pat<(v2f64 (X86Movsd VR128:$src1, VR128:$src2)), - (VMOVSDrr (v2f64 VR128:$src1), - (EXTRACT_SUBREG (v2f64 VR128:$src2), sub_sd))>; + (VMOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>; def : Pat<(v4f32 (X86Movsd VR128:$src1, VR128:$src2)), - (VMOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4f32 VR128:$src2), - sub_sd))>; + (VMOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>; def : Pat<(v4i32 (X86Movsd VR128:$src1, VR128:$src2)), - (VMOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4i32 VR128:$src2), - sub_sd))>; + (VMOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>; // 256-bit variants def : Pat<(v4i64 (X86Movsd VR256:$src1, VR256:$src2)), (SUBREG_TO_REG (i32 0), - (VMOVSDrr (EXTRACT_SUBREG (v4i64 VR256:$src1), sub_sd), - (EXTRACT_SUBREG (v4i64 VR256:$src2), sub_sd)), sub_xmm)>; + (VMOVSDrr (EXTRACT_SUBREG (v4i64 VR256:$src1), sub_xmm), + (EXTRACT_SUBREG (v4i64 VR256:$src2), sub_xmm)), + sub_xmm)>; def : Pat<(v4f64 (X86Movsd VR256:$src1, VR256:$src2)), (SUBREG_TO_REG (i32 0), - (VMOVSDrr (EXTRACT_SUBREG (v4f64 VR256:$src1), sub_sd), - (EXTRACT_SUBREG (v4f64 VR256:$src2), sub_sd)), sub_xmm)>; + (VMOVSDrr (EXTRACT_SUBREG (v4f64 VR256:$src1), sub_xmm), + (EXTRACT_SUBREG (v4f64 VR256:$src2), sub_xmm)), + sub_xmm)>; // FIXME: Instead of a X86Movlps there should be 
a X86Movsd here, the problem @@ -699,17 +695,13 @@ let Predicates = [HasAVX] in { // it has two uses through a bitcast. One use disappears at isel time and the // fold opportunity reappears. def : Pat<(v2f64 (X86Movlpd VR128:$src1, VR128:$src2)), - (VMOVSDrr VR128:$src1, (EXTRACT_SUBREG (v2f64 VR128:$src2), - sub_sd))>; + (VMOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>; def : Pat<(v2i64 (X86Movlpd VR128:$src1, VR128:$src2)), - (VMOVSDrr VR128:$src1, (EXTRACT_SUBREG (v2i64 VR128:$src2), - sub_sd))>; + (VMOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>; def : Pat<(v4f32 (X86Movlps VR128:$src1, VR128:$src2)), - (VMOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4f32 VR128:$src2), - sub_sd))>; + (VMOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>; def : Pat<(v4i32 (X86Movlps VR128:$src1, VR128:$src2)), - (VMOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4i32 VR128:$src2), - sub_sd))>; + (VMOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>; } let Predicates = [HasSSE1] in { @@ -719,37 +711,31 @@ let Predicates = [HasSSE1] in { def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector FR32:$src)))), (MOVSSrr (v4f32 (V_SET0)), FR32:$src)>; def : Pat<(v4f32 (X86vzmovl (v4f32 VR128:$src))), - (MOVSSrr (v4f32 (V_SET0)), - (f32 (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss)))>; + (MOVSSrr (v4f32 (V_SET0)), (COPY_TO_REGCLASS VR128:$src, FR32))>; def : Pat<(v4i32 (X86vzmovl (v4i32 VR128:$src))), - (MOVSSrr (v4i32 (V_SET0)), - (EXTRACT_SUBREG (v4i32 VR128:$src), sub_ss))>; + (MOVSSrr (v4i32 (V_SET0)), (COPY_TO_REGCLASS VR128:$src, FR32))>; } let AddedComplexity = 20 in { - // MOVSSrm zeros the high parts of the register; represent this - // with SUBREG_TO_REG. + // MOVSSrm already zeros the high parts of the register. def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector (loadf32 addr:$src))))), - (SUBREG_TO_REG (i32 0), (MOVSSrm addr:$src), sub_ss)>; + (COPY_TO_REGCLASS (MOVSSrm addr:$src), VR128)>; def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))), - (SUBREG_TO_REG (i32 0), (MOVSSrm addr:$src), sub_ss)>; + (COPY_TO_REGCLASS (MOVSSrm addr:$src), VR128)>; def : Pat<(v4f32 (X86vzmovl (loadv4f32 addr:$src))), - (SUBREG_TO_REG (i32 0), (MOVSSrm addr:$src), sub_ss)>; + (COPY_TO_REGCLASS (MOVSSrm addr:$src), VR128)>; } // Extract and store. def : Pat<(store (f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))), addr:$dst), - (MOVSSmr addr:$dst, - (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>; + (MOVSSmr addr:$dst, (COPY_TO_REGCLASS VR128:$src, FR32))>; // Shuffle with MOVSS def : Pat<(v4i32 (X86Movss VR128:$src1, VR128:$src2)), - (MOVSSrr (v4i32 VR128:$src1), - (EXTRACT_SUBREG (v4i32 VR128:$src2), sub_ss))>; + (MOVSSrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR32))>; def : Pat<(v4f32 (X86Movss VR128:$src1, VR128:$src2)), - (MOVSSrr (v4f32 VR128:$src1), - (EXTRACT_SUBREG (v4f32 VR128:$src2), sub_ss))>; + (MOVSSrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR32))>; } let Predicates = [HasSSE2] in { @@ -761,50 +747,46 @@ let Predicates = [HasSSE2] in { } let AddedComplexity = 20 in { - // MOVSDrm zeros the high parts of the register; represent this - // with SUBREG_TO_REG. + // MOVSDrm already zeros the high parts of the register. 
def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector (loadf64 addr:$src))))), - (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>; + (COPY_TO_REGCLASS (MOVSDrm addr:$src), VR128)>; def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))), - (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>; + (COPY_TO_REGCLASS (MOVSDrm addr:$src), VR128)>; def : Pat<(v2f64 (X86vzmovl (loadv2f64 addr:$src))), - (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>; + (COPY_TO_REGCLASS (MOVSDrm addr:$src), VR128)>; def : Pat<(v2f64 (X86vzmovl (bc_v2f64 (loadv4f32 addr:$src)))), - (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>; + (COPY_TO_REGCLASS (MOVSDrm addr:$src), VR128)>; def : Pat<(v2f64 (X86vzload addr:$src)), - (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>; + (COPY_TO_REGCLASS (MOVSDrm addr:$src), VR128)>; } // Extract and store. def : Pat<(store (f64 (vector_extract (v2f64 VR128:$src), (iPTR 0))), addr:$dst), - (MOVSDmr addr:$dst, - (EXTRACT_SUBREG (v2f64 VR128:$src), sub_sd))>; + (MOVSDmr addr:$dst, (COPY_TO_REGCLASS VR128:$src, FR64))>; // Shuffle with MOVSD def : Pat<(v2i64 (X86Movsd VR128:$src1, VR128:$src2)), - (MOVSDrr (v2i64 VR128:$src1), - (EXTRACT_SUBREG (v2i64 VR128:$src2), sub_sd))>; + (MOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>; def : Pat<(v2f64 (X86Movsd VR128:$src1, VR128:$src2)), - (MOVSDrr (v2f64 VR128:$src1), - (EXTRACT_SUBREG (v2f64 VR128:$src2), sub_sd))>; + (MOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>; def : Pat<(v4f32 (X86Movsd VR128:$src1, VR128:$src2)), - (MOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4f32 VR128:$src2),sub_sd))>; + (MOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>; def : Pat<(v4i32 (X86Movsd VR128:$src1, VR128:$src2)), - (MOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4i32 VR128:$src2),sub_sd))>; + (MOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>; // FIXME: Instead of a X86Movlps there should be a X86Movsd here, the problem // is during lowering, where it's not possible to recognize the fold cause // it has two uses through a bitcast. One use disappears at isel time and the // fold opportunity reappears. 
 def : Pat<(v2f64 (X86Movlpd VR128:$src1, VR128:$src2)),
-            (MOVSDrr VR128:$src1, (EXTRACT_SUBREG (v2f64 VR128:$src2),sub_sd))>;
+            (MOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>;
 def : Pat<(v2i64 (X86Movlpd VR128:$src1, VR128:$src2)),
-            (MOVSDrr VR128:$src1, (EXTRACT_SUBREG (v2i64 VR128:$src2),sub_sd))>;
+            (MOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>;
 def : Pat<(v4f32 (X86Movlps VR128:$src1, VR128:$src2)),
-            (MOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4f32 VR128:$src2),sub_sd))>;
+            (MOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>;
 def : Pat<(v4i32 (X86Movlps VR128:$src1, VR128:$src2)),
-            (MOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4i32 VR128:$src2),sub_sd))>;
+            (MOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>;
 }

 //===----------------------------------------------------------------------===//
@@ -1416,14 +1398,15 @@ multiclass sse12_cvt_s<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
 }

 multiclass sse12_cvt_p<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
-                         SDNode OpNode, X86MemOperand x86memop, PatFrag ld_frag,
-                         string asm, Domain d, OpndItins itins> {
-  def rr : PI;
-  def rm : PI;
+                       X86MemOperand x86memop, string asm, Domain d,
+                       OpndItins itins> {
+let neverHasSideEffects = 1 in {
+  def rr : I;
+  let mayLoad = 1 in
+  def rm : I;
+}
 }

 multiclass sse12_vcvt_avx<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
@@ -1443,7 +1426,7 @@ defm VCVTTSS2SI : sse12_cvt_s<0x2C, FR32, GR32, fp_to_sint, f32mem, loadf32,
                                 SSE_CVT_SS2SI_32>,
                                 XS, VEX, VEX_LIG;
 defm VCVTTSS2SI64 : sse12_cvt_s<0x2C, FR32, GR64, fp_to_sint, f32mem, loadf32,
-                                "cvttss2si\t{$src, $dst|$dst, $src}",
+                                "cvttss2si{q}\t{$src, $dst|$dst, $src}",
                                 SSE_CVT_SS2SI_64>,
                                 XS, VEX, VEX_W, VEX_LIG;
 defm VCVTTSD2SI : sse12_cvt_s<0x2C, FR64, GR32, fp_to_sint, f64mem, loadf64,
@@ -1451,7 +1434,7 @@ defm VCVTTSD2SI : sse12_cvt_s<0x2C, FR64, GR32, fp_to_sint, f64mem, loadf64,
                               SSE_CVT_SD2SI>, XD, VEX, VEX_LIG;
 defm VCVTTSD2SI64 : sse12_cvt_s<0x2C, FR64, GR64, fp_to_sint, f64mem, loadf64,
-                                "cvttsd2si\t{$src, $dst|$dst, $src}",
+                                "cvttsd2si{q}\t{$src, $dst|$dst, $src}",
                                 SSE_CVT_SD2SI>,
                                 XD, VEX, VEX_W, VEX_LIG;
@@ -1465,11 +1448,14 @@ defm VCVTSI2SS64 : sse12_vcvt_avx<0x2A, GR64, FR32, i64mem, "cvtsi2ss{q}">,
                                   XS, VEX_4V, VEX_W, VEX_LIG;
 defm VCVTSI2SD : sse12_vcvt_avx<0x2A, GR32, FR64, i32mem, "cvtsi2sd">,
                                 XD, VEX_4V, VEX_LIG;
-defm VCVTSI2SDL : sse12_vcvt_avx<0x2A, GR32, FR64, i32mem, "cvtsi2sd{l}">,
-                                 XD, VEX_4V, VEX_LIG;
 defm VCVTSI2SD64 : sse12_vcvt_avx<0x2A, GR64, FR64, i64mem, "cvtsi2sd{q}">,
                                  XD, VEX_4V, VEX_W, VEX_LIG;
+def : InstAlias<"vcvtsi2sd{l}\t{$src, $src1, $dst|$dst, $src1, $src}",
+                (VCVTSI2SDrr FR64:$dst, FR64:$src1, GR32:$src)>;
+def : InstAlias<"vcvtsi2sd{l}\t{$src, $src1, $dst|$dst, $src1, $src}",
+                (VCVTSI2SDrm FR64:$dst, FR64:$src1, i32mem:$src)>;
+
 let Predicates = [HasAVX], AddedComplexity = 1 in {
   def : Pat<(f32 (sint_to_fp (loadi32 addr:$src))),
             (VCVTSI2SSrm (f32 (IMPLICIT_DEF)), addr:$src)>;
@@ -1519,14 +1505,14 @@ defm CVTSI2SD64 : sse12_cvt_s<0x2A, GR64, FR64, sint_to_fp, i64mem, loadi64,
 // and/or XMM operand(s).
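// Note on the "{q}" additions above (a hedged sketch of the convention):
// braces in these asm strings select between AT&T and Intel spellings, so
//   "cvttsd2si{q}\t{$src, $dst|$dst, $src}"
// prints as "cvttsd2siq" under AT&T syntax and as plain "cvttsd2si" under
// Intel syntax, giving the 64-bit forms the size suffix AT&T assemblers
// expect without changing the Intel spelling.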
multiclass sse12_cvt_sint opc, RegisterClass SrcRC, RegisterClass DstRC, - Intrinsic Int, X86MemOperand x86memop, PatFrag ld_frag, + Intrinsic Int, Operand memop, ComplexPattern mem_cpat, string asm, OpndItins itins> { def rr : SI; - def rm : SI; + [(set DstRC:$dst, (Int mem_cpat:$src))], itins.rm>; } multiclass sse12_cvt_sint_3addr opc, RegisterClass SrcRC, @@ -1548,30 +1534,31 @@ multiclass sse12_cvt_sint_3addr opc, RegisterClass SrcRC, itins.rm>; } -defm VCVTSD2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse2_cvtsd2si, - f128mem, load, "cvtsd2si", SSE_CVT_SD2SI>, XD, VEX, VEX_LIG; +defm VCVTSD2SI : sse12_cvt_sint<0x2D, VR128, GR32, + int_x86_sse2_cvtsd2si, sdmem, sse_load_f64, "cvtsd2si{l}", + SSE_CVT_SD2SI>, XD, VEX, VEX_LIG; defm VCVTSD2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, - int_x86_sse2_cvtsd2si64, f128mem, load, "cvtsd2si", - SSE_CVT_SD2SI>, XD, VEX, VEX_W, VEX_LIG; + int_x86_sse2_cvtsd2si64, sdmem, sse_load_f64, "cvtsd2si{q}", + SSE_CVT_SD2SI>, XD, VEX, VEX_W, VEX_LIG; defm CVTSD2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse2_cvtsd2si, - f128mem, load, "cvtsd2si{l}", SSE_CVT_SD2SI>, XD; + sdmem, sse_load_f64, "cvtsd2si{l}", SSE_CVT_SD2SI>, XD; defm CVTSD2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, int_x86_sse2_cvtsd2si64, - f128mem, load, "cvtsd2si{q}", SSE_CVT_SD2SI>, XD, REX_W; + sdmem, sse_load_f64, "cvtsd2si{q}", SSE_CVT_SD2SI>, XD, REX_W; defm Int_VCVTSI2SS : sse12_cvt_sint_3addr<0x2A, GR32, VR128, int_x86_sse_cvtsi2ss, i32mem, loadi32, "cvtsi2ss", SSE_CVT_Scalar, 0>, XS, VEX_4V; defm Int_VCVTSI2SS64 : sse12_cvt_sint_3addr<0x2A, GR64, VR128, - int_x86_sse_cvtsi642ss, i64mem, loadi64, "cvtsi2ss", + int_x86_sse_cvtsi642ss, i64mem, loadi64, "cvtsi2ss{q}", SSE_CVT_Scalar, 0>, XS, VEX_4V, VEX_W; defm Int_VCVTSI2SD : sse12_cvt_sint_3addr<0x2A, GR32, VR128, int_x86_sse2_cvtsi2sd, i32mem, loadi32, "cvtsi2sd", SSE_CVT_Scalar, 0>, XD, VEX_4V; defm Int_VCVTSI2SD64 : sse12_cvt_sint_3addr<0x2A, GR64, VR128, - int_x86_sse2_cvtsi642sd, i64mem, loadi64, "cvtsi2sd", + int_x86_sse2_cvtsi642sd, i64mem, loadi64, "cvtsi2sd{q}", SSE_CVT_Scalar, 0>, XD, VEX_4V, VEX_W; @@ -1587,94 +1574,71 @@ let Constraints = "$src1 = $dst" in { "cvtsi2sd", SSE_CVT_Scalar>, XD; defm Int_CVTSI2SD64 : sse12_cvt_sint_3addr<0x2A, GR64, VR128, int_x86_sse2_cvtsi642sd, i64mem, loadi64, - "cvtsi2sd", SSE_CVT_Scalar>, XD, REX_W; + "cvtsi2sd{q}", SSE_CVT_Scalar>, XD, REX_W; } /// SSE 1 Only // Aliases for intrinsics defm Int_VCVTTSS2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse_cvttss2si, - f32mem, load, "cvttss2si", + ssmem, sse_load_f32, "cvttss2si", SSE_CVT_SS2SI_32>, XS, VEX; defm Int_VCVTTSS2SI64 : sse12_cvt_sint<0x2C, VR128, GR64, - int_x86_sse_cvttss2si64, f32mem, load, - "cvttss2si", SSE_CVT_SS2SI_64>, - XS, VEX, VEX_W; + int_x86_sse_cvttss2si64, ssmem, sse_load_f32, + "cvttss2si{q}", SSE_CVT_SS2SI_64>, + XS, VEX, VEX_W; defm Int_VCVTTSD2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse2_cvttsd2si, - f128mem, load, "cvttsd2si", SSE_CVT_SD2SI>, - XD, VEX; + sdmem, sse_load_f64, "cvttsd2si", + SSE_CVT_SD2SI>, XD, VEX; defm Int_VCVTTSD2SI64 : sse12_cvt_sint<0x2C, VR128, GR64, - int_x86_sse2_cvttsd2si64, f128mem, load, - "cvttsd2si", SSE_CVT_SD2SI>, - XD, VEX, VEX_W; + int_x86_sse2_cvttsd2si64, sdmem, sse_load_f64, + "cvttsd2si{q}", SSE_CVT_SD2SI>, + XD, VEX, VEX_W; defm Int_CVTTSS2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse_cvttss2si, - f32mem, load, "cvttss2si", + ssmem, sse_load_f32, "cvttss2si", SSE_CVT_SS2SI_32>, XS; defm Int_CVTTSS2SI64 : sse12_cvt_sint<0x2C, VR128, GR64, - 
int_x86_sse_cvttss2si64, f32mem, load, - "cvttss2si{q}", SSE_CVT_SS2SI_64>, - XS, REX_W; + int_x86_sse_cvttss2si64, ssmem, sse_load_f32, + "cvttss2si{q}", SSE_CVT_SS2SI_64>, XS, REX_W; defm Int_CVTTSD2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse2_cvttsd2si, - f128mem, load, "cvttsd2si", SSE_CVT_SD2SI>, - XD; + sdmem, sse_load_f64, "cvttsd2si", + SSE_CVT_SD2SI>, XD; defm Int_CVTTSD2SI64 : sse12_cvt_sint<0x2C, VR128, GR64, - int_x86_sse2_cvttsd2si64, f128mem, load, - "cvttsd2si{q}", SSE_CVT_SD2SI>, - XD, REX_W; - -let Pattern = [] in { -defm VCVTSS2SI : sse12_cvt_s<0x2D, FR32, GR32, undef, f32mem, load, - "cvtss2si{l}\t{$src, $dst|$dst, $src}", - SSE_CVT_SS2SI_32>, XS, VEX, VEX_LIG; -defm VCVTSS2SI64 : sse12_cvt_s<0x2D, FR32, GR64, undef, f32mem, load, - "cvtss2si\t{$src, $dst|$dst, $src}", - SSE_CVT_SS2SI_64>, XS, VEX, VEX_W, VEX_LIG; -defm VCVTDQ2PS : sse12_cvt_p<0x5B, VR128, VR128, undef, i128mem, load, - "cvtdq2ps\t{$src, $dst|$dst, $src}", - SSEPackedSingle, SSE_CVT_PS>, TB, VEX; -defm VCVTDQ2PSY : sse12_cvt_p<0x5B, VR256, VR256, undef, i256mem, load, - "cvtdq2ps\t{$src, $dst|$dst, $src}", - SSEPackedSingle, SSE_CVT_PS>, TB, VEX; -} - -let Pattern = [] in { -defm CVTSS2SI : sse12_cvt_s<0x2D, FR32, GR32, undef, f32mem, load /*dummy*/, - "cvtss2si{l}\t{$src, $dst|$dst, $src}", - SSE_CVT_SS2SI_32>, XS; -defm CVTSS2SI64 : sse12_cvt_s<0x2D, FR32, GR64, undef, f32mem, load /*dummy*/, - "cvtss2si{q}\t{$src, $dst|$dst, $src}", - SSE_CVT_SS2SI_64>, XS, REX_W; -defm CVTDQ2PS : sse12_cvt_p<0x5B, VR128, VR128, undef, i128mem, load /*dummy*/, + int_x86_sse2_cvttsd2si64, sdmem, sse_load_f64, + "cvttsd2si{q}", SSE_CVT_SD2SI>, XD, REX_W; + +defm VCVTSS2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse_cvtss2si, + ssmem, sse_load_f32, "cvtss2si{l}", + SSE_CVT_SS2SI_32>, XS, VEX, VEX_LIG; +defm VCVTSS2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, int_x86_sse_cvtss2si64, + ssmem, sse_load_f32, "cvtss2si{q}", + SSE_CVT_SS2SI_64>, XS, VEX, VEX_W, VEX_LIG; + +defm CVTSS2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse_cvtss2si, + ssmem, sse_load_f32, "cvtss2si{l}", + SSE_CVT_SS2SI_32>, XS; +defm CVTSS2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, int_x86_sse_cvtss2si64, + ssmem, sse_load_f32, "cvtss2si{q}", + SSE_CVT_SS2SI_64>, XS, REX_W; + +defm VCVTDQ2PS : sse12_cvt_p<0x5B, VR128, VR128, i128mem, + "vcvtdq2ps\t{$src, $dst|$dst, $src}", + SSEPackedSingle, SSE_CVT_PS>, + TB, VEX, Requires<[HasAVX]>; +defm VCVTDQ2PSY : sse12_cvt_p<0x5B, VR256, VR256, i256mem, + "vcvtdq2ps\t{$src, $dst|$dst, $src}", + SSEPackedSingle, SSE_CVT_PS>, + TB, VEX, Requires<[HasAVX]>; + +defm CVTDQ2PS : sse12_cvt_p<0x5B, VR128, VR128, i128mem, "cvtdq2ps\t{$src, $dst|$dst, $src}", SSEPackedSingle, SSE_CVT_PS>, - TB; /* PD SSE3 form is avaiable */ -} - -let Predicates = [HasAVX] in { - def : Pat<(int_x86_sse_cvtss2si VR128:$src), - (VCVTSS2SIrr (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>; - def : Pat<(int_x86_sse_cvtss2si (load addr:$src)), - (VCVTSS2SIrm addr:$src)>; - def : Pat<(int_x86_sse_cvtss2si64 VR128:$src), - (VCVTSS2SI64rr (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>; - def : Pat<(int_x86_sse_cvtss2si64 (load addr:$src)), - (VCVTSS2SI64rm addr:$src)>; -} - -let Predicates = [HasSSE1] in { - def : Pat<(int_x86_sse_cvtss2si VR128:$src), - (CVTSS2SIrr (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>; - def : Pat<(int_x86_sse_cvtss2si (load addr:$src)), - (CVTSS2SIrm addr:$src)>; - def : Pat<(int_x86_sse_cvtss2si64 VR128:$src), - (CVTSS2SI64rr (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>; - def : Pat<(int_x86_sse_cvtss2si64 (load 
addr:$src)), - (CVTSS2SI64rm addr:$src)>; -} + TB, Requires<[HasSSE2]>; /// SSE 2 Only // Convert scalar double to scalar single +let neverHasSideEffects = 1 in { def VCVTSD2SSrr : VSDI<0x5A, MRMSrcReg, (outs FR32:$dst), (ins FR64:$src1, FR64:$src2), "cvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", [], @@ -1685,6 +1649,7 @@ def VCVTSD2SSrm : I<0x5A, MRMSrcMem, (outs FR32:$dst), "vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", [], IIC_SSE_CVT_Scalar_RM>, XD, Requires<[HasAVX, OptForSize]>, VEX_4V, VEX_LIG; +} def : Pat<(f32 (fround FR64:$src)), (VCVTSD2SSrr FR64:$src, FR64:$src)>, Requires<[HasAVX]>; @@ -1700,17 +1665,37 @@ def CVTSD2SSrm : I<0x5A, MRMSrcMem, (outs FR32:$dst), (ins f64mem:$src), XD, Requires<[HasSSE2, OptForSize]>; -defm Int_VCVTSD2SS: sse12_cvt_sint_3addr<0x5A, VR128, VR128, - int_x86_sse2_cvtsd2ss, f64mem, load, "cvtsd2ss", - SSE_CVT_Scalar, 0>, - XS, VEX_4V; -let Constraints = "$src1 = $dst" in -defm Int_CVTSD2SS: sse12_cvt_sint_3addr<0x5A, VR128, VR128, - int_x86_sse2_cvtsd2ss, f64mem, load, "cvtsd2ss", - SSE_CVT_Scalar>, XS; +def Int_VCVTSD2SSrr: I<0x5A, MRMSrcReg, + (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), + "vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", + [(set VR128:$dst, + (int_x86_sse2_cvtsd2ss VR128:$src1, VR128:$src2))], + IIC_SSE_CVT_Scalar_RR>, XD, VEX_4V, Requires<[HasAVX]>; +def Int_VCVTSD2SSrm: I<0x5A, MRMSrcReg, + (outs VR128:$dst), (ins VR128:$src1, sdmem:$src2), + "vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", + [(set VR128:$dst, (int_x86_sse2_cvtsd2ss + VR128:$src1, sse_load_f64:$src2))], + IIC_SSE_CVT_Scalar_RM>, XD, VEX_4V, Requires<[HasAVX]>; + +let Constraints = "$src1 = $dst" in { +def Int_CVTSD2SSrr: I<0x5A, MRMSrcReg, + (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), + "cvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", + [(set VR128:$dst, + (int_x86_sse2_cvtsd2ss VR128:$src1, VR128:$src2))], + IIC_SSE_CVT_Scalar_RR>, XD, Requires<[HasSSE2]>; +def Int_CVTSD2SSrm: I<0x5A, MRMSrcReg, + (outs VR128:$dst), (ins VR128:$src1, sdmem:$src2), + "cvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", + [(set VR128:$dst, (int_x86_sse2_cvtsd2ss + VR128:$src1, sse_load_f64:$src2))], + IIC_SSE_CVT_Scalar_RM>, XD, Requires<[HasSSE2]>; +} // Convert scalar single to scalar double // SSE2 instructions with XS prefix +let neverHasSideEffects = 1 in { def VCVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst), (ins FR32:$src1, FR32:$src2), "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}", @@ -1722,19 +1707,21 @@ def VCVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst), "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}", [], IIC_SSE_CVT_Scalar_RM>, XS, VEX_4V, VEX_LIG, Requires<[HasAVX, OptForSize]>; +} -let Predicates = [HasAVX] in { +let AddedComplexity = 1 in { // give AVX priority def : Pat<(f64 (fextend FR32:$src)), - (VCVTSS2SDrr FR32:$src, FR32:$src)>; + (VCVTSS2SDrr FR32:$src, FR32:$src)>, Requires<[HasAVX]>; def : Pat<(fextend (loadf32 addr:$src)), - (VCVTSS2SDrm (f32 (IMPLICIT_DEF)), addr:$src)>; - def : Pat<(extloadf32 addr:$src), - (VCVTSS2SDrm (f32 (IMPLICIT_DEF)), addr:$src)>; -} + (VCVTSS2SDrm (f32 (IMPLICIT_DEF)), addr:$src)>, Requires<[HasAVX]>; -def : Pat<(extloadf32 addr:$src), - (VCVTSS2SDrr (f32 (IMPLICIT_DEF)), (MOVSSrm addr:$src))>, - Requires<[HasAVX, OptForSpeed]>; + def : Pat<(extloadf32 addr:$src), + (VCVTSS2SDrm (f32 (IMPLICIT_DEF)), addr:$src)>, + Requires<[HasAVX, OptForSize]>; + def : Pat<(extloadf32 addr:$src), + (VCVTSS2SDrr (f32 (IMPLICIT_DEF)), (VMOVSSrm addr:$src))>, + Requires<[HasAVX, 
OptForSpeed]>; +} // AddedComplexity = 1 def CVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst), (ins FR32:$src), "cvtss2sd\t{$src, $dst|$dst, $src}", @@ -1760,190 +1747,146 @@ def : Pat<(extloadf32 addr:$src), def Int_VCVTSS2SDrr: I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}", - [(set VR128:$dst, (int_x86_sse2_cvtss2sd VR128:$src1, - VR128:$src2))], - IIC_SSE_CVT_Scalar_RR>, XS, VEX_4V, - Requires<[HasAVX]>; + [(set VR128:$dst, + (int_x86_sse2_cvtss2sd VR128:$src1, VR128:$src2))], + IIC_SSE_CVT_Scalar_RR>, XS, VEX_4V, Requires<[HasAVX]>; def Int_VCVTSS2SDrm: I<0x5A, MRMSrcMem, - (outs VR128:$dst), (ins VR128:$src1, f32mem:$src2), + (outs VR128:$dst), (ins VR128:$src1, ssmem:$src2), "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}", - [(set VR128:$dst, (int_x86_sse2_cvtss2sd VR128:$src1, - (load addr:$src2)))], - IIC_SSE_CVT_Scalar_RM>, XS, VEX_4V, - Requires<[HasAVX]>; + [(set VR128:$dst, + (int_x86_sse2_cvtss2sd VR128:$src1, sse_load_f32:$src2))], + IIC_SSE_CVT_Scalar_RM>, XS, VEX_4V, Requires<[HasAVX]>; let Constraints = "$src1 = $dst" in { // SSE2 instructions with XS prefix def Int_CVTSS2SDrr: I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), "cvtss2sd\t{$src2, $dst|$dst, $src2}", - [(set VR128:$dst, (int_x86_sse2_cvtss2sd VR128:$src1, - VR128:$src2))], - IIC_SSE_CVT_Scalar_RR>, XS, - Requires<[HasSSE2]>; + [(set VR128:$dst, + (int_x86_sse2_cvtss2sd VR128:$src1, VR128:$src2))], + IIC_SSE_CVT_Scalar_RR>, XS, Requires<[HasSSE2]>; def Int_CVTSS2SDrm: I<0x5A, MRMSrcMem, - (outs VR128:$dst), (ins VR128:$src1, f32mem:$src2), + (outs VR128:$dst), (ins VR128:$src1, ssmem:$src2), "cvtss2sd\t{$src2, $dst|$dst, $src2}", - [(set VR128:$dst, (int_x86_sse2_cvtss2sd VR128:$src1, - (load addr:$src2)))], - IIC_SSE_CVT_Scalar_RM>, XS, - Requires<[HasSSE2]>; -} - -// Convert doubleword to packed single/double fp -// SSE2 instructions without OpSize prefix -def Int_VCVTDQ2PSrr : I<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - "vcvtdq2ps\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse2_cvtdq2ps VR128:$src))], - IIC_SSE_CVT_PS_RR>, - TB, VEX, Requires<[HasAVX]>; -def Int_VCVTDQ2PSrm : I<0x5B, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), - "vcvtdq2ps\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse2_cvtdq2ps - (bitconvert (memopv2i64 addr:$src))))], - IIC_SSE_CVT_PS_RM>, - TB, VEX, Requires<[HasAVX]>; -def Int_CVTDQ2PSrr : I<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - "cvtdq2ps\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse2_cvtdq2ps VR128:$src))], - IIC_SSE_CVT_PS_RR>, - TB, Requires<[HasSSE2]>; -def Int_CVTDQ2PSrm : I<0x5B, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), - "cvtdq2ps\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse2_cvtdq2ps - (bitconvert (memopv2i64 addr:$src))))], - IIC_SSE_CVT_PS_RM>, - TB, Requires<[HasSSE2]>; - -// FIXME: why the non-intrinsic version is described as SSE3? 
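// How the "let AddedComplexity = 1 in { // give AVX priority" block above
// works (hedged sketch; the patterns are simplified copies from that block):
//   def : Pat<(f64 (fextend FR32:$src)), (VCVTSS2SDrr FR32:$src, FR32:$src)>;
//   def : Pat<(f64 (fextend FR32:$src)), (CVTSS2SDrr FR32:$src)>;
// When several patterns cover the same node, instruction selection tries the
// higher-complexity one first, so the +1 makes the VEX-encoded forms win
// whenever their Requires predicates hold.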
-// SSE2 instructions with XS prefix -def Int_VCVTDQ2PDrr : I<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - "vcvtdq2pd\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse2_cvtdq2pd VR128:$src))], - IIC_SSE_CVT_PD_RR>, - XS, VEX, Requires<[HasAVX]>; -def Int_VCVTDQ2PDrm : I<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src), - "vcvtdq2pd\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse2_cvtdq2pd - (bitconvert (memopv2i64 addr:$src))))], - IIC_SSE_CVT_PD_RM>, - XS, VEX, Requires<[HasAVX]>; -def Int_CVTDQ2PDrr : I<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - "cvtdq2pd\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse2_cvtdq2pd VR128:$src))], - IIC_SSE_CVT_PD_RR>, - XS, Requires<[HasSSE2]>; -def Int_CVTDQ2PDrm : I<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src), - "cvtdq2pd\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse2_cvtdq2pd - (bitconvert (memopv2i64 addr:$src))))], - IIC_SSE_CVT_PD_RM>, - XS, Requires<[HasSSE2]>; - + [(set VR128:$dst, + (int_x86_sse2_cvtss2sd VR128:$src1, sse_load_f32:$src2))], + IIC_SSE_CVT_Scalar_RM>, XS, Requires<[HasSSE2]>; +} // Convert packed single/double fp to doubleword def VCVTPS2DQrr : VPDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - "cvtps2dq\t{$src, $dst|$dst, $src}", [], + "cvtps2dq\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, (int_x86_sse2_cvtps2dq VR128:$src))], IIC_SSE_CVT_PS_RR>, VEX; def VCVTPS2DQrm : VPDI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), - "cvtps2dq\t{$src, $dst|$dst, $src}", [], + "cvtps2dq\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, + (int_x86_sse2_cvtps2dq (memopv4f32 addr:$src)))], IIC_SSE_CVT_PS_RM>, VEX; def VCVTPS2DQYrr : VPDI<0x5B, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src), - "cvtps2dq\t{$src, $dst|$dst, $src}", [], + "cvtps2dq\t{$src, $dst|$dst, $src}", + [(set VR256:$dst, + (int_x86_avx_cvt_ps2dq_256 VR256:$src))], IIC_SSE_CVT_PS_RR>, VEX; def VCVTPS2DQYrm : VPDI<0x5B, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src), - "cvtps2dq\t{$src, $dst|$dst, $src}", [], + "cvtps2dq\t{$src, $dst|$dst, $src}", + [(set VR256:$dst, + (int_x86_avx_cvt_ps2dq_256 (memopv8f32 addr:$src)))], IIC_SSE_CVT_PS_RM>, VEX; def CVTPS2DQrr : PDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - "cvtps2dq\t{$src, $dst|$dst, $src}", [], + "cvtps2dq\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, (int_x86_sse2_cvtps2dq VR128:$src))], IIC_SSE_CVT_PS_RR>; def CVTPS2DQrm : PDI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), - "cvtps2dq\t{$src, $dst|$dst, $src}", [], + "cvtps2dq\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, + (int_x86_sse2_cvtps2dq (memopv4f32 addr:$src)))], IIC_SSE_CVT_PS_RM>; -def Int_VCVTPS2DQrr : VPDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - "cvtps2dq\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse2_cvtps2dq VR128:$src))], - IIC_SSE_CVT_PS_RR>, - VEX; -def Int_VCVTPS2DQrm : VPDI<0x5B, MRMSrcMem, (outs VR128:$dst), - (ins f128mem:$src), - "cvtps2dq\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse2_cvtps2dq - (memop addr:$src)))], - IIC_SSE_CVT_PS_RM>, VEX; -def Int_CVTPS2DQrr : PDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - "cvtps2dq\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse2_cvtps2dq VR128:$src))], - IIC_SSE_CVT_PS_RR>; -def Int_CVTPS2DQrm : PDI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), - "cvtps2dq\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse2_cvtps2dq - (memop addr:$src)))], - IIC_SSE_CVT_PS_RM>; - -// SSE2 packed instructions 
with XD prefix -def Int_VCVTPD2DQrr : I<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - "vcvtpd2dq\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse2_cvtpd2dq VR128:$src))], - IIC_SSE_CVT_PD_RR>, - XD, VEX, Requires<[HasAVX]>; -def Int_VCVTPD2DQrm : I<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), + +// Convert Packed Double FP to Packed DW Integers +let Predicates = [HasAVX] in { +// The assembler can recognize rr 256-bit instructions by seeing a ymm +// register, but the same isn't true when using memory operands instead. +// Provide other assembly rr and rm forms to address this explicitly. +def VCVTPD2DQrr : SDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "vcvtpd2dq\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse2_cvtpd2dq - (memop addr:$src)))], - IIC_SSE_CVT_PD_RM>, - XD, VEX, Requires<[HasAVX]>; -def Int_CVTPD2DQrr : I<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - "cvtpd2dq\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse2_cvtpd2dq VR128:$src))], - IIC_SSE_CVT_PD_RR>, - XD, Requires<[HasSSE2]>; -def Int_CVTPD2DQrm : I<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), - "cvtpd2dq\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse2_cvtpd2dq - (memop addr:$src)))], - IIC_SSE_CVT_PD_RM>, - XD, Requires<[HasSSE2]>; + [(set VR128:$dst, (int_x86_sse2_cvtpd2dq VR128:$src))]>, + VEX; +// XMM only +def : InstAlias<"vcvtpd2dqx\t{$src, $dst|$dst, $src}", + (VCVTPD2DQrr VR128:$dst, VR128:$src)>; +def VCVTPD2DQXrm : SDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), + "vcvtpd2dqx\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, + (int_x86_sse2_cvtpd2dq (memopv2f64 addr:$src)))]>, VEX; + +// YMM only +def VCVTPD2DQYrr : SDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src), + "vcvtpd2dq{y}\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, + (int_x86_avx_cvt_pd2dq_256 VR256:$src))]>, VEX; +def VCVTPD2DQYrm : SDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src), + "vcvtpd2dq{y}\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, + (int_x86_avx_cvt_pd2dq_256 (memopv4f64 addr:$src)))]>, + VEX, VEX_L; +def : InstAlias<"vcvtpd2dq\t{$src, $dst|$dst, $src}", + (VCVTPD2DQYrr VR128:$dst, VR256:$src)>; +} + +def CVTPD2DQrm : SDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), + "cvtpd2dq\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, + (int_x86_sse2_cvtpd2dq (memopv2f64 addr:$src)))], + IIC_SSE_CVT_PD_RM>; +def CVTPD2DQrr : SDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), + "cvtpd2dq\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, (int_x86_sse2_cvtpd2dq VR128:$src))], + IIC_SSE_CVT_PD_RR>; // Convert with truncation packed single/double fp to doubleword // SSE2 packed instructions with XS prefix -def VCVTTPS2DQrr : VSSI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - "cvttps2dq\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, - (int_x86_sse2_cvttps2dq VR128:$src))], - IIC_SSE_CVT_PS_RR>, VEX; -def VCVTTPS2DQrm : VSSI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), - "cvttps2dq\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse2_cvttps2dq - (memop addr:$src)))], - IIC_SSE_CVT_PS_RM>, VEX; -def VCVTTPS2DQYrr : VSSI<0x5B, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src), +def VCVTTPS2DQrr : VS2SI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvttps2dq\t{$src, $dst|$dst, $src}", - [(set VR256:$dst, - (int_x86_avx_cvtt_ps2dq_256 VR256:$src))], - IIC_SSE_CVT_PS_RR>, VEX; -def VCVTTPS2DQYrm : VSSI<0x5B, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src), + [(set VR128:$dst, + 
(int_x86_sse2_cvttps2dq VR128:$src))], + IIC_SSE_CVT_PS_RR>, VEX; +def VCVTTPS2DQrm : VS2SI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), "cvttps2dq\t{$src, $dst|$dst, $src}", - [(set VR256:$dst, (int_x86_avx_cvtt_ps2dq_256 - (memopv8f32 addr:$src)))], - IIC_SSE_CVT_PS_RM>, VEX; - -def CVTTPS2DQrr : SSI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - "cvttps2dq\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, - (int_x86_sse2_cvttps2dq VR128:$src))], - IIC_SSE_CVT_PS_RR>; -def CVTTPS2DQrm : SSI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), - "cvttps2dq\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, - (int_x86_sse2_cvttps2dq (memop addr:$src)))], - IIC_SSE_CVT_PS_RM>; + [(set VR128:$dst, (int_x86_sse2_cvttps2dq + (memopv4f32 addr:$src)))], + IIC_SSE_CVT_PS_RM>, VEX; +def VCVTTPS2DQYrr : VS2SI<0x5B, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src), + "cvttps2dq\t{$src, $dst|$dst, $src}", + [(set VR256:$dst, + (int_x86_avx_cvtt_ps2dq_256 VR256:$src))], + IIC_SSE_CVT_PS_RR>, VEX; +def VCVTTPS2DQYrm : VS2SI<0x5B, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src), + "cvttps2dq\t{$src, $dst|$dst, $src}", + [(set VR256:$dst, (int_x86_avx_cvtt_ps2dq_256 + (memopv8f32 addr:$src)))], + IIC_SSE_CVT_PS_RM>, VEX; + +def CVTTPS2DQrr : S2SI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), + "cvttps2dq\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, (int_x86_sse2_cvttps2dq VR128:$src))], + IIC_SSE_CVT_PS_RR>; +def CVTTPS2DQrm : S2SI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), + "cvttps2dq\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, + (int_x86_sse2_cvttps2dq (memopv4f32 addr:$src)))], + IIC_SSE_CVT_PS_RM>; let Predicates = [HasAVX] in { def : Pat<(v4f32 (sint_to_fp (v4i32 VR128:$src))), - (Int_VCVTDQ2PSrr VR128:$src)>; + (VCVTDQ2PSrr VR128:$src)>; def : Pat<(v4f32 (sint_to_fp (bc_v4i32 (memopv2i64 addr:$src)))), - (Int_VCVTDQ2PSrm addr:$src)>; + (VCVTDQ2PSrm addr:$src)>; + + def : Pat<(int_x86_sse2_cvtdq2ps VR128:$src), + (VCVTDQ2PSrr VR128:$src)>; + def : Pat<(int_x86_sse2_cvtdq2ps (bc_v4i32 (memopv2i64 addr:$src))), + (VCVTDQ2PSrm addr:$src)>; def : Pat<(v4i32 (fp_to_sint (v4f32 VR128:$src))), (VCVTTPS2DQrr VR128:$src)>; @@ -1963,9 +1906,14 @@ let Predicates = [HasAVX] in { let Predicates = [HasSSE2] in { def : Pat<(v4f32 (sint_to_fp (v4i32 VR128:$src))), - (Int_CVTDQ2PSrr VR128:$src)>; + (CVTDQ2PSrr VR128:$src)>; def : Pat<(v4f32 (sint_to_fp (bc_v4i32 (memopv2i64 addr:$src)))), - (Int_CVTDQ2PSrm addr:$src)>; + (CVTDQ2PSrm addr:$src)>; + + def : Pat<(int_x86_sse2_cvtdq2ps VR128:$src), + (CVTDQ2PSrr VR128:$src)>; + def : Pat<(int_x86_sse2_cvtdq2ps (bc_v4i32 (memopv2i64 addr:$src))), + (CVTDQ2PSrm addr:$src)>; def : Pat<(v4i32 (fp_to_sint (v4f32 VR128:$src))), (CVTTPS2DQrr VR128:$src)>; @@ -1978,183 +1926,186 @@ def VCVTTPD2DQrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), [(set VR128:$dst, (int_x86_sse2_cvttpd2dq VR128:$src))], IIC_SSE_CVT_PD_RR>, VEX; -let isCodeGenOnly = 1 in -def VCVTTPD2DQrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), - "cvttpd2dq\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse2_cvttpd2dq - (memop addr:$src)))], - IIC_SSE_CVT_PD_RM>, VEX; -def CVTTPD2DQrr : PDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - "cvttpd2dq\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse2_cvttpd2dq VR128:$src))], - IIC_SSE_CVT_PD_RR>; -def CVTTPD2DQrm : PDI<0xE6, MRMSrcMem, (outs VR128:$dst),(ins f128mem:$src), - "cvttpd2dq\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse2_cvttpd2dq - 
(memop addr:$src)))], - IIC_SSE_CVT_PD_RM>; // The assembler can recognize rr 256-bit instructions by seeing a ymm // register, but the same isn't true when using memory operands instead. // Provide other assembly rr and rm forms to address this explicitly. -def VCVTTPD2DQXrYr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src), - "cvttpd2dq\t{$src, $dst|$dst, $src}", [], - IIC_SSE_CVT_PD_RR>, VEX; // XMM only -def VCVTTPD2DQXrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - "cvttpd2dqx\t{$src, $dst|$dst, $src}", [], - IIC_SSE_CVT_PD_RR>, VEX; +def : InstAlias<"vcvttpd2dqx\t{$src, $dst|$dst, $src}", + (VCVTTPD2DQrr VR128:$dst, VR128:$src)>; def VCVTTPD2DQXrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), - "cvttpd2dqx\t{$src, $dst|$dst, $src}", [], + "cvttpd2dqx\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, (int_x86_sse2_cvttpd2dq + (memopv2f64 addr:$src)))], IIC_SSE_CVT_PD_RM>, VEX; // YMM only def VCVTTPD2DQYrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src), - "cvttpd2dqy\t{$src, $dst|$dst, $src}", [], + "cvttpd2dq{y}\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, + (int_x86_avx_cvtt_pd2dq_256 VR256:$src))], IIC_SSE_CVT_PD_RR>, VEX; def VCVTTPD2DQYrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src), - "cvttpd2dqy\t{$src, $dst|$dst, $src}", [], + "cvttpd2dq{y}\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, + (int_x86_avx_cvtt_pd2dq_256 (memopv4f64 addr:$src)))], IIC_SSE_CVT_PD_RM>, VEX, VEX_L; +def : InstAlias<"vcvttpd2dq\t{$src, $dst|$dst, $src}", + (VCVTTPD2DQYrr VR128:$dst, VR256:$src)>; + +let Predicates = [HasAVX] in { + def : Pat<(v4i32 (fp_to_sint (v4f64 VR256:$src))), + (VCVTTPD2DQYrr VR256:$src)>; + def : Pat<(v4i32 (fp_to_sint (memopv4f64 addr:$src))), + (VCVTTPD2DQYrm addr:$src)>; +} // Predicates = [HasAVX] + +def CVTTPD2DQrr : PDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), + "cvttpd2dq\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, (int_x86_sse2_cvttpd2dq VR128:$src))], + IIC_SSE_CVT_PD_RR>; +def CVTTPD2DQrm : PDI<0xE6, MRMSrcMem, (outs VR128:$dst),(ins f128mem:$src), + "cvttpd2dq\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, (int_x86_sse2_cvttpd2dq + (memopv2f64 addr:$src)))], + IIC_SSE_CVT_PD_RM>; // Convert packed single to packed double let Predicates = [HasAVX] in { // SSE2 instructions without OpSize prefix def VCVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - "vcvtps2pd\t{$src, $dst|$dst, $src}", [], + "vcvtps2pd\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, (int_x86_sse2_cvtps2pd VR128:$src))], IIC_SSE_CVT_PD_RR>, TB, VEX; +let neverHasSideEffects = 1, mayLoad = 1 in def VCVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src), "vcvtps2pd\t{$src, $dst|$dst, $src}", [], IIC_SSE_CVT_PD_RM>, TB, VEX; def VCVTPS2PDYrr : I<0x5A, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src), - "vcvtps2pd\t{$src, $dst|$dst, $src}", [], + "vcvtps2pd\t{$src, $dst|$dst, $src}", + [(set VR256:$dst, + (int_x86_avx_cvt_ps2_pd_256 VR128:$src))], IIC_SSE_CVT_PD_RR>, TB, VEX; def VCVTPS2PDYrm : I<0x5A, MRMSrcMem, (outs VR256:$dst), (ins f128mem:$src), - "vcvtps2pd\t{$src, $dst|$dst, $src}", [], + "vcvtps2pd\t{$src, $dst|$dst, $src}", + [(set VR256:$dst, + (int_x86_avx_cvt_ps2_pd_256 (memopv4f32 addr:$src)))], IIC_SSE_CVT_PD_RM>, TB, VEX; } + +let Predicates = [HasSSE2] in { def CVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - "cvtps2pd\t{$src, $dst|$dst, $src}", [], + "cvtps2pd\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, (int_x86_sse2_cvtps2pd VR128:$src))], 
IIC_SSE_CVT_PD_RR>, TB; +let neverHasSideEffects = 1, mayLoad = 1 in def CVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src), "cvtps2pd\t{$src, $dst|$dst, $src}", [], IIC_SSE_CVT_PD_RM>, TB; +} -def Int_VCVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - "vcvtps2pd\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse2_cvtps2pd VR128:$src))], - IIC_SSE_CVT_PD_RR>, - TB, VEX, Requires<[HasAVX]>; -def Int_VCVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src), - "vcvtps2pd\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse2_cvtps2pd - (load addr:$src)))], - IIC_SSE_CVT_PD_RM>, - TB, VEX, Requires<[HasAVX]>; -def Int_CVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - "cvtps2pd\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse2_cvtps2pd VR128:$src))], - IIC_SSE_CVT_PD_RR>, - TB, Requires<[HasSSE2]>; -def Int_CVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src), - "cvtps2pd\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse2_cvtps2pd - (load addr:$src)))], - IIC_SSE_CVT_PD_RM>, - TB, Requires<[HasSSE2]>; +// Convert Packed DW Integers to Packed Double FP +let Predicates = [HasAVX] in { +let neverHasSideEffects = 1, mayLoad = 1 in +def VCVTDQ2PDrm : S2SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src), + "vcvtdq2pd\t{$src, $dst|$dst, $src}", + []>, VEX; +def VCVTDQ2PDrr : S2SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), + "vcvtdq2pd\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, + (int_x86_sse2_cvtdq2pd VR128:$src))]>, VEX; +def VCVTDQ2PDYrm : S2SI<0xE6, MRMSrcMem, (outs VR256:$dst), (ins i128mem:$src), + "vcvtdq2pd\t{$src, $dst|$dst, $src}", + [(set VR256:$dst, + (int_x86_avx_cvtdq2_pd_256 + (bitconvert (memopv2i64 addr:$src))))]>, VEX; +def VCVTDQ2PDYrr : S2SI<0xE6, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src), + "vcvtdq2pd\t{$src, $dst|$dst, $src}", + [(set VR256:$dst, + (int_x86_avx_cvtdq2_pd_256 VR128:$src))]>, VEX; +} + +let neverHasSideEffects = 1, mayLoad = 1 in +def CVTDQ2PDrm : S2SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src), + "cvtdq2pd\t{$src, $dst|$dst, $src}", [], + IIC_SSE_CVT_PD_RR>; +def CVTDQ2PDrr : S2SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), + "cvtdq2pd\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, (int_x86_sse2_cvtdq2pd VR128:$src))], + IIC_SSE_CVT_PD_RM>; + +// AVX 256-bit register conversion intrinsics +let Predicates = [HasAVX] in { + def : Pat<(v4f64 (sint_to_fp (v4i32 VR128:$src))), + (VCVTDQ2PDYrr VR128:$src)>; + def : Pat<(v4f64 (sint_to_fp (bc_v4i32 (memopv2i64 addr:$src)))), + (VCVTDQ2PDYrm addr:$src)>; +} // Predicates = [HasAVX] // Convert packed double to packed single // The assembler can recognize rr 256-bit instructions by seeing a ymm // register, but the same isn't true when using memory operands instead. // Provide other assembly rr and rm forms to address this explicitly. 
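// Editorial expansion of the comment above (hedged): "vcvtpd2ps %ymm0, %xmm0"
// is unambiguous because the ymm operand implies the 256-bit form, but a
// memory source such as "vcvtpd2ps (%rax), %xmm0" could be either 128-bit or
// 256-bit. The explicit "x"/"{y}" suffixes on the definitions below resolve
// this for memory operands, and the InstAlias entries map the unsuffixed or
// suffixed spellings onto the corresponding register variants.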
def VCVTPD2PSrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - "cvtpd2ps\t{$src, $dst|$dst, $src}", [], + "cvtpd2ps\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, (int_x86_sse2_cvtpd2ps VR128:$src))], IIC_SSE_CVT_PD_RR>, VEX; -def VCVTPD2PSXrYr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src), - "cvtpd2ps\t{$src, $dst|$dst, $src}", [], - IIC_SSE_CVT_PD_RR>, VEX; // XMM only -def VCVTPD2PSXrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - "cvtpd2psx\t{$src, $dst|$dst, $src}", [], - IIC_SSE_CVT_PD_RR>, VEX; +def : InstAlias<"vcvtpd2psx\t{$src, $dst|$dst, $src}", + (VCVTPD2PSrr VR128:$dst, VR128:$src)>; def VCVTPD2PSXrm : VPDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), - "cvtpd2psx\t{$src, $dst|$dst, $src}", [], + "cvtpd2psx\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, + (int_x86_sse2_cvtpd2ps (memopv2f64 addr:$src)))], IIC_SSE_CVT_PD_RM>, VEX; // YMM only def VCVTPD2PSYrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src), - "cvtpd2psy\t{$src, $dst|$dst, $src}", [], + "cvtpd2ps{y}\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, + (int_x86_avx_cvt_pd2_ps_256 VR256:$src))], IIC_SSE_CVT_PD_RR>, VEX; def VCVTPD2PSYrm : VPDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src), - "cvtpd2psy\t{$src, $dst|$dst, $src}", [], + "cvtpd2ps{y}\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, + (int_x86_avx_cvt_pd2_ps_256 (memopv4f64 addr:$src)))], IIC_SSE_CVT_PD_RM>, VEX, VEX_L; +def : InstAlias<"vcvtpd2ps\t{$src, $dst|$dst, $src}", + (VCVTPD2PSYrr VR128:$dst, VR256:$src)>; + def CVTPD2PSrr : PDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - "cvtpd2ps\t{$src, $dst|$dst, $src}", [], + "cvtpd2ps\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, (int_x86_sse2_cvtpd2ps VR128:$src))], IIC_SSE_CVT_PD_RR>; def CVTPD2PSrm : PDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), - "cvtpd2ps\t{$src, $dst|$dst, $src}", [], + "cvtpd2ps\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, + (int_x86_sse2_cvtpd2ps (memopv2f64 addr:$src)))], IIC_SSE_CVT_PD_RM>; -def Int_VCVTPD2PSrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - "cvtpd2ps\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse2_cvtpd2ps VR128:$src))], - IIC_SSE_CVT_PD_RR>; -def Int_VCVTPD2PSrm : VPDI<0x5A, MRMSrcMem, (outs VR128:$dst), - (ins f128mem:$src), - "cvtpd2ps\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse2_cvtpd2ps - (memop addr:$src)))], - IIC_SSE_CVT_PD_RM>; -def Int_CVTPD2PSrr : PDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - "cvtpd2ps\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse2_cvtpd2ps VR128:$src))], - IIC_SSE_CVT_PD_RR>; -def Int_CVTPD2PSrm : PDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), - "cvtpd2ps\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse2_cvtpd2ps - (memop addr:$src)))], - IIC_SSE_CVT_PD_RM>; - // AVX 256-bit register conversion intrinsics // FIXME: Migrate SSE conversion intrinsics matching to use patterns as below // whenever possible to avoid declaring two versions of each one. 
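// A sketch of the migration this FIXME requests (hedged; the example is
// copied from the SSE2 hunks earlier in this patch): instead of keeping a
// parallel "Int_" instruction per intrinsic, a single pattern maps the
// intrinsic onto the existing instruction,
//   def : Pat<(int_x86_sse2_cvtdq2ps VR128:$src), (CVTDQ2PSrr VR128:$src)>;
// so one opcode definition serves both the generic ISD node and the
// intrinsic form, as the AVX patterns retained below already do.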
-def : Pat<(int_x86_avx_cvtdq2_ps_256 VR256:$src), - (VCVTDQ2PSYrr VR256:$src)>; -def : Pat<(int_x86_avx_cvtdq2_ps_256 (bitconvert (memopv4i64 addr:$src))), - (VCVTDQ2PSYrm addr:$src)>; - -def : Pat<(int_x86_avx_cvt_pd2_ps_256 VR256:$src), - (VCVTPD2PSYrr VR256:$src)>; -def : Pat<(int_x86_avx_cvt_pd2_ps_256 (memopv4f64 addr:$src)), - (VCVTPD2PSYrm addr:$src)>; - -def : Pat<(int_x86_avx_cvt_ps2dq_256 VR256:$src), - (VCVTPS2DQYrr VR256:$src)>; -def : Pat<(int_x86_avx_cvt_ps2dq_256 (memopv8f32 addr:$src)), - (VCVTPS2DQYrm addr:$src)>; - -def : Pat<(int_x86_avx_cvt_ps2_pd_256 VR128:$src), - (VCVTPS2PDYrr VR128:$src)>; -def : Pat<(int_x86_avx_cvt_ps2_pd_256 (memopv4f32 addr:$src)), - (VCVTPS2PDYrm addr:$src)>; - -def : Pat<(int_x86_avx_cvtt_pd2dq_256 VR256:$src), - (VCVTTPD2DQYrr VR256:$src)>; -def : Pat<(int_x86_avx_cvtt_pd2dq_256 (memopv4f64 addr:$src)), - (VCVTTPD2DQYrm addr:$src)>; - -// Match fround and fextend for 128/256-bit conversions -def : Pat<(v4f32 (fround (v4f64 VR256:$src))), - (VCVTPD2PSYrr VR256:$src)>; -def : Pat<(v4f32 (fround (loadv4f64 addr:$src))), - (VCVTPD2PSYrm addr:$src)>; - -def : Pat<(v4f64 (fextend (v4f32 VR128:$src))), - (VCVTPS2PDYrr VR128:$src)>; -def : Pat<(v4f64 (fextend (loadv4f32 addr:$src))), - (VCVTPS2PDYrm addr:$src)>; +let Predicates = [HasAVX] in { + def : Pat<(int_x86_avx_cvtdq2_ps_256 VR256:$src), + (VCVTDQ2PSYrr VR256:$src)>; + def : Pat<(int_x86_avx_cvtdq2_ps_256 (bitconvert (memopv4i64 addr:$src))), + (VCVTDQ2PSYrm addr:$src)>; + + // Match fround and fextend for 128/256-bit conversions + def : Pat<(v4f32 (fround (v4f64 VR256:$src))), + (VCVTPD2PSYrr VR256:$src)>; + def : Pat<(v4f32 (fround (loadv4f64 addr:$src))), + (VCVTPD2PSYrm addr:$src)>; + + def : Pat<(v4f64 (fextend (v4f32 VR128:$src))), + (VCVTPS2PDYrr VR128:$src)>; + def : Pat<(v4f64 (fextend (loadv4f32 addr:$src))), + (VCVTPS2PDYrm addr:$src)>; +} //===----------------------------------------------------------------------===// // SSE 1 & 2 - Compare Instructions @@ -2587,17 +2538,13 @@ let Predicates = [HasAVX] in { OpSize, VEX; def : Pat<(i32 (X86fgetsign FR32:$src)), - (VMOVMSKPSrr32 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FR32:$src, - sub_ss))>; + (VMOVMSKPSrr32 (COPY_TO_REGCLASS FR32:$src, VR128))>; def : Pat<(i64 (X86fgetsign FR32:$src)), - (VMOVMSKPSrr64 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FR32:$src, - sub_ss))>; + (VMOVMSKPSrr64 (COPY_TO_REGCLASS FR32:$src, VR128))>; def : Pat<(i32 (X86fgetsign FR64:$src)), - (VMOVMSKPDrr32 (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FR64:$src, - sub_sd))>; + (VMOVMSKPDrr32 (COPY_TO_REGCLASS FR64:$src, VR128))>; def : Pat<(i64 (X86fgetsign FR64:$src)), - (VMOVMSKPDrr64 (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FR64:$src, - sub_sd))>; + (VMOVMSKPDrr64 (COPY_TO_REGCLASS FR64:$src, VR128))>; // Assembler Only def VMOVMSKPSr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR128:$src), @@ -2622,17 +2569,17 @@ defm MOVMSKPD : sse12_extr_sign_mask, TB, OpSize; def : Pat<(i32 (X86fgetsign FR32:$src)), - (MOVMSKPSrr32 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FR32:$src, - sub_ss))>, Requires<[HasSSE1]>; + (MOVMSKPSrr32 (COPY_TO_REGCLASS FR32:$src, VR128))>, + Requires<[HasSSE1]>; def : Pat<(i64 (X86fgetsign FR32:$src)), - (MOVMSKPSrr64 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FR32:$src, - sub_ss))>, Requires<[HasSSE1]>; + (MOVMSKPSrr64 (COPY_TO_REGCLASS FR32:$src, VR128))>, + Requires<[HasSSE1]>; def : Pat<(i32 (X86fgetsign FR64:$src)), - (MOVMSKPDrr32 (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FR64:$src, - sub_sd))>, Requires<[HasSSE2]>; + (MOVMSKPDrr32 (COPY_TO_REGCLASS 
FR64:$src, VR128))>, + Requires<[HasSSE2]>; def : Pat<(i64 (X86fgetsign FR64:$src)), - (MOVMSKPDrr64 (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FR64:$src, - sub_sd))>, Requires<[HasSSE2]>; + (MOVMSKPDrr64 (COPY_TO_REGCLASS FR64:$src, VR128))>, + Requires<[HasSSE2]>; //===---------------------------------------------------------------------===// // SSE2 - Packed Integer Logical Instructions @@ -3230,34 +3177,30 @@ def : Pat<(f32 (X86frcp (load addr:$src))), let Predicates = [HasAVX], AddedComplexity = 1 in { def : Pat<(int_x86_sse_sqrt_ss VR128:$src), - (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), - (VSQRTSSr (f32 (IMPLICIT_DEF)), - (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss)), - sub_ss)>; + (COPY_TO_REGCLASS (VSQRTSSr (f32 (IMPLICIT_DEF)), + (COPY_TO_REGCLASS VR128:$src, FR32)), + VR128)>; def : Pat<(int_x86_sse_sqrt_ss sse_load_f32:$src), (VSQRTSSm_Int (v4f32 (IMPLICIT_DEF)), sse_load_f32:$src)>; def : Pat<(int_x86_sse2_sqrt_sd VR128:$src), - (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), - (VSQRTSDr (f64 (IMPLICIT_DEF)), - (EXTRACT_SUBREG (v2f64 VR128:$src), sub_sd)), - sub_sd)>; + (COPY_TO_REGCLASS (VSQRTSDr (f64 (IMPLICIT_DEF)), + (COPY_TO_REGCLASS VR128:$src, FR64)), + VR128)>; def : Pat<(int_x86_sse2_sqrt_sd sse_load_f64:$src), (VSQRTSDm_Int (v2f64 (IMPLICIT_DEF)), sse_load_f64:$src)>; def : Pat<(int_x86_sse_rsqrt_ss VR128:$src), - (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), - (VRSQRTSSr (f32 (IMPLICIT_DEF)), - (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss)), - sub_ss)>; + (COPY_TO_REGCLASS (VRSQRTSSr (f32 (IMPLICIT_DEF)), + (COPY_TO_REGCLASS VR128:$src, FR32)), + VR128)>; def : Pat<(int_x86_sse_rsqrt_ss sse_load_f32:$src), (VRSQRTSSm_Int (v4f32 (IMPLICIT_DEF)), sse_load_f32:$src)>; def : Pat<(int_x86_sse_rcp_ss VR128:$src), - (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), - (VRCPSSr (f32 (IMPLICIT_DEF)), - (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss)), - sub_ss)>; + (COPY_TO_REGCLASS (VRCPSSr (f32 (IMPLICIT_DEF)), + (COPY_TO_REGCLASS VR128:$src, FR32)), + VR128)>; def : Pat<(int_x86_sse_rcp_ss sse_load_f32:$src), (VRCPSSm_Int (v4f32 (IMPLICIT_DEF)), sse_load_f32:$src)>; } @@ -3336,13 +3279,6 @@ let AddedComplexity = 400 in { // Prefer non-temporal versions IIC_SSE_MOVNT>, VEX; } -def : Pat<(int_x86_avx_movnt_dq_256 addr:$dst, VR256:$src), - (VMOVNTDQYmr addr:$dst, VR256:$src)>; -def : Pat<(int_x86_avx_movnt_pd_256 addr:$dst, VR256:$src), - (VMOVNTPDYmr addr:$dst, VR256:$src)>; -def : Pat<(int_x86_avx_movnt_ps_256 addr:$dst, VR256:$src), - (VMOVNTPSYmr addr:$dst, VR256:$src)>; - let AddedComplexity = 400 in { // Prefer non-temporal versions def MOVNTPSmr : PSI<0x2B, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), "movntps\t{$src, $dst|$dst, $src}", @@ -4610,7 +4546,7 @@ def MOVPQIto64rr : RPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src), // Bitcast FR64 <-> GR64 // let Predicates = [HasAVX] in -def VMOV64toSDrm : S3SI<0x7E, MRMSrcMem, (outs FR64:$dst), (ins i64mem:$src), +def VMOV64toSDrm : S2SI<0x7E, MRMSrcMem, (outs FR64:$dst), (ins i64mem:$src), "vmovq\t{$src, $dst|$dst, $src}", [(set FR64:$dst, (bitconvert (loadi64 addr:$src)))]>, VEX; @@ -4623,7 +4559,7 @@ def VMOVSDto64mr : VRPDI<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64:$src), [(store (i64 (bitconvert FR64:$src)), addr:$dst)], IIC_SSE_MOVDQ>, VEX; -def MOV64toSDrm : S3SI<0x7E, MRMSrcMem, (outs FR64:$dst), (ins i64mem:$src), +def MOV64toSDrm : S2SI<0x7E, MRMSrcMem, (outs FR64:$dst), (ins i64mem:$src), "movq\t{$src, $dst|$dst, $src}", [(set FR64:$dst, (bitconvert (loadi64 addr:$src)))], IIC_SSE_MOVDQ>; @@ -4897,80 +4833,6 @@ def MOVQxrxr : 
I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "movq\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVQ_RR>, XS; //===---------------------------------------------------------------------===// -// SSE3 - Conversion Instructions -//===---------------------------------------------------------------------===// - -// Convert Packed Double FP to Packed DW Integers -let Predicates = [HasAVX] in { -// The assembler can recognize rr 256-bit instructions by seeing a ymm -// register, but the same isn't true when using memory operands instead. -// Provide other assembly rr and rm forms to address this explicitly. -def VCVTPD2DQrr : S3DI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - "vcvtpd2dq\t{$src, $dst|$dst, $src}", []>, VEX; -def VCVTPD2DQXrYr : S3DI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src), - "vcvtpd2dq\t{$src, $dst|$dst, $src}", []>, VEX; - -// XMM only -def VCVTPD2DQXrr : S3DI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - "vcvtpd2dqx\t{$src, $dst|$dst, $src}", []>, VEX; -def VCVTPD2DQXrm : S3DI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), - "vcvtpd2dqx\t{$src, $dst|$dst, $src}", []>, VEX; - -// YMM only -def VCVTPD2DQYrr : S3DI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src), - "vcvtpd2dqy\t{$src, $dst|$dst, $src}", []>, VEX; -def VCVTPD2DQYrm : S3DI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src), - "vcvtpd2dqy\t{$src, $dst|$dst, $src}", []>, VEX, VEX_L; -} - -def CVTPD2DQrm : S3DI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), - "cvtpd2dq\t{$src, $dst|$dst, $src}", [], - IIC_SSE_CVT_PD_RM>; -def CVTPD2DQrr : S3DI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - "cvtpd2dq\t{$src, $dst|$dst, $src}", [], - IIC_SSE_CVT_PD_RR>; - -def : Pat<(v4i32 (fp_to_sint (v4f64 VR256:$src))), - (VCVTTPD2DQYrr VR256:$src)>; -def : Pat<(v4i32 (fp_to_sint (memopv4f64 addr:$src))), - (VCVTTPD2DQYrm addr:$src)>; - -// Convert Packed DW Integers to Packed Double FP -let Predicates = [HasAVX] in { -def VCVTDQ2PDrm : S3SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), - "vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX; -def VCVTDQ2PDrr : S3SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - "vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX; -def VCVTDQ2PDYrm : S3SI<0xE6, MRMSrcMem, (outs VR256:$dst), (ins f128mem:$src), - "vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX; -def VCVTDQ2PDYrr : S3SI<0xE6, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src), - "vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX; -} - -def CVTDQ2PDrm : S3SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), - "cvtdq2pd\t{$src, $dst|$dst, $src}", [], - IIC_SSE_CVT_PD_RR>; -def CVTDQ2PDrr : S3SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - "cvtdq2pd\t{$src, $dst|$dst, $src}", [], - IIC_SSE_CVT_PD_RM>; - -// AVX 256-bit register conversion intrinsics -def : Pat<(int_x86_avx_cvtdq2_pd_256 VR128:$src), - (VCVTDQ2PDYrr VR128:$src)>; -def : Pat<(int_x86_avx_cvtdq2_pd_256 (bitconvert (memopv2i64 addr:$src))), - (VCVTDQ2PDYrm addr:$src)>; - -def : Pat<(int_x86_avx_cvt_pd2dq_256 VR256:$src), - (VCVTPD2DQYrr VR256:$src)>; -def : Pat<(int_x86_avx_cvt_pd2dq_256 (memopv4f64 addr:$src)), - (VCVTPD2DQYrm addr:$src)>; - -def : Pat<(v4f64 (sint_to_fp (v4i32 VR128:$src))), - (VCVTDQ2PDYrr VR128:$src)>; -def : Pat<(v4f64 (sint_to_fp (bc_v4i32 (memopv2i64 addr:$src)))), - (VCVTDQ2PDYrm addr:$src)>; - -//===---------------------------------------------------------------------===// // SSE3 - Replicate Single FP - MOVSHDUP and MOVSLDUP 
//===---------------------------------------------------------------------===// multiclass sse3_replicate_sfp op, SDNode OpNode, string OpcodeStr, @@ -5580,16 +5442,14 @@ let usesCustomInserter = 1 in { def MONITOR : PseudoI<(outs), (ins i32mem:$src1, GR32:$src2, GR32:$src3), [(int_x86_sse3_monitor addr:$src1, GR32:$src2, GR32:$src3)]>, Requires<[HasSSE3]>; -def MWAIT : PseudoI<(outs), (ins GR32:$src1, GR32:$src2), - [(int_x86_sse3_mwait GR32:$src1, GR32:$src2)]>, - Requires<[HasSSE3]>; } let Uses = [EAX, ECX, EDX] in def MONITORrrr : I<0x01, MRM_C8, (outs), (ins), "monitor", [], IIC_SSE_MONITOR>, TB, Requires<[HasSSE3]>; let Uses = [ECX, EAX] in -def MWAITrr : I<0x01, MRM_C9, (outs), (ins), "mwait", [], IIC_SSE_MWAIT>, +def MWAITrr : I<0x01, MRM_C9, (outs), (ins), "mwait", + [(int_x86_sse3_mwait ECX, EAX)], IIC_SSE_MWAIT>, TB, Requires<[HasSSE3]>; def : InstAlias<"mwait %eax, %ecx", (MWAITrr)>, Requires<[In32BitMode]>; @@ -5730,14 +5590,26 @@ let Predicates = [HasSSE41] in { (PMOVZXDQrm addr:$src)>; } +let Predicates = [HasAVX2] in { + let AddedComplexity = 15 in { + def : Pat<(v4i64 (X86vzmovly (v4i32 VR128:$src))), + (VPMOVZXDQYrr VR128:$src)>; + def : Pat<(v8i32 (X86vzmovly (v8i16 VR128:$src))), + (VPMOVZXWDYrr VR128:$src)>; + } + + def : Pat<(v4i64 (X86vsmovl (v4i32 VR128:$src))), (VPMOVSXDQYrr VR128:$src)>; + def : Pat<(v8i32 (X86vsmovl (v8i16 VR128:$src))), (VPMOVSXWDYrr VR128:$src)>; +} + let Predicates = [HasAVX] in { -def : Pat<(v2i64 (X86vsmovl (v4i32 VR128:$src))), (VPMOVSXDQrr VR128:$src)>; -def : Pat<(v4i32 (X86vsmovl (v8i16 VR128:$src))), (VPMOVSXWDrr VR128:$src)>; + def : Pat<(v2i64 (X86vsmovl (v4i32 VR128:$src))), (VPMOVSXDQrr VR128:$src)>; + def : Pat<(v4i32 (X86vsmovl (v8i16 VR128:$src))), (VPMOVSXWDrr VR128:$src)>; } let Predicates = [HasSSE41] in { -def : Pat<(v2i64 (X86vsmovl (v4i32 VR128:$src))), (PMOVSXDQrr VR128:$src)>; -def : Pat<(v4i32 (X86vsmovl (v8i16 VR128:$src))), (PMOVSXWDrr VR128:$src)>; + def : Pat<(v2i64 (X86vsmovl (v4i32 VR128:$src))), (PMOVSXDQrr VR128:$src)>; + def : Pat<(v4i32 (X86vsmovl (v8i16 VR128:$src))), (PMOVSXWDrr VR128:$src)>; } @@ -6608,15 +6480,15 @@ let Predicates = [HasAVX] in { let isCommutable = 0 in { let ExeDomain = SSEPackedSingle in { defm VBLENDPS : SS41I_binop_rmi_int<0x0C, "vblendps", int_x86_sse41_blendps, - VR128, memopv4f32, i128mem, 0>, VEX_4V; + VR128, memopv4f32, f128mem, 0>, VEX_4V; defm VBLENDPSY : SS41I_binop_rmi_int<0x0C, "vblendps", - int_x86_avx_blend_ps_256, VR256, memopv8f32, i256mem, 0>, VEX_4V; + int_x86_avx_blend_ps_256, VR256, memopv8f32, f256mem, 0>, VEX_4V; } let ExeDomain = SSEPackedDouble in { defm VBLENDPD : SS41I_binop_rmi_int<0x0D, "vblendpd", int_x86_sse41_blendpd, - VR128, memopv2f64, i128mem, 0>, VEX_4V; + VR128, memopv2f64, f128mem, 0>, VEX_4V; defm VBLENDPDY : SS41I_binop_rmi_int<0x0D, "vblendpd", - int_x86_avx_blend_pd_256, VR256, memopv4f64, i256mem, 0>, VEX_4V; + int_x86_avx_blend_pd_256, VR256, memopv4f64, f256mem, 0>, VEX_4V; } defm VPBLENDW : SS41I_binop_rmi_int<0x0E, "vpblendw", int_x86_sse41_pblendw, VR128, memopv2i64, i128mem, 0>, VEX_4V; @@ -6625,10 +6497,10 @@ let Predicates = [HasAVX] in { } let ExeDomain = SSEPackedSingle in defm VDPPS : SS41I_binop_rmi_int<0x40, "vdpps", int_x86_sse41_dpps, - VR128, memopv4f32, i128mem, 0>, VEX_4V; + VR128, memopv4f32, f128mem, 0>, VEX_4V; let ExeDomain = SSEPackedDouble in defm VDPPD : SS41I_binop_rmi_int<0x41, "vdppd", int_x86_sse41_dppd, - VR128, memopv2f64, i128mem, 0>, VEX_4V; + VR128, memopv2f64, f128mem, 0>, VEX_4V; let ExeDomain = SSEPackedSingle 
in defm VDPPSY : SS41I_binop_rmi_int<0x40, "vdpps", int_x86_avx_dp_ps_256, VR256, memopv8f32, i256mem, 0>, VEX_4V; @@ -6647,10 +6519,10 @@ let Constraints = "$src1 = $dst" in { let isCommutable = 0 in { let ExeDomain = SSEPackedSingle in defm BLENDPS : SS41I_binop_rmi_int<0x0C, "blendps", int_x86_sse41_blendps, - VR128, memopv4f32, i128mem>; + VR128, memopv4f32, f128mem>; let ExeDomain = SSEPackedDouble in defm BLENDPD : SS41I_binop_rmi_int<0x0D, "blendpd", int_x86_sse41_blendpd, - VR128, memopv2f64, i128mem>; + VR128, memopv2f64, f128mem>; defm PBLENDW : SS41I_binop_rmi_int<0x0E, "pblendw", int_x86_sse41_pblendw, VR128, memopv2i64, i128mem>; defm MPSADBW : SS41I_binop_rmi_int<0x42, "mpsadbw", int_x86_sse41_mpsadbw, @@ -6658,10 +6530,10 @@ let Constraints = "$src1 = $dst" in { } let ExeDomain = SSEPackedSingle in defm DPPS : SS41I_binop_rmi_int<0x40, "dpps", int_x86_sse41_dpps, - VR128, memopv4f32, i128mem>; + VR128, memopv4f32, f128mem>; let ExeDomain = SSEPackedDouble in defm DPPD : SS41I_binop_rmi_int<0x41, "dppd", int_x86_sse41_dppd, - VR128, memopv2f64, i128mem>; + VR128, memopv2f64, f128mem>; } /// SS41I_quaternary_int_avx - AVX SSE 4.1 with 4 operators @@ -6687,15 +6559,15 @@ multiclass SS41I_quaternary_int_avx opc, string OpcodeStr, let Predicates = [HasAVX] in { let ExeDomain = SSEPackedDouble in { -defm VBLENDVPD : SS41I_quaternary_int_avx<0x4B, "vblendvpd", VR128, i128mem, +defm VBLENDVPD : SS41I_quaternary_int_avx<0x4B, "vblendvpd", VR128, f128mem, memopv2f64, int_x86_sse41_blendvpd>; -defm VBLENDVPDY : SS41I_quaternary_int_avx<0x4B, "vblendvpd", VR256, i256mem, +defm VBLENDVPDY : SS41I_quaternary_int_avx<0x4B, "vblendvpd", VR256, f256mem, memopv4f64, int_x86_avx_blendv_pd_256>; } // ExeDomain = SSEPackedDouble let ExeDomain = SSEPackedSingle in { -defm VBLENDVPS : SS41I_quaternary_int_avx<0x4A, "vblendvps", VR128, i128mem, +defm VBLENDVPS : SS41I_quaternary_int_avx<0x4A, "vblendvps", VR128, f128mem, memopv4f32, int_x86_sse41_blendvps>; -defm VBLENDVPSY : SS41I_quaternary_int_avx<0x4A, "vblendvps", VR256, i256mem, +defm VBLENDVPSY : SS41I_quaternary_int_avx<0x4A, "vblendvps", VR256, f256mem, memopv8f32, int_x86_avx_blendv_ps_256>; } // ExeDomain = SSEPackedSingle defm VPBLENDVB : SS41I_quaternary_int_avx<0x4C, "vpblendvb", VR128, i128mem, @@ -6766,7 +6638,7 @@ let Predicates = [HasAVX2] in { /// SS41I_ternary_int - SSE 4.1 ternary operator let Uses = [XMM0], Constraints = "$src1 = $dst" in { multiclass SS41I_ternary_int opc, string OpcodeStr, PatFrag mem_frag, - Intrinsic IntId> { + X86MemOperand x86memop, Intrinsic IntId> { def rr0 : SS48I; let ExeDomain = SSEPackedSingle in -defm BLENDVPS : SS41I_ternary_int<0x14, "blendvps", memopv4f32, +defm BLENDVPS : SS41I_ternary_int<0x14, "blendvps", memopv4f32, f128mem, int_x86_sse41_blendvps>; -defm PBLENDVB : SS41I_ternary_int<0x10, "pblendvb", memopv2i64, +defm PBLENDVB : SS41I_ternary_int<0x10, "pblendvb", memopv2i64, i128mem, int_x86_sse41_pblendvb>; +// Aliases with the implicit xmm0 argument +def : InstAlias<"blendvpd\t{%xmm0, $src2, $dst|$dst, $src2, %xmm0}", + (BLENDVPDrr0 VR128:$dst, VR128:$src2)>; +def : InstAlias<"blendvpd\t{%xmm0, $src2, $dst|$dst, $src2, %xmm0}", + (BLENDVPDrm0 VR128:$dst, f128mem:$src2)>; +def : InstAlias<"blendvps\t{%xmm0, $src2, $dst|$dst, $src2, %xmm0}", + (BLENDVPSrr0 VR128:$dst, VR128:$src2)>; +def : InstAlias<"blendvps\t{%xmm0, $src2, $dst|$dst, $src2, %xmm0}", + (BLENDVPSrm0 VR128:$dst, f128mem:$src2)>; +def : InstAlias<"pblendvb\t{%xmm0, $src2, $dst|$dst, $src2, %xmm0}", + (PBLENDVBrr0 
VR128:$dst, VR128:$src2)>; +def : InstAlias<"pblendvb\t{%xmm0, $src2, $dst|$dst, $src2, %xmm0}", + (PBLENDVBrm0 VR128:$dst, i128mem:$src2)>; + let Predicates = [HasSSE41] in { def : Pat<(v16i8 (vselect (v16i8 XMM0), (v16i8 VR128:$src1), (v16i8 VR128:$src2))), @@ -6955,81 +6841,42 @@ let Defs = [XMM0, EFLAGS], Uses = [EAX, EDX], neverHasSideEffects = 1 in { } // Packed Compare Implicit Length Strings, Return Index -let Defs = [ECX, EFLAGS] in { - multiclass SS42AI_pcmpistri { +let Defs = [ECX, EFLAGS], neverHasSideEffects = 1 in { + multiclass SS42AI_pcmpistri { def rr : SS42AI<0x63, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2, i8imm:$src3), !strconcat(asm, "\t{$src3, $src2, $src1|$src1, $src2, $src3}"), - [(set ECX, (IntId128 VR128:$src1, VR128:$src2, imm:$src3)), - (implicit EFLAGS)]>, OpSize; + []>, OpSize; + let mayLoad = 1 in def rm : SS42AI<0x63, MRMSrcMem, (outs), (ins VR128:$src1, i128mem:$src2, i8imm:$src3), !strconcat(asm, "\t{$src3, $src2, $src1|$src1, $src2, $src3}"), - [(set ECX, (IntId128 VR128:$src1, (load addr:$src2), imm:$src3)), - (implicit EFLAGS)]>, OpSize; + []>, OpSize; } } -let Predicates = [HasAVX] in { -defm VPCMPISTRI : SS42AI_pcmpistri, - VEX; -defm VPCMPISTRIA : SS42AI_pcmpistri, - VEX; -defm VPCMPISTRIC : SS42AI_pcmpistri, - VEX; -defm VPCMPISTRIO : SS42AI_pcmpistri, - VEX; -defm VPCMPISTRIS : SS42AI_pcmpistri, - VEX; -defm VPCMPISTRIZ : SS42AI_pcmpistri, - VEX; -} - -defm PCMPISTRI : SS42AI_pcmpistri; -defm PCMPISTRIA : SS42AI_pcmpistri; -defm PCMPISTRIC : SS42AI_pcmpistri; -defm PCMPISTRIO : SS42AI_pcmpistri; -defm PCMPISTRIS : SS42AI_pcmpistri; -defm PCMPISTRIZ : SS42AI_pcmpistri; +let Predicates = [HasAVX] in +defm VPCMPISTRI : SS42AI_pcmpistri<"vpcmpistri">, VEX; +defm PCMPISTRI : SS42AI_pcmpistri<"pcmpistri">; // Packed Compare Explicit Length Strings, Return Index -let Defs = [ECX, EFLAGS], Uses = [EAX, EDX] in { - multiclass SS42AI_pcmpestri { +let Defs = [ECX, EFLAGS], Uses = [EAX, EDX], neverHasSideEffects = 1 in { + multiclass SS42AI_pcmpestri { def rr : SS42AI<0x61, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src3, i8imm:$src5), !strconcat(asm, "\t{$src5, $src3, $src1|$src1, $src3, $src5}"), - [(set ECX, (IntId128 VR128:$src1, EAX, VR128:$src3, EDX, imm:$src5)), - (implicit EFLAGS)]>, OpSize; + []>, OpSize; + let mayLoad = 1 in def rm : SS42AI<0x61, MRMSrcMem, (outs), (ins VR128:$src1, i128mem:$src3, i8imm:$src5), !strconcat(asm, "\t{$src5, $src3, $src1|$src1, $src3, $src5}"), - [(set ECX, - (IntId128 VR128:$src1, EAX, (load addr:$src3), EDX, imm:$src5)), - (implicit EFLAGS)]>, OpSize; + []>, OpSize; } } -let Predicates = [HasAVX] in { -defm VPCMPESTRI : SS42AI_pcmpestri, - VEX; -defm VPCMPESTRIA : SS42AI_pcmpestri, - VEX; -defm VPCMPESTRIC : SS42AI_pcmpestri, - VEX; -defm VPCMPESTRIO : SS42AI_pcmpestri, - VEX; -defm VPCMPESTRIS : SS42AI_pcmpestri, - VEX; -defm VPCMPESTRIZ : SS42AI_pcmpestri, - VEX; -} - -defm PCMPESTRI : SS42AI_pcmpestri; -defm PCMPESTRIA : SS42AI_pcmpestri; -defm PCMPESTRIC : SS42AI_pcmpestri; -defm PCMPESTRIO : SS42AI_pcmpestri; -defm PCMPESTRIS : SS42AI_pcmpestri; -defm PCMPESTRIZ : SS42AI_pcmpestri; +let Predicates = [HasAVX] in +defm VPCMPESTRI : SS42AI_pcmpestri<"vpcmpestri">, VEX; +defm PCMPESTRI : SS42AI_pcmpestri<"pcmpestri">; //===----------------------------------------------------------------------===// // SSE4.2 - CRC Instructions @@ -7204,52 +7051,50 @@ def AESKEYGENASSIST128rm : AESAI<0xDF, MRMSrcMem, (outs VR128:$dst), OpSize; //===----------------------------------------------------------------------===// -// 
CLMUL Instructions +// PCLMUL Instructions //===----------------------------------------------------------------------===// -// Carry-less Multiplication instructions -let neverHasSideEffects = 1 in { // AVX carry-less Multiplication instructions -def VPCLMULQDQrr : AVXCLMULIi8<0x44, MRMSrcReg, (outs VR128:$dst), +def VPCLMULQDQrr : AVXPCLMULIi8<0x44, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i8imm:$src3), "vpclmulqdq\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", - []>; + [(set VR128:$dst, + (int_x86_pclmulqdq VR128:$src1, VR128:$src2, imm:$src3))]>; -let mayLoad = 1 in -def VPCLMULQDQrm : AVXCLMULIi8<0x44, MRMSrcMem, (outs VR128:$dst), +def VPCLMULQDQrm : AVXPCLMULIi8<0x44, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2, i8imm:$src3), "vpclmulqdq\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", - []>; + [(set VR128:$dst, (int_x86_pclmulqdq VR128:$src1, + (memopv2i64 addr:$src2), imm:$src3))]>; +// Carry-less Multiplication instructions let Constraints = "$src1 = $dst" in { -def PCLMULQDQrr : CLMULIi8<0x44, MRMSrcReg, (outs VR128:$dst), +def PCLMULQDQrr : PCLMULIi8<0x44, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i8imm:$src3), "pclmulqdq\t{$src3, $src2, $dst|$dst, $src2, $src3}", - []>; + [(set VR128:$dst, + (int_x86_pclmulqdq VR128:$src1, VR128:$src2, imm:$src3))]>; -let mayLoad = 1 in -def PCLMULQDQrm : CLMULIi8<0x44, MRMSrcMem, (outs VR128:$dst), +def PCLMULQDQrm : PCLMULIi8<0x44, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2, i8imm:$src3), "pclmulqdq\t{$src3, $src2, $dst|$dst, $src2, $src3}", - []>; + [(set VR128:$dst, (int_x86_pclmulqdq VR128:$src1, + (memopv2i64 addr:$src2), imm:$src3))]>; } // Constraints = "$src1 = $dst" -} // neverHasSideEffects = 1 multiclass pclmul_alias { - def : InstAlias; - def : InstAlias; - def : InstAlias; - def : InstAlias; } @@ -7259,6 +7104,45 @@ defm : pclmul_alias<"lqhq", 0x10>; defm : pclmul_alias<"lqlq", 0x00>; //===----------------------------------------------------------------------===// +// SSE4A Instructions +//===----------------------------------------------------------------------===// + +let Predicates = [HasSSE4A] in { + +let Constraints = "$src = $dst" in { +def EXTRQI : Ii8<0x78, MRM0r, (outs VR128:$dst), + (ins VR128:$src, i8imm:$len, i8imm:$idx), + "extrq\t{$idx, $len, $src|$src, $len, $idx}", + [(set VR128:$dst, (int_x86_sse4a_extrqi VR128:$src, imm:$len, + imm:$idx))]>, TB, OpSize; +def EXTRQ : I<0x79, MRMSrcReg, (outs VR128:$dst), + (ins VR128:$src, VR128:$mask), + "extrq\t{$mask, $src|$src, $mask}", + [(set VR128:$dst, (int_x86_sse4a_extrq VR128:$src, + VR128:$mask))]>, TB, OpSize; + +def INSERTQI : Ii8<0x78, MRMSrcReg, (outs VR128:$dst), + (ins VR128:$src, VR128:$src2, i8imm:$len, i8imm:$idx), + "insertq\t{$idx, $len, $src2, $src|$src, $src2, $len, $idx}", + [(set VR128:$dst, (int_x86_sse4a_insertqi VR128:$src, + VR128:$src2, imm:$len, imm:$idx))]>, XD; +def INSERTQ : I<0x79, MRMSrcReg, (outs VR128:$dst), + (ins VR128:$src, VR128:$mask), + "insertq\t{$mask, $src|$src, $mask}", + [(set VR128:$dst, (int_x86_sse4a_insertq VR128:$src, + VR128:$mask))]>, XD; +} + +def MOVNTSS : I<0x2B, MRMDestMem, (outs), (ins f32mem:$dst, VR128:$src), + "movntss\t{$src, $dst|$dst, $src}", + [(int_x86_sse4a_movnt_ss addr:$dst, VR128:$src)]>, XS; + +def MOVNTSD : I<0x2B, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src), + "movntsd\t{$src, $dst|$dst, $src}", + [(int_x86_sse4a_movnt_sd addr:$dst, VR128:$src)]>, XD; +} + 
+//===----------------------------------------------------------------------===// // AVX Instructions //===----------------------------------------------------------------------===// @@ -7286,7 +7170,7 @@ let ExeDomain = SSEPackedSingle in { int_x86_avx_vbroadcast_ss_256>; } let ExeDomain = SSEPackedDouble in -def VBROADCASTSDrm : avx_broadcast<0x19, "vbroadcastsd", VR256, f64mem, +def VBROADCASTSDYrm : avx_broadcast<0x19, "vbroadcastsd", VR256, f64mem, int_x86_avx_vbroadcast_sd_256>; def VBROADCASTF128 : avx_broadcast<0x1A, "vbroadcastf128", VR256, f128mem, int_x86_avx_vbroadcastf128_pd_256>; @@ -7298,8 +7182,8 @@ let ExeDomain = SSEPackedSingle in { int_x86_avx2_vbroadcast_ss_ps_256>; } let ExeDomain = SSEPackedDouble in -def VBROADCASTSDrr : avx2_broadcast_reg<0x19, "vbroadcastsd", VR256, - int_x86_avx2_vbroadcast_sd_pd_256>; +def VBROADCASTSDYrr : avx2_broadcast_reg<0x19, "vbroadcastsd", VR256, + int_x86_avx2_vbroadcast_sd_pd_256>; let Predicates = [HasAVX2] in def VBROADCASTI128 : avx_broadcast<0x5A, "vbroadcasti128", VR256, i128mem, @@ -7595,7 +7479,6 @@ let Defs = [YMM0, YMM1, YMM2, YMM3, YMM4, YMM5, YMM6, YMM7, // Half precision conversion instructions //===----------------------------------------------------------------------===// multiclass f16c_ph2ps { -let Predicates = [HasAVX, HasF16C] in { def rr : I<0x13, MRMSrcReg, (outs RC:$dst), (ins VR128:$src), "vcvtph2ps\t{$src, $dst|$dst, $src}", [(set RC:$dst, (Int VR128:$src))]>, @@ -7604,27 +7487,26 @@ let Predicates = [HasAVX, HasF16C] in { def rm : I<0x13, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src), "vcvtph2ps\t{$src, $dst|$dst, $src}", []>, T8, OpSize, VEX; } -} multiclass f16c_ps2ph { -let Predicates = [HasAVX, HasF16C] in { def rr : Ii8<0x1D, MRMDestReg, (outs VR128:$dst), (ins RC:$src1, i32i8imm:$src2), "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set VR128:$dst, (Int RC:$src1, imm:$src2))]>, TA, OpSize, VEX; - let neverHasSideEffects = 1, mayLoad = 1 in - def mr : Ii8<0x1D, MRMDestMem, (outs x86memop:$dst), - (ins RC:$src1, i32i8imm:$src2), + let neverHasSideEffects = 1, mayStore = 1 in + def mr : Ii8<0x1D, MRMDestMem, (outs), + (ins x86memop:$dst, RC:$src1, i32i8imm:$src2), "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, TA, OpSize, VEX; } -} -defm VCVTPH2PS : f16c_ph2ps; -defm VCVTPH2PSY : f16c_ph2ps; -defm VCVTPS2PH : f16c_ps2ph; -defm VCVTPS2PHY : f16c_ps2ph; +let Predicates = [HasAVX, HasF16C] in { + defm VCVTPH2PS : f16c_ph2ps; + defm VCVTPH2PSY : f16c_ph2ps; + defm VCVTPS2PH : f16c_ps2ph; + defm VCVTPS2PHY : f16c_ps2ph; +} //===----------------------------------------------------------------------===// // AVX2 Instructions @@ -7711,6 +7593,49 @@ let Predicates = [HasAVX2] in { (VPBROADCASTQrm addr:$src)>; def : Pat<(v4i64 (X86VBroadcast (loadi64 addr:$src))), (VPBROADCASTQYrm addr:$src)>; + + def : Pat<(v16i8 (X86VBroadcast (v16i8 VR128:$src))), + (VPBROADCASTBrr VR128:$src)>; + def : Pat<(v32i8 (X86VBroadcast (v16i8 VR128:$src))), + (VPBROADCASTBYrr VR128:$src)>; + def : Pat<(v8i16 (X86VBroadcast (v8i16 VR128:$src))), + (VPBROADCASTWrr VR128:$src)>; + def : Pat<(v16i16 (X86VBroadcast (v8i16 VR128:$src))), + (VPBROADCASTWYrr VR128:$src)>; + def : Pat<(v4i32 (X86VBroadcast (v4i32 VR128:$src))), + (VPBROADCASTDrr VR128:$src)>; + def : Pat<(v8i32 (X86VBroadcast (v4i32 VR128:$src))), + (VPBROADCASTDYrr VR128:$src)>; + def : Pat<(v2i64 (X86VBroadcast (v2i64 VR128:$src))), + (VPBROADCASTQrr VR128:$src)>; + def : Pat<(v4i64 (X86VBroadcast (v2i64 VR128:$src))), + (VPBROADCASTQYrr VR128:$src)>; + def 
: Pat<(v4f32 (X86VBroadcast (v4f32 VR128:$src))), + (VBROADCASTSSrr VR128:$src)>; + def : Pat<(v8f32 (X86VBroadcast (v4f32 VR128:$src))), + (VBROADCASTSSYrr VR128:$src)>; + def : Pat<(v2f64 (X86VBroadcast (v2f64 VR128:$src))), + (VPBROADCASTQrr VR128:$src)>; + def : Pat<(v4f64 (X86VBroadcast (v2f64 VR128:$src))), + (VBROADCASTSDYrr VR128:$src)>; + + // Provide fallback in case the load node that is used in the patterns above + // is used by additional users, which prevents the pattern selection. + let AddedComplexity = 20 in { + def : Pat<(v4f32 (X86VBroadcast FR32:$src)), + (VBROADCASTSSrr (COPY_TO_REGCLASS FR32:$src, VR128))>; + def : Pat<(v8f32 (X86VBroadcast FR32:$src)), + (VBROADCASTSSYrr (COPY_TO_REGCLASS FR32:$src, VR128))>; + def : Pat<(v4f64 (X86VBroadcast FR64:$src)), + (VBROADCASTSDYrr (COPY_TO_REGCLASS FR64:$src, VR128))>; + + def : Pat<(v4i32 (X86VBroadcast GR32:$src)), + (VBROADCASTSSrr (COPY_TO_REGCLASS GR32:$src, VR128))>; + def : Pat<(v8i32 (X86VBroadcast GR32:$src)), + (VBROADCASTSSYrr (COPY_TO_REGCLASS GR32:$src, VR128))>; + def : Pat<(v4i64 (X86VBroadcast GR64:$src)), + (VBROADCASTSDYrr (COPY_TO_REGCLASS GR64:$src, VR128))>; + } } // AVX1 broadcast patterns @@ -7718,16 +7643,42 @@ let Predicates = [HasAVX] in { def : Pat<(v8i32 (X86VBroadcast (loadi32 addr:$src))), (VBROADCASTSSYrm addr:$src)>; def : Pat<(v4i64 (X86VBroadcast (loadi64 addr:$src))), - (VBROADCASTSDrm addr:$src)>; + (VBROADCASTSDYrm addr:$src)>; def : Pat<(v8f32 (X86VBroadcast (loadf32 addr:$src))), (VBROADCASTSSYrm addr:$src)>; def : Pat<(v4f64 (X86VBroadcast (loadf64 addr:$src))), - (VBROADCASTSDrm addr:$src)>; - + (VBROADCASTSDYrm addr:$src)>; def : Pat<(v4f32 (X86VBroadcast (loadf32 addr:$src))), (VBROADCASTSSrm addr:$src)>; def : Pat<(v4i32 (X86VBroadcast (loadi32 addr:$src))), (VBROADCASTSSrm addr:$src)>; + + // Provide fallback in case the load node that is used in the patterns above + // is used by additional users, which prevents the pattern selection. 
+ let AddedComplexity = 20 in { + // 128bit broadcasts: + def : Pat<(v4f32 (X86VBroadcast FR32:$src)), + (VPSHUFDri (COPY_TO_REGCLASS FR32:$src, VR128), 0)>; + def : Pat<(v8f32 (X86VBroadcast FR32:$src)), + (VINSERTF128rr (INSERT_SUBREG (v8f32 (IMPLICIT_DEF)), + (VPSHUFDri (COPY_TO_REGCLASS FR32:$src, VR128), 0), sub_xmm), + (VPSHUFDri (COPY_TO_REGCLASS FR32:$src, VR128), 0), 1)>; + def : Pat<(v4f64 (X86VBroadcast FR64:$src)), + (VINSERTF128rr (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)), + (VPSHUFDri (COPY_TO_REGCLASS FR64:$src, VR128), 0x44), sub_xmm), + (VPSHUFDri (COPY_TO_REGCLASS FR64:$src, VR128), 0x44), 1)>; + + def : Pat<(v4i32 (X86VBroadcast GR32:$src)), + (VPSHUFDri (COPY_TO_REGCLASS GR32:$src, VR128), 0)>; + def : Pat<(v8i32 (X86VBroadcast GR32:$src)), + (VINSERTF128rr (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), + (VPSHUFDri (COPY_TO_REGCLASS GR32:$src, VR128), 0), sub_xmm), + (VPSHUFDri (COPY_TO_REGCLASS GR32:$src, VR128), 0), 1)>; + def : Pat<(v4i64 (X86VBroadcast GR64:$src)), + (VINSERTF128rr (INSERT_SUBREG (v4i64 (IMPLICIT_DEF)), + (VPSHUFDri (COPY_TO_REGCLASS GR64:$src, VR128), 0x44), sub_xmm), + (VPSHUFDri (COPY_TO_REGCLASS GR64:$src, VR128), 0x44), 1)>; + } } //===----------------------------------------------------------------------===// @@ -7820,8 +7771,8 @@ let neverHasSideEffects = 1 in { def VINSERTI128rr : AVX2AIi8<0x38, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src1, VR128:$src2, i8imm:$src3), "vinserti128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", - []>, - VEX_4V; + []>, VEX_4V; +let mayLoad = 1 in def VINSERTI128rm : AVX2AIi8<0x38, MRMSrcMem, (outs VR256:$dst), (ins VR256:$src1, i128mem:$src2, i8imm:$src3), "vinserti128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", @@ -7954,3 +7905,30 @@ defm VPSLLVQ : avx2_var_shift<0x47, "vpsllvq", shl, v2i64, v4i64>, VEX_W; defm VPSRLVD : avx2_var_shift<0x45, "vpsrlvd", srl, v4i32, v8i32>; defm VPSRLVQ : avx2_var_shift<0x45, "vpsrlvq", srl, v2i64, v4i64>, VEX_W; defm VPSRAVD : avx2_var_shift<0x46, "vpsravd", sra, v4i32, v8i32>; + +//===----------------------------------------------------------------------===// +// VGATHER - GATHER Operations +multiclass avx2_gather opc, string OpcodeStr, RegisterClass RC256, + X86MemOperand memop128, X86MemOperand memop256> { + def rm : AVX28I, VEX_4VOp3; + def Yrm : AVX28I, VEX_4VOp3, VEX_L; +} + +let mayLoad = 1, Constraints = "$src1 = $dst, $mask = $mask_wb" in { + defm VGATHERDPD : avx2_gather<0x92, "vgatherdpd", VR256, vx64mem, vx64mem>, VEX_W; + defm VGATHERQPD : avx2_gather<0x93, "vgatherqpd", VR256, vx64mem, vy64mem>, VEX_W; + defm VGATHERDPS : avx2_gather<0x92, "vgatherdps", VR256, vx32mem, vy32mem>; + defm VGATHERQPS : avx2_gather<0x93, "vgatherqps", VR128, vx32mem, vy32mem>; + defm VPGATHERDQ : avx2_gather<0x90, "vpgatherdq", VR256, vx64mem, vx64mem>, VEX_W; + defm VPGATHERQQ : avx2_gather<0x91, "vpgatherqq", VR256, vx64mem, vy64mem>, VEX_W; + defm VPGATHERDD : avx2_gather<0x90, "vpgatherdd", VR256, vx32mem, vy32mem>; + defm VPGATHERQD : avx2_gather<0x91, "vpgatherqd", VR128, vx32mem, vy32mem>; +} diff --git a/lib/Target/X86/X86InstrSystem.td b/lib/Target/X86/X86InstrSystem.td index bddba6c..ea716bf 100644 --- a/lib/Target/X86/X86InstrSystem.td +++ b/lib/Target/X86/X86InstrSystem.td @@ -14,7 +14,8 @@ //===----------------------------------------------------------------------===// let Defs = [RAX, RDX] in - def RDTSC : I<0x31, RawFrm, (outs), (ins), "rdtsc", [(X86rdtsc)]>, TB; + def RDTSC : I<0x31, RawFrm, (outs), (ins), "rdtsc", [(X86rdtsc)], IIC_RDTSC>, + TB; let Defs 
= [RAX, RCX, RDX] in def RDTSCP : I<0x01, MRM_F9, (outs), (ins), "rdtscp", []>, TB; @@ -26,14 +27,17 @@ let isTerminator = 1, isBarrier = 1, hasCtrlDep = 1 in { def UD2B : I<0xB9, RawFrm, (outs), (ins), "ud2b", []>, TB; } -def HLT : I<0xF4, RawFrm, (outs), (ins), "hlt", []>; -def RSM : I<0xAA, RawFrm, (outs), (ins), "rsm", []>, TB; +def HLT : I<0xF4, RawFrm, (outs), (ins), "hlt", [], IIC_HLT>; +def RSM : I<0xAA, RawFrm, (outs), (ins), "rsm", [], IIC_RSM>, TB; // Interrupt and SysCall Instructions. let Uses = [EFLAGS] in def INTO : I<0xce, RawFrm, (outs), (ins), "into", []>; def INT3 : I<0xcc, RawFrm, (outs), (ins), "int3", - [(int_x86_int (i8 3))]>; + [(int_x86_int (i8 3))], IIC_INT3>; + +def : Pat<(debugtrap), + (INT3)>; // The long form of "int $3" turns into int3 as a size optimization. // FIXME: This doesn't work because InstAlias can't match immediate constants. @@ -41,23 +45,25 @@ def INT3 : I<0xcc, RawFrm, (outs), (ins), "int3", def INT : Ii8<0xcd, RawFrm, (outs), (ins i8imm:$trap), "int\t$trap", - [(int_x86_int imm:$trap)]>; + [(int_x86_int imm:$trap)], IIC_INT>; -def SYSCALL : I<0x05, RawFrm, (outs), (ins), "syscall", []>, TB; -def SYSRET : I<0x07, RawFrm, (outs), (ins), "sysret{l}", []>, TB; -def SYSRET64 :RI<0x07, RawFrm, (outs), (ins), "sysret{q}", []>, TB, +def SYSCALL : I<0x05, RawFrm, (outs), (ins), "syscall", [], IIC_SYSCALL>, TB; +def SYSRET : I<0x07, RawFrm, (outs), (ins), "sysret{l}", [], IIC_SYSCALL>, TB; +def SYSRET64 :RI<0x07, RawFrm, (outs), (ins), "sysret{q}", [], IIC_SYSCALL>, TB, Requires<[In64BitMode]>; -def SYSENTER : I<0x34, RawFrm, (outs), (ins), "sysenter", []>, TB; - -def SYSEXIT : I<0x35, RawFrm, (outs), (ins), "sysexit{l}", []>, TB; +def SYSENTER : I<0x34, RawFrm, (outs), (ins), "sysenter", [], + IIC_SYS_ENTER_EXIT>, TB; + +def SYSEXIT : I<0x35, RawFrm, (outs), (ins), "sysexit{l}", [], + IIC_SYS_ENTER_EXIT>, TB; def SYSEXIT64 :RI<0x35, RawFrm, (outs), (ins), "sysexit{q}", []>, TB, Requires<[In64BitMode]>; -def IRET16 : I<0xcf, RawFrm, (outs), (ins), "iret{w}", []>, OpSize; -def IRET32 : I<0xcf, RawFrm, (outs), (ins), "iret{l|d}", []>; -def IRET64 : RI<0xcf, RawFrm, (outs), (ins), "iretq", []>, +def IRET16 : I<0xcf, RawFrm, (outs), (ins), "iret{w}", [], IIC_IRET>, OpSize; +def IRET32 : I<0xcf, RawFrm, (outs), (ins), "iret{l|d}", [], IIC_IRET>; +def IRET64 : RI<0xcf, RawFrm, (outs), (ins), "iretq", [], IIC_IRET>, Requires<[In64BitMode]>; @@ -66,73 +72,73 @@ def IRET64 : RI<0xcf, RawFrm, (outs), (ins), "iretq", []>, // let Defs = [AL], Uses = [DX] in def IN8rr : I<0xEC, RawFrm, (outs), (ins), - "in{b}\t{%dx, %al|AL, DX}", []>; + "in{b}\t{%dx, %al|AL, DX}", [], IIC_IN_RR>; let Defs = [AX], Uses = [DX] in def IN16rr : I<0xED, RawFrm, (outs), (ins), - "in{w}\t{%dx, %ax|AX, DX}", []>, OpSize; + "in{w}\t{%dx, %ax|AX, DX}", [], IIC_IN_RR>, OpSize; let Defs = [EAX], Uses = [DX] in def IN32rr : I<0xED, RawFrm, (outs), (ins), - "in{l}\t{%dx, %eax|EAX, DX}", []>; + "in{l}\t{%dx, %eax|EAX, DX}", [], IIC_IN_RR>; let Defs = [AL] in def IN8ri : Ii8<0xE4, RawFrm, (outs), (ins i8imm:$port), - "in{b}\t{$port, %al|AL, $port}", []>; + "in{b}\t{$port, %al|AL, $port}", [], IIC_IN_RI>; let Defs = [AX] in def IN16ri : Ii8<0xE5, RawFrm, (outs), (ins i8imm:$port), - "in{w}\t{$port, %ax|AX, $port}", []>, OpSize; + "in{w}\t{$port, %ax|AX, $port}", [], IIC_IN_RI>, OpSize; let Defs = [EAX] in def IN32ri : Ii8<0xE5, RawFrm, (outs), (ins i8imm:$port), - "in{l}\t{$port, %eax|EAX, $port}", []>; + "in{l}\t{$port, %eax|EAX, $port}", [], IIC_IN_RI>; let Uses = [DX, AL] in def OUT8rr : I<0xEE, 
RawFrm, (outs), (ins), - "out{b}\t{%al, %dx|DX, AL}", []>; + "out{b}\t{%al, %dx|DX, AL}", [], IIC_OUT_RR>; let Uses = [DX, AX] in def OUT16rr : I<0xEF, RawFrm, (outs), (ins), - "out{w}\t{%ax, %dx|DX, AX}", []>, OpSize; + "out{w}\t{%ax, %dx|DX, AX}", [], IIC_OUT_RR>, OpSize; let Uses = [DX, EAX] in def OUT32rr : I<0xEF, RawFrm, (outs), (ins), - "out{l}\t{%eax, %dx|DX, EAX}", []>; + "out{l}\t{%eax, %dx|DX, EAX}", [], IIC_OUT_RR>; let Uses = [AL] in def OUT8ir : Ii8<0xE6, RawFrm, (outs), (ins i8imm:$port), - "out{b}\t{%al, $port|$port, AL}", []>; + "out{b}\t{%al, $port|$port, AL}", [], IIC_OUT_IR>; let Uses = [AX] in def OUT16ir : Ii8<0xE7, RawFrm, (outs), (ins i8imm:$port), - "out{w}\t{%ax, $port|$port, AX}", []>, OpSize; + "out{w}\t{%ax, $port|$port, AX}", [], IIC_OUT_IR>, OpSize; let Uses = [EAX] in def OUT32ir : Ii8<0xE7, RawFrm, (outs), (ins i8imm:$port), - "out{l}\t{%eax, $port|$port, EAX}", []>; + "out{l}\t{%eax, $port|$port, EAX}", [], IIC_OUT_IR>; -def IN8 : I<0x6C, RawFrm, (outs), (ins), "ins{b}", []>; -def IN16 : I<0x6D, RawFrm, (outs), (ins), "ins{w}", []>, OpSize; -def IN32 : I<0x6D, RawFrm, (outs), (ins), "ins{l}", []>; +def IN8 : I<0x6C, RawFrm, (outs), (ins), "ins{b}", [], IIC_INS>; +def IN16 : I<0x6D, RawFrm, (outs), (ins), "ins{w}", [], IIC_INS>, OpSize; +def IN32 : I<0x6D, RawFrm, (outs), (ins), "ins{l}", [], IIC_INS>; //===----------------------------------------------------------------------===// // Moves to and from debug registers def MOV32rd : I<0x21, MRMDestReg, (outs GR32:$dst), (ins DEBUG_REG:$src), - "mov{l}\t{$src, $dst|$dst, $src}", []>, TB; + "mov{l}\t{$src, $dst|$dst, $src}", [], IIC_MOV_REG_DR>, TB; def MOV64rd : I<0x21, MRMDestReg, (outs GR64:$dst), (ins DEBUG_REG:$src), - "mov{q}\t{$src, $dst|$dst, $src}", []>, TB; + "mov{q}\t{$src, $dst|$dst, $src}", [], IIC_MOV_REG_DR>, TB; def MOV32dr : I<0x23, MRMSrcReg, (outs DEBUG_REG:$dst), (ins GR32:$src), - "mov{l}\t{$src, $dst|$dst, $src}", []>, TB; + "mov{l}\t{$src, $dst|$dst, $src}", [], IIC_MOV_DR_REG>, TB; def MOV64dr : I<0x23, MRMSrcReg, (outs DEBUG_REG:$dst), (ins GR64:$src), - "mov{q}\t{$src, $dst|$dst, $src}", []>, TB; + "mov{q}\t{$src, $dst|$dst, $src}", [], IIC_MOV_DR_REG>, TB; //===----------------------------------------------------------------------===// // Moves to and from control registers def MOV32rc : I<0x20, MRMDestReg, (outs GR32:$dst), (ins CONTROL_REG:$src), - "mov{l}\t{$src, $dst|$dst, $src}", []>, TB; + "mov{l}\t{$src, $dst|$dst, $src}", [], IIC_MOV_REG_CR>, TB; def MOV64rc : I<0x20, MRMDestReg, (outs GR64:$dst), (ins CONTROL_REG:$src), - "mov{q}\t{$src, $dst|$dst, $src}", []>, TB; + "mov{q}\t{$src, $dst|$dst, $src}", [], IIC_MOV_REG_CR>, TB; def MOV32cr : I<0x22, MRMSrcReg, (outs CONTROL_REG:$dst), (ins GR32:$src), - "mov{l}\t{$src, $dst|$dst, $src}", []>, TB; + "mov{l}\t{$src, $dst|$dst, $src}", [], IIC_MOV_CR_REG>, TB; def MOV64cr : I<0x22, MRMSrcReg, (outs CONTROL_REG:$dst), (ins GR64:$src), - "mov{q}\t{$src, $dst|$dst, $src}", []>, TB; + "mov{q}\t{$src, $dst|$dst, $src}", [], IIC_MOV_CR_REG>, TB; //===----------------------------------------------------------------------===// // Segment override instruction prefixes @@ -150,254 +156,265 @@ def GS_PREFIX : I<0x65, RawFrm, (outs), (ins), "gs", []>; // def MOV16rs : I<0x8C, MRMDestReg, (outs GR16:$dst), (ins SEGMENT_REG:$src), - "mov{w}\t{$src, $dst|$dst, $src}", []>, OpSize; + "mov{w}\t{$src, $dst|$dst, $src}", [], IIC_MOV_REG_SR>, OpSize; def MOV32rs : I<0x8C, MRMDestReg, (outs GR32:$dst), (ins SEGMENT_REG:$src), - "mov{l}\t{$src, 
$dst|$dst, $src}", []>; + "mov{l}\t{$src, $dst|$dst, $src}", [], IIC_MOV_REG_SR>; def MOV64rs : RI<0x8C, MRMDestReg, (outs GR64:$dst), (ins SEGMENT_REG:$src), - "mov{q}\t{$src, $dst|$dst, $src}", []>; + "mov{q}\t{$src, $dst|$dst, $src}", [], IIC_MOV_REG_SR>; def MOV16ms : I<0x8C, MRMDestMem, (outs i16mem:$dst), (ins SEGMENT_REG:$src), - "mov{w}\t{$src, $dst|$dst, $src}", []>, OpSize; + "mov{w}\t{$src, $dst|$dst, $src}", [], IIC_MOV_MEM_SR>, OpSize; def MOV32ms : I<0x8C, MRMDestMem, (outs i32mem:$dst), (ins SEGMENT_REG:$src), - "mov{l}\t{$src, $dst|$dst, $src}", []>; + "mov{l}\t{$src, $dst|$dst, $src}", [], IIC_MOV_MEM_SR>; def MOV64ms : RI<0x8C, MRMDestMem, (outs i64mem:$dst), (ins SEGMENT_REG:$src), - "mov{q}\t{$src, $dst|$dst, $src}", []>; + "mov{q}\t{$src, $dst|$dst, $src}", [], IIC_MOV_MEM_SR>; def MOV16sr : I<0x8E, MRMSrcReg, (outs SEGMENT_REG:$dst), (ins GR16:$src), - "mov{w}\t{$src, $dst|$dst, $src}", []>, OpSize; + "mov{w}\t{$src, $dst|$dst, $src}", [], IIC_MOV_SR_REG>, OpSize; def MOV32sr : I<0x8E, MRMSrcReg, (outs SEGMENT_REG:$dst), (ins GR32:$src), - "mov{l}\t{$src, $dst|$dst, $src}", []>; + "mov{l}\t{$src, $dst|$dst, $src}", [], IIC_MOV_SR_REG>; def MOV64sr : RI<0x8E, MRMSrcReg, (outs SEGMENT_REG:$dst), (ins GR64:$src), - "mov{q}\t{$src, $dst|$dst, $src}", []>; + "mov{q}\t{$src, $dst|$dst, $src}", [], IIC_MOV_SR_REG>; def MOV16sm : I<0x8E, MRMSrcMem, (outs SEGMENT_REG:$dst), (ins i16mem:$src), - "mov{w}\t{$src, $dst|$dst, $src}", []>, OpSize; + "mov{w}\t{$src, $dst|$dst, $src}", [], IIC_MOV_SR_MEM>, OpSize; def MOV32sm : I<0x8E, MRMSrcMem, (outs SEGMENT_REG:$dst), (ins i32mem:$src), - "mov{l}\t{$src, $dst|$dst, $src}", []>; + "mov{l}\t{$src, $dst|$dst, $src}", [], IIC_MOV_SR_MEM>; def MOV64sm : RI<0x8E, MRMSrcMem, (outs SEGMENT_REG:$dst), (ins i64mem:$src), - "mov{q}\t{$src, $dst|$dst, $src}", []>; + "mov{q}\t{$src, $dst|$dst, $src}", [], IIC_MOV_SR_MEM>; //===----------------------------------------------------------------------===// // Segmentation support instructions. -def SWAPGS : I<0x01, MRM_F8, (outs), (ins), "swapgs", []>, TB; +def SWAPGS : I<0x01, MRM_F8, (outs), (ins), "swapgs", [], IIC_SWAPGS>, TB; def LAR16rm : I<0x02, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src), - "lar{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize; + "lar{w}\t{$src, $dst|$dst, $src}", [], IIC_LAR_RM>, TB, OpSize; def LAR16rr : I<0x02, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src), - "lar{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize; + "lar{w}\t{$src, $dst|$dst, $src}", [], IIC_LAR_RR>, TB, OpSize; // i16mem operand in LAR32rm and GR32 operand in LAR32rr is not a typo. def LAR32rm : I<0x02, MRMSrcMem, (outs GR32:$dst), (ins i16mem:$src), - "lar{l}\t{$src, $dst|$dst, $src}", []>, TB; + "lar{l}\t{$src, $dst|$dst, $src}", [], IIC_LAR_RM>, TB; def LAR32rr : I<0x02, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src), - "lar{l}\t{$src, $dst|$dst, $src}", []>, TB; + "lar{l}\t{$src, $dst|$dst, $src}", [], IIC_LAR_RR>, TB; // i16mem operand in LAR64rm and GR32 operand in LAR32rr is not a typo. 
def LAR64rm : RI<0x02, MRMSrcMem, (outs GR64:$dst), (ins i16mem:$src), - "lar{q}\t{$src, $dst|$dst, $src}", []>, TB; + "lar{q}\t{$src, $dst|$dst, $src}", [], IIC_LAR_RM>, TB; def LAR64rr : RI<0x02, MRMSrcReg, (outs GR64:$dst), (ins GR32:$src), - "lar{q}\t{$src, $dst|$dst, $src}", []>, TB; + "lar{q}\t{$src, $dst|$dst, $src}", [], IIC_LAR_RR>, TB; def LSL16rm : I<0x03, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src), - "lsl{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize; + "lsl{w}\t{$src, $dst|$dst, $src}", [], IIC_LSL_RM>, TB, OpSize; def LSL16rr : I<0x03, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src), - "lsl{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize; + "lsl{w}\t{$src, $dst|$dst, $src}", [], IIC_LSL_RR>, TB, OpSize; def LSL32rm : I<0x03, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src), - "lsl{l}\t{$src, $dst|$dst, $src}", []>, TB; + "lsl{l}\t{$src, $dst|$dst, $src}", [], IIC_LSL_RM>, TB; def LSL32rr : I<0x03, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src), - "lsl{l}\t{$src, $dst|$dst, $src}", []>, TB; + "lsl{l}\t{$src, $dst|$dst, $src}", [], IIC_LSL_RR>, TB; def LSL64rm : RI<0x03, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src), - "lsl{q}\t{$src, $dst|$dst, $src}", []>, TB; + "lsl{q}\t{$src, $dst|$dst, $src}", [], IIC_LSL_RM>, TB; def LSL64rr : RI<0x03, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src), - "lsl{q}\t{$src, $dst|$dst, $src}", []>, TB; + "lsl{q}\t{$src, $dst|$dst, $src}", [], IIC_LSL_RR>, TB; -def INVLPG : I<0x01, MRM7m, (outs), (ins i8mem:$addr), "invlpg\t$addr", []>, TB; +def INVLPG : I<0x01, MRM7m, (outs), (ins i8mem:$addr), "invlpg\t$addr", + [], IIC_INVLPG>, TB; def STR16r : I<0x00, MRM1r, (outs GR16:$dst), (ins), - "str{w}\t$dst", []>, TB, OpSize; + "str{w}\t$dst", [], IIC_STR>, TB, OpSize; def STR32r : I<0x00, MRM1r, (outs GR32:$dst), (ins), - "str{l}\t$dst", []>, TB; + "str{l}\t$dst", [], IIC_STR>, TB; def STR64r : RI<0x00, MRM1r, (outs GR64:$dst), (ins), - "str{q}\t$dst", []>, TB; + "str{q}\t$dst", [], IIC_STR>, TB; def STRm : I<0x00, MRM1m, (outs i16mem:$dst), (ins), - "str{w}\t$dst", []>, TB; + "str{w}\t$dst", [], IIC_STR>, TB; def LTRr : I<0x00, MRM3r, (outs), (ins GR16:$src), - "ltr{w}\t$src", []>, TB; + "ltr{w}\t$src", [], IIC_LTR>, TB; def LTRm : I<0x00, MRM3m, (outs), (ins i16mem:$src), - "ltr{w}\t$src", []>, TB; + "ltr{w}\t$src", [], IIC_LTR>, TB; def PUSHCS16 : I<0x0E, RawFrm, (outs), (ins), - "push{w}\t{%cs|CS}", []>, Requires<[In32BitMode]>, OpSize; + "push{w}\t{%cs|CS}", [], IIC_PUSH_SR>, Requires<[In32BitMode]>, + OpSize; def PUSHCS32 : I<0x0E, RawFrm, (outs), (ins), - "push{l}\t{%cs|CS}", []>, Requires<[In32BitMode]>; + "push{l}\t{%cs|CS}", [], IIC_PUSH_CS>, Requires<[In32BitMode]>; def PUSHSS16 : I<0x16, RawFrm, (outs), (ins), - "push{w}\t{%ss|SS}", []>, Requires<[In32BitMode]>, OpSize; + "push{w}\t{%ss|SS}", [], IIC_PUSH_SR>, Requires<[In32BitMode]>, + OpSize; def PUSHSS32 : I<0x16, RawFrm, (outs), (ins), - "push{l}\t{%ss|SS}", []>, Requires<[In32BitMode]>; + "push{l}\t{%ss|SS}", [], IIC_PUSH_SR>, Requires<[In32BitMode]>; def PUSHDS16 : I<0x1E, RawFrm, (outs), (ins), - "push{w}\t{%ds|DS}", []>, Requires<[In32BitMode]>, OpSize; + "push{w}\t{%ds|DS}", [], IIC_PUSH_SR>, Requires<[In32BitMode]>, + OpSize; def PUSHDS32 : I<0x1E, RawFrm, (outs), (ins), - "push{l}\t{%ds|DS}", []>, Requires<[In32BitMode]>; + "push{l}\t{%ds|DS}", [], IIC_PUSH_SR>, Requires<[In32BitMode]>; def PUSHES16 : I<0x06, RawFrm, (outs), (ins), - "push{w}\t{%es|ES}", []>, Requires<[In32BitMode]>, OpSize; + "push{w}\t{%es|ES}", [], IIC_PUSH_SR>, Requires<[In32BitMode]>, + OpSize; def PUSHES32 : 
I<0x06, RawFrm, (outs), (ins), - "push{l}\t{%es|ES}", []>, Requires<[In32BitMode]>; + "push{l}\t{%es|ES}", [], IIC_PUSH_SR>, Requires<[In32BitMode]>; def PUSHFS16 : I<0xa0, RawFrm, (outs), (ins), - "push{w}\t{%fs|FS}", []>, OpSize, TB; + "push{w}\t{%fs|FS}", [], IIC_PUSH_SR>, OpSize, TB; def PUSHFS32 : I<0xa0, RawFrm, (outs), (ins), - "push{l}\t{%fs|FS}", []>, TB, Requires<[In32BitMode]>; + "push{l}\t{%fs|FS}", [], IIC_PUSH_SR>, TB, Requires<[In32BitMode]>; def PUSHGS16 : I<0xa8, RawFrm, (outs), (ins), - "push{w}\t{%gs|GS}", []>, OpSize, TB; + "push{w}\t{%gs|GS}", [], IIC_PUSH_SR>, OpSize, TB; def PUSHGS32 : I<0xa8, RawFrm, (outs), (ins), - "push{l}\t{%gs|GS}", []>, TB, Requires<[In32BitMode]>; + "push{l}\t{%gs|GS}", [], IIC_PUSH_SR>, TB, Requires<[In32BitMode]>; def PUSHFS64 : I<0xa0, RawFrm, (outs), (ins), - "push{q}\t{%fs|FS}", []>, TB; + "push{q}\t{%fs|FS}", [], IIC_PUSH_SR>, TB; def PUSHGS64 : I<0xa8, RawFrm, (outs), (ins), - "push{q}\t{%gs|GS}", []>, TB; + "push{q}\t{%gs|GS}", [], IIC_PUSH_SR>, TB; // No "pop cs" instruction. def POPSS16 : I<0x17, RawFrm, (outs), (ins), - "pop{w}\t{%ss|SS}", []>, OpSize, Requires<[In32BitMode]>; + "pop{w}\t{%ss|SS}", [], IIC_POP_SR_SS>, + OpSize, Requires<[In32BitMode]>; def POPSS32 : I<0x17, RawFrm, (outs), (ins), - "pop{l}\t{%ss|SS}", []> , Requires<[In32BitMode]>; + "pop{l}\t{%ss|SS}", [], IIC_POP_SR_SS>, + Requires<[In32BitMode]>; def POPDS16 : I<0x1F, RawFrm, (outs), (ins), - "pop{w}\t{%ds|DS}", []>, OpSize, Requires<[In32BitMode]>; + "pop{w}\t{%ds|DS}", [], IIC_POP_SR>, + OpSize, Requires<[In32BitMode]>; def POPDS32 : I<0x1F, RawFrm, (outs), (ins), - "pop{l}\t{%ds|DS}", []> , Requires<[In32BitMode]>; + "pop{l}\t{%ds|DS}", [], IIC_POP_SR>, + Requires<[In32BitMode]>; def POPES16 : I<0x07, RawFrm, (outs), (ins), - "pop{w}\t{%es|ES}", []>, OpSize, Requires<[In32BitMode]>; + "pop{w}\t{%es|ES}", [], IIC_POP_SR>, + OpSize, Requires<[In32BitMode]>; def POPES32 : I<0x07, RawFrm, (outs), (ins), - "pop{l}\t{%es|ES}", []> , Requires<[In32BitMode]>; + "pop{l}\t{%es|ES}", [], IIC_POP_SR>, + Requires<[In32BitMode]>; def POPFS16 : I<0xa1, RawFrm, (outs), (ins), - "pop{w}\t{%fs|FS}", []>, OpSize, TB; + "pop{w}\t{%fs|FS}", [], IIC_POP_SR>, OpSize, TB; def POPFS32 : I<0xa1, RawFrm, (outs), (ins), - "pop{l}\t{%fs|FS}", []>, TB , Requires<[In32BitMode]>; + "pop{l}\t{%fs|FS}", [], IIC_POP_SR>, TB, Requires<[In32BitMode]>; def POPFS64 : I<0xa1, RawFrm, (outs), (ins), - "pop{q}\t{%fs|FS}", []>, TB; + "pop{q}\t{%fs|FS}", [], IIC_POP_SR>, TB; def POPGS16 : I<0xa9, RawFrm, (outs), (ins), - "pop{w}\t{%gs|GS}", []>, OpSize, TB; + "pop{w}\t{%gs|GS}", [], IIC_POP_SR>, OpSize, TB; def POPGS32 : I<0xa9, RawFrm, (outs), (ins), - "pop{l}\t{%gs|GS}", []>, TB , Requires<[In32BitMode]>; + "pop{l}\t{%gs|GS}", [], IIC_POP_SR>, TB, Requires<[In32BitMode]>; def POPGS64 : I<0xa9, RawFrm, (outs), (ins), - "pop{q}\t{%gs|GS}", []>, TB; + "pop{q}\t{%gs|GS}", [], IIC_POP_SR>, TB; def LDS16rm : I<0xc5, MRMSrcMem, (outs GR16:$dst), (ins opaque32mem:$src), - "lds{w}\t{$src, $dst|$dst, $src}", []>, OpSize; + "lds{w}\t{$src, $dst|$dst, $src}", [], IIC_LXS>, OpSize; def LDS32rm : I<0xc5, MRMSrcMem, (outs GR32:$dst), (ins opaque48mem:$src), - "lds{l}\t{$src, $dst|$dst, $src}", []>; + "lds{l}\t{$src, $dst|$dst, $src}", [], IIC_LXS>; def LSS16rm : I<0xb2, MRMSrcMem, (outs GR16:$dst), (ins opaque32mem:$src), - "lss{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize; + "lss{w}\t{$src, $dst|$dst, $src}", [], IIC_LXS>, TB, OpSize; def LSS32rm : I<0xb2, MRMSrcMem, (outs GR32:$dst), (ins opaque48mem:$src), - 
"lss{l}\t{$src, $dst|$dst, $src}", []>, TB; + "lss{l}\t{$src, $dst|$dst, $src}", [], IIC_LXS>, TB; def LSS64rm : RI<0xb2, MRMSrcMem, (outs GR64:$dst), (ins opaque80mem:$src), - "lss{q}\t{$src, $dst|$dst, $src}", []>, TB; + "lss{q}\t{$src, $dst|$dst, $src}", [], IIC_LXS>, TB; def LES16rm : I<0xc4, MRMSrcMem, (outs GR16:$dst), (ins opaque32mem:$src), - "les{w}\t{$src, $dst|$dst, $src}", []>, OpSize; + "les{w}\t{$src, $dst|$dst, $src}", [], IIC_LXS>, OpSize; def LES32rm : I<0xc4, MRMSrcMem, (outs GR32:$dst), (ins opaque48mem:$src), - "les{l}\t{$src, $dst|$dst, $src}", []>; + "les{l}\t{$src, $dst|$dst, $src}", [], IIC_LXS>; def LFS16rm : I<0xb4, MRMSrcMem, (outs GR16:$dst), (ins opaque32mem:$src), - "lfs{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize; + "lfs{w}\t{$src, $dst|$dst, $src}", [], IIC_LXS>, TB, OpSize; def LFS32rm : I<0xb4, MRMSrcMem, (outs GR32:$dst), (ins opaque48mem:$src), - "lfs{l}\t{$src, $dst|$dst, $src}", []>, TB; + "lfs{l}\t{$src, $dst|$dst, $src}", [], IIC_LXS>, TB; def LFS64rm : RI<0xb4, MRMSrcMem, (outs GR64:$dst), (ins opaque80mem:$src), - "lfs{q}\t{$src, $dst|$dst, $src}", []>, TB; + "lfs{q}\t{$src, $dst|$dst, $src}", [], IIC_LXS>, TB; def LGS16rm : I<0xb5, MRMSrcMem, (outs GR16:$dst), (ins opaque32mem:$src), - "lgs{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize; + "lgs{w}\t{$src, $dst|$dst, $src}", [], IIC_LXS>, TB, OpSize; def LGS32rm : I<0xb5, MRMSrcMem, (outs GR32:$dst), (ins opaque48mem:$src), - "lgs{l}\t{$src, $dst|$dst, $src}", []>, TB; + "lgs{l}\t{$src, $dst|$dst, $src}", [], IIC_LXS>, TB; def LGS64rm : RI<0xb5, MRMSrcMem, (outs GR64:$dst), (ins opaque80mem:$src), - "lgs{q}\t{$src, $dst|$dst, $src}", []>, TB; + "lgs{q}\t{$src, $dst|$dst, $src}", [], IIC_LXS>, TB; def VERRr : I<0x00, MRM4r, (outs), (ins GR16:$seg), - "verr\t$seg", []>, TB; + "verr\t$seg", [], IIC_VERR>, TB; def VERRm : I<0x00, MRM4m, (outs), (ins i16mem:$seg), - "verr\t$seg", []>, TB; + "verr\t$seg", [], IIC_VERR>, TB; def VERWr : I<0x00, MRM5r, (outs), (ins GR16:$seg), - "verw\t$seg", []>, TB; + "verw\t$seg", [], IIC_VERW_MEM>, TB; def VERWm : I<0x00, MRM5m, (outs), (ins i16mem:$seg), - "verw\t$seg", []>, TB; + "verw\t$seg", [], IIC_VERW_REG>, TB; //===----------------------------------------------------------------------===// // Descriptor-table support instructions def SGDT16m : I<0x01, MRM0m, (outs opaque48mem:$dst), (ins), - "sgdtw\t$dst", []>, TB, OpSize, Requires<[In32BitMode]>; + "sgdtw\t$dst", [], IIC_SGDT>, TB, OpSize, Requires<[In32BitMode]>; def SGDTm : I<0x01, MRM0m, (outs opaque48mem:$dst), (ins), - "sgdt\t$dst", []>, TB; + "sgdt\t$dst", [], IIC_SGDT>, TB; def SIDT16m : I<0x01, MRM1m, (outs opaque48mem:$dst), (ins), - "sidtw\t$dst", []>, TB, OpSize, Requires<[In32BitMode]>; + "sidtw\t$dst", [], IIC_SIDT>, TB, OpSize, Requires<[In32BitMode]>; def SIDTm : I<0x01, MRM1m, (outs opaque48mem:$dst), (ins), "sidt\t$dst", []>, TB; def SLDT16r : I<0x00, MRM0r, (outs GR16:$dst), (ins), - "sldt{w}\t$dst", []>, TB, OpSize; + "sldt{w}\t$dst", [], IIC_SLDT>, TB, OpSize; def SLDT16m : I<0x00, MRM0m, (outs i16mem:$dst), (ins), - "sldt{w}\t$dst", []>, TB; + "sldt{w}\t$dst", [], IIC_SLDT>, TB; def SLDT32r : I<0x00, MRM0r, (outs GR32:$dst), (ins), - "sldt{l}\t$dst", []>, TB; + "sldt{l}\t$dst", [], IIC_SLDT>, TB; // LLDT is not interpreted specially in 64-bit mode because there is no sign // extension. 
def SLDT64r : RI<0x00, MRM0r, (outs GR64:$dst), (ins), - "sldt{q}\t$dst", []>, TB; + "sldt{q}\t$dst", [], IIC_SLDT>, TB; def SLDT64m : RI<0x00, MRM0m, (outs i16mem:$dst), (ins), - "sldt{q}\t$dst", []>, TB; + "sldt{q}\t$dst", [], IIC_SLDT>, TB; def LGDT16m : I<0x01, MRM2m, (outs), (ins opaque48mem:$src), - "lgdtw\t$src", []>, TB, OpSize, Requires<[In32BitMode]>; + "lgdtw\t$src", [], IIC_LGDT>, TB, OpSize, Requires<[In32BitMode]>; def LGDTm : I<0x01, MRM2m, (outs), (ins opaque48mem:$src), - "lgdt\t$src", []>, TB; + "lgdt\t$src", [], IIC_LGDT>, TB; def LIDT16m : I<0x01, MRM3m, (outs), (ins opaque48mem:$src), - "lidtw\t$src", []>, TB, OpSize, Requires<[In32BitMode]>; + "lidtw\t$src", [], IIC_LIDT>, TB, OpSize, Requires<[In32BitMode]>; def LIDTm : I<0x01, MRM3m, (outs), (ins opaque48mem:$src), - "lidt\t$src", []>, TB; + "lidt\t$src", [], IIC_LIDT>, TB; def LLDT16r : I<0x00, MRM2r, (outs), (ins GR16:$src), - "lldt{w}\t$src", []>, TB; + "lldt{w}\t$src", [], IIC_LLDT_REG>, TB; def LLDT16m : I<0x00, MRM2m, (outs), (ins i16mem:$src), - "lldt{w}\t$src", []>, TB; + "lldt{w}\t$src", [], IIC_LLDT_MEM>, TB; //===----------------------------------------------------------------------===// // Specialized register support -def WRMSR : I<0x30, RawFrm, (outs), (ins), "wrmsr", []>, TB; -def RDMSR : I<0x32, RawFrm, (outs), (ins), "rdmsr", []>, TB; -def RDPMC : I<0x33, RawFrm, (outs), (ins), "rdpmc", []>, TB; +def WRMSR : I<0x30, RawFrm, (outs), (ins), "wrmsr", [], IIC_WRMSR>, TB; +def RDMSR : I<0x32, RawFrm, (outs), (ins), "rdmsr", [], IIC_RDMSR>, TB; +def RDPMC : I<0x33, RawFrm, (outs), (ins), "rdpmc", [], IIC_RDPMC>, TB; def SMSW16r : I<0x01, MRM4r, (outs GR16:$dst), (ins), - "smsw{w}\t$dst", []>, OpSize, TB; + "smsw{w}\t$dst", [], IIC_SMSW>, OpSize, TB; def SMSW32r : I<0x01, MRM4r, (outs GR32:$dst), (ins), - "smsw{l}\t$dst", []>, TB; + "smsw{l}\t$dst", [], IIC_SMSW>, TB; // no m form encodable; use SMSW16m def SMSW64r : RI<0x01, MRM4r, (outs GR64:$dst), (ins), - "smsw{q}\t$dst", []>, TB; + "smsw{q}\t$dst", [], IIC_SMSW>, TB; // For memory operands, there is only a 16-bit form def SMSW16m : I<0x01, MRM4m, (outs i16mem:$dst), (ins), - "smsw{w}\t$dst", []>, TB; + "smsw{w}\t$dst", [], IIC_SMSW>, TB; def LMSW16r : I<0x01, MRM6r, (outs), (ins GR16:$src), - "lmsw{w}\t$src", []>, TB; + "lmsw{w}\t$src", [], IIC_LMSW_MEM>, TB; def LMSW16m : I<0x01, MRM6m, (outs), (ins i16mem:$src), - "lmsw{w}\t$src", []>, TB; + "lmsw{w}\t$src", [], IIC_LMSW_REG>, TB; -def CPUID : I<0xA2, RawFrm, (outs), (ins), "cpuid", []>, TB; +def CPUID : I<0xA2, RawFrm, (outs), (ins), "cpuid", [], IIC_CPUID>, TB; //===----------------------------------------------------------------------===// // Cache instructions -def INVD : I<0x08, RawFrm, (outs), (ins), "invd", []>, TB; -def WBINVD : I<0x09, RawFrm, (outs), (ins), "wbinvd", []>, TB; +def INVD : I<0x08, RawFrm, (outs), (ins), "invd", [], IIC_INVD>, TB; +def WBINVD : I<0x09, RawFrm, (outs), (ins), "wbinvd", [], IIC_INVD>, TB; //===----------------------------------------------------------------------===// // XSAVE instructions diff --git a/lib/Target/X86/X86InstrVMX.td b/lib/Target/X86/X86InstrVMX.td index 6a8f0c8..6d3548f 100644 --- a/lib/Target/X86/X86InstrVMX.td +++ b/lib/Target/X86/X86InstrVMX.td @@ -17,17 +17,17 @@ // 66 0F 38 80 def INVEPT32 : I<0x80, MRMSrcMem, (outs), (ins GR32:$src1, i128mem:$src2), - "invept {$src2, $src1|$src1, $src2}", []>, OpSize, T8, + "invept\t{$src2, $src1|$src1, $src2}", []>, OpSize, T8, Requires<[In32BitMode]>; def INVEPT64 : I<0x80, MRMSrcMem, (outs), (ins 
GR64:$src1, i128mem:$src2), - "invept {$src2, $src1|$src1, $src2}", []>, OpSize, T8, + "invept\t{$src2, $src1|$src1, $src2}", []>, OpSize, T8, Requires<[In64BitMode]>; // 66 0F 38 81 def INVVPID32 : I<0x81, MRMSrcMem, (outs), (ins GR32:$src1, i128mem:$src2), - "invvpid {$src2, $src1|$src1, $src2}", []>, OpSize, T8, + "invvpid\t{$src2, $src1|$src1, $src2}", []>, OpSize, T8, Requires<[In32BitMode]>; def INVVPID64 : I<0x81, MRMSrcMem, (outs), (ins GR64:$src1, i128mem:$src2), - "invvpid {$src2, $src1|$src1, $src2}", []>, OpSize, T8, + "invvpid\t{$src2, $src1|$src1, $src2}", []>, OpSize, T8, Requires<[In64BitMode]>; // 0F 01 C1 def VMCALL : I<0x01, MRM_C1, (outs), (ins), "vmcall", []>, TB; diff --git a/lib/Target/X86/X86InstrXOP.td b/lib/Target/X86/X86InstrXOP.td index 65bbcb5..8ec2c68 100644 --- a/lib/Target/X86/X86InstrXOP.td +++ b/lib/Target/X86/X86InstrXOP.td @@ -15,7 +15,7 @@ multiclass xop2op opc, string OpcodeStr, Intrinsic Int, PatFrag memop> { def rr : IXOP, VEX; - def rm : IXOP, VEX; } @@ -36,27 +36,19 @@ let isAsmParserOnly = 1 in { defm VPHADDBW : xop2op<0xC1, "vphaddbw", int_x86_xop_vphaddbw, memopv2i64>; defm VPHADDBQ : xop2op<0xC3, "vphaddbq", int_x86_xop_vphaddbq, memopv2i64>; defm VPHADDBD : xop2op<0xC2, "vphaddbd", int_x86_xop_vphaddbd, memopv2i64>; - defm VFRCZPS : xop2op<0x80, "vfrczps", int_x86_xop_vfrcz_ps, memopv4f32>; - defm VFRCZPD : xop2op<0x81, "vfrczpd", int_x86_xop_vfrcz_pd, memopv2f64>; } // Scalar load 2 addr operand instructions -let Constraints = "$src1 = $dst" in { multiclass xop2opsld opc, string OpcodeStr, Intrinsic Int, Operand memop, ComplexPattern mem_cpat> { - def rr : IXOP, VEX; - def rm : IXOP, VEX; + def rr : IXOP, VEX; + def rm : IXOP, VEX; } -} // Constraints = "$src1 = $dst" - let isAsmParserOnly = 1 in { defm VFRCZSS : xop2opsld<0x82, "vfrczss", int_x86_xop_vfrcz_ss, ssmem, sse_load_f32>; @@ -64,12 +56,26 @@ let isAsmParserOnly = 1 in { sdmem, sse_load_f64>; } +multiclass xop2op128 opc, string OpcodeStr, Intrinsic Int, + PatFrag memop> { + def rr : IXOP, VEX; + def rm : IXOP, VEX; +} + +let isAsmParserOnly = 1 in { + defm VFRCZPS : xop2op128<0x80, "vfrczps", int_x86_xop_vfrcz_ps, memopv4f32>; + defm VFRCZPD : xop2op128<0x81, "vfrczpd", int_x86_xop_vfrcz_pd, memopv2f64>; +} multiclass xop2op256 opc, string OpcodeStr, Intrinsic Int, PatFrag memop> { def rrY : IXOP, VEX, VEX_L; + [(set VR256:$dst, (Int VR256:$src))]>, VEX; def rmY : IXOP, VEX; @@ -88,13 +94,13 @@ multiclass xop3op opc, string OpcodeStr, Intrinsic Int> { !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set VR128:$dst, (Int VR128:$src1, VR128:$src2))]>, VEX_4VOp3; def rm : IXOP, VEX_4V, VEX_W; def mr : IXOP, @@ -116,25 +122,23 @@ let isAsmParserOnly = 1 in { defm VPROTB : xop3op<0x90, "vprotb", int_x86_xop_vprotb>; } -multiclass xop3opimm opc, string OpcodeStr> { - let neverHasSideEffects = 1 in { - def ri : IXOPi8, VEX; - let mayLoad = 1 in - def mi : IXOPi8, VEX; - } +multiclass xop3opimm opc, string OpcodeStr, Intrinsic Int> { + def ri : IXOPi8, VEX; + def mi : IXOPi8, VEX; } let isAsmParserOnly = 1 in { - defm VPROTW : xop3opimm<0xC1, "vprotw">; - defm VPROTQ : xop3opimm<0xC3, "vprotq">; - defm VPROTD : xop3opimm<0xC2, "vprotd">; - defm VPROTB : xop3opimm<0xC0, "vprotb">; + defm VPROTW : xop3opimm<0xC1, "vprotw", int_x86_xop_vprotwi>; + defm VPROTQ : xop3opimm<0xC3, "vprotq", int_x86_xop_vprotqi>; + defm VPROTD : xop3opimm<0xC2, "vprotd", int_x86_xop_vprotdi>; + defm VPROTB : xop3opimm<0xC0, "vprotb", int_x86_xop_vprotbi>; } // Instruction where second source 
can be memory, but third must be register @@ -146,7 +150,7 @@ multiclass xop4opm2 opc, string OpcodeStr, Intrinsic Int> { [(set VR128:$dst, (Int VR128:$src1, VR128:$src2, VR128:$src3))]>, VEX_4V, VEX_I8IMM; def rm : IXOPi8 opc, string OpcodeStr, SDNode OpNode, - ValueType VT> { +multiclass xop4opimm opc, string OpcodeStr, Intrinsic Int> { def ri : IXOPi8, VEX_4V; + [(set VR128:$dst, (Int VR128:$src1, VR128:$src2, imm:$src3))]>, + VEX_4V; def mi : IXOPi8, VEX_4V; + (Int VR128:$src1, (bitconvert (memopv2i64 addr:$src2)), + imm:$src3))]>, VEX_4V; } let isAsmParserOnly = 1 in { - defm VPCOMB : xop4opimm<0xCC, "vpcomb", X86vpcom, v16i8>; - defm VPCOMW : xop4opimm<0xCD, "vpcomw", X86vpcom, v8i16>; - defm VPCOMD : xop4opimm<0xCE, "vpcomd", X86vpcom, v4i32>; - defm VPCOMQ : xop4opimm<0xCF, "vpcomq", X86vpcom, v2i64>; - defm VPCOMUB : xop4opimm<0xEC, "vpcomub", X86vpcomu, v16i8>; - defm VPCOMUW : xop4opimm<0xED, "vpcomuw", X86vpcomu, v8i16>; - defm VPCOMUD : xop4opimm<0xEE, "vpcomud", X86vpcomu, v4i32>; - defm VPCOMUQ : xop4opimm<0xEF, "vpcomuq", X86vpcomu, v2i64>; + defm VPCOMB : xop4opimm<0xCC, "vpcomb", int_x86_xop_vpcomb>; + defm VPCOMW : xop4opimm<0xCD, "vpcomw", int_x86_xop_vpcomw>; + defm VPCOMD : xop4opimm<0xCE, "vpcomd", int_x86_xop_vpcomd>; + defm VPCOMQ : xop4opimm<0xCF, "vpcomq", int_x86_xop_vpcomq>; + defm VPCOMUB : xop4opimm<0xEC, "vpcomub", int_x86_xop_vpcomub>; + defm VPCOMUW : xop4opimm<0xED, "vpcomuw", int_x86_xop_vpcomuw>; + defm VPCOMUD : xop4opimm<0xEE, "vpcomud", int_x86_xop_vpcomud>; + defm VPCOMUQ : xop4opimm<0xEF, "vpcomuq", int_x86_xop_vpcomuq>; } // Instruction where either second or third source can be memory @@ -207,7 +210,7 @@ multiclass xop4op opc, string OpcodeStr, Intrinsic Int> { [(set VR128:$dst, (Int VR128:$src1, VR128:$src2, VR128:$src3))]>, VEX_4V, VEX_I8IMM; def rm : IXOPi8 opc, string OpcodeStr, Intrinsic Int> { (bitconvert (memopv2i64 addr:$src3))))]>, VEX_4V, VEX_I8IMM, VEX_W, MemOp4; def mr : IXOPi8 opc, string OpcodeStr, Intrinsic Int> { [(set VR256:$dst, (Int VR256:$src1, VR256:$src2, VR256:$src3))]>, VEX_4V, VEX_I8IMM; def rmY : IXOPi8 Name; - + if (!MO.isGlobal()) { assert(MO.isSymbol()); Name += MAI.getGlobalPrefix(); Name += MO.getSymbolName(); - } else { + } else { const GlobalValue *GV = MO.getGlobal(); bool isImplicitlyPrivate = false; if (MO.getTargetFlags() == X86II::MO_DARWIN_STUB || @@ -59,7 +59,7 @@ GetSymbolFromOperand(const MachineOperand &MO) const { MO.getTargetFlags() == X86II::MO_DARWIN_NONLAZY_PIC_BASE || MO.getTargetFlags() == X86II::MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE) isImplicitlyPrivate = true; - + Mang->getNameWithPrefix(Name, GV, isImplicitlyPrivate); } @@ -110,7 +110,7 @@ GetSymbolFromOperand(const MachineOperand &MO) const { getMachOMMI().getFnStubEntry(Sym); if (StubSym.getPointer()) return Sym; - + if (MO.isGlobal()) { StubSym = MachineModuleInfoImpl:: @@ -135,7 +135,7 @@ MCOperand X86MCInstLower::LowerSymbolOperand(const MachineOperand &MO, // lot of extra uniquing. const MCExpr *Expr = 0; MCSymbolRefExpr::VariantKind RefKind = MCSymbolRefExpr::VK_None; - + switch (MO.getTargetFlags()) { default: llvm_unreachable("Unknown target flag on GV operand"); case X86II::MO_NO_FLAG: // No flag. 
@@ -144,7 +144,7 @@ MCOperand X86MCInstLower::LowerSymbolOperand(const MachineOperand &MO, case X86II::MO_DLLIMPORT: case X86II::MO_DARWIN_STUB: break; - + case X86II::MO_TLVP: RefKind = MCSymbolRefExpr::VK_TLVP; break; case X86II::MO_TLVP_PIC_BASE: Expr = MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_TLVP, Ctx); @@ -156,10 +156,14 @@ MCOperand X86MCInstLower::LowerSymbolOperand(const MachineOperand &MO, break; case X86II::MO_SECREL: RefKind = MCSymbolRefExpr::VK_SECREL; break; case X86II::MO_TLSGD: RefKind = MCSymbolRefExpr::VK_TLSGD; break; + case X86II::MO_TLSLD: RefKind = MCSymbolRefExpr::VK_TLSLD; break; + case X86II::MO_TLSLDM: RefKind = MCSymbolRefExpr::VK_TLSLDM; break; case X86II::MO_GOTTPOFF: RefKind = MCSymbolRefExpr::VK_GOTTPOFF; break; case X86II::MO_INDNTPOFF: RefKind = MCSymbolRefExpr::VK_INDNTPOFF; break; case X86II::MO_TPOFF: RefKind = MCSymbolRefExpr::VK_TPOFF; break; + case X86II::MO_DTPOFF: RefKind = MCSymbolRefExpr::VK_DTPOFF; break; case X86II::MO_NTPOFF: RefKind = MCSymbolRefExpr::VK_NTPOFF; break; + case X86II::MO_GOTNTPOFF: RefKind = MCSymbolRefExpr::VK_GOTNTPOFF; break; case X86II::MO_GOTPCREL: RefKind = MCSymbolRefExpr::VK_GOTPCREL; break; case X86II::MO_GOT: RefKind = MCSymbolRefExpr::VK_GOT; break; case X86II::MO_GOTOFF: RefKind = MCSymbolRefExpr::VK_GOTOFF; break; @@ -169,7 +173,7 @@ MCOperand X86MCInstLower::LowerSymbolOperand(const MachineOperand &MO, case X86II::MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE: Expr = MCSymbolRefExpr::Create(Sym, Ctx); // Subtract the pic base. - Expr = MCBinaryExpr::CreateSub(Expr, + Expr = MCBinaryExpr::CreateSub(Expr, MCSymbolRefExpr::Create(MF.getPICBaseSymbol(), Ctx), Ctx); if (MO.isJTI() && MAI.hasSetDirective()) { @@ -183,10 +187,10 @@ MCOperand X86MCInstLower::LowerSymbolOperand(const MachineOperand &MO, } break; } - + if (Expr == 0) Expr = MCSymbolRefExpr::Create(Sym, RefKind, Ctx); - + if (!MO.isJTI() && MO.getOffset()) Expr = MCBinaryExpr::CreateAdd(Expr, MCConstantExpr::Create(MO.getOffset(), Ctx), @@ -207,10 +211,10 @@ static void lower_lea64_32mem(MCInst *MI, unsigned OpNo) { // Convert registers in the addr mode according to subreg64. for (unsigned i = 0; i != 4; ++i) { if (!MI->getOperand(OpNo+i).isReg()) continue; - + unsigned Reg = MI->getOperand(OpNo+i).getReg(); if (Reg == 0) continue; - + MI->getOperand(OpNo+i).setReg(getX86SubSuperRegister(Reg, MVT::i64)); } } @@ -276,7 +280,7 @@ static void SimplifyShortMoveForm(X86AsmPrinter &Printer, MCInst &Inst, return; // Check whether this is an absolute address. - // FIXME: We know TLVP symbol refs aren't, but there should be a better way + // FIXME: We know TLVP symbol refs aren't, but there should be a better way // to do this here. bool Absolute = true; if (Inst.getOperand(AddrOp).isExpr()) { @@ -285,7 +289,7 @@ static void SimplifyShortMoveForm(X86AsmPrinter &Printer, MCInst &Inst, if (SRE->getKind() == MCSymbolRefExpr::VK_TLVP) Absolute = false; } - + if (Absolute && (Inst.getOperand(AddrBase + 0).getReg() != 0 || Inst.getOperand(AddrBase + 2).getReg() != 0 || @@ -302,10 +306,10 @@ static void SimplifyShortMoveForm(X86AsmPrinter &Printer, MCInst &Inst, void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const { OutMI.setOpcode(MI->getOpcode()); - + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { const MachineOperand &MO = MI->getOperand(i); - + MCOperand MCOp; switch (MO.getType()) { default: @@ -341,10 +345,10 @@ void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const { // Ignore call clobbers. 
continue; } - + OutMI.addOperand(MCOp); } - + // Handle a few special cases to eliminate operand modifiers. ReSimplify: switch (OutMI.getOpcode()) { @@ -421,7 +425,7 @@ ReSimplify: case X86::TAILJMPd: case X86::TAILJMPd64: Opcode = X86::JMP_1; break; } - + MCOperand Saved = OutMI.getOperand(0); OutMI = MCInst(); OutMI.setOpcode(Opcode); @@ -441,7 +445,7 @@ ReSimplify: case X86::ADD16ri8_DB: OutMI.setOpcode(X86::OR16ri8); goto ReSimplify; case X86::ADD32ri8_DB: OutMI.setOpcode(X86::OR32ri8); goto ReSimplify; case X86::ADD64ri8_DB: OutMI.setOpcode(X86::OR64ri8); goto ReSimplify; - + // The assembler backend wants to see branches in their small form and relax // them to their large form. The JIT can only handle the large form because // it does not do relaxation. For now, translate the large form to the @@ -550,17 +554,38 @@ ReSimplify: static void LowerTlsAddr(MCStreamer &OutStreamer, X86MCInstLower &MCInstLowering, const MachineInstr &MI) { - bool is64Bits = MI.getOpcode() == X86::TLS_addr64; + + bool is64Bits = MI.getOpcode() == X86::TLS_addr64 || + MI.getOpcode() == X86::TLS_base_addr64; + + bool needsPadding = MI.getOpcode() == X86::TLS_addr64; + MCContext &context = OutStreamer.getContext(); - if (is64Bits) { + if (needsPadding) { MCInst prefix; prefix.setOpcode(X86::DATA16_PREFIX); OutStreamer.EmitInstruction(prefix); } + + MCSymbolRefExpr::VariantKind SRVK; + switch (MI.getOpcode()) { + case X86::TLS_addr32: + case X86::TLS_addr64: + SRVK = MCSymbolRefExpr::VK_TLSGD; + break; + case X86::TLS_base_addr32: + SRVK = MCSymbolRefExpr::VK_TLSLDM; + break; + case X86::TLS_base_addr64: + SRVK = MCSymbolRefExpr::VK_TLSLD; + break; + default: + llvm_unreachable("unexpected opcode"); + } + MCSymbol *sym = MCInstLowering.GetSymbolFromOperand(MI.getOperand(3)); - const MCSymbolRefExpr *symRef = - MCSymbolRefExpr::Create(sym, MCSymbolRefExpr::VK_TLSGD, context); + const MCSymbolRefExpr *symRef = MCSymbolRefExpr::Create(sym, SRVK, context); MCInst LEA; if (is64Bits) { @@ -571,6 +596,14 @@ static void LowerTlsAddr(MCStreamer &OutStreamer, LEA.addOperand(MCOperand::CreateReg(0)); // index LEA.addOperand(MCOperand::CreateExpr(symRef)); // disp LEA.addOperand(MCOperand::CreateReg(0)); // seg + } else if (SRVK == MCSymbolRefExpr::VK_TLSLDM) { + LEA.setOpcode(X86::LEA32r); + LEA.addOperand(MCOperand::CreateReg(X86::EAX)); // dest + LEA.addOperand(MCOperand::CreateReg(X86::EBX)); // base + LEA.addOperand(MCOperand::CreateImm(1)); // scale + LEA.addOperand(MCOperand::CreateReg(0)); // index + LEA.addOperand(MCOperand::CreateExpr(symRef)); // disp + LEA.addOperand(MCOperand::CreateReg(0)); // seg } else { LEA.setOpcode(X86::LEA32r); LEA.addOperand(MCOperand::CreateReg(X86::EAX)); // dest @@ -582,7 +615,7 @@ static void LowerTlsAddr(MCStreamer &OutStreamer, } OutStreamer.EmitInstruction(LEA); - if (is64Bits) { + if (needsPadding) { MCInst prefix; prefix.setOpcode(X86::DATA16_PREFIX); OutStreamer.EmitInstruction(prefix); @@ -609,8 +642,6 @@ static void LowerTlsAddr(MCStreamer &OutStreamer, } void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) { - OutStreamer.EmitCodeRegion(); - X86MCInstLower MCInstLowering(Mang, *MF, *this); switch (MI->getOpcode()) { case TargetOpcode::DBG_VALUE: @@ -646,6 +677,8 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) { case X86::TLS_addr32: case X86::TLS_addr64: + case X86::TLS_base_addr32: + case X86::TLS_base_addr64: return LowerTlsAddr(OutStreamer, MCInstLowering, *MI); case X86::MOVPC32r: { @@ -655,7 +688,7 @@ void X86AsmPrinter::EmitInstruction(const 
MachineInstr *MI) { // call "L1$pb" // "L1$pb": // popl %esi - + // Emit the call. MCSymbol *PICBase = MF->getPICBaseSymbol(); TmpInst.setOpcode(X86::CALLpcrel32); @@ -664,43 +697,43 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) { TmpInst.addOperand(MCOperand::CreateExpr(MCSymbolRefExpr::Create(PICBase, OutContext))); OutStreamer.EmitInstruction(TmpInst); - + // Emit the label. OutStreamer.EmitLabel(PICBase); - + // popl $reg TmpInst.setOpcode(X86::POP32r); TmpInst.getOperand(0) = MCOperand::CreateReg(MI->getOperand(0).getReg()); OutStreamer.EmitInstruction(TmpInst); return; } - + case X86::ADD32ri: { // Lower the MO_GOT_ABSOLUTE_ADDRESS form of ADD32ri. if (MI->getOperand(2).getTargetFlags() != X86II::MO_GOT_ABSOLUTE_ADDRESS) break; - + // Okay, we have something like: // EAX = ADD32ri EAX, MO_GOT_ABSOLUTE_ADDRESS(@MYGLOBAL) - + // For this, we want to print something like: // MYGLOBAL + (. - PICBASE) // However, we can't generate a ".", so just emit a new label here and refer // to it. MCSymbol *DotSym = OutContext.CreateTempSymbol(); OutStreamer.EmitLabel(DotSym); - + // Now that we have emitted the label, lower the complex operand expression. MCSymbol *OpSym = MCInstLowering.GetSymbolFromOperand(MI->getOperand(2)); - + const MCExpr *DotExpr = MCSymbolRefExpr::Create(DotSym, OutContext); const MCExpr *PICBase = MCSymbolRefExpr::Create(MF->getPICBaseSymbol(), OutContext); DotExpr = MCBinaryExpr::CreateSub(DotExpr, PICBase, OutContext); - - DotExpr = MCBinaryExpr::CreateAdd(MCSymbolRefExpr::Create(OpSym,OutContext), + + DotExpr = MCBinaryExpr::CreateAdd(MCSymbolRefExpr::Create(OpSym,OutContext), DotExpr, OutContext); - + MCInst TmpInst; TmpInst.setOpcode(X86::ADD32ri); TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg())); @@ -710,9 +743,8 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) { return; } } - + MCInst TmpInst; MCInstLowering.Lower(MI, TmpInst); OutStreamer.EmitInstruction(TmpInst); } - diff --git a/lib/Target/X86/X86MCInstLower.h b/lib/Target/X86/X86MCInstLower.h index 40df3db..b4d4cfd 100644 --- a/lib/Target/X86/X86MCInstLower.h +++ b/lib/Target/X86/X86MCInstLower.h @@ -25,7 +25,7 @@ namespace llvm { class Mangler; class TargetMachine; class X86AsmPrinter; - + /// X86MCInstLower - This class is used to lower an MachineInstr into an MCInst. class LLVM_LIBRARY_VISIBILITY X86MCInstLower { MCContext &Ctx; @@ -37,12 +37,12 @@ class LLVM_LIBRARY_VISIBILITY X86MCInstLower { public: X86MCInstLower(Mangler *mang, const MachineFunction &MF, X86AsmPrinter &asmprinter); - + void Lower(const MachineInstr *MI, MCInst &OutMI) const; MCSymbol *GetSymbolFromOperand(const MachineOperand &MO) const; MCOperand LowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym) const; - + private: MachineModuleInfoMachO &getMachOMMI() const; }; diff --git a/lib/Target/X86/X86MachineFunctionInfo.h b/lib/Target/X86/X86MachineFunctionInfo.h index c747109..78d20ce 100644 --- a/lib/Target/X86/X86MachineFunctionInfo.h +++ b/lib/Target/X86/X86MachineFunctionInfo.h @@ -24,7 +24,7 @@ class X86MachineFunctionInfo : public MachineFunctionInfo { virtual void anchor(); /// ForceFramePointer - True if the function is required to use of frame - /// pointer for reasons other than it containing dynamic allocation or + /// pointer for reasons other than it containing dynamic allocation or /// that FP eliminatation is turned off. For example, Cygwin main function /// contains stack pointer re-alignment code which requires FP. 
bool ForceFramePointer; @@ -66,6 +66,8 @@ class X86MachineFunctionInfo : public MachineFunctionInfo { /// ArgumentStackSize - The number of bytes on stack consumed by the arguments /// being passed on the stack. unsigned ArgumentStackSize; + /// NumLocalDynamics - Number of local-dynamic TLS accesses. + unsigned NumLocalDynamics; public: X86MachineFunctionInfo() : ForceFramePointer(false), @@ -79,8 +81,9 @@ public: RegSaveFrameIndex(0), VarArgsGPOffset(0), VarArgsFPOffset(0), - ArgumentStackSize(0) {} - + ArgumentStackSize(0), + NumLocalDynamics(0) {} + explicit X86MachineFunctionInfo(MachineFunction &MF) : ForceFramePointer(false), CalleeSavedFrameSize(0), @@ -93,9 +96,10 @@ public: RegSaveFrameIndex(0), VarArgsGPOffset(0), VarArgsFPOffset(0), - ArgumentStackSize(0) {} - - bool getForceFramePointer() const { return ForceFramePointer;} + ArgumentStackSize(0), + NumLocalDynamics(0) {} + + bool getForceFramePointer() const { return ForceFramePointer;} void setForceFramePointer(bool forceFP) { ForceFramePointer = forceFP; } unsigned getCalleeSavedFrameSize() const { return CalleeSavedFrameSize; } @@ -130,6 +134,10 @@ public: unsigned getArgumentStackSize() const { return ArgumentStackSize; } void setArgumentStackSize(unsigned size) { ArgumentStackSize = size; } + + unsigned getNumLocalDynamicTLSAccesses() const { return NumLocalDynamics; } + void incNumLocalDynamicTLSAccesses() { ++NumLocalDynamics; } + }; } // End llvm namespace diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp index b56025f..877b8f6 100644 --- a/lib/Target/X86/X86RegisterInfo.cpp +++ b/lib/Target/X86/X86RegisterInfo.cpp @@ -50,6 +50,10 @@ ForceStackAlign("force-align-stack", " needed for the function."), cl::init(false), cl::Hidden); +cl::opt +EnableBasePointer("x86-use-base-pointer", cl::Hidden, cl::init(true), + cl::desc("Enable use of a base pointer for complex stack frames")); + X86RegisterInfo::X86RegisterInfo(X86TargetMachine &tm, const TargetInstrInfo &tii) : X86GenRegisterInfo(tm.getSubtarget().is64Bit() @@ -73,6 +77,10 @@ X86RegisterInfo::X86RegisterInfo(X86TargetMachine &tm, StackPtr = X86::ESP; FramePtr = X86::EBP; } + // Use a callee-saved register as the base pointer. These registers must + // not conflict with any ABI requirements. For example, in 32-bit mode PIC + // requires GOT in the EBX register before function calls via PLT GOT pointer. + BasePtr = Is64Bit ? X86::RBX : X86::ESI; } /// getCompactUnwindRegNum - This function maps the register to the number for @@ -90,6 +98,12 @@ int X86RegisterInfo::getCompactUnwindRegNum(unsigned RegNum, bool isEH) const { return -1; } +bool +X86RegisterInfo::trackLivenessAfterRegAlloc(const MachineFunction &MF) const { + // Only enable when post-RA scheduling is enabled and this is needed. + return TM.getSubtargetImpl()->postRAScheduler(); +} + int X86RegisterInfo::getSEHRegNum(unsigned i) const { int reg = X86_MC::getX86RegNum(i); @@ -146,7 +160,7 @@ X86RegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC) const{ // The GR8_NOREX class is always used in a way that won't be constrained to a // sub-class, so sub-classes like GR8_ABCD_L are allowed to expand to the // full GR8 class. 
- if (RC == X86::GR8_NOREXRegisterClass) + if (RC == &X86::GR8_NOREXRegClass) return RC; const TargetRegisterClass *Super = RC; @@ -175,7 +189,8 @@ X86RegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC) const{ } const TargetRegisterClass * -X86RegisterInfo::getPointerRegClass(unsigned Kind) const { +X86RegisterInfo::getPointerRegClass(const MachineFunction &MF, unsigned Kind) + const { switch (Kind) { default: llvm_unreachable("Unexpected Kind in getPointerRegClass!"); case 0: // Normal GPRs. @@ -238,7 +253,7 @@ X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { } if (ghcCall) - return CSR_Ghc_SaveList; + return CSR_NoRegs_SaveList; if (Is64Bit) { if (IsWin64) return CSR_Win64_SaveList; @@ -254,7 +269,7 @@ X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { const uint32_t* X86RegisterInfo::getCallPreservedMask(CallingConv::ID CC) const { if (CC == CallingConv::GHC) - return CSR_Ghc_RegMask; + return CSR_NoRegs_RegMask; if (!Is64Bit) return CSR_32_RegMask; if (IsWin64) @@ -268,21 +283,33 @@ BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const { // Set the stack-pointer register and its aliases as reserved. Reserved.set(X86::RSP); - Reserved.set(X86::ESP); - Reserved.set(X86::SP); - Reserved.set(X86::SPL); + for (MCSubRegIterator I(X86::RSP, this); I.isValid(); ++I) + Reserved.set(*I); // Set the instruction pointer register and its aliases as reserved. Reserved.set(X86::RIP); - Reserved.set(X86::EIP); - Reserved.set(X86::IP); + for (MCSubRegIterator I(X86::RIP, this); I.isValid(); ++I) + Reserved.set(*I); // Set the frame-pointer register and its aliases as reserved if needed. if (TFI->hasFP(MF)) { Reserved.set(X86::RBP); - Reserved.set(X86::EBP); - Reserved.set(X86::BP); - Reserved.set(X86::BPL); + for (MCSubRegIterator I(X86::RBP, this); I.isValid(); ++I) + Reserved.set(*I); + } + + // Set the base-pointer register and its aliases as reserved if needed. + if (hasBasePointer(MF)) { + CallingConv::ID CC = MF.getFunction()->getCallingConv(); + const uint32_t* RegMask = getCallPreservedMask(CC); + if (MachineOperand::clobbersPhysReg(RegMask, getBaseRegister())) + report_fatal_error( + "Stack realignment in presence of dynamic allocas is not supported with" + "this calling convention."); + + Reserved.set(getBaseRegister()); + for (MCSubRegIterator I(getBaseRegister(), this); I.isValid(); ++I) + Reserved.set(*I); } // Mark the segment registers as reserved. @@ -293,6 +320,16 @@ BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const { Reserved.set(X86::FS); Reserved.set(X86::GS); + // Mark the floating point stack registers as reserved. + Reserved.set(X86::ST0); + Reserved.set(X86::ST1); + Reserved.set(X86::ST2); + Reserved.set(X86::ST3); + Reserved.set(X86::ST4); + Reserved.set(X86::ST5); + Reserved.set(X86::ST6); + Reserved.set(X86::ST7); + // Reserve the registers that only exist in 64-bit mode. if (!Is64Bit) { // These 8-bit registers are part of the x86-64 extension even though their @@ -308,14 +345,13 @@ BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const { X86::R8, X86::R9, X86::R10, X86::R11, X86::R12, X86::R13, X86::R14, X86::R15 }; - for (const uint16_t *AI = getOverlaps(GPR64[n]); unsigned Reg = *AI; ++AI) - Reserved.set(Reg); + for (MCRegAliasIterator AI(GPR64[n], this, true); AI.isValid(); ++AI) + Reserved.set(*AI); // XMM8, XMM9, ... 
assert(X86::XMM15 == X86::XMM8+7); - for (const uint16_t *AI = getOverlaps(X86::XMM8 + n); unsigned Reg = *AI; - ++AI) - Reserved.set(Reg); + for (MCRegAliasIterator AI(X86::XMM8 + n, this, true); AI.isValid(); ++AI) + Reserved.set(*AI); } } @@ -326,10 +362,36 @@ BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const { // Stack Frame Processing methods //===----------------------------------------------------------------------===// +bool X86RegisterInfo::hasBasePointer(const MachineFunction &MF) const { + const MachineFrameInfo *MFI = MF.getFrameInfo(); + + if (!EnableBasePointer) + return false; + + // When we need stack realignment and there are dynamic allocas, we can't + // reference off of the stack pointer, so we reserve a base pointer. + if (needsStackRealignment(MF) && MFI->hasVarSizedObjects()) + return true; + + return false; +} + bool X86RegisterInfo::canRealignStack(const MachineFunction &MF) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); - return (MF.getTarget().Options.RealignStack && - !MFI->hasVarSizedObjects()); + const MachineRegisterInfo *MRI = &MF.getRegInfo(); + if (!MF.getTarget().Options.RealignStack) + return false; + + // Stack realignment requires a frame pointer. If we already started + // register allocation with frame pointer elimination, it is too late now. + if (!MRI->canReserveReg(FramePtr)) + return false; + + // If a base pointer is necessary. Check that it isn't too late to reserve + // it. + if (MFI->hasVarSizedObjects()) + return MRI->canReserveReg(BasePtr); + return true; } bool X86RegisterInfo::needsStackRealignment(const MachineFunction &MF) const { @@ -339,13 +401,6 @@ bool X86RegisterInfo::needsStackRealignment(const MachineFunction &MF) const { bool requiresRealignment = ((MFI->getMaxAlignment() > StackAlign) || F->hasFnAttr(Attribute::StackAlignment)); - // FIXME: Currently we don't support stack realignment for functions with - // variable-sized allocas. - // FIXME: It's more complicated than this... - if (0 && requiresRealignment && MFI->hasVarSizedObjects()) - report_fatal_error( - "Stack realignment in presence of dynamic allocas is not supported"); - // If we've requested that we force align the stack do so now. if (ForceStackAlign) return canRealignStack(MF); @@ -485,7 +540,9 @@ X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, unsigned Opc = MI.getOpcode(); bool AfterFPPop = Opc == X86::TAILJMPm64 || Opc == X86::TAILJMPm; - if (needsStackRealignment(MF)) + if (hasBasePointer(MF)) + BasePtr = (FrameIndex < 0 ? FramePtr : getBaseRegister()); + else if (needsStackRealignment(MF)) BasePtr = (FrameIndex < 0 ? FramePtr : StackPtr); else if (AfterFPPop) BasePtr = StackPtr; diff --git a/lib/Target/X86/X86RegisterInfo.h b/lib/Target/X86/X86RegisterInfo.h index bee0393..1bc32cb 100644 --- a/lib/Target/X86/X86RegisterInfo.h +++ b/lib/Target/X86/X86RegisterInfo.h @@ -50,6 +50,11 @@ private: /// unsigned FramePtr; + /// BasePtr - X86 physical register used as a base ptr in complex stack + /// frames. I.e., when we need a 3rd base, not just SP and FP, due to + /// variable size stack objects. + unsigned BasePtr; + public: X86RegisterInfo(X86TargetMachine &tm, const TargetInstrInfo &tii); @@ -65,7 +70,8 @@ public: int getCompactUnwindRegNum(unsigned RegNum, bool isEH) const; /// Code Generation virtual methods... 
- /// + /// + virtual bool trackLivenessAfterRegAlloc(const MachineFunction &MF) const; /// getMatchingSuperRegClass - Return a subclass of the specified register /// class A so that each register in it has a sub-register of the @@ -82,7 +88,8 @@ public: /// getPointerRegClass - Returns a TargetRegisterClass used for pointer /// values. - const TargetRegisterClass *getPointerRegClass(unsigned Kind = 0) const; + const TargetRegisterClass * + getPointerRegClass(const MachineFunction &MF, unsigned Kind = 0) const; /// getCrossCopyRegClass - Returns a legal register class to copy a register /// in the specified class to or from. Returns NULL if it is possible to copy @@ -104,6 +111,8 @@ public: /// register scavenger to determine what registers are free. BitVector getReservedRegs(const MachineFunction &MF) const; + bool hasBasePointer(const MachineFunction &MF) const; + bool canRealignStack(const MachineFunction &MF) const; bool needsStackRealignment(const MachineFunction &MF) const; @@ -121,6 +130,7 @@ public: // Debug information queries. unsigned getFrameRegister(const MachineFunction &MF) const; unsigned getStackRegister() const { return StackPtr; } + unsigned getBaseRegister() const { return BasePtr; } // FIXME: Move to FrameInfok unsigned getSlotSize() const { return SlotSize; } diff --git a/lib/Target/X86/X86RegisterInfo.td b/lib/Target/X86/X86RegisterInfo.td index 5263a49..edc7184 100644 --- a/lib/Target/X86/X86RegisterInfo.td +++ b/lib/Target/X86/X86RegisterInfo.td @@ -23,9 +23,6 @@ let Namespace = "X86" in { def sub_8bit_hi : SubRegIndex; def sub_16bit : SubRegIndex; def sub_32bit : SubRegIndex; - - def sub_ss : SubRegIndex; - def sub_sd : SubRegIndex; def sub_xmm : SubRegIndex; @@ -163,8 +160,6 @@ let Namespace = "X86" in { def FP6 : Register<"fp6">; // XMM Registers, used by the various SSE instruction set extensions. - // The sub_ss and sub_sd subregs are the same registers with another regclass. - let CompositeIndices = [(sub_ss), (sub_sd)] in { def XMM0: Register<"xmm0">, DwarfRegNum<[17, 21, 21]>; def XMM1: Register<"xmm1">, DwarfRegNum<[18, 22, 22]>; def XMM2: Register<"xmm2">, DwarfRegNum<[19, 23, 23]>; @@ -184,7 +179,7 @@ let Namespace = "X86" in { def XMM13: Register<"xmm13">, DwarfRegNum<[30, -2, -2]>; def XMM14: Register<"xmm14">, DwarfRegNum<[31, -2, -2]>; def XMM15: Register<"xmm15">, DwarfRegNum<[32, -2, -2]>; - }} + } // CostPerUse // YMM Registers, used by AVX instructions let SubRegIndices = [sub_xmm] in { @@ -223,6 +218,9 @@ let Namespace = "X86" in { def ST6 : STRegister<"st(6)", [FP1]>, DwarfRegNum<[39, 18, 17]>; def ST7 : STRegister<"st(7)", [FP0]>, DwarfRegNum<[40, 19, 18]>; + // Floating-point status word + def FPSW : Register<"fpsw">; + // Status flags register def EFLAGS : Register<"flags">; @@ -296,26 +294,18 @@ def GR8 : RegisterClass<"X86", [i8], 8, def GR16 : RegisterClass<"X86", [i16], 16, (add AX, CX, DX, SI, DI, BX, BP, SP, - R8W, R9W, R10W, R11W, R14W, R15W, R12W, R13W)> { - let SubRegClasses = [(GR8 sub_8bit, sub_8bit_hi)]; -} + R8W, R9W, R10W, R11W, R14W, R15W, R12W, R13W)>; def GR32 : RegisterClass<"X86", [i32], 32, (add EAX, ECX, EDX, ESI, EDI, EBX, EBP, ESP, - R8D, R9D, R10D, R11D, R14D, R15D, R12D, R13D)> { - let SubRegClasses = [(GR8 sub_8bit, sub_8bit_hi), (GR16 sub_16bit)]; -} + R8D, R9D, R10D, R11D, R14D, R15D, R12D, R13D)>; // GR64 - 64-bit GPRs. This oddly includes RIP, which isn't accurate, since // RIP isn't really a register and it can't be used anywhere except in an // address, but it doesn't cause trouble. 
def GR64 : RegisterClass<"X86", [i64], 64, (add RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11, - RBX, R14, R15, R12, R13, RBP, RSP, RIP)> { - let SubRegClasses = [(GR8 sub_8bit, sub_8bit_hi), - (GR16 sub_16bit), - (GR32 sub_32bit)]; -} + RBX, R14, R15, R12, R13, RBP, RSP, RIP)>; // Segment registers for use by MOV instructions (and others) that have a // segment register as one operand. Always contain a 16-bit segment @@ -336,30 +326,12 @@ def CONTROL_REG : RegisterClass<"X86", [i64], 64, (sequence "CR%u", 0, 15)>; // operations. def GR8_ABCD_L : RegisterClass<"X86", [i8], 8, (add AL, CL, DL, BL)>; def GR8_ABCD_H : RegisterClass<"X86", [i8], 8, (add AH, CH, DH, BH)>; -def GR16_ABCD : RegisterClass<"X86", [i16], 16, (add AX, CX, DX, BX)> { - let SubRegClasses = [(GR8_ABCD_L sub_8bit), (GR8_ABCD_H sub_8bit_hi)]; -} -def GR32_ABCD : RegisterClass<"X86", [i32], 32, (add EAX, ECX, EDX, EBX)> { - let SubRegClasses = [(GR8_ABCD_L sub_8bit), - (GR8_ABCD_H sub_8bit_hi), - (GR16_ABCD sub_16bit)]; -} -def GR64_ABCD : RegisterClass<"X86", [i64], 64, (add RAX, RCX, RDX, RBX)> { - let SubRegClasses = [(GR8_ABCD_L sub_8bit), - (GR8_ABCD_H sub_8bit_hi), - (GR16_ABCD sub_16bit), - (GR32_ABCD sub_32bit)]; -} -def GR32_TC : RegisterClass<"X86", [i32], 32, (add EAX, ECX, EDX)> { - let SubRegClasses = [(GR8 sub_8bit, sub_8bit_hi), (GR16 sub_16bit)]; -} +def GR16_ABCD : RegisterClass<"X86", [i16], 16, (add AX, CX, DX, BX)>; +def GR32_ABCD : RegisterClass<"X86", [i32], 32, (add EAX, ECX, EDX, EBX)>; +def GR64_ABCD : RegisterClass<"X86", [i64], 64, (add RAX, RCX, RDX, RBX)>; +def GR32_TC : RegisterClass<"X86", [i32], 32, (add EAX, ECX, EDX)>; def GR64_TC : RegisterClass<"X86", [i64], 64, (add RAX, RCX, RDX, RSI, RDI, - R8, R9, R11, RIP)> { - let SubRegClasses = [(GR8 sub_8bit, sub_8bit_hi), - (GR16 sub_16bit), - (GR32_TC sub_32bit)]; -} - + R8, R9, R11, RIP)>; def GR64_TCW64 : RegisterClass<"X86", [i64], 64, (add RAX, RCX, RDX, R8, R9, R11)>; @@ -373,64 +345,36 @@ def GR8_NOREX : RegisterClass<"X86", [i8], 8, } // GR16_NOREX - GR16 registers which do not require a REX prefix. def GR16_NOREX : RegisterClass<"X86", [i16], 16, - (add AX, CX, DX, SI, DI, BX, BP, SP)> { - let SubRegClasses = [(GR8_NOREX sub_8bit, sub_8bit_hi)]; -} + (add AX, CX, DX, SI, DI, BX, BP, SP)>; // GR32_NOREX - GR32 registers which do not require a REX prefix. def GR32_NOREX : RegisterClass<"X86", [i32], 32, - (add EAX, ECX, EDX, ESI, EDI, EBX, EBP, ESP)> { - let SubRegClasses = [(GR8_NOREX sub_8bit, sub_8bit_hi), - (GR16_NOREX sub_16bit)]; -} + (add EAX, ECX, EDX, ESI, EDI, EBX, EBP, ESP)>; // GR64_NOREX - GR64 registers which do not require a REX prefix. def GR64_NOREX : RegisterClass<"X86", [i64], 64, - (add RAX, RCX, RDX, RSI, RDI, RBX, RBP, RSP, RIP)> { - let SubRegClasses = [(GR8_NOREX sub_8bit, sub_8bit_hi), - (GR16_NOREX sub_16bit), - (GR32_NOREX sub_32bit)]; -} + (add RAX, RCX, RDX, RSI, RDI, RBX, RBP, RSP, RIP)>; // GR32_NOAX - GR32 registers except EAX. Used by AddRegFrm of XCHG32 in 64-bit // mode to prevent encoding using the 0x90 NOP encoding. xchg %eax, %eax needs // to clear upper 32-bits of RAX so is not a NOP. -def GR32_NOAX : RegisterClass<"X86", [i32], 32, (sub GR32, EAX)> { - let SubRegClasses = [(GR8 sub_8bit, sub_8bit_hi), (GR16 sub_16bit)]; -} +def GR32_NOAX : RegisterClass<"X86", [i32], 32, (sub GR32, EAX)>; // GR32_NOSP - GR32 registers except ESP. 
-def GR32_NOSP : RegisterClass<"X86", [i32], 32, (sub GR32, ESP)> { - let SubRegClasses = [(GR8 sub_8bit, sub_8bit_hi), (GR16 sub_16bit)]; -} +def GR32_NOSP : RegisterClass<"X86", [i32], 32, (sub GR32, ESP)>; // GR64_NOSP - GR64 registers except RSP (and RIP). -def GR64_NOSP : RegisterClass<"X86", [i64], 64, (sub GR64, RSP, RIP)> { - let SubRegClasses = [(GR8 sub_8bit, sub_8bit_hi), - (GR16 sub_16bit), - (GR32_NOSP sub_32bit)]; -} +def GR64_NOSP : RegisterClass<"X86", [i64], 64, (sub GR64, RSP, RIP)>; // GR32_NOREX_NOSP - GR32 registers which do not require a REX prefix except // ESP. def GR32_NOREX_NOSP : RegisterClass<"X86", [i32], 32, - (and GR32_NOREX, GR32_NOSP)> { - let SubRegClasses = [(GR8_NOREX sub_8bit, sub_8bit_hi), - (GR16_NOREX sub_16bit)]; -} + (and GR32_NOREX, GR32_NOSP)>; // GR64_NOREX_NOSP - GR64_NOREX registers except RSP. def GR64_NOREX_NOSP : RegisterClass<"X86", [i64], 64, - (and GR64_NOREX, GR64_NOSP)> { - let SubRegClasses = [(GR8_NOREX sub_8bit, sub_8bit_hi), - (GR16_NOREX sub_16bit), - (GR32_NOREX_NOSP sub_32bit)]; -} + (and GR64_NOREX, GR64_NOSP)>; // A class to support the 'A' assembler constraint: EAX then EDX. -def GR32_AD : RegisterClass<"X86", [i32], 32, (add EAX, EDX)> { - let SubRegClasses = [(GR8_ABCD_L sub_8bit), - (GR8_ABCD_H sub_8bit_hi), - (GR16_ABCD sub_16bit)]; -} +def GR32_AD : RegisterClass<"X86", [i32], 32, (add EAX, EDX)>; // Scalar SSE2 floating point registers. def FR32 : RegisterClass<"X86", [f32], 32, (sequence "XMM%u", 0, 15)>; @@ -458,17 +402,16 @@ def RST : RegisterClass<"X86", [f80, f64, f32], 32, (sequence "ST%u", 0, 7)> { // Generic vector registers: VR64 and VR128. def VR64: RegisterClass<"X86", [x86mmx], 64, (sequence "MM%u", 0, 7)>; def VR128 : RegisterClass<"X86", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], - 128, (add FR32)> { - let SubRegClasses = [(FR32 sub_ss), (FR64 sub_sd)]; -} - + 128, (add FR32)>; def VR256 : RegisterClass<"X86", [v32i8, v16i16, v8i32, v4i64, v8f32, v4f64], - 256, (sequence "YMM%u", 0, 15)> { - let SubRegClasses = [(FR32 sub_ss), (FR64 sub_sd), (VR128 sub_xmm)]; -} + 256, (sequence "YMM%u", 0, 15)>; // Status flags registers. def CCR : RegisterClass<"X86", [i32], 32, (add EFLAGS)> { let CopyCost = -1; // Don't allow copying of status registers. let isAllocatable = 0; } +def FPCCR : RegisterClass<"X86", [i16], 16, (add FPSW)> { + let CopyCost = -1; // Don't allow copying of status registers. + let isAllocatable = 0; +} diff --git a/lib/Target/X86/X86Relocations.h b/lib/Target/X86/X86Relocations.h index 857becf..0333056 100644 --- a/lib/Target/X86/X86Relocations.h +++ b/lib/Target/X86/X86Relocations.h @@ -21,7 +21,7 @@ namespace llvm { /// RelocationType - An enum for the x86 relocation codes. Note that /// the terminology here doesn't follow x86 convention - word means /// 32-bit and dword means 64-bit. 
The relocations will be treated - /// by JIT or ObjectCode emitters, this is transparent to the x86 code + /// by JIT or ObjectCode emitters, this is transparent to the x86 code /// emitter but JIT and ObjectCode will treat them differently enum RelocationType { /// reloc_pcrel_word - PC relative relocation, add the relocated value to diff --git a/lib/Target/X86/X86Schedule.td b/lib/Target/X86/X86Schedule.td index 17f4efd..c14407f 100644 --- a/lib/Target/X86/X86Schedule.td +++ b/lib/Target/X86/X86Schedule.td @@ -8,7 +8,7 @@ //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// -// Instruction Itinerary classes used for X86 +// Instruction Itinerary classes used for X86 def IIC_DEFAULT : InstrItinClass; def IIC_ALU_MEM : InstrItinClass; def IIC_ALU_NONMEM : InstrItinClass; @@ -253,6 +253,42 @@ def IIC_SSE_CVT_SS2SI64_RR : InstrItinClass; def IIC_SSE_CVT_SD2SI_RM : InstrItinClass; def IIC_SSE_CVT_SD2SI_RR : InstrItinClass; +// MMX +def IIC_MMX_MOV_MM_RM : InstrItinClass; +def IIC_MMX_MOV_REG_MM : InstrItinClass; +def IIC_MMX_MOVQ_RM : InstrItinClass; +def IIC_MMX_MOVQ_RR : InstrItinClass; + +def IIC_MMX_ALU_RM : InstrItinClass; +def IIC_MMX_ALU_RR : InstrItinClass; +def IIC_MMX_ALUQ_RM : InstrItinClass; +def IIC_MMX_ALUQ_RR : InstrItinClass; +def IIC_MMX_PHADDSUBW_RM : InstrItinClass; +def IIC_MMX_PHADDSUBW_RR : InstrItinClass; +def IIC_MMX_PHADDSUBD_RM : InstrItinClass; +def IIC_MMX_PHADDSUBD_RR : InstrItinClass; +def IIC_MMX_PMUL : InstrItinClass; +def IIC_MMX_MISC_FUNC_MEM : InstrItinClass; +def IIC_MMX_MISC_FUNC_REG : InstrItinClass; +def IIC_MMX_PSADBW : InstrItinClass; +def IIC_MMX_SHIFT_RI : InstrItinClass; +def IIC_MMX_SHIFT_RM : InstrItinClass; +def IIC_MMX_SHIFT_RR : InstrItinClass; +def IIC_MMX_UNPCK_H_RM : InstrItinClass; +def IIC_MMX_UNPCK_H_RR : InstrItinClass; +def IIC_MMX_UNPCK_L : InstrItinClass; +def IIC_MMX_PCK_RM : InstrItinClass; +def IIC_MMX_PCK_RR : InstrItinClass; +def IIC_MMX_PSHUF : InstrItinClass; +def IIC_MMX_PEXTR : InstrItinClass; +def IIC_MMX_PINSRW : InstrItinClass; +def IIC_MMX_MASKMOV : InstrItinClass; + +def IIC_MMX_CVT_PD_RR : InstrItinClass; +def IIC_MMX_CVT_PD_RM : InstrItinClass; +def IIC_MMX_CVT_PS_RR : InstrItinClass; +def IIC_MMX_CVT_PS_RM : InstrItinClass; + def IIC_CMPX_LOCK : InstrItinClass; def IIC_CMPX_LOCK_8 : InstrItinClass; def IIC_CMPX_LOCK_8B : InstrItinClass; @@ -261,13 +297,185 @@ def IIC_CMPX_LOCK_16B : InstrItinClass; def IIC_XADD_LOCK_MEM : InstrItinClass; def IIC_XADD_LOCK_MEM8 : InstrItinClass; +def IIC_FILD : InstrItinClass; +def IIC_FLD : InstrItinClass; +def IIC_FLD80 : InstrItinClass; +def IIC_FST : InstrItinClass; +def IIC_FST80 : InstrItinClass; +def IIC_FIST : InstrItinClass; +def IIC_FLDZ : InstrItinClass; +def IIC_FUCOM : InstrItinClass; +def IIC_FUCOMI : InstrItinClass; +def IIC_FCOMI : InstrItinClass; +def IIC_FNSTSW : InstrItinClass; +def IIC_FNSTCW : InstrItinClass; +def IIC_FLDCW : InstrItinClass; +def IIC_FNINIT : InstrItinClass; +def IIC_FFREE : InstrItinClass; +def IIC_FNCLEX : InstrItinClass; +def IIC_WAIT : InstrItinClass; +def IIC_FXAM : InstrItinClass; +def IIC_FNOP : InstrItinClass; +def IIC_FLDL : InstrItinClass; +def IIC_F2XM1 : InstrItinClass; +def IIC_FYL2X : InstrItinClass; +def IIC_FPTAN : InstrItinClass; +def IIC_FPATAN : InstrItinClass; +def IIC_FXTRACT : InstrItinClass; +def IIC_FPREM1 : InstrItinClass; +def IIC_FPSTP : InstrItinClass; +def IIC_FPREM : InstrItinClass; +def IIC_FYL2XP1 : 
InstrItinClass; +def IIC_FSINCOS : InstrItinClass; +def IIC_FRNDINT : InstrItinClass; +def IIC_FSCALE : InstrItinClass; +def IIC_FCOMPP : InstrItinClass; +def IIC_FXSAVE : InstrItinClass; +def IIC_FXRSTOR : InstrItinClass; + +def IIC_FXCH : InstrItinClass; + +// System instructions +def IIC_CPUID : InstrItinClass; +def IIC_INT : InstrItinClass; +def IIC_INT3 : InstrItinClass; +def IIC_INVD : InstrItinClass; +def IIC_INVLPG : InstrItinClass; +def IIC_IRET : InstrItinClass; +def IIC_HLT : InstrItinClass; +def IIC_LXS : InstrItinClass; +def IIC_LTR : InstrItinClass; +def IIC_RDTSC : InstrItinClass; +def IIC_RSM : InstrItinClass; +def IIC_SIDT : InstrItinClass; +def IIC_SGDT : InstrItinClass; +def IIC_SLDT : InstrItinClass; +def IIC_STR : InstrItinClass; +def IIC_SWAPGS : InstrItinClass; +def IIC_SYSCALL : InstrItinClass; +def IIC_SYS_ENTER_EXIT : InstrItinClass; +def IIC_IN_RR : InstrItinClass; +def IIC_IN_RI : InstrItinClass; +def IIC_OUT_RR : InstrItinClass; +def IIC_OUT_IR : InstrItinClass; +def IIC_INS : InstrItinClass; +def IIC_MOV_REG_DR : InstrItinClass; +def IIC_MOV_DR_REG : InstrItinClass; +def IIC_MOV_REG_CR : InstrItinClass; +def IIC_MOV_CR_REG : InstrItinClass; +def IIC_MOV_REG_SR : InstrItinClass; +def IIC_MOV_MEM_SR : InstrItinClass; +def IIC_MOV_SR_REG : InstrItinClass; +def IIC_MOV_SR_MEM : InstrItinClass; +def IIC_LAR_RM : InstrItinClass; +def IIC_LAR_RR : InstrItinClass; +def IIC_LSL_RM : InstrItinClass; +def IIC_LSL_RR : InstrItinClass; +def IIC_LGDT : InstrItinClass; +def IIC_LIDT : InstrItinClass; +def IIC_LLDT_REG : InstrItinClass; +def IIC_LLDT_MEM : InstrItinClass; +def IIC_PUSH_CS : InstrItinClass; +def IIC_PUSH_SR : InstrItinClass; +def IIC_POP_SR : InstrItinClass; +def IIC_POP_SR_SS : InstrItinClass; +def IIC_VERR : InstrItinClass; +def IIC_VERW_REG : InstrItinClass; +def IIC_VERW_MEM : InstrItinClass; +def IIC_WRMSR : InstrItinClass; +def IIC_RDMSR : InstrItinClass; +def IIC_RDPMC : InstrItinClass; +def IIC_SMSW : InstrItinClass; +def IIC_LMSW_REG : InstrItinClass; +def IIC_LMSW_MEM : InstrItinClass; +def IIC_ENTER : InstrItinClass; +def IIC_LEAVE : InstrItinClass; +def IIC_POP_MEM : InstrItinClass; +def IIC_POP_REG16 : InstrItinClass; +def IIC_POP_REG : InstrItinClass; +def IIC_POP_F : InstrItinClass; +def IIC_POP_FD : InstrItinClass; +def IIC_POP_A : InstrItinClass; +def IIC_PUSH_IMM : InstrItinClass; +def IIC_PUSH_MEM : InstrItinClass; +def IIC_PUSH_REG : InstrItinClass; +def IIC_PUSH_F : InstrItinClass; +def IIC_PUSH_A : InstrItinClass; +def IIC_BSWAP : InstrItinClass; +def IIC_BSF : InstrItinClass; +def IIC_BSR : InstrItinClass; +def IIC_MOVS : InstrItinClass; +def IIC_STOS : InstrItinClass; +def IIC_SCAS : InstrItinClass; +def IIC_CMPS : InstrItinClass; +def IIC_MOV : InstrItinClass; +def IIC_MOV_MEM : InstrItinClass; +def IIC_AHF : InstrItinClass; +def IIC_BT_MI : InstrItinClass; +def IIC_BT_MR : InstrItinClass; +def IIC_BT_RI : InstrItinClass; +def IIC_BT_RR : InstrItinClass; +def IIC_BTX_MI : InstrItinClass; +def IIC_BTX_MR : InstrItinClass; +def IIC_BTX_RI : InstrItinClass; +def IIC_BTX_RR : InstrItinClass; +def IIC_XCHG_REG : InstrItinClass; +def IIC_XCHG_MEM : InstrItinClass; +def IIC_XADD_REG : InstrItinClass; +def IIC_XADD_MEM : InstrItinClass; +def IIC_CMPXCHG_MEM : InstrItinClass; +def IIC_CMPXCHG_REG : InstrItinClass; +def IIC_CMPXCHG_MEM8 : InstrItinClass; +def IIC_CMPXCHG_REG8 : InstrItinClass; +def IIC_CMPXCHG_8B : InstrItinClass; +def IIC_CMPXCHG_16B : InstrItinClass; +def IIC_LODS : InstrItinClass; +def IIC_OUTS : InstrItinClass; +def IIC_CLC 
: InstrItinClass; +def IIC_CLD : InstrItinClass; +def IIC_CLI : InstrItinClass; +def IIC_CMC : InstrItinClass; +def IIC_CLTS : InstrItinClass; +def IIC_STC : InstrItinClass; +def IIC_STI : InstrItinClass; +def IIC_STD : InstrItinClass; +def IIC_XLAT : InstrItinClass; +def IIC_AAA : InstrItinClass; +def IIC_AAD : InstrItinClass; +def IIC_AAM : InstrItinClass; +def IIC_AAS : InstrItinClass; +def IIC_DAA : InstrItinClass; +def IIC_DAS : InstrItinClass; +def IIC_BOUND : InstrItinClass; +def IIC_ARPL_REG : InstrItinClass; +def IIC_ARPL_MEM : InstrItinClass; +def IIC_MOVBE : InstrItinClass; + +def IIC_NOP : InstrItinClass; //===----------------------------------------------------------------------===// // Processor instruction itineraries. -def GenericItineraries : ProcessorItineraries<[], [], []>; +// IssueWidth is analagous to the number of decode units. Core and its +// descendents, including Nehalem and SandyBridge have 4 decoders. +// Resources beyond the decoder operate on micro-ops and are bufferred +// so adjacent micro-ops don't directly compete. +// +// MinLatency=0 indicates that RAW dependencies can be decoded in the +// same cycle. +// +// HighLatency=10 is optimistic. X86InstrInfo::isHighLatencyDef +// indicates high latency opcodes. Alternatively, InstrItinData +// entries may be included here to define specific operand +// latencies. Since these latencies are not used for pipeline hazards, +// they do not need to be exact. +// +// The GenericModel contains no instruciton itineraries. +def GenericModel : SchedMachineModel { + let IssueWidth = 4; + let MinLatency = 0; + let LoadLatency = 4; + let HighLatency = 10; +} include "X86ScheduleAtom.td" - - - diff --git a/lib/Target/X86/X86ScheduleAtom.td b/lib/Target/X86/X86ScheduleAtom.td index 77d4e56..8710261 100644 --- a/lib/Target/X86/X86ScheduleAtom.td +++ b/lib/Target/X86/X86ScheduleAtom.td @@ -106,7 +106,7 @@ def AtomItineraries : ProcessorItineraries< InstrItinData] >, InstrItinData] >, // set - InstrItinData] >, + InstrItinData] >, InstrItinData] >, // jcc InstrItinData] >, @@ -294,12 +294,237 @@ def AtomItineraries : ProcessorItineraries< InstrItinData] >, InstrItinData] >, + // MMX MOVs + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + // other MMX + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + // conversions + // from/to PD + InstrItinData] >, + InstrItinData] >, + // from/to PI + InstrItinData] >, + InstrItinData, + InstrStage<5, [Port1]>]>, + InstrItinData] >, InstrItinData] >, InstrItinData] >, InstrItinData] >, InstrItinData] >, - InstrItinData] > + InstrItinData] >, + + InstrItinData, InstrStage<5, [Port1]>] >, + InstrItinData] >, + InstrItinData] >, + + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] 
>, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData, InstrStage<1, [Port1]>] >, + + // System instructions + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + + InstrItinData] >, + InstrItinData] >, + // worst case for mov REG_CRx + InstrItinData] >, + InstrItinData] >, + + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + // LAR + InstrItinData] >, + InstrItinData] >, + // LSL + InstrItinData] >, + InstrItinData] >, + + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + // push control register, segment registers + InstrItinData] >, + InstrItinData] >, + // pop control register, segment registers + InstrItinData] >, + InstrItinData] >, + // VERR, VERW + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + // WRMSR, RDMSR + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + // SMSW, LMSW + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + + InstrItinData] >, + InstrItinData] >, + + InstrItinData] >, + InstrItinData] >, + InstrItinData, + InstrStage<1, [Port1]>] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + + InstrItinData, + InstrStage<1, [Port1]>] >, + InstrItinData] >, + InstrItinData, + InstrStage<1, [Port1]>] >, + InstrItinData] >, + InstrItinData] >, + + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + InstrItinData] >, + + InstrItinData] > ]>; +// Atom machine model. +def AtomModel : SchedMachineModel { + let IssueWidth = 2; // Allows 2 instructions per scheduling group. + let MinLatency = 1; // InstrStage cycles overrides MinLatency. + // OperandCycles may be used for expected latency. + let LoadLatency = 3; // Expected cycles, may be overriden by OperandCycles. + let HighLatency = 30;// Expected, may be overriden by OperandCycles. 
+ + let Itineraries = AtomItineraries; +} diff --git a/lib/Target/X86/X86SelectionDAGInfo.cpp b/lib/Target/X86/X86SelectionDAGInfo.cpp index 9a04e35..00edcbc 100644 --- a/lib/Target/X86/X86SelectionDAGInfo.cpp +++ b/lib/Target/X86/X86SelectionDAGInfo.cpp @@ -38,7 +38,7 @@ X86SelectionDAGInfo::EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl, // If to a segment-relative address space, use the default lowering. if (DstPtrInfo.getAddrSpace() >= 256) return SDValue(); - + // If not DWORD aligned or size is more than the threshold, call the library. // The libc version is likely to be faster for these cases. It can use the // address value and run time information about the CPU. @@ -62,13 +62,15 @@ X86SelectionDAGInfo::EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl, Args.push_back(Entry); Entry.Node = Size; Args.push_back(Entry); - std::pair CallResult = - TLI.LowerCallTo(Chain, Type::getVoidTy(*DAG.getContext()), + TargetLowering:: + CallLoweringInfo CLI(Chain, Type::getVoidTy(*DAG.getContext()), false, false, false, false, 0, CallingConv::C, /*isTailCall=*/false, /*doesNotRet=*/false, /*isReturnValueUsed=*/false, DAG.getExternalSymbol(bzeroEntry, IntPtr), Args, DAG, dl); + std::pair CallResult = + TLI.LowerCallTo(CLI); return CallResult.second; } diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp index ed1a409..9087852 100644 --- a/lib/Target/X86/X86Subtarget.cpp +++ b/lib/Target/X86/X86Subtarget.cpp @@ -39,10 +39,10 @@ unsigned char X86Subtarget:: ClassifyBlockAddressReference() const { if (isPICStyleGOT()) // 32-bit ELF targets. return X86II::MO_GOTOFF; - + if (isPICStyleStubPIC()) // Darwin/32 in PIC mode. return X86II::MO_PIC_BASE_OFFSET; - + // Direct static reference to label. return X86II::MO_NO_FLAG; } @@ -69,7 +69,7 @@ ClassifyGlobalReference(const GlobalValue *GV, const TargetMachine &TM) const { // Large model never uses stubs. if (TM.getCodeModel() == CodeModel::Large) return X86II::MO_NO_FLAG; - + if (isTargetDarwin()) { // If symbol visibility is hidden, the extra load is not needed if // target is x86-64 or the symbol is definitely defined in the current @@ -87,18 +87,18 @@ ClassifyGlobalReference(const GlobalValue *GV, const TargetMachine &TM) const { return X86II::MO_NO_FLAG; } - + if (isPICStyleGOT()) { // 32-bit ELF targets. // Extra load is needed for all externally visible. if (GV->hasLocalLinkage() || GV->hasHiddenVisibility()) return X86II::MO_GOTOFF; return X86II::MO_GOT; } - + if (isPICStyleStubPIC()) { // Darwin/32 in PIC mode. // Determine whether we have a stub reference and/or whether the reference // is relative to the PIC base or not. - + // If this is a strong reference to a definition, it is definitely not // through a stub. if (!isDecl && !GV->isWeakForLinker()) @@ -108,26 +108,26 @@ ClassifyGlobalReference(const GlobalValue *GV, const TargetMachine &TM) const { // normal $non_lazy_ptr stub because this symbol might be resolved late. if (!GV->hasHiddenVisibility()) // Non-hidden $non_lazy_ptr reference. return X86II::MO_DARWIN_NONLAZY_PIC_BASE; - + // If symbol visibility is hidden, we have a stub for common symbol // references and external declarations. if (isDecl || GV->hasCommonLinkage()) { // Hidden $non_lazy_ptr reference. return X86II::MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE; } - + // Otherwise, no stub. return X86II::MO_PIC_BASE_OFFSET; } - + if (isPICStyleStubNoDynamic()) { // Darwin/32 in -mdynamic-no-pic mode. // Determine whether we have a stub reference. 
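
The X86SelectionDAGInfo.cpp hunk above is one instance of a migration applied throughout this import (the XCore hunks below repeat it): LowerCallTo's long positional argument list is first packed into a TargetLowering::CallLoweringInfo. A fragment-level sketch of the before/after shape, restoring the std::pair template arguments this rendering stripped; Chain, Args, TLI and the rest are assumed to be in scope as in the memset lowering, and this is not a standalone program:

// Before: one fifteen-argument call.
// std::pair<SDValue, SDValue> CallResult =
//     TLI.LowerCallTo(Chain, Type::getVoidTy(*DAG.getContext()), ...);

// After: bundle the arguments, then lower.
TargetLowering::CallLoweringInfo
    CLI(Chain, Type::getVoidTy(*DAG.getContext()),
        false, false, false, false, 0, CallingConv::C,
        /*isTailCall=*/false, /*doesNotRet=*/false,
        /*isReturnValueUsed=*/false,
        DAG.getExternalSymbol(bzeroEntry, IntPtr), Args, DAG, dl);
std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI);
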
- + // If this is a strong reference to a definition, it is definitely not // through a stub. if (!isDecl && !GV->isWeakForLinker()) return X86II::MO_NO_FLAG; - + // Unless we have a symbol with hidden visibility, we have to go through a // normal $non_lazy_ptr stub because this symbol might be resolved late. if (!GV->hasHiddenVisibility()) // Non-hidden $non_lazy_ptr reference. @@ -136,7 +136,7 @@ ClassifyGlobalReference(const GlobalValue *GV, const TargetMachine &TM) const { // Otherwise, no stub. return X86II::MO_NO_FLAG; } - + // Direct static reference to global. return X86II::MO_NO_FLAG; } @@ -196,33 +196,32 @@ void X86Subtarget::AutoDetectSubtargetFeatures() { if ((ECX >> 9) & 1) { X86SSELevel = SSSE3; ToggleFeature(X86::FeatureSSSE3);} if ((ECX >> 19) & 1) { X86SSELevel = SSE41; ToggleFeature(X86::FeatureSSE41);} if ((ECX >> 20) & 1) { X86SSELevel = SSE42; ToggleFeature(X86::FeatureSSE42);} - // FIXME: AVX codegen support is not ready. - //if ((ECX >> 28) & 1) { X86SSELevel = AVX; ToggleFeature(X86::FeatureAVX); } + if ((ECX >> 28) & 1) { X86SSELevel = AVX; ToggleFeature(X86::FeatureAVX); } bool IsIntel = memcmp(text.c, "GenuineIntel", 12) == 0; bool IsAMD = !IsIntel && memcmp(text.c, "AuthenticAMD", 12) == 0; - if (IsIntel && ((ECX >> 1) & 0x1)) { - HasCLMUL = true; - ToggleFeature(X86::FeatureCLMUL); + if ((ECX >> 1) & 0x1) { + HasPCLMUL = true; + ToggleFeature(X86::FeaturePCLMUL); } - if (IsIntel && ((ECX >> 12) & 0x1)) { - HasFMA3 = true; - ToggleFeature(X86::FeatureFMA3); + if ((ECX >> 12) & 0x1) { + HasFMA = true; + ToggleFeature(X86::FeatureFMA); } if (IsIntel && ((ECX >> 22) & 0x1)) { HasMOVBE = true; ToggleFeature(X86::FeatureMOVBE); } - if (IsIntel && ((ECX >> 23) & 0x1)) { + if ((ECX >> 23) & 0x1) { HasPOPCNT = true; ToggleFeature(X86::FeaturePOPCNT); } - if (IsIntel && ((ECX >> 25) & 0x1)) { + if ((ECX >> 25) & 0x1) { HasAES = true; ToggleFeature(X86::FeatureAES); } - if (IsIntel && ((ECX >> 29) & 0x1)) { + if ((ECX >> 29) & 0x1) { HasF16C = true; ToggleFeature(X86::FeatureF16C); } @@ -247,15 +246,22 @@ void X86Subtarget::AutoDetectSubtargetFeatures() { } // If it's Nehalem, unaligned memory access is fast. - // FIXME: Nehalem is family 6. Also include Westmere and later processors? - if (Family == 15 && Model == 26) { + // Include Westmere and Sandy Bridge as well. + // FIXME: add later processors. + if (IsIntel && ((Family == 6 && Model == 26) || + (Family == 6 && Model == 44) || + (Family == 6 && Model == 42))) { IsUAMemFast = true; ToggleFeature(X86::FeatureFastUAMem); } // Set processor type. Currently only Atom is detected. - if (Family == 6 && Model == 28) { + if (Family == 6 && + (Model == 28 || Model == 38 || Model == 39 + || Model == 53 || Model == 54)) { X86ProcFamily = IntelAtom; + + UseLeaForSP = true; ToggleFeature(X86::FeatureLeaForSP); } @@ -289,9 +295,9 @@ void X86Subtarget::AutoDetectSubtargetFeatures() { } } - if (IsIntel && MaxLevel >= 7) { + if (MaxLevel >= 7) { if (!X86_MC::GetCpuIDAndInfoEx(0x7, 0x0, &EAX, &EBX, &ECX, &EDX)) { - if (EBX & 0x1) { + if (IsIntel && (EBX & 0x1)) { HasFSGSBase = true; ToggleFeature(X86::FeatureFSGSBase); } @@ -299,12 +305,11 @@ void X86Subtarget::AutoDetectSubtargetFeatures() { HasBMI = true; ToggleFeature(X86::FeatureBMI); } - // FIXME: AVX2 codegen support is not ready. 
- //if ((EBX >> 5) & 0x1) { - // X86SSELevel = AVX2; - // ToggleFeature(X86::FeatureAVX2); - //} - if ((EBX >> 8) & 0x1) { + if (IsIntel && ((EBX >> 5) & 0x1)) { + X86SSELevel = AVX2; + ToggleFeature(X86::FeatureAVX2); + } + if (IsIntel && ((EBX >> 8) & 0x1)) { HasBMI2 = true; ToggleFeature(X86::FeatureBMI2); } @@ -313,7 +318,7 @@ void X86Subtarget::AutoDetectSubtargetFeatures() { } X86Subtarget::X86Subtarget(const std::string &TT, const std::string &CPU, - const std::string &FS, + const std::string &FS, unsigned StackAlignOverride, bool is64Bit) : X86GenSubtargetInfo(TT, CPU, FS) , X86ProcFamily(Others) @@ -325,8 +330,8 @@ X86Subtarget::X86Subtarget(const std::string &TT, const std::string &CPU, , HasPOPCNT(false) , HasSSE4A(false) , HasAES(false) - , HasCLMUL(false) - , HasFMA3(false) + , HasPCLMUL(false) + , HasFMA(false) , HasFMA4(false) , HasXOP(false) , HasMOVBE(false) @@ -395,10 +400,10 @@ X86Subtarget::X86Subtarget(const std::string &TT, const std::string &CPU, } } - if (X86ProcFamily == IntelAtom) { + if (X86ProcFamily == IntelAtom) PostRAScheduler = true; - InstrItins = getInstrItineraryForCPU(CPUName); - } + + InstrItins = getInstrItineraryForCPU(CPUName); // It's important to keep the MCSubtargetInfo feature bits in sync with // target data structure which is shared with MC code emitter, etc. @@ -424,9 +429,7 @@ bool X86Subtarget::enablePostRAScheduler( CodeGenOpt::Level OptLevel, TargetSubtargetInfo::AntiDepBreakMode& Mode, RegClassVector& CriticalPathRCs) const { - //TODO: change back to ANTIDEP_CRITICAL when the - // X86 subtarget properly sets up post RA liveness. - Mode = TargetSubtargetInfo::ANTIDEP_NONE; + Mode = TargetSubtargetInfo::ANTIDEP_CRITICAL; CriticalPathRCs.clear(); return PostRAScheduler && OptLevel >= CodeGenOpt::Default; } diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h index 7fd832b..6841c5b 100644 --- a/lib/Target/X86/X86Subtarget.h +++ b/lib/Target/X86/X86Subtarget.h @@ -55,7 +55,7 @@ protected: /// X86ProcFamily - X86 processor family: Intel Atom, and others X86ProcFamilyEnum X86ProcFamily; - + /// PICStyle - Which PIC style to use /// PICStyles::Style PICStyle; @@ -85,11 +85,11 @@ protected: /// HasAES - Target has AES instructions bool HasAES; - /// HasCLMUL - Target has carry-less multiplication - bool HasCLMUL; + /// HasPCLMUL - Target has carry-less multiplication + bool HasPCLMUL; - /// HasFMA3 - Target has 3-operand fused multiply-add - bool HasFMA3; + /// HasFMA - Target has 3-operand fused multiply-add + bool HasFMA; /// HasFMA4 - Target has 4-operand fused multiply-add bool HasFMA4; @@ -149,7 +149,7 @@ protected: /// TargetTriple - What processor and OS we're targeting. Triple TargetTriple; - + /// Instruction itineraries for scheduling InstrItineraryData InstrItins; @@ -203,8 +203,8 @@ public: bool has3DNowA() const { return X863DNowLevel >= ThreeDNowA; } bool hasPOPCNT() const { return HasPOPCNT; } bool hasAES() const { return HasAES; } - bool hasCLMUL() const { return HasCLMUL; } - bool hasFMA3() const { return HasFMA3; } + bool hasPCLMUL() const { return HasPCLMUL; } + bool hasFMA() const { return HasFMA; } bool hasFMA4() const { return HasFMA4; } bool hasXOP() const { return HasXOP; } bool hasMOVBE() const { return HasMOVBE; } @@ -307,6 +307,8 @@ public: TargetSubtargetInfo::AntiDepBreakMode& Mode, RegClassVector& CriticalPathRCs) const; + bool postRAScheduler() const { return PostRAScheduler; } + /// getInstrItins = Return the instruction itineraries based on the /// subtarget selection. 
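
The AutoDetectSubtargetFeatures hunks above now trust CPUID leaf 1's ECX feature bits for most features regardless of vendor, keeping IsIntel checks only where they still matter (MOVBE, FSGSBase, AVX2, BMI2). A self-contained probe using the same bit positions, via the GCC/Clang <cpuid.h> helper (standalone illustration, not LLVM code):

#include <cpuid.h>
#include <cstdio>

int main() {
  unsigned eax, ebx, ecx, edx;
  if (!__get_cpuid(1, &eax, &ebx, &ecx, &edx))
    return 1;
  // Same leaf-1 ECX bits the patch tests: 1 = PCLMUL, 12 = FMA,
  // 19/20 = SSE4.1/4.2, 23 = POPCNT, 25 = AES, 28 = AVX.
  std::printf("PCLMUL=%u FMA=%u SSE41=%u SSE42=%u POPCNT=%u AES=%u AVX=%u\n",
              (ecx >> 1) & 1, (ecx >> 12) & 1, (ecx >> 19) & 1,
              (ecx >> 20) & 1, (ecx >> 23) & 1, (ecx >> 25) & 1,
              (ecx >> 28) & 1);
  return 0;
}
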
const InstrItineraryData &getInstrItineraryData() const { return InstrItins; } diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp index 89c3884..b7ba568 100644 --- a/lib/Target/X86/X86TargetMachine.cpp +++ b/lib/Target/X86/X86TargetMachine.cpp @@ -140,39 +140,48 @@ public: } // namespace TargetPassConfig *X86TargetMachine::createPassConfig(PassManagerBase &PM) { - return new X86PassConfig(this, PM); + X86PassConfig *PC = new X86PassConfig(this, PM); + + if (Subtarget.hasCMov()) + PC->enablePass(&EarlyIfConverterID); + + return PC; } bool X86PassConfig::addInstSelector() { // Install an instruction selector. - PM->add(createX86ISelDag(getX86TargetMachine(), getOptLevel())); + addPass(createX86ISelDag(getX86TargetMachine(), getOptLevel())); + + // For ELF, cleanup any local-dynamic TLS accesses. + if (getX86Subtarget().isTargetELF() && getOptLevel() != CodeGenOpt::None) + addPass(createCleanupLocalDynamicTLSPass()); // For 32-bit, prepend instructions to set the "global base reg" for PIC. if (!getX86Subtarget().is64Bit()) - PM->add(createGlobalBaseRegPass()); + addPass(createGlobalBaseRegPass()); return false; } bool X86PassConfig::addPreRegAlloc() { - PM->add(createX86MaxStackAlignmentHeuristicPass()); + addPass(createX86MaxStackAlignmentHeuristicPass()); return false; // -print-machineinstr shouldn't print after this. } bool X86PassConfig::addPostRegAlloc() { - PM->add(createX86FloatingPointStackifierPass()); + addPass(createX86FloatingPointStackifierPass()); return true; // -print-machineinstr should print after this. } bool X86PassConfig::addPreEmitPass() { bool ShouldPrint = false; if (getOptLevel() != CodeGenOpt::None && getX86Subtarget().hasSSE2()) { - PM->add(createExecutionDependencyFixPass(&X86::VR128RegClass)); + addPass(createExecutionDependencyFixPass(&X86::VR128RegClass)); ShouldPrint = true; } if (getX86Subtarget().hasAVX() && UseVZeroUpper) { - PM->add(createX86IssueVZeroUpperPass()); + addPass(createX86IssueVZeroUpperPass()); ShouldPrint = true; } diff --git a/lib/Target/X86/X86TargetObjectFile.cpp b/lib/Target/X86/X86TargetObjectFile.cpp index 718f35e..92aee0d 100644 --- a/lib/Target/X86/X86TargetObjectFile.cpp +++ b/lib/Target/X86/X86TargetObjectFile.cpp @@ -9,16 +9,19 @@ #include "X86TargetObjectFile.h" #include "X86TargetMachine.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/CodeGen/MachineModuleInfoImpls.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCSectionELF.h" #include "llvm/MC/MCSectionMachO.h" #include "llvm/Target/Mangler.h" #include "llvm/Support/Dwarf.h" +#include "llvm/Support/ELF.h" using namespace llvm; using namespace dwarf; -const MCExpr *X8664_MachoTargetObjectFile:: +const MCExpr *X86_64MachoTargetObjectFile:: getExprForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang, MachineModuleInfo *MMI, unsigned Encoding, MCStreamer &Streamer) const { @@ -37,8 +40,14 @@ getExprForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang, getExprForDwarfGlobalReference(GV, Mang, MMI, Encoding, Streamer); } -MCSymbol *X8664_MachoTargetObjectFile:: +MCSymbol *X86_64MachoTargetObjectFile:: getCFIPersonalitySymbol(const GlobalValue *GV, Mangler *Mang, MachineModuleInfo *MMI) const { return Mang->getSymbol(GV); } + +void +X86LinuxTargetObjectFile::Initialize(MCContext &Ctx, const TargetMachine &TM) { + TargetLoweringObjectFileELF::Initialize(Ctx, TM); + InitializeELF(TM.Options.UseInitArray); +} diff --git a/lib/Target/X86/X86TargetObjectFile.h b/lib/Target/X86/X86TargetObjectFile.h 
index a02a368..2d320c5 100644 --- a/lib/Target/X86/X86TargetObjectFile.h +++ b/lib/Target/X86/X86TargetObjectFile.h @@ -16,9 +16,9 @@ namespace llvm { - /// X8664_MachoTargetObjectFile - This TLOF implementation is used for Darwin + /// X86_64MachoTargetObjectFile - This TLOF implementation is used for Darwin /// x86-64. - class X8664_MachoTargetObjectFile : public TargetLoweringObjectFileMachO { + class X86_64MachoTargetObjectFile : public TargetLoweringObjectFileMachO { public: virtual const MCExpr * getExprForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang, @@ -32,6 +32,12 @@ namespace llvm { MachineModuleInfo *MMI) const; }; + /// X86LinuxTargetObjectFile - This implementation is used for linux x86 + /// and x86-64. + class X86LinuxTargetObjectFile : public TargetLoweringObjectFileELF { + virtual void Initialize(MCContext &Ctx, const TargetMachine &TM); + }; + } // end namespace llvm #endif diff --git a/lib/Target/X86/X86VZeroUpper.cpp b/lib/Target/X86/X86VZeroUpper.cpp index 2fd78a7..80b75dc 100644 --- a/lib/Target/X86/X86VZeroUpper.cpp +++ b/lib/Target/X86/X86VZeroUpper.cpp @@ -145,7 +145,7 @@ bool VZeroUpperInserter::runOnMachineFunction(MachineFunction &MF) { // to insert any VZEROUPPER instructions. This is constant-time, so it is // cheap in the common case of no ymm use. bool YMMUsed = false; - const TargetRegisterClass *RC = X86::VR256RegisterClass; + const TargetRegisterClass *RC = &X86::VR256RegClass; for (TargetRegisterClass::iterator i = RC->begin(), e = RC->end(); i != e; i++) { if (MRI.isPhysRegUsed(*i)) { @@ -205,7 +205,7 @@ bool VZeroUpperInserter::processBasicBlock(MachineFunction &MF, } - // The entry MBB for the function may set the inital state to dirty if + // The entry MBB for the function may set the initial state to dirty if // the function receives any YMM incoming arguments if (MBB == MF.begin()) { EntryState = ST_CLEAN; @@ -222,7 +222,7 @@ bool VZeroUpperInserter::processBasicBlock(MachineFunction &MF, DebugLoc dl = I->getDebugLoc(); bool isControlFlow = MI->isCall() || MI->isReturn(); - // Shortcut: don't need to check regular instructions in dirty state. + // Shortcut: don't need to check regular instructions in dirty state. if (!isControlFlow && CurState == ST_DIRTY) continue; diff --git a/lib/Target/XCore/CMakeLists.txt b/lib/Target/XCore/CMakeLists.txt index 0d59572..ca94f03 100644 --- a/lib/Target/XCore/CMakeLists.txt +++ b/lib/Target/XCore/CMakeLists.txt @@ -22,5 +22,7 @@ add_llvm_target(XCoreCodeGen XCoreSelectionDAGInfo.cpp ) +add_dependencies(LLVMXCoreCodeGen intrinsics_gen) + add_subdirectory(TargetInfo) add_subdirectory(MCTargetDesc) diff --git a/lib/Target/XCore/XCoreAsmPrinter.cpp b/lib/Target/XCore/XCoreAsmPrinter.cpp index 8906b24..c76866f 100644 --- a/lib/Target/XCore/XCoreAsmPrinter.cpp +++ b/lib/Target/XCore/XCoreAsmPrinter.cpp @@ -18,9 +18,9 @@ #include "XCoreSubtarget.h" #include "XCoreTargetMachine.h" #include "llvm/Constants.h" +#include "llvm/DebugInfo.h" #include "llvm/DerivedTypes.h" #include "llvm/Module.h" -#include "llvm/Analysis/DebugInfo.h" #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineFunctionPass.h" @@ -260,7 +260,17 @@ void XCoreAsmPrinter::printOperand(const MachineInstr *MI, int opNum, bool XCoreAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, unsigned AsmVariant,const char *ExtraCode, raw_ostream &O) { - printOperand(MI, OpNo, O); + // Does this asm operand have a single letter operand modifier? 
+ if (ExtraCode && ExtraCode[0]) + if (ExtraCode[1] != 0) return true; // Unknown modifier. + + switch (ExtraCode[0]) { + default: + // See if this is a generic print operand + return AsmPrinter::PrintAsmOperand(MI, OpNo, AsmVariant, ExtraCode, O); + } + +printOperand(MI, OpNo, O); return false; } diff --git a/lib/Target/XCore/XCoreFrameLowering.cpp b/lib/Target/XCore/XCoreFrameLowering.cpp index 50fda58..a4e5647 100644 --- a/lib/Target/XCore/XCoreFrameLowering.cpp +++ b/lib/Target/XCore/XCoreFrameLowering.cpp @@ -78,8 +78,7 @@ static void storeToStack(MachineBasicBlock &MBB, //===----------------------------------------------------------------------===// XCoreFrameLowering::XCoreFrameLowering(const XCoreSubtarget &sti) - : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, 4, 0), - STI(sti) { + : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, 4, 0) { // Do nothing } @@ -341,7 +340,7 @@ XCoreFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, MachineFrameInfo *MFI = MF.getFrameInfo(); const TargetRegisterInfo *RegInfo = MF.getTarget().getRegisterInfo(); bool LRUsed = MF.getRegInfo().isPhysRegUsed(XCore::LR); - const TargetRegisterClass *RC = XCore::GRRegsRegisterClass; + const TargetRegisterClass *RC = &XCore::GRRegsRegClass; XCoreFunctionInfo *XFI = MF.getInfo(); if (LRUsed) { MF.getRegInfo().setPhysRegUnused(XCore::LR); @@ -372,8 +371,3 @@ XCoreFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, false)); } } - -void XCoreFrameLowering:: -processFunctionBeforeFrameFinalized(MachineFunction &MF) const { - -} diff --git a/lib/Target/XCore/XCoreFrameLowering.h b/lib/Target/XCore/XCoreFrameLowering.h index 4c51aa5..db1bbb6 100644 --- a/lib/Target/XCore/XCoreFrameLowering.h +++ b/lib/Target/XCore/XCoreFrameLowering.h @@ -22,7 +22,6 @@ namespace llvm { class XCoreSubtarget; class XCoreFrameLowering: public TargetFrameLowering { - const XCoreSubtarget &STI; public: XCoreFrameLowering(const XCoreSubtarget &STI); @@ -45,8 +44,6 @@ namespace llvm { void processFunctionBeforeCalleeSavedScan(MachineFunction &MF, RegScavenger *RS = NULL) const; - void processFunctionBeforeFrameFinalized(MachineFunction &MF) const; - //! Stack slot size (4 bytes) static int stackSlotSize() { return 4; diff --git a/lib/Target/XCore/XCoreISelLowering.cpp b/lib/Target/XCore/XCoreISelLowering.cpp index fdf2b78..8643ffc 100644 --- a/lib/Target/XCore/XCoreISelLowering.cpp +++ b/lib/Target/XCore/XCoreISelLowering.cpp @@ -66,7 +66,7 @@ XCoreTargetLowering::XCoreTargetLowering(XCoreTargetMachine &XTM) Subtarget(*XTM.getSubtargetImpl()) { // Set up the register classes. 
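
One caution on the XCoreAsmPrinter::PrintAsmOperand hunk above: as rendered, the null/length check does not brace the switch that follows, so ExtraCode[0] would be read even when ExtraCode is null. The presumable intent, matching how other targets structure this hook, is:

// Sketch of the intended nesting (assumed; the patch text above is
// ambiguous about the braces):
if (ExtraCode && ExtraCode[0]) {
  if (ExtraCode[1] != 0) return true; // Unknown modifier.

  switch (ExtraCode[0]) {
  default:
    // See if this is a generic print operand.
    return AsmPrinter::PrintAsmOperand(MI, OpNo, AsmVariant, ExtraCode, O);
  }
}

printOperand(MI, OpNo, O);
return false;
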
- addRegisterClass(MVT::i32, XCore::GRRegsRegisterClass); + addRegisterClass(MVT::i32, &XCore::GRRegsRegClass); // Compute derived properties from the register classes computeRegisterProperties(); @@ -485,12 +485,12 @@ LowerLOAD(SDValue Op, SelectionDAG &DAG) const { Entry.Node = BasePtr; Args.push_back(Entry); - std::pair CallResult = - LowerCallTo(Chain, IntPtrTy, false, false, + TargetLowering::CallLoweringInfo CLI(Chain, IntPtrTy, false, false, false, false, 0, CallingConv::C, /*isTailCall=*/false, /*doesNotRet=*/false, /*isReturnValueUsed=*/true, DAG.getExternalSymbol("__misaligned_load", getPointerTy()), Args, DAG, DL); + std::pair CallResult = LowerCallTo(CLI); SDValue Ops[] = { CallResult.first, CallResult.second }; @@ -547,12 +547,13 @@ LowerSTORE(SDValue Op, SelectionDAG &DAG) const Entry.Node = Value; Args.push_back(Entry); - std::pair CallResult = - LowerCallTo(Chain, Type::getVoidTy(*DAG.getContext()), false, false, + TargetLowering::CallLoweringInfo CLI(Chain, + Type::getVoidTy(*DAG.getContext()), false, false, false, false, 0, CallingConv::C, /*isTailCall=*/false, /*doesNotRet=*/false, /*isReturnValueUsed=*/true, DAG.getExternalSymbol("__misaligned_store", getPointerTy()), Args, DAG, dl); + std::pair CallResult = LowerCallTo(CLI); return CallResult.second; } @@ -873,14 +874,19 @@ LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const { /// XCore call implementation SDValue -XCoreTargetLowering::LowerCall(SDValue Chain, SDValue Callee, - CallingConv::ID CallConv, bool isVarArg, - bool doesNotRet, bool &isTailCall, - const SmallVectorImpl &Outs, - const SmallVectorImpl &OutVals, - const SmallVectorImpl &Ins, - DebugLoc dl, SelectionDAG &DAG, +XCoreTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, SmallVectorImpl &InVals) const { + SelectionDAG &DAG = CLI.DAG; + DebugLoc &dl = CLI.DL; + SmallVector &Outs = CLI.Outs; + SmallVector &OutVals = CLI.OutVals; + SmallVector &Ins = CLI.Ins; + SDValue Chain = CLI.Chain; + SDValue Callee = CLI.Callee; + bool &isTailCall = CLI.IsTailCall; + CallingConv::ID CallConv = CLI.CallConv; + bool isVarArg = CLI.IsVarArg; + // XCore target does not yet support tail call optimization. isTailCall = false; @@ -913,7 +919,7 @@ XCoreTargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee, // Analyze operands of the call, assigning locations to each operand. SmallVector ArgLocs; CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), - getTargetMachine(), ArgLocs, *DAG.getContext()); + getTargetMachine(), ArgLocs, *DAG.getContext()); // The ABI dictates there should be one stack slot available to the callee // on function entry (for saving lr). @@ -1036,7 +1042,7 @@ XCoreTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, // Assign locations to each value returned by this call. SmallVector RVLocs; CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), - getTargetMachine(), RVLocs, *DAG.getContext()); + getTargetMachine(), RVLocs, *DAG.getContext()); CCInfo.AnalyzeCallResult(Ins, RetCC_XCore); @@ -1096,7 +1102,7 @@ XCoreTargetLowering::LowerCCCArguments(SDValue Chain, // Assign locations to all of the incoming arguments. 
SmallVector ArgLocs; CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), - getTargetMachine(), ArgLocs, *DAG.getContext()); + getTargetMachine(), ArgLocs, *DAG.getContext()); CCInfo.AnalyzeFormalArguments(Ins, CC_XCore); @@ -1121,8 +1127,7 @@ XCoreTargetLowering::LowerCCCArguments(SDValue Chain, llvm_unreachable(0); } case MVT::i32: - unsigned VReg = RegInfo.createVirtualRegister( - XCore::GRRegsRegisterClass); + unsigned VReg = RegInfo.createVirtualRegister(&XCore::GRRegsRegClass); RegInfo.addLiveIn(VA.getLocReg(), VReg); InVals.push_back(DAG.getCopyFromReg(Chain, dl, VReg, RegVT)); } @@ -1172,8 +1177,7 @@ XCoreTargetLowering::LowerCCCArguments(SDValue Chain, offset -= StackSlotSize; SDValue FIN = DAG.getFrameIndex(FI, MVT::i32); // Move argument from phys reg -> virt reg - unsigned VReg = RegInfo.createVirtualRegister( - XCore::GRRegsRegisterClass); + unsigned VReg = RegInfo.createVirtualRegister(&XCore::GRRegsRegClass); RegInfo.addLiveIn(ArgRegs[i], VReg); SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32); // Move argument from virt reg -> stack @@ -1201,7 +1205,7 @@ XCoreTargetLowering::LowerCCCArguments(SDValue Chain, bool XCoreTargetLowering:: CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF, - bool isVarArg, + bool isVarArg, const SmallVectorImpl &Outs, LLVMContext &Context) const { SmallVector RVLocs; @@ -1222,7 +1226,7 @@ XCoreTargetLowering::LowerReturn(SDValue Chain, // CCState - Info about the registers and stack slot. CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), - getTargetMachine(), RVLocs, *DAG.getContext()); + getTargetMachine(), RVLocs, *DAG.getContext()); // Analyze return values. CCInfo.AnalyzeReturn(Outs, RetCC_XCore); @@ -1606,12 +1610,12 @@ XCoreTargetLowering::isLegalAddressingMode(const AddrMode &AM, std::pair XCoreTargetLowering:: getRegForInlineAsmConstraint(const std::string &Constraint, - EVT VT) const { + EVT VT) const { if (Constraint.size() == 1) { switch (Constraint[0]) { default : break; case 'r': - return std::make_pair(0U, XCore::GRRegsRegisterClass); + return std::make_pair(0U, &XCore::GRRegsRegClass); } } // Use the default implementation in TargetLowering to convert the register diff --git a/lib/Target/XCore/XCoreISelLowering.h b/lib/Target/XCore/XCoreISelLowering.h index 0b63ecd..2874f00 100644 --- a/lib/Target/XCore/XCoreISelLowering.h +++ b/lib/Target/XCore/XCoreISelLowering.h @@ -151,7 +151,7 @@ namespace llvm { // Inline asm support std::pair getRegForInlineAsmConstraint(const std::string &Constraint, - EVT VT) const; + EVT VT) const; // Expand specifics SDValue TryExpandADDWithMul(SDNode *Op, SelectionDAG &DAG) const; @@ -174,12 +174,7 @@ namespace llvm { SmallVectorImpl &InVals) const; virtual SDValue - LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, - bool isVarArg, bool doesNotRet, bool &isTailCall, - const SmallVectorImpl &Outs, - const SmallVectorImpl &OutVals, - const SmallVectorImpl &Ins, - DebugLoc dl, SelectionDAG &DAG, + LowerCall(TargetLowering::CallLoweringInfo &CLI, SmallVectorImpl &InVals) const; virtual SDValue @@ -191,7 +186,7 @@ namespace llvm { virtual bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF, - bool isVarArg, + bool isVarArg, const SmallVectorImpl &ArgsFlags, LLVMContext &Context) const; }; diff --git a/lib/Target/XCore/XCoreInstrInfo.td b/lib/Target/XCore/XCoreInstrInfo.td index b25a08d..ae646a2 100644 --- a/lib/Target/XCore/XCoreInstrInfo.td +++ b/lib/Target/XCore/XCoreInstrInfo.td @@ -741,14 +741,12 @@ let isCall=1, // All calls 
clobber the link register and the non-callee-saved registers: Defs = [R0, R1, R2, R3, R11, LR], Uses = [SP] in { def BL_u10 : _FU10< - (outs), - (ins calltarget:$target, variable_ops), + (outs), (ins calltarget:$target), "bl $target", [(XCoreBranchLink immU10:$target)]>; def BL_lu10 : _FLU10< - (outs), - (ins calltarget:$target, variable_ops), + (outs), (ins calltarget:$target), "bl $target", [(XCoreBranchLink immU20:$target)]>; } @@ -796,7 +794,7 @@ def MKMSK_rus : _FRUS<(outs GRRegs:$dst), (ins i32imm:$size), def MKMSK_2r : _FRUS<(outs GRRegs:$dst), (ins GRRegs:$size), "mkmsk $dst, $size", - [(set GRRegs:$dst, (add (shl 1, GRRegs:$size), 0xffffffff))]>; + [(set GRRegs:$dst, (add (shl 1, GRRegs:$size), -1))]>; def GETR_rus : _FRUS<(outs GRRegs:$dst), (ins i32imm:$type), "getr $dst, $type", @@ -950,10 +948,10 @@ def ENDIN_l2r : _FL2R<(outs GRRegs:$dst), (ins GRRegs:$src), // dgetreg def MSYNC_1r : _F1R<(outs), (ins GRRegs:$i), "msync res[$i]", - [(int_xcore_msync GRRegs:$i)]>; + [(int_xcore_msync GRRegs:$i)]>; def MJOIN_1r : _F1R<(outs), (ins GRRegs:$i), "mjoin res[$i]", - [(int_xcore_mjoin GRRegs:$i)]>; + [(int_xcore_mjoin GRRegs:$i)]>; let isBranch=1, isIndirectBranch=1, isTerminator=1, isBarrier = 1 in def BAU_1r : _F1R<(outs), (ins GRRegs:$addr), @@ -988,7 +986,7 @@ def ECALLF_1r : _F1R<(outs), (ins GRRegs:$src), let isCall=1, // All calls clobber the link register and the non-callee-saved registers: Defs = [R0, R1, R2, R3, R11, LR], Uses = [SP] in { -def BLA_1r : _F1R<(outs), (ins GRRegs:$addr, variable_ops), +def BLA_1r : _F1R<(outs), (ins GRRegs:$addr), "bla $addr", [(XCoreBranchLink GRRegs:$addr)]>; } @@ -1038,7 +1036,7 @@ def GETET_0R : _F0R<(outs), (ins), def SSYNC_0r : _F0R<(outs), (ins), "ssync", - [(int_xcore_ssync)]>; + [(int_xcore_ssync)]>; let isBranch=1, isIndirectBranch=1, isTerminator=1, isBarrier = 1, hasSideEffects = 1 in diff --git a/lib/Target/XCore/XCoreRegisterInfo.cpp b/lib/Target/XCore/XCoreRegisterInfo.cpp index f3b4b4c..cdd0a08 100644 --- a/lib/Target/XCore/XCoreRegisterInfo.cpp +++ b/lib/Target/XCore/XCoreRegisterInfo.cpp @@ -92,6 +92,11 @@ XCoreRegisterInfo::requiresRegisterScavenging(const MachineFunction &MF) const { } bool +XCoreRegisterInfo::trackLivenessAfterRegAlloc(const MachineFunction &MF) const { + return requiresRegisterScavenging(MF); +} + +bool XCoreRegisterInfo::useFPForScavengingIndex(const MachineFunction &MF) const { return false; } @@ -205,8 +210,7 @@ XCoreRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, unsigned Reg = MI.getOperand(0).getReg(); bool isKill = MI.getOpcode() == XCore::STWFI && MI.getOperand(0).isKill(); - assert(XCore::GRRegsRegisterClass->contains(Reg) && - "Unexpected register operand"); + assert(XCore::GRRegsRegClass.contains(Reg) && "Unexpected register operand"); MachineBasicBlock &MBB = *MI.getParent(); @@ -217,7 +221,7 @@ XCoreRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, if (!RS) report_fatal_error("eliminateFrameIndex Frame size too big: " + Twine(Offset)); - unsigned ScratchReg = RS->scavengeRegister(XCore::GRRegsRegisterClass, II, + unsigned ScratchReg = RS->scavengeRegister(&XCore::GRRegsRegClass, II, SPAdj); loadConstant(MBB, II, ScratchReg, Offset, dl); switch (MI.getOpcode()) { diff --git a/lib/Target/XCore/XCoreRegisterInfo.h b/lib/Target/XCore/XCoreRegisterInfo.h index 7391cfd..c4dcb6b 100644 --- a/lib/Target/XCore/XCoreRegisterInfo.h +++ b/lib/Target/XCore/XCoreRegisterInfo.h @@ -50,6 +50,8 @@ public: bool requiresRegisterScavenging(const MachineFunction &MF) const; + 
bool trackLivenessAfterRegAlloc(const MachineFunction &MF) const; + bool useFPForScavengingIndex(const MachineFunction &MF) const; void eliminateCallFramePseudoInstr(MachineFunction &MF, diff --git a/lib/Target/XCore/XCoreTargetMachine.cpp b/lib/Target/XCore/XCoreTargetMachine.cpp index 5afd5a1..11ec86b 100644 --- a/lib/Target/XCore/XCoreTargetMachine.cpp +++ b/lib/Target/XCore/XCoreTargetMachine.cpp @@ -55,7 +55,7 @@ TargetPassConfig *XCoreTargetMachine::createPassConfig(PassManagerBase &PM) { } bool XCorePassConfig::addInstSelector() { - PM->add(createXCoreISelDag(getXCoreTargetMachine(), getOptLevel())); + addPass(createXCoreISelDag(getXCoreTargetMachine(), getOptLevel())); return false; } diff --git a/lib/Transforms/IPO/ArgumentPromotion.cpp b/lib/Transforms/IPO/ArgumentPromotion.cpp index e160f63..b94dd69 100644 --- a/lib/Transforms/IPO/ArgumentPromotion.cpp +++ b/lib/Transforms/IPO/ArgumentPromotion.cpp @@ -245,10 +245,7 @@ static bool IsPrefix(const ArgPromotion::IndicesVector &Prefix, const ArgPromotion::IndicesVector &Longer) { if (Prefix.size() > Longer.size()) return false; - for (unsigned i = 0, e = Prefix.size(); i != e; ++i) - if (Prefix[i] != Longer[i]) - return false; - return true; + return std::equal(Prefix.begin(), Prefix.end(), Longer.begin()); } @@ -616,8 +613,7 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F, // Recompute the parameter attributes list based on the new arguments for // the function. - NF->setAttributes(AttrListPtr::get(AttributesVec.begin(), - AttributesVec.end())); + NF->setAttributes(AttrListPtr::get(AttributesVec)); AttributesVec.clear(); F->getParent()->getFunctionList().insert(F, NF); @@ -734,13 +730,11 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F, New = InvokeInst::Create(NF, II->getNormalDest(), II->getUnwindDest(), Args, "", Call); cast(New)->setCallingConv(CS.getCallingConv()); - cast(New)->setAttributes(AttrListPtr::get(AttributesVec.begin(), - AttributesVec.end())); + cast(New)->setAttributes(AttrListPtr::get(AttributesVec)); } else { New = CallInst::Create(NF, Args, "", Call); cast(New)->setCallingConv(CS.getCallingConv()); - cast(New)->setAttributes(AttrListPtr::get(AttributesVec.begin(), - AttributesVec.end())); + cast(New)->setAttributes(AttrListPtr::get(AttributesVec)); if (cast(Call)->isTailCall()) cast(New)->setTailCall(); } diff --git a/lib/Transforms/IPO/CMakeLists.txt b/lib/Transforms/IPO/CMakeLists.txt index 58b3551..3f6b1de 100644 --- a/lib/Transforms/IPO/CMakeLists.txt +++ b/lib/Transforms/IPO/CMakeLists.txt @@ -20,3 +20,5 @@ add_llvm_library(LLVMipo StripDeadPrototypes.cpp StripSymbols.cpp ) + +add_dependencies(LLVMipo intrinsics_gen) diff --git a/lib/Transforms/IPO/DeadArgumentElimination.cpp b/lib/Transforms/IPO/DeadArgumentElimination.cpp index 95aef27..fd23a93 100644 --- a/lib/Transforms/IPO/DeadArgumentElimination.cpp +++ b/lib/Transforms/IPO/DeadArgumentElimination.cpp @@ -238,7 +238,7 @@ bool DAE::DeleteDeadVarargs(Function &Fn) { AttributesVec.push_back(PAL.getSlot(i)); if (Attributes FnAttrs = PAL.getFnAttributes()) AttributesVec.push_back(AttributeWithIndex::get(~0, FnAttrs)); - PAL = AttrListPtr::get(AttributesVec.begin(), AttributesVec.end()); + PAL = AttrListPtr::get(AttributesVec); } Instruction *New; @@ -753,8 +753,7 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) { AttributesVec.push_back(AttributeWithIndex::get(~0, FnAttrs)); // Reconstruct the AttributesList based on the vector we constructed. 
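[Editorial sketch, not part of the patch] The ArgumentPromotion hunk above folds a hand-written element loop into std::equal. A self-contained illustration of why the two are equivalent; the size check must stay in front, because std::equal reads the second range only through the first range's length:

#include <algorithm>
#include <cassert>
#include <vector>

typedef std::vector<unsigned> IndicesVector; // same shape as ArgPromotion's

static bool IsPrefix(const IndicesVector &Prefix, const IndicesVector &Longer) {
  if (Prefix.size() > Longer.size())
    return false;
  // Compares exactly Prefix.size() elements, so the guard above keeps us
  // from reading past the end of Longer.
  return std::equal(Prefix.begin(), Prefix.end(), Longer.begin());
}

int main() {
  IndicesVector A, B;
  A.push_back(0); A.push_back(2);
  B.push_back(0); B.push_back(2); B.push_back(5);
  assert(IsPrefix(A, B));
  assert(!IsPrefix(B, A));
  return 0;
}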
- AttrListPtr NewPAL = AttrListPtr::get(AttributesVec.begin(), - AttributesVec.end()); + AttrListPtr NewPAL = AttrListPtr::get(AttributesVec); // Create the new function type based on the recomputed parameters. FunctionType *NFTy = FunctionType::get(NRetTy, Params, FTy->isVarArg()); @@ -816,8 +815,7 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) { AttributesVec.push_back(AttributeWithIndex::get(~0, FnAttrs)); // Reconstruct the AttributesList based on the vector we constructed. - AttrListPtr NewCallPAL = AttrListPtr::get(AttributesVec.begin(), - AttributesVec.end()); + AttrListPtr NewCallPAL = AttrListPtr::get(AttributesVec); Instruction *New; if (InvokeInst *II = dyn_cast(Call)) { diff --git a/lib/Transforms/IPO/ExtractGV.cpp b/lib/Transforms/IPO/ExtractGV.cpp index d9911bf..4c7f0ed 100644 --- a/lib/Transforms/IPO/ExtractGV.cpp +++ b/lib/Transforms/IPO/ExtractGV.cpp @@ -53,12 +53,12 @@ namespace { I != E; ++I) { if (deleteStuff == (bool)Named.count(I) && !I->isDeclaration()) { I->setInitializer(0); - } else { - if (I->hasAvailableExternallyLinkage()) - continue; - if (I->getName() == "llvm.global_ctors") - continue; - } + } else { + if (I->hasAvailableExternallyLinkage()) + continue; + if (I->getName() == "llvm.global_ctors") + continue; + } if (I->hasLocalLinkage()) I->setVisibility(GlobalValue::HiddenVisibility); @@ -69,10 +69,10 @@ namespace { for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) { if (deleteStuff == (bool)Named.count(I) && !I->isDeclaration()) { I->deleteBody(); - } else { - if (I->hasAvailableExternallyLinkage()) - continue; - } + } else { + if (I->hasAvailableExternallyLinkage()) + continue; + } if (I->hasLocalLinkage()) I->setVisibility(GlobalValue::HiddenVisibility); diff --git a/lib/Transforms/IPO/GlobalDCE.cpp b/lib/Transforms/IPO/GlobalDCE.cpp index 2b427aa..18c1c7b 100644 --- a/lib/Transforms/IPO/GlobalDCE.cpp +++ b/lib/Transforms/IPO/GlobalDCE.cpp @@ -65,7 +65,7 @@ bool GlobalDCE::runOnModule(Module &M) { for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) { Changed |= RemoveUnusedGlobalValue(*I); // Functions with external linkage are needed if they have a body - if (!I->hasLocalLinkage() && !I->hasLinkOnceLinkage() && + if (!I->isDiscardableIfUnused() && !I->isDeclaration() && !I->hasAvailableExternallyLinkage()) GlobalIsNeeded(I); } @@ -75,7 +75,7 @@ bool GlobalDCE::runOnModule(Module &M) { Changed |= RemoveUnusedGlobalValue(*I); // Externally visible & appending globals are needed, if they have an // initializer. - if (!I->hasLocalLinkage() && !I->hasLinkOnceLinkage() && + if (!I->isDiscardableIfUnused() && !I->isDeclaration() && !I->hasAvailableExternallyLinkage()) GlobalIsNeeded(I); } @@ -84,7 +84,7 @@ bool GlobalDCE::runOnModule(Module &M) { I != E; ++I) { Changed |= RemoveUnusedGlobalValue(*I); // Externally visible aliases are needed. 
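[Editorial sketch, not part of the patch] The GlobalDCE hunks above, together with the alias change just below, replace pairwise linkage tests with GlobalValue::isDiscardableIfUnused(). A simplified restatement of the resulting root rule; the helper name is hypothetical, the three queries are the ones visible in the diff:

#include "llvm/GlobalValue.h"

// A definition seeds the liveness walk only when silently dropping it
// while unused could be observed from outside the module.
static bool seedsLivenessWalk(const llvm::GlobalValue &GV) {
  return !GV.isDiscardableIfUnused() && // local, linkonce, etc. in one query
         !GV.isDeclaration() &&
         !GV.hasAvailableExternallyLinkage();
}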
- if (!I->hasLocalLinkage() && !I->hasLinkOnceLinkage())
+ if (!I->isDiscardableIfUnused())
 GlobalIsNeeded(I);
 }
diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp
index 1522aa4..6d950d2 100644
--- a/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/lib/Transforms/IPO/GlobalOpt.cpp
@@ -254,6 +254,8 @@ static bool AnalyzeGlobal(const Value *V, GlobalStatus &GS,
 GS.StoredType = GlobalStatus::isStored;
 }
 }
+ } else if (isa(I)) {
+ if (AnalyzeGlobal(I, GS, PHIUsers)) return true;
 } else if (isa(I)) {
 if (AnalyzeGlobal(I, GS, PHIUsers)) return true;
 } else if (isa(I)) {
@@ -294,6 +296,168 @@ static bool AnalyzeGlobal(const Value *V, GlobalStatus &GS,
 return false;
 }
 
+/// isLeakCheckerRoot - Is this global variable possibly used by a leak checker
+/// as a root?  If so, we might not really want to eliminate the stores to it.
+static bool isLeakCheckerRoot(GlobalVariable *GV) {
+  // A global variable is a root if it is a pointer, or could plausibly contain
+  // a pointer.  There are two challenges; one is that we could have a struct
+  // that has an inner member which is a pointer.  We recurse through the type
+  // to detect these (up to a point).  The other is that we may actually be a
+  // union of a pointer and another type, and so our LLVM type is an integer
+  // which gets converted into a pointer, or our type is an [i8 x #] with a
+  // pointer potentially contained here.
+
+  if (GV->hasPrivateLinkage())
+    return false;
+
+  SmallVector<Type *, 4> Types;
+  Types.push_back(cast<PointerType>(GV->getType())->getElementType());
+
+  unsigned Limit = 20;
+  do {
+    Type *Ty = Types.pop_back_val();
+    switch (Ty->getTypeID()) {
+      default: break;
+      case Type::PointerTyID: return true;
+      case Type::ArrayTyID:
+      case Type::VectorTyID: {
+        SequentialType *STy = cast<SequentialType>(Ty);
+        Types.push_back(STy->getElementType());
+        break;
+      }
+      case Type::StructTyID: {
+        StructType *STy = cast<StructType>(Ty);
+        if (STy->isOpaque()) return true;
+        for (StructType::element_iterator I = STy->element_begin(),
+             E = STy->element_end(); I != E; ++I) {
+          Type *InnerTy = *I;
+          if (isa<PointerType>(InnerTy)) return true;
+          if (isa<CompositeType>(InnerTy))
+            Types.push_back(InnerTy);
+        }
+        break;
+      }
+    }
+    if (--Limit == 0) return true;
+  } while (!Types.empty());
+  return false;
+}
+
+/// Given a value that is stored to a global but never read, determine whether
+/// it's safe to remove the store and the chain of computation that feeds the
+/// store.
+static bool IsSafeComputationToRemove(Value *V) {
+  do {
+    if (isa<Constant>(V))
+      return true;
+    if (!V->hasOneUse())
+      return false;
+    if (isa<LoadInst>(V) || isa<InvokeInst>(V) || isa<Argument>(V) ||
+        isa<GlobalValue>(V))
+      return false;
+    if (isAllocationFn(V))
+      return true;
+
+    Instruction *I = cast<Instruction>(V);
+    if (I->mayHaveSideEffects())
+      return false;
+    if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I)) {
+      if (!GEP->hasAllConstantIndices())
+        return false;
+    } else if (I->getNumOperands() != 1) {
+      return false;
+    }
+
+    V = I->getOperand(0);
+  } while (1);
+}
+
+/// CleanupPointerRootUsers - This GV is a pointer root.  Loop over all users
+/// of the global and clean up any that obviously don't assign the global a
+/// value that isn't dynamically allocated.
+///
+static bool CleanupPointerRootUsers(GlobalVariable *GV) {
+  // A brief explanation of leak checkers.  The goal is to find bugs where
+  // pointers are forgotten, causing an accumulating growth in memory
+  // usage over time.  The common strategy for leak checkers is to whitelist the
+  // memory pointed to by globals at exit.
This is popular because it also + // solves another problem where the main thread of a C++ program may shut down + // before other threads that are still expecting to use those globals. To + // handle that case, we expect the program may create a singleton and never + // destroy it. + + bool Changed = false; + + // If Dead[n].first is the only use of a malloc result, we can delete its + // chain of computation and the store to the global in Dead[n].second. + SmallVector, 32> Dead; + + // Constants can't be pointers to dynamically allocated memory. + for (Value::use_iterator UI = GV->use_begin(), E = GV->use_end(); + UI != E;) { + User *U = *UI++; + if (StoreInst *SI = dyn_cast(U)) { + Value *V = SI->getValueOperand(); + if (isa(V)) { + Changed = true; + SI->eraseFromParent(); + } else if (Instruction *I = dyn_cast(V)) { + if (I->hasOneUse()) + Dead.push_back(std::make_pair(I, SI)); + } + } else if (MemSetInst *MSI = dyn_cast(U)) { + if (isa(MSI->getValue())) { + Changed = true; + MSI->eraseFromParent(); + } else if (Instruction *I = dyn_cast(MSI->getValue())) { + if (I->hasOneUse()) + Dead.push_back(std::make_pair(I, MSI)); + } + } else if (MemTransferInst *MTI = dyn_cast(U)) { + GlobalVariable *MemSrc = dyn_cast(MTI->getSource()); + if (MemSrc && MemSrc->isConstant()) { + Changed = true; + MTI->eraseFromParent(); + } else if (Instruction *I = dyn_cast(MemSrc)) { + if (I->hasOneUse()) + Dead.push_back(std::make_pair(I, MTI)); + } + } else if (ConstantExpr *CE = dyn_cast(U)) { + if (CE->use_empty()) { + CE->destroyConstant(); + Changed = true; + } + } else if (Constant *C = dyn_cast(U)) { + if (SafeToDestroyConstant(C)) { + C->destroyConstant(); + // This could have invalidated UI, start over from scratch. + Dead.clear(); + CleanupPointerRootUsers(GV); + return true; + } + } + } + + for (int i = 0, e = Dead.size(); i != e; ++i) { + if (IsSafeComputationToRemove(Dead[i].first)) { + Dead[i].second->eraseFromParent(); + Instruction *I = Dead[i].first; + do { + if (isAllocationFn(I)) + break; + Instruction *J = dyn_cast(I->getOperand(0)); + if (!J) + break; + I->eraseFromParent(); + I = J; + } while (1); + I->eraseFromParent(); + } + } + + return Changed; +} + /// CleanupConstantGlobalUsers - We just marked GV constant. Loop over all /// users of the global, cleaning up the obvious ones. This is largely just a /// quick scan over the use list to clean up the easy and obvious cruft. This @@ -517,7 +681,7 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const TargetData &TD) { GlobalVariable *NGV = new GlobalVariable(STy->getElementType(i), false, GlobalVariable::InternalLinkage, In, GV->getName()+"."+Twine(i), - GV->isThreadLocal(), + GV->getThreadLocalMode(), GV->getType()->getAddressSpace()); Globals.insert(GV, NGV); NewGlobals.push_back(NGV); @@ -550,7 +714,7 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const TargetData &TD) { GlobalVariable *NGV = new GlobalVariable(STy->getElementType(), false, GlobalVariable::InternalLinkage, In, GV->getName()+"."+Twine(i), - GV->isThreadLocal(), + GV->getThreadLocalMode(), GV->getType()->getAddressSpace()); Globals.insert(GV, NGV); NewGlobals.push_back(NGV); @@ -810,13 +974,18 @@ static bool OptimizeAwayTrappingUsesOfLoads(GlobalVariable *GV, Constant *LV, // If we nuked all of the loads, then none of the stores are needed either, // nor is the global. 
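[Editorial sketch, not part of the patch] The helpers above give GlobalOpt two cleanup strategies, and the hunk just below picks between them. A hedged outline of that dispatch: the wrapper name is hypothetical, the parameter types are approximated from the call sites in this patch, and the three callees are the functions defined in the hunks above.

static bool cleanupDeadGlobal(GlobalVariable *GV, Constant *Init,
                              TargetData *TD, TargetLibraryInfo *TLI) {
  if (isLeakCheckerRoot(GV))
    // Pointer-like global: drop only stores whose value provably never
    // came from an allocation, so leak checkers keep their roots alive.
    return CleanupPointerRootUsers(GV);
  // Anything else can take the aggressive constant-based cleanup.
  return CleanupConstantGlobalUsers(GV, Init, TD, TLI);
}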
if (AllNonStoreUsesGone) { - DEBUG(dbgs() << " *** GLOBAL NOW DEAD!\n"); - CleanupConstantGlobalUsers(GV, 0, TD, TLI); + if (isLeakCheckerRoot(GV)) { + Changed |= CleanupPointerRootUsers(GV); + } else { + Changed = true; + CleanupConstantGlobalUsers(GV, 0, TD, TLI); + } if (GV->use_empty()) { + DEBUG(dbgs() << " *** GLOBAL NOW DEAD!\n"); + Changed = true; GV->eraseFromParent(); ++NumDeleted; } - Changed = true; } return Changed; } @@ -866,7 +1035,7 @@ static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV, UndefValue::get(GlobalType), GV->getName()+".body", GV, - GV->isThreadLocal()); + GV->getThreadLocalMode()); // If there are bitcast users of the malloc (which is typical, usually we have // a malloc + bitcast) then replace them with uses of the new global. Update @@ -899,7 +1068,7 @@ static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV, new GlobalVariable(Type::getInt1Ty(GV->getContext()), false, GlobalValue::InternalLinkage, ConstantInt::getFalse(GV->getContext()), - GV->getName()+".init", GV->isThreadLocal()); + GV->getName()+".init", GV->getThreadLocalMode()); bool InitBoolUsed = false; // Loop over all uses of GV, processing them in turn. @@ -1321,7 +1490,7 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI, PFieldTy, false, GlobalValue::InternalLinkage, Constant::getNullValue(PFieldTy), GV->getName() + ".f" + Twine(FieldNo), GV, - GV->isThreadLocal()); + GV->getThreadLocalMode()); FieldGlobals.push_back(NGV); unsigned TypeSize = TD->getTypeAllocSize(FieldTy); @@ -1567,8 +1736,10 @@ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV, Instruction *Cast = new BitCastInst(Malloc, CI->getType(), "tmp", CI); CI->replaceAllUsesWith(Cast); CI->eraseFromParent(); - CI = dyn_cast(Malloc) ? - extractMallocCallFromBitCast(Malloc) : cast(Malloc); + if (BitCastInst *BCI = dyn_cast(Malloc)) + CI = cast(BCI->getOperand(0)); + else + CI = cast(Malloc); } GVI = PerformHeapAllocSRoA(GV, CI, getMallocArraySize(CI, TD, true), TD); @@ -1645,7 +1816,7 @@ static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) { GlobalValue::InternalLinkage, ConstantInt::getFalse(GV->getContext()), GV->getName()+".b", - GV->isThreadLocal()); + GV->getThreadLocalMode()); GV->getParent()->getGlobalList().insert(GV, NewGV); Constant *InitVal = GV->getInitializer(); @@ -1716,7 +1887,7 @@ static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) { /// possible. If we make a change, return true. bool GlobalOpt::ProcessGlobal(GlobalVariable *GV, Module::global_iterator &GVI) { - if (!GV->hasLocalLinkage()) + if (!GV->isDiscardableIfUnused()) return false; // Do more involved optimizations if the global is internal. @@ -1729,6 +1900,9 @@ bool GlobalOpt::ProcessGlobal(GlobalVariable *GV, return true; } + if (!GV->hasLocalLinkage()) + return false; + SmallPtrSet PHIUsers; GlobalStatus GS; @@ -1787,10 +1961,15 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV, if (!GS.isLoaded) { DEBUG(dbgs() << "GLOBAL NEVER LOADED: " << *GV); - // Delete any stores we can find to the global. We may not be able to - // make it completely dead though. - bool Changed = CleanupConstantGlobalUsers(GV, GV->getInitializer(), - TD, TLI); + bool Changed; + if (isLeakCheckerRoot(GV)) { + // Delete any constant stores to the global. + Changed = CleanupPointerRootUsers(GV); + } else { + // Delete any stores we can find to the global. We may not be able to + // make it completely dead though. 
+ Changed = CleanupConstantGlobalUsers(GV, GV->getInitializer(), TD, TLI); + } // If the global is dead now, delete it. if (GV->use_empty()) { @@ -1838,7 +2017,7 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV, if (GV->use_empty()) { DEBUG(dbgs() << " *** Substituting initializer allowed us to " - << "simplify all users and delete global!\n"); + << "simplify all users and delete global!\n"); GV->eraseFromParent(); ++NumDeleted; } else { @@ -1870,6 +2049,8 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV, /// function, changing them to FastCC. static void ChangeCalleesToFastCall(Function *F) { for (Value::use_iterator UI = F->use_begin(), E = F->use_end(); UI != E;++UI){ + if (isa(*UI)) + continue; CallSite User(cast(*UI)); User.setCallingConv(CallingConv::Fast); } @@ -1890,6 +2071,8 @@ static AttrListPtr StripNest(const AttrListPtr &Attrs) { static void RemoveNestAttribute(Function *F) { F->setAttributes(StripNest(F->getAttributes())); for (Value::use_iterator UI = F->use_begin(), E = F->use_end(); UI != E;++UI){ + if (isa(*UI)) + continue; CallSite User(cast(*UI)); User.setAttributes(StripNest(User.getAttributes())); } @@ -2045,7 +2228,7 @@ static GlobalVariable *InstallGlobalCtors(GlobalVariable *GCL, // Create the new global and insert it next to the existing list. GlobalVariable *NGV = new GlobalVariable(CA->getType(), GCL->isConstant(), GCL->getLinkage(), CA, "", - GCL->isThreadLocal()); + GCL->getThreadLocalMode()); GCL->getParent()->getGlobalList().insert(GCL, NGV); NGV->takeName(GCL); @@ -2701,7 +2884,7 @@ static bool EvaluateStaticConstructor(Function *F, const TargetData *TD, << " stores.\n"); for (DenseMap::const_iterator I = Eval.getMutatedMemory().begin(), E = Eval.getMutatedMemory().end(); - I != E; ++I) + I != E; ++I) CommitValueTo(I->second, I->first); for (SmallPtrSet::const_iterator I = Eval.getInvariants().begin(), E = Eval.getInvariants().end(); diff --git a/lib/Transforms/IPO/Inliner.cpp b/lib/Transforms/IPO/Inliner.cpp index dc9cbfb..712888a 100644 --- a/lib/Transforms/IPO/Inliner.cpp +++ b/lib/Transforms/IPO/Inliner.cpp @@ -36,7 +36,7 @@ STATISTIC(NumCallsDeleted, "Number of call sites deleted, not inlined"); STATISTIC(NumDeleted, "Number of functions deleted because all callers found"); STATISTIC(NumMergedAllocas, "Number of allocas merged together"); -// This weirdly named statistic tracks the number of times that, when attemting +// This weirdly named statistic tracks the number of times that, when attempting // to inline a function A into B, we analyze the callers of B in order to see // if those would be more profitable and blocked inline steps. STATISTIC(NumCallerCallersAnalyzed, "Number of caller-callers analyzed"); @@ -201,19 +201,22 @@ static bool InlineCallIfPossible(CallSite CS, InlineFunctionInfo &IFI, } unsigned Inliner::getInlineThreshold(CallSite CS) const { - int thres = InlineThreshold; + int thres = InlineThreshold; // -inline-threshold or else selected by + // overall opt level - // Listen to optsize when -inline-limit is not given. + // If -inline-threshold is not given, listen to the optsize attribute when it + // would decrease the threshold. 
Function *Caller = CS.getCaller(); - if (Caller && !Caller->isDeclaration() && - Caller->hasFnAttr(Attribute::OptimizeForSize) && - InlineLimit.getNumOccurrences() == 0) + bool OptSize = Caller && !Caller->isDeclaration() && + Caller->hasFnAttr(Attribute::OptimizeForSize); + if (!(InlineLimit.getNumOccurrences() > 0) && OptSize && OptSizeThreshold < thres) thres = OptSizeThreshold; - // Listen to inlinehint when it would increase the threshold. + // Listen to the inlinehint attribute when it would increase the threshold. Function *Callee = CS.getCalledFunction(); - if (HintThreshold > thres && Callee && !Callee->isDeclaration() && - Callee->hasFnAttr(Attribute::InlineHint)) + bool InlineHint = Callee && !Callee->isDeclaration() && + Callee->hasFnAttr(Attribute::InlineHint); + if (InlineHint && HintThreshold > thres) thres = HintThreshold; return thres; diff --git a/lib/Transforms/IPO/LoopExtractor.cpp b/lib/Transforms/IPO/LoopExtractor.cpp index 4f96afe4..97d7cdc 100644 --- a/lib/Transforms/IPO/LoopExtractor.cpp +++ b/lib/Transforms/IPO/LoopExtractor.cpp @@ -24,7 +24,7 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" -#include "llvm/Transforms/Utils/FunctionUtils.h" +#include "llvm/Transforms/Utils/CodeExtractor.h" #include "llvm/ADT/Statistic.h" #include #include @@ -132,7 +132,8 @@ bool LoopExtractor::runOnLoop(Loop *L, LPPassManager &LPM) { if (ShouldExtractLoop) { if (NumLoops == 0) return Changed; --NumLoops; - if (ExtractLoop(DT, L) != 0) { + CodeExtractor Extractor(DT, *L); + if (Extractor.extractCodeRegion() != 0) { Changed = true; // After extraction, the loop is replaced by a function call, so // we shouldn't try to run any more loop passes on it. @@ -296,7 +297,7 @@ bool BlockExtractorPass::runOnModule(Module &M) { if (const InvokeInst *II = dyn_cast(BlocksToExtract[i]->getTerminator())) BlocksToExtractVec.push_back(II->getUnwindDest()); - ExtractBasicBlock(BlocksToExtractVec); + CodeExtractor(BlocksToExtractVec).extractCodeRegion(); } return !BlocksToExtract.empty(); diff --git a/lib/Transforms/IPO/MergeFunctions.cpp b/lib/Transforms/IPO/MergeFunctions.cpp index 0b01c38..9f70f66 100644 --- a/lib/Transforms/IPO/MergeFunctions.cpp +++ b/lib/Transforms/IPO/MergeFunctions.cpp @@ -45,22 +45,22 @@ #define DEBUG_TYPE "mergefunc" #include "llvm/Transforms/IPO.h" -#include "llvm/ADT/DenseSet.h" -#include "llvm/ADT/FoldingSet.h" -#include "llvm/ADT/SmallSet.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/Constants.h" +#include "llvm/IRBuilder.h" #include "llvm/InlineAsm.h" #include "llvm/Instructions.h" #include "llvm/LLVMContext.h" #include "llvm/Module.h" #include "llvm/Operator.h" #include "llvm/Pass.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/FoldingSet.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/Statistic.h" #include "llvm/Support/CallSite.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/IRBuilder.h" #include "llvm/Support/ValueHandle.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetData.h" @@ -389,7 +389,7 @@ bool FunctionComparator::enumerate(const Value *V1, const Value *V2) { if (!C2) return false; // TODO: constant expressions with GEP or references to F1 or F2. 
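[Editorial sketch, not part of the patch] Returning to the Inliner::getInlineThreshold rewrite above: optsize may only lower the threshold, and only when -inline-threshold was not given explicitly, while inlinehint may only raise it. A self-contained restatement with the flag plumbing replaced by plain parameters (the numbers in main are illustrative, not asserted defaults):

#include <iostream>

static int getInlineThreshold(int base, bool thresholdFlagGiven,
                              bool callerOptSize, bool calleeInlineHint,
                              int optSizeThres, int hintThres) {
  int thres = base;
  // optsize may only decrease the threshold, and only when the user did
  // not pin it explicitly with -inline-threshold.
  if (!thresholdFlagGiven && callerOptSize && optSizeThres < thres)
    thres = optSizeThres;
  // inlinehint may only increase it.
  if (calleeInlineHint && hintThres > thres)
    thres = hintThres;
  return thres;
}

int main() {
  std::cout << getInlineThreshold(225, false, true, false, 75, 325) << "\n";  // 75
  std::cout << getInlineThreshold(225, false, false, true, 75, 325) << "\n"; // 325
  return 0;
}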
if (C1->isNullValue() && C2->isNullValue() && - isEquivalentType(C1->getType(), C2->getType())) + isEquivalentType(C1->getType(), C2->getType())) return true; // Try bitcasting C2 to C1's type. If the bitcast is legal and returns C1 // then they must have equal bit patterns. diff --git a/lib/Transforms/IPO/PartialInlining.cpp b/lib/Transforms/IPO/PartialInlining.cpp index d9d1d10..9c9910b 100644 --- a/lib/Transforms/IPO/PartialInlining.cpp +++ b/lib/Transforms/IPO/PartialInlining.cpp @@ -19,7 +19,7 @@ #include "llvm/Pass.h" #include "llvm/Analysis/Dominators.h" #include "llvm/Transforms/Utils/Cloning.h" -#include "llvm/Transforms/Utils/FunctionUtils.h" +#include "llvm/Transforms/Utils/CodeExtractor.h" #include "llvm/ADT/Statistic.h" #include "llvm/Support/CFG.h" using namespace llvm; @@ -122,7 +122,8 @@ Function* PartialInliner::unswitchFunction(Function* F) { DT.runOnFunction(*duplicateFunction); // Extract the body of the if. - Function* extractedFunction = ExtractCodeRegion(DT, toExtract); + Function* extractedFunction + = CodeExtractor(toExtract, &DT).extractCodeRegion(); InlineFunctionInfo IFI; diff --git a/lib/Transforms/IPO/StripSymbols.cpp b/lib/Transforms/IPO/StripSymbols.cpp index b5caa9a..80bfc1c 100644 --- a/lib/Transforms/IPO/StripSymbols.cpp +++ b/lib/Transforms/IPO/StripSymbols.cpp @@ -22,11 +22,12 @@ #include "llvm/Transforms/IPO.h" #include "llvm/Constants.h" +#include "llvm/DebugInfo.h" #include "llvm/DerivedTypes.h" #include "llvm/Instructions.h" #include "llvm/Module.h" #include "llvm/Pass.h" -#include "llvm/Analysis/DebugInfo.h" +#include "llvm/TypeFinder.h" #include "llvm/ValueSymbolTable.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/ADT/DenseMap.h" @@ -175,8 +176,8 @@ static void StripSymtab(ValueSymbolTable &ST, bool PreserveDbgInfo) { // Strip any named types of their names. 
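[Editorial sketch, not part of the patch] The StripSymbols change continuing below swaps Module::findUsedStructTypes for the new llvm/TypeFinder.h walker. A minimal usage sketch of that API as it appears in the diff; the OnlyNamed flag corresponds to the false argument passed below:

#include "llvm/Module.h"
#include "llvm/TypeFinder.h"

static unsigned countStructTypes(llvm::Module &M) {
  llvm::TypeFinder StructTypes;
  StructTypes.run(M, /*OnlyNamed=*/false); // one walk over the whole module
  return StructTypes.size();               // elements are StructType*
}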
static void StripTypeNames(Module &M, bool PreserveDbgInfo) { - std::vector StructTypes; - M.findUsedStructTypes(StructTypes); + TypeFinder StructTypes; + StructTypes.run(M, false); for (unsigned i = 0, e = StructTypes.size(); i != e; ++i) { StructType *STy = StructTypes[i]; diff --git a/lib/Transforms/InstCombine/CMakeLists.txt b/lib/Transforms/InstCombine/CMakeLists.txt index d070ccc..72cfe2c 100644 --- a/lib/Transforms/InstCombine/CMakeLists.txt +++ b/lib/Transforms/InstCombine/CMakeLists.txt @@ -13,3 +13,5 @@ add_llvm_library(LLVMInstCombine InstCombineSimplifyDemanded.cpp InstCombineVectorOps.cpp ) + +add_dependencies(LLVMInstCombine intrinsics_gen) diff --git a/lib/Transforms/InstCombine/InstCombine.h b/lib/Transforms/InstCombine/InstCombine.h index 199df51..0d5ef90 100644 --- a/lib/Transforms/InstCombine/InstCombine.h +++ b/lib/Transforms/InstCombine/InstCombine.h @@ -11,11 +11,11 @@ #define INSTCOMBINE_INSTCOMBINE_H #include "InstCombineWorklist.h" +#include "llvm/IRBuilder.h" #include "llvm/IntrinsicInst.h" #include "llvm/Operator.h" #include "llvm/Pass.h" #include "llvm/Analysis/ValueTracking.h" -#include "llvm/Support/IRBuilder.h" #include "llvm/Support/InstVisitor.h" #include "llvm/Support/TargetFolder.h" @@ -187,7 +187,7 @@ public: Instruction *visitPHINode(PHINode &PN); Instruction *visitGetElementPtrInst(GetElementPtrInst &GEP); Instruction *visitAllocaInst(AllocaInst &AI); - Instruction *visitMalloc(Instruction &FI); + Instruction *visitAllocSite(Instruction &FI); Instruction *visitFree(CallInst &FI); Instruction *visitLoadInst(LoadInst &LI); Instruction *visitStoreInst(StoreInst &SI); diff --git a/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/lib/Transforms/InstCombine/InstCombineAddSub.cpp index 05e702f..99b62f8 100644 --- a/lib/Transforms/InstCombine/InstCombineAddSub.cpp +++ b/lib/Transforms/InstCombine/InstCombineAddSub.cpp @@ -170,10 +170,11 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) { // -A + B --> B - A // -A + -B --> -(A + B) if (Value *LHSV = dyn_castNegVal(LHS)) { - if (Value *RHSV = dyn_castNegVal(RHS)) { - Value *NewAdd = Builder->CreateAdd(LHSV, RHSV, "sum"); - return BinaryOperator::CreateNeg(NewAdd); - } + if (!isa(RHS)) + if (Value *RHSV = dyn_castNegVal(RHS)) { + Value *NewAdd = Builder->CreateAdd(LHSV, RHSV, "sum"); + return BinaryOperator::CreateNeg(NewAdd); + } return BinaryOperator::CreateSub(RHS, LHSV); } @@ -329,6 +330,20 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) { } } + // Check for (x & y) + (x ^ y) + { + Value *A = 0, *B = 0; + if (match(RHS, m_Xor(m_Value(A), m_Value(B))) && + (match(LHS, m_And(m_Specific(A), m_Specific(B))) || + match(LHS, m_And(m_Specific(B), m_Specific(A))))) + return BinaryOperator::CreateOr(A, B); + + if (match(LHS, m_Xor(m_Value(A), m_Value(B))) && + (match(RHS, m_And(m_Specific(A), m_Specific(B))) || + match(RHS, m_And(m_Specific(B), m_Specific(A))))) + return BinaryOperator::CreateOr(A, B); + } + return Changed ? &I : 0; } @@ -406,66 +421,6 @@ Instruction *InstCombiner::visitFAdd(BinaryOperator &I) { } -/// EmitGEPOffset - Given a getelementptr instruction/constantexpr, emit the -/// code necessary to compute the offset from the base pointer (without adding -/// in the base pointer). Return the result as a signed integer of intptr size. 
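[Editorial note, not part of the patch; the deleted EmitGEPOffset body follows below] One of the visitAdd hunks above introduces the rewrite (x & y) + (x ^ y) --> x | y. It is sound because AND keeps the bits common to both operands and XOR keeps the differing bits; the two results have disjoint set bits, so the addition can never carry. A brute-force check over small values:

#include <cassert>
#include <cstdint>

int main() {
  for (uint32_t x = 0; x < 256; ++x)
    for (uint32_t y = 0; y < 256; ++y)
      assert(((x & y) + (x ^ y)) == (x | y));
  return 0;
}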
-Value *InstCombiner::EmitGEPOffset(User *GEP) { - TargetData &TD = *getTargetData(); - gep_type_iterator GTI = gep_type_begin(GEP); - Type *IntPtrTy = TD.getIntPtrType(GEP->getContext()); - Value *Result = Constant::getNullValue(IntPtrTy); - - // If the GEP is inbounds, we know that none of the addressing operations will - // overflow in an unsigned sense. - bool isInBounds = cast(GEP)->isInBounds(); - - // Build a mask for high order bits. - unsigned IntPtrWidth = TD.getPointerSizeInBits(); - uint64_t PtrSizeMask = ~0ULL >> (64-IntPtrWidth); - - for (User::op_iterator i = GEP->op_begin() + 1, e = GEP->op_end(); i != e; - ++i, ++GTI) { - Value *Op = *i; - uint64_t Size = TD.getTypeAllocSize(GTI.getIndexedType()) & PtrSizeMask; - if (ConstantInt *OpC = dyn_cast(Op)) { - if (OpC->isZero()) continue; - - // Handle a struct index, which adds its field offset to the pointer. - if (StructType *STy = dyn_cast(*GTI)) { - Size = TD.getStructLayout(STy)->getElementOffset(OpC->getZExtValue()); - - if (Size) - Result = Builder->CreateAdd(Result, ConstantInt::get(IntPtrTy, Size), - GEP->getName()+".offs"); - continue; - } - - Constant *Scale = ConstantInt::get(IntPtrTy, Size); - Constant *OC = - ConstantExpr::getIntegerCast(OpC, IntPtrTy, true /*SExt*/); - Scale = ConstantExpr::getMul(OC, Scale, isInBounds/*NUW*/); - // Emit an add instruction. - Result = Builder->CreateAdd(Result, Scale, GEP->getName()+".offs"); - continue; - } - // Convert to correct type. - if (Op->getType() != IntPtrTy) - Op = Builder->CreateIntCast(Op, IntPtrTy, true, Op->getName()+".c"); - if (Size != 1) { - // We'll let instcombine(mul) convert this to a shl if possible. - Op = Builder->CreateMul(Op, ConstantInt::get(IntPtrTy, Size), - GEP->getName()+".idx", isInBounds /*NUW*/); - } - - // Emit an add instruction. - Result = Builder->CreateAdd(Op, Result, GEP->getName()+".offs"); - } - return Result; -} - - - - /// Optimize pointer differences into the same array into a size. Consider: /// &A[10] - &A[0]: we should compile this to "10". LHS/RHS are the pointer /// operands to the ptrtoint instructions for the LHS/RHS of the subtract. @@ -589,11 +544,6 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) { if (Instruction *R = FoldOpIntoSelect(I, SI)) return R; - // C - zext(bool) -> bool ? 
C - 1 : C - if (ZExtInst *ZI = dyn_cast(Op1)) - if (ZI->getSrcTy()->isIntegerTy(1)) - return SelectInst::Create(ZI->getOperand(0), SubOne(C), C); - // C-(X+C2) --> (C-C2)-X ConstantInt *C2; if (match(Op1, m_Add(m_Value(X), m_ConstantInt(C2)))) diff --git a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp index 0dbe11d..7d0af0d 100644 --- a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp +++ b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp @@ -986,19 +986,23 @@ Value *InstCombiner::FoldAndOfFCmps(FCmpInst *LHS, FCmpInst *RHS) { bool Op1Ordered; unsigned Op0Pred = getFCmpCode(Op0CC, Op0Ordered); unsigned Op1Pred = getFCmpCode(Op1CC, Op1Ordered); + // uno && ord -> false + if (Op0Pred == 0 && Op1Pred == 0 && Op0Ordered != Op1Ordered) + return ConstantInt::get(CmpInst::makeCmpResultType(LHS->getType()), 0); if (Op1Pred == 0) { std::swap(LHS, RHS); std::swap(Op0Pred, Op1Pred); std::swap(Op0Ordered, Op1Ordered); } if (Op0Pred == 0) { - // uno && ueq -> uno && (uno || eq) -> ueq + // uno && ueq -> uno && (uno || eq) -> uno // ord && olt -> ord && (ord && lt) -> olt - if (Op0Ordered == Op1Ordered) + if (!Op0Ordered && (Op0Ordered == Op1Ordered)) + return LHS; + if (Op0Ordered && (Op0Ordered == Op1Ordered)) return RHS; // uno && oeq -> uno && (ord && eq) -> false - // uno && ord -> false if (!Op0Ordered) return ConstantInt::get(CmpInst::makeCmpResultType(LHS->getType()), 0); // ord && ueq -> ord && (uno || eq) -> oeq @@ -1932,10 +1936,15 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { // A | ( A ^ B) -> A | B // A | (~A ^ B) -> A | ~B + // (A & B) | (A ^ B) if (match(Op1, m_Xor(m_Value(A), m_Value(B)))) { if (Op0 == A || Op0 == B) return BinaryOperator::CreateOr(A, B); + if (match(Op0, m_And(m_Specific(A), m_Specific(B))) || + match(Op0, m_And(m_Specific(B), m_Specific(A)))) + return BinaryOperator::CreateOr(A, B); + if (Op1->hasOneUse() && match(A, m_Not(m_Specific(Op0)))) { Value *Not = Builder->CreateNot(B, B->getName()+".not"); return BinaryOperator::CreateOr(Not, Op0); @@ -2212,7 +2221,7 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) { if (Op0I && Op1I && Op0I->isShift() && Op0I->getOpcode() == Op1I->getOpcode() && Op0I->getOperand(1) == Op1I->getOperand(1) && - (Op1I->hasOneUse() || Op1I->hasOneUse())) { + (Op0I->hasOneUse() || Op1I->hasOneUse())) { Value *NewOp = Builder->CreateXor(Op0I->getOperand(0), Op1I->getOperand(0), Op0I->getName()); diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp index 77e4727..d34fab1 100644 --- a/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -172,8 +172,6 @@ Instruction *InstCombiner::SimplifyMemSet(MemSetInst *MI) { Instruction *InstCombiner::visitCallInst(CallInst &CI) { if (isFreeCall(&CI)) return visitFree(CI); - if (isMalloc(&CI)) - return visitMalloc(CI); // If the caller function is nounwind, mark the call as nounwind, even if the // callee isn't. @@ -246,78 +244,10 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { switch (II->getIntrinsicID()) { default: break; case Intrinsic::objectsize: { - // We need target data for just about everything so depend on it. - if (!TD) break; - - Type *ReturnTy = CI.getType(); - uint64_t DontKnow = II->getArgOperand(1) == Builder->getTrue() ? 0 : -1ULL; - - // Get to the real allocated thing and offset as fast as possible. 
- Value *Op1 = II->getArgOperand(0)->stripPointerCasts(); - - uint64_t Offset = 0; - uint64_t Size = -1ULL; - - // Try to look through constant GEPs. - if (GEPOperator *GEP = dyn_cast(Op1)) { - if (!GEP->hasAllConstantIndices()) break; - - // Get the current byte offset into the thing. Use the original - // operand in case we're looking through a bitcast. - SmallVector Ops(GEP->idx_begin(), GEP->idx_end()); - if (!GEP->getPointerOperandType()->isPointerTy()) - return 0; - Offset = TD->getIndexedOffset(GEP->getPointerOperandType(), Ops); - - Op1 = GEP->getPointerOperand()->stripPointerCasts(); - - // Make sure we're not a constant offset from an external - // global. - if (GlobalVariable *GV = dyn_cast(Op1)) - if (!GV->hasDefinitiveInitializer()) break; - } - - // If we've stripped down to a single global variable that we - // can know the size of then just return that. - if (GlobalVariable *GV = dyn_cast(Op1)) { - if (GV->hasDefinitiveInitializer()) { - Constant *C = GV->getInitializer(); - Size = TD->getTypeAllocSize(C->getType()); - } else { - // Can't determine size of the GV. - Constant *RetVal = ConstantInt::get(ReturnTy, DontKnow); - return ReplaceInstUsesWith(CI, RetVal); - } - } else if (AllocaInst *AI = dyn_cast(Op1)) { - // Get alloca size. - if (AI->getAllocatedType()->isSized()) { - Size = TD->getTypeAllocSize(AI->getAllocatedType()); - if (AI->isArrayAllocation()) { - const ConstantInt *C = dyn_cast(AI->getArraySize()); - if (!C) break; - Size *= C->getZExtValue(); - } - } - } else if (CallInst *MI = extractMallocCall(Op1)) { - // Get allocation size. - Type* MallocType = getMallocAllocatedType(MI); - if (MallocType && MallocType->isSized()) - if (Value *NElems = getMallocArraySize(MI, TD, true)) - if (ConstantInt *NElements = dyn_cast(NElems)) - Size = NElements->getZExtValue() * TD->getTypeAllocSize(MallocType); - } - - // Do not return "I don't know" here. Later optimization passes could - // make it possible to evaluate objectsize to a constant. - if (Size == -1ULL) - break; - - if (Size < Offset) { - // Out of bound reference? Negative index normalized to large - // index? Just return "I don't know". - return ReplaceInstUsesWith(CI, ConstantInt::get(ReturnTy, DontKnow)); - } - return ReplaceInstUsesWith(CI, ConstantInt::get(ReturnTy, Size-Offset)); + uint64_t Size; + if (getObjectSize(II->getArgOperand(0), Size, TD)) + return ReplaceInstUsesWith(CI, ConstantInt::get(CI.getType(), Size)); + return 0; } case Intrinsic::bswap: // bswap(bswap(x)) -> x @@ -694,6 +624,57 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { break; } + case Intrinsic::arm_neon_vmulls: + case Intrinsic::arm_neon_vmullu: { + Value *Arg0 = II->getArgOperand(0); + Value *Arg1 = II->getArgOperand(1); + + // Handle mul by zero first: + if (isa(Arg0) || isa(Arg1)) { + return ReplaceInstUsesWith(CI, ConstantAggregateZero::get(II->getType())); + } + + // Check for constant LHS & RHS - in this case we just simplify. + bool Zext = (II->getIntrinsicID() == Intrinsic::arm_neon_vmullu); + VectorType *NewVT = cast(II->getType()); + unsigned NewWidth = NewVT->getElementType()->getIntegerBitWidth(); + if (ConstantDataVector *CV0 = dyn_cast(Arg0)) { + if (ConstantDataVector *CV1 = dyn_cast(Arg1)) { + VectorType* VT = cast(CV0->getType()); + SmallVector NewElems; + for (unsigned i = 0; i < VT->getNumElements(); ++i) { + APInt CV0E = + (cast(CV0->getAggregateElement(i)))->getValue(); + CV0E = Zext ? 
CV0E.zext(NewWidth) : CV0E.sext(NewWidth);
+        APInt CV1E =
+          (cast<ConstantInt>(CV1->getAggregateElement(i)))->getValue();
+        CV1E = Zext ? CV1E.zext(NewWidth) : CV1E.sext(NewWidth);
+        NewElems.push_back(
+          ConstantInt::get(NewVT->getElementType(), CV0E * CV1E));
+      }
+      return ReplaceInstUsesWith(CI, ConstantVector::get(NewElems));
+    }
+
+    // Couldn't simplify - canonicalize constant to the RHS.
+    std::swap(Arg0, Arg1);
+  }
+
+  // Handle mul by one:
+  if (ConstantDataVector *CV1 = dyn_cast<ConstantDataVector>(Arg1)) {
+    if (ConstantInt *Splat =
+          dyn_cast_or_null<ConstantInt>(CV1->getSplatValue())) {
+      if (Splat->isOne()) {
+        if (Zext)
+          return CastInst::CreateZExtOrBitCast(Arg0, II->getType());
+        // else
+        return CastInst::CreateSExtOrBitCast(Arg0, II->getType());
+      }
+    }
+  }
+
+  break;
+ }
+
 case Intrinsic::stackrestore: {
 // If the save is right next to the restore, remove the restore.  This can
 // happen when variable allocas are DCE'd.
@@ -711,7 +692,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
 TerminatorInst *TI = II->getParent()->getTerminator();
 bool CannotRemove = false;
 for (++BI; &*BI != TI; ++BI) {
- if (isa<AllocaInst>(BI) || isMalloc(BI)) {
+ if (isa<AllocaInst>(BI)) {
 CannotRemove = true;
 break;
 }
@@ -814,7 +795,7 @@ Instruction *InstCombiner::tryOptimizeCall(CallInst *CI, const TargetData *TD) {
 if (CI->getCalledFunction() == 0) return 0;
 InstCombineFortifiedLibCalls Simplifier(this);
- Simplifier.fold(CI, TD);
+ Simplifier.fold(CI, TD, TLI);
 return Simplifier.NewInstruction;
 }
@@ -898,6 +879,9 @@ static IntrinsicInst *FindInitTrampoline(Value *Callee) {
 // visitCallSite - Improvements for call and invoke instructions.
 //
 Instruction *InstCombiner::visitCallSite(CallSite CS) {
+ if (isAllocLikeFn(CS.getInstruction()))
+ return visitAllocSite(*CS.getInstruction());
+
 bool Changed = false;
 
 // If the callee is a pointer to a function, attempt to move any casts to the
@@ -933,24 +917,24 @@ Instruction *InstCombiner::visitCallSite(CallSite CS) {
 }
 
 if (isa<ConstantPointerNull>(Callee) || isa<UndefValue>(Callee)) {
- // This instruction is not reachable, just remove it.  We insert a store to
- // undef so that we know that this code is not reachable, despite the fact
- // that we can't modify the CFG here.
- new StoreInst(ConstantInt::getTrue(Callee->getContext()),
- UndefValue::get(Type::getInt1PtrTy(Callee->getContext())),
- CS.getInstruction());
-
 // If CS does not return void then replaceAllUsesWith undef.
 // This allows ValueHandlers and custom metadata to adjust itself.
 if (!CS.getInstruction()->getType()->isVoidTy())
 ReplaceInstUsesWith(*CS.getInstruction(),
 UndefValue::get(CS.getInstruction()->getType()));
- if (InvokeInst *II = dyn_cast<InvokeInst>(CS.getInstruction())) {
- // Don't break the CFG, insert a dummy cond branch.
- BranchInst::Create(II->getNormalDest(), II->getUnwindDest(),
- ConstantInt::getTrue(Callee->getContext()), II);
+ if (isa<InvokeInst>(CS.getInstruction())) {
+ // Can't remove an invoke because we cannot change the CFG.
+ return 0;
 }
+
+ // This instruction is not reachable, just remove it.  We insert a store to
+ // undef so that we know that this code is not reachable, despite the fact
+ // that we can't modify the CFG here.
+ new StoreInst(ConstantInt::getTrue(Callee->getContext()),
+ UndefValue::get(Type::getInt1PtrTy(Callee->getContext())),
+ CS.getInstruction());
+ return EraseInstFromFunction(*CS.getInstruction());
 }
@@ -1194,8 +1178,7 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
 if (NewRetTy->isVoidTy())
 Caller->setName("");   // Void type should not have a name.
- const AttrListPtr &NewCallerPAL = AttrListPtr::get(attrVec.begin(), - attrVec.end()); + const AttrListPtr &NewCallerPAL = AttrListPtr::get(attrVec); Instruction *NC; if (InvokeInst *II = dyn_cast(Caller)) { @@ -1367,8 +1350,7 @@ InstCombiner::transformCallThroughTrampoline(CallSite CS, NestF->getType() == PointerType::getUnqual(NewFTy) ? NestF : ConstantExpr::getBitCast(NestF, PointerType::getUnqual(NewFTy)); - const AttrListPtr &NewPAL = AttrListPtr::get(NewAttrs.begin(), - NewAttrs.end()); + const AttrListPtr &NewPAL = AttrListPtr::get(NewAttrs); Instruction *NewCaller; if (InvokeInst *II = dyn_cast(Caller)) { diff --git a/lib/Transforms/InstCombine/InstCombineCasts.cpp b/lib/Transforms/InstCombine/InstCombineCasts.cpp index 39279f4..555b442 100644 --- a/lib/Transforms/InstCombine/InstCombineCasts.cpp +++ b/lib/Transforms/InstCombine/InstCombineCasts.cpp @@ -34,7 +34,7 @@ static Value *DecomposeSimpleLinearExpr(Value *Val, unsigned &Scale, if (BinaryOperator *I = dyn_cast(Val)) { // Cannot look past anything that might overflow. OverflowingBinaryOperator *OBI = dyn_cast(Val); - if (OBI && !OBI->hasNoUnsignedWrap()) { + if (OBI && !OBI->hasNoUnsignedWrap() && !OBI->hasNoSignedWrap()) { Scale = 1; Offset = 0; return Val; @@ -648,10 +648,8 @@ static bool CanEvaluateZExtd(Value *V, Type *Ty, unsigned &BitsToClear) { if (!I) return false; // If the input is a truncate from the destination type, we can trivially - // eliminate it, even if it has multiple uses. - // FIXME: This is currently disabled until codegen can handle this without - // pessimizing code, PR5997. - if (0 && isa(I) && I->getOperand(0)->getType() == Ty) + // eliminate it. + if (isa(I) && I->getOperand(0)->getType() == Ty) return true; // We can't extend or shrink something that has multiple uses: doing so would @@ -992,11 +990,8 @@ static bool CanEvaluateSExtd(Value *V, Type *Ty) { Instruction *I = dyn_cast(V); if (!I) return false; - // If this is a truncate from the dest type, we can trivially eliminate it, - // even if it has multiple uses. - // FIXME: This is currently disabled until codegen can handle this without - // pessimizing code, PR5997. - if (0 && isa(I) && I->getOperand(0)->getType() == Ty) + // If this is a truncate from the dest type, we can trivially eliminate it. + if (isa(I) && I->getOperand(0)->getType() == Ty) return true; // We can't extend or shrink something that has multiple uses: doing so would @@ -1341,10 +1336,9 @@ Instruction *InstCombiner::commonPointerCastTransforms(CastInst &CI) { // non-type-safe code. if (TD && GEP->hasOneUse() && isa(GEP->getOperand(0)) && GEP->hasAllConstantIndices()) { - // We are guaranteed to get a constant from EmitGEPOffset. - ConstantInt *OffsetV = cast(EmitGEPOffset(GEP)); - int64_t Offset = OffsetV->getSExtValue(); - + SmallVector Ops(GEP->idx_begin(), GEP->idx_end()); + int64_t Offset = TD->getIndexedOffset(GEP->getPointerOperandType(), Ops); + // Get the base pointer input of the bitcast, and the type it points to. Value *OrigBase = cast(GEP->getOperand(0))->getOperand(0); Type *GEPIdxTy = diff --git a/lib/Transforms/InstCombine/InstCombineCompares.cpp b/lib/Transforms/InstCombine/InstCombineCompares.cpp index ab2987f..bdd310e 100644 --- a/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -1035,7 +1035,7 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, if ((KnownZero|KnownOne).countLeadingOnes() >= SrcBits-DstBits) { // Pull in the high bits from known-ones set. 
APInt NewRHS = RHS->getValue().zext(SrcBits); - NewRHS |= KnownOne; + NewRHS |= KnownOne & APInt::getHighBitsSet(SrcBits, SrcBits-DstBits); return new ICmpInst(ICI.getPredicate(), LHSI->getOperand(0), ConstantInt::get(ICI.getContext(), NewRHS)); } @@ -2580,10 +2580,25 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { } } + // Transform (zext A) == (B & (1< A == (trunc B) + // and (B & (1< A == (trunc B) + ConstantInt *Cst1; + if ((Op0->hasOneUse() && + match(Op0, m_ZExt(m_Value(A))) && + match(Op1, m_And(m_Value(B), m_ConstantInt(Cst1)))) || + (Op1->hasOneUse() && + match(Op0, m_And(m_Value(B), m_ConstantInt(Cst1))) && + match(Op1, m_ZExt(m_Value(A))))) { + APInt Pow2 = Cst1->getValue() + 1; + if (Pow2.isPowerOf2() && isa(A->getType()) && + Pow2.logBase2() == cast(A->getType())->getBitWidth()) + return new ICmpInst(I.getPredicate(), A, + Builder->CreateTrunc(B, A->getType())); + } + // Transform "icmp eq (trunc (lshr(X, cst1)), cst" to // "icmp (and X, mask), cst" uint64_t ShAmt = 0; - ConstantInt *Cst1; if (Op0->hasOneUse() && match(Op0, m_Trunc(m_OneUse(m_LShr(m_Value(A), m_ConstantInt(ShAmt))))) && @@ -2809,7 +2824,7 @@ Instruction *InstCombiner::FoldFCmp_IntToFP_Cst(FCmpInst &I, case ICmpInst::ICMP_UGE: // (float)int >= -4.4 --> true // (float)int >= 4.4 --> int > 4 - if (!RHS.isNegative()) + if (RHS.isNegative()) return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext())); Pred = ICmpInst::ICMP_UGT; break; diff --git a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp index b2f2e24..c485844 100644 --- a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp +++ b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp @@ -22,72 +22,6 @@ using namespace llvm; STATISTIC(NumDeadStore, "Number of dead stores eliminated"); -// Try to kill dead allocas by walking through its uses until we see some use -// that could escape. This is a conservative analysis which tries to handle -// GEPs, bitcasts, stores, and no-op intrinsics. These tend to be the things -// left after inlining and SROA finish chewing on an alloca. -static Instruction *removeDeadAlloca(InstCombiner &IC, AllocaInst &AI) { - SmallVector Worklist, DeadStores; - Worklist.push_back(&AI); - do { - Instruction *PI = Worklist.pop_back_val(); - for (Value::use_iterator UI = PI->use_begin(), UE = PI->use_end(); - UI != UE; ++UI) { - Instruction *I = cast(*UI); - switch (I->getOpcode()) { - default: - // Give up the moment we see something we can't handle. - return 0; - - case Instruction::GetElementPtr: - case Instruction::BitCast: - Worklist.push_back(I); - continue; - - case Instruction::Call: - // We can handle a limited subset of calls to no-op intrinsics. - if (IntrinsicInst *II = dyn_cast(I)) { - switch (II->getIntrinsicID()) { - case Intrinsic::dbg_declare: - case Intrinsic::dbg_value: - case Intrinsic::invariant_start: - case Intrinsic::invariant_end: - case Intrinsic::lifetime_start: - case Intrinsic::lifetime_end: - continue; - default: - return 0; - } - } - // Reject everything else. - return 0; - - case Instruction::Store: { - // Stores into the alloca are only live if the alloca is live. - StoreInst *SI = cast(I); - // We can eliminate atomic stores, but not volatile. - if (SI->isVolatile()) - return 0; - // The store is only trivially safe if the poniter is the destination - // as opposed to the value. We're conservative here and don't check for - // the case where we store the address of a dead alloca into a dead - // alloca. 
- if (SI->getPointerOperand() != PI) - return 0; - DeadStores.push_back(I); - continue; - } - } - } - } while (!Worklist.empty()); - - // The alloca is dead. Kill off all the stores to it, and then replace it - // with undef. - while (!DeadStores.empty()) - IC.EraseInstFromFunction(*DeadStores.pop_back_val()); - return IC.ReplaceInstUsesWith(AI, UndefValue::get(AI.getType())); -} - Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) { // Ensure that the alloca array size argument has type intptr_t, so that // any casting is exposed early. @@ -106,7 +40,6 @@ Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) { if (const ConstantInt *C = dyn_cast(AI.getArraySize())) { Type *NewTy = ArrayType::get(AI.getAllocatedType(), C->getZExtValue()); - assert(isa(AI) && "Unknown type of allocation inst!"); AllocaInst *New = Builder->CreateAlloca(NewTy, 0, AI.getName()); New->setAlignment(AI.getAlignment()); @@ -135,22 +68,54 @@ Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) { } } - if (TD && isa(AI) && AI.getAllocatedType()->isSized()) { - // If alloca'ing a zero byte object, replace the alloca with a null pointer. - // Note that we only do this for alloca's, because malloc should allocate - // and return a unique pointer, even for a zero byte allocation. - if (TD->getTypeAllocSize(AI.getAllocatedType()) == 0) - return ReplaceInstUsesWith(AI, Constant::getNullValue(AI.getType())); - + if (TD && AI.getAllocatedType()->isSized()) { // If the alignment is 0 (unspecified), assign it the preferred alignment. if (AI.getAlignment() == 0) AI.setAlignment(TD->getPrefTypeAlignment(AI.getAllocatedType())); + + // Move all alloca's of zero byte objects to the entry block and merge them + // together. Note that we only do this for alloca's, because malloc should + // allocate and return a unique pointer, even for a zero byte allocation. + if (TD->getTypeAllocSize(AI.getAllocatedType()) == 0) { + // For a zero sized alloca there is no point in doing an array allocation. + // This is helpful if the array size is a complicated expression not used + // elsewhere. + if (AI.isArrayAllocation()) { + AI.setOperand(0, ConstantInt::get(AI.getArraySize()->getType(), 1)); + return &AI; + } + + // Get the first instruction in the entry block. + BasicBlock &EntryBlock = AI.getParent()->getParent()->getEntryBlock(); + Instruction *FirstInst = EntryBlock.getFirstNonPHIOrDbg(); + if (FirstInst != &AI) { + // If the entry block doesn't start with a zero-size alloca then move + // this one to the start of the entry block. There is no problem with + // dominance as the array size was forced to a constant earlier already. + AllocaInst *EntryAI = dyn_cast(FirstInst); + if (!EntryAI || !EntryAI->getAllocatedType()->isSized() || + TD->getTypeAllocSize(EntryAI->getAllocatedType()) != 0) { + AI.moveBefore(FirstInst); + return &AI; + } + + // Replace this zero-sized alloca with the one at the start of the entry + // block after ensuring that the address will be aligned enough for both + // types. + unsigned MaxAlign = + std::max(TD->getPrefTypeAlignment(EntryAI->getAllocatedType()), + TD->getPrefTypeAlignment(AI.getAllocatedType())); + EntryAI->setAlignment(MaxAlign); + if (AI.getType() != EntryAI->getType()) + return new BitCastInst(EntryAI, AI.getType()); + return ReplaceInstUsesWith(AI, EntryAI); + } + } } - // Try to aggressively remove allocas which are only used for GEPs, lifetime - // markers, and stores. 
This happens when SROA iteratively promotes stores - // out of the alloca, and we need to cleanup after it. - return removeDeadAlloca(*this, AI); + // At last, use the generic allocation site handler to aggressively remove + // unused allocas. + return visitAllocSite(AI); } diff --git a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp index 5168e2a..35a0bbb 100644 --- a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp +++ b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp @@ -464,9 +464,12 @@ Instruction *InstCombiner::visitUDiv(BinaryOperator &I) { // X udiv (C1 << N), where C1 is "1< X >> (N+C2) { const APInt *CI; Value *N; - if (match(Op1, m_Shl(m_Power2(CI), m_Value(N)))) { + if (match(Op1, m_Shl(m_Power2(CI), m_Value(N))) || + match(Op1, m_ZExt(m_Shl(m_Power2(CI), m_Value(N))))) { if (*CI != 1) N = Builder->CreateAdd(N, ConstantInt::get(I.getType(),CI->logBase2())); + if (ZExtInst *Z = dyn_cast(Op1)) + N = Builder->CreateZExt(N, Z->getDestTy()); if (I.isExact()) return BinaryOperator::CreateExactLShr(Op0, N); return BinaryOperator::CreateLShr(Op0, N); diff --git a/lib/Transforms/InstCombine/InstCombineSelect.cpp b/lib/Transforms/InstCombine/InstCombineSelect.cpp index e727b2c..291e800 100644 --- a/lib/Transforms/InstCombine/InstCombineSelect.cpp +++ b/lib/Transforms/InstCombine/InstCombineSelect.cpp @@ -129,6 +129,12 @@ Instruction *InstCombiner::FoldSelectOpOp(SelectInst &SI, Instruction *TI, if (TI->isCast()) { if (TI->getOperand(0)->getType() != FI->getOperand(0)->getType()) return 0; + // The select condition may be a vector. We may only change the operand + // type if the vector width remains the same (and matches the condition). + Type *CondTy = SI.getCondition()->getType(); + if (CondTy->isVectorTy() && CondTy->getVectorNumElements() != + FI->getOperand(0)->getType()->getVectorNumElements()) + return 0; } else { return 0; // unknown unary op. } @@ -498,7 +504,7 @@ Instruction *InstCombiner::visitSelectInstWithICmp(SelectInst &SI, // NOTE: if we wanted to, this is where to detect integer MIN/MAX - if (isa(CmpRHS)) { + if (CmpRHS != CmpLHS && isa(CmpRHS)) { if (CmpLHS == TrueVal && Pred == ICmpInst::ICMP_EQ) { // Transform (X == C) ? X : Y -> (X == C) ? 
C : Y SI.setOperand(1, CmpRHS); @@ -875,12 +881,16 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { if (SelectInst *TrueSI = dyn_cast(TrueVal)) { if (TrueSI->getCondition() == CondVal) { + if (SI.getTrueValue() == TrueSI->getTrueValue()) + return 0; SI.setOperand(1, TrueSI->getTrueValue()); return &SI; } } if (SelectInst *FalseSI = dyn_cast(FalseVal)) { if (FalseSI->getCondition() == CondVal) { + if (SI.getFalseValue() == FalseSI->getFalseValue()) + return 0; SI.setOperand(2, FalseSI->getFalseValue()); return &SI; } @@ -893,5 +903,16 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { return &SI; } + if (VectorType* VecTy = dyn_cast(SI.getType())) { + unsigned VWidth = VecTy->getNumElements(); + APInt UndefElts(VWidth, 0); + APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth)); + if (Value *V = SimplifyDemandedVectorElts(&SI, AllOnesEltMask, UndefElts)) { + if (V != &SI) + return ReplaceInstUsesWith(SI, V); + return &SI; + } + } + return 0; } diff --git a/lib/Transforms/InstCombine/InstCombineShifts.cpp b/lib/Transforms/InstCombine/InstCombineShifts.cpp index b31049e..4bb2403 100644 --- a/lib/Transforms/InstCombine/InstCombineShifts.cpp +++ b/lib/Transforms/InstCombine/InstCombineShifts.cpp @@ -151,7 +151,7 @@ static bool CanEvaluateShifted(Value *V, unsigned NumBits, bool isLeftShift, // We can always turn lshr(c1)+shl(c2) -> lshr(c3)+and(c4), but it isn't // profitable unless we know the and'd out bits are already zero. - if (CI->getZExtValue() > NumBits) { + if (CI->getValue().ult(TypeWidth) && CI->getZExtValue() > NumBits) { unsigned LowBits = CI->getZExtValue() - NumBits; if (MaskedValueIsZero(I->getOperand(0), APInt::getLowBitsSet(TypeWidth, NumBits) << LowBits)) @@ -529,6 +529,19 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1, ShiftOp = 0; if (ShiftOp && isa(ShiftOp->getOperand(1))) { + + // This is a constant shift of a constant shift. Be careful about hiding + // shl instructions behind bit masks. They are used to represent multiplies + // by a constant, and it is important that simple arithmetic expressions + // are still recognizable by scalar evolution. + // + // The transforms applied to shl are very similar to the transforms applied + // to mul by constant. We can be more aggressive about optimizing right + // shifts. + // + // Combinations of right and left shifts will still be optimized in + // DAGCombine where scalar evolution no longer applies. + ConstantInt *ShiftAmt1C = cast(ShiftOp->getOperand(1)); uint32_t ShiftAmt1 = ShiftAmt1C->getLimitedValue(TypeBits); uint32_t ShiftAmt2 = Op1->getLimitedValue(TypeBits); @@ -554,13 +567,6 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1, } if (ShiftAmt1 == ShiftAmt2) { - // If we have ((X >>? C) << C), turn this into X & (-1 << C). - if (I.getOpcode() == Instruction::Shl && - ShiftOp->getOpcode() != Instruction::Shl) { - APInt Mask(APInt::getHighBitsSet(TypeBits, TypeBits - ShiftAmt1)); - return BinaryOperator::CreateAnd(X, - ConstantInt::get(I.getContext(),Mask)); - } // If we have ((X << C) >>u C), turn this into X & (-1 >>u C). if (I.getOpcode() == Instruction::LShr && ShiftOp->getOpcode() == Instruction::Shl) { @@ -570,28 +576,23 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1, } } else if (ShiftAmt1 < ShiftAmt2) { uint32_t ShiftDiff = ShiftAmt2-ShiftAmt1; - - // (X >>? 
C1) << C2 --> X << (C2-C1) & (-1 << C2) + + // (X >>?,exact C1) << C2 --> X << (C2-C1) + // The inexact version is deferred to DAGCombine so we don't hide shl + // behind a bit mask. if (I.getOpcode() == Instruction::Shl && - ShiftOp->getOpcode() != Instruction::Shl) { + ShiftOp->getOpcode() != Instruction::Shl && + ShiftOp->isExact()) { assert(ShiftOp->getOpcode() == Instruction::LShr || ShiftOp->getOpcode() == Instruction::AShr); ConstantInt *ShiftDiffCst = ConstantInt::get(Ty, ShiftDiff); - if (ShiftOp->isExact()) { - // (X >>?,exact C1) << C2 --> X << (C2-C1) - BinaryOperator *NewShl = BinaryOperator::Create(Instruction::Shl, - X, ShiftDiffCst); - NewShl->setHasNoUnsignedWrap(I.hasNoUnsignedWrap()); - NewShl->setHasNoSignedWrap(I.hasNoSignedWrap()); - return NewShl; - } - Value *Shift = Builder->CreateShl(X, ShiftDiffCst); - - APInt Mask(APInt::getHighBitsSet(TypeBits, TypeBits - ShiftAmt2)); - return BinaryOperator::CreateAnd(Shift, - ConstantInt::get(I.getContext(),Mask)); + BinaryOperator *NewShl = BinaryOperator::Create(Instruction::Shl, + X, ShiftDiffCst); + NewShl->setHasNoUnsignedWrap(I.hasNoUnsignedWrap()); + NewShl->setHasNoSignedWrap(I.hasNoSignedWrap()); + return NewShl; } - + // (X << C1) >>u C2 --> X >>u (C2-C1) & (-1 >> C2) if (I.getOpcode() == Instruction::LShr && ShiftOp->getOpcode() == Instruction::Shl) { @@ -627,24 +628,19 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1, assert(ShiftAmt2 < ShiftAmt1); uint32_t ShiftDiff = ShiftAmt1-ShiftAmt2; - // (X >>? C1) << C2 --> X >>? (C1-C2) & (-1 << C2) + // (X >>?exact C1) << C2 --> X >>?exact (C1-C2) + // The inexact version is deferred to DAGCombine so we don't hide shl + // behind a bit mask. if (I.getOpcode() == Instruction::Shl && - ShiftOp->getOpcode() != Instruction::Shl) { + ShiftOp->getOpcode() != Instruction::Shl && + ShiftOp->isExact()) { ConstantInt *ShiftDiffCst = ConstantInt::get(Ty, ShiftDiff); - if (ShiftOp->isExact()) { - // (X >>?exact C1) << C2 --> X >>?exact (C1-C2) - BinaryOperator *NewShr = BinaryOperator::Create(ShiftOp->getOpcode(), - X, ShiftDiffCst); - NewShr->setIsExact(true); - return NewShr; - } - Value *Shift = Builder->CreateBinOp(ShiftOp->getOpcode(), - X, ShiftDiffCst); - APInt Mask(APInt::getHighBitsSet(TypeBits, TypeBits - ShiftAmt2)); - return BinaryOperator::CreateAnd(Shift, - ConstantInt::get(I.getContext(),Mask)); + BinaryOperator *NewShr = BinaryOperator::Create(ShiftOp->getOpcode(), + X, ShiftDiffCst); + NewShr->setIsExact(true); + return NewShr; } - + // (X << C1) >>u C2 --> X << (C1-C2) & (-1 >> C2) if (I.getOpcode() == Instruction::LShr && ShiftOp->getOpcode() == Instruction::Shl) { diff --git a/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp index 125c74a..54be8ed 100644 --- a/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp +++ b/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp @@ -989,6 +989,29 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, } break; } + case Instruction::Select: { + APInt LeftDemanded(DemandedElts), RightDemanded(DemandedElts); + if (ConstantVector* CV = dyn_cast(I->getOperand(0))) { + for (unsigned i = 0; i < VWidth; i++) { + if (CV->getAggregateElement(i)->isNullValue()) + LeftDemanded.clearBit(i); + else + RightDemanded.clearBit(i); + } + } + + TmpV = SimplifyDemandedVectorElts(I->getOperand(1), LeftDemanded, + UndefElts, Depth+1); + if (TmpV) { I->setOperand(1, TmpV); MadeChange = true; } + + TmpV = 
SimplifyDemandedVectorElts(I->getOperand(2), RightDemanded, + UndefElts2, Depth+1); + if (TmpV) { I->setOperand(2, TmpV); MadeChange = true; } + + // Output elements are undefined if both are undefined. + UndefElts &= UndefElts2; + break; + } case Instruction::BitCast: { // Vector->vector casts only. VectorType *VTy = dyn_cast(I->getOperand(0)->getType()); @@ -1074,6 +1097,12 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, // like undef&0. The result is known zero, not undef. UndefElts &= UndefElts2; break; + case Instruction::FPTrunc: + case Instruction::FPExt: + TmpV = SimplifyDemandedVectorElts(I->getOperand(0), DemandedElts, + UndefElts, Depth+1); + if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; } + break; case Instruction::Call: { IntrinsicInst *II = dyn_cast(I); diff --git a/lib/Transforms/InstCombine/InstructionCombining.cpp b/lib/Transforms/InstCombine/InstructionCombining.cpp index 066b2ec..68ecd51 100644 --- a/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -87,30 +87,34 @@ void InstCombiner::getAnalysisUsage(AnalysisUsage &AU) const { } +Value *InstCombiner::EmitGEPOffset(User *GEP) { + return llvm::EmitGEPOffset(Builder, *getTargetData(), GEP); +} + /// ShouldChangeType - Return true if it is desirable to convert a computation /// from 'From' to 'To'. We don't want to convert from a legal to an illegal /// type for example, or from a smaller to a larger illegal type. bool InstCombiner::ShouldChangeType(Type *From, Type *To) const { assert(From->isIntegerTy() && To->isIntegerTy()); - + // If we don't have TD, we don't know if the source/dest are legal. if (!TD) return false; - + unsigned FromWidth = From->getPrimitiveSizeInBits(); unsigned ToWidth = To->getPrimitiveSizeInBits(); bool FromLegal = TD->isLegalInteger(FromWidth); bool ToLegal = TD->isLegalInteger(ToWidth); - + // If this is a legal integer from type, and the result would be an illegal // type, don't do the transformation. if (FromLegal && !ToLegal) return false; - + // Otherwise, if both are illegal, do not increase the size of the result. We // do allow things like i160 -> i64, but not i64 -> i160. if (!FromLegal && !ToLegal && ToWidth > FromWidth) return false; - + return true; } @@ -127,7 +131,7 @@ static bool MaintainNoSignedWrap(BinaryOperator &I, Value *B, Value *C) { // We reason about Add and Sub Only. Instruction::BinaryOps Opcode = I.getOpcode(); - if (Opcode != Instruction::Add && + if (Opcode != Instruction::Add && Opcode != Instruction::Sub) { return false; } @@ -203,7 +207,7 @@ bool InstCombiner::SimplifyAssociativeOrCommutative(BinaryOperator &I) { // Conservatively clear the optional flags, since they may not be // preserved by the reassociation. if (MaintainNoSignedWrap(I, B, C) && - (!Op0 || (isa(Op0) && Op0->hasNoSignedWrap()))) { + (!Op0 || (isa(Op0) && Op0->hasNoSignedWrap()))) { // Note: this is only valid because SimplifyBinOp doesn't look at // the operands to Op0. 
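The nsw bookkeeping above follows a simple rule: when reassociating (X op C1) op C2 into X op (C1 op C2), the no-signed-wrap flag may survive only if folding the two constants cannot itself overflow the type. A minimal sketch of that rule for an 8-bit integer; the helper name is illustrative and this is a simplified model of the MaintainNoSignedWrap idea, not the LLVM API:

#include <stdint.h>
#include <limits>

// Reassociation may keep nsw only when the folded constant C1 + C2 still
// fits the operand type.
static bool foldedAddKeepsNSW(int8_t C1, int8_t C2) {
  int Folded = int(C1) + int(C2);  // widen so this check itself cannot wrap
  return Folded >= std::numeric_limits<int8_t>::min() &&
         Folded <= std::numeric_limits<int8_t>::max();
}
// foldedAddKeepsNSW(100, 27) -> true (127 fits in i8);
// foldedAddKeepsNSW(100, 28) -> false, so the optional flags must be cleared.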
I.clearSubclassOptionalData(); @@ -211,7 +215,7 @@ bool InstCombiner::SimplifyAssociativeOrCommutative(BinaryOperator &I) { } else { I.clearSubclassOptionalData(); } - + Changed = true; ++NumReassoc; continue; @@ -540,7 +544,7 @@ static Value *FoldOperationIntoSelectOperand(Instruction &I, Value *SO, Value *Op0 = SO, *Op1 = ConstOperand; if (!ConstIsRHS) std::swap(Op0, Op1); - + if (BinaryOperator *BO = dyn_cast(&I)) return IC->Builder->CreateBinOp(BO->getOpcode(), Op0, Op1, SO->getName()+".op"); @@ -579,7 +583,7 @@ Instruction *InstCombiner::FoldOpIntoSelect(Instruction &Op, SelectInst *SI) { if (SrcTy && SrcTy->getNumElements() != DestTy->getNumElements()) return 0; } - + Value *SelectTrueVal = FoldOperationIntoSelectOperand(Op, TV, this); Value *SelectFalseVal = FoldOperationIntoSelectOperand(Op, FV, this); @@ -599,7 +603,7 @@ Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I) { unsigned NumPHIValues = PN->getNumIncomingValues(); if (NumPHIValues == 0) return 0; - + // We normally only transform phis with a single use. However, if a PHI has // multiple uses and they are all the same operation, we can fold *all* of the // uses into the PHI. @@ -613,7 +617,7 @@ Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I) { } // Otherwise, we can replace *all* users with the new PHI we form. } - + // Check to see if all of the operands of the PHI are simple constants // (constantint/constantfp/undef). If there is one non-constant value, // remember the BB it is in. If there is more than one or if *it* is a PHI, @@ -627,7 +631,7 @@ Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I) { if (isa(InVal)) return 0; // Itself a phi. if (NonConstBB) return 0; // More than one non-const value. - + NonConstBB = PN->getIncomingBlock(i); // If the InVal is an invoke at the end of the pred block, then we can't @@ -635,14 +639,14 @@ Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I) { if (InvokeInst *II = dyn_cast(InVal)) if (II->getParent() == NonConstBB) return 0; - + // If the incoming non-constant value is in I's block, we will remove one // instruction, but insert another equivalent one, leading to infinite // instcombine. if (NonConstBB == I.getParent()) return 0; } - + // If there is exactly one non-constant value, we can insert a copy of the // operation in that block. However, if this is a critical edge, we would be // inserting the computation one some other paths (e.g. inside a loop). Only @@ -656,12 +660,12 @@ Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I) { PHINode *NewPN = PHINode::Create(I.getType(), PN->getNumIncomingValues()); InsertNewInstBefore(NewPN, *PN); NewPN->takeName(PN); - + // If we are going to have to insert a new computation, do so right before the // predecessors terminator. if (NonConstBB) Builder->SetInsertPoint(NonConstBB->getTerminator()); - + // Next, add all of the operands to the PHI. 
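Before the per-operand rewrite that follows, a source-level picture of what FoldOpIntoPhi achieves may help. This is a hedged analogue of the transform, not the transform itself; the function names are illustrative:

// An operation applied after a merge is applied to each incoming value
// instead: constant inputs fold away, and the single non-constant input is
// recomputed in its predecessor block (the NonConstBB restriction above).
static int opAfterMerge(bool c, int x) {
  int t = c ? 4 : x;     // the "phi": one constant, one computed input
  return t + 2;          // operation applied after the merge
}
static int opPerIncoming(bool c, int x) {
  return c ? 6 : x + 2;  // 4+2 folded; x+2 hoisted to the other path
}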
if (SelectInst *SI = dyn_cast(&I)) { // We only currently try to fold the condition of a select when it is a phi, @@ -706,20 +710,20 @@ Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I) { PN->getIncomingValue(i), C, "phitmp"); NewPN->addIncoming(InV, PN->getIncomingBlock(i)); } - } else { + } else { CastInst *CI = cast(&I); Type *RetTy = CI->getType(); for (unsigned i = 0; i != NumPHIValues; ++i) { Value *InV; if (Constant *InC = dyn_cast(PN->getIncomingValue(i))) InV = ConstantExpr::getCast(CI->getOpcode(), InC, RetTy); - else + else InV = Builder->CreateCast(CI->getOpcode(), PN->getIncomingValue(i), I.getType(), "phitmp"); NewPN->addIncoming(InV, PN->getIncomingBlock(i)); } } - + for (Value::use_iterator UI = PN->use_begin(), E = PN->use_end(); UI != E; ) { Instruction *User = cast(*UI++); @@ -734,11 +738,11 @@ Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I) { /// or not there is a sequence of GEP indices into the type that will land us at /// the specified offset. If so, fill them into NewIndices and return the /// resultant element type, otherwise return null. -Type *InstCombiner::FindElementAtOffset(Type *Ty, int64_t Offset, +Type *InstCombiner::FindElementAtOffset(Type *Ty, int64_t Offset, SmallVectorImpl &NewIndices) { if (!TD) return 0; if (!Ty->isSized()) return 0; - + // Start with the index over the outer type. Note that the type size // might be zero (even if the offset isn't zero) if the indexed type // is something like [0 x {int, int}] @@ -747,7 +751,7 @@ Type *InstCombiner::FindElementAtOffset(Type *Ty, int64_t Offset, if (int64_t TySize = TD->getTypeAllocSize(Ty)) { FirstIdx = Offset/TySize; Offset -= FirstIdx*TySize; - + // Handle hosts where % returns negative instead of values [0..TySize). if (Offset < 0) { --FirstIdx; @@ -756,24 +760,24 @@ Type *InstCombiner::FindElementAtOffset(Type *Ty, int64_t Offset, } assert((uint64_t)Offset < (uint64_t)TySize && "Out of range offset"); } - + NewIndices.push_back(ConstantInt::get(IntPtrTy, FirstIdx)); - + // Index into the types. If we fail, set OrigBase to null. while (Offset) { // Indexing into tail padding between struct/array elements. if (uint64_t(Offset*8) >= TD->getTypeSizeInBits(Ty)) return 0; - + if (StructType *STy = dyn_cast(Ty)) { const StructLayout *SL = TD->getStructLayout(STy); assert(Offset < (int64_t)SL->getSizeInBytes() && "Offset must stay within the indexed type"); - + unsigned Elt = SL->getElementContainingOffset(Offset); NewIndices.push_back(ConstantInt::get(Type::getInt32Ty(Ty->getContext()), Elt)); - + Offset -= SL->getElementOffset(Elt); Ty = STy->getElementType(Elt); } else if (ArrayType *AT = dyn_cast(Ty)) { @@ -787,7 +791,7 @@ Type *InstCombiner::FindElementAtOffset(Type *Ty, int64_t Offset, return 0; } } - + return Ty; } @@ -948,7 +952,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { Res->setIsInBounds(GEP.isInBounds()); return Res; } - + if (ArrayType *XATy = dyn_cast(StrippedPtrTy->getElementType())){ // GEP (bitcast [10 x i8]* X to [0 x i8]*), i32 0, ... ? 
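FindElementAtOffset, defined above and used by the bitcast-through-GEP folding below, is easiest to follow on a concrete layout. A worked example, assuming a typical 64-bit TargetData (so the struct below occupies 24 bytes with the array field at byte 8; the layout is an assumption of the sketch):

#include <assert.h>
#include <stddef.h>
#include <stdint.h>

// For T = { i32, [2 x i64] }, byte offset 16 decomposes into GEP indices
// [0, 1, 1]: outer index 16/24 = 0 (16 bytes remain), struct field 1 at
// byte 8 (8 bytes remain), array element 8/8 = 1 (0 remain), so the walk
// succeeds and returns the i64 element type.
struct T { int32_t a; int64_t b[2]; };
int main() {
  assert(sizeof(T) == 24 && offsetof(T, b) == 8);      // assumed layout
  assert(offsetof(T, b) + 1 * sizeof(int64_t) == 16);  // indices [0, 1, 1]
  return 0;
}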
@@ -981,16 +985,16 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { // V and GEP are both pointer types --> BitCast return new BitCastInst(NewGEP, GEP.getType()); } - + // Transform things like: // getelementptr i8* bitcast ([100 x double]* X to i8*), i32 %tmp // (where tmp = 8*tmp2) into: // getelementptr [100 x double]* %arr, i32 0, i32 %tmp2; bitcast - + if (TD && SrcElTy->isArrayTy() && ResElTy->isIntegerTy(8)) { uint64_t ArrayEltSize = TD->getTypeAllocSize(cast(SrcElTy)->getElementType()); - + // Check to see if "tmp" is a scale by a multiple of ArrayEltSize. We // allow either a mul, shift, or constant here. Value *NewIdx = 0; @@ -1015,7 +1019,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { NewIdx = Inst->getOperand(0); } } - + // If the index will be to exactly the right offset with the scale taken // out, perform the transformation. Note, we don't know whether Scale is // signed or not. We'll use unsigned version of division/modulo @@ -1054,10 +1058,9 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { !isa(BCI->getOperand(0)) && GEP.hasAllConstantIndices() && StrippedPtrTy->getAddressSpace() == GEP.getPointerAddressSpace()) { - // Determine how much the GEP moves the pointer. We are guaranteed to get - // a constant back from EmitGEPOffset. - ConstantInt *OffsetV = cast(EmitGEPOffset(&GEP)); - int64_t Offset = OffsetV->getSExtValue(); + // Determine how much the GEP moves the pointer. + SmallVector Ops(GEP.idx_begin(), GEP.idx_end()); + int64_t Offset = TD->getIndexedOffset(GEP.getPointerOperandType(), Ops); // If this GEP instruction doesn't move the pointer, just replace the GEP // with a bitcast of the real input to the dest type. @@ -1065,7 +1068,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { // If the bitcast is of an allocation, and the allocation will be // converted to match the type of the cast, don't touch this. if (isa(BCI->getOperand(0)) || - isMalloc(BCI->getOperand(0))) { + isAllocationFn(BCI->getOperand(0))) { // See if the bitcast simplifies, if so, don't nuke this GEP yet. if (Instruction *I = visitBitCast(*BCI)) { if (I != BCI) { @@ -1078,7 +1081,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { } return new BitCastInst(BCI->getOperand(0), GEP.getType()); } - + // Otherwise, if the offset is non-zero, we need to find out if there is a // field at Offset in 'A's type. If so, we can pull the cast through the // GEP. @@ -1089,68 +1092,103 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { Value *NGEP = GEP.isInBounds() ? 
Builder->CreateInBoundsGEP(BCI->getOperand(0), NewIndices) : Builder->CreateGEP(BCI->getOperand(0), NewIndices); - + if (NGEP->getType() == GEP.getType()) return ReplaceInstUsesWith(GEP, NGEP); NGEP->takeName(&GEP); return new BitCastInst(NGEP, GEP.getType()); } } - } - + } + return 0; } -static bool IsOnlyNullComparedAndFreed(Value *V, SmallVectorImpl &Users, - int Depth = 0) { - if (Depth == 8) - return false; +static bool +isAllocSiteRemovable(Instruction *AI, SmallVectorImpl &Users) { + SmallVector Worklist; + Worklist.push_back(AI); - for (Value::use_iterator UI = V->use_begin(), UE = V->use_end(); - UI != UE; ++UI) { - User *U = *UI; - if (isFreeCall(U)) { - Users.push_back(U); - continue; - } - if (ICmpInst *ICI = dyn_cast(U)) { - if (ICI->isEquality() && isa(ICI->getOperand(1))) { - Users.push_back(ICI); + do { + Instruction *PI = Worklist.pop_back_val(); + for (Value::use_iterator UI = PI->use_begin(), UE = PI->use_end(); UI != UE; + ++UI) { + Instruction *I = cast(*UI); + switch (I->getOpcode()) { + default: + // Give up the moment we see something we can't handle. + return false; + + case Instruction::BitCast: + case Instruction::GetElementPtr: + Users.push_back(I); + Worklist.push_back(I); continue; - } - } - if (BitCastInst *BCI = dyn_cast(U)) { - if (IsOnlyNullComparedAndFreed(BCI, Users, Depth+1)) { - Users.push_back(BCI); + + case Instruction::ICmp: { + ICmpInst *ICI = cast(I); + // We can fold eq/ne comparisons with null to false/true, respectively. + if (!ICI->isEquality() || !isa(ICI->getOperand(1))) + return false; + Users.push_back(I); continue; } - } - if (GetElementPtrInst *GEPI = dyn_cast(U)) { - if (IsOnlyNullComparedAndFreed(GEPI, Users, Depth+1)) { - Users.push_back(GEPI); + + case Instruction::Call: + // Ignore no-op and store intrinsics. + if (IntrinsicInst *II = dyn_cast(I)) { + switch (II->getIntrinsicID()) { + default: + return false; + + case Intrinsic::memmove: + case Intrinsic::memcpy: + case Intrinsic::memset: { + MemIntrinsic *MI = cast(II); + if (MI->isVolatile() || MI->getRawDest() != PI) + return false; + } + // fall through + case Intrinsic::dbg_declare: + case Intrinsic::dbg_value: + case Intrinsic::invariant_start: + case Intrinsic::invariant_end: + case Intrinsic::lifetime_start: + case Intrinsic::lifetime_end: + case Intrinsic::objectsize: + Users.push_back(I); + continue; + } + } + + if (isFreeCall(I)) { + Users.push_back(I); + continue; + } + return false; + + case Instruction::Store: { + StoreInst *SI = cast(I); + if (SI->isVolatile() || SI->getPointerOperand() != PI) + return false; + Users.push_back(I); continue; } - } - if (IntrinsicInst *II = dyn_cast(U)) { - if (II->getIntrinsicID() == Intrinsic::lifetime_start || - II->getIntrinsicID() == Intrinsic::lifetime_end) { - Users.push_back(II); - continue; } + llvm_unreachable("missing a return?"); } - return false; - } + } while (!Worklist.empty()); return true; } -Instruction *InstCombiner::visitMalloc(Instruction &MI) { +Instruction *InstCombiner::visitAllocSite(Instruction &MI) { // If we have a malloc call which is only used in any amount of comparisons // to null and free calls, delete the calls and replace the comparisons with // true or false as appropriate. 
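The worklist scan above accepts exactly the users it lists: bitcasts and GEPs (followed transitively), equality compares against null, non-volatile memory intrinsics and stores that only write through the pointer, the debug/lifetime/invariant/objectsize intrinsics, and free. At the source level that makes a pattern like the following fully removable; a hedged C-style sketch, with later CFG cleanup finishing the job:

#include <stdlib.h>
#include <string.h>

int allUsesRemovable(size_t n) {
  char *p = (char *)malloc(n);  // the allocation site
  if (p == NULL)                // icmp eq null: folded to false
    return 0;
  memset(p, 0, n);              // non-volatile memset through p: dropped
  p[0] = 1;                     // store into the dead allocation: dropped
  free(p);                      // free of the allocation: dropped
  return 1;                     // all that remains after cleanup
}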
SmallVector Users; - if (IsOnlyNullComparedAndFreed(&MI, Users)) { + if (isAllocSiteRemovable(&MI, Users)) { for (unsigned i = 0, e = Users.size(); i != e; ++i) { Instruction *I = cast_or_null(&*Users[i]); if (!I) continue; @@ -1161,9 +1199,23 @@ Instruction *InstCombiner::visitMalloc(Instruction &MI) { C->isFalseWhenEqual())); } else if (isa(I) || isa(I)) { ReplaceInstUsesWith(*I, UndefValue::get(I->getType())); + } else if (IntrinsicInst *II = dyn_cast(I)) { + if (II->getIntrinsicID() == Intrinsic::objectsize) { + ConstantInt *CI = cast(II->getArgOperand(1)); + uint64_t DontKnow = CI->isZero() ? -1ULL : 0; + ReplaceInstUsesWith(*I, ConstantInt::get(I->getType(), DontKnow)); + } } EraseInstFromFunction(*I); } + + if (InvokeInst *II = dyn_cast(&MI)) { + // Replace invoke with a NOP intrinsic to maintain the original CFG + Module *M = II->getParent()->getParent()->getParent(); + Function *F = Intrinsic::getDeclaration(M, Intrinsic::donothing); + InvokeInst::Create(F, II->getNormalDest(), II->getUnwindDest(), + ArrayRef(), "", II->getParent()); + } return EraseInstFromFunction(MI); } return 0; @@ -1181,7 +1233,7 @@ Instruction *InstCombiner::visitFree(CallInst &FI) { UndefValue::get(Type::getInt1PtrTy(FI.getContext()))); return EraseInstFromFunction(FI); } - + // If we have 'free null' delete the instruction. This can happen in stl code // when lots of inlining happens. if (isa(Op)) @@ -1207,14 +1259,14 @@ Instruction *InstCombiner::visitBranchInst(BranchInst &BI) { // Cannonicalize fcmp_one -> fcmp_oeq FCmpInst::Predicate FPred; Value *Y; - if (match(&BI, m_Br(m_FCmp(FPred, m_Value(X), m_Value(Y)), + if (match(&BI, m_Br(m_FCmp(FPred, m_Value(X), m_Value(Y)), TrueDest, FalseDest)) && BI.getCondition()->hasOneUse()) if (FPred == FCmpInst::FCMP_ONE || FPred == FCmpInst::FCMP_OLE || FPred == FCmpInst::FCMP_OGE) { FCmpInst *Cond = cast(BI.getCondition()); Cond->setPredicate(FCmpInst::getInversePredicate(FPred)); - + // Swap Destinations and condition. BI.swapSuccessors(); Worklist.Add(Cond); @@ -1280,7 +1332,7 @@ Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) { } return 0; // Can't handle other constants } - + if (InsertValueInst *IV = dyn_cast(Agg)) { // We're extracting from an insertvalue instruction, compare the indices const unsigned *exti, *exte, *insi, *inse; @@ -1329,7 +1381,7 @@ Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) { // %E = extractvalue { i32, { i32 } } %I, 1, 0 // with // %E extractvalue { i32 } { i32 42 }, 0 - return ExtractValueInst::Create(IV->getInsertedValueOperand(), + return ExtractValueInst::Create(IV->getInsertedValueOperand(), makeArrayRef(exti, exte)); } if (IntrinsicInst *II = dyn_cast(Agg)) { @@ -1349,7 +1401,7 @@ Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) { EraseInstFromFunction(*II); return BinaryOperator::CreateAdd(LHS, RHS); } - + // If the normal result of the add is dead, and the RHS is a constant, // we can transform this into a range comparison. // overflow = uadd a, -4 --> overflow = icmp ugt a, 3 @@ -1798,7 +1850,7 @@ static bool TryToSinkInstruction(Instruction *I, BasicBlock *DestBlock) { /// many instructions are dead or constant). Additionally, if we find a branch /// whose condition is a known constant, we only visit the reachable successors. 
/// -static bool AddReachableCodeToWorklist(BasicBlock *BB, +static bool AddReachableCodeToWorklist(BasicBlock *BB, SmallPtrSet &Visited, InstCombiner &IC, const TargetData *TD, @@ -1812,13 +1864,13 @@ static bool AddReachableCodeToWorklist(BasicBlock *BB, do { BB = Worklist.pop_back_val(); - + // We have now visited this block! If we've already been here, ignore it. if (!Visited.insert(BB)) continue; for (BasicBlock::iterator BBI = BB->begin(), E = BB->end(); BBI != E; ) { Instruction *Inst = BBI++; - + // DCE instruction if trivially dead. if (isInstructionTriviallyDead(Inst)) { ++NumDeadInst; @@ -1826,7 +1878,7 @@ static bool AddReachableCodeToWorklist(BasicBlock *BB, Inst->eraseFromParent(); continue; } - + // ConstantProp instruction if trivially constant. if (!Inst->use_empty() && isa(Inst->getOperand(0))) if (Constant *C = ConstantFoldInstruction(Inst, TD, TLI)) { @@ -1837,7 +1889,7 @@ static bool AddReachableCodeToWorklist(BasicBlock *BB, Inst->eraseFromParent(); continue; } - + if (TD) { // See if we can constant fold its operands. for (User::op_iterator i = Inst->op_begin(), e = Inst->op_end(); @@ -1881,17 +1933,17 @@ static bool AddReachableCodeToWorklist(BasicBlock *BB, Worklist.push_back(ReachableBB); continue; } - + // Otherwise it is the default destination. Worklist.push_back(SI->getDefaultDest()); continue; } } - + for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) Worklist.push_back(TI->getSuccessor(i)); } while (!Worklist.empty()); - + // Once we've found all of the instructions to add to instcombine's worklist, // add them in reverse order. This way instcombine will visit from the top // of the function down. This jives well with the way that it adds all uses @@ -1899,13 +1951,13 @@ static bool AddReachableCodeToWorklist(BasicBlock *BB, // some N^2 behavior in pathological cases. IC.Worklist.AddInitialGroup(&InstrsForInstCombineWorklist[0], InstrsForInstCombineWorklist.size()); - + return MadeIRChange; } bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) { MadeIRChange = false; - + DEBUG(errs() << "\n\nINSTCOMBINE ITERATION #" << Iteration << " on " << F.getName() << "\n"); @@ -1976,13 +2028,13 @@ bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) { BasicBlock *BB = I->getParent(); Instruction *UserInst = cast(I->use_back()); BasicBlock *UserParent; - + // Get the block the use occurs in. if (PHINode *PN = dyn_cast(UserInst)) UserParent = PN->getIncomingBlock(I->use_begin().getUse()); else UserParent = UserInst->getParent(); - + if (UserParent != BB) { bool UserIsSuccessor = false; // See if the user is one of our successors. @@ -2004,7 +2056,7 @@ bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) { // Now that we have an instruction, try combining it to simplify it. Builder->SetInsertPoint(I->getParent(), I); Builder->SetCurrentDebugLocation(I->getDebugLoc()); - + #ifndef NDEBUG std::string OrigI; #endif @@ -2069,14 +2121,14 @@ bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) { bool InstCombiner::runOnFunction(Function &F) { TD = getAnalysisIfAvailable(); TLI = &getAnalysis(); - + /// Builder - This is an IRBuilder that automatically inserts new /// instructions into the worklist when they are created. 
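DoOneIteration above is an instance of the classic combine driver: pop one instruction, try to simplify it, requeue anything the rewrite may have exposed, and repeat to a fixpoint. Stripped of the LLVM types, the shape is roughly the following; every name here is a placeholder, not the real API:

#include <vector>

struct Inst { bool Simplified = false; };

// Stand-in for one combine step; a real step would also push the users of
// any rewritten instruction back onto the worklist.
static bool combineOne(Inst *I, std::vector<Inst *> &Worklist) {
  (void)Worklist;
  if (I->Simplified) return false;
  I->Simplified = true;
  return true;
}

static bool runToFixpoint(std::vector<Inst *> Worklist) {
  bool Changed = false;
  while (!Worklist.empty()) {
    Inst *I = Worklist.back();
    Worklist.pop_back();
    if (combineOne(I, Worklist))
      Changed = true;
  }
  return Changed;
}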
- IRBuilder + IRBuilder TheBuilder(F.getContext(), TargetFolder(TD), InstCombineIRInserter(Worklist)); Builder = &TheBuilder; - + bool EverMadeChange = false; // Lower dbg.declare intrinsics otherwise their value may be clobbered @@ -2087,7 +2139,7 @@ bool InstCombiner::runOnFunction(Function &F) { unsigned Iteration = 0; while (DoOneIteration(F, Iteration++)) EverMadeChange = true; - + Builder = 0; return EverMadeChange; } diff --git a/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/lib/Transforms/Instrumentation/AddressSanitizer.cpp index b43b9e5..bf35eac 100644 --- a/lib/Transforms/Instrumentation/AddressSanitizer.cpp +++ b/lib/Transforms/Instrumentation/AddressSanitizer.cpp @@ -16,20 +16,23 @@ #define DEBUG_TYPE "asan" #include "FunctionBlackList.h" +#include "llvm/Function.h" +#include "llvm/IRBuilder.h" +#include "llvm/InlineAsm.h" +#include "llvm/IntrinsicInst.h" +#include "llvm/LLVMContext.h" +#include "llvm/Module.h" +#include "llvm/Type.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/OwningPtr.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringExtras.h" -#include "llvm/Function.h" -#include "llvm/IntrinsicInst.h" -#include "llvm/LLVMContext.h" -#include "llvm/Module.h" +#include "llvm/ADT/Triple.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/DataTypes.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/IRBuilder.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Support/system_error.h" #include "llvm/Target/TargetData.h" @@ -37,7 +40,6 @@ #include "llvm/Transforms/Instrumentation.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/ModuleUtils.h" -#include "llvm/Type.h" #include #include @@ -47,6 +49,7 @@ using namespace llvm; static const uint64_t kDefaultShadowScale = 3; static const uint64_t kDefaultShadowOffset32 = 1ULL << 29; static const uint64_t kDefaultShadowOffset64 = 1ULL << 44; +static const uint64_t kDefaultShadowOffsetAndroid = 0; static const size_t kMaxStackMallocSize = 1 << 16; // 64K static const uintptr_t kCurrentStackFrameMagic = 0x41B58AB3; @@ -70,6 +73,9 @@ static const int kAsanStackMidRedzoneMagic = 0xf2; static const int kAsanStackRightRedzoneMagic = 0xf3; static const int kAsanStackPartialRedzoneMagic = 0xf4; +// Accesses sizes are powers of two: 1, 2, 4, 8, 16. +static const size_t kNumberOfAccessSizes = 5; + // Command-line flags. // This flag may need to be replaced with -f[no-]asan-reads. @@ -77,6 +83,17 @@ static cl::opt ClInstrumentReads("asan-instrument-reads", cl::desc("instrument read instructions"), cl::Hidden, cl::init(true)); static cl::opt ClInstrumentWrites("asan-instrument-writes", cl::desc("instrument write instructions"), cl::Hidden, cl::init(true)); +static cl::opt ClInstrumentAtomics("asan-instrument-atomics", + cl::desc("instrument atomic instructions (rmw, cmpxchg)"), + cl::Hidden, cl::init(true)); +// This flag limits the number of instructions to be instrumented +// in any given BB. Normally, this should be set to unlimited (INT_MAX), +// but due to http://llvm.org/bugs/show_bug.cgi?id=12652 we temporary +// set it to 10000. +static cl::opt ClMaxInsnsToInstrumentPerBB("asan-max-ins-per-bb", + cl::init(10000), + cl::desc("maximal number of instructions to instrument in any given BB"), + cl::Hidden); // This flag may need to be replaced with -f[no]asan-stack. 
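The shadow mapping these constants and flags configure is the usual ASan one: a shadow byte covers 2^kDefaultShadowScale = 8 application bytes, memToShadow below computes (Addr >> scale) + offset in IR, and TypeSizeToSizeIndex further down picks one of the kNumberOfAccessSizes callbacks by log2 of the access size. A minimal sketch of both for the 32-bit offset; the function names are illustrative:

#include <stdint.h>

static uintptr_t shadowFor32(uintptr_t Addr) {
  return (Addr >> 3) + ((uintptr_t)1 << 29);  // scale 3, kDefaultShadowOffset32
}

static unsigned accessSizeIndex(unsigned TypeSizeInBits) {
  unsigned Bytes = TypeSizeInBits / 8, Idx = 0;
  while ((1u << Idx) < Bytes) ++Idx;  // log2: sizes 1,2,4,8,16 -> 0..4
  return Idx;                         // always < kNumberOfAccessSizes here
}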
static cl::opt ClStack("asan-stack", cl::desc("Handle stack memory"), cl::Hidden, cl::init(true)); @@ -125,18 +142,29 @@ static cl::opt ClDebugMax("asan-debug-max", cl::desc("Debug man inst"), namespace { +/// An object of this type is created while instrumenting every function. +struct AsanFunctionContext { + AsanFunctionContext(Function &Function) : F(Function) { } + + Function &F; +}; + /// AddressSanitizer: instrument the code in module to find memory bugs. struct AddressSanitizer : public ModulePass { AddressSanitizer(); virtual const char *getPassName() const; - void instrumentMop(Instruction *I); - void instrumentAddress(Instruction *OrigIns, IRBuilder<> &IRB, + void instrumentMop(AsanFunctionContext &AFC, Instruction *I); + void instrumentAddress(AsanFunctionContext &AFC, + Instruction *OrigIns, IRBuilder<> &IRB, Value *Addr, uint32_t TypeSize, bool IsWrite); - Instruction *generateCrashCode(IRBuilder<> &IRB, Value *Addr, - bool IsWrite, uint32_t TypeSize); - bool instrumentMemIntrinsic(MemIntrinsic *MI); - void instrumentMemIntrinsicParam(Instruction *OrigIns, Value *Addr, - Value *Size, + Value *createSlowPathCmp(IRBuilder<> &IRB, Value *AddrLong, + Value *ShadowValue, uint32_t TypeSize); + Instruction *generateCrashCode(BasicBlock *BB, Value *Addr, Value *PC, + bool IsWrite, size_t AccessSizeIndex); + bool instrumentMemIntrinsic(AsanFunctionContext &AFC, MemIntrinsic *MI); + void instrumentMemIntrinsicParam(AsanFunctionContext &AFC, + Instruction *OrigIns, Value *Addr, + Value *Size, Instruction *InsertBefore, bool IsWrite); Value *memToShadow(Value *Shadow, IRBuilder<> &IRB); bool handleFunction(Module &M, Function &F); @@ -144,7 +172,6 @@ struct AddressSanitizer : public ModulePass { bool poisonStackInFunction(Module &M, Function &F); virtual bool runOnModule(Module &M); bool insertGlobalRedzones(Module &M); - BranchInst *splitBlockAndInsertIfThen(Instruction *SplitBefore, Value *Cmp); static char ID; // Pass identification, replacement for typeid private: @@ -163,11 +190,11 @@ struct AddressSanitizer : public ModulePass { return getAlignedSize(SizeInBytes); } + Function *checkInterfaceFunction(Constant *FuncOrBitcast); void PoisonStack(const ArrayRef &AllocaVec, IRBuilder<> IRB, Value *ShadowBase, bool DoPoison); bool LooksLikeCodeInBug11395(Instruction *I); - Module *CurrentModule; LLVMContext *C; TargetData *TD; uint64_t MappingOffset; @@ -180,7 +207,11 @@ struct AddressSanitizer : public ModulePass { Function *AsanInitFunction; Instruction *CtorInsertBefore; OwningPtr BL; + // This array is indexed by AccessIsWrite and log2(AccessSize). + Function *AsanErrorCallback[2][kNumberOfAccessSizes]; + InlineAsm *EmptyAsm; }; + } // namespace char AddressSanitizer::ID = 0; @@ -196,6 +227,12 @@ const char *AddressSanitizer::getPassName() const { return "AddressSanitizer"; } +static size_t TypeSizeToSizeIndex(uint32_t TypeSize) { + size_t Res = CountTrailingZeros_32(TypeSize / 8); + assert(Res < kNumberOfAccessSizes); + return Res; +} + // Create a constant for Str so that we can pass it to the run-time lib. static GlobalVariable *createPrivateGlobalForString(Module &M, StringRef Str) { Constant *StrConst = ConstantDataArray::getString(M.getContext(), Str); @@ -206,29 +243,32 @@ static GlobalVariable *createPrivateGlobalForString(Module &M, StringRef Str) { // Split the basic block and insert an if-then code. 
// Before: // Head -// SplitBefore +// Cmp // Tail // After: // Head // if (Cmp) -// NewBasicBlock -// SplitBefore +// ThenBlock // Tail // -// Returns the NewBasicBlock's terminator. -BranchInst *AddressSanitizer::splitBlockAndInsertIfThen( - Instruction *SplitBefore, Value *Cmp) { +// If ThenBlock is zero, a new block is created and its terminator is returned. +// Otherwize 0 is returned. +static BranchInst *splitBlockAndInsertIfThen(Value *Cmp, + BasicBlock *ThenBlock = 0) { + Instruction *SplitBefore = cast(Cmp)->getNextNode(); BasicBlock *Head = SplitBefore->getParent(); BasicBlock *Tail = Head->splitBasicBlock(SplitBefore); TerminatorInst *HeadOldTerm = Head->getTerminator(); - BasicBlock *NewBasicBlock = - BasicBlock::Create(*C, "", Head->getParent()); - BranchInst *HeadNewTerm = BranchInst::Create(/*ifTrue*/NewBasicBlock, - /*ifFalse*/Tail, - Cmp); + BranchInst *CheckTerm = 0; + if (!ThenBlock) { + LLVMContext &C = Head->getParent()->getParent()->getContext(); + ThenBlock = BasicBlock::Create(C, "", Head->getParent(), Tail); + CheckTerm = BranchInst::Create(Tail, ThenBlock); + } + BranchInst *HeadNewTerm = + BranchInst::Create(/*ifTrue*/ThenBlock, /*ifFalse*/Tail, Cmp); ReplaceInstWithInst(HeadOldTerm, HeadNewTerm); - BranchInst *CheckTerm = BranchInst::Create(Tail, NewBasicBlock); return CheckTerm; } @@ -242,12 +282,13 @@ Value *AddressSanitizer::memToShadow(Value *Shadow, IRBuilder<> &IRB) { MappingOffset)); } -void AddressSanitizer::instrumentMemIntrinsicParam(Instruction *OrigIns, +void AddressSanitizer::instrumentMemIntrinsicParam( + AsanFunctionContext &AFC, Instruction *OrigIns, Value *Addr, Value *Size, Instruction *InsertBefore, bool IsWrite) { // Check the first byte. { IRBuilder<> IRB(InsertBefore); - instrumentAddress(OrigIns, IRB, Addr, 8, IsWrite); + instrumentAddress(AFC, OrigIns, IRB, Addr, 8, IsWrite); } // Check the last byte. { @@ -257,15 +298,16 @@ void AddressSanitizer::instrumentMemIntrinsicParam(Instruction *OrigIns, SizeMinusOne = IRB.CreateIntCast(SizeMinusOne, IntptrTy, false); Value *AddrLong = IRB.CreatePointerCast(Addr, IntptrTy); Value *AddrPlusSizeMinisOne = IRB.CreateAdd(AddrLong, SizeMinusOne); - instrumentAddress(OrigIns, IRB, AddrPlusSizeMinisOne, 8, IsWrite); + instrumentAddress(AFC, OrigIns, IRB, AddrPlusSizeMinisOne, 8, IsWrite); } } // Instrument memset/memmove/memcpy -bool AddressSanitizer::instrumentMemIntrinsic(MemIntrinsic *MI) { +bool AddressSanitizer::instrumentMemIntrinsic(AsanFunctionContext &AFC, + MemIntrinsic *MI) { Value *Dst = MI->getDest(); MemTransferInst *MemTran = dyn_cast(MI); - Value *Src = MemTran ? MemTran->getSource() : NULL; + Value *Src = MemTran ? 
MemTran->getSource() : 0; Value *Length = MI->getLength(); Constant *ConstLength = dyn_cast(Length); @@ -277,26 +319,46 @@ bool AddressSanitizer::instrumentMemIntrinsic(MemIntrinsic *MI) { IRBuilder<> IRB(InsertBefore); Value *Cmp = IRB.CreateICmpNE(Length, - Constant::getNullValue(Length->getType())); - InsertBefore = splitBlockAndInsertIfThen(InsertBefore, Cmp); + Constant::getNullValue(Length->getType())); + InsertBefore = splitBlockAndInsertIfThen(Cmp); } - instrumentMemIntrinsicParam(MI, Dst, Length, InsertBefore, true); + instrumentMemIntrinsicParam(AFC, MI, Dst, Length, InsertBefore, true); if (Src) - instrumentMemIntrinsicParam(MI, Src, Length, InsertBefore, false); + instrumentMemIntrinsicParam(AFC, MI, Src, Length, InsertBefore, false); return true; } -static Value *getLDSTOperand(Instruction *I) { +// If I is an interesting memory access, return the PointerOperand +// and set IsWrite. Otherwise return NULL. +static Value *isInterestingMemoryAccess(Instruction *I, bool *IsWrite) { if (LoadInst *LI = dyn_cast(I)) { + if (!ClInstrumentReads) return NULL; + *IsWrite = false; return LI->getPointerOperand(); } - return cast(*I).getPointerOperand(); + if (StoreInst *SI = dyn_cast(I)) { + if (!ClInstrumentWrites) return NULL; + *IsWrite = true; + return SI->getPointerOperand(); + } + if (AtomicRMWInst *RMW = dyn_cast(I)) { + if (!ClInstrumentAtomics) return NULL; + *IsWrite = true; + return RMW->getPointerOperand(); + } + if (AtomicCmpXchgInst *XCHG = dyn_cast(I)) { + if (!ClInstrumentAtomics) return NULL; + *IsWrite = true; + return XCHG->getPointerOperand(); + } + return NULL; } -void AddressSanitizer::instrumentMop(Instruction *I) { - int IsWrite = isa(*I); - Value *Addr = getLDSTOperand(I); +void AddressSanitizer::instrumentMop(AsanFunctionContext &AFC, Instruction *I) { + bool IsWrite; + Value *Addr = isInterestingMemoryAccess(I, &IsWrite); + assert(Addr); if (ClOpt && ClOptGlobals && isa(Addr)) { // We are accessing a global scalar variable. Nothing to catch here. return; @@ -314,22 +376,57 @@ void AddressSanitizer::instrumentMop(Instruction *I) { } IRBuilder<> IRB(I); - instrumentAddress(I, IRB, Addr, TypeSize, IsWrite); + instrumentAddress(AFC, I, IRB, Addr, TypeSize, IsWrite); +} + +// Validate the result of Module::getOrInsertFunction called for an interface +// function of AddressSanitizer. If the instrumented module defines a function +// with the same name, their prototypes must match, otherwise +// getOrInsertFunction returns a bitcast. +Function *AddressSanitizer::checkInterfaceFunction(Constant *FuncOrBitcast) { + if (isa(FuncOrBitcast)) return cast(FuncOrBitcast); + FuncOrBitcast->dump(); + report_fatal_error("trying to redefine an AddressSanitizer " + "interface function"); } Instruction *AddressSanitizer::generateCrashCode( - IRBuilder<> &IRB, Value *Addr, bool IsWrite, uint32_t TypeSize) { - // IsWrite and TypeSize are encoded in the function name. - std::string FunctionName = std::string(kAsanReportErrorTemplate) + - (IsWrite ? 
"store" : "load") + itostr(TypeSize / 8); - Value *ReportWarningFunc = CurrentModule->getOrInsertFunction( - FunctionName, IRB.getVoidTy(), IntptrTy, NULL); - CallInst *Call = IRB.CreateCall(ReportWarningFunc, Addr); - Call->setDoesNotReturn(); + BasicBlock *BB, Value *Addr, Value *PC, + bool IsWrite, size_t AccessSizeIndex) { + IRBuilder<> IRB(BB->getFirstNonPHI()); + CallInst *Call; + if (PC) + Call = IRB.CreateCall2(AsanErrorCallback[IsWrite][AccessSizeIndex], + Addr, PC); + else + Call = IRB.CreateCall(AsanErrorCallback[IsWrite][AccessSizeIndex], Addr); + // We don't do Call->setDoesNotReturn() because the BB already has + // UnreachableInst at the end. + // This EmptyAsm is required to avoid callback merge. + IRB.CreateCall(EmptyAsm); return Call; } -void AddressSanitizer::instrumentAddress(Instruction *OrigIns, +Value *AddressSanitizer::createSlowPathCmp(IRBuilder<> &IRB, Value *AddrLong, + Value *ShadowValue, + uint32_t TypeSize) { + size_t Granularity = 1 << MappingScale; + // Addr & (Granularity - 1) + Value *LastAccessedByte = IRB.CreateAnd( + AddrLong, ConstantInt::get(IntptrTy, Granularity - 1)); + // (Addr & (Granularity - 1)) + size - 1 + if (TypeSize / 8 > 1) + LastAccessedByte = IRB.CreateAdd( + LastAccessedByte, ConstantInt::get(IntptrTy, TypeSize / 8 - 1)); + // (uint8_t) ((Addr & (Granularity-1)) + size - 1) + LastAccessedByte = IRB.CreateIntCast( + LastAccessedByte, IRB.getInt8Ty(), false); + // ((uint8_t) ((Addr & (Granularity-1)) + size - 1)) >= ShadowValue + return IRB.CreateICmpSGE(LastAccessedByte, ShadowValue); +} + +void AddressSanitizer::instrumentAddress(AsanFunctionContext &AFC, + Instruction *OrigIns, IRBuilder<> &IRB, Value *Addr, uint32_t TypeSize, bool IsWrite) { Value *AddrLong = IRB.CreatePointerCast(Addr, IntptrTy); @@ -344,31 +441,25 @@ void AddressSanitizer::instrumentAddress(Instruction *OrigIns, Value *Cmp = IRB.CreateICmpNE(ShadowValue, CmpVal); - Instruction *CheckTerm = splitBlockAndInsertIfThen( - cast(Cmp)->getNextNode(), Cmp); - IRBuilder<> IRB2(CheckTerm); + BasicBlock *CrashBlock = BasicBlock::Create(*C, "crash_bb", &AFC.F); + new UnreachableInst(*C, CrashBlock); + size_t AccessSizeIndex = TypeSizeToSizeIndex(TypeSize); + Instruction *Crash = + generateCrashCode(CrashBlock, AddrLong, 0, IsWrite, AccessSizeIndex); + Crash->setDebugLoc(OrigIns->getDebugLoc()); size_t Granularity = 1 << MappingScale; if (TypeSize < 8 * Granularity) { - // Addr & (Granularity - 1) - Value *Lower3Bits = IRB2.CreateAnd( - AddrLong, ConstantInt::get(IntptrTy, Granularity - 1)); - // (Addr & (Granularity - 1)) + size - 1 - Value *LastAccessedByte = IRB2.CreateAdd( - Lower3Bits, ConstantInt::get(IntptrTy, TypeSize / 8 - 1)); - // (uint8_t) ((Addr & (Granularity-1)) + size - 1) - LastAccessedByte = IRB2.CreateIntCast( - LastAccessedByte, IRB.getInt8Ty(), false); - // ((uint8_t) ((Addr & (Granularity-1)) + size - 1)) >= ShadowValue - Value *Cmp2 = IRB2.CreateICmpSGE(LastAccessedByte, ShadowValue); - - CheckTerm = splitBlockAndInsertIfThen(CheckTerm, Cmp2); - } - - IRBuilder<> IRB1(CheckTerm); - Instruction *Crash = generateCrashCode(IRB1, AddrLong, IsWrite, TypeSize); - Crash->setDebugLoc(OrigIns->getDebugLoc()); - ReplaceInstWithInst(CheckTerm, new UnreachableInst(*C)); + BranchInst *CheckTerm = splitBlockAndInsertIfThen(Cmp); + assert(CheckTerm->isUnconditional()); + BasicBlock *NextBB = CheckTerm->getSuccessor(0); + IRB.SetInsertPoint(CheckTerm); + Value *Cmp2 = createSlowPathCmp(IRB, AddrLong, ShadowValue, TypeSize); + BranchInst *NewTerm = 
BranchInst::Create(CrashBlock, NextBB, Cmp2); + ReplaceInstWithInst(CheckTerm, NewTerm); + } else { + splitBlockAndInsertIfThen(Cmp, CrashBlock); + } } // This function replaces all global variables with new variables that have @@ -473,7 +564,7 @@ bool AddressSanitizer::insertGlobalRedzones(Module &M) { // Create a new global variable with enough space for a redzone. GlobalVariable *NewGlobal = new GlobalVariable( M, NewTy, G->isConstant(), G->getLinkage(), - NewInitializer, "", G, G->isThreadLocal()); + NewInitializer, "", G, G->getThreadLocalMode()); NewGlobal->copyAttributesFrom(G); NewGlobal->setAlignment(RedzoneSize); @@ -501,7 +592,7 @@ bool AddressSanitizer::insertGlobalRedzones(Module &M) { M, ArrayOfGlobalStructTy, false, GlobalVariable::PrivateLinkage, ConstantArray::get(ArrayOfGlobalStructTy, Initializers), ""); - Function *AsanRegisterGlobals = cast(M.getOrInsertFunction( + Function *AsanRegisterGlobals = checkInterfaceFunction(M.getOrInsertFunction( kAsanRegisterGlobalsName, IRB.getVoidTy(), IntptrTy, IntptrTy, NULL)); AsanRegisterGlobals->setLinkage(Function::ExternalLinkage); @@ -516,8 +607,10 @@ bool AddressSanitizer::insertGlobalRedzones(Module &M) { GlobalValue::InternalLinkage, kAsanModuleDtorName, &M); BasicBlock *AsanDtorBB = BasicBlock::Create(*C, "", AsanDtorFunction); IRBuilder<> IRB_Dtor(ReturnInst::Create(*C, AsanDtorBB)); - Function *AsanUnregisterGlobals = cast(M.getOrInsertFunction( - kAsanUnregisterGlobalsName, IRB.getVoidTy(), IntptrTy, IntptrTy, NULL)); + Function *AsanUnregisterGlobals = + checkInterfaceFunction(M.getOrInsertFunction( + kAsanUnregisterGlobalsName, + IRB.getVoidTy(), IntptrTy, IntptrTy, NULL)); AsanUnregisterGlobals->setLinkage(Function::ExternalLinkage); IRB_Dtor.CreateCall2(AsanUnregisterGlobals, @@ -537,7 +630,6 @@ bool AddressSanitizer::runOnModule(Module &M) { return false; BL.reset(new FunctionBlackList(ClBlackListFile)); - CurrentModule = &M; C = &(M.getContext()); LongSize = TD->getPointerSizeInBits(); IntptrTy = Type::getIntNTy(*C, LongSize); @@ -551,13 +643,33 @@ bool AddressSanitizer::runOnModule(Module &M) { // call __asan_init in the module ctor. IRBuilder<> IRB(CtorInsertBefore); - AsanInitFunction = cast( + AsanInitFunction = checkInterfaceFunction( M.getOrInsertFunction(kAsanInitName, IRB.getVoidTy(), NULL)); AsanInitFunction->setLinkage(Function::ExternalLinkage); IRB.CreateCall(AsanInitFunction); - MappingOffset = LongSize == 32 - ? kDefaultShadowOffset32 : kDefaultShadowOffset64; + // Create __asan_report* callbacks. + for (size_t AccessIsWrite = 0; AccessIsWrite <= 1; AccessIsWrite++) { + for (size_t AccessSizeIndex = 0; AccessSizeIndex < kNumberOfAccessSizes; + AccessSizeIndex++) { + // IsWrite and TypeSize are encoded in the function name. + std::string FunctionName = std::string(kAsanReportErrorTemplate) + + (AccessIsWrite ? "store" : "load") + itostr(1 << AccessSizeIndex); + // If we are merging crash callbacks, they have two parameters. + AsanErrorCallback[AccessIsWrite][AccessSizeIndex] = cast( + M.getOrInsertFunction(FunctionName, IRB.getVoidTy(), IntptrTy, NULL)); + } + } + // We insert an empty inline asm after __asan_report* to avoid callback merge. + EmptyAsm = InlineAsm::get(FunctionType::get(IRB.getVoidTy(), false), + StringRef(""), StringRef(""), + /*hasSideEffects=*/true); + + llvm::Triple targetTriple(M.getTargetTriple()); + bool isAndroid = targetTriple.getEnvironment() == llvm::Triple::ANDROIDEABI; + + MappingOffset = isAndroid ? kDefaultShadowOffsetAndroid : + (LongSize == 32 ? 
kDefaultShadowOffset32 : kDefaultShadowOffset64); if (ClMappingOffsetLog >= 0) { if (ClMappingOffsetLog == 0) { // special case @@ -640,17 +752,17 @@ bool AddressSanitizer::handleFunction(Module &M, Function &F) { SmallSet TempsToInstrument; SmallVector ToInstrument; SmallVector NoReturnCalls; + bool IsWrite; // Fill the set of memory operations to instrument. for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) { TempsToInstrument.clear(); + int NumInsnsPerBB = 0; for (BasicBlock::iterator BI = FI->begin(), BE = FI->end(); BI != BE; ++BI) { if (LooksLikeCodeInBug11395(BI)) return false; - if ((isa(BI) && ClInstrumentReads) || - (isa(BI) && ClInstrumentWrites)) { - Value *Addr = getLDSTOperand(BI); + if (Value *Addr = isInterestingMemoryAccess(BI, &IsWrite)) { if (ClOpt && ClOptSameTemp) { if (!TempsToInstrument.insert(Addr)) continue; // We've seen this temp in the current BB. @@ -668,19 +780,24 @@ bool AddressSanitizer::handleFunction(Module &M, Function &F) { continue; } ToInstrument.push_back(BI); + NumInsnsPerBB++; + if (NumInsnsPerBB >= ClMaxInsnsToInstrumentPerBB) + break; } } + AsanFunctionContext AFC(F); + // Instrument. int NumInstrumented = 0; for (size_t i = 0, n = ToInstrument.size(); i != n; i++) { Instruction *Inst = ToInstrument[i]; if (ClDebugMin < 0 || ClDebugMax < 0 || (NumInstrumented >= ClDebugMin && NumInstrumented <= ClDebugMax)) { - if (isa(Inst) || isa(Inst)) - instrumentMop(Inst); + if (isInterestingMemoryAccess(Inst, &IsWrite)) + instrumentMop(AFC, Inst); else - instrumentMemIntrinsic(cast(Inst)); + instrumentMemIntrinsic(AFC, cast(Inst)); } NumInstrumented++; } diff --git a/lib/Transforms/Instrumentation/BoundsChecking.cpp b/lib/Transforms/Instrumentation/BoundsChecking.cpp new file mode 100644 index 0000000..09e0f14 --- /dev/null +++ b/lib/Transforms/Instrumentation/BoundsChecking.cpp @@ -0,0 +1,209 @@ +//===- BoundsChecking.cpp - Instrumentation for run-time bounds checking --===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements a pass that instruments the code to perform run-time +// bounds checking on loads, stores, and other memory intrinsics. 
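In outline, the guard this new pass emits before each access is three comparisons against the object size and the access offset (spelled out in instrument() below), with any failure branching to the function's single trap block. A hedged C++ analogue, with __builtin_trap standing in for the emitted llvm.trap call and the function name being illustrative:

#include <stdint.h>

static void boundsCheck(int64_t Offset, uint64_t Size, uint64_t Needed) {
  bool Bad = Offset < 0 ||                      // access below the object
             Size < (uint64_t)Offset ||         // offset already past the end
             Size - (uint64_t)Offset < Needed;  // not enough bytes left
  if (Bad)
    __builtin_trap();  // the shared per-function trap block (getTrapBB)
}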
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "bounds-checking" +#include "llvm/IRBuilder.h" +#include "llvm/Intrinsics.h" +#include "llvm/Pass.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/MemoryBuiltins.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/InstIterator.h" +#include "llvm/Support/TargetFolder.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Transforms/Instrumentation.h" +using namespace llvm; + +static cl::opt SingleTrapBB("bounds-checking-single-trap", + cl::desc("Use one trap block per function")); + +STATISTIC(ChecksAdded, "Bounds checks added"); +STATISTIC(ChecksSkipped, "Bounds checks skipped"); +STATISTIC(ChecksUnable, "Bounds checks unable to add"); + +typedef IRBuilder BuilderTy; + +namespace { + struct BoundsChecking : public FunctionPass { + static char ID; + + BoundsChecking(unsigned _Penalty = 5) : FunctionPass(ID), Penalty(_Penalty){ + initializeBoundsCheckingPass(*PassRegistry::getPassRegistry()); + } + + virtual bool runOnFunction(Function &F); + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired(); + } + + private: + const TargetData *TD; + ObjectSizeOffsetEvaluator *ObjSizeEval; + BuilderTy *Builder; + Instruction *Inst; + BasicBlock *TrapBB; + unsigned Penalty; + + BasicBlock *getTrapBB(); + void emitBranchToTrap(Value *Cmp = 0); + bool computeAllocSize(Value *Ptr, APInt &Offset, Value* &OffsetValue, + APInt &Size, Value* &SizeValue); + bool instrument(Value *Ptr, Value *Val); + }; +} + +char BoundsChecking::ID = 0; +INITIALIZE_PASS(BoundsChecking, "bounds-checking", "Run-time bounds checking", + false, false) + + +/// getTrapBB - create a basic block that traps. All overflowing conditions +/// branch to this block. There's only one trap block per function. +BasicBlock *BoundsChecking::getTrapBB() { + if (TrapBB && SingleTrapBB) + return TrapBB; + + Function *Fn = Inst->getParent()->getParent(); + BasicBlock::iterator PrevInsertPoint = Builder->GetInsertPoint(); + TrapBB = BasicBlock::Create(Fn->getContext(), "trap", Fn); + Builder->SetInsertPoint(TrapBB); + + llvm::Value *F = Intrinsic::getDeclaration(Fn->getParent(), Intrinsic::trap); + CallInst *TrapCall = Builder->CreateCall(F); + TrapCall->setDoesNotReturn(); + TrapCall->setDoesNotThrow(); + TrapCall->setDebugLoc(Inst->getDebugLoc()); + Builder->CreateUnreachable(); + + Builder->SetInsertPoint(PrevInsertPoint); + return TrapBB; +} + + +/// emitBranchToTrap - emit a branch instruction to a trap block. +/// If Cmp is non-null, perform a jump only if its value evaluates to true. +void BoundsChecking::emitBranchToTrap(Value *Cmp) { + // check if the comparison is always false + ConstantInt *C = dyn_cast_or_null(Cmp); + if (C) { + ++ChecksSkipped; + if (!C->getZExtValue()) + return; + else + Cmp = 0; // unconditional branch + } + + Instruction *Inst = Builder->GetInsertPoint(); + BasicBlock *OldBB = Inst->getParent(); + BasicBlock *Cont = OldBB->splitBasicBlock(Inst); + OldBB->getTerminator()->eraseFromParent(); + + if (Cmp) + BranchInst::Create(getTrapBB(), Cont, Cmp, OldBB); + else + BranchInst::Create(getTrapBB(), OldBB); +} + + +/// instrument - adds run-time bounds checks to memory accessing instructions. +/// Ptr is the pointer that will be read/written, and InstVal is either the +/// result from the load or the value being stored. It is used to determine the +/// size of memory block that is touched. 
+/// Returns true if any change was made to the IR, false otherwise. +bool BoundsChecking::instrument(Value *Ptr, Value *InstVal) { + uint64_t NeededSize = TD->getTypeStoreSize(InstVal->getType()); + DEBUG(dbgs() << "Instrument " << *Ptr << " for " << Twine(NeededSize) + << " bytes\n"); + + SizeOffsetEvalType SizeOffset = ObjSizeEval->compute(Ptr); + + if (!ObjSizeEval->bothKnown(SizeOffset)) { + ++ChecksUnable; + return false; + } + + Value *Size = SizeOffset.first; + Value *Offset = SizeOffset.second; + ConstantInt *SizeCI = dyn_cast(Size); + + IntegerType *IntTy = TD->getIntPtrType(Inst->getContext()); + Value *NeededSizeVal = ConstantInt::get(IntTy, NeededSize); + + // three checks are required to ensure safety: + // . Offset >= 0 (since the offset is given from the base ptr) + // . Size >= Offset (unsigned) + // . Size - Offset >= NeededSize (unsigned) + // + // optimization: if Size >= 0 (signed), skip 1st check + // FIXME: add NSW/NUW here? -- we dont care if the subtraction overflows + Value *ObjSize = Builder->CreateSub(Size, Offset); + Value *Cmp2 = Builder->CreateICmpULT(Size, Offset); + Value *Cmp3 = Builder->CreateICmpULT(ObjSize, NeededSizeVal); + Value *Or = Builder->CreateOr(Cmp2, Cmp3); + if (!SizeCI || SizeCI->getValue().slt(0)) { + Value *Cmp1 = Builder->CreateICmpSLT(Offset, ConstantInt::get(IntTy, 0)); + Or = Builder->CreateOr(Cmp1, Or); + } + emitBranchToTrap(Or); + + ++ChecksAdded; + return true; +} + +bool BoundsChecking::runOnFunction(Function &F) { + TD = &getAnalysis(); + + TrapBB = 0; + BuilderTy TheBuilder(F.getContext(), TargetFolder(TD)); + Builder = &TheBuilder; + ObjectSizeOffsetEvaluator TheObjSizeEval(TD, F.getContext()); + ObjSizeEval = &TheObjSizeEval; + + // check HANDLE_MEMORY_INST in include/llvm/Instruction.def for memory + // touching instructions + std::vector WorkList; + for (inst_iterator i = inst_begin(F), e = inst_end(F); i != e; ++i) { + Instruction *I = &*i; + if (isa(I) || isa(I) || isa(I) || + isa(I)) + WorkList.push_back(I); + } + + bool MadeChange = false; + for (std::vector::iterator i = WorkList.begin(), + e = WorkList.end(); i != e; ++i) { + Inst = *i; + + Builder->SetInsertPoint(Inst); + if (LoadInst *LI = dyn_cast(Inst)) { + MadeChange |= instrument(LI->getPointerOperand(), LI); + } else if (StoreInst *SI = dyn_cast(Inst)) { + MadeChange |= instrument(SI->getPointerOperand(), SI->getValueOperand()); + } else if (AtomicCmpXchgInst *AI = dyn_cast(Inst)) { + MadeChange |= instrument(AI->getPointerOperand(),AI->getCompareOperand()); + } else if (AtomicRMWInst *AI = dyn_cast(Inst)) { + MadeChange |= instrument(AI->getPointerOperand(), AI->getValOperand()); + } else { + llvm_unreachable("unknown Instruction type"); + } + } + return MadeChange; +} + +FunctionPass *llvm::createBoundsCheckingPass(unsigned Penalty) { + return new BoundsChecking(Penalty); +} diff --git a/lib/Transforms/Instrumentation/CMakeLists.txt b/lib/Transforms/Instrumentation/CMakeLists.txt index e4c8cf1..00de882 100644 --- a/lib/Transforms/Instrumentation/CMakeLists.txt +++ b/lib/Transforms/Instrumentation/CMakeLists.txt @@ -1,5 +1,6 @@ add_llvm_library(LLVMInstrumentation AddressSanitizer.cpp + BoundsChecking.cpp EdgeProfiling.cpp FunctionBlackList.cpp GCOVProfiling.cpp @@ -9,3 +10,5 @@ add_llvm_library(LLVMInstrumentation ProfilingUtils.cpp ThreadSanitizer.cpp ) + +add_dependencies(LLVMInstrumentation intrinsics_gen) diff --git a/lib/Transforms/Instrumentation/GCOVProfiling.cpp b/lib/Transforms/Instrumentation/GCOVProfiling.cpp index 96e5d5b..264a6a6 100644 --- 
a/lib/Transforms/Instrumentation/GCOVProfiling.cpp +++ b/lib/Transforms/Instrumentation/GCOVProfiling.cpp @@ -18,22 +18,23 @@ #include "ProfilingUtils.h" #include "llvm/Transforms/Instrumentation.h" -#include "llvm/Analysis/DebugInfo.h" +#include "llvm/DebugInfo.h" +#include "llvm/IRBuilder.h" +#include "llvm/Instructions.h" #include "llvm/Module.h" #include "llvm/Pass.h" -#include "llvm/Instructions.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/DebugLoc.h" -#include "llvm/Support/InstIterator.h" -#include "llvm/Support/IRBuilder.h" -#include "llvm/Support/PathV2.h" #include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/Statistic.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/UniqueVector.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/DebugLoc.h" +#include "llvm/Support/InstIterator.h" +#include "llvm/Support/PathV2.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Utils/ModuleUtils.h" #include #include using namespace llvm; @@ -57,7 +58,6 @@ namespace { virtual const char *getPassName() const { return "GCOV Profiler"; } - private: bool runOnModule(Module &M); @@ -90,6 +90,7 @@ namespace { // list. void insertCounterWriteout(SmallVector, 8> &); + void insertIndirectCounterIncrement(); std::string mangleName(DICompileUnit CU, std::string NewStem); @@ -421,6 +422,7 @@ bool GCOVProfiler::emitProfileArcs() { if (!CU_Nodes) return false; bool Result = false; + bool InsertIndCounterIncrCode = false; for (unsigned i = 0, e = CU_Nodes->getNumOperands(); i != e; ++i) { DICompileUnit CU(CU_Nodes->getOperand(i)); DIArray SPs = CU.getSubprograms(); @@ -446,7 +448,7 @@ bool GCOVProfiler::emitProfileArcs() { new GlobalVariable(*M, CounterTy, false, GlobalValue::InternalLinkage, Constant::getNullValue(CounterTy), - "__llvm_gcov_ctr", 0, false, 0); + "__llvm_gcov_ctr"); CountersBySP.push_back(std::make_pair(Counters, (MDNode*)SP)); UniqueVector ComplexEdgePreds; @@ -507,15 +509,21 @@ bool GCOVProfiler::emitProfileArcs() { Value *CounterPtrArray = Builder.CreateConstInBoundsGEP2_64(EdgeTable, 0, i * ComplexEdgePreds.size()); + + // Build code to increment the counter. 
+ InsertIndCounterIncrCode = true; Builder.CreateCall2(getIncrementIndirectCounterFunc(), EdgeState, CounterPtrArray); - // clear the predecessor number - Builder.CreateStore(ConstantInt::get(Int32Ty, 0xffffffff), EdgeState); } } } + insertCounterWriteout(CountersBySP); } + + if (InsertIndCounterIncrCode) + insertIndirectCounterIncrement(); + return Result; } @@ -574,13 +582,14 @@ Constant *GCOVProfiler::getStartFileFunc() { } Constant *GCOVProfiler::getIncrementIndirectCounterFunc() { + Type *Int32Ty = Type::getInt32Ty(*Ctx); + Type *Int64Ty = Type::getInt64Ty(*Ctx); Type *Args[] = { - Type::getInt32PtrTy(*Ctx), // uint32_t *predecessor - Type::getInt64PtrTy(*Ctx)->getPointerTo(), // uint64_t **state_table_row + Int32Ty->getPointerTo(), // uint32_t *predecessor + Int64Ty->getPointerTo()->getPointerTo() // uint64_t **counters }; - FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), - Args, false); - return M->getOrInsertFunction("llvm_gcda_increment_indirect_counter", FTy); + FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), Args, false); + return M->getOrInsertFunction("__llvm_gcov_indirect_counter_increment", FTy); } Constant *GCOVProfiler::getEmitFunctionFunc() { @@ -588,8 +597,7 @@ Constant *GCOVProfiler::getEmitFunctionFunc() { Type::getInt32Ty(*Ctx), // uint32_t ident Type::getInt8PtrTy(*Ctx), // const char *function_name }; - FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), - Args, false); + FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), Args, false); return M->getOrInsertFunction("llvm_gcda_emit_function", FTy); } @@ -665,5 +673,75 @@ void GCOVProfiler::insertCounterWriteout( } Builder.CreateRetVoid(); - InsertProfilingShutdownCall(WriteoutF, M); + // Create a small bit of code that registers the "__llvm_gcov_writeout" + // function to be executed at exit. + FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), false); + Function *F = Function::Create(FTy, GlobalValue::InternalLinkage, + "__llvm_gcov_init", M); + F->setUnnamedAddr(true); + F->setLinkage(GlobalValue::InternalLinkage); + F->addFnAttr(Attribute::NoInline); + + BB = BasicBlock::Create(*Ctx, "entry", F); + Builder.SetInsertPoint(BB); + + FTy = FunctionType::get(Type::getInt32Ty(*Ctx), + PointerType::get(FTy, 0), false); + Constant *AtExitFn = M->getOrInsertFunction("atexit", FTy); + Builder.CreateCall(AtExitFn, WriteoutF); + Builder.CreateRetVoid(); + + appendToGlobalCtors(*M, F, 0); +} + +void GCOVProfiler::insertIndirectCounterIncrement() { + Function *Fn = + cast(GCOVProfiler::getIncrementIndirectCounterFunc()); + Fn->setUnnamedAddr(true); + Fn->setLinkage(GlobalValue::InternalLinkage); + Fn->addFnAttr(Attribute::NoInline); + + Type *Int32Ty = Type::getInt32Ty(*Ctx); + Type *Int64Ty = Type::getInt64Ty(*Ctx); + Constant *NegOne = ConstantInt::get(Int32Ty, 0xffffffff); + + // Create basic blocks for function. 
+ BasicBlock *BB = BasicBlock::Create(*Ctx, "entry", Fn); + IRBuilder<> Builder(BB); + + BasicBlock *PredNotNegOne = BasicBlock::Create(*Ctx, "", Fn); + BasicBlock *CounterEnd = BasicBlock::Create(*Ctx, "", Fn); + BasicBlock *Exit = BasicBlock::Create(*Ctx, "exit", Fn); + + // uint32_t pred = *predecessor; + // if (pred == 0xffffffff) return; + Argument *Arg = Fn->arg_begin(); + Arg->setName("predecessor"); + Value *Pred = Builder.CreateLoad(Arg, "pred"); + Value *Cond = Builder.CreateICmpEQ(Pred, NegOne); + BranchInst::Create(Exit, PredNotNegOne, Cond, BB); + + Builder.SetInsertPoint(PredNotNegOne); + + // uint64_t *counter = counters[pred]; + // if (!counter) return; + Value *ZExtPred = Builder.CreateZExt(Pred, Int64Ty); + Arg = llvm::next(Fn->arg_begin()); + Arg->setName("counters"); + Value *GEP = Builder.CreateGEP(Arg, ZExtPred); + Value *Counter = Builder.CreateLoad(GEP, "counter"); + Cond = Builder.CreateICmpEQ(Counter, + Constant::getNullValue(Int64Ty->getPointerTo())); + Builder.CreateCondBr(Cond, Exit, CounterEnd); + + // ++*counter; + Builder.SetInsertPoint(CounterEnd); + Value *Add = Builder.CreateAdd(Builder.CreateLoad(Counter), + ConstantInt::get(Int64Ty, 1)); + Builder.CreateStore(Add, Counter); + Builder.CreateBr(Exit); + + // Fill in the exit block. + Builder.SetInsertPoint(Exit); + Builder.CreateRetVoid(); } diff --git a/lib/Transforms/Instrumentation/Instrumentation.cpp b/lib/Transforms/Instrumentation/Instrumentation.cpp index c7266e2..1e0b4a3 100644 --- a/lib/Transforms/Instrumentation/Instrumentation.cpp +++ b/lib/Transforms/Instrumentation/Instrumentation.cpp @@ -20,11 +20,12 @@ using namespace llvm; /// initializeInstrumentation - Initialize all passes in the TransformUtils /// library. void llvm::initializeInstrumentation(PassRegistry &Registry) { + initializeAddressSanitizerPass(Registry); + initializeBoundsCheckingPass(Registry); initializeEdgeProfilerPass(Registry); + initializeGCOVProfilerPass(Registry); initializeOptimalEdgeProfilerPass(Registry); initializePathProfilerPass(Registry); - initializeGCOVProfilerPass(Registry); - initializeAddressSanitizerPass(Registry); initializeThreadSanitizerPass(Registry); } diff --git a/lib/Transforms/Instrumentation/PathProfiling.cpp b/lib/Transforms/Instrumentation/PathProfiling.cpp index b214796..cc27146 100644 --- a/lib/Transforms/Instrumentation/PathProfiling.cpp +++ b/lib/Transforms/Instrumentation/PathProfiling.cpp @@ -55,11 +55,11 @@ #include "llvm/LLVMContext.h" #include "llvm/Module.h" #include "llvm/Pass.h" +#include "llvm/TypeBuilder.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/CFG.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/TypeBuilder.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Instrumentation.h" diff --git a/lib/Transforms/Instrumentation/ThreadSanitizer.cpp b/lib/Transforms/Instrumentation/ThreadSanitizer.cpp index 8bb337e..dc0fa71 100644 --- a/lib/Transforms/Instrumentation/ThreadSanitizer.cpp +++ b/lib/Transforms/Instrumentation/ThreadSanitizer.cpp @@ -22,73 +22,73 @@ #define DEBUG_TYPE "tsan" #include "FunctionBlackList.h" -#include "llvm/ADT/SmallSet.h" -#include "llvm/ADT/SmallString.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/StringExtras.h" -#include "llvm/Intrinsics.h" #include "llvm/Function.h" +#include "llvm/IRBuilder.h" +#include "llvm/Intrinsics.h" #include "llvm/LLVMContext.h" #include "llvm/Metadata.h" #include "llvm/Module.h" +#include 
"llvm/Type.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/IRBuilder.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetData.h" #include "llvm/Transforms/Instrumentation.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/ModuleUtils.h" -#include "llvm/Type.h" using namespace llvm; static cl::opt ClBlackListFile("tsan-blacklist", cl::desc("Blacklist file"), cl::Hidden); -static cl::opt ClPrintStats("tsan-print-stats", - cl::desc("Print ThreadSanitizer instrumentation stats"), cl::Hidden); +STATISTIC(NumInstrumentedReads, "Number of instrumented reads"); +STATISTIC(NumInstrumentedWrites, "Number of instrumented writes"); +STATISTIC(NumOmittedReadsBeforeWrite, + "Number of reads ignored due to following writes"); +STATISTIC(NumAccessesWithBadSize, "Number of accesses with bad size"); +STATISTIC(NumInstrumentedVtableWrites, "Number of vtable ptr writes"); +STATISTIC(NumOmittedReadsFromConstantGlobals, + "Number of reads from constant globals"); +STATISTIC(NumOmittedReadsFromVtable, "Number of vtable reads"); namespace { -// Stats counters for ThreadSanitizer instrumentation. -struct ThreadSanitizerStats { - size_t NumInstrumentedReads; - size_t NumInstrumentedWrites; - size_t NumOmittedReadsBeforeWrite; - size_t NumAccessesWithBadSize; - size_t NumInstrumentedVtableWrites; - size_t NumOmittedReadsFromConstantGlobals; - size_t NumOmittedReadsFromVtable; -}; - /// ThreadSanitizer: instrument the code in module to find races. struct ThreadSanitizer : public FunctionPass { ThreadSanitizer(); + const char *getPassName() const; bool runOnFunction(Function &F); bool doInitialization(Module &M); - bool doFinalization(Module &M); - bool instrumentLoadOrStore(Instruction *I); static char ID; // Pass identification, replacement for typeid. private: - void choseInstructionsToInstrument(SmallVectorImpl &Local, - SmallVectorImpl &All); + bool instrumentLoadOrStore(Instruction *I); + bool instrumentAtomic(Instruction *I); + void chooseInstructionsToInstrument(SmallVectorImpl &Local, + SmallVectorImpl &All); bool addrPointsToConstantData(Value *Addr); + int getMemoryAccessFuncIndex(Value *Addr); TargetData *TD; OwningPtr BL; + IntegerType *OrdTy; // Callbacks to run-time library are computed in doInitialization. - Value *TsanFuncEntry; - Value *TsanFuncExit; + Function *TsanFuncEntry; + Function *TsanFuncExit; // Accesses sizes are powers of two: 1, 2, 4, 8, 16. static const size_t kNumberOfAccessSizes = 5; - Value *TsanRead[kNumberOfAccessSizes]; - Value *TsanWrite[kNumberOfAccessSizes]; - Value *TsanVptrUpdate; - - // Stats are modified w/o synchronization. 
- ThreadSanitizerStats stats; + Function *TsanRead[kNumberOfAccessSizes]; + Function *TsanWrite[kNumberOfAccessSizes]; + Function *TsanAtomicLoad[kNumberOfAccessSizes]; + Function *TsanAtomicStore[kNumberOfAccessSizes]; + Function *TsanVptrUpdate; }; } // namespace @@ -97,6 +97,10 @@ INITIALIZE_PASS(ThreadSanitizer, "tsan", "ThreadSanitizer: detects data races.", false, false) +const char *ThreadSanitizer::getPassName() const { + return "ThreadSanitizer"; +} + ThreadSanitizer::ThreadSanitizer() : FunctionPass(ID), TD(NULL) { @@ -106,12 +110,18 @@ FunctionPass *llvm::createThreadSanitizerPass() { return new ThreadSanitizer(); } +static Function *checkInterfaceFunction(Constant *FuncOrBitcast) { + if (Function *F = dyn_cast(FuncOrBitcast)) + return F; + FuncOrBitcast->dump(); + report_fatal_error("ThreadSanitizer interface function redefined"); +} + bool ThreadSanitizer::doInitialization(Module &M) { TD = getAnalysisIfAvailable(); if (!TD) return false; BL.reset(new FunctionBlackList(ClBlackListFile)); - memset(&stats, 0, sizeof(stats)); // Always insert a call to __tsan_init into the module's CTORs. IRBuilder<> IRB(M.getContext()); @@ -120,38 +130,38 @@ bool ThreadSanitizer::doInitialization(Module &M) { appendToGlobalCtors(M, cast(TsanInit), 0); // Initialize the callbacks. - TsanFuncEntry = M.getOrInsertFunction("__tsan_func_entry", IRB.getVoidTy(), - IRB.getInt8PtrTy(), NULL); - TsanFuncExit = M.getOrInsertFunction("__tsan_func_exit", IRB.getVoidTy(), - NULL); + TsanFuncEntry = checkInterfaceFunction(M.getOrInsertFunction( + "__tsan_func_entry", IRB.getVoidTy(), IRB.getInt8PtrTy(), NULL)); + TsanFuncExit = checkInterfaceFunction(M.getOrInsertFunction( + "__tsan_func_exit", IRB.getVoidTy(), NULL)); + OrdTy = IRB.getInt32Ty(); for (size_t i = 0; i < kNumberOfAccessSizes; ++i) { - SmallString<32> ReadName("__tsan_read"); - ReadName += itostr(1 << i); - TsanRead[i] = M.getOrInsertFunction(ReadName, IRB.getVoidTy(), - IRB.getInt8PtrTy(), NULL); - SmallString<32> WriteName("__tsan_write"); - WriteName += itostr(1 << i); - TsanWrite[i] = M.getOrInsertFunction(WriteName, IRB.getVoidTy(), - IRB.getInt8PtrTy(), NULL); - } - TsanVptrUpdate = M.getOrInsertFunction("__tsan_vptr_update", IRB.getVoidTy(), - IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), - NULL); - return true; -} + const size_t ByteSize = 1 << i; + const size_t BitSize = ByteSize * 8; + SmallString<32> ReadName("__tsan_read" + itostr(ByteSize)); + TsanRead[i] = checkInterfaceFunction(M.getOrInsertFunction( + ReadName, IRB.getVoidTy(), IRB.getInt8PtrTy(), NULL)); -bool ThreadSanitizer::doFinalization(Module &M) { - if (ClPrintStats) { - errs() << "ThreadSanitizerStats " << M.getModuleIdentifier() - << ": wr " << stats.NumInstrumentedWrites - << "; rd " << stats.NumInstrumentedReads - << "; vt " << stats.NumInstrumentedVtableWrites - << "; bs " << stats.NumAccessesWithBadSize - << "; rbw " << stats.NumOmittedReadsBeforeWrite - << "; rcg " << stats.NumOmittedReadsFromConstantGlobals - << "; rvt " << stats.NumOmittedReadsFromVtable - << "\n"; + SmallString<32> WriteName("__tsan_write" + itostr(ByteSize)); + TsanWrite[i] = checkInterfaceFunction(M.getOrInsertFunction( + WriteName, IRB.getVoidTy(), IRB.getInt8PtrTy(), NULL)); + + Type *Ty = Type::getIntNTy(M.getContext(), BitSize); + Type *PtrTy = Ty->getPointerTo(); + SmallString<32> AtomicLoadName("__tsan_atomic" + itostr(BitSize) + + "_load"); + TsanAtomicLoad[i] = checkInterfaceFunction(M.getOrInsertFunction( + AtomicLoadName, Ty, PtrTy, OrdTy, NULL)); + + SmallString<32> 
AtomicStoreName("__tsan_atomic" + itostr(BitSize) + + "_store"); + TsanAtomicStore[i] = checkInterfaceFunction(M.getOrInsertFunction( + AtomicStoreName, IRB.getVoidTy(), PtrTy, Ty, OrdTy, + NULL)); } + TsanVptrUpdate = checkInterfaceFunction(M.getOrInsertFunction( + "__tsan_vptr_update", IRB.getVoidTy(), IRB.getInt8PtrTy(), + IRB.getInt8PtrTy(), NULL)); return true; } @@ -173,13 +183,13 @@ bool ThreadSanitizer::addrPointsToConstantData(Value *Addr) { if (GlobalVariable *GV = dyn_cast(Addr)) { if (GV->isConstant()) { // Reads from constant globals can not race with any writes. - stats.NumOmittedReadsFromConstantGlobals++; + NumOmittedReadsFromConstantGlobals++; return true; } } else if(LoadInst *L = dyn_cast(Addr)) { if (isVtableAccess(L)) { // Reads from a vtable pointer can not race with any writes. - stats.NumOmittedReadsFromVtable++; + NumOmittedReadsFromVtable++; return true; } } @@ -197,7 +207,7 @@ bool ThreadSanitizer::addrPointsToConstantData(Value *Addr) { // // 'Local' is a vector of insns within the same BB (no calls between). // 'All' is a vector of insns that will be instrumented. -void ThreadSanitizer::choseInstructionsToInstrument( +void ThreadSanitizer::chooseInstructionsToInstrument( SmallVectorImpl &Local, SmallVectorImpl &All) { SmallSet WriteTargets; @@ -212,7 +222,7 @@ void ThreadSanitizer::choseInstructionsToInstrument( Value *Addr = Load->getPointerOperand(); if (WriteTargets.count(Addr)) { // We will write to this temp, so no reason to analyze the read. - stats.NumOmittedReadsBeforeWrite++; + NumOmittedReadsBeforeWrite++; continue; } if (addrPointsToConstantData(Addr)) { @@ -225,12 +235,27 @@ void ThreadSanitizer::choseInstructionsToInstrument( Local.clear(); } +static bool isAtomic(Instruction *I) { + if (LoadInst *LI = dyn_cast(I)) + return LI->isAtomic() && LI->getSynchScope() == CrossThread; + if (StoreInst *SI = dyn_cast(I)) + return SI->isAtomic() && SI->getSynchScope() == CrossThread; + if (isa(I)) + return true; + if (isa(I)) + return true; + if (FenceInst *FI = dyn_cast(I)) + return FI->getSynchScope() == CrossThread; + return false; +} + bool ThreadSanitizer::runOnFunction(Function &F) { if (!TD) return false; if (BL->isIn(F)) return false; SmallVector RetVec; SmallVector AllLoadsAndStores; SmallVector LocalLoadsAndStores; + SmallVector AtomicAccesses; bool Res = false; bool HasCalls = false; @@ -240,16 +265,18 @@ bool ThreadSanitizer::runOnFunction(Function &F) { BasicBlock &BB = *FI; for (BasicBlock::iterator BI = BB.begin(), BE = BB.end(); BI != BE; ++BI) { - if (isa(BI) || isa(BI)) + if (isAtomic(BI)) + AtomicAccesses.push_back(BI); + else if (isa(BI) || isa(BI)) LocalLoadsAndStores.push_back(BI); else if (isa(BI)) RetVec.push_back(BI); else if (isa(BI) || isa(BI)) { HasCalls = true; - choseInstructionsToInstrument(LocalLoadsAndStores, AllLoadsAndStores); + chooseInstructionsToInstrument(LocalLoadsAndStores, AllLoadsAndStores); } } - choseInstructionsToInstrument(LocalLoadsAndStores, AllLoadsAndStores); + chooseInstructionsToInstrument(LocalLoadsAndStores, AllLoadsAndStores); } // We have collected all loads and stores. @@ -261,6 +288,11 @@ bool ThreadSanitizer::runOnFunction(Function &F) { Res |= instrumentLoadOrStore(AllLoadsAndStores[i]); } + // Instrument atomic memory accesses. + for (size_t i = 0, n = AtomicAccesses.size(); i < n; ++i) { + Res |= instrumentAtomic(AtomicAccesses[i]); + } + // Instrument function entry/exit points if there were instrumented accesses. 
if (Res || HasCalls) { IRBuilder<> IRB(F.getEntryBlock().getFirstNonPHI()); @@ -283,29 +315,98 @@ bool ThreadSanitizer::instrumentLoadOrStore(Instruction *I) { Value *Addr = IsWrite ? cast(I)->getPointerOperand() : cast(I)->getPointerOperand(); - Type *OrigPtrTy = Addr->getType(); - Type *OrigTy = cast(OrigPtrTy)->getElementType(); - assert(OrigTy->isSized()); - uint32_t TypeSize = TD->getTypeStoreSizeInBits(OrigTy); - if (TypeSize != 8 && TypeSize != 16 && - TypeSize != 32 && TypeSize != 64 && TypeSize != 128) { - stats.NumAccessesWithBadSize++; - // Ignore all unusual sizes. + int Idx = getMemoryAccessFuncIndex(Addr); + if (Idx < 0) return false; - } if (IsWrite && isVtableAccess(I)) { + DEBUG(dbgs() << " VPTR : " << *I << "\n"); Value *StoredValue = cast(I)->getValueOperand(); + // StoredValue does not necessary have a pointer type. + if (isa(StoredValue->getType())) + StoredValue = IRB.CreateIntToPtr(StoredValue, IRB.getInt8PtrTy()); + // Call TsanVptrUpdate. IRB.CreateCall2(TsanVptrUpdate, IRB.CreatePointerCast(Addr, IRB.getInt8PtrTy()), IRB.CreatePointerCast(StoredValue, IRB.getInt8PtrTy())); - stats.NumInstrumentedVtableWrites++; + NumInstrumentedVtableWrites++; return true; } - size_t Idx = CountTrailingZeros_32(TypeSize / 8); - assert(Idx < kNumberOfAccessSizes); Value *OnAccessFunc = IsWrite ? TsanWrite[Idx] : TsanRead[Idx]; IRB.CreateCall(OnAccessFunc, IRB.CreatePointerCast(Addr, IRB.getInt8PtrTy())); - if (IsWrite) stats.NumInstrumentedWrites++; - else stats.NumInstrumentedReads++; + if (IsWrite) NumInstrumentedWrites++; + else NumInstrumentedReads++; + return true; +} + +static ConstantInt *createOrdering(IRBuilder<> *IRB, AtomicOrdering ord) { + uint32_t v = 0; + switch (ord) { + case NotAtomic: assert(false); + case Unordered: // Fall-through. + case Monotonic: v = 1 << 0; break; + // case Consume: v = 1 << 1; break; // Not specified yet. + case Acquire: v = 1 << 2; break; + case Release: v = 1 << 3; break; + case AcquireRelease: v = 1 << 4; break; + case SequentiallyConsistent: v = 1 << 5; break; + } + return IRB->getInt32(v); +} + +bool ThreadSanitizer::instrumentAtomic(Instruction *I) { + IRBuilder<> IRB(I); + if (LoadInst *LI = dyn_cast(I)) { + Value *Addr = LI->getPointerOperand(); + int Idx = getMemoryAccessFuncIndex(Addr); + if (Idx < 0) + return false; + const size_t ByteSize = 1 << Idx; + const size_t BitSize = ByteSize * 8; + Type *Ty = Type::getIntNTy(IRB.getContext(), BitSize); + Type *PtrTy = Ty->getPointerTo(); + Value *Args[] = {IRB.CreatePointerCast(Addr, PtrTy), + createOrdering(&IRB, LI->getOrdering())}; + CallInst *C = CallInst::Create(TsanAtomicLoad[Idx], + ArrayRef(Args)); + ReplaceInstWithInst(I, C); + + } else if (StoreInst *SI = dyn_cast(I)) { + Value *Addr = SI->getPointerOperand(); + int Idx = getMemoryAccessFuncIndex(Addr); + if (Idx < 0) + return false; + const size_t ByteSize = 1 << Idx; + const size_t BitSize = ByteSize * 8; + Type *Ty = Type::getIntNTy(IRB.getContext(), BitSize); + Type *PtrTy = Ty->getPointerTo(); + Value *Args[] = {IRB.CreatePointerCast(Addr, PtrTy), + IRB.CreateIntCast(SI->getValueOperand(), Ty, false), + createOrdering(&IRB, SI->getOrdering())}; + CallInst *C = CallInst::Create(TsanAtomicStore[Idx], + ArrayRef(Args)); + ReplaceInstWithInst(I, C); + } else if (isa(I)) { + // FIXME: Not yet supported. + } else if (isa(I)) { + // FIXME: Not yet supported. + } else if (isa(I)) { + // FIXME: Not yet supported. 
+  }
   return true;
 }
+
+int ThreadSanitizer::getMemoryAccessFuncIndex(Value *Addr) {
+  Type *OrigPtrTy = Addr->getType();
+  Type *OrigTy = cast<PointerType>(OrigPtrTy)->getElementType();
+  assert(OrigTy->isSized());
+  uint32_t TypeSize = TD->getTypeStoreSizeInBits(OrigTy);
+  if (TypeSize != 8  && TypeSize != 16 &&
+      TypeSize != 32 && TypeSize != 64 && TypeSize != 128) {
+    NumAccessesWithBadSize++;
+    // Ignore all unusual sizes.
+    return -1;
+  }
+  size_t Idx = CountTrailingZeros_32(TypeSize / 8);
+  assert(Idx < kNumberOfAccessSizes);
+  return Idx;
+}
diff --git a/lib/Transforms/Scalar/ADCE.cpp b/lib/Transforms/Scalar/ADCE.cpp
index ba214d1..b344952 100644
--- a/lib/Transforms/Scalar/ADCE.cpp
+++ b/lib/Transforms/Scalar/ADCE.cpp
@@ -9,7 +9,7 @@
 //
 // This file implements the Aggressive Dead Code Elimination pass.  This pass
 // optimistically assumes that all instructions are dead until proven otherwise,
-// allowing it to eliminate dead computations that other DCE passes do not 
+// allowing it to eliminate dead computations that other DCE passes do not
 // catch, particularly involving loop computations.
 //
 //===----------------------------------------------------------------------===//
@@ -36,13 +36,13 @@
     ADCE() : FunctionPass(ID) {
      initializeADCEPass(*PassRegistry::getPassRegistry());
     }
-    
+
     virtual bool runOnFunction(Function& F);
-    
+
     virtual void getAnalysisUsage(AnalysisUsage& AU) const {
       AU.setPreservesCFG();
     }
-    
+
   };
 }
@@ -52,7 +52,7 @@ INITIALIZE_PASS(ADCE, "adce", "Aggressive Dead Code Elimination",
                 false, false)
 bool ADCE::runOnFunction(Function& F) {
   SmallPtrSet<Instruction*, 128> alive;
   SmallVector<Instruction*, 128> worklist;
-  
+
   // Collect the set of "root" instructions that are known live.
   for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I)
     if (isa<TerminatorInst>(I.getInstructionIterator()) ||
@@ -62,7 +62,7 @@
       alive.insert(I.getInstructionIterator());
       worklist.push_back(I.getInstructionIterator());
     }
-  
+
   // Propagate liveness backwards to operands.
   while (!worklist.empty()) {
     Instruction* curr = worklist.pop_back_val();
@@ -72,7 +72,7 @@
       if (alive.insert(Inst))
        worklist.push_back(Inst);
     }
-  
+
   // The inverse of the live set is the dead set.  These are those instructions
   // which have no side effects and do not influence the control flow or return
   // value of the function, and may therefore be deleted safely.
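The ADCE hunks above only touch trailing whitespace, but the surrounding algorithm deserves a concrete restatement: every instruction is presumed dead, the known-live "roots" (terminators and side-effecting instructions) seed a worklist, and liveness propagates backwards through operand edges. The stand-alone sketch below shows that mark-and-sweep over a toy instruction array; ToyInst and findLive are invented for this note and stand in for LLVM's IR classes.

// Minimal sketch of ADCE-style liveness: assume dead, seed roots, walk uses.
#include <cassert>
#include <cstddef>
#include <set>
#include <vector>

struct ToyInst {
  std::vector<std::size_t> Operands; // indices of instructions this one uses
  bool IsRoot;                       // terminator / has side effects
};

static std::set<std::size_t> findLive(const std::vector<ToyInst> &Insts) {
  std::set<std::size_t> Alive;
  std::vector<std::size_t> Worklist;
  // Collect the "root" instructions that are known live.
  for (std::size_t I = 0; I != Insts.size(); ++I)
    if (Insts[I].IsRoot && Alive.insert(I).second)
      Worklist.push_back(I);
  // Propagate liveness backwards to operands.
  while (!Worklist.empty()) {
    std::size_t Cur = Worklist.back();
    Worklist.pop_back();
    for (std::size_t Op : Insts[Cur].Operands)
      if (Alive.insert(Op).second)   // newly live: visit its operands too
        Worklist.push_back(Op);
  }
  return Alive;                      // the complement is safely deletable
}

int main() {
  // 0: unused arithmetic, 1: feeds the root, 2: root (e.g. a return)
  std::vector<ToyInst> F = {{{}, false}, {{}, false}, {{1}, true}};
  std::set<std::size_t> Live = findLive(F);
  assert(Live.count(2) && Live.count(1) && !Live.count(0));
  return 0;
}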
@@ -82,7 +82,7 @@ bool ADCE::runOnFunction(Function& F) { worklist.push_back(I.getInstructionIterator()); I->dropAllReferences(); } - + for (SmallVector::iterator I = worklist.begin(), E = worklist.end(); I != E; ++I) { ++NumRemoved; diff --git a/lib/Transforms/Scalar/CMakeLists.txt b/lib/Transforms/Scalar/CMakeLists.txt index d660c72..a01e066 100644 --- a/lib/Transforms/Scalar/CMakeLists.txt +++ b/lib/Transforms/Scalar/CMakeLists.txt @@ -32,3 +32,5 @@ add_llvm_library(LLVMScalarOpts Sink.cpp TailRecursionElimination.cpp ) + +add_dependencies(LLVMScalarOpts intrinsics_gen) diff --git a/lib/Transforms/Scalar/CodeGenPrepare.cpp b/lib/Transforms/Scalar/CodeGenPrepare.cpp index 9a5423f..bc87106 100644 --- a/lib/Transforms/Scalar/CodeGenPrepare.cpp +++ b/lib/Transforms/Scalar/CodeGenPrepare.cpp @@ -18,32 +18,32 @@ #include "llvm/Constants.h" #include "llvm/DerivedTypes.h" #include "llvm/Function.h" +#include "llvm/IRBuilder.h" #include "llvm/InlineAsm.h" #include "llvm/Instructions.h" #include "llvm/IntrinsicInst.h" #include "llvm/Pass.h" -#include "llvm/Analysis/Dominators.h" -#include "llvm/Analysis/InstructionSimplify.h" -#include "llvm/Analysis/ProfileInfo.h" -#include "llvm/Target/TargetData.h" -#include "llvm/Target/TargetLibraryInfo.h" -#include "llvm/Target/TargetLowering.h" -#include "llvm/Transforms/Utils/AddrModeMatcher.h" -#include "llvm/Transforms/Utils/BasicBlockUtils.h" -#include "llvm/Transforms/Utils/Local.h" -#include "llvm/Transforms/Utils/BuildLibCalls.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/Dominators.h" +#include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Analysis/ProfileInfo.h" #include "llvm/Assembly/Writer.h" #include "llvm/Support/CallSite.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/GetElementPtrTypeIterator.h" #include "llvm/Support/PatternMatch.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/Support/IRBuilder.h" #include "llvm/Support/ValueHandle.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetLibraryInfo.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/Transforms/Utils/AddrModeMatcher.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/BuildLibCalls.h" +#include "llvm/Transforms/Utils/Local.h" using namespace llvm; using namespace llvm::PatternMatch; @@ -60,6 +60,7 @@ STATISTIC(NumExtsMoved, "Number of [s|z]ext instructions combined with loads"); STATISTIC(NumExtUses, "Number of uses of [s|z]ext instructions optimized"); STATISTIC(NumRetsDup, "Number of return instructions duplicated"); STATISTIC(NumDbgValueMoved, "Number of debug value instructions moved"); +STATISTIC(NumSelectsExpanded, "Number of selects turned into branches"); static cl::opt DisableBranchOpts( "disable-cgp-branch-opts", cl::Hidden, cl::init(false), @@ -70,6 +71,10 @@ static cl::opt DisableDeleteDeadBlocks( "disable-cgp-delete-dead-blocks", cl::Hidden, cl::init(false), cl::desc("Disable deleting dead blocks in CodeGenPrepare")); +static cl::opt DisableSelectToBranch( + "disable-cgp-select2branch", cl::Hidden, cl::init(false), + cl::desc("Disable select to branch conversion.")); + namespace { class CodeGenPrepare : public FunctionPass { /// TLI - Keep a pointer of a TargetLowering to consult for determining @@ -78,7 +83,7 @@ namespace { const TargetLibraryInfo *TLInfo; DominatorTree *DT; ProfileInfo *PFI; - + /// CurInstIterator - As we 
scan instructions optimizing them, this is the /// next instruction to optimize. Xforms that can invalidate this should /// update it. @@ -93,6 +98,9 @@ namespace { /// be updated. bool ModifiedDT; + /// OptSize - True if optimizing for size. + bool OptSize; + public: static char ID; // Pass identification, replacement for typeid explicit CodeGenPrepare(const TargetLowering *tli = 0) @@ -108,6 +116,7 @@ namespace { } private: + bool EliminateFallThrough(Function &F); bool EliminateMostlyEmptyBlocks(Function &F); bool CanMergeBlocks(const BasicBlock *BB, const BasicBlock *DestBB) const; void EliminateMostlyEmptyBlock(BasicBlock *BB); @@ -118,6 +127,7 @@ namespace { bool OptimizeCallInst(CallInst *CI); bool MoveExtToFormExtLoad(Instruction *I); bool OptimizeExtUses(Instruction *I); + bool OptimizeSelectInst(SelectInst *SI); bool DupRetToEnableTailCallOpts(ReturnInst *RI); bool PlaceDbgValues(Function &F); }; @@ -141,13 +151,14 @@ bool CodeGenPrepare::runOnFunction(Function &F) { TLInfo = &getAnalysis(); DT = getAnalysisIfAvailable(); PFI = getAnalysisIfAvailable(); + OptSize = F.hasFnAttr(Attribute::OptimizeForSize); // First pass, eliminate blocks that contain only PHI nodes and an // unconditional branch. EverMadeChange |= EliminateMostlyEmptyBlocks(F); // llvm.dbg.value is far away from the value then iSel may not be able - // handle it properly. iSel will drop llvm.dbg.value if it can not + // handle it properly. iSel will drop llvm.dbg.value if it can not // find a node corresponding to the value. EverMadeChange |= PlaceDbgValues(F); @@ -182,6 +193,11 @@ bool CodeGenPrepare::runOnFunction(Function &F) { I = WorkList.begin(), E = WorkList.end(); I != E; ++I) DeleteDeadBlock(*I); + // Merge pairs of basic blocks with unconditional branches, connected by + // a single edge. + if (EverMadeChange || MadeChange) + MadeChange |= EliminateFallThrough(F); + if (MadeChange) ModifiedDT = true; EverMadeChange |= MadeChange; @@ -193,6 +209,39 @@ bool CodeGenPrepare::runOnFunction(Function &F) { return EverMadeChange; } +/// EliminateFallThrough - Merge basic blocks which are connected +/// by a single edge, where one of the basic blocks has a single successor +/// pointing to the other basic block, which has a single predecessor. +bool CodeGenPrepare::EliminateFallThrough(Function &F) { + bool Changed = false; + // Scan all of the blocks in the function, except for the entry block. + for (Function::iterator I = ++F.begin(), E = F.end(); I != E; ) { + BasicBlock *BB = I++; + // If the destination block has a single pred, then this is a trivial + // edge, just collapse it. + BasicBlock *SinglePred = BB->getSinglePredecessor(); + + if (!SinglePred || SinglePred == BB) continue; + + BranchInst *Term = dyn_cast(SinglePred->getTerminator()); + if (Term && !Term->isConditional()) { + Changed = true; + // Remember if SinglePred was the entry block of the function. + // If so, we will need to move BB back to the entry position. + bool isEntry = SinglePred == &SinglePred->getParent()->getEntryBlock(); + MergeBasicBlockIntoOnlyPred(BB, this); + + if (isEntry && BB != &BB->getParent()->getEntryBlock()) + BB->moveBefore(&BB->getParent()->getEntryBlock()); + + // We have erased a block. Update the iterator. + I = BB; + DEBUG(dbgs() << "Merged:\n"<< *SinglePred << "\n\n\n"); + } + } + return Changed; +} + /// EliminateMostlyEmptyBlocks - eliminate blocks that contain only PHI nodes, /// debug info directives, and an unconditional branch. Passes before isel /// (e.g. 
LSR/loopsimplify) often split edges in ways that are non-optimal for @@ -326,7 +375,7 @@ void CodeGenPrepare::EliminateMostlyEmptyBlock(BasicBlock *BB) { if (isEntry && BB != &BB->getParent()->getEntryBlock()) BB->moveBefore(&BB->getParent()->getEntryBlock()); - + DEBUG(dbgs() << "AFTER:\n" << *DestBB << "\n\n\n"); return; } @@ -537,7 +586,7 @@ protected: bool CodeGenPrepare::OptimizeCallInst(CallInst *CI) { BasicBlock *BB = CI->getParent(); - + // Lower inline assembly if we can. // If we found an inline asm expession, and if the target knows how to // lower it to normal LLVM code, do so now. @@ -554,19 +603,19 @@ bool CodeGenPrepare::OptimizeCallInst(CallInst *CI) { if (OptimizeInlineAsmInst(CI)) return true; } - + // Lower all uses of llvm.objectsize.* IntrinsicInst *II = dyn_cast(CI); if (II && II->getIntrinsicID() == Intrinsic::objectsize) { bool Min = (cast(II->getArgOperand(1))->getZExtValue() == 1); Type *ReturnTy = CI->getType(); - Constant *RetVal = ConstantInt::get(ReturnTy, Min ? 0 : -1ULL); - + Constant *RetVal = ConstantInt::get(ReturnTy, Min ? 0 : -1ULL); + // Substituting this can cause recursive simplifications, which can // invalidate our iterator. Use a WeakVH to hold onto it in case this // happens. WeakVH IterHandle(CurInstIterator); - + replaceAndRecursivelySimplify(CI, RetVal, TLI ? TLI->getTargetData() : 0, TLInfo, ModifiedDT ? 0 : DT); @@ -594,13 +643,13 @@ bool CodeGenPrepare::OptimizeCallInst(CallInst *CI) { // We'll need TargetData from here on out. const TargetData *TD = TLI ? TLI->getTargetData() : 0; if (!TD) return false; - + // Lower all default uses of _chk calls. This is very similar // to what InstCombineCalls does, but here we are only lowering calls // that have the default "don't know" as the objectsize. Anything else // should be left alone. CodeGenPrepareFortifiedLibCalls Simplifier; - return Simplifier.fold(CI, TD); + return Simplifier.fold(CI, TD, TLInfo); } /// DupRetToEnableTailCallOpts - Look for opportunities to duplicate return @@ -635,10 +684,18 @@ bool CodeGenPrepare::DupRetToEnableTailCallOpts(ReturnInst *RI) { if (!TLI) return false; + PHINode *PN = 0; + BitCastInst *BCI = 0; Value *V = RI->getReturnValue(); - PHINode *PN = V ? dyn_cast(V) : NULL; - if (V && !PN) - return false; + if (V) { + BCI = dyn_cast(V); + if (BCI) + V = BCI->getOperand(0); + + PN = dyn_cast(V); + if (!PN) + return false; + } BasicBlock *BB = RI->getParent(); if (PN && PN->getParent() != BB) @@ -656,6 +713,9 @@ bool CodeGenPrepare::DupRetToEnableTailCallOpts(ReturnInst *RI) { if (PN) { BasicBlock::iterator BI = BB->begin(); do { ++BI; } while (isa(BI)); + if (&*BI == BCI) + // Also skip over the bitcast. + ++BI; if (&*BI != RI) return false; } else { @@ -750,13 +810,13 @@ static bool IsNonLocalValue(Value *V, BasicBlock *BB) { bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr, Type *AccessTy) { Value *Repl = Addr; - - // Try to collapse single-value PHI nodes. This is necessary to undo + + // Try to collapse single-value PHI nodes. This is necessary to undo // unprofitable PRE transformations. SmallVector worklist; SmallPtrSet Visited; worklist.push_back(Addr); - + // Use a worklist to iteratively look through PHI nodes, and ensure that // the addressing mode obtained from the non-PHI roots of the graph // are equivalent. @@ -768,20 +828,20 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr, while (!worklist.empty()) { Value *V = worklist.back(); worklist.pop_back(); - + // Break use-def graph loops. 
if (!Visited.insert(V)) { Consensus = 0; break; } - + // For a PHI node, push all of its incoming values. if (PHINode *P = dyn_cast(V)) { for (unsigned i = 0, e = P->getNumIncomingValues(); i != e; ++i) worklist.push_back(P->getIncomingValue(i)); continue; } - + // For non-PHIs, determine the addressing mode being computed. SmallVector NewAddrModeInsts; ExtAddrMode NewAddrMode = @@ -816,15 +876,15 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr, } continue; } - + Consensus = 0; break; } - + // If the addressing mode couldn't be determined, or if multiple different // ones were determined, bail out now. if (!Consensus) return false; - + // Check to see if any of the instructions supersumed by this addr mode are // non-local to I's BB. bool AnyNonLocal = false; @@ -933,7 +993,7 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr, // Use a WeakVH to hold onto it in case this happens. WeakVH IterHandle(CurInstIterator); BasicBlock *BB = CurInstIterator->getParent(); - + RecursivelyDeleteTriviallyDeadInstructions(Repl); if (IterHandle != CurInstIterator) { @@ -945,7 +1005,7 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr, // This address is now available for reassignment, so erase the table // entry; we don't want to match some completely different instruction. SunkAddrs[Addr] = 0; - } + } } ++NumMemoryInsts; return true; @@ -957,12 +1017,12 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr, bool CodeGenPrepare::OptimizeInlineAsmInst(CallInst *CS) { bool MadeChange = false; - TargetLowering::AsmOperandInfoVector + TargetLowering::AsmOperandInfoVector TargetConstraints = TLI->ParseConstraints(CS); unsigned ArgNo = 0; for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) { TargetLowering::AsmOperandInfo &OpInfo = TargetConstraints[i]; - + // Compute the constraint code and ConstraintType to use. TLI->ComputeConstraintToUse(OpInfo, SDValue()); @@ -1091,6 +1151,79 @@ bool CodeGenPrepare::OptimizeExtUses(Instruction *I) { return MadeChange; } +/// isFormingBranchFromSelectProfitable - Returns true if a SelectInst should be +/// turned into an explicit branch. +static bool isFormingBranchFromSelectProfitable(SelectInst *SI) { + // FIXME: This should use the same heuristics as IfConversion to determine + // whether a select is better represented as a branch. This requires that + // branch probability metadata is preserved for the select, which is not the + // case currently. + + CmpInst *Cmp = dyn_cast(SI->getCondition()); + + // If the branch is predicted right, an out of order CPU can avoid blocking on + // the compare. Emit cmovs on compares with a memory operand as branches to + // avoid stalls on the load from memory. If the compare has more than one use + // there's probably another cmov or setcc around so it's not worth emitting a + // branch. + if (!Cmp) + return false; + + Value *CmpOp0 = Cmp->getOperand(0); + Value *CmpOp1 = Cmp->getOperand(1); + + // We check that the memory operand has one use to avoid uses of the loaded + // value directly after the compare, making branches unprofitable. + return Cmp->hasOneUse() && + ((isa(CmpOp0) && CmpOp0->hasOneUse()) || + (isa(CmpOp1) && CmpOp1->hasOneUse())); +} + + +bool CodeGenPrepare::OptimizeSelectInst(SelectInst *SI) { + // If we have a SelectInst that will likely profit from branch prediction, + // turn it into a branch. 
+ if (DisableSelectToBranch || OptSize || !TLI || + !TLI->isPredictableSelectExpensive()) + return false; + + if (!SI->getCondition()->getType()->isIntegerTy(1) || + !isFormingBranchFromSelectProfitable(SI)) + return false; + + ModifiedDT = true; + + // First, we split the block containing the select into 2 blocks. + BasicBlock *StartBlock = SI->getParent(); + BasicBlock::iterator SplitPt = ++(BasicBlock::iterator(SI)); + BasicBlock *NextBlock = StartBlock->splitBasicBlock(SplitPt, "select.end"); + + // Create a new block serving as the landing pad for the branch. + BasicBlock *SmallBlock = BasicBlock::Create(SI->getContext(), "select.mid", + NextBlock->getParent(), NextBlock); + + // Move the unconditional branch from the block with the select in it into our + // landing pad block. + StartBlock->getTerminator()->eraseFromParent(); + BranchInst::Create(NextBlock, SmallBlock); + + // Insert the real conditional branch based on the original condition. + BranchInst::Create(NextBlock, SmallBlock, SI->getCondition(), SI); + + // The select itself is replaced with a PHI Node. + PHINode *PN = PHINode::Create(SI->getType(), 2, "", NextBlock->begin()); + PN->takeName(SI); + PN->addIncoming(SI->getTrueValue(), StartBlock); + PN->addIncoming(SI->getFalseValue(), SmallBlock); + SI->replaceAllUsesWith(PN); + SI->eraseFromParent(); + + // Instruct OptimizeBlock to skip to the next block. + CurInstIterator = StartBlock->end(); + ++NumSelectsExpanded; + return true; +} + bool CodeGenPrepare::OptimizeInst(Instruction *I) { if (PHINode *P = dyn_cast(I)) { // It is possible for very late stage optimizations (such as SimplifyCFG) @@ -1104,7 +1237,7 @@ bool CodeGenPrepare::OptimizeInst(Instruction *I) { } return false; } - + if (CastInst *CI = dyn_cast(I)) { // If the source of the cast is a constant, then this should have // already been constant folded. The only reason NOT to constant fold @@ -1124,23 +1257,23 @@ bool CodeGenPrepare::OptimizeInst(Instruction *I) { } return false; } - + if (CmpInst *CI = dyn_cast(I)) return OptimizeCmpExpression(CI); - + if (LoadInst *LI = dyn_cast(I)) { if (TLI) return OptimizeMemoryInst(I, I->getOperand(0), LI->getType()); return false; } - + if (StoreInst *SI = dyn_cast(I)) { if (TLI) return OptimizeMemoryInst(I, SI->getOperand(1), SI->getOperand(0)->getType()); return false; } - + if (GetElementPtrInst *GEPI = dyn_cast(I)) { if (GEPI->hasAllZeroIndices()) { /// The GEP operand must be a pointer, so must its result -> BitCast @@ -1154,13 +1287,16 @@ bool CodeGenPrepare::OptimizeInst(Instruction *I) { } return false; } - + if (CallInst *CI = dyn_cast(I)) return OptimizeCallInst(CI); if (ReturnInst *RI = dyn_cast(I)) return DupRetToEnableTailCallOpts(RI); + if (SelectInst *SI = dyn_cast(I)) + return OptimizeSelectInst(SI); + return false; } @@ -1179,7 +1315,7 @@ bool CodeGenPrepare::OptimizeBlock(BasicBlock &BB) { } // llvm.dbg.value is far away from the value then iSel may not be able -// handle it properly. iSel will drop llvm.dbg.value if it can not +// handle it properly. iSel will drop llvm.dbg.value if it can not // find a node corresponding to the value. 
bool CodeGenPrepare::PlaceDbgValues(Function &F) { bool MadeChange = false; diff --git a/lib/Transforms/Scalar/DeadStoreElimination.cpp b/lib/Transforms/Scalar/DeadStoreElimination.cpp index c8c5360..8b1283f 100644 --- a/lib/Transforms/Scalar/DeadStoreElimination.cpp +++ b/lib/Transforms/Scalar/DeadStoreElimination.cpp @@ -32,7 +32,7 @@ #include "llvm/Target/TargetData.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Support/Debug.h" -#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SetVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/STLExtras.h" using namespace llvm; @@ -71,7 +71,7 @@ namespace { bool HandleFree(CallInst *F); bool handleEndBlock(BasicBlock &BB); void RemoveAccessedObjects(const AliasAnalysis::Location &LoadedLoc, - SmallPtrSet &DeadStackObjects); + SmallSetVector &DeadStackObjects); virtual void getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesCFG(); @@ -106,7 +106,7 @@ FunctionPass *llvm::createDeadStoreEliminationPass() { return new DSE(); } /// static void DeleteDeadInstruction(Instruction *I, MemoryDependenceAnalysis &MD, - SmallPtrSet *ValueSet = 0) { + SmallSetVector *ValueSet = 0) { SmallVector NowDeadInsts; NowDeadInsts.push_back(I); @@ -136,7 +136,7 @@ static void DeleteDeadInstruction(Instruction *I, DeadInst->eraseFromParent(); - if (ValueSet) ValueSet->erase(DeadInst); + if (ValueSet) ValueSet->remove(DeadInst); } while (!NowDeadInsts.empty()); } @@ -248,7 +248,7 @@ static bool isShortenable(Instruction *I) { // Don't shorten stores for now if (isa(I)) return false; - + IntrinsicInst *II = cast(I); switch (II->getIntrinsicID()) { default: return false; @@ -275,33 +275,9 @@ static Value *getStoredPointerOperand(Instruction *I) { } static uint64_t getPointerSize(const Value *V, AliasAnalysis &AA) { - const TargetData *TD = AA.getTargetData(); - - if (const CallInst *CI = extractMallocCall(V)) { - if (const ConstantInt *C = dyn_cast(CI->getArgOperand(0))) - return C->getZExtValue(); - } - - if (TD == 0) - return AliasAnalysis::UnknownSize; - - if (const AllocaInst *A = dyn_cast(V)) { - // Get size information for the alloca - if (const ConstantInt *C = dyn_cast(A->getArraySize())) - return C->getZExtValue() * TD->getTypeAllocSize(A->getAllocatedType()); - } - - if (const Argument *A = dyn_cast(V)) { - if (A->hasByValAttr()) - if (PointerType *PT = dyn_cast(A->getType())) - return TD->getTypeAllocSize(PT->getElementType()); - } - - if (const GlobalVariable *GV = dyn_cast(V)) { - if (!GV->mayBeOverridden()) - return TD->getTypeAllocSize(GV->getType()->getElementType()); - } - + uint64_t Size; + if (getObjectSize(V, Size, AA.getTargetData())) + return Size; return AliasAnalysis::UnknownSize; } @@ -316,7 +292,7 @@ namespace { /// isOverwrite - Return 'OverwriteComplete' if a store to the 'Later' location /// completely overwrites a store to the 'Earlier' location. 
-/// 'OverwriteEnd' if the end of the 'Earlier' location is completely +/// 'OverwriteEnd' if the end of the 'Earlier' location is completely /// overwritten by 'Later', or 'OverwriteUnknown' if nothing can be determined static OverwriteResult isOverwrite(const AliasAnalysis::Location &Later, const AliasAnalysis::Location &Earlier, @@ -339,7 +315,7 @@ static OverwriteResult isOverwrite(const AliasAnalysis::Location &Later, if (AA.getTargetData() == 0 && Later.Ptr->getType() == Earlier.Ptr->getType()) return OverwriteComplete; - + return OverwriteUnknown; } @@ -402,10 +378,10 @@ static OverwriteResult isOverwrite(const AliasAnalysis::Location &Later, // // We have to be careful here as *Off is signed while *.Size is unsigned. if (EarlierOff >= LaterOff && - Later.Size > Earlier.Size && + Later.Size >= Earlier.Size && uint64_t(EarlierOff - LaterOff) + Earlier.Size <= Later.Size) return OverwriteComplete; - + // The other interesting case is if the later store overwrites the end of // the earlier store // @@ -544,11 +520,11 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) { // If we find a write that is a) removable (i.e., non-volatile), b) is // completely obliterated by the store to 'Loc', and c) which we know that // 'Inst' doesn't load from, then we can remove it. - if (isRemovable(DepWrite) && + if (isRemovable(DepWrite) && !isPossibleSelfRead(Inst, Loc, DepWrite, *AA)) { - int64_t InstWriteOffset, DepWriteOffset; - OverwriteResult OR = isOverwrite(Loc, DepLoc, *AA, - DepWriteOffset, InstWriteOffset); + int64_t InstWriteOffset, DepWriteOffset; + OverwriteResult OR = isOverwrite(Loc, DepLoc, *AA, + DepWriteOffset, InstWriteOffset); if (OR == OverwriteComplete) { DEBUG(dbgs() << "DSE: Remove Dead Store:\n DEAD: " << *DepWrite << "\n KILLER: " << *Inst << '\n'); @@ -557,7 +533,7 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) { DeleteDeadInstruction(DepWrite, *MD); ++NumFastStores; MadeChange = true; - + // DeleteDeadInstruction can delete the current instruction in loop // cases, reset BBI. BBI = Inst; @@ -575,16 +551,16 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) { unsigned DepWriteAlign = DepIntrinsic->getAlignment(); if (llvm::isPowerOf2_64(InstWriteOffset) || ((DepWriteAlign != 0) && InstWriteOffset % DepWriteAlign == 0)) { - + DEBUG(dbgs() << "DSE: Remove Dead Store:\n OW END: " - << *DepWrite << "\n KILLER (offset " - << InstWriteOffset << ", " + << *DepWrite << "\n KILLER (offset " + << InstWriteOffset << ", " << DepLoc.Size << ")" << *Inst << '\n'); - + Value* DepWriteLength = DepIntrinsic->getLength(); Value* TrimmedLength = ConstantInt::get(DepWriteLength->getType(), - InstWriteOffset - + InstWriteOffset - DepWriteOffset); DepIntrinsic->setLength(TrimmedLength); MadeChange = true; @@ -694,19 +670,18 @@ bool DSE::handleEndBlock(BasicBlock &BB) { // Keep track of all of the stack objects that are dead at the end of the // function. - SmallPtrSet DeadStackObjects; + SmallSetVector DeadStackObjects; // Find all of the alloca'd pointers in the entry block. BasicBlock *Entry = BB.getParent()->begin(); for (BasicBlock::iterator I = Entry->begin(), E = Entry->end(); I != E; ++I) { - if (AllocaInst *AI = dyn_cast(I)) - DeadStackObjects.insert(AI); + if (isa(I)) + DeadStackObjects.insert(I); // Okay, so these are dead heap objects, but if the pointer never escapes // then it's leaked by this function anyways. 
- if (CallInst *CI = extractMallocCall(I)) - if (!PointerMayBeCaptured(CI, true, true)) - DeadStackObjects.insert(CI); + else if (isAllocLikeFn(I) && !PointerMayBeCaptured(I, true, true)) + DeadStackObjects.insert(I); } // Treat byval arguments the same, stores to them are dead at the end of the @@ -723,14 +698,30 @@ bool DSE::handleEndBlock(BasicBlock &BB) { // If we find a store, check to see if it points into a dead stack value. if (hasMemoryWrite(BBI) && isRemovable(BBI)) { // See through pointer-to-pointer bitcasts - Value *Pointer = GetUnderlyingObject(getStoredPointerOperand(BBI)); + SmallVector Pointers; + GetUnderlyingObjects(getStoredPointerOperand(BBI), Pointers); // Stores to stack values are valid candidates for removal. - if (DeadStackObjects.count(Pointer)) { + bool AllDead = true; + for (SmallVectorImpl::iterator I = Pointers.begin(), + E = Pointers.end(); I != E; ++I) + if (!DeadStackObjects.count(*I)) { + AllDead = false; + break; + } + + if (AllDead) { Instruction *Dead = BBI++; DEBUG(dbgs() << "DSE: Dead Store at End of Block:\n DEAD: " - << *Dead << "\n Object: " << *Pointer << '\n'); + << *Dead << "\n Objects: "; + for (SmallVectorImpl::iterator I = Pointers.begin(), + E = Pointers.end(); I != E; ++I) { + dbgs() << **I; + if (llvm::next(I) != E) + dbgs() << ", "; + } + dbgs() << '\n'); // DCE instructions only used to calculate that store. DeleteDeadInstruction(Dead, *MD, &DeadStackObjects); @@ -749,17 +740,19 @@ bool DSE::handleEndBlock(BasicBlock &BB) { continue; } - if (AllocaInst *A = dyn_cast(BBI)) { - DeadStackObjects.erase(A); - continue; - } - - if (CallInst *CI = extractMallocCall(BBI)) { - DeadStackObjects.erase(CI); + if (isa(BBI)) { + // Remove allocas from the list of dead stack objects; there can't be + // any references before the definition. + DeadStackObjects.remove(BBI); continue; } if (CallSite CS = cast(BBI)) { + // Remove allocation function calls from the list of dead stack objects; + // there can't be any references before the definition. + if (isAllocLikeFn(BBI)) + DeadStackObjects.remove(BBI); + // If this call does not access memory, it can't be loading any of our // pointers. if (AA->doesNotAccessMemory(CS)) @@ -768,7 +761,7 @@ bool DSE::handleEndBlock(BasicBlock &BB) { // If the call might load from any of our allocas, then any store above // the call is live. SmallVector LiveAllocas; - for (SmallPtrSet::iterator I = DeadStackObjects.begin(), + for (SmallSetVector::iterator I = DeadStackObjects.begin(), E = DeadStackObjects.end(); I != E; ++I) { // See if the call site touches it. AliasAnalysis::ModRefResult A = @@ -780,12 +773,12 @@ bool DSE::handleEndBlock(BasicBlock &BB) { for (SmallVector::iterator I = LiveAllocas.begin(), E = LiveAllocas.end(); I != E; ++I) - DeadStackObjects.erase(*I); + DeadStackObjects.remove(*I); // If all of the allocas were clobbered by the call then we're not going // to find anything else to process. if (DeadStackObjects.empty()) - return MadeChange; + break; continue; } @@ -827,7 +820,7 @@ bool DSE::handleEndBlock(BasicBlock &BB) { /// of the stack objects in the DeadStackObjects set. If so, they become live /// because the location is being loaded. void DSE::RemoveAccessedObjects(const AliasAnalysis::Location &LoadedLoc, - SmallPtrSet &DeadStackObjects) { + SmallSetVector &DeadStackObjects) { const Value *UnderlyingPointer = GetUnderlyingObject(LoadedLoc.Ptr); // A constant can't be in the dead pointer set. 
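The switch above from SmallPtrSet to SmallSetVector for DeadStackObjects is worth pausing on: a pointer set iterates in address order, which can differ from run to run, while a set-vector keeps set membership but iterates in insertion order, so the alias queries over dead objects are walked deterministically. Below is a minimal sketch of that container idea; MiniSetVector is invented for this note, and llvm::SmallSetVector additionally stores small element counts inline.

// Sketch of the set-vector idea: O(1) membership plus deterministic order.
#include <cassert>
#include <unordered_set>
#include <vector>

template <typename T> class MiniSetVector {
  std::vector<T> Order;          // defines iteration order
  std::unordered_set<T> Members; // gives O(1) membership tests
public:
  bool insert(const T &V) {
    if (!Members.insert(V).second)
      return false;              // already present, order unchanged
    Order.push_back(V);
    return true;
  }
  bool count(const T &V) const { return Members.count(V) != 0; }
  void remove(const T &V) {      // linear scan, much like SetVector::remove
    if (!Members.erase(V))
      return;
    for (typename std::vector<T>::iterator I = Order.begin();
         I != Order.end(); ++I)
      if (*I == V) { Order.erase(I); break; }
  }
  bool empty() const { return Order.empty(); }
  typename std::vector<T>::const_iterator begin() const { return Order.begin(); }
  typename std::vector<T>::const_iterator end() const { return Order.end(); }
};

int main() {
  MiniSetVector<int> S;
  S.insert(30); S.insert(10); S.insert(30); // the duplicate is ignored
  assert(S.count(30) && !S.count(20));
  assert(*S.begin() == 30);                 // insertion order, not value order
  S.remove(30);
  assert(*S.begin() == 10);
  return 0;
}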
@@ -837,12 +830,12 @@ void DSE::RemoveAccessedObjects(const AliasAnalysis::Location &LoadedLoc, // If the kill pointer can be easily reduced to an alloca, don't bother doing // extraneous AA queries. if (isa(UnderlyingPointer) || isa(UnderlyingPointer)) { - DeadStackObjects.erase(const_cast(UnderlyingPointer)); + DeadStackObjects.remove(const_cast(UnderlyingPointer)); return; } SmallVector NowLive; - for (SmallPtrSet::iterator I = DeadStackObjects.begin(), + for (SmallSetVector::iterator I = DeadStackObjects.begin(), E = DeadStackObjects.end(); I != E; ++I) { // See if the loaded location could alias the stack location. AliasAnalysis::Location StackLoc(*I, getPointerSize(*I, *AA)); @@ -852,5 +845,5 @@ void DSE::RemoveAccessedObjects(const AliasAnalysis::Location &LoadedLoc, for (SmallVector::iterator I = NowLive.begin(), E = NowLive.end(); I != E; ++I) - DeadStackObjects.erase(*I); + DeadStackObjects.remove(*I); } diff --git a/lib/Transforms/Scalar/EarlyCSE.cpp b/lib/Transforms/Scalar/EarlyCSE.cpp index f3c92d6..9759549 100644 --- a/lib/Transforms/Scalar/EarlyCSE.cpp +++ b/lib/Transforms/Scalar/EarlyCSE.cpp @@ -39,7 +39,7 @@ static unsigned getHash(const void *V) { } //===----------------------------------------------------------------------===// -// SimpleValue +// SimpleValue //===----------------------------------------------------------------------===// namespace { @@ -47,16 +47,16 @@ namespace { /// scoped hash table. struct SimpleValue { Instruction *Inst; - + SimpleValue(Instruction *I) : Inst(I) { assert((isSentinel() || canHandle(I)) && "Inst can't be handled!"); } - + bool isSentinel() const { return Inst == DenseMapInfo::getEmptyKey() || Inst == DenseMapInfo::getTombstoneKey(); } - + static bool canHandle(Instruction *Inst) { // This can only handle non-void readnone functions. if (CallInst *CI = dyn_cast(Inst)) @@ -90,7 +90,7 @@ template<> struct DenseMapInfo { unsigned DenseMapInfo::getHashValue(SimpleValue Val) { Instruction *Inst = Val.Inst; - + // Hash in all of the operands as pointers. unsigned Res = 0; for (unsigned i = 0, e = Inst->getNumOperands(); i != e; ++i) @@ -126,13 +126,13 @@ bool DenseMapInfo::isEqual(SimpleValue LHS, SimpleValue RHS) { if (LHS.isSentinel() || RHS.isSentinel()) return LHSI == RHSI; - + if (LHSI->getOpcode() != RHSI->getOpcode()) return false; return LHSI->isIdenticalTo(RHSI); } //===----------------------------------------------------------------------===// -// CallValue +// CallValue //===----------------------------------------------------------------------===// namespace { @@ -140,21 +140,21 @@ namespace { /// the scoped hash table. struct CallValue { Instruction *Inst; - + CallValue(Instruction *I) : Inst(I) { assert((isSentinel() || canHandle(I)) && "Inst can't be handled!"); } - + bool isSentinel() const { return Inst == DenseMapInfo::getEmptyKey() || Inst == DenseMapInfo::getTombstoneKey(); } - + static bool canHandle(Instruction *Inst) { // Don't value number anything that returns void. if (Inst->getType()->isVoidTy()) return false; - + CallInst *CI = dyn_cast(Inst); if (CI == 0 || !CI->onlyReadsMemory()) return false; @@ -168,7 +168,7 @@ namespace llvm { template<> struct isPodLike { static const bool value = true; }; - + template<> struct DenseMapInfo { static inline CallValue getEmptyKey() { return DenseMapInfo::getEmptyKey(); @@ -189,7 +189,7 @@ unsigned DenseMapInfo::getHashValue(CallValue Val) { "Cannot value number calls with metadata operands"); Res ^= getHash(Inst->getOperand(i)) << (i & 0xF); } - + // Mix in the opcode. 
return (Res << 1) ^ Inst->getOpcode(); } @@ -203,11 +203,11 @@ bool DenseMapInfo::isEqual(CallValue LHS, CallValue RHS) { //===----------------------------------------------------------------------===// -// EarlyCSE pass. +// EarlyCSE pass. //===----------------------------------------------------------------------===// namespace { - + /// EarlyCSE - This pass does a simple depth-first walk over the dominator /// tree, eliminating trivially redundant instructions and using instsimplify /// to canonicalize things as it goes. It is intended to be fast and catch @@ -223,14 +223,14 @@ public: ScopedHashTableVal > AllocatorTy; typedef ScopedHashTable, AllocatorTy> ScopedHTType; - + /// AvailableValues - This scoped hash table contains the current values of /// all of our simple scalar expressions. As we walk down the domtree, we /// look to see if instructions are in this: if so, we replace them with what /// we find, otherwise we insert them so that dominated values can succeed in /// their lookup. ScopedHTType *AvailableValues; - + /// AvailableLoads - This scoped hash table contains the current values /// of loads. This allows us to get efficient access to dominating loads when /// we have a fully redundant load. In addition to the most recent load, we @@ -243,15 +243,15 @@ public: typedef ScopedHashTable, DenseMapInfo, LoadMapAllocator> LoadHTType; LoadHTType *AvailableLoads; - + /// AvailableCalls - This scoped hash table contains the current values /// of read-only call values. It uses the same generation count as loads. typedef ScopedHashTable > CallHTType; CallHTType *AvailableCalls; - + /// CurrentGeneration - This is the current generation of the memory value. unsigned CurrentGeneration; - + static char ID; explicit EarlyCSE() : FunctionPass(ID) { initializeEarlyCSEPass(*PassRegistry::getPassRegistry()); @@ -326,7 +326,7 @@ private: }; bool processNode(DomTreeNode *Node); - + // This transformation requires dominator postdominator info virtual void getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired(); @@ -350,7 +350,7 @@ INITIALIZE_PASS_END(EarlyCSE, "early-cse", "Early CSE", false, false) bool EarlyCSE::processNode(DomTreeNode *Node) { BasicBlock *BB = Node->getBlock(); - + // If this block has a single predecessor, then the predecessor is the parent // of the domtree node and all of the live out memory values are still current // in this block. If this block has multiple predecessors, then they could @@ -359,20 +359,20 @@ bool EarlyCSE::processNode(DomTreeNode *Node) { // predecessors. if (BB->getSinglePredecessor() == 0) ++CurrentGeneration; - + /// LastStore - Keep track of the last non-volatile store that we saw... for /// as long as there in no instruction that reads memory. If we see a store /// to the same location, we delete the dead store. This zaps trivial dead /// stores which can occur in bitfield code among other things. StoreInst *LastStore = 0; - + bool Changed = false; // See if any instructions in the block can be eliminated. If so, do it. If // not, add them to AvailableValues. for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ) { Instruction *Inst = I++; - + // Dead instructions should just be removed. if (isInstructionTriviallyDead(Inst)) { DEBUG(dbgs() << "EarlyCSE DCE: " << *Inst << '\n'); @@ -381,7 +381,7 @@ bool EarlyCSE::processNode(DomTreeNode *Node) { ++NumSimplify; continue; } - + // If the instruction can be simplified (e.g. X+0 = X) then replace it with // its simpler value. 
if (Value *V = SimplifyInstruction(Inst, TD, TLI, DT)) { @@ -392,7 +392,7 @@ bool EarlyCSE::processNode(DomTreeNode *Node) { ++NumSimplify; continue; } - + // If this is a simple instruction that we can value number, process it. if (SimpleValue::canHandle(Inst)) { // See if the instruction has an available value. If so, use it. @@ -404,12 +404,12 @@ bool EarlyCSE::processNode(DomTreeNode *Node) { ++NumCSE; continue; } - + // Otherwise, just remember that this value is available. AvailableValues->insert(Inst, Inst); continue; } - + // If this is a non-volatile load, process it. if (LoadInst *LI = dyn_cast(Inst)) { // Ignore volatile loads. @@ -417,7 +417,7 @@ bool EarlyCSE::processNode(DomTreeNode *Node) { LastStore = 0; continue; } - + // If we have an available version of this load, and if it is the right // generation, replace this instruction. std::pair InVal = @@ -431,18 +431,18 @@ bool EarlyCSE::processNode(DomTreeNode *Node) { ++NumCSELoad; continue; } - + // Otherwise, remember that we have this instruction. AvailableLoads->insert(Inst->getOperand(0), std::pair(Inst, CurrentGeneration)); LastStore = 0; continue; } - + // If this instruction may read from memory, forget LastStore. if (Inst->mayReadFromMemory()) LastStore = 0; - + // If this is a read-only call, process it. if (CallValue::canHandle(Inst)) { // If we have an available version of this call, and if it is the right @@ -457,19 +457,19 @@ bool EarlyCSE::processNode(DomTreeNode *Node) { ++NumCSECall; continue; } - + // Otherwise, remember that we have this instruction. AvailableCalls->insert(Inst, std::pair(Inst, CurrentGeneration)); continue; } - + // Okay, this isn't something we can CSE at all. Check to see if it is // something that could modify memory. If so, our available memory values // cannot be used so bump the generation count. if (Inst->mayWriteToMemory()) { ++CurrentGeneration; - + if (StoreInst *SI = dyn_cast(Inst)) { // We do a trivial form of DSE if there are two stores to the same // location with no intervening loads. Delete the earlier store. @@ -483,7 +483,7 @@ bool EarlyCSE::processNode(DomTreeNode *Node) { LastStore = 0; continue; } - + // Okay, we just invalidated anything we knew about loaded values. Try // to salvage *something* by remembering that the stored value is a live // version of the pointer. It is safe to forward from volatile stores @@ -491,7 +491,7 @@ bool EarlyCSE::processNode(DomTreeNode *Node) { // the store. AvailableLoads->insert(SI->getPointerOperand(), std::pair(SI->getValueOperand(), CurrentGeneration)); - + // Remember that this was the last store we saw for DSE. if (SI->isSimple()) LastStore = SI; @@ -509,7 +509,7 @@ bool EarlyCSE::runOnFunction(Function &F) { TD = getAnalysisIfAvailable(); TLI = &getAnalysis(); DT = &getAnalysis(); - + // Tables that the pass uses when walking the domtree. 
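// Aside: the LastStore bookkeeping above, reduced to one standalone function
// over toy instruction records. This is a sketch only: the real pass resets
// LastStore on anything that may read memory, not just explicit loads.

#include <vector>

struct ToyInst {
  enum Kind { Load, Store, Other } K;
  int Ptr;   // meaningful for Load/Store only
  bool Dead; // set when trivially dead-store-eliminated
};

void trivialStoreDSE(std::vector<ToyInst> &Block) {
  ToyInst *LastStore = nullptr;
  for (ToyInst &I : Block) {
    if (I.K == ToyInst::Load) {
      LastStore = nullptr; // a read keeps the pending store alive
      continue;
    }
    if (I.K != ToyInst::Store)
      continue;
    if (LastStore && LastStore->Ptr == I.Ptr)
      LastStore->Dead = true; // overwritten before ever being read
    LastStore = &I;
  }
}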
ScopedHTType AVTable; AvailableValues = &AVTable; @@ -517,7 +517,7 @@ bool EarlyCSE::runOnFunction(Function &F) { AvailableLoads = &LoadTable; CallHTType CallTable; AvailableCalls = &CallTable; - + CurrentGeneration = 0; bool Changed = false; diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp index fb733ad..120175d 100644 --- a/lib/Transforms/Scalar/GVN.cpp +++ b/lib/Transforms/Scalar/GVN.cpp @@ -18,8 +18,15 @@ #define DEBUG_TYPE "gvn" #include "llvm/Transforms/Scalar.h" #include "llvm/GlobalVariable.h" +#include "llvm/IRBuilder.h" #include "llvm/IntrinsicInst.h" #include "llvm/LLVMContext.h" +#include "llvm/Metadata.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DepthFirstIterator.h" +#include "llvm/ADT/Hashing.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/Dominators.h" @@ -30,20 +37,14 @@ #include "llvm/Analysis/PHITransAddr.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/Assembly/Writer.h" -#include "llvm/Target/TargetData.h" -#include "llvm/Target/TargetLibraryInfo.h" -#include "llvm/Transforms/Utils/BasicBlockUtils.h" -#include "llvm/Transforms/Utils/SSAUpdater.h" -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/DepthFirstIterator.h" -#include "llvm/ADT/Hashing.h" -#include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/Statistic.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/IRBuilder.h" #include "llvm/Support/PatternMatch.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetLibraryInfo.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/SSAUpdater.h" using namespace llvm; using namespace PatternMatch; @@ -59,6 +60,11 @@ static cl::opt EnablePRE("enable-pre", cl::init(true), cl::Hidden); static cl::opt EnableLoadPRE("enable-load-pre", cl::init(true)); +// Maximum allowed recursion depth. +static cl::opt +MaxRecurseDepth("max-recurse-depth", cl::Hidden, cl::init(1000), cl::ZeroOrMore, + cl::desc("Max recurse depth (default = 1000)")); + //===----------------------------------------------------------------------===// // ValueTable Class //===----------------------------------------------------------------------===// @@ -167,7 +173,7 @@ Expression ValueTable::create_expression(Instruction *I) { if (e.varargs[0] > e.varargs[1]) std::swap(e.varargs[0], e.varargs[1]); } - + if (CmpInst *C = dyn_cast(I)) { // Sort the operand value numbers so xx get the same value number. CmpInst::Predicate Predicate = C->getPredicate(); @@ -181,7 +187,7 @@ Expression ValueTable::create_expression(Instruction *I) { II != IE; ++II) e.varargs.push_back(*II); } - + return e; } @@ -385,7 +391,7 @@ uint32_t ValueTable::lookup_or_add(Value *V) { valueNumbering[V] = nextValueNumber; return nextValueNumber++; } - + Instruction* I = cast(V); Expression exp; switch (I->getOpcode()) { @@ -501,17 +507,17 @@ namespace { const TargetLibraryInfo *TLI; ValueTable VN; - + /// LeaderTable - A mapping from value numbers to lists of Value*'s that /// have that value number. Use findLeader to query it. 
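// Aside: create_expression above sorts commutative operand value numbers so
// that, e.g., a+b and b+a receive the same number. That canonicalization in
// isolation (toy expression type, illustrative names only):

#include <algorithm>
#include <cstdint>
#include <vector>

struct ToyExpression {
  uint32_t Opcode;
  std::vector<uint32_t> VarArgs; // operand value numbers
};

ToyExpression makeCommutative(uint32_t Opcode, uint32_t A, uint32_t B) {
  ToyExpression E;
  E.Opcode = Opcode;
  E.VarArgs.push_back(A);
  E.VarArgs.push_back(B);
  if (E.VarArgs[0] > E.VarArgs[1])
    std::swap(E.VarArgs[0], E.VarArgs[1]); // canonical operand order
  return E;
}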
struct LeaderTableEntry { Value *Val; - BasicBlock *BB; + const BasicBlock *BB; LeaderTableEntry *Next; }; DenseMap LeaderTable; BumpPtrAllocator TableAllocator; - + SmallVector InstrsToErase; public: static char ID; // Pass identification, replacement for typeid @@ -521,14 +527,14 @@ namespace { } bool runOnFunction(Function &F); - + /// markInstructionForDeletion - This removes the specified instruction from /// our various maps and marks it for deletion. void markInstructionForDeletion(Instruction *I) { VN.erase(I); InstrsToErase.push_back(I); } - + const TargetData *getTargetData() const { return TD; } DominatorTree &getDominatorTree() const { return *DT; } AliasAnalysis *getAliasAnalysis() const { return VN.getAliasAnalysis(); } @@ -536,32 +542,32 @@ namespace { private: /// addToLeaderTable - Push a new Value to the LeaderTable onto the list for /// its value number. - void addToLeaderTable(uint32_t N, Value *V, BasicBlock *BB) { + void addToLeaderTable(uint32_t N, Value *V, const BasicBlock *BB) { LeaderTableEntry &Curr = LeaderTable[N]; if (!Curr.Val) { Curr.Val = V; Curr.BB = BB; return; } - + LeaderTableEntry *Node = TableAllocator.Allocate(); Node->Val = V; Node->BB = BB; Node->Next = Curr.Next; Curr.Next = Node; } - + /// removeFromLeaderTable - Scan the list of values corresponding to a given - /// value number, and remove the given value if encountered. - void removeFromLeaderTable(uint32_t N, Value *V, BasicBlock *BB) { + /// value number, and remove the given instruction if encountered. + void removeFromLeaderTable(uint32_t N, Instruction *I, BasicBlock *BB) { LeaderTableEntry* Prev = 0; LeaderTableEntry* Curr = &LeaderTable[N]; - while (Curr->Val != V || Curr->BB != BB) { + while (Curr->Val != I || Curr->BB != BB) { Prev = Curr; Curr = Curr->Next; } - + if (Prev) { Prev->Next = Curr->Next; } else { @@ -591,7 +597,7 @@ namespace { AU.addPreserved(); AU.addPreserved(); } - + // Helper fuctions // FIXME: eliminate or document these better @@ -602,13 +608,13 @@ namespace { void dump(DenseMap &d); bool iterateOnFunction(Function &F); bool performPRE(Function &F); - Value *findLeader(BasicBlock *BB, uint32_t num); + Value *findLeader(const BasicBlock *BB, uint32_t num); void cleanupGlobalSets(); void verifyRemoved(const Instruction *I) const; bool splitCriticalEdges(); unsigned replaceAllDominatedUsesWith(Value *From, Value *To, - BasicBlock *Root); - bool propagateEquality(Value *LHS, Value *RHS, BasicBlock *Root); + const BasicBlock *Root); + bool propagateEquality(Value *LHS, Value *RHS, const BasicBlock *Root); }; char GVN::ID = 0; @@ -647,7 +653,11 @@ void GVN::dump(DenseMap& d) { /// 3) we are speculating for this block and have used that to speculate for /// other blocks. static bool IsValueFullyAvailableInBlock(BasicBlock *BB, - DenseMap &FullyAvailableBlocks) { + DenseMap &FullyAvailableBlocks, + uint32_t RecurseDepth) { + if (RecurseDepth > MaxRecurseDepth) + return false; + // Optimistically assume that the block is fully available and check to see // if we already know about this block in one lookup. std::pair::iterator, char> IV = @@ -673,7 +683,7 @@ static bool IsValueFullyAvailableInBlock(BasicBlock *BB, // If the value isn't fully available in one of our predecessors, then it // isn't fully available in this block either. Undo our previous // optimistic assumption and bail out. 
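// Aside: a standalone sketch of the newly depth-capped speculation walk in
// IsValueFullyAvailableInBlock, over a toy CFG given as a map from block id
// to predecessor ids. State: 1 = assumed/known available, 0 = unavailable.
// The real routine also repairs earlier optimistic marks across the whole
// region on failure; that cleanup is elided here.

#include <map>
#include <vector>

static const unsigned ToyMaxRecurseDepth = 1000; // mirrors -max-recurse-depth

bool isFullyAvailable(int BB, const std::map<int, std::vector<int>> &Preds,
                      std::map<int, char> &State, unsigned Depth) {
  if (Depth > ToyMaxRecurseDepth)
    return false; // bail out rather than recurse without bound

  auto Ins = State.insert(std::make_pair(BB, char(1))); // optimistic guess
  if (!Ins.second)
    return Ins.first->second != 0; // already decided (or assumed)

  auto P = Preds.find(BB);
  if (P != Preds.end())
    for (int Pred : P->second)
      if (!isFullyAvailable(Pred, Preds, State, Depth + 1)) {
        State[BB] = 0; // undo the optimistic assumption for this block
        return false;
      }
  return true;
}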
- if (!IsValueFullyAvailableInBlock(*PI, FullyAvailableBlocks)) + if (!IsValueFullyAvailableInBlock(*PI, FullyAvailableBlocks,RecurseDepth+1)) goto SpeculationFailure; return true; @@ -725,15 +735,15 @@ static bool CanCoerceMustAliasedValueToLoad(Value *StoredVal, StoredVal->getType()->isStructTy() || StoredVal->getType()->isArrayTy()) return false; - + // The store has to be at least as big as the load. if (TD.getTypeSizeInBits(StoredVal->getType()) < TD.getTypeSizeInBits(LoadTy)) return false; - + return true; } - + /// CoerceAvailableValueToLoadType - If we saw a store of a value to memory, and /// then a load from a must-aliased pointer of a different type, try to coerce @@ -741,80 +751,80 @@ static bool CanCoerceMustAliasedValueToLoad(Value *StoredVal, /// InsertPt is the place to insert new instructions. /// /// If we can't do it, return null. -static Value *CoerceAvailableValueToLoadType(Value *StoredVal, +static Value *CoerceAvailableValueToLoadType(Value *StoredVal, Type *LoadedTy, Instruction *InsertPt, const TargetData &TD) { if (!CanCoerceMustAliasedValueToLoad(StoredVal, LoadedTy, TD)) return 0; - + // If this is already the right type, just return it. Type *StoredValTy = StoredVal->getType(); - + uint64_t StoreSize = TD.getTypeSizeInBits(StoredValTy); uint64_t LoadSize = TD.getTypeSizeInBits(LoadedTy); - + // If the store and reload are the same size, we can always reuse it. if (StoreSize == LoadSize) { // Pointer to Pointer -> use bitcast. if (StoredValTy->isPointerTy() && LoadedTy->isPointerTy()) return new BitCastInst(StoredVal, LoadedTy, "", InsertPt); - + // Convert source pointers to integers, which can be bitcast. if (StoredValTy->isPointerTy()) { StoredValTy = TD.getIntPtrType(StoredValTy->getContext()); StoredVal = new PtrToIntInst(StoredVal, StoredValTy, "", InsertPt); } - + Type *TypeToCastTo = LoadedTy; if (TypeToCastTo->isPointerTy()) TypeToCastTo = TD.getIntPtrType(StoredValTy->getContext()); - + if (StoredValTy != TypeToCastTo) StoredVal = new BitCastInst(StoredVal, TypeToCastTo, "", InsertPt); - + // Cast to pointer if the load needs a pointer type. if (LoadedTy->isPointerTy()) StoredVal = new IntToPtrInst(StoredVal, LoadedTy, "", InsertPt); - + return StoredVal; } - + // If the loaded value is smaller than the available value, then we can // extract out a piece from it. If the available value is too small, then we // can't do anything. assert(StoreSize >= LoadSize && "CanCoerceMustAliasedValueToLoad fail"); - + // Convert source pointers to integers, which can be manipulated. if (StoredValTy->isPointerTy()) { StoredValTy = TD.getIntPtrType(StoredValTy->getContext()); StoredVal = new PtrToIntInst(StoredVal, StoredValTy, "", InsertPt); } - + // Convert vectors and fp to integer, which can be manipulated. if (!StoredValTy->isIntegerTy()) { StoredValTy = IntegerType::get(StoredValTy->getContext(), StoreSize); StoredVal = new BitCastInst(StoredVal, StoredValTy, "", InsertPt); } - + // If this is a big-endian system, we need to shift the value down to the low // bits so that a truncate will work. if (TD.isBigEndian()) { Constant *Val = ConstantInt::get(StoredVal->getType(), StoreSize-LoadSize); StoredVal = BinaryOperator::CreateLShr(StoredVal, Val, "tmp", InsertPt); } - + // Truncate the integer to the right size now. Type *NewIntTy = IntegerType::get(StoredValTy->getContext(), LoadSize); StoredVal = new TruncInst(StoredVal, NewIntTy, "trunc", InsertPt); - + if (LoadedTy == NewIntTy) return StoredVal; - + // If the result is a pointer, inttoptr. 
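// Aside: CanCoerceMustAliasedValueToLoad above reduces to a kind check plus
// a size comparison; everything after it is mechanical bit surgery. The
// predicate in isolation (sizes in bits, aggregates excluded; a sketch, not
// the pass's API):

bool canCoerceToLoad(bool StoredValIsAggregate, unsigned StoreSizeInBits,
                     unsigned LoadSizeInBits) {
  if (StoredValIsAggregate)
    return false; // struct/array values are not bit-coerced here
  return StoreSizeInBits >= LoadSizeInBits; // the store must cover the load
}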
if (LoadedTy->isPointerTy()) return new IntToPtrInst(StoredVal, LoadedTy, "inttoptr", InsertPt); - + // Otherwise, bitcast. return new BitCastInst(StoredVal, LoadedTy, "bitcast", InsertPt); } @@ -835,13 +845,13 @@ static int AnalyzeLoadFromClobberingWrite(Type *LoadTy, Value *LoadPtr, // to transform them. We need to be able to bitcast to integer. if (LoadTy->isStructTy() || LoadTy->isArrayTy()) return -1; - + int64_t StoreOffset = 0, LoadOffset = 0; Value *StoreBase = GetPointerBaseWithConstantOffset(WritePtr, StoreOffset,TD); Value *LoadBase = GetPointerBaseWithConstantOffset(LoadPtr, LoadOffset, TD); if (StoreBase != LoadBase) return -1; - + // If the load and store are to the exact same address, they should have been // a must alias. AA must have gotten confused. // FIXME: Study to see if/when this happens. One case is forwarding a memset @@ -856,18 +866,18 @@ static int AnalyzeLoadFromClobberingWrite(Type *LoadTy, Value *LoadPtr, abort(); } #endif - + // If the load and store don't overlap at all, the store doesn't provide // anything to the load. In this case, they really don't alias at all, AA // must have gotten confused. uint64_t LoadSize = TD.getTypeSizeInBits(LoadTy); - + if ((WriteSizeInBits & 7) | (LoadSize & 7)) return -1; uint64_t StoreSize = WriteSizeInBits >> 3; // Convert to bytes. LoadSize >>= 3; - - + + bool isAAFailure = false; if (StoreOffset < LoadOffset) isAAFailure = StoreOffset+int64_t(StoreSize) <= LoadOffset; @@ -885,7 +895,7 @@ static int AnalyzeLoadFromClobberingWrite(Type *LoadTy, Value *LoadPtr, #endif return -1; } - + // If the Load isn't completely contained within the stored bits, we don't // have all the bits to feed it. We could do something crazy in the future // (issue a smaller load then merge the bits in) but this seems unlikely to be @@ -893,11 +903,11 @@ static int AnalyzeLoadFromClobberingWrite(Type *LoadTy, Value *LoadPtr, if (StoreOffset > LoadOffset || StoreOffset+StoreSize < LoadOffset+LoadSize) return -1; - + // Okay, we can do this transformation. Return the number of bytes into the // store that the load is. return LoadOffset-StoreOffset; -} +} /// AnalyzeLoadFromClobberingStore - This function is called when we have a /// memdep query of a load that ends up being a clobbering store. @@ -923,23 +933,23 @@ static int AnalyzeLoadFromClobberingLoad(Type *LoadTy, Value *LoadPtr, // Cannot handle reading from store of first-class aggregate yet. if (DepLI->getType()->isStructTy() || DepLI->getType()->isArrayTy()) return -1; - + Value *DepPtr = DepLI->getPointerOperand(); uint64_t DepSize = TD.getTypeSizeInBits(DepLI->getType()); int R = AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr, DepPtr, DepSize, TD); if (R != -1) return R; - + // If we have a load/load clobber an DepLI can be widened to cover this load, // then we should widen it! int64_t LoadOffs = 0; const Value *LoadBase = GetPointerBaseWithConstantOffset(LoadPtr, LoadOffs, TD); unsigned LoadSize = TD.getTypeStoreSize(LoadTy); - + unsigned Size = MemoryDependenceAnalysis:: getLoadLoadClobberFullWidthSize(LoadBase, LoadOffs, LoadSize, DepLI, TD); if (Size == 0) return -1; - + return AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr, DepPtr, Size*8, TD); } @@ -958,29 +968,29 @@ static int AnalyzeLoadFromClobberingMemInst(Type *LoadTy, Value *LoadPtr, if (MI->getIntrinsicID() == Intrinsic::memset) return AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr, MI->getDest(), MemSizeInBits, TD); - + // If we have a memcpy/memmove, the only case we can handle is if this is a // copy from constant memory. 
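// Aside: once both pointers are rebased onto a common underlying object,
// AnalyzeLoadFromClobberingWrite is pure byte arithmetic. A standalone
// sketch of the containment test, assuming byte-granular sizes and offsets
// from the shared base:

#include <cstdint>

// Returns the load's byte offset into the store, or -1 if the store does
// not fully cover the load (including the fully disjoint case).
int loadOffsetInStore(int64_t StoreOffset, uint64_t StoreSize,
                      int64_t LoadOffset, uint64_t LoadSize) {
  // Disjoint accesses provide nothing to each other.
  if (StoreOffset + int64_t(StoreSize) <= LoadOffset ||
      LoadOffset + int64_t(LoadSize) <= StoreOffset)
    return -1;
  // The load must be completely contained within the stored bytes.
  if (StoreOffset > LoadOffset ||
      StoreOffset + int64_t(StoreSize) < LoadOffset + int64_t(LoadSize))
    return -1;
  return int(LoadOffset - StoreOffset);
}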
In that case, we can read directly from the // constant memory. MemTransferInst *MTI = cast(MI); - + Constant *Src = dyn_cast(MTI->getSource()); if (Src == 0) return -1; - + GlobalVariable *GV = dyn_cast(GetUnderlyingObject(Src, &TD)); if (GV == 0 || !GV->isConstant()) return -1; - + // See if the access is within the bounds of the transfer. int Offset = AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr, MI->getDest(), MemSizeInBits, TD); if (Offset == -1) return Offset; - + // Otherwise, see if we can constant fold a load from the constant with the // offset applied as appropriate. Src = ConstantExpr::getBitCast(Src, llvm::Type::getInt8PtrTy(Src->getContext())); - Constant *OffsetCst = + Constant *OffsetCst = ConstantInt::get(Type::getInt64Ty(Src->getContext()), (unsigned)Offset); Src = ConstantExpr::getGetElementPtr(Src, OffsetCst); Src = ConstantExpr::getBitCast(Src, PointerType::getUnqual(LoadTy)); @@ -988,7 +998,7 @@ static int AnalyzeLoadFromClobberingMemInst(Type *LoadTy, Value *LoadPtr, return Offset; return -1; } - + /// GetStoreValueForLoad - This function is called when we have a /// memdep query of a load that ends up being a clobbering store. This means @@ -999,32 +1009,32 @@ static Value *GetStoreValueForLoad(Value *SrcVal, unsigned Offset, Type *LoadTy, Instruction *InsertPt, const TargetData &TD){ LLVMContext &Ctx = SrcVal->getType()->getContext(); - + uint64_t StoreSize = (TD.getTypeSizeInBits(SrcVal->getType()) + 7) / 8; uint64_t LoadSize = (TD.getTypeSizeInBits(LoadTy) + 7) / 8; - + IRBuilder<> Builder(InsertPt->getParent(), InsertPt); - + // Compute which bits of the stored value are being used by the load. Convert // to an integer type to start with. if (SrcVal->getType()->isPointerTy()) SrcVal = Builder.CreatePtrToInt(SrcVal, TD.getIntPtrType(Ctx)); if (!SrcVal->getType()->isIntegerTy()) SrcVal = Builder.CreateBitCast(SrcVal, IntegerType::get(Ctx, StoreSize*8)); - + // Shift the bits to the least significant depending on endianness. unsigned ShiftAmt; if (TD.isLittleEndian()) ShiftAmt = Offset*8; else ShiftAmt = (StoreSize-LoadSize-Offset)*8; - + if (ShiftAmt) SrcVal = Builder.CreateLShr(SrcVal, ShiftAmt); - + if (LoadSize != StoreSize) SrcVal = Builder.CreateTrunc(SrcVal, IntegerType::get(Ctx, LoadSize*8)); - + return CoerceAvailableValueToLoadType(SrcVal, LoadTy, InsertPt, TD); } @@ -1051,14 +1061,14 @@ static Value *GetLoadValueForLoad(LoadInst *SrcVal, unsigned Offset, NewLoadSize = NextPowerOf2(NewLoadSize); Value *PtrVal = SrcVal->getPointerOperand(); - + // Insert the new load after the old load. This ensures that subsequent // memdep queries will find the new load. We can't easily remove the old // load completely because it is already in the value numbering table. IRBuilder<> Builder(SrcVal->getParent(), ++BasicBlock::iterator(SrcVal)); - Type *DestPTy = + Type *DestPTy = IntegerType::get(LoadTy->getContext(), NewLoadSize*8); - DestPTy = PointerType::get(DestPTy, + DestPTy = PointerType::get(DestPTy, cast(PtrVal->getType())->getAddressSpace()); Builder.SetCurrentDebugLocation(SrcVal->getDebugLoc()); PtrVal = Builder.CreateBitCast(PtrVal, DestPTy); @@ -1068,7 +1078,7 @@ static Value *GetLoadValueForLoad(LoadInst *SrcVal, unsigned Offset, DEBUG(dbgs() << "GVN WIDENED LOAD: " << *SrcVal << "\n"); DEBUG(dbgs() << "TO: " << *NewLoad << "\n"); - + // Replace uses of the original load with the wider load. On a big endian // system, we need to shift down to get the relevant bits. 
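// Aside: GetStoreValueForLoad above selects the loaded bits with a shift
// whose amount depends on endianness, then truncates. The same arithmetic on
// plain integers (a sketch: sizes in bytes, everything assumed to fit in 64
// bits):

#include <cassert>
#include <cstdint>

uint64_t extractLoadBits(uint64_t StoreBits, unsigned StoreSize,
                         unsigned LoadSize, unsigned Offset,
                         bool LittleEndian) {
  assert(StoreSize <= 8 && LoadSize + Offset <= StoreSize && "not covered");
  unsigned ShiftAmt = LittleEndian
                          ? Offset * 8                           // low bytes
                          : (StoreSize - LoadSize - Offset) * 8; // big endian
  uint64_t Val = StoreBits >> ShiftAmt;
  if (LoadSize < 8)
    Val &= (uint64_t(1) << (LoadSize * 8)) - 1; // truncate to load width
  return Val;
}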
Value *RV = NewLoad; @@ -1077,7 +1087,7 @@ static Value *GetLoadValueForLoad(LoadInst *SrcVal, unsigned Offset, NewLoadSize*8-SrcVal->getType()->getPrimitiveSizeInBits()); RV = Builder.CreateTrunc(RV, SrcVal->getType()); SrcVal->replaceAllUsesWith(RV); - + // We would like to use gvn.markInstructionForDeletion here, but we can't // because the load is already memoized into the leader map table that GVN // tracks. It is potentially possible to remove the load from the table, @@ -1086,7 +1096,7 @@ static Value *GetLoadValueForLoad(LoadInst *SrcVal, unsigned Offset, gvn.getMemDep().removeInstruction(SrcVal); SrcVal = NewLoad; } - + return GetStoreValueForLoad(SrcVal, Offset, LoadTy, InsertPt, TD); } @@ -1100,7 +1110,7 @@ static Value *GetMemInstValueForLoad(MemIntrinsic *SrcInst, unsigned Offset, uint64_t LoadSize = TD.getTypeSizeInBits(LoadTy)/8; IRBuilder<> Builder(InsertPt->getParent(), InsertPt); - + // We know that this method is only called when the mem transfer fully // provides the bits for the load. if (MemSetInst *MSI = dyn_cast(SrcInst)) { @@ -1109,9 +1119,9 @@ static Value *GetMemInstValueForLoad(MemIntrinsic *SrcInst, unsigned Offset, Value *Val = MSI->getValue(); if (LoadSize != 1) Val = Builder.CreateZExt(Val, IntegerType::get(Ctx, LoadSize*8)); - + Value *OneElt = Val; - + // Splat the value out to the right number of bits. for (unsigned NumBytesSet = 1; NumBytesSet != LoadSize; ) { // If we can double the number of bytes set, do it. @@ -1121,16 +1131,16 @@ static Value *GetMemInstValueForLoad(MemIntrinsic *SrcInst, unsigned Offset, NumBytesSet <<= 1; continue; } - + // Otherwise insert one byte at a time. Value *ShVal = Builder.CreateShl(Val, 1*8); Val = Builder.CreateOr(OneElt, ShVal); ++NumBytesSet; } - + return CoerceAvailableValueToLoadType(Val, LoadTy, InsertPt, TD); } - + // Otherwise, this is a memcpy/memmove from a constant global. MemTransferInst *MTI = cast(SrcInst); Constant *Src = cast(MTI->getSource()); @@ -1139,7 +1149,7 @@ static Value *GetMemInstValueForLoad(MemIntrinsic *SrcInst, unsigned Offset, // offset applied as appropriate. Src = ConstantExpr::getBitCast(Src, llvm::Type::getInt8PtrTy(Src->getContext())); - Constant *OffsetCst = + Constant *OffsetCst = ConstantInt::get(Type::getInt64Ty(Src->getContext()), (unsigned)Offset); Src = ConstantExpr::getGetElementPtr(Src, OffsetCst); Src = ConstantExpr::getBitCast(Src, PointerType::getUnqual(LoadTy)); @@ -1156,13 +1166,13 @@ struct AvailableValueInBlock { LoadVal, // A value produced by a load. MemIntrin // A memory intrinsic which is loaded from. }; - + /// V - The value that is live out of the block. PointerIntPair Val; - + /// Offset - The byte offset in Val that is interesting for the load query. 
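// Aside: the splat loop in GetMemInstValueForLoad widens a one-byte memset
// value to the load width by repeated doubling, falling back to one byte at
// a time. The same computation on plain integers (load width <= 8 bytes):

#include <cstdint>

uint64_t splatMemsetByte(uint8_t Byte, unsigned LoadSize) {
  uint64_t Val = Byte;
  const uint64_t OneElt = Byte;
  for (unsigned NumBytesSet = 1; NumBytesSet != LoadSize;) {
    if (NumBytesSet * 2 <= LoadSize) {
      Val = (Val << (NumBytesSet * 8)) | Val; // double the bytes set
      NumBytesSet <<= 1;
      continue;
    }
    Val = (Val << 8) | OneElt; // otherwise append a single byte
    ++NumBytesSet;
  }
  return Val;
}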
unsigned Offset; - + static AvailableValueInBlock get(BasicBlock *BB, Value *V, unsigned Offset = 0) { AvailableValueInBlock Res; @@ -1182,7 +1192,7 @@ struct AvailableValueInBlock { Res.Offset = Offset; return Res; } - + static AvailableValueInBlock getLoad(BasicBlock *BB, LoadInst *LI, unsigned Offset = 0) { AvailableValueInBlock Res; @@ -1201,17 +1211,17 @@ struct AvailableValueInBlock { assert(isSimpleValue() && "Wrong accessor"); return Val.getPointer(); } - + LoadInst *getCoercedLoadValue() const { assert(isCoercedLoadValue() && "Wrong accessor"); return cast(Val.getPointer()); } - + MemIntrinsic *getMemIntrinValue() const { assert(isMemIntrinValue() && "Wrong accessor"); return cast(Val.getPointer()); } - + /// MaterializeAdjustedValue - Emit code into this block to adjust the value /// defined here to the specified type. This handles various coercion cases. Value *MaterializeAdjustedValue(Type *LoadTy, GVN &gvn) const { @@ -1223,7 +1233,7 @@ struct AvailableValueInBlock { assert(TD && "Need target data to handle type mismatch case"); Res = GetStoreValueForLoad(Res, Offset, LoadTy, BB->getTerminator(), *TD); - + DEBUG(dbgs() << "GVN COERCED NONLOCAL VAL:\nOffset: " << Offset << " " << *getSimpleValue() << '\n' << *Res << '\n' << "\n\n\n"); @@ -1235,7 +1245,7 @@ struct AvailableValueInBlock { } else { Res = GetLoadValueForLoad(Load, Offset, LoadTy, BB->getTerminator(), gvn); - + DEBUG(dbgs() << "GVN COERCED NONLOCAL LOAD:\nOffset: " << Offset << " " << *getCoercedLoadValue() << '\n' << *Res << '\n' << "\n\n\n"); @@ -1258,12 +1268,12 @@ struct AvailableValueInBlock { /// ConstructSSAForLoadSet - Given a set of loads specified by ValuesPerBlock, /// construct SSA form, allowing us to eliminate LI. This returns the value /// that should be used at LI's definition site. -static Value *ConstructSSAForLoadSet(LoadInst *LI, +static Value *ConstructSSAForLoadSet(LoadInst *LI, SmallVectorImpl &ValuesPerBlock, GVN &gvn) { // Check for the fully redundant, dominating load case. In this case, we can // just use the dominating value directly. - if (ValuesPerBlock.size() == 1 && + if (ValuesPerBlock.size() == 1 && gvn.getDominatorTree().properlyDominates(ValuesPerBlock[0].BB, LI->getParent())) return ValuesPerBlock[0].MaterializeAdjustedValue(LI->getType(), gvn); @@ -1272,29 +1282,29 @@ static Value *ConstructSSAForLoadSet(LoadInst *LI, SmallVector NewPHIs; SSAUpdater SSAUpdate(&NewPHIs); SSAUpdate.Initialize(LI->getType(), LI->getName()); - + Type *LoadTy = LI->getType(); - + for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i) { const AvailableValueInBlock &AV = ValuesPerBlock[i]; BasicBlock *BB = AV.BB; - + if (SSAUpdate.HasValueForBlock(BB)) continue; SSAUpdate.AddAvailableValue(BB, AV.MaterializeAdjustedValue(LoadTy, gvn)); } - + // Perform PHI construction. Value *V = SSAUpdate.GetValueInMiddleOfBlock(LI->getParent()); - + // If new PHI nodes were created, notify alias analysis. if (V->getType()->isPointerTy()) { AliasAnalysis *AA = gvn.getAliasAnalysis(); - + for (unsigned i = 0, e = NewPHIs.size(); i != e; ++i) AA->copyValue(LI, NewPHIs[i]); - + // Now that we've copied information to the new PHIs, scan through // them again and inform alias analysis that we've added potentially // escaping uses to any values that are operands to these PHIs. @@ -1366,7 +1376,7 @@ bool GVN::processNonLocalLoad(LoadInst *LI) { // the pointer operand of the load if PHI translation occurs. Make sure // to consider the right address. 
Value *Address = Deps[i].getAddress(); - + // If the dependence is to a store that writes to a superset of the bits // read by the load, we can extract the bits we need for the load from the // stored value. @@ -1382,7 +1392,7 @@ bool GVN::processNonLocalLoad(LoadInst *LI) { } } } - + // Check to see if we have something like this: // load i32* P // load i8* (P+1) @@ -1394,7 +1404,7 @@ bool GVN::processNonLocalLoad(LoadInst *LI) { int Offset = AnalyzeLoadFromClobberingLoad(LI->getType(), LI->getPointerOperand(), DepLI, *TD); - + if (Offset != -1) { ValuesPerBlock.push_back(AvailableValueInBlock::getLoad(DepBB,DepLI, Offset)); @@ -1413,10 +1423,10 @@ bool GVN::processNonLocalLoad(LoadInst *LI) { ValuesPerBlock.push_back(AvailableValueInBlock::getMI(DepBB, DepMI, Offset)); continue; - } + } } } - + UnavailableBlocks.push_back(DepBB); continue; } @@ -1426,14 +1436,14 @@ bool GVN::processNonLocalLoad(LoadInst *LI) { Instruction *DepInst = DepInfo.getInst(); // Loading the allocation -> undef. - if (isa(DepInst) || isMalloc(DepInst) || + if (isa(DepInst) || isMallocLikeFn(DepInst) || // Loading immediately after lifetime begin -> undef. isLifetimeStart(DepInst)) { ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB, UndefValue::get(LI->getType()))); continue; } - + if (StoreInst *S = dyn_cast(DepInst)) { // Reject loads and stores that are to the same address but are of // different types if we have to. @@ -1451,7 +1461,7 @@ bool GVN::processNonLocalLoad(LoadInst *LI) { S->getValueOperand())); continue; } - + if (LoadInst *LD = dyn_cast(DepInst)) { // If the types mismatch and we can't handle it, reject reuse of the load. if (LD->getType() != LI->getType()) { @@ -1460,12 +1470,12 @@ bool GVN::processNonLocalLoad(LoadInst *LI) { if (TD == 0 || !CanCoerceMustAliasedValueToLoad(LD, LI->getType(),*TD)){ UnavailableBlocks.push_back(DepBB); continue; - } + } } ValuesPerBlock.push_back(AvailableValueInBlock::getLoad(DepBB, LD)); continue; } - + UnavailableBlocks.push_back(DepBB); continue; } @@ -1479,7 +1489,7 @@ bool GVN::processNonLocalLoad(LoadInst *LI) { // its value. Insert PHIs and remove the fully redundant value now. if (UnavailableBlocks.empty()) { DEBUG(dbgs() << "GVN REMOVING NONLOCAL LOAD: " << *LI << '\n'); - + // Perform PHI construction. Value *V = ConstructSSAForLoadSet(LI, ValuesPerBlock, *this); LI->replaceAllUsesWith(V); @@ -1522,10 +1532,10 @@ bool GVN::processNonLocalLoad(LoadInst *LI) { return false; if (Blockers.count(TmpBB)) return false; - + // If any of these blocks has more than one successor (i.e. if the edge we - // just traversed was critical), then there are other paths through this - // block along which the load may not be anticipated. Hoisting the load + // just traversed was critical), then there are other paths through this + // block along which the load may not be anticipated. Hoisting the load // above this block would be adding the load to execution paths along // which it was not previously executed. 
if (TmpBB->getTerminator()->getNumSuccessors() != 1) @@ -1570,7 +1580,7 @@ bool GVN::processNonLocalLoad(LoadInst *LI) { for (pred_iterator PI = pred_begin(LoadBB), E = pred_end(LoadBB); PI != E; ++PI) { BasicBlock *Pred = *PI; - if (IsValueFullyAvailableInBlock(Pred, FullyAvailableBlocks)) { + if (IsValueFullyAvailableInBlock(Pred, FullyAvailableBlocks, 0)) { continue; } PredLoads[Pred] = 0; @@ -1603,7 +1613,7 @@ bool GVN::processNonLocalLoad(LoadInst *LI) { unsigned NumUnavailablePreds = PredLoads.size(); assert(NumUnavailablePreds != 0 && "Fully available value should be eliminated above!"); - + // If this load is unavailable in multiple predecessors, reject it. // FIXME: If we could restructure the CFG, we could make a common pred with // all the preds that don't have an available LI and insert a new load into @@ -1680,10 +1690,10 @@ bool GVN::processNonLocalLoad(LoadInst *LI) { DEBUG(if (!NewInsts.empty()) dbgs() << "INSERTED " << NewInsts.size() << " INSTS: " << *NewInsts.back() << '\n'); - + // Assign value numbers to the new instructions. for (unsigned i = 0, e = NewInsts.size(); i != e; ++i) { - // FIXME: We really _ought_ to insert these value numbers into their + // FIXME: We really _ought_ to insert these value numbers into their // parent's availability map. However, in doing so, we risk getting into // ordering issues. If a block hasn't been processed yet, we would be // marking a value as AVAIL-IN, which isn't what we intend. @@ -1725,6 +1735,53 @@ bool GVN::processNonLocalLoad(LoadInst *LI) { return true; } +static void patchReplacementInstruction(Value *Repl, Instruction *I) { + // Patch the replacement so that it is not more restrictive than the value + // being replaced. + BinaryOperator *Op = dyn_cast(I); + BinaryOperator *ReplOp = dyn_cast(Repl); + if (Op && ReplOp && isa(Op) && + isa(ReplOp)) { + if (ReplOp->hasNoSignedWrap() && !Op->hasNoSignedWrap()) + ReplOp->setHasNoSignedWrap(false); + if (ReplOp->hasNoUnsignedWrap() && !Op->hasNoUnsignedWrap()) + ReplOp->setHasNoUnsignedWrap(false); + } + if (Instruction *ReplInst = dyn_cast(Repl)) { + SmallVector, 4> Metadata; + ReplInst->getAllMetadataOtherThanDebugLoc(Metadata); + for (int i = 0, n = Metadata.size(); i < n; ++i) { + unsigned Kind = Metadata[i].first; + MDNode *IMD = I->getMetadata(Kind); + MDNode *ReplMD = Metadata[i].second; + switch(Kind) { + default: + ReplInst->setMetadata(Kind, NULL); // Remove unknown metadata + break; + case LLVMContext::MD_dbg: + llvm_unreachable("getAllMetadataOtherThanDebugLoc returned a MD_dbg"); + case LLVMContext::MD_tbaa: + ReplInst->setMetadata(Kind, MDNode::getMostGenericTBAA(IMD, ReplMD)); + break; + case LLVMContext::MD_range: + ReplInst->setMetadata(Kind, MDNode::getMostGenericRange(IMD, ReplMD)); + break; + case LLVMContext::MD_prof: + llvm_unreachable("MD_prof in a non terminator instruction"); + break; + case LLVMContext::MD_fpmath: + ReplInst->setMetadata(Kind, MDNode::getMostGenericFPMath(IMD, ReplMD)); + break; + } + } + } +} + +static void patchAndReplaceAllUsesWith(Value *Repl, Instruction *I) { + patchReplacementInstruction(Repl, I); + I->replaceAllUsesWith(Repl); +} + /// processLoad - Attempt to eliminate a load, first by eliminating it /// locally, and then attempting non-local elimination if that fails. bool GVN::processLoad(LoadInst *L) { @@ -1738,7 +1795,7 @@ bool GVN::processLoad(LoadInst *L) { markInstructionForDeletion(L); return true; } - + // ... to a pointer that has been loaded from before... 
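// Aside: the nsw/nuw handling in patchReplacementInstruction above is a flag
// intersection -- the replacement may only keep a wrap flag if the
// instruction it replaces also had it, since keeping one the original lacked
// would let later passes assume no-overflow facts that need not hold on
// every path. Standalone shape over plain bitmasks (a sketch):

#include <cstdint>

enum ToyWrapFlags : uint8_t { NSW = 1, NUW = 2 };

uint8_t patchWrapFlags(uint8_t ReplFlags, uint8_t OrigFlags) {
  return ReplFlags & OrigFlags; // keep only flags both instructions carried
}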
MemDepResult Dep = MD->getDependency(L); @@ -1764,7 +1821,7 @@ bool GVN::processLoad(LoadInst *L) { AvailVal = GetStoreValueForLoad(DepSI->getValueOperand(), Offset, L->getType(), L, *TD); } - + // Check to see if we have something like this: // load i32* P // load i8* (P+1) @@ -1774,14 +1831,14 @@ bool GVN::processLoad(LoadInst *L) { // we have the first instruction in the entry block. if (DepLI == L) return false; - + int Offset = AnalyzeLoadFromClobberingLoad(L->getType(), L->getPointerOperand(), DepLI, *TD); if (Offset != -1) AvailVal = GetLoadValueForLoad(DepLI, Offset, L->getType(), L, *this); } - + // If the clobbering value is a memset/memcpy/memmove, see if we can forward // a value on from it. if (MemIntrinsic *DepMI = dyn_cast(Dep.getInst())) { @@ -1791,11 +1848,11 @@ bool GVN::processLoad(LoadInst *L) { if (Offset != -1) AvailVal = GetMemInstValueForLoad(DepMI, Offset, L->getType(), L, *TD); } - + if (AvailVal) { DEBUG(dbgs() << "GVN COERCED INST:\n" << *Dep.getInst() << '\n' << *AvailVal << '\n' << *L << "\n\n\n"); - + // Replace the load! L->replaceAllUsesWith(AvailVal); if (AvailVal->getType()->isPointerTy()) @@ -1805,7 +1862,7 @@ bool GVN::processLoad(LoadInst *L) { return true; } } - + // If the value isn't available, don't do anything! if (Dep.isClobber()) { DEBUG( @@ -1835,7 +1892,7 @@ bool GVN::processLoad(LoadInst *L) { Instruction *DepInst = Dep.getInst(); if (StoreInst *DepSI = dyn_cast(DepInst)) { Value *StoredVal = DepSI->getValueOperand(); - + // The store and load are to a must-aliased pointer, but they may not // actually have the same type. See if we know how to reuse the stored // value (depending on its type). @@ -1845,11 +1902,11 @@ bool GVN::processLoad(LoadInst *L) { L, *TD); if (StoredVal == 0) return false; - + DEBUG(dbgs() << "GVN COERCED STORE:\n" << *DepSI << '\n' << *StoredVal << '\n' << *L << "\n\n\n"); } - else + else return false; } @@ -1864,7 +1921,7 @@ bool GVN::processLoad(LoadInst *L) { if (LoadInst *DepLI = dyn_cast(DepInst)) { Value *AvailableVal = DepLI; - + // The loads are of a must-aliased pointer, but they may not actually have // the same type. See if we know how to reuse the previously loaded value // (depending on its type). @@ -1874,16 +1931,16 @@ bool GVN::processLoad(LoadInst *L) { L, *TD); if (AvailableVal == 0) return false; - + DEBUG(dbgs() << "GVN COERCED LOAD:\n" << *DepLI << "\n" << *AvailableVal << "\n" << *L << "\n\n\n"); } - else + else return false; } - + // Remove it! - L->replaceAllUsesWith(AvailableVal); + patchAndReplaceAllUsesWith(AvailableVal, L); if (DepLI->getType()->isPointerTy()) MD->invalidateCachedPointerInfo(DepLI); markInstructionForDeletion(L); @@ -1894,13 +1951,13 @@ bool GVN::processLoad(LoadInst *L) { // If this load really doesn't depend on anything, then we must be loading an // undef value. This can happen when loading for a fresh allocation with no // intervening stores, for example. - if (isa(DepInst) || isMalloc(DepInst)) { + if (isa(DepInst) || isMallocLikeFn(DepInst)) { L->replaceAllUsesWith(UndefValue::get(L->getType())); markInstructionForDeletion(L); ++NumGVNLoad; return true; } - + // If this load occurs either right after a lifetime begin, // then the loaded value is undefined. 
if (IntrinsicInst *II = dyn_cast(DepInst)) { @@ -1915,28 +1972,28 @@ bool GVN::processLoad(LoadInst *L) { return false; } -// findLeader - In order to find a leader for a given value number at a +// findLeader - In order to find a leader for a given value number at a // specific basic block, we first obtain the list of all Values for that number, -// and then scan the list to find one whose block dominates the block in +// and then scan the list to find one whose block dominates the block in // question. This is fast because dominator tree queries consist of only // a few comparisons of DFS numbers. -Value *GVN::findLeader(BasicBlock *BB, uint32_t num) { +Value *GVN::findLeader(const BasicBlock *BB, uint32_t num) { LeaderTableEntry Vals = LeaderTable[num]; if (!Vals.Val) return 0; - + Value *Val = 0; if (DT->dominates(Vals.BB, BB)) { Val = Vals.Val; if (isa(Val)) return Val; } - + LeaderTableEntry* Next = Vals.Next; while (Next) { if (DT->dominates(Next->BB, BB)) { if (isa(Next->Val)) return Next->Val; if (!Val) Val = Next->Val; } - + Next = Next->Next; } @@ -1947,7 +2004,7 @@ Value *GVN::findLeader(BasicBlock *BB, uint32_t num) { /// use is dominated by the given basic block. Returns the number of uses that /// were replaced. unsigned GVN::replaceAllDominatedUsesWith(Value *From, Value *To, - BasicBlock *Root) { + const BasicBlock *Root) { unsigned Count = 0; for (Value::use_iterator UI = From->use_begin(), UE = From->use_end(); UI != UE; ) { @@ -1973,7 +2030,7 @@ unsigned GVN::replaceAllDominatedUsesWith(Value *From, Value *To, /// propagateEquality - The given values are known to be equal in every block /// dominated by 'Root'. Exploit this, for example by replacing 'LHS' with /// 'RHS' everywhere in the scope. Returns whether a change was made. -bool GVN::propagateEquality(Value *LHS, Value *RHS, BasicBlock *Root) { +bool GVN::propagateEquality(Value *LHS, Value *RHS, const BasicBlock *Root) { SmallVector, 4> Worklist; Worklist.push_back(std::make_pair(LHS, RHS)); bool Changed = false; @@ -2012,9 +2069,15 @@ bool GVN::propagateEquality(Value *LHS, Value *RHS, BasicBlock *Root) { DT->properlyDominates(cast(RHS)->getParent(), Root)) && "Instruction doesn't dominate scope!"); - // If value numbering later deduces that an instruction in the scope is equal - // to 'LHS' then ensure it will be turned into 'RHS'. - addToLeaderTable(LVN, RHS, Root); + // If value numbering later sees that an instruction in the scope is equal + // to 'LHS' then ensure it will be turned into 'RHS'. In order to preserve + // the invariant that instructions only occur in the leader table for their + // own value number (this is used by removeFromLeaderTable), do not do this + // if RHS is an instruction (if an instruction in the scope is morphed into + // LHS then it will be turned into RHS by the next GVN iteration anyway, so + // using the leader table is about compiling faster, not optimizing better). + if (!isa(RHS)) + addToLeaderTable(LVN, RHS, Root); // Replace all occurrences of 'LHS' with 'RHS' everywhere in the scope. As // LHS always has at least one use that is not dominated by Root, this will @@ -2180,7 +2243,7 @@ bool GVN::processInstruction(Instruction *I) { // Instructions with void type don't return a value, so there's // no point in trying to find redundancies in them. 
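// Aside: findLeader above scans the per-value-number list for an entry whose
// block dominates the query block, returning a dominating constant
// immediately and otherwise the first dominating value. The same scan over a
// toy list, assuming the caller supplies a dominates() predicate:

#include <functional>
#include <vector>

struct ToyLeader { int Val; int BB; bool IsConstant; };

int toyFindLeader(const std::vector<ToyLeader> &Vals, int BB,
                  const std::function<bool(int, int)> &Dominates) {
  int Found = 0; // 0 plays the role of "no leader"
  for (const ToyLeader &L : Vals) {
    if (!Dominates(L.BB, BB))
      continue;
    if (L.IsConstant)
      return L.Val; // a dominating constant always wins
    if (!Found)
      Found = L.Val; // remember the first dominating value
  }
  return Found;
}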
if (I->getType()->isVoidTy()) return false; - + uint32_t NextNum = VN.getNextUnusedValueNumber(); unsigned Num = VN.lookup_or_add(I); @@ -2198,7 +2261,7 @@ bool GVN::processInstruction(Instruction *I) { addToLeaderTable(Num, I, I->getParent()); return false; } - + // Perform fast-path value-number based elimination of values inherited from // dominators. Value *repl = findLeader(I->getParent(), Num); @@ -2207,9 +2270,9 @@ bool GVN::processInstruction(Instruction *I) { addToLeaderTable(Num, I, I->getParent()); return false; } - + // Remove it! - I->replaceAllUsesWith(repl); + patchAndReplaceAllUsesWith(repl, I); if (MD && repl->getType()->isPointerTy()) MD->invalidateCachedPointerInfo(repl); markInstructionForDeletion(I); @@ -2234,7 +2297,7 @@ bool GVN::runOnFunction(Function& F) { // optimization opportunities. for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ) { BasicBlock *BB = FI++; - + bool removedBlock = MergeBlockIntoPredecessor(BB, this); if (removedBlock) ++NumGVNBlocks; @@ -2391,7 +2454,7 @@ bool GVN::performPRE(Function &F) { // we would need to insert instructions in more than one pred. if (NumWithout != 1 || NumWith == 0) continue; - + // Don't do PRE across indirect branch. if (isa(PREPred->getTerminator())) continue; @@ -2467,7 +2530,7 @@ bool GVN::performPRE(Function &F) { unsigned jj = PHINode::getOperandNumForIncomingValue(ii); VN.getAliasAnalysis()->addEscapingUse(Phi->getOperandUse(jj)); } - + if (MD) MD->invalidateCachedPointerInfo(Phi); } @@ -2504,7 +2567,7 @@ bool GVN::splitCriticalEdges() { /// iterateOnFunction - Executes one iteration of GVN bool GVN::iterateOnFunction(Function &F) { cleanupGlobalSets(); - + // Top-down walk of the dominator tree bool Changed = false; #if 0 @@ -2539,7 +2602,7 @@ void GVN::verifyRemoved(const Instruction *Inst) const { I = LeaderTable.begin(), E = LeaderTable.end(); I != E; ++I) { const LeaderTableEntry *Node = &I->second; assert(Node->Val != Inst && "Inst still in value numbering scope!"); - + while (Node->Next) { Node = Node->Next; assert(Node->Val != Inst && "Inst still in value numbering scope!"); diff --git a/lib/Transforms/Scalar/GlobalMerge.cpp b/lib/Transforms/Scalar/GlobalMerge.cpp index c2bd6e6..b36a3cb 100644 --- a/lib/Transforms/Scalar/GlobalMerge.cpp +++ b/lib/Transforms/Scalar/GlobalMerge.cpp @@ -12,7 +12,7 @@ // global). Such a transformation can significantly reduce the register pressure // when many globals are involved. // -// For example, consider the code which touches several global variables at +// For example, consider the code which touches several global variables at // once: // // static int foo[N], bar[N], baz[N]; @@ -208,8 +208,8 @@ bool GlobalMerge::doInitialization(Module &M) { if (BSSGlobals.size() > 1) Changed |= doMerge(BSSGlobals, M, false); - // FIXME: This currently breaks the EH processing due to way how the - // typeinfo detection works. We might want to detect the TIs and ignore + // FIXME: This currently breaks the EH processing due to way how the + // typeinfo detection works. We might want to detect the TIs and ignore // them in the future. 
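// Aside: processInstruction's fast path above in miniature -- if
// lookup_or_add handed out a brand-new value number, no earlier instruction
// can share it, so the leader scan is skipped entirely. Toy value table,
// illustrative names:

#include <cstdint>
#include <map>

struct ToyValueTable {
  std::map<int, uint32_t> Numbering; // instruction id -> value number
  uint32_t NextValueNumber = 1;

  uint32_t nextUnused() const { return NextValueNumber; }

  uint32_t lookupOrAdd(int Inst) {
    auto It = Numbering.find(Inst);
    if (It != Numbering.end())
      return It->second;
    uint32_t N = NextValueNumber++;
    Numbering[Inst] = N;
    return N;
  }
};

// True only when the number already existed, i.e. a leader scan could pay off.
bool meritsLeaderScan(ToyValueTable &VN, int Inst) {
  uint32_t NextNum = VN.nextUnused();
  return VN.lookupOrAdd(Inst) < NextNum; // fresh numbers cannot be redundant
}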
// if (ConstGlobals.size() > 1) // Changed |= doMerge(ConstGlobals, M, true); diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp index a9ba657..37f8bdf 100644 --- a/lib/Transforms/Scalar/IndVarSimplify.cpp +++ b/lib/Transforms/Scalar/IndVarSimplify.cpp @@ -1215,21 +1215,26 @@ static PHINode *getLoopPhiForCounter(Value *IncV, Loop *L, DominatorTree *DT) { return 0; } -/// needsLFTR - LinearFunctionTestReplace policy. Return true unless we can show -/// that the current exit test is already sufficiently canonical. -static bool needsLFTR(Loop *L, DominatorTree *DT) { +/// Return the compare guarding the loop latch, or NULL for unrecognized tests. +static ICmpInst *getLoopTest(Loop *L) { assert(L->getExitingBlock() && "expected loop exit"); BasicBlock *LatchBlock = L->getLoopLatch(); // Don't bother with LFTR if the loop is not properly simplified. if (!LatchBlock) - return false; + return 0; BranchInst *BI = dyn_cast(L->getExitingBlock()->getTerminator()); assert(BI && "expected exit branch"); + return dyn_cast(BI->getCondition()); +} + +/// needsLFTR - LinearFunctionTestReplace policy. Return true unless we can show +/// that the current exit test is already sufficiently canonical. +static bool needsLFTR(Loop *L, DominatorTree *DT) { // Do LFTR to simplify the exit condition to an ICMP. - ICmpInst *Cond = dyn_cast(BI->getCondition()); + ICmpInst *Cond = getLoopTest(L); if (!Cond) return true; @@ -1259,6 +1264,48 @@ static bool needsLFTR(Loop *L, DominatorTree *DT) { return Phi != getLoopPhiForCounter(IncV, L, DT); } +/// Recursive helper for hasConcreteDef(). Unfortunately, this currently boils +/// down to checking that all operands are constant and listing instructions +/// that may hide undef. +static bool hasConcreteDefImpl(Value *V, SmallPtrSet &Visited, + unsigned Depth) { + if (isa(V)) + return !isa(V); + + if (Depth >= 6) + return false; + + // Conservatively handle non-constant non-instructions. For example, Arguments + // may be undef. + Instruction *I = dyn_cast(V); + if (!I) + return false; + + // Load and return values may be undef. + if(I->mayReadFromMemory() || isa(I) || isa(I)) + return false; + + // Optimistically handle other instructions. + for (User::op_iterator OI = I->op_begin(), E = I->op_end(); OI != E; ++OI) { + if (!Visited.insert(*OI)) + continue; + if (!hasConcreteDefImpl(*OI, Visited, Depth+1)) + return false; + } + return true; +} + +/// Return true if the given value is concrete. We must prove that undef can +/// never reach it. +/// +/// TODO: If we decide that this is a good approach to checking for undef, we +/// may factor it into a common location. +static bool hasConcreteDef(Value *V) { + SmallPtrSet Visited; + Visited.insert(V); + return hasConcreteDefImpl(V, Visited, 0); +} + /// AlmostDeadIV - Return true if this IV has any uses other than the (soon to /// be rewritten) loop exit test. static bool AlmostDeadIV(PHINode *Phi, BasicBlock *LatchBlock, Value *Cond) { @@ -1283,6 +1330,8 @@ static bool AlmostDeadIV(PHINode *Phi, BasicBlock *LatchBlock, Value *Cond) { /// valid count without scaling the address stride, so it remains a pointer /// expression as far as SCEV is concerned. /// +/// Currently only valid for LFTR. See the comments on hasConcreteDef below. +/// /// FIXME: Accept -1 stride and set IVLimit = IVInit - BECount /// /// FIXME: Accept non-unit stride as long as SCEV can reduce BECount * Stride. 
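// Aside: hasConcreteDefImpl above is a depth-capped DFS over operands with a
// visited set, treating arguments, loads, and call results as possible undef
// carriers. The same skeleton on a toy expression graph (a sketch, not the
// pass's types):

#include <set>
#include <vector>

struct ToyNode {
  bool IsConstant;   // literal value
  bool IsUndef;      // literal undef
  bool MayHideUndef; // argument, load, call/invoke result, ...
  std::vector<const ToyNode *> Ops;
};

bool toyHasConcreteDef(const ToyNode *N, std::set<const ToyNode *> &Visited,
                       unsigned Depth) {
  if (N->IsConstant)
    return !N->IsUndef; // constants are concrete unless literally undef
  if (Depth >= 6 || N->MayHideUndef)
    return false; // same conservative cutoff and rejection as the pass
  for (const ToyNode *Op : N->Ops)
    if (Visited.insert(Op).second && // skip operands already proven elsewhere
        !toyHasConcreteDef(Op, Visited, Depth + 1))
      return false;
  return true;
}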
@@ -1331,6 +1380,19 @@ FindLoopCounter(Loop *L, const SCEV *BECount, if (getLoopPhiForCounter(IncV, L, DT) != Phi) continue; + // Avoid reusing a potentially undef value to compute other values that may + // have originally had a concrete definition. + if (!hasConcreteDef(Phi)) { + // We explicitly allow unknown phis as long as they are already used by + // the loop test. In this case we assume that performing LFTR could not + // increase the number of undef users. + if (ICmpInst *Cond = getLoopTest(L)) { + if (Phi != getLoopPhiForCounter(Cond->getOperand(0), L, DT) + && Phi != getLoopPhiForCounter(Cond->getOperand(1), L, DT)) { + continue; + } + } + } const SCEV *Init = AR->getStart(); if (BestPhi && !AlmostDeadIV(BestPhi, LatchBlock, Cond)) { @@ -1347,7 +1409,7 @@ FindLoopCounter(Loop *L, const SCEV *BECount, // If two IVs both count from zero or both count from nonzero then the // narrower is likely a dead phi that has been widened. Use the wider phi // to allow the other to be eliminated. - if (PhiWidth <= SE->getTypeSizeInBits(BestPhi->getType())) + else if (PhiWidth <= SE->getTypeSizeInBits(BestPhi->getType())) continue; } BestPhi = Phi; diff --git a/lib/Transforms/Scalar/JumpThreading.cpp b/lib/Transforms/Scalar/JumpThreading.cpp index 429b61b..dd42c59 100644 --- a/lib/Transforms/Scalar/JumpThreading.cpp +++ b/lib/Transforms/Scalar/JumpThreading.cpp @@ -670,6 +670,8 @@ bool JumpThreading::ProcessBlock(BasicBlock *BB) { } else if (SwitchInst *SI = dyn_cast(Terminator)) { Condition = SI->getCondition(); } else if (IndirectBrInst *IB = dyn_cast(Terminator)) { + // Can't thread indirect branch with no successors. + if (IB->getNumSuccessors() == 0) return false; Condition = IB->getAddress()->stripPointerCasts(); Preference = WantBlockAddress; } else { @@ -859,7 +861,7 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) { // If all of the loads and stores that feed the value have the same TBAA tag, // then we can propagate it onto any newly inserted loads. - MDNode *TBAATag = LI->getMetadata(LLVMContext::MD_tbaa); + MDNode *TBAATag = LI->getMetadata(LLVMContext::MD_tbaa); SmallPtrSet PredsScanned; typedef SmallVector, 8> AvailablePredsTy; @@ -885,7 +887,7 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) { OneUnavailablePred = PredBB; continue; } - + // If tbaa tags disagree or are not present, forget about them. if (TBAATag != ThisTBAATag) TBAATag = 0; @@ -949,7 +951,7 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) { NewVal->setDebugLoc(LI->getDebugLoc()); if (TBAATag) NewVal->setMetadata(LLVMContext::MD_tbaa, TBAATag); - + AvailablePreds.push_back(std::make_pair(UnavailablePred, NewVal)); } diff --git a/lib/Transforms/Scalar/LICM.cpp b/lib/Transforms/Scalar/LICM.cpp index 8795cd8..0192e92 100644 --- a/lib/Transforms/Scalar/LICM.cpp +++ b/lib/Transforms/Scalar/LICM.cpp @@ -175,7 +175,9 @@ namespace { bool canSinkOrHoistInst(Instruction &I); bool isNotUsedInLoop(Instruction &I); - void PromoteAliasSet(AliasSet &AS); + void PromoteAliasSet(AliasSet &AS, + SmallVectorImpl &ExitBlocks, + SmallVectorImpl &InsertPts); }; } @@ -256,10 +258,13 @@ bool LICM::runOnLoop(Loop *L, LPPassManager &LPM) { // Now that all loop invariants have been removed from the loop, promote any // memory references to scalars that we can. if (!DisablePromotion && Preheader && L->hasDedicatedExits()) { + SmallVector ExitBlocks; + SmallVector InsertPts; + // Loop over all of the alias sets in the tracker object. 
for (AliasSetTracker::iterator I = CurAST->begin(), E = CurAST->end(); I != E; ++I) - PromoteAliasSet(*I); + PromoteAliasSet(*I, ExitBlocks, InsertPts); } // Clear out loops state information for the next iteration @@ -618,6 +623,11 @@ bool LICM::isGuaranteedToExecute(Instruction &Inst) { if (!DT->dominates(Inst.getParent(), ExitBlocks[i])) return false; + // As a degenerate case, if the loop is statically infinite then we haven't + // proven anything since there are no exit blocks. + if (ExitBlocks.empty()) + return false; + return true; } @@ -626,6 +636,7 @@ namespace { Value *SomePtr; // Designated pointer to store to. SmallPtrSet &PointerMustAliases; SmallVectorImpl &LoopExitBlocks; + SmallVectorImpl &LoopInsertPts; AliasSetTracker &AST; DebugLoc DL; int Alignment; @@ -633,11 +644,12 @@ namespace { LoopPromoter(Value *SP, const SmallVectorImpl &Insts, SSAUpdater &S, SmallPtrSet &PMA, - SmallVectorImpl &LEB, AliasSetTracker &ast, - DebugLoc dl, int alignment) + SmallVectorImpl &LEB, + SmallVectorImpl &LIP, + AliasSetTracker &ast, DebugLoc dl, int alignment) : LoadAndStorePromoter(Insts, S), SomePtr(SP), - PointerMustAliases(PMA), LoopExitBlocks(LEB), AST(ast), DL(dl), - Alignment(alignment) {} + PointerMustAliases(PMA), LoopExitBlocks(LEB), LoopInsertPts(LIP), + AST(ast), DL(dl), Alignment(alignment) {} virtual bool isInstInList(Instruction *I, const SmallVectorImpl &) const { @@ -657,7 +669,7 @@ namespace { for (unsigned i = 0, e = LoopExitBlocks.size(); i != e; ++i) { BasicBlock *ExitBlock = LoopExitBlocks[i]; Value *LiveInValue = SSA.GetValueInMiddleOfBlock(ExitBlock); - Instruction *InsertPos = ExitBlock->getFirstInsertionPt(); + Instruction *InsertPos = LoopInsertPts[i]; StoreInst *NewSI = new StoreInst(LiveInValue, SomePtr, InsertPos); NewSI->setAlignment(Alignment); NewSI->setDebugLoc(DL); @@ -679,7 +691,9 @@ namespace { /// looping over the stores in the loop, looking for stores to Must pointers /// which are loop invariant. /// -void LICM::PromoteAliasSet(AliasSet &AS) { +void LICM::PromoteAliasSet(AliasSet &AS, + SmallVectorImpl &ExitBlocks, + SmallVectorImpl &InsertPts) { // We can promote this alias set if it has a store, if it is a "Must" alias // set, if the pointer is loop invariant, and if we are not eliminating any // volatile loads or stores. @@ -789,14 +803,20 @@ void LICM::PromoteAliasSet(AliasSet &AS) { // location is better than none. DebugLoc DL = LoopUses[0]->getDebugLoc(); - SmallVector ExitBlocks; - CurLoop->getUniqueExitBlocks(ExitBlocks); + // Figure out the loop exits and their insertion points, if this is the + // first promotion. + if (ExitBlocks.empty()) { + CurLoop->getUniqueExitBlocks(ExitBlocks); + InsertPts.resize(ExitBlocks.size()); + for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) + InsertPts[i] = ExitBlocks[i]->getFirstInsertionPt(); + } // We use the SSAUpdater interface to insert phi nodes as required. SmallVector NewPHIs; SSAUpdater SSA(&NewPHIs); LoopPromoter Promoter(SomePtr, LoopUses, SSA, PointerMustAliases, ExitBlocks, - *CurAST, DL, Alignment); + InsertPts, *CurAST, DL, Alignment); // Set up the preheader to have a definition of the value. It is the live-out // value from the preheader that uses in the loop will use. 
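// Aside: the LICM change above computes exit blocks and insertion points
// once per loop and reuses them across every PromoteAliasSet call instead of
// recomputing them per alias set. The caching shape, standalone (toy ids
// stand in for blocks and instructions):

#include <cstddef>
#include <vector>

struct ToyLoop { std::vector<int> Exits; };

void promoteOneAliasSet(const ToyLoop &L, std::vector<int> &ExitBlocks,
                        std::vector<int> &InsertPts) {
  if (ExitBlocks.empty()) { // first promotion populates the shared caches
    ExitBlocks = L.Exits;   // stands in for getUniqueExitBlocks()
    InsertPts.resize(ExitBlocks.size());
    for (std::size_t i = 0, e = ExitBlocks.size(); i != e; ++i)
      InsertPts[i] = ExitBlocks[i]; // stands in for getFirstInsertionPt()
  }
  // ... place one promoted store per exit block at InsertPts[i] ...
}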
diff --git a/lib/Transforms/Scalar/LoopDeletion.cpp b/lib/Transforms/Scalar/LoopDeletion.cpp index f7f3298..3771f5a 100644 --- a/lib/Transforms/Scalar/LoopDeletion.cpp +++ b/lib/Transforms/Scalar/LoopDeletion.cpp @@ -32,10 +32,10 @@ namespace { LoopDeletion() : LoopPass(ID) { initializeLoopDeletionPass(*PassRegistry::getPassRegistry()); } - + // Possibly eliminate loop L if it is dead. bool runOnLoop(Loop* L, LPPassManager& LPM); - + bool IsLoopDead(Loop* L, SmallVector& exitingBlocks, SmallVector& exitBlocks, bool &Changed, BasicBlock *Preheader); @@ -46,7 +46,7 @@ namespace { AU.addRequired(); AU.addRequiredID(LoopSimplifyID); AU.addRequiredID(LCSSAID); - + AU.addPreserved(); AU.addPreserved(); AU.addPreserved(); @@ -55,7 +55,7 @@ namespace { } }; } - + char LoopDeletion::ID = 0; INITIALIZE_PASS_BEGIN(LoopDeletion, "loop-deletion", "Delete dead loops", false, false) @@ -79,7 +79,7 @@ bool LoopDeletion::IsLoopDead(Loop* L, SmallVector& exitBlocks, bool &Changed, BasicBlock *Preheader) { BasicBlock* exitBlock = exitBlocks[0]; - + // Make sure that all PHI entries coming from the loop are loop invariant. // Because the code is in LCSSA form, any values used outside of the loop // must pass through a PHI in the exit block, meaning that this check is @@ -97,14 +97,14 @@ bool LoopDeletion::IsLoopDead(Loop* L, if (incoming != P->getIncomingValueForBlock(exitingBlocks[i])) return false; } - + if (Instruction* I = dyn_cast(incoming)) if (!L->makeLoopInvariant(I, Changed, Preheader->getTerminator())) return false; ++BI; } - + // Make sure that no instructions in the block have potential side-effects. // This includes instructions that could write to memory, and loads that are // marked volatile. This could be made more aggressive by using aliasing @@ -117,23 +117,23 @@ bool LoopDeletion::IsLoopDead(Loop* L, return false; } } - + return true; } /// runOnLoop - Remove dead loops, by which we mean loops that do not impact the -/// observable behavior of the program other than finite running time. Note +/// observable behavior of the program other than finite running time. Note /// we do ensure that this never remove a loop that might be infinite, as doing /// so could change the halting/non-halting nature of a program. /// NOTE: This entire process relies pretty heavily on LoopSimplify and LCSSA /// in order to make various safety checks work. bool LoopDeletion::runOnLoop(Loop* L, LPPassManager& LPM) { - // We can only remove the loop if there is a preheader that we can + // We can only remove the loop if there is a preheader that we can // branch from after removing it. BasicBlock* preheader = L->getLoopPreheader(); if (!preheader) return false; - + // If LoopSimplify form is not available, stay out of trouble. if (!L->hasDedicatedExits()) return false; @@ -142,36 +142,36 @@ bool LoopDeletion::runOnLoop(Loop* L, LPPassManager& LPM) { // they would already have been removed in earlier executions of this pass. if (L->begin() != L->end()) return false; - + SmallVector exitingBlocks; L->getExitingBlocks(exitingBlocks); - + SmallVector exitBlocks; L->getUniqueExitBlocks(exitBlocks); - + // We require that the loop only have a single exit block. Otherwise, we'd // be in the situation of needing to be able to solve statically which exit // block will be branched to, or trying to preserve the branching logic in // a loop invariant manner. if (exitBlocks.size() != 1) return false; - + // Finally, we have to check that the loop really is dead. 
bool Changed = false; if (!IsLoopDead(L, exitingBlocks, exitBlocks, Changed, preheader)) return Changed; - + // Don't remove loops for which we can't solve the trip count. // They could be infinite, in which case we'd be changing program behavior. ScalarEvolution& SE = getAnalysis(); const SCEV *S = SE.getMaxBackedgeTakenCount(L); if (isa(S)) return Changed; - + // Now that we know the removal is safe, remove the loop by changing the - // branch from the preheader to go to the single exit block. + // branch from the preheader to go to the single exit block. BasicBlock* exitBlock = exitBlocks[0]; - + // Because we're deleting a large chunk of code at once, the sequence in which // we remove things is very important to avoid invalidation issues. Don't // mess with this unless you have good reason and know what you're doing. @@ -197,7 +197,7 @@ bool LoopDeletion::runOnLoop(Loop* L, LPPassManager& LPM) { P->removeIncomingValue(exitingBlocks[i]); ++BI; } - + // Update the dominator tree and remove the instructions and blocks that will // be deleted from the reference counting scheme. DominatorTree& DT = getAnalysis(); @@ -211,7 +211,7 @@ bool LoopDeletion::runOnLoop(Loop* L, LPPassManager& LPM) { DE = ChildNodes.end(); DI != DE; ++DI) { DT.changeImmediateDominator(*DI, DT[preheader]); } - + ChildNodes.clear(); DT.eraseNode(*LI); @@ -219,7 +219,7 @@ bool LoopDeletion::runOnLoop(Loop* L, LPPassManager& LPM) { // delete it freely later. (*LI)->dropAllReferences(); } - + // Erase the instructions and the blocks without having to worry // about ordering because we already dropped the references. // NOTE: This iteration is safe because erasing the block does not remove its @@ -236,13 +236,13 @@ bool LoopDeletion::runOnLoop(Loop* L, LPPassManager& LPM) { for (SmallPtrSet::iterator I = blocks.begin(), E = blocks.end(); I != E; ++I) loopInfo.removeBlock(*I); - + // The last step is to inform the loop pass manager that we've // eliminated this loop. LPM.deleteLoopFromQueue(L); Changed = true; - + ++NumDeleted; - + return Changed; } diff --git a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp index ad15cbb..ac1082c 100644 --- a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp +++ b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp @@ -43,20 +43,20 @@ #define DEBUG_TYPE "loop-idiom" #include "llvm/Transforms/Scalar.h" +#include "llvm/IRBuilder.h" #include "llvm/IntrinsicInst.h" #include "llvm/Module.h" +#include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/LoopPass.h" -#include "llvm/Analysis/ScalarEvolutionExpressions.h" #include "llvm/Analysis/ScalarEvolutionExpander.h" +#include "llvm/Analysis/ScalarEvolutionExpressions.h" #include "llvm/Analysis/ValueTracking.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetLibraryInfo.h" #include "llvm/Transforms/Utils/Local.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/IRBuilder.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/ADT/Statistic.h" using namespace llvm; STATISTIC(NumMemSet, "Number of memset's formed from loop stores"); @@ -173,7 +173,7 @@ static void deleteIfDeadInstruction(Value *V, ScalarEvolution &SE) { bool LoopIdiomRecognize::runOnLoop(Loop *L, LPPassManager &LPM) { CurLoop = L; - // Disable loop idiom recognition if the function's name is a common idiom. + // Disable loop idiom recognition if the function's name is a common idiom. 
StringRef Name = L->getHeader()->getParent()->getName(); if (Name == "memset" || Name == "memcpy") return false; diff --git a/lib/Transforms/Scalar/LoopInstSimplify.cpp b/lib/Transforms/Scalar/LoopInstSimplify.cpp index f0f05e6..982400c 100644 --- a/lib/Transforms/Scalar/LoopInstSimplify.cpp +++ b/lib/Transforms/Scalar/LoopInstSimplify.cpp @@ -48,7 +48,7 @@ namespace { } }; } - + char LoopInstSimplify::ID = 0; INITIALIZE_PASS_BEGIN(LoopInstSimplify, "loop-instsimplify", "Simplify instructions in loops", false, false) diff --git a/lib/Transforms/Scalar/LoopRotation.cpp b/lib/Transforms/Scalar/LoopRotation.cpp index 59aace9..7eeb152 100644 --- a/lib/Transforms/Scalar/LoopRotation.cpp +++ b/lib/Transforms/Scalar/LoopRotation.cpp @@ -418,12 +418,13 @@ bool LoopRotate::rotateLoop(Loop *L) { } // Right now OrigPreHeader has two successors, NewHeader and ExitBlock, and - // thus is not a preheader anymore. Split the edge to form a real preheader. + // thus is not a preheader anymore. + // Split the edge to form a real preheader. BasicBlock *NewPH = SplitCriticalEdge(OrigPreheader, NewHeader, this); NewPH->setName(NewHeader->getName() + ".lr.ph"); - // Preserve canonical loop form, which means that 'Exit' should have only one - // predecessor. + // Preserve canonical loop form, which means that 'Exit' should have only + // one predecessor. BasicBlock *ExitSplit = SplitCriticalEdge(L->getLoopLatch(), Exit, this); ExitSplit->moveBefore(Exit); } else { diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp index b085b00..b14a713 100644 --- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -1308,8 +1308,8 @@ static bool isLegalUse(const TargetLowering::AddrMode &AM, return !AM.BaseGV && AM.Scale == 0 && AM.BaseOffs == 0; case LSRUse::Special: - // Only handle -1 scales, or no scale. - return AM.Scale == 0 || AM.Scale == -1; + // Special case Basic to handle -1 scales. + return !AM.BaseGV && (AM.Scale == 0 || AM.Scale == -1) && AM.BaseOffs == 0; } llvm_unreachable("Invalid LSRUse Kind!"); @@ -1439,7 +1439,41 @@ struct IVInc { // IVChain - The list of IV increments in program order. // We typically add the head of a chain without finding subsequent links. -typedef SmallVector IVChain; +struct IVChain { + SmallVector Incs; + const SCEV *ExprBase; + + IVChain() : ExprBase(0) {} + + IVChain(const IVInc &Head, const SCEV *Base) + : Incs(1, Head), ExprBase(Base) {} + + typedef SmallVectorImpl::const_iterator const_iterator; + + // begin - return the first increment in the chain. + const_iterator begin() const { + assert(!Incs.empty()); + return llvm::next(Incs.begin()); + } + const_iterator end() const { + return Incs.end(); + } + + // hasIncs - Returns true if this chain contains any increments. + bool hasIncs() const { return Incs.size() >= 2; } + + // add - Add an IVInc to the end of this chain. + void add(const IVInc &X) { Incs.push_back(X); } + + // tailUserInst - Returns the last UserInst in the chain. + Instruction *tailUserInst() const { return Incs.back().UserInst; } + + // isProfitableIncrement - Returns true if IncExpr can be profitably added to + // this chain. + bool isProfitableIncrement(const SCEV *OperExpr, + const SCEV *IncExpr, + ScalarEvolution&); +}; /// ChainUsers - Helper for CollectChains to track multiple IV increment uses. 
/// Distinguish between FarUsers that definitely cross IV increments and @@ -2160,7 +2194,7 @@ LSRInstance::FindUseWithSimilarFormula(const Formula &OrigF, return &LU; // This is the formula where all the registers and symbols matched; // there aren't going to be any others. Since we declined it, we - // can skip the rest of the formulae and procede to the next LSRUse. + // can skip the rest of the formulae and proceed to the next LSRUse. break; } } @@ -2319,41 +2353,23 @@ static const SCEV *getExprBase(const SCEV *S) { /// increment will be an offset relative to the same base. We allow such offsets /// to potentially be used as chain increment as long as it's not obviously /// expensive to expand using real instructions. -static const SCEV * -getProfitableChainIncrement(Value *NextIV, Value *PrevIV, - const IVChain &Chain, Loop *L, - ScalarEvolution &SE, const TargetLowering *TLI) { - // Prune the solution space aggressively by checking that both IV operands - // are expressions that operate on the same unscaled SCEVUnknown. This - // "base" will be canceled by the subsequent getMinusSCEV call. Checking first - // avoids creating extra SCEV expressions. - const SCEV *OperExpr = SE.getSCEV(NextIV); - const SCEV *PrevExpr = SE.getSCEV(PrevIV); - if (getExprBase(OperExpr) != getExprBase(PrevExpr) && !StressIVChain) - return 0; - - const SCEV *IncExpr = SE.getMinusSCEV(OperExpr, PrevExpr); - if (!SE.isLoopInvariant(IncExpr, L)) - return 0; - - // We are not able to expand an increment unless it is loop invariant, - // however, the following checks are purely for profitability. +bool IVChain::isProfitableIncrement(const SCEV *OperExpr, + const SCEV *IncExpr, + ScalarEvolution &SE) { + // Aggressively form chains when -stress-ivchain. if (StressIVChain) - return IncExpr; + return true; // Do not replace a constant offset from IV head with a nonconstant IV // increment. if (!isa(IncExpr)) { - const SCEV *HeadExpr = SE.getSCEV(getWideOperand(Chain[0].IVOperand)); + const SCEV *HeadExpr = SE.getSCEV(getWideOperand(Incs[0].IVOperand)); if (isa(SE.getMinusSCEV(OperExpr, HeadExpr))) return 0; } SmallPtrSet Processed; - if (isHighCostExpansion(IncExpr, Processed, SE)) - return 0; - - return IncExpr; + return !isHighCostExpansion(IncExpr, Processed, SE); } /// Return true if the number of registers needed for the chain is estimated to @@ -2372,18 +2388,18 @@ isProfitableChain(IVChain &Chain, SmallPtrSet &Users, if (StressIVChain) return true; - if (Chain.size() <= 2) + if (!Chain.hasIncs()) return false; if (!Users.empty()) { - DEBUG(dbgs() << "Chain: " << *Chain[0].UserInst << " users:\n"; + DEBUG(dbgs() << "Chain: " << *Chain.Incs[0].UserInst << " users:\n"; for (SmallPtrSet::const_iterator I = Users.begin(), E = Users.end(); I != E; ++I) { dbgs() << " " << **I << "\n"; }); return false; } - assert(!Chain.empty() && "empty IV chains are not allowed"); + assert(!Chain.Incs.empty() && "empty IV chains are not allowed"); // The chain itself may require a register, so intialize cost to 1. int cost = 1; @@ -2391,15 +2407,15 @@ isProfitableChain(IVChain &Chain, SmallPtrSet &Users, // A complete chain likely eliminates the need for keeping the original IV in // a register. LSR does not currently know how to form a complete chain unless // the header phi already exists. 
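As a standalone sketch of the bookkeeping the new IVChain struct encodes (plain STL stand-ins, not the LLVM types): Incs[0] holds the chain head, begin()/end() visit only the increments after it, mirroring the llvm::next(Chain.begin()) idiom this change replaces, and hasIncs() is true once at least one increment follows the head.

#include <cassert>
#include <cstdio>
#include <vector>

// One int stands in for one IVInc; only the head/increment bookkeeping of
// IVChain is modeled here, none of the SCEV logic.
struct Chain {
  std::vector<int> Incs;
  explicit Chain(int Head) : Incs(1, Head) {}
  typedef std::vector<int>::const_iterator const_iterator;
  const_iterator begin() const { assert(!Incs.empty()); return Incs.begin() + 1; }
  const_iterator end() const { return Incs.end(); }
  bool hasIncs() const { return Incs.size() >= 2; }  // head plus >= 1 increment
  void add(int X) { Incs.push_back(X); }
  int tailUser() const { return Incs.back(); }
};

int main() {
  Chain C(0);                       // head only
  assert(!C.hasIncs());
  C.add(1);
  C.add(2);
  assert(C.hasIncs() && C.tailUser() == 2);
  for (Chain::const_iterator I = C.begin(), E = C.end(); I != E; ++I)
    std::printf("inc %d\n", *I);    // prints 1 then 2; the head is skipped
  return 0;
}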
- if (isa(Chain.back().UserInst) - && SE.getSCEV(Chain.back().UserInst) == Chain[0].IncExpr) { + if (isa(Chain.tailUserInst()) + && SE.getSCEV(Chain.tailUserInst()) == Chain.Incs[0].IncExpr) { --cost; } const SCEV *LastIncExpr = 0; unsigned NumConstIncrements = 0; unsigned NumVarIncrements = 0; unsigned NumReusedIncrements = 0; - for (IVChain::const_iterator I = llvm::next(Chain.begin()), E = Chain.end(); + for (IVChain::const_iterator I = Chain.begin(), E = Chain.end(); I != E; ++I) { if (I->IncExpr->isZero()) @@ -2435,7 +2451,8 @@ isProfitableChain(IVChain &Chain, SmallPtrSet &Users, // the stride. cost -= NumReusedIncrements; - DEBUG(dbgs() << "Chain: " << *Chain[0].UserInst << " Cost: " << cost << "\n"); + DEBUG(dbgs() << "Chain: " << *Chain.Incs[0].UserInst << " Cost: " << cost + << "\n"); return cost < 0; } @@ -2446,25 +2463,39 @@ void LSRInstance::ChainInstruction(Instruction *UserInst, Instruction *IVOper, SmallVectorImpl &ChainUsersVec) { // When IVs are used as types of varying widths, they are generally converted // to a wider type with some uses remaining narrow under a (free) trunc. - Value *NextIV = getWideOperand(IVOper); + Value *const NextIV = getWideOperand(IVOper); + const SCEV *const OperExpr = SE.getSCEV(NextIV); + const SCEV *const OperExprBase = getExprBase(OperExpr); // Visit all existing chains. Check if its IVOper can be computed as a // profitable loop invariant increment from the last link in the Chain. unsigned ChainIdx = 0, NChains = IVChainVec.size(); const SCEV *LastIncExpr = 0; for (; ChainIdx < NChains; ++ChainIdx) { - Value *PrevIV = getWideOperand(IVChainVec[ChainIdx].back().IVOperand); + IVChain &Chain = IVChainVec[ChainIdx]; + + // Prune the solution space aggressively by checking that both IV operands + // are expressions that operate on the same unscaled SCEVUnknown. This + // "base" will be canceled by the subsequent getMinusSCEV call. Checking + // first avoids creating extra SCEV expressions. + if (!StressIVChain && Chain.ExprBase != OperExprBase) + continue; + + Value *PrevIV = getWideOperand(Chain.Incs.back().IVOperand); if (!isCompatibleIVType(PrevIV, NextIV)) continue; // A phi node terminates a chain. - if (isa(UserInst) - && isa(IVChainVec[ChainIdx].back().UserInst)) + if (isa(UserInst) && isa(Chain.tailUserInst())) + continue; + + // The increment must be loop-invariant so it can be kept in a register. + const SCEV *PrevExpr = SE.getSCEV(PrevIV); + const SCEV *IncExpr = SE.getMinusSCEV(OperExpr, PrevExpr); + if (!SE.isLoopInvariant(IncExpr, L)) continue; - if (const SCEV *IncExpr = - getProfitableChainIncrement(NextIV, PrevIV, IVChainVec[ChainIdx], - L, SE, TLI)) { + if (Chain.isProfitableIncrement(OperExpr, IncExpr, SE)) { LastIncExpr = IncExpr; break; } @@ -2478,24 +2509,24 @@ void LSRInstance::ChainInstruction(Instruction *UserInst, Instruction *IVOper, DEBUG(dbgs() << "IV Chain Limit\n"); return; } - LastIncExpr = SE.getSCEV(NextIV); + LastIncExpr = OperExpr; // IVUsers may have skipped over sign/zero extensions. We don't currently // attempt to form chains involving extensions unless they can be hoisted // into this loop's AddRec. 
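For intuition about the ExprBase pruning in ChainInstruction, a hypothetical source loop (not from the patch) whose accesses would land in one chain: all three loads share the unscaled base p, so their getExprBase results match, and the offsets between links are loop-invariant constants.

// p[i] is the chain head; p[i+1] and p[i+2] chain off it with constant
// increments instead of each being addressed independently from 'p' and 'i'.
float sum3(const float *p, int n) {
  float s = 0.0f;
  for (int i = 0; i + 2 < n; i += 3)
    s += p[i] + p[i + 1] + p[i + 2];
  return s;
}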
if (!isa(LastIncExpr)) return; ++NChains; - IVChainVec.resize(NChains); + IVChainVec.push_back(IVChain(IVInc(UserInst, IVOper, LastIncExpr), + OperExprBase)); ChainUsersVec.resize(NChains); - DEBUG(dbgs() << "IV Head: (" << *UserInst << ") IV=" << *LastIncExpr - << "\n"); + DEBUG(dbgs() << "IV Chain#" << ChainIdx << " Head: (" << *UserInst + << ") IV=" << *LastIncExpr << "\n"); + } else { + DEBUG(dbgs() << "IV Chain#" << ChainIdx << " Inc: (" << *UserInst + << ") IV+" << *LastIncExpr << "\n"); + // Add this IV user to the end of the chain. + IVChainVec[ChainIdx].add(IVInc(UserInst, IVOper, LastIncExpr)); } - else - DEBUG(dbgs() << "IV Inc: (" << *UserInst << ") IV+" << *LastIncExpr - << "\n"); - - // Add this IV user to the end of the chain. - IVChainVec[ChainIdx].push_back(IVInc(UserInst, IVOper, LastIncExpr)); SmallPtrSet &NearUsers = ChainUsersVec[ChainIdx].NearUsers; // This chain's NearUsers become FarUsers. @@ -2551,6 +2582,7 @@ void LSRInstance::ChainInstruction(Instruction *UserInst, Instruction *IVOper, /// loop latch. This will discover chains on side paths, but requires /// maintaining multiple copies of the Chains state. void LSRInstance::CollectChains() { + DEBUG(dbgs() << "Collecting IV Chains.\n"); SmallVector ChainUsersVec; SmallVector LatchPath; @@ -2622,10 +2654,10 @@ void LSRInstance::CollectChains() { } void LSRInstance::FinalizeChain(IVChain &Chain) { - assert(!Chain.empty() && "empty IV chains are not allowed"); - DEBUG(dbgs() << "Final Chain: " << *Chain[0].UserInst << "\n"); + assert(!Chain.Incs.empty() && "empty IV chains are not allowed"); + DEBUG(dbgs() << "Final Chain: " << *Chain.Incs[0].UserInst << "\n"); - for (IVChain::const_iterator I = llvm::next(Chain.begin()), E = Chain.end(); + for (IVChain::const_iterator I = Chain.begin(), E = Chain.end(); I != E; ++I) { DEBUG(dbgs() << " Inc: " << *I->UserInst << "\n"); User::op_iterator UseI = @@ -2659,7 +2691,7 @@ void LSRInstance::GenerateIVChain(const IVChain &Chain, SCEVExpander &Rewriter, SmallVectorImpl &DeadInsts) { // Find the new IVOperand for the head of the chain. It may have been replaced // by LSR. - const IVInc &Head = Chain[0]; + const IVInc &Head = Chain.Incs[0]; User::op_iterator IVOpEnd = Head.UserInst->op_end(); User::op_iterator IVOpIter = findIVOperand(Head.UserInst->op_begin(), IVOpEnd, L, SE); @@ -2691,7 +2723,7 @@ void LSRInstance::GenerateIVChain(const IVChain &Chain, SCEVExpander &Rewriter, Type *IVTy = IVSrc->getType(); Type *IntTy = SE.getEffectiveSCEVType(IVTy); const SCEV *LeftOverExpr = 0; - for (IVChain::const_iterator IncI = llvm::next(Chain.begin()), + for (IVChain::const_iterator IncI = Chain.begin(), IncE = Chain.end(); IncI != IncE; ++IncI) { Instruction *InsertPt = IncI->UserInst; @@ -2736,7 +2768,7 @@ void LSRInstance::GenerateIVChain(const IVChain &Chain, SCEVExpander &Rewriter, } // If LSR created a new, wider phi, we may also replace its postinc. We only // do this if we also found a wide value for the head of the chain. - if (isa(Chain.back().UserInst)) { + if (isa(Chain.tailUserInst())) { for (BasicBlock::iterator I = L->getHeader()->begin(); PHINode *Phi = dyn_cast(I); ++I) { if (!isCompatibleIVType(Phi, IVSrc)) @@ -2804,7 +2836,7 @@ void LSRInstance::CollectFixupsAndInitialFormulae() { // x == y --> x - y == 0 const SCEV *N = SE.getSCEV(NV); - if (SE.isLoopInvariant(N, L)) { + if (SE.isLoopInvariant(N, L) && isSafeToExpand(N)) { // S is normalized, so normalize N before folding it into S // to keep the result normalized. 
N = TransformForPostIncUse(Normalize, N, CI, 0, @@ -2974,42 +3006,64 @@ LSRInstance::CollectLoopInvariantFixupsAndFormulae() { /// CollectSubexprs - Split S into subexpressions which can be pulled out into /// separate registers. If C is non-null, multiply each subexpression by C. -static void CollectSubexprs(const SCEV *S, const SCEVConstant *C, - SmallVectorImpl &Ops, - const Loop *L, - ScalarEvolution &SE) { +/// +/// Return remainder expression after factoring the subexpressions captured by +/// Ops. If Ops is complete, return NULL. +static const SCEV *CollectSubexprs(const SCEV *S, const SCEVConstant *C, + SmallVectorImpl &Ops, + const Loop *L, + ScalarEvolution &SE, + unsigned Depth = 0) { + // Arbitrarily cap recursion to protect compile time. + if (Depth >= 3) + return S; + if (const SCEVAddExpr *Add = dyn_cast(S)) { // Break out add operands. for (SCEVAddExpr::op_iterator I = Add->op_begin(), E = Add->op_end(); - I != E; ++I) - CollectSubexprs(*I, C, Ops, L, SE); - return; + I != E; ++I) { + const SCEV *Remainder = CollectSubexprs(*I, C, Ops, L, SE, Depth+1); + if (Remainder) + Ops.push_back(C ? SE.getMulExpr(C, Remainder) : Remainder); + } + return NULL; } else if (const SCEVAddRecExpr *AR = dyn_cast(S)) { // Split a non-zero base out of an addrec. - if (!AR->getStart()->isZero()) { - CollectSubexprs(SE.getAddRecExpr(SE.getConstant(AR->getType(), 0), - AR->getStepRecurrence(SE), - AR->getLoop(), - //FIXME: AR->getNoWrapFlags(SCEV::FlagNW) - SCEV::FlagAnyWrap), - C, Ops, L, SE); - CollectSubexprs(AR->getStart(), C, Ops, L, SE); - return; + if (AR->getStart()->isZero()) + return S; + + const SCEV *Remainder = CollectSubexprs(AR->getStart(), + C, Ops, L, SE, Depth+1); + // Split the non-zero AddRec unless it is part of a nested recurrence that + // does not pertain to this loop. + if (Remainder && (AR->getLoop() == L || !isa(Remainder))) { + Ops.push_back(C ? SE.getMulExpr(C, Remainder) : Remainder); + Remainder = NULL; + } + if (Remainder != AR->getStart()) { + if (!Remainder) + Remainder = SE.getConstant(AR->getType(), 0); + return SE.getAddRecExpr(Remainder, + AR->getStepRecurrence(SE), + AR->getLoop(), + //FIXME: AR->getNoWrapFlags(SCEV::FlagNW) + SCEV::FlagAnyWrap); } } else if (const SCEVMulExpr *Mul = dyn_cast(S)) { // Break (C * (a + b + c)) into C*a + C*b + C*c. - if (Mul->getNumOperands() == 2) - if (const SCEVConstant *Op0 = - dyn_cast(Mul->getOperand(0))) { - CollectSubexprs(Mul->getOperand(1), - C ? cast(SE.getMulExpr(C, Op0)) : Op0, - Ops, L, SE); - return; - } + if (Mul->getNumOperands() != 2) + return S; + if (const SCEVConstant *Op0 = + dyn_cast(Mul->getOperand(0))) { + C = C ? cast(SE.getMulExpr(C, Op0)) : Op0; + const SCEV *Remainder = + CollectSubexprs(Mul->getOperand(1), C, Ops, L, SE, Depth+1); + if (Remainder) + Ops.push_back(SE.getMulExpr(C, Remainder)); + return NULL; + } } - - // Otherwise use the value itself, optionally with a scale applied. - Ops.push_back(C ? 
SE.getMulExpr(C, S) : S); + return S; } /// GenerateReassociations - Split out subexpressions from adds and the bases of @@ -3024,7 +3078,9 @@ void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx, const SCEV *BaseReg = Base.BaseRegs[i]; SmallVector AddOps; - CollectSubexprs(BaseReg, 0, AddOps, L, SE); + const SCEV *Remainder = CollectSubexprs(BaseReg, 0, AddOps, L, SE); + if (Remainder) + AddOps.push_back(Remainder); if (AddOps.size() == 1) continue; @@ -4236,13 +4292,6 @@ Value *LSRInstance::Expand(const LSRFixup &LF, Ops.push_back(SE.getUnknown(Rewriter.expandCodeFor(Reg, 0, IP))); } - // Flush the operand list to suppress SCEVExpander hoisting. - if (!Ops.empty()) { - Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty, IP); - Ops.clear(); - Ops.push_back(SE.getUnknown(FullV)); - } - // Expand the ScaledReg portion. Value *ICmpScaledV = 0; if (F.AM.Scale != 0) { @@ -4264,23 +4313,34 @@ Value *LSRInstance::Expand(const LSRFixup &LF, } else { // Otherwise just expand the scaled register and an explicit scale, // which is expected to be matched as part of the address. + + // Flush the operand list to suppress SCEVExpander hoisting address modes. + if (!Ops.empty() && LU.Kind == LSRUse::Address) { + Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty, IP); + Ops.clear(); + Ops.push_back(SE.getUnknown(FullV)); + } ScaledS = SE.getUnknown(Rewriter.expandCodeFor(ScaledS, 0, IP)); ScaledS = SE.getMulExpr(ScaledS, SE.getConstant(ScaledS->getType(), F.AM.Scale)); Ops.push_back(ScaledS); - - // Flush the operand list to suppress SCEVExpander hoisting. - Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty, IP); - Ops.clear(); - Ops.push_back(SE.getUnknown(FullV)); } } // Expand the GV portion. if (F.AM.BaseGV) { + // Flush the operand list to suppress SCEVExpander hoisting. + if (!Ops.empty()) { + Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty, IP); + Ops.clear(); + Ops.push_back(SE.getUnknown(FullV)); + } Ops.push_back(SE.getUnknown(F.AM.BaseGV)); + } - // Flush the operand list to suppress SCEVExpander hoisting. + // Flush the operand list to suppress SCEVExpander hoisting of both folded and + // unfolded offsets. LSR assumes they both live next to their uses. + if (!Ops.empty()) { Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty, IP); Ops.clear(); Ops.push_back(SE.getUnknown(FullV)); @@ -4485,7 +4545,7 @@ LSRInstance::ImplementSolution(const SmallVectorImpl &Solution, // Mark phi nodes that terminate chains so the expander tries to reuse them. for (SmallVectorImpl::const_iterator ChainI = IVChainVec.begin(), ChainE = IVChainVec.end(); ChainI != ChainE; ++ChainI) { - if (PHINode *PN = dyn_cast(ChainI->back().UserInst)) + if (PHINode *PN = dyn_cast(ChainI->tailUserInst())) Rewriter.setChainedPhi(PN); } diff --git a/lib/Transforms/Scalar/LoopUnswitch.cpp b/lib/Transforms/Scalar/LoopUnswitch.cpp index 930980f..58f7739 100644 --- a/lib/Transforms/Scalar/LoopUnswitch.cpp +++ b/lib/Transforms/Scalar/LoopUnswitch.cpp @@ -1214,8 +1214,8 @@ void LoopUnswitch::SimplifyCode(std::vector &Worklist, Loop *L) { // See if instruction simplification can hack this up. This is common for // things like "select false, X, Y" after unswitching made the condition be - // 'false'. - if (Value *V = SimplifyInstruction(I, 0, 0, DT)) + // 'false'. TODO: update the domtree properly so we can pass it here. 
+ if (Value *V = SimplifyInstruction(I)) if (LI->replacementPreservesLCSSAForm(I, V)) { ReplaceUsesOfWith(I, V, Worklist, L, LPM); continue; diff --git a/lib/Transforms/Scalar/LowerAtomic.cpp b/lib/Transforms/Scalar/LowerAtomic.cpp index 689bbe9..7419a65 100644 --- a/lib/Transforms/Scalar/LowerAtomic.cpp +++ b/lib/Transforms/Scalar/LowerAtomic.cpp @@ -15,9 +15,9 @@ #define DEBUG_TYPE "loweratomic" #include "llvm/Transforms/Scalar.h" #include "llvm/Function.h" +#include "llvm/IRBuilder.h" #include "llvm/IntrinsicInst.h" #include "llvm/Pass.h" -#include "llvm/Support/IRBuilder.h" using namespace llvm; static bool LowerAtomicCmpXchgInst(AtomicCmpXchgInst *CXI) { @@ -25,12 +25,12 @@ static bool LowerAtomicCmpXchgInst(AtomicCmpXchgInst *CXI) { Value *Ptr = CXI->getPointerOperand(); Value *Cmp = CXI->getCompareOperand(); Value *Val = CXI->getNewValOperand(); - + LoadInst *Orig = Builder.CreateLoad(Ptr); Value *Equal = Builder.CreateICmpEQ(Orig, Cmp); Value *Res = Builder.CreateSelect(Equal, Val, Orig); Builder.CreateStore(Res, Ptr); - + CXI->replaceAllUsesWith(Orig); CXI->eraseFromParent(); return true; diff --git a/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/lib/Transforms/Scalar/MemCpyOptimizer.cpp index a87cce3..2a5ee33 100644 --- a/lib/Transforms/Scalar/MemCpyOptimizer.cpp +++ b/lib/Transforms/Scalar/MemCpyOptimizer.cpp @@ -15,21 +15,21 @@ #define DEBUG_TYPE "memcpyopt" #include "llvm/Transforms/Scalar.h" #include "llvm/GlobalVariable.h" -#include "llvm/IntrinsicInst.h" +#include "llvm/IRBuilder.h" #include "llvm/Instructions.h" +#include "llvm/IntrinsicInst.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" -#include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/MemoryDependenceAnalysis.h" #include "llvm/Analysis/ValueTracking.h" -#include "llvm/Transforms/Utils/Local.h" #include "llvm/Support/Debug.h" #include "llvm/Support/GetElementPtrTypeIterator.h" -#include "llvm/Support/IRBuilder.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetLibraryInfo.h" +#include "llvm/Transforms/Utils/Local.h" #include using namespace llvm; @@ -44,7 +44,7 @@ static int64_t GetOffsetFromIndex(const GetElementPtrInst *GEP, unsigned Idx, gep_type_iterator GTI = gep_type_begin(GEP); for (unsigned i = 1; i != Idx; ++i, ++GTI) /*skip along*/; - + // Compute the offset implied by the rest of the indices. int64_t Offset = 0; for (unsigned i = Idx, e = GEP->getNumOperands(); i != e; ++i, ++GTI) { @@ -58,7 +58,7 @@ static int64_t GetOffsetFromIndex(const GetElementPtrInst *GEP, unsigned Idx, Offset += TD.getStructLayout(STy)->getElementOffset(OpC->getZExtValue()); continue; } - + // Otherwise, we have a sequential type like an array or vector. Multiply // the index by the ElementSize. uint64_t Size = TD.getTypeAllocSize(GTI.getIndexedType()); @@ -77,7 +77,7 @@ static bool IsPointerOffset(Value *Ptr1, Value *Ptr2, int64_t &Offset, Ptr2 = Ptr2->stripPointerCasts(); GetElementPtrInst *GEP1 = dyn_cast(Ptr1); GetElementPtrInst *GEP2 = dyn_cast(Ptr2); - + bool VariableIdxFound = false; // If one pointer is a GEP and the other isn't, then see if the GEP is a @@ -91,7 +91,7 @@ static bool IsPointerOffset(Value *Ptr1, Value *Ptr2, int64_t &Offset, Offset = GetOffsetFromIndex(GEP2, 1, VariableIdxFound, TD); return !VariableIdxFound; } - + // Right now we handle the case when Ptr1/Ptr2 are both GEPs with an identical // base. 
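The cmpxchg lowering shown earlier in LowerAtomic.cpp has a direct scalar equivalent; a minimal sketch of the semantics it emits, valid only under that pass's single-threaded assumption:

// load / icmp eq / select / store: the same instruction sequence
// LowerAtomicCmpXchgInst builds, expressed as plain C++.
int lowered_cmpxchg(int *Ptr, int Cmp, int Val) {
  int Orig = *Ptr;            // LoadInst
  bool Equal = (Orig == Cmp); // ICmpEQ
  *Ptr = Equal ? Val : Orig;  // Select + Store
  return Orig;                // cmpxchg yields the original value
}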
After that base, they may have some number of common (and // potentially variable) indices. After that they handle some constant @@ -99,7 +99,7 @@ static bool IsPointerOffset(Value *Ptr1, Value *Ptr2, int64_t &Offset, // handle no other case. if (!GEP1 || !GEP2 || GEP1->getOperand(0) != GEP2->getOperand(0)) return false; - + // Skip any common indices and track the GEP types. unsigned Idx = 1; for (; Idx != GEP1->getNumOperands() && Idx != GEP2->getNumOperands(); ++Idx) @@ -109,7 +109,7 @@ static bool IsPointerOffset(Value *Ptr1, Value *Ptr2, int64_t &Offset, int64_t Offset1 = GetOffsetFromIndex(GEP1, Idx, VariableIdxFound, TD); int64_t Offset2 = GetOffsetFromIndex(GEP2, Idx, VariableIdxFound, TD); if (VariableIdxFound) return false; - + Offset = Offset2-Offset1; return true; } @@ -128,19 +128,19 @@ static bool IsPointerOffset(Value *Ptr1, Value *Ptr2, int64_t &Offset, namespace { struct MemsetRange { // Start/End - A semi range that describes the span that this range covers. - // The range is closed at the start and open at the end: [Start, End). + // The range is closed at the start and open at the end: [Start, End). int64_t Start, End; /// StartPtr - The getelementptr instruction that points to the start of the /// range. Value *StartPtr; - + /// Alignment - The known alignment of the first store. unsigned Alignment; - + /// TheStores - The actual stores that make up this range. SmallVector TheStores; - + bool isProfitableToUseMemset(const TargetData &TD) const; }; @@ -152,17 +152,17 @@ bool MemsetRange::isProfitableToUseMemset(const TargetData &TD) const { // If there is nothing to merge, don't do anything. if (TheStores.size() < 2) return false; - + // If any of the stores are a memset, then it is always good to extend the // memset. for (unsigned i = 0, e = TheStores.size(); i != e; ++i) if (!isa(TheStores[i])) return true; - + // Assume that the code generator is capable of merging pairs of stores // together if it wants to. if (TheStores.size() == 2) return false; - + // If we have fewer than 8 stores, it can still be worthwhile to do this. // For example, merging 4 i8 stores into an i32 store is useful almost always. // However, merging 2 32-bit stores isn't useful on a 32-bit architecture (the @@ -175,15 +175,15 @@ bool MemsetRange::isProfitableToUseMemset(const TargetData &TD) const { // actually reducing the number of stores used. unsigned Bytes = unsigned(End-Start); unsigned NumPointerStores = Bytes/TD.getPointerSize(); - + // Assume the remaining bytes if any are done a byte at a time. unsigned NumByteStores = Bytes - NumPointerStores*TD.getPointerSize(); - + // If we will reduce the # stores (according to this heuristic), do the // transformation. This encourages merging 4 x i8 -> i32 and 2 x i16 -> i32 // etc. 
return TheStores.size() > NumPointerStores+NumByteStores; -} +} namespace { @@ -195,12 +195,12 @@ class MemsetRanges { const TargetData &TD; public: MemsetRanges(const TargetData &td) : TD(td) {} - + typedef std::list::const_iterator const_iterator; const_iterator begin() const { return Ranges.begin(); } const_iterator end() const { return Ranges.end(); } bool empty() const { return Ranges.empty(); } - + void addInst(int64_t OffsetFromFirst, Instruction *Inst) { if (StoreInst *SI = dyn_cast(Inst)) addStore(OffsetFromFirst, SI); @@ -210,21 +210,21 @@ public: void addStore(int64_t OffsetFromFirst, StoreInst *SI) { int64_t StoreSize = TD.getTypeStoreSize(SI->getOperand(0)->getType()); - + addRange(OffsetFromFirst, StoreSize, SI->getPointerOperand(), SI->getAlignment(), SI); } - + void addMemSet(int64_t OffsetFromFirst, MemSetInst *MSI) { int64_t Size = cast(MSI->getLength())->getZExtValue(); addRange(OffsetFromFirst, Size, MSI->getDest(), MSI->getAlignment(), MSI); } - + void addRange(int64_t Start, int64_t Size, Value *Ptr, unsigned Alignment, Instruction *Inst); }; - + } // end anon namespace @@ -240,10 +240,10 @@ void MemsetRanges::addRange(int64_t Start, int64_t Size, Value *Ptr, unsigned Alignment, Instruction *Inst) { int64_t End = Start+Size; range_iterator I = Ranges.begin(), E = Ranges.end(); - + while (I != E && Start > I->End) ++I; - + // We now know that I == E, in which case we didn't find anything to merge // with, or that Start <= I->End. If End < I->Start or I == E, then we need // to insert a new range. Handle this now. @@ -256,18 +256,18 @@ void MemsetRanges::addRange(int64_t Start, int64_t Size, Value *Ptr, R.TheStores.push_back(Inst); return; } - + // This store overlaps with I, add it. I->TheStores.push_back(Inst); - + // At this point, we may have an interval that completely contains our store. // If so, just add it to the interval and return. if (I->Start <= Start && I->End >= End) return; - + // Now we know that Start <= I->End and End >= I->Start so the range overlaps // but is not entirely contained within the range. - + // See if the range extends the start of the range. In this case, it couldn't // possibly cause it to join the prior range, because otherwise we would have // stopped on *it*. @@ -276,7 +276,7 @@ void MemsetRanges::addRange(int64_t Start, int64_t Size, Value *Ptr, I->StartPtr = Ptr; I->Alignment = Alignment; } - + // Now we know that Start <= I->End and Start >= I->Start (so the startpoint // is in or right at the end of I), and that End >= I->Start. Extend I out to // End. @@ -325,7 +325,7 @@ namespace { AU.addPreserved(); AU.addPreserved(); } - + // Helper fuctions bool processStore(StoreInst *SI, BasicBlock::iterator &BBI); bool processMemSet(MemSetInst *SI, BasicBlock::iterator &BBI); @@ -341,7 +341,7 @@ namespace { bool iterateOnFunction(Function &F); }; - + char MemCpyOpt::ID = 0; } @@ -361,16 +361,16 @@ INITIALIZE_PASS_END(MemCpyOpt, "memcpyopt", "MemCpy Optimization", /// some other patterns to fold away. In particular, this looks for stores to /// neighboring locations of memory. If it sees enough consecutive ones, it /// attempts to merge them together into a memcpy/memset. -Instruction *MemCpyOpt::tryMergingIntoMemset(Instruction *StartInst, +Instruction *MemCpyOpt::tryMergingIntoMemset(Instruction *StartInst, Value *StartPtr, Value *ByteVal) { if (TD == 0) return 0; - + // Okay, so we now have a single store that can be splatable. Scan to find // all subsequent stores of the same value to offset from the same pointer. 
// Join these together into ranges, so we can decide whether contiguous blocks // are stored. MemsetRanges Ranges(*TD); - + BasicBlock::iterator BI = StartInst; for (++BI; !isa(BI); ++BI) { if (!isa(BI) && !isa(BI)) { @@ -381,43 +381,43 @@ Instruction *MemCpyOpt::tryMergingIntoMemset(Instruction *StartInst, break; continue; } - + if (StoreInst *NextStore = dyn_cast(BI)) { // If this is a store, see if we can merge it in. if (!NextStore->isSimple()) break; - + // Check to see if this stored value is of the same byte-splattable value. if (ByteVal != isBytewiseValue(NextStore->getOperand(0))) break; - + // Check to see if this store is to a constant offset from the start ptr. int64_t Offset; if (!IsPointerOffset(StartPtr, NextStore->getPointerOperand(), Offset, *TD)) break; - + Ranges.addStore(Offset, NextStore); } else { MemSetInst *MSI = cast(BI); - + if (MSI->isVolatile() || ByteVal != MSI->getValue() || !isa(MSI->getLength())) break; - + // Check to see if this store is to a constant offset from the start ptr. int64_t Offset; if (!IsPointerOffset(StartPtr, MSI->getDest(), Offset, *TD)) break; - + Ranges.addMemSet(Offset, MSI); } } - + // If we have no ranges, then we just had a single store with nothing that // could be merged in. This is a very common case of course. if (Ranges.empty()) return 0; - + // If we had at least one store that could be merged in, add the starting // store as well. We try to avoid this unless there is at least something // interesting as a small compile-time optimization. @@ -434,28 +434,28 @@ Instruction *MemCpyOpt::tryMergingIntoMemset(Instruction *StartInst, for (MemsetRanges::const_iterator I = Ranges.begin(), E = Ranges.end(); I != E; ++I) { const MemsetRange &Range = *I; - + if (Range.TheStores.size() == 1) continue; - + // If it is profitable to lower this range to memset, do so now. if (!Range.isProfitableToUseMemset(*TD)) continue; - + // Otherwise, we do want to transform this! Create a new memset. // Get the starting pointer of the block. StartPtr = Range.StartPtr; - + // Determine alignment unsigned Alignment = Range.Alignment; if (Alignment == 0) { - Type *EltType = + Type *EltType = cast(StartPtr->getType())->getElementType(); Alignment = TD->getABITypeAlignment(EltType); } - - AMemSet = + + AMemSet = Builder.CreateMemSet(StartPtr, ByteVal, Range.End-Range.Start, Alignment); - + DEBUG(dbgs() << "Replace stores:\n"; for (unsigned i = 0, e = Range.TheStores.size(); i != e; ++i) dbgs() << *Range.TheStores[i] << '\n'; @@ -473,14 +473,14 @@ Instruction *MemCpyOpt::tryMergingIntoMemset(Instruction *StartInst, } ++NumMemSetInfer; } - + return AMemSet; } bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) { if (!SI->isSimple()) return false; - + if (TD == 0) return false; // Detect cases where we're performing call slot forwarding, but @@ -510,7 +510,7 @@ bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) { if (C) { bool changed = performCallSlotOptzn(LI, - SI->getPointerOperand()->stripPointerCasts(), + SI->getPointerOperand()->stripPointerCasts(), LI->getPointerOperand()->stripPointerCasts(), TD->getTypeStoreSize(SI->getOperand(0)->getType()), C); if (changed) { @@ -524,10 +524,10 @@ bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) { } } } - + // There are two cases that are interesting for this code to handle: memcpy // and memset. Right now we only handle memset. 
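Returning to the isProfitableToUseMemset heuristic shown a little earlier, here is a worked instance as a standalone program; the 32-bit pointer size and the four adjacent one-byte stores are assumed values for illustration.

#include <cstdint>
#include <cstdio>

int main() {
  // Four i8 stores covering [0, 4) on a target with 4-byte pointers.
  unsigned NumStores = 4;
  int64_t Start = 0, End = 4;
  unsigned PointerSize = 4;
  unsigned Bytes = unsigned(End - Start);                          // 4
  unsigned NumPointerStores = Bytes / PointerSize;                 // 1
  unsigned NumByteStores = Bytes - NumPointerStores * PointerSize; // 0
  // 4 > 1 + 0: one i32-wide memset beats four i8 stores, so merge.
  std::printf("profitable: %s\n",
              NumStores > NumPointerStores + NumByteStores ? "yes" : "no");
  return 0;
}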
- + // Ensure that the value being stored is something that can be memset'able a // byte at a time like "0" or "-1" or any width, as well as things like // 0xA0A0A0A0 and 0.0. @@ -537,7 +537,7 @@ bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) { BBI = I; // Don't invalidate iterator. return true; } - + return false; } @@ -662,7 +662,11 @@ bool MemCpyOpt::performCallSlotOptzn(Instruction *cpy, // the use analysis, we also need to know that it does not sneakily // access dest. We rely on AA to figure this out for us. AliasAnalysis &AA = getAnalysis(); - if (AA.getModRefInfo(C, cpyDest, srcSize) != AliasAnalysis::NoModRef) + AliasAnalysis::ModRefResult MR = AA.getModRefInfo(C, cpyDest, srcSize); + // If necessary, perform additional analysis. + if (MR != AliasAnalysis::NoModRef) + MR = AA.callCapturesBefore(C, cpyDest, srcSize, &DT); + if (MR != AliasAnalysis::NoModRef) return false; // All the checks have passed, so do the transformation. @@ -676,7 +680,7 @@ bool MemCpyOpt::performCallSlotOptzn(Instruction *cpy, if (CS.getArgument(i)->getType() == cpyDest->getType()) CS.setArgument(i, cpyDest); else - CS.setArgument(i, CastInst::CreatePointerCast(cpyDest, + CS.setArgument(i, CastInst::CreatePointerCast(cpyDest, CS.getArgument(i)->getType(), cpyDest->getName(), C)); } @@ -697,14 +701,14 @@ bool MemCpyOpt::performCallSlotOptzn(Instruction *cpy, /// processMemCpyMemCpyDependence - We've found that the (upward scanning) /// memory dependence of memcpy 'M' is the memcpy 'MDep'. Try to simplify M to /// copy from MDep's input if we can. MSize is the size of M's copy. -/// +/// bool MemCpyOpt::processMemCpyMemCpyDependence(MemCpyInst *M, MemCpyInst *MDep, uint64_t MSize) { // We can only transforms memcpy's where the dest of one is the source of the // other. if (M->getSource() != MDep->getDest() || MDep->isVolatile()) return false; - + // If dep instruction is reading from our current input, then it is a noop // transfer and substituting the input won't change this instruction. Just // ignore the input and let someone else zap MDep. This handles cases like: @@ -712,14 +716,14 @@ bool MemCpyOpt::processMemCpyMemCpyDependence(MemCpyInst *M, MemCpyInst *MDep, // memcpy(b <- a) if (M->getSource() == MDep->getSource()) return false; - + // Second, the length of the memcpy's must be the same, or the preceding one // must be larger than the following one. ConstantInt *MDepLen = dyn_cast(MDep->getLength()); ConstantInt *MLen = dyn_cast(M->getLength()); if (!MDepLen || !MLen || MDepLen->getZExtValue() < MLen->getZExtValue()) return false; - + AliasAnalysis &AA = getAnalysis(); // Verify that the copied-from memory doesn't change in between the two @@ -739,23 +743,23 @@ bool MemCpyOpt::processMemCpyMemCpyDependence(MemCpyInst *M, MemCpyInst *MDep, false, M, M->getParent()); if (!SourceDep.isClobber() || SourceDep.getInst() != MDep) return false; - + // If the dest of the second might alias the source of the first, then the // source and dest might overlap. We still want to eliminate the intermediate // value, but we have to generate a memmove instead of memcpy. bool UseMemMove = false; if (!AA.isNoAlias(AA.getLocationForDest(M), AA.getLocationForSource(MDep))) UseMemMove = true; - + // If all checks passed, then we can transform M. - + // Make sure to use the lesser of the alignment of the source and the dest // since we're changing where we're reading from, but don't want to increase // the alignment past what can be read from or written to. 
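The memcpy-memcpy forwarding described above is easiest to see at the source level; a hypothetical pair of copies (the names and the 64-byte size are illustrative):

#include <cstring>

// MDep copies a -> b; M copies b -> c. If 'a' is unchanged between the two
// calls and M is no larger than MDep, M can read from 'a' directly; when 'c'
// may alias 'a', a memmove is emitted instead of a memcpy.
void forward(char *a, char *b, char *c) {
  std::memcpy(b, a, 64);   // MDep
  std::memcpy(c, b, 64);   // M: rewritten to copy from 'a'
}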
// TODO: Is this worth it if we're creating a less aligned memcpy? For // example we could be moving from movaps -> movq on x86. unsigned Align = std::min(MDep->getAlignment(), M->getAlignment()); - + IRBuilder<> Builder(M); if (UseMemMove) Builder.CreateMemMove(M->getRawDest(), MDep->getRawSource(), M->getLength(), @@ -835,13 +839,13 @@ bool MemCpyOpt::processMemMove(MemMoveInst *M) { if (!TLI->has(LibFunc::memmove)) return false; - + // See if the pointers alias. if (!AA.isNoAlias(AA.getLocationForDest(M), AA.getLocationForSource(M))) return false; - + DEBUG(dbgs() << "MemCpyOpt: Optimizing memmove -> memcpy: " << *M << "\n"); - + // If not, then we know we can transform this. Module *Mod = M->getParent()->getParent()->getParent(); Type *ArgTys[3] = { M->getRawDest()->getType(), @@ -857,7 +861,7 @@ bool MemCpyOpt::processMemMove(MemMoveInst *M) { ++NumMoveToCpy; return true; } - + /// processByValArgument - This is called on every byval argument in call sites. bool MemCpyOpt::processByValArgument(CallSite CS, unsigned ArgNo) { if (TD == 0) return false; @@ -880,7 +884,7 @@ bool MemCpyOpt::processByValArgument(CallSite CS, unsigned ArgNo) { if (MDep == 0 || MDep->isVolatile() || ByValArg->stripPointerCasts() != MDep->getDest()) return false; - + // The length of the memcpy must be larger or equal to the size of the byval. ConstantInt *C1 = dyn_cast(MDep->getLength()); if (C1 == 0 || C1->getValue().getZExtValue() < ByValSize) @@ -890,13 +894,13 @@ bool MemCpyOpt::processByValArgument(CallSite CS, unsigned ArgNo) { // then it is some target specific value that we can't know. unsigned ByValAlign = CS.getParamAlignment(ArgNo+1); if (ByValAlign == 0) return false; - + // If it is greater than the memcpy, then we check to see if we can force the // source of the memcpy to the alignment we need. If we fail, we bail out. if (MDep->getAlignment() < ByValAlign && getOrEnforceKnownAlignment(MDep->getSource(),ByValAlign, TD) < ByValAlign) return false; - + // Verify that the copied-from memory doesn't change in between the memcpy and // the byval call. // memcpy(a <- b) @@ -911,16 +915,16 @@ bool MemCpyOpt::processByValArgument(CallSite CS, unsigned ArgNo) { false, CS.getInstruction(), MDep->getParent()); if (!SourceDep.isClobber() || SourceDep.getInst() != MDep) return false; - + Value *TmpCast = MDep->getSource(); if (MDep->getSource()->getType() != ByValArg->getType()) TmpCast = new BitCastInst(MDep->getSource(), ByValArg->getType(), "tmpcast", CS.getInstruction()); - + DEBUG(dbgs() << "MemCpyOpt: Forwarding memcpy to byval:\n" << " " << *MDep << "\n" << " " << *CS.getInstruction() << "\n"); - + // Otherwise we're good! Update the byval argument. CS.setArgument(ArgNo, TmpCast); ++NumMemCpyInstr; @@ -936,9 +940,9 @@ bool MemCpyOpt::iterateOnFunction(Function &F) { for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); BI != BE;) { // Avoid invalidating the iterator. Instruction *I = BI++; - + bool RepeatInstruction = false; - + if (StoreInst *SI = dyn_cast(I)) MadeChange |= processStore(SI, BI); else if (MemSetInst *M = dyn_cast(I)) @@ -960,7 +964,7 @@ bool MemCpyOpt::iterateOnFunction(Function &F) { } } } - + return MadeChange; } @@ -972,19 +976,19 @@ bool MemCpyOpt::runOnFunction(Function &F) { MD = &getAnalysis(); TD = getAnalysisIfAvailable(); TLI = &getAnalysis(); - + // If we don't have at least memset and memcpy, there is little point of doing // anything here. These are required by a freestanding implementation, so if // even they are disabled, there is no point in trying hard. 
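Likewise for the byval forwarding in processByValArgument above, a hypothetical caller/callee pair showing the pattern it handles (the struct size and names are assumptions for the sketch):

#include <cstring>

struct Big { char Bytes[128]; };
void callee(Big ByVal) { (void)ByVal; }  // parameter passed byval (hidden copy)

// 'Tmp' is filled by a memcpy from 'Src' and then only used as the byval
// argument, so the call's hidden copy can be taken from 'Src' directly and
// the intermediate 'Tmp' becomes dead.
void caller(const Big *Src) {
  Big Tmp;
  std::memcpy(&Tmp, Src, sizeof(Big));
  callee(Tmp);
}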
if (!TLI->has(LibFunc::memset) || !TLI->has(LibFunc::memcpy)) return false; - + while (1) { if (!iterateOnFunction(F)) break; MadeChange = true; } - + MD = 0; return MadeChange; } diff --git a/lib/Transforms/Scalar/ObjCARC.cpp b/lib/Transforms/Scalar/ObjCARC.cpp index 7e3e69b..3222f20 100644 --- a/lib/Transforms/Scalar/ObjCARC.cpp +++ b/lib/Transforms/Scalar/ObjCARC.cpp @@ -20,7 +20,7 @@ // This file also defines a simple ARC-aware AliasAnalysis. // // WARNING: This file knows about certain library functions. It recognizes them -// by name, and hardwires knowedge of their semantics. +// by name, and hardwires knowledge of their semantics. // // WARNING: This file knows about how certain Objective-C library functions are // used. Naive LLVM IR transformations which would otherwise be @@ -29,18 +29,8 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "objc-arc" -#include "llvm/Function.h" -#include "llvm/Intrinsics.h" -#include "llvm/GlobalVariable.h" -#include "llvm/DerivedTypes.h" -#include "llvm/Module.h" -#include "llvm/Analysis/ValueTracking.h" -#include "llvm/Transforms/Utils/Local.h" -#include "llvm/Support/CallSite.h" #include "llvm/Support/CommandLine.h" -#include "llvm/ADT/StringSwitch.h" #include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/STLExtras.h" using namespace llvm; // A handy option to enable/disable all optimizations in this file. @@ -141,6 +131,13 @@ namespace { // ARC Utilities. //===----------------------------------------------------------------------===// +#include "llvm/Intrinsics.h" +#include "llvm/Module.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/Transforms/Utils/Local.h" +#include "llvm/Support/CallSite.h" +#include "llvm/ADT/StringSwitch.h" + namespace { /// InstructionClass - A simple classification for instructions. enum InstructionClass { @@ -299,22 +296,23 @@ static InstructionClass GetInstructionClass(const Value *V) { // None of the intrinsic functions do objc_release. For intrinsics, the // only question is whether or not they may be users. switch (F->getIntrinsicID()) { - case 0: break; - case Intrinsic::bswap: case Intrinsic::ctpop: - case Intrinsic::ctlz: case Intrinsic::cttz: case Intrinsic::returnaddress: case Intrinsic::frameaddress: case Intrinsic::stacksave: case Intrinsic::stackrestore: case Intrinsic::vastart: case Intrinsic::vacopy: case Intrinsic::vaend: + case Intrinsic::objectsize: case Intrinsic::prefetch: + case Intrinsic::stackprotector: + case Intrinsic::eh_return_i32: case Intrinsic::eh_return_i64: + case Intrinsic::eh_typeid_for: case Intrinsic::eh_dwarf_cfa: + case Intrinsic::eh_sjlj_lsda: case Intrinsic::eh_sjlj_functioncontext: + case Intrinsic::init_trampoline: case Intrinsic::adjust_trampoline: + case Intrinsic::lifetime_start: case Intrinsic::lifetime_end: + case Intrinsic::invariant_start: case Intrinsic::invariant_end: // Don't let dbg info affect our results. case Intrinsic::dbg_declare: case Intrinsic::dbg_value: // Short cut: Some intrinsics obviously don't use ObjC pointers. return IC_None; default: - for (Function::const_arg_iterator AI = F->arg_begin(), - AE = F->arg_end(); AI != AE; ++AI) - if (IsPotentialUse(AI)) - return IC_User; - return IC_None; + break; } } return GetCallSiteClass(CI); @@ -382,14 +380,14 @@ static InstructionClass GetBasicInstructionClass(const Value *V) { return isa(V) ? IC_CallOrUser : IC_User; } -/// IsRetain - Test if the the given class is objc_retain or +/// IsRetain - Test if the given class is objc_retain or /// equivalent. 
static bool IsRetain(InstructionClass Class) { return Class == IC_Retain || Class == IC_RetainRV; } -/// IsAutorelease - Test if the the given class is objc_autorelease or +/// IsAutorelease - Test if the given class is objc_autorelease or /// equivalent. static bool IsAutorelease(InstructionClass Class) { return Class == IC_Autorelease || @@ -444,7 +442,7 @@ static bool IsNoThrow(InstructionClass Class) { Class == IC_AutoreleasepoolPop; } -/// EraseInstruction - Erase the given instruction. ObjC calls return their +/// EraseInstruction - Erase the given instruction. Many ObjC calls return their /// argument verbatim, so if it's such a call and the return value has users, /// replace them with the argument value. static void EraseInstruction(Instruction *CI) { @@ -565,9 +563,8 @@ static const Value *FindSingleUseIdentifiedObject(const Value *Arg) { return Arg; } - // If we found an identifiable object but it has multiple uses, but they - // are trivial uses, we can still consider this to be a single-use - // value. + // If we found an identifiable object but it has multiple uses, but they are + // trivial uses, we can still consider this to be a single-use value. if (IsObjCIdentifiedObject(Arg)) { for (Value::const_use_iterator UI = Arg->use_begin(), UE = Arg->use_end(); UI != UE; ++UI) { @@ -692,7 +689,7 @@ namespace { /// specified pass info. virtual void *getAdjustedAnalysisPointer(const void *PI) { if (PI == &AliasAnalysis::ID) - return (AliasAnalysis*)this; + return static_cast(this); return this; } @@ -815,7 +812,7 @@ ObjCARCAliasAnalysis::getModRefInfo(ImmutableCallSite CS, const Location &Loc) { case IC_FusedRetainAutorelease: case IC_FusedRetainAutoreleaseRV: // These functions don't access any memory visible to the compiler. - // Note that this doesn't include objc_retainBlock, becuase it updates + // Note that this doesn't include objc_retainBlock, because it updates // pointers when it copies block data. return NoModRef; default: @@ -915,6 +912,7 @@ bool ObjCARCExpand::runOnFunction(Function &F) { //===----------------------------------------------------------------------===// #include "llvm/Constants.h" +#include "llvm/ADT/STLExtras.h" namespace { /// ObjCARCAPElim - Autorelease pool elimination. @@ -922,8 +920,8 @@ namespace { virtual void getAnalysisUsage(AnalysisUsage &AU) const; virtual bool runOnModule(Module &M); - bool MayAutorelease(CallSite CS, unsigned Depth = 0); - bool OptimizeBB(BasicBlock *BB); + static bool MayAutorelease(ImmutableCallSite CS, unsigned Depth = 0); + static bool OptimizeBB(BasicBlock *BB); public: static char ID; @@ -949,15 +947,16 @@ void ObjCARCAPElim::getAnalysisUsage(AnalysisUsage &AU) const { /// MayAutorelease - Interprocedurally determine if calls made by the /// given call site can possibly produce autoreleases. 
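Before the definition that follows, a minimal hypothetical shape of what ObjCARCAPElim ultimately removes; work() is a placeholder the analysis would have to prove unable to autorelease, and the runtime functions are declared here only for the sketch:

extern "C" void *objc_autoreleasePoolPush(void);
extern "C" void objc_autoreleasePoolPop(void *);
static void work(void) {}   // placeholder; assumed unable to autorelease

void example(void) {
  void *Token = objc_autoreleasePoolPush();  // candidate Push
  work();                                    // IC_CallOrUser screened by
                                             // MayAutorelease
  objc_autoreleasePoolPop(Token);            // matching Pop: pair deleted
}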
-bool ObjCARCAPElim::MayAutorelease(CallSite CS, unsigned Depth) { - if (Function *Callee = CS.getCalledFunction()) { +bool ObjCARCAPElim::MayAutorelease(ImmutableCallSite CS, unsigned Depth) { + if (const Function *Callee = CS.getCalledFunction()) { if (Callee->isDeclaration() || Callee->mayBeOverridden()) return true; - for (Function::iterator I = Callee->begin(), E = Callee->end(); + for (Function::const_iterator I = Callee->begin(), E = Callee->end(); I != E; ++I) { - BasicBlock *BB = I; - for (BasicBlock::iterator J = BB->begin(), F = BB->end(); J != F; ++J) - if (CallSite JCS = CallSite(J)) + const BasicBlock *BB = I; + for (BasicBlock::const_iterator J = BB->begin(), F = BB->end(); + J != F; ++J) + if (ImmutableCallSite JCS = ImmutableCallSite(J)) // This recursion depth limit is arbitrary. It's just great // enough to cover known interesting testcases. if (Depth < 3 && @@ -992,7 +991,7 @@ bool ObjCARCAPElim::OptimizeBB(BasicBlock *BB) { Push = 0; break; case IC_CallOrUser: - if (MayAutorelease(CallSite(Inst))) + if (MayAutorelease(ImmutableCallSite(Inst))) Push = 0; break; default: @@ -1033,7 +1032,11 @@ bool ObjCARCAPElim::runOnModule(Module &M) { Value *Op = *OI; // llvm.global_ctors is an array of pairs where the second members // are constructor functions. - Function *F = cast(cast(Op)->getOperand(1)); + Function *F = dyn_cast(cast(Op)->getOperand(1)); + // If the user used a constructor function with the wrong signature and + // it got bitcasted or whatever, look the other way. + if (!F) + continue; // Only look at function definitions. if (F->isDeclaration()) continue; @@ -1089,14 +1092,10 @@ bool ObjCARCAPElim::runOnModule(Module &M) { // TODO: Delete release+retain pairs (rare). -#include "llvm/GlobalAlias.h" -#include "llvm/Constants.h" #include "llvm/LLVMContext.h" -#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/CFG.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/DenseSet.h" STATISTIC(NumNoops, "Number of no-op objc calls eliminated"); STATISTIC(NumPartialNoops, "Number of partially no-op objc calls eliminated"); @@ -1144,22 +1143,13 @@ bool ProvenanceAnalysis::relatedSelect(const SelectInst *A, const Value *B) { // If the values are Selects with the same condition, we can do a more precise // check: just check for relations between the values on corresponding arms. if (const SelectInst *SB = dyn_cast(B)) - if (A->getCondition() == SB->getCondition()) { - if (related(A->getTrueValue(), SB->getTrueValue())) - return true; - if (related(A->getFalseValue(), SB->getFalseValue())) - return true; - return false; - } + if (A->getCondition() == SB->getCondition()) + return related(A->getTrueValue(), SB->getTrueValue()) || + related(A->getFalseValue(), SB->getFalseValue()); // Check both arms of the Select node individually. - if (related(A->getTrueValue(), B)) - return true; - if (related(A->getFalseValue(), B)) - return true; - - // The arms both checked out. - return false; + return related(A->getTrueValue(), B) || + related(A->getFalseValue(), B); } bool ProvenanceAnalysis::relatedPHI(const PHINode *A, const Value *B) { @@ -1357,12 +1347,6 @@ namespace { /// with the "tail" keyword. bool IsTailCallRelease; - /// Partial - True of we've seen an opportunity for partial RR elimination, - /// such as pushing calls into a CFG triangle or into one side of a - /// CFG diamond. - /// TODO: Consider moving this to PtrState. 
- bool Partial; - /// ReleaseMetadata - If the Calls are objc_release calls and they all have /// a clang.imprecise_release tag, this is the metadata tag. MDNode *ReleaseMetadata; @@ -1377,7 +1361,7 @@ namespace { RRInfo() : KnownSafe(false), IsRetainBlock(false), - IsTailCallRelease(false), Partial(false), + IsTailCallRelease(false), ReleaseMetadata(0) {} void clear(); @@ -1388,7 +1372,6 @@ void RRInfo::clear() { KnownSafe = false; IsRetainBlock = false; IsTailCallRelease = false; - Partial = false; ReleaseMetadata = 0; Calls.clear(); ReverseInsertPts.clear(); @@ -1398,36 +1381,39 @@ namespace { /// PtrState - This class summarizes several per-pointer runtime properties /// which are propogated through the flow graph. class PtrState { - /// RefCount - The known minimum number of reference count increments. - unsigned RefCount; - /// NestCount - The known minimum level of retain+release nesting. unsigned NestCount; + /// KnownPositiveRefCount - True if the reference count is known to + /// be incremented. + bool KnownPositiveRefCount; + + /// Partial - True of we've seen an opportunity for partial RR elimination, + /// such as pushing calls into a CFG triangle or into one side of a + /// CFG diamond. + bool Partial; + /// Seq - The current position in the sequence. - Sequence Seq; + Sequence Seq : 8; public: /// RRI - Unidirectional information about the current sequence. /// TODO: Encapsulate this better. RRInfo RRI; - PtrState() : RefCount(0), NestCount(0), Seq(S_None) {} - - void SetAtLeastOneRefCount() { - if (RefCount == 0) RefCount = 1; - } + PtrState() : NestCount(0), KnownPositiveRefCount(false), Partial(false), + Seq(S_None) {} - void IncrementRefCount() { - if (RefCount != UINT_MAX) ++RefCount; + void SetKnownPositiveRefCount() { + KnownPositiveRefCount = true; } - void DecrementRefCount() { - if (RefCount != 0) --RefCount; + void ClearRefCount() { + KnownPositiveRefCount = false; } bool IsKnownIncremented() const { - return RefCount > 0; + return KnownPositiveRefCount; } void IncrementNestCount() { @@ -1451,7 +1437,12 @@ namespace { } void ClearSequenceProgress() { - Seq = S_None; + ResetSequenceProgress(S_None); + } + + void ResetSequenceProgress(Sequence NewSeq) { + Seq = NewSeq; + Partial = false; RRI.clear(); } @@ -1462,7 +1453,7 @@ namespace { void PtrState::Merge(const PtrState &Other, bool TopDown) { Seq = MergeSeqs(Seq, Other.Seq, TopDown); - RefCount = std::min(RefCount, Other.RefCount); + KnownPositiveRefCount = KnownPositiveRefCount && Other.KnownPositiveRefCount; NestCount = std::min(NestCount, Other.NestCount); // We can't merge a plain objc_retain with an objc_retainBlock. @@ -1471,31 +1462,31 @@ PtrState::Merge(const PtrState &Other, bool TopDown) { // If we're not in a sequence (anymore), drop all associated state. if (Seq == S_None) { + Partial = false; RRI.clear(); - } else if (RRI.Partial || Other.RRI.Partial) { + } else if (Partial || Other.Partial) { // If we're doing a merge on a path that's previously seen a partial // merge, conservatively drop the sequence, to avoid doing partial // RR elimination. If the branch predicates for the two merge differ, // mixing them is unsafe. - Seq = S_None; - RRI.clear(); + ClearSequenceProgress(); } else { // Conservatively merge the ReleaseMetadata information. 
if (RRI.ReleaseMetadata != Other.RRI.ReleaseMetadata) RRI.ReleaseMetadata = 0; RRI.KnownSafe = RRI.KnownSafe && Other.RRI.KnownSafe; - RRI.IsTailCallRelease = RRI.IsTailCallRelease && Other.RRI.IsTailCallRelease; + RRI.IsTailCallRelease = RRI.IsTailCallRelease && + Other.RRI.IsTailCallRelease; RRI.Calls.insert(Other.RRI.Calls.begin(), Other.RRI.Calls.end()); // Merge the insert point sets. If there are any differences, // that makes this a partial merge. - RRI.Partial = RRI.ReverseInsertPts.size() != - Other.RRI.ReverseInsertPts.size(); + Partial = RRI.ReverseInsertPts.size() != Other.RRI.ReverseInsertPts.size(); for (SmallPtrSet::const_iterator I = Other.RRI.ReverseInsertPts.begin(), E = Other.RRI.ReverseInsertPts.end(); I != E; ++I) - RRI.Partial |= RRI.ReverseInsertPts.insert(*I); + Partial |= RRI.ReverseInsertPts.insert(*I); } } @@ -1521,6 +1512,11 @@ namespace { /// known about a pointer at the top of each block. MapTy PerPtrBottomUp; + /// Preds, Succs - Effective successors and predecessors of the current + /// block (this ignores ignorable edges and ignored backedges). + SmallVector Preds; + SmallVector Succs; + public: BBState() : TopDownPathCount(0), BottomUpPathCount(0) {} @@ -1578,14 +1574,22 @@ namespace { /// entry to an exit which pass through this block. This is only valid /// after both the top-down and bottom-up traversals are complete. unsigned GetAllPathCount() const { + assert(TopDownPathCount != 0); + assert(BottomUpPathCount != 0); return TopDownPathCount * BottomUpPathCount; } - /// IsVisitedTopDown - Test whether the block for this BBState has been - /// visited by the top-down portion of the algorithm. - bool isVisitedTopDown() const { - return TopDownPathCount != 0; - } + // Specialized CFG utilities. + typedef SmallVectorImpl::const_iterator edge_iterator; + edge_iterator pred_begin() { return Preds.begin(); } + edge_iterator pred_end() { return Preds.end(); } + edge_iterator succ_begin() { return Succs.begin(); } + edge_iterator succ_end() { return Succs.end(); } + + void addSucc(BasicBlock *Succ) { Succs.push_back(Succ); } + void addPred(BasicBlock *Pred) { Preds.push_back(Pred); } + + bool isExit() const { return Succs.empty(); } }; } @@ -1783,12 +1787,9 @@ Constant *ObjCARCOpt::getRetainRVCallee(Module *M) { if (!RetainRVCallee) { LLVMContext &C = M->getContext(); Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C)); - std::vector Params; - Params.push_back(I8X); - FunctionType *FTy = - FunctionType::get(I8X, Params, /*isVarArg=*/false); - AttrListPtr Attributes; - Attributes.addAttr(~0u, Attribute::NoUnwind); + Type *Params[] = { I8X }; + FunctionType *FTy = FunctionType::get(I8X, Params, /*isVarArg=*/false); + AttrListPtr Attributes = AttrListPtr().addAttr(~0u, Attribute::NoUnwind); RetainRVCallee = M->getOrInsertFunction("objc_retainAutoreleasedReturnValue", FTy, Attributes); @@ -1800,12 +1801,9 @@ Constant *ObjCARCOpt::getAutoreleaseRVCallee(Module *M) { if (!AutoreleaseRVCallee) { LLVMContext &C = M->getContext(); Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C)); - std::vector Params; - Params.push_back(I8X); - FunctionType *FTy = - FunctionType::get(I8X, Params, /*isVarArg=*/false); - AttrListPtr Attributes; - Attributes.addAttr(~0u, Attribute::NoUnwind); + Type *Params[] = { I8X }; + FunctionType *FTy = FunctionType::get(I8X, Params, /*isVarArg=*/false); + AttrListPtr Attributes = AttrListPtr().addAttr(~0u, Attribute::NoUnwind); AutoreleaseRVCallee = M->getOrInsertFunction("objc_autoreleaseReturnValue", FTy, Attributes); @@ -1816,10 +1814,8 @@ 
Constant *ObjCARCOpt::getAutoreleaseRVCallee(Module *M) { Constant *ObjCARCOpt::getReleaseCallee(Module *M) { if (!ReleaseCallee) { LLVMContext &C = M->getContext(); - std::vector Params; - Params.push_back(PointerType::getUnqual(Type::getInt8Ty(C))); - AttrListPtr Attributes; - Attributes.addAttr(~0u, Attribute::NoUnwind); + Type *Params[] = { PointerType::getUnqual(Type::getInt8Ty(C)) }; + AttrListPtr Attributes = AttrListPtr().addAttr(~0u, Attribute::NoUnwind); ReleaseCallee = M->getOrInsertFunction( "objc_release", @@ -1832,10 +1828,8 @@ Constant *ObjCARCOpt::getReleaseCallee(Module *M) { Constant *ObjCARCOpt::getRetainCallee(Module *M) { if (!RetainCallee) { LLVMContext &C = M->getContext(); - std::vector Params; - Params.push_back(PointerType::getUnqual(Type::getInt8Ty(C))); - AttrListPtr Attributes; - Attributes.addAttr(~0u, Attribute::NoUnwind); + Type *Params[] = { PointerType::getUnqual(Type::getInt8Ty(C)) }; + AttrListPtr Attributes = AttrListPtr().addAttr(~0u, Attribute::NoUnwind); RetainCallee = M->getOrInsertFunction( "objc_retain", @@ -1848,16 +1842,14 @@ Constant *ObjCARCOpt::getRetainCallee(Module *M) { Constant *ObjCARCOpt::getRetainBlockCallee(Module *M) { if (!RetainBlockCallee) { LLVMContext &C = M->getContext(); - std::vector Params; - Params.push_back(PointerType::getUnqual(Type::getInt8Ty(C))); - AttrListPtr Attributes; + Type *Params[] = { PointerType::getUnqual(Type::getInt8Ty(C)) }; // objc_retainBlock is not nounwind because it calls user copy constructors // which could theoretically throw. RetainBlockCallee = M->getOrInsertFunction( "objc_retainBlock", FunctionType::get(Params[0], Params, /*isVarArg=*/false), - Attributes); + AttrListPtr()); } return RetainBlockCallee; } @@ -1865,10 +1857,8 @@ Constant *ObjCARCOpt::getRetainBlockCallee(Module *M) { Constant *ObjCARCOpt::getAutoreleaseCallee(Module *M) { if (!AutoreleaseCallee) { LLVMContext &C = M->getContext(); - std::vector Params; - Params.push_back(PointerType::getUnqual(Type::getInt8Ty(C))); - AttrListPtr Attributes; - Attributes.addAttr(~0u, Attribute::NoUnwind); + Type *Params[] = { PointerType::getUnqual(Type::getInt8Ty(C)) }; + AttrListPtr Attributes = AttrListPtr().addAttr(~0u, Attribute::NoUnwind); AutoreleaseCallee = M->getOrInsertFunction( "objc_autorelease", @@ -2153,13 +2143,13 @@ static bool isNoopInstruction(const Instruction *I) { /// objc_retainAutoreleasedReturnValue if the operand is a return value. void ObjCARCOpt::OptimizeRetainCall(Function &F, Instruction *Retain) { - CallSite CS(GetObjCArg(Retain)); - Instruction *Call = CS.getInstruction(); + ImmutableCallSite CS(GetObjCArg(Retain)); + const Instruction *Call = CS.getInstruction(); if (!Call) return; if (Call->getParent() != Retain->getParent()) return; // Check that the call is next to the retain. - BasicBlock::iterator I = Call; + BasicBlock::const_iterator I = Call; ++I; while (isNoopInstruction(I)) ++I; if (&*I != Retain) @@ -2172,25 +2162,24 @@ ObjCARCOpt::OptimizeRetainCall(Function &F, Instruction *Retain) { } /// OptimizeRetainRVCall - Turn objc_retainAutoreleasedReturnValue into -/// objc_retain if the operand is not a return value. Or, if it can be -/// paired with an objc_autoreleaseReturnValue, delete the pair and -/// return true. +/// objc_retain if the operand is not a return value. Or, if it can be paired +/// with an objc_autoreleaseReturnValue, delete the pair and return true. 
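A hypothetical illustration of the pairing that OptimizeRetainRVCall (defined next) performs once inlining puts both halves in one function; the runtime entry points are declared here only for the sketch:

extern "C" void *objc_autoreleaseReturnValue(void *);
extern "C" void *objc_retainAutoreleasedReturnValue(void *);

// The callee-side autoreleaseRV hands 'Obj' straight to the caller-side
// retainRV; the two calls cancel, both are deleted, and 'Obj' flows through.
void *roundtrip(void *Obj) {
  void *Tmp = objc_autoreleaseReturnValue(Obj);
  return objc_retainAutoreleasedReturnValue(Tmp);
}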
bool ObjCARCOpt::OptimizeRetainRVCall(Function &F, Instruction *RetainRV) { // Check for the argument being from an immediately preceding call or invoke. - Value *Arg = GetObjCArg(RetainRV); - CallSite CS(Arg); - if (Instruction *Call = CS.getInstruction()) { + const Value *Arg = GetObjCArg(RetainRV); + ImmutableCallSite CS(Arg); + if (const Instruction *Call = CS.getInstruction()) { if (Call->getParent() == RetainRV->getParent()) { - BasicBlock::iterator I = Call; + BasicBlock::const_iterator I = Call; ++I; while (isNoopInstruction(I)) ++I; if (&*I == RetainRV) return false; - } else if (InvokeInst *II = dyn_cast(Call)) { + } else if (const InvokeInst *II = dyn_cast(Call)) { BasicBlock *RetainRVParent = RetainRV->getParent(); if (II->getNormalDest() == RetainRVParent) { - BasicBlock::iterator I = RetainRVParent->begin(); + BasicBlock::const_iterator I = RetainRVParent->begin(); while (isNoopInstruction(I)) ++I; if (&*I == RetainRV) return false; @@ -2418,7 +2407,8 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) { // These can always be moved up. break; case IC_Release: - // These can't be moved across things that care about the retain count. + // These can't be moved across things that care about the retain + // count. FindDependencies(NeedsPositiveRetainCount, Arg, Inst->getParent(), Inst, DependingInstructions, Visited, PA); @@ -2500,13 +2490,14 @@ ObjCARCOpt::CheckForCFGHazards(const BasicBlock *BB, for (; SI != SE; ++SI) { Sequence SuccSSeq = S_None; bool SuccSRRIKnownSafe = false; - // If VisitBottomUp has visited this successor, take what we know about it. - DenseMap::iterator BBI = BBStates.find(*SI); - if (BBI != BBStates.end()) { - const PtrState &SuccS = BBI->second.getPtrBottomUpState(Arg); - SuccSSeq = SuccS.GetSeq(); - SuccSRRIKnownSafe = SuccS.RRI.KnownSafe; - } + // If VisitBottomUp has pointer information for this successor, take + // what we know about it. + DenseMap::iterator BBI = + BBStates.find(*SI); + assert(BBI != BBStates.end()); + const PtrState &SuccS = BBI->second.getPtrBottomUpState(Arg); + SuccSSeq = SuccS.GetSeq(); + SuccSRRIKnownSafe = SuccS.RRI.KnownSafe; switch (SuccSSeq) { case S_None: case S_CanRelease: { @@ -2553,13 +2544,14 @@ ObjCARCOpt::CheckForCFGHazards(const BasicBlock *BB, for (; SI != SE; ++SI) { Sequence SuccSSeq = S_None; bool SuccSRRIKnownSafe = false; - // If VisitBottomUp has visited this successor, take what we know about it. - DenseMap::iterator BBI = BBStates.find(*SI); - if (BBI != BBStates.end()) { - const PtrState &SuccS = BBI->second.getPtrBottomUpState(Arg); - SuccSSeq = SuccS.GetSeq(); - SuccSRRIKnownSafe = SuccS.RRI.KnownSafe; - } + // If VisitBottomUp has pointer information for this successor, take + // what we know about it. + DenseMap::iterator BBI = + BBStates.find(*SI); + assert(BBI != BBStates.end()); + const PtrState &SuccS = BBI->second.getPtrBottomUpState(Arg); + SuccSSeq = SuccS.GetSeq(); + SuccSRRIKnownSafe = SuccS.RRI.KnownSafe; switch (SuccSSeq) { case S_None: { if (!S.RRI.KnownSafe && !SuccSRRIKnownSafe) { @@ -2617,16 +2609,13 @@ ObjCARCOpt::VisitInstructionBottomUp(Instruction *Inst, if (S.GetSeq() == S_Release || S.GetSeq() == S_MovableRelease) NestingDetected = true; - S.RRI.clear(); - MDNode *ReleaseMetadata = Inst->getMetadata(ImpreciseReleaseMDKind); - S.SetSeq(ReleaseMetadata ? S_MovableRelease : S_Release); + S.ResetSequenceProgress(ReleaseMetadata ? 
S_MovableRelease : S_Release); S.RRI.ReleaseMetadata = ReleaseMetadata; S.RRI.KnownSafe = S.IsKnownNested() || S.IsKnownIncremented(); S.RRI.IsTailCallRelease = cast(Inst)->isTailCall(); S.RRI.Calls.insert(Inst); - S.IncrementRefCount(); S.IncrementNestCount(); break; } @@ -2641,8 +2630,7 @@ ObjCARCOpt::VisitInstructionBottomUp(Instruction *Inst, Arg = GetObjCArg(Inst); PtrState &S = MyStates.getPtrBottomUpState(Arg); - S.DecrementRefCount(); - S.SetAtLeastOneRefCount(); + S.SetKnownPositiveRefCount(); S.DecrementNestCount(); switch (S.GetSeq()) { @@ -2692,7 +2680,7 @@ ObjCARCOpt::VisitInstructionBottomUp(Instruction *Inst, // Check for possible releases. if (CanAlterRefCount(Inst, Ptr, PA, Class)) { - S.DecrementRefCount(); + S.ClearRefCount(); switch (Seq) { case S_Use: S.SetSeq(S_CanRelease); @@ -2759,37 +2747,20 @@ ObjCARCOpt::VisitBottomUp(BasicBlock *BB, // Merge the states from each successor to compute the initial state // for the current block. - const TerminatorInst *TI = cast(&BB->back()); - succ_const_iterator SI(TI), SE(TI, false); - if (SI == SE) - MyStates.SetAsExit(); - else { - // If the terminator is an invoke marked with the - // clang.arc.no_objc_arc_exceptions metadata, the unwind edge can be - // ignored, for ARC purposes. - if (isa(TI) && TI->getMetadata(NoObjCARCExceptionsMDKind)) - --SE; - - do { - const BasicBlock *Succ = *SI++; - if (Succ == BB) - continue; - DenseMap::iterator I = BBStates.find(Succ); - // If we haven't seen this node yet, then we've found a CFG cycle. - // Be optimistic here; it's CheckForCFGHazards' job detect trouble. - if (I == BBStates.end()) - continue; - MyStates.InitFromSucc(I->second); - while (SI != SE) { - Succ = *SI++; - if (Succ != BB) { - I = BBStates.find(Succ); - if (I != BBStates.end()) - MyStates.MergeSucc(I->second); - } - } - break; - } while (SI != SE); + for (BBState::edge_iterator SI(MyStates.succ_begin()), + SE(MyStates.succ_end()); SI != SE; ++SI) { + const BasicBlock *Succ = *SI; + DenseMap::iterator I = BBStates.find(Succ); + assert(I != BBStates.end()); + MyStates.InitFromSucc(I->second); + ++SI; + for (; SI != SE; ++SI) { + Succ = *SI; + I = BBStates.find(Succ); + assert(I != BBStates.end()); + MyStates.MergeSucc(I->second); + } + break; } // Visit all the instructions, bottom-up. @@ -2803,15 +2774,14 @@ ObjCARCOpt::VisitBottomUp(BasicBlock *BB, NestingDetected |= VisitInstructionBottomUp(Inst, BB, Retains, MyStates); } - // If there's a predecessor with an invoke, visit the invoke as - // if it were part of this block, since we can't insert code after - // an invoke in its own block, and we don't want to split critical - // edges. - for (pred_iterator PI(BB), PE(BB, false); PI != PE; ++PI) { + // If there's a predecessor with an invoke, visit the invoke as if it were + // part of this block, since we can't insert code after an invoke in its own + // block, and we don't want to split critical edges. 
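The rewritten successor merge just above has a deliberate fold shape: the first successor seeds the state via InitFromSucc, and each further successor is folded in with MergeSucc, which can only weaken facts. The same shape in miniature, with a hypothetical one-field State in place of the real per-pointer state:

    #include <cassert>
    #include <vector>

    struct State { bool KnownPositive; };

    static void merge(State &Dest, const State &Other) {
      // Merging is conservative: keep a fact only if every side agrees.
      Dest.KnownPositive = Dest.KnownPositive && Other.KnownPositive;
    }

    static State mergeSuccessors(const std::vector<State> &Succs) {
      assert(!Succs.empty() && "exit blocks are handled separately");
      State Result = Succs.front();            // InitFromSucc
      for (size_t I = 1; I < Succs.size(); ++I)
        merge(Result, Succs[I]);               // MergeSucc
      return Result;
    }

    int main() {
      std::vector<State> Succs = {{true}, {true}, {false}};
      return mergeSuccessors(Succs).KnownPositive ? 1 : 0; // conservatively false
    }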
+ for (BBState::edge_iterator PI(MyStates.pred_begin()), + PE(MyStates.pred_end()); PI != PE; ++PI) { BasicBlock *Pred = *PI; - TerminatorInst *PredTI = cast(&Pred->back()); - if (isa(PredTI)) - NestingDetected |= VisitInstructionBottomUp(PredTI, BB, Retains, MyStates); + if (InvokeInst *II = dyn_cast(&Pred->back())) + NestingDetected |= VisitInstructionBottomUp(II, BB, Retains, MyStates); } return NestingDetected; @@ -2851,25 +2821,23 @@ ObjCARCOpt::VisitInstructionTopDown(Instruction *Inst, if (S.GetSeq() == S_Retain) NestingDetected = true; - S.SetSeq(S_Retain); - S.RRI.clear(); + S.ResetSequenceProgress(S_Retain); S.RRI.IsRetainBlock = Class == IC_RetainBlock; - // Don't check S.IsKnownIncremented() here because it's not - // sufficient. + // Don't check S.IsKnownIncremented() here because it's not sufficient. S.RRI.KnownSafe = S.IsKnownNested(); S.RRI.Calls.insert(Inst); } - S.SetAtLeastOneRefCount(); - S.IncrementRefCount(); S.IncrementNestCount(); - return NestingDetected; + + // A retain can be a potential use; procede to the generic checking + // code below. + break; } case IC_Release: { Arg = GetObjCArg(Inst); PtrState &S = MyStates.getPtrTopDownState(Arg); - S.DecrementRefCount(); S.DecrementNestCount(); switch (S.GetSeq()) { @@ -2916,7 +2884,7 @@ ObjCARCOpt::VisitInstructionTopDown(Instruction *Inst, // Check for possible releases. if (CanAlterRefCount(Inst, Ptr, PA, Class)) { - S.DecrementRefCount(); + S.ClearRefCount(); switch (Seq) { case S_Retain: S.SetSeq(S_CanRelease); @@ -2967,41 +2935,21 @@ ObjCARCOpt::VisitTopDown(BasicBlock *BB, // Merge the states from each predecessor to compute the initial state // for the current block. - const_pred_iterator PI(BB), PE(BB, false); - if (PI == PE) - MyStates.SetAsEntry(); - else - do { - unsigned OperandNo = PI.getOperandNo(); - const Use &Us = PI.getUse(); - ++PI; - - // Skip invoke unwind edges on invoke instructions marked with - // clang.arc.no_objc_arc_exceptions. - if (const InvokeInst *II = dyn_cast(Us.getUser())) - if (OperandNo == II->getNumArgOperands() + 2 && - II->getMetadata(NoObjCARCExceptionsMDKind)) - continue; - - const BasicBlock *Pred = cast(Us.getUser())->getParent(); - if (Pred == BB) - continue; - DenseMap::iterator I = BBStates.find(Pred); - // If we haven't seen this node yet, then we've found a CFG cycle. - // Be optimistic here; it's CheckForCFGHazards' job detect trouble. - if (I == BBStates.end() || !I->second.isVisitedTopDown()) - continue; - MyStates.InitFromPred(I->second); - while (PI != PE) { - Pred = *PI++; - if (Pred != BB) { - I = BBStates.find(Pred); - if (I != BBStates.end() && I->second.isVisitedTopDown()) - MyStates.MergePred(I->second); - } - } - break; - } while (PI != PE); + for (BBState::edge_iterator PI(MyStates.pred_begin()), + PE(MyStates.pred_end()); PI != PE; ++PI) { + const BasicBlock *Pred = *PI; + DenseMap::iterator I = BBStates.find(Pred); + assert(I != BBStates.end()); + MyStates.InitFromPred(I->second); + ++PI; + for (; PI != PE; ++PI) { + Pred = *PI; + I = BBStates.find(Pred); + assert(I != BBStates.end()); + MyStates.MergePred(I->second); + } + break; + } // Visit all the instructions, top-down. for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) { @@ -3016,73 +2964,82 @@ ObjCARCOpt::VisitTopDown(BasicBlock *BB, static void ComputePostOrders(Function &F, SmallVectorImpl &PostOrder, - SmallVectorImpl &ReverseCFGPostOrder) { - /// Backedges - Backedges detected in the DFS. 
These edges will be - /// ignored in the reverse-CFG DFS, so that loops with multiple exits will be - /// traversed in the desired order. - DenseSet > Backedges; - + SmallVectorImpl &ReverseCFGPostOrder, + unsigned NoObjCARCExceptionsMDKind, + DenseMap &BBStates) { /// Visited - The visited set, for doing DFS walks. SmallPtrSet Visited; // Do DFS, computing the PostOrder. SmallPtrSet OnStack; SmallVector, 16> SuccStack; + + // Functions always have exactly one entry block, and we don't have + // any other block that we treat like an entry block. BasicBlock *EntryBB = &F.getEntryBlock(); - SuccStack.push_back(std::make_pair(EntryBB, succ_begin(EntryBB))); + BBState &MyStates = BBStates[EntryBB]; + MyStates.SetAsEntry(); + TerminatorInst *EntryTI = cast(&EntryBB->back()); + SuccStack.push_back(std::make_pair(EntryBB, succ_iterator(EntryTI))); Visited.insert(EntryBB); OnStack.insert(EntryBB); do { dfs_next_succ: - TerminatorInst *TI = cast(&SuccStack.back().first->back()); - succ_iterator End = succ_iterator(TI, true); - while (SuccStack.back().second != End) { - BasicBlock *BB = *SuccStack.back().second++; - if (Visited.insert(BB)) { - SuccStack.push_back(std::make_pair(BB, succ_begin(BB))); - OnStack.insert(BB); + BasicBlock *CurrBB = SuccStack.back().first; + TerminatorInst *TI = cast(&CurrBB->back()); + succ_iterator SE(TI, false); + + // If the terminator is an invoke marked with the + // clang.arc.no_objc_arc_exceptions metadata, the unwind edge can be + // ignored, for ARC purposes. + if (isa(TI) && TI->getMetadata(NoObjCARCExceptionsMDKind)) + --SE; + + while (SuccStack.back().second != SE) { + BasicBlock *SuccBB = *SuccStack.back().second++; + if (Visited.insert(SuccBB)) { + TerminatorInst *TI = cast(&SuccBB->back()); + SuccStack.push_back(std::make_pair(SuccBB, succ_iterator(TI))); + BBStates[CurrBB].addSucc(SuccBB); + BBState &SuccStates = BBStates[SuccBB]; + SuccStates.addPred(CurrBB); + OnStack.insert(SuccBB); goto dfs_next_succ; } - if (OnStack.count(BB)) - Backedges.insert(std::make_pair(SuccStack.back().first, BB)); + + if (!OnStack.count(SuccBB)) { + BBStates[CurrBB].addSucc(SuccBB); + BBStates[SuccBB].addPred(CurrBB); + } } - OnStack.erase(SuccStack.back().first); - PostOrder.push_back(SuccStack.pop_back_val().first); + OnStack.erase(CurrBB); + PostOrder.push_back(CurrBB); + SuccStack.pop_back(); } while (!SuccStack.empty()); Visited.clear(); - // Compute the exits, which are the starting points for reverse-CFG DFS. - // This includes blocks where all the successors are backedges that - // we're skipping. - SmallVector Exits; + // Do reverse-CFG DFS, computing the reverse-CFG PostOrder. + // Functions may have many exits, and there also blocks which we treat + // as exits due to ignored edges. + SmallVector, 16> PredStack; for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) { - BasicBlock *BB = I; - TerminatorInst *TI = cast(&BB->back()); - for (succ_iterator SI(TI), SE(TI, true); SI != SE; ++SI) - if (!Backedges.count(std::make_pair(BB, *SI))) - goto HasNonBackedgeSucc; - Exits.push_back(BB); - HasNonBackedgeSucc:; - } + BasicBlock *ExitBB = I; + BBState &MyStates = BBStates[ExitBB]; + if (!MyStates.isExit()) + continue; - // Do reverse-CFG DFS, computing the reverse-CFG PostOrder. 
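Stripped of the ARC-specific edge filtering and the BBState wiring, the forward walk in ComputePostOrders is a standard iterative post-order DFS over an explicit stack of (block, next successor) pairs. A self-contained version over a plain adjacency-list graph:

    #include <cstdio>
    #include <utility>
    #include <vector>

    typedef std::vector<std::vector<int> > Graph;  // adjacency lists

    static std::vector<int> postOrder(const Graph &G, int Entry) {
      std::vector<int> Order;
      std::vector<bool> Visited(G.size(), false);
      // Each stack entry is (node, index of the next successor to try),
      // the same bookkeeping as the (BasicBlock*, succ_iterator) pairs.
      std::vector<std::pair<int, size_t> > Stack;
      Stack.push_back(std::make_pair(Entry, 0));
      Visited[Entry] = true;
      while (!Stack.empty()) {
        int N = Stack.back().first;
        size_t &Next = Stack.back().second;
        if (Next < G[N].size()) {
          int S = G[N][Next++];
          if (!Visited[S]) {
            Visited[S] = true;
            Stack.push_back(std::make_pair(S, 0));  // descend first
          }
        } else {
          Order.push_back(N);  // all successors finished: emit N
          Stack.pop_back();
        }
      }
      return Order;
    }

    int main() {
      Graph G(4);
      G[0].push_back(1); G[0].push_back(2);  // diamond: 0 -> {1,2} -> 3
      G[1].push_back(3); G[2].push_back(3);
      std::vector<int> Order = postOrder(G, 0);
      for (size_t I = 0; I < Order.size(); ++I) std::printf("%d ", Order[I]);
      std::printf("\n");  // prints: 3 1 2 0
    }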
- SmallVector, 16> PredStack; - for (SmallVectorImpl::iterator I = Exits.begin(), E = Exits.end(); - I != E; ++I) { - BasicBlock *ExitBB = *I; - PredStack.push_back(std::make_pair(ExitBB, pred_begin(ExitBB))); + MyStates.SetAsExit(); + + PredStack.push_back(std::make_pair(ExitBB, MyStates.pred_begin())); Visited.insert(ExitBB); while (!PredStack.empty()) { reverse_dfs_next_succ: - pred_iterator End = pred_end(PredStack.back().first); - while (PredStack.back().second != End) { + BBState::edge_iterator PE = BBStates[PredStack.back().first].pred_end(); + while (PredStack.back().second != PE) { BasicBlock *BB = *PredStack.back().second++; - // Skip backedges detected in the forward-CFG DFS. - if (Backedges.count(std::make_pair(BB, PredStack.back().first))) - continue; if (Visited.insert(BB)) { - PredStack.push_back(std::make_pair(BB, pred_begin(BB))); + PredStack.push_back(std::make_pair(BB, BBStates[BB].pred_begin())); goto reverse_dfs_next_succ; } } @@ -3105,7 +3062,9 @@ ObjCARCOpt::Visit(Function &F, // function exit point, and we want to ignore selected cycle edges. SmallVector PostOrder; SmallVector ReverseCFGPostOrder; - ComputePostOrders(F, PostOrder, ReverseCFGPostOrder); + ComputePostOrders(F, PostOrder, ReverseCFGPostOrder, + NoObjCARCExceptionsMDKind, + BBStates); // Use reverse-postorder on the reverse CFG for bottom-up. bool BottomUpNestingDetected = false; @@ -3214,7 +3173,7 @@ ObjCARCOpt::PerformCodePlacement(DenseMap // not being managed by ObjC reference counting, so we can delete pairs // regardless of what possible decrements or uses lie between them. bool KnownSafe = isa(Arg) || isa(Arg); - + // A constant pointer can't be pointing to an object on the heap. It may // be reference-counted, but it won't be deleted. if (const LoadInst *LI = dyn_cast(Arg)) @@ -3375,6 +3334,7 @@ ObjCARCOpt::PerformCodePlacement(DenseMap // Ok, everything checks out and we're all set. Let's move some code! Changed = true; + assert(OldCount != 0 && "Unreachable code?"); AnyPairsCompletelyEliminated = NewCount == 0; NumRRs += OldCount - NewCount; MoveCalls(Arg, RetainsToMove, ReleasesToMove, @@ -3515,7 +3475,7 @@ void ObjCARCOpt::OptimizeWeakCalls(Function &F) { if (AllocaInst *Alloca = dyn_cast(Arg)) { for (Value::use_iterator UI = Alloca->use_begin(), UE = Alloca->use_end(); UI != UE; ++UI) { - Instruction *UserInst = cast(*UI); + const Instruction *UserInst = cast(*UI); switch (GetBasicInstructionClass(UserInst)) { case IC_InitWeak: case IC_StoreWeak: @@ -3529,8 +3489,18 @@ void ObjCARCOpt::OptimizeWeakCalls(Function &F) { for (Value::use_iterator UI = Alloca->use_begin(), UE = Alloca->use_end(); UI != UE; ) { CallInst *UserInst = cast(*UI++); - if (!UserInst->use_empty()) - UserInst->replaceAllUsesWith(UserInst->getArgOperand(0)); + switch (GetBasicInstructionClass(UserInst)) { + case IC_InitWeak: + case IC_StoreWeak: + // These functions return their second argument. + UserInst->replaceAllUsesWith(UserInst->getArgOperand(1)); + break; + case IC_DestroyWeak: + // No return value. 
+ break; + default: + llvm_unreachable("alloca really is used!"); + } UserInst->eraseFromParent(); } Alloca->eraseFromParent(); @@ -3598,8 +3568,7 @@ void ObjCARCOpt::OptimizeReturns(Function &F) { dyn_cast_or_null(*DependingInstructions.begin()); if (!Autorelease) goto next_block; - InstructionClass AutoreleaseClass = - GetBasicInstructionClass(Autorelease); + InstructionClass AutoreleaseClass = GetBasicInstructionClass(Autorelease); if (!IsAutorelease(AutoreleaseClass)) goto next_block; if (GetObjCArg(Autorelease) != Arg) @@ -3690,7 +3659,7 @@ bool ObjCARCOpt::doInitialization(Module &M) { // Intuitively, objc_retain and others are nocapture, however in practice // they are not, because they return their argument value. And objc_release - // calls finalizers. + // calls finalizers which can have arbitrary side effects. // These are initialized lazily. RetainRVCallee = 0; @@ -3742,8 +3711,8 @@ bool ObjCARCOpt::runOnFunction(Function &F) { while (OptimizeSequences(F)) {} // Optimizations if objc_autorelease is used. - if (UsedInThisFunction & - ((1 << IC_Autorelease) | (1 << IC_AutoreleaseRV))) + if (UsedInThisFunction & ((1 << IC_Autorelease) | + (1 << IC_AutoreleaseRV))) OptimizeReturns(F); return Changed; @@ -3791,7 +3760,7 @@ namespace { /// StoreStrongCalls - The set of inserted objc_storeStrong calls. If /// at the end of walking the function we have found no alloca /// instructions, these calls can be marked "tail". - DenseSet StoreStrongCalls; + SmallPtrSet StoreStrongCalls; Constant *getStoreStrongCallee(Module *M); Constant *getRetainAutoreleaseCallee(Module *M); @@ -3842,13 +3811,11 @@ Constant *ObjCARCContract::getStoreStrongCallee(Module *M) { LLVMContext &C = M->getContext(); Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C)); Type *I8XX = PointerType::getUnqual(I8X); - std::vector Params; - Params.push_back(I8XX); - Params.push_back(I8X); + Type *Params[] = { I8XX, I8X }; - AttrListPtr Attributes; - Attributes.addAttr(~0u, Attribute::NoUnwind); - Attributes.addAttr(1, Attribute::NoCapture); + AttrListPtr Attributes = AttrListPtr() + .addAttr(~0u, Attribute::NoUnwind) + .addAttr(1, Attribute::NoCapture); StoreStrongCallee = M->getOrInsertFunction( @@ -3863,12 +3830,9 @@ Constant *ObjCARCContract::getRetainAutoreleaseCallee(Module *M) { if (!RetainAutoreleaseCallee) { LLVMContext &C = M->getContext(); Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C)); - std::vector Params; - Params.push_back(I8X); - FunctionType *FTy = - FunctionType::get(I8X, Params, /*isVarArg=*/false); - AttrListPtr Attributes; - Attributes.addAttr(~0u, Attribute::NoUnwind); + Type *Params[] = { I8X }; + FunctionType *FTy = FunctionType::get(I8X, Params, /*isVarArg=*/false); + AttrListPtr Attributes = AttrListPtr().addAttr(~0u, Attribute::NoUnwind); RetainAutoreleaseCallee = M->getOrInsertFunction("objc_retainAutorelease", FTy, Attributes); } @@ -3879,12 +3843,9 @@ Constant *ObjCARCContract::getRetainAutoreleaseRVCallee(Module *M) { if (!RetainAutoreleaseRVCallee) { LLVMContext &C = M->getContext(); Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C)); - std::vector Params; - Params.push_back(I8X); - FunctionType *FTy = - FunctionType::get(I8X, Params, /*isVarArg=*/false); - AttrListPtr Attributes; - Attributes.addAttr(~0u, Attribute::NoUnwind); + Type *Params[] = { I8X }; + FunctionType *FTy = FunctionType::get(I8X, Params, /*isVarArg=*/false); + AttrListPtr Attributes = AttrListPtr().addAttr(~0u, Attribute::NoUnwind); RetainAutoreleaseRVCallee = 
M->getOrInsertFunction("objc_retainAutoreleaseReturnValue", FTy, Attributes); @@ -3892,8 +3853,7 @@ Constant *ObjCARCContract::getRetainAutoreleaseRVCallee(Module *M) { return RetainAutoreleaseRVCallee; } -/// ContractAutorelease - Merge an autorelease with a retain into a fused -/// call. +/// ContractAutorelease - Merge an autorelease with a retain into a fused call. bool ObjCARCContract::ContractAutorelease(Function &F, Instruction *Autorelease, InstructionClass Class, @@ -3954,18 +3914,41 @@ void ObjCARCContract::ContractRelease(Instruction *Release, BasicBlock *BB = Release->getParent(); if (Load->getParent() != BB) return; - // Walk down to find the store. + // Walk down to find the store and the release, which may be in either order. BasicBlock::iterator I = Load, End = BB->end(); ++I; AliasAnalysis::Location Loc = AA->getLocation(Load); - while (I != End && - (&*I == Release || - IsRetain(GetBasicInstructionClass(I)) || - !(AA->getModRefInfo(I, Loc) & AliasAnalysis::Mod))) - ++I; - StoreInst *Store = dyn_cast(I); - if (!Store || !Store->isSimple()) return; - if (Store->getPointerOperand() != Loc.Ptr) return; + StoreInst *Store = 0; + bool SawRelease = false; + for (; !Store || !SawRelease; ++I) { + if (I == End) + return; + + Instruction *Inst = I; + if (Inst == Release) { + SawRelease = true; + continue; + } + + InstructionClass Class = GetBasicInstructionClass(Inst); + + // Unrelated retains are harmless. + if (IsRetain(Class)) + continue; + + if (Store) { + // The store is the point where we're going to put the objc_storeStrong, + // so make sure there are no uses after it. + if (CanUse(Inst, Load, PA, Class)) + return; + } else if (AA->getModRefInfo(Inst, Loc) & AliasAnalysis::Mod) { + // We are moving the load down to the store, so check for anything + // else which writes to the memory between the load and the store. + Store = dyn_cast(Inst); + if (!Store || !Store->isSimple()) return; + if (Store->getPointerOperand() != Loc.Ptr) return; + } + } Value *New = StripPointerCastsAndObjCCalls(Store->getValueOperand()); @@ -4053,7 +4036,8 @@ bool ObjCARCContract::runOnFunction(Function &F) { // It seems that functions which "return twice" are also unsafe for the // "tail" argument, because they are setjmp, which could need to // return to an earlier stack state. - bool TailOkForStoreStrongs = !F.isVarArg() && !F.callsFunctionThatReturnsTwice(); + bool TailOkForStoreStrongs = !F.isVarArg() && + !F.callsFunctionThatReturnsTwice(); // For ObjC library calls which return their argument, replace uses of the // argument with uses of the call return value, if it dominates the use. This @@ -4083,8 +4067,22 @@ bool ObjCARCContract::runOnFunction(Function &F) { if (!RetainRVMarker) break; BasicBlock::iterator BBI = Inst; - --BBI; - while (isNoopInstruction(BBI)) --BBI; + BasicBlock *InstParent = Inst->getParent(); + + // Step up to see if the call immediately precedes the RetainRV call. + // If it's an invoke, we have to cross a block boundary. And we have + // to carefully dodge no-op instructions. 
+ do { + if (&*BBI == InstParent->begin()) { + BasicBlock *Pred = InstParent->getSinglePredecessor(); + if (!Pred) + goto decline_rv_optimization; + BBI = Pred->getTerminator(); + break; + } + --BBI; + } while (isNoopInstruction(BBI)); + if (&*BBI == GetObjCArg(Inst)) { Changed = true; InlineAsm *IA = @@ -4094,6 +4092,7 @@ bool ObjCARCContract::runOnFunction(Function &F) { /*Constraints=*/"", /*hasSideEffects=*/true); CallInst::Create(IA, "", Inst); } + decline_rv_optimization: break; } case IC_InitWeak: { @@ -4143,25 +4142,21 @@ bool ObjCARCContract::runOnFunction(Function &F) { // trivially dominate itself, which would lead us to rewriting its // argument in terms of its return value, which would lead to // infinite loops in GetObjCArg. - if (DT->isReachableFromEntry(U) && - DT->dominates(Inst, U)) { + if (DT->isReachableFromEntry(U) && DT->dominates(Inst, U)) { Changed = true; Instruction *Replacement = Inst; Type *UseTy = U.get()->getType(); if (PHINode *PHI = dyn_cast(U.getUser())) { // For PHI nodes, insert the bitcast in the predecessor block. - unsigned ValNo = - PHINode::getIncomingValueNumForOperand(OperandNo); - BasicBlock *BB = - PHI->getIncomingBlock(ValNo); + unsigned ValNo = PHINode::getIncomingValueNumForOperand(OperandNo); + BasicBlock *BB = PHI->getIncomingBlock(ValNo); if (Replacement->getType() != UseTy) Replacement = new BitCastInst(Replacement, UseTy, "", &BB->back()); // While we're here, rewrite all edges for this PHI, rather // than just one use at a time, to minimize the number of // bitcasts we emit. - for (unsigned i = 0, e = PHI->getNumIncomingValues(); - i != e; ++i) + for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i) if (PHI->getIncomingBlock(i) == BB) { // Keep the UI iterator valid. if (&PHI->getOperandUse( @@ -4179,8 +4174,7 @@ bool ObjCARCContract::runOnFunction(Function &F) { } } - // If Arg is a no-op casted pointer, strip one level of casts and - // iterate. + // If Arg is a no-op casted pointer, strip one level of casts and iterate. if (const BitCastInst *BI = dyn_cast(Arg)) Arg = BI->getOperand(0); else if (isa(Arg) && @@ -4197,7 +4191,7 @@ bool ObjCARCContract::runOnFunction(Function &F) { // If this function has no escaping allocas or suspicious vararg usage, // objc_storeStrong calls can be marked with the "tail" keyword. 
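The do/while just above walks backwards to see whether the instruction feeding objc_retainAutoreleasedReturnValue immediately precedes it, hopping to the terminator of a unique predecessor when it hits the top of the block (the invoke case). A minimal model on flat vectors, not the LLVM iterators:

    #include <cassert>
    #include <string>
    #include <vector>

    struct Block {
      std::vector<std::string> Insts;  // non-empty; last one is the terminator
      const Block *SinglePred;         // null if the block has 0 or 2+ preds
    };

    static bool isNoop(const std::string &I) { return I == "bitcast"; }

    // True if the first non-noop instruction strictly before Insts[Pos] is
    // Target, allowing one hop to the terminator of a unique predecessor.
    static bool precededBy(const Block *BB, size_t Pos,
                           const std::string &Target) {
      do {
        if (Pos == 0) {
          if (!BB->SinglePred)
            return false;              // a merge point: give up
          BB = BB->SinglePred;
          assert(!BB->Insts.empty());
          Pos = BB->Insts.size() - 1;  // the predecessor's terminator
          break;                       // compare it directly, like the original
        }
        --Pos;
      } while (isNoop(BB->Insts[Pos]));
      return BB->Insts[Pos] == Target;
    }

    int main() {
      Block Pred = {{"invoke"}, 0};
      Block BB = {{"bitcast", "retainRV"}, &Pred};
      return precededBy(&BB, 1, "invoke") ? 0 : 1;  // crosses into Pred: exit 0
    }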
   if (TailOkForStoreStrongs)
-    for (DenseSet<CallInst *>::iterator I = StoreStrongCalls.begin(),
+    for (SmallPtrSet<CallInst *, 8>::iterator I = StoreStrongCalls.begin(),
          E = StoreStrongCalls.end(); I != E; ++I)
       (*I)->setTailCall();
   StoreStrongCalls.clear();
diff --git a/lib/Transforms/Scalar/Reassociate.cpp b/lib/Transforms/Scalar/Reassociate.cpp
index 5de00d1..09687d8 100644
--- a/lib/Transforms/Scalar/Reassociate.cpp
+++ b/lib/Transforms/Scalar/Reassociate.cpp
@@ -26,21 +26,23 @@
 #include "llvm/Constants.h"
 #include "llvm/DerivedTypes.h"
 #include "llvm/Function.h"
+#include "llvm/IRBuilder.h"
 #include "llvm/Instructions.h"
 #include "llvm/IntrinsicInst.h"
 #include "llvm/Pass.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/Statistic.h"
 #include "llvm/Assembly/Writer.h"
 #include "llvm/Support/CFG.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ValueHandle.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/PostOrderIterator.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/DenseMap.h"
 #include <algorithm>
 using namespace llvm;
 
-STATISTIC(NumLinear , "Number of insts linearized");
 STATISTIC(NumChanged, "Number of insts reassociated");
 STATISTIC(NumAnnihil, "Number of expr tree annihilated");
 STATISTIC(NumFactor , "Number of multiplies factored");
@@ -70,13 +72,51 @@ static void PrintOps(Instruction *I, const SmallVectorImpl<ValueEntry> &Ops) {
   }
 }
 #endif
-
+
+namespace {
+  /// \brief Utility class representing a base and exponent pair which form one
+  /// factor of some product.
+  struct Factor {
+    Value *Base;
+    unsigned Power;
+
+    Factor(Value *Base, unsigned Power) : Base(Base), Power(Power) {}
+
+    /// \brief Sort factors by their Base.
+    struct BaseSorter {
+      bool operator()(const Factor &LHS, const Factor &RHS) {
+        return LHS.Base < RHS.Base;
+      }
+    };
+
+    /// \brief Compare factors for equal bases.
+    struct BaseEqual {
+      bool operator()(const Factor &LHS, const Factor &RHS) {
+        return LHS.Base == RHS.Base;
+      }
+    };
+
+    /// \brief Sort factors in descending order by their power.
+    struct PowerDescendingSorter {
+      bool operator()(const Factor &LHS, const Factor &RHS) {
+        return LHS.Power > RHS.Power;
+      }
+    };
+
+    /// \brief Compare factors for equal powers.
+ struct PowerEqual { + bool operator()(const Factor &LHS, const Factor &RHS) { + return LHS.Power == RHS.Power; + } + }; + }; +} + namespace { class Reassociate : public FunctionPass { DenseMap RankMap; DenseMap, unsigned> ValueRankMap; - SmallVector RedoInsts; - SmallVector DeadInsts; + SetVector > RedoInsts; bool MadeChange; public: static char ID; // Pass identification, replacement for typeid @@ -92,18 +132,19 @@ namespace { private: void BuildRankMap(Function &F); unsigned getRank(Value *V); - Value *ReassociateExpression(BinaryOperator *I); - void RewriteExprTree(BinaryOperator *I, SmallVectorImpl &Ops, - unsigned Idx = 0); + void ReassociateExpression(BinaryOperator *I); + void RewriteExprTree(BinaryOperator *I, SmallVectorImpl &Ops); Value *OptimizeExpression(BinaryOperator *I, SmallVectorImpl &Ops); Value *OptimizeAdd(Instruction *I, SmallVectorImpl &Ops); - void LinearizeExprTree(BinaryOperator *I, SmallVectorImpl &Ops); - void LinearizeExpr(BinaryOperator *I); + bool collectMultiplyFactors(SmallVectorImpl &Ops, + SmallVectorImpl &Factors); + Value *buildMinimalMultiplyDAG(IRBuilder<> &Builder, + SmallVectorImpl &Factors); + Value *OptimizeMul(BinaryOperator *I, SmallVectorImpl &Ops); Value *RemoveFactorFromExpression(Value *V, Value *Factor); - void ReassociateInst(BasicBlock::iterator &BBI); - - void RemoveDeadBinaryOp(Value *V); + void EraseInst(Instruction *I); + void OptimizeInst(Instruction *I); }; } @@ -114,28 +155,24 @@ INITIALIZE_PASS(Reassociate, "reassociate", // Public interface to the Reassociate pass FunctionPass *llvm::createReassociatePass() { return new Reassociate(); } -void Reassociate::RemoveDeadBinaryOp(Value *V) { - Instruction *Op = dyn_cast(V); - if (!Op || !isa(Op)) - return; - - Value *LHS = Op->getOperand(0), *RHS = Op->getOperand(1); - - ValueRankMap.erase(Op); - DeadInsts.push_back(Op); - RemoveDeadBinaryOp(LHS); - RemoveDeadBinaryOp(RHS); +/// isReassociableOp - Return true if V is an instruction of the specified +/// opcode and if it only has one use. +static BinaryOperator *isReassociableOp(Value *V, unsigned Opcode) { + if (V->hasOneUse() && isa(V) && + cast(V)->getOpcode() == Opcode) + return cast(V); + return 0; } - static bool isUnmovableInstruction(Instruction *I) { if (I->getOpcode() == Instruction::PHI || + I->getOpcode() == Instruction::LandingPad || I->getOpcode() == Instruction::Alloca || I->getOpcode() == Instruction::Load || I->getOpcode() == Instruction::Invoke || (I->getOpcode() == Instruction::Call && !isa(I)) || - I->getOpcode() == Instruction::UDiv || + I->getOpcode() == Instruction::UDiv || I->getOpcode() == Instruction::SDiv || I->getOpcode() == Instruction::FDiv || I->getOpcode() == Instruction::URem || @@ -198,211 +235,572 @@ unsigned Reassociate::getRank(Value *V) { return ValueRankMap[I] = Rank; } -/// isReassociableOp - Return true if V is an instruction of the specified -/// opcode and if it only has one use. -static BinaryOperator *isReassociableOp(Value *V, unsigned Opcode) { - if ((V->hasOneUse() || V->use_empty()) && isa(V) && - cast(V)->getOpcode() == Opcode) - return cast(V); - return 0; -} - /// LowerNegateToMultiply - Replace 0-X with X*-1. 
/// -static Instruction *LowerNegateToMultiply(Instruction *Neg, - DenseMap, unsigned> &ValueRankMap) { +static BinaryOperator *LowerNegateToMultiply(Instruction *Neg) { Constant *Cst = Constant::getAllOnesValue(Neg->getType()); - Instruction *Res = BinaryOperator::CreateMul(Neg->getOperand(1), Cst, "",Neg); - ValueRankMap.erase(Neg); + BinaryOperator *Res = + BinaryOperator::CreateMul(Neg->getOperand(1), Cst, "",Neg); + Neg->setOperand(1, Constant::getNullValue(Neg->getType())); // Drop use of op. Res->takeName(Neg); Neg->replaceAllUsesWith(Res); Res->setDebugLoc(Neg->getDebugLoc()); - Neg->eraseFromParent(); return Res; } -// Given an expression of the form '(A+B)+(D+C)', turn it into '(((A+B)+C)+D)'. -// Note that if D is also part of the expression tree that we recurse to -// linearize it as well. Besides that case, this does not recurse into A,B, or -// C. -void Reassociate::LinearizeExpr(BinaryOperator *I) { - BinaryOperator *LHS = cast(I->getOperand(0)); - BinaryOperator *RHS = cast(I->getOperand(1)); - assert(isReassociableOp(LHS, I->getOpcode()) && - isReassociableOp(RHS, I->getOpcode()) && - "Not an expression that needs linearization?"); - - DEBUG(dbgs() << "Linear" << *LHS << '\n' << *RHS << '\n' << *I << '\n'); - - // Move the RHS instruction to live immediately before I, avoiding breaking - // dominator properties. - RHS->moveBefore(I); - - // Move operands around to do the linearization. - I->setOperand(1, RHS->getOperand(0)); - RHS->setOperand(0, LHS); - I->setOperand(0, RHS); - - // Conservatively clear all the optional flags, which may not hold - // after the reassociation. - I->clearSubclassOptionalData(); - LHS->clearSubclassOptionalData(); - RHS->clearSubclassOptionalData(); - - ++NumLinear; - MadeChange = true; - DEBUG(dbgs() << "Linearized: " << *I << '\n'); - - // If D is part of this expression tree, tail recurse. - if (isReassociableOp(I->getOperand(1), I->getOpcode())) - LinearizeExpr(I); +/// CarmichaelShift - Returns k such that lambda(2^Bitwidth) = 2^k, where lambda +/// is the Carmichael function. This means that x^(2^k) === 1 mod 2^Bitwidth for +/// every odd x, i.e. x^(2^k) = 1 for every odd x in Bitwidth-bit arithmetic. +/// Note that 0 <= k < Bitwidth, and if Bitwidth > 3 then x^(2^k) = 0 for every +/// even x in Bitwidth-bit arithmetic. +static unsigned CarmichaelShift(unsigned Bitwidth) { + if (Bitwidth < 3) + return Bitwidth - 1; + return Bitwidth - 2; +} + +/// IncorporateWeight - Add the extra weight 'RHS' to the existing weight 'LHS', +/// reducing the combined weight using any special properties of the operation. +/// The existing weight LHS represents the computation X op X op ... op X where +/// X occurs LHS times. The combined weight represents X op X op ... op X with +/// X occurring LHS + RHS times. If op is "Xor" for example then the combined +/// operation is equivalent to X if LHS + RHS is odd, or 0 if LHS + RHS is even; +/// the routine returns 1 in LHS in the first case, and 0 in LHS in the second. +static void IncorporateWeight(APInt &LHS, const APInt &RHS, unsigned Opcode) { + // If we were working with infinite precision arithmetic then the combined + // weight would be LHS + RHS. But we are using finite precision arithmetic, + // and the APInt sum LHS + RHS may not be correct if it wraps (it is correct + // for nilpotent operations and addition, but not for idempotent operations + // and multiplication), so it is important to correctly reduce the combined + // weight back into range if wrapping would be wrong. 
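The CarmichaelShift comment is easy to sanity-check numerically. For Bitwidth = 4 it claims lambda(2^4) = 2^2, i.e. x^4 == 1 (mod 16) for every odd x and, since Bitwidth > 3, x^4 == 0 (mod 16) for every even x:

    #include <cstdio>

    int main() {
      const unsigned Mask = 0xF;                   // arithmetic mod 2^4
      for (unsigned X = 0; X <= Mask; ++X) {
        unsigned P = 1;
        for (int I = 0; I < 4; ++I)
          P = (P * X) & Mask;                      // X^4 mod 16
        std::printf("%2u^4 mod 16 = %u\n", X, P);  // 1 for odd X, 0 for even X
      }
    }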
+ + // If RHS is zero then the weight didn't change. + if (RHS.isMinValue()) + return; + // If LHS is zero then the combined weight is RHS. + if (LHS.isMinValue()) { + LHS = RHS; + return; + } + // From this point on we know that neither LHS nor RHS is zero. + + if (Instruction::isIdempotent(Opcode)) { + // Idempotent means X op X === X, so any non-zero weight is equivalent to a + // weight of 1. Keeping weights at zero or one also means that wrapping is + // not a problem. + assert(LHS == 1 && RHS == 1 && "Weights not reduced!"); + return; // Return a weight of 1. + } + if (Instruction::isNilpotent(Opcode)) { + // Nilpotent means X op X === 0, so reduce weights modulo 2. + assert(LHS == 1 && RHS == 1 && "Weights not reduced!"); + LHS = 0; // 1 + 1 === 0 modulo 2. + return; + } + if (Opcode == Instruction::Add) { + // TODO: Reduce the weight by exploiting nsw/nuw? + LHS += RHS; + return; + } + + assert(Opcode == Instruction::Mul && "Unknown associative operation!"); + unsigned Bitwidth = LHS.getBitWidth(); + // If CM is the Carmichael number then a weight W satisfying W >= CM+Bitwidth + // can be replaced with W-CM. That's because x^W=x^(W-CM) for every Bitwidth + // bit number x, since either x is odd in which case x^CM = 1, or x is even in + // which case both x^W and x^(W - CM) are zero. By subtracting off multiples + // of CM like this weights can always be reduced to the range [0, CM+Bitwidth) + // which by a happy accident means that they can always be represented using + // Bitwidth bits. + // TODO: Reduce the weight by exploiting nsw/nuw? (Could do much better than + // the Carmichael number). + if (Bitwidth > 3) { + /// CM - The value of Carmichael's lambda function. + APInt CM = APInt::getOneBitSet(Bitwidth, CarmichaelShift(Bitwidth)); + // Any weight W >= Threshold can be replaced with W - CM. + APInt Threshold = CM + Bitwidth; + assert(LHS.ult(Threshold) && RHS.ult(Threshold) && "Weights not reduced!"); + // For Bitwidth 4 or more the following sum does not overflow. + LHS += RHS; + while (LHS.uge(Threshold)) + LHS -= CM; + } else { + // To avoid problems with overflow do everything the same as above but using + // a larger type. + unsigned CM = 1U << CarmichaelShift(Bitwidth); + unsigned Threshold = CM + Bitwidth; + assert(LHS.getZExtValue() < Threshold && RHS.getZExtValue() < Threshold && + "Weights not reduced!"); + unsigned Total = LHS.getZExtValue() + RHS.getZExtValue(); + while (Total >= Threshold) + Total -= CM; + LHS = Total; + } } +/// EvaluateRepeatedConstant - Compute C op C op ... op C where the constant C +/// is repeated Weight times. +static Constant *EvaluateRepeatedConstant(unsigned Opcode, Constant *C, + APInt Weight) { + // For addition the result can be efficiently computed as the product of the + // constant and the weight. + if (Opcode == Instruction::Add) + return ConstantExpr::getMul(C, ConstantInt::get(C->getContext(), Weight)); + + // The weight might be huge, so compute by repeated squaring to ensure that + // compile time is proportional to the logarithm of the weight. + Constant *Result = 0; + Constant *Power = C; // Successively C, C op C, (C op C) op (C op C) etc. + // Visit the bits in Weight. + while (Weight != 0) { + // If the current bit in Weight is non-zero do Result = Result op Power. + if (Weight[0]) + Result = Result ? ConstantExpr::get(Opcode, Result, Power) : Power; + // Move on to the next bit if any more are non-zero. + Weight = Weight.lshr(1); + if (Weight.isMinValue()) + break; + // Square the power. 
+ Power = ConstantExpr::get(Opcode, Power, Power); + } + + assert(Result && "Only positive weights supported!"); + return Result; +} -/// LinearizeExprTree - Given an associative binary expression tree, traverse -/// all of the uses putting it into canonical form. This forces a left-linear -/// form of the expression (((a+b)+c)+d), and collects information about the -/// rank of the non-tree operands. +typedef std::pair RepeatedValue; + +/// LinearizeExprTree - Given an associative binary expression, return the leaf +/// nodes in Ops along with their weights (how many times the leaf occurs). The +/// original expression is the same as +/// (Ops[0].first op Ops[0].first op ... Ops[0].first) <- Ops[0].second times +/// op +/// (Ops[1].first op Ops[1].first op ... Ops[1].first) <- Ops[1].second times +/// op +/// ... +/// op +/// (Ops[N].first op Ops[N].first op ... Ops[N].first) <- Ops[N].second times +/// +/// Note that the values Ops[0].first, ..., Ops[N].first are all distinct, and +/// they are all non-constant except possibly for the last one, which if it is +/// constant will have weight one (Ops[N].second === 1). +/// +/// This routine may modify the function, in which case it returns 'true'. The +/// changes it makes may well be destructive, changing the value computed by 'I' +/// to something completely different. Thus if the routine returns 'true' then +/// you MUST either replace I with a new expression computed from the Ops array, +/// or use RewriteExprTree to put the values back in. +/// +/// A leaf node is either not a binary operation of the same kind as the root +/// node 'I' (i.e. is not a binary operator at all, or is, but with a different +/// opcode), or is the same kind of binary operator but has a use which either +/// does not belong to the expression, or does belong to the expression but is +/// a leaf node. Every leaf node has at least one use that is a non-leaf node +/// of the expression, while for non-leaf nodes (except for the root 'I') every +/// use is a non-leaf node of the expression. +/// +/// For example: +/// expression graph node names +/// +/// + | I +/// / \ | +/// + + | A, B +/// / \ / \ | +/// * + * | C, D, E +/// / \ / \ / \ | +/// + * | F, G +/// +/// The leaf nodes are C, E, F and G. The Ops array will contain (maybe not in +/// that order) (C, 1), (E, 1), (F, 2), (G, 2). /// -/// NOTE: These intentionally destroys the expression tree operands (turning -/// them into undef values) to reduce #uses of the values. This means that the -/// caller MUST use something like RewriteExprTree to put the values back in. +/// The expression is maximal: if some instruction is a binary operator of the +/// same kind as 'I', and all of its uses are non-leaf nodes of the expression, +/// then the instruction also belongs to the expression, is not a leaf node of +/// it, and its operands also belong to the expression (but may be leaf nodes). /// -void Reassociate::LinearizeExprTree(BinaryOperator *I, - SmallVectorImpl &Ops) { - Value *LHS = I->getOperand(0), *RHS = I->getOperand(1); +/// NOTE: This routine will set operands of non-leaf non-root nodes to undef in +/// order to ensure that every non-root node in the expression has *exactly one* +/// use by a non-leaf node of the expression. This destruction means that the +/// caller MUST either replace 'I' with a new expression or use something like +/// RewriteExprTree to put the values back in if the routine indicates that it +/// made a change by returning 'true'. 
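The bit-scanning loop in EvaluateRepeatedConstant just above is ordinary exponentiation by squaring, so materializing C op C op ... op C costs O(log Weight) applications of op rather than O(Weight). The same loop over plain unsigned multiplication, seeding the result with the identity instead of the null-Constant trick used in the IR version:

    #include <cstdint>
    #include <cstdio>

    static uint64_t powBySquaring(uint64_t C, uint64_t Weight) {
      uint64_t Result = 1;   // identity; the IR version seeds lazily instead
      uint64_t Power = C;    // successively C, C*C, (C*C)*(C*C), ...
      while (Weight != 0) {
        if (Weight & 1)      // this bit of the weight contributes Power
          Result *= Power;
        Weight >>= 1;
        if (Weight == 0)
          break;             // skip the final, unused squaring
        Power *= Power;
      }
      return Result;
    }

    int main() {
      std::printf("%llu\n",
                  (unsigned long long)powBySquaring(3, 13));  // 1594323 = 3^13
    }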
+/// +/// In the above example either the right operand of A or the left operand of B +/// will be replaced by undef. If it is B's operand then this gives: +/// +/// + | I +/// / \ | +/// + + | A, B - operand of B replaced with undef +/// / \ \ | +/// * + * | C, D, E +/// / \ / \ / \ | +/// + * | F, G +/// +/// Note that such undef operands can only be reached by passing through 'I'. +/// For example, if you visit operands recursively starting from a leaf node +/// then you will never see such an undef operand unless you get back to 'I', +/// which requires passing through a phi node. +/// +/// Note that this routine may also mutate binary operators of the wrong type +/// that have all uses inside the expression (i.e. only used by non-leaf nodes +/// of the expression) if it can turn them into binary operators of the right +/// type and thus make the expression bigger. + +static bool LinearizeExprTree(BinaryOperator *I, + SmallVectorImpl &Ops) { + DEBUG(dbgs() << "LINEARIZE: " << *I << '\n'); + unsigned Bitwidth = I->getType()->getScalarType()->getPrimitiveSizeInBits(); unsigned Opcode = I->getOpcode(); + assert(Instruction::isAssociative(Opcode) && + Instruction::isCommutative(Opcode) && + "Expected an associative and commutative operation!"); + // If we see an absorbing element then the entire expression must be equal to + // it. For example, if this is a multiplication expression and zero occurs as + // an operand somewhere in it then the result of the expression must be zero. + Constant *Absorber = ConstantExpr::getBinOpAbsorber(Opcode, I->getType()); + + // Visit all operands of the expression, keeping track of their weight (the + // number of paths from the expression root to the operand, or if you like + // the number of times that operand occurs in the linearized expression). + // For example, if I = X + A, where X = A + B, then I, X and B have weight 1 + // while A has weight two. + + // Worklist of non-leaf nodes (their operands are in the expression too) along + // with their weights, representing a certain number of paths to the operator. + // If an operator occurs in the worklist multiple times then we found multiple + // ways to get to it. + SmallVector, 8> Worklist; // (Op, Weight) + Worklist.push_back(std::make_pair(I, APInt(Bitwidth, 1))); + bool MadeChange = false; + + // Leaves of the expression are values that either aren't the right kind of + // operation (eg: a constant, or a multiply in an add tree), or are, but have + // some uses that are not inside the expression. For example, in I = X + X, + // X = A + B, the value X has two uses (by I) that are in the expression. If + // X has any other uses, for example in a return instruction, then we consider + // X to be a leaf, and won't analyze it further. When we first visit a value, + // if it has more than one use then at first we conservatively consider it to + // be a leaf. Later, as the expression is explored, we may discover some more + // uses of the value from inside the expression. If all uses turn out to be + // from within the expression (and the value is a binary operator of the right + // kind) then the value is no longer considered to be a leaf, and its operands + // are explored. + + // Leaves - Keeps track of the set of putative leaves as well as the number of + // paths to each leaf seen so far. + typedef DenseMap LeafMap; + LeafMap Leaves; // Leaf -> Total weight so far. + SmallVector LeafOrder; // Ensure deterministic leaf output order. 
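The LeafOrder vector introduced above exists because hash-map iteration order is unspecified; recording the first-visit order in a side vector makes the emitted operand list deterministic. The same idiom with standard containers:

    #include <cstdio>
    #include <string>
    #include <unordered_map>
    #include <vector>

    int main() {
      std::unordered_map<std::string, unsigned> Weights;
      std::vector<std::string> Order;  // insertion order of distinct keys
      const char *Seen[] = {"A", "B", "A", "C", "B"};
      for (const char *K : Seen) {
        if (Weights.find(K) == Weights.end())
          Order.push_back(K);          // first visit: remember the position
        ++Weights[K];
      }
      for (size_t I = 0; I < Order.size(); ++I)  // A:2 B:2 C:1, stable order
        std::printf("%s: %u\n", Order[I].c_str(), Weights[Order[I]]);
    }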
- // First step, linearize the expression if it is in ((A+B)+(C+D)) form. - BinaryOperator *LHSBO = isReassociableOp(LHS, Opcode); - BinaryOperator *RHSBO = isReassociableOp(RHS, Opcode); +#ifndef NDEBUG + SmallPtrSet Visited; // For sanity checking the iteration scheme. +#endif + while (!Worklist.empty()) { + std::pair P = Worklist.pop_back_val(); + I = P.first; // We examine the operands of this binary operator. + + for (unsigned OpIdx = 0; OpIdx < 2; ++OpIdx) { // Visit operands. + Value *Op = I->getOperand(OpIdx); + APInt Weight = P.second; // Number of paths to this operand. + DEBUG(dbgs() << "OPERAND: " << *Op << " (" << Weight << ")\n"); + assert(!Op->use_empty() && "No uses, so how did we get to it?!"); + + // If the expression contains an absorbing element then there is no need + // to analyze it further: it must evaluate to the absorbing element. + if (Op == Absorber && !Weight.isMinValue()) { + Ops.push_back(std::make_pair(Absorber, APInt(Bitwidth, 1))); + return MadeChange; + } - // If this is a multiply expression tree and it contains internal negations, - // transform them into multiplies by -1 so they can be reassociated. - if (I->getOpcode() == Instruction::Mul) { - if (!LHSBO && LHS->hasOneUse() && BinaryOperator::isNeg(LHS)) { - LHS = LowerNegateToMultiply(cast(LHS), ValueRankMap); - LHSBO = isReassociableOp(LHS, Opcode); - } - if (!RHSBO && RHS->hasOneUse() && BinaryOperator::isNeg(RHS)) { - RHS = LowerNegateToMultiply(cast(RHS), ValueRankMap); - RHSBO = isReassociableOp(RHS, Opcode); + // If this is a binary operation of the right kind with only one use then + // add its operands to the expression. + if (BinaryOperator *BO = isReassociableOp(Op, Opcode)) { + assert(Visited.insert(Op) && "Not first visit!"); + DEBUG(dbgs() << "DIRECT ADD: " << *Op << " (" << Weight << ")\n"); + Worklist.push_back(std::make_pair(BO, Weight)); + continue; + } + + // Appears to be a leaf. Is the operand already in the set of leaves? + LeafMap::iterator It = Leaves.find(Op); + if (It == Leaves.end()) { + // Not in the leaf map. Must be the first time we saw this operand. + assert(Visited.insert(Op) && "Not first visit!"); + if (!Op->hasOneUse()) { + // This value has uses not accounted for by the expression, so it is + // not safe to modify. Mark it as being a leaf. + DEBUG(dbgs() << "ADD USES LEAF: " << *Op << " (" << Weight << ")\n"); + LeafOrder.push_back(Op); + Leaves[Op] = Weight; + continue; + } + // No uses outside the expression, try morphing it. + } else if (It != Leaves.end()) { + // Already in the leaf map. + assert(Visited.count(Op) && "In leaf map but not visited!"); + + // Update the number of paths to the leaf. + IncorporateWeight(It->second, Weight, Opcode); + +#if 0 // TODO: Re-enable once PR13021 is fixed. + // The leaf already has one use from inside the expression. As we want + // exactly one such use, drop this new use of the leaf. + assert(!Op->hasOneUse() && "Only one use, but we got here twice!"); + I->setOperand(OpIdx, UndefValue::get(I->getType())); + MadeChange = true; + + // If the leaf is a binary operation of the right kind and we now see + // that its multiple original uses were in fact all by nodes belonging + // to the expression, then no longer consider it to be a leaf and add + // its operands to the expression. 
+ if (BinaryOperator *BO = isReassociableOp(Op, Opcode)) { + DEBUG(dbgs() << "UNLEAF: " << *Op << " (" << It->second << ")\n"); + Worklist.push_back(std::make_pair(BO, It->second)); + Leaves.erase(It); + continue; + } +#endif + + // If we still have uses that are not accounted for by the expression + // then it is not safe to modify the value. + if (!Op->hasOneUse()) + continue; + + // No uses outside the expression, try morphing it. + Weight = It->second; + Leaves.erase(It); // Since the value may be morphed below. + } + + // At this point we have a value which, first of all, is not a binary + // expression of the right kind, and secondly, is only used inside the + // expression. This means that it can safely be modified. See if we + // can usefully morph it into an expression of the right kind. + assert((!isa(Op) || + cast(Op)->getOpcode() != Opcode) && + "Should have been handled above!"); + assert(Op->hasOneUse() && "Has uses outside the expression tree!"); + + // If this is a multiply expression, turn any internal negations into + // multiplies by -1 so they can be reassociated. + BinaryOperator *BO = dyn_cast(Op); + if (Opcode == Instruction::Mul && BO && BinaryOperator::isNeg(BO)) { + DEBUG(dbgs() << "MORPH LEAF: " << *Op << " (" << Weight << ") TO "); + BO = LowerNegateToMultiply(BO); + DEBUG(dbgs() << *BO << 'n'); + Worklist.push_back(std::make_pair(BO, Weight)); + MadeChange = true; + continue; + } + + // Failed to morph into an expression of the right type. This really is + // a leaf. + DEBUG(dbgs() << "ADD LEAF: " << *Op << " (" << Weight << ")\n"); + assert(!isReassociableOp(Op, Opcode) && "Value was morphed?"); + LeafOrder.push_back(Op); + Leaves[Op] = Weight; } } - if (!LHSBO) { - if (!RHSBO) { - // Neither the LHS or RHS as part of the tree, thus this is a leaf. As - // such, just remember these operands and their rank. - Ops.push_back(ValueEntry(getRank(LHS), LHS)); - Ops.push_back(ValueEntry(getRank(RHS), RHS)); - - // Clear the leaves out. - I->setOperand(0, UndefValue::get(I->getType())); - I->setOperand(1, UndefValue::get(I->getType())); - return; + // The leaves, repeated according to their weights, represent the linearized + // form of the expression. + Constant *Cst = 0; // Accumulate constants here. + for (unsigned i = 0, e = LeafOrder.size(); i != e; ++i) { + Value *V = LeafOrder[i]; + LeafMap::iterator It = Leaves.find(V); + if (It == Leaves.end()) + // Node initially thought to be a leaf wasn't. + continue; + assert(!isReassociableOp(V, Opcode) && "Shouldn't be a leaf!"); + APInt Weight = It->second; + if (Weight.isMinValue()) + // Leaf already output or weight reduction eliminated it. + continue; + // Ensure the leaf is only output once. + It->second = 0; + // Glob all constants together into Cst. + if (Constant *C = dyn_cast(V)) { + C = EvaluateRepeatedConstant(Opcode, C, Weight); + Cst = Cst ? ConstantExpr::get(Opcode, Cst, C) : C; + continue; } - - // Turn X+(Y+Z) -> (Y+Z)+X - std::swap(LHSBO, RHSBO); - std::swap(LHS, RHS); - bool Success = !I->swapOperands(); - assert(Success && "swapOperands failed"); - (void)Success; - MadeChange = true; - } else if (RHSBO) { - // Turn (A+B)+(C+D) -> (((A+B)+C)+D). This guarantees the RHS is not - // part of the expression tree. - LinearizeExpr(I); - LHS = LHSBO = cast(I->getOperand(0)); - RHS = I->getOperand(1); - RHSBO = 0; + // Add non-constant + Ops.push_back(std::make_pair(V, Weight)); } - // Okay, now we know that the LHS is a nested expression and that the RHS is - // not. Perform reassociation. 
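At its core the leaf collection above is a weighted walk of an expression DAG: a leaf's weight is the number of root-to-leaf paths. A toy version over a single opcode that ignores the single-use checks, the leaf morphing, and the APInt weight reduction:

    #include <cstdio>
    #include <map>
    #include <string>
    #include <utility>
    #include <vector>

    struct Node {
      bool IsOp;         // interior node of the one opcode vs. leaf
      std::string Name;  // leaf name (unused for interior nodes)
      const Node *L, *R;
    };

    static std::map<std::string, unsigned> linearize(const Node *Root) {
      std::map<std::string, unsigned> Leaves;  // leaf -> total weight
      std::vector<std::pair<const Node *, unsigned> > Worklist;
      Worklist.push_back(std::make_pair(Root, 1u));
      while (!Worklist.empty()) {
        std::pair<const Node *, unsigned> P = Worklist.back();
        Worklist.pop_back();
        const Node *Ops[2] = { P.first->L, P.first->R };
        for (int I = 0; I < 2; ++I) {
          if (Ops[I]->IsOp)
            Worklist.push_back(std::make_pair(Ops[I], P.second)); // descend
          else
            Leaves[Ops[I]->Name] += P.second;  // one more path per unit weight
        }
      }
      return Leaves;
    }

    int main() {
      // I = X + X with X = A + B: A and B each lie on two root-to-leaf paths.
      Node A = {false, "A", 0, 0}, B = {false, "B", 0, 0};
      Node X = {true, "", &A, &B}, I = {true, "", &X, &X};
      std::map<std::string, unsigned> W = linearize(&I);
      std::printf("A=%u B=%u\n", W["A"], W["B"]);  // A=2 B=2
    }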
- assert(!isReassociableOp(RHS, Opcode) && "LinearizeExpr failed!"); - - // Move LHS right before I to make sure that the tree expression dominates all - // values. - LHSBO->moveBefore(I); + // Add any constants back into Ops, all globbed together and reduced to having + // weight 1 for the convenience of users. + Constant *Identity = ConstantExpr::getBinOpIdentity(Opcode, I->getType()); + if (Cst && Cst != Identity) { + // If combining multiple constants resulted in the absorber then the entire + // expression must evaluate to the absorber. + if (Cst == Absorber) + Ops.clear(); + Ops.push_back(std::make_pair(Cst, APInt(Bitwidth, 1))); + } - // Linearize the expression tree on the LHS. - LinearizeExprTree(LHSBO, Ops); + // For nilpotent operations or addition there may be no operands, for example + // because the expression was "X xor X" or consisted of 2^Bitwidth additions: + // in both cases the weight reduces to 0 causing the value to be skipped. + if (Ops.empty()) { + assert(Identity && "Associative operation without identity!"); + Ops.push_back(std::make_pair(Identity, APInt(Bitwidth, 1))); + } - // Remember the RHS operand and its rank. - Ops.push_back(ValueEntry(getRank(RHS), RHS)); - - // Clear the RHS leaf out. - I->setOperand(1, UndefValue::get(I->getType())); + return MadeChange; } // RewriteExprTree - Now that the operands for this expression tree are -// linearized and optimized, emit them in-order. This function is written to be -// tail recursive. +// linearized and optimized, emit them in-order. void Reassociate::RewriteExprTree(BinaryOperator *I, - SmallVectorImpl &Ops, - unsigned i) { - if (i+2 == Ops.size()) { - if (I->getOperand(0) != Ops[i].Op || - I->getOperand(1) != Ops[i+1].Op) { - Value *OldLHS = I->getOperand(0); - DEBUG(dbgs() << "RA: " << *I << '\n'); - I->setOperand(0, Ops[i].Op); - I->setOperand(1, Ops[i+1].Op); - - // Clear all the optional flags, which may not hold after the - // reassociation if the expression involved more than just this operation. - if (Ops.size() != 2) - I->clearSubclassOptionalData(); - - DEBUG(dbgs() << "TO: " << *I << '\n'); + SmallVectorImpl &Ops) { + assert(Ops.size() > 1 && "Single values should be used directly!"); + + // Since our optimizations never increase the number of operations, the new + // expression can always be written by reusing the existing binary operators + // from the original expression tree, without creating any new instructions, + // though the rewritten expression may have a completely different topology. + // We take care to not change anything if the new expression will be the same + // as the original. If more than trivial changes (like commuting operands) + // were made then we are obliged to clear out any optional subclass data like + // nsw flags. + + /// NodesToRewrite - Nodes from the original expression available for writing + /// the new expression into. + SmallVector NodesToRewrite; + unsigned Opcode = I->getOpcode(); + BinaryOperator *Op = I; + + // ExpressionChanged - Non-null if the rewritten expression differs from the + // original in some non-trivial way, requiring the clearing of optional flags. + // Flags are cleared from the operator in ExpressionChanged up to I inclusive. + BinaryOperator *ExpressionChanged = 0; + for (unsigned i = 0; ; ++i) { + // The last operation (which comes earliest in the IR) is special as both + // operands will come from Ops, rather than just one with the other being + // a subexpression. 
+ if (i+2 == Ops.size()) { + Value *NewLHS = Ops[i].Op; + Value *NewRHS = Ops[i+1].Op; + Value *OldLHS = Op->getOperand(0); + Value *OldRHS = Op->getOperand(1); + + if (NewLHS == OldLHS && NewRHS == OldRHS) + // Nothing changed, leave it alone. + break; + + if (NewLHS == OldRHS && NewRHS == OldLHS) { + // The order of the operands was reversed. Swap them. + DEBUG(dbgs() << "RA: " << *Op << '\n'); + Op->swapOperands(); + DEBUG(dbgs() << "TO: " << *Op << '\n'); + MadeChange = true; + ++NumChanged; + break; + } + + // The new operation differs non-trivially from the original. Overwrite + // the old operands with the new ones. + DEBUG(dbgs() << "RA: " << *Op << '\n'); + if (NewLHS != OldLHS) { + if (BinaryOperator *BO = isReassociableOp(OldLHS, Opcode)) + NodesToRewrite.push_back(BO); + Op->setOperand(0, NewLHS); + } + if (NewRHS != OldRHS) { + if (BinaryOperator *BO = isReassociableOp(OldRHS, Opcode)) + NodesToRewrite.push_back(BO); + Op->setOperand(1, NewRHS); + } + DEBUG(dbgs() << "TO: " << *Op << '\n'); + + ExpressionChanged = Op; + MadeChange = true; + ++NumChanged; + + break; + } + + // Not the last operation. The left-hand side will be a sub-expression + // while the right-hand side will be the current element of Ops. + Value *NewRHS = Ops[i].Op; + if (NewRHS != Op->getOperand(1)) { + DEBUG(dbgs() << "RA: " << *Op << '\n'); + if (NewRHS == Op->getOperand(0)) { + // The new right-hand side was already present as the left operand. If + // we are lucky then swapping the operands will sort out both of them. + Op->swapOperands(); + } else { + // Overwrite with the new right-hand side. + if (BinaryOperator *BO = isReassociableOp(Op->getOperand(1), Opcode)) + NodesToRewrite.push_back(BO); + Op->setOperand(1, NewRHS); + ExpressionChanged = Op; + } + DEBUG(dbgs() << "TO: " << *Op << '\n'); MadeChange = true; ++NumChanged; - - // If we reassociated a tree to fewer operands (e.g. (1+a+2) -> (a+3) - // delete the extra, now dead, nodes. - RemoveDeadBinaryOp(OldLHS); } - return; - } - assert(i+2 < Ops.size() && "Ops index out of range!"); - if (I->getOperand(1) != Ops[i].Op) { - DEBUG(dbgs() << "RA: " << *I << '\n'); - I->setOperand(1, Ops[i].Op); + // Now deal with the left-hand side. If this is already an operation node + // from the original expression then just rewrite the rest of the expression + // into it. + if (BinaryOperator *BO = isReassociableOp(Op->getOperand(0), Opcode)) { + Op = BO; + continue; + } - // Conservatively clear all the optional flags, which may not hold - // after the reassociation. - I->clearSubclassOptionalData(); + // Otherwise, grab a spare node from the original expression and use that as + // the left-hand side. If there are no nodes left then the optimizers made + // an expression with more nodes than the original! This usually means that + // they did something stupid but it might mean that the problem was just too + // hard (finding the mimimal number of multiplications needed to realize a + // multiplication expression is NP-complete). Whatever the reason, smart or + // stupid, create a new node if there are none left. 
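RewriteExprTree's rule of clearing optional subclass data whenever the topology changes non-trivially (the nsw flags mentioned in its preamble) is not pedantry: the same integer sum can overflow under one association and not another. A small demonstration in 8-bit arithmetic:

    #include <cstdint>
    #include <cstdio>

    int main() {
      int8_t A = 100, B = 100, C = -100;
      // a + (b + c) stays in range at every step...
      int Assoc1 = A + (B + C);                 // 100 + 0 = 100
      // ...but (a + b) + c overflows in the intermediate a + b when the
      // addition is performed in 8 bits: 200 wraps to -56 (two's complement).
      int8_t AB = static_cast<int8_t>(A + B);
      int Assoc2 = AB + C;                      // -56 + -100 = -156
      std::printf("%d vs %d\n", Assoc1, Assoc2);
      return Assoc1 == Assoc2 ? 0 : 1;          // exits 1: the flag can't survive
    }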
+ BinaryOperator *NewOp; + if (NodesToRewrite.empty()) { + Constant *Undef = UndefValue::get(I->getType()); + NewOp = BinaryOperator::Create(Instruction::BinaryOps(Opcode), + Undef, Undef, "", I); + } else { + NewOp = NodesToRewrite.pop_back_val(); + } - DEBUG(dbgs() << "TO: " << *I << '\n'); + DEBUG(dbgs() << "RA: " << *Op << '\n'); + Op->setOperand(0, NewOp); + DEBUG(dbgs() << "TO: " << *Op << '\n'); + ExpressionChanged = Op; MadeChange = true; ++NumChanged; + Op = NewOp; } - - BinaryOperator *LHS = cast(I->getOperand(0)); - assert(LHS->getOpcode() == I->getOpcode() && - "Improper expression tree!"); - - // Compactify the tree instructions together with each other to guarantee - // that the expression tree is dominated by all of Ops. - LHS->moveBefore(I); - RewriteExprTree(LHS, Ops, i+1); -} - + // If the expression changed non-trivially then clear out all subclass data + // starting from the operator specified in ExpressionChanged, and compactify + // the operators to just before the expression root to guarantee that the + // expression tree is dominated by all of Ops. + if (ExpressionChanged) + do { + ExpressionChanged->clearSubclassOptionalData(); + if (ExpressionChanged == I) + break; + ExpressionChanged->moveBefore(I); + ExpressionChanged = cast(*ExpressionChanged->use_begin()); + } while (1); + + // Throw away any left over nodes from the original expression. + for (unsigned i = 0, e = NodesToRewrite.size(); i != e; ++i) + RedoInsts.insert(NodesToRewrite[i]); +} -// NegateValue - Insert instructions before the instruction pointed to by BI, -// that computes the negative version of the value specified. The negative -// version of the value is returned, and BI is left pointing at the instruction -// that should be processed next by the reassociation pass. -// +/// NegateValue - Insert instructions before the instruction pointed to by BI, +/// that computes the negative version of the value specified. The negative +/// version of the value is returned, and BI is left pointing at the instruction +/// that should be processed next by the reassociation pass. static Value *NegateValue(Value *V, Instruction *BI) { if (Constant *C = dyn_cast(V)) return ConstantExpr::getNeg(C); - + // We are trying to expose opportunity for reassociation. One of the things // that we want to do to achieve this is to push a negation as deep into an // expression chain as possible, to expose the add instructions. In practice, @@ -412,22 +810,21 @@ static Value *NegateValue(Value *V, Instruction *BI) { // the constants. We assume that instcombine will clean up the mess later if // we introduce tons of unnecessary negation instructions. // - if (Instruction *I = dyn_cast(V)) - if (I->getOpcode() == Instruction::Add && I->hasOneUse()) { - // Push the negates through the add. - I->setOperand(0, NegateValue(I->getOperand(0), BI)); - I->setOperand(1, NegateValue(I->getOperand(1), BI)); - - // We must move the add instruction here, because the neg instructions do - // not dominate the old add instruction in general. By moving it, we are - // assured that the neg instructions we just inserted dominate the - // instruction we are about to insert after them. - // - I->moveBefore(BI); - I->setName(I->getName()+".neg"); - return I; - } - + if (BinaryOperator *I = isReassociableOp(V, Instruction::Add)) { + // Push the negates through the add. 
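// [Editorial illustration, not part of this patch: the recursion below
// rewrites -(A + B) as (-A) + (-B), pushing the negation into each operand.]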
+ I->setOperand(0, NegateValue(I->getOperand(0), BI)); + I->setOperand(1, NegateValue(I->getOperand(1), BI)); + + // We must move the add instruction here, because the neg instructions do + // not dominate the old add instruction in general. By moving it, we are + // assured that the neg instructions we just inserted dominate the + // instruction we are about to insert after them. + // + I->moveBefore(BI); + I->setName(I->getName()+".neg"); + return I; + } + // Okay, we need to materialize a negated version of V with an instruction. // Scan the use lists of V to see if we have one already. for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;++UI){ @@ -443,7 +840,7 @@ static Value *NegateValue(Value *V, Instruction *BI) { // Verify that the negate is in this function, V might be a constant expr. if (TheNeg->getParent()->getParent() != BI->getParent()->getParent()) continue; - + BasicBlock::iterator InsertPt; if (Instruction *InstInput = dyn_cast(V)) { if (InvokeInst *II = dyn_cast(InstInput)) { @@ -471,7 +868,7 @@ static bool ShouldBreakUpSubtract(Instruction *Sub) { // If this is a negation, we can't split it up! if (BinaryOperator::isNeg(Sub)) return false; - + // Don't bother to break this up unless either the LHS is an associable add or // subtract or if this is only used by one. if (isReassociableOp(Sub->getOperand(0), Instruction::Add) || @@ -480,19 +877,18 @@ static bool ShouldBreakUpSubtract(Instruction *Sub) { if (isReassociableOp(Sub->getOperand(1), Instruction::Add) || isReassociableOp(Sub->getOperand(1), Instruction::Sub)) return true; - if (Sub->hasOneUse() && + if (Sub->hasOneUse() && (isReassociableOp(Sub->use_back(), Instruction::Add) || isReassociableOp(Sub->use_back(), Instruction::Sub))) return true; - + return false; } /// BreakUpSubtract - If we have (X-Y), and if either X is an add, or if this is /// only used by an add, transform this into (X+(0-Y)) to promote better /// reassociation. -static Instruction *BreakUpSubtract(Instruction *Sub, - DenseMap, unsigned> &ValueRankMap) { +static BinaryOperator *BreakUpSubtract(Instruction *Sub) { // Convert a subtract into an add and a neg instruction. This allows sub // instructions to be commuted with other add instructions. // @@ -500,15 +896,15 @@ static Instruction *BreakUpSubtract(Instruction *Sub, // and set it as the RHS of the add instruction we just made. // Value *NegVal = NegateValue(Sub->getOperand(1), Sub); - Instruction *New = + BinaryOperator *New = BinaryOperator::CreateAdd(Sub->getOperand(0), NegVal, "", Sub); + Sub->setOperand(0, Constant::getNullValue(Sub->getType())); // Drop use of op. + Sub->setOperand(1, Constant::getNullValue(Sub->getType())); // Drop use of op. New->takeName(Sub); // Everyone now refers to the add instruction. - ValueRankMap.erase(Sub); Sub->replaceAllUsesWith(New); New->setDebugLoc(Sub->getDebugLoc()); - Sub->eraseFromParent(); DEBUG(dbgs() << "Negated: " << *New << '\n'); return New; @@ -517,32 +913,23 @@ static Instruction *BreakUpSubtract(Instruction *Sub, /// ConvertShiftToMul - If this is a shift of a reassociable multiply or is used /// by one, change this into a multiply by a constant to assist with further /// reassociation. -static Instruction *ConvertShiftToMul(Instruction *Shl, - DenseMap, unsigned> &ValueRankMap) { - // If an operand of this shift is a reassociable multiply, or if the shift - // is used by a reassociable multiply or add, turn into a multiply. 
- if (isReassociableOp(Shl->getOperand(0), Instruction::Mul) || - (Shl->hasOneUse() && - (isReassociableOp(Shl->use_back(), Instruction::Mul) || - isReassociableOp(Shl->use_back(), Instruction::Add)))) { - Constant *MulCst = ConstantInt::get(Shl->getType(), 1); - MulCst = ConstantExpr::getShl(MulCst, cast(Shl->getOperand(1))); - - Instruction *Mul = - BinaryOperator::CreateMul(Shl->getOperand(0), MulCst, "", Shl); - ValueRankMap.erase(Shl); - Mul->takeName(Shl); - Shl->replaceAllUsesWith(Mul); - Mul->setDebugLoc(Shl->getDebugLoc()); - Shl->eraseFromParent(); - return Mul; - } - return 0; +static BinaryOperator *ConvertShiftToMul(Instruction *Shl) { + Constant *MulCst = ConstantInt::get(Shl->getType(), 1); + MulCst = ConstantExpr::getShl(MulCst, cast(Shl->getOperand(1))); + + BinaryOperator *Mul = + BinaryOperator::CreateMul(Shl->getOperand(0), MulCst, "", Shl); + Shl->setOperand(0, UndefValue::get(Shl->getType())); // Drop use of op. + Mul->takeName(Shl); + Shl->replaceAllUsesWith(Mul); + Mul->setDebugLoc(Shl->getDebugLoc()); + return Mul; } -// Scan backwards and forwards among values with the same rank as element i to -// see if X exists. If X does not exist, return i. This is useful when -// scanning for 'x' when we see '-x' because they both get the same rank. +/// FindInOperandList - Scan backwards and forwards among values with the same +/// rank as element i to see if X exists. If X does not exist, return i. This +/// is useful when scanning for 'x' when we see '-x' because they both get the +/// same rank. static unsigned FindInOperandList(SmallVectorImpl &Ops, unsigned i, Value *X) { unsigned XRank = Ops[i].Rank; @@ -562,22 +949,29 @@ static unsigned FindInOperandList(SmallVectorImpl &Ops, unsigned i, static Value *EmitAddTreeOfValues(Instruction *I, SmallVectorImpl &Ops){ if (Ops.size() == 1) return Ops.back(); - + Value *V1 = Ops.back(); Ops.pop_back(); Value *V2 = EmitAddTreeOfValues(I, Ops); return BinaryOperator::CreateAdd(V2, V1, "tmp", I); } -/// RemoveFactorFromExpression - If V is an expression tree that is a +/// RemoveFactorFromExpression - If V is an expression tree that is a /// multiplication sequence, and if this sequence contains a multiply by Factor, /// remove Factor from the tree and return the new tree. Value *Reassociate::RemoveFactorFromExpression(Value *V, Value *Factor) { BinaryOperator *BO = isReassociableOp(V, Instruction::Mul); if (!BO) return 0; - + + SmallVector Tree; + MadeChange |= LinearizeExprTree(BO, Tree); SmallVector Factors; - LinearizeExprTree(BO, Factors); + Factors.reserve(Tree.size()); + for (unsigned i = 0, e = Tree.size(); i != e; ++i) { + RepeatedValue E = Tree[i]; + Factors.append(E.second.getZExtValue(), + ValueEntry(getRank(E.first), E.first)); + } bool FoundFactor = false; bool NeedsNegate = false; @@ -587,7 +981,7 @@ Value *Reassociate::RemoveFactorFromExpression(Value *V, Value *Factor) { Factors.erase(Factors.begin()+i); break; } - + // If this is a negative version of this factor, remove it. if (ConstantInt *FC1 = dyn_cast(Factor)) if (ConstantInt *FC2 = dyn_cast(Factors[i].Op)) @@ -597,29 +991,28 @@ Value *Reassociate::RemoveFactorFromExpression(Value *V, Value *Factor) { break; } } - + if (!FoundFactor) { // Make sure to restore the operands to the expression tree. RewriteExprTree(BO, Factors); return 0; } - + BasicBlock::iterator InsertPt = BO; ++InsertPt; - + // If this was just a single multiply, remove the multiply and return the only // remaining operand. 
if (Factors.size() == 1) { - ValueRankMap.erase(BO); - DeadInsts.push_back(BO); + RedoInsts.insert(BO); V = Factors[0].Op; } else { RewriteExprTree(BO, Factors); V = BO; } - + if (NeedsNegate) V = BinaryOperator::CreateNeg(V, "neg", InsertPt); - + return V; } @@ -629,31 +1022,16 @@ Value *Reassociate::RemoveFactorFromExpression(Value *V, Value *Factor) { /// Ops is the top-level list of add operands we're trying to factor. static void FindSingleUseMultiplyFactors(Value *V, SmallVectorImpl &Factors, - const SmallVectorImpl &Ops, - bool IsRoot) { - BinaryOperator *BO; - if (!(V->hasOneUse() || V->use_empty()) || // More than one use. - !(BO = dyn_cast(V)) || - BO->getOpcode() != Instruction::Mul) { + const SmallVectorImpl &Ops) { + BinaryOperator *BO = isReassociableOp(V, Instruction::Mul); + if (!BO) { Factors.push_back(V); return; } - - // If this value has a single use because it is another input to the add - // tree we're reassociating and we dropped its use, it actually has two - // uses and we can't factor it. - if (!IsRoot) { - for (unsigned i = 0, e = Ops.size(); i != e; ++i) - if (Ops[i].Op == V) { - Factors.push_back(V); - return; - } - } - - + // Otherwise, add the LHS and RHS to the list of factors. - FindSingleUseMultiplyFactors(BO->getOperand(1), Factors, Ops, false); - FindSingleUseMultiplyFactors(BO->getOperand(0), Factors, Ops, false); + FindSingleUseMultiplyFactors(BO->getOperand(1), Factors, Ops); + FindSingleUseMultiplyFactors(BO->getOperand(0), Factors, Ops); } /// OptimizeAndOrXor - Optimize a series of operands to an 'and', 'or', or 'xor' @@ -673,12 +1051,12 @@ static Value *OptimizeAndOrXor(unsigned Opcode, if (FoundX != i) { if (Opcode == Instruction::And) // ...&X&~X = 0 return Constant::getNullValue(X->getType()); - + if (Opcode == Instruction::Or) // ...|X|~X = -1 return Constant::getAllOnesValue(X->getType()); } } - + // Next, check for duplicate pairs of values, which we assume are next to // each other, due to our sorting criteria. assert(i < Ops.size()); @@ -690,12 +1068,12 @@ static Value *OptimizeAndOrXor(unsigned Opcode, ++NumAnnihil; continue; } - + // Drop pairs of values for Xor. assert(Opcode == Instruction::Xor); if (e == 2) return Constant::getNullValue(Ops[0].Op->getType()); - + // Y ^ X^X -> Y Ops.erase(Ops.begin()+i, Ops.begin()+i+2); i -= 1; e -= 2; @@ -728,46 +1106,46 @@ Value *Reassociate::OptimizeAdd(Instruction *I, Ops.erase(Ops.begin()+i); ++NumFound; } while (i != Ops.size() && Ops[i].Op == TheOp); - + DEBUG(errs() << "\nFACTORING [" << NumFound << "]: " << *TheOp << '\n'); ++NumFactor; - + // Insert a new multiply. Value *Mul = ConstantInt::get(cast(I->getType()), NumFound); Mul = BinaryOperator::CreateMul(TheOp, Mul, "factor", I); - + // Now that we have inserted a multiply, optimize it. This allows us to // handle cases that require multiple factoring steps, such as this: // (X*2) + (X*2) + (X*2) -> (X*2)*3 -> X*6 - RedoInsts.push_back(Mul); - + RedoInsts.insert(cast(Mul)); + // If every add operand was a duplicate, return the multiply. if (Ops.empty()) return Mul; - + // Otherwise, we had some input that didn't have the dupe, such as // "A + A + B" -> "A*2 + B". Add the new multiply to the list of // things being added by this operation. Ops.insert(Ops.begin(), ValueEntry(getRank(Mul), Mul)); - + --i; e = Ops.size(); continue; } - + // Check for X and -X in the operand list. 
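// [Editorial illustration, not part of this patch: in X + A + -X the pair
// cancels, leaving just A. X and -X receive the same rank, which is why
// FindInOperandList can locate the matching -X among the neighbours.]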
if (!BinaryOperator::isNeg(TheOp)) continue; - + Value *X = BinaryOperator::getNegArgument(TheOp); unsigned FoundX = FindInOperandList(Ops, i, X); if (FoundX == i) continue; - + // Remove X and -X from the operand list. if (Ops.size() == 2) return Constant::getNullValue(X->getType()); - + Ops.erase(Ops.begin()+i); if (i < FoundX) --FoundX; @@ -778,37 +1156,37 @@ Value *Reassociate::OptimizeAdd(Instruction *I, --i; // Revisit element. e -= 2; // Removed two elements. } - + // Scan the operand list, checking to see if there are any common factors // between operands. Consider something like A*A+A*B*C+D. We would like to // reassociate this to A*(A+B*C)+D, which reduces the number of multiplies. // To efficiently find this, we count the number of times a factor occurs // for any ADD operands that are MULs. DenseMap FactorOccurrences; - + // Keep track of each multiply we see, to avoid triggering on (X*4)+(X*4) // where they are actually the same multiply. unsigned MaxOcc = 0; Value *MaxOccVal = 0; for (unsigned i = 0, e = Ops.size(); i != e; ++i) { - BinaryOperator *BOp = dyn_cast(Ops[i].Op); - if (BOp == 0 || BOp->getOpcode() != Instruction::Mul || !BOp->use_empty()) + BinaryOperator *BOp = isReassociableOp(Ops[i].Op, Instruction::Mul); + if (!BOp) continue; - + // Compute all of the factors of this added value. SmallVector Factors; - FindSingleUseMultiplyFactors(BOp, Factors, Ops, true); + FindSingleUseMultiplyFactors(BOp, Factors, Ops); assert(Factors.size() > 1 && "Bad linearize!"); - + // Add one to FactorOccurrences for each unique factor in this op. SmallPtrSet Duplicates; for (unsigned i = 0, e = Factors.size(); i != e; ++i) { Value *Factor = Factors[i]; if (!Duplicates.insert(Factor)) continue; - + unsigned Occ = ++FactorOccurrences[Factor]; if (Occ > MaxOcc) { MaxOcc = Occ; MaxOccVal = Factor; } - + // If Factor is a negative constant, add the negated value as a factor // because we can percolate the negate out. Watch for minint, which // cannot be positivified. @@ -817,13 +1195,13 @@ Value *Reassociate::OptimizeAdd(Instruction *I, Factor = ConstantInt::get(CI->getContext(), -CI->getValue()); assert(!Duplicates.count(Factor) && "Shouldn't have two constant factors, missed a canonicalize"); - + unsigned Occ = ++FactorOccurrences[Factor]; if (Occ > MaxOcc) { MaxOcc = Occ; MaxOccVal = Factor; } } } } - + // If any factor occurred more than one time, we can pull it out. if (MaxOcc > 1) { DEBUG(errs() << "\nFACTORING [" << MaxOcc << "]: " << *MaxOccVal << '\n'); @@ -831,16 +1209,16 @@ Value *Reassociate::OptimizeAdd(Instruction *I, // Create a new instruction that uses the MaxOccVal twice. If we don't do // this, we could otherwise run into situations where removing a factor - // from an expression will drop a use of maxocc, and this can cause + // from an expression will drop a use of maxocc, and this can cause // RemoveFactorFromExpression on successive values to behave differently. Instruction *DummyInst = BinaryOperator::CreateAdd(MaxOccVal, MaxOccVal); SmallVector NewMulOps; for (unsigned i = 0; i != Ops.size(); ++i) { // Only try to remove factors from expressions we're allowed to. - BinaryOperator *BOp = dyn_cast(Ops[i].Op); - if (BOp == 0 || BOp->getOpcode() != Instruction::Mul || !BOp->use_empty()) + BinaryOperator *BOp = isReassociableOp(Ops[i].Op, Instruction::Mul); + if (!BOp) continue; - + if (Value *V = RemoveFactorFromExpression(Ops[i].Op, MaxOccVal)) { // The factorized operand may occur several times. Convert them all in // one fell swoop. 
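// ---------------------------------------------------------------------------
// Editorial sketch (not part of this patch): the factor-counting idea behind
// OptimizeAdd above, shown standalone on strings instead of llvm::Value*.
// All names below are illustrative and do not belong to the LLVM API.
#include <iostream>
#include <map>
#include <set>
#include <string>
#include <vector>

// Given the multiply operands of each addend, return the factor that occurs
// in the largest number of addends (or "" if none occurs more than once);
// that is the factor worth pulling out, as in A*A + A*B*C + D -> A*(A+B*C)+D.
static std::string findBestFactor(
    const std::vector<std::vector<std::string> > &Addends) {
  std::map<std::string, unsigned> Occurrences;
  std::string Best;
  unsigned MaxOcc = 0;
  for (std::size_t i = 0, e = Addends.size(); i != e; ++i) {
    std::set<std::string> Seen; // count each factor once per addend
    for (std::size_t j = 0, f = Addends[i].size(); j != f; ++j) {
      const std::string &F = Addends[i][j];
      if (!Seen.insert(F).second)
        continue;
      if (++Occurrences[F] > MaxOcc) {
        MaxOcc = Occurrences[F];
        Best = F;
      }
    }
  }
  return MaxOcc > 1 ? Best : std::string();
}

int main() {
  // A*A + A*B*C + D: 'A' appears in two of the three addends, so it is the
  // factor to pull out, saving one multiply overall.
  std::vector<std::vector<std::string> > Addends;
  Addends.push_back(std::vector<std::string>(2, "A"));          // A*A
  std::vector<std::string> Second;
  Second.push_back("A"); Second.push_back("B"); Second.push_back("C");
  Addends.push_back(Second);                                    // A*B*C
  Addends.push_back(std::vector<std::string>(1, "D"));          // D
  std::cout << findBestFactor(Addends) << '\n';                 // prints: A
}
// ---------------------------------------------------------------------------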
@@ -854,7 +1232,7 @@ Value *Reassociate::OptimizeAdd(Instruction *I,
 --i;
 }
 }
-
+
 // No need for extra uses anymore.
 delete DummyInst;
 
@@ -866,26 +1244,201 @@ Value *Reassociate::OptimizeAdd(Instruction *I,
 // A*A*B + A*A*C --> A*(A*B+A*C) --> A*(A*(B+C))
 assert(NumAddedValues > 1 && "Each occurrence should contribute a value");
 (void)NumAddedValues;
- V = ReassociateExpression(cast(V));
+ if (Instruction *VI = dyn_cast(V))
+ RedoInsts.insert(VI);
 
 // Create the multiply.
- Value *V2 = BinaryOperator::CreateMul(V, MaxOccVal, "tmp", I);
+ Instruction *V2 = BinaryOperator::CreateMul(V, MaxOccVal, "tmp", I);
 
 // Rerun associate on the multiply in case the inner expression turned into
 // a multiply. We want to make sure that we keep things in canonical form.
- V2 = ReassociateExpression(cast(V2));
-
+ RedoInsts.insert(V2);
+
 // If every add operand included the factor (e.g. "A*B + A*C"), then the
 // entire result expression is just the multiply "A*(B+C)".
 if (Ops.empty())
 return V2;
-
+
 // Otherwise, we had some input that didn't have the factor, such as
 // "A*B + A*C + D" -> "A*(B+C) + D". Add the new multiply to the list of
 // things being added by this operation.
 Ops.insert(Ops.begin(), ValueEntry(getRank(V2), V2));
 }
-
+
+ return 0;
+}
+
+namespace {
+ /// \brief Predicate tests whether a ValueEntry's op is in a map.
+ struct IsValueInMap {
+ const DenseMap &Map;
+
+ IsValueInMap(const DenseMap &Map) : Map(Map) {}
+
+ bool operator()(const ValueEntry &Entry) {
+ return Map.find(Entry.Op) != Map.end();
+ }
+ };
+}
+
+/// \brief Build up a vector of value/power pairs factoring a product.
+///
+/// Given a series of multiplication operands, build a vector of factors and
+/// the powers each is raised to when forming the final product. Sort them in
+/// the order of descending power.
+///
+/// (x*x) -> [(x, 2)]
+/// ((x*x)*x) -> [(x, 3)]
+/// ((((x*y)*x)*y)*x) -> [(x, 3), (y, 2)]
+///
+/// \returns Whether any factors have a power greater than one.
+bool Reassociate::collectMultiplyFactors(SmallVectorImpl &Ops,
+ SmallVectorImpl &Factors) {
+ // FIXME: Have Ops be (ValueEntry, Multiplicity) pairs, simplifying this.
+ // Compute the sum of powers of simplifiable factors.
+ unsigned FactorPowerSum = 0;
+ for (unsigned Idx = 1, Size = Ops.size(); Idx < Size; ++Idx) {
+ Value *Op = Ops[Idx-1].Op;
+
+ // Count the number of occurrences of this value.
+ unsigned Count = 1;
+ for (; Idx < Size && Ops[Idx].Op == Op; ++Idx)
+ ++Count;
+ // Track for simplification all factors which occur 2 or more times.
+ if (Count > 1)
+ FactorPowerSum += Count;
+ }
+
+ // We can only simplify factors if the sum of the powers of our simplifiable
+ // factors is 4 or higher. When that is the case, we will *always* have
+ // a simplification. This is an important invariant to prevent cyclically
+ // trying to simplify already minimal formations.
+ if (FactorPowerSum < 4)
+ return false;
+
+ // Now gather the simplifiable factors, removing them from Ops.
+ FactorPowerSum = 0;
+ for (unsigned Idx = 1; Idx < Ops.size(); ++Idx) {
+ Value *Op = Ops[Idx-1].Op;
+
+ // Count the number of occurrences of this value.
+ unsigned Count = 1;
+ for (; Idx < Ops.size() && Ops[Idx].Op == Op; ++Idx)
+ ++Count;
+ if (Count == 1)
+ continue;
+ // Move an even number of occurrences to Factors.
+ Count &= ~1U;
+ Idx -= Count;
+ FactorPowerSum += Count;
+ Factors.push_back(Factor(Op, Count));
+ Ops.erase(Ops.begin()+Idx, Ops.begin()+Idx+Count);
+ }
+
+ // None of the adjustments above should have reduced the sum of factor powers
+ // below our minimum of '4'.
+ assert(FactorPowerSum >= 4);
+
+ std::sort(Factors.begin(), Factors.end(), Factor::PowerDescendingSorter());
+ return true;
+}
+
+/// \brief Build a tree of multiplies, computing the product of Ops.
+static Value *buildMultiplyTree(IRBuilder<> &Builder,
+ SmallVectorImpl &Ops) {
+ if (Ops.size() == 1)
+ return Ops.back();
+
+ Value *LHS = Ops.pop_back_val();
+ do {
+ LHS = Builder.CreateMul(LHS, Ops.pop_back_val());
+ } while (!Ops.empty());
+
+ return LHS;
+}
+
+/// \brief Build a minimal multiplication DAG for (a^x)*(b^y)*(c^z)*...
+///
+/// Given a vector of values raised to various powers, where no two values are
+/// equal and the powers are sorted in decreasing order, compute the minimal
+/// DAG of multiplies to compute the final product, and return that product
+/// value.
+Value *Reassociate::buildMinimalMultiplyDAG(IRBuilder<> &Builder,
+ SmallVectorImpl &Factors) {
+ assert(Factors[0].Power);
+ SmallVector OuterProduct;
+ for (unsigned LastIdx = 0, Idx = 1, Size = Factors.size();
+ Idx < Size && Factors[Idx].Power > 0; ++Idx) {
+ if (Factors[Idx].Power != Factors[LastIdx].Power) {
+ LastIdx = Idx;
+ continue;
+ }
+
+ // We want to multiply across all the factors with the same power so that
+ // we can raise them to that power as a single entity. Build a mini tree
+ // for that.
+ SmallVector InnerProduct;
+ InnerProduct.push_back(Factors[LastIdx].Base);
+ do {
+ InnerProduct.push_back(Factors[Idx].Base);
+ ++Idx;
+ } while (Idx < Size && Factors[Idx].Power == Factors[LastIdx].Power);
+
+ // Reset the base value of the first factor to the new expression tree.
+ // We'll remove all the factors with the same power in a second pass.
+ Value *M = Factors[LastIdx].Base = buildMultiplyTree(Builder, InnerProduct);
+ if (Instruction *MI = dyn_cast(M))
+ RedoInsts.insert(MI);
+
+ LastIdx = Idx;
+ }
+ // Unique factors with equal powers -- we've folded them into the first one's
+ // base.
+ Factors.erase(std::unique(Factors.begin(), Factors.end(),
+ Factor::PowerEqual()),
+ Factors.end());
+
+ // Iteratively collect the base of each factor with an odd power into the
+ // outer product, and halve each power in preparation for squaring the
+ // expression.
+ for (unsigned Idx = 0, Size = Factors.size(); Idx != Size; ++Idx) {
+ if (Factors[Idx].Power & 1)
+ OuterProduct.push_back(Factors[Idx].Base);
+ Factors[Idx].Power >>= 1;
+ }
+ if (Factors[0].Power) {
+ Value *SquareRoot = buildMinimalMultiplyDAG(Builder, Factors);
+ OuterProduct.push_back(SquareRoot);
+ OuterProduct.push_back(SquareRoot);
+ }
+ if (OuterProduct.size() == 1)
+ return OuterProduct.front();
+
+ Value *V = buildMultiplyTree(Builder, OuterProduct);
+ return V;
+}
+
+Value *Reassociate::OptimizeMul(BinaryOperator *I,
+ SmallVectorImpl &Ops) {
+ // We can only optimize the multiplies when there is a chain of more than
+ // three, such that a balanced tree might require fewer total multiplies.
+ if (Ops.size() < 4)
+ return 0;
+
+ // Try to turn linear trees of multiplies without other uses of the
+ // intermediate stages into minimal multiply DAGs with perfect sub-expression
+ // re-use.
+ SmallVector Factors;
+ if (!collectMultiplyFactors(Ops, Factors))
+ return 0; // All distinct factors, so nothing left for us to do.
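// [Editorial worked example, not part of this patch: for x*x*x*y*y,
// collectMultiplyFactors moves the even counts into Factors = [(x, 2),
// (y, 2)] and leaves one x behind in Ops. buildMinimalMultiplyDAG then
// merges the equal powers into S = x*y and squares it, so the product is
// computed as x * (S*S): three multiplies where the original chain used
// four.]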
+ + IRBuilder<> Builder(I); + Value *V = buildMinimalMultiplyDAG(Builder, Factors); + if (Ops.empty()) + return V; + + ValueEntry NewEntry = ValueEntry(getRank(V), V); + Ops.insert(std::lower_bound(Ops.begin(), Ops.end(), NewEntry), NewEntry); return 0; } @@ -893,95 +1446,105 @@ Value *Reassociate::OptimizeExpression(BinaryOperator *I, SmallVectorImpl &Ops) { // Now that we have the linearized expression tree, try to optimize it. // Start by folding any constants that we found. - bool IterateOptimization = false; if (Ops.size() == 1) return Ops[0].Op; unsigned Opcode = I->getOpcode(); - - if (Constant *V1 = dyn_cast(Ops[Ops.size()-2].Op)) - if (Constant *V2 = dyn_cast(Ops.back().Op)) { - Ops.pop_back(); - Ops.back().Op = ConstantExpr::get(Opcode, V1, V2); - return OptimizeExpression(I, Ops); - } - - // Check for destructive annihilation due to a constant being used. - if (ConstantInt *CstVal = dyn_cast(Ops.back().Op)) - switch (Opcode) { - default: break; - case Instruction::And: - if (CstVal->isZero()) // X & 0 -> 0 - return CstVal; - if (CstVal->isAllOnesValue()) // X & -1 -> X - Ops.pop_back(); - break; - case Instruction::Mul: - if (CstVal->isZero()) { // X * 0 -> 0 - ++NumAnnihil; - return CstVal; - } - - if (cast(CstVal)->isOne()) - Ops.pop_back(); // X * 1 -> X - break; - case Instruction::Or: - if (CstVal->isAllOnesValue()) // X | -1 -> -1 - return CstVal; - // FALLTHROUGH! - case Instruction::Add: - case Instruction::Xor: - if (CstVal->isZero()) // X [|^+] 0 -> X - Ops.pop_back(); - break; - } - if (Ops.size() == 1) return Ops[0].Op; // Handle destructive annihilation due to identities between elements in the // argument list here. + unsigned NumOps = Ops.size(); switch (Opcode) { default: break; case Instruction::And: case Instruction::Or: - case Instruction::Xor: { - unsigned NumOps = Ops.size(); + case Instruction::Xor: if (Value *Result = OptimizeAndOrXor(Opcode, Ops)) return Result; - IterateOptimization |= Ops.size() != NumOps; break; - } - case Instruction::Add: { - unsigned NumOps = Ops.size(); + case Instruction::Add: if (Value *Result = OptimizeAdd(I, Ops)) return Result; - IterateOptimization |= Ops.size() != NumOps; - } + break; + case Instruction::Mul: + if (Value *Result = OptimizeMul(I, Ops)) + return Result; break; - //case Instruction::Mul: } - if (IterateOptimization) + if (Ops.size() != NumOps) return OptimizeExpression(I, Ops); return 0; } +/// EraseInst - Zap the given instruction, adding interesting operands to the +/// work list. +void Reassociate::EraseInst(Instruction *I) { + assert(isInstructionTriviallyDead(I) && "Trivially dead instructions only!"); + SmallVector Ops(I->op_begin(), I->op_end()); + // Erase the dead instruction. + ValueRankMap.erase(I); + RedoInsts.remove(I); + I->eraseFromParent(); + // Optimize its operands. + SmallPtrSet Visited; // Detect self-referential nodes. + for (unsigned i = 0, e = Ops.size(); i != e; ++i) + if (Instruction *Op = dyn_cast(Ops[i])) { + // If this is a node in an expression tree, climb to the expression root + // and add that since that's where optimization actually happens. + unsigned Opcode = Op->getOpcode(); + while (Op->hasOneUse() && Op->use_back()->getOpcode() == Opcode && + Visited.insert(Op)) + Op = Op->use_back(); + RedoInsts.insert(Op); + } +} + +/// OptimizeInst - Inspect and optimize the given instruction. Note that erasing +/// instructions is not allowed. +void Reassociate::OptimizeInst(Instruction *I) { + // Only consider operations that we understand. 
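// [Editorial note: "operations that we understand" here means binary
// operators; casts, calls and other instructions fall through untouched.]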
+ if (!isa(I)) + return; -/// ReassociateInst - Inspect and reassociate the instruction at the -/// given position, post-incrementing the position. -void Reassociate::ReassociateInst(BasicBlock::iterator &BBI) { - Instruction *BI = BBI++; - if (BI->getOpcode() == Instruction::Shl && - isa(BI->getOperand(1))) - if (Instruction *NI = ConvertShiftToMul(BI, ValueRankMap)) { + if (I->getOpcode() == Instruction::Shl && + isa(I->getOperand(1))) + // If an operand of this shift is a reassociable multiply, or if the shift + // is used by a reassociable multiply or add, turn into a multiply. + if (isReassociableOp(I->getOperand(0), Instruction::Mul) || + (I->hasOneUse() && + (isReassociableOp(I->use_back(), Instruction::Mul) || + isReassociableOp(I->use_back(), Instruction::Add)))) { + Instruction *NI = ConvertShiftToMul(I); + RedoInsts.insert(I); MadeChange = true; - BI = NI; + I = NI; + } + + // Floating point binary operators are not associative, but we can still + // commute (some) of them, to canonicalize the order of their operands. + // This can potentially expose more CSE opportunities, and makes writing + // other transformations simpler. + if ((I->getType()->isFloatingPointTy() || I->getType()->isVectorTy())) { + // FAdd and FMul can be commuted. + if (I->getOpcode() != Instruction::FMul && + I->getOpcode() != Instruction::FAdd) + return; + + Value *LHS = I->getOperand(0); + Value *RHS = I->getOperand(1); + unsigned LHSRank = getRank(LHS); + unsigned RHSRank = getRank(RHS); + + // Sort the operands by rank. + if (RHSRank < LHSRank) { + I->setOperand(0, RHS); + I->setOperand(1, LHS); } - // Reject cases where it is pointless to do this. - if (!isa(BI) || BI->getType()->isFloatingPointTy() || - BI->getType()->isVectorTy()) - return; // Floating point ops are not associative. + return; + } // Do not reassociate boolean (i1) expressions. We want to preserve the // original order of evaluation for short-circuited comparisons that @@ -989,58 +1552,66 @@ void Reassociate::ReassociateInst(BasicBlock::iterator &BBI) { // is not further optimized, it is likely to be transformed back to a // short-circuited form for code gen, and the source order may have been // optimized for the most likely conditions. - if (BI->getType()->isIntegerTy(1)) + if (I->getType()->isIntegerTy(1)) return; // If this is a subtract instruction which is not already in negate form, // see if we can convert it to X+-Y. - if (BI->getOpcode() == Instruction::Sub) { - if (ShouldBreakUpSubtract(BI)) { - BI = BreakUpSubtract(BI, ValueRankMap); - // Reset the BBI iterator in case BreakUpSubtract changed the - // instruction it points to. - BBI = BI; - ++BBI; + if (I->getOpcode() == Instruction::Sub) { + if (ShouldBreakUpSubtract(I)) { + Instruction *NI = BreakUpSubtract(I); + RedoInsts.insert(I); MadeChange = true; - } else if (BinaryOperator::isNeg(BI)) { + I = NI; + } else if (BinaryOperator::isNeg(I)) { // Otherwise, this is a negation. See if the operand is a multiply tree // and if this is not an inner node of a multiply tree. - if (isReassociableOp(BI->getOperand(1), Instruction::Mul) && - (!BI->hasOneUse() || - !isReassociableOp(BI->use_back(), Instruction::Mul))) { - BI = LowerNegateToMultiply(BI, ValueRankMap); + if (isReassociableOp(I->getOperand(1), Instruction::Mul) && + (!I->hasOneUse() || + !isReassociableOp(I->use_back(), Instruction::Mul))) { + Instruction *NI = LowerNegateToMultiply(I); + RedoInsts.insert(I); MadeChange = true; + I = NI; } } } - // If this instruction is a commutative binary operator, process it. 
- if (!BI->isAssociative()) return; - BinaryOperator *I = cast(BI); + // If this instruction is an associative binary operator, process it. + if (!I->isAssociative()) return; + BinaryOperator *BO = cast(I); // If this is an interior node of a reassociable tree, ignore it until we // get to the root of the tree, to avoid N^2 analysis. - if (I->hasOneUse() && isReassociableOp(I->use_back(), I->getOpcode())) + unsigned Opcode = BO->getOpcode(); + if (BO->hasOneUse() && BO->use_back()->getOpcode() == Opcode) return; - // If this is an add tree that is used by a sub instruction, ignore it + // If this is an add tree that is used by a sub instruction, ignore it // until we process the subtract. - if (I->hasOneUse() && I->getOpcode() == Instruction::Add && - cast(I->use_back())->getOpcode() == Instruction::Sub) + if (BO->hasOneUse() && BO->getOpcode() == Instruction::Add && + cast(BO->use_back())->getOpcode() == Instruction::Sub) return; - ReassociateExpression(I); + ReassociateExpression(BO); } -Value *Reassociate::ReassociateExpression(BinaryOperator *I) { - +void Reassociate::ReassociateExpression(BinaryOperator *I) { + // First, walk the expression tree, linearizing the tree, collecting the // operand information. + SmallVector Tree; + MadeChange |= LinearizeExprTree(I, Tree); SmallVector Ops; - LinearizeExprTree(I, Ops); - + Ops.reserve(Tree.size()); + for (unsigned i = 0, e = Tree.size(); i != e; ++i) { + RepeatedValue E = Tree[i]; + Ops.append(E.second.getZExtValue(), + ValueEntry(getRank(E.first), E.first)); + } + DEBUG(dbgs() << "RAIn:\t"; PrintOps(I, Ops); dbgs() << '\n'); - + // Now that we have linearized the tree to a list and have gathered all of // the operands and their ranks, sort the operands by their rank. Use a // stable_sort so that values with equal ranks will have their relative @@ -1048,21 +1619,24 @@ Value *Reassociate::ReassociateExpression(BinaryOperator *I) { // this sorts so that the highest ranking values end up at the beginning of // the vector. std::stable_sort(Ops.begin(), Ops.end()); - + // OptimizeExpression - Now that we have the expression tree in a convenient // sorted form, optimize it globally if possible. if (Value *V = OptimizeExpression(I, Ops)) { + if (V == I) + // Self-referential expression in unreachable code. + return; // This expression tree simplified to something that isn't a tree, // eliminate it. DEBUG(dbgs() << "Reassoc to scalar: " << *V << '\n'); I->replaceAllUsesWith(V); if (Instruction *VI = dyn_cast(V)) VI->setDebugLoc(I->getDebugLoc()); - RemoveDeadBinaryOp(I); + RedoInsts.insert(I); ++NumAnnihil; - return V; + return; } - + // We want to sink immediates as deeply as possible except in the case where // this is a multiply tree used only by an add, and the immediate is a -1. // In this case we reassociate to put the negation on the outside so that we @@ -1074,51 +1648,57 @@ Value *Reassociate::ReassociateExpression(BinaryOperator *I) { ValueEntry Tmp = Ops.pop_back_val(); Ops.insert(Ops.begin(), Tmp); } - + DEBUG(dbgs() << "RAOut:\t"; PrintOps(I, Ops); dbgs() << '\n'); - + if (Ops.size() == 1) { + if (Ops[0].Op == I) + // Self-referential expression in unreachable code. + return; + // This expression tree simplified to something that isn't a tree, // eliminate it. 
 I->replaceAllUsesWith(Ops[0].Op);
 if (Instruction *OI = dyn_cast(Ops[0].Op))
 OI->setDebugLoc(I->getDebugLoc());
- RemoveDeadBinaryOp(I);
- return Ops[0].Op;
+ RedoInsts.insert(I);
+ return;
 }
-
+
 // Now that we ordered and optimized the expressions, splat them back into
 // the expression tree, removing any unneeded nodes.
 RewriteExprTree(I, Ops);
- return I;
}
 
-
bool Reassociate::runOnFunction(Function &F) {
- // Recalculate the rank map for F
+ // Calculate the rank map for F
 BuildRankMap(F);
 
 MadeChange = false;
- for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI)
- for (BasicBlock::iterator BBI = FI->begin(); BBI != FI->end(); )
- ReassociateInst(BBI);
-
- // Now that we're done, revisit any instructions which are likely to
- // have secondary reassociation opportunities.
- while (!RedoInsts.empty())
- if (Value *V = RedoInsts.pop_back_val()) {
- BasicBlock::iterator BBI = cast(V);
- ReassociateInst(BBI);
- }
+ for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE; ++BI) {
+ // Optimize every instruction in the basic block.
+ for (BasicBlock::iterator II = BI->begin(), IE = BI->end(); II != IE; )
+ if (isInstructionTriviallyDead(II)) {
+ EraseInst(II++);
+ } else {
+ OptimizeInst(II);
+ assert(II->getParent() == BI && "Moved to a different block!");
+ ++II;
+ }
 
- // Now that we're done, delete any instructions which are no longer used.
- while (!DeadInsts.empty())
- if (Value *V = DeadInsts.pop_back_val())
- RecursivelyDeleteTriviallyDeadInstructions(V);
+ // If this produced extra instructions to optimize, handle them now.
+ while (!RedoInsts.empty()) {
+ Instruction *I = RedoInsts.pop_back_val();
+ if (isInstructionTriviallyDead(I))
+ EraseInst(I);
+ else
+ OptimizeInst(I);
+ }
+ }
 
 // We are done with the rank map.
 RankMap.clear();
 ValueRankMap.clear();
+
 return MadeChange;
}
-
diff --git a/lib/Transforms/Scalar/Reg2Mem.cpp b/lib/Transforms/Scalar/Reg2Mem.cpp
index 47afc77..ea1de63 100644
--- a/lib/Transforms/Scalar/Reg2Mem.cpp
+++ b/lib/Transforms/Scalar/Reg2Mem.cpp
@@ -7,7 +7,7 @@
 //
 //===----------------------------------------------------------------------===//
 //
-// This file demotes all registers to memory references. It is intented to be
+// This file demotes all registers to memory references. It is intended to be
 // the inverse of PromoteMemoryToRegister. By converting to loads, the only
 // values live across basic blocks are allocas and loads before phi nodes.
 // It is intended that this should make CFG hacking much easier.
@@ -59,7 +59,7 @@ namespace {
 virtual bool runOnFunction(Function &F);
 };
}
-
+
char RegToMem::ID = 0;
INITIALIZE_PASS_BEGIN(RegToMem, "reg2mem", "Demote all values to stack slots",
 false, false)
@@ -68,25 +68,25 @@ INITIALIZE_PASS_END(RegToMem, "reg2mem",
 false, false)
 
bool RegToMem::runOnFunction(Function &F) {
- if (F.isDeclaration())
+ if (F.isDeclaration())
 return false;
-
+
 // Insert all new allocas into entry block.
 BasicBlock *BBEntry = &F.getEntryBlock();
 assert(pred_begin(BBEntry) == pred_end(BBEntry) &&
 "Entry block to function must not have predecessors!");
-
+
 // Find first non-alloca instruction and create insertion point. This is
 // safe if block is well-formed: it always has a terminator; otherwise
 // we'll get an assertion.
BasicBlock::iterator I = BBEntry->begin(); while (isa(I)) ++I; - + CastInst *AllocaInsertionPoint = new BitCastInst(Constant::getNullValue(Type::getInt32Ty(F.getContext())), Type::getInt32Ty(F.getContext()), "reg2mem alloca point", I); - + // Find the escaped instructions. But don't create stack slots for // allocas in entry block. std::list WorkList; @@ -99,15 +99,15 @@ bool RegToMem::runOnFunction(Function &F) { WorkList.push_front(&*iib); } } - + // Demote escaped instructions NumRegsDemoted += WorkList.size(); - for (std::list::iterator ilb = WorkList.begin(), + for (std::list::iterator ilb = WorkList.begin(), ile = WorkList.end(); ilb != ile; ++ilb) DemoteRegToStack(**ilb, false, AllocaInsertionPoint); - + WorkList.clear(); - + // Find all phi's for (Function::iterator ibb = F.begin(), ibe = F.end(); ibb != ibe; ++ibb) @@ -115,19 +115,18 @@ bool RegToMem::runOnFunction(Function &F) { iib != iie; ++iib) if (isa(iib)) WorkList.push_front(&*iib); - + // Demote phi nodes NumPhisDemoted += WorkList.size(); - for (std::list::iterator ilb = WorkList.begin(), + for (std::list::iterator ilb = WorkList.begin(), ile = WorkList.end(); ilb != ile; ++ilb) DemotePHIToStack(cast(*ilb), AllocaInsertionPoint); - + return true; } // createDemoteRegisterToMemory - Provide an entry point to create this pass. -// char &llvm::DemoteRegisterToMemoryID = RegToMem::ID; FunctionPass *llvm::createDemoteRegisterToMemoryPass() { return new RegToMem(); diff --git a/lib/Transforms/Scalar/SCCP.cpp b/lib/Transforms/Scalar/SCCP.cpp index 16b64a5..2c39aab 100644 --- a/lib/Transforms/Scalar/SCCP.cpp +++ b/lib/Transforms/Scalar/SCCP.cpp @@ -409,7 +409,7 @@ private: if (Constant *C = dyn_cast(V)) { Constant *Elt = C->getAggregateElement(i); - + if (Elt == 0) LV.markOverdefined(); // Unknown sort of constant. else if (isa(Elt)) diff --git a/lib/Transforms/Scalar/Scalar.cpp b/lib/Transforms/Scalar/Scalar.cpp index 7d65bcc..48318c8 100644 --- a/lib/Transforms/Scalar/Scalar.cpp +++ b/lib/Transforms/Scalar/Scalar.cpp @@ -7,7 +7,7 @@ // //===----------------------------------------------------------------------===// // -// This file implements common infrastructure for libLLVMScalarOpts.a, which +// This file implements common infrastructure for libLLVMScalarOpts.a, which // implements several scalar transformations over the LLVM intermediate // representation, including the C bindings for that library. // @@ -24,7 +24,7 @@ using namespace llvm; -/// initializeScalarOptsPasses - Initialize all passes linked into the +/// initializeScalarOptsPasses - Initialize all passes linked into the /// ScalarOpts library. 
void llvm::initializeScalarOpts(PassRegistry &Registry) {
 initializeADCEPass(Registry);
diff --git a/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
index 026fea1..6637126 100644
--- a/lib/Transforms/Scalar/ScalarReplAggregates.cpp
+++ b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
@@ -22,33 +22,34 @@
#define DEBUG_TYPE "scalarrepl"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Constants.h"
+#include "llvm/DIBuilder.h"
+#include "llvm/DebugInfo.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
#include "llvm/GlobalVariable.h"
+#include "llvm/IRBuilder.h"
#include "llvm/Instructions.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/LLVMContext.h"
#include "llvm/Module.h"
+#include "llvm/Operator.h"
#include "llvm/Pass.h"
-#include "llvm/Analysis/DebugInfo.h"
-#include "llvm/Analysis/DIBuilder.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/ValueTracking.h"
-#include "llvm/Target/TargetData.h"
-#include "llvm/Transforms/Utils/PromoteMemToReg.h"
-#include "llvm/Transforms/Utils/Local.h"
-#include "llvm/Transforms/Utils/SSAUpdater.h"
#include "llvm/Support/CallSite.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/GetElementPtrTypeIterator.h"
-#include "llvm/Support/IRBuilder.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/SetVector.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/Statistic.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/PromoteMemToReg.h"
+#include "llvm/Transforms/Utils/SSAUpdater.h"
using namespace llvm;
 
STATISTIC(NumReplaced, "Number of allocas broken up");
@@ -59,12 +60,25 @@ STATISTIC(NumGlobals, "Number of allocas copied from constant global");
 
namespace {
 struct SROA : public FunctionPass {
- SROA(int T, bool hasDT, char &ID)
+ SROA(int T, bool hasDT, char &ID, int ST, int AT, int SLT)
 : FunctionPass(ID), HasDomTree(hasDT) {
 if (T == -1)
 SRThreshold = 128;
 else
 SRThreshold = T;
+ if (ST == -1)
+ StructMemberThreshold = 32;
+ else
+ StructMemberThreshold = ST;
+ if (AT == -1)
+ ArrayElementThreshold = 8;
+ else
+ ArrayElementThreshold = AT;
+ if (SLT == -1)
+ // Do not limit the scalar integer load size if no threshold is given.
+ ScalarLoadThreshold = -1;
+ else
+ ScalarLoadThreshold = SLT;
 }
 
 bool runOnFunction(Function &F);
@@ -86,11 +100,11 @@ namespace {
 struct AllocaInfo {
 /// The alloca to promote.
 AllocaInst *AI;
-
+
 /// CheckedPHIs - This is a set of verified PHI nodes, to prevent infinite
 /// looping and avoid redundant work.
 SmallPtrSet CheckedPHIs;
-
+
 /// isUnsafe - This is set to true if the alloca cannot be SROA'd.
 bool isUnsafe : 1;
 
@@ -104,19 +118,32 @@ namespace {
 /// ever accessed, or false if the alloca is only accessed with mem
 /// intrinsics or load/store that only access the entire alloca at once.
 bool hasSubelementAccess : 1;
-
+
 /// hasALoadOrStore - This is true if there are any loads or stores to it.
 /// The alloca may just be accessed with memcpy, for example, which would
 /// not set this.
 bool hasALoadOrStore : 1;
-
+
 explicit AllocaInfo(AllocaInst *ai)
 : AI(ai), isUnsafe(false), isMemCpySrc(false), isMemCpyDst(false),
 hasSubelementAccess(false), hasALoadOrStore(false) {}
 };
 
+ /// SRThreshold - The maximum alloca size to be considered for SROA.
unsigned SRThreshold; + /// StructMemberThreshold - The maximum number of members a struct can + /// contain to be considered for SROA. + unsigned StructMemberThreshold; + + /// ArrayElementThreshold - The maximum number of elements an array can + /// have to be considered for SROA. + unsigned ArrayElementThreshold; + + /// ScalarLoadThreshold - The maximum size in bits of scalars to load when + /// converting to scalar + unsigned ScalarLoadThreshold; + void MarkUnsafe(AllocaInfo &I, Instruction *User) { I.isUnsafe = true; DEBUG(dbgs() << " Transformation preventing inst: " << *User << '\n'); @@ -155,19 +182,21 @@ namespace { SmallVector &NewElts); void RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocaInst *AI, SmallVector &NewElts); + bool ShouldAttemptScalarRepl(AllocaInst *AI); static MemTransferInst *isOnlyCopiedFromConstantGlobal( AllocaInst *AI, SmallVector &ToDelete); }; - + // SROA_DT - SROA that uses DominatorTree. struct SROA_DT : public SROA { static char ID; public: - SROA_DT(int T = -1) : SROA(T, true, ID) { + SROA_DT(int T = -1, int ST = -1, int AT = -1, int SLT = -1) : + SROA(T, true, ID, ST, AT, SLT) { initializeSROA_DTPass(*PassRegistry::getPassRegistry()); } - + // getAnalysisUsage - This pass does not require any passes, but we know it // will not alter the CFG, so say so. virtual void getAnalysisUsage(AnalysisUsage &AU) const { @@ -175,22 +204,23 @@ namespace { AU.setPreservesCFG(); } }; - + // SROA_SSAUp - SROA that uses SSAUpdater. struct SROA_SSAUp : public SROA { static char ID; public: - SROA_SSAUp(int T = -1) : SROA(T, false, ID) { + SROA_SSAUp(int T = -1, int ST = -1, int AT = -1, int SLT = -1) : + SROA(T, false, ID, ST, AT, SLT) { initializeSROA_SSAUpPass(*PassRegistry::getPassRegistry()); } - + // getAnalysisUsage - This pass does not require any passes, but we know it // will not alter the CFG, so say so. virtual void getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesCFG(); } }; - + } char SROA_DT::ID = 0; @@ -209,10 +239,15 @@ INITIALIZE_PASS_END(SROA_SSAUp, "scalarrepl-ssa", // Public interface to the ScalarReplAggregates pass FunctionPass *llvm::createScalarReplAggregatesPass(int Threshold, - bool UseDomTree) { + bool UseDomTree, + int StructMemberThreshold, + int ArrayElementThreshold, + int ScalarLoadThreshold) { if (UseDomTree) - return new SROA_DT(Threshold); - return new SROA_SSAUp(Threshold); + return new SROA_DT(Threshold, StructMemberThreshold, ArrayElementThreshold, + ScalarLoadThreshold); + return new SROA_SSAUp(Threshold, StructMemberThreshold, + ArrayElementThreshold, ScalarLoadThreshold); } @@ -228,6 +263,7 @@ class ConvertToScalarInfo { /// AllocaSize - The size of the alloca being considered in bytes. unsigned AllocaSize; const TargetData &TD; + unsigned ScalarLoadThreshold; /// IsNotTrivial - This is set to true if there is some access to the object /// which means that mem2reg can't promote it. @@ -258,28 +294,38 @@ class ConvertToScalarInfo { /// isn't possible to turn into a vector type, it gets set to VoidTy. VectorType *VectorTy; - /// HadNonMemTransferAccess - True if there is at least one access to the + /// HadNonMemTransferAccess - True if there is at least one access to the /// alloca that is not a MemTransferInst. We don't want to turn structs into /// large integers unless there is some potential for optimization. bool HadNonMemTransferAccess; + /// HadDynamicAccess - True if some element of this alloca was dynamic. + /// We don't yet have support for turning a dynamic access into a large + /// integer. 
+ bool HadDynamicAccess;
+
public:
- explicit ConvertToScalarInfo(unsigned Size, const TargetData &td)
- : AllocaSize(Size), TD(td), IsNotTrivial(false), ScalarKind(Unknown),
- VectorTy(0), HadNonMemTransferAccess(false) { }
+ explicit ConvertToScalarInfo(unsigned Size, const TargetData &td,
+ unsigned SLT)
+ : AllocaSize(Size), TD(td), ScalarLoadThreshold(SLT), IsNotTrivial(false),
+ ScalarKind(Unknown), VectorTy(0), HadNonMemTransferAccess(false),
+ HadDynamicAccess(false) { }
 
 AllocaInst *TryConvert(AllocaInst *AI);
 
private:
- bool CanConvertToScalar(Value *V, uint64_t Offset);
+ bool CanConvertToScalar(Value *V, uint64_t Offset, Value* NonConstantIdx);
 void MergeInTypeForLoadOrStore(Type *In, uint64_t Offset);
 bool MergeInVectorType(VectorType *VInTy, uint64_t Offset);
- void ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, uint64_t Offset);
+ void ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, uint64_t Offset,
+ Value *NonConstantIdx);
 
 Value *ConvertScalar_ExtractValue(Value *NV, Type *ToType,
- uint64_t Offset, IRBuilder<> &Builder);
+ uint64_t Offset, Value* NonConstantIdx,
+ IRBuilder<> &Builder);
 Value *ConvertScalar_InsertValue(Value *StoredVal, Value *ExistingVal,
- uint64_t Offset, IRBuilder<> &Builder);
+ uint64_t Offset, Value* NonConstantIdx,
+ IRBuilder<> &Builder);
};
} // end anonymous namespace.
 
@@ -290,7 +336,7 @@ private:
AllocaInst *ConvertToScalarInfo::TryConvert(AllocaInst *AI) {
 // If we can't convert this scalar, or if mem2reg can trivially do it, bail
 // out.
- if (!CanConvertToScalar(AI, 0) || !IsNotTrivial)
+ if (!CanConvertToScalar(AI, 0, 0) || !IsNotTrivial)
 return 0;
 
 // If an alloca has only memset / memcpy uses, it may still have an Unknown
@@ -315,16 +361,27 @@ AllocaInst *ConvertToScalarInfo::TryConvert(AllocaInst *AI) {
 NewTy = VectorTy; // Use the vector type.
 } else {
 unsigned BitWidth = AllocaSize * 8;
+
+ // Do not convert to scalar integer if the alloca size exceeds the
+ // scalar load threshold.
+ if (BitWidth > ScalarLoadThreshold)
+ return 0;
+
 if ((ScalarKind == ImplicitVector || ScalarKind == Integer) &&
 !HadNonMemTransferAccess && !TD.fitsInLegalInteger(BitWidth))
 return 0;
+ // Dynamic accesses on integers aren't yet supported. They need us to shift
+ // by a dynamic amount which could be difficult to work out as we might not
+ // know whether to use a left or right shift.
+ if (ScalarKind == Integer && HadDynamicAccess)
+ return 0;
 
 DEBUG(dbgs() << "CONVERT TO SCALAR INTEGER: " << *AI << "\n");
 // Create and insert the integer alloca.
 NewTy = IntegerType::get(AI->getContext(), BitWidth);
 }
 AllocaInst *NewAI = new AllocaInst(NewTy, 0, "", AI->getParent()->begin());
- ConvertUsesToScalar(AI, NewAI, 0);
+ ConvertUsesToScalar(AI, NewAI, 0, 0);
 return NewAI;
}
 
@@ -411,7 +468,8 @@ bool ConvertToScalarInfo::MergeInVectorType(VectorType *VInTy,
 ///
 /// If we see at least one access to the value that is a vector type, set the
 /// SawVec flag.
-bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset) {
+bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset,
+ Value* NonConstantIdx) {
 for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI!=E; ++UI) {
 Instruction *User = cast(*UI);
 
@@ -441,24 +499,35 @@ bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset) {
 if (BitCastInst *BCI = dyn_cast(User)) {
 if (!onlyUsedByLifetimeMarkers(BCI))
 IsNotTrivial = true; // Can't be mem2reg'd.
- if (!CanConvertToScalar(BCI, Offset))
+ if (!CanConvertToScalar(BCI, Offset, NonConstantIdx))
 return false;
 continue;
 }
 
 if (GetElementPtrInst *GEP = dyn_cast(User)) {
 // If this is a GEP with variable indices, we can't handle it.
- if (!GEP->hasAllConstantIndices())
+ PointerType* PtrTy = dyn_cast(GEP->getPointerOperandType());
+ if (!PtrTy)
 return false;
 
 // Compute the offset that this GEP adds to the pointer.
 SmallVector Indices(GEP->op_begin()+1, GEP->op_end());
- if (!GEP->getPointerOperandType()->isPointerTy())
- return false;
- uint64_t GEPOffset = TD.getIndexedOffset(GEP->getPointerOperandType(),
+ Value *GEPNonConstantIdx = 0;
+ if (!GEP->hasAllConstantIndices()) {
+ if (!isa(PtrTy->getElementType()))
+ return false;
+ if (NonConstantIdx)
+ return false;
+ GEPNonConstantIdx = Indices.pop_back_val();
+ if (!GEPNonConstantIdx->getType()->isIntegerTy(32))
+ return false;
+ HadDynamicAccess = true;
+ } else
+ GEPNonConstantIdx = NonConstantIdx;
+ uint64_t GEPOffset = TD.getIndexedOffset(PtrTy,
 Indices);
 
 // See if all uses can be converted.
- if (!CanConvertToScalar(GEP, Offset+GEPOffset))
+ if (!CanConvertToScalar(GEP, Offset+GEPOffset, GEPNonConstantIdx))
 return false;
 IsNotTrivial = true; // Can't be mem2reg'd.
 HadNonMemTransferAccess = true;
@@ -468,6 +537,9 @@ bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset) {
 // If this is a constant sized memset of a constant value (e.g. 0) we can
 // handle it.
 if (MemSetInst *MSI = dyn_cast(User)) {
+ // Store to dynamic index.
+ if (NonConstantIdx)
+ return false;
 // Store of constant value.
 if (!isa(MSI->getValue()))
 return false;
@@ -492,6 +564,9 @@ bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset) {
 // If this is a memcpy or memmove into or out of the whole allocation, we
 // can handle it like a load or store of the scalar type.
 if (MemTransferInst *MTI = dyn_cast(User)) {
+ // Store to dynamic index.
+ if (NonConstantIdx)
+ return false;
 ConstantInt *Len = dyn_cast(MTI->getLength());
 if (Len == 0 || Len->getZExtValue() != AllocaSize || Offset != 0)
 return false;
@@ -523,12 +598,13 @@ bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset) {
 /// Offset is an offset from the original alloca, in bits that need to be
 /// shifted to the right. By the end of this, there should be no uses of Ptr.
 void ConvertToScalarInfo::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI,
- uint64_t Offset) {
+ uint64_t Offset,
+ Value* NonConstantIdx) {
 while (!Ptr->use_empty()) {
 Instruction *User = cast(Ptr->use_back());
 
 if (BitCastInst *CI = dyn_cast(User)) {
- ConvertUsesToScalar(CI, NewAI, Offset);
+ ConvertUsesToScalar(CI, NewAI, Offset, NonConstantIdx);
 CI->eraseFromParent();
 continue;
 }
 
 if (GetElementPtrInst *GEP = dyn_cast(User)) {
 // Compute the offset that this GEP adds to the pointer.
SmallVector Indices(GEP->op_begin()+1, GEP->op_end()); + Value* GEPNonConstantIdx = 0; + if (!GEP->hasAllConstantIndices()) { + assert(!NonConstantIdx && + "Dynamic GEP reading from dynamic GEP unsupported"); + GEPNonConstantIdx = Indices.pop_back_val(); + } else + GEPNonConstantIdx = NonConstantIdx; uint64_t GEPOffset = TD.getIndexedOffset(GEP->getPointerOperandType(), Indices); - ConvertUsesToScalar(GEP, NewAI, Offset+GEPOffset*8); + ConvertUsesToScalar(GEP, NewAI, Offset+GEPOffset*8, GEPNonConstantIdx); GEP->eraseFromParent(); continue; } @@ -549,7 +632,8 @@ void ConvertToScalarInfo::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, // The load is a bit extract from NewAI shifted right by Offset bits. Value *LoadedVal = Builder.CreateLoad(NewAI); Value *NewLoadVal - = ConvertScalar_ExtractValue(LoadedVal, LI->getType(), Offset, Builder); + = ConvertScalar_ExtractValue(LoadedVal, LI->getType(), Offset, + NonConstantIdx, Builder); LI->replaceAllUsesWith(NewLoadVal); LI->eraseFromParent(); continue; @@ -559,7 +643,7 @@ void ConvertToScalarInfo::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, assert(SI->getOperand(0) != Ptr && "Consistency error!"); Instruction *Old = Builder.CreateLoad(NewAI, NewAI->getName()+".in"); Value *New = ConvertScalar_InsertValue(SI->getOperand(0), Old, Offset, - Builder); + NonConstantIdx, Builder); Builder.CreateStore(New, NewAI); SI->eraseFromParent(); @@ -574,6 +658,7 @@ void ConvertToScalarInfo::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, // transform it into a store of the expanded constant value. if (MemSetInst *MSI = dyn_cast(User)) { assert(MSI->getRawDest() == Ptr && "Consistency error!"); + assert(!NonConstantIdx && "Cannot replace dynamic memset with insert"); int64_t SNumBytes = cast(MSI->getLength())->getSExtValue(); if (SNumBytes > 0 && (SNumBytes >> 32) == 0) { unsigned NumBytes = static_cast(SNumBytes); @@ -590,7 +675,7 @@ void ConvertToScalarInfo::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, Instruction *Old = Builder.CreateLoad(NewAI, NewAI->getName()+".in"); Value *New = ConvertScalar_InsertValue( ConstantInt::get(User->getContext(), APVal), - Old, Offset, Builder); + Old, Offset, 0, Builder); Builder.CreateStore(New, NewAI); // If the load we just inserted is now dead, then the memset overwrote @@ -606,6 +691,7 @@ void ConvertToScalarInfo::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, // can handle it like a load or store of the scalar type. if (MemTransferInst *MTI = dyn_cast(User)) { assert(Offset == 0 && "must be store to start of alloca"); + assert(!NonConstantIdx && "Cannot replace dynamic transfer with insert"); // If the source and destination are both to the same alloca, then this is // a noop copy-to-self, just delete it. Otherwise, emit a load and store @@ -678,7 +764,8 @@ void ConvertToScalarInfo::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, /// shifted to the right. Value *ConvertToScalarInfo:: ConvertScalar_ExtractValue(Value *FromVal, Type *ToType, - uint64_t Offset, IRBuilder<> &Builder) { + uint64_t Offset, Value* NonConstantIdx, + IRBuilder<> &Builder) { // If the load is of the whole new alloca, no conversion is needed. Type *FromType = FromVal->getType(); if (FromType == ToType && Offset == 0) @@ -700,7 +787,17 @@ ConvertScalar_ExtractValue(Value *FromVal, Type *ToType, assert(EltSize*Elt == Offset && "Invalid modulus in validity checking"); } // Return the element extracted out of it. 
- Value *V = Builder.CreateExtractElement(FromVal, Builder.getInt32(Elt)); + Value *Idx; + if (NonConstantIdx) { + if (Elt) + Idx = Builder.CreateAdd(NonConstantIdx, + Builder.getInt32(Elt), + "dyn.offset"); + else + Idx = NonConstantIdx; + } else + Idx = Builder.getInt32(Elt); + Value *V = Builder.CreateExtractElement(FromVal, Idx); if (V->getType() != ToType) V = Builder.CreateBitCast(V, ToType); return V; @@ -709,23 +806,27 @@ ConvertScalar_ExtractValue(Value *FromVal, Type *ToType, // If ToType is a first class aggregate, extract out each of the pieces and // use insertvalue's to form the FCA. if (StructType *ST = dyn_cast(ToType)) { + assert(!NonConstantIdx && + "Dynamic indexing into struct types not supported"); const StructLayout &Layout = *TD.getStructLayout(ST); Value *Res = UndefValue::get(ST); for (unsigned i = 0, e = ST->getNumElements(); i != e; ++i) { Value *Elt = ConvertScalar_ExtractValue(FromVal, ST->getElementType(i), Offset+Layout.getElementOffsetInBits(i), - Builder); + 0, Builder); Res = Builder.CreateInsertValue(Res, Elt, i); } return Res; } if (ArrayType *AT = dyn_cast(ToType)) { + assert(!NonConstantIdx && + "Dynamic indexing into array types not supported"); uint64_t EltSize = TD.getTypeAllocSizeInBits(AT->getElementType()); Value *Res = UndefValue::get(AT); for (unsigned i = 0, e = AT->getNumElements(); i != e; ++i) { Value *Elt = ConvertScalar_ExtractValue(FromVal, AT->getElementType(), - Offset+i*EltSize, Builder); + Offset+i*EltSize, 0, Builder); Res = Builder.CreateInsertValue(Res, Elt, i); } return Res; @@ -791,9 +892,14 @@ ConvertScalar_ExtractValue(Value *FromVal, Type *ToType, /// /// Offset is an offset from the original alloca, in bits that need to be /// shifted to the right. +/// +/// NonConstantIdx is an index value if there was a GEP with a non-constant +/// index value. If this is 0 then all GEPs used to find this insert address +/// are constant. Value *ConvertToScalarInfo:: ConvertScalar_InsertValue(Value *SV, Value *Old, - uint64_t Offset, IRBuilder<> &Builder) { + uint64_t Offset, Value* NonConstantIdx, + IRBuilder<> &Builder) { // Convert the stored type to the actual type, shift it left to insert // then 'or' into place. Type *AllocaType = Old->getType(); @@ -814,26 +920,40 @@ ConvertScalar_InsertValue(Value *SV, Value *Old, SV = Builder.CreateBitCast(SV, EltTy); uint64_t EltSize = TD.getTypeAllocSizeInBits(EltTy); unsigned Elt = Offset/EltSize; - return Builder.CreateInsertElement(Old, SV, Builder.getInt32(Elt)); + Value *Idx; + if (NonConstantIdx) { + if (Elt) + Idx = Builder.CreateAdd(NonConstantIdx, + Builder.getInt32(Elt), + "dyn.offset"); + else + Idx = NonConstantIdx; + } else + Idx = Builder.getInt32(Elt); + return Builder.CreateInsertElement(Old, SV, Idx); } // If SV is a first-class aggregate value, insert each value recursively. 
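// (Sketch of the rewrite just shown, names hypothetical: a load through a
//  dynamic vector GEP becomes an extractelement of the scalarized value,
//    %v = load float* %p              ; %p = GEP %A, i32 0, i32 %i
//  turning into
//    %all = load <4 x float>* %NewAI
//    %v   = extractelement <4 x float> %all, i32 %i
//  with a "dyn.offset" add folded into the index when the constant part
//  of the offset selects a nonzero starting element. Stores take the dual
//  load/insertelement/store form.)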
if (StructType *ST = dyn_cast(SV->getType())) { + assert(!NonConstantIdx && + "Dynamic indexing into struct types not supported"); const StructLayout &Layout = *TD.getStructLayout(ST); for (unsigned i = 0, e = ST->getNumElements(); i != e; ++i) { Value *Elt = Builder.CreateExtractValue(SV, i); Old = ConvertScalar_InsertValue(Elt, Old, Offset+Layout.getElementOffsetInBits(i), - Builder); + 0, Builder); } return Old; } if (ArrayType *AT = dyn_cast(SV->getType())) { + assert(!NonConstantIdx && + "Dynamic indexing into array types not supported"); uint64_t EltSize = TD.getTypeAllocSizeInBits(AT->getElementType()); for (unsigned i = 0, e = AT->getNumElements(); i != e; ++i) { Value *Elt = Builder.CreateExtractValue(SV, i); - Old = ConvertScalar_InsertValue(Elt, Old, Offset+i*EltSize, Builder); + Old = ConvertScalar_InsertValue(Elt, Old, Offset+i*EltSize, 0, Builder); } return Old; } @@ -935,7 +1055,7 @@ public: AllocaPromoter(const SmallVectorImpl &Insts, SSAUpdater &S, DIBuilder *DB) : LoadAndStorePromoter(Insts, S), AI(0), DIB(DB) {} - + void run(AllocaInst *AI, const SmallVectorImpl &Insts) { // Remember which alloca we're promoting (for isInstInList). this->AI = AI; @@ -950,18 +1070,18 @@ public: LoadAndStorePromoter::run(Insts); AI->eraseFromParent(); - for (SmallVector::iterator I = DDIs.begin(), + for (SmallVector::iterator I = DDIs.begin(), E = DDIs.end(); I != E; ++I) { DbgDeclareInst *DDI = *I; DDI->eraseFromParent(); } - for (SmallVector::iterator I = DVIs.begin(), + for (SmallVector::iterator I = DVIs.begin(), E = DVIs.end(); I != E; ++I) { DbgValueInst *DVI = *I; DVI->eraseFromParent(); } } - + virtual bool isInstInList(Instruction *I, const SmallVectorImpl &Insts) const { if (LoadInst *LI = dyn_cast(I)) @@ -970,7 +1090,7 @@ public: } virtual void updateDebugInfo(Instruction *Inst) const { - for (SmallVector::const_iterator I = DDIs.begin(), + for (SmallVector::const_iterator I = DDIs.begin(), E = DDIs.end(); I != E; ++I) { DbgDeclareInst *DDI = *I; if (StoreInst *SI = dyn_cast(Inst)) @@ -978,7 +1098,7 @@ public: else if (LoadInst *LI = dyn_cast(Inst)) ConvertDebugDeclareToDebugValue(DDI, LI, *DIB); } - for (SmallVector::const_iterator I = DVIs.begin(), + for (SmallVector::const_iterator I = DVIs.begin(), E = DVIs.end(); I != E; ++I) { DbgValueInst *DVI = *I; Value *Arg = NULL; @@ -1021,12 +1141,12 @@ public: static bool isSafeSelectToSpeculate(SelectInst *SI, const TargetData *TD) { bool TDerefable = SI->getTrueValue()->isDereferenceablePointer(); bool FDerefable = SI->getFalseValue()->isDereferenceablePointer(); - + for (Value::use_iterator UI = SI->use_begin(), UE = SI->use_end(); UI != UE; ++UI) { LoadInst *LI = dyn_cast(*UI); if (LI == 0 || !LI->isSimple()) return false; - + // Both operands to the select need to be dereferencable, either absolutely // (e.g. allocas) or at this point because we can see other accesses to it. if (!TDerefable && !isSafeToLoadUnconditionally(SI->getTrueValue(), LI, @@ -1036,7 +1156,7 @@ static bool isSafeSelectToSpeculate(SelectInst *SI, const TargetData *TD) { LI->getAlignment(), TD)) return false; } - + return true; } @@ -1067,20 +1187,20 @@ static bool isSafePHIToSpeculate(PHINode *PN, const TargetData *TD) { UI != UE; ++UI) { LoadInst *LI = dyn_cast(*UI); if (LI == 0 || !LI->isSimple()) return false; - + // For now we only allow loads in the same block as the PHI. This is a // common case that happens when instcombine merges two loads through a PHI. 
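// (Sketch of the load speculation being checked here, names hypothetical:
//    %p = select i1 %c, i32* %a, i32* %b
//    %v = load i32* %p
//  can be rewritten as
//    %v.t = load i32* %a
//    %v.f = load i32* %b
//    %v   = select i1 %c, i32 %v.t, i32 %v.f
//  provided both pointers are dereferenceable at that point; the PHI case
//  below is the same idea with the loads pushed into predecessor blocks.)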
if (LI->getParent() != BB) return false; - + // Ensure that there are no instructions between the PHI and the load that // could store. for (BasicBlock::iterator BBI = PN; &*BBI != LI; ++BBI) if (BBI->mayWriteToMemory()) return false; - + MaxAlign = std::max(MaxAlign, LI->getAlignment()); } - + // Okay, we know that we have one or more loads in the same block as the PHI. // We can transform this if it is safe to push the loads into the predecessor // blocks. The only thing to watch out for is that we can't put a possibly @@ -1108,10 +1228,10 @@ static bool isSafePHIToSpeculate(PHINode *PN, const TargetData *TD) { if (InVal->isDereferenceablePointer() || isSafeToLoadUnconditionally(InVal, Pred->getTerminator(), MaxAlign, TD)) continue; - + return false; } - + return true; } @@ -1123,7 +1243,7 @@ static bool isSafePHIToSpeculate(PHINode *PN, const TargetData *TD) { static bool tryToMakeAllocaBePromotable(AllocaInst *AI, const TargetData *TD) { SetVector, SmallPtrSet > InstsToRewrite; - + for (Value::use_iterator UI = AI->use_begin(), UE = AI->use_end(); UI != UE; ++UI) { User *U = *UI; @@ -1132,7 +1252,7 @@ static bool tryToMakeAllocaBePromotable(AllocaInst *AI, const TargetData *TD) { return false; continue; } - + if (StoreInst *SI = dyn_cast(U)) { if (SI->getOperand(0) == AI || !SI->isSimple()) return false; // Don't allow a store OF the AI, only INTO the AI. @@ -1146,7 +1266,7 @@ static bool tryToMakeAllocaBePromotable(AllocaInst *AI, const TargetData *TD) { Value *Result = SI->getOperand(1+CI->isZero()); SI->replaceAllUsesWith(Result); SI->eraseFromParent(); - + // This is very rare and we just scrambled the use list of AI, start // over completely. return tryToMakeAllocaBePromotable(AI, TD); @@ -1156,33 +1276,33 @@ static bool tryToMakeAllocaBePromotable(AllocaInst *AI, const TargetData *TD) { // loads, then we can transform this by rewriting the select. if (!isSafeSelectToSpeculate(SI, TD)) return false; - + InstsToRewrite.insert(SI); continue; } - + if (PHINode *PN = dyn_cast(U)) { if (PN->use_empty()) { // Dead PHIs can be stripped. InstsToRewrite.insert(PN); continue; } - + // If it is safe to turn "load (phi [AI, ptr, ...])" into a PHI of loads // in the pred blocks, then we can transform this by rewriting the PHI. if (!isSafePHIToSpeculate(PN, TD)) return false; - + InstsToRewrite.insert(PN); continue; } - + if (BitCastInst *BCI = dyn_cast(U)) { if (onlyUsedByLifetimeMarkers(BCI)) { InstsToRewrite.insert(BCI); continue; } } - + return false; } @@ -1190,7 +1310,7 @@ static bool tryToMakeAllocaBePromotable(AllocaInst *AI, const TargetData *TD) { // we're done! if (InstsToRewrite.empty()) return true; - + // If we have instructions that need to be rewritten for this to be promotable // take care of it now. for (unsigned i = 0, e = InstsToRewrite.size(); i != e; ++i) { @@ -1211,13 +1331,13 @@ static bool tryToMakeAllocaBePromotable(AllocaInst *AI, const TargetData *TD) { // loads with a new select. while (!SI->use_empty()) { LoadInst *LI = cast(SI->use_back()); - + IRBuilder<> Builder(LI); - LoadInst *TrueLoad = + LoadInst *TrueLoad = Builder.CreateLoad(SI->getTrueValue(), LI->getName()+".t"); - LoadInst *FalseLoad = + LoadInst *FalseLoad = Builder.CreateLoad(SI->getFalseValue(), LI->getName()+".f"); - + // Transfer alignment and TBAA info if present. 
TrueLoad->setAlignment(LI->getAlignment()); FalseLoad->setAlignment(LI->getAlignment()); @@ -1225,18 +1345,18 @@ static bool tryToMakeAllocaBePromotable(AllocaInst *AI, const TargetData *TD) { TrueLoad->setMetadata(LLVMContext::MD_tbaa, Tag); FalseLoad->setMetadata(LLVMContext::MD_tbaa, Tag); } - + Value *V = Builder.CreateSelect(SI->getCondition(), TrueLoad, FalseLoad); V->takeName(LI); LI->replaceAllUsesWith(V); LI->eraseFromParent(); } - + // Now that all the loads are gone, the select is gone too. SI->eraseFromParent(); continue; } - + // Otherwise, we have a PHI node which allows us to push the loads into the // predecessors. PHINode *PN = cast(InstsToRewrite[i]); @@ -1244,7 +1364,7 @@ static bool tryToMakeAllocaBePromotable(AllocaInst *AI, const TargetData *TD) { PN->eraseFromParent(); continue; } - + Type *LoadTy = cast(PN->getType())->getElementType(); PHINode *NewPN = PHINode::Create(LoadTy, PN->getNumIncomingValues(), PN->getName()+".ld", PN); @@ -1254,18 +1374,18 @@ static bool tryToMakeAllocaBePromotable(AllocaInst *AI, const TargetData *TD) { LoadInst *SomeLoad = cast(PN->use_back()); MDNode *TBAATag = SomeLoad->getMetadata(LLVMContext::MD_tbaa); unsigned Align = SomeLoad->getAlignment(); - + // Rewrite all loads of the PN to use the new PHI. while (!PN->use_empty()) { LoadInst *LI = cast(PN->use_back()); LI->replaceAllUsesWith(NewPN); LI->eraseFromParent(); } - + // Inject loads into all of the pred blocks. Keep track of which blocks we // insert them into in case we have multiple edges from the same block. DenseMap InsertedLoads; - + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { BasicBlock *Pred = PN->getIncomingBlock(i); LoadInst *&Load = InsertedLoads[Pred]; @@ -1276,13 +1396,13 @@ static bool tryToMakeAllocaBePromotable(AllocaInst *AI, const TargetData *TD) { Load->setAlignment(Align); if (TBAATag) Load->setMetadata(LLVMContext::MD_tbaa, TBAATag); } - + NewPN->addIncoming(Load, Pred); } - + PN->eraseFromParent(); } - + ++NumAdjusted; return true; } @@ -1315,7 +1435,7 @@ bool SROA::performPromotion(Function &F) { SSAUpdater SSA; for (unsigned i = 0, e = Allocas.size(); i != e; ++i) { AllocaInst *AI = Allocas[i]; - + // Build list of instructions to promote. for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); UI != E; ++UI) @@ -1334,18 +1454,36 @@ bool SROA::performPromotion(Function &F) { /// ShouldAttemptScalarRepl - Decide if an alloca is a good candidate for /// SROA. It must be a struct or array type with a small number of elements. -static bool ShouldAttemptScalarRepl(AllocaInst *AI) { +bool SROA::ShouldAttemptScalarRepl(AllocaInst *AI) { Type *T = AI->getAllocatedType(); - // Do not promote any struct into more than 32 separate vars. + // Do not promote any struct that has too many members. if (StructType *ST = dyn_cast(T)) - return ST->getNumElements() <= 32; - // Arrays are much less likely to be safe for SROA; only consider - // them if they are very small. + return ST->getNumElements() <= StructMemberThreshold; + // Do not promote any array that has too many elements. if (ArrayType *AT = dyn_cast(T)) - return AT->getNumElements() <= 8; + return AT->getNumElements() <= ArrayElementThreshold; return false; } +/// getPointeeAlignment - Compute the minimum alignment of the value pointed +/// to by the given pointer. 
+static unsigned getPointeeAlignment(Value *V, const TargetData &TD) { + if (ConstantExpr *CE = dyn_cast(V)) + if (CE->getOpcode() == Instruction::BitCast || + (CE->getOpcode() == Instruction::GetElementPtr && + cast(CE)->hasAllZeroIndices())) + return getPointeeAlignment(CE->getOperand(0), TD); + + if (GlobalVariable *GV = dyn_cast(V)) + if (!GV->isDeclaration()) + return TD.getPreferredAlignment(GV); + + if (PointerType *PT = dyn_cast(V->getType())) + return TD.getABITypeAlignment(PT->getElementType()); + + return 0; +} + // performScalarRepl - This algorithm is a simple worklist driven algorithm, // which runs on all of the alloca instructions in the function, removing them @@ -1379,23 +1517,26 @@ bool SROA::performScalarRepl(Function &F) { continue; // Check to see if this allocation is only modified by a memcpy/memmove from - // a constant global. If this is the case, we can change all users to use + // a constant global whose alignment is equal to or exceeds that of the + // allocation. If this is the case, we can change all users to use // the constant global instead. This is commonly produced by the CFE by // constructs like "void foo() { int A[] = {1,2,3,4,5,6,7,8,9...}; }" if 'A' // is only subsequently read. SmallVector ToDelete; if (MemTransferInst *Copy = isOnlyCopiedFromConstantGlobal(AI, ToDelete)) { - DEBUG(dbgs() << "Found alloca equal to global: " << *AI << '\n'); - DEBUG(dbgs() << " memcpy = " << *Copy << '\n'); - for (unsigned i = 0, e = ToDelete.size(); i != e; ++i) - ToDelete[i]->eraseFromParent(); - Constant *TheSrc = cast(Copy->getSource()); - AI->replaceAllUsesWith(ConstantExpr::getBitCast(TheSrc, AI->getType())); - Copy->eraseFromParent(); // Don't mutate the global. - AI->eraseFromParent(); - ++NumGlobals; - Changed = true; - continue; + if (AI->getAlignment() <= getPointeeAlignment(Copy->getSource(), *TD)) { + DEBUG(dbgs() << "Found alloca equal to global: " << *AI << '\n'); + DEBUG(dbgs() << " memcpy = " << *Copy << '\n'); + for (unsigned i = 0, e = ToDelete.size(); i != e; ++i) + ToDelete[i]->eraseFromParent(); + Constant *TheSrc = cast(Copy->getSource()); + AI->replaceAllUsesWith(ConstantExpr::getBitCast(TheSrc, AI->getType())); + Copy->eraseFromParent(); // Don't mutate the global. + AI->eraseFromParent(); + ++NumGlobals; + Changed = true; + continue; + } } // Check to see if we can perform the core SROA transformation. We cannot @@ -1425,8 +1566,8 @@ bool SROA::performScalarRepl(Function &F) { // promoted itself. If so, we don't want to transform it needlessly. Note // that we can't just check based on the type: the alloca may be of an i32 // but that has pointer arithmetic to set byte 3 of it or something. 
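// (On the alignment guard added above, names hypothetical:
//    @G = internal constant [16 x i8] zeroinitializer, align 4
//    %A = alloca [16 x i8], align 16    ; initialized by memcpy from @G
//  Replacing %A with a bitcast of @G would silently drop the 16-byte
//  alignment that users of %A may rely on, so the rewrite now fires only
//  when getPointeeAlignment of the source is at least the alloca's
//  alignment.)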
- if (AllocaInst *NewAI = - ConvertToScalarInfo((unsigned)AllocaSize, *TD).TryConvert(AI)) { + if (AllocaInst *NewAI = ConvertToScalarInfo( + (unsigned)AllocaSize, *TD, ScalarLoadThreshold).TryConvert(AI)) { NewAI->takeName(AI); AI->eraseFromParent(); ++NumConverted; @@ -1531,12 +1672,12 @@ void SROA::isSafeForScalarRepl(Instruction *I, uint64_t Offset, isSafeMemAccess(Offset, TD->getTypeAllocSize(LIType), LIType, false, Info, LI, true /*AllowWholeAccess*/); Info.hasALoadOrStore = true; - + } else if (StoreInst *SI = dyn_cast(User)) { // Store is ok if storing INTO the pointer, not storing the pointer if (!SI->isSimple() || SI->getOperand(0) == I) return MarkUnsafe(Info, User); - + Type *SIType = SI->getOperand(0)->getType(); isSafeMemAccess(Offset, TD->getTypeAllocSize(SIType), SIType, true, Info, SI, true /*AllowWholeAccess*/); @@ -1553,7 +1694,7 @@ void SROA::isSafeForScalarRepl(Instruction *I, uint64_t Offset, if (Info.isUnsafe) return; } } - + /// isSafePHIUseForScalarRepl - If we see a PHI node or select using a pointer /// derived from the alloca, we can often still split the alloca into elements. @@ -1570,10 +1711,10 @@ void SROA::isSafePHISelectUseForScalarRepl(Instruction *I, uint64_t Offset, if (PHINode *PN = dyn_cast(I)) if (!Info.CheckedPHIs.insert(PN)) return; - + for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI!=E; ++UI) { Instruction *User = cast(*UI); - + if (BitCastInst *BC = dyn_cast(User)) { isSafePHISelectUseForScalarRepl(BC, Offset, Info); } else if (GetElementPtrInst *GEPI = dyn_cast(User)) { @@ -1590,12 +1731,12 @@ void SROA::isSafePHISelectUseForScalarRepl(Instruction *I, uint64_t Offset, isSafeMemAccess(Offset, TD->getTypeAllocSize(LIType), LIType, false, Info, LI, false /*AllowWholeAccess*/); Info.hasALoadOrStore = true; - + } else if (StoreInst *SI = dyn_cast(User)) { // Store is ok if storing INTO the pointer, not storing the pointer if (!SI->isSimple() || SI->getOperand(0) == I) return MarkUnsafe(Info, User); - + Type *SIType = SI->getOperand(0)->getType(); isSafeMemAccess(Offset, TD->getTypeAllocSize(SIType), SIType, true, Info, SI, false /*AllowWholeAccess*/); @@ -1619,6 +1760,8 @@ void SROA::isSafeGEP(GetElementPtrInst *GEPI, gep_type_iterator GEPIt = gep_type_begin(GEPI), E = gep_type_end(GEPI); if (GEPIt == E) return; + bool NonConstant = false; + unsigned NonConstantIdxSize = 0; // Walk through the GEP type indices, checking the types that this indexes // into. @@ -1628,15 +1771,30 @@ void SROA::isSafeGEP(GetElementPtrInst *GEPI, continue; ConstantInt *IdxVal = dyn_cast(GEPIt.getOperand()); - if (!IdxVal) - return MarkUnsafe(Info, GEPI); + if (!IdxVal) { + // Non constant GEPs are only a problem on arrays, structs, and pointers + // Vectors can be dynamically indexed. + // FIXME: Add support for dynamic indexing on arrays. This should be + // ok on any subarrays of the alloca array, eg, a[0][i] is ok, but a[i][0] + // isn't. + if (!(*GEPIt)->isVectorTy()) + return MarkUnsafe(Info, GEPI); + NonConstant = true; + NonConstantIdxSize = TD->getTypeAllocSize(*GEPIt); + } } // Compute the offset due to this GEP and check if the alloca has a // component element at that offset. SmallVector Indices(GEPI->op_begin() + 1, GEPI->op_end()); + // If this GEP is non constant then the last operand must have been a + // dynamic index into a vector. Pop this now as it has no impact on the + // constant part of the offset. 
+ if (NonConstant) + Indices.pop_back(); Offset += TD->getIndexedOffset(GEPI->getPointerOperandType(), Indices); - if (!TypeHasComponent(Info.AI->getAllocatedType(), Offset, 0)) + if (!TypeHasComponent(Info.AI->getAllocatedType(), Offset, + NonConstantIdxSize)) MarkUnsafe(Info, GEPI); } @@ -1741,6 +1899,12 @@ bool SROA::TypeHasComponent(Type *T, uint64_t Offset, uint64_t Size) { if (Offset >= AT->getNumElements() * EltSize) return false; Offset %= EltSize; + } else if (VectorType *VT = dyn_cast(T)) { + EltTy = VT->getElementType(); + EltSize = TD->getTypeAllocSize(EltTy); + if (Offset >= VT->getNumElements() * EltSize) + return false; + Offset %= EltSize; } else { return false; } @@ -1766,12 +1930,12 @@ void SROA::RewriteForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset, RewriteBitCast(BC, AI, Offset, NewElts); continue; } - + if (GetElementPtrInst *GEPI = dyn_cast(User)) { RewriteGEP(GEPI, AI, Offset, NewElts); continue; } - + if (MemIntrinsic *MI = dyn_cast(User)) { ConstantInt *Length = dyn_cast(MI->getLength()); uint64_t MemSize = Length->getZExtValue(); @@ -1790,10 +1954,10 @@ void SROA::RewriteForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset, } continue; } - + if (LoadInst *LI = dyn_cast(User)) { Type *LIType = LI->getType(); - + if (isCompatibleAggregate(LIType, AI->getAllocatedType())) { // Replace: // %res = load { i32, i32 }* %alloc @@ -1819,7 +1983,7 @@ void SROA::RewriteForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset, } continue; } - + if (StoreInst *SI = dyn_cast(User)) { Value *Val = SI->getOperand(0); Type *SIType = Val->getType(); @@ -1846,16 +2010,16 @@ void SROA::RewriteForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset, } continue; } - + if (isa(User) || isa(User)) { - // If we have a PHI user of the alloca itself (as opposed to a GEP or + // If we have a PHI user of the alloca itself (as opposed to a GEP or // bitcast) we have to rewrite it. GEP and bitcast uses will be RAUW'd to // the new pointer. if (!isa(I)) continue; - + assert(Offset == 0 && NewElts[0] && "Direct alloca use should have a zero offset"); - + // If we have a use of the alloca, we know the derived uses will be // utilizing just the first element of the scalarized result. Insert a // bitcast of the first alloca before the user as required. @@ -1908,9 +2072,16 @@ uint64_t SROA::FindElementAndOffset(Type *&T, uint64_t &Offset, Offset -= Layout->getElementOffset(Idx); IdxTy = Type::getInt32Ty(T->getContext()); return Idx; + } else if (ArrayType *AT = dyn_cast(T)) { + T = AT->getElementType(); + uint64_t EltSize = TD->getTypeAllocSize(T); + Idx = Offset / EltSize; + Offset -= Idx * EltSize; + IdxTy = Type::getInt64Ty(T->getContext()); + return Idx; } - ArrayType *AT = cast(T); - T = AT->getElementType(); + VectorType *VT = cast(T); + T = VT->getElementType(); uint64_t EltSize = TD->getTypeAllocSize(T); Idx = Offset / EltSize; Offset -= Idx * EltSize; @@ -1925,6 +2096,13 @@ void SROA::RewriteGEP(GetElementPtrInst *GEPI, AllocaInst *AI, uint64_t Offset, SmallVector &NewElts) { uint64_t OldOffset = Offset; SmallVector Indices(GEPI->op_begin() + 1, GEPI->op_end()); + // If the GEP was dynamic then it must have been a dynamic vector lookup. + // In this case, it must be the last GEP operand which is dynamic so keep that + // aside until we've found the constant GEP offset then add it back in at the + // end. 
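// (Sketch, names hypothetical: after %A = alloca { <4 x float>, i32 } is
//  split, a dynamic access
//    %p = getelementptr { <4 x float>, i32 }* %A, i32 0, i32 0, i32 %i
//  is rebuilt against the element alloca roughly as
//    %p = getelementptr <4 x float>* %A.0, i32 0, i32 %i
//  with "i32 0" indices regenerated as needed to step through any structs
//  or arrays wrapping the vector.)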
+ Value* NonConstantIdx = 0; + if (!GEPI->hasAllConstantIndices()) + NonConstantIdx = Indices.pop_back_val(); Offset += TD->getIndexedOffset(GEPI->getPointerOperandType(), Indices); RewriteForScalarRepl(GEPI, AI, Offset, NewElts); @@ -1951,6 +2129,17 @@ void SROA::RewriteGEP(GetElementPtrInst *GEPI, AllocaInst *AI, uint64_t Offset, uint64_t EltIdx = FindElementAndOffset(T, EltOffset, IdxTy); NewArgs.push_back(ConstantInt::get(IdxTy, EltIdx)); } + if (NonConstantIdx) { + Type* GepTy = T; + // This GEP has a dynamic index. We need to add "i32 0" to index through + // any structs or arrays in the original type until we get to the vector + // to index. + while (!isa(GepTy)) { + NewArgs.push_back(Constant::getNullValue(i32Ty)); + GepTy = cast(GepTy)->getTypeAtIndex(0U); + } + NewArgs.push_back(NonConstantIdx); + } Instruction *Val = NewElts[Idx]; if (NewArgs.size() > 1) { Val = GetElementPtrInst::CreateInBounds(Val, NewArgs, "", GEPI); @@ -2202,7 +2391,7 @@ void SROA::RewriteStoreUserOfWholeAlloca(StoreInst *SI, AllocaInst *AI, uint64_t AllocaSizeBits = TD->getTypeAllocSizeInBits(AllocaEltTy); IRBuilder<> Builder(SI); - + // Handle tail padding by extending the operand if (TD->getTypeSizeInBits(SrcVal->getType()) != AllocaSizeBits) SrcVal = Builder.CreateZExt(SrcVal, @@ -2464,7 +2653,7 @@ bool SROA::isSafeAllocaToScalarRepl(AllocaInst *AI) { return false; } } - + return true; } diff --git a/lib/Transforms/Scalar/SimplifyCFGPass.cpp b/lib/Transforms/Scalar/SimplifyCFGPass.cpp index a66b3e3..d13e4ab 100644 --- a/lib/Transforms/Scalar/SimplifyCFGPass.cpp +++ b/lib/Transforms/Scalar/SimplifyCFGPass.cpp @@ -67,7 +67,7 @@ static void ChangeToUnreachable(Instruction *I, bool UseLLVMTrap) { // nodes. for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE; ++SI) (*SI)->removePredecessor(BB); - + // Insert a call to llvm.trap right before this. This turns the undefined // behavior into a hard fail instead of falling through into random code. if (UseLLVMTrap) { @@ -77,7 +77,7 @@ static void ChangeToUnreachable(Instruction *I, bool UseLLVMTrap) { CallTrap->setDebugLoc(I->getDebugLoc()); } new UnreachableInst(I->getContext(), I); - + // All instructions after this are dead. BasicBlock::iterator BBI = I, BBE = BB->end(); while (BBI != BBE) { @@ -89,7 +89,6 @@ static void ChangeToUnreachable(Instruction *I, bool UseLLVMTrap) { /// ChangeToCall - Convert the specified invoke into a normal call. static void ChangeToCall(InvokeInst *II) { - BasicBlock *BB = II->getParent(); SmallVector Args(II->op_begin(), II->op_end() - 3); CallInst *NewCall = CallInst::Create(II->getCalledValue(), Args, "", II); NewCall->takeName(II); @@ -102,19 +101,19 @@ static void ChangeToCall(InvokeInst *II) { BranchInst::Create(II->getNormalDest(), II); // Update PHI nodes in the unwind destination - II->getUnwindDest()->removePredecessor(BB); - BB->getInstList().erase(II); + II->getUnwindDest()->removePredecessor(II->getParent()); + II->eraseFromParent(); } static bool MarkAliveBlocks(BasicBlock *BB, SmallPtrSet &Reachable) { - + SmallVector Worklist; Worklist.push_back(BB); bool Changed = false; do { BB = Worklist.pop_back_val(); - + if (!Reachable.insert(BB)) continue; @@ -136,7 +135,7 @@ static bool MarkAliveBlocks(BasicBlock *BB, break; } } - + // Store to undef and store to null are undefined and used to signal that // they should be changed to unreachable by passes that can't modify the // CFG. 
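// (Sketch of the invoke rewriting below, names hypothetical: an invoke of
//  a callee known not to throw,
//    %r = invoke i32 @f(i32 %x) nounwind
//             to label %cont unwind label %lpad
//  becomes a plain call plus an unconditional branch,
//    %r = call i32 @f(i32 %x)
//    br label %cont
//  and %lpad loses a predecessor; invokes of null or undef callees are
//  turned into unreachable instead.)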
@@ -145,7 +144,7 @@ static bool MarkAliveBlocks(BasicBlock *BB, if (SI->isVolatile()) continue; Value *Ptr = SI->getOperand(1); - + if (isa(Ptr) || (isa(Ptr) && SI->getPointerAddressSpace() == 0)) { @@ -157,11 +156,22 @@ static bool MarkAliveBlocks(BasicBlock *BB, } // Turn invokes that call 'nounwind' functions into ordinary calls. - if (InvokeInst *II = dyn_cast(BB->getTerminator())) - if (II->doesNotThrow()) { - ChangeToCall(II); + if (InvokeInst *II = dyn_cast(BB->getTerminator())) { + Value *Callee = II->getCalledValue(); + if (isa(Callee) || isa(Callee)) { + ChangeToUnreachable(II, true); + Changed = true; + } else if (II->doesNotThrow()) { + if (II->use_empty() && II->onlyReadsMemory()) { + // jump to the normal destination branch. + BranchInst::Create(II->getNormalDest(), II); + II->getUnwindDest()->removePredecessor(II->getParent()); + II->eraseFromParent(); + } else + ChangeToCall(II); Changed = true; } + } Changed |= ConstantFoldTerminator(BB, true); for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE; ++SI) @@ -170,38 +180,38 @@ static bool MarkAliveBlocks(BasicBlock *BB, return Changed; } -/// RemoveUnreachableBlocksFromFn - Remove blocks that are not reachable, even -/// if they are in a dead cycle. Return true if a change was made, false +/// RemoveUnreachableBlocksFromFn - Remove blocks that are not reachable, even +/// if they are in a dead cycle. Return true if a change was made, false /// otherwise. static bool RemoveUnreachableBlocksFromFn(Function &F) { SmallPtrSet Reachable; bool Changed = MarkAliveBlocks(F.begin(), Reachable); - + // If there are unreachable blocks in the CFG... if (Reachable.size() == F.size()) return Changed; - + assert(Reachable.size() < F.size()); NumSimpl += F.size()-Reachable.size(); - + // Loop over all of the basic blocks that are not reachable, dropping all of // their internal references... for (Function::iterator BB = ++F.begin(), E = F.end(); BB != E; ++BB) { if (Reachable.count(BB)) continue; - + for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE; ++SI) if (Reachable.count(*SI)) (*SI)->removePredecessor(BB); BB->dropAllReferences(); } - + for (Function::iterator I = ++F.begin(); I != F.end();) if (!Reachable.count(I)) I = F.getBasicBlockList().erase(I); else ++I; - + return true; } @@ -209,17 +219,17 @@ static bool RemoveUnreachableBlocksFromFn(Function &F) { /// node) return blocks, merge them together to promote recursive block merging. static bool MergeEmptyReturnBlocks(Function &F) { bool Changed = false; - + BasicBlock *RetBlock = 0; - + // Scan all the blocks in the function, looking for empty return blocks. for (Function::iterator BBI = F.begin(), E = F.end(); BBI != E; ) { BasicBlock &BB = *BBI++; - + // Only look at return blocks. ReturnInst *Ret = dyn_cast(BB.getTerminator()); if (Ret == 0) continue; - + // Only look at the block if it is empty or the only other thing in it is a // single PHI node that is the operand to the return. if (Ret != &BB.front()) { @@ -241,21 +251,21 @@ static bool MergeEmptyReturnBlocks(Function &F) { RetBlock = &BB; continue; } - + // Otherwise, we found a duplicate return block. Merge the two. Changed = true; - + // Case when there is no input to the return or when the returned values // agree is trivial. Note that they can't agree if there are phis in the // blocks. 
if (Ret->getNumOperands() == 0 || - Ret->getOperand(0) == + Ret->getOperand(0) == cast(RetBlock->getTerminator())->getOperand(0)) { BB.replaceAllUsesWith(RetBlock); BB.eraseFromParent(); continue; } - + // If the canonical return block has no PHI node, create one now. PHINode *RetBlockPHI = dyn_cast(RetBlock->begin()); if (RetBlockPHI == 0) { @@ -264,12 +274,12 @@ static bool MergeEmptyReturnBlocks(Function &F) { RetBlockPHI = PHINode::Create(Ret->getOperand(0)->getType(), std::distance(PB, PE), "merge", &RetBlock->front()); - + for (pred_iterator PI = PB; PI != PE; ++PI) RetBlockPHI->addIncoming(InVal, *PI); RetBlock->getTerminator()->setOperand(0, RetBlockPHI); } - + // Turn BB into a block that just unconditionally branches to the return // block. This handles the case when the two return blocks have a common // predecessor but that return different things. @@ -277,7 +287,7 @@ static bool MergeEmptyReturnBlocks(Function &F) { BB.getTerminator()->eraseFromParent(); BranchInst::Create(RetBlock, &BB); } - + return Changed; } @@ -288,7 +298,7 @@ static bool IterativeSimplifyCFG(Function &F, const TargetData *TD) { bool LocalChange = true; while (LocalChange) { LocalChange = false; - + // Loop over all of the basic blocks and remove them if they are unneeded... // for (Function::iterator BBIt = F.begin(); BBIt != F.end(); ) { @@ -317,7 +327,7 @@ bool CFGSimplifyPass::runOnFunction(Function &F) { // IterativeSimplifyCFG can (rarely) make some loops dead. If this happens, // RemoveUnreachableBlocksFromFn is needed to nuke them, which means we should // iterate between the two optimizations. We structure the code like this to - // avoid reruning IterativeSimplifyCFG if the second pass of + // avoid reruning IterativeSimplifyCFG if the second pass of // RemoveUnreachableBlocksFromFn doesn't do anything. if (!RemoveUnreachableBlocksFromFn(F)) return true; diff --git a/lib/Transforms/Scalar/SimplifyLibCalls.cpp b/lib/Transforms/Scalar/SimplifyLibCalls.cpp index f7b6941..f110320 100644 --- a/lib/Transforms/Scalar/SimplifyLibCalls.cpp +++ b/lib/Transforms/Scalar/SimplifyLibCalls.cpp @@ -18,20 +18,20 @@ #define DEBUG_TYPE "simplify-libcalls" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/BuildLibCalls.h" +#include "llvm/IRBuilder.h" #include "llvm/Intrinsics.h" #include "llvm/LLVMContext.h" #include "llvm/Module.h" #include "llvm/Pass.h" -#include "llvm/Support/IRBuilder.h" -#include "llvm/Analysis/ValueTracking.h" -#include "llvm/Target/TargetData.h" -#include "llvm/Target/TargetLibraryInfo.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/StringMap.h" #include "llvm/ADT/Statistic.h" -#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/Analysis/ValueTracking.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetLibraryInfo.h" #include "llvm/Config/config.h" // FIXME: Shouldn't depend on host! using namespace llvm; @@ -100,7 +100,7 @@ static bool IsOnlyUsedInZeroEqualityComparison(Value *V) { } return true; } - + static bool CallHasFloatingPointArgument(const CallInst *CI) { for (CallInst::const_op_iterator it = CI->op_begin(), e = CI->op_end(); it != e; ++it) { @@ -157,14 +157,15 @@ struct StrCatOpt : public LibCallOptimization { // These optimizations require TargetData. 
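// (For the StpCpyOpt handler added further below: unlike strcpy, stpcpy
//  returns a pointer to the destination's terminating nul rather than the
//  destination itself. That is why stpcpy(x, x) folds to x + strlen(x),
//  and why a copy from a source of known length Len folds to a memcpy
//  whose result is Dst + Len - 1, where Len counts the nul.)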
if (!TD) return 0; - EmitStrLenMemCpy(Src, Dst, Len, B); - return Dst; + return EmitStrLenMemCpy(Src, Dst, Len, B); } - void EmitStrLenMemCpy(Value *Src, Value *Dst, uint64_t Len, IRBuilder<> &B) { + Value *EmitStrLenMemCpy(Value *Src, Value *Dst, uint64_t Len, IRBuilder<> &B) { // We need to find the end of the destination string. That's where the // memory is to be moved to. We just generate a call to strlen. - Value *DstLen = EmitStrLen(Dst, B, TD); + Value *DstLen = EmitStrLen(Dst, B, TD, TLI); + if (!DstLen) + return 0; // Now that we have the destination's length, we must index into the // destination's pointer to get the actual memcpy destination (end of @@ -175,6 +176,7 @@ struct StrCatOpt : public LibCallOptimization { // concatenation for us. Make a memcpy to copy the nul byte with align = 1. B.CreateMemCpy(CpyDst, Src, ConstantInt::get(TD->getIntPtrType(*Context), Len + 1), 1); + return Dst; } }; @@ -221,8 +223,7 @@ struct StrNCatOpt : public StrCatOpt { // strncat(x, s, c) -> strcat(x, s) // s is constant so the strcat can be optimized further - EmitStrLenMemCpy(Src, Dst, SrcLen, B); - return Dst; + return EmitStrLenMemCpy(Src, Dst, SrcLen, B); } }; @@ -254,9 +255,9 @@ struct StrChrOpt : public LibCallOptimization { return EmitMemChr(SrcStr, CI->getArgOperand(1), // include nul. ConstantInt::get(TD->getIntPtrType(*Context), Len), - B, TD); + B, TD, TLI); } - + // Otherwise, the character is a constant, see if the first argument is // a string literal. If so, we can constant fold. StringRef Str; @@ -299,7 +300,7 @@ struct StrRChrOpt : public LibCallOptimization { if (!getConstantStringInfo(SrcStr, Str)) { // strrchr(s, 0) -> strchr(s, 0) if (TD && CharC->isZero()) - return EmitStrChr(SrcStr, '\0', B, TD); + return EmitStrChr(SrcStr, '\0', B, TD, TLI); return 0; } @@ -355,7 +356,7 @@ struct StrCmpOpt : public LibCallOptimization { return EmitMemCmp(Str1P, Str2P, ConstantInt::get(TD->getIntPtrType(*Context), - std::min(Len1, Len2)), B, TD); + std::min(Len1, Len2)), B, TD, TLI); } return 0; @@ -391,7 +392,7 @@ struct StrNCmpOpt : public LibCallOptimization { return ConstantInt::get(CI->getType(), 0); if (TD && Length == 1) // strncmp(x,y,1) -> memcmp(x,y,1) - return EmitMemCmp(Str1P, Str2P, CI->getArgOperand(2), B, TD); + return EmitMemCmp(Str1P, Str2P, CI->getArgOperand(2), B, TD, TLI); StringRef Str1, Str2; bool HasStr1 = getConstantStringInfo(Str1P, Str1); @@ -447,11 +448,10 @@ struct StrCpyOpt : public LibCallOptimization { // We have enough information to now generate the memcpy call to do the // concatenation for us. Make a memcpy to copy the nul byte with align = 1. - if (OptChkCall) - EmitMemCpyChk(Dst, Src, - ConstantInt::get(TD->getIntPtrType(*Context), Len), - CI->getArgOperand(2), B, TD); - else + if (!OptChkCall || + !EmitMemCpyChk(Dst, Src, + ConstantInt::get(TD->getIntPtrType(*Context), Len), + CI->getArgOperand(2), B, TD, TLI)) B.CreateMemCpy(Dst, Src, ConstantInt::get(TD->getIntPtrType(*Context), Len), 1); return Dst; @@ -459,6 +459,51 @@ struct StrCpyOpt : public LibCallOptimization { }; //===---------------------------------------===// +// 'stpcpy' Optimizations + +struct StpCpyOpt: public LibCallOptimization { + bool OptChkCall; // True if it's optimizing a __stpcpy_chk libcall. + + StpCpyOpt(bool c) : OptChkCall(c) {} + + virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + // Verify the "stpcpy" function prototype. + unsigned NumParams = OptChkCall ? 
3 : 2; + FunctionType *FT = Callee->getFunctionType(); + if (FT->getNumParams() != NumParams || + FT->getReturnType() != FT->getParamType(0) || + FT->getParamType(0) != FT->getParamType(1) || + FT->getParamType(0) != B.getInt8PtrTy()) + return 0; + + // These optimizations require TargetData. + if (!TD) return 0; + + Value *Dst = CI->getArgOperand(0), *Src = CI->getArgOperand(1); + if (Dst == Src) { // stpcpy(x,x) -> x+strlen(x) + Value *StrLen = EmitStrLen(Src, B, TD, TLI); + return StrLen ? B.CreateInBoundsGEP(Dst, StrLen) : 0; + } + + // See if we can get the length of the input string. + uint64_t Len = GetStringLength(Src); + if (Len == 0) return 0; + + Value *LenV = ConstantInt::get(TD->getIntPtrType(*Context), Len); + Value *DstEnd = B.CreateGEP(Dst, + ConstantInt::get(TD->getIntPtrType(*Context), + Len - 1)); + + // We have enough information to now generate the memcpy call to do the + // copy for us. Make a memcpy to copy the nul byte with align = 1. + if (!OptChkCall || !EmitMemCpyChk(Dst, Src, LenV, CI->getArgOperand(2), B, + TD, TLI)) + B.CreateMemCpy(Dst, Src, LenV, 1); + return DstEnd; + } +}; + +//===---------------------------------------===// // 'strncpy' Optimizations struct StrNCpyOpt : public LibCallOptimization { @@ -565,7 +610,7 @@ struct StrPBrkOpt : public LibCallOptimization { // strpbrk(s, "a") -> strchr(s, 'a') if (TD && HasS2 && S2.size() == 1) - return EmitStrChr(CI->getArgOperand(0), S2[0], B, TD); + return EmitStrChr(CI->getArgOperand(0), S2[0], B, TD, TLI); return 0; } @@ -654,7 +699,7 @@ struct StrCSpnOpt : public LibCallOptimization { // strcspn(s, "") -> strlen(s) if (TD && HasS2 && S2.empty()) - return EmitStrLen(CI->getArgOperand(0), B, TD); + return EmitStrLen(CI->getArgOperand(0), B, TD, TLI); return 0; } @@ -678,9 +723,13 @@ struct StrStrOpt : public LibCallOptimization { // fold strstr(a, b) == a -> strncmp(a, b, strlen(b)) == 0 if (TD && IsOnlyUsedInEqualityComparison(CI, CI->getArgOperand(0))) { - Value *StrLen = EmitStrLen(CI->getArgOperand(1), B, TD); + Value *StrLen = EmitStrLen(CI->getArgOperand(1), B, TD, TLI); + if (!StrLen) + return 0; Value *StrNCmp = EmitStrNCmp(CI->getArgOperand(0), CI->getArgOperand(1), - StrLen, B, TD); + StrLen, B, TD, TLI); + if (!StrNCmp) + return 0; for (Value::use_iterator UI = CI->use_begin(), UE = CI->use_end(); UI != UE; ) { ICmpInst *Old = cast(*UI++); @@ -716,9 +765,10 @@ struct StrStrOpt : public LibCallOptimization { } // fold strstr(x, "y") -> strchr(x, 'y'). - if (HasStr2 && ToFindStr.size() == 1) - return B.CreateBitCast(EmitStrChr(CI->getArgOperand(0), - ToFindStr[0], B, TD), CI->getType()); + if (HasStr2 && ToFindStr.size() == 1) { + Value *StrChr= EmitStrChr(CI->getArgOperand(0), ToFindStr[0], B, TD, TLI); + return StrChr ? B.CreateBitCast(StrChr, CI->getType()) : 0; + } return 0; } }; @@ -1135,8 +1185,8 @@ struct PrintFOpt : public LibCallOptimization { // printf("x") -> putchar('x'), even for '%'. if (FormatStr.size() == 1) { - Value *Res = EmitPutChar(B.getInt32(FormatStr[0]), B, TD); - if (CI->use_empty()) return CI; + Value *Res = EmitPutChar(B.getInt32(FormatStr[0]), B, TD, TLI); + if (CI->use_empty() || !Res) return Res; return B.CreateIntCast(Res, CI->getType(), true); } @@ -1147,26 +1197,26 @@ struct PrintFOpt : public LibCallOptimization { // pass to be run after this pass, to merge duplicate strings. FormatStr = FormatStr.drop_back(); Value *GV = B.CreateGlobalString(FormatStr, "str"); - EmitPutS(GV, B, TD); - return CI->use_empty() ? 
(Value*)CI : - ConstantInt::get(CI->getType(), FormatStr.size()+1); + Value *NewCI = EmitPutS(GV, B, TD, TLI); + return (CI->use_empty() || !NewCI) ? + NewCI : + ConstantInt::get(CI->getType(), FormatStr.size()+1); } // Optimize specific format strings. // printf("%c", chr) --> putchar(chr) if (FormatStr == "%c" && CI->getNumArgOperands() > 1 && CI->getArgOperand(1)->getType()->isIntegerTy()) { - Value *Res = EmitPutChar(CI->getArgOperand(1), B, TD); + Value *Res = EmitPutChar(CI->getArgOperand(1), B, TD, TLI); - if (CI->use_empty()) return CI; + if (CI->use_empty() || !Res) return Res; return B.CreateIntCast(Res, CI->getType(), true); } // printf("%s\n", str) --> puts(str) if (FormatStr == "%s\n" && CI->getNumArgOperands() > 1 && CI->getArgOperand(1)->getType()->isPointerTy()) { - EmitPutS(CI->getArgOperand(1), B, TD); - return CI; + return EmitPutS(CI->getArgOperand(1), B, TD, TLI); } return 0; } @@ -1253,7 +1303,9 @@ struct SPrintFOpt : public LibCallOptimization { // sprintf(dest, "%s", str) -> llvm.memcpy(dest, str, strlen(str)+1, 1) if (!CI->getArgOperand(2)->getType()->isPointerTy()) return 0; - Value *Len = EmitStrLen(CI->getArgOperand(2), B, TD); + Value *Len = EmitStrLen(CI->getArgOperand(2), B, TD, TLI); + if (!Len) + return 0; Value *IncLen = B.CreateAdd(Len, ConstantInt::get(Len->getType(), 1), "leninc"); @@ -1320,8 +1372,8 @@ struct FWriteOpt : public LibCallOptimization { // This optimisation is only valid, if the return value is unused. if (Bytes == 1 && CI->use_empty()) { // fwrite(S,1,1,F) -> fputc(S[0],F) Value *Char = B.CreateLoad(CastToCStr(CI->getArgOperand(0), B), "char"); - EmitFPutC(Char, CI->getArgOperand(3), B, TD); - return ConstantInt::get(CI->getType(), 1); + Value *NewCI = EmitFPutC(Char, CI->getArgOperand(3), B, TD, TLI); + return NewCI ? ConstantInt::get(CI->getType(), 1) : 0; } return 0; @@ -1346,10 +1398,10 @@ struct FPutsOpt : public LibCallOptimization { // fputs(s,F) --> fwrite(s,1,strlen(s),F) uint64_t Len = GetStringLength(CI->getArgOperand(0)); if (!Len) return 0; - EmitFWrite(CI->getArgOperand(0), - ConstantInt::get(TD->getIntPtrType(*Context), Len-1), - CI->getArgOperand(1), B, TD, TLI); - return CI; // Known to have no uses (see above). + // Known to have no uses (see above). + return EmitFWrite(CI->getArgOperand(0), + ConstantInt::get(TD->getIntPtrType(*Context), Len-1), + CI->getArgOperand(1), B, TD, TLI); } }; @@ -1373,11 +1425,11 @@ struct FPrintFOpt : public LibCallOptimization { // These optimizations require TargetData. if (!TD) return 0; - EmitFWrite(CI->getArgOperand(1), - ConstantInt::get(TD->getIntPtrType(*Context), - FormatStr.size()), - CI->getArgOperand(0), B, TD, TLI); - return ConstantInt::get(CI->getType(), FormatStr.size()); + Value *NewCI = EmitFWrite(CI->getArgOperand(1), + ConstantInt::get(TD->getIntPtrType(*Context), + FormatStr.size()), + CI->getArgOperand(0), B, TD, TLI); + return NewCI ? ConstantInt::get(CI->getType(), FormatStr.size()) : 0; } // The remaining optimizations require the format string to be "%s" or "%c" @@ -1390,16 +1442,16 @@ struct FPrintFOpt : public LibCallOptimization { if (FormatStr[1] == 'c') { // fprintf(F, "%c", chr) --> fputc(chr, F) if (!CI->getArgOperand(2)->getType()->isIntegerTy()) return 0; - EmitFPutC(CI->getArgOperand(2), CI->getArgOperand(0), B, TD); - return ConstantInt::get(CI->getType(), 1); + Value *NewCI = EmitFPutC(CI->getArgOperand(2), CI->getArgOperand(0), B, + TD, TLI); + return NewCI ? 
ConstantInt::get(CI->getType(), 1) : 0; } if (FormatStr[1] == 's') { // fprintf(F, "%s", str) --> fputs(str, F) if (!CI->getArgOperand(2)->getType()->isPointerTy() || !CI->use_empty()) return 0; - EmitFPutS(CI->getArgOperand(2), CI->getArgOperand(0), B, TD, TLI); - return CI; + return EmitFPutS(CI->getArgOperand(2), CI->getArgOperand(0), B, TD, TLI); } return 0; } @@ -1450,8 +1502,8 @@ struct PutsOpt : public LibCallOptimization { if (Str.empty() && CI->use_empty()) { // puts("") -> putchar('\n') - Value *Res = EmitPutChar(B.getInt32('\n'), B, TD); - if (CI->use_empty()) return CI; + Value *Res = EmitPutChar(B.getInt32('\n'), B, TD, TLI); + if (CI->use_empty() || !Res) return Res; return B.CreateIntCast(Res, CI->getType(), true); } @@ -1470,12 +1522,15 @@ namespace { /// class SimplifyLibCalls : public FunctionPass { TargetLibraryInfo *TLI; - + StringMap Optimizations; // String and Memory LibCall Optimizations StrCatOpt StrCat; StrNCatOpt StrNCat; StrChrOpt StrChr; StrRChrOpt StrRChr; - StrCmpOpt StrCmp; StrNCmpOpt StrNCmp; StrCpyOpt StrCpy; StrCpyOpt StrCpyChk; - StrNCpyOpt StrNCpy; StrLenOpt StrLen; StrPBrkOpt StrPBrk; + StrCmpOpt StrCmp; StrNCmpOpt StrNCmp; + StrCpyOpt StrCpy; StrCpyOpt StrCpyChk; + StpCpyOpt StpCpy; StpCpyOpt StpCpyChk; + StrNCpyOpt StrNCpy; + StrLenOpt StrLen; StrPBrkOpt StrPBrk; StrToOpt StrTo; StrSpnOpt StrSpn; StrCSpnOpt StrCSpn; StrStrOpt StrStr; MemCmpOpt MemCmp; MemCpyOpt MemCpy; MemMoveOpt MemMove; MemSetOpt MemSet; // Math Library Optimizations @@ -1487,11 +1542,12 @@ namespace { SPrintFOpt SPrintF; PrintFOpt PrintF; FWriteOpt FWrite; FPutsOpt FPuts; FPrintFOpt FPrintF; PutsOpt Puts; - + bool Modified; // This is only used by doInitialization. public: static char ID; // Pass identification - SimplifyLibCalls() : FunctionPass(ID), StrCpy(false), StrCpyChk(true) { + SimplifyLibCalls() : FunctionPass(ID), StrCpy(false), StrCpyChk(true), + StpCpy(false), StpCpyChk(true) { initializeSimplifyLibCallsPass(*PassRegistry::getPassRegistry()); } void AddOpt(LibFunc::Func F, LibCallOptimization* Opt); @@ -1542,6 +1598,7 @@ void SimplifyLibCalls::InitOptimizations() { Optimizations["strncmp"] = &StrNCmp; Optimizations["strcpy"] = &StrCpy; Optimizations["strncpy"] = &StrNCpy; + Optimizations["stpcpy"] = &StpCpy; Optimizations["strlen"] = &StrLen; Optimizations["strpbrk"] = &StrPBrk; Optimizations["strtol"] = &StrTo; @@ -1561,6 +1618,7 @@ void SimplifyLibCalls::InitOptimizations() { // _chk variants of String and Memory LibCall Optimizations. 
Optimizations["__strcpy_chk"] = &StrCpyChk; + Optimizations["__stpcpy_chk"] = &StpCpyChk; // Math Library Optimizations Optimizations["cosf"] = &Cos; @@ -1717,7 +1775,7 @@ void SimplifyLibCalls::setDoesNotAlias(Function &F, unsigned n) { void SimplifyLibCalls::inferPrototypeAttributes(Function &F) { FunctionType *FTy = F.getFunctionType(); - + StringRef Name = F.getName(); switch (Name[0]) { case 's': @@ -1746,6 +1804,7 @@ void SimplifyLibCalls::inferPrototypeAttributes(Function &F) { Name == "strtold" || Name == "strncat" || Name == "strncpy" || + Name == "stpncpy" || Name == "strtoull") { if (FTy->getNumParams() < 2 || !FTy->getParamType(1)->isPointerTy()) @@ -2406,10 +2465,6 @@ bool SimplifyLibCalls::doInitialization(Module &M) { // * sqrt(Nroot(x)) -> pow(x,1/(2*N)) // * sqrt(pow(x,y)) -> pow(|x|,y*0.5) // -// stpcpy: -// * stpcpy(str, "literal") -> -// llvm.memcpy(str,"literal",strlen("literal")+1,1) -// // strchr: // * strchr(p, 0) -> strlen(p) // tan, tanf, tanl: diff --git a/lib/Transforms/Scalar/Sink.cpp b/lib/Transforms/Scalar/Sink.cpp index ef65c0a..34f1d6c 100644 --- a/lib/Transforms/Scalar/Sink.cpp +++ b/lib/Transforms/Scalar/Sink.cpp @@ -27,6 +27,7 @@ using namespace llvm; STATISTIC(NumSunk, "Number of instructions sunk"); +STATISTIC(NumSinkIter, "Number of sinking iterations"); namespace { class Sinking : public FunctionPass { @@ -39,9 +40,9 @@ namespace { Sinking() : FunctionPass(ID) { initializeSinkingPass(*PassRegistry::getPassRegistry()); } - + virtual bool runOnFunction(Function &F); - + virtual void getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesCFG(); FunctionPass::getAnalysisUsage(AU); @@ -55,9 +56,10 @@ namespace { bool ProcessBlock(BasicBlock &BB); bool SinkInstruction(Instruction *I, SmallPtrSet &Stores); bool AllUsesDominatedByBlock(Instruction *Inst, BasicBlock *BB) const; + bool IsAcceptableTarget(Instruction *Inst, BasicBlock *SuccToSinkTo) const; }; } // end anonymous namespace - + char Sinking::ID = 0; INITIALIZE_PASS_BEGIN(Sinking, "sink", "Code sinking", false, false) INITIALIZE_PASS_DEPENDENCY(LoopInfo) @@ -69,7 +71,7 @@ FunctionPass *llvm::createSinkingPass() { return new Sinking(); } /// AllUsesDominatedByBlock - Return true if all uses of the specified value /// occur in blocks dominated by the specified block. -bool Sinking::AllUsesDominatedByBlock(Instruction *Inst, +bool Sinking::AllUsesDominatedByBlock(Instruction *Inst, BasicBlock *BB) const { // Ignoring debug uses is necessary so debug info doesn't affect the code. // This may leave a referencing dbg_value in the original block, before @@ -98,20 +100,19 @@ bool Sinking::runOnFunction(Function &F) { LI = &getAnalysis(); AA = &getAnalysis(); - bool EverMadeChange = false; - - while (1) { - bool MadeChange = false; + bool MadeChange, EverMadeChange = false; + do { + MadeChange = false; + DEBUG(dbgs() << "Sinking iteration " << NumSinkIter << "\n"); // Process all basic blocks. - for (Function::iterator I = F.begin(), E = F.end(); + for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) MadeChange |= ProcessBlock(*I); - - // If this iteration over the code changed anything, keep iterating. - if (!MadeChange) break; - EverMadeChange = true; - } + EverMadeChange |= MadeChange; + NumSinkIter++; + } while (MadeChange); + return EverMadeChange; } @@ -120,8 +121,8 @@ bool Sinking::ProcessBlock(BasicBlock &BB) { if (BB.getTerminator()->getNumSuccessors() <= 1 || BB.empty()) return false; // Don't bother sinking code out of unreachable blocks. 
In addition to being - // unprofitable, it can also lead to infinite looping, because in an unreachable - // loop there may be nowhere to stop. + // unprofitable, it can also lead to infinite looping, because in an + // unreachable loop there may be nowhere to stop. if (!DT->isReachableFromEntry(&BB)) return false; bool MadeChange = false; @@ -133,7 +134,7 @@ bool Sinking::ProcessBlock(BasicBlock &BB) { SmallPtrSet Stores; do { Instruction *Inst = I; // The instruction to sink. - + // Predecrement I (if it's not begin) so that it isn't invalidated by // sinking. ProcessedBegin = I == BB.begin(); @@ -145,10 +146,10 @@ bool Sinking::ProcessBlock(BasicBlock &BB) { if (SinkInstruction(Inst, Stores)) ++NumSunk, MadeChange = true; - + // If we just processed the first instruction in the block, we're done. } while (!ProcessedBegin); - + return MadeChange; } @@ -174,6 +175,45 @@ static bool isSafeToMove(Instruction *Inst, AliasAnalysis *AA, return true; } +/// IsAcceptableTarget - Return true if it is possible to sink the instruction +/// in the specified basic block. +bool Sinking::IsAcceptableTarget(Instruction *Inst, + BasicBlock *SuccToSinkTo) const { + assert(Inst && "Instruction to be sunk is null"); + assert(SuccToSinkTo && "Candidate sink target is null"); + + // It is not possible to sink an instruction into its own block. This can + // happen with loops. + if (Inst->getParent() == SuccToSinkTo) + return false; + + // If the block has multiple predecessors, this would introduce computation + // on different code paths. We could split the critical edge, but for now we + // just punt. + // FIXME: Split critical edges if not backedges. + if (SuccToSinkTo->getUniquePredecessor() != Inst->getParent()) { + // We cannot sink a load across a critical edge - there may be stores in + // other code paths. + if (!isSafeToSpeculativelyExecute(Inst)) + return false; + + // We don't want to sink across a critical edge if we don't dominate the + // successor. We could be introducing calculations to new code paths. + if (!DT->dominates(Inst->getParent(), SuccToSinkTo)) + return false; + + // Don't sink instructions into a loop. + Loop *succ = LI->getLoopFor(SuccToSinkTo); + Loop *cur = LI->getLoopFor(Inst->getParent()); + if (succ != 0 && succ != cur) + return false; + } + + // Finally, check that all the uses of the instruction are actually + // dominated by the candidate + return AllUsesDominatedByBlock(Inst, SuccToSinkTo); +} + /// SinkInstruction - Determine whether it is safe to sink the specified machine /// instruction out of its current block into a successor. bool Sinking::SinkInstruction(Instruction *Inst, @@ -181,7 +221,7 @@ bool Sinking::SinkInstruction(Instruction *Inst, // Check if it's safe to move the instruction. if (!isSafeToMove(Inst, AA, Stores)) return false; - + // FIXME: This should include support for sinking instructions within the // block they are currently in to shorten the live ranges. We often get // instructions sunk into the top of a large block, but it would be better to @@ -189,86 +229,42 @@ bool Sinking::SinkInstruction(Instruction *Inst, // be careful not to *increase* register pressure though, e.g. sinking // "x = y + z" down if it kills y and z would increase the live ranges of y // and z and only shrink the live range of x. - - // Loop over all the operands of the specified instruction. If there is - // anything we can't handle, bail out. 
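// (Sketch of a sink that IsAcceptableTarget above admits, names
//  hypothetical:
//    entry:
//      %y = add i32 %a, %b
//      br i1 %c, label %then, label %else
//  With %y used only in %then, the add is sunk into %then. A load, by
//  contrast, is not sunk into a block with other predecessors, since
//  another path into it could contain a store; that is the speculation
//  check on the critical-edge path above.)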
- BasicBlock *ParentBlock = Inst->getParent(); - + // SuccToSinkTo - This is the successor to sink this instruction to, once we // decide. BasicBlock *SuccToSinkTo = 0; - - // FIXME: This picks a successor to sink into based on having one - // successor that dominates all the uses. However, there are cases where - // sinking can happen but where the sink point isn't a successor. For - // example: - // x = computation - // if () {} else {} - // use x - // the instruction could be sunk over the whole diamond for the - // if/then/else (or loop, etc), allowing it to be sunk into other blocks - // after that. - + // Instructions can only be sunk if all their uses are in blocks // dominated by one of the successors. - // Look at all the successors and decide which one - // we should sink to. - for (succ_iterator SI = succ_begin(ParentBlock), - E = succ_end(ParentBlock); SI != E; ++SI) { - if (AllUsesDominatedByBlock(Inst, *SI)) { - SuccToSinkTo = *SI; - break; - } + // Look at all the postdominators and see if we can sink it in one. + DomTreeNode *DTN = DT->getNode(Inst->getParent()); + for (DomTreeNode::iterator I = DTN->begin(), E = DTN->end(); + I != E && SuccToSinkTo == 0; ++I) { + BasicBlock *Candidate = (*I)->getBlock(); + if ((*I)->getIDom()->getBlock() == Inst->getParent() && + IsAcceptableTarget(Inst, Candidate)) + SuccToSinkTo = Candidate; + } + + // If no suitable postdominator was found, look at all the successors and + // decide which one we should sink to, if any. + for (succ_iterator I = succ_begin(Inst->getParent()), + E = succ_end(Inst->getParent()); I != E && SuccToSinkTo == 0; ++I) { + if (IsAcceptableTarget(Inst, *I)) + SuccToSinkTo = *I; } - + // If we couldn't find a block to sink to, ignore this instruction. if (SuccToSinkTo == 0) return false; - - // It is not possible to sink an instruction into its own block. This can - // happen with loops. - if (Inst->getParent() == SuccToSinkTo) - return false; - - DEBUG(dbgs() << "Sink instr " << *Inst); - DEBUG(dbgs() << "to block "; - WriteAsOperand(dbgs(), SuccToSinkTo, false)); - - // If the block has multiple predecessors, this would introduce computation on - // a path that it doesn't already exist. We could split the critical edge, - // but for now we just punt. - // FIXME: Split critical edges if not backedges. - if (SuccToSinkTo->getUniquePredecessor() != ParentBlock) { - // We cannot sink a load across a critical edge - there may be stores in - // other code paths. - if (!isSafeToSpeculativelyExecute(Inst)) { - DEBUG(dbgs() << " *** PUNTING: Wont sink load along critical edge.\n"); - return false; - } - // We don't want to sink across a critical edge if we don't dominate the - // successor. We could be introducing calculations to new code paths. - if (!DT->dominates(ParentBlock, SuccToSinkTo)) { - DEBUG(dbgs() << " *** PUNTING: Critical edge found\n"); - return false; - } - - // Don't sink instructions into a loop. - if (LI->isLoopHeader(SuccToSinkTo)) { - DEBUG(dbgs() << " *** PUNTING: Loop header found\n"); - return false; - } + DEBUG(dbgs() << "Sink" << *Inst << " ("; + WriteAsOperand(dbgs(), Inst->getParent(), false); + dbgs() << " -> "; + WriteAsOperand(dbgs(), SuccToSinkTo, false); + dbgs() << ")\n"); - // Otherwise we are OK with sinking along a critical edge. - DEBUG(dbgs() << "Sinking along critical edge.\n"); - } - - // Determine where to insert into. Skip phi nodes. 
- BasicBlock::iterator InsertPos = SuccToSinkTo->begin(); - while (InsertPos != SuccToSinkTo->end() && isa(InsertPos)) - ++InsertPos; - // Move the instruction. - Inst->moveBefore(InsertPos); + Inst->moveBefore(SuccToSinkTo->getFirstInsertionPt()); return true; } diff --git a/lib/Transforms/Scalar/TailRecursionElimination.cpp b/lib/Transforms/Scalar/TailRecursionElimination.cpp index e21eb9d..6557d63 100644 --- a/lib/Transforms/Scalar/TailRecursionElimination.cpp +++ b/lib/Transforms/Scalar/TailRecursionElimination.cpp @@ -172,7 +172,7 @@ bool TailCallElim::runOnFunction(Function &F) { FunctionContainsEscapingAllocas |= CheckForEscapingAllocas(BB, CannotTCETailMarkedCall); } - + /// FIXME: The code generator produces really bad code when an 'escaping /// alloca' is changed from being a static alloca to being a dynamic alloca. /// Until this is resolved, disable this transformation if that would ever @@ -234,7 +234,7 @@ bool TailCallElim::CanMoveAboveCall(Instruction *I, CallInst *CI) { // call does not mod/ref the memory location being processed. if (I->mayHaveSideEffects()) // This also handles volatile loads. return false; - + if (LoadInst *L = dyn_cast(I)) { // Loads may always be moved above calls without side effects. if (CI->mayHaveSideEffects()) { @@ -364,7 +364,7 @@ TailCallElim::FindTRECandidate(Instruction *TI, if (&BB->front() == TI) // Make sure there is something before the terminator. return 0; - + // Scan backwards from the return, checking to see if there is a tail call in // this block. If so, set CI to it. CallInst *CI = 0; @@ -388,10 +388,10 @@ TailCallElim::FindTRECandidate(Instruction *TI, // double fabs(double f) { return __builtin_fabs(f); } // a 'fabs' call // and disable this xform in this case, because the code generator will // lower the call to fabs into inline code. - if (BB == &F->getEntryBlock() && + if (BB == &F->getEntryBlock() && FirstNonDbg(BB->front()) == CI && FirstNonDbg(llvm::next(BB->begin())) == TI && - callIsSmall(F)) { + callIsSmall(CI)) { // A single-block function with just a call and a return. Check that // the arguments match. CallSite::arg_iterator I = CallSite(CI).arg_begin(), @@ -432,7 +432,7 @@ bool TailCallElim::EliminateRecursiveTailCall(CallInst *CI, ReturnInst *Ret, BasicBlock::iterator BBI = CI; for (++BBI; &*BBI != Ret; ++BBI) { if (CanMoveAboveCall(BBI, CI)) continue; - + // If we can't move the instruction above the call, it might be because it // is an associative and commutative operation that could be transformed // using accumulator recursion elimination. Check to see if this is the diff --git a/lib/Transforms/Utils/BasicBlockUtils.cpp b/lib/Transforms/Utils/BasicBlockUtils.cpp index 3859a1a..2679b93 100644 --- a/lib/Transforms/Utils/BasicBlockUtils.cpp +++ b/lib/Transforms/Utils/BasicBlockUtils.cpp @@ -659,10 +659,26 @@ ReturnInst *llvm::FoldReturnIntoUncondBranch(ReturnInst *RI, BasicBlock *BB, // If the return instruction returns a value, and if the value was a // PHI node in "BB", propagate the right value into the return. for (User::op_iterator i = NewRet->op_begin(), e = NewRet->op_end(); - i != e; ++i) - if (PHINode *PN = dyn_cast(*i)) - if (PN->getParent() == BB) - *i = PN->getIncomingValueForBlock(Pred); + i != e; ++i) { + Value *V = *i; + Instruction *NewBC = 0; + if (BitCastInst *BCI = dyn_cast(V)) { + // Return value might be bitcasted. Clone and insert it before the + // return instruction. 
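// (Sketch of the case handled by the code below, names hypothetical: when
//  the folded block returns a bitcast of one of its own PHIs,
//    %p = phi i8* [ %a, %pred1 ], [ %b, %pred2 ]
//    %c = bitcast i8* %p to i32*
//    ret i32* %c
//  the bitcast is cloned in front of the predecessor's new return and its
//  operand rewired to that predecessor's incoming value, e.g. %a in
//  %pred1.)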
+ V = BCI->getOperand(0); + NewBC = BCI->clone(); + Pred->getInstList().insert(NewRet, NewBC); + *i = NewBC; + } + if (PHINode *PN = dyn_cast(V)) { + if (PN->getParent() == BB) { + if (NewBC) + NewBC->setOperand(0, PN->getIncomingValueForBlock(Pred)); + else + *i = PN->getIncomingValueForBlock(Pred); + } + } + } // Update any PHI nodes in the returning block to realize that we no // longer branch to them. @@ -671,12 +687,3 @@ ReturnInst *llvm::FoldReturnIntoUncondBranch(ReturnInst *RI, BasicBlock *BB, return cast(NewRet); } -/// GetFirstDebugLocInBasicBlock - Return first valid DebugLoc entry in a -/// given basic block. -DebugLoc llvm::GetFirstDebugLocInBasicBlock(const BasicBlock *BB) { - if (const Instruction *I = BB->getFirstNonPHI()) - return I->getDebugLoc(); - // Scanning entire block may be too expensive, if the first instruction - // does not have valid location info. - return DebugLoc(); -} diff --git a/lib/Transforms/Utils/BreakCriticalEdges.cpp b/lib/Transforms/Utils/BreakCriticalEdges.cpp index 2a8e9b8..6b04e3d 100644 --- a/lib/Transforms/Utils/BreakCriticalEdges.cpp +++ b/lib/Transforms/Utils/BreakCriticalEdges.cpp @@ -122,7 +122,7 @@ bool llvm::isCriticalEdge(const TerminatorInst *TI, unsigned SuccNum, /// new PHIs, as needed. Preds is a list of preds inside the loop, SplitBB /// is the new loop exit block, and DestBB is the old loop exit, now the /// successor of SplitBB. -static void createPHIsForSplitLoopExit(SmallVectorImpl &Preds, +static void createPHIsForSplitLoopExit(ArrayRef Preds, BasicBlock *SplitBB, BasicBlock *DestBB) { // SplitBB shouldn't have anything non-trivial in it yet. @@ -341,11 +341,8 @@ BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum, "Split point for loop exit is contained in loop!"); // Update LCSSA form in the newly created exit block. - if (P->mustPreserveAnalysisID(LCSSAID)) { - SmallVector OrigPred; - OrigPred.push_back(TIBB); - createPHIsForSplitLoopExit(OrigPred, NewBB, DestBB); - } + if (P->mustPreserveAnalysisID(LCSSAID)) + createPHIsForSplitLoopExit(TIBB, NewBB, DestBB); // For each unique exit block... // FIXME: This code is functionally equivalent to the corresponding diff --git a/lib/Transforms/Utils/BuildLibCalls.cpp b/lib/Transforms/Utils/BuildLibCalls.cpp index a808303..e13fd71 100644 --- a/lib/Transforms/Utils/BuildLibCalls.cpp +++ b/lib/Transforms/Utils/BuildLibCalls.cpp @@ -12,18 +12,18 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Utils/BuildLibCalls.h" -#include "llvm/Type.h" #include "llvm/Constants.h" #include "llvm/Function.h" +#include "llvm/IRBuilder.h" +#include "llvm/Intrinsics.h" #include "llvm/Intrinsics.h" #include "llvm/LLVMContext.h" +#include "llvm/LLVMContext.h" #include "llvm/Module.h" -#include "llvm/Support/IRBuilder.h" +#include "llvm/Type.h" +#include "llvm/ADT/SmallString.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetLibraryInfo.h" -#include "llvm/LLVMContext.h" -#include "llvm/Intrinsics.h" -#include "llvm/ADT/SmallString.h" using namespace llvm; @@ -34,7 +34,11 @@ Value *llvm::CastToCStr(Value *V, IRBuilder<> &B) { /// EmitStrLen - Emit a call to the strlen function to the builder, for the /// specified pointer. This always returns an integer value of size intptr_t. 
-Value *llvm::EmitStrLen(Value *Ptr, IRBuilder<> &B, const TargetData *TD) { +Value *llvm::EmitStrLen(Value *Ptr, IRBuilder<> &B, const TargetData *TD, + const TargetLibraryInfo *TLI) { + if (!TLI->has(LibFunc::strlen)) + return 0; + Module *M = B.GetInsertBlock()->getParent()->getParent(); AttributeWithIndex AWI[2]; AWI[0] = AttributeWithIndex::get(1, Attribute::NoCapture); @@ -42,7 +46,7 @@ Value *llvm::EmitStrLen(Value *Ptr, IRBuilder<> &B, const TargetData *TD) { Attribute::NoUnwind); LLVMContext &Context = B.GetInsertBlock()->getContext(); - Constant *StrLen = M->getOrInsertFunction("strlen", AttrListPtr::get(AWI, 2), + Constant *StrLen = M->getOrInsertFunction("strlen", AttrListPtr::get(AWI), TD->getIntPtrType(Context), B.getInt8PtrTy(), NULL); @@ -53,18 +57,48 @@ Value *llvm::EmitStrLen(Value *Ptr, IRBuilder<> &B, const TargetData *TD) { return CI; } +/// EmitStrNLen - Emit a call to the strnlen function to the builder, for the +/// specified pointer. Ptr is required to be some pointer type, MaxLen must +/// be of size_t type, and the return value has 'intptr_t' type. +Value *llvm::EmitStrNLen(Value *Ptr, Value *MaxLen, IRBuilder<> &B, + const TargetData *TD, const TargetLibraryInfo *TLI) { + if (!TLI->has(LibFunc::strnlen)) + return 0; + + Module *M = B.GetInsertBlock()->getParent()->getParent(); + AttributeWithIndex AWI[2]; + AWI[0] = AttributeWithIndex::get(1, Attribute::NoCapture); + AWI[1] = AttributeWithIndex::get(~0u, Attribute::ReadOnly | + Attribute::NoUnwind); + + LLVMContext &Context = B.GetInsertBlock()->getContext(); + Constant *StrNLen = M->getOrInsertFunction("strnlen", AttrListPtr::get(AWI), + TD->getIntPtrType(Context), + B.getInt8PtrTy(), + TD->getIntPtrType(Context), + NULL); + CallInst *CI = B.CreateCall2(StrNLen, CastToCStr(Ptr, B), MaxLen, "strnlen"); + if (const Function *F = dyn_cast(StrNLen->stripPointerCasts())) + CI->setCallingConv(F->getCallingConv()); + + return CI; +} + /// EmitStrChr - Emit a call to the strchr function to the builder, for the /// specified pointer and character. Ptr is required to be some pointer type, /// and the return value has 'i8*' type. Value *llvm::EmitStrChr(Value *Ptr, char C, IRBuilder<> &B, - const TargetData *TD) { + const TargetData *TD, const TargetLibraryInfo *TLI) { + if (!TLI->has(LibFunc::strchr)) + return 0; + Module *M = B.GetInsertBlock()->getParent()->getParent(); AttributeWithIndex AWI = AttributeWithIndex::get(~0u, Attribute::ReadOnly | Attribute::NoUnwind); Type *I8Ptr = B.getInt8PtrTy(); Type *I32Ty = B.getInt32Ty(); - Constant *StrChr = M->getOrInsertFunction("strchr", AttrListPtr::get(&AWI, 1), + Constant *StrChr = M->getOrInsertFunction("strchr", AttrListPtr::get(AWI), I8Ptr, I8Ptr, I32Ty, NULL); CallInst *CI = B.CreateCall2(StrChr, CastToCStr(Ptr, B), ConstantInt::get(I32Ty, C), "strchr"); @@ -75,7 +109,11 @@ Value *llvm::EmitStrChr(Value *Ptr, char C, IRBuilder<> &B, /// EmitStrNCmp - Emit a call to the strncmp function to the builder. 
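A minimal caller-side sketch, not part of the imported patch, of the pattern this change establishes: with the extra TargetLibraryInfo parameter the emitters can now refuse to emit, so callers must tolerate a null return. The helper name is hypothetical; the EmitStrLen signature is the one added above, and the assumption is that the instruction being replaced produces a value of the same type as the strlen result.

  #include "llvm/IRBuilder.h"
  #include "llvm/Target/TargetData.h"
  #include "llvm/Target/TargetLibraryInfo.h"
  #include "llvm/Transforms/Utils/BuildLibCalls.h"
  using namespace llvm;

  static bool replaceWithStrLenExample(Instruction *I, Value *Ptr,
                                       const TargetData *TD,
                                       const TargetLibraryInfo *TLI) {
    IRBuilder<> B(I);
    Value *Len = EmitStrLen(Ptr, B, TD, TLI);
    if (!Len)   // TLI->has(LibFunc::strlen) was false; skip the transform.
      return false;
    // Assumes I has the same type as the emitted strlen call.
    I->replaceAllUsesWith(Len);
    I->eraseFromParent();
    return true;
  }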
Value *llvm::EmitStrNCmp(Value *Ptr1, Value *Ptr2, Value *Len, - IRBuilder<> &B, const TargetData *TD) { + IRBuilder<> &B, const TargetData *TD, + const TargetLibraryInfo *TLI) { + if (!TLI->has(LibFunc::strncmp)) + return 0; + Module *M = B.GetInsertBlock()->getParent()->getParent(); AttributeWithIndex AWI[3]; AWI[0] = AttributeWithIndex::get(1, Attribute::NoCapture); @@ -84,7 +122,7 @@ Value *llvm::EmitStrNCmp(Value *Ptr1, Value *Ptr2, Value *Len, Attribute::NoUnwind); LLVMContext &Context = B.GetInsertBlock()->getContext(); - Value *StrNCmp = M->getOrInsertFunction("strncmp", AttrListPtr::get(AWI, 3), + Value *StrNCmp = M->getOrInsertFunction("strncmp", AttrListPtr::get(AWI), B.getInt32Ty(), B.getInt8PtrTy(), B.getInt8PtrTy(), @@ -101,13 +139,17 @@ Value *llvm::EmitStrNCmp(Value *Ptr1, Value *Ptr2, Value *Len, /// EmitStrCpy - Emit a call to the strcpy function to the builder, for the /// specified pointer arguments. Value *llvm::EmitStrCpy(Value *Dst, Value *Src, IRBuilder<> &B, - const TargetData *TD, StringRef Name) { + const TargetData *TD, const TargetLibraryInfo *TLI, + StringRef Name) { + if (!TLI->has(LibFunc::strcpy)) + return 0; + Module *M = B.GetInsertBlock()->getParent()->getParent(); AttributeWithIndex AWI[2]; AWI[0] = AttributeWithIndex::get(2, Attribute::NoCapture); AWI[1] = AttributeWithIndex::get(~0u, Attribute::NoUnwind); Type *I8Ptr = B.getInt8PtrTy(); - Value *StrCpy = M->getOrInsertFunction(Name, AttrListPtr::get(AWI, 2), + Value *StrCpy = M->getOrInsertFunction(Name, AttrListPtr::get(AWI), I8Ptr, I8Ptr, I8Ptr, NULL); CallInst *CI = B.CreateCall2(StrCpy, CastToCStr(Dst, B), CastToCStr(Src, B), Name); @@ -119,13 +161,17 @@ Value *llvm::EmitStrCpy(Value *Dst, Value *Src, IRBuilder<> &B, /// EmitStrNCpy - Emit a call to the strncpy function to the builder, for the /// specified pointer arguments. Value *llvm::EmitStrNCpy(Value *Dst, Value *Src, Value *Len, - IRBuilder<> &B, const TargetData *TD, StringRef Name) { + IRBuilder<> &B, const TargetData *TD, + const TargetLibraryInfo *TLI, StringRef Name) { + if (!TLI->has(LibFunc::strncpy)) + return 0; + Module *M = B.GetInsertBlock()->getParent()->getParent(); AttributeWithIndex AWI[2]; AWI[0] = AttributeWithIndex::get(2, Attribute::NoCapture); AWI[1] = AttributeWithIndex::get(~0u, Attribute::NoUnwind); Type *I8Ptr = B.getInt8PtrTy(); - Value *StrNCpy = M->getOrInsertFunction(Name, AttrListPtr::get(AWI, 2), + Value *StrNCpy = M->getOrInsertFunction(Name, AttrListPtr::get(AWI), I8Ptr, I8Ptr, I8Ptr, Len->getType(), NULL); CallInst *CI = B.CreateCall3(StrNCpy, CastToCStr(Dst, B), CastToCStr(Src, B), @@ -139,13 +185,17 @@ Value *llvm::EmitStrNCpy(Value *Dst, Value *Src, Value *Len, /// This expects that the Len and ObjSize have type 'intptr_t' and Dst/Src /// are pointers. Value *llvm::EmitMemCpyChk(Value *Dst, Value *Src, Value *Len, Value *ObjSize, - IRBuilder<> &B, const TargetData *TD) { + IRBuilder<> &B, const TargetData *TD, + const TargetLibraryInfo *TLI) { + if (!TLI->has(LibFunc::memcpy_chk)) + return 0; + Module *M = B.GetInsertBlock()->getParent()->getParent(); AttributeWithIndex AWI; AWI = AttributeWithIndex::get(~0u, Attribute::NoUnwind); LLVMContext &Context = B.GetInsertBlock()->getContext(); Value *MemCpy = M->getOrInsertFunction("__memcpy_chk", - AttrListPtr::get(&AWI, 1), + AttrListPtr::get(AWI), B.getInt8PtrTy(), B.getInt8PtrTy(), B.getInt8PtrTy(), @@ -162,12 +212,16 @@ Value *llvm::EmitMemCpyChk(Value *Dst, Value *Src, Value *Len, Value *ObjSize, /// EmitMemChr - Emit a call to the memchr function. 
This assumes that Ptr is /// a pointer, Val is an i32 value, and Len is an 'intptr_t' value. Value *llvm::EmitMemChr(Value *Ptr, Value *Val, - Value *Len, IRBuilder<> &B, const TargetData *TD) { + Value *Len, IRBuilder<> &B, const TargetData *TD, + const TargetLibraryInfo *TLI) { + if (!TLI->has(LibFunc::memchr)) + return 0; + Module *M = B.GetInsertBlock()->getParent()->getParent(); AttributeWithIndex AWI; AWI = AttributeWithIndex::get(~0u, Attribute::ReadOnly | Attribute::NoUnwind); LLVMContext &Context = B.GetInsertBlock()->getContext(); - Value *MemChr = M->getOrInsertFunction("memchr", AttrListPtr::get(&AWI, 1), + Value *MemChr = M->getOrInsertFunction("memchr", AttrListPtr::get(AWI), B.getInt8PtrTy(), B.getInt8PtrTy(), B.getInt32Ty(), @@ -183,7 +237,11 @@ Value *llvm::EmitMemChr(Value *Ptr, Value *Val, /// EmitMemCmp - Emit a call to the memcmp function. Value *llvm::EmitMemCmp(Value *Ptr1, Value *Ptr2, - Value *Len, IRBuilder<> &B, const TargetData *TD) { + Value *Len, IRBuilder<> &B, const TargetData *TD, + const TargetLibraryInfo *TLI) { + if (!TLI->has(LibFunc::memcmp)) + return 0; + Module *M = B.GetInsertBlock()->getParent()->getParent(); AttributeWithIndex AWI[3]; AWI[0] = AttributeWithIndex::get(1, Attribute::NoCapture); @@ -192,7 +250,7 @@ Value *llvm::EmitMemCmp(Value *Ptr1, Value *Ptr2, Attribute::NoUnwind); LLVMContext &Context = B.GetInsertBlock()->getContext(); - Value *MemCmp = M->getOrInsertFunction("memcmp", AttrListPtr::get(AWI, 3), + Value *MemCmp = M->getOrInsertFunction("memcmp", AttrListPtr::get(AWI), B.getInt32Ty(), B.getInt8PtrTy(), B.getInt8PtrTy(), @@ -236,7 +294,11 @@ Value *llvm::EmitUnaryFloatFnCall(Value *Op, StringRef Name, IRBuilder<> &B, /// EmitPutChar - Emit a call to the putchar function. This assumes that Char /// is an integer. -Value *llvm::EmitPutChar(Value *Char, IRBuilder<> &B, const TargetData *TD) { +Value *llvm::EmitPutChar(Value *Char, IRBuilder<> &B, const TargetData *TD, + const TargetLibraryInfo *TLI) { + if (!TLI->has(LibFunc::putchar)) + return 0; + Module *M = B.GetInsertBlock()->getParent()->getParent(); Value *PutChar = M->getOrInsertFunction("putchar", B.getInt32Ty(), B.getInt32Ty(), NULL); @@ -254,33 +316,40 @@ Value *llvm::EmitPutChar(Value *Char, IRBuilder<> &B, const TargetData *TD) { /// EmitPutS - Emit a call to the puts function. This assumes that Str is /// some pointer. -void llvm::EmitPutS(Value *Str, IRBuilder<> &B, const TargetData *TD) { +Value *llvm::EmitPutS(Value *Str, IRBuilder<> &B, const TargetData *TD, + const TargetLibraryInfo *TLI) { + if (!TLI->has(LibFunc::puts)) + return 0; + Module *M = B.GetInsertBlock()->getParent()->getParent(); AttributeWithIndex AWI[2]; AWI[0] = AttributeWithIndex::get(1, Attribute::NoCapture); AWI[1] = AttributeWithIndex::get(~0u, Attribute::NoUnwind); - Value *PutS = M->getOrInsertFunction("puts", AttrListPtr::get(AWI, 2), + Value *PutS = M->getOrInsertFunction("puts", AttrListPtr::get(AWI), B.getInt32Ty(), B.getInt8PtrTy(), NULL); CallInst *CI = B.CreateCall(PutS, CastToCStr(Str, B), "puts"); if (const Function *F = dyn_cast(PutS->stripPointerCasts())) CI->setCallingConv(F->getCallingConv()); - + return CI; } /// EmitFPutC - Emit a call to the fputc function. This assumes that Char is /// an integer and File is a pointer to FILE. 
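A sketch of the knob these guards key off of, illustrative rather than taken from the patch: TargetLibraryInfo::setUnavailable is assumed from the same TargetLibraryInfo interface the guards use, and once a routine is marked unavailable every guarded emitter above returns 0 instead of synthesizing a call.

  #include "llvm/ADT/Triple.h"
  #include "llvm/Module.h"
  #include "llvm/Target/TargetLibraryInfo.h"
  using namespace llvm;

  static void configureExample(Module &M) {
    TargetLibraryInfo TLI(Triple(M.getTargetTriple()));
    TLI.setUnavailable(LibFunc::strnlen); // e.g. a libc without strnlen
    bool CanEmit = TLI.has(LibFunc::strnlen); // now false, so EmitStrNLen
    (void)CanEmit;                            // above would return 0
  }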
-void llvm::EmitFPutC(Value *Char, Value *File, IRBuilder<> &B, - const TargetData *TD) { +Value *llvm::EmitFPutC(Value *Char, Value *File, IRBuilder<> &B, + const TargetData *TD, const TargetLibraryInfo *TLI) { + if (!TLI->has(LibFunc::fputc)) + return 0; + Module *M = B.GetInsertBlock()->getParent()->getParent(); AttributeWithIndex AWI[2]; AWI[0] = AttributeWithIndex::get(2, Attribute::NoCapture); AWI[1] = AttributeWithIndex::get(~0u, Attribute::NoUnwind); Constant *F; if (File->getType()->isPointerTy()) - F = M->getOrInsertFunction("fputc", AttrListPtr::get(AWI, 2), + F = M->getOrInsertFunction("fputc", AttrListPtr::get(AWI), B.getInt32Ty(), B.getInt32Ty(), File->getType(), NULL); @@ -295,12 +364,16 @@ void llvm::EmitFPutC(Value *Char, Value *File, IRBuilder<> &B, if (const Function *Fn = dyn_cast(F->stripPointerCasts())) CI->setCallingConv(Fn->getCallingConv()); + return CI; } /// EmitFPutS - Emit a call to the puts function. Str is required to be a /// pointer and File is a pointer to FILE. -void llvm::EmitFPutS(Value *Str, Value *File, IRBuilder<> &B, - const TargetData *TD, const TargetLibraryInfo *TLI) { +Value *llvm::EmitFPutS(Value *Str, Value *File, IRBuilder<> &B, + const TargetData *TD, const TargetLibraryInfo *TLI) { + if (!TLI->has(LibFunc::fputs)) + return 0; + Module *M = B.GetInsertBlock()->getParent()->getParent(); AttributeWithIndex AWI[3]; AWI[0] = AttributeWithIndex::get(1, Attribute::NoCapture); @@ -309,7 +382,7 @@ void llvm::EmitFPutS(Value *Str, Value *File, IRBuilder<> &B, StringRef FPutsName = TLI->getName(LibFunc::fputs); Constant *F; if (File->getType()->isPointerTy()) - F = M->getOrInsertFunction(FPutsName, AttrListPtr::get(AWI, 3), + F = M->getOrInsertFunction(FPutsName, AttrListPtr::get(AWI), B.getInt32Ty(), B.getInt8PtrTy(), File->getType(), NULL); @@ -321,13 +394,17 @@ void llvm::EmitFPutS(Value *Str, Value *File, IRBuilder<> &B, if (const Function *Fn = dyn_cast(F->stripPointerCasts())) CI->setCallingConv(Fn->getCallingConv()); + return CI; } /// EmitFWrite - Emit a call to the fwrite function. This assumes that Ptr is /// a pointer, Size is an 'intptr_t', and File is a pointer to FILE. -void llvm::EmitFWrite(Value *Ptr, Value *Size, Value *File, - IRBuilder<> &B, const TargetData *TD, - const TargetLibraryInfo *TLI) { +Value *llvm::EmitFWrite(Value *Ptr, Value *Size, Value *File, + IRBuilder<> &B, const TargetData *TD, + const TargetLibraryInfo *TLI) { + if (!TLI->has(LibFunc::fwrite)) + return 0; + Module *M = B.GetInsertBlock()->getParent()->getParent(); AttributeWithIndex AWI[3]; AWI[0] = AttributeWithIndex::get(1, Attribute::NoCapture); @@ -337,7 +414,7 @@ void llvm::EmitFWrite(Value *Ptr, Value *Size, Value *File, StringRef FWriteName = TLI->getName(LibFunc::fwrite); Constant *F; if (File->getType()->isPointerTy()) - F = M->getOrInsertFunction(FWriteName, AttrListPtr::get(AWI, 3), + F = M->getOrInsertFunction(FWriteName, AttrListPtr::get(AWI), TD->getIntPtrType(Context), B.getInt8PtrTy(), TD->getIntPtrType(Context), @@ -354,11 +431,13 @@ void llvm::EmitFWrite(Value *Ptr, Value *Size, Value *File, if (const Function *Fn = dyn_cast(F->stripPointerCasts())) CI->setCallingConv(Fn->getCallingConv()); + return CI; } SimplifyFortifiedLibCalls::~SimplifyFortifiedLibCalls() { } -bool SimplifyFortifiedLibCalls::fold(CallInst *CI, const TargetData *TD) { +bool SimplifyFortifiedLibCalls::fold(CallInst *CI, const TargetData *TD, + const TargetLibraryInfo *TLI) { // We really need TargetData for later. 
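A hedged sketch of why the fputc/fputs/fwrite emitters change from void to returning the call: a simplifier can propagate "library function unavailable" as an ordinary failure. The folding helper below is hypothetical; it assumes the call being folded has the same i32 result type as fputs.

  static bool foldToFPutSExample(CallInst *CI, Value *Str, Value *File,
                                 IRBuilder<> &B, const TargetData *TD,
                                 const TargetLibraryInfo *TLI) {
    Value *NewCI = EmitFPutS(Str, File, B, TD, TLI);
    if (!NewCI)          // fputs unavailable: leave the original call alone.
      return false;
    CI->replaceAllUsesWith(NewCI); // assumes matching i32 result types
    CI->eraseFromParent();
    return true;
  }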
if (!TD) return false; @@ -446,7 +525,9 @@ bool SimplifyFortifiedLibCalls::fold(CallInst *CI, const TargetData *TD) { // string lengths for varying. if (isFoldable(2, 1, true)) { Value *Ret = EmitStrCpy(CI->getArgOperand(0), CI->getArgOperand(1), B, TD, - Name.substr(2, 6)); + TLI, Name.substr(2, 6)); + if (!Ret) + return false; replaceCall(Ret); return true; } @@ -464,7 +545,10 @@ bool SimplifyFortifiedLibCalls::fold(CallInst *CI, const TargetData *TD) { if (isFoldable(3, 2, false)) { Value *Ret = EmitStrNCpy(CI->getArgOperand(0), CI->getArgOperand(1), - CI->getArgOperand(2), B, TD, Name.substr(2, 7)); + CI->getArgOperand(2), B, TD, TLI, + Name.substr(2, 7)); + if (!Ret) + return false; replaceCall(Ret); return true; } diff --git a/lib/Transforms/Utils/CMakeLists.txt b/lib/Transforms/Utils/CMakeLists.txt index 7f5cb5e..4ff31ca 100644 --- a/lib/Transforms/Utils/CMakeLists.txt +++ b/lib/Transforms/Utils/CMakeLists.txt @@ -29,3 +29,5 @@ add_llvm_library(LLVMTransformUtils Utils.cpp ValueMapper.cpp ) + +add_dependencies(LLVMTransformUtils intrinsics_gen) diff --git a/lib/Transforms/Utils/CloneFunction.cpp b/lib/Transforms/Utils/CloneFunction.cpp index 20052a4..99237b8 100644 --- a/lib/Transforms/Utils/CloneFunction.cpp +++ b/lib/Transforms/Utils/CloneFunction.cpp @@ -15,6 +15,7 @@ #include "llvm/Transforms/Utils/Cloning.h" #include "llvm/Constants.h" +#include "llvm/DebugInfo.h" #include "llvm/DerivedTypes.h" #include "llvm/Instructions.h" #include "llvm/IntrinsicInst.h" @@ -28,7 +29,6 @@ #include "llvm/Transforms/Utils/ValueMapper.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/InstructionSimplify.h" -#include "llvm/Analysis/DebugInfo.h" #include "llvm/ADT/SmallVector.h" #include using namespace llvm; diff --git a/lib/Transforms/Utils/CloneModule.cpp b/lib/Transforms/Utils/CloneModule.cpp index a0e027b..1dac6b5 100644 --- a/lib/Transforms/Utils/CloneModule.cpp +++ b/lib/Transforms/Utils/CloneModule.cpp @@ -53,7 +53,7 @@ Module *llvm::CloneModule(const Module *M, ValueToValueMapTy &VMap) { I->isConstant(), I->getLinkage(), (Constant*) 0, I->getName(), (GlobalVariable*) 0, - I->isThreadLocal(), + I->getThreadLocalMode(), I->getType()->getAddressSpace()); GV->copyAttributesFrom(I); VMap[I] = GV; diff --git a/lib/Transforms/Utils/CodeExtractor.cpp b/lib/Transforms/Utils/CodeExtractor.cpp index e8c0b80..c545cd6 100644 --- a/lib/Transforms/Utils/CodeExtractor.cpp +++ b/lib/Transforms/Utils/CodeExtractor.cpp @@ -13,7 +13,7 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Transforms/Utils/FunctionUtils.h" +#include "llvm/Transforms/Utils/CodeExtractor.h" #include "llvm/Constants.h" #include "llvm/DerivedTypes.h" #include "llvm/Instructions.h" @@ -23,6 +23,8 @@ #include "llvm/Pass.h" #include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/RegionInfo.h" +#include "llvm/Analysis/RegionIterator.h" #include "llvm/Analysis/Verifier.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Support/CommandLine.h" @@ -43,61 +45,139 @@ static cl::opt AggregateArgsOpt("aggregate-extracted-args", cl::Hidden, cl::desc("Aggregate arguments to code-extracted functions")); -namespace { - class CodeExtractor { - typedef SetVector Values; - SetVector BlocksToExtract; - DominatorTree* DT; - bool AggregateArgs; - unsigned NumExitBlocks; - Type *RetTy; - public: - CodeExtractor(DominatorTree* dt = 0, bool AggArgs = false) - : DT(dt), AggregateArgs(AggArgs||AggregateArgsOpt), 
NumExitBlocks(~0U) {} - - Function *ExtractCodeRegion(ArrayRef code); - - bool isEligible(ArrayRef code); - - private: - /// definedInRegion - Return true if the specified value is defined in the - /// extracted region. - bool definedInRegion(Value *V) const { - if (Instruction *I = dyn_cast(V)) - if (BlocksToExtract.count(I->getParent())) - return true; - return false; - } +/// \brief Test whether a block is valid for extraction. +static bool isBlockValidForExtraction(const BasicBlock &BB) { + // Landing pads must be in the function where they were inserted for cleanup. + if (BB.isLandingPad()) + return false; - /// definedInCaller - Return true if the specified value is defined in the - /// function being code extracted, but not in the region being extracted. - /// These values must be passed in as live-ins to the function. - bool definedInCaller(Value *V) const { - if (isa(V)) return true; - if (Instruction *I = dyn_cast(V)) - if (!BlocksToExtract.count(I->getParent())) - return true; + // Don't hoist code containing allocas, invokes, or vastarts. + for (BasicBlock::const_iterator I = BB.begin(), E = BB.end(); I != E; ++I) { + if (isa(I) || isa(I)) return false; + if (const CallInst *CI = dyn_cast(I)) + if (const Function *F = CI->getCalledFunction()) + if (F->getIntrinsicID() == Intrinsic::vastart) + return false; + } + + return true; +} + +/// \brief Build a set of blocks to extract if the input blocks are viable. +template +static SetVector buildExtractionBlockSet(IteratorT BBBegin, + IteratorT BBEnd) { + SetVector Result; + + assert(BBBegin != BBEnd); + + // Loop over the blocks, adding them to our set-vector, and aborting with an + // empty set if we encounter invalid blocks. + for (IteratorT I = BBBegin, E = BBEnd; I != E; ++I) { + if (!Result.insert(*I)) + llvm_unreachable("Repeated basic blocks in extraction input"); + + if (!isBlockValidForExtraction(**I)) { + Result.clear(); + return Result; } + } + +#ifndef NDEBUG + for (SetVector::iterator I = llvm::next(Result.begin()), + E = Result.end(); + I != E; ++I) + for (pred_iterator PI = pred_begin(*I), PE = pred_end(*I); + PI != PE; ++PI) + assert(Result.count(*PI) && + "No blocks in this region may have entries from outside the region" + " except for the first block!"); +#endif + + return Result; +} + +/// \brief Helper to call buildExtractionBlockSet with an ArrayRef. +static SetVector +buildExtractionBlockSet(ArrayRef BBs) { + return buildExtractionBlockSet(BBs.begin(), BBs.end()); +} + +/// \brief Helper to call buildExtractionBlockSet with a RegionNode. +static SetVector +buildExtractionBlockSet(const RegionNode &RN) { + if (!RN.isSubRegion()) + // Just a single BasicBlock. 
+ return buildExtractionBlockSet(RN.getNodeAs()); - void severSplitPHINodes(BasicBlock *&Header); - void splitReturnBlocks(); - void findInputsOutputs(Values &inputs, Values &outputs); + const Region &R = *RN.getNodeAs(); - Function *constructFunction(const Values &inputs, - const Values &outputs, - BasicBlock *header, - BasicBlock *newRootNode, BasicBlock *newHeader, - Function *oldFunction, Module *M); + return buildExtractionBlockSet(R.block_begin(), R.block_end()); +} - void moveCodeToFunction(Function *newFunction); +CodeExtractor::CodeExtractor(BasicBlock *BB, bool AggregateArgs) + : DT(0), AggregateArgs(AggregateArgs||AggregateArgsOpt), + Blocks(buildExtractionBlockSet(BB)), NumExitBlocks(~0U) {} + +CodeExtractor::CodeExtractor(ArrayRef BBs, DominatorTree *DT, + bool AggregateArgs) + : DT(DT), AggregateArgs(AggregateArgs||AggregateArgsOpt), + Blocks(buildExtractionBlockSet(BBs)), NumExitBlocks(~0U) {} + +CodeExtractor::CodeExtractor(DominatorTree &DT, Loop &L, bool AggregateArgs) + : DT(&DT), AggregateArgs(AggregateArgs||AggregateArgsOpt), + Blocks(buildExtractionBlockSet(L.getBlocks())), NumExitBlocks(~0U) {} + +CodeExtractor::CodeExtractor(DominatorTree &DT, const RegionNode &RN, + bool AggregateArgs) + : DT(&DT), AggregateArgs(AggregateArgs||AggregateArgsOpt), + Blocks(buildExtractionBlockSet(RN)), NumExitBlocks(~0U) {} + +/// definedInRegion - Return true if the specified value is defined in the +/// extracted region. +static bool definedInRegion(const SetVector &Blocks, Value *V) { + if (Instruction *I = dyn_cast(V)) + if (Blocks.count(I->getParent())) + return true; + return false; +} - void emitCallAndSwitchStatement(Function *newFunction, - BasicBlock *newHeader, - Values &inputs, - Values &outputs); +/// definedInCaller - Return true if the specified value is defined in the +/// function being code extracted, but not in the region being extracted. +/// These values must be passed in as live-ins to the function. +static bool definedInCaller(const SetVector &Blocks, Value *V) { + if (isa(V)) return true; + if (Instruction *I = dyn_cast(V)) + if (!Blocks.count(I->getParent())) + return true; + return false; +} - }; +void CodeExtractor::findInputsOutputs(ValueSet &Inputs, + ValueSet &Outputs) const { + for (SetVector::const_iterator I = Blocks.begin(), + E = Blocks.end(); + I != E; ++I) { + BasicBlock *BB = *I; + + // If a used value is defined outside the region, it's an input. If an + // instruction is used outside the region, it's an output. + for (BasicBlock::iterator II = BB->begin(), IE = BB->end(); + II != IE; ++II) { + for (User::op_iterator OI = II->op_begin(), OE = II->op_end(); + OI != OE; ++OI) + if (definedInCaller(Blocks, *OI)) + Inputs.insert(*OI); + + for (Value::use_iterator UI = II->use_begin(), UE = II->use_end(); + UI != UE; ++UI) + if (!definedInRegion(Blocks, *UI)) { + Outputs.insert(II); + break; + } + } + } } /// severSplitPHINodes - If a PHI node has multiple inputs from outside of the @@ -115,7 +195,7 @@ void CodeExtractor::severSplitPHINodes(BasicBlock *&Header) { // than one entry from outside the region. If so, we need to sever the // header block into two. 
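A minimal usage sketch of the refactored entry points, replacing the old ExtractLoop/ExtractBasicBlock free functions removed later in this file; the constructor and extractCodeRegion are the ones introduced above, and the wrapper name is invented for illustration.

  #include "llvm/Analysis/Dominators.h"
  #include "llvm/Analysis/LoopInfo.h"
  #include "llvm/Transforms/Utils/CodeExtractor.h"
  using namespace llvm;

  static Function *outlineLoopExample(DominatorTree &DT, Loop &L) {
    CodeExtractor CE(DT, L, /*AggregateArgs=*/false);
    // Invalid regions (landing pads, allocas, va_start) yield an empty
    // block set, and extractCodeRegion() then returns 0 instead of
    // asserting partway through the rewrite.
    return CE.extractCodeRegion();
  }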
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) - if (BlocksToExtract.count(PN->getIncomingBlock(i))) + if (Blocks.count(PN->getIncomingBlock(i))) ++NumPredsFromRegion; else ++NumPredsOutsideRegion; @@ -136,8 +216,8 @@ void CodeExtractor::severSplitPHINodes(BasicBlock *&Header) { // We only want to code extract the second block now, and it becomes the new // header of the region. BasicBlock *OldPred = Header; - BlocksToExtract.remove(OldPred); - BlocksToExtract.insert(NewBB); + Blocks.remove(OldPred); + Blocks.insert(NewBB); Header = NewBB; // Okay, update dominator sets. The blocks that dominate the new one are the @@ -152,7 +232,7 @@ void CodeExtractor::severSplitPHINodes(BasicBlock *&Header) { // Loop over all of the predecessors of OldPred that are in the region, // changing them to branch to NewBB instead. for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) - if (BlocksToExtract.count(PN->getIncomingBlock(i))) { + if (Blocks.count(PN->getIncomingBlock(i))) { TerminatorInst *TI = PN->getIncomingBlock(i)->getTerminator(); TI->replaceUsesOfWith(OldPred, NewBB); } @@ -170,7 +250,7 @@ void CodeExtractor::severSplitPHINodes(BasicBlock *&Header) { // Loop over all of the incoming value in PN, moving them to NewPN if they // are from the extracted region. for (unsigned i = 0; i != PN->getNumIncomingValues(); ++i) { - if (BlocksToExtract.count(PN->getIncomingBlock(i))) { + if (Blocks.count(PN->getIncomingBlock(i))) { NewPN->addIncoming(PN->getIncomingValue(i), PN->getIncomingBlock(i)); PN->removeIncomingValue(i); --i; @@ -181,8 +261,8 @@ void CodeExtractor::severSplitPHINodes(BasicBlock *&Header) { } void CodeExtractor::splitReturnBlocks() { - for (SetVector::iterator I = BlocksToExtract.begin(), - E = BlocksToExtract.end(); I != E; ++I) + for (SetVector::iterator I = Blocks.begin(), E = Blocks.end(); + I != E; ++I) if (ReturnInst *RI = dyn_cast((*I)->getTerminator())) { BasicBlock *New = (*I)->splitBasicBlock(RI, (*I)->getName()+".ret"); if (DT) { @@ -203,45 +283,11 @@ void CodeExtractor::splitReturnBlocks() { } } -// findInputsOutputs - Find inputs to, outputs from the code region. -// -void CodeExtractor::findInputsOutputs(Values &inputs, Values &outputs) { - std::set ExitBlocks; - for (SetVector::const_iterator ci = BlocksToExtract.begin(), - ce = BlocksToExtract.end(); ci != ce; ++ci) { - BasicBlock *BB = *ci; - - for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) { - // If a used value is defined outside the region, it's an input. If an - // instruction is used outside the region, it's an output. - for (User::op_iterator O = I->op_begin(), E = I->op_end(); O != E; ++O) - if (definedInCaller(*O)) - inputs.insert(*O); - - // Consider uses of this instruction (outputs). - for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); - UI != E; ++UI) - if (!definedInRegion(*UI)) { - outputs.insert(I); - break; - } - } // for: insts - - // Keep track of the exit blocks from the region. 
- TerminatorInst *TI = BB->getTerminator(); - for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) - if (!BlocksToExtract.count(TI->getSuccessor(i))) - ExitBlocks.insert(TI->getSuccessor(i)); - } // for: basic blocks - - NumExitBlocks = ExitBlocks.size(); -} - /// constructFunction - make a function based on inputs and outputs, as follows: /// f(in0, ..., inN, out0, ..., outN) /// -Function *CodeExtractor::constructFunction(const Values &inputs, - const Values &outputs, +Function *CodeExtractor::constructFunction(const ValueSet &inputs, + const ValueSet &outputs, BasicBlock *header, BasicBlock *newRootNode, BasicBlock *newHeader, @@ -261,15 +307,15 @@ Function *CodeExtractor::constructFunction(const Values &inputs, std::vector paramTy; // Add the types of the input values to the function's argument list - for (Values::const_iterator i = inputs.begin(), - e = inputs.end(); i != e; ++i) { + for (ValueSet::const_iterator i = inputs.begin(), e = inputs.end(); + i != e; ++i) { const Value *value = *i; DEBUG(dbgs() << "value used in func: " << *value << "\n"); paramTy.push_back(value->getType()); } // Add the types of the output values to the function's argument list. - for (Values::const_iterator I = outputs.begin(), E = outputs.end(); + for (ValueSet::const_iterator I = outputs.begin(), E = outputs.end(); I != E; ++I) { DEBUG(dbgs() << "instr used in func: " << **I << "\n"); if (AggregateArgs) @@ -326,7 +372,7 @@ Function *CodeExtractor::constructFunction(const Values &inputs, for (std::vector::iterator use = Users.begin(), useE = Users.end(); use != useE; ++use) if (Instruction* inst = dyn_cast(*use)) - if (BlocksToExtract.count(inst->getParent())) + if (Blocks.count(inst->getParent())) inst->replaceUsesOfWith(inputs[i], RewriteVal); } @@ -347,7 +393,7 @@ Function *CodeExtractor::constructFunction(const Values &inputs, // The BasicBlock which contains the branch is not in the region // modify the branch target to a new block if (TerminatorInst *TI = dyn_cast(Users[i])) - if (!BlocksToExtract.count(TI->getParent()) && + if (!Blocks.count(TI->getParent()) && TI->getParent()->getParent() == oldFunction) TI->replaceUsesOfWith(header, newHeader); @@ -373,7 +419,7 @@ static BasicBlock* FindPhiPredForUseInBlock(Value* Used, BasicBlock* BB) { /// necessary. 
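The f(in0, ..., inN, out0, ..., outN) shape described above, shown as a plain C++ analogy rather than the generated IR: inputs become ordinary parameters, and each scalar output becomes a pointer parameter written back before the region exits.

  // Conceptual only: a region reading a, b and defining sum, prod is
  // outlined (in the non-aggregate case) roughly as:
  static void extracted_example(int a, int b, int *sum, int *prod) {
    *sum = a + b;   // each output stored through its out-parameter
    *prod = a * b;
  }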
void CodeExtractor:: emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer, - Values &inputs, Values &outputs) { + ValueSet &inputs, ValueSet &outputs) { // Emit a call to the new function, passing in: *pointer to struct (if // aggregating parameters), or plan inputs and allocated memory for outputs std::vector params, StructValues, ReloadOutputs, Reloads; @@ -381,14 +427,14 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer, LLVMContext &Context = newFunction->getContext(); // Add inputs as params, or to be filled into the struct - for (Values::iterator i = inputs.begin(), e = inputs.end(); i != e; ++i) + for (ValueSet::iterator i = inputs.begin(), e = inputs.end(); i != e; ++i) if (AggregateArgs) StructValues.push_back(*i); else params.push_back(*i); // Create allocas for the outputs - for (Values::iterator i = outputs.begin(), e = outputs.end(); i != e; ++i) { + for (ValueSet::iterator i = outputs.begin(), e = outputs.end(); i != e; ++i) { if (AggregateArgs) { StructValues.push_back(*i); } else { @@ -403,7 +449,7 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer, AllocaInst *Struct = 0; if (AggregateArgs && (inputs.size() + outputs.size() > 0)) { std::vector ArgTypes; - for (Values::iterator v = StructValues.begin(), + for (ValueSet::iterator v = StructValues.begin(), ve = StructValues.end(); v != ve; ++v) ArgTypes.push_back((*v)->getType()); @@ -458,7 +504,7 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer, std::vector Users(outputs[i]->use_begin(), outputs[i]->use_end()); for (unsigned u = 0, e = Users.size(); u != e; ++u) { Instruction *inst = cast(Users[u]); - if (!BlocksToExtract.count(inst->getParent())) + if (!Blocks.count(inst->getParent())) inst->replaceUsesOfWith(outputs[i], load); } } @@ -476,11 +522,11 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer, std::map ExitBlockMap; unsigned switchVal = 0; - for (SetVector::const_iterator i = BlocksToExtract.begin(), - e = BlocksToExtract.end(); i != e; ++i) { + for (SetVector::const_iterator i = Blocks.begin(), + e = Blocks.end(); i != e; ++i) { TerminatorInst *TI = (*i)->getTerminator(); for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) - if (!BlocksToExtract.count(TI->getSuccessor(i))) { + if (!Blocks.count(TI->getSuccessor(i))) { BasicBlock *OldTarget = TI->getSuccessor(i); // add a new basic block which returns the appropriate value BasicBlock *&NewTarget = ExitBlockMap[OldTarget]; @@ -618,18 +664,19 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer, TheSwitch->setCondition(call); TheSwitch->setDefaultDest(TheSwitch->getSuccessor(NumExitBlocks)); // Remove redundant case - TheSwitch->removeCase(SwitchInst::CaseIt(TheSwitch, NumExitBlocks-1)); + SwitchInst::CaseIt ToBeRemoved(TheSwitch, NumExitBlocks-1); + TheSwitch->removeCase(ToBeRemoved); break; } } void CodeExtractor::moveCodeToFunction(Function *newFunction) { - Function *oldFunc = (*BlocksToExtract.begin())->getParent(); + Function *oldFunc = (*Blocks.begin())->getParent(); Function::BasicBlockListType &oldBlocks = oldFunc->getBasicBlockList(); Function::BasicBlockListType &newBlocks = newFunction->getBasicBlockList(); - for (SetVector::const_iterator i = BlocksToExtract.begin(), - e = BlocksToExtract.end(); i != e; ++i) { + for (SetVector::const_iterator i = Blocks.begin(), + e = Blocks.end(); i != e; ++i) { // Delete the basic block from the old function, and the list of blocks oldBlocks.remove(*i); 
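A conceptual sketch of the call-and-switch convention emitCallAndSwitchStatement builds when the region has several exit blocks, again as a plain C++ analogy with invented names: the outlined body returns a small integer, and the replacement block switches on it to pick the old successor.

  static int body_example(int x, int *out) {
    *out = x * 2;
    return x > 0 ? 0 : 1;  // exit index, one per old exit block
  }

  static void caller_example(int x) {
    int out;
    switch (body_example(x, &out)) { // the SwitchInst in codeReplacer
    case 0: /* branch to the first old exit block */ break;
    case 1: /* branch to the second old exit block */ break;
    }
  }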
@@ -638,47 +685,15 @@ void CodeExtractor::moveCodeToFunction(Function *newFunction) { } } -/// ExtractRegion - Removes a loop from a function, replaces it with a call to -/// new function. Returns pointer to the new function. -/// -/// algorithm: -/// -/// find inputs and outputs for the region -/// -/// for inputs: add to function as args, map input instr* to arg# -/// for outputs: add allocas for scalars, -/// add to func as args, map output instr* to arg# -/// -/// rewrite func to use argument #s instead of instr* -/// -/// for each scalar output in the function: at every exit, store intermediate -/// computed result back into memory. -/// -Function *CodeExtractor:: -ExtractCodeRegion(ArrayRef code) { - if (!isEligible(code)) +Function *CodeExtractor::extractCodeRegion() { + if (!isEligible()) return 0; - // 1) Find inputs, outputs - // 2) Construct new function - // * Add allocas for defs, pass as args by reference - // * Pass in uses as args - // 3) Move code region, add call instr to func - // - BlocksToExtract.insert(code.begin(), code.end()); - - Values inputs, outputs; + ValueSet inputs, outputs; // Assumption: this is a single-entry code region, and the header is the first // block in the region. - BasicBlock *header = code[0]; - - for (unsigned i = 1, e = code.size(); i != e; ++i) - for (pred_iterator PI = pred_begin(code[i]), E = pred_end(code[i]); - PI != E; ++PI) - assert(BlocksToExtract.count(*PI) && - "No blocks in this region may have entries from outside the region" - " except for the first block!"); + BasicBlock *header = *Blocks.begin(); // If we have to split PHI nodes or the entry block, do so now. severSplitPHINodes(header); @@ -703,6 +718,14 @@ ExtractCodeRegion(ArrayRef code) { // Find inputs to, outputs from the code region. findInputsOutputs(inputs, outputs); + SmallPtrSet ExitBlocks; + for (SetVector::iterator I = Blocks.begin(), E = Blocks.end(); + I != E; ++I) + for (succ_iterator SI = succ_begin(*I), SE = succ_end(*I); SI != SE; ++SI) + if (!Blocks.count(*SI)) + ExitBlocks.insert(*SI); + NumExitBlocks = ExitBlocks.size(); + // Construct new function based on inputs/outputs & add allocas for all defs. Function *newFunction = constructFunction(inputs, outputs, header, newFuncRoot, @@ -718,7 +741,7 @@ ExtractCodeRegion(ArrayRef code) { for (BasicBlock::iterator I = header->begin(); isa(I); ++I) { PHINode *PN = cast(I); for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) - if (!BlocksToExtract.count(PN->getIncomingBlock(i))) + if (!Blocks.count(PN->getIncomingBlock(i))) PN->setIncomingBlock(i, newFuncRoot); } @@ -732,7 +755,7 @@ ExtractCodeRegion(ArrayRef code) { PHINode *PN = cast(I); std::set ProcessedPreds; for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) - if (BlocksToExtract.count(PN->getIncomingBlock(i))) { + if (Blocks.count(PN->getIncomingBlock(i))) { if (ProcessedPreds.insert(PN->getIncomingBlock(i)).second) PN->setIncomingBlock(i, codeReplacer); else { @@ -754,44 +777,3 @@ ExtractCodeRegion(ArrayRef code) { report_fatal_error("verifyFunction failed!")); return newFunction; } - -bool CodeExtractor::isEligible(ArrayRef code) { - // Deny a single basic block that's a landing pad block. - if (code.size() == 1 && code[0]->isLandingPad()) - return false; - - // Deny code region if it contains allocas or vastarts. 
- for (ArrayRef::iterator BB = code.begin(), e=code.end(); - BB != e; ++BB) - for (BasicBlock::const_iterator I = (*BB)->begin(), Ie = (*BB)->end(); - I != Ie; ++I) - if (isa(*I)) - return false; - else if (const CallInst *CI = dyn_cast(I)) - if (const Function *F = CI->getCalledFunction()) - if (F->getIntrinsicID() == Intrinsic::vastart) - return false; - return true; -} - - -/// ExtractCodeRegion - Slurp a sequence of basic blocks into a brand new -/// function. -/// -Function* llvm::ExtractCodeRegion(DominatorTree &DT, - ArrayRef code, - bool AggregateArgs) { - return CodeExtractor(&DT, AggregateArgs).ExtractCodeRegion(code); -} - -/// ExtractLoop - Slurp a natural loop into a brand new function. -/// -Function* llvm::ExtractLoop(DominatorTree &DT, Loop *L, bool AggregateArgs) { - return CodeExtractor(&DT, AggregateArgs).ExtractCodeRegion(L->getBlocks()); -} - -/// ExtractBasicBlock - Slurp a basic block into a brand new function. -/// -Function* llvm::ExtractBasicBlock(ArrayRef BBs, bool AggregateArgs){ - return CodeExtractor(0, AggregateArgs).ExtractCodeRegion(BBs); -} diff --git a/lib/Transforms/Utils/InlineFunction.cpp b/lib/Transforms/Utils/InlineFunction.cpp index d2b167a..89e89e7 100644 --- a/lib/Transforms/Utils/InlineFunction.cpp +++ b/lib/Transforms/Utils/InlineFunction.cpp @@ -13,22 +13,22 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Utils/Cloning.h" +#include "llvm/Attributes.h" #include "llvm/Constants.h" +#include "llvm/DebugInfo.h" #include "llvm/DerivedTypes.h" -#include "llvm/Module.h" +#include "llvm/IRBuilder.h" #include "llvm/Instructions.h" #include "llvm/IntrinsicInst.h" #include "llvm/Intrinsics.h" -#include "llvm/Attributes.h" +#include "llvm/Module.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/Analysis/CallGraph.h" -#include "llvm/Analysis/DebugInfo.h" #include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Support/CallSite.h" #include "llvm/Target/TargetData.h" #include "llvm/Transforms/Utils/Local.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/StringExtras.h" -#include "llvm/Support/CallSite.h" -#include "llvm/Support/IRBuilder.h" using namespace llvm; bool llvm::InlineFunction(CallInst *CI, InlineFunctionInfo &IFI, @@ -43,10 +43,10 @@ bool llvm::InlineFunction(InvokeInst *II, InlineFunctionInfo &IFI, namespace { /// A class for recording information about inlining through an invoke. class InvokeInliningInfo { - BasicBlock *OuterResumeDest; //< Destination of the invoke's unwind. - BasicBlock *InnerResumeDest; //< Destination for the callee's resume. - LandingPadInst *CallerLPad; //< LandingPadInst associated with the invoke. - PHINode *InnerEHValuesPHI; //< PHI for EH values from landingpad insts. + BasicBlock *OuterResumeDest; ///< Destination of the invoke's unwind. + BasicBlock *InnerResumeDest; ///< Destination for the callee's resume. + LandingPadInst *CallerLPad; ///< LandingPadInst associated with the invoke. + PHINode *InnerEHValuesPHI; ///< PHI for EH values from landingpad insts. 
SmallVector UnwindDestPHIValues; public: diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp index d1c4d59..bed7d72 100644 --- a/lib/Transforms/Utils/Local.cpp +++ b/lib/Transforms/Utils/Local.cpp @@ -14,31 +14,31 @@ #include "llvm/Transforms/Utils/Local.h" #include "llvm/Constants.h" +#include "llvm/DIBuilder.h" +#include "llvm/DebugInfo.h" +#include "llvm/DerivedTypes.h" #include "llvm/GlobalAlias.h" #include "llvm/GlobalVariable.h" -#include "llvm/DerivedTypes.h" +#include "llvm/IRBuilder.h" #include "llvm/Instructions.h" -#include "llvm/Intrinsics.h" #include "llvm/IntrinsicInst.h" +#include "llvm/Intrinsics.h" #include "llvm/Metadata.h" #include "llvm/Operator.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallPtrSet.h" -#include "llvm/Analysis/DebugInfo.h" -#include "llvm/Analysis/DIBuilder.h" #include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Analysis/ProfileInfo.h" #include "llvm/Analysis/ValueTracking.h" -#include "llvm/Target/TargetData.h" #include "llvm/Support/CFG.h" #include "llvm/Support/Debug.h" #include "llvm/Support/GetElementPtrTypeIterator.h" -#include "llvm/Support/IRBuilder.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/ValueHandle.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetData.h" using namespace llvm; //===----------------------------------------------------------------------===// @@ -169,16 +169,21 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions) { // Otherwise, we can fold this switch into a conditional branch // instruction if it has only one non-default destination. SwitchInst::CaseIt FirstCase = SI->case_begin(); - Value *Cond = Builder.CreateICmpEQ(SI->getCondition(), - FirstCase.getCaseValue(), "cond"); - - // Insert the new branch. - Builder.CreateCondBr(Cond, FirstCase.getCaseSuccessor(), - SI->getDefaultDest()); - - // Delete the old switch. - SI->eraseFromParent(); - return true; + IntegersSubset& Case = FirstCase.getCaseValueEx(); + if (Case.isSingleNumber()) { + // FIXME: Currently work with ConstantInt based numbers. + Value *Cond = Builder.CreateICmpEQ(SI->getCondition(), + Case.getSingleNumber(0).toConstantInt(), + "cond"); + + // Insert the new branch. + Builder.CreateCondBr(Cond, FirstCase.getCaseSuccessor(), + SI->getDefaultDest()); + + // Delete the old switch. + SI->eraseFromParent(); + return true; + } } return false; } @@ -260,7 +265,7 @@ bool llvm::isInstructionTriviallyDead(Instruction *I) { return isa(II->getArgOperand(1)); } - if (extractMallocCall(I)) return true; + if (isAllocLikeFn(I)) return true; if (CallInst *CI = isFreeCall(I)) if (Constant *C = dyn_cast(CI->getArgOperand(0))) @@ -700,7 +705,7 @@ bool llvm::EliminateDuplicatePHINodes(BasicBlock *BB) { CollisionMap[PN] = Old; break; } - // Procede to the next PHI in the list. + // Proceed to the next PHI in the list. OtherPN = I->second; } } diff --git a/lib/Transforms/Utils/LoopUnroll.cpp b/lib/Transforms/Utils/LoopUnroll.cpp index e15497a..2023750 100644 --- a/lib/Transforms/Utils/LoopUnroll.cpp +++ b/lib/Transforms/Utils/LoopUnroll.cpp @@ -95,9 +95,11 @@ static BasicBlock *FoldBlockIntoPredecessor(BasicBlock *BB, LoopInfo* LI, // Erase basic block from the function... // ScalarEvolution holds references to loop exit blocks. 
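A small sketch of the Local.cpp helper touched above, wired into a hypothetical cleanup driver; ConstantFoldTerminator's two-argument signature is the one shown in the hunk, and with this change a one-case switch now folds to a conditional branch when the case is a single number.

  #include "llvm/Function.h"
  #include "llvm/Transforms/Utils/Local.h"
  using namespace llvm;

  static bool cleanupExample(Function &F) {
    bool Changed = false;
    for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
      Changed |= ConstantFoldTerminator(BB, /*DeleteDeadConditions=*/true);
    return Changed;
  }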
- if (ScalarEvolution *SE = LPM->getAnalysisIfAvailable()) { - if (Loop *L = LI->getLoopFor(BB)) - SE->forgetLoop(L); + if (LPM) { + if (ScalarEvolution *SE = LPM->getAnalysisIfAvailable()) { + if (Loop *L = LI->getLoopFor(BB)) + SE->forgetLoop(L); + } } LI->removeBlock(BB); BB->eraseFromParent(); @@ -204,9 +206,11 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, // Notify ScalarEvolution that the loop will be substantially changed, // if not outright eliminated. - ScalarEvolution *SE = LPM->getAnalysisIfAvailable(); - if (SE) - SE->forgetLoop(L); + if (LPM) { + ScalarEvolution *SE = LPM->getAnalysisIfAvailable(); + if (SE) + SE->forgetLoop(L); + } // If we know the trip count, we know the multiple... unsigned BreakoutTrip = 0; @@ -405,24 +409,26 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, } } - // FIXME: Reconstruct dom info, because it is not preserved properly. - // Incrementally updating domtree after loop unrolling would be easy. - if (DominatorTree *DT = LPM->getAnalysisIfAvailable()) - DT->runOnFunction(*L->getHeader()->getParent()); - - // Simplify any new induction variables in the partially unrolled loop. - if (SE && !CompletelyUnroll) { - SmallVector DeadInsts; - simplifyLoopIVs(L, SE, LPM, DeadInsts); - - // Aggressively clean up dead instructions that simplifyLoopIVs already - // identified. Any remaining should be cleaned up below. - while (!DeadInsts.empty()) - if (Instruction *Inst = - dyn_cast_or_null(&*DeadInsts.pop_back_val())) - RecursivelyDeleteTriviallyDeadInstructions(Inst); + if (LPM) { + // FIXME: Reconstruct dom info, because it is not preserved properly. + // Incrementally updating domtree after loop unrolling would be easy. + if (DominatorTree *DT = LPM->getAnalysisIfAvailable()) + DT->runOnFunction(*L->getHeader()->getParent()); + + // Simplify any new induction variables in the partially unrolled loop. + ScalarEvolution *SE = LPM->getAnalysisIfAvailable(); + if (SE && !CompletelyUnroll) { + SmallVector DeadInsts; + simplifyLoopIVs(L, SE, LPM, DeadInsts); + + // Aggressively clean up dead instructions that simplifyLoopIVs already + // identified. Any remaining should be cleaned up below. + while (!DeadInsts.empty()) + if (Instruction *Inst = + dyn_cast_or_null(&*DeadInsts.pop_back_val())) + RecursivelyDeleteTriviallyDeadInstructions(Inst); + } } - // At this point, the code is well formed. We now do a quick sweep over the // inserted code, doing constant propagation and dead code elimination as we // go. diff --git a/lib/Transforms/Utils/LoopUnrollRuntime.cpp b/lib/Transforms/Utils/LoopUnrollRuntime.cpp index 3aa6bef..67e17f4 100644 --- a/lib/Transforms/Utils/LoopUnrollRuntime.cpp +++ b/lib/Transforms/Utils/LoopUnrollRuntime.cpp @@ -131,7 +131,7 @@ static void ConnectProlog(Loop *L, Value *TripCount, unsigned Count, /// There are two value maps that are defined and used. VMap is /// for the values in the current loop instance. LVMap contains /// the values from the last loop instance. We need the LVMap values -/// to update the inital values for the current loop instance. +/// to update the initial values for the current loop instance. /// static void CloneLoopBlocks(Loop *L, bool FirstCopy, @@ -237,6 +237,8 @@ bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count, LoopInfo *LI, // Use Scalar Evolution to compute the trip count. 
This allows more // loops to be unrolled than relying on induction var simplification + if (!LPM) + return false; ScalarEvolution *SE = LPM->getAnalysisIfAvailable(); if (SE == 0) return false; diff --git a/lib/Transforms/Utils/LowerExpectIntrinsic.cpp b/lib/Transforms/Utils/LowerExpectIntrinsic.cpp index c70ced1..02bdcda 100644 --- a/lib/Transforms/Utils/LowerExpectIntrinsic.cpp +++ b/lib/Transforms/Utils/LowerExpectIntrinsic.cpp @@ -12,18 +12,19 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "lower-expect-intrinsic" +#include "llvm/BasicBlock.h" #include "llvm/Constants.h" #include "llvm/Function.h" -#include "llvm/BasicBlock.h" -#include "llvm/LLVMContext.h" #include "llvm/Instructions.h" #include "llvm/Intrinsics.h" +#include "llvm/LLVMContext.h" +#include "llvm/MDBuilder.h" #include "llvm/Metadata.h" #include "llvm/Pass.h" +#include "llvm/ADT/Statistic.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" -#include "llvm/ADT/Statistic.h" #include using namespace llvm; @@ -70,24 +71,18 @@ bool LowerExpectIntrinsic::HandleSwitchExpect(SwitchInst *SI) { if (!ExpectedValue) return false; - LLVMContext &Context = CI->getContext(); - Type *Int32Ty = Type::getInt32Ty(Context); - SwitchInst::CaseIt Case = SI->findCaseValue(ExpectedValue); - std::vector Vec; - unsigned n = SI->getNumCases(); - Vec.resize(n + 1 + 1); // +1 for MDString and +1 for default case - - Vec[0] = MDString::get(Context, "branch_weights"); - Vec[1] = ConstantInt::get(Int32Ty, Case == SI->case_default() ? - LikelyBranchWeight : UnlikelyBranchWeight); - for (unsigned i = 0; i < n; ++i) { - Vec[i + 1 + 1] = ConstantInt::get(Int32Ty, i == Case.getCaseIndex() ? - LikelyBranchWeight : UnlikelyBranchWeight); - } + unsigned n = SI->getNumCases(); // +1 for default case. + std::vector Weights(n + 1); - MDNode *WeightsNode = llvm::MDNode::get(Context, Vec); - SI->setMetadata(LLVMContext::MD_prof, WeightsNode); + Weights[0] = Case == SI->case_default() ? LikelyBranchWeight + : UnlikelyBranchWeight; + for (unsigned i = 0; i != n; ++i) + Weights[i + 1] = i == Case.getCaseIndex() ? LikelyBranchWeight + : UnlikelyBranchWeight; + + SI->setMetadata(LLVMContext::MD_prof, + MDBuilder(CI->getContext()).createBranchWeights(Weights)); SI->setCondition(ArgValue); return true; @@ -120,20 +115,17 @@ bool LowerExpectIntrinsic::HandleIfExpect(BranchInst *BI) { if (!ExpectedValue) return false; - LLVMContext &Context = CI->getContext(); - Type *Int32Ty = Type::getInt32Ty(Context); - bool Likely = ExpectedValue->isOne(); + MDBuilder MDB(CI->getContext()); + MDNode *Node; // If expect value is equal to 1 it means that we are more likely to take // branch 0, in other case more likely is branch 1. - Value *Ops[] = { - MDString::get(Context, "branch_weights"), - ConstantInt::get(Int32Ty, Likely ? LikelyBranchWeight : UnlikelyBranchWeight), - ConstantInt::get(Int32Ty, Likely ? 
UnlikelyBranchWeight : LikelyBranchWeight) - }; + if (ExpectedValue->isOne()) + Node = MDB.createBranchWeights(LikelyBranchWeight, UnlikelyBranchWeight); + else + Node = MDB.createBranchWeights(UnlikelyBranchWeight, LikelyBranchWeight); - MDNode *WeightsNode = MDNode::get(Context, Ops); - BI->setMetadata(LLVMContext::MD_prof, WeightsNode); + BI->setMetadata(LLVMContext::MD_prof, Node); CmpI->setOperand(0, ArgValue); return true; diff --git a/lib/Transforms/Utils/LowerSwitch.cpp b/lib/Transforms/Utils/LowerSwitch.cpp index a16130d..1547439 100644 --- a/lib/Transforms/Utils/LowerSwitch.cpp +++ b/lib/Transforms/Utils/LowerSwitch.cpp @@ -66,18 +66,6 @@ namespace { BasicBlock* OrigBlock, BasicBlock* Default); unsigned Clusterify(CaseVector& Cases, SwitchInst *SI); }; - - /// The comparison function for sorting the switch case values in the vector. - /// WARNING: Case ranges should be disjoint! - struct CaseCmp { - bool operator () (const LowerSwitch::CaseRange& C1, - const LowerSwitch::CaseRange& C2) { - - const ConstantInt* CI1 = cast(C1.Low); - const ConstantInt* CI2 = cast(C2.High); - return CI1->getValue().slt(CI2->getValue()); - } - }; } char LowerSwitch::ID = 0; @@ -159,7 +147,7 @@ BasicBlock* LowerSwitch::switchConvert(CaseItr Begin, CaseItr End, Function::iterator FI = OrigBlock; F->getBasicBlockList().insert(++FI, NewNode); - ICmpInst* Comp = new ICmpInst(ICmpInst::ICMP_SLT, + ICmpInst* Comp = new ICmpInst(ICmpInst::ICMP_ULT, Val, Pivot.Low, "Pivot"); NewNode->getInstList().push_back(Comp); BranchInst::Create(LBranch, RBranch, Comp, NewNode); @@ -234,40 +222,34 @@ BasicBlock* LowerSwitch::newLeafBlock(CaseRange& Leaf, Value* Val, // Clusterify - Transform simple list of Cases into list of CaseRange's unsigned LowerSwitch::Clusterify(CaseVector& Cases, SwitchInst *SI) { - unsigned numCmps = 0; + + IntegersSubsetToBB TheClusterifier; // Start with "simple" cases - for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end(); i != e; ++i) - Cases.push_back(CaseRange(i.getCaseValue(), i.getCaseValue(), - i.getCaseSuccessor())); + for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end(); + i != e; ++i) { + BasicBlock *SuccBB = i.getCaseSuccessor(); + IntegersSubset CaseRanges = i.getCaseValueEx(); + TheClusterifier.add(CaseRanges, SuccBB); + } - std::sort(Cases.begin(), Cases.end(), CaseCmp()); - - // Merge case into clusters - if (Cases.size()>=2) - for (CaseItr I=Cases.begin(), J=llvm::next(Cases.begin()); J!=Cases.end(); ) { - int64_t nextValue = cast(J->Low)->getSExtValue(); - int64_t currentValue = cast(I->High)->getSExtValue(); - BasicBlock* nextBB = J->BB; - BasicBlock* currentBB = I->BB; - - // If the two neighboring cases go to the same destination, merge them - // into a single case. - if ((nextValue-currentValue==1) && (currentBB == nextBB)) { - I->High = J->High; - J = Cases.erase(J); - } else { - I = J++; - } - } - - for (CaseItr I=Cases.begin(), E=Cases.end(); I!=E; ++I, ++numCmps) { - if (I->Low != I->High) + TheClusterifier.optimize(); + + size_t numCmps = 0; + for (IntegersSubsetToBB::RangeIterator i = TheClusterifier.begin(), + e = TheClusterifier.end(); i != e; ++i, ++numCmps) { + IntegersSubsetToBB::Cluster &C = *i; + + // FIXME: Currently work with ConstantInt based numbers. + // Changing it to APInt based is a pretty heavy for this commit. + Cases.push_back(CaseRange(C.first.getLow().toConstantInt(), + C.first.getHigh().toConstantInt(), C.second)); + if (C.first.isSingleNumber()) // A range counts double, since it requires two compares. 
++numCmps; } - return numCmps; + return numCmps; } // processSwitchInst - Replace the specified switch instruction with a sequence diff --git a/lib/Transforms/Utils/ModuleUtils.cpp b/lib/Transforms/Utils/ModuleUtils.cpp index 8491c55..dbcf3b2 100644 --- a/lib/Transforms/Utils/ModuleUtils.cpp +++ b/lib/Transforms/Utils/ModuleUtils.cpp @@ -14,8 +14,8 @@ #include "llvm/Transforms/Utils/ModuleUtils.h" #include "llvm/DerivedTypes.h" #include "llvm/Function.h" +#include "llvm/IRBuilder.h" #include "llvm/Module.h" -#include "llvm/Support/IRBuilder.h" using namespace llvm; diff --git a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp index 2357d81..dd5e20e 100644 --- a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp +++ b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp @@ -28,14 +28,14 @@ #define DEBUG_TYPE "mem2reg" #include "llvm/Transforms/Utils/PromoteMemToReg.h" #include "llvm/Constants.h" +#include "llvm/DebugInfo.h" #include "llvm/DerivedTypes.h" +#include "llvm/DIBuilder.h" #include "llvm/Function.h" #include "llvm/Instructions.h" #include "llvm/IntrinsicInst.h" #include "llvm/Metadata.h" #include "llvm/Analysis/AliasSetTracker.h" -#include "llvm/Analysis/DebugInfo.h" -#include "llvm/Analysis/DIBuilder.h" #include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/ValueTracking.h" diff --git a/lib/Transforms/Utils/SSAUpdater.cpp b/lib/Transforms/Utils/SSAUpdater.cpp index e60a41b..e568a61 100644 --- a/lib/Transforms/Utils/SSAUpdater.cpp +++ b/lib/Transforms/Utils/SSAUpdater.cpp @@ -190,8 +190,11 @@ Value *SSAUpdater::GetValueInMiddleOfBlock(BasicBlock *BB) { return V; } - // Set DebugLoc. - InsertedPHI->setDebugLoc(GetFirstDebugLocInBasicBlock(BB)); + // Set the DebugLoc of the inserted PHI, if available. + DebugLoc DL; + if (const Instruction *I = BB->getFirstNonPHI()) + DL = I->getDebugLoc(); + InsertedPHI->setDebugLoc(DL); // If the client wants to know about all new instructions, tell it. if (InsertedPHIs) InsertedPHIs->push_back(InsertedPHI); @@ -211,6 +214,11 @@ void SSAUpdater::RewriteUse(Use &U) { else V = GetValueInMiddleOfBlock(User->getParent()); + // Notify that users of the existing value that it is being replaced. + Value *OldVal = U.get(); + if (OldVal != V && OldVal->hasValueHandle()) + ValueHandleBase::ValueIsRAUWd(OldVal, V); + U.set(V); } @@ -230,28 +238,6 @@ void SSAUpdater::RewriteUseAfterInsertions(Use &U) { U.set(V); } -/// PHIiter - Iterator for PHI operands. This is used for the PHI_iterator -/// in the SSAUpdaterImpl template. -namespace { - class PHIiter { - private: - PHINode *PHI; - unsigned idx; - - public: - explicit PHIiter(PHINode *P) // begin iterator - : PHI(P), idx(0) {} - PHIiter(PHINode *P, bool) // end iterator - : PHI(P), idx(PHI->getNumIncomingValues()) {} - - PHIiter &operator++() { ++idx; return *this; } - bool operator==(const PHIiter& x) const { return idx == x.idx; } - bool operator!=(const PHIiter& x) const { return !operator==(x); } - Value *getIncomingValue() { return PHI->getIncomingValue(idx); } - BasicBlock *getIncomingBlock() { return PHI->getIncomingBlock(idx); } - }; -} - /// SSAUpdaterTraits - Traits for the SSAUpdaterImpl template, /// specialized for SSAUpdater. 
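A minimal usage sketch for the updated RewriteUse path, with values and blocks invented for illustration; the point of the hunk above is that when the rewritten value replaces one carrying value handles, ValueIsRAUWd now fires so CallbackVH users stay coherent.

  #include "llvm/Transforms/Utils/SSAUpdater.h"
  using namespace llvm;

  static void rewriteExample(Value *V1, BasicBlock *BB1,
                             Value *V2, BasicBlock *BB2, Use &U) {
    SSAUpdater SSA;
    SSA.Initialize(V1->getType(), "merged");
    SSA.AddAvailableValue(BB1, V1);
    SSA.AddAvailableValue(BB2, V2);
    SSA.RewriteUse(U); // inserts PHIs as needed and notifies value handles
  }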
namespace llvm { @@ -266,9 +252,26 @@ public: static BlkSucc_iterator BlkSucc_begin(BlkT *BB) { return succ_begin(BB); } static BlkSucc_iterator BlkSucc_end(BlkT *BB) { return succ_end(BB); } - typedef PHIiter PHI_iterator; - static inline PHI_iterator PHI_begin(PhiT *PHI) { return PHI_iterator(PHI); } - static inline PHI_iterator PHI_end(PhiT *PHI) { + class PHI_iterator { + private: + PHINode *PHI; + unsigned idx; + + public: + explicit PHI_iterator(PHINode *P) // begin iterator + : PHI(P), idx(0) {} + PHI_iterator(PHINode *P, bool) // end iterator + : PHI(P), idx(PHI->getNumIncomingValues()) {} + + PHI_iterator &operator++() { ++idx; return *this; } + bool operator==(const PHI_iterator& x) const { return idx == x.idx; } + bool operator!=(const PHI_iterator& x) const { return !operator==(x); } + Value *getIncomingValue() { return PHI->getIncomingValue(idx); } + BasicBlock *getIncomingBlock() { return PHI->getIncomingBlock(idx); } + }; + + static PHI_iterator PHI_begin(PhiT *PHI) { return PHI_iterator(PHI); } + static PHI_iterator PHI_end(PhiT *PHI) { return PHI_iterator(PHI, true); } diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp index 66dd2c9..518df7c 100644 --- a/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/lib/Transforms/Utils/SimplifyCFG.cpp @@ -16,29 +16,30 @@ #include "llvm/Constants.h" #include "llvm/DerivedTypes.h" #include "llvm/GlobalVariable.h" +#include "llvm/IRBuilder.h" #include "llvm/Instructions.h" #include "llvm/IntrinsicInst.h" #include "llvm/LLVMContext.h" +#include "llvm/MDBuilder.h" #include "llvm/Metadata.h" #include "llvm/Operator.h" #include "llvm/Type.h" -#include "llvm/Analysis/InstructionSimplify.h" -#include "llvm/Analysis/ValueTracking.h" -#include "llvm/Target/TargetData.h" -#include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SetVector.h" -#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" -#include "llvm/ADT/STLExtras.h" +#include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Analysis/ValueTracking.h" #include "llvm/Support/CFG.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ConstantRange.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/IRBuilder.h" #include "llvm/Support/NoFolder.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" #include #include #include @@ -55,12 +56,26 @@ DupRet("simplifycfg-dup-ret", cl::Hidden, cl::init(false), STATISTIC(NumSpeculations, "Number of speculative executed instructions"); namespace { + /// ValueEqualityComparisonCase - Represents a case of a switch. + struct ValueEqualityComparisonCase { + ConstantInt *Value; + BasicBlock *Dest; + + ValueEqualityComparisonCase(ConstantInt *Value, BasicBlock *Dest) + : Value(Value), Dest(Dest) {} + + bool operator<(ValueEqualityComparisonCase RHS) const { + // Comparing pointers is ok as we only rely on the order for uniquing. 
+ return Value < RHS.Value; + } + }; + class SimplifyCFGOpt { const TargetData *const TD; Value *isValueEqualityComparison(TerminatorInst *TI); BasicBlock *GetValueEqualityComparisonCases(TerminatorInst *TI, - std::vector > &Cases); + std::vector &Cases); bool SimplifyEqualityComparisonWithOnlyPredecessor(TerminatorInst *TI, BasicBlock *Pred, IRBuilder<> &Builder); @@ -107,6 +122,47 @@ static bool SafeToMergeTerminators(TerminatorInst *SI1, TerminatorInst *SI2) { return true; } +/// isProfitableToFoldUnconditional - Return true if it is safe and profitable +/// to merge these two terminator instructions together, where SI1 is an +/// unconditional branch. PhiNodes will store all PHI nodes in common +/// successors. +/// +static bool isProfitableToFoldUnconditional(BranchInst *SI1, + BranchInst *SI2, + Instruction *Cond, + SmallVectorImpl &PhiNodes) { + if (SI1 == SI2) return false; // Can't merge with self! + assert(SI1->isUnconditional() && SI2->isConditional()); + + // We fold the unconditional branch if we can easily update all PHI nodes in + // common successors: + // 1> We have a constant incoming value for the conditional branch; + // 2> We have "Cond" as the incoming value for the unconditional branch; + // 3> SI2->getCondition() and Cond have same operands. + CmpInst *Ci2 = dyn_cast(SI2->getCondition()); + if (!Ci2) return false; + if (!(Cond->getOperand(0) == Ci2->getOperand(0) && + Cond->getOperand(1) == Ci2->getOperand(1)) && + !(Cond->getOperand(0) == Ci2->getOperand(1) && + Cond->getOperand(1) == Ci2->getOperand(0))) + return false; + + BasicBlock *SI1BB = SI1->getParent(); + BasicBlock *SI2BB = SI2->getParent(); + SmallPtrSet SI1Succs(succ_begin(SI1BB), succ_end(SI1BB)); + for (succ_iterator I = succ_begin(SI2BB), E = succ_end(SI2BB); I != E; ++I) + if (SI1Succs.count(*I)) + for (BasicBlock::iterator BBI = (*I)->begin(); + isa(BBI); ++BBI) { + PHINode *PN = cast(BBI); + if (PN->getIncomingValueForBlock(SI1BB) != Cond || + !isa(PN->getIncomingValueForBlock(SI2BB))) + return false; + PhiNodes.push_back(PN); + } + return true; +} + /// AddPredecessorToBlock - Update PHI nodes in Succ to indicate that there will /// now be entries in it from the 'NewPred' block. The values that will be /// flowing into the PHI nodes will be the same as those coming in from @@ -476,21 +532,22 @@ Value *SimplifyCFGOpt::isValueEqualityComparison(TerminatorInst *TI) { /// decode all of the 'cases' that it represents and return the 'default' block. 
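The pointer-order comment in ValueEqualityComparisonCase::operator< relies on ConstantInt being uniqued per LLVMContext: two equal case values are the same object, so pointer comparison gives an arbitrary but consistent order, which is all the sorting and membership tests in this file need. A quick standalone illustration (hypothetical values):

    #include "llvm/Constants.h"
    #include "llvm/DerivedTypes.h"
    #include "llvm/LLVMContext.h"
    #include <cassert>
    using namespace llvm;

    int main() {
      LLVMContext Ctx;
      // Two requests for the same value yield one uniqued object, so
      // pointer equality is value equality.
      ConstantInt *A = ConstantInt::get(Type::getInt32Ty(Ctx), 42);
      ConstantInt *B = ConstantInt::get(Type::getInt32Ty(Ctx), 42);
      assert(A == B && "equal constants are one uniqued object");
      return 0;
    }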
BasicBlock *SimplifyCFGOpt:: GetValueEqualityComparisonCases(TerminatorInst *TI, - std::vector > &Cases) { + std::vector + &Cases) { if (SwitchInst *SI = dyn_cast(TI)) { Cases.reserve(SI->getNumCases()); for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end(); i != e; ++i) - Cases.push_back(std::make_pair(i.getCaseValue(), - i.getCaseSuccessor())); + Cases.push_back(ValueEqualityComparisonCase(i.getCaseValue(), + i.getCaseSuccessor())); return SI->getDefaultDest(); } BranchInst *BI = cast(TI); ICmpInst *ICI = cast(BI->getCondition()); - Cases.push_back(std::make_pair(GetConstantInt(ICI->getOperand(1), TD), - BI->getSuccessor(ICI->getPredicate() == - ICmpInst::ICMP_NE))); + BasicBlock *Succ = BI->getSuccessor(ICI->getPredicate() == ICmpInst::ICMP_NE); + Cases.push_back(ValueEqualityComparisonCase(GetConstantInt(ICI->getOperand(1), + TD), + Succ)); return BI->getSuccessor(ICI->getPredicate() == ICmpInst::ICMP_EQ); } @@ -498,9 +555,9 @@ GetValueEqualityComparisonCases(TerminatorInst *TI, /// EliminateBlockCases - Given a vector of bb/value pairs, remove any entries /// in the list that match the specified block. static void EliminateBlockCases(BasicBlock *BB, - std::vector > &Cases) { + std::vector &Cases) { for (unsigned i = 0, e = Cases.size(); i != e; ++i) - if (Cases[i].second == BB) { + if (Cases[i].Dest == BB) { Cases.erase(Cases.begin()+i); --i; --e; } @@ -509,9 +566,9 @@ static void EliminateBlockCases(BasicBlock *BB, /// ValuesOverlap - Return true if there are any keys in C1 that exist in C2 as /// well. static bool -ValuesOverlap(std::vector > &C1, - std::vector > &C2) { - std::vector > *V1 = &C1, *V2 = &C2; +ValuesOverlap(std::vector &C1, + std::vector &C2) { + std::vector *V1 = &C1, *V2 = &C2; // Make V1 be smaller than V2. if (V1->size() > V2->size()) @@ -520,9 +577,9 @@ ValuesOverlap(std::vector > &C1, if (V1->size() == 0) return false; if (V1->size() == 1) { // Just scan V2. - ConstantInt *TheVal = (*V1)[0].first; + ConstantInt *TheVal = (*V1)[0].Value; for (unsigned i = 0, e = V2->size(); i != e; ++i) - if (TheVal == (*V2)[i].first) + if (TheVal == (*V2)[i].Value) return true; } @@ -531,9 +588,9 @@ ValuesOverlap(std::vector > &C1, array_pod_sort(V2->begin(), V2->end()); unsigned i1 = 0, i2 = 0, e1 = V1->size(), e2 = V2->size(); while (i1 != e1 && i2 != e2) { - if ((*V1)[i1].first == (*V2)[i2].first) + if ((*V1)[i1].Value == (*V2)[i2].Value) return true; - if ((*V1)[i1].first < (*V2)[i2].first) + if ((*V1)[i1].Value < (*V2)[i2].Value) ++i1; else ++i2; @@ -559,13 +616,13 @@ SimplifyEqualityComparisonWithOnlyPredecessor(TerminatorInst *TI, if (ThisVal != PredVal) return false; // Different predicates. // Find out information about when control will move from Pred to TI's block. - std::vector > PredCases; + std::vector PredCases; BasicBlock *PredDef = GetValueEqualityComparisonCases(Pred->getTerminator(), PredCases); EliminateBlockCases(PredDef, PredCases); // Remove default from cases. // Find information about how control leaves this block. - std::vector > ThisCases; + std::vector ThisCases; BasicBlock *ThisDef = GetValueEqualityComparisonCases(TI, ThisCases); EliminateBlockCases(ThisDef, ThisCases); // Remove default from cases. @@ -587,7 +644,7 @@ SimplifyEqualityComparisonWithOnlyPredecessor(TerminatorInst *TI, (void) NI; // Remove PHI node entries for the dead edge. 
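ValuesOverlap above is the standard sorted two-pointer intersection test (with fast paths for zero- and one-element lists). The same shape without the LLVM types, as a self-contained sketch:

    #include <algorithm>
    #include <cassert>
    #include <vector>

    // Sort both lists, then advance whichever side is smaller; any equal
    // pair is an overlap. Two sorts, then a single linear scan.
    static bool overlap(std::vector<int> A, std::vector<int> B) {
      std::sort(A.begin(), A.end());
      std::sort(B.begin(), B.end());
      unsigned i = 0, j = 0;
      while (i != A.size() && j != B.size()) {
        if (A[i] == B[j]) return true;
        if (A[i] < B[j]) ++i; else ++j;
      }
      return false;
    }

    int main() {
      int a[] = {1, 4, 7}, b[] = {2, 4}, c[] = {2, 5};
      assert(overlap(std::vector<int>(a, a + 3), std::vector<int>(b, b + 2)));
      assert(!overlap(std::vector<int>(a, a + 3), std::vector<int>(c, c + 2)));
      return 0;
    }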
- ThisCases[0].second->removePredecessor(TI->getParent()); + ThisCases[0].Dest->removePredecessor(TI->getParent()); DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator() << "Through successor TI: " << *TI << "Leaving: " << *NI << "\n"); @@ -600,7 +657,7 @@ SimplifyEqualityComparisonWithOnlyPredecessor(TerminatorInst *TI, // Okay, TI has cases that are statically dead, prune them away. SmallPtrSet DeadCases; for (unsigned i = 0, e = PredCases.size(); i != e; ++i) - DeadCases.insert(PredCases[i].first); + DeadCases.insert(PredCases[i].Value); DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator() << "Through successor TI: " << *TI); @@ -622,10 +679,10 @@ SimplifyEqualityComparisonWithOnlyPredecessor(TerminatorInst *TI, ConstantInt *TIV = 0; BasicBlock *TIBB = TI->getParent(); for (unsigned i = 0, e = PredCases.size(); i != e; ++i) - if (PredCases[i].second == TIBB) { + if (PredCases[i].Dest == TIBB) { if (TIV != 0) return false; // Cannot handle multiple values coming to this block. - TIV = PredCases[i].first; + TIV = PredCases[i].Value; } assert(TIV && "No edge from pred to succ?"); @@ -633,8 +690,8 @@ SimplifyEqualityComparisonWithOnlyPredecessor(TerminatorInst *TI, // BB. Find out which successor will unconditionally be branched to. BasicBlock *TheRealDest = 0; for (unsigned i = 0, e = ThisCases.size(); i != e; ++i) - if (ThisCases[i].first == TIV) { - TheRealDest = ThisCases[i].second; + if (ThisCases[i].Value == TIV) { + TheRealDest = ThisCases[i].Dest; break; } @@ -702,10 +759,10 @@ bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(TerminatorInst *TI, if (PCV == CV && SafeToMergeTerminators(TI, PTI)) { // Figure out which 'cases' to copy from SI to PSI. - std::vector > BBCases; + std::vector BBCases; BasicBlock *BBDefault = GetValueEqualityComparisonCases(TI, BBCases); - std::vector > PredCases; + std::vector PredCases; BasicBlock *PredDefault = GetValueEqualityComparisonCases(PTI, PredCases); // Based on whether the default edge from PTI goes to BB or not, fill in @@ -718,8 +775,8 @@ bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(TerminatorInst *TI, // that don't occur in PTI, or that branch to BB will be activated. std::set PTIHandled; for (unsigned i = 0, e = PredCases.size(); i != e; ++i) - if (PredCases[i].second != BB) - PTIHandled.insert(PredCases[i].first); + if (PredCases[i].Dest != BB) + PTIHandled.insert(PredCases[i].Value); else { // The default destination is BB, we don't need explicit targets. std::swap(PredCases[i], PredCases.back()); @@ -734,10 +791,10 @@ bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(TerminatorInst *TI, NewSuccessors.push_back(BBDefault); } for (unsigned i = 0, e = BBCases.size(); i != e; ++i) - if (!PTIHandled.count(BBCases[i].first) && - BBCases[i].second != BBDefault) { + if (!PTIHandled.count(BBCases[i].Value) && + BBCases[i].Dest != BBDefault) { PredCases.push_back(BBCases[i]); - NewSuccessors.push_back(BBCases[i].second); + NewSuccessors.push_back(BBCases[i].Dest); } } else { @@ -746,8 +803,8 @@ bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(TerminatorInst *TI, // activated. 
std::set PTIHandled; for (unsigned i = 0, e = PredCases.size(); i != e; ++i) - if (PredCases[i].second == BB) { - PTIHandled.insert(PredCases[i].first); + if (PredCases[i].Dest == BB) { + PTIHandled.insert(PredCases[i].Value); std::swap(PredCases[i], PredCases.back()); PredCases.pop_back(); --i; --e; @@ -756,11 +813,11 @@ bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(TerminatorInst *TI, // Okay, now we know which constants were sent to BB from the // predecessor. Figure out where they will all go now. for (unsigned i = 0, e = BBCases.size(); i != e; ++i) - if (PTIHandled.count(BBCases[i].first)) { + if (PTIHandled.count(BBCases[i].Value)) { // If this is one we are capable of getting... PredCases.push_back(BBCases[i]); - NewSuccessors.push_back(BBCases[i].second); - PTIHandled.erase(BBCases[i].first);// This constant is taken care of + NewSuccessors.push_back(BBCases[i].Dest); + PTIHandled.erase(BBCases[i].Value);// This constant is taken care of } // If there are any constants vectored to BB that TI doesn't handle, @@ -768,7 +825,7 @@ bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(TerminatorInst *TI, for (std::set::iterator I = PTIHandled.begin(), E = PTIHandled.end(); I != E; ++I) { - PredCases.push_back(std::make_pair(*I, BBDefault)); + PredCases.push_back(ValueEqualityComparisonCase(*I, BBDefault)); NewSuccessors.push_back(BBDefault); } } @@ -792,7 +849,7 @@ bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(TerminatorInst *TI, PredCases.size()); NewSI->setDebugLoc(PTI->getDebugLoc()); for (unsigned i = 0, e = PredCases.size(); i != e; ++i) - NewSI->addCase(PredCases[i].first, PredCases[i].second); + NewSI->addCase(PredCases[i].Value, PredCases[i].Dest); EraseTerminatorInstAndDCECond(PTI); @@ -1273,7 +1330,7 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetData *TD) { return false; } - // If we folded the the first phi, PN dangles at this point. Refresh it. If + // If we folded the first phi, PN dangles at this point. Refresh it. If // we ran out of PHIs then we simplified them all. PN = dyn_cast(BB->begin()); if (PN == 0) return true; @@ -1490,6 +1547,23 @@ static APInt MultiplyAndLosePrecision(APInt &A, APInt &B, APInt &C, APInt &D, return Result; } +/// checkCSEInPredecessor - Return true if the given instruction is available +/// in its predecessor block. If yes, the instruction will be removed. +/// +static bool checkCSEInPredecessor(Instruction *Inst, BasicBlock *PB) { + if (!isa(Inst) && !isa(Inst)) + return false; + for (BasicBlock::iterator I = PB->begin(), E = PB->end(); I != E; I++) { + Instruction *PBI = &*I; + // Check whether Inst and PBI generate the same value. + if (Inst->isIdenticalTo(PBI)) { + Inst->replaceAllUsesWith(PBI); + Inst->eraseFromParent(); + return true; + } + } + return false; +} /// FoldBranchToCommonDest - If this basic block is simple enough, and if a /// predecessor branches to us and one of our successors, fold the block into @@ -1497,7 +1571,36 @@ static APInt MultiplyAndLosePrecision(APInt &A, APInt &B, APInt &C, APInt &D, bool llvm::FoldBranchToCommonDest(BranchInst *BI) { BasicBlock *BB = BI->getParent(); - Instruction *Cond = dyn_cast(BI->getCondition()); + Instruction *Cond = 0; + if (BI->isConditional()) + Cond = dyn_cast(BI->getCondition()); + else { + // For unconditional branch, check for a simple CFG pattern, where + // BB has a single predecessor and BB's successor is also its predecessor's + // successor. If such pattern exisits, check for CSE between BB and its + // predecessor. 
+ if (BasicBlock *PB = BB->getSinglePredecessor()) + if (BranchInst *PBI = dyn_cast(PB->getTerminator())) + if (PBI->isConditional() && + (BI->getSuccessor(0) == PBI->getSuccessor(0) || + BI->getSuccessor(0) == PBI->getSuccessor(1))) { + for (BasicBlock::iterator I = BB->begin(), E = BB->end(); + I != E; ) { + Instruction *Curr = I++; + if (isa(Curr)) { + Cond = Curr; + break; + } + // Quit if we can't remove this instruction. + if (!checkCSEInPredecessor(Curr, PB)) + return false; + } + } + + if (Cond == 0) + return false; + } + if (Cond == 0 || (!isa(Cond) && !isa(Cond)) || Cond->getParent() != BB || !Cond->hasOneUse()) return false; @@ -1549,7 +1652,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) { // Finally, don't infinitely unroll conditional loops. BasicBlock *TrueDest = BI->getSuccessor(0); - BasicBlock *FalseDest = BI->getSuccessor(1); + BasicBlock *FalseDest = (BI->isConditional()) ? BI->getSuccessor(1) : 0; if (TrueDest == BB || FalseDest == BB) return false; @@ -1560,23 +1663,33 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) { // Check that we have two conditional branches. If there is a PHI node in // the common successor, verify that the same value flows in from both // blocks. - if (PBI == 0 || PBI->isUnconditional() || !SafeToMergeTerminators(BI, PBI)) + SmallVector PHIs; + if (PBI == 0 || PBI->isUnconditional() || + (BI->isConditional() && + !SafeToMergeTerminators(BI, PBI)) || + (!BI->isConditional() && + !isProfitableToFoldUnconditional(BI, PBI, Cond, PHIs))) continue; // Determine if the two branches share a common destination. Instruction::BinaryOps Opc; bool InvertPredCond = false; - if (PBI->getSuccessor(0) == TrueDest) - Opc = Instruction::Or; - else if (PBI->getSuccessor(1) == FalseDest) - Opc = Instruction::And; - else if (PBI->getSuccessor(0) == FalseDest) - Opc = Instruction::And, InvertPredCond = true; - else if (PBI->getSuccessor(1) == TrueDest) - Opc = Instruction::Or, InvertPredCond = true; - else - continue; + if (BI->isConditional()) { + if (PBI->getSuccessor(0) == TrueDest) + Opc = Instruction::Or; + else if (PBI->getSuccessor(1) == FalseDest) + Opc = Instruction::And; + else if (PBI->getSuccessor(0) == FalseDest) + Opc = Instruction::And, InvertPredCond = true; + else if (PBI->getSuccessor(1) == TrueDest) + Opc = Instruction::Or, InvertPredCond = true; + else + continue; + } else { + if (PBI->getSuccessor(0) != TrueDest && PBI->getSuccessor(1) != TrueDest) + continue; + } // Ensure that any values used in the bonus instruction are also used // by the terminator of the predecessor. 
This means that those values @@ -1652,17 +1765,69 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) { New->takeName(Cond); Cond->setName(New->getName()+".old"); - Instruction *NewCond = - cast(Builder.CreateBinOp(Opc, PBI->getCondition(), + if (BI->isConditional()) { + Instruction *NewCond = + cast(Builder.CreateBinOp(Opc, PBI->getCondition(), New, "or.cond")); - PBI->setCondition(NewCond); - if (PBI->getSuccessor(0) == BB) { - AddPredecessorToBlock(TrueDest, PredBlock, BB); - PBI->setSuccessor(0, TrueDest); - } - if (PBI->getSuccessor(1) == BB) { - AddPredecessorToBlock(FalseDest, PredBlock, BB); - PBI->setSuccessor(1, FalseDest); + PBI->setCondition(NewCond); + + if (PBI->getSuccessor(0) == BB) { + AddPredecessorToBlock(TrueDest, PredBlock, BB); + PBI->setSuccessor(0, TrueDest); + } + if (PBI->getSuccessor(1) == BB) { + AddPredecessorToBlock(FalseDest, PredBlock, BB); + PBI->setSuccessor(1, FalseDest); + } + } else { + // Update PHI nodes in the common successors. + for (unsigned i = 0, e = PHIs.size(); i != e; ++i) { + ConstantInt *PBI_C = cast( + PHIs[i]->getIncomingValueForBlock(PBI->getParent())); + assert(PBI_C->getType()->isIntegerTy(1)); + Instruction *MergedCond = 0; + if (PBI->getSuccessor(0) == TrueDest) { + // Create (PBI_Cond and PBI_C) or (!PBI_Cond and BI_Value) + // PBI_C is true: PBI_Cond or (!PBI_Cond and BI_Value) + // is false: !PBI_Cond and BI_Value + Instruction *NotCond = + cast(Builder.CreateNot(PBI->getCondition(), + "not.cond")); + MergedCond = + cast(Builder.CreateBinOp(Instruction::And, + NotCond, New, + "and.cond")); + if (PBI_C->isOne()) + MergedCond = + cast(Builder.CreateBinOp(Instruction::Or, + PBI->getCondition(), MergedCond, + "or.cond")); + } else { + // Create (PBI_Cond and BI_Value) or (!PBI_Cond and PBI_C) + // PBI_C is true: (PBI_Cond and BI_Value) or (!PBI_Cond) + // is false: PBI_Cond and BI_Value + MergedCond = + cast(Builder.CreateBinOp(Instruction::And, + PBI->getCondition(), New, + "and.cond")); + if (PBI_C->isOne()) { + Instruction *NotCond = + cast(Builder.CreateNot(PBI->getCondition(), + "not.cond")); + MergedCond = + cast(Builder.CreateBinOp(Instruction::Or, + NotCond, MergedCond, + "or.cond")); + } + } + // Update PHI Node. + PHIs[i]->setIncomingValue(PHIs[i]->getBasicBlockIndex(PBI->getParent()), + MergedCond); + } + // Change PBI from Conditional to Unconditional. + BranchInst *New_PBI = BranchInst::Create(TrueDest, PBI); + EraseTerminatorInstAndDCECond(PBI); + PBI = New_PBI; } // TODO: If BB is reachable from all paths through PredBlock, then we @@ -1670,7 +1835,8 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) { // Merge probability data into PredBlock's branch. 
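The MergedCond construction above is the mux identity merged = PBI_Cond ? PBI_C : BI_Value, spelled with and/or, plus the two constant foldings applied when PBI_C is known true. A brute-force check of all four emitted forms in plain C++:

    #include <cassert>

    int main() {
      for (int ci = 0; ci <= 1; ++ci)      // PBI's condition
        for (int vi = 0; vi <= 1; ++vi) {  // BI's condition value ("New")
          bool c = ci != 0, v = vi != 0;
          // PBI->getSuccessor(0) == TrueDest: merged = c ? PBI_C : v.
          assert((c || (!c && v)) == (c ? true  : v)); // PBI_C true
          assert((!c && v)        == (c ? false : v)); // PBI_C false
          // Otherwise: merged = c ? v : PBI_C.
          assert(((c && v) || !c) == (c ? v : true )); // PBI_C true
          assert((c && v)         == (c ? v : false)); // PBI_C false
        }
      return 0;
    }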
APInt A, B, C, D; - if (ExtractBranchMetadata(PBI, C, D) && ExtractBranchMetadata(BI, A, B)) { + if (PBI->isConditional() && BI->isConditional() && + ExtractBranchMetadata(PBI, C, D) && ExtractBranchMetadata(BI, A, B)) { // Given IR which does: // bbA: // br i1 %x, label %bbB, label %bbC @@ -1740,12 +1906,10 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) { ProbTrue = ProbTrue.udiv(GCD); ProbFalse = ProbFalse.udiv(GCD); - LLVMContext &Context = BI->getContext(); - Value *Ops[3]; - Ops[0] = BI->getMetadata(LLVMContext::MD_prof)->getOperand(0); - Ops[1] = ConstantInt::get(Context, ProbTrue); - Ops[2] = ConstantInt::get(Context, ProbFalse); - PBI->setMetadata(LLVMContext::MD_prof, MDNode::get(Context, Ops)); + MDBuilder MDB(BI->getContext()); + MDNode *N = MDB.createBranchWeights(ProbTrue.getZExtValue(), + ProbFalse.getZExtValue()); + PBI->setMetadata(LLVMContext::MD_prof, N); } else { PBI->setMetadata(LLVMContext::MD_prof, NULL); } @@ -2758,6 +2922,12 @@ bool SimplifyCFGOpt::SimplifyUncondBranch(BranchInst *BI, IRBuilder<> &Builder){ return true; } + // If this basic block is ONLY a compare and a branch, and if a predecessor + // branches to us and our successor, fold the comparison into the + // predecessor and use logical operations to update the incoming value + // for PHI nodes in common successor. + if (FoldBranchToCommonDest(BI)) + return SimplifyCFG(BB) | true; return false; } diff --git a/lib/Transforms/Utils/SimplifyIndVar.cpp b/lib/Transforms/Utils/SimplifyIndVar.cpp index 4030bef..5d673f1 100644 --- a/lib/Transforms/Utils/SimplifyIndVar.cpp +++ b/lib/Transforms/Utils/SimplifyIndVar.cpp @@ -16,7 +16,6 @@ #define DEBUG_TYPE "indvars" #include "llvm/Instructions.h" -#include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/IVUsers.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopPass.h" @@ -44,7 +43,6 @@ namespace { class SimplifyIndvar { Loop *L; LoopInfo *LI; - DominatorTree *DT; ScalarEvolution *SE; const TargetData *TD; // May be NULL diff --git a/lib/Transforms/Vectorize/BBVectorize.cpp b/lib/Transforms/Vectorize/BBVectorize.cpp index 9d62306..62d23cb 100644 --- a/lib/Transforms/Vectorize/BBVectorize.cpp +++ b/lib/Transforms/Vectorize/BBVectorize.cpp @@ -23,6 +23,7 @@ #include "llvm/IntrinsicInst.h" #include "llvm/Intrinsics.h" #include "llvm/LLVMContext.h" +#include "llvm/Metadata.h" #include "llvm/Pass.h" #include "llvm/Type.h" #include "llvm/ADT/DenseMap.h" @@ -41,6 +42,7 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/Support/ValueHandle.h" #include "llvm/Target/TargetData.h" +#include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Vectorize.h" #include #include @@ -66,6 +68,10 @@ static cl::opt MaxIter("bb-vectorize-max-iter", cl::init(0), cl::Hidden, cl::desc("The maximum number of pairing iterations")); +static cl::opt +Pow2LenOnly("bb-vectorize-pow2-len-only", cl::init(false), cl::Hidden, + cl::desc("Don't try to form non-2^n-length vectors")); + static cl::opt MaxInsts("bb-vectorize-max-instr-per-group", cl::init(500), cl::Hidden, cl::desc("The maximum number of pairable instructions per group")); @@ -76,6 +82,10 @@ MaxCandPairsForCycleCheck("bb-vectorize-max-cycle-check-pairs", cl::init(200), " a full cycle check")); static cl::opt +NoBools("bb-vectorize-no-bools", cl::init(false), cl::Hidden, + cl::desc("Don't try to vectorize boolean (i1) values")); + +static cl::opt NoInts("bb-vectorize-no-ints", cl::init(false), cl::Hidden, cl::desc("Don't try to vectorize integer values")); @@ -104,6 +114,10 @@ 
NoSelect("bb-vectorize-no-select", cl::init(false), cl::Hidden, cl::desc("Don't try to vectorize select instructions")); static cl::opt +NoCmp("bb-vectorize-no-cmp", cl::init(false), cl::Hidden, + cl::desc("Don't try to vectorize comparison instructions")); + +static cl::opt NoGEP("bb-vectorize-no-gep", cl::init(false), cl::Hidden, cl::desc("Don't try to vectorize getelementptr instructions")); @@ -182,12 +196,12 @@ namespace { // FIXME: const correct? - bool vectorizePairs(BasicBlock &BB); + bool vectorizePairs(BasicBlock &BB, bool NonPow2Len = false); bool getCandidatePairs(BasicBlock &BB, BasicBlock::iterator &Start, std::multimap &CandidatePairs, - std::vector &PairableInsts); + std::vector &PairableInsts, bool NonPow2Len); void computeConnectedPairs(std::multimap &CandidatePairs, std::vector &PairableInsts, @@ -211,7 +225,7 @@ namespace { bool isInstVectorizable(Instruction *I, bool &IsSimpleLoadStore); bool areInstsCompatible(Instruction *I, Instruction *J, - bool IsSimpleLoadStore); + bool IsSimpleLoadStore, bool NonPow2Len); bool trackUsesOfI(DenseSet &Users, AliasSetTracker &WriteSet, Instruction *I, @@ -263,26 +277,32 @@ namespace { bool UseCycleCheck); Value *getReplacementPointerInput(LLVMContext& Context, Instruction *I, - Instruction *J, unsigned o, bool &FlipMemInputs); + Instruction *J, unsigned o, bool FlipMemInputs); void fillNewShuffleMask(LLVMContext& Context, Instruction *J, - unsigned NumElem, unsigned MaskOffset, unsigned NumInElem, - unsigned IdxOffset, std::vector &Mask); + unsigned MaskOffset, unsigned NumInElem, + unsigned NumInElem1, unsigned IdxOffset, + std::vector &Mask); Value *getReplacementShuffleMask(LLVMContext& Context, Instruction *I, Instruction *J); + bool expandIEChain(LLVMContext& Context, Instruction *I, Instruction *J, + unsigned o, Value *&LOp, unsigned numElemL, + Type *ArgTypeL, Type *ArgTypeR, + unsigned IdxOff = 0); + Value *getReplacementInput(LLVMContext& Context, Instruction *I, Instruction *J, unsigned o, bool FlipMemInputs); void getReplacementInputsForPair(LLVMContext& Context, Instruction *I, Instruction *J, SmallVector &ReplacedOperands, - bool &FlipMemInputs); + bool FlipMemInputs); void replaceOutputsOfPair(LLVMContext& Context, Instruction *I, Instruction *J, Instruction *K, Instruction *&InsertionPt, Instruction *&K1, - Instruction *&K2, bool &FlipMemInputs); + Instruction *&K2, bool FlipMemInputs); void collectPairLoadMoveSet(BasicBlock &BB, DenseMap &ChosenPairs, @@ -294,6 +314,10 @@ namespace { DenseMap &ChosenPairs, std::multimap &LoadMoveSet); + void collectPtrInfo(std::vector &PairableInsts, + DenseMap &ChosenPairs, + DenseSet &LowPtrInsts); + bool canMoveUsesOfIAfterJ(BasicBlock &BB, std::multimap &LoadMoveSet, Instruction *I, Instruction *J); @@ -303,12 +327,15 @@ namespace { Instruction *&InsertionPt, Instruction *I, Instruction *J); + void combineMetadata(Instruction *K, const Instruction *J); + bool vectorizeBB(BasicBlock &BB) { bool changed = false; // Iterate a sufficient number of times to merge types of size 1 bit, // then 2 bits, then 4, etc. up to half of the target vector width of the // target vector register. 
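The new cl::opt knobs above also get programmatic mirrors in VectorizeConfig (see the constructor changes at the end of this patch). A sketch of driving the pass with them from code, using only fields this patch introduces (the wrapper name is hypothetical):

    #include "llvm/BasicBlock.h"
    #include "llvm/Pass.h"
    #include "llvm/Transforms/Vectorize.h"
    using namespace llvm;

    bool vectorizeOnce(Pass *P, BasicBlock &BB) {
      VectorizeConfig C;        // defaults come from the cl::opts
      C.VectorizeBools = false; // mirrors -bb-vectorize-no-bools
      C.VectorizeCmp = false;   // mirrors -bb-vectorize-no-cmp
      C.Pow2LenOnly = true;     // mirrors -bb-vectorize-pow2-len-only
      return vectorizeBasicBlock(P, BB, C);
    }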
- for (unsigned v = 2, n = 1; + unsigned n = 1; + for (unsigned v = 2; v <= Config.VectorBits && (!Config.MaxIter || n <= Config.MaxIter); v *= 2, ++n) { DEBUG(dbgs() << "BBV: fusing loop #" << n << @@ -320,6 +347,16 @@ namespace { break; } + if (changed && !Pow2LenOnly) { + ++n; + for (; !Config.MaxIter || n <= Config.MaxIter; ++n) { + DEBUG(dbgs() << "BBV: fusing for non-2^n-length vectors loop #: " << + n << " for " << BB.getName() << " in " << + BB.getParent()->getName() << "...\n"); + if (!vectorizePairs(BB, true)) break; + } + } + DEBUG(dbgs() << "BBV: done!\n"); return changed; } @@ -341,15 +378,43 @@ namespace { AU.setPreservesCFG(); } - // This returns the vector type that holds a pair of the provided type. - // If the provided type is already a vector, then its length is doubled. - static inline VectorType *getVecTypeForPair(Type *ElemTy) { + static inline VectorType *getVecTypeForPair(Type *ElemTy, Type *Elem2Ty) { + assert(ElemTy->getScalarType() == Elem2Ty->getScalarType() && + "Cannot form vector from incompatible scalar types"); + Type *STy = ElemTy->getScalarType(); + + unsigned numElem; if (VectorType *VTy = dyn_cast(ElemTy)) { - unsigned numElem = VTy->getNumElements(); - return VectorType::get(ElemTy->getScalarType(), numElem*2); + numElem = VTy->getNumElements(); + } else { + numElem = 1; } - return VectorType::get(ElemTy, 2); + if (VectorType *VTy = dyn_cast(Elem2Ty)) { + numElem += VTy->getNumElements(); + } else { + numElem += 1; + } + + return VectorType::get(STy, numElem); + } + + static inline void getInstructionTypes(Instruction *I, + Type *&T1, Type *&T2) { + if (isa(I)) { + // For stores, it is the value type, not the pointer type that matters + // because the value is what will come from a vector register. + + Value *IVal = cast(I)->getValueOperand(); + T1 = IVal->getType(); + } else { + T1 = I->getType(); + } + + if (I->isCast()) + T2 = cast(I)->getSrcTy(); + else + T2 = T1; } // Returns the weight associated with the provided value. A chain of @@ -385,8 +450,7 @@ namespace { // true if the offset could be determined to be some constant value. // For example, if OffsetInElmts == 1, then J accesses the memory directly // after I; if OffsetInElmts == -1 then I accesses the memory - // directly after J. This function assumes that both instructions - // have the same type. + // directly after J. bool getPairPtrInfo(Instruction *I, Instruction *J, Value *&IPtr, Value *&JPtr, unsigned &IAlignment, unsigned &JAlignment, int64_t &OffsetInElmts) { @@ -418,7 +482,12 @@ namespace { Type *VTy = cast(IPtr->getType())->getElementType(); int64_t VTyTSS = (int64_t) TD->getTypeStoreSize(VTy); - assert(VTy == cast(JPtr->getType())->getElementType()); + Type *VTy2 = cast(JPtr->getType())->getElementType(); + if (VTy != VTy2 && Offset < 0) { + int64_t VTy2TSS = (int64_t) TD->getTypeStoreSize(VTy2); + OffsetInElmts = Offset/VTy2TSS; + return (abs64(Offset) % VTy2TSS) == 0; + } OffsetInElmts = Offset/VTyTSS; return (abs64(Offset) % VTyTSS) == 0; @@ -471,7 +540,7 @@ namespace { // This function implements one vectorization iteration on the provided // basic block. It returns true if the block is changed. 
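getVecTypeForPair now accepts two types whose scalar type matches and simply adds their lengths, which is what makes non-power-of-two fusion possible. A standalone restatement with a few sanity checks (fuseTypes is a hypothetical stand-in, since the real helper is file-local to BBVectorize.cpp):

    #include "llvm/DerivedTypes.h"
    #include "llvm/LLVMContext.h"
    #include <cassert>
    using namespace llvm;

    // Fused length = len(T1) + len(T2), scalars counting as length 1.
    static VectorType *fuseTypes(Type *T1, Type *T2) {
      unsigned N1 = T1->isVectorTy() ? cast<VectorType>(T1)->getNumElements() : 1;
      unsigned N2 = T2->isVectorTy() ? cast<VectorType>(T2)->getNumElements() : 1;
      return VectorType::get(T1->getScalarType(), N1 + N2);
    }

    int main() {
      LLVMContext Ctx;
      Type *F = Type::getFloatTy(Ctx);
      Type *V2F = VectorType::get(F, 2), *V3F = VectorType::get(F, 3);
      assert(fuseTypes(F, F) == VectorType::get(F, 2));     // old 2^n case
      assert(fuseTypes(V2F, F) == V3F);                     // mixed widths
      assert(fuseTypes(V2F, V3F) == VectorType::get(F, 5)); // non-2^n
      return 0;
    }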
- bool BBVectorize::vectorizePairs(BasicBlock &BB) { + bool BBVectorize::vectorizePairs(BasicBlock &BB, bool NonPow2Len) { bool ShouldContinue; BasicBlock::iterator Start = BB.getFirstInsertionPt(); @@ -482,7 +551,7 @@ namespace { std::vector PairableInsts; std::multimap CandidatePairs; ShouldContinue = getCandidatePairs(BB, Start, CandidatePairs, - PairableInsts); + PairableInsts, NonPow2Len); if (PairableInsts.empty()) continue; // Now we have a map of all of the pairable instructions and we need to @@ -529,6 +598,10 @@ namespace { // passes should coalesce the build/extract combinations. fuseChosenPairs(BB, AllPairableInsts, AllChosenPairs); + + // It is important to cleanup here so that future iterations of this + // function have less work to do. + (void) SimplifyInstructionsInBlock(&BB, TD); return true; } @@ -567,6 +640,9 @@ namespace { } else if (isa(I)) { if (!Config.VectorizeSelect) return false; + } else if (isa(I)) { + if (!Config.VectorizeCmp) + return false; } else if (GetElementPtrInst *G = dyn_cast(I)) { if (!Config.VectorizeGEP) return false; @@ -584,41 +660,39 @@ namespace { return false; Type *T1, *T2; - if (isa(I)) { - // For stores, it is the value type, not the pointer type that matters - // because the value is what will come from a vector register. - - Value *IVal = cast(I)->getValueOperand(); - T1 = IVal->getType(); - } else { - T1 = I->getType(); - } - - if (I->isCast()) - T2 = cast(I)->getSrcTy(); - else - T2 = T1; + getInstructionTypes(I, T1, T2); // Not every type can be vectorized... if (!(VectorType::isValidElementType(T1) || T1->isVectorTy()) || !(VectorType::isValidElementType(T2) || T2->isVectorTy())) return false; - if (!Config.VectorizeInts - && (T1->isIntOrIntVectorTy() || T2->isIntOrIntVectorTy())) - return false; - + if (T1->getScalarSizeInBits() == 1 && T2->getScalarSizeInBits() == 1) { + if (!Config.VectorizeBools) + return false; + } else { + if (!Config.VectorizeInts + && (T1->isIntOrIntVectorTy() || T2->isIntOrIntVectorTy())) + return false; + } + if (!Config.VectorizeFloats && (T1->isFPOrFPVectorTy() || T2->isFPOrFPVectorTy())) return false; + // Don't vectorize target-specific types. + if (T1->isX86_FP80Ty() || T1->isPPC_FP128Ty() || T1->isX86_MMXTy()) + return false; + if (T2->isX86_FP80Ty() || T2->isPPC_FP128Ty() || T2->isX86_MMXTy()) + return false; + if ((!Config.VectorizePointers || TD == 0) && (T1->getScalarType()->isPointerTy() || T2->getScalarType()->isPointerTy())) return false; - if (T1->getPrimitiveSizeInBits() > Config.VectorBits/2 || - T2->getPrimitiveSizeInBits() > Config.VectorBits/2) + if (T1->getPrimitiveSizeInBits() >= Config.VectorBits || + T2->getPrimitiveSizeInBits() >= Config.VectorBits) return false; return true; @@ -629,36 +703,25 @@ namespace { // that I has already been determined to be vectorizable and that J is not // in the use tree of I. bool BBVectorize::areInstsCompatible(Instruction *I, Instruction *J, - bool IsSimpleLoadStore) { + bool IsSimpleLoadStore, bool NonPow2Len) { DEBUG(if (DebugInstructionExamination) dbgs() << "BBV: looking at " << *I << " <-> " << *J << "\n"); // Loads and stores can be merged if they have different alignments, // but are otherwise the same. - LoadInst *LI, *LJ; - StoreInst *SI, *SJ; - if ((LI = dyn_cast(I)) && (LJ = dyn_cast(J))) { - if (I->getType() != J->getType()) - return false; + if (!J->isSameOperationAs(I, Instruction::CompareIgnoringAlignment | + (NonPow2Len ? 
Instruction::CompareUsingScalarTypes : 0))) + return false; - if (LI->getPointerOperand()->getType() != - LJ->getPointerOperand()->getType() || - LI->isVolatile() != LJ->isVolatile() || - LI->getOrdering() != LJ->getOrdering() || - LI->getSynchScope() != LJ->getSynchScope()) - return false; - } else if ((SI = dyn_cast(I)) && (SJ = dyn_cast(J))) { - if (SI->getValueOperand()->getType() != - SJ->getValueOperand()->getType() || - SI->getPointerOperand()->getType() != - SJ->getPointerOperand()->getType() || - SI->isVolatile() != SJ->isVolatile() || - SI->getOrdering() != SJ->getOrdering() || - SI->getSynchScope() != SJ->getSynchScope()) - return false; - } else if (!J->isSameOperationAs(I)) { + Type *IT1, *IT2, *JT1, *JT2; + getInstructionTypes(I, IT1, IT2); + getInstructionTypes(J, JT1, JT2); + unsigned MaxTypeBits = std::max( + IT1->getPrimitiveSizeInBits() + JT1->getPrimitiveSizeInBits(), + IT2->getPrimitiveSizeInBits() + JT2->getPrimitiveSizeInBits()); + if (MaxTypeBits > Config.VectorBits) return false; - } + // FIXME: handle addsub-type operations! if (IsSimpleLoadStore) { @@ -668,8 +731,11 @@ namespace { if (getPairPtrInfo(I, J, IPtr, JPtr, IAlignment, JAlignment, OffsetInElmts) && abs64(OffsetInElmts) == 1) { if (Config.AlignedOnly) { - Type *aType = isa(I) ? + Type *aTypeI = isa(I) ? cast(I)->getValueOperand()->getType() : I->getType(); + Type *aTypeJ = isa(J) ? + cast(J)->getValueOperand()->getType() : J->getType(); + // An aligned load or store is possible only if the instruction // with the lower offset has an alignment suitable for the // vector type. @@ -677,7 +743,7 @@ namespace { unsigned BottomAlignment = IAlignment; if (OffsetInElmts < 0) BottomAlignment = JAlignment; - Type *VType = getVecTypeForPair(aType); + Type *VType = getVecTypeForPair(aTypeI, aTypeJ); unsigned VecAlignment = TD->getPrefTypeAlignment(VType); if (BottomAlignment < VecAlignment) return false; @@ -685,11 +751,6 @@ namespace { } else { return false; } - } else if (isa(I)) { - // Only merge two shuffles if they're both constant - return isa(I->getOperand(2)) && - isa(J->getOperand(2)); - // FIXME: We may want to vectorize non-constant shuffles also. } // The powi intrinsic is special because only the first argument is @@ -772,7 +833,7 @@ namespace { bool BBVectorize::getCandidatePairs(BasicBlock &BB, BasicBlock::iterator &Start, std::multimap &CandidatePairs, - std::vector &PairableInsts) { + std::vector &PairableInsts, bool NonPow2Len) { BasicBlock::iterator E = BB.end(); if (Start == E) return false; @@ -808,7 +869,7 @@ namespace { // J does not use I, and comes before the first use of I, so it can be // merged with I if the instructions are compatible. - if (!areInstsCompatible(I, J, IsSimpleLoadStore)) continue; + if (!areInstsCompatible(I, J, IsSimpleLoadStore, NonPow2Len)) continue; // J is a candidate for merging with I. if (!PairableInsts.size() || @@ -1430,24 +1491,27 @@ namespace { // instruction that fuses I with J. Value *BBVectorize::getReplacementPointerInput(LLVMContext& Context, Instruction *I, Instruction *J, unsigned o, - bool &FlipMemInputs) { + bool FlipMemInputs) { Value *IPtr, *JPtr; unsigned IAlignment, JAlignment; int64_t OffsetInElmts; + + // Note: the analysis might fail here, that is why FlipMemInputs has + // been precomputed (OffsetInElmts must be unused here). (void) getPairPtrInfo(I, J, IPtr, JPtr, IAlignment, JAlignment, OffsetInElmts); // The pointer value is taken to be the one with the lowest offset. 
Value *VPtr; - if (OffsetInElmts > 0) { + if (!FlipMemInputs) { VPtr = IPtr; } else { - FlipMemInputs = true; VPtr = JPtr; } - Type *ArgType = cast(IPtr->getType())->getElementType(); - Type *VArgType = getVecTypeForPair(ArgType); + Type *ArgTypeI = cast(IPtr->getType())->getElementType(); + Type *ArgTypeJ = cast(JPtr->getType())->getElementType(); + Type *VArgType = getVecTypeForPair(ArgTypeI, ArgTypeJ); Type *VArgPtrType = PointerType::get(VArgType, cast(IPtr->getType())->getAddressSpace()); return new BitCastInst(VPtr, VArgPtrType, getReplacementName(I, true, o), @@ -1455,15 +1519,17 @@ namespace { } void BBVectorize::fillNewShuffleMask(LLVMContext& Context, Instruction *J, - unsigned NumElem, unsigned MaskOffset, unsigned NumInElem, - unsigned IdxOffset, std::vector &Mask) { - for (unsigned v = 0; v < NumElem/2; ++v) { + unsigned MaskOffset, unsigned NumInElem, + unsigned NumInElem1, unsigned IdxOffset, + std::vector &Mask) { + unsigned NumElem1 = cast(J->getType())->getNumElements(); + for (unsigned v = 0; v < NumElem1; ++v) { int m = cast(J)->getMaskValue(v); if (m < 0) { Mask[v+MaskOffset] = UndefValue::get(Type::getInt32Ty(Context)); } else { unsigned mm = m + (int) IdxOffset; - if (m >= (int) NumInElem) + if (m >= (int) NumInElem1) mm += (int) NumInElem; Mask[v+MaskOffset] = @@ -1479,8 +1545,11 @@ namespace { // This is the shuffle mask. We need to append the second // mask to the first, and the numbers need to be adjusted. - Type *ArgType = I->getType(); - Type *VArgType = getVecTypeForPair(ArgType); + Type *ArgTypeI = I->getType(); + Type *ArgTypeJ = J->getType(); + Type *VArgType = getVecTypeForPair(ArgTypeI, ArgTypeJ); + + unsigned NumElemI = cast(ArgTypeI)->getNumElements(); // Get the total number of elements in the fused vector type. // By definition, this must equal the number of elements in @@ -1488,19 +1557,81 @@ namespace { unsigned NumElem = cast(VArgType)->getNumElements(); std::vector Mask(NumElem); - Type *OpType = I->getOperand(0)->getType(); - unsigned NumInElem = cast(OpType)->getNumElements(); + Type *OpTypeI = I->getOperand(0)->getType(); + unsigned NumInElemI = cast(OpTypeI)->getNumElements(); + Type *OpTypeJ = J->getOperand(0)->getType(); + unsigned NumInElemJ = cast(OpTypeJ)->getNumElements(); + + // The fused vector will be: + // ----------------------------------------------------- + // | NumInElemI | NumInElemJ | NumInElemI | NumInElemJ | + // ----------------------------------------------------- + // from which we'll extract NumElem total elements (where the first NumElemI + // of them come from the mask in I and the remainder come from the mask + // in J. // For the mask from the first pair... - fillNewShuffleMask(Context, I, NumElem, 0, NumInElem, 0, Mask); + fillNewShuffleMask(Context, I, 0, NumInElemJ, NumInElemI, + 0, Mask); // For the mask from the second pair... - fillNewShuffleMask(Context, J, NumElem, NumElem/2, NumInElem, NumInElem, - Mask); + fillNewShuffleMask(Context, J, NumElemI, NumInElemI, NumInElemJ, + NumInElemI, Mask); return ConstantVector::get(Mask); } + bool BBVectorize::expandIEChain(LLVMContext& Context, Instruction *I, + Instruction *J, unsigned o, Value *&LOp, + unsigned numElemL, + Type *ArgTypeL, Type *ArgTypeH, + unsigned IdxOff) { + bool ExpandedIEChain = false; + if (InsertElementInst *LIE = dyn_cast(LOp)) { + // If we have a pure insertelement chain, then this can be rewritten + // into a chain that directly builds the larger type. 
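The index arithmetic in the new fillNewShuffleMask is easiest to see in isolation. Distilled below (remapMaskEntry is hypothetical; the parameter roles follow the two call sites above):

    #include <cassert>

    // Remap one entry of an original shuffle mask into the fused mask.
    // NumInElemOwn: element count of this shuffle's own first input;
    // NumInElemOther: that of the other pair member's first input;
    // IdxOffset: where this member's elements start in the fused operand.
    static int remapMaskEntry(int m, unsigned NumInElemOwn,
                              unsigned NumInElemOther, unsigned IdxOffset) {
      if (m < 0)
        return -1;            // undef lanes stay undef
      unsigned mm = m + IdxOffset;
      if (m >= (int)NumInElemOwn)
        mm += NumInElemOther; // entries into the second input also skip the
                              // other member's elements, which sit between
                              // the two original inputs after fusion
      return (int)mm;
    }

    int main() {
      // I's inputs have 2 elements, J's have 3; the fused operands are
      // | I.op0 (2) | J.op0 (3) | and | I.op1 (2) | J.op1 (3) |.
      assert(remapMaskEntry(2, 2, 3, 0) == 5); // I.op1[0] -> fused lane 5
      assert(remapMaskEntry(4, 3, 2, 2) == 8); // J.op1[1] -> fused lane 8
      return 0;
    }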
+ bool PureChain = true; + InsertElementInst *LIENext = LIE; + do { + if (!isa(LIENext->getOperand(0)) && + !isa(LIENext->getOperand(0))) { + PureChain = false; + break; + } + } while ((LIENext = + dyn_cast(LIENext->getOperand(0)))); + + if (PureChain) { + SmallVector VectElemts(numElemL, + UndefValue::get(ArgTypeL->getScalarType())); + InsertElementInst *LIENext = LIE; + do { + unsigned Idx = + cast(LIENext->getOperand(2))->getSExtValue(); + VectElemts[Idx] = LIENext->getOperand(1); + } while ((LIENext = + dyn_cast(LIENext->getOperand(0)))); + + LIENext = 0; + Value *LIEPrev = UndefValue::get(ArgTypeH); + for (unsigned i = 0; i < numElemL; ++i) { + if (isa(VectElemts[i])) continue; + LIENext = InsertElementInst::Create(LIEPrev, VectElemts[i], + ConstantInt::get(Type::getInt32Ty(Context), + i + IdxOff), + getReplacementName(I, true, o, i+1)); + LIENext->insertBefore(J); + LIEPrev = LIENext; + } + + LOp = LIENext ? (Value*) LIENext : UndefValue::get(ArgTypeH); + ExpandedIEChain = true; + } + } + + return ExpandedIEChain; + } + // Returns the value to be used as the specified operand of the vector // instruction that fuses I with J. Value *BBVectorize::getReplacementInput(LLVMContext& Context, Instruction *I, @@ -1508,84 +1639,333 @@ namespace { Value *CV0 = ConstantInt::get(Type::getInt32Ty(Context), 0); Value *CV1 = ConstantInt::get(Type::getInt32Ty(Context), 1); - // Compute the fused vector type for this operand - Type *ArgType = I->getOperand(o)->getType(); - VectorType *VArgType = getVecTypeForPair(ArgType); + // Compute the fused vector type for this operand + Type *ArgTypeI = I->getOperand(o)->getType(); + Type *ArgTypeJ = J->getOperand(o)->getType(); + VectorType *VArgType = getVecTypeForPair(ArgTypeI, ArgTypeJ); Instruction *L = I, *H = J; + Type *ArgTypeL = ArgTypeI, *ArgTypeH = ArgTypeJ; if (FlipMemInputs) { L = J; H = I; + ArgTypeL = ArgTypeJ; + ArgTypeH = ArgTypeI; } - if (ArgType->isVectorTy()) { - unsigned numElem = cast(VArgType)->getNumElements(); - std::vector Mask(numElem); - for (unsigned v = 0; v < numElem; ++v) - Mask[v] = ConstantInt::get(Type::getInt32Ty(Context), v); + unsigned numElemL; + if (ArgTypeL->isVectorTy()) + numElemL = cast(ArgTypeL)->getNumElements(); + else + numElemL = 1; - Instruction *BV = new ShuffleVectorInst(L->getOperand(o), - H->getOperand(o), - ConstantVector::get(Mask), - getReplacementName(I, true, o)); - BV->insertBefore(J); - return BV; + unsigned numElemH; + if (ArgTypeH->isVectorTy()) + numElemH = cast(ArgTypeH)->getNumElements(); + else + numElemH = 1; + + Value *LOp = L->getOperand(o); + Value *HOp = H->getOperand(o); + unsigned numElem = VArgType->getNumElements(); + + // First, we check if we can reuse the "original" vector outputs (if these + // exist). We might need a shuffle. + ExtractElementInst *LEE = dyn_cast(LOp); + ExtractElementInst *HEE = dyn_cast(HOp); + ShuffleVectorInst *LSV = dyn_cast(LOp); + ShuffleVectorInst *HSV = dyn_cast(HOp); + + // FIXME: If we're fusing shuffle instructions, then we can't apply this + // optimization. The input vectors to the shuffle might be a different + // length from the shuffle outputs. Unfortunately, the replacement + // shuffle mask has already been formed, and the mask entries are sensitive + // to the sizes of the inputs. + bool IsSizeChangeShuffle = + isa(L) && + (LOp->getType() != L->getType() || HOp->getType() != H->getType()); + + if ((LEE || LSV) && (HEE || HSV) && !IsSizeChangeShuffle) { + // We can have at most two unique vector inputs. 
+ bool CanUseInputs = true; + Value *I1, *I2 = 0; + if (LEE) { + I1 = LEE->getOperand(0); + } else { + I1 = LSV->getOperand(0); + I2 = LSV->getOperand(1); + if (I2 == I1 || isa(I2)) + I2 = 0; + } + + if (HEE) { + Value *I3 = HEE->getOperand(0); + if (!I2 && I3 != I1) + I2 = I3; + else if (I3 != I1 && I3 != I2) + CanUseInputs = false; + } else { + Value *I3 = HSV->getOperand(0); + if (!I2 && I3 != I1) + I2 = I3; + else if (I3 != I1 && I3 != I2) + CanUseInputs = false; + + if (CanUseInputs) { + Value *I4 = HSV->getOperand(1); + if (!isa(I4)) { + if (!I2 && I4 != I1) + I2 = I4; + else if (I4 != I1 && I4 != I2) + CanUseInputs = false; + } + } + } + + if (CanUseInputs) { + unsigned LOpElem = + cast(cast(LOp)->getOperand(0)->getType()) + ->getNumElements(); + unsigned HOpElem = + cast(cast(HOp)->getOperand(0)->getType()) + ->getNumElements(); + + // We have one or two input vectors. We need to map each index of the + // operands to the index of the original vector. + SmallVector, 8> II(numElem); + for (unsigned i = 0; i < numElemL; ++i) { + int Idx, INum; + if (LEE) { + Idx = + cast(LEE->getOperand(1))->getSExtValue(); + INum = LEE->getOperand(0) == I1 ? 0 : 1; + } else { + Idx = LSV->getMaskValue(i); + if (Idx < (int) LOpElem) { + INum = LSV->getOperand(0) == I1 ? 0 : 1; + } else { + Idx -= LOpElem; + INum = LSV->getOperand(1) == I1 ? 0 : 1; + } + } + + II[i] = std::pair(Idx, INum); + } + for (unsigned i = 0; i < numElemH; ++i) { + int Idx, INum; + if (HEE) { + Idx = + cast(HEE->getOperand(1))->getSExtValue(); + INum = HEE->getOperand(0) == I1 ? 0 : 1; + } else { + Idx = HSV->getMaskValue(i); + if (Idx < (int) HOpElem) { + INum = HSV->getOperand(0) == I1 ? 0 : 1; + } else { + Idx -= HOpElem; + INum = HSV->getOperand(1) == I1 ? 0 : 1; + } + } + + II[i + numElemL] = std::pair(Idx, INum); + } + + // We now have an array which tells us from which index of which + // input vector each element of the operand comes. + VectorType *I1T = cast(I1->getType()); + unsigned I1Elem = I1T->getNumElements(); + + if (!I2) { + // In this case there is only one underlying vector input. Check for + // the trivial case where we can use the input directly. + if (I1Elem == numElem) { + bool ElemInOrder = true; + for (unsigned i = 0; i < numElem; ++i) { + if (II[i].first != (int) i && II[i].first != -1) { + ElemInOrder = false; + break; + } + } + + if (ElemInOrder) + return I1; + } + + // A shuffle is needed. + std::vector Mask(numElem); + for (unsigned i = 0; i < numElem; ++i) { + int Idx = II[i].first; + if (Idx == -1) + Mask[i] = UndefValue::get(Type::getInt32Ty(Context)); + else + Mask[i] = ConstantInt::get(Type::getInt32Ty(Context), Idx); + } + + Instruction *S = + new ShuffleVectorInst(I1, UndefValue::get(I1T), + ConstantVector::get(Mask), + getReplacementName(I, true, o)); + S->insertBefore(J); + return S; + } + + VectorType *I2T = cast(I2->getType()); + unsigned I2Elem = I2T->getNumElements(); + + // This input comes from two distinct vectors. The first step is to + // make sure that both vectors are the same length. If not, the + // smaller one will need to grow before they can be shuffled together. 
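When the two reusable inputs have different lengths, the shorter one is first padded with undef lanes via an identity shuffle so that one two-input shuffle can combine them. A sketch of that padding step as a helper (widenWithUndef is hypothetical; the patch inlines the equivalent code):

    #include "llvm/Constants.h"
    #include "llvm/Instructions.h"
    #include <vector>
    using namespace llvm;

    // Keep lanes 0..FromElems-1 of V, pad up to ToElems with undef.
    static Value *widenWithUndef(Value *V, unsigned FromElems,
                                 unsigned ToElems, Instruction *InsertBefore) {
      LLVMContext &Ctx = V->getContext();
      std::vector<Constant*> Mask(ToElems);
      for (unsigned i = 0; i != ToElems; ++i) {
        if (i < FromElems)
          Mask[i] = ConstantInt::get(Type::getInt32Ty(Ctx), i);
        else
          Mask[i] = UndefValue::get(Type::getInt32Ty(Ctx));
      }
      return new ShuffleVectorInst(V, UndefValue::get(V->getType()),
                                   ConstantVector::get(Mask), "widen",
                                   InsertBefore);
    }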
+ if (I1Elem < I2Elem) { + std::vector Mask(I2Elem); + unsigned v = 0; + for (; v < I1Elem; ++v) + Mask[v] = ConstantInt::get(Type::getInt32Ty(Context), v); + for (; v < I2Elem; ++v) + Mask[v] = UndefValue::get(Type::getInt32Ty(Context)); + + Instruction *NewI1 = + new ShuffleVectorInst(I1, UndefValue::get(I1T), + ConstantVector::get(Mask), + getReplacementName(I, true, o, 1)); + NewI1->insertBefore(J); + I1 = NewI1; + I1T = I2T; + I1Elem = I2Elem; + } else if (I1Elem > I2Elem) { + std::vector Mask(I1Elem); + unsigned v = 0; + for (; v < I2Elem; ++v) + Mask[v] = ConstantInt::get(Type::getInt32Ty(Context), v); + for (; v < I1Elem; ++v) + Mask[v] = UndefValue::get(Type::getInt32Ty(Context)); + + Instruction *NewI2 = + new ShuffleVectorInst(I2, UndefValue::get(I2T), + ConstantVector::get(Mask), + getReplacementName(I, true, o, 1)); + NewI2->insertBefore(J); + I2 = NewI2; + I2T = I1T; + I2Elem = I1Elem; + } + + // Now that both I1 and I2 are the same length we can shuffle them + // together (and use the result). + std::vector Mask(numElem); + for (unsigned v = 0; v < numElem; ++v) { + if (II[v].first == -1) { + Mask[v] = UndefValue::get(Type::getInt32Ty(Context)); + } else { + int Idx = II[v].first + II[v].second * I1Elem; + Mask[v] = ConstantInt::get(Type::getInt32Ty(Context), Idx); + } + } + + Instruction *NewOp = + new ShuffleVectorInst(I1, I2, ConstantVector::get(Mask), + getReplacementName(I, true, o)); + NewOp->insertBefore(J); + return NewOp; + } } - // If these two inputs are the output of another vector instruction, - // then we should use that output directly. It might be necessary to - // permute it first. [When pairings are fused recursively, you can - // end up with cases where a large vector is decomposed into scalars - // using extractelement instructions, then built into size-2 - // vectors using insertelement and the into larger vectors using - // shuffles. InstCombine does not simplify all of these cases well, - // and so we make sure that shuffles are generated here when possible. - ExtractElementInst *LEE - = dyn_cast(L->getOperand(o)); - ExtractElementInst *HEE - = dyn_cast(H->getOperand(o)); - - if (LEE && HEE && - LEE->getOperand(0)->getType() == HEE->getOperand(0)->getType()) { - VectorType *EEType = cast(LEE->getOperand(0)->getType()); - unsigned LowIndx = cast(LEE->getOperand(1))->getZExtValue(); - unsigned HighIndx = cast(HEE->getOperand(1))->getZExtValue(); - if (LEE->getOperand(0) == HEE->getOperand(0)) { - if (LowIndx == 0 && HighIndx == 1) - return LEE->getOperand(0); - - std::vector Mask(2); - Mask[0] = ConstantInt::get(Type::getInt32Ty(Context), LowIndx); - Mask[1] = ConstantInt::get(Type::getInt32Ty(Context), HighIndx); - - Instruction *BV = new ShuffleVectorInst(LEE->getOperand(0), - UndefValue::get(EEType), - ConstantVector::get(Mask), - getReplacementName(I, true, o)); - BV->insertBefore(J); - return BV; + Type *ArgType = ArgTypeL; + if (numElemL < numElemH) { + if (numElemL == 1 && expandIEChain(Context, I, J, o, HOp, numElemH, + ArgTypeL, VArgType, 1)) { + // This is another short-circuit case: we're combining a scalar into + // a vector that is formed by an IE chain. We've just expanded the IE + // chain, now insert the scalar and we're done. 
+ + Instruction *S = InsertElementInst::Create(HOp, LOp, CV0, + getReplacementName(I, true, o)); + S->insertBefore(J); + return S; + } else if (!expandIEChain(Context, I, J, o, LOp, numElemL, ArgTypeL, + ArgTypeH)) { + // The two vector inputs to the shuffle must be the same length, + // so extend the smaller vector to be the same length as the larger one. + Instruction *NLOp; + if (numElemL > 1) { + + std::vector Mask(numElemH); + unsigned v = 0; + for (; v < numElemL; ++v) + Mask[v] = ConstantInt::get(Type::getInt32Ty(Context), v); + for (; v < numElemH; ++v) + Mask[v] = UndefValue::get(Type::getInt32Ty(Context)); + + NLOp = new ShuffleVectorInst(LOp, UndefValue::get(ArgTypeL), + ConstantVector::get(Mask), + getReplacementName(I, true, o, 1)); + } else { + NLOp = InsertElementInst::Create(UndefValue::get(ArgTypeH), LOp, CV0, + getReplacementName(I, true, o, 1)); + } + + NLOp->insertBefore(J); + LOp = NLOp; } - std::vector Mask(2); - HighIndx += EEType->getNumElements(); - Mask[0] = ConstantInt::get(Type::getInt32Ty(Context), LowIndx); - Mask[1] = ConstantInt::get(Type::getInt32Ty(Context), HighIndx); + ArgType = ArgTypeH; + } else if (numElemL > numElemH) { + if (numElemH == 1 && expandIEChain(Context, I, J, o, LOp, numElemL, + ArgTypeH, VArgType)) { + Instruction *S = + InsertElementInst::Create(LOp, HOp, + ConstantInt::get(Type::getInt32Ty(Context), + numElemL), + getReplacementName(I, true, o)); + S->insertBefore(J); + return S; + } else if (!expandIEChain(Context, I, J, o, HOp, numElemH, ArgTypeH, + ArgTypeL)) { + Instruction *NHOp; + if (numElemH > 1) { + std::vector Mask(numElemL); + unsigned v = 0; + for (; v < numElemH; ++v) + Mask[v] = ConstantInt::get(Type::getInt32Ty(Context), v); + for (; v < numElemL; ++v) + Mask[v] = UndefValue::get(Type::getInt32Ty(Context)); + + NHOp = new ShuffleVectorInst(HOp, UndefValue::get(ArgTypeH), + ConstantVector::get(Mask), + getReplacementName(I, true, o, 1)); + } else { + NHOp = InsertElementInst::Create(UndefValue::get(ArgTypeL), HOp, CV0, + getReplacementName(I, true, o, 1)); + } + + NHOp->insertBefore(J); + HOp = NHOp; + } + } - Instruction *BV = new ShuffleVectorInst(LEE->getOperand(0), - HEE->getOperand(0), - ConstantVector::get(Mask), - getReplacementName(I, true, o)); + if (ArgType->isVectorTy()) { + unsigned numElem = cast(VArgType)->getNumElements(); + std::vector Mask(numElem); + for (unsigned v = 0; v < numElem; ++v) { + unsigned Idx = v; + // If the low vector was expanded, we need to skip the extra + // undefined entries. 
+ if (v >= numElemL && numElemH > numElemL) + Idx += (numElemH - numElemL); + Mask[v] = ConstantInt::get(Type::getInt32Ty(Context), Idx); + } + + Instruction *BV = new ShuffleVectorInst(LOp, HOp, + ConstantVector::get(Mask), + getReplacementName(I, true, o)); BV->insertBefore(J); return BV; } Instruction *BV1 = InsertElementInst::Create( - UndefValue::get(VArgType), - L->getOperand(o), CV0, + UndefValue::get(VArgType), LOp, CV0, getReplacementName(I, true, o, 1)); BV1->insertBefore(I); - Instruction *BV2 = InsertElementInst::Create(BV1, H->getOperand(o), - CV1, + Instruction *BV2 = InsertElementInst::Create(BV1, HOp, CV1, getReplacementName(I, true, o, 2)); BV2->insertBefore(J); return BV2; @@ -1596,8 +1976,7 @@ namespace { void BBVectorize::getReplacementInputsForPair(LLVMContext& Context, Instruction *I, Instruction *J, SmallVector &ReplacedOperands, - bool &FlipMemInputs) { - FlipMemInputs = false; + bool FlipMemInputs) { unsigned NumOperands = I->getNumOperands(); for (unsigned p = 0, o = NumOperands-1; p < NumOperands; ++p, --o) { @@ -1616,10 +1995,10 @@ namespace { BasicBlock &BB = *I->getParent(); Module *M = BB.getParent()->getParent(); - Type *ArgType = I->getType(); - Type *VArgType = getVecTypeForPair(ArgType); + Type *ArgTypeI = I->getType(); + Type *ArgTypeJ = J->getType(); + Type *VArgType = getVecTypeForPair(ArgTypeI, ArgTypeJ); - // FIXME: is it safe to do this here? ReplacedOperands[o] = Intrinsic::getDeclaration(M, (Intrinsic::ID) IID, VArgType); continue; @@ -1648,36 +2027,60 @@ namespace { Instruction *J, Instruction *K, Instruction *&InsertionPt, Instruction *&K1, Instruction *&K2, - bool &FlipMemInputs) { - Value *CV0 = ConstantInt::get(Type::getInt32Ty(Context), 0); - Value *CV1 = ConstantInt::get(Type::getInt32Ty(Context), 1); - + bool FlipMemInputs) { if (isa(I)) { AA->replaceWithNewValue(I, K); AA->replaceWithNewValue(J, K); } else { Type *IType = I->getType(); - Type *VType = getVecTypeForPair(IType); + Type *JType = J->getType(); + + VectorType *VType = getVecTypeForPair(IType, JType); + unsigned numElem = VType->getNumElements(); + + unsigned numElemI, numElemJ; + if (IType->isVectorTy()) + numElemI = cast(IType)->getNumElements(); + else + numElemI = 1; + + if (JType->isVectorTy()) + numElemJ = cast(JType)->getNumElements(); + else + numElemJ = 1; if (IType->isVectorTy()) { - unsigned numElem = cast(IType)->getNumElements(); - std::vector Mask1(numElem), Mask2(numElem); - for (unsigned v = 0; v < numElem; ++v) { - Mask1[v] = ConstantInt::get(Type::getInt32Ty(Context), v); - Mask2[v] = ConstantInt::get(Type::getInt32Ty(Context), numElem+v); - } + std::vector Mask1(numElemI), Mask2(numElemI); + for (unsigned v = 0; v < numElemI; ++v) { + Mask1[v] = ConstantInt::get(Type::getInt32Ty(Context), v); + Mask2[v] = ConstantInt::get(Type::getInt32Ty(Context), numElemJ+v); + } - K1 = new ShuffleVectorInst(K, UndefValue::get(VType), - ConstantVector::get( - FlipMemInputs ? Mask2 : Mask1), - getReplacementName(K, false, 1)); - K2 = new ShuffleVectorInst(K, UndefValue::get(VType), - ConstantVector::get( - FlipMemInputs ? Mask1 : Mask2), - getReplacementName(K, false, 2)); + K1 = new ShuffleVectorInst(K, UndefValue::get(VType), + ConstantVector::get( + FlipMemInputs ? Mask2 : Mask1), + getReplacementName(K, false, 1)); } else { + Value *CV0 = ConstantInt::get(Type::getInt32Ty(Context), 0); + Value *CV1 = ConstantInt::get(Type::getInt32Ty(Context), numElem-1); K1 = ExtractElementInst::Create(K, FlipMemInputs ? 
CV1 : CV0, getReplacementName(K, false, 1)); + } + + if (JType->isVectorTy()) { + std::vector Mask1(numElemJ), Mask2(numElemJ); + for (unsigned v = 0; v < numElemJ; ++v) { + Mask1[v] = ConstantInt::get(Type::getInt32Ty(Context), v); + Mask2[v] = ConstantInt::get(Type::getInt32Ty(Context), numElemI+v); + } + + K2 = new ShuffleVectorInst(K, UndefValue::get(VType), + ConstantVector::get( + FlipMemInputs ? Mask1 : Mask2), + getReplacementName(K, false, 2)); + } else { + Value *CV0 = ConstantInt::get(Type::getInt32Ty(Context), 0); + Value *CV1 = ConstantInt::get(Type::getInt32Ty(Context), numElem-1); K2 = ExtractElementInst::Create(K, FlipMemInputs ? CV0 : CV1, getReplacementName(K, false, 2)); } @@ -1778,6 +2181,61 @@ namespace { } } + // As with the aliasing information, SCEV can also change because of + // vectorization. This information is used to compute relative pointer + // offsets; the necessary information will be cached here prior to + // fusion. + void BBVectorize::collectPtrInfo(std::vector &PairableInsts, + DenseMap &ChosenPairs, + DenseSet &LowPtrInsts) { + for (std::vector::iterator PI = PairableInsts.begin(), + PIE = PairableInsts.end(); PI != PIE; ++PI) { + DenseMap::iterator P = ChosenPairs.find(*PI); + if (P == ChosenPairs.end()) continue; + + Instruction *I = cast(P->first); + Instruction *J = cast(P->second); + + if (!isa(I) && !isa(I)) + continue; + + Value *IPtr, *JPtr; + unsigned IAlignment, JAlignment; + int64_t OffsetInElmts; + if (!getPairPtrInfo(I, J, IPtr, JPtr, IAlignment, JAlignment, + OffsetInElmts) || abs64(OffsetInElmts) != 1) + llvm_unreachable("Pre-fusion pointer analysis failed"); + + Value *LowPI = (OffsetInElmts > 0) ? I : J; + LowPtrInsts.insert(LowPI); + } + } + + // When the first instruction in each pair is cloned, it will inherit its + // parent's metadata. This metadata must be combined with that of the other + // instruction in a safe way. 
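The merge policy this comment describes is: metadata kinds the vectorizer cannot reason about are dropped from the fused instruction, while tbaa and fpmath are widened to a value that is safe for both originals. A standalone model in plain C++; Node and mostGeneric are illustrative stand-ins for MDNode and MDNode::getMostGenericTBAA/getMostGenericFPMath, simplified here to keep a node only when both sides agree, which is always safe:

#include <map>

enum MDKind { MD_tbaa, MD_fpmath, MD_unknown };
struct Node {};

// Conservative stand-in: the real helpers walk the TBAA tree or compare
// fpmath accuracies; demanding an exact match loses precision but is safe.
static Node *mostGeneric(Node *A, Node *B) { return A == B ? A : nullptr; }

static void combineMetadata(std::map<MDKind, Node *> &K,
                            const std::map<MDKind, Node *> &J) {
  for (auto &KV : K) {
    auto It = J.find(KV.first);
    Node *JMD = (It == J.end()) ? nullptr : It->second;
    switch (KV.first) {
    case MD_tbaa:
    case MD_fpmath:
      KV.second = mostGeneric(KV.second, JMD); // widen to a common value
      break;
    default:
      KV.second = nullptr; // drop metadata the pass does not understand
      break;
    }
  }
}

int main() {
  Node N;
  std::map<MDKind, Node *> K = {{MD_tbaa, &N}, {MD_unknown, &N}};
  combineMetadata(K, {{MD_tbaa, &N}});
  // tbaa survives (both sides agree); the unknown kind was dropped.
  return (K[MD_tbaa] == &N && K[MD_unknown] == nullptr) ? 0 : 1;
}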
+ void BBVectorize::combineMetadata(Instruction *K, const Instruction *J) { + SmallVector, 4> Metadata; + K->getAllMetadataOtherThanDebugLoc(Metadata); + for (unsigned i = 0, n = Metadata.size(); i < n; ++i) { + unsigned Kind = Metadata[i].first; + MDNode *JMD = J->getMetadata(Kind); + MDNode *KMD = Metadata[i].second; + + switch (Kind) { + default: + K->setMetadata(Kind, 0); // Remove unknown metadata + break; + case LLVMContext::MD_tbaa: + K->setMetadata(Kind, MDNode::getMostGenericTBAA(JMD, KMD)); + break; + case LLVMContext::MD_fpmath: + K->setMetadata(Kind, MDNode::getMostGenericFPMath(JMD, KMD)); + break; + } + } + } + // This function fuses the chosen instruction pairs into vector instructions, // taking care preserve any needed scalar outputs and, then, it reorders the // remaining instructions as needed (users of the first member of the pair @@ -1804,6 +2262,9 @@ namespace { std::multimap LoadMoveSet; collectLoadMoveSet(BB, PairableInsts, ChosenPairs, LoadMoveSet); + DenseSet LowPtrInsts; + collectPtrInfo(PairableInsts, ChosenPairs, LowPtrInsts); + DEBUG(dbgs() << "BBV: initial: \n" << BB << "\n"); for (BasicBlock::iterator PI = BB.getFirstInsertionPt(); PI != BB.end();) { @@ -1843,7 +2304,10 @@ namespace { continue; } - bool FlipMemInputs; + bool FlipMemInputs = false; + if (isa(I) || isa(I)) + FlipMemInputs = (LowPtrInsts.find(I) == LowPtrInsts.end()); + unsigned NumOperands = I->getNumOperands(); SmallVector ReplacedOperands(NumOperands); getReplacementInputsForPair(Context, I, J, ReplacedOperands, @@ -1855,7 +2319,9 @@ namespace { if (I->hasName()) K->takeName(I); if (!isa(K)) - K->mutateType(getVecTypeForPair(I->getType())); + K->mutateType(getVecTypeForPair(I->getType(), J->getType())); + + combineMetadata(K, J); for (unsigned o = 0; o < NumOperands; ++o) K->setOperand(o, ReplacedOperands[o]); @@ -1947,6 +2413,7 @@ llvm::vectorizeBasicBlock(Pass *P, BasicBlock &BB, const VectorizeConfig &C) { //===----------------------------------------------------------------------===// VectorizeConfig::VectorizeConfig() { VectorBits = ::VectorBits; + VectorizeBools = !::NoBools; VectorizeInts = !::NoInts; VectorizeFloats = !::NoFloats; VectorizePointers = !::NoPointers; @@ -1954,6 +2421,7 @@ VectorizeConfig::VectorizeConfig() { VectorizeMath = !::NoMath; VectorizeFMA = !::NoFMA; VectorizeSelect = !::NoSelect; + VectorizeCmp = !::NoCmp; VectorizeGEP = !::NoGEP; VectorizeMemOps = !::NoMemOps; AlignedOnly = ::AlignedOnly; @@ -1963,6 +2431,7 @@ VectorizeConfig::VectorizeConfig() { SplatBreaksChain = ::SplatBreaksChain; MaxInsts = ::MaxInsts; MaxIter = ::MaxIter; + Pow2LenOnly = ::Pow2LenOnly; NoMemOpBoost = ::NoMemOpBoost; FastDep = ::FastDep; } diff --git a/lib/Transforms/Vectorize/CMakeLists.txt b/lib/Transforms/Vectorize/CMakeLists.txt index 4b66930..06cf1e4 100644 --- a/lib/Transforms/Vectorize/CMakeLists.txt +++ b/lib/Transforms/Vectorize/CMakeLists.txt @@ -2,3 +2,5 @@ add_llvm_library(LLVMVectorize BBVectorize.cpp Vectorize.cpp ) + +add_dependencies(LLVMVectorize intrinsics_gen) diff --git a/lib/VMCore/AsmWriter.cpp b/lib/VMCore/AsmWriter.cpp index 7b39efb..7ef1131 100644 --- a/lib/VMCore/AsmWriter.cpp +++ b/lib/VMCore/AsmWriter.cpp @@ -20,11 +20,13 @@ #include "llvm/LLVMContext.h" #include "llvm/CallingConv.h" #include "llvm/Constants.h" +#include "llvm/DebugInfo.h" #include "llvm/DerivedTypes.h" #include "llvm/InlineAsm.h" #include "llvm/IntrinsicInst.h" #include "llvm/Operator.h" #include "llvm/Module.h" +#include "llvm/TypeFinder.h" #include "llvm/ValueSymbolTable.h" #include 
"llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallString.h" @@ -99,7 +101,11 @@ static void PrintLLVMName(raw_ostream &OS, StringRef Name, PrefixType Prefix) { bool NeedsQuotes = isdigit(Name[0]); if (!NeedsQuotes) { for (unsigned i = 0, e = Name.size(); i != e; ++i) { - char C = Name[i]; + // By making this unsigned, the value passed in to isalnum will always be + // in the range 0-255. This is important when building with MSVC because + // its implementation will assert. This situation can arise when dealing + // with UTF-8 multibyte characters. + unsigned char C = Name[i]; if (!isalnum(C) && C != '-' && C != '.' && C != '_') { NeedsQuotes = true; break; @@ -140,7 +146,7 @@ class TypePrinting { public: /// NamedTypes - The named types that are used by the current module. - std::vector NamedTypes; + TypeFinder NamedTypes; /// NumberedTypes - The numbered types, along with their value. DenseMap NumberedTypes; @@ -159,7 +165,7 @@ public: void TypePrinting::incorporateTypes(const Module &M) { - M.findUsedStructTypes(NamedTypes); + NamedTypes.run(M, false); // The list of struct types we got back includes all the struct types, split // the unnamed ones out to a numbering and remove the anonymous structs. @@ -708,8 +714,7 @@ static void WriteConstantInternal(raw_ostream &Out, const Constant *CV, } if (const ConstantFP *CFP = dyn_cast(CV)) { - if (&CFP->getValueAPF().getSemantics() == &APFloat::IEEEhalf || - &CFP->getValueAPF().getSemantics() == &APFloat::IEEEsingle || + if (&CFP->getValueAPF().getSemantics() == &APFloat::IEEEsingle || &CFP->getValueAPF().getSemantics() == &APFloat::IEEEdouble) { // We would like to output the FP constant value in exponential notation, // but we cannot do this if doing so will lose precision. Check here to @@ -759,16 +764,20 @@ static void WriteConstantInternal(raw_ostream &Out, const Constant *CV, return; } - // Some form of long double. These appear as a magic letter identifying - // the type, then a fixed number of hex digits. + // Either half, or some form of long double. + // These appear as a magic letter identifying the type, then a + // fixed number of hex digits. Out << "0x"; + // Bit position, in the current word, of the next nibble to print. 
+ int shiftcount; + if (&CFP->getValueAPF().getSemantics() == &APFloat::x87DoubleExtended) { Out << 'K'; // api needed to prevent premature destruction APInt api = CFP->getValueAPF().bitcastToAPInt(); const uint64_t* p = api.getRawData(); uint64_t word = p[1]; - int shiftcount=12; + shiftcount = 12; int width = api.getBitWidth(); for (int j=0; j>shiftcount) & 15; @@ -784,17 +793,21 @@ static void WriteConstantInternal(raw_ostream &Out, const Constant *CV, } } return; - } else if (&CFP->getValueAPF().getSemantics() == &APFloat::IEEEquad) + } else if (&CFP->getValueAPF().getSemantics() == &APFloat::IEEEquad) { + shiftcount = 60; Out << 'L'; - else if (&CFP->getValueAPF().getSemantics() == &APFloat::PPCDoubleDouble) + } else if (&CFP->getValueAPF().getSemantics() == &APFloat::PPCDoubleDouble) { + shiftcount = 60; Out << 'M'; - else + } else if (&CFP->getValueAPF().getSemantics() == &APFloat::IEEEhalf) { + shiftcount = 12; + Out << 'H'; + } else llvm_unreachable("Unsupported floating point type"); // api needed to prevent premature destruction APInt api = CFP->getValueAPF().bitcastToAPInt(); const uint64_t* p = api.getRawData(); uint64_t word = *p; - int shiftcount=60; int width = api.getBitWidth(); for (int j=0; j>shiftcount) & 15; @@ -1369,6 +1382,26 @@ static void PrintVisibility(GlobalValue::VisibilityTypes Vis, } } +static void PrintThreadLocalModel(GlobalVariable::ThreadLocalMode TLM, + formatted_raw_ostream &Out) { + switch (TLM) { + case GlobalVariable::NotThreadLocal: + break; + case GlobalVariable::GeneralDynamicTLSModel: + Out << "thread_local "; + break; + case GlobalVariable::LocalDynamicTLSModel: + Out << "thread_local(localdynamic) "; + break; + case GlobalVariable::InitialExecTLSModel: + Out << "thread_local(initialexec) "; + break; + case GlobalVariable::LocalExecTLSModel: + Out << "thread_local(localexec) "; + break; + } +} + void AssemblyWriter::printGlobal(const GlobalVariable *GV) { if (GV->isMaterializable()) Out << "; Materializable\n"; @@ -1381,8 +1414,8 @@ void AssemblyWriter::printGlobal(const GlobalVariable *GV) { PrintLinkage(GV->getLinkage(), Out); PrintVisibility(GV->getVisibility(), Out); + PrintThreadLocalModel(GV->getThreadLocalMode(), Out); - if (GV->isThreadLocal()) Out << "thread_local "; if (unsigned AddressSpace = GV->getType()->getAddressSpace()) Out << "addrspace(" << AddressSpace << ") "; if (GV->hasUnnamedAddr()) Out << "unnamed_addr "; @@ -2004,19 +2037,22 @@ static void WriteMDNodeComment(const MDNode *Node, formatted_raw_ostream &Out) { if (Node->getNumOperands() < 1) return; - ConstantInt *CI = dyn_cast_or_null(Node->getOperand(0)); - if (!CI) return; - APInt Val = CI->getValue(); - APInt Tag = Val & ~APInt(Val.getBitWidth(), LLVMDebugVersionMask); - if (Val.ult(LLVMDebugVersion11)) + + Value *Op = Node->getOperand(0); + if (!Op || !isa(Op) || cast(Op)->getBitWidth() < 32) + return; + + DIDescriptor Desc(Node); + if (Desc.getVersion() < LLVMDebugVersion11) return; + unsigned Tag = Desc.getTag(); Out.PadToColumn(50); - if (Tag == dwarf::DW_TAG_user_base) + if (dwarf::TagString(Tag)) { + Out << "; "; + Desc.print(Out); + } else if (Tag == dwarf::DW_TAG_user_base) { Out << "; [ DW_TAG_user_base ]"; - else if (Tag.isIntN(32)) { - if (const char *TagName = dwarf::TagString(Tag.getZExtValue())) - Out << "; [ " << TagName << " ]"; } } diff --git a/lib/VMCore/Attributes.cpp b/lib/VMCore/Attributes.cpp index c05132b..c8219eb 100644 --- a/lib/VMCore/Attributes.cpp +++ b/lib/VMCore/Attributes.cpp @@ -88,6 +88,9 @@ std::string Attribute::getAsString(Attributes 
Attrs) { Result += utostr(Attribute::getAlignmentFromAttrs(Attrs)); Result += " "; } + if (Attrs & Attribute::IANSDialect) + Result += "ia_nsdialect "; + // Trim the trailing space. assert(!Result.empty() && "Unknown attribute!"); Result.erase(Result.end()-1); @@ -131,8 +134,8 @@ class AttributeListImpl : public FoldingSetNode { public: SmallVector Attrs; - AttributeListImpl(const AttributeWithIndex *Attr, unsigned NumAttrs) - : Attrs(Attr, Attr+NumAttrs) { + AttributeListImpl(ArrayRef attrs) + : Attrs(attrs.begin(), attrs.end()) { RefCount = 0; } @@ -150,13 +153,12 @@ public: } void Profile(FoldingSetNodeID &ID) const { - Profile(ID, Attrs.data(), Attrs.size()); + Profile(ID, Attrs); } - static void Profile(FoldingSetNodeID &ID, const AttributeWithIndex *Attr, - unsigned NumAttrs) { - for (unsigned i = 0; i != NumAttrs; ++i) { - ID.AddInteger(Attr[i].Attrs.Raw()); - ID.AddInteger(Attr[i].Index); + static void Profile(FoldingSetNodeID &ID, ArrayRef Attrs){ + for (unsigned i = 0, e = Attrs.size(); i != e; ++i) { + ID.AddInteger(Attrs[i].Attrs.Raw()); + ID.AddInteger(Attrs[i].Index); } } }; @@ -168,13 +170,13 @@ AttributeListImpl::~AttributeListImpl() { } -AttrListPtr AttrListPtr::get(const AttributeWithIndex *Attrs, unsigned NumAttrs) { +AttrListPtr AttrListPtr::get(ArrayRef Attrs) { // If there are no attributes then return a null AttributesList pointer. - if (NumAttrs == 0) + if (Attrs.empty()) return AttrListPtr(); #ifndef NDEBUG - for (unsigned i = 0; i != NumAttrs; ++i) { + for (unsigned i = 0, e = Attrs.size(); i != e; ++i) { assert(Attrs[i].Attrs != Attribute::None && "Pointless attribute!"); assert((!i || Attrs[i-1].Index < Attrs[i].Index) && @@ -184,7 +186,7 @@ AttrListPtr AttrListPtr::get(const AttributeWithIndex *Attrs, unsigned NumAttrs) // Otherwise, build a key to look up the existing attributes. FoldingSetNodeID ID; - AttributeListImpl::Profile(ID, Attrs, NumAttrs); + AttributeListImpl::Profile(ID, Attrs); void *InsertPos; sys::SmartScopedLock Lock(*ALMutex); @@ -195,7 +197,7 @@ AttrListPtr AttrListPtr::get(const AttributeWithIndex *Attrs, unsigned NumAttrs) // If we didn't find any existing attributes of the same shape then // create a new one and insert it. 
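The lookup-then-create sequence below is LLVM's standard hash-consing pattern: Profile() flattens the attribute list into a key, and all identical lists share a single AttributeListImpl. A standalone model in plain C++, with std::map standing in for the FoldingSet and its lock:

#include <map>
#include <utility>
#include <vector>

struct AttrWithIndex { unsigned Raw, Index; };
struct AttrListImpl { std::vector<AttrWithIndex> Attrs; };

static AttrListImpl *getOrCreate(const std::vector<AttrWithIndex> &Attrs) {
  using Key = std::vector<std::pair<unsigned, unsigned>>;
  static std::map<Key, AttrListImpl *> Lists;

  Key ID; // mirrors Profile(): one (Raw, Index) pair per entry
  for (const AttrWithIndex &A : Attrs)
    ID.emplace_back(A.Raw, A.Index);

  AttrListImpl *&Slot = Lists[ID];
  if (!Slot)
    Slot = new AttrListImpl{Attrs}; // first request creates the node
  return Slot;                      // later identical lists share it
}

int main() {
  AttrListImpl *A = getOrCreate({{1, 0}, {4, 1}});
  AttrListImpl *B = getOrCreate({{1, 0}, {4, 1}});
  return A == B ? 0 : 1; // same shape, same impl
}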
if (!PAL) { - PAL = new AttributeListImpl(Attrs, NumAttrs); + PAL = new AttributeListImpl(Attrs); AttributesLists->InsertNode(PAL, InsertPos); } @@ -308,7 +310,7 @@ AttrListPtr AttrListPtr::addAttr(unsigned Idx, Attributes Attrs) const { OldAttrList.begin()+i, OldAttrList.end()); } - return get(NewAttrList.data(), NewAttrList.size()); + return get(NewAttrList); } AttrListPtr AttrListPtr::removeAttr(unsigned Idx, Attributes Attrs) const { @@ -343,7 +345,7 @@ AttrListPtr AttrListPtr::removeAttr(unsigned Idx, Attributes Attrs) const { NewAttrList.insert(NewAttrList.end(), OldAttrList.begin()+i, OldAttrList.end()); - return get(NewAttrList.data(), NewAttrList.size()); + return get(NewAttrList); } void AttrListPtr::dump() const { diff --git a/lib/VMCore/AutoUpgrade.cpp b/lib/VMCore/AutoUpgrade.cpp index 2e16372..094ca75 100644 --- a/lib/VMCore/AutoUpgrade.cpp +++ b/lib/VMCore/AutoUpgrade.cpp @@ -14,17 +14,32 @@ #include "llvm/AutoUpgrade.h" #include "llvm/Constants.h" #include "llvm/Function.h" +#include "llvm/IRBuilder.h" #include "llvm/Instruction.h" +#include "llvm/IntrinsicInst.h" #include "llvm/LLVMContext.h" #include "llvm/Module.h" -#include "llvm/IntrinsicInst.h" -#include "llvm/Support/CallSite.h" #include "llvm/Support/CFG.h" +#include "llvm/Support/CallSite.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/IRBuilder.h" #include using namespace llvm; +// Upgrade the declarations of the SSE4.1 functions whose arguments have +// changed their type from v4f32 to v2i64. +static bool UpgradeSSE41Function(Function* F, Intrinsic::ID IID, + Function *&NewFn) { + // Check whether this is an old version of the function, which received + // v4f32 arguments. + Type *Arg0Type = F->getFunctionType()->getParamType(0); + if (Arg0Type != VectorType::get(Type::getFloatTy(F->getContext()), 4)) + return false; + + // Yes, it's old, replace it with new version. + F->setName(F->getName() + ".old"); + NewFn = Intrinsic::getDeclaration(F->getParent(), IID); + return true; +} static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) { assert(F && "Illegal to upgrade a non-existent Function."); @@ -37,6 +52,27 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) { switch (Name[0]) { default: break; + case 'a': { + if (Name.startswith("arm.neon.vclz")) { + Type* args[2] = { + F->arg_begin()->getType(), + Type::getInt1Ty(F->getContext()) + }; + // Can't use Intrinsic::getDeclaration here as it adds a ".i1" to + // the end of the name. Change name from llvm.arm.neon.vclz.* to + // llvm.ctlz.* + FunctionType* fType = FunctionType::get(F->getReturnType(), args, false); + NewFn = Function::Create(fType, F->getLinkage(), + "llvm.ctlz." 
+ Name.substr(14), F->getParent()); + return true; + } + if (Name.startswith("arm.neon.vcnt")) { + NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctpop, + F->arg_begin()->getType()); + return true; + } + break; + } case 'c': { if (Name.startswith("ctlz.") && F->arg_size() == 1) { F->setName(Name + ".old"); @@ -57,17 +93,49 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) { Name.startswith("x86.sse2.pcmpgt.") || Name.startswith("x86.avx2.pcmpeq.") || Name.startswith("x86.avx2.pcmpgt.") || - Name.startswith("x86.avx.vpermil.")) { + Name.startswith("x86.avx.vpermil.") || + Name == "x86.avx.movnt.dq.256" || + Name == "x86.avx.movnt.pd.256" || + Name == "x86.avx.movnt.ps.256" || + (Name.startswith("x86.xop.vpcom") && F->arg_size() == 2)) { NewFn = 0; return true; } + // SSE4.1 ptest functions may have an old signature. + if (Name.startswith("x86.sse41.ptest")) { + if (Name == "x86.sse41.ptestc") + return UpgradeSSE41Function(F, Intrinsic::x86_sse41_ptestc, NewFn); + if (Name == "x86.sse41.ptestz") + return UpgradeSSE41Function(F, Intrinsic::x86_sse41_ptestz, NewFn); + if (Name == "x86.sse41.ptestnzc") + return UpgradeSSE41Function(F, Intrinsic::x86_sse41_ptestnzc, NewFn); + } + // frcz.ss/sd may need to have an argument dropped + if (Name.startswith("x86.xop.vfrcz.ss") && F->arg_size() == 2) { + F->setName(Name + ".old"); + NewFn = Intrinsic::getDeclaration(F->getParent(), + Intrinsic::x86_xop_vfrcz_ss); + return true; + } + if (Name.startswith("x86.xop.vfrcz.sd") && F->arg_size() == 2) { + F->setName(Name + ".old"); + NewFn = Intrinsic::getDeclaration(F->getParent(), + Intrinsic::x86_xop_vfrcz_sd); + return true; + } + // Fix the FMA4 intrinsics to remove the 4 + if (Name.startswith("x86.fma4.")) { + F->setName("llvm.x86.fma" + Name.substr(8)); + NewFn = F; + return true; + } break; } } - // This may not belong here. This function is effectively being overloaded - // to both detect an intrinsic which needs upgrading, and to provide the - // upgraded form of the intrinsic. We should perhaps have two separate + // This may not belong here. This function is effectively being overloaded + // to both detect an intrinsic which needs upgrading, and to provide the + // upgraded form of the intrinsic. We should perhaps have two separate // functions for this. return false; } @@ -89,8 +157,8 @@ bool llvm::UpgradeGlobalVariable(GlobalVariable *GV) { return false; } -// UpgradeIntrinsicCall - Upgrade a call to an old intrinsic to be a call the -// upgraded intrinsic. All argument and return casting must be provided in +// UpgradeIntrinsicCall - Upgrade a call to an old intrinsic to be a call the +// upgraded intrinsic. All argument and return casting must be provided in // order to seamlessly integrate with existing context. 
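Most of the cases in the function above are pure renames keyed off string prefixes. A standalone sketch of that dispatch in plain C++, covering only the vclz and fma4 rules from this hunk, with std::string in place of StringRef (names arrive without the "llvm." prefix, as in the code above):

#include <cassert>
#include <string>

static bool startsWith(const std::string &S, const char *P) {
  return S.rfind(P, 0) == 0;
}

static std::string upgradeName(const std::string &Name) {
  if (startsWith(Name, "arm.neon.vclz"))
    return "llvm.ctlz." + Name.substr(14);  // drop "arm.neon.vclz."
  if (startsWith(Name, "x86.fma4."))
    return "llvm.x86.fma" + Name.substr(8); // drop the '4'
  return "llvm." + Name;                    // already current
}

int main() {
  assert(upgradeName("arm.neon.vclz.v4i32") == "llvm.ctlz.v4i32");
  assert(upgradeName("x86.fma4.vfmadd.ss") == "llvm.x86.fma.vfmadd.ss");
}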
void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) { Function *F = CI->getCalledFunction(); @@ -118,15 +186,85 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) { "pcmpgt"); // need to sign extend since icmp returns vector of i1 Rep = Builder.CreateSExt(Rep, CI->getType(), ""); + } else if (Name == "llvm.x86.avx.movnt.dq.256" || + Name == "llvm.x86.avx.movnt.ps.256" || + Name == "llvm.x86.avx.movnt.pd.256") { + IRBuilder<> Builder(C); + Builder.SetInsertPoint(CI->getParent(), CI); + + Module *M = F->getParent(); + SmallVector Elts; + Elts.push_back(ConstantInt::get(Type::getInt32Ty(C), 1)); + MDNode *Node = MDNode::get(C, Elts); + + Value *Arg0 = CI->getArgOperand(0); + Value *Arg1 = CI->getArgOperand(1); + + // Convert the type of the pointer to a pointer to the stored type. + Value *BC = Builder.CreateBitCast(Arg0, + PointerType::getUnqual(Arg1->getType()), + "cast"); + StoreInst *SI = Builder.CreateStore(Arg1, BC); + SI->setMetadata(M->getMDKindID("nontemporal"), Node); + SI->setAlignment(16); + + // Remove intrinsic. + CI->eraseFromParent(); + return; + } else if (Name.startswith("llvm.x86.xop.vpcom")) { + Intrinsic::ID intID; + if (Name.endswith("ub")) + intID = Intrinsic::x86_xop_vpcomub; + else if (Name.endswith("uw")) + intID = Intrinsic::x86_xop_vpcomuw; + else if (Name.endswith("ud")) + intID = Intrinsic::x86_xop_vpcomud; + else if (Name.endswith("uq")) + intID = Intrinsic::x86_xop_vpcomuq; + else if (Name.endswith("b")) + intID = Intrinsic::x86_xop_vpcomb; + else if (Name.endswith("w")) + intID = Intrinsic::x86_xop_vpcomw; + else if (Name.endswith("d")) + intID = Intrinsic::x86_xop_vpcomd; + else if (Name.endswith("q")) + intID = Intrinsic::x86_xop_vpcomq; + else + llvm_unreachable("Unknown suffix"); + + Name = Name.substr(18); // strip off "llvm.x86.xop.vpcom" + unsigned Imm; + if (Name.startswith("lt")) + Imm = 0; + else if (Name.startswith("le")) + Imm = 1; + else if (Name.startswith("gt")) + Imm = 2; + else if (Name.startswith("ge")) + Imm = 3; + else if (Name.startswith("eq")) + Imm = 4; + else if (Name.startswith("ne")) + Imm = 5; + else if (Name.startswith("true")) + Imm = 6; + else if (Name.startswith("false")) + Imm = 7; + else + llvm_unreachable("Unknown condition"); + + Function *VPCOM = Intrinsic::getDeclaration(F->getParent(), intID); + Rep = Builder.CreateCall3(VPCOM, CI->getArgOperand(0), + CI->getArgOperand(1), Builder.getInt8(Imm)); } else { bool PD128 = false, PD256 = false, PS128 = false, PS256 = false; - if (Name.startswith("llvm.x86.avx.vpermil.pd.256")) + if (Name == "llvm.x86.avx.vpermil.pd.256") PD256 = true; - else if (Name.startswith("llvm.x86.avx.vpermil.pd")) + else if (Name == "llvm.x86.avx.vpermil.pd") PD128 = true; - else if (Name.startswith("llvm.x86.avx.vpermil.ps.256")) + else if (Name == "llvm.x86.avx.vpermil.ps.256") PS256 = true; - else if (Name.startswith("llvm.x86.avx.vpermil.ps")) + else if (Name == "llvm.x86.avx.vpermil.ps") PS128 = true; if (PD256 || PD128 || PS256 || PS128) { @@ -162,6 +300,9 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) { return; } + std::string Name = CI->getName().str(); + CI->setName(Name + ".old"); + switch (NewFn->getIntrinsicID()) { default: llvm_unreachable("Unknown function for CallInst upgrade."); @@ -170,12 +311,60 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) { case Intrinsic::cttz: assert(CI->getNumArgOperands() == 1 && "Mismatch between function args and call args"); - StringRef Name = CI->getName(); - CI->setName(Name + ".old"); 
CI->replaceAllUsesWith(Builder.CreateCall2(NewFn, CI->getArgOperand(0), Builder.getFalse(), Name)); CI->eraseFromParent(); return; + + case Intrinsic::arm_neon_vclz: { + // Change name from llvm.arm.neon.vclz.* to llvm.ctlz.* + CI->replaceAllUsesWith(Builder.CreateCall2(NewFn, CI->getArgOperand(0), + Builder.getFalse(), + "llvm.ctlz." + Name.substr(14))); + CI->eraseFromParent(); + return; + } + case Intrinsic::ctpop: { + CI->replaceAllUsesWith(Builder.CreateCall(NewFn, CI->getArgOperand(0))); + CI->eraseFromParent(); + return; + } + + case Intrinsic::x86_xop_vfrcz_ss: + case Intrinsic::x86_xop_vfrcz_sd: + CI->replaceAllUsesWith(Builder.CreateCall(NewFn, CI->getArgOperand(1), + Name)); + CI->eraseFromParent(); + return; + + case Intrinsic::x86_sse41_ptestc: + case Intrinsic::x86_sse41_ptestz: + case Intrinsic::x86_sse41_ptestnzc: { + // The arguments for these intrinsics used to be v4f32, and changed + // to v2i64. This is purely a nop, since those are bitwise intrinsics. + // So, the only thing required is a bitcast for both arguments. + // First, check the arguments have the old type. + Value *Arg0 = CI->getArgOperand(0); + if (Arg0->getType() != VectorType::get(Type::getFloatTy(C), 4)) + return; + + // Old intrinsic, add bitcasts + Value *Arg1 = CI->getArgOperand(1); + + Value *BC0 = + Builder.CreateBitCast(Arg0, + VectorType::get(Type::getInt64Ty(C), 2), + "cast"); + Value *BC1 = + Builder.CreateBitCast(Arg1, + VectorType::get(Type::getInt64Ty(C), 2), + "cast"); + + CallInst* NewCall = Builder.CreateCall2(NewFn, BC0, BC1, Name); + CI->replaceAllUsesWith(NewCall); + CI->eraseFromParent(); + return; + } } } diff --git a/lib/VMCore/CMakeLists.txt b/lib/VMCore/CMakeLists.txt index e1efcda..6a20be6 100644 --- a/lib/VMCore/CMakeLists.txt +++ b/lib/VMCore/CMakeLists.txt @@ -8,7 +8,9 @@ add_llvm_library(LLVMCore ConstantFold.cpp Constants.cpp Core.cpp + DebugInfo.cpp DebugLoc.cpp + DIBuilder.cpp Dominators.cpp Function.cpp GCOV.cpp @@ -29,6 +31,7 @@ add_llvm_library(LLVMCore PassRegistry.cpp PrintModulePass.cpp Type.cpp + TypeFinder.cpp Use.cpp User.cpp Value.cpp @@ -36,3 +39,14 @@ add_llvm_library(LLVMCore ValueTypes.cpp Verifier.cpp ) + +# Workaround: It takes over 20 minutes to compile with msvc10. +# FIXME: Suppressing optimizations to core libraries would not be good thing. +if( MSVC_VERSION EQUAL 1600 ) +set_property( + SOURCE Function.cpp + PROPERTY COMPILE_FLAGS "/Og-" + ) +endif() + +add_dependencies(LLVMCore intrinsics_gen) diff --git a/lib/VMCore/ConstantFold.cpp b/lib/VMCore/ConstantFold.cpp index b743287..8e82876 100644 --- a/lib/VMCore/ConstantFold.cpp +++ b/lib/VMCore/ConstantFold.cpp @@ -55,13 +55,12 @@ static Constant *BitCastConstantVector(Constant *CV, VectorType *DstTy) { Type *DstEltTy = DstTy->getElementType(); - // Check to verify that all elements of the input are simple. 
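The ConstantFold change that begins here swaps getAggregateElement, which returns null for a lane it cannot extract, for ConstantExpr::getExtractElement, which always yields a constant, possibly a symbolic one, so the fold no longer aborts mid-vector. A rough standalone model of the difference in plain C++, with optional<int> as the "maybe simple constant" and a string as the symbolic fallback:

#include <cassert>
#include <optional>
#include <string>
#include <vector>

// Old style: give up on the whole vector if any lane is not simple.
static bool allLanesSimple(const std::vector<std::optional<int>> &Lanes) {
  for (const auto &L : Lanes)
    if (!L) return false;
  return true;
}

// New style: every lane produces something; an unknown lane becomes a
// symbolic extractelement expression that may be folded later.
static std::vector<std::string>
perLane(const std::vector<std::optional<int>> &Lanes) {
  std::vector<std::string> Result;
  for (unsigned I = 0; I != Lanes.size(); ++I)
    Result.push_back(Lanes[I]
                         ? std::to_string(*Lanes[I])
                         : "extractelement(CV, " + std::to_string(I) + ")");
  return Result;
}

int main() {
  std::vector<std::optional<int>> CV = {1, std::nullopt, 3};
  assert(!allLanesSimple(CV));                       // old code bailed here
  assert(perLane(CV)[1] == "extractelement(CV, 1)"); // new code carries on
}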
SmallVector Result; + Type *Ty = IntegerType::get(CV->getContext(), 32); for (unsigned i = 0; i != NumElts; ++i) { - Constant *C = CV->getAggregateElement(i); - if (C == 0) return 0; + Constant *C = + ConstantExpr::getExtractElement(CV, ConstantInt::get(Ty, i)); C = ConstantExpr::getBitCast(C, DstEltTy); - if (isa(C)) return 0; Result.push_back(C); } @@ -553,9 +552,12 @@ Constant *llvm::ConstantFoldCastInstruction(unsigned opc, Constant *V, SmallVector res; VectorType *DestVecTy = cast(DestTy); Type *DstEltTy = DestVecTy->getElementType(); - for (unsigned i = 0, e = V->getType()->getVectorNumElements(); i != e; ++i) - res.push_back(ConstantExpr::getCast(opc, - V->getAggregateElement(i), DstEltTy)); + Type *Ty = IntegerType::get(V->getContext(), 32); + for (unsigned i = 0, e = V->getType()->getVectorNumElements(); i != e; ++i) { + Constant *C = + ConstantExpr::getExtractElement(V, ConstantInt::get(Ty, i)); + res.push_back(ConstantExpr::getCast(opc, C, DstEltTy)); + } return ConstantVector::get(res); } @@ -696,12 +698,13 @@ Constant *llvm::ConstantFoldSelectInstruction(Constant *Cond, // If the condition is a vector constant, fold the result elementwise. if (ConstantVector *CondV = dyn_cast(Cond)) { SmallVector Result; + Type *Ty = IntegerType::get(CondV->getContext(), 32); for (unsigned i = 0, e = V1->getType()->getVectorNumElements(); i != e;++i){ ConstantInt *Cond = dyn_cast(CondV->getOperand(i)); if (Cond == 0) break; - Constant *Res = (Cond->getZExtValue() ? V2 : V1)->getAggregateElement(i); - if (Res == 0) break; + Constant *V = Cond->isNullValue() ? V2 : V1; + Constant *Res = ConstantExpr::getExtractElement(V, ConstantInt::get(Ty, i)); Result.push_back(Res); } @@ -721,12 +724,12 @@ Constant *llvm::ConstantFoldSelectInstruction(Constant *Cond, if (ConstantExpr *TrueVal = dyn_cast(V1)) { if (TrueVal->getOpcode() == Instruction::Select) if (TrueVal->getOperand(0) == Cond) - return ConstantExpr::getSelect(Cond, TrueVal->getOperand(1), V2); + return ConstantExpr::getSelect(Cond, TrueVal->getOperand(1), V2); } if (ConstantExpr *FalseVal = dyn_cast(V2)) { if (FalseVal->getOpcode() == Instruction::Select) if (FalseVal->getOperand(0) == Cond) - return ConstantExpr::getSelect(Cond, V1, FalseVal->getOperand(2)); + return ConstantExpr::getSelect(Cond, V1, FalseVal->getOperand(2)); } return 0; @@ -760,16 +763,16 @@ Constant *llvm::ConstantFoldInsertElementInstruction(Constant *Val, const APInt &IdxVal = CIdx->getValue(); SmallVector Result; + Type *Ty = IntegerType::get(Val->getContext(), 32); for (unsigned i = 0, e = Val->getType()->getVectorNumElements(); i != e; ++i){ if (i == IdxVal) { Result.push_back(Elt); continue; } - if (Constant *C = Val->getAggregateElement(i)) - Result.push_back(C); - else - return 0; + Constant *C = + ConstantExpr::getExtractElement(Val, ConstantInt::get(Ty, i)); + Result.push_back(C); } return ConstantVector::get(Result); @@ -801,11 +804,15 @@ Constant *llvm::ConstantFoldShuffleVectorInstruction(Constant *V1, Constant *InElt; if (unsigned(Elt) >= SrcNumElts*2) InElt = UndefValue::get(EltTy); - else if (unsigned(Elt) >= SrcNumElts) - InElt = V2->getAggregateElement(Elt - SrcNumElts); - else - InElt = V1->getAggregateElement(Elt); - if (InElt == 0) return 0; + else if (unsigned(Elt) >= SrcNumElts) { + Type *Ty = IntegerType::get(V2->getContext(), 32); + InElt = + ConstantExpr::getExtractElement(V2, + ConstantInt::get(Ty, Elt - SrcNumElts)); + } else { + Type *Ty = IntegerType::get(V1->getContext(), 32); + InElt = ConstantExpr::getExtractElement(V1, ConstantInt::get(Ty, 
Elt)); + } Result.push_back(InElt); } @@ -1130,16 +1137,17 @@ Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode, } else if (VectorType *VTy = dyn_cast(C1->getType())) { // Perform elementwise folding. SmallVector Result; + Type *Ty = IntegerType::get(VTy->getContext(), 32); for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) { - Constant *LHS = C1->getAggregateElement(i); - Constant *RHS = C2->getAggregateElement(i); - if (LHS == 0 || RHS == 0) break; + Constant *LHS = + ConstantExpr::getExtractElement(C1, ConstantInt::get(Ty, i)); + Constant *RHS = + ConstantExpr::getExtractElement(C2, ConstantInt::get(Ty, i)); Result.push_back(ConstantExpr::get(Opcode, LHS, RHS)); } - if (Result.size() == VTy->getNumElements()) - return ConstantVector::get(Result); + return ConstantVector::get(Result); } if (ConstantExpr *CE1 = dyn_cast(C1)) { @@ -1697,17 +1705,18 @@ Constant *llvm::ConstantFoldCompareInstruction(unsigned short pred, // If we can constant fold the comparison of each element, constant fold // the whole vector comparison. SmallVector ResElts; + Type *Ty = IntegerType::get(C1->getContext(), 32); // Compare the elements, producing an i1 result or constant expr. for (unsigned i = 0, e = C1->getType()->getVectorNumElements(); i != e;++i){ - Constant *C1E = C1->getAggregateElement(i); - Constant *C2E = C2->getAggregateElement(i); - if (C1E == 0 || C2E == 0) break; + Constant *C1E = + ConstantExpr::getExtractElement(C1, ConstantInt::get(Ty, i)); + Constant *C2E = + ConstantExpr::getExtractElement(C2, ConstantInt::get(Ty, i)); ResElts.push_back(ConstantExpr::getCompare(pred, C1E, C2E)); } - if (ResElts.size() == C1->getType()->getVectorNumElements()) - return ConstantVector::get(ResElts); + return ConstantVector::get(ResElts); } if (C1->getType()->isFloatingPointTy()) { diff --git a/lib/VMCore/Constants.cpp b/lib/VMCore/Constants.cpp index 6dbc144..a4e21e1 100644 --- a/lib/VMCore/Constants.cpp +++ b/lib/VMCore/Constants.cpp @@ -46,7 +46,7 @@ bool Constant::isNegativeZeroValue() const { // Floating point values have an explicit -0.0 value. if (const ConstantFP *CFP = dyn_cast(this)) return CFP->isZero() && CFP->isNegative(); - + // Otherwise, just use +0.0. return isNullValue(); } @@ -55,7 +55,7 @@ bool Constant::isNullValue() const { // 0 is null. if (const ConstantInt *CI = dyn_cast(this)) return CI->isZero(); - + // +0.0 is null. if (const ConstantFP *CFP = dyn_cast(this)) return CFP->isZero() && !CFP->isNegative(); @@ -161,19 +161,19 @@ Constant *Constant::getAllOnesValue(Type *Ty) { Constant *Constant::getAggregateElement(unsigned Elt) const { if (const ConstantStruct *CS = dyn_cast(this)) return Elt < CS->getNumOperands() ? CS->getOperand(Elt) : 0; - + if (const ConstantArray *CA = dyn_cast(this)) return Elt < CA->getNumOperands() ? CA->getOperand(Elt) : 0; - + if (const ConstantVector *CV = dyn_cast(this)) return Elt < CV->getNumOperands() ? CV->getOperand(Elt) : 0; - + if (const ConstantAggregateZero *CAZ =dyn_cast(this)) return CAZ->getElementValue(Elt); - + if (const UndefValue *UV = dyn_cast(this)) return UV->getElementValue(Elt); - + if (const ConstantDataSequential *CDS =dyn_cast(this)) return Elt < CDS->getNumElements() ? CDS->getElementAsConstant(Elt) : 0; return 0; @@ -222,10 +222,10 @@ bool Constant::canTrap() const { // The only thing that could possibly trap are constant exprs. const ConstantExpr *CE = dyn_cast(this); if (!CE) return false; - - // ConstantExpr traps if any operands can trap. + + // ConstantExpr traps if any operands can trap. 
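Restating the recursion here: a constant expression can trap if any operand can trap, or if its own opcode is one of the trapping kinds, the integer divide and remainder cases checked just below. A standalone sketch in plain C++:

#include <cassert>
#include <vector>

struct CExpr {
  bool TrappingOpcode; // e.g. udiv/sdiv/urem/srem with a maybe-zero divisor
  std::vector<const CExpr *> Ops;
};

static bool canTrap(const CExpr &E) {
  for (const CExpr *Op : E.Ops)
    if (Op && canTrap(*Op))
      return true;         // a trapping operand taints the whole expression
  return E.TrappingOpcode; // otherwise only specific opcodes can trap
}

int main() {
  CExpr Div{true, {}};
  CExpr Add{false, {&Div}};
  assert(canTrap(Add)); // the division nested inside the add still traps
}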
for (unsigned i = 0, e = getNumOperands(); i != e; ++i) - if (CE->getOperand(i)->canTrap()) + if (CE->getOperand(i)->canTrap()) return true; // Otherwise, only specific operations can trap. @@ -252,7 +252,7 @@ bool Constant::isConstantUsed() const { const Constant *UC = dyn_cast(*UI); if (UC == 0 || isa(UC)) return true; - + if (UC->isConstantUsed()) return true; } @@ -302,12 +302,12 @@ Constant::PossibleRelocationsTy Constant::getRelocationInfo() const { cast(RHS->getOperand(0))->getFunction()) return NoRelocation; } - + PossibleRelocationsTy Result = NoRelocation; for (unsigned i = 0, e = getNumOperands(); i != e; ++i) Result = std::max(Result, cast(getOperand(i))->getRelocationInfo()); - + return Result; } @@ -316,14 +316,14 @@ Constant::PossibleRelocationsTy Constant::getRelocationInfo() const { /// constantexpr. static bool removeDeadUsersOfConstant(const Constant *C) { if (isa(C)) return false; // Cannot remove this - + while (!C->use_empty()) { const Constant *User = dyn_cast(C->use_back()); if (!User) return false; // Non-constant usage; if (!removeDeadUsersOfConstant(User)) return false; // Constant wasn't dead } - + const_cast(C)->destroyConstant(); return true; } @@ -343,7 +343,7 @@ void Constant::removeDeadConstantUsers() const { ++I; continue; } - + if (!removeDeadUsersOfConstant(User)) { // If the constant wasn't dead, remember that this was the last live use // and move on to the next constant. @@ -351,7 +351,7 @@ void Constant::removeDeadConstantUsers() const { ++I; continue; } - + // If the constant was dead, then the iterator is invalidated. if (LastNonDeadUser == E) { I = use_begin(); @@ -485,7 +485,7 @@ static const fltSemantics *TypeToFloatSemantics(Type *Ty) { return &APFloat::x87DoubleExtended; else if (Ty->isFP128Ty()) return &APFloat::IEEEquad; - + assert(Ty->isPPC_FP128Ty() && "Unknown FP format"); return &APFloat::PPCDoubleDouble; } @@ -497,7 +497,7 @@ void ConstantFP::anchor() { } /// 2.0/1.0 etc, that are known-valid both as double and as the target format. Constant *ConstantFP::get(Type *Ty, double V) { LLVMContext &Context = Ty->getContext(); - + APFloat FV(V); bool ignored; FV.convert(*TypeToFloatSemantics(Ty->getScalarType()), @@ -550,11 +550,11 @@ Constant *ConstantFP::getZeroValueForNegation(Type *Ty) { // ConstantFP accessors. ConstantFP* ConstantFP::get(LLVMContext &Context, const APFloat& V) { DenseMapAPFloatKeyInfo::KeyTy Key(V); - + LLVMContextImpl* pImpl = Context.pImpl; - + ConstantFP *&Slot = pImpl->FPConstants[Key]; - + if (!Slot) { Type *Ty; if (&V.getSemantics() == &APFloat::IEEEhalf) @@ -574,7 +574,7 @@ ConstantFP* ConstantFP::get(LLVMContext &Context, const APFloat& V) { } Slot = new ConstantFP(Ty, V); } - + return Slot; } @@ -695,7 +695,7 @@ Constant *ConstantArray::get(ArrayType *Ty, ArrayRef V) { "Wrong type in array element initializer"); } LLVMContextImpl *pImpl = Ty->getContext().pImpl; - + // If this is an all-zero array, return a ConstantAggregateZero object. If // all undef, return an UndefValue, if "all simple", then return a // ConstantDataArray. 
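One scan over the elements picks the cheapest representation that still describes them all. A standalone model of the classification in plain C++; the enum values name the LLVM classes chosen in the code below:

#include <cassert>
#include <vector>

enum class Repr { AggregateZero, Undef, DataArray, GenericArray };
enum class Elt { Zero, Undef, SimpleIntOrFP, Other };

static Repr classify(const std::vector<Elt> &Elts) {
  bool AllZero = true, AllUndef = true, AllSimple = true;
  for (Elt E : Elts) {
    AllZero &= (E == Elt::Zero);
    AllUndef &= (E == Elt::Undef);
    AllSimple &= (E == Elt::Zero || E == Elt::SimpleIntOrFP);
  }
  if (AllZero)   return Repr::AggregateZero; // ConstantAggregateZero
  if (AllUndef)  return Repr::Undef;         // UndefValue
  if (AllSimple) return Repr::DataArray;     // ConstantDataArray
  return Repr::GenericArray;                 // uniqued ConstantArray
}

int main() {
  assert(classify({Elt::Zero, Elt::Zero}) == Repr::AggregateZero);
  assert(classify({Elt::Zero, Elt::SimpleIntOrFP}) == Repr::DataArray);
  assert(classify({Elt::Zero, Elt::Other}) == Repr::GenericArray);
}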
@@ -751,7 +751,7 @@ Constant *ConstantArray::get(ArrayType *Ty, ArrayRef V) { return ConstantDataArray::get(C->getContext(), Elts); } } - + if (ConstantFP *CFP = dyn_cast(C)) { if (CFP->getType()->isFloatTy()) { SmallVector Elts; @@ -788,7 +788,7 @@ StructType *ConstantStruct::getTypeForElements(LLVMContext &Context, SmallVector EltTypes(VecSize); for (unsigned i = 0; i != VecSize; ++i) EltTypes[i] = V[i]->getType(); - + return StructType::get(Context, EltTypes, Packed); } @@ -833,12 +833,12 @@ Constant *ConstantStruct::get(StructType *ST, ArrayRef V) { isUndef = false; } } - } + } if (isZero) return ConstantAggregateZero::get(ST); if (isUndef) return UndefValue::get(ST); - + return ST->getContext().pImpl->StructConstants.getOrCreate(ST, V); } @@ -881,12 +881,12 @@ Constant *ConstantVector::get(ArrayRef V) { break; } } - + if (isZero) return ConstantAggregateZero::get(T); if (isUndef) return UndefValue::get(T); - + // Check to see if all of the elements are ConstantFP or ConstantInt and if // the element type is compatible with ConstantDataVector. If so, use it. if (ConstantDataSequential::isElementTypeCompatible(C->getType())) { @@ -932,7 +932,7 @@ Constant *ConstantVector::get(ArrayRef V) { return ConstantDataVector::get(C->getContext(), Elts); } } - + if (ConstantFP *CFP = dyn_cast(C)) { if (CFP->getType()->isFloatTy()) { SmallVector Elts; @@ -955,7 +955,7 @@ Constant *ConstantVector::get(ArrayRef V) { } } } - + // Otherwise, the element type isn't compatible with ConstantDataVector, or // the operand list constants a ConstantExpr or something else strange. return pImpl->VectorConstants.getOrCreate(T, V); @@ -967,7 +967,7 @@ Constant *ConstantVector::getSplat(unsigned NumElts, Constant *V) { if ((isa(V) || isa(V)) && ConstantDataSequential::isElementTypeCompatible(V->getType())) return ConstantDataVector::getSplat(NumElts, V); - + SmallVector Elts(NumElts, V); return get(Elts); } @@ -1039,7 +1039,7 @@ ConstantExpr::getWithOperandReplaced(unsigned OpNo, Constant *Op) const { SmallVector NewOps; for (unsigned i = 0, e = getNumOperands(); i != e; ++i) NewOps.push_back(i == OpNo ? Op : getOperand(i)); - + return getWithOperands(NewOps); } @@ -1052,7 +1052,7 @@ getWithOperands(ArrayRef Ops, Type *Ty) const { bool AnyChange = Ty != getType(); for (unsigned i = 0; i != Ops.size(); ++i) AnyChange |= Ops[i] != getOperand(i); - + if (!AnyChange) // No operands changed, return self. return const_cast(this); @@ -1177,7 +1177,7 @@ ConstantAggregateZero *ConstantAggregateZero::get(Type *Ty) { ConstantAggregateZero *&Entry = Ty->getContext().pImpl->CAZConstants[Ty]; if (Entry == 0) Entry = new ConstantAggregateZero(Ty); - + return Entry; } @@ -1232,7 +1232,7 @@ ConstantPointerNull *ConstantPointerNull::get(PointerType *Ty) { ConstantPointerNull *&Entry = Ty->getContext().pImpl->CPNConstants[Ty]; if (Entry == 0) Entry = new ConstantPointerNull(Ty); - + return Entry; } @@ -1252,7 +1252,7 @@ UndefValue *UndefValue::get(Type *Ty) { UndefValue *&Entry = Ty->getContext().pImpl->UVConstants[Ty]; if (Entry == 0) Entry = new UndefValue(Ty); - + return Entry; } @@ -1277,7 +1277,7 @@ BlockAddress *BlockAddress::get(Function *F, BasicBlock *BB) { F->getContext().pImpl->BlockAddresses[std::make_pair(F, BB)]; if (BA == 0) BA = new BlockAddress(F, BB); - + assert(BA->getFunction() == F && "Basic block moved between functions"); return BA; } @@ -1305,19 +1305,19 @@ void BlockAddress::replaceUsesOfWithOnConstant(Value *From, Value *To, Use *U) { // case, we have to remove the map entry. 
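The fixup that follows is a re-keying of a uniquing table: the (function, block) pair a BlockAddress is filed under has changed, so the entry either moves to its new key or folds into an existing equivalent constant. A standalone model in plain C++, with ints standing in for Function and BasicBlock:

#include <cassert>
#include <map>
#include <utility>

struct BlockAddr { int Fn, BB; };
using Table = std::map<std::pair<int, int>, BlockAddr *>;

// Returns the constant callers should use afterwards: BA itself when it
// could be updated in place, an existing equivalent entry otherwise (in
// which case BA must be RAUW'd into it and destroyed).
static BlockAddr *rekey(Table &T, BlockAddr *BA, int NewFn, int NewBB) {
  BlockAddr *&Slot = T[{NewFn, NewBB}];
  if (!Slot) {
    T.erase({BA->Fn, BA->BB}); // drop the stale key
    BA->Fn = NewFn;
    BA->BB = NewBB;
    Slot = BA;                 // update in place, no new constant needed
    return BA;
  }
  assert(Slot != BA && "entry was already current");
  return Slot;
}

int main() {
  Table T;
  BlockAddr BA{1, 2};
  T[{1, 2}] = &BA;
  rekey(T, &BA, 1, 3); // the block was replaced: the entry moves with it
  assert(T.count({1, 3}) == 1 && T.count({1, 2}) == 0);
}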
Function *NewF = getFunction(); BasicBlock *NewBB = getBasicBlock(); - + if (U == &Op<0>()) NewF = cast(To); else NewBB = cast(To); - + // See if the 'new' entry already exists, if not, just update this in place // and return early. BlockAddress *&NewBA = getContext().pImpl->BlockAddresses[std::make_pair(NewF, NewBB)]; if (NewBA == 0) { getBasicBlock()->AdjustBlockAddressRefCount(-1); - + // Remove the old entry, this can't cause the map to rehash (just a // tombstone will get added). getContext().pImpl->BlockAddresses.erase(std::make_pair(getFunction(), @@ -1331,10 +1331,10 @@ void BlockAddress::replaceUsesOfWithOnConstant(Value *From, Value *To, Use *U) { // Otherwise, I do need to replace this with an existing value. assert(NewBA != this && "I didn't contain From!"); - + // Everyone using this now uses the replacement. replaceAllUsesWith(NewBA); - + destroyConstant(); } @@ -1355,10 +1355,10 @@ static inline Constant *getFoldedCast( // Look up the constant in the table first to ensure uniqueness std::vector argVec(1, C); ExprMapKeyType Key(opc, argVec); - + return pImpl->ExprConstants.getOrCreate(Ty, Key); } - + Constant *ConstantExpr::getCast(unsigned oc, Constant *C, Type *Ty) { Instruction::CastOps opc = Instruction::CastOps(oc); assert(Instruction::isCast(opc) && "opcode out of range"); @@ -1381,7 +1381,7 @@ Constant *ConstantExpr::getCast(unsigned oc, Constant *C, Type *Ty) { case Instruction::IntToPtr: return getIntToPtr(C, Ty); case Instruction::BitCast: return getBitCast(C, Ty); } -} +} Constant *ConstantExpr::getZExtOrBitCast(Constant *C, Type *Ty) { if (C->getType()->getScalarSizeInBits() == Ty->getScalarSizeInBits()) @@ -1572,11 +1572,11 @@ Constant *ConstantExpr::getIntToPtr(Constant *C, Type *DstTy) { Constant *ConstantExpr::getBitCast(Constant *C, Type *DstTy) { assert(CastInst::castIsValid(Instruction::BitCast, C, DstTy) && "Invalid constantexpr bitcast!"); - + // It is common to ask for a bitcast of a value to its own type, handle this // speedily. if (C->getType() == DstTy) return C; - + return getFoldedCast(Instruction::BitCast, C, DstTy); } @@ -1588,7 +1588,7 @@ Constant *ConstantExpr::get(unsigned Opcode, Constant *C1, Constant *C2, "Invalid opcode in binary constant expression"); assert(C1->getType() == C2->getType() && "Operand types in binary constant expression should match"); - + #ifndef NDEBUG switch (Opcode) { case Instruction::Add: @@ -1649,11 +1649,11 @@ Constant *ConstantExpr::get(unsigned Opcode, Constant *C1, Constant *C2, if (Constant *FC = ConstantFoldBinaryInstruction(Opcode, C1, C2)) return FC; // Fold a few common cases. 
- + std::vector argVec(1, C1); argVec.push_back(C2); ExprMapKeyType Key(Opcode, argVec, 0, Flags); - + LLVMContextImpl *pImpl = C1->getContext().pImpl; return pImpl->ExprConstants.getOrCreate(C1->getType(), Key); } @@ -1703,7 +1703,7 @@ Constant *ConstantExpr::getOffsetOf(Type* Ty, Constant *FieldNo) { Constant *ConstantExpr::getCompare(unsigned short Predicate, Constant *C1, Constant *C2) { assert(C1->getType() == C2->getType() && "Op types should be identical!"); - + switch (Predicate) { default: llvm_unreachable("Invalid CmpInst predicate"); case CmpInst::FCMP_FALSE: case CmpInst::FCMP_OEQ: case CmpInst::FCMP_OGT: @@ -1713,7 +1713,7 @@ Constant *ConstantExpr::getCompare(unsigned short Predicate, case CmpInst::FCMP_ULT: case CmpInst::FCMP_ULE: case CmpInst::FCMP_UNE: case CmpInst::FCMP_TRUE: return getFCmp(Predicate, C1, C2); - + case CmpInst::ICMP_EQ: case CmpInst::ICMP_NE: case CmpInst::ICMP_UGT: case CmpInst::ICMP_UGE: case CmpInst::ICMP_ULT: case CmpInst::ICMP_ULE: case CmpInst::ICMP_SGT: case CmpInst::ICMP_SGE: case CmpInst::ICMP_SLT: @@ -1732,7 +1732,7 @@ Constant *ConstantExpr::getSelect(Constant *C, Constant *V1, Constant *V2) { argVec[1] = V1; argVec[2] = V2; ExprMapKeyType Key(Instruction::Select, argVec); - + LLVMContextImpl *pImpl = C->getContext().pImpl; return pImpl->ExprConstants.getOrCreate(V1->getType(), Key); } @@ -1747,7 +1747,7 @@ Constant *ConstantExpr::getGetElementPtr(Constant *C, ArrayRef Idxs, assert(Ty && "GEP indices invalid!"); unsigned AS = C->getType()->getPointerAddressSpace(); Type *ReqTy = Ty->getPointerTo(AS); - + assert(C->getType()->isPointerTy() && "Non-pointer type for constant GetElementPtr expression"); // Look up the constant in the table first to ensure uniqueness @@ -1758,7 +1758,7 @@ Constant *ConstantExpr::getGetElementPtr(Constant *C, ArrayRef Idxs, ArgVec.push_back(cast(Idxs[i])); const ExprMapKeyType Key(Instruction::GetElementPtr, ArgVec, 0, InBounds ? GEPOperator::IsInBounds : 0); - + LLVMContextImpl *pImpl = C->getContext().pImpl; return pImpl->ExprConstants.getOrCreate(ReqTy, Key); } @@ -1815,15 +1815,15 @@ Constant *ConstantExpr::getExtractElement(Constant *Val, Constant *Idx) { "Tried to create extractelement operation on non-vector type!"); assert(Idx->getType()->isIntegerTy(32) && "Extractelement index must be i32 type!"); - + if (Constant *FC = ConstantFoldExtractElementInstruction(Val, Idx)) return FC; // Fold a few common cases. 
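The getBinOpIdentity and getBinOpAbsorber helpers added further down in this hunk encode simple operator algebra: X op identity == X and X op absorber == absorber for every X. A standalone demonstration in plain C++ over 8-bit values:

#include <cassert>
#include <cstdint>

int main() {
  uint8_t X = 0xA5;
  // Identities: add/or/xor -> 0, mul -> 1, and -> all-ones.
  assert((uint8_t)(X + 0) == X && (X | 0) == X && (X ^ 0) == X);
  assert((uint8_t)(X * 1) == X);
  assert((X & 0xFF) == X);
  // Absorbers: or -> all-ones, and/mul -> 0.
  assert((X | 0xFF) == 0xFF);
  assert((X & 0) == 0 && (uint8_t)(X * 0) == 0);
}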
- + // Look up the constant in the table first to ensure uniqueness std::vector ArgVec(1, Val); ArgVec.push_back(Idx); const ExprMapKeyType Key(Instruction::ExtractElement,ArgVec); - + LLVMContextImpl *pImpl = Val->getContext().pImpl; Type *ReqTy = Val->getType()->getVectorElementType(); return pImpl->ExprConstants.getOrCreate(ReqTy, Key); @@ -1845,7 +1845,7 @@ Constant *ConstantExpr::getInsertElement(Constant *Val, Constant *Elt, ArgVec.push_back(Elt); ArgVec.push_back(Idx); const ExprMapKeyType Key(Instruction::InsertElement,ArgVec); - + LLVMContextImpl *pImpl = Val->getContext().pImpl; return pImpl->ExprConstants.getOrCreate(Val->getType(), Key); } @@ -1867,7 +1867,7 @@ Constant *ConstantExpr::getShuffleVector(Constant *V1, Constant *V2, ArgVec.push_back(V2); ArgVec.push_back(Mask); const ExprMapKeyType Key(Instruction::ShuffleVector,ArgVec); - + LLVMContextImpl *pImpl = ShufTy->getContext().pImpl; return pImpl->ExprConstants.getOrCreate(ShufTy, Key); } @@ -1892,7 +1892,7 @@ Constant *ConstantExpr::getExtractValue(Constant *Agg, Type *ReqTy = ExtractValueInst::getIndexedType(Agg->getType(), Idxs); (void)ReqTy; assert(ReqTy && "extractvalue indices invalid!"); - + assert(Agg->getType()->isFirstClassType() && "Non-first-class type for constant extractvalue expression"); Constant *FC = ConstantFoldExtractValueInstruction(Agg, Idxs); @@ -2007,6 +2007,47 @@ Constant *ConstantExpr::getAShr(Constant *C1, Constant *C2, bool isExact) { isExact ? PossiblyExactOperator::IsExact : 0); } +/// getBinOpIdentity - Return the identity for the given binary operation, +/// i.e. a constant C such that X op C = X and C op X = X for every X. It +/// returns null if the operator doesn't have an identity. +Constant *ConstantExpr::getBinOpIdentity(unsigned Opcode, Type *Ty) { + switch (Opcode) { + default: + // Doesn't have an identity. + return 0; + + case Instruction::Add: + case Instruction::Or: + case Instruction::Xor: + return Constant::getNullValue(Ty); + + case Instruction::Mul: + return ConstantInt::get(Ty, 1); + + case Instruction::And: + return Constant::getAllOnesValue(Ty); + } +} + +/// getBinOpAbsorber - Return the absorbing element for the given binary +/// operation, i.e. a constant C such that X op C = C and C op X = C for +/// every X. For example, this returns zero for integer multiplication. +/// It returns null if the operator doesn't have an absorbing element. +Constant *ConstantExpr::getBinOpAbsorber(unsigned Opcode, Type *Ty) { + switch (Opcode) { + default: + // Doesn't have an absorber. + return 0; + + case Instruction::Or: + return Constant::getAllOnesValue(Ty); + + case Instruction::And: + case Instruction::Mul: + return Constant::getNullValue(Ty); + } +} + // destroyConstant - Remove the constant from the constant table... // void ConstantExpr::destroyConstant() { @@ -2107,7 +2148,7 @@ Constant *ConstantDataSequential::getImpl(StringRef Elements, Type *Ty) { // Do a lookup to see if we have already formed one of these. StringMap::MapEntryTy &Slot = Ty->getContext().pImpl->CDSConstants.GetOrCreateValue(Elements); - + // The bucket can point to a linked list of different CDS's that have the same // body but different types. For example, 0,0,0,1 could be a 4 element array // of i8, or a 1-element array of i32. They'll both end up in the same @@ -2117,7 +2158,7 @@ Constant *ConstantDataSequential::getImpl(StringRef Elements, Type *Ty) { Entry = &Node->Next, Node = *Entry) if (Node->getType() == Ty) return Node; - + // Okay, we didn't get a hit. 
Create a node of the right class, link it in, // and return it. if (isa(Ty)) @@ -2131,7 +2172,7 @@ void ConstantDataSequential::destroyConstant() { // Remove the constant from the StringMap. StringMap &CDSConstants = getType()->getContext().pImpl->CDSConstants; - + StringMap::iterator Slot = CDSConstants.find(getRawDataValues()); @@ -2158,11 +2199,11 @@ void ConstantDataSequential::destroyConstant() { } } } - + // If we were part of a list, make sure that we don't delete the list that is // still owned by the uniquing map. Next = 0; - + // Finally, actually delete it. destroyConstantImpl(); } @@ -2172,27 +2213,33 @@ void ConstantDataSequential::destroyConstant() { /// can return a ConstantAggregateZero object. Constant *ConstantDataArray::get(LLVMContext &Context, ArrayRef Elts) { Type *Ty = ArrayType::get(Type::getInt8Ty(Context), Elts.size()); - return getImpl(StringRef((char*)Elts.data(), Elts.size()*1), Ty); + const char *Data = reinterpret_cast(Elts.data()); + return getImpl(StringRef(const_cast(Data), Elts.size()*1), Ty); } Constant *ConstantDataArray::get(LLVMContext &Context, ArrayRef Elts){ Type *Ty = ArrayType::get(Type::getInt16Ty(Context), Elts.size()); - return getImpl(StringRef((char*)Elts.data(), Elts.size()*2), Ty); + const char *Data = reinterpret_cast(Elts.data()); + return getImpl(StringRef(const_cast(Data), Elts.size()*2), Ty); } Constant *ConstantDataArray::get(LLVMContext &Context, ArrayRef Elts){ Type *Ty = ArrayType::get(Type::getInt32Ty(Context), Elts.size()); - return getImpl(StringRef((char*)Elts.data(), Elts.size()*4), Ty); + const char *Data = reinterpret_cast(Elts.data()); + return getImpl(StringRef(const_cast(Data), Elts.size()*4), Ty); } Constant *ConstantDataArray::get(LLVMContext &Context, ArrayRef Elts){ Type *Ty = ArrayType::get(Type::getInt64Ty(Context), Elts.size()); - return getImpl(StringRef((char*)Elts.data(), Elts.size()*8), Ty); + const char *Data = reinterpret_cast(Elts.data()); + return getImpl(StringRef(const_cast(Data), Elts.size()*8), Ty); } Constant *ConstantDataArray::get(LLVMContext &Context, ArrayRef Elts) { Type *Ty = ArrayType::get(Type::getFloatTy(Context), Elts.size()); - return getImpl(StringRef((char*)Elts.data(), Elts.size()*4), Ty); + const char *Data = reinterpret_cast(Elts.data()); + return getImpl(StringRef(const_cast(Data), Elts.size()*4), Ty); } Constant *ConstantDataArray::get(LLVMContext &Context, ArrayRef Elts) { Type *Ty = ArrayType::get(Type::getDoubleTy(Context), Elts.size()); - return getImpl(StringRef((char*)Elts.data(), Elts.size()*8), Ty); + const char *Data = reinterpret_cast(Elts.data()); + return getImpl(StringRef(const_cast(Data), Elts.size()*8), Ty); } /// getString - This method constructs a CDS and initializes it with a text @@ -2202,9 +2249,12 @@ Constant *ConstantDataArray::get(LLVMContext &Context, ArrayRef Elts) { /// to disable this behavior. Constant *ConstantDataArray::getString(LLVMContext &Context, StringRef Str, bool AddNull) { - if (!AddNull) - return get(Context, ArrayRef((uint8_t*)Str.data(), Str.size())); - + if (!AddNull) { + const uint8_t *Data = reinterpret_cast(Str.data()); + return get(Context, ArrayRef(const_cast(Data), + Str.size())); + } + SmallVector ElementVals; ElementVals.append(Str.begin(), Str.end()); ElementVals.push_back(0); @@ -2216,27 +2266,33 @@ Constant *ConstantDataArray::getString(LLVMContext &Context, /// can return a ConstantAggregateZero object. 
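Every ConstantData{Array,Vector}::get overload that follows packs its typed elements into the same raw form: N elements of width W become an N*W-byte string in host byte order, which is what makes the uniquing by StringRef above work. A standalone model of the convention in plain C++, with std::string holding the raw body:

#include <cassert>
#include <cstdint>
#include <string>
#include <vector>

static std::string packBytes(const std::vector<uint32_t> &Elts) {
  const char *Data = reinterpret_cast<const char *>(Elts.data());
  return std::string(Data, Elts.size() * sizeof(uint32_t)); // N elts, N*4 bytes
}

int main() {
  assert(packBytes({7, 11, 13}).size() == 12);
}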
Constant *ConstantDataVector::get(LLVMContext &Context, ArrayRef Elts){ Type *Ty = VectorType::get(Type::getInt8Ty(Context), Elts.size()); - return getImpl(StringRef((char*)Elts.data(), Elts.size()*1), Ty); + const char *Data = reinterpret_cast(Elts.data()); + return getImpl(StringRef(const_cast(Data), Elts.size()*1), Ty); } Constant *ConstantDataVector::get(LLVMContext &Context, ArrayRef Elts){ Type *Ty = VectorType::get(Type::getInt16Ty(Context), Elts.size()); - return getImpl(StringRef((char*)Elts.data(), Elts.size()*2), Ty); + const char *Data = reinterpret_cast(Elts.data()); + return getImpl(StringRef(const_cast(Data), Elts.size()*2), Ty); } Constant *ConstantDataVector::get(LLVMContext &Context, ArrayRef Elts){ Type *Ty = VectorType::get(Type::getInt32Ty(Context), Elts.size()); - return getImpl(StringRef((char*)Elts.data(), Elts.size()*4), Ty); + const char *Data = reinterpret_cast(Elts.data()); + return getImpl(StringRef(const_cast(Data), Elts.size()*4), Ty); } Constant *ConstantDataVector::get(LLVMContext &Context, ArrayRef Elts){ Type *Ty = VectorType::get(Type::getInt64Ty(Context), Elts.size()); - return getImpl(StringRef((char*)Elts.data(), Elts.size()*8), Ty); + const char *Data = reinterpret_cast(Elts.data()); + return getImpl(StringRef(const_cast(Data), Elts.size()*8), Ty); } Constant *ConstantDataVector::get(LLVMContext &Context, ArrayRef Elts) { Type *Ty = VectorType::get(Type::getFloatTy(Context), Elts.size()); - return getImpl(StringRef((char*)Elts.data(), Elts.size()*4), Ty); + const char *Data = reinterpret_cast(Elts.data()); + return getImpl(StringRef(const_cast(Data), Elts.size()*4), Ty); } Constant *ConstantDataVector::get(LLVMContext &Context, ArrayRef Elts) { Type *Ty = VectorType::get(Type::getDoubleTy(Context), Elts.size()); - return getImpl(StringRef((char*)Elts.data(), Elts.size()*8), Ty); + const char *Data = reinterpret_cast(Elts.data()); + return getImpl(StringRef(const_cast(Data), Elts.size()*8), Ty); } Constant *ConstantDataVector::getSplat(unsigned NumElts, Constant *V) { @@ -2281,15 +2337,19 @@ uint64_t ConstantDataSequential::getElementAsInteger(unsigned Elt) const { assert(isa(getElementType()) && "Accessor can only be used when element is an integer"); const char *EltPtr = getElementPointer(Elt); - + // The data is stored in host byte order, make sure to cast back to the right // type to load with the right endianness. 
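Reading an element back, as the per-bit-width switch below does, is the inverse of that packing: index to byte Elt * width and load with the same host endianness the value was stored with. A standalone sketch in plain C++; memcpy is used instead of the pointer cast to sidestep alignment concerns:

#include <cassert>
#include <cstdint>
#include <cstring>
#include <string>

static uint32_t elementAsInteger(const std::string &Raw, unsigned Elt) {
  uint32_t V;
  std::memcpy(&V, Raw.data() + Elt * sizeof(uint32_t), sizeof(V));
  return V; // host byte order in, host byte order out
}

int main() {
  std::string Raw;
  for (uint32_t E : {7u, 11u, 13u})
    Raw.append(reinterpret_cast<const char *>(&E), sizeof(E));
  assert(elementAsInteger(Raw, 1) == 11);
}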
switch (getElementType()->getIntegerBitWidth()) { default: llvm_unreachable("Invalid bitwidth for CDS"); - case 8: return *(uint8_t*)EltPtr; - case 16: return *(uint16_t*)EltPtr; - case 32: return *(uint32_t*)EltPtr; - case 64: return *(uint64_t*)EltPtr; + case 8: + return *const_cast(reinterpret_cast(EltPtr)); + case 16: + return *const_cast(reinterpret_cast(EltPtr)); + case 32: + return *const_cast(reinterpret_cast(EltPtr)); + case 64: + return *const_cast(reinterpret_cast(EltPtr)); } } @@ -2301,8 +2361,14 @@ APFloat ConstantDataSequential::getElementAsAPFloat(unsigned Elt) const { switch (getElementType()->getTypeID()) { default: llvm_unreachable("Accessor can only be used when element is float/double!"); - case Type::FloatTyID: return APFloat(*(float*)EltPtr); - case Type::DoubleTyID: return APFloat(*(double*)EltPtr); + case Type::FloatTyID: { + const float *FloatPrt = reinterpret_cast(EltPtr); + return APFloat(*const_cast(FloatPrt)); + } + case Type::DoubleTyID: { + const double *DoublePtr = reinterpret_cast(EltPtr); + return APFloat(*const_cast(DoublePtr)); + } } } @@ -2311,7 +2377,8 @@ APFloat ConstantDataSequential::getElementAsAPFloat(unsigned Elt) const { float ConstantDataSequential::getElementAsFloat(unsigned Elt) const { assert(getElementType()->isFloatTy() && "Accessor can only be used when element is a 'float'"); - return *(float*)getElementPointer(Elt); + const float *EltPtr = reinterpret_cast(getElementPointer(Elt)); + return *const_cast(EltPtr); } /// getElementAsDouble - If this is an sequential container of doubles, return @@ -2319,7 +2386,9 @@ float ConstantDataSequential::getElementAsFloat(unsigned Elt) const { double ConstantDataSequential::getElementAsDouble(unsigned Elt) const { assert(getElementType()->isDoubleTy() && "Accessor can only be used when element is a 'float'"); - return *(double*)getElementPointer(Elt); + const double *EltPtr = + reinterpret_cast(getElementPointer(Elt)); + return *const_cast(EltPtr); } /// getElementAsConstant - Return a Constant for a specified index's element. @@ -2328,7 +2397,7 @@ double ConstantDataSequential::getElementAsDouble(unsigned Elt) const { Constant *ConstantDataSequential::getElementAsConstant(unsigned Elt) const { if (getElementType()->isFloatTy() || getElementType()->isDoubleTy()) return ConstantFP::get(getContext(), getElementAsAPFloat(Elt)); - + return ConstantInt::get(getElementType(), getElementAsInteger(Elt)); } @@ -2342,12 +2411,12 @@ bool ConstantDataSequential::isString() const { bool ConstantDataSequential::isCString() const { if (!isString()) return false; - + StringRef Str = getAsString(); - + // The last value must be nul. if (Str.back() != 0) return false; - + // Other elements must be non-nul. return Str.drop_back().find(0) == StringRef::npos; } @@ -2356,13 +2425,13 @@ bool ConstantDataSequential::isCString() const { /// elements have the same value, return that value. Otherwise return NULL. Constant *ConstantDataVector::getSplatValue() const { const char *Base = getRawDataValues().data(); - + // Compare elements 1+ to the 0'th element. unsigned EltSize = getElementByteSize(); for (unsigned i = 1, e = getNumElements(); i != e; ++i) if (memcmp(Base, Base+i*EltSize, EltSize)) return 0; - + // If they're all the same, return the 0th one as a representative. return getElementAsConstant(0); } @@ -2393,10 +2462,10 @@ void ConstantArray::replaceUsesOfWithOnConstant(Value *From, Value *To, Lookup.first = cast(getType()); Values.reserve(getNumOperands()); // Build replacement array. 
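The loop that follows rebuilds the element list while taking two notes: how many operands actually changed, and whether the result collapses to a splat of the new value (the all-zeros case). A standalone model in plain C++ with int elements:

#include <cassert>
#include <vector>

struct RewriteResult {
  std::vector<int> Values;
  unsigned NumUpdated = 0;
  bool AllSame = true; // did every slot end up equal to the new value?
};

static RewriteResult rewrite(const std::vector<int> &Ops, int From, int To) {
  RewriteResult R;
  R.Values.reserve(Ops.size());
  for (int Val : Ops) {
    if (Val == From) {
      ++R.NumUpdated;
      Val = To;
    }
    R.Values.push_back(Val);
    R.AllSame &= (Val == To);
  }
  return R;
}

int main() {
  RewriteResult R = rewrite({5, 0, 5}, 5, 0);
  assert(R.NumUpdated == 2 && R.AllSame); // collapses to an all-zero array
}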
@@ -2393,10 +2462,10 @@ void ConstantArray::replaceUsesOfWithOnConstant(Value *From, Value *To,
   Lookup.first = cast<ArrayType>(getType());
   Values.reserve(getNumOperands());  // Build replacement array.
-  // Fill values with the modified operands of the constant array. Also, 
+  // Fill values with the modified operands of the constant array. Also,
   // compute whether this turns into an all-zeros array.
   unsigned NumUpdated = 0;
-  
+
   // Keep track of whether all the values in the array are "ToC".
   bool AllSame = true;
   for (Use *O = OperandList, *E = OperandList+getNumOperands(); O != E; ++O) {
@@ -2408,7 +2477,7 @@ void ConstantArray::replaceUsesOfWithOnConstant(Value *From, Value *To,
     Values.push_back(Val);
     AllSame &= Val == ToC;
   }
-  
+
   Constant *Replacement = 0;
   if (AllSame && ToC->isNullValue()) {
     Replacement = ConstantAggregateZero::get(getType());
@@ -2419,7 +2488,7 @@ void ConstantArray::replaceUsesOfWithOnConstant(Value *From, Value *To,
     Lookup.second = makeArrayRef(Values);
     LLVMContextImpl::ArrayConstantsTy::MapTy::iterator I =
       pImpl->ArrayConstants.find(Lookup);
-    
+
     if (I != pImpl->ArrayConstants.map_end()) {
       Replacement = I->first;
     } else {
@@ -2428,7 +2497,7 @@ void ConstantArray::replaceUsesOfWithOnConstant(Value *From, Value *To,
       // old with the new, then deleting the old... just update the current one
       // in place!
       pImpl->ArrayConstants.remove(this);
-      
+
       // Update to the new value. Optimize for the case when we have a single
       // operand that we're changing, but handle bulk updates efficiently.
       if (NumUpdated == 1) {
@@ -2445,13 +2514,13 @@ void ConstantArray::replaceUsesOfWithOnConstant(Value *From, Value *To,
       return;
     }
   }
-  
+
   // Otherwise, I do need to replace this with an existing value.
   assert(Replacement != this && "I didn't contain From!");
-  
+
   // Everyone using this now uses the replacement.
   replaceAllUsesWith(Replacement);
-  
+
   // Delete the old constant!
   destroyConstant();
 }
@@ -2468,8 +2537,8 @@ void ConstantStruct::replaceUsesOfWithOnConstant(Value *From, Value *To,
   LLVMContextImpl::StructConstantsTy::LookupKey Lookup;
   Lookup.first = cast<StructType>(getType());
   Values.reserve(getNumOperands());  // Build replacement struct.
-  
-  // Fill values with the modified operands of the constant struct. Also, 
+
+  // Fill values with the modified operands of the constant struct. Also,
   // compute whether this turns into an all-zeros struct.
   bool isAllZeros = false;
   bool isAllUndef = false;
@@ -2492,9 +2561,9 @@ void ConstantStruct::replaceUsesOfWithOnConstant(Value *From, Value *To,
     Values.push_back(cast<Constant>(O->get()));
   }
   Values[OperandToUpdate] = ToC;
-  
+
   LLVMContextImpl *pImpl = getContext().pImpl;
-  
+
   Constant *Replacement = 0;
   if (isAllZeros) {
     Replacement = ConstantAggregateZero::get(getType());
@@ -2505,7 +2574,7 @@ void ConstantStruct::replaceUsesOfWithOnConstant(Value *From, Value *To,
     Lookup.second = makeArrayRef(Values);
     LLVMContextImpl::StructConstantsTy::MapTy::iterator I =
       pImpl->StructConstants.find(Lookup);
-    
+
     if (I != pImpl->StructConstants.map_end()) {
       Replacement = I->first;
     } else {
@@ -2514,19 +2583,19 @@ void ConstantStruct::replaceUsesOfWithOnConstant(Value *From, Value *To,
       // old with the new, then deleting the old... just update the current one
       // in place!
       pImpl->StructConstants.remove(this);
-      
+
       // Update to the new value.
       setOperand(OperandToUpdate, ToC);
       pImpl->StructConstants.insert(this);
       return;
     }
   }
-  
+
   assert(Replacement != this && "I didn't contain From!");
-  
+
   // Everyone using this now uses the replacement.
   replaceAllUsesWith(Replacement);
-  
+
   // Delete the old constant!
   destroyConstant();
 }
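The remove/mutate/re-insert dance above exists because these constants live in a uniquing map keyed by their operands; mutating one in place without re-keying would corrupt the map. A minimal sketch of the same pattern with a std::set (all names here are hypothetical, not LLVM API):

    #include <set>
    #include <vector>

    struct Node { std::vector<int> Ops; };
    struct CmpByOps {
      bool operator()(const Node *A, const Node *B) const {
        return A->Ops < B->Ops;
      }
    };

    // Re-key N after changing one operand: erase under the old key, mutate,
    // re-insert under the new key, mirroring the remove(this)/insert(this)
    // sequence in the constant-folding code above.
    void updateOperand(std::set<Node *, CmpByOps> &Uniquing,
                       Node *N, unsigned I, int V) {
      Uniquing.erase(N);
      N->Ops[I] = V;
      Uniquing.insert(N);
    }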
@@ -2534,7 +2603,7 @@ void ConstantStruct::replaceUsesOfWithOnConstant(Value *From, Value *To,
 void ConstantVector::replaceUsesOfWithOnConstant(Value *From, Value *To,
                                                  Use *U) {
   assert(isa<Constant>(To) && "Cannot make Constant refer to non-constant!");
-  
+
   SmallVector<Constant*, 8> Values;
   Values.reserve(getNumOperands());  // Build replacement array...
   for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
@@ -2542,13 +2611,13 @@ void ConstantVector::replaceUsesOfWithOnConstant(Value *From, Value *To,
     if (Val == From) Val = cast<Constant>(To);
     Values.push_back(Val);
   }
-  
+
   Constant *Replacement = get(Values);
   assert(Replacement != this && "I didn't contain From!");
-  
+
   // Everyone using this now uses the replacement.
   replaceAllUsesWith(Replacement);
-  
+
   // Delete the old constant!
   destroyConstant();
 }
@@ -2557,19 +2626,19 @@ void ConstantExpr::replaceUsesOfWithOnConstant(Value *From, Value *ToV,
                                                Use *U) {
   assert(isa<Constant>(ToV) && "Cannot make Constant refer to non-constant!");
   Constant *To = cast<Constant>(ToV);
-  
+
   SmallVector<Constant*, 8> NewOps;
   for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
     Constant *Op = getOperand(i);
     NewOps.push_back(Op == From ? To : Op);
   }
-  
+
   Constant *Replacement = getWithOperands(NewOps);
   assert(Replacement != this && "I didn't contain From!");
-  
+
   // Everyone using this now uses the replacement.
   replaceAllUsesWith(Replacement);
-  
+
   // Delete the old constant!
   destroyConstant();
 }
diff --git a/lib/VMCore/Core.cpp b/lib/VMCore/Core.cpp
index a9cca22..972db3c 100644
--- a/lib/VMCore/Core.cpp
+++ b/lib/VMCore/Core.cpp
@@ -115,6 +115,25 @@ void LLVMDumpModule(LLVMModuleRef M) {
   unwrap(M)->dump();
 }
 
+LLVMBool LLVMPrintModuleToFile(LLVMModuleRef M, const char *Filename,
+                               char **ErrorMessage) {
+  std::string error;
+  raw_fd_ostream dest(Filename, error);
+  if (!error.empty()) {
+    *ErrorMessage = strdup(error.c_str());
+    return true;
+  }
+
+  unwrap(M)->print(dest, NULL);
+
+  if (!error.empty()) {
+    *ErrorMessage = strdup(error.c_str());
+    return true;
+  }
+  dest.flush();
+  return false;
+}
+
 /*--.. Operations on inline assembler ......................................--*/
 void LLVMSetModuleInlineAsm(LLVMModuleRef M, const char *Asm) {
   unwrap(M)->setModuleInlineAsm(StringRef(Asm));
@@ -1191,7 +1210,7 @@ LLVMValueRef LLVMAddGlobalInAddressSpace(LLVMModuleRef M, LLVMTypeRef Ty,
                                          unsigned AddressSpace) {
   return wrap(new GlobalVariable(*unwrap(M), unwrap(Ty), false,
                                  GlobalValue::ExternalLinkage, 0, Name, 0,
-                                 false, AddressSpace));
+                                 GlobalVariable::NotThreadLocal, AddressSpace));
 }
 
 LLVMValueRef LLVMGetNamedGlobal(LLVMModuleRef M, const char *Name) {
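The new LLVMPrintModuleToFile entry point above reports failure by handing the caller a strdup'd message. A minimal caller sketch (Mod is assumed to be a valid module; LLVMDisposeMessage is the usual llvm-c call for freeing such messages):

    #include "llvm-c/Core.h"
    #include <stdio.h>

    void writeModule(LLVMModuleRef Mod) {
      char *Err = NULL;
      if (LLVMPrintModuleToFile(Mod, "out.ll", &Err)) {
        fprintf(stderr, "write failed: %s\n", Err);
        LLVMDisposeMessage(Err); /* frees the strdup'd error string */
      }
    }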
diff --git a/lib/VMCore/DIBuilder.cpp b/lib/VMCore/DIBuilder.cpp
new file mode 100644
index 0000000..f5894e9
--- /dev/null
+++ b/lib/VMCore/DIBuilder.cpp
@@ -0,0 +1,1019 @@
+//===--- DIBuilder.cpp - Debug Information Builder ------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the DIBuilder.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DIBuilder.h"
+#include "llvm/Constants.h"
+#include "llvm/DebugInfo.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Module.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Dwarf.h"
+
+using namespace llvm;
+using namespace llvm::dwarf;
+
+static Constant *GetTagConstant(LLVMContext &VMContext, unsigned Tag) {
+  assert((Tag & LLVMDebugVersionMask) == 0 &&
+         "Tag too large for debug encoding!");
+  return ConstantInt::get(Type::getInt32Ty(VMContext), Tag | LLVMDebugVersion);
+}
+
+DIBuilder::DIBuilder(Module &m)
+  : M(m), VMContext(M.getContext()), TheCU(0), TempEnumTypes(0),
+    TempRetainTypes(0), TempSubprograms(0), TempGVs(0), DeclareFn(0),
+    ValueFn(0)
+{}
+
+/// finalize - Construct any deferred debug info descriptors.
+void DIBuilder::finalize() {
+  DIArray Enums = getOrCreateArray(AllEnumTypes);
+  DIType(TempEnumTypes).replaceAllUsesWith(Enums);
+
+  DIArray RetainTypes = getOrCreateArray(AllRetainTypes);
+  DIType(TempRetainTypes).replaceAllUsesWith(RetainTypes);
+
+  DIArray SPs = getOrCreateArray(AllSubprograms);
+  DIType(TempSubprograms).replaceAllUsesWith(SPs);
+  for (unsigned i = 0, e = SPs.getNumElements(); i != e; ++i) {
+    DISubprogram SP(SPs.getElement(i));
+    SmallVector<Value *, 16> Variables;
+    if (NamedMDNode *NMD = getFnSpecificMDNode(M, SP)) {
+      for (unsigned ii = 0, ee = NMD->getNumOperands(); ii != ee; ++ii)
+        Variables.push_back(NMD->getOperand(ii));
+      NMD->eraseFromParent();
+    }
+    if (MDNode *Temp = SP.getVariablesNodes()) {
+      DIArray AV = getOrCreateArray(Variables);
+      DIType(Temp).replaceAllUsesWith(AV);
+    }
+  }
+
+  DIArray GVs = getOrCreateArray(AllGVs);
+  DIType(TempGVs).replaceAllUsesWith(GVs);
+}
+
+/// getNonCompileUnitScope - If N is a compile unit, return NULL; otherwise
+/// return N.
+static MDNode *getNonCompileUnitScope(MDNode *N) {
+  if (DIDescriptor(N).isCompileUnit())
+    return NULL;
+  return N;
+}
+
+/// createCompileUnit - A CompileUnit provides an anchor for all debugging
+/// information generated during this instance of compilation.
+void DIBuilder::createCompileUnit(unsigned Lang, StringRef Filename, + StringRef Directory, StringRef Producer, + bool isOptimized, StringRef Flags, + unsigned RunTimeVer) { + assert(((Lang <= dwarf::DW_LANG_Python && Lang >= dwarf::DW_LANG_C89) || + (Lang <= dwarf::DW_LANG_hi_user && Lang >= dwarf::DW_LANG_lo_user)) && + "Invalid Language tag"); + assert(!Filename.empty() && + "Unable to create compile unit without filename"); + Value *TElts[] = { GetTagConstant(VMContext, DW_TAG_base_type) }; + TempEnumTypes = MDNode::getTemporary(VMContext, TElts); + Value *THElts[] = { TempEnumTypes }; + MDNode *EnumHolder = MDNode::get(VMContext, THElts); + + TempRetainTypes = MDNode::getTemporary(VMContext, TElts); + Value *TRElts[] = { TempRetainTypes }; + MDNode *RetainHolder = MDNode::get(VMContext, TRElts); + + TempSubprograms = MDNode::getTemporary(VMContext, TElts); + Value *TSElts[] = { TempSubprograms }; + MDNode *SPHolder = MDNode::get(VMContext, TSElts); + + TempGVs = MDNode::getTemporary(VMContext, TElts); + Value *TVElts[] = { TempGVs }; + MDNode *GVHolder = MDNode::get(VMContext, TVElts); + + Value *Elts[] = { + GetTagConstant(VMContext, dwarf::DW_TAG_compile_unit), + Constant::getNullValue(Type::getInt32Ty(VMContext)), + ConstantInt::get(Type::getInt32Ty(VMContext), Lang), + MDString::get(VMContext, Filename), + MDString::get(VMContext, Directory), + MDString::get(VMContext, Producer), + // Deprecate isMain field. + ConstantInt::get(Type::getInt1Ty(VMContext), true), // isMain + ConstantInt::get(Type::getInt1Ty(VMContext), isOptimized), + MDString::get(VMContext, Flags), + ConstantInt::get(Type::getInt32Ty(VMContext), RunTimeVer), + EnumHolder, + RetainHolder, + SPHolder, + GVHolder + }; + TheCU = DICompileUnit(MDNode::get(VMContext, Elts)); + + // Create a named metadata so that it is easier to find cu in a module. + NamedMDNode *NMD = M.getOrInsertNamedMetadata("llvm.dbg.cu"); + NMD->addOperand(TheCU); +} + +/// createFile - Create a file descriptor to hold debugging information +/// for a file. +DIFile DIBuilder::createFile(StringRef Filename, StringRef Directory) { + assert(TheCU && "Unable to create DW_TAG_file_type without CompileUnit"); + assert(!Filename.empty() && "Unable to create file without name"); + Value *Elts[] = { + GetTagConstant(VMContext, dwarf::DW_TAG_file_type), + MDString::get(VMContext, Filename), + MDString::get(VMContext, Directory), + NULL // TheCU + }; + return DIFile(MDNode::get(VMContext, Elts)); +} + +/// createEnumerator - Create a single enumerator value. +DIEnumerator DIBuilder::createEnumerator(StringRef Name, uint64_t Val) { + assert(!Name.empty() && "Unable to create enumerator without name"); + Value *Elts[] = { + GetTagConstant(VMContext, dwarf::DW_TAG_enumerator), + MDString::get(VMContext, Name), + ConstantInt::get(Type::getInt64Ty(VMContext), Val) + }; + return DIEnumerator(MDNode::get(VMContext, Elts)); +} + +/// createNullPtrType - Create C++0x nullptr type. +DIType DIBuilder::createNullPtrType(StringRef Name) { + assert(!Name.empty() && "Unable to create type without name"); + // nullptr is encoded in DIBasicType format. Line number, filename, + // ,size, alignment, offset and flags are always empty here. 
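Taken together with createFile below, a minimal driving sketch (illustrative only, not code from this import; the module, language constant and all strings are placeholders, and later sketches in this file reuse the DIB and File handles introduced here):

    using namespace llvm;

    Module M("t", Ctx);          // Ctx: an existing LLVMContext
    DIBuilder DIB(M);
    DIB.createCompileUnit(dwarf::DW_LANG_C99, "t.c", "/tmp",
                          "example producer", /*isOptimized=*/false,
                          /*Flags=*/"", /*RunTimeVer=*/0);
    DIFile File = DIB.createFile("t.c", "/tmp");
    // ... create types, functions and variables here ...
    DIB.finalize();              // resolves the temporary holder nodes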
+ Value *Elts[] = { + GetTagConstant(VMContext, dwarf::DW_TAG_unspecified_type), + NULL, //TheCU, + MDString::get(VMContext, Name), + NULL, // Filename + ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Line + ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Size + ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Align + ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Offset + ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Flags; + ConstantInt::get(Type::getInt32Ty(VMContext), 0) // Encoding + }; + return DIType(MDNode::get(VMContext, Elts)); +} + +/// createBasicType - Create debugging information entry for a basic +/// type, e.g 'char'. +DIType DIBuilder::createBasicType(StringRef Name, uint64_t SizeInBits, + uint64_t AlignInBits, + unsigned Encoding) { + assert(!Name.empty() && "Unable to create type without name"); + // Basic types are encoded in DIBasicType format. Line number, filename, + // offset and flags are always empty here. + Value *Elts[] = { + GetTagConstant(VMContext, dwarf::DW_TAG_base_type), + NULL, //TheCU, + MDString::get(VMContext, Name), + NULL, // Filename + ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Line + ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits), + ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits), + ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Offset + ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Flags; + ConstantInt::get(Type::getInt32Ty(VMContext), Encoding) + }; + return DIType(MDNode::get(VMContext, Elts)); +} + +/// createQualifiedType - Create debugging information entry for a qualified +/// type, e.g. 'const int'. +DIType DIBuilder::createQualifiedType(unsigned Tag, DIType FromTy) { + // Qualified types are encoded in DIDerivedType format. + Value *Elts[] = { + GetTagConstant(VMContext, Tag), + NULL, //TheCU, + MDString::get(VMContext, StringRef()), // Empty name. + NULL, // Filename + ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Line + ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Size + ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Align + ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Offset + ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Flags + FromTy + }; + return DIType(MDNode::get(VMContext, Elts)); +} + +/// createPointerType - Create debugging information entry for a pointer. +DIType DIBuilder::createPointerType(DIType PointeeTy, uint64_t SizeInBits, + uint64_t AlignInBits, StringRef Name) { + // Pointer types are encoded in DIDerivedType format. + Value *Elts[] = { + GetTagConstant(VMContext, dwarf::DW_TAG_pointer_type), + NULL, //TheCU, + MDString::get(VMContext, Name), + NULL, // Filename + ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Line + ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits), + ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits), + ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Offset + ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Flags + PointeeTy + }; + return DIType(MDNode::get(VMContext, Elts)); +} + +/// createReferenceType - Create debugging information entry for a reference +/// type. +DIType DIBuilder::createReferenceType(unsigned Tag, DIType RTy) { + assert(RTy.Verify() && "Unable to create reference type"); + // References are encoded in DIDerivedType format. 
+ Value *Elts[] = { + GetTagConstant(VMContext, Tag), + NULL, // TheCU, + NULL, // Name + NULL, // Filename + ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Line + ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Size + ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Align + ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Offset + ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Flags + RTy + }; + return DIType(MDNode::get(VMContext, Elts)); +} + +/// createTypedef - Create debugging information entry for a typedef. +DIType DIBuilder::createTypedef(DIType Ty, StringRef Name, DIFile File, + unsigned LineNo, DIDescriptor Context) { + // typedefs are encoded in DIDerivedType format. + assert(Ty.Verify() && "Invalid typedef type!"); + Value *Elts[] = { + GetTagConstant(VMContext, dwarf::DW_TAG_typedef), + getNonCompileUnitScope(Context), + MDString::get(VMContext, Name), + File, + ConstantInt::get(Type::getInt32Ty(VMContext), LineNo), + ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Size + ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Align + ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Offset + ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Flags + Ty + }; + return DIType(MDNode::get(VMContext, Elts)); +} + +/// createFriend - Create debugging information entry for a 'friend'. +DIType DIBuilder::createFriend(DIType Ty, DIType FriendTy) { + // typedefs are encoded in DIDerivedType format. + assert(Ty.Verify() && "Invalid type!"); + assert(FriendTy.Verify() && "Invalid friend type!"); + Value *Elts[] = { + GetTagConstant(VMContext, dwarf::DW_TAG_friend), + Ty, + NULL, // Name + Ty.getFile(), + ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Line + ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Size + ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Align + ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Offset + ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Flags + FriendTy + }; + return DIType(MDNode::get(VMContext, Elts)); +} + +/// createInheritance - Create debugging information entry to establish +/// inheritance relationship between two types. +DIType DIBuilder::createInheritance(DIType Ty, DIType BaseTy, + uint64_t BaseOffset, unsigned Flags) { + assert(Ty.Verify() && "Unable to create inheritance"); + // TAG_inheritance is encoded in DIDerivedType format. + Value *Elts[] = { + GetTagConstant(VMContext, dwarf::DW_TAG_inheritance), + Ty, + NULL, // Name + Ty.getFile(), + ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Line + ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Size + ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Align + ConstantInt::get(Type::getInt64Ty(VMContext), BaseOffset), + ConstantInt::get(Type::getInt32Ty(VMContext), Flags), + BaseTy + }; + return DIType(MDNode::get(VMContext, Elts)); +} + +/// createMemberType - Create debugging information entry for a member. +DIType DIBuilder::createMemberType(DIDescriptor Scope, StringRef Name, + DIFile File, unsigned LineNumber, + uint64_t SizeInBits, uint64_t AlignInBits, + uint64_t OffsetInBits, unsigned Flags, + DIType Ty) { + // TAG_member is encoded in DIDerivedType format. 
+ Value *Elts[] = { + GetTagConstant(VMContext, dwarf::DW_TAG_member), + getNonCompileUnitScope(Scope), + MDString::get(VMContext, Name), + File, + ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber), + ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits), + ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits), + ConstantInt::get(Type::getInt64Ty(VMContext), OffsetInBits), + ConstantInt::get(Type::getInt32Ty(VMContext), Flags), + Ty + }; + return DIType(MDNode::get(VMContext, Elts)); +} + +/// createObjCIVar - Create debugging information entry for Objective-C +/// instance variable. +DIType DIBuilder::createObjCIVar(StringRef Name, + DIFile File, unsigned LineNumber, + uint64_t SizeInBits, uint64_t AlignInBits, + uint64_t OffsetInBits, unsigned Flags, + DIType Ty, StringRef PropertyName, + StringRef GetterName, StringRef SetterName, + unsigned PropertyAttributes) { + // TAG_member is encoded in DIDerivedType format. + Value *Elts[] = { + GetTagConstant(VMContext, dwarf::DW_TAG_member), + getNonCompileUnitScope(File), + MDString::get(VMContext, Name), + File, + ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber), + ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits), + ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits), + ConstantInt::get(Type::getInt64Ty(VMContext), OffsetInBits), + ConstantInt::get(Type::getInt32Ty(VMContext), Flags), + Ty, + MDString::get(VMContext, PropertyName), + MDString::get(VMContext, GetterName), + MDString::get(VMContext, SetterName), + ConstantInt::get(Type::getInt32Ty(VMContext), PropertyAttributes) + }; + return DIType(MDNode::get(VMContext, Elts)); +} + +/// createObjCIVar - Create debugging information entry for Objective-C +/// instance variable. +DIType DIBuilder::createObjCIVar(StringRef Name, + DIFile File, unsigned LineNumber, + uint64_t SizeInBits, uint64_t AlignInBits, + uint64_t OffsetInBits, unsigned Flags, + DIType Ty, MDNode *PropertyNode) { + // TAG_member is encoded in DIDerivedType format. + Value *Elts[] = { + GetTagConstant(VMContext, dwarf::DW_TAG_member), + getNonCompileUnitScope(File), + MDString::get(VMContext, Name), + File, + ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber), + ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits), + ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits), + ConstantInt::get(Type::getInt64Ty(VMContext), OffsetInBits), + ConstantInt::get(Type::getInt32Ty(VMContext), Flags), + Ty, + PropertyNode + }; + return DIType(MDNode::get(VMContext, Elts)); +} + +/// createObjCProperty - Create debugging information entry for Objective-C +/// property. +DIObjCProperty DIBuilder::createObjCProperty(StringRef Name, + DIFile File, unsigned LineNumber, + StringRef GetterName, + StringRef SetterName, + unsigned PropertyAttributes, + DIType Ty) { + Value *Elts[] = { + GetTagConstant(VMContext, dwarf::DW_TAG_APPLE_property), + MDString::get(VMContext, Name), + File, + ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber), + MDString::get(VMContext, GetterName), + MDString::get(VMContext, SetterName), + ConstantInt::get(Type::getInt32Ty(VMContext), PropertyAttributes), + Ty + }; + return DIObjCProperty(MDNode::get(VMContext, Elts)); +} + +/// createTemplateTypeParameter - Create debugging information for template +/// type parameter. 
+DITemplateTypeParameter +DIBuilder::createTemplateTypeParameter(DIDescriptor Context, StringRef Name, + DIType Ty, MDNode *File, unsigned LineNo, + unsigned ColumnNo) { + Value *Elts[] = { + GetTagConstant(VMContext, dwarf::DW_TAG_template_type_parameter), + getNonCompileUnitScope(Context), + MDString::get(VMContext, Name), + Ty, + File, + ConstantInt::get(Type::getInt32Ty(VMContext), LineNo), + ConstantInt::get(Type::getInt32Ty(VMContext), ColumnNo) + }; + return DITemplateTypeParameter(MDNode::get(VMContext, Elts)); +} + +/// createTemplateValueParameter - Create debugging information for template +/// value parameter. +DITemplateValueParameter +DIBuilder::createTemplateValueParameter(DIDescriptor Context, StringRef Name, + DIType Ty, uint64_t Val, + MDNode *File, unsigned LineNo, + unsigned ColumnNo) { + Value *Elts[] = { + GetTagConstant(VMContext, dwarf::DW_TAG_template_value_parameter), + getNonCompileUnitScope(Context), + MDString::get(VMContext, Name), + Ty, + ConstantInt::get(Type::getInt64Ty(VMContext), Val), + File, + ConstantInt::get(Type::getInt32Ty(VMContext), LineNo), + ConstantInt::get(Type::getInt32Ty(VMContext), ColumnNo) + }; + return DITemplateValueParameter(MDNode::get(VMContext, Elts)); +} + +/// createClassType - Create debugging information entry for a class. +DIType DIBuilder::createClassType(DIDescriptor Context, StringRef Name, + DIFile File, unsigned LineNumber, + uint64_t SizeInBits, uint64_t AlignInBits, + uint64_t OffsetInBits, unsigned Flags, + DIType DerivedFrom, DIArray Elements, + MDNode *VTableHolder, + MDNode *TemplateParams) { + // TAG_class_type is encoded in DICompositeType format. + Value *Elts[] = { + GetTagConstant(VMContext, dwarf::DW_TAG_class_type), + getNonCompileUnitScope(Context), + MDString::get(VMContext, Name), + File, + ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber), + ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits), + ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits), + ConstantInt::get(Type::getInt32Ty(VMContext), OffsetInBits), + ConstantInt::get(Type::getInt32Ty(VMContext), Flags), + DerivedFrom, + Elements, + ConstantInt::get(Type::getInt32Ty(VMContext), 0), + VTableHolder, + TemplateParams + }; + return DIType(MDNode::get(VMContext, Elts)); +} + +/// createStructType - Create debugging information entry for a struct. +DIType DIBuilder::createStructType(DIDescriptor Context, StringRef Name, + DIFile File, unsigned LineNumber, + uint64_t SizeInBits, uint64_t AlignInBits, + unsigned Flags, DIArray Elements, + unsigned RunTimeLang) { + // TAG_structure_type is encoded in DICompositeType format. + Value *Elts[] = { + GetTagConstant(VMContext, dwarf::DW_TAG_structure_type), + getNonCompileUnitScope(Context), + MDString::get(VMContext, Name), + File, + ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber), + ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits), + ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits), + ConstantInt::get(Type::getInt32Ty(VMContext), 0), + ConstantInt::get(Type::getInt32Ty(VMContext), Flags), + NULL, + Elements, + ConstantInt::get(Type::getInt32Ty(VMContext), RunTimeLang), + Constant::getNullValue(Type::getInt32Ty(VMContext)) + }; + return DIType(MDNode::get(VMContext, Elts)); +} + +/// createUnionType - Create debugging information entry for an union. 
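As a concrete illustration of the basic/member/composite builders above (a sketch only; DIB, File and the line/size numbers carry over from the earlier sketch, and getOrCreateArray is defined further down in this file):

    // Debug info for `struct P { int x; };` (sizes and offsets in bits).
    DIType IntTy = DIB.createBasicType("int", 32, 32, dwarf::DW_ATE_signed);
    DIType XMem = DIB.createMemberType(File, "x", File, /*Line=*/1,
                                       32, 32, /*Offset=*/0, /*Flags=*/0,
                                       IntTy);
    Value *MemberElts[] = { XMem };
    DIArray Members = DIB.getOrCreateArray(MemberElts);
    DIType PTy = DIB.createStructType(File, "P", File, /*Line=*/1,
                                      /*Size=*/32, /*Align=*/32, /*Flags=*/0,
                                      Members, /*RunTimeLang=*/0);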
+DIType DIBuilder::createUnionType(DIDescriptor Scope, StringRef Name, + DIFile File, + unsigned LineNumber, uint64_t SizeInBits, + uint64_t AlignInBits, unsigned Flags, + DIArray Elements, unsigned RunTimeLang) { + // TAG_union_type is encoded in DICompositeType format. + Value *Elts[] = { + GetTagConstant(VMContext, dwarf::DW_TAG_union_type), + getNonCompileUnitScope(Scope), + MDString::get(VMContext, Name), + File, + ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber), + ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits), + ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits), + ConstantInt::get(Type::getInt64Ty(VMContext), 0), + ConstantInt::get(Type::getInt32Ty(VMContext), Flags), + NULL, + Elements, + ConstantInt::get(Type::getInt32Ty(VMContext), RunTimeLang), + Constant::getNullValue(Type::getInt32Ty(VMContext)) + }; + return DIType(MDNode::get(VMContext, Elts)); +} + +/// createSubroutineType - Create subroutine type. +DIType DIBuilder::createSubroutineType(DIFile File, DIArray ParameterTypes) { + // TAG_subroutine_type is encoded in DICompositeType format. + Value *Elts[] = { + GetTagConstant(VMContext, dwarf::DW_TAG_subroutine_type), + Constant::getNullValue(Type::getInt32Ty(VMContext)), + MDString::get(VMContext, ""), + Constant::getNullValue(Type::getInt32Ty(VMContext)), + ConstantInt::get(Type::getInt32Ty(VMContext), 0), + ConstantInt::get(Type::getInt64Ty(VMContext), 0), + ConstantInt::get(Type::getInt64Ty(VMContext), 0), + ConstantInt::get(Type::getInt64Ty(VMContext), 0), + ConstantInt::get(Type::getInt32Ty(VMContext), 0), + NULL, + ParameterTypes, + ConstantInt::get(Type::getInt32Ty(VMContext), 0), + Constant::getNullValue(Type::getInt32Ty(VMContext)) + }; + return DIType(MDNode::get(VMContext, Elts)); +} + +/// createEnumerationType - Create debugging information entry for an +/// enumeration. +DIType DIBuilder::createEnumerationType(DIDescriptor Scope, StringRef Name, + DIFile File, unsigned LineNumber, + uint64_t SizeInBits, + uint64_t AlignInBits, + DIArray Elements, + DIType ClassType, unsigned Flags) { + // TAG_enumeration_type is encoded in DICompositeType format. + Value *Elts[] = { + GetTagConstant(VMContext, dwarf::DW_TAG_enumeration_type), + getNonCompileUnitScope(Scope), + MDString::get(VMContext, Name), + File, + ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber), + ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits), + ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits), + ConstantInt::get(Type::getInt32Ty(VMContext), 0), + ConstantInt::get(Type::getInt32Ty(VMContext), Flags), + ClassType, + Elements, + ConstantInt::get(Type::getInt32Ty(VMContext), 0), + Constant::getNullValue(Type::getInt32Ty(VMContext)) + }; + MDNode *Node = MDNode::get(VMContext, Elts); + AllEnumTypes.push_back(Node); + return DIType(Node); +} + +/// createArrayType - Create debugging information entry for an array. +DIType DIBuilder::createArrayType(uint64_t Size, uint64_t AlignInBits, + DIType Ty, DIArray Subscripts) { + // TAG_array_type is encoded in DICompositeType format. 
+  Value *Elts[] = {
+    GetTagConstant(VMContext, dwarf::DW_TAG_array_type),
+    NULL, //TheCU,
+    MDString::get(VMContext, ""),
+    NULL, //TheCU,
+    ConstantInt::get(Type::getInt32Ty(VMContext), 0),
+    ConstantInt::get(Type::getInt64Ty(VMContext), Size),
+    ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits),
+    ConstantInt::get(Type::getInt32Ty(VMContext), 0),
+    ConstantInt::get(Type::getInt32Ty(VMContext), 0),
+    Ty,
+    Subscripts,
+    ConstantInt::get(Type::getInt32Ty(VMContext), 0),
+    Constant::getNullValue(Type::getInt32Ty(VMContext))
+  };
+  return DIType(MDNode::get(VMContext, Elts));
+}
+
+/// createVectorType - Create debugging information entry for a vector.
+DIType DIBuilder::createVectorType(uint64_t Size, uint64_t AlignInBits,
+                                   DIType Ty, DIArray Subscripts) {
+  // TAG_vector_type is encoded in DICompositeType format.
+  Value *Elts[] = {
+    GetTagConstant(VMContext, dwarf::DW_TAG_vector_type),
+    NULL, //TheCU,
+    MDString::get(VMContext, ""),
+    NULL, //TheCU,
+    ConstantInt::get(Type::getInt32Ty(VMContext), 0),
+    ConstantInt::get(Type::getInt64Ty(VMContext), Size),
+    ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits),
+    ConstantInt::get(Type::getInt32Ty(VMContext), 0),
+    ConstantInt::get(Type::getInt32Ty(VMContext), 0),
+    Ty,
+    Subscripts,
+    ConstantInt::get(Type::getInt32Ty(VMContext), 0),
+    Constant::getNullValue(Type::getInt32Ty(VMContext))
+  };
+  return DIType(MDNode::get(VMContext, Elts));
+}
+
+/// createArtificialType - Create a new DIType with "artificial" flag set.
+DIType DIBuilder::createArtificialType(DIType Ty) {
+  if (Ty.isArtificial())
+    return Ty;
+
+  SmallVector<Value *, 9> Elts;
+  MDNode *N = Ty;
+  assert (N && "Unexpected input DIType!");
+  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+    if (Value *V = N->getOperand(i))
+      Elts.push_back(V);
+    else
+      Elts.push_back(Constant::getNullValue(Type::getInt32Ty(VMContext)));
+  }
+
+  unsigned CurFlags = Ty.getFlags();
+  CurFlags = CurFlags | DIType::FlagArtificial;
+
+  // Flags are stored at this slot.
+  Elts[8] = ConstantInt::get(Type::getInt32Ty(VMContext), CurFlags);
+
+  return DIType(MDNode::get(VMContext, Elts));
+}
+
+/// retainType - Retain DIType in a module even if it is not referenced
+/// through debug info anchors.
+void DIBuilder::retainType(DIType T) {
+  AllRetainTypes.push_back(T);
+}
+
+/// createUnspecifiedParameter - Create an unspecified type descriptor
+/// for the subroutine type.
+DIDescriptor DIBuilder::createUnspecifiedParameter() {
+  Value *Elts[] = {
+    GetTagConstant(VMContext, dwarf::DW_TAG_unspecified_parameters)
+  };
+  return DIDescriptor(MDNode::get(VMContext, Elts));
+}
+
+/// createTemporaryType - Create a temporary forward-declared type.
+DIType DIBuilder::createTemporaryType() {
+  // Give the temporary MDNode a tag. It doesn't matter what tag we
+  // use here as long as DIType accepts it.
+  Value *Elts[] = { GetTagConstant(VMContext, DW_TAG_base_type) };
+  MDNode *Node = MDNode::getTemporary(VMContext, Elts);
+  return DIType(Node);
+}
+
+/// createTemporaryType - Create a temporary forward-declared type.
+DIType DIBuilder::createTemporaryType(DIFile F) {
+  // Give the temporary MDNode a tag. It doesn't matter what tag we
+  // use here as long as DIType accepts it.
+  Value *Elts[] = {
+    GetTagConstant(VMContext, DW_TAG_base_type),
+    TheCU,
+    NULL,
+    F
+  };
+  MDNode *Node = MDNode::getTemporary(VMContext, Elts);
+  return DIType(Node);
+}
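The temporary nodes produced here are one half of the RAUW protocol; createForwardDecl below is the other. A sketch of the intended round trip (names and numbers are placeholders carried over from the earlier sketches):

    // Forward-declare `struct S;`, then resolve it once the body is known.
    DIType Fwd = DIB.createForwardDecl(dwarf::DW_TAG_structure_type, "S",
                                       File, File, /*Line=*/3,
                                       /*RuntimeLang=*/0);
    // ... Fwd may now be referenced by other descriptors ...
    DIType Full = DIB.createStructType(File, "S", File, /*Line=*/3,
                                       /*Size=*/32, /*Align=*/32, /*Flags=*/0,
                                       Members, /*RunTimeLang=*/0);
    Fwd.replaceAllUsesWith(Full);  // RAUWs the temporary MDNode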
+/// createForwardDecl - Create a temporary forward-declared type that
+/// can be RAUW'd if the full type is seen.
+DIType DIBuilder::createForwardDecl(unsigned Tag, StringRef Name,
+                                    DIDescriptor Scope, DIFile F,
+                                    unsigned Line, unsigned RuntimeLang) {
+  // Create a temporary MDNode.
+  Value *Elts[] = {
+    GetTagConstant(VMContext, Tag),
+    getNonCompileUnitScope(Scope),
+    MDString::get(VMContext, Name),
+    F,
+    ConstantInt::get(Type::getInt32Ty(VMContext), Line),
+    // To ease transition include sizes etc of 0.
+    ConstantInt::get(Type::getInt32Ty(VMContext), 0),
+    ConstantInt::get(Type::getInt32Ty(VMContext), 0),
+    ConstantInt::get(Type::getInt32Ty(VMContext), 0),
+    ConstantInt::get(Type::getInt32Ty(VMContext),
+                     DIDescriptor::FlagFwdDecl),
+    NULL,
+    DIArray(),
+    ConstantInt::get(Type::getInt32Ty(VMContext), RuntimeLang)
+  };
+  MDNode *Node = MDNode::getTemporary(VMContext, Elts);
+  return DIType(Node);
+}
+
+/// getOrCreateArray - Get a DIArray, create one if required.
+DIArray DIBuilder::getOrCreateArray(ArrayRef<Value *> Elements) {
+  if (Elements.empty()) {
+    Value *Null = Constant::getNullValue(Type::getInt32Ty(VMContext));
+    return DIArray(MDNode::get(VMContext, Null));
+  }
+  return DIArray(MDNode::get(VMContext, Elements));
+}
+
+/// getOrCreateSubrange - Create a descriptor for a value range. This
+/// implicitly uniques the values returned.
+DISubrange DIBuilder::getOrCreateSubrange(int64_t Lo, int64_t Hi) {
+  Value *Elts[] = {
+    GetTagConstant(VMContext, dwarf::DW_TAG_subrange_type),
+    ConstantInt::get(Type::getInt64Ty(VMContext), Lo),
+    ConstantInt::get(Type::getInt64Ty(VMContext), Hi)
+  };
+
+  return DISubrange(MDNode::get(VMContext, Elts));
+}
+
+/// createGlobalVariable - Create a new descriptor for the specified global.
+DIGlobalVariable DIBuilder::
+createGlobalVariable(StringRef Name, DIFile F, unsigned LineNumber,
+                     DIType Ty, bool isLocalToUnit, Value *Val) {
+  Value *Elts[] = {
+    GetTagConstant(VMContext, dwarf::DW_TAG_variable),
+    Constant::getNullValue(Type::getInt32Ty(VMContext)),
+    NULL, // TheCU,
+    MDString::get(VMContext, Name),
+    MDString::get(VMContext, Name),
+    MDString::get(VMContext, Name),
+    F,
+    ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber),
+    Ty,
+    ConstantInt::get(Type::getInt32Ty(VMContext), isLocalToUnit),
+    ConstantInt::get(Type::getInt32Ty(VMContext), 1), /* isDefinition*/
+    Val
+  };
+  MDNode *Node = MDNode::get(VMContext, Elts);
+  AllGVs.push_back(Node);
+  return DIGlobalVariable(Node);
+}
+
+/// createStaticVariable - Create a new descriptor for the specified static
+/// variable.
+DIGlobalVariable DIBuilder::
+createStaticVariable(DIDescriptor Context, StringRef Name,
+                     StringRef LinkageName, DIFile F, unsigned LineNumber,
+                     DIType Ty, bool isLocalToUnit, Value *Val) {
+  Value *Elts[] = {
+    GetTagConstant(VMContext, dwarf::DW_TAG_variable),
+    Constant::getNullValue(Type::getInt32Ty(VMContext)),
+    getNonCompileUnitScope(Context),
+    MDString::get(VMContext, Name),
+    MDString::get(VMContext, Name),
+    MDString::get(VMContext, LinkageName),
+    F,
+    ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber),
+    Ty,
+    ConstantInt::get(Type::getInt32Ty(VMContext), isLocalToUnit),
+    ConstantInt::get(Type::getInt32Ty(VMContext), 1), /* isDefinition*/
+    Val
+  };
+  MDNode *Node = MDNode::get(VMContext, Elts);
+  AllGVs.push_back(Node);
+  return DIGlobalVariable(Node);
+}
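A short sketch of describing a global with the builder entry point above (GV is assumed to be the llvm::GlobalVariable backing `int g;`; IntTy and File come from the earlier sketches):

    DIB.createGlobalVariable("g", File, /*LineNumber=*/5, IntTy,
                             /*isLocalToUnit=*/false, GV);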
+/// createVariable - Create a new descriptor for the specified variable.
+DIVariable DIBuilder::createLocalVariable(unsigned Tag, DIDescriptor Scope,
+                                          StringRef Name, DIFile File,
+                                          unsigned LineNo, DIType Ty,
+                                          bool AlwaysPreserve, unsigned Flags,
+                                          unsigned ArgNo) {
+  Value *Elts[] = {
+    GetTagConstant(VMContext, Tag),
+    getNonCompileUnitScope(Scope),
+    MDString::get(VMContext, Name),
+    File,
+    ConstantInt::get(Type::getInt32Ty(VMContext), (LineNo | (ArgNo << 24))),
+    Ty,
+    ConstantInt::get(Type::getInt32Ty(VMContext), Flags),
+    Constant::getNullValue(Type::getInt32Ty(VMContext))
+  };
+  MDNode *Node = MDNode::get(VMContext, Elts);
+  if (AlwaysPreserve) {
+    // The optimizer may remove local variables. If there is an interest
+    // to preserve variable info in such a situation then stash it in a
+    // named mdnode.
+    DISubprogram Fn(getDISubprogram(Scope));
+    NamedMDNode *FnLocals = getOrInsertFnSpecificMDNode(M, Fn);
+    FnLocals->addOperand(Node);
+  }
+  return DIVariable(Node);
+}
+
+/// createComplexVariable - Create a new descriptor for the specified variable
+/// which has a complex address expression for its address.
+DIVariable DIBuilder::createComplexVariable(unsigned Tag, DIDescriptor Scope,
+                                            StringRef Name, DIFile F,
+                                            unsigned LineNo,
+                                            DIType Ty, ArrayRef<Value *> Addr,
+                                            unsigned ArgNo) {
+  SmallVector<Value *, 15> Elts;
+  Elts.push_back(GetTagConstant(VMContext, Tag));
+  Elts.push_back(getNonCompileUnitScope(Scope));
+  Elts.push_back(MDString::get(VMContext, Name));
+  Elts.push_back(F);
+  Elts.push_back(ConstantInt::get(Type::getInt32Ty(VMContext),
+                                  (LineNo | (ArgNo << 24))));
+  Elts.push_back(Ty);
+  Elts.push_back(Constant::getNullValue(Type::getInt32Ty(VMContext)));
+  Elts.push_back(Constant::getNullValue(Type::getInt32Ty(VMContext)));
+  Elts.append(Addr.begin(), Addr.end());
+
+  return DIVariable(MDNode::get(VMContext, Elts));
+}
+
+/// createFunction - Create a new descriptor for the specified function.
+DISubprogram DIBuilder::createFunction(DIDescriptor Context,
+                                       StringRef Name,
+                                       StringRef LinkageName,
+                                       DIFile File, unsigned LineNo,
+                                       DIType Ty,
+                                       bool isLocalToUnit, bool isDefinition,
+                                       unsigned ScopeLine,
+                                       unsigned Flags, bool isOptimized,
+                                       Function *Fn,
+                                       MDNode *TParams,
+                                       MDNode *Decl) {
+  Value *TElts[] = { GetTagConstant(VMContext, DW_TAG_base_type) };
+  MDNode *Temp = MDNode::getTemporary(VMContext, TElts);
+  Value *TVElts[] = { Temp };
+  MDNode *THolder = MDNode::get(VMContext, TVElts);
+
+  Value *Elts[] = {
+    GetTagConstant(VMContext, dwarf::DW_TAG_subprogram),
+    Constant::getNullValue(Type::getInt32Ty(VMContext)),
+    getNonCompileUnitScope(Context),
+    MDString::get(VMContext, Name),
+    MDString::get(VMContext, Name),
+    MDString::get(VMContext, LinkageName),
+    File,
+    ConstantInt::get(Type::getInt32Ty(VMContext), LineNo),
+    Ty,
+    ConstantInt::get(Type::getInt1Ty(VMContext), isLocalToUnit),
+    ConstantInt::get(Type::getInt1Ty(VMContext), isDefinition),
+    ConstantInt::get(Type::getInt32Ty(VMContext), 0),
+    ConstantInt::get(Type::getInt32Ty(VMContext), 0),
+    NULL,
+    ConstantInt::get(Type::getInt32Ty(VMContext), Flags),
+    ConstantInt::get(Type::getInt1Ty(VMContext), isOptimized),
+    Fn,
+    TParams,
+    Decl,
+    THolder,
+    ConstantInt::get(Type::getInt32Ty(VMContext), ScopeLine)
+  };
+  MDNode *Node = MDNode::get(VMContext, Elts);
+
+  // Create a named metadata so that we do not lose this mdnode.
+  AllSubprograms.push_back(Node);
+  return DISubprogram(Node);
+}
+
+/// createMethod - Create a new descriptor for the specified C++ method.
+DISubprogram DIBuilder::createMethod(DIDescriptor Context, + StringRef Name, + StringRef LinkageName, + DIFile F, + unsigned LineNo, DIType Ty, + bool isLocalToUnit, + bool isDefinition, + unsigned VK, unsigned VIndex, + MDNode *VTableHolder, + unsigned Flags, + bool isOptimized, + Function *Fn, + MDNode *TParam) { + Value *TElts[] = { GetTagConstant(VMContext, DW_TAG_base_type) }; + MDNode *Temp = MDNode::getTemporary(VMContext, TElts); + Value *TVElts[] = { Temp }; + MDNode *THolder = MDNode::get(VMContext, TVElts); + + Value *Elts[] = { + GetTagConstant(VMContext, dwarf::DW_TAG_subprogram), + Constant::getNullValue(Type::getInt32Ty(VMContext)), + getNonCompileUnitScope(Context), + MDString::get(VMContext, Name), + MDString::get(VMContext, Name), + MDString::get(VMContext, LinkageName), + F, + ConstantInt::get(Type::getInt32Ty(VMContext), LineNo), + Ty, + ConstantInt::get(Type::getInt1Ty(VMContext), isLocalToUnit), + ConstantInt::get(Type::getInt1Ty(VMContext), isDefinition), + ConstantInt::get(Type::getInt32Ty(VMContext), (unsigned)VK), + ConstantInt::get(Type::getInt32Ty(VMContext), VIndex), + VTableHolder, + ConstantInt::get(Type::getInt32Ty(VMContext), Flags), + ConstantInt::get(Type::getInt1Ty(VMContext), isOptimized), + Fn, + TParam, + Constant::getNullValue(Type::getInt32Ty(VMContext)), + THolder, + // FIXME: Do we want to use different scope/lines? + ConstantInt::get(Type::getInt32Ty(VMContext), LineNo) + }; + MDNode *Node = MDNode::get(VMContext, Elts); + return DISubprogram(Node); +} + +/// createNameSpace - This creates new descriptor for a namespace +/// with the specified parent scope. +DINameSpace DIBuilder::createNameSpace(DIDescriptor Scope, StringRef Name, + DIFile File, unsigned LineNo) { + Value *Elts[] = { + GetTagConstant(VMContext, dwarf::DW_TAG_namespace), + getNonCompileUnitScope(Scope), + MDString::get(VMContext, Name), + File, + ConstantInt::get(Type::getInt32Ty(VMContext), LineNo) + }; + return DINameSpace(MDNode::get(VMContext, Elts)); +} + +/// createLexicalBlockFile - This creates a new MDNode that encapsulates +/// an existing scope with a new filename. +DILexicalBlockFile DIBuilder::createLexicalBlockFile(DIDescriptor Scope, + DIFile File) { + Value *Elts[] = { + GetTagConstant(VMContext, dwarf::DW_TAG_lexical_block), + Scope, + File + }; + return DILexicalBlockFile(MDNode::get(VMContext, Elts)); +} + +DILexicalBlock DIBuilder::createLexicalBlock(DIDescriptor Scope, DIFile File, + unsigned Line, unsigned Col) { + // Defeat MDNode uniqing for lexical blocks by using unique id. + static unsigned int unique_id = 0; + Value *Elts[] = { + GetTagConstant(VMContext, dwarf::DW_TAG_lexical_block), + getNonCompileUnitScope(Scope), + ConstantInt::get(Type::getInt32Ty(VMContext), Line), + ConstantInt::get(Type::getInt32Ty(VMContext), Col), + File, + ConstantInt::get(Type::getInt32Ty(VMContext), unique_id++) + }; + return DILexicalBlock(MDNode::get(VMContext, Elts)); +} + +/// insertDeclare - Insert a new llvm.dbg.declare intrinsic call. +Instruction *DIBuilder::insertDeclare(Value *Storage, DIVariable VarInfo, + Instruction *InsertBefore) { + assert(Storage && "no storage passed to dbg.declare"); + assert(VarInfo.Verify() && "empty DIVariable passed to dbg.declare"); + if (!DeclareFn) + DeclareFn = Intrinsic::getDeclaration(&M, Intrinsic::dbg_declare); + + Value *Args[] = { MDNode::get(Storage->getContext(), Storage), VarInfo }; + return CallInst::Create(DeclareFn, Args, "", InsertBefore); +} + +/// insertDeclare - Insert a new llvm.dbg.declare intrinsic call. 
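A combined sketch of the function-level flow, using the createFunction, createLocalVariable and insertDeclare entry points in this file (illustrative only; Fn, Alloca, SomeInst and every number are placeholders, and IntTy/File come from the earlier sketches):

    DIArray NoParams = DIB.getOrCreateArray(ArrayRef<Value *>());
    DISubprogram SP =
        DIB.createFunction(File, "f", "f", File, /*LineNo=*/7,
                           DIB.createSubroutineType(File, NoParams),
                           /*isLocalToUnit=*/true, /*isDefinition=*/true,
                           /*ScopeLine=*/7, /*Flags=*/0,
                           /*isOptimized=*/false, Fn,
                           /*TParams=*/0, /*Decl=*/0);
    DIVariable X =
        DIB.createLocalVariable(dwarf::DW_TAG_auto_variable, SP, "x", File,
                                /*LineNo=*/8, IntTy, /*AlwaysPreserve=*/false,
                                /*Flags=*/0, /*ArgNo=*/0);
    DIB.insertDeclare(Alloca, X, SomeInst);  // pins x to its alloca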
+Instruction *DIBuilder::insertDeclare(Value *Storage, DIVariable VarInfo, + BasicBlock *InsertAtEnd) { + assert(Storage && "no storage passed to dbg.declare"); + assert(VarInfo.Verify() && "invalid DIVariable passed to dbg.declare"); + if (!DeclareFn) + DeclareFn = Intrinsic::getDeclaration(&M, Intrinsic::dbg_declare); + + Value *Args[] = { MDNode::get(Storage->getContext(), Storage), VarInfo }; + + // If this block already has a terminator then insert this intrinsic + // before the terminator. + if (TerminatorInst *T = InsertAtEnd->getTerminator()) + return CallInst::Create(DeclareFn, Args, "", T); + else + return CallInst::Create(DeclareFn, Args, "", InsertAtEnd); +} + +/// insertDbgValueIntrinsic - Insert a new llvm.dbg.value intrinsic call. +Instruction *DIBuilder::insertDbgValueIntrinsic(Value *V, uint64_t Offset, + DIVariable VarInfo, + Instruction *InsertBefore) { + assert(V && "no value passed to dbg.value"); + assert(VarInfo.Verify() && "invalid DIVariable passed to dbg.value"); + if (!ValueFn) + ValueFn = Intrinsic::getDeclaration(&M, Intrinsic::dbg_value); + + Value *Args[] = { MDNode::get(V->getContext(), V), + ConstantInt::get(Type::getInt64Ty(V->getContext()), Offset), + VarInfo }; + return CallInst::Create(ValueFn, Args, "", InsertBefore); +} + +/// insertDbgValueIntrinsic - Insert a new llvm.dbg.value intrinsic call. +Instruction *DIBuilder::insertDbgValueIntrinsic(Value *V, uint64_t Offset, + DIVariable VarInfo, + BasicBlock *InsertAtEnd) { + assert(V && "no value passed to dbg.value"); + assert(VarInfo.Verify() && "invalid DIVariable passed to dbg.value"); + if (!ValueFn) + ValueFn = Intrinsic::getDeclaration(&M, Intrinsic::dbg_value); + + Value *Args[] = { MDNode::get(V->getContext(), V), + ConstantInt::get(Type::getInt64Ty(V->getContext()), Offset), + VarInfo }; + return CallInst::Create(ValueFn, Args, "", InsertAtEnd); +} diff --git a/lib/VMCore/DebugInfo.cpp b/lib/VMCore/DebugInfo.cpp new file mode 100644 index 0000000..c8f8f7d --- /dev/null +++ b/lib/VMCore/DebugInfo.cpp @@ -0,0 +1,1168 @@ +//===--- DebugInfo.cpp - Debug Information Helper Classes -----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the helper classes used to build and interpret debug +// information in LLVM IR form. 
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Instructions.h"
+#include "llvm/Module.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Dwarf.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+using namespace llvm::dwarf;
+
+//===----------------------------------------------------------------------===//
+// DIDescriptor
+//===----------------------------------------------------------------------===//
+
+DIDescriptor::DIDescriptor(const DIFile F) : DbgNode(F.DbgNode) {
+}
+
+DIDescriptor::DIDescriptor(const DISubprogram F) : DbgNode(F.DbgNode) {
+}
+
+DIDescriptor::DIDescriptor(const DILexicalBlockFile F) : DbgNode(F.DbgNode) {
+}
+
+DIDescriptor::DIDescriptor(const DILexicalBlock F) : DbgNode(F.DbgNode) {
+}
+
+DIDescriptor::DIDescriptor(const DIVariable F) : DbgNode(F.DbgNode) {
+}
+
+DIDescriptor::DIDescriptor(const DIType F) : DbgNode(F.DbgNode) {
+}
+
+StringRef
+DIDescriptor::getStringField(unsigned Elt) const {
+  if (DbgNode == 0)
+    return StringRef();
+
+  if (Elt < DbgNode->getNumOperands())
+    if (MDString *MDS = dyn_cast_or_null<MDString>(DbgNode->getOperand(Elt)))
+      return MDS->getString();
+
+  return StringRef();
+}
+
+uint64_t DIDescriptor::getUInt64Field(unsigned Elt) const {
+  if (DbgNode == 0)
+    return 0;
+
+  if (Elt < DbgNode->getNumOperands())
+    if (ConstantInt *CI
+          = dyn_cast_or_null<ConstantInt>(DbgNode->getOperand(Elt)))
+      return CI->getZExtValue();
+
+  return 0;
+}
+
+DIDescriptor DIDescriptor::getDescriptorField(unsigned Elt) const {
+  if (DbgNode == 0)
+    return DIDescriptor();
+
+  if (Elt < DbgNode->getNumOperands())
+    return
+      DIDescriptor(dyn_cast_or_null<MDNode>(DbgNode->getOperand(Elt)));
+  return DIDescriptor();
+}
+
+GlobalVariable *DIDescriptor::getGlobalVariableField(unsigned Elt) const {
+  if (DbgNode == 0)
+    return 0;
+
+  if (Elt < DbgNode->getNumOperands())
+    return dyn_cast_or_null<GlobalVariable>(DbgNode->getOperand(Elt));
+  return 0;
+}
+
+Constant *DIDescriptor::getConstantField(unsigned Elt) const {
+  if (DbgNode == 0)
+    return 0;
+
+  if (Elt < DbgNode->getNumOperands())
+    return dyn_cast_or_null<Constant>(DbgNode->getOperand(Elt));
+  return 0;
+}
+
+Function *DIDescriptor::getFunctionField(unsigned Elt) const {
+  if (DbgNode == 0)
+    return 0;
+
+  if (Elt < DbgNode->getNumOperands())
+    return dyn_cast_or_null<Function>(DbgNode->getOperand(Elt));
+  return 0;
+}
+
+unsigned DIVariable::getNumAddrElements() const {
+  if (getVersion() <= LLVMDebugVersion8)
+    return DbgNode->getNumOperands()-6;
+  if (getVersion() == LLVMDebugVersion9)
+    return DbgNode->getNumOperands()-7;
+  return DbgNode->getNumOperands()-8;
+}
+
+/// getInlinedAt - If this variable is inlined then return inline location.
+MDNode *DIVariable::getInlinedAt() const {
+  if (getVersion() <= LLVMDebugVersion9)
+    return NULL;
+  return dyn_cast_or_null<MDNode>(DbgNode->getOperand(7));
+}
+
+//===----------------------------------------------------------------------===//
+// Predicates
+//===----------------------------------------------------------------------===//
+
+/// isBasicType - Return true if the specified tag is legal for
+/// DIBasicType.
+bool DIDescriptor::isBasicType() const { + if (!DbgNode) return false; + switch (getTag()) { + case dwarf::DW_TAG_base_type: + case dwarf::DW_TAG_unspecified_type: + return true; + default: + return false; + } +} + +/// isDerivedType - Return true if the specified tag is legal for DIDerivedType. +bool DIDescriptor::isDerivedType() const { + if (!DbgNode) return false; + switch (getTag()) { + case dwarf::DW_TAG_typedef: + case dwarf::DW_TAG_pointer_type: + case dwarf::DW_TAG_reference_type: + case dwarf::DW_TAG_rvalue_reference_type: + case dwarf::DW_TAG_const_type: + case dwarf::DW_TAG_volatile_type: + case dwarf::DW_TAG_restrict_type: + case dwarf::DW_TAG_member: + case dwarf::DW_TAG_inheritance: + case dwarf::DW_TAG_friend: + return true; + default: + // CompositeTypes are currently modelled as DerivedTypes. + return isCompositeType(); + } +} + +/// isCompositeType - Return true if the specified tag is legal for +/// DICompositeType. +bool DIDescriptor::isCompositeType() const { + if (!DbgNode) return false; + switch (getTag()) { + case dwarf::DW_TAG_array_type: + case dwarf::DW_TAG_structure_type: + case dwarf::DW_TAG_union_type: + case dwarf::DW_TAG_enumeration_type: + case dwarf::DW_TAG_vector_type: + case dwarf::DW_TAG_subroutine_type: + case dwarf::DW_TAG_class_type: + return true; + default: + return false; + } +} + +/// isVariable - Return true if the specified tag is legal for DIVariable. +bool DIDescriptor::isVariable() const { + if (!DbgNode) return false; + switch (getTag()) { + case dwarf::DW_TAG_auto_variable: + case dwarf::DW_TAG_arg_variable: + case dwarf::DW_TAG_return_variable: + return true; + default: + return false; + } +} + +/// isType - Return true if the specified tag is legal for DIType. +bool DIDescriptor::isType() const { + return isBasicType() || isCompositeType() || isDerivedType(); +} + +/// isSubprogram - Return true if the specified tag is legal for +/// DISubprogram. +bool DIDescriptor::isSubprogram() const { + return DbgNode && getTag() == dwarf::DW_TAG_subprogram; +} + +/// isGlobalVariable - Return true if the specified tag is legal for +/// DIGlobalVariable. +bool DIDescriptor::isGlobalVariable() const { + return DbgNode && (getTag() == dwarf::DW_TAG_variable || + getTag() == dwarf::DW_TAG_constant); +} + +/// isGlobal - Return true if the specified tag is legal for DIGlobal. +bool DIDescriptor::isGlobal() const { + return isGlobalVariable(); +} + +/// isUnspecifiedParmeter - Return true if the specified tag is +/// DW_TAG_unspecified_parameters. +bool DIDescriptor::isUnspecifiedParameter() const { + return DbgNode && getTag() == dwarf::DW_TAG_unspecified_parameters; +} + +/// isScope - Return true if the specified tag is one of the scope +/// related tag. +bool DIDescriptor::isScope() const { + if (!DbgNode) return false; + switch (getTag()) { + case dwarf::DW_TAG_compile_unit: + case dwarf::DW_TAG_lexical_block: + case dwarf::DW_TAG_subprogram: + case dwarf::DW_TAG_namespace: + return true; + default: + break; + } + return false; +} + +/// isTemplateTypeParameter - Return true if the specified tag is +/// DW_TAG_template_type_parameter. +bool DIDescriptor::isTemplateTypeParameter() const { + return DbgNode && getTag() == dwarf::DW_TAG_template_type_parameter; +} + +/// isTemplateValueParameter - Return true if the specified tag is +/// DW_TAG_template_value_parameter. 
+bool DIDescriptor::isTemplateValueParameter() const {
+  return DbgNode && getTag() == dwarf::DW_TAG_template_value_parameter;
+}
+
+/// isCompileUnit - Return true if the specified tag is DW_TAG_compile_unit.
+bool DIDescriptor::isCompileUnit() const {
+  return DbgNode && getTag() == dwarf::DW_TAG_compile_unit;
+}
+
+/// isFile - Return true if the specified tag is DW_TAG_file_type.
+bool DIDescriptor::isFile() const {
+  return DbgNode && getTag() == dwarf::DW_TAG_file_type;
+}
+
+/// isNameSpace - Return true if the specified tag is DW_TAG_namespace.
+bool DIDescriptor::isNameSpace() const {
+  return DbgNode && getTag() == dwarf::DW_TAG_namespace;
+}
+
+/// isLexicalBlockFile - Return true if the specified descriptor is a
+/// lexical block with an extra file.
+bool DIDescriptor::isLexicalBlockFile() const {
+  return DbgNode && getTag() == dwarf::DW_TAG_lexical_block &&
+    (DbgNode->getNumOperands() == 3);
+}
+
+/// isLexicalBlock - Return true if the specified tag is DW_TAG_lexical_block.
+bool DIDescriptor::isLexicalBlock() const {
+  return DbgNode && getTag() == dwarf::DW_TAG_lexical_block &&
+    (DbgNode->getNumOperands() > 3);
+}
+
+/// isSubrange - Return true if the specified tag is DW_TAG_subrange_type.
+bool DIDescriptor::isSubrange() const {
+  return DbgNode && getTag() == dwarf::DW_TAG_subrange_type;
+}
+
+/// isEnumerator - Return true if the specified tag is DW_TAG_enumerator.
+bool DIDescriptor::isEnumerator() const {
+  return DbgNode && getTag() == dwarf::DW_TAG_enumerator;
+}
+
+/// isObjCProperty - Return true if the specified tag is
+/// DW_TAG_APPLE_property.
+bool DIDescriptor::isObjCProperty() const {
+  return DbgNode && getTag() == dwarf::DW_TAG_APPLE_property;
+}
+//===----------------------------------------------------------------------===//
+// Simple Descriptor Constructors and other Methods
+//===----------------------------------------------------------------------===//
+
+DIType::DIType(const MDNode *N) : DIScope(N) {
+  if (!N) return;
+  if (!isBasicType() && !isDerivedType() && !isCompositeType()) {
+    DbgNode = 0;
+  }
+}
+
+unsigned DIArray::getNumElements() const {
+  if (!DbgNode)
+    return 0;
+  return DbgNode->getNumOperands();
+}
+
+/// replaceAllUsesWith - Replace all uses of debug info referenced by
+/// this descriptor.
+void DIType::replaceAllUsesWith(DIDescriptor &D) {
+  if (!DbgNode)
+    return;
+
+  // Since we use a TrackingVH for the node, it's easy for clients to manufacture
+  // legitimate situations where they want to replaceAllUsesWith() on something
+  // which, due to uniquing, has merged with the source. We shield clients from
+  // this detail by allowing a value to be replaced with replaceAllUsesWith()
+  // itself.
+  if (DbgNode != D) {
+    MDNode *Node = const_cast<MDNode*>(DbgNode);
+    const MDNode *DN = D;
+    const Value *V = cast_or_null<Value>(DN);
+    Node->replaceAllUsesWith(const_cast<Value*>(V));
+    MDNode::deleteTemporary(Node);
+  }
+}
+
+/// replaceAllUsesWith - Replace all uses of debug info referenced by
+/// this descriptor.
+void DIType::replaceAllUsesWith(MDNode *D) {
+  if (!DbgNode)
+    return;
+
+  // Since we use a TrackingVH for the node, it's easy for clients to manufacture
+  // legitimate situations where they want to replaceAllUsesWith() on something
+  // which, due to uniquing, has merged with the source. We shield clients from
+  // this detail by allowing a value to be replaced with replaceAllUsesWith()
+  // itself.
+  if (DbgNode != D) {
+    MDNode *Node = const_cast<MDNode*>(DbgNode);
+    const MDNode *DN = D;
+    const Value *V = cast_or_null<Value>(DN);
+    Node->replaceAllUsesWith(const_cast<Value*>(V));
+    MDNode::deleteTemporary(Node);
+  }
+}
+
+/// isUnsignedDIType - Return true if type encoding is unsigned.
+bool DIType::isUnsignedDIType() {
+  DIDerivedType DTy(DbgNode);
+  if (DTy.Verify())
+    return DTy.getTypeDerivedFrom().isUnsignedDIType();
+
+  DIBasicType BTy(DbgNode);
+  if (BTy.Verify()) {
+    unsigned Encoding = BTy.getEncoding();
+    if (Encoding == dwarf::DW_ATE_unsigned ||
+        Encoding == dwarf::DW_ATE_unsigned_char)
+      return true;
+  }
+  return false;
+}
+
+/// Verify - Verify that a compile unit is well formed.
+bool DICompileUnit::Verify() const {
+  if (!DbgNode)
+    return false;
+  StringRef N = getFilename();
+  if (N.empty())
+    return false;
+  // It is possible that the directory and producer string are empty.
+  return true;
+}
+
+/// Verify - Verify that an ObjC property is well formed.
+bool DIObjCProperty::Verify() const {
+  if (!DbgNode)
+    return false;
+  unsigned Tag = getTag();
+  if (Tag != dwarf::DW_TAG_APPLE_property) return false;
+  DIType Ty = getType();
+  if (!Ty.Verify()) return false;
+
+  // Don't worry about the rest of the strings for now.
+  return true;
+}
+
+/// Verify - Verify that a type descriptor is well formed.
+bool DIType::Verify() const {
+  if (!DbgNode)
+    return false;
+  if (getContext() && !getContext().Verify())
+    return false;
+  unsigned Tag = getTag();
+  if (!isBasicType() && Tag != dwarf::DW_TAG_const_type &&
+      Tag != dwarf::DW_TAG_volatile_type && Tag != dwarf::DW_TAG_pointer_type &&
+      Tag != dwarf::DW_TAG_reference_type &&
+      Tag != dwarf::DW_TAG_rvalue_reference_type &&
+      Tag != dwarf::DW_TAG_restrict_type && Tag != dwarf::DW_TAG_vector_type &&
+      Tag != dwarf::DW_TAG_array_type &&
+      Tag != dwarf::DW_TAG_enumeration_type &&
+      Tag != dwarf::DW_TAG_subroutine_type &&
+      getFilename().empty())
+    return false;
+  return true;
+}
+
+/// Verify - Verify that a basic type descriptor is well formed.
+bool DIBasicType::Verify() const {
+  return isBasicType();
+}
+
+/// Verify - Verify that a derived type descriptor is well formed.
+bool DIDerivedType::Verify() const {
+  return isDerivedType();
+}
+
+/// Verify - Verify that a composite type descriptor is well formed.
+bool DICompositeType::Verify() const {
+  if (!DbgNode)
+    return false;
+  if (getContext() && !getContext().Verify())
+    return false;
+
+  return true;
+}
+
+/// Verify - Verify that a subprogram descriptor is well formed.
+bool DISubprogram::Verify() const {
+  if (!DbgNode)
+    return false;
+
+  if (getContext() && !getContext().Verify())
+    return false;
+
+  DICompositeType Ty = getType();
+  if (!Ty.Verify())
+    return false;
+  return true;
+}
+
+/// Verify - Verify that a global variable descriptor is well formed.
+bool DIGlobalVariable::Verify() const {
+  if (!DbgNode)
+    return false;
+
+  if (getDisplayName().empty())
+    return false;
+
+  if (getContext() && !getContext().Verify())
+    return false;
+
+  DIType Ty = getType();
+  if (!Ty.Verify())
+    return false;
+
+  if (!getGlobal() && !getConstant())
+    return false;
+
+  return true;
+}
+
+/// Verify - Verify that a variable descriptor is well formed.
+bool DIVariable::Verify() const {
+  if (!DbgNode)
+    return false;
+
+  if (getContext() && !getContext().Verify())
+    return false;
+
+  DIType Ty = getType();
+  if (!Ty.Verify())
+    return false;
+
+  return true;
+}
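Stripped to its core, the replaceAllUsesWith overloads above implement the usual temporary-metadata protocol. A sketch with plain MDNodes (Ctx and Real are assumed to exist; this is illustrative, not library code):

    Value *Elts[] = { ConstantInt::get(Type::getInt32Ty(Ctx), 0) };
    MDNode *Temp = MDNode::getTemporary(Ctx, Elts);  // placeholder node
    // ... Temp gets referenced while the real node is still unknown ...
    Temp->replaceAllUsesWith(Real);  // Real: the final MDNode
    MDNode::deleteTemporary(Temp);   // temporaries must be deleted manually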
+bool DILocation::Verify() const {
+  if (!DbgNode)
+    return false;
+
+  return DbgNode->getNumOperands() == 4;
+}
+
+/// Verify - Verify that a namespace descriptor is well formed.
+bool DINameSpace::Verify() const {
+  if (!DbgNode)
+    return false;
+  if (getName().empty())
+    return false;
+  return true;
+}
+
+/// getOriginalTypeSize - If this type is derived from a base type then
+/// return base type size.
+uint64_t DIDerivedType::getOriginalTypeSize() const {
+  unsigned Tag = getTag();
+
+  if (Tag != dwarf::DW_TAG_member && Tag != dwarf::DW_TAG_typedef &&
+      Tag != dwarf::DW_TAG_const_type && Tag != dwarf::DW_TAG_volatile_type &&
+      Tag != dwarf::DW_TAG_restrict_type)
+    return getSizeInBits();
+
+  DIType BaseType = getTypeDerivedFrom();
+
+  // If this type is not derived from any type then take conservative approach.
+  if (!BaseType.isValid())
+    return getSizeInBits();
+
+  // If this is a derived type, go ahead and get the base type, unless it's a
+  // reference, in which case it's just the size of the field. Pointer types
+  // have no need of this since they're a different type of qualification on
+  // the type.
+  if (BaseType.getTag() == dwarf::DW_TAG_reference_type ||
+      BaseType.getTag() == dwarf::DW_TAG_rvalue_reference_type)
+    return getSizeInBits();
+
+  if (BaseType.isDerivedType())
+    return DIDerivedType(BaseType).getOriginalTypeSize();
+
+  return BaseType.getSizeInBits();
+}
+
+/// getObjCProperty - Return the property node, if this ivar is associated
+/// with one.
+MDNode *DIDerivedType::getObjCProperty() const {
+  if (getVersion() <= LLVMDebugVersion11 || DbgNode->getNumOperands() <= 10)
+    return NULL;
+  return dyn_cast_or_null<MDNode>(DbgNode->getOperand(10));
+}
+
+/// isInlinedFnArgument - Return true if this variable provides debugging
+/// information for an inlined function argument.
+bool DIVariable::isInlinedFnArgument(const Function *CurFn) {
+  assert(CurFn && "Invalid function");
+  if (!getContext().isSubprogram())
+    return false;
+  // This variable is not an inlined function argument if its scope
+  // does not describe the current function.
+  return !DISubprogram(getContext()).describes(CurFn);
+}
+
+/// describes - Return true if this subprogram provides debugging
+/// information for the function F.
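// [Editorial sketch -- not part of this patch. describes() and
// isInlinedFnArgument() are typically used together when emitting variable
// locations; CurFn and VarNode are hypothetical:
//
//   DIVariable Var(VarNode);
//   if (Var.isInlinedFnArgument(CurFn))
//     ;  // scope belongs to an inlined callee: attach the variable to the
//        // inlined-at location rather than CurFn's own scope
// ]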
+bool DISubprogram::describes(const Function *F) {
+  assert(F && "Invalid function");
+  if (F == getFunction())
+    return true;
+  StringRef Name = getLinkageName();
+  if (Name.empty())
+    Name = getName();
+  if (F->getName() == Name)
+    return true;
+  return false;
+}
+
+unsigned DISubprogram::isOptimized() const {
+  assert(DbgNode && "Invalid subprogram descriptor!");
+  if (DbgNode->getNumOperands() == 16)
+    return getUnsignedField(15);
+  return 0;
+}
+
+MDNode *DISubprogram::getVariablesNodes() const {
+  if (!DbgNode || DbgNode->getNumOperands() <= 19)
+    return NULL;
+  if (MDNode *Temp = dyn_cast_or_null<MDNode>(DbgNode->getOperand(19)))
+    return dyn_cast_or_null<MDNode>(Temp->getOperand(0));
+  return NULL;
+}
+
+DIArray DISubprogram::getVariables() const {
+  if (!DbgNode || DbgNode->getNumOperands() <= 19)
+    return DIArray();
+  if (MDNode *T = dyn_cast_or_null<MDNode>(DbgNode->getOperand(19)))
+    if (MDNode *A = dyn_cast_or_null<MDNode>(T->getOperand(0)))
+      return DIArray(A);
+  return DIArray();
+}
+
+StringRef DIScope::getFilename() const {
+  if (!DbgNode)
+    return StringRef();
+  if (isLexicalBlockFile())
+    return DILexicalBlockFile(DbgNode).getFilename();
+  if (isLexicalBlock())
+    return DILexicalBlock(DbgNode).getFilename();
+  if (isSubprogram())
+    return DISubprogram(DbgNode).getFilename();
+  if (isCompileUnit())
+    return DICompileUnit(DbgNode).getFilename();
+  if (isNameSpace())
+    return DINameSpace(DbgNode).getFilename();
+  if (isType())
+    return DIType(DbgNode).getFilename();
+  if (isFile())
+    return DIFile(DbgNode).getFilename();
+  llvm_unreachable("Invalid DIScope!");
+}
+
+StringRef DIScope::getDirectory() const {
+  if (!DbgNode)
+    return StringRef();
+  if (isLexicalBlockFile())
+    return DILexicalBlockFile(DbgNode).getDirectory();
+  if (isLexicalBlock())
+    return DILexicalBlock(DbgNode).getDirectory();
+  if (isSubprogram())
+    return DISubprogram(DbgNode).getDirectory();
+  if (isCompileUnit())
+    return DICompileUnit(DbgNode).getDirectory();
+  if (isNameSpace())
+    return DINameSpace(DbgNode).getDirectory();
+  if (isType())
+    return DIType(DbgNode).getDirectory();
+  if (isFile())
+    return DIFile(DbgNode).getDirectory();
+  llvm_unreachable("Invalid DIScope!");
+}
+
+DIArray DICompileUnit::getEnumTypes() const {
+  if (!DbgNode || DbgNode->getNumOperands() < 14)
+    return DIArray();
+
+  if (MDNode *N = dyn_cast_or_null<MDNode>(DbgNode->getOperand(10)))
+    if (MDNode *A = dyn_cast_or_null<MDNode>(N->getOperand(0)))
+      return DIArray(A);
+  return DIArray();
+}
+
+DIArray DICompileUnit::getRetainedTypes() const {
+  if (!DbgNode || DbgNode->getNumOperands() < 14)
+    return DIArray();
+
+  if (MDNode *N = dyn_cast_or_null<MDNode>(DbgNode->getOperand(11)))
+    if (MDNode *A = dyn_cast_or_null<MDNode>(N->getOperand(0)))
+      return DIArray(A);
+  return DIArray();
+}
+
+DIArray DICompileUnit::getSubprograms() const {
+  if (!DbgNode || DbgNode->getNumOperands() < 14)
+    return DIArray();
+
+  if (MDNode *N = dyn_cast_or_null<MDNode>(DbgNode->getOperand(12)))
+    if (MDNode *A = dyn_cast_or_null<MDNode>(N->getOperand(0)))
+      return DIArray(A);
+  return DIArray();
+}
+
+
+DIArray DICompileUnit::getGlobalVariables() const {
+  if (!DbgNode || DbgNode->getNumOperands() < 14)
+    return DIArray();
+
+  if (MDNode *N = dyn_cast_or_null<MDNode>(DbgNode->getOperand(13)))
+    if (MDNode *A = dyn_cast_or_null<MDNode>(N->getOperand(0)))
+      return DIArray(A);
+  return DIArray();
+}
+
+/// fixupObjcLikeName - Replace the special characters used in typical
+/// Objective-C names with '.' in the given string.
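// [Editorial worked example -- not part of this patch. For the selector-style
// name "-[NSString stringByAppendingString:]" the loop below turns every
// '[', ']', ' ', ':', '+', '(' and ')' seen at or after the first '[' into
// '.', yielding "-.NSString.stringByAppendingString..", which is safe to
// splice into a metadata name such as "llvm.dbg.lv.".]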
+static void fixupObjcLikeName(StringRef Str, SmallVectorImpl<char> &Out) {
+  bool isObjCLike = false;
+  for (size_t i = 0, e = Str.size(); i < e; ++i) {
+    char C = Str[i];
+    if (C == '[')
+      isObjCLike = true;
+
+    if (isObjCLike && (C == '[' || C == ']' || C == ' ' || C == ':' ||
+                       C == '+' || C == '(' || C == ')'))
+      Out.push_back('.');
+    else
+      Out.push_back(C);
+  }
+}
+
+/// getFnSpecificMDNode - Return a NamedMDNode, if available, that is
+/// suitable to hold function specific information.
+NamedMDNode *llvm::getFnSpecificMDNode(const Module &M, DISubprogram Fn) {
+  SmallString<32> Name = StringRef("llvm.dbg.lv.");
+  StringRef FName = "fn";
+  if (Fn.getFunction())
+    FName = Fn.getFunction()->getName();
+  else
+    FName = Fn.getName();
+  char One = '\1';
+  if (FName.startswith(StringRef(&One, 1)))
+    FName = FName.substr(1);
+  fixupObjcLikeName(FName, Name);
+  return M.getNamedMetadata(Name.str());
+}
+
+/// getOrInsertFnSpecificMDNode - Return a NamedMDNode that is suitable
+/// to hold function specific information.
+NamedMDNode *llvm::getOrInsertFnSpecificMDNode(Module &M, DISubprogram Fn) {
+  SmallString<32> Name = StringRef("llvm.dbg.lv.");
+  StringRef FName = "fn";
+  if (Fn.getFunction())
+    FName = Fn.getFunction()->getName();
+  else
+    FName = Fn.getName();
+  char One = '\1';
+  if (FName.startswith(StringRef(&One, 1)))
+    FName = FName.substr(1);
+  fixupObjcLikeName(FName, Name);
+
+  return M.getOrInsertNamedMetadata(Name.str());
+}
+
+/// createInlinedVariable - Create a new inlined variable based on current
+/// variable.
+/// @param DV            Current variable.
+/// @param InlinedScope  Location at which the current variable is inlined.
+DIVariable llvm::createInlinedVariable(MDNode *DV, MDNode *InlinedScope,
+                                       LLVMContext &VMContext) {
+  SmallVector<Value *, 16> Elts;
+  // Insert the inlined scope as the 7th element.
+  for (unsigned i = 0, e = DV->getNumOperands(); i != e; ++i)
+    i == 7 ? Elts.push_back(InlinedScope) :
+             Elts.push_back(DV->getOperand(i));
+  return DIVariable(MDNode::get(VMContext, Elts));
+}
+
+/// cleanseInlinedVariable - Remove inlined scope from the variable.
+DIVariable llvm::cleanseInlinedVariable(MDNode *DV, LLVMContext &VMContext) {
+  SmallVector<Value *, 16> Elts;
+  // Replace the inlined scope (the 7th element) with a null value.
+  for (unsigned i = 0, e = DV->getNumOperands(); i != e; ++i)
+    i == 7 ?
+      Elts.push_back(Constant::getNullValue(Type::getInt32Ty(VMContext))) :
+      Elts.push_back(DV->getOperand(i));
+  return DIVariable(MDNode::get(VMContext, Elts));
+}
+
+/// getDISubprogram - Find the subprogram that is enclosing this scope.
+DISubprogram llvm::getDISubprogram(const MDNode *Scope) {
+  DIDescriptor D(Scope);
+  if (D.isSubprogram())
+    return DISubprogram(Scope);
+
+  if (D.isLexicalBlockFile())
+    return getDISubprogram(DILexicalBlockFile(Scope).getContext());
+
+  if (D.isLexicalBlock())
+    return getDISubprogram(DILexicalBlock(Scope).getContext());
+
+  return DISubprogram();
+}
+
+/// getDICompositeType - Find the underlying composite type.
+DICompositeType llvm::getDICompositeType(DIType T) {
+  if (T.isCompositeType())
+    return DICompositeType(T);
+
+  if (T.isDerivedType())
+    return getDICompositeType(DIDerivedType(T).getTypeDerivedFrom());
+
+  return DICompositeType();
+}
+
+/// isSubprogramContext - Return true if Context is either a subprogram
+/// or another context nested inside a subprogram.
+bool llvm::isSubprogramContext(const MDNode *Context) { + if (!Context) + return false; + DIDescriptor D(Context); + if (D.isSubprogram()) + return true; + if (D.isType()) + return isSubprogramContext(DIType(Context).getContext()); + return false; +} + +//===----------------------------------------------------------------------===// +// DebugInfoFinder implementations. +//===----------------------------------------------------------------------===// + +/// processModule - Process entire module and collect debug info. +void DebugInfoFinder::processModule(Module &M) { + if (NamedMDNode *CU_Nodes = M.getNamedMetadata("llvm.dbg.cu")) { + for (unsigned i = 0, e = CU_Nodes->getNumOperands(); i != e; ++i) { + DICompileUnit CU(CU_Nodes->getOperand(i)); + addCompileUnit(CU); + if (CU.getVersion() > LLVMDebugVersion10) { + DIArray GVs = CU.getGlobalVariables(); + for (unsigned i = 0, e = GVs.getNumElements(); i != e; ++i) { + DIGlobalVariable DIG(GVs.getElement(i)); + if (addGlobalVariable(DIG)) + processType(DIG.getType()); + } + DIArray SPs = CU.getSubprograms(); + for (unsigned i = 0, e = SPs.getNumElements(); i != e; ++i) + processSubprogram(DISubprogram(SPs.getElement(i))); + DIArray EnumTypes = CU.getEnumTypes(); + for (unsigned i = 0, e = EnumTypes.getNumElements(); i != e; ++i) + processType(DIType(EnumTypes.getElement(i))); + DIArray RetainedTypes = CU.getRetainedTypes(); + for (unsigned i = 0, e = RetainedTypes.getNumElements(); i != e; ++i) + processType(DIType(RetainedTypes.getElement(i))); + return; + } + } + } + + for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) + for (Function::iterator FI = (*I).begin(), FE = (*I).end(); FI != FE; ++FI) + for (BasicBlock::iterator BI = (*FI).begin(), BE = (*FI).end(); BI != BE; + ++BI) { + if (DbgDeclareInst *DDI = dyn_cast(BI)) + processDeclare(DDI); + + DebugLoc Loc = BI->getDebugLoc(); + if (Loc.isUnknown()) + continue; + + LLVMContext &Ctx = BI->getContext(); + DIDescriptor Scope(Loc.getScope(Ctx)); + + if (Scope.isCompileUnit()) + addCompileUnit(DICompileUnit(Scope)); + else if (Scope.isSubprogram()) + processSubprogram(DISubprogram(Scope)); + else if (Scope.isLexicalBlockFile()) { + DILexicalBlockFile DBF = DILexicalBlockFile(Scope); + processLexicalBlock(DILexicalBlock(DBF.getScope())); + } + else if (Scope.isLexicalBlock()) + processLexicalBlock(DILexicalBlock(Scope)); + + if (MDNode *IA = Loc.getInlinedAt(Ctx)) + processLocation(DILocation(IA)); + } + + if (NamedMDNode *NMD = M.getNamedMetadata("llvm.dbg.gv")) { + for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) { + DIGlobalVariable DIG(cast(NMD->getOperand(i))); + if (addGlobalVariable(DIG)) { + if (DIG.getVersion() <= LLVMDebugVersion10) + addCompileUnit(DIG.getCompileUnit()); + processType(DIG.getType()); + } + } + } + + if (NamedMDNode *NMD = M.getNamedMetadata("llvm.dbg.sp")) + for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) + processSubprogram(DISubprogram(NMD->getOperand(i))); +} + +/// processLocation - Process DILocation. 
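// [Editorial sketch -- not part of this patch. A minimal driver for the
// finder; the subprogram_begin()/subprogram_end() accessors exist on the
// 3.1-era DebugInfoFinder, but treat this as a sketch:
//
//   DebugInfoFinder Finder;
//   Finder.processModule(M);
//   for (DebugInfoFinder::iterator I = Finder.subprogram_begin(),
//          E = Finder.subprogram_end(); I != E; ++I)
//     DISubprogram(*I).print(dbgs());
// ]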
+void DebugInfoFinder::processLocation(DILocation Loc) {
+  if (!Loc.Verify()) return;
+  DIDescriptor S(Loc.getScope());
+  if (S.isCompileUnit())
+    addCompileUnit(DICompileUnit(S));
+  else if (S.isSubprogram())
+    processSubprogram(DISubprogram(S));
+  else if (S.isLexicalBlock())
+    processLexicalBlock(DILexicalBlock(S));
+  else if (S.isLexicalBlockFile()) {
+    DILexicalBlockFile DBF = DILexicalBlockFile(S);
+    processLexicalBlock(DILexicalBlock(DBF.getScope()));
+  }
+  processLocation(Loc.getOrigLocation());
+}
+
+/// processType - Process DIType.
+void DebugInfoFinder::processType(DIType DT) {
+  if (!addType(DT))
+    return;
+  if (DT.getVersion() <= LLVMDebugVersion10)
+    addCompileUnit(DT.getCompileUnit());
+  if (DT.isCompositeType()) {
+    DICompositeType DCT(DT);
+    processType(DCT.getTypeDerivedFrom());
+    DIArray DA = DCT.getTypeArray();
+    for (unsigned i = 0, e = DA.getNumElements(); i != e; ++i) {
+      DIDescriptor D = DA.getElement(i);
+      if (D.isType())
+        processType(DIType(D));
+      else if (D.isSubprogram())
+        processSubprogram(DISubprogram(D));
+    }
+  } else if (DT.isDerivedType()) {
+    DIDerivedType DDT(DT);
+    processType(DDT.getTypeDerivedFrom());
+  }
+}
+
+/// processLexicalBlock
+void DebugInfoFinder::processLexicalBlock(DILexicalBlock LB) {
+  DIScope Context = LB.getContext();
+  if (Context.isLexicalBlock())
+    return processLexicalBlock(DILexicalBlock(Context));
+  else if (Context.isLexicalBlockFile()) {
+    DILexicalBlockFile DBF = DILexicalBlockFile(Context);
+    return processLexicalBlock(DILexicalBlock(DBF.getScope()));
+  }
+  else
+    return processSubprogram(DISubprogram(Context));
+}
+
+/// processSubprogram - Process DISubprogram.
+void DebugInfoFinder::processSubprogram(DISubprogram SP) {
+  if (!addSubprogram(SP))
+    return;
+  if (SP.getVersion() <= LLVMDebugVersion10)
+    addCompileUnit(SP.getCompileUnit());
+  processType(SP.getType());
+}
+
+/// processDeclare - Process DbgDeclareInst.
+void DebugInfoFinder::processDeclare(DbgDeclareInst *DDI) {
+  MDNode *N = dyn_cast<MDNode>(DDI->getVariable());
+  if (!N) return;
+
+  DIDescriptor DV(N);
+  if (!DV.isVariable())
+    return;
+
+  if (!NodesSeen.insert(DV))
+    return;
+  if (DIVariable(N).getVersion() <= LLVMDebugVersion10)
+    addCompileUnit(DIVariable(N).getCompileUnit());
+  processType(DIVariable(N).getType());
+}
+
+/// addType - Add type into Tys.
+bool DebugInfoFinder::addType(DIType DT) {
+  if (!DT.isValid())
+    return false;
+
+  if (!NodesSeen.insert(DT))
+    return false;
+
+  TYs.push_back(DT);
+  return true;
+}
+
+/// addCompileUnit - Add compile unit into CUs.
+bool DebugInfoFinder::addCompileUnit(DICompileUnit CU) {
+  if (!CU.Verify())
+    return false;
+
+  if (!NodesSeen.insert(CU))
+    return false;
+
+  CUs.push_back(CU);
+  return true;
+}
+
+/// addGlobalVariable - Add global variable into GVs.
+bool DebugInfoFinder::addGlobalVariable(DIGlobalVariable DIG) {
+  if (!DIDescriptor(DIG).isGlobalVariable())
+    return false;
+
+  if (!NodesSeen.insert(DIG))
+    return false;
+
+  GVs.push_back(DIG);
+  return true;
+}
+
+/// addSubprogram - Add subprogram into SPs.
+bool DebugInfoFinder::addSubprogram(DISubprogram SP) {
+  if (!DIDescriptor(SP).isSubprogram())
+    return false;
+
+  if (!NodesSeen.insert(SP))
+    return false;
+
+  SPs.push_back(SP);
+  return true;
+}
+
+//===----------------------------------------------------------------------===//
+// DIDescriptor: dump routines for all descriptors.
+//===----------------------------------------------------------------------===//
+
+/// dump - Print descriptor to dbgs() with a newline.
+void DIDescriptor::dump() const { + print(dbgs()); dbgs() << '\n'; +} + +/// print - Print descriptor. +void DIDescriptor::print(raw_ostream &OS) const { + if (!DbgNode) return; + + if (const char *Tag = dwarf::TagString(getTag())) + OS << "[ " << Tag << " ]"; + + if (this->isSubrange()) { + DISubrange(DbgNode).printInternal(OS); + } else if (this->isCompileUnit()) { + DICompileUnit(DbgNode).printInternal(OS); + } else if (this->isFile()) { + DIFile(DbgNode).printInternal(OS); + } else if (this->isEnumerator()) { + DIEnumerator(DbgNode).printInternal(OS); + } else if (this->isBasicType()) { + DIType(DbgNode).printInternal(OS); + } else if (this->isDerivedType()) { + DIDerivedType(DbgNode).printInternal(OS); + } else if (this->isCompositeType()) { + DICompositeType(DbgNode).printInternal(OS); + } else if (this->isSubprogram()) { + DISubprogram(DbgNode).printInternal(OS); + } else if (this->isGlobalVariable()) { + DIGlobalVariable(DbgNode).printInternal(OS); + } else if (this->isVariable()) { + DIVariable(DbgNode).printInternal(OS); + } else if (this->isObjCProperty()) { + DIObjCProperty(DbgNode).printInternal(OS); + } else if (this->isScope()) { + DIScope(DbgNode).printInternal(OS); + } +} + +void DISubrange::printInternal(raw_ostream &OS) const { + OS << " [" << getLo() << ", " << getHi() << ']'; +} + +void DIScope::printInternal(raw_ostream &OS) const { + OS << " [" << getDirectory() << "/" << getFilename() << ']'; +} + +void DICompileUnit::printInternal(raw_ostream &OS) const { + DIScope::printInternal(OS); + if (unsigned Lang = getLanguage()) + OS << " [" << dwarf::LanguageString(Lang) << ']'; +} + +void DIEnumerator::printInternal(raw_ostream &OS) const { + OS << " [" << getName() << " :: " << getEnumValue() << ']'; +} + +void DIType::printInternal(raw_ostream &OS) const { + if (!DbgNode) return; + + StringRef Res = getName(); + if (!Res.empty()) + OS << " [" << Res << "]"; + + // TODO: Print context? 
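// [Editorial note -- sample output, not part of this patch. Combined with
// DIDescriptor::print() above, a C "int" comes out roughly as:
//   [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0,
//     enc DW_ATE_signed]
// The exact numbers are illustrative.]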
+ + OS << " [line " << getLineNumber() + << ", size " << getSizeInBits() + << ", align " << getAlignInBits() + << ", offset " << getOffsetInBits(); + if (isBasicType()) + if (const char *Enc = + dwarf::AttributeEncodingString(DIBasicType(DbgNode).getEncoding())) + OS << ", enc " << Enc; + OS << "]"; + + if (isPrivate()) + OS << " [private]"; + else if (isProtected()) + OS << " [protected]"; + + if (isForwardDecl()) + OS << " [fwd]"; +} + +void DIDerivedType::printInternal(raw_ostream &OS) const { + DIType::printInternal(OS); + OS << " [from " << getTypeDerivedFrom().getName() << ']'; +} + +void DICompositeType::printInternal(raw_ostream &OS) const { + DIType::printInternal(OS); + DIArray A = getTypeArray(); + OS << " [" << A.getNumElements() << " elements]"; +} + +void DISubprogram::printInternal(raw_ostream &OS) const { + // TODO : Print context + OS << " [line " << getLineNumber() << ']'; + + if (isLocalToUnit()) + OS << " [local]"; + + if (isDefinition()) + OS << " [def]"; + + if (getScopeLineNumber() != getLineNumber()) + OS << " [scope " << getScopeLineNumber() << "]"; + + StringRef Res = getName(); + if (!Res.empty()) + OS << " [" << Res << ']'; +} + +void DIGlobalVariable::printInternal(raw_ostream &OS) const { + StringRef Res = getName(); + if (!Res.empty()) + OS << " [" << Res << ']'; + + OS << " [line " << getLineNumber() << ']'; + + // TODO : Print context + + if (isLocalToUnit()) + OS << " [local]"; + + if (isDefinition()) + OS << " [def]"; +} + +void DIVariable::printInternal(raw_ostream &OS) const { + StringRef Res = getName(); + if (!Res.empty()) + OS << " [" << Res << ']'; + + OS << " [line " << getLineNumber() << ']'; +} + +void DIObjCProperty::printInternal(raw_ostream &OS) const { + StringRef Name = getObjCPropertyName(); + if (!Name.empty()) + OS << " [" << Name << ']'; + + OS << " [line " << getLineNumber() + << ", properties " << getUnsignedField(6) << ']'; +} + +static void printDebugLoc(DebugLoc DL, raw_ostream &CommentOS, + const LLVMContext &Ctx) { + if (!DL.isUnknown()) { // Print source line info. + DIScope Scope(DL.getScope(Ctx)); + // Omit the directory, because it's likely to be long and uninteresting. + if (Scope.Verify()) + CommentOS << Scope.getFilename(); + else + CommentOS << ""; + CommentOS << ':' << DL.getLine(); + if (DL.getCol() != 0) + CommentOS << ':' << DL.getCol(); + DebugLoc InlinedAtDL = DebugLoc::getFromDILocation(DL.getInlinedAt(Ctx)); + if (!InlinedAtDL.isUnknown()) { + CommentOS << " @[ "; + printDebugLoc(InlinedAtDL, CommentOS, Ctx); + CommentOS << " ]"; + } + } +} + +void DIVariable::printExtendedName(raw_ostream &OS) const { + const LLVMContext &Ctx = DbgNode->getContext(); + StringRef Res = getName(); + if (!Res.empty()) + OS << Res << "," << getLineNumber(); + if (MDNode *InlinedAt = getInlinedAt()) { + DebugLoc InlinedAtDL = DebugLoc::getFromDILocation(InlinedAt); + if (!InlinedAtDL.isUnknown()) { + OS << " @["; + printDebugLoc(InlinedAtDL, OS, Ctx); + OS << "]"; + } + } +} diff --git a/lib/VMCore/DebugLoc.cpp b/lib/VMCore/DebugLoc.cpp index 9013d28..c6a3053 100644 --- a/lib/VMCore/DebugLoc.cpp +++ b/lib/VMCore/DebugLoc.cpp @@ -8,6 +8,7 @@ //===----------------------------------------------------------------------===// #include "llvm/Support/DebugLoc.h" +#include "llvm/DebugInfo.h" #include "llvm/ADT/DenseMapInfo.h" #include "LLVMContextImpl.h" using namespace llvm; @@ -114,34 +115,19 @@ MDNode *DebugLoc::getAsMDNode(const LLVMContext &Ctx) const { /// getFromDILocation - Translate the DILocation quad into a DebugLoc. 
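// [Editorial sketch -- not part of this patch. A DILocation is the metadata
// quad !{line, col, scope, originalLocation}, e.g.
//   !42 = metadata !{i32 10, i32 4, metadata !7, null}
// The rewrite below lets the DILocation wrapper extract those operands
// instead of indexing into the MDNode by hand.]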
DebugLoc DebugLoc::getFromDILocation(MDNode *N) { - if (N == 0 || N->getNumOperands() != 4) return DebugLoc(); - - MDNode *Scope = dyn_cast_or_null(N->getOperand(2)); + DILocation Loc(N); + MDNode *Scope = Loc.getScope(); if (Scope == 0) return DebugLoc(); - - unsigned LineNo = 0, ColNo = 0; - if (ConstantInt *Line = dyn_cast_or_null(N->getOperand(0))) - LineNo = Line->getZExtValue(); - if (ConstantInt *Col = dyn_cast_or_null(N->getOperand(1))) - ColNo = Col->getZExtValue(); - - return get(LineNo, ColNo, Scope, dyn_cast_or_null(N->getOperand(3))); + return get(Loc.getLineNumber(), Loc.getColumnNumber(), Scope, + Loc.getOrigLocation()); } /// getFromDILexicalBlock - Translate the DILexicalBlock into a DebugLoc. DebugLoc DebugLoc::getFromDILexicalBlock(MDNode *N) { - if (N == 0 || N->getNumOperands() < 3) return DebugLoc(); - - MDNode *Scope = dyn_cast_or_null(N->getOperand(1)); + DILexicalBlock LexBlock(N); + MDNode *Scope = LexBlock.getContext(); if (Scope == 0) return DebugLoc(); - - unsigned LineNo = 0, ColNo = 0; - if (ConstantInt *Line = dyn_cast_or_null(N->getOperand(2))) - LineNo = Line->getZExtValue(); - if (ConstantInt *Col = dyn_cast_or_null(N->getOperand(3))) - ColNo = Col->getZExtValue(); - - return get(LineNo, ColNo, Scope, NULL); + return get(LexBlock.getLineNumber(), LexBlock.getColumnNumber(), Scope, NULL); } void DebugLoc::dump(const LLVMContext &Ctx) const { @@ -164,22 +150,10 @@ void DebugLoc::dump(const LLVMContext &Ctx) const { // DenseMap specialization //===----------------------------------------------------------------------===// -DebugLoc DenseMapInfo::getEmptyKey() { - return DebugLoc::getEmptyKey(); -} - -DebugLoc DenseMapInfo::getTombstoneKey() { - return DebugLoc::getTombstoneKey(); -} - unsigned DenseMapInfo::getHashValue(const DebugLoc &Key) { return static_cast(hash_combine(Key.LineCol, Key.ScopeIdx)); } -bool DenseMapInfo::isEqual(const DebugLoc &LHS, const DebugLoc &RHS) { - return LHS == RHS; -} - //===----------------------------------------------------------------------===// // LLVMContextImpl Implementation //===----------------------------------------------------------------------===// diff --git a/lib/VMCore/Dominators.cpp b/lib/VMCore/Dominators.cpp index 219e631..682d928 100644 --- a/lib/VMCore/Dominators.cpp +++ b/lib/VMCore/Dominators.cpp @@ -39,6 +39,22 @@ static cl::opt VerifyDomInfoX("verify-dom-info", cl::location(VerifyDomInfo), cl::desc("Verify dominator info (time consuming)")); +namespace llvm { + class BasicBlockEdge { + const BasicBlock *Start; + const BasicBlock *End; + public: + BasicBlockEdge(const BasicBlock *Start_, const BasicBlock *End_) : + Start(Start_), End(End_) { } + const BasicBlock *getStart() const { + return Start; + } + const BasicBlock *getEnd() const { + return End; + } + }; +} + //===----------------------------------------------------------------------===// // DominatorTree Implementation //===----------------------------------------------------------------------===// @@ -142,12 +158,22 @@ bool DominatorTree::dominates(const Instruction *Def, // Invoke results are only usable in the normal destination, not in the // exceptional destination. BasicBlock *NormalDest = II->getNormalDest(); - if (!dominates(NormalDest, UseBB)) + BasicBlockEdge E(DefBB, NormalDest); + return dominates(E, UseBB); +} + +bool DominatorTree::dominates(const BasicBlockEdge &BBE, + const BasicBlock *UseBB) const { + // If the BB the edge ends in doesn't dominate the use BB, then the + // edge also doesn't. 
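// [Editorial illustration -- not part of this patch. The interesting case is
// a critical edge:
//
//      Start    Other
//         \      /
//          v    v
//           End
//
// End may dominate UseBB, yet a value produced only on the Start->End edge
// does not dominate uses reached via Other. The predecessor loop below
// therefore requires every predecessor of End other than Start to itself be
// dominated by End.]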
+ const BasicBlock *Start = BBE.getStart(); + const BasicBlock *End = BBE.getEnd(); + if (!dominates(End, UseBB)) return false; - // Simple case: if the normal destination has a single predecessor, the - // fact that it dominates the use block implies that we also do. - if (NormalDest->getSinglePredecessor()) + // Simple case: if the end BB has a single predecessor, the fact that it + // dominates the use block implies that the edge also does. + if (End->getSinglePredecessor()) return true; // The normal edge from the invoke is critical. Conceptually, what we would @@ -170,29 +196,40 @@ bool DominatorTree::dominates(const Instruction *Def, // trivially dominates itself, so we only have to find if it dominates the // other predecessors. Since the only way out of X is via NormalDest, X can // only properly dominate a node if NormalDest dominates that node too. - for (pred_iterator PI = pred_begin(NormalDest), - E = pred_end(NormalDest); PI != E; ++PI) { + for (const_pred_iterator PI = pred_begin(End), E = pred_end(End); + PI != E; ++PI) { const BasicBlock *BB = *PI; - if (BB == DefBB) + if (BB == Start) continue; - if (!DT->isReachableFromEntry(BB)) - continue; - - if (!dominates(NormalDest, BB)) + if (!dominates(End, BB)) return false; } return true; } -bool DominatorTree::dominates(const Instruction *Def, +bool DominatorTree::dominates(const BasicBlockEdge &BBE, const Use &U) const { - Instruction *UserInst = dyn_cast(U.getUser()); + Instruction *UserInst = cast(U.getUser()); + // A PHI in the end of the edge is dominated by it. + PHINode *PN = dyn_cast(UserInst); + if (PN && PN->getParent() == BBE.getEnd() && + PN->getIncomingBlock(U) == BBE.getStart()) + return true; - // Instructions do not dominate non-instructions. - if (!UserInst) - return false; + // Otherwise use the edge-dominates-block query, which + // handles the crazy critical edge cases properly. + const BasicBlock *UseBB; + if (PN) + UseBB = PN->getIncomingBlock(U); + else + UseBB = UserInst->getParent(); + return dominates(BBE, UseBB); +} +bool DominatorTree::dominates(const Instruction *Def, + const Use &U) const { + Instruction *UserInst = cast(U.getUser()); const BasicBlock *DefBB = Def->getParent(); // Determine the block in which the use happens. PHI nodes use @@ -218,17 +255,9 @@ bool DominatorTree::dominates(const Instruction *Def, // their own block, except possibly a phi, so we don't need to // walk the block in any case. if (const InvokeInst *II = dyn_cast(Def)) { - // A PHI in the normal successor using the invoke's return value is - // dominated by the invoke's return value. - if (isa(UserInst) && - UserInst->getParent() == II->getNormalDest() && - cast(UserInst)->getIncomingBlock(U) == DefBB) - return true; - - // Otherwise use the instruction-dominates-block query, which - // handles the crazy case of an invoke with a critical edge - // properly. - return dominates(Def, UseBB); + BasicBlock *NormalDest = II->getNormalDest(); + BasicBlockEdge E(DefBB, NormalDest); + return dominates(E, U); } // If the def and use are in different blocks, do a simple CFG dominator diff --git a/lib/VMCore/Function.cpp b/lib/VMCore/Function.cpp index af6344e..2e0b316 100644 --- a/lib/VMCore/Function.cpp +++ b/lib/VMCore/Function.cpp @@ -29,7 +29,6 @@ #include "llvm/ADT/StringExtras.h" using namespace llvm; - // Explicit instantiations of SymbolTableListTraits since some of the methods // are not in the public header file... 
template class llvm::SymbolTableListTraits; @@ -358,17 +357,239 @@ std::string Intrinsic::getName(ID id, ArrayRef Tys) { return Result; } -FunctionType *Intrinsic::getType(LLVMContext &Context, - ID id, ArrayRef Tys) { - Type *ResultTy = NULL; - SmallVector ArgTys; - bool IsVarArg = false; + +/// IIT_Info - These are enumerators that describe the entries returned by the +/// getIntrinsicInfoTableEntries function. +/// +/// NOTE: This must be kept in synch with the copy in TblGen/IntrinsicEmitter! +enum IIT_Info { + // Common values should be encoded with 0-15. + IIT_Done = 0, + IIT_I1 = 1, + IIT_I8 = 2, + IIT_I16 = 3, + IIT_I32 = 4, + IIT_I64 = 5, + IIT_F32 = 6, + IIT_F64 = 7, + IIT_V2 = 8, + IIT_V4 = 9, + IIT_V8 = 10, + IIT_V16 = 11, + IIT_V32 = 12, + IIT_MMX = 13, + IIT_PTR = 14, + IIT_ARG = 15, -#define GET_INTRINSIC_GENERATOR + // Values from 16+ are only encodable with the inefficient encoding. + IIT_METADATA = 16, + IIT_EMPTYSTRUCT = 17, + IIT_STRUCT2 = 18, + IIT_STRUCT3 = 19, + IIT_STRUCT4 = 20, + IIT_STRUCT5 = 21, + IIT_EXTEND_VEC_ARG = 22, + IIT_TRUNC_VEC_ARG = 23, + IIT_ANYPTR = 24 +}; + + +static void DecodeIITType(unsigned &NextElt, ArrayRef Infos, + SmallVectorImpl &OutputTable) { + IIT_Info Info = IIT_Info(Infos[NextElt++]); + unsigned StructElts = 2; + using namespace Intrinsic; + + switch (Info) { + case IIT_Done: + OutputTable.push_back(IITDescriptor::get(IITDescriptor::Void, 0)); + return; + case IIT_MMX: + OutputTable.push_back(IITDescriptor::get(IITDescriptor::MMX, 0)); + return; + case IIT_METADATA: + OutputTable.push_back(IITDescriptor::get(IITDescriptor::Metadata, 0)); + return; + case IIT_F32: + OutputTable.push_back(IITDescriptor::get(IITDescriptor::Float, 0)); + return; + case IIT_F64: + OutputTable.push_back(IITDescriptor::get(IITDescriptor::Double, 0)); + return; + case IIT_I1: + OutputTable.push_back(IITDescriptor::get(IITDescriptor::Integer, 1)); + return; + case IIT_I8: + OutputTable.push_back(IITDescriptor::get(IITDescriptor::Integer, 8)); + return; + case IIT_I16: + OutputTable.push_back(IITDescriptor::get(IITDescriptor::Integer,16)); + return; + case IIT_I32: + OutputTable.push_back(IITDescriptor::get(IITDescriptor::Integer, 32)); + return; + case IIT_I64: + OutputTable.push_back(IITDescriptor::get(IITDescriptor::Integer, 64)); + return; + case IIT_V2: + OutputTable.push_back(IITDescriptor::get(IITDescriptor::Vector, 2)); + DecodeIITType(NextElt, Infos, OutputTable); + return; + case IIT_V4: + OutputTable.push_back(IITDescriptor::get(IITDescriptor::Vector, 4)); + DecodeIITType(NextElt, Infos, OutputTable); + return; + case IIT_V8: + OutputTable.push_back(IITDescriptor::get(IITDescriptor::Vector, 8)); + DecodeIITType(NextElt, Infos, OutputTable); + return; + case IIT_V16: + OutputTable.push_back(IITDescriptor::get(IITDescriptor::Vector, 16)); + DecodeIITType(NextElt, Infos, OutputTable); + return; + case IIT_V32: + OutputTable.push_back(IITDescriptor::get(IITDescriptor::Vector, 32)); + DecodeIITType(NextElt, Infos, OutputTable); + return; + case IIT_PTR: + OutputTable.push_back(IITDescriptor::get(IITDescriptor::Pointer, 0)); + DecodeIITType(NextElt, Infos, OutputTable); + return; + case IIT_ANYPTR: { // [ANYPTR addrspace, subtype] + OutputTable.push_back(IITDescriptor::get(IITDescriptor::Pointer, + Infos[NextElt++])); + DecodeIITType(NextElt, Infos, OutputTable); + return; + } + case IIT_ARG: { + unsigned ArgInfo = (NextElt == Infos.size() ? 
0 : Infos[NextElt++]); + OutputTable.push_back(IITDescriptor::get(IITDescriptor::Argument, ArgInfo)); + return; + } + case IIT_EXTEND_VEC_ARG: { + unsigned ArgInfo = (NextElt == Infos.size() ? 0 : Infos[NextElt++]); + OutputTable.push_back(IITDescriptor::get(IITDescriptor::ExtendVecArgument, + ArgInfo)); + return; + } + case IIT_TRUNC_VEC_ARG: { + unsigned ArgInfo = (NextElt == Infos.size() ? 0 : Infos[NextElt++]); + OutputTable.push_back(IITDescriptor::get(IITDescriptor::TruncVecArgument, + ArgInfo)); + return; + } + case IIT_EMPTYSTRUCT: + OutputTable.push_back(IITDescriptor::get(IITDescriptor::Struct, 0)); + return; + case IIT_STRUCT5: ++StructElts; // FALL THROUGH. + case IIT_STRUCT4: ++StructElts; // FALL THROUGH. + case IIT_STRUCT3: ++StructElts; // FALL THROUGH. + case IIT_STRUCT2: { + OutputTable.push_back(IITDescriptor::get(IITDescriptor::Struct,StructElts)); + + for (unsigned i = 0; i != StructElts; ++i) + DecodeIITType(NextElt, Infos, OutputTable); + return; + } + } + llvm_unreachable("unhandled"); +} + + +#define GET_INTRINSIC_GENERATOR_GLOBAL #include "llvm/Intrinsics.gen" -#undef GET_INTRINSIC_GENERATOR +#undef GET_INTRINSIC_GENERATOR_GLOBAL + +void Intrinsic::getIntrinsicInfoTableEntries(ID id, + SmallVectorImpl &T){ + // Check to see if the intrinsic's type was expressible by the table. + unsigned TableVal = IIT_Table[id-1]; + + // Decode the TableVal into an array of IITValues. + SmallVector IITValues; + ArrayRef IITEntries; + unsigned NextElt = 0; + if ((TableVal >> 31) != 0) { + // This is an offset into the IIT_LongEncodingTable. + IITEntries = IIT_LongEncodingTable; + + // Strip sentinel bit. + NextElt = (TableVal << 1) >> 1; + } else { + // Decode the TableVal into an array of IITValues. If the entry was encoded + // into a single word in the table itself, decode it now. + do { + IITValues.push_back(TableVal & 0xF); + TableVal >>= 4; + } while (TableVal); + + IITEntries = IITValues; + NextElt = 0; + } - return FunctionType::get(ResultTy, ArgTys, IsVarArg); + // Okay, decode the table into the output vector of IITDescriptors. 
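// [Editorial worked example -- not part of this patch. An intrinsic of type
// i32 (i32, i32) packs, result first and low nibble first, three IIT_I32
// nibbles (value 4): TableVal = 0x444. The do/while above peels 4, 4, 4 into
// IITValues, and each one decodes to an IITDescriptor::Integer of width 32.]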
+ DecodeIITType(NextElt, IITEntries, T); + while (NextElt != IITEntries.size() && IITEntries[NextElt] != 0) + DecodeIITType(NextElt, IITEntries, T); +} + + +static Type *DecodeFixedType(ArrayRef &Infos, + ArrayRef Tys, LLVMContext &Context) { + using namespace Intrinsic; + IITDescriptor D = Infos.front(); + Infos = Infos.slice(1); + + switch (D.Kind) { + case IITDescriptor::Void: return Type::getVoidTy(Context); + case IITDescriptor::MMX: return Type::getX86_MMXTy(Context); + case IITDescriptor::Metadata: return Type::getMetadataTy(Context); + case IITDescriptor::Float: return Type::getFloatTy(Context); + case IITDescriptor::Double: return Type::getDoubleTy(Context); + + case IITDescriptor::Integer: + return IntegerType::get(Context, D.Integer_Width); + case IITDescriptor::Vector: + return VectorType::get(DecodeFixedType(Infos, Tys, Context),D.Vector_Width); + case IITDescriptor::Pointer: + return PointerType::get(DecodeFixedType(Infos, Tys, Context), + D.Pointer_AddressSpace); + case IITDescriptor::Struct: { + Type *Elts[5]; + assert(D.Struct_NumElements <= 5 && "Can't handle this yet"); + for (unsigned i = 0, e = D.Struct_NumElements; i != e; ++i) + Elts[i] = DecodeFixedType(Infos, Tys, Context); + return StructType::get(Context, ArrayRef(Elts,D.Struct_NumElements)); + } + + case IITDescriptor::Argument: + return Tys[D.getArgumentNumber()]; + case IITDescriptor::ExtendVecArgument: + return VectorType::getExtendedElementVectorType(cast( + Tys[D.getArgumentNumber()])); + + case IITDescriptor::TruncVecArgument: + return VectorType::getTruncatedElementVectorType(cast( + Tys[D.getArgumentNumber()])); + } + llvm_unreachable("unhandled"); +} + + + +FunctionType *Intrinsic::getType(LLVMContext &Context, + ID id, ArrayRef Tys) { + SmallVector Table; + getIntrinsicInfoTableEntries(id, Table); + + ArrayRef TableRef = Table; + Type *ResultTy = DecodeFixedType(TableRef, Tys, Context); + + SmallVector ArgTys; + while (!TableRef.empty()) + ArgTys.push_back(DecodeFixedType(TableRef, Tys, Context)); + + return FunctionType::get(ResultTy, ArgTys, false); } bool Intrinsic::isOverloaded(ID id) { @@ -400,7 +621,8 @@ Function *Intrinsic::getDeclaration(Module *M, ID id, ArrayRef Tys) { bool Function::hasAddressTaken(const User* *PutOffender) const { for (Value::const_use_iterator I = use_begin(), E = use_end(); I != E; ++I) { const User *U = *I; - // FIXME: Check for blockaddress, which does not take the address. + if (isa(U)) + continue; if (!isa(U) && !isa(U)) return PutOffender ? (*PutOffender = U, true) : true; ImmutableCallSite CS(cast(U)); @@ -439,4 +661,3 @@ bool Function::callsFunctionThatReturnsTwice() const { return false; } -// vim: sw=2 ai diff --git a/lib/VMCore/GCOV.cpp b/lib/VMCore/GCOV.cpp index 595c452..003a5d4 100644 --- a/lib/VMCore/GCOV.cpp +++ b/lib/VMCore/GCOV.cpp @@ -64,7 +64,7 @@ bool GCOVFile::read(GCOVBuffer &Buffer) { /// dump - Dump GCOVFile content on standard out for debugging purposes. void GCOVFile::dump() { for (SmallVector::iterator I = Functions.begin(), - E = Functions.end(); I != E; ++I) + E = Functions.end(); I != E; ++I) (*I)->dump(); } @@ -72,7 +72,7 @@ void GCOVFile::dump() { /// reading .gcno and .gcda files. 
void GCOVFile::collectLineCounts(FileInfo &FI) { for (SmallVector::iterator I = Functions.begin(), - E = Functions.end(); I != E; ++I) + E = Functions.end(); I != E; ++I) (*I)->collectLineCounts(FI); FI.print(); } @@ -143,7 +143,7 @@ bool GCOVFunction::read(GCOVBuffer &Buff, GCOVFormat Format) { StringRef Filename = Buff.readString(); if (Buff.getCursor() == (Size - 4)) break; while (uint32_t L = Buff.readInt()) - Block->addLine(Filename, L); + Block->addLine(Filename, L); } Buff.readInt(); // flag } @@ -154,7 +154,7 @@ bool GCOVFunction::read(GCOVBuffer &Buff, GCOVFormat Format) { void GCOVFunction::dump() { outs() << "===== " << Name << " @ " << Filename << ":" << LineNumber << "\n"; for (SmallVector::iterator I = Blocks.begin(), - E = Blocks.end(); I != E; ++I) + E = Blocks.end(); I != E; ++I) (*I)->dump(); } @@ -162,7 +162,7 @@ void GCOVFunction::dump() { /// reading .gcno and .gcda files. void GCOVFunction::collectLineCounts(FileInfo &FI) { for (SmallVector::iterator I = Blocks.begin(), - E = Blocks.end(); I != E; ++I) + E = Blocks.end(); I != E; ++I) (*I)->collectLineCounts(FI); } @@ -186,7 +186,7 @@ void GCOVBlock::addLine(StringRef Filename, uint32_t LineNo) { /// reading .gcno and .gcda files. void GCOVBlock::collectLineCounts(FileInfo &FI) { for (StringMap::iterator I = Lines.begin(), - E = Lines.end(); I != E; ++I) + E = Lines.end(); I != E; ++I) I->second->collectLineCounts(FI, I->first(), Counter); } @@ -196,14 +196,14 @@ void GCOVBlock::dump() { if (!Edges.empty()) { outs() << "\tEdges : "; for (SmallVector::iterator I = Edges.begin(), E = Edges.end(); - I != E; ++I) + I != E; ++I) outs() << (*I) << ","; outs() << "\n"; } if (!Lines.empty()) { outs() << "\tLines : "; for (StringMap::iterator LI = Lines.begin(), - LE = Lines.end(); LI != LE; ++LI) { + LE = Lines.end(); LI != LE; ++LI) { outs() << LI->first() << " -> "; LI->second->dump(); outs() << "\n"; @@ -217,16 +217,16 @@ void GCOVBlock::dump() { /// collectLineCounts - Collect line counts. This must be used after /// reading .gcno and .gcda files. void GCOVLines::collectLineCounts(FileInfo &FI, StringRef Filename, - uint32_t Count) { + uint32_t Count) { for (SmallVector::iterator I = Lines.begin(), - E = Lines.end(); I != E; ++I) + E = Lines.end(); I != E; ++I) FI.addLineCount(Filename, *I, Count); } /// dump - Dump GCOVLines content on standard out for debugging purposes. 
void GCOVLines::dump() { for (SmallVector::iterator I = Lines.begin(), - E = Lines.end(); I != E; ++I) + E = Lines.end(); I != E; ++I) outs() << (*I) << ","; } @@ -266,12 +266,12 @@ void FileInfo::print() { StringRef AllLines = Buff.take()->getBuffer(); for (unsigned i = 0, e = L.size(); i != e; ++i) { if (L[i]) - outs() << L[i] << ":\t"; + outs() << L[i] << ":\t"; else - outs() << " :\t"; + outs() << " :\t"; std::pair P = AllLines.split('\n'); if (AllLines != P.first) - outs() << P.first; + outs() << P.first; outs() << "\n"; AllLines = P.second; } diff --git a/lib/VMCore/Globals.cpp b/lib/VMCore/Globals.cpp index 4254fb2..c428b88 100644 --- a/lib/VMCore/Globals.cpp +++ b/lib/VMCore/Globals.cpp @@ -82,12 +82,12 @@ bool GlobalValue::isDeclaration() const { GlobalVariable::GlobalVariable(Type *Ty, bool constant, LinkageTypes Link, Constant *InitVal, const Twine &Name, - bool ThreadLocal, unsigned AddressSpace) - : GlobalValue(PointerType::get(Ty, AddressSpace), + ThreadLocalMode TLMode, unsigned AddressSpace) + : GlobalValue(PointerType::get(Ty, AddressSpace), Value::GlobalVariableVal, OperandTraits::op_begin(this), InitVal != 0, Link, Name), - isConstantGlobal(constant), isThreadLocalSymbol(ThreadLocal) { + isConstantGlobal(constant), threadLocalMode(TLMode) { if (InitVal) { assert(InitVal->getType() == Ty && "Initializer should be the same type as the GlobalVariable!"); @@ -100,13 +100,13 @@ GlobalVariable::GlobalVariable(Type *Ty, bool constant, LinkageTypes Link, GlobalVariable::GlobalVariable(Module &M, Type *Ty, bool constant, LinkageTypes Link, Constant *InitVal, const Twine &Name, - GlobalVariable *Before, bool ThreadLocal, + GlobalVariable *Before, ThreadLocalMode TLMode, unsigned AddressSpace) - : GlobalValue(PointerType::get(Ty, AddressSpace), + : GlobalValue(PointerType::get(Ty, AddressSpace), Value::GlobalVariableVal, OperandTraits::op_begin(this), InitVal != 0, Link, Name), - isConstantGlobal(constant), isThreadLocalSymbol(ThreadLocal) { + isConstantGlobal(constant), threadLocalMode(TLMode) { if (InitVal) { assert(InitVal->getType() == Ty && "Initializer should be the same type as the GlobalVariable!"); diff --git a/lib/VMCore/IRBuilder.cpp b/lib/VMCore/IRBuilder.cpp index b459234..5c4e6d9 100644 --- a/lib/VMCore/IRBuilder.cpp +++ b/lib/VMCore/IRBuilder.cpp @@ -12,9 +12,9 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Support/IRBuilder.h" -#include "llvm/GlobalVariable.h" #include "llvm/Function.h" +#include "llvm/GlobalVariable.h" +#include "llvm/IRBuilder.h" #include "llvm/Intrinsics.h" #include "llvm/LLVMContext.h" using namespace llvm; @@ -28,7 +28,7 @@ Value *IRBuilderBase::CreateGlobalString(StringRef Str, const Twine &Name) { Module &M = *BB->getParent()->getParent(); GlobalVariable *GV = new GlobalVariable(M, StrConstant->getType(), true, GlobalValue::PrivateLinkage, - StrConstant, "", 0, false); + StrConstant); GV->setName(Name); GV->setUnnamedAddr(true); return GV; @@ -120,13 +120,13 @@ CreateMemMove(Value *Dst, Value *Src, Value *Size, unsigned Align, CallInst *IRBuilderBase::CreateLifetimeStart(Value *Ptr, ConstantInt *Size) { assert(isa(Ptr->getType()) && - "lifetime.start only applies to pointers."); + "lifetime.start only applies to pointers."); Ptr = getCastedInt8PtrValue(Ptr); if (!Size) Size = getInt64(-1); else assert(Size->getType() == getInt64Ty() && - "lifetime.start requires the size to be an i64"); + "lifetime.start requires the size to be an i64"); Value *Ops[] = { Size, Ptr }; Module *M = 
BB->getParent()->getParent(); Value *TheFn = Intrinsic::getDeclaration(M, Intrinsic::lifetime_start); @@ -135,13 +135,13 @@ CallInst *IRBuilderBase::CreateLifetimeStart(Value *Ptr, ConstantInt *Size) { CallInst *IRBuilderBase::CreateLifetimeEnd(Value *Ptr, ConstantInt *Size) { assert(isa(Ptr->getType()) && - "lifetime.end only applies to pointers."); + "lifetime.end only applies to pointers."); Ptr = getCastedInt8PtrValue(Ptr); if (!Size) Size = getInt64(-1); else assert(Size->getType() == getInt64Ty() && - "lifetime.end requires the size to be an i64"); + "lifetime.end requires the size to be an i64"); Value *Ops[] = { Size, Ptr }; Module *M = BB->getParent()->getParent(); Value *TheFn = Intrinsic::getDeclaration(M, Intrinsic::lifetime_end); diff --git a/lib/VMCore/Instruction.cpp b/lib/VMCore/Instruction.cpp index 5449714..66379a0 100644 --- a/lib/VMCore/Instruction.cpp +++ b/lib/VMCore/Instruction.cpp @@ -226,34 +226,52 @@ bool Instruction::isIdenticalToWhenDefined(const Instruction *I) const { RMWI->isVolatile() == cast(I)->isVolatile() && RMWI->getOrdering() == cast(I)->getOrdering() && RMWI->getSynchScope() == cast(I)->getSynchScope(); - + if (const PHINode *thisPHI = dyn_cast(this)) { + const PHINode *otherPHI = cast(I); + for (unsigned i = 0, e = thisPHI->getNumOperands(); i != e; ++i) { + if (thisPHI->getIncomingBlock(i) != otherPHI->getIncomingBlock(i)) + return false; + } + return true; + } return true; } // isSameOperationAs // This should be kept in sync with isEquivalentOperation in // lib/Transforms/IPO/MergeFunctions.cpp. -bool Instruction::isSameOperationAs(const Instruction *I) const { +bool Instruction::isSameOperationAs(const Instruction *I, + unsigned flags) const { + bool IgnoreAlignment = flags & CompareIgnoringAlignment; + bool UseScalarTypes = flags & CompareUsingScalarTypes; + if (getOpcode() != I->getOpcode() || getNumOperands() != I->getNumOperands() || - getType() != I->getType()) + (UseScalarTypes ? + getType()->getScalarType() != I->getType()->getScalarType() : + getType() != I->getType())) return false; // We have two instructions of identical opcode and #operands. Check to see // if all operands are the same type for (unsigned i = 0, e = getNumOperands(); i != e; ++i) - if (getOperand(i)->getType() != I->getOperand(i)->getType()) + if (UseScalarTypes ? + getOperand(i)->getType()->getScalarType() != + I->getOperand(i)->getType()->getScalarType() : + getOperand(i)->getType() != I->getOperand(i)->getType()) return false; // Check special state that is a part of some instructions. if (const LoadInst *LI = dyn_cast(this)) return LI->isVolatile() == cast(I)->isVolatile() && - LI->getAlignment() == cast(I)->getAlignment() && + (LI->getAlignment() == cast(I)->getAlignment() || + IgnoreAlignment) && LI->getOrdering() == cast(I)->getOrdering() && LI->getSynchScope() == cast(I)->getSynchScope(); if (const StoreInst *SI = dyn_cast(this)) return SI->isVolatile() == cast(I)->isVolatile() && - SI->getAlignment() == cast(I)->getAlignment() && + (SI->getAlignment() == cast(I)->getAlignment() || + IgnoreAlignment) && SI->getOrdering() == cast(I)->getOrdering() && SI->getSynchScope() == cast(I)->getSynchScope(); if (const CmpInst *CI = dyn_cast(this)) @@ -388,6 +406,29 @@ bool Instruction::isCommutative(unsigned op) { } } +/// isIdempotent - Return true if the instruction is idempotent: +/// +/// Idempotent operators satisfy: x op x === x +/// +/// In LLVM, the And and Or operators are idempotent. 
+/// +bool Instruction::isIdempotent(unsigned Opcode) { + return Opcode == And || Opcode == Or; +} + +/// isNilpotent - Return true if the instruction is nilpotent: +/// +/// Nilpotent operators satisfy: x op x === Id, +/// +/// where Id is the identity for the operator, i.e. a constant such that +/// x op Id === x and Id op x === x for all x. +/// +/// In LLVM, the Xor operator is nilpotent. +/// +bool Instruction::isNilpotent(unsigned Opcode) { + return Opcode == Xor; +} + Instruction *Instruction::clone() const { Instruction *New = clone_impl(); New->SubclassOptionalData = SubclassOptionalData; diff --git a/lib/VMCore/Instructions.cpp b/lib/VMCore/Instructions.cpp index 6c5db32..9af98e8 100644 --- a/lib/VMCore/Instructions.cpp +++ b/lib/VMCore/Instructions.cpp @@ -161,8 +161,14 @@ Value *PHINode::hasConstantValue() const { // Exploit the fact that phi nodes always have at least one entry. Value *ConstantValue = getIncomingValue(0); for (unsigned i = 1, e = getNumIncomingValues(); i != e; ++i) - if (getIncomingValue(i) != ConstantValue) - return 0; // Incoming values not all the same. + if (getIncomingValue(i) != ConstantValue && getIncomingValue(i) != this) { + if (ConstantValue != this) + return 0; // Incoming values not all the same. + // The case where the first value is this PHI. + ConstantValue = getIncomingValue(i); + } + if (ConstantValue == this) + return UndefValue::get(getType()); return ConstantValue; } @@ -3158,6 +3164,7 @@ SwitchInst::SwitchInst(const SwitchInst &SI) OL[i] = InOL[i]; OL[i+1] = InOL[i+1]; } + TheSubsets = SI.TheSubsets; SubclassOptionalData = SI.SubclassOptionalData; } @@ -3169,6 +3176,16 @@ SwitchInst::~SwitchInst() { /// addCase - Add an entry to the switch instruction... /// void SwitchInst::addCase(ConstantInt *OnVal, BasicBlock *Dest) { + IntegersSubsetToBB Mapping; + + // FIXME: Currently we work with ConstantInt based cases. + // So inititalize IntItem container directly from ConstantInt. + Mapping.add(IntItem::fromConstantInt(OnVal)); + IntegersSubset CaseRanges = Mapping.getCase(); + addCase(CaseRanges, Dest); +} + +void SwitchInst::addCase(IntegersSubset& OnVal, BasicBlock *Dest) { unsigned NewCaseIdx = getNumCases(); unsigned OpNo = NumOperands; if (OpNo+2 > ReservedSpace) @@ -3176,14 +3193,17 @@ void SwitchInst::addCase(ConstantInt *OnVal, BasicBlock *Dest) { // Initialize some new operands. assert(OpNo+1 < ReservedSpace && "Growing didn't work!"); NumOperands = OpNo+2; - CaseIt Case(this, NewCaseIdx); - Case.setValue(OnVal); + + SubsetsIt TheSubsetsIt = TheSubsets.insert(TheSubsets.end(), OnVal); + + CaseIt Case(this, NewCaseIdx, TheSubsetsIt); + Case.updateCaseValueOperand(OnVal); Case.setSuccessor(Dest); } /// removeCase - This method removes the specified case and its successor /// from the switch instruction. -void SwitchInst::removeCase(CaseIt i) { +void SwitchInst::removeCase(CaseIt& i) { unsigned idx = i.getCaseIndex(); assert(2 + idx*2 < getNumOperands() && "Case index out of range!!!"); @@ -3200,6 +3220,16 @@ void SwitchInst::removeCase(CaseIt i) { // Nuke the last value. 
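// [Editorial note -- not part of this patch. The TheSubsets maintenance
// added below mirrors the operand array's swap-with-last removal: the
// deleted case's subset is overwritten with the final subset and the tail
// entry popped, so removal stays O(1) at the cost of case order.]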
OL[NumOps-2].set(0); OL[NumOps-2+1].set(0); + + // Do the same with TheCases collection: + if (i.SubsetIt != --TheSubsets.end()) { + *i.SubsetIt = TheSubsets.back(); + TheSubsets.pop_back(); + } else { + TheSubsets.pop_back(); + i.SubsetIt = TheSubsets.end(); + } + NumOperands = NumOps-2; } diff --git a/lib/VMCore/Metadata.cpp b/lib/VMCore/Metadata.cpp index 090b09a..95e5a8b 100644 --- a/lib/VMCore/Metadata.cpp +++ b/lib/VMCore/Metadata.cpp @@ -21,6 +21,7 @@ #include "llvm/ADT/SmallString.h" #include "llvm/ADT/STLExtras.h" #include "SymbolTableListTraitsImpl.h" +#include "llvm/Support/ConstantRange.h" #include "llvm/Support/LeakDetector.h" #include "llvm/Support/ValueHandle.h" using namespace llvm; @@ -66,7 +67,11 @@ public: MDNodeOperand(Value *V) : CallbackVH(V) {} ~MDNodeOperand() {} - void set(Value *V) { this->setValPtr(V); } + void set(Value *V) { + unsigned IsFirst = this->getValPtrInt(); + this->setValPtr(V); + this->setAsFirstOperand(IsFirst); + } /// setAsFirstOperand - Accessor method to mark the operand as the first in /// the list. @@ -95,7 +100,7 @@ void MDNodeOperand::allUsesReplacedWith(Value *NV) { static MDNodeOperand *getOperandPtr(MDNode *N, unsigned Op) { // Use <= instead of < to permit a one-past-the-end address. assert(Op <= N->getNumOperands() && "Invalid operand number"); - return reinterpret_cast(N+1)+Op; + return reinterpret_cast(N + 1) + Op; } void MDNode::replaceOperandWith(unsigned i, Value *Val) { @@ -122,7 +127,6 @@ MDNode::MDNode(LLVMContext &C, ArrayRef Vals, bool isFunctionLocal) } } - /// ~MDNode - Destroy MDNode. MDNode::~MDNode() { assert((getSubclassDataFromValue() & DestroyFlag) != 0 && @@ -196,7 +200,7 @@ const Function *MDNode::getFunction() const { // destroy - Delete this node. Only when there are no uses. void MDNode::destroy() { setValueSubclassData(getSubclassDataFromValue() | DestroyFlag); - // Placement delete, the free the memory. + // Placement delete, then free the memory. this->~MDNode(); free(this); } @@ -247,7 +251,7 @@ MDNode *MDNode::getMDNode(LLVMContext &Context, ArrayRef Vals, } // Coallocate space for the node and Operands together, then placement new. - void *Ptr = malloc(sizeof(MDNode)+Vals.size()*sizeof(MDNodeOperand)); + void *Ptr = malloc(sizeof(MDNode) + Vals.size() * sizeof(MDNodeOperand)); N = new (Ptr) MDNode(Context, Vals, isFunctionLocal); // Cache the operand hash. @@ -275,7 +279,7 @@ MDNode *MDNode::getIfExists(LLVMContext &Context, ArrayRef Vals) { MDNode *MDNode::getTemporary(LLVMContext &Context, ArrayRef Vals) { MDNode *N = - (MDNode *)malloc(sizeof(MDNode)+Vals.size()*sizeof(MDNodeOperand)); + (MDNode *)malloc(sizeof(MDNode) + Vals.size() * sizeof(MDNodeOperand)); N = new (N) MDNode(Context, Vals, FL_No); N->setValueSubclassData(N->getSubclassDataFromValue() | NotUniquedBit); @@ -398,6 +402,155 @@ void MDNode::replaceOperand(MDNodeOperand *Op, Value *To) { } } +MDNode *MDNode::getMostGenericTBAA(MDNode *A, MDNode *B) { + if (!A || !B) + return NULL; + + if (A == B) + return A; + + SmallVector PathA; + MDNode *T = A; + while (T) { + PathA.push_back(T); + T = T->getNumOperands() >= 2 ? cast_or_null(T->getOperand(1)) : 0; + } + + SmallVector PathB; + T = B; + while (T) { + PathB.push_back(T); + T = T->getNumOperands() >= 2 ? 
cast_or_null<MDNode>(T->getOperand(1)) : 0;
+  }
+
+  int IA = PathA.size() - 1;
+  int IB = PathB.size() - 1;
+
+  MDNode *Ret = 0;
+  while (IA >= 0 && IB >= 0) {
+    if (PathA[IA] == PathB[IB])
+      Ret = PathA[IA];
+    else
+      break;
+    --IA;
+    --IB;
+  }
+  return Ret;
+}
+
+MDNode *MDNode::getMostGenericFPMath(MDNode *A, MDNode *B) {
+  if (!A || !B)
+    return NULL;
+
+  APFloat AVal = cast<ConstantFP>(A->getOperand(0))->getValueAPF();
+  APFloat BVal = cast<ConstantFP>(B->getOperand(0))->getValueAPF();
+  if (AVal.compare(BVal) == APFloat::cmpLessThan)
+    return A;
+  return B;
+}
+
+static bool isContiguous(const ConstantRange &A, const ConstantRange &B) {
+  return A.getUpper() == B.getLower() || A.getLower() == B.getUpper();
+}
+
+static bool canBeMerged(const ConstantRange &A, const ConstantRange &B) {
+  return !A.intersectWith(B).isEmptySet() || isContiguous(A, B);
+}
+
+static bool tryMergeRange(SmallVector<Value *, 4> &EndPoints, ConstantInt *Low,
+                          ConstantInt *High) {
+  ConstantRange NewRange(Low->getValue(), High->getValue());
+  unsigned Size = EndPoints.size();
+  APInt LB = cast<ConstantInt>(EndPoints[Size - 2])->getValue();
+  APInt LE = cast<ConstantInt>(EndPoints[Size - 1])->getValue();
+  ConstantRange LastRange(LB, LE);
+  if (canBeMerged(NewRange, LastRange)) {
+    ConstantRange Union = LastRange.unionWith(NewRange);
+    Type *Ty = High->getType();
+    EndPoints[Size - 2] = ConstantInt::get(Ty, Union.getLower());
+    EndPoints[Size - 1] = ConstantInt::get(Ty, Union.getUpper());
+    return true;
+  }
+  return false;
+}
+
+static void addRange(SmallVector<Value *, 4> &EndPoints, ConstantInt *Low,
+                     ConstantInt *High) {
+  if (!EndPoints.empty())
+    if (tryMergeRange(EndPoints, Low, High))
+      return;
+
+  EndPoints.push_back(Low);
+  EndPoints.push_back(High);
+}
+
+MDNode *MDNode::getMostGenericRange(MDNode *A, MDNode *B) {
+  // Given two ranges, we want to compute the union of the ranges. This
+  // is slightly complicated by having to combine the intervals and merge
+  // the ones that overlap.
+
+  if (!A || !B)
+    return NULL;
+
+  if (A == B)
+    return A;
+
+  // First, walk both lists in order of the lower boundary of each interval.
+  // At each step, try to merge the new interval into the last one we added.
+  SmallVector<Value *, 4> EndPoints;
+  int AI = 0;
+  int BI = 0;
+  int AN = A->getNumOperands() / 2;
+  int BN = B->getNumOperands() / 2;
+  while (AI < AN && BI < BN) {
+    ConstantInt *ALow = cast<ConstantInt>(A->getOperand(2 * AI));
+    ConstantInt *BLow = cast<ConstantInt>(B->getOperand(2 * BI));
+
+    if (ALow->getValue().slt(BLow->getValue())) {
+      addRange(EndPoints, ALow, cast<ConstantInt>(A->getOperand(2 * AI + 1)));
+      ++AI;
+    } else {
+      addRange(EndPoints, BLow, cast<ConstantInt>(B->getOperand(2 * BI + 1)));
+      ++BI;
+    }
+  }
+  while (AI < AN) {
+    addRange(EndPoints, cast<ConstantInt>(A->getOperand(2 * AI)),
+             cast<ConstantInt>(A->getOperand(2 * AI + 1)));
+    ++AI;
+  }
+  while (BI < BN) {
+    addRange(EndPoints, cast<ConstantInt>(B->getOperand(2 * BI)),
+             cast<ConstantInt>(B->getOperand(2 * BI + 1)));
+    ++BI;
+  }
+
+  // If we have more than 2 ranges (4 endpoints) we have to try to merge
+  // the last and first ones.
+  unsigned Size = EndPoints.size();
+  if (Size > 4) {
+    ConstantInt *FB = cast<ConstantInt>(EndPoints[0]);
+    ConstantInt *FE = cast<ConstantInt>(EndPoints[1]);
+    if (tryMergeRange(EndPoints, FB, FE)) {
+      for (unsigned i = 0; i < Size - 2; ++i) {
+        EndPoints[i] = EndPoints[i + 2];
+      }
+      EndPoints.resize(Size - 2);
+    }
+  }
+
+  // If in the end we have a single range, it is possible that it is now the
+  // full range. Just drop the metadata in that case.
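// [Editorial worked example -- not part of this patch. Merging
// A = { [0,4), [10,12) } with B = { [3,6) } walks the lists by lower bound:
// [0,4) is added, [3,6) overlaps it and fuses to [0,6), then [10,12) is
// appended, giving { [0,6), [10,12) }. Had the endpoints closed into one
// interval covering the whole integer width, the check below would drop the
// metadata entirely, since a full range carries no information.]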
+ if (EndPoints.size() == 2) { + ConstantRange Range(cast(EndPoints[0])->getValue(), + cast(EndPoints[1])->getValue()); + if (Range.isFullSet()) + return NULL; + } + + return MDNode::get(A->getContext(), EndPoints); +} + //===----------------------------------------------------------------------===// // NamedMDNode implementation. // diff --git a/lib/VMCore/Module.cpp b/lib/VMCore/Module.cpp index 3c67191..5b5176b 100644 --- a/lib/VMCore/Module.cpp +++ b/lib/VMCore/Module.cpp @@ -65,20 +65,20 @@ Module::~Module() { Module::Endianness Module::getEndianness() const { StringRef temp = DataLayout; Module::Endianness ret = AnyEndianness; - + while (!temp.empty()) { std::pair P = getToken(temp, "-"); - + StringRef token = P.first; temp = P.second; - + if (token[0] == 'e') { ret = LittleEndian; } else if (token[0] == 'E') { ret = BigEndian; } } - + return ret; } @@ -86,13 +86,13 @@ Module::Endianness Module::getEndianness() const { Module::PointerSize Module::getPointerSize() const { StringRef temp = DataLayout; Module::PointerSize ret = AnyPointerSize; - + while (!temp.empty()) { std::pair TmpP = getToken(temp, "-"); temp = TmpP.second; TmpP = getToken(TmpP.first, ":"); StringRef token = TmpP.second, signalToken = TmpP.first; - + if (signalToken[0] == 'p') { int size = 0; getToken(token, ":").first.getAsInteger(10, size); @@ -102,7 +102,7 @@ Module::PointerSize Module::getPointerSize() const { ret = Pointer64; } } - + return ret; } @@ -164,9 +164,9 @@ Constant *Module::getOrInsertFunction(StringRef Name, // right type. if (F->getType() != PointerType::getUnqual(Ty)) return ConstantExpr::getBitCast(F, PointerType::getUnqual(Ty)); - + // Otherwise, we just found the existing function or a prototype. - return F; + return F; } Constant *Module::getOrInsertTargetIntrinsic(StringRef Name, @@ -183,13 +183,12 @@ Constant *Module::getOrInsertTargetIntrinsic(StringRef Name, } // Otherwise, we just found the existing function or a prototype. - return F; + return F; } Constant *Module::getOrInsertFunction(StringRef Name, FunctionType *Ty) { - AttrListPtr AttributeList = AttrListPtr::get((AttributeWithIndex *)0, 0); - return getOrInsertFunction(Name, Ty, AttributeList); + return getOrInsertFunction(Name, Ty, AttrListPtr()); } // getOrInsertFunction - Look up the specified function in the module symbol @@ -229,9 +228,9 @@ Constant *Module::getOrInsertFunction(StringRef Name, va_end(Args); // Build the function type and chain to the other getOrInsertFunction... - return getOrInsertFunction(Name, + return getOrInsertFunction(Name, FunctionType::get(RetTy, ArgTys, false), - AttrListPtr::get((AttributeWithIndex *)0, 0)); + AttrListPtr()); } // getFunction - Look up the specified function in the module symbol table. @@ -254,7 +253,7 @@ Function *Module::getFunction(StringRef Name) const { /// GlobalVariable *Module::getGlobalVariable(StringRef Name, bool AllowLocal) const { - if (GlobalVariable *Result = + if (GlobalVariable *Result = dyn_cast_or_null(getNamedValue(Name))) if (AllowLocal || !Result->hasLocalLinkage()) return Result; @@ -282,7 +281,7 @@ Constant *Module::getOrInsertGlobal(StringRef Name, Type *Ty) { // right type. if (GV->getType() != PointerType::getUnqual(Ty)) return ConstantExpr::getBitCast(GV, PointerType::getUnqual(Ty)); - + // Otherwise, we just found the existing function or a prototype. return GV; } @@ -299,7 +298,7 @@ GlobalAlias *Module::getNamedAlias(StringRef Name) const { } /// getNamedMetadata - Return the first NamedMDNode in the module with the -/// specified name. 
This method returns null if a NamedMDNode with the +/// specified name. This method returns null if a NamedMDNode with the /// specified name is not found. NamedMDNode *Module::getNamedMetadata(const Twine &Name) const { SmallString<256> NameData; @@ -307,8 +306,8 @@ NamedMDNode *Module::getNamedMetadata(const Twine &Name) const { return static_cast *>(NamedMDSymTab)->lookup(NameRef); } -/// getOrInsertNamedMetadata - Return the first named MDNode in the module -/// with the specified name. This method returns a new NamedMDNode if a +/// getOrInsertNamedMetadata - Return the first named MDNode in the module +/// with the specified name. This method returns a new NamedMDNode if a /// NamedMDNode with the specified name is not found. NamedMDNode *Module::getOrInsertNamedMetadata(StringRef Name) { NamedMDNode *&NMD = @@ -468,128 +467,3 @@ void Module::removeLibrary(StringRef Lib) { return; } } - -//===----------------------------------------------------------------------===// -// Type finding functionality. -//===----------------------------------------------------------------------===// - -namespace { - /// TypeFinder - Walk over a module, identifying all of the types that are - /// used by the module. - class TypeFinder { - // To avoid walking constant expressions multiple times and other IR - // objects, we keep several helper maps. - DenseSet VisitedConstants; - DenseSet VisitedTypes; - - std::vector &StructTypes; - public: - TypeFinder(std::vector &structTypes) - : StructTypes(structTypes) {} - - void run(const Module &M) { - // Get types from global variables. - for (Module::const_global_iterator I = M.global_begin(), - E = M.global_end(); I != E; ++I) { - incorporateType(I->getType()); - if (I->hasInitializer()) - incorporateValue(I->getInitializer()); - } - - // Get types from aliases. - for (Module::const_alias_iterator I = M.alias_begin(), - E = M.alias_end(); I != E; ++I) { - incorporateType(I->getType()); - if (const Value *Aliasee = I->getAliasee()) - incorporateValue(Aliasee); - } - - SmallVector, 4> MDForInst; - - // Get types from functions. - for (Module::const_iterator FI = M.begin(), E = M.end(); FI != E; ++FI) { - incorporateType(FI->getType()); - - for (Function::const_iterator BB = FI->begin(), E = FI->end(); - BB != E;++BB) - for (BasicBlock::const_iterator II = BB->begin(), - E = BB->end(); II != E; ++II) { - const Instruction &I = *II; - // Incorporate the type of the instruction and all its operands. - incorporateType(I.getType()); - for (User::const_op_iterator OI = I.op_begin(), OE = I.op_end(); - OI != OE; ++OI) - incorporateValue(*OI); - - // Incorporate types hiding in metadata. - I.getAllMetadataOtherThanDebugLoc(MDForInst); - for (unsigned i = 0, e = MDForInst.size(); i != e; ++i) - incorporateMDNode(MDForInst[i].second); - MDForInst.clear(); - } - } - - for (Module::const_named_metadata_iterator I = M.named_metadata_begin(), - E = M.named_metadata_end(); I != E; ++I) { - const NamedMDNode *NMD = I; - for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) - incorporateMDNode(NMD->getOperand(i)); - } - } - - private: - void incorporateType(Type *Ty) { - // Check to see if we're already visited this type. - if (!VisitedTypes.insert(Ty).second) - return; - - // If this is a structure or opaque type, add a name for the type. - if (StructType *STy = dyn_cast(Ty)) - StructTypes.push_back(STy); - - // Recursively walk all contained types. 
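Module::getEndianness() and getPointerSize() above both scan the DataLayout target description by peeling off dash-separated tokens. A minimal sketch of that scanning loop, using std::string in place of StringRef; takeToken() is a hypothetical stand-in for llvm::getToken(), not the LLVM API:

    #include <cstdio>
    #include <string>
    #include <utility>

    // Split Str at the first Sep into (token, rest); illustrative only.
    static std::pair<std::string, std::string> takeToken(const std::string &Str,
                                                         char Sep) {
      std::string::size_type Pos = Str.find(Sep);
      if (Pos == std::string::npos)
        return std::make_pair(Str, std::string());
      return std::make_pair(Str.substr(0, Pos), Str.substr(Pos + 1));
    }

    int main() {
      // A typical target description, scanned the same way as above.
      std::string Rest = "e-p:64:64:64-i32:32";
      while (!Rest.empty()) {
        std::pair<std::string, std::string> P = takeToken(Rest, '-');
        const std::string &Tok = P.first;
        Rest = P.second;
        if (!Tok.empty() && Tok[0] == 'e')
          std::puts("little endian");
        else if (!Tok.empty() && Tok[0] == 'E')
          std::puts("big endian");
        else if (!Tok.empty() && Tok[0] == 'p')
          std::printf("pointer token: %s\n", Tok.c_str());
      }
      return 0;
    }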
- for (Type::subtype_iterator I = Ty->subtype_begin(), - E = Ty->subtype_end(); I != E; ++I) - incorporateType(*I); - } - - /// incorporateValue - This method is used to walk operand lists finding - /// types hiding in constant expressions and other operands that won't be - /// walked in other ways. GlobalValues, basic blocks, instructions, and - /// inst operands are all explicitly enumerated. - void incorporateValue(const Value *V) { - if (const MDNode *M = dyn_cast(V)) - return incorporateMDNode(M); - if (!isa(V) || isa(V)) return; - - // Already visited? - if (!VisitedConstants.insert(V).second) - return; - - // Check this type. - incorporateType(V->getType()); - - // Look in operands for types. - const User *U = cast(V); - for (Constant::const_op_iterator I = U->op_begin(), - E = U->op_end(); I != E;++I) - incorporateValue(*I); - } - - void incorporateMDNode(const MDNode *V) { - - // Already visited? - if (!VisitedConstants.insert(V).second) - return; - - // Look in operands for types. - for (unsigned i = 0, e = V->getNumOperands(); i != e; ++i) - if (Value *Op = V->getOperand(i)) - incorporateValue(Op); - } - }; -} // end anonymous namespace - -void Module::findUsedStructTypes(std::vector &StructTypes) const { - TypeFinder(StructTypes).run(*this); -} diff --git a/lib/VMCore/PassManager.cpp b/lib/VMCore/PassManager.cpp index 28fbaa6..4530c04 100644 --- a/lib/VMCore/PassManager.cpp +++ b/lib/VMCore/PassManager.cpp @@ -478,8 +478,7 @@ PMTopLevelManager::PMTopLevelManager(PMDataManager *PMDM) { /// Set pass P as the last user of the given analysis passes. void -PMTopLevelManager::setLastUser(const SmallVectorImpl &AnalysisPasses, - Pass *P) { +PMTopLevelManager::setLastUser(ArrayRef AnalysisPasses, Pass *P) { unsigned PDepth = 0; if (P->getResolver()) PDepth = P->getResolver()->getPMDataManager().getDepth(); @@ -594,6 +593,26 @@ void PMTopLevelManager::schedulePass(Pass *P) { Pass *AnalysisPass = findAnalysisPass(*I); if (!AnalysisPass) { const PassInfo *PI = PassRegistry::getPassRegistry()->getPassInfo(*I); + + if (PI == NULL) { + // Pass P is not in the global PassRegistry + dbgs() << "Pass '" << P->getPassName() << "' is not initialized." << "\n"; + dbgs() << "Verify if there is a pass dependency cycle." << "\n"; + dbgs() << "Required Passes:" << "\n"; + for (AnalysisUsage::VectorType::const_iterator I2 = RequiredSet.begin(), + E = RequiredSet.end(); I2 != E && I2 != I; ++I2) { + Pass *AnalysisPass2 = findAnalysisPass(*I2); + if (AnalysisPass2) { + dbgs() << "\t" << AnalysisPass2->getPassName() << "\n"; + } + else { + dbgs() << "\t" << "Error: Required pass not found! Possible causes:" << "\n"; + dbgs() << "\t\t" << "- Pass misconfiguration (e.g.: missing macros)" << "\n"; + dbgs() << "\t\t" << "- Corruption of the global PassRegistry" << "\n"; + } + } + } + assert(PI && "Expected required passes to be initialized"); AnalysisPass = PI->createPass(); if (P->getPotentialPassManagerType () == diff --git a/lib/VMCore/Type.cpp b/lib/VMCore/Type.cpp index c6f3558..5e9a00f 100644 --- a/lib/VMCore/Type.cpp +++ b/lib/VMCore/Type.cpp @@ -464,19 +464,26 @@ void StructType::setBody(ArrayRef Elements, bool isPacked) { void StructType::setName(StringRef Name) { if (Name == getName()) return; - // If this struct already had a name, remove its symbol table entry. 
- if (SymbolTableEntry) { - getContext().pImpl->NamedStructTypes.erase(getName()); - SymbolTableEntry = 0; - } - + StringMap &SymbolTable = getContext().pImpl->NamedStructTypes; + typedef StringMap::MapEntryTy EntryTy; + + // If this struct already had a name, remove its symbol table entry. Don't + // delete the data yet because it may be part of the new name. + if (SymbolTableEntry) + SymbolTable.remove((EntryTy *)SymbolTableEntry); + // If this is just removing the name, we're done. - if (Name.empty()) + if (Name.empty()) { + if (SymbolTableEntry) { + // Delete the old string data. + ((EntryTy *)SymbolTableEntry)->Destroy(SymbolTable.getAllocator()); + SymbolTableEntry = 0; + } return; + } // Look up the entry for the name. - StringMapEntry *Entry = - &getContext().pImpl->NamedStructTypes.GetOrCreateValue(Name); + EntryTy *Entry = &getContext().pImpl->NamedStructTypes.GetOrCreateValue(Name); // While we have a name collision, try a random rename. if (Entry->getValue()) { @@ -497,7 +504,10 @@ void StructType::setName(StringRef Name) { // Okay, we found an entry that isn't used. It's us! Entry->setValue(this); - + + // Delete the old string data. + if (SymbolTableEntry) + ((EntryTy *)SymbolTableEntry)->Destroy(SymbolTable.getAllocator()); SymbolTableEntry = Entry; } diff --git a/lib/VMCore/TypeFinder.cpp b/lib/VMCore/TypeFinder.cpp new file mode 100644 index 0000000..4de649f --- /dev/null +++ b/lib/VMCore/TypeFinder.cpp @@ -0,0 +1,148 @@ +//===-- TypeFinder.cpp - Implement the TypeFinder class -------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the TypeFinder class for the VMCore library. +// +//===----------------------------------------------------------------------===// + +#include "llvm/TypeFinder.h" +#include "llvm/BasicBlock.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Function.h" +#include "llvm/Metadata.h" +#include "llvm/Module.h" +#include "llvm/ADT/SmallVector.h" +using namespace llvm; + +void TypeFinder::run(const Module &M, bool onlyNamed) { + OnlyNamed = onlyNamed; + + // Get types from global variables. + for (Module::const_global_iterator I = M.global_begin(), + E = M.global_end(); I != E; ++I) { + incorporateType(I->getType()); + if (I->hasInitializer()) + incorporateValue(I->getInitializer()); + } + + // Get types from aliases. + for (Module::const_alias_iterator I = M.alias_begin(), + E = M.alias_end(); I != E; ++I) { + incorporateType(I->getType()); + if (const Value *Aliasee = I->getAliasee()) + incorporateValue(Aliasee); + } + + // Get types from functions. + SmallVector, 4> MDForInst; + for (Module::const_iterator FI = M.begin(), E = M.end(); FI != E; ++FI) { + incorporateType(FI->getType()); + + // First incorporate the arguments. + for (Function::const_arg_iterator AI = FI->arg_begin(), + AE = FI->arg_end(); AI != AE; ++AI) + incorporateValue(AI); + + for (Function::const_iterator BB = FI->begin(), E = FI->end(); + BB != E;++BB) + for (BasicBlock::const_iterator II = BB->begin(), + E = BB->end(); II != E; ++II) { + const Instruction &I = *II; + + // Incorporate the type of the instruction. + incorporateType(I.getType()); + + // Incorporate non-instruction operand types. (We are incorporating all + // instructions with this loop.) 
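The reworked StructType::setName() above removes the old symbol-table entry before probing for the requested name and uniquifies on collision; it must defer destroying the old string data because the new name may point into that very storage. A toy registry showing the remove-then-uniquify flow; names are illustrative, std::string copies sidestep the deferred-destroy subtlety, and a counter stands in for LLVM's randomized rename:

    #include <cstdio>
    #include <map>
    #include <string>

    static std::map<std::string, int> Registry;

    // Rename an entry: drop the old key first, then find a free name.
    static std::string setName(const std::string &Old, std::string Name,
                               int ID) {
      if (!Old.empty())
        Registry.erase(Old);            // remove the old entry up front
      unsigned Counter = 0;
      std::string Unique = Name;
      while (Registry.count(Unique))    // collision: append a suffix
        Unique = Name + "." + std::to_string(Counter++);
      Registry[Unique] = ID;
      return Unique;
    }

    int main() {
      std::string A = setName("", "pair", 1);  // "pair"
      std::string B = setName("", "pair", 2);  // collision -> "pair.0"
      std::string C = setName(B, "pair", 2);   // old key freed -> "pair.0"
      std::printf("%s %s %s\n", A.c_str(), B.c_str(), C.c_str());
      return 0;
    }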
+        for (User::const_op_iterator OI = I.op_begin(), OE = I.op_end();
+             OI != OE; ++OI)
+          if (!isa<Instruction>(OI))
+            incorporateValue(*OI);
+
+        // Incorporate types hiding in metadata.
+        I.getAllMetadataOtherThanDebugLoc(MDForInst);
+        for (unsigned i = 0, e = MDForInst.size(); i != e; ++i)
+          incorporateMDNode(MDForInst[i].second);
+
+        MDForInst.clear();
+      }
+  }
+
+  for (Module::const_named_metadata_iterator I = M.named_metadata_begin(),
+         E = M.named_metadata_end(); I != E; ++I) {
+    const NamedMDNode *NMD = I;
+    for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i)
+      incorporateMDNode(NMD->getOperand(i));
+  }
+}
+
+void TypeFinder::clear() {
+  VisitedConstants.clear();
+  VisitedTypes.clear();
+  StructTypes.clear();
+}
+
+/// incorporateType - This method adds the type to the list of used structures
+/// if it's not in there already.
+void TypeFinder::incorporateType(Type *Ty) {
+  // Check to see if we've already visited this type.
+  if (!VisitedTypes.insert(Ty).second)
+    return;
+
+  // If this is a structure or opaque type, add a name for the type.
+  if (StructType *STy = dyn_cast<StructType>(Ty))
+    if (!OnlyNamed || STy->hasName())
+      StructTypes.push_back(STy);
+
+  // Recursively walk all contained types.
+  for (Type::subtype_iterator I = Ty->subtype_begin(),
+         E = Ty->subtype_end(); I != E; ++I)
+    incorporateType(*I);
+}
+
+/// incorporateValue - This method is used to walk operand lists finding types
+/// hiding in constant expressions and other operands that won't be walked in
+/// other ways. GlobalValues, basic blocks, instructions, and inst operands are
+/// all explicitly enumerated.
+void TypeFinder::incorporateValue(const Value *V) {
+  if (const MDNode *M = dyn_cast<MDNode>(V))
+    return incorporateMDNode(M);
+
+  if (!isa<Constant>(V) || isa<GlobalValue>(V)) return;
+
+  // Already visited?
+  if (!VisitedConstants.insert(V).second)
+    return;
+
+  // Check this type.
+  incorporateType(V->getType());
+
+  // If this is an instruction, we incorporate it separately.
+  if (isa<Instruction>(V))
+    return;
+
+  // Look in operands for types.
+  const User *U = cast<User>(V);
+  for (Constant::const_op_iterator I = U->op_begin(),
+         E = U->op_end(); I != E; ++I)
+    incorporateValue(*I);
+}
+
+/// incorporateMDNode - This method is used to walk the operands of an MDNode to
+/// find types hiding within.
+void TypeFinder::incorporateMDNode(const MDNode *V) {
+  // Already visited?
+  if (!VisitedConstants.insert(V).second)
+    return;
+
+  // Look in operands for types.
+  for (unsigned i = 0, e = V->getNumOperands(); i != e; ++i)
+    if (Value *Op = V->getOperand(i))
+      incorporateValue(Op);
+}
diff --git a/lib/VMCore/Value.cpp b/lib/VMCore/Value.cpp
index 4006b2c..d871108 100644
--- a/lib/VMCore/Value.cpp
+++ b/lib/VMCore/Value.cpp
@@ -686,6 +686,9 @@ void ValueHandleBase::ValueIsRAUWd(Value *Old, Value *New) {
 #endif
 }
 
-/// ~CallbackVH. Empty, but defined here to avoid emitting the vtable
-/// more than once.
-CallbackVH::~CallbackVH() {}
+// Default implementation for CallbackVH.
+void CallbackVH::allUsesReplacedWith(Value *) {} + +void CallbackVH::deleted() { + setValPtr(NULL); +} diff --git a/lib/VMCore/ValueTypes.cpp b/lib/VMCore/ValueTypes.cpp index 9a8e185..d1ca953 100644 --- a/lib/VMCore/ValueTypes.cpp +++ b/lib/VMCore/ValueTypes.cpp @@ -71,6 +71,10 @@ bool EVT::isExtended512BitVector() const { return isExtendedVector() && getSizeInBits() == 512; } +bool EVT::isExtended1024BitVector() const { + return isExtendedVector() && getSizeInBits() == 1024; +} + EVT EVT::getExtendedVectorElementType() const { assert(isExtended() && "Type is not extended!"); return EVT::getEVT(cast(LLVMTy)->getElementType()); @@ -128,10 +132,12 @@ std::string EVT::getEVTString() const { case MVT::v2i32: return "v2i32"; case MVT::v4i32: return "v4i32"; case MVT::v8i32: return "v8i32"; + case MVT::v16i32: return "v16i32"; case MVT::v1i64: return "v1i64"; case MVT::v2i64: return "v2i64"; case MVT::v4i64: return "v4i64"; case MVT::v8i64: return "v8i64"; + case MVT::v16i64: return "v16i64"; case MVT::v2f32: return "v2f32"; case MVT::v2f16: return "v2f16"; case MVT::v4f32: return "v4f32"; @@ -177,10 +183,12 @@ Type *EVT::getTypeForEVT(LLVMContext &Context) const { case MVT::v2i32: return VectorType::get(Type::getInt32Ty(Context), 2); case MVT::v4i32: return VectorType::get(Type::getInt32Ty(Context), 4); case MVT::v8i32: return VectorType::get(Type::getInt32Ty(Context), 8); + case MVT::v16i32: return VectorType::get(Type::getInt32Ty(Context), 16); case MVT::v1i64: return VectorType::get(Type::getInt64Ty(Context), 1); case MVT::v2i64: return VectorType::get(Type::getInt64Ty(Context), 2); case MVT::v4i64: return VectorType::get(Type::getInt64Ty(Context), 4); case MVT::v8i64: return VectorType::get(Type::getInt64Ty(Context), 8); + case MVT::v16i64: return VectorType::get(Type::getInt64Ty(Context), 16); case MVT::v2f16: return VectorType::get(Type::getHalfTy(Context), 2); case MVT::v2f32: return VectorType::get(Type::getFloatTy(Context), 2); case MVT::v4f32: return VectorType::get(Type::getFloatTy(Context), 4); diff --git a/lib/VMCore/Verifier.cpp b/lib/VMCore/Verifier.cpp index 47baef3..6851246 100644 --- a/lib/VMCore/Verifier.cpp +++ b/lib/VMCore/Verifier.cpp @@ -68,6 +68,7 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/Support/ConstantRange.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include @@ -293,8 +294,9 @@ namespace { void VerifyCallSite(CallSite CS); bool PerformTypeCheck(Intrinsic::ID ID, Function *F, Type *Ty, int VT, unsigned ArgNo, std::string &Suffix); - void VerifyIntrinsicPrototype(Intrinsic::ID ID, Function *F, - unsigned RetNum, unsigned ParamNum, ...); + bool VerifyIntrinsicType(Type *Ty, + ArrayRef &Infos, + SmallVectorImpl &ArgTys); void VerifyParameterAttrs(Attributes Attrs, Type *Ty, bool isReturnValue, const Value *V); void VerifyFunctionAttrs(FunctionType *FT, const AttrListPtr &Attrs, @@ -804,14 +806,29 @@ void Verifier::visitSwitchInst(SwitchInst &SI) { // Check to make sure that all of the constants in the switch instruction // have the same type as the switched-on value. 
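The Value.cpp hunk above gives CallbackVH real default behavior: deleted() clears the tracked pointer and allUsesReplacedWith() is a no-op, replacing the old empty destructor that existed only as a vtable anchor. A minimal sketch of that observer pattern; ToyCallbackVH and Value here are illustrative stand-ins, not the LLVM classes:

    #include <cstdio>

    struct Value;  // opaque in this sketch; only pointers are used

    class ToyCallbackVH {
      Value *Ptr;
    public:
      explicit ToyCallbackVH(Value *P) : Ptr(P) {}
      Value *get() const { return Ptr; }
      // Default reactions, overridable by subclasses.
      virtual void deleted() { Ptr = 0; }           // drop the stale pointer
      virtual void allUsesReplacedWith(Value *) {}  // no-op by default
      virtual ~ToyCallbackVH() {}
    };

    int main() {
      Value *V = reinterpret_cast<Value *>(0x1234); // placeholder address
      ToyCallbackVH VH(V);
      VH.deleted(); // the tracked value was destroyed elsewhere
      std::printf("after deleted(): %p\n", (void *)VH.get()); // null
      return 0;
    }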
Type *SwitchTy = SI.getCondition()->getType(); - SmallPtrSet Constants; + IntegerType *IntTy = cast(SwitchTy); + IntegersSubsetToBB Mapping; + std::map RangeSetMap; for (SwitchInst::CaseIt i = SI.case_begin(), e = SI.case_end(); i != e; ++i) { - Assert1(i.getCaseValue()->getType() == SwitchTy, - "Switch constants must all be same type as switch value!", &SI); - Assert2(Constants.insert(i.getCaseValue()), - "Duplicate integer as switch case", &SI, i.getCaseValue()); + IntegersSubset CaseRanges = i.getCaseValueEx(); + for (unsigned ri = 0, rie = CaseRanges.getNumItems(); ri < rie; ++ri) { + IntegersSubset::Range r = CaseRanges.getItem(ri); + Assert1(((const APInt&)r.getLow()).getBitWidth() == IntTy->getBitWidth(), + "Switch constants must all be same type as switch value!", &SI); + Assert1(((const APInt&)r.getHigh()).getBitWidth() == IntTy->getBitWidth(), + "Switch constants must all be same type as switch value!", &SI); + Mapping.add(r); + RangeSetMap[r] = i.getCaseIndex(); + } } - + + IntegersSubsetToBB::RangeIterator errItem; + if (!Mapping.verify(errItem)) { + unsigned CaseIndex = RangeSetMap[errItem->first]; + SwitchInst::CaseIt i(&SI, CaseIndex); + Assert2(false, "Duplicate integer as switch case", &SI, i.getCaseValueEx()); + } + visitTerminatorInst(SI); } @@ -1346,6 +1363,10 @@ void Verifier::visitGetElementPtrInst(GetElementPtrInst &GEP) { visitInstruction(GEP); } +static bool isContiguous(const ConstantRange &A, const ConstantRange &B) { + return A.getUpper() == B.getLower() || A.getLower() == B.getUpper(); +} + void Verifier::visitLoadInst(LoadInst &LI) { PointerType *PTy = dyn_cast(LI.getOperand(0)->getType()); Assert1(PTy, "Load operand must be a pointer.", &LI); @@ -1367,6 +1388,8 @@ void Verifier::visitLoadInst(LoadInst &LI) { Assert1(NumOperands % 2 == 0, "Unfinished range!", Range); unsigned NumRanges = NumOperands / 2; Assert1(NumRanges >= 1, "It should have at least one range!", Range); + + ConstantRange LastRange(1); // Dummy initial value for (unsigned i = 0; i < NumRanges; ++i) { ConstantInt *Low = dyn_cast(Range->getOperand(2*i)); Assert1(Low, "The lower limit must be an integer!", Low); @@ -1375,9 +1398,35 @@ void Verifier::visitLoadInst(LoadInst &LI) { Assert1(High->getType() == Low->getType() && High->getType() == ElTy, "Range types must match load type!", &LI); - Assert1(High->getValue() != Low->getValue(), "Range must not be empty!", + + APInt HighV = High->getValue(); + APInt LowV = Low->getValue(); + ConstantRange CurRange(LowV, HighV); + Assert1(!CurRange.isEmptySet() && !CurRange.isFullSet(), + "Range must not be empty!", Range); + if (i != 0) { + Assert1(CurRange.intersectWith(LastRange).isEmptySet(), + "Intervals are overlapping", Range); + Assert1(LowV.sgt(LastRange.getLower()), "Intervals are not in order", + Range); + Assert1(!isContiguous(CurRange, LastRange), "Intervals are contiguous", + Range); + } + LastRange = ConstantRange(LowV, HighV); + } + if (NumRanges > 2) { + APInt FirstLow = + dyn_cast(Range->getOperand(0))->getValue(); + APInt FirstHigh = + dyn_cast(Range->getOperand(1))->getValue(); + ConstantRange FirstRange(FirstLow, FirstHigh); + Assert1(FirstRange.intersectWith(LastRange).isEmptySet(), + "Intervals are overlapping", Range); + Assert1(!isContiguous(FirstRange, LastRange), "Intervals are contiguous", Range); } + + } visitInstruction(LI); @@ -1487,7 +1536,7 @@ void Verifier::visitLandingPadInst(LandingPadInst &LPI) { // landing pad block may be branched to only by the unwind edge of an invoke. 
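The visitLoadInst() checks above enforce that !range metadata is canonical: every interval non-empty, lower bounds strictly increasing, and adjacent intervals neither overlapping nor contiguous (touching intervals should have been merged). A simplified checker over plain signed intervals; it ignores the signed wrap-around case the real verifier also handles, and all names are illustrative:

    #include <cstdio>
    #include <vector>

    // Half-open signed interval [Lo, Hi), one pair of !range endpoints.
    struct Range { long Lo, Hi; };

    static bool isWellFormed(const std::vector<Range> &Rs) {
      for (size_t I = 0; I < Rs.size(); ++I) {
        if (Rs[I].Lo >= Rs[I].Hi) return false;      // empty range
        if (I == 0) continue;
        if (Rs[I].Lo <= Rs[I - 1].Lo) return false;  // out of order
        if (Rs[I].Lo <  Rs[I - 1].Hi) return false;  // overlapping
        if (Rs[I].Lo == Rs[I - 1].Hi) return false;  // contiguous
      }
      return true;
    }

    int main() {
      std::vector<Range> Good = {{0, 4}, {6, 9}};
      std::vector<Range> Bad  = {{0, 4}, {4, 9}};  // contiguous pair
      std::printf("%d %d\n", isWellFormed(Good), isWellFormed(Bad)); // 1 0
      return 0;
    }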
 for (pred_iterator I = pred_begin(BB), E = pred_end(BB); I != E; ++I) {
     const InvokeInst *II = dyn_cast<InvokeInst>((*I)->getTerminator());
-    Assert1(II && II->getUnwindDest() == BB,
+    Assert1(II && II->getUnwindDest() == BB && II->getNormalDest() != BB,
             "Block containing LandingPadInst must be jumped to "
             "only by the unwind edge of an invoke.", &LPI);
   }
@@ -1526,53 +1575,9 @@ void Verifier::visitLandingPadInst(LandingPadInst &LPI) {
 void Verifier::verifyDominatesUse(Instruction &I, unsigned i) {
   Instruction *Op = cast<Instruction>(I.getOperand(i));
-  BasicBlock *BB = I.getParent();
-  BasicBlock *OpBlock = Op->getParent();
-  PHINode *PN = dyn_cast<PHINode>(&I);
-
-  // DT can handle non phi instructions for us.
-  if (!PN) {
-    // Definition must dominate use unless use is unreachable!
-    Assert2(InstsInThisBlock.count(Op) || !DT->isReachableFromEntry(BB) ||
-            DT->dominates(Op, &I),
-            "Instruction does not dominate all uses!", Op, &I);
-    return;
-  }
-
-  // Check that a definition dominates all of its uses.
-  if (InvokeInst *II = dyn_cast<InvokeInst>(Op)) {
-    // Invoke results are only usable in the normal destination, not in the
-    // exceptional destination.
-    BasicBlock *NormalDest = II->getNormalDest();
-
-
-    // PHI nodes differ from other nodes because they actually "use" the
-    // value in the predecessor basic blocks they correspond to.
-    BasicBlock *UseBlock = BB;
-    unsigned j = PHINode::getIncomingValueNumForOperand(i);
-    UseBlock = PN->getIncomingBlock(j);
-    Assert2(UseBlock, "Invoke operand is PHI node with bad incoming-BB",
-            Op, &I);
-
-    if (UseBlock == OpBlock) {
-      // Special case of a phi node in the normal destination or the unwind
-      // destination.
-      Assert2(BB == NormalDest || !DT->isReachableFromEntry(UseBlock),
-              "Invoke result not available in the unwind destination!",
-              Op, &I);
-    } else {
-      Assert2(DT->dominates(II, UseBlock) ||
-              !DT->isReachableFromEntry(UseBlock),
-              "Invoke result does not dominate all uses!", Op, &I);
-    }
-  }
-
-  // PHI nodes are more difficult than other nodes because they actually
-  // "use" the value in the predecessor basic blocks they correspond to.
-  unsigned j = PHINode::getIncomingValueNumForOperand(i);
-  BasicBlock *PredBB = PN->getIncomingBlock(j);
-  Assert2(PredBB && (DT->dominates(OpBlock, PredBB) ||
-                     !DT->isReachableFromEntry(PredBB)),
+  const Use &U = I.getOperandUse(i);
+  Assert2(InstsInThisBlock.count(Op) || DT->dominates(Op, U),
           "Instruction does not dominate all uses!", Op, &I);
 }
@@ -1631,8 +1636,11 @@ void Verifier::visitInstruction(Instruction &I) {
     if (Function *F = dyn_cast<Function>(I.getOperand(i))) {
       // Check to make sure that the "address of" an intrinsic function is never
       // taken.
-      Assert1(!F->isIntrinsic() || (i + 1 == e && isa<CallInst>(I)),
+      Assert1(!F->isIntrinsic() || i == (isa<CallInst>(I) ? e-1 : 0),
               "Cannot take the address of an intrinsic!", &I);
+      Assert1(!F->isIntrinsic() || isa<CallInst>(I) ||
+              F->getIntrinsicID() == Intrinsic::donothing,
+              "Cannot invoke an intrinsic other than donothing", &I);
       Assert1(F->getParent() == Mod, "Referencing function in another module!",
               &I);
     } else if (BasicBlock *OpBB = dyn_cast<BasicBlock>(I.getOperand(i))) {
@@ -1673,10 +1681,85 @@ void Verifier::visitInstruction(Instruction &I) {
   InstsInThisBlock.insert(&I);
 }
 
-// Flags used by TableGen to mark intrinsic parameters with the
-// LLVMExtendedElementVectorType and LLVMTruncatedElementVectorType classes.
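The verifyDominatesUse() rewrite above works because DominatorTree::dominates(Def, Use) knows where a use actually takes effect: at the user for an ordinary instruction, but at the end of the incoming predecessor block for a phi operand. That asymmetry is what the deleted phi and invoke special cases handled by hand. A toy illustration of the "effective point" of a use; the types and fields are illustrative, not LLVM API:

    #include <cstdio>
    #include <string>

    struct ToyUse {
      std::string UserBlock;      // block containing the user instruction
      bool UserIsPHI;
      std::string IncomingBlock;  // meaningful only for phi uses
    };

    // A phi "uses" its operand on the incoming edge, not at the phi itself.
    static std::string effectivePoint(const ToyUse &U) {
      return U.UserIsPHI ? "end of " + U.IncomingBlock
                         : "at user in " + U.UserBlock;
    }

    int main() {
      ToyUse Plain  = { "body", false, "" };
      ToyUse PhiUse = { "merge", true, "left" };
      std::printf("%s\n%s\n", effectivePoint(Plain).c_str(),
                  effectivePoint(PhiUse).c_str());
      return 0;
    }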
-static const unsigned ExtendedElementVectorType = 0x40000000;
-static const unsigned TruncatedElementVectorType = 0x20000000;
+/// VerifyIntrinsicType - Verify that the specified type (which comes from an
+/// intrinsic argument or return value) matches the type constraints specified
+/// by the .td file (e.g. an "any integer" argument really is an integer).
+///
+/// This returns true on error but does not print a message.
+bool Verifier::VerifyIntrinsicType(Type *Ty,
+                                   ArrayRef<Intrinsic::IITDescriptor> &Infos,
+                                   SmallVectorImpl<Type*> &ArgTys) {
+  using namespace Intrinsic;
+
+  // If we ran out of descriptors, there are too many arguments.
+  if (Infos.empty()) return true;
+  IITDescriptor D = Infos.front();
+  Infos = Infos.slice(1);
+
+  switch (D.Kind) {
+  case IITDescriptor::Void: return !Ty->isVoidTy();
+  case IITDescriptor::MMX: return !Ty->isX86_MMXTy();
+  case IITDescriptor::Metadata: return !Ty->isMetadataTy();
+  case IITDescriptor::Float: return !Ty->isFloatTy();
+  case IITDescriptor::Double: return !Ty->isDoubleTy();
+  case IITDescriptor::Integer: return !Ty->isIntegerTy(D.Integer_Width);
+  case IITDescriptor::Vector: {
+    VectorType *VT = dyn_cast<VectorType>(Ty);
+    return VT == 0 || VT->getNumElements() != D.Vector_Width ||
+           VerifyIntrinsicType(VT->getElementType(), Infos, ArgTys);
+  }
+  case IITDescriptor::Pointer: {
+    PointerType *PT = dyn_cast<PointerType>(Ty);
+    return PT == 0 || PT->getAddressSpace() != D.Pointer_AddressSpace ||
+           VerifyIntrinsicType(PT->getElementType(), Infos, ArgTys);
+  }
+
+  case IITDescriptor::Struct: {
+    StructType *ST = dyn_cast<StructType>(Ty);
+    if (ST == 0 || ST->getNumElements() != D.Struct_NumElements)
+      return true;
+
+    for (unsigned i = 0, e = D.Struct_NumElements; i != e; ++i)
+      if (VerifyIntrinsicType(ST->getElementType(i), Infos, ArgTys))
+        return true;
+    return false;
+  }
+
+  case IITDescriptor::Argument:
+    // Two cases here - If this is the second occurrence of an argument, verify
+    // that the later instance matches the previous instance.
+    if (D.getArgumentNumber() < ArgTys.size())
+      return Ty != ArgTys[D.getArgumentNumber()];
+
+    // Otherwise, if this is the first instance of an argument, record it and
+    // verify the "Any" kind.
+    assert(D.getArgumentNumber() == ArgTys.size() && "Table consistency error");
+    ArgTys.push_back(Ty);
+
+    switch (D.getArgumentKind()) {
+    case IITDescriptor::AK_AnyInteger: return !Ty->isIntOrIntVectorTy();
+    case IITDescriptor::AK_AnyFloat: return !Ty->isFPOrFPVectorTy();
+    case IITDescriptor::AK_AnyVector: return !isa<VectorType>(Ty);
+    case IITDescriptor::AK_AnyPointer: return !isa<PointerType>(Ty);
+    }
+    llvm_unreachable("all argument kinds not covered");
+
+  case IITDescriptor::ExtendVecArgument:
+    // This may only be used when referring to a previous vector argument.
+    return D.getArgumentNumber() >= ArgTys.size() ||
+           !isa<VectorType>(ArgTys[D.getArgumentNumber()]) ||
+           VectorType::getExtendedElementVectorType(
+                       cast<VectorType>(ArgTys[D.getArgumentNumber()])) != Ty;
+
+  case IITDescriptor::TruncVecArgument:
+    // This may only be used when referring to a previous vector argument.
+    return D.getArgumentNumber() >= ArgTys.size() ||
+           !isa<VectorType>(ArgTys[D.getArgumentNumber()]) ||
+           VectorType::getTruncatedElementVectorType(
+                       cast<VectorType>(ArgTys[D.getArgumentNumber()])) != Ty;
+  }
+  llvm_unreachable("unhandled");
+}
 
 /// visitIntrinsicFunction - Allow intrinsics to be verified in different ways.
/// @@ -1685,10 +1768,30 @@ void Verifier::visitIntrinsicFunctionCall(Intrinsic::ID ID, CallInst &CI) { Assert1(IF->isDeclaration(), "Intrinsic functions should never be defined!", IF); -#define GET_INTRINSIC_VERIFIER -#include "llvm/Intrinsics.gen" -#undef GET_INTRINSIC_VERIFIER - + // Verify that the intrinsic prototype lines up with what the .td files + // describe. + FunctionType *IFTy = IF->getFunctionType(); + Assert1(!IFTy->isVarArg(), "Intrinsic prototypes are not varargs", IF); + + SmallVector Table; + getIntrinsicInfoTableEntries(ID, Table); + ArrayRef TableRef = Table; + + SmallVector ArgTys; + Assert1(!VerifyIntrinsicType(IFTy->getReturnType(), TableRef, ArgTys), + "Intrinsic has incorrect return type!", IF); + for (unsigned i = 0, e = IFTy->getNumParams(); i != e; ++i) + Assert1(!VerifyIntrinsicType(IFTy->getParamType(i), TableRef, ArgTys), + "Intrinsic has incorrect argument type!", IF); + Assert1(TableRef.empty(), "Intrinsic has too few arguments!", IF); + + // Now that we have the intrinsic ID and the actual argument types (and we + // know they are legal for the intrinsic!) get the intrinsic name through the + // usual means. This allows us to verify the mangling of argument types into + // the name. + Assert1(Intrinsic::getName(ID, ArgTys) == IF->getName(), + "Intrinsic name not mangled correctly for type arguments!", IF); + // If the intrinsic takes MDNode arguments, verify that they are either global // or are local to *this* function. for (unsigned i = 0, e = CI.getNumArgOperands(); i != e; ++i) @@ -1772,261 +1875,6 @@ void Verifier::visitIntrinsicFunctionCall(Intrinsic::ID ID, CallInst &CI) { } } -/// Produce a string to identify an intrinsic parameter or return value. -/// The ArgNo value numbers the return values from 0 to NumRets-1 and the -/// parameters beginning with NumRets. -/// -static std::string IntrinsicParam(unsigned ArgNo, unsigned NumRets) { - if (ArgNo >= NumRets) - return "Intrinsic parameter #" + utostr(ArgNo - NumRets); - if (NumRets == 1) - return "Intrinsic result type"; - return "Intrinsic result type #" + utostr(ArgNo); -} - -bool Verifier::PerformTypeCheck(Intrinsic::ID ID, Function *F, Type *Ty, - int VT, unsigned ArgNo, std::string &Suffix) { - FunctionType *FTy = F->getFunctionType(); - - unsigned NumElts = 0; - Type *EltTy = Ty; - VectorType *VTy = dyn_cast(Ty); - if (VTy) { - EltTy = VTy->getElementType(); - NumElts = VTy->getNumElements(); - } - - Type *RetTy = FTy->getReturnType(); - StructType *ST = dyn_cast(RetTy); - unsigned NumRetVals; - if (RetTy->isVoidTy()) - NumRetVals = 0; - else if (ST) - NumRetVals = ST->getNumElements(); - else - NumRetVals = 1; - - if (VT < 0) { - int Match = ~VT; - - // Check flags that indicate a type that is an integral vector type with - // elements that are larger or smaller than the elements of the matched - // type. - if ((Match & (ExtendedElementVectorType | - TruncatedElementVectorType)) != 0) { - IntegerType *IEltTy = dyn_cast(EltTy); - if (!VTy || !IEltTy) { - CheckFailed(IntrinsicParam(ArgNo, NumRetVals) + " is not " - "an integral vector type.", F); - return false; - } - // Adjust the current Ty (in the opposite direction) rather than - // the type being matched against. 
- if ((Match & ExtendedElementVectorType) != 0) { - if ((IEltTy->getBitWidth() & 1) != 0) { - CheckFailed(IntrinsicParam(ArgNo, NumRetVals) + " vector " - "element bit-width is odd.", F); - return false; - } - Ty = VectorType::getTruncatedElementVectorType(VTy); - } else - Ty = VectorType::getExtendedElementVectorType(VTy); - Match &= ~(ExtendedElementVectorType | TruncatedElementVectorType); - } - - if (Match <= static_cast(NumRetVals - 1)) { - if (ST) - RetTy = ST->getElementType(Match); - - if (Ty != RetTy) { - CheckFailed(IntrinsicParam(ArgNo, NumRetVals) + " does not " - "match return type.", F); - return false; - } - } else { - if (Ty != FTy->getParamType(Match - NumRetVals)) { - CheckFailed(IntrinsicParam(ArgNo, NumRetVals) + " does not " - "match parameter %" + utostr(Match - NumRetVals) + ".", F); - return false; - } - } - } else if (VT == MVT::iAny) { - if (!EltTy->isIntegerTy()) { - CheckFailed(IntrinsicParam(ArgNo, NumRetVals) + " is not " - "an integer type.", F); - return false; - } - - unsigned GotBits = cast(EltTy)->getBitWidth(); - Suffix += "."; - - if (EltTy != Ty) - Suffix += "v" + utostr(NumElts); - - Suffix += "i" + utostr(GotBits); - - // Check some constraints on various intrinsics. - switch (ID) { - default: break; // Not everything needs to be checked. - case Intrinsic::bswap: - if (GotBits < 16 || GotBits % 16 != 0) { - CheckFailed("Intrinsic requires even byte width argument", F); - return false; - } - break; - } - } else if (VT == MVT::fAny) { - if (!EltTy->isFloatingPointTy()) { - CheckFailed(IntrinsicParam(ArgNo, NumRetVals) + " is not " - "a floating-point type.", F); - return false; - } - - Suffix += "."; - - if (EltTy != Ty) - Suffix += "v" + utostr(NumElts); - - Suffix += EVT::getEVT(EltTy).getEVTString(); - } else if (VT == MVT::vAny) { - if (!VTy) { - CheckFailed(IntrinsicParam(ArgNo, NumRetVals) + " is not a vector type.", - F); - return false; - } - Suffix += ".v" + utostr(NumElts) + EVT::getEVT(EltTy).getEVTString(); - } else if (VT == MVT::iPTR) { - if (!Ty->isPointerTy()) { - CheckFailed(IntrinsicParam(ArgNo, NumRetVals) + " is not a " - "pointer and a pointer is required.", F); - return false; - } - } else if (VT == MVT::iPTRAny) { - // Outside of TableGen, we don't distinguish iPTRAny (to any address space) - // and iPTR. In the verifier, we can not distinguish which case we have so - // allow either case to be legal. - if (PointerType* PTyp = dyn_cast(Ty)) { - EVT PointeeVT = EVT::getEVT(PTyp->getElementType(), true); - if (PointeeVT == MVT::Other) { - CheckFailed("Intrinsic has pointer to complex type."); - return false; - } - Suffix += ".p" + utostr(PTyp->getAddressSpace()) + - PointeeVT.getEVTString(); - } else { - CheckFailed(IntrinsicParam(ArgNo, NumRetVals) + " is not a " - "pointer and a pointer is required.", F); - return false; - } - } else if (EVT((MVT::SimpleValueType)VT).isVector()) { - EVT VVT = EVT((MVT::SimpleValueType)VT); - - // If this is a vector argument, verify the number and type of elements. 
- if (VVT.getVectorElementType() != EVT::getEVT(EltTy)) { - CheckFailed("Intrinsic prototype has incorrect vector element type!", F); - return false; - } - - if (VVT.getVectorNumElements() != NumElts) { - CheckFailed("Intrinsic prototype has incorrect number of " - "vector elements!", F); - return false; - } - } else if (EVT((MVT::SimpleValueType)VT).getTypeForEVT(Ty->getContext()) != - EltTy) { - CheckFailed(IntrinsicParam(ArgNo, NumRetVals) + " is wrong!", F); - return false; - } else if (EltTy != Ty) { - CheckFailed(IntrinsicParam(ArgNo, NumRetVals) + " is a vector " - "and a scalar is required.", F); - return false; - } - - return true; -} - -/// VerifyIntrinsicPrototype - TableGen emits calls to this function into -/// Intrinsics.gen. This implements a little state machine that verifies the -/// prototype of intrinsics. -void Verifier::VerifyIntrinsicPrototype(Intrinsic::ID ID, Function *F, - unsigned NumRetVals, - unsigned NumParams, ...) { - va_list VA; - va_start(VA, NumParams); - FunctionType *FTy = F->getFunctionType(); - - // For overloaded intrinsics, the Suffix of the function name must match the - // types of the arguments. This variable keeps track of the expected - // suffix, to be checked at the end. - std::string Suffix; - - if (FTy->getNumParams() + FTy->isVarArg() != NumParams) { - CheckFailed("Intrinsic prototype has incorrect number of arguments!", F); - return; - } - - Type *Ty = FTy->getReturnType(); - StructType *ST = dyn_cast(Ty); - - if (NumRetVals == 0 && !Ty->isVoidTy()) { - CheckFailed("Intrinsic should return void", F); - return; - } - - // Verify the return types. - if (ST && ST->getNumElements() != NumRetVals) { - CheckFailed("Intrinsic prototype has incorrect number of return types!", F); - return; - } - - for (unsigned ArgNo = 0; ArgNo != NumRetVals; ++ArgNo) { - int VT = va_arg(VA, int); // An MVT::SimpleValueType when non-negative. - - if (ST) Ty = ST->getElementType(ArgNo); - if (!PerformTypeCheck(ID, F, Ty, VT, ArgNo, Suffix)) - break; - } - - // Verify the parameter types. - for (unsigned ArgNo = 0; ArgNo != NumParams; ++ArgNo) { - int VT = va_arg(VA, int); // An MVT::SimpleValueType when non-negative. - - if (VT == MVT::isVoid && ArgNo > 0) { - if (!FTy->isVarArg()) - CheckFailed("Intrinsic prototype has no '...'!", F); - break; - } - - if (!PerformTypeCheck(ID, F, FTy->getParamType(ArgNo), VT, - ArgNo + NumRetVals, Suffix)) - break; - } - - va_end(VA); - - // For intrinsics without pointer arguments, if we computed a Suffix then the - // intrinsic is overloaded and we need to make sure that the name of the - // function is correct. We add the suffix to the name of the intrinsic and - // compare against the given function name. If they are not the same, the - // function name is invalid. This ensures that overloading of intrinsics - // uses a sane and consistent naming convention. Note that intrinsics with - // pointer argument may or may not be overloaded so we will check assuming it - // has a suffix and not. - if (!Suffix.empty()) { - std::string Name(Intrinsic::getName(ID)); - if (Name + Suffix != F->getName()) { - CheckFailed("Overloaded intrinsic has incorrect suffix: '" + - F->getName().substr(Name.length()) + "'. It should be '" + - Suffix + "'", F); - } - } - - // Check parameter attributes. - Assert1(F->getAttributes() == Intrinsic::getAttributes(ID), - "Intrinsic has wrong parameter attributes!", F); -} - - //===----------------------------------------------------------------------===// // Implement the public interfaces to this file... 
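The varargs state machine deleted above is replaced by the table-driven VerifyIntrinsicType() earlier in this patch: a flat descriptor list is consumed left to right while the type is walked recursively, returning true on mismatch and leftover descriptors signalling a wrong arity. A toy version with three descriptor kinds shows the consume-as-you-recurse shape; ToyKind and ToyType are illustrative, the real IITDescriptor covers many more cases:

    #include <cstdio>
    #include <vector>

    enum ToyKind { TK_Void, TK_Int32, TK_PtrTo };

    struct ToyType { ToyKind Kind; const ToyType *Pointee; };

    // Returns true on ERROR, matching the VerifyIntrinsicType convention.
    static bool verifyType(const ToyType *Ty, std::vector<ToyKind> &Infos) {
      if (Infos.empty()) return true;    // ran out of descriptors
      ToyKind D = Infos.front();
      Infos.erase(Infos.begin());        // consume one descriptor
      switch (D) {
      case TK_Void:  return Ty->Kind != TK_Void;
      case TK_Int32: return Ty->Kind != TK_Int32;
      case TK_PtrTo: // the descriptor for the pointee follows immediately
        return Ty->Kind != TK_PtrTo || verifyType(Ty->Pointee, Infos);
      }
      return true;
    }

    int main() {
      ToyType I32 = { TK_Int32, 0 };
      ToyType PtrI32 = { TK_PtrTo, &I32 };
      std::vector<ToyKind> Sig = { TK_PtrTo, TK_Int32 };  // expects "i32*"
      bool Err = verifyType(&PtrI32, Sig);
      std::printf("error=%d leftovers=%zu\n", Err, Sig.size());
      // prints: error=0 leftovers=0
      return 0;
    }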
//===----------------------------------------------------------------------===// -- cgit v1.1 From 554bcb69c2d785a011a30e7db87a36a87fe7db10 Mon Sep 17 00:00:00 2001 From: dim Date: Wed, 15 Aug 2012 20:02:54 +0000 Subject: Vendor import of clang trunk r161861: http://llvm.org/svn/llvm-project/cfe/trunk@161861 --- lib/ARCMigrate/ARCMT.cpp | 67 +- lib/ARCMigrate/CMakeLists.txt | 18 +- lib/ARCMigrate/FileRemapper.cpp | 4 +- lib/ARCMigrate/Internals.h | 4 + lib/ARCMigrate/ObjCMT.cpp | 2 + lib/ARCMigrate/TransAPIUses.cpp | 1 + lib/ARCMigrate/TransARCAssign.cpp | 1 + lib/ARCMigrate/TransAutoreleasePool.cpp | 3 +- lib/ARCMigrate/TransBlockObjCVariable.cpp | 19 +- lib/ARCMigrate/TransEmptyStatementsAndDealloc.cpp | 7 +- lib/ARCMigrate/TransGCAttrs.cpp | 9 +- lib/ARCMigrate/TransGCCalls.cpp | 9 +- lib/ARCMigrate/TransProperties.cpp | 11 +- lib/ARCMigrate/TransRetainReleaseDealloc.cpp | 99 +- lib/ARCMigrate/TransUnbridgedCasts.cpp | 85 +- lib/ARCMigrate/TransUnusedInitDelegate.cpp | 3 +- lib/ARCMigrate/TransZeroOutPropsInDealloc.cpp | 1 + lib/ARCMigrate/TransformActions.cpp | 1 + lib/ARCMigrate/Transforms.cpp | 59 +- lib/ARCMigrate/Transforms.h | 14 +- lib/AST/APValue.cpp | 6 +- lib/AST/ASTContext.cpp | 569 +++++- lib/AST/ASTDiagnostic.cpp | 934 +++++++++ lib/AST/ASTImporter.cpp | 117 +- lib/AST/CMakeLists.txt | 30 +- lib/AST/CXXABI.h | 2 +- lib/AST/CXXInheritance.cpp | 22 +- lib/AST/Comment.cpp | 264 +++ lib/AST/CommentBriefParser.cpp | 122 ++ lib/AST/CommentCommandTraits.cpp | 134 ++ lib/AST/CommentDumper.cpp | 231 +++ lib/AST/CommentLexer.cpp | 815 ++++++++ lib/AST/CommentParser.cpp | 722 +++++++ lib/AST/CommentSema.cpp | 739 +++++++ lib/AST/Decl.cpp | 346 ++-- lib/AST/DeclBase.cpp | 57 +- lib/AST/DeclCXX.cpp | 179 +- lib/AST/DeclFriend.cpp | 6 + lib/AST/DeclObjC.cpp | 56 +- lib/AST/DeclPrinter.cpp | 54 +- lib/AST/DeclTemplate.cpp | 18 +- lib/AST/DeclarationName.cpp | 55 +- lib/AST/DumpXML.cpp | 15 +- lib/AST/Expr.cpp | 522 ++++- lib/AST/ExprCXX.cpp | 67 +- lib/AST/ExprClassification.cpp | 25 +- lib/AST/ExprConstant.cpp | 346 ++-- lib/AST/ItaniumCXXABI.cpp | 2 +- lib/AST/ItaniumMangle.cpp | 38 +- lib/AST/LambdaMangleContext.cpp | 2 + lib/AST/Mangle.cpp | 29 +- lib/AST/MicrosoftCXXABI.cpp | 4 +- lib/AST/MicrosoftMangle.cpp | 822 ++++++-- lib/AST/NSAPI.cpp | 107 +- lib/AST/ParentMap.cpp | 13 +- lib/AST/RawCommentList.cpp | 271 +++ lib/AST/RecordLayout.cpp | 12 +- lib/AST/RecordLayoutBuilder.cpp | 307 ++- lib/AST/Stmt.cpp | 132 +- lib/AST/StmtDumper.cpp | 22 +- lib/AST/StmtPrinter.cpp | 116 +- lib/AST/StmtProfile.cpp | 9 +- lib/AST/TemplateBase.cpp | 33 +- lib/AST/Type.cpp | 51 +- lib/AST/TypeLoc.cpp | 1 + lib/AST/TypePrinter.cpp | 1453 +++++++++----- lib/AST/VTTBuilder.cpp | 1 + lib/AST/VTableBuilder.cpp | 17 +- lib/ASTMatchers/ASTMatchFinder.cpp | 547 ++++++ lib/ASTMatchers/ASTMatchersInternal.cpp | 102 + lib/ASTMatchers/CMakeLists.txt | 17 + lib/ASTMatchers/Makefile | 13 + lib/Analysis/AnalysisDeclContext.cpp | 63 +- lib/Analysis/CFG.cpp | 500 +++-- lib/Analysis/CMakeLists.txt | 18 +- lib/Analysis/CallGraph.cpp | 7 +- lib/Analysis/CocoaConventions.cpp | 2 + lib/Analysis/FormatString.cpp | 138 +- lib/Analysis/LiveVariables.cpp | 19 + lib/Analysis/PrintfFormatString.cpp | 122 +- lib/Analysis/ProgramPoint.cpp | 6 +- lib/Analysis/PseudoConstantAnalysis.cpp | 1 + lib/Analysis/ScanfFormatString.cpp | 193 +- lib/Analysis/ThreadSafety.cpp | 1641 +++++++++++----- lib/Analysis/UninitializedValues.cpp | 793 ++++---- lib/Basic/CMakeLists.txt | 33 +- lib/Basic/ConvertUTF.c | 3 +- 
lib/Basic/ConvertUTFWrapper.cpp | 70 + lib/Basic/Diagnostic.cpp | 129 +- lib/Basic/DiagnosticIDs.cpp | 32 +- lib/Basic/FileManager.cpp | 61 +- lib/Basic/IdentifierTable.cpp | 29 +- lib/Basic/ObjCRuntime.cpp | 86 + lib/Basic/SourceManager.cpp | 96 +- lib/Basic/TargetInfo.cpp | 1 + lib/Basic/Targets.cpp | 588 ++++-- lib/Basic/Version.cpp | 5 +- lib/Basic/VersionTuple.cpp | 52 + lib/CMakeLists.txt | 1 + lib/CodeGen/ABIInfo.h | 32 +- lib/CodeGen/BackendUtil.cpp | 32 +- lib/CodeGen/CGBlocks.cpp | 41 +- lib/CodeGen/CGBuilder.h | 2 +- lib/CodeGen/CGBuiltin.cpp | 2040 +------------------- lib/CodeGen/CGCXX.cpp | 4 +- lib/CodeGen/CGCXXABI.cpp | 71 +- lib/CodeGen/CGCXXABI.h | 51 +- lib/CodeGen/CGCall.cpp | 198 +- lib/CodeGen/CGClass.cpp | 202 +- lib/CodeGen/CGCleanup.cpp | 8 +- lib/CodeGen/CGCleanup.h | 2 +- lib/CodeGen/CGDebugInfo.cpp | 228 ++- lib/CodeGen/CGDebugInfo.h | 5 +- lib/CodeGen/CGDecl.cpp | 48 +- lib/CodeGen/CGDeclCXX.cpp | 80 +- lib/CodeGen/CGException.cpp | 80 +- lib/CodeGen/CGExpr.cpp | 239 ++- lib/CodeGen/CGExprAgg.cpp | 253 ++- lib/CodeGen/CGExprCXX.cpp | 136 +- lib/CodeGen/CGExprConstant.cpp | 18 +- lib/CodeGen/CGExprScalar.cpp | 30 +- lib/CodeGen/CGObjC.cpp | 192 +- lib/CodeGen/CGObjCGNU.cpp | 97 +- lib/CodeGen/CGObjCMac.cpp | 75 +- lib/CodeGen/CGObjCRuntime.cpp | 17 +- lib/CodeGen/CGObjCRuntime.h | 20 +- lib/CodeGen/CGRTTI.cpp | 3 +- lib/CodeGen/CGRecordLayout.h | 7 +- lib/CodeGen/CGRecordLayoutBuilder.cpp | 16 +- lib/CodeGen/CGStmt.cpp | 94 +- lib/CodeGen/CGVTables.cpp | 23 +- lib/CodeGen/CGValue.h | 9 +- lib/CodeGen/CMakeLists.txt | 20 +- lib/CodeGen/CodeGenFunction.cpp | 136 +- lib/CodeGen/CodeGenFunction.h | 47 +- lib/CodeGen/CodeGenModule.cpp | 146 +- lib/CodeGen/CodeGenModule.h | 15 +- lib/CodeGen/CodeGenTBAA.cpp | 8 +- lib/CodeGen/CodeGenTBAA.h | 6 +- lib/CodeGen/CodeGenTypes.cpp | 4 +- lib/CodeGen/CodeGenTypes.h | 36 +- lib/CodeGen/ItaniumCXXABI.cpp | 257 ++- lib/CodeGen/MicrosoftCXXABI.cpp | 98 +- lib/CodeGen/TargetInfo.cpp | 383 ++-- lib/Driver/ArgList.cpp | 62 + lib/Driver/CC1Options.cpp | 38 - lib/Driver/CMakeLists.txt | 22 +- lib/Driver/Compilation.cpp | 2 +- lib/Driver/Driver.cpp | 189 +- lib/Driver/OptTable.cpp | 2 + lib/Driver/ToolChain.cpp | 27 +- lib/Driver/ToolChains.cpp | 338 ++-- lib/Driver/ToolChains.h | 109 +- lib/Driver/Tools.cpp | 989 +++++++--- lib/Driver/Tools.h | 38 + lib/Driver/Types.cpp | 4 +- lib/Edit/CMakeLists.txt | 17 +- lib/Edit/Commit.cpp | 1 + lib/Edit/EditedSource.cpp | 5 +- lib/Edit/RewriteObjCFoundationAPI.cpp | 506 ++++- lib/Frontend/ASTConsumers.cpp | 99 +- lib/Frontend/ASTUnit.cpp | 224 ++- lib/Frontend/CMakeLists.txt | 45 +- lib/Frontend/CompilerInstance.cpp | 23 +- lib/Frontend/CompilerInvocation.cpp | 326 +++- lib/Frontend/CreateInvocationFromCommandLine.cpp | 8 +- lib/Frontend/DiagnosticRenderer.cpp | 127 +- lib/Frontend/FrontendAction.cpp | 62 +- lib/Frontend/FrontendActions.cpp | 11 +- lib/Frontend/InitHeaderSearch.cpp | 15 + lib/Frontend/InitPreprocessor.cpp | 72 +- lib/Frontend/LayoutOverrideSource.cpp | 1 + lib/Frontend/PrintPreprocessedOutput.cpp | 66 +- lib/Frontend/SerializedDiagnosticPrinter.cpp | 40 +- lib/Frontend/TextDiagnostic.cpp | 217 ++- lib/Frontend/TextDiagnosticPrinter.cpp | 30 +- lib/Frontend/VerifyDiagnosticConsumer.cpp | 471 +++-- lib/Frontend/Warnings.cpp | 14 +- lib/FrontendTool/CMakeLists.txt | 21 +- lib/FrontendTool/ExecuteCompilerInvocation.cpp | 12 +- lib/Headers/CMakeLists.txt | 3 + lib/Headers/ammintrin.h | 68 + lib/Headers/avx2intrin.h | 240 +++ lib/Headers/bmiintrin.h | 6 +- lib/Headers/emmintrin.h | 5 
+- lib/Headers/float.h | 2 +- lib/Headers/fmaintrin.h | 229 +++ lib/Headers/immintrin.h | 26 + lib/Headers/stddef.h | 16 +- lib/Headers/wmmintrin.h | 18 +- lib/Headers/x86intrin.h | 10 +- lib/Headers/xopintrin.h | 411 ++++ lib/Lex/CMakeLists.txt | 12 +- lib/Lex/HeaderSearch.cpp | 42 +- lib/Lex/Lexer.cpp | 91 +- lib/Lex/LiteralSupport.cpp | 154 +- lib/Lex/PPDirectives.cpp | 183 +- lib/Lex/PPLexerChange.cpp | 2 +- lib/Lex/PPMacroExpansion.cpp | 46 +- lib/Lex/Pragma.cpp | 78 +- lib/Lex/PreprocessingRecord.cpp | 4 +- lib/Lex/Preprocessor.cpp | 127 +- lib/Lex/PreprocessorLexer.cpp | 2 +- lib/Lex/TokenConcatenation.cpp | 1 + lib/Lex/TokenLexer.cpp | 12 +- lib/Makefile | 2 +- lib/Parse/CMakeLists.txt | 21 +- lib/Parse/ParseAST.cpp | 45 +- lib/Parse/ParseCXXInlineMethods.cpp | 8 +- lib/Parse/ParseDecl.cpp | 838 +++++--- lib/Parse/ParseDeclCXX.cpp | 327 ++-- lib/Parse/ParseExpr.cpp | 158 +- lib/Parse/ParseExprCXX.cpp | 77 +- lib/Parse/ParseObjc.cpp | 242 ++- lib/Parse/ParsePragma.h | 21 +- lib/Parse/ParseStmt.cpp | 127 +- lib/Parse/ParseTemplate.cpp | 135 +- lib/Parse/ParseTentative.cpp | 93 +- lib/Parse/Parser.cpp | 182 +- lib/Parse/RAIIObjectsForParser.h | 298 ++- lib/Rewrite/CMakeLists.txt | 17 +- lib/Rewrite/FrontendActions.cpp | 11 +- lib/Rewrite/HTMLRewrite.cpp | 11 +- lib/Rewrite/InclusionRewriter.cpp | 361 ++++ lib/Rewrite/RewriteModernObjC.cpp | 759 +++++--- lib/Rewrite/RewriteObjC.cpp | 105 +- lib/Rewrite/Rewriter.cpp | 76 +- lib/Sema/AnalysisBasedWarnings.cpp | 502 ++++- lib/Sema/AttributeList.cpp | 39 +- lib/Sema/CMakeLists.txt | 39 +- lib/Sema/CodeCompleteConsumer.cpp | 27 +- lib/Sema/DeclSpec.cpp | 39 +- lib/Sema/Sema.cpp | 177 +- lib/Sema/SemaAccess.cpp | 83 +- lib/Sema/SemaCXXScopeSpec.cpp | 12 +- lib/Sema/SemaCast.cpp | 30 +- lib/Sema/SemaChecking.cpp | 931 ++++++--- lib/Sema/SemaCodeComplete.cpp | 210 +- lib/Sema/SemaDecl.cpp | 911 ++++++--- lib/Sema/SemaDeclAttr.cpp | 1637 ++++++++++------ lib/Sema/SemaDeclCXX.cpp | 1738 ++++++++--------- lib/Sema/SemaDeclObjC.cpp | 311 +-- lib/Sema/SemaExceptionSpec.cpp | 41 +- lib/Sema/SemaExpr.cpp | 1155 +++++++---- lib/Sema/SemaExprCXX.cpp | 410 +++- lib/Sema/SemaExprMember.cpp | 95 +- lib/Sema/SemaExprObjC.cpp | 925 +++++---- lib/Sema/SemaFixItUtils.cpp | 71 +- lib/Sema/SemaInit.cpp | 372 ++-- lib/Sema/SemaLambda.cpp | 316 ++- lib/Sema/SemaLookup.cpp | 198 +- lib/Sema/SemaObjCProperty.cpp | 375 +++- lib/Sema/SemaOverload.cpp | 456 +++-- lib/Sema/SemaPseudoObject.cpp | 60 +- lib/Sema/SemaStmt.cpp | 735 ++++++- lib/Sema/SemaStmtAttr.cpp | 36 +- lib/Sema/SemaTemplate.cpp | 238 ++- lib/Sema/SemaTemplateDeduction.cpp | 124 +- lib/Sema/SemaTemplateInstantiate.cpp | 266 +-- lib/Sema/SemaTemplateInstantiateDecl.cpp | 162 +- lib/Sema/SemaTemplateVariadic.cpp | 113 +- lib/Sema/SemaType.cpp | 568 +++--- lib/Sema/TargetAttributesSema.cpp | 63 +- lib/Sema/TreeTransform.h | 1264 ++++++------ lib/Serialization/ASTCommon.h | 2 + lib/Serialization/ASTReader.cpp | 150 +- lib/Serialization/ASTReaderDecl.cpp | 51 +- lib/Serialization/ASTReaderStmt.cpp | 32 +- lib/Serialization/ASTWriter.cpp | 82 +- lib/Serialization/ASTWriterDecl.cpp | 26 +- lib/Serialization/ASTWriterStmt.cpp | 19 +- lib/Serialization/CMakeLists.txt | 14 +- lib/StaticAnalyzer/Checkers/AttrNonNullChecker.cpp | 46 +- .../Checkers/BasicObjCFoundationChecks.cpp | 228 ++- lib/StaticAnalyzer/Checkers/CMakeLists.txt | 14 +- lib/StaticAnalyzer/Checkers/CStringChecker.cpp | 28 +- .../Checkers/CallAndMessageChecker.cpp | 304 +-- lib/StaticAnalyzer/Checkers/CheckObjCDealloc.cpp | 8 +- 
.../Checkers/CheckSecuritySyntaxOnly.cpp | 1 + .../Checkers/CheckerDocumentation.cpp | 63 +- lib/StaticAnalyzer/Checkers/Checkers.td | 40 +- lib/StaticAnalyzer/Checkers/DereferenceChecker.cpp | 265 ++- lib/StaticAnalyzer/Checkers/DivZeroChecker.cpp | 8 +- .../Checkers/DynamicTypePropagation.cpp | 179 ++ .../Checkers/ExprInspectionChecker.cpp | 122 ++ .../Checkers/GenericTaintChecker.cpp | 15 +- .../Checkers/IdempotentOperationChecker.cpp | 5 +- lib/StaticAnalyzer/Checkers/IteratorsChecker.cpp | 603 ------ .../Checkers/MacOSKeychainAPIChecker.cpp | 20 +- lib/StaticAnalyzer/Checkers/MallocChecker.cpp | 626 +++--- .../Checkers/MallocSizeofChecker.cpp | 37 +- .../Checkers/NSAutoreleasePoolChecker.cpp | 23 +- .../Checkers/NoReturnFunctionChecker.cpp | 6 +- lib/StaticAnalyzer/Checkers/ObjCAtSyncChecker.cpp | 6 +- .../Checkers/ObjCContainersChecker.cpp | 1 - .../Checkers/ObjCSelfInitChecker.cpp | 102 +- .../Checkers/ObjCUnusedIVarsChecker.cpp | 9 + lib/StaticAnalyzer/Checkers/RetainCountChecker.cpp | 739 +++---- lib/StaticAnalyzer/Checkers/ReturnUndefChecker.cpp | 4 +- lib/StaticAnalyzer/Checkers/StreamChecker.cpp | 2 +- lib/StaticAnalyzer/Checkers/TraversalChecker.cpp | 84 + lib/StaticAnalyzer/Checkers/UndefBranchChecker.cpp | 3 +- .../Checkers/UndefCapturedBlockVarChecker.cpp | 1 + lib/StaticAnalyzer/Checkers/UndefResultChecker.cpp | 8 +- .../Checkers/UndefinedArraySubscriptChecker.cpp | 4 +- .../Checkers/UndefinedAssignmentChecker.cpp | 3 +- lib/StaticAnalyzer/Checkers/UnixAPIChecker.cpp | 13 +- lib/StaticAnalyzer/Checkers/VLASizeChecker.cpp | 3 +- lib/StaticAnalyzer/Checkers/VirtualCallChecker.cpp | 2 +- lib/StaticAnalyzer/Core/APSIntType.cpp | 38 + lib/StaticAnalyzer/Core/AnalysisManager.cpp | 7 +- lib/StaticAnalyzer/Core/BasicConstraintManager.cpp | 241 ++- lib/StaticAnalyzer/Core/BasicValueFactory.cpp | 1 + lib/StaticAnalyzer/Core/BugReporter.cpp | 179 +- lib/StaticAnalyzer/Core/BugReporterVisitors.cpp | 86 +- lib/StaticAnalyzer/Core/CMakeLists.txt | 23 +- lib/StaticAnalyzer/Core/CallEvent.cpp | 856 ++++++++ lib/StaticAnalyzer/Core/CheckerManager.cpp | 133 +- lib/StaticAnalyzer/Core/CoreEngine.cpp | 29 +- lib/StaticAnalyzer/Core/Environment.cpp | 17 +- lib/StaticAnalyzer/Core/ExplodedGraph.cpp | 18 +- lib/StaticAnalyzer/Core/ExprEngine.cpp | 538 +++--- lib/StaticAnalyzer/Core/ExprEngineC.cpp | 270 ++- lib/StaticAnalyzer/Core/ExprEngineCXX.cpp | 366 ++-- .../Core/ExprEngineCallAndReturn.cpp | 822 ++++---- lib/StaticAnalyzer/Core/ExprEngineObjC.cpp | 211 +- lib/StaticAnalyzer/Core/HTMLDiagnostics.cpp | 84 +- lib/StaticAnalyzer/Core/MemRegion.cpp | 274 ++- lib/StaticAnalyzer/Core/ObjCMessage.cpp | 90 - lib/StaticAnalyzer/Core/PathDiagnostic.cpp | 204 +- lib/StaticAnalyzer/Core/PlistDiagnostics.cpp | 31 +- lib/StaticAnalyzer/Core/ProgramState.cpp | 99 +- lib/StaticAnalyzer/Core/RangeConstraintManager.cpp | 275 ++- lib/StaticAnalyzer/Core/RegionStore.cpp | 949 ++++----- lib/StaticAnalyzer/Core/SValBuilder.cpp | 76 +- lib/StaticAnalyzer/Core/SVals.cpp | 20 +- .../Core/SimpleConstraintManager.cpp | 103 +- lib/StaticAnalyzer/Core/SimpleConstraintManager.h | 6 +- lib/StaticAnalyzer/Core/SimpleSValBuilder.cpp | 338 ++-- lib/StaticAnalyzer/Core/Store.cpp | 34 +- lib/StaticAnalyzer/Core/SymbolManager.cpp | 14 + lib/StaticAnalyzer/Core/TextPathDiagnostics.cpp | 5 +- lib/StaticAnalyzer/Frontend/AnalysisConsumer.cpp | 71 +- lib/StaticAnalyzer/Frontend/CMakeLists.txt | 15 +- .../Frontend/CheckerRegistration.cpp | 2 +- lib/Tooling/ArgumentsAdjusters.cpp | 34 + lib/Tooling/CMakeLists.txt | 22 +- 
lib/Tooling/CommandLineClangTool.cpp | 80 + lib/Tooling/CompilationDatabase.cpp | 106 +- lib/Tooling/CustomCompilationDatabase.h | 42 + lib/Tooling/Refactoring.cpp | 186 ++ lib/Tooling/RefactoringCallbacks.cpp | 81 + lib/Tooling/Tooling.cpp | 136 +- 353 files changed, 38404 insertions(+), 18662 deletions(-) create mode 100644 lib/AST/Comment.cpp create mode 100644 lib/AST/CommentBriefParser.cpp create mode 100644 lib/AST/CommentCommandTraits.cpp create mode 100644 lib/AST/CommentDumper.cpp create mode 100644 lib/AST/CommentLexer.cpp create mode 100644 lib/AST/CommentParser.cpp create mode 100644 lib/AST/CommentSema.cpp create mode 100644 lib/AST/RawCommentList.cpp create mode 100644 lib/ASTMatchers/ASTMatchFinder.cpp create mode 100644 lib/ASTMatchers/ASTMatchersInternal.cpp create mode 100644 lib/ASTMatchers/CMakeLists.txt create mode 100644 lib/ASTMatchers/Makefile create mode 100644 lib/Basic/ConvertUTFWrapper.cpp create mode 100644 lib/Basic/ObjCRuntime.cpp delete mode 100644 lib/Driver/CC1Options.cpp create mode 100644 lib/Headers/ammintrin.h create mode 100644 lib/Headers/fmaintrin.h create mode 100644 lib/Headers/xopintrin.h create mode 100644 lib/Rewrite/InclusionRewriter.cpp create mode 100644 lib/StaticAnalyzer/Checkers/DynamicTypePropagation.cpp create mode 100644 lib/StaticAnalyzer/Checkers/ExprInspectionChecker.cpp delete mode 100644 lib/StaticAnalyzer/Checkers/IteratorsChecker.cpp create mode 100644 lib/StaticAnalyzer/Checkers/TraversalChecker.cpp create mode 100644 lib/StaticAnalyzer/Core/APSIntType.cpp create mode 100644 lib/StaticAnalyzer/Core/CallEvent.cpp delete mode 100644 lib/StaticAnalyzer/Core/ObjCMessage.cpp create mode 100644 lib/Tooling/ArgumentsAdjusters.cpp create mode 100644 lib/Tooling/CommandLineClangTool.cpp create mode 100644 lib/Tooling/CustomCompilationDatabase.h create mode 100644 lib/Tooling/Refactoring.cpp create mode 100644 lib/Tooling/RefactoringCallbacks.cpp (limited to 'lib') diff --git a/lib/ARCMigrate/ARCMT.cpp b/lib/ARCMigrate/ARCMT.cpp index 9354dc3..f291dec 100644 --- a/lib/ARCMigrate/ARCMT.cpp +++ b/lib/ARCMigrate/ARCMT.cpp @@ -91,11 +91,40 @@ namespace { class CaptureDiagnosticConsumer : public DiagnosticConsumer { DiagnosticsEngine &Diags; + DiagnosticConsumer &DiagClient; CapturedDiagList &CapturedDiags; + bool HasBegunSourceFile; public: CaptureDiagnosticConsumer(DiagnosticsEngine &diags, - CapturedDiagList &capturedDiags) - : Diags(diags), CapturedDiags(capturedDiags) { } + DiagnosticConsumer &client, + CapturedDiagList &capturedDiags) + : Diags(diags), DiagClient(client), CapturedDiags(capturedDiags), + HasBegunSourceFile(false) { } + + virtual void BeginSourceFile(const LangOptions &Opts, + const Preprocessor *PP) { + // Pass BeginSourceFile message onto DiagClient on first call. + // The corresponding EndSourceFile call will be made from an + // explicit call to FinishCapture. + if (!HasBegunSourceFile) { + DiagClient.BeginSourceFile(Opts, PP); + HasBegunSourceFile = true; + } + } + + void FinishCapture() { + // Call EndSourceFile on DiagClient on completion of capture to + // enable VerifyDiagnosticConsumer to check diagnostics *after* + // it has received the diagnostic list. 
+ if (HasBegunSourceFile) { + DiagClient.EndSourceFile(); + HasBegunSourceFile = false; + } + } + + virtual ~CaptureDiagnosticConsumer() { + assert(!HasBegunSourceFile && "FinishCapture not called!"); + } virtual void HandleDiagnostic(DiagnosticsEngine::Level level, const Diagnostic &Info) { @@ -195,8 +224,19 @@ createInvocationForMigration(CompilerInvocation &origCI) { CInvok->getLangOpts()->ObjCAutoRefCount = true; CInvok->getLangOpts()->setGC(LangOptions::NonGC); CInvok->getDiagnosticOpts().ErrorLimit = 0; - CInvok->getDiagnosticOpts().Warnings.push_back( - "error=arc-unsafe-retained-assign"); + CInvok->getDiagnosticOpts().PedanticErrors = 0; + + // Ignore -Werror flags when migrating. + std::vector WarnOpts; + for (std::vector::iterator + I = CInvok->getDiagnosticOpts().Warnings.begin(), + E = CInvok->getDiagnosticOpts().Warnings.end(); I != E; ++I) { + if (!StringRef(*I).startswith("error")) + WarnOpts.push_back(*I); + } + WarnOpts.push_back("error=arc-unsafe-retained-assign"); + CInvok->getDiagnosticOpts().Warnings = llvm_move(WarnOpts); + CInvok->getLangOpts()->ObjCRuntimeHasWeak = HasARCRuntime(origCI); return CInvok.take(); @@ -249,13 +289,15 @@ bool arcmt::checkForManualIssues(CompilerInvocation &origCI, new DiagnosticsEngine(DiagID, DiagClient, /*ShouldOwnClient=*/false)); // Filter of all diagnostics. - CaptureDiagnosticConsumer errRec(*Diags, capturedDiags); + CaptureDiagnosticConsumer errRec(*Diags, *DiagClient, capturedDiags); Diags->setClient(&errRec, /*ShouldOwnClient=*/false); OwningPtr Unit( ASTUnit::LoadFromCompilerInvocationAction(CInvok.take(), Diags)); - if (!Unit) + if (!Unit) { + errRec.FinishCapture(); return true; + } // Don't filter diagnostics anymore. Diags->setClient(DiagClient, /*ShouldOwnClient=*/false); @@ -267,6 +309,7 @@ bool arcmt::checkForManualIssues(CompilerInvocation &origCI, DiagClient->BeginSourceFile(Ctx.getLangOpts(), &Unit->getPreprocessor()); capturedDiags.reportDiagnostics(*Diags); DiagClient->EndSourceFile(); + errRec.FinishCapture(); return true; } @@ -304,6 +347,7 @@ bool arcmt::checkForManualIssues(CompilerInvocation &origCI, capturedDiags.reportDiagnostics(*Diags); DiagClient->EndSourceFile(); + errRec.FinishCapture(); // If we are migrating code that gets the '-fobjc-arc' flag, make sure // to remove it so that we don't get errors from normal compilation. @@ -480,13 +524,12 @@ public: class RewritesApplicator : public TransformActions::RewriteReceiver { Rewriter &rewriter; - ASTContext &Ctx; MigrationProcess::RewriteListener *Listener; public: RewritesApplicator(Rewriter &rewriter, ASTContext &ctx, MigrationProcess::RewriteListener *listener) - : rewriter(rewriter), Ctx(ctx), Listener(listener) { + : rewriter(rewriter), Listener(listener) { if (Listener) Listener->start(ctx); } @@ -553,7 +596,7 @@ bool MigrationProcess::applyTransform(TransformFn trans, new DiagnosticsEngine(DiagID, DiagClient, /*ShouldOwnClient=*/false)); // Filter of all diagnostics. - CaptureDiagnosticConsumer errRec(*Diags, capturedDiags); + CaptureDiagnosticConsumer errRec(*Diags, *DiagClient, capturedDiags); Diags->setClient(&errRec, /*ShouldOwnClient=*/false); OwningPtr ASTAction; @@ -562,8 +605,10 @@ bool MigrationProcess::applyTransform(TransformFn trans, OwningPtr Unit( ASTUnit::LoadFromCompilerInvocationAction(CInvok.take(), Diags, ASTAction.get())); - if (!Unit) + if (!Unit) { + errRec.FinishCapture(); return true; + } Unit->setOwnsRemappedFileBuffers(false); // FileRemapper manages that. // Don't filter diagnostics anymore. 
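The CaptureDiagnosticConsumer changes above bracket the whole capture in a single BeginSourceFile/EndSourceFile pair on the real client, deferring EndSourceFile until FinishCapture() so that the client (notably VerifyDiagnosticConsumer) sees the replayed diagnostics before the file is closed. A toy sketch of that capture-and-replay pattern; the interfaces are simplified stand-ins for the clang DiagnosticConsumer API:

    #include <cstdio>
    #include <string>
    #include <vector>

    struct Client {
      void beginSourceFile() { std::puts("begin"); }
      void endSourceFile()   { std::puts("end"); }
      void handle(const std::string &D) { std::printf("diag: %s\n", D.c_str()); }
    };

    class CaptureConsumer {
      Client &C;
      std::vector<std::string> Buffered;
      bool Begun;
    public:
      explicit CaptureConsumer(Client &C) : C(C), Begun(false) {}
      // Forward the opening bracket to the real client exactly once.
      void beginSourceFile() {
        if (!Begun) { C.beginSourceFile(); Begun = true; }
      }
      void handle(const std::string &D) { Buffered.push_back(D); }
      // Replay everything, then close the bracket.
      void finishCapture() {
        for (size_t I = 0; I != Buffered.size(); ++I) C.handle(Buffered[I]);
        if (Begun) { C.endSourceFile(); Begun = false; }
      }
    };

    int main() {
      Client Real;
      CaptureConsumer Cap(Real);
      Cap.beginSourceFile();
      Cap.handle("unused variable 'x'");
      Cap.finishCapture();  // begin / diag / end, in that order
      return 0;
    }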
@@ -576,6 +621,7 @@ bool MigrationProcess::applyTransform(TransformFn trans, DiagClient->BeginSourceFile(Ctx.getLangOpts(), &Unit->getPreprocessor()); capturedDiags.reportDiagnostics(*Diags); DiagClient->EndSourceFile(); + errRec.FinishCapture(); return true; } @@ -599,6 +645,7 @@ bool MigrationProcess::applyTransform(TransformFn trans, } DiagClient->EndSourceFile(); + errRec.FinishCapture(); if (DiagClient->getNumErrors()) return true; diff --git a/lib/ARCMigrate/CMakeLists.txt b/lib/ARCMigrate/CMakeLists.txt index fcb7f72..f602fc8 100644 --- a/lib/ARCMigrate/CMakeLists.txt +++ b/lib/ARCMigrate/CMakeLists.txt @@ -1,5 +1,3 @@ -set(LLVM_USED_LIBS clangBasic clangAST clangParse clangFrontend clangRewrite) - add_clang_library(clangARCMigrate ARCMT.cpp ARCMTActions.cpp @@ -25,5 +23,19 @@ add_clang_library(clangARCMigrate add_dependencies(clangARCMigrate ClangAttrClasses ClangAttrList + ClangAttrParsedAttrList + ClangCommentNodes ClangDeclNodes - ClangStmtNodes) + ClangDiagnosticCommon + ClangDiagnosticGroups + ClangDiagnosticSema + ClangStmtNodes + ) + +target_link_libraries(clangARCMigrate + clangBasic + clangAST + clangParse + clangFrontend + clangRewrite + ) diff --git a/lib/ARCMigrate/FileRemapper.cpp b/lib/ARCMigrate/FileRemapper.cpp index 474ce7d..e9b49b3 100644 --- a/lib/ARCMigrate/FileRemapper.cpp +++ b/lib/ARCMigrate/FileRemapper.cpp @@ -77,7 +77,9 @@ bool FileRemapper::initFromFile(StringRef filePath, DiagnosticsEngine &Diag, for (unsigned idx = 0; idx+3 <= lines.size(); idx += 3) { StringRef fromFilename = lines[idx]; unsigned long long timeModified; - lines[idx+1].getAsInteger(10, timeModified); + if (lines[idx+1].getAsInteger(10, timeModified)) + return report("Invalid file data: '" + lines[idx+1] + "' not a number", + Diag); StringRef toFilename = lines[idx+2]; const FileEntry *origFE = FileMgr->getFile(fromFilename); diff --git a/lib/ARCMigrate/Internals.h b/lib/ARCMigrate/Internals.h index 59177c4..935fc9b 100644 --- a/lib/ARCMigrate/Internals.h +++ b/lib/ARCMigrate/Internals.h @@ -12,6 +12,7 @@ #include "clang/ARCMigrate/ARCMT.h" #include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/Optional.h" namespace clang { class Sema; @@ -144,6 +145,7 @@ public: Sema &SemaRef; TransformActions &TA; std::vector &ARCMTMacroLocs; + llvm::Optional EnableCFBridgeFns; MigrationPass(ASTContext &Ctx, LangOptions::GCMode OrigGCMode, Sema &sema, TransformActions &TA, @@ -157,6 +159,8 @@ public: void setNSAllocReallocError(bool val) { MigOptions.NoNSAllocReallocError = val; } bool noFinalizeRemoval() const { return MigOptions.NoFinalizeRemoval; } void setNoFinalizeRemoval(bool val) {MigOptions.NoFinalizeRemoval = val; } + + bool CFBridgingFunctionsDefined(); }; static inline StringRef getARCMTMacroName() { diff --git a/lib/ARCMigrate/ObjCMT.cpp b/lib/ARCMigrate/ObjCMT.cpp index e635274..0098f97 100644 --- a/lib/ARCMigrate/ObjCMT.cpp +++ b/lib/ARCMigrate/ObjCMT.cpp @@ -10,6 +10,7 @@ #include "clang/ARCMigrate/ARCMTActions.h" #include "clang/Frontend/CompilerInstance.h" #include "clang/Frontend/MultiplexConsumer.h" +#include "clang/AST/ASTContext.h" #include "clang/AST/RecursiveASTVisitor.h" #include "clang/AST/NSAPI.h" #include "clang/AST/ASTConsumer.h" @@ -209,6 +210,7 @@ void ObjCMigrateASTConsumer::HandleTranslationUnit(ASTContext &Ctx) { } bool MigrateSourceAction::BeginInvocation(CompilerInstance &CI) { + CI.getDiagnostics().setIgnoreAllWarnings(true); CI.getPreprocessorOpts().DetailedRecord = true; CI.getPreprocessorOpts().DetailedRecordConditionalDirectives = true; return true; diff --git 
a/lib/ARCMigrate/TransAPIUses.cpp b/lib/ARCMigrate/TransAPIUses.cpp index aaa82d8..5336f85 100644 --- a/lib/ARCMigrate/TransAPIUses.cpp +++ b/lib/ARCMigrate/TransAPIUses.cpp @@ -19,6 +19,7 @@ #include "Transforms.h" #include "Internals.h" +#include "clang/AST/ASTContext.h" #include "clang/Sema/SemaDiagnostic.h" using namespace clang; diff --git a/lib/ARCMigrate/TransARCAssign.cpp b/lib/ARCMigrate/TransARCAssign.cpp index cfa6da1..b83f85a 100644 --- a/lib/ARCMigrate/TransARCAssign.cpp +++ b/lib/ARCMigrate/TransARCAssign.cpp @@ -23,6 +23,7 @@ #include "Transforms.h" #include "Internals.h" +#include "clang/AST/ASTContext.h" #include "clang/Sema/SemaDiagnostic.h" using namespace clang; diff --git a/lib/ARCMigrate/TransAutoreleasePool.cpp b/lib/ARCMigrate/TransAutoreleasePool.cpp index 8787724..5205ce4 100644 --- a/lib/ARCMigrate/TransAutoreleasePool.cpp +++ b/lib/ARCMigrate/TransAutoreleasePool.cpp @@ -29,6 +29,7 @@ #include "Transforms.h" #include "Internals.h" +#include "clang/AST/ASTContext.h" #include "clang/Sema/SemaDiagnostic.h" #include "clang/Basic/SourceManager.h" #include @@ -75,7 +76,7 @@ public: &pass.Ctx.Idents.get("drain")); } - void transformBody(Stmt *body) { + void transformBody(Stmt *body, Decl *ParentD) { Body = body; TraverseStmt(body); } diff --git a/lib/ARCMigrate/TransBlockObjCVariable.cpp b/lib/ARCMigrate/TransBlockObjCVariable.cpp index 3be8132..2a79c9a 100644 --- a/lib/ARCMigrate/TransBlockObjCVariable.cpp +++ b/lib/ARCMigrate/TransBlockObjCVariable.cpp @@ -9,7 +9,7 @@ // // rewriteBlockObjCVariable: // -// Adding __block to an obj-c variable could be either because the the variable +// Adding __block to an obj-c variable could be either because the variable // is used for output storage or the user wanted to break a retain cycle. // This transformation checks whether a reference of the variable for the block // is actually needed (it is assigned to or its address is taken) or not. 
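The test this comment describes — does the block actually assign to the variable or take its address? — is small enough to sketch stand-alone. Below is a minimal version against clang's RecursiveASTVisitor; BlockVarWriteChecker is an illustrative name, not the checker this file defines, and the real pass (in the hunks that follow) additionally collects the set of variables to change per block:

    // Returns true via isWritten() if 'Var' is assigned to or has its
    // address taken anywhere in the traversed statements; if neither
    // happens, the __block qualifier existed only to break a retain cycle.
    class BlockVarWriteChecker
        : public RecursiveASTVisitor<BlockVarWriteChecker> {
      const VarDecl *Var;
      bool Written;

      bool refersToVar(Expr *E) {
        if (DeclRefExpr *DRE = dyn_cast<DeclRefExpr>(E->IgnoreParenCasts()))
          return DRE->getDecl() == Var;
        return false;
      }

    public:
      explicit BlockVarWriteChecker(const VarDecl *V)
        : Var(V), Written(false) { }

      bool VisitBinaryOperator(BinaryOperator *BO) {
        if (BO->isAssignmentOp() && refersToVar(BO->getLHS()))
          Written = true;
        return true;
      }

      bool VisitUnaryOperator(UnaryOperator *UO) {
        if (UO->getOpcode() == UO_AddrOf && refersToVar(UO->getSubExpr()))
          Written = true;
        return true;
      }

      bool isWritten() const { return Written; }
    };

Running TraverseStmt over a block's body and asking isWritten() afterwards gives the distinction the rewriter needs: only captures that are written to still require __block under ARC.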
@@ -27,6 +27,7 @@ #include "Transforms.h" #include "Internals.h" +#include "clang/AST/ASTContext.h" #include "clang/Basic/SourceManager.h" using namespace clang; @@ -37,7 +38,6 @@ namespace { class RootBlockObjCVarRewriter : public RecursiveASTVisitor { - MigrationPass &Pass; llvm::DenseSet &VarsToChange; class BlockVarChecker : public RecursiveASTVisitor { @@ -71,9 +71,8 @@ class RootBlockObjCVarRewriter : }; public: - RootBlockObjCVarRewriter(MigrationPass &pass, - llvm::DenseSet &VarsToChange) - : Pass(pass), VarsToChange(VarsToChange) { } + RootBlockObjCVarRewriter(llvm::DenseSet &VarsToChange) + : VarsToChange(VarsToChange) { } bool VisitBlockDecl(BlockDecl *block) { SmallVector BlockVars; @@ -111,16 +110,14 @@ private: }; class BlockObjCVarRewriter : public RecursiveASTVisitor { - MigrationPass &Pass; llvm::DenseSet &VarsToChange; public: - BlockObjCVarRewriter(MigrationPass &pass, - llvm::DenseSet &VarsToChange) - : Pass(pass), VarsToChange(VarsToChange) { } + BlockObjCVarRewriter(llvm::DenseSet &VarsToChange) + : VarsToChange(VarsToChange) { } bool TraverseBlockDecl(BlockDecl *block) { - RootBlockObjCVarRewriter(Pass, VarsToChange).TraverseDecl(block); + RootBlockObjCVarRewriter(VarsToChange).TraverseDecl(block); return true; } }; @@ -131,7 +128,7 @@ void BlockObjCVariableTraverser::traverseBody(BodyContext &BodyCtx) { MigrationPass &Pass = BodyCtx.getMigrationContext().Pass; llvm::DenseSet VarsToChange; - BlockObjCVarRewriter trans(Pass, VarsToChange); + BlockObjCVarRewriter trans(VarsToChange); trans.TraverseStmt(BodyCtx.getTopStmt()); for (llvm::DenseSet::iterator diff --git a/lib/ARCMigrate/TransEmptyStatementsAndDealloc.cpp b/lib/ARCMigrate/TransEmptyStatementsAndDealloc.cpp index 0fb7141..552cb2f 100644 --- a/lib/ARCMigrate/TransEmptyStatementsAndDealloc.cpp +++ b/lib/ARCMigrate/TransEmptyStatementsAndDealloc.cpp @@ -21,6 +21,7 @@ #include "Transforms.h" #include "Internals.h" +#include "clang/AST/ASTContext.h" #include "clang/AST/StmtVisitor.h" #include "clang/Basic/SourceManager.h" @@ -44,7 +45,7 @@ static bool isEmptyARCMTMacroStatement(NullStmt *S, SourceManager &SM = Ctx.getSourceManager(); std::vector::iterator I = std::upper_bound(MacroLocs.begin(), MacroLocs.end(), SemiLoc, - SourceManager::LocBeforeThanCompare(SM)); + BeforeThanCompare(SM)); --I; SourceLocation AfterMacroLoc = I->getLocWithOffset(getARCMTMacroName().size()); @@ -210,8 +211,8 @@ static void cleanupDeallocOrFinalize(MigrationPass &pass) { ObjCMethodDecl *DeallocM = 0; ObjCMethodDecl *FinalizeM = 0; for (ObjCImplementationDecl::instmeth_iterator - MI = (*I)->instmeth_begin(), - ME = (*I)->instmeth_end(); MI != ME; ++MI) { + MI = I->instmeth_begin(), + ME = I->instmeth_end(); MI != ME; ++MI) { ObjCMethodDecl *MD = *MI; if (!MD->hasBody()) continue; diff --git a/lib/ARCMigrate/TransGCAttrs.cpp b/lib/ARCMigrate/TransGCAttrs.cpp index 9f6066e..eec7306 100644 --- a/lib/ARCMigrate/TransGCAttrs.cpp +++ b/lib/ARCMigrate/TransGCAttrs.cpp @@ -9,12 +9,13 @@ #include "Transforms.h" #include "Internals.h" -#include "clang/Lex/Lexer.h" +#include "clang/AST/ASTContext.h" #include "clang/Basic/SourceManager.h" -#include "llvm/Support/SaveAndRestore.h" +#include "clang/Lex/Lexer.h" #include "clang/Sema/SemaDiagnostic.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/TinyPtrVector.h" +#include "llvm/Support/SaveAndRestore.h" using namespace clang; using namespace arcmt; @@ -136,7 +137,7 @@ public: if (CXXRecordDecl *RD = dyn_cast(D)) { for (CXXRecordDecl::method_iterator MI = RD->method_begin(), ME = 
RD->method_end(); MI != ME; ++MI) { - if ((*MI)->isOutOfLine()) + if (MI->isOutOfLine()) return true; } return false; @@ -166,7 +167,7 @@ public: for (Decl::redecl_iterator I = D->redecls_begin(), E = D->redecls_end(); I != E; ++I) - if (!isInMainFile((*I)->getLocation())) + if (!isInMainFile(I->getLocation())) return false; return true; diff --git a/lib/ARCMigrate/TransGCCalls.cpp b/lib/ARCMigrate/TransGCCalls.cpp index 1be9020..2ec480c 100644 --- a/lib/ARCMigrate/TransGCCalls.cpp +++ b/lib/ARCMigrate/TransGCCalls.cpp @@ -9,6 +9,7 @@ #include "Transforms.h" #include "Internals.h" +#include "clang/AST/ASTContext.h" #include "clang/Sema/SemaDiagnostic.h" using namespace clang; @@ -20,13 +21,12 @@ namespace { class GCCollectableCallsChecker : public RecursiveASTVisitor { MigrationContext &MigrateCtx; - ParentMap &PMap; IdentifierInfo *NSMakeCollectableII; IdentifierInfo *CFMakeCollectableII; public: - GCCollectableCallsChecker(MigrationContext &ctx, ParentMap &map) - : MigrateCtx(ctx), PMap(map) { + GCCollectableCallsChecker(MigrationContext &ctx) + : MigrateCtx(ctx) { IdentifierTable &Ids = MigrateCtx.Pass.Ctx.Idents; NSMakeCollectableII = &Ids.get("NSMakeCollectable"); CFMakeCollectableII = &Ids.get("CFMakeCollectable"); @@ -78,7 +78,6 @@ public: } // anonymous namespace void GCCollectableCallsTraverser::traverseBody(BodyContext &BodyCtx) { - GCCollectableCallsChecker(BodyCtx.getMigrationContext(), - BodyCtx.getParentMap()) + GCCollectableCallsChecker(BodyCtx.getMigrationContext()) .TraverseStmt(BodyCtx.getTopStmt()); } diff --git a/lib/ARCMigrate/TransProperties.cpp b/lib/ARCMigrate/TransProperties.cpp index cc85fe2..fdd6e88 100644 --- a/lib/ARCMigrate/TransProperties.cpp +++ b/lib/ARCMigrate/TransProperties.cpp @@ -309,17 +309,8 @@ private: if (RE->getDecl() != Ivar) return true; - if (ObjCMessageExpr * - ME = dyn_cast(E->getRHS()->IgnoreParenCasts())) - if (ME->getMethodFamily() == OMF_retain) + if (isPlusOneAssign(E)) return false; - - ImplicitCastExpr *implCE = dyn_cast(E->getRHS()); - while (implCE && implCE->getCastKind() == CK_BitCast) - implCE = dyn_cast(implCE->getSubExpr()); - - if (implCE && implCE->getCastKind() == CK_ARCConsumeObject) - return false; } return true; diff --git a/lib/ARCMigrate/TransRetainReleaseDealloc.cpp b/lib/ARCMigrate/TransRetainReleaseDealloc.cpp index 11a6553..91d2b39 100644 --- a/lib/ARCMigrate/TransRetainReleaseDealloc.cpp +++ b/lib/ARCMigrate/TransRetainReleaseDealloc.cpp @@ -19,10 +19,11 @@ #include "Transforms.h" #include "Internals.h" -#include "clang/Sema/SemaDiagnostic.h" +#include "clang/AST/ASTContext.h" #include "clang/AST/ParentMap.h" -#include "clang/Lex/Lexer.h" #include "clang/Basic/SourceManager.h" +#include "clang/Lex/Lexer.h" +#include "clang/Sema/SemaDiagnostic.h" using namespace clang; using namespace arcmt; @@ -49,7 +50,7 @@ public: Pass.Ctx.Selectors.getNullarySelector(&Pass.Ctx.Idents.get("finalize")); } - void transformBody(Stmt *body) { + void transformBody(Stmt *body, Decl *ParentD) { Body = body; collectRemovables(body, Removables); StmtMap.reset(new ParentMap(body)); @@ -64,14 +65,16 @@ public: return true; case OMF_autorelease: if (isRemovable(E)) { - // An unused autorelease is badness. If we remove it the receiver - // will likely die immediately while previously it was kept alive - // by the autorelease pool. This is bad practice in general, leave it - // and emit an error to force the user to restructure his code. 
- Pass.TA.reportError("it is not safe to remove an unused 'autorelease' " - "message; its receiver may be destroyed immediately", - E->getLocStart(), E->getSourceRange()); - return true; + if (!isCommonUnusedAutorelease(E)) { + // An unused autorelease is badness. If we remove it the receiver + // will likely die immediately while previously it was kept alive + // by the autorelease pool. This is bad practice in general, leave it + // and emit an error to force the user to restructure his code. + Pass.TA.reportError("it is not safe to remove an unused 'autorelease' " + "message; its receiver may be destroyed immediately", + E->getLocStart(), E->getSourceRange()); + return true; + } } // Pass through. case OMF_retain: @@ -156,6 +159,80 @@ public: } private: + /// \brief Checks for idioms where an unused -autorelease is common. + /// + /// Currently only returns true for this idiom which is common in property + /// setters: + /// + /// [backingValue autorelease]; + /// backingValue = [newValue retain]; // in general a +1 assign + /// + bool isCommonUnusedAutorelease(ObjCMessageExpr *E) { + Expr *Rec = E->getInstanceReceiver(); + if (!Rec) + return false; + + Decl *RefD = getReferencedDecl(Rec); + if (!RefD) + return false; + + Stmt *OuterS = E, *InnerS; + do { + InnerS = OuterS; + OuterS = StmtMap->getParent(InnerS); + } + while (OuterS && (isa(OuterS) || + isa(OuterS) || + isa(OuterS))); + + if (!OuterS) + return false; + + // Find next statement after the -autorelease. + + Stmt::child_iterator currChildS = OuterS->child_begin(); + Stmt::child_iterator childE = OuterS->child_end(); + for (; currChildS != childE; ++currChildS) { + if (*currChildS == InnerS) + break; + } + if (currChildS == childE) + return false; + ++currChildS; + if (currChildS == childE) + return false; + + Stmt *nextStmt = *currChildS; + if (!nextStmt) + return false; + nextStmt = nextStmt->IgnoreImplicit(); + + // Check for "RefD = [+1 retained object];". + + if (BinaryOperator *Bop = dyn_cast(nextStmt)) { + if (RefD != getReferencedDecl(Bop->getLHS())) + return false; + if (isPlusOneAssign(Bop)) + return true; + } + return false; + } + + Decl *getReferencedDecl(Expr *E) { + if (!E) + return 0; + + E = E->IgnoreParenCasts(); + if (DeclRefExpr *DRE = dyn_cast(E)) + return DRE->getDecl(); + if (MemberExpr *ME = dyn_cast(E)) + return ME->getMemberDecl(); + if (ObjCIvarRefExpr *IRE = dyn_cast(E)) + return IRE->getDecl(); + + return 0; + } + /// \brief Check if the retain/release is due to a GCD/XPC macro that are /// defined as: /// diff --git a/lib/ARCMigrate/TransUnbridgedCasts.cpp b/lib/ARCMigrate/TransUnbridgedCasts.cpp index 48437c7..ac18b5d 100644 --- a/lib/ARCMigrate/TransUnbridgedCasts.cpp +++ b/lib/ARCMigrate/TransUnbridgedCasts.cpp @@ -12,7 +12,7 @@ // A cast of non-objc pointer to an objc one is checked. If the non-objc pointer // is from a file-level variable, __bridge cast is used to convert it. // For the result of a function call that we know is +1/+0, -// __bridge/__bridge_transfer is used. +// __bridge/CFBridgingRelease is used. // // NSString *str = (NSString *)kUTTypePlainText; // str = b ? kUTTypeRTF : kUTTypePlainText; @@ -21,8 +21,8 @@ // ----> // NSString *str = (__bridge NSString *)kUTTypePlainText; // str = (__bridge NSString *)(b ? 
kUTTypeRTF : kUTTypePlainText); -// NSString *_uuidString = (__bridge_transfer NSString *) -// CFUUIDCreateString(kCFAllocatorDefault, _uuid); +// NSString *_uuidString = (NSString *) +// CFBridgingRelease(CFUUIDCreateString(kCFAllocatorDefault, _uuid)); // // For a C pointer to ObjC, for casting 'self', __bridge is used. // @@ -35,9 +35,11 @@ #include "Transforms.h" #include "Internals.h" #include "clang/Analysis/DomainSpecific/CocoaConventions.h" -#include "clang/Sema/SemaDiagnostic.h" +#include "clang/AST/ASTContext.h" #include "clang/AST/ParentMap.h" #include "clang/Basic/SourceManager.h" +#include "clang/Lex/Lexer.h" +#include "clang/Sema/SemaDiagnostic.h" #include "llvm/ADT/SmallString.h" using namespace clang; @@ -50,13 +52,15 @@ class UnbridgedCastRewriter : public RecursiveASTVisitor{ MigrationPass &Pass; IdentifierInfo *SelfII; OwningPtr StmtMap; + Decl *ParentD; public: - UnbridgedCastRewriter(MigrationPass &pass) : Pass(pass) { + UnbridgedCastRewriter(MigrationPass &pass) : Pass(pass), ParentD(0) { SelfII = &Pass.Ctx.Idents.get("self"); } - void transformBody(Stmt *body) { + void transformBody(Stmt *body, Decl *ParentD) { + this->ParentD = ParentD; StmtMap.reset(new ParentMap(body)); TraverseStmt(body); } @@ -155,6 +159,21 @@ private: } } } + + // If returning an ivar or a member of an ivar from a +0 method, use + // a __bridge cast. + Expr *base = inner->IgnoreParenImpCasts(); + while (isa(base)) + base = cast(base)->getBase()->IgnoreParenImpCasts(); + if (isa(base) && + isa(StmtMap->getParentIgnoreParenCasts(E))) { + if (ObjCMethodDecl *method = dyn_cast_or_null(ParentD)) { + if (!method->hasAttr()) { + castToObjCObject(E, /*retained=*/false); + return; + } + } + } } void castToObjCObject(CastExpr *E, bool retained) { @@ -191,22 +210,48 @@ private: TA.clearDiagnostic(diag::err_arc_mismatched_cast, diag::err_arc_cast_requires_bridge, E->getLocStart()); - if (CStyleCastExpr *CCE = dyn_cast(E)) { - TA.insertAfterToken(CCE->getLParenLoc(), bridge); - } else { - SourceLocation insertLoc = E->getSubExpr()->getLocStart(); - SmallString<128> newCast; - newCast += '('; - newCast += bridge; - newCast += E->getType().getAsString(Pass.Ctx.getPrintingPolicy()); - newCast += ')'; - - if (isa(E->getSubExpr())) { - TA.insert(insertLoc, newCast.str()); + if (Kind == OBC_Bridge || !Pass.CFBridgingFunctionsDefined()) { + if (CStyleCastExpr *CCE = dyn_cast(E)) { + TA.insertAfterToken(CCE->getLParenLoc(), bridge); } else { + SourceLocation insertLoc = E->getSubExpr()->getLocStart(); + SmallString<128> newCast; newCast += '('; - TA.insert(insertLoc, newCast.str()); - TA.insertAfterToken(E->getLocEnd(), ")"); + newCast += bridge; + newCast += E->getType().getAsString(Pass.Ctx.getPrintingPolicy()); + newCast += ')'; + + if (isa(E->getSubExpr())) { + TA.insert(insertLoc, newCast.str()); + } else { + newCast += '('; + TA.insert(insertLoc, newCast.str()); + TA.insertAfterToken(E->getLocEnd(), ")"); + } + } + } else { + assert(Kind == OBC_BridgeTransfer || Kind == OBC_BridgeRetained); + SmallString<32> BridgeCall; + + Expr *WrapE = E->getSubExpr(); + SourceLocation InsertLoc = WrapE->getLocStart(); + + SourceManager &SM = Pass.Ctx.getSourceManager(); + char PrevChar = *SM.getCharacterData(InsertLoc.getLocWithOffset(-1)); + if (Lexer::isIdentifierBodyChar(PrevChar, Pass.Ctx.getLangOpts())) + BridgeCall += ' '; + + if (Kind == OBC_BridgeTransfer) + BridgeCall += "CFBridgingRelease"; + else + BridgeCall += "CFBridgingRetain"; + + if (isa(WrapE)) { + TA.insert(InsertLoc, BridgeCall); + } else { + BridgeCall 
+= '('; + TA.insert(InsertLoc, BridgeCall); + TA.insertAfterToken(WrapE->getLocEnd(), ")"); } } } diff --git a/lib/ARCMigrate/TransUnusedInitDelegate.cpp b/lib/ARCMigrate/TransUnusedInitDelegate.cpp index 60ed32a..3057e39 100644 --- a/lib/ARCMigrate/TransUnusedInitDelegate.cpp +++ b/lib/ARCMigrate/TransUnusedInitDelegate.cpp @@ -22,6 +22,7 @@ #include "Transforms.h" #include "Internals.h" +#include "clang/AST/ASTContext.h" #include "clang/Sema/SemaDiagnostic.h" using namespace clang; @@ -40,7 +41,7 @@ public: UnusedInitRewriter(MigrationPass &pass) : Body(0), Pass(pass) { } - void transformBody(Stmt *body) { + void transformBody(Stmt *body, Decl *ParentD) { Body = body; collectRemovables(body, Removables); TraverseStmt(body); diff --git a/lib/ARCMigrate/TransZeroOutPropsInDealloc.cpp b/lib/ARCMigrate/TransZeroOutPropsInDealloc.cpp index d1f08aa..a07596d 100644 --- a/lib/ARCMigrate/TransZeroOutPropsInDealloc.cpp +++ b/lib/ARCMigrate/TransZeroOutPropsInDealloc.cpp @@ -15,6 +15,7 @@ #include "Transforms.h" #include "Internals.h" +#include "clang/AST/ASTContext.h" using namespace clang; using namespace arcmt; diff --git a/lib/ARCMigrate/TransformActions.cpp b/lib/ARCMigrate/TransformActions.cpp index 0ecfeb5..783db1c 100644 --- a/lib/ARCMigrate/TransformActions.cpp +++ b/lib/ARCMigrate/TransformActions.cpp @@ -8,6 +8,7 @@ //===----------------------------------------------------------------------===// #include "Internals.h" +#include "clang/AST/ASTContext.h" #include "clang/AST/Expr.h" #include "clang/Lex/Preprocessor.h" #include "clang/Basic/SourceManager.h" diff --git a/lib/ARCMigrate/Transforms.cpp b/lib/ARCMigrate/Transforms.cpp index d342d1a..1175c36 100644 --- a/lib/ARCMigrate/Transforms.cpp +++ b/lib/ARCMigrate/Transforms.cpp @@ -9,11 +9,14 @@ #include "Transforms.h" #include "Internals.h" -#include "clang/Sema/SemaDiagnostic.h" +#include "clang/Analysis/DomainSpecific/CocoaConventions.h" +#include "clang/AST/ASTContext.h" #include "clang/AST/RecursiveASTVisitor.h" #include "clang/AST/StmtVisitor.h" -#include "clang/Lex/Lexer.h" #include "clang/Basic/SourceManager.h" +#include "clang/Lex/Lexer.h" +#include "clang/Sema/Sema.h" +#include "clang/Sema/SemaDiagnostic.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/ADT/DenseSet.h" #include @@ -24,6 +27,13 @@ using namespace trans; ASTTraverser::~ASTTraverser() { } +bool MigrationPass::CFBridgingFunctionsDefined() { + if (!EnableCFBridgeFns.hasValue()) + EnableCFBridgeFns = SemaRef.isKnownName("CFBridgingRetain") && + SemaRef.isKnownName("CFBridgingRelease"); + return *EnableCFBridgeFns; +} + //===----------------------------------------------------------------------===// // Helpers. 
//===----------------------------------------------------------------------===// @@ -56,6 +66,47 @@ bool trans::canApplyWeak(ASTContext &Ctx, QualType type, return true; } +bool trans::isPlusOneAssign(const BinaryOperator *E) { + if (E->getOpcode() != BO_Assign) + return false; + + if (const ObjCMessageExpr * + ME = dyn_cast(E->getRHS()->IgnoreParenCasts())) + if (ME->getMethodFamily() == OMF_retain) + return true; + + if (const CallExpr * + callE = dyn_cast(E->getRHS()->IgnoreParenCasts())) { + if (const FunctionDecl *FD = callE->getDirectCallee()) { + if (FD->getAttr()) + return true; + + if (FD->isGlobal() && + FD->getIdentifier() && + FD->getParent()->isTranslationUnit() && + FD->getLinkage() == ExternalLinkage && + ento::cocoa::isRefType(callE->getType(), "CF", + FD->getIdentifier()->getName())) { + StringRef fname = FD->getIdentifier()->getName(); + if (fname.endswith("Retain") || + fname.find("Create") != StringRef::npos || + fname.find("Copy") != StringRef::npos) { + return true; + } + } + } + } + + const ImplicitCastExpr *implCE = dyn_cast(E->getRHS()); + while (implCE && implCE->getCastKind() == CK_BitCast) + implCE = dyn_cast(implCE->getSubExpr()); + + if (implCE && implCE->getCastKind() == CK_ARCConsumeObject) + return true; + + return false; +} + /// \brief 'Loc' is the end of a statement range. This returns the location /// immediately after the semicolon following the statement. /// If no semicolon is found or the location is inside a macro, the returned @@ -472,8 +523,8 @@ static void GCRewriteFinalize(MigrationPass &pass) { for (impl_iterator I = impl_iterator(DC->decls_begin()), E = impl_iterator(DC->decls_end()); I != E; ++I) { for (ObjCImplementationDecl::instmeth_iterator - MI = (*I)->instmeth_begin(), - ME = (*I)->instmeth_end(); MI != ME; ++MI) { + MI = I->instmeth_begin(), + ME = I->instmeth_end(); MI != ME; ++MI) { ObjCMethodDecl *MD = *MI; if (!MD->hasBody()) continue; diff --git a/lib/ARCMigrate/Transforms.h b/lib/ARCMigrate/Transforms.h index 445c3e5..5d4ac94 100644 --- a/lib/ARCMigrate/Transforms.h +++ b/lib/ARCMigrate/Transforms.h @@ -13,6 +13,7 @@ #include "clang/AST/RecursiveASTVisitor.h" #include "clang/AST/ParentMap.h" #include "llvm/ADT/DenseSet.h" +#include "llvm/Support/SaveAndRestore.h" namespace clang { class Decl; @@ -154,6 +155,8 @@ public: bool canApplyWeak(ASTContext &Ctx, QualType type, bool AllowOnUnknownClass = false); +bool isPlusOneAssign(const BinaryOperator *E); + /// \brief 'Loc' is the end of a statement range. This returns the location /// immediately after the semicolon following the statement. 
/// If no semicolon is found or the location is inside a macro, the returned @@ -174,15 +177,22 @@ StringRef getNilString(ASTContext &Ctx); template class BodyTransform : public RecursiveASTVisitor > { MigrationPass &Pass; + Decl *ParentD; + typedef RecursiveASTVisitor > base; public: - BodyTransform(MigrationPass &pass) : Pass(pass) { } + BodyTransform(MigrationPass &pass) : Pass(pass), ParentD(0) { } bool TraverseStmt(Stmt *rootS) { if (rootS) - BODY_TRANS(Pass).transformBody(rootS); + BODY_TRANS(Pass).transformBody(rootS, ParentD); return true; } + + bool TraverseObjCMethodDecl(ObjCMethodDecl *D) { + SaveAndRestore SetParent(ParentD, D); + return base::TraverseObjCMethodDecl(D); + } }; typedef llvm::DenseSet ExprSet; diff --git a/lib/AST/APValue.cpp b/lib/AST/APValue.cpp index a31b3c5..a74ef14 100644 --- a/lib/AST/APValue.cpp +++ b/lib/AST/APValue.cpp @@ -467,9 +467,9 @@ void APValue::printPretty(raw_ostream &Out, ASTContext &Ctx, QualType Ty) const{ FI != RD->field_end(); ++FI) { if (!First) Out << ", "; - if ((*FI)->isUnnamedBitfield()) continue; - getStructField((*FI)->getFieldIndex()). - printPretty(Out, Ctx, (*FI)->getType()); + if (FI->isUnnamedBitfield()) continue; + getStructField(FI->getFieldIndex()). + printPretty(Out, Ctx, FI->getType()); First = false; } Out << '}'; diff --git a/lib/AST/ASTContext.cpp b/lib/AST/ASTContext.cpp index cb4d336..ad48dff 100644 --- a/lib/AST/ASTContext.cpp +++ b/lib/AST/ASTContext.cpp @@ -13,6 +13,7 @@ #include "clang/AST/ASTContext.h" #include "clang/AST/CharUnits.h" +#include "clang/AST/CommentCommandTraits.h" #include "clang/AST/DeclCXX.h" #include "clang/AST/DeclObjC.h" #include "clang/AST/DeclTemplate.h" @@ -53,6 +54,218 @@ enum FloatingRank { HalfRank, FloatRank, DoubleRank, LongDoubleRank }; +RawComment *ASTContext::getRawCommentForDeclNoCache(const Decl *D) const { + if (!CommentsLoaded && ExternalSource) { + ExternalSource->ReadComments(); + CommentsLoaded = true; + } + + assert(D); + + // User can not attach documentation to implicit declarations. + if (D->isImplicit()) + return NULL; + + // TODO: handle comments for function parameters properly. + if (isa(D)) + return NULL; + + // TODO: we could look up template parameter documentation in the template + // documentation. + if (isa(D) || + isa(D) || + isa(D)) + return NULL; + + ArrayRef RawComments = Comments.getComments(); + + // If there are no comments anywhere, we won't find anything. + if (RawComments.empty()) + return NULL; + + // Find declaration location. + // For Objective-C declarations we generally don't expect to have multiple + // declarators, thus use declaration starting location as the "declaration + // location". + // For all other declarations multiple declarators are used quite frequently, + // so we use the location of the identifier as the "declaration location". + SourceLocation DeclLoc; + if (isa(D) || isa(D) || + isa(D) || + isa(D) || + isa(D)) + DeclLoc = D->getLocStart(); + else + DeclLoc = D->getLocation(); + + // If the declaration doesn't map directly to a location in a file, we + // can't find the comment. + if (DeclLoc.isInvalid() || !DeclLoc.isFileID()) + return NULL; + + // Find the comment that occurs just after this declaration. + ArrayRef::iterator Comment; + { + // When searching for comments during parsing, the comment we are looking + // for is usually among the last two comments we parsed -- check them + // first. 
+ RawComment CommentAtDeclLoc(SourceMgr, SourceRange(DeclLoc)); + BeforeThanCompare Compare(SourceMgr); + ArrayRef::iterator MaybeBeforeDecl = RawComments.end() - 1; + bool Found = Compare(*MaybeBeforeDecl, &CommentAtDeclLoc); + if (!Found && RawComments.size() >= 2) { + MaybeBeforeDecl--; + Found = Compare(*MaybeBeforeDecl, &CommentAtDeclLoc); + } + + if (Found) { + Comment = MaybeBeforeDecl + 1; + assert(Comment == std::lower_bound(RawComments.begin(), RawComments.end(), + &CommentAtDeclLoc, Compare)); + } else { + // Slow path. + Comment = std::lower_bound(RawComments.begin(), RawComments.end(), + &CommentAtDeclLoc, Compare); + } + } + + // Decompose the location for the declaration and find the beginning of the + // file buffer. + std::pair DeclLocDecomp = SourceMgr.getDecomposedLoc(DeclLoc); + + // First check whether we have a trailing comment. + if (Comment != RawComments.end() && + (*Comment)->isDocumentation() && (*Comment)->isTrailingComment() && + (isa(D) || isa(D) || isa(D))) { + std::pair CommentBeginDecomp + = SourceMgr.getDecomposedLoc((*Comment)->getSourceRange().getBegin()); + // Check that Doxygen trailing comment comes after the declaration, starts + // on the same line and in the same file as the declaration. + if (DeclLocDecomp.first == CommentBeginDecomp.first && + SourceMgr.getLineNumber(DeclLocDecomp.first, DeclLocDecomp.second) + == SourceMgr.getLineNumber(CommentBeginDecomp.first, + CommentBeginDecomp.second)) { + (*Comment)->setDecl(D); + return *Comment; + } + } + + // The comment just after the declaration was not a trailing comment. + // Let's look at the previous comment. + if (Comment == RawComments.begin()) + return NULL; + --Comment; + + // Check that we actually have a non-member Doxygen comment. + if (!(*Comment)->isDocumentation() || (*Comment)->isTrailingComment()) + return NULL; + + // Decompose the end of the comment. + std::pair CommentEndDecomp + = SourceMgr.getDecomposedLoc((*Comment)->getSourceRange().getEnd()); + + // If the comment and the declaration aren't in the same file, then they + // aren't related. + if (DeclLocDecomp.first != CommentEndDecomp.first) + return NULL; + + // Get the corresponding buffer. + bool Invalid = false; + const char *Buffer = SourceMgr.getBufferData(DeclLocDecomp.first, + &Invalid).data(); + if (Invalid) + return NULL; + + // Extract text between the comment and declaration. + StringRef Text(Buffer + CommentEndDecomp.second, + DeclLocDecomp.second - CommentEndDecomp.second); + + // There should be no other declarations or preprocessor directives between + // comment and declaration. + if (Text.find_first_of(",;{}#@") != StringRef::npos) + return NULL; + + (*Comment)->setDecl(D); + return *Comment; +} + +const RawComment *ASTContext::getRawCommentForAnyRedecl(const Decl *D) const { + // If we have a 'templated' declaration for a template, adjust 'D' to + // refer to the actual template. + if (const FunctionDecl *FD = dyn_cast(D)) { + if (const FunctionTemplateDecl *FTD = FD->getDescribedFunctionTemplate()) + D = FTD; + } else if (const CXXRecordDecl *RD = dyn_cast(D)) { + if (const ClassTemplateDecl *CTD = RD->getDescribedClassTemplate()) + D = CTD; + } + // FIXME: Alias templates? + + // Check whether we have cached a comment for this declaration already. 
+ { + llvm::DenseMap::iterator Pos = + RedeclComments.find(D); + if (Pos != RedeclComments.end()) { + const RawCommentAndCacheFlags &Raw = Pos->second; + if (Raw.getKind() != RawCommentAndCacheFlags::NoCommentInDecl) + return Raw.getRaw(); + } + } + + // Search for comments attached to declarations in the redeclaration chain. + const RawComment *RC = NULL; + for (Decl::redecl_iterator I = D->redecls_begin(), + E = D->redecls_end(); + I != E; ++I) { + llvm::DenseMap::iterator Pos = + RedeclComments.find(*I); + if (Pos != RedeclComments.end()) { + const RawCommentAndCacheFlags &Raw = Pos->second; + if (Raw.getKind() != RawCommentAndCacheFlags::NoCommentInDecl) { + RC = Raw.getRaw(); + break; + } + } else { + RC = getRawCommentForDeclNoCache(*I); + RawCommentAndCacheFlags Raw; + if (RC) { + Raw.setRaw(RC); + Raw.setKind(RawCommentAndCacheFlags::FromDecl); + } else + Raw.setKind(RawCommentAndCacheFlags::NoCommentInDecl); + RedeclComments[*I] = Raw; + if (RC) + break; + } + } + + // If we found a comment, it should be a documentation comment. + assert(!RC || RC->isDocumentation()); + + // Update cache for every declaration in the redeclaration chain. + RawCommentAndCacheFlags Raw; + Raw.setRaw(RC); + Raw.setKind(RawCommentAndCacheFlags::FromRedecl); + + for (Decl::redecl_iterator I = D->redecls_begin(), + E = D->redecls_end(); + I != E; ++I) { + RawCommentAndCacheFlags &R = RedeclComments[*I]; + if (R.getKind() == RawCommentAndCacheFlags::NoCommentInDecl) + R = Raw; + } + + return RC; +} + +comments::FullComment *ASTContext::getCommentForDecl(const Decl *D) const { + const RawComment *RC = getRawCommentForAnyRedecl(D); + if (!RC) + return NULL; + + return RC->getParsed(*this); +} + void ASTContext::CanonicalTemplateTemplateParm::Profile(llvm::FoldingSetNodeID &ID, TemplateTemplateParmDecl *Parm) { @@ -206,7 +419,10 @@ static const LangAS::Map *getAddressSpaceMap(const TargetInfo &T, static const unsigned FakeAddrSpaceMap[] = { 1, // opencl_global 2, // opencl_local - 3 // opencl_constant + 3, // opencl_constant + 4, // cuda_device + 5, // cuda_constant + 6 // cuda_shared }; return &FakeAddrSpaceMap; } else { @@ -226,6 +442,7 @@ ASTContext::ASTContext(LangOptions& LOpts, SourceManager &SM, SubstTemplateTemplateParmPacks(this_()), GlobalNestedNameSpecifier(0), Int128Decl(0), UInt128Decl(0), + BuiltinVaListDecl(0), ObjCIdDecl(0), ObjCSelDecl(0), ObjCClassDecl(0), ObjCProtocolClassDecl(0), CFConstantStringTypeDecl(0), ObjCInstanceTypeDecl(0), FILEDecl(0), @@ -240,6 +457,7 @@ ASTContext::ASTContext(LangOptions& LOpts, SourceManager &SM, BuiltinInfo(builtins), DeclarationNames(*this), ExternalSource(0), Listener(0), + Comments(SM), CommentsLoaded(false), LastSDM(0, 0), UniqueBlockByRefTypeID(0) { @@ -436,6 +654,8 @@ void ASTContext::InitBuiltinTypes(const TargetInfo &Target) { } else // C99 WCharTy = getFromTargetType(Target.getWCharType()); + WIntTy = getFromTargetType(Target.getWIntType()); + if (LangOpts.CPlusPlus) // C++0x 3.9.1p5, extension for C++ InitBuiltinType(Char16Ty, BuiltinType::Char16); else // C99 @@ -473,8 +693,6 @@ void ASTContext::InitBuiltinTypes(const TargetInfo &Target) { DoubleComplexTy = getComplexType(DoubleTy); LongDoubleComplexTy = getComplexType(LongDoubleTy); - BuiltinVaListType = QualType(); - // Builtin types for 'id', 'Class', and 'SEL'. 
InitBuiltinType(ObjCBuiltinIdTy, BuiltinType::ObjCId); InitBuiltinType(ObjCBuiltinClassTy, BuiltinType::ObjCClass); @@ -494,6 +712,9 @@ void ASTContext::InitBuiltinTypes(const TargetInfo &Target) { // half type (OpenCL 6.1.1.1) / ARM NEON __fp16 InitBuiltinType(HalfTy, BuiltinType::Half); + + // Builtin type used to help define __builtin_va_list. + VaListTagTy = QualType(); } DiagnosticsEngine &ASTContext::getDiagnostics() const { @@ -881,6 +1102,10 @@ ASTContext::getTypeInfoImpl(const Type *T) const { Align = llvm::NextPowerOf2(Align); Width = llvm::RoundUpToAlignment(Width, Align); } + // Adjust the alignment based on the target max. + uint64_t TargetVectorAlign = Target->getMaxVectorAlign(); + if (TargetVectorAlign && TargetVectorAlign < Align) + Align = TargetVectorAlign; break; } @@ -1337,14 +1562,6 @@ void ASTContext::setBlockVarCopyInits(VarDecl*VD, Expr* Init) { BlockVarCopyInits[VD] = Init; } -/// \brief Allocate an uninitialized TypeSourceInfo. -/// -/// The caller should initialize the memory held by TypeSourceInfo using -/// the TypeLoc wrappers. -/// -/// \param T the type that will be the basis for type source info. This type -/// should refer to how the declarator was written in source code, not to -/// what type semantic analysis resolved the declarator to. TypeSourceInfo *ASTContext::CreateTypeSourceInfo(QualType T, unsigned DataSize) const { if (!DataSize) @@ -2187,15 +2404,18 @@ ASTContext::getFunctionType(QualType ResultTy, // - exception types // - consumed-arguments flags // Instead of the exception types, there could be a noexcept - // expression. + // expression, or information used to resolve the exception + // specification. size_t Size = sizeof(FunctionProtoType) + NumArgs * sizeof(QualType); - if (EPI.ExceptionSpecType == EST_Dynamic) + if (EPI.ExceptionSpecType == EST_Dynamic) { Size += EPI.NumExceptions * sizeof(QualType); - else if (EPI.ExceptionSpecType == EST_ComputedNoexcept) { + } else if (EPI.ExceptionSpecType == EST_ComputedNoexcept) { Size += sizeof(Expr*); } else if (EPI.ExceptionSpecType == EST_Uninstantiated) { Size += 2 * sizeof(FunctionDecl*); + } else if (EPI.ExceptionSpecType == EST_Unevaluated) { + Size += sizeof(FunctionDecl*); } if (EPI.ConsumedArguments) Size += NumArgs * sizeof(bool); @@ -2730,10 +2950,17 @@ QualType ASTContext::getPackExpansionType(QualType Pattern, QualType Canon; if (!Pattern.isCanonical()) { - Canon = getPackExpansionType(getCanonicalType(Pattern), NumExpansions); - - // Find the insert position again. - PackExpansionTypes.FindNodeOrInsertPos(ID, InsertPos); + Canon = getCanonicalType(Pattern); + // The canonical type might not contain an unexpanded parameter pack, if it + // contains an alias template specialization which ignores one of its + // parameters. + if (Canon->containsUnexpandedParameterPack()) { + Canon = getPackExpansionType(getCanonicalType(Pattern), NumExpansions); + + // Find the insert position again, in case we inserted an element into + // PackExpansionTypes and invalidated our insert position. + PackExpansionTypes.FindNodeOrInsertPos(ID, InsertPos); + } } T = new (*this) PackExpansionType(Pattern, Canon, NumExpansions); @@ -2950,7 +3177,7 @@ QualType ASTContext::getDecltypeType(Expr *e, QualType UnderlyingType) const { if (Canon) { // We already have a "canonical" version of an equivalent, dependent // decltype type. Use that as our canonical type. 
- dt = new (*this, TypeAlignment) DecltypeType(e, DependentTy, + dt = new (*this, TypeAlignment) DecltypeType(e, UnderlyingType, QualType((DecltypeType*)Canon, 0)); } else { // Build a new, canonical typeof(expr) type. @@ -3331,8 +3558,7 @@ ASTContext::getCanonicalTemplateArgument(const TemplateArgument &Arg) const { Arg.getNumTemplateExpansions()); case TemplateArgument::Integral: - return TemplateArgument(*Arg.getAsIntegral(), - getCanonicalType(Arg.getIntegralType())); + return TemplateArgument(Arg, getCanonicalType(Arg.getIntegralType())); case TemplateArgument::Type: return TemplateArgument(getCanonicalType(Arg.getAsType())); @@ -3471,7 +3697,7 @@ const ArrayType *ASTContext::getAsArrayType(QualType T) const { VAT->getBracketsRange())); } -QualType ASTContext::getAdjustedParameterType(QualType T) { +QualType ASTContext::getAdjustedParameterType(QualType T) const { // C99 6.7.5.3p7: // A declaration of a parameter as "array of type" shall be // adjusted to "qualified pointer to type", where the type @@ -3490,7 +3716,7 @@ QualType ASTContext::getAdjustedParameterType(QualType T) { return T; } -QualType ASTContext::getSignatureParameterType(QualType T) { +QualType ASTContext::getSignatureParameterType(QualType T) const { T = getVariableArrayDecayedType(T); T = getAdjustedParameterType(T); return T.getUnqualifiedType(); @@ -3809,7 +4035,7 @@ QualType ASTContext::getCFConstantStringType() const { FieldTypes[i], /*TInfo=*/0, /*BitWidth=*/0, /*Mutable=*/false, - /*HasInit=*/false); + ICIS_NoInit); Field->setAccess(AS_public); CFConstantStringTypeDecl->addDecl(Field); } @@ -3853,7 +4079,7 @@ QualType ASTContext::getBlockDescriptorType() const { FieldTypes[i], /*TInfo=*/0, /*BitWidth=*/0, /*Mutable=*/false, - /*HasInit=*/false); + ICIS_NoInit); Field->setAccess(AS_public); T->addDecl(Field); } @@ -3896,7 +4122,7 @@ QualType ASTContext::getBlockDescriptorExtendedType() const { FieldTypes[i], /*TInfo=*/0, /*BitWidth=*/0, /*Mutable=*/false, - /*HasInit=*/false); + ICIS_NoInit); Field->setAccess(AS_public); T->addDecl(Field); } @@ -3972,7 +4198,7 @@ ASTContext::BuildByRefType(StringRef DeclName, QualType Ty) const { &Idents.get(FieldNames[i]), FieldTypes[i], /*TInfo=*/0, /*BitWidth=*/0, /*Mutable=*/false, - /*HasInit=*/false); + ICIS_NoInit); Field->setAccess(AS_public); T->addDecl(Field); } @@ -4045,6 +4271,8 @@ std::string ASTContext::getObjCEncodingForBlock(const BlockExpr *Expr) const { E = Decl->param_end(); PI != E; ++PI) { QualType PType = (*PI)->getType(); CharUnits sz = getObjCEncodingTypeSize(PType); + if (sz.isZero()) + continue; assert (sz.isPositive() && "BlockExpr - Incomplete param type"); ParmOffset += sz; } @@ -4086,8 +4314,8 @@ bool ASTContext::getObjCEncodingForFunctionDecl(const FunctionDecl *Decl, QualType PType = (*PI)->getType(); CharUnits sz = getObjCEncodingTypeSize(PType); if (sz.isZero()) - return true; - + continue; + assert (sz.isPositive() && "getObjCEncodingForFunctionDecl - Incomplete param type"); ParmOffset += sz; @@ -4155,8 +4383,8 @@ bool ASTContext::getObjCEncodingForMethodDecl(const ObjCMethodDecl *Decl, QualType PType = (*PI)->getType(); CharUnits sz = getObjCEncodingTypeSize(PType); if (sz.isZero()) - return true; - + continue; + assert (sz.isPositive() && "getObjCEncodingForMethodDecl - Incomplete param type"); ParmOffset += sz; @@ -4387,7 +4615,7 @@ static void EncodeBitField(const ASTContext *Ctx, std::string& S, // information is not especially sensible, but we're stuck with it for // compatibility with GCC, although providing it breaks anything that // 
actually uses runtime introspection and wants to work on both runtimes... - if (!Ctx->getLangOpts().NeXTRuntime) { + if (Ctx->getLangOpts().ObjCRuntime.isGNUFamily()) { const RecordDecl *RD = FD->getParent(); const ASTRecordLayout &RL = Ctx->getASTRecordLayout(RD); S += llvm::utostr(RL.getFieldOffset(FD->getFieldIndex())); @@ -4563,7 +4791,7 @@ void ASTContext::getObjCEncodingForTypeImpl(QualType T, std::string& S, // Special case bit-fields. if (Field->isBitField()) { getObjCEncodingForTypeImpl(Field->getType(), S, false, true, - (*Field)); + *Field); } else { QualType qt = Field->getType(); getLegacyIntegralTypeEncoding(qt); @@ -4746,7 +4974,7 @@ void ASTContext::getObjCEncodingForStructureImpl(RecordDecl *RDecl, CXXRecordDecl *base = BI->getType()->getAsCXXRecordDecl(); if (base->isEmpty()) continue; - uint64_t offs = layout.getBaseClassOffsetInBits(base); + uint64_t offs = toBits(layout.getBaseClassOffset(base)); FieldOrBaseOffsets.insert(FieldOrBaseOffsets.upper_bound(offs), std::make_pair(offs, base)); } @@ -4769,7 +4997,7 @@ void ASTContext::getObjCEncodingForStructureImpl(RecordDecl *RDecl, CXXRecordDecl *base = BI->getType()->getAsCXXRecordDecl(); if (base->isEmpty()) continue; - uint64_t offs = layout.getVBaseClassOffsetInBits(base); + uint64_t offs = toBits(layout.getVBaseClassOffset(base)); if (FieldOrBaseOffsets.find(offs) == FieldOrBaseOffsets.end()) FieldOrBaseOffsets.insert(FieldOrBaseOffsets.end(), std::make_pair(offs, base)); @@ -4787,11 +5015,8 @@ void ASTContext::getObjCEncodingForStructureImpl(RecordDecl *RDecl, std::multimap::iterator CurLayObj = FieldOrBaseOffsets.begin(); - if ((CurLayObj != FieldOrBaseOffsets.end() && CurLayObj->first != 0) || - (CurLayObj == FieldOrBaseOffsets.end() && - CXXRec && CXXRec->isDynamicClass())) { - assert(CXXRec && CXXRec->isDynamicClass() && - "Offset 0 was empty but no VTable ?"); + if (CXXRec && CXXRec->isDynamicClass() && + (CurLayObj == FieldOrBaseOffsets.end() || CurLayObj->first != 0)) { if (FD) { S += "\"_vptr$"; std::string recname = CXXRec->getNameAsString(); @@ -4877,12 +5102,6 @@ void ASTContext::getObjCEncodingForTypeQualifier(Decl::ObjCDeclQualifier QT, S += 'V'; } -void ASTContext::setBuiltinVaListType(QualType T) { - assert(BuiltinVaListType.isNull() && "__builtin_va_list type already set!"); - - BuiltinVaListType = T; -} - TypedefDecl *ASTContext::getObjCIdDecl() const { if (!ObjCIdDecl) { QualType T = getObjCObjectType(ObjCBuiltinIdTy, 0, 0); @@ -4936,6 +5155,241 @@ ObjCInterfaceDecl *ASTContext::getObjCProtocolDecl() const { return ObjCProtocolClassDecl; } +//===----------------------------------------------------------------------===// +// __builtin_va_list Construction Functions +//===----------------------------------------------------------------------===// + +static TypedefDecl *CreateCharPtrBuiltinVaListDecl(const ASTContext *Context) { + // typedef char* __builtin_va_list; + QualType CharPtrType = Context->getPointerType(Context->CharTy); + TypeSourceInfo *TInfo + = Context->getTrivialTypeSourceInfo(CharPtrType); + + TypedefDecl *VaListTypeDecl + = TypedefDecl::Create(const_cast(*Context), + Context->getTranslationUnitDecl(), + SourceLocation(), SourceLocation(), + &Context->Idents.get("__builtin_va_list"), + TInfo); + return VaListTypeDecl; +} + +static TypedefDecl *CreateVoidPtrBuiltinVaListDecl(const ASTContext *Context) { + // typedef void* __builtin_va_list; + QualType VoidPtrType = Context->getPointerType(Context->VoidTy); + TypeSourceInfo *TInfo + = Context->getTrivialTypeSourceInfo(VoidPtrType); + + 
TypedefDecl *VaListTypeDecl + = TypedefDecl::Create(const_cast(*Context), + Context->getTranslationUnitDecl(), + SourceLocation(), SourceLocation(), + &Context->Idents.get("__builtin_va_list"), + TInfo); + return VaListTypeDecl; +} + +static TypedefDecl *CreatePowerABIBuiltinVaListDecl(const ASTContext *Context) { + // typedef struct __va_list_tag { + RecordDecl *VaListTagDecl; + + VaListTagDecl = CreateRecordDecl(*Context, TTK_Struct, + Context->getTranslationUnitDecl(), + &Context->Idents.get("__va_list_tag")); + VaListTagDecl->startDefinition(); + + const size_t NumFields = 5; + QualType FieldTypes[NumFields]; + const char *FieldNames[NumFields]; + + // unsigned char gpr; + FieldTypes[0] = Context->UnsignedCharTy; + FieldNames[0] = "gpr"; + + // unsigned char fpr; + FieldTypes[1] = Context->UnsignedCharTy; + FieldNames[1] = "fpr"; + + // unsigned short reserved; + FieldTypes[2] = Context->UnsignedShortTy; + FieldNames[2] = "reserved"; + + // void* overflow_arg_area; + FieldTypes[3] = Context->getPointerType(Context->VoidTy); + FieldNames[3] = "overflow_arg_area"; + + // void* reg_save_area; + FieldTypes[4] = Context->getPointerType(Context->VoidTy); + FieldNames[4] = "reg_save_area"; + + // Create fields + for (unsigned i = 0; i < NumFields; ++i) { + FieldDecl *Field = FieldDecl::Create(*Context, VaListTagDecl, + SourceLocation(), + SourceLocation(), + &Context->Idents.get(FieldNames[i]), + FieldTypes[i], /*TInfo=*/0, + /*BitWidth=*/0, + /*Mutable=*/false, + ICIS_NoInit); + Field->setAccess(AS_public); + VaListTagDecl->addDecl(Field); + } + VaListTagDecl->completeDefinition(); + QualType VaListTagType = Context->getRecordType(VaListTagDecl); + Context->VaListTagTy = VaListTagType; + + // } __va_list_tag; + TypedefDecl *VaListTagTypedefDecl + = TypedefDecl::Create(const_cast(*Context), + Context->getTranslationUnitDecl(), + SourceLocation(), SourceLocation(), + &Context->Idents.get("__va_list_tag"), + Context->getTrivialTypeSourceInfo(VaListTagType)); + QualType VaListTagTypedefType = + Context->getTypedefType(VaListTagTypedefDecl); + + // typedef __va_list_tag __builtin_va_list[1]; + llvm::APInt Size(Context->getTypeSize(Context->getSizeType()), 1); + QualType VaListTagArrayType + = Context->getConstantArrayType(VaListTagTypedefType, + Size, ArrayType::Normal, 0); + TypeSourceInfo *TInfo + = Context->getTrivialTypeSourceInfo(VaListTagArrayType); + TypedefDecl *VaListTypedefDecl + = TypedefDecl::Create(const_cast(*Context), + Context->getTranslationUnitDecl(), + SourceLocation(), SourceLocation(), + &Context->Idents.get("__builtin_va_list"), + TInfo); + + return VaListTypedefDecl; +} + +static TypedefDecl * +CreateX86_64ABIBuiltinVaListDecl(const ASTContext *Context) { + // typedef struct __va_list_tag { + RecordDecl *VaListTagDecl; + VaListTagDecl = CreateRecordDecl(*Context, TTK_Struct, + Context->getTranslationUnitDecl(), + &Context->Idents.get("__va_list_tag")); + VaListTagDecl->startDefinition(); + + const size_t NumFields = 4; + QualType FieldTypes[NumFields]; + const char *FieldNames[NumFields]; + + // unsigned gp_offset; + FieldTypes[0] = Context->UnsignedIntTy; + FieldNames[0] = "gp_offset"; + + // unsigned fp_offset; + FieldTypes[1] = Context->UnsignedIntTy; + FieldNames[1] = "fp_offset"; + + // void* overflow_arg_area; + FieldTypes[2] = Context->getPointerType(Context->VoidTy); + FieldNames[2] = "overflow_arg_area"; + + // void* reg_save_area; + FieldTypes[3] = Context->getPointerType(Context->VoidTy); + FieldNames[3] = "reg_save_area"; + + // Create fields + for (unsigned i = 
0; i < NumFields; ++i) { + FieldDecl *Field = FieldDecl::Create(const_cast(*Context), + VaListTagDecl, + SourceLocation(), + SourceLocation(), + &Context->Idents.get(FieldNames[i]), + FieldTypes[i], /*TInfo=*/0, + /*BitWidth=*/0, + /*Mutable=*/false, + ICIS_NoInit); + Field->setAccess(AS_public); + VaListTagDecl->addDecl(Field); + } + VaListTagDecl->completeDefinition(); + QualType VaListTagType = Context->getRecordType(VaListTagDecl); + Context->VaListTagTy = VaListTagType; + + // } __va_list_tag; + TypedefDecl *VaListTagTypedefDecl + = TypedefDecl::Create(const_cast(*Context), + Context->getTranslationUnitDecl(), + SourceLocation(), SourceLocation(), + &Context->Idents.get("__va_list_tag"), + Context->getTrivialTypeSourceInfo(VaListTagType)); + QualType VaListTagTypedefType = + Context->getTypedefType(VaListTagTypedefDecl); + + // typedef __va_list_tag __builtin_va_list[1]; + llvm::APInt Size(Context->getTypeSize(Context->getSizeType()), 1); + QualType VaListTagArrayType + = Context->getConstantArrayType(VaListTagTypedefType, + Size, ArrayType::Normal,0); + TypeSourceInfo *TInfo + = Context->getTrivialTypeSourceInfo(VaListTagArrayType); + TypedefDecl *VaListTypedefDecl + = TypedefDecl::Create(const_cast(*Context), + Context->getTranslationUnitDecl(), + SourceLocation(), SourceLocation(), + &Context->Idents.get("__builtin_va_list"), + TInfo); + + return VaListTypedefDecl; +} + +static TypedefDecl *CreatePNaClABIBuiltinVaListDecl(const ASTContext *Context) { + // typedef int __builtin_va_list[4]; + llvm::APInt Size(Context->getTypeSize(Context->getSizeType()), 4); + QualType IntArrayType + = Context->getConstantArrayType(Context->IntTy, + Size, ArrayType::Normal, 0); + TypedefDecl *VaListTypedefDecl + = TypedefDecl::Create(const_cast(*Context), + Context->getTranslationUnitDecl(), + SourceLocation(), SourceLocation(), + &Context->Idents.get("__builtin_va_list"), + Context->getTrivialTypeSourceInfo(IntArrayType)); + + return VaListTypedefDecl; +} + +static TypedefDecl *CreateVaListDecl(const ASTContext *Context, + TargetInfo::BuiltinVaListKind Kind) { + switch (Kind) { + case TargetInfo::CharPtrBuiltinVaList: + return CreateCharPtrBuiltinVaListDecl(Context); + case TargetInfo::VoidPtrBuiltinVaList: + return CreateVoidPtrBuiltinVaListDecl(Context); + case TargetInfo::PowerABIBuiltinVaList: + return CreatePowerABIBuiltinVaListDecl(Context); + case TargetInfo::X86_64ABIBuiltinVaList: + return CreateX86_64ABIBuiltinVaListDecl(Context); + case TargetInfo::PNaClABIBuiltinVaList: + return CreatePNaClABIBuiltinVaListDecl(Context); + } + + llvm_unreachable("Unhandled __builtin_va_list type kind"); +} + +TypedefDecl *ASTContext::getBuiltinVaListDecl() const { + if (!BuiltinVaListDecl) + BuiltinVaListDecl = CreateVaListDecl(this, Target->getBuiltinVaListKind()); + + return BuiltinVaListDecl; +} + +QualType ASTContext::getVaListTagType() const { + // Force the creation of VaListTagTy by building the __builtin_va_list + // declaration. 
+ if (VaListTagTy.isNull()) + (void) getBuiltinVaListDecl(); + + return VaListTagTy; +} + void ASTContext::setObjCConstantStringInterface(ObjCInterfaceDecl *Decl) { assert(ObjCConstantStringType.isNull() && "'NSConstantString' type already set!"); @@ -6412,6 +6866,19 @@ static QualType DecodeTypeFromStr(const char *&Str, const ASTContext &Context, VectorType::GenericVector); break; } + case 'E': { + char *End; + + unsigned NumElements = strtoul(Str, &End, 10); + assert(End != Str && "Missing vector size"); + + Str = End; + + QualType ElementType = DecodeTypeFromStr(Str, Context, Error, RequiresICE, + false); + Type = Context.getExtVectorType(ElementType, NumElements); + break; + } case 'X': { QualType ElementType = DecodeTypeFromStr(Str, Context, Error, RequiresICE, false); @@ -6712,9 +7179,15 @@ bool ASTContext::DeclMustBeEmitted(const Decl *D) { return true; } -CallingConv ASTContext::getDefaultMethodCallConv() { +CallingConv ASTContext::getDefaultCXXMethodCallConv(bool isVariadic) { // Pass through to the C++ ABI object - return ABI->getDefaultMethodCallConv(); + return ABI->getDefaultMethodCallConv(isVariadic); +} + +CallingConv ASTContext::getCanonicalCallConv(CallingConv CC) const { + if (CC == CC_C && !LangOpts.MRTD && getTargetInfo().getCXXABI() != CXXABI_Microsoft) + return CC_Default; + return CC; } bool ASTContext::isNearlyEmpty(const CXXRecordDecl *RD) const { diff --git a/lib/AST/ASTDiagnostic.cpp b/lib/AST/ASTDiagnostic.cpp index ca4fe26..35fcd41 100644 --- a/lib/AST/ASTDiagnostic.cpp +++ b/lib/AST/ASTDiagnostic.cpp @@ -14,7 +14,11 @@ #include "clang/AST/ASTContext.h" #include "clang/AST/DeclObjC.h" +#include "clang/AST/TemplateBase.h" +#include "clang/AST/ExprCXX.h" +#include "clang/AST/DeclTemplate.h" #include "clang/AST/Type.h" +#include "llvm/ADT/SmallString.h" #include "llvm/Support/raw_ostream.h" using namespace clang; @@ -225,6 +229,11 @@ ConvertTypeToDiagnosticString(ASTContext &Context, QualType Ty, return S; } +static bool FormatTemplateTypeDiff(ASTContext &Context, QualType FromType, + QualType ToType, bool PrintTree, + bool PrintFromType, bool ElideType, + bool ShowColors, std::string &S); + void clang::FormatASTNodeDiagnosticArgument( DiagnosticsEngine::ArgumentKind Kind, intptr_t Val, @@ -244,6 +253,33 @@ void clang::FormatASTNodeDiagnosticArgument( switch (Kind) { default: llvm_unreachable("unknown ArgumentKind"); + case DiagnosticsEngine::ak_qualtype_pair: { + TemplateDiffTypes &TDT = *reinterpret_cast(Val); + QualType FromType = + QualType::getFromOpaquePtr(reinterpret_cast(TDT.FromType)); + QualType ToType = + QualType::getFromOpaquePtr(reinterpret_cast(TDT.ToType)); + + if (FormatTemplateTypeDiff(Context, FromType, ToType, TDT.PrintTree, + TDT.PrintFromType, TDT.ElideType, + TDT.ShowColors, S)) { + NeedQuotes = !TDT.PrintTree; + TDT.TemplateDiffUsed = true; + break; + } + + // Don't fall-back during tree printing. The caller will handle + // this case. + if (TDT.PrintTree) + return; + + // Attempting to do a templete diff on non-templates. Set the variables + // and continue with regular type printing of the appropriate type. + Val = TDT.PrintFromType ? TDT.FromType : TDT.ToType; + ModLen = 0; + ArgLen = 0; + // Fall through + } case DiagnosticsEngine::ak_qualtype: { assert(ModLen == 0 && ArgLen == 0 && "Invalid modifier for QualType argument"); @@ -329,3 +365,901 @@ void clang::FormatASTNodeDiagnosticArgument( if (NeedQuotes) Output.push_back('\''); } + +/// TemplateDiff - A class that constructs a pretty string for a pair of +/// QualTypes. 
For the pair of types, a diff tree will be created containing +/// all the information about the templates and template arguments. Afterwards, +/// the tree is transformed to a string according to the options passed in. +namespace { +class TemplateDiff { + /// Context - The ASTContext which is used for comparing template arguments. + ASTContext &Context; + + /// Policy - Used during expression printing. + PrintingPolicy Policy; + + /// ElideType - Option to elide identical types. + bool ElideType; + + /// PrintTree - Format output string as a tree. + bool PrintTree; + + /// ShowColor - Diagnostics support color, so bolding will be used. + bool ShowColor; + + /// FromType - When single type printing is selected, this is the type to be + /// be printed. When tree printing is selected, this type will show up first + /// in the tree. + QualType FromType; + + /// ToType - The type that FromType is compared to. Only in tree printing + /// will this type be outputed. + QualType ToType; + + /// Str - Storage for the output stream. + llvm::SmallString<128> Str; + + /// OS - The stream used to construct the output strings. + llvm::raw_svector_ostream OS; + + /// IsBold - Keeps track of the bold formatting for the output string. + bool IsBold; + + /// DiffTree - A tree representation the differences between two types. + class DiffTree { + /// DiffNode - The root node stores the original type. Each child node + /// stores template arguments of their parents. For templated types, the + /// template decl is also stored. + struct DiffNode { + /// NextNode - The index of the next sibling node or 0. + unsigned NextNode; + + /// ChildNode - The index of the first child node or 0. + unsigned ChildNode; + + /// ParentNode - The index of the parent node. + unsigned ParentNode; + + /// FromType, ToType - The type arguments. + QualType FromType, ToType; + + /// FromExpr, ToExpr - The expression arguments. + Expr *FromExpr, *ToExpr; + + /// FromTD, ToTD - The template decl for template template + /// arguments or the type arguments that are templates. + TemplateDecl *FromTD, *ToTD; + + /// FromDefault, ToDefault - Whether the argument is a default argument. + bool FromDefault, ToDefault; + + /// Same - Whether the two arguments evaluate to the same value. + bool Same; + + DiffNode(unsigned ParentNode = 0) + : NextNode(0), ChildNode(0), ParentNode(ParentNode), + FromType(), ToType(), FromExpr(0), ToExpr(0), FromTD(0), ToTD(0), + FromDefault(false), ToDefault(false), Same(false) { } + }; + + /// FlatTree - A flattened tree used to store the DiffNodes. + llvm::SmallVector FlatTree; + + /// CurrentNode - The index of the current node being used. + unsigned CurrentNode; + + /// NextFreeNode - The index of the next unused node. Used when creating + /// child nodes. + unsigned NextFreeNode; + + /// ReadNode - The index of the current node being read. + unsigned ReadNode; + + public: + DiffTree() : + CurrentNode(0), NextFreeNode(1) { + FlatTree.push_back(DiffNode()); + } + + // Node writing functions. + /// SetNode - Sets FromTD and ToTD of the current node. + void SetNode(TemplateDecl *FromTD, TemplateDecl *ToTD) { + FlatTree[CurrentNode].FromTD = FromTD; + FlatTree[CurrentNode].ToTD = ToTD; + } + + /// SetNode - Sets FromType and ToType of the current node. + void SetNode(QualType FromType, QualType ToType) { + FlatTree[CurrentNode].FromType = FromType; + FlatTree[CurrentNode].ToType = ToType; + } + + /// SetNode - Set FromExpr and ToExpr of the current node. 
+ /// SetNode - Set FromExpr and ToExpr of the current node.
+ void SetNode(Expr *FromExpr, Expr *ToExpr) {
+ FlatTree[CurrentNode].FromExpr = FromExpr;
+ FlatTree[CurrentNode].ToExpr = ToExpr;
+ }
+
+ /// SetSame - Sets the same flag of the current node.
+ void SetSame(bool Same) {
+ FlatTree[CurrentNode].Same = Same;
+ }
+
+ /// SetDefault - Sets FromDefault and ToDefault flags of the current node.
+ void SetDefault(bool FromDefault, bool ToDefault) {
+ FlatTree[CurrentNode].FromDefault = FromDefault;
+ FlatTree[CurrentNode].ToDefault = ToDefault;
+ }
+
+ /// Up - Changes the node to the parent of the current node.
+ void Up() {
+ CurrentNode = FlatTree[CurrentNode].ParentNode;
+ }
+
+ /// AddNode - Adds a child node to the current node, then sets that node
+ /// as the current node.
+ void AddNode() {
+ FlatTree.push_back(DiffNode(CurrentNode));
+ DiffNode &Node = FlatTree[CurrentNode];
+ if (Node.ChildNode == 0) {
+ // If a child node doesn't exist, add one.
+ Node.ChildNode = NextFreeNode;
+ } else {
+ // If a child node exists, find the last child node and add a
+ // next node to it.
+ unsigned i;
+ for (i = Node.ChildNode; FlatTree[i].NextNode != 0;
+ i = FlatTree[i].NextNode) {
+ }
+ FlatTree[i].NextNode = NextFreeNode;
+ }
+ CurrentNode = NextFreeNode;
+ ++NextFreeNode;
+ }
+
+ // Node reading functions.
+ /// StartTraverse - Prepares the tree for recursive traversal.
+ void StartTraverse() {
+ ReadNode = 0;
+ CurrentNode = NextFreeNode;
+ NextFreeNode = 0;
+ }
+
+ /// Parent - Move the current read node to its parent.
+ void Parent() {
+ ReadNode = FlatTree[ReadNode].ParentNode;
+ }
+
+ /// NodeIsTemplate - Returns true if a template decl is set, and types are
+ /// set.
+ bool NodeIsTemplate() {
+ return (FlatTree[ReadNode].FromTD &&
+ !FlatTree[ReadNode].ToType.isNull()) ||
+ (FlatTree[ReadNode].ToTD && !FlatTree[ReadNode].ToType.isNull());
+ }
+
+ /// NodeIsQualType - Returns true if a Qualtype is set.
+ bool NodeIsQualType() {
+ return !FlatTree[ReadNode].FromType.isNull() ||
+ !FlatTree[ReadNode].ToType.isNull();
+ }
+
+ /// NodeIsExpr - Returns true if an expr is set.
+ bool NodeIsExpr() {
+ return FlatTree[ReadNode].FromExpr || FlatTree[ReadNode].ToExpr;
+ }
+
+ /// NodeIsTemplateTemplate - Returns true if the argument is a template
+ /// template type.
+ bool NodeIsTemplateTemplate() {
+ return FlatTree[ReadNode].FromType.isNull() &&
+ FlatTree[ReadNode].ToType.isNull() &&
+ (FlatTree[ReadNode].FromTD || FlatTree[ReadNode].ToTD);
+ }
+
+ /// GetNode - Gets the FromType and ToType.
+ void GetNode(QualType &FromType, QualType &ToType) {
+ FromType = FlatTree[ReadNode].FromType;
+ ToType = FlatTree[ReadNode].ToType;
+ }
+
+ /// GetNode - Gets the FromExpr and ToExpr.
+ void GetNode(Expr *&FromExpr, Expr *&ToExpr) {
+ FromExpr = FlatTree[ReadNode].FromExpr;
+ ToExpr = FlatTree[ReadNode].ToExpr;
+ }
+
+ /// GetNode - Gets the FromTD and ToTD.
+ void GetNode(TemplateDecl *&FromTD, TemplateDecl *&ToTD) {
+ FromTD = FlatTree[ReadNode].FromTD;
+ ToTD = FlatTree[ReadNode].ToTD;
+ }
+
+ /// NodeIsSame - Returns true if the arguments are the same.
+ bool NodeIsSame() {
+ return FlatTree[ReadNode].Same;
+ }
+
+ /// HasChildren - Returns true if the node has children.
+ bool HasChildren() {
+ return FlatTree[ReadNode].ChildNode != 0;
+ }
+
+ /// MoveToChild - Moves from the current node to its child.
+ void MoveToChild() {
+ ReadNode = FlatTree[ReadNode].ChildNode;
+ }
+
+ /// AdvanceSibling - If there is a next sibling, advance to it and return
+ /// true. Otherwise, return false.
+ bool AdvanceSibling() { + if (FlatTree[ReadNode].NextNode == 0) + return false; + + ReadNode = FlatTree[ReadNode].NextNode; + return true; + } + + /// HasNextSibling - Return true if the node has a next sibling. + bool HasNextSibling() { + return FlatTree[ReadNode].NextNode != 0; + } + + /// FromDefault - Return true if the from argument is the default. + bool FromDefault() { + return FlatTree[ReadNode].FromDefault; + } + + /// ToDefault - Return true if the to argument is the default. + bool ToDefault() { + return FlatTree[ReadNode].ToDefault; + } + + /// Empty - Returns true if the tree has no information. + bool Empty() { + return !FlatTree[0].FromTD && !FlatTree[0].ToTD && + !FlatTree[0].FromExpr && !FlatTree[0].ToExpr && + FlatTree[0].FromType.isNull() && FlatTree[0].ToType.isNull(); + } + }; + + DiffTree Tree; + + /// TSTiterator - an iterator that is used to enter a + /// TemplateSpecializationType and read TemplateArguments inside template + /// parameter packs in order with the rest of the TemplateArguments. + struct TSTiterator { + typedef const TemplateArgument& reference; + typedef const TemplateArgument* pointer; + + /// TST - the template specialization whose arguments this iterator + /// traverse over. + const TemplateSpecializationType *TST; + + /// Index - the index of the template argument in TST. + unsigned Index; + + /// CurrentTA - if CurrentTA is not the same as EndTA, then CurrentTA + /// points to a TemplateArgument within a parameter pack. + TemplateArgument::pack_iterator CurrentTA; + + /// EndTA - the end iterator of a parameter pack + TemplateArgument::pack_iterator EndTA; + + /// TSTiterator - Constructs an iterator and sets it to the first template + /// argument. + TSTiterator(const TemplateSpecializationType *TST) + : TST(TST), Index(0), CurrentTA(0), EndTA(0) { + if (isEnd()) return; + + // Set to first template argument. If not a parameter pack, done. + TemplateArgument TA = TST->getArg(0); + if (TA.getKind() != TemplateArgument::Pack) return; + + // Start looking into the parameter pack. + CurrentTA = TA.pack_begin(); + EndTA = TA.pack_end(); + + // Found a valid template argument. + if (CurrentTA != EndTA) return; + + // Parameter pack is empty, use the increment to get to a valid + // template argument. + ++(*this); + } + + /// isEnd - Returns true if the iterator is one past the end. + bool isEnd() const { + return Index == TST->getNumArgs(); + } + + /// &operator++ - Increment the iterator to the next template argument. + TSTiterator &operator++() { + assert(!isEnd() && "Iterator incremented past end of arguments."); + + // If in a parameter pack, advance in the parameter pack. + if (CurrentTA != EndTA) { + ++CurrentTA; + if (CurrentTA != EndTA) + return *this; + } + + // Loop until a template argument is found, or the end is reached. + while (true) { + // Advance to the next template argument. Break if reached the end. + if (++Index == TST->getNumArgs()) break; + + // If the TemplateArgument is not a parameter pack, done. + TemplateArgument TA = TST->getArg(Index); + if (TA.getKind() != TemplateArgument::Pack) break; + + // Handle parameter packs. + CurrentTA = TA.pack_begin(); + EndTA = TA.pack_end(); + + // If the parameter pack is empty, try to advance again. + if (CurrentTA != EndTA) break; + } + return *this; + } + + /// operator* - Returns the appropriate TemplateArgument. 
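TSTiterator therefore presents a template's argument list as a single flat
sequence, transparently descending into parameter packs and skipping empty
ones. A standalone sketch of the same flattening over a vector of (possibly
pack) arguments, assuming a simplified Arg type:

#include <cstdio>
#include <vector>

// A simplified template argument: either a scalar value or a pack of values.
struct Arg {
  bool IsPack;
  int Value;             // used when !IsPack
  std::vector<int> Pack; // used when IsPack
};

int main() {
  std::vector<Arg> Args = {
      {false, 1, {}}, {true, 0, {2, 3}}, {true, 0, {}}, {false, 4, {}}};
  // Flatten: packs are expanded in place; empty packs contribute nothing.
  for (const Arg &A : Args) {
    if (!A.IsPack) {
      std::printf("%d ", A.Value);
      continue;
    }
    for (int V : A.Pack)
      std::printf("%d ", V);
  }
  std::printf("\n"); // prints: 1 2 3 4
}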
+ reference operator*() const { + assert(!isEnd() && "Index exceeds number of arguments."); + if (CurrentTA == EndTA) + return TST->getArg(Index); + else + return *CurrentTA; + } + + /// operator-> - Allow access to the underlying TemplateArgument. + pointer operator->() const { + return &operator*(); + } + }; + + // These functions build up the template diff tree, including functions to + // retrieve and compare template arguments. + + static const TemplateSpecializationType * GetTemplateSpecializationType( + ASTContext &Context, QualType Ty) { + if (const TemplateSpecializationType *TST = + Ty->getAs()) + return TST; + + const RecordType *RT = Ty->getAs(); + + if (!RT) + return 0; + + const ClassTemplateSpecializationDecl *CTSD = + dyn_cast(RT->getDecl()); + + if (!CTSD) + return 0; + + Ty = Context.getTemplateSpecializationType( + TemplateName(CTSD->getSpecializedTemplate()), + CTSD->getTemplateArgs().data(), + CTSD->getTemplateArgs().size(), + Ty.getCanonicalType()); + + return Ty->getAs(); + } + + /// DiffTemplate - recursively visits template arguments and stores the + /// argument info into a tree. + void DiffTemplate(const TemplateSpecializationType *FromTST, + const TemplateSpecializationType *ToTST) { + // Begin descent into diffing template tree. + TemplateParameterList *Params = + FromTST->getTemplateName().getAsTemplateDecl()->getTemplateParameters(); + unsigned TotalArgs = 0; + for (TSTiterator FromIter(FromTST), ToIter(ToTST); + !FromIter.isEnd() || !ToIter.isEnd(); ++TotalArgs) { + Tree.AddNode(); + + // Get the parameter at index TotalArgs. If index is larger + // than the total number of parameters, then there is an + // argument pack, so re-use the last parameter. + NamedDecl *ParamND = Params->getParam( + (TotalArgs < Params->size()) ? 
TotalArgs + : Params->size() - 1); + // Handle Types + if (TemplateTypeParmDecl *DefaultTTPD = + dyn_cast(ParamND)) { + QualType FromType, ToType; + GetType(FromIter, DefaultTTPD, FromType); + GetType(ToIter, DefaultTTPD, ToType); + Tree.SetNode(FromType, ToType); + Tree.SetDefault(FromIter.isEnd() && !FromType.isNull(), + ToIter.isEnd() && !ToType.isNull()); + if (!FromType.isNull() && !ToType.isNull()) { + if (Context.hasSameType(FromType, ToType)) { + Tree.SetSame(true); + } else { + const TemplateSpecializationType *FromArgTST = + GetTemplateSpecializationType(Context, FromType); + const TemplateSpecializationType *ToArgTST = + GetTemplateSpecializationType(Context, ToType); + + if (FromArgTST && ToArgTST) { + bool SameTemplate = hasSameTemplate(FromArgTST, ToArgTST); + if (SameTemplate) { + Tree.SetNode(FromArgTST->getTemplateName().getAsTemplateDecl(), + ToArgTST->getTemplateName().getAsTemplateDecl()); + DiffTemplate(FromArgTST, ToArgTST); + } + } + } + } + } + + // Handle Expressions + if (NonTypeTemplateParmDecl *DefaultNTTPD = + dyn_cast(ParamND)) { + Expr *FromExpr, *ToExpr; + GetExpr(FromIter, DefaultNTTPD, FromExpr); + GetExpr(ToIter, DefaultNTTPD, ToExpr); + Tree.SetNode(FromExpr, ToExpr); + Tree.SetSame(IsEqualExpr(Context, FromExpr, ToExpr)); + Tree.SetDefault(FromIter.isEnd() && FromExpr, + ToIter.isEnd() && ToExpr); + } + + // Handle Templates + if (TemplateTemplateParmDecl *DefaultTTPD = + dyn_cast(ParamND)) { + TemplateDecl *FromDecl, *ToDecl; + GetTemplateDecl(FromIter, DefaultTTPD, FromDecl); + GetTemplateDecl(ToIter, DefaultTTPD, ToDecl); + Tree.SetNode(FromDecl, ToDecl); + Tree.SetSame(FromDecl && ToDecl && + FromDecl->getIdentifier() == ToDecl->getIdentifier()); + } + + if (!FromIter.isEnd()) ++FromIter; + if (!ToIter.isEnd()) ++ToIter; + Tree.Up(); + } + } + + /// hasSameTemplate - Returns true if both types are specialized from the + /// same template declaration. If they come from different template aliases, + /// do a parallel ascension search to determine the highest template alias in + /// common and set the arguments to them. + static bool hasSameTemplate(const TemplateSpecializationType *&FromTST, + const TemplateSpecializationType *&ToTST) { + // Check the top templates if they are the same. + if (FromTST->getTemplateName().getAsTemplateDecl()->getIdentifier() == + ToTST->getTemplateName().getAsTemplateDecl()->getIdentifier()) + return true; + + // Create vectors of template aliases. + SmallVector FromTemplateList, + ToTemplateList; + + const TemplateSpecializationType *TempToTST = ToTST, *TempFromTST = FromTST; + FromTemplateList.push_back(FromTST); + ToTemplateList.push_back(ToTST); + + // Dump every template alias into the vectors. + while (TempFromTST->isTypeAlias()) { + TempFromTST = + TempFromTST->getAliasedType()->getAs(); + if (!TempFromTST) + break; + FromTemplateList.push_back(TempFromTST); + } + while (TempToTST->isTypeAlias()) { + TempToTST = + TempToTST->getAliasedType()->getAs(); + if (!TempToTST) + break; + ToTemplateList.push_back(TempToTST); + } + + SmallVector::reverse_iterator + FromIter = FromTemplateList.rbegin(), FromEnd = FromTemplateList.rend(), + ToIter = ToTemplateList.rbegin(), ToEnd = ToTemplateList.rend(); + + // Check if the lowest template types are the same. If not, return. + if ((*FromIter)->getTemplateName().getAsTemplateDecl()->getIdentifier() != + (*ToIter)->getTemplateName().getAsTemplateDecl()->getIdentifier()) + return false; + + // Begin searching up the template aliases. 
The bottommost template
+ // matches, so move up until one pair does not match. Use the template
+ // right before that one.
+ for (; FromIter != FromEnd && ToIter != ToEnd; ++FromIter, ++ToIter) {
+ if ((*FromIter)->getTemplateName().getAsTemplateDecl()->getIdentifier() !=
+ (*ToIter)->getTemplateName().getAsTemplateDecl()->getIdentifier())
+ break;
+ }
+
+ FromTST = FromIter[-1];
+ ToTST = ToIter[-1];
+
+ return true;
+ }
+
+ /// GetType - Retrieves the template type arguments, including default
+ /// arguments.
+ void GetType(const TSTiterator &Iter, TemplateTypeParmDecl *DefaultTTPD,
+ QualType &ArgType) {
+ ArgType = QualType();
+ bool isVariadic = DefaultTTPD->isParameterPack();
+
+ if (!Iter.isEnd())
+ ArgType = Iter->getAsType();
+ else if (!isVariadic)
+ ArgType = DefaultTTPD->getDefaultArgument();
+ }
+
+ /// GetExpr - Retrieves the template expression argument, including default
+ /// arguments.
+ void GetExpr(const TSTiterator &Iter, NonTypeTemplateParmDecl *DefaultNTTPD,
+ Expr *&ArgExpr) {
+ ArgExpr = 0;
+ bool isVariadic = DefaultNTTPD->isParameterPack();
+
+ if (!Iter.isEnd())
+ ArgExpr = Iter->getAsExpr();
+ else if (!isVariadic)
+ ArgExpr = DefaultNTTPD->getDefaultArgument();
+
+ if (ArgExpr)
+ while (SubstNonTypeTemplateParmExpr *SNTTPE =
+ dyn_cast<SubstNonTypeTemplateParmExpr>(ArgExpr))
+ ArgExpr = SNTTPE->getReplacement();
+ }
+
+ /// GetTemplateDecl - Retrieves the template template arguments, including
+ /// default arguments.
+ void GetTemplateDecl(const TSTiterator &Iter,
+ TemplateTemplateParmDecl *DefaultTTPD,
+ TemplateDecl *&ArgDecl) {
+ ArgDecl = 0;
+ bool isVariadic = DefaultTTPD->isParameterPack();
+
+ TemplateArgument TA = DefaultTTPD->getDefaultArgument().getArgument();
+ TemplateDecl *DefaultTD = TA.getAsTemplate().getAsTemplateDecl();
+
+ if (!Iter.isEnd())
+ ArgDecl = Iter->getAsTemplate().getAsTemplateDecl();
+ else if (!isVariadic)
+ ArgDecl = DefaultTD;
+ }
+
+ /// IsEqualExpr - Returns true if the expressions evaluate to the same value.
+ static bool IsEqualExpr(ASTContext &Context, Expr *FromExpr, Expr *ToExpr) {
+ if (FromExpr == ToExpr)
+ return true;
+
+ if (!FromExpr || !ToExpr)
+ return false;
+
+ FromExpr = FromExpr->IgnoreParens();
+ ToExpr = ToExpr->IgnoreParens();
+
+ DeclRefExpr *FromDRE = dyn_cast<DeclRefExpr>(FromExpr),
+ *ToDRE = dyn_cast<DeclRefExpr>(ToExpr);
+
+ if (FromDRE || ToDRE) {
+ if (!FromDRE || !ToDRE)
+ return false;
+ return FromDRE->getDecl() == ToDRE->getDecl();
+ }
+
+ Expr::EvalResult FromResult, ToResult;
+ if (!FromExpr->EvaluateAsRValue(FromResult, Context) ||
+ !ToExpr->EvaluateAsRValue(ToResult, Context))
+ assert(0 && "Template arguments must be known at compile time.");
+
+ APValue &FromVal = FromResult.Val;
+ APValue &ToVal = ToResult.Val;
+
+ if (FromVal.getKind() != ToVal.getKind()) return false;
+
+ switch (FromVal.getKind()) {
+ case APValue::Int:
+ return FromVal.getInt() == ToVal.getInt();
+ case APValue::LValue: {
+ APValue::LValueBase FromBase = FromVal.getLValueBase();
+ APValue::LValueBase ToBase = ToVal.getLValueBase();
+ if (FromBase.isNull() && ToBase.isNull())
+ return true;
+ if (FromBase.isNull() || ToBase.isNull())
+ return false;
+ return FromBase.get<const ValueDecl *>() ==
+ ToBase.get<const ValueDecl *>();
+ }
+ case APValue::MemberPointer:
+ return FromVal.getMemberPointerDecl() == ToVal.getMemberPointerDecl();
+ default:
+ llvm_unreachable("Unknown template argument expression.");
+ }
+ }
+
+ // These functions convert the tree representation of the template
+ // differences into the internal character vector.
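hasSameTemplate's parallel ascension amounts to comparing two alias chains from
their underlying templates upward and keeping the last pair that still matches.
A small sketch of that search over plain name chains (illustrative only):

#include <cassert>
#include <string>
#include <vector>

// Given two chains ordered from most-derived alias down to the underlying
// template, find the highest pair that still names the same template.
static bool highestCommon(std::vector<std::string> From,
                          std::vector<std::string> To,
                          std::string &Common) {
  // Walk both chains in reverse, i.e. starting at the underlying template.
  auto F = From.rbegin(), FE = From.rend();
  auto T = To.rbegin(), TE = To.rend();
  if (F == FE || T == TE || *F != *T)
    return false; // not even the base templates match
  for (; F != FE && T != TE && *F == *T; ++F, ++T)
    ;
  Common = F[-1]; // the pair just before the first mismatch
  return true;
}

int main() {
  std::string Common;
  // vec_alias2 -> vec_alias -> vector  versus  other_alias -> vector
  bool Ok = highestCommon({"vec_alias2", "vec_alias", "vector"},
                          {"other_alias", "vector"}, Common);
  assert(Ok && Common == "vector");
}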
+ + /// TreeToString - Converts the Tree object into a character stream which + /// will later be turned into the output string. + void TreeToString(int Indent = 1) { + if (PrintTree) { + OS << '\n'; + for (int i = 0; i < Indent; ++i) + OS << " "; + ++Indent; + } + + // Handle cases where the difference is not templates with different + // arguments. + if (!Tree.NodeIsTemplate()) { + if (Tree.NodeIsQualType()) { + QualType FromType, ToType; + Tree.GetNode(FromType, ToType); + PrintTypeNames(FromType, ToType, Tree.FromDefault(), Tree.ToDefault(), + Tree.NodeIsSame()); + return; + } + if (Tree.NodeIsExpr()) { + Expr *FromExpr, *ToExpr; + Tree.GetNode(FromExpr, ToExpr); + PrintExpr(FromExpr, ToExpr, Tree.FromDefault(), Tree.ToDefault(), + Tree.NodeIsSame()); + return; + } + if (Tree.NodeIsTemplateTemplate()) { + TemplateDecl *FromTD, *ToTD; + Tree.GetNode(FromTD, ToTD); + PrintTemplateTemplate(FromTD, ToTD, Tree.FromDefault(), + Tree.ToDefault(), Tree.NodeIsSame()); + return; + } + llvm_unreachable("Unable to deduce template difference."); + } + + // Node is root of template. Recurse on children. + TemplateDecl *FromTD, *ToTD; + Tree.GetNode(FromTD, ToTD); + + assert(Tree.HasChildren() && "Template difference not found in diff tree."); + + OS << FromTD->getNameAsString() << '<'; + Tree.MoveToChild(); + unsigned NumElideArgs = 0; + do { + if (ElideType) { + if (Tree.NodeIsSame()) { + ++NumElideArgs; + continue; + } + if (NumElideArgs > 0) { + PrintElideArgs(NumElideArgs, Indent); + NumElideArgs = 0; + OS << ", "; + } + } + TreeToString(Indent); + if (Tree.HasNextSibling()) + OS << ", "; + } while (Tree.AdvanceSibling()); + if (NumElideArgs > 0) + PrintElideArgs(NumElideArgs, Indent); + + Tree.Parent(); + OS << ">"; + } + + // To signal to the text printer that a certain text needs to be bolded, + // a special character is injected into the character stream which the + // text printer will later strip out. + + /// Bold - Start bolding text. + void Bold() { + assert(!IsBold && "Attempting to bold text that is already bold."); + IsBold = true; + if (ShowColor) + OS << ToggleHighlight; + } + + /// Unbold - Stop bolding text. + void Unbold() { + assert(IsBold && "Attempting to remove bold from unbold text."); + IsBold = false; + if (ShowColor) + OS << ToggleHighlight; + } + + // Functions to print out the arguments and highlighting the difference. + + /// PrintTypeNames - prints the typenames, bolding differences. Will detect + /// typenames that are the same and attempt to disambiguate them by using + /// canonical typenames. + void PrintTypeNames(QualType FromType, QualType ToType, + bool FromDefault, bool ToDefault, bool Same) { + assert((!FromType.isNull() || !ToType.isNull()) && + "Only one template argument may be missing."); + + if (Same) { + OS << FromType.getAsString(); + return; + } + + std::string FromTypeStr = FromType.isNull() ? "(no argument)" + : FromType.getAsString(); + std::string ToTypeStr = ToType.isNull() ? "(no argument)" + : ToType.getAsString(); + // Switch to canonical typename if it is better. + // TODO: merge this with other aka printing above. + if (FromTypeStr == ToTypeStr) { + std::string FromCanTypeStr = FromType.getCanonicalType().getAsString(); + std::string ToCanTypeStr = ToType.getCanonicalType().getAsString(); + if (FromCanTypeStr != ToCanTypeStr) { + FromTypeStr = FromCanTypeStr; + ToTypeStr = ToCanTypeStr; + } + } + + if (PrintTree) OS << '['; + OS << (FromDefault ? 
"(default) " : ""); + Bold(); + OS << FromTypeStr; + Unbold(); + if (PrintTree) { + OS << " != " << (ToDefault ? "(default) " : ""); + Bold(); + OS << ToTypeStr; + Unbold(); + OS << "]"; + } + return; + } + + /// PrintExpr - Prints out the expr template arguments, highlighting argument + /// differences. + void PrintExpr(const Expr *FromExpr, const Expr *ToExpr, + bool FromDefault, bool ToDefault, bool Same) { + assert((FromExpr || ToExpr) && + "Only one template argument may be missing."); + if (Same) { + PrintExpr(FromExpr); + } else if (!PrintTree) { + OS << (FromDefault ? "(default) " : ""); + Bold(); + PrintExpr(FromExpr); + Unbold(); + } else { + OS << (FromDefault ? "[(default) " : "["); + Bold(); + PrintExpr(FromExpr); + Unbold(); + OS << " != " << (ToDefault ? "(default) " : ""); + Bold(); + PrintExpr(ToExpr); + Unbold(); + OS << ']'; + } + } + + /// PrintExpr - Actual formatting and printing of expressions. + void PrintExpr(const Expr *E) { + if (!E) + OS << "(no argument)"; + else + E->printPretty(OS, Context, 0, Policy); return; + } + + /// PrintTemplateTemplate - Handles printing of template template arguments, + /// highlighting argument differences. + void PrintTemplateTemplate(TemplateDecl *FromTD, TemplateDecl *ToTD, + bool FromDefault, bool ToDefault, bool Same) { + assert((FromTD || ToTD) && "Only one template argument may be missing."); + if (Same) { + OS << "template " << FromTD->getNameAsString(); + } else if (!PrintTree) { + OS << (FromDefault ? "(default) template " : "template "); + Bold(); + OS << (FromTD ? FromTD->getNameAsString() : "(no argument)"); + Unbold(); + } else { + OS << (FromDefault ? "[(default) template " : "[template "); + Bold(); + OS << (FromTD ? FromTD->getNameAsString() : "(no argument)"); + Unbold(); + OS << " != " << (ToDefault ? "(default) template " : "template "); + Bold(); + OS << (ToTD ? ToTD->getNameAsString() : "(no argument)"); + Unbold(); + OS << ']'; + } + } + + // Prints the appropriate placeholder for elided template arguments. + void PrintElideArgs(unsigned NumElideArgs, unsigned Indent) { + if (PrintTree) { + OS << '\n'; + for (unsigned i = 0; i < Indent; ++i) + OS << " "; + } + if (NumElideArgs == 0) return; + if (NumElideArgs == 1) + OS << "[...]"; + else + OS << "[" << NumElideArgs << " * ...]"; + } + +public: + + TemplateDiff(ASTContext &Context, QualType FromType, QualType ToType, + bool PrintTree, bool PrintFromType, bool ElideType, + bool ShowColor) + : Context(Context), + Policy(Context.getLangOpts()), + ElideType(ElideType), + PrintTree(PrintTree), + ShowColor(ShowColor), + // When printing a single type, the FromType is the one printed. + FromType(PrintFromType ? FromType : ToType), + ToType(PrintFromType ? ToType : FromType), + OS(Str), + IsBold(false) { + } + + /// DiffTemplate - Start the template type diffing. + void DiffTemplate() { + const TemplateSpecializationType *FromOrigTST = + GetTemplateSpecializationType(Context, FromType); + const TemplateSpecializationType *ToOrigTST = + GetTemplateSpecializationType(Context, ToType); + + // Only checking templates. + if (!FromOrigTST || !ToOrigTST) + return; + + // Different base templates. + if (!hasSameTemplate(FromOrigTST, ToOrigTST)) { + return; + } + + Tree.SetNode(FromType, ToType); + + // Same base template, but different arguments. 
+ Tree.SetNode(FromOrigTST->getTemplateName().getAsTemplateDecl(), + ToOrigTST->getTemplateName().getAsTemplateDecl()); + + DiffTemplate(FromOrigTST, ToOrigTST); + } + + /// MakeString - When the two types given are templated types with the same + /// base template, a string representation of the type difference will be + /// loaded into S and return true. Otherwise, return false. + bool MakeString(std::string &S) { + Tree.StartTraverse(); + if (Tree.Empty()) + return false; + + TreeToString(); + assert(!IsBold && "Bold is applied to end of string."); + S = OS.str(); + return true; + } +}; // end class TemplateDiff +} // end namespace + +/// FormatTemplateTypeDiff - A helper static function to start the template +/// diff and return the properly formatted string. Returns true if the diff +/// is successful. +static bool FormatTemplateTypeDiff(ASTContext &Context, QualType FromType, + QualType ToType, bool PrintTree, + bool PrintFromType, bool ElideType, + bool ShowColors, std::string &S) { + if (PrintTree) + PrintFromType = true; + TemplateDiff TD(Context, FromType, ToType, PrintTree, PrintFromType, + ElideType, ShowColors); + TD.DiffTemplate(); + return TD.MakeString(S); +} diff --git a/lib/AST/ASTImporter.cpp b/lib/AST/ASTImporter.cpp index 3879907..3e952ac 100644 --- a/lib/AST/ASTImporter.cpp +++ b/lib/AST/ASTImporter.cpp @@ -119,7 +119,8 @@ namespace clang { bool ImportTemplateArguments(const TemplateArgument *FromArgs, unsigned NumFromArgs, SmallVectorImpl &ToArgs); - bool IsStructuralMatch(RecordDecl *FromRecord, RecordDecl *ToRecord); + bool IsStructuralMatch(RecordDecl *FromRecord, RecordDecl *ToRecord, + bool Complain = true); bool IsStructuralMatch(EnumDecl *FromEnum, EnumDecl *ToRecord); bool IsStructuralMatch(ClassTemplateDecl *From, ClassTemplateDecl *To); Decl *VisitDecl(Decl *D); @@ -201,12 +202,16 @@ namespace { /// \brief Whether we're being strict about the spelling of types when /// unifying two types. bool StrictTypeSpelling; - + + /// \brief Whether to complain about failures. + bool Complain; + StructuralEquivalenceContext(ASTContext &C1, ASTContext &C2, llvm::DenseSet > &NonEquivalentDecls, - bool StrictTypeSpelling = false) + bool StrictTypeSpelling = false, + bool Complain = true) : C1(C1), C2(C2), NonEquivalentDecls(NonEquivalentDecls), - StrictTypeSpelling(StrictTypeSpelling) { } + StrictTypeSpelling(StrictTypeSpelling), Complain(Complain) { } /// \brief Determine whether the two declarations are structurally /// equivalent. @@ -223,10 +228,16 @@ namespace { public: DiagnosticBuilder Diag1(SourceLocation Loc, unsigned DiagID) { + if (!Complain) + return DiagnosticBuilder::getEmpty(); + return C1.getDiagnostics().Report(Loc, DiagID); } DiagnosticBuilder Diag2(SourceLocation Loc, unsigned DiagID) { + if (!Complain) + return DiagnosticBuilder::getEmpty(); + return C2.getDiagnostics().Report(Loc, DiagID); } }; @@ -237,45 +248,6 @@ static bool IsStructurallyEquivalent(StructuralEquivalenceContext &Context, static bool IsStructurallyEquivalent(StructuralEquivalenceContext &Context, Decl *D1, Decl *D2); -/// \brief Determine if two APInts have the same value, after zero-extending -/// one of them (if needed!) to ensure that the bit-widths match. 
-static bool IsSameValue(const llvm::APInt &I1, const llvm::APInt &I2) { - if (I1.getBitWidth() == I2.getBitWidth()) - return I1 == I2; - - if (I1.getBitWidth() > I2.getBitWidth()) - return I1 == I2.zext(I1.getBitWidth()); - - return I1.zext(I2.getBitWidth()) == I2; -} - -/// \brief Determine if two APSInts have the same value, zero- or sign-extending -/// as needed. -static bool IsSameValue(const llvm::APSInt &I1, const llvm::APSInt &I2) { - if (I1.getBitWidth() == I2.getBitWidth() && I1.isSigned() == I2.isSigned()) - return I1 == I2; - - // Check for a bit-width mismatch. - if (I1.getBitWidth() > I2.getBitWidth()) - return IsSameValue(I1, I2.extend(I1.getBitWidth())); - else if (I2.getBitWidth() > I1.getBitWidth()) - return IsSameValue(I1.extend(I2.getBitWidth()), I2); - - // We have a signedness mismatch. Turn the signed value into an unsigned - // value. - if (I1.isSigned()) { - if (I1.isNegative()) - return false; - - return llvm::APSInt(I1, true) == I2; - } - - if (I2.isNegative()) - return false; - - return I1 == llvm::APSInt(I2, true); -} - /// \brief Determine structural equivalence of two expressions. static bool IsStructurallyEquivalent(StructuralEquivalenceContext &Context, Expr *E1, Expr *E2) { @@ -322,7 +294,7 @@ static bool IsStructurallyEquivalent(StructuralEquivalenceContext &Context, Arg2.getIntegralType())) return false; - return IsSameValue(*Arg1.getAsIntegral(), *Arg2.getAsIntegral()); + return llvm::APSInt::isSameValue(Arg1.getAsIntegral(), Arg2.getAsIntegral()); case TemplateArgument::Declaration: if (!Arg1.getAsDecl() || !Arg2.getAsDecl()) @@ -467,7 +439,7 @@ static bool IsStructurallyEquivalent(StructuralEquivalenceContext &Context, case Type::ConstantArray: { const ConstantArrayType *Array1 = cast(T1); const ConstantArrayType *Array2 = cast(T2); - if (!IsSameValue(Array1->getSize(), Array2->getSize())) + if (!llvm::APInt::isSameValue(Array1->getSize(), Array2->getSize())) return false; if (!IsArrayStructurallyEquivalent(Context, Array1, Array2)) @@ -1002,9 +974,9 @@ static bool IsStructurallyEquivalent(StructuralEquivalenceContext &Context, } // Check the fields for consistency. 
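The removal above is possible because the hand-rolled IsSameValue helpers were
promoted into LLVM proper as llvm::APInt::isSameValue and
llvm::APSInt::isSameValue, which compare values after reconciling bit width
(and, for APSInt, signedness). A quick illustration of the semantics, assuming
an LLVM build is available to compile against:

#include <cassert>
#include "llvm/ADT/APSInt.h"

int main() {
  // The same numeric value stored at different bit widths compares equal.
  llvm::APInt Narrow(8, 42), Wide(32, 42);
  assert(llvm::APInt::isSameValue(Narrow, Wide));

  // Signedness mismatches are reconciled; a negative signed value is never
  // equal to a non-negative unsigned one.
  llvm::APSInt S(llvm::APInt(8, -1, /*isSigned=*/true), /*isUnsigned=*/false);
  llvm::APSInt U(llvm::APInt(16, 255), /*isUnsigned=*/true);
  assert(!llvm::APSInt::isSameValue(S, U));
}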
- CXXRecordDecl::field_iterator Field2 = D2->field_begin(), + RecordDecl::field_iterator Field2 = D2->field_begin(), Field2End = D2->field_end(); - for (CXXRecordDecl::field_iterator Field1 = D1->field_begin(), + for (RecordDecl::field_iterator Field1 = D1->field_begin(), Field1End = D1->field_end(); Field1 != Field1End; ++Field1, ++Field2) { @@ -1053,7 +1025,7 @@ static bool IsStructurallyEquivalent(StructuralEquivalenceContext &Context, llvm::APSInt Val1 = EC1->getInitVal(); llvm::APSInt Val2 = EC2->getInitVal(); - if (!IsSameValue(Val1, Val2) || + if (!llvm::APSInt::isSameValue(Val1, Val2) || !IsStructurallyEquivalent(EC1->getIdentifier(), EC2->getIdentifier())) { Context.Diag2(D2->getLocation(), diag::warn_odr_tag_type_inconsistent) << Context.C2.getTypeDeclType(D2); @@ -1852,19 +1824,14 @@ bool ASTNodeImporter::ImportDefinition(RecordDecl *From, RecordDecl *To, ToData.HasPublicFields = FromData.HasPublicFields; ToData.HasMutableFields = FromData.HasMutableFields; ToData.HasOnlyCMembers = FromData.HasOnlyCMembers; + ToData.HasInClassInitializer = FromData.HasInClassInitializer; ToData.HasTrivialDefaultConstructor = FromData.HasTrivialDefaultConstructor; ToData.HasConstexprNonCopyMoveConstructor = FromData.HasConstexprNonCopyMoveConstructor; ToData.DefaultedDefaultConstructorIsConstexpr = FromData.DefaultedDefaultConstructorIsConstexpr; - ToData.DefaultedCopyConstructorIsConstexpr - = FromData.DefaultedCopyConstructorIsConstexpr; - ToData.DefaultedMoveConstructorIsConstexpr - = FromData.DefaultedMoveConstructorIsConstexpr; ToData.HasConstexprDefaultConstructor = FromData.HasConstexprDefaultConstructor; - ToData.HasConstexprCopyConstructor = FromData.HasConstexprCopyConstructor; - ToData.HasConstexprMoveConstructor = FromData.HasConstexprMoveConstructor; ToData.HasTrivialCopyConstructor = FromData.HasTrivialCopyConstructor; ToData.HasTrivialMoveConstructor = FromData.HasTrivialMoveConstructor; ToData.HasTrivialCopyAssignment = FromData.HasTrivialCopyAssignment; @@ -1991,7 +1958,7 @@ ASTNodeImporter::ImportTemplateArgument(const TemplateArgument &From) { QualType ToType = Importer.Import(From.getIntegralType()); if (ToType.isNull()) return TemplateArgument(); - return TemplateArgument(*From.getAsIntegral(), ToType); + return TemplateArgument(From, ToType); } case TemplateArgument::Declaration: @@ -2052,10 +2019,11 @@ bool ASTNodeImporter::ImportTemplateArguments(const TemplateArgument *FromArgs, } bool ASTNodeImporter::IsStructuralMatch(RecordDecl *FromRecord, - RecordDecl *ToRecord) { + RecordDecl *ToRecord, bool Complain) { StructuralEquivalenceContext Ctx(Importer.getFromContext(), Importer.getToContext(), - Importer.getNonEquivalentDecls()); + Importer.getNonEquivalentDecls(), + false, Complain); return Ctx.IsStructurallyEquivalent(FromRecord, ToRecord); } @@ -2335,7 +2303,7 @@ Decl *ASTNodeImporter::VisitRecordDecl(RecordDecl *D) { // We may already have a record of the same name; try to find and match it. 
RecordDecl *AdoptDecl = 0; - if (!DC->isFunctionOrMethod() && SearchName) { + if (!DC->isFunctionOrMethod()) { SmallVector ConflictingDecls; llvm::SmallVector FoundDecls; DC->localUncachedLookup(SearchName, FoundDecls); @@ -2351,25 +2319,31 @@ Decl *ASTNodeImporter::VisitRecordDecl(RecordDecl *D) { if (RecordDecl *FoundRecord = dyn_cast(Found)) { if (RecordDecl *FoundDef = FoundRecord->getDefinition()) { - if (!D->isCompleteDefinition() || IsStructuralMatch(D, FoundDef)) { + if ((SearchName && !D->isCompleteDefinition()) + || (D->isCompleteDefinition() && + D->isAnonymousStructOrUnion() + == FoundDef->isAnonymousStructOrUnion() && + IsStructuralMatch(D, FoundDef))) { // The record types structurally match, or the "from" translation // unit only had a forward declaration anyway; call it the same // function. // FIXME: For C++, we should also merge methods here. return Importer.Imported(D, FoundDef); } - } else { + } else if (!D->isCompleteDefinition()) { // We have a forward declaration of this type, so adopt that forward // declaration rather than building a new one. AdoptDecl = FoundRecord; continue; - } + } else if (!SearchName) { + continue; + } } ConflictingDecls.push_back(FoundDecls[I]); } - if (!ConflictingDecls.empty()) { + if (!ConflictingDecls.empty() && SearchName) { Name = Importer.HandleNameConflict(Name, DC, IDNS, ConflictingDecls.data(), ConflictingDecls.size()); @@ -2395,6 +2369,8 @@ Decl *ASTNodeImporter::VisitRecordDecl(RecordDecl *D) { D2->setQualifierInfo(Importer.Import(D->getQualifierLoc())); D2->setLexicalDeclContext(LexicalDC); LexicalDC->addDeclInternal(D2); + if (D->isAnonymousStructOrUnion()) + D2->setAnonymousStructOrUnion(true); } Importer.Imported(D, D2); @@ -2661,7 +2637,7 @@ Decl *ASTNodeImporter::VisitFieldDecl(FieldDecl *D) { Importer.Import(D->getInnerLocStart()), Loc, Name.getAsIdentifierInfo(), T, TInfo, BitWidth, D->isMutable(), - D->hasInClassInitializer()); + D->getInClassInitStyle()); ToField->setAccess(D->getAccess()); ToField->setLexicalDeclContext(LexicalDC); if (ToField->hasInClassInitializer()) @@ -2686,11 +2662,16 @@ Decl *ASTNodeImporter::VisitIndirectFieldDecl(IndirectFieldDecl *D) { if (IndirectFieldDecl *FoundField = dyn_cast(FoundDecls[I])) { if (Importer.IsStructurallyEquivalent(D->getType(), - FoundField->getType())) { + FoundField->getType(), + Name)) { Importer.Imported(D, FoundField); return FoundField; } - + + // If there are more anonymous fields to check, continue. 
+ if (!Name && I < N-1) + continue; + Importer.ToDiag(Loc, diag::err_odr_field_type_inconsistent) << Name << D->getType() << FoundField->getType(); Importer.ToDiag(FoundField->getLocation(), diag::note_odr_value_here) @@ -4665,12 +4646,14 @@ Decl *ASTImporter::Imported(Decl *From, Decl *To) { return To; } -bool ASTImporter::IsStructurallyEquivalent(QualType From, QualType To) { +bool ASTImporter::IsStructurallyEquivalent(QualType From, QualType To, + bool Complain) { llvm::DenseMap::iterator Pos = ImportedTypes.find(From.getTypePtr()); if (Pos != ImportedTypes.end() && ToContext.hasSameType(Import(From), To)) return true; - StructuralEquivalenceContext Ctx(FromContext, ToContext, NonEquivalentDecls); + StructuralEquivalenceContext Ctx(FromContext, ToContext, NonEquivalentDecls, + false, Complain); return Ctx.IsStructurallyEquivalent(From, To); } diff --git a/lib/AST/CMakeLists.txt b/lib/AST/CMakeLists.txt index 716459a..bcc96f9 100644 --- a/lib/AST/CMakeLists.txt +++ b/lib/AST/CMakeLists.txt @@ -1,7 +1,5 @@ set(LLVM_LINK_COMPONENTS support) -set(LLVM_USED_LIBS clangBasic clangLex) - add_clang_library(clangAST APValue.cpp ASTConsumer.cpp @@ -10,6 +8,13 @@ add_clang_library(clangAST ASTImporter.cpp AttrImpl.cpp CXXInheritance.cpp + Comment.cpp + CommentBriefParser.cpp + CommentCommandTraits.cpp + CommentDumper.cpp + CommentLexer.cpp + CommentParser.cpp + CommentSema.cpp Decl.cpp DeclarationName.cpp DeclBase.cpp @@ -35,6 +40,7 @@ add_clang_library(clangAST NestedNameSpecifier.cpp NSAPI.cpp ParentMap.cpp + RawCommentList.cpp RecordLayout.cpp RecordLayoutBuilder.cpp SelectorLocationsKind.cpp @@ -53,5 +59,21 @@ add_clang_library(clangAST VTTBuilder.cpp ) -add_dependencies(clangAST ClangARMNeon ClangAttrClasses ClangAttrList - ClangAttrImpl ClangDiagnosticAST ClangDeclNodes ClangStmtNodes) +add_dependencies(clangAST + ClangARMNeon + ClangAttrClasses + ClangAttrList + ClangAttrImpl + ClangCommentNodes + ClangDeclNodes + ClangDiagnosticAST + ClangDiagnosticComment + ClangDiagnosticCommon + ClangDiagnosticSema + ClangStmtNodes + ) + +target_link_libraries(clangAST + clangBasic + clangLex + ) diff --git a/lib/AST/CXXABI.h b/lib/AST/CXXABI.h index 943c43e..0d9c869 100644 --- a/lib/AST/CXXABI.h +++ b/lib/AST/CXXABI.h @@ -32,7 +32,7 @@ public: virtual unsigned getMemberPointerSize(const MemberPointerType *MPT) const = 0; /// Returns the default calling convention for C++ methods. - virtual CallingConv getDefaultMethodCallConv() const = 0; + virtual CallingConv getDefaultMethodCallConv(bool isVariadic) const = 0; // Returns whether the given class is nearly empty, with just virtual pointers // and no data except possibly virtual bases. 
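The new isVariadic flag matters because some method calling conventions cannot
support variadic signatures; most prominently, Microsoft's thiscall is
callee-cleaned and has no way to handle trailing variadic arguments, so
variadic methods must fall back to cdecl. A hedged sketch of that decision,
using made-up enumerators rather than clang's real ones:

#include <cassert>

enum CallConv { CC_Cdecl, CC_ThisCall };

// On a Microsoft-style ABI, instance methods default to thiscall, but a
// variadic method cannot use it: thiscall is callee-cleaned, which requires a
// fixed argument count, so cdecl is used instead.
static CallConv defaultMethodCallConv(bool IsVariadic, bool MicrosoftABI) {
  if (!MicrosoftABI)
    return CC_Cdecl; // Itanium-style ABIs use the C convention throughout
  return IsVariadic ? CC_Cdecl : CC_ThisCall;
}

int main() {
  assert(defaultMethodCallConv(false, true) == CC_ThisCall);
  assert(defaultMethodCallConv(true, true) == CC_Cdecl);
  assert(defaultMethodCallConv(true, false) == CC_Cdecl);
}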
diff --git a/lib/AST/CXXInheritance.cpp b/lib/AST/CXXInheritance.cpp index 2186730..cf3913b 100644 --- a/lib/AST/CXXInheritance.cpp +++ b/lib/AST/CXXInheritance.cpp @@ -11,6 +11,7 @@ // //===----------------------------------------------------------------------===// #include "clang/AST/CXXInheritance.h" +#include "clang/AST/ASTContext.h" #include "clang/AST/RecordLayout.h" #include "clang/AST/DeclCXX.h" #include @@ -96,7 +97,7 @@ bool CXXRecordDecl::isDerivedFrom(const CXXRecordDecl *Base, Paths); } -bool CXXRecordDecl::isVirtuallyDerivedFrom(CXXRecordDecl *Base) const { +bool CXXRecordDecl::isVirtuallyDerivedFrom(const CXXRecordDecl *Base) const { if (!getNumVBases()) return false; @@ -106,8 +107,12 @@ bool CXXRecordDecl::isVirtuallyDerivedFrom(CXXRecordDecl *Base) const { if (getCanonicalDecl() == Base->getCanonicalDecl()) return false; - Paths.setOrigin(const_cast(this)); - return lookupInBases(&FindVirtualBaseClass, Base->getCanonicalDecl(), Paths); + Paths.setOrigin(const_cast(this)); + + const void *BasePtr = static_cast(Base->getCanonicalDecl()); + return lookupInBases(&FindVirtualBaseClass, + const_cast(BasePtr), + Paths); } static bool BaseIsNot(const CXXRecordDecl *Base, void *OpaqueTarget) { @@ -160,7 +165,7 @@ bool CXXRecordDecl::forallBases(ForallBasesCallback *BaseMatches, return AllMatches; } -bool CXXBasePaths::lookupInBases(ASTContext &Context, +bool CXXBasePaths::lookupInBases(ASTContext &Context, const CXXRecordDecl *Record, CXXRecordDecl::BaseMatchesCallback *BaseMatches, void *UserData) { @@ -505,12 +510,17 @@ void FinalOverriderCollector::Collect(const CXXRecordDecl *RD, CXXFinalOverriderMap *BaseOverriders = &ComputedBaseOverriders; if (Base->isVirtual()) { CXXFinalOverriderMap *&MyVirtualOverriders = VirtualOverriders[BaseDecl]; + BaseOverriders = MyVirtualOverriders; if (!MyVirtualOverriders) { MyVirtualOverriders = new CXXFinalOverriderMap; + + // Collect may cause VirtualOverriders to reallocate, invalidating the + // MyVirtualOverriders reference. Set BaseOverriders to the right + // value now. + BaseOverriders = MyVirtualOverriders; + Collect(BaseDecl, true, BaseDecl, *MyVirtualOverriders); } - - BaseOverriders = MyVirtualOverriders; } else Collect(BaseDecl, false, InVirtualSubobject, ComputedBaseOverriders); diff --git a/lib/AST/Comment.cpp b/lib/AST/Comment.cpp new file mode 100644 index 0000000..8a711f0 --- /dev/null +++ b/lib/AST/Comment.cpp @@ -0,0 +1,264 @@ +//===--- Comment.cpp - Comment AST node implementation --------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "clang/AST/Comment.h" +#include "clang/AST/Decl.h" +#include "clang/AST/DeclObjC.h" +#include "clang/AST/DeclTemplate.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" + +namespace clang { +namespace comments { + +const char *Comment::getCommentKindName() const { + switch (getCommentKind()) { + case NoCommentKind: return "NoCommentKind"; +#define ABSTRACT_COMMENT(COMMENT) +#define COMMENT(CLASS, PARENT) \ + case CLASS##Kind: \ + return #CLASS; +#include "clang/AST/CommentNodes.inc" +#undef COMMENT +#undef ABSTRACT_COMMENT + } + llvm_unreachable("Unknown comment kind!"); +} + +void Comment::dump() const { + // It is important that Comment::dump() is defined in a different TU than + // Comment::dump(raw_ostream, SourceManager). 
If both functions were defined
+ // in CommentDumper.cpp, that object file would be removed by the linker
+ // because none of its functions are referenced by other object files,
+ // despite the LLVM_ATTRIBUTE_USED.
+ dump(llvm::errs(), NULL);
+}
+
+void Comment::dump(SourceManager &SM) const {
+ dump(llvm::errs(), &SM);
+}
+
+namespace {
+struct good {};
+struct bad {};
+
+template <typename T>
+good implements_child_begin_end(Comment::child_iterator (T::*)() const) {
+ return good();
+}
+
+static inline bad implements_child_begin_end(
+ Comment::child_iterator (Comment::*)() const) {
+ return bad();
+}
+
+#define ASSERT_IMPLEMENTS_child_begin(function) \
+ (void) sizeof(good(implements_child_begin_end(function)))
+
+static inline void CheckCommentASTNodes() {
+#define ABSTRACT_COMMENT(COMMENT)
+#define COMMENT(CLASS, PARENT) \
+ ASSERT_IMPLEMENTS_child_begin(&CLASS::child_begin); \
+ ASSERT_IMPLEMENTS_child_begin(&CLASS::child_end);
+#include "clang/AST/CommentNodes.inc"
+#undef COMMENT
+#undef ABSTRACT_COMMENT
+}
+
+#undef ASSERT_IMPLEMENTS_child_begin
+
+} // end unnamed namespace
+
+Comment::child_iterator Comment::child_begin() const {
+ switch (getCommentKind()) {
+ case NoCommentKind: llvm_unreachable("comment without a kind");
+#define ABSTRACT_COMMENT(COMMENT)
+#define COMMENT(CLASS, PARENT) \
+ case CLASS##Kind: \
+ return static_cast<const CLASS *>(this)->child_begin();
+#include "clang/AST/CommentNodes.inc"
+#undef COMMENT
+#undef ABSTRACT_COMMENT
+ }
+ llvm_unreachable("Unknown comment kind!");
+}
+
+Comment::child_iterator Comment::child_end() const {
+ switch (getCommentKind()) {
+ case NoCommentKind: llvm_unreachable("comment without a kind");
+#define ABSTRACT_COMMENT(COMMENT)
+#define COMMENT(CLASS, PARENT) \
+ case CLASS##Kind: \
+ return static_cast<const CLASS *>(this)->child_end();
+#include "clang/AST/CommentNodes.inc"
+#undef COMMENT
+#undef ABSTRACT_COMMENT
+ }
+ llvm_unreachable("Unknown comment kind!");
+}
+
+bool TextComment::isWhitespaceNoCache() const {
+ for (StringRef::const_iterator I = Text.begin(), E = Text.end();
+ I != E; ++I) {
+ const char C = *I;
+ if (C != ' ' && C != '\n' && C != '\r' &&
+ C != '\t' && C != '\f' && C != '\v')
+ return false;
+ }
+ return true;
+}
+
+bool ParagraphComment::isWhitespaceNoCache() const {
+ for (child_iterator I = child_begin(), E = child_end(); I != E; ++I) {
+ if (const TextComment *TC = dyn_cast<TextComment>(*I)) {
+ if (!TC->isWhitespace())
+ return false;
+ } else
+ return false;
+ }
+ return true;
+}
+
+const char *ParamCommandComment::getDirectionAsString(PassDirection D) {
+ switch (D) {
+ case ParamCommandComment::In:
+ return "[in]";
+ case ParamCommandComment::Out:
+ return "[out]";
+ case ParamCommandComment::InOut:
+ return "[in,out]";
+ }
+ llvm_unreachable("unknown PassDirection");
+}
+
+void DeclInfo::fill() {
+ assert(!IsFilled);
+
+ // Set defaults.
+ Kind = OtherKind;
+ TemplateKind = NotTemplate;
+ IsObjCMethod = false;
+ IsInstanceMethod = false;
+ IsClassMethod = false;
+ ParamVars = ArrayRef<const ParmVarDecl *>();
+ TemplateParameters = NULL;
+
+ if (!ThisDecl) {
+ // If there is no declaration, the defaults are our only guess.
+ IsFilled = true;
+ return;
+ }
+
+ Decl::Kind K = ThisDecl->getKind();
+ switch (K) {
+ default:
+ // Defaults should be good for declarations we don't handle explicitly.
+ break; + case Decl::Function: + case Decl::CXXMethod: + case Decl::CXXConstructor: + case Decl::CXXDestructor: + case Decl::CXXConversion: { + const FunctionDecl *FD = cast(ThisDecl); + Kind = FunctionKind; + ParamVars = ArrayRef(FD->param_begin(), + FD->getNumParams()); + ResultType = FD->getResultType(); + unsigned NumLists = FD->getNumTemplateParameterLists(); + if (NumLists != 0) { + TemplateKind = TemplateSpecialization; + TemplateParameters = + FD->getTemplateParameterList(NumLists - 1); + } + + if (K == Decl::CXXMethod || K == Decl::CXXConstructor || + K == Decl::CXXDestructor || K == Decl::CXXConversion) { + const CXXMethodDecl *MD = cast(ThisDecl); + IsInstanceMethod = MD->isInstance(); + IsClassMethod = !IsInstanceMethod; + } + break; + } + case Decl::ObjCMethod: { + const ObjCMethodDecl *MD = cast(ThisDecl); + Kind = FunctionKind; + ParamVars = ArrayRef(MD->param_begin(), + MD->param_size()); + ResultType = MD->getResultType(); + IsObjCMethod = true; + IsInstanceMethod = MD->isInstanceMethod(); + IsClassMethod = !IsInstanceMethod; + break; + } + case Decl::FunctionTemplate: { + const FunctionTemplateDecl *FTD = cast(ThisDecl); + Kind = FunctionKind; + TemplateKind = Template; + const FunctionDecl *FD = FTD->getTemplatedDecl(); + ParamVars = ArrayRef(FD->param_begin(), + FD->getNumParams()); + ResultType = FD->getResultType(); + TemplateParameters = FTD->getTemplateParameters(); + break; + } + case Decl::ClassTemplate: { + const ClassTemplateDecl *CTD = cast(ThisDecl); + Kind = ClassKind; + TemplateKind = Template; + TemplateParameters = CTD->getTemplateParameters(); + break; + } + case Decl::ClassTemplatePartialSpecialization: { + const ClassTemplatePartialSpecializationDecl *CTPSD = + cast(ThisDecl); + Kind = ClassKind; + TemplateKind = TemplatePartialSpecialization; + TemplateParameters = CTPSD->getTemplateParameters(); + break; + } + case Decl::ClassTemplateSpecialization: + Kind = ClassKind; + TemplateKind = TemplateSpecialization; + break; + case Decl::Record: + case Decl::CXXRecord: + Kind = ClassKind; + break; + case Decl::Var: + case Decl::Field: + case Decl::EnumConstant: + case Decl::ObjCIvar: + case Decl::ObjCAtDefsField: + Kind = VariableKind; + break; + case Decl::Namespace: + Kind = NamespaceKind; + break; + case Decl::Typedef: + case Decl::TypeAlias: + Kind = TypedefKind; + break; + case Decl::TypeAliasTemplate: { + const TypeAliasTemplateDecl *TAT = cast(ThisDecl); + Kind = TypedefKind; + TemplateKind = Template; + TemplateParameters = TAT->getTemplateParameters(); + break; + } + case Decl::Enum: + Kind = EnumKind; + break; + } + + IsFilled = true; +} + +} // end namespace comments +} // end namespace clang + diff --git a/lib/AST/CommentBriefParser.cpp b/lib/AST/CommentBriefParser.cpp new file mode 100644 index 0000000..0aebc1e --- /dev/null +++ b/lib/AST/CommentBriefParser.cpp @@ -0,0 +1,122 @@ +//===--- CommentBriefParser.cpp - Dumb comment parser ---------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "clang/AST/CommentBriefParser.h" +#include "clang/AST/CommentCommandTraits.h" +#include "llvm/ADT/StringSwitch.h" + +namespace clang { +namespace comments { + +namespace { +/// Convert all whitespace into spaces, remove leading and trailing spaces, +/// compress multiple spaces into one. 
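+///
+/// For example (illustrative): "  a \t brief \n\n text  " becomes
+/// "a brief text".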
+void cleanupBrief(std::string &S) { + bool PrevWasSpace = true; + std::string::iterator O = S.begin(); + for (std::string::iterator I = S.begin(), E = S.end(); + I != E; ++I) { + const char C = *I; + if (C == ' ' || C == '\n' || C == '\r' || + C == '\t' || C == '\v' || C == '\f') { + if (!PrevWasSpace) { + *O++ = ' '; + PrevWasSpace = true; + } + continue; + } else { + *O++ = C; + PrevWasSpace = false; + } + } + if (O != S.begin() && *(O - 1) == ' ') + --O; + + S.resize(O - S.begin()); +} +} // unnamed namespace + +BriefParser::BriefParser(Lexer &L, const CommandTraits &Traits) : + L(L), Traits(Traits) { + // Get lookahead token. + ConsumeToken(); +} + +std::string BriefParser::Parse() { + std::string FirstParagraphOrBrief; + std::string ReturnsParagraph; + bool InFirstParagraph = true; + bool InBrief = false; + bool InReturns = false; + + while (Tok.isNot(tok::eof)) { + if (Tok.is(tok::text)) { + if (InFirstParagraph || InBrief) + FirstParagraphOrBrief += Tok.getText(); + else if (InReturns) + ReturnsParagraph += Tok.getText(); + ConsumeToken(); + continue; + } + + if (Tok.is(tok::command)) { + StringRef Name = Tok.getCommandName(); + if (Traits.isBriefCommand(Name)) { + FirstParagraphOrBrief.clear(); + InBrief = true; + ConsumeToken(); + continue; + } + if (Traits.isReturnsCommand(Name)) { + InReturns = true; + ReturnsParagraph += "Returns "; + } + // Block commands implicitly start a new paragraph. + if (Traits.isBlockCommand(Name)) { + // We found an implicit paragraph end. + InFirstParagraph = false; + if (InBrief) + break; + } + } + + if (Tok.is(tok::newline)) { + if (InFirstParagraph || InBrief) + FirstParagraphOrBrief += ' '; + else if (InReturns) + ReturnsParagraph += ' '; + ConsumeToken(); + + if (Tok.is(tok::newline)) { + ConsumeToken(); + // We found a paragraph end. + InFirstParagraph = false; + InReturns = false; + if (InBrief) + break; + } + continue; + } + + // We didn't handle this token, so just drop it. + ConsumeToken(); + } + + cleanupBrief(FirstParagraphOrBrief); + if (!FirstParagraphOrBrief.empty()) + return FirstParagraphOrBrief; + + cleanupBrief(ReturnsParagraph); + return ReturnsParagraph; +} + +} // end namespace comments +} // end namespace clang + + diff --git a/lib/AST/CommentCommandTraits.cpp b/lib/AST/CommentCommandTraits.cpp new file mode 100644 index 0000000..d8ce1f3 --- /dev/null +++ b/lib/AST/CommentCommandTraits.cpp @@ -0,0 +1,134 @@ +//===--- CommentCommandTraits.cpp - Comment command properties --*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "clang/AST/CommentCommandTraits.h" +#include "llvm/ADT/StringSwitch.h" + +namespace clang { +namespace comments { + +// TODO: tablegen + +bool CommandTraits::isVerbatimBlockCommand(StringRef BeginName, + StringRef &EndName) const { + const char *Result = llvm::StringSwitch(BeginName) + .Case("code", "endcode") + .Case("verbatim", "endverbatim") + .Case("htmlonly", "endhtmlonly") + .Case("latexonly", "endlatexonly") + .Case("xmlonly", "endxmlonly") + .Case("manonly", "endmanonly") + .Case("rtfonly", "endrtfonly") + + .Case("dot", "enddot") + .Case("msc", "endmsc") + + .Case("f$", "f$") // Inline LaTeX formula + .Case("f[", "f]") // Displayed LaTeX formula + .Case("f{", "f}") // LaTeX environment + + .Default(NULL); + + if (Result) { + EndName = Result; + return true; + } + + for (VerbatimBlockCommandVector::const_iterator + I = VerbatimBlockCommands.begin(), + E = VerbatimBlockCommands.end(); + I != E; ++I) + if (I->BeginName == BeginName) { + EndName = I->EndName; + return true; + } + + return false; +} + +bool CommandTraits::isVerbatimLineCommand(StringRef Name) const { + bool Result = isDeclarationCommand(Name) || llvm::StringSwitch(Name) + .Case("defgroup", true) + .Case("ingroup", true) + .Case("addtogroup", true) + .Case("weakgroup", true) + .Case("name", true) + + .Case("section", true) + .Case("subsection", true) + .Case("subsubsection", true) + .Case("paragraph", true) + + .Case("mainpage", true) + .Case("subpage", true) + .Case("ref", true) + + .Default(false); + + if (Result) + return true; + + for (VerbatimLineCommandVector::const_iterator + I = VerbatimLineCommands.begin(), + E = VerbatimLineCommands.end(); + I != E; ++I) + if (I->Name == Name) + return true; + + return false; +} + +bool CommandTraits::isDeclarationCommand(StringRef Name) const { + return llvm::StringSwitch(Name) + // Doxygen commands. + .Case("fn", true) + .Case("var", true) + .Case("property", true) + .Case("typedef", true) + + .Case("overload", true) + + // HeaderDoc commands. + .Case("class", true) + .Case("interface", true) + .Case("protocol", true) + .Case("category", true) + .Case("template", true) + .Case("function", true) + .Case("method", true) + .Case("callback", true) + .Case("var", true) + .Case("const", true) + .Case("constant", true) + .Case("property", true) + .Case("struct", true) + .Case("union", true) + .Case("typedef", true) + .Case("enum", true) + + .Default(false); +} + +void CommandTraits::addVerbatimBlockCommand(StringRef BeginName, + StringRef EndName) { + VerbatimBlockCommand VBC; + VBC.BeginName = BeginName; + VBC.EndName = EndName; + VerbatimBlockCommands.push_back(VBC); +} + +void CommandTraits::addVerbatimLineCommand(StringRef Name) { + VerbatimLineCommand VLC; + VLC.Name = Name; + VerbatimLineCommands.push_back(VLC); +} + +} // end namespace comments +} // end namespace clang + diff --git a/lib/AST/CommentDumper.cpp b/lib/AST/CommentDumper.cpp new file mode 100644 index 0000000..dffc823 --- /dev/null +++ b/lib/AST/CommentDumper.cpp @@ -0,0 +1,231 @@ +//===--- CommentDumper.cpp - Dumping implementation for Comment ASTs ------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "clang/AST/CommentVisitor.h" +#include "llvm/Support/raw_ostream.h" + +namespace clang { +namespace comments { + +namespace { +class CommentDumper: public comments::ConstCommentVisitor { + raw_ostream &OS; + SourceManager *SM; + unsigned IndentLevel; + +public: + CommentDumper(raw_ostream &OS, SourceManager *SM) : + OS(OS), SM(SM), IndentLevel(0) + { } + + void dumpIndent() const { + for (unsigned i = 1, e = IndentLevel; i < e; ++i) + OS << " "; + } + + void dumpLocation(SourceLocation Loc) { + if (SM) + Loc.print(OS, *SM); + } + + void dumpSourceRange(const Comment *C); + + void dumpComment(const Comment *C); + + void dumpSubtree(const Comment *C); + + // Inline content. + void visitTextComment(const TextComment *C); + void visitInlineCommandComment(const InlineCommandComment *C); + void visitHTMLStartTagComment(const HTMLStartTagComment *C); + void visitHTMLEndTagComment(const HTMLEndTagComment *C); + + // Block content. + void visitParagraphComment(const ParagraphComment *C); + void visitBlockCommandComment(const BlockCommandComment *C); + void visitParamCommandComment(const ParamCommandComment *C); + void visitTParamCommandComment(const TParamCommandComment *C); + void visitVerbatimBlockComment(const VerbatimBlockComment *C); + void visitVerbatimBlockLineComment(const VerbatimBlockLineComment *C); + void visitVerbatimLineComment(const VerbatimLineComment *C); + + void visitFullComment(const FullComment *C); +}; + +void CommentDumper::dumpSourceRange(const Comment *C) { + if (!SM) + return; + + SourceRange SR = C->getSourceRange(); + + OS << " <"; + dumpLocation(SR.getBegin()); + if (SR.getBegin() != SR.getEnd()) { + OS << ", "; + dumpLocation(SR.getEnd()); + } + OS << ">"; +} + +void CommentDumper::dumpComment(const Comment *C) { + dumpIndent(); + OS << "(" << C->getCommentKindName() + << " " << (const void *) C; + dumpSourceRange(C); +} + +void CommentDumper::dumpSubtree(const Comment *C) { + ++IndentLevel; + if (C) { + visit(C); + for (Comment::child_iterator I = C->child_begin(), + E = C->child_end(); + I != E; ++I) { + OS << '\n'; + dumpSubtree(*I); + } + OS << ')'; + } else { + dumpIndent(); + OS << "<<>>"; + } + --IndentLevel; +} + +void CommentDumper::visitTextComment(const TextComment *C) { + dumpComment(C); + + OS << " Text=\"" << C->getText() << "\""; +} + +void CommentDumper::visitInlineCommandComment(const InlineCommandComment *C) { + dumpComment(C); + + OS << " Name=\"" << C->getCommandName() << "\""; + switch (C->getRenderKind()) { + case InlineCommandComment::RenderNormal: + OS << " RenderNormal"; + break; + case InlineCommandComment::RenderBold: + OS << " RenderBold"; + break; + case InlineCommandComment::RenderMonospaced: + OS << " RenderMonospaced"; + break; + case InlineCommandComment::RenderEmphasized: + OS << " RenderEmphasized"; + break; + } + + for (unsigned i = 0, e = C->getNumArgs(); i != e; ++i) + OS << " Arg[" << i << "]=\"" << C->getArgText(i) << "\""; +} + +void CommentDumper::visitHTMLStartTagComment(const HTMLStartTagComment *C) { + dumpComment(C); + + OS << " Name=\"" << C->getTagName() << "\""; + if (C->getNumAttrs() != 0) { + OS << " Attrs: "; + for (unsigned i = 0, e = C->getNumAttrs(); i != e; ++i) { + const HTMLStartTagComment::Attribute &Attr = C->getAttr(i); + OS << " \"" << Attr.Name << "=\"" << Attr.Value << "\""; + } + } + if (C->isSelfClosing()) + OS << " SelfClosing"; +} + +void CommentDumper::visitHTMLEndTagComment(const HTMLEndTagComment *C) { 
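+ // dumpComment emits the shared node header (kind name, node address and
+ // source range) before the tag-specific fields that follow.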
+ dumpComment(C); + + OS << " Name=\"" << C->getTagName() << "\""; +} + +void CommentDumper::visitParagraphComment(const ParagraphComment *C) { + dumpComment(C); +} + +void CommentDumper::visitBlockCommandComment(const BlockCommandComment *C) { + dumpComment(C); + + OS << " Name=\"" << C->getCommandName() << "\""; + for (unsigned i = 0, e = C->getNumArgs(); i != e; ++i) + OS << " Arg[" << i << "]=\"" << C->getArgText(i) << "\""; +} + +void CommentDumper::visitParamCommandComment(const ParamCommandComment *C) { + dumpComment(C); + + OS << " " << ParamCommandComment::getDirectionAsString(C->getDirection()); + + if (C->isDirectionExplicit()) + OS << " explicitly"; + else + OS << " implicitly"; + + if (C->hasParamName()) + OS << " Param=\"" << C->getParamName() << "\""; + + if (C->isParamIndexValid()) + OS << " ParamIndex=" << C->getParamIndex(); +} + +void CommentDumper::visitTParamCommandComment(const TParamCommandComment *C) { + dumpComment(C); + + if (C->hasParamName()) { + OS << " Param=\"" << C->getParamName() << "\""; + } + + if (C->isPositionValid()) { + OS << " Position=<"; + for (unsigned i = 0, e = C->getDepth(); i != e; ++i) { + OS << C->getIndex(i); + if (i != e - 1) + OS << ", "; + } + OS << ">"; + } +} + +void CommentDumper::visitVerbatimBlockComment(const VerbatimBlockComment *C) { + dumpComment(C); + + OS << " Name=\"" << C->getCommandName() << "\"" + " CloseName=\"" << C->getCloseName() << "\""; +} + +void CommentDumper::visitVerbatimBlockLineComment(const VerbatimBlockLineComment *C) { + dumpComment(C); + + OS << " Text=\"" << C->getText() << "\""; +} + +void CommentDumper::visitVerbatimLineComment(const VerbatimLineComment *C) { + dumpComment(C); + + OS << " Text=\"" << C->getText() << "\""; +} + +void CommentDumper::visitFullComment(const FullComment *C) { + dumpComment(C); +} + +} // unnamed namespace + +void Comment::dump(llvm::raw_ostream &OS, SourceManager *SM) const { + CommentDumper D(llvm::errs(), SM); + D.dumpSubtree(this); + llvm::errs() << '\n'; +} + +} // end namespace comments +} // end namespace clang + diff --git a/lib/AST/CommentLexer.cpp b/lib/AST/CommentLexer.cpp new file mode 100644 index 0000000..b6516ec --- /dev/null +++ b/lib/AST/CommentLexer.cpp @@ -0,0 +1,815 @@ +#include "clang/AST/CommentLexer.h" +#include "clang/AST/CommentCommandTraits.h" +#include "clang/Basic/ConvertUTF.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm/Support/ErrorHandling.h" + +namespace clang { +namespace comments { + +void Token::dump(const Lexer &L, const SourceManager &SM) const { + llvm::errs() << "comments::Token Kind=" << Kind << " "; + Loc.dump(SM); + llvm::errs() << " " << Length << " \"" << L.getSpelling(*this, SM) << "\"\n"; +} + +namespace { +bool isHTMLNamedCharacterReferenceCharacter(char C) { + return (C >= 'a' && C <= 'z') || + (C >= 'A' && C <= 'Z'); +} + +bool isHTMLDecimalCharacterReferenceCharacter(char C) { + return C >= '0' && C <= '9'; +} + +bool isHTMLHexCharacterReferenceCharacter(char C) { + return (C >= '0' && C <= '9') || + (C >= 'a' && C <= 'f') || + (C >= 'A' && C <= 'F'); +} +} // unnamed namespace + +StringRef Lexer::resolveHTMLNamedCharacterReference(StringRef Name) const { + return llvm::StringSwitch(Name) + .Case("amp", "&") + .Case("lt", "<") + .Case("gt", ">") + .Case("quot", "\"") + .Case("apos", "\'") + .Default(""); +} + +StringRef Lexer::resolveHTMLDecimalCharacterReference(StringRef Name) const { + unsigned CodePoint = 0; + for (unsigned i = 0, e = Name.size(); i != e; ++i) { + 
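+ // Fold each decimal digit into the accumulated code point, most
+ // significant digit first.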
+    assert(isHTMLDecimalCharacterReferenceCharacter(Name[i]));
+    CodePoint *= 10;
+    CodePoint += Name[i] - '0';
+  }
+
+  char *Resolved = Allocator.Allocate<char>(UNI_MAX_UTF8_BYTES_PER_CODE_POINT);
+  char *ResolvedPtr = Resolved;
+  if (ConvertCodePointToUTF8(CodePoint, ResolvedPtr))
+    return StringRef(Resolved, ResolvedPtr - Resolved);
+  else
+    return StringRef();
+}
+
+StringRef Lexer::resolveHTMLHexCharacterReference(StringRef Name) const {
+  unsigned CodePoint = 0;
+  for (unsigned i = 0, e = Name.size(); i != e; ++i) {
+    CodePoint *= 16;
+    const char C = Name[i];
+    assert(isHTMLHexCharacterReferenceCharacter(C));
+    if (C >= '0' && C <= '9')
+      CodePoint += Name[i] - '0';
+    else if (C >= 'a' && C <= 'f')
+      CodePoint += Name[i] - 'a' + 10;
+    else
+      CodePoint += Name[i] - 'A' + 10;
+  }
+
+  char *Resolved = Allocator.Allocate<char>(UNI_MAX_UTF8_BYTES_PER_CODE_POINT);
+  char *ResolvedPtr = Resolved;
+  if (ConvertCodePointToUTF8(CodePoint, ResolvedPtr))
+    return StringRef(Resolved, ResolvedPtr - Resolved);
+  else
+    return StringRef();
+}
+
+void Lexer::skipLineStartingDecorations() {
+  // This function should be called only for C comments
+  assert(CommentState == LCS_InsideCComment);
+
+  if (BufferPtr == CommentEnd)
+    return;
+
+  switch (*BufferPtr) {
+  case ' ':
+  case '\t':
+  case '\f':
+  case '\v': {
+    const char *NewBufferPtr = BufferPtr;
+    NewBufferPtr++;
+    if (NewBufferPtr == CommentEnd)
+      return;
+
+    char C = *NewBufferPtr;
+    while (C == ' ' || C == '\t' || C == '\f' || C == '\v') {
+      NewBufferPtr++;
+      if (NewBufferPtr == CommentEnd)
+        return;
+      C = *NewBufferPtr;
+    }
+    if (C == '*')
+      BufferPtr = NewBufferPtr + 1;
+    break;
+  }
+  case '*':
+    BufferPtr++;
+    break;
+  }
+}
+
+namespace {
+/// Returns pointer to the first newline character in the string.
+const char *findNewline(const char *BufferPtr, const char *BufferEnd) {
+  for ( ; BufferPtr != BufferEnd; ++BufferPtr) {
+    const char C = *BufferPtr;
+    if (C == '\n' || C == '\r')
+      return BufferPtr;
+  }
+  return BufferEnd;
+}
+
+const char *skipNewline(const char *BufferPtr, const char *BufferEnd) {
+  if (BufferPtr == BufferEnd)
+    return BufferPtr;
+
+  if (*BufferPtr == '\n')
+    BufferPtr++;
+  else {
+    assert(*BufferPtr == '\r');
+    BufferPtr++;
+    if (BufferPtr != BufferEnd && *BufferPtr == '\n')
+      BufferPtr++;
+  }
+  return BufferPtr;
+}
+
+const char *skipNamedCharacterReference(const char *BufferPtr,
+                                        const char *BufferEnd) {
+  for ( ; BufferPtr != BufferEnd; ++BufferPtr) {
+    if (!isHTMLNamedCharacterReferenceCharacter(*BufferPtr))
+      return BufferPtr;
+  }
+  return BufferEnd;
+}
+
+const char *skipDecimalCharacterReference(const char *BufferPtr,
+                                          const char *BufferEnd) {
+  for ( ; BufferPtr != BufferEnd; ++BufferPtr) {
+    if (!isHTMLDecimalCharacterReferenceCharacter(*BufferPtr))
+      return BufferPtr;
+  }
+  return BufferEnd;
+}
+
+const char *skipHexCharacterReference(const char *BufferPtr,
+                                      const char *BufferEnd) {
+  for ( ; BufferPtr != BufferEnd; ++BufferPtr) {
+    if (!isHTMLHexCharacterReferenceCharacter(*BufferPtr))
+      return BufferPtr;
+  }
+  return BufferEnd;
+}
+
+bool isHTMLIdentifierStartingCharacter(char C) {
+  return (C >= 'a' && C <= 'z') ||
+         (C >= 'A' && C <= 'Z');
+}
+
+bool isHTMLIdentifierCharacter(char C) {
+  return (C >= 'a' && C <= 'z') ||
+         (C >= 'A' && C <= 'Z') ||
+         (C >= '0' && C <= '9');
+}
+
+const char *skipHTMLIdentifier(const char *BufferPtr, const char *BufferEnd) {
+  for ( ; BufferPtr != BufferEnd; ++BufferPtr) {
+    if (!isHTMLIdentifierCharacter(*BufferPtr))
+      return BufferPtr;
+  }
+  return BufferEnd;
+}
+
+/// Skip an HTML string quoted in single or double quotes.  Escaped quotes
+/// inside the string are allowed.
+///
+/// Returns pointer to closing quote.
+const char *skipHTMLQuotedString(const char *BufferPtr, const char *BufferEnd)
+{
+  const char Quote = *BufferPtr;
+  assert(Quote == '\"' || Quote == '\'');
+
+  BufferPtr++;
+  for ( ; BufferPtr != BufferEnd; ++BufferPtr) {
+    const char C = *BufferPtr;
+    if (C == Quote && BufferPtr[-1] != '\\')
+      return BufferPtr;
+  }
+  return BufferEnd;
+}
+
+bool isHorizontalWhitespace(char C) {
+  return C == ' ' || C == '\t' || C == '\f' || C == '\v';
+}
+
+bool isWhitespace(char C) {
+  return C == ' ' || C == '\n' || C == '\r' ||
+         C == '\t' || C == '\f' || C == '\v';
+}
+
+const char *skipWhitespace(const char *BufferPtr, const char *BufferEnd) {
+  for ( ; BufferPtr != BufferEnd; ++BufferPtr) {
+    if (!isWhitespace(*BufferPtr))
+      return BufferPtr;
+  }
+  return BufferEnd;
+}
+
+bool isWhitespace(const char *BufferPtr, const char *BufferEnd) {
+  return skipWhitespace(BufferPtr, BufferEnd) == BufferEnd;
+}
+
+bool isCommandNameCharacter(char C) {
+  return (C >= 'a' && C <= 'z') ||
+         (C >= 'A' && C <= 'Z') ||
+         (C >= '0' && C <= '9');
+}
+
+const char *skipCommandName(const char *BufferPtr, const char *BufferEnd) {
+  for ( ; BufferPtr != BufferEnd; ++BufferPtr) {
+    if (!isCommandNameCharacter(*BufferPtr))
+      return BufferPtr;
+  }
+  return BufferEnd;
+}
+
+/// Return the one past end pointer for BCPL comments.
+/// Handles newlines escaped with backslash or trigraph for backslash.
+const char *findBCPLCommentEnd(const char *BufferPtr, const char *BufferEnd) {
+  const char *CurPtr = BufferPtr;
+  while (CurPtr != BufferEnd) {
+    char C = *CurPtr;
+    while (C != '\n' && C != '\r') {
+      CurPtr++;
+      if (CurPtr == BufferEnd)
+        return BufferEnd;
+      C = *CurPtr;
+    }
+    // We found a newline, check if it is escaped.
+    const char *EscapePtr = CurPtr - 1;
+    while (isHorizontalWhitespace(*EscapePtr))
+      EscapePtr--;
+
+    if (*EscapePtr == '\\' ||
+        (EscapePtr - 2 >= BufferPtr && EscapePtr[0] == '/' &&
+         EscapePtr[-1] == '?' && EscapePtr[-2] == '?')) {
+      // We found an escaped newline.
+      CurPtr = skipNewline(CurPtr, BufferEnd);
+    } else
+      return CurPtr; // Not an escaped newline.
+  }
+  return BufferEnd;
+}
+
+/// Return the one past end pointer for C comments.
+/// Very dumb, does not handle escaped newlines or trigraphs.
+const char *findCCommentEnd(const char *BufferPtr, const char *BufferEnd) { + for ( ; BufferPtr != BufferEnd; ++BufferPtr) { + if (*BufferPtr == '*') { + assert(BufferPtr + 1 != BufferEnd); + if (*(BufferPtr + 1) == '/') + return BufferPtr; + } + } + llvm_unreachable("buffer end hit before '*/' was seen"); +} +} // unnamed namespace + +void Lexer::lexCommentText(Token &T) { + assert(CommentState == LCS_InsideBCPLComment || + CommentState == LCS_InsideCComment); + + switch (State) { + case LS_Normal: + break; + case LS_VerbatimBlockFirstLine: + lexVerbatimBlockFirstLine(T); + return; + case LS_VerbatimBlockBody: + lexVerbatimBlockBody(T); + return; + case LS_VerbatimLineText: + lexVerbatimLineText(T); + return; + case LS_HTMLStartTag: + lexHTMLStartTag(T); + return; + case LS_HTMLEndTag: + lexHTMLEndTag(T); + return; + } + + assert(State == LS_Normal); + + const char *TokenPtr = BufferPtr; + assert(TokenPtr < CommentEnd); + while (TokenPtr != CommentEnd) { + switch(*TokenPtr) { + case '\\': + case '@': { + TokenPtr++; + if (TokenPtr == CommentEnd) { + formTextToken(T, TokenPtr); + return; + } + char C = *TokenPtr; + switch (C) { + default: + break; + + case '\\': case '@': case '&': case '$': + case '#': case '<': case '>': case '%': + case '\"': case '.': case ':': + // This is one of \\ \@ \& \$ etc escape sequences. + TokenPtr++; + if (C == ':' && TokenPtr != CommentEnd && *TokenPtr == ':') { + // This is the \:: escape sequence. + TokenPtr++; + } + StringRef UnescapedText(BufferPtr + 1, TokenPtr - (BufferPtr + 1)); + formTokenWithChars(T, TokenPtr, tok::text); + T.setText(UnescapedText); + return; + } + + // Don't make zero-length commands. + if (!isCommandNameCharacter(*TokenPtr)) { + formTextToken(T, TokenPtr); + return; + } + + TokenPtr = skipCommandName(TokenPtr, CommentEnd); + unsigned Length = TokenPtr - (BufferPtr + 1); + + // Hardcoded support for lexing LaTeX formula commands + // \f$ \f[ \f] \f{ \f} as a single command. 
+ if (Length == 1 && TokenPtr[-1] == 'f' && TokenPtr != CommentEnd) { + C = *TokenPtr; + if (C == '$' || C == '[' || C == ']' || C == '{' || C == '}') { + TokenPtr++; + Length++; + } + } + + const StringRef CommandName(BufferPtr + 1, Length); + StringRef EndName; + + if (Traits.isVerbatimBlockCommand(CommandName, EndName)) { + setupAndLexVerbatimBlock(T, TokenPtr, *BufferPtr, EndName); + return; + } + if (Traits.isVerbatimLineCommand(CommandName)) { + setupAndLexVerbatimLine(T, TokenPtr); + return; + } + formTokenWithChars(T, TokenPtr, tok::command); + T.setCommandName(CommandName); + return; + } + + case '&': + lexHTMLCharacterReference(T); + return; + + case '<': { + TokenPtr++; + if (TokenPtr == CommentEnd) { + formTextToken(T, TokenPtr); + return; + } + const char C = *TokenPtr; + if (isHTMLIdentifierStartingCharacter(C)) + setupAndLexHTMLStartTag(T); + else if (C == '/') + setupAndLexHTMLEndTag(T); + else + formTextToken(T, TokenPtr); + + return; + } + + case '\n': + case '\r': + TokenPtr = skipNewline(TokenPtr, CommentEnd); + formTokenWithChars(T, TokenPtr, tok::newline); + + if (CommentState == LCS_InsideCComment) + skipLineStartingDecorations(); + return; + + default: { + while (true) { + TokenPtr++; + if (TokenPtr == CommentEnd) + break; + const char C = *TokenPtr; + if(C == '\n' || C == '\r' || + C == '\\' || C == '@' || C == '&' || C == '<') + break; + } + formTextToken(T, TokenPtr); + return; + } + } + } +} + +void Lexer::setupAndLexVerbatimBlock(Token &T, + const char *TextBegin, + char Marker, StringRef EndName) { + VerbatimBlockEndCommandName.clear(); + VerbatimBlockEndCommandName.append(Marker == '\\' ? "\\" : "@"); + VerbatimBlockEndCommandName.append(EndName); + + StringRef Name(BufferPtr + 1, TextBegin - (BufferPtr + 1)); + formTokenWithChars(T, TextBegin, tok::verbatim_block_begin); + T.setVerbatimBlockName(Name); + + // If there is a newline following the verbatim opening command, skip the + // newline so that we don't create an tok::verbatim_block_line with empty + // text content. + if (BufferPtr != CommentEnd) { + const char C = *BufferPtr; + if (C == '\n' || C == '\r') { + BufferPtr = skipNewline(BufferPtr, CommentEnd); + State = LS_VerbatimBlockBody; + return; + } + } + + State = LS_VerbatimBlockFirstLine; +} + +void Lexer::lexVerbatimBlockFirstLine(Token &T) { +again: + assert(BufferPtr < CommentEnd); + + // FIXME: It would be better to scan the text once, finding either the block + // end command or newline. + // + // Extract current line. + const char *Newline = findNewline(BufferPtr, CommentEnd); + StringRef Line(BufferPtr, Newline - BufferPtr); + + // Look for end command in current line. + size_t Pos = Line.find(VerbatimBlockEndCommandName); + const char *TextEnd; + const char *NextLine; + if (Pos == StringRef::npos) { + // Current line is completely verbatim. + TextEnd = Newline; + NextLine = skipNewline(Newline, CommentEnd); + } else if (Pos == 0) { + // Current line contains just an end command. + const char *End = BufferPtr + VerbatimBlockEndCommandName.size(); + StringRef Name(BufferPtr + 1, End - (BufferPtr + 1)); + formTokenWithChars(T, End, tok::verbatim_block_end); + T.setVerbatimBlockName(Name); + State = LS_Normal; + return; + } else { + // There is some text, followed by end command. Extract text first. + TextEnd = BufferPtr + Pos; + NextLine = TextEnd; + // If there is only whitespace before end command, skip whitespace. 
+ if (isWhitespace(BufferPtr, TextEnd)) { + BufferPtr = TextEnd; + goto again; + } + } + + StringRef Text(BufferPtr, TextEnd - BufferPtr); + formTokenWithChars(T, NextLine, tok::verbatim_block_line); + T.setVerbatimBlockText(Text); + + State = LS_VerbatimBlockBody; +} + +void Lexer::lexVerbatimBlockBody(Token &T) { + assert(State == LS_VerbatimBlockBody); + + if (CommentState == LCS_InsideCComment) + skipLineStartingDecorations(); + + lexVerbatimBlockFirstLine(T); +} + +void Lexer::setupAndLexVerbatimLine(Token &T, const char *TextBegin) { + const StringRef Name(BufferPtr + 1, TextBegin - BufferPtr - 1); + formTokenWithChars(T, TextBegin, tok::verbatim_line_name); + T.setVerbatimLineName(Name); + + State = LS_VerbatimLineText; +} + +void Lexer::lexVerbatimLineText(Token &T) { + assert(State == LS_VerbatimLineText); + + // Extract current line. + const char *Newline = findNewline(BufferPtr, CommentEnd); + const StringRef Text(BufferPtr, Newline - BufferPtr); + formTokenWithChars(T, Newline, tok::verbatim_line_text); + T.setVerbatimLineText(Text); + + State = LS_Normal; +} + +void Lexer::lexHTMLCharacterReference(Token &T) { + const char *TokenPtr = BufferPtr; + assert(*TokenPtr == '&'); + TokenPtr++; + if (TokenPtr == CommentEnd) { + formTextToken(T, TokenPtr); + return; + } + const char *NamePtr; + bool isNamed = false; + bool isDecimal = false; + char C = *TokenPtr; + if (isHTMLNamedCharacterReferenceCharacter(C)) { + NamePtr = TokenPtr; + TokenPtr = skipNamedCharacterReference(TokenPtr, CommentEnd); + isNamed = true; + } else if (C == '#') { + TokenPtr++; + if (TokenPtr == CommentEnd) { + formTextToken(T, TokenPtr); + return; + } + C = *TokenPtr; + if (isHTMLDecimalCharacterReferenceCharacter(C)) { + NamePtr = TokenPtr; + TokenPtr = skipDecimalCharacterReference(TokenPtr, CommentEnd); + isDecimal = true; + } else if (C == 'x' || C == 'X') { + TokenPtr++; + NamePtr = TokenPtr; + TokenPtr = skipHexCharacterReference(TokenPtr, CommentEnd); + } else { + formTextToken(T, TokenPtr); + return; + } + } else { + formTextToken(T, TokenPtr); + return; + } + if (NamePtr == TokenPtr || TokenPtr == CommentEnd || + *TokenPtr != ';') { + formTextToken(T, TokenPtr); + return; + } + StringRef Name(NamePtr, TokenPtr - NamePtr); + TokenPtr++; // Skip semicolon. 
+ StringRef Resolved; + if (isNamed) + Resolved = resolveHTMLNamedCharacterReference(Name); + else if (isDecimal) + Resolved = resolveHTMLDecimalCharacterReference(Name); + else + Resolved = resolveHTMLHexCharacterReference(Name); + + if (Resolved.empty()) { + formTextToken(T, TokenPtr); + return; + } + formTokenWithChars(T, TokenPtr, tok::text); + T.setText(Resolved); + return; +} + +void Lexer::setupAndLexHTMLStartTag(Token &T) { + assert(BufferPtr[0] == '<' && + isHTMLIdentifierStartingCharacter(BufferPtr[1])); + const char *TagNameEnd = skipHTMLIdentifier(BufferPtr + 2, CommentEnd); + + StringRef Name(BufferPtr + 1, TagNameEnd - (BufferPtr + 1)); + formTokenWithChars(T, TagNameEnd, tok::html_start_tag); + T.setHTMLTagStartName(Name); + + BufferPtr = skipWhitespace(BufferPtr, CommentEnd); + + const char C = *BufferPtr; + if (BufferPtr != CommentEnd && + (C == '>' || C == '/' || isHTMLIdentifierStartingCharacter(C))) + State = LS_HTMLStartTag; +} + +void Lexer::lexHTMLStartTag(Token &T) { + assert(State == LS_HTMLStartTag); + + const char *TokenPtr = BufferPtr; + char C = *TokenPtr; + if (isHTMLIdentifierCharacter(C)) { + TokenPtr = skipHTMLIdentifier(TokenPtr, CommentEnd); + StringRef Ident(BufferPtr, TokenPtr - BufferPtr); + formTokenWithChars(T, TokenPtr, tok::html_ident); + T.setHTMLIdent(Ident); + } else { + switch (C) { + case '=': + TokenPtr++; + formTokenWithChars(T, TokenPtr, tok::html_equals); + break; + case '\"': + case '\'': { + const char *OpenQuote = TokenPtr; + TokenPtr = skipHTMLQuotedString(TokenPtr, CommentEnd); + const char *ClosingQuote = TokenPtr; + if (TokenPtr != CommentEnd) // Skip closing quote. + TokenPtr++; + formTokenWithChars(T, TokenPtr, tok::html_quoted_string); + T.setHTMLQuotedString(StringRef(OpenQuote + 1, + ClosingQuote - (OpenQuote + 1))); + break; + } + case '>': + TokenPtr++; + formTokenWithChars(T, TokenPtr, tok::html_greater); + State = LS_Normal; + return; + case '/': + TokenPtr++; + if (TokenPtr != CommentEnd && *TokenPtr == '>') { + TokenPtr++; + formTokenWithChars(T, TokenPtr, tok::html_slash_greater); + } else + formTextToken(T, TokenPtr); + + State = LS_Normal; + return; + } + } + + // Now look ahead and return to normal state if we don't see any HTML tokens + // ahead. 
+ BufferPtr = skipWhitespace(BufferPtr, CommentEnd); + if (BufferPtr == CommentEnd) { + State = LS_Normal; + return; + } + + C = *BufferPtr; + if (!isHTMLIdentifierStartingCharacter(C) && + C != '=' && C != '\"' && C != '\'' && C != '>') { + State = LS_Normal; + return; + } +} + +void Lexer::setupAndLexHTMLEndTag(Token &T) { + assert(BufferPtr[0] == '<' && BufferPtr[1] == '/'); + + const char *TagNameBegin = skipWhitespace(BufferPtr + 2, CommentEnd); + const char *TagNameEnd = skipHTMLIdentifier(TagNameBegin, CommentEnd); + + const char *End = skipWhitespace(TagNameEnd, CommentEnd); + + formTokenWithChars(T, End, tok::html_end_tag); + T.setHTMLTagEndName(StringRef(TagNameBegin, TagNameEnd - TagNameBegin)); + + if (BufferPtr != CommentEnd && *BufferPtr == '>') + State = LS_HTMLEndTag; +} + +void Lexer::lexHTMLEndTag(Token &T) { + assert(BufferPtr != CommentEnd && *BufferPtr == '>'); + + formTokenWithChars(T, BufferPtr + 1, tok::html_greater); + State = LS_Normal; +} + +Lexer::Lexer(llvm::BumpPtrAllocator &Allocator, const CommandTraits &Traits, + SourceLocation FileLoc, const CommentOptions &CommOpts, + const char *BufferStart, const char *BufferEnd): + Allocator(Allocator), Traits(Traits), + BufferStart(BufferStart), BufferEnd(BufferEnd), + FileLoc(FileLoc), CommOpts(CommOpts), BufferPtr(BufferStart), + CommentState(LCS_BeforeComment), State(LS_Normal) { +} + +void Lexer::lex(Token &T) { +again: + switch (CommentState) { + case LCS_BeforeComment: + if (BufferPtr == BufferEnd) { + formTokenWithChars(T, BufferPtr, tok::eof); + return; + } + + assert(*BufferPtr == '/'); + BufferPtr++; // Skip first slash. + switch(*BufferPtr) { + case '/': { // BCPL comment. + BufferPtr++; // Skip second slash. + + if (BufferPtr != BufferEnd) { + // Skip Doxygen magic marker, if it is present. + // It might be missing because of a typo //< or /*<, or because we + // merged this non-Doxygen comment into a bunch of Doxygen comments + // around it: /** ... */ /* ... */ /** ... */ + const char C = *BufferPtr; + if (C == '/' || C == '!') + BufferPtr++; + } + + // Skip less-than symbol that marks trailing comments. + // Skip it even if the comment is not a Doxygen one, because //< and /*< + // are frequent typos. + if (BufferPtr != BufferEnd && *BufferPtr == '<') + BufferPtr++; + + CommentState = LCS_InsideBCPLComment; + if (State != LS_VerbatimBlockBody && State != LS_VerbatimBlockFirstLine) + State = LS_Normal; + CommentEnd = findBCPLCommentEnd(BufferPtr, BufferEnd); + goto again; + } + case '*': { // C comment. + BufferPtr++; // Skip star. + + // Skip Doxygen magic marker. + const char C = *BufferPtr; + if ((C == '*' && *(BufferPtr + 1) != '/') || C == '!') + BufferPtr++; + + // Skip less-than symbol that marks trailing comments. + if (BufferPtr != BufferEnd && *BufferPtr == '<') + BufferPtr++; + + CommentState = LCS_InsideCComment; + State = LS_Normal; + CommentEnd = findCCommentEnd(BufferPtr, BufferEnd); + goto again; + } + default: + llvm_unreachable("second character of comment should be '/' or '*'"); + } + + case LCS_BetweenComments: { + // Consecutive comments are extracted only if there is only whitespace + // between them. So we can search for the start of the next comment. + const char *EndWhitespace = BufferPtr; + while(EndWhitespace != BufferEnd && *EndWhitespace != '/') + EndWhitespace++; + + // Turn any whitespace between comments (and there is only whitespace + // between them -- guaranteed by comment extraction) into a newline. 
We
+      // have two newlines between C comments in total (first one was synthesized
+      // after a comment).
+      formTokenWithChars(T, EndWhitespace, tok::newline);
+
+      CommentState = LCS_BeforeComment;
+      break;
+    }
+
+    case LCS_InsideBCPLComment:
+    case LCS_InsideCComment:
+      if (BufferPtr != CommentEnd) {
+        lexCommentText(T);
+        break;
+      } else {
+        // Skip C comment closing sequence.
+        if (CommentState == LCS_InsideCComment) {
+          assert(BufferPtr[0] == '*' && BufferPtr[1] == '/');
+          BufferPtr += 2;
+          assert(BufferPtr <= BufferEnd);
+
+          // Synthesize a newline just after the C comment, regardless of
+          // whether there is actually a newline.
+          formTokenWithChars(T, BufferPtr, tok::newline);
+
+          CommentState = LCS_BetweenComments;
+          break;
+        } else {
+          // Don't synthesize a newline after a BCPL comment.
+          CommentState = LCS_BetweenComments;
+          goto again;
+        }
+      }
+  }
+}
+
+StringRef Lexer::getSpelling(const Token &Tok,
+                             const SourceManager &SourceMgr,
+                             bool *Invalid) const {
+  SourceLocation Loc = Tok.getLocation();
+  std::pair<FileID, unsigned> LocInfo = SourceMgr.getDecomposedLoc(Loc);
+
+  bool InvalidTemp = false;
+  StringRef File = SourceMgr.getBufferData(LocInfo.first, &InvalidTemp);
+  if (InvalidTemp) {
+    *Invalid = true;
+    return StringRef();
+  }
+
+  const char *Begin = File.data() + LocInfo.second;
+  return StringRef(Begin, Tok.getLength());
+}
+
+} // end namespace comments
+} // end namespace clang
+
diff --git a/lib/AST/CommentParser.cpp b/lib/AST/CommentParser.cpp
new file mode 100644
index 0000000..43abf6a
--- /dev/null
+++ b/lib/AST/CommentParser.cpp
@@ -0,0 +1,722 @@
+//===--- CommentParser.cpp - Doxygen comment parser -----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/AST/CommentParser.h"
+#include "clang/AST/CommentSema.h"
+#include "clang/AST/CommentDiagnostic.h"
+#include "clang/AST/CommentCommandTraits.h"
+#include "clang/Basic/SourceManager.h"
+#include "llvm/Support/ErrorHandling.h"
+
+namespace clang {
+namespace comments {
+
+/// Re-lexes a sequence of tok::text tokens.
+class TextTokenRetokenizer {
+  llvm::BumpPtrAllocator &Allocator;
+  Parser &P;
+
+  /// This flag is set when there are no more tokens we can fetch from lexer.
+  bool NoMoreInterestingTokens;
+
+  /// Token buffer: tokens we have processed and lookahead.
+  SmallVector<Token, 16> Toks;
+
+  /// A position in \c Toks.
+  struct Position {
+    unsigned CurToken;
+    const char *BufferStart;
+    const char *BufferEnd;
+    const char *BufferPtr;
+    SourceLocation BufferStartLoc;
+  };
+
+  /// Current position in Toks.
+  Position Pos;
+
+  bool isEnd() const {
+    return Pos.CurToken >= Toks.size();
+  }
+
+  /// Sets up the buffer pointers to point to current token.
+  void setupBuffer() {
+    assert(!isEnd());
+    const Token &Tok = Toks[Pos.CurToken];
+
+    Pos.BufferStart = Tok.getText().begin();
+    Pos.BufferEnd = Tok.getText().end();
+    Pos.BufferPtr = Pos.BufferStart;
+    Pos.BufferStartLoc = Tok.getLocation();
+  }
+
+  SourceLocation getSourceLocation() const {
+    const unsigned CharNo = Pos.BufferPtr - Pos.BufferStart;
+    return Pos.BufferStartLoc.getLocWithOffset(CharNo);
+  }
+
+  char peek() const {
+    assert(!isEnd());
+    assert(Pos.BufferPtr != Pos.BufferEnd);
+    return *Pos.BufferPtr;
+  }
+
+  void consumeChar() {
+    assert(!isEnd());
+    assert(Pos.BufferPtr != Pos.BufferEnd);
+    Pos.BufferPtr++;
+    if (Pos.BufferPtr == Pos.BufferEnd) {
+      Pos.CurToken++;
+      if (isEnd() && !addToken())
+        return;
+
+      assert(!isEnd());
+      setupBuffer();
+    }
+  }
+
+  /// Add a token.
+  /// Returns true on success, false if there are no interesting tokens to
+  /// fetch from lexer.
+  bool addToken() {
+    if (NoMoreInterestingTokens)
+      return false;
+
+    if (P.Tok.is(tok::newline)) {
+      // If we see a single newline token between text tokens, skip it.
+      Token Newline = P.Tok;
+      P.consumeToken();
+      if (P.Tok.isNot(tok::text)) {
+        P.putBack(Newline);
+        NoMoreInterestingTokens = true;
+        return false;
+      }
+    }
+    if (P.Tok.isNot(tok::text)) {
+      NoMoreInterestingTokens = true;
+      return false;
+    }
+
+    Toks.push_back(P.Tok);
+    P.consumeToken();
+    if (Toks.size() == 1)
+      setupBuffer();
+    return true;
+  }
+
+  static bool isWhitespace(char C) {
+    return C == ' ' || C == '\n' || C == '\r' ||
+           C == '\t' || C == '\f' || C == '\v';
+  }
+
+  void consumeWhitespace() {
+    while (!isEnd()) {
+      if (isWhitespace(peek()))
+        consumeChar();
+      else
+        break;
+    }
+  }
+
+  void formTokenWithChars(Token &Result,
+                          SourceLocation Loc,
+                          const char *TokBegin,
+                          unsigned TokLength,
+                          StringRef Text) {
+    Result.setLocation(Loc);
+    Result.setKind(tok::text);
+    Result.setLength(TokLength);
+#ifndef NDEBUG
+    Result.TextPtr1 = "<UNSET>";
+    Result.TextLen1 = 7;
+#endif
+    Result.setText(Text);
+  }
+
+public:
+  TextTokenRetokenizer(llvm::BumpPtrAllocator &Allocator, Parser &P):
+      Allocator(Allocator), P(P), NoMoreInterestingTokens(false) {
+    Pos.CurToken = 0;
+    addToken();
+  }
+
+  /// Extract a word -- sequence of non-whitespace characters.
+  bool lexWord(Token &Tok) {
+    if (isEnd())
+      return false;
+
+    Position SavedPos = Pos;
+
+    consumeWhitespace();
+    SmallString<32> WordText;
+    const char *WordBegin = Pos.BufferPtr;
+    SourceLocation Loc = getSourceLocation();
+    while (!isEnd()) {
+      const char C = peek();
+      if (!isWhitespace(C)) {
+        WordText.push_back(C);
+        consumeChar();
+      } else
+        break;
+    }
+    const unsigned Length = WordText.size();
+    if (Length == 0) {
+      Pos = SavedPos;
+      return false;
+    }
+
+    char *TextPtr = Allocator.Allocate<char>(Length + 1);
+
+    memcpy(TextPtr, WordText.c_str(), Length + 1);
+    StringRef Text = StringRef(TextPtr, Length);
+
+    formTokenWithChars(Tok, Loc, WordBegin,
+                       Pos.BufferPtr - WordBegin, Text);
+    return true;
+  }
+
+  bool lexDelimitedSeq(Token &Tok, char OpenDelim, char CloseDelim) {
+    if (isEnd())
+      return false;
+
+    Position SavedPos = Pos;
+
+    consumeWhitespace();
+    SmallString<32> WordText;
+    const char *WordBegin = Pos.BufferPtr;
+    SourceLocation Loc = getSourceLocation();
+    bool Error = false;
+    if (!isEnd()) {
+      const char C = peek();
+      if (C == OpenDelim) {
+        WordText.push_back(C);
+        consumeChar();
+      } else
+        Error = true;
+    }
+    char C = '\0';
+    while (!Error && !isEnd()) {
+      C = peek();
+      WordText.push_back(C);
+      consumeChar();
+      if (C == CloseDelim)
+        break;
+    }
+    if (!Error && C != CloseDelim)
+      Error = true;
+
+    if (Error) {
+      Pos = SavedPos;
+      return false;
+    }
+
+    const unsigned Length = WordText.size();
+    char *TextPtr = Allocator.Allocate<char>(Length + 1);
+
+    memcpy(TextPtr, WordText.c_str(), Length + 1);
+    StringRef Text = StringRef(TextPtr, Length);
+
+    formTokenWithChars(Tok, Loc, WordBegin,
+                       Pos.BufferPtr - WordBegin, Text);
+    return true;
+  }
+
+  /// Put back tokens that we didn't consume.
+  void putBackLeftoverTokens() {
+    if (isEnd())
+      return;
+
+    bool HavePartialTok = false;
+    Token PartialTok;
+    if (Pos.BufferPtr != Pos.BufferStart) {
+      formTokenWithChars(PartialTok, getSourceLocation(),
+                         Pos.BufferPtr, Pos.BufferEnd - Pos.BufferPtr,
+                         StringRef(Pos.BufferPtr,
+                                   Pos.BufferEnd - Pos.BufferPtr));
+      HavePartialTok = true;
+      Pos.CurToken++;
+    }
+
+    P.putBack(llvm::makeArrayRef(Toks.begin() + Pos.CurToken, Toks.end()));
+    Pos.CurToken = Toks.size();
+
+    if (HavePartialTok)
+      P.putBack(PartialTok);
+  }
+};
+
+Parser::Parser(Lexer &L, Sema &S, llvm::BumpPtrAllocator &Allocator,
+               const SourceManager &SourceMgr, DiagnosticsEngine &Diags,
+               const CommandTraits &Traits):
+    L(L), S(S), Allocator(Allocator), SourceMgr(SourceMgr), Diags(Diags),
+    Traits(Traits) {
+  consumeToken();
+}
+
+void Parser::parseParamCommandArgs(ParamCommandComment *PC,
+                                   TextTokenRetokenizer &Retokenizer) {
+  Token Arg;
+  // Check if argument looks like direction specification: [dir]
+  // e.g., [in], [out], [in,out]
+  if (Retokenizer.lexDelimitedSeq(Arg, '[', ']'))
+    S.actOnParamCommandDirectionArg(PC,
+                                    Arg.getLocation(),
+                                    Arg.getEndLocation(),
+                                    Arg.getText());
+
+  if (Retokenizer.lexWord(Arg))
+    S.actOnParamCommandParamNameArg(PC,
+                                    Arg.getLocation(),
+                                    Arg.getEndLocation(),
+                                    Arg.getText());
+}
+
+void Parser::parseTParamCommandArgs(TParamCommandComment *TPC,
+                                    TextTokenRetokenizer &Retokenizer) {
+  Token Arg;
+  if (Retokenizer.lexWord(Arg))
+    S.actOnTParamCommandParamNameArg(TPC,
+                                     Arg.getLocation(),
+                                     Arg.getEndLocation(),
+                                     Arg.getText());
+}
+
+void Parser::parseBlockCommandArgs(BlockCommandComment *BC,
+                                   TextTokenRetokenizer &Retokenizer,
+                                   unsigned NumArgs) {
+  typedef BlockCommandComment::Argument Argument;
+  Argument *Args =
+      new (Allocator.Allocate<Argument>(NumArgs)) Argument[NumArgs];
+  unsigned ParsedArgs = 0;
+  Token Arg;
+  while (ParsedArgs < NumArgs && Retokenizer.lexWord(Arg)) {
+    Args[ParsedArgs] = Argument(SourceRange(Arg.getLocation(),
+                                            Arg.getEndLocation()),
+                                Arg.getText());
+    ParsedArgs++;
+  }
+
+  S.actOnBlockCommandArgs(BC, llvm::makeArrayRef(Args, ParsedArgs));
+}
+
+BlockCommandComment *Parser::parseBlockCommand() {
+  assert(Tok.is(tok::command));
+
+  ParamCommandComment *PC;
+  TParamCommandComment *TPC;
+  BlockCommandComment *BC;
+  bool IsParam = false;
+  bool IsTParam = false;
+  unsigned NumArgs = 0;
+  if (Traits.isParamCommand(Tok.getCommandName())) {
+    IsParam = true;
+    PC = S.actOnParamCommandStart(Tok.getLocation(),
+                                  Tok.getEndLocation(),
+                                  Tok.getCommandName());
+  } else if (Traits.isTParamCommand(Tok.getCommandName())) {
+    IsTParam = true;
+    TPC = S.actOnTParamCommandStart(Tok.getLocation(),
+                                    Tok.getEndLocation(),
+                                    Tok.getCommandName());
+  } else {
+    NumArgs = Traits.getBlockCommandNumArgs(Tok.getCommandName());
+    BC = S.actOnBlockCommandStart(Tok.getLocation(),
+                                  Tok.getEndLocation(),
+                                  Tok.getCommandName());
+  }
+  consumeToken();
+
+  if (Tok.is(tok::command) && Traits.isBlockCommand(Tok.getCommandName())) {
+    // Block command ahead.  We can't nest block commands, so pretend that this
+    // command has an empty argument.
+    ParagraphComment *Paragraph = S.actOnParagraphComment(
+        ArrayRef<InlineContentComment *>());
+    if (IsParam) {
+      S.actOnParamCommandFinish(PC, Paragraph);
+      return PC;
+    } else if (IsTParam) {
+      S.actOnTParamCommandFinish(TPC, Paragraph);
+      return TPC;
+    } else {
+      S.actOnBlockCommandFinish(BC, Paragraph);
+      return BC;
+    }
+  }
+
+  if (IsParam || IsTParam || NumArgs > 0) {
+    // In order to parse command arguments we need to retokenize a few
+    // following text tokens.
+    TextTokenRetokenizer Retokenizer(Allocator, *this);
+
+    if (IsParam)
+      parseParamCommandArgs(PC, Retokenizer);
+    else if (IsTParam)
+      parseTParamCommandArgs(TPC, Retokenizer);
+    else
+      parseBlockCommandArgs(BC, Retokenizer, NumArgs);
+
+    Retokenizer.putBackLeftoverTokens();
+  }
+
+  BlockContentComment *Block = parseParagraphOrBlockCommand();
+  // Since we have checked for a block command, we should have parsed a
+  // paragraph.
+  ParagraphComment *Paragraph = cast<ParagraphComment>(Block);
+  if (IsParam) {
+    S.actOnParamCommandFinish(PC, Paragraph);
+    return PC;
+  } else if (IsTParam) {
+    S.actOnTParamCommandFinish(TPC, Paragraph);
+    return TPC;
+  } else {
+    S.actOnBlockCommandFinish(BC, Paragraph);
+    return BC;
+  }
+}
+
+InlineCommandComment *Parser::parseInlineCommand() {
+  assert(Tok.is(tok::command));
+
+  const Token CommandTok = Tok;
+  consumeToken();
+
+  TextTokenRetokenizer Retokenizer(Allocator, *this);
+
+  Token ArgTok;
+  bool ArgTokValid = Retokenizer.lexWord(ArgTok);
+
+  InlineCommandComment *IC;
+  if (ArgTokValid) {
+    IC = S.actOnInlineCommand(CommandTok.getLocation(),
+                              CommandTok.getEndLocation(),
+                              CommandTok.getCommandName(),
+                              ArgTok.getLocation(),
+                              ArgTok.getEndLocation(),
+                              ArgTok.getText());
+  } else {
+    IC = S.actOnInlineCommand(CommandTok.getLocation(),
+                              CommandTok.getEndLocation(),
+                              CommandTok.getCommandName());
+  }
+
+  Retokenizer.putBackLeftoverTokens();
+
+  return IC;
+}
+
+HTMLStartTagComment *Parser::parseHTMLStartTag() {
+  assert(Tok.is(tok::html_start_tag));
+  HTMLStartTagComment *HST =
+      S.actOnHTMLStartTagStart(Tok.getLocation(),
+                               Tok.getHTMLTagStartName());
+  consumeToken();
+
+  SmallVector<HTMLStartTagComment::Attribute, 2> Attrs;
+  while (true) {
+    switch (Tok.getKind()) {
+    case tok::html_ident: {
+      Token Ident = Tok;
+      consumeToken();
+      if (Tok.isNot(tok::html_equals)) {
+        Attrs.push_back(HTMLStartTagComment::Attribute(Ident.getLocation(),
+                                                       Ident.getHTMLIdent()));
+        continue;
+      }
+      Token Equals = Tok;
+      consumeToken();
+      if (Tok.isNot(tok::html_quoted_string)) {
+        Diag(Tok.getLocation(),
+             diag::warn_doc_html_start_tag_expected_quoted_string)
+          << SourceRange(Equals.getLocation());
+        Attrs.push_back(HTMLStartTagComment::Attribute(Ident.getLocation(),
+                                                       Ident.getHTMLIdent()));
+        while (Tok.is(tok::html_equals) ||
+               Tok.is(tok::html_quoted_string))
+          consumeToken();
+        continue;
+      }
+      Attrs.push_back(HTMLStartTagComment::Attribute(
+                          Ident.getLocation(),
+                          Ident.getHTMLIdent(),
+                          Equals.getLocation(),
+                          SourceRange(Tok.getLocation(),
+                                      Tok.getEndLocation()),
+                          Tok.getHTMLQuotedString()));
+      consumeToken();
+      continue;
+    }
+
+    case tok::html_greater:
+      S.actOnHTMLStartTagFinish(HST,
+                                S.copyArray(llvm::makeArrayRef(Attrs)),
+                                Tok.getLocation(),
+                                /* IsSelfClosing = */ false);
+      consumeToken();
+      return HST;
+
+    case tok::html_slash_greater:
+      S.actOnHTMLStartTagFinish(HST,
+                                S.copyArray(llvm::makeArrayRef(Attrs)),
+                                Tok.getLocation(),
+                                /* IsSelfClosing = */ true);
+      consumeToken();
+      return HST;
+
+    case tok::html_equals:
+    case tok::html_quoted_string:
+      Diag(Tok.getLocation(),
+           diag::warn_doc_html_start_tag_expected_ident_or_greater);
+      while (Tok.is(tok::html_equals) ||
+             Tok.is(tok::html_quoted_string))
+        consumeToken();
+      if (Tok.is(tok::html_ident) ||
+          Tok.is(tok::html_greater) ||
+          Tok.is(tok::html_slash_greater))
+        continue;
+
+      S.actOnHTMLStartTagFinish(HST,
+                                S.copyArray(llvm::makeArrayRef(Attrs)),
+                                SourceLocation(),
+                                /* IsSelfClosing = */ false);
+      return HST;
+
+    default:
+      // Not a token from an HTML start tag.  Thus HTML tag prematurely ended.
+      S.actOnHTMLStartTagFinish(HST,
+                                S.copyArray(llvm::makeArrayRef(Attrs)),
+                                SourceLocation(),
+                                /* IsSelfClosing = */ false);
+      bool StartLineInvalid;
+      const unsigned StartLine = SourceMgr.getPresumedLineNumber(
+                                     HST->getLocation(),
+                                     &StartLineInvalid);
+      bool EndLineInvalid;
+      const unsigned EndLine = SourceMgr.getPresumedLineNumber(
+                                   Tok.getLocation(),
+                                   &EndLineInvalid);
+      if (StartLineInvalid || EndLineInvalid || StartLine == EndLine)
+        Diag(Tok.getLocation(),
+             diag::warn_doc_html_start_tag_expected_ident_or_greater)
+          << HST->getSourceRange();
+      else {
+        Diag(Tok.getLocation(),
+             diag::warn_doc_html_start_tag_expected_ident_or_greater);
+        Diag(HST->getLocation(), diag::note_doc_html_tag_started_here)
+          << HST->getSourceRange();
+      }
+      return HST;
+    }
+  }
+}
+
+HTMLEndTagComment *Parser::parseHTMLEndTag() {
+  assert(Tok.is(tok::html_end_tag));
+  Token TokEndTag = Tok;
+  consumeToken();
+  SourceLocation Loc;
+  if (Tok.is(tok::html_greater)) {
+    Loc = Tok.getLocation();
+    consumeToken();
+  }
+
+  return S.actOnHTMLEndTag(TokEndTag.getLocation(),
+                           Loc,
+                           TokEndTag.getHTMLTagEndName());
+}
+
+BlockContentComment *Parser::parseParagraphOrBlockCommand() {
+  SmallVector<InlineContentComment *, 8> Content;
+
+  while (true) {
+    switch (Tok.getKind()) {
+    case tok::verbatim_block_begin:
+    case tok::verbatim_line_name:
+    case tok::eof:
+      assert(Content.size() != 0);
+      break; // Block content or EOF ahead, finish this paragraph.
+
+    case tok::command:
+      if (Traits.isBlockCommand(Tok.getCommandName())) {
+        if (Content.size() == 0)
+          return parseBlockCommand();
+        break; // Block command ahead, finish this paragraph.
+      }
+      if (Traits.isInlineCommand(Tok.getCommandName())) {
+        Content.push_back(parseInlineCommand());
+        continue;
+      }
+
+      // Not a block command, not an inline command ==> an unknown command.
+      Content.push_back(S.actOnUnknownCommand(Tok.getLocation(),
+                                              Tok.getEndLocation(),
+                                              Tok.getCommandName()));
+      consumeToken();
+      continue;
+
+    case tok::newline: {
+      consumeToken();
+      if (Tok.is(tok::newline) || Tok.is(tok::eof)) {
+        consumeToken();
+        break; // Two newlines -- end of paragraph.
+      }
+      if (Content.size() > 0)
+        Content.back()->addTrailingNewline();
+      continue;
+    }
+
+    // Don't deal with HTML tag soup now.
+    case tok::html_start_tag:
+      Content.push_back(parseHTMLStartTag());
+      continue;
+
+    case tok::html_end_tag:
+      Content.push_back(parseHTMLEndTag());
+      continue;
+
+    case tok::text:
+      Content.push_back(S.actOnText(Tok.getLocation(),
+                                    Tok.getEndLocation(),
+                                    Tok.getText()));
+      consumeToken();
+      continue;
+
+    case tok::verbatim_block_line:
+    case tok::verbatim_block_end:
+    case tok::verbatim_line_text:
+    case tok::html_ident:
+    case tok::html_equals:
+    case tok::html_quoted_string:
+    case tok::html_greater:
+    case tok::html_slash_greater:
+      llvm_unreachable("should not see this token");
+    }
+    break;
+  }
+
+  return S.actOnParagraphComment(S.copyArray(llvm::makeArrayRef(Content)));
+}
+
+VerbatimBlockComment *Parser::parseVerbatimBlock() {
+  assert(Tok.is(tok::verbatim_block_begin));
+
+  VerbatimBlockComment *VB =
+      S.actOnVerbatimBlockStart(Tok.getLocation(),
+                                Tok.getVerbatimBlockName());
+  consumeToken();
+
+  // Don't create an empty line if verbatim opening command is followed
+  // by a newline.
+  if (Tok.is(tok::newline))
+    consumeToken();
+
+  SmallVector<VerbatimBlockLineComment *, 8> Lines;
+  while (Tok.is(tok::verbatim_block_line) ||
+         Tok.is(tok::newline)) {
+    VerbatimBlockLineComment *Line;
+    if (Tok.is(tok::verbatim_block_line)) {
+      Line = S.actOnVerbatimBlockLine(Tok.getLocation(),
+                                      Tok.getVerbatimBlockText());
+      consumeToken();
+      if (Tok.is(tok::newline)) {
+        consumeToken();
+      }
+    } else {
+      // Empty line, just a tok::newline.
+      Line = S.actOnVerbatimBlockLine(Tok.getLocation(), "");
+      consumeToken();
+    }
+    Lines.push_back(Line);
+  }
+
+  if (Tok.is(tok::verbatim_block_end)) {
+    S.actOnVerbatimBlockFinish(VB, Tok.getLocation(),
+                               Tok.getVerbatimBlockName(),
+                               S.copyArray(llvm::makeArrayRef(Lines)));
+    consumeToken();
+  } else {
+    // Unterminated \\verbatim block
+    S.actOnVerbatimBlockFinish(VB, SourceLocation(), "",
+                               S.copyArray(llvm::makeArrayRef(Lines)));
+  }
+
+  return VB;
+}
+
+VerbatimLineComment *Parser::parseVerbatimLine() {
+  assert(Tok.is(tok::verbatim_line_name));
+
+  Token NameTok = Tok;
+  consumeToken();
+
+  SourceLocation TextBegin;
+  StringRef Text;
+  // Next token might not be a tok::verbatim_line_text if verbatim line
+  // starting command comes just before a newline or comment end.
+  if (Tok.is(tok::verbatim_line_text)) {
+    TextBegin = Tok.getLocation();
+    Text = Tok.getVerbatimLineText();
+  } else {
+    TextBegin = NameTok.getEndLocation();
+    Text = "";
+  }
+
+  VerbatimLineComment *VL = S.actOnVerbatimLine(NameTok.getLocation(),
+                                                NameTok.getVerbatimLineName(),
+                                                TextBegin,
+                                                Text);
+  consumeToken();
+  return VL;
+}
+
+BlockContentComment *Parser::parseBlockContent() {
+  switch (Tok.getKind()) {
+  case tok::text:
+  case tok::command:
+  case tok::html_start_tag:
+  case tok::html_end_tag:
+    return parseParagraphOrBlockCommand();
+
+  case tok::verbatim_block_begin:
+    return parseVerbatimBlock();
+
+  case tok::verbatim_line_name:
+    return parseVerbatimLine();
+
+  case tok::eof:
+  case tok::newline:
+  case tok::verbatim_block_line:
+  case tok::verbatim_block_end:
+  case tok::verbatim_line_text:
+  case tok::html_ident:
+  case tok::html_equals:
+  case tok::html_quoted_string:
+  case tok::html_greater:
+  case tok::html_slash_greater:
+    llvm_unreachable("should not see this token");
+  }
+  llvm_unreachable("bogus token kind");
+}
+
+FullComment *Parser::parseFullComment() {
+  // Skip newlines at the beginning of the comment.
+  while (Tok.is(tok::newline))
+    consumeToken();
+
+  SmallVector<BlockContentComment *, 8> Blocks;
+  while (Tok.isNot(tok::eof)) {
+    Blocks.push_back(parseBlockContent());
+
+    // Skip extra newlines after paragraph end.
+    while (Tok.is(tok::newline))
+      consumeToken();
+  }
+  return S.actOnFullComment(S.copyArray(llvm::makeArrayRef(Blocks)));
+}
+
+} // end namespace comments
+} // end namespace clang
diff --git a/lib/AST/CommentSema.cpp b/lib/AST/CommentSema.cpp
new file mode 100644
index 0000000..c39ee57
--- /dev/null
+++ b/lib/AST/CommentSema.cpp
@@ -0,0 +1,739 @@
+//===--- CommentSema.cpp - Doxygen comment semantic analysis --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+// +//===----------------------------------------------------------------------===// + +#include "clang/AST/CommentSema.h" +#include "clang/AST/CommentDiagnostic.h" +#include "clang/AST/CommentCommandTraits.h" +#include "clang/AST/Decl.h" +#include "clang/AST/DeclTemplate.h" +#include "clang/Basic/SourceManager.h" +#include "llvm/ADT/StringSwitch.h" + +namespace clang { +namespace comments { + +Sema::Sema(llvm::BumpPtrAllocator &Allocator, const SourceManager &SourceMgr, + DiagnosticsEngine &Diags, const CommandTraits &Traits) : + Allocator(Allocator), SourceMgr(SourceMgr), Diags(Diags), Traits(Traits), + ThisDeclInfo(NULL), BriefCommand(NULL), ReturnsCommand(NULL) { +} + +void Sema::setDecl(const Decl *D) { + if (!D) + return; + + ThisDeclInfo = new (Allocator) DeclInfo; + ThisDeclInfo->ThisDecl = D; + ThisDeclInfo->IsFilled = false; +} + +ParagraphComment *Sema::actOnParagraphComment( + ArrayRef Content) { + return new (Allocator) ParagraphComment(Content); +} + +BlockCommandComment *Sema::actOnBlockCommandStart(SourceLocation LocBegin, + SourceLocation LocEnd, + StringRef Name) { + return new (Allocator) BlockCommandComment(LocBegin, LocEnd, Name); +} + +void Sema::actOnBlockCommandArgs(BlockCommandComment *Command, + ArrayRef Args) { + Command->setArgs(Args); +} + +void Sema::actOnBlockCommandFinish(BlockCommandComment *Command, + ParagraphComment *Paragraph) { + Command->setParagraph(Paragraph); + checkBlockCommandEmptyParagraph(Command); + checkBlockCommandDuplicate(Command); + checkReturnsCommand(Command); +} + +ParamCommandComment *Sema::actOnParamCommandStart(SourceLocation LocBegin, + SourceLocation LocEnd, + StringRef Name) { + ParamCommandComment *Command = + new (Allocator) ParamCommandComment(LocBegin, LocEnd, Name); + + if (!isFunctionDecl()) + Diag(Command->getLocation(), + diag::warn_doc_param_not_attached_to_a_function_decl) + << Command->getCommandNameRange(); + + return Command; +} + +void Sema::actOnParamCommandDirectionArg(ParamCommandComment *Command, + SourceLocation ArgLocBegin, + SourceLocation ArgLocEnd, + StringRef Arg) { + ParamCommandComment::PassDirection Direction; + std::string ArgLower = Arg.lower(); + // TODO: optimize: lower Name first (need an API in SmallString for that), + // after that StringSwitch. + if (ArgLower == "[in]") + Direction = ParamCommandComment::In; + else if (ArgLower == "[out]") + Direction = ParamCommandComment::Out; + else if (ArgLower == "[in,out]" || ArgLower == "[out,in]") + Direction = ParamCommandComment::InOut; + else { + // Remove spaces. 
+ std::string::iterator O = ArgLower.begin(); + for (std::string::iterator I = ArgLower.begin(), E = ArgLower.end(); + I != E; ++I) { + const char C = *I; + if (C != ' ' && C != '\n' && C != '\r' && + C != '\t' && C != '\v' && C != '\f') + *O++ = C; + } + ArgLower.resize(O - ArgLower.begin()); + + bool RemovingWhitespaceHelped = false; + if (ArgLower == "[in]") { + Direction = ParamCommandComment::In; + RemovingWhitespaceHelped = true; + } else if (ArgLower == "[out]") { + Direction = ParamCommandComment::Out; + RemovingWhitespaceHelped = true; + } else if (ArgLower == "[in,out]" || ArgLower == "[out,in]") { + Direction = ParamCommandComment::InOut; + RemovingWhitespaceHelped = true; + } else { + Direction = ParamCommandComment::In; + RemovingWhitespaceHelped = false; + } + + SourceRange ArgRange(ArgLocBegin, ArgLocEnd); + if (RemovingWhitespaceHelped) + Diag(ArgLocBegin, diag::warn_doc_param_spaces_in_direction) + << ArgRange + << FixItHint::CreateReplacement( + ArgRange, + ParamCommandComment::getDirectionAsString(Direction)); + else + Diag(ArgLocBegin, diag::warn_doc_param_invalid_direction) + << ArgRange; + } + Command->setDirection(Direction, /* Explicit = */ true); +} + +void Sema::actOnParamCommandParamNameArg(ParamCommandComment *Command, + SourceLocation ArgLocBegin, + SourceLocation ArgLocEnd, + StringRef Arg) { + // Parser will not feed us more arguments than needed. + assert(Command->getNumArgs() == 0); + + if (!Command->isDirectionExplicit()) { + // User didn't provide a direction argument. + Command->setDirection(ParamCommandComment::In, /* Explicit = */ false); + } + typedef BlockCommandComment::Argument Argument; + Argument *A = new (Allocator) Argument(SourceRange(ArgLocBegin, + ArgLocEnd), + Arg); + Command->setArgs(llvm::makeArrayRef(A, 1)); + + if (!isFunctionDecl()) { + // We already warned that this \\param is not attached to a function decl. + return; + } + + ArrayRef ParamVars = getParamVars(); + + // Check that referenced parameter name is in the function decl. + const unsigned ResolvedParamIndex = resolveParmVarReference(Arg, ParamVars); + if (ResolvedParamIndex != ParamCommandComment::InvalidParamIndex) { + Command->setParamIndex(ResolvedParamIndex); + if (ParamVarDocs[ResolvedParamIndex]) { + SourceRange ArgRange(ArgLocBegin, ArgLocEnd); + Diag(ArgLocBegin, diag::warn_doc_param_duplicate) + << Arg << ArgRange; + ParamCommandComment *PrevCommand = ParamVarDocs[ResolvedParamIndex]; + Diag(PrevCommand->getLocation(), diag::note_doc_param_previous) + << PrevCommand->getParamNameRange(); + } + ParamVarDocs[ResolvedParamIndex] = Command; + return; + } + + SourceRange ArgRange(ArgLocBegin, ArgLocEnd); + Diag(ArgLocBegin, diag::warn_doc_param_not_found) + << Arg << ArgRange; + + // No parameters -- can't suggest a correction. + if (ParamVars.size() == 0) + return; + + unsigned CorrectedParamIndex = ParamCommandComment::InvalidParamIndex; + if (ParamVars.size() == 1) { + // If function has only one parameter then only that parameter + // can be documented. + CorrectedParamIndex = 0; + } else { + // Do typo correction. 
+ CorrectedParamIndex = correctTypoInParmVarReference(Arg, ParamVars); + } + if (CorrectedParamIndex != ParamCommandComment::InvalidParamIndex) { + const ParmVarDecl *CorrectedPVD = ParamVars[CorrectedParamIndex]; + if (const IdentifierInfo *CorrectedII = CorrectedPVD->getIdentifier()) + Diag(ArgLocBegin, diag::note_doc_param_name_suggestion) + << CorrectedII->getName() + << FixItHint::CreateReplacement(ArgRange, CorrectedII->getName()); + } + + return; +} + +void Sema::actOnParamCommandFinish(ParamCommandComment *Command, + ParagraphComment *Paragraph) { + Command->setParagraph(Paragraph); + checkBlockCommandEmptyParagraph(Command); +} + +TParamCommandComment *Sema::actOnTParamCommandStart(SourceLocation LocBegin, + SourceLocation LocEnd, + StringRef Name) { + TParamCommandComment *Command = + new (Allocator) TParamCommandComment(LocBegin, LocEnd, Name); + + if (!isTemplateOrSpecialization()) + Diag(Command->getLocation(), + diag::warn_doc_tparam_not_attached_to_a_template_decl) + << Command->getCommandNameRange(); + + return Command; +} + +void Sema::actOnTParamCommandParamNameArg(TParamCommandComment *Command, + SourceLocation ArgLocBegin, + SourceLocation ArgLocEnd, + StringRef Arg) { + // Parser will not feed us more arguments than needed. + assert(Command->getNumArgs() == 0); + + typedef BlockCommandComment::Argument Argument; + Argument *A = new (Allocator) Argument(SourceRange(ArgLocBegin, + ArgLocEnd), + Arg); + Command->setArgs(llvm::makeArrayRef(A, 1)); + + if (!isTemplateOrSpecialization()) { + // We already warned that this \\tparam is not attached to a template decl. + return; + } + + const TemplateParameterList *TemplateParameters = + ThisDeclInfo->TemplateParameters; + SmallVector Position; + if (resolveTParamReference(Arg, TemplateParameters, &Position)) { + Command->setPosition(copyArray(llvm::makeArrayRef(Position))); + llvm::StringMap::iterator PrevCommandIt = + TemplateParameterDocs.find(Arg); + if (PrevCommandIt != TemplateParameterDocs.end()) { + SourceRange ArgRange(ArgLocBegin, ArgLocEnd); + Diag(ArgLocBegin, diag::warn_doc_tparam_duplicate) + << Arg << ArgRange; + TParamCommandComment *PrevCommand = PrevCommandIt->second; + Diag(PrevCommand->getLocation(), diag::note_doc_tparam_previous) + << PrevCommand->getParamNameRange(); + } + TemplateParameterDocs[Arg] = Command; + return; + } + + SourceRange ArgRange(ArgLocBegin, ArgLocEnd); + Diag(ArgLocBegin, diag::warn_doc_tparam_not_found) + << Arg << ArgRange; + + if (!TemplateParameters || TemplateParameters->size() == 0) + return; + + StringRef CorrectedName; + if (TemplateParameters->size() == 1) { + const NamedDecl *Param = TemplateParameters->getParam(0); + const IdentifierInfo *II = Param->getIdentifier(); + if (II) + CorrectedName = II->getName(); + } else { + CorrectedName = correctTypoInTParamReference(Arg, TemplateParameters); + } + + if (!CorrectedName.empty()) { + Diag(ArgLocBegin, diag::note_doc_tparam_name_suggestion) + << CorrectedName + << FixItHint::CreateReplacement(ArgRange, CorrectedName); + } + + return; +} + +void Sema::actOnTParamCommandFinish(TParamCommandComment *Command, + ParagraphComment *Paragraph) { + Command->setParagraph(Paragraph); + checkBlockCommandEmptyParagraph(Command); +} + +InlineCommandComment *Sema::actOnInlineCommand(SourceLocation CommandLocBegin, + SourceLocation CommandLocEnd, + StringRef CommandName) { + ArrayRef Args; + return new (Allocator) InlineCommandComment( + CommandLocBegin, + CommandLocEnd, + CommandName, + getInlineCommandRenderKind(CommandName), + Args); +} + 
+InlineCommandComment *Sema::actOnInlineCommand(SourceLocation CommandLocBegin, + SourceLocation CommandLocEnd, + StringRef CommandName, + SourceLocation ArgLocBegin, + SourceLocation ArgLocEnd, + StringRef Arg) { + typedef InlineCommandComment::Argument Argument; + Argument *A = new (Allocator) Argument(SourceRange(ArgLocBegin, + ArgLocEnd), + Arg); + + return new (Allocator) InlineCommandComment( + CommandLocBegin, + CommandLocEnd, + CommandName, + getInlineCommandRenderKind(CommandName), + llvm::makeArrayRef(A, 1)); +} + +InlineContentComment *Sema::actOnUnknownCommand(SourceLocation LocBegin, + SourceLocation LocEnd, + StringRef Name) { + ArrayRef Args; + return new (Allocator) InlineCommandComment( + LocBegin, LocEnd, Name, + InlineCommandComment::RenderNormal, + Args); +} + +TextComment *Sema::actOnText(SourceLocation LocBegin, + SourceLocation LocEnd, + StringRef Text) { + return new (Allocator) TextComment(LocBegin, LocEnd, Text); +} + +VerbatimBlockComment *Sema::actOnVerbatimBlockStart(SourceLocation Loc, + StringRef Name) { + return new (Allocator) VerbatimBlockComment( + Loc, + Loc.getLocWithOffset(1 + Name.size()), + Name); +} + +VerbatimBlockLineComment *Sema::actOnVerbatimBlockLine(SourceLocation Loc, + StringRef Text) { + return new (Allocator) VerbatimBlockLineComment(Loc, Text); +} + +void Sema::actOnVerbatimBlockFinish( + VerbatimBlockComment *Block, + SourceLocation CloseNameLocBegin, + StringRef CloseName, + ArrayRef Lines) { + Block->setCloseName(CloseName, CloseNameLocBegin); + Block->setLines(Lines); +} + +VerbatimLineComment *Sema::actOnVerbatimLine(SourceLocation LocBegin, + StringRef Name, + SourceLocation TextBegin, + StringRef Text) { + return new (Allocator) VerbatimLineComment( + LocBegin, + TextBegin.getLocWithOffset(Text.size()), + Name, + TextBegin, + Text); +} + +HTMLStartTagComment *Sema::actOnHTMLStartTagStart(SourceLocation LocBegin, + StringRef TagName) { + return new (Allocator) HTMLStartTagComment(LocBegin, TagName); +} + +void Sema::actOnHTMLStartTagFinish( + HTMLStartTagComment *Tag, + ArrayRef Attrs, + SourceLocation GreaterLoc, + bool IsSelfClosing) { + Tag->setAttrs(Attrs); + Tag->setGreaterLoc(GreaterLoc); + if (IsSelfClosing) + Tag->setSelfClosing(); + else if (!isHTMLEndTagForbidden(Tag->getTagName())) + HTMLOpenTags.push_back(Tag); +} + +HTMLEndTagComment *Sema::actOnHTMLEndTag(SourceLocation LocBegin, + SourceLocation LocEnd, + StringRef TagName) { + HTMLEndTagComment *HET = + new (Allocator) HTMLEndTagComment(LocBegin, LocEnd, TagName); + if (isHTMLEndTagForbidden(TagName)) { + Diag(HET->getLocation(), diag::warn_doc_html_end_forbidden) + << TagName << HET->getSourceRange(); + return HET; + } + + bool FoundOpen = false; + for (SmallVectorImpl::const_reverse_iterator + I = HTMLOpenTags.rbegin(), E = HTMLOpenTags.rend(); + I != E; ++I) { + if ((*I)->getTagName() == TagName) { + FoundOpen = true; + break; + } + } + if (!FoundOpen) { + Diag(HET->getLocation(), diag::warn_doc_html_end_unbalanced) + << HET->getSourceRange(); + return HET; + } + + while (!HTMLOpenTags.empty()) { + const HTMLStartTagComment *HST = HTMLOpenTags.back(); + HTMLOpenTags.pop_back(); + StringRef LastNotClosedTagName = HST->getTagName(); + if (LastNotClosedTagName == TagName) + break; + + if (isHTMLEndTagOptional(LastNotClosedTagName)) + continue; + + bool OpenLineInvalid; + const unsigned OpenLine = SourceMgr.getPresumedLineNumber( + HST->getLocation(), + &OpenLineInvalid); + bool CloseLineInvalid; + const unsigned CloseLine = SourceMgr.getPresumedLineNumber( + 
HET->getLocation(), + &CloseLineInvalid); + + if (OpenLineInvalid || CloseLineInvalid || OpenLine == CloseLine) + Diag(HST->getLocation(), diag::warn_doc_html_start_end_mismatch) + << HST->getTagName() << HET->getTagName() + << HST->getSourceRange() << HET->getSourceRange(); + else { + Diag(HST->getLocation(), diag::warn_doc_html_start_end_mismatch) + << HST->getTagName() << HET->getTagName() + << HST->getSourceRange(); + Diag(HET->getLocation(), diag::note_doc_html_end_tag) + << HET->getSourceRange(); + } + } + + return HET; +} + +FullComment *Sema::actOnFullComment( + ArrayRef Blocks) { + return new (Allocator) FullComment(Blocks, ThisDeclInfo); +} + +void Sema::checkBlockCommandEmptyParagraph(BlockCommandComment *Command) { + ParagraphComment *Paragraph = Command->getParagraph(); + if (Paragraph->isWhitespace()) { + SourceLocation DiagLoc; + if (Command->getNumArgs() > 0) + DiagLoc = Command->getArgRange(Command->getNumArgs() - 1).getEnd(); + if (!DiagLoc.isValid()) + DiagLoc = Command->getCommandNameRange().getEnd(); + Diag(DiagLoc, diag::warn_doc_block_command_empty_paragraph) + << Command->getCommandName() + << Command->getSourceRange(); + } +} + +void Sema::checkReturnsCommand(const BlockCommandComment *Command) { + if (!Traits.isReturnsCommand(Command->getCommandName())) + return; + if (isFunctionDecl()) { + if (ThisDeclInfo->ResultType->isVoidType()) { + unsigned DiagKind; + switch (ThisDeclInfo->ThisDecl->getKind()) { + default: + if (ThisDeclInfo->IsObjCMethod) + DiagKind = 3; + else + DiagKind = 0; + break; + case Decl::CXXConstructor: + DiagKind = 1; + break; + case Decl::CXXDestructor: + DiagKind = 2; + break; + } + Diag(Command->getLocation(), + diag::warn_doc_returns_attached_to_a_void_function) + << Command->getCommandName() + << DiagKind + << Command->getSourceRange(); + } + return; + } + Diag(Command->getLocation(), + diag::warn_doc_returns_not_attached_to_a_function_decl) + << Command->getCommandName() + << Command->getSourceRange(); +} + +void Sema::checkBlockCommandDuplicate(const BlockCommandComment *Command) { + StringRef Name = Command->getCommandName(); + const BlockCommandComment *PrevCommand = NULL; + if (Traits.isBriefCommand(Name)) { + if (!BriefCommand) { + BriefCommand = Command; + return; + } + PrevCommand = BriefCommand; + } else if (Traits.isReturnsCommand(Name)) { + if (!ReturnsCommand) { + ReturnsCommand = Command; + return; + } + PrevCommand = ReturnsCommand; + } else { + // We don't want to check this command for duplicates. 
+ return; + } + Diag(Command->getLocation(), diag::warn_doc_block_command_duplicate) + << Name + << Command->getSourceRange(); + if (Name == PrevCommand->getCommandName()) + Diag(PrevCommand->getLocation(), diag::note_doc_block_command_previous) + << PrevCommand->getCommandName() + << Command->getSourceRange(); + else + Diag(PrevCommand->getLocation(), + diag::note_doc_block_command_previous_alias) + << PrevCommand->getCommandName() + << Name; +} + +bool Sema::isFunctionDecl() { + if (!ThisDeclInfo) + return false; + if (!ThisDeclInfo->IsFilled) + inspectThisDecl(); + return ThisDeclInfo->getKind() == DeclInfo::FunctionKind; +} + +bool Sema::isTemplateOrSpecialization() { + if (!ThisDeclInfo) + return false; + if (!ThisDeclInfo->IsFilled) + inspectThisDecl(); + return ThisDeclInfo->getTemplateKind() != DeclInfo::NotTemplate; +} + +ArrayRef Sema::getParamVars() { + if (!ThisDeclInfo->IsFilled) + inspectThisDecl(); + return ThisDeclInfo->ParamVars; +} + +void Sema::inspectThisDecl() { + ThisDeclInfo->fill(); + ParamVarDocs.resize(ThisDeclInfo->ParamVars.size(), NULL); +} + +unsigned Sema::resolveParmVarReference(StringRef Name, + ArrayRef ParamVars) { + for (unsigned i = 0, e = ParamVars.size(); i != e; ++i) { + const IdentifierInfo *II = ParamVars[i]->getIdentifier(); + if (II && II->getName() == Name) + return i; + } + return ParamCommandComment::InvalidParamIndex; +} + +namespace { +class SimpleTypoCorrector { + StringRef Typo; + const unsigned MaxEditDistance; + + const NamedDecl *BestDecl; + unsigned BestEditDistance; + unsigned BestIndex; + unsigned NextIndex; + +public: + SimpleTypoCorrector(StringRef Typo) : + Typo(Typo), MaxEditDistance((Typo.size() + 2) / 3), + BestDecl(NULL), BestEditDistance(MaxEditDistance + 1), + BestIndex(0), NextIndex(0) + { } + + void addDecl(const NamedDecl *ND); + + const NamedDecl *getBestDecl() const { + if (BestEditDistance > MaxEditDistance) + return NULL; + + return BestDecl; + } + + unsigned getBestDeclIndex() const { + assert(getBestDecl()); + return BestIndex; + } +}; + +void SimpleTypoCorrector::addDecl(const NamedDecl *ND) { + unsigned CurrIndex = NextIndex++; + + const IdentifierInfo *II = ND->getIdentifier(); + if (!II) + return; + + StringRef Name = II->getName(); + unsigned MinPossibleEditDistance = abs((int)Name.size() - (int)Typo.size()); + if (MinPossibleEditDistance > 0 && + Typo.size() / MinPossibleEditDistance < 3) + return; + + unsigned EditDistance = Typo.edit_distance(Name, true, MaxEditDistance); + if (EditDistance < BestEditDistance) { + BestEditDistance = EditDistance; + BestDecl = ND; + BestIndex = CurrIndex; + } +} +} // unnamed namespace + +unsigned Sema::correctTypoInParmVarReference( + StringRef Typo, + ArrayRef ParamVars) { + SimpleTypoCorrector Corrector(Typo); + for (unsigned i = 0, e = ParamVars.size(); i != e; ++i) + Corrector.addDecl(ParamVars[i]); + if (Corrector.getBestDecl()) + return Corrector.getBestDeclIndex(); + else + return ParamCommandComment::InvalidParamIndex;; +} + +namespace { +bool ResolveTParamReferenceHelper( + StringRef Name, + const TemplateParameterList *TemplateParameters, + SmallVectorImpl *Position) { + for (unsigned i = 0, e = TemplateParameters->size(); i != e; ++i) { + const NamedDecl *Param = TemplateParameters->getParam(i); + const IdentifierInfo *II = Param->getIdentifier(); + if (II && II->getName() == Name) { + Position->push_back(i); + return true; + } + + if (const TemplateTemplateParmDecl *TTP = + dyn_cast(Param)) { + Position->push_back(i); + if (ResolveTParamReferenceHelper(Name, 
TTP->getTemplateParameters(), + Position)) + return true; + Position->pop_back(); + } + } + return false; +} +} // unnamed namespace + +bool Sema::resolveTParamReference( + StringRef Name, + const TemplateParameterList *TemplateParameters, + SmallVectorImpl *Position) { + Position->clear(); + if (!TemplateParameters) + return false; + + return ResolveTParamReferenceHelper(Name, TemplateParameters, Position); +} + +namespace { +void CorrectTypoInTParamReferenceHelper( + const TemplateParameterList *TemplateParameters, + SimpleTypoCorrector &Corrector) { + for (unsigned i = 0, e = TemplateParameters->size(); i != e; ++i) { + const NamedDecl *Param = TemplateParameters->getParam(i); + Corrector.addDecl(Param); + + if (const TemplateTemplateParmDecl *TTP = + dyn_cast(Param)) + CorrectTypoInTParamReferenceHelper(TTP->getTemplateParameters(), + Corrector); + } +} +} // unnamed namespace + +StringRef Sema::correctTypoInTParamReference( + StringRef Typo, + const TemplateParameterList *TemplateParameters) { + SimpleTypoCorrector Corrector(Typo); + CorrectTypoInTParamReferenceHelper(TemplateParameters, Corrector); + if (const NamedDecl *ND = Corrector.getBestDecl()) { + const IdentifierInfo *II = ND->getIdentifier(); + assert(II && "SimpleTypoCorrector should not return this decl"); + return II->getName(); + } + return StringRef(); +} + +InlineCommandComment::RenderKind +Sema::getInlineCommandRenderKind(StringRef Name) const { + assert(Traits.isInlineCommand(Name)); + + return llvm::StringSwitch(Name) + .Case("b", InlineCommandComment::RenderBold) + .Cases("c", "p", InlineCommandComment::RenderMonospaced) + .Cases("a", "e", "em", InlineCommandComment::RenderEmphasized) + .Default(InlineCommandComment::RenderNormal); +} + +bool Sema::isHTMLEndTagOptional(StringRef Name) { + return llvm::StringSwitch(Name) + .Case("p", true) + .Case("li", true) + .Case("dt", true) + .Case("dd", true) + .Case("tr", true) + .Case("th", true) + .Case("td", true) + .Case("thead", true) + .Case("tfoot", true) + .Case("tbody", true) + .Case("colgroup", true) + .Default(false); +} + +bool Sema::isHTMLEndTagForbidden(StringRef Name) { + return llvm::StringSwitch(Name) + .Case("br", true) + .Case("hr", true) + .Case("img", true) + .Case("col", true) + .Default(false); +} + +} // end namespace comments +} // end namespace clang + diff --git a/lib/AST/Decl.cpp b/lib/AST/Decl.cpp index 53032bc..d5b0be3 100644 --- a/lib/AST/Decl.cpp +++ b/lib/AST/Decl.cpp @@ -66,32 +66,6 @@ static llvm::Optional getVisibilityOf(const Decl *D) { typedef NamedDecl::LinkageInfo LinkageInfo; -namespace { -/// Flags controlling the computation of linkage and visibility. -struct LVFlags { - const bool ConsiderGlobalVisibility; - const bool ConsiderVisibilityAttributes; - const bool ConsiderTemplateParameterTypes; - - LVFlags() : ConsiderGlobalVisibility(true), - ConsiderVisibilityAttributes(true), - ConsiderTemplateParameterTypes(true) { - } - - LVFlags(bool Global, bool Attributes, bool Parameters) : - ConsiderGlobalVisibility(Global), - ConsiderVisibilityAttributes(Attributes), - ConsiderTemplateParameterTypes(Parameters) { - } - - /// \brief Returns a set of flags that is only useful for computing the - /// linkage, not the visibility, of a declaration. 
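// The SimpleTypoCorrector above budgets (Typo.size() + 2) / 3 edits and skips
// candidates whose length difference alone exceeds that budget. A
// freestanding sketch of the same idea, using a textbook single-row edit
// distance; the names are illustrative, not the LLVM API.
#include <algorithm>
#include <cstdlib>
#include <string>
#include <vector>

static unsigned editDistance(const std::string &A, const std::string &B) {
  std::vector<unsigned> Row(B.size() + 1);
  for (unsigned J = 0; J <= B.size(); ++J) Row[J] = J;
  for (unsigned I = 1; I <= A.size(); ++I) {
    unsigned Diag = Row[0]; // holds dist(I-1, J-1) for the current column
    Row[0] = I;
    for (unsigned J = 1; J <= B.size(); ++J) {
      unsigned Sub = Diag + (A[I - 1] != B[J - 1]);
      Diag = Row[J];
      Row[J] = std::min({Row[J] + 1, Row[J - 1] + 1, Sub});
    }
  }
  return Row[B.size()];
}

// Returns the index of the closest name within budget, or -1 if none fits.
static int correctTypo(const std::string &Typo,
                       const std::vector<std::string> &Names) {
  const unsigned MaxDist = (unsigned)((Typo.size() + 2) / 3);
  unsigned Best = MaxDist + 1;
  int BestIdx = -1;
  for (unsigned I = 0; I != Names.size(); ++I) {
    unsigned Gap = (unsigned)std::abs((int)Names[I].size() - (int)Typo.size());
    if (Gap > MaxDist)
      continue; // the length gap alone rules this candidate out
    unsigned D = editDistance(Typo, Names[I]);
    if (D < Best) { Best = D; BestIdx = (int)I; }
  }
  return Best <= MaxDist ? BestIdx : -1;
}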
- static LVFlags CreateOnlyDeclLinkage() { - return LVFlags(false, false, false); - } -}; -} // end anonymous namespace - static LinkageInfo getLVForType(QualType T) { std::pair P = T->getLinkageAndVisibility(); return LinkageInfo(P.first, P.second, T->isVisibilityExplicit()); @@ -131,13 +105,13 @@ getLVForTemplateParameterList(const TemplateParameterList *Params) { } /// getLVForDecl - Get the linkage and visibility for the given declaration. -static LinkageInfo getLVForDecl(const NamedDecl *D, LVFlags F); +static LinkageInfo getLVForDecl(const NamedDecl *D, bool OnlyTemplate); /// \brief Get the most restrictive linkage for the types and /// declarations in the given template argument list. static LinkageInfo getLVForTemplateArgumentList(const TemplateArgument *Args, unsigned NumArgs, - LVFlags &F) { + bool OnlyTemplate) { LinkageInfo LV(ExternalLinkage, DefaultVisibility, false); for (unsigned I = 0; I != NumArgs; ++I) { @@ -148,7 +122,7 @@ static LinkageInfo getLVForTemplateArgumentList(const TemplateArgument *Args, break; case TemplateArgument::Type: - LV.merge(getLVForType(Args[I].getAsType())); + LV.mergeWithMin(getLVForType(Args[I].getAsType())); break; case TemplateArgument::Declaration: @@ -156,7 +130,7 @@ static LinkageInfo getLVForTemplateArgumentList(const TemplateArgument *Args, // arguments, valid only in C++0x. if (Decl *D = Args[I].getAsDecl()) { if (NamedDecl *ND = dyn_cast(D)) - LV = merge(LV, getLVForDecl(ND, F)); + LV.mergeWithMin(getLVForDecl(ND, OnlyTemplate)); } break; @@ -164,13 +138,13 @@ static LinkageInfo getLVForTemplateArgumentList(const TemplateArgument *Args, case TemplateArgument::TemplateExpansion: if (TemplateDecl *Template = Args[I].getAsTemplateOrTemplatePattern().getAsTemplateDecl()) - LV.merge(getLVForDecl(Template, F)); + LV.mergeWithMin(getLVForDecl(Template, OnlyTemplate)); break; case TemplateArgument::Pack: LV.mergeWithMin(getLVForTemplateArgumentList(Args[I].pack_begin(), Args[I].pack_size(), - F)); + OnlyTemplate)); break; } } @@ -180,21 +154,50 @@ static LinkageInfo getLVForTemplateArgumentList(const TemplateArgument *Args, static LinkageInfo getLVForTemplateArgumentList(const TemplateArgumentList &TArgs, - LVFlags &F) { - return getLVForTemplateArgumentList(TArgs.data(), TArgs.size(), F); + bool OnlyTemplate) { + return getLVForTemplateArgumentList(TArgs.data(), TArgs.size(), OnlyTemplate); } -static bool shouldConsiderTemplateLV(const FunctionDecl *fn, +static bool shouldConsiderTemplateVis(const FunctionDecl *fn, const FunctionTemplateSpecializationInfo *spec) { - return !(spec->isExplicitSpecialization() && - fn->hasAttr()); + return !fn->hasAttr() || spec->isExplicitSpecialization(); } -static bool shouldConsiderTemplateLV(const ClassTemplateSpecializationDecl *d) { - return !(d->isExplicitSpecialization() && d->hasAttr()); +static bool +shouldConsiderTemplateVis(const ClassTemplateSpecializationDecl *d) { + return !d->hasAttr() || d->isExplicitSpecialization(); } -static LinkageInfo getLVForNamespaceScopeDecl(const NamedDecl *D, LVFlags F) { +static bool useInlineVisibilityHidden(const NamedDecl *D) { + // FIXME: we should warn if -fvisibility-inlines-hidden is used with c. 
+ const LangOptions &Opts = D->getASTContext().getLangOpts(); + if (!Opts.CPlusPlus || !Opts.InlineVisibilityHidden) + return false; + + const FunctionDecl *FD = dyn_cast(D); + if (!FD) + return false; + + TemplateSpecializationKind TSK = TSK_Undeclared; + if (FunctionTemplateSpecializationInfo *spec + = FD->getTemplateSpecializationInfo()) { + TSK = spec->getTemplateSpecializationKind(); + } else if (MemberSpecializationInfo *MSI = + FD->getMemberSpecializationInfo()) { + TSK = MSI->getTemplateSpecializationKind(); + } + + const FunctionDecl *Def = 0; + // InlineVisibilityHidden only applies to definitions, and + // isInlined() only gives meaningful answers on definitions + // anyway. + return TSK != TSK_ExplicitInstantiationDeclaration && + TSK != TSK_ExplicitInstantiationDefinition && + FD->hasBody(Def) && Def->isInlined(); +} + +static LinkageInfo getLVForNamespaceScopeDecl(const NamedDecl *D, + bool OnlyTemplate) { assert(D->getDeclContext()->getRedeclContext()->isFileContext() && "Not a name having namespace scope"); ASTContext &Context = D->getASTContext(); @@ -271,11 +274,10 @@ static LinkageInfo getLVForNamespaceScopeDecl(const NamedDecl *D, LVFlags F) { // scope and no storage-class specifier, its linkage is // external. LinkageInfo LV; - LV.mergeVisibility(Context.getLangOpts().getVisibilityMode()); - if (F.ConsiderVisibilityAttributes) { + if (!OnlyTemplate) { if (llvm::Optional Vis = D->getExplicitVisibility()) { - LV.setVisibility(*Vis, true); + LV.mergeVisibility(*Vis, true); } else { // If we're declared in a namespace with a visibility attribute, // use that namespace's visibility, but don't call it explicit. @@ -285,13 +287,21 @@ static LinkageInfo getLVForNamespaceScopeDecl(const NamedDecl *D, LVFlags F) { const NamespaceDecl *ND = dyn_cast(DC); if (!ND) continue; if (llvm::Optional Vis = ND->getExplicitVisibility()) { - LV.setVisibility(*Vis, true); + LV.mergeVisibility(*Vis, true); break; } } } } + if (!OnlyTemplate) { + LV.mergeVisibility(Context.getLangOpts().getVisibilityMode()); + // If we're paying attention to global visibility, apply + // -finline-visibility-hidden if this is an inline method. + if (!LV.visibilityExplicit() && useInlineVisibilityHidden(D)) + LV.mergeVisibility(HiddenVisibility, true); + } + // C++ [basic.link]p4: // A name having namespace scope has external linkage if it is the @@ -325,11 +335,11 @@ static LinkageInfo getLVForNamespaceScopeDecl(const NamedDecl *D, LVFlags F) { LinkageInfo TypeLV = getLVForType(Var->getType()); if (TypeLV.linkage() != ExternalLinkage) return LinkageInfo::uniqueExternal(); - LV.mergeVisibilityWithMin(TypeLV); + LV.mergeVisibility(TypeLV); } if (Var->getStorageClass() == SC_PrivateExtern) - LV.setVisibility(HiddenVisibility, true); + LV.mergeVisibility(HiddenVisibility, true); if (!Context.getLangOpts().CPlusPlus && (Var->getStorageClass() == SC_Extern || @@ -345,7 +355,7 @@ static LinkageInfo getLVForNamespaceScopeDecl(const NamedDecl *D, LVFlags F) { // is visible, or if the prior declaration specifies no // linkage, then the identifier has external linkage. if (const VarDecl *PrevVar = Var->getPreviousDecl()) { - LinkageInfo PrevLV = getLVForDecl(PrevVar, F); + LinkageInfo PrevLV = getLVForDecl(PrevVar, OnlyTemplate); if (PrevLV.linkage()) LV.setLinkage(PrevLV.linkage()); LV.mergeVisibility(PrevLV); } @@ -359,7 +369,7 @@ static LinkageInfo getLVForNamespaceScopeDecl(const NamedDecl *D, LVFlags F) { // just too painful to make work. 
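// What useInlineVisibilityHidden above controls, at the source level and
// assuming a GCC/Clang-style ELF target: under -fvisibility-inlines-hidden,
// the out-of-line copy emitted for an inline C++ member function is given
// hidden visibility, while ordinary functions keep the global default.
struct Widget {
  int id() const { return 42; } // inline member: hidden under the flag
};
int widgetId(const Widget &W) { return W.id(); } // non-member: default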
if (Function->getStorageClass() == SC_PrivateExtern) - LV.setVisibility(HiddenVisibility, true); + LV.mergeVisibility(HiddenVisibility, true); // C99 6.2.2p5: // If the declaration of an identifier for a function has no @@ -380,7 +390,7 @@ static LinkageInfo getLVForNamespaceScopeDecl(const NamedDecl *D, LVFlags F) { // is visible, or if the prior declaration specifies no // linkage, then the identifier has external linkage. if (const FunctionDecl *PrevFunc = Function->getPreviousDecl()) { - LinkageInfo PrevLV = getLVForDecl(PrevFunc, F); + LinkageInfo PrevLV = getLVForDecl(PrevFunc, OnlyTemplate); if (PrevLV.linkage()) LV.setLinkage(PrevLV.linkage()); LV.mergeVisibility(PrevLV); } @@ -399,11 +409,16 @@ static LinkageInfo getLVForNamespaceScopeDecl(const NamedDecl *D, LVFlags F) { // this is an explicit specialization with a visibility attribute. if (FunctionTemplateSpecializationInfo *specInfo = Function->getTemplateSpecializationInfo()) { - if (shouldConsiderTemplateLV(Function, specInfo)) { - LV.merge(getLVForDecl(specInfo->getTemplate(), - LVFlags::CreateOnlyDeclLinkage())); - const TemplateArgumentList &templateArgs = *specInfo->TemplateArguments; - LV.mergeWithMin(getLVForTemplateArgumentList(templateArgs, F)); + LinkageInfo TempLV = getLVForDecl(specInfo->getTemplate(), true); + const TemplateArgumentList &templateArgs = *specInfo->TemplateArguments; + LinkageInfo ArgsLV = getLVForTemplateArgumentList(templateArgs, + OnlyTemplate); + if (shouldConsiderTemplateVis(Function, specInfo)) { + LV.mergeWithMin(TempLV); + LV.mergeWithMin(ArgsLV); + } else { + LV.mergeLinkage(TempLV); + LV.mergeLinkage(ArgsLV); } } @@ -422,20 +437,26 @@ static LinkageInfo getLVForNamespaceScopeDecl(const NamedDecl *D, LVFlags F) { // linkage of the template and template arguments. if (const ClassTemplateSpecializationDecl *spec = dyn_cast(Tag)) { - if (shouldConsiderTemplateLV(spec)) { - // From the template. - LV.merge(getLVForDecl(spec->getSpecializedTemplate(), - LVFlags::CreateOnlyDeclLinkage())); - - // The arguments at which the template was instantiated. - const TemplateArgumentList &TemplateArgs = spec->getTemplateArgs(); - LV.mergeWithMin(getLVForTemplateArgumentList(TemplateArgs, F)); + // From the template. + LinkageInfo TempLV = getLVForDecl(spec->getSpecializedTemplate(), true); + + // The arguments at which the template was instantiated. + const TemplateArgumentList &TemplateArgs = spec->getTemplateArgs(); + LinkageInfo ArgsLV = getLVForTemplateArgumentList(TemplateArgs, + OnlyTemplate); + if (shouldConsiderTemplateVis(spec)) { + LV.mergeWithMin(TempLV); + LV.mergeWithMin(ArgsLV); + } else { + LV.mergeLinkage(TempLV); + LV.mergeLinkage(ArgsLV); } } // - an enumerator belonging to an enumeration with external linkage; } else if (isa(D)) { - LinkageInfo EnumLV = getLVForDecl(cast(D->getDeclContext()), F); + LinkageInfo EnumLV = getLVForDecl(cast(D->getDeclContext()), + OnlyTemplate); if (!isExternalLinkage(EnumLV.linkage())) return LinkageInfo::none(); LV.merge(EnumLV); @@ -443,9 +464,7 @@ static LinkageInfo getLVForNamespaceScopeDecl(const NamedDecl *D, LVFlags F) { // - a template, unless it is a function template that has // internal linkage (Clause 14); } else if (const TemplateDecl *temp = dyn_cast(D)) { - if (F.ConsiderTemplateParameterTypes) - LV.merge(getLVForTemplateParameterList(temp->getTemplateParameters())); - + LV.merge(getLVForTemplateParameterList(temp->getTemplateParameters())); // - a namespace (7.3), unless it is declared within an unnamed // namespace. 
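// The mergeWithMin/mergeLinkage split used above, in miniature and assuming a
// lattice where smaller means more restrictive: mergeWithMin can lower both
// the linkage and the visibility of the result, while mergeLinkage
// deliberately leaves visibility untouched. Illustrative enums, not the
// clang LinkageInfo API.
#include <algorithm>

enum SketchLinkage { SkNoLinkage, SkInternal, SkExternal };
enum SketchVisibility { SkHidden, SkProtected, SkDefault };

struct SketchLV {
  SketchLinkage Linkage;
  SketchVisibility Visibility;
  void mergeWithMin(const SketchLV &O) {
    Linkage = std::min(Linkage, O.Linkage);
    Visibility = std::min(Visibility, O.Visibility);
  }
  void mergeLinkage(const SketchLV &O) { // visibility is left alone
    Linkage = std::min(Linkage, O.Linkage);
  }
};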
} else if (isa(D) && !D->isInAnonymousNamespace()) { @@ -469,7 +488,7 @@ static LinkageInfo getLVForNamespaceScopeDecl(const NamedDecl *D, LVFlags F) { return LV; } -static LinkageInfo getLVForClassMember(const NamedDecl *D, LVFlags F) { +static LinkageInfo getLVForClassMember(const NamedDecl *D, bool OnlyTemplate) { // Only certain class members have linkage. Note that fields don't // really have linkage, but it's convenient to say they do for the // purposes of calculating linkage of pointer-to-data-member @@ -482,53 +501,32 @@ static LinkageInfo getLVForClassMember(const NamedDecl *D, LVFlags F) { return LinkageInfo::none(); LinkageInfo LV; - LV.mergeVisibility(D->getASTContext().getLangOpts().getVisibilityMode()); - bool DHasExplicitVisibility = false; // If we have an explicit visibility attribute, merge that in. - if (F.ConsiderVisibilityAttributes) { - if (llvm::Optional Vis = D->getExplicitVisibility()) { + if (!OnlyTemplate) { + if (llvm::Optional Vis = D->getExplicitVisibility()) LV.mergeVisibility(*Vis, true); - - DHasExplicitVisibility = true; - } - } - // Ignore both global visibility and attributes when computing our - // parent's visibility if we already have an explicit one. - LVFlags ClassF = DHasExplicitVisibility ? - LVFlags::CreateOnlyDeclLinkage() : F; - - // If we're paying attention to global visibility, apply - // -finline-visibility-hidden if this is an inline method. - // - // Note that we do this before merging information about - // the class visibility. - if (const CXXMethodDecl *MD = dyn_cast(D)) { - TemplateSpecializationKind TSK = TSK_Undeclared; - if (FunctionTemplateSpecializationInfo *spec - = MD->getTemplateSpecializationInfo()) { - TSK = spec->getTemplateSpecializationKind(); - } else if (MemberSpecializationInfo *MSI = - MD->getMemberSpecializationInfo()) { - TSK = MSI->getTemplateSpecializationKind(); - } - - const FunctionDecl *Def = 0; - // InlineVisibilityHidden only applies to definitions, and - // isInlined() only gives meaningful answers on definitions - // anyway. - if (TSK != TSK_ExplicitInstantiationDeclaration && - TSK != TSK_ExplicitInstantiationDefinition && - F.ConsiderGlobalVisibility && - !LV.visibilityExplicit() && - MD->getASTContext().getLangOpts().InlineVisibilityHidden && - MD->hasBody(Def) && Def->isInlined()) + // If we're paying attention to global visibility, apply + // -finline-visibility-hidden if this is an inline method. + // + // Note that we do this before merging information about + // the class visibility. + if (!LV.visibilityExplicit() && useInlineVisibilityHidden(D)) LV.mergeVisibility(HiddenVisibility, true); } - // Class members only have linkage if their class has external - // linkage. - LV.merge(getLVForDecl(cast(D->getDeclContext()), ClassF)); + // If this class member has an explicit visibility attribute, the only + // thing that can change its visibility is the template arguments, so + // only look for them when processing the class. + bool ClassOnlyTemplate = LV.visibilityExplicit() ? true : OnlyTemplate; + + // If this member has an visibility attribute, ClassF will exclude + // attributes on the class or command line options, keeping only information + // about the template instantiation. If the member has no visibility + // attributes, mergeWithMin behaves like merge, so in both cases mergeWithMin + // produces the desired result. 
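// The rule the ClassOnlyTemplate flag above encodes, in source terms and
// assuming GCC-style visibility attributes: an explicit attribute on the
// member overrides the one on the enclosing class, so only the template
// arguments are still consulted for such a member.
struct __attribute__((visibility("hidden"))) Impl {
  __attribute__((visibility("default"))) void exported(); // stays default
  void helper(); // no attribute of its own: inherits hidden
};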
+ LV.mergeWithMin(getLVForDecl(cast(D->getDeclContext()), + ClassOnlyTemplate)); if (!isExternalLinkage(LV.linkage())) return LinkageInfo::none(); @@ -536,6 +534,9 @@ static LinkageInfo getLVForClassMember(const NamedDecl *D, LVFlags F) { if (LV.linkage() == UniqueExternalLinkage) return LinkageInfo::uniqueExternal(); + if (!OnlyTemplate) + LV.mergeVisibility(D->getASTContext().getLangOpts().getVisibilityMode()); + if (const CXXMethodDecl *MD = dyn_cast(D)) { // If the type of the function uses a type with unique-external // linkage, it's not legally usable from outside this translation unit. @@ -546,12 +547,20 @@ static LinkageInfo getLVForClassMember(const NamedDecl *D, LVFlags F) { // the template parameters and arguments. if (FunctionTemplateSpecializationInfo *spec = MD->getTemplateSpecializationInfo()) { - if (shouldConsiderTemplateLV(MD, spec)) { - LV.mergeWithMin(getLVForTemplateArgumentList(*spec->TemplateArguments, - F)); - if (F.ConsiderTemplateParameterTypes) - LV.merge(getLVForTemplateParameterList( - spec->getTemplate()->getTemplateParameters())); + const TemplateArgumentList &TemplateArgs = *spec->TemplateArguments; + LinkageInfo ArgsLV = getLVForTemplateArgumentList(TemplateArgs, + OnlyTemplate); + TemplateParameterList *TemplateParams = + spec->getTemplate()->getTemplateParameters(); + LinkageInfo ParamsLV = getLVForTemplateParameterList(TemplateParams); + if (shouldConsiderTemplateVis(MD, spec)) { + LV.mergeWithMin(ArgsLV); + if (!OnlyTemplate) + LV.mergeWithMin(ParamsLV); + } else { + LV.mergeLinkage(ArgsLV); + if (!OnlyTemplate) + LV.mergeLinkage(ParamsLV); } } @@ -561,14 +570,22 @@ static LinkageInfo getLVForClassMember(const NamedDecl *D, LVFlags F) { } else if (const CXXRecordDecl *RD = dyn_cast(D)) { if (const ClassTemplateSpecializationDecl *spec = dyn_cast(RD)) { - if (shouldConsiderTemplateLV(spec)) { - // Merge template argument/parameter information for member - // class template specializations. - LV.mergeWithMin(getLVForTemplateArgumentList(spec->getTemplateArgs(), - F)); - if (F.ConsiderTemplateParameterTypes) - LV.merge(getLVForTemplateParameterList( - spec->getSpecializedTemplate()->getTemplateParameters())); + // Merge template argument/parameter information for member + // class template specializations. 
+ const TemplateArgumentList &TemplateArgs = spec->getTemplateArgs(); + LinkageInfo ArgsLV = getLVForTemplateArgumentList(TemplateArgs, + OnlyTemplate); + TemplateParameterList *TemplateParams = + spec->getSpecializedTemplate()->getTemplateParameters(); + LinkageInfo ParamsLV = getLVForTemplateParameterList(TemplateParams); + if (shouldConsiderTemplateVis(spec)) { + LV.mergeWithMin(ArgsLV); + if (!OnlyTemplate) + LV.mergeWithMin(ParamsLV); + } else { + LV.mergeLinkage(ArgsLV); + if (!OnlyTemplate) + LV.mergeLinkage(ParamsLV); } } @@ -579,8 +596,7 @@ static LinkageInfo getLVForClassMember(const NamedDecl *D, LVFlags F) { LinkageInfo TypeLV = getLVForType(VD->getType()); if (TypeLV.linkage() != ExternalLinkage) LV.mergeLinkage(UniqueExternalLinkage); - if (!LV.visibilityExplicit()) - LV.mergeVisibility(TypeLV); + LV.mergeVisibility(TypeLV); } return LV; @@ -636,18 +652,17 @@ void NamedDecl::ClearLinkageCache() { Linkage NamedDecl::getLinkage() const { if (HasCachedLinkage) { assert(Linkage(CachedLinkage) == - getLVForDecl(this, LVFlags::CreateOnlyDeclLinkage()).linkage()); + getLVForDecl(this, true).linkage()); return Linkage(CachedLinkage); } - CachedLinkage = getLVForDecl(this, - LVFlags::CreateOnlyDeclLinkage()).linkage(); + CachedLinkage = getLVForDecl(this, true).linkage(); HasCachedLinkage = 1; return Linkage(CachedLinkage); } LinkageInfo NamedDecl::getLinkageAndVisibility() const { - LinkageInfo LI = getLVForDecl(this, LVFlags()); + LinkageInfo LI = getLVForDecl(this, false); assert(!HasCachedLinkage || Linkage(CachedLinkage) == LI.linkage()); HasCachedLinkage = 1; CachedLinkage = LI.linkage(); @@ -656,9 +671,19 @@ LinkageInfo NamedDecl::getLinkageAndVisibility() const { llvm::Optional NamedDecl::getExplicitVisibility() const { // Use the most recent declaration of a variable. - if (const VarDecl *var = dyn_cast(this)) - return getVisibilityOf(var->getMostRecentDecl()); + if (const VarDecl *Var = dyn_cast(this)) { + if (llvm::Optional V = + getVisibilityOf(Var->getMostRecentDecl())) + return V; + + if (Var->isStaticDataMember()) { + VarDecl *InstantiatedFrom = Var->getInstantiatedFromStaticDataMember(); + if (InstantiatedFrom) + return getVisibilityOf(InstantiatedFrom); + } + return llvm::Optional(); + } // Use the most recent declaration of a function, and also handle // function template specializations. if (const FunctionDecl *fn = dyn_cast(this)) { @@ -685,6 +710,10 @@ llvm::Optional NamedDecl::getExplicitVisibility() const { if (llvm::Optional V = getVisibilityOf(this)) return V; + // The visibility of a template is stored in the templated decl. + if (const TemplateDecl *TD = dyn_cast(this)) + return getVisibilityOf(TD->getTemplatedDecl()); + // If there wasn't explicit visibility there, and this is a // specialization of a class template, check for visibility // on the pattern. @@ -703,7 +732,7 @@ llvm::Optional NamedDecl::getExplicitVisibility() const { return llvm::Optional(); } -static LinkageInfo getLVForDecl(const NamedDecl *D, LVFlags Flags) { +static LinkageInfo getLVForDecl(const NamedDecl *D, bool OnlyTemplate) { // Objective-C: treat all Objective-C declarations as having external // linkage. 
switch (D->getKind()) { @@ -738,11 +767,12 @@ static LinkageInfo getLVForDecl(const NamedDecl *D, LVFlags Flags) { if (isa(ContextDecl)) DC = ContextDecl->getDeclContext()->getRedeclContext(); else - return getLVForDecl(cast(ContextDecl), Flags); + return getLVForDecl(cast(ContextDecl), + OnlyTemplate); } if (const NamedDecl *ND = dyn_cast(DC)) - return getLVForDecl(ND, Flags); + return getLVForDecl(ND, OnlyTemplate); return LinkageInfo::external(); } @@ -753,7 +783,7 @@ static LinkageInfo getLVForDecl(const NamedDecl *D, LVFlags Flags) { // Handle linkage for namespace-scope names. if (D->getDeclContext()->getRedeclContext()->isFileContext()) - return getLVForNamespaceScopeDecl(D, Flags); + return getLVForNamespaceScopeDecl(D, OnlyTemplate); // C++ [basic.link]p5: // In addition, a member function, static data member, a named @@ -763,7 +793,7 @@ static LinkageInfo getLVForDecl(const NamedDecl *D, LVFlags Flags) { // purposes (7.1.3), has external linkage if the name of the class // has external linkage. if (D->getDeclContext()->isRecord()) - return getLVForClassMember(D, Flags); + return getLVForClassMember(D, OnlyTemplate); // C++ [basic.link]p6: // The name of a function declared in block scope and the name of @@ -783,13 +813,13 @@ static LinkageInfo getLVForDecl(const NamedDecl *D, LVFlags Flags) { return LinkageInfo::uniqueExternal(); LinkageInfo LV; - if (Flags.ConsiderVisibilityAttributes) { + if (!OnlyTemplate) { if (llvm::Optional Vis = Function->getExplicitVisibility()) - LV.setVisibility(*Vis, true); + LV.mergeVisibility(*Vis, true); } if (const FunctionDecl *Prev = Function->getPreviousDecl()) { - LinkageInfo PrevLV = getLVForDecl(Prev, Flags); + LinkageInfo PrevLV = getLVForDecl(Prev, OnlyTemplate); if (PrevLV.linkage()) LV.setLinkage(PrevLV.linkage()); LV.mergeVisibility(PrevLV); } @@ -806,14 +836,14 @@ static LinkageInfo getLVForDecl(const NamedDecl *D, LVFlags Flags) { LinkageInfo LV; if (Var->getStorageClass() == SC_PrivateExtern) - LV.setVisibility(HiddenVisibility, true); - else if (Flags.ConsiderVisibilityAttributes) { + LV.mergeVisibility(HiddenVisibility, true); + else if (!OnlyTemplate) { if (llvm::Optional Vis = Var->getExplicitVisibility()) - LV.setVisibility(*Vis, true); + LV.mergeVisibility(*Vis, true); } if (const VarDecl *Prev = Var->getPreviousDecl()) { - LinkageInfo PrevLV = getLVForDecl(Prev, Flags); + LinkageInfo PrevLV = getLVForDecl(Prev, OnlyTemplate); if (PrevLV.linkage()) LV.setLinkage(PrevLV.linkage()); LV.mergeVisibility(PrevLV); } @@ -881,9 +911,7 @@ std::string NamedDecl::getQualifiedNameAsString(const PrintingPolicy &P) const { for (unsigned i = 0; i < NumParams; ++i) { if (i) OS << ", "; - std::string Param; - FD->getParamDecl(i)->getType().getAsStringInternal(Param, P); - OS << Param; + OS << FD->getParamDecl(i)->getType().stream(P); } if (FT->isVariadic()) { @@ -1672,6 +1700,13 @@ void FunctionDecl::setPure(bool P) { Parent->markedVirtualFunctionPure(); } +void FunctionDecl::setConstexpr(bool IC) { + IsConstexpr = IC; + CXXConstructorDecl *CD = dyn_cast(this); + if (IC && CD) + CD->getParent()->markedConstructorConstexpr(CD); +} + bool FunctionDecl::isMain() const { const TranslationUnitDecl *tunit = dyn_cast(getDeclContext()->getRedeclContext()); @@ -2446,15 +2481,15 @@ FieldDecl *FieldDecl::Create(const ASTContext &C, DeclContext *DC, SourceLocation StartLoc, SourceLocation IdLoc, IdentifierInfo *Id, QualType T, TypeSourceInfo *TInfo, Expr *BW, bool Mutable, - bool HasInit) { + InClassInitStyle InitStyle) { return new (C) 
FieldDecl(Decl::Field, DC, StartLoc, IdLoc, Id, T, TInfo, - BW, Mutable, HasInit); + BW, Mutable, InitStyle); } FieldDecl *FieldDecl::CreateDeserialized(ASTContext &C, unsigned ID) { void *Mem = AllocateDeserializedDecl(C, ID, sizeof(FieldDecl)); return new (Mem) FieldDecl(Field, 0, SourceLocation(), SourceLocation(), - 0, QualType(), 0, 0, false, false); + 0, QualType(), 0, 0, false, ICIS_NoInit); } bool FieldDecl::isAnonymousStructOrUnion() const { @@ -2483,15 +2518,15 @@ unsigned FieldDecl::getFieldIndex() const { for (RecordDecl::field_iterator I = RD->field_begin(), E = RD->field_end(); I != E; ++I, ++Index) { - (*I)->CachedFieldIndex = Index + 1; + I->CachedFieldIndex = Index + 1; if (IsMsStruct) { // Zero-length bitfields following non-bitfield members are ignored. - if (getASTContext().ZeroBitfieldFollowsNonBitfield((*I), LastFD)) { + if (getASTContext().ZeroBitfieldFollowsNonBitfield(*I, LastFD)) { --Index; continue; } - LastFD = (*I); + LastFD = *I; } } @@ -2505,11 +2540,16 @@ SourceRange FieldDecl::getSourceRange() const { return DeclaratorDecl::getSourceRange(); } +void FieldDecl::setBitWidth(Expr *Width) { + assert(!InitializerOrBitWidth.getPointer() && !hasInClassInitializer() && + "bit width or initializer already set"); + InitializerOrBitWidth.setPointer(Width); +} + void FieldDecl::setInClassInitializer(Expr *Init) { - assert(!InitializerOrBitWidth.getPointer() && + assert(!InitializerOrBitWidth.getPointer() && hasInClassInitializer() && "bit width or initializer already set"); InitializerOrBitWidth.setPointer(Init); - InitializerOrBitWidth.setInt(0); } //===----------------------------------------------------------------------===// diff --git a/lib/AST/DeclBase.cpp b/lib/AST/DeclBase.cpp index 47a0d25..f9ce46d 100644 --- a/lib/AST/DeclBase.cpp +++ b/lib/AST/DeclBase.cpp @@ -411,23 +411,32 @@ AvailabilityResult Decl::getAvailability(std::string *Message) const { bool Decl::canBeWeakImported(bool &IsDefinition) const { IsDefinition = false; + + // Variables, if they aren't definitions. if (const VarDecl *Var = dyn_cast(this)) { if (!Var->hasExternalStorage() || Var->getInit()) { IsDefinition = true; return false; } + return true; + + // Functions, if they aren't definitions. } else if (const FunctionDecl *FD = dyn_cast(this)) { if (FD->hasBody()) { IsDefinition = true; return false; } - } else if (isa(this) || isa(this)) - return false; - else if (!(getASTContext().getLangOpts().ObjCNonFragileABI && - isa(this))) - return false; + return true; - return true; + // Objective-C classes, if this is the non-fragile runtime. + } else if (isa(this) && + getASTContext().getLangOpts().ObjCRuntime.hasWeakClassImport()) { + return true; + + // Nothing else. 
+ } else { + return false; + } } bool Decl::isWeakImported() const { @@ -974,10 +983,6 @@ DeclContext::decl_iterator DeclContext::noload_decls_begin() const { return decl_iterator(FirstDecl); } -DeclContext::decl_iterator DeclContext::noload_decls_end() const { - return decl_iterator(); -} - DeclContext::decl_iterator DeclContext::decls_begin() const { if (hasExternalLexicalStorage()) LoadLexicalDeclsFromExternalStorage(); @@ -985,13 +990,6 @@ DeclContext::decl_iterator DeclContext::decls_begin() const { return decl_iterator(FirstDecl); } -DeclContext::decl_iterator DeclContext::decls_end() const { - if (hasExternalLexicalStorage()) - LoadLexicalDeclsFromExternalStorage(); - - return decl_iterator(); -} - bool DeclContext::decls_empty() const { if (hasExternalLexicalStorage()) LoadLexicalDeclsFromExternalStorage(); @@ -1192,34 +1190,31 @@ DeclContext::lookup(DeclarationName Name) { return I->second.getLookupResult(); } -DeclContext::lookup_const_result -DeclContext::lookup(DeclarationName Name) const { - return const_cast(this)->lookup(Name); -} - void DeclContext::localUncachedLookup(DeclarationName Name, llvm::SmallVectorImpl &Results) { Results.clear(); // If there's no external storage, just perform a normal lookup and copy // the results. - if (!hasExternalVisibleStorage() && !hasExternalLexicalStorage()) { + if (!hasExternalVisibleStorage() && !hasExternalLexicalStorage() && Name) { lookup_result LookupResults = lookup(Name); Results.insert(Results.end(), LookupResults.first, LookupResults.second); return; } // If we have a lookup table, check there first. Maybe we'll get lucky. - if (StoredDeclsMap *Map = LookupPtr.getPointer()) { - StoredDeclsMap::iterator Pos = Map->find(Name); - if (Pos != Map->end()) { - Results.insert(Results.end(), - Pos->second.getLookupResult().first, - Pos->second.getLookupResult().second); - return; + if (Name) { + if (StoredDeclsMap *Map = LookupPtr.getPointer()) { + StoredDeclsMap::iterator Pos = Map->find(Name); + if (Pos != Map->end()) { + Results.insert(Results.end(), + Pos->second.getLookupResult().first, + Pos->second.getLookupResult().second); + return; + } } } - + // Slow case: grovel through the declarations in our chain looking for // matches. 
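// What the rewritten canBeWeakImported above gates, assuming a Darwin-style
// toolchain: only declarations (never definitions) may be weakly imported,
// after which code probes for the symbol's presence at run time.
extern int NewRuntimeFlag __attribute__((weak_import)); // declaration only
bool haveNewRuntime() { return &NewRuntimeFlag != nullptr; }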
for (Decl *D = FirstDecl; D; D = D->getNextDeclInContext()) { diff --git a/lib/AST/DeclCXX.cpp b/lib/AST/DeclCXX.cpp index 114322b..eec2e9d 100644 --- a/lib/AST/DeclCXX.cpp +++ b/lib/AST/DeclCXX.cpp @@ -43,13 +43,11 @@ CXXRecordDecl::DefinitionData::DefinitionData(CXXRecordDecl *D) Abstract(false), IsStandardLayout(true), HasNoNonEmptyBases(true), HasPrivateFields(false), HasProtectedFields(false), HasPublicFields(false), HasMutableFields(false), HasOnlyCMembers(true), + HasInClassInitializer(false), HasTrivialDefaultConstructor(true), HasConstexprNonCopyMoveConstructor(false), DefaultedDefaultConstructorIsConstexpr(true), - DefaultedCopyConstructorIsConstexpr(true), - DefaultedMoveConstructorIsConstexpr(true), - HasConstexprDefaultConstructor(false), HasConstexprCopyConstructor(false), - HasConstexprMoveConstructor(false), HasTrivialCopyConstructor(true), + HasConstexprDefaultConstructor(false), HasTrivialCopyConstructor(true), HasTrivialMoveConstructor(true), HasTrivialCopyAssignment(true), HasTrivialMoveAssignment(true), HasTrivialDestructor(true), HasIrrelevantDestructor(true), @@ -62,6 +60,14 @@ CXXRecordDecl::DefinitionData::DefinitionData(CXXRecordDecl *D) NumVBases(0), Bases(), VBases(), Definition(D), FirstFriend(0) { } +CXXBaseSpecifier *CXXRecordDecl::DefinitionData::getBasesSlowCase() const { + return Bases.get(Definition->getASTContext().getExternalSource()); +} + +CXXBaseSpecifier *CXXRecordDecl::DefinitionData::getVBasesSlowCase() const { + return VBases.get(Definition->getASTContext().getExternalSource()); +} + CXXRecordDecl::CXXRecordDecl(Kind K, TagKind TK, DeclContext *DC, SourceLocation StartLoc, SourceLocation IdLoc, IdentifierInfo *Id, CXXRecordDecl *PrevDecl) @@ -219,8 +225,6 @@ CXXRecordDecl::setBases(CXXBaseSpecifier const * const *Bases, // In the definition of a constexpr constructor [...] // -- the class shall not have any virtual base classes data().DefaultedDefaultConstructorIsConstexpr = false; - data().DefaultedCopyConstructorIsConstexpr = false; - data().DefaultedMoveConstructorIsConstexpr = false; } else { // C++ [class.ctor]p5: // A default constructor is trivial [...] if: @@ -259,25 +263,6 @@ CXXRecordDecl::setBases(CXXBaseSpecifier const * const *Bases, // default constructor is constexpr. if (!BaseClassDecl->hasConstexprDefaultConstructor()) data().DefaultedDefaultConstructorIsConstexpr = false; - - // C++11 [class.copy]p13: - // If the implicitly-defined constructor would satisfy the requirements - // of a constexpr constructor, the implicitly-defined constructor is - // constexpr. - // C++11 [dcl.constexpr]p4: - // -- every constructor involved in initializing [...] base class - // sub-objects shall be a constexpr constructor - if (!BaseClassDecl->hasConstexprCopyConstructor()) - data().DefaultedCopyConstructorIsConstexpr = false; - if (BaseClassDecl->hasDeclaredMoveConstructor() || - BaseClassDecl->needsImplicitMoveConstructor()) - // FIXME: If the implicit move constructor generated for the base class - // would be ill-formed, the implicit move constructor generated for the - // derived class calls the base class' copy constructor. 
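// localUncachedLookup above tries the StoredDeclsMap first and only grovels
// through the chain when the cache cannot answer. The same shape, reduced to
// a toy with hypothetical names:
#include <string>
#include <unordered_map>
#include <vector>

struct ChainNode {
  std::string Name;
  ChainNode *Next;
};

void uncachedLookup(const std::string &Name, ChainNode *First,
                    const std::unordered_multimap<std::string, ChainNode *> *Map,
                    std::vector<ChainNode *> &Results) {
  Results.clear();
  if (Map) { // fast path: maybe the cached table already has the answer
    auto Range = Map->equal_range(Name);
    if (Range.first != Range.second) {
      for (auto It = Range.first; It != Range.second; ++It)
        Results.push_back(It->second);
      return;
    }
  }
  for (ChainNode *N = First; N; N = N->Next) // slow path: scan every decl
    if (N->Name == Name)
      Results.push_back(N);
}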
- data().DefaultedMoveConstructorIsConstexpr &= - BaseClassDecl->hasConstexprMoveConstructor(); - else if (!BaseClassDecl->hasConstexprCopyConstructor()) - data().DefaultedMoveConstructorIsConstexpr = false; } // C++ [class.ctor]p3: @@ -359,8 +344,8 @@ GetBestOverloadCandidateSimple( if (Cands[Best].second.compatiblyIncludes(Cands[I].second)) Best = I; - for (unsigned I = 1; I != N; ++I) - if (Cands[Best].second.compatiblyIncludes(Cands[I].second)) + for (unsigned I = 0; I != N; ++I) + if (I != Best && Cands[Best].second.compatiblyIncludes(Cands[I].second)) return 0; return Cands[Best].first; @@ -469,6 +454,14 @@ void CXXRecordDecl::markedVirtualFunctionPure() { data().Abstract = true; } +void CXXRecordDecl::markedConstructorConstexpr(CXXConstructorDecl *CD) { + if (!CD->isCopyOrMoveConstructor()) + data().HasConstexprNonCopyMoveConstructor = true; + + if (CD->isDefaultConstructor()) + data().HasConstexprDefaultConstructor = true; +} + void CXXRecordDecl::addedMember(Decl *D) { if (!D->isImplicit() && !isa(D) && @@ -545,12 +538,8 @@ void CXXRecordDecl::addedMember(Decl *D) { } } else if (Constructor->isCopyConstructor()) { data().DeclaredCopyConstructor = true; - if (Constructor->isConstexpr()) - data().HasConstexprCopyConstructor = true; } else if (Constructor->isMoveConstructor()) { data().DeclaredMoveConstructor = true; - if (Constructor->isConstexpr()) - data().HasConstexprMoveConstructor = true; } else goto NotASpecialMember; return; @@ -607,9 +596,6 @@ NotASpecialMember:; // user-provided [...] if (UserProvided) data().HasTrivialCopyConstructor = false; - - if (Constructor->isConstexpr()) - data().HasConstexprCopyConstructor = true; } else if (Constructor->isMoveConstructor()) { data().UserDeclaredMoveConstructor = true; data().DeclaredMoveConstructor = true; @@ -619,9 +605,6 @@ NotASpecialMember:; // user-provided [...] if (UserProvided) data().HasTrivialMoveConstructor = false; - - if (Constructor->isConstexpr()) - data().HasConstexprMoveConstructor = true; } } if (Constructor->isConstexpr() && !Constructor->isCopyOrMoveConstructor()) { @@ -663,19 +646,9 @@ NotASpecialMember:; // C++11 [class.dtor]p5: // A destructor is trivial if it is not user-provided and if // -- the destructor is not virtual. - if (DD->isUserProvided() || DD->isVirtual()) { + if (DD->isUserProvided() || DD->isVirtual()) data().HasTrivialDestructor = false; - // C++11 [dcl.constexpr]p1: - // The constexpr specifier shall be applied only to [...] the - // declaration of a static data member of a literal type. - // C++11 [basic.types]p10: - // A type is a literal type if it is [...] a class type that [...] has - // a trivial destructor. - data().DefaultedDefaultConstructorIsConstexpr = false; - data().DefaultedCopyConstructorIsConstexpr = false; - data().DefaultedMoveConstructorIsConstexpr = false; - } - + return; } @@ -792,7 +765,7 @@ NotASpecialMember:; // that does not explicitly have no lifetime makes the class a non-POD. // However, we delay setting PlainOldData to false in this case so that // Sema has a chance to diagnostic causes where the same class will be - // non-POD with Automatic Reference Counting but a POD without Instant Objects. + // non-POD with Automatic Reference Counting but a POD without ARC. // In this case, the class will become a non-POD class when we complete // the definition. 
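// The GetBestOverloadCandidateSimple change above restores the
// select-then-verify shape: pick a front-runner, then confirm it beats every
// *other* candidate (the old loop started at index 1 and compared Best
// against itself). A generic sketch of that pattern:
#include <cstddef>
#include <vector>

template <typename T, typename StrictlyBetter>
const T *bestUnique(const std::vector<T> &Xs, StrictlyBetter Better) {
  if (Xs.empty()) return nullptr;
  std::size_t Best = 0;
  for (std::size_t I = 1; I < Xs.size(); ++I)
    if (Better(Xs[I], Xs[Best])) Best = I;
  for (std::size_t I = 0; I < Xs.size(); ++I)
    if (I != Best && !Better(Xs[Best], Xs[I]))
      return nullptr; // ambiguous: no single candidate dominates
  return &Xs[Best];
}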
ASTContext &Context = getASTContext(); @@ -818,17 +791,19 @@ NotASpecialMember:; data().HasNonLiteralTypeFieldsOrBases = true; if (Field->hasInClassInitializer()) { - // C++0x [class]p5: + data().HasInClassInitializer = true; + + // C++11 [class]p5: // A default constructor is trivial if [...] no non-static data member // of its class has a brace-or-equal-initializer. data().HasTrivialDefaultConstructor = false; - // C++0x [dcl.init.aggr]p1: + // C++11 [dcl.init.aggr]p1: // An aggregate is a [...] class with [...] no // brace-or-equal-initializers for non-static data members. data().Aggregate = false; - // C++0x [class]p10: + // C++11 [class]p10: // A POD struct is [...] a trivial class. data().PlainOldData = false; } @@ -920,31 +895,15 @@ NotASpecialMember:; // -- every constructor involved in initializing non-static data // members [...] shall be a constexpr constructor if (!Field->hasInClassInitializer() && - !FieldRec->hasConstexprDefaultConstructor()) + !FieldRec->hasConstexprDefaultConstructor() && !isUnion()) // The standard requires any in-class initializer to be a constant // expression. We consider this to be a defect. data().DefaultedDefaultConstructorIsConstexpr = false; - - if (!FieldRec->hasConstexprCopyConstructor()) - data().DefaultedCopyConstructorIsConstexpr = false; - - if (FieldRec->hasDeclaredMoveConstructor() || - FieldRec->needsImplicitMoveConstructor()) - // FIXME: If the implicit move constructor generated for the member's - // class would be ill-formed, the implicit move constructor generated - // for this class calls the member's copy constructor. - data().DefaultedMoveConstructorIsConstexpr &= - FieldRec->hasConstexprMoveConstructor(); - else if (!FieldRec->hasConstexprCopyConstructor()) - data().DefaultedMoveConstructorIsConstexpr = false; } } else { // Base element type of field is a non-class type. - if (!T->isLiteralType()) { - data().DefaultedDefaultConstructorIsConstexpr = false; - data().DefaultedCopyConstructorIsConstexpr = false; - data().DefaultedMoveConstructorIsConstexpr = false; - } else if (!Field->hasInClassInitializer()) + if (!T->isLiteralType() || + (!Field->hasInClassInitializer() && !isUnion())) data().DefaultedDefaultConstructorIsConstexpr = false; } @@ -1018,7 +977,7 @@ static CanQualType GetConversionType(ASTContext &Context, NamedDecl *Conv) { /// Collect the visible conversions of a base class. /// -/// \param Base a base class of the class we're considering +/// \param Record a base class of the class we're considering /// \param InVirtual whether this base class is a virtual base (or a base /// of a virtual base) /// \param Access the access along the inheritance path to this base @@ -1050,8 +1009,10 @@ static void CollectVisibleConversions(ASTContext &Context, HiddenTypes = &HiddenTypesBuffer; for (UnresolvedSetIterator I = Cs.begin(), E = Cs.end(); I != E; ++I) { - bool Hidden = - !HiddenTypesBuffer.insert(GetConversionType(Context, I.getDecl())); + CanQualType ConvType(GetConversionType(Context, I.getDecl())); + bool Hidden = ParentHiddenTypes.count(ConvType); + if (!Hidden) + HiddenTypesBuffer.insert(ConvType); // If this conversion is hidden and we're in a virtual base, // remember that it's hidden along some inheritance path. 
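// The CollectVisibleConversions fix above: whether a conversion is hidden is
// decided solely by what the more-derived scopes declared (ParentHiddenTypes);
// what this class sees is merely recorded so it can hide matching conversions
// in its own bases. A reduced model keyed on strings, illustrative only:
#include <set>
#include <string>
#include <vector>

void collectVisible(const std::set<std::string> &ParentHidden,
                    std::set<std::string> &HiddenForBases,
                    const std::vector<std::string> &DeclaredHere,
                    std::vector<std::string> &Visible) {
  for (const std::string &T : DeclaredHere) {
    bool Hidden = ParentHidden.count(T) != 0; // the parent set decides
    if (!Hidden) {
      HiddenForBases.insert(T); // shadow this type in the bases below
      Visible.push_back(T);
    }
  }
}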
@@ -1247,13 +1208,16 @@ void CXXRecordDecl::completeDefinition(CXXFinalOverriderMap *FinalOverriders) { // Objective-C Automatic Reference Counting: // If a class has a non-static data member of Objective-C pointer // type (or array thereof), it is a non-POD type and its - // default constructor (if any), copy constructor, copy assignment - // operator, and destructor are non-trivial. + // default constructor (if any), copy constructor, move constructor, + // copy assignment operator, move assignment operator, and destructor are + // non-trivial. struct DefinitionData &Data = data(); Data.PlainOldData = false; Data.HasTrivialDefaultConstructor = false; Data.HasTrivialCopyConstructor = false; + Data.HasTrivialMoveConstructor = false; Data.HasTrivialCopyAssignment = false; + Data.HasTrivialMoveAssignment = false; Data.HasTrivialDestructor = false; Data.HasIrrelevantDestructor = false; } @@ -1316,6 +1280,55 @@ bool CXXRecordDecl::mayBeAbstract() const { void CXXMethodDecl::anchor() { } +static bool recursivelyOverrides(const CXXMethodDecl *DerivedMD, + const CXXMethodDecl *BaseMD) { + for (CXXMethodDecl::method_iterator I = DerivedMD->begin_overridden_methods(), + E = DerivedMD->end_overridden_methods(); I != E; ++I) { + const CXXMethodDecl *MD = *I; + if (MD->getCanonicalDecl() == BaseMD->getCanonicalDecl()) + return true; + if (recursivelyOverrides(MD, BaseMD)) + return true; + } + return false; +} + +CXXMethodDecl * +CXXMethodDecl::getCorrespondingMethodInClass(const CXXRecordDecl *RD) { + if (this->getParent()->getCanonicalDecl() == RD->getCanonicalDecl()) + return this; + + // Lookup doesn't work for destructors, so handle them separately. + if (isa(this)) { + CXXMethodDecl *MD = RD->getDestructor(); + if (MD && recursivelyOverrides(MD, this)) + return MD; + return NULL; + } + + lookup_const_result Candidates = RD->lookup(getDeclName()); + for (NamedDecl * const * I = Candidates.first; I != Candidates.second; ++I) { + CXXMethodDecl *MD = dyn_cast(*I); + if (!MD) + continue; + if (recursivelyOverrides(MD, this)) + return MD; + } + + for (CXXRecordDecl::base_class_const_iterator I = RD->bases_begin(), + E = RD->bases_end(); I != E; ++I) { + const RecordType *RT = I->getType()->getAs(); + if (!RT) + continue; + const CXXRecordDecl *Base = cast(RT->getDecl()); + CXXMethodDecl *T = this->getCorrespondingMethodInClass(Base); + if (T) + return T; + } + + return NULL; +} + CXXMethodDecl * CXXMethodDecl::Create(ASTContext &C, CXXRecordDecl *RD, SourceLocation StartLoc, @@ -1690,7 +1703,9 @@ bool CXXConstructorDecl::isConvertingConstructor(bool AllowExplicit) const { return (getNumParams() == 0 && getType()->getAs()->isVariadic()) || (getNumParams() == 1) || - (getNumParams() > 1 && getParamDecl(1)->hasDefaultArg()); + (getNumParams() > 1 && + (getParamDecl(1)->hasDefaultArg() || + getParamDecl(1)->isParameterPack())); } bool CXXConstructorDecl::isSpecializationCopyingObject() const { @@ -1993,15 +2008,17 @@ StaticAssertDecl *StaticAssertDecl::Create(ASTContext &C, DeclContext *DC, SourceLocation StaticAssertLoc, Expr *AssertExpr, StringLiteral *Message, - SourceLocation RParenLoc) { + SourceLocation RParenLoc, + bool Failed) { return new (C) StaticAssertDecl(DC, StaticAssertLoc, AssertExpr, Message, - RParenLoc); + RParenLoc, Failed); } StaticAssertDecl *StaticAssertDecl::CreateDeserialized(ASTContext &C, unsigned ID) { void *Mem = AllocateDeserializedDecl(C, ID, sizeof(StaticAssertDecl)); - return new (Mem) StaticAssertDecl(0, SourceLocation(), 0, 0,SourceLocation()); + return new (Mem) 
StaticAssertDecl(0, SourceLocation(), 0, 0, + SourceLocation(), false); } static const char *getAccessName(AccessSpecifier AS) { diff --git a/lib/AST/DeclFriend.cpp b/lib/AST/DeclFriend.cpp index 6e3bd8d..553d170 100644 --- a/lib/AST/DeclFriend.cpp +++ b/lib/AST/DeclFriend.cpp @@ -12,12 +12,18 @@ // //===----------------------------------------------------------------------===// +#include "clang/AST/ASTContext.h" #include "clang/AST/DeclFriend.h" #include "clang/AST/DeclTemplate.h" using namespace clang; void FriendDecl::anchor() { } +FriendDecl *FriendDecl::getNextFriendSlowCase() { + return cast_or_null( + NextFriend.get(getASTContext().getExternalSource())); +} + FriendDecl *FriendDecl::Create(ASTContext &C, DeclContext *DC, SourceLocation L, FriendUnion Friend, diff --git a/lib/AST/DeclObjC.cpp b/lib/AST/DeclObjC.cpp index 2370d3c..4d48ad8 100644 --- a/lib/AST/DeclObjC.cpp +++ b/lib/AST/DeclObjC.cpp @@ -363,9 +363,12 @@ ObjCMethodDecl *ObjCInterfaceDecl::lookupMethod(Selector Sel, return NULL; } +// Will search "local" class/category implementations for a method decl. +// If failed, then we search in class's root for an instance method. +// Returns 0 if no method is found. ObjCMethodDecl *ObjCInterfaceDecl::lookupPrivateMethod( const Selector &Sel, - bool Instance) { + bool Instance) const { // FIXME: Should make sure no callers ever do this. if (!hasDefinition()) return 0; @@ -377,7 +380,23 @@ ObjCMethodDecl *ObjCInterfaceDecl::lookupPrivateMethod( if (ObjCImplementationDecl *ImpDecl = getImplementation()) Method = Instance ? ImpDecl->getInstanceMethod(Sel) : ImpDecl->getClassMethod(Sel); - + + // Look through local category implementations associated with the class. + if (!Method) + Method = Instance ? getCategoryInstanceMethod(Sel) + : getCategoryClassMethod(Sel); + + // Before we give up, check if the selector is an instance method. + // But only in the root. This matches gcc's behavior and what the + // runtime expects. + if (!Instance && !Method && !getSuperClass()) { + Method = lookupInstanceMethod(Sel); + // Look through local category implementations associated + // with the root class. 
+ if (!Method) + Method = lookupPrivateMethod(Sel, true); + } + if (!Method && getSuperClass()) return getSuperClass()->lookupPrivateMethod(Sel, Instance); return Method; @@ -451,7 +470,8 @@ void ObjCMethodDecl::setMethodParams(ASTContext &C, if (isImplicit()) return setParamsAndSelLocs(C, Params, ArrayRef()); - SelLocsKind = hasStandardSelectorLocs(getSelector(), SelLocs, Params, EndLoc); + SelLocsKind = hasStandardSelectorLocs(getSelector(), SelLocs, Params, + DeclEndLoc); if (SelLocsKind != SelLoc_NonStandard) return setParamsAndSelLocs(C, Params, ArrayRef()); @@ -523,6 +543,12 @@ ObjCMethodDecl *ObjCMethodDecl::getCanonicalDecl() { return this; } +SourceLocation ObjCMethodDecl::getLocEnd() const { + if (Stmt *Body = getBody()) + return Body->getLocEnd(); + return DeclEndLoc; +} + ObjCMethodFamily ObjCMethodDecl::getMethodFamily() const { ObjCMethodFamily family = static_cast(Family); if (family != static_cast(InvalidObjCMethodFamily)) @@ -767,7 +793,7 @@ ObjCIvarDecl *ObjCInterfaceDecl::all_declared_ivar_begin() { ObjCIvarDecl *curIvar = 0; if (!ivar_empty()) { ObjCInterfaceDecl::ivar_iterator I = ivar_begin(), E = ivar_end(); - data().IvarList = (*I); ++I; + data().IvarList = *I; ++I; for (curIvar = data().IvarList; I != E; curIvar = *I, ++I) curIvar->setNextIvar(*I); } @@ -778,7 +804,7 @@ ObjCIvarDecl *ObjCInterfaceDecl::all_declared_ivar_begin() { ObjCCategoryDecl::ivar_iterator I = CDecl->ivar_begin(), E = CDecl->ivar_end(); if (!data().IvarList) { - data().IvarList = (*I); ++I; + data().IvarList = *I; ++I; curIvar = data().IvarList; } for ( ;I != E; curIvar = *I, ++I) @@ -791,7 +817,7 @@ ObjCIvarDecl *ObjCInterfaceDecl::all_declared_ivar_begin() { ObjCImplementationDecl::ivar_iterator I = ImplDecl->ivar_begin(), E = ImplDecl->ivar_end(); if (!data().IvarList) { - data().IvarList = (*I); ++I; + data().IvarList = *I; ++I; curIvar = data().IvarList; } for ( ;I != E; curIvar = *I, ++I) @@ -915,16 +941,10 @@ ObjCIvarDecl *ObjCIvarDecl::Create(ASTContext &C, ObjCContainerDecl *DC, // decl contexts, the previously built IvarList must be rebuilt. ObjCInterfaceDecl *ID = dyn_cast(DC); if (!ID) { - if (ObjCImplementationDecl *IM = dyn_cast(DC)) { + if (ObjCImplementationDecl *IM = dyn_cast(DC)) ID = IM->getClassInterface(); - if (BW) - IM->setHasSynthBitfield(true); - } else { - ObjCCategoryDecl *CD = cast(DC); - ID = CD->getClassInterface(); - if (BW) - CD->setHasSynthBitfield(true); - } + else + ID = cast(DC)->getClassInterface(); } ID->setIvarList(0); } @@ -1169,7 +1189,7 @@ void ObjCImplDecl::setClassInterface(ObjCInterfaceDecl *IFace) { } /// FindPropertyImplIvarDecl - This method lookup the ivar in the list of -/// properties implemented in this category @implementation block and returns +/// properties implemented in this category \@implementation block and returns /// the implemented property that uses it. /// ObjCPropertyImplDecl *ObjCImplDecl:: @@ -1184,8 +1204,8 @@ FindPropertyImplIvarDecl(IdentifierInfo *ivarId) const { } /// FindPropertyImplDecl - This method looks up a previous ObjCPropertyImplDecl -/// added to the list of those properties @synthesized/@dynamic in this -/// category @implementation block. +/// added to the list of those properties \@synthesized/\@dynamic in this +/// category \@implementation block. 
/// ObjCPropertyImplDecl *ObjCImplDecl:: FindPropertyImplDecl(IdentifierInfo *Id) const { diff --git a/lib/AST/DeclPrinter.cpp b/lib/AST/DeclPrinter.cpp index 74e1c1b..aad0ca1 100644 --- a/lib/AST/DeclPrinter.cpp +++ b/lib/AST/DeclPrinter.cpp @@ -114,6 +114,8 @@ static QualType GetBaseType(QualType T) { BaseType = FTy->getResultType(); else if (const VectorType *VTy = BaseType->getAs()) BaseType = VTy->getElementType(); + else if (const ReferenceType *RTy = BaseType->getAs()) + BaseType = RTy->getPointeeType(); else llvm_unreachable("Unknown declarator!"); } @@ -173,8 +175,10 @@ void DeclContext::dumpDeclContext() const { Printer.VisitDeclContext(const_cast(this), /*Indent=*/false); } -void Decl::dump() const { - print(llvm::errs()); +void Decl::dump(raw_ostream &Out) const { + PrintingPolicy Policy = getASTContext().getPrintingPolicy(); + Policy.Dump = true; + print(Out, Policy, /*Indentation*/ 0, /*PrintInstantiation*/ true); } raw_ostream& DeclPrinter::Indent(unsigned Indentation) { @@ -322,15 +326,13 @@ void DeclPrinter::VisitTranslationUnitDecl(TranslationUnitDecl *D) { } void DeclPrinter::VisitTypedefDecl(TypedefDecl *D) { - std::string S = D->getNameAsString(); - D->getUnderlyingType().getAsStringInternal(S, Policy); if (!Policy.SuppressSpecifiers) { Out << "typedef "; if (D->isModulePrivate()) Out << "__module_private__ "; } - Out << S; + D->getUnderlyingType().print(Out, Policy, D->getName()); prettyPrintAttributes(D); } @@ -350,11 +352,8 @@ void DeclPrinter::VisitEnumDecl(EnumDecl *D) { } Out << *D; - if (D->isFixed()) { - std::string Underlying; - D->getIntegerType().getAsStringInternal(Underlying, Policy); - Out << " : " << Underlying; - } + if (D->isFixed()) + Out << " : " << D->getIntegerType().stream(Policy); if (D->isCompleteDefinition()) { Out << " {\n"; @@ -441,13 +440,12 @@ void DeclPrinter::VisitFunctionDecl(FunctionDecl *D) { Proto += ")"; - if (FT && FT->getTypeQuals()) { - unsigned TypeQuals = FT->getTypeQuals(); - if (TypeQuals & Qualifiers::Const) + if (FT) { + if (FT->isConst()) Proto += " const"; - if (TypeQuals & Qualifiers::Volatile) + if (FT->isVolatile()) Proto += " volatile"; - if (TypeQuals & Qualifiers::Restrict) + if (FT->isRestrict()) Proto += " restrict"; } @@ -460,9 +458,7 @@ void DeclPrinter::VisitFunctionDecl(FunctionDecl *D) { if (I) Proto += ", "; - std::string ExceptionType; - FT->getExceptionType(I).getAsStringInternal(ExceptionType, SubPolicy); - Proto += ExceptionType; + Proto += FT->getExceptionType(I).getAsString(SubPolicy);; } Proto += ")"; } else if (FT && isNoexceptExceptionSpec(FT->getExceptionSpecType())) { @@ -542,12 +538,11 @@ void DeclPrinter::VisitFunctionDecl(FunctionDecl *D) { } } else - AFT->getResultType().getAsStringInternal(Proto, Policy); + AFT->getResultType().print(Out, Policy, Proto); } else { - Ty.getAsStringInternal(Proto, Policy); + Ty.print(Out, Policy, Proto); } - Out << Proto; prettyPrintAttributes(D); if (D->isPure()) @@ -581,9 +576,7 @@ void DeclPrinter::VisitFieldDecl(FieldDecl *D) { if (!Policy.SuppressSpecifiers && D->isModulePrivate()) Out << "__module_private__ "; - std::string Name = D->getNameAsString(); - D->getType().getAsStringInternal(Name, Policy); - Out << Name; + Out << D->getType().stream(Policy, D->getName()); if (D->isBitField()) { Out << " : "; @@ -592,7 +585,10 @@ void DeclPrinter::VisitFieldDecl(FieldDecl *D) { Expr *Init = D->getInClassInitializer(); if (!Policy.SuppressInitializers && Init) { - Out << " = "; + if (D->getInClassInitStyle() == ICIS_ListInit) + Out << " "; + else + Out << " = "; 
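// The two in-class initializer styles the printer now distinguishes, in
// source form (ICIS_CopyInit keeps the " = ", ICIS_ListInit gets only a
// space before the braces):
struct InitStyles {
  int A = 1; // ICIS_CopyInit: printed as "int A = 1"
  int B{2};  // ICIS_ListInit: printed as "int B {2}"
};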
Init->printPretty(Out, Context, 0, Policy, Indentation); } prettyPrintAttributes(D); @@ -613,12 +609,10 @@ void DeclPrinter::VisitVarDecl(VarDecl *D) { if (!Policy.SuppressSpecifiers && D->isModulePrivate()) Out << "__module_private__ "; - std::string Name = D->getNameAsString(); QualType T = D->getType(); if (ParmVarDecl *Parm = dyn_cast(D)) T = Parm->getOriginalType(); - T.getAsStringInternal(Name, Policy); - Out << Name; + T.print(Out, Policy, D->getName()); Expr *Init = D->getInit(); if (!Policy.SuppressInitializers && Init) { bool ImplicitInit = false; @@ -666,6 +660,8 @@ void DeclPrinter::VisitStaticAssertDecl(StaticAssertDecl *D) { // C++ declarations //---------------------------------------------------------------------------- void DeclPrinter::VisitNamespaceDecl(NamespaceDecl *D) { + if (D->isInline()) + Out << "inline "; Out << "namespace " << *D << " {\n"; VisitDeclContext(D); Indent() << "}"; @@ -923,7 +919,7 @@ void DeclPrinter::VisitObjCInterfaceDecl(ObjCInterfaceDecl *OID) { Indentation += Policy.Indentation; for (ObjCInterfaceDecl::ivar_iterator I = OID->ivar_begin(), E = OID->ivar_end(); I != E; ++I) { - Indent() << (*I)->getType().getAsString(Policy) << ' ' << **I << ";\n"; + Indent() << I->getType().getAsString(Policy) << ' ' << **I << ";\n"; } Indentation -= Policy.Indentation; Out << "}\n"; diff --git a/lib/AST/DeclTemplate.cpp b/lib/AST/DeclTemplate.cpp index 4590195..5aebc2b 100644 --- a/lib/AST/DeclTemplate.cpp +++ b/lib/AST/DeclTemplate.cpp @@ -145,7 +145,7 @@ RedeclarableTemplateDecl::CommonBase *RedeclarableTemplateDecl::getCommonPtr() { template typename RedeclarableTemplateDecl::SpecEntryTraits::DeclType* RedeclarableTemplateDecl::findSpecializationImpl( - llvm::FoldingSet &Specs, + llvm::FoldingSetVector &Specs, const TemplateArgument *Args, unsigned NumArgs, void *&InsertPos) { typedef SpecEntryTraits SETraits; @@ -298,13 +298,13 @@ void ClassTemplateDecl::LoadLazySpecializations() { } } -llvm::FoldingSet & +llvm::FoldingSetVector & ClassTemplateDecl::getSpecializations() { LoadLazySpecializations(); return getCommonPtr()->Specializations; } -llvm::FoldingSet & +llvm::FoldingSetVector & ClassTemplateDecl::getPartialSpecializations() { LoadLazySpecializations(); return getCommonPtr()->PartialSpecializations; @@ -363,11 +363,11 @@ void ClassTemplateDecl::AddPartialSpecialization( void ClassTemplateDecl::getPartialSpecializations( SmallVectorImpl &PS) { - llvm::FoldingSet &PartialSpecs + llvm::FoldingSetVector &PartialSpecs = getPartialSpecializations(); PS.clear(); PS.resize(PartialSpecs.size()); - for (llvm::FoldingSet::iterator + for (llvm::FoldingSetVector::iterator P = PartialSpecs.begin(), PEnd = PartialSpecs.end(); P != PEnd; ++P) { assert(!PS[P->getSequenceNumber()]); @@ -378,7 +378,8 @@ void ClassTemplateDecl::getPartialSpecializations( ClassTemplatePartialSpecializationDecl * ClassTemplateDecl::findPartialSpecialization(QualType T) { ASTContext &Context = getASTContext(); - typedef llvm::FoldingSet::iterator + using llvm::FoldingSetVector; + typedef FoldingSetVector::iterator partial_spec_iterator; for (partial_spec_iterator P = getPartialSpecializations().begin(), PEnd = getPartialSpecializations().end(); @@ -394,7 +395,7 @@ ClassTemplatePartialSpecializationDecl * ClassTemplateDecl::findPartialSpecInstantiatedFromMember( ClassTemplatePartialSpecializationDecl *D) { Decl *DCanon = D->getCanonicalDecl(); - for (llvm::FoldingSet::iterator + for (llvm::FoldingSetVector::iterator P = getPartialSpecializations().begin(), PEnd = 
getPartialSpecializations().end(); P != PEnd; ++P) { @@ -868,5 +869,6 @@ ClassScopeFunctionSpecializationDecl::CreateDeserialized(ASTContext &C, unsigned ID) { void *Mem = AllocateDeserializedDecl(C, ID, sizeof(ClassScopeFunctionSpecializationDecl)); - return new (Mem) ClassScopeFunctionSpecializationDecl(0, SourceLocation(), 0); + return new (Mem) ClassScopeFunctionSpecializationDecl(0, SourceLocation(), 0, + false, TemplateArgumentListInfo()); } diff --git a/lib/AST/DeclarationName.cpp b/lib/AST/DeclarationName.cpp index 64924ad..28188d9 100644 --- a/lib/AST/DeclarationName.cpp +++ b/lib/AST/DeclarationName.cpp @@ -135,33 +135,6 @@ int DeclarationName::compare(DeclarationName LHS, DeclarationName RHS) { } // end namespace clang -DeclarationName::DeclarationName(Selector Sel) { - if (!Sel.getAsOpaquePtr()) { - Ptr = 0; - return; - } - - switch (Sel.getNumArgs()) { - case 0: - Ptr = reinterpret_cast(Sel.getAsIdentifierInfo()); - assert((Ptr & PtrMask) == 0 && "Improperly aligned IdentifierInfo"); - Ptr |= StoredObjCZeroArgSelector; - break; - - case 1: - Ptr = reinterpret_cast(Sel.getAsIdentifierInfo()); - assert((Ptr & PtrMask) == 0 && "Improperly aligned IdentifierInfo"); - Ptr |= StoredObjCOneArgSelector; - break; - - default: - Ptr = Sel.InfoPtr & ~Selector::ArgFlags; - assert((Ptr & PtrMask) == 0 && "Improperly aligned MultiKeywordSelector"); - Ptr |= StoredDeclarationNameExtra; - break; - } -} - DeclarationName::NameKind DeclarationName::getNameKind() const { switch (getStoredNameKind()) { case StoredIdentifier: return Identifier; @@ -305,28 +278,10 @@ IdentifierInfo *DeclarationName::getCXXLiteralIdentifier() const { return 0; } -Selector DeclarationName::getObjCSelector() const { - switch (getNameKind()) { - case ObjCZeroArgSelector: - return Selector(reinterpret_cast(Ptr & ~PtrMask), 0); - - case ObjCOneArgSelector: - return Selector(reinterpret_cast(Ptr & ~PtrMask), 1); - - case ObjCMultiArgSelector: - return Selector(reinterpret_cast(Ptr & ~PtrMask)); - - default: - break; - } - - return Selector(); -} - -void *DeclarationName::getFETokenInfoAsVoid() const { +void *DeclarationName::getFETokenInfoAsVoidSlow() const { switch (getNameKind()) { case Identifier: - return getAsIdentifierInfo()->getFETokenInfo(); + llvm_unreachable("Handled by getFETokenInfo()"); case CXXConstructorName: case CXXDestructorName: @@ -481,12 +436,6 @@ DeclarationNameTable::getCXXLiteralOperatorName(IdentifierInfo *II) { return DeclarationName(LiteralName); } -unsigned -llvm::DenseMapInfo:: -getHashValue(clang::DeclarationName N) { - return DenseMapInfo::getHashValue(N.getAsOpaquePtr()); -} - DeclarationNameLoc::DeclarationNameLoc(DeclarationName Name) { switch (Name.getNameKind()) { case DeclarationName::Identifier: diff --git a/lib/AST/DumpXML.cpp b/lib/AST/DumpXML.cpp index 4c7cd8a..c1432b5 100644 --- a/lib/AST/DumpXML.cpp +++ b/lib/AST/DumpXML.cpp @@ -326,7 +326,7 @@ struct XMLDumper : public XMLDeclVisitor, } case TemplateArgument::Integral: { push("integer"); - setInteger("value", *A.getAsIntegral()); + setInteger("value", A.getAsIntegral()); completeAttrs(); pop(); break; @@ -778,7 +778,6 @@ struct XMLDumper : public XMLDeclVisitor, // ObjCCategoryDecl void visitObjCCategoryDeclAttrs(ObjCCategoryDecl *D) { setFlag("extension", D->IsClassExtension()); - setFlag("synth_bitfield", D->hasSynthBitfield()); } void visitObjCCategoryDeclChildren(ObjCCategoryDecl *D) { visitDeclRef("interface", D->getClassInterface()); @@ -804,7 +803,6 @@ struct XMLDumper : public XMLDeclVisitor, // ObjCImplementationDecl 
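// The DeclTemplate.cpp hunks above replace llvm::FoldingSet with
// llvm::FoldingSetVector for template specializations: a hashed container
// that also iterates in insertion order, so walking the specializations no
// longer depends on hash placement. A minimal stand-in with those two
// capabilities; hypothetical, not the LLVM class:
#include <cstddef>
#include <unordered_map>
#include <vector>

template <typename T> // assumes std::hash<T> exists
class HashedVector {
  std::vector<T> Order;                     // deterministic iteration order
  std::unordered_map<T, std::size_t> Index; // fast membership tests
public:
  bool insert(const T &V) {
    if (Index.count(V)) return false;       // already present
    Index.emplace(V, Order.size());
    Order.push_back(V);
    return true;
  }
  typename std::vector<T>::const_iterator begin() const { return Order.begin(); }
  typename std::vector<T>::const_iterator end() const { return Order.end(); }
};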
void visitObjCImplementationDeclAttrs(ObjCImplementationDecl *D) { - setFlag("synth_bitfield", D->hasSynthBitfield()); set("identifier", D->getName()); } void visitObjCImplementationDeclChildren(ObjCImplementationDecl *D) { @@ -973,9 +971,9 @@ struct XMLDumper : public XMLDeclVisitor, } void visitFunctionProtoTypeAttrs(FunctionProtoType *T) { - setFlag("const", T->getTypeQuals() & Qualifiers::Const); - setFlag("volatile", T->getTypeQuals() & Qualifiers::Volatile); - setFlag("restrict", T->getTypeQuals() & Qualifiers::Restrict); + setFlag("const", T->isConst()); + setFlag("volatile", T->isVolatile()); + setFlag("restrict", T->isRestrict()); } void visitFunctionProtoTypeChildren(FunctionProtoType *T) { push("parameters"); @@ -1024,17 +1022,12 @@ struct XMLDumper : public XMLDeclVisitor, }; } -void Decl::dumpXML() const { - dumpXML(llvm::errs()); -} - void Decl::dumpXML(raw_ostream &out) const { XMLDumper(out, getASTContext()).dispatch(const_cast(this)); } #else /* ifndef NDEBUG */ -void Decl::dumpXML() const {} void Decl::dumpXML(raw_ostream &out) const {} #endif diff --git a/lib/AST/Expr.cpp b/lib/AST/Expr.cpp index fcde542..24361ef 100644 --- a/lib/AST/Expr.cpp +++ b/lib/AST/Expr.cpp @@ -33,6 +33,21 @@ #include using namespace clang; +const CXXRecordDecl *Expr::getBestDynamicClassType() const { + const Expr *E = ignoreParenBaseCasts(); + + QualType DerivedType = E->getType(); + if (const PointerType *PTy = DerivedType->getAs()) + DerivedType = PTy->getPointeeType(); + + if (DerivedType->isDependentType()) + return NULL; + + const RecordType *Ty = DerivedType->castAs(); + Decl *D = Ty->getDecl(); + return cast(D); +} + /// isKnownToHaveBooleanValue - Return true if this is an integer expression /// that is known to return 0 or 1. This happens for _Bool/bool expressions /// but also int expressions which are produced by things like comparisons in @@ -196,7 +211,7 @@ static void computeDeclRefDependence(ASTContext &Ctx, NamedDecl *D, QualType T, if ((Ctx.getLangOpts().CPlusPlus0x ? 
Var->getType()->isLiteralType() : Var->getType()->isIntegralOrEnumerationType()) && - (Var->getType().getCVRQualifiers() == Qualifiers::Const || + (Var->getType().isConstQualified() || Var->getType()->isReferenceType())) { if (const Expr *Init = Var->getAnyInitializer()) if (Init->isValueDependent()) { @@ -414,9 +429,7 @@ std::string PredefinedExpr::ComputeName(IdentType IT, const Decl *CurrentDecl) { if (FT) { for (unsigned i = 0, e = Decl->getNumParams(); i != e; ++i) { if (i) POut << ", "; - std::string Param; - Decl->getParamDecl(i)->getType().getAsStringInternal(Param, Policy); - POut << Param; + POut << Decl->getParamDecl(i)->getType().stream(Policy); } if (FT->isVariadic()) { @@ -427,10 +440,10 @@ std::string PredefinedExpr::ComputeName(IdentType IT, const Decl *CurrentDecl) { POut << ")"; if (const CXXMethodDecl *MD = dyn_cast(FD)) { - Qualifiers ThisQuals = Qualifiers::fromCVRMask(MD->getTypeQualifiers()); - if (ThisQuals.hasConst()) + const FunctionType *FT = cast(MD->getType().getTypePtr()); + if (FT->isConst()) POut << " const"; - if (ThisQuals.hasVolatile()) + if (FT->isVolatile()) POut << " volatile"; RefQualifierKind Ref = MD->getRefQualifier(); if (Ref == RQ_LValue) @@ -545,6 +558,17 @@ void APNumericStorage::setIntValue(ASTContext &C, const llvm::APInt &Val) { VAL = 0; } +IntegerLiteral::IntegerLiteral(ASTContext &C, const llvm::APInt &V, + QualType type, SourceLocation l) + : Expr(IntegerLiteralClass, type, VK_RValue, OK_Ordinary, false, false, + false, false), + Loc(l) { + assert(type->isIntegerType() && "Illegal type in IntegerLiteral"); + assert(V.getBitWidth() == C.getIntWidth(type) && + "Integer type is not the correct size for constant."); + setValue(C, V); +} + IntegerLiteral * IntegerLiteral::Create(ASTContext &C, const llvm::APInt &V, QualType type, SourceLocation l) { @@ -556,6 +580,23 @@ IntegerLiteral::Create(ASTContext &C, EmptyShell Empty) { return new (C) IntegerLiteral(Empty); } +FloatingLiteral::FloatingLiteral(ASTContext &C, const llvm::APFloat &V, + bool isexact, QualType Type, SourceLocation L) + : Expr(FloatingLiteralClass, Type, VK_RValue, OK_Ordinary, false, false, + false, false), Loc(L) { + FloatingLiteralBits.IsIEEE = + &C.getTargetInfo().getLongDoubleFormat() == &llvm::APFloat::IEEEquad; + FloatingLiteralBits.IsExact = isexact; + setValue(C, V); +} + +FloatingLiteral::FloatingLiteral(ASTContext &C, EmptyShell Empty) + : Expr(FloatingLiteralClass, Empty) { + FloatingLiteralBits.IsIEEE = + &C.getTargetInfo().getLongDoubleFormat() == &llvm::APFloat::IEEEquad; + FloatingLiteralBits.IsExact = false; +} + FloatingLiteral * FloatingLiteral::Create(ASTContext &C, const llvm::APFloat &V, bool isexact, QualType Type, SourceLocation L) { @@ -635,6 +676,99 @@ StringLiteral *StringLiteral::CreateEmpty(ASTContext &C, unsigned NumStrs) { return SL; } +void StringLiteral::outputString(raw_ostream &OS) { + switch (getKind()) { + case Ascii: break; // no prefix. + case Wide: OS << 'L'; break; + case UTF8: OS << "u8"; break; + case UTF16: OS << 'u'; break; + case UTF32: OS << 'U'; break; + } + OS << '"'; + static const char Hex[] = "0123456789ABCDEF"; + + unsigned LastSlashX = getLength(); + for (unsigned I = 0, N = getLength(); I != N; ++I) { + switch (uint32_t Char = getCodeUnit(I)) { + default: + // FIXME: Convert UTF-8 back to codepoints before rendering. + + // Convert UTF-16 surrogate pairs back to codepoints before rendering. + // Leave invalid surrogates alone; we'll use \x for those. 
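The if-block that follows reassembles UTF-16 surrogate pairs into code points before escaping. That arithmetic is easy to get wrong, so here is a minimal standalone sketch of the same step (decodeSurrogatePair and the test pair are illustrative, not part of the patch):

    #include <cassert>
    #include <cstdint>
    #include <cstdio>

    // Combine a validated UTF-16 surrogate pair into a Unicode code point,
    // mirroring the computation StringLiteral::outputString performs below.
    static uint32_t decodeSurrogatePair(uint16_t Lead, uint16_t Trail) {
      assert(Lead >= 0xD800 && Lead <= 0xDBFF && "not a lead surrogate");
      assert(Trail >= 0xDC00 && Trail <= 0xDFFF && "not a trail surrogate");
      return 0x10000 + ((uint32_t(Lead) - 0xD800) << 10) + (Trail - 0xDC00);
    }

    int main() {
      // U+1F600 is encoded as the surrogate pair D83D DE00 in UTF-16.
      std::printf("U+%X\n", decodeSurrogatePair(0xD83D, 0xDE00)); // U+1F600
      return 0;
    }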
+ if (getKind() == UTF16 && I != N - 1 && Char >= 0xd800 && + Char <= 0xdbff) { + uint32_t Trail = getCodeUnit(I + 1); + if (Trail >= 0xdc00 && Trail <= 0xdfff) { + Char = 0x10000 + ((Char - 0xd800) << 10) + (Trail - 0xdc00); + ++I; + } + } + + if (Char > 0xff) { + // If this is a wide string, output characters over 0xff using \x + // escapes. Otherwise, this is a UTF-16 or UTF-32 string, and Char is a + // codepoint: use \x escapes for invalid codepoints. + if (getKind() == Wide || + (Char >= 0xd800 && Char <= 0xdfff) || Char >= 0x110000) { + // FIXME: Is this the best way to print wchar_t? + OS << "\\x"; + int Shift = 28; + while ((Char >> Shift) == 0) + Shift -= 4; + for (/**/; Shift >= 0; Shift -= 4) + OS << Hex[(Char >> Shift) & 15]; + LastSlashX = I; + break; + } + + if (Char > 0xffff) + OS << "\\U00" + << Hex[(Char >> 20) & 15] + << Hex[(Char >> 16) & 15]; + else + OS << "\\u"; + OS << Hex[(Char >> 12) & 15] + << Hex[(Char >> 8) & 15] + << Hex[(Char >> 4) & 15] + << Hex[(Char >> 0) & 15]; + break; + } + + // If we used \x... for the previous character, and this character is a + // hexadecimal digit, prevent it being slurped as part of the \x. + if (LastSlashX + 1 == I) { + switch (Char) { + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': + case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': + OS << "\"\""; + } + } + + assert(Char <= 0xff && + "Characters above 0xff should already have been handled."); + + if (isprint(Char)) + OS << (char)Char; + else // Output anything hard as an octal escape. + OS << '\\' + << (char)('0' + ((Char >> 6) & 7)) + << (char)('0' + ((Char >> 3) & 7)) + << (char)('0' + ((Char >> 0) & 7)); + break; + // Handle some common non-printable cases to make dumps prettier. + case '\\': OS << "\\\\"; break; + case '"': OS << "\\\""; break; + case '\n': OS << "\\n"; break; + case '\t': OS << "\\t"; break; + case '\a': OS << "\\a"; break; + case '\b': OS << "\\b"; break; + } + } + OS << '"'; +} + void StringLiteral::setString(ASTContext &C, StringRef Str, StringKind Kind, bool IsPascal) { //FIXME: we assume that the string data comes from a target that uses the same @@ -681,7 +815,8 @@ void StringLiteral::setString(ASTContext &C, StringRef Str, SourceLocation StringLiteral:: getLocationOfByte(unsigned ByteNo, const SourceManager &SM, const LangOptions &Features, const TargetInfo &Target) const { - assert(Kind == StringLiteral::Ascii && "This only works for ASCII strings"); + assert((Kind == StringLiteral::Ascii || Kind == StringLiteral::UTF8) && + "Only narrow string literals are currently supported"); // Loop over all of the tokens in this string until we find the one that // contains the byte we're looking for. @@ -704,14 +839,9 @@ getLocationOfByte(unsigned ByteNo, const SourceManager &SM, const char *StrData = Buffer.data()+LocInfo.second; - // Create a langops struct and enable trigraphs. This is sufficient for - // relexing tokens. - LangOptions LangOpts; - LangOpts.Trigraphs = true; - // Create a lexer starting at the beginning of this token. - Lexer TheLexer(StrTokSpellingLoc, Features, Buffer.begin(), StrData, - Buffer.end()); + Lexer TheLexer(SM.getLocForStartOfFile(LocInfo.first), Features, + Buffer.begin(), StrData, Buffer.end()); Token TheTok; TheLexer.LexFromRawLexer(TheTok); @@ -1656,8 +1786,9 @@ Stmt *BlockExpr::getBody() { /// be warned about if the result is unused. 
If so, fill in Loc and Ranges /// with location to warn on and the source range[s] to report with the /// warning. -bool Expr::isUnusedResultAWarning(SourceLocation &Loc, SourceRange &R1, - SourceRange &R2, ASTContext &Ctx) const { +bool Expr::isUnusedResultAWarning(const Expr *&WarnE, SourceLocation &Loc, + SourceRange &R1, SourceRange &R2, + ASTContext &Ctx) const { // Don't warn if the expr is type dependent. The type could end up // instantiating to void. if (isTypeDependent()) @@ -1667,30 +1798,32 @@ bool Expr::isUnusedResultAWarning(SourceLocation &Loc, SourceRange &R1, default: if (getType()->isVoidType()) return false; + WarnE = this; Loc = getExprLoc(); R1 = getSourceRange(); return true; case ParenExprClass: return cast(this)->getSubExpr()-> - isUnusedResultAWarning(Loc, R1, R2, Ctx); + isUnusedResultAWarning(WarnE, Loc, R1, R2, Ctx); case GenericSelectionExprClass: return cast(this)->getResultExpr()-> - isUnusedResultAWarning(Loc, R1, R2, Ctx); + isUnusedResultAWarning(WarnE, Loc, R1, R2, Ctx); case UnaryOperatorClass: { const UnaryOperator *UO = cast(this); switch (UO->getOpcode()) { - default: break; + case UO_Plus: + case UO_Minus: + case UO_AddrOf: + case UO_Not: + case UO_LNot: + case UO_Deref: + break; case UO_PostInc: case UO_PostDec: case UO_PreInc: case UO_PreDec: // ++/-- return false; // Not a warning. - case UO_Deref: - // Dereferencing a volatile pointer is a side-effect. - if (Ctx.getCanonicalType(getType()).isVolatileQualified()) - return false; - break; case UO_Real: case UO_Imag: // accessing a piece of a volatile complex is a side-effect. @@ -1699,8 +1832,9 @@ bool Expr::isUnusedResultAWarning(SourceLocation &Loc, SourceRange &R1, return false; break; case UO_Extension: - return UO->getSubExpr()->isUnusedResultAWarning(Loc, R1, R2, Ctx); + return UO->getSubExpr()->isUnusedResultAWarning(WarnE, Loc, R1, R2, Ctx); } + WarnE = this; Loc = UO->getOperatorLoc(); R1 = UO->getSubExpr()->getSourceRange(); return true; @@ -1719,17 +1853,18 @@ bool Expr::isUnusedResultAWarning(SourceLocation &Loc, SourceRange &R1, dyn_cast(BO->getRHS()->IgnoreParens())) if (IE->getValue() == 0) return false; - return BO->getRHS()->isUnusedResultAWarning(Loc, R1, R2, Ctx); + return BO->getRHS()->isUnusedResultAWarning(WarnE, Loc, R1, R2, Ctx); // Consider '||', '&&' to have side effects if the LHS or RHS does. case BO_LAnd: case BO_LOr: - if (!BO->getLHS()->isUnusedResultAWarning(Loc, R1, R2, Ctx) || - !BO->getRHS()->isUnusedResultAWarning(Loc, R1, R2, Ctx)) + if (!BO->getLHS()->isUnusedResultAWarning(WarnE, Loc, R1, R2, Ctx) || + !BO->getRHS()->isUnusedResultAWarning(WarnE, Loc, R1, R2, Ctx)) return false; break; } if (BO->isAssignmentOp()) return false; + WarnE = this; Loc = BO->getOperatorLoc(); R1 = BO->getLHS()->getSourceRange(); R2 = BO->getRHS()->getSourceRange(); @@ -1745,28 +1880,22 @@ bool Expr::isUnusedResultAWarning(SourceLocation &Loc, SourceRange &R1, // be being used for control flow. Only warn if both the LHS and // RHS are warnings. const ConditionalOperator *Exp = cast(this); - if (!Exp->getRHS()->isUnusedResultAWarning(Loc, R1, R2, Ctx)) + if (!Exp->getRHS()->isUnusedResultAWarning(WarnE, Loc, R1, R2, Ctx)) return false; if (!Exp->getLHS()) return true; - return Exp->getLHS()->isUnusedResultAWarning(Loc, R1, R2, Ctx); + return Exp->getLHS()->isUnusedResultAWarning(WarnE, Loc, R1, R2, Ctx); } case MemberExprClass: - // If the base pointer or element is to a volatile pointer/field, accessing - // it is a side effect. 
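Across all of these cases the new WarnE out parameter is threaded through every recursive call, so the caller learns which subexpression to attach the diagnostic to rather than only a location. A toy sketch of that out-parameter pattern (Node and findWarnNode are invented names, not clang API):

    #include <iostream>

    // Each step either answers for itself, filling in the node to warn on,
    // or delegates to the subexpression it merely wraps.
    struct Node {
      const char *Name;
      Node *Sub;         // operand of a transparent wrapper (paren, cast, ...)
      bool Interesting;  // does this node itself merit a warning?
    };

    static bool findWarnNode(const Node *N, const Node *&WarnN) {
      if (N->Sub)
        return findWarnNode(N->Sub, WarnN);  // delegate, threading WarnN
      if (!N->Interesting)
        return false;
      WarnN = N;                             // answer for ourselves
      return true;
    }

    int main() {
      Node Cmp = {"comparison", 0, true};
      Node Paren = {"paren", &Cmp, false};
      const Node *WarnN = 0;
      if (findWarnNode(&Paren, WarnN))
        std::cout << "warn at: " << WarnN->Name << '\n'; // prints "comparison"
      return 0;
    }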
- if (Ctx.getCanonicalType(getType()).isVolatileQualified()) - return false; + WarnE = this; Loc = cast(this)->getMemberLoc(); R1 = SourceRange(Loc, Loc); R2 = cast(this)->getBase()->getSourceRange(); return true; case ArraySubscriptExprClass: - // If the base pointer or element is to a volatile pointer/field, accessing - // it is a side effect. - if (Ctx.getCanonicalType(getType()).isVolatileQualified()) - return false; + WarnE = this; Loc = cast(this)->getRBracketLoc(); R1 = cast(this)->getLHS()->getSourceRange(); R2 = cast(this)->getRHS()->getSourceRange(); @@ -1782,6 +1911,7 @@ bool Expr::isUnusedResultAWarning(SourceLocation &Loc, SourceRange &R1, const CXXOperatorCallExpr *Op = cast(this); if (Op->getOperator() == OO_EqualEqual || Op->getOperator() == OO_ExclaimEqual) { + WarnE = this; Loc = Op->getOperatorLoc(); R1 = Op->getSourceRange(); return true; @@ -1802,6 +1932,7 @@ bool Expr::isUnusedResultAWarning(SourceLocation &Loc, SourceRange &R1, // updated to match for QoI. if (FD->getAttr() || FD->getAttr() || FD->getAttr()) { + WarnE = this; Loc = CE->getCallee()->getLocStart(); R1 = CE->getCallee()->getSourceRange(); @@ -1826,6 +1957,7 @@ bool Expr::isUnusedResultAWarning(SourceLocation &Loc, SourceRange &R1, ME->getSelector().getIdentifierInfoForSlot(0) && ME->getSelector().getIdentifierInfoForSlot(0) ->getName().startswith("init")) { + WarnE = this; Loc = getExprLoc(); R1 = ME->getSourceRange(); return true; @@ -1833,6 +1965,7 @@ bool Expr::isUnusedResultAWarning(SourceLocation &Loc, SourceRange &R1, const ObjCMethodDecl *MD = ME->getMethodDecl(); if (MD && MD->getAttr()) { + WarnE = this; Loc = getExprLoc(); return true; } @@ -1840,6 +1973,7 @@ bool Expr::isUnusedResultAWarning(SourceLocation &Loc, SourceRange &R1, } case ObjCPropertyRefExprClass: + WarnE = this; Loc = getExprLoc(); R1 = getSourceRange(); return true; @@ -1852,6 +1986,7 @@ bool Expr::isUnusedResultAWarning(SourceLocation &Loc, SourceRange &R1, isa(PO->getSyntacticForm())) return false; + WarnE = this; Loc = getExprLoc(); R1 = getSourceRange(); return true; @@ -1866,50 +2001,67 @@ bool Expr::isUnusedResultAWarning(SourceLocation &Loc, SourceRange &R1, const CompoundStmt *CS = cast(this)->getSubStmt(); if (!CS->body_empty()) { if (const Expr *E = dyn_cast(CS->body_back())) - return E->isUnusedResultAWarning(Loc, R1, R2, Ctx); + return E->isUnusedResultAWarning(WarnE, Loc, R1, R2, Ctx); if (const LabelStmt *Label = dyn_cast(CS->body_back())) if (const Expr *E = dyn_cast(Label->getSubStmt())) - return E->isUnusedResultAWarning(Loc, R1, R2, Ctx); + return E->isUnusedResultAWarning(WarnE, Loc, R1, R2, Ctx); } if (getType()->isVoidType()) return false; + WarnE = this; Loc = cast(this)->getLParenLoc(); R1 = getSourceRange(); return true; } - case CStyleCastExprClass: - // If this is an explicit cast to void, allow it. People do this when they - // think they know what they're doing :). - if (getType()->isVoidType()) - return false; - Loc = cast(this)->getLParenLoc(); - R1 = cast(this)->getSubExpr()->getSourceRange(); - return true; - case CXXFunctionalCastExprClass: { - if (getType()->isVoidType()) - return false; + case CStyleCastExprClass: { + // Ignore an explicit cast to void unless the operand is a non-trivial + // volatile lvalue. const CastExpr *CE = cast(this); - - // If this is a cast to void or a constructor conversion, check the operand. 
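The rewritten cast handling that follows separates two user-visible situations: a value that is accidentally ignored and a value that is explicitly discarded. Roughly, under -Wunused-value (a sketch of the observable behavior, not of the compiler internals):

    void demo(int x, int y) {
      x == y;          // warns: the comparison result is never used
      (void)(x == y);  // a cast to void marks the discard as intentional,
                       // so no warning is emitted
    }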
+ if (CE->getCastKind() == CK_ToVoid) { + if (CE->getSubExpr()->isGLValue() && + CE->getSubExpr()->getType().isVolatileQualified()) { + const DeclRefExpr *DRE = + dyn_cast(CE->getSubExpr()->IgnoreParens()); + if (!(DRE && isa(DRE->getDecl()) && + cast(DRE->getDecl())->hasLocalStorage())) { + return CE->getSubExpr()->isUnusedResultAWarning(WarnE, Loc, + R1, R2, Ctx); + } + } + return false; + } + + // If this is a cast to a constructor conversion, check the operand. // Otherwise, the result of the cast is unused. - if (CE->getCastKind() == CK_ToVoid || - CE->getCastKind() == CK_ConstructorConversion) - return (cast(this)->getSubExpr() - ->isUnusedResultAWarning(Loc, R1, R2, Ctx)); - Loc = cast(this)->getTypeBeginLoc(); - R1 = cast(this)->getSubExpr()->getSourceRange(); + if (CE->getCastKind() == CK_ConstructorConversion) + return CE->getSubExpr()->isUnusedResultAWarning(WarnE, Loc, R1, R2, Ctx); + + WarnE = this; + if (const CXXFunctionalCastExpr *CXXCE = + dyn_cast(this)) { + Loc = CXXCE->getTypeBeginLoc(); + R1 = CXXCE->getSubExpr()->getSourceRange(); + } else { + const CStyleCastExpr *CStyleCE = cast(this); + Loc = CStyleCE->getLParenLoc(); + R1 = CStyleCE->getSubExpr()->getSourceRange(); + } return true; } + case ImplicitCastExprClass: { + const CastExpr *ICE = cast(this); - case ImplicitCastExprClass: - // Check the operand, since implicit casts are inserted by Sema - return (cast(this) - ->getSubExpr()->isUnusedResultAWarning(Loc, R1, R2, Ctx)); + // lvalue-to-rvalue conversion on a volatile lvalue is a side-effect. + if (ICE->getCastKind() == CK_LValueToRValue && + ICE->getSubExpr()->getType().isVolatileQualified()) + return false; + return ICE->getSubExpr()->isUnusedResultAWarning(WarnE, Loc, R1, R2, Ctx); + } case CXXDefaultArgExprClass: return (cast(this) - ->getExpr()->isUnusedResultAWarning(Loc, R1, R2, Ctx)); + ->getExpr()->isUnusedResultAWarning(WarnE, Loc, R1, R2, Ctx)); case CXXNewExprClass: // FIXME: In theory, there might be new expressions that don't have side @@ -1918,10 +2070,10 @@ bool Expr::isUnusedResultAWarning(SourceLocation &Loc, SourceRange &R1, return false; case CXXBindTemporaryExprClass: return (cast(this) - ->getSubExpr()->isUnusedResultAWarning(Loc, R1, R2, Ctx)); + ->getSubExpr()->isUnusedResultAWarning(WarnE, Loc, R1, R2, Ctx)); case ExprWithCleanupsClass: return (cast(this) - ->getSubExpr()->isUnusedResultAWarning(Loc, R1, R2, Ctx)); + ->getSubExpr()->isUnusedResultAWarning(WarnE, Loc, R1, R2, Ctx)); } } @@ -2096,7 +2248,27 @@ Expr *Expr::IgnoreParenLValueCasts() { } return E; } - + +Expr *Expr::ignoreParenBaseCasts() { + Expr *E = this; + while (true) { + if (ParenExpr *P = dyn_cast(E)) { + E = P->getSubExpr(); + continue; + } + if (CastExpr *CE = dyn_cast(E)) { + if (CE->getCastKind() == CK_DerivedToBase || + CE->getCastKind() == CK_UncheckedDerivedToBase || + CE->getCastKind() == CK_NoOp) { + E = CE->getSubExpr(); + continue; + } + } + + return E; + } +} + Expr *Expr::IgnoreParenImpCasts() { Expr *E = this; while (true) { @@ -2267,6 +2439,10 @@ bool Expr::isTemporaryObject(ASTContext &C, const CXXRecordDecl *TempTy) const { if (isa(E)) return false; + if (const BinaryOperator *BO = dyn_cast(E)) + if (BO->isPtrMemOp()) + return false; + // - opaque values (all) if (isa(E)) return false; @@ -2446,6 +2622,207 @@ bool Expr::isConstantInitializer(ASTContext &Ctx, bool IsForRef) const { return isEvaluatable(Ctx); } +bool Expr::HasSideEffects(const ASTContext &Ctx) const { + if (isInstantiationDependent()) + return true; + + switch (getStmtClass()) { + case 
NoStmtClass: + #define ABSTRACT_STMT(Type) + #define STMT(Type, Base) case Type##Class: + #define EXPR(Type, Base) + #include "clang/AST/StmtNodes.inc" + llvm_unreachable("unexpected Expr kind"); + + case DependentScopeDeclRefExprClass: + case CXXUnresolvedConstructExprClass: + case CXXDependentScopeMemberExprClass: + case UnresolvedLookupExprClass: + case UnresolvedMemberExprClass: + case PackExpansionExprClass: + case SubstNonTypeTemplateParmPackExprClass: + llvm_unreachable("shouldn't see dependent / unresolved nodes here"); + + case DeclRefExprClass: + case ObjCIvarRefExprClass: + case PredefinedExprClass: + case IntegerLiteralClass: + case FloatingLiteralClass: + case ImaginaryLiteralClass: + case StringLiteralClass: + case CharacterLiteralClass: + case OffsetOfExprClass: + case ImplicitValueInitExprClass: + case UnaryExprOrTypeTraitExprClass: + case AddrLabelExprClass: + case GNUNullExprClass: + case CXXBoolLiteralExprClass: + case CXXNullPtrLiteralExprClass: + case CXXThisExprClass: + case CXXScalarValueInitExprClass: + case TypeTraitExprClass: + case UnaryTypeTraitExprClass: + case BinaryTypeTraitExprClass: + case ArrayTypeTraitExprClass: + case ExpressionTraitExprClass: + case CXXNoexceptExprClass: + case SizeOfPackExprClass: + case ObjCStringLiteralClass: + case ObjCEncodeExprClass: + case ObjCBoolLiteralExprClass: + case CXXUuidofExprClass: + case OpaqueValueExprClass: + // These never have a side-effect. + return false; + + case CallExprClass: + case CompoundAssignOperatorClass: + case VAArgExprClass: + case AtomicExprClass: + case StmtExprClass: + case CXXOperatorCallExprClass: + case CXXMemberCallExprClass: + case UserDefinedLiteralClass: + case CXXThrowExprClass: + case CXXNewExprClass: + case CXXDeleteExprClass: + case ExprWithCleanupsClass: + case CXXBindTemporaryExprClass: + case BlockExprClass: + case CUDAKernelCallExprClass: + // These always have a side-effect. + return true; + + case ParenExprClass: + case ArraySubscriptExprClass: + case MemberExprClass: + case ConditionalOperatorClass: + case BinaryConditionalOperatorClass: + case CompoundLiteralExprClass: + case ExtVectorElementExprClass: + case DesignatedInitExprClass: + case ParenListExprClass: + case CXXPseudoDestructorExprClass: + case SubstNonTypeTemplateParmExprClass: + case MaterializeTemporaryExprClass: + case ShuffleVectorExprClass: + case AsTypeExprClass: + // These have a side-effect if any subexpression does. + break; + + case UnaryOperatorClass: + if (cast(this)->isIncrementDecrementOp()) + return true; + break; + + case BinaryOperatorClass: + if (cast(this)->isAssignmentOp()) + return true; + break; + + case InitListExprClass: + // FIXME: The children for an InitListExpr doesn't include the array filler. + if (const Expr *E = cast(this)->getArrayFiller()) + if (E->HasSideEffects(Ctx)) + return true; + break; + + case GenericSelectionExprClass: + return cast(this)->getResultExpr()-> + HasSideEffects(Ctx); + + case ChooseExprClass: + return cast(this)->getChosenSubExpr(Ctx)->HasSideEffects(Ctx); + + case CXXDefaultArgExprClass: + return cast(this)->getExpr()->HasSideEffects(Ctx); + + case CXXDynamicCastExprClass: { + // A dynamic_cast expression has side-effects if it can throw. + const CXXDynamicCastExpr *DCE = cast(this); + if (DCE->getTypeAsWritten()->isReferenceType() && + DCE->getCastKind() == CK_Dynamic) + return true; + } // Fall through. 
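The dynamic_cast case above leans on a language rule worth spelling out: only the reference form can throw on failure, so only it is unconditionally a side effect. A small runnable illustration:

    #include <iostream>
    #include <typeinfo>

    struct Base { virtual ~Base() {} };
    struct Derived : Base {};

    int main() {
      Base B;
      try {
        Derived &D = dynamic_cast<Derived &>(B); // failure throws std::bad_cast
        (void)D;
      } catch (const std::bad_cast &) {
        std::cout << "bad_cast thrown\n";
      }
      Derived *P = dynamic_cast<Derived *>(&B);  // failure yields a null pointer
      std::cout << (P ? "non-null" : "null") << '\n';
      return 0;
    }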
+ case ImplicitCastExprClass: + case CStyleCastExprClass: + case CXXStaticCastExprClass: + case CXXReinterpretCastExprClass: + case CXXConstCastExprClass: + case CXXFunctionalCastExprClass: { + const CastExpr *CE = cast(this); + if (CE->getCastKind() == CK_LValueToRValue && + CE->getSubExpr()->getType().isVolatileQualified()) + return true; + break; + } + + case CXXTypeidExprClass: + // typeid might throw if its subexpression is potentially-evaluated, so has + // side-effects in that case whether or not its subexpression does. + return cast(this)->isPotentiallyEvaluated(); + + case CXXConstructExprClass: + case CXXTemporaryObjectExprClass: { + const CXXConstructExpr *CE = cast(this); + if (!CE->getConstructor()->isTrivial()) + return true; + // A trivial constructor does not add any side-effects of its own. Just look + // at its arguments. + break; + } + + case LambdaExprClass: { + const LambdaExpr *LE = cast(this); + for (LambdaExpr::capture_iterator I = LE->capture_begin(), + E = LE->capture_end(); I != E; ++I) + if (I->getCaptureKind() == LCK_ByCopy) + // FIXME: Only has a side-effect if the variable is volatile or if + // the copy would invoke a non-trivial copy constructor. + return true; + return false; + } + + case PseudoObjectExprClass: { + // Only look for side-effects in the semantic form, and look past + // OpaqueValueExpr bindings in that form. + const PseudoObjectExpr *PO = cast(this); + for (PseudoObjectExpr::const_semantics_iterator I = PO->semantics_begin(), + E = PO->semantics_end(); + I != E; ++I) { + const Expr *Subexpr = *I; + if (const OpaqueValueExpr *OVE = dyn_cast(Subexpr)) + Subexpr = OVE->getSourceExpr(); + if (Subexpr->HasSideEffects(Ctx)) + return true; + } + return false; + } + + case ObjCBoxedExprClass: + case ObjCArrayLiteralClass: + case ObjCDictionaryLiteralClass: + case ObjCMessageExprClass: + case ObjCSelectorExprClass: + case ObjCProtocolExprClass: + case ObjCPropertyRefExprClass: + case ObjCIsaExprClass: + case ObjCIndirectCopyRestoreExprClass: + case ObjCSubscriptRefExprClass: + case ObjCBridgedCastExprClass: + // FIXME: Classify these cases better. + return true; + } + + // Recurse to children. + for (const_child_range SubStmts = children(); SubStmts; ++SubStmts) + if (const Stmt *S = *SubStmts) + if (cast(S)->HasSideEffects(Ctx)) + return true; + + return false; +} + namespace { /// \brief Look for a call to a non-trivial function within an expression. class NonTrivialCallFinder : public EvaluatedExprVisitor @@ -2514,7 +2891,7 @@ Expr::isNullPointerConstant(ASTContext &Ctx, llvm_unreachable("Unexpected value dependent expression!"); case NPC_ValueDependentIsNull: if (isTypeDependent() || getType()->isIntegralType(Ctx)) - return NPCK_ZeroInteger; + return NPCK_ZeroExpression; else return NPCK_NotNull; @@ -2588,7 +2965,12 @@ Expr::isNullPointerConstant(ASTContext &Ctx, return NPCK_NotNull; } - return (EvaluateKnownConstInt(Ctx) == 0) ? 
NPCK_ZeroInteger : NPCK_NotNull;
+  if (EvaluateKnownConstInt(Ctx) != 0)
+    return NPCK_NotNull;
+
+  if (isa<IntegerLiteral>(this))
+    return NPCK_ZeroLiteral;
+  return NPCK_ZeroExpression;
 }
 
 /// \brief If this expression is an l-value for an Objective C
diff --git a/lib/AST/ExprCXX.cpp b/lib/AST/ExprCXX.cpp
index 8cf519c..3fa49e0 100644
--- a/lib/AST/ExprCXX.cpp
+++ b/lib/AST/ExprCXX.cpp
@@ -12,6 +12,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "clang/Basic/IdentifierTable.h"
+#include "clang/AST/ASTContext.h"
 #include "clang/AST/DeclCXX.h"
 #include "clang/AST/DeclTemplate.h"
 #include "clang/AST/ExprCXX.h"
@@ -23,6 +24,21 @@ using namespace clang;
 //===----------------------------------------------------------------------===//
 //  Child Iterators for iterating over subexpressions/substatements
 //===----------------------------------------------------------------------===//
 
+bool CXXTypeidExpr::isPotentiallyEvaluated() const {
+  if (isTypeOperand())
+    return false;
+
+  // C++11 [expr.typeid]p3:
+  //   When typeid is applied to an expression other than a glvalue of
+  //   polymorphic class type, [...] the expression is an unevaluated operand.
+  const Expr *E = getExprOperand();
+  if (const CXXRecordDecl *RD = E->getType()->getAsCXXRecordDecl())
+    if (RD->isPolymorphic() && E->isGLValue())
+      return true;
+
+  return false;
+}
+
 QualType CXXTypeidExpr::getTypeOperand() const {
   assert(isTypeOperand() && "Cannot call getTypeOperand for typeid(expr)");
   return Operand.get<TypeSourceInfo *>()->getType().getNonReferenceType()
@@ -126,13 +142,6 @@ SourceLocation CXXNewExpr::getEndLoc() const {
 
 // CXXDeleteExpr
 QualType CXXDeleteExpr::getDestroyedType() const {
   const Expr *Arg = getArgument();
-  while (const ImplicitCastExpr *ICE = dyn_cast<ImplicitCastExpr>(Arg)) {
-    if (ICE->getCastKind() != CK_UserDefinedConversion &&
-        ICE->getType()->isVoidPointerType())
-      Arg = ICE->getSubExpr();
-    else
-      break;
-  }
   // The type-to-delete may not be a pointer if it's a dependent type.
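The isPotentiallyEvaluated predicate introduced in this hunk encodes C++11 [expr.typeid]p3. In user terms (Poly and g are illustrative declarations):

    #include <typeinfo>

    struct Poly { virtual ~Poly() {} };
    int g();

    void demo(Poly *p) {
      (void)typeid(int);  // type operand: nothing is evaluated
      (void)typeid(g());  // non-polymorphic operand: unevaluated, g is not called
      (void)typeid(*p);   // glvalue of polymorphic type: potentially evaluated,
                          // and throws std::bad_typeid when p is null
    }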
const QualType ArgType = Arg->getType(); @@ -268,6 +277,7 @@ OverloadExpr::OverloadExpr(StmtClass K, ASTContext &C, isa(*I)) { ExprBits.TypeDependent = true; ExprBits.ValueDependent = true; + ExprBits.InstantiationDependent = true; } } @@ -415,37 +425,37 @@ SourceRange CXXConstructExpr::getSourceRange() const { return SourceRange(Loc, End); } -SourceRange CXXOperatorCallExpr::getSourceRange() const { +SourceRange CXXOperatorCallExpr::getSourceRangeImpl() const { OverloadedOperatorKind Kind = getOperator(); if (Kind == OO_PlusPlus || Kind == OO_MinusMinus) { if (getNumArgs() == 1) // Prefix operator - return SourceRange(getOperatorLoc(), - getArg(0)->getSourceRange().getEnd()); + return SourceRange(getOperatorLoc(), getArg(0)->getLocEnd()); else // Postfix operator - return SourceRange(getArg(0)->getSourceRange().getBegin(), - getOperatorLoc()); + return SourceRange(getArg(0)->getLocStart(), getOperatorLoc()); } else if (Kind == OO_Arrow) { return getArg(0)->getSourceRange(); } else if (Kind == OO_Call) { - return SourceRange(getArg(0)->getSourceRange().getBegin(), getRParenLoc()); + return SourceRange(getArg(0)->getLocStart(), getRParenLoc()); } else if (Kind == OO_Subscript) { - return SourceRange(getArg(0)->getSourceRange().getBegin(), getRParenLoc()); + return SourceRange(getArg(0)->getLocStart(), getRParenLoc()); } else if (getNumArgs() == 1) { - return SourceRange(getOperatorLoc(), getArg(0)->getSourceRange().getEnd()); + return SourceRange(getOperatorLoc(), getArg(0)->getLocEnd()); } else if (getNumArgs() == 2) { - return SourceRange(getArg(0)->getSourceRange().getBegin(), - getArg(1)->getSourceRange().getEnd()); + return SourceRange(getArg(0)->getLocStart(), getArg(1)->getLocEnd()); } else { - return SourceRange(); + return getOperatorLoc(); } } Expr *CXXMemberCallExpr::getImplicitObjectArgument() const { - if (const MemberExpr *MemExpr = - dyn_cast(getCallee()->IgnoreParens())) + const Expr *Callee = getCallee()->IgnoreParens(); + if (const MemberExpr *MemExpr = dyn_cast(Callee)) return MemExpr->getBase(); + if (const BinaryOperator *BO = dyn_cast(Callee)) + if (BO->getOpcode() == BO_PtrMemD || BO->getOpcode() == BO_PtrMemI) + return BO->getLHS(); // FIXME: Will eventually need to cope with member pointers. 
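getImplicitObjectArgument previously recognized only the plain member call; the new BO_PtrMemD and BO_PtrMemI cases also recover the object from calls through a pointer to member function. For reference (S and pmf are illustrative):

    struct S {
      int f() { return 42; }
    };

    int demo(S s, S *ps, int (S::*pmf)()) {
      int a = s.f();         // callee is a MemberExpr; object argument is 's'
      int b = (s.*pmf)();    // callee is a BinaryOperator with BO_PtrMemD;
                             // its LHS 's' is the implicit object argument
      int c = (ps->*pmf)();  // BO_PtrMemI: the LHS 'ps' points at the object
      return a + b + c;
    }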
return 0; @@ -461,7 +471,7 @@ CXXMethodDecl *CXXMemberCallExpr::getMethodDecl() const { } -CXXRecordDecl *CXXMemberCallExpr::getRecordDecl() { +CXXRecordDecl *CXXMemberCallExpr::getRecordDecl() const { Expr* ThisArg = getImplicitObjectArgument(); if (!ThisArg) return 0; @@ -556,6 +566,9 @@ bool CXXDynamicCastExpr::isAlwaysNull() const DestType = DestType->castAs()->getPointeeType(); } + if (DestType->isVoidType()) + return false; + const CXXRecordDecl *SrcRD = cast(SrcType->castAs()->getDecl()); @@ -796,10 +809,11 @@ LambdaExpr::LambdaExpr(QualType T, ArrayRef CaptureInits, ArrayRef ArrayIndexVars, ArrayRef ArrayIndexStarts, - SourceLocation ClosingBrace) + SourceLocation ClosingBrace, + bool ContainsUnexpandedParameterPack) : Expr(LambdaExprClass, T, VK_RValue, OK_Ordinary, T->isDependentType(), T->isDependentType(), T->isDependentType(), - /*ContainsUnexpandedParameterPack=*/false), + ContainsUnexpandedParameterPack), IntroducerRange(IntroducerRange), NumCaptures(Captures.size()), CaptureDefault(CaptureDefault), @@ -856,7 +870,8 @@ LambdaExpr *LambdaExpr::Create(ASTContext &Context, ArrayRef CaptureInits, ArrayRef ArrayIndexVars, ArrayRef ArrayIndexStarts, - SourceLocation ClosingBrace) { + SourceLocation ClosingBrace, + bool ContainsUnexpandedParameterPack) { // Determine the type of the expression (i.e., the type of the // function object we're creating). QualType T = Context.getTypeDeclType(Class); @@ -869,7 +884,7 @@ LambdaExpr *LambdaExpr::Create(ASTContext &Context, return new (Mem) LambdaExpr(T, IntroducerRange, CaptureDefault, Captures, ExplicitParams, ExplicitResultType, CaptureInits, ArrayIndexVars, ArrayIndexStarts, - ClosingBrace); + ClosingBrace, ContainsUnexpandedParameterPack); } LambdaExpr *LambdaExpr::CreateDeserialized(ASTContext &C, unsigned NumCaptures, @@ -944,7 +959,7 @@ CompoundStmt *LambdaExpr::getBody() const { } bool LambdaExpr::isMutable() const { - return (getCallOperator()->getTypeQualifiers() & Qualifiers::Const) == 0; + return !getCallOperator()->isConst(); } ExprWithCleanups::ExprWithCleanups(Expr *subexpr, diff --git a/lib/AST/ExprClassification.cpp b/lib/AST/ExprClassification.cpp index b091e19..f16d70b 100644 --- a/lib/AST/ExprClassification.cpp +++ b/lib/AST/ExprClassification.cpp @@ -77,6 +77,7 @@ Cl Expr::ClassifyImpl(ASTContext &Ctx, SourceLocation *Loc) const { case Cl::CL_MemberFunction: case Cl::CL_SubObjCPropertySetting: case Cl::CL_ClassTemporary: + case Cl::CL_ArrayTemporary: case Cl::CL_ObjCMessageRValue: case Cl::CL_PRValue: assert(getValueKind() == VK_RValue); break; } @@ -87,6 +88,18 @@ Cl Expr::ClassifyImpl(ASTContext &Ctx, SourceLocation *Loc) const { return Classification(kind, modifiable); } +/// Classify an expression which creates a temporary, based on its type. +static Cl::Kinds ClassifyTemporary(QualType T) { + if (T->isRecordType()) + return Cl::CL_ClassTemporary; + if (T->isArrayType()) + return Cl::CL_ArrayTemporary; + + // No special classification: these don't behave differently from normal + // prvalues. + return Cl::CL_PRValue; +} + static Cl::Kinds ClassifyInternal(ASTContext &Ctx, const Expr *E) { // This function takes the first stab at classifying expressions. const LangOptions &Lang = Ctx.getLangOpts(); @@ -124,10 +137,10 @@ static Cl::Kinds ClassifyInternal(ASTContext &Ctx, const Expr *E) { return Cl::CL_LValue; // C99 6.5.2.5p5 says that compound literals are lvalues. - // In C++, they're class temporaries. + // In C++, they're prvalue temporaries. 
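That classification is visible at the source level. A sketch, relying on Clang's GNU-extension acceptance of compound literals in C++:

    struct Pt { int x, y; };
    typedef int Arr[2];

    void demo() {
      (void)(Pt){1, 2};   // C: an lvalue (C99 6.5.2.5p5);
                          // C++: a class temporary (CL_ClassTemporary)
      (void)(Arr){1, 2};  // C++: an array temporary, the new CL_ArrayTemporary
    }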
case Expr::CompoundLiteralExprClass: - return Ctx.getLangOpts().CPlusPlus? Cl::CL_ClassTemporary - : Cl::CL_LValue; + return Ctx.getLangOpts().CPlusPlus ? ClassifyTemporary(E->getType()) + : Cl::CL_LValue; // Expressions that are prvalues. case Expr::CXXBoolLiteralExprClass: @@ -158,7 +171,7 @@ static Cl::Kinds ClassifyInternal(ASTContext &Ctx, const Expr *E) { case Expr::ObjCSelectorExprClass: case Expr::ObjCProtocolExprClass: case Expr::ObjCStringLiteralClass: - case Expr::ObjCNumericLiteralClass: + case Expr::ObjCBoxedExprClass: case Expr::ObjCArrayLiteralClass: case Expr::ObjCDictionaryLiteralClass: case Expr::ObjCBoolLiteralExprClass: @@ -417,7 +430,7 @@ static Cl::Kinds ClassifyUnnamed(ASTContext &Ctx, QualType T) { return Cl::CL_LValue; const RValueReferenceType *RV = T->getAs(); if (!RV) // Could still be a class temporary, though. - return T->isRecordType() ? Cl::CL_ClassTemporary : Cl::CL_PRValue; + return ClassifyTemporary(T); return RV->getPointeeType()->isFunctionType() ? Cl::CL_LValue : Cl::CL_XValue; } @@ -602,6 +615,7 @@ Expr::LValueClassification Expr::ClassifyLValue(ASTContext &Ctx) const { case Cl::CL_MemberFunction: return LV_MemberFunction; case Cl::CL_SubObjCPropertySetting: return LV_SubObjCPropertySetting; case Cl::CL_ClassTemporary: return LV_ClassTemporary; + case Cl::CL_ArrayTemporary: return LV_ArrayTemporary; case Cl::CL_ObjCMessageRValue: return LV_InvalidMessageExpression; case Cl::CL_PRValue: return LV_InvalidExpression; } @@ -622,6 +636,7 @@ Expr::isModifiableLvalue(ASTContext &Ctx, SourceLocation *Loc) const { case Cl::CL_MemberFunction: return MLV_MemberFunction; case Cl::CL_SubObjCPropertySetting: return MLV_SubObjCPropertySetting; case Cl::CL_ClassTemporary: return MLV_ClassTemporary; + case Cl::CL_ArrayTemporary: return MLV_ArrayTemporary; case Cl::CL_ObjCMessageRValue: return MLV_InvalidMessageExpression; case Cl::CL_PRValue: return VC.getModifiable() == Cl::CM_LValueCast ? diff --git a/lib/AST/ExprConstant.cpp b/lib/AST/ExprConstant.cpp index 66a88b0..06c41a2 100644 --- a/lib/AST/ExprConstant.cpp +++ b/lib/AST/ExprConstant.cpp @@ -287,7 +287,9 @@ namespace { /// parameters' function scope indices. const APValue *Arguments; - typedef llvm::DenseMap MapTy; + // Note that we intentionally use std::map here so that references to + // values are stable. + typedef std::map MapTy; typedef MapTy::const_iterator temp_iterator; /// Temporaries - Temporary lvalues materialized within this stack frame. MapTy Temporaries; @@ -361,11 +363,6 @@ namespace { /// NextCallIndex - The next call index to assign. unsigned NextCallIndex; - typedef llvm::DenseMap MapTy; - /// OpaqueValues - Values used as the common expression in a - /// BinaryConditionalOperator. - MapTy OpaqueValues; - /// BottomFrame - The frame in which evaluation started. This must be /// initialized after CurrentCall and CallStackDepth. 
CallStackFrame BottomFrame; @@ -394,12 +391,6 @@ namespace { EvaluatingDecl(0), EvaluatingDeclValue(0), HasActiveDiagnostic(false), CheckingPotentialConstantExpression(false) {} - const APValue *getOpaqueValue(const OpaqueValueExpr *e) const { - MapTy::const_iterator i = OpaqueValues.find(e); - if (i == OpaqueValues.end()) return 0; - return &i->second; - } - void setEvaluatingDecl(const VarDecl *VD, APValue &Value) { EvaluatingDecl = VD; EvaluatingDeclValue = &Value; @@ -1072,8 +1063,8 @@ static bool CheckConstantExpression(EvalInfo &Info, SourceLocation DiagLoc, } for (RecordDecl::field_iterator I = RD->field_begin(), E = RD->field_end(); I != E; ++I) { - if (!CheckConstantExpression(Info, DiagLoc, (*I)->getType(), - Value.getStructField((*I)->getFieldIndex()))) + if (!CheckConstantExpression(Info, DiagLoc, I->getType(), + Value.getStructField(I->getFieldIndex()))) return false; } } @@ -1160,11 +1151,10 @@ static bool EvaluateAsBooleanCondition(const Expr *E, bool &Result, } template -static bool HandleOverflow(EvalInfo &Info, const Expr *E, +static void HandleOverflow(EvalInfo &Info, const Expr *E, const T &SrcValue, QualType DestType) { - Info.Diag(E, diag::note_constexpr_overflow) + Info.CCEDiag(E, diag::note_constexpr_overflow) << SrcValue << DestType; - return false; } static bool HandleFloatToIntCast(EvalInfo &Info, const Expr *E, @@ -1178,7 +1168,7 @@ static bool HandleFloatToIntCast(EvalInfo &Info, const Expr *E, bool ignored; if (Value.convertToInteger(Result, llvm::APFloat::rmTowardZero, &ignored) & APFloat::opInvalidOp) - return HandleOverflow(Info, E, Value, DestType); + HandleOverflow(Info, E, Value, DestType); return true; } @@ -1190,7 +1180,7 @@ static bool HandleFloatToFloatCast(EvalInfo &Info, const Expr *E, if (Result.convert(Info.Ctx.getFloatTypeSemantics(DestType), APFloat::rmNearestTiesToEven, &ignored) & APFloat::opOverflow) - return HandleOverflow(Info, E, Value, DestType); + HandleOverflow(Info, E, Value, DestType); return true; } @@ -1213,7 +1203,7 @@ static bool HandleIntToFloatCast(EvalInfo &Info, const Expr *E, if (Result.convertFromAPInt(Value, Value.isSigned(), APFloat::rmNearestTiesToEven) & APFloat::opOverflow) - return HandleOverflow(Info, E, Value, DestType); + HandleOverflow(Info, E, Value, DestType); return true; } @@ -1282,6 +1272,7 @@ static bool CastToDerivedClass(EvalInfo &Info, const Expr *E, LValue &Result, // Truncate the path to the subobject, and remove any derived-to-base offsets. 
const RecordDecl *RD = TruncatedType; for (unsigned I = TruncatedElements, N = D.Entries.size(); I != N; ++I) { + if (RD->isInvalidDecl()) return false; const ASTRecordLayout &Layout = Info.Ctx.getASTRecordLayout(RD); const CXXRecordDecl *Base = getAsBaseClass(D.Entries[I]); if (isVirtualBaseClass(D.Entries[I])) @@ -1294,13 +1285,18 @@ static bool CastToDerivedClass(EvalInfo &Info, const Expr *E, LValue &Result, return true; } -static void HandleLValueDirectBase(EvalInfo &Info, const Expr *E, LValue &Obj, +static bool HandleLValueDirectBase(EvalInfo &Info, const Expr *E, LValue &Obj, const CXXRecordDecl *Derived, const CXXRecordDecl *Base, const ASTRecordLayout *RL = 0) { - if (!RL) RL = &Info.Ctx.getASTRecordLayout(Derived); + if (!RL) { + if (Derived->isInvalidDecl()) return false; + RL = &Info.Ctx.getASTRecordLayout(Derived); + } + Obj.getLValueOffset() += RL->getBaseClassOffset(Base); Obj.addDecl(Info, E, Base, /*Virtual*/ false); + return true; } static bool HandleLValueBase(EvalInfo &Info, const Expr *E, LValue &Obj, @@ -1308,10 +1304,8 @@ static bool HandleLValueBase(EvalInfo &Info, const Expr *E, LValue &Obj, const CXXBaseSpecifier *Base) { const CXXRecordDecl *BaseDecl = Base->getType()->getAsCXXRecordDecl(); - if (!Base->isVirtual()) { - HandleLValueDirectBase(Info, E, Obj, DerivedDecl, BaseDecl); - return true; - } + if (!Base->isVirtual()) + return HandleLValueDirectBase(Info, E, Obj, DerivedDecl, BaseDecl); SubobjectDesignator &D = Obj.Designator; if (D.Invalid) @@ -1323,6 +1317,7 @@ static bool HandleLValueBase(EvalInfo &Info, const Expr *E, LValue &Obj, return false; // Find the virtual base class. + if (DerivedDecl->isInvalidDecl()) return false; const ASTRecordLayout &Layout = Info.Ctx.getASTRecordLayout(DerivedDecl); Obj.getLValueOffset() += Layout.getVBaseClassOffset(BaseDecl); Obj.addDecl(Info, E, BaseDecl, /*Virtual*/ true); @@ -1331,24 +1326,29 @@ static bool HandleLValueBase(EvalInfo &Info, const Expr *E, LValue &Obj, /// Update LVal to refer to the given field, which must be a member of the type /// currently described by LVal. -static void HandleLValueMember(EvalInfo &Info, const Expr *E, LValue &LVal, +static bool HandleLValueMember(EvalInfo &Info, const Expr *E, LValue &LVal, const FieldDecl *FD, const ASTRecordLayout *RL = 0) { - if (!RL) + if (!RL) { + if (FD->getParent()->isInvalidDecl()) return false; RL = &Info.Ctx.getASTRecordLayout(FD->getParent()); + } unsigned I = FD->getFieldIndex(); LVal.Offset += Info.Ctx.toCharUnitsFromBits(RL->getFieldOffset(I)); LVal.addDecl(Info, E, FD); + return true; } /// Update LVal to refer to the given indirect field. -static void HandleLValueIndirectMember(EvalInfo &Info, const Expr *E, +static bool HandleLValueIndirectMember(EvalInfo &Info, const Expr *E, LValue &LVal, const IndirectFieldDecl *IFD) { for (IndirectFieldDecl::chain_iterator C = IFD->chain_begin(), CE = IFD->chain_end(); C != CE; ++C) - HandleLValueMember(Info, E, LVal, cast(*C)); + if (!HandleLValueMember(Info, E, LVal, cast(*C))) + return false; + return true; } /// Get the size of the given type in char units. @@ -1952,22 +1952,27 @@ static const ValueDecl *HandleMemberPointerAccess(EvalInfo &Info, // The first class in the path is that of the lvalue. for (unsigned I = 1, N = MemPtr.Path.size(); I != N; ++I) { const CXXRecordDecl *Base = MemPtr.Path[N - I - 1]; - HandleLValueDirectBase(Info, BO, LV, RD, Base); + if (!HandleLValueDirectBase(Info, BO, LV, RD, Base)) + return 0; RD = Base; } // Finally cast to the class containing the member. 
- HandleLValueDirectBase(Info, BO, LV, RD, MemPtr.getContainingRecord()); + if (!HandleLValueDirectBase(Info, BO, LV, RD, MemPtr.getContainingRecord())) + return 0; } // Add the member. Note that we cannot build bound member functions here. if (IncludeMember) { - if (const FieldDecl *FD = dyn_cast(MemPtr.getDecl())) - HandleLValueMember(Info, BO, LV, FD); - else if (const IndirectFieldDecl *IFD = - dyn_cast(MemPtr.getDecl())) - HandleLValueIndirectMember(Info, BO, LV, IFD); - else + if (const FieldDecl *FD = dyn_cast(MemPtr.getDecl())) { + if (!HandleLValueMember(Info, BO, LV, FD)) + return 0; + } else if (const IndirectFieldDecl *IFD = + dyn_cast(MemPtr.getDecl())) { + if (!HandleLValueIndirectMember(Info, BO, LV, IFD)) + return 0; + } else { llvm_unreachable("can't construct reference to bound member function"); + } } return MemPtr.getDecl(); @@ -2189,6 +2194,7 @@ static bool HandleConstructorCall(SourceLocation CallLoc, const LValue &This, Result = APValue(APValue::UninitStruct(), RD->getNumBases(), std::distance(RD->field_begin(), RD->field_end())); + if (RD->isInvalidDecl()) return false; const ASTRecordLayout &Layout = Info.Ctx.getASTRecordLayout(RD); bool Success = true; @@ -2212,11 +2218,13 @@ static bool HandleConstructorCall(SourceLocation CallLoc, const LValue &This, "base class initializers not in expected order"); ++BaseIt; #endif - HandleLValueDirectBase(Info, (*I)->getInit(), Subobject, RD, - BaseType->getAsCXXRecordDecl(), &Layout); + if (!HandleLValueDirectBase(Info, (*I)->getInit(), Subobject, RD, + BaseType->getAsCXXRecordDecl(), &Layout)) + return false; Value = &Result.getStructBase(BasesSeen++); } else if (FieldDecl *FD = (*I)->getMember()) { - HandleLValueMember(Info, (*I)->getInit(), Subobject, FD, &Layout); + if (!HandleLValueMember(Info, (*I)->getInit(), Subobject, FD, &Layout)) + return false; if (RD->isUnion()) { Result = APValue(FD); Value = &Result.getUnionValue(); @@ -2244,7 +2252,8 @@ static bool HandleConstructorCall(SourceLocation CallLoc, const LValue &This, *Value = APValue(APValue::UninitStruct(), CD->getNumBases(), std::distance(CD->field_begin(), CD->field_end())); } - HandleLValueMember(Info, (*I)->getInit(), Subobject, FD); + if (!HandleLValueMember(Info, (*I)->getInit(), Subobject, FD)) + return false; if (CD->isUnion()) Value = &Value->getUnionValue(); else @@ -2268,107 +2277,6 @@ static bool HandleConstructorCall(SourceLocation CallLoc, const LValue &This, return Success; } -namespace { -class HasSideEffect - : public ConstStmtVisitor { - const ASTContext &Ctx; -public: - - HasSideEffect(const ASTContext &C) : Ctx(C) {} - - // Unhandled nodes conservatively default to having side effects. - bool VisitStmt(const Stmt *S) { - return true; - } - - bool VisitParenExpr(const ParenExpr *E) { return Visit(E->getSubExpr()); } - bool VisitGenericSelectionExpr(const GenericSelectionExpr *E) { - return Visit(E->getResultExpr()); - } - bool VisitDeclRefExpr(const DeclRefExpr *E) { - if (Ctx.getCanonicalType(E->getType()).isVolatileQualified()) - return true; - return false; - } - bool VisitObjCIvarRefExpr(const ObjCIvarRefExpr *E) { - if (Ctx.getCanonicalType(E->getType()).isVolatileQualified()) - return true; - return false; - } - - // We don't want to evaluate BlockExprs multiple times, as they generate - // a ton of code. 
- bool VisitBlockExpr(const BlockExpr *E) { return true; } - bool VisitPredefinedExpr(const PredefinedExpr *E) { return false; } - bool VisitCompoundLiteralExpr(const CompoundLiteralExpr *E) - { return Visit(E->getInitializer()); } - bool VisitMemberExpr(const MemberExpr *E) { return Visit(E->getBase()); } - bool VisitIntegerLiteral(const IntegerLiteral *E) { return false; } - bool VisitFloatingLiteral(const FloatingLiteral *E) { return false; } - bool VisitStringLiteral(const StringLiteral *E) { return false; } - bool VisitCharacterLiteral(const CharacterLiteral *E) { return false; } - bool VisitUnaryExprOrTypeTraitExpr(const UnaryExprOrTypeTraitExpr *E) - { return false; } - bool VisitArraySubscriptExpr(const ArraySubscriptExpr *E) - { return Visit(E->getLHS()) || Visit(E->getRHS()); } - bool VisitChooseExpr(const ChooseExpr *E) - { return Visit(E->getChosenSubExpr(Ctx)); } - bool VisitCastExpr(const CastExpr *E) { return Visit(E->getSubExpr()); } - bool VisitBinAssign(const BinaryOperator *E) { return true; } - bool VisitCompoundAssignOperator(const BinaryOperator *E) { return true; } - bool VisitBinaryOperator(const BinaryOperator *E) - { return Visit(E->getLHS()) || Visit(E->getRHS()); } - bool VisitUnaryPreInc(const UnaryOperator *E) { return true; } - bool VisitUnaryPostInc(const UnaryOperator *E) { return true; } - bool VisitUnaryPreDec(const UnaryOperator *E) { return true; } - bool VisitUnaryPostDec(const UnaryOperator *E) { return true; } - bool VisitUnaryDeref(const UnaryOperator *E) { - if (Ctx.getCanonicalType(E->getType()).isVolatileQualified()) - return true; - return Visit(E->getSubExpr()); - } - bool VisitUnaryOperator(const UnaryOperator *E) { return Visit(E->getSubExpr()); } - - // Has side effects if any element does. - bool VisitInitListExpr(const InitListExpr *E) { - for (unsigned i = 0, e = E->getNumInits(); i != e; ++i) - if (Visit(E->getInit(i))) return true; - if (const Expr *filler = E->getArrayFiller()) - return Visit(filler); - return false; - } - - bool VisitSizeOfPackExpr(const SizeOfPackExpr *) { return false; } -}; - -class OpaqueValueEvaluation { - EvalInfo &info; - OpaqueValueExpr *opaqueValue; - -public: - OpaqueValueEvaluation(EvalInfo &info, OpaqueValueExpr *opaqueValue, - Expr *value) - : info(info), opaqueValue(opaqueValue) { - - // If evaluation fails, fail immediately. - if (!Evaluate(info.OpaqueValues[opaqueValue], info, value)) { - this->opaqueValue = 0; - return; - } - } - - bool hasError() const { return opaqueValue == 0; } - - ~OpaqueValueEvaluation() { - // FIXME: For a recursive constexpr call, an outer stack frame might have - // been using this opaque value too, and will now have to re-evaluate the - // source expression. - if (opaqueValue) info.OpaqueValues.erase(opaqueValue); - } -}; - -} // end anonymous namespace - //===----------------------------------------------------------------------===// // Generic Evaluation //===----------------------------------------------------------------------===// @@ -2509,9 +2417,10 @@ public: } RetTy VisitBinaryConditionalOperator(const BinaryConditionalOperator *E) { - // Cache the value of the common expression. - OpaqueValueEvaluation opaque(Info, E->getOpaqueValue(), E->getCommon()); - if (opaque.hasError()) + // Evaluate and cache the common expression. We treat it as a temporary, + // even though it's not quite the same thing. 
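Before the Evaluate call below, note what is being cached: the OpaqueValueExpr is the shared middle operand of GNU's binary conditional, which must be evaluated exactly once. A user-level sketch of that extension:

    #include <cstdio>

    static int calls = 0;
    int f() { ++calls; return 7; }

    int main() {
      // GNU extension: "x ?: y" evaluates x once, using it both as the
      // condition and, when nonzero, as the result.
      int r = f() ?: -1;
      std::printf("r=%d calls=%d\n", r, calls); // prints r=7 calls=1
      return 0;
    }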
+ if (!Evaluate(Info.CurrentCall->Temporaries[E->getOpaqueValue()], + Info, E->getCommon())) return false; return HandleConditionalOperator(E); @@ -2545,8 +2454,8 @@ public: } RetTy VisitOpaqueValueExpr(const OpaqueValueExpr *E) { - const APValue *Value = Info.getOpaqueValue(E); - if (!Value) { + APValue &Value = Info.CurrentCall->Temporaries[E]; + if (Value.isUninit()) { const Expr *Source = E->getSourceExpr(); if (!Source) return Error(E); @@ -2556,7 +2465,7 @@ public: } return StmtVisitorTy::Visit(Source); } - return DerivedSuccess(*Value, E); + return DerivedSuccess(Value, E); } RetTy VisitCallExpr(const CallExpr *E) { @@ -2773,9 +2682,11 @@ public: assert(BaseTy->getAs()->getDecl()->getCanonicalDecl() == FD->getParent()->getCanonicalDecl() && "record / field mismatch"); (void)BaseTy; - HandleLValueMember(this->Info, E, Result, FD); + if (!HandleLValueMember(this->Info, E, Result, FD)) + return false; } else if (const IndirectFieldDecl *IFD = dyn_cast(MD)) { - HandleLValueIndirectMember(this->Info, E, Result, IFD); + if (!HandleLValueIndirectMember(this->Info, E, Result, IFD)) + return false; } else return this->Error(E); @@ -2970,6 +2881,9 @@ bool LValueExprEvaluator::VisitCXXTypeidExpr(const CXXTypeidExpr *E) { if (E->isTypeOperand()) return Success(E); CXXRecordDecl *RD = E->getExprOperand()->getType()->getAsCXXRecordDecl(); + // FIXME: The standard says "a typeid expression whose operand is of a + // polymorphic class type" is not a constant expression, but it probably + // means "a typeid expression whose operand is potentially evaluated". if (RD && RD->isPolymorphic()) { Info.Diag(E, diag::note_constexpr_typeid_polymorphic) << E->getExprOperand()->getType() @@ -3073,7 +2987,7 @@ public: bool VisitUnaryAddrOf(const UnaryOperator *E); bool VisitObjCStringLiteral(const ObjCStringLiteral *E) { return Success(E); } - bool VisitObjCNumericLiteral(const ObjCNumericLiteral *E) + bool VisitObjCBoxedExpr(const ObjCBoxedExpr *E) { return Success(E); } bool VisitAddrLabelExpr(const AddrLabelExpr *E) { return Success(E); } @@ -3373,6 +3287,7 @@ static bool HandleClassZeroInitialization(EvalInfo &Info, const Expr *E, Result = APValue(APValue::UninitStruct(), CD ? CD->getNumBases() : 0, std::distance(RD->field_begin(), RD->field_end())); + if (RD->isInvalidDecl()) return false; const ASTRecordLayout &Layout = Info.Ctx.getASTRecordLayout(RD); if (CD) { @@ -3381,7 +3296,8 @@ static bool HandleClassZeroInitialization(EvalInfo &Info, const Expr *E, End = CD->bases_end(); I != End; ++I, ++Index) { const CXXRecordDecl *Base = I->getType()->getAsCXXRecordDecl(); LValue Subobject = This; - HandleLValueDirectBase(Info, E, Subobject, CD, Base, &Layout); + if (!HandleLValueDirectBase(Info, E, Subobject, CD, Base, &Layout)) + return false; if (!HandleClassZeroInitialization(Info, E, Base, Subobject, Result.getStructBase(Index))) return false; @@ -3391,15 +3307,16 @@ static bool HandleClassZeroInitialization(EvalInfo &Info, const Expr *E, for (RecordDecl::field_iterator I = RD->field_begin(), End = RD->field_end(); I != End; ++I) { // -- if T is a reference type, no initialization is performed. 
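The reference-member exclusion handled by this loop comes straight from C++11 [dcl.init]p5. In user terms (a minimal sketch):

    struct S { int a; int *p; };

    S s = S();  // zero-initialization: s.a == 0 and s.p is a null pointer
    // Had S contained a reference member, zero-initialization would perform
    // no initialization of it, which is exactly the case the loop skips.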
- if ((*I)->getType()->isReferenceType()) + if (I->getType()->isReferenceType()) continue; LValue Subobject = This; - HandleLValueMember(Info, E, Subobject, *I, &Layout); + if (!HandleLValueMember(Info, E, Subobject, *I, &Layout)) + return false; - ImplicitValueInitExpr VIE((*I)->getType()); + ImplicitValueInitExpr VIE(I->getType()); if (!EvaluateInPlace( - Result.getStructField((*I)->getFieldIndex()), Info, Subobject, &VIE)) + Result.getStructField(I->getFieldIndex()), Info, Subobject, &VIE)) return false; } @@ -3408,6 +3325,7 @@ static bool HandleClassZeroInitialization(EvalInfo &Info, const Expr *E, bool RecordExprEvaluator::ZeroInitialization(const Expr *E) { const RecordDecl *RD = E->getType()->castAs()->getDecl(); + if (RD->isInvalidDecl()) return false; if (RD->isUnion()) { // C++11 [dcl.init]p5: If T is a (possibly cv-qualified) union type, the // object's first non-static named data member is zero-initialized @@ -3418,9 +3336,10 @@ bool RecordExprEvaluator::ZeroInitialization(const Expr *E) { } LValue Subobject = This; - HandleLValueMember(Info, E, Subobject, *I); + if (!HandleLValueMember(Info, E, Subobject, *I)) + return false; Result = APValue(*I); - ImplicitValueInitExpr VIE((*I)->getType()); + ImplicitValueInitExpr VIE(I->getType()); return EvaluateInPlace(Result.getUnionValue(), Info, Subobject, &VIE); } @@ -3470,6 +3389,7 @@ bool RecordExprEvaluator::VisitInitListExpr(const InitListExpr *E) { return false; const RecordDecl *RD = E->getType()->castAs()->getDecl(); + if (RD->isInvalidDecl()) return false; const ASTRecordLayout &Layout = Info.Ctx.getASTRecordLayout(RD); if (RD->isUnion()) { @@ -3484,7 +3404,8 @@ bool RecordExprEvaluator::VisitInitListExpr(const InitListExpr *E) { const Expr *InitExpr = E->getNumInits() ? E->getInit(0) : &VIE; LValue Subobject = This; - HandleLValueMember(Info, InitExpr, Subobject, Field, &Layout); + if (!HandleLValueMember(Info, InitExpr, Subobject, Field, &Layout)) + return false; return EvaluateInPlace(Result.getUnionValue(), Info, Subobject, InitExpr); } @@ -3507,15 +3428,16 @@ bool RecordExprEvaluator::VisitInitListExpr(const InitListExpr *E) { // FIXME: Diagnostics here should point to the end of the initializer // list, not the start. - HandleLValueMember(Info, HaveInit ? E->getInit(ElementNo) : E, Subobject, - *Field, &Layout); + if (!HandleLValueMember(Info, HaveInit ? E->getInit(ElementNo) : E, + Subobject, *Field, &Layout)) + return false; // Perform an implicit value-initialization for members beyond the end of // the initializer list. ImplicitValueInitExpr VIE(HaveInit ? Info.Ctx.IntTy : Field->getType()); if (!EvaluateInPlace( - Result.getStructField((*Field)->getFieldIndex()), + Result.getStructField(Field->getFieldIndex()), Info, Subobject, HaveInit ? E->getInit(ElementNo++) : &VIE)) { if (!Info.keepEvaluatingAfterFailure()) return false; @@ -3528,6 +3450,8 @@ bool RecordExprEvaluator::VisitInitListExpr(const InitListExpr *E) { bool RecordExprEvaluator::VisitCXXConstructExpr(const CXXConstructExpr *E) { const CXXConstructorDecl *FD = E->getConstructor(); + if (FD->isInvalidDecl() || FD->getParent()->isInvalidDecl()) return false; + bool ZeroInit = E->requiresZeroInitialization(); if (CheckTrivialDefaultConstructor(Info, E->getExprLoc(), FD, ZeroInit)) { // If we've already performed zero-initialization, we're already done. 
@@ -3870,8 +3794,24 @@ bool ArrayExprEvaluator::VisitInitListExpr(const InitListExpr *E) {
 
   bool Success = true;
 
+  assert((!Result.isArray() || Result.getArrayInitializedElts() == 0) &&
+         "zero-initialized array shouldn't have any initialized elts");
+  APValue Filler;
+  if (Result.isArray() && Result.hasArrayFiller())
+    Filler = Result.getArrayFiller();
+
   Result = APValue(APValue::UninitArray(), E->getNumInits(),
                    CAT->getSize().getZExtValue());
+
+  // If the array was previously zero-initialized, preserve the
+  // zero-initialized values.
+  if (!Filler.isUninit()) {
+    for (unsigned I = 0, E = Result.getArrayInitializedElts(); I != E; ++I)
+      Result.getArrayInitializedElt(I) = Filler;
+    if (Result.hasArrayFiller())
+      Result.getArrayFiller() = Filler;
+  }
+
   LValue Subobject = This;
   Subobject.addArray(Info, E, CAT);
   unsigned Index = 0;
@@ -3898,15 +3838,29 @@ bool ArrayExprEvaluator::VisitInitListExpr(const InitListExpr *E) {
 }
 
 bool ArrayExprEvaluator::VisitCXXConstructExpr(const CXXConstructExpr *E) {
-  const ConstantArrayType *CAT = Info.Ctx.getAsConstantArrayType(E->getType());
-  if (!CAT)
-    return Error(E);
+  // FIXME: The Subobject here isn't necessarily right. This rarely matters,
+  // but sometimes does:
+  //  struct S { constexpr S() : p(&p) {} void *p; };
+  //  S s[10];
+  LValue Subobject = This;
 
-  bool HadZeroInit = !Result.isUninit();
-  if (!HadZeroInit)
-    Result = APValue(APValue::UninitArray(), 0, CAT->getSize().getZExtValue());
-  if (!Result.hasArrayFiller())
-    return true;
+  APValue *Value = &Result;
+  bool HadZeroInit = true;
+  QualType ElemTy = E->getType();
+  while (const ConstantArrayType *CAT =
+           Info.Ctx.getAsConstantArrayType(ElemTy)) {
+    Subobject.addArray(Info, E, CAT);
+    HadZeroInit &= !Value->isUninit();
+    if (!HadZeroInit)
+      *Value = APValue(APValue::UninitArray(), 0, CAT->getSize().getZExtValue());
+    if (!Value->hasArrayFiller())
+      return true;
+    Value = &Value->getArrayFiller();
+    ElemTy = CAT->getElementType();
+  }
+
+  if (!ElemTy->isRecordType())
+    return Error(E);
 
   const CXXConstructorDecl *FD = E->getConstructor();
 
@@ -3916,17 +3870,15 @@ bool ArrayExprEvaluator::VisitCXXConstructExpr(const CXXConstructExpr *E) {
     return true;
 
   if (ZeroInit) {
-    LValue Subobject = This;
-    Subobject.addArray(Info, E, CAT);
-    ImplicitValueInitExpr VIE(CAT->getElementType());
-    return EvaluateInPlace(Result.getArrayFiller(), Info, Subobject, &VIE);
+    ImplicitValueInitExpr VIE(ElemTy);
+    return EvaluateInPlace(*Value, Info, Subobject, &VIE);
   }
 
   const CXXRecordDecl *RD = FD->getParent();
   if (RD->isUnion())
-    Result.getArrayFiller() = APValue((FieldDecl*)0);
+    *Value = APValue((FieldDecl*)0);
   else
-    Result.getArrayFiller() =
+    *Value =
       APValue(APValue::UninitStruct(), RD->getNumBases(),
               std::distance(RD->field_begin(), RD->field_end()));
 
   return true;
@@ -3938,23 +3890,16 @@ bool ArrayExprEvaluator::VisitCXXConstructExpr(const CXXConstructExpr *E) {
   if (!CheckConstexprFunction(Info, E->getExprLoc(), FD, Definition))
     return false;
 
-  // FIXME: The Subobject here isn't necessarily right. This rarely matters,
-  // but sometimes does:
-  //  struct S { constexpr S() : p(&p) {} void *p; };
-  //  S s[10];
-  LValue Subobject = This;
-  Subobject.addArray(Info, E, CAT);
-
   if (ZeroInit && !HadZeroInit) {
-    ImplicitValueInitExpr VIE(CAT->getElementType());
-    if (!EvaluateInPlace(Result.getArrayFiller(), Info, Subobject, &VIE))
+    ImplicitValueInitExpr VIE(ElemTy);
+    if (!EvaluateInPlace(*Value, Info, Subobject, &VIE))
       return false;
   }
 
   llvm::ArrayRef<const Expr*> Args(E->getArgs(), E->getNumArgs());
   return HandleConstructorCall(E->getExprLoc(), Subobject, Args,
                                cast<FunctionDecl>(Definition),
-                               Info, Result.getArrayFiller());
+                               Info, *Value);
 }
 
 //===----------------------------------------------------------------------===//
@@ -4288,10 +4233,16 @@ QualType IntExprEvaluator::GetObjectType(APValue::LValueBase B) {
 }
 
 bool IntExprEvaluator::TryEvaluateBuiltinObjectSize(const CallExpr *E) {
-  // TODO: Perhaps we should let LLVM lower this?
   LValue Base;
-  if (!EvaluatePointer(E->getArg(0), Base, Info))
-    return false;
+
+  {
+    // The operand of __builtin_object_size is never evaluated for side-effects.
+    // If there are any, but we can determine the pointed-to object anyway, then
+    // ignore the side-effects.
+    SpeculativeEvaluationRAII SpeculativeEval(Info);
+    if (!EvaluatePointer(E->getArg(0), Base, Info))
+      return false;
+  }
 
   // If we can prove the base is null, lower to zero now.
   if (!Base.getLValueBase()) return Success(0, E);
@@ -4323,14 +4274,17 @@ bool IntExprEvaluator::VisitCallExpr(const CallExpr *E) {
     if (TryEvaluateBuiltinObjectSize(E))
       return true;
 
-    // If evaluating the argument has side-effects we can't determine
-    // the size of the object and lower it to unknown now.
+    // If evaluating the argument has side-effects, we can't determine the size
+    // of the object, and so we lower it to unknown now. CodeGen relies on us to
+    // handle all cases where the expression has side-effects.
     if (E->getArg(0)->HasSideEffects(Info.Ctx)) {
       if (E->getArg(1)->EvaluateKnownConstInt(Info.Ctx).getZExtValue() <= 1)
        return Success(-1ULL, E);
      return Success(0, E);
    }
 
+    // Expression had no side effects, but we couldn't statically determine the
+    // size of the referenced object.
    return Error(E);
  }
 
@@ -5280,6 +5234,7 @@ bool IntExprEvaluator::VisitOffsetOfExpr(const OffsetOfExpr *OOE) {
       if (!RT)
         return Error(OOE);
       RecordDecl *RD = RT->getDecl();
+      if (RD->isInvalidDecl()) return false;
       const ASTRecordLayout &RL = Info.Ctx.getASTRecordLayout(RD);
       unsigned i = MemberDecl->getFieldIndex();
       assert(i < RL.getFieldCount() && "offsetof field in wrong type");
@@ -5301,6 +5256,7 @@ bool IntExprEvaluator::VisitOffsetOfExpr(const OffsetOfExpr *OOE) {
       if (!RT)
         return Error(OOE);
       RecordDecl *RD = RT->getDecl();
+      if (RD->isInvalidDecl()) return false;
       const ASTRecordLayout &RL = Info.Ctx.getASTRecordLayout(RD);
       // Find the base class itself.
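A short usage sketch (mine, not patch code) of the constant-folding behavior the __builtin_object_size hunks above pin down; (size_t)-1 and 0 are the documented "unknown" results for type 0/1 and type 2/3 respectively:

  #include <cstddef>
  std::size_t demo() {
    char buf[16];
    char *p = buf;
    std::size_t known = __builtin_object_size(buf, 0);    // folds to 16
    // The operand has a side-effect, so the evaluator now folds this to the
    // "unknown" result instead of rejecting it; CodeGen depends on that.
    std::size_t unknown = __builtin_object_size(p++, 1);  // folds to (size_t)-1
    return known + unknown;
  }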
@@ -6385,10 +6341,6 @@ bool Expr::isEvaluatable(const ASTContext &Ctx) const { return EvaluateAsRValue(Result, Ctx) && !Result.HasSideEffects; } -bool Expr::HasSideEffects(const ASTContext &Ctx) const { - return HasSideEffect(Ctx).Visit(this); -} - APSInt Expr::EvaluateKnownConstInt(const ASTContext &Ctx) const { EvalResult EvalResult; bool Result = EvaluateAsRValue(EvalResult, Ctx); @@ -6501,7 +6453,7 @@ static ICEDiag CheckICE(const Expr* E, ASTContext &Ctx) { case Expr::CXXDependentScopeMemberExprClass: case Expr::UnresolvedMemberExprClass: case Expr::ObjCStringLiteralClass: - case Expr::ObjCNumericLiteralClass: + case Expr::ObjCBoxedExprClass: case Expr::ObjCArrayLiteralClass: case Expr::ObjCDictionaryLiteralClass: case Expr::ObjCEncodeExprClass: diff --git a/lib/AST/ItaniumCXXABI.cpp b/lib/AST/ItaniumCXXABI.cpp index 0027dbf..ce1244c 100644 --- a/lib/AST/ItaniumCXXABI.cpp +++ b/lib/AST/ItaniumCXXABI.cpp @@ -39,7 +39,7 @@ public: return 1; } - CallingConv getDefaultMethodCallConv() const { + CallingConv getDefaultMethodCallConv(bool isVariadic) const { return CC_C; } diff --git a/lib/AST/ItaniumMangle.cpp b/lib/AST/ItaniumMangle.cpp index 0d405f1..7c7a5e5 100644 --- a/lib/AST/ItaniumMangle.cpp +++ b/lib/AST/ItaniumMangle.cpp @@ -657,7 +657,7 @@ void CXXNameMangler::mangleFloat(const llvm::APFloat &f) { // mistake; see the discussion on cxx-abi-dev beginning on // 2012-01-16. - // Our requirements here are just barely wierd enough to justify + // Our requirements here are just barely weird enough to justify // using a custom algorithm instead of post-processing APInt::toString(). llvm::APInt valueBits = f.bitcastToAPInt(); @@ -1032,17 +1032,14 @@ static const FieldDecl *FindFirstNamedDataMember(const RecordDecl *RD) { for (RecordDecl::field_iterator I = RD->field_begin(), E = RD->field_end(); I != E; ++I) { - const FieldDecl *FD = *I; + if (I->getIdentifier()) + return *I; - if (FD->getIdentifier()) - return FD; - - if (const RecordType *RT = FD->getType()->getAs()) { + if (const RecordType *RT = I->getType()->getAs()) if (const FieldDecl *NamedDataMember = FindFirstNamedDataMember(RT->getDecl())) return NamedDataMember; } - } // We didn't find a named data member. return 0; @@ -1892,12 +1889,23 @@ void CXXNameMangler::mangleType(const BuiltinType *T) { } // ::= -// ::= F [Y] E +// ::= [] F [Y] +// [] E +// (Proposal to cxx-abi-dev, 2012-05-11) void CXXNameMangler::mangleType(const FunctionProtoType *T) { + // Mangle CV-qualifiers, if present. These are 'this' qualifiers, + // e.g. "const" in "int (A::*)() const". + mangleQualifiers(Qualifiers::fromCVRMask(T->getTypeQuals())); + Out << 'F'; + // FIXME: We don't have enough information in the AST to produce the 'Y' // encoding for extern "C" function types. mangleBareFunctionType(T, /*MangleReturnType=*/true); + + // Mangle the ref-qualifier, if present. + mangleRefQualifier(T->getRefQualifier()); + Out << 'E'; } void CXXNameMangler::mangleType(const FunctionNoProtoType *T) { @@ -1990,8 +1998,6 @@ void CXXNameMangler::mangleType(const MemberPointerType *T) { mangleType(QualType(T->getClass(), 0)); QualType PointeeType = T->getPointeeType(); if (const FunctionProtoType *FPT = dyn_cast(PointeeType)) { - mangleQualifiers(Qualifiers::fromCVRMask(FPT->getTypeQuals())); - mangleRefQualifier(FPT->getRefQualifier()); mangleType(FPT); // Itanium C++ ABI 5.1.8: @@ -2005,9 +2011,11 @@ void CXXNameMangler::mangleType(const MemberPointerType *T) { // which the function is a member is considered part of the type of // function. 
+ // Given that we already substitute member function pointers as a + // whole, the net effect of this rule is just to unconditionally + // suppress substitution on the function type in a member pointer. // We increment the SeqID here to emulate adding an entry to the - // substitution table. We can't actually add it because we don't want this - // particular function type to be substituted. + // substitution table. ++SeqID; } else mangleType(PointeeType); @@ -2390,7 +2398,7 @@ recurse: case Expr::ObjCProtocolExprClass: case Expr::ObjCSelectorExprClass: case Expr::ObjCStringLiteralClass: - case Expr::ObjCNumericLiteralClass: + case Expr::ObjCBoxedExprClass: case Expr::ObjCArrayLiteralClass: case Expr::ObjCDictionaryLiteralClass: case Expr::ObjCSubscriptRefExprClass: @@ -2981,7 +2989,7 @@ void CXXNameMangler::mangleFunctionParam(const ParmVarDecl *parm) { // Top-level qualifiers. We don't have to worry about arrays here, // because parameters declared as arrays should already have been - // tranformed to have pointer type. FIXME: apparently these don't + // transformed to have pointer type. FIXME: apparently these don't // get mangled if used as an rvalue of a known non-class type? assert(!parm->getType()->isArrayType() && "parameter's type is still an array type?"); @@ -3124,7 +3132,7 @@ void CXXNameMangler::mangleTemplateArg(const NamedDecl *P, break; } case TemplateArgument::Integral: - mangleIntegerLiteral(A.getIntegralType(), *A.getAsIntegral()); + mangleIntegerLiteral(A.getIntegralType(), A.getAsIntegral()); break; case TemplateArgument::Declaration: { assert(P && "Missing template parameter for declaration argument"); diff --git a/lib/AST/LambdaMangleContext.cpp b/lib/AST/LambdaMangleContext.cpp index f5272a7..6f4fe2d 100644 --- a/lib/AST/LambdaMangleContext.cpp +++ b/lib/AST/LambdaMangleContext.cpp @@ -11,7 +11,9 @@ // the Itanium C++ ABI mangling numbers for lambda expressions. 
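A compilable illustration (mine, not taken from the patch) of the revised FunctionProtoType mangling adopted above: the 'this'-qualifiers are mangled before 'F' and the ref-qualifier before 'E', so a pointer to a const member function travels as one <pointer-to-member-type> unit; the mangled string shown is the expected result under this scheme:

  struct A { int get() const { return 0; } };
  void f(int (A::*)() const) {}
  // expected Itanium mangling: _Z1fM1AKFivE
  //   M 1A       -> pointer to member of A
  //   K F i v E  -> const member function type 'int ()'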
// //===----------------------------------------------------------------------===// + #include "clang/AST/LambdaMangleContext.h" +#include "clang/AST/ASTContext.h" #include "clang/AST/DeclCXX.h" using namespace clang; diff --git a/lib/AST/Mangle.cpp b/lib/AST/Mangle.cpp index 73c9f57..d5f8371 100644 --- a/lib/AST/Mangle.cpp +++ b/lib/AST/Mangle.cpp @@ -40,7 +40,11 @@ static void mangleFunctionBlock(MangleContext &Context, StringRef Outer, const BlockDecl *BD, raw_ostream &Out) { - Out << "__" << Outer << "_block_invoke_" << Context.getBlockId(BD, true); + unsigned discriminator = Context.getBlockId(BD, true); + if (discriminator == 0) + Out << "__" << Outer << "_block_invoke"; + else + Out << "__" << Outer << "_block_invoke_" << discriminator+1; } static void checkMangleDC(const DeclContext *DC, const BlockDecl *BD) { @@ -62,8 +66,20 @@ static void checkMangleDC(const DeclContext *DC, const BlockDecl *BD) { void MangleContext::anchor() { } void MangleContext::mangleGlobalBlock(const BlockDecl *BD, + const NamedDecl *ID, raw_ostream &Out) { - Out << "__block_global_" << getBlockId(BD, false); + unsigned discriminator = getBlockId(BD, false); + if (ID) { + if (shouldMangleDeclName(ID)) + mangleName(ID, Out); + else { + Out << ID->getIdentifier()->getName(); + } + } + if (discriminator == 0) + Out << "_block_invoke"; + else + Out << "_block_invoke_" << discriminator+1; } void MangleContext::mangleCtorBlock(const CXXConstructorDecl *CD, @@ -99,8 +115,8 @@ void MangleContext::mangleBlock(const DeclContext *DC, const BlockDecl *BD, mangleObjCMethodName(Method, Stream); } else { const NamedDecl *ND = cast(DC); - if (IdentifierInfo *II = ND->getIdentifier()) - Stream << II->getName(); + if (!shouldMangleDeclName(ND) && ND->getIdentifier()) + Stream << ND->getIdentifier()->getName(); else { // FIXME: We were doing a mangleUnqualifiedName() before, but that's // a private member of a class that will soon itself be private to the @@ -131,12 +147,13 @@ void MangleContext::mangleObjCMethodName(const ObjCMethodDecl *MD, } void MangleContext::mangleBlock(const BlockDecl *BD, - raw_ostream &Out) { + raw_ostream &Out, + const NamedDecl *ID) { const DeclContext *DC = BD->getDeclContext(); while (isa(DC) || isa(DC)) DC = DC->getParent(); if (DC->isFunctionOrMethod()) mangleBlock(DC, BD, Out); else - mangleGlobalBlock(BD, Out); + mangleGlobalBlock(BD, ID, Out); } diff --git a/lib/AST/MicrosoftCXXABI.cpp b/lib/AST/MicrosoftCXXABI.cpp index f33d6fe..51308ea 100644 --- a/lib/AST/MicrosoftCXXABI.cpp +++ b/lib/AST/MicrosoftCXXABI.cpp @@ -29,8 +29,8 @@ public: unsigned getMemberPointerSize(const MemberPointerType *MPT) const; - CallingConv getDefaultMethodCallConv() const { - if (Context.getTargetInfo().getTriple().getArch() == llvm::Triple::x86) + CallingConv getDefaultMethodCallConv(bool isVariadic) const { + if (!isVariadic && Context.getTargetInfo().getTriple().getArch() == llvm::Triple::x86) return CC_X86ThisCall; else return CC_C; diff --git a/lib/AST/MicrosoftMangle.cpp b/lib/AST/MicrosoftMangle.cpp index ba9856a..e2cee7f 100644 --- a/lib/AST/MicrosoftMangle.cpp +++ b/lib/AST/MicrosoftMangle.cpp @@ -21,6 +21,8 @@ #include "clang/AST/ExprCXX.h" #include "clang/Basic/ABI.h" +#include + using namespace clang; namespace { @@ -31,36 +33,59 @@ class MicrosoftCXXNameMangler { MangleContext &Context; raw_ostream &Out; + // FIXME: audit the performance of BackRefMap as it might do way too many + // copying of strings. 
+ typedef std::map BackRefMap; + BackRefMap NameBackReferences; + bool UseNameBackReferences; + + typedef llvm::DenseMap ArgBackRefMap; + ArgBackRefMap TypeBackReferences; + ASTContext &getASTContext() const { return Context.getASTContext(); } public: MicrosoftCXXNameMangler(MangleContext &C, raw_ostream &Out_) - : Context(C), Out(Out_) { } + : Context(C), Out(Out_), UseNameBackReferences(true) { } + + raw_ostream &getStream() const { return Out; } - void mangle(const NamedDecl *D, StringRef Prefix = "?"); + void mangle(const NamedDecl *D, StringRef Prefix = "\01?"); void mangleName(const NamedDecl *ND); void mangleFunctionEncoding(const FunctionDecl *FD); void mangleVariableEncoding(const VarDecl *VD); void mangleNumber(int64_t Number); - void mangleType(QualType T); + void mangleNumber(const llvm::APSInt &Value); + void mangleType(QualType T, SourceRange Range); private: + void disableBackReferences() { UseNameBackReferences = false; } void mangleUnqualifiedName(const NamedDecl *ND) { mangleUnqualifiedName(ND, ND->getDeclName()); } void mangleUnqualifiedName(const NamedDecl *ND, DeclarationName Name); void mangleSourceName(const IdentifierInfo *II); void manglePostfix(const DeclContext *DC, bool NoFunction=false); - void mangleOperatorName(OverloadedOperatorKind OO); + void mangleOperatorName(OverloadedOperatorKind OO, SourceLocation Loc); void mangleQualifiers(Qualifiers Quals, bool IsMember); + void mangleUnscopedTemplateName(const TemplateDecl *ND); + void mangleTemplateInstantiationName(const TemplateDecl *TD, + const SmallVectorImpl &TemplateArgs); void mangleObjCMethodName(const ObjCMethodDecl *MD); + void mangleLocalName(const FunctionDecl *FD); + + void mangleTypeRepeated(QualType T, SourceRange Range); // Declare manglers for every type class. #define ABSTRACT_TYPE(CLASS, PARENT) #define NON_CANONICAL_TYPE(CLASS, PARENT) -#define TYPE(CLASS, PARENT) void mangleType(const CLASS##Type *T); +#define TYPE(CLASS, PARENT) void mangleType(const CLASS##Type *T, \ + SourceRange Range); #include "clang/AST/TypeNodes.def" +#undef ABSTRACT_TYPE +#undef NON_CANONICAL_TYPE +#undef TYPE void mangleType(const TagType*); void mangleType(const FunctionType *T, const FunctionDecl *D, @@ -69,8 +94,12 @@ private: void mangleExtraDimensions(QualType T); void mangleFunctionClass(const FunctionDecl *FD); void mangleCallingConvention(const FunctionType *T, bool IsInstMethod = false); + void mangleIntegerLiteral(QualType T, const llvm::APSInt &Number); void mangleThrowSpecification(const FunctionProtoType *T); + void mangleTemplateArgs( + const SmallVectorImpl &TemplateArgs); + }; /// MicrosoftMangleContext - Overrides the default MangleContext for the @@ -157,15 +186,15 @@ void MicrosoftCXXNameMangler::mangle(const NamedDecl *D, StringRef Prefix) { // MSVC doesn't mangle C++ names the same way it mangles extern "C" names. // Therefore it's really important that we don't decorate the - // name with leading underscores or leading/trailing at signs. So, emit a - // asm marker at the start so we get the name right. - Out << '\01'; // LLVM IR Marker for __asm("foo") + // name with leading underscores or leading/trailing at signs. So, by + // default, we emit an asm marker at the start so we get the name right. + // Callers can override this with a custom prefix. // Any decl can be declared with __asm("foo") on it, and this takes precedence // over all other naming in the .o file. if (const AsmLabelAttr *ALA = D->getAttr()) { // If we have an asm name, then we use it as the mangling. 
- Out << ALA->getLabel(); + Out << '\01' << ALA->getLabel(); return; } @@ -176,7 +205,15 @@ void MicrosoftCXXNameMangler::mangle(const NamedDecl *D, mangleFunctionEncoding(FD); else if (const VarDecl *VD = dyn_cast(D)) mangleVariableEncoding(VD); - // TODO: Fields? Can MSVC even mangle them? + else { + // TODO: Fields? Can MSVC even mangle them? + // Issue a diagnostic for now. + DiagnosticsEngine &Diags = Context.getDiags(); + unsigned DiagID = Diags.getCustomDiagID(DiagnosticsEngine::Error, + "cannot mangle this declaration yet"); + Diags.Report(D->getLocation(), DiagID) + << D->getSourceRange(); + } } void MicrosoftCXXNameMangler::mangleFunctionEncoding(const FunctionDecl *FD) { @@ -188,7 +225,7 @@ void MicrosoftCXXNameMangler::mangleFunctionEncoding(const FunctionDecl *FD) { // We should never ever see a FunctionNoProtoType at this point. // We don't even know how to mangle their types anyway :). - const FunctionProtoType *FT = cast(FD->getType()); + const FunctionProtoType *FT = FD->getType()->castAs(); bool InStructor = false, InInstMethod = false; const CXXMethodDecl *MD = dyn_cast(FD); @@ -232,16 +269,17 @@ void MicrosoftCXXNameMangler::mangleVariableEncoding(const VarDecl *VD) { // ::= A # pointers, references, arrays // Pointers and references are odd. The type of 'int * const foo;' gets // mangled as 'QAHA' instead of 'PAHB', for example. - QualType Ty = VD->getType(); + TypeLoc TL = VD->getTypeSourceInfo()->getTypeLoc(); + QualType Ty = TL.getType(); if (Ty->isPointerType() || Ty->isReferenceType()) { - mangleType(Ty); + mangleType(Ty, TL.getSourceRange()); Out << 'A'; - } else if (Ty->isArrayType()) { + } else if (const ArrayType *AT = getASTContext().getAsArrayType(Ty)) { // Global arrays are funny, too. - mangleType(cast(Ty.getTypePtr()), true); + mangleType(AT, true); Out << 'A'; } else { - mangleType(Ty.getLocalUnqualifiedType()); + mangleType(Ty.getLocalUnqualifiedType(), TL.getSourceRange()); mangleQualifiers(Ty.getLocalQualifiers(), false); } } @@ -266,35 +304,156 @@ void MicrosoftCXXNameMangler::mangleName(const NamedDecl *ND) { } void MicrosoftCXXNameMangler::mangleNumber(int64_t Number) { - // ::= [?] # <= 9 - // ::= [?] + @ # > 9; A = 0, B = 1, etc... + // ::= [?] # 1 <= Number <= 10 + // ::= [?] + @ # 0 or > 9; A = 0, B = 1, etc... + // ::= [?] @ # 0 (alternate mangling, not emitted by VC) if (Number < 0) { Out << '?'; Number = -Number; } - if (Number >= 1 && Number <= 10) { + // There's a special shorter mangling for 0, but Microsoft + // chose not to use it. Instead, 0 gets mangled as "A@". Oh well... + if (Number >= 1 && Number <= 10) Out << Number-1; - } else { + else { // We have to build up the encoding in reverse order, so it will come // out right when we write it out. char Encoding[16]; char *EndPtr = Encoding+sizeof(Encoding); char *CurPtr = EndPtr; - while (Number) { + do { *--CurPtr = 'A' + (Number % 16); Number /= 16; + } while (Number); + Out.write(CurPtr, EndPtr-CurPtr); + Out << '@'; + } +} + +void MicrosoftCXXNameMangler::mangleNumber(const llvm::APSInt &Value) { + if (Value.isSigned() && Value.isNegative()) { + Out << '?'; + mangleNumber(llvm::APSInt(Value.abs())); + return; + } + llvm::APSInt Temp(Value); + if (Value.uge(1) && Value.ule(10)) { + --Temp; + Temp.print(Out, false); + } else { + // We have to build up the encoding in reverse order, so it will come + // out right when we write it out. 
+ char Encoding[64]; + char *EndPtr = Encoding+sizeof(Encoding); + char *CurPtr = EndPtr; + llvm::APSInt NibbleMask(Value.getBitWidth(), Value.isUnsigned()); + NibbleMask = 0xf; + for (int i = 0, e = Value.getActiveBits() / 4; i != e; ++i) { + *--CurPtr = 'A' + Temp.And(NibbleMask).getLimitedValue(0xf); + Temp = Temp.lshr(4); } Out.write(CurPtr, EndPtr-CurPtr); Out << '@'; } } +static const TemplateDecl * +isTemplate(const NamedDecl *ND, + SmallVectorImpl &TemplateArgs) { + // Check if we have a function template. + if (const FunctionDecl *FD = dyn_cast(ND)){ + if (const TemplateDecl *TD = FD->getPrimaryTemplate()) { + if (FD->getTemplateSpecializationArgsAsWritten()) { + const ASTTemplateArgumentListInfo *ArgList = + FD->getTemplateSpecializationArgsAsWritten(); + TemplateArgs.append(ArgList->getTemplateArgs(), + ArgList->getTemplateArgs() + + ArgList->NumTemplateArgs); + } else { + const TemplateArgumentList *ArgList = + FD->getTemplateSpecializationArgs(); + TemplateArgumentListInfo LI; + for (unsigned i = 0, e = ArgList->size(); i != e; ++i) + TemplateArgs.push_back(TemplateArgumentLoc(ArgList->get(i), + FD->getTypeSourceInfo())); + } + return TD; + } + } + + // Check if we have a class template. + if (const ClassTemplateSpecializationDecl *Spec = + dyn_cast(ND)) { + TypeSourceInfo *TSI = Spec->getTypeAsWritten(); + if (TSI) { + TemplateSpecializationTypeLoc &TSTL = + cast(TSI->getTypeLoc()); + TemplateArgumentListInfo LI(TSTL.getLAngleLoc(), TSTL.getRAngleLoc()); + for (unsigned i = 0, e = TSTL.getNumArgs(); i != e; ++i) + TemplateArgs.push_back(TSTL.getArgLoc(i)); + } else { + TemplateArgumentListInfo LI; + const TemplateArgumentList &ArgList = + Spec->getTemplateArgs(); + for (unsigned i = 0, e = ArgList.size(); i != e; ++i) + TemplateArgs.push_back(TemplateArgumentLoc(ArgList[i], + TemplateArgumentLocInfo())); + } + return Spec->getSpecializedTemplate(); + } + + return 0; +} + void MicrosoftCXXNameMangler::mangleUnqualifiedName(const NamedDecl *ND, DeclarationName Name) { // ::= // ::= // ::= + // ::= + SmallVector TemplateArgs; + // Check if we have a template. + if (const TemplateDecl *TD = isTemplate(ND, TemplateArgs)) { + // We have a template. + // Here comes the tricky thing: if we need to mangle something like + // void foo(A::X, B::X), + // the X part is aliased. However, if you need to mangle + // void foo(A::X, A::X), + // the A::X<> part is not aliased. + // That said, from the mangler's perspective we have a structure like this: + // namespace[s] -> type[ -> template-parameters] + // but from the Clang perspective we have + // type [ -> template-parameters] + // \-> namespace[s] + // What we do is we create a new mangler, mangle the same type (without + // a namespace suffix) using the extra mangler with back references + // disabled (to avoid infinite recursion) and then use the mangled type + // name as a key to check the mangling of different types for aliasing. 
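A standalone model (an illustrative sketch, not clang code) of the <number> encoding that mangleNumber above implements: 1 through 10 map to '0' through '9'; everything else is written in base 16 with digits 'A' (0) through 'P' (15) and terminated by '@'; '?' marks negative values, so 0 comes out as "A@":

  #include <cstdint>
  #include <string>

  std::string mangleMsNumber(int64_t N) {
    std::string Out;
    if (N < 0) { Out += '?'; N = -N; }   // ignores INT64_MIN, as the original does
    if (N >= 1 && N <= 10) {
      Out += char('0' + (N - 1));        // 1..10 -> '0'..'9'
    } else {
      std::string Hex;
      do {                               // base 16, 'A' = 0 ... 'P' = 15
        Hex.insert(Hex.begin(), char('A' + (N % 16)));
        N /= 16;
      } while (N);
      Out += Hex;
      Out += '@';
    }
    return Out;
  }
  // mangleMsNumber(0) == "A@", mangleMsNumber(10) == "9",
  // mangleMsNumber(11) == "L@", mangleMsNumber(-3) == "?2"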
+ + std::string BackReferenceKey; + BackRefMap::iterator Found; + if (UseNameBackReferences) { + llvm::raw_string_ostream Stream(BackReferenceKey); + MicrosoftCXXNameMangler Extra(Context, Stream); + Extra.disableBackReferences(); + Extra.mangleUnqualifiedName(ND, Name); + Stream.flush(); + + Found = NameBackReferences.find(BackReferenceKey); + } + if (!UseNameBackReferences || Found == NameBackReferences.end()) { + mangleTemplateInstantiationName(TD, TemplateArgs); + if (UseNameBackReferences && NameBackReferences.size() < 10) { + size_t Size = NameBackReferences.size(); + NameBackReferences[BackReferenceKey] = Size; + } + } else { + Out << Found->second; + } + return; + } + switch (Name.getNameKind()) { case DeclarationName::Identifier: { if (const IdentifierInfo *II = Name.getAsIdentifierInfo()) { @@ -349,12 +508,17 @@ MicrosoftCXXNameMangler::mangleUnqualifiedName(const NamedDecl *ND, break; case DeclarationName::CXXOperatorName: - mangleOperatorName(Name.getCXXOverloadedOperator()); + mangleOperatorName(Name.getCXXOverloadedOperator(), ND->getLocation()); break; - case DeclarationName::CXXLiteralOperatorName: + case DeclarationName::CXXLiteralOperatorName: { // FIXME: Was this added in VS2010? Does MS even know how to mangle this? - llvm_unreachable("Don't know how to mangle literal operators yet!"); + DiagnosticsEngine Diags = Context.getDiags(); + unsigned DiagID = Diags.getCustomDiagID(DiagnosticsEngine::Error, + "cannot mangle this literal operator yet"); + Diags.Report(ND->getLocation(), DiagID); + break; + } case DeclarationName::CXXUsingDirective: llvm_unreachable("Can't mangle a using directive name!"); @@ -364,8 +528,6 @@ MicrosoftCXXNameMangler::mangleUnqualifiedName(const NamedDecl *ND, void MicrosoftCXXNameMangler::manglePostfix(const DeclContext *DC, bool NoFunction) { // ::= [] - // ::= [] - // ::= // ::= [] if (!DC) return; @@ -386,13 +548,16 @@ void MicrosoftCXXNameMangler::manglePostfix(const DeclContext *DC, return; else if (const ObjCMethodDecl *Method = dyn_cast(DC)) mangleObjCMethodName(Method); + else if (const FunctionDecl *Func = dyn_cast(DC)) + mangleLocalName(Func); else { mangleUnqualifiedName(cast(DC)); manglePostfix(DC->getParent(), NoFunction); } } -void MicrosoftCXXNameMangler::mangleOperatorName(OverloadedOperatorKind OO) { +void MicrosoftCXXNameMangler::mangleOperatorName(OverloadedOperatorKind OO, + SourceLocation Loc) { switch (OO) { // ?0 # constructor // ?1 # destructor @@ -509,8 +674,13 @@ void MicrosoftCXXNameMangler::mangleOperatorName(OverloadedOperatorKind OO) { // ::= ?_V # delete[] case OO_Array_Delete: Out << "?_V"; break; - case OO_Conditional: - llvm_unreachable("Don't know how to mangle ?:"); + case OO_Conditional: { + DiagnosticsEngine &Diags = Context.getDiags(); + unsigned DiagID = Diags.getCustomDiagID(DiagnosticsEngine::Error, + "cannot mangle this conditional operator yet"); + Diags.Report(Loc, DiagID); + break; + } case OO_None: case NUM_OVERLOADED_OPERATORS: @@ -520,13 +690,141 @@ void MicrosoftCXXNameMangler::mangleOperatorName(OverloadedOperatorKind OO) { void MicrosoftCXXNameMangler::mangleSourceName(const IdentifierInfo *II) { // ::= @ - Out << II->getName() << '@'; + std::string key = II->getNameStart(); + BackRefMap::iterator Found; + if (UseNameBackReferences) + Found = NameBackReferences.find(key); + if (!UseNameBackReferences || Found == NameBackReferences.end()) { + Out << II->getName() << '@'; + if (UseNameBackReferences && NameBackReferences.size() < 10) { + size_t Size = NameBackReferences.size(); + 
NameBackReferences[key] = Size; + } + } else { + Out << Found->second; + } } void MicrosoftCXXNameMangler::mangleObjCMethodName(const ObjCMethodDecl *MD) { Context.mangleObjCMethodName(MD, Out); } +// Find out how many function decls live above this one and return an integer +// suitable for use as the number in a numbered anonymous scope. +// TODO: Memoize. +static unsigned getLocalNestingLevel(const FunctionDecl *FD) { + const DeclContext *DC = FD->getParent(); + int level = 1; + + while (DC && !DC->isTranslationUnit()) { + if (isa(DC) || isa(DC)) level++; + DC = DC->getParent(); + } + + return 2*level; +} + +void MicrosoftCXXNameMangler::mangleLocalName(const FunctionDecl *FD) { + // ::= ? + // ::= ? + // Even though the name is rendered in reverse order (e.g. + // A::B::C is rendered as C@B@A), VC numbers the scopes from outermost to + // innermost. So a method bar in class C local to function foo gets mangled + // as something like: + // ?bar@C@?1??foo@@YAXXZ@QAEXXZ + // This is more apparent when you have a type nested inside a method of a + // type nested inside a function. A method baz in class D local to method + // bar of class C local to function foo gets mangled as: + // ?baz@D@?3??bar@C@?1??foo@@YAXXZ@QAEXXZ@QAEXXZ + // This scheme is general enough to support GCC-style nested + // functions. You could have a method baz of class C inside a function bar + // inside a function foo, like so: + // ?baz@C@?3??bar@?1??foo@@YAXXZ@YAXXZ@QAEXXZ + int NestLevel = getLocalNestingLevel(FD); + Out << '?'; + mangleNumber(NestLevel); + Out << '?'; + mangle(FD, "?"); +} + +void MicrosoftCXXNameMangler::mangleTemplateInstantiationName( + const TemplateDecl *TD, + const SmallVectorImpl &TemplateArgs) { + // ::= + // ::= + // Always start with the unqualified name. + + // Templates have their own context for back references. + BackRefMap TemplateContext; + NameBackReferences.swap(TemplateContext); + + mangleUnscopedTemplateName(TD); + mangleTemplateArgs(TemplateArgs); + + NameBackReferences.swap(TemplateContext); +} + +void +MicrosoftCXXNameMangler::mangleUnscopedTemplateName(const TemplateDecl *TD) { + // ::= ?$ + Out << "?$"; + mangleUnqualifiedName(TD); +} + +void +MicrosoftCXXNameMangler::mangleIntegerLiteral(QualType T, + const llvm::APSInt &Value) { + // ::= $0 + Out << "$0"; + // Make sure booleans are encoded as 0/1. + if (T->isBooleanType()) + Out << (Value.getBoolValue() ? "0" : "A@"); + else + mangleNumber(Value); +} + +void +MicrosoftCXXNameMangler::mangleTemplateArgs( + const SmallVectorImpl &TemplateArgs) { + // ::= { | }+ @ + unsigned NumTemplateArgs = TemplateArgs.size(); + for (unsigned i = 0; i < NumTemplateArgs; ++i) { + const TemplateArgumentLoc &TAL = TemplateArgs[i]; + const TemplateArgument &TA = TAL.getArgument(); + switch (TA.getKind()) { + case TemplateArgument::Null: + llvm_unreachable("Can't mangle null template arguments!"); + case TemplateArgument::Type: + mangleType(TA.getAsType(), TAL.getSourceRange()); + break; + case TemplateArgument::Integral: + mangleIntegerLiteral(TA.getIntegralType(), TA.getAsIntegral()); + break; + case TemplateArgument::Expression: { + // See if this is a constant expression. + Expr *TAE = TA.getAsExpr(); + llvm::APSInt Value; + if (TAE->isIntegerConstantExpr(Value, Context.getASTContext())) { + mangleIntegerLiteral(TAE->getType(), Value); + break; + } + /* fallthrough */ + } default: { + // Issue a diagnostic. 
+ DiagnosticsEngine &Diags = Context.getDiags(); + unsigned DiagID = Diags.getCustomDiagID(DiagnosticsEngine::Error, + "cannot mangle this %select{ERROR|ERROR|pointer/reference|ERROR|" + "template|template pack expansion|expression|parameter pack}0 " + "template argument yet"); + Diags.Report(TAL.getLocation(), DiagID) + << TA.getKind() + << TAL.getSourceRange(); + } + } + } + Out << '@'; +} + void MicrosoftCXXNameMangler::mangleQualifiers(Qualifiers Quals, bool IsMember) { // ::= [E] [F] [I] @@ -610,7 +908,29 @@ void MicrosoftCXXNameMangler::mangleQualifiers(Qualifiers Quals, // FIXME: For now, just drop all extension qualifiers on the floor. } -void MicrosoftCXXNameMangler::mangleType(QualType T) { +void MicrosoftCXXNameMangler::mangleTypeRepeated(QualType T, SourceRange Range) { + void *TypePtr = getASTContext().getCanonicalType(T).getAsOpaquePtr(); + ArgBackRefMap::iterator Found = TypeBackReferences.find(TypePtr); + + if (Found == TypeBackReferences.end()) { + size_t OutSizeBefore = Out.GetNumBytesInBuffer(); + + mangleType(T,Range); + + // See if it's worth creating a back reference. + // Only types longer than 1 character are considered + // and only 10 back references slots are available: + bool LongerThanOneChar = (Out.GetNumBytesInBuffer() - OutSizeBefore > 1); + if (LongerThanOneChar && TypeBackReferences.size() < 10) { + size_t Size = TypeBackReferences.size(); + TypeBackReferences[TypePtr] = Size; + } + } else { + Out << Found->second; + } +} + +void MicrosoftCXXNameMangler::mangleType(QualType T, SourceRange Range) { // Only operate on the canonical type! T = getASTContext().getCanonicalType(T); @@ -644,18 +964,22 @@ void MicrosoftCXXNameMangler::mangleType(QualType T) { switch (T->getTypeClass()) { #define ABSTRACT_TYPE(CLASS, PARENT) #define NON_CANONICAL_TYPE(CLASS, PARENT) \ -case Type::CLASS: \ -llvm_unreachable("can't mangle non-canonical type " #CLASS "Type"); \ -return; + case Type::CLASS: \ + llvm_unreachable("can't mangle non-canonical type " #CLASS "Type"); \ + return; #define TYPE(CLASS, PARENT) \ -case Type::CLASS: \ -mangleType(static_cast(T.getTypePtr())); \ -break; + case Type::CLASS: \ + mangleType(static_cast(T.getTypePtr()), Range); \ + break; #include "clang/AST/TypeNodes.def" +#undef ABSTRACT_TYPE +#undef NON_CANONICAL_TYPE +#undef TYPE } } -void MicrosoftCXXNameMangler::mangleType(const BuiltinType *T) { +void MicrosoftCXXNameMangler::mangleType(const BuiltinType *T, + SourceRange Range) { // ::= // ::= X # void // ::= C # signed char @@ -713,24 +1037,32 @@ void MicrosoftCXXNameMangler::mangleType(const BuiltinType *T) { case BuiltinType::ObjCId: Out << "PAUobjc_object@@"; break; case BuiltinType::ObjCClass: Out << "PAUobjc_class@@"; break; case BuiltinType::ObjCSel: Out << "PAUobjc_selector@@"; break; + + case BuiltinType::NullPtr: Out << "$$T"; break; case BuiltinType::Char16: case BuiltinType::Char32: - case BuiltinType::Half: - case BuiltinType::NullPtr: - assert(0 && "Don't know how to mangle this type yet"); + case BuiltinType::Half: { + DiagnosticsEngine &Diags = Context.getDiags(); + unsigned DiagID = Diags.getCustomDiagID(DiagnosticsEngine::Error, + "cannot mangle this built-in %0 type yet"); + Diags.Report(Range.getBegin(), DiagID) + << T->getName(Context.getASTContext().getPrintingPolicy()) + << Range; + break; + } } } // ::= -void MicrosoftCXXNameMangler::mangleType(const FunctionProtoType *T) { +void MicrosoftCXXNameMangler::mangleType(const FunctionProtoType *T, + SourceRange) { // Structors only appear in decls, so at this point we know 
it's not a // structor type. - // I'll probably have mangleType(MemberPointerType) call the mangleType() - // method directly. mangleType(T, NULL, false, false); } -void MicrosoftCXXNameMangler::mangleType(const FunctionNoProtoType *T) { +void MicrosoftCXXNameMangler::mangleType(const FunctionNoProtoType *T, + SourceRange) { llvm_unreachable("Can't mangle K&R function prototypes"); } @@ -753,8 +1085,23 @@ void MicrosoftCXXNameMangler::mangleType(const FunctionType *T, // ::= @ # structors (they have no declared return type) if (IsStructor) Out << '@'; - else - mangleType(Proto->getResultType()); + else { + QualType Result = Proto->getResultType(); + const Type* RT = Result.getTypePtr(); + if (!RT->isAnyPointerType() && !RT->isReferenceType()) { + if (Result.hasQualifiers() || !RT->isBuiltinType()) + Out << '?'; + if (!RT->isBuiltinType() && !Result.hasQualifiers()) { + // Lack of qualifiers for user types is mangled as 'A'. + Out << 'A'; + } + } + + // FIXME: Get the source range for the result type. Or, better yet, + // implement the unimplemented stuff so we don't need accurate source + // location info anymore :). + mangleType(Result, SourceRange()); + } // ::= X # void // ::= + @ @@ -763,17 +1110,21 @@ void MicrosoftCXXNameMangler::mangleType(const FunctionType *T, Out << 'X'; } else { if (D) { - // If we got a decl, use the "types-as-written" to make sure arrays - // get mangled right. + // If we got a decl, use the type-as-written to make sure arrays + // get mangled right. Note that we can't rely on the TSI + // existing if (for example) the parameter was synthesized. for (FunctionDecl::param_const_iterator Parm = D->param_begin(), - ParmEnd = D->param_end(); - Parm != ParmEnd; ++Parm) - mangleType((*Parm)->getTypeSourceInfo()->getType()); + ParmEnd = D->param_end(); Parm != ParmEnd; ++Parm) { + TypeSourceInfo *TSI = (*Parm)->getTypeSourceInfo(); + QualType Type = TSI ? TSI->getType() : (*Parm)->getType(); + mangleTypeRepeated(Type, (*Parm)->getSourceRange()); + } } else { + // Happens for function pointer type arguments for example. for (FunctionProtoType::arg_type_iterator Arg = Proto->arg_type_begin(), ArgEnd = Proto->arg_type_end(); Arg != ArgEnd; ++Arg) - mangleType(*Arg); + mangleTypeRepeated(*Arg, SourceRange()); } // ::= Z # ellipsis if (Proto->isVariadic()) @@ -860,8 +1211,16 @@ void MicrosoftCXXNameMangler::mangleCallingConvention(const FunctionType *T, // that they could be in a DLL and somebody from another module could call // them.) CallingConv CC = T->getCallConv(); - if (CC == CC_Default) - CC = IsInstMethod ? getASTContext().getDefaultMethodCallConv() : CC_C; + if (CC == CC_Default) { + if (IsInstMethod) { + const FunctionProtoType *FPT = + T->getCanonicalTypeUnqualified().getAs(); + bool isVariadic = FPT->isVariadic(); + CC = getASTContext().getDefaultCXXMethodCallConv(isVariadic); + } else { + CC = CC_C; + } + } switch (CC) { default: llvm_unreachable("Unsupported CC for mangling"); @@ -884,8 +1243,15 @@ void MicrosoftCXXNameMangler::mangleThrowSpecification( Out << 'Z'; } -void MicrosoftCXXNameMangler::mangleType(const UnresolvedUsingType *T) { - llvm_unreachable("Don't know how to mangle UnresolvedUsingTypes yet!"); +void MicrosoftCXXNameMangler::mangleType(const UnresolvedUsingType *T, + SourceRange Range) { + // Probably should be mangled as a template instantiation; need to see what + // VC does first. 
+ DiagnosticsEngine &Diags = Context.getDiags(); + unsigned DiagID = Diags.getCustomDiagID(DiagnosticsEngine::Error, + "cannot mangle this unresolved dependent type yet"); + Diags.Report(Range.getBegin(), DiagID) + << Range; } // ::= | | | @@ -893,10 +1259,10 @@ void MicrosoftCXXNameMangler::mangleType(const UnresolvedUsingType *T) { // ::= U // ::= V // ::= W -void MicrosoftCXXNameMangler::mangleType(const EnumType *T) { +void MicrosoftCXXNameMangler::mangleType(const EnumType *T, SourceRange) { mangleType(static_cast(T)); } -void MicrosoftCXXNameMangler::mangleType(const RecordType *T) { +void MicrosoftCXXNameMangler::mangleType(const RecordType *T, SourceRange) { mangleType(static_cast(T)); } void MicrosoftCXXNameMangler::mangleType(const TagType *T) { @@ -936,31 +1302,48 @@ void MicrosoftCXXNameMangler::mangleType(const ArrayType *T, bool IsGlobal) { Out << 'Q'; mangleExtraDimensions(T->getElementType()); } -void MicrosoftCXXNameMangler::mangleType(const ConstantArrayType *T) { +void MicrosoftCXXNameMangler::mangleType(const ConstantArrayType *T, + SourceRange) { mangleType(static_cast(T), false); } -void MicrosoftCXXNameMangler::mangleType(const VariableArrayType *T) { +void MicrosoftCXXNameMangler::mangleType(const VariableArrayType *T, + SourceRange) { mangleType(static_cast(T), false); } -void MicrosoftCXXNameMangler::mangleType(const DependentSizedArrayType *T) { +void MicrosoftCXXNameMangler::mangleType(const DependentSizedArrayType *T, + SourceRange) { mangleType(static_cast(T), false); } -void MicrosoftCXXNameMangler::mangleType(const IncompleteArrayType *T) { +void MicrosoftCXXNameMangler::mangleType(const IncompleteArrayType *T, + SourceRange) { mangleType(static_cast(T), false); } void MicrosoftCXXNameMangler::mangleExtraDimensions(QualType ElementTy) { SmallVector Dimensions; for (;;) { - if (ElementTy->isConstantArrayType()) { - const ConstantArrayType *CAT = - static_cast(ElementTy.getTypePtr()); + if (const ConstantArrayType *CAT = + getASTContext().getAsConstantArrayType(ElementTy)) { Dimensions.push_back(CAT->getSize()); ElementTy = CAT->getElementType(); } else if (ElementTy->isVariableArrayType()) { - llvm_unreachable("Don't know how to mangle VLAs!"); + const VariableArrayType *VAT = + getASTContext().getAsVariableArrayType(ElementTy); + DiagnosticsEngine &Diags = Context.getDiags(); + unsigned DiagID = Diags.getCustomDiagID(DiagnosticsEngine::Error, + "cannot mangle this variable-length array yet"); + Diags.Report(VAT->getSizeExpr()->getExprLoc(), DiagID) + << VAT->getBracketsRange(); + return; } else if (ElementTy->isDependentSizedArrayType()) { // The dependent expression has to be folded into a constant (TODO). 
- llvm_unreachable("Don't know how to mangle dependent-sized arrays!"); + const DependentSizedArrayType *DSAT = + getASTContext().getAsDependentSizedArrayType(ElementTy); + DiagnosticsEngine &Diags = Context.getDiags(); + unsigned DiagID = Diags.getCustomDiagID(DiagnosticsEngine::Error, + "cannot mangle this dependent-length array yet"); + Diags.Report(DSAT->getSizeExpr()->getExprLoc(), DiagID) + << DSAT->getBracketsRange(); + return; } else if (ElementTy->isIncompleteArrayType()) continue; else break; } @@ -974,151 +1357,246 @@ void MicrosoftCXXNameMangler::mangleExtraDimensions(QualType ElementTy) { mangleNumber(Dimensions[Dim].getLimitedValue()); } } - mangleType(ElementTy.getLocalUnqualifiedType()); + mangleType(ElementTy.getLocalUnqualifiedType(), SourceRange()); } // ::= // ::= // -void MicrosoftCXXNameMangler::mangleType(const MemberPointerType *T) { +void MicrosoftCXXNameMangler::mangleType(const MemberPointerType *T, + SourceRange Range) { QualType PointeeType = T->getPointeeType(); - if (const FunctionProtoType *FPT = dyn_cast(PointeeType)) { + if (const FunctionProtoType *FPT = PointeeType->getAs()) { Out << '8'; - mangleName(cast(T->getClass())->getDecl()); + mangleName(T->getClass()->castAs()->getDecl()); mangleType(FPT, NULL, false, true); } else { mangleQualifiers(PointeeType.getQualifiers(), true); - mangleName(cast(T->getClass())->getDecl()); - mangleType(PointeeType.getLocalUnqualifiedType()); + mangleName(T->getClass()->castAs()->getDecl()); + mangleType(PointeeType.getLocalUnqualifiedType(), Range); } } -void MicrosoftCXXNameMangler::mangleType(const TemplateTypeParmType *T) { - llvm_unreachable("Don't know how to mangle TemplateTypeParmTypes yet!"); +void MicrosoftCXXNameMangler::mangleType(const TemplateTypeParmType *T, + SourceRange Range) { + DiagnosticsEngine &Diags = Context.getDiags(); + unsigned DiagID = Diags.getCustomDiagID(DiagnosticsEngine::Error, + "cannot mangle this template type parameter type yet"); + Diags.Report(Range.getBegin(), DiagID) + << Range; } void MicrosoftCXXNameMangler::mangleType( - const SubstTemplateTypeParmPackType *T) { - llvm_unreachable( - "Don't know how to mangle SubstTemplateTypeParmPackTypes yet!"); + const SubstTemplateTypeParmPackType *T, + SourceRange Range) { + DiagnosticsEngine &Diags = Context.getDiags(); + unsigned DiagID = Diags.getCustomDiagID(DiagnosticsEngine::Error, + "cannot mangle this substituted parameter pack yet"); + Diags.Report(Range.getBegin(), DiagID) + << Range; } // ::= // ::= -void MicrosoftCXXNameMangler::mangleType(const PointerType *T) { +void MicrosoftCXXNameMangler::mangleType(const PointerType *T, + SourceRange Range) { QualType PointeeTy = T->getPointeeType(); if (PointeeTy->isArrayType()) { // Pointers to arrays are mangled like arrays. - mangleExtraDimensions(T->getPointeeType()); - } else if (PointeeTy->isFunctionType()) { + mangleExtraDimensions(PointeeTy); + } else if (const FunctionType *FT = PointeeTy->getAs()) { // Function pointers are special. Out << '6'; - mangleType(static_cast(PointeeTy.getTypePtr()), - NULL, false, false); + mangleType(FT, NULL, false, false); } else { if (!PointeeTy.hasQualifiers()) // Lack of qualifiers is mangled as 'A'. Out << 'A'; - mangleType(PointeeTy); + mangleType(PointeeTy, Range); } } -void MicrosoftCXXNameMangler::mangleType(const ObjCObjectPointerType *T) { +void MicrosoftCXXNameMangler::mangleType(const ObjCObjectPointerType *T, + SourceRange Range) { // Object pointers never have qualifiers. 
Out << 'A'; - mangleType(T->getPointeeType()); + mangleType(T->getPointeeType(), Range); } // ::= // ::= A -void MicrosoftCXXNameMangler::mangleType(const LValueReferenceType *T) { +void MicrosoftCXXNameMangler::mangleType(const LValueReferenceType *T, + SourceRange Range) { Out << 'A'; QualType PointeeTy = T->getPointeeType(); if (!PointeeTy.hasQualifiers()) // Lack of qualifiers is mangled as 'A'. Out << 'A'; - mangleType(PointeeTy); -} - -void MicrosoftCXXNameMangler::mangleType(const RValueReferenceType *T) { - llvm_unreachable("Don't know how to mangle RValueReferenceTypes yet!"); -} - -void MicrosoftCXXNameMangler::mangleType(const ComplexType *T) { - llvm_unreachable("Don't know how to mangle ComplexTypes yet!"); + mangleType(PointeeTy, Range); } -void MicrosoftCXXNameMangler::mangleType(const VectorType *T) { - llvm_unreachable("Don't know how to mangle VectorTypes yet!"); -} -void MicrosoftCXXNameMangler::mangleType(const ExtVectorType *T) { - llvm_unreachable("Don't know how to mangle ExtVectorTypes yet!"); -} -void MicrosoftCXXNameMangler::mangleType(const DependentSizedExtVectorType *T) { - llvm_unreachable( - "Don't know how to mangle DependentSizedExtVectorTypes yet!"); -} - -void MicrosoftCXXNameMangler::mangleType(const ObjCInterfaceType *T) { +// ::= +// ::= $$Q +void MicrosoftCXXNameMangler::mangleType(const RValueReferenceType *T, + SourceRange Range) { + Out << "$$Q"; + QualType PointeeTy = T->getPointeeType(); + if (!PointeeTy.hasQualifiers()) + // Lack of qualifiers is mangled as 'A'. + Out << 'A'; + mangleType(PointeeTy, Range); +} + +void MicrosoftCXXNameMangler::mangleType(const ComplexType *T, + SourceRange Range) { + DiagnosticsEngine &Diags = Context.getDiags(); + unsigned DiagID = Diags.getCustomDiagID(DiagnosticsEngine::Error, + "cannot mangle this complex number type yet"); + Diags.Report(Range.getBegin(), DiagID) + << Range; +} + +void MicrosoftCXXNameMangler::mangleType(const VectorType *T, + SourceRange Range) { + DiagnosticsEngine &Diags = Context.getDiags(); + unsigned DiagID = Diags.getCustomDiagID(DiagnosticsEngine::Error, + "cannot mangle this vector type yet"); + Diags.Report(Range.getBegin(), DiagID) + << Range; +} +void MicrosoftCXXNameMangler::mangleType(const ExtVectorType *T, + SourceRange Range) { + DiagnosticsEngine &Diags = Context.getDiags(); + unsigned DiagID = Diags.getCustomDiagID(DiagnosticsEngine::Error, + "cannot mangle this extended vector type yet"); + Diags.Report(Range.getBegin(), DiagID) + << Range; +} +void MicrosoftCXXNameMangler::mangleType(const DependentSizedExtVectorType *T, + SourceRange Range) { + DiagnosticsEngine &Diags = Context.getDiags(); + unsigned DiagID = Diags.getCustomDiagID(DiagnosticsEngine::Error, + "cannot mangle this dependent-sized extended vector type yet"); + Diags.Report(Range.getBegin(), DiagID) + << Range; +} + +void MicrosoftCXXNameMangler::mangleType(const ObjCInterfaceType *T, + SourceRange) { // ObjC interfaces have structs underlying them. Out << 'U'; mangleName(T->getDecl()); } -void MicrosoftCXXNameMangler::mangleType(const ObjCObjectType *T) { +void MicrosoftCXXNameMangler::mangleType(const ObjCObjectType *T, + SourceRange Range) { // We don't allow overloading by different protocol qualification, // so mangling them isn't necessary. 
- mangleType(T->getBaseType()); + mangleType(T->getBaseType(), Range); } -void MicrosoftCXXNameMangler::mangleType(const BlockPointerType *T) { +void MicrosoftCXXNameMangler::mangleType(const BlockPointerType *T, + SourceRange Range) { Out << "_E"; - mangleType(T->getPointeeType()); + mangleType(T->getPointeeType(), Range); } -void MicrosoftCXXNameMangler::mangleType(const InjectedClassNameType *T) { - llvm_unreachable("Don't know how to mangle InjectedClassNameTypes yet!"); +void MicrosoftCXXNameMangler::mangleType(const InjectedClassNameType *T, + SourceRange Range) { + DiagnosticsEngine &Diags = Context.getDiags(); + unsigned DiagID = Diags.getCustomDiagID(DiagnosticsEngine::Error, + "cannot mangle this injected class name type yet"); + Diags.Report(Range.getBegin(), DiagID) + << Range; } -void MicrosoftCXXNameMangler::mangleType(const TemplateSpecializationType *T) { - llvm_unreachable("Don't know how to mangle TemplateSpecializationTypes yet!"); +void MicrosoftCXXNameMangler::mangleType(const TemplateSpecializationType *T, + SourceRange Range) { + DiagnosticsEngine &Diags = Context.getDiags(); + unsigned DiagID = Diags.getCustomDiagID(DiagnosticsEngine::Error, + "cannot mangle this template specialization type yet"); + Diags.Report(Range.getBegin(), DiagID) + << Range; } -void MicrosoftCXXNameMangler::mangleType(const DependentNameType *T) { - llvm_unreachable("Don't know how to mangle DependentNameTypes yet!"); +void MicrosoftCXXNameMangler::mangleType(const DependentNameType *T, + SourceRange Range) { + DiagnosticsEngine &Diags = Context.getDiags(); + unsigned DiagID = Diags.getCustomDiagID(DiagnosticsEngine::Error, + "cannot mangle this dependent name type yet"); + Diags.Report(Range.getBegin(), DiagID) + << Range; } void MicrosoftCXXNameMangler::mangleType( - const DependentTemplateSpecializationType *T) { - llvm_unreachable( - "Don't know how to mangle DependentTemplateSpecializationTypes yet!"); -} - -void MicrosoftCXXNameMangler::mangleType(const PackExpansionType *T) { - llvm_unreachable("Don't know how to mangle PackExpansionTypes yet!"); -} - -void MicrosoftCXXNameMangler::mangleType(const TypeOfType *T) { - llvm_unreachable("Don't know how to mangle TypeOfTypes yet!"); -} - -void MicrosoftCXXNameMangler::mangleType(const TypeOfExprType *T) { - llvm_unreachable("Don't know how to mangle TypeOfExprTypes yet!"); -} - -void MicrosoftCXXNameMangler::mangleType(const DecltypeType *T) { - llvm_unreachable("Don't know how to mangle DecltypeTypes yet!"); -} - -void MicrosoftCXXNameMangler::mangleType(const UnaryTransformType *T) { - llvm_unreachable("Don't know how to mangle UnaryTransformationTypes yet!"); -} - -void MicrosoftCXXNameMangler::mangleType(const AutoType *T) { - llvm_unreachable("Don't know how to mangle AutoTypes yet!"); -} - -void MicrosoftCXXNameMangler::mangleType(const AtomicType *T) { - llvm_unreachable("Don't know how to mangle AtomicTypes yet!"); + const DependentTemplateSpecializationType *T, + SourceRange Range) { + DiagnosticsEngine &Diags = Context.getDiags(); + unsigned DiagID = Diags.getCustomDiagID(DiagnosticsEngine::Error, + "cannot mangle this dependent template specialization type yet"); + Diags.Report(Range.getBegin(), DiagID) + << Range; +} + +void MicrosoftCXXNameMangler::mangleType(const PackExpansionType *T, + SourceRange Range) { + DiagnosticsEngine &Diags = Context.getDiags(); + unsigned DiagID = Diags.getCustomDiagID(DiagnosticsEngine::Error, + "cannot mangle this pack expansion yet"); + Diags.Report(Range.getBegin(), DiagID) + << Range; +} + 
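For orientation, two expected manglings (mine, not asserted anywhere in the patch) showing the effect of the ten-slot argument back-references that mangleTypeRepeated, earlier in this file, implements; only encodings longer than one character occupy a slot:

  struct S {};
  void f(S, S);      // ?f@@YAXUS@@0@Z  -- the second 'US@@' collapses to '0'
  void g(int, int);  // ?g@@YAXHH@Z     -- 'H' (int) is too short to be slotted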
+void MicrosoftCXXNameMangler::mangleType(const TypeOfType *T, + SourceRange Range) { + DiagnosticsEngine &Diags = Context.getDiags(); + unsigned DiagID = Diags.getCustomDiagID(DiagnosticsEngine::Error, + "cannot mangle this typeof(type) yet"); + Diags.Report(Range.getBegin(), DiagID) + << Range; +} + +void MicrosoftCXXNameMangler::mangleType(const TypeOfExprType *T, + SourceRange Range) { + DiagnosticsEngine &Diags = Context.getDiags(); + unsigned DiagID = Diags.getCustomDiagID(DiagnosticsEngine::Error, + "cannot mangle this typeof(expression) yet"); + Diags.Report(Range.getBegin(), DiagID) + << Range; +} + +void MicrosoftCXXNameMangler::mangleType(const DecltypeType *T, + SourceRange Range) { + DiagnosticsEngine &Diags = Context.getDiags(); + unsigned DiagID = Diags.getCustomDiagID(DiagnosticsEngine::Error, + "cannot mangle this decltype() yet"); + Diags.Report(Range.getBegin(), DiagID) + << Range; +} + +void MicrosoftCXXNameMangler::mangleType(const UnaryTransformType *T, + SourceRange Range) { + DiagnosticsEngine &Diags = Context.getDiags(); + unsigned DiagID = Diags.getCustomDiagID(DiagnosticsEngine::Error, + "cannot mangle this unary transform type yet"); + Diags.Report(Range.getBegin(), DiagID) + << Range; +} + +void MicrosoftCXXNameMangler::mangleType(const AutoType *T, SourceRange Range) { + DiagnosticsEngine &Diags = Context.getDiags(); + unsigned DiagID = Diags.getCustomDiagID(DiagnosticsEngine::Error, + "cannot mangle this 'auto' type yet"); + Diags.Report(Range.getBegin(), DiagID) + << Range; +} + +void MicrosoftCXXNameMangler::mangleType(const AtomicType *T, + SourceRange Range) { + DiagnosticsEngine &Diags = Context.getDiags(); + unsigned DiagID = Diags.getCustomDiagID(DiagnosticsEngine::Error, + "cannot mangle this C11 atomic type yet"); + Diags.Report(Range.getBegin(), DiagID) + << Range; } void MicrosoftMangleContext::mangleName(const NamedDecl *D, @@ -1138,17 +1616,35 @@ void MicrosoftMangleContext::mangleName(const NamedDecl *D, void MicrosoftMangleContext::mangleThunk(const CXXMethodDecl *MD, const ThunkInfo &Thunk, raw_ostream &) { - llvm_unreachable("Can't yet mangle thunks!"); + unsigned DiagID = getDiags().getCustomDiagID(DiagnosticsEngine::Error, + "cannot mangle thunk for this method yet"); + getDiags().Report(MD->getLocation(), DiagID); } void MicrosoftMangleContext::mangleCXXDtorThunk(const CXXDestructorDecl *DD, CXXDtorType Type, const ThisAdjustment &, raw_ostream &) { - llvm_unreachable("Can't yet mangle destructor thunks!"); + unsigned DiagID = getDiags().getCustomDiagID(DiagnosticsEngine::Error, + "cannot mangle thunk for this destructor yet"); + getDiags().Report(DD->getLocation(), DiagID); } void MicrosoftMangleContext::mangleCXXVTable(const CXXRecordDecl *RD, - raw_ostream &) { - llvm_unreachable("Can't yet mangle virtual tables!"); + raw_ostream &Out) { + // ::= ? + // [] @ + // ::= _7 # vftable + // ::= _8 # vbtable + // NOTE: here is always 'B' (const). + // is always '6' for vftables and '7' for vbtables. (The difference is + // beyond me.) + // TODO: vbtables. + MicrosoftCXXNameMangler Mangler(*this, Out); + Mangler.getStream() << "\01??_7"; + Mangler.mangleName(RD); + Mangler.getStream() << "6B"; + // TODO: If the class has more than one vtable, mangle in the class it came + // from. 
+ Mangler.getStream() << '@'; } void MicrosoftMangleContext::mangleCXXVTT(const CXXRecordDecl *RD, raw_ostream &) { @@ -1162,11 +1658,19 @@ void MicrosoftMangleContext::mangleCXXCtorVTable(const CXXRecordDecl *RD, } void MicrosoftMangleContext::mangleCXXRTTI(QualType T, raw_ostream &) { - llvm_unreachable("Can't yet mangle RTTI!"); + // FIXME: Give a location... + unsigned DiagID = getDiags().getCustomDiagID(DiagnosticsEngine::Error, + "cannot mangle RTTI descriptors for type %0 yet"); + getDiags().Report(DiagID) + << T.getBaseTypeIdentifier(); } void MicrosoftMangleContext::mangleCXXRTTIName(QualType T, raw_ostream &) { - llvm_unreachable("Can't yet mangle RTTI names!"); + // FIXME: Give a location... + unsigned DiagID = getDiags().getCustomDiagID(DiagnosticsEngine::Error, + "cannot mangle the name of type %0 into RTTI descriptors yet"); + getDiags().Report(DiagID) + << T.getBaseTypeIdentifier(); } void MicrosoftMangleContext::mangleCXXCtor(const CXXConstructorDecl *D, CXXCtorType Type, @@ -1180,9 +1684,11 @@ void MicrosoftMangleContext::mangleCXXDtor(const CXXDestructorDecl *D, MicrosoftCXXNameMangler mangler(*this, Out); mangler.mangle(D); } -void MicrosoftMangleContext::mangleReferenceTemporary(const clang::VarDecl *, +void MicrosoftMangleContext::mangleReferenceTemporary(const clang::VarDecl *VD, raw_ostream &) { - llvm_unreachable("Can't yet mangle reference temporaries!"); + unsigned DiagID = getDiags().getCustomDiagID(DiagnosticsEngine::Error, + "cannot mangle this reference temporary yet"); + getDiags().Report(VD->getLocation(), DiagID); } MangleContext *clang::createMicrosoftMangleContext(ASTContext &Context, diff --git a/lib/AST/NSAPI.cpp b/lib/AST/NSAPI.cpp index f5ea2c5..39077d1 100644 --- a/lib/AST/NSAPI.cpp +++ b/lib/AST/NSAPI.cpp @@ -9,11 +9,13 @@ #include "clang/AST/NSAPI.h" #include "clang/AST/ASTContext.h" +#include "clang/AST/Expr.h" using namespace clang; NSAPI::NSAPI(ASTContext &ctx) - : Ctx(ctx), ClassIds() { + : Ctx(ctx), ClassIds(), BOOLId(0), NSIntegerId(0), NSUIntegerId(0), + NSASCIIStringEncodingId(0), NSUTF8StringEncodingId(0) { } IdentifierInfo *NSAPI::getNSClassId(NSClassIdKindKind K) const { @@ -40,6 +42,21 @@ Selector NSAPI::getNSStringSelector(NSStringMethodKind MK) const { case NSStr_stringWithString: Sel = Ctx.Selectors.getUnarySelector(&Ctx.Idents.get("stringWithString")); break; + case NSStr_stringWithUTF8String: + Sel = Ctx.Selectors.getUnarySelector( + &Ctx.Idents.get("stringWithUTF8String")); + break; + case NSStr_stringWithCStringEncoding: { + IdentifierInfo *KeyIdents[] = { + &Ctx.Idents.get("stringWithCString"), + &Ctx.Idents.get("encoding") + }; + Sel = Ctx.Selectors.getSelector(2, KeyIdents); + break; + } + case NSStr_stringWithCString: + Sel= Ctx.Selectors.getUnarySelector(&Ctx.Idents.get("stringWithCString")); + break; case NSStr_initWithString: Sel = Ctx.Selectors.getUnarySelector(&Ctx.Idents.get("initWithString")); break; @@ -50,6 +67,17 @@ Selector NSAPI::getNSStringSelector(NSStringMethodKind MK) const { return NSStringSelectors[MK]; } +llvm::Optional +NSAPI::getNSStringMethodKind(Selector Sel) const { + for (unsigned i = 0; i != NumNSStringMethods; ++i) { + NSStringMethodKind MK = NSStringMethodKind(i); + if (Sel == getNSStringSelector(MK)) + return MK; + } + + return llvm::Optional(); +} + Selector NSAPI::getNSArraySelector(NSArrayMethodKind MK) const { if (NSArraySelectors[MK].isNull()) { Selector Sel; @@ -251,11 +279,22 @@ NSAPI::getNSNumberLiteralMethodKind(Selector Sel) const { } llvm::Optional 
-NSAPI::getNSNumberFactoryMethodKind(QualType T) { +NSAPI::getNSNumberFactoryMethodKind(QualType T) const { const BuiltinType *BT = T->getAs(); if (!BT) return llvm::Optional(); - + + const TypedefType *TDT = T->getAs(); + if (TDT) { + QualType TDTTy = QualType(TDT, 0); + if (isObjCBOOLType(TDTTy)) + return NSAPI::NSNumberWithBool; + if (isObjCNSIntegerType(TDTTy)) + return NSAPI::NSNumberWithInteger; + if (isObjCNSUIntegerType(TDTTy)) + return NSAPI::NSNumberWithUnsignedInteger; + } + switch (BT->getKind()) { case BuiltinType::Char_S: case BuiltinType::SChar: @@ -310,3 +349,65 @@ NSAPI::getNSNumberFactoryMethodKind(QualType T) { return llvm::Optional(); } + +/// \brief Returns true if \param T is a typedef of "BOOL" in objective-c. +bool NSAPI::isObjCBOOLType(QualType T) const { + return isObjCTypedef(T, "BOOL", BOOLId); +} +/// \brief Returns true if \param T is a typedef of "NSInteger" in objective-c. +bool NSAPI::isObjCNSIntegerType(QualType T) const { + return isObjCTypedef(T, "NSInteger", NSIntegerId); +} +/// \brief Returns true if \param T is a typedef of "NSUInteger" in objective-c. +bool NSAPI::isObjCNSUIntegerType(QualType T) const { + return isObjCTypedef(T, "NSUInteger", NSUIntegerId); +} + +bool NSAPI::isObjCTypedef(QualType T, + StringRef name, IdentifierInfo *&II) const { + if (!Ctx.getLangOpts().ObjC1) + return false; + if (T.isNull()) + return false; + + if (!II) + II = &Ctx.Idents.get(name); + + while (const TypedefType *TDT = T->getAs()) { + if (TDT->getDecl()->getDeclName().getAsIdentifierInfo() == II) + return true; + T = TDT->desugar(); + } + + return false; +} + +bool NSAPI::isObjCEnumerator(const Expr *E, + StringRef name, IdentifierInfo *&II) const { + if (!Ctx.getLangOpts().ObjC1) + return false; + if (!E) + return false; + + if (!II) + II = &Ctx.Idents.get(name); + + if (const DeclRefExpr *DRE = dyn_cast(E->IgnoreParenImpCasts())) + if (const EnumConstantDecl * + EnumD = dyn_cast_or_null(DRE->getDecl())) + return EnumD->getIdentifier() == II; + + return false; +} + +Selector NSAPI::getOrInitSelector(ArrayRef Ids, + Selector &Sel) const { + if (Sel.isNull()) { + SmallVector Idents; + for (ArrayRef::const_iterator + I = Ids.begin(), E = Ids.end(); I != E; ++I) + Idents.push_back(&Ctx.Idents.get(*I)); + Sel = Ctx.Selectors.getSelector(Idents.size(), Idents.data()); + } + return Sel; +} diff --git a/lib/AST/ParentMap.cpp b/lib/AST/ParentMap.cpp index 64016d9..fa87afd 100644 --- a/lib/AST/ParentMap.cpp +++ b/lib/AST/ParentMap.cpp @@ -23,13 +23,20 @@ typedef llvm::DenseMap MapTy; static void BuildParentMap(MapTy& M, Stmt* S) { for (Stmt::child_range I = S->children(); I; ++I) if (*I) { - M[*I] = S; - BuildParentMap(M, *I); + // Prefer the first time we see this statement in the traversal. + // This is important for PseudoObjectExprs. + Stmt *&Parent = M[*I]; + if (!Parent) { + Parent = S; + BuildParentMap(M, *I); + } } // Also include the source expr tree of an OpaqueValueExpr in the map. 
- if (const OpaqueValueExpr *OVE = dyn_cast(S)) + if (const OpaqueValueExpr *OVE = dyn_cast(S)) { + M[OVE->getSourceExpr()] = S; BuildParentMap(M, OVE->getSourceExpr()); + } } ParentMap::ParentMap(Stmt* S) : Impl(0) { diff --git a/lib/AST/RawCommentList.cpp b/lib/AST/RawCommentList.cpp new file mode 100644 index 0000000..c704cab --- /dev/null +++ b/lib/AST/RawCommentList.cpp @@ -0,0 +1,271 @@ +//===--- RawCommentList.cpp - Processing raw comments -----------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "clang/AST/RawCommentList.h" +#include "clang/AST/ASTContext.h" +#include "clang/AST/Comment.h" +#include "clang/AST/CommentLexer.h" +#include "clang/AST/CommentBriefParser.h" +#include "clang/AST/CommentSema.h" +#include "clang/AST/CommentParser.h" +#include "clang/AST/CommentCommandTraits.h" +#include "llvm/ADT/STLExtras.h" + +using namespace clang; + +namespace { +/// Get comment kind and bool describing if it is a trailing comment. +std::pair getCommentKind(StringRef Comment) { + if (Comment.size() < 3 || Comment[0] != '/') + return std::make_pair(RawComment::RCK_Invalid, false); + + RawComment::CommentKind K; + if (Comment[1] == '/') { + if (Comment.size() < 3) + return std::make_pair(RawComment::RCK_OrdinaryBCPL, false); + + if (Comment[2] == '/') + K = RawComment::RCK_BCPLSlash; + else if (Comment[2] == '!') + K = RawComment::RCK_BCPLExcl; + else + return std::make_pair(RawComment::RCK_OrdinaryBCPL, false); + } else { + assert(Comment.size() >= 4); + + // Comment lexer does not understand escapes in comment markers, so pretend + // that this is not a comment. + if (Comment[1] != '*' || + Comment[Comment.size() - 2] != '*' || + Comment[Comment.size() - 1] != '/') + return std::make_pair(RawComment::RCK_Invalid, false); + + if (Comment[2] == '*') + K = RawComment::RCK_JavaDoc; + else if (Comment[2] == '!') + K = RawComment::RCK_Qt; + else + return std::make_pair(RawComment::RCK_OrdinaryC, false); + } + const bool TrailingComment = (Comment.size() > 3) && (Comment[3] == '<'); + return std::make_pair(K, TrailingComment); +} + +bool mergedCommentIsTrailingComment(StringRef Comment) { + return (Comment.size() > 3) && (Comment[3] == '<'); +} +} // unnamed namespace + +RawComment::RawComment(const SourceManager &SourceMgr, SourceRange SR, + bool Merged) : + Range(SR), RawTextValid(false), BriefTextValid(false), + IsAlmostTrailingComment(false), + BeginLineValid(false), EndLineValid(false) { + // Extract raw comment text, if possible. + if (SR.getBegin() == SR.getEnd() || getRawText(SourceMgr).empty()) { + Kind = RCK_Invalid; + return; + } + + if (!Merged) { + // Guess comment kind. 
+ std::pair K = getCommentKind(RawText); + Kind = K.first; + IsTrailingComment = K.second; + + IsAlmostTrailingComment = RawText.startswith("//<") || + RawText.startswith("/*<"); + } else { + Kind = RCK_Merged; + IsTrailingComment = mergedCommentIsTrailingComment(RawText); + } +} + +const Decl *RawComment::getDecl() const { + if (DeclOrParsedComment.isNull()) + return NULL; + + if (const Decl *D = DeclOrParsedComment.dyn_cast()) + return D; + + return DeclOrParsedComment.get()->getDecl(); +} + +unsigned RawComment::getBeginLine(const SourceManager &SM) const { + if (BeginLineValid) + return BeginLine; + + std::pair LocInfo = SM.getDecomposedLoc(Range.getBegin()); + BeginLine = SM.getLineNumber(LocInfo.first, LocInfo.second); + BeginLineValid = true; + return BeginLine; +} + +unsigned RawComment::getEndLine(const SourceManager &SM) const { + if (EndLineValid) + return EndLine; + + std::pair LocInfo = SM.getDecomposedLoc(Range.getEnd()); + EndLine = SM.getLineNumber(LocInfo.first, LocInfo.second); + EndLineValid = true; + return EndLine; +} + +StringRef RawComment::getRawTextSlow(const SourceManager &SourceMgr) const { + FileID BeginFileID; + FileID EndFileID; + unsigned BeginOffset; + unsigned EndOffset; + + llvm::tie(BeginFileID, BeginOffset) = + SourceMgr.getDecomposedLoc(Range.getBegin()); + llvm::tie(EndFileID, EndOffset) = + SourceMgr.getDecomposedLoc(Range.getEnd()); + + const unsigned Length = EndOffset - BeginOffset; + if (Length < 2) + return StringRef(); + + // The comment can't begin in one file and end in another. + assert(BeginFileID == EndFileID); + + bool Invalid = false; + const char *BufferStart = SourceMgr.getBufferData(BeginFileID, + &Invalid).data(); + if (Invalid) + return StringRef(); + + return StringRef(BufferStart + BeginOffset, Length); +} + +const char *RawComment::extractBriefText(const ASTContext &Context) const { + // Make sure that RawText is valid. + getRawText(Context.getSourceManager()); + + // Since we will be copying the resulting text, all allocations made during + // parsing are garbage after resulting string is formed. Thus we can use + // a separate allocator for all temporary stuff. + llvm::BumpPtrAllocator Allocator; + + comments::CommandTraits Traits; + comments::Lexer L(Allocator, Traits, + Range.getBegin(), comments::CommentOptions(), + RawText.begin(), RawText.end()); + comments::BriefParser P(L, Traits); + + const std::string Result = P.Parse(); + const unsigned BriefTextLength = Result.size(); + char *BriefTextPtr = new (Context) char[BriefTextLength + 1]; + memcpy(BriefTextPtr, Result.c_str(), BriefTextLength + 1); + BriefText = BriefTextPtr; + BriefTextValid = true; + + return BriefTextPtr; +} + +comments::FullComment *RawComment::parse(const ASTContext &Context) const { + // Make sure that RawText is valid. 
+ getRawText(Context.getSourceManager()); + + comments::CommandTraits Traits; + comments::Lexer L(Context.getAllocator(), Traits, + getSourceRange().getBegin(), comments::CommentOptions(), + RawText.begin(), RawText.end()); + comments::Sema S(Context.getAllocator(), Context.getSourceManager(), + Context.getDiagnostics(), Traits); + S.setDecl(getDecl()); + comments::Parser P(L, S, Context.getAllocator(), Context.getSourceManager(), + Context.getDiagnostics(), Traits); + + comments::FullComment *FC = P.parseFullComment(); + DeclOrParsedComment = FC; + return FC; +} + +namespace { +bool containsOnlyWhitespace(StringRef Str) { + return Str.find_first_not_of(" \t\f\v\r\n") == StringRef::npos; +} + +bool onlyWhitespaceBetweenComments(SourceManager &SM, + const RawComment &C1, const RawComment &C2) { + std::pair C1EndLocInfo = SM.getDecomposedLoc( + C1.getSourceRange().getEnd()); + std::pair C2BeginLocInfo = SM.getDecomposedLoc( + C2.getSourceRange().getBegin()); + + // Question does not make sense if comments are located in different files. + if (C1EndLocInfo.first != C2BeginLocInfo.first) + return false; + + bool Invalid = false; + const char *Buffer = SM.getBufferData(C1EndLocInfo.first, &Invalid).data(); + if (Invalid) + return false; + + StringRef TextBetweenComments(Buffer + C1EndLocInfo.second, + C2BeginLocInfo.second - C1EndLocInfo.second); + + return containsOnlyWhitespace(TextBetweenComments); +} +} // unnamed namespace + +void RawCommentList::addComment(const RawComment &RC, + llvm::BumpPtrAllocator &Allocator) { + if (RC.isInvalid()) + return; + + // Check if the comments are not in source order. + while (!Comments.empty() && + !SourceMgr.isBeforeInTranslationUnit( + Comments.back()->getSourceRange().getBegin(), + RC.getSourceRange().getBegin())) { + // If they are, just pop a few last comments that don't fit. + // This happens if an \#include directive contains comments. + Comments.pop_back(); + } + + if (OnlyWhitespaceSeen) { + if (!onlyWhitespaceBetweenComments(SourceMgr, LastComment, RC)) + OnlyWhitespaceSeen = false; + } + + LastComment = RC; + + // Ordinary comments are not interesting for us. + if (RC.isOrdinary()) + return; + + // If this is the first Doxygen comment, save it (because there isn't + // anything to merge it with). + if (Comments.empty()) { + Comments.push_back(new (Allocator) RawComment(RC)); + OnlyWhitespaceSeen = true; + return; + } + + const RawComment &C1 = *Comments.back(); + const RawComment &C2 = RC; + + // Merge comments only if there is only whitespace between them. + // Can't merge trailing and non-trailing comments. + // Merge trailing comments if they are on same or consecutive lines. 
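The three comment lines above state the whole merge policy, and the compound condition that implements it follows shortly. The same predicate in isolation, assuming the caller has already tracked whether only whitespace separates the two comments (an illustrative helper, not part of the patch):

    static bool shouldMergeComments(bool OnlyWhitespaceSeen,
                                    bool C1Trailing, bool C2Trailing,
                                    unsigned C1EndLine, unsigned C2BeginLine) {
      if (!OnlyWhitespaceSeen)
        return false;               // non-whitespace text intervened
      if (C1Trailing != C2Trailing)
        return false;               // never mix trailing and non-trailing
      // Trailing comments must sit on the same or consecutive lines.
      return !C1Trailing || C1EndLine + 1 >= C2BeginLine;
    }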
+ if (OnlyWhitespaceSeen && + (C1.isTrailingComment() == C2.isTrailingComment()) && + (!C1.isTrailingComment() || + C1.getEndLine(SourceMgr) + 1 >= C2.getBeginLine(SourceMgr))) { + SourceRange MergedRange(C1.getSourceRange().getBegin(), + C2.getSourceRange().getEnd()); + *Comments.back() = RawComment(SourceMgr, MergedRange, true); + } else + Comments.push_back(new (Allocator) RawComment(RC)); + + OnlyWhitespaceSeen = true; +} + diff --git a/lib/AST/RecordLayout.cpp b/lib/AST/RecordLayout.cpp index 0114eba..2ae0aab 100644 --- a/lib/AST/RecordLayout.cpp +++ b/lib/AST/RecordLayout.cpp @@ -32,7 +32,7 @@ ASTRecordLayout::ASTRecordLayout(const ASTContext &Ctx, CharUnits size, CharUnits alignment, CharUnits datasize, const uint64_t *fieldoffsets, unsigned fieldcount) - : Size(size), DataSize(datasize), FieldOffsets(0), Alignment(alignment), + : Size(size), DataSize(datasize), Alignment(alignment), FieldOffsets(0), FieldCount(fieldcount), CXXInfo(0) { if (FieldCount > 0) { FieldOffsets = new (Ctx) uint64_t[FieldCount]; @@ -43,7 +43,7 @@ ASTRecordLayout::ASTRecordLayout(const ASTContext &Ctx, CharUnits size, // Constructor for C++ records. ASTRecordLayout::ASTRecordLayout(const ASTContext &Ctx, CharUnits size, CharUnits alignment, - CharUnits vfptroffset, CharUnits vbptroffset, + bool hasOwnVFPtr, CharUnits vbptroffset, CharUnits datasize, const uint64_t *fieldoffsets, unsigned fieldcount, @@ -53,8 +53,8 @@ ASTRecordLayout::ASTRecordLayout(const ASTContext &Ctx, const CXXRecordDecl *PrimaryBase, bool IsPrimaryBaseVirtual, const BaseOffsetsMapTy& BaseOffsets, - const BaseOffsetsMapTy& VBaseOffsets) - : Size(size), DataSize(datasize), FieldOffsets(0), Alignment(alignment), + const VBaseOffsetsMapTy& VBaseOffsets) + : Size(size), DataSize(datasize), Alignment(alignment), FieldOffsets(0), FieldCount(fieldcount), CXXInfo(new (Ctx) CXXRecordLayoutInfo) { if (FieldCount > 0) { @@ -69,7 +69,7 @@ ASTRecordLayout::ASTRecordLayout(const ASTContext &Ctx, CXXInfo->SizeOfLargestEmptySubobject = SizeOfLargestEmptySubobject; CXXInfo->BaseOffsets = BaseOffsets; CXXInfo->VBaseOffsets = VBaseOffsets; - CXXInfo->VFPtrOffset = vfptroffset; + CXXInfo->HasOwnVFPtr = hasOwnVFPtr; CXXInfo->VBPtrOffset = vbptroffset; #ifndef NDEBUG @@ -81,7 +81,7 @@ ASTRecordLayout::ASTRecordLayout(const ASTContext &Ctx, "Primary virtual base must be at offset 0!"); } } else { - assert(getBaseClassOffsetInBits(PrimaryBase) == 0 && + assert(getBaseClassOffset(PrimaryBase).isZero() && "Primary base must be at offset 0!"); } } diff --git a/lib/AST/RecordLayoutBuilder.cpp b/lib/AST/RecordLayoutBuilder.cpp index c2d9294..d5df63f 100644 --- a/lib/AST/RecordLayoutBuilder.cpp +++ b/lib/AST/RecordLayoutBuilder.cpp @@ -7,6 +7,7 @@ // //===----------------------------------------------------------------------===// +#include "clang/AST/ASTContext.h" #include "clang/AST/Attr.h" #include "clang/AST/CXXInheritance.h" #include "clang/AST/Decl.h" @@ -161,10 +162,9 @@ void EmptySubobjectMap::ComputeEmptySubobjectSizes() { // Check the fields. for (CXXRecordDecl::field_iterator I = Class->field_begin(), E = Class->field_end(); I != E; ++I) { - const FieldDecl *FD = *I; const RecordType *RT = - Context.getBaseElementType(FD->getType())->getAs(); + Context.getBaseElementType(I->getType())->getAs(); // We only care about record types. 
if (!RT) @@ -261,12 +261,11 @@ EmptySubobjectMap::CanPlaceBaseSubobjectAtOffset(const BaseSubobjectInfo *Info, unsigned FieldNo = 0; for (CXXRecordDecl::field_iterator I = Info->Class->field_begin(), E = Info->Class->field_end(); I != E; ++I, ++FieldNo) { - const FieldDecl *FD = *I; - if (FD->isBitField()) + if (I->isBitField()) continue; CharUnits FieldOffset = Offset + getFieldOffset(Layout, FieldNo); - if (!CanPlaceFieldSubobjectAtOffset(FD, FieldOffset)) + if (!CanPlaceFieldSubobjectAtOffset(*I, FieldOffset)) return false; } @@ -310,12 +309,11 @@ void EmptySubobjectMap::UpdateEmptyBaseSubobjects(const BaseSubobjectInfo *Info, unsigned FieldNo = 0; for (CXXRecordDecl::field_iterator I = Info->Class->field_begin(), E = Info->Class->field_end(); I != E; ++I, ++FieldNo) { - const FieldDecl *FD = *I; - if (FD->isBitField()) + if (I->isBitField()) continue; CharUnits FieldOffset = Offset + getFieldOffset(Layout, FieldNo); - UpdateEmptyFieldSubobjects(FD, FieldOffset); + UpdateEmptyFieldSubobjects(*I, FieldOffset); } } @@ -380,13 +378,12 @@ EmptySubobjectMap::CanPlaceFieldSubobjectAtOffset(const CXXRecordDecl *RD, unsigned FieldNo = 0; for (CXXRecordDecl::field_iterator I = RD->field_begin(), E = RD->field_end(); I != E; ++I, ++FieldNo) { - const FieldDecl *FD = *I; - if (FD->isBitField()) + if (I->isBitField()) continue; CharUnits FieldOffset = Offset + getFieldOffset(Layout, FieldNo); - if (!CanPlaceFieldSubobjectAtOffset(FD, FieldOffset)) + if (!CanPlaceFieldSubobjectAtOffset(*I, FieldOffset)) return false; } @@ -491,13 +488,12 @@ void EmptySubobjectMap::UpdateEmptyFieldSubobjects(const CXXRecordDecl *RD, unsigned FieldNo = 0; for (CXXRecordDecl::field_iterator I = RD->field_begin(), E = RD->field_end(); I != E; ++I, ++FieldNo) { - const FieldDecl *FD = *I; - if (FD->isBitField()) + if (I->isBitField()) continue; CharUnits FieldOffset = Offset + getFieldOffset(Layout, FieldNo); - UpdateEmptyFieldSubobjects(FD, FieldOffset); + UpdateEmptyFieldSubobjects(*I, FieldOffset); } } @@ -538,6 +534,8 @@ void EmptySubobjectMap::UpdateEmptyFieldSubobjects(const FieldDecl *FD, } } +typedef llvm::SmallPtrSet ClassSetTy; + class RecordLayoutBuilder { protected: // FIXME: Remove this and make the appropriate fields public. @@ -600,8 +598,9 @@ protected: /// out is virtual. bool PrimaryBaseIsVirtual; - /// VFPtrOffset - Virtual function table offset. Only for MS layout. - CharUnits VFPtrOffset; + /// HasOwnVFPtr - Whether the class provides its own vtable/vftbl + /// pointer, as opposed to inheriting one from a primary base class. + bool HasOwnVFPtr; /// VBPtrOffset - Virtual base table offset. Only for MS layout. CharUnits VBPtrOffset; @@ -612,7 +611,7 @@ protected: BaseOffsetsMapTy Bases; // VBases - virtual base classes and their offsets in the record. - BaseOffsetsMapTy VBases; + ASTRecordLayout::VBaseOffsetsMapTy VBases; /// IndirectPrimaryBases - Virtual base classes, direct or indirect, that are /// primary base classes for some other direct or indirect base class. 
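The switch from VFPtrOffset to HasOwnVFPtr is more than a rename: the old field encoded absence with the sentinel CharUnits::fromQuantity(-1), which every consumer had to test for, while the new flag states the fact directly, and (as the dump code later in this patch shows) a class that provides its own vfptr in the MS ABI keeps it at offset zero. A sketch of the two query styles side by side, given a laid-out record:

    // const ASTRecordLayout &Layout = ...;
    bool HadVfptrOld =
        Layout.getVFPtrOffset() != CharUnits::fromQuantity(-1); // pre-patch API
    bool HadVfptrNew = Layout.hasOwnVFPtr(); // post-patch; offset implicitly zero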
@@ -652,7 +651,7 @@ protected: NonVirtualAlignment(CharUnits::One()), ZeroLengthBitfield(0), PrimaryBase(0), PrimaryBaseIsVirtual(false), - VFPtrOffset(CharUnits::fromQuantity(-1)), + HasOwnVFPtr(false), VBPtrOffset(CharUnits::fromQuantity(-1)), FirstNearlyEmptyVBase(0) { } @@ -725,15 +724,20 @@ protected: CharUnits Offset); bool needsVFTable(const CXXRecordDecl *RD) const; - bool hasNewVirtualFunction(const CXXRecordDecl *RD) const; + bool hasNewVirtualFunction(const CXXRecordDecl *RD, + bool IgnoreDestructor = false) const; bool isPossiblePrimaryBase(const CXXRecordDecl *Base) const; + void computeVtordisps(const CXXRecordDecl *RD, + ClassSetTy &VtordispVBases); + /// LayoutVirtualBases - Lays out all the virtual bases. void LayoutVirtualBases(const CXXRecordDecl *RD, const CXXRecordDecl *MostDerivedClass); /// LayoutVirtualBase - Lays out a single virtual base. - void LayoutVirtualBase(const BaseSubobjectInfo *Base); + void LayoutVirtualBase(const BaseSubobjectInfo *Base, + bool IsVtordispNeed = false); /// LayoutBase - Will lay out a base and return the offset where it was /// placed, in chars. @@ -1044,8 +1048,7 @@ RecordLayoutBuilder::LayoutNonVirtualBases(const CXXRecordDecl *RD) { CharUnits PtrAlign = Context.toCharUnitsFromBits(Context.getTargetInfo().getPointerAlign(0)); EnsureVTablePointerAlignment(PtrAlign); - if (isMicrosoftCXXABI()) - VFPtrOffset = getSize(); + HasOwnVFPtr = true; setSize(getSize() + PtrWidth); setDataSize(getSize()); } @@ -1142,7 +1145,7 @@ RecordLayoutBuilder::AddPrimaryVirtualBaseOffsets(const BaseSubobjectInfo *Info, assert(!VBases.count(Info->PrimaryVirtualBaseInfo->Class) && "primary vbase offset already exists!"); VBases.insert(std::make_pair(Info->PrimaryVirtualBaseInfo->Class, - Offset)); + ASTRecordLayout::VBaseInfo(Offset, false))); // Traverse the primary virtual base. AddPrimaryVirtualBaseOffsets(Info->PrimaryVirtualBaseInfo, Offset); @@ -1193,19 +1196,177 @@ bool RecordLayoutBuilder::needsVFTable(const CXXRecordDecl *RD) const { return hasNewVirtualFunction(RD); } +/// Does the given class inherit non-virtually from any of the classes +/// in the given set? +static bool hasNonVirtualBaseInSet(const CXXRecordDecl *RD, + const ClassSetTy &set) { + for (CXXRecordDecl::base_class_const_iterator + I = RD->bases_begin(), E = RD->bases_end(); I != E; ++I) { + // Ignore virtual links. + if (I->isVirtual()) continue; + + // Check whether the set contains the base. + const CXXRecordDecl *base = I->getType()->getAsCXXRecordDecl(); + if (set.count(base)) + return true; + + // Otherwise, recurse and propagate. + if (hasNonVirtualBaseInSet(base, set)) + return true; + } + + return false; +} + +/// Does the given method (B::foo()) already override a method (A::foo()) +/// such that A requires a vtordisp in B? If so, we don't need to add a +/// new vtordisp for B in a yet-more-derived class C providing C::foo(). +static bool overridesMethodRequiringVtorDisp(const ASTContext &Context, + const CXXMethodDecl *M) { + CXXMethodDecl::method_iterator + I = M->begin_overridden_methods(), E = M->end_overridden_methods(); + if (I == E) return false; + + const ASTRecordLayout::VBaseOffsetsMapTy &offsets = + Context.getASTRecordLayout(M->getParent()).getVBaseOffsetsMap(); + do { + const CXXMethodDecl *overridden = *I; + + // If the overridden method's class isn't recognized as a virtual + // base in the derived class, ignore it. 
+ ASTRecordLayout::VBaseOffsetsMapTy::const_iterator + it = offsets.find(overridden->getParent()); + if (it == offsets.end()) continue; + + // Otherwise, check if the overridden method's class needs a vtordisp. + if (it->second.hasVtorDisp()) return true; + + } while (++I != E); + return false; +} + +/// In the Microsoft ABI, decide which of the virtual bases require a +/// vtordisp field. +void RecordLayoutBuilder::computeVtordisps(const CXXRecordDecl *RD, + ClassSetTy &vtordispVBases) { + // Bail out if we have no virtual bases. + assert(RD->getNumVBases()); + + // Build up the set of virtual bases that we haven't decided yet. + ClassSetTy undecidedVBases; + for (CXXRecordDecl::base_class_const_iterator + I = RD->vbases_begin(), E = RD->vbases_end(); I != E; ++I) { + const CXXRecordDecl *vbase = I->getType()->getAsCXXRecordDecl(); + undecidedVBases.insert(vbase); + } + assert(!undecidedVBases.empty()); + + // A virtual base requires a vtordisp field in a derived class if it + // requires a vtordisp field in a base class. Walk all the direct + // bases and collect this information. + for (CXXRecordDecl::base_class_const_iterator I = RD->bases_begin(), + E = RD->bases_end(); I != E; ++I) { + const CXXRecordDecl *base = I->getType()->getAsCXXRecordDecl(); + const ASTRecordLayout &baseLayout = Context.getASTRecordLayout(base); + + // Iterate over the set of virtual bases provided by this class. + for (ASTRecordLayout::VBaseOffsetsMapTy::const_iterator + VI = baseLayout.getVBaseOffsetsMap().begin(), + VE = baseLayout.getVBaseOffsetsMap().end(); VI != VE; ++VI) { + // If it doesn't need a vtordisp in this base, ignore it. + if (!VI->second.hasVtorDisp()) continue; + + // If we've already seen it and decided it needs a vtordisp, ignore it. + if (!undecidedVBases.erase(VI->first)) + continue; + + // Add it. + vtordispVBases.insert(VI->first); + + // Quit as soon as we've decided everything. + if (undecidedVBases.empty()) + return; + } + } + + // Okay, we have virtual bases that we haven't yet decided about. A + // virtual base requires a vtordisp if any of the non-destructor + // virtual methods declared in this class directly override a method + // provided by that virtual base. (If so, we need to emit a thunk + // for that method, to be used in the construction vftable, which + // applies an additional 'vtordisp' this-adjustment.) + + // Collect the set of bases directly overridden by any method in this class. + // It's possible that some of these classes won't be virtual bases, or won't be + // provided by virtual bases, or won't be virtual bases in the overridden + // instance but are virtual bases elsewhere. Only the last matters for what + // we're doing, and we can ignore those: if we don't directly override + // a method provided by a virtual copy of a base class, but we do directly + // override a method provided by a non-virtual copy of that base class, + // then we must indirectly override the method provided by the virtual base, + // and so we should already have collected it in the loop above. + ClassSetTy overriddenBases; + for (CXXRecordDecl::method_iterator + M = RD->method_begin(), E = RD->method_end(); M != E; ++M) { + // Ignore non-virtual methods and destructors. + if (isa(*M) || !M->isVirtual()) + continue; + + for (CXXMethodDecl::method_iterator I = M->begin_overridden_methods(), + E = M->end_overridden_methods(); I != E; ++I) { + const CXXMethodDecl *overriddenMethod = (*I); + + // Ignore methods that override methods from vbases that require + // vtordisps.
+ if (overridesMethodRequiringVtorDisp(Context, overriddenMethod)) + continue; + + // As an optimization, check immediately whether we're overriding + // something from the undecided set. + const CXXRecordDecl *overriddenBase = overriddenMethod->getParent(); + if (undecidedVBases.erase(overriddenBase)) { + vtordispVBases.insert(overriddenBase); + if (undecidedVBases.empty()) return; + + // We can't 'continue;' here because one of our undecided + // vbases might non-virtually inherit from this base. + // Consider: + // struct A { virtual void foo(); }; + // struct B : A {}; + // struct C : virtual A, virtual B { virtual void foo(); }; + // We need a vtordisp for B here. + } + + // Otherwise, just collect it. + overriddenBases.insert(overriddenBase); + } + } + + // Walk the undecided v-bases and check whether they (non-virtually) + // provide any of the overridden bases. We don't need to consider + // virtual links because the vtordisp inheres to the layout + // subobject containing the base. + for (ClassSetTy::const_iterator + I = undecidedVBases.begin(), E = undecidedVBases.end(); I != E; ++I) { + if (hasNonVirtualBaseInSet(*I, overriddenBases)) + vtordispVBases.insert(*I); + } +} + /// hasNewVirtualFunction - Does the given polymorphic class declare a /// virtual function that does not override a method from any of its /// base classes? bool -RecordLayoutBuilder::hasNewVirtualFunction(const CXXRecordDecl *RD) const { - assert(RD->isPolymorphic()); +RecordLayoutBuilder::hasNewVirtualFunction(const CXXRecordDecl *RD, + bool IgnoreDestructor) const { if (!RD->getNumBases()) return true; for (CXXRecordDecl::method_iterator method = RD->method_begin(); method != RD->method_end(); ++method) { - if (method->isVirtual() && !method->size_overridden_methods()) { + if (method->isVirtual() && !method->size_overridden_methods() && + !(IgnoreDestructor && method->getKind() == Decl::CXXDestructor)) { return true; } } @@ -1215,11 +1376,11 @@ RecordLayoutBuilder::hasNewVirtualFunction(const CXXRecordDecl *RD) const { /// isPossiblePrimaryBase - Is the given base class an acceptable /// primary base class? bool -RecordLayoutBuilder::isPossiblePrimaryBase(const CXXRecordDecl *Base) const { +RecordLayoutBuilder::isPossiblePrimaryBase(const CXXRecordDecl *base) const { // In the Itanium ABI, a class can be a primary base class if it has // a vtable for any reason. if (!isMicrosoftCXXABI()) - return Base->isDynamicClass(); + return base->isDynamicClass(); // In the MS ABI, a class can only be a primary base class if it // provides a vf-table at a static offset. That means it has to be @@ -1228,14 +1389,22 @@ RecordLayoutBuilder::isPossiblePrimaryBase(const CXXRecordDecl *Base) const { // base, which we have to guard against. // First off, it has to have virtual functions. - if (!Base->isPolymorphic()) return false; + if (!base->isPolymorphic()) return false; + + // If it has no virtual bases, then the vfptr must be at a static offset. + if (!base->getNumVBases()) return true; + + // Otherwise, the necessary information is cached in the layout. + const ASTRecordLayout &layout = Context.getASTRecordLayout(base); + + // If the base has its own vfptr, it can be a primary base. + if (layout.hasOwnVFPtr()) return true; - // If it has no virtual bases, then everything is at a static offset. - if (!Base->getNumVBases()) return true; + // If the base has a primary base class, then it can be a primary base. + if (layout.getPrimaryBase()) return true; - // Okay, just ask the base class's layout. 
- return (Context.getASTRecordLayout(Base).getVFPtrOffset() - != CharUnits::fromQuantity(-1)); + // Otherwise it can't. + return false; } void @@ -1288,10 +1457,12 @@ RecordLayoutBuilder::LayoutVirtualBases(const CXXRecordDecl *RD, } void RecordLayoutBuilder::MSLayoutVirtualBases(const CXXRecordDecl *RD) { - if (!RD->getNumVBases()) return; + ClassSetTy VtordispVBases; + computeVtordisps(RD, VtordispVBases); + // This is substantially simplified because there are no virtual // primary bases. for (CXXRecordDecl::base_class_const_iterator I = RD->vbases_begin(), @@ -1299,12 +1470,25 @@ void RecordLayoutBuilder::MSLayoutVirtualBases(const CXXRecordDecl *RD) { const CXXRecordDecl *BaseDecl = I->getType()->getAsCXXRecordDecl(); const BaseSubobjectInfo *BaseInfo = VirtualBaseInfo.lookup(BaseDecl); assert(BaseInfo && "Did not find virtual base info!"); - - LayoutVirtualBase(BaseInfo); + + // If this base requires a vtordisp, add enough space for an int field. + // This is apparently always 32-bits, even on x64. + bool vtordispNeeded = false; + if (VtordispVBases.count(BaseDecl)) { + CharUnits IntSize = + CharUnits::fromQuantity(Context.getTargetInfo().getIntWidth() / 8); + + setSize(getSize() + IntSize); + setDataSize(getSize()); + vtordispNeeded = true; + } + + LayoutVirtualBase(BaseInfo, vtordispNeeded); } } -void RecordLayoutBuilder::LayoutVirtualBase(const BaseSubobjectInfo *Base) { +void RecordLayoutBuilder::LayoutVirtualBase(const BaseSubobjectInfo *Base, + bool IsVtordispNeed) { assert(!Base->Derived && "Trying to lay out a primary virtual base!"); // Layout the base. @@ -1312,9 +1496,11 @@ void RecordLayoutBuilder::LayoutVirtualBase(const BaseSubobjectInfo *Base) { // Add its base class offset. assert(!VBases.count(Base->Class) && "vbase offset already exists!"); - VBases.insert(std::make_pair(Base->Class, Offset)); - - AddPrimaryVirtualBaseOffsets(Base, Offset); + VBases.insert(std::make_pair(Base->Class, + ASTRecordLayout::VBaseInfo(Offset, IsVtordispNeed))); + + if (!isMicrosoftCXXABI()) + AddPrimaryVirtualBaseOffsets(Base, Offset); } CharUnits RecordLayoutBuilder::LayoutBase(const BaseSubobjectInfo *Base) { @@ -1461,8 +1647,8 @@ void RecordLayoutBuilder::Layout(const CXXRecordDecl *RD) { Context.getTargetInfo().getCharAlign())); NonVirtualAlignment = Alignment; - if (isMicrosoftCXXABI() && - NonVirtualSize != NonVirtualSize.RoundUpToAlignment(Alignment)) { + if (isMicrosoftCXXABI()) { + if (NonVirtualSize != NonVirtualSize.RoundUpToAlignment(Alignment)) { CharUnits AlignMember = NonVirtualSize.RoundUpToAlignment(Alignment) - NonVirtualSize; @@ -1472,9 +1658,9 @@ void RecordLayoutBuilder::Layout(const CXXRecordDecl *RD) { NonVirtualSize = Context.toCharUnitsFromBits( llvm::RoundUpToAlignment(getSizeInBits(), Context.getTargetInfo().getCharAlign())); + } MSLayoutVirtualBases(RD); - } else { // Lay out the virtual bases and add the primary virtual base offsets. 
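The rewritten isPossiblePrimaryBase above no longer probes a sentinel vfptr offset; for classes with virtual bases it reads the answer out of the candidate's cached layout. Its decision ladder, extracted into a free function for clarity (a sketch only; the real code is a RecordLayoutBuilder member and this branch applies to the MS ABI):

    static bool canBeMSPrimaryBase(const clang::CXXRecordDecl *Base,
                                   clang::ASTContext &Ctx) {
      if (!Base->isPolymorphic())
        return false;                  // must have virtual functions at all
      if (!Base->getNumVBases())
        return true;                   // vfptr sits at a static offset
      const clang::ASTRecordLayout &L = Ctx.getASTRecordLayout(Base);
      return L.hasOwnVFPtr() ||        // provides its own vfptr, or
             L.getPrimaryBase() != 0;  // inherits one at a static offset
    }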
LayoutVirtualBases(RD, RD); @@ -1540,7 +1726,7 @@ void RecordLayoutBuilder::LayoutFields(const RecordDecl *D) { for (RecordDecl::field_iterator Field = D->field_begin(), FieldEnd = D->field_end(); Field != FieldEnd; ++Field) { if (IsMsStruct) { - FieldDecl *FD = (*Field); + FieldDecl *FD = *Field; if (Context.ZeroBitfieldFollowsBitfield(FD, LastFD)) ZeroLengthBitfield = FD; // Zero-length bitfields following non-bitfield members are @@ -1635,9 +1821,8 @@ void RecordLayoutBuilder::LayoutFields(const RecordDecl *D) { } else if (!Context.getTargetInfo().useBitFieldTypeAlignment() && Context.getTargetInfo().useZeroLengthBitfieldAlignment()) { - FieldDecl *FD = (*Field); - if (FD->isBitField() && FD->getBitWidthValue(Context) == 0) - ZeroLengthBitfield = FD; + if (Field->isBitField() && Field->getBitWidthValue(Context) == 0) + ZeroLengthBitfield = *Field; } LayoutField(*Field); } @@ -2166,6 +2351,10 @@ RecordLayoutBuilder::ComputeKeyFunction(const CXXRecordDecl *RD) { if (MD->hasInlineBody()) continue; + // Ignore inline deleted or defaulted functions. + if (!MD->isUserProvided()) + continue; + // We found it. return MD; } @@ -2238,7 +2427,7 @@ ASTContext::getASTRecordLayout(const RecordDecl *D) const { NewEntry = new (*this) ASTRecordLayout(*this, Builder.getSize(), Builder.Alignment, - Builder.VFPtrOffset, + Builder.HasOwnVFPtr, Builder.VBPtrOffset, DataSize, Builder.FieldOffsets.data(), @@ -2375,7 +2564,7 @@ static void DumpCXXRecordLayout(raw_ostream &OS, IndentLevel++; const CXXRecordDecl *PrimaryBase = Layout.getPrimaryBase(); - bool HasVfptr = Layout.getVFPtrOffset() != CharUnits::fromQuantity(-1); + bool HasVfptr = Layout.hasOwnVFPtr(); bool HasVbptr = Layout.getVBPtrOffset() != CharUnits::fromQuantity(-1); // Vtable pointer. @@ -2405,7 +2594,7 @@ static void DumpCXXRecordLayout(raw_ostream &OS, // vfptr and vbptr (for Microsoft C++ ABI) if (HasVfptr) { - PrintOffset(OS, Offset + Layout.getVFPtrOffset(), IndentLevel); + PrintOffset(OS, Offset, IndentLevel); OS << '(' << *RD << " vftable pointer)\n"; } if (HasVbptr) { @@ -2417,27 +2606,29 @@ static void DumpCXXRecordLayout(raw_ostream &OS, uint64_t FieldNo = 0; for (CXXRecordDecl::field_iterator I = RD->field_begin(), E = RD->field_end(); I != E; ++I, ++FieldNo) { - const FieldDecl *Field = *I; + const FieldDecl &Field = **I; CharUnits FieldOffset = Offset + C.toCharUnitsFromBits(Layout.getFieldOffset(FieldNo)); - if (const RecordType *RT = Field->getType()->getAs()) { + if (const RecordType *RT = Field.getType()->getAs()) { if (const CXXRecordDecl *D = dyn_cast(RT->getDecl())) { DumpCXXRecordLayout(OS, D, C, FieldOffset, IndentLevel, - Field->getName().data(), + Field.getName().data(), /*IncludeVirtualBases=*/true); continue; } } PrintOffset(OS, FieldOffset, IndentLevel); - OS << Field->getType().getAsString() << ' ' << *Field << '\n'; + OS << Field.getType().getAsString() << ' ' << Field << '\n'; } if (!IncludeVirtualBases) return; // Dump virtual bases. 
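The virtual-base dump below picks the vtordisp flags out of the layout's VBaseOffsetsMap and prints each vtordisp four bytes before the virtual base it adjusts. Roughly what that produces for a class C whose virtual base A needs a vtordisp (the shape follows the Print calls in this file; the offsets are illustrative, not verbatim tool output):

     0 | struct C
     0 |   (C vftable pointer)
     4 |   (C vbtable pointer)
     8 |   int x
    12 |   (vtordisp for vbase A)
    16 |   struct A (virtual base)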
+ const ASTRecordLayout::VBaseOffsetsMapTy &vtordisps = + Layout.getVBaseOffsetsMap(); for (CXXRecordDecl::base_class_const_iterator I = RD->vbases_begin(), E = RD->vbases_end(); I != E; ++I) { assert(I->isVirtual() && "Found non-virtual class!"); @@ -2445,6 +2636,12 @@ static void DumpCXXRecordLayout(raw_ostream &OS, cast(I->getType()->getAs()->getDecl()); CharUnits VBaseOffset = Offset + Layout.getVBaseClassOffset(VBase); + + if (vtordisps.find(VBase)->second.hasVtorDisp()) { + PrintOffset(OS, VBaseOffset - CharUnits::fromQuantity(4), IndentLevel); + OS << "(vtordisp for vbase " << *VBase << ")\n"; + } + DumpCXXRecordLayout(OS, VBase, C, VBaseOffset, IndentLevel, VBase == PrimaryBase ? "(primary virtual base)" : "(virtual base)", diff --git a/lib/AST/Stmt.cpp b/lib/AST/Stmt.cpp index e4d9f0a..d877c3f 100644 --- a/lib/AST/Stmt.cpp +++ b/lib/AST/Stmt.cpp @@ -244,6 +244,22 @@ SourceLocation Stmt::getLocEnd() const { llvm_unreachable("unknown statement kind"); } +CompoundStmt::CompoundStmt(ASTContext &C, Stmt **StmtStart, unsigned NumStmts, + SourceLocation LB, SourceLocation RB) + : Stmt(CompoundStmtClass), LBracLoc(LB), RBracLoc(RB) { + CompoundStmtBits.NumStmts = NumStmts; + assert(CompoundStmtBits.NumStmts == NumStmts && + "NumStmts doesn't fit in bits of CompoundStmtBits.NumStmts!"); + + if (NumStmts == 0) { + Body = 0; + return; + } + + Body = new (C) Stmt*[NumStmts]; + memcpy(Body, StmtStart, NumStmts * sizeof(*Body)); +} + void CompoundStmt::setStmts(ASTContext &C, Stmt **Stmts, unsigned NumStmts) { if (this->Body) C.Deallocate(Body); @@ -257,6 +273,23 @@ const char *LabelStmt::getName() const { return getDecl()->getIdentifier()->getNameStart(); } +AttributedStmt *AttributedStmt::Create(ASTContext &C, SourceLocation Loc, + ArrayRef Attrs, + Stmt *SubStmt) { + void *Mem = C.Allocate(sizeof(AttributedStmt) + + sizeof(Attr*) * (Attrs.size() - 1), + llvm::alignOf()); + return new (Mem) AttributedStmt(Loc, Attrs, SubStmt); +} + +AttributedStmt *AttributedStmt::CreateEmpty(ASTContext &C, unsigned NumAttrs) { + assert(NumAttrs > 0 && "NumAttrs should be greater than zero"); + void *Mem = C.Allocate(sizeof(AttributedStmt) + + sizeof(Attr*) * (NumAttrs - 1), + llvm::alignOf()); + return new (Mem) AttributedStmt(EmptyShell(), NumAttrs); +} + // This is defined here to avoid polluting Stmt.h with importing Expr.h SourceRange ReturnStmt::getSourceRange() const { if (RetExpr) @@ -328,7 +361,7 @@ void AsmStmt::setOutputsAndInputsAndClobbers(ASTContext &C, StringLiteral **Constraints, Stmt **Exprs, unsigned NumOutputs, - unsigned NumInputs, + unsigned NumInputs, StringLiteral **Clobbers, unsigned NumClobbers) { this->NumOutputs = NumOutputs; @@ -336,19 +369,19 @@ void AsmStmt::setOutputsAndInputsAndClobbers(ASTContext &C, this->NumClobbers = NumClobbers; unsigned NumExprs = NumOutputs + NumInputs; - + C.Deallocate(this->Names); this->Names = new (C) IdentifierInfo*[NumExprs]; std::copy(Names, Names + NumExprs, this->Names); - + C.Deallocate(this->Exprs); this->Exprs = new (C) Stmt*[NumExprs]; std::copy(Exprs, Exprs + NumExprs, this->Exprs); - + C.Deallocate(this->Constraints); this->Constraints = new (C) StringLiteral*[NumExprs]; std::copy(Constraints, Constraints + NumExprs, this->Constraints); - + C.Deallocate(this->Clobbers); this->Clobbers = new (C) StringLiteral*[NumClobbers]; std::copy(Clobbers, Clobbers + NumClobbers, this->Clobbers); @@ -407,7 +440,7 @@ unsigned AsmStmt::AnalyzeAsmString(SmallVectorImpl&Pieces, std::string CurStringPiece; bool HasVariants = 
!C.getTargetInfo().hasNoAsmVariants(); - + while (1) { // Done with the string? if (CurPtr == StrEnd) { @@ -428,7 +461,7 @@ unsigned AsmStmt::AnalyzeAsmString(SmallVectorImpl&Pieces, CurStringPiece += CurChar; continue; } - + // Escaped "%" character in asm string. if (CurPtr == StrEnd) { // % at end of string is invalid (no escape). @@ -525,8 +558,8 @@ QualType CXXCatchStmt::getCaughtType() const { // Constructors //===----------------------------------------------------------------------===// -AsmStmt::AsmStmt(ASTContext &C, SourceLocation asmloc, bool issimple, - bool isvolatile, bool msasm, +AsmStmt::AsmStmt(ASTContext &C, SourceLocation asmloc, bool issimple, + bool isvolatile, bool msasm, unsigned numoutputs, unsigned numinputs, IdentifierInfo **names, StringLiteral **constraints, Expr **exprs, StringLiteral *asmstr, unsigned numclobbers, @@ -535,8 +568,8 @@ AsmStmt::AsmStmt(ASTContext &C, SourceLocation asmloc, bool issimple, , IsSimple(issimple), IsVolatile(isvolatile), MSAsm(msasm) , NumOutputs(numoutputs), NumInputs(numinputs), NumClobbers(numclobbers) { - unsigned NumExprs = NumOutputs +NumInputs; - + unsigned NumExprs = NumOutputs + NumInputs; + Names = new (C) IdentifierInfo*[NumExprs]; std::copy(names, names + NumExprs, Names); @@ -550,6 +583,33 @@ AsmStmt::AsmStmt(ASTContext &C, SourceLocation asmloc, bool issimple, std::copy(clobbers, clobbers + NumClobbers, Clobbers); } +MSAsmStmt::MSAsmStmt(ASTContext &C, SourceLocation asmloc, + bool issimple, bool isvolatile, ArrayRef asmtoks, + ArrayRef lineends, StringRef asmstr, + ArrayRef clobbers, SourceLocation endloc) + : Stmt(MSAsmStmtClass), AsmLoc(asmloc), EndLoc(endloc), + AsmStr(asmstr.str()), IsSimple(issimple), IsVolatile(isvolatile), + NumAsmToks(asmtoks.size()), NumLineEnds(lineends.size()), + NumClobbers(clobbers.size()) { + + AsmToks = new (C) Token[NumAsmToks]; + for (unsigned i = 0, e = NumAsmToks; i != e; ++i) + AsmToks[i] = asmtoks[i]; + + LineEnds = new (C) unsigned[NumLineEnds]; + for (unsigned i = 0, e = NumLineEnds; i != e; ++i) + LineEnds[i] = lineends[i]; + + Clobbers = new (C) StringRef[NumClobbers]; + for (unsigned i = 0, e = NumClobbers; i != e; ++i) { + // FIXME: Avoid the allocation/copy if at all possible. 
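The clobber loop that continues below deep-copies each string because the incoming StringRefs may reference storage the parser is free to recycle, while the AST node must own bytes that live as long as the ASTContext. The copy pattern in isolation (a sketch; note the result carries no terminating NUL, which StringRef does not require):

    // Requires <cstring> and clang/AST/ASTContext.h.
    static llvm::StringRef copyIntoContext(clang::ASTContext &C,
                                           llvm::StringRef S) {
      char *Mem = new (C) char[S.size()];   // ASTContext-owned storage
      std::memcpy(Mem, S.data(), S.size()); // bytes only, no NUL terminator
      // (The patch itself uses strncpy; with an exact size the two behave
      // identically here.)
      return llvm::StringRef(Mem, S.size());
    }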
+ size_t size = clobbers[i].size(); + char *dest = new (C) char[size]; + std::strncpy(dest, clobbers[i].data(), size); + Clobbers[i] = StringRef(dest, size); + } +} + ObjCForCollectionStmt::ObjCForCollectionStmt(Stmt *Elem, Expr *Collect, Stmt *Body, SourceLocation FCL, SourceLocation RPL) @@ -571,31 +631,31 @@ ObjCAtTryStmt::ObjCAtTryStmt(SourceLocation atTryLoc, Stmt *atTryStmt, Stmts[0] = atTryStmt; for (unsigned I = 0; I != NumCatchStmts; ++I) Stmts[I + 1] = CatchStmts[I]; - + if (HasFinally) Stmts[NumCatchStmts + 1] = atFinallyStmt; } -ObjCAtTryStmt *ObjCAtTryStmt::Create(ASTContext &Context, - SourceLocation atTryLoc, +ObjCAtTryStmt *ObjCAtTryStmt::Create(ASTContext &Context, + SourceLocation atTryLoc, Stmt *atTryStmt, - Stmt **CatchStmts, + Stmt **CatchStmts, unsigned NumCatchStmts, Stmt *atFinallyStmt) { - unsigned Size = sizeof(ObjCAtTryStmt) + + unsigned Size = sizeof(ObjCAtTryStmt) + (1 + NumCatchStmts + (atFinallyStmt != 0)) * sizeof(Stmt *); void *Mem = Context.Allocate(Size, llvm::alignOf()); return new (Mem) ObjCAtTryStmt(atTryLoc, atTryStmt, CatchStmts, NumCatchStmts, atFinallyStmt); } -ObjCAtTryStmt *ObjCAtTryStmt::CreateEmpty(ASTContext &Context, +ObjCAtTryStmt *ObjCAtTryStmt::CreateEmpty(ASTContext &Context, unsigned NumCatchStmts, bool HasFinally) { - unsigned Size = sizeof(ObjCAtTryStmt) + + unsigned Size = sizeof(ObjCAtTryStmt) + (1 + NumCatchStmts + HasFinally) * sizeof(Stmt *); void *Mem = Context.Allocate(Size, llvm::alignOf()); - return new (Mem) ObjCAtTryStmt(EmptyShell(), NumCatchStmts, HasFinally); + return new (Mem) ObjCAtTryStmt(EmptyShell(), NumCatchStmts, HasFinally); } SourceRange ObjCAtTryStmt::getSourceRange() const { @@ -606,12 +666,12 @@ SourceRange ObjCAtTryStmt::getSourceRange() const { EndLoc = getCatchStmt(NumCatchStmts - 1)->getLocEnd(); else EndLoc = getTryBody()->getLocEnd(); - + return SourceRange(AtTryLoc, EndLoc); } CXXTryStmt *CXXTryStmt::Create(ASTContext &C, SourceLocation tryLoc, - Stmt *tryBlock, Stmt **handlers, + Stmt *tryBlock, Stmt **handlers, unsigned numHandlers) { std::size_t Size = sizeof(CXXTryStmt); Size += ((numHandlers + 1) * sizeof(Stmt)); @@ -671,20 +731,20 @@ const VarDecl *CXXForRangeStmt::getLoopVariable() const { return const_cast(this)->getLoopVariable(); } -IfStmt::IfStmt(ASTContext &C, SourceLocation IL, VarDecl *var, Expr *cond, +IfStmt::IfStmt(ASTContext &C, SourceLocation IL, VarDecl *var, Expr *cond, Stmt *then, SourceLocation EL, Stmt *elsev) : Stmt(IfStmtClass), IfLoc(IL), ElseLoc(EL) { setConditionVariable(C, var); SubExprs[COND] = reinterpret_cast(cond); SubExprs[THEN] = then; - SubExprs[ELSE] = elsev; + SubExprs[ELSE] = elsev; } VarDecl *IfStmt::getConditionVariable() const { if (!SubExprs[VAR]) return 0; - + DeclStmt *DS = cast(SubExprs[VAR]); return cast(DS->getSingleDecl()); } @@ -694,16 +754,16 @@ void IfStmt::setConditionVariable(ASTContext &C, VarDecl *V) { SubExprs[VAR] = 0; return; } - + SourceRange VarRange = V->getSourceRange(); SubExprs[VAR] = new (C) DeclStmt(DeclGroupRef(V), VarRange.getBegin(), VarRange.getEnd()); } -ForStmt::ForStmt(ASTContext &C, Stmt *Init, Expr *Cond, VarDecl *condVar, - Expr *Inc, Stmt *Body, SourceLocation FL, SourceLocation LP, +ForStmt::ForStmt(ASTContext &C, Stmt *Init, Expr *Cond, VarDecl *condVar, + Expr *Inc, Stmt *Body, SourceLocation FL, SourceLocation LP, SourceLocation RP) - : Stmt(ForStmtClass), ForLoc(FL), LParenLoc(LP), RParenLoc(RP) + : Stmt(ForStmtClass), ForLoc(FL), LParenLoc(LP), RParenLoc(RP) { SubExprs[INIT] = Init; setConditionVariable(C, condVar); @@ 
-715,7 +775,7 @@ ForStmt::ForStmt(ASTContext &C, Stmt *Init, Expr *Cond, VarDecl *condVar, VarDecl *ForStmt::getConditionVariable() const { if (!SubExprs[CONDVAR]) return 0; - + DeclStmt *DS = cast(SubExprs[CONDVAR]); return cast(DS->getSingleDecl()); } @@ -725,14 +785,14 @@ void ForStmt::setConditionVariable(ASTContext &C, VarDecl *V) { SubExprs[CONDVAR] = 0; return; } - + SourceRange VarRange = V->getSourceRange(); SubExprs[CONDVAR] = new (C) DeclStmt(DeclGroupRef(V), VarRange.getBegin(), VarRange.getEnd()); } -SwitchStmt::SwitchStmt(ASTContext &C, VarDecl *Var, Expr *cond) - : Stmt(SwitchStmtClass), FirstCase(0), AllEnumCasesCovered(0) +SwitchStmt::SwitchStmt(ASTContext &C, VarDecl *Var, Expr *cond) + : Stmt(SwitchStmtClass), FirstCase(0), AllEnumCasesCovered(0) { setConditionVariable(C, Var); SubExprs[COND] = reinterpret_cast(cond); @@ -742,7 +802,7 @@ SwitchStmt::SwitchStmt(ASTContext &C, VarDecl *Var, Expr *cond) VarDecl *SwitchStmt::getConditionVariable() const { if (!SubExprs[VAR]) return 0; - + DeclStmt *DS = cast(SubExprs[VAR]); return cast(DS->getSingleDecl()); } @@ -752,7 +812,7 @@ void SwitchStmt::setConditionVariable(ASTContext &C, VarDecl *V) { SubExprs[VAR] = 0; return; } - + SourceRange VarRange = V->getSourceRange(); SubExprs[VAR] = new (C) DeclStmt(DeclGroupRef(V), VarRange.getBegin(), VarRange.getEnd()); @@ -764,7 +824,7 @@ Stmt *SwitchCase::getSubStmt() { return cast(this)->getSubStmt(); } -WhileStmt::WhileStmt(ASTContext &C, VarDecl *Var, Expr *cond, Stmt *body, +WhileStmt::WhileStmt(ASTContext &C, VarDecl *Var, Expr *cond, Stmt *body, SourceLocation WL) : Stmt(WhileStmtClass) { setConditionVariable(C, Var); @@ -776,7 +836,7 @@ WhileStmt::WhileStmt(ASTContext &C, VarDecl *Var, Expr *cond, Stmt *body, VarDecl *WhileStmt::getConditionVariable() const { if (!SubExprs[VAR]) return 0; - + DeclStmt *DS = cast(SubExprs[VAR]); return cast(DS->getSingleDecl()); } diff --git a/lib/AST/StmtDumper.cpp b/lib/AST/StmtDumper.cpp index b5e298c..962e352 100644 --- a/lib/AST/StmtDumper.cpp +++ b/lib/AST/StmtDumper.cpp @@ -13,6 +13,7 @@ //===----------------------------------------------------------------------===// #include "clang/AST/StmtVisitor.h" +#include "clang/AST/ASTContext.h" #include "clang/AST/DeclObjC.h" #include "clang/AST/DeclCXX.h" #include "clang/AST/PrettyPrinter.h" @@ -166,6 +167,7 @@ namespace { void VisitObjCAtCatchStmt(ObjCAtCatchStmt *Node); void VisitObjCEncodeExpr(ObjCEncodeExpr *Node); void VisitObjCMessageExpr(ObjCMessageExpr* Node); + void VisitObjCBoxedExpr(ObjCBoxedExpr* Node); void VisitObjCSelectorExpr(ObjCSelectorExpr *Node); void VisitObjCProtocolExpr(ObjCProtocolExpr *Node); void VisitObjCPropertyRefExpr(ObjCPropertyRefExpr *Node); @@ -423,6 +425,7 @@ void StmtDumper::VisitPredefinedExpr(PredefinedExpr *Node) { default: llvm_unreachable("unknown case"); case PredefinedExpr::Func: OS << " __func__"; break; case PredefinedExpr::Function: OS << " __FUNCTION__"; break; + case PredefinedExpr::LFunction: OS << " L__FUNCTION__"; break; case PredefinedExpr::PrettyFunction: OS << " __PRETTY_FUNCTION__";break; } } @@ -445,18 +448,8 @@ void StmtDumper::VisitFloatingLiteral(FloatingLiteral *Node) { void StmtDumper::VisitStringLiteral(StringLiteral *Str) { DumpExpr(Str); - // FIXME: this doesn't print wstrings right. 
OS << " "; - switch (Str->getKind()) { - case StringLiteral::Ascii: break; // No prefix - case StringLiteral::Wide: OS << 'L'; break; - case StringLiteral::UTF8: OS << "u8"; break; - case StringLiteral::UTF16: OS << 'u'; break; - case StringLiteral::UTF32: OS << 'U'; break; - } - OS << '"'; - OS.write_escaped(Str->getString()); - OS << '"'; + Str->outputString(OS); } void StmtDumper::VisitUnaryOperator(UnaryOperator *Node) { @@ -471,7 +464,7 @@ void StmtDumper::VisitUnaryExprOrTypeTraitExpr(UnaryExprOrTypeTraitExpr *Node) { OS << " sizeof "; break; case UETT_AlignOf: - OS << " __alignof "; + OS << " alignof "; break; case UETT_VecStep: OS << " vec_step "; @@ -637,6 +630,11 @@ void StmtDumper::VisitObjCMessageExpr(ObjCMessageExpr* Node) { } } +void StmtDumper::VisitObjCBoxedExpr(ObjCBoxedExpr* Node) { + DumpExpr(Node); + OS << " selector=" << Node->getBoxingMethod()->getSelector().getAsString(); +} + void StmtDumper::VisitObjCAtCatchStmt(ObjCAtCatchStmt *Node) { DumpStmt(Node); if (VarDecl *CatchParam = Node->getCatchParamDecl()) { diff --git a/lib/AST/StmtPrinter.cpp b/lib/AST/StmtPrinter.cpp index 0d1066b..2f7cb55 100644 --- a/lib/AST/StmtPrinter.cpp +++ b/lib/AST/StmtPrinter.cpp @@ -12,6 +12,7 @@ // //===----------------------------------------------------------------------===// +#include "clang/AST/ASTContext.h" #include "clang/AST/StmtVisitor.h" #include "clang/AST/DeclCXX.h" #include "clang/AST/DeclObjC.h" @@ -172,9 +173,9 @@ void StmtPrinter::VisitLabelStmt(LabelStmt *Node) { void StmtPrinter::VisitAttributedStmt(AttributedStmt *Node) { OS << "[["; bool first = true; - for (AttrVec::const_iterator it = Node->getAttrs().begin(), - end = Node->getAttrs().end(); - it != end; ++it) { + for (ArrayRef::iterator it = Node->getAttrs().begin(), + end = Node->getAttrs().end(); + it != end; ++it) { if (!first) { OS << ", "; first = false; @@ -429,6 +430,11 @@ void StmtPrinter::VisitAsmStmt(AsmStmt *Node) { OS << ");\n"; } +void StmtPrinter::VisitMSAsmStmt(MSAsmStmt *Node) { + // FIXME: Implement MS style inline asm statement printer. + Indent() << "asm ()"; +} + void StmtPrinter::VisitObjCAtTryStmt(ObjCAtTryStmt *Node) { Indent() << "@try"; if (CompoundStmt *TS = dyn_cast(Node->getTryBody())) { @@ -638,6 +644,9 @@ void StmtPrinter::VisitPredefinedExpr(PredefinedExpr *Node) { case PredefinedExpr::Function: OS << "__FUNCTION__"; break; + case PredefinedExpr::LFunction: + OS << "L__FUNCTION__"; + break; case PredefinedExpr::PrettyFunction: OS << "__PRETTY_FUNCTION__"; break; @@ -734,93 +743,7 @@ void StmtPrinter::VisitImaginaryLiteral(ImaginaryLiteral *Node) { } void StmtPrinter::VisitStringLiteral(StringLiteral *Str) { - switch (Str->getKind()) { - case StringLiteral::Ascii: break; // no prefix. - case StringLiteral::Wide: OS << 'L'; break; - case StringLiteral::UTF8: OS << "u8"; break; - case StringLiteral::UTF16: OS << 'u'; break; - case StringLiteral::UTF32: OS << 'U'; break; - } - OS << '"'; - static char Hex[] = "0123456789ABCDEF"; - - unsigned LastSlashX = Str->getLength(); - for (unsigned I = 0, N = Str->getLength(); I != N; ++I) { - switch (uint32_t Char = Str->getCodeUnit(I)) { - default: - // FIXME: Convert UTF-8 back to codepoints before rendering. - - // Convert UTF-16 surrogate pairs back to codepoints before rendering. - // Leave invalid surrogates alone; we'll use \x for those. 
- if (Str->getKind() == StringLiteral::UTF16 && I != N - 1 && - Char >= 0xd800 && Char <= 0xdbff) { - uint32_t Trail = Str->getCodeUnit(I + 1); - if (Trail >= 0xdc00 && Trail <= 0xdfff) { - Char = 0x10000 + ((Char - 0xd800) << 10) + (Trail - 0xdc00); - ++I; - } - } - - if (Char > 0xff) { - // If this is a wide string, output characters over 0xff using \x - // escapes. Otherwise, this is a UTF-16 or UTF-32 string, and Char is a - // codepoint: use \x escapes for invalid codepoints. - if (Str->getKind() == StringLiteral::Wide || - (Char >= 0xd800 && Char <= 0xdfff) || Char >= 0x110000) { - // FIXME: Is this the best way to print wchar_t? - OS << "\\x"; - int Shift = 28; - while ((Char >> Shift) == 0) - Shift -= 4; - for (/**/; Shift >= 0; Shift -= 4) - OS << Hex[(Char >> Shift) & 15]; - LastSlashX = I; - break; - } - - if (Char > 0xffff) - OS << "\\U00" - << Hex[(Char >> 20) & 15] - << Hex[(Char >> 16) & 15]; - else - OS << "\\u"; - OS << Hex[(Char >> 12) & 15] - << Hex[(Char >> 8) & 15] - << Hex[(Char >> 4) & 15] - << Hex[(Char >> 0) & 15]; - break; - } - - // If we used \x... for the previous character, and this character is a - // hexadecimal digit, prevent it being slurped as part of the \x. - if (LastSlashX + 1 == I) { - switch (Char) { - case '0': case '1': case '2': case '3': case '4': - case '5': case '6': case '7': case '8': case '9': - case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': - case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': - OS << "\"\""; - } - } - - if (Char <= 0xff && isprint(Char)) - OS << (char)Char; - else // Output anything hard as an octal escape. - OS << '\\' - << (char)('0' + ((Char >> 6) & 7)) - << (char)('0' + ((Char >> 3) & 7)) - << (char)('0' + ((Char >> 0) & 7)); - break; - // Handle some common non-printable cases to make dumps prettier. 
- case '\\': OS << "\\\\"; break; - case '"': OS << "\\\""; break; - case '\n': OS << "\\n"; break; - case '\t': OS << "\\t"; break; - case '\a': OS << "\\a"; break; - case '\b': OS << "\\b"; break; - } - } - OS << '"'; + Str->outputString(OS); } void StmtPrinter::VisitParenExpr(ParenExpr *Node) { OS << "("; @@ -892,7 +815,12 @@ void StmtPrinter::VisitUnaryExprOrTypeTraitExpr(UnaryExprOrTypeTraitExpr *Node){ OS << "sizeof"; break; case UETT_AlignOf: - OS << "__alignof"; + if (Policy.LangOpts.CPlusPlus) + OS << "alignof"; + else if (Policy.LangOpts.C11) + OS << "_Alignof"; + else + OS << "__alignof"; break; case UETT_VecStep: OS << "vec_step"; @@ -1275,7 +1203,7 @@ void StmtPrinter::VisitUserDefinedLiteral(UserDefinedLiteral *Node) { const TemplateArgument &Pack = Args->get(0); for (TemplateArgument::pack_iterator I = Pack.pack_begin(), E = Pack.pack_end(); I != E; ++I) { - char C = (char)I->getAsIntegral()->getZExtValue(); + char C = (char)I->getAsIntegral().getZExtValue(); OS << C; } break; @@ -1727,9 +1655,9 @@ void StmtPrinter::VisitObjCStringLiteral(ObjCStringLiteral *Node) { VisitStringLiteral(Node->getString()); } -void StmtPrinter::VisitObjCNumericLiteral(ObjCNumericLiteral *E) { +void StmtPrinter::VisitObjCBoxedExpr(ObjCBoxedExpr *E) { OS << "@"; - Visit(E->getNumber()); + Visit(E->getSubExpr()); } void StmtPrinter::VisitObjCArrayLiteral(ObjCArrayLiteral *E) { diff --git a/lib/AST/StmtProfile.cpp b/lib/AST/StmtProfile.cpp index e50523a..2168b64 100644 --- a/lib/AST/StmtProfile.cpp +++ b/lib/AST/StmtProfile.cpp @@ -178,6 +178,11 @@ void StmtProfiler::VisitAsmStmt(const AsmStmt *S) { VisitStringLiteral(S->getClobber(I)); } +void StmtProfiler::VisitMSAsmStmt(const MSAsmStmt *S) { + // FIXME: Implement MS style inline asm statement profiler. + VisitStmt(S); +} + void StmtProfiler::VisitCXXCatchStmt(const CXXCatchStmt *S) { VisitStmt(S); VisitType(S->getCaughtType()); @@ -981,7 +986,7 @@ void StmtProfiler::VisitObjCStringLiteral(const ObjCStringLiteral *S) { VisitExpr(S); } -void StmtProfiler::VisitObjCNumericLiteral(const ObjCNumericLiteral *E) { +void StmtProfiler::VisitObjCBoxedExpr(const ObjCBoxedExpr *E) { VisitExpr(E); } @@ -1161,7 +1166,7 @@ void StmtProfiler::VisitTemplateArgument(const TemplateArgument &Arg) { break; case TemplateArgument::Integral: - Arg.getAsIntegral()->Profile(ID); + Arg.getAsIntegral().Profile(ID); VisitType(Arg.getIntegralType()); break; diff --git a/lib/AST/TemplateBase.cpp b/lib/AST/TemplateBase.cpp index 531e03e..f8dd396 100644 --- a/lib/AST/TemplateBase.cpp +++ b/lib/AST/TemplateBase.cpp @@ -36,17 +36,17 @@ using namespace clang; static void printIntegral(const TemplateArgument &TemplArg, raw_ostream &Out) { const ::clang::Type *T = TemplArg.getIntegralType().getTypePtr(); - const llvm::APSInt *Val = TemplArg.getAsIntegral(); + const llvm::APSInt &Val = TemplArg.getAsIntegral(); if (T->isBooleanType()) { - Out << (Val->getBoolValue() ? "true" : "false"); + Out << (Val.getBoolValue() ? "true" : "false"); } else if (T->isCharType()) { - const char Ch = Val->getZExtValue(); + const char Ch = Val.getZExtValue(); Out << ((Ch == '\'') ? 
"'\\" : "'"); Out.write_escaped(StringRef(&Ch, 1), /*UseHexEscapes=*/ true); Out << "'"; } else { - Out << Val->toString(10); + Out << Val; } } @@ -54,6 +54,25 @@ static void printIntegral(const TemplateArgument &TemplArg, // TemplateArgument Implementation //===----------------------------------------------------------------------===// +TemplateArgument::TemplateArgument(ASTContext &Ctx, const llvm::APSInt &Value, + QualType Type) + : Kind(Integral) { + // Copy the APSInt value into our decomposed form. + Integer.BitWidth = Value.getBitWidth(); + Integer.IsUnsigned = Value.isUnsigned(); + // If the value is large, we have to get additional memory from the ASTContext + unsigned NumWords = Value.getNumWords(); + if (NumWords > 1) { + void *Mem = Ctx.Allocate(NumWords * sizeof(uint64_t)); + std::memcpy(Mem, Value.getRawData(), NumWords * sizeof(uint64_t)); + Integer.pVal = static_cast(Mem); + } else { + Integer.VAL = Value.getZExtValue(); + } + + Integer.Type = Type.getAsOpaquePtr(); +} + TemplateArgument TemplateArgument::CreatePackCopy(ASTContext &Context, const TemplateArgument *Args, unsigned NumArgs) { @@ -246,7 +265,7 @@ void TemplateArgument::Profile(llvm::FoldingSetNodeID &ID, } case Integral: - getAsIntegral()->Profile(ID); + getAsIntegral().Profile(ID); getIntegralType().Profile(ID); break; @@ -275,7 +294,7 @@ bool TemplateArgument::structurallyEquals(const TemplateArgument &Other) const { case Integral: return getIntegralType() == Other.getIntegralType() && - *getAsIntegral() == *Other.getAsIntegral(); + getAsIntegral() == Other.getAsIntegral(); case Pack: if (Args.NumArgs != Other.Args.NumArgs) return false; @@ -498,7 +517,7 @@ const DiagnosticBuilder &clang::operator<<(const DiagnosticBuilder &DB, return DB << "nullptr"; case TemplateArgument::Integral: - return DB << Arg.getAsIntegral()->toString(10); + return DB << Arg.getAsIntegral().toString(10); case TemplateArgument::Template: return DB << Arg.getAsTemplate(); diff --git a/lib/AST/Type.cpp b/lib/AST/Type.cpp index 3f6a094..abefae4 100644 --- a/lib/AST/Type.cpp +++ b/lib/AST/Type.cpp @@ -288,6 +288,28 @@ QualType QualType::IgnoreParens(QualType T) { return T; } +/// \brief This will check for a TypedefType by removing any existing sugar +/// until it reaches a TypedefType or a non-sugared type. +template <> const TypedefType *Type::getAs() const { + const Type *Cur = this; + + while (true) { + if (const TypedefType *TDT = dyn_cast(Cur)) + return TDT; + switch (Cur->getTypeClass()) { +#define ABSTRACT_TYPE(Class, Parent) +#define TYPE(Class, Parent) \ + case Class: { \ + const Class##Type *Ty = cast(Cur); \ + if (!Ty->isSugared()) return 0; \ + Cur = Ty->desugar().getTypePtr(); \ + break; \ + } +#include "clang/AST/TypeNodes.def" + } + } +} + /// getUnqualifiedDesugaredType - Pull any qualifiers and syntactic /// sugar off the given type. This should produce an object of the /// same dynamic type as the canonical type. @@ -895,6 +917,14 @@ bool Type::isIncompleteType(NamedDecl **Def) const { } bool QualType::isPODType(ASTContext &Context) const { + // C++11 has a more relaxed definition of POD. + if (Context.getLangOpts().CPlusPlus0x) + return isCXX11PODType(Context); + + return isCXX98PODType(Context); +} + +bool QualType::isCXX98PODType(ASTContext &Context) const { // The compiler shouldn't query this for incomplete types, but the user might. // We return false for that case. Except for incomplete arrays of PODs, which // are PODs according to the standard. 
@@ -902,7 +932,7 @@ bool QualType::isPODType(ASTContext &Context) const { return 0; if ((*this)->isIncompleteArrayType()) - return Context.getBaseElementType(*this).isPODType(Context); + return Context.getBaseElementType(*this).isCXX98PODType(Context); if ((*this)->isIncompleteType()) return false; @@ -929,7 +959,7 @@ bool QualType::isPODType(ASTContext &Context) const { case Type::VariableArray: case Type::ConstantArray: // IncompleteArray is handled above. - return Context.getBaseElementType(*this).isPODType(Context); + return Context.getBaseElementType(*this).isCXX98PODType(Context); case Type::ObjCObjectPointer: case Type::BlockPointer: @@ -1417,7 +1447,7 @@ const char *Type::getTypeClassName() const { llvm_unreachable("Invalid type class."); } -const char *BuiltinType::getName(const PrintingPolicy &Policy) const { +StringRef BuiltinType::getName(const PrintingPolicy &Policy) const { switch (getKind()) { case Void: return "void"; case Bool: return Policy.Bool ? "bool" : "_Bool"; @@ -1554,6 +1584,11 @@ FunctionProtoType::FunctionProtoType(QualType result, const QualType *args, slot[1] = epi.ExceptionSpecTemplate; // This exception specification doesn't make the type dependent, because // it's not instantiated as part of instantiating the type. + } else if (getExceptionSpecType() == EST_Unevaluated) { + // Store the function decl from which we will resolve our + // exception specification. + FunctionDecl **slot = reinterpret_cast(argSlot + numArgs); + slot[0] = epi.ExceptionSpecDecl; } if (epi.ConsumedArguments) { @@ -1637,7 +1672,8 @@ void FunctionProtoType::Profile(llvm::FoldingSetNodeID &ID, QualType Result, ID.AddPointer(epi.Exceptions[i].getAsOpaquePtr()); } else if (epi.ExceptionSpecType == EST_ComputedNoexcept && epi.NoexceptExpr){ epi.NoexceptExpr->Profile(ID, Context, false); - } else if (epi.ExceptionSpecType == EST_Uninstantiated) { + } else if (epi.ExceptionSpecType == EST_Uninstantiated || + epi.ExceptionSpecType == EST_Unevaluated) { ID.AddPointer(epi.ExceptionSpecDecl->getCanonicalDecl()); } if (epi.ConsumedArguments) { @@ -1832,8 +1868,7 @@ TemplateSpecializationType(TemplateName T, Canon.isNull()? T.isDependent() : Canon->isInstantiationDependentType(), false, - Canon.isNull()? T.containsUnexpandedParameterPack() - : Canon->containsUnexpandedParameterPack()), + T.containsUnexpandedParameterPack()), Template(T), NumArgs(NumArgs), TypeAlias(!AliasedType.isNull()) { assert(!T.getAsDependentTemplateName() && "Use DependentTemplateSpecializationType for dependent template-name"); @@ -1858,6 +1893,8 @@ TemplateSpecializationType(TemplateName T, // arguments is. Given: // template using U = int; // U is always non-dependent, irrespective of the type T. + // However, U contains an unexpanded parameter pack, even though + // its expansion (and thus its desugared type) doesn't. 
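The alias-template comment above deserves a concrete example: a specialization like U<T> can be entirely non-dependent, yet as written it still mentions the pack, so unexpanded-pack containment must be computed from the arguments as written rather than from the canonical type. Spelled out (illustrative names; the dependence bookkeeping the comment annotates continues below):

    #include <tuple>

    template <typename> using AlwaysInt = int;  // AlwaysInt<T> is always 'int'

    template <typename... Ts>
    struct Holder {
      // AlwaysInt<Ts> desugars to plain 'int', but as written it contains the
      // unexpanded pack Ts; marking the type accordingly is what allows the
      // expansion here.
      std::tuple<AlwaysInt<Ts>...> values;
    };
    // Holder<int, char>::values is std::tuple<int, int>.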
if (Canon.isNull() && Args[Arg].isDependent()) setDependent(); else if (Args[Arg].isInstantiationDependent()) @@ -1866,7 +1903,7 @@ TemplateSpecializationType(TemplateName T, if (Args[Arg].getKind() == TemplateArgument::Type && Args[Arg].getAsType()->isVariablyModifiedType()) setVariablyModified(); - if (Canon.isNull() && Args[Arg].containsUnexpandedParameterPack()) + if (Args[Arg].containsUnexpandedParameterPack()) setContainsUnexpandedParameterPack(); new (&TemplateArgs[Arg]) TemplateArgument(Args[Arg]); diff --git a/lib/AST/TypeLoc.cpp b/lib/AST/TypeLoc.cpp index caa19b1..c7bb7da 100644 --- a/lib/AST/TypeLoc.cpp +++ b/lib/AST/TypeLoc.cpp @@ -13,6 +13,7 @@ #include "llvm/Support/raw_ostream.h" #include "clang/AST/TypeLocVisitor.h" +#include "clang/AST/ASTContext.h" #include "clang/AST/Expr.h" #include "llvm/Support/ErrorHandling.h" using namespace clang; diff --git a/lib/AST/TypePrinter.cpp b/lib/AST/TypePrinter.cpp index 3bf80e7..c42117c 100644 --- a/lib/AST/TypePrinter.cpp +++ b/lib/AST/TypePrinter.cpp @@ -11,6 +11,7 @@ // //===----------------------------------------------------------------------===// +#include "clang/AST/ASTContext.h" #include "clang/AST/Decl.h" #include "clang/AST/DeclObjC.h" #include "clang/AST/DeclTemplate.h" @@ -19,8 +20,10 @@ #include "clang/AST/PrettyPrinter.h" #include "clang/Basic/LangOptions.h" #include "clang/Basic/SourceManager.h" +#include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringExtras.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Support/SaveAndRestore.h" using namespace clang; namespace { @@ -40,62 +43,124 @@ namespace { Policy.SuppressStrongLifetime = Old; } }; + + class ParamPolicyRAII { + PrintingPolicy &Policy; + bool Old; + + public: + explicit ParamPolicyRAII(PrintingPolicy &Policy) + : Policy(Policy), Old(Policy.SuppressSpecifiers) { + Policy.SuppressSpecifiers = false; + } + + ~ParamPolicyRAII() { + Policy.SuppressSpecifiers = Old; + } + }; + + class ElaboratedTypePolicyRAII { + PrintingPolicy &Policy; + bool SuppressTagKeyword; + bool SuppressScope; + + public: + explicit ElaboratedTypePolicyRAII(PrintingPolicy &Policy) : Policy(Policy) { + SuppressTagKeyword = Policy.SuppressTagKeyword; + SuppressScope = Policy.SuppressScope; + Policy.SuppressTagKeyword = true; + Policy.SuppressScope = true; + } + + ~ElaboratedTypePolicyRAII() { + Policy.SuppressTagKeyword = SuppressTagKeyword; + Policy.SuppressScope = SuppressScope; + } + }; class TypePrinter { PrintingPolicy Policy; + bool HasEmptyPlaceHolder; public: - explicit TypePrinter(const PrintingPolicy &Policy) : Policy(Policy) { } - - void print(const Type *ty, Qualifiers qs, std::string &buffer); - void print(QualType T, std::string &S); - void AppendScope(DeclContext *DC, std::string &S); - void printTag(TagDecl *T, std::string &S); + explicit TypePrinter(const PrintingPolicy &Policy) + : Policy(Policy), HasEmptyPlaceHolder(false) { } + + void print(const Type *ty, Qualifiers qs, raw_ostream &OS, + StringRef PlaceHolder); + void print(QualType T, raw_ostream &OS, StringRef PlaceHolder); + + static bool canPrefixQualifiers(const Type *T, bool &NeedARCStrongQualifier); + void spaceBeforePlaceHolder(raw_ostream &OS); + void printTypeSpec(const NamedDecl *D, raw_ostream &OS); + + void printBefore(const Type *ty, Qualifiers qs, raw_ostream &OS); + void printBefore(QualType T, raw_ostream &OS); + void printAfter(const Type *ty, Qualifiers qs, raw_ostream &OS); + void printAfter(QualType T, raw_ostream &OS); + void AppendScope(DeclContext *DC, raw_ostream &OS); + void 
printTag(TagDecl *T, raw_ostream &OS); #define ABSTRACT_TYPE(CLASS, PARENT) #define TYPE(CLASS, PARENT) \ - void print##CLASS(const CLASS##Type *T, std::string &S); + void print##CLASS##Before(const CLASS##Type *T, raw_ostream &OS); \ + void print##CLASS##After(const CLASS##Type *T, raw_ostream &OS); #include "clang/AST/TypeNodes.def" }; } -static void AppendTypeQualList(std::string &S, unsigned TypeQuals) { +static void AppendTypeQualList(raw_ostream &OS, unsigned TypeQuals) { + bool appendSpace = false; if (TypeQuals & Qualifiers::Const) { - if (!S.empty()) S += ' '; - S += "const"; + OS << "const"; + appendSpace = true; } if (TypeQuals & Qualifiers::Volatile) { - if (!S.empty()) S += ' '; - S += "volatile"; + if (appendSpace) OS << ' '; + OS << "volatile"; + appendSpace = true; } if (TypeQuals & Qualifiers::Restrict) { - if (!S.empty()) S += ' '; - S += "restrict"; + if (appendSpace) OS << ' '; + OS << "restrict"; } } -void TypePrinter::print(QualType t, std::string &buffer) { +void TypePrinter::spaceBeforePlaceHolder(raw_ostream &OS) { + if (!HasEmptyPlaceHolder) + OS << ' '; +} + +void TypePrinter::print(QualType t, raw_ostream &OS, StringRef PlaceHolder) { SplitQualType split = t.split(); - print(split.Ty, split.Quals, buffer); + print(split.Ty, split.Quals, OS, PlaceHolder); } -void TypePrinter::print(const Type *T, Qualifiers Quals, std::string &buffer) { +void TypePrinter::print(const Type *T, Qualifiers Quals, raw_ostream &OS, + StringRef PlaceHolder) { if (!T) { - buffer += "NULL TYPE"; + OS << "NULL TYPE"; return; } if (Policy.SuppressSpecifiers && T->isSpecifierType()) return; - - // Print qualifiers as appropriate. - + + SaveAndRestore<bool> PHVal(HasEmptyPlaceHolder, PlaceHolder.empty()); + + printBefore(T, Quals, OS); + OS << PlaceHolder; + printAfter(T, Quals, OS); +} + +bool TypePrinter::canPrefixQualifiers(const Type *T, + bool &NeedARCStrongQualifier) { // CanPrefixQualifiers - We prefer to print type qualifiers before the type, // so that we get "const int" instead of "int const", but we can't do this if // the type is complex. For example if the type is "int*", we *must* print // "int * const", printing "const int *" is different. Only do this when the // type expands to a simple string. bool CanPrefixQualifiers = false; - bool NeedARCStrongQualifier = false; + NeedARCStrongQualifier = false; Type::TypeClass TC = T->getTypeClass(); if (const AutoType *AT = dyn_cast<AutoType>(T)) TC = AT->desugar()->getTypeClass(); @@ -157,493 +222,616 @@ void TypePrinter::print(const Type *T, Qualifiers Quals, std::string &buffer) { CanPrefixQualifiers = false; break; } - - if (!CanPrefixQualifiers && !Quals.empty()) { - std::string qualsBuffer; + + return CanPrefixQualifiers; +} + +void TypePrinter::printBefore(QualType T, raw_ostream &OS) { + SplitQualType Split = T.split(); + + // If we have cv1 T, where T is substituted for cv2 U, only print cv1 - cv2 + // at this level. + Qualifiers Quals = Split.Quals; + if (const SubstTemplateTypeParmType *Subst = + dyn_cast<SubstTemplateTypeParmType>(Split.Ty)) + Quals -= QualType(Subst, 0).getQualifiers(); + + printBefore(Split.Ty, Quals, OS); +} + +/// \brief Prints the part of the type string before an identifier, e.g. for +/// "int foo[10]" it prints "int ". +void TypePrinter::printBefore(const Type *T,Qualifiers Quals, raw_ostream &OS) { + if (Policy.SuppressSpecifiers && T->isSpecifierType()) + return; + + SaveAndRestore<bool> PrevPHIsEmpty(HasEmptyPlaceHolder); + + // Print qualifiers as appropriate.
+ + bool CanPrefixQualifiers = false; + bool NeedARCStrongQualifier = false; + CanPrefixQualifiers = canPrefixQualifiers(T, NeedARCStrongQualifier); + + if (CanPrefixQualifiers && !Quals.empty()) { if (NeedARCStrongQualifier) { IncludeStrongLifetimeRAII Strong(Policy); - Quals.getAsStringInternal(qualsBuffer, Policy); + Quals.print(OS, Policy, /*appendSpaceIfNonEmpty=*/true); } else { - Quals.getAsStringInternal(qualsBuffer, Policy); - } - - if (!qualsBuffer.empty()) { - if (!buffer.empty()) { - qualsBuffer += ' '; - qualsBuffer += buffer; - } - std::swap(buffer, qualsBuffer); + Quals.print(OS, Policy, /*appendSpaceIfNonEmpty=*/true); } } - + + bool hasAfterQuals = false; + if (!CanPrefixQualifiers && !Quals.empty()) { + hasAfterQuals = !Quals.isEmptyWhenPrinted(Policy); + if (hasAfterQuals) + HasEmptyPlaceHolder = false; + } + switch (T->getTypeClass()) { #define ABSTRACT_TYPE(CLASS, PARENT) #define TYPE(CLASS, PARENT) case Type::CLASS: \ - print##CLASS(cast(T), buffer); \ + print##CLASS##Before(cast(T), OS); \ break; #include "clang/AST/TypeNodes.def" } - - // If we're adding the qualifiers as a prefix, do it now. - if (CanPrefixQualifiers && !Quals.empty()) { - std::string qualsBuffer; + + if (hasAfterQuals) { if (NeedARCStrongQualifier) { IncludeStrongLifetimeRAII Strong(Policy); - Quals.getAsStringInternal(qualsBuffer, Policy); + Quals.print(OS, Policy, /*appendSpaceIfNonEmpty=*/!PrevPHIsEmpty.get()); } else { - Quals.getAsStringInternal(qualsBuffer, Policy); - } - - if (!qualsBuffer.empty()) { - if (!buffer.empty()) { - qualsBuffer += ' '; - qualsBuffer += buffer; - } - std::swap(buffer, qualsBuffer); + Quals.print(OS, Policy, /*appendSpaceIfNonEmpty=*/!PrevPHIsEmpty.get()); } } } -void TypePrinter::printBuiltin(const BuiltinType *T, std::string &S) { - if (S.empty()) { - S = T->getName(Policy); - } else { - // Prefix the basic type, e.g. 'int X'. - S = ' ' + S; - S = T->getName(Policy) + S; +void TypePrinter::printAfter(QualType t, raw_ostream &OS) { + SplitQualType split = t.split(); + printAfter(split.Ty, split.Quals, OS); +} + +/// \brief Prints the part of the type string after an identifier, e.g. for +/// "int foo[10]" it prints "[10]". +void TypePrinter::printAfter(const Type *T, Qualifiers Quals, raw_ostream &OS) { + switch (T->getTypeClass()) { +#define ABSTRACT_TYPE(CLASS, PARENT) +#define TYPE(CLASS, PARENT) case Type::CLASS: \ + print##CLASS##After(cast(T), OS); \ + break; +#include "clang/AST/TypeNodes.def" } } -void TypePrinter::printComplex(const ComplexType *T, std::string &S) { - print(T->getElementType(), S); - S = "_Complex " + S; +void TypePrinter::printBuiltinBefore(const BuiltinType *T, raw_ostream &OS) { + OS << T->getName(Policy); + spaceBeforePlaceHolder(OS); } +void TypePrinter::printBuiltinAfter(const BuiltinType *T, raw_ostream &OS) { } -void TypePrinter::printPointer(const PointerType *T, std::string &S) { - S = '*' + S; - +void TypePrinter::printComplexBefore(const ComplexType *T, raw_ostream &OS) { + OS << "_Complex "; + printBefore(T->getElementType(), OS); +} +void TypePrinter::printComplexAfter(const ComplexType *T, raw_ostream &OS) { + printAfter(T->getElementType(), OS); +} + +void TypePrinter::printPointerBefore(const PointerType *T, raw_ostream &OS) { + IncludeStrongLifetimeRAII Strong(Policy); + SaveAndRestore NonEmptyPH(HasEmptyPlaceHolder, false); + printBefore(T->getPointeeType(), OS); // Handle things like 'int (*A)[4];' correctly. // FIXME: this should include vectors, but vectors use attributes I guess. 
if (isa(T->getPointeeType())) - S = '(' + S + ')'; - + OS << '('; + OS << '*'; +} +void TypePrinter::printPointerAfter(const PointerType *T, raw_ostream &OS) { IncludeStrongLifetimeRAII Strong(Policy); - print(T->getPointeeType(), S); + SaveAndRestore NonEmptyPH(HasEmptyPlaceHolder, false); + // Handle things like 'int (*A)[4];' correctly. + // FIXME: this should include vectors, but vectors use attributes I guess. + if (isa(T->getPointeeType())) + OS << ')'; + printAfter(T->getPointeeType(), OS); } -void TypePrinter::printBlockPointer(const BlockPointerType *T, std::string &S) { - S = '^' + S; - print(T->getPointeeType(), S); +void TypePrinter::printBlockPointerBefore(const BlockPointerType *T, + raw_ostream &OS) { + SaveAndRestore NonEmptyPH(HasEmptyPlaceHolder, false); + printBefore(T->getPointeeType(), OS); + OS << '^'; +} +void TypePrinter::printBlockPointerAfter(const BlockPointerType *T, + raw_ostream &OS) { + SaveAndRestore NonEmptyPH(HasEmptyPlaceHolder, false); + printAfter(T->getPointeeType(), OS); } -void TypePrinter::printLValueReference(const LValueReferenceType *T, - std::string &S) { - S = '&' + S; - +void TypePrinter::printLValueReferenceBefore(const LValueReferenceType *T, + raw_ostream &OS) { + IncludeStrongLifetimeRAII Strong(Policy); + SaveAndRestore NonEmptyPH(HasEmptyPlaceHolder, false); + printBefore(T->getPointeeTypeAsWritten(), OS); // Handle things like 'int (&A)[4];' correctly. // FIXME: this should include vectors, but vectors use attributes I guess. if (isa(T->getPointeeTypeAsWritten())) - S = '(' + S + ')'; - + OS << '('; + OS << '&'; +} +void TypePrinter::printLValueReferenceAfter(const LValueReferenceType *T, + raw_ostream &OS) { IncludeStrongLifetimeRAII Strong(Policy); - print(T->getPointeeTypeAsWritten(), S); + SaveAndRestore NonEmptyPH(HasEmptyPlaceHolder, false); + // Handle things like 'int (&A)[4];' correctly. + // FIXME: this should include vectors, but vectors use attributes I guess. + if (isa(T->getPointeeTypeAsWritten())) + OS << ')'; + printAfter(T->getPointeeTypeAsWritten(), OS); } -void TypePrinter::printRValueReference(const RValueReferenceType *T, - std::string &S) { - S = "&&" + S; - +void TypePrinter::printRValueReferenceBefore(const RValueReferenceType *T, + raw_ostream &OS) { + IncludeStrongLifetimeRAII Strong(Policy); + SaveAndRestore NonEmptyPH(HasEmptyPlaceHolder, false); + printBefore(T->getPointeeTypeAsWritten(), OS); // Handle things like 'int (&&A)[4];' correctly. // FIXME: this should include vectors, but vectors use attributes I guess. if (isa(T->getPointeeTypeAsWritten())) - S = '(' + S + ')'; - + OS << '('; + OS << "&&"; +} +void TypePrinter::printRValueReferenceAfter(const RValueReferenceType *T, + raw_ostream &OS) { IncludeStrongLifetimeRAII Strong(Policy); - print(T->getPointeeTypeAsWritten(), S); + SaveAndRestore NonEmptyPH(HasEmptyPlaceHolder, false); + // Handle things like 'int (&&A)[4];' correctly. + // FIXME: this should include vectors, but vectors use attributes I guess. 
+ if (isa(T->getPointeeTypeAsWritten())) + OS << ')'; + printAfter(T->getPointeeTypeAsWritten(), OS); } -void TypePrinter::printMemberPointer(const MemberPointerType *T, - std::string &S) { - PrintingPolicy InnerPolicy(Policy); - Policy.SuppressTag = true; - std::string C = QualType(T->getClass(), 0).getAsString(InnerPolicy); - C += "::*"; - S = C + S; - +void TypePrinter::printMemberPointerBefore(const MemberPointerType *T, + raw_ostream &OS) { + IncludeStrongLifetimeRAII Strong(Policy); + SaveAndRestore NonEmptyPH(HasEmptyPlaceHolder, false); + printBefore(T->getPointeeType(), OS); // Handle things like 'int (Cls::*A)[4];' correctly. // FIXME: this should include vectors, but vectors use attributes I guess. if (isa(T->getPointeeType())) - S = '(' + S + ')'; - + OS << '('; + + PrintingPolicy InnerPolicy(Policy); + InnerPolicy.SuppressTag = false; + TypePrinter(InnerPolicy).print(QualType(T->getClass(), 0), OS, StringRef()); + + OS << "::*"; +} +void TypePrinter::printMemberPointerAfter(const MemberPointerType *T, + raw_ostream &OS) { IncludeStrongLifetimeRAII Strong(Policy); - print(T->getPointeeType(), S); + SaveAndRestore NonEmptyPH(HasEmptyPlaceHolder, false); + // Handle things like 'int (Cls::*A)[4];' correctly. + // FIXME: this should include vectors, but vectors use attributes I guess. + if (isa(T->getPointeeType())) + OS << ')'; + printAfter(T->getPointeeType(), OS); } -void TypePrinter::printConstantArray(const ConstantArrayType *T, - std::string &S) { - S += '['; - S += llvm::utostr(T->getSize().getZExtValue()); - S += ']'; - +void TypePrinter::printConstantArrayBefore(const ConstantArrayType *T, + raw_ostream &OS) { IncludeStrongLifetimeRAII Strong(Policy); - print(T->getElementType(), S); + SaveAndRestore NonEmptyPH(HasEmptyPlaceHolder, false); + printBefore(T->getElementType(), OS); +} +void TypePrinter::printConstantArrayAfter(const ConstantArrayType *T, + raw_ostream &OS) { + OS << '[' << T->getSize().getZExtValue() << ']'; + printAfter(T->getElementType(), OS); } -void TypePrinter::printIncompleteArray(const IncompleteArrayType *T, - std::string &S) { - S += "[]"; +void TypePrinter::printIncompleteArrayBefore(const IncompleteArrayType *T, + raw_ostream &OS) { IncludeStrongLifetimeRAII Strong(Policy); - print(T->getElementType(), S); + SaveAndRestore NonEmptyPH(HasEmptyPlaceHolder, false); + printBefore(T->getElementType(), OS); +} +void TypePrinter::printIncompleteArrayAfter(const IncompleteArrayType *T, + raw_ostream &OS) { + OS << "[]"; + printAfter(T->getElementType(), OS); } -void TypePrinter::printVariableArray(const VariableArrayType *T, - std::string &S) { - S += '['; - +void TypePrinter::printVariableArrayBefore(const VariableArrayType *T, + raw_ostream &OS) { + IncludeStrongLifetimeRAII Strong(Policy); + SaveAndRestore NonEmptyPH(HasEmptyPlaceHolder, false); + printBefore(T->getElementType(), OS); +} +void TypePrinter::printVariableArrayAfter(const VariableArrayType *T, + raw_ostream &OS) { + OS << '['; if (T->getIndexTypeQualifiers().hasQualifiers()) { - AppendTypeQualList(S, T->getIndexTypeCVRQualifiers()); - S += ' '; + AppendTypeQualList(OS, T->getIndexTypeCVRQualifiers()); + OS << ' '; } - + if (T->getSizeModifier() == VariableArrayType::Static) - S += "static"; + OS << "static"; else if (T->getSizeModifier() == VariableArrayType::Star) - S += '*'; - - if (T->getSizeExpr()) { - std::string SStr; - llvm::raw_string_ostream s(SStr); - T->getSizeExpr()->printPretty(s, 0, Policy); - S += s.str(); - } - S += ']'; - - IncludeStrongLifetimeRAII Strong(Policy); - 
print(T->getElementType(), S); + OS << '*'; + + if (T->getSizeExpr()) + T->getSizeExpr()->printPretty(OS, 0, Policy); + OS << ']'; + + printAfter(T->getElementType(), OS); } -void TypePrinter::printDependentSizedArray(const DependentSizedArrayType *T, - std::string &S) { - S += '['; - - if (T->getSizeExpr()) { - std::string SStr; - llvm::raw_string_ostream s(SStr); - T->getSizeExpr()->printPretty(s, 0, Policy); - S += s.str(); - } - S += ']'; - +void TypePrinter::printDependentSizedArrayBefore( + const DependentSizedArrayType *T, + raw_ostream &OS) { IncludeStrongLifetimeRAII Strong(Policy); - print(T->getElementType(), S); + SaveAndRestore NonEmptyPH(HasEmptyPlaceHolder, false); + printBefore(T->getElementType(), OS); +} +void TypePrinter::printDependentSizedArrayAfter( + const DependentSizedArrayType *T, + raw_ostream &OS) { + OS << '['; + if (T->getSizeExpr()) + T->getSizeExpr()->printPretty(OS, 0, Policy); + OS << ']'; + printAfter(T->getElementType(), OS); } -void TypePrinter::printDependentSizedExtVector( +void TypePrinter::printDependentSizedExtVectorBefore( const DependentSizedExtVectorType *T, - std::string &S) { - print(T->getElementType(), S); - - S += " __attribute__((ext_vector_type("; - if (T->getSizeExpr()) { - std::string SStr; - llvm::raw_string_ostream s(SStr); - T->getSizeExpr()->printPretty(s, 0, Policy); - S += s.str(); - } - S += ")))"; + raw_ostream &OS) { + printBefore(T->getElementType(), OS); +} +void TypePrinter::printDependentSizedExtVectorAfter( + const DependentSizedExtVectorType *T, + raw_ostream &OS) { + OS << " __attribute__((ext_vector_type("; + if (T->getSizeExpr()) + T->getSizeExpr()->printPretty(OS, 0, Policy); + OS << ")))"; + printAfter(T->getElementType(), OS); } -void TypePrinter::printVector(const VectorType *T, std::string &S) { +void TypePrinter::printVectorBefore(const VectorType *T, raw_ostream &OS) { switch (T->getVectorKind()) { case VectorType::AltiVecPixel: - S = "__vector __pixel " + S; + OS << "__vector __pixel "; break; case VectorType::AltiVecBool: - print(T->getElementType(), S); - S = "__vector __bool " + S; + OS << "__vector __bool "; + printBefore(T->getElementType(), OS); break; case VectorType::AltiVecVector: - print(T->getElementType(), S); - S = "__vector " + S; + OS << "__vector "; + printBefore(T->getElementType(), OS); break; case VectorType::NeonVector: - print(T->getElementType(), S); - S = ("__attribute__((neon_vector_type(" + - llvm::utostr_32(T->getNumElements()) + "))) " + S); + OS << "__attribute__((neon_vector_type(" + << T->getNumElements() << "))) "; + printBefore(T->getElementType(), OS); break; case VectorType::NeonPolyVector: - print(T->getElementType(), S); - S = ("__attribute__((neon_polyvector_type(" + - llvm::utostr_32(T->getNumElements()) + "))) " + S); + OS << "__attribute__((neon_polyvector_type(" << + T->getNumElements() << "))) "; + printBefore(T->getElementType(), OS); break; case VectorType::GenericVector: { // FIXME: We prefer to print the size directly here, but have no way // to get the size of the type. - print(T->getElementType(), S); - std::string V = "__attribute__((__vector_size__("; - V += llvm::utostr_32(T->getNumElements()); // convert back to bytes. 
- std::string ET; - print(T->getElementType(), ET); - V += " * sizeof(" + ET + ")))) "; - S = V + S; + OS << "__attribute__((__vector_size__(" + << T->getNumElements() + << " * sizeof("; + print(T->getElementType(), OS, StringRef()); + OS << ")))) "; + printBefore(T->getElementType(), OS); break; } } } +void TypePrinter::printVectorAfter(const VectorType *T, raw_ostream &OS) { + printAfter(T->getElementType(), OS); +} -void TypePrinter::printExtVector(const ExtVectorType *T, std::string &S) { - S += " __attribute__((ext_vector_type("; - S += llvm::utostr_32(T->getNumElements()); - S += ")))"; - print(T->getElementType(), S); +void TypePrinter::printExtVectorBefore(const ExtVectorType *T, + raw_ostream &OS) { + printBefore(T->getElementType(), OS); +} +void TypePrinter::printExtVectorAfter(const ExtVectorType *T, raw_ostream &OS) { + printAfter(T->getElementType(), OS); + OS << " __attribute__((ext_vector_type("; + OS << T->getNumElements(); + OS << ")))"; } void -FunctionProtoType::printExceptionSpecification(std::string &S, +FunctionProtoType::printExceptionSpecification(raw_ostream &OS, PrintingPolicy Policy) const { if (hasDynamicExceptionSpec()) { - S += " throw("; + OS << " throw("; if (getExceptionSpecType() == EST_MSAny) - S += "..."; + OS << "..."; else for (unsigned I = 0, N = getNumExceptions(); I != N; ++I) { if (I) - S += ", "; + OS << ", "; - S += getExceptionType(I).getAsString(Policy); + OS << getExceptionType(I).stream(Policy); } - S += ")"; + OS << ')'; } else if (isNoexceptExceptionSpec(getExceptionSpecType())) { - S += " noexcept"; + OS << " noexcept"; if (getExceptionSpecType() == EST_ComputedNoexcept) { - S += "("; - llvm::raw_string_ostream EOut(S); - getNoexceptExpr()->printPretty(EOut, 0, Policy); - EOut.flush(); - S += EOut.str(); - S += ")"; + OS << '('; + getNoexceptExpr()->printPretty(OS, 0, Policy); + OS << ')'; } } } -void TypePrinter::printFunctionProto(const FunctionProtoType *T, - std::string &S) { +void TypePrinter::printFunctionProtoBefore(const FunctionProtoType *T, + raw_ostream &OS) { + if (T->hasTrailingReturn()) { + OS << "auto "; + if (!HasEmptyPlaceHolder) + OS << '('; + } else { + // If needed for precedence reasons, wrap the inner part in grouping parens. + SaveAndRestore PrevPHIsEmpty(HasEmptyPlaceHolder, false); + printBefore(T->getResultType(), OS); + if (!PrevPHIsEmpty.get()) + OS << '('; + } +} + +void TypePrinter::printFunctionProtoAfter(const FunctionProtoType *T, + raw_ostream &OS) { // If needed for precedence reasons, wrap the inner part in grouping parens. - if (!S.empty()) - S = "(" + S + ")"; - - S += "("; - std::string Tmp; - PrintingPolicy ParamPolicy(Policy); - ParamPolicy.SuppressSpecifiers = false; - for (unsigned i = 0, e = T->getNumArgs(); i != e; ++i) { - if (i) S += ", "; - print(T->getArgType(i), Tmp); - S += Tmp; - Tmp.clear(); + if (!HasEmptyPlaceHolder) + OS << ')'; + SaveAndRestore NonEmptyPH(HasEmptyPlaceHolder, false); + + OS << '('; + { + ParamPolicyRAII ParamPolicy(Policy); + for (unsigned i = 0, e = T->getNumArgs(); i != e; ++i) { + if (i) OS << ", "; + print(T->getArgType(i), OS, StringRef()); + } } if (T->isVariadic()) { if (T->getNumArgs()) - S += ", "; - S += "..."; + OS << ", "; + OS << "..."; } else if (T->getNumArgs() == 0 && !Policy.LangOpts.CPlusPlus) { // Do not emit int() if we have a proto, emit 'int(void)'. 
- S += "void"; + OS << "void"; } - S += ")"; + OS << ')'; FunctionType::ExtInfo Info = T->getExtInfo(); switch(Info.getCC()) { case CC_Default: break; case CC_C: - S += " __attribute__((cdecl))"; + OS << " __attribute__((cdecl))"; break; case CC_X86StdCall: - S += " __attribute__((stdcall))"; + OS << " __attribute__((stdcall))"; break; case CC_X86FastCall: - S += " __attribute__((fastcall))"; + OS << " __attribute__((fastcall))"; break; case CC_X86ThisCall: - S += " __attribute__((thiscall))"; + OS << " __attribute__((thiscall))"; break; case CC_X86Pascal: - S += " __attribute__((pascal))"; + OS << " __attribute__((pascal))"; break; case CC_AAPCS: - S += " __attribute__((pcs(\"aapcs\")))"; + OS << " __attribute__((pcs(\"aapcs\")))"; break; case CC_AAPCS_VFP: - S += " __attribute__((pcs(\"aapcs-vfp\")))"; + OS << " __attribute__((pcs(\"aapcs-vfp\")))"; break; } if (Info.getNoReturn()) - S += " __attribute__((noreturn))"; + OS << " __attribute__((noreturn))"; if (Info.getRegParm()) - S += " __attribute__((regparm (" + - llvm::utostr_32(Info.getRegParm()) + ")))"; - - AppendTypeQualList(S, T->getTypeQuals()); + OS << " __attribute__((regparm (" + << Info.getRegParm() << ")))"; + + if (unsigned quals = T->getTypeQuals()) { + OS << ' '; + AppendTypeQualList(OS, quals); + } switch (T->getRefQualifier()) { case RQ_None: break; case RQ_LValue: - S += " &"; + OS << " &"; break; case RQ_RValue: - S += " &&"; + OS << " &&"; break; } - T->printExceptionSpecification(S, Policy); + T->printExceptionSpecification(OS, Policy); + if (T->hasTrailingReturn()) { - std::string ResultS; - print(T->getResultType(), ResultS); - S = "auto " + S + " -> " + ResultS; + OS << " -> "; + print(T->getResultType(), OS, StringRef()); } else - print(T->getResultType(), S); + printAfter(T->getResultType(), OS); } -void TypePrinter::printFunctionNoProto(const FunctionNoProtoType *T, - std::string &S) { +void TypePrinter::printFunctionNoProtoBefore(const FunctionNoProtoType *T, + raw_ostream &OS) { + // If needed for precedence reasons, wrap the inner part in grouping parens. + SaveAndRestore PrevPHIsEmpty(HasEmptyPlaceHolder, false); + printBefore(T->getResultType(), OS); + if (!PrevPHIsEmpty.get()) + OS << '('; +} +void TypePrinter::printFunctionNoProtoAfter(const FunctionNoProtoType *T, + raw_ostream &OS) { // If needed for precedence reasons, wrap the inner part in grouping parens. 
- if (!S.empty()) - S = "(" + S + ")"; + if (!HasEmptyPlaceHolder) + OS << ')'; + SaveAndRestore NonEmptyPH(HasEmptyPlaceHolder, false); - S += "()"; + OS << "()"; if (T->getNoReturnAttr()) - S += " __attribute__((noreturn))"; - print(T->getResultType(), S); + OS << " __attribute__((noreturn))"; + printAfter(T->getResultType(), OS); } -static void printTypeSpec(const NamedDecl *D, std::string &S) { +void TypePrinter::printTypeSpec(const NamedDecl *D, raw_ostream &OS) { IdentifierInfo *II = D->getIdentifier(); - if (S.empty()) - S = II->getName().str(); - else - S = II->getName().str() + ' ' + S; + OS << II->getName(); + spaceBeforePlaceHolder(OS); } -void TypePrinter::printUnresolvedUsing(const UnresolvedUsingType *T, - std::string &S) { - printTypeSpec(T->getDecl(), S); +void TypePrinter::printUnresolvedUsingBefore(const UnresolvedUsingType *T, + raw_ostream &OS) { + printTypeSpec(T->getDecl(), OS); } +void TypePrinter::printUnresolvedUsingAfter(const UnresolvedUsingType *T, + raw_ostream &OS) { } -void TypePrinter::printTypedef(const TypedefType *T, std::string &S) { - printTypeSpec(T->getDecl(), S); +void TypePrinter::printTypedefBefore(const TypedefType *T, raw_ostream &OS) { + printTypeSpec(T->getDecl(), OS); } +void TypePrinter::printTypedefAfter(const TypedefType *T, raw_ostream &OS) { } -void TypePrinter::printTypeOfExpr(const TypeOfExprType *T, std::string &S) { - if (!S.empty()) // Prefix the basic type, e.g. 'typeof(e) X'. - S = ' ' + S; - std::string Str; - llvm::raw_string_ostream s(Str); - T->getUnderlyingExpr()->printPretty(s, 0, Policy); - S = "typeof " + s.str() + S; +void TypePrinter::printTypeOfExprBefore(const TypeOfExprType *T, + raw_ostream &OS) { + OS << "typeof "; + T->getUnderlyingExpr()->printPretty(OS, 0, Policy); + spaceBeforePlaceHolder(OS); } - -void TypePrinter::printTypeOf(const TypeOfType *T, std::string &S) { - if (!S.empty()) // Prefix the basic type, e.g. 'typeof(t) X'. - S = ' ' + S; - std::string Tmp; - print(T->getUnderlyingType(), Tmp); - S = "typeof(" + Tmp + ")" + S; +void TypePrinter::printTypeOfExprAfter(const TypeOfExprType *T, + raw_ostream &OS) { } + +void TypePrinter::printTypeOfBefore(const TypeOfType *T, raw_ostream &OS) { + OS << "typeof("; + print(T->getUnderlyingType(), OS, StringRef()); + OS << ')'; + spaceBeforePlaceHolder(OS); } +void TypePrinter::printTypeOfAfter(const TypeOfType *T, raw_ostream &OS) { } -void TypePrinter::printDecltype(const DecltypeType *T, std::string &S) { - if (!S.empty()) // Prefix the basic type, e.g. 'decltype(t) X'. 
- S = ' ' + S; - std::string Str; - llvm::raw_string_ostream s(Str); - T->getUnderlyingExpr()->printPretty(s, 0, Policy); - S = "decltype(" + s.str() + ")" + S; +void TypePrinter::printDecltypeBefore(const DecltypeType *T, raw_ostream &OS) { + OS << "decltype("; + T->getUnderlyingExpr()->printPretty(OS, 0, Policy); + OS << ')'; + spaceBeforePlaceHolder(OS); } +void TypePrinter::printDecltypeAfter(const DecltypeType *T, raw_ostream &OS) { } -void TypePrinter::printUnaryTransform(const UnaryTransformType *T, - std::string &S) { - if (!S.empty()) - S = ' ' + S; - std::string Str; +void TypePrinter::printUnaryTransformBefore(const UnaryTransformType *T, + raw_ostream &OS) { IncludeStrongLifetimeRAII Strong(Policy); - print(T->getBaseType(), Str); switch (T->getUTTKind()) { case UnaryTransformType::EnumUnderlyingType: - S = "__underlying_type(" + Str + ")" + S; - break; + OS << "__underlying_type("; + print(T->getBaseType(), OS, StringRef()); + OS << ')'; + spaceBeforePlaceHolder(OS); + return; } + + printBefore(T->getBaseType(), OS); +} +void TypePrinter::printUnaryTransformAfter(const UnaryTransformType *T, + raw_ostream &OS) { + IncludeStrongLifetimeRAII Strong(Policy); + + switch (T->getUTTKind()) { + case UnaryTransformType::EnumUnderlyingType: + return; + } + + printAfter(T->getBaseType(), OS); } -void TypePrinter::printAuto(const AutoType *T, std::string &S) { +void TypePrinter::printAutoBefore(const AutoType *T, raw_ostream &OS) { // If the type has been deduced, do not print 'auto'. if (T->isDeduced()) { - print(T->getDeducedType(), S); + printBefore(T->getDeducedType(), OS); } else { - if (!S.empty()) // Prefix the basic type, e.g. 'auto X'. - S = ' ' + S; - S = "auto" + S; + OS << "auto"; + spaceBeforePlaceHolder(OS); } } +void TypePrinter::printAutoAfter(const AutoType *T, raw_ostream &OS) { + // If the type has been deduced, do not print 'auto'. + if (T->isDeduced()) + printAfter(T->getDeducedType(), OS); +} -void TypePrinter::printAtomic(const AtomicType *T, std::string &S) { - if (!S.empty()) - S = ' ' + S; - std::string Str; +void TypePrinter::printAtomicBefore(const AtomicType *T, raw_ostream &OS) { IncludeStrongLifetimeRAII Strong(Policy); - print(T->getValueType(), Str); - S = "_Atomic(" + Str + ")" + S; + OS << "_Atomic("; + print(T->getValueType(), OS, StringRef()); + OS << ')'; + spaceBeforePlaceHolder(OS); } +void TypePrinter::printAtomicAfter(const AtomicType *T, raw_ostream &OS) { } /// Appends the given scope to the end of a string. 
-void TypePrinter::AppendScope(DeclContext *DC, std::string &Buffer) { +void TypePrinter::AppendScope(DeclContext *DC, raw_ostream &OS) { if (DC->isTranslationUnit()) return; - AppendScope(DC->getParent(), Buffer); - - unsigned OldSize = Buffer.size(); + AppendScope(DC->getParent(), OS); if (NamespaceDecl *NS = dyn_cast(DC)) { if (Policy.SuppressUnwrittenScope && (NS->isAnonymousNamespace() || NS->isInline())) return; if (NS->getIdentifier()) - Buffer += NS->getNameAsString(); + OS << NS->getName() << "::"; else - Buffer += ""; + OS << "::"; } else if (ClassTemplateSpecializationDecl *Spec = dyn_cast(DC)) { IncludeStrongLifetimeRAII Strong(Policy); + OS << Spec->getIdentifier()->getName(); const TemplateArgumentList &TemplateArgs = Spec->getTemplateArgs(); - std::string TemplateArgsStr - = TemplateSpecializationType::PrintTemplateArgumentList( + TemplateSpecializationType::PrintTemplateArgumentList(OS, TemplateArgs.data(), TemplateArgs.size(), Policy); - Buffer += Spec->getIdentifier()->getName(); - Buffer += TemplateArgsStr; + OS << "::"; } else if (TagDecl *Tag = dyn_cast(DC)) { if (TypedefNameDecl *Typedef = Tag->getTypedefNameForAnonDecl()) - Buffer += Typedef->getIdentifier()->getName(); + OS << Typedef->getIdentifier()->getName() << "::"; else if (Tag->getIdentifier()) - Buffer += Tag->getIdentifier()->getName(); + OS << Tag->getIdentifier()->getName() << "::"; else return; } - - if (Buffer.size() != OldSize) - Buffer += "::"; } -void TypePrinter::printTag(TagDecl *D, std::string &InnerString) { +void TypePrinter::printTag(TagDecl *D, raw_ostream &OS) { if (Policy.SuppressTag) return; - std::string Buffer; bool HasKindDecoration = false; // bool SuppressTagKeyword @@ -654,25 +842,24 @@ void TypePrinter::printTag(TagDecl *D, std::string &InnerString) { if (!(Policy.LangOpts.CPlusPlus || Policy.SuppressTagKeyword || D->getTypedefNameForAnonDecl())) { HasKindDecoration = true; - Buffer += D->getKindName(); - Buffer += ' '; + OS << D->getKindName(); + OS << ' '; } // Compute the full nested-name-specifier for this type. // In C, this will always be empty except when the type // being printed is anonymous within other Record. if (!Policy.SuppressScope) - AppendScope(D->getDeclContext(), Buffer); + AppendScope(D->getDeclContext(), OS); if (const IdentifierInfo *II = D->getIdentifier()) - Buffer += II->getNameStart(); + OS << II->getName(); else if (TypedefNameDecl *Typedef = D->getTypedefNameForAnonDecl()) { assert(Typedef->getIdentifier() && "Typedef without identifier?"); - Buffer += Typedef->getIdentifier()->getNameStart(); + OS << Typedef->getIdentifier()->getName(); } else { // Make an unambiguous representation for anonymous types, e.g. // - llvm::raw_string_ostream OS(Buffer); if (isa(D) && cast(D)->isLambda()) { OS << "getDecl(), S); +void TypePrinter::printRecordBefore(const RecordType *T, raw_ostream &OS) { + printTag(T->getDecl(), OS); } +void TypePrinter::printRecordAfter(const RecordType *T, raw_ostream &OS) { } -void TypePrinter::printEnum(const EnumType *T, std::string &S) { - printTag(T->getDecl(), S); +void TypePrinter::printEnumBefore(const EnumType *T, raw_ostream &OS) { + printTag(T->getDecl(), OS); } +void TypePrinter::printEnumAfter(const EnumType *T, raw_ostream &OS) { } -void TypePrinter::printTemplateTypeParm(const TemplateTypeParmType *T, - std::string &S) { - if (!S.empty()) // Prefix the basic type, e.g. 'parmname X'. 
- S = ' ' + S; - +void TypePrinter::printTemplateTypeParmBefore(const TemplateTypeParmType *T, + raw_ostream &OS) { if (IdentifierInfo *Id = T->getIdentifier()) - S = Id->getName().str() + S; + OS << Id->getName(); else - S = "type-parameter-" + llvm::utostr_32(T->getDepth()) + '-' + - llvm::utostr_32(T->getIndex()) + S; + OS << "type-parameter-" << T->getDepth() << '-' << T->getIndex(); + spaceBeforePlaceHolder(OS); } +void TypePrinter::printTemplateTypeParmAfter(const TemplateTypeParmType *T, + raw_ostream &OS) { } -void TypePrinter::printSubstTemplateTypeParm(const SubstTemplateTypeParmType *T, - std::string &S) { +void TypePrinter::printSubstTemplateTypeParmBefore( + const SubstTemplateTypeParmType *T, + raw_ostream &OS) { IncludeStrongLifetimeRAII Strong(Policy); - print(T->getReplacementType(), S); + printBefore(T->getReplacementType(), OS); +} +void TypePrinter::printSubstTemplateTypeParmAfter( + const SubstTemplateTypeParmType *T, + raw_ostream &OS) { + IncludeStrongLifetimeRAII Strong(Policy); + printAfter(T->getReplacementType(), OS); } -void TypePrinter::printSubstTemplateTypeParmPack( +void TypePrinter::printSubstTemplateTypeParmPackBefore( const SubstTemplateTypeParmPackType *T, - std::string &S) { + raw_ostream &OS) { IncludeStrongLifetimeRAII Strong(Policy); - printTemplateTypeParm(T->getReplacedParameter(), S); + printTemplateTypeParmBefore(T->getReplacedParameter(), OS); +} +void TypePrinter::printSubstTemplateTypeParmPackAfter( + const SubstTemplateTypeParmPackType *T, + raw_ostream &OS) { + IncludeStrongLifetimeRAII Strong(Policy); + printTemplateTypeParmAfter(T->getReplacedParameter(), OS); } -void TypePrinter::printTemplateSpecialization( +void TypePrinter::printTemplateSpecializationBefore( const TemplateSpecializationType *T, - std::string &S) { + raw_ostream &OS) { IncludeStrongLifetimeRAII Strong(Policy); - std::string SpecString; + T->getTemplateName().print(OS, Policy); - { - llvm::raw_string_ostream OS(SpecString); - T->getTemplateName().print(OS, Policy); - } - - SpecString += TemplateSpecializationType::PrintTemplateArgumentList( - T->getArgs(), - T->getNumArgs(), - Policy); - if (S.empty()) - S.swap(SpecString); - else - S = SpecString + ' ' + S; + TemplateSpecializationType::PrintTemplateArgumentList(OS, + T->getArgs(), + T->getNumArgs(), + Policy); + spaceBeforePlaceHolder(OS); } +void TypePrinter::printTemplateSpecializationAfter( + const TemplateSpecializationType *T, + raw_ostream &OS) { } -void TypePrinter::printInjectedClassName(const InjectedClassNameType *T, - std::string &S) { - printTemplateSpecialization(T->getInjectedTST(), S); +void TypePrinter::printInjectedClassNameBefore(const InjectedClassNameType *T, + raw_ostream &OS) { + printTemplateSpecializationBefore(T->getInjectedTST(), OS); } - -void TypePrinter::printElaborated(const ElaboratedType *T, std::string &S) { - std::string MyString; - - { - llvm::raw_string_ostream OS(MyString); - OS << TypeWithKeyword::getKeywordName(T->getKeyword()); - if (T->getKeyword() != ETK_None) - OS << " "; - NestedNameSpecifier* Qualifier = T->getQualifier(); - if (Qualifier) - Qualifier->print(OS, Policy); - } - - std::string TypeStr; - PrintingPolicy InnerPolicy(Policy); - InnerPolicy.SuppressTagKeyword = true; - InnerPolicy.SuppressScope = true; - TypePrinter(InnerPolicy).print(T->getNamedType(), TypeStr); +void TypePrinter::printInjectedClassNameAfter(const InjectedClassNameType *T, + raw_ostream &OS) { } + +void TypePrinter::printElaboratedBefore(const ElaboratedType *T, + raw_ostream &OS) { + OS << 
TypeWithKeyword::getKeywordName(T->getKeyword()); + if (T->getKeyword() != ETK_None) + OS << " "; + NestedNameSpecifier* Qualifier = T->getQualifier(); + if (Qualifier) + Qualifier->print(OS, Policy); - MyString += TypeStr; - if (S.empty()) - S.swap(MyString); - else - S = MyString + ' ' + S; + ElaboratedTypePolicyRAII PolicyRAII(Policy); + printBefore(T->getNamedType(), OS); +} +void TypePrinter::printElaboratedAfter(const ElaboratedType *T, + raw_ostream &OS) { + ElaboratedTypePolicyRAII PolicyRAII(Policy); + printAfter(T->getNamedType(), OS); } -void TypePrinter::printParen(const ParenType *T, std::string &S) { - if (!S.empty() && !isa(T->getInnerType())) - S = '(' + S + ')'; - print(T->getInnerType(), S); +void TypePrinter::printParenBefore(const ParenType *T, raw_ostream &OS) { + if (!HasEmptyPlaceHolder && !isa(T->getInnerType())) { + printBefore(T->getInnerType(), OS); + OS << '('; + } else + printBefore(T->getInnerType(), OS); +} +void TypePrinter::printParenAfter(const ParenType *T, raw_ostream &OS) { + if (!HasEmptyPlaceHolder && !isa(T->getInnerType())) { + OS << ')'; + printAfter(T->getInnerType(), OS); + } else + printAfter(T->getInnerType(), OS); } -void TypePrinter::printDependentName(const DependentNameType *T, std::string &S) { - std::string MyString; +void TypePrinter::printDependentNameBefore(const DependentNameType *T, + raw_ostream &OS) { + OS << TypeWithKeyword::getKeywordName(T->getKeyword()); + if (T->getKeyword() != ETK_None) + OS << " "; - { - llvm::raw_string_ostream OS(MyString); - OS << TypeWithKeyword::getKeywordName(T->getKeyword()); - if (T->getKeyword() != ETK_None) - OS << " "; - - T->getQualifier()->print(OS, Policy); - - OS << T->getIdentifier()->getName(); - } + T->getQualifier()->print(OS, Policy); - if (S.empty()) - S.swap(MyString); - else - S = MyString + ' ' + S; + OS << T->getIdentifier()->getName(); + spaceBeforePlaceHolder(OS); } +void TypePrinter::printDependentNameAfter(const DependentNameType *T, + raw_ostream &OS) { } -void TypePrinter::printDependentTemplateSpecialization( - const DependentTemplateSpecializationType *T, std::string &S) { +void TypePrinter::printDependentTemplateSpecializationBefore( + const DependentTemplateSpecializationType *T, raw_ostream &OS) { IncludeStrongLifetimeRAII Strong(Policy); - std::string MyString; - { - llvm::raw_string_ostream OS(MyString); - OS << TypeWithKeyword::getKeywordName(T->getKeyword()); - if (T->getKeyword() != ETK_None) - OS << " "; - - if (T->getQualifier()) - T->getQualifier()->print(OS, Policy); - OS << T->getIdentifier()->getName(); - OS << TemplateSpecializationType::PrintTemplateArgumentList( - T->getArgs(), - T->getNumArgs(), - Policy); - } + OS << TypeWithKeyword::getKeywordName(T->getKeyword()); + if (T->getKeyword() != ETK_None) + OS << " "; - if (S.empty()) - S.swap(MyString); - else - S = MyString + ' ' + S; + if (T->getQualifier()) + T->getQualifier()->print(OS, Policy); + OS << T->getIdentifier()->getName(); + TemplateSpecializationType::PrintTemplateArgumentList(OS, + T->getArgs(), + T->getNumArgs(), + Policy); + spaceBeforePlaceHolder(OS); } +void TypePrinter::printDependentTemplateSpecializationAfter( + const DependentTemplateSpecializationType *T, raw_ostream &OS) { } -void TypePrinter::printPackExpansion(const PackExpansionType *T, - std::string &S) { - print(T->getPattern(), S); - S += "..."; +void TypePrinter::printPackExpansionBefore(const PackExpansionType *T, + raw_ostream &OS) { + printBefore(T->getPattern(), OS); +} +void TypePrinter::printPackExpansionAfter(const 
PackExpansionType *T, + raw_ostream &OS) { + printAfter(T->getPattern(), OS); + OS << "..."; } -void TypePrinter::printAttributed(const AttributedType *T, - std::string &S) { +void TypePrinter::printAttributedBefore(const AttributedType *T, + raw_ostream &OS) { // Prefer the macro forms of the GC and ownership qualifiers. if (T->getAttrKind() == AttributedType::attr_objc_gc || T->getAttrKind() == AttributedType::attr_objc_ownership) - return print(T->getEquivalentType(), S); + return printBefore(T->getEquivalentType(), OS); + + printBefore(T->getModifiedType(), OS); +} - print(T->getModifiedType(), S); +void TypePrinter::printAttributedAfter(const AttributedType *T, + raw_ostream &OS) { + // Prefer the macro forms of the GC and ownership qualifiers. + if (T->getAttrKind() == AttributedType::attr_objc_gc || + T->getAttrKind() == AttributedType::attr_objc_ownership) + return printAfter(T->getEquivalentType(), OS); // TODO: not all attributes are GCC-style attributes. - S += " __attribute__(("; + OS << " __attribute__(("; switch (T->getAttrKind()) { case AttributedType::attr_address_space: - S += "address_space("; - S += T->getEquivalentType().getAddressSpace(); - S += ")"; + OS << "address_space("; + OS << T->getEquivalentType().getAddressSpace(); + OS << ')'; break; case AttributedType::attr_vector_size: { - S += "__vector_size__("; + OS << "__vector_size__("; if (const VectorType *vector =T->getEquivalentType()->getAs()) { - S += vector->getNumElements(); - S += " * sizeof("; - - std::string tmp; - print(vector->getElementType(), tmp); - S += tmp; - S += ")"; + OS << vector->getNumElements(); + OS << " * sizeof("; + print(vector->getElementType(), OS, StringRef()); + OS << ')'; } - S += ")"; + OS << ')'; break; } case AttributedType::attr_neon_vector_type: case AttributedType::attr_neon_polyvector_type: { if (T->getAttrKind() == AttributedType::attr_neon_vector_type) - S += "neon_vector_type("; + OS << "neon_vector_type("; else - S += "neon_polyvector_type("; + OS << "neon_polyvector_type("; const VectorType *vector = T->getEquivalentType()->getAs(); - S += llvm::utostr_32(vector->getNumElements()); - S += ")"; + OS << vector->getNumElements(); + OS << ')'; break; } case AttributedType::attr_regparm: { - S += "regparm("; + OS << "regparm("; QualType t = T->getEquivalentType(); while (!t->isFunctionType()) t = t->getPointeeType(); - S += t->getAs()->getRegParmType(); - S += ")"; + OS << t->getAs()->getRegParmType(); + OS << ')'; break; } case AttributedType::attr_objc_gc: { - S += "objc_gc("; + OS << "objc_gc("; QualType tmp = T->getEquivalentType(); while (tmp.getObjCGCAttr() == Qualifiers::GCNone) { @@ -939,116 +1130,244 @@ void TypePrinter::printAttributed(const AttributedType *T, } if (tmp.isObjCGCWeak()) - S += "weak"; + OS << "weak"; else - S += "strong"; - S += ")"; + OS << "strong"; + OS << ')'; break; } case AttributedType::attr_objc_ownership: - S += "objc_ownership("; + OS << "objc_ownership("; switch (T->getEquivalentType().getObjCLifetime()) { case Qualifiers::OCL_None: llvm_unreachable("no ownership!"); - case Qualifiers::OCL_ExplicitNone: S += "none"; break; - case Qualifiers::OCL_Strong: S += "strong"; break; - case Qualifiers::OCL_Weak: S += "weak"; break; - case Qualifiers::OCL_Autoreleasing: S += "autoreleasing"; break; + case Qualifiers::OCL_ExplicitNone: OS << "none"; break; + case Qualifiers::OCL_Strong: OS << "strong"; break; + case Qualifiers::OCL_Weak: OS << "weak"; break; + case Qualifiers::OCL_Autoreleasing: OS << "autoreleasing"; break; } - S += ")"; + OS << ')'; 
break; - case AttributedType::attr_noreturn: S += "noreturn"; break; - case AttributedType::attr_cdecl: S += "cdecl"; break; - case AttributedType::attr_fastcall: S += "fastcall"; break; - case AttributedType::attr_stdcall: S += "stdcall"; break; - case AttributedType::attr_thiscall: S += "thiscall"; break; - case AttributedType::attr_pascal: S += "pascal"; break; + case AttributedType::attr_noreturn: OS << "noreturn"; break; + case AttributedType::attr_cdecl: OS << "cdecl"; break; + case AttributedType::attr_fastcall: OS << "fastcall"; break; + case AttributedType::attr_stdcall: OS << "stdcall"; break; + case AttributedType::attr_thiscall: OS << "thiscall"; break; + case AttributedType::attr_pascal: OS << "pascal"; break; case AttributedType::attr_pcs: { - S += "pcs("; + OS << "pcs("; QualType t = T->getEquivalentType(); while (!t->isFunctionType()) t = t->getPointeeType(); - S += (t->getAs()->getCallConv() == CC_AAPCS ? + OS << (t->getAs()->getCallConv() == CC_AAPCS ? "\"aapcs\"" : "\"aapcs-vfp\""); - S += ")"; + OS << ')'; break; } } - S += "))"; + OS << "))"; } -void TypePrinter::printObjCInterface(const ObjCInterfaceType *T, - std::string &S) { - if (!S.empty()) // Prefix the basic type, e.g. 'typedefname X'. - S = ' ' + S; - - std::string ObjCQIString = T->getDecl()->getNameAsString(); - S = ObjCQIString + S; +void TypePrinter::printObjCInterfaceBefore(const ObjCInterfaceType *T, + raw_ostream &OS) { + OS << T->getDecl()->getName(); + spaceBeforePlaceHolder(OS); } +void TypePrinter::printObjCInterfaceAfter(const ObjCInterfaceType *T, + raw_ostream &OS) { } -void TypePrinter::printObjCObject(const ObjCObjectType *T, - std::string &S) { +void TypePrinter::printObjCObjectBefore(const ObjCObjectType *T, + raw_ostream &OS) { if (T->qual_empty()) - return print(T->getBaseType(), S); + return printBefore(T->getBaseType(), OS); - std::string tmp; - print(T->getBaseType(), tmp); - tmp += '<'; + print(T->getBaseType(), OS, StringRef()); + OS << '<'; bool isFirst = true; for (ObjCObjectType::qual_iterator I = T->qual_begin(), E = T->qual_end(); I != E; ++I) { if (isFirst) isFirst = false; else - tmp += ','; - tmp += (*I)->getNameAsString(); + OS << ','; + OS << (*I)->getName(); } - tmp += '>'; - - if (!S.empty()) { - tmp += ' '; - tmp += S; - } - std::swap(tmp, S); + OS << '>'; + spaceBeforePlaceHolder(OS); +} +void TypePrinter::printObjCObjectAfter(const ObjCObjectType *T, + raw_ostream &OS) { + if (T->qual_empty()) + return printAfter(T->getBaseType(), OS); } -void TypePrinter::printObjCObjectPointer(const ObjCObjectPointerType *T, - std::string &S) { - std::string ObjCQIString; - - T->getPointeeType().getLocalQualifiers().getAsStringInternal(ObjCQIString, - Policy); - if (!ObjCQIString.empty()) - ObjCQIString += ' '; - +void TypePrinter::printObjCObjectPointerBefore(const ObjCObjectPointerType *T, + raw_ostream &OS) { + T->getPointeeType().getLocalQualifiers().print(OS, Policy, + /*appendSpaceIfNonEmpty=*/true); + if (T->isObjCIdType() || T->isObjCQualifiedIdType()) - ObjCQIString += "id"; + OS << "id"; else if (T->isObjCClassType() || T->isObjCQualifiedClassType()) - ObjCQIString += "Class"; + OS << "Class"; else if (T->isObjCSelType()) - ObjCQIString += "SEL"; + OS << "SEL"; else - ObjCQIString += T->getInterfaceDecl()->getNameAsString(); + OS << T->getInterfaceDecl()->getName(); if (!T->qual_empty()) { - ObjCQIString += '<'; + OS << '<'; for (ObjCObjectPointerType::qual_iterator I = T->qual_begin(), E = T->qual_end(); I != E; ++I) { - ObjCQIString += (*I)->getNameAsString(); + OS << 
(*I)->getName(); if (I+1 != E) - ObjCQIString += ','; + OS << ','; + } + OS << '>'; + } + + if (!T->isObjCIdType() && !T->isObjCQualifiedIdType()) { + OS << " *"; // Don't forget the implicit pointer. + } else { + spaceBeforePlaceHolder(OS); + } +} +void TypePrinter::printObjCObjectPointerAfter(const ObjCObjectPointerType *T, + raw_ostream &OS) { } + +void TemplateSpecializationType:: + PrintTemplateArgumentList(raw_ostream &OS, + const TemplateArgumentListInfo &Args, + const PrintingPolicy &Policy) { + return PrintTemplateArgumentList(OS, + Args.getArgumentArray(), + Args.size(), + Policy); +} + +void +TemplateSpecializationType::PrintTemplateArgumentList( + raw_ostream &OS, + const TemplateArgument *Args, + unsigned NumArgs, + const PrintingPolicy &Policy, + bool SkipBrackets) { + if (!SkipBrackets) + OS << '<'; + + bool needSpace = false; + for (unsigned Arg = 0; Arg < NumArgs; ++Arg) { + if (Arg > 0) + OS << ", "; + + // Print the argument into a string. + SmallString<128> Buf; + llvm::raw_svector_ostream ArgOS(Buf); + if (Args[Arg].getKind() == TemplateArgument::Pack) { + PrintTemplateArgumentList(ArgOS, + Args[Arg].pack_begin(), + Args[Arg].pack_size(), + Policy, true); + } else { + Args[Arg].print(Policy, ArgOS); + } + StringRef ArgString = ArgOS.str(); + + // If this is the first argument and its string representation + // begins with the global scope specifier ('::foo'), add a space + // to avoid printing the diagraph '<:'. + if (!Arg && !ArgString.empty() && ArgString[0] == ':') + OS << ' '; + + OS << ArgString; + + needSpace = (!ArgString.empty() && ArgString.back() == '>'); + } + + // If the last character of our string is '>', add another space to + // keep the two '>''s separate tokens. We don't *have* to do this in + // C++0x, but it's still good hygiene. + if (needSpace) + OS << ' '; + + if (!SkipBrackets) + OS << '>'; +} + +// Sadly, repeat all that with TemplateArgLoc. +void TemplateSpecializationType:: +PrintTemplateArgumentList(raw_ostream &OS, + const TemplateArgumentLoc *Args, unsigned NumArgs, + const PrintingPolicy &Policy) { + OS << '<'; + + bool needSpace = false; + for (unsigned Arg = 0; Arg < NumArgs; ++Arg) { + if (Arg > 0) + OS << ", "; + + // Print the argument into a string. + SmallString<128> Buf; + llvm::raw_svector_ostream ArgOS(Buf); + if (Args[Arg].getArgument().getKind() == TemplateArgument::Pack) { + PrintTemplateArgumentList(ArgOS, + Args[Arg].getArgument().pack_begin(), + Args[Arg].getArgument().pack_size(), + Policy, true); + } else { + Args[Arg].getArgument().print(Policy, ArgOS); } - ObjCQIString += '>'; + StringRef ArgString = ArgOS.str(); + + // If this is the first argument and its string representation + // begins with the global scope specifier ('::foo'), add a space + // to avoid printing the diagraph '<:'. + if (!Arg && !ArgString.empty() && ArgString[0] == ':') + OS << ' '; + + OS << ArgString; + + needSpace = (!ArgString.empty() && ArgString.back() == '>'); } - if (!T->isObjCIdType() && !T->isObjCQualifiedIdType()) - ObjCQIString += " *"; // Don't forget the implicit pointer. - else if (!S.empty()) // Prefix the basic type, e.g. 'typedefname X'. - S = ' ' + S; + // If the last character of our string is '>', add another space to + // keep the two '>''s separate tokens. We don't *have* to do this in + // C++0x, but it's still good hygiene. 
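  // (Illustration, not from the patch: without the leading space a first
  //  argument '::B' would print as "A<::B>", where "<:" lexes as the digraph
  //  for '['; without the trailing space nested arguments would print as
  //  "A<B<int>>", where ">>" lexes as the right-shift token in C++98.)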
+ if (needSpace) + OS << ' '; + + OS << '>'; +} + +void +FunctionProtoType::printExceptionSpecification(std::string &S, + PrintingPolicy Policy) const { - S = ObjCQIString + S; + if (hasDynamicExceptionSpec()) { + S += " throw("; + if (getExceptionSpecType() == EST_MSAny) + S += "..."; + else + for (unsigned I = 0, N = getNumExceptions(); I != N; ++I) { + if (I) + S += ", "; + + S += getExceptionType(I).getAsString(Policy); + } + S += ")"; + } else if (isNoexceptExceptionSpec(getExceptionSpecType())) { + S += " noexcept"; + if (getExceptionSpecType() == EST_ComputedNoexcept) { + S += "("; + llvm::raw_string_ostream EOut(S); + getNoexceptExpr()->printPretty(EOut, 0, Policy); + EOut.flush(); + S += EOut.str(); + S += ")"; + } + } } std::string TemplateSpecializationType:: @@ -1148,15 +1467,14 @@ PrintTemplateArgumentList(const TemplateArgumentLoc *Args, unsigned NumArgs, } void QualType::dump(const char *msg) const { - std::string R = "identifier"; - LangOptions LO; - getAsStringInternal(R, PrintingPolicy(LO)); if (msg) llvm::errs() << msg << ": "; - llvm::errs() << R << "\n"; + LangOptions LO; + print(llvm::errs(), PrintingPolicy(LO), "identifier"); + llvm::errs() << '\n'; } void QualType::dump() const { - dump(""); + dump(0); } void Type::dump() const { @@ -1171,51 +1489,99 @@ std::string Qualifiers::getAsString() const { // Appends qualifiers to the given string, separated by spaces. Will // prefix a space if the string is non-empty. Will not append a final // space. -void Qualifiers::getAsStringInternal(std::string &S, - const PrintingPolicy& Policy) const { - AppendTypeQualList(S, getCVRQualifiers()); +std::string Qualifiers::getAsString(const PrintingPolicy &Policy) const { + SmallString<64> Buf; + llvm::raw_svector_ostream StrOS(Buf); + print(StrOS, Policy); + return StrOS.str(); +} + +bool Qualifiers::isEmptyWhenPrinted(const PrintingPolicy &Policy) const { + if (getCVRQualifiers()) + return false; + + if (getAddressSpace()) + return false; + + if (getObjCGCAttr()) + return false; + + if (Qualifiers::ObjCLifetime lifetime = getObjCLifetime()) + if (!(lifetime == Qualifiers::OCL_Strong && Policy.SuppressStrongLifetime)) + return false; + + return true; +} + +// Appends qualifiers to the given string, separated by spaces. Will +// prefix a space if the string is non-empty. Will not append a final +// space. 
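  // (Example of assumed output, beyond the patch: a 'const volatile' type in
  //  the OpenCL __local address space prints as "const volatile __local", and
  //  appendSpaceIfNonEmpty=true adds one trailing space for use as a prefix.)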
+void Qualifiers::print(raw_ostream &OS, const PrintingPolicy& Policy, + bool appendSpaceIfNonEmpty) const { + bool addSpace = false; + + unsigned quals = getCVRQualifiers(); + if (quals) { + AppendTypeQualList(OS, quals); + addSpace = true; + } if (unsigned addrspace = getAddressSpace()) { - if (!S.empty()) S += ' '; + if (addSpace) + OS << ' '; + addSpace = true; switch (addrspace) { case LangAS::opencl_global: - S += "__global"; + OS << "__global"; break; case LangAS::opencl_local: - S += "__local"; + OS << "__local"; break; case LangAS::opencl_constant: - S += "__constant"; + OS << "__constant"; break; default: - S += "__attribute__((address_space("; - S += llvm::utostr_32(addrspace); - S += ")))"; + OS << "__attribute__((address_space("; + OS << addrspace; + OS << ")))"; } } if (Qualifiers::GC gc = getObjCGCAttr()) { - if (!S.empty()) S += ' '; + if (addSpace) + OS << ' '; + addSpace = true; if (gc == Qualifiers::Weak) - S += "__weak"; + OS << "__weak"; else - S += "__strong"; + OS << "__strong"; } if (Qualifiers::ObjCLifetime lifetime = getObjCLifetime()) { - if (!S.empty() && - !(lifetime == Qualifiers::OCL_Strong && Policy.SuppressStrongLifetime)) - S += ' '; - + if (!(lifetime == Qualifiers::OCL_Strong && Policy.SuppressStrongLifetime)){ + if (addSpace) + OS << ' '; + addSpace = true; + } + switch (lifetime) { case Qualifiers::OCL_None: llvm_unreachable("none but true"); - case Qualifiers::OCL_ExplicitNone: S += "__unsafe_unretained"; break; + case Qualifiers::OCL_ExplicitNone: OS << "__unsafe_unretained"; break; case Qualifiers::OCL_Strong: if (!Policy.SuppressStrongLifetime) - S += "__strong"; + OS << "__strong"; break; - case Qualifiers::OCL_Weak: S += "__weak"; break; - case Qualifiers::OCL_Autoreleasing: S += "__autoreleasing"; break; + case Qualifiers::OCL_Weak: OS << "__weak"; break; + case Qualifiers::OCL_Autoreleasing: OS << "__autoreleasing"; break; } } + + if (appendSpaceIfNonEmpty && addSpace) + OS << ' '; +} + +std::string QualType::getAsString(const PrintingPolicy &Policy) const { + std::string S; + getAsStringInternal(S, Policy); + return S; } std::string QualType::getAsString(const Type *ty, Qualifiers qs) { @@ -1225,8 +1591,25 @@ std::string QualType::getAsString(const Type *ty, Qualifiers qs) { return buffer; } +void QualType::print(const Type *ty, Qualifiers qs, + raw_ostream &OS, const PrintingPolicy &policy, + const Twine &PlaceHolder) { + SmallString<128> PHBuf; + StringRef PH; + if (PlaceHolder.isSingleStringRef()) + PH = PlaceHolder.getSingleStringRef(); + else + PH = PlaceHolder.toStringRef(PHBuf); + + TypePrinter(policy).print(ty, qs, OS, PH); +} + void QualType::getAsStringInternal(const Type *ty, Qualifiers qs, std::string &buffer, const PrintingPolicy &policy) { - TypePrinter(policy).print(ty, qs, buffer); + SmallString<256> Buf; + llvm::raw_svector_ostream StrOS(Buf); + TypePrinter(policy).print(ty, qs, StrOS, buffer); + std::string str = StrOS.str(); + buffer.swap(str); } diff --git a/lib/AST/VTTBuilder.cpp b/lib/AST/VTTBuilder.cpp index f5ff624..5ca4e86 100644 --- a/lib/AST/VTTBuilder.cpp +++ b/lib/AST/VTTBuilder.cpp @@ -13,6 +13,7 @@ //===----------------------------------------------------------------------===// #include "clang/AST/VTTBuilder.h" +#include "clang/AST/ASTContext.h" #include "clang/AST/CXXInheritance.h" #include "clang/AST/RecordLayout.h" #include "clang/Basic/TargetInfo.h" diff --git a/lib/AST/VTableBuilder.cpp b/lib/AST/VTableBuilder.cpp index 107d9fb..104530f 100644 --- a/lib/AST/VTableBuilder.cpp +++ b/lib/AST/VTableBuilder.cpp 
@@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "clang/AST/VTableBuilder.h" +#include "clang/AST/ASTContext.h" #include "clang/AST/CXXInheritance.h" #include "clang/AST/RecordLayout.h" #include "clang/Basic/TargetInfo.h" @@ -164,7 +165,7 @@ FinalOverriders::FinalOverriders(const CXXRecordDecl *MostDerivedClass, SubobjectOffsets, SubobjectLayoutClassOffsets, SubobjectCounts); - // Get the the final overriders. + // Get the final overriders. CXXFinalOverriderMap FinalOverriders; MostDerivedClass->getFinalOverriders(FinalOverriders); @@ -630,7 +631,7 @@ VCallAndVBaseOffsetBuilder::AddVCallAndVBaseOffsets(BaseSubobject Base, // Get the base offset of the primary base. if (PrimaryBaseIsVirtual) { - assert(Layout.getVBaseClassOffsetInBits(PrimaryBase) == 0 && + assert(Layout.getVBaseClassOffset(PrimaryBase).isZero() && "Primary vbase should have a zero offset!"); const ASTRecordLayout &MostDerivedClassLayout = @@ -639,7 +640,7 @@ VCallAndVBaseOffsetBuilder::AddVCallAndVBaseOffsets(BaseSubobject Base, PrimaryBaseOffset = MostDerivedClassLayout.getVBaseClassOffset(PrimaryBase); } else { - assert(Layout.getBaseClassOffsetInBits(PrimaryBase) == 0 && + assert(Layout.getBaseClassOffset(PrimaryBase).isZero() && "Primary base should have a zero offset!"); PrimaryBaseOffset = Base.getBaseOffset(); @@ -682,7 +683,7 @@ void VCallAndVBaseOffsetBuilder::AddVCallOffsets(BaseSubobject Base, // primary base will have its vcall and vbase offsets emitted already. if (PrimaryBase && !Layout.isPrimaryBaseVirtual()) { // Get the base offset of the primary base. - assert(Layout.getBaseClassOffsetInBits(PrimaryBase) == 0 && + assert(Layout.getBaseClassOffset(PrimaryBase).isZero() && "Primary base should have a zero offset!"); AddVCallOffsets(BaseSubobject(PrimaryBase, Base.getBaseOffset()), @@ -1370,7 +1371,7 @@ VTableBuilder::IsOverriderUsed(const CXXMethodDecl *Overrider, break; if (Layout.isPrimaryBaseVirtual()) { - assert(Layout.getVBaseClassOffsetInBits(PrimaryBase) == 0 && + assert(Layout.getVBaseClassOffset(PrimaryBase).isZero() && "Primary base should always be at offset 0!"); const ASTRecordLayout &LayoutClassLayout = @@ -1384,7 +1385,7 @@ VTableBuilder::IsOverriderUsed(const CXXMethodDecl *Overrider, break; } } else { - assert(Layout.getBaseClassOffsetInBits(PrimaryBase) == 0 && + assert(Layout.getBaseClassOffset(PrimaryBase).isZero() && "Primary base should always be at offset 0!"); } @@ -1436,7 +1437,7 @@ VTableBuilder::AddMethods(BaseSubobject Base, CharUnits BaseOffsetInLayoutClass, CharUnits PrimaryBaseOffset; CharUnits PrimaryBaseOffsetInLayoutClass; if (Layout.isPrimaryBaseVirtual()) { - assert(Layout.getVBaseClassOffsetInBits(PrimaryBase) == 0 && + assert(Layout.getVBaseClassOffset(PrimaryBase).isZero() && "Primary vbase should have a zero offset!"); const ASTRecordLayout &MostDerivedClassLayout = @@ -1451,7 +1452,7 @@ VTableBuilder::AddMethods(BaseSubobject Base, CharUnits BaseOffsetInLayoutClass, PrimaryBaseOffsetInLayoutClass = LayoutClassLayout.getVBaseClassOffset(PrimaryBase); } else { - assert(Layout.getBaseClassOffsetInBits(PrimaryBase) == 0 && + assert(Layout.getBaseClassOffset(PrimaryBase).isZero() && "Primary base should have a zero offset!"); PrimaryBaseOffset = Base.getBaseOffset(); diff --git a/lib/ASTMatchers/ASTMatchFinder.cpp b/lib/ASTMatchers/ASTMatchFinder.cpp new file mode 100644 index 0000000..085049d --- /dev/null +++ b/lib/ASTMatchers/ASTMatchFinder.cpp @@ -0,0 +1,547 @@ +//===--- ASTMatchFinder.cpp - Structural query 
framework -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Implements an algorithm to efficiently search for matches on AST nodes.
+// Uses memoization to support recursive matches like HasDescendant.
+//
+// The general idea is to visit all AST nodes with a RecursiveASTVisitor,
+// calling the Matches(...) method of each matcher we are running on each
+// AST node. The matcher can recurse via the ASTMatchFinder interface.
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/ASTMatchers/ASTMatchFinder.h"
+#include "clang/AST/ASTConsumer.h"
+#include "clang/AST/ASTContext.h"
+#include "clang/AST/RecursiveASTVisitor.h"
+#include <set>
+
+namespace clang {
+namespace ast_matchers {
+namespace internal {
+namespace {
+
+// We use memoization to avoid running the same matcher on the same
+// AST node twice. This pair is the key for looking up match
+// results. It consists of an ID of the MatcherInterface (for
+// identifying the matcher) and a pointer to the AST node.
+typedef std::pair<uint64_t, const void*> UntypedMatchInput;
+
+// Used to store the result of a match and possibly bound nodes.
+struct MemoizedMatchResult {
+  bool ResultOfMatch;
+  BoundNodesTree Nodes;
+};
+
+// A RecursiveASTVisitor that traverses all children or all descendants of
+// a node.
+class MatchChildASTVisitor
+    : public RecursiveASTVisitor<MatchChildASTVisitor> {
+public:
+  typedef RecursiveASTVisitor<MatchChildASTVisitor> VisitorBase;
+
+  // Creates an AST visitor that matches 'matcher' on all children or
+  // descendants of a traversed node. max_depth is the maximum depth
+  // to traverse: use 1 for matching the children and INT_MAX for
+  // matching the descendants.
+  MatchChildASTVisitor(const UntypedBaseMatcher *BaseMatcher,
+                       ASTMatchFinder *Finder,
+                       BoundNodesTreeBuilder *Builder,
+                       int MaxDepth,
+                       ASTMatchFinder::TraversalKind Traversal,
+                       ASTMatchFinder::BindKind Bind)
+      : BaseMatcher(BaseMatcher),
+        Finder(Finder),
+        Builder(Builder),
+        CurrentDepth(-1),
+        MaxDepth(MaxDepth),
+        Traversal(Traversal),
+        Bind(Bind),
+        Matches(false) {}
+
+  // Returns true if a match is found in the subtree rooted at the
+  // given AST node. This is done via a set of mutually recursive
+  // functions. Here's how the recursion is done (the * wildcard can
+  // actually be Decl, Stmt, or Type):
+  //
+  //   - Traverse(node) calls BaseTraverse(node) when it needs
+  //     to visit the descendants of node.
+  //   - BaseTraverse(node) then calls (via VisitorBase::Traverse*(node))
+  //     Traverse*(c) for each child c of 'node'.
+  //   - Traverse*(c) in turn calls Traverse(c), completing the
+  //     recursion.
+  template <typename T>
+  bool findMatch(const T &Node) {
+    reset();
+    traverse(Node);
+    return Matches;
+  }
+
+  // The following are overriding methods from the base visitor class.
+  // They are public only to allow CRTP to work. They are *not* part
+  // of the public API of this class.
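+  //
+  // A sketch of typical use from within this file (mirroring
+  // matchesRecursively below; all names come from the surrounding code):
+  //
+  //   MatchChildASTVisitor Visitor(&BaseMatcher, Finder, Builder,
+  //                                /*MaxDepth=*/1,
+  //                                ASTMatchFinder::TK_AsIs,
+  //                                ASTMatchFinder::BK_All);
+  //   bool HasMatchingChild = Visitor.findMatch(*DeclNode);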
+  bool TraverseDecl(Decl *DeclNode) {
+    return (DeclNode == NULL) || traverse(*DeclNode);
+  }
+  bool TraverseStmt(Stmt *StmtNode) {
+    const Stmt *StmtToTraverse = StmtNode;
+    if (Traversal ==
+        ASTMatchFinder::TK_IgnoreImplicitCastsAndParentheses) {
+      const Expr *ExprNode = dyn_cast_or_null<Expr>(StmtNode);
+      if (ExprNode != NULL) {
+        StmtToTraverse = ExprNode->IgnoreParenImpCasts();
+      }
+    }
+    return (StmtToTraverse == NULL) || traverse(*StmtToTraverse);
+  }
+  bool TraverseType(QualType TypeNode) {
+    return traverse(TypeNode);
+  }
+
+  bool shouldVisitTemplateInstantiations() const { return true; }
+  bool shouldVisitImplicitCode() const { return true; }
+
+private:
+  // Used for updating the depth during traversal.
+  struct ScopedIncrement {
+    explicit ScopedIncrement(int *Depth) : Depth(Depth) { ++(*Depth); }
+    ~ScopedIncrement() { --(*Depth); }
+
+   private:
+    int *Depth;
+  };
+
+  // Resets the state of this object.
+  void reset() {
+    Matches = false;
+    CurrentDepth = -1;
+  }
+
+  // Forwards the call to the corresponding Traverse*() method in the
+  // base visitor class.
+  bool baseTraverse(const Decl &DeclNode) {
+    return VisitorBase::TraverseDecl(const_cast<Decl*>(&DeclNode));
+  }
+  bool baseTraverse(const Stmt &StmtNode) {
+    return VisitorBase::TraverseStmt(const_cast<Stmt*>(&StmtNode));
+  }
+  bool baseTraverse(QualType TypeNode) {
+    return VisitorBase::TraverseType(TypeNode);
+  }
+
+  // Traverses the subtree rooted at 'node'; returns true if the
+  // traversal should continue after this function returns; also sets
+  // Matches to true if a match is found during the traversal.
+  template <typename T>
+  bool traverse(const T &Node) {
+    TOOLING_COMPILE_ASSERT(IsBaseType<T>::value,
+                           traverse_can_only_be_instantiated_with_base_type);
+    ScopedIncrement ScopedDepth(&CurrentDepth);
+    if (CurrentDepth == 0) {
+      // We don't want to match the root node, so just recurse.
+      return baseTraverse(Node);
+    }
+    if (Bind != ASTMatchFinder::BK_All) {
+      if (BaseMatcher->matches(Node, Finder, Builder)) {
+        Matches = true;
+        return false;  // Abort as soon as a match is found.
+      }
+      if (CurrentDepth < MaxDepth) {
+        // The current node doesn't match, and we haven't reached the
+        // maximum depth yet, so recurse.
+        return baseTraverse(Node);
+      }
+      // The current node doesn't match, and we have reached the
+      // maximum depth, so don't recurse (but continue the traversal
+      // such that other nodes at the current level can be visited).
+      return true;
+    } else {
+      BoundNodesTreeBuilder RecursiveBuilder;
+      if (BaseMatcher->matches(Node, Finder, &RecursiveBuilder)) {
+        // After the first match the matcher succeeds.
+        Matches = true;
+        Builder->addMatch(RecursiveBuilder.build());
+      }
+      if (CurrentDepth < MaxDepth) {
+        baseTraverse(Node);
+      }
+      // In kBindAll mode we always search for more matches.
+      return true;
+    }
+  }
+
+  const UntypedBaseMatcher *const BaseMatcher;
+  ASTMatchFinder *const Finder;
+  BoundNodesTreeBuilder *const Builder;
+  int CurrentDepth;
+  const int MaxDepth;
+  const ASTMatchFinder::TraversalKind Traversal;
+  const ASTMatchFinder::BindKind Bind;
+  bool Matches;
+};
+
+// Controls the outermost traversal of the AST and allows running multiple
+// matchers.
+class MatchASTVisitor : public RecursiveASTVisitor, + public ASTMatchFinder { +public: + MatchASTVisitor(std::vector< std::pair > *Triggers) + : Triggers(Triggers), + ActiveASTContext(NULL) { + } + + void set_active_ast_context(ASTContext *NewActiveASTContext) { + ActiveASTContext = NewActiveASTContext; + } + + // The following Visit*() and Traverse*() functions "override" + // methods in RecursiveASTVisitor. + + bool VisitTypedefDecl(TypedefDecl *DeclNode) { + // When we see 'typedef A B', we add name 'B' to the set of names + // A's canonical type maps to. This is necessary for implementing + // IsDerivedFrom(x) properly, where x can be the name of the base + // class or any of its aliases. + // + // In general, the is-alias-of (as defined by typedefs) relation + // is tree-shaped, as you can typedef a type more than once. For + // example, + // + // typedef A B; + // typedef A C; + // typedef C D; + // typedef C E; + // + // gives you + // + // A + // |- B + // `- C + // |- D + // `- E + // + // It is wrong to assume that the relation is a chain. A correct + // implementation of IsDerivedFrom() needs to recognize that B and + // E are aliases, even though neither is a typedef of the other. + // Therefore, we cannot simply walk through one typedef chain to + // find out whether the type name matches. + const Type *TypeNode = DeclNode->getUnderlyingType().getTypePtr(); + const Type *CanonicalType = // root of the typedef tree + ActiveASTContext->getCanonicalType(TypeNode); + TypeAliases[CanonicalType].insert(DeclNode); + return true; + } + + bool TraverseDecl(Decl *DeclNode); + bool TraverseStmt(Stmt *StmtNode); + bool TraverseType(QualType TypeNode); + bool TraverseTypeLoc(TypeLoc TypeNode); + + // Matches children or descendants of 'Node' with 'BaseMatcher'. + template + bool memoizedMatchesRecursively(const T &Node, + const UntypedBaseMatcher &BaseMatcher, + BoundNodesTreeBuilder *Builder, int MaxDepth, + TraversalKind Traversal, BindKind Bind) { + TOOLING_COMPILE_ASSERT((llvm::is_same::value) || + (llvm::is_same::value), + type_does_not_support_memoization); + const UntypedMatchInput input(BaseMatcher.getID(), &Node); + std::pair InsertResult + = ResultCache.insert(std::make_pair(input, MemoizedMatchResult())); + if (InsertResult.second) { + BoundNodesTreeBuilder DescendantBoundNodesBuilder; + InsertResult.first->second.ResultOfMatch = + matchesRecursively(Node, BaseMatcher, &DescendantBoundNodesBuilder, + MaxDepth, Traversal, Bind); + InsertResult.first->second.Nodes = + DescendantBoundNodesBuilder.build(); + } + InsertResult.first->second.Nodes.copyTo(Builder); + return InsertResult.first->second.ResultOfMatch; + } + + // Matches children or descendants of 'Node' with 'BaseMatcher'. + template + bool matchesRecursively(const T &Node, const UntypedBaseMatcher &BaseMatcher, + BoundNodesTreeBuilder *Builder, int MaxDepth, + TraversalKind Traversal, BindKind Bind) { + MatchChildASTVisitor Visitor( + &BaseMatcher, this, Builder, MaxDepth, Traversal, Bind); + return Visitor.findMatch(Node); + } + + virtual bool classIsDerivedFrom(const CXXRecordDecl *Declaration, + const Matcher &Base, + BoundNodesTreeBuilder *Builder); + + // Implements ASTMatchFinder::MatchesChildOf. 
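+  // (Sketch of the depth bounds used below: child matching forwards with
+  //  MaxDepth == 1, descendant matching with INT_MAX, e.g.
+  //
+  //    matchesRecursively(Node, BaseMatcher, Builder, 1, Traversal, Bind);
+  //    memoizedMatchesRecursively(Node, BaseMatcher, Builder, INT_MAX,
+  //                               TK_AsIs, Bind);
+  //
+  //  so only direct children are visited in the first form.)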
+ virtual bool matchesChildOf(const Decl &DeclNode, + const UntypedBaseMatcher &BaseMatcher, + BoundNodesTreeBuilder *Builder, + TraversalKind Traversal, + BindKind Bind) { + return matchesRecursively(DeclNode, BaseMatcher, Builder, 1, Traversal, + Bind); + } + virtual bool matchesChildOf(const Stmt &StmtNode, + const UntypedBaseMatcher &BaseMatcher, + BoundNodesTreeBuilder *Builder, + TraversalKind Traversal, + BindKind Bind) { + return matchesRecursively(StmtNode, BaseMatcher, Builder, 1, Traversal, + Bind); + } + + // Implements ASTMatchFinder::MatchesDescendantOf. + virtual bool matchesDescendantOf(const Decl &DeclNode, + const UntypedBaseMatcher &BaseMatcher, + BoundNodesTreeBuilder *Builder, + BindKind Bind) { + return memoizedMatchesRecursively(DeclNode, BaseMatcher, Builder, INT_MAX, + TK_AsIs, Bind); + } + virtual bool matchesDescendantOf(const Stmt &StmtNode, + const UntypedBaseMatcher &BaseMatcher, + BoundNodesTreeBuilder *Builder, + BindKind Bind) { + return memoizedMatchesRecursively(StmtNode, BaseMatcher, Builder, INT_MAX, + TK_AsIs, Bind); + } + + bool shouldVisitTemplateInstantiations() const { return true; } + bool shouldVisitImplicitCode() const { return true; } + +private: + // Implements a BoundNodesTree::Visitor that calls a MatchCallback with + // the aggregated bound nodes for each match. + class MatchVisitor : public BoundNodesTree::Visitor { + public: + MatchVisitor(ASTContext* Context, + MatchFinder::MatchCallback* Callback) + : Context(Context), + Callback(Callback) {} + + virtual void visitMatch(const BoundNodes& BoundNodesView) { + Callback->run(MatchFinder::MatchResult(BoundNodesView, Context)); + } + + private: + ASTContext* Context; + MatchFinder::MatchCallback* Callback; + }; + + // Returns true if 'TypeNode' has an alias that matches the given matcher. + bool typeHasMatchingAlias(const Type *TypeNode, + const Matcher Matcher, + BoundNodesTreeBuilder *Builder) { + const Type *const CanonicalType = + ActiveASTContext->getCanonicalType(TypeNode); + const std::set &Aliases = TypeAliases[CanonicalType]; + for (std::set::const_iterator + It = Aliases.begin(), End = Aliases.end(); + It != End; ++It) { + if (Matcher.matches(**It, this, Builder)) + return true; + } + return false; + } + + // Matches all registered matchers on the given node and calls the + // result callback for every node that matches. + template + void match(const T &node) { + for (std::vector< std::pair >::const_iterator + It = Triggers->begin(), End = Triggers->end(); + It != End; ++It) { + BoundNodesTreeBuilder Builder; + if (It->first->matches(node, this, &Builder)) { + BoundNodesTree BoundNodes = Builder.build(); + MatchVisitor Visitor(ActiveASTContext, It->second); + BoundNodes.visitMatches(&Visitor); + } + } + } + + std::vector< std::pair > *const Triggers; + ASTContext *ActiveASTContext; + + // Maps a canonical type to its TypedefDecls. + llvm::DenseMap > TypeAliases; + + // Maps (matcher, node) -> the match result for memoization. + typedef llvm::DenseMap MemoizationMap; + MemoizationMap ResultCache; +}; + +// Returns true if the given class is directly or indirectly derived +// from a base type with the given name. A class is considered to be +// also derived from itself. 
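+// For instance (an illustrative case): given
+//
+//   class A {};
+//   class B : public A {};
+//   typedef A AliasOfA;
+//
+// both A and B count as derived from A, and the alias is recognized via
+// typeHasMatchingAlias rather than by walking a single typedef chain.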
+bool MatchASTVisitor::classIsDerivedFrom(const CXXRecordDecl *Declaration, + const Matcher &Base, + BoundNodesTreeBuilder *Builder) { + if (Base.matches(*Declaration, this, Builder)) + return true; + if (!Declaration->hasDefinition()) + return false; + typedef CXXRecordDecl::base_class_const_iterator BaseIterator; + for (BaseIterator It = Declaration->bases_begin(), + End = Declaration->bases_end(); It != End; ++It) { + const Type *TypeNode = It->getType().getTypePtr(); + + if (typeHasMatchingAlias(TypeNode, Base, Builder)) + return true; + + // Type::getAs<...>() drills through typedefs. + if (TypeNode->getAs() != NULL || + TypeNode->getAs() != NULL) + // Dependent names and template TypeNode parameters will be matched when + // the template is instantiated. + continue; + CXXRecordDecl *ClassDecl = NULL; + TemplateSpecializationType const *TemplateType = + TypeNode->getAs(); + if (TemplateType != NULL) { + if (TemplateType->getTemplateName().isDependent()) + // Dependent template specializations will be matched when the + // template is instantiated. + continue; + + // For template specialization types which are specializing a template + // declaration which is an explicit or partial specialization of another + // template declaration, getAsCXXRecordDecl() returns the corresponding + // ClassTemplateSpecializationDecl. + // + // For template specialization types which are specializing a template + // declaration which is neither an explicit nor partial specialization of + // another template declaration, getAsCXXRecordDecl() returns NULL and + // we get the CXXRecordDecl of the templated declaration. + CXXRecordDecl *SpecializationDecl = + TemplateType->getAsCXXRecordDecl(); + if (SpecializationDecl != NULL) { + ClassDecl = SpecializationDecl; + } else { + ClassDecl = llvm::dyn_cast( + TemplateType->getTemplateName() + .getAsTemplateDecl()->getTemplatedDecl()); + } + } else { + ClassDecl = TypeNode->getAsCXXRecordDecl(); + } + assert(ClassDecl != NULL); + assert(ClassDecl != Declaration); + if (classIsDerivedFrom(ClassDecl, Base, Builder)) + return true; + } + return false; +} + +bool MatchASTVisitor::TraverseDecl(Decl *DeclNode) { + if (DeclNode == NULL) { + return true; + } + match(*DeclNode); + return RecursiveASTVisitor::TraverseDecl(DeclNode); +} + +bool MatchASTVisitor::TraverseStmt(Stmt *StmtNode) { + if (StmtNode == NULL) { + return true; + } + match(*StmtNode); + return RecursiveASTVisitor::TraverseStmt(StmtNode); +} + +bool MatchASTVisitor::TraverseType(QualType TypeNode) { + match(TypeNode); + return RecursiveASTVisitor::TraverseType(TypeNode); +} + +bool MatchASTVisitor::TraverseTypeLoc(TypeLoc TypeLoc) { + match(TypeLoc.getType()); + return RecursiveASTVisitor:: + TraverseTypeLoc(TypeLoc); +} + +class MatchASTConsumer : public ASTConsumer { +public: + MatchASTConsumer(std::vector< std::pair > *Triggers, + MatchFinder::ParsingDoneTestCallback *ParsingDone) + : Visitor(Triggers), + ParsingDone(ParsingDone) {} + +private: + virtual void HandleTranslationUnit(ASTContext &Context) { + if (ParsingDone != NULL) { + ParsingDone->run(); + } + Visitor.set_active_ast_context(&Context); + Visitor.TraverseDecl(Context.getTranslationUnitDecl()); + Visitor.set_active_ast_context(NULL); + } + + MatchASTVisitor Visitor; + MatchFinder::ParsingDoneTestCallback *ParsingDone; +}; + +} // end namespace +} // end namespace internal + +MatchFinder::MatchResult::MatchResult(const BoundNodes &Nodes, + ASTContext *Context) + : Nodes(Nodes), Context(Context), + 
SourceManager(&Context->getSourceManager()) {} + +MatchFinder::MatchCallback::~MatchCallback() {} +MatchFinder::ParsingDoneTestCallback::~ParsingDoneTestCallback() {} + +MatchFinder::MatchFinder() : ParsingDone(NULL) {} + +MatchFinder::~MatchFinder() { + for (std::vector< std::pair >::const_iterator + It = Triggers.begin(), End = Triggers.end(); + It != End; ++It) { + delete It->first; + } +} + +void MatchFinder::addMatcher(const DeclarationMatcher &NodeMatch, + MatchCallback *Action) { + Triggers.push_back(std::make_pair( + new internal::TypedBaseMatcher(NodeMatch), Action)); +} + +void MatchFinder::addMatcher(const TypeMatcher &NodeMatch, + MatchCallback *Action) { + Triggers.push_back(std::make_pair( + new internal::TypedBaseMatcher(NodeMatch), Action)); +} + +void MatchFinder::addMatcher(const StatementMatcher &NodeMatch, + MatchCallback *Action) { + Triggers.push_back(std::make_pair( + new internal::TypedBaseMatcher(NodeMatch), Action)); +} + +ASTConsumer *MatchFinder::newASTConsumer() { + return new internal::MatchASTConsumer(&Triggers, ParsingDone); +} + +void MatchFinder::registerTestCallbackAfterParsing( + MatchFinder::ParsingDoneTestCallback *NewParsingDone) { + ParsingDone = NewParsingDone; +} + +} // end namespace ast_matchers +} // end namespace clang diff --git a/lib/ASTMatchers/ASTMatchersInternal.cpp b/lib/ASTMatchers/ASTMatchersInternal.cpp new file mode 100644 index 0000000..69c5190 --- /dev/null +++ b/lib/ASTMatchers/ASTMatchersInternal.cpp @@ -0,0 +1,102 @@ +//===--- ASTMatchersInternal.cpp - Structural query framework -------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Implements the base layer of the matcher framework. 
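+//
+// A sketch of the intended flow (illustrative only; the names below are
+// placeholders, not code from this file):
+//
+//   BoundNodesTreeBuilder Builder;
+//   Builder.setBinding("x", SomeDecl);      // record an Id -> Decl binding
+//   BoundNodesTree Tree = Builder.build();  // snapshot the bindings
+//   Tree.copyTo(&OtherBuilder);             // merge into another builder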
+// +//===----------------------------------------------------------------------===// + +#include "clang/ASTMatchers/ASTMatchers.h" +#include "clang/ASTMatchers/ASTMatchersInternal.h" + +namespace clang { +namespace ast_matchers { +namespace internal { + +BoundNodesTree::BoundNodesTree() {} + +BoundNodesTree::BoundNodesTree( + const std::map& DeclBindings, + const std::map& StmtBindings, + const std::vector RecursiveBindings) + : DeclBindings(DeclBindings), StmtBindings(StmtBindings), + RecursiveBindings(RecursiveBindings) {} + +void BoundNodesTree::copyTo(BoundNodesTreeBuilder* Builder) const { + copyBindingsTo(DeclBindings, Builder); + copyBindingsTo(StmtBindings, Builder); + for (std::vector::const_iterator + I = RecursiveBindings.begin(), + E = RecursiveBindings.end(); + I != E; ++I) { + Builder->addMatch(*I); + } +} + +template +void BoundNodesTree::copyBindingsTo( + const T& Bindings, BoundNodesTreeBuilder* Builder) const { + for (typename T::const_iterator I = Bindings.begin(), + E = Bindings.end(); + I != E; ++I) { + Builder->setBinding(I->first, I->second); + } +} + +void BoundNodesTree::visitMatches(Visitor* ResultVisitor) { + std::map AggregatedDeclBindings; + std::map AggregatedStmtBindings; + visitMatchesRecursively(ResultVisitor, AggregatedDeclBindings, + AggregatedStmtBindings); +} + +void BoundNodesTree:: +visitMatchesRecursively(Visitor* ResultVisitor, + std::map + AggregatedDeclBindings, + std::map + AggregatedStmtBindings) { + copy(DeclBindings.begin(), DeclBindings.end(), + inserter(AggregatedDeclBindings, AggregatedDeclBindings.begin())); + copy(StmtBindings.begin(), StmtBindings.end(), + inserter(AggregatedStmtBindings, AggregatedStmtBindings.begin())); + if (RecursiveBindings.empty()) { + ResultVisitor->visitMatch(BoundNodes(AggregatedDeclBindings, + AggregatedStmtBindings)); + } else { + for (unsigned I = 0; I < RecursiveBindings.size(); ++I) { + RecursiveBindings[I].visitMatchesRecursively(ResultVisitor, + AggregatedDeclBindings, + AggregatedStmtBindings); + } + } +} + +BoundNodesTreeBuilder::BoundNodesTreeBuilder() {} + +void BoundNodesTreeBuilder::setBinding(const std::string &Id, + const Decl *Node) { + DeclBindings[Id] = Node; +} + +void BoundNodesTreeBuilder::setBinding(const std::string &Id, + const Stmt *Node) { + StmtBindings[Id] = Node; +} + +void BoundNodesTreeBuilder::addMatch(const BoundNodesTree& Bindings) { + RecursiveBindings.push_back(Bindings); +} + +BoundNodesTree BoundNodesTreeBuilder::build() const { + return BoundNodesTree(DeclBindings, StmtBindings, RecursiveBindings); +} + +} // end namespace internal +} // end namespace ast_matchers +} // end namespace clang diff --git a/lib/ASTMatchers/CMakeLists.txt b/lib/ASTMatchers/CMakeLists.txt new file mode 100644 index 0000000..8fc7d4b --- /dev/null +++ b/lib/ASTMatchers/CMakeLists.txt @@ -0,0 +1,17 @@ +set(LLVM_LINK_COMPONENTS support) +set(LLVM_USED_LIBS clangBasic clangAST) + +add_clang_library(clangASTMatchers + ASTMatchFinder.cpp + ASTMatchersInternal.cpp + ) + +add_dependencies(clangASTMatchers + ClangAttrClasses + ClangAttrImpl + ClangAttrList + ClangCommentNodes + ClangDeclNodes + ClangDiagnosticCommon + ClangStmtNodes + ) diff --git a/lib/ASTMatchers/Makefile b/lib/ASTMatchers/Makefile new file mode 100644 index 0000000..76d8271 --- /dev/null +++ b/lib/ASTMatchers/Makefile @@ -0,0 +1,13 @@ +##===- clang/lib/ASTMatchers/Makefile ----------------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# 
License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## + +CLANG_LEVEL := ../.. +LIBRARYNAME := clangASTMatchers + +include $(CLANG_LEVEL)/Makefile diff --git a/lib/Analysis/AnalysisDeclContext.cpp b/lib/Analysis/AnalysisDeclContext.cpp index 659cc6d..7de7f39 100644 --- a/lib/Analysis/AnalysisDeclContext.cpp +++ b/lib/Analysis/AnalysisDeclContext.cpp @@ -12,6 +12,7 @@ // //===----------------------------------------------------------------------===// +#include "clang/AST/ASTContext.h" #include "clang/AST/Decl.h" #include "clang/AST/DeclObjC.h" #include "clang/AST/DeclTemplate.h" @@ -34,11 +35,9 @@ typedef llvm::DenseMap ManagedAnalysisMap; AnalysisDeclContext::AnalysisDeclContext(AnalysisDeclContextManager *Mgr, const Decl *d, - idx::TranslationUnit *tu, const CFG::BuildOptions &buildOptions) : Manager(Mgr), D(d), - TU(tu), cfgBuildOptions(buildOptions), forcedBlkExprs(0), builtCFG(false), @@ -50,11 +49,9 @@ AnalysisDeclContext::AnalysisDeclContext(AnalysisDeclContextManager *Mgr, } AnalysisDeclContext::AnalysisDeclContext(AnalysisDeclContextManager *Mgr, - const Decl *d, - idx::TranslationUnit *tu) + const Decl *d) : Manager(Mgr), D(d), - TU(tu), forcedBlkExprs(0), builtCFG(false), builtCompleteCFG(false), @@ -184,8 +181,16 @@ void AnalysisDeclContext::dumpCFG(bool ShowColors) { } ParentMap &AnalysisDeclContext::getParentMap() { - if (!PM) + if (!PM) { PM.reset(new ParentMap(getBody())); + if (const CXXConstructorDecl *C = dyn_cast(getDecl())) { + for (CXXConstructorDecl::init_const_iterator I = C->init_begin(), + E = C->init_end(); + I != E; ++I) { + PM->addStmt((*I)->getInit()); + } + } + } return *PM; } @@ -195,11 +200,10 @@ PseudoConstantAnalysis *AnalysisDeclContext::getPseudoConstantAnalysis() { return PCA.get(); } -AnalysisDeclContext *AnalysisDeclContextManager::getContext(const Decl *D, - idx::TranslationUnit *TU) { +AnalysisDeclContext *AnalysisDeclContextManager::getContext(const Decl *D) { AnalysisDeclContext *&AC = Contexts[D]; if (!AC) - AC = new AnalysisDeclContext(this, D, TU, cfgBuildOptions); + AC = new AnalysisDeclContext(this, D, cfgBuildOptions); return AC; } @@ -209,6 +213,14 @@ AnalysisDeclContext::getStackFrame(LocationContext const *Parent, const Stmt *S, return getLocationContextManager().getStackFrame(this, Parent, S, Blk, Idx); } +const BlockInvocationContext * +AnalysisDeclContext::getBlockInvocationContext(const LocationContext *parent, + const clang::BlockDecl *BD, + const void *ContextData) { + return getLocationContextManager().getBlockInvocationContext(this, parent, + BD, ContextData); +} + LocationContextManager & AnalysisDeclContext::getLocationContextManager() { assert(Manager && "Cannot create LocationContexts without an AnalysisDeclContextManager!"); @@ -239,7 +251,7 @@ void ScopeContext::Profile(llvm::FoldingSetNodeID &ID) { } void BlockInvocationContext::Profile(llvm::FoldingSetNodeID &ID) { - Profile(ID, getAnalysisDeclContext(), getParent(), BD); + Profile(ID, getAnalysisDeclContext(), getParent(), BD, ContextData); } //===----------------------------------------------------------------------===// @@ -288,6 +300,24 @@ LocationContextManager::getScope(AnalysisDeclContext *ctx, return getLocationContext(ctx, parent, s); } +const BlockInvocationContext * +LocationContextManager::getBlockInvocationContext(AnalysisDeclContext *ctx, + const LocationContext *parent, + const BlockDecl *BD, + const void *ContextData) { + llvm::FoldingSetNodeID ID; + BlockInvocationContext::Profile(ID, ctx, 
parent, BD, ContextData); + void *InsertPos; + BlockInvocationContext *L = + cast_or_null(Contexts.FindNodeOrInsertPos(ID, + InsertPos)); + if (!L) { + L = new BlockInvocationContext(ctx, parent, BD, ContextData); + Contexts.InsertNode(L, InsertPos); + } + return L; +} + //===----------------------------------------------------------------------===// // LocationContext methods. //===----------------------------------------------------------------------===// @@ -302,19 +332,6 @@ const StackFrameContext *LocationContext::getCurrentStackFrame() const { return NULL; } -const StackFrameContext * -LocationContext::getStackFrameForDeclContext(const DeclContext *DC) const { - const LocationContext *LC = this; - while (LC) { - if (const StackFrameContext *SFC = dyn_cast(LC)) { - if (cast(SFC->getDecl()) == DC) - return SFC; - } - LC = LC->getParent(); - } - return NULL; -} - bool LocationContext::isParentOf(const LocationContext *LC) const { do { const LocationContext *Parent = LC->getParent(); diff --git a/lib/Analysis/CFG.cpp b/lib/Analysis/CFG.cpp index 2f1f1cb..05c5385 100644 --- a/lib/Analysis/CFG.cpp +++ b/lib/Analysis/CFG.cpp @@ -1,4 +1,4 @@ -//===--- CFG.cpp - Classes for representing and building CFGs----*- C++ -*-===// + //===--- CFG.cpp - Classes for representing and building CFGs----*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -14,6 +14,7 @@ #include "llvm/Support/SaveAndRestore.h" #include "clang/Analysis/CFG.h" +#include "clang/AST/ASTContext.h" #include "clang/AST/DeclCXX.h" #include "clang/AST/StmtVisitor.h" #include "clang/AST/PrettyPrinter.h" @@ -312,19 +313,6 @@ private: CFGBlock *VisitAddrLabelExpr(AddrLabelExpr *A, AddStmtChoice asc); CFGBlock *VisitBinaryOperator(BinaryOperator *B, AddStmtChoice asc); CFGBlock *VisitBreakStmt(BreakStmt *B); - CFGBlock *VisitCXXCatchStmt(CXXCatchStmt *S); - CFGBlock *VisitExprWithCleanups(ExprWithCleanups *E, - AddStmtChoice asc); - CFGBlock *VisitCXXThrowExpr(CXXThrowExpr *T); - CFGBlock *VisitCXXTryStmt(CXXTryStmt *S); - CFGBlock *VisitCXXForRangeStmt(CXXForRangeStmt *S); - CFGBlock *VisitCXXBindTemporaryExpr(CXXBindTemporaryExpr *E, - AddStmtChoice asc); - CFGBlock *VisitCXXConstructExpr(CXXConstructExpr *C, AddStmtChoice asc); - CFGBlock *VisitCXXFunctionalCastExpr(CXXFunctionalCastExpr *E, - AddStmtChoice asc); - CFGBlock *VisitCXXTemporaryObjectExpr(CXXTemporaryObjectExpr *C, - AddStmtChoice asc); CFGBlock *VisitCallExpr(CallExpr *C, AddStmtChoice asc); CFGBlock *VisitCaseStmt(CaseStmt *C); CFGBlock *VisitChooseExpr(ChooseExpr *C, AddStmtChoice asc); @@ -332,31 +320,47 @@ private: CFGBlock *VisitConditionalOperator(AbstractConditionalOperator *C, AddStmtChoice asc); CFGBlock *VisitContinueStmt(ContinueStmt *C); + CFGBlock *VisitCXXBindTemporaryExpr(CXXBindTemporaryExpr *E, + AddStmtChoice asc); + CFGBlock *VisitCXXCatchStmt(CXXCatchStmt *S); + CFGBlock *VisitCXXConstructExpr(CXXConstructExpr *C, AddStmtChoice asc); + CFGBlock *VisitCXXForRangeStmt(CXXForRangeStmt *S); + CFGBlock *VisitCXXFunctionalCastExpr(CXXFunctionalCastExpr *E, + AddStmtChoice asc); + CFGBlock *VisitCXXTemporaryObjectExpr(CXXTemporaryObjectExpr *C, + AddStmtChoice asc); + CFGBlock *VisitCXXThrowExpr(CXXThrowExpr *T); + CFGBlock *VisitCXXTryStmt(CXXTryStmt *S); CFGBlock *VisitDeclStmt(DeclStmt *DS); CFGBlock *VisitDeclSubExpr(DeclStmt *DS); CFGBlock *VisitDefaultStmt(DefaultStmt *D); CFGBlock *VisitDoStmt(DoStmt *D); - CFGBlock *VisitLambdaExpr(LambdaExpr *E, AddStmtChoice asc); + CFGBlock *VisitExprWithCleanups(ExprWithCleanups *E, AddStmtChoice asc); 
CFGBlock *VisitForStmt(ForStmt *F); CFGBlock *VisitGotoStmt(GotoStmt *G); CFGBlock *VisitIfStmt(IfStmt *I); CFGBlock *VisitImplicitCastExpr(ImplicitCastExpr *E, AddStmtChoice asc); CFGBlock *VisitIndirectGotoStmt(IndirectGotoStmt *I); CFGBlock *VisitLabelStmt(LabelStmt *L); - CFGBlock *VisitLambdaExpr(LambdaExpr *L); + CFGBlock *VisitLambdaExpr(LambdaExpr *E, AddStmtChoice asc); + CFGBlock *VisitLogicalOperator(BinaryOperator *B); + std::pair VisitLogicalOperator(BinaryOperator *B, + Stmt *Term, + CFGBlock *TrueBlock, + CFGBlock *FalseBlock); CFGBlock *VisitMemberExpr(MemberExpr *M, AddStmtChoice asc); CFGBlock *VisitObjCAtCatchStmt(ObjCAtCatchStmt *S); - CFGBlock *VisitObjCAutoreleasePoolStmt(ObjCAutoreleasePoolStmt *S); CFGBlock *VisitObjCAtSynchronizedStmt(ObjCAtSynchronizedStmt *S); CFGBlock *VisitObjCAtThrowStmt(ObjCAtThrowStmt *S); CFGBlock *VisitObjCAtTryStmt(ObjCAtTryStmt *S); + CFGBlock *VisitObjCAutoreleasePoolStmt(ObjCAutoreleasePoolStmt *S); CFGBlock *VisitObjCForCollectionStmt(ObjCForCollectionStmt *S); - CFGBlock *VisitReturnStmt(ReturnStmt *R); CFGBlock *VisitPseudoObjectExpr(PseudoObjectExpr *E); - CFGBlock *VisitUnaryExprOrTypeTraitExpr(UnaryExprOrTypeTraitExpr *E, - AddStmtChoice asc); + CFGBlock *VisitReturnStmt(ReturnStmt *R); CFGBlock *VisitStmtExpr(StmtExpr *S, AddStmtChoice asc); CFGBlock *VisitSwitchStmt(SwitchStmt *S); + CFGBlock *VisitUnaryExprOrTypeTraitExpr(UnaryExprOrTypeTraitExpr *E, + AddStmtChoice asc); CFGBlock *VisitUnaryOperator(UnaryOperator *U, AddStmtChoice asc); CFGBlock *VisitWhileStmt(WhileStmt *W); @@ -772,13 +776,12 @@ void CFGBuilder::addAutomaticObjDtors(LocalScope::const_iterator B, // If this destructor is marked as a no-return destructor, we need to // create a new block for the destructor which does not have as a successor // anything built thus far: control won't flow out of this block. - QualType Ty; - if ((*I)->getType()->isReferenceType()) { + QualType Ty = (*I)->getType(); + if (Ty->isReferenceType()) { Ty = getReferenceInitTemporaryType(*Context, (*I)->getInit()); - } else { - Ty = Context->getBaseElementType((*I)->getType()); } - + Ty = Context->getBaseElementType(Ty); + const CXXDestructorDecl *Dtor = Ty->getAsCXXRecordDecl()->getDestructor(); if (cast(Dtor->getType())->getNoReturnAttr()) Block = createNoReturnBlock(); @@ -1070,9 +1073,6 @@ CFGBlock *CFGBuilder::Visit(Stmt * S, AddStmtChoice asc) { case Stmt::LambdaExprClass: return VisitLambdaExpr(cast(S), asc); - case Stmt::AttributedStmtClass: - return Visit(cast(S)->getSubStmt(), asc); - case Stmt::MemberExprClass: return VisitMemberExpr(cast(S), asc); @@ -1166,55 +1166,111 @@ CFGBlock *CFGBuilder::VisitUnaryOperator(UnaryOperator *U, return Visit(U->getSubExpr(), AddStmtChoice()); } -CFGBlock *CFGBuilder::VisitBinaryOperator(BinaryOperator *B, - AddStmtChoice asc) { - if (B->isLogicalOp()) { // && or || - CFGBlock *ConfluenceBlock = Block ? Block : createBlock(); - appendStmt(ConfluenceBlock, B); +CFGBlock *CFGBuilder::VisitLogicalOperator(BinaryOperator *B) { + CFGBlock *ConfluenceBlock = Block ? 
Block : createBlock(); + appendStmt(ConfluenceBlock, B); - if (badCFG) - return 0; + if (badCFG) + return 0; - // create the block evaluating the LHS - CFGBlock *LHSBlock = createBlock(false); - LHSBlock->setTerminator(B); + return VisitLogicalOperator(B, 0, ConfluenceBlock, ConfluenceBlock).first; +} - // create the block evaluating the RHS - Succ = ConfluenceBlock; - Block = NULL; - CFGBlock *RHSBlock = addStmt(B->getRHS()); +std::pair +CFGBuilder::VisitLogicalOperator(BinaryOperator *B, + Stmt *Term, + CFGBlock *TrueBlock, + CFGBlock *FalseBlock) { - if (RHSBlock) { - if (badCFG) - return 0; - } else { - // Create an empty block for cases where the RHS doesn't require - // any explicit statements in the CFG. - RHSBlock = createBlock(); + // Introspect the RHS. If it is a nested logical operation, we recursively + // build the CFG using this function. Otherwise, resort to default + // CFG construction behavior. + Expr *RHS = B->getRHS()->IgnoreParens(); + CFGBlock *RHSBlock, *ExitBlock; + + do { + if (BinaryOperator *B_RHS = dyn_cast(RHS)) + if (B_RHS->isLogicalOp()) { + llvm::tie(RHSBlock, ExitBlock) = + VisitLogicalOperator(B_RHS, Term, TrueBlock, FalseBlock); + break; + } + + // The RHS is not a nested logical operation. Don't push the terminator + // down further, but instead visit RHS and construct the respective + // pieces of the CFG, and link up the RHSBlock with the terminator + // we have been provided. + ExitBlock = RHSBlock = createBlock(false); + + if (!Term) { + assert(TrueBlock == FalseBlock); + addSuccessor(RHSBlock, TrueBlock); + } + else { + RHSBlock->setTerminator(Term); + TryResult KnownVal = tryEvaluateBool(RHS); + addSuccessor(RHSBlock, KnownVal.isFalse() ? NULL : TrueBlock); + addSuccessor(RHSBlock, KnownVal.isTrue() ? NULL : FalseBlock); } - // Generate the blocks for evaluating the LHS. - Block = LHSBlock; - CFGBlock *EntryLHSBlock = addStmt(B->getLHS()); + Block = RHSBlock; + RHSBlock = addStmt(RHS); + } + while (false); - // See if this is a known constant. - TryResult KnownVal = tryEvaluateBool(B->getLHS()); - if (KnownVal.isKnown() && (B->getOpcode() == BO_LOr)) - KnownVal.negate(); + if (badCFG) + return std::make_pair((CFGBlock*)0, (CFGBlock*)0); + + // Generate the blocks for evaluating the LHS. + Expr *LHS = B->getLHS()->IgnoreParens(); + + if (BinaryOperator *B_LHS = dyn_cast(LHS)) + if (B_LHS->isLogicalOp()) { + if (B->getOpcode() == BO_LOr) + FalseBlock = RHSBlock; + else + TrueBlock = RHSBlock; - // Now link the LHSBlock with RHSBlock. - if (B->getOpcode() == BO_LOr) { - addSuccessor(LHSBlock, KnownVal.isTrue() ? NULL : ConfluenceBlock); - addSuccessor(LHSBlock, KnownVal.isFalse() ? NULL : RHSBlock); - } else { - assert(B->getOpcode() == BO_LAnd); - addSuccessor(LHSBlock, KnownVal.isFalse() ? NULL : RHSBlock); - addSuccessor(LHSBlock, KnownVal.isTrue() ? NULL : ConfluenceBlock); + // For the LHS, treat 'B' as the terminator that we want to sink + // into the nested branch. The RHS always gets the top-most + // terminator. + return VisitLogicalOperator(B_LHS, B, TrueBlock, FalseBlock); } - return EntryLHSBlock; + // Create the block evaluating the LHS. + // This contains the '&&' or '||' as the terminator. + CFGBlock *LHSBlock = createBlock(false); + LHSBlock->setTerminator(B); + + Block = LHSBlock; + CFGBlock *EntryLHSBlock = addStmt(LHS); + + if (badCFG) + return std::make_pair((CFGBlock*)0, (CFGBlock*)0); + + // See if this is a known constant. + TryResult KnownVal = tryEvaluateBool(LHS); + + // Now link the LHSBlock with RHSBlock. 
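+  // (An illustrative sketch of the resulting edges, not generated output:
+  //  for "a || b" LHSBlock gets successors [TrueBlock, RHSBlock]; for
+  //  "a && b" it gets [RHSBlock, FalseBlock]. A statically known LHS
+  //  replaces the infeasible edge with NULL.)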
+ if (B->getOpcode() == BO_LOr) { + addSuccessor(LHSBlock, KnownVal.isFalse() ? NULL : TrueBlock); + addSuccessor(LHSBlock, KnownVal.isTrue() ? NULL : RHSBlock); + } else { + assert(B->getOpcode() == BO_LAnd); + addSuccessor(LHSBlock, KnownVal.isFalse() ? NULL : RHSBlock); + addSuccessor(LHSBlock, KnownVal.isTrue() ? NULL : FalseBlock); } + return std::make_pair(EntryLHSBlock, ExitBlock); +} + + +CFGBlock *CFGBuilder::VisitBinaryOperator(BinaryOperator *B, + AddStmtChoice asc) { + // && or || + if (B->isLogicalOp()) + return VisitLogicalOperator(B); + if (B->getOpcode() == BO_Comma) { // , autoCreateBlock(); appendStmt(Block, B); @@ -1284,7 +1340,7 @@ static bool CanThrow(Expr *E, ASTContext &Ctx) { const FunctionType *FT = Ty->getAs(); if (FT) { if (const FunctionProtoType *Proto = dyn_cast(FT)) - if (Proto->getExceptionSpecType() != EST_Uninstantiated && + if (!isUnresolvedExceptionSpec(Proto->getExceptionSpecType()) && Proto->isNothrow(Ctx)) return false; } @@ -1435,6 +1491,12 @@ CFGBlock *CFGBuilder::VisitConditionalOperator(AbstractConditionalOperator *C, if (badCFG) return 0; + // If the condition is a logical '&&' or '||', build a more accurate CFG. + if (BinaryOperator *Cond = + dyn_cast(C->getCond()->IgnoreParens())) + if (Cond->isLogicalOp()) + return VisitLogicalOperator(Cond, C, LHSBlock, RHSBlock).first; + // Create the block that will contain the condition. Block = createBlock(false); @@ -1471,11 +1533,10 @@ CFGBlock *CFGBuilder::VisitDeclStmt(DeclStmt *DS) { CFGBlock *B = 0; - // FIXME: Add a reverse iterator for DeclStmt to avoid this extra copy. - typedef SmallVector BufTy; - BufTy Buf(DS->decl_begin(), DS->decl_end()); - - for (BufTy::reverse_iterator I = Buf.rbegin(), E = Buf.rend(); I != E; ++I) { + // Build an individual DeclStmt for each decl. + for (DeclStmt::reverse_decl_iterator I = DS->decl_rbegin(), + E = DS->decl_rend(); + I != E; ++I) { // Get the alignment of the new DeclStmt, padding out to >=8 bytes. unsigned A = llvm::AlignOf::Alignment < 8 ? 8 : llvm::AlignOf::Alignment; @@ -1645,6 +1706,19 @@ CFGBlock *CFGBuilder::VisitIfStmt(IfStmt *I) { } } + // Specially handle "if (expr1 || ...)" and "if (expr1 && ...)" by + // having these handle the actual control-flow jump. Note that + // if we introduce a condition variable, e.g. "if (int x = exp1 || exp2)" + // we resort to the old control-flow behavior. This special handling + // removes infeasible paths from the control-flow graph by having the + // control-flow transfer of '&&' or '||' go directly into the then/else + // blocks directly. + if (!I->getConditionVariable()) + if (BinaryOperator *Cond = + dyn_cast(I->getCond()->IgnoreParens())) + if (Cond->isLogicalOp()) + return VisitLogicalOperator(Cond, I, ThenBlock, ElseBlock).first; + // Now create a new block containing the if statement. Block = createBlock(false); @@ -1795,75 +1869,26 @@ CFGBlock *CFGBuilder::VisitForStmt(ForStmt *F) { SaveAndRestore save_break(BreakJumpTarget); BreakJumpTarget = JumpTarget(LoopSuccessor, ScopePos); - // Because of short-circuit evaluation, the condition of the loop can span - // multiple basic blocks. Thus we need the "Entry" and "Exit" blocks that - // evaluate the condition. - CFGBlock *ExitConditionBlock = createBlock(false); - CFGBlock *EntryConditionBlock = ExitConditionBlock; - - // Set the terminator for the "exit" condition block. - ExitConditionBlock->setTerminator(F); - - // Now add the actual condition to the condition block. Because the condition - // itself may contain control-flow, new blocks may be created. 
- if (Stmt *C = F->getCond()) { - Block = ExitConditionBlock; - EntryConditionBlock = addStmt(C); - if (badCFG) - return 0; - assert(Block == EntryConditionBlock || - (Block == 0 && EntryConditionBlock == Succ)); - - // If this block contains a condition variable, add both the condition - // variable and initializer to the CFG. - if (VarDecl *VD = F->getConditionVariable()) { - if (Expr *Init = VD->getInit()) { - autoCreateBlock(); - appendStmt(Block, F->getConditionVariableDeclStmt()); - EntryConditionBlock = addStmt(Init); - assert(Block == EntryConditionBlock); - } - } - - if (Block) { - if (badCFG) - return 0; - } - } - - // The condition block is the implicit successor for the loop body as well as - // any code above the loop. - Succ = EntryConditionBlock; - - // See if this is a known constant. - TryResult KnownVal(true); - - if (F->getCond()) - KnownVal = tryEvaluateBool(F->getCond()); + CFGBlock *BodyBlock = 0, *TransitionBlock = 0; // Now create the loop body. { assert(F->getBody()); - // Save the current values for Block, Succ, and continue targets. - SaveAndRestore save_Block(Block), save_Succ(Succ); - SaveAndRestore save_continue(ContinueJumpTarget); + // Save the current values for Block, Succ, continue and break targets. + SaveAndRestore save_Block(Block), save_Succ(Succ); + SaveAndRestore save_continue(ContinueJumpTarget); - // Create a new block to contain the (bottom) of the loop body. - Block = NULL; - - // Loop body should end with destructor of Condition variable (if any). - addAutomaticObjDtors(ScopePos, LoopBeginScopePos, F); + // Create an empty block to represent the transition block for looping back + // to the head of the loop. If we have increment code, it will + // go in this block as well. + Block = Succ = TransitionBlock = createBlock(false); + TransitionBlock->setLoopTarget(F); if (Stmt *I = F->getInc()) { // Generate increment code in its own basic block. This is the target of // continue statements. Succ = addStmt(I); - } else { - // No increment code. Create a special, empty, block that is used as the - // target block for "looping back" to the start of the loop. - assert(Succ == EntryConditionBlock); - Succ = Block ? Block : createBlock(); } // Finish up the increment (or empty) block if it hasn't been already. @@ -1874,11 +1899,13 @@ CFGBlock *CFGBuilder::VisitForStmt(ForStmt *F) { Block = 0; } - ContinueJumpTarget = JumpTarget(Succ, ContinueScopePos); + // The starting block for the loop increment is the block that should + // represent the 'loop target' for looping back to the start of the loop. + ContinueJumpTarget = JumpTarget(Succ, ContinueScopePos); + ContinueJumpTarget.block->setLoopTarget(F); - // The starting block for the loop increment is the block that should - // represent the 'loop target' for looping back to the start of the loop. - ContinueJumpTarget.block->setLoopTarget(F); + // Loop body should end with destructor of Condition variable (if any). + addAutomaticObjDtors(ScopePos, LoopBeginScopePos, F); // If body is not a compound statement create implicit scope // and add destructors. @@ -1887,20 +1914,79 @@ CFGBlock *CFGBuilder::VisitForStmt(ForStmt *F) { // Now populate the body block, and in the process create new blocks as we // walk the body of the loop. - CFGBlock *BodyBlock = addStmt(F->getBody()); + BodyBlock = addStmt(F->getBody()); - if (!BodyBlock) - BodyBlock = ContinueJumpTarget.block;//can happen for "for (...;...;...);" + if (!BodyBlock) { + // In the case of "for (...;...;...);" we can have a null BodyBlock. 
+ // Use the continue jump target as the proxy for the body. + BodyBlock = ContinueJumpTarget.block; + } else if (badCFG) return 0; + } + + // Because of short-circuit evaluation, the condition of the loop can span + // multiple basic blocks. Thus we need the "Entry" and "Exit" blocks that + // evaluate the condition. + CFGBlock *EntryConditionBlock = 0, *ExitConditionBlock = 0; + + do { + Expr *C = F->getCond(); + + // Specially handle logical operators, which have a slightly + // more optimal CFG representation. + if (BinaryOperator *Cond = + dyn_cast_or_null(C ? C->IgnoreParens() : 0)) + if (Cond->isLogicalOp()) { + llvm::tie(EntryConditionBlock, ExitConditionBlock) = + VisitLogicalOperator(Cond, F, BodyBlock, LoopSuccessor); + break; + } - // This new body block is a successor to our "exit" condition block. + // The default case when not handling logical operators. + EntryConditionBlock = ExitConditionBlock = createBlock(false); + ExitConditionBlock->setTerminator(F); + + // See if this is a known constant. + TryResult KnownVal(true); + + if (C) { + // Now add the actual condition to the condition block. + // Because the condition itself may contain control-flow, new blocks may + // be created. Thus we update "Succ" after adding the condition. + Block = ExitConditionBlock; + EntryConditionBlock = addStmt(C); + + // If this block contains a condition variable, add both the condition + // variable and initializer to the CFG. + if (VarDecl *VD = F->getConditionVariable()) { + if (Expr *Init = VD->getInit()) { + autoCreateBlock(); + appendStmt(Block, F->getConditionVariableDeclStmt()); + EntryConditionBlock = addStmt(Init); + assert(Block == EntryConditionBlock); + } + } + + if (Block && badCFG) + return 0; + + KnownVal = tryEvaluateBool(C); + } + + // Add the loop body entry as a successor to the condition. addSuccessor(ExitConditionBlock, KnownVal.isFalse() ? NULL : BodyBlock); - } + // Link up the condition block with the code that follows the loop. (the + // false branch). + addSuccessor(ExitConditionBlock, KnownVal.isTrue() ? NULL : LoopSuccessor); - // Link up the condition block with the code that follows the loop. (the - // false branch). - addSuccessor(ExitConditionBlock, KnownVal.isTrue() ? NULL : LoopSuccessor); + } while (false); + + // Link up the loop-back block to the entry condition block. + addSuccessor(TransitionBlock, EntryConditionBlock); + + // The condition block is the implicit successor for any code above the loop. + Succ = EntryConditionBlock; // If the loop contains initialization, create a new block for those // statements. This block can also contain statements that precede the loop. @@ -2108,74 +2194,30 @@ CFGBlock *CFGBuilder::VisitWhileStmt(WhileStmt *W) { return 0; LoopSuccessor = Block; Block = 0; - } else + } else { LoopSuccessor = Succ; - - // Because of short-circuit evaluation, the condition of the loop can span - // multiple basic blocks. Thus we need the "Entry" and "Exit" blocks that - // evaluate the condition. - CFGBlock *ExitConditionBlock = createBlock(false); - CFGBlock *EntryConditionBlock = ExitConditionBlock; - - // Set the terminator for the "exit" condition block. - ExitConditionBlock->setTerminator(W); - - // Now add the actual condition to the condition block. Because the condition - // itself may contain control-flow, new blocks may be created. Thus we update - // "Succ" after adding the condition. 
- if (Stmt *C = W->getCond()) { - Block = ExitConditionBlock; - EntryConditionBlock = addStmt(C); - // The condition might finish the current 'Block'. - Block = EntryConditionBlock; - - // If this block contains a condition variable, add both the condition - // variable and initializer to the CFG. - if (VarDecl *VD = W->getConditionVariable()) { - if (Expr *Init = VD->getInit()) { - autoCreateBlock(); - appendStmt(Block, W->getConditionVariableDeclStmt()); - EntryConditionBlock = addStmt(Init); - assert(Block == EntryConditionBlock); - } - } - - if (Block) { - if (badCFG) - return 0; - } } - // The condition block is the implicit successor for the loop body as well as - // any code above the loop. - Succ = EntryConditionBlock; - - // See if this is a known constant. - const TryResult& KnownVal = tryEvaluateBool(W->getCond()); + CFGBlock *BodyBlock = 0, *TransitionBlock = 0; // Process the loop body. { assert(W->getBody()); - // Save the current values for Block, Succ, and continue and break targets + // Save the current values for Block, Succ, continue and break targets. SaveAndRestore save_Block(Block), save_Succ(Succ); SaveAndRestore save_continue(ContinueJumpTarget), - save_break(BreakJumpTarget); + save_break(BreakJumpTarget); // Create an empty block to represent the transition block for looping back // to the head of the loop. - Block = 0; - assert(Succ == EntryConditionBlock); - Succ = createBlock(); - Succ->setLoopTarget(W); + Succ = TransitionBlock = createBlock(false); + TransitionBlock->setLoopTarget(W); ContinueJumpTarget = JumpTarget(Succ, LoopBeginScopePos); // All breaks should go to the code following the loop. BreakJumpTarget = JumpTarget(LoopSuccessor, ScopePos); - // NULL out Block to force lazy instantiation of blocks for the body. - Block = NULL; - // Loop body should end with destructor of Condition variable (if any). addAutomaticObjDtors(ScopePos, LoopBeginScopePos, W); @@ -2185,22 +2227,69 @@ CFGBlock *CFGBuilder::VisitWhileStmt(WhileStmt *W) { addLocalScopeAndDtors(W->getBody()); // Create the body. The returned block is the entry to the loop body. - CFGBlock *BodyBlock = addStmt(W->getBody()); + BodyBlock = addStmt(W->getBody()); if (!BodyBlock) BodyBlock = ContinueJumpTarget.block; // can happen for "while(...) ;" - else if (Block) { - if (badCFG) - return 0; + else if (Block && badCFG) + return 0; + } + + // Because of short-circuit evaluation, the condition of the loop can span + // multiple basic blocks. Thus we need the "Entry" and "Exit" blocks that + // evaluate the condition. + CFGBlock *EntryConditionBlock = 0, *ExitConditionBlock = 0; + + do { + Expr *C = W->getCond(); + + // Specially handle logical operators, which have a slightly + // more optimal CFG representation. + if (BinaryOperator *Cond = dyn_cast(C->IgnoreParens())) + if (Cond->isLogicalOp()) { + llvm::tie(EntryConditionBlock, ExitConditionBlock) = + VisitLogicalOperator(Cond, W, BodyBlock, + LoopSuccessor); + break; + } + + // The default case when not handling logical operators. + EntryConditionBlock = ExitConditionBlock = createBlock(false); + ExitConditionBlock->setTerminator(W); + + // Now add the actual condition to the condition block. + // Because the condition itself may contain control-flow, new blocks may + // be created. Thus we update "Succ" after adding the condition. + Block = ExitConditionBlock; + Block = EntryConditionBlock = addStmt(C); + + // If this block contains a condition variable, add both the condition + // variable and initializer to the CFG. 
+ if (VarDecl *VD = W->getConditionVariable()) { + if (Expr *Init = VD->getInit()) { + autoCreateBlock(); + appendStmt(Block, W->getConditionVariableDeclStmt()); + EntryConditionBlock = addStmt(Init); + assert(Block == EntryConditionBlock); + } } + if (Block && badCFG) + return 0; + + // See if this is a known constant. + const TryResult& KnownVal = tryEvaluateBool(C); + // Add the loop body entry as a successor to the condition. addSuccessor(ExitConditionBlock, KnownVal.isFalse() ? NULL : BodyBlock); - } + // Link up the condition block with the code that follows the loop. (the + // false branch). + addSuccessor(ExitConditionBlock, KnownVal.isTrue() ? NULL : LoopSuccessor); - // Link up the condition block with the code that follows the loop. (the - // false branch). - addSuccessor(ExitConditionBlock, KnownVal.isTrue() ? NULL : LoopSuccessor); + } while(false); + + // Link up the loop-back block to the entry condition block. + addSuccessor(TransitionBlock, EntryConditionBlock); // There can be no more statements in the condition block since we loop back // to this block. NULL out Block to force lazy creation of another block. @@ -3203,8 +3292,8 @@ CFGImplicitDtor::getDestructorDecl(ASTContext &astContext) const { } bool CFGImplicitDtor::isNoReturn(ASTContext &astContext) const { - if (const CXXDestructorDecl *cdecl = getDestructorDecl(astContext)) { - QualType ty = cdecl->getType(); + if (const CXXDestructorDecl *decl = getDestructorDecl(astContext)) { + QualType ty = decl->getType(); return cast(ty)->getNoReturnAttr(); } return false; @@ -3631,8 +3720,7 @@ static void print_elem(raw_ostream &OS, StmtPrinterHelper* Helper, const Type* T = VD->getType().getTypePtr(); if (const ReferenceType* RT = T->getAs()) T = RT->getPointeeType().getTypePtr(); - else if (const Type *ET = T->getArrayElementTypeNoTypeQual()) - T = ET; + T = T->getBaseElementTypeUnsafe(); OS << ".~" << T->getAsCXXRecordDecl()->getName().str() << "()"; OS << " (Implicit destructor)\n"; @@ -3644,11 +3732,7 @@ static void print_elem(raw_ostream &OS, StmtPrinterHelper* Helper, } else if (const CFGMemberDtor *ME = E.getAs()) { const FieldDecl *FD = ME->getFieldDecl(); - - const Type *T = FD->getType().getTypePtr(); - if (const Type *ET = T->getArrayElementTypeNoTypeQual()) - T = ET; - + const Type *T = FD->getType()->getBaseElementTypeUnsafe(); OS << "this->" << FD->getName(); OS << ".~" << T->getAsCXXRecordDecl()->getName() << "()"; OS << " (Member object destructor)\n"; diff --git a/lib/Analysis/CMakeLists.txt b/lib/Analysis/CMakeLists.txt index ca2392b..d57e481 100644 --- a/lib/Analysis/CMakeLists.txt +++ b/lib/Analysis/CMakeLists.txt @@ -1,5 +1,3 @@ -set(LLVM_USED_LIBS clangBasic clangAST) - add_clang_library(clangAnalysis AnalysisDeclContext.cpp CallGraph.cpp @@ -20,5 +18,17 @@ add_clang_library(clangAnalysis UninitializedValues.cpp ) -add_dependencies(clangAnalysis ClangAttrClasses ClangAttrList - ClangDiagnosticAnalysis ClangDeclNodes ClangStmtNodes) +add_dependencies(clangAnalysis + ClangAttrClasses + ClangAttrList + ClangCommentNodes + ClangDiagnosticCommon + ClangDeclNodes + ClangDiagnosticAnalysis + ClangStmtNodes + ) + +target_link_libraries(clangAnalysis + clangBasic + clangAST + ) diff --git a/lib/Analysis/CallGraph.cpp b/lib/Analysis/CallGraph.cpp index 96a16c3..6b75956 100644 --- a/lib/Analysis/CallGraph.cpp +++ b/lib/Analysis/CallGraph.cpp @@ -25,12 +25,11 @@ namespace { /// given function body. 
class CGBuilder : public StmtVisitor { CallGraph *G; - const Decl *FD; CallGraphNode *CallerNode; public: - CGBuilder(CallGraph *g, const Decl *D, CallGraphNode *N) - : G(g), FD(D), CallerNode(N) {} + CGBuilder(CallGraph *g, CallGraphNode *N) + : G(g), CallerNode(N) {} void VisitStmt(Stmt *S) { VisitChildren(S); } @@ -99,7 +98,7 @@ void CallGraph::addNodeForDecl(Decl* D, bool IsGlobal) { Root->addCallee(Node, this); // Process all the calls by this function as well. - CGBuilder builder(this, D, Node); + CGBuilder builder(this, Node); if (Stmt *Body = D->getBody()) builder.Visit(Body); } diff --git a/lib/Analysis/CocoaConventions.cpp b/lib/Analysis/CocoaConventions.cpp index 7e9e38f..ce973af 100644 --- a/lib/Analysis/CocoaConventions.cpp +++ b/lib/Analysis/CocoaConventions.cpp @@ -17,6 +17,8 @@ #include "clang/AST/DeclObjC.h" #include "llvm/ADT/StringExtras.h" #include "llvm/Support/ErrorHandling.h" +#include + using namespace clang; using namespace ento; diff --git a/lib/Analysis/FormatString.cpp b/lib/Analysis/FormatString.cpp index ba45865..e7ea486 100644 --- a/lib/Analysis/FormatString.cpp +++ b/lib/Analysis/FormatString.cpp @@ -15,7 +15,7 @@ #include "FormatStringParsing.h" #include "clang/Basic/LangOptions.h" -using clang::analyze_format_string::ArgTypeResult; +using clang::analyze_format_string::ArgType; using clang::analyze_format_string::FormatStringHandler; using clang::analyze_format_string::FormatSpecifier; using clang::analyze_format_string::LengthModifier; @@ -229,18 +229,34 @@ clang::analyze_format_string::ParseLengthModifier(FormatSpecifier &FS, } //===----------------------------------------------------------------------===// -// Methods on ArgTypeResult. +// Methods on ArgType. //===----------------------------------------------------------------------===// -bool ArgTypeResult::matchesType(ASTContext &C, QualType argTy) const { +bool ArgType::matchesType(ASTContext &C, QualType argTy) const { + if (Ptr) { + // It has to be a pointer. + const PointerType *PT = argTy->getAs(); + if (!PT) + return false; + + // We cannot write through a const qualified pointer. + if (PT->getPointeeType().isConstQualified()) + return false; + + argTy = PT->getPointeeType(); + } + switch (K) { case InvalidTy: - llvm_unreachable("ArgTypeResult must be valid"); + llvm_unreachable("ArgType must be valid"); case UnknownTy: return true; case AnyCharTy: { + if (const EnumType *ETy = argTy->getAs()) + argTy = ETy->getDecl()->getIntegerType(); + if (const BuiltinType *BT = argTy->getAs()) switch (BT->getKind()) { default: @@ -255,7 +271,10 @@ bool ArgTypeResult::matchesType(ASTContext &C, QualType argTy) const { } case SpecificTy: { + if (const EnumType *ETy = argTy->getAs()) + argTy = ETy->getDecl()->getIntegerType(); argTy = C.getCanonicalType(argTy).getUnqualifiedType(); + if (T == argTy) return true; // Check for "compatible types". 
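 // (Illustrative effect of the enum handling added above: for
 //  "enum Color C; printf("%d", C);" the argument is checked through the
 //  enum's underlying integer type, so no mismatch is reported when that
 //  type is 'int'.)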
@@ -265,10 +284,9 @@ bool ArgTypeResult::matchesType(ASTContext &C, QualType argTy) const { break; case BuiltinType::Char_S: case BuiltinType::SChar: - return T == C.UnsignedCharTy; case BuiltinType::Char_U: case BuiltinType::UChar: - return T == C.SignedCharTy; + return T == C.UnsignedCharTy || T == C.SignedCharTy; case BuiltinType::Short: return T == C.UnsignedShortTy; case BuiltinType::UShort: @@ -319,20 +337,21 @@ bool ArgTypeResult::matchesType(ASTContext &C, QualType argTy) const { } case WIntTy: { - // Instead of doing a lookup for the definition of 'wint_t' (which - // is defined by the system headers) instead see if wchar_t and - // the argument type promote to the same type. - QualType PromoWChar = - C.getWCharType()->isPromotableIntegerType() - ? C.getPromotedIntegerType(C.getWCharType()) : C.getWCharType(); + QualType PromoArg = argTy->isPromotableIntegerType() ? C.getPromotedIntegerType(argTy) : argTy; - PromoWChar = C.getCanonicalType(PromoWChar).getUnqualifiedType(); + QualType WInt = C.getCanonicalType(C.getWIntType()).getUnqualifiedType(); PromoArg = C.getCanonicalType(PromoArg).getUnqualifiedType(); - return PromoWChar == PromoArg; + // If the promoted argument is the corresponding signed type of the + // wint_t type, then it should match. + if (PromoArg->hasSignedIntegerRepresentation() && + C.getCorrespondingUnsignedType(PromoArg) == WInt) + return true; + + return WInt == PromoArg; } case CPointerTy: @@ -358,40 +377,63 @@ bool ArgTypeResult::matchesType(ASTContext &C, QualType argTy) const { } } - llvm_unreachable("Invalid ArgTypeResult Kind!"); + llvm_unreachable("Invalid ArgType Kind!"); } -QualType ArgTypeResult::getRepresentativeType(ASTContext &C) const { +QualType ArgType::getRepresentativeType(ASTContext &C) const { + QualType Res; switch (K) { case InvalidTy: - llvm_unreachable("No representative type for Invalid ArgTypeResult"); + llvm_unreachable("No representative type for Invalid ArgType"); case UnknownTy: - return QualType(); + llvm_unreachable("No representative type for Unknown ArgType"); case AnyCharTy: - return C.CharTy; + Res = C.CharTy; + break; case SpecificTy: - return T; + Res = T; + break; case CStrTy: - return C.getPointerType(C.CharTy); + Res = C.getPointerType(C.CharTy); + break; case WCStrTy: - return C.getPointerType(C.getWCharType()); + Res = C.getPointerType(C.getWCharType()); + break; case ObjCPointerTy: - return C.ObjCBuiltinIdTy; + Res = C.ObjCBuiltinIdTy; + break; case CPointerTy: - return C.VoidPtrTy; + Res = C.VoidPtrTy; + break; case WIntTy: { - QualType WC = C.getWCharType(); - return WC->isPromotableIntegerType() ? C.getPromotedIntegerType(WC) : WC; + Res = C.getWIntType(); + break; } } - llvm_unreachable("Invalid ArgTypeResult Kind!"); + if (Ptr) + Res = C.getPointerType(Res); + return Res; } -std::string ArgTypeResult::getRepresentativeTypeName(ASTContext &C) const { +std::string ArgType::getRepresentativeTypeName(ASTContext &C) const { std::string S = getRepresentativeType(C).getAsString(); - if (Name && S != Name) - return std::string("'") + Name + "' (aka '" + S + "')"; + + std::string Alias; + if (Name) { + // Use a specific name for this type, e.g. "size_t". + Alias = Name; + if (Ptr) { + // If ArgType is actually a pointer to T, append an asterisk. + Alias += (Alias[Alias.size()-1] == '*') ? "*" : " *"; + } + // If Alias is the same as the underlying type, e.g. wchar_t, then drop it. 
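
getRepresentativeTypeName above builds the diagnostic spelling from an optional typedef name plus the Ptr flag. A standalone sketch of the naming rule; the function name and parameters here are illustrative stand-ins for the real method:

    #include <iostream>
    #include <string>

    // Name may be null (no typedef alias); Ptr mirrors ArgType's PtrTo flag.
    std::string representativeName(const std::string &Canonical,
                                   const char *Name, bool Ptr) {
      std::string Alias;
      if (Name) {
        Alias = Name;
        if (Ptr)  // "size_t" -> "size_t *", but "wchar_t *" -> "wchar_t **"
          Alias += (Alias[Alias.size() - 1] == '*') ? "*" : " *";
        if (Alias == Canonical)
          Alias.clear();  // the alias adds nothing over the canonical spelling
      }
      if (!Alias.empty())
        return "'" + Alias + "' (aka '" + Canonical + "')";
      return "'" + Canonical + "'";
    }

    int main() {
      std::cout << representativeName("unsigned long *", "size_t", true) << "\n";
      // 'size_t *' (aka 'unsigned long *')
      std::cout << representativeName("wchar_t", "wchar_t", false) << "\n";
      // 'wchar_t'
    }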
+ if (S == Alias) + Alias.clear(); + } + + if (!Alias.empty()) + return std::string("'") + Alias + "' (aka '" + S + "')"; return std::string("'") + S + "'"; } @@ -400,7 +442,7 @@ std::string ArgTypeResult::getRepresentativeTypeName(ASTContext &C) const { // Methods on OptionalAmount. //===----------------------------------------------------------------------===// -ArgTypeResult +ArgType analyze_format_string::OptionalAmount::getArgType(ASTContext &Ctx) const { return Ctx.IntTy; } @@ -676,3 +718,37 @@ bool FormatSpecifier::hasStandardLengthConversionCombination() const { } return true; } + +bool FormatSpecifier::namedTypeToLengthModifier(QualType QT, + LengthModifier &LM) { + assert(isa(QT) && "Expected a TypedefType"); + const TypedefNameDecl *Typedef = cast(QT)->getDecl(); + + for (;;) { + const IdentifierInfo *Identifier = Typedef->getIdentifier(); + if (Identifier->getName() == "size_t") { + LM.setKind(LengthModifier::AsSizeT); + return true; + } else if (Identifier->getName() == "ssize_t") { + // Not C99, but common in Unix. + LM.setKind(LengthModifier::AsSizeT); + return true; + } else if (Identifier->getName() == "intmax_t") { + LM.setKind(LengthModifier::AsIntMax); + return true; + } else if (Identifier->getName() == "uintmax_t") { + LM.setKind(LengthModifier::AsIntMax); + return true; + } else if (Identifier->getName() == "ptrdiff_t") { + LM.setKind(LengthModifier::AsPtrDiff); + return true; + } + + QualType T = Typedef->getUnderlyingType(); + if (!isa(T)) + break; + + Typedef = cast(T)->getDecl(); + } + return false; +} diff --git a/lib/Analysis/LiveVariables.cpp b/lib/Analysis/LiveVariables.cpp index ff6607d..38f8199 100644 --- a/lib/Analysis/LiveVariables.cpp +++ b/lib/Analysis/LiveVariables.cpp @@ -284,6 +284,14 @@ void TransferFunctions::Visit(Stmt *S) { } break; } + case Stmt::ObjCMessageExprClass: { + // In calls to super, include the implicit "self" pointer as being live. + ObjCMessageExpr *CE = cast(S); + if (CE->getReceiverKind() == ObjCMessageExpr::SuperInstance) + val.liveDecls = LV.DSetFact.add(val.liveDecls, + LV.analysisContext.getSelfDecl()); + break; + } case Stmt::DeclStmtClass: { const DeclStmt *DS = cast(S); if (const VarDecl *VD = dyn_cast(DS->getSingleDecl())) { @@ -455,6 +463,12 @@ LiveVariablesImpl::runOnBlock(const CFGBlock *block, for (CFGBlock::const_reverse_iterator it = block->rbegin(), ei = block->rend(); it != ei; ++it) { const CFGElement &elem = *it; + + if (const CFGAutomaticObjDtor *Dtor = dyn_cast(&elem)){ + val.liveDecls = DSetFact.add(val.liveDecls, Dtor->getVarDecl()); + continue; + } + if (!isa(elem)) continue; @@ -486,6 +500,11 @@ LiveVariables::computeLiveness(AnalysisDeclContext &AC, if (!cfg) return 0; + // The analysis currently has scalability issues for very large CFGs. + // Bail out if it looks too large. + if (cfg->getNumBlockIDs() > 300000) + return 0; + LiveVariablesImpl *LV = new LiveVariablesImpl(AC, killAtAssign); // Construct the dataflow worklist. 
Enqueue the exit block as the diff --git a/lib/Analysis/PrintfFormatString.cpp b/lib/Analysis/PrintfFormatString.cpp index e1049b3..9e4c0fe 100644 --- a/lib/Analysis/PrintfFormatString.cpp +++ b/lib/Analysis/PrintfFormatString.cpp @@ -15,7 +15,7 @@ #include "clang/Analysis/Analyses/FormatString.h" #include "FormatStringParsing.h" -using clang::analyze_format_string::ArgTypeResult; +using clang::analyze_format_string::ArgType; using clang::analyze_format_string::FormatStringHandler; using clang::analyze_format_string::LengthModifier; using clang::analyze_format_string::OptionalAmount; @@ -241,20 +241,20 @@ bool clang::analyze_format_string::ParsePrintfString(FormatStringHandler &H, // Methods on PrintfSpecifier. //===----------------------------------------------------------------------===// -ArgTypeResult PrintfSpecifier::getArgType(ASTContext &Ctx, - bool IsObjCLiteral) const { +ArgType PrintfSpecifier::getArgType(ASTContext &Ctx, + bool IsObjCLiteral) const { const PrintfConversionSpecifier &CS = getConversionSpecifier(); if (!CS.consumesDataArgument()) - return ArgTypeResult::Invalid(); + return ArgType::Invalid(); if (CS.getKind() == ConversionSpecifier::cArg) switch (LM.getKind()) { case LengthModifier::None: return Ctx.IntTy; case LengthModifier::AsLong: - return ArgTypeResult(ArgTypeResult::WIntTy, "wint_t"); + return ArgType(ArgType::WIntTy, "wint_t"); default: - return ArgTypeResult::Invalid(); + return ArgType::Invalid(); } if (CS.isIntArg()) @@ -263,22 +263,22 @@ ArgTypeResult PrintfSpecifier::getArgType(ASTContext &Ctx, // GNU extension. return Ctx.LongLongTy; case LengthModifier::None: return Ctx.IntTy; - case LengthModifier::AsChar: return ArgTypeResult::AnyCharTy; + case LengthModifier::AsChar: return ArgType::AnyCharTy; case LengthModifier::AsShort: return Ctx.ShortTy; case LengthModifier::AsLong: return Ctx.LongTy; case LengthModifier::AsLongLong: case LengthModifier::AsQuad: return Ctx.LongLongTy; case LengthModifier::AsIntMax: - return ArgTypeResult(Ctx.getIntMaxType(), "intmax_t"); + return ArgType(Ctx.getIntMaxType(), "intmax_t"); case LengthModifier::AsSizeT: // FIXME: How to get the corresponding signed version of size_t? - return ArgTypeResult(); + return ArgType(); case LengthModifier::AsPtrDiff: - return ArgTypeResult(Ctx.getPointerDiffType(), "ptrdiff_t"); + return ArgType(Ctx.getPointerDiffType(), "ptrdiff_t"); case LengthModifier::AsAllocate: case LengthModifier::AsMAllocate: - return ArgTypeResult::Invalid(); + return ArgType::Invalid(); } if (CS.isUIntArg()) @@ -294,16 +294,16 @@ ArgTypeResult PrintfSpecifier::getArgType(ASTContext &Ctx, case LengthModifier::AsQuad: return Ctx.UnsignedLongLongTy; case LengthModifier::AsIntMax: - return ArgTypeResult(Ctx.getUIntMaxType(), "uintmax_t"); + return ArgType(Ctx.getUIntMaxType(), "uintmax_t"); case LengthModifier::AsSizeT: - return ArgTypeResult(Ctx.getSizeType(), "size_t"); + return ArgType(Ctx.getSizeType(), "size_t"); case LengthModifier::AsPtrDiff: // FIXME: How to get the corresponding unsigned // version of ptrdiff_t? 
- return ArgTypeResult(); + return ArgType(); case LengthModifier::AsAllocate: case LengthModifier::AsMAllocate: - return ArgTypeResult::Invalid(); + return ArgType::Invalid(); } if (CS.isDoubleArg()) { @@ -312,37 +312,90 @@ ArgTypeResult PrintfSpecifier::getArgType(ASTContext &Ctx, return Ctx.DoubleTy; } + if (CS.getKind() == ConversionSpecifier::nArg) { + switch (LM.getKind()) { + case LengthModifier::None: + return ArgType::PtrTo(Ctx.IntTy); + case LengthModifier::AsChar: + return ArgType::PtrTo(Ctx.SignedCharTy); + case LengthModifier::AsShort: + return ArgType::PtrTo(Ctx.ShortTy); + case LengthModifier::AsLong: + return ArgType::PtrTo(Ctx.LongTy); + case LengthModifier::AsLongLong: + case LengthModifier::AsQuad: + return ArgType::PtrTo(Ctx.LongLongTy); + case LengthModifier::AsIntMax: + return ArgType::PtrTo(ArgType(Ctx.getIntMaxType(), "intmax_t")); + case LengthModifier::AsSizeT: + return ArgType(); // FIXME: ssize_t + case LengthModifier::AsPtrDiff: + return ArgType::PtrTo(ArgType(Ctx.getPointerDiffType(), "ptrdiff_t")); + case LengthModifier::AsLongDouble: + return ArgType(); // FIXME: Is this a known extension? + case LengthModifier::AsAllocate: + case LengthModifier::AsMAllocate: + return ArgType::Invalid(); + } + } + switch (CS.getKind()) { case ConversionSpecifier::sArg: if (LM.getKind() == LengthModifier::AsWideChar) { if (IsObjCLiteral) return Ctx.getPointerType(Ctx.UnsignedShortTy.withConst()); - return ArgTypeResult(ArgTypeResult::WCStrTy, "wchar_t *"); + return ArgType(ArgType::WCStrTy, "wchar_t *"); } - return ArgTypeResult::CStrTy; + return ArgType::CStrTy; case ConversionSpecifier::SArg: if (IsObjCLiteral) return Ctx.getPointerType(Ctx.UnsignedShortTy.withConst()); - return ArgTypeResult(ArgTypeResult::WCStrTy, "wchar_t *"); + return ArgType(ArgType::WCStrTy, "wchar_t *"); case ConversionSpecifier::CArg: if (IsObjCLiteral) return Ctx.UnsignedShortTy; - return ArgTypeResult(Ctx.WCharTy, "wchar_t"); + return ArgType(Ctx.WCharTy, "wchar_t"); case ConversionSpecifier::pArg: - return ArgTypeResult::CPointerTy; + return ArgType::CPointerTy; case ConversionSpecifier::ObjCObjArg: - return ArgTypeResult::ObjCPointerTy; + return ArgType::ObjCPointerTy; default: break; } // FIXME: Handle other cases. - return ArgTypeResult(); + return ArgType(); } bool PrintfSpecifier::fixType(QualType QT, const LangOptions &LangOpt, ASTContext &Ctx, bool IsObjCLiteral) { - // Handle strings first (char *, wchar_t *) + // %n is different from other conversion specifiers; don't try to fix it. + if (CS.getKind() == ConversionSpecifier::nArg) + return false; + + // Handle Objective-C objects first. Note that while the '%@' specifier will + // not warn for structure pointer or void pointer arguments (because that's + // how CoreFoundation objects are implemented), we only show a fixit for '%@' + // if we know it's an object (block, id, class, or __attribute__((NSObject))). 
+ if (QT->isObjCRetainableType()) { + if (!IsObjCLiteral) + return false; + + CS.setKind(ConversionSpecifier::ObjCObjArg); + + // Disable irrelevant flags + HasThousandsGrouping = false; + HasPlusPrefix = false; + HasSpacePrefix = false; + HasAlternativeForm = false; + HasLeadingZeroes = false; + Precision.setHowSpecified(OptionalAmount::NotSpecified); + LM.setKind(LengthModifier::None); + + return true; + } + + // Handle strings next (char *, wchar_t *) if (QT->isPointerType() && (QT->getPointeeType()->isAnyCharacterType())) { CS.setKind(ConversionSpecifier::sArg); @@ -359,6 +412,10 @@ bool PrintfSpecifier::fixType(QualType QT, const LangOptions &LangOpt, return true; } + // If it's an enum, get its underlying type. + if (const EnumType *ETy = QT->getAs()) + QT = ETy->getDecl()->getIntegerType(); + // We can only work with builtin types. const BuiltinType *BT = QT->getAs(); if (!BT) @@ -421,24 +478,11 @@ bool PrintfSpecifier::fixType(QualType QT, const LangOptions &LangOpt, } // Handle size_t, ptrdiff_t, etc. that have dedicated length modifiers in C99. - if (isa(QT) && (LangOpt.C99 || LangOpt.CPlusPlus0x)) { - const IdentifierInfo *Identifier = QT.getBaseTypeIdentifier(); - if (Identifier->getName() == "size_t") { - LM.setKind(LengthModifier::AsSizeT); - } else if (Identifier->getName() == "ssize_t") { - // Not C99, but common in Unix. - LM.setKind(LengthModifier::AsSizeT); - } else if (Identifier->getName() == "intmax_t") { - LM.setKind(LengthModifier::AsIntMax); - } else if (Identifier->getName() == "uintmax_t") { - LM.setKind(LengthModifier::AsIntMax); - } else if (Identifier->getName() == "ptrdiff_t") { - LM.setKind(LengthModifier::AsPtrDiff); - } - } + if (isa(QT) && (LangOpt.C99 || LangOpt.CPlusPlus0x)) + namedTypeToLengthModifier(QT, LM); // If fixing the length modifier was enough, we are done. 
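
namedTypeToLengthModifier, added earlier in this file, now replaces the duplicated inline size_t/intmax_t/ptrdiff_t checks in both fixType implementations by walking the whole chain of typedefs instead of looking only at the outermost name. A standalone sketch of that walk, modeling a typedef chain as a simple linked list rather than clang's TypedefType:

    #include <cassert>
    #include <string>

    struct TypedefNode { std::string Name; const TypedefNode *Underlying; };

    enum LengthModifier { None, AsSizeT, AsIntMax, AsPtrDiff };

    bool namedTypeToLengthModifier(const TypedefNode *T, LengthModifier &LM) {
      for (; T; T = T->Underlying) {
        if (T->Name == "size_t" || T->Name == "ssize_t") { // ssize_t: POSIX, not C99
          LM = AsSizeT;   return true;
        }
        if (T->Name == "intmax_t" || T->Name == "uintmax_t") {
          LM = AsIntMax;  return true;
        }
        if (T->Name == "ptrdiff_t") {
          LM = AsPtrDiff; return true;
        }
      }
      return false;
    }

    int main() {
      // my_size_t -> size_t -> (builtin): the walk sees through the alias.
      TypedefNode SizeT = {"size_t", 0};
      TypedefNode MySizeT = {"my_size_t", &SizeT};
      LengthModifier LM = None;
      assert(namedTypeToLengthModifier(&MySizeT, LM) && LM == AsSizeT);
      return 0;
    }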
- const analyze_printf::ArgTypeResult &ATR = getArgType(Ctx, IsObjCLiteral); + const analyze_printf::ArgType &ATR = getArgType(Ctx, IsObjCLiteral); if (hasValidLengthModifier() && ATR.isValid() && ATR.matchesType(Ctx, QT)) return true; diff --git a/lib/Analysis/ProgramPoint.cpp b/lib/Analysis/ProgramPoint.cpp index 3f711b4..7d67e8a 100644 --- a/lib/Analysis/ProgramPoint.cpp +++ b/lib/Analysis/ProgramPoint.cpp @@ -36,8 +36,10 @@ ProgramPoint ProgramPoint::getProgramPoint(const Stmt *S, ProgramPoint::Kind K, return PreStore(S, LC, tag); case ProgramPoint::PostLValueKind: return PostLValue(S, LC, tag); - case ProgramPoint::PostPurgeDeadSymbolsKind: - return PostPurgeDeadSymbols(S, LC, tag); + case ProgramPoint::PostStmtPurgeDeadSymbolsKind: + return PostStmtPurgeDeadSymbols(S, LC, tag); + case ProgramPoint::PreStmtPurgeDeadSymbolsKind: + return PreStmtPurgeDeadSymbols(S, LC, tag); } } diff --git a/lib/Analysis/PseudoConstantAnalysis.cpp b/lib/Analysis/PseudoConstantAnalysis.cpp index c8b491a..5d659ce 100644 --- a/lib/Analysis/PseudoConstantAnalysis.cpp +++ b/lib/Analysis/PseudoConstantAnalysis.cpp @@ -17,6 +17,7 @@ #include "clang/AST/Decl.h" #include "clang/AST/Expr.h" #include "clang/AST/Stmt.h" +#include "llvm/ADT/SmallPtrSet.h" #include using namespace clang; diff --git a/lib/Analysis/ScanfFormatString.cpp b/lib/Analysis/ScanfFormatString.cpp index 6bc4adb..2942400 100644 --- a/lib/Analysis/ScanfFormatString.cpp +++ b/lib/Analysis/ScanfFormatString.cpp @@ -15,12 +15,11 @@ #include "clang/Analysis/Analyses/FormatString.h" #include "FormatStringParsing.h" -using clang::analyze_format_string::ArgTypeResult; +using clang::analyze_format_string::ArgType; using clang::analyze_format_string::FormatStringHandler; using clang::analyze_format_string::LengthModifier; using clang::analyze_format_string::OptionalAmount; using clang::analyze_format_string::ConversionSpecifier; -using clang::analyze_scanf::ScanfArgTypeResult; using clang::analyze_scanf::ScanfConversionSpecifier; using clang::analyze_scanf::ScanfSpecifier; using clang::UpdateOnReturn; @@ -194,37 +193,42 @@ static ScanfSpecifierResult ParseScanfSpecifier(FormatStringHandler &H, return ScanfSpecifierResult(Start, FS); } -ScanfArgTypeResult ScanfSpecifier::getArgType(ASTContext &Ctx) const { +ArgType ScanfSpecifier::getArgType(ASTContext &Ctx) const { const ScanfConversionSpecifier &CS = getConversionSpecifier(); if (!CS.consumesDataArgument()) - return ScanfArgTypeResult::Invalid(); + return ArgType::Invalid(); switch(CS.getKind()) { // Signed int. case ConversionSpecifier::dArg: case ConversionSpecifier::iArg: switch (LM.getKind()) { - case LengthModifier::None: return ArgTypeResult(Ctx.IntTy); + case LengthModifier::None: + return ArgType::PtrTo(Ctx.IntTy); case LengthModifier::AsChar: - return ArgTypeResult(ArgTypeResult::AnyCharTy); - case LengthModifier::AsShort: return ArgTypeResult(Ctx.ShortTy); - case LengthModifier::AsLong: return ArgTypeResult(Ctx.LongTy); + return ArgType::PtrTo(ArgType::AnyCharTy); + case LengthModifier::AsShort: + return ArgType::PtrTo(Ctx.ShortTy); + case LengthModifier::AsLong: + return ArgType::PtrTo(Ctx.LongTy); case LengthModifier::AsLongLong: case LengthModifier::AsQuad: - return ArgTypeResult(Ctx.LongLongTy); + return ArgType::PtrTo(Ctx.LongLongTy); case LengthModifier::AsIntMax: - return ScanfArgTypeResult(Ctx.getIntMaxType(), "intmax_t *"); + return ArgType::PtrTo(ArgType(Ctx.getIntMaxType(), "intmax_t")); case LengthModifier::AsSizeT: // FIXME: ssize_t. 
- return ScanfArgTypeResult(); + return ArgType(); case LengthModifier::AsPtrDiff: - return ScanfArgTypeResult(Ctx.getPointerDiffType(), "ptrdiff_t *"); + return ArgType::PtrTo(ArgType(Ctx.getPointerDiffType(), "ptrdiff_t")); case LengthModifier::AsLongDouble: // GNU extension. - return ArgTypeResult(Ctx.LongLongTy); - case LengthModifier::AsAllocate: return ScanfArgTypeResult::Invalid(); - case LengthModifier::AsMAllocate: return ScanfArgTypeResult::Invalid(); + return ArgType::PtrTo(Ctx.LongLongTy); + case LengthModifier::AsAllocate: + return ArgType::Invalid(); + case LengthModifier::AsMAllocate: + return ArgType::Invalid(); } // Unsigned int. @@ -233,25 +237,31 @@ ScanfArgTypeResult ScanfSpecifier::getArgType(ASTContext &Ctx) const { case ConversionSpecifier::xArg: case ConversionSpecifier::XArg: switch (LM.getKind()) { - case LengthModifier::None: return ArgTypeResult(Ctx.UnsignedIntTy); - case LengthModifier::AsChar: return ArgTypeResult(Ctx.UnsignedCharTy); - case LengthModifier::AsShort: return ArgTypeResult(Ctx.UnsignedShortTy); - case LengthModifier::AsLong: return ArgTypeResult(Ctx.UnsignedLongTy); + case LengthModifier::None: + return ArgType::PtrTo(Ctx.UnsignedIntTy); + case LengthModifier::AsChar: + return ArgType::PtrTo(Ctx.UnsignedCharTy); + case LengthModifier::AsShort: + return ArgType::PtrTo(Ctx.UnsignedShortTy); + case LengthModifier::AsLong: + return ArgType::PtrTo(Ctx.UnsignedLongTy); case LengthModifier::AsLongLong: case LengthModifier::AsQuad: - return ArgTypeResult(Ctx.UnsignedLongLongTy); + return ArgType::PtrTo(Ctx.UnsignedLongLongTy); case LengthModifier::AsIntMax: - return ScanfArgTypeResult(Ctx.getUIntMaxType(), "uintmax_t *"); + return ArgType::PtrTo(ArgType(Ctx.getUIntMaxType(), "uintmax_t")); case LengthModifier::AsSizeT: - return ScanfArgTypeResult(Ctx.getSizeType(), "size_t *"); + return ArgType::PtrTo(ArgType(Ctx.getSizeType(), "size_t")); case LengthModifier::AsPtrDiff: // FIXME: Unsigned version of ptrdiff_t? - return ScanfArgTypeResult(); + return ArgType(); case LengthModifier::AsLongDouble: // GNU extension. - return ArgTypeResult(Ctx.UnsignedLongLongTy); - case LengthModifier::AsAllocate: return ScanfArgTypeResult::Invalid(); - case LengthModifier::AsMAllocate: return ScanfArgTypeResult::Invalid(); + return ArgType::PtrTo(Ctx.UnsignedLongLongTy); + case LengthModifier::AsAllocate: + return ArgType::Invalid(); + case LengthModifier::AsMAllocate: + return ArgType::Invalid(); } // Float. @@ -264,12 +274,14 @@ ScanfArgTypeResult ScanfSpecifier::getArgType(ASTContext &Ctx) const { case ConversionSpecifier::gArg: case ConversionSpecifier::GArg: switch (LM.getKind()) { - case LengthModifier::None: return ArgTypeResult(Ctx.FloatTy); - case LengthModifier::AsLong: return ArgTypeResult(Ctx.DoubleTy); + case LengthModifier::None: + return ArgType::PtrTo(Ctx.FloatTy); + case LengthModifier::AsLong: + return ArgType::PtrTo(Ctx.DoubleTy); case LengthModifier::AsLongDouble: - return ArgTypeResult(Ctx.LongDoubleTy); + return ArgType::PtrTo(Ctx.LongDoubleTy); default: - return ScanfArgTypeResult::Invalid(); + return ArgType::Invalid(); } // Char, string and scanlist. 
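
All of the scanf cases above now produce explicit ArgType::PtrTo(...) results, which is what makes the separate ScanfArgTypeResult class (deleted at the end of this file) unnecessary. The asymmetry being encoded, in a short illustration:

    #include <cstdio>

    int main() {
      int n = 42;
      std::printf("%d\n", n);      // printf %d: ArgType is int
      std::sscanf("7", "%d", &n);  // scanf %d: ArgType is PtrTo(int), i.e. int *
      std::printf("%d\n", n);      // prints 7
      return 0;
    }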
@@ -277,37 +289,65 @@ ScanfArgTypeResult ScanfSpecifier::getArgType(ASTContext &Ctx) const { case ConversionSpecifier::sArg: case ConversionSpecifier::ScanListArg: switch (LM.getKind()) { - case LengthModifier::None: return ScanfArgTypeResult::CStrTy; + case LengthModifier::None: + return ArgType::PtrTo(ArgType::AnyCharTy); case LengthModifier::AsLong: - return ScanfArgTypeResult(ScanfArgTypeResult::WCStrTy, "wchar_t *"); + return ArgType::PtrTo(ArgType(Ctx.getWCharType(), "wchar_t")); case LengthModifier::AsAllocate: case LengthModifier::AsMAllocate: - return ScanfArgTypeResult(ArgTypeResult::CStrTy); + return ArgType::PtrTo(ArgType::CStrTy); default: - return ScanfArgTypeResult::Invalid(); + return ArgType::Invalid(); } case ConversionSpecifier::CArg: case ConversionSpecifier::SArg: // FIXME: Mac OS X specific? switch (LM.getKind()) { case LengthModifier::None: - return ScanfArgTypeResult(ScanfArgTypeResult::WCStrTy, "wchar_t *"); + return ArgType::PtrTo(ArgType(Ctx.getWCharType(), "wchar_t")); case LengthModifier::AsAllocate: case LengthModifier::AsMAllocate: - return ScanfArgTypeResult(ArgTypeResult::WCStrTy, "wchar_t **"); + return ArgType::PtrTo(ArgType(ArgType::WCStrTy, "wchar_t *")); default: - return ScanfArgTypeResult::Invalid(); + return ArgType::Invalid(); } // Pointer. case ConversionSpecifier::pArg: - return ScanfArgTypeResult(ArgTypeResult(ArgTypeResult::CPointerTy)); + return ArgType::PtrTo(ArgType::CPointerTy); + + // Write-back. + case ConversionSpecifier::nArg: + switch (LM.getKind()) { + case LengthModifier::None: + return ArgType::PtrTo(Ctx.IntTy); + case LengthModifier::AsChar: + return ArgType::PtrTo(Ctx.SignedCharTy); + case LengthModifier::AsShort: + return ArgType::PtrTo(Ctx.ShortTy); + case LengthModifier::AsLong: + return ArgType::PtrTo(Ctx.LongTy); + case LengthModifier::AsLongLong: + case LengthModifier::AsQuad: + return ArgType::PtrTo(Ctx.LongLongTy); + case LengthModifier::AsIntMax: + return ArgType::PtrTo(ArgType(Ctx.getIntMaxType(), "intmax_t")); + case LengthModifier::AsSizeT: + return ArgType(); // FIXME: ssize_t + case LengthModifier::AsPtrDiff: + return ArgType::PtrTo(ArgType(Ctx.getPointerDiffType(), "ptrdiff_t")); + case LengthModifier::AsLongDouble: + return ArgType(); // FIXME: Is this a known extension? + case LengthModifier::AsAllocate: + case LengthModifier::AsMAllocate: + return ArgType::Invalid(); + } default: break; } - return ScanfArgTypeResult(); + return ArgType(); } bool ScanfSpecifier::fixType(QualType QT, const LangOptions &LangOpt, @@ -315,7 +355,16 @@ bool ScanfSpecifier::fixType(QualType QT, const LangOptions &LangOpt, if (!QT->isPointerType()) return false; + // %n is different from other conversion specifiers; don't try to fix it. + if (CS.getKind() == ConversionSpecifier::nArg) + return false; + QualType PT = QT->getPointeeType(); + + // If it's an enum, get its underlying type. + if (const EnumType *ETy = QT->getAs()) + QT = ETy->getDecl()->getIntegerType(); + const BuiltinType *BT = PT->getAs(); if (!BT) return false; @@ -377,25 +426,12 @@ bool ScanfSpecifier::fixType(QualType QT, const LangOptions &LangOpt, } // Handle size_t, ptrdiff_t, etc. that have dedicated length modifiers in C99. - if (isa(PT) && (LangOpt.C99 || LangOpt.CPlusPlus0x)) { - const IdentifierInfo *Identifier = QT.getBaseTypeIdentifier(); - if (Identifier->getName() == "size_t") { - LM.setKind(LengthModifier::AsSizeT); - } else if (Identifier->getName() == "ssize_t") { - // Not C99, but common in Unix. 
- LM.setKind(LengthModifier::AsSizeT); - } else if (Identifier->getName() == "intmax_t") { - LM.setKind(LengthModifier::AsIntMax); - } else if (Identifier->getName() == "uintmax_t") { - LM.setKind(LengthModifier::AsIntMax); - } else if (Identifier->getName() == "ptrdiff_t") { - LM.setKind(LengthModifier::AsPtrDiff); - } - } + if (isa(PT) && (LangOpt.C99 || LangOpt.CPlusPlus0x)) + namedTypeToLengthModifier(PT, LM); // If fixing the length modifier was enough, we are done. - const analyze_scanf::ScanfArgTypeResult &ATR = getArgType(Ctx); - if (hasValidLengthModifier() && ATR.isValid() && ATR.matchesType(Ctx, QT)) + const analyze_scanf::ArgType &AT = getArgType(Ctx); + if (hasValidLengthModifier() && AT.isValid() && AT.matchesType(Ctx, QT)) return true; // Figure out the conversion specifier. @@ -452,48 +488,3 @@ bool clang::analyze_format_string::ParseScanfString(FormatStringHandler &H, assert(I == E && "Format string not exhausted"); return false; } - -bool ScanfArgTypeResult::matchesType(ASTContext& C, QualType argTy) const { - switch (K) { - case InvalidTy: - llvm_unreachable("ArgTypeResult must be valid"); - case UnknownTy: - return true; - case CStrTy: - return ArgTypeResult(ArgTypeResult::CStrTy).matchesType(C, argTy); - case WCStrTy: - return ArgTypeResult(ArgTypeResult::WCStrTy).matchesType(C, argTy); - case PtrToArgTypeResultTy: { - const PointerType *PT = argTy->getAs(); - if (!PT) - return false; - return A.matchesType(C, PT->getPointeeType()); - } - } - - llvm_unreachable("Invalid ScanfArgTypeResult Kind!"); -} - -QualType ScanfArgTypeResult::getRepresentativeType(ASTContext &C) const { - switch (K) { - case InvalidTy: - llvm_unreachable("No representative type for Invalid ArgTypeResult"); - case UnknownTy: - return QualType(); - case CStrTy: - return C.getPointerType(C.CharTy); - case WCStrTy: - return C.getPointerType(C.getWCharType()); - case PtrToArgTypeResultTy: - return C.getPointerType(A.getRepresentativeType(C)); - } - - llvm_unreachable("Invalid ScanfArgTypeResult Kind!"); -} - -std::string ScanfArgTypeResult::getRepresentativeTypeName(ASTContext& C) const { - std::string S = getRepresentativeType(C).getAsString(); - if (!Name) - return std::string("'") + S + "'"; - return std::string("'") + Name + "' (aka '" + S + "')"; -} diff --git a/lib/Analysis/ThreadSafety.cpp b/lib/Analysis/ThreadSafety.cpp index 2f7e794..5954682 100644 --- a/lib/Analysis/ThreadSafety.cpp +++ b/lib/Analysis/ThreadSafety.cpp @@ -26,6 +26,7 @@ #include "clang/AST/StmtVisitor.h" #include "clang/Basic/SourceManager.h" #include "clang/Basic/SourceLocation.h" +#include "clang/Basic/OperatorKinds.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/FoldingSet.h" #include "llvm/ADT/ImmutableMap.h" @@ -45,8 +46,15 @@ ThreadSafetyHandler::~ThreadSafetyHandler() {} namespace { -/// \brief A MutexID object uniquely identifies a particular mutex, and -/// is built from an Expr* (i.e. calling a lock function). +/// SExpr implements a simple expression language that is used to store, +/// compare, and pretty-print C++ expressions. Unlike a clang Expr, a SExpr +/// does not capture surface syntax, and it does not distinguish between +/// C++ concepts, like pointers and references, that have no real semantic +/// differences. This simplicity allows SExprs to be meaningfully compared, +/// e.g. +/// (x) = x +/// (*this).foo = this->foo +/// *&a = a /// /// Thread-safety analysis works by comparing lock expressions. 
Within the /// body of a function, an expression such as "x->foo->bar.mu" will resolve to @@ -59,41 +67,194 @@ namespace { /// /// The current implementation assumes, but does not verify, that multiple uses /// of the same lock expression satisfies these criteria. -/// -/// Clang introduces an additional wrinkle, which is that it is difficult to -/// derive canonical expressions, or compare expressions directly for equality. -/// Thus, we identify a mutex not by an Expr, but by the list of named -/// declarations that are referenced by the Expr. In other words, -/// x->foo->bar.mu will be a four element vector with the Decls for -/// mu, bar, and foo, and x. The vector will uniquely identify the expression -/// for all practical purposes. Null is used to denote 'this'. -/// -/// Note we will need to perform substitution on "this" and function parameter -/// names when constructing a lock expression. -/// -/// For example: -/// class C { Mutex Mu; void lock() EXCLUSIVE_LOCK_FUNCTION(this->Mu); }; -/// void myFunc(C *X) { ... X->lock() ... } -/// The original expression for the mutex acquired by myFunc is "this->Mu", but -/// "X" is substituted for "this" so we get X->Mu(); -/// -/// For another example: -/// foo(MyList *L) EXCLUSIVE_LOCKS_REQUIRED(L->Mu) { ... } -/// MyList *MyL; -/// foo(MyL); // requires lock MyL->Mu to be held -class MutexID { - SmallVector DeclSeq; - - /// Build a Decl sequence representing the lock from the given expression. +class SExpr { +private: + enum ExprOp { + EOP_Nop, //< No-op + EOP_Wildcard, //< Matches anything. + EOP_This, //< This keyword. + EOP_NVar, //< Named variable. + EOP_LVar, //< Local variable. + EOP_Dot, //< Field access + EOP_Call, //< Function call + EOP_MCall, //< Method call + EOP_Index, //< Array index + EOP_Unary, //< Unary operation + EOP_Binary, //< Binary operation + EOP_Unknown //< Catchall for everything else + }; + + + class SExprNode { + private: + unsigned char Op; //< Opcode of the root node + unsigned char Flags; //< Additional opcode-specific data + unsigned short Sz; //< Number of child nodes + const void* Data; //< Additional opcode-specific data + + public: + SExprNode(ExprOp O, unsigned F, const void* D) + : Op(static_cast(O)), + Flags(static_cast(F)), Sz(1), Data(D) + { } + + unsigned size() const { return Sz; } + void setSize(unsigned S) { Sz = S; } + + ExprOp kind() const { return static_cast(Op); } + + const NamedDecl* getNamedDecl() const { + assert(Op == EOP_NVar || Op == EOP_LVar || Op == EOP_Dot); + return reinterpret_cast(Data); + } + + const NamedDecl* getFunctionDecl() const { + assert(Op == EOP_Call || Op == EOP_MCall); + return reinterpret_cast(Data); + } + + bool isArrow() const { return Op == EOP_Dot && Flags == 1; } + void setArrow(bool A) { Flags = A ? 1 : 0; } + + unsigned arity() const { + switch (Op) { + case EOP_Nop: return 0; + case EOP_Wildcard: return 0; + case EOP_NVar: return 0; + case EOP_LVar: return 0; + case EOP_This: return 0; + case EOP_Dot: return 1; + case EOP_Call: return Flags+1; // First arg is function. + case EOP_MCall: return Flags+1; // First arg is implicit obj. + case EOP_Index: return 2; + case EOP_Unary: return 1; + case EOP_Binary: return 2; + case EOP_Unknown: return Flags; + } + return 0; + } + + bool operator==(const SExprNode& Other) const { + // Ignore flags and size -- they don't matter. 
+ return (Op == Other.Op && + Data == Other.Data); + } + + bool operator!=(const SExprNode& Other) const { + return !(*this == Other); + } + + bool matches(const SExprNode& Other) const { + return (*this == Other) || + (Op == EOP_Wildcard) || + (Other.Op == EOP_Wildcard); + } + }; + + + /// \brief Encapsulates the lexical context of a function call. The lexical + /// context includes the arguments to the call, including the implicit object + /// argument. When an attribute containing a mutex expression is attached to + /// a method, the expression may refer to formal parameters of the method. + /// Actual arguments must be substituted for formal parameters to derive + /// the appropriate mutex expression in the lexical context where the function + /// is called. PrevCtx holds the context in which the arguments themselves + /// should be evaluated; multiple calling contexts can be chained together + /// by the lock_returned attribute. + struct CallingContext { + const NamedDecl* AttrDecl; // The decl to which the attribute is attached. + Expr* SelfArg; // Implicit object argument -- e.g. 'this' + bool SelfArrow; // is Self referred to with -> or .? + unsigned NumArgs; // Number of funArgs + Expr** FunArgs; // Function arguments + CallingContext* PrevCtx; // The previous context; or 0 if none. + + CallingContext(const NamedDecl *D = 0, Expr *S = 0, + unsigned N = 0, Expr **A = 0, CallingContext *P = 0) + : AttrDecl(D), SelfArg(S), SelfArrow(false), + NumArgs(N), FunArgs(A), PrevCtx(P) + { } + }; + + typedef SmallVector NodeVector; + +private: + // A SExpr is a list of SExprNodes in prefix order. The Size field allows + // the list to be traversed as a tree. + NodeVector NodeVec; + +private: + unsigned makeNop() { + NodeVec.push_back(SExprNode(EOP_Nop, 0, 0)); + return NodeVec.size()-1; + } + + unsigned makeWildcard() { + NodeVec.push_back(SExprNode(EOP_Wildcard, 0, 0)); + return NodeVec.size()-1; + } + + unsigned makeNamedVar(const NamedDecl *D) { + NodeVec.push_back(SExprNode(EOP_NVar, 0, D)); + return NodeVec.size()-1; + } + + unsigned makeLocalVar(const NamedDecl *D) { + NodeVec.push_back(SExprNode(EOP_LVar, 0, D)); + return NodeVec.size()-1; + } + + unsigned makeThis() { + NodeVec.push_back(SExprNode(EOP_This, 0, 0)); + return NodeVec.size()-1; + } + + unsigned makeDot(const NamedDecl *D, bool Arrow) { + NodeVec.push_back(SExprNode(EOP_Dot, Arrow ? 1 : 0, D)); + return NodeVec.size()-1; + } + + unsigned makeCall(unsigned NumArgs, const NamedDecl *D) { + NodeVec.push_back(SExprNode(EOP_Call, NumArgs, D)); + return NodeVec.size()-1; + } + + unsigned makeMCall(unsigned NumArgs, const NamedDecl *D) { + NodeVec.push_back(SExprNode(EOP_MCall, NumArgs, D)); + return NodeVec.size()-1; + } + + unsigned makeIndex() { + NodeVec.push_back(SExprNode(EOP_Index, 0, 0)); + return NodeVec.size()-1; + } + + unsigned makeUnary() { + NodeVec.push_back(SExprNode(EOP_Unary, 0, 0)); + return NodeVec.size()-1; + } + + unsigned makeBinary() { + NodeVec.push_back(SExprNode(EOP_Binary, 0, 0)); + return NodeVec.size()-1; + } + + unsigned makeUnknown(unsigned Arity) { + NodeVec.push_back(SExprNode(EOP_Unknown, Arity, 0)); + return NodeVec.size()-1; + } + + /// Build an SExpr from the given C++ expression. /// Recursive function that terminates on DeclRefExpr. - /// Note: this function merely creates a MutexID; it does not check to + /// Note: this function merely creates a SExpr; it does not check to /// ensure that the original expression is a valid mutex expression. 
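
NodeVec stores an SExpr as a flat vector in prefix order, with each node carrying the size of its own subtree; "next sibling" is then a constant-time index jump rather than a pointer chase. A minimal standalone model of that encoding (node payloads reduced to a single opcode character):

    #include <cassert>
    #include <vector>

    struct Node { char Op; unsigned Size; };  // Size = nodes in this subtree

    unsigned nextSibling(const std::vector<Node> &V, unsigned i) {
      return i + V[i].Size;  // prefix order: skip the whole subtree
    }

    int main() {
      // A root with two leaf children, stored in prefix order:
      // index 0: root (subtree size 3), 1: first child, 2: second child.
      std::vector<Node> V;
      V.push_back(Node{'+', 3});
      V.push_back(Node{'x', 1});
      V.push_back(Node{'y', 1});
      assert(nextSibling(V, 1) == 2);        // sibling of 'x' is 'y'
      assert(nextSibling(V, 0) == V.size()); // root has no sibling
      return 0;
    }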
- void buildMutexID(Expr *Exp, const NamedDecl *D, Expr *Parent,
- unsigned NumArgs, Expr **FunArgs) {
- if (!Exp) {
- DeclSeq.clear();
- return;
- }
+ ///
+ /// NDeref returns the number of Dereference and AddressOf operations
+ /// preceding the Expr; this is used to decide whether to pretty-print
+ /// SExprs with . or ->.
+ unsigned buildSExpr(Expr *Exp, CallingContext* CallCtx, int* NDeref = 0) {
+ if (!Exp)
+ return 0;
 if (DeclRefExpr *DRE = dyn_cast<DeclRefExpr>(Exp)) {
 NamedDecl *ND = cast<NamedDecl>(DRE->getDecl()->getCanonicalDecl());
@@ -103,144 +264,246 @@ class MutexID {
 cast<FunctionDecl>(PV->getDeclContext())->getCanonicalDecl();
 unsigned i = PV->getFunctionScopeIndex();
- if (FunArgs && FD == D->getCanonicalDecl()) {
+ if (CallCtx && CallCtx->FunArgs &&
+ FD == CallCtx->AttrDecl->getCanonicalDecl()) {
 // Substitute call arguments for references to function parameters
- assert(i < NumArgs);
- buildMutexID(FunArgs[i], D, 0, 0, 0);
- return;
+ assert(i < CallCtx->NumArgs);
+ return buildSExpr(CallCtx->FunArgs[i], CallCtx->PrevCtx, NDeref);
 }
 // Map the param back to the param of the original function declaration.
- DeclSeq.push_back(FD->getParamDecl(i));
- return;
+ makeNamedVar(FD->getParamDecl(i));
+ return 1;
 }
 // Not a function parameter -- just store the reference.
- DeclSeq.push_back(ND);
- } else if (MemberExpr *ME = dyn_cast<MemberExpr>(Exp)) {
- NamedDecl *ND = ME->getMemberDecl();
- DeclSeq.push_back(ND);
- buildMutexID(ME->getBase(), D, Parent, NumArgs, FunArgs);
+ makeNamedVar(ND);
+ return 1;
 } else if (isa<CXXThisExpr>(Exp)) {
- if (Parent)
- buildMutexID(Parent, D, 0, 0, 0);
+ // Substitute parent for 'this'
+ if (CallCtx && CallCtx->SelfArg) {
+ if (!CallCtx->SelfArrow && NDeref)
+ // 'this' is a pointer, but self is not, so need to take address.
+ --(*NDeref);
+ return buildSExpr(CallCtx->SelfArg, CallCtx->PrevCtx, NDeref);
+ }
 else {
- DeclSeq.push_back(0); // Use 0 to represent 'this'.
- return; // mutexID is still valid in this case
+ makeThis();
+ return 1;
 }
+ } else if (MemberExpr *ME = dyn_cast<MemberExpr>(Exp)) {
+ NamedDecl *ND = ME->getMemberDecl();
+ int ImplicitDeref = ME->isArrow() ? 1 : 0;
+ unsigned Root = makeDot(ND, false);
+ unsigned Sz = buildSExpr(ME->getBase(), CallCtx, &ImplicitDeref);
+ NodeVec[Root].setArrow(ImplicitDeref > 0);
+ NodeVec[Root].setSize(Sz + 1);
+ return Sz + 1;
 } else if (CXXMemberCallExpr *CMCE = dyn_cast<CXXMemberCallExpr>(Exp)) {
- DeclSeq.push_back(CMCE->getMethodDecl()->getCanonicalDecl());
- buildMutexID(CMCE->getImplicitObjectArgument(),
- D, Parent, NumArgs, FunArgs);
+ // When calling a function with a lock_returned attribute, replace
+ // the function call with the expression in lock_returned.
+ if (LockReturnedAttr* At =
+ CMCE->getMethodDecl()->getAttr<LockReturnedAttr>()) {
+ CallingContext LRCallCtx(CMCE->getMethodDecl());
+ LRCallCtx.SelfArg = CMCE->getImplicitObjectArgument();
+ LRCallCtx.SelfArrow =
+ dyn_cast<MemberExpr>(CMCE->getCallee())->isArrow();
+ LRCallCtx.NumArgs = CMCE->getNumArgs();
+ LRCallCtx.FunArgs = CMCE->getArgs();
+ LRCallCtx.PrevCtx = CallCtx;
+ return buildSExpr(At->getArg(), &LRCallCtx);
+ }
+ // Hack to treat smart pointers and iterators as pointers;
+ // ignore any method named get().
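
A toy model of the CallingContext substitution used above: expressions are plain strings here, and "param#N" stands in for a reference to the N-th formal parameter. Contexts chain through PrevCtx, just as they do when a lock_returned attribute redirects one call into another:

    #include <cassert>
    #include <string>
    #include <vector>

    struct Ctx {
      std::vector<std::string> FunArgs;  // actual arguments, by position
      const Ctx *PrevCtx;                // context for evaluating those args
    };

    // "param#i" resolves through the chain; anything else names itself.
    std::string resolve(const std::string &E, const Ctx *C) {
      if (C && E.rfind("param#", 0) == 0) {
        unsigned i = E[6] - '0';
        assert(i < C->FunArgs.size());
        return resolve(C->FunArgs[i], C->PrevCtx);
      }
      return E;
    }

    int main() {
      // Inner call was itself reached from an outer call bar(MyL):
      // param#0 in the inner attribute resolves through both contexts.
      Ctx Outer = { {"MyL"}, 0 };
      Ctx Inner = { {"param#0"}, &Outer };
      assert(resolve("param#0", &Inner) == "MyL");
      return 0;
    }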
+ if (CMCE->getMethodDecl()->getNameAsString() == "get" && + CMCE->getNumArgs() == 0) { + if (NDeref && dyn_cast(CMCE->getCallee())->isArrow()) + ++(*NDeref); + return buildSExpr(CMCE->getImplicitObjectArgument(), CallCtx, NDeref); + } unsigned NumCallArgs = CMCE->getNumArgs(); + unsigned Root = + makeMCall(NumCallArgs, CMCE->getMethodDecl()->getCanonicalDecl()); + unsigned Sz = buildSExpr(CMCE->getImplicitObjectArgument(), CallCtx); Expr** CallArgs = CMCE->getArgs(); for (unsigned i = 0; i < NumCallArgs; ++i) { - buildMutexID(CallArgs[i], D, Parent, NumArgs, FunArgs); + Sz += buildSExpr(CallArgs[i], CallCtx); } + NodeVec[Root].setSize(Sz + 1); + return Sz + 1; } else if (CallExpr *CE = dyn_cast(Exp)) { - buildMutexID(CE->getCallee(), D, Parent, NumArgs, FunArgs); + if (LockReturnedAttr* At = + CE->getDirectCallee()->getAttr()) { + CallingContext LRCallCtx(CE->getDirectCallee()); + LRCallCtx.NumArgs = CE->getNumArgs(); + LRCallCtx.FunArgs = CE->getArgs(); + LRCallCtx.PrevCtx = CallCtx; + return buildSExpr(At->getArg(), &LRCallCtx); + } + // Treat smart pointers and iterators as pointers; + // ignore the * and -> operators. + if (CXXOperatorCallExpr *OE = dyn_cast(CE)) { + OverloadedOperatorKind k = OE->getOperator(); + if (k == OO_Star) { + if (NDeref) ++(*NDeref); + return buildSExpr(OE->getArg(0), CallCtx, NDeref); + } + else if (k == OO_Arrow) { + return buildSExpr(OE->getArg(0), CallCtx, NDeref); + } + } unsigned NumCallArgs = CE->getNumArgs(); + unsigned Root = makeCall(NumCallArgs, 0); + unsigned Sz = buildSExpr(CE->getCallee(), CallCtx); Expr** CallArgs = CE->getArgs(); for (unsigned i = 0; i < NumCallArgs; ++i) { - buildMutexID(CallArgs[i], D, Parent, NumArgs, FunArgs); + Sz += buildSExpr(CallArgs[i], CallCtx); } + NodeVec[Root].setSize(Sz+1); + return Sz+1; } else if (BinaryOperator *BOE = dyn_cast(Exp)) { - buildMutexID(BOE->getLHS(), D, Parent, NumArgs, FunArgs); - buildMutexID(BOE->getRHS(), D, Parent, NumArgs, FunArgs); + unsigned Root = makeBinary(); + unsigned Sz = buildSExpr(BOE->getLHS(), CallCtx); + Sz += buildSExpr(BOE->getRHS(), CallCtx); + NodeVec[Root].setSize(Sz); + return Sz; } else if (UnaryOperator *UOE = dyn_cast(Exp)) { - buildMutexID(UOE->getSubExpr(), D, Parent, NumArgs, FunArgs); + // Ignore & and * operators -- they're no-ops. + // However, we try to figure out whether the expression is a pointer, + // so we can use . and -> appropriately in error messages. + if (UOE->getOpcode() == UO_Deref) { + if (NDeref) ++(*NDeref); + return buildSExpr(UOE->getSubExpr(), CallCtx, NDeref); + } + if (UOE->getOpcode() == UO_AddrOf) { + if (DeclRefExpr* DRE = dyn_cast(UOE->getSubExpr())) { + if (DRE->getDecl()->isCXXInstanceMember()) { + // This is a pointer-to-member expression, e.g. &MyClass::mu_. + // We interpret this syntax specially, as a wildcard. 
+ unsigned Root = makeDot(DRE->getDecl(), false); + makeWildcard(); + NodeVec[Root].setSize(2); + return 2; + } + } + if (NDeref) --(*NDeref); + return buildSExpr(UOE->getSubExpr(), CallCtx, NDeref); + } + unsigned Root = makeUnary(); + unsigned Sz = buildSExpr(UOE->getSubExpr(), CallCtx); + NodeVec[Root].setSize(Sz); + return Sz; } else if (ArraySubscriptExpr *ASE = dyn_cast(Exp)) { - buildMutexID(ASE->getBase(), D, Parent, NumArgs, FunArgs); - buildMutexID(ASE->getIdx(), D, Parent, NumArgs, FunArgs); + unsigned Root = makeIndex(); + unsigned Sz = buildSExpr(ASE->getBase(), CallCtx); + Sz += buildSExpr(ASE->getIdx(), CallCtx); + NodeVec[Root].setSize(Sz); + return Sz; } else if (AbstractConditionalOperator *CE = - dyn_cast(Exp)) { - buildMutexID(CE->getCond(), D, Parent, NumArgs, FunArgs); - buildMutexID(CE->getTrueExpr(), D, Parent, NumArgs, FunArgs); - buildMutexID(CE->getFalseExpr(), D, Parent, NumArgs, FunArgs); + dyn_cast(Exp)) { + unsigned Root = makeUnknown(3); + unsigned Sz = buildSExpr(CE->getCond(), CallCtx); + Sz += buildSExpr(CE->getTrueExpr(), CallCtx); + Sz += buildSExpr(CE->getFalseExpr(), CallCtx); + NodeVec[Root].setSize(Sz); + return Sz; } else if (ChooseExpr *CE = dyn_cast(Exp)) { - buildMutexID(CE->getCond(), D, Parent, NumArgs, FunArgs); - buildMutexID(CE->getLHS(), D, Parent, NumArgs, FunArgs); - buildMutexID(CE->getRHS(), D, Parent, NumArgs, FunArgs); + unsigned Root = makeUnknown(3); + unsigned Sz = buildSExpr(CE->getCond(), CallCtx); + Sz += buildSExpr(CE->getLHS(), CallCtx); + Sz += buildSExpr(CE->getRHS(), CallCtx); + NodeVec[Root].setSize(Sz); + return Sz; } else if (CastExpr *CE = dyn_cast(Exp)) { - buildMutexID(CE->getSubExpr(), D, Parent, NumArgs, FunArgs); + return buildSExpr(CE->getSubExpr(), CallCtx, NDeref); } else if (ParenExpr *PE = dyn_cast(Exp)) { - buildMutexID(PE->getSubExpr(), D, Parent, NumArgs, FunArgs); + return buildSExpr(PE->getSubExpr(), CallCtx, NDeref); + } else if (ExprWithCleanups *EWC = dyn_cast(Exp)) { + return buildSExpr(EWC->getSubExpr(), CallCtx, NDeref); + } else if (CXXBindTemporaryExpr *E = dyn_cast(Exp)) { + return buildSExpr(E->getSubExpr(), CallCtx, NDeref); } else if (isa(Exp) || - isa(Exp) || - isa(Exp) || - isa(Exp) || - isa(Exp) || - isa(Exp) || - isa(Exp) || - isa(Exp) || - isa(Exp)) { - return; // FIXME: Ignore literals for now + isa(Exp) || + isa(Exp) || + isa(Exp) || + isa(Exp) || + isa(Exp) || + isa(Exp) || + isa(Exp) || + isa(Exp)) { + makeNop(); + return 1; // FIXME: Ignore literals for now } else { - // Ignore. FIXME: mark as invalid expression? + makeNop(); + return 1; // Ignore. FIXME: mark as invalid expression? } } - /// \brief Construct a MutexID from an expression. + /// \brief Construct a SExpr from an expression. /// \param MutexExp The original mutex expression within an attribute /// \param DeclExp An expression involving the Decl on which the attribute /// occurs. /// \param D The declaration to which the lock/unlock attribute is attached. - void buildMutexIDFromExp(Expr *MutexExp, Expr *DeclExp, const NamedDecl *D) { - Expr *Parent = 0; - unsigned NumArgs = 0; - Expr **FunArgs = 0; + void buildSExprFromExpr(Expr *MutexExp, Expr *DeclExp, const NamedDecl *D) { + CallingContext CallCtx(D); // If we are processing a raw attribute expression, with no substitutions. 
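
The NDeref counter threaded through buildSExpr above only tracks the net number of dereferences, so the pretty-printer can choose between '.' and '->'. A loose, order-insensitive reduction of that bookkeeping, purely for illustration:

    #include <cassert>
    #include <string>

    // Counts net '*' minus '&' around a toy expression, mirroring how
    // UO_Deref increments and UO_AddrOf decrements NDeref.
    int netDerefs(const std::string &E) {
      int NDeref = 0;
      for (std::string::size_type i = 0; i < E.size(); ++i) {
        if (E[i] == '*') ++NDeref;
        else if (E[i] == '&') --NDeref;
      }
      return NDeref;
    }

    int main() {
      assert(netDerefs("*&p") == 0);  // *&a cancels out: prints as "p.mu"
      assert(netDerefs("*p") == 1);   // one net deref: prints as "p->mu"
      return 0;
    }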
if (DeclExp == 0) { - buildMutexID(MutexExp, D, 0, 0, 0); + buildSExpr(MutexExp, 0); return; } - // Examine DeclExp to find Parent and FunArgs, which are used to substitute + // Examine DeclExp to find SelfArg and FunArgs, which are used to substitute // for formal parameters when we call buildMutexID later. if (MemberExpr *ME = dyn_cast(DeclExp)) { - Parent = ME->getBase(); + CallCtx.SelfArg = ME->getBase(); + CallCtx.SelfArrow = ME->isArrow(); } else if (CXXMemberCallExpr *CE = dyn_cast(DeclExp)) { - Parent = CE->getImplicitObjectArgument(); - NumArgs = CE->getNumArgs(); - FunArgs = CE->getArgs(); + CallCtx.SelfArg = CE->getImplicitObjectArgument(); + CallCtx.SelfArrow = dyn_cast(CE->getCallee())->isArrow(); + CallCtx.NumArgs = CE->getNumArgs(); + CallCtx.FunArgs = CE->getArgs(); } else if (CallExpr *CE = dyn_cast(DeclExp)) { - NumArgs = CE->getNumArgs(); - FunArgs = CE->getArgs(); + CallCtx.NumArgs = CE->getNumArgs(); + CallCtx.FunArgs = CE->getArgs(); } else if (CXXConstructExpr *CE = dyn_cast(DeclExp)) { - Parent = 0; // FIXME -- get the parent from DeclStmt - NumArgs = CE->getNumArgs(); - FunArgs = CE->getArgs(); + CallCtx.SelfArg = 0; // FIXME -- get the parent from DeclStmt + CallCtx.NumArgs = CE->getNumArgs(); + CallCtx.FunArgs = CE->getArgs(); } else if (D && isa(D)) { // There's no such thing as a "destructor call" in the AST. - Parent = DeclExp; + CallCtx.SelfArg = DeclExp; } // If the attribute has no arguments, then assume the argument is "this". if (MutexExp == 0) { - buildMutexID(Parent, D, 0, 0, 0); + buildSExpr(CallCtx.SelfArg, 0); return; } - buildMutexID(MutexExp, D, Parent, NumArgs, FunArgs); + // For most attributes. + buildSExpr(MutexExp, &CallCtx); } -public: - explicit MutexID(clang::Decl::EmptyShell e) { - DeclSeq.clear(); + /// \brief Get index of next sibling of node i. + unsigned getNextSibling(unsigned i) const { + return i + NodeVec[i].size(); } +public: + explicit SExpr(clang::Decl::EmptyShell e) { NodeVec.clear(); } + /// \param MutexExp The original mutex expression within an attribute /// \param DeclExp An expression involving the Decl on which the attribute /// occurs. /// \param D The declaration to which the lock/unlock attribute is attached. /// Caller must check isValid() after construction. - MutexID(Expr* MutexExp, Expr *DeclExp, const NamedDecl* D) { - buildMutexIDFromExp(MutexExp, DeclExp, D); + SExpr(Expr* MutexExp, Expr *DeclExp, const NamedDecl* D) { + buildSExprFromExpr(MutexExp, DeclExp, D); } /// Return true if this is a valid decl sequence. /// Caller must call this by hand after construction to handle errors. bool isValid() const { - return !DeclSeq.empty(); + return !NodeVec.empty(); } /// Issue a warning about an invalid lock expression @@ -255,44 +518,144 @@ public: Handler.handleInvalidLockExp(Loc); } - bool operator==(const MutexID &other) const { - return DeclSeq == other.DeclSeq; + bool operator==(const SExpr &other) const { + return NodeVec == other.NodeVec; } - bool operator!=(const MutexID &other) const { + bool operator!=(const SExpr &other) const { return !(*this == other); } - // SmallVector overloads Operator< to do lexicographic ordering. Note that - // we use pointer equality (and <) to compare NamedDecls. This means the order - // of MutexIDs in a lockset is nondeterministic. In order to output - // diagnostics in a deterministic ordering, we must order all diagnostics to - // output by SourceLocation when iterating through this lockset. 
- bool operator<(const MutexID &other) const {
- return DeclSeq < other.DeclSeq;
+ bool matches(const SExpr &Other, unsigned i = 0, unsigned j = 0) const {
+ if (NodeVec[i].matches(Other.NodeVec[j])) {
+ unsigned n = NodeVec[i].arity();
+ bool Result = true;
+ unsigned ci = i+1; // first child of i
+ unsigned cj = j+1; // first child of j
+ for (unsigned k = 0; k < n;
+ ++k, ci=getNextSibling(ci), cj = Other.getNextSibling(cj)) {
+ Result = Result && matches(Other, ci, cj);
+ }
+ return Result;
+ }
+ return false;
 }
- /// \brief Returns the name of the first Decl in the list for a given MutexID;
- /// e.g. the lock expression foo.bar() has name "bar".
- /// The caret will point unambiguously to the lock expression, so using this
- /// name in diagnostics is a way to get simple, and consistent, mutex names.
- /// We do not want to output the entire expression text for security reasons.
- std::string getName() const {
+ /// \brief Pretty print a lock expression for use in error messages.
+ std::string toString(unsigned i = 0) const {
 assert(isValid());
- if (!DeclSeq.front())
- return "this"; // Use 0 to represent 'this'.
- return DeclSeq.front()->getNameAsString();
+ if (i >= NodeVec.size())
+ return "";
+
+ const SExprNode* N = &NodeVec[i];
+ switch (N->kind()) {
+ case EOP_Nop:
+ return "_";
+ case EOP_Wildcard:
+ return "(?)";
+ case EOP_This:
+ return "this";
+ case EOP_NVar:
+ case EOP_LVar: {
+ return N->getNamedDecl()->getNameAsString();
+ }
+ case EOP_Dot: {
+ if (NodeVec[i+1].kind() == EOP_Wildcard) {
+ std::string S = "&";
+ S += N->getNamedDecl()->getQualifiedNameAsString();
+ return S;
+ }
+ std::string FieldName = N->getNamedDecl()->getNameAsString();
+ if (NodeVec[i+1].kind() == EOP_This)
+ return FieldName;
+
+ std::string S = toString(i+1);
+ if (N->isArrow())
+ return S + "->" + FieldName;
+ else
+ return S + "." + FieldName;
+ }
+ case EOP_Call: {
+ std::string S = toString(i+1) + "(";
+ unsigned NumArgs = N->arity()-1;
+ unsigned ci = getNextSibling(i+1);
+ for (unsigned k=0; k<NumArgs; ++k, ci = getNextSibling(ci)) {
+ S += toString(ci);
+ if (k+1 < NumArgs) S += ",";
+ }
+ S += ")";
+ return S;
+ }
+ case EOP_MCall: {
+ std::string S = toString(i+1) + ".";
+ if (const NamedDecl* D = N->getFunctionDecl())
+ S += D->getNameAsString() + "(";
+ else
+ S += "#(";
+ unsigned NumArgs = N->arity()-1;
+ unsigned ci = getNextSibling(i+1);
+ for (unsigned k=0; k<NumArgs; ++k, ci = getNextSibling(ci)) {
+ S += toString(ci);
+ if (k+1 < NumArgs) S += ",";
+ }
+ S += ")";
+ return S;
+ }
+ case EOP_Index: {
+ std::string S1 = toString(i+1);
+ std::string S2 = toString(i+1 + NodeVec[i+1].size());
+ return S1 + "[" + S2 + "]";
+ }
+ case EOP_Unary: {
+ std::string S = toString(i+1);
+ return "#" + S;
+ }
+ case EOP_Binary: {
+ std::string S1 = toString(i+1);
+ std::string S2 = toString(i+1 + NodeVec[i+1].size());
+ return "(" + S1 + "#" + S2 + ")";
+ }
+ case EOP_Unknown: {
+ unsigned NumChildren = N->arity();
+ if (NumChildren == 0)
+ return "(...)";
+ std::string S = "(";
+ unsigned ci = i+1;
+ for (unsigned j = 0; j < NumChildren; ++j, ci = getNextSibling(ci)) {
+ S += toString(ci);
+ if (j+1 < NumChildren) S += "#";
+ }
+ S += ")";
+ return S;
+ }
+ }
+ return "";
 }
+};
- void Profile(llvm::FoldingSetNodeID &ID) const {
- for (SmallVectorImpl<const NamedDecl*>::const_iterator I = DeclSeq.begin(),
- E = DeclSeq.end(); I != E; ++I) {
- ID.AddPointer(*I);
- }
+
+
+/// \brief A short list of SExprs
+class MutexIDList : public SmallVector<SExpr, 2> {
+public:
+ /// \brief Return true if the list contains the specified SExpr
+ /// Performs a linear search, because these lists are almost always very small.
+ bool contains(const SExpr& M) {
+ for (iterator I=begin(),E=end(); I != E; ++I)
+ if ((*I) == M) return true;
+ return false;
+ }
+
+ /// \brief Push M onto list, but discard duplicates
+ void push_back_nodup(const SExpr& M) {
+ if (!contains(M)) push_back(M);
 }
 };
+
 /// \brief This is a helper class that stores info about the most recent
 /// acquire of a Lock.
/// @@ -307,14 +670,18 @@ struct LockData { /// /// FIXME: add support for re-entrant locking and lock up/downgrading LockKind LKind; - MutexID UnderlyingMutex; // for ScopedLockable objects + bool Managed; // for ScopedLockable objects + SExpr UnderlyingMutex; // for ScopedLockable objects - LockData(SourceLocation AcquireLoc, LockKind LKind) - : AcquireLoc(AcquireLoc), LKind(LKind), UnderlyingMutex(Decl::EmptyShell()) + LockData(SourceLocation AcquireLoc, LockKind LKind, bool M = false) + : AcquireLoc(AcquireLoc), LKind(LKind), Managed(M), + UnderlyingMutex(Decl::EmptyShell()) {} - LockData(SourceLocation AcquireLoc, LockKind LKind, const MutexID &Mu) - : AcquireLoc(AcquireLoc), LKind(LKind), UnderlyingMutex(Mu) {} + LockData(SourceLocation AcquireLoc, LockKind LKind, const SExpr &Mu) + : AcquireLoc(AcquireLoc), LKind(LKind), Managed(false), + UnderlyingMutex(Mu) + {} bool operator==(const LockData &other) const { return AcquireLoc == other.AcquireLoc && LKind == other.LKind; @@ -331,10 +698,102 @@ struct LockData { }; -/// A Lockset maps each MutexID (defined above) to information about how it has +/// \brief A FactEntry stores a single fact that is known at a particular point +/// in the program execution. Currently, this is information regarding a lock +/// that is held at that point. +struct FactEntry { + SExpr MutID; + LockData LDat; + + FactEntry(const SExpr& M, const LockData& L) + : MutID(M), LDat(L) + { } +}; + + +typedef unsigned short FactID; + +/// \brief FactManager manages the memory for all facts that are created during +/// the analysis of a single routine. +class FactManager { +private: + std::vector Facts; + +public: + FactID newLock(const SExpr& M, const LockData& L) { + Facts.push_back(FactEntry(M,L)); + return static_cast(Facts.size() - 1); + } + + const FactEntry& operator[](FactID F) const { return Facts[F]; } + FactEntry& operator[](FactID F) { return Facts[F]; } +}; + + +/// \brief A FactSet is the set of facts that are known to be true at a +/// particular program point. FactSets must be small, because they are +/// frequently copied, and are thus implemented as a set of indices into a +/// table maintained by a FactManager. A typical FactSet only holds 1 or 2 +/// locks, so we can get away with doing a linear search for lookup. Note +/// that a hashtable or map is inappropriate in this case, because lookups +/// may involve partial pattern matches, rather than exact matches. 
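
A compact standalone model of the FactManager/FactSet pair described above and defined next. Plain string equality stands in for SExpr pattern matching, but the layout, the linear search, and the swap-with-last removal mirror the real classes:

    #include <cassert>
    #include <string>
    #include <vector>

    typedef unsigned short FactID;

    struct SketchFactManager {
      std::vector<std::string> Facts;  // stands in for FactEntry storage
      FactID newLock(const std::string &M) {
        Facts.push_back(M);
        return static_cast<FactID>(Facts.size() - 1);
      }
    };

    struct SketchFactSet {
      std::vector<FactID> IDs;         // small set of indices, cheap to copy
      void addLock(SketchFactManager &FM, const std::string &M) {
        IDs.push_back(FM.newLock(M));
      }
      bool removeLock(SketchFactManager &FM, const std::string &M) {
        for (unsigned i = 0, n = IDs.size(); i < n; ++i)
          if (FM.Facts[IDs[i]] == M) {  // real code uses SExpr::matches
            IDs[i] = IDs.back();        // swap with last, then pop: O(1) erase
            IDs.pop_back();
            return true;
          }
        return false;
      }
    };

    int main() {
      SketchFactManager FM;
      SketchFactSet FS;
      FS.addLock(FM, "mu1");
      FS.addLock(FM, "mu2");
      assert(FS.removeLock(FM, "mu1") && FS.IDs.size() == 1);
      return 0;
    }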
+class FactSet { +private: + typedef SmallVector FactVec; + + FactVec FactIDs; + +public: + typedef FactVec::iterator iterator; + typedef FactVec::const_iterator const_iterator; + + iterator begin() { return FactIDs.begin(); } + const_iterator begin() const { return FactIDs.begin(); } + + iterator end() { return FactIDs.end(); } + const_iterator end() const { return FactIDs.end(); } + + bool isEmpty() const { return FactIDs.size() == 0; } + + FactID addLock(FactManager& FM, const SExpr& M, const LockData& L) { + FactID F = FM.newLock(M, L); + FactIDs.push_back(F); + return F; + } + + bool removeLock(FactManager& FM, const SExpr& M) { + unsigned n = FactIDs.size(); + if (n == 0) + return false; + + for (unsigned i = 0; i < n-1; ++i) { + if (FM[FactIDs[i]].MutID.matches(M)) { + FactIDs[i] = FactIDs[n-1]; + FactIDs.pop_back(); + return true; + } + } + if (FM[FactIDs[n-1]].MutID.matches(M)) { + FactIDs.pop_back(); + return true; + } + return false; + } + + LockData* findLock(FactManager& FM, const SExpr& M) const { + for (const_iterator I=begin(), E=end(); I != E; ++I) { + if (FM[*I].MutID.matches(M)) return &FM[*I].LDat; + } + return 0; + } +}; + + + +/// A Lockset maps each SExpr (defined above) to information about how it has /// been locked. -typedef llvm::ImmutableMap Lockset; -typedef llvm::ImmutableMap LocalVarContext; +typedef llvm::ImmutableMap Lockset; +typedef llvm::ImmutableMap LocalVarContext; class LocalVariableMap; @@ -345,15 +804,15 @@ enum CFGBlockSide { CBS_Entry, CBS_Exit }; /// maintained for each block in the CFG. See LocalVariableMap for more /// information about the contexts. struct CFGBlockInfo { - Lockset EntrySet; // Lockset held at entry to block - Lockset ExitSet; // Lockset held at exit from block + FactSet EntrySet; // Lockset held at entry to block + FactSet ExitSet; // Lockset held at exit from block LocalVarContext EntryContext; // Context held at entry to block LocalVarContext ExitContext; // Context held at exit from block SourceLocation EntryLoc; // Location of first statement in block SourceLocation ExitLoc; // Location of last statement in block. unsigned EntryIndex; // Used to replay contexts later - const Lockset &getSet(CFGBlockSide Side) const { + const FactSet &getSet(CFGBlockSide Side) const { return Side == CBS_Entry ? EntrySet : ExitSet; } SourceLocation getLocation(CFGBlockSide Side) const { @@ -361,14 +820,12 @@ struct CFGBlockInfo { } private: - CFGBlockInfo(Lockset EmptySet, LocalVarContext EmptyCtx) - : EntrySet(EmptySet), ExitSet(EmptySet), - EntryContext(EmptyCtx), ExitContext(EmptyCtx) + CFGBlockInfo(LocalVarContext EmptyCtx) + : EntryContext(EmptyCtx), ExitContext(EmptyCtx) { } public: - static CFGBlockInfo getEmptyBlockInfo(Lockset::Factory &F, - LocalVariableMap &M); + static CFGBlockInfo getEmptyBlockInfo(LocalVariableMap &M); }; @@ -398,21 +855,21 @@ public: public: friend class LocalVariableMap; - NamedDecl *Dec; // The original declaration for this variable. - Expr *Exp; // The expression for this variable, OR - unsigned Ref; // Reference to another VarDefinition - Context Ctx; // The map with which Exp should be interpreted. + const NamedDecl *Dec; // The original declaration for this variable. + const Expr *Exp; // The expression for this variable, OR + unsigned Ref; // Reference to another VarDefinition + Context Ctx; // The map with which Exp should be interpreted. 
bool isReference() { return !Exp; } private: // Create ordinary variable definition - VarDefinition(NamedDecl *D, Expr *E, Context C) + VarDefinition(const NamedDecl *D, const Expr *E, Context C) : Dec(D), Exp(E), Ref(0), Ctx(C) { } // Create reference to previous definition - VarDefinition(NamedDecl *D, unsigned R, Context C) + VarDefinition(const NamedDecl *D, unsigned R, Context C) : Dec(D), Exp(0), Ref(R), Ctx(C) { } }; @@ -430,7 +887,7 @@ public: } /// Look up a definition, within the given context. - const VarDefinition* lookup(NamedDecl *D, Context Ctx) { + const VarDefinition* lookup(const NamedDecl *D, Context Ctx) { const unsigned *i = Ctx.lookup(D); if (!i) return 0; @@ -441,7 +898,7 @@ public: /// Look up the definition for D within the given context. Returns /// NULL if the expression is not statically known. If successful, also /// modifies Ctx to hold the context of the return Expr. - Expr* lookupExpr(NamedDecl *D, Context &Ctx) { + const Expr* lookupExpr(const NamedDecl *D, Context &Ctx) { const unsigned *P = Ctx.lookup(D); if (!P) return 0; @@ -476,7 +933,7 @@ public: llvm::errs() << "Undefined"; return; } - NamedDecl *Dec = VarDefinitions[i].Dec; + const NamedDecl *Dec = VarDefinitions[i].Dec; if (!Dec) { llvm::errs() << "<>"; return; @@ -488,7 +945,7 @@ public: /// Dumps an ASCII representation of the variable map to llvm::errs() void dump() { for (unsigned i = 1, e = VarDefinitions.size(); i < e; ++i) { - Expr *Exp = VarDefinitions[i].Exp; + const Expr *Exp = VarDefinitions[i].Exp; unsigned Ref = VarDefinitions[i].Ref; dumpVarDefinitionName(i); @@ -504,7 +961,7 @@ public: /// Dumps an ASCII representation of a Context to llvm::errs() void dumpContext(Context C) { for (Context::iterator I = C.begin(), E = C.end(); I != E; ++I) { - NamedDecl *D = I.getKey(); + const NamedDecl *D = I.getKey(); D->printName(llvm::errs()); const unsigned *i = C.lookup(D); llvm::errs() << " -> "; @@ -528,7 +985,7 @@ protected: // Adds a new definition to the given context, and returns a new context. // This method should be called when declaring a new variable. - Context addDefinition(NamedDecl *D, Expr *Exp, Context Ctx) { + Context addDefinition(const NamedDecl *D, Expr *Exp, Context Ctx) { assert(!Ctx.contains(D)); unsigned newID = VarDefinitions.size(); Context NewCtx = ContextFactory.add(Ctx, D, newID); @@ -537,7 +994,7 @@ protected: } // Add a new reference to an existing definition. - Context addReference(NamedDecl *D, unsigned i, Context Ctx) { + Context addReference(const NamedDecl *D, unsigned i, Context Ctx) { unsigned newID = VarDefinitions.size(); Context NewCtx = ContextFactory.add(Ctx, D, newID); VarDefinitions.push_back(VarDefinition(D, i, Ctx)); @@ -546,7 +1003,7 @@ protected: // Updates a definition only if that definition is already in the map. // This method should be called when assigning to an existing variable. - Context updateDefinition(NamedDecl *D, Expr *Exp, Context Ctx) { + Context updateDefinition(const NamedDecl *D, Expr *Exp, Context Ctx) { if (Ctx.contains(D)) { unsigned newID = VarDefinitions.size(); Context NewCtx = ContextFactory.remove(Ctx, D); @@ -559,7 +1016,7 @@ protected: // Removes a definition from the context, but keeps the variable name // as a valid variable. The index 0 is a placeholder for cleared definitions. 
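
A small model of the LocalVariableMap convention spelled out above: a Context maps each variable to an index into a monotonically growing definitions table, with index 0 reserved as the "cleared/unknown" placeholder. Names and types here are simplified stand-ins for the clang classes:

    #include <cassert>
    #include <map>
    #include <string>
    #include <vector>

    struct SketchVarMap {
      std::vector<std::string> Defs;   // slot 0 reserved for "undefined"
      typedef std::map<std::string, unsigned> Context;

      SketchVarMap() { Defs.push_back("<undefined>"); }

      Context addDefinition(const std::string &D, const std::string &E,
                            Context Ctx) {
        Ctx[D] = Defs.size();          // fresh definition index
        Defs.push_back(E);
        return Ctx;
      }
      Context clearDefinition(const std::string &D, Context Ctx) {
        Ctx[D] = 0;                    // keep the name, forget the value
        return Ctx;
      }
    };

    int main() {
      SketchVarMap M;
      SketchVarMap::Context C;
      C = M.addDefinition("p", "&s->mu", C);
      C = M.clearDefinition("p", C);
      assert(C["p"] == 0);             // 'p' is now statically unknown
      return 0;
    }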
-  Context clearDefinition(NamedDecl *D, Context Ctx) {
+  Context clearDefinition(const NamedDecl *D, Context Ctx) {
     Context NewCtx = Ctx;
     if (NewCtx.contains(D)) {
       NewCtx = ContextFactory.remove(NewCtx, D);
@@ -569,7 +1026,7 @@ protected:
   }
 
   // Remove a definition entirely from the context.
-  Context removeDefinition(NamedDecl *D, Context Ctx) {
+  Context removeDefinition(const NamedDecl *D, Context Ctx) {
     Context NewCtx = Ctx;
     if (NewCtx.contains(D)) {
       NewCtx = ContextFactory.remove(NewCtx, D);
@@ -586,9 +1043,8 @@ protected:
 
 // This has to be defined after LocalVariableMap.
-CFGBlockInfo CFGBlockInfo::getEmptyBlockInfo(Lockset::Factory &F,
-                                             LocalVariableMap &M) {
-  return CFGBlockInfo(F.getEmptyMap(), M.getEmptyContext());
+CFGBlockInfo CFGBlockInfo::getEmptyBlockInfo(LocalVariableMap &M) {
+  return CFGBlockInfo(M.getEmptyContext());
 }
@@ -655,7 +1111,7 @@ LocalVariableMap::Context
 LocalVariableMap::intersectContexts(Context C1, Context C2) {
   Context Result = C1;
   for (Context::iterator I = C1.begin(), E = C1.end(); I != E; ++I) {
-    NamedDecl *Dec = I.getKey();
+    const NamedDecl *Dec = I.getKey();
     unsigned i1 = I.getData();
     const unsigned *i2 = C2.lookup(Dec);
     if (!i2)             // variable doesn't exist on second path
@@ -672,7 +1128,7 @@ LocalVariableMap::intersectContexts(Context C1, Context C2) {
 LocalVariableMap::Context LocalVariableMap::createReferenceContext(Context C) {
   Context Result = getEmptyContext();
   for (Context::iterator I = C.begin(), E = C.end(); I != E; ++I) {
-    NamedDecl *Dec = I.getKey();
+    const NamedDecl *Dec = I.getKey();
     unsigned i = I.getData();
     Result = addReference(Dec, i, Result);
   }
@@ -684,7 +1140,7 @@ LocalVariableMap::Context LocalVariableMap::createReferenceContext(Context C) {
 // createReferenceContext.
 void LocalVariableMap::intersectBackEdge(Context C1, Context C2) {
   for (Context::iterator I = C1.begin(), E = C1.end(); I != E; ++I) {
-    NamedDecl *Dec = I.getKey();
+    const NamedDecl *Dec = I.getKey();
     unsigned i1 = I.getData();
     VarDefinition *VDef = &VarDefinitions[i1];
     assert(VDef->isReference());
@@ -725,7 +1181,7 @@ void LocalVariableMap::intersectBackEdge(Context C1, Context C2) {
 // incoming back edge, it duplicates the context, creating new definitions
 // that refer back to the originals. (These correspond to places where SSA
 // might have to insert a phi node.) On the second pass, these definitions are
-// set to NULL if the the variable has changed on the back-edge (i.e. a phi
+// set to NULL if the variable has changed on the back-edge (i.e. a phi
 // node was actually required.) E.g.
// // { Context | VarDefinitions } @@ -869,24 +1325,294 @@ static void findBlockLocations(CFG *CFGraph, class ThreadSafetyAnalyzer { friend class BuildLockset; - ThreadSafetyHandler &Handler; - Lockset::Factory LocksetFactory; - LocalVariableMap LocalVarMap; + ThreadSafetyHandler &Handler; + LocalVariableMap LocalVarMap; + FactManager FactMan; + std::vector BlockInfo; public: ThreadSafetyAnalyzer(ThreadSafetyHandler &H) : Handler(H) {} - Lockset intersectAndWarn(const CFGBlockInfo &Block1, CFGBlockSide Side1, - const CFGBlockInfo &Block2, CFGBlockSide Side2, - LockErrorKind LEK); + void addLock(FactSet &FSet, const SExpr &Mutex, const LockData &LDat); + void removeLock(FactSet &FSet, const SExpr &Mutex, + SourceLocation UnlockLoc, bool FullyRemove=false); + + template + void getMutexIDs(MutexIDList &Mtxs, AttrType *Attr, Expr *Exp, + const NamedDecl *D); - Lockset addLock(Lockset &LSet, Expr *MutexExp, const NamedDecl *D, - LockKind LK, SourceLocation Loc); + template + void getMutexIDs(MutexIDList &Mtxs, AttrType *Attr, Expr *Exp, + const NamedDecl *D, + const CFGBlock *PredBlock, const CFGBlock *CurrBlock, + Expr *BrE, bool Neg); + + const CallExpr* getTrylockCallExpr(const Stmt *Cond, LocalVarContext C, + bool &Negate); + + void getEdgeLockset(FactSet &Result, const FactSet &ExitSet, + const CFGBlock* PredBlock, + const CFGBlock *CurrBlock); + + void intersectAndWarn(FactSet &FSet1, const FactSet &FSet2, + SourceLocation JoinLoc, + LockErrorKind LEK1, LockErrorKind LEK2, + bool Modify=true); + + void intersectAndWarn(FactSet &FSet1, const FactSet &FSet2, + SourceLocation JoinLoc, LockErrorKind LEK1, + bool Modify=true) { + intersectAndWarn(FSet1, FSet2, JoinLoc, LEK1, LEK1, Modify); + } void runAnalysis(AnalysisDeclContext &AC); }; +/// \brief Add a new lock to the lockset, warning if the lock is already there. +/// \param Mutex -- the Mutex expression for the lock +/// \param LDat -- the LockData for the lock +void ThreadSafetyAnalyzer::addLock(FactSet &FSet, const SExpr &Mutex, + const LockData &LDat) { + // FIXME: deal with acquired before/after annotations. + // FIXME: Don't always warn when we have support for reentrant locks. + if (FSet.findLock(FactMan, Mutex)) { + Handler.handleDoubleLock(Mutex.toString(), LDat.AcquireLoc); + } else { + FSet.addLock(FactMan, Mutex, LDat); + } +} + + +/// \brief Remove a lock from the lockset, warning if the lock is not there. +/// \param LockExp The lock expression corresponding to the lock to be removed +/// \param UnlockLoc The source location of the unlock (only used in error msg) +void ThreadSafetyAnalyzer::removeLock(FactSet &FSet, + const SExpr &Mutex, + SourceLocation UnlockLoc, + bool FullyRemove) { + const LockData *LDat = FSet.findLock(FactMan, Mutex); + if (!LDat) { + Handler.handleUnmatchedUnlock(Mutex.toString(), UnlockLoc); + return; + } + + if (LDat->UnderlyingMutex.isValid()) { + // This is scoped lockable object, which manages the real mutex. + if (FullyRemove) { + // We're destroying the managing object. + // Remove the underlying mutex if it exists; but don't warn. + if (FSet.findLock(FactMan, LDat->UnderlyingMutex)) + FSet.removeLock(FactMan, LDat->UnderlyingMutex); + } else { + // We're releasing the underlying mutex, but not destroying the + // managing object. Warn on dual release. 
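The scoped-lockable handling here (removeLock's treatment of the underlying mutex continues just below) models guard objects in user code such as the following sketch. The attribute spellings are the GNU forms used by clang's thread-safety annotations, commonly wrapped in macros; the class and variable names are purely illustrative:

    struct __attribute__((lockable)) Mutex {
      void Lock()   __attribute__((exclusive_lock_function));
      void Unlock() __attribute__((unlock_function));
    };

    // A scoped guard: construction acquires the mutex, destruction drops it.
    struct __attribute__((scoped_lockable)) MutexLock {
      MutexLock(Mutex *mu) __attribute__((exclusive_lock_function(mu)));
      ~MutexLock()         __attribute__((unlock_function));
    };

    Mutex mu;
    int data __attribute__((guarded_by(mu)));

    void f() {
      MutexLock lock(&mu);  // facts recorded for 'lock' and underlying 'mu'
      data = 42;            // OK: 'mu' is held
    }                       // destructor: the FullyRemove path drops both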
+ if (!FSet.findLock(FactMan, LDat->UnderlyingMutex)) { + Handler.handleUnmatchedUnlock(LDat->UnderlyingMutex.toString(), + UnlockLoc); + } + FSet.removeLock(FactMan, LDat->UnderlyingMutex); + return; + } + } + FSet.removeLock(FactMan, Mutex); +} + + +/// \brief Extract the list of mutexIDs from the attribute on an expression, +/// and push them onto Mtxs, discarding any duplicates. +template +void ThreadSafetyAnalyzer::getMutexIDs(MutexIDList &Mtxs, AttrType *Attr, + Expr *Exp, const NamedDecl *D) { + typedef typename AttrType::args_iterator iterator_type; + + if (Attr->args_size() == 0) { + // The mutex held is the "this" object. + SExpr Mu(0, Exp, D); + if (!Mu.isValid()) + SExpr::warnInvalidLock(Handler, 0, Exp, D); + else + Mtxs.push_back_nodup(Mu); + return; + } + + for (iterator_type I=Attr->args_begin(), E=Attr->args_end(); I != E; ++I) { + SExpr Mu(*I, Exp, D); + if (!Mu.isValid()) + SExpr::warnInvalidLock(Handler, *I, Exp, D); + else + Mtxs.push_back_nodup(Mu); + } +} + + +/// \brief Extract the list of mutexIDs from a trylock attribute. If the +/// trylock applies to the given edge, then push them onto Mtxs, discarding +/// any duplicates. +template +void ThreadSafetyAnalyzer::getMutexIDs(MutexIDList &Mtxs, AttrType *Attr, + Expr *Exp, const NamedDecl *D, + const CFGBlock *PredBlock, + const CFGBlock *CurrBlock, + Expr *BrE, bool Neg) { + // Find out which branch has the lock + bool branch = 0; + if (CXXBoolLiteralExpr *BLE = dyn_cast_or_null(BrE)) { + branch = BLE->getValue(); + } + else if (IntegerLiteral *ILE = dyn_cast_or_null(BrE)) { + branch = ILE->getValue().getBoolValue(); + } + int branchnum = branch ? 0 : 1; + if (Neg) branchnum = !branchnum; + + // If we've taken the trylock branch, then add the lock + int i = 0; + for (CFGBlock::const_succ_iterator SI = PredBlock->succ_begin(), + SE = PredBlock->succ_end(); SI != SE && i < 2; ++SI, ++i) { + if (*SI == CurrBlock && i == branchnum) { + getMutexIDs(Mtxs, Attr, Exp, D); + } + } +} + + +bool getStaticBooleanValue(Expr* E, bool& TCond) { + if (isa(E) || isa(E)) { + TCond = false; + return true; + } else if (CXXBoolLiteralExpr *BLE = dyn_cast(E)) { + TCond = BLE->getValue(); + return true; + } else if (IntegerLiteral *ILE = dyn_cast(E)) { + TCond = ILE->getValue().getBoolValue(); + return true; + } else if (ImplicitCastExpr *CE = dyn_cast(E)) { + return getStaticBooleanValue(CE->getSubExpr(), TCond); + } + return false; +} + + +// If Cond can be traced back to a function call, return the call expression. +// The negate variable should be called with false, and will be set to true +// if the function call is negated, e.g. 
if (!mu.tryLock(...)) +const CallExpr* ThreadSafetyAnalyzer::getTrylockCallExpr(const Stmt *Cond, + LocalVarContext C, + bool &Negate) { + if (!Cond) + return 0; + + if (const CallExpr *CallExp = dyn_cast(Cond)) { + return CallExp; + } + else if (const ParenExpr *PE = dyn_cast(Cond)) { + return getTrylockCallExpr(PE->getSubExpr(), C, Negate); + } + else if (const ImplicitCastExpr *CE = dyn_cast(Cond)) { + return getTrylockCallExpr(CE->getSubExpr(), C, Negate); + } + else if (const DeclRefExpr *DRE = dyn_cast(Cond)) { + const Expr *E = LocalVarMap.lookupExpr(DRE->getDecl(), C); + return getTrylockCallExpr(E, C, Negate); + } + else if (const UnaryOperator *UOP = dyn_cast(Cond)) { + if (UOP->getOpcode() == UO_LNot) { + Negate = !Negate; + return getTrylockCallExpr(UOP->getSubExpr(), C, Negate); + } + return 0; + } + else if (const BinaryOperator *BOP = dyn_cast(Cond)) { + if (BOP->getOpcode() == BO_EQ || BOP->getOpcode() == BO_NE) { + if (BOP->getOpcode() == BO_NE) + Negate = !Negate; + + bool TCond = false; + if (getStaticBooleanValue(BOP->getRHS(), TCond)) { + if (!TCond) Negate = !Negate; + return getTrylockCallExpr(BOP->getLHS(), C, Negate); + } + else if (getStaticBooleanValue(BOP->getLHS(), TCond)) { + if (!TCond) Negate = !Negate; + return getTrylockCallExpr(BOP->getRHS(), C, Negate); + } + return 0; + } + return 0; + } + // FIXME -- handle && and || as well. + return 0; +} + + +/// \brief Find the lockset that holds on the edge between PredBlock +/// and CurrBlock. The edge set is the exit set of PredBlock (passed +/// as the ExitSet parameter) plus any trylocks, which are conditionally held. +void ThreadSafetyAnalyzer::getEdgeLockset(FactSet& Result, + const FactSet &ExitSet, + const CFGBlock *PredBlock, + const CFGBlock *CurrBlock) { + Result = ExitSet; + + if (!PredBlock->getTerminatorCondition()) + return; + + bool Negate = false; + const Stmt *Cond = PredBlock->getTerminatorCondition(); + const CFGBlockInfo *PredBlockInfo = &BlockInfo[PredBlock->getBlockID()]; + const LocalVarContext &LVarCtx = PredBlockInfo->ExitContext; + + CallExpr *Exp = + const_cast(getTrylockCallExpr(Cond, LVarCtx, Negate)); + if (!Exp) + return; + + NamedDecl *FunDecl = dyn_cast_or_null(Exp->getCalleeDecl()); + if(!FunDecl || !FunDecl->hasAttrs()) + return; + + + MutexIDList ExclusiveLocksToAdd; + MutexIDList SharedLocksToAdd; + + // If the condition is a call to a Trylock function, then grab the attributes + AttrVec &ArgAttrs = FunDecl->getAttrs(); + for (unsigned i = 0; i < ArgAttrs.size(); ++i) { + Attr *Attr = ArgAttrs[i]; + switch (Attr->getKind()) { + case attr::ExclusiveTrylockFunction: { + ExclusiveTrylockFunctionAttr *A = + cast(Attr); + getMutexIDs(ExclusiveLocksToAdd, A, Exp, FunDecl, + PredBlock, CurrBlock, A->getSuccessValue(), Negate); + break; + } + case attr::SharedTrylockFunction: { + SharedTrylockFunctionAttr *A = + cast(Attr); + getMutexIDs(ExclusiveLocksToAdd, A, Exp, FunDecl, + PredBlock, CurrBlock, A->getSuccessValue(), Negate); + break; + } + default: + break; + } + } + + // Add and remove locks. 
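The add/remove loops follow below; first, a sketch of the trylock pattern that getTrylockCallExpr and getEdgeLockset recognize. The attribute argument is the call's success value, and all names here are illustrative:

    struct __attribute__((lockable)) Mutex {
      bool TryLock() __attribute__((exclusive_trylock_function(true)));
      void Unlock()  __attribute__((unlock_function));
    };

    Mutex mu;
    int data __attribute__((guarded_by(mu)));

    void g() {
      if (!mu.TryLock())  // the '!' flips Negate in getTrylockCallExpr
        return;           // on this edge 'mu' is absent from the lockset
      data = 1;           // on this edge 'mu' is present: no warning
      mu.Unlock();
    }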
+ SourceLocation Loc = Exp->getExprLoc(); + for (unsigned i=0,n=ExclusiveLocksToAdd.size(); i { friend class ThreadSafetyAnalyzer; - ThreadSafetyHandler &Handler; - Lockset::Factory &LocksetFactory; - LocalVariableMap &LocalVarMap; - - Lockset LSet; + ThreadSafetyAnalyzer *Analyzer; + FactSet FSet; LocalVariableMap::Context LVarCtx; unsigned CtxIndex; // Helper functions - void addLock(const MutexID &Mutex, const LockData &LDat); - void removeLock(const MutexID &Mutex, SourceLocation UnlockLoc); + const ValueDecl *getValueDecl(Expr *Exp); - template - void addLocksToSet(LockKind LK, AttrType *Attr, - Expr *Exp, NamedDecl *D, VarDecl *VD = 0); - void removeLocksFromSet(UnlockFunctionAttr *Attr, - Expr *Exp, NamedDecl* FunDecl); + void warnIfMutexNotHeld(const NamedDecl *D, Expr *Exp, AccessKind AK, + Expr *MutexExp, ProtectedOperationKind POK); - const ValueDecl *getValueDecl(Expr *Exp); - void warnIfMutexNotHeld (const NamedDecl *D, Expr *Exp, AccessKind AK, - Expr *MutexExp, ProtectedOperationKind POK); void checkAccess(Expr *Exp, AccessKind AK); void checkDereference(Expr *Exp, AccessKind AK); - void handleCall(Expr *Exp, NamedDecl *D, VarDecl *VD = 0); - - template - void addTrylock(LockKind LK, AttrType *Attr, Expr *Exp, NamedDecl *FunDecl, - const CFGBlock* PredBlock, const CFGBlock *CurrBlock, - Expr *BrE, bool Neg); - CallExpr* getTrylockCallExpr(Stmt *Cond, LocalVariableMap::Context C, - bool &Negate); - void handleTrylock(Stmt *Cond, const CFGBlock* PredBlock, - const CFGBlock *CurrBlock); + void handleCall(Expr *Exp, const NamedDecl *D, VarDecl *VD = 0); /// \brief Returns true if the lockset contains a lock, regardless of whether /// the lock is held exclusively or shared. - bool locksetContains(const MutexID &Lock) const { - return LSet.lookup(Lock); + bool locksetContains(const SExpr &Mu) const { + return FSet.findLock(Analyzer->FactMan, Mu); } /// \brief Returns true if the lockset contains a lock with the passed in /// locktype. - bool locksetContains(const MutexID &Lock, LockKind KindRequested) const { - const LockData *LockHeld = LSet.lookup(Lock); + bool locksetContains(const SExpr &Mu, LockKind KindRequested) const { + const LockData *LockHeld = FSet.findLock(Analyzer->FactMan, Mu); return (LockHeld && KindRequested == LockHeld->LKind); } @@ -946,7 +1653,7 @@ class BuildLockset : public StmtVisitor { /// passed in locktype. So for example, if we pass in LK_Shared, this function /// returns true if the lock is held LK_Shared or LK_Exclusive. If we pass in /// LK_Exclusive, this function returns true if the lock is held LK_Exclusive. - bool locksetContainsAtLeast(const MutexID &Lock, + bool locksetContainsAtLeast(const SExpr &Lock, LockKind KindRequested) const { switch (KindRequested) { case LK_Shared: @@ -958,12 +1665,10 @@ class BuildLockset : public StmtVisitor { } public: - BuildLockset(ThreadSafetyAnalyzer *analyzer, CFGBlockInfo &Info) + BuildLockset(ThreadSafetyAnalyzer *Anlzr, CFGBlockInfo &Info) : StmtVisitor(), - Handler(analyzer->Handler), - LocksetFactory(analyzer->LocksetFactory), - LocalVarMap(analyzer->LocalVarMap), - LSet(Info.EntrySet), + Analyzer(Anlzr), + FSet(Info.EntrySet), LVarCtx(Info.EntryContext), CtxIndex(Info.EntryIndex) {} @@ -976,104 +1681,6 @@ public: void VisitDeclStmt(DeclStmt *S); }; -/// \brief Add a new lock to the lockset, warning if the lock is already there. 
-/// \param Mutex -- the Mutex expression for the lock -/// \param LDat -- the LockData for the lock -void BuildLockset::addLock(const MutexID &Mutex, const LockData& LDat) { - // FIXME: deal with acquired before/after annotations. - // FIXME: Don't always warn when we have support for reentrant locks. - if (locksetContains(Mutex)) - Handler.handleDoubleLock(Mutex.getName(), LDat.AcquireLoc); - else - LSet = LocksetFactory.add(LSet, Mutex, LDat); -} - -/// \brief Remove a lock from the lockset, warning if the lock is not there. -/// \param LockExp The lock expression corresponding to the lock to be removed -/// \param UnlockLoc The source location of the unlock (only used in error msg) -void BuildLockset::removeLock(const MutexID &Mutex, SourceLocation UnlockLoc) { - const LockData *LDat = LSet.lookup(Mutex); - if (!LDat) - Handler.handleUnmatchedUnlock(Mutex.getName(), UnlockLoc); - else { - // For scoped-lockable vars, remove the mutex associated with this var. - if (LDat->UnderlyingMutex.isValid()) - removeLock(LDat->UnderlyingMutex, UnlockLoc); - LSet = LocksetFactory.remove(LSet, Mutex); - } -} - -/// \brief This function, parameterized by an attribute type, is used to add a -/// set of locks specified as attribute arguments to the lockset. -template -void BuildLockset::addLocksToSet(LockKind LK, AttrType *Attr, - Expr *Exp, NamedDecl* FunDecl, VarDecl *VD) { - typedef typename AttrType::args_iterator iterator_type; - - SourceLocation ExpLocation = Exp->getExprLoc(); - - // Figure out if we're calling the constructor of scoped lockable class - bool isScopedVar = false; - if (VD) { - if (CXXConstructorDecl *CD = dyn_cast(FunDecl)) { - CXXRecordDecl* PD = CD->getParent(); - if (PD && PD->getAttr()) - isScopedVar = true; - } - } - - if (Attr->args_size() == 0) { - // The mutex held is the "this" object. - MutexID Mutex(0, Exp, FunDecl); - if (!Mutex.isValid()) - MutexID::warnInvalidLock(Handler, 0, Exp, FunDecl); - else - addLock(Mutex, LockData(ExpLocation, LK)); - return; - } - - for (iterator_type I=Attr->args_begin(), E=Attr->args_end(); I != E; ++I) { - MutexID Mutex(*I, Exp, FunDecl); - if (!Mutex.isValid()) - MutexID::warnInvalidLock(Handler, *I, Exp, FunDecl); - else { - addLock(Mutex, LockData(ExpLocation, LK)); - if (isScopedVar) { - // For scoped lockable vars, map this var to its underlying mutex. - DeclRefExpr DRE(VD, false, VD->getType(), VK_LValue, VD->getLocation()); - MutexID SMutex(&DRE, 0, 0); - addLock(SMutex, LockData(VD->getLocation(), LK, Mutex)); - } - } - } -} - -/// \brief This function removes a set of locks specified as attribute -/// arguments from the lockset. -void BuildLockset::removeLocksFromSet(UnlockFunctionAttr *Attr, - Expr *Exp, NamedDecl* FunDecl) { - SourceLocation ExpLocation; - if (Exp) ExpLocation = Exp->getExprLoc(); - - if (Attr->args_size() == 0) { - // The mutex held is the "this" object. 
- MutexID Mu(0, Exp, FunDecl); - if (!Mu.isValid()) - MutexID::warnInvalidLock(Handler, 0, Exp, FunDecl); - else - removeLock(Mu, ExpLocation); - return; - } - - for (UnlockFunctionAttr::args_iterator I = Attr->args_begin(), - E = Attr->args_end(); I != E; ++I) { - MutexID Mutex(*I, Exp, FunDecl); - if (!Mutex.isValid()) - MutexID::warnInvalidLock(Handler, *I, Exp, FunDecl); - else - removeLock(Mutex, ExpLocation); - } -} /// \brief Gets the value decl pointer from DeclRefExprs or MemberExprs const ValueDecl *BuildLockset::getValueDecl(Expr *Exp) { @@ -1093,11 +1700,12 @@ void BuildLockset::warnIfMutexNotHeld(const NamedDecl *D, Expr *Exp, ProtectedOperationKind POK) { LockKind LK = getLockKindFromAccessKind(AK); - MutexID Mutex(MutexExp, Exp, D); + SExpr Mutex(MutexExp, Exp, D); if (!Mutex.isValid()) - MutexID::warnInvalidLock(Handler, MutexExp, Exp, D); + SExpr::warnInvalidLock(Analyzer->Handler, MutexExp, Exp, D); else if (!locksetContainsAtLeast(Mutex, LK)) - Handler.handleMutexNotHeld(D, POK, Mutex.getName(), LK, Exp->getExprLoc()); + Analyzer->Handler.handleMutexNotHeld(D, POK, Mutex.toString(), LK, + Exp->getExprLoc()); } /// \brief This method identifies variable dereferences and checks pt_guarded_by @@ -1116,8 +1724,9 @@ void BuildLockset::checkDereference(Expr *Exp, AccessKind AK) { if(!D || !D->hasAttrs()) return; - if (D->getAttr() && LSet.isEmpty()) - Handler.handleNoMutexHeld(D, POK_VarDereference, AK, Exp->getExprLoc()); + if (D->getAttr() && FSet.isEmpty()) + Analyzer->Handler.handleNoMutexHeld(D, POK_VarDereference, AK, + Exp->getExprLoc()); const AttrVec &ArgAttrs = D->getAttrs(); for(unsigned i = 0, Size = ArgAttrs.size(); i < Size; ++i) @@ -1134,8 +1743,9 @@ void BuildLockset::checkAccess(Expr *Exp, AccessKind AK) { if(!D || !D->hasAttrs()) return; - if (D->getAttr() && LSet.isEmpty()) - Handler.handleNoMutexHeld(D, POK_VarAccess, AK, Exp->getExprLoc()); + if (D->getAttr() && FSet.isEmpty()) + Analyzer->Handler.handleNoMutexHeld(D, POK_VarAccess, AK, + Exp->getExprLoc()); const AttrVec &ArgAttrs = D->getAttrs(); for(unsigned i = 0, Size = ArgAttrs.size(); i < Size; ++i) @@ -1153,68 +1763,68 @@ void BuildLockset::checkAccess(Expr *Exp, AccessKind AK) { /// and check that the appropriate locks are held. Non-const method calls with /// the same signature as const method calls can be also treated as reads. /// -/// FIXME: We need to also visit CallExprs to catch/check global functions. -/// -/// FIXME: Do not flag an error for member variables accessed in constructors/ -/// destructors -void BuildLockset::handleCall(Expr *Exp, NamedDecl *D, VarDecl *VD) { - AttrVec &ArgAttrs = D->getAttrs(); +void BuildLockset::handleCall(Expr *Exp, const NamedDecl *D, VarDecl *VD) { + const AttrVec &ArgAttrs = D->getAttrs(); + MutexIDList ExclusiveLocksToAdd; + MutexIDList SharedLocksToAdd; + MutexIDList LocksToRemove; + for(unsigned i = 0; i < ArgAttrs.size(); ++i) { - Attr *Attr = ArgAttrs[i]; - switch (Attr->getKind()) { + Attr *At = const_cast(ArgAttrs[i]); + switch (At->getKind()) { // When we encounter an exclusive lock function, we need to add the lock // to our lockset with kind exclusive. case attr::ExclusiveLockFunction: { - ExclusiveLockFunctionAttr *A = cast(Attr); - addLocksToSet(LK_Exclusive, A, Exp, D, VD); + ExclusiveLockFunctionAttr *A = cast(At); + Analyzer->getMutexIDs(ExclusiveLocksToAdd, A, Exp, D); break; } // When we encounter a shared lock function, we need to add the lock // to our lockset with kind shared. 
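The attribute kinds this switch dispatches on (the remaining cases follow below) correspond to source annotations along these lines; a hedged sketch with illustrative names:

    struct __attribute__((lockable)) Mutex {
      void Lock()   __attribute__((exclusive_lock_function));
      void Unlock() __attribute__((unlock_function));
    };

    Mutex mu;
    int counter __attribute__((guarded_by(mu)));

    // attr::ExclusiveLocksRequired: checked against the lockset, not added.
    void bump() __attribute__((exclusive_locks_required(mu)));

    // attr::LocksExcluded: calling this with 'mu' held is reported.
    void slow() __attribute__((locks_excluded(mu)));

    void caller() {
      mu.Lock();    // attr::ExclusiveLockFunction: adds 'mu'
      bump();       // OK: 'mu' is in the lockset
      mu.Unlock();  // attr::UnlockFunction: removes 'mu'
      slow();       // OK: 'mu' is no longer held
    }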
case attr::SharedLockFunction: { - SharedLockFunctionAttr *A = cast(Attr); - addLocksToSet(LK_Shared, A, Exp, D, VD); + SharedLockFunctionAttr *A = cast(At); + Analyzer->getMutexIDs(SharedLocksToAdd, A, Exp, D); break; } // When we encounter an unlock function, we need to remove unlocked // mutexes from the lockset, and flag a warning if they are not there. case attr::UnlockFunction: { - UnlockFunctionAttr *UFAttr = cast(Attr); - removeLocksFromSet(UFAttr, Exp, D); + UnlockFunctionAttr *A = cast(At); + Analyzer->getMutexIDs(LocksToRemove, A, Exp, D); break; } case attr::ExclusiveLocksRequired: { - ExclusiveLocksRequiredAttr *ELRAttr = - cast(Attr); + ExclusiveLocksRequiredAttr *A = cast(At); for (ExclusiveLocksRequiredAttr::args_iterator - I = ELRAttr->args_begin(), E = ELRAttr->args_end(); I != E; ++I) + I = A->args_begin(), E = A->args_end(); I != E; ++I) warnIfMutexNotHeld(D, Exp, AK_Written, *I, POK_FunctionCall); break; } case attr::SharedLocksRequired: { - SharedLocksRequiredAttr *SLRAttr = cast(Attr); + SharedLocksRequiredAttr *A = cast(At); - for (SharedLocksRequiredAttr::args_iterator I = SLRAttr->args_begin(), - E = SLRAttr->args_end(); I != E; ++I) + for (SharedLocksRequiredAttr::args_iterator I = A->args_begin(), + E = A->args_end(); I != E; ++I) warnIfMutexNotHeld(D, Exp, AK_Read, *I, POK_FunctionCall); break; } case attr::LocksExcluded: { - LocksExcludedAttr *LEAttr = cast(Attr); - for (LocksExcludedAttr::args_iterator I = LEAttr->args_begin(), - E = LEAttr->args_end(); I != E; ++I) { - MutexID Mutex(*I, Exp, D); + LocksExcludedAttr *A = cast(At); + for (LocksExcludedAttr::args_iterator I = A->args_begin(), + E = A->args_end(); I != E; ++I) { + SExpr Mutex(*I, Exp, D); if (!Mutex.isValid()) - MutexID::warnInvalidLock(Handler, *I, Exp, D); + SExpr::warnInvalidLock(Analyzer->Handler, *I, Exp, D); else if (locksetContains(Mutex)) - Handler.handleFunExcludesLock(D->getName(), Mutex.getName(), - Exp->getExprLoc()); + Analyzer->Handler.handleFunExcludesLock(D->getName(), + Mutex.toString(), + Exp->getExprLoc()); } break; } @@ -1224,102 +1834,50 @@ void BuildLockset::handleCall(Expr *Exp, NamedDecl *D, VarDecl *VD) { break; } } -} - - -/// \brief Add lock to set, if the current block is in the taken branch of a -/// trylock. -template -void BuildLockset::addTrylock(LockKind LK, AttrType *Attr, Expr *Exp, - NamedDecl *FunDecl, const CFGBlock *PredBlock, - const CFGBlock *CurrBlock, Expr *BrE, bool Neg) { - // Find out which branch has the lock - bool branch = 0; - if (CXXBoolLiteralExpr *BLE = dyn_cast_or_null(BrE)) { - branch = BLE->getValue(); - } - else if (IntegerLiteral *ILE = dyn_cast_or_null(BrE)) { - branch = ILE->getValue().getBoolValue(); - } - int branchnum = branch ? 0 : 1; - if (Neg) branchnum = !branchnum; - // If we've taken the trylock branch, then add the lock - int i = 0; - for (CFGBlock::const_succ_iterator SI = PredBlock->succ_begin(), - SE = PredBlock->succ_end(); SI != SE && i < 2; ++SI, ++i) { - if (*SI == CurrBlock && i == branchnum) { - addLocksToSet(LK, Attr, Exp, FunDecl, 0); + // Figure out if we're calling the constructor of scoped lockable class + bool isScopedVar = false; + if (VD) { + if (const CXXConstructorDecl *CD = dyn_cast(D)) { + const CXXRecordDecl* PD = CD->getParent(); + if (PD && PD->getAttr()) + isScopedVar = true; } } -} - -// If Cond can be traced back to a function call, return the call expression. -// The negate variable should be called with false, and will be set to true -// if the function call is negated, e.g. 
if (!mu.tryLock(...)) -CallExpr* BuildLockset::getTrylockCallExpr(Stmt *Cond, - LocalVariableMap::Context C, - bool &Negate) { - if (!Cond) - return 0; - - if (CallExpr *CallExp = dyn_cast(Cond)) { - return CallExp; - } - else if (ImplicitCastExpr *CE = dyn_cast(Cond)) { - return getTrylockCallExpr(CE->getSubExpr(), C, Negate); + // Add locks. + SourceLocation Loc = Exp->getExprLoc(); + for (unsigned i=0,n=ExclusiveLocksToAdd.size(); iaddLock(FSet, ExclusiveLocksToAdd[i], + LockData(Loc, LK_Exclusive, isScopedVar)); } - else if (DeclRefExpr *DRE = dyn_cast(Cond)) { - Expr *E = LocalVarMap.lookupExpr(DRE->getDecl(), C); - return getTrylockCallExpr(E, C, Negate); - } - else if (UnaryOperator *UOP = dyn_cast(Cond)) { - if (UOP->getOpcode() == UO_LNot) { - Negate = !Negate; - return getTrylockCallExpr(UOP->getSubExpr(), C, Negate); - } + for (unsigned i=0,n=SharedLocksToAdd.size(); iaddLock(FSet, SharedLocksToAdd[i], + LockData(Loc, LK_Shared, isScopedVar)); } - // FIXME -- handle && and || as well. - return NULL; -} - - -/// \brief Process a conditional branch from a previous block to the current -/// block, looking for trylock calls. -void BuildLockset::handleTrylock(Stmt *Cond, const CFGBlock *PredBlock, - const CFGBlock *CurrBlock) { - bool Negate = false; - CallExpr *Exp = getTrylockCallExpr(Cond, LVarCtx, Negate); - if (!Exp) - return; - NamedDecl *FunDecl = dyn_cast_or_null(Exp->getCalleeDecl()); - if(!FunDecl || !FunDecl->hasAttrs()) - return; + // Add the managing object as a dummy mutex, mapped to the underlying mutex. + // FIXME -- this doesn't work if we acquire multiple locks. + if (isScopedVar) { + SourceLocation MLoc = VD->getLocation(); + DeclRefExpr DRE(VD, false, VD->getType(), VK_LValue, VD->getLocation()); + SExpr SMutex(&DRE, 0, 0); - // If the condition is a call to a Trylock function, then grab the attributes - AttrVec &ArgAttrs = FunDecl->getAttrs(); - for (unsigned i = 0; i < ArgAttrs.size(); ++i) { - Attr *Attr = ArgAttrs[i]; - switch (Attr->getKind()) { - case attr::ExclusiveTrylockFunction: { - ExclusiveTrylockFunctionAttr *A = - cast(Attr); - addTrylock(LK_Exclusive, A, Exp, FunDecl, PredBlock, CurrBlock, - A->getSuccessValue(), Negate); - break; - } - case attr::SharedTrylockFunction: { - SharedTrylockFunctionAttr *A = - cast(Attr); - addTrylock(LK_Shared, A, Exp, FunDecl, PredBlock, CurrBlock, - A->getSuccessValue(), Negate); - break; - } - default: - break; + for (unsigned i=0,n=ExclusiveLocksToAdd.size(); iaddLock(FSet, SMutex, LockData(MLoc, LK_Exclusive, + ExclusiveLocksToAdd[i])); } + for (unsigned i=0,n=SharedLocksToAdd.size(); iaddLock(FSet, SMutex, LockData(MLoc, LK_Shared, + SharedLocksToAdd[i])); + } + } + + // Remove locks. + // FIXME -- should only fully remove if the attribute refers to 'this'. 
+ bool Dtor = isa(D); + for (unsigned i=0,n=LocksToRemove.size(); iremoveLock(FSet, LocksToRemove[i], Loc, Dtor); } } @@ -1351,7 +1909,7 @@ void BuildLockset::VisitBinaryOperator(BinaryOperator *BO) { return; // adjust the context - LVarCtx = LocalVarMap.getNextContext(CtxIndex, BO, LVarCtx); + LVarCtx = Analyzer->LocalVarMap.getNextContext(CtxIndex, BO, LVarCtx); Expr *LHSExp = BO->getLHS()->IgnoreParenCasts(); checkAccess(LHSExp, AK_Written); @@ -1383,13 +1941,17 @@ void BuildLockset::VisitCXXConstructExpr(CXXConstructExpr *Exp) { void BuildLockset::VisitDeclStmt(DeclStmt *S) { // adjust the context - LVarCtx = LocalVarMap.getNextContext(CtxIndex, S, LVarCtx); + LVarCtx = Analyzer->LocalVarMap.getNextContext(CtxIndex, S, LVarCtx); DeclGroupRef DGrp = S->getDeclGroup(); for (DeclGroupRef::iterator I = DGrp.begin(), E = DGrp.end(); I != E; ++I) { Decl *D = *I; if (VarDecl *VD = dyn_cast_or_null(D)) { Expr *E = VD->getInit(); + // handle constructors that involve temporaries + if (ExprWithCleanups *EWC = dyn_cast_or_null(E)) + E = EWC->getSubExpr(); + if (CXXConstructExpr *CE = dyn_cast_or_null(E)) { NamedDecl *CtorD = dyn_cast_or_null(CE->getConstructor()); if (!CtorD || !CtorD->hasAttrs()) @@ -1401,6 +1963,7 @@ void BuildLockset::VisitDeclStmt(DeclStmt *S) { } + /// \brief Compute the intersection of two locksets and issue warnings for any /// locks in the symmetric difference. /// @@ -1409,58 +1972,80 @@ void BuildLockset::VisitDeclStmt(DeclStmt *S) { /// A; if () then B; else C; D; we need to check that the lockset after B and C /// are the same. In the event of a difference, we use the intersection of these /// two locksets at the start of D. -Lockset ThreadSafetyAnalyzer::intersectAndWarn(const CFGBlockInfo &Block1, - CFGBlockSide Side1, - const CFGBlockInfo &Block2, - CFGBlockSide Side2, - LockErrorKind LEK) { - Lockset LSet1 = Block1.getSet(Side1); - Lockset LSet2 = Block2.getSet(Side2); - - Lockset Intersection = LSet1; - for (Lockset::iterator I = LSet2.begin(), E = LSet2.end(); I != E; ++I) { - const MutexID &LSet2Mutex = I.getKey(); - const LockData &LSet2LockData = I.getData(); - if (const LockData *LD = LSet1.lookup(LSet2Mutex)) { - if (LD->LKind != LSet2LockData.LKind) { - Handler.handleExclusiveAndShared(LSet2Mutex.getName(), - LSet2LockData.AcquireLoc, - LD->AcquireLoc); - if (LD->LKind != LK_Exclusive) - Intersection = LocksetFactory.add(Intersection, LSet2Mutex, - LSet2LockData); +/// +/// \param LSet1 The first lockset. +/// \param LSet2 The second lockset. 
+/// \param JoinLoc The location of the join point for error reporting +/// \param LEK1 The error message to report if a mutex is missing from LSet1 +/// \param LEK2 The error message to report if a mutex is missing from Lset2 +void ThreadSafetyAnalyzer::intersectAndWarn(FactSet &FSet1, + const FactSet &FSet2, + SourceLocation JoinLoc, + LockErrorKind LEK1, + LockErrorKind LEK2, + bool Modify) { + FactSet FSet1Orig = FSet1; + + for (FactSet::const_iterator I = FSet2.begin(), E = FSet2.end(); + I != E; ++I) { + const SExpr &FSet2Mutex = FactMan[*I].MutID; + const LockData &LDat2 = FactMan[*I].LDat; + + if (const LockData *LDat1 = FSet1.findLock(FactMan, FSet2Mutex)) { + if (LDat1->LKind != LDat2.LKind) { + Handler.handleExclusiveAndShared(FSet2Mutex.toString(), + LDat2.AcquireLoc, + LDat1->AcquireLoc); + if (Modify && LDat1->LKind != LK_Exclusive) { + FSet1.removeLock(FactMan, FSet2Mutex); + FSet1.addLock(FactMan, FSet2Mutex, LDat2); + } } } else { - Handler.handleMutexHeldEndOfScope(LSet2Mutex.getName(), - LSet2LockData.AcquireLoc, - Block1.getLocation(Side1), LEK); + if (LDat2.UnderlyingMutex.isValid()) { + if (FSet2.findLock(FactMan, LDat2.UnderlyingMutex)) { + // If this is a scoped lock that manages another mutex, and if the + // underlying mutex is still held, then warn about the underlying + // mutex. + Handler.handleMutexHeldEndOfScope(LDat2.UnderlyingMutex.toString(), + LDat2.AcquireLoc, + JoinLoc, LEK1); + } + } + else if (!LDat2.Managed) + Handler.handleMutexHeldEndOfScope(FSet2Mutex.toString(), + LDat2.AcquireLoc, + JoinLoc, LEK1); } } - for (Lockset::iterator I = LSet1.begin(), E = LSet1.end(); I != E; ++I) { - if (!LSet2.contains(I.getKey())) { - const MutexID &Mutex = I.getKey(); - const LockData &MissingLock = I.getData(); - Handler.handleMutexHeldEndOfScope(Mutex.getName(), - MissingLock.AcquireLoc, - Block2.getLocation(Side2), LEK); - Intersection = LocksetFactory.remove(Intersection, Mutex); + for (FactSet::const_iterator I = FSet1.begin(), E = FSet1.end(); + I != E; ++I) { + const SExpr &FSet1Mutex = FactMan[*I].MutID; + const LockData &LDat1 = FactMan[*I].LDat; + + if (!FSet2.findLock(FactMan, FSet1Mutex)) { + if (LDat1.UnderlyingMutex.isValid()) { + if (FSet1Orig.findLock(FactMan, LDat1.UnderlyingMutex)) { + // If this is a scoped lock that manages another mutex, and if the + // underlying mutex is still held, then warn about the underlying + // mutex. + Handler.handleMutexHeldEndOfScope(LDat1.UnderlyingMutex.toString(), + LDat1.AcquireLoc, + JoinLoc, LEK1); + } + } + else if (!LDat1.Managed) + Handler.handleMutexHeldEndOfScope(FSet1Mutex.toString(), + LDat1.AcquireLoc, + JoinLoc, LEK2); + if (Modify) + FSet1.removeLock(FactMan, FSet1Mutex); } } - return Intersection; } -Lockset ThreadSafetyAnalyzer::addLock(Lockset &LSet, Expr *MutexExp, - const NamedDecl *D, - LockKind LK, SourceLocation Loc) { - MutexID Mutex(MutexExp, 0, D); - if (!Mutex.isValid()) { - MutexID::warnInvalidLock(Handler, MutexExp, 0, D); - return LSet; - } - LockData NewLock(Loc, LK); - return LocksetFactory.add(LSet, Mutex, NewLock); -} + /// \brief Check a function's CFG for thread-safety violations. /// @@ -1472,6 +2057,8 @@ void ThreadSafetyAnalyzer::runAnalysis(AnalysisDeclContext &AC) { if (!CFGraph) return; const NamedDecl *D = dyn_cast_or_null(AC.getDecl()); + // AC.dumpCFG(true); + if (!D) return; // Ignore anonymous functions for now. 
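The join-point logic above flags, for example, a lock that is only conditionally acquired; a sketch, with the diagnostic behavior paraphrased and declarations as in the earlier sketches:

    struct __attribute__((lockable)) Mutex {
      void Lock()   __attribute__((exclusive_lock_function));
      void Unlock() __attribute__((unlock_function));
    };
    Mutex mu;

    void h(bool b) {
      if (b)
        mu.Lock();
      // Join point: 'mu' appears in only one predecessor's lockset, so
      // intersectAndWarn reports it via LEK_LockedSomePredecessors and,
      // with Modify=true, drops it from the intersection.
      if (b)
        mu.Unlock();
    }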
if (D->getAttr()) @@ -1485,8 +2072,8 @@ void ThreadSafetyAnalyzer::runAnalysis(AnalysisDeclContext &AC) { if (isa(D)) return; // Don't check inside destructors. - std::vector BlockInfo(CFGraph->getNumBlockIDs(), - CFGBlockInfo::getEmptyBlockInfo(LocksetFactory, LocalVarMap)); + BlockInfo.resize(CFGraph->getNumBlockIDs(), + CFGBlockInfo::getEmptyBlockInfo(LocalVarMap)); // We need to explore the CFG via a "topological" ordering. // That way, we will be guaranteed to have information about required @@ -1505,27 +2092,22 @@ void ThreadSafetyAnalyzer::runAnalysis(AnalysisDeclContext &AC) { // FIXME: is there a more intelligent way to check lock/unlock functions? if (!SortedGraph->empty() && D->hasAttrs()) { const CFGBlock *FirstBlock = *SortedGraph->begin(); - Lockset &InitialLockset = BlockInfo[FirstBlock->getBlockID()].EntrySet; + FactSet &InitialLockset = BlockInfo[FirstBlock->getBlockID()].EntrySet; const AttrVec &ArgAttrs = D->getAttrs(); + + MutexIDList ExclusiveLocksToAdd; + MutexIDList SharedLocksToAdd; + + SourceLocation Loc = D->getLocation(); for (unsigned i = 0; i < ArgAttrs.size(); ++i) { Attr *Attr = ArgAttrs[i]; - SourceLocation AttrLoc = Attr->getLocation(); - if (SharedLocksRequiredAttr *SLRAttr - = dyn_cast(Attr)) { - for (SharedLocksRequiredAttr::args_iterator - SLRIter = SLRAttr->args_begin(), - SLREnd = SLRAttr->args_end(); SLRIter != SLREnd; ++SLRIter) - InitialLockset = addLock(InitialLockset, - *SLRIter, D, LK_Shared, - AttrLoc); - } else if (ExclusiveLocksRequiredAttr *ELRAttr - = dyn_cast(Attr)) { - for (ExclusiveLocksRequiredAttr::args_iterator - ELRIter = ELRAttr->args_begin(), - ELREnd = ELRAttr->args_end(); ELRIter != ELREnd; ++ELRIter) - InitialLockset = addLock(InitialLockset, - *ELRIter, D, LK_Exclusive, - AttrLoc); + Loc = Attr->getLocation(); + if (ExclusiveLocksRequiredAttr *A + = dyn_cast(Attr)) { + getMutexIDs(ExclusiveLocksToAdd, A, (Expr*) 0, D); + } else if (SharedLocksRequiredAttr *A + = dyn_cast(Attr)) { + getMutexIDs(SharedLocksToAdd, A, (Expr*) 0, D); } else if (isa(Attr)) { // Don't try to check unlock functions for now return; @@ -1535,8 +2117,24 @@ void ThreadSafetyAnalyzer::runAnalysis(AnalysisDeclContext &AC) { } else if (isa(Attr)) { // Don't try to check lock functions for now return; + } else if (isa(Attr)) { + // Don't try to check trylock functions for now + return; + } else if (isa(Attr)) { + // Don't try to check trylock functions for now + return; } } + + // FIXME -- Loc can be wrong here. + for (unsigned i=0,n=ExclusiveLocksToAdd.size(); ibegin(), @@ -1587,15 +2185,16 @@ void ThreadSafetyAnalyzer::runAnalysis(AnalysisDeclContext &AC) { int PrevBlockID = (*PI)->getBlockID(); CFGBlockInfo *PrevBlockInfo = &BlockInfo[PrevBlockID]; + FactSet PrevLockset; + getEdgeLockset(PrevLockset, PrevBlockInfo->ExitSet, *PI, CurrBlock); if (!LocksetInitialized) { - CurrBlockInfo->EntrySet = PrevBlockInfo->ExitSet; + CurrBlockInfo->EntrySet = PrevLockset; LocksetInitialized = true; } else { - CurrBlockInfo->EntrySet = - intersectAndWarn(*CurrBlockInfo, CBS_Entry, - *PrevBlockInfo, CBS_Exit, - LEK_LockedSomePredecessors); + intersectAndWarn(CurrBlockInfo->EntrySet, PrevLockset, + CurrBlockInfo->EntryLoc, + LEK_LockedSomePredecessors); } } @@ -1619,23 +2218,20 @@ void ThreadSafetyAnalyzer::runAnalysis(AnalysisDeclContext &AC) { const Stmt *Terminator = PrevBlock->getTerminator(); bool IsLoop = Terminator && isa(Terminator); + FactSet PrevLockset; + getEdgeLockset(PrevLockset, PrevBlockInfo->ExitSet, + PrevBlock, CurrBlock); + // Do not update EntrySet. 
- intersectAndWarn(*CurrBlockInfo, CBS_Entry, *PrevBlockInfo, CBS_Exit, + intersectAndWarn(CurrBlockInfo->EntrySet, PrevLockset, + PrevBlockInfo->ExitLoc, IsLoop ? LEK_LockedSomeLoopIterations - : LEK_LockedSomePredecessors); + : LEK_LockedSomePredecessors, + false); } } BuildLockset LocksetBuilder(this, *CurrBlockInfo); - CFGBlock::const_pred_iterator PI = CurrBlock->pred_begin(), - PE = CurrBlock->pred_end(); - if (PI != PE) { - // If the predecessor ended in a branch, then process any trylocks. - // FIXME -- check to make sure there's only one predecessor. - if (Stmt *TCE = (*PI)->getTerminatorCondition()) { - LocksetBuilder.handleTrylock(TCE, *PI, CurrBlock); - } - } // Visit all the statements in the basic block. for (CFGBlock::const_iterator BI = CurrBlock->begin(), @@ -1665,7 +2261,7 @@ void ThreadSafetyAnalyzer::runAnalysis(AnalysisDeclContext &AC) { break; } } - CurrBlockInfo->ExitSet = LocksetBuilder.LSet; + CurrBlockInfo->ExitSet = LocksetBuilder.FSet; // For every back edge from CurrBlock (the end of the loop) to another block // (FirstLoopBlock) we need to check that the Lockset of Block is equal to @@ -1679,19 +2275,24 @@ void ThreadSafetyAnalyzer::runAnalysis(AnalysisDeclContext &AC) { continue; CFGBlock *FirstLoopBlock = *SI; - CFGBlockInfo &PreLoop = BlockInfo[FirstLoopBlock->getBlockID()]; - CFGBlockInfo &LoopEnd = BlockInfo[CurrBlockID]; - intersectAndWarn(LoopEnd, CBS_Exit, PreLoop, CBS_Entry, - LEK_LockedSomeLoopIterations); + CFGBlockInfo *PreLoop = &BlockInfo[FirstLoopBlock->getBlockID()]; + CFGBlockInfo *LoopEnd = &BlockInfo[CurrBlockID]; + intersectAndWarn(LoopEnd->ExitSet, PreLoop->EntrySet, + PreLoop->EntryLoc, + LEK_LockedSomeLoopIterations, + false); } } - CFGBlockInfo &Initial = BlockInfo[CFGraph->getEntry().getBlockID()]; - CFGBlockInfo &Final = BlockInfo[CFGraph->getExit().getBlockID()]; + CFGBlockInfo *Initial = &BlockInfo[CFGraph->getEntry().getBlockID()]; + CFGBlockInfo *Final = &BlockInfo[CFGraph->getExit().getBlockID()]; // FIXME: Should we call this function for all blocks which exit the function? 
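Back edges get the same intersection with Modify=false and LEK_LockedSomeLoopIterations, catching locks not released by the end of each loop iteration; the end-of-function variant follows below. A sketch:

    struct __attribute__((lockable)) Mutex {
      void Lock() __attribute__((exclusive_lock_function));
    };
    Mutex mu;

    void loop(int n) {
      for (int i = 0; i < n; ++i) {
        mu.Lock();
        // No Unlock before the back edge: the loop head's entry lockset
        // and the latch's exit lockset disagree, so
        // LEK_LockedSomeLoopIterations is reported.
      }
    }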
- intersectAndWarn(Initial, CBS_Entry, Final, CBS_Exit, - LEK_LockedAtEndOfFunction); + intersectAndWarn(Initial->EntrySet, Final->ExitSet, + Final->ExitLoc, + LEK_LockedAtEndOfFunction, + LEK_NotLockedAtEndOfFunction, + false); } } // end anonymous namespace diff --git a/lib/Analysis/UninitializedValues.cpp b/lib/Analysis/UninitializedValues.cpp index 1c7e6b6..858be45 100644 --- a/lib/Analysis/UninitializedValues.cpp +++ b/lib/Analysis/UninitializedValues.cpp @@ -16,6 +16,7 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/PackedVector.h" #include "llvm/ADT/DenseMap.h" +#include "clang/AST/ASTContext.h" #include "clang/AST/Decl.h" #include "clang/Analysis/CFG.h" #include "clang/Analysis/AnalysisContext.h" @@ -25,6 +26,8 @@ using namespace clang; +#define DEBUG_LOGGING 0 + static bool isTrackedVar(const VarDecl *vd, const DeclContext *dc) { if (vd->isLocalVarDecl() && !vd->hasGlobalStorage() && !vd->isExceptionVariable() && @@ -95,143 +98,79 @@ static bool isAlwaysUninit(const Value v) { namespace { typedef llvm::PackedVector ValueVector; -typedef std::pair BVPair; class CFGBlockValues { const CFG &cfg; - BVPair *vals; + std::vector vals; ValueVector scratch; DeclToIndex declToIndex; - - ValueVector &lazyCreate(ValueVector *&bv); public: CFGBlockValues(const CFG &cfg); ~CFGBlockValues(); - + unsigned getNumEntries() const { return declToIndex.size(); } void computeSetOfDeclarations(const DeclContext &dc); - ValueVector &getValueVector(const CFGBlock *block, - const CFGBlock *dstBlock); - - BVPair &getValueVectors(const CFGBlock *block, bool shouldLazyCreate); + ValueVector &getValueVector(const CFGBlock *block) { + return *vals[block->getBlockID()]; + } + void setAllScratchValues(Value V); void mergeIntoScratch(ValueVector const &source, bool isFirst); bool updateValueVectorWithScratch(const CFGBlock *block); - bool updateValueVectors(const CFGBlock *block, const BVPair &newVals); bool hasNoDeclarations() const { return declToIndex.size() == 0; } void resetScratch(); - ValueVector &getScratch() { return scratch; } ValueVector::reference operator[](const VarDecl *vd); + + Value getValue(const CFGBlock *block, const CFGBlock *dstBlock, + const VarDecl *vd) { + const llvm::Optional &idx = declToIndex.getValueIndex(vd); + assert(idx.hasValue()); + return getValueVector(block)[idx.getValue()]; + } }; } // end anonymous namespace -CFGBlockValues::CFGBlockValues(const CFG &c) : cfg(c), vals(0) { - unsigned n = cfg.getNumBlockIDs(); - if (!n) - return; - vals = new std::pair[n]; - memset((void*)vals, 0, sizeof(*vals) * n); -} +CFGBlockValues::CFGBlockValues(const CFG &c) : cfg(c), vals(0) {} CFGBlockValues::~CFGBlockValues() { - unsigned n = cfg.getNumBlockIDs(); - if (n == 0) - return; - for (unsigned i = 0; i < n; ++i) { - delete vals[i].first; - delete vals[i].second; - } - delete [] vals; + for (std::vector::iterator I = vals.begin(), E = vals.end(); + I != E; ++I) + delete *I; } void CFGBlockValues::computeSetOfDeclarations(const DeclContext &dc) { declToIndex.computeMap(dc); - scratch.resize(declToIndex.size()); -} - -ValueVector &CFGBlockValues::lazyCreate(ValueVector *&bv) { - if (!bv) - bv = new ValueVector(declToIndex.size()); - return *bv; -} - -/// This function pattern matches for a '&&' or '||' that appears at -/// the beginning of a CFGBlock that also (1) has a terminator and -/// (2) has no other elements. If such an expression is found, it is returned. 
-static const BinaryOperator *getLogicalOperatorInChain(const CFGBlock *block) { - if (block->empty()) - return 0; - - CFGElement front = block->front(); - const CFGStmt *cstmt = front.getAs(); - if (!cstmt) - return 0; - - const BinaryOperator *b = dyn_cast_or_null(cstmt->getStmt()); - - if (!b || !b->isLogicalOp()) - return 0; - - if (block->pred_size() == 2) { - if (block->getTerminatorCondition() == b) { - if (block->succ_size() == 2) - return b; - } - else if (block->size() == 1) - return b; - } - - return 0; -} - -ValueVector &CFGBlockValues::getValueVector(const CFGBlock *block, - const CFGBlock *dstBlock) { - unsigned idx = block->getBlockID(); - if (dstBlock && getLogicalOperatorInChain(block)) { - if (*block->succ_begin() == dstBlock) - return lazyCreate(vals[idx].first); - assert(*(block->succ_begin()+1) == dstBlock); - return lazyCreate(vals[idx].second); - } - - assert(vals[idx].second == 0); - return lazyCreate(vals[idx].first); -} - -BVPair &CFGBlockValues::getValueVectors(const clang::CFGBlock *block, - bool shouldLazyCreate) { - unsigned idx = block->getBlockID(); - lazyCreate(vals[idx].first); - if (shouldLazyCreate) - lazyCreate(vals[idx].second); - return vals[idx]; + unsigned decls = declToIndex.size(); + scratch.resize(decls); + unsigned n = cfg.getNumBlockIDs(); + if (!n) + return; + vals.resize(n); + for (unsigned i = 0; i < n; ++i) + vals[i] = new ValueVector(decls); } -#if 0 +#if DEBUG_LOGGING static void printVector(const CFGBlock *block, ValueVector &bv, unsigned num) { - llvm::errs() << block->getBlockID() << " :"; for (unsigned i = 0; i < bv.size(); ++i) { llvm::errs() << ' ' << bv[i]; } llvm::errs() << " : " << num << '\n'; } +#endif -static void printVector(const char *name, ValueVector const &bv) { - llvm::errs() << name << " : "; - for (unsigned i = 0; i < bv.size(); ++i) { - llvm::errs() << ' ' << bv[i]; - } - llvm::errs() << "\n"; +void CFGBlockValues::setAllScratchValues(Value V) { + for (unsigned I = 0, E = scratch.size(); I != E; ++I) + scratch[I] = V; } -#endif void CFGBlockValues::mergeIntoScratch(ValueVector const &source, bool isFirst) { @@ -242,30 +181,16 @@ void CFGBlockValues::mergeIntoScratch(ValueVector const &source, } bool CFGBlockValues::updateValueVectorWithScratch(const CFGBlock *block) { - ValueVector &dst = getValueVector(block, 0); + ValueVector &dst = getValueVector(block); bool changed = (dst != scratch); if (changed) dst = scratch; -#if 0 +#if DEBUG_LOGGING printVector(block, scratch, 0); #endif return changed; } -bool CFGBlockValues::updateValueVectors(const CFGBlock *block, - const BVPair &newVals) { - BVPair &vals = getValueVectors(block, true); - bool changed = *newVals.first != *vals.first || - *newVals.second != *vals.second; - *vals.first = *newVals.first; - *vals.second = *newVals.second; -#if 0 - printVector(block, *vals.first, 1); - printVector(block, *vals.second, 2); -#endif - return changed; -} - void CFGBlockValues::resetScratch() { scratch.reset(); } @@ -321,7 +246,7 @@ const CFGBlock *DataflowWorklist::dequeue() { } //------------------------------------------------------------------------====// -// Transfer function for uninitialized values analysis. +// Classification of DeclRefExprs as use or initialization. 
//====------------------------------------------------------------------------// namespace { @@ -329,106 +254,339 @@ class FindVarResult { const VarDecl *vd; const DeclRefExpr *dr; public: - FindVarResult(VarDecl *vd, DeclRefExpr *dr) : vd(vd), dr(dr) {} - + FindVarResult(const VarDecl *vd, const DeclRefExpr *dr) : vd(vd), dr(dr) {} + const DeclRefExpr *getDeclRefExpr() const { return dr; } const VarDecl *getDecl() const { return vd; } }; - + +static const Expr *stripCasts(ASTContext &C, const Expr *Ex) { + while (Ex) { + Ex = Ex->IgnoreParenNoopCasts(C); + if (const CastExpr *CE = dyn_cast(Ex)) { + if (CE->getCastKind() == CK_LValueBitCast) { + Ex = CE->getSubExpr(); + continue; + } + } + break; + } + return Ex; +} + +/// If E is an expression comprising a reference to a single variable, find that +/// variable. +static FindVarResult findVar(const Expr *E, const DeclContext *DC) { + if (const DeclRefExpr *DRE = + dyn_cast(stripCasts(DC->getParentASTContext(), E))) + if (const VarDecl *VD = dyn_cast(DRE->getDecl())) + if (isTrackedVar(VD, DC)) + return FindVarResult(VD, DRE); + return FindVarResult(0, 0); +} + +/// \brief Classify each DeclRefExpr as an initialization or a use. Any +/// DeclRefExpr which isn't explicitly classified will be assumed to have +/// escaped the analysis and will be treated as an initialization. +class ClassifyRefs : public StmtVisitor { +public: + enum Class { + Init, + Use, + SelfInit, + Ignore + }; + +private: + const DeclContext *DC; + llvm::DenseMap Classification; + + bool isTrackedVar(const VarDecl *VD) const { + return ::isTrackedVar(VD, DC); + } + + void classify(const Expr *E, Class C); + +public: + ClassifyRefs(AnalysisDeclContext &AC) : DC(cast(AC.getDecl())) {} + + void VisitDeclStmt(DeclStmt *DS); + void VisitUnaryOperator(UnaryOperator *UO); + void VisitBinaryOperator(BinaryOperator *BO); + void VisitCallExpr(CallExpr *CE); + void VisitCastExpr(CastExpr *CE); + + void operator()(Stmt *S) { Visit(S); } + + Class get(const DeclRefExpr *DRE) const { + llvm::DenseMap::const_iterator I + = Classification.find(DRE); + if (I != Classification.end()) + return I->second; + + const VarDecl *VD = dyn_cast(DRE->getDecl()); + if (!VD || !isTrackedVar(VD)) + return Ignore; + + return Init; + } +}; +} + +static const DeclRefExpr *getSelfInitExpr(VarDecl *VD) { + if (Expr *Init = VD->getInit()) { + const DeclRefExpr *DRE + = dyn_cast(stripCasts(VD->getASTContext(), Init)); + if (DRE && DRE->getDecl() == VD) + return DRE; + } + return 0; +} + +void ClassifyRefs::classify(const Expr *E, Class C) { + FindVarResult Var = findVar(E, DC); + if (const DeclRefExpr *DRE = Var.getDeclRefExpr()) + Classification[DRE] = std::max(Classification[DRE], C); +} + +void ClassifyRefs::VisitDeclStmt(DeclStmt *DS) { + for (DeclStmt::decl_iterator DI = DS->decl_begin(), DE = DS->decl_end(); + DI != DE; ++DI) { + VarDecl *VD = dyn_cast(*DI); + if (VD && isTrackedVar(VD)) + if (const DeclRefExpr *DRE = getSelfInitExpr(VD)) + Classification[DRE] = SelfInit; + } +} + +void ClassifyRefs::VisitBinaryOperator(BinaryOperator *BO) { + // Ignore the evaluation of a DeclRefExpr on the LHS of an assignment. If this + // is not a compound-assignment, we will treat it as initializing the variable + // when TransferFunctions visits it. A compound-assignment does not affect + // whether a variable is uninitialized, and there's no point counting it as a + // use. 
+ if (BO->isCompoundAssignmentOp()) + classify(BO->getLHS(), Use); + else if (BO->getOpcode() == BO_Assign) + classify(BO->getLHS(), Ignore); +} + +void ClassifyRefs::VisitUnaryOperator(UnaryOperator *UO) { + // Increment and decrement are uses despite there being no lvalue-to-rvalue + // conversion. + if (UO->isIncrementDecrementOp()) + classify(UO->getSubExpr(), Use); +} + +void ClassifyRefs::VisitCallExpr(CallExpr *CE) { + // If a value is passed by const reference to a function, we should not assume + // that it is initialized by the call, and we conservatively do not assume + // that it is used. + for (CallExpr::arg_iterator I = CE->arg_begin(), E = CE->arg_end(); + I != E; ++I) + if ((*I)->getType().isConstQualified() && (*I)->isGLValue()) + classify(*I, Ignore); +} + +void ClassifyRefs::VisitCastExpr(CastExpr *CE) { + if (CE->getCastKind() == CK_LValueToRValue) + classify(CE->getSubExpr(), Use); + else if (CStyleCastExpr *CSE = dyn_cast(CE)) { + if (CSE->getType()->isVoidType()) { + // Squelch any detected load of an uninitialized value if + // we cast it to void. + // e.g. (void) x; + classify(CSE->getSubExpr(), Ignore); + } + } +} + +//------------------------------------------------------------------------====// +// Transfer function for uninitialized values analysis. +//====------------------------------------------------------------------------// + +namespace { class TransferFunctions : public StmtVisitor { CFGBlockValues &vals; const CFG &cfg; + const CFGBlock *block; AnalysisDeclContext ∾ + const ClassifyRefs &classification; UninitVariablesHandler *handler; - - /// The last DeclRefExpr seen when analyzing a block. Used to - /// cheat when detecting cases when the address of a variable is taken. - DeclRefExpr *lastDR; - - /// The last lvalue-to-rvalue conversion of a variable whose value - /// was uninitialized. Normally this results in a warning, but it is - /// possible to either silence the warning in some cases, or we - /// propagate the uninitialized value. - CastExpr *lastLoad; - - /// For some expressions, we want to ignore any post-processing after - /// visitation. 
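Taken together (the TransferFunctions rewrite continues below), the classification rules label variable references roughly as follows; an illustrative snippet:

    void demo() {
      int x = x;  // RHS 'x': SelfInit (deliberately left uninitialized)
      int y;
      y = 1;      // plain assignment LHS: Ignore; TransferFunctions marks
                  // 'y' initialized when it visits the assignment itself
      y += 1;     // compound assignment LHS: Use (the old value is read)
      ++y;        // increment: Use despite no lvalue-to-rvalue cast
      (void)y;    // C-style cast to void: Ignore (squelches the load)
    }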
- bool skipProcessUses; - + public: TransferFunctions(CFGBlockValues &vals, const CFG &cfg, - AnalysisDeclContext &ac, + const CFGBlock *block, AnalysisDeclContext &ac, + const ClassifyRefs &classification, UninitVariablesHandler *handler) - : vals(vals), cfg(cfg), ac(ac), handler(handler), - lastDR(0), lastLoad(0), - skipProcessUses(false) {} - - void reportUninit(const DeclRefExpr *ex, const VarDecl *vd, - bool isAlwaysUninit); + : vals(vals), cfg(cfg), block(block), ac(ac), + classification(classification), handler(handler) {} + void reportUse(const Expr *ex, const VarDecl *vd); + + void VisitObjCForCollectionStmt(ObjCForCollectionStmt *FS); void VisitBlockExpr(BlockExpr *be); + void VisitCallExpr(CallExpr *ce); void VisitDeclStmt(DeclStmt *ds); void VisitDeclRefExpr(DeclRefExpr *dr); - void VisitUnaryOperator(UnaryOperator *uo); void VisitBinaryOperator(BinaryOperator *bo); - void VisitCastExpr(CastExpr *ce); - void VisitObjCForCollectionStmt(ObjCForCollectionStmt *fs); - void Visit(Stmt *s); - + bool isTrackedVar(const VarDecl *vd) { return ::isTrackedVar(vd, cast(ac.getDecl())); } - - FindVarResult findBlockVarDecl(Expr *ex); - - void ProcessUses(Stmt *s = 0); -}; -} -static const Expr *stripCasts(ASTContext &C, const Expr *Ex) { - while (Ex) { - Ex = Ex->IgnoreParenNoopCasts(C); - if (const CastExpr *CE = dyn_cast(Ex)) { - if (CE->getCastKind() == CK_LValueBitCast) { - Ex = CE->getSubExpr(); - continue; + FindVarResult findVar(const Expr *ex) { + return ::findVar(ex, cast(ac.getDecl())); + } + + UninitUse getUninitUse(const Expr *ex, const VarDecl *vd, Value v) { + UninitUse Use(ex, isAlwaysUninit(v)); + + assert(isUninitialized(v)); + if (Use.getKind() == UninitUse::Always) + return Use; + + // If an edge which leads unconditionally to this use did not initialize + // the variable, we can say something stronger than 'may be uninitialized': + // we can say 'either it's used uninitialized or you have dead code'. + // + // We track the number of successors of a node which have been visited, and + // visit a node once we have visited all of its successors. Only edges where + // the variable might still be uninitialized are followed. Since a variable + // can't transfer from being initialized to being uninitialized, this will + // trace out the subgraph which inevitably leads to the use and does not + // initialize the variable. We do not want to skip past loops, since their + // non-termination might be correlated with the initialization condition. + // + // For example: + // + // void f(bool a, bool b) { + // block1: int n; + // if (a) { + // block2: if (b) + // block3: n = 1; + // block4: } else if (b) { + // block5: while (!a) { + // block6: do_work(&a); + // n = 2; + // } + // } + // block7: if (a) + // block8: g(); + // block9: return n; + // } + // + // Starting from the maybe-uninitialized use in block 9: + // * Block 7 is not visited because we have only visited one of its two + // successors. + // * Block 8 is visited because we've visited its only successor. + // From block 8: + // * Block 7 is visited because we've now visited both of its successors. + // From block 7: + // * Blocks 1, 2, 4, 5, and 6 are not visited because we didn't visit all + // of their successors (we didn't visit 4, 3, 5, 6, and 5, respectively). + // * Block 3 is not visited because it initializes 'n'. + // Now the algorithm terminates, having visited blocks 7 and 8, and having + // found the frontier is blocks 2, 4, and 5. 
+ // + // 'n' is definitely uninitialized for two edges into block 7 (from blocks 2 + // and 4), so we report that any time either of those edges is taken (in + // each case when 'b == false'), 'n' is used uninitialized. + llvm::SmallVector Queue; + llvm::SmallVector SuccsVisited(cfg.getNumBlockIDs(), 0); + Queue.push_back(block); + // Specify that we've already visited all successors of the starting block. + // This has the dual purpose of ensuring we never add it to the queue, and + // of marking it as not being a candidate element of the frontier. + SuccsVisited[block->getBlockID()] = block->succ_size(); + while (!Queue.empty()) { + const CFGBlock *B = Queue.back(); + Queue.pop_back(); + for (CFGBlock::const_pred_iterator I = B->pred_begin(), E = B->pred_end(); + I != E; ++I) { + const CFGBlock *Pred = *I; + if (vals.getValue(Pred, B, vd) == Initialized) + // This block initializes the variable. + continue; + + unsigned &SV = SuccsVisited[Pred->getBlockID()]; + if (!SV) { + // When visiting the first successor of a block, mark all NULL + // successors as having been visited. + for (CFGBlock::const_succ_iterator SI = Pred->succ_begin(), + SE = Pred->succ_end(); + SI != SE; ++SI) + if (!*SI) + ++SV; + } + + if (++SV == Pred->succ_size()) + // All paths from this block lead to the use and don't initialize the + // variable. + Queue.push_back(Pred); + } + } + + // Scan the frontier, looking for blocks where the variable was + // uninitialized. + for (CFG::const_iterator BI = cfg.begin(), BE = cfg.end(); BI != BE; ++BI) { + const CFGBlock *Block = *BI; + unsigned BlockID = Block->getBlockID(); + const Stmt *Term = Block->getTerminator(); + if (SuccsVisited[BlockID] && SuccsVisited[BlockID] < Block->succ_size() && + Term) { + // This block inevitably leads to the use. If we have an edge from here + // to a post-dominator block, and the variable is uninitialized on that + // edge, we have found a bug. + for (CFGBlock::const_succ_iterator I = Block->succ_begin(), + E = Block->succ_end(); I != E; ++I) { + const CFGBlock *Succ = *I; + if (Succ && SuccsVisited[Succ->getBlockID()] >= Succ->succ_size() && + vals.getValue(Block, Succ, vd) == Uninitialized) { + // Switch cases are a special case: report the label to the caller + // as the 'terminator', not the switch statement itself. Suppress + // situations where no label matched: we can't be sure that's + // possible. + if (isa(Term)) { + const Stmt *Label = Succ->getLabel(); + if (!Label || !isa(Label)) + // Might not be possible. + continue; + UninitUse::Branch Branch; + Branch.Terminator = Label; + Branch.Output = 0; // Ignored. 
-void TransferFunctions::VisitUnaryOperator(clang::UnaryOperator *uo) {
-  switch (uo->getOpcode()) {
-    case clang::UO_PostDec:
-    case clang::UO_PostInc:
-    case clang::UO_PreDec:
-    case clang::UO_PreInc: {
-      const FindVarResult &res = findBlockVarDecl(uo->getSubExpr());
-      if (const VarDecl *vd = res.getDecl()) {
-        assert(res.getDeclRefExpr() == lastDR);
-        // We null out lastDR to indicate we have fully processed it
-        // and we don't want the auto-value setting in Visit().
-        lastDR = 0;
-
-        ValueVector::reference val = vals[vd];
-        if (isUninitialized(val))
-          reportUninit(res.getDeclRefExpr(), vd, isAlwaysUninit(val));
-      }
-      break;
-    }
-    default:
-      break;
+void TransferFunctions::VisitBinaryOperator(BinaryOperator *BO) {
+  if (BO->getOpcode() == BO_Assign) {
+    FindVarResult Var = findVar(BO->getLHS());
+    if (const VarDecl *VD = Var.getDecl())
+      vals[VD] = Initialized;
   }
 }
-void TransferFunctions::VisitCastExpr(clang::CastExpr *ce) {
-  if (ce->getCastKind() == CK_LValueToRValue) {
-    const FindVarResult &res = findBlockVarDecl(ce->getSubExpr());
-    if (res.getDecl()) {
-      assert(res.getDeclRefExpr() == lastDR);
-      lastLoad = ce;
-    }
-  }
-  else if (ce->getCastKind() == CK_NoOp ||
-           ce->getCastKind() == CK_LValueBitCast) {
-    skipProcessUses = true;
-  }
-  else if (CStyleCastExpr *cse = dyn_cast<CStyleCastExpr>(ce)) {
-    if (cse->getType()->isVoidType()) {
-      // e.g. (void) x;
-      if (lastLoad == cse->getSubExpr()) {
-        // Squelch any detected load of an uninitialized value if
-        // we cast it to void.
-        lastLoad = 0;
-        lastDR = 0;
-      }
-    }
-  }
-}
-
+void TransferFunctions::VisitDeclStmt(DeclStmt *DS) {
+  for (DeclStmt::decl_iterator DI = DS->decl_begin(), DE = DS->decl_end();
+       DI != DE; ++DI) {
+    VarDecl *VD = dyn_cast<VarDecl>(*DI);
+    if (VD && isTrackedVar(VD)) {
+      if (getSelfInitExpr(VD)) {
+        // If the initializer consists solely of a reference to itself, we
+        // explicitly mark the variable as uninitialized. This allows code
+        // like the following:
+        //
+        //   int x = x;
+        //
+        // to deliberately leave a variable uninitialized. Different analysis
+        // clients can detect this pattern and adjust their reporting
+        // appropriately, but we need to continue to analyze subsequent uses
+        // of the variable.
+        vals[VD] = Uninitialized;
+      } else if (VD->getInit()) {
+        // Treat the new variable as initialized.
+        vals[VD] = Initialized;
+      } else {
+        // No initializer: the variable is now uninitialized. This matters
+        // for cases like:
+        //   while (...) {
+        //     int n;
+        //     use(n);
+        //     n = 0;
+        //   }
+        // FIXME: Mark the variable as uninitialized whenever its scope is
+        // left, since its scope could be re-entered by a jump over the
+        // declaration.
+        vals[VD] = Uninitialized;
+      }
+    }
+  }
+}
 
-void TransferFunctions::Visit(clang::Stmt *s) {
-  skipProcessUses = false;
-  StmtVisitor<TransferFunctions>::Visit(s);
-  if (!skipProcessUses)
-    ProcessUses(s);
-}
-
-void TransferFunctions::ProcessUses(Stmt *s) {
-  // This method is typically called after visiting a CFGElement statement
-  // in the CFG. We delay processing of reporting many loads of uninitialized
-  // values until here.
-  if (lastLoad) {
-    // If we just visited the lvalue-to-rvalue cast, there is nothing
-    // left to do.
-    if (lastLoad == s)
-      return;
-
-    const DeclRefExpr *DR =
-      cast<DeclRefExpr>(stripCasts(ac.getASTContext(),
-                                   lastLoad->getSubExpr()));
-    const VarDecl *VD = cast<VarDecl>(DR->getDecl());
-
-    // If we reach here, we may have seen a load of an uninitialized value
-    // and it hasn't been casted to void or otherwise handled. In this
-    // situation, report the incident.
-    if (isUninitialized(vals[VD]))
-      reportUninit(DR, VD, isAlwaysUninit(vals[VD]));
-
-    lastLoad = 0;
-
-    if (DR == lastDR) {
-      lastDR = 0;
-      return;
-    }
-  }
-
-  // Any other uses of 'lastDR' involve taking an lvalue of variable.
-  // In this case, it "escapes" the analysis.
-  if (lastDR && lastDR != s) {
-    vals[cast<VarDecl>(lastDR->getDecl())] = Initialized;
-    lastDR = 0;
-  }
-}
-
 //------------------------------------------------------------------------====//
 // High-level "driver" logic for uninitialized values analysis.
 //====------------------------------------------------------------------------//
 
 static bool runOnBlock(const CFGBlock *block, const CFG &cfg,
                        AnalysisDeclContext &ac, CFGBlockValues &vals,
+                       const ClassifyRefs &classification,
                        llvm::BitVector &wasAnalyzed,
                        UninitVariablesHandler *handler = 0) {
-  wasAnalyzed[block->getBlockID()] = true;
-
-  if (const BinaryOperator *b = getLogicalOperatorInChain(block)) {
-    CFGBlock::const_pred_iterator itr = block->pred_begin();
-    BVPair vA = vals.getValueVectors(*itr, false);
-    ++itr;
-    BVPair vB = vals.getValueVectors(*itr, false);
-
-    BVPair valsAB;
-
-    if (b->getOpcode() == BO_LAnd) {
-      // Merge the 'F' bits from the first and second.
-      vals.mergeIntoScratch(*(vA.second ? vA.second : vA.first), true);
-      vals.mergeIntoScratch(*(vB.second ? vB.second : vB.first), false);
-      valsAB.first = vA.first;
-      valsAB.second = &vals.getScratch();
-    } else {
-      // Merge the 'T' bits from the first and second.
-      assert(b->getOpcode() == BO_LOr);
-      vals.mergeIntoScratch(*vA.first, true);
-      vals.mergeIntoScratch(*vB.first, false);
-      valsAB.first = &vals.getScratch();
-      valsAB.second = vA.second ? vA.second : vA.first;
-    }
-    return vals.updateValueVectors(block, valsAB);
-  }
-
-  // Default behavior: merge in values of predecessor blocks.
   vals.resetScratch();
+  // Merge in values of predecessor blocks.
   bool isFirst = true;
   for (CFGBlock::const_pred_iterator I = block->pred_begin(),
        E = block->pred_end(); I != E; ++I) {
     const CFGBlock *pred = *I;
     if (wasAnalyzed[pred->getBlockID()]) {
-      vals.mergeIntoScratch(vals.getValueVector(pred, block), isFirst);
+      vals.mergeIntoScratch(vals.getValueVector(pred), isFirst);
       isFirst = false;
     }
   }
   // Apply the transfer function.
-  TransferFunctions tf(vals, cfg, ac, handler);
+  TransferFunctions tf(vals, cfg, block, ac, classification, handler);
   for (CFGBlock::const_iterator I = block->begin(), E = block->end();
        I != E; ++I) {
     if (const CFGStmt *cs = dyn_cast<CFGStmt>(&*I)) {
       tf.Visit(const_cast<Stmt*>(cs->getStmt()));
     }
   }
-  tf.ProcessUses();
   return vals.updateValueVectorWithScratch(block);
 }
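// --- Illustrative sketch (editorial addition, not from the clang sources) ---
// runOnBlock() above is one step of a classic forward dataflow solver: merge
// predecessor facts, apply a transfer function, and re-queue successors when
// the block's fact changes. Stripped of clang's types, the loop below is the
// whole fixed-point scheme; Fact, Block, transfer() and solve() are
// placeholders invented for this sketch, not clang APIs.
#include <cstddef>
#include <deque>
#include <vector>

typedef unsigned Fact;                 // e.g. one bit per tracked variable

struct Block {
  std::vector<int> preds, succs;
};

// Placeholder transfer function: a real one would walk the block's
// statements, clearing bits for stores and reporting reads of set bits.
static Fact transfer(int /*blockID*/, Fact in) { return in; }

static void solve(std::vector<Block> &cfg, std::vector<Fact> &facts) {
  std::deque<int> worklist;
  for (int b = 0, e = (int)cfg.size(); b != e; ++b)
    worklist.push_back(b);
  while (!worklist.empty()) {
    int b = worklist.front();
    worklist.pop_front();
    Fact in = 0;
    for (std::size_t i = 0; i < cfg[b].preds.size(); ++i)
      in |= facts[cfg[b].preds[i]];    // merge, like mergeIntoScratch()
    Fact out = transfer(b, in);
    if (out != facts[b]) {             // like updateValueVectorWithScratch()
      facts[b] = out;
      for (std::size_t i = 0; i < cfg[b].succs.size(); ++i)
        worklist.push_back(cfg[b].succs[i]);   // enqueueSuccessors()
    }
  }
}
// Because the merge is a monotone union and transfer() cannot turn an
// initialized bit back into an uninitialized one, the loop terminates.
// --- End of sketch ---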
 
@@ -683,17 +722,16 @@ void clang::runUninitializedVariablesAnalysis(
 
   stats.NumVariablesAnalyzed = vals.getNumEntries();
 
+  // Precompute which expressions are uses and which are initializations.
+  ClassifyRefs classification(ac);
+  cfg.VisitBlockStmts(classification);
+
   // Mark all variables uninitialized at the entry.
   const CFGBlock &entry = cfg.getEntry();
-  for (CFGBlock::const_succ_iterator i = entry.succ_begin(),
-        e = entry.succ_end(); i != e; ++i) {
-    if (const CFGBlock *succ = *i) {
-      ValueVector &vec = vals.getValueVector(&entry, succ);
-      const unsigned n = vals.getNumEntries();
-      for (unsigned j = 0; j < n ; ++j) {
-        vec[j] = Uninitialized;
-      }
-    }
+  ValueVector &vec = vals.getValueVector(&entry);
+  const unsigned n = vals.getNumEntries();
+  for (unsigned j = 0; j < n ; ++j) {
+    vec[j] = Uninitialized;
   }
 
   // Proceed with the workist.
@@ -705,7 +743,8 @@ void clang::runUninitializedVariablesAnalysis(
 
   while (const CFGBlock *block = worklist.dequeue()) {
     // Did the block change?
-    bool changed = runOnBlock(block, cfg, ac, vals, wasAnalyzed);
+    bool changed = runOnBlock(block, cfg, ac, vals,
+                              classification, wasAnalyzed);
     ++stats.NumBlockVisits;
     if (changed || !previouslyVisited[block->getBlockID()])
       worklist.enqueueSuccessors(block);
@@ -716,7 +755,7 @@ void clang::runUninitializedVariablesAnalysis(
   for (CFG::const_iterator BI = cfg.begin(), BE = cfg.end(); BI != BE; ++BI) {
     const CFGBlock *block = *BI;
     if (wasAnalyzed[block->getBlockID()]) {
-      runOnBlock(block, cfg, ac, vals, wasAnalyzed, &handler);
+      runOnBlock(block, cfg, ac, vals, classification, wasAnalyzed, &handler);
       ++stats.NumBlockVisits;
     }
   }
diff --git a/lib/Basic/CMakeLists.txt b/lib/Basic/CMakeLists.txt
index ef2e93c..73e693b 100644
--- a/lib/Basic/CMakeLists.txt
+++ b/lib/Basic/CMakeLists.txt
@@ -3,6 +3,7 @@ set(LLVM_LINK_COMPONENTS mc)
 add_clang_library(clangBasic
   Builtins.cpp
   ConvertUTF.c
+  ConvertUTFWrapper.cpp
   Diagnostic.cpp
   DiagnosticIDs.cpp
   FileManager.cpp
@@ -10,6 +11,7 @@ add_clang_library(clangBasic
   IdentifierTable.cpp
   LangOptions.cpp
   Module.cpp
+  ObjCRuntime.cpp
   SourceLocation.cpp
   SourceManager.cpp
   TargetInfo.cpp
@@ -31,18 +33,19 @@ if (Subversion_FOUND AND EXISTS "${CLANG_SOURCE_DIR}/.svn")
     PROPERTIES COMPILE_DEFINITIONS "SVN_REVISION=\"${CLANG_WC_REVISION}\"")
 endif()
 
-add_dependencies(clangBasic
-  ClangARMNeon
-  ClangAttrList
-  ClangDiagnosticAnalysis
-  ClangDiagnosticAST
-  ClangDiagnosticCommon
-  ClangDiagnosticDriver
-  ClangDiagnosticFrontend
-  ClangDiagnosticGroups
-  ClangDiagnosticLex
-  ClangDiagnosticParse
-  ClangDiagnosticSema
-  ClangDiagnosticSerialization
-  ClangDiagnosticIndexName)
-
+add_dependencies(clangBasic
+  ClangARMNeon
+  ClangAttrList
+  ClangDiagnosticAnalysis
+  ClangDiagnosticAST
+  ClangDiagnosticComment
+  ClangDiagnosticCommon
+  ClangDiagnosticDriver
+  ClangDiagnosticFrontend
+
ClangDiagnosticGroups + ClangDiagnosticIndexName + ClangDiagnosticLex + ClangDiagnosticParse + ClangDiagnosticSema + ClangDiagnosticSerialization + ) diff --git a/lib/Basic/ConvertUTF.c b/lib/Basic/ConvertUTF.c index e197003..4793b25 100644 --- a/lib/Basic/ConvertUTF.c +++ b/lib/Basic/ConvertUTF.c @@ -285,6 +285,7 @@ ConversionResult ConvertUTF16toUTF8 ( *targetStart = target; return result; } +#endif /* --------------------------------------------------------------------- */ @@ -339,8 +340,6 @@ ConversionResult ConvertUTF32toUTF8 ( return result; } -#endif - /* --------------------------------------------------------------------- */ /* diff --git a/lib/Basic/ConvertUTFWrapper.cpp b/lib/Basic/ConvertUTFWrapper.cpp new file mode 100644 index 0000000..a1b3f7f --- /dev/null +++ b/lib/Basic/ConvertUTFWrapper.cpp @@ -0,0 +1,70 @@ +//===-- ConvertUTFWrapper.cpp - Wrap ConvertUTF.h with clang data types -----=== +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "clang/Basic/ConvertUTF.h" +#include "clang/Basic/LLVM.h" + +namespace clang { + +bool ConvertUTF8toWide(unsigned WideCharWidth, llvm::StringRef Source, + char *&ResultPtr) { + assert(WideCharWidth == 1 || WideCharWidth == 2 || WideCharWidth == 4); + ConversionResult result = conversionOK; + // Copy the character span over. + if (WideCharWidth == 1) { + if (!isLegalUTF8String(reinterpret_cast(Source.begin()), + reinterpret_cast(Source.end()))) + result = sourceIllegal; + memcpy(ResultPtr, Source.data(), Source.size()); + ResultPtr += Source.size(); + } else if (WideCharWidth == 2) { + const UTF8 *sourceStart = (const UTF8*)Source.data(); + // FIXME: Make the type of the result buffer correct instead of + // using reinterpret_cast. + UTF16 *targetStart = reinterpret_cast(ResultPtr); + ConversionFlags flags = strictConversion; + result = ConvertUTF8toUTF16( + &sourceStart, sourceStart + Source.size(), + &targetStart, targetStart + 2*Source.size(), flags); + if (result == conversionOK) + ResultPtr = reinterpret_cast(targetStart); + } else if (WideCharWidth == 4) { + const UTF8 *sourceStart = (const UTF8*)Source.data(); + // FIXME: Make the type of the result buffer correct instead of + // using reinterpret_cast. 
+ UTF32 *targetStart = reinterpret_cast(ResultPtr); + ConversionFlags flags = strictConversion; + result = ConvertUTF8toUTF32( + &sourceStart, sourceStart + Source.size(), + &targetStart, targetStart + 4*Source.size(), flags); + if (result == conversionOK) + ResultPtr = reinterpret_cast(targetStart); + } + assert((result != targetExhausted) + && "ConvertUTF8toUTFXX exhausted target buffer"); + return result == conversionOK; +} + +bool ConvertCodePointToUTF8(unsigned Source, char *&ResultPtr) { + const UTF32 *SourceStart = &Source; + const UTF32 *SourceEnd = SourceStart + 1; + UTF8 *TargetStart = reinterpret_cast(ResultPtr); + UTF8 *TargetEnd = TargetStart + 4; + ConversionResult CR = ConvertUTF32toUTF8(&SourceStart, SourceEnd, + &TargetStart, TargetEnd, + strictConversion); + if (CR != conversionOK) + return false; + + ResultPtr = reinterpret_cast(TargetStart); + return true; +} + +} // end namespace clang + diff --git a/lib/Basic/Diagnostic.cpp b/lib/Basic/Diagnostic.cpp index f7d5d87..e689502 100644 --- a/lib/Basic/Diagnostic.cpp +++ b/lib/Basic/Diagnostic.cpp @@ -17,6 +17,7 @@ #include "llvm/ADT/SmallString.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Support/CrashRecoveryContext.h" +#include using namespace clang; @@ -48,6 +49,9 @@ DiagnosticsEngine::DiagnosticsEngine( ErrorsAsFatal = false; SuppressSystemWarnings = false; SuppressAllDiagnostics = false; + ElideType = true; + PrintTemplateTree = false; + ShowColors = false; ShowOverloads = Ovl_All; ExtBehavior = Ext_Ignore; @@ -115,7 +119,7 @@ void DiagnosticsEngine::Reset() { // Create a DiagState and DiagStatePoint representing diagnostic changes // through command-line. DiagStates.push_back(DiagState()); - PushDiagStatePoint(&DiagStates.back(), SourceLocation()); + DiagStatePoints.push_back(DiagStatePoint(&DiagStates.back(), FullSourceLoc())); } void DiagnosticsEngine::SetDelayedDiagnostic(unsigned DiagID, StringRef Arg1, @@ -155,12 +159,6 @@ DiagnosticsEngine::GetDiagStatePointForLoc(SourceLocation L) const { return Pos; } -/// \brief This allows the client to specify that certain -/// warnings are ignored. Notes can never be mapped, errors can only be -/// mapped to fatal, and WARNINGs and EXTENSIONs can be mapped arbitrarily. -/// -/// \param The source location that this change of diagnostic state should -/// take affect. It can be null if we are setting the latest state. void DiagnosticsEngine::setDiagnosticMapping(diag::kind Diag, diag::Mapping Map, SourceLocation L) { assert(Diag < diag::DIAG_UPPER_LIMIT && @@ -385,17 +383,34 @@ void DiagnosticsEngine::Report(const StoredDiagnostic &storedDiag) { CurDiagID = ~0U; } -bool DiagnosticsEngine::EmitCurrentDiagnostic() { - // Process the diagnostic, sending the accumulated information to the - // DiagnosticConsumer. - bool Emitted = ProcessDiag(); +bool DiagnosticsEngine::EmitCurrentDiagnostic(bool Force) { + assert(getClient() && "DiagnosticClient not set!"); + + bool Emitted; + if (Force) { + Diagnostic Info(this); + + // Figure out the diagnostic level of this message. + DiagnosticIDs::Level DiagLevel + = Diags->getDiagnosticLevel(Info.getID(), Info.getLocation(), *this); + + Emitted = (DiagLevel != DiagnosticIDs::Ignored); + if (Emitted) { + // Emit the diagnostic regardless of suppression level. + Diags->EmitDiag(*this, DiagLevel); + } + } else { + // Process the diagnostic, sending the accumulated information to the + // DiagnosticConsumer. + Emitted = ProcessDiag(); + } // Clear out the current diagnostic object. 
unsigned DiagID = CurDiagID; Clear(); // If there was a delayed diagnostic, emit it now. - if (DelayedDiagID && DelayedDiagID != DiagID) + if (!Force && DelayedDiagID && DelayedDiagID != DiagID) ReportDelayed(); return Emitted; @@ -666,6 +681,8 @@ FormatDiagnostic(const char *DiagStr, const char *DiagEnd, /// QualTypeVals - Pass a vector of arrays so that QualType names can be /// compared to see if more information is needed to be printed. SmallVector QualTypeVals; + SmallVector Tree; + for (unsigned i = 0, e = getNumArgs(); i < e; ++i) if (getArgKind(i) == DiagnosticsEngine::ak_qualtype) QualTypeVals.push_back(getRawArg(i)); @@ -717,7 +734,20 @@ FormatDiagnostic(const char *DiagStr, const char *DiagEnd, assert(isdigit(*DiagStr) && "Invalid format for argument in diagnostic"); unsigned ArgNo = *DiagStr++ - '0'; + // Only used for type diffing. + unsigned ArgNo2 = ArgNo; + DiagnosticsEngine::ArgumentKind Kind = getArgKind(ArgNo); + if (Kind == DiagnosticsEngine::ak_qualtype && + ModifierIs(Modifier, ModifierLen, "diff")) { + Kind = DiagnosticsEngine::ak_qualtype_pair; + assert(*DiagStr == ',' && isdigit(*(DiagStr + 1)) && + "Invalid format for diff modifier"); + ++DiagStr; // Comma. + ArgNo2 = *DiagStr++ - '0'; + assert(getArgKind(ArgNo2) == DiagnosticsEngine::ak_qualtype && + "Second value of type diff must be a qualtype"); + } switch (Kind) { // ---- STRINGS ---- @@ -802,18 +832,91 @@ FormatDiagnostic(const char *DiagStr, const char *DiagEnd, FormattedArgs.data(), FormattedArgs.size(), OutStr, QualTypeVals); break; + case DiagnosticsEngine::ak_qualtype_pair: + // Create a struct with all the info needed for printing. + TemplateDiffTypes TDT; + TDT.FromType = getRawArg(ArgNo); + TDT.ToType = getRawArg(ArgNo2); + TDT.ElideType = getDiags()->ElideType; + TDT.ShowColors = getDiags()->ShowColors; + TDT.TemplateDiffUsed = false; + intptr_t val = reinterpret_cast(&TDT); + + const char *ArgumentEnd = Argument + ArgumentLen; + const char *Pipe = ScanFormat(Argument, ArgumentEnd, '|'); + + // Print the tree. If this diagnostic already has a tree, skip the + // second tree. + if (getDiags()->PrintTemplateTree && Tree.empty()) { + TDT.PrintFromType = true; + TDT.PrintTree = true; + getDiags()->ConvertArgToString(Kind, val, + Modifier, ModifierLen, + Argument, ArgumentLen, + FormattedArgs.data(), + FormattedArgs.size(), + Tree, QualTypeVals); + // If there is no tree information, fall back to regular printing. + if (!Tree.empty()) { + FormatDiagnostic(Pipe + 1, ArgumentEnd, OutStr); + break; + } + } + + // Non-tree printing, also the fall-back when tree printing fails. + // The fall-back is triggered when the types compared are not templates. 
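// --- Editorial note (not part of the patch) --- The '$' and '|' scanning
// below assumes the payload shape used by template type diffing. An
// illustrative (invented, not an actual clang diagnostic) TableGen text:
//
//   "cannot convert %diff{$ to $|incompatible types}0,1"
//
// Everything before '|' is the two-type form: the first '$' is replaced with
// the "from" type and the second '$' with the "to" type; the text after '|'
// is the fallback used when type diffing cannot apply. The "0,1" pair after
// the closing brace, parsed earlier with the "diff" modifier, names the two
// QualType arguments being compared.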
+ const char *FirstDollar = ScanFormat(Argument, ArgumentEnd, '$'); + const char *SecondDollar = ScanFormat(FirstDollar + 1, ArgumentEnd, '$'); + + // Append before text + FormatDiagnostic(Argument, FirstDollar, OutStr); + + // Append first type + TDT.PrintTree = false; + TDT.PrintFromType = true; + getDiags()->ConvertArgToString(Kind, val, + Modifier, ModifierLen, + Argument, ArgumentLen, + FormattedArgs.data(), FormattedArgs.size(), + OutStr, QualTypeVals); + if (!TDT.TemplateDiffUsed) + FormattedArgs.push_back(std::make_pair(DiagnosticsEngine::ak_qualtype, + TDT.FromType)); + + // Append middle text + FormatDiagnostic(FirstDollar + 1, SecondDollar, OutStr); + + // Append second type + TDT.PrintFromType = false; + getDiags()->ConvertArgToString(Kind, val, + Modifier, ModifierLen, + Argument, ArgumentLen, + FormattedArgs.data(), FormattedArgs.size(), + OutStr, QualTypeVals); + if (!TDT.TemplateDiffUsed) + FormattedArgs.push_back(std::make_pair(DiagnosticsEngine::ak_qualtype, + TDT.ToType)); + + // Append end text + FormatDiagnostic(SecondDollar + 1, Pipe, OutStr); + break; } // Remember this argument info for subsequent formatting operations. Turn // std::strings into a null terminated string to make it be the same case as // all the other ones. - if (Kind != DiagnosticsEngine::ak_std_string) + if (Kind == DiagnosticsEngine::ak_qualtype_pair) + continue; + else if (Kind != DiagnosticsEngine::ak_std_string) FormattedArgs.push_back(std::make_pair(Kind, getRawArg(ArgNo))); else FormattedArgs.push_back(std::make_pair(DiagnosticsEngine::ak_c_string, (intptr_t)getArgStdStr(ArgNo).c_str())); } + + // Append the type tree to the end of the diagnostics. + OutStr.append(Tree.begin(), Tree.end()); } StoredDiagnostic::StoredDiagnostic() { } diff --git a/lib/Basic/DiagnosticIDs.cpp b/lib/Basic/DiagnosticIDs.cpp index 8c33a96..ca96fd2 100644 --- a/lib/Basic/DiagnosticIDs.cpp +++ b/lib/Basic/DiagnosticIDs.cpp @@ -79,6 +79,7 @@ static const StaticDiagInfoRec StaticDiagInfo[] = { #include "clang/Basic/DiagnosticLexKinds.inc" #include "clang/Basic/DiagnosticParseKinds.inc" #include "clang/Basic/DiagnosticASTKinds.inc" +#include "clang/Basic/DiagnosticCommentKinds.inc" #include "clang/Basic/DiagnosticSemaKinds.inc" #include "clang/Basic/DiagnosticAnalysisKinds.inc" #undef DIAG @@ -357,7 +358,7 @@ DiagnosticIDs::getDiagnosticLevel(unsigned DiagID, SourceLocation Loc, return CustomDiagInfo->getLevel(DiagID); unsigned DiagClass = getBuiltinDiagClass(DiagID); - assert(DiagClass != CLASS_NOTE && "Cannot get diagnostic level of a note!"); + if (DiagClass == CLASS_NOTE) return DiagnosticIDs::Note; return getDiagnosticLevel(DiagID, DiagClass, Loc, Diag); } @@ -583,24 +584,9 @@ bool DiagnosticIDs::ProcessDiag(DiagnosticsEngine &Diag) const { assert(Diag.getClient() && "DiagnosticClient not set!"); // Figure out the diagnostic level of this message. - DiagnosticIDs::Level DiagLevel; unsigned DiagID = Info.getID(); - - if (DiagID >= diag::DIAG_UPPER_LIMIT) { - // Handle custom diagnostics, which cannot be mapped. - DiagLevel = CustomDiagInfo->getLevel(DiagID); - } else { - // Get the class of the diagnostic. If this is a NOTE, map it onto whatever - // the diagnostic level was for the previous diagnostic so that it is - // filtered the same as the previous diagnostic. 
- unsigned DiagClass = getBuiltinDiagClass(DiagID); - if (DiagClass == CLASS_NOTE) { - DiagLevel = DiagnosticIDs::Note; - } else { - DiagLevel = getDiagnosticLevel(DiagID, DiagClass, Info.getLocation(), - Diag); - } - } + DiagnosticIDs::Level DiagLevel + = getDiagnosticLevel(DiagID, Info.getLocation(), Diag); if (DiagLevel != DiagnosticIDs::Note) { // Record that a fatal error occurred only when we see a second @@ -658,6 +644,14 @@ bool DiagnosticIDs::ProcessDiag(DiagnosticsEngine &Diag) const { } // Finally, report it. + EmitDiag(Diag, DiagLevel); + return true; +} + +void DiagnosticIDs::EmitDiag(DiagnosticsEngine &Diag, Level DiagLevel) const { + Diagnostic Info(&Diag); + assert(DiagLevel != DiagnosticIDs::Ignored && "Cannot emit ignored diagnostics!"); + Diag.Client->HandleDiagnostic((DiagnosticsEngine::Level)DiagLevel, Info); if (Diag.Client->IncludeInDiagnosticCounts()) { if (DiagLevel == DiagnosticIDs::Warning) @@ -665,8 +659,6 @@ bool DiagnosticIDs::ProcessDiag(DiagnosticsEngine &Diag) const { } Diag.CurDiagID = ~0U; - - return true; } bool DiagnosticIDs::isUnrecoverable(unsigned DiagID) const { diff --git a/lib/Basic/FileManager.cpp b/lib/Basic/FileManager.cpp index fd6d334..c6b894c 100644 --- a/lib/Basic/FileManager.cpp +++ b/lib/Basic/FileManager.cpp @@ -111,6 +111,14 @@ public: } size_t size() const { return UniqueFiles.size(); } + + void erase(const FileEntry *Entry) { + std::string FullPath(GetFullPath(Entry->getName())); + + // Lowercase string because Windows filesystem is case insensitive. + FullPath = StringRef(FullPath).lower(); + UniqueFiles.erase(FullPath); + } }; //===----------------------------------------------------------------------===// @@ -152,6 +160,8 @@ public: } size_t size() const { return UniqueFiles.size(); } + + void erase(const FileEntry *Entry) { UniqueFiles.erase(*Entry); } }; #endif @@ -213,6 +223,10 @@ void FileManager::removeStatCache(FileSystemStatCache *statCache) { PrevCache->setNextStatCache(statCache->getNextStatCache()); } +void FileManager::clearStatCaches() { + StatCache.reset(0); +} + /// \brief Retrieve the directory that the given file name resides in. /// Filename can point to either a real file or a virtual file. static const DirectoryEntry *getDirectoryFromFile(FileManager &FileMgr, @@ -259,16 +273,14 @@ void FileManager::addAncestorsAsVirtualDirs(StringRef Path) { addAncestorsAsVirtualDirs(DirName); } -/// getDirectory - Lookup, cache, and verify the specified directory -/// (real or virtual). This returns NULL if the directory doesn't -/// exist. -/// const DirectoryEntry *FileManager::getDirectory(StringRef DirName, bool CacheFailure) { - // stat doesn't like trailing separators. + // stat doesn't like trailing separators except for root directory. // At least, on Win32 MSVCRT, stat() cannot strip trailing '/'. // (though it can strip '\\') - if (DirName.size() > 1 && llvm::sys::path::is_separator(DirName.back())) + if (DirName.size() > 1 && + DirName != llvm::sys::path::root_path(DirName) && + llvm::sys::path::is_separator(DirName.back())) DirName = DirName.substr(0, DirName.size()-1); ++NumDirLookups; @@ -315,9 +327,6 @@ const DirectoryEntry *FileManager::getDirectory(StringRef DirName, return &UDE; } -/// getFile - Lookup, cache, and verify the specified file (real or -/// virtual). This returns NULL if the file doesn't exist. 
-/// const FileEntry *FileManager::getFile(StringRef Filename, bool openFile, bool CacheFailure) { ++NumFileLookups; @@ -483,15 +492,21 @@ void FileManager::FixupRelativePath(SmallVectorImpl &path) const { } llvm::MemoryBuffer *FileManager:: -getBufferForFile(const FileEntry *Entry, std::string *ErrorStr) { +getBufferForFile(const FileEntry *Entry, std::string *ErrorStr, + bool isVolatile) { OwningPtr Result; llvm::error_code ec; + uint64_t FileSize = Entry->getSize(); + // If there's a high enough chance that the file have changed since we + // got its size, force a stat before opening it. + if (isVolatile) + FileSize = -1; + const char *Filename = Entry->getName(); // If the file is already open, use the open file descriptor. if (Entry->FD != -1) { - ec = llvm::MemoryBuffer::getOpenFile(Entry->FD, Filename, Result, - Entry->getSize()); + ec = llvm::MemoryBuffer::getOpenFile(Entry->FD, Filename, Result, FileSize); if (ErrorStr) *ErrorStr = ec.message(); @@ -503,7 +518,7 @@ getBufferForFile(const FileEntry *Entry, std::string *ErrorStr) { // Otherwise, open the file. if (FileSystemOpts.WorkingDir.empty()) { - ec = llvm::MemoryBuffer::getFile(Filename, Result, Entry->getSize()); + ec = llvm::MemoryBuffer::getFile(Filename, Result, FileSize); if (ec && ErrorStr) *ErrorStr = ec.message(); return Result.take(); @@ -511,7 +526,7 @@ getBufferForFile(const FileEntry *Entry, std::string *ErrorStr) { SmallString<128> FilePath(Entry->getName()); FixupRelativePath(FilePath); - ec = llvm::MemoryBuffer::getFile(FilePath.str(), Result, Entry->getSize()); + ec = llvm::MemoryBuffer::getFile(FilePath.str(), Result, FileSize); if (ec && ErrorStr) *ErrorStr = ec.message(); return Result.take(); @@ -564,6 +579,18 @@ bool FileManager::getNoncachedStatValue(StringRef Path, return ::stat(FilePath.c_str(), &StatBuf) != 0; } +void FileManager::invalidateCache(const FileEntry *Entry) { + assert(Entry && "Cannot invalidate a NULL FileEntry"); + + SeenFileEntries.erase(Entry->getName()); + + // FileEntry invalidation should not block future optimizations in the file + // caches. Possible alternatives are cache truncation (invalidate last N) or + // invalidation of the whole cache. + UniqueRealFiles.erase(Entry); +} + + void FileManager::GetUniqueIDMapping( SmallVectorImpl &UIDToFiles) const { UIDToFiles.clear(); @@ -584,6 +611,12 @@ void FileManager::GetUniqueIDMapping( UIDToFiles[(*VFE)->getUID()] = *VFE; } +void FileManager::modifyFileEntry(FileEntry *File, + off_t Size, time_t ModificationTime) { + File->Size = Size; + File->ModTime = ModificationTime; +} + void FileManager::PrintStats() const { llvm::errs() << "\n*** File Manager Stats:\n"; diff --git a/lib/Basic/IdentifierTable.cpp b/lib/Basic/IdentifierTable.cpp index 43899f0..4869ae1 100644 --- a/lib/Basic/IdentifierTable.cpp +++ b/lib/Basic/IdentifierTable.cpp @@ -20,6 +20,7 @@ #include "llvm/ADT/StringSwitch.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Support/ErrorHandling.h" +#include #include using namespace clang; @@ -103,7 +104,8 @@ namespace { KEYOPENCL = 0x200, KEYC11 = 0x400, KEYARC = 0x800, - KEYALL = 0x0fff + KEYNOMS = 0x01000, + KEYALL = (0xffff & ~KEYNOMS) // Because KEYNOMS is used to exclude. }; } @@ -136,6 +138,9 @@ static void AddKeyword(StringRef Keyword, else if (LangOpts.ObjC2 && (Flags & KEYARC)) AddResult = 2; else if (LangOpts.CPlusPlus && (Flags & KEYCXX0X)) AddResult = 3; + // Don't add this keyword under MicrosoftMode. 
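// --- Editorial note (not part of the patch) --- Worked example of the flag
// arithmetic behind KEYNOMS: with KEYNOMS = 0x01000,
//
//   KEYALL = 0xffff & ~0x01000 = 0xefff
//
// so a keyword tagged "available in all languages" never carries the
// exclusion bit by accident; KEYNOMS has to be OR'd onto an entry explicitly,
// and the MicrosoftMode check below then drops exactly those keywords.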
+ if (LangOpts.MicrosoftMode && (Flags & KEYNOMS)) + return; // Don't add this keyword if disabled in this language. if (AddResult == 0) return; @@ -154,8 +159,8 @@ static void AddCXXOperatorKeyword(StringRef Keyword, Info.setIsCPlusPlusOperatorKeyword(); } -/// AddObjCKeyword - Register an Objective-C @keyword like "class" "selector" or -/// "property". +/// AddObjCKeyword - Register an Objective-C \@keyword like "class" "selector" +/// or "property". static void AddObjCKeyword(StringRef Name, tok::ObjCKeywordKind ObjCID, IdentifierTable &Table) { @@ -335,22 +340,22 @@ public: unsigned Selector::getNumArgs() const { unsigned IIF = getIdentifierInfoFlag(); - if (IIF == ZeroArg) + if (IIF <= ZeroArg) return 0; if (IIF == OneArg) return 1; - // We point to a MultiKeywordSelector (pointer doesn't contain any flags). - MultiKeywordSelector *SI = reinterpret_cast(InfoPtr); + // We point to a MultiKeywordSelector. + MultiKeywordSelector *SI = getMultiKeywordSelector(); return SI->getNumArgs(); } IdentifierInfo *Selector::getIdentifierInfoForSlot(unsigned argIndex) const { - if (getIdentifierInfoFlag()) { + if (getIdentifierInfoFlag() < MultiArg) { assert(argIndex == 0 && "illegal keyword index"); return getAsIdentifierInfo(); } - // We point to a MultiKeywordSelector (pointer doesn't contain any flags). - MultiKeywordSelector *SI = reinterpret_cast(InfoPtr); + // We point to a MultiKeywordSelector. + MultiKeywordSelector *SI = getMultiKeywordSelector(); return SI->getIdentifierInfoForSlot(argIndex); } @@ -375,7 +380,7 @@ std::string Selector::getAsString() const { if (InfoPtr == 0) return ""; - if (InfoPtr & ArgFlags) { + if (getIdentifierInfoFlag() < MultiArg) { IdentifierInfo *II = getAsIdentifierInfo(); // If the number of arguments is 0 then II is guaranteed to not be null. @@ -388,8 +393,8 @@ std::string Selector::getAsString() const { return II->getName().str() + ":"; } - // We have a multiple keyword selector (no embedded flags). - return reinterpret_cast(InfoPtr)->getName(); + // We have a multiple keyword selector. + return getMultiKeywordSelector()->getName(); } /// Interpreting the given string using the normal CamelCase diff --git a/lib/Basic/ObjCRuntime.cpp b/lib/Basic/ObjCRuntime.cpp new file mode 100644 index 0000000..9bd433a --- /dev/null +++ b/lib/Basic/ObjCRuntime.cpp @@ -0,0 +1,86 @@ +//===- ObjCRuntime.cpp - Objective-C Runtime Handling -----------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the ObjCRuntime class, which represents the +// target Objective-C runtime. 
+// +//===----------------------------------------------------------------------===// +#include "clang/Basic/ObjCRuntime.h" +#include "llvm/Support/raw_ostream.h" + +using namespace clang; + +std::string ObjCRuntime::getAsString() const { + std::string Result; + { + llvm::raw_string_ostream Out(Result); + Out << *this; + } + return Result; +} + +raw_ostream &clang::operator<<(raw_ostream &out, const ObjCRuntime &value) { + switch (value.getKind()) { + case ObjCRuntime::MacOSX: out << "macosx"; break; + case ObjCRuntime::FragileMacOSX: out << "macosx-fragile"; break; + case ObjCRuntime::iOS: out << "ios"; break; + case ObjCRuntime::GNUstep: out << "gnustep"; break; + case ObjCRuntime::GCC: out << "gcc"; break; + case ObjCRuntime::ObjFW: out << "objfw"; break; + } + if (value.getVersion() > VersionTuple(0)) { + out << '-' << value.getVersion(); + } + return out; +} + +bool ObjCRuntime::tryParse(StringRef input) { + // Look for the last dash. + std::size_t dash = input.rfind('-'); + + // We permit dashes in the runtime name, and we also permit the + // version to be omitted, so if we see a dash not followed by a + // digit then we need to ignore it. + if (dash != StringRef::npos && dash + 1 != input.size() && + (input[dash+1] < '0' || input[dash+1] > '9')) { + dash = StringRef::npos; + } + + // Everything prior to that must be a valid string name. + Kind kind; + StringRef runtimeName = input.substr(0, dash); + Version = VersionTuple(0); + if (runtimeName == "macosx") { + kind = ObjCRuntime::MacOSX; + } else if (runtimeName == "macosx-fragile") { + kind = ObjCRuntime::FragileMacOSX; + } else if (runtimeName == "ios") { + kind = ObjCRuntime::iOS; + } else if (runtimeName == "gnustep") { + // If no version is specified then default to the most recent one that we + // know about. + Version = VersionTuple(1, 6); + kind = ObjCRuntime::GNUstep; + } else if (runtimeName == "gcc") { + kind = ObjCRuntime::GCC; + } else if (runtimeName == "objfw") { + kind = ObjCRuntime::ObjFW; + } else { + return true; + } + TheKind = kind; + + if (dash != StringRef::npos) { + StringRef verString = input.substr(dash + 1); + if (Version.tryParse(verString)) + return true; + } + + return false; +} diff --git a/lib/Basic/SourceManager.cpp b/lib/Basic/SourceManager.cpp index cef091c..9ec2474 100644 --- a/lib/Basic/SourceManager.cpp +++ b/lib/Basic/SourceManager.cpp @@ -71,7 +71,7 @@ unsigned ContentCache::getSize() const { void ContentCache::replaceBuffer(const llvm::MemoryBuffer *B, bool DoNotFree) { - if (B == Buffer.getPointer()) { + if (B && B == Buffer.getPointer()) { assert(0 && "Replacing with the same buffer"); Buffer.setInt(DoNotFree? 
DoNotFreeFlag : 0); return; @@ -97,7 +97,10 @@ const llvm::MemoryBuffer *ContentCache::getBuffer(DiagnosticsEngine &Diag, } std::string ErrorStr; - Buffer.setPointer(SM.getFileManager().getBufferForFile(ContentsEntry, &ErrorStr)); + bool isVolatile = SM.userFilesAreVolatile() && !IsSystemFile; + Buffer.setPointer(SM.getFileManager().getBufferForFile(ContentsEntry, + &ErrorStr, + isVolatile)); // If we were unable to open the file, then we are in an inconsistent // situation where the content cache referenced a file which no longer @@ -189,9 +192,9 @@ unsigned LineTableInfo::getLineTableFilenameID(StringRef Name) { } /// AddLineNote - Add a line note to the line table that indicates that there -/// is a #line at the specified FID/Offset location which changes the presumed +/// is a \#line at the specified FID/Offset location which changes the presumed /// location to LineNo/FilenameID. -void LineTableInfo::AddLineNote(int FID, unsigned Offset, +void LineTableInfo::AddLineNote(FileID FID, unsigned Offset, unsigned LineNo, int FilenameID) { std::vector &Entries = LineEntries[FID]; @@ -219,10 +222,10 @@ void LineTableInfo::AddLineNote(int FID, unsigned Offset, /// AddLineNote This is the same as the previous version of AddLineNote, but is /// used for GNU line markers. If EntryExit is 0, then this doesn't change the -/// presumed #include stack. If it is 1, this is a file entry, if it is 2 then +/// presumed \#include stack. If it is 1, this is a file entry, if it is 2 then /// this is a file exit. FileKind specifies whether this is a system header or /// extern C system header. -void LineTableInfo::AddLineNote(int FID, unsigned Offset, +void LineTableInfo::AddLineNote(FileID FID, unsigned Offset, unsigned LineNo, int FilenameID, unsigned EntryExit, SrcMgr::CharacteristicKind FileKind) { @@ -256,7 +259,7 @@ void LineTableInfo::AddLineNote(int FID, unsigned Offset, /// FindNearestLineEntry - Find the line entry nearest to FID that is before /// it. If there is no line entry before Offset in FID, return null. -const LineEntry *LineTableInfo::FindNearestLineEntry(int FID, +const LineEntry *LineTableInfo::FindNearestLineEntry(FileID FID, unsigned Offset) { const std::vector &Entries = LineEntries[FID]; assert(!Entries.empty() && "No #line entries for this FID after all!"); @@ -275,7 +278,7 @@ const LineEntry *LineTableInfo::FindNearestLineEntry(int FID, /// \brief Add a new line entry that has already been encoded into /// the internal representation of the line table. -void LineTableInfo::AddEntry(int FID, +void LineTableInfo::AddEntry(FileID FID, const std::vector &Entries) { LineEntries[FID] = Entries; } @@ -308,7 +311,7 @@ void SourceManager::AddLineNote(SourceLocation Loc, unsigned LineNo, if (LineTable == 0) LineTable = new LineTableInfo(); - LineTable->AddLineNote(LocInfo.first.ID, LocInfo.second, LineNo, FilenameID); + LineTable->AddLineNote(LocInfo.first, LocInfo.second, LineNo, FilenameID); } /// AddLineNote - Add a GNU line marker to the line table. @@ -353,7 +356,7 @@ void SourceManager::AddLineNote(SourceLocation Loc, unsigned LineNo, else if (IsFileExit) EntryExit = 2; - LineTable->AddLineNote(LocInfo.first.ID, LocInfo.second, LineNo, FilenameID, + LineTable->AddLineNote(LocInfo.first, LocInfo.second, LineNo, FilenameID, EntryExit, FileKind); } @@ -367,8 +370,10 @@ LineTableInfo &SourceManager::getLineTable() { // Private 'Create' methods. 
//===----------------------------------------------------------------------===// -SourceManager::SourceManager(DiagnosticsEngine &Diag, FileManager &FileMgr) +SourceManager::SourceManager(DiagnosticsEngine &Diag, FileManager &FileMgr, + bool UserFilesAreVolatile) : Diag(Diag), FileMgr(FileMgr), OverridenFilesKeepOriginalName(true), + UserFilesAreVolatile(UserFilesAreVolatile), ExternalSLocEntries(0), LineTable(0), NumLinearScans(0), NumBinaryProbes(0), FakeBufferForRecovery(0), FakeContentCacheForRecovery(0) { @@ -426,7 +431,8 @@ void SourceManager::clearIDTables() { /// getOrCreateContentCache - Create or return a cached ContentCache for the /// specified file. const ContentCache * -SourceManager::getOrCreateContentCache(const FileEntry *FileEnt) { +SourceManager::getOrCreateContentCache(const FileEntry *FileEnt, + bool isSystemFile) { assert(FileEnt && "Didn't specify a file entry to use?"); // Do we already have information about this file? @@ -440,16 +446,22 @@ SourceManager::getOrCreateContentCache(const FileEntry *FileEnt) { EntryAlign = std::max(8U, EntryAlign); Entry = ContentCacheAlloc.Allocate(1, EntryAlign); - // If the file contents are overridden with contents from another file, - // pass that file to ContentCache. - llvm::DenseMap::iterator - overI = OverriddenFiles.find(FileEnt); - if (overI == OverriddenFiles.end()) + if (OverriddenFilesInfo) { + // If the file contents are overridden with contents from another file, + // pass that file to ContentCache. + llvm::DenseMap::iterator + overI = OverriddenFilesInfo->OverriddenFiles.find(FileEnt); + if (overI == OverriddenFilesInfo->OverriddenFiles.end()) + new (Entry) ContentCache(FileEnt); + else + new (Entry) ContentCache(OverridenFilesKeepOriginalName ? FileEnt + : overI->second, + overI->second); + } else { new (Entry) ContentCache(FileEnt); - else - new (Entry) ContentCache(OverridenFilesKeepOriginalName ? FileEnt - : overI->second, - overI->second); + } + + Entry->IsSystemFile = isSystemFile; return Entry; } @@ -622,6 +634,8 @@ void SourceManager::overrideFileContents(const FileEntry *SourceFile, const_cast(IR)->replaceBuffer(Buffer, DoNotFree); const_cast(IR)->BufferOverridden = true; + + getOverriddenFilesInfo().OverriddenFilesWithBuffer.insert(SourceFile); } void SourceManager::overrideFileContents(const FileEntry *SourceFile, @@ -632,7 +646,20 @@ void SourceManager::overrideFileContents(const FileEntry *SourceFile, assert(FileInfos.count(SourceFile) == 0 && "This function should be called at the initialization stage, before " "any parsing occurs."); - OverriddenFiles[SourceFile] = NewFile; + getOverriddenFilesInfo().OverriddenFiles[SourceFile] = NewFile; +} + +void SourceManager::disableFileContentsOverride(const FileEntry *File) { + if (!isFileOverridden(File)) + return; + + const SrcMgr::ContentCache *IR = getOrCreateContentCache(File); + const_cast(IR)->replaceBuffer(0); + const_cast(IR)->ContentsEntry = IR->OrigEntry; + + assert(OverriddenFilesInfo); + OverriddenFilesInfo->OverriddenFiles.erase(File); + OverriddenFilesInfo->OverriddenFilesWithBuffer.erase(File); } StringRef SourceManager::getBufferData(FileID FID, bool *Invalid) const { @@ -995,9 +1022,10 @@ unsigned SourceManager::getColumnNumber(FileID FID, unsigned FilePos, if (MyInvalid) return 1; - if (FilePos >= MemBuf->getBufferSize()) { + // It is okay to request a position just past the end of the buffer. 
+ if (FilePos > MemBuf->getBufferSize()) { if (Invalid) - *Invalid = MyInvalid; + *Invalid = true; return 1; } @@ -1295,7 +1323,7 @@ SourceManager::getFileCharacteristic(SourceLocation Loc) const { assert(LineTable && "Can't have linetable entries without a LineTable!"); // See if there is a #line directive before the location. const LineEntry *Entry = - LineTable->FindNearestLineEntry(LocInfo.first.ID, LocInfo.second); + LineTable->FindNearestLineEntry(LocInfo.first, LocInfo.second); // If this is before the first line marker, use the file characteristic. if (!Entry) @@ -1305,7 +1333,7 @@ SourceManager::getFileCharacteristic(SourceLocation Loc) const { } /// Return the filename or buffer identifier of the buffer the location is in. -/// Note that this name does not respect #line directives. Use getPresumedLoc +/// Note that this name does not respect \#line directives. Use getPresumedLoc /// for normal clients. const char *SourceManager::getBufferName(SourceLocation Loc, bool *Invalid) const { @@ -1316,7 +1344,7 @@ const char *SourceManager::getBufferName(SourceLocation Loc, /// getPresumedLoc - This method returns the "presumed" location of a -/// SourceLocation specifies. A "presumed location" can be modified by #line +/// SourceLocation specifies. A "presumed location" can be modified by \#line /// or GNU line marker directives. This provides a view on the data that a /// user should see in diagnostics, for example. /// @@ -1360,7 +1388,7 @@ PresumedLoc SourceManager::getPresumedLoc(SourceLocation Loc) const { assert(LineTable && "Can't have linetable entries without a LineTable!"); // See if there is a #line directive before this. If so, get it. if (const LineEntry *Entry = - LineTable->FindNearestLineEntry(LocInfo.first.ID, LocInfo.second)) { + LineTable->FindNearestLineEntry(LocInfo.first, LocInfo.second)) { // If the LineEntry indicates a filename, use it. if (Entry->FilenameID != -1) Filename = LineTable->getFilename(Entry->FilenameID); @@ -1834,8 +1862,6 @@ bool SourceManager::isBeforeInTranslationUnit(SourceLocation LHS, return LOffs.first < ROffs.first; } -/// PrintStats - Print statistics to stderr. 
-/// void SourceManager::PrintStats() const { llvm::errs() << "\n*** Source Manager Stats:\n"; llvm::errs() << FileInfos.size() << " files mapped, " << MemBufferInfos.size() @@ -1887,10 +1913,14 @@ SourceManager::MemoryBufferSizes SourceManager::getMemoryBufferSizes() const { } size_t SourceManager::getDataStructureSizes() const { - return llvm::capacity_in_bytes(MemBufferInfos) + size_t size = llvm::capacity_in_bytes(MemBufferInfos) + llvm::capacity_in_bytes(LocalSLocEntryTable) + llvm::capacity_in_bytes(LoadedSLocEntryTable) + llvm::capacity_in_bytes(SLocEntryLoaded) - + llvm::capacity_in_bytes(FileInfos) - + llvm::capacity_in_bytes(OverriddenFiles); + + llvm::capacity_in_bytes(FileInfos); + + if (OverriddenFilesInfo) + size += llvm::capacity_in_bytes(OverriddenFilesInfo->OverriddenFiles); + + return size; } diff --git a/lib/Basic/TargetInfo.cpp b/lib/Basic/TargetInfo.cpp index 8c49486..db5941a 100644 --- a/lib/Basic/TargetInfo.cpp +++ b/lib/Basic/TargetInfo.cpp @@ -47,6 +47,7 @@ TargetInfo::TargetInfo(const std::string &T) : Triple(T) { LargeArrayMinWidth = 0; LargeArrayAlign = 0; MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 0; + MaxVectorAlign = 0; SizeType = UnsignedLong; PtrDiffType = SignedLong; IntMaxType = SignedLongLong; diff --git a/lib/Basic/Targets.cpp b/lib/Basic/Targets.cpp index dd2a89a..883864f 100644 --- a/lib/Basic/Targets.cpp +++ b/lib/Basic/Targets.cpp @@ -316,6 +316,8 @@ protected: DefineStd(Builder, "linux", Opts); Builder.defineMacro("__gnu_linux__"); Builder.defineMacro("__ELF__"); + if (Triple.getEnvironment() == llvm::Triple::ANDROIDEABI) + Builder.defineMacro("__ANDROID__", "1"); if (Opts.POSIXThreads) Builder.defineMacro("_REENTRANT"); if (Opts.CPlusPlus) @@ -371,6 +373,7 @@ public: OpenBSDTargetInfo(const std::string &triple) : OSTargetInfo(triple) { this->UserLabelPrefix = ""; + this->TLSSupported = false; llvm::Triple Triple(triple); switch (Triple.getArch()) { @@ -391,6 +394,29 @@ public: } }; +// Bitrig Target +template +class BitrigTargetInfo : public OSTargetInfo { +protected: + virtual void getOSDefines(const LangOptions &Opts, const llvm::Triple &Triple, + MacroBuilder &Builder) const { + // Bitrig defines; list based off of gcc output + + Builder.defineMacro("__Bitrig__"); + DefineStd(Builder, "unix", Opts); + Builder.defineMacro("__ELF__"); + if (Opts.POSIXThreads) + Builder.defineMacro("_REENTRANT"); + } +public: + BitrigTargetInfo(const std::string &triple) + : OSTargetInfo(triple) { + this->UserLabelPrefix = ""; + this->TLSSupported = false; + this->MCountName = "__mcount"; + } +}; + // PSP Target template class PSPTargetInfo : public OSTargetInfo { @@ -573,12 +599,60 @@ class PPCTargetInfo : public TargetInfo { static const Builtin::Info BuiltinInfo[]; static const char * const GCCRegNames[]; static const TargetInfo::GCCRegAlias GCCRegAliases[]; + std::string CPU; public: PPCTargetInfo(const std::string& triple) : TargetInfo(triple) { LongDoubleWidth = LongDoubleAlign = 128; LongDoubleFormat = &llvm::APFloat::PPCDoubleDouble; } + /// \brief Flags for architecture specific defines. + typedef enum { + ArchDefineNone = 0, + ArchDefineName = 1 << 0, // is substituted for arch name. 
+ ArchDefinePpcgr = 1 << 1, + ArchDefinePpcsq = 1 << 2, + ArchDefine440 = 1 << 3, + ArchDefine603 = 1 << 4, + ArchDefine604 = 1 << 5, + ArchDefinePwr4 = 1 << 6, + ArchDefinePwr6 = 1 << 7 + } ArchDefineTypes; + + virtual bool setCPU(const std::string &Name) { + bool CPUKnown = llvm::StringSwitch(Name) + .Case("generic", true) + .Case("440", true) + .Case("450", true) + .Case("601", true) + .Case("602", true) + .Case("603", true) + .Case("603e", true) + .Case("603ev", true) + .Case("604", true) + .Case("604e", true) + .Case("620", true) + .Case("g3", true) + .Case("7400", true) + .Case("g4", true) + .Case("7450", true) + .Case("g4+", true) + .Case("750", true) + .Case("970", true) + .Case("g5", true) + .Case("a2", true) + .Case("pwr6", true) + .Case("pwr7", true) + .Case("ppc", true) + .Case("ppc64", true) + .Default(false); + + if (CPUKnown) + CPU = Name; + + return CPUKnown; + } + virtual void getTargetBuiltins(const Builtin::Info *&Records, unsigned &NumRecords) const { Records = BuiltinInfo; @@ -718,8 +792,6 @@ void PPCTargetInfo::getTargetDefines(const LangOptions &Opts, Builder.defineMacro("__POWERPC__"); if (PointerWidth == 64) { Builder.defineMacro("_ARCH_PPC64"); - Builder.defineMacro("_LP64"); - Builder.defineMacro("__LP64__"); Builder.defineMacro("__powerpc64__"); Builder.defineMacro("__ppc64__"); } else { @@ -727,7 +799,8 @@ void PPCTargetInfo::getTargetDefines(const LangOptions &Opts, } // Target properties. - if (getTriple().getOS() != llvm::Triple::NetBSD) + if (getTriple().getOS() != llvm::Triple::NetBSD && + getTriple().getOS() != llvm::Triple::OpenBSD) Builder.defineMacro("_BIG_ENDIAN"); Builder.defineMacro("__BIG_ENDIAN__"); @@ -742,6 +815,47 @@ void PPCTargetInfo::getTargetDefines(const LangOptions &Opts, Builder.defineMacro("__VEC__", "10206"); Builder.defineMacro("__ALTIVEC__"); } + + // CPU identification. 
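// --- Editorial note (not part of the patch) --- Worked example of the
// mapping built below: -mcpu=pwr7 selects
//   ArchDefineName | ArchDefinePwr6 | ArchDefinePpcgr | ArchDefinePpcsq,
// which the if-chain after the StringSwitch expands into the macros
// _ARCH_PWR7 (the CPU name uppercased), _ARCH_PWR4, _ARCH_PWR5, _ARCH_PWR6,
// _ARCH_PPCGR and _ARCH_PPCSQ.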
+ ArchDefineTypes defs = (ArchDefineTypes)llvm::StringSwitch(CPU) + .Case("440", ArchDefineName) + .Case("450", ArchDefineName | ArchDefine440) + .Case("601", ArchDefineName) + .Case("602", ArchDefineName | ArchDefinePpcgr) + .Case("603", ArchDefineName | ArchDefinePpcgr) + .Case("603e", ArchDefineName | ArchDefine603 | ArchDefinePpcgr) + .Case("603ev", ArchDefineName | ArchDefine603 | ArchDefinePpcgr) + .Case("604", ArchDefineName | ArchDefinePpcgr) + .Case("604e", ArchDefineName | ArchDefine604 | ArchDefinePpcgr) + .Case("620", ArchDefineName | ArchDefinePpcgr) + .Case("7400", ArchDefineName | ArchDefinePpcgr) + .Case("7450", ArchDefineName | ArchDefinePpcgr) + .Case("750", ArchDefineName | ArchDefinePpcgr) + .Case("970", ArchDefineName | ArchDefinePwr4 | ArchDefinePpcgr + | ArchDefinePpcsq) + .Case("pwr6", ArchDefinePwr6 | ArchDefinePpcgr | ArchDefinePpcsq) + .Case("pwr7", ArchDefineName | ArchDefinePwr6 | ArchDefinePpcgr + | ArchDefinePpcsq) + .Default(ArchDefineNone); + + if (defs & ArchDefineName) + Builder.defineMacro(Twine("_ARCH_", StringRef(CPU).upper())); + if (defs & ArchDefinePpcgr) + Builder.defineMacro("_ARCH_PPCGR"); + if (defs & ArchDefinePpcsq) + Builder.defineMacro("_ARCH_PPCSQ"); + if (defs & ArchDefine440) + Builder.defineMacro("_ARCH_440"); + if (defs & ArchDefine603) + Builder.defineMacro("_ARCH_603"); + if (defs & ArchDefine604) + Builder.defineMacro("_ARCH_604"); + if (defs & (ArchDefinePwr4 | ArchDefinePwr6)) + Builder.defineMacro("_ARCH_PWR4"); + if (defs & ArchDefinePwr6) { + Builder.defineMacro("_ARCH_PWR5"); + Builder.defineMacro("_ARCH_PWR6"); + } } bool PPCTargetInfo::hasFeature(StringRef Feature) const { @@ -878,15 +992,9 @@ public: } } - virtual const char *getVAListDeclaration() const { + virtual BuiltinVaListKind getBuiltinVaListKind() const { // This is the ELF definition, and is overridden by the Darwin sub-target - return "typedef struct __va_list_tag {" - " unsigned char gpr;" - " unsigned char fpr;" - " unsigned short reserved;" - " void* overflow_arg_area;" - " void* reg_save_area;" - "} __builtin_va_list[1];"; + return TargetInfo::PowerABIBuiltinVaList; } }; } // end anonymous namespace. @@ -907,8 +1015,8 @@ public: LongDoubleFormat = &llvm::APFloat::IEEEdouble; } } - virtual const char *getVAListDeclaration() const { - return "typedef char* __builtin_va_list;"; + virtual BuiltinVaListKind getBuiltinVaListKind() const { + return TargetInfo::CharPtrBuiltinVaList; } }; } // end anonymous namespace. @@ -927,8 +1035,8 @@ public: DescriptionString = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-" "i64:32:64-f32:32:32-f64:64:64-v128:128:128-n32"; } - virtual const char *getVAListDeclaration() const { - return "typedef char* __builtin_va_list;"; + virtual BuiltinVaListKind getBuiltinVaListKind() const { + return TargetInfo::CharPtrBuiltinVaList; } }; @@ -944,54 +1052,40 @@ public: } // end anonymous namespace. 
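// --- Illustrative sketch (editorial addition, not from the clang sources) ---
// For reference, the ELF va_list layout that the removed getVAListDeclaration()
// string spelled out, and which TargetInfo::PowerABIBuiltinVaList now stands
// for, is the PowerPC SVR4 register-save scheme. The field names are verbatim
// from the removed literal; the glosses on each field are editorial.
typedef struct ppc_va_list_tag_sketch {
  unsigned char gpr;        // next general-purpose argument register to use
  unsigned char fpr;        // next floating-point argument register to use
  unsigned short reserved;
  void *overflow_arg_area;  // arguments that spilled to the stack
  void *reg_save_area;      // where the prologue dumped the registers
} ppc_va_list_sketch[1];    // array-of-one, so the type decays like a pointer
// --- End of sketch ---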
namespace { - static const unsigned PTXAddrSpaceMap[] = { - 0, // opencl_global - 4, // opencl_local - 1 // opencl_constant + static const unsigned NVPTXAddrSpaceMap[] = { + 1, // opencl_global + 3, // opencl_local + 4, // opencl_constant + 1, // cuda_device + 4, // cuda_constant + 3, // cuda_shared }; - class PTXTargetInfo : public TargetInfo { + class NVPTXTargetInfo : public TargetInfo { static const char * const GCCRegNames[]; static const Builtin::Info BuiltinInfo[]; std::vector AvailableFeatures; public: - PTXTargetInfo(const std::string& triple) : TargetInfo(triple) { + NVPTXTargetInfo(const std::string& triple) : TargetInfo(triple) { BigEndian = false; TLSSupported = false; LongWidth = LongAlign = 64; - AddrSpaceMap = &PTXAddrSpaceMap; + AddrSpaceMap = &NVPTXAddrSpaceMap; // Define available target features - // These must be defined in sorted order! - AvailableFeatures.push_back("compute10"); - AvailableFeatures.push_back("compute11"); - AvailableFeatures.push_back("compute12"); - AvailableFeatures.push_back("compute13"); - AvailableFeatures.push_back("compute20"); - AvailableFeatures.push_back("double"); - AvailableFeatures.push_back("no-fma"); - AvailableFeatures.push_back("ptx20"); - AvailableFeatures.push_back("ptx21"); - AvailableFeatures.push_back("ptx22"); - AvailableFeatures.push_back("ptx23"); - AvailableFeatures.push_back("sm10"); - AvailableFeatures.push_back("sm11"); - AvailableFeatures.push_back("sm12"); - AvailableFeatures.push_back("sm13"); - AvailableFeatures.push_back("sm20"); - AvailableFeatures.push_back("sm21"); - AvailableFeatures.push_back("sm22"); - AvailableFeatures.push_back("sm23"); + // These must be defined in sorted order! + NoAsmVariants = true; } virtual void getTargetDefines(const LangOptions &Opts, MacroBuilder &Builder) const { Builder.defineMacro("__PTX__"); + Builder.defineMacro("__NVPTX__"); } virtual void getTargetBuiltins(const Builtin::Info *&Records, unsigned &NumRecords) const { Records = BuiltinInfo; - NumRecords = clang::PTX::LastTSBuiltin-Builtin::FirstTSBuiltin; + NumRecords = clang::NVPTX::LastTSBuiltin-Builtin::FirstTSBuiltin; } virtual bool hasFeature(StringRef Feature) const { - return Feature == "ptx"; + return Feature == "ptx" || Feature == "nvptx"; } virtual void getGCCRegNames(const char * const *&Names, @@ -1011,36 +1105,38 @@ namespace { // FIXME: Is this really right? 
return ""; } - virtual const char *getVAListDeclaration() const { + virtual BuiltinVaListKind getBuiltinVaListKind() const { // FIXME: implement - return "typedef char* __builtin_va_list;"; + return TargetInfo::CharPtrBuiltinVaList; + } + virtual bool setCPU(const std::string &Name) { + return Name == "sm_10" || Name == "sm_13" || Name == "sm_20"; } - virtual bool setFeatureEnabled(llvm::StringMap &Features, StringRef Name, bool Enabled) const; }; - const Builtin::Info PTXTargetInfo::BuiltinInfo[] = { + const Builtin::Info NVPTXTargetInfo::BuiltinInfo[] = { #define BUILTIN(ID, TYPE, ATTRS) { #ID, TYPE, ATTRS, 0, ALL_LANGUAGES }, #define LIBBUILTIN(ID, TYPE, ATTRS, HEADER) { #ID, TYPE, ATTRS, HEADER,\ ALL_LANGUAGES }, -#include "clang/Basic/BuiltinsPTX.def" +#include "clang/Basic/BuiltinsNVPTX.def" }; - const char * const PTXTargetInfo::GCCRegNames[] = { + const char * const NVPTXTargetInfo::GCCRegNames[] = { "r0" }; - void PTXTargetInfo::getGCCRegNames(const char * const *&Names, + void NVPTXTargetInfo::getGCCRegNames(const char * const *&Names, unsigned &NumNames) const { Names = GCCRegNames; NumNames = llvm::array_lengthof(GCCRegNames); } - bool PTXTargetInfo::setFeatureEnabled(llvm::StringMap &Features, - StringRef Name, - bool Enabled) const { + bool NVPTXTargetInfo::setFeatureEnabled(llvm::StringMap &Features, + StringRef Name, + bool Enabled) const { if(std::binary_search(AvailableFeatures.begin(), AvailableFeatures.end(), Name)) { Features[Name] = Enabled; @@ -1050,24 +1146,28 @@ namespace { } } - class PTX32TargetInfo : public PTXTargetInfo { + class NVPTX32TargetInfo : public NVPTXTargetInfo { public: - PTX32TargetInfo(const std::string& triple) : PTXTargetInfo(triple) { + NVPTX32TargetInfo(const std::string& triple) : NVPTXTargetInfo(triple) { PointerWidth = PointerAlign = 32; - SizeType = PtrDiffType = IntPtrType = TargetInfo::UnsignedInt; + SizeType = PtrDiffType = IntPtrType = TargetInfo::UnsignedInt; DescriptionString - = "e-p:32:32-i64:64:64-f64:64:64-n1:8:16:32:64"; - } + = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-" + "f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-" + "n16:32:64"; + } }; - class PTX64TargetInfo : public PTXTargetInfo { + class NVPTX64TargetInfo : public NVPTXTargetInfo { public: - PTX64TargetInfo(const std::string& triple) : PTXTargetInfo(triple) { + NVPTX64TargetInfo(const std::string& triple) : NVPTXTargetInfo(triple) { PointerWidth = PointerAlign = 64; - SizeType = PtrDiffType = IntPtrType = TargetInfo::UnsignedLongLong; + SizeType = PtrDiffType = IntPtrType = TargetInfo::UnsignedLongLong; DescriptionString - = "e-p:64:64-i64:64:64-f64:64:64-n1:8:16:32:64"; - } + = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-" + "f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-" + "n16:32:64"; + } }; } @@ -1096,8 +1196,8 @@ public: return Feature == "mblaze"; } - virtual const char *getVAListDeclaration() const { - return "typedef char* __builtin_va_list;"; + virtual BuiltinVaListKind getBuiltinVaListKind() const { + return TargetInfo::CharPtrBuiltinVaList; } virtual const char *getTargetPrefix() const { return "mblaze"; @@ -1245,11 +1345,16 @@ class X86TargetInfo : public TargetInfo { } MMX3DNowLevel; bool HasAES; + bool HasPCLMUL; bool HasLZCNT; + bool HasRDRND; bool HasBMI; bool HasBMI2; bool HasPOPCNT; + bool HasSSE4a; bool HasFMA4; + bool HasFMA; + bool HasXOP; /// \brief Enumeration of all of the X86 CPUs supported by Clang. 
/// @@ -1394,8 +1499,9 @@ class X86TargetInfo : public TargetInfo { public: X86TargetInfo(const std::string& triple) : TargetInfo(triple), SSELevel(NoSSE), MMX3DNowLevel(NoMMX3DNow), - HasAES(false), HasLZCNT(false), HasBMI(false), HasBMI2(false), - HasPOPCNT(false), HasFMA4(false), CPU(CK_Generic) { + HasAES(false), HasPCLMUL(false), HasLZCNT(false), HasRDRND(false), + HasBMI(false), HasBMI2(false), HasPOPCNT(false), HasSSE4a(false), + HasFMA4(false), HasFMA(false), HasXOP(false), CPU(CK_Generic) { BigEndian = false; LongDoubleFormat = &llvm::APFloat::x87DoubleExtended; } @@ -1577,13 +1683,17 @@ void X86TargetInfo::getDefaultFeatures(llvm::StringMap &Features) const { Features["sse42"] = false; Features["sse4a"] = false; Features["aes"] = false; + Features["pclmul"] = false; Features["avx"] = false; Features["avx2"] = false; Features["lzcnt"] = false; + Features["rdrand"] = false; Features["bmi"] = false; Features["bmi2"] = false; Features["popcnt"] = false; Features["fma4"] = false; + Features["fma"] = false; + Features["xop"] = false; // FIXME: This *really* should not be here. @@ -1637,23 +1747,30 @@ void X86TargetInfo::getDefaultFeatures(llvm::StringMap &Features) const { case CK_Corei7: setFeatureEnabled(Features, "mmx", true); setFeatureEnabled(Features, "sse4", true); - setFeatureEnabled(Features, "aes", true); break; case CK_Corei7AVX: + setFeatureEnabled(Features, "mmx", true); + setFeatureEnabled(Features, "avx", true); + setFeatureEnabled(Features, "aes", true); + setFeatureEnabled(Features, "pclmul", true); + break; case CK_CoreAVXi: setFeatureEnabled(Features, "mmx", true); - setFeatureEnabled(Features, "sse4", true); + setFeatureEnabled(Features, "avx", true); setFeatureEnabled(Features, "aes", true); - //setFeatureEnabled(Features, "avx", true); + setFeatureEnabled(Features, "pclmul", true); + setFeatureEnabled(Features, "rdrnd", true); break; case CK_CoreAVX2: setFeatureEnabled(Features, "mmx", true); - setFeatureEnabled(Features, "sse4", true); + setFeatureEnabled(Features, "avx2", true); setFeatureEnabled(Features, "aes", true); + setFeatureEnabled(Features, "pclmul", true); setFeatureEnabled(Features, "lzcnt", true); + setFeatureEnabled(Features, "rdrnd", true); setFeatureEnabled(Features, "bmi", true); setFeatureEnabled(Features, "bmi2", true); - //setFeatureEnabled(Features, "avx2", true); + setFeatureEnabled(Features, "fma", true); break; case CK_K6: case CK_WinChipC6: @@ -1697,11 +1814,13 @@ void X86TargetInfo::getDefaultFeatures(llvm::StringMap &Features) const { case CK_BTVER1: setFeatureEnabled(Features, "ssse3", true); setFeatureEnabled(Features, "sse4a", true); + break; case CK_BDVER1: case CK_BDVER2: - setFeatureEnabled(Features, "sse4", true); - setFeatureEnabled(Features, "sse4a", true); + setFeatureEnabled(Features, "avx", true); + setFeatureEnabled(Features, "xop", true); setFeatureEnabled(Features, "aes", true); + setFeatureEnabled(Features, "pclmul", true); break; case CK_C3_2: setFeatureEnabled(Features, "mmx", true); @@ -1716,7 +1835,8 @@ bool X86TargetInfo::setFeatureEnabled(llvm::StringMap &Features, // FIXME: This *really* should not be here. We need some way of translating // options into llvm subtarget features. if (!Features.count(Name) && - (Name != "sse4" && Name != "sse4.2" && Name != "sse4.1")) + (Name != "sse4" && Name != "sse4.2" && Name != "sse4.1" && + Name != "rdrnd")) return false; // FIXME: this should probably use a switch with fall through. 
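
In getDefaultFeatures, which the diff continues below, each CK_* CPU now spells out its complete feature set (avx, aes, pclmul, rdrnd, and so on) instead of leaving entries commented out; note the break added after CK_BTVER1 so it no longer falls through into the Bulldozer cases. A toy version of the per-CPU enable pass (feature names are taken from the diff, the setFeature helper is illustrative):

    #include <cstdio>
    #include <map>
    #include <string>

    static void setFeature(std::map<std::string, bool> &F, const std::string &N) {
      F[N] = true;
    }

    int main() {
      std::map<std::string, bool> Features;
      // The Corei7AVX case per the diff: mmx, avx, aes, pclmul.
      const char *corei7avx[] = {"mmx", "avx", "aes", "pclmul"};
      for (const char *f : corei7avx) setFeature(Features, f);
      for (auto &kv : Features) std::printf("+%s\n", kv.first.c_str());
    }
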
@@ -1746,7 +1866,9 @@ bool X86TargetInfo::setFeatureEnabled(llvm::StringMap &Features, else if (Name == "3dnowa") Features["mmx"] = Features["3dnow"] = Features["3dnowa"] = true; else if (Name == "aes") - Features["aes"] = true; + Features["sse"] = Features["sse2"] = Features["aes"] = true; + else if (Name == "pclmul") + Features["sse"] = Features["sse2"] = Features["pclmul"] = true; else if (Name == "avx") Features["mmx"] = Features["sse"] = Features["sse2"] = Features["sse3"] = Features["ssse3"] = Features["sse41"] = Features["sse42"] = @@ -1755,15 +1877,27 @@ bool X86TargetInfo::setFeatureEnabled(llvm::StringMap &Features, Features["mmx"] = Features["sse"] = Features["sse2"] = Features["sse3"] = Features["ssse3"] = Features["sse41"] = Features["sse42"] = Features["popcnt"] = Features["avx"] = Features["avx2"] = true; + else if (Name == "fma") + Features["mmx"] = Features["sse"] = Features["sse2"] = Features["sse3"] = + Features["ssse3"] = Features["sse41"] = Features["sse42"] = + Features["popcnt"] = Features["avx"] = Features["fma"] = true; else if (Name == "fma4") Features["mmx"] = Features["sse"] = Features["sse2"] = Features["sse3"] = Features["ssse3"] = Features["sse41"] = Features["sse42"] = - Features["popcnt"] = Features["avx"] = Features["fma4"] = true; + Features["popcnt"] = Features["avx"] = Features["sse4a"] = + Features["fma4"] = true; + else if (Name == "xop") + Features["mmx"] = Features["sse"] = Features["sse2"] = Features["sse3"] = + Features["ssse3"] = Features["sse41"] = Features["sse42"] = + Features["popcnt"] = Features["avx"] = Features["sse4a"] = + Features["fma4"] = Features["xop"] = true; else if (Name == "sse4a") Features["mmx"] = Features["sse"] = Features["sse2"] = Features["sse3"] = - Features["lzcnt"] = Features["popcnt"] = Features["sse4a"] = true; + Features["sse4a"] = true; else if (Name == "lzcnt") Features["lzcnt"] = true; + else if (Name == "rdrnd") + Features["rdrand"] = true; else if (Name == "bmi") Features["bmi"] = true; else if (Name == "bmi2") @@ -1776,33 +1910,50 @@ bool X86TargetInfo::setFeatureEnabled(llvm::StringMap &Features, else if (Name == "sse") Features["sse"] = Features["sse2"] = Features["sse3"] = Features["ssse3"] = Features["sse41"] = Features["sse42"] = - Features["sse4a"] = false; + Features["sse4a"] = Features["avx"] = Features["avx2"] = + Features["fma"] = Features["fma4"] = Features["aes"] = + Features["pclmul"] = Features["xop"] = false; else if (Name == "sse2") Features["sse2"] = Features["sse3"] = Features["ssse3"] = - Features["sse41"] = Features["sse42"] = Features["sse4a"] = false; + Features["sse41"] = Features["sse42"] = Features["sse4a"] = + Features["avx"] = Features["avx2"] = Features["fma"] = + Features["fma4"] = Features["aes"] = Features["pclmul"] = + Features["xop"] = false; else if (Name == "sse3") Features["sse3"] = Features["ssse3"] = Features["sse41"] = - Features["sse42"] = Features["sse4a"] = false; + Features["sse42"] = Features["sse4a"] = Features["avx"] = + Features["avx2"] = Features["fma"] = Features["fma4"] = + Features["xop"] = false; else if (Name == "ssse3") - Features["ssse3"] = Features["sse41"] = Features["sse42"] = false; + Features["ssse3"] = Features["sse41"] = Features["sse42"] = + Features["avx"] = Features["avx2"] = Features["fma"] = false; else if (Name == "sse4" || Name == "sse4.1") - Features["sse41"] = Features["sse42"] = false; + Features["sse41"] = Features["sse42"] = Features["avx"] = + Features["avx2"] = Features["fma"] = false; else if (Name == "sse4.2") - Features["sse42"] = 
false; + Features["sse42"] = Features["avx"] = Features["avx2"] = + Features["fma"] = false; else if (Name == "3dnow") Features["3dnow"] = Features["3dnowa"] = false; else if (Name == "3dnowa") Features["3dnowa"] = false; else if (Name == "aes") Features["aes"] = false; + else if (Name == "pclmul") + Features["pclmul"] = false; else if (Name == "avx") - Features["avx"] = Features["avx2"] = Features["fma4"] = false; + Features["avx"] = Features["avx2"] = Features["fma"] = + Features["fma4"] = Features["xop"] = false; else if (Name == "avx2") Features["avx2"] = false; + else if (Name == "fma") + Features["fma"] = false; else if (Name == "sse4a") - Features["sse4a"] = false; + Features["sse4a"] = Features["fma4"] = Features["xop"] = false; else if (Name == "lzcnt") Features["lzcnt"] = false; + else if (Name == "rdrnd") + Features["rdrand"] = false; else if (Name == "bmi") Features["bmi"] = false; else if (Name == "bmi2") @@ -1810,7 +1961,9 @@ bool X86TargetInfo::setFeatureEnabled(llvm::StringMap &Features, else if (Name == "popcnt") Features["popcnt"] = false; else if (Name == "fma4") - Features["fma4"] = false; + Features["fma4"] = Features["xop"] = false; + else if (Name == "xop") + Features["xop"] = false; } return true; @@ -1832,11 +1985,21 @@ void X86TargetInfo::HandleTargetFeatures(std::vector &Features) { continue; } + if (Feature == "pclmul") { + HasPCLMUL = true; + continue; + } + if (Feature == "lzcnt") { HasLZCNT = true; continue; } + if (Feature == "rdrand") { + HasRDRND = true; + continue; + } + if (Feature == "bmi") { HasBMI = true; continue; @@ -1852,11 +2015,26 @@ void X86TargetInfo::HandleTargetFeatures(std::vector &Features) { continue; } + if (Feature == "sse4a") { + HasSSE4a = true; + continue; + } + if (Feature == "fma4") { HasFMA4 = true; continue; } + if (Feature == "fma") { + HasFMA = true; + continue; + } + + if (Feature == "xop") { + HasXOP = true; + continue; + } + assert(Features[i][0] == '+' && "Invalid target feature!"); X86SSEEnum Level = llvm::StringSwitch(Feature) .Case("avx2", AVX2) @@ -1894,10 +2072,6 @@ void X86TargetInfo::getTargetDefines(const LangOptions &Opts, MacroBuilder &Builder) const { // Target identification. if (PointerWidth == 64) { - if (getLongWidth() == 64) { - Builder.defineMacro("_LP64"); - Builder.defineMacro("__LP64__"); - } Builder.defineMacro("__amd64__"); Builder.defineMacro("__amd64"); Builder.defineMacro("__x86_64"); @@ -2039,9 +2213,15 @@ void X86TargetInfo::getTargetDefines(const LangOptions &Opts, if (HasAES) Builder.defineMacro("__AES__"); + if (HasPCLMUL) + Builder.defineMacro("__PCLMUL__"); + if (HasLZCNT) Builder.defineMacro("__LZCNT__"); + if (HasRDRND) + Builder.defineMacro("__RDRND__"); + if (HasBMI) Builder.defineMacro("__BMI__"); @@ -2051,9 +2231,18 @@ void X86TargetInfo::getTargetDefines(const LangOptions &Opts, if (HasPOPCNT) Builder.defineMacro("__POPCNT__"); + if (HasSSE4a) + Builder.defineMacro("__SSE4A__"); + if (HasFMA4) Builder.defineMacro("__FMA4__"); + if (HasFMA) + Builder.defineMacro("__FMA__"); + + if (HasXOP) + Builder.defineMacro("__XOP__"); + // Each case falls through to the previous one here. 
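
The setFeatureEnabled hunks above encode feature implications in both directions: turning a feature on pulls in its prerequisites (xop implies fma4 implies avx, and so on), and turning one off now also clears everything that depends on it. A self-contained sketch of that two-way closure over a toy dependency graph (only a few of the diff's edges are modeled):

    #include <cstdio>
    #include <map>
    #include <set>
    #include <string>
    #include <vector>

    // Toy dependency graph: xop needs fma4, fma4 needs avx and sse4a, ...
    static std::map<std::string, std::vector<std::string>> Needs = {
      {"xop", {"fma4"}}, {"fma4", {"avx", "sse4a"}}, {"avx", {"sse42"}},
    };

    // Enabling closes over prerequisites; disabling closes over dependents.
    static void enable(std::set<std::string> &On, const std::string &F) {
      if (!On.insert(F).second) return;
      for (auto &D : Needs[F]) enable(On, D);
    }
    static void disable(std::set<std::string> &On, const std::string &F) {
      if (!On.erase(F)) return;
      for (auto &KV : Needs)
        for (auto &D : KV.second)
          if (D == F) disable(On, KV.first);   // KV.first depended on F
    }

    int main() {
      std::set<std::string> On;
      enable(On, "xop");
      disable(On, "avx");                      // also kills fma4 and xop
      for (auto &F : On) std::printf("+%s\n", F.c_str());
    }
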
switch (SSELevel) { case AVX2: @@ -2117,11 +2306,14 @@ bool X86TargetInfo::hasFeature(StringRef Feature) const { .Case("avx2", SSELevel >= AVX2) .Case("bmi", HasBMI) .Case("bmi2", HasBMI2) + .Case("fma", HasFMA) .Case("fma4", HasFMA4) .Case("lzcnt", HasLZCNT) + .Case("rdrnd", HasRDRND) .Case("mm3dnow", MMX3DNowLevel >= AMD3DNow) .Case("mm3dnowa", MMX3DNowLevel >= AMD3DNowAthlon) .Case("mmx", MMX3DNowLevel >= MMX) + .Case("pclmul", HasPCLMUL) .Case("popcnt", HasPOPCNT) .Case("sse", SSELevel >= SSE1) .Case("sse2", SSELevel >= SSE2) @@ -2129,9 +2321,11 @@ bool X86TargetInfo::hasFeature(StringRef Feature) const { .Case("ssse3", SSELevel >= SSSE3) .Case("sse41", SSELevel >= SSE41) .Case("sse42", SSELevel >= SSE42) + .Case("sse4a", HasSSE4a) .Case("x86", true) .Case("x86_32", PointerWidth == 32) .Case("x86_64", PointerWidth == 64) + .Case("xop", HasXOP) .Default(false); } @@ -2227,8 +2421,8 @@ public: // MaxAtomicInlineWidth. (cmpxchg8b is an i586 instruction.) MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 64; } - virtual const char *getVAListDeclaration() const { - return "typedef char* __builtin_va_list;"; + virtual BuiltinVaListKind getBuiltinVaListKind() const { + return TargetInfo::CharPtrBuiltinVaList; } int getEHDataRegisterNumber(unsigned RegNo) const { @@ -2266,6 +2460,18 @@ public: } // end anonymous namespace namespace { +class BitrigI386TargetInfo : public BitrigTargetInfo { +public: + BitrigI386TargetInfo(const std::string& triple) : + BitrigTargetInfo(triple) { + SizeType = UnsignedLong; + IntPtrType = SignedLong; + PtrDiffType = SignedLong; + } +}; +} // end anonymous namespace + +namespace { class DarwinI386TargetInfo : public DarwinTargetInfo { public: DarwinI386TargetInfo(const std::string& triple) : @@ -2273,6 +2479,7 @@ public: LongDoubleWidth = 128; LongDoubleAlign = 128; SuitableAlign = 128; + MaxVectorAlign = 256; SizeType = UnsignedLong; IntPtrType = SignedLong; DescriptionString = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-" @@ -2487,14 +2694,8 @@ public: MaxAtomicPromoteWidth = 128; MaxAtomicInlineWidth = 64; } - virtual const char *getVAListDeclaration() const { - return "typedef struct __va_list_tag {" - " unsigned gp_offset;" - " unsigned fp_offset;" - " void* overflow_arg_area;" - " void* reg_save_area;" - "} __va_list_tag;" - "typedef __va_list_tag __builtin_va_list[1];"; + virtual BuiltinVaListKind getBuiltinVaListKind() const { + return TargetInfo::X86_64ABIBuiltinVaList; } int getEHDataRegisterNumber(unsigned RegNo) const { @@ -2528,8 +2729,8 @@ public: WindowsTargetInfo::getTargetDefines(Opts, Builder); Builder.defineMacro("_WIN64"); } - virtual const char *getVAListDeclaration() const { - return "typedef char* __builtin_va_list;"; + virtual BuiltinVaListKind getBuiltinVaListKind() const { + return TargetInfo::CharPtrBuiltinVaList; } }; } // end anonymous namespace @@ -2586,6 +2787,7 @@ public: DarwinX86_64TargetInfo(const std::string& triple) : DarwinTargetInfo(triple) { Int64Type = SignedLongLong; + MaxVectorAlign = 256; } }; } // end anonymous namespace @@ -2603,6 +2805,18 @@ public: } // end anonymous namespace namespace { +class BitrigX86_64TargetInfo : public BitrigTargetInfo { +public: + BitrigX86_64TargetInfo(const std::string& triple) + : BitrigTargetInfo(triple) { + IntMaxType = SignedLongLong; + UIntMaxType = UnsignedLongLong; + Int64Type = SignedLongLong; + } +}; +} // end anonymous namespace + +namespace { class ARMTargetInfo : public TargetInfo { // Possible FPU choices. 
enum FPUMode { @@ -2860,8 +3074,8 @@ public: NumRecords = clang::ARM::LastTSBuiltin-Builtin::FirstTSBuiltin; } virtual bool isCLZForZeroUndef() const { return false; } - virtual const char *getVAListDeclaration() const { - return "typedef void* __builtin_va_list;"; + virtual BuiltinVaListKind getBuiltinVaListKind() const { + return TargetInfo::VoidPtrBuiltinVaList; } virtual void getGCCRegNames(const char * const *&Names, unsigned &NumNames) const; @@ -3015,8 +3229,8 @@ public: HexagonTargetInfo(const std::string& triple) : TargetInfo(triple) { BigEndian = false; DescriptionString = ("e-p:32:32:32-" - "i64:64:64-i32:32:32-" - "i16:16:16-i1:32:32-a:0:0"); + "i64:64:64-i32:32:32-i16:16:16-i1:32:32" + "f64:64:64-f32:32:32-a0:0-n32"); // {} in inline assembly are packet specifiers, not assembly variant // specifiers. @@ -3041,8 +3255,8 @@ public: return Feature == "hexagon"; } - virtual const char *getVAListDeclaration() const { - return "typedef char* __builtin_va_list;"; + virtual BuiltinVaListKind getBuiltinVaListKind() const { + return TargetInfo::CharPtrBuiltinVaList; } virtual void getGCCRegNames(const char * const *&Names, unsigned &NumNames) const; @@ -3057,6 +3271,7 @@ public: .Case("hexagonv2", "2") .Case("hexagonv3", "3") .Case("hexagonv4", "4") + .Case("hexagonv5", "5") .Default(0); } @@ -3111,6 +3326,14 @@ void HexagonTargetInfo::getTargetDefines(const LangOptions &Opts, Builder.defineMacro("__QDSP6_ARCH__", "4"); } } + else if(CPU == "hexagonv5") { + Builder.defineMacro("__HEXAGON_V5__"); + Builder.defineMacro("__HEXAGON_ARCH__", "5"); + if(Opts.HexagonQdsp6Compat) { + Builder.defineMacro("__QDSP6_V5__"); + Builder.defineMacro("__QDSP6_ARCH__", "5"); + } + } } const char * const HexagonTargetInfo::GCCRegNames[] = { @@ -3159,7 +3382,6 @@ class SparcV8TargetInfo : public TargetInfo { public: SparcV8TargetInfo(const std::string& triple) : TargetInfo(triple) { // FIXME: Support Sparc quad-precision long double? - BigEndian = false; DescriptionString = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-" "i64:64:64-f32:32:32-f64:64:64-v64:64:64-n32"; } @@ -3200,8 +3422,8 @@ public: unsigned &NumRecords) const { // FIXME: Implement! } - virtual const char *getVAListDeclaration() const { - return "typedef void* __builtin_va_list;"; + virtual BuiltinVaListKind getBuiltinVaListKind() const { + return TargetInfo::VoidPtrBuiltinVaList; } virtual void getGCCRegNames(const char * const *&Names, unsigned &NumNames) const; @@ -3344,9 +3566,9 @@ namespace { // FIXME: Is this really right? 
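
In the Hexagon hunks above, getHexagonCPUSuffix maps each -mcpu name to a bare architecture digit, with hexagonv5 newly added in this release, and getTargetDefines turns that digit into the __HEXAGON_V5__/__HEXAGON_ARCH__ macros. A standalone stand-in for the StringSwitch-with-default pattern (llvm::StringSwitch is replaced by a linear table so the sketch compiles without LLVM):

    #include <cstdio>
    #include <cstring>

    // First match wins; null means "unknown CPU" (the diff's .Default(0)).
    static const char *hexagonCPUSuffix(const char *CPU) {
      struct { const char *Name, *Suffix; } Table[] = {
        {"hexagonv2", "2"}, {"hexagonv3", "3"},
        {"hexagonv4", "4"}, {"hexagonv5", "5"},
      };
      for (auto &E : Table)
        if (std::strcmp(CPU, E.Name) == 0) return E.Suffix;
      return nullptr;
    }

    int main() {
      const char *S = hexagonCPUSuffix("hexagonv5");
      std::printf("__HEXAGON_ARCH__ %s\n", S ? S : "(unknown)");
    }
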
return ""; } - virtual const char *getVAListDeclaration() const { + virtual BuiltinVaListKind getBuiltinVaListKind() const { // FIXME: implement - return "typedef char* __builtin_va_list;"; + return TargetInfo::CharPtrBuiltinVaList; } }; @@ -3375,7 +3597,10 @@ namespace { static const unsigned TCEOpenCLAddrSpaceMap[] = { 3, // opencl_global 4, // opencl_local - 5 // opencl_constant + 5, // opencl_constant + 0, // cuda_device + 0, // cuda_constant + 0 // cuda_shared }; class TCETargetInfo : public TargetInfo{ @@ -3425,8 +3650,8 @@ namespace { virtual const char *getClobbers() const { return ""; } - virtual const char *getVAListDeclaration() const { - return "typedef void* __builtin_va_list;"; + virtual BuiltinVaListKind getBuiltinVaListKind() const { + return TargetInfo::VoidPtrBuiltinVaList; } virtual void getGCCRegNames(const char * const *&Names, unsigned &NumNames) const {} @@ -3441,9 +3666,15 @@ namespace { namespace { class MipsTargetInfoBase : public TargetInfo { + static const Builtin::Info BuiltinInfo[]; std::string CPU; - bool SoftFloat; - bool SingleFloat; + bool IsMips16; + enum MipsFloatABI { + HardFloat, SingleFloat, SoftFloat + } FloatABI; + enum DspRevEnum { + NoDSP, DSP1, DSP2 + } DspRev; protected: std::string ABI; @@ -3454,7 +3685,9 @@ public: const std::string& CPUStr) : TargetInfo(triple), CPU(CPUStr), - SoftFloat(false), SingleFloat(false), + IsMips16(false), + FloatABI(HardFloat), + DspRev(NoDSP), ABI(ABIStr) {} @@ -3471,14 +3704,35 @@ public: virtual void getArchDefines(const LangOptions &Opts, MacroBuilder &Builder) const { - if (SoftFloat) - Builder.defineMacro("__mips_soft_float", Twine(1)); - else if (SingleFloat) - Builder.defineMacro("__mips_single_float", Twine(1)); - else if (!SoftFloat && !SingleFloat) + switch (FloatABI) { + case HardFloat: Builder.defineMacro("__mips_hard_float", Twine(1)); - else - llvm_unreachable("Invalid float ABI for Mips."); + break; + case SingleFloat: + Builder.defineMacro("__mips_hard_float", Twine(1)); + Builder.defineMacro("__mips_single_float", Twine(1)); + break; + case SoftFloat: + Builder.defineMacro("__mips_soft_float", Twine(1)); + break; + } + + if (IsMips16) + Builder.defineMacro("__mips16", Twine(1)); + + switch (DspRev) { + default: + break; + case DSP1: + Builder.defineMacro("__mips_dsp_rev", Twine(1)); + Builder.defineMacro("__mips_dsp", Twine(1)); + break; + case DSP2: + Builder.defineMacro("__mips_dsp_rev", Twine(2)); + Builder.defineMacro("__mips_dspr2", Twine(1)); + Builder.defineMacro("__mips_dsp", Twine(1)); + break; + } Builder.defineMacro("_MIPS_SZPTR", Twine(getPointerWidth(0))); Builder.defineMacro("_MIPS_SZINT", Twine(getIntWidth())); @@ -3489,13 +3743,14 @@ public: MacroBuilder &Builder) const = 0; virtual void getTargetBuiltins(const Builtin::Info *&Records, unsigned &NumRecords) const { - // FIXME: Implement! 
+ Records = BuiltinInfo; + NumRecords = clang::Mips::LastTSBuiltin - Builtin::FirstTSBuiltin; } virtual bool hasFeature(StringRef Feature) const { return Feature == "mips"; } - virtual const char *getVAListDeclaration() const { - return "typedef void* __builtin_va_list;"; + virtual BuiltinVaListKind getBuiltinVaListKind() const { + return TargetInfo::VoidPtrBuiltinVaList; } virtual void getGCCRegNames(const char * const *&Names, unsigned &NumNames) const { @@ -3549,7 +3804,8 @@ public: if (Name == "soft-float" || Name == "single-float" || Name == "o32" || Name == "n32" || Name == "n64" || Name == "eabi" || Name == "mips32" || Name == "mips32r2" || - Name == "mips64" || Name == "mips64r2") { + Name == "mips64" || Name == "mips64r2" || + Name == "mips16" || Name == "dsp" || Name == "dspr2") { Features[Name] = Enabled; return true; } @@ -3557,27 +3813,39 @@ public: } virtual void HandleTargetFeatures(std::vector &Features) { - SoftFloat = false; - SingleFloat = false; + IsMips16 = false; + FloatABI = HardFloat; + DspRev = NoDSP; for (std::vector::iterator it = Features.begin(), ie = Features.end(); it != ie; ++it) { - if (*it == "+single-float") { - SingleFloat = true; - break; - } - - if (*it == "+soft-float") { - SoftFloat = true; - // This option is front-end specific. - // Do not need to pass it to the backend. - Features.erase(it); - break; - } + if (*it == "+single-float") + FloatABI = SingleFloat; + else if (*it == "+soft-float") + FloatABI = SoftFloat; + else if (*it == "+mips16") + IsMips16 = true; + else if (*it == "+dsp") + DspRev = std::max(DspRev, DSP1); + else if (*it == "+dspr2") + DspRev = std::max(DspRev, DSP2); } + + // Remove front-end specific option. + std::vector::iterator it = + std::find(Features.begin(), Features.end(), "+soft-float"); + if (it != Features.end()) + Features.erase(it); } }; +const Builtin::Info MipsTargetInfoBase::BuiltinInfo[] = { +#define BUILTIN(ID, TYPE, ATTRS) { #ID, TYPE, ATTRS, 0, ALL_LANGUAGES }, +#define LIBBUILTIN(ID, TYPE, ATTRS, HEADER) { #ID, TYPE, ATTRS, HEADER,\ + ALL_LANGUAGES }, +#include "clang/Basic/BuiltinsMips.def" +}; + class Mips32TargetInfoBase : public MipsTargetInfoBase { public: Mips32TargetInfoBase(const std::string& triple) : @@ -3868,8 +4136,8 @@ public: virtual void getTargetBuiltins(const Builtin::Info *&Records, unsigned &NumRecords) const { } - virtual const char *getVAListDeclaration() const { - return "typedef int __builtin_va_list[4];"; + virtual BuiltinVaListKind getBuiltinVaListKind() const { + return TargetInfo::PNaClABIBuiltinVaList; } virtual void getGCCRegNames(const char * const *&Names, unsigned &NumNames) const; @@ -3926,6 +4194,10 @@ static TargetInfo *AllocateTarget(const std::string &T) { return new FreeBSDTargetInfo(T); case llvm::Triple::NetBSD: return new NetBSDTargetInfo(T); + case llvm::Triple::OpenBSD: + return new OpenBSDTargetInfo(T); + case llvm::Triple::Bitrig: + return new BitrigTargetInfo(T); case llvm::Triple::RTEMS: return new RTEMSTargetInfo(T); default: @@ -3973,6 +4245,8 @@ static TargetInfo *AllocateTarget(const std::string &T) { return new FreeBSDTargetInfo(T); case llvm::Triple::NetBSD: return new NetBSDTargetInfo(T); + case llvm::Triple::OpenBSD: + return new OpenBSDTargetInfo(T); default: return new Mips64EBTargetInfo(T); } @@ -3987,6 +4261,8 @@ static TargetInfo *AllocateTarget(const std::string &T) { return new FreeBSDTargetInfo(T); case llvm::Triple::NetBSD: return new NetBSDTargetInfo(T); + case llvm::Triple::OpenBSD: + return new OpenBSDTargetInfo(T); default: return new 
Mips64ELTargetInfo(T); } @@ -4009,6 +4285,8 @@ static TargetInfo *AllocateTarget(const std::string &T) { return new FreeBSDTargetInfo(T); case llvm::Triple::NetBSD: return new NetBSDTargetInfo(T); + case llvm::Triple::OpenBSD: + return new OpenBSDTargetInfo(T); case llvm::Triple::RTEMS: return new RTEMSTargetInfo(T); default: @@ -4031,10 +4309,10 @@ static TargetInfo *AllocateTarget(const std::string &T) { return new PPC64TargetInfo(T); } - case llvm::Triple::ptx32: - return new PTX32TargetInfo(T); - case llvm::Triple::ptx64: - return new PTX64TargetInfo(T); + case llvm::Triple::nvptx: + return new NVPTX32TargetInfo(T); + case llvm::Triple::nvptx64: + return new NVPTX64TargetInfo(T); case llvm::Triple::mblaze: return new MBlazeTargetInfo(T); @@ -4049,6 +4327,8 @@ static TargetInfo *AllocateTarget(const std::string &T) { return new SolarisSparcV8TargetInfo(T); case llvm::Triple::NetBSD: return new NetBSDTargetInfo(T); + case llvm::Triple::OpenBSD: + return new OpenBSDTargetInfo(T); case llvm::Triple::RTEMS: return new RTEMSTargetInfo(T); default: @@ -4077,6 +4357,8 @@ static TargetInfo *AllocateTarget(const std::string &T) { return new NetBSDI386TargetInfo(T); case llvm::Triple::OpenBSD: return new OpenBSDI386TargetInfo(T); + case llvm::Triple::Bitrig: + return new BitrigI386TargetInfo(T); case llvm::Triple::FreeBSD: return new FreeBSDTargetInfo(T); case llvm::Triple::Minix: @@ -4112,6 +4394,8 @@ static TargetInfo *AllocateTarget(const std::string &T) { return new NetBSDTargetInfo(T); case llvm::Triple::OpenBSD: return new OpenBSDX86_64TargetInfo(T); + case llvm::Triple::Bitrig: + return new BitrigX86_64TargetInfo(T); case llvm::Triple::FreeBSD: return new FreeBSDTargetInfo(T); case llvm::Triple::Solaris: diff --git a/lib/Basic/Version.cpp b/lib/Basic/Version.cpp index 8cb2386..9daa30a 100644 --- a/lib/Basic/Version.cpp +++ b/lib/Basic/Version.cpp @@ -32,7 +32,7 @@ std::string getClangRepositoryPath() { // If the SVN_REPOSITORY is empty, try to use the SVN keyword. This helps us // pick up a tag in an SVN export, for example. - static StringRef SVNRepository("$URL: http://llvm.org/svn/llvm-project/cfe/branches/release_31/lib/Basic/Version.cpp $"); + static StringRef SVNRepository("$URL: http://llvm.org/svn/llvm-project/cfe/trunk/lib/Basic/Version.cpp $"); if (URL.empty()) { URL = SVNRepository.slice(SVNRepository.find(':'), SVNRepository.find("/lib/Basic")); @@ -136,8 +136,7 @@ std::string getClangFullCPPVersion() { #ifdef CLANG_VENDOR OS << CLANG_VENDOR; #endif - OS << "Clang " CLANG_VERSION_STRING " (" - << getClangFullRepositoryVersion() << ')'; + OS << "Clang " CLANG_VERSION_STRING " " << getClangFullRepositoryVersion(); return OS.str(); } diff --git a/lib/Basic/VersionTuple.cpp b/lib/Basic/VersionTuple.cpp index 77aad39..4f479d0 100644 --- a/lib/Basic/VersionTuple.cpp +++ b/lib/Basic/VersionTuple.cpp @@ -34,3 +34,55 @@ raw_ostream& clang::operator<<(raw_ostream &Out, Out << '.' 
        << *Subminor;
   return Out;
 }
+
+static bool parseInt(StringRef &input, unsigned &value) {
+  assert(value == 0);
+  if (input.empty()) return true;
+
+  char next = input[0];
+  input = input.substr(1);
+  if (next < '0' || next > '9') return true;
+  value = (unsigned) (next - '0');
+
+  while (!input.empty()) {
+    next = input[0];
+    if (next < '0' || next > '9') return false;
+    input = input.substr(1);
+    value = value * 10 + (unsigned) (next - '0');
+  }
+
+  return false;
+}
+
+bool VersionTuple::tryParse(StringRef input) {
+  unsigned major = 0, minor = 0, micro = 0;
+
+  // Parse the major version, [0-9]+
+  if (parseInt(input, major)) return true;
+
+  if (input.empty()) {
+    *this = VersionTuple(major);
+    return false;
+  }
+
+  // If we're not done, parse the minor version, \.[0-9]+
+  if (input[0] != '.') return true;
+  input = input.substr(1);
+  if (parseInt(input, minor)) return true;
+
+  if (input.empty()) {
+    *this = VersionTuple(major, minor);
+    return false;
+  }
+
+  // If we're not done, parse the micro version, \.[0-9]+
+  if (input[0] != '.') return true;
+  input = input.substr(1);
+  if (parseInt(input, micro)) return true;
+
+  // If we have characters left over, it's an error.
+  if (!input.empty()) return true;
+
+  *this = VersionTuple(major, minor, micro);
+  return false;
+}
diff --git a/lib/CMakeLists.txt b/lib/CMakeLists.txt
index dfb9d61..206c228 100644
--- a/lib/CMakeLists.txt
+++ b/lib/CMakeLists.txt
@@ -3,6 +3,7 @@ add_subdirectory(Basic)
 add_subdirectory(Lex)
 add_subdirectory(Parse)
 add_subdirectory(AST)
+add_subdirectory(ASTMatchers)
 add_subdirectory(Sema)
 add_subdirectory(CodeGen)
 add_subdirectory(Analysis)
diff --git a/lib/CodeGen/ABIInfo.h b/lib/CodeGen/ABIInfo.h
index 2853bc8..86f5380 100644
--- a/lib/CodeGen/ABIInfo.h
+++ b/lib/CodeGen/ABIInfo.h
@@ -74,31 +74,42 @@ namespace clang {
     unsigned UIntData;
     bool BoolData0;
     bool BoolData1;
+    bool InReg;
 
-    ABIArgInfo(Kind K, llvm::Type *TD=0, unsigned UI=0,
-               bool B0 = false, bool B1 = false, llvm::Type* P = 0)
+    ABIArgInfo(Kind K, llvm::Type *TD, unsigned UI, bool B0, bool B1, bool IR,
+               llvm::Type* P)
       : TheKind(K), TypeData(TD), PaddingType(P), UIntData(UI), BoolData0(B0),
-        BoolData1(B1) {}
+        BoolData1(B1), InReg(IR) {}
 
   public:
     ABIArgInfo() : TheKind(Direct), TypeData(0), UIntData(0) {}
 
     static ABIArgInfo getDirect(llvm::Type *T = 0, unsigned Offset = 0,
                                 llvm::Type *Padding = 0) {
-      return ABIArgInfo(Direct, T, Offset, false, false, Padding);
+      return ABIArgInfo(Direct, T, Offset, false, false, false, Padding);
+    }
+    static ABIArgInfo getDirectInReg(llvm::Type *T) {
+      return ABIArgInfo(Direct, T, 0, false, false, true, 0);
     }
     static ABIArgInfo getExtend(llvm::Type *T = 0) {
-      return ABIArgInfo(Extend, T, 0);
+      return ABIArgInfo(Extend, T, 0, false, false, false, 0);
+    }
+    static ABIArgInfo getExtendInReg(llvm::Type *T = 0) {
+      return ABIArgInfo(Extend, T, 0, false, false, true, 0);
     }
     static ABIArgInfo getIgnore() {
-      return ABIArgInfo(Ignore);
+      return ABIArgInfo(Ignore, 0, 0, false, false, false, 0);
     }
     static ABIArgInfo getIndirect(unsigned Alignment, bool ByVal = true
                                   , bool Realign = false) {
-      return ABIArgInfo(Indirect, 0, Alignment, ByVal, Realign);
+      return ABIArgInfo(Indirect, 0, Alignment, ByVal, Realign, false, 0);
+    }
+    static ABIArgInfo getIndirectInReg(unsigned Alignment, bool ByVal = true
+                                       , bool Realign = false) {
+      return ABIArgInfo(Indirect, 0, Alignment, ByVal, Realign, true, 0);
     }
     static ABIArgInfo getExpand() {
-      return ABIArgInfo(Expand);
+      return ABIArgInfo(Expand, 0, 0, false, false, false, 0);
     }
 
     Kind getKind()
const { return TheKind; } @@ -132,6 +143,11 @@ namespace clang { TypeData = T; } + bool getInReg() const { + assert((isDirect() || isExtend() || isIndirect()) && "Invalid kind!"); + return InReg; + } + // Indirect accessors unsigned getIndirectAlign() const { assert(TheKind == Indirect && "Invalid kind!"); diff --git a/lib/CodeGen/BackendUtil.cpp b/lib/CodeGen/BackendUtil.cpp index 2f44711..0a1915b 100644 --- a/lib/CodeGen/BackendUtil.cpp +++ b/lib/CodeGen/BackendUtil.cpp @@ -121,6 +121,12 @@ static void addObjCARCOptPass(const PassManagerBuilder &Builder, PassManagerBase PM.add(createObjCARCOptPass()); } +static unsigned BoundsChecking; +static void addBoundsCheckingPass(const PassManagerBuilder &Builder, + PassManagerBase &PM) { + PM.add(createBoundsCheckingPass(BoundsChecking)); +} + static void addAddressSanitizerPass(const PassManagerBuilder &Builder, PassManagerBase &PM) { PM.add(createAddressSanitizerPass()); @@ -160,6 +166,14 @@ void EmitAssemblyHelper::CreatePasses() { addObjCARCOptPass); } + if (CodeGenOpts.BoundsChecking > 0) { + BoundsChecking = CodeGenOpts.BoundsChecking; + PMBuilder.addExtension(PassManagerBuilder::EP_ScalarOptimizerLate, + addBoundsCheckingPass); + PMBuilder.addExtension(PassManagerBuilder::EP_EnabledOnOptLevel0, + addBoundsCheckingPass); + } + if (LangOpts.AddressSanitizer) { PMBuilder.addExtension(PassManagerBuilder::EP_ScalarOptimizerLate, addAddressSanitizerPass); @@ -219,7 +233,7 @@ void EmitAssemblyHelper::CreatePasses() { CodeGenOpts.EmitGcovArcs, TargetTriple.isMacOSX())); - if (!CodeGenOpts.DebugInfo) + if (CodeGenOpts.DebugInfo == CodeGenOptions::NoDebugInfo) MPM->add(createStripSymbolsPass(true)); } @@ -324,6 +338,9 @@ bool EmitAssemblyHelper::AddEmitPasses(BackendAction Action, Options.NoFramePointerElimNonLeaf = true; } + if (CodeGenOpts.UseInitArray) + Options.UseInitArray = true; + // Set float ABI type. if (CodeGenOpts.FloatABI == "soft" || CodeGenOpts.FloatABI == "softfp") Options.FloatABIType = llvm::FloatABI::Soft; @@ -334,6 +351,19 @@ bool EmitAssemblyHelper::AddEmitPasses(BackendAction Action, Options.FloatABIType = llvm::FloatABI::Default; } + // Set FP fusion mode. + switch (LangOpts.getFPContractMode()) { + case LangOptions::FPC_Off: + Options.AllowFPOpFusion = llvm::FPOpFusion::Strict; + break; + case LangOptions::FPC_On: + Options.AllowFPOpFusion = llvm::FPOpFusion::Standard; + break; + case LangOptions::FPC_Fast: + Options.AllowFPOpFusion = llvm::FPOpFusion::Fast; + break; + } + Options.LessPreciseFPMADOption = CodeGenOpts.LessPreciseFPMAD; Options.NoInfsFPMath = CodeGenOpts.NoInfsFPMath; Options.NoNaNsFPMath = CodeGenOpts.NoNaNsFPMath; diff --git a/lib/CodeGen/CGBlocks.cpp b/lib/CodeGen/CGBlocks.cpp index f8c7bcd..37ef4af 100644 --- a/lib/CodeGen/CGBlocks.cpp +++ b/lib/CodeGen/CGBlocks.cpp @@ -458,19 +458,23 @@ static void computeBlockInfo(CodeGenModule &CGM, CodeGenFunction *CGF, } } + assert(endAlign == getLowBit(blockSize)); + // At this point, we just have to add padding if the end align still // isn't aligned right. 
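
In the BackendUtil hunk above, bounds checking is wired in through two PassManagerBuilder extension points so the pass runs both at -O0 and late in the scalar pipeline, and the file-scope static BoundsChecking exists because the extension callback is a plain function that cannot carry state of its own. A toy model of that callback-registry design (the Builder type here is illustrative, not LLVM's):

    #include <cstdio>
    #include <functional>
    #include <map>
    #include <vector>

    enum ExtPoint { ScalarOptimizerLate, EnabledOnOptLevel0 };

    struct Builder {
      std::map<ExtPoint, std::vector<std::function<void()>>> Ext;
      void addExtension(ExtPoint P, std::function<void()> F) { Ext[P].push_back(F); }
      // Fires the callbacks registered for whichever point is reached.
      void populate(bool OptLevel0) {
        for (auto &F : Ext[OptLevel0 ? EnabledOnOptLevel0 : ScalarOptimizerLate]) F();
      }
    };

    int main() {
      Builder B;
      unsigned BoundsChecking = 1;   // mirrors the file-scope static in the diff
      auto add = [&] { std::printf("add BoundsCheckingPass(%u)\n", BoundsChecking); };
      B.addExtension(ScalarOptimizerLate, add);
      B.addExtension(EnabledOnOptLevel0, add);
      B.populate(/*OptLevel0=*/true);
    }
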
   if (endAlign < maxFieldAlign) {
-    CharUnits padding = maxFieldAlign - endAlign;
+    CharUnits newBlockSize = blockSize.RoundUpToAlignment(maxFieldAlign);
+    CharUnits padding = newBlockSize - blockSize;
     elementTypes.push_back(llvm::ArrayType::get(CGM.Int8Ty,
                                                 padding.getQuantity()));
-    blockSize += padding;
-
-    endAlign = getLowBit(blockSize);
-    assert(endAlign >= maxFieldAlign);
+    blockSize = newBlockSize;
+    endAlign = getLowBit(blockSize); // might be > maxFieldAlign
   }
 
+  assert(endAlign >= maxFieldAlign);
+  assert(endAlign == getLowBit(blockSize));
+
   // Slam everything else on now.  This works because they have
   // strictly decreasing alignment and we expect that size is always a
   // multiple of alignment.
@@ -626,7 +630,7 @@ llvm::Value *CodeGenFunction::EmitBlockLiteral(const CGBlockInfo &blockInfo) {
   // Using the computed layout, generate the actual block function.
   bool isLambdaConv = blockInfo.getBlockDecl()->isConversionFromLambda();
   llvm::Constant *blockFn
-    = CodeGenFunction(CGM).GenerateBlockFunction(CurGD, blockInfo,
+    = CodeGenFunction(CGM, true).GenerateBlockFunction(CurGD, blockInfo,
                                                  CurFuncDecl, LocalDeclMap,
                                                  isLambdaConv);
   blockFn = llvm::ConstantExpr::getBitCast(blockFn, VoidPtrTy);
@@ -694,7 +698,7 @@ llvm::Value *CodeGenFunction::EmitBlockLiteral(const CGBlockInfo &blockInfo) {
     // Compute the address of the thing we're going to move into the
     // block literal.
     llvm::Value *src;
-    if (ci->isNested()) {
+    if (BlockInfo && ci->isNested()) {
       // We need to use the capture from the enclosing block.
       const CGBlockInfo::Capture &enclosingCapture =
         BlockInfo->getCapture(variable);
@@ -872,7 +876,7 @@ RValue CodeGenFunction::EmitBlockCallExpr(const CallExpr* E,
   const FunctionType *FuncTy = FnType->castAs<FunctionType>();
 
   const CGFunctionInfo &FnInfo =
-    CGM.getTypes().arrangeFunctionCall(Args, FuncTy);
+    CGM.getTypes().arrangeFreeFunctionCall(Args, FuncTy);
 
   // Cast the function pointer to the right type.
   llvm::Type *BlockFTy = CGM.getTypes().GetFunctionType(FnInfo);
@@ -999,7 +1003,8 @@ CodeGenFunction::GenerateBlockFunction(GlobalDecl GD,
 
   // Check if we should generate debug info for this block function.
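
The computeBlockInfo hunk above fixes the padding computation: the old maxFieldAlign - endAlign formula does not always land on an aligned size, which is exactly why the old assert had to move and RoundUpToAlignment is used instead. A small worked counterexample (plain C++; roundUp is the usual power-of-two round-up):

    #include <cassert>
    #include <cstdio>

    // Round `size` up to a multiple of `align` (a power of two).
    static unsigned long roundUp(unsigned long size, unsigned long align) {
      return (size + align - 1) & ~(align - 1);
    }

    int main() {
      // blockSize = 28, maxFieldAlign = 16: endAlign (lowest set bit of 28)
      // is 4, so the old padding was 16 - 4 = 12, giving 40 bytes, which is
      // still not 16-aligned and would have tripped the old assert.
      unsigned long blockSize = 28, maxFieldAlign = 16;
      unsigned long padded = roundUp(blockSize, maxFieldAlign);
      assert(padded % maxFieldAlign == 0 && padded == 32);
      std::printf("pad %lu -> %lu\n", blockSize, padded);
    }
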
if (CGM.getModuleDebugInfo()) DebugInfo = CGM.getModuleDebugInfo(); - + CurGD = GD; + BlockInfo = &blockInfo; // Arrange for local static and local extern declarations to appear @@ -1130,15 +1135,17 @@ CodeGenFunction::GenerateBlockFunction(GlobalDecl GD, const VarDecl *variable = ci->getVariable(); DI->EmitLocation(Builder, variable->getLocation()); - const CGBlockInfo::Capture &capture = blockInfo.getCapture(variable); - if (capture.isConstant()) { - DI->EmitDeclareOfAutoVariable(variable, LocalDeclMap[variable], - Builder); - continue; - } + if (CGM.getCodeGenOpts().DebugInfo >= CodeGenOptions::LimitedDebugInfo) { + const CGBlockInfo::Capture &capture = blockInfo.getCapture(variable); + if (capture.isConstant()) { + DI->EmitDeclareOfAutoVariable(variable, LocalDeclMap[variable], + Builder); + continue; + } - DI->EmitDeclareOfBlockDeclRefVariable(variable, BlockPointer, - Builder, blockInfo); + DI->EmitDeclareOfBlockDeclRefVariable(variable, BlockPointer, + Builder, blockInfo); + } } } diff --git a/lib/CodeGen/CGBuilder.h b/lib/CodeGen/CGBuilder.h index 8120217..a790a74 100644 --- a/lib/CodeGen/CGBuilder.h +++ b/lib/CodeGen/CGBuilder.h @@ -10,7 +10,7 @@ #ifndef CLANG_CODEGEN_CGBUILDER_H #define CLANG_CODEGEN_CGBUILDER_H -#include "llvm/Support/IRBuilder.h" +#include "llvm/IRBuilder.h" namespace clang { namespace CodeGen { diff --git a/lib/CodeGen/CGBuiltin.cpp b/lib/CodeGen/CGBuiltin.cpp index e30b513..65c782e 100644 --- a/lib/CodeGen/CGBuiltin.cpp +++ b/lib/CodeGen/CGBuiltin.cpp @@ -335,6 +335,10 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, return RValue::get(Builder.CreateCall(F, ArgValue)); } case Builtin::BI__builtin_object_size: { + // We rely on constant folding to deal with expressions with side effects. + assert(!E->getArg(0)->HasSideEffects(getContext()) && + "should have been constant folded"); + // We pass this builtin onto the optimizer so that it can // figure out the object size in more complex cases. 
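
The __builtin_object_size hunk, which continues below, asserts that side-effecting arguments were already handled by constant folding, then forwards bit 1 of the type argument ((val & 0x2) >> 1) as the intrinsic's min/max flag. A usage sketch at the source level (the builtin is compiler-specific; this compiles with clang or gcc):

    #include <cstdio>

    int main() {
      char buf[32];
      // Type argument bit 1 selects minimum (types 2,3) vs maximum (types 0,1)
      // object size; codegen passes that bit on as the intrinsic's i1 operand.
      std::printf("max %zu min %zu\n",
                  __builtin_object_size(buf, 0),
                  __builtin_object_size(buf, 2));
    }
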
llvm::Type *ResType = ConvertType(E->getType()); @@ -348,9 +352,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, CI = ConstantInt::get(Builder.getInt1Ty(), (val & 0x2) >> 1); Value *F = CGM.getIntrinsic(Intrinsic::objectsize, ResType); - return RValue::get(Builder.CreateCall2(F, - EmitScalarExpr(E->getArg(0)), - CI)); + return RValue::get(Builder.CreateCall2(F, EmitScalarExpr(E->getArg(0)),CI)); } case Builtin::BI__builtin_prefetch: { Value *Locality, *RW, *Address = EmitScalarExpr(E->getArg(0)); @@ -363,6 +365,10 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, Value *F = CGM.getIntrinsic(Intrinsic::prefetch); return RValue::get(Builder.CreateCall4(F, Address, RW, Locality, Data)); } + case Builtin::BI__builtin_readcyclecounter: { + Value *F = CGM.getIntrinsic(Intrinsic::readcyclecounter); + return RValue::get(Builder.CreateCall(F)); + } case Builtin::BI__builtin_trap: { Value *F = CGM.getIntrinsic(Intrinsic::trap); return RValue::get(Builder.CreateCall(F)); @@ -982,9 +988,9 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, Args.add(RValue::get(llvm::Constant::getNullValue(VoidPtrTy)), getContext().VoidPtrTy); const CGFunctionInfo &FuncInfo = - CGM.getTypes().arrangeFunctionCall(E->getType(), Args, - FunctionType::ExtInfo(), - RequiredArgs::All); + CGM.getTypes().arrangeFreeFunctionCall(E->getType(), Args, + FunctionType::ExtInfo(), + RequiredArgs::All); llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FuncInfo); llvm::Constant *Func = CGM.CreateRuntimeFunction(FTy, LibCallName); return EmitCall(FuncInfo, Func, ReturnValueSlot(), Args); @@ -1376,8 +1382,6 @@ Value *CodeGenFunction::EmitTargetBuiltinExpr(unsigned BuiltinID, case llvm::Triple::ppc: case llvm::Triple::ppc64: return EmitPPCBuiltinExpr(BuiltinID, E); - case llvm::Triple::hexagon: - return EmitHexagonBuiltinExpr(BuiltinID, E); default: return 0; } @@ -1629,13 +1633,17 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID, } case ARM::BI__builtin_neon_vclz_v: case ARM::BI__builtin_neon_vclzq_v: { - Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vclz, Ty); + // Generate target-independent intrinsic; also need to add second argument + // for whether or not clz of zero is undefined; on ARM it isn't. 
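
As the vclz comment below spells out, NEON vclz/vcnt now lower to the generic llvm.ctlz/llvm.ctpop intrinsics; ctlz takes an extra i1 operand saying whether ctlz(0) is undefined, and ARM passes false because its CLZ of zero is defined (hence the isCLZForZeroUndef override returning false earlier in the patch). A portable reference model of that defined-at-zero semantic:

    #include <cstdint>
    #include <cstdio>

    // Reference count-leading-zeros with ctlz(0) == 32, the behavior the
    // intrinsic models when is_zero_undef is false.
    static unsigned ctlz32(uint32_t x) {
      unsigned n = 0;
      for (uint32_t bit = 0x80000000u; bit && !(x & bit); bit >>= 1) ++n;
      return n;                       // 32 when x == 0
    }

    int main() {
      std::printf("ctlz(0) = %u, ctlz(1) = %u\n", ctlz32(0), ctlz32(1));
    }
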
+ Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Ty); + Ops.push_back(Builder.getInt1(Target.isCLZForZeroUndef())); return EmitNeonCall(F, Ops, "vclz"); } case ARM::BI__builtin_neon_vcnt_v: case ARM::BI__builtin_neon_vcntq_v: { - Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vcnt, Ty); - return EmitNeonCall(F, Ops, "vcnt"); + // generate target-independent intrinsic + Function *F = CGM.getIntrinsic(Intrinsic::ctpop, Ty); + return EmitNeonCall(F, Ops, "vctpop"); } case ARM::BI__builtin_neon_vcvt_f16_v: { assert(Type.getEltType() == NeonTypeFlags::Float16 && !quad && @@ -2411,8 +2419,11 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, return llvm::Constant::getNullValue(ConvertType(E->getType())); } case X86::BI__builtin_ia32_movntps: + case X86::BI__builtin_ia32_movntps256: case X86::BI__builtin_ia32_movntpd: + case X86::BI__builtin_ia32_movntpd256: case X86::BI__builtin_ia32_movntdq: + case X86::BI__builtin_ia32_movntdq256: case X86::BI__builtin_ia32_movnti: { llvm::MDNode *Node = llvm::MDNode::get(getLLVMContext(), Builder.getInt32(1)); @@ -2444,1996 +2455,31 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, llvm::Function *F = CGM.getIntrinsic(ID); return Builder.CreateCall(F, Ops, name); } + case X86::BI__builtin_ia32_rdrand16_step: + case X86::BI__builtin_ia32_rdrand32_step: + case X86::BI__builtin_ia32_rdrand64_step: { + Intrinsic::ID ID; + switch (BuiltinID) { + default: llvm_unreachable("Unsupported intrinsic!"); + case X86::BI__builtin_ia32_rdrand16_step: + ID = Intrinsic::x86_rdrand_16; + break; + case X86::BI__builtin_ia32_rdrand32_step: + ID = Intrinsic::x86_rdrand_32; + break; + case X86::BI__builtin_ia32_rdrand64_step: + ID = Intrinsic::x86_rdrand_64; + break; + } + + Value *Call = Builder.CreateCall(CGM.getIntrinsic(ID)); + Builder.CreateStore(Builder.CreateExtractValue(Call, 0), Ops[0]); + return Builder.CreateExtractValue(Call, 1); + } } } -Value *CodeGenFunction::EmitHexagonBuiltinExpr(unsigned BuiltinID, - const CallExpr *E) { - llvm::SmallVector Ops; - - for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) - Ops.push_back(EmitScalarExpr(E->getArg(i))); - - Intrinsic::ID ID = Intrinsic::not_intrinsic; - - switch (BuiltinID) { - default: return 0; - - case Hexagon::BI__builtin_HEXAGON_C2_cmpeq: - ID = Intrinsic::hexagon_C2_cmpeq; break; - - case Hexagon::BI__builtin_HEXAGON_C2_cmpgt: - ID = Intrinsic::hexagon_C2_cmpgt; break; - - case Hexagon::BI__builtin_HEXAGON_C2_cmpgtu: - ID = Intrinsic::hexagon_C2_cmpgtu; break; - - case Hexagon::BI__builtin_HEXAGON_C2_cmpeqp: - ID = Intrinsic::hexagon_C2_cmpeqp; break; - - case Hexagon::BI__builtin_HEXAGON_C2_cmpgtp: - ID = Intrinsic::hexagon_C2_cmpgtp; break; - - case Hexagon::BI__builtin_HEXAGON_C2_cmpgtup: - ID = Intrinsic::hexagon_C2_cmpgtup; break; - - case Hexagon::BI__builtin_HEXAGON_C2_bitsset: - ID = Intrinsic::hexagon_C2_bitsset; break; - - case Hexagon::BI__builtin_HEXAGON_C2_bitsclr: - ID = Intrinsic::hexagon_C2_bitsclr; break; - - case Hexagon::BI__builtin_HEXAGON_C2_cmpeqi: - ID = Intrinsic::hexagon_C2_cmpeqi; break; - - case Hexagon::BI__builtin_HEXAGON_C2_cmpgti: - ID = Intrinsic::hexagon_C2_cmpgti; break; - - case Hexagon::BI__builtin_HEXAGON_C2_cmpgtui: - ID = Intrinsic::hexagon_C2_cmpgtui; break; - - case Hexagon::BI__builtin_HEXAGON_C2_cmpgei: - ID = Intrinsic::hexagon_C2_cmpgei; break; - - case Hexagon::BI__builtin_HEXAGON_C2_cmpgeui: - ID = Intrinsic::hexagon_C2_cmpgeui; break; - - case Hexagon::BI__builtin_HEXAGON_C2_cmplt: - ID = Intrinsic::hexagon_C2_cmplt; break; - - 
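
The rdrand*_step builtins added above lower to intrinsics returning a {value, success} pair: codegen stores element 0 through the pointer argument and returns element 1. That is the same contract as the C-level step API, sketched here (needs an x86 CPU with the RDRAND feature and -mrdrnd; _rdrand32_step comes from immintrin.h):

    #include <cstdio>
    #include <immintrin.h>

    // Build with: clang++ -mrdrnd rdrand.cpp
    int main() {
      unsigned int value = 0;
      // Returns 1 on success and writes the random value through the pointer,
      // i.e. exactly the {value, i1 success} pair the intrinsic yields.
      if (_rdrand32_step(&value))
        std::printf("rdrand: %u\n", value);
      else
        std::puts("rdrand failed (retry)");
    }
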
case Hexagon::BI__builtin_HEXAGON_C2_cmpltu: - ID = Intrinsic::hexagon_C2_cmpltu; break; - - case Hexagon::BI__builtin_HEXAGON_C2_bitsclri: - ID = Intrinsic::hexagon_C2_bitsclri; break; - - case Hexagon::BI__builtin_HEXAGON_C2_and: - ID = Intrinsic::hexagon_C2_and; break; - - case Hexagon::BI__builtin_HEXAGON_C2_or: - ID = Intrinsic::hexagon_C2_or; break; - - case Hexagon::BI__builtin_HEXAGON_C2_xor: - ID = Intrinsic::hexagon_C2_xor; break; - - case Hexagon::BI__builtin_HEXAGON_C2_andn: - ID = Intrinsic::hexagon_C2_andn; break; - - case Hexagon::BI__builtin_HEXAGON_C2_not: - ID = Intrinsic::hexagon_C2_not; break; - - case Hexagon::BI__builtin_HEXAGON_C2_orn: - ID = Intrinsic::hexagon_C2_orn; break; - - case Hexagon::BI__builtin_HEXAGON_C2_pxfer_map: - ID = Intrinsic::hexagon_C2_pxfer_map; break; - - case Hexagon::BI__builtin_HEXAGON_C2_any8: - ID = Intrinsic::hexagon_C2_any8; break; - - case Hexagon::BI__builtin_HEXAGON_C2_all8: - ID = Intrinsic::hexagon_C2_all8; break; - - case Hexagon::BI__builtin_HEXAGON_C2_vitpack: - ID = Intrinsic::hexagon_C2_vitpack; break; - - case Hexagon::BI__builtin_HEXAGON_C2_mux: - ID = Intrinsic::hexagon_C2_mux; break; - - case Hexagon::BI__builtin_HEXAGON_C2_muxii: - ID = Intrinsic::hexagon_C2_muxii; break; - - case Hexagon::BI__builtin_HEXAGON_C2_muxir: - ID = Intrinsic::hexagon_C2_muxir; break; - - case Hexagon::BI__builtin_HEXAGON_C2_muxri: - ID = Intrinsic::hexagon_C2_muxri; break; - - case Hexagon::BI__builtin_HEXAGON_C2_vmux: - ID = Intrinsic::hexagon_C2_vmux; break; - - case Hexagon::BI__builtin_HEXAGON_C2_mask: - ID = Intrinsic::hexagon_C2_mask; break; - - case Hexagon::BI__builtin_HEXAGON_A2_vcmpbeq: - ID = Intrinsic::hexagon_A2_vcmpbeq; break; - - case Hexagon::BI__builtin_HEXAGON_A2_vcmpbgtu: - ID = Intrinsic::hexagon_A2_vcmpbgtu; break; - - case Hexagon::BI__builtin_HEXAGON_A2_vcmpheq: - ID = Intrinsic::hexagon_A2_vcmpheq; break; - - case Hexagon::BI__builtin_HEXAGON_A2_vcmphgt: - ID = Intrinsic::hexagon_A2_vcmphgt; break; - - case Hexagon::BI__builtin_HEXAGON_A2_vcmphgtu: - ID = Intrinsic::hexagon_A2_vcmphgtu; break; - - case Hexagon::BI__builtin_HEXAGON_A2_vcmpweq: - ID = Intrinsic::hexagon_A2_vcmpweq; break; - - case Hexagon::BI__builtin_HEXAGON_A2_vcmpwgt: - ID = Intrinsic::hexagon_A2_vcmpwgt; break; - - case Hexagon::BI__builtin_HEXAGON_A2_vcmpwgtu: - ID = Intrinsic::hexagon_A2_vcmpwgtu; break; - - case Hexagon::BI__builtin_HEXAGON_C2_tfrpr: - ID = Intrinsic::hexagon_C2_tfrpr; break; - - case Hexagon::BI__builtin_HEXAGON_C2_tfrrp: - ID = Intrinsic::hexagon_C2_tfrrp; break; - - case Hexagon::BI__builtin_HEXAGON_M2_mpy_acc_hh_s0: - ID = Intrinsic::hexagon_M2_mpy_acc_hh_s0; break; - - case Hexagon::BI__builtin_HEXAGON_M2_mpy_acc_hh_s1: - ID = Intrinsic::hexagon_M2_mpy_acc_hh_s1; break; - - case Hexagon::BI__builtin_HEXAGON_M2_mpy_acc_hl_s0: - ID = Intrinsic::hexagon_M2_mpy_acc_hl_s0; break; - - case Hexagon::BI__builtin_HEXAGON_M2_mpy_acc_hl_s1: - ID = Intrinsic::hexagon_M2_mpy_acc_hl_s1; break; - - case Hexagon::BI__builtin_HEXAGON_M2_mpy_acc_lh_s0: - ID = Intrinsic::hexagon_M2_mpy_acc_lh_s0; break; - - case Hexagon::BI__builtin_HEXAGON_M2_mpy_acc_lh_s1: - ID = Intrinsic::hexagon_M2_mpy_acc_lh_s1; break; - - case Hexagon::BI__builtin_HEXAGON_M2_mpy_acc_ll_s0: - ID = Intrinsic::hexagon_M2_mpy_acc_ll_s0; break; - - case Hexagon::BI__builtin_HEXAGON_M2_mpy_acc_ll_s1: - ID = Intrinsic::hexagon_M2_mpy_acc_ll_s1; break; - - case Hexagon::BI__builtin_HEXAGON_M2_mpy_nac_hh_s0: - ID = Intrinsic::hexagon_M2_mpy_nac_hh_s0; break; - - case 
Hexagon::BI__builtin_HEXAGON_M2_mpy_nac_hh_s1: - ID = Intrinsic::hexagon_M2_mpy_nac_hh_s1; break; - - case Hexagon::BI__builtin_HEXAGON_M2_mpy_nac_hl_s0: - ID = Intrinsic::hexagon_M2_mpy_nac_hl_s0; break; - - case Hexagon::BI__builtin_HEXAGON_M2_mpy_nac_hl_s1: - ID = Intrinsic::hexagon_M2_mpy_nac_hl_s1; break; - - case Hexagon::BI__builtin_HEXAGON_M2_mpy_nac_lh_s0: - ID = Intrinsic::hexagon_M2_mpy_nac_lh_s0; break; - - case Hexagon::BI__builtin_HEXAGON_M2_mpy_nac_lh_s1: - ID = Intrinsic::hexagon_M2_mpy_nac_lh_s1; break; - - case Hexagon::BI__builtin_HEXAGON_M2_mpy_nac_ll_s0: - ID = Intrinsic::hexagon_M2_mpy_nac_ll_s0; break; - - case Hexagon::BI__builtin_HEXAGON_M2_mpy_nac_ll_s1: - ID = Intrinsic::hexagon_M2_mpy_nac_ll_s1; break; - - case Hexagon::BI__builtin_HEXAGON_M2_mpy_acc_sat_hh_s0: - ID = Intrinsic::hexagon_M2_mpy_acc_sat_hh_s0; break; - - case Hexagon::BI__builtin_HEXAGON_M2_mpy_acc_sat_hh_s1: - ID = Intrinsic::hexagon_M2_mpy_acc_sat_hh_s1; break; - - case Hexagon::BI__builtin_HEXAGON_M2_mpy_acc_sat_hl_s0: - ID = Intrinsic::hexagon_M2_mpy_acc_sat_hl_s0; break; - - case Hexagon::BI__builtin_HEXAGON_M2_mpy_acc_sat_hl_s1: - ID = Intrinsic::hexagon_M2_mpy_acc_sat_hl_s1; break; - - case Hexagon::BI__builtin_HEXAGON_M2_mpy_acc_sat_lh_s0: - ID = Intrinsic::hexagon_M2_mpy_acc_sat_lh_s0; break; - - case Hexagon::BI__builtin_HEXAGON_M2_mpy_acc_sat_lh_s1: - ID = Intrinsic::hexagon_M2_mpy_acc_sat_lh_s1; break; - - case Hexagon::BI__builtin_HEXAGON_M2_mpy_acc_sat_ll_s0: - ID = Intrinsic::hexagon_M2_mpy_acc_sat_ll_s0; break; - - case Hexagon::BI__builtin_HEXAGON_M2_mpy_acc_sat_ll_s1: - ID = Intrinsic::hexagon_M2_mpy_acc_sat_ll_s1; break; - - case Hexagon::BI__builtin_HEXAGON_M2_mpy_nac_sat_hh_s0: - ID = Intrinsic::hexagon_M2_mpy_nac_sat_hh_s0; break; - - case Hexagon::BI__builtin_HEXAGON_M2_mpy_nac_sat_hh_s1: - ID = Intrinsic::hexagon_M2_mpy_nac_sat_hh_s1; break; - - case Hexagon::BI__builtin_HEXAGON_M2_mpy_nac_sat_hl_s0: - ID = Intrinsic::hexagon_M2_mpy_nac_sat_hl_s0; break; - - case Hexagon::BI__builtin_HEXAGON_M2_mpy_nac_sat_hl_s1: - ID = Intrinsic::hexagon_M2_mpy_nac_sat_hl_s1; break; - - case Hexagon::BI__builtin_HEXAGON_M2_mpy_nac_sat_lh_s0: - ID = Intrinsic::hexagon_M2_mpy_nac_sat_lh_s0; break; - - case Hexagon::BI__builtin_HEXAGON_M2_mpy_nac_sat_lh_s1: - ID = Intrinsic::hexagon_M2_mpy_nac_sat_lh_s1; break; - - case Hexagon::BI__builtin_HEXAGON_M2_mpy_nac_sat_ll_s0: - ID = Intrinsic::hexagon_M2_mpy_nac_sat_ll_s0; break; - - case Hexagon::BI__builtin_HEXAGON_M2_mpy_nac_sat_ll_s1: - ID = Intrinsic::hexagon_M2_mpy_nac_sat_ll_s1; break; - - case Hexagon::BI__builtin_HEXAGON_M2_mpy_hh_s0: - ID = Intrinsic::hexagon_M2_mpy_hh_s0; break; - - case Hexagon::BI__builtin_HEXAGON_M2_mpy_hh_s1: - ID = Intrinsic::hexagon_M2_mpy_hh_s1; break; - - case Hexagon::BI__builtin_HEXAGON_M2_mpy_hl_s0: - ID = Intrinsic::hexagon_M2_mpy_hl_s0; break; - - case Hexagon::BI__builtin_HEXAGON_M2_mpy_hl_s1: - ID = Intrinsic::hexagon_M2_mpy_hl_s1; break; - - case Hexagon::BI__builtin_HEXAGON_M2_mpy_lh_s0: - ID = Intrinsic::hexagon_M2_mpy_lh_s0; break; - - case Hexagon::BI__builtin_HEXAGON_M2_mpy_lh_s1: - ID = Intrinsic::hexagon_M2_mpy_lh_s1; break; - - case Hexagon::BI__builtin_HEXAGON_M2_mpy_ll_s0: - ID = Intrinsic::hexagon_M2_mpy_ll_s0; break; - - case Hexagon::BI__builtin_HEXAGON_M2_mpy_ll_s1: - ID = Intrinsic::hexagon_M2_mpy_ll_s1; break; - - case Hexagon::BI__builtin_HEXAGON_M2_mpy_sat_hh_s0: - ID = Intrinsic::hexagon_M2_mpy_sat_hh_s0; break; - - case Hexagon::BI__builtin_HEXAGON_M2_mpy_sat_hh_s1: - ID = 
Intrinsic::hexagon_M2_mpy_sat_hh_s1; break; - - case Hexagon::BI__builtin_HEXAGON_M2_mpy_sat_hl_s0: - ID = Intrinsic::hexagon_M2_mpy_sat_hl_s0; break; - - case Hexagon::BI__builtin_HEXAGON_M2_mpy_sat_hl_s1: - ID = Intrinsic::hexagon_M2_mpy_sat_hl_s1; break; - - case Hexagon::BI__builtin_HEXAGON_M2_mpy_sat_lh_s0: - ID = Intrinsic::hexagon_M2_mpy_sat_lh_s0; break; - - case Hexagon::BI__builtin_HEXAGON_M2_mpy_sat_lh_s1: - ID = Intrinsic::hexagon_M2_mpy_sat_lh_s1; break; - - case Hexagon::BI__builtin_HEXAGON_M2_mpy_sat_ll_s0: - ID = Intrinsic::hexagon_M2_mpy_sat_ll_s0; break; - - case Hexagon::BI__builtin_HEXAGON_M2_mpy_sat_ll_s1: - ID = Intrinsic::hexagon_M2_mpy_sat_ll_s1; break; - - case Hexagon::BI__builtin_HEXAGON_M2_mpy_rnd_hh_s0: - ID = Intrinsic::hexagon_M2_mpy_rnd_hh_s0; break; - - case Hexagon::BI__builtin_HEXAGON_M2_mpy_rnd_hh_s1: - ID = Intrinsic::hexagon_M2_mpy_rnd_hh_s1; break; - - case Hexagon::BI__builtin_HEXAGON_M2_mpy_rnd_hl_s0: - ID = Intrinsic::hexagon_M2_mpy_rnd_hl_s0; break; - - case Hexagon::BI__builtin_HEXAGON_M2_mpy_rnd_hl_s1: - ID = Intrinsic::hexagon_M2_mpy_rnd_hl_s1; break; - - case Hexagon::BI__builtin_HEXAGON_M2_mpy_rnd_lh_s0: - ID = Intrinsic::hexagon_M2_mpy_rnd_lh_s0; break; - - case Hexagon::BI__builtin_HEXAGON_M2_mpy_rnd_lh_s1: - ID = Intrinsic::hexagon_M2_mpy_rnd_lh_s1; break; - - case Hexagon::BI__builtin_HEXAGON_M2_mpy_rnd_ll_s0: - ID = Intrinsic::hexagon_M2_mpy_rnd_ll_s0; break; - - case Hexagon::BI__builtin_HEXAGON_M2_mpy_rnd_ll_s1: - ID = Intrinsic::hexagon_M2_mpy_rnd_ll_s1; break; - - case Hexagon::BI__builtin_HEXAGON_M2_mpy_sat_rnd_hh_s0: - ID = Intrinsic::hexagon_M2_mpy_sat_rnd_hh_s0; break; - - case Hexagon::BI__builtin_HEXAGON_M2_mpy_sat_rnd_hh_s1: - ID = Intrinsic::hexagon_M2_mpy_sat_rnd_hh_s1; break; - - case Hexagon::BI__builtin_HEXAGON_M2_mpy_sat_rnd_hl_s0: - ID = Intrinsic::hexagon_M2_mpy_sat_rnd_hl_s0; break; - - case Hexagon::BI__builtin_HEXAGON_M2_mpy_sat_rnd_hl_s1: - ID = Intrinsic::hexagon_M2_mpy_sat_rnd_hl_s1; break; - - case Hexagon::BI__builtin_HEXAGON_M2_mpy_sat_rnd_lh_s0: - ID = Intrinsic::hexagon_M2_mpy_sat_rnd_lh_s0; break; - - case Hexagon::BI__builtin_HEXAGON_M2_mpy_sat_rnd_lh_s1: - ID = Intrinsic::hexagon_M2_mpy_sat_rnd_lh_s1; break; - - case Hexagon::BI__builtin_HEXAGON_M2_mpy_sat_rnd_ll_s0: - ID = Intrinsic::hexagon_M2_mpy_sat_rnd_ll_s0; break; - - case Hexagon::BI__builtin_HEXAGON_M2_mpy_sat_rnd_ll_s1: - ID = Intrinsic::hexagon_M2_mpy_sat_rnd_ll_s1; break; - - case Hexagon::BI__builtin_HEXAGON_M2_mpyd_acc_hh_s0: - ID = Intrinsic::hexagon_M2_mpyd_acc_hh_s0; break; - - case Hexagon::BI__builtin_HEXAGON_M2_mpyd_acc_hh_s1: - ID = Intrinsic::hexagon_M2_mpyd_acc_hh_s1; break; - - case Hexagon::BI__builtin_HEXAGON_M2_mpyd_acc_hl_s0: - ID = Intrinsic::hexagon_M2_mpyd_acc_hl_s0; break; - - case Hexagon::BI__builtin_HEXAGON_M2_mpyd_acc_hl_s1: - ID = Intrinsic::hexagon_M2_mpyd_acc_hl_s1; break; - - case Hexagon::BI__builtin_HEXAGON_M2_mpyd_acc_lh_s0: - ID = Intrinsic::hexagon_M2_mpyd_acc_lh_s0; break; - - case Hexagon::BI__builtin_HEXAGON_M2_mpyd_acc_lh_s1: - ID = Intrinsic::hexagon_M2_mpyd_acc_lh_s1; break; - - case Hexagon::BI__builtin_HEXAGON_M2_mpyd_acc_ll_s0: - ID = Intrinsic::hexagon_M2_mpyd_acc_ll_s0; break; - - case Hexagon::BI__builtin_HEXAGON_M2_mpyd_acc_ll_s1: - ID = Intrinsic::hexagon_M2_mpyd_acc_ll_s1; break; - - case Hexagon::BI__builtin_HEXAGON_M2_mpyd_nac_hh_s0: - ID = Intrinsic::hexagon_M2_mpyd_nac_hh_s0; break; - - case Hexagon::BI__builtin_HEXAGON_M2_mpyd_nac_hh_s1: - ID = Intrinsic::hexagon_M2_mpyd_nac_hh_s1; break; - 
- case Hexagon::BI__builtin_HEXAGON_M2_mpyd_nac_hl_s0: - ID = Intrinsic::hexagon_M2_mpyd_nac_hl_s0; break; - - case Hexagon::BI__builtin_HEXAGON_M2_mpyd_nac_hl_s1: - ID = Intrinsic::hexagon_M2_mpyd_nac_hl_s1; break; - - case Hexagon::BI__builtin_HEXAGON_M2_mpyd_nac_lh_s0: - ID = Intrinsic::hexagon_M2_mpyd_nac_lh_s0; break; - - case Hexagon::BI__builtin_HEXAGON_M2_mpyd_nac_lh_s1: - ID = Intrinsic::hexagon_M2_mpyd_nac_lh_s1; break; - - case Hexagon::BI__builtin_HEXAGON_M2_mpyd_nac_ll_s0: - ID = Intrinsic::hexagon_M2_mpyd_nac_ll_s0; break; - - case Hexagon::BI__builtin_HEXAGON_M2_mpyd_nac_ll_s1: - ID = Intrinsic::hexagon_M2_mpyd_nac_ll_s1; break; - - case Hexagon::BI__builtin_HEXAGON_M2_mpyd_hh_s0: - ID = Intrinsic::hexagon_M2_mpyd_hh_s0; break; - - case Hexagon::BI__builtin_HEXAGON_M2_mpyd_hh_s1: - ID = Intrinsic::hexagon_M2_mpyd_hh_s1; break; - - case Hexagon::BI__builtin_HEXAGON_M2_mpyd_hl_s0: - ID = Intrinsic::hexagon_M2_mpyd_hl_s0; break; - - case Hexagon::BI__builtin_HEXAGON_M2_mpyd_hl_s1: - ID = Intrinsic::hexagon_M2_mpyd_hl_s1; break; - - case Hexagon::BI__builtin_HEXAGON_M2_mpyd_lh_s0: - ID = Intrinsic::hexagon_M2_mpyd_lh_s0; break; - - case Hexagon::BI__builtin_HEXAGON_M2_mpyd_lh_s1: - ID = Intrinsic::hexagon_M2_mpyd_lh_s1; break; - - case Hexagon::BI__builtin_HEXAGON_M2_mpyd_ll_s0: - ID = Intrinsic::hexagon_M2_mpyd_ll_s0; break; - - case Hexagon::BI__builtin_HEXAGON_M2_mpyd_ll_s1: - ID = Intrinsic::hexagon_M2_mpyd_ll_s1; break; - - case Hexagon::BI__builtin_HEXAGON_M2_mpyd_rnd_hh_s0: - ID = Intrinsic::hexagon_M2_mpyd_rnd_hh_s0; break; - - case Hexagon::BI__builtin_HEXAGON_M2_mpyd_rnd_hh_s1: - ID = Intrinsic::hexagon_M2_mpyd_rnd_hh_s1; break; - - case Hexagon::BI__builtin_HEXAGON_M2_mpyd_rnd_hl_s0: - ID = Intrinsic::hexagon_M2_mpyd_rnd_hl_s0; break; - - case Hexagon::BI__builtin_HEXAGON_M2_mpyd_rnd_hl_s1: - ID = Intrinsic::hexagon_M2_mpyd_rnd_hl_s1; break; - - case Hexagon::BI__builtin_HEXAGON_M2_mpyd_rnd_lh_s0: - ID = Intrinsic::hexagon_M2_mpyd_rnd_lh_s0; break; - - case Hexagon::BI__builtin_HEXAGON_M2_mpyd_rnd_lh_s1: - ID = Intrinsic::hexagon_M2_mpyd_rnd_lh_s1; break; - - case Hexagon::BI__builtin_HEXAGON_M2_mpyd_rnd_ll_s0: - ID = Intrinsic::hexagon_M2_mpyd_rnd_ll_s0; break; - - case Hexagon::BI__builtin_HEXAGON_M2_mpyd_rnd_ll_s1: - ID = Intrinsic::hexagon_M2_mpyd_rnd_ll_s1; break; - - case Hexagon::BI__builtin_HEXAGON_M2_mpyu_acc_hh_s0: - ID = Intrinsic::hexagon_M2_mpyu_acc_hh_s0; break; - - case Hexagon::BI__builtin_HEXAGON_M2_mpyu_acc_hh_s1: - ID = Intrinsic::hexagon_M2_mpyu_acc_hh_s1; break; - - case Hexagon::BI__builtin_HEXAGON_M2_mpyu_acc_hl_s0: - ID = Intrinsic::hexagon_M2_mpyu_acc_hl_s0; break; - - case Hexagon::BI__builtin_HEXAGON_M2_mpyu_acc_hl_s1: - ID = Intrinsic::hexagon_M2_mpyu_acc_hl_s1; break; - - case Hexagon::BI__builtin_HEXAGON_M2_mpyu_acc_lh_s0: - ID = Intrinsic::hexagon_M2_mpyu_acc_lh_s0; break; - - case Hexagon::BI__builtin_HEXAGON_M2_mpyu_acc_lh_s1: - ID = Intrinsic::hexagon_M2_mpyu_acc_lh_s1; break; - - case Hexagon::BI__builtin_HEXAGON_M2_mpyu_acc_ll_s0: - ID = Intrinsic::hexagon_M2_mpyu_acc_ll_s0; break; - - case Hexagon::BI__builtin_HEXAGON_M2_mpyu_acc_ll_s1: - ID = Intrinsic::hexagon_M2_mpyu_acc_ll_s1; break; - - case Hexagon::BI__builtin_HEXAGON_M2_mpyu_nac_hh_s0: - ID = Intrinsic::hexagon_M2_mpyu_nac_hh_s0; break; - - case Hexagon::BI__builtin_HEXAGON_M2_mpyu_nac_hh_s1: - ID = Intrinsic::hexagon_M2_mpyu_nac_hh_s1; break; - - case Hexagon::BI__builtin_HEXAGON_M2_mpyu_nac_hl_s0: - ID = Intrinsic::hexagon_M2_mpyu_nac_hl_s0; break; - - case 
Hexagon::BI__builtin_HEXAGON_M2_mpyu_nac_hl_s1: - ID = Intrinsic::hexagon_M2_mpyu_nac_hl_s1; break; - - case Hexagon::BI__builtin_HEXAGON_M2_mpyu_nac_lh_s0: - ID = Intrinsic::hexagon_M2_mpyu_nac_lh_s0; break; - - case Hexagon::BI__builtin_HEXAGON_M2_mpyu_nac_lh_s1: - ID = Intrinsic::hexagon_M2_mpyu_nac_lh_s1; break; - - case Hexagon::BI__builtin_HEXAGON_M2_mpyu_nac_ll_s0: - ID = Intrinsic::hexagon_M2_mpyu_nac_ll_s0; break; - - case Hexagon::BI__builtin_HEXAGON_M2_mpyu_nac_ll_s1: - ID = Intrinsic::hexagon_M2_mpyu_nac_ll_s1; break; - - case Hexagon::BI__builtin_HEXAGON_M2_mpyu_hh_s0: - ID = Intrinsic::hexagon_M2_mpyu_hh_s0; break; - - case Hexagon::BI__builtin_HEXAGON_M2_mpyu_hh_s1: - ID = Intrinsic::hexagon_M2_mpyu_hh_s1; break; - - case Hexagon::BI__builtin_HEXAGON_M2_mpyu_hl_s0: - ID = Intrinsic::hexagon_M2_mpyu_hl_s0; break; - - case Hexagon::BI__builtin_HEXAGON_M2_mpyu_hl_s1: - ID = Intrinsic::hexagon_M2_mpyu_hl_s1; break; - - case Hexagon::BI__builtin_HEXAGON_M2_mpyu_lh_s0: - ID = Intrinsic::hexagon_M2_mpyu_lh_s0; break; - - case Hexagon::BI__builtin_HEXAGON_M2_mpyu_lh_s1: - ID = Intrinsic::hexagon_M2_mpyu_lh_s1; break; - - case Hexagon::BI__builtin_HEXAGON_M2_mpyu_ll_s0: - ID = Intrinsic::hexagon_M2_mpyu_ll_s0; break; - - case Hexagon::BI__builtin_HEXAGON_M2_mpyu_ll_s1: - ID = Intrinsic::hexagon_M2_mpyu_ll_s1; break; - - case Hexagon::BI__builtin_HEXAGON_M2_mpyud_acc_hh_s0: - ID = Intrinsic::hexagon_M2_mpyud_acc_hh_s0; break; - - case Hexagon::BI__builtin_HEXAGON_M2_mpyud_acc_hh_s1: - ID = Intrinsic::hexagon_M2_mpyud_acc_hh_s1; break; - - case Hexagon::BI__builtin_HEXAGON_M2_mpyud_acc_hl_s0: - ID = Intrinsic::hexagon_M2_mpyud_acc_hl_s0; break; - - case Hexagon::BI__builtin_HEXAGON_M2_mpyud_acc_hl_s1: - ID = Intrinsic::hexagon_M2_mpyud_acc_hl_s1; break; - - case Hexagon::BI__builtin_HEXAGON_M2_mpyud_acc_lh_s0: - ID = Intrinsic::hexagon_M2_mpyud_acc_lh_s0; break; - - case Hexagon::BI__builtin_HEXAGON_M2_mpyud_acc_lh_s1: - ID = Intrinsic::hexagon_M2_mpyud_acc_lh_s1; break; - - case Hexagon::BI__builtin_HEXAGON_M2_mpyud_acc_ll_s0: - ID = Intrinsic::hexagon_M2_mpyud_acc_ll_s0; break; - - case Hexagon::BI__builtin_HEXAGON_M2_mpyud_acc_ll_s1: - ID = Intrinsic::hexagon_M2_mpyud_acc_ll_s1; break; - - case Hexagon::BI__builtin_HEXAGON_M2_mpyud_nac_hh_s0: - ID = Intrinsic::hexagon_M2_mpyud_nac_hh_s0; break; - - case Hexagon::BI__builtin_HEXAGON_M2_mpyud_nac_hh_s1: - ID = Intrinsic::hexagon_M2_mpyud_nac_hh_s1; break; - - case Hexagon::BI__builtin_HEXAGON_M2_mpyud_nac_hl_s0: - ID = Intrinsic::hexagon_M2_mpyud_nac_hl_s0; break; - - case Hexagon::BI__builtin_HEXAGON_M2_mpyud_nac_hl_s1: - ID = Intrinsic::hexagon_M2_mpyud_nac_hl_s1; break; - - case Hexagon::BI__builtin_HEXAGON_M2_mpyud_nac_lh_s0: - ID = Intrinsic::hexagon_M2_mpyud_nac_lh_s0; break; - - case Hexagon::BI__builtin_HEXAGON_M2_mpyud_nac_lh_s1: - ID = Intrinsic::hexagon_M2_mpyud_nac_lh_s1; break; - - case Hexagon::BI__builtin_HEXAGON_M2_mpyud_nac_ll_s0: - ID = Intrinsic::hexagon_M2_mpyud_nac_ll_s0; break; - - case Hexagon::BI__builtin_HEXAGON_M2_mpyud_nac_ll_s1: - ID = Intrinsic::hexagon_M2_mpyud_nac_ll_s1; break; - - case Hexagon::BI__builtin_HEXAGON_M2_mpyud_hh_s0: - ID = Intrinsic::hexagon_M2_mpyud_hh_s0; break; - - case Hexagon::BI__builtin_HEXAGON_M2_mpyud_hh_s1: - ID = Intrinsic::hexagon_M2_mpyud_hh_s1; break; - - case Hexagon::BI__builtin_HEXAGON_M2_mpyud_hl_s0: - ID = Intrinsic::hexagon_M2_mpyud_hl_s0; break; - - case Hexagon::BI__builtin_HEXAGON_M2_mpyud_hl_s1: - ID = Intrinsic::hexagon_M2_mpyud_hl_s1; break; - - case 
Hexagon::BI__builtin_HEXAGON_M2_mpyud_lh_s0: - ID = Intrinsic::hexagon_M2_mpyud_lh_s0; break; - - case Hexagon::BI__builtin_HEXAGON_M2_mpyud_lh_s1: - ID = Intrinsic::hexagon_M2_mpyud_lh_s1; break; - - case Hexagon::BI__builtin_HEXAGON_M2_mpyud_ll_s0: - ID = Intrinsic::hexagon_M2_mpyud_ll_s0; break; - - case Hexagon::BI__builtin_HEXAGON_M2_mpyud_ll_s1: - ID = Intrinsic::hexagon_M2_mpyud_ll_s1; break; - - case Hexagon::BI__builtin_HEXAGON_M2_mpysmi: - ID = Intrinsic::hexagon_M2_mpysmi; break; - - case Hexagon::BI__builtin_HEXAGON_M2_macsip: - ID = Intrinsic::hexagon_M2_macsip; break; - - case Hexagon::BI__builtin_HEXAGON_M2_macsin: - ID = Intrinsic::hexagon_M2_macsin; break; - - case Hexagon::BI__builtin_HEXAGON_M2_dpmpyss_s0: - ID = Intrinsic::hexagon_M2_dpmpyss_s0; break; - - case Hexagon::BI__builtin_HEXAGON_M2_dpmpyss_acc_s0: - ID = Intrinsic::hexagon_M2_dpmpyss_acc_s0; break; - - case Hexagon::BI__builtin_HEXAGON_M2_dpmpyss_nac_s0: - ID = Intrinsic::hexagon_M2_dpmpyss_nac_s0; break; - - case Hexagon::BI__builtin_HEXAGON_M2_dpmpyuu_s0: - ID = Intrinsic::hexagon_M2_dpmpyuu_s0; break; - - case Hexagon::BI__builtin_HEXAGON_M2_dpmpyuu_acc_s0: - ID = Intrinsic::hexagon_M2_dpmpyuu_acc_s0; break; - - case Hexagon::BI__builtin_HEXAGON_M2_dpmpyuu_nac_s0: - ID = Intrinsic::hexagon_M2_dpmpyuu_nac_s0; break; - - case Hexagon::BI__builtin_HEXAGON_M2_mpy_up: - ID = Intrinsic::hexagon_M2_mpy_up; break; - - case Hexagon::BI__builtin_HEXAGON_M2_mpyu_up: - ID = Intrinsic::hexagon_M2_mpyu_up; break; - - case Hexagon::BI__builtin_HEXAGON_M2_dpmpyss_rnd_s0: - ID = Intrinsic::hexagon_M2_dpmpyss_rnd_s0; break; - - case Hexagon::BI__builtin_HEXAGON_M2_mpyi: - ID = Intrinsic::hexagon_M2_mpyi; break; - - case Hexagon::BI__builtin_HEXAGON_M2_mpyui: - ID = Intrinsic::hexagon_M2_mpyui; break; - - case Hexagon::BI__builtin_HEXAGON_M2_maci: - ID = Intrinsic::hexagon_M2_maci; break; - - case Hexagon::BI__builtin_HEXAGON_M2_acci: - ID = Intrinsic::hexagon_M2_acci; break; - - case Hexagon::BI__builtin_HEXAGON_M2_accii: - ID = Intrinsic::hexagon_M2_accii; break; - - case Hexagon::BI__builtin_HEXAGON_M2_nacci: - ID = Intrinsic::hexagon_M2_nacci; break; - - case Hexagon::BI__builtin_HEXAGON_M2_naccii: - ID = Intrinsic::hexagon_M2_naccii; break; - - case Hexagon::BI__builtin_HEXAGON_M2_subacc: - ID = Intrinsic::hexagon_M2_subacc; break; - - case Hexagon::BI__builtin_HEXAGON_M2_vmpy2s_s0: - ID = Intrinsic::hexagon_M2_vmpy2s_s0; break; - - case Hexagon::BI__builtin_HEXAGON_M2_vmpy2s_s1: - ID = Intrinsic::hexagon_M2_vmpy2s_s1; break; - - case Hexagon::BI__builtin_HEXAGON_M2_vmac2s_s0: - ID = Intrinsic::hexagon_M2_vmac2s_s0; break; - - case Hexagon::BI__builtin_HEXAGON_M2_vmac2s_s1: - ID = Intrinsic::hexagon_M2_vmac2s_s1; break; - - case Hexagon::BI__builtin_HEXAGON_M2_vmpy2s_s0pack: - ID = Intrinsic::hexagon_M2_vmpy2s_s0pack; break; - - case Hexagon::BI__builtin_HEXAGON_M2_vmpy2s_s1pack: - ID = Intrinsic::hexagon_M2_vmpy2s_s1pack; break; - - case Hexagon::BI__builtin_HEXAGON_M2_vmac2: - ID = Intrinsic::hexagon_M2_vmac2; break; - - case Hexagon::BI__builtin_HEXAGON_M2_vmpy2es_s0: - ID = Intrinsic::hexagon_M2_vmpy2es_s0; break; - - case Hexagon::BI__builtin_HEXAGON_M2_vmpy2es_s1: - ID = Intrinsic::hexagon_M2_vmpy2es_s1; break; - - case Hexagon::BI__builtin_HEXAGON_M2_vmac2es_s0: - ID = Intrinsic::hexagon_M2_vmac2es_s0; break; - - case Hexagon::BI__builtin_HEXAGON_M2_vmac2es_s1: - ID = Intrinsic::hexagon_M2_vmac2es_s1; break; - - case Hexagon::BI__builtin_HEXAGON_M2_vmac2es: - ID = Intrinsic::hexagon_M2_vmac2es; break; - - case 
Hexagon::BI__builtin_HEXAGON_M2_vrmac_s0: - ID = Intrinsic::hexagon_M2_vrmac_s0; break; - - case Hexagon::BI__builtin_HEXAGON_M2_vrmpy_s0: - ID = Intrinsic::hexagon_M2_vrmpy_s0; break; - - case Hexagon::BI__builtin_HEXAGON_M2_vdmpyrs_s0: - ID = Intrinsic::hexagon_M2_vdmpyrs_s0; break; - - case Hexagon::BI__builtin_HEXAGON_M2_vdmpyrs_s1: - ID = Intrinsic::hexagon_M2_vdmpyrs_s1; break; - - case Hexagon::BI__builtin_HEXAGON_M2_vdmacs_s0: - ID = Intrinsic::hexagon_M2_vdmacs_s0; break; - - case Hexagon::BI__builtin_HEXAGON_M2_vdmacs_s1: - ID = Intrinsic::hexagon_M2_vdmacs_s1; break; - - case Hexagon::BI__builtin_HEXAGON_M2_vdmpys_s0: - ID = Intrinsic::hexagon_M2_vdmpys_s0; break; - - case Hexagon::BI__builtin_HEXAGON_M2_vdmpys_s1: - ID = Intrinsic::hexagon_M2_vdmpys_s1; break; - - case Hexagon::BI__builtin_HEXAGON_M2_cmpyrs_s0: - ID = Intrinsic::hexagon_M2_cmpyrs_s0; break; - - case Hexagon::BI__builtin_HEXAGON_M2_cmpyrs_s1: - ID = Intrinsic::hexagon_M2_cmpyrs_s1; break; - - case Hexagon::BI__builtin_HEXAGON_M2_cmpyrsc_s0: - ID = Intrinsic::hexagon_M2_cmpyrsc_s0; break; - - case Hexagon::BI__builtin_HEXAGON_M2_cmpyrsc_s1: - ID = Intrinsic::hexagon_M2_cmpyrsc_s1; break; - - case Hexagon::BI__builtin_HEXAGON_M2_cmacs_s0: - ID = Intrinsic::hexagon_M2_cmacs_s0; break; - - case Hexagon::BI__builtin_HEXAGON_M2_cmacs_s1: - ID = Intrinsic::hexagon_M2_cmacs_s1; break; - - case Hexagon::BI__builtin_HEXAGON_M2_cmacsc_s0: - ID = Intrinsic::hexagon_M2_cmacsc_s0; break; - - case Hexagon::BI__builtin_HEXAGON_M2_cmacsc_s1: - ID = Intrinsic::hexagon_M2_cmacsc_s1; break; - - case Hexagon::BI__builtin_HEXAGON_M2_cmpys_s0: - ID = Intrinsic::hexagon_M2_cmpys_s0; break; - - case Hexagon::BI__builtin_HEXAGON_M2_cmpys_s1: - ID = Intrinsic::hexagon_M2_cmpys_s1; break; - - case Hexagon::BI__builtin_HEXAGON_M2_cmpysc_s0: - ID = Intrinsic::hexagon_M2_cmpysc_s0; break; - - case Hexagon::BI__builtin_HEXAGON_M2_cmpysc_s1: - ID = Intrinsic::hexagon_M2_cmpysc_s1; break; - - case Hexagon::BI__builtin_HEXAGON_M2_cnacs_s0: - ID = Intrinsic::hexagon_M2_cnacs_s0; break; - - case Hexagon::BI__builtin_HEXAGON_M2_cnacs_s1: - ID = Intrinsic::hexagon_M2_cnacs_s1; break; - - case Hexagon::BI__builtin_HEXAGON_M2_cnacsc_s0: - ID = Intrinsic::hexagon_M2_cnacsc_s0; break; - - case Hexagon::BI__builtin_HEXAGON_M2_cnacsc_s1: - ID = Intrinsic::hexagon_M2_cnacsc_s1; break; - - case Hexagon::BI__builtin_HEXAGON_M2_vrcmpys_s1: - ID = Intrinsic::hexagon_M2_vrcmpys_s1; break; - - case Hexagon::BI__builtin_HEXAGON_M2_vrcmpys_acc_s1: - ID = Intrinsic::hexagon_M2_vrcmpys_acc_s1; break; - - case Hexagon::BI__builtin_HEXAGON_M2_vrcmpys_s1rp: - ID = Intrinsic::hexagon_M2_vrcmpys_s1rp; break; - - case Hexagon::BI__builtin_HEXAGON_M2_mmacls_s0: - ID = Intrinsic::hexagon_M2_mmacls_s0; break; - - case Hexagon::BI__builtin_HEXAGON_M2_mmacls_s1: - ID = Intrinsic::hexagon_M2_mmacls_s1; break; - - case Hexagon::BI__builtin_HEXAGON_M2_mmachs_s0: - ID = Intrinsic::hexagon_M2_mmachs_s0; break; - - case Hexagon::BI__builtin_HEXAGON_M2_mmachs_s1: - ID = Intrinsic::hexagon_M2_mmachs_s1; break; - - case Hexagon::BI__builtin_HEXAGON_M2_mmpyl_s0: - ID = Intrinsic::hexagon_M2_mmpyl_s0; break; - - case Hexagon::BI__builtin_HEXAGON_M2_mmpyl_s1: - ID = Intrinsic::hexagon_M2_mmpyl_s1; break; - - case Hexagon::BI__builtin_HEXAGON_M2_mmpyh_s0: - ID = Intrinsic::hexagon_M2_mmpyh_s0; break; - - case Hexagon::BI__builtin_HEXAGON_M2_mmpyh_s1: - ID = Intrinsic::hexagon_M2_mmpyh_s1; break; - - case Hexagon::BI__builtin_HEXAGON_M2_mmacls_rs0: - ID = Intrinsic::hexagon_M2_mmacls_rs0; 
break; - - case Hexagon::BI__builtin_HEXAGON_M2_mmacls_rs1: - ID = Intrinsic::hexagon_M2_mmacls_rs1; break; - - case Hexagon::BI__builtin_HEXAGON_M2_mmachs_rs0: - ID = Intrinsic::hexagon_M2_mmachs_rs0; break; - - case Hexagon::BI__builtin_HEXAGON_M2_mmachs_rs1: - ID = Intrinsic::hexagon_M2_mmachs_rs1; break; - - case Hexagon::BI__builtin_HEXAGON_M2_mmpyl_rs0: - ID = Intrinsic::hexagon_M2_mmpyl_rs0; break; - - case Hexagon::BI__builtin_HEXAGON_M2_mmpyl_rs1: - ID = Intrinsic::hexagon_M2_mmpyl_rs1; break; - - case Hexagon::BI__builtin_HEXAGON_M2_mmpyh_rs0: - ID = Intrinsic::hexagon_M2_mmpyh_rs0; break; - - case Hexagon::BI__builtin_HEXAGON_M2_mmpyh_rs1: - ID = Intrinsic::hexagon_M2_mmpyh_rs1; break; - - case Hexagon::BI__builtin_HEXAGON_M2_hmmpyl_rs1: - ID = Intrinsic::hexagon_M2_hmmpyl_rs1; break; - - case Hexagon::BI__builtin_HEXAGON_M2_hmmpyh_rs1: - ID = Intrinsic::hexagon_M2_hmmpyh_rs1; break; - - case Hexagon::BI__builtin_HEXAGON_M2_mmaculs_s0: - ID = Intrinsic::hexagon_M2_mmaculs_s0; break; - - case Hexagon::BI__builtin_HEXAGON_M2_mmaculs_s1: - ID = Intrinsic::hexagon_M2_mmaculs_s1; break; - - case Hexagon::BI__builtin_HEXAGON_M2_mmacuhs_s0: - ID = Intrinsic::hexagon_M2_mmacuhs_s0; break; - - case Hexagon::BI__builtin_HEXAGON_M2_mmacuhs_s1: - ID = Intrinsic::hexagon_M2_mmacuhs_s1; break; - - case Hexagon::BI__builtin_HEXAGON_M2_mmpyul_s0: - ID = Intrinsic::hexagon_M2_mmpyul_s0; break; - - case Hexagon::BI__builtin_HEXAGON_M2_mmpyul_s1: - ID = Intrinsic::hexagon_M2_mmpyul_s1; break; - - case Hexagon::BI__builtin_HEXAGON_M2_mmpyuh_s0: - ID = Intrinsic::hexagon_M2_mmpyuh_s0; break; - - case Hexagon::BI__builtin_HEXAGON_M2_mmpyuh_s1: - ID = Intrinsic::hexagon_M2_mmpyuh_s1; break; - - case Hexagon::BI__builtin_HEXAGON_M2_mmaculs_rs0: - ID = Intrinsic::hexagon_M2_mmaculs_rs0; break; - - case Hexagon::BI__builtin_HEXAGON_M2_mmaculs_rs1: - ID = Intrinsic::hexagon_M2_mmaculs_rs1; break; - - case Hexagon::BI__builtin_HEXAGON_M2_mmacuhs_rs0: - ID = Intrinsic::hexagon_M2_mmacuhs_rs0; break; - - case Hexagon::BI__builtin_HEXAGON_M2_mmacuhs_rs1: - ID = Intrinsic::hexagon_M2_mmacuhs_rs1; break; - - case Hexagon::BI__builtin_HEXAGON_M2_mmpyul_rs0: - ID = Intrinsic::hexagon_M2_mmpyul_rs0; break; - - case Hexagon::BI__builtin_HEXAGON_M2_mmpyul_rs1: - ID = Intrinsic::hexagon_M2_mmpyul_rs1; break; - - case Hexagon::BI__builtin_HEXAGON_M2_mmpyuh_rs0: - ID = Intrinsic::hexagon_M2_mmpyuh_rs0; break; - - case Hexagon::BI__builtin_HEXAGON_M2_mmpyuh_rs1: - ID = Intrinsic::hexagon_M2_mmpyuh_rs1; break; - - case Hexagon::BI__builtin_HEXAGON_M2_vrcmaci_s0: - ID = Intrinsic::hexagon_M2_vrcmaci_s0; break; - - case Hexagon::BI__builtin_HEXAGON_M2_vrcmacr_s0: - ID = Intrinsic::hexagon_M2_vrcmacr_s0; break; - - case Hexagon::BI__builtin_HEXAGON_M2_vrcmaci_s0c: - ID = Intrinsic::hexagon_M2_vrcmaci_s0c; break; - - case Hexagon::BI__builtin_HEXAGON_M2_vrcmacr_s0c: - ID = Intrinsic::hexagon_M2_vrcmacr_s0c; break; - - case Hexagon::BI__builtin_HEXAGON_M2_cmaci_s0: - ID = Intrinsic::hexagon_M2_cmaci_s0; break; - - case Hexagon::BI__builtin_HEXAGON_M2_cmacr_s0: - ID = Intrinsic::hexagon_M2_cmacr_s0; break; - - case Hexagon::BI__builtin_HEXAGON_M2_vrcmpyi_s0: - ID = Intrinsic::hexagon_M2_vrcmpyi_s0; break; - - case Hexagon::BI__builtin_HEXAGON_M2_vrcmpyr_s0: - ID = Intrinsic::hexagon_M2_vrcmpyr_s0; break; - - case Hexagon::BI__builtin_HEXAGON_M2_vrcmpyi_s0c: - ID = Intrinsic::hexagon_M2_vrcmpyi_s0c; break; - - case Hexagon::BI__builtin_HEXAGON_M2_vrcmpyr_s0c: - ID = Intrinsic::hexagon_M2_vrcmpyr_s0c; break; - - case 
Hexagon::BI__builtin_HEXAGON_M2_cmpyi_s0: - ID = Intrinsic::hexagon_M2_cmpyi_s0; break; - - case Hexagon::BI__builtin_HEXAGON_M2_cmpyr_s0: - ID = Intrinsic::hexagon_M2_cmpyr_s0; break; - - case Hexagon::BI__builtin_HEXAGON_M2_vcmpy_s0_sat_i: - ID = Intrinsic::hexagon_M2_vcmpy_s0_sat_i; break; - - case Hexagon::BI__builtin_HEXAGON_M2_vcmpy_s0_sat_r: - ID = Intrinsic::hexagon_M2_vcmpy_s0_sat_r; break; - - case Hexagon::BI__builtin_HEXAGON_M2_vcmpy_s1_sat_i: - ID = Intrinsic::hexagon_M2_vcmpy_s1_sat_i; break; - - case Hexagon::BI__builtin_HEXAGON_M2_vcmpy_s1_sat_r: - ID = Intrinsic::hexagon_M2_vcmpy_s1_sat_r; break; - - case Hexagon::BI__builtin_HEXAGON_M2_vcmac_s0_sat_i: - ID = Intrinsic::hexagon_M2_vcmac_s0_sat_i; break; - - case Hexagon::BI__builtin_HEXAGON_M2_vcmac_s0_sat_r: - ID = Intrinsic::hexagon_M2_vcmac_s0_sat_r; break; - - case Hexagon::BI__builtin_HEXAGON_S2_vcrotate: - ID = Intrinsic::hexagon_S2_vcrotate; break; - - case Hexagon::BI__builtin_HEXAGON_A2_add: - ID = Intrinsic::hexagon_A2_add; break; - - case Hexagon::BI__builtin_HEXAGON_A2_sub: - ID = Intrinsic::hexagon_A2_sub; break; - - case Hexagon::BI__builtin_HEXAGON_A2_addsat: - ID = Intrinsic::hexagon_A2_addsat; break; - - case Hexagon::BI__builtin_HEXAGON_A2_subsat: - ID = Intrinsic::hexagon_A2_subsat; break; - - case Hexagon::BI__builtin_HEXAGON_A2_addi: - ID = Intrinsic::hexagon_A2_addi; break; - - case Hexagon::BI__builtin_HEXAGON_A2_addh_l16_ll: - ID = Intrinsic::hexagon_A2_addh_l16_ll; break; - - case Hexagon::BI__builtin_HEXAGON_A2_addh_l16_hl: - ID = Intrinsic::hexagon_A2_addh_l16_hl; break; - - case Hexagon::BI__builtin_HEXAGON_A2_addh_l16_sat_ll: - ID = Intrinsic::hexagon_A2_addh_l16_sat_ll; break; - - case Hexagon::BI__builtin_HEXAGON_A2_addh_l16_sat_hl: - ID = Intrinsic::hexagon_A2_addh_l16_sat_hl; break; - - case Hexagon::BI__builtin_HEXAGON_A2_subh_l16_ll: - ID = Intrinsic::hexagon_A2_subh_l16_ll; break; - - case Hexagon::BI__builtin_HEXAGON_A2_subh_l16_hl: - ID = Intrinsic::hexagon_A2_subh_l16_hl; break; - - case Hexagon::BI__builtin_HEXAGON_A2_subh_l16_sat_ll: - ID = Intrinsic::hexagon_A2_subh_l16_sat_ll; break; - - case Hexagon::BI__builtin_HEXAGON_A2_subh_l16_sat_hl: - ID = Intrinsic::hexagon_A2_subh_l16_sat_hl; break; - - case Hexagon::BI__builtin_HEXAGON_A2_addh_h16_ll: - ID = Intrinsic::hexagon_A2_addh_h16_ll; break; - - case Hexagon::BI__builtin_HEXAGON_A2_addh_h16_lh: - ID = Intrinsic::hexagon_A2_addh_h16_lh; break; - - case Hexagon::BI__builtin_HEXAGON_A2_addh_h16_hl: - ID = Intrinsic::hexagon_A2_addh_h16_hl; break; - - case Hexagon::BI__builtin_HEXAGON_A2_addh_h16_hh: - ID = Intrinsic::hexagon_A2_addh_h16_hh; break; - - case Hexagon::BI__builtin_HEXAGON_A2_addh_h16_sat_ll: - ID = Intrinsic::hexagon_A2_addh_h16_sat_ll; break; - - case Hexagon::BI__builtin_HEXAGON_A2_addh_h16_sat_lh: - ID = Intrinsic::hexagon_A2_addh_h16_sat_lh; break; - - case Hexagon::BI__builtin_HEXAGON_A2_addh_h16_sat_hl: - ID = Intrinsic::hexagon_A2_addh_h16_sat_hl; break; - - case Hexagon::BI__builtin_HEXAGON_A2_addh_h16_sat_hh: - ID = Intrinsic::hexagon_A2_addh_h16_sat_hh; break; - - case Hexagon::BI__builtin_HEXAGON_A2_subh_h16_ll: - ID = Intrinsic::hexagon_A2_subh_h16_ll; break; - - case Hexagon::BI__builtin_HEXAGON_A2_subh_h16_lh: - ID = Intrinsic::hexagon_A2_subh_h16_lh; break; - - case Hexagon::BI__builtin_HEXAGON_A2_subh_h16_hl: - ID = Intrinsic::hexagon_A2_subh_h16_hl; break; - - case Hexagon::BI__builtin_HEXAGON_A2_subh_h16_hh: - ID = Intrinsic::hexagon_A2_subh_h16_hh; break; - - case 
Hexagon::BI__builtin_HEXAGON_A2_subh_h16_sat_ll: - ID = Intrinsic::hexagon_A2_subh_h16_sat_ll; break; - - case Hexagon::BI__builtin_HEXAGON_A2_subh_h16_sat_lh: - ID = Intrinsic::hexagon_A2_subh_h16_sat_lh; break; - - case Hexagon::BI__builtin_HEXAGON_A2_subh_h16_sat_hl: - ID = Intrinsic::hexagon_A2_subh_h16_sat_hl; break; - - case Hexagon::BI__builtin_HEXAGON_A2_subh_h16_sat_hh: - ID = Intrinsic::hexagon_A2_subh_h16_sat_hh; break; - - case Hexagon::BI__builtin_HEXAGON_A2_aslh: - ID = Intrinsic::hexagon_A2_aslh; break; - - case Hexagon::BI__builtin_HEXAGON_A2_asrh: - ID = Intrinsic::hexagon_A2_asrh; break; - - case Hexagon::BI__builtin_HEXAGON_A2_addp: - ID = Intrinsic::hexagon_A2_addp; break; - - case Hexagon::BI__builtin_HEXAGON_A2_addpsat: - ID = Intrinsic::hexagon_A2_addpsat; break; - - case Hexagon::BI__builtin_HEXAGON_A2_addsp: - ID = Intrinsic::hexagon_A2_addsp; break; - - case Hexagon::BI__builtin_HEXAGON_A2_subp: - ID = Intrinsic::hexagon_A2_subp; break; - - case Hexagon::BI__builtin_HEXAGON_A2_neg: - ID = Intrinsic::hexagon_A2_neg; break; - - case Hexagon::BI__builtin_HEXAGON_A2_negsat: - ID = Intrinsic::hexagon_A2_negsat; break; - - case Hexagon::BI__builtin_HEXAGON_A2_abs: - ID = Intrinsic::hexagon_A2_abs; break; - - case Hexagon::BI__builtin_HEXAGON_A2_abssat: - ID = Intrinsic::hexagon_A2_abssat; break; - - case Hexagon::BI__builtin_HEXAGON_A2_vconj: - ID = Intrinsic::hexagon_A2_vconj; break; - - case Hexagon::BI__builtin_HEXAGON_A2_negp: - ID = Intrinsic::hexagon_A2_negp; break; - - case Hexagon::BI__builtin_HEXAGON_A2_absp: - ID = Intrinsic::hexagon_A2_absp; break; - - case Hexagon::BI__builtin_HEXAGON_A2_max: - ID = Intrinsic::hexagon_A2_max; break; - - case Hexagon::BI__builtin_HEXAGON_A2_maxu: - ID = Intrinsic::hexagon_A2_maxu; break; - - case Hexagon::BI__builtin_HEXAGON_A2_min: - ID = Intrinsic::hexagon_A2_min; break; - - case Hexagon::BI__builtin_HEXAGON_A2_minu: - ID = Intrinsic::hexagon_A2_minu; break; - - case Hexagon::BI__builtin_HEXAGON_A2_maxp: - ID = Intrinsic::hexagon_A2_maxp; break; - - case Hexagon::BI__builtin_HEXAGON_A2_maxup: - ID = Intrinsic::hexagon_A2_maxup; break; - - case Hexagon::BI__builtin_HEXAGON_A2_minp: - ID = Intrinsic::hexagon_A2_minp; break; - - case Hexagon::BI__builtin_HEXAGON_A2_minup: - ID = Intrinsic::hexagon_A2_minup; break; - - case Hexagon::BI__builtin_HEXAGON_A2_tfr: - ID = Intrinsic::hexagon_A2_tfr; break; - - case Hexagon::BI__builtin_HEXAGON_A2_tfrsi: - ID = Intrinsic::hexagon_A2_tfrsi; break; - - case Hexagon::BI__builtin_HEXAGON_A2_tfrp: - ID = Intrinsic::hexagon_A2_tfrp; break; - - case Hexagon::BI__builtin_HEXAGON_A2_tfrpi: - ID = Intrinsic::hexagon_A2_tfrpi; break; - - case Hexagon::BI__builtin_HEXAGON_A2_zxtb: - ID = Intrinsic::hexagon_A2_zxtb; break; - - case Hexagon::BI__builtin_HEXAGON_A2_sxtb: - ID = Intrinsic::hexagon_A2_sxtb; break; - - case Hexagon::BI__builtin_HEXAGON_A2_zxth: - ID = Intrinsic::hexagon_A2_zxth; break; - - case Hexagon::BI__builtin_HEXAGON_A2_sxth: - ID = Intrinsic::hexagon_A2_sxth; break; - - case Hexagon::BI__builtin_HEXAGON_A2_combinew: - ID = Intrinsic::hexagon_A2_combinew; break; - - case Hexagon::BI__builtin_HEXAGON_A2_combineii: - ID = Intrinsic::hexagon_A2_combineii; break; - - case Hexagon::BI__builtin_HEXAGON_A2_combine_hh: - ID = Intrinsic::hexagon_A2_combine_hh; break; - - case Hexagon::BI__builtin_HEXAGON_A2_combine_hl: - ID = Intrinsic::hexagon_A2_combine_hl; break; - - case Hexagon::BI__builtin_HEXAGON_A2_combine_lh: - ID = Intrinsic::hexagon_A2_combine_lh; break; - - case 
Hexagon::BI__builtin_HEXAGON_A2_combine_ll: - ID = Intrinsic::hexagon_A2_combine_ll; break; - - case Hexagon::BI__builtin_HEXAGON_A2_tfril: - ID = Intrinsic::hexagon_A2_tfril; break; - - case Hexagon::BI__builtin_HEXAGON_A2_tfrih: - ID = Intrinsic::hexagon_A2_tfrih; break; - - case Hexagon::BI__builtin_HEXAGON_A2_and: - ID = Intrinsic::hexagon_A2_and; break; - - case Hexagon::BI__builtin_HEXAGON_A2_or: - ID = Intrinsic::hexagon_A2_or; break; - - case Hexagon::BI__builtin_HEXAGON_A2_xor: - ID = Intrinsic::hexagon_A2_xor; break; - - case Hexagon::BI__builtin_HEXAGON_A2_not: - ID = Intrinsic::hexagon_A2_not; break; - - case Hexagon::BI__builtin_HEXAGON_M2_xor_xacc: - ID = Intrinsic::hexagon_M2_xor_xacc; break; - - case Hexagon::BI__builtin_HEXAGON_A2_subri: - ID = Intrinsic::hexagon_A2_subri; break; - - case Hexagon::BI__builtin_HEXAGON_A2_andir: - ID = Intrinsic::hexagon_A2_andir; break; - - case Hexagon::BI__builtin_HEXAGON_A2_orir: - ID = Intrinsic::hexagon_A2_orir; break; - - case Hexagon::BI__builtin_HEXAGON_A2_andp: - ID = Intrinsic::hexagon_A2_andp; break; - - case Hexagon::BI__builtin_HEXAGON_A2_orp: - ID = Intrinsic::hexagon_A2_orp; break; - - case Hexagon::BI__builtin_HEXAGON_A2_xorp: - ID = Intrinsic::hexagon_A2_xorp; break; - - case Hexagon::BI__builtin_HEXAGON_A2_notp: - ID = Intrinsic::hexagon_A2_notp; break; - - case Hexagon::BI__builtin_HEXAGON_A2_sxtw: - ID = Intrinsic::hexagon_A2_sxtw; break; - - case Hexagon::BI__builtin_HEXAGON_A2_sat: - ID = Intrinsic::hexagon_A2_sat; break; - - case Hexagon::BI__builtin_HEXAGON_A2_sath: - ID = Intrinsic::hexagon_A2_sath; break; - - case Hexagon::BI__builtin_HEXAGON_A2_satuh: - ID = Intrinsic::hexagon_A2_satuh; break; - - case Hexagon::BI__builtin_HEXAGON_A2_satub: - ID = Intrinsic::hexagon_A2_satub; break; - - case Hexagon::BI__builtin_HEXAGON_A2_satb: - ID = Intrinsic::hexagon_A2_satb; break; - - case Hexagon::BI__builtin_HEXAGON_A2_vaddub: - ID = Intrinsic::hexagon_A2_vaddub; break; - - case Hexagon::BI__builtin_HEXAGON_A2_vaddubs: - ID = Intrinsic::hexagon_A2_vaddubs; break; - - case Hexagon::BI__builtin_HEXAGON_A2_vaddh: - ID = Intrinsic::hexagon_A2_vaddh; break; - - case Hexagon::BI__builtin_HEXAGON_A2_vaddhs: - ID = Intrinsic::hexagon_A2_vaddhs; break; - - case Hexagon::BI__builtin_HEXAGON_A2_vadduhs: - ID = Intrinsic::hexagon_A2_vadduhs; break; - - case Hexagon::BI__builtin_HEXAGON_A2_vaddw: - ID = Intrinsic::hexagon_A2_vaddw; break; - - case Hexagon::BI__builtin_HEXAGON_A2_vaddws: - ID = Intrinsic::hexagon_A2_vaddws; break; - - case Hexagon::BI__builtin_HEXAGON_A2_svavgh: - ID = Intrinsic::hexagon_A2_svavgh; break; - - case Hexagon::BI__builtin_HEXAGON_A2_svavghs: - ID = Intrinsic::hexagon_A2_svavghs; break; - - case Hexagon::BI__builtin_HEXAGON_A2_svnavgh: - ID = Intrinsic::hexagon_A2_svnavgh; break; - - case Hexagon::BI__builtin_HEXAGON_A2_svaddh: - ID = Intrinsic::hexagon_A2_svaddh; break; - - case Hexagon::BI__builtin_HEXAGON_A2_svaddhs: - ID = Intrinsic::hexagon_A2_svaddhs; break; - - case Hexagon::BI__builtin_HEXAGON_A2_svadduhs: - ID = Intrinsic::hexagon_A2_svadduhs; break; - - case Hexagon::BI__builtin_HEXAGON_A2_svsubh: - ID = Intrinsic::hexagon_A2_svsubh; break; - - case Hexagon::BI__builtin_HEXAGON_A2_svsubhs: - ID = Intrinsic::hexagon_A2_svsubhs; break; - - case Hexagon::BI__builtin_HEXAGON_A2_svsubuhs: - ID = Intrinsic::hexagon_A2_svsubuhs; break; - - case Hexagon::BI__builtin_HEXAGON_A2_vraddub: - ID = Intrinsic::hexagon_A2_vraddub; break; - - case Hexagon::BI__builtin_HEXAGON_A2_vraddub_acc: - ID = 
Intrinsic::hexagon_A2_vraddub_acc; break; - - case Hexagon::BI__builtin_HEXAGON_M2_vradduh: - ID = Intrinsic::hexagon_M2_vradduh; break; - - case Hexagon::BI__builtin_HEXAGON_A2_vsubub: - ID = Intrinsic::hexagon_A2_vsubub; break; - - case Hexagon::BI__builtin_HEXAGON_A2_vsububs: - ID = Intrinsic::hexagon_A2_vsububs; break; - - case Hexagon::BI__builtin_HEXAGON_A2_vsubh: - ID = Intrinsic::hexagon_A2_vsubh; break; - - case Hexagon::BI__builtin_HEXAGON_A2_vsubhs: - ID = Intrinsic::hexagon_A2_vsubhs; break; - - case Hexagon::BI__builtin_HEXAGON_A2_vsubuhs: - ID = Intrinsic::hexagon_A2_vsubuhs; break; - - case Hexagon::BI__builtin_HEXAGON_A2_vsubw: - ID = Intrinsic::hexagon_A2_vsubw; break; - - case Hexagon::BI__builtin_HEXAGON_A2_vsubws: - ID = Intrinsic::hexagon_A2_vsubws; break; - - case Hexagon::BI__builtin_HEXAGON_A2_vabsh: - ID = Intrinsic::hexagon_A2_vabsh; break; - - case Hexagon::BI__builtin_HEXAGON_A2_vabshsat: - ID = Intrinsic::hexagon_A2_vabshsat; break; - - case Hexagon::BI__builtin_HEXAGON_A2_vabsw: - ID = Intrinsic::hexagon_A2_vabsw; break; - - case Hexagon::BI__builtin_HEXAGON_A2_vabswsat: - ID = Intrinsic::hexagon_A2_vabswsat; break; - - case Hexagon::BI__builtin_HEXAGON_M2_vabsdiffw: - ID = Intrinsic::hexagon_M2_vabsdiffw; break; - - case Hexagon::BI__builtin_HEXAGON_M2_vabsdiffh: - ID = Intrinsic::hexagon_M2_vabsdiffh; break; - - case Hexagon::BI__builtin_HEXAGON_A2_vrsadub: - ID = Intrinsic::hexagon_A2_vrsadub; break; - - case Hexagon::BI__builtin_HEXAGON_A2_vrsadub_acc: - ID = Intrinsic::hexagon_A2_vrsadub_acc; break; - - case Hexagon::BI__builtin_HEXAGON_A2_vavgub: - ID = Intrinsic::hexagon_A2_vavgub; break; - - case Hexagon::BI__builtin_HEXAGON_A2_vavguh: - ID = Intrinsic::hexagon_A2_vavguh; break; - - case Hexagon::BI__builtin_HEXAGON_A2_vavgh: - ID = Intrinsic::hexagon_A2_vavgh; break; - - case Hexagon::BI__builtin_HEXAGON_A2_vnavgh: - ID = Intrinsic::hexagon_A2_vnavgh; break; - - case Hexagon::BI__builtin_HEXAGON_A2_vavgw: - ID = Intrinsic::hexagon_A2_vavgw; break; - - case Hexagon::BI__builtin_HEXAGON_A2_vnavgw: - ID = Intrinsic::hexagon_A2_vnavgw; break; - - case Hexagon::BI__builtin_HEXAGON_A2_vavgwr: - ID = Intrinsic::hexagon_A2_vavgwr; break; - - case Hexagon::BI__builtin_HEXAGON_A2_vnavgwr: - ID = Intrinsic::hexagon_A2_vnavgwr; break; - - case Hexagon::BI__builtin_HEXAGON_A2_vavgwcr: - ID = Intrinsic::hexagon_A2_vavgwcr; break; - - case Hexagon::BI__builtin_HEXAGON_A2_vnavgwcr: - ID = Intrinsic::hexagon_A2_vnavgwcr; break; - - case Hexagon::BI__builtin_HEXAGON_A2_vavghcr: - ID = Intrinsic::hexagon_A2_vavghcr; break; - - case Hexagon::BI__builtin_HEXAGON_A2_vnavghcr: - ID = Intrinsic::hexagon_A2_vnavghcr; break; - - case Hexagon::BI__builtin_HEXAGON_A2_vavguw: - ID = Intrinsic::hexagon_A2_vavguw; break; - - case Hexagon::BI__builtin_HEXAGON_A2_vavguwr: - ID = Intrinsic::hexagon_A2_vavguwr; break; - - case Hexagon::BI__builtin_HEXAGON_A2_vavgubr: - ID = Intrinsic::hexagon_A2_vavgubr; break; - - case Hexagon::BI__builtin_HEXAGON_A2_vavguhr: - ID = Intrinsic::hexagon_A2_vavguhr; break; - - case Hexagon::BI__builtin_HEXAGON_A2_vavghr: - ID = Intrinsic::hexagon_A2_vavghr; break; - - case Hexagon::BI__builtin_HEXAGON_A2_vnavghr: - ID = Intrinsic::hexagon_A2_vnavghr; break; - - case Hexagon::BI__builtin_HEXAGON_A2_vminh: - ID = Intrinsic::hexagon_A2_vminh; break; - - case Hexagon::BI__builtin_HEXAGON_A2_vmaxh: - ID = Intrinsic::hexagon_A2_vmaxh; break; - - case Hexagon::BI__builtin_HEXAGON_A2_vminub: - ID = Intrinsic::hexagon_A2_vminub; break; - - case 
Hexagon::BI__builtin_HEXAGON_A2_vmaxub: - ID = Intrinsic::hexagon_A2_vmaxub; break; - - case Hexagon::BI__builtin_HEXAGON_A2_vminuh: - ID = Intrinsic::hexagon_A2_vminuh; break; - - case Hexagon::BI__builtin_HEXAGON_A2_vmaxuh: - ID = Intrinsic::hexagon_A2_vmaxuh; break; - - case Hexagon::BI__builtin_HEXAGON_A2_vminw: - ID = Intrinsic::hexagon_A2_vminw; break; - - case Hexagon::BI__builtin_HEXAGON_A2_vmaxw: - ID = Intrinsic::hexagon_A2_vmaxw; break; - - case Hexagon::BI__builtin_HEXAGON_A2_vminuw: - ID = Intrinsic::hexagon_A2_vminuw; break; - - case Hexagon::BI__builtin_HEXAGON_A2_vmaxuw: - ID = Intrinsic::hexagon_A2_vmaxuw; break; - - case Hexagon::BI__builtin_HEXAGON_S2_asr_r_r: - ID = Intrinsic::hexagon_S2_asr_r_r; break; - - case Hexagon::BI__builtin_HEXAGON_S2_asl_r_r: - ID = Intrinsic::hexagon_S2_asl_r_r; break; - - case Hexagon::BI__builtin_HEXAGON_S2_lsr_r_r: - ID = Intrinsic::hexagon_S2_lsr_r_r; break; - - case Hexagon::BI__builtin_HEXAGON_S2_lsl_r_r: - ID = Intrinsic::hexagon_S2_lsl_r_r; break; - - case Hexagon::BI__builtin_HEXAGON_S2_asr_r_p: - ID = Intrinsic::hexagon_S2_asr_r_p; break; - - case Hexagon::BI__builtin_HEXAGON_S2_asl_r_p: - ID = Intrinsic::hexagon_S2_asl_r_p; break; - - case Hexagon::BI__builtin_HEXAGON_S2_lsr_r_p: - ID = Intrinsic::hexagon_S2_lsr_r_p; break; - - case Hexagon::BI__builtin_HEXAGON_S2_lsl_r_p: - ID = Intrinsic::hexagon_S2_lsl_r_p; break; - - case Hexagon::BI__builtin_HEXAGON_S2_asr_r_r_acc: - ID = Intrinsic::hexagon_S2_asr_r_r_acc; break; - - case Hexagon::BI__builtin_HEXAGON_S2_asl_r_r_acc: - ID = Intrinsic::hexagon_S2_asl_r_r_acc; break; - - case Hexagon::BI__builtin_HEXAGON_S2_lsr_r_r_acc: - ID = Intrinsic::hexagon_S2_lsr_r_r_acc; break; - - case Hexagon::BI__builtin_HEXAGON_S2_lsl_r_r_acc: - ID = Intrinsic::hexagon_S2_lsl_r_r_acc; break; - - case Hexagon::BI__builtin_HEXAGON_S2_asr_r_p_acc: - ID = Intrinsic::hexagon_S2_asr_r_p_acc; break; - - case Hexagon::BI__builtin_HEXAGON_S2_asl_r_p_acc: - ID = Intrinsic::hexagon_S2_asl_r_p_acc; break; - - case Hexagon::BI__builtin_HEXAGON_S2_lsr_r_p_acc: - ID = Intrinsic::hexagon_S2_lsr_r_p_acc; break; - - case Hexagon::BI__builtin_HEXAGON_S2_lsl_r_p_acc: - ID = Intrinsic::hexagon_S2_lsl_r_p_acc; break; - - case Hexagon::BI__builtin_HEXAGON_S2_asr_r_r_nac: - ID = Intrinsic::hexagon_S2_asr_r_r_nac; break; - - case Hexagon::BI__builtin_HEXAGON_S2_asl_r_r_nac: - ID = Intrinsic::hexagon_S2_asl_r_r_nac; break; - - case Hexagon::BI__builtin_HEXAGON_S2_lsr_r_r_nac: - ID = Intrinsic::hexagon_S2_lsr_r_r_nac; break; - - case Hexagon::BI__builtin_HEXAGON_S2_lsl_r_r_nac: - ID = Intrinsic::hexagon_S2_lsl_r_r_nac; break; - - case Hexagon::BI__builtin_HEXAGON_S2_asr_r_p_nac: - ID = Intrinsic::hexagon_S2_asr_r_p_nac; break; - - case Hexagon::BI__builtin_HEXAGON_S2_asl_r_p_nac: - ID = Intrinsic::hexagon_S2_asl_r_p_nac; break; - - case Hexagon::BI__builtin_HEXAGON_S2_lsr_r_p_nac: - ID = Intrinsic::hexagon_S2_lsr_r_p_nac; break; - - case Hexagon::BI__builtin_HEXAGON_S2_lsl_r_p_nac: - ID = Intrinsic::hexagon_S2_lsl_r_p_nac; break; - - case Hexagon::BI__builtin_HEXAGON_S2_asr_r_r_and: - ID = Intrinsic::hexagon_S2_asr_r_r_and; break; - - case Hexagon::BI__builtin_HEXAGON_S2_asl_r_r_and: - ID = Intrinsic::hexagon_S2_asl_r_r_and; break; - - case Hexagon::BI__builtin_HEXAGON_S2_lsr_r_r_and: - ID = Intrinsic::hexagon_S2_lsr_r_r_and; break; - - case Hexagon::BI__builtin_HEXAGON_S2_lsl_r_r_and: - ID = Intrinsic::hexagon_S2_lsl_r_r_and; break; - - case Hexagon::BI__builtin_HEXAGON_S2_asr_r_r_or: - ID = Intrinsic::hexagon_S2_asr_r_r_or; 
break; - - case Hexagon::BI__builtin_HEXAGON_S2_asl_r_r_or: - ID = Intrinsic::hexagon_S2_asl_r_r_or; break; - - case Hexagon::BI__builtin_HEXAGON_S2_lsr_r_r_or: - ID = Intrinsic::hexagon_S2_lsr_r_r_or; break; - - case Hexagon::BI__builtin_HEXAGON_S2_lsl_r_r_or: - ID = Intrinsic::hexagon_S2_lsl_r_r_or; break; - - case Hexagon::BI__builtin_HEXAGON_S2_asr_r_p_and: - ID = Intrinsic::hexagon_S2_asr_r_p_and; break; - - case Hexagon::BI__builtin_HEXAGON_S2_asl_r_p_and: - ID = Intrinsic::hexagon_S2_asl_r_p_and; break; - - case Hexagon::BI__builtin_HEXAGON_S2_lsr_r_p_and: - ID = Intrinsic::hexagon_S2_lsr_r_p_and; break; - - case Hexagon::BI__builtin_HEXAGON_S2_lsl_r_p_and: - ID = Intrinsic::hexagon_S2_lsl_r_p_and; break; - - case Hexagon::BI__builtin_HEXAGON_S2_asr_r_p_or: - ID = Intrinsic::hexagon_S2_asr_r_p_or; break; - - case Hexagon::BI__builtin_HEXAGON_S2_asl_r_p_or: - ID = Intrinsic::hexagon_S2_asl_r_p_or; break; - - case Hexagon::BI__builtin_HEXAGON_S2_lsr_r_p_or: - ID = Intrinsic::hexagon_S2_lsr_r_p_or; break; - - case Hexagon::BI__builtin_HEXAGON_S2_lsl_r_p_or: - ID = Intrinsic::hexagon_S2_lsl_r_p_or; break; - - case Hexagon::BI__builtin_HEXAGON_S2_asr_r_r_sat: - ID = Intrinsic::hexagon_S2_asr_r_r_sat; break; - - case Hexagon::BI__builtin_HEXAGON_S2_asl_r_r_sat: - ID = Intrinsic::hexagon_S2_asl_r_r_sat; break; - - case Hexagon::BI__builtin_HEXAGON_S2_asr_i_r: - ID = Intrinsic::hexagon_S2_asr_i_r; break; - - case Hexagon::BI__builtin_HEXAGON_S2_lsr_i_r: - ID = Intrinsic::hexagon_S2_lsr_i_r; break; - - case Hexagon::BI__builtin_HEXAGON_S2_asl_i_r: - ID = Intrinsic::hexagon_S2_asl_i_r; break; - - case Hexagon::BI__builtin_HEXAGON_S2_asr_i_p: - ID = Intrinsic::hexagon_S2_asr_i_p; break; - - case Hexagon::BI__builtin_HEXAGON_S2_lsr_i_p: - ID = Intrinsic::hexagon_S2_lsr_i_p; break; - - case Hexagon::BI__builtin_HEXAGON_S2_asl_i_p: - ID = Intrinsic::hexagon_S2_asl_i_p; break; - - case Hexagon::BI__builtin_HEXAGON_S2_asr_i_r_acc: - ID = Intrinsic::hexagon_S2_asr_i_r_acc; break; - - case Hexagon::BI__builtin_HEXAGON_S2_lsr_i_r_acc: - ID = Intrinsic::hexagon_S2_lsr_i_r_acc; break; - - case Hexagon::BI__builtin_HEXAGON_S2_asl_i_r_acc: - ID = Intrinsic::hexagon_S2_asl_i_r_acc; break; - - case Hexagon::BI__builtin_HEXAGON_S2_asr_i_p_acc: - ID = Intrinsic::hexagon_S2_asr_i_p_acc; break; - - case Hexagon::BI__builtin_HEXAGON_S2_lsr_i_p_acc: - ID = Intrinsic::hexagon_S2_lsr_i_p_acc; break; - - case Hexagon::BI__builtin_HEXAGON_S2_asl_i_p_acc: - ID = Intrinsic::hexagon_S2_asl_i_p_acc; break; - - case Hexagon::BI__builtin_HEXAGON_S2_asr_i_r_nac: - ID = Intrinsic::hexagon_S2_asr_i_r_nac; break; - - case Hexagon::BI__builtin_HEXAGON_S2_lsr_i_r_nac: - ID = Intrinsic::hexagon_S2_lsr_i_r_nac; break; - - case Hexagon::BI__builtin_HEXAGON_S2_asl_i_r_nac: - ID = Intrinsic::hexagon_S2_asl_i_r_nac; break; - - case Hexagon::BI__builtin_HEXAGON_S2_asr_i_p_nac: - ID = Intrinsic::hexagon_S2_asr_i_p_nac; break; - - case Hexagon::BI__builtin_HEXAGON_S2_lsr_i_p_nac: - ID = Intrinsic::hexagon_S2_lsr_i_p_nac; break; - - case Hexagon::BI__builtin_HEXAGON_S2_asl_i_p_nac: - ID = Intrinsic::hexagon_S2_asl_i_p_nac; break; - - case Hexagon::BI__builtin_HEXAGON_S2_lsr_i_r_xacc: - ID = Intrinsic::hexagon_S2_lsr_i_r_xacc; break; - - case Hexagon::BI__builtin_HEXAGON_S2_asl_i_r_xacc: - ID = Intrinsic::hexagon_S2_asl_i_r_xacc; break; - - case Hexagon::BI__builtin_HEXAGON_S2_lsr_i_p_xacc: - ID = Intrinsic::hexagon_S2_lsr_i_p_xacc; break; - - case Hexagon::BI__builtin_HEXAGON_S2_asl_i_p_xacc: - ID = Intrinsic::hexagon_S2_asl_i_p_xacc; 
break; - - case Hexagon::BI__builtin_HEXAGON_S2_asr_i_r_and: - ID = Intrinsic::hexagon_S2_asr_i_r_and; break; - - case Hexagon::BI__builtin_HEXAGON_S2_lsr_i_r_and: - ID = Intrinsic::hexagon_S2_lsr_i_r_and; break; - - case Hexagon::BI__builtin_HEXAGON_S2_asl_i_r_and: - ID = Intrinsic::hexagon_S2_asl_i_r_and; break; - - case Hexagon::BI__builtin_HEXAGON_S2_asr_i_r_or: - ID = Intrinsic::hexagon_S2_asr_i_r_or; break; - - case Hexagon::BI__builtin_HEXAGON_S2_lsr_i_r_or: - ID = Intrinsic::hexagon_S2_lsr_i_r_or; break; - - case Hexagon::BI__builtin_HEXAGON_S2_asl_i_r_or: - ID = Intrinsic::hexagon_S2_asl_i_r_or; break; - - case Hexagon::BI__builtin_HEXAGON_S2_asr_i_p_and: - ID = Intrinsic::hexagon_S2_asr_i_p_and; break; - - case Hexagon::BI__builtin_HEXAGON_S2_lsr_i_p_and: - ID = Intrinsic::hexagon_S2_lsr_i_p_and; break; - - case Hexagon::BI__builtin_HEXAGON_S2_asl_i_p_and: - ID = Intrinsic::hexagon_S2_asl_i_p_and; break; - - case Hexagon::BI__builtin_HEXAGON_S2_asr_i_p_or: - ID = Intrinsic::hexagon_S2_asr_i_p_or; break; - - case Hexagon::BI__builtin_HEXAGON_S2_lsr_i_p_or: - ID = Intrinsic::hexagon_S2_lsr_i_p_or; break; - - case Hexagon::BI__builtin_HEXAGON_S2_asl_i_p_or: - ID = Intrinsic::hexagon_S2_asl_i_p_or; break; - - case Hexagon::BI__builtin_HEXAGON_S2_asl_i_r_sat: - ID = Intrinsic::hexagon_S2_asl_i_r_sat; break; - - case Hexagon::BI__builtin_HEXAGON_S2_asr_i_r_rnd: - ID = Intrinsic::hexagon_S2_asr_i_r_rnd; break; - - case Hexagon::BI__builtin_HEXAGON_S2_asr_i_r_rnd_goodsyntax: - ID = Intrinsic::hexagon_S2_asr_i_r_rnd_goodsyntax; break; - - case Hexagon::BI__builtin_HEXAGON_S2_addasl_rrri: - ID = Intrinsic::hexagon_S2_addasl_rrri; break; - - case Hexagon::BI__builtin_HEXAGON_S2_valignib: - ID = Intrinsic::hexagon_S2_valignib; break; - - case Hexagon::BI__builtin_HEXAGON_S2_valignrb: - ID = Intrinsic::hexagon_S2_valignrb; break; - - case Hexagon::BI__builtin_HEXAGON_S2_vspliceib: - ID = Intrinsic::hexagon_S2_vspliceib; break; - - case Hexagon::BI__builtin_HEXAGON_S2_vsplicerb: - ID = Intrinsic::hexagon_S2_vsplicerb; break; - - case Hexagon::BI__builtin_HEXAGON_S2_vsplatrh: - ID = Intrinsic::hexagon_S2_vsplatrh; break; - - case Hexagon::BI__builtin_HEXAGON_S2_vsplatrb: - ID = Intrinsic::hexagon_S2_vsplatrb; break; - - case Hexagon::BI__builtin_HEXAGON_S2_insert: - ID = Intrinsic::hexagon_S2_insert; break; - - case Hexagon::BI__builtin_HEXAGON_S2_tableidxb_goodsyntax: - ID = Intrinsic::hexagon_S2_tableidxb_goodsyntax; break; - - case Hexagon::BI__builtin_HEXAGON_S2_tableidxh_goodsyntax: - ID = Intrinsic::hexagon_S2_tableidxh_goodsyntax; break; - - case Hexagon::BI__builtin_HEXAGON_S2_tableidxw_goodsyntax: - ID = Intrinsic::hexagon_S2_tableidxw_goodsyntax; break; - - case Hexagon::BI__builtin_HEXAGON_S2_tableidxd_goodsyntax: - ID = Intrinsic::hexagon_S2_tableidxd_goodsyntax; break; - - case Hexagon::BI__builtin_HEXAGON_S2_extractu: - ID = Intrinsic::hexagon_S2_extractu; break; - - case Hexagon::BI__builtin_HEXAGON_S2_insertp: - ID = Intrinsic::hexagon_S2_insertp; break; - - case Hexagon::BI__builtin_HEXAGON_S2_extractup: - ID = Intrinsic::hexagon_S2_extractup; break; - - case Hexagon::BI__builtin_HEXAGON_S2_insert_rp: - ID = Intrinsic::hexagon_S2_insert_rp; break; - - case Hexagon::BI__builtin_HEXAGON_S2_extractu_rp: - ID = Intrinsic::hexagon_S2_extractu_rp; break; - - case Hexagon::BI__builtin_HEXAGON_S2_insertp_rp: - ID = Intrinsic::hexagon_S2_insertp_rp; break; - - case Hexagon::BI__builtin_HEXAGON_S2_extractup_rp: - ID = Intrinsic::hexagon_S2_extractup_rp; break; - - case 
Hexagon::BI__builtin_HEXAGON_S2_tstbit_i: - ID = Intrinsic::hexagon_S2_tstbit_i; break; - - case Hexagon::BI__builtin_HEXAGON_S2_setbit_i: - ID = Intrinsic::hexagon_S2_setbit_i; break; - - case Hexagon::BI__builtin_HEXAGON_S2_togglebit_i: - ID = Intrinsic::hexagon_S2_togglebit_i; break; - - case Hexagon::BI__builtin_HEXAGON_S2_clrbit_i: - ID = Intrinsic::hexagon_S2_clrbit_i; break; - - case Hexagon::BI__builtin_HEXAGON_S2_tstbit_r: - ID = Intrinsic::hexagon_S2_tstbit_r; break; - - case Hexagon::BI__builtin_HEXAGON_S2_setbit_r: - ID = Intrinsic::hexagon_S2_setbit_r; break; - - case Hexagon::BI__builtin_HEXAGON_S2_togglebit_r: - ID = Intrinsic::hexagon_S2_togglebit_r; break; - - case Hexagon::BI__builtin_HEXAGON_S2_clrbit_r: - ID = Intrinsic::hexagon_S2_clrbit_r; break; - - case Hexagon::BI__builtin_HEXAGON_S2_asr_i_vh: - ID = Intrinsic::hexagon_S2_asr_i_vh; break; - - case Hexagon::BI__builtin_HEXAGON_S2_lsr_i_vh: - ID = Intrinsic::hexagon_S2_lsr_i_vh; break; - - case Hexagon::BI__builtin_HEXAGON_S2_asl_i_vh: - ID = Intrinsic::hexagon_S2_asl_i_vh; break; - - case Hexagon::BI__builtin_HEXAGON_S2_asr_r_vh: - ID = Intrinsic::hexagon_S2_asr_r_vh; break; - - case Hexagon::BI__builtin_HEXAGON_S2_asl_r_vh: - ID = Intrinsic::hexagon_S2_asl_r_vh; break; - - case Hexagon::BI__builtin_HEXAGON_S2_lsr_r_vh: - ID = Intrinsic::hexagon_S2_lsr_r_vh; break; - - case Hexagon::BI__builtin_HEXAGON_S2_lsl_r_vh: - ID = Intrinsic::hexagon_S2_lsl_r_vh; break; - - case Hexagon::BI__builtin_HEXAGON_S2_asr_i_vw: - ID = Intrinsic::hexagon_S2_asr_i_vw; break; - - case Hexagon::BI__builtin_HEXAGON_S2_asr_i_svw_trun: - ID = Intrinsic::hexagon_S2_asr_i_svw_trun; break; - - case Hexagon::BI__builtin_HEXAGON_S2_asr_r_svw_trun: - ID = Intrinsic::hexagon_S2_asr_r_svw_trun; break; - - case Hexagon::BI__builtin_HEXAGON_S2_lsr_i_vw: - ID = Intrinsic::hexagon_S2_lsr_i_vw; break; - - case Hexagon::BI__builtin_HEXAGON_S2_asl_i_vw: - ID = Intrinsic::hexagon_S2_asl_i_vw; break; - - case Hexagon::BI__builtin_HEXAGON_S2_asr_r_vw: - ID = Intrinsic::hexagon_S2_asr_r_vw; break; - - case Hexagon::BI__builtin_HEXAGON_S2_asl_r_vw: - ID = Intrinsic::hexagon_S2_asl_r_vw; break; - - case Hexagon::BI__builtin_HEXAGON_S2_lsr_r_vw: - ID = Intrinsic::hexagon_S2_lsr_r_vw; break; - - case Hexagon::BI__builtin_HEXAGON_S2_lsl_r_vw: - ID = Intrinsic::hexagon_S2_lsl_r_vw; break; - - case Hexagon::BI__builtin_HEXAGON_S2_vrndpackwh: - ID = Intrinsic::hexagon_S2_vrndpackwh; break; - - case Hexagon::BI__builtin_HEXAGON_S2_vrndpackwhs: - ID = Intrinsic::hexagon_S2_vrndpackwhs; break; - - case Hexagon::BI__builtin_HEXAGON_S2_vsxtbh: - ID = Intrinsic::hexagon_S2_vsxtbh; break; - - case Hexagon::BI__builtin_HEXAGON_S2_vzxtbh: - ID = Intrinsic::hexagon_S2_vzxtbh; break; - - case Hexagon::BI__builtin_HEXAGON_S2_vsathub: - ID = Intrinsic::hexagon_S2_vsathub; break; - - case Hexagon::BI__builtin_HEXAGON_S2_svsathub: - ID = Intrinsic::hexagon_S2_svsathub; break; - - case Hexagon::BI__builtin_HEXAGON_S2_svsathb: - ID = Intrinsic::hexagon_S2_svsathb; break; - - case Hexagon::BI__builtin_HEXAGON_S2_vsathb: - ID = Intrinsic::hexagon_S2_vsathb; break; - - case Hexagon::BI__builtin_HEXAGON_S2_vtrunohb: - ID = Intrinsic::hexagon_S2_vtrunohb; break; - - case Hexagon::BI__builtin_HEXAGON_S2_vtrunewh: - ID = Intrinsic::hexagon_S2_vtrunewh; break; - - case Hexagon::BI__builtin_HEXAGON_S2_vtrunowh: - ID = Intrinsic::hexagon_S2_vtrunowh; break; - - case Hexagon::BI__builtin_HEXAGON_S2_vtrunehb: - ID = Intrinsic::hexagon_S2_vtrunehb; break; - - case 
Hexagon::BI__builtin_HEXAGON_S2_vsxthw: - ID = Intrinsic::hexagon_S2_vsxthw; break; - - case Hexagon::BI__builtin_HEXAGON_S2_vzxthw: - ID = Intrinsic::hexagon_S2_vzxthw; break; - - case Hexagon::BI__builtin_HEXAGON_S2_vsatwh: - ID = Intrinsic::hexagon_S2_vsatwh; break; - - case Hexagon::BI__builtin_HEXAGON_S2_vsatwuh: - ID = Intrinsic::hexagon_S2_vsatwuh; break; - - case Hexagon::BI__builtin_HEXAGON_S2_packhl: - ID = Intrinsic::hexagon_S2_packhl; break; - - case Hexagon::BI__builtin_HEXAGON_A2_swiz: - ID = Intrinsic::hexagon_A2_swiz; break; - - case Hexagon::BI__builtin_HEXAGON_S2_vsathub_nopack: - ID = Intrinsic::hexagon_S2_vsathub_nopack; break; - - case Hexagon::BI__builtin_HEXAGON_S2_vsathb_nopack: - ID = Intrinsic::hexagon_S2_vsathb_nopack; break; - - case Hexagon::BI__builtin_HEXAGON_S2_vsatwh_nopack: - ID = Intrinsic::hexagon_S2_vsatwh_nopack; break; - - case Hexagon::BI__builtin_HEXAGON_S2_vsatwuh_nopack: - ID = Intrinsic::hexagon_S2_vsatwuh_nopack; break; - - case Hexagon::BI__builtin_HEXAGON_S2_shuffob: - ID = Intrinsic::hexagon_S2_shuffob; break; - - case Hexagon::BI__builtin_HEXAGON_S2_shuffeb: - ID = Intrinsic::hexagon_S2_shuffeb; break; - - case Hexagon::BI__builtin_HEXAGON_S2_shuffoh: - ID = Intrinsic::hexagon_S2_shuffoh; break; - - case Hexagon::BI__builtin_HEXAGON_S2_shuffeh: - ID = Intrinsic::hexagon_S2_shuffeh; break; - - case Hexagon::BI__builtin_HEXAGON_S2_parityp: - ID = Intrinsic::hexagon_S2_parityp; break; - - case Hexagon::BI__builtin_HEXAGON_S2_lfsp: - ID = Intrinsic::hexagon_S2_lfsp; break; - - case Hexagon::BI__builtin_HEXAGON_S2_clbnorm: - ID = Intrinsic::hexagon_S2_clbnorm; break; - - case Hexagon::BI__builtin_HEXAGON_S2_clb: - ID = Intrinsic::hexagon_S2_clb; break; - - case Hexagon::BI__builtin_HEXAGON_S2_cl0: - ID = Intrinsic::hexagon_S2_cl0; break; - - case Hexagon::BI__builtin_HEXAGON_S2_cl1: - ID = Intrinsic::hexagon_S2_cl1; break; - - case Hexagon::BI__builtin_HEXAGON_S2_clbp: - ID = Intrinsic::hexagon_S2_clbp; break; - - case Hexagon::BI__builtin_HEXAGON_S2_cl0p: - ID = Intrinsic::hexagon_S2_cl0p; break; - - case Hexagon::BI__builtin_HEXAGON_S2_cl1p: - ID = Intrinsic::hexagon_S2_cl1p; break; - - case Hexagon::BI__builtin_HEXAGON_S2_brev: - ID = Intrinsic::hexagon_S2_brev; break; - - case Hexagon::BI__builtin_HEXAGON_S2_ct0: - ID = Intrinsic::hexagon_S2_ct0; break; - - case Hexagon::BI__builtin_HEXAGON_S2_ct1: - ID = Intrinsic::hexagon_S2_ct1; break; - - case Hexagon::BI__builtin_HEXAGON_S2_interleave: - ID = Intrinsic::hexagon_S2_interleave; break; - - case Hexagon::BI__builtin_HEXAGON_S2_deinterleave: - ID = Intrinsic::hexagon_S2_deinterleave; break; - - case Hexagon::BI__builtin_SI_to_SXTHI_asrh: - ID = Intrinsic::hexagon_SI_to_SXTHI_asrh; break; - - case Hexagon::BI__builtin_HEXAGON_A4_orn: - ID = Intrinsic::hexagon_A4_orn; break; - - case Hexagon::BI__builtin_HEXAGON_A4_andn: - ID = Intrinsic::hexagon_A4_andn; break; - - case Hexagon::BI__builtin_HEXAGON_A4_ornp: - ID = Intrinsic::hexagon_A4_ornp; break; - - case Hexagon::BI__builtin_HEXAGON_A4_andnp: - ID = Intrinsic::hexagon_A4_andnp; break; - - case Hexagon::BI__builtin_HEXAGON_A4_combineir: - ID = Intrinsic::hexagon_A4_combineir; break; - - case Hexagon::BI__builtin_HEXAGON_A4_combineri: - ID = Intrinsic::hexagon_A4_combineri; break; - - case Hexagon::BI__builtin_HEXAGON_C4_cmpneqi: - ID = Intrinsic::hexagon_C4_cmpneqi; break; - - case Hexagon::BI__builtin_HEXAGON_C4_cmpneq: - ID = Intrinsic::hexagon_C4_cmpneq; break; - - case Hexagon::BI__builtin_HEXAGON_C4_cmpltei: - ID = 
Intrinsic::hexagon_C4_cmpltei; break; - - case Hexagon::BI__builtin_HEXAGON_C4_cmplte: - ID = Intrinsic::hexagon_C4_cmplte; break; - - case Hexagon::BI__builtin_HEXAGON_C4_cmplteui: - ID = Intrinsic::hexagon_C4_cmplteui; break; - - case Hexagon::BI__builtin_HEXAGON_C4_cmplteu: - ID = Intrinsic::hexagon_C4_cmplteu; break; - - case Hexagon::BI__builtin_HEXAGON_A4_rcmpneq: - ID = Intrinsic::hexagon_A4_rcmpneq; break; - - case Hexagon::BI__builtin_HEXAGON_A4_rcmpneqi: - ID = Intrinsic::hexagon_A4_rcmpneqi; break; - - case Hexagon::BI__builtin_HEXAGON_A4_rcmpeq: - ID = Intrinsic::hexagon_A4_rcmpeq; break; - - case Hexagon::BI__builtin_HEXAGON_A4_rcmpeqi: - ID = Intrinsic::hexagon_A4_rcmpeqi; break; - - case Hexagon::BI__builtin_HEXAGON_C4_fastcorner9: - ID = Intrinsic::hexagon_C4_fastcorner9; break; - - case Hexagon::BI__builtin_HEXAGON_C4_fastcorner9_not: - ID = Intrinsic::hexagon_C4_fastcorner9_not; break; - - case Hexagon::BI__builtin_HEXAGON_C4_and_andn: - ID = Intrinsic::hexagon_C4_and_andn; break; - - case Hexagon::BI__builtin_HEXAGON_C4_and_and: - ID = Intrinsic::hexagon_C4_and_and; break; - - case Hexagon::BI__builtin_HEXAGON_C4_and_orn: - ID = Intrinsic::hexagon_C4_and_orn; break; - - case Hexagon::BI__builtin_HEXAGON_C4_and_or: - ID = Intrinsic::hexagon_C4_and_or; break; - - case Hexagon::BI__builtin_HEXAGON_C4_or_andn: - ID = Intrinsic::hexagon_C4_or_andn; break; - - case Hexagon::BI__builtin_HEXAGON_C4_or_and: - ID = Intrinsic::hexagon_C4_or_and; break; - - case Hexagon::BI__builtin_HEXAGON_C4_or_orn: - ID = Intrinsic::hexagon_C4_or_orn; break; - - case Hexagon::BI__builtin_HEXAGON_C4_or_or: - ID = Intrinsic::hexagon_C4_or_or; break; - - case Hexagon::BI__builtin_HEXAGON_S4_addaddi: - ID = Intrinsic::hexagon_S4_addaddi; break; - - case Hexagon::BI__builtin_HEXAGON_S4_subaddi: - ID = Intrinsic::hexagon_S4_subaddi; break; - - case Hexagon::BI__builtin_HEXAGON_M4_xor_xacc: - ID = Intrinsic::hexagon_M4_xor_xacc; break; - - case Hexagon::BI__builtin_HEXAGON_M4_and_and: - ID = Intrinsic::hexagon_M4_and_and; break; - - case Hexagon::BI__builtin_HEXAGON_M4_and_or: - ID = Intrinsic::hexagon_M4_and_or; break; - - case Hexagon::BI__builtin_HEXAGON_M4_and_xor: - ID = Intrinsic::hexagon_M4_and_xor; break; - - case Hexagon::BI__builtin_HEXAGON_M4_and_andn: - ID = Intrinsic::hexagon_M4_and_andn; break; - - case Hexagon::BI__builtin_HEXAGON_M4_xor_and: - ID = Intrinsic::hexagon_M4_xor_and; break; - - case Hexagon::BI__builtin_HEXAGON_M4_xor_or: - ID = Intrinsic::hexagon_M4_xor_or; break; - - case Hexagon::BI__builtin_HEXAGON_M4_xor_andn: - ID = Intrinsic::hexagon_M4_xor_andn; break; - - case Hexagon::BI__builtin_HEXAGON_M4_or_and: - ID = Intrinsic::hexagon_M4_or_and; break; - - case Hexagon::BI__builtin_HEXAGON_M4_or_or: - ID = Intrinsic::hexagon_M4_or_or; break; - - case Hexagon::BI__builtin_HEXAGON_M4_or_xor: - ID = Intrinsic::hexagon_M4_or_xor; break; - - case Hexagon::BI__builtin_HEXAGON_M4_or_andn: - ID = Intrinsic::hexagon_M4_or_andn; break; - - case Hexagon::BI__builtin_HEXAGON_S4_or_andix: - ID = Intrinsic::hexagon_S4_or_andix; break; - - case Hexagon::BI__builtin_HEXAGON_S4_or_andi: - ID = Intrinsic::hexagon_S4_or_andi; break; - - case Hexagon::BI__builtin_HEXAGON_S4_or_ori: - ID = Intrinsic::hexagon_S4_or_ori; break; - - case Hexagon::BI__builtin_HEXAGON_A4_modwrapu: - ID = Intrinsic::hexagon_A4_modwrapu; break; - - case Hexagon::BI__builtin_HEXAGON_A4_cround_rr: - ID = Intrinsic::hexagon_A4_cround_rr; break; - - case Hexagon::BI__builtin_HEXAGON_A4_round_ri: - ID = 
Intrinsic::hexagon_A4_round_ri; break;
-
-  case Hexagon::BI__builtin_HEXAGON_A4_round_rr:
-    ID = Intrinsic::hexagon_A4_round_rr; break;
-
-  case Hexagon::BI__builtin_HEXAGON_A4_round_ri_sat:
-    ID = Intrinsic::hexagon_A4_round_ri_sat; break;
-
-  case Hexagon::BI__builtin_HEXAGON_A4_round_rr_sat:
-    ID = Intrinsic::hexagon_A4_round_rr_sat; break;
-
-  }
-
-  llvm::Function *F = CGM.getIntrinsic(ID);
-  return Builder.CreateCall(F, Ops, "");
-}
-
 Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
                                            const CallExpr *E) {
   SmallVector<Value*, 4> Ops;
diff --git a/lib/CodeGen/CGCXX.cpp b/lib/CodeGen/CGCXX.cpp
index 7c08650..003fef5 100644
--- a/lib/CodeGen/CGCXX.cpp
+++ b/lib/CodeGen/CGCXX.cpp
@@ -53,7 +53,7 @@ bool CodeGenModule::TryEmitBaseDestructorAsAlias(const CXXDestructorDecl *D) {
   // destructor separately.
   for (CXXRecordDecl::field_iterator I = Class->field_begin(),
          E = Class->field_end(); I != E; ++I)
-    if ((*I)->getType().isDestructedType())
+    if (I->getType().isDestructedType())
       return true;
 
   // Try to find a unique base class with a non-trivial destructor.
@@ -91,7 +91,7 @@ bool CodeGenModule::TryEmitBaseDestructorAsAlias(const CXXDestructorDecl *D) {
 
   // If the base is at a non-zero offset, give up.
   const ASTRecordLayout &ClassLayout = Context.getASTRecordLayout(Class);
-  if (ClassLayout.getBaseClassOffsetInBits(UniqueBase) != 0)
+  if (!ClassLayout.getBaseClassOffset(UniqueBase).isZero())
     return true;
 
   return TryEmitDefinitionAsAlias(GlobalDecl(D, Dtor_Base),
diff --git a/lib/CodeGen/CGCXXABI.cpp b/lib/CodeGen/CGCXXABI.cpp
index befebbe..aba5d75 100644
--- a/lib/CodeGen/CGCXXABI.cpp
+++ b/lib/CodeGen/CGCXXABI.cpp
@@ -23,7 +23,7 @@ static void ErrorUnsupportedABI(CodeGenFunction &CGF, StringRef S) {
   DiagnosticsEngine &Diags = CGF.CGM.getDiags();
   unsigned DiagID = Diags.getCustomDiagID(DiagnosticsEngine::Error,
-                                          "cannot yet compile %1 in this ABI");
+                                          "cannot yet compile %0 in this ABI");
   Diags.Report(CGF.getContext().getFullLoc(CGF.CurCodeDecl->getLocation()),
                DiagID)
     << S;
@@ -145,6 +145,13 @@ void CGCXXABI::EmitReturnFromThunk(CodeGenFunction &CGF,
 }
 
 CharUnits CGCXXABI::GetArrayCookieSize(const CXXNewExpr *expr) {
+  if (!requiresArrayCookie(expr))
+    return CharUnits::Zero();
+  return getArrayCookieSizeImpl(expr->getAllocatedType());
+}
+
+CharUnits CGCXXABI::getArrayCookieSizeImpl(QualType elementType) {
+  // BOGUS
   return CharUnits::Zero();
 }
 
@@ -158,16 +165,53 @@ llvm::Value *CGCXXABI::InitializeArrayCookie(CodeGenFunction &CGF,
   return 0;
 }
 
-void CGCXXABI::ReadArrayCookie(CodeGenFunction &CGF, llvm::Value *Ptr,
-                               const CXXDeleteExpr *expr, QualType ElementType,
-                               llvm::Value *&NumElements,
-                               llvm::Value *&AllocPtr, CharUnits &CookieSize) {
-  ErrorUnsupportedABI(CGF, "array cookie reading");
+bool CGCXXABI::requiresArrayCookie(const CXXDeleteExpr *expr,
+                                   QualType elementType) {
+  // If the class's usual deallocation function takes two arguments,
+  // it needs a cookie.
+  if (expr->doesUsualArrayDeleteWantSize())
+    return true;
-
-  // This should be enough to avoid assertions.
-  NumElements = 0;
-  AllocPtr = llvm::Constant::getNullValue(CGF.Builder.getInt8PtrTy());
-  CookieSize = CharUnits::Zero();
+  return elementType.isDestructedType();
+}
+
+bool CGCXXABI::requiresArrayCookie(const CXXNewExpr *expr) {
+  // If the class's usual deallocation function takes two arguments,
+  // it needs a cookie.
+  if (expr->doesUsualArrayDeleteWantSize())
+    return true;
+
+  return expr->getAllocatedType().isDestructedType();
+}
+
+void CGCXXABI::ReadArrayCookie(CodeGenFunction &CGF, llvm::Value *ptr,
+                               const CXXDeleteExpr *expr, QualType eltTy,
+                               llvm::Value *&numElements,
+                               llvm::Value *&allocPtr, CharUnits &cookieSize) {
+  // Derive a char* in the same address space as the pointer.
+  unsigned AS = cast<llvm::PointerType>(ptr->getType())->getAddressSpace();
+  llvm::Type *charPtrTy = CGF.Int8Ty->getPointerTo(AS);
+  ptr = CGF.Builder.CreateBitCast(ptr, charPtrTy);
+
+  // If we don't need an array cookie, bail out early.
+  if (!requiresArrayCookie(expr, eltTy)) {
+    allocPtr = ptr;
+    numElements = 0;
+    cookieSize = CharUnits::Zero();
+    return;
+  }
+
+  cookieSize = getArrayCookieSizeImpl(eltTy);
+  allocPtr = CGF.Builder.CreateConstInBoundsGEP1_64(ptr,
+                                                    -cookieSize.getQuantity());
+  numElements = readArrayCookieImpl(CGF, allocPtr, cookieSize);
+}
+
+llvm::Value *CGCXXABI::readArrayCookieImpl(CodeGenFunction &CGF,
+                                           llvm::Value *ptr,
+                                           CharUnits cookieSize) {
+  ErrorUnsupportedABI(CGF, "reading a new[] cookie");
+  return llvm::ConstantInt::get(CGF.SizeTy, 0);
 }
 
 void CGCXXABI::EmitGuardedInit(CodeGenFunction &CGF,
@@ -177,6 +221,13 @@ void CGCXXABI::EmitGuardedInit(CodeGenFunction &CGF,
   ErrorUnsupportedABI(CGF, "static local variable initialization");
 }
 
+void CGCXXABI::registerGlobalDtor(CodeGenFunction &CGF,
+                                  llvm::Constant *dtor,
+                                  llvm::Constant *addr) {
+  // The default behavior is to use atexit.
+  CGF.registerGlobalDtorWithAtExit(dtor, addr);
+}
+
 /// Returns the adjustment, in bytes, required for the given
 /// member-pointer operation.  Returns null if no adjustment is
 /// required.
diff --git a/lib/CodeGen/CGCXXABI.h b/lib/CodeGen/CGCXXABI.h
index 4e045f5..a0dcdfd 100644
--- a/lib/CodeGen/CGCXXABI.h
+++ b/lib/CodeGen/CGCXXABI.h
@@ -38,7 +38,7 @@ namespace CodeGen {
   class CodeGenFunction;
   class CodeGenModule;
 
-/// Implements C++ ABI-specific code generation functions.
+/// \brief Implements C++ ABI-specific code generation functions.
 class CGCXXABI {
 protected:
   CodeGenModule &CGM;
@@ -71,6 +71,9 @@ protected:
 
   ASTContext &getContext() const { return CGM.getContext(); }
 
+  virtual bool requiresArrayCookie(const CXXDeleteExpr *E, QualType eltType);
+  virtual bool requiresArrayCookie(const CXXNewExpr *E);
+
 public:
 
   virtual ~CGCXXABI();
@@ -190,18 +193,20 @@ public:
   virtual void EmitReturnFromThunk(CodeGenFunction &CGF,
                                    RValue RV, QualType ResultType);
 
+  /// Gets the pure virtual member call function.
+  virtual StringRef GetPureVirtualCallName() = 0;
+
   /**************************** Array cookies ******************************/
 
   /// Returns the extra size required in order to store the array
-  /// cookie for the given type.  May return 0 to indicate that no
+  /// cookie for the given new-expression.  May return 0 to indicate that no
   /// array cookie is required.
   ///
   /// Several cases are filtered out before this method is called:
   ///   - non-array allocations never need a cookie
-  ///   - calls to ::operator new(size_t, void*) never need a cookie
+  ///   - calls to \::operator new(size_t, void*) never need a cookie
   ///
-  /// \param ElementType - the allocated type of the expression,
-  ///   i.e. the pointee type of the expression result type
+  /// \param expr - the new-expression being allocated.
   virtual CharUnits GetArrayCookieSize(const CXXNewExpr *expr);
 
   /// Initialize the array cookie for the given allocation.
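The CGCXXABI hunks above replace the old all-or-nothing ReadArrayCookie stub with a reusable default: the ABI-neutral layer now decides whether a new[] cookie is needed at all (requiresArrayCookie), and only the cookie size and the load of the element count stay ABI-specific (getArrayCookieSizeImpl / readArrayCookieImpl). A minimal standalone sketch of the pointer arithmetic the default ReadArrayCookie performs, in plain C++ standing in for the IR the builder emits, assuming an Itanium-style cookie whose element count sits in the last size_t of the cookie; all names below are illustrative and not part of the patch:

#include <cstddef>
#include <cstring>

// userPtr is the pointer a new[] expression handed to the program;
// cookieSize is whatever the ABI's cookie-size hook chose.
static void readArrayCookieSketch(char *userPtr, std::size_t cookieSize,
                                  char *&allocPtr, std::size_t &numElements) {
  if (cookieSize == 0) {          // no cookie: delete[] sees the raw pointer
    allocPtr = userPtr;
    numElements = 0;
    return;
  }
  // The cookie sits immediately before the first array element, so the
  // allocator's original pointer is the user pointer minus the cookie...
  allocPtr = userPtr - cookieSize;
  // ...and, in the Itanium layout, the element count occupies the last
  // sizeof(size_t) bytes of the cookie.
  std::memcpy(&numElements, userPtr - sizeof(std::size_t),
              sizeof(std::size_t));
}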
@@ -209,7 +214,8 @@ public:
   /// \param NewPtr - a char* which is the presumed-non-null
   ///   return value of the allocation function
   /// \param NumElements - the computed number of elements,
-  ///   potentially collapsed from the multidimensional array case
+  ///   potentially collapsed from the multidimensional array case;
+  ///   always a size_t
   /// \param ElementType - the base element allocated type,
   ///   i.e. the allocated type after stripping all array types
   virtual llvm::Value *InitializeArrayCookie(CodeGenFunction &CGF,
@@ -236,6 +242,27 @@ public:
                                QualType ElementType, llvm::Value *&NumElements,
                                llvm::Value *&AllocPtr, CharUnits &CookieSize);
 
+protected:
+  /// Returns the extra size required in order to store the array
+  /// cookie for the given type.  Assumes that an array cookie is
+  /// required.
+  virtual CharUnits getArrayCookieSizeImpl(QualType elementType);
+
+  /// Reads the array cookie for an allocation which is known to have one.
+  /// This is called by the standard implementation of ReadArrayCookie.
+  ///
+  /// \param ptr - a pointer to the allocation made for an array, as a char*
+  /// \param cookieSize - the computed cookie size of an array
+  ///
+  /// Other parameters are as above.
+  ///
+  /// \return a size_t
+  virtual llvm::Value *readArrayCookieImpl(CodeGenFunction &IGF,
+                                           llvm::Value *ptr,
+                                           CharUnits cookieSize);
+
+public:
+
   /*************************** Static local guards ****************************/
 
   /// Emits the guarded initializer and destructor setup for the given
@@ -249,6 +276,18 @@ public:
   virtual void EmitGuardedInit(CodeGenFunction &CGF, const VarDecl &D,
                                llvm::GlobalVariable *DeclPtr, bool PerformInit);
 
+  /// Emit code to force the execution of a destructor during global
+  /// teardown.  The default implementation of this uses atexit.
+  ///
+  /// \param dtor - a function taking a single pointer argument
+  /// \param addr - a pointer to pass to the destructor function.
+  virtual void registerGlobalDtor(CodeGenFunction &CGF, llvm::Constant *dtor,
+                                  llvm::Constant *addr);
+
+  /***************************** Virtual Tables *******************************/
+
+  /// Generates and emits the virtual tables for a class.
+  virtual void EmitVTables(const CXXRecordDecl *Class) = 0;
 };
 
 /// Creates an instance of a C++ ABI class.
diff --git a/lib/CodeGen/CGCall.cpp b/lib/CodeGen/CGCall.cpp
index 82ee4fc..7d2b9d3 100644
--- a/lib/CodeGen/CGCall.cpp
+++ b/lib/CodeGen/CGCall.cpp
@@ -67,39 +67,68 @@ static CanQualType GetReturnType(QualType RetTy) {
   return RetTy->getCanonicalTypeUnqualified().getUnqualifiedType();
 }
 
-/// Arrange the argument and result information for a value of the
-/// given unprototyped function type.
+/// Arrange the argument and result information for a value of the given
+/// unprototyped freestanding function type.
 const CGFunctionInfo &
-CodeGenTypes::arrangeFunctionType(CanQual<FunctionNoProtoType> FTNP) {
+CodeGenTypes::arrangeFreeFunctionType(CanQual<FunctionNoProtoType> FTNP) {
   // When translating an unprototyped function type, always use a
   // variadic type.
-  return arrangeFunctionType(FTNP->getResultType().getUnqualifiedType(),
-                             ArrayRef<CanQualType>(),
-                             FTNP->getExtInfo(),
-                             RequiredArgs(0));
+  return arrangeLLVMFunctionInfo(FTNP->getResultType().getUnqualifiedType(),
+                                 ArrayRef<CanQualType>(),
+                                 FTNP->getExtInfo(),
+                                 RequiredArgs(0));
 }
 
-/// Arrange the argument and result information for a value of the
-/// given function type, on top of any implicit parameters already
-/// stored.
-static const CGFunctionInfo &arrangeFunctionType(CodeGenTypes &CGT,
-                                  SmallVectorImpl<CanQualType> &argTypes,
-                                        CanQual<FunctionProtoType> FTP) {
-  RequiredArgs required = RequiredArgs::forPrototypePlus(FTP, argTypes.size());
+/// Arrange the LLVM function layout for a value of the given function
+/// type, on top of any implicit parameters already stored. Use the
+/// given ExtInfo instead of the ExtInfo from the function type.
+static const CGFunctionInfo &arrangeLLVMFunctionInfo(CodeGenTypes &CGT,
+                                  SmallVectorImpl<CanQualType> &prefix,
+                                        CanQual<FunctionProtoType> FTP,
+                                        FunctionType::ExtInfo extInfo) {
+  RequiredArgs required = RequiredArgs::forPrototypePlus(FTP, prefix.size());
   // FIXME: Kill copy.
   for (unsigned i = 0, e = FTP->getNumArgs(); i != e; ++i)
-    argTypes.push_back(FTP->getArgType(i));
+    prefix.push_back(FTP->getArgType(i));
   CanQualType resultType = FTP->getResultType().getUnqualifiedType();
-  return CGT.arrangeFunctionType(resultType, argTypes,
-                                 FTP->getExtInfo(), required);
+  return CGT.arrangeLLVMFunctionInfo(resultType, prefix, extInfo, required);
+}
+
+/// Arrange the argument and result information for a free function (i.e.
+/// not a C++ or ObjC instance method) of the given type.
+static const CGFunctionInfo &arrangeFreeFunctionType(CodeGenTypes &CGT,
+                                  SmallVectorImpl<CanQualType> &prefix,
+                                        CanQual<FunctionProtoType> FTP) {
+  return arrangeLLVMFunctionInfo(CGT, prefix, FTP, FTP->getExtInfo());
+}
+
+/// Given the formal ext-info of a C++ instance method, adjust it
+/// according to the C++ ABI in effect.
+static void adjustCXXMethodInfo(CodeGenTypes &CGT,
+                                FunctionType::ExtInfo &extInfo,
+                                bool isVariadic) {
+  if (extInfo.getCC() == CC_Default) {
+    CallingConv CC = CGT.getContext().getDefaultCXXMethodCallConv(isVariadic);
+    extInfo = extInfo.withCallingConv(CC);
+  }
+}
+
+/// Arrange the argument and result information for a C++ method
+/// of the given type.
+static const CGFunctionInfo &arrangeCXXMethodType(CodeGenTypes &CGT,
+                                  SmallVectorImpl<CanQualType> &prefix,
+                                        CanQual<FunctionProtoType> FTP) {
+  FunctionType::ExtInfo extInfo = FTP->getExtInfo();
+  adjustCXXMethodInfo(CGT, extInfo, FTP->isVariadic());
+  return arrangeLLVMFunctionInfo(CGT, prefix, FTP, extInfo);
 }
 
 /// Arrange the argument and result information for a value of the
-/// given function type.
+/// given freestanding function type.
 const CGFunctionInfo &
-CodeGenTypes::arrangeFunctionType(CanQual<FunctionProtoType> FTP) {
+CodeGenTypes::arrangeFreeFunctionType(CanQual<FunctionProtoType> FTP) {
   SmallVector<CanQualType, 16> argTypes;
-  return ::arrangeFunctionType(*this, argTypes, FTP);
+  return ::arrangeFreeFunctionType(*this, argTypes, FTP);
 }
 
 static CallingConv getCallingConventionForDecl(const Decl *D) {
@@ -134,7 +163,7 @@ CodeGenTypes::arrangeCXXMethodType(const CXXRecordDecl *RD,
   // Add the 'this' pointer.
argTypes.push_back(GetThisType(Context, RD)); - return ::arrangeFunctionType(*this, argTypes, + return ::arrangeCXXMethodType(*this, argTypes, FTP->getCanonicalTypeUnqualified().getAs()); } @@ -154,7 +183,7 @@ CodeGenTypes::arrangeCXXMethodDeclaration(const CXXMethodDecl *MD) { return arrangeCXXMethodType(MD->getParent(), prototype.getTypePtr()); } - return arrangeFunctionType(prototype); + return arrangeFreeFunctionType(prototype); } /// Arrange the argument and result information for a declaration @@ -176,7 +205,9 @@ CodeGenTypes::arrangeCXXConstructorDeclaration(const CXXConstructorDecl *D, for (unsigned i = 0, e = FTP->getNumArgs(); i != e; ++i) argTypes.push_back(FTP->getArgType(i)); - return arrangeFunctionType(resultType, argTypes, FTP->getExtInfo(), required); + FunctionType::ExtInfo extInfo = FTP->getExtInfo(); + adjustCXXMethodInfo(*this, extInfo, FTP->isVariadic()); + return arrangeLLVMFunctionInfo(resultType, argTypes, extInfo, required); } /// Arrange the argument and result information for a declaration, @@ -193,9 +224,12 @@ CodeGenTypes::arrangeCXXDestructor(const CXXDestructorDecl *D, CanQual FTP = GetFormalType(D); assert(FTP->getNumArgs() == 0 && "dtor with formal parameters"); + assert(FTP->isVariadic() == 0 && "dtor with formal parameters"); - return arrangeFunctionType(resultType, argTypes, FTP->getExtInfo(), - RequiredArgs::All); + FunctionType::ExtInfo extInfo = FTP->getExtInfo(); + adjustCXXMethodInfo(*this, extInfo, false); + return arrangeLLVMFunctionInfo(resultType, argTypes, extInfo, + RequiredArgs::All); } /// Arrange the argument and result information for the declaration or @@ -214,14 +248,14 @@ CodeGenTypes::arrangeFunctionDeclaration(const FunctionDecl *FD) { // non-variadic type. if (isa(FTy)) { CanQual noProto = FTy.getAs(); - return arrangeFunctionType(noProto->getResultType(), - ArrayRef(), - noProto->getExtInfo(), - RequiredArgs::All); + return arrangeLLVMFunctionInfo(noProto->getResultType(), + ArrayRef(), + noProto->getExtInfo(), + RequiredArgs::All); } assert(isa(FTy)); - return arrangeFunctionType(FTy.getAs()); + return arrangeFreeFunctionType(FTy.getAs()); } /// Arrange the argument and result information for the declaration or @@ -261,8 +295,8 @@ CodeGenTypes::arrangeObjCMessageSendSignature(const ObjCMethodDecl *MD, RequiredArgs required = (MD->isVariadic() ? RequiredArgs(argTys.size()) : RequiredArgs::All); - return arrangeFunctionType(GetReturnType(MD->getResultType()), argTys, - einfo, required); + return arrangeLLVMFunctionInfo(GetReturnType(MD->getResultType()), argTys, + einfo, required); } const CGFunctionInfo & @@ -284,8 +318,8 @@ CodeGenTypes::arrangeGlobalDeclaration(GlobalDecl GD) { /// because the function might be unprototyped, in which case it's /// target-dependent in crazy ways. 
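// Editorial aside: RequiredArgs records how many of a call's arguments are
// covered by the callee's prototype; the rest travel through the ellipsis.
// Illustrative C declarations:
//
//     int printf(const char *fmt, ...); /* prototype covers one argument */
//     int f();                          /* unprototyped: RequiredArgs(0) */
//
// For printf("%d %d", 1, 2) the arrangement sees three argument types, but
// required == 1, so the trailing two are lowered from their (promoted)
// types alone, which is what the doc comment above warns about.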
const CGFunctionInfo & -CodeGenTypes::arrangeFunctionCall(const CallArgList &args, - const FunctionType *fnType) { +CodeGenTypes::arrangeFreeFunctionCall(const CallArgList &args, + const FunctionType *fnType) { RequiredArgs required = RequiredArgs::All; if (const FunctionProtoType *proto = dyn_cast(fnType)) { if (proto->isVariadic()) @@ -295,22 +329,39 @@ CodeGenTypes::arrangeFunctionCall(const CallArgList &args, required = RequiredArgs(0); } - return arrangeFunctionCall(fnType->getResultType(), args, - fnType->getExtInfo(), required); + return arrangeFreeFunctionCall(fnType->getResultType(), args, + fnType->getExtInfo(), required); +} + +const CGFunctionInfo & +CodeGenTypes::arrangeFreeFunctionCall(QualType resultType, + const CallArgList &args, + FunctionType::ExtInfo info, + RequiredArgs required) { + // FIXME: Kill copy. + SmallVector argTypes; + for (CallArgList::const_iterator i = args.begin(), e = args.end(); + i != e; ++i) + argTypes.push_back(Context.getCanonicalParamType(i->Ty)); + return arrangeLLVMFunctionInfo(GetReturnType(resultType), argTypes, info, + required); } +/// Arrange a call to a C++ method, passing the given arguments. const CGFunctionInfo & -CodeGenTypes::arrangeFunctionCall(QualType resultType, - const CallArgList &args, - const FunctionType::ExtInfo &info, - RequiredArgs required) { +CodeGenTypes::arrangeCXXMethodCall(const CallArgList &args, + const FunctionProtoType *FPT, + RequiredArgs required) { // FIXME: Kill copy. SmallVector argTypes; for (CallArgList::const_iterator i = args.begin(), e = args.end(); i != e; ++i) argTypes.push_back(Context.getCanonicalParamType(i->Ty)); - return arrangeFunctionType(GetReturnType(resultType), argTypes, info, - required); + + FunctionType::ExtInfo info = FPT->getExtInfo(); + adjustCXXMethodInfo(*this, info, FPT->isVariadic()); + return arrangeLLVMFunctionInfo(GetReturnType(FPT->getResultType()), + argTypes, info, required); } const CGFunctionInfo & @@ -326,23 +377,23 @@ CodeGenTypes::arrangeFunctionDeclaration(QualType resultType, RequiredArgs required = (isVariadic ? RequiredArgs(args.size()) : RequiredArgs::All); - return arrangeFunctionType(GetReturnType(resultType), argTypes, info, - required); + return arrangeLLVMFunctionInfo(GetReturnType(resultType), argTypes, info, + required); } const CGFunctionInfo &CodeGenTypes::arrangeNullaryFunction() { - return arrangeFunctionType(getContext().VoidTy, ArrayRef(), - FunctionType::ExtInfo(), RequiredArgs::All); + return arrangeLLVMFunctionInfo(getContext().VoidTy, ArrayRef(), + FunctionType::ExtInfo(), RequiredArgs::All); } /// Arrange the argument and result information for an abstract value /// of a given function type. This is the method which all of the /// above functions ultimately defer to. 
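// Editorial aside: after this renaming the arrangement entry points form a
// funnel (summary, not code). arrangeFreeFunctionType/Call cover C and free
// C++ functions; arrangeCXXMethodType/Call and the constructor and
// destructor arrangements first run adjustCXXMethodInfo, so a CC_Default
// calling convention is replaced by the target's default C++ method
// convention (for example thiscall where the ABI uses one); and every path
// bottoms out in arrangeLLVMFunctionInfo below, the one place where a
// CGFunctionInfo is actually built and cached.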
const CGFunctionInfo & -CodeGenTypes::arrangeFunctionType(CanQualType resultType, - ArrayRef argTypes, - const FunctionType::ExtInfo &info, - RequiredArgs required) { +CodeGenTypes::arrangeLLVMFunctionInfo(CanQualType resultType, + ArrayRef argTypes, + FunctionType::ExtInfo info, + RequiredArgs required) { #ifndef NDEBUG for (ArrayRef::const_iterator I = argTypes.begin(), E = argTypes.end(); I != E; ++I) @@ -445,10 +496,9 @@ void CodeGenTypes::GetExpandedTypes(QualType type, } else { for (RecordDecl::field_iterator i = RD->field_begin(), e = RD->field_end(); i != e; ++i) { - const FieldDecl *FD = *i; - assert(!FD->isBitField() && + assert(!i->isBitField() && "Cannot expand structure with bit-field members."); - GetExpandedTypes(FD->getType(), expandedTypes); + GetExpandedTypes(i->getType(), expandedTypes); } } } else if (const ComplexType *CT = type->getAs()) { @@ -933,14 +983,18 @@ void CodeGenModule::ConstructAttributeList(const CGFunctionInfo &FI, case ABIArgInfo::Ignore: break; - case ABIArgInfo::Indirect: - PAL.push_back(llvm::AttributeWithIndex::get(Index, - llvm::Attribute::StructRet)); + case ABIArgInfo::Indirect: { + llvm::Attributes SRETAttrs = llvm::Attribute::StructRet; + if (RetAI.getInReg()) + SRETAttrs |= llvm::Attribute::InReg; + PAL.push_back(llvm::AttributeWithIndex::get(Index, SRETAttrs)); + ++Index; // sret disables readnone and readonly FuncAttrs &= ~(llvm::Attribute::ReadOnly | llvm::Attribute::ReadNone); break; + } case ABIArgInfo::Expand: llvm_unreachable("Invalid ABI kind for return argument"); @@ -949,14 +1003,6 @@ void CodeGenModule::ConstructAttributeList(const CGFunctionInfo &FI, if (RetAttrs) PAL.push_back(llvm::AttributeWithIndex::get(0, RetAttrs)); - // FIXME: RegParm should be reduced in case of global register variable. - signed RegParm; - if (FI.getHasRegParm()) - RegParm = FI.getRegParm(); - else - RegParm = CodeGenOpts.NumRegisterParameters; - - unsigned PointerWidth = getContext().getTargetInfo().getPointerWidth(0); for (CGFunctionInfo::const_arg_iterator it = FI.arg_begin(), ie = FI.arg_end(); it != ie; ++it) { QualType ParamType = it->type; @@ -974,22 +1020,22 @@ void CodeGenModule::ConstructAttributeList(const CGFunctionInfo &FI, Attrs |= llvm::Attribute::ZExt; // FALL THROUGH case ABIArgInfo::Direct: - if (RegParm > 0 && - (ParamType->isIntegerType() || ParamType->isPointerType() || - ParamType->isReferenceType())) { - RegParm -= - (Context.getTypeSize(ParamType) + PointerWidth - 1) / PointerWidth; - if (RegParm >= 0) + if (AI.getInReg()) Attrs |= llvm::Attribute::InReg; - } + // FIXME: handle sseregparm someday... // Increment Index if there is padding. Index += (AI.getPaddingType() != 0); if (llvm::StructType *STy = - dyn_cast(AI.getCoerceToType())) - Index += STy->getNumElements()-1; // 1 will be added below. + dyn_cast(AI.getCoerceToType())) { + unsigned Extra = STy->getNumElements()-1; // 1 will be added below. + if (Attrs != llvm::Attribute::None) + for (unsigned I = 0; I < Extra; ++I) + PAL.push_back(llvm::AttributeWithIndex::get(Index + I, Attrs)); + Index += Extra; + } break; case ABIArgInfo::Indirect: @@ -1355,7 +1401,8 @@ static llvm::Value *tryEmitFusedAutoreleaseOfResult(CodeGenFunction &CGF, static llvm::Value *tryRemoveRetainOfSelf(CodeGenFunction &CGF, llvm::Value *result) { // This is only applicable to a method with an immutable 'self'. 
- const ObjCMethodDecl *method = dyn_cast(CGF.CurCodeDecl); + const ObjCMethodDecl *method = + dyn_cast_or_null(CGF.CurCodeDecl); if (!method) return 0; const VarDecl *self = method->getSelfDecl(); if (!self->getType().isConstQualified()) return 0; @@ -2066,8 +2113,7 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, unsigned CallingConv; CodeGen::AttributeListType AttributeList; CGM.ConstructAttributeList(CallInfo, TargetDecl, AttributeList, CallingConv); - llvm::AttrListPtr Attrs = llvm::AttrListPtr::get(AttributeList.begin(), - AttributeList.end()); + llvm::AttrListPtr Attrs = llvm::AttrListPtr::get(AttributeList); llvm::BasicBlock *InvokeDest = 0; if (!(Attrs.getFnAttributes() & llvm::Attribute::NoUnwind)) diff --git a/lib/CodeGen/CGClass.cpp b/lib/CodeGen/CGClass.cpp index 2aedf95..e37fa3a 100644 --- a/lib/CodeGen/CGClass.cpp +++ b/lib/CodeGen/CGClass.cpp @@ -105,30 +105,28 @@ CodeGenFunction::GetAddressOfDirectBaseInCompleteClass(llvm::Value *This, } static llvm::Value * -ApplyNonVirtualAndVirtualOffset(CodeGenFunction &CGF, llvm::Value *ThisPtr, - CharUnits NonVirtual, llvm::Value *Virtual) { - llvm::Type *PtrDiffTy = - CGF.ConvertType(CGF.getContext().getPointerDiffType()); - - llvm::Value *NonVirtualOffset = 0; - if (!NonVirtual.isZero()) - NonVirtualOffset = llvm::ConstantInt::get(PtrDiffTy, - NonVirtual.getQuantity()); - - llvm::Value *BaseOffset; - if (Virtual) { - if (NonVirtualOffset) - BaseOffset = CGF.Builder.CreateAdd(Virtual, NonVirtualOffset); - else - BaseOffset = Virtual; - } else - BaseOffset = NonVirtualOffset; +ApplyNonVirtualAndVirtualOffset(CodeGenFunction &CGF, llvm::Value *ptr, + CharUnits nonVirtualOffset, + llvm::Value *virtualOffset) { + // Assert that we have something to do. + assert(!nonVirtualOffset.isZero() || virtualOffset != 0); + + // Compute the offset from the static and dynamic components. + llvm::Value *baseOffset; + if (!nonVirtualOffset.isZero()) { + baseOffset = llvm::ConstantInt::get(CGF.PtrDiffTy, + nonVirtualOffset.getQuantity()); + if (virtualOffset) { + baseOffset = CGF.Builder.CreateAdd(virtualOffset, baseOffset); + } + } else { + baseOffset = virtualOffset; + } // Apply the base offset. - ThisPtr = CGF.Builder.CreateBitCast(ThisPtr, CGF.Int8PtrTy); - ThisPtr = CGF.Builder.CreateGEP(ThisPtr, BaseOffset, "add.ptr"); - - return ThisPtr; + ptr = CGF.Builder.CreateBitCast(ptr, CGF.Int8PtrTy); + ptr = CGF.Builder.CreateInBoundsGEP(ptr, baseOffset, "add.ptr"); + return ptr; } llvm::Value * @@ -142,72 +140,81 @@ CodeGenFunction::GetAddressOfBaseClass(llvm::Value *Value, CastExpr::path_const_iterator Start = PathBegin; const CXXRecordDecl *VBase = 0; - // Get the virtual base. + // Sema has done some convenient canonicalization here: if the + // access path involved any virtual steps, the conversion path will + // *start* with a step down to the correct virtual base subobject, + // and hence will not require any further steps. if ((*Start)->isVirtual()) { VBase = cast((*Start)->getType()->getAs()->getDecl()); ++Start; } - + + // Compute the static offset of the ultimate destination within its + // allocating subobject (the virtual base, if there is one, or else + // the "complete" object that we see). CharUnits NonVirtualOffset = ComputeNonVirtualBaseClassOffset(getContext(), VBase ? VBase : Derived, Start, PathEnd); + // If there's a virtual step, we can sometimes "devirtualize" it. + // For now, that's limited to when the derived type is final. + // TODO: "devirtualize" this for accesses to known-complete objects. 
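// Editorial aside: a source-level picture of the devirtualized case that
// the block below handles (illustrative only):

struct B { int b; };
struct D final : virtual B { int d; };

B *upcast(D *p) { return p; } // D is final: B's offset in D is a constant

// Without 'final', a D* may point into a more-derived object in which the
// virtual base B lives at a different offset, so the offset has to be
// loaded from the vtable (the GetVirtualBaseClassOffset call below).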
+ if (VBase && Derived->hasAttr()) { + const ASTRecordLayout &layout = getContext().getASTRecordLayout(Derived); + CharUnits vBaseOffset = layout.getVBaseClassOffset(VBase); + NonVirtualOffset += vBaseOffset; + VBase = 0; // we no longer have a virtual step + } + // Get the base pointer type. llvm::Type *BasePtrTy = ConvertType((PathEnd[-1])->getType())->getPointerTo(); - + + // If the static offset is zero and we don't have a virtual step, + // just do a bitcast; null checks are unnecessary. if (NonVirtualOffset.isZero() && !VBase) { - // Just cast back. return Builder.CreateBitCast(Value, BasePtrTy); } + + llvm::BasicBlock *origBB = 0; + llvm::BasicBlock *endBB = 0; - llvm::BasicBlock *CastNull = 0; - llvm::BasicBlock *CastNotNull = 0; - llvm::BasicBlock *CastEnd = 0; - + // Skip over the offset (and the vtable load) if we're supposed to + // null-check the pointer. if (NullCheckValue) { - CastNull = createBasicBlock("cast.null"); - CastNotNull = createBasicBlock("cast.notnull"); - CastEnd = createBasicBlock("cast.end"); + origBB = Builder.GetInsertBlock(); + llvm::BasicBlock *notNullBB = createBasicBlock("cast.notnull"); + endBB = createBasicBlock("cast.end"); - llvm::Value *IsNull = Builder.CreateIsNull(Value); - Builder.CreateCondBr(IsNull, CastNull, CastNotNull); - EmitBlock(CastNotNull); + llvm::Value *isNull = Builder.CreateIsNull(Value); + Builder.CreateCondBr(isNull, endBB, notNullBB); + EmitBlock(notNullBB); } + // Compute the virtual offset. llvm::Value *VirtualOffset = 0; - if (VBase) { - if (Derived->hasAttr()) { - VirtualOffset = 0; - - const ASTRecordLayout &Layout = getContext().getASTRecordLayout(Derived); - - CharUnits VBaseOffset = Layout.getVBaseClassOffset(VBase); - NonVirtualOffset += VBaseOffset; - } else - VirtualOffset = GetVirtualBaseClassOffset(Value, Derived, VBase); + VirtualOffset = GetVirtualBaseClassOffset(Value, Derived, VBase); } - // Apply the offsets. + // Apply both offsets. Value = ApplyNonVirtualAndVirtualOffset(*this, Value, NonVirtualOffset, VirtualOffset); - // Cast back. + // Cast to the destination type. Value = Builder.CreateBitCast(Value, BasePtrTy); - + + // Build a phi if we needed a null check. if (NullCheckValue) { - Builder.CreateBr(CastEnd); - EmitBlock(CastNull); - Builder.CreateBr(CastEnd); - EmitBlock(CastEnd); + llvm::BasicBlock *notNullBB = Builder.GetInsertBlock(); + Builder.CreateBr(endBB); + EmitBlock(endBB); - llvm::PHINode *PHI = Builder.CreatePHI(Value->getType(), 2); - PHI->addIncoming(Value, CastNotNull); - PHI->addIncoming(llvm::Constant::getNullValue(Value->getType()), - CastNull); + llvm::PHINode *PHI = Builder.CreatePHI(BasePtrTy, 2, "cast.result"); + PHI->addIncoming(Value, notNullBB); + PHI->addIncoming(llvm::Constant::getNullValue(BasePtrTy), origBB); Value = PHI; } @@ -556,16 +563,19 @@ static void EmitMemberInitializer(CodeGenFunction &CGF, llvm::Value *ThisPtr = CGF.LoadCXXThis(); QualType RecordTy = CGF.getContext().getTypeDeclType(ClassDecl); - LValue LHS; + LValue LHS = CGF.MakeNaturalAlignAddrLValue(ThisPtr, RecordTy); - // If we are initializing an anonymous union field, drill down to the field. if (MemberInit->isIndirectMemberInitializer()) { - LHS = CGF.EmitLValueForAnonRecordField(ThisPtr, - MemberInit->getIndirectMember(), 0); + // If we are initializing an anonymous union field, drill down to + // the field. 
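// Editorial aside: the chain walk implemented just below corresponds to
// source like this (illustrative only):

struct S {
  union { int a; float f; }; // anonymous union member of S
  S() : a(0) {}              // 'a' is an IndirectFieldDecl: S::<anon>::a
};

// EmitLValueForFieldInitialization runs once per link in the chain: first
// to form the lvalue for the unnamed union object, then for 'a' itself.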
+ IndirectFieldDecl *IndirectField = MemberInit->getIndirectMember(); + IndirectFieldDecl::chain_iterator I = IndirectField->chain_begin(), + IEnd = IndirectField->chain_end(); + for ( ; I != IEnd; ++I) + LHS = CGF.EmitLValueForFieldInitialization(LHS, cast(*I)); FieldType = MemberInit->getIndirectMember()->getAnonField()->getType(); } else { - LValue ThisLHSLV = CGF.MakeNaturalAlignAddrLValue(ThisPtr, RecordTy); - LHS = CGF.EmitLValueForFieldInitialization(ThisLHSLV, Field); + LHS = CGF.EmitLValueForFieldInitialization(LHS, Field); } // Special case: if we are in a copy or move constructor, and we are copying @@ -717,7 +727,8 @@ void CodeGenFunction::EmitConstructorBody(FunctionArgList &Args) { // Before we go any further, try the complete->base constructor // delegation optimization. - if (CtorType == Ctor_Complete && IsConstructorDelegationValid(Ctor)) { + if (CtorType == Ctor_Complete && IsConstructorDelegationValid(Ctor) && + CGM.getContext().getTargetInfo().getCXXABI() != CXXABI_Microsoft) { if (CGDebugInfo *DI = getDebugInfo()) DI->EmitLocation(Builder, Ctor->getLocEnd()); EmitDelegateCXXConstructorCall(Ctor, Ctor_Base, Args); @@ -916,7 +927,7 @@ void CodeGenFunction::EmitDestructorBody(FunctionArgList &Args) { // Enter the cleanup scopes for virtual bases. EnterDtorCleanups(Dtor, Dtor_Complete); - if (!isTryBody) { + if (!isTryBody && CGM.getContext().getTargetInfo().getCXXABI() != CXXABI_Microsoft) { EmitCXXDestructorCall(Dtor, Dtor_Base, /*ForVirtualBase=*/false, LoadCXXThis()); break; @@ -1226,7 +1237,8 @@ CodeGenFunction::EmitCXXConstructorCall(const CXXConstructorDecl *D, CallExpr::const_arg_iterator ArgEnd) { CGDebugInfo *DI = getDebugInfo(); - if (DI && CGM.getCodeGenOpts().LimitDebugInfo) { + if (DI && + CGM.getCodeGenOpts().DebugInfo == CodeGenOptions::LimitedDebugInfo) { // If debug info for this class has not been emitted then this is the // right time to do so. const CXXRecordDecl *Parent = D->getParent(); @@ -1308,8 +1320,8 @@ CodeGenFunction::EmitSynthesizedCXXCopyCtorCall(const CXXConstructorDecl *D, EmitCallArg(Args, *Arg, ArgType); } - EmitCall(CGM.getTypes().arrangeFunctionCall(Args, FPT), Callee, - ReturnValueSlot(), Args, D); + EmitCall(CGM.getTypes().arrangeCXXMethodCall(Args, FPT, RequiredArgs::All), + Callee, ReturnValueSlot(), Args, D); } void @@ -1742,38 +1754,42 @@ CodeGenFunction::EmitCXXOperatorMemberCallee(const CXXOperatorCallExpr *E, return CGM.GetAddrOfFunction(MD, fnType); } -void CodeGenFunction::EmitForwardingCallToLambda(const CXXRecordDecl *Lambda, - CallArgList &CallArgs) { +void CodeGenFunction::EmitForwardingCallToLambda(const CXXRecordDecl *lambda, + CallArgList &callArgs) { // Lookup the call operator - DeclarationName Name + DeclarationName operatorName = getContext().DeclarationNames.getCXXOperatorName(OO_Call); - DeclContext::lookup_const_result Calls = Lambda->lookup(Name); - CXXMethodDecl *CallOperator = cast(*Calls.first++); - const FunctionProtoType *FPT = - CallOperator->getType()->getAs(); - QualType ResultType = FPT->getResultType(); + CXXMethodDecl *callOperator = + cast(*lambda->lookup(operatorName).first); // Get the address of the call operator. - GlobalDecl GD(CallOperator); - const CGFunctionInfo &CalleeFnInfo = - CGM.getTypes().arrangeFunctionCall(ResultType, CallArgs, FPT->getExtInfo(), - RequiredArgs::forPrototypePlus(FPT, 1)); - llvm::Type *Ty = CGM.getTypes().GetFunctionType(CalleeFnInfo); - llvm::Value *Callee = CGM.GetAddrOfFunction(GD, Ty); - - // Determine whether we have a return value slot to use. 
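// Editorial aside: this forwarding body is what backs the conversion of a
// captureless lambda to an ordinary function pointer (illustrative, C++11):

int (*fp)(int) = [](int x) { return x + 1; }; // fp names the static invoker

// The invoker is a compiler-synthesized function whose body is exactly the
// call emitted here: look up the lambda's operator(), forward the
// arguments, and hand the result back.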
- ReturnValueSlot Slot; - if (!ResultType->isVoidType() && - CurFnInfo->getReturnInfo().getKind() == ABIArgInfo::Indirect && - hasAggregateLLVMType(CurFnInfo->getReturnType())) - Slot = ReturnValueSlot(ReturnValue, ResultType.isVolatileQualified()); + const CGFunctionInfo &calleeFnInfo = + CGM.getTypes().arrangeCXXMethodDeclaration(callOperator); + llvm::Value *callee = + CGM.GetAddrOfFunction(GlobalDecl(callOperator), + CGM.getTypes().GetFunctionType(calleeFnInfo)); + + // Prepare the return slot. + const FunctionProtoType *FPT = + callOperator->getType()->castAs(); + QualType resultType = FPT->getResultType(); + ReturnValueSlot returnSlot; + if (!resultType->isVoidType() && + calleeFnInfo.getReturnInfo().getKind() == ABIArgInfo::Indirect && + hasAggregateLLVMType(calleeFnInfo.getReturnType())) + returnSlot = ReturnValueSlot(ReturnValue, resultType.isVolatileQualified()); + + // We don't need to separately arrange the call arguments because + // the call can't be variadic anyway --- it's impossible to forward + // variadic arguments. // Now emit our call. - RValue RV = EmitCall(CalleeFnInfo, Callee, Slot, CallArgs, CallOperator); + RValue RV = EmitCall(calleeFnInfo, callee, returnSlot, + callArgs, callOperator); - // Forward the returned value - if (!ResultType->isVoidType() && Slot.isNull()) - EmitReturnOfRValue(RV, ResultType); + // If necessary, copy the returned value into the slot. + if (!resultType->isVoidType() && returnSlot.isNull()) + EmitReturnOfRValue(RV, resultType); } void CodeGenFunction::EmitLambdaBlockInvokeBody() { diff --git a/lib/CodeGen/CGCleanup.cpp b/lib/CodeGen/CGCleanup.cpp index b00e2a2..f9ea7e0 100644 --- a/lib/CodeGen/CGCleanup.cpp +++ b/lib/CodeGen/CGCleanup.cpp @@ -831,8 +831,12 @@ void CodeGenFunction::PopCleanupBlock(bool FallthroughIsBranchThrough) { EmitBlock(EHEntry); - cleanupFlags.setIsForEHCleanup(); - EmitCleanup(*this, Fn, cleanupFlags, EHActiveFlag); + // We only actually emit the cleanup code if the cleanup is either + // active or was used before it was deactivated. + if (EHActiveFlag || IsActive) { + cleanupFlags.setIsForEHCleanup(); + EmitCleanup(*this, Fn, cleanupFlags, EHActiveFlag); + } Builder.CreateBr(getEHDispatchBlock(EHParent)); diff --git a/lib/CodeGen/CGCleanup.h b/lib/CodeGen/CGCleanup.h index 7726e44..d8dbe41 100644 --- a/lib/CodeGen/CGCleanup.h +++ b/lib/CodeGen/CGCleanup.h @@ -131,7 +131,7 @@ public: /// A scope which attempts to handle some, possibly all, types of /// exceptions. /// -/// Objective C @finally blocks are represented using a cleanup scope +/// Objective C \@finally blocks are represented using a cleanup scope /// after the catch scope. class EHCatchScope : public EHScope { // In effect, we have a flexible array member diff --git a/lib/CodeGen/CGDebugInfo.cpp b/lib/CodeGen/CGDebugInfo.cpp index d286d24..00127ac 100644 --- a/lib/CodeGen/CGDebugInfo.cpp +++ b/lib/CodeGen/CGDebugInfo.cpp @@ -320,7 +320,7 @@ void CGDebugInfo::CreateCompileUnit() { // Figure out which version of the ObjC runtime we have. unsigned RuntimeVers = 0; if (LO.ObjC1) - RuntimeVers = LO.ObjCNonFragileABI ? 2 : 1; + RuntimeVers = LO.ObjCRuntime.isNonFragile() ? 2 : 1; // Create new compile unit. DBuilder.createCompileUnit( @@ -335,7 +335,7 @@ void CGDebugInfo::CreateCompileUnit() { /// one if necessary. 
llvm::DIType CGDebugInfo::CreateType(const BuiltinType *BT) { unsigned Encoding = 0; - const char *BTName = NULL; + StringRef BTName; switch (BT->getKind()) { #define BUILTIN_TYPE(Id, SingletonId) #define PLACEHOLDER_TYPE(Id, SingletonId) \ @@ -350,8 +350,8 @@ llvm::DIType CGDebugInfo::CreateType(const BuiltinType *BT) { return llvm::DIType(); case BuiltinType::ObjCClass: return DBuilder.createForwardDecl(llvm::dwarf::DW_TAG_structure_type, - "objc_class", getOrCreateMainFile(), - 0); + "objc_class", TheCU, + getOrCreateMainFile(), 0); case BuiltinType::ObjCId: { // typedef struct objc_class *Class; // typedef struct objc_object { @@ -361,8 +361,7 @@ llvm::DIType CGDebugInfo::CreateType(const BuiltinType *BT) { // TODO: Cache these two types to avoid duplicates. llvm::DIType OCTy = DBuilder.createForwardDecl(llvm::dwarf::DW_TAG_structure_type, - "objc_class", getOrCreateMainFile(), - 0); + "objc_class", TheCU, getOrCreateMainFile(), 0); unsigned Size = CGM.getContext().getTypeSize(CGM.getContext().VoidPtrTy); llvm::DIType ISATy = DBuilder.createPointerType(OCTy, Size); @@ -382,7 +381,7 @@ llvm::DIType CGDebugInfo::CreateType(const BuiltinType *BT) { case BuiltinType::ObjCSel: { return DBuilder.createForwardDecl(llvm::dwarf::DW_TAG_structure_type, - "objc_selector", getOrCreateMainFile(), + "objc_selector", TheCU, getOrCreateMainFile(), 0); } case BuiltinType::UChar: @@ -514,7 +513,7 @@ llvm::DIType CGDebugInfo::createRecordFwdDecl(const RecordDecl *RD, llvm_unreachable("Unknown RecordDecl type!"); // Create the type. - return DBuilder.createForwardDecl(Tag, RDName, DefUnit, Line); + return DBuilder.createForwardDecl(Tag, RDName, Ctx, DefUnit, Line); } // Walk up the context chain and create forward decls for record decls, @@ -547,7 +546,7 @@ llvm::DIDescriptor CGDebugInfo::createContextChain(const Decl *Context) { /// then emit record's fwd if debug info size reduction is enabled. llvm::DIType CGDebugInfo::CreatePointeeType(QualType PointeeTy, llvm::DIFile Unit) { - if (!CGM.getCodeGenOpts().LimitDebugInfo) + if (CGM.getCodeGenOpts().DebugInfo != CodeGenOptions::LimitedDebugInfo) return getOrCreateType(PointeeTy, Unit); // Limit debug info for the pointee type. @@ -577,8 +576,10 @@ llvm::DIType CGDebugInfo::CreatePointerLikeType(unsigned Tag, const Type *Ty, QualType PointeeTy, llvm::DIFile Unit) { - if (Tag == llvm::dwarf::DW_TAG_reference_type) - return DBuilder.createReferenceType(CreatePointeeType(PointeeTy, Unit)); + if (Tag == llvm::dwarf::DW_TAG_reference_type || + Tag == llvm::dwarf::DW_TAG_rvalue_reference_type) + return DBuilder.createReferenceType(Tag, + CreatePointeeType(PointeeTy, Unit)); // Bit size, align and offset of the type. // Size is always the size of a pointer. We can't use getTypeSize here @@ -683,15 +684,13 @@ llvm::DIType CGDebugInfo::CreateType(const FunctionType *Ty, // FIXME: IF NOT, HOW IS THIS REPRESENTED? llvm-gcc doesn't represent '...'! 
if (isa(Ty)) EltTys.push_back(DBuilder.createUnspecifiedParameter()); - else if (const FunctionProtoType *FTP = dyn_cast(Ty)) { - for (unsigned i = 0, e = FTP->getNumArgs(); i != e; ++i) - EltTys.push_back(getOrCreateType(FTP->getArgType(i), Unit)); + else if (const FunctionProtoType *FPT = dyn_cast(Ty)) { + for (unsigned i = 0, e = FPT->getNumArgs(); i != e; ++i) + EltTys.push_back(getOrCreateType(FPT->getArgType(i), Unit)); } llvm::DIArray EltTypeArray = DBuilder.getOrCreateArray(EltTys); - - llvm::DIType DbgTy = DBuilder.createSubroutineType(Unit, EltTypeArray); - return DbgTy; + return DBuilder.createSubroutineType(Unit, EltTypeArray); } @@ -765,7 +764,7 @@ CollectRecordFields(const RecordDecl *record, llvm::DIFile tunit, const ASTRecordLayout &layout = CGM.getContext().getASTRecordLayout(record); const CXXRecordDecl *CXXDecl = dyn_cast(record); - // For C++11 Lambdas a Fields will be the same as a Capture, but the Capture + // For C++11 Lambdas a Field will be the same as a Capture, but the Capture // has the name and the location of the variable so we should iterate over // both concurrently. if (CXXDecl && CXXDecl->isLambda()) { @@ -912,7 +911,7 @@ CGDebugInfo::CreateCXXMemberFunction(const CXXMethodDecl *Method, StringRef MethodName = getFunctionName(Method); llvm::DIType MethodTy = getOrCreateMethodType(Method, Unit); - + // Since a single ctor/dtor corresponds to multiple functions, it doesn't // make sense to give a single ctor/dtor a linkage name. StringRef MethodLinkageName; @@ -992,15 +991,17 @@ CollectCXXMemberFunctions(const CXXRecordDecl *RD, llvm::DIFile Unit, if (D->isImplicit() && !D->isUsed()) continue; - if (const CXXMethodDecl *Method = dyn_cast(D)) - EltTys.push_back(CreateCXXMemberFunction(Method, Unit, RecordTy)); + if (const CXXMethodDecl *Method = dyn_cast(D)) { + // Only emit debug information for user provided functions, we're + // unlikely to want info for artificial functions. + if (Method->isUserProvided()) + EltTys.push_back(CreateCXXMemberFunction(Method, Unit, RecordTy)); + } else if (FunctionTemplateDecl *FTD = dyn_cast(D)) for (FunctionTemplateDecl::spec_iterator SI = FTD->spec_begin(), - SE = FTD->spec_end(); SI != SE; ++SI) { - FunctionDecl *FD = *SI; - if (CXXMethodDecl *M = dyn_cast(FD)) - EltTys.push_back(CreateCXXMemberFunction(M, Unit, RecordTy)); - } + SE = FTD->spec_end(); SI != SE; ++SI) + EltTys.push_back(CreateCXXMemberFunction(cast(*SI), Unit, + RecordTy)); } } @@ -1047,7 +1048,7 @@ CollectCXXBases(const CXXRecordDecl *RD, llvm::DIFile Unit, .getVirtualBaseOffsetOffset(RD, Base).getQuantity(); BFlags = llvm::DIDescriptor::FlagVirtual; } else - BaseOffset = RL.getBaseClassOffsetInBits(Base); + BaseOffset = CGM.getContext().toBits(RL.getBaseClassOffset(Base)); // FIXME: Inconsistent units for BaseOffset. It is in bytes when // BI->isVirtual() and bits when not. @@ -1083,7 +1084,7 @@ CollectTemplateParams(const TemplateParameterList *TPList, llvm::DIType TTy = getOrCreateType(TA.getIntegralType(), Unit); llvm::DITemplateValueParameter TVP = DBuilder.createTemplateValueParameter(TheCU, ND->getName(), TTy, - TA.getAsIntegral()->getZExtValue()); + TA.getAsIntegral().getZExtValue()); TemplateParams.push_back(TVP); } } @@ -1177,6 +1178,7 @@ CollectVTableInfo(const CXXRecordDecl *RD, llvm::DIFile Unit, /// getOrCreateRecordType - Emit record type's standalone debug info. 
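// Editorial aside: the isUserProvided() filter added a few hunks above
// distinguishes these two cases (illustrative only, C++11):

struct T {
  T() = default; // user-declared but not user-provided: no member DIE
  void f();      // user-provided: described by a DW_TAG_subprogram
};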
llvm::DIType CGDebugInfo::getOrCreateRecordType(QualType RTy, SourceLocation Loc) { + assert(CGM.getCodeGenOpts().DebugInfo >= CodeGenOptions::LimitedDebugInfo); llvm::DIType T = getOrCreateType(RTy, getOrCreateFile(Loc)); return T; } @@ -1185,6 +1187,7 @@ llvm::DIType CGDebugInfo::getOrCreateRecordType(QualType RTy, /// debug info. llvm::DIType CGDebugInfo::getOrCreateInterfaceType(QualType D, SourceLocation Loc) { + assert(CGM.getCodeGenOpts().DebugInfo >= CodeGenOptions::LimitedDebugInfo); llvm::DIType T = getOrCreateType(D, getOrCreateFile(Loc)); DBuilder.retainType(T); return T; @@ -1287,7 +1290,7 @@ llvm::DIType CGDebugInfo::CreateType(const ObjCInterfaceType *Ty, if (!Def) { llvm::DIType FwdDecl = DBuilder.createForwardDecl(llvm::dwarf::DW_TAG_structure_type, - ID->getName(), DefUnit, Line, + ID->getName(), TheCU, DefUnit, Line, RuntimeLang); return FwdDecl; } @@ -1385,8 +1388,8 @@ llvm::DIType CGDebugInfo::CreateType(const ObjCInterfaceType *Ty, // the non-fragile abi and the debugger should ignore the value anyways. // Call it the FieldNo+1 due to how debuggers use the information, // e.g. negating the value when it needs a lookup in the dynamic table. - uint64_t FieldOffset = CGM.getLangOpts().ObjCNonFragileABI ? FieldNo+1 - : RL.getFieldOffset(FieldNo); + uint64_t FieldOffset = CGM.getLangOpts().ObjCRuntime.isNonFragile() + ? FieldNo+1 : RL.getFieldOffset(FieldNo); unsigned Flags = 0; if (Field->getAccessControl() == ObjCIvarDecl::Protected) @@ -1456,7 +1459,6 @@ llvm::DIType CGDebugInfo::CreateType(const ArrayType *Ty, uint64_t Size; uint64_t Align; - // FIXME: make getTypeAlign() aware of VLAs and incomplete array types if (const VariableArrayType *VAT = dyn_cast(Ty)) { Size = 0; @@ -1464,7 +1466,10 @@ llvm::DIType CGDebugInfo::CreateType(const ArrayType *Ty, CGM.getContext().getTypeAlign(CGM.getContext().getBaseElementType(VAT)); } else if (Ty->isIncompleteArrayType()) { Size = 0; - Align = CGM.getContext().getTypeAlign(Ty->getElementType()); + if (Ty->getElementType()->isIncompleteType()) + Align = 0; + else + Align = CGM.getContext().getTypeAlign(Ty->getElementType()); } else if (Ty->isDependentSizedArrayType() || Ty->isIncompleteType()) { Size = 0; Align = 0; @@ -1479,25 +1484,21 @@ llvm::DIType CGDebugInfo::CreateType(const ArrayType *Ty, // obvious/recursive way? SmallVector Subscripts; QualType EltTy(Ty, 0); - if (Ty->isIncompleteArrayType()) + while ((Ty = dyn_cast(EltTy))) { + int64_t UpperBound = 0; + int64_t LowerBound = 0; + if (const ConstantArrayType *CAT = dyn_cast(Ty)) { + if (CAT->getSize().getZExtValue()) + UpperBound = CAT->getSize().getZExtValue() - 1; + } else + // This is an unbounded array. Use Low = 1, Hi = 0 to express such + // arrays. + LowerBound = 1; + + // FIXME: Verify this is right for VLAs. + Subscripts.push_back(DBuilder.getOrCreateSubrange(LowerBound, + UpperBound)); EltTy = Ty->getElementType(); - else { - while ((Ty = dyn_cast(EltTy))) { - int64_t UpperBound = 0; - int64_t LowerBound = 0; - if (const ConstantArrayType *CAT = dyn_cast(Ty)) { - if (CAT->getSize().getZExtValue()) - UpperBound = CAT->getSize().getZExtValue() - 1; - } else - // This is an unbounded array. Use Low = 1, Hi = 0 to express such - // arrays. - LowerBound = 1; - - // FIXME: Verify this is right for VLAs. 
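// Editorial aside: the Low = 1, Hi = 0 subscript used above is DWARF's way
// of marking an array dimension of unknown extent (illustrative):
//
//     extern int table[]; // incomplete bound: empty-range subscript
//     int fixed[8];       // constant bound: UpperBound = 7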
- Subscripts.push_back(DBuilder.getOrCreateSubrange(LowerBound, - UpperBound)); - EltTy = Ty->getElementType(); - } } llvm::DIArray SubscriptArray = DBuilder.getOrCreateArray(Subscripts); @@ -1537,7 +1538,7 @@ llvm::DIType CGDebugInfo::CreateType(const MemberPointerType *Ty, uint64_t FieldOffset = 0; llvm::Value *ElementTypes[2]; - // FIXME: This should probably be a function type instead. + // FIXME: This should be a DW_TAG_pointer_to_member type. ElementTypes[0] = DBuilder.createMemberType(U, "ptr", U, 0, Info.first, Info.second, FieldOffset, 0, @@ -1565,7 +1566,6 @@ llvm::DIType CGDebugInfo::CreateType(const AtomicType *Ty, /// CreateEnumType - get enumeration type. llvm::DIType CGDebugInfo::CreateEnumType(const EnumDecl *ED) { - llvm::DIFile Unit = getOrCreateFile(ED->getLocation()); SmallVector Enumerators; // Create DIEnumerator elements for each enumerator. @@ -1590,9 +1590,13 @@ llvm::DIType CGDebugInfo::CreateEnumType(const EnumDecl *ED) { } llvm::DIDescriptor EnumContext = getContextDescriptor(cast(ED->getDeclContext())); + llvm::DIType ClassTy = ED->isScopedUsingClassTag() ? + getOrCreateType(ED->getIntegerType(), DefUnit) : llvm::DIType(); + unsigned Flags = !ED->isCompleteDefinition() ? llvm::DIDescriptor::FlagFwdDecl : 0; llvm::DIType DbgTy = DBuilder.createEnumerationType(EnumContext, ED->getName(), DefUnit, Line, - Size, Align, EltArray); + Size, Align, EltArray, + ClassTy, Flags); return DbgTy; } @@ -1626,8 +1630,13 @@ static QualType UnwrapTypeForDebugInfo(QualType T) { case Type::Paren: T = cast(T)->getInnerType(); break; - case Type::SubstTemplateTypeParm: + case Type::SubstTemplateTypeParm: { + // We need to keep the qualifiers handy since getReplacementType() + // will strip them away. + unsigned Quals = T.getLocalFastQualifiers(); T = cast(T)->getReplacementType(); + T.addFastQualifiers(Quals); + } break; case Type::Auto: T = cast(T)->getDeducedType(); @@ -1686,23 +1695,26 @@ llvm::DIType CGDebugInfo::getOrCreateType(QualType Ty, llvm::DIFile Unit) { // Unwrap the type as needed for debug information. Ty = UnwrapTypeForDebugInfo(Ty); - + llvm::DIType T = getCompletedTypeOrNull(Ty); - if (T.Verify()) return T; + if (T.Verify()) + return T; // Otherwise create the type. llvm::DIType Res = CreateTypeNode(Ty, Unit); llvm::DIType TC = getTypeOrNull(Ty); if (TC.Verify() && TC.isForwardDecl()) - ReplaceMap.push_back(std::make_pair(Ty.getAsOpaquePtr(), TC)); + ReplaceMap.push_back(std::make_pair(Ty.getAsOpaquePtr(), + static_cast(TC))); // And update the type cache. TypeCache[Ty.getAsOpaquePtr()] = Res; if (!Res.isForwardDecl()) CompletedTypeCache[Ty.getAsOpaquePtr()] = Res; + return Res; } @@ -1807,7 +1819,8 @@ llvm::DIType CGDebugInfo::getOrCreateLimitedType(QualType Ty, llvm::DIType Res = CreateLimitedTypeNode(Ty, Unit); if (T.Verify() && T.isForwardDecl()) - ReplaceMap.push_back(std::make_pair(Ty.getAsOpaquePtr(), T)); + ReplaceMap.push_back(std::make_pair(Ty.getAsOpaquePtr(), + static_cast(T))); // And update the type cache. 
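// Editorial aside: the ClassTy threaded into createEnumerationType above
// records the underlying type of a scoped enumeration (illustrative,
// C++11):

enum class Color : unsigned char { Red, Green }; // isScopedUsingClassTag()

// For an unscoped 'enum { ... }', ClassTy stays a null DIType and the
// emitted debug info is unchanged by this patch.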
TypeCache[Ty.getAsOpaquePtr()] = Res; @@ -1824,7 +1837,7 @@ llvm::DIType CGDebugInfo::CreateLimitedType(const RecordType *Ty) { StringRef RDName = RD->getName(); llvm::DIDescriptor RDContext; - if (CGM.getCodeGenOpts().LimitDebugInfo) + if (CGM.getCodeGenOpts().DebugInfo == CodeGenOptions::LimitedDebugInfo) RDContext = createContextChain(cast(RD->getDeclContext())); else RDContext = getContextDescriptor(cast(RD->getDeclContext())); @@ -1953,6 +1966,7 @@ llvm::DISubprogram CGDebugInfo::getFunctionDeclaration(const Decl *D) { llvm::DIType CGDebugInfo::getOrCreateFunctionType(const Decl * D, QualType FnType, llvm::DIFile F) { + if (const CXXMethodDecl *Method = dyn_cast(D)) return getOrCreateMethodType(Method, F); if (const ObjCMethodDecl *OMethod = dyn_cast(D)) { @@ -2013,18 +2027,21 @@ void CGDebugInfo::EmitFunctionStart(GlobalDecl GD, QualType FnType, LinkageName = CGM.getMangledName(GD); Flags |= llvm::DIDescriptor::FlagPrototyped; } - if (LinkageName == Name) + if (LinkageName == Name || + CGM.getCodeGenOpts().DebugInfo <= CodeGenOptions::DebugLineTablesOnly) LinkageName = StringRef(); - if (const NamespaceDecl *NSDecl = - dyn_cast_or_null(FD->getDeclContext())) - FDContext = getOrCreateNameSpace(NSDecl); - else if (const RecordDecl *RDecl = - dyn_cast_or_null(FD->getDeclContext())) - FDContext = getContextDescriptor(cast(RDecl->getDeclContext())); + if (CGM.getCodeGenOpts().DebugInfo >= CodeGenOptions::LimitedDebugInfo) { + if (const NamespaceDecl *NSDecl = + dyn_cast_or_null(FD->getDeclContext())) + FDContext = getOrCreateNameSpace(NSDecl); + else if (const RecordDecl *RDecl = + dyn_cast_or_null(FD->getDeclContext())) + FDContext = getContextDescriptor(cast(RDecl->getDeclContext())); - // Collect template parameters. - TParamsArray = CollectFunctionTemplateParams(FD, Unit); + // Collect template parameters. + TParamsArray = CollectFunctionTemplateParams(FD, Unit); + } } else if (const ObjCMethodDecl *OMD = dyn_cast(D)) { Name = getObjCMethodName(OMD); Flags |= llvm::DIDescriptor::FlagPrototyped; @@ -2040,14 +2057,27 @@ void CGDebugInfo::EmitFunctionStart(GlobalDecl GD, QualType FnType, if (D->isImplicit()) Flags |= llvm::DIDescriptor::FlagArtificial; - llvm::DISubprogram SPDecl = getFunctionDeclaration(D); - llvm::DISubprogram SP = - DBuilder.createFunction(FDContext, Name, LinkageName, Unit, - LineNo, getOrCreateFunctionType(D, FnType, Unit), - Fn->hasInternalLinkage(), true/*definition*/, - getLineNumber(CurLoc), - Flags, CGM.getLangOpts().Optimize, Fn, - TParamsArray, SPDecl); + llvm::DIType DIFnType; + llvm::DISubprogram SPDecl; + if (CGM.getCodeGenOpts().DebugInfo >= CodeGenOptions::LimitedDebugInfo) { + DIFnType = getOrCreateFunctionType(D, FnType, Unit); + SPDecl = getFunctionDeclaration(D); + } else { + // Create fake but valid subroutine type. Otherwise + // llvm::DISubprogram::Verify() would return false, and + // subprogram DIE will miss DW_AT_decl_file and + // DW_AT_decl_line fields. + SmallVector Elts; + llvm::DIArray EltTypeArray = DBuilder.getOrCreateArray(Elts); + DIFnType = DBuilder.createSubroutineType(Unit, EltTypeArray); + } + llvm::DISubprogram SP; + SP = DBuilder.createFunction(FDContext, Name, LinkageName, Unit, + LineNo, DIFnType, + Fn->hasInternalLinkage(), true/*definition*/, + getLineNumber(CurLoc), Flags, + CGM.getLangOpts().Optimize, + Fn, TParamsArray, SPDecl); // Push function on region stack. 
  llvm::MDNode *SPN = SP;
@@ -2205,6 +2235,7 @@ llvm::DIType CGDebugInfo::EmitTypeForVarWithBlocksAttr(const ValueDecl *VD,
 void CGDebugInfo::EmitDeclare(const VarDecl *VD, unsigned Tag,
                               llvm::Value *Storage,
                               unsigned ArgNo, CGBuilderTy &Builder) {
+  assert(CGM.getCodeGenOpts().DebugInfo >= CodeGenOptions::LimitedDebugInfo);
   assert(!LexicalBlockStack.empty() && "Region stack mismatch, stack empty!");
 
   llvm::DIFile Unit = getOrCreateFile(VD->getLocation());
@@ -2224,14 +2255,14 @@ void CGDebugInfo::EmitDeclare(const VarDecl *VD, unsigned Tag,
     // If Storage is an aggregate returned as 'sret' then let debugger know
     // about this.
     if (Arg->hasStructRetAttr())
-      Ty = DBuilder.createReferenceType(Ty);
+      Ty = DBuilder.createReferenceType(llvm::dwarf::DW_TAG_reference_type, Ty);
     else if (CXXRecordDecl *Record = VD->getType()->getAsCXXRecordDecl()) {
       // If an aggregate variable has a non-trivial destructor or a
       // non-trivial copy constructor, then it is passed indirectly. Let
       // debug info know about this by using a reference to the aggregate
       // type as the argument type.
       if (!Record->hasTrivialCopyConstructor() ||
           !Record->hasTrivialDestructor())
-        Ty = DBuilder.createReferenceType(Ty);
+        Ty = DBuilder.createReferenceType(llvm::dwarf::DW_TAG_reference_type, Ty);
     }
   }
@@ -2272,8 +2303,25 @@ void CGDebugInfo::EmitDeclare(const VarDecl *VD, unsigned Tag,
       DBuilder.insertDeclare(Storage, D, Builder.GetInsertBlock());
     Call->setDebugLoc(llvm::DebugLoc::get(Line, Column, Scope));
     return;
-  }
+  } else if (isa<VariableArrayType>(VD->getType())) {
+    // These are "complex" variables in that they need an op_deref.
     // Create the descriptor for the variable.
+    llvm::Value *Addr = llvm::ConstantInt::get(CGM.Int64Ty,
+                                               llvm::DIBuilder::OpDeref);
+    llvm::DIVariable D =
+      DBuilder.createComplexVariable(Tag,
+                                     llvm::DIDescriptor(Scope),
+                                     Name, Unit, Line, Ty,
+                                     Addr, ArgNo);
+
+    // Insert an llvm.dbg.declare into the current block.
+    llvm::Instruction *Call =
+      DBuilder.insertDeclare(Storage, D, Builder.GetInsertBlock());
+    Call->setDebugLoc(llvm::DebugLoc::get(Line, Column, Scope));
+    return;
+  }
+
+  // Create the descriptor for the variable.
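// Editorial aside: the "complex" path just above covers variables whose
// storage is reached through an extra indirection, the typical case being
// a variable-length array (illustrative, C99):
//
//     void f(int n) {
//       int vla[n]; /* 'Storage' is a pointer to the stack allocation */
//     }
//
// The OpDeref element tells the debugger to load through that pointer
// before applying the array type, which a plain llvm.dbg.declare would not
// express.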
llvm::DIVariable D = DBuilder.createLocalVariable(Tag, llvm::DIDescriptor(Scope), Name, Unit, Line, Ty, @@ -2321,12 +2369,14 @@ void CGDebugInfo::EmitDeclare(const VarDecl *VD, unsigned Tag, void CGDebugInfo::EmitDeclareOfAutoVariable(const VarDecl *VD, llvm::Value *Storage, CGBuilderTy &Builder) { + assert(CGM.getCodeGenOpts().DebugInfo >= CodeGenOptions::LimitedDebugInfo); EmitDeclare(VD, llvm::dwarf::DW_TAG_auto_variable, Storage, 0, Builder); } void CGDebugInfo::EmitDeclareOfBlockDeclRefVariable( const VarDecl *VD, llvm::Value *Storage, CGBuilderTy &Builder, const CGBlockInfo &blockInfo) { + assert(CGM.getCodeGenOpts().DebugInfo >= CodeGenOptions::LimitedDebugInfo); assert(!LexicalBlockStack.empty() && "Region stack mismatch, stack empty!"); if (Builder.GetInsertBlock() == 0) @@ -2387,6 +2437,7 @@ void CGDebugInfo::EmitDeclareOfBlockDeclRefVariable( void CGDebugInfo::EmitDeclareOfArgVariable(const VarDecl *VD, llvm::Value *AI, unsigned ArgNo, CGBuilderTy &Builder) { + assert(CGM.getCodeGenOpts().DebugInfo >= CodeGenOptions::LimitedDebugInfo); EmitDeclare(VD, llvm::dwarf::DW_TAG_arg_variable, AI, ArgNo, Builder); } @@ -2403,6 +2454,7 @@ namespace { void CGDebugInfo::EmitDeclareOfBlockLiteralArgVariable(const CGBlockInfo &block, llvm::Value *addr, CGBuilderTy &Builder) { + assert(CGM.getCodeGenOpts().DebugInfo >= CodeGenOptions::LimitedDebugInfo); ASTContext &C = CGM.getContext(); const BlockDecl *blockDecl = block.getBlockDecl(); @@ -2547,6 +2599,7 @@ void CGDebugInfo::EmitDeclareOfBlockLiteralArgVariable(const CGBlockInfo &block, /// EmitGlobalVariable - Emit information about a global variable. void CGDebugInfo::EmitGlobalVariable(llvm::GlobalVariable *Var, const VarDecl *D) { + assert(CGM.getCodeGenOpts().DebugInfo >= CodeGenOptions::LimitedDebugInfo); // Create global variable debug descriptor. llvm::DIFile Unit = getOrCreateFile(D->getLocation()); unsigned LineNo = getLineNumber(D->getLocation()); @@ -2557,9 +2610,7 @@ void CGDebugInfo::EmitGlobalVariable(llvm::GlobalVariable *Var, if (T->isIncompleteArrayType()) { // CodeGen turns int[] into int[1] so we'll do the same here. - llvm::APSInt ConstVal(32); - - ConstVal = 1; + llvm::APInt ConstVal(32, 1); QualType ET = CGM.getContext().getAsArrayType(T)->getElementType(); T = CGM.getContext().getConstantArrayType(ET, ConstVal, @@ -2582,6 +2633,7 @@ void CGDebugInfo::EmitGlobalVariable(llvm::GlobalVariable *Var, /// EmitGlobalVariable - Emit information about an objective-c interface. void CGDebugInfo::EmitGlobalVariable(llvm::GlobalVariable *Var, ObjCInterfaceDecl *ID) { + assert(CGM.getCodeGenOpts().DebugInfo >= CodeGenOptions::LimitedDebugInfo); // Create global variable debug descriptor. llvm::DIFile Unit = getOrCreateFile(ID->getLocation()); unsigned LineNo = getLineNumber(ID->getLocation()); @@ -2592,9 +2644,7 @@ void CGDebugInfo::EmitGlobalVariable(llvm::GlobalVariable *Var, if (T->isIncompleteArrayType()) { // CodeGen turns int[] into int[1] so we'll do the same here. - llvm::APSInt ConstVal(32); - - ConstVal = 1; + llvm::APInt ConstVal(32, 1); QualType ET = CGM.getContext().getAsArrayType(T)->getElementType(); T = CGM.getContext().getConstantArrayType(ET, ConstVal, @@ -2609,13 +2659,15 @@ void CGDebugInfo::EmitGlobalVariable(llvm::GlobalVariable *Var, /// EmitGlobalVariable - Emit global variable's debug info. void CGDebugInfo::EmitGlobalVariable(const ValueDecl *VD, llvm::Constant *Init) { + assert(CGM.getCodeGenOpts().DebugInfo >= CodeGenOptions::LimitedDebugInfo); // Create the descriptor for the variable. 
llvm::DIFile Unit = getOrCreateFile(VD->getLocation()); StringRef Name = VD->getName(); llvm::DIType Ty = getOrCreateType(VD->getType(), Unit); if (const EnumConstantDecl *ECD = dyn_cast(VD)) { - if (const EnumDecl *ED = dyn_cast(ECD->getDeclContext())) - Ty = CreateEnumType(ED); + const EnumDecl *ED = cast(ECD->getDeclContext()); + assert(isa(ED->getTypeForDecl()) && "Enum without EnumType?"); + Ty = getOrCreateType(QualType(ED->getTypeForDecl(), 0), Unit); } // Do not use DIGlobalVariable for enums. if (Ty.getTag() == llvm::dwarf::DW_TAG_enumeration_type) diff --git a/lib/CodeGen/CGDebugInfo.h b/lib/CodeGen/CGDebugInfo.h index ec7705c..44cc49a 100644 --- a/lib/CodeGen/CGDebugInfo.h +++ b/lib/CodeGen/CGDebugInfo.h @@ -17,9 +17,9 @@ #include "clang/AST/Type.h" #include "clang/AST/Expr.h" #include "clang/Basic/SourceLocation.h" +#include "llvm/DebugInfo.h" +#include "llvm/DIBuilder.h" #include "llvm/ADT/DenseMap.h" -#include "llvm/Analysis/DebugInfo.h" -#include "llvm/Analysis/DIBuilder.h" #include "llvm/Support/ValueHandle.h" #include "llvm/Support/Allocator.h" @@ -30,6 +30,7 @@ namespace llvm { } namespace clang { + class CXXMethodDecl; class VarDecl; class ObjCInterfaceDecl; class ClassTemplateSpecializationDecl; diff --git a/lib/CodeGen/CGDecl.cpp b/lib/CodeGen/CGDecl.cpp index 6447779..be6638e 100644 --- a/lib/CodeGen/CGDecl.cpp +++ b/lib/CodeGen/CGDecl.cpp @@ -188,11 +188,15 @@ CodeGenFunction::CreateStaticVarDecl(const VarDecl &D, new llvm::GlobalVariable(CGM.getModule(), LTy, Ty.isConstant(getContext()), Linkage, CGM.EmitNullConstant(D.getType()), Name, 0, - D.isThreadSpecified(), + llvm::GlobalVariable::NotThreadLocal, CGM.getContext().getTargetAddressSpace(Ty)); GV->setAlignment(getContext().getDeclAlign(&D).getQuantity()); if (Linkage != llvm::GlobalValue::InternalLinkage) GV->setVisibility(CurFn->getVisibility()); + + if (D.isThreadSpecified()) + CGM.setTLSMode(GV, D); + return GV; } @@ -239,7 +243,7 @@ CodeGenFunction::AddInitializerToStaticVarDecl(const VarDecl &D, OldGV->isConstant(), OldGV->getLinkage(), Init, "", /*InsertBefore*/ OldGV, - D.isThreadSpecified(), + OldGV->getThreadLocalMode(), CGM.getContext().getTargetAddressSpace(D.getType())); GV->setVisibility(OldGV->getVisibility()); @@ -326,7 +330,8 @@ void CodeGenFunction::EmitStaticVarDecl(const VarDecl &D, // Emit global variable debug descriptor for static vars. CGDebugInfo *DI = getDebugInfo(); - if (DI) { + if (DI && + CGM.getCodeGenOpts().DebugInfo >= CodeGenOptions::LimitedDebugInfo) { DI->setLocation(D.getLocation()); DI->EmitGlobalVariable(var, &D); } @@ -489,6 +494,14 @@ static bool isAccessedBy(const VarDecl &var, const Stmt *s) { if (const DeclRefExpr *ref = dyn_cast(e)) return (ref->getDecl() == &var); + if (const BlockExpr *be = dyn_cast(e)) { + const BlockDecl *block = be->getBlockDecl(); + for (BlockDecl::capture_const_iterator i = block->capture_begin(), + e = block->capture_end(); i != e; ++i) { + if (i->getVariable() == &var) + return true; + } + } } for (Stmt::const_child_range children = s->children(); children; ++children) @@ -897,11 +910,14 @@ CodeGenFunction::EmitAutoVarAlloca(const VarDecl &D) { // Emit debug info for local var declaration. 
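// Editorial aside: the BlockExpr case added to isAccessedBy above catches a
// variable captured by a block inside that variable's own initializer
// (illustrative, blocks extension):
//
//     __block void (^recur)(void) = ^{ recur(); }; // init accesses 'recur'
//
// Knowing this lets CodeGen avoid emitting the store as if the initializer
// could not observe the variable being initialized.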
if (HaveInsertPoint()) if (CGDebugInfo *DI = getDebugInfo()) { - DI->setLocation(D.getLocation()); - if (Target.useGlobalsForAutomaticVariables()) { - DI->EmitGlobalVariable(static_cast(DeclPtr), &D); - } else - DI->EmitDeclareOfAutoVariable(&D, DeclPtr, Builder); + if (CGM.getCodeGenOpts().DebugInfo >= CodeGenOptions::LimitedDebugInfo) { + DI->setLocation(D.getLocation()); + if (Target.useGlobalsForAutomaticVariables()) { + DI->EmitGlobalVariable(static_cast(DeclPtr), + &D); + } else + DI->EmitDeclareOfAutoVariable(&D, DeclPtr, Builder); + } } if (D.hasAttr()) @@ -1054,7 +1070,7 @@ void CodeGenFunction::EmitAutoVarInit(const AutoVarEmission &emission) { llvm::GlobalVariable *GV = new llvm::GlobalVariable(CGM.getModule(), constant->getType(), true, llvm::GlobalValue::PrivateLinkage, - constant, Name, 0, false, 0); + constant, Name); GV->setAlignment(alignment.getQuantity()); GV->setUnnamedAddr(true); @@ -1477,8 +1493,11 @@ void CodeGenFunction::EmitParmDecl(const VarDecl &D, llvm::Value *Arg, LocalDeclMap[&D] = Arg; if (CGDebugInfo *DI = getDebugInfo()) { - DI->setLocation(D.getLocation()); - DI->EmitDeclareOfBlockLiteralArgVariable(*BlockInfo, Arg, Builder); + if (CGM.getCodeGenOpts().DebugInfo >= + CodeGenOptions::LimitedDebugInfo) { + DI->setLocation(D.getLocation()); + DI->EmitDeclareOfBlockLiteralArgVariable(*BlockInfo, Arg, Builder); + } } return; @@ -1556,8 +1575,11 @@ void CodeGenFunction::EmitParmDecl(const VarDecl &D, llvm::Value *Arg, DMEntry = DeclPtr; // Emit debug info for param declaration. - if (CGDebugInfo *DI = getDebugInfo()) - DI->EmitDeclareOfArgVariable(&D, DeclPtr, ArgNo, Builder); + if (CGDebugInfo *DI = getDebugInfo()) { + if (CGM.getCodeGenOpts().DebugInfo >= CodeGenOptions::LimitedDebugInfo) { + DI->EmitDeclareOfArgVariable(&D, DeclPtr, ArgNo, Builder); + } + } if (D.hasAttr()) EmitVarAnnotations(&D, DeclPtr); diff --git a/lib/CodeGen/CGDeclCXX.cpp b/lib/CodeGen/CGDeclCXX.cpp index 10f0b83..492b95a 100644 --- a/lib/CodeGen/CGDeclCXX.cpp +++ b/lib/CodeGen/CGDeclCXX.cpp @@ -98,7 +98,7 @@ static void EmitDeclDestroy(CodeGenFunction &CGF, const VarDecl &D, argument = llvm::Constant::getNullValue(CGF.Int8PtrTy); } - CGF.EmitCXXGlobalDtorRegistration(function, argument); + CGM.getCXXABI().registerGlobalDtor(CGF, function, argument); } /// Emit code to cause the variable at the given address to be considered as @@ -145,39 +145,6 @@ void CodeGenFunction::EmitCXXGlobalVarDeclInit(const VarDecl &D, EmitStoreOfScalar(RV.getScalarVal(), DeclPtr, false, Alignment, T); } -/// Register a global destructor using __cxa_atexit. -static void emitGlobalDtorWithCXAAtExit(CodeGenFunction &CGF, - llvm::Constant *dtor, - llvm::Constant *addr) { - // We're assuming that the destructor function is something we can - // reasonably call with the default CC. Go ahead and cast it to the - // right prototype. - llvm::Type *dtorTy = - llvm::FunctionType::get(CGF.VoidTy, CGF.Int8PtrTy, false)->getPointerTo(); - - // extern "C" int __cxa_atexit(void (*f)(void *), void *p, void *d); - llvm::Type *paramTys[] = { dtorTy, CGF.Int8PtrTy, CGF.Int8PtrTy }; - llvm::FunctionType *atexitTy = - llvm::FunctionType::get(CGF.IntTy, paramTys, false); - - // Fetch the actual function. - llvm::Constant *atexit = - CGF.CGM.CreateRuntimeFunction(atexitTy, "__cxa_atexit"); - if (llvm::Function *fn = dyn_cast(atexit)) - fn->setDoesNotThrow(); - - // Create a variable that binds the atexit to this shared object. 
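// Editorial aside: plain atexit() takes a void(*)(void), so a destructor
// that needs its object pointer must be wrapped in a stub, while
// __cxa_atexit can carry the pointer (and a DSO handle) directly. Morally
// (illustrative; 'obj' and 'dtor' are invented names):
//
//     static void dtor_stub(void) { dtor(&obj); } /* createAtExitStub */
//     atexit(&dtor_stub);            /* registerGlobalDtorWithAtExit */
//
//     __cxa_atexit(&dtor, &obj, &__dso_handle);   /* the path removed here,
//                                    now owned by the C++ ABI object */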
- llvm::Constant *handle = - CGF.CGM.CreateRuntimeVariable(CGF.Int8Ty, "__dso_handle"); - - llvm::Value *args[] = { - llvm::ConstantExpr::getBitCast(dtor, dtorTy), - llvm::ConstantExpr::getBitCast(addr, CGF.Int8PtrTy), - handle - }; - CGF.Builder.CreateCall(atexit, args); -} - static llvm::Function * CreateGlobalInitOrDestructFunction(CodeGenModule &CGM, llvm::FunctionType *ty, @@ -212,43 +179,22 @@ static llvm::Constant *createAtExitStub(CodeGenModule &CGM, return fn; } -/// Register a global destructor using atexit. -static void emitGlobalDtorWithAtExit(CodeGenFunction &CGF, - llvm::Constant *dtor, - llvm::Constant *addr) { +/// Register a global destructor using the C atexit runtime function. +void CodeGenFunction::registerGlobalDtorWithAtExit(llvm::Constant *dtor, + llvm::Constant *addr) { // Create a function which calls the destructor. - llvm::Constant *dtorStub = createAtExitStub(CGF.CGM, dtor, addr); + llvm::Constant *dtorStub = createAtExitStub(CGM, dtor, addr); // extern "C" int atexit(void (*f)(void)); llvm::FunctionType *atexitTy = - llvm::FunctionType::get(CGF.IntTy, dtorStub->getType(), false); + llvm::FunctionType::get(IntTy, dtorStub->getType(), false); llvm::Constant *atexit = - CGF.CGM.CreateRuntimeFunction(atexitTy, "atexit"); + CGM.CreateRuntimeFunction(atexitTy, "atexit"); if (llvm::Function *atexitFn = dyn_cast(atexit)) atexitFn->setDoesNotThrow(); - CGF.Builder.CreateCall(atexit, dtorStub); -} - -void CodeGenFunction::EmitCXXGlobalDtorRegistration(llvm::Constant *dtor, - llvm::Constant *addr) { - // Use __cxa_atexit if available. - if (CGM.getCodeGenOpts().CXAAtExit) { - emitGlobalDtorWithCXAAtExit(*this, dtor, addr); - return; - } - - // In Apple kexts, we want to add a global destructor entry. - // FIXME: shouldn't this be guarded by some variable? - if (CGM.getContext().getLangOpts().AppleKext) { - // Generate a global destructor entry. - CGM.AddCXXDtorEntry(dtor, addr); - return; - } - - // Otherwise, we just use atexit. - emitGlobalDtorWithAtExit(*this, dtor, addr); + Builder.CreateCall(atexit, dtorStub)->setDoesNotThrow(); } void CodeGenFunction::EmitCXXGuardedInit(const VarDecl &D, @@ -282,6 +228,9 @@ CreateGlobalInitOrDestructFunction(CodeGenModule &CGM, if (!CGM.getLangOpts().Exceptions) Fn->setDoesNotThrow(); + if (CGM.getLangOpts().AddressSanitizer) + Fn->addFnAttr(llvm::Attribute::AddressSafety); + return Fn; } @@ -372,9 +321,12 @@ void CodeGenFunction::GenerateCXXGlobalVarDeclInitFunc(llvm::Function *Fn, const VarDecl *D, llvm::GlobalVariable *Addr, bool PerformInit) { - StartFunction(GlobalDecl(), getContext().VoidTy, Fn, + if (CGM.getModuleDebugInfo() && !D->hasAttr()) + DebugInfo = CGM.getModuleDebugInfo(); + + StartFunction(GlobalDecl(D), getContext().VoidTy, Fn, getTypes().arrangeNullaryFunction(), - FunctionArgList(), SourceLocation()); + FunctionArgList(), D->getInit()->getExprLoc()); // Use guarded initialization if the global variable is weak. This // occurs for, e.g., instantiated static data members and diff --git a/lib/CodeGen/CGException.cpp b/lib/CodeGen/CGException.cpp index 95e0030..ba9c296 100644 --- a/lib/CodeGen/CGException.cpp +++ b/lib/CodeGen/CGException.cpp @@ -126,7 +126,7 @@ static llvm::Constant *getTerminateFn(CodeGenFunction &CGF) { if (CGF.getLangOpts().CPlusPlus) name = "_ZSt9terminatev"; // FIXME: mangling! 
else if (CGF.getLangOpts().ObjC1 && - CGF.CGM.getCodeGenOpts().ObjCRuntimeHasTerminate) + CGF.getLangOpts().ObjCRuntime.hasTerminate()) name = "objc_terminate"; else name = "abort"; @@ -180,12 +180,18 @@ static const EHPersonality &getCPersonality(const LangOptions &L) { } static const EHPersonality &getObjCPersonality(const LangOptions &L) { - if (L.NeXTRuntime) { - if (L.ObjCNonFragileABI) return EHPersonality::NeXT_ObjC; - else return getCPersonality(L); - } else { + switch (L.ObjCRuntime.getKind()) { + case ObjCRuntime::FragileMacOSX: + return getCPersonality(L); + case ObjCRuntime::MacOSX: + case ObjCRuntime::iOS: + return EHPersonality::NeXT_ObjC; + case ObjCRuntime::GNUstep: + case ObjCRuntime::GCC: + case ObjCRuntime::ObjFW: return EHPersonality::GNU_ObjC; } + llvm_unreachable("bad runtime kind"); } static const EHPersonality &getCXXPersonality(const LangOptions &L) { @@ -198,22 +204,28 @@ static const EHPersonality &getCXXPersonality(const LangOptions &L) { /// Determines the personality function to use when both C++ /// and Objective-C exceptions are being caught. static const EHPersonality &getObjCXXPersonality(const LangOptions &L) { + switch (L.ObjCRuntime.getKind()) { // The ObjC personality defers to the C++ personality for non-ObjC // handlers. Unlike the C++ case, we use the same personality // function on targets using (backend-driven) SJLJ EH. - if (L.NeXTRuntime) { - if (L.ObjCNonFragileABI) - return EHPersonality::NeXT_ObjC; + case ObjCRuntime::MacOSX: + case ObjCRuntime::iOS: + return EHPersonality::NeXT_ObjC; - // In the fragile ABI, just use C++ exception handling and hope - // they're not doing crazy exception mixing. - else - return getCXXPersonality(L); - } + // In the fragile ABI, just use C++ exception handling and hope + // they're not doing crazy exception mixing. + case ObjCRuntime::FragileMacOSX: + return getCXXPersonality(L); - // The GNU runtime's personality function inherently doesn't support + // The GCC runtime's personality function inherently doesn't support // mixed EH. Use the C++ personality just to avoid returning null. - return EHPersonality::GNU_ObjCXX; + case ObjCRuntime::GCC: + case ObjCRuntime::ObjFW: // XXX: this will change soon + return EHPersonality::GNU_ObjC; + case ObjCRuntime::GNUstep: + return EHPersonality::GNU_ObjCXX; + } + llvm_unreachable("bad runtime kind"); } const EHPersonality &EHPersonality::get(const LangOptions &L) { @@ -1127,14 +1139,6 @@ static void BeginCatch(CodeGenFunction &CGF, const CXXCatchStmt *S) { CGF.EmitAutoVarCleanups(var); } -namespace { - struct CallRethrow : EHScopeStack::Cleanup { - void Emit(CodeGenFunction &CGF, Flags flags) { - CGF.EmitCallOrInvoke(getReThrowFn(CGF)); - } - }; -} - /// Emit the structure of the dispatch block for the given catch scope. /// It is an invariant that the dispatch block already exists. static void emitCatchDispatchBlock(CodeGenFunction &CGF, @@ -1246,11 +1250,12 @@ void CodeGenFunction::ExitCXXTryStmt(const CXXTryStmt &S, bool IsFnTryBlock) { if (HaveInsertPoint()) Builder.CreateBr(ContBB); - // Determine if we need an implicit rethrow for all these catch handlers. - bool ImplicitRethrow = false; + // Determine if we need an implicit rethrow for all these catch handlers; + // see the comment below. 
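
The personality selection above now switches exhaustively over the Objective-C runtime kind instead of testing the old NeXTRuntime/ObjCNonFragileABI flags, so a newly added kind produces a covered-switch warning rather than silently taking a default. A sketch of the same pattern; the enumerators mirror the patch, and the symbol names are the conventional personality routines, listed only for orientation:

    enum class ObjCRuntimeKind { FragileMacOSX, MacOSX, iOS, GNUstep, GCC, ObjFW };

    const char *objcPersonalitySymbol(ObjCRuntimeKind K) {
      switch (K) {
      case ObjCRuntimeKind::FragileMacOSX: // fragile ABI: plain C personality
        return "__gcc_personality_v0";
      case ObjCRuntimeKind::MacOSX:
      case ObjCRuntimeKind::iOS:           // modern NeXT runtimes
        return "__objc_personality_v0";
      case ObjCRuntimeKind::GNUstep:
      case ObjCRuntimeKind::GCC:
      case ObjCRuntimeKind::ObjFW:         // GNU family
        return "__gnu_objc_personality_v0";
      }
      // Control only gets here on an invalid enumerator, which is what
      // llvm_unreachable("bad runtime kind") asserts in the patch.
      return nullptr;
    }
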
+ bool doImplicitRethrow = false; if (IsFnTryBlock) - ImplicitRethrow = isa(CurCodeDecl) || - isa(CurCodeDecl); + doImplicitRethrow = isa(CurCodeDecl) || + isa(CurCodeDecl); // Perversely, we emit the handlers backwards precisely because we // want them to appear in source order. In all of these cases, the @@ -1273,15 +1278,24 @@ void CodeGenFunction::ExitCXXTryStmt(const CXXTryStmt &S, bool IsFnTryBlock) { // Initialize the catch variable and set up the cleanups. BeginCatch(*this, C); - // If there's an implicit rethrow, push a normal "cleanup" to call - // _cxa_rethrow. This needs to happen before __cxa_end_catch is - // called, and so it is pushed after BeginCatch. - if (ImplicitRethrow) - EHStack.pushCleanup(NormalCleanup); - // Perform the body of the catch. EmitStmt(C->getHandlerBlock()); + // [except.handle]p11: + // The currently handled exception is rethrown if control + // reaches the end of a handler of the function-try-block of a + // constructor or destructor. + + // It is important that we only do this on fallthrough and not on + // return. Note that it's illegal to put a return in a + // constructor function-try-block's catch handler (p14), so this + // really only applies to destructors. + if (doImplicitRethrow && HaveInsertPoint()) { + EmitCallOrInvoke(getReThrowFn(*this)); + Builder.CreateUnreachable(); + Builder.ClearInsertionPoint(); + } + // Fall out through the catch cleanups. CatchScope.ForceCleanup(); diff --git a/lib/CodeGen/CGExpr.cpp b/lib/CodeGen/CGExpr.cpp index 5f2b1f0..ecee7b4 100644 --- a/lib/CodeGen/CGExpr.cpp +++ b/lib/CodeGen/CGExpr.cpp @@ -21,10 +21,11 @@ #include "TargetInfo.h" #include "clang/AST/ASTContext.h" #include "clang/AST/DeclObjC.h" +#include "clang/Basic/ConvertUTF.h" #include "clang/Frontend/CodeGenOptions.h" #include "llvm/Intrinsics.h" #include "llvm/LLVMContext.h" -#include "llvm/Support/MDBuilder.h" +#include "llvm/MDBuilder.h" #include "llvm/Target/TargetData.h" using namespace clang; using namespace CodeGen; @@ -108,15 +109,18 @@ void CodeGenFunction::EmitIgnoredExpr(const Expr *E) { /// can have any type. The result is returned as an RValue struct. /// If this is an aggregate expression, AggSlot indicates where the /// result should be returned. -RValue CodeGenFunction::EmitAnyExpr(const Expr *E, AggValueSlot AggSlot, - bool IgnoreResult) { +RValue CodeGenFunction::EmitAnyExpr(const Expr *E, + AggValueSlot aggSlot, + bool ignoreResult) { if (!hasAggregateLLVMType(E->getType())) - return RValue::get(EmitScalarExpr(E, IgnoreResult)); + return RValue::get(EmitScalarExpr(E, ignoreResult)); else if (E->getType()->isAnyComplexType()) - return RValue::getComplex(EmitComplexExpr(E, IgnoreResult, IgnoreResult)); + return RValue::getComplex(EmitComplexExpr(E, ignoreResult, ignoreResult)); - EmitAggExpr(E, AggSlot, IgnoreResult); - return AggSlot.asRValue(); + if (!ignoreResult && aggSlot.isIgnored()) + aggSlot = CreateAggTemp(E->getType(), "agg-temp"); + EmitAggExpr(E, aggSlot); + return aggSlot.asRValue(); } /// EmitAnyExprToTemp - Similary to EmitAnyExpr(), however, the result will @@ -156,7 +160,11 @@ namespace { /// \brief An adjustment to be made to the temporary created when emitting a /// reference binding, which accesses a particular subobject of that temporary. 
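
A usage-level example of the rule cited in the new comment ([except.handle]p11): flowing off the end of a handler of a constructor's function-try-block implicitly rethrows the current exception to the caller, which is exactly the __cxa_rethrow-plus-unreachable sequence the patched ExitCXXTryStmt emits on fallthrough:

    #include <cstdio>
    #include <stdexcept>

    struct Widget {
      Widget() try : value(make()) {
      } catch (const std::exception &e) {
        std::printf("handler ran: %s\n", e.what());
        // Falling off the end here rethrows; a 'return' would be ill-formed
        // in a constructor's handler ([except.handle]p14).
      }
      static int make() { throw std::runtime_error("ctor failed"); }
      int value;
    };

    int main() {
      try {
        Widget w;
      } catch (const std::exception &) {
        std::puts("rethrown to the caller of Widget()");
      }
    }
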
struct SubobjectAdjustment { - enum { DerivedToBaseAdjustment, FieldAdjustment } Kind; + enum { + DerivedToBaseAdjustment, + FieldAdjustment, + MemberPointerAdjustment + } Kind; union { struct { @@ -165,6 +173,11 @@ namespace { } DerivedToBase; FieldDecl *Field; + + struct { + const MemberPointerType *MPT; + llvm::Value *Ptr; + } Ptr; }; SubobjectAdjustment(const CastExpr *BasePath, @@ -178,6 +191,12 @@ namespace { : Kind(FieldAdjustment) { this->Field = Field; } + + SubobjectAdjustment(const MemberPointerType *MPT, llvm::Value *Ptr) + : Kind(MemberPointerAdjustment) { + this->Ptr.MPT = MPT; + this->Ptr.Ptr = Ptr; + } }; } @@ -345,6 +364,15 @@ EmitExprForReferenceBinding(CodeGenFunction &CGF, const Expr *E, continue; } } + } else if (const BinaryOperator *BO = dyn_cast(E)) { + if (BO->isPtrMemOp()) { + assert(BO->getLHS()->isRValue()); + E = BO->getLHS(); + const MemberPointerType *MPT = + BO->getRHS()->getType()->getAs(); + llvm::Value *Ptr = CGF.EmitScalarExpr(BO->getRHS()); + Adjustments.push_back(SubobjectAdjustment(MPT, Ptr)); + } } if (const OpaqueValueExpr *opaque = dyn_cast(E)) @@ -417,6 +445,11 @@ EmitExprForReferenceBinding(CodeGenFunction &CGF, const Expr *E, break; } + case SubobjectAdjustment::MemberPointerAdjustment: { + Object = CGF.CGM.getCXXABI().EmitMemberDataPointerAddress( + CGF, Object, Adjustment.Ptr.Ptr, Adjustment.Ptr.MPT); + break; + } } } @@ -462,7 +495,7 @@ CodeGenFunction::EmitReferenceBindingToExpr(const Expr *E, if (ReferenceTemporaryDtor) { llvm::Constant *DtorFn = CGM.GetAddrOfCXXDestructor(ReferenceTemporaryDtor, Dtor_Complete); - EmitCXXGlobalDtorRegistration(DtorFn, + CGM.getCXXABI().registerGlobalDtor(*this, DtorFn, cast(ReferenceTemporary)); } else { assert(!ObjCARCReferenceLifetimeType.isNull()); @@ -525,15 +558,9 @@ void CodeGenFunction::EmitCheck(llvm::Value *Address, unsigned Size) { llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::objectsize, IntPtrTy); - // In time, people may want to control this and use a 1 here. 
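
The new MemberPointerAdjustment kind covers reference binding through a pointer-to-member applied to an rvalue, with the subobject address computed by the ABI's EmitMemberDataPointerAddress. A C++11 usage example of the construct this lets codegen handle:

    struct Agg { int a; int b; };

    int main() {
      int Agg::*mp = &Agg::b;
      // Binds to the 'b' subobject of the materialized temporary; codegen
      // applies the member-pointer adjustment to locate it.
      const int &r = Agg{1, 2}.*mp;
      return r == 2 ? 0 : 1;
    }
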
- llvm::Value *Arg = Builder.getFalse(); - llvm::Value *C = Builder.CreateCall2(F, Address, Arg); + llvm::Value *Min = Builder.getFalse(); + llvm::Value *C = Builder.CreateCall2(F, Address, Min); llvm::BasicBlock *Cont = createBasicBlock(); - llvm::BasicBlock *Check = createBasicBlock(); - llvm::Value *NegativeOne = llvm::ConstantInt::get(IntPtrTy, -1ULL); - Builder.CreateCondBr(Builder.CreateICmpEQ(C, NegativeOne), Cont, Check); - - EmitBlock(Check); Builder.CreateCondBr(Builder.CreateICmpUGE(C, llvm::ConstantInt::get(IntPtrTy, Size)), Cont, getTrapBB()); @@ -676,10 +703,7 @@ LValue CodeGenFunction::EmitLValue(const Expr *E) { case Expr::PseudoObjectExprClass: return EmitPseudoObjectLValue(cast(E)); case Expr::InitListExprClass: - assert(cast(E)->getNumInits() == 1 && - "Only single-element init list can be lvalue."); - return EmitLValue(cast(E)->getInit(0)); - + return EmitInitListLValue(cast(E)); case Expr::CXXTemporaryObjectExprClass: case Expr::CXXConstructExprClass: return EmitCXXConstructLValue(cast(E)); @@ -880,7 +904,6 @@ llvm::MDNode *CodeGenFunction::getRangeForLoadFromType(QualType Ty) { CGM.getCodeGenOpts().StrictEnums && !ET->getDecl()->isFixed()); bool IsBool = hasBooleanRepresentation(Ty); - llvm::Type *LTy; if (!IsBool && !IsRegularCPlusPlusEnum) return NULL; @@ -889,10 +912,9 @@ llvm::MDNode *CodeGenFunction::getRangeForLoadFromType(QualType Ty) { if (IsBool) { Min = llvm::APInt(8, 0); End = llvm::APInt(8, 2); - LTy = Int8Ty; } else { const EnumDecl *ED = ET->getDecl(); - LTy = ConvertTypeForMem(ED->getIntegerType()); + llvm::Type *LTy = ConvertTypeForMem(ED->getIntegerType()); unsigned Bitwidth = LTy->getScalarSizeInBits(); unsigned NumNegativeBits = ED->getNumNegativeBits(); unsigned NumPositiveBits = ED->getNumPositiveBits(); @@ -1028,6 +1050,9 @@ RValue CodeGenFunction::EmitLoadOfBitfieldLValue(LValue LV) { llvm::Value *Res = 0; for (unsigned i = 0, e = Info.getNumComponents(); i != e; ++i) { const CGBitFieldInfo::AccessInfo &AI = Info.getComponent(i); + CharUnits AccessAlignment = AI.AccessAlignment; + if (!LV.getAlignment().isZero()) + AccessAlignment = std::min(AccessAlignment, LV.getAlignment()); // Get the field pointer. llvm::Value *Ptr = LV.getBitFieldBaseAddr(); @@ -1051,8 +1076,7 @@ RValue CodeGenFunction::EmitLoadOfBitfieldLValue(LValue LV) { // Perform the load. llvm::LoadInst *Load = Builder.CreateLoad(Ptr, LV.isVolatileQualified()); - if (!AI.AccessAlignment.isZero()) - Load->setAlignment(AI.AccessAlignment.getQuantity()); + Load->setAlignment(AccessAlignment.getQuantity()); // Shift out unused low bits and mask out unused high bits. llvm::Value *Val = Load; @@ -1251,6 +1275,9 @@ void CodeGenFunction::EmitStoreThroughBitfieldLValue(RValue Src, LValue Dst, // Iterate over the components, writing each piece to memory. for (unsigned i = 0, e = Info.getNumComponents(); i != e; ++i) { const CGBitFieldInfo::AccessInfo &AI = Info.getComponent(i); + CharUnits AccessAlignment = AI.AccessAlignment; + if (!Dst.getAlignment().isZero()) + AccessAlignment = std::min(AccessAlignment, Dst.getAlignment()); // Get the field pointer. llvm::Value *Ptr = Dst.getBitFieldBaseAddr(); @@ -1297,8 +1324,7 @@ void CodeGenFunction::EmitStoreThroughBitfieldLValue(RValue Src, LValue Dst, // If necessary, load and OR in bits that are outside of the bit-field. 
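
getRangeForLoadFromType above attaches !range metadata to loads of bool (always the half-open range [0, 2)) and of C++ enums without a fixed underlying type. A sketch of the enum range computation, assuming the positive/negative bit counts clang records on the EnumDecl:

    #include <algorithm>
    #include <cstdint>
    #include <utility>

    // Half-open [Min, End) range of representable enumerator values.
    std::pair<std::int64_t, std::int64_t>
    enumValueRange(unsigned numNegativeBits, unsigned numPositiveBits) {
      if (numNegativeBits) {
        // Signed: wide enough for the most negative enumerator, or for the
        // most positive one plus a sign bit.
        unsigned numBits = std::max(numNegativeBits, numPositiveBits + 1);
        std::int64_t end = std::int64_t(1) << (numBits - 1);
        return {-end, end};
      }
      // Unsigned: [0, 2^numPositiveBits).
      return {0, std::int64_t(1) << numPositiveBits};
    }
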
if (AI.TargetBitWidth != AI.AccessWidth) { llvm::LoadInst *Load = Builder.CreateLoad(Ptr, Dst.isVolatileQualified()); - if (!AI.AccessAlignment.isZero()) - Load->setAlignment(AI.AccessAlignment.getQuantity()); + Load->setAlignment(AccessAlignment.getQuantity()); // Compute the mask for zeroing the bits that are part of the bit-field. llvm::APInt InvMask = @@ -1312,8 +1338,7 @@ void CodeGenFunction::EmitStoreThroughBitfieldLValue(RValue Src, LValue Dst, // Write the value. llvm::StoreInst *Store = Builder.CreateStore(Val, Ptr, Dst.isVolatileQualified()); - if (!AI.AccessAlignment.isZero()) - Store->setAlignment(AI.AccessAlignment.getQuantity()); + Store->setAlignment(AccessAlignment.getQuantity()); } } @@ -1683,6 +1708,39 @@ LValue CodeGenFunction::EmitObjCEncodeExprLValue(const ObjCEncodeExpr *E) { E->getType()); } +static llvm::Constant* +GetAddrOfConstantWideString(StringRef Str, + const char *GlobalName, + ASTContext &Context, + QualType Ty, SourceLocation Loc, + CodeGenModule &CGM) { + + StringLiteral *SL = StringLiteral::Create(Context, + Str, + StringLiteral::Wide, + /*Pascal = */false, + Ty, Loc); + llvm::Constant *C = CGM.GetConstantArrayFromStringLiteral(SL); + llvm::GlobalVariable *GV = + new llvm::GlobalVariable(CGM.getModule(), C->getType(), + !CGM.getLangOpts().WritableStrings, + llvm::GlobalValue::PrivateLinkage, + C, GlobalName); + const unsigned WideAlignment = + Context.getTypeAlignInChars(Ty).getQuantity(); + GV->setAlignment(WideAlignment); + return GV; +} + +static void ConvertUTF8ToWideString(unsigned CharByteWidth, StringRef Source, + SmallString<32>& Target) { + Target.resize(CharByteWidth * (Source.size() + 1)); + char* ResultPtr = &Target[0]; + bool success = ConvertUTF8toWide(CharByteWidth, Source, ResultPtr); + (void)success; + assert(success); + Target.resize(ResultPtr - &Target[0]); +} LValue CodeGenFunction::EmitPredefinedLValue(const PredefinedExpr *E) { switch (E->getIdentType()) { @@ -1691,11 +1749,12 @@ LValue CodeGenFunction::EmitPredefinedLValue(const PredefinedExpr *E) { case PredefinedExpr::Func: case PredefinedExpr::Function: + case PredefinedExpr::LFunction: case PredefinedExpr::PrettyFunction: { - unsigned Type = E->getIdentType(); + unsigned IdentType = E->getIdentType(); std::string GlobalVarName; - switch (Type) { + switch (IdentType) { default: llvm_unreachable("Invalid type"); case PredefinedExpr::Func: GlobalVarName = "__func__."; @@ -1703,6 +1762,9 @@ LValue CodeGenFunction::EmitPredefinedLValue(const PredefinedExpr *E) { case PredefinedExpr::Function: GlobalVarName = "__FUNCTION__."; break; + case PredefinedExpr::LFunction: + GlobalVarName = "L__FUNCTION__."; + break; case PredefinedExpr::PrettyFunction: GlobalVarName = "__PRETTY_FUNCTION__."; break; @@ -1720,10 +1782,27 @@ LValue CodeGenFunction::EmitPredefinedLValue(const PredefinedExpr *E) { std::string FunctionName = (isa(CurDecl) ? 
FnName.str() - : PredefinedExpr::ComputeName((PredefinedExpr::IdentType)Type, CurDecl)); - - llvm::Constant *C = - CGM.GetAddrOfConstantCString(FunctionName, GlobalVarName.c_str()); + : PredefinedExpr::ComputeName((PredefinedExpr::IdentType)IdentType, + CurDecl)); + + const Type* ElemType = E->getType()->getArrayElementTypeNoTypeQual(); + llvm::Constant *C; + if (ElemType->isWideCharType()) { + SmallString<32> RawChars; + ConvertUTF8ToWideString( + getContext().getTypeSizeInChars(ElemType).getQuantity(), + FunctionName, RawChars); + C = GetAddrOfConstantWideString(RawChars, + GlobalVarName.c_str(), + getContext(), + E->getType(), + E->getLocation(), + CGM); + } else { + C = CGM.GetAddrOfConstantCString(FunctionName, + GlobalVarName.c_str(), + 1); + } return MakeAddrLValue(C, E->getType()); } } @@ -1794,25 +1873,6 @@ LValue CodeGenFunction::EmitArraySubscriptExpr(const ArraySubscriptExpr *E) { // Extend or truncate the index type to 32 or 64-bits. if (Idx->getType() != IntPtrTy) Idx = Builder.CreateIntCast(Idx, IntPtrTy, IdxSigned, "idxprom"); - - // FIXME: As llvm implements the object size checking, this can come out. - if (CatchUndefined) { - if (const ImplicitCastExpr *ICE = dyn_cast(E->getBase())){ - if (const DeclRefExpr *DRE = dyn_cast(ICE->getSubExpr())) { - if (ICE->getCastKind() == CK_ArrayToPointerDecay) { - if (const ConstantArrayType *CAT - = getContext().getAsConstantArrayType(DRE->getType())) { - llvm::APInt Size = CAT->getSize(); - llvm::BasicBlock *Cont = createBasicBlock("cont"); - Builder.CreateCondBr(Builder.CreateICmpULE(Idx, - llvm::ConstantInt::get(Idx->getType(), Size)), - Cont, getTrapBB()); - EmitBlock(Cont); - } - } - } - } - } // We know that the pointer points to a type of the correct size, unless the // size is a VLA or Objective-C interface. @@ -1996,43 +2056,17 @@ LValue CodeGenFunction::EmitMemberExpr(const MemberExpr *E) { llvm_unreachable("Unhandled member declaration!"); } -LValue CodeGenFunction::EmitLValueForBitfield(llvm::Value *BaseValue, - const FieldDecl *Field, - unsigned CVRQualifiers) { - const CGRecordLayout &RL = - CGM.getTypes().getCGRecordLayout(Field->getParent()); - const CGBitFieldInfo &Info = RL.getBitFieldInfo(Field); - return LValue::MakeBitfield(BaseValue, Info, - Field->getType().withCVRQualifiers(CVRQualifiers)); -} - -/// EmitLValueForAnonRecordField - Given that the field is a member of -/// an anonymous struct or union buried inside a record, and given -/// that the base value is a pointer to the enclosing record, derive -/// an lvalue for the ultimate field. 
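
The LFunction cases added above implement L__FUNCTION__, the Microsoft extension yielding the enclosing function's name as a wide string; it is what the new wide-string path (GetAddrOfConstantWideString plus the UTF-8 conversion) exists to materialize. A usage-level illustration, assuming clang is invoked with -fms-extensions:

    #include <cwchar>

    void report() {
      const wchar_t *name = L__FUNCTION__; // "report" as a wide literal
      std::wprintf(L"in %ls\n", name);
    }
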
-LValue CodeGenFunction::EmitLValueForAnonRecordField(llvm::Value *BaseValue, - const IndirectFieldDecl *Field, - unsigned CVRQualifiers) { - IndirectFieldDecl::chain_iterator I = Field->chain_begin(), - IEnd = Field->chain_end(); - while (true) { - QualType RecordTy = - getContext().getTypeDeclType(cast(*I)->getParent()); - LValue LV = EmitLValueForField(MakeAddrLValue(BaseValue, RecordTy), - cast(*I)); - if (++I == IEnd) return LV; - - assert(LV.isSimple()); - BaseValue = LV.getAddress(); - CVRQualifiers |= LV.getVRQualifiers(); - } -} - LValue CodeGenFunction::EmitLValueForField(LValue base, const FieldDecl *field) { - if (field->isBitField()) - return EmitLValueForBitfield(base.getAddress(), field, - base.getVRQualifiers()); + if (field->isBitField()) { + const CGRecordLayout &RL = + CGM.getTypes().getCGRecordLayout(field->getParent()); + const CGBitFieldInfo &Info = RL.getBitFieldInfo(field); + QualType fieldType = + field->getType().withCVRQualifiers(base.getVRQualifiers()); + return LValue::MakeBitfield(base.getAddress(), Info, fieldType, + base.getAlignment()); + } const RecordDecl *rec = field->getParent(); QualType type = field->getType(); @@ -2144,7 +2178,10 @@ LValue CodeGenFunction::EmitCompoundLiteralLValue(const CompoundLiteralExpr *E){ llvm::Value *GlobalPtr = CGM.GetAddrOfConstantCompoundLiteral(E); return MakeAddrLValue(GlobalPtr, E->getType()); } - + if (E->getType()->isVariablyModifiedType()) + // make sure to emit the VLA size. + EmitVariablyModifiedType(E->getType()); + llvm::Value *DeclPtr = CreateMemTemp(E->getType(), ".compoundliteral"); const Expr *InitExpr = E->getInitializer(); LValue Result = MakeAddrLValue(DeclPtr, E->getType()); @@ -2155,6 +2192,16 @@ LValue CodeGenFunction::EmitCompoundLiteralLValue(const CompoundLiteralExpr *E){ return Result; } +LValue CodeGenFunction::EmitInitListLValue(const InitListExpr *E) { + if (!E->isGLValue()) + // Initializing an aggregate temporary in C++11: T{...}. + return EmitAggExprToLValue(E); + + // An lvalue initializer list must be initializing a reference. + assert(E->getNumInits() == 1 && "reference init with multiple values"); + return EmitLValue(E->getInit(0)); +} + LValue CodeGenFunction:: EmitConditionalOperatorLValue(const AbstractConditionalOperator *expr) { if (!expr->isGLValue()) { @@ -2214,11 +2261,11 @@ EmitConditionalOperatorLValue(const AbstractConditionalOperator *expr) { return MakeAddrLValue(phi, expr->getType()); } -/// EmitCastLValue - Casts are never lvalues unless that cast is a dynamic_cast. -/// If the cast is a dynamic_cast, we can have the usual lvalue result, +/// EmitCastLValue - Casts are never lvalues unless that cast is to a reference +/// type. If the cast is to a reference, we can have the usual lvalue result, /// otherwise if a cast is needed by the code generator in an lvalue context, /// then it must mean that we need the address of an aggregate in order to -/// access one of its fields. This can happen for all the reasons that casts +/// access one of its members. This can happen for all the reasons that casts /// are permitted with aggregate result, including noop aggregate casts, and /// cast from scalar to union. 
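
A usage-level pair of cases for the new EmitInitListLValue above: a glvalue InitListExpr can only be initializing a reference (hence the single-element assert), while a braced temporary T{...} takes the C++11 aggregate path. The two lines below are meant to illustrate those two branches:

    struct Point { int x, y; };

    int main() {
      const int &r = {42};          // single-init list: binds the reference
      const Point &p = Point{1, 2}; // T{...}: aggregate temporary, then bind
      return r + p.x + p.y;         // 45
    }
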
LValue CodeGenFunction::EmitCastLValue(const CastExpr *E) { @@ -2648,7 +2695,7 @@ RValue CodeGenFunction::EmitCall(QualType CalleeType, llvm::Value *Callee, EmitCallArgs(Args, dyn_cast(FnType), ArgBeg, ArgEnd); const CGFunctionInfo &FnInfo = - CGM.getTypes().arrangeFunctionCall(Args, FnType); + CGM.getTypes().arrangeFreeFunctionCall(Args, FnType); // C99 6.5.2.2p6: // If the expression that denotes the called function has a type @@ -3038,7 +3085,7 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E, llvm::Value *Dest) { getContext().IntTy); const CGFunctionInfo &FuncInfo = - CGM.getTypes().arrangeFunctionCall(RetTy, Args, + CGM.getTypes().arrangeFreeFunctionCall(RetTy, Args, FunctionType::ExtInfo(), RequiredArgs::All); llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FuncInfo); llvm::Constant *Func = CGM.CreateRuntimeFunction(FTy, LibCallName); diff --git a/lib/CodeGen/CGExprAgg.cpp b/lib/CodeGen/CGExprAgg.cpp index 7b0e0f5..61f7362 100644 --- a/lib/CodeGen/CGExprAgg.cpp +++ b/lib/CodeGen/CGExprAgg.cpp @@ -34,7 +34,6 @@ class AggExprEmitter : public StmtVisitor { CodeGenFunction &CGF; CGBuilderTy &Builder; AggValueSlot Dest; - bool IgnoreResult; /// We want to use 'dest' as the return slot except under two /// conditions: @@ -56,12 +55,14 @@ class AggExprEmitter : public StmtVisitor { if (!Dest.isIgnored()) return Dest; return CGF.CreateAggTemp(T, "agg.tmp.ensured"); } + void EnsureDest(QualType T) { + if (!Dest.isIgnored()) return; + Dest = CGF.CreateAggTemp(T, "agg.tmp.ensured"); + } public: - AggExprEmitter(CodeGenFunction &cgf, AggValueSlot Dest, - bool ignore) - : CGF(cgf), Builder(CGF.Builder), Dest(Dest), - IgnoreResult(ignore) { + AggExprEmitter(CodeGenFunction &cgf, AggValueSlot Dest) + : CGF(cgf), Builder(CGF.Builder), Dest(Dest) { } //===--------------------------------------------------------------------===// @@ -74,9 +75,11 @@ public: void EmitAggLoadOfLValue(const Expr *E); /// EmitFinalDestCopy - Perform the final copy to DestPtr, if desired. - void EmitFinalDestCopy(const Expr *E, LValue Src, bool Ignore = false); - void EmitFinalDestCopy(const Expr *E, RValue Src, bool Ignore = false, - unsigned Alignment = 0); + void EmitFinalDestCopy(QualType type, const LValue &src); + void EmitFinalDestCopy(QualType type, RValue src, + CharUnits srcAlignment = CharUnits::Zero()); + void EmitCopy(QualType type, const AggValueSlot &dest, + const AggValueSlot &src); void EmitMoveFromReturnSlot(const Expr *E, RValue Src); @@ -119,7 +122,7 @@ public: if (E->getDecl()->getType()->isReferenceType()) { if (CodeGenFunction::ConstantEmission result = CGF.tryEmitAsConstant(E)) { - EmitFinalDestCopy(E, result.getReferenceLValue(CGF, E)); + EmitFinalDestCopy(E->getType(), result.getReferenceLValue(CGF, E)); return; } } @@ -171,7 +174,7 @@ public: void VisitPseudoObjectExpr(PseudoObjectExpr *E) { if (E->isGLValue()) { LValue LV = CGF.EmitPseudoObjectLValue(E); - return EmitFinalDestCopy(E, LV); + return EmitFinalDestCopy(E->getType(), LV); } CGF.EmitPseudoObjectRValue(E, EnsureSlot(E->getType())); @@ -198,7 +201,7 @@ public: /// then loads the result into DestPtr. void AggExprEmitter::EmitAggLoadOfLValue(const Expr *E) { LValue LV = CGF.EmitLValue(E); - EmitFinalDestCopy(E, LV); + EmitFinalDestCopy(E->getType(), LV); } /// \brief True if the given aggregate type requires special GC API calls. @@ -228,7 +231,7 @@ bool AggExprEmitter::TypeRequiresGCollection(QualType T) { /// If nothing interferes, this will cause the result to be emitted /// directly into the return value slot. 
Otherwise, a final move /// will be performed. -void AggExprEmitter::EmitMoveFromReturnSlot(const Expr *E, RValue Src) { +void AggExprEmitter::EmitMoveFromReturnSlot(const Expr *E, RValue src) { if (shouldUseDestForReturnSlot()) { // Logically, Dest.getAddr() should equal Src.getAggregateAddr(). // The possibility of undef rvalues complicates that a lot, @@ -236,61 +239,58 @@ void AggExprEmitter::EmitMoveFromReturnSlot(const Expr *E, RValue Src) { return; } - // Otherwise, do a final copy, - assert(Dest.getAddr() != Src.getAggregateAddr()); - std::pair TypeInfo = + // Otherwise, copy from there to the destination. + assert(Dest.getAddr() != src.getAggregateAddr()); + std::pair typeInfo = CGF.getContext().getTypeInfoInChars(E->getType()); - CharUnits Alignment = std::min(TypeInfo.second, Dest.getAlignment()); - EmitFinalDestCopy(E, Src, /*Ignore*/ true, Alignment.getQuantity()); + EmitFinalDestCopy(E->getType(), src, typeInfo.second); } /// EmitFinalDestCopy - Perform the final copy to DestPtr, if desired. -void AggExprEmitter::EmitFinalDestCopy(const Expr *E, RValue Src, bool Ignore, - unsigned Alignment) { - assert(Src.isAggregate() && "value must be aggregate value!"); +void AggExprEmitter::EmitFinalDestCopy(QualType type, RValue src, + CharUnits srcAlign) { + assert(src.isAggregate() && "value must be aggregate value!"); + LValue srcLV = CGF.MakeAddrLValue(src.getAggregateAddr(), type, srcAlign); + EmitFinalDestCopy(type, srcLV); +} +/// EmitFinalDestCopy - Perform the final copy to DestPtr, if desired. +void AggExprEmitter::EmitFinalDestCopy(QualType type, const LValue &src) { // If Dest is ignored, then we're evaluating an aggregate expression - // in a context (like an expression statement) that doesn't care - // about the result. C says that an lvalue-to-rvalue conversion is - // performed in these cases; C++ says that it is not. In either - // case, we don't actually need to do anything unless the value is - // volatile. - if (Dest.isIgnored()) { - if (!Src.isVolatileQualified() || - CGF.CGM.getLangOpts().CPlusPlus || - (IgnoreResult && Ignore)) - return; + // in a context that doesn't care about the result. Note that loads + // from volatile l-values force the existence of a non-ignored + // destination. + if (Dest.isIgnored()) + return; - // If the source is volatile, we must read from it; to do that, we need - // some place to put it. - Dest = CGF.CreateAggTemp(E->getType(), "agg.tmp"); - } + AggValueSlot srcAgg = + AggValueSlot::forLValue(src, AggValueSlot::IsDestructed, + needsGC(type), AggValueSlot::IsAliased); + EmitCopy(type, Dest, srcAgg); +} - if (Dest.requiresGCollection()) { - CharUnits size = CGF.getContext().getTypeSizeInChars(E->getType()); - llvm::Type *SizeTy = CGF.ConvertType(CGF.getContext().getSizeType()); - llvm::Value *SizeVal = llvm::ConstantInt::get(SizeTy, size.getQuantity()); +/// Perform a copy from the source into the destination. +/// +/// \param type - the type of the aggregate being copied; qualifiers are +/// ignored +void AggExprEmitter::EmitCopy(QualType type, const AggValueSlot &dest, + const AggValueSlot &src) { + if (dest.requiresGCollection()) { + CharUnits sz = CGF.getContext().getTypeSizeInChars(type); + llvm::Value *size = llvm::ConstantInt::get(CGF.SizeTy, sz.getQuantity()); CGF.CGM.getObjCRuntime().EmitGCMemmoveCollectable(CGF, - Dest.getAddr(), - Src.getAggregateAddr(), - SizeVal); + dest.getAddr(), + src.getAddr(), + size); return; } - // If the result of the assignment is used, copy the LHS there also. 
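
The consolidated EmitFinalDestCopy/EmitCopy path above encodes two conventions worth spelling out: the emitted copy is volatile if either operand is, and it carries the minimum of the two slot alignments. A toy stand-in; Slot here is hypothetical, not clang's AggValueSlot:

    #include <algorithm>
    #include <cstring>

    struct Slot {
      char *addr;
      unsigned alignBytes;
      bool isVolatile;
    };

    void emitCopy(Slot dest, Slot src, unsigned sizeBytes) {
      bool vol = dest.isVolatile || src.isVolatile;        // volatility merges
      unsigned align = std::min(dest.alignBytes, src.alignBytes);
      // Real codegen emits llvm.memcpy(dest, src, size, align, vol); plain
      // memcpy stands in, since C++ exposes no volatile-qualified memcpy.
      (void)vol; (void)align;
      std::memcpy(dest.addr, src.addr, sizeBytes);
    }
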
- // FIXME: Pass VolatileDest as well. I think we also need to merge volatile - // from the source as well, as we can't eliminate it if either operand - // is volatile, unless copy has volatile for both source and destination.. - CGF.EmitAggregateCopy(Dest.getAddr(), Src.getAggregateAddr(), E->getType(), - Dest.isVolatile()|Src.isVolatileQualified(), - Alignment); -} -/// EmitFinalDestCopy - Perform the final copy to DestPtr, if desired. -void AggExprEmitter::EmitFinalDestCopy(const Expr *E, LValue Src, bool Ignore) { - assert(Src.isSimple() && "Can't have aggregate bitfield, vector, etc"); - - CharUnits Alignment = std::min(Src.getAlignment(), Dest.getAlignment()); - EmitFinalDestCopy(E, Src.asAggregateRValue(), Ignore, Alignment.getQuantity()); + // If the result of the assignment is used, copy the LHS there also. + // It's volatile if either side is. Use the minimum alignment of + // the two sides. + CGF.EmitAggregateCopy(dest.getAddr(), src.getAddr(), type, + dest.isVolatile() || src.isVolatile(), + std::min(dest.getAlignment(), src.getAlignment())); } static QualType GetStdInitializerListElementType(QualType T) { @@ -526,7 +526,7 @@ void AggExprEmitter::VisitMaterializeTemporaryExpr(MaterializeTemporaryExpr *E){ } void AggExprEmitter::VisitOpaqueValueExpr(OpaqueValueExpr *e) { - EmitFinalDestCopy(e, CGF.getOpaqueLValueMapping(e)); + EmitFinalDestCopy(e->getType(), CGF.getOpaqueLValueMapping(e)); } void @@ -582,7 +582,15 @@ void AggExprEmitter::VisitCastExpr(CastExpr *E) { "should have been unpacked before we got here"); } - case CK_LValueToRValue: // hope for downstream optimization + case CK_LValueToRValue: + // If we're loading from a volatile type, force the destination + // into existence. + if (E->getSubExpr()->getType().isVolatileQualified()) { + EnsureDest(E->getType()); + return Visit(E->getSubExpr()); + } + // fallthrough + case CK_NoOp: case CK_AtomicToNonAtomic: case CK_NonAtomicToAtomic: @@ -676,7 +684,73 @@ void AggExprEmitter::VisitBinaryOperator(const BinaryOperator *E) { void AggExprEmitter::VisitPointerToDataMemberBinaryOperator( const BinaryOperator *E) { LValue LV = CGF.EmitPointerToDataMemberBinaryExpr(E); - EmitFinalDestCopy(E, LV); + EmitFinalDestCopy(E->getType(), LV); +} + +/// Is the value of the given expression possibly a reference to or +/// into a __block variable? +static bool isBlockVarRef(const Expr *E) { + // Make sure we look through parens. + E = E->IgnoreParens(); + + // Check for a direct reference to a __block variable. + if (const DeclRefExpr *DRE = dyn_cast(E)) { + const VarDecl *var = dyn_cast(DRE->getDecl()); + return (var && var->hasAttr()); + } + + // More complicated stuff. + + // Binary operators. + if (const BinaryOperator *op = dyn_cast(E)) { + // For an assignment or pointer-to-member operation, just care + // about the LHS. + if (op->isAssignmentOp() || op->isPtrMemOp()) + return isBlockVarRef(op->getLHS()); + + // For a comma, just care about the RHS. + if (op->getOpcode() == BO_Comma) + return isBlockVarRef(op->getRHS()); + + // FIXME: pointer arithmetic? + return false; + + // Check both sides of a conditional operator. + } else if (const AbstractConditionalOperator *op + = dyn_cast(E)) { + return isBlockVarRef(op->getTrueExpr()) + || isBlockVarRef(op->getFalseExpr()); + + // OVEs are required to support BinaryConditionalOperators. + } else if (const OpaqueValueExpr *op + = dyn_cast(E)) { + if (const Expr *src = op->getSourceExpr()) + return isBlockVarRef(src); + + // Casts are necessary to get things like (*(int*)&var) = foo(). 
+ // We don't really care about the kind of cast here, except + // we don't want to look through l2r casts, because it's okay + // to get the *value* in a __block variable. + } else if (const CastExpr *cast = dyn_cast(E)) { + if (cast->getCastKind() == CK_LValueToRValue) + return false; + return isBlockVarRef(cast->getSubExpr()); + + // Handle unary operators. Again, just aggressively look through + // it, ignoring the operation. + } else if (const UnaryOperator *uop = dyn_cast(E)) { + return isBlockVarRef(uop->getSubExpr()); + + // Look into the base of a field access. + } else if (const MemberExpr *mem = dyn_cast(E)) { + return isBlockVarRef(mem->getBase()); + + // Look into the base of a subscript. + } else if (const ArraySubscriptExpr *sub = dyn_cast(E)) { + return isBlockVarRef(sub->getBase()); + } + + return false; } void AggExprEmitter::VisitBinAssign(const BinaryOperator *E) { @@ -686,20 +760,26 @@ void AggExprEmitter::VisitBinAssign(const BinaryOperator *E) { E->getRHS()->getType()) && "Invalid assignment"); - if (const DeclRefExpr *DRE = dyn_cast(E->getLHS())) - if (const VarDecl *VD = dyn_cast(DRE->getDecl())) - if (VD->hasAttr() && - E->getRHS()->HasSideEffects(CGF.getContext())) { - // When __block variable on LHS, the RHS must be evaluated first - // as it may change the 'forwarding' field via call to Block_copy. - LValue RHS = CGF.EmitLValue(E->getRHS()); - LValue LHS = CGF.EmitLValue(E->getLHS()); - Dest = AggValueSlot::forLValue(LHS, AggValueSlot::IsDestructed, - needsGC(E->getLHS()->getType()), - AggValueSlot::IsAliased); - EmitFinalDestCopy(E, RHS, true); - return; - } + // If the LHS might be a __block variable, and the RHS can + // potentially cause a block copy, we need to evaluate the RHS first + // so that the assignment goes the right place. + // This is pretty semantically fragile. + if (isBlockVarRef(E->getLHS()) && + E->getRHS()->HasSideEffects(CGF.getContext())) { + // Ensure that we have a destination, and evaluate the RHS into that. + EnsureDest(E->getRHS()->getType()); + Visit(E->getRHS()); + + // Now emit the LHS and copy into it. + LValue LHS = CGF.EmitLValue(E->getLHS()); + + EmitCopy(E->getLHS()->getType(), + AggValueSlot::forLValue(LHS, AggValueSlot::IsDestructed, + needsGC(E->getLHS()->getType()), + AggValueSlot::IsAliased), + Dest); + return; + } LValue LHS = CGF.EmitLValue(E->getLHS()); @@ -708,8 +788,10 @@ void AggExprEmitter::VisitBinAssign(const BinaryOperator *E) { AggValueSlot::forLValue(LHS, AggValueSlot::IsDestructed, needsGC(E->getLHS()->getType()), AggValueSlot::IsAliased); - CGF.EmitAggExpr(E->getRHS(), LHSSlot, false); - EmitFinalDestCopy(E, LHS, true); + CGF.EmitAggExpr(E->getRHS(), LHSSlot); + + // Copy into the destination if the assignment isn't ignored. + EmitFinalDestCopy(E->getType(), LHS); } void AggExprEmitter:: @@ -762,14 +844,14 @@ void AggExprEmitter::VisitVAArgExpr(VAArgExpr *VE) { return; } - EmitFinalDestCopy(VE, CGF.MakeAddrLValue(ArgPtr, VE->getType())); + EmitFinalDestCopy(VE->getType(), CGF.MakeAddrLValue(ArgPtr, VE->getType())); } void AggExprEmitter::VisitCXXBindTemporaryExpr(CXXBindTemporaryExpr *E) { // Ensure that we have a slot, but if we already do, remember // whether it was externally destructed. bool wasExternallyDestructed = Dest.isExternallyDestructed(); - Dest = EnsureSlot(E->getType()); + EnsureDest(E->getType()); // We're going to push a destructor if there isn't already one. 
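
The isBlockVarRef walk completed above is deliberately conservative: it looks through value-preserving wrappers (casts, unary operators, member and subscript bases) but stops at lvalue-to-rvalue casts, since merely loading a __block variable's value cannot alias the assignment destination. A toy restatement of the decision table over a hypothetical mini-AST (not clang's Expr; fields relevant to each kind are assumed set):

    struct Expr {
      enum Kind { BlockVarRef, OtherVarRef, Assign, Comma, Conditional,
                  LValueToRValueCast, OtherCast, Member, Subscript } kind;
      const Expr *lhs = nullptr, *rhs = nullptr, *sub = nullptr;
    };

    bool isBlockVarRef(const Expr *E) {
      switch (E->kind) {
      case Expr::BlockVarRef:        return true;  // direct __block reference
      case Expr::OtherVarRef:        return false;
      case Expr::Assign:             return isBlockVarRef(E->lhs); // LHS only
      case Expr::Comma:              return isBlockVarRef(E->rhs); // RHS only
      case Expr::Conditional:        return isBlockVarRef(E->lhs) ||
                                            isBlockVarRef(E->rhs); // both arms
      case Expr::LValueToRValueCast: return false; // loads yield values
      case Expr::OtherCast:
      case Expr::Member:
      case Expr::Subscript:          return isBlockVarRef(E->sub); // look through
      }
      return false;
    }
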
Dest.setExternallyDestructed(); @@ -904,7 +986,7 @@ void AggExprEmitter::VisitInitListExpr(InitListExpr *E) { llvm::GlobalVariable* GV = new llvm::GlobalVariable(CGF.CGM.getModule(), C->getType(), true, llvm::GlobalValue::InternalLinkage, C, ""); - EmitFinalDestCopy(E, CGF.MakeAddrLValue(GV, E->getType())); + EmitFinalDestCopy(E->getType(), CGF.MakeAddrLValue(GV, E->getType())); return; } #endif @@ -1164,11 +1246,7 @@ static void CheckAggExprForMemSetUse(AggValueSlot &Slot, const Expr *E, /// type. The result is computed into DestPtr. Note that if DestPtr is null, /// the value of the aggregate expression is not needed. If VolatileDest is /// true, DestPtr cannot be 0. -/// -/// \param IsInitializer - true if this evaluation is initializing an -/// object whose lifetime is already being managed. -void CodeGenFunction::EmitAggExpr(const Expr *E, AggValueSlot Slot, - bool IgnoreResult) { +void CodeGenFunction::EmitAggExpr(const Expr *E, AggValueSlot Slot) { assert(E && hasAggregateLLVMType(E->getType()) && "Invalid aggregate expression to emit"); assert((Slot.getAddr() != 0 || Slot.isIgnored()) && @@ -1177,7 +1255,7 @@ void CodeGenFunction::EmitAggExpr(const Expr *E, AggValueSlot Slot, // Optimize the slot if possible. CheckAggExprForMemSetUse(Slot, E, *this); - AggExprEmitter(*this, Slot, IgnoreResult).Visit(const_cast(E)); + AggExprEmitter(*this, Slot).Visit(const_cast(E)); } LValue CodeGenFunction::EmitAggExprToLValue(const Expr *E) { @@ -1192,7 +1270,8 @@ LValue CodeGenFunction::EmitAggExprToLValue(const Expr *E) { void CodeGenFunction::EmitAggregateCopy(llvm::Value *DestPtr, llvm::Value *SrcPtr, QualType Ty, - bool isVolatile, unsigned Alignment) { + bool isVolatile, + CharUnits alignment) { assert(!Ty->isAnyComplexType() && "Shouldn't happen for complex"); if (getContext().getLangOpts().CPlusPlus) { @@ -1225,8 +1304,8 @@ void CodeGenFunction::EmitAggregateCopy(llvm::Value *DestPtr, std::pair TypeInfo = getContext().getTypeInfoInChars(Ty); - if (!Alignment) - Alignment = TypeInfo.second.getQuantity(); + if (alignment.isZero()) + alignment = TypeInfo.second; // FIXME: Handle variable sized types. @@ -1284,7 +1363,7 @@ void CodeGenFunction::EmitAggregateCopy(llvm::Value *DestPtr, Builder.CreateMemCpy(DestPtr, SrcPtr, llvm::ConstantInt::get(IntPtrTy, TypeInfo.first.getQuantity()), - Alignment, isVolatile); + alignment.getQuantity(), isVolatile); } void CodeGenFunction::MaybeEmitStdInitializerListCleanup(llvm::Value *loc, diff --git a/lib/CodeGen/CGExprCXX.cpp b/lib/CodeGen/CGExprCXX.cpp index c69c883..7c2c9f1 100644 --- a/lib/CodeGen/CGExprCXX.cpp +++ b/lib/CodeGen/CGExprCXX.cpp @@ -50,36 +50,10 @@ RValue CodeGenFunction::EmitCXXMemberCall(const CXXMethodDecl *MD, // And the rest of the call args. 
EmitCallArgs(Args, FPT, ArgBeg, ArgEnd); - return EmitCall(CGM.getTypes().arrangeFunctionCall(FPT->getResultType(), Args, - FPT->getExtInfo(), - required), + return EmitCall(CGM.getTypes().arrangeCXXMethodCall(Args, FPT, required), Callee, ReturnValue, Args, MD); } -static const CXXRecordDecl *getMostDerivedClassDecl(const Expr *Base) { - const Expr *E = Base; - - while (true) { - E = E->IgnoreParens(); - if (const CastExpr *CE = dyn_cast(E)) { - if (CE->getCastKind() == CK_DerivedToBase || - CE->getCastKind() == CK_UncheckedDerivedToBase || - CE->getCastKind() == CK_NoOp) { - E = CE->getSubExpr(); - continue; - } - } - - break; - } - - QualType DerivedType = E->getType(); - if (const PointerType *PTy = DerivedType->getAs()) - DerivedType = PTy->getPointeeType(); - - return cast(DerivedType->castAs()->getDecl()); -} - // FIXME: Ideally Expr::IgnoreParenNoopCasts should do this, but it doesn't do // quite what we want. static const Expr *skipNoOpCastsAndParens(const Expr *E) { @@ -126,7 +100,7 @@ static bool canDevirtualizeMemberFunctionCalls(ASTContext &Context, // b->f(); // } // - const CXXRecordDecl *MostDerivedClassDecl = getMostDerivedClassDecl(Base); + const CXXRecordDecl *MostDerivedClassDecl = Base->getBestDynamicClassType(); if (MostDerivedClassDecl->hasAttr()) return true; @@ -166,6 +140,14 @@ static bool canDevirtualizeMemberFunctionCalls(ASTContext &Context, return false; } +static CXXRecordDecl *getCXXRecord(const Expr *E) { + QualType T = E->getType(); + if (const PointerType *PTy = T->getAs()) + T = PTy->getPointeeType(); + const RecordType *Ty = T->castAs(); + return cast(Ty->getDecl()); +} + // Note: This function also emit constructor calls to support a MSVC // extensions allowing explicit constructor function call. RValue CodeGenFunction::EmitCXXMemberCallExpr(const CXXMemberCallExpr *CE, @@ -179,7 +161,7 @@ RValue CodeGenFunction::EmitCXXMemberCallExpr(const CXXMemberCallExpr *CE, const CXXMethodDecl *MD = cast(ME->getMemberDecl()); CGDebugInfo *DI = getDebugInfo(); - if (DI && CGM.getCodeGenOpts().LimitDebugInfo + if (DI && CGM.getCodeGenOpts().DebugInfo == CodeGenOptions::LimitedDebugInfo && !isa(ME->getBase())) { QualType PQTy = ME->getBase()->IgnoreParenImpCasts()->getType(); if (const PointerType * PTy = dyn_cast(PQTy)) { @@ -196,11 +178,45 @@ RValue CodeGenFunction::EmitCXXMemberCallExpr(const CXXMemberCallExpr *CE, } // Compute the object pointer. + const Expr *Base = ME->getBase(); + bool CanUseVirtualCall = MD->isVirtual() && !ME->hasQualifier(); + + const CXXMethodDecl *DevirtualizedMethod = NULL; + if (CanUseVirtualCall && + canDevirtualizeMemberFunctionCalls(getContext(), Base, MD)) { + const CXXRecordDecl *BestDynamicDecl = Base->getBestDynamicClassType(); + DevirtualizedMethod = MD->getCorrespondingMethodInClass(BestDynamicDecl); + assert(DevirtualizedMethod); + const CXXRecordDecl *DevirtualizedClass = DevirtualizedMethod->getParent(); + const Expr *Inner = Base->ignoreParenBaseCasts(); + if (getCXXRecord(Inner) == DevirtualizedClass) + // If the class of the Inner expression is where the dynamic method + // is defined, build the this pointer from it. + Base = Inner; + else if (getCXXRecord(Base) != DevirtualizedClass) { + // If the method is defined in a class that is not the best dynamic + // one or the one of the full expression, we would have to build + // a derived-to-base cast to compute the correct this pointer, but + // we don't have support for that yet, so do a virtual call. 
+ DevirtualizedMethod = NULL; + } + // If the return types are not the same, this might be a case where more + // code needs to run to compensate for it. For example, the derived + // method might return a type that inherits form from the return + // type of MD and has a prefix. + // For now we just avoid devirtualizing these covariant cases. + if (DevirtualizedMethod && + DevirtualizedMethod->getResultType().getCanonicalType() != + MD->getResultType().getCanonicalType()) + DevirtualizedMethod = NULL; + } + llvm::Value *This; if (ME->isArrow()) - This = EmitScalarExpr(ME->getBase()); + This = EmitScalarExpr(Base); else - This = EmitLValue(ME->getBase()).getAddress(); + This = EmitLValue(Base).getAddress(); + if (MD->isTrivial()) { if (isa(MD)) return RValue::get(0); @@ -247,10 +263,8 @@ RValue CodeGenFunction::EmitCXXMemberCallExpr(const CXXMemberCallExpr *CE, // // We also don't emit a virtual call if the base expression has a record type // because then we know what the type is. - bool UseVirtualCall; - UseVirtualCall = MD->isVirtual() && !ME->hasQualifier() - && !canDevirtualizeMemberFunctionCalls(getContext(), - ME->getBase(), MD); + bool UseVirtualCall = CanUseVirtualCall && !DevirtualizedMethod; + llvm::Value *Callee; if (const CXXDestructorDecl *Dtor = dyn_cast(MD)) { if (UseVirtualCall) { @@ -260,8 +274,13 @@ RValue CodeGenFunction::EmitCXXMemberCallExpr(const CXXMemberCallExpr *CE, MD->isVirtual() && ME->hasQualifier()) Callee = BuildAppleKextVirtualCall(MD, ME->getQualifier(), Ty); - else + else if (!DevirtualizedMethod) Callee = CGM.GetAddrOfFunction(GlobalDecl(Dtor, Dtor_Complete), Ty); + else { + const CXXDestructorDecl *DDtor = + cast(DevirtualizedMethod); + Callee = CGM.GetAddrOfFunction(GlobalDecl(DDtor, Dtor_Complete), Ty); + } } } else if (const CXXConstructorDecl *Ctor = dyn_cast(MD)) { @@ -273,8 +292,11 @@ RValue CodeGenFunction::EmitCXXMemberCallExpr(const CXXMemberCallExpr *CE, MD->isVirtual() && ME->hasQualifier()) Callee = BuildAppleKextVirtualCall(MD, ME->getQualifier(), Ty); - else + else if (!DevirtualizedMethod) Callee = CGM.GetAddrOfFunction(MD, Ty); + else { + Callee = CGM.GetAddrOfFunction(DevirtualizedMethod, Ty); + } } return EmitCXXMemberCall(MD, Callee, ReturnValue, This, /*VTT=*/0, @@ -319,10 +341,12 @@ CodeGenFunction::EmitCXXMemberPointerCallExpr(const CXXMemberCallExpr *E, // Push the this ptr. Args.add(RValue::get(This), ThisType); + + RequiredArgs required = RequiredArgs::forPrototypePlus(FPT, 1); // And the rest of the call args EmitCallArgs(Args, FPT, E->arg_begin(), E->arg_end()); - return EmitCall(CGM.getTypes().arrangeFunctionCall(Args, FPT), Callee, + return EmitCall(CGM.getTypes().arrangeCXXMethodCall(Args, FPT, required), Callee, ReturnValue, Args); } @@ -409,7 +433,6 @@ CodeGenFunction::EmitCXXConstructExpr(const CXXConstructExpr *E, if (E->requiresZeroInitialization() && !Dest.isZeroed()) { switch (E->getConstructionKind()) { case CXXConstructExpr::CK_Delegating: - assert(0 && "Delegating constructor should not need zeroing"); case CXXConstructExpr::CK_Complete: EmitNullInitialization(Dest.getAddr(), E->getType()); break; @@ -1006,7 +1029,7 @@ namespace { DeleteArgs.add(getPlacementArgs()[I], *AI++); // Call 'operator delete'. - CGF.EmitCall(CGF.CGM.getTypes().arrangeFunctionCall(DeleteArgs, FPT), + CGF.EmitCall(CGF.CGM.getTypes().arrangeFreeFunctionCall(DeleteArgs, FPT), CGF.CGM.GetAddrOfFunction(OperatorDelete), ReturnValueSlot(), DeleteArgs, OperatorDelete); } @@ -1067,7 +1090,7 @@ namespace { } // Call 'operator delete'. 
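
A usage-level picture of the devirtualization added above: when the dynamic type of the object expression is known exactly, for instance a complete local object or a class marked final, the virtual call is emitted as a direct call; as the hunk notes, a covariant (different) return type currently suppresses it.

    struct Base {
      virtual ~Base() {}
      virtual int f() { return 1; }
    };
    struct Derived final : Base {   // 'final' is what the FinalAttr check sees
      int f() override { return 2; }
    };

    int demo() {
      Derived d;
      int direct = d.f();      // complete object: direct call to Derived::f
      Base *b = &d;
      int viaBase = b->f();    // static type Base, not final: stays virtual
      return direct + viaBase; // 4
    }
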
- CGF.EmitCall(CGF.CGM.getTypes().arrangeFunctionCall(DeleteArgs, FPT), + CGF.EmitCall(CGF.CGM.getTypes().arrangeFreeFunctionCall(DeleteArgs, FPT), CGF.CGM.GetAddrOfFunction(OperatorDelete), ReturnValueSlot(), DeleteArgs, OperatorDelete); } @@ -1182,8 +1205,8 @@ llvm::Value *CodeGenFunction::EmitCXXNewExpr(const CXXNewExpr *E) { // TODO: kill any unnecessary computations done for the size // argument. } else { - RV = EmitCall(CGM.getTypes().arrangeFunctionCall(allocatorArgs, - allocatorType), + RV = EmitCall(CGM.getTypes().arrangeFreeFunctionCall(allocatorArgs, + allocatorType), CGM.GetAddrOfFunction(allocator), ReturnValueSlot(), allocatorArgs, allocator); } @@ -1306,7 +1329,7 @@ void CodeGenFunction::EmitDeleteCall(const FunctionDecl *DeleteFD, DeleteArgs.add(RValue::get(Size), SizeTy); // Emit the call to delete. - EmitCall(CGM.getTypes().arrangeFunctionCall(DeleteArgs, DeleteFTy), + EmitCall(CGM.getTypes().arrangeFreeFunctionCall(DeleteArgs, DeleteFTy), CGM.GetAddrOfFunction(DeleteFD), ReturnValueSlot(), DeleteArgs, DeleteFD); } @@ -1462,7 +1485,7 @@ namespace { } // Emit the call to delete. - CGF.EmitCall(CGF.getTypes().arrangeFunctionCall(Args, DeleteFTy), + CGF.EmitCall(CGF.getTypes().arrangeFreeFunctionCall(Args, DeleteFTy), CGF.CGM.GetAddrOfFunction(OperatorDelete), ReturnValueSlot(), Args, OperatorDelete); } @@ -1510,18 +1533,7 @@ static void EmitArrayDelete(CodeGenFunction &CGF, } void CodeGenFunction::EmitCXXDeleteExpr(const CXXDeleteExpr *E) { - - // Get at the argument before we performed the implicit conversion - // to void*. const Expr *Arg = E->getArgument(); - while (const ImplicitCastExpr *ICE = dyn_cast(Arg)) { - if (ICE->getCastKind() != CK_UserDefinedConversion && - ICE->getType()->isVoidPointerType()) - Arg = ICE->getSubExpr(); - else - break; - } - llvm::Value *Ptr = EmitScalarExpr(Arg); // Null check the pointer. @@ -1631,15 +1643,9 @@ llvm::Value *CodeGenFunction::EmitCXXTypeidExpr(const CXXTypeidExpr *E) { // polymorphic class type, the result refers to a std::type_info object // representing the type of the most derived object (that is, the dynamic // type) to which the glvalue refers. - if (E->getExprOperand()->isGLValue()) { - if (const RecordType *RT = - E->getExprOperand()->getType()->getAs()) { - const CXXRecordDecl *RD = cast(RT->getDecl()); - if (RD->isPolymorphic()) - return EmitTypeidFromVTable(*this, E->getExprOperand(), - StdTypeInfoPtrTy); - } - } + if (E->isPotentiallyEvaluated()) + return EmitTypeidFromVTable(*this, E->getExprOperand(), + StdTypeInfoPtrTy); QualType OperandTy = E->getExprOperand()->getType(); return Builder.CreateBitCast(CGM.GetAddrOfRTTIDescriptor(OperandTy), diff --git a/lib/CodeGen/CGExprConstant.cpp b/lib/CodeGen/CGExprConstant.cpp index bc9f9ef..a17a436 100644 --- a/lib/CodeGen/CGExprConstant.cpp +++ b/lib/CodeGen/CGExprConstant.cpp @@ -386,11 +386,11 @@ bool ConstStructBuilder::Build(InitListExpr *ILE) { if (IsMsStruct) { // Zero-length bitfields following non-bitfield members are // ignored: - if (CGM.getContext().ZeroBitfieldFollowsNonBitfield((*Field), LastFD)) { + if (CGM.getContext().ZeroBitfieldFollowsNonBitfield(*Field, LastFD)) { --FieldNo; continue; } - LastFD = (*Field); + LastFD = *Field; } // If this is a union, skip all the fields that aren't being initialized. @@ -399,7 +399,7 @@ bool ConstStructBuilder::Build(InitListExpr *ILE) { // Don't emit anonymous bitfields, they just affect layout. 
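
The EmitCXXTypeidExpr change above keys the vtable path off Expr::isPotentiallyEvaluated() instead of re-deriving polymorphism by hand: typeid applied to a glvalue of polymorphic class type must be answered at run time from the object's vtable (and throws std::bad_typeid for a dereferenced null pointer), while any other operand is a compile-time constant. A usage-level contrast:

    #include <typeinfo>

    struct Poly  { virtual ~Poly() {} };
    struct Plain {};

    const std::type_info &dynamicQuery(Poly &p)  { return typeid(p); } // vtable load
    const std::type_info &staticQuery(Plain &q)  { return typeid(q); } // constant
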
if (Field->isUnnamedBitfield()) { - LastFD = (*Field); + LastFD = *Field; continue; } @@ -486,11 +486,11 @@ void ConstStructBuilder::Build(const APValue &Val, const RecordDecl *RD, if (IsMsStruct) { // Zero-length bitfields following non-bitfield members are // ignored: - if (CGM.getContext().ZeroBitfieldFollowsNonBitfield((*Field), LastFD)) { + if (CGM.getContext().ZeroBitfieldFollowsNonBitfield(*Field, LastFD)) { --FieldNo; continue; } - LastFD = (*Field); + LastFD = *Field; } // If this is a union, skip all the fields that aren't being initialized. @@ -499,7 +499,7 @@ void ConstStructBuilder::Build(const APValue &Val, const RecordDecl *RD, // Don't emit anonymous bitfields, they just affect layout. if (Field->isUnnamedBitfield()) { - LastFD = (*Field); + LastFD = *Field; continue; } @@ -932,7 +932,8 @@ public: C = new llvm::GlobalVariable(CGM.getModule(), C->getType(), E->getType().isConstant(CGM.getContext()), llvm::GlobalValue::InternalLinkage, - C, ".compoundliteral", 0, false, + C, ".compoundliteral", 0, + llvm::GlobalVariable::NotThreadLocal, CGM.getContext().getTargetAddressSpace(E->getType())); return C; } @@ -1300,7 +1301,8 @@ FillInNullDataMemberPointers(CodeGenModule &CGM, QualType T, if (CGM.getTypes().isZeroInitializable(BaseDecl)) continue; - uint64_t BaseOffset = Layout.getBaseClassOffsetInBits(BaseDecl); + uint64_t BaseOffset = + CGM.getContext().toBits(Layout.getBaseClassOffset(BaseDecl)); FillInNullDataMemberPointers(CGM, I->getType(), Elements, StartOffset + BaseOffset); } diff --git a/lib/CodeGen/CGExprScalar.cpp b/lib/CodeGen/CGExprScalar.cpp index 18891f7..1cccafe 100644 --- a/lib/CodeGen/CGExprScalar.cpp +++ b/lib/CodeGen/CGExprScalar.cpp @@ -498,8 +498,8 @@ public: Value *VisitObjCStringLiteral(const ObjCStringLiteral *E) { return CGF.EmitObjCStringLiteral(E); } - Value *VisitObjCNumericLiteral(ObjCNumericLiteral *E) { - return CGF.EmitObjCNumericLiteral(E); + Value *VisitObjCBoxedExpr(ObjCBoxedExpr *E) { + return CGF.EmitObjCBoxedExpr(E); } Value *VisitObjCArrayLiteral(ObjCArrayLiteral *E) { return CGF.EmitObjCArrayLiteral(E); @@ -798,14 +798,15 @@ Value *ScalarExprEmitter::VisitMemberExpr(MemberExpr *E) { return Builder.getInt(Value); } - // Emit debug info for aggregate now, if it was delayed to reduce + // Emit debug info for aggregate now, if it was delayed to reduce // debug info size. CGDebugInfo *DI = CGF.getDebugInfo(); - if (DI && CGF.CGM.getCodeGenOpts().LimitDebugInfo) { + if (DI && + CGF.CGM.getCodeGenOpts().DebugInfo == CodeGenOptions::LimitedDebugInfo) { QualType PQTy = E->getBase()->IgnoreParenImpCasts()->getType(); if (const PointerType * PTy = dyn_cast(PQTy)) if (FieldDecl *M = dyn_cast(E->getMemberDecl())) - DI->getOrCreateRecordType(PTy->getPointeeType(), + DI->getOrCreateRecordType(PTy->getPointeeType(), M->getParent()->getLocation()); } return EmitLoadOfLValue(E); @@ -1520,7 +1521,7 @@ Value *ScalarExprEmitter::VisitOffsetOfExpr(OffsetOfExpr *E) { // FIXME: It would be nice if we didn't have to loop here! for (RecordDecl::field_iterator Field = RD->field_begin(), FieldEnd = RD->field_end(); - Field != FieldEnd; (void)++Field, ++i) { + Field != FieldEnd; ++Field, ++i) { if (*Field == MemberDecl) break; } @@ -1554,9 +1555,8 @@ Value *ScalarExprEmitter::VisitOffsetOfExpr(OffsetOfExpr *E) { // Compute the offset to the base. 
const RecordType *BaseRT = CurrentType->getAs(); CXXRecordDecl *BaseRD = cast(BaseRT->getDecl()); - int64_t OffsetInt = RL.getBaseClassOffsetInBits(BaseRD) / - CGF.getContext().getCharWidth(); - Offset = llvm::ConstantInt::get(ResultType, OffsetInt); + CharUnits OffsetInt = RL.getBaseClassOffset(BaseRD); + Offset = llvm::ConstantInt::get(ResultType, OffsetInt.getQuantity()); break; } } @@ -1682,11 +1682,9 @@ LValue ScalarExprEmitter::EmitCompoundAssignLValue( // Load/convert the LHS. LValue LHSLV = EmitCheckedLValue(E->getLHS()); OpInfo.LHS = EmitLoadOfLValue(LHSLV); - OpInfo.LHS = EmitScalarConversion(OpInfo.LHS, LHSTy, - E->getComputationLHSType()); llvm::PHINode *atomicPHI = 0; - if (const AtomicType *atomicTy = OpInfo.Ty->getAs()) { + if (LHSTy->isAtomicType()) { // FIXME: For floating point types, we should be saving and restoring the // floating point environment in the loop. llvm::BasicBlock *startBB = Builder.GetInsertBlock(); @@ -1695,10 +1693,12 @@ LValue ScalarExprEmitter::EmitCompoundAssignLValue( Builder.SetInsertPoint(opBB); atomicPHI = Builder.CreatePHI(OpInfo.LHS->getType(), 2); atomicPHI->addIncoming(OpInfo.LHS, startBB); - OpInfo.Ty = atomicTy->getValueType(); OpInfo.LHS = atomicPHI; } - + + OpInfo.LHS = EmitScalarConversion(OpInfo.LHS, LHSTy, + E->getComputationLHSType()); + // Expand the binary operator. Result = (this->*Func)(OpInfo); @@ -2592,7 +2592,7 @@ VisitAbstractConditionalOperator(const AbstractConditionalOperator *E) { llvm::Value *LHSTmp = LHS; bool wasCast = false; llvm::VectorType *rhsVTy = cast(RHS->getType()); - if (rhsVTy->getElementType()->isFloatTy()) { + if (rhsVTy->getElementType()->isFloatingPointTy()) { RHSTmp = Builder.CreateBitCast(RHS, tmp2->getType()); LHSTmp = Builder.CreateBitCast(LHS, tmp->getType()); wasCast = true; diff --git a/lib/CodeGen/CGObjC.cpp b/lib/CodeGen/CGObjC.cpp index d0aa0f5..4ac172d 100644 --- a/lib/CodeGen/CGObjC.cpp +++ b/lib/CodeGen/CGObjC.cpp @@ -30,7 +30,7 @@ typedef llvm::PointerIntPair TryEmitResult; static TryEmitResult tryEmitARCRetainScalarExpr(CodeGenFunction &CGF, const Expr *e); static RValue AdjustRelatedResultType(CodeGenFunction &CGF, - const Expr *E, + QualType ET, const ObjCMethodDecl *Method, RValue Result); @@ -51,36 +51,36 @@ llvm::Value *CodeGenFunction::EmitObjCStringLiteral(const ObjCStringLiteral *E) return llvm::ConstantExpr::getBitCast(C, ConvertType(E->getType())); } -/// EmitObjCNumericLiteral - This routine generates code for -/// the appropriate +[NSNumber numberWith:] method. +/// EmitObjCBoxedExpr - This routine generates code to call +/// the appropriate expression boxing method. This will either be +/// one of +[NSNumber numberWith:], or +[NSString stringWithUTF8String:]. /// llvm::Value * -CodeGenFunction::EmitObjCNumericLiteral(const ObjCNumericLiteral *E) { +CodeGenFunction::EmitObjCBoxedExpr(const ObjCBoxedExpr *E) { // Generate the correct selector for this literal's concrete type. - const Expr *NL = E->getNumber(); + const Expr *SubExpr = E->getSubExpr(); // Get the method. - const ObjCMethodDecl *Method = E->getObjCNumericLiteralMethod(); - assert(Method && "NSNumber method is null"); - Selector Sel = Method->getSelector(); + const ObjCMethodDecl *BoxingMethod = E->getBoxingMethod(); + assert(BoxingMethod && "BoxingMethod is null"); + assert(BoxingMethod->isClassMethod() && "BoxingMethod must be a class method"); + Selector Sel = BoxingMethod->getSelector(); // Generate a reference to the class pointer, which will be the receiver. 
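
The EmitCompoundAssignLValue change above moves the LHS conversion after the atomic PHI, so on an _Atomic l-value the conversion re-runs each time the compare-and-swap loop retries with a freshly observed value. A C++11 stand-in for the loop shape, using compare_exchange_weak in place of the IR-level cmpxchg and PHI:

    #include <atomic>

    int addFetch(std::atomic<int> &a, int rhs) {
      int old = a.load(); // initial load in the loop pre-header
      while (!a.compare_exchange_weak(old, old + rhs)) {
        // On failure 'old' is refreshed -- the PHI node in IR terms; any
        // conversion of the loaded value belongs here, inside the loop.
      }
      return old + rhs;
    }
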
- QualType ResultType = E->getType(); // should be NSNumber * - const ObjCObjectPointerType *InterfacePointerType = - ResultType->getAsObjCInterfacePointerType(); - ObjCInterfaceDecl *NSNumberDecl = - InterfacePointerType->getObjectType()->getInterface(); + // Assumes that the method was introduced in the class that should be + // messaged (avoids pulling it out of the result type). CGObjCRuntime &Runtime = CGM.getObjCRuntime(); - llvm::Value *Receiver = Runtime.GetClass(Builder, NSNumberDecl); - - const ParmVarDecl *argDecl = *Method->param_begin(); + const ObjCInterfaceDecl *ClassDecl = BoxingMethod->getClassInterface(); + llvm::Value *Receiver = Runtime.GetClass(Builder, ClassDecl); + + const ParmVarDecl *argDecl = *BoxingMethod->param_begin(); QualType ArgQT = argDecl->getType().getUnqualifiedType(); - RValue RV = EmitAnyExpr(NL); + RValue RV = EmitAnyExpr(SubExpr); CallArgList Args; Args.add(RV, ArgQT); - + RValue result = Runtime.GenerateMessageSend(*this, ReturnValueSlot(), - ResultType, Sel, Receiver, Args, - NSNumberDecl, Method); + BoxingMethod->getResultType(), Sel, Receiver, Args, + ClassDecl, BoxingMethod); return Builder.CreateBitCast(result.getScalarVal(), ConvertType(E->getType())); } @@ -202,20 +202,20 @@ llvm::Value *CodeGenFunction::EmitObjCProtocolExpr(const ObjCProtocolExpr *E) { /// \brief Adjust the type of the result of an Objective-C message send /// expression when the method has a related result type. static RValue AdjustRelatedResultType(CodeGenFunction &CGF, - const Expr *E, + QualType ExpT, const ObjCMethodDecl *Method, RValue Result) { if (!Method) return Result; if (!Method->hasRelatedResultType() || - CGF.getContext().hasSameType(E->getType(), Method->getResultType()) || + CGF.getContext().hasSameType(ExpT, Method->getResultType()) || !Result.isScalar()) return Result; // We have applied a related result type. Cast the rvalue appropriately. return RValue::get(CGF.Builder.CreateBitCast(Result.getScalarVal(), - CGF.ConvertType(E->getType()))); + CGF.ConvertType(ExpT))); } /// Decide whether to extend the lifetime of the receiver of a @@ -401,7 +401,7 @@ RValue CodeGenFunction::EmitObjCMessageExpr(const ObjCMessageExpr *E, Builder.CreateStore(newSelf, selfAddr); } - return AdjustRelatedResultType(*this, E, method, result); + return AdjustRelatedResultType(*this, E->getType(), method, result); } namespace { @@ -507,9 +507,9 @@ static void emitStructGetterCall(CodeGenFunction &CGF, ObjCIvarDecl *ivar, args.add(RValue::get(CGF.Builder.getInt1(hasStrong)), Context.BoolTy); llvm::Value *fn = CGF.CGM.getObjCRuntime().GetGetStructFunction(); - CGF.EmitCall(CGF.getTypes().arrangeFunctionCall(Context.VoidTy, args, - FunctionType::ExtInfo(), - RequiredArgs::All), + CGF.EmitCall(CGF.getTypes().arrangeFreeFunctionCall(Context.VoidTy, args, + FunctionType::ExtInfo(), + RequiredArgs::All), fn, ReturnValueSlot(), args); } @@ -580,7 +580,7 @@ namespace { }; } -/// Pick an implementation strategy for the the given property synthesis. +/// Pick an implementation strategy for the given property synthesis. PropertyImplStrategy::PropertyImplStrategy(CodeGenModule &CGM, const ObjCPropertyImplDecl *propImpl) { const ObjCPropertyDecl *prop = propImpl->getPropertyDecl(); @@ -698,8 +698,9 @@ PropertyImplStrategy::PropertyImplStrategy(CodeGenModule &CGM, Kind = Native; } -/// GenerateObjCGetter - Generate an Objective-C property getter -/// function. The given Decl must be an ObjCImplementationDecl. @synthesize +/// \brief Generate an Objective-C property getter function. 
+/// +/// The given Decl must be an ObjCImplementationDecl. \@synthesize /// is illegal within a category. void CodeGenFunction::GenerateObjCGetter(ObjCImplementationDecl *IMP, const ObjCPropertyImplDecl *PID) { @@ -710,7 +711,7 @@ void CodeGenFunction::GenerateObjCGetter(ObjCImplementationDecl *IMP, assert(OMD && "Invalid call to generate getter (empty method)"); StartObjCMethod(OMD, IMP->getClassInterface(), OMD->getLocStart()); - generateObjCGetterBody(IMP, PID, AtomicHelperFn); + generateObjCGetterBody(IMP, PID, OMD, AtomicHelperFn); FinishFunction(); } @@ -763,15 +764,17 @@ static void emitCPPObjectAtomicGetterCall(CodeGenFunction &CGF, llvm::Value *copyCppAtomicObjectFn = CGF.CGM.getObjCRuntime().GetCppAtomicObjectFunction(); - CGF.EmitCall(CGF.getTypes().arrangeFunctionCall(CGF.getContext().VoidTy, args, - FunctionType::ExtInfo(), - RequiredArgs::All), + CGF.EmitCall(CGF.getTypes().arrangeFreeFunctionCall(CGF.getContext().VoidTy, + args, + FunctionType::ExtInfo(), + RequiredArgs::All), copyCppAtomicObjectFn, ReturnValueSlot(), args); } void CodeGenFunction::generateObjCGetterBody(const ObjCImplementationDecl *classImpl, const ObjCPropertyImplDecl *propImpl, + const ObjCMethodDecl *GetterMethodDecl, llvm::Constant *AtomicHelperFn) { // If there's a non-trivial 'get' expression, we just have to emit that. if (!hasTrivialGetExpr(propImpl)) { @@ -850,16 +853,16 @@ CodeGenFunction::generateObjCGetterBody(const ObjCImplementationDecl *classImpl, // FIXME: We shouldn't need to get the function info here, the // runtime already should have computed it to build the function. - RValue RV = EmitCall(getTypes().arrangeFunctionCall(propType, args, - FunctionType::ExtInfo(), - RequiredArgs::All), + RValue RV = EmitCall(getTypes().arrangeFreeFunctionCall(propType, args, + FunctionType::ExtInfo(), + RequiredArgs::All), getPropertyFn, ReturnValueSlot(), args); // We need to fix the type here. Ivars with copy & retain are // always objects so we don't need to worry about complex or // aggregates. 
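emitStructGetterCall above marshals five arguments into the runtime's struct-copy helper (named objc_copyStruct in the CGObjCMac hunk further down). A self-contained sketch of that call shape; the prototype is inferred from the argument list visible in the hunk, and the body here is a trivial local stand-in, not the real runtime function:

#include <cstddef>
#include <cstring>
#include <iostream>

// Assumed prototype matching the five arguments marshalled above; the real
// symbol lives in the Objective-C runtime support library and handles
// atomicity and strong members, which this stand-in ignores.
extern "C" void objc_copyStruct(void *dest, const void *src, std::ptrdiff_t size,
                                bool /*atomic*/, bool /*hasStrong*/) {
  std::memcpy(dest, src, static_cast<std::size_t>(size));
}

struct Rect { double x, y, w, h; };

int main() {
  Rect a{1, 2, 3, 4}, b{};
  objc_copyStruct(&b, &a, sizeof(Rect), /*atomic=*/true, /*hasStrong=*/false);
  std::cout << b.w << "\n"; // 3
}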
RV = RValue::get(Builder.CreateBitCast(RV.getScalarVal(), - getTypes().ConvertType(propType))); + getTypes().ConvertType(getterMethod->getResultType()))); EmitReturnOfRValue(RV, propType); @@ -905,6 +908,8 @@ CodeGenFunction::generateObjCGetterBody(const ObjCImplementationDecl *classImpl, } value = Builder.CreateBitCast(value, ConvertType(propType)); + value = Builder.CreateBitCast(value, + ConvertType(GetterMethodDecl->getResultType())); } EmitReturnOfRValue(RValue::get(value), propType); @@ -952,9 +957,10 @@ static void emitStructSetterCall(CodeGenFunction &CGF, ObjCMethodDecl *OMD, args.add(RValue::get(CGF.Builder.getFalse()), CGF.getContext().BoolTy); llvm::Value *copyStructFn = CGF.CGM.getObjCRuntime().GetSetStructFunction(); - CGF.EmitCall(CGF.getTypes().arrangeFunctionCall(CGF.getContext().VoidTy, args, - FunctionType::ExtInfo(), - RequiredArgs::All), + CGF.EmitCall(CGF.getTypes().arrangeFreeFunctionCall(CGF.getContext().VoidTy, + args, + FunctionType::ExtInfo(), + RequiredArgs::All), copyStructFn, ReturnValueSlot(), args); } @@ -989,9 +995,10 @@ static void emitCPPObjectAtomicSetterCall(CodeGenFunction &CGF, llvm::Value *copyCppAtomicObjectFn = CGF.CGM.getObjCRuntime().GetCppAtomicObjectFunction(); - CGF.EmitCall(CGF.getTypes().arrangeFunctionCall(CGF.getContext().VoidTy, args, - FunctionType::ExtInfo(), - RequiredArgs::All), + CGF.EmitCall(CGF.getTypes().arrangeFreeFunctionCall(CGF.getContext().VoidTy, + args, + FunctionType::ExtInfo(), + RequiredArgs::All), copyCppAtomicObjectFn, ReturnValueSlot(), args); @@ -1125,9 +1132,9 @@ CodeGenFunction::generateObjCSetterBody(const ObjCImplementationDecl *classImpl, if (setOptimizedPropertyFn) { args.add(RValue::get(arg), getContext().getObjCIdType()); args.add(RValue::get(ivarOffset), getContext().getPointerDiffType()); - EmitCall(getTypes().arrangeFunctionCall(getContext().VoidTy, args, - FunctionType::ExtInfo(), - RequiredArgs::All), + EmitCall(getTypes().arrangeFreeFunctionCall(getContext().VoidTy, args, + FunctionType::ExtInfo(), + RequiredArgs::All), setOptimizedPropertyFn, ReturnValueSlot(), args); } else { args.add(RValue::get(ivarOffset), getContext().getPointerDiffType()); @@ -1138,9 +1145,9 @@ CodeGenFunction::generateObjCSetterBody(const ObjCImplementationDecl *classImpl, getContext().BoolTy); // FIXME: We shouldn't need to get the function info here, the runtime // already should have computed it to build the function. - EmitCall(getTypes().arrangeFunctionCall(getContext().VoidTy, args, - FunctionType::ExtInfo(), - RequiredArgs::All), + EmitCall(getTypes().arrangeFreeFunctionCall(getContext().VoidTy, args, + FunctionType::ExtInfo(), + RequiredArgs::All), setPropertyFn, ReturnValueSlot(), args); } @@ -1206,8 +1213,9 @@ CodeGenFunction::generateObjCSetterBody(const ObjCImplementationDecl *classImpl, EmitStmt(&assign); } -/// GenerateObjCSetter - Generate an Objective-C property setter -/// function. The given Decl must be an ObjCImplementationDecl. @synthesize +/// \brief Generate an Objective-C property setter function. +/// +/// The given Decl must be an ObjCImplementationDecl. \@synthesize /// is illegal within a category. void CodeGenFunction::GenerateObjCSetter(ObjCImplementationDecl *IMP, const ObjCPropertyImplDecl *PID) { @@ -1502,9 +1510,9 @@ void CodeGenFunction::EmitObjCForCollectionStmt(const ObjCForCollectionStmt &S){ Args2.add(RValue::get(V), getContext().getObjCIdType()); // FIXME: We shouldn't need to get the function info here, the runtime already // should have computed it to build the function. 
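The non-GC setter path above ends in a call to objc_setProperty carrying the receiver, selector, ivar offset, new value, and the atomic/copy flags. A runnable sketch of that calling convention; the typedefs and the body are stand-ins (the real entry point retains or copies according to the flags):

#include <cstddef>
#include <iostream>

// Stand-ins for runtime types so the sketch is self-contained.
using id = void *;
using SEL = const char *;

// The six arguments line up with the CallArgList built in the hunk above.
extern "C" void objc_setProperty(id self, SEL /*_cmd*/, std::ptrdiff_t offset,
                                 id newValue, bool atomic, bool shouldCopy) {
  // Toy body: store the pointer at self+offset; retain/copy elided.
  *reinterpret_cast<id *>(static_cast<char *>(self) + offset) = newValue;
  std::cout << "atomic=" << atomic << " copy=" << shouldCopy << "\n";
}

int main() {
  struct Obj { id isa; id ivar; } o{};
  int dummy;
  objc_setProperty(&o, "setFoo:", offsetof(Obj, ivar), &dummy,
                   /*atomic=*/true, /*shouldCopy=*/false);
  std::cout << (o.ivar == &dummy) << "\n"; // 1
}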
- EmitCall(CGM.getTypes().arrangeFunctionCall(getContext().VoidTy, Args2, - FunctionType::ExtInfo(), - RequiredArgs::All), + EmitCall(CGM.getTypes().arrangeFreeFunctionCall(getContext().VoidTy, Args2, + FunctionType::ExtInfo(), + RequiredArgs::All), EnumerationMutationFn, ReturnValueSlot(), Args2); // Otherwise, or if the mutation function returns, just continue. @@ -1685,11 +1693,16 @@ static llvm::Constant *createARCRuntimeFunction(CodeGenModule &CGM, StringRef fnName) { llvm::Constant *fn = CGM.CreateRuntimeFunction(type, fnName); - // In -fobjc-no-arc-runtime, emit weak references to the runtime - // support library. - if (!CGM.getCodeGenOpts().ObjCRuntimeHasARC) - if (llvm::Function *f = dyn_cast(fn)) + // If the target runtime doesn't naturally support ARC, emit weak + // references to the runtime support library. We don't really + // permit this to fail, but we need a particular relocation style. + if (llvm::Function *f = dyn_cast(fn)) { + if (!CGM.getLangOpts().ObjCRuntime.hasARC()) f->setLinkage(llvm::Function::ExternalWeakLinkage); + // set nonlazybind attribute for these APIs for performance. + if (fnName == "objc_retain" || fnName == "objc_release") + f->addFnAttr(llvm::Attribute::NonLazyBind); + } return fn; } @@ -1808,8 +1821,8 @@ static void emitARCCopyOperation(CodeGenFunction &CGF, } /// Produce the code to do a retain. Based on the type, calls one of: -/// call i8* @objc_retain(i8* %value) -/// call i8* @objc_retainBlock(i8* %value) +/// call i8* \@objc_retain(i8* %value) +/// call i8* \@objc_retainBlock(i8* %value) llvm::Value *CodeGenFunction::EmitARCRetain(QualType type, llvm::Value *value) { if (type->isBlockPointerType()) return EmitARCRetainBlock(value, /*mandatory*/ false); @@ -1818,7 +1831,7 @@ llvm::Value *CodeGenFunction::EmitARCRetain(QualType type, llvm::Value *value) { } /// Retain the given object, with normal retain semantics. -/// call i8* @objc_retain(i8* %value) +/// call i8* \@objc_retain(i8* %value) llvm::Value *CodeGenFunction::EmitARCRetainNonBlock(llvm::Value *value) { return emitARCValueOperation(*this, value, CGM.getARCEntrypoints().objc_retain, @@ -1826,7 +1839,7 @@ llvm::Value *CodeGenFunction::EmitARCRetainNonBlock(llvm::Value *value) { } /// Retain the given block, with _Block_copy semantics. -/// call i8* @objc_retainBlock(i8* %value) +/// call i8* \@objc_retainBlock(i8* %value) /// /// \param mandatory - If false, emit the call with metadata /// indicating that it's okay for the optimizer to eliminate this call @@ -1856,7 +1869,7 @@ llvm::Value *CodeGenFunction::EmitARCRetainBlock(llvm::Value *value, } /// Retain the given object which is the result of a function call. -/// call i8* @objc_retainAutoreleasedReturnValue(i8* %value) +/// call i8* \@objc_retainAutoreleasedReturnValue(i8* %value) /// /// Yes, this function name is one character away from a different /// call with completely different semantics. @@ -1906,7 +1919,7 @@ CodeGenFunction::EmitARCRetainAutoreleasedReturnValue(llvm::Value *value) { } /// Release the given object. -/// call void @objc_release(i8* %value) +/// call void \@objc_release(i8* %value) void CodeGenFunction::EmitARCRelease(llvm::Value *value, bool precise) { if (isa(value)) return; @@ -1933,7 +1946,7 @@ void CodeGenFunction::EmitARCRelease(llvm::Value *value, bool precise) { } /// Store into a strong object. 
Always calls this: -/// call void @objc_storeStrong(i8** %addr, i8* %value) +/// call void \@objc_storeStrong(i8** %addr, i8* %value) llvm::Value *CodeGenFunction::EmitARCStoreStrongCall(llvm::Value *addr, llvm::Value *value, bool ignored) { @@ -1958,7 +1971,7 @@ llvm::Value *CodeGenFunction::EmitARCStoreStrongCall(llvm::Value *addr, } /// Store into a strong object. Sometimes calls this: -/// call void @objc_storeStrong(i8** %addr, i8* %value) +/// call void \@objc_storeStrong(i8** %addr, i8* %value) /// Other times, breaks it down into components. llvm::Value *CodeGenFunction::EmitARCStoreStrong(LValue dst, llvm::Value *newValue, @@ -1994,7 +2007,7 @@ llvm::Value *CodeGenFunction::EmitARCStoreStrong(LValue dst, } /// Autorelease the given object. -/// call i8* @objc_autorelease(i8* %value) +/// call i8* \@objc_autorelease(i8* %value) llvm::Value *CodeGenFunction::EmitARCAutorelease(llvm::Value *value) { return emitARCValueOperation(*this, value, CGM.getARCEntrypoints().objc_autorelease, @@ -2002,7 +2015,7 @@ llvm::Value *CodeGenFunction::EmitARCAutorelease(llvm::Value *value) { } /// Autorelease the given object. -/// call i8* @objc_autoreleaseReturnValue(i8* %value) +/// call i8* \@objc_autoreleaseReturnValue(i8* %value) llvm::Value * CodeGenFunction::EmitARCAutoreleaseReturnValue(llvm::Value *value) { return emitARCValueOperation(*this, value, @@ -2011,7 +2024,7 @@ CodeGenFunction::EmitARCAutoreleaseReturnValue(llvm::Value *value) { } /// Do a fused retain/autorelease of the given object. -/// call i8* @objc_retainAutoreleaseReturnValue(i8* %value) +/// call i8* \@objc_retainAutoreleaseReturnValue(i8* %value) llvm::Value * CodeGenFunction::EmitARCRetainAutoreleaseReturnValue(llvm::Value *value) { return emitARCValueOperation(*this, value, @@ -2020,10 +2033,10 @@ CodeGenFunction::EmitARCRetainAutoreleaseReturnValue(llvm::Value *value) { } /// Do a fused retain/autorelease of the given object. -/// call i8* @objc_retainAutorelease(i8* %value) +/// call i8* \@objc_retainAutorelease(i8* %value) /// or -/// %retain = call i8* @objc_retainBlock(i8* %value) -/// call i8* @objc_autorelease(i8* %retain) +/// %retain = call i8* \@objc_retainBlock(i8* %value) +/// call i8* \@objc_autorelease(i8* %retain) llvm::Value *CodeGenFunction::EmitARCRetainAutorelease(QualType type, llvm::Value *value) { if (!type->isBlockPointerType()) @@ -2039,7 +2052,7 @@ llvm::Value *CodeGenFunction::EmitARCRetainAutorelease(QualType type, } /// Do a fused retain/autorelease of the given object. -/// call i8* @objc_retainAutorelease(i8* %value) +/// call i8* \@objc_retainAutorelease(i8* %value) llvm::Value * CodeGenFunction::EmitARCRetainAutoreleaseNonBlock(llvm::Value *value) { return emitARCValueOperation(*this, value, @@ -2047,7 +2060,7 @@ CodeGenFunction::EmitARCRetainAutoreleaseNonBlock(llvm::Value *value) { "objc_retainAutorelease"); } -/// i8* @objc_loadWeak(i8** %addr) +/// i8* \@objc_loadWeak(i8** %addr) /// Essentially objc_autorelease(objc_loadWeakRetained(addr)). 
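The comment above notes that EmitARCStoreStrong sometimes breaks objc_storeStrong down into components. A toy decomposition showing the ordering that keeps the store safe even when the old and new values alias; the retain/release counters stand in for real reference counting:

#include <cassert>

using id = void *;

// Toy retain/release so the sketch runs.
static int retains = 0, releases = 0;
id retain(id v) { if (v) ++retains; return v; }
void release(id v) { if (v) ++releases; }

// Decomposed objc_storeStrong: retain the new value first, then store,
// then release the old one. Releasing first could destroy the object
// when old == new.
void storeStrong(id *addr, id value) {
  id old = *addr;
  *addr = retain(value);
  release(old);
}

int main() {
  int a, b;
  id slot = nullptr;
  storeStrong(&slot, &a);
  storeStrong(&slot, &b);
  storeStrong(&slot, nullptr); // releases &b, stores nil
  assert(slot == nullptr && retains == 2 && releases == 2);
}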
llvm::Value *CodeGenFunction::EmitARCLoadWeak(llvm::Value *addr) { return emitARCLoadOperation(*this, addr, @@ -2055,14 +2068,14 @@ llvm::Value *CodeGenFunction::EmitARCLoadWeak(llvm::Value *addr) { "objc_loadWeak"); } -/// i8* @objc_loadWeakRetained(i8** %addr) +/// i8* \@objc_loadWeakRetained(i8** %addr) llvm::Value *CodeGenFunction::EmitARCLoadWeakRetained(llvm::Value *addr) { return emitARCLoadOperation(*this, addr, CGM.getARCEntrypoints().objc_loadWeakRetained, "objc_loadWeakRetained"); } -/// i8* @objc_storeWeak(i8** %addr, i8* %value) +/// i8* \@objc_storeWeak(i8** %addr, i8* %value) /// Returns %value. llvm::Value *CodeGenFunction::EmitARCStoreWeak(llvm::Value *addr, llvm::Value *value, @@ -2072,7 +2085,7 @@ llvm::Value *CodeGenFunction::EmitARCStoreWeak(llvm::Value *addr, "objc_storeWeak", ignored); } -/// i8* @objc_initWeak(i8** %addr, i8* %value) +/// i8* \@objc_initWeak(i8** %addr, i8* %value) /// Returns %value. %addr is known to not have a current weak entry. /// Essentially equivalent to: /// *addr = nil; objc_storeWeak(addr, value); @@ -2092,7 +2105,7 @@ void CodeGenFunction::EmitARCInitWeak(llvm::Value *addr, llvm::Value *value) { "objc_initWeak", /*ignored*/ true); } -/// void @objc_destroyWeak(i8** %addr) +/// void \@objc_destroyWeak(i8** %addr) /// Essentially objc_storeWeak(addr, nil). void CodeGenFunction::EmitARCDestroyWeak(llvm::Value *addr) { llvm::Constant *&fn = CGM.getARCEntrypoints().objc_destroyWeak; @@ -2110,7 +2123,7 @@ void CodeGenFunction::EmitARCDestroyWeak(llvm::Value *addr) { call->setDoesNotThrow(); } -/// void @objc_moveWeak(i8** %dest, i8** %src) +/// void \@objc_moveWeak(i8** %dest, i8** %src) /// Disregards the current value in %dest. Leaves %src pointing to nothing. /// Essentially (objc_copyWeak(dest, src), objc_destroyWeak(src)). void CodeGenFunction::EmitARCMoveWeak(llvm::Value *dst, llvm::Value *src) { @@ -2119,7 +2132,7 @@ void CodeGenFunction::EmitARCMoveWeak(llvm::Value *dst, llvm::Value *src) { "objc_moveWeak"); } -/// void @objc_copyWeak(i8** %dest, i8** %src) +/// void \@objc_copyWeak(i8** %dest, i8** %src) /// Disregards the current value in %dest. Essentially /// objc_release(objc_initWeak(dest, objc_readWeakRetained(src))) void CodeGenFunction::EmitARCCopyWeak(llvm::Value *dst, llvm::Value *src) { @@ -2129,7 +2142,7 @@ void CodeGenFunction::EmitARCCopyWeak(llvm::Value *dst, llvm::Value *src) { } /// Produce the code to do a objc_autoreleasepool_push. -/// call i8* @objc_autoreleasePoolPush(void) +/// call i8* \@objc_autoreleasePoolPush(void) llvm::Value *CodeGenFunction::EmitObjCAutoreleasePoolPush() { llvm::Constant *&fn = CGM.getRREntrypoints().objc_autoreleasePoolPush; if (!fn) { @@ -2145,7 +2158,7 @@ llvm::Value *CodeGenFunction::EmitObjCAutoreleasePoolPush() { } /// Produce the code to do a primitive release. -/// call void @objc_autoreleasePoolPop(i8* %ptr) +/// call void \@objc_autoreleasePoolPop(i8* %ptr) void CodeGenFunction::EmitObjCAutoreleasePoolPop(llvm::Value *value) { assert(value->getType() == Int8PtrTy); @@ -2717,7 +2730,7 @@ void CodeGenFunction::EmitObjCAutoreleasePoolStmt( // Keep track of the current cleanup stack depth. 
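The doc comments above define the weak entry points by equivalences: initWeak is a storeWeak into a cleared slot, moveWeak is copyWeak followed by destroyWeak. A toy registry that follows those identities literally; it models none of the real runtime's side tables or zeroing behavior:

#include <cassert>
#include <set>

using id = void *;

// Toy weak-reference registry honoring the identities in the comments above.
struct WeakRegistry {
  std::set<id *> slots; // addresses currently holding a registered weak ref

  id storeWeak(id *addr, id value) {   // objc_storeWeak
    slots.insert(addr);
    *addr = value;                     // real runtime also registers `value`
    return value;
  }
  void initWeak(id *addr, id value) {  // "*addr = nil; objc_storeWeak(...)"
    *addr = nullptr;
    storeWeak(addr, value);
  }
  void destroyWeak(id *addr) {         // essentially storeWeak(addr, nil)
    *addr = nullptr;
    slots.erase(addr);
  }
  void copyWeak(id *dst, id *src) {    // init dst from a load of src
    initWeak(dst, *src);
  }
  void moveWeak(id *dst, id *src) {    // copyWeak + destroyWeak(src)
    copyWeak(dst, src);
    destroyWeak(src);
  }
};

int main() {
  WeakRegistry R;
  int obj;
  id a = nullptr, b = nullptr;
  R.initWeak(&a, &obj);
  R.moveWeak(&b, &a);
  assert(b == &obj && a == nullptr);
}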
RunCleanupsScope Scope(*this); - if (CGM.getCodeGenOpts().ObjCRuntimeHasARC) { + if (CGM.getLangOpts().ObjCRuntime.hasARC()) { llvm::Value *token = EmitObjCAutoreleasePoolPush(); EHStack.pushCleanup(NormalCleanup, token); } else { @@ -2749,6 +2762,11 @@ void CodeGenFunction::EmitExtendGCLifetime(llvm::Value *object) { Builder.CreateCall(extender, object)->setDoesNotThrow(); } +static bool hasAtomicCopyHelperAPI(const ObjCRuntime &runtime) { + // For now, only NeXT has these APIs. + return runtime.isNeXTFamily(); +} + /// GenerateObjCAtomicSetterCopyHelperFunction - Given a c++ object type with /// non-trivial copy assignment function, produce following helper function. /// static void copyHelper(Ty *dest, const Ty *source) { *dest = *source; } @@ -2757,7 +2775,8 @@ llvm::Constant * CodeGenFunction::GenerateObjCAtomicSetterCopyHelperFunction( const ObjCPropertyImplDecl *PID) { // FIXME. This api is for NeXt runtime only for now. - if (!getLangOpts().CPlusPlus || !getLangOpts().NeXTRuntime) + if (!getLangOpts().CPlusPlus || + !hasAtomicCopyHelperAPI(getLangOpts().ObjCRuntime)) return 0; QualType Ty = PID->getPropertyIvarDecl()->getType(); if (!Ty->isRecordType()) @@ -2841,7 +2860,8 @@ llvm::Constant * CodeGenFunction::GenerateObjCAtomicGetterCopyHelperFunction( const ObjCPropertyImplDecl *PID) { // FIXME. This api is for NeXt runtime only for now. - if (!getLangOpts().CPlusPlus || !getLangOpts().NeXTRuntime) + if (!getLangOpts().CPlusPlus || + !hasAtomicCopyHelperAPI(getLangOpts().ObjCRuntime)) return 0; const ObjCPropertyDecl *PD = PID->getPropertyDecl(); QualType Ty = PD->getType(); diff --git a/lib/CodeGen/CGObjCGNU.cpp b/lib/CodeGen/CGObjCGNU.cpp index db0bd95..6d129d0 100644 --- a/lib/CodeGen/CGObjCGNU.cpp +++ b/lib/CodeGen/CGObjCGNU.cpp @@ -99,8 +99,8 @@ class LazyRuntimeFunction { /// GNU Objective-C runtime code generation. This class implements the parts of -/// Objective-C support that are specific to the GNU family of runtimes (GCC and -/// GNUstep). +/// Objective-C support that are specific to the GNU family of runtimes (GCC, +/// GNUstep and ObjFW). class CGObjCGNU : public CGObjCRuntime { protected: /// The LLVM module into which output is inserted @@ -292,8 +292,8 @@ private: protected: /// Function used for throwing Objective-C exceptions. LazyRuntimeFunction ExceptionThrowFn; - /// Function used for rethrowing exceptions, used at the end of @finally or - /// @synchronize blocks. + /// Function used for rethrowing exceptions, used at the end of \@finally or + /// \@synchronize blocks. LazyRuntimeFunction ExceptionReThrowFn; /// Function called when entering a catch function. This is required for /// differentiating Objective-C exceptions and foreign exceptions. @@ -301,9 +301,9 @@ protected: /// Function called when exiting from a catch block. Used to do exception /// cleanup. LazyRuntimeFunction ExitCatchFn; - /// Function called when entering an @synchronize block. Acquires the lock. + /// Function called when entering an \@synchronize block. Acquires the lock. LazyRuntimeFunction SyncEnterFn; - /// Function called when exiting an @synchronize block. Releases the lock. + /// Function called when exiting an \@synchronize block. Releases the lock. LazyRuntimeFunction SyncExitFn; private: @@ -350,7 +350,7 @@ private: ArrayRef MethodSels, ArrayRef MethodTypes, bool isClassMethodList); - /// Emits an empty protocol. This is used for @protocol() where no protocol + /// Emits an empty protocol. This is used for \@protocol() where no protocol /// is found. 
The runtime will (hopefully) fix up the pointer to refer to the /// real protocol. llvm::Constant *GenerateEmptyProtocol(const std::string &ProtocolName); @@ -397,11 +397,11 @@ private: const ObjCIvarDecl *Ivar); /// Emits a reference to a class. This allows the linker to object if there /// is no class of the matching name. +protected: void EmitClassRef(const std::string &className); /// Emits a pointer to the named class - llvm::Value *GetClassNamed(CGBuilderTy &Builder, const std::string &Name, - bool isWeak); -protected: + virtual llvm::Value *GetClassNamed(CGBuilderTy &Builder, + const std::string &Name, bool isWeak); /// Looks up the method for sending a message to the specified object. This /// mechanism differs between the GCC and GNU runtimes, so this method must be /// overridden in subclasses. @@ -653,6 +653,33 @@ class CGObjCGNUstep : public CGObjCGNU { } }; +/// The ObjFW runtime, which closely follows the GCC runtime's +/// compiler ABI. Support here is due to Jonathan Schleifer, the +/// ObjFW maintainer. +class CGObjCObjFW : public CGObjCGCC { + /// Emit class references unconditionally as direct symbol references. + virtual llvm::Value *GetClassNamed(CGBuilderTy &Builder, + const std::string &Name, bool isWeak) { + if (isWeak) + return CGObjCGNU::GetClassNamed(Builder, Name, isWeak); + + EmitClassRef(Name); + + std::string SymbolName = "_OBJC_CLASS_" + Name; + + llvm::GlobalVariable *ClassSymbol = TheModule.getGlobalVariable(SymbolName); + + if (!ClassSymbol) + ClassSymbol = new llvm::GlobalVariable(TheModule, LongTy, false, + llvm::GlobalValue::ExternalLinkage, + 0, SymbolName); + + return ClassSymbol; + } + +public: + CGObjCObjFW(CodeGenModule &Mod): CGObjCGCC(Mod) {} +}; } // end anonymous namespace @@ -889,7 +916,7 @@ llvm::Constant *CGObjCGNU::GetEHType(QualType T) { // foreign exceptions. With the new ABI, we use __objc_id_typeinfo as // a pointer indicating object catchalls, and NULL to indicate real // catchalls - if (CGM.getLangOpts().ObjCNonFragileABI) { + if (CGM.getLangOpts().ObjCRuntime.isNonFragile()) { return MakeConstantString("@id"); } else { return 0; @@ -1627,7 +1654,7 @@ void CGObjCGNU::GenerateProtocol(const ObjCProtocolDecl *PD) { iter = PD->prop_begin(), endIter = PD->prop_end(); iter != endIter ; iter++) { std::vector Fields; - ObjCPropertyDecl *property = (*iter); + ObjCPropertyDecl *property = *iter; Fields.push_back(MakeConstantString(property->getNameAsString())); Fields.push_back(llvm::ConstantInt::get(Int8Ty, @@ -1877,7 +1904,7 @@ llvm::Constant *CGObjCGNU::GeneratePropertyList(const ObjCImplementationDecl *OI iter = OID->propimpl_begin(), endIter = OID->propimpl_end(); iter != endIter ; iter++) { std::vector Fields; - ObjCPropertyDecl *property = (*iter)->getPropertyDecl(); + ObjCPropertyDecl *property = iter->getPropertyDecl(); ObjCPropertyImplDecl *propertyImpl = *iter; bool isSynthesized = (propertyImpl->getPropertyImplementation() == ObjCPropertyImplDecl::Synthesize); @@ -1984,7 +2011,7 @@ void CGObjCGNU::GenerateClass(const ObjCImplementationDecl *OID) { Context.getASTObjCInterfaceLayout(SuperClassDecl).getSize().getQuantity(); // For non-fragile ivars, set the instance size to 0 - {the size of just this // class}. The runtime will then set this to the correct value on load. 
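For the non-fragile ABI, the comment above says the emitted instance size is 0 minus the size of just this class's ivars, with ivar offsets rebased against the superclass size the same way (the code follows just below). The arithmetic, worked with made-up sizes:

#include <cstdint>
#include <iostream>

int main() {
  int64_t superInstanceSize = 40; // bytes occupied by the superclass
  int64_t instanceSize = 56;      // full size including our own ivars

  // Non-fragile encoding from the hunk below: a non-positive value whose
  // magnitude is the size of *just this class's* ivars (here 16 bytes);
  // the runtime patches in the real total at load time.
  int64_t encoded = 0 - (instanceSize - superInstanceSize);
  std::cout << "encoded size: " << encoded << "\n"; // -16

  // Ivar offsets are rebased the same way: an ivar at absolute offset 48
  // becomes offset 8 within this class's own slice.
  int64_t baseOffset = 48;
  std::cout << "rebased ivar offset: "
            << (baseOffset - superInstanceSize) << "\n"; // 8
}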
- if (CGM.getContext().getLangOpts().ObjCNonFragileABI) { + if (CGM.getContext().getLangOpts().ObjCRuntime.isNonFragile()) { instanceSize = 0 - (instanceSize - superInstanceSize); } @@ -1999,7 +2026,7 @@ void CGObjCGNU::GenerateClass(const ObjCImplementationDecl *OID) { // Get the offset uint64_t BaseOffset = ComputeIvarBaseOffset(CGM, OID, IVD); uint64_t Offset = BaseOffset; - if (CGM.getContext().getLangOpts().ObjCNonFragileABI) { + if (CGM.getContext().getLangOpts().ObjCRuntime.isNonFragile()) { Offset = BaseOffset - superInstanceSize; } llvm::Constant *OffsetValue = llvm::ConstantInt::get(IntTy, Offset); @@ -2486,25 +2513,8 @@ void CGObjCGNU::EmitThrowStmt(CodeGenFunction &CGF, ExceptionAsObject = CGF.ObjCEHValueStack.back(); } ExceptionAsObject = CGF.Builder.CreateBitCast(ExceptionAsObject, IdTy); - - // Note: This may have to be an invoke, if we want to support constructs like: - // @try { - // @throw(obj); - // } - // @catch(id) ... - // - // This is effectively turning @throw into an incredibly-expensive goto, but - // it may happen as a result of inlining followed by missed optimizations, or - // as a result of stupidity. - llvm::BasicBlock *UnwindBB = CGF.getInvokeDest(); - if (!UnwindBB) { - CGF.Builder.CreateCall(ExceptionThrowFn, ExceptionAsObject); - CGF.Builder.CreateUnreachable(); - } else { - CGF.Builder.CreateInvoke(ExceptionThrowFn, UnwindBB, UnwindBB, - ExceptionAsObject); - } - // Clear the insertion point to indicate we are in unreachable code. + CGF.EmitCallOrInvoke(ExceptionThrowFn, ExceptionAsObject); + CGF.Builder.CreateUnreachable(); CGF.Builder.ClearInsertionPoint(); } @@ -2640,7 +2650,7 @@ static const ObjCInterfaceDecl *FindIvarInterface(ASTContext &Context, llvm::Value *CGObjCGNU::EmitIvarOffset(CodeGenFunction &CGF, const ObjCInterfaceDecl *Interface, const ObjCIvarDecl *Ivar) { - if (CGM.getLangOpts().ObjCNonFragileABI) { + if (CGM.getLangOpts().ObjCRuntime.isNonFragile()) { Interface = FindIvarInterface(CGM.getContext(), Interface, Ivar); if (RuntimeVersion < 10) return CGF.Builder.CreateZExtOrBitCast( @@ -2665,7 +2675,20 @@ llvm::Value *CGObjCGNU::EmitIvarOffset(CodeGenFunction &CGF, CGObjCRuntime * clang::CodeGen::CreateGNUObjCRuntime(CodeGenModule &CGM) { - if (CGM.getLangOpts().ObjCNonFragileABI) + switch (CGM.getLangOpts().ObjCRuntime.getKind()) { + case ObjCRuntime::GNUstep: return new CGObjCGNUstep(CGM); - return new CGObjCGCC(CGM); + + case ObjCRuntime::GCC: + return new CGObjCGCC(CGM); + + case ObjCRuntime::ObjFW: + return new CGObjCObjFW(CGM); + + case ObjCRuntime::FragileMacOSX: + case ObjCRuntime::MacOSX: + case ObjCRuntime::iOS: + llvm_unreachable("these runtimes are not GNU runtimes"); + } + llvm_unreachable("bad runtime"); } diff --git a/lib/CodeGen/CGObjCMac.cpp b/lib/CodeGen/CGObjCMac.cpp index e5246f1..ef802a3 100644 --- a/lib/CodeGen/CGObjCMac.cpp +++ b/lib/CodeGen/CGObjCMac.cpp @@ -241,9 +241,9 @@ public: Params.push_back(Ctx.getPointerDiffType()->getCanonicalTypeUnqualified()); Params.push_back(Ctx.BoolTy); llvm::FunctionType *FTy = - Types.GetFunctionType(Types.arrangeFunctionType(IdType, Params, - FunctionType::ExtInfo(), - RequiredArgs::All)); + Types.GetFunctionType(Types.arrangeLLVMFunctionInfo(IdType, Params, + FunctionType::ExtInfo(), + RequiredArgs::All)); return CGM.CreateRuntimeFunction(FTy, "objc_getProperty"); } @@ -261,9 +261,9 @@ public: Params.push_back(Ctx.BoolTy); Params.push_back(Ctx.BoolTy); llvm::FunctionType *FTy = - Types.GetFunctionType(Types.arrangeFunctionType(Ctx.VoidTy, Params, - FunctionType::ExtInfo(), - 
RequiredArgs::All)); + Types.GetFunctionType(Types.arrangeLLVMFunctionInfo(Ctx.VoidTy, Params, + FunctionType::ExtInfo(), + RequiredArgs::All)); return CGM.CreateRuntimeFunction(FTy, "objc_setProperty"); } @@ -287,9 +287,9 @@ public: Params.push_back(IdType); Params.push_back(Ctx.getPointerDiffType()->getCanonicalTypeUnqualified()); llvm::FunctionType *FTy = - Types.GetFunctionType(Types.arrangeFunctionType(Ctx.VoidTy, Params, - FunctionType::ExtInfo(), - RequiredArgs::All)); + Types.GetFunctionType(Types.arrangeLLVMFunctionInfo(Ctx.VoidTy, Params, + FunctionType::ExtInfo(), + RequiredArgs::All)); const char *name; if (atomic && copy) name = "objc_setProperty_atomic_copy"; @@ -314,9 +314,9 @@ public: Params.push_back(Ctx.BoolTy); Params.push_back(Ctx.BoolTy); llvm::FunctionType *FTy = - Types.GetFunctionType(Types.arrangeFunctionType(Ctx.VoidTy, Params, - FunctionType::ExtInfo(), - RequiredArgs::All)); + Types.GetFunctionType(Types.arrangeLLVMFunctionInfo(Ctx.VoidTy, Params, + FunctionType::ExtInfo(), + RequiredArgs::All)); return CGM.CreateRuntimeFunction(FTy, "objc_copyStruct"); } @@ -333,9 +333,9 @@ public: Params.push_back(Ctx.VoidPtrTy); Params.push_back(Ctx.VoidPtrTy); llvm::FunctionType *FTy = - Types.GetFunctionType(Types.arrangeFunctionType(Ctx.VoidTy, Params, - FunctionType::ExtInfo(), - RequiredArgs::All)); + Types.GetFunctionType(Types.arrangeLLVMFunctionInfo(Ctx.VoidTy, Params, + FunctionType::ExtInfo(), + RequiredArgs::All)); return CGM.CreateRuntimeFunction(FTy, "objc_copyCppObjectAtomic"); } @@ -346,7 +346,7 @@ public: SmallVector Params; Params.push_back(Ctx.getCanonicalParamType(Ctx.getObjCIdType())); llvm::FunctionType *FTy = - Types.GetFunctionType(Types.arrangeFunctionType(Ctx.VoidTy, Params, + Types.GetFunctionType(Types.arrangeLLVMFunctionInfo(Ctx.VoidTy, Params, FunctionType::ExtInfo(), RequiredArgs::All)); return CGM.CreateRuntimeFunction(FTy, "objc_enumerationMutation"); @@ -2515,7 +2515,7 @@ llvm::Constant *CGObjCMac::EmitMetaClass(const ObjCImplementationDecl *ID, Values); std::string Name("\01L_OBJC_METACLASS_"); - Name += ID->getNameAsCString(); + Name += ID->getName(); // Check for a forward reference. llvm::GlobalVariable *GV = CGM.getModule().getGlobalVariable(Name); @@ -3612,7 +3612,8 @@ enum ImageInfoFlags { // A flag indicating that the module has no instances of a @synthesize of a // superclass variable. - eImageInfo_CorrectedSynthesize = (1 << 4) + eImageInfo_CorrectedSynthesize = (1 << 4), + eImageInfo_ImageIsSimulated = (1 << 5) }; void CGObjCCommonMac::EmitImageInfo() { @@ -3657,6 +3658,14 @@ void CGObjCCommonMac::EmitImageInfo() { llvm::MDNode::get(VMContext, Ops)); } } + + // Indicate whether we're compiling this to run on a simulator. + const llvm::Triple &Triple = CGM.getTarget().getTriple(); + if (Triple.getOS() == llvm::Triple::IOS && + (Triple.getArch() == llvm::Triple::x86 || + Triple.getArch() == llvm::Triple::x86_64)) + Mod.addModuleFlag(llvm::Module::Error, "Objective-C Is Simulated", + eImageInfo_ImageIsSimulated); } // struct objc_module { @@ -3809,7 +3818,10 @@ void CGObjCCommonMac::BuildAggrIvarRecordLayout(const RecordType *RT, bool &HasUnion) { const RecordDecl *RD = RT->getDecl(); // FIXME - Use iterator. 
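EmitImageInfo above tags simulator builds: an iOS OS triple on an Intel architecture can only mean the simulator, so bit 5 (eImageInfo_ImageIsSimulated) is recorded as a module flag. A direct sketch of the predicate and the flag word:

#include <iostream>

enum class OS { IOS, MacOSX, Linux };
enum class Arch { x86, x86_64, arm };

// Mirrors the triple check in EmitImageInfo: iOS plus x86 or x86_64
// identifies a simulator target rather than a device.
bool isSimulatorTriple(OS os, Arch arch) {
  return os == OS::IOS && (arch == Arch::x86 || arch == Arch::x86_64);
}

int main() {
  unsigned flags = 0;
  const unsigned eImageInfo_ImageIsSimulated = 1u << 5; // from the enum above
  if (isSimulatorTriple(OS::IOS, Arch::x86_64))
    flags |= eImageInfo_ImageIsSimulated;
  std::cout << std::hex << flags << "\n"; // 20
}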
- SmallVector Fields(RD->field_begin(), RD->field_end()); + SmallVector Fields; + for (RecordDecl::field_iterator i = RD->field_begin(), + e = RD->field_end(); i != e; ++i) + Fields.push_back(*i); llvm::Type *Ty = CGM.getTypes().ConvertType(QualType(RT, 0)); const llvm::StructLayout *RecLayout = CGM.getTargetData().getStructLayout(cast(Ty)); @@ -4374,9 +4386,10 @@ ObjCCommonTypesHelper::ObjCCommonTypesHelper(CodeGen::CodeGenModule &cgm) SourceLocation(), SourceLocation(), &Ctx.Idents.get("_objc_super")); RD->addDecl(FieldDecl::Create(Ctx, RD, SourceLocation(), SourceLocation(), 0, - Ctx.getObjCIdType(), 0, 0, false, false)); + Ctx.getObjCIdType(), 0, 0, false, ICIS_NoInit)); RD->addDecl(FieldDecl::Create(Ctx, RD, SourceLocation(), SourceLocation(), 0, - Ctx.getObjCClassType(), 0, 0, false, false)); + Ctx.getObjCClassType(), 0, 0, false, + ICIS_NoInit)); RD->completeDefinition(); SuperCTy = Ctx.getTagDeclType(RD); @@ -4755,9 +4768,10 @@ ObjCNonFragileABITypesHelper::ObjCNonFragileABITypesHelper(CodeGen::CodeGenModul SourceLocation(), SourceLocation(), &Ctx.Idents.get("_message_ref_t")); RD->addDecl(FieldDecl::Create(Ctx, RD, SourceLocation(), SourceLocation(), 0, - Ctx.VoidPtrTy, 0, 0, false, false)); + Ctx.VoidPtrTy, 0, 0, false, ICIS_NoInit)); RD->addDecl(FieldDecl::Create(Ctx, RD, SourceLocation(), SourceLocation(), 0, - Ctx.getObjCSelType(), 0, 0, false, false)); + Ctx.getObjCSelType(), 0, 0, false, + ICIS_NoInit)); RD->completeDefinition(); MessageRefCTy = Ctx.getTagDeclType(RD); @@ -6367,7 +6381,18 @@ CGObjCNonFragileABIMac::GetInterfaceEHType(const ObjCInterfaceDecl *ID, CodeGen::CGObjCRuntime * CodeGen::CreateMacObjCRuntime(CodeGen::CodeGenModule &CGM) { - if (CGM.getLangOpts().ObjCNonFragileABI) - return new CGObjCNonFragileABIMac(CGM); + switch (CGM.getLangOpts().ObjCRuntime.getKind()) { + case ObjCRuntime::FragileMacOSX: return new CGObjCMac(CGM); + + case ObjCRuntime::MacOSX: + case ObjCRuntime::iOS: + return new CGObjCNonFragileABIMac(CGM); + + case ObjCRuntime::GNUstep: + case ObjCRuntime::GCC: + case ObjCRuntime::ObjFW: + llvm_unreachable("these runtimes are not Mac runtimes"); + } + llvm_unreachable("bad runtime"); } diff --git a/lib/CodeGen/CGObjCRuntime.cpp b/lib/CodeGen/CGObjCRuntime.cpp index 9370096..9aa6837 100644 --- a/lib/CodeGen/CGObjCRuntime.cpp +++ b/lib/CodeGen/CGObjCRuntime.cpp @@ -120,6 +120,8 @@ LValue CGObjCRuntime::EmitValueForIvarAtOffset(CodeGen::CodeGenFunction &CGF, uint64_t ContainingTypeAlign = CGF.CGM.getContext().getTargetInfo().getCharAlign(); uint64_t ContainingTypeSize = TypeSizeInBits - (FieldBitOffset - BitOffset); uint64_t BitFieldSize = Ivar->getBitWidthValue(CGF.getContext()); + CharUnits ContainingTypeAlignCharUnits = + CGF.CGM.getContext().toCharUnitsFromBits(ContainingTypeAlign); // Allocate a new CGBitFieldInfo object to describe this access. 
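EmitValueForIvarAtOffset above sizes the access window as TypeSizeInBits - (FieldBitOffset - BitOffset) and now also forwards the containing type's alignment converted to CharUnits. A sketch of the shift-and-mask read such a bitfield description ultimately drives, with made-up numbers:

#include <cstdint>
#include <iostream>

// Classic read of a width-bit field at bit offset `shift` inside a loaded
// word; this is what the CGBitFieldInfo access description boils down to.
uint64_t extractBitField(uint64_t word, unsigned shift, unsigned width) {
  uint64_t mask = (width == 64) ? ~0ull : ((1ull << width) - 1);
  return (word >> shift) & mask;
}

int main() {
  uint64_t typeSizeInBits = 64, fieldBitOffset = 37, bitOffset = 5;
  // Bits that must remain addressable past the field's storage start.
  uint64_t containingTypeSize = typeSizeInBits - (fieldBitOffset - bitOffset);
  std::cout << "window: " << containingTypeSize << " bits\n"; // 32

  uint64_t word = 0xB4; // 1011'0100 in binary
  std::cout << extractBitField(word, 2, 3) << "\n"; // bits 2..4 -> 101 = 5
}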
// @@ -132,7 +134,8 @@ LValue CGObjCRuntime::EmitValueForIvarAtOffset(CodeGen::CodeGenFunction &CGF, ContainingTypeSize, ContainingTypeAlign)); return LValue::MakeBitfield(V, *Info, - IvarTy.withCVRQualifiers(CVRQualifiers)); + IvarTy.withCVRQualifiers(CVRQualifiers), + ContainingTypeAlignCharUnits); } namespace { @@ -334,7 +337,7 @@ void CGObjCRuntime::EmitAtSynchronizedStmt(CodeGenFunction &CGF, /// /// \param method - may be null /// \param resultType - the result type to use if there's no method -/// \param argInfo - the actual arguments, including implicit ones +/// \param callArgs - the actual arguments, including implicit ones CGObjCRuntime::MessageSendInfo CGObjCRuntime::getMessageSendInfo(const ObjCMethodDecl *method, QualType resultType, @@ -355,17 +358,17 @@ CGObjCRuntime::getMessageSendInfo(const ObjCMethodDecl *method, // Otherwise, there is. FunctionType::ExtInfo einfo = signature.getExtInfo(); const CGFunctionInfo &argsInfo = - CGM.getTypes().arrangeFunctionCall(resultType, callArgs, einfo, - signature.getRequiredArgs()); + CGM.getTypes().arrangeFreeFunctionCall(resultType, callArgs, einfo, + signature.getRequiredArgs()); return MessageSendInfo(argsInfo, signatureType); } // There's no method; just use a default CC. const CGFunctionInfo &argsInfo = - CGM.getTypes().arrangeFunctionCall(resultType, callArgs, - FunctionType::ExtInfo(), - RequiredArgs::All); + CGM.getTypes().arrangeFreeFunctionCall(resultType, callArgs, + FunctionType::ExtInfo(), + RequiredArgs::All); // Derive the signature to call from that. llvm::PointerType *signatureType = diff --git a/lib/CodeGen/CGObjCRuntime.h b/lib/CodeGen/CGObjCRuntime.h index ccf4d4d..219a3e4 100644 --- a/lib/CodeGen/CGObjCRuntime.h +++ b/lib/CodeGen/CGObjCRuntime.h @@ -91,20 +91,20 @@ protected: llvm::Value *Offset); /// Emits a try / catch statement. This function is intended to be called by /// subclasses, and provides a generic mechanism for generating these, which - /// should be usable by all runtimes. The caller must provide the functions to - /// call when entering and exiting a @catch() block, and the function used to - /// rethrow exceptions. If the begin and end catch functions are NULL, then - /// the function assumes that the EH personality function provides the - /// thrown object directly. + /// should be usable by all runtimes. The caller must provide the functions + /// to call when entering and exiting a \@catch() block, and the function + /// used to rethrow exceptions. If the begin and end catch functions are + /// NULL, then the function assumes that the EH personality function provides + /// the thrown object directly. void EmitTryCatchStmt(CodeGenFunction &CGF, const ObjCAtTryStmt &S, llvm::Constant *beginCatchFn, llvm::Constant *endCatchFn, llvm::Constant *exceptionRethrowFn); - /// Emits an @synchronize() statement, using the syncEnterFn and syncExitFn - /// arguments as the functions called to lock and unlock the object. This - /// function can be called by subclasses that use zero-cost exception - /// handling. + /// Emits an \@synchronize() statement, using the \p syncEnterFn and + /// \p syncExitFn arguments as the functions called to lock and unlock + /// the object. This function can be called by subclasses that use + /// zero-cost exception handling. 
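EmitTryCatchStmt above is deliberately runtime-agnostic: the begin/end-catch and rethrow functions are injected by the subclass, and null begin/end means the EH personality hands the thrown object over directly. A sketch of that parameterization using std::function stand-ins:

#include <functional>
#include <iostream>

struct Exn { int payload; };

// Generic try/catch skeleton in the spirit of EmitTryCatchStmt: the
// runtime-specific pieces are injected, and empty begin/end functions mean
// the exception object is already usable as-is.
void tryCatch(const std::function<void()> &body,
              const std::function<void(Exn &)> &handler,
              const std::function<Exn &(Exn &)> &beginCatch,
              const std::function<void()> &endCatch) {
  try {
    body();
  } catch (Exn &e) {
    Exn &obj = beginCatch ? beginCatch(e) : e;
    handler(obj);
    if (endCatch)
      endCatch();
  }
}

int main() {
  tryCatch([] { throw Exn{42}; },
           [](Exn &e) { std::cout << "caught " << e.payload << "\n"; },
           nullptr, nullptr); // personality-provided object, no begin/end
}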
void EmitAtSynchronizedStmt(CodeGenFunction &CGF, const ObjCAtSynchronizedStmt &S, llvm::Function *syncEnterFn, @@ -179,7 +179,7 @@ public: const ObjCMethodDecl *Method = 0) = 0; /// Emit the code to return the named protocol as an object, as in a - /// @protocol expression. + /// \@protocol expression. virtual llvm::Value *GenerateProtocolRef(CGBuilderTy &Builder, const ObjCProtocolDecl *OPD) = 0; diff --git a/lib/CodeGen/CGRTTI.cpp b/lib/CodeGen/CGRTTI.cpp index 19973b4..d1b370a 100644 --- a/lib/CodeGen/CGRTTI.cpp +++ b/lib/CodeGen/CGRTTI.cpp @@ -985,7 +985,8 @@ llvm::Constant *CodeGenModule::GetAddrOfRTTIDescriptor(QualType Ty, if (!ForEH && !getContext().getLangOpts().RTTI) return llvm::Constant::getNullValue(Int8PtrTy); - if (ForEH && Ty->isObjCObjectPointerType() && !LangOpts.NeXTRuntime) + if (ForEH && Ty->isObjCObjectPointerType() && + LangOpts.ObjCRuntime.isGNUFamily()) return ObjCRuntime->GetEHType(Ty); return RTTIBuilder(*this).BuildTypeInfo(Ty); diff --git a/lib/CodeGen/CGRecordLayout.h b/lib/CodeGen/CGRecordLayout.h index 25a0a50..94c822f 100644 --- a/lib/CodeGen/CGRecordLayout.h +++ b/lib/CodeGen/CGRecordLayout.h @@ -64,12 +64,7 @@ public: /// Bit width of the memory access to perform. unsigned AccessWidth; - /// The alignment of the memory access, or 0 if the default alignment should - /// be used. - // - // FIXME: Remove use of 0 to encode default, instead have IRgen do the right - // thing when it generates the code, if avoiding align directives is - // desired. + /// The alignment of the memory access, assuming the parent is aligned. CharUnits AccessAlignment; /// Offset for the target value. diff --git a/lib/CodeGen/CGRecordLayoutBuilder.cpp b/lib/CodeGen/CGRecordLayoutBuilder.cpp index 1193e97..d642ef8 100644 --- a/lib/CodeGen/CGRecordLayoutBuilder.cpp +++ b/lib/CodeGen/CGRecordLayoutBuilder.cpp @@ -235,6 +235,8 @@ CGBitFieldInfo CGBitFieldInfo::MakeInfo(CodeGenTypes &Types, uint64_t FieldSize, uint64_t ContainingTypeSizeInBits, unsigned ContainingTypeAlign) { + assert(ContainingTypeAlign && "Expected alignment to be specified"); + llvm::Type *Ty = Types.ConvertTypeForMem(FD->getType()); CharUnits TypeSizeInBytes = CharUnits::fromQuantity(Types.getTargetData().getTypeAllocSize(Ty)); @@ -714,14 +716,18 @@ CGRecordLayoutBuilder::LayoutNonVirtualBases(const CXXRecordDecl *RD, } // Otherwise, add a vtable / vf-table if the layout says to do so. - } else if (Types.getContext().getTargetInfo().getCXXABI() == CXXABI_Microsoft - ? Layout.getVFPtrOffset() != CharUnits::fromQuantity(-1) - : RD->isDynamicClass()) { + } else if (Layout.hasOwnVFPtr()) { llvm::Type *FunctionType = llvm::FunctionType::get(llvm::Type::getInt32Ty(Types.getLLVMContext()), /*isVarArg=*/true); llvm::Type *VTableTy = FunctionType->getPointerTo(); - + + if (getTypeAlignment(VTableTy) > Alignment) { + // FIXME: Should we allow this to happen in Sema? 
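The RTTI hunk above asks the Objective-C runtime for exception types only on GNU-family runtimes, using the new ObjCRuntime kind enumeration visible in the CreateGNUObjCRuntime/CreateMacObjCRuntime switches. A sketch of the family predicates over those six kinds; the helper names mirror, but are not, clang's:

#include <iostream>

// Runtime kinds as enumerated in the runtime-creation switches above.
enum class ObjCRuntimeKind { FragileMacOSX, MacOSX, iOS, GNUstep, GCC, ObjFW };

bool isNeXTFamily(ObjCRuntimeKind k) {
  return k == ObjCRuntimeKind::FragileMacOSX ||
         k == ObjCRuntimeKind::MacOSX || k == ObjCRuntimeKind::iOS;
}

bool isGNUFamily(ObjCRuntimeKind k) { return !isNeXTFamily(k); }

int main() {
  // The RTTI hunk routes Objective-C exception types through the runtime
  // only for GNU-family runtimes.
  std::cout << isGNUFamily(ObjCRuntimeKind::ObjFW) << "\n";  // 1
  std::cout << isGNUFamily(ObjCRuntimeKind::MacOSX) << "\n"; // 0
}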
+ assert(!Packed && "Alignment is wrong even with packed struct!"); + return false; + } + assert(NextFieldOffset.isZero() && "VTable pointer must come first!"); AppendField(CharUnits::Zero(), VTableTy->getPointerTo()); @@ -814,7 +820,7 @@ bool CGRecordLayoutBuilder::LayoutFields(const RecordDecl *D) { if (IsMsStruct) { // Zero-length bitfields following non-bitfield members are // ignored: - const FieldDecl *FD = (*Field); + const FieldDecl *FD = *Field; if (Types.getContext().ZeroBitfieldFollowsNonBitfield(FD, LastFD)) { --FieldNo; continue; diff --git a/lib/CodeGen/CGStmt.cpp b/lib/CodeGen/CGStmt.cpp index a1d0789..467c779 100644 --- a/lib/CodeGen/CGStmt.cpp +++ b/lib/CodeGen/CGStmt.cpp @@ -133,6 +133,7 @@ void CodeGenFunction::EmitStmt(const Stmt *S) { case Stmt::SwitchStmtClass: EmitSwitchStmt(cast(*S)); break; case Stmt::AsmStmtClass: EmitAsmStmt(cast(*S)); break; + case Stmt::MSAsmStmtClass: EmitMSAsmStmt(cast(*S)); break; case Stmt::ObjCAtTryStmtClass: EmitObjCAtTryStmt(cast(*S)); @@ -155,7 +156,7 @@ void CodeGenFunction::EmitStmt(const Stmt *S) { case Stmt::ObjCAutoreleasePoolStmtClass: EmitObjCAutoreleasePoolStmt(cast(*S)); break; - + case Stmt::CXXTryStmtClass: EmitCXXTryStmt(cast(*S)); break; @@ -360,15 +361,14 @@ void CodeGenFunction::EmitIndirectGotoStmt(const IndirectGotoStmt &S) { llvm::Value *V = Builder.CreateBitCast(EmitScalarExpr(S.getTarget()), Int8PtrTy, "addr"); llvm::BasicBlock *CurBB = Builder.GetInsertBlock(); - // Get the basic block for the indirect goto. llvm::BasicBlock *IndGotoBB = GetIndirectGotoBlock(); - + // The first instruction in the block has to be the PHI for the switch dest, // add an entry for this branch. cast(IndGotoBB->begin())->addIncoming(V, CurBB); - + EmitBranch(IndGotoBB); } @@ -462,12 +462,12 @@ void CodeGenFunction::EmitWhileStmt(const WhileStmt &S) { if (S.getConditionVariable()) EmitAutoVarDecl(*S.getConditionVariable()); - + // Evaluate the conditional in the while header. C99 6.8.5.1: The // evaluation of the controlling expression takes place before each // execution of the loop body. llvm::Value *BoolCondVal = EvaluateExprAsBool(S.getCond()); - + // while(1) is common, avoid extra exit blocks. Be sure // to correctly handle break/continue though. bool EmitBoolCondBranch = true; @@ -489,7 +489,7 @@ void CodeGenFunction::EmitWhileStmt(const WhileStmt &S) { EmitBranchThroughCleanup(LoopExit); } } - + // Emit the loop body. We have to emit this in a cleanup scope // because it might be a singleton DeclStmt. { @@ -584,7 +584,7 @@ void CodeGenFunction::EmitForStmt(const ForStmt &S) { // Create a cleanup scope for the condition variable cleanups. RunCleanupsScope ConditionScope(*this); - + llvm::Value *BoolCondVal = 0; if (S.getCond()) { // If the for statement has a condition scope, emit the local variable @@ -598,7 +598,7 @@ void CodeGenFunction::EmitForStmt(const ForStmt &S) { // create a block to stage a loop exit along. if (ForScope.requiresCleanups()) ExitBlock = createBasicBlock("for.cond.cleanup"); - + // As long as the condition is true, iterate the loop. llvm::BasicBlock *ForBody = createBasicBlock("for.body"); @@ -679,7 +679,7 @@ void CodeGenFunction::EmitCXXForRangeStmt(const CXXForRangeStmt &S) { llvm::BasicBlock *ExitBlock = LoopExit.getBlock(); if (ForScope.requiresCleanups()) ExitBlock = createBasicBlock("for.cond.cleanup"); - + // The loop body, consisting of the specified body and the loop variable. 
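The assert above only fires for unpacked layouts: returning false hands control back to a driver that retries the record packed, where explicit padding replaces natural alignment. A sketch of that try-then-retry pattern; the function is hypothetical, not CGRecordLayoutBuilder's real signature:

#include <cassert>
#include <iostream>

// Hypothetical layout step: fail when the natural layout cannot honor a
// member's alignment, as the vf-table hunk above does for the vtable
// pointer; a packed retry sidesteps the check entirely.
bool layoutFields(bool packed, unsigned recordAlign, unsigned vptrAlign) {
  if (!packed && vptrAlign > recordAlign)
    return false; // give up; caller retries packed
  return true;
}

int main() {
  unsigned recordAlign = 4, vptrAlign = 8;
  bool packed = false;
  if (!layoutFields(packed, recordAlign, vptrAlign)) {
    packed = true;
    bool ok = layoutFields(packed, recordAlign, vptrAlign);
    assert(ok && "packed layout must always succeed");
    (void)ok;
  }
  std::cout << "packed=" << packed << "\n"; // packed=1
}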
llvm::BasicBlock *ForBody = createBasicBlock("for.body"); @@ -750,7 +750,7 @@ void CodeGenFunction::EmitReturnStmt(const ReturnStmt &S) { // Apply the named return value optimization for this return statement, // which means doing nothing: the appropriate result has already been // constructed into the NRVO variable. - + // If there is an NRVO flag for this variable, set it to 1 into indicate // that the cleanup code should not destroy the variable. if (llvm::Value *NRVOFlag = NRVOFlags[S.getNRVOCandidate()]) @@ -901,7 +901,7 @@ void CodeGenFunction::EmitCaseStmt(const CaseStmt &S) { // try to not emit an empty block. if ((CGM.getCodeGenOpts().OptimizationLevel > 0) && isa(S.getSubStmt())) { JumpDest Block = BreakContinueStack.back().BreakBlock; - + // Only do this optimization if there are no cleanups that need emitting. if (isObviouslyBranchWithoutCleanups(Block)) { SwitchInsn->addCase(CaseVal, Block.getBlock()); @@ -915,7 +915,7 @@ void CodeGenFunction::EmitCaseStmt(const CaseStmt &S) { return; } } - + EmitBlock(createBasicBlock("sw.bb")); llvm::BasicBlock *CaseDest = Builder.GetInsertBlock(); SwitchInsn->addCase(CaseVal, CaseDest); @@ -984,7 +984,7 @@ static CSFC_Result CollectStatementsForCase(const Stmt *S, // If this is a null statement, just succeed. if (S == 0) return Case ? CSFC_Success : CSFC_FallThrough; - + // If this is the switchcase (case 4: or default) that we're looking for, then // we're in business. Just add the substatement. if (const SwitchCase *SC = dyn_cast(S)) { @@ -993,7 +993,7 @@ static CSFC_Result CollectStatementsForCase(const Stmt *S, return CollectStatementsForCase(SC->getSubStmt(), 0, FoundCase, ResultStmts); } - + // Otherwise, this is some other case or default statement, just ignore it. return CollectStatementsForCase(SC->getSubStmt(), Case, FoundCase, ResultStmts); @@ -1003,7 +1003,7 @@ static CSFC_Result CollectStatementsForCase(const Stmt *S, // return a success! if (Case == 0 && isa(S)) return CSFC_Success; - + // If this is a switch statement, then it might contain the SwitchCase, the // break, or neither. if (const CompoundStmt *CS = dyn_cast(S)) { @@ -1015,12 +1015,12 @@ static CSFC_Result CollectStatementsForCase(const Stmt *S, // using the declaration even if it is skipped, so we can't optimize out // the decl if the kept statements might refer to it. bool HadSkippedDecl = false; - + // If we're looking for the case, just see if we can skip each of the // substatements. for (; Case && I != E; ++I) { HadSkippedDecl |= isa(*I); - + switch (CollectStatementsForCase(*I, Case, FoundCase, ResultStmts)) { case CSFC_Failure: return CSFC_Failure; case CSFC_Success: @@ -1033,7 +1033,7 @@ static CSFC_Result CollectStatementsForCase(const Stmt *S, // optimization. if (HadSkippedDecl) return CSFC_Failure; - + for (++I; I != E; ++I) if (CodeGenFunction::ContainsLabel(*I, true)) return CSFC_Failure; @@ -1047,7 +1047,7 @@ static CSFC_Result CollectStatementsForCase(const Stmt *S, assert(FoundCase && "Didn't find case but returned fallthrough?"); // We recursively found Case, so we're not looking for it anymore. Case = 0; - + // If we found the case and skipped declarations, we can't do the // optimization. if (HadSkippedDecl) @@ -1074,9 +1074,9 @@ static CSFC_Result CollectStatementsForCase(const Stmt *S, if (CodeGenFunction::ContainsLabel(*I, true)) return CSFC_Failure; return CSFC_Success; - } + } } - + return Case ? 
CSFC_Success : CSFC_FallThrough; } @@ -1088,11 +1088,11 @@ static CSFC_Result CollectStatementsForCase(const Stmt *S, return CSFC_Failure; return CSFC_Success; } - + // Otherwise, we want to include this statement. Everything is cool with that // so long as it doesn't contain a break out of the switch we're in. if (CodeGenFunction::containsBreak(S)) return CSFC_Failure; - + // Otherwise, everything is great. Include the statement and tell the caller // that we fall through and include the next statement as well. ResultStmts.push_back(S); @@ -1104,14 +1104,14 @@ static CSFC_Result CollectStatementsForCase(const Stmt *S, /// for a switch on constant. See the comment above CollectStatementsForCase /// for more details. static bool FindCaseStatementsForValue(const SwitchStmt &S, - const llvm::APInt &ConstantCondValue, + const llvm::APSInt &ConstantCondValue, SmallVectorImpl &ResultStmts, ASTContext &C) { // First step, find the switch case that is being branched to. We can do this // efficiently by scanning the SwitchCase list. const SwitchCase *Case = S.getSwitchCaseList(); const DefaultStmt *DefaultCase = 0; - + for (; Case; Case = Case->getNextSwitchCase()) { // It's either a default or case. Just remember the default statement in // case we're not jumping to any numbered cases. @@ -1119,17 +1119,17 @@ static bool FindCaseStatementsForValue(const SwitchStmt &S, DefaultCase = DS; continue; } - + // Check to see if this case is the one we're looking for. const CaseStmt *CS = cast(Case); // Don't handle case ranges yet. if (CS->getRHS()) return false; - + // If we found our case, remember it as 'case'. if (CS->getLHS()->EvaluateKnownConstInt(C) == ConstantCondValue) break; } - + // If we didn't find a matching case, we use a default if it exists, or we // elide the whole switch body! if (Case == 0) { @@ -1168,7 +1168,7 @@ void CodeGenFunction::EmitSwitchStmt(const SwitchStmt &S) { // See if we can constant fold the condition of the switch and therefore only // emit the live case statement (if any) of the switch. - llvm::APInt ConstantCondValue; + llvm::APSInt ConstantCondValue; if (ConstantFoldsToSimpleInteger(S.getCond(), ConstantCondValue)) { SmallVector CaseStmts; if (FindCaseStatementsForValue(S, ConstantCondValue, CaseStmts, @@ -1192,7 +1192,7 @@ void CodeGenFunction::EmitSwitchStmt(const SwitchStmt &S) { return; } } - + llvm::Value *CondV = EmitScalarExpr(S.getCond()); // Create basic block to hold stuff that comes after switch @@ -1380,7 +1380,7 @@ static llvm::MDNode *getAsmSrcLocInfo(const StringLiteral *Str, if (!StrVal.empty()) { const SourceManager &SM = CGF.CGM.getContext().getSourceManager(); const LangOptions &LangOpts = CGF.CGM.getLangOpts(); - + // Add the location of the start of each subsequent line of the asm to the // MDNode. for (unsigned i = 0, e = StrVal.size()-1; i != e; ++i) { @@ -1390,8 +1390,8 @@ static llvm::MDNode *getAsmSrcLocInfo(const StringLiteral *Str, Locs.push_back(llvm::ConstantInt::get(CGF.Int32Ty, LineLoc.getRawEncoding())); } - } - + } + return llvm::MDNode::get(CGF.getLLVMContext(), Locs); } @@ -1441,7 +1441,7 @@ void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) { std::vector ResultRegQualTys; std::vector ResultRegTypes; std::vector ResultTruncRegTypes; - std::vector ArgTypes; + std::vector ArgTypes; std::vector Args; // Keep track of inout constraints. @@ -1656,7 +1656,7 @@ void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) { // the expression, do the conversion. 
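FindCaseStatementsForValue above picks the one live case for a folded switch condition, falling back to default and eliding the whole body when neither matches; the APInt-to-APSInt change makes the comparison signedness-aware. A self-contained analogue over a plain case table:

#include <iostream>
#include <vector>

struct Case {
  bool isDefault;
  long value; // ignored for default
  const char *body;
};

// First matching case wins, default is the fallback, and a null result
// means the entire switch body is dead. (APSInt matters upstream so that,
// e.g., -1 and an unsigned all-ones value do not spuriously compare equal.)
const char *findLiveCase(const std::vector<Case> &cases, long cond) {
  const char *deflt = nullptr;
  for (const Case &c : cases) {
    if (c.isDefault) { deflt = c.body; continue; }
    if (c.value == cond)
      return c.body;
  }
  return deflt;
}

int main() {
  std::vector<Case> cases = {{false, 1, "one"},
                             {true, 0, "default"},
                             {false, 7, "seven"}};
  std::cout << findLiveCase(cases, 7) << "\n"; // seven
  std::cout << findLiveCase(cases, 9) << "\n"; // default
}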
if (ResultRegTypes[i] != ResultTruncRegTypes[i]) { llvm::Type *TruncTy = ResultTruncRegTypes[i]; - + // Truncate the integer result to the right size, note that TruncTy can be // a pointer. if (TruncTy->isFloatingPointTy()) @@ -1681,3 +1681,25 @@ void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) { EmitStoreThroughLValue(RValue::get(Tmp), ResultRegDests[i]); } } + +void CodeGenFunction::EmitMSAsmStmt(const MSAsmStmt &S) { + // MS-style inline assembly is not fully supported, so sema emits a warning. + if (!CGM.getCodeGenOpts().EmitMicrosoftInlineAsm) + return; + + assert (S.isSimple() && "CodeGen can only handle simple MSAsmStmts."); + + std::vector Args; + std::vector ArgTypes; + + std::string MachineClobbers = Target.getClobbers(); + + llvm::FunctionType *FTy = + llvm::FunctionType::get(VoidTy, ArgTypes, false); + + llvm::InlineAsm *IA = + llvm::InlineAsm::get(FTy, *S.getAsmString(), MachineClobbers, true); + llvm::CallInst *Result = Builder.CreateCall(IA, Args); + Result->addAttribute(~0, llvm::Attribute::NoUnwind); + Result->addAttribute(~0, llvm::Attribute::IANSDialect); +} diff --git a/lib/CodeGen/CGVTables.cpp b/lib/CodeGen/CGVTables.cpp index 17a0537..cdaa26a 100644 --- a/lib/CodeGen/CGVTables.cpp +++ b/lib/CodeGen/CGVTables.cpp @@ -355,13 +355,14 @@ void CodeGenFunction::GenerateThunk(llvm::Function *Fn, llvm::Value *Callee = CGM.GetAddrOfFunction(GD, Ty, /*ForVTable=*/true); #ifndef NDEBUG - const CGFunctionInfo &CallFnInfo = - CGM.getTypes().arrangeFunctionCall(ResultType, CallArgs, FPT->getExtInfo(), + const CGFunctionInfo &CallFnInfo = + CGM.getTypes().arrangeCXXMethodCall(CallArgs, FPT, RequiredArgs::forPrototypePlus(FPT, 1)); assert(CallFnInfo.getRegParm() == FnInfo.getRegParm() && CallFnInfo.isNoReturn() == FnInfo.isNoReturn() && CallFnInfo.getCallingConvention() == FnInfo.getCallingConvention()); - assert(similar(CallFnInfo.getReturnInfo(), CallFnInfo.getReturnType(), + assert(isa(MD) || // ignore dtor return types + similar(CallFnInfo.getReturnInfo(), CallFnInfo.getReturnType(), FnInfo.getReturnInfo(), FnInfo.getReturnType())); assert(CallFnInfo.arg_size() == FnInfo.arg_size()); for (unsigned i = 0, e = FnInfo.arg_size(); i != e; ++i) @@ -386,6 +387,9 @@ void CodeGenFunction::GenerateThunk(llvm::Function *Fn, if (!ResultType->isVoidType() && Slot.isNull()) CGM.getCXXABI().EmitReturnFromThunk(*this, RV, ResultType); + // Disable the final ARC autorelease. + AutoreleaseResult = false; + FinishFunction(); // Set the right linkage. @@ -569,14 +573,13 @@ CodeGenVTables::CreateVTableInitializer(const CXXRecordDecl *RD, if (cast(GD.getDecl())->isPure()) { // We have a pure virtual member function. if (!PureVirtualFn) { - llvm::FunctionType *Ty = - llvm::FunctionType::get(CGM.VoidTy, /*isVarArg=*/false); - PureVirtualFn = - CGM.CreateRuntimeFunction(Ty, "__cxa_pure_virtual"); - PureVirtualFn = llvm::ConstantExpr::getBitCast(PureVirtualFn, - Int8PtrTy); + llvm::FunctionType *Ty = + llvm::FunctionType::get(CGM.VoidTy, /*isVarArg=*/false); + StringRef PureCallName = CGM.getCXXABI().GetPureVirtualCallName(); + PureVirtualFn = CGM.CreateRuntimeFunction(Ty, PureCallName); + PureVirtualFn = llvm::ConstantExpr::getBitCast(PureVirtualFn, + CGM.Int8PtrTy); } - Init = PureVirtualFn; } else { // Check if we should use a thunk. 
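CreateVTableInitializer above stops hardcoding __cxa_pure_virtual and asks the C++ ABI object for the handler's name. A sketch of that hook; the Microsoft spelling _purecall is general knowledge about MSVC's runtime, not something this patch states:

#include <iostream>
#include <string>

// ABI hook in the spirit of the GetPureVirtualCallName call above.
struct CXXABI {
  virtual ~CXXABI() = default;
  virtual std::string pureVirtualCallName() const = 0;
};

struct ItaniumABI : CXXABI {
  std::string pureVirtualCallName() const override {
    return "__cxa_pure_virtual";
  }
};

struct MicrosoftABI : CXXABI {
  // Assumed: MSVC's runtime uses _purecall for this slot.
  std::string pureVirtualCallName() const override { return "_purecall"; }
};

// Vtable emission only ever sees the interface.
void emitPureSlot(const CXXABI &abi) {
  std::cout << "pure slot -> " << abi.pureVirtualCallName() << "\n";
}

int main() {
  emitPureSlot(ItaniumABI{});
  emitPureSlot(MicrosoftABI{});
}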
diff --git a/lib/CodeGen/CGValue.h b/lib/CodeGen/CGValue.h index ac704e7..a46f313 100644 --- a/lib/CodeGen/CGValue.h +++ b/lib/CodeGen/CGValue.h @@ -153,7 +153,7 @@ class LValue { private: void Initialize(QualType Type, Qualifiers Quals, - CharUnits Alignment = CharUnits(), + CharUnits Alignment, llvm::MDNode *TBAAInfo = 0) { this->Type = Type; this->Quals = Quals; @@ -295,12 +295,12 @@ public: /// access. static LValue MakeBitfield(llvm::Value *BaseValue, const CGBitFieldInfo &Info, - QualType type) { + QualType type, CharUnits Alignment) { LValue R; R.LVType = BitField; R.V = BaseValue; R.BitFieldInfo = &Info; - R.Initialize(type, type.getQualifiers()); + R.Initialize(type, type.getQualifiers(), Alignment); return R; } @@ -389,7 +389,8 @@ public: return AV; } - static AggValueSlot forLValue(LValue LV, IsDestructed_t isDestructed, + static AggValueSlot forLValue(const LValue &LV, + IsDestructed_t isDestructed, NeedsGCBarriers_t needsGC, IsAliased_t isAliased, IsZeroed_t isZeroed = IsNotZeroed) { diff --git a/lib/CodeGen/CMakeLists.txt b/lib/CodeGen/CMakeLists.txt index 7b1dbce..76be85f 100644 --- a/lib/CodeGen/CMakeLists.txt +++ b/lib/CodeGen/CMakeLists.txt @@ -8,8 +8,6 @@ set(LLVM_LINK_COMPONENTS vectorize ) -set(LLVM_USED_LIBS clangBasic clangAST clangFrontend) - add_clang_library(clangCodeGen BackendUtil.cpp CGBlocks.cpp @@ -52,5 +50,19 @@ add_clang_library(clangCodeGen TargetInfo.cpp ) -add_dependencies(clangCodeGen ClangAttrClasses ClangAttrList ClangDeclNodes - ClangStmtNodes) +add_dependencies(clangCodeGen + ClangARMNeon + ClangAttrClasses + ClangAttrList + ClangCommentNodes + ClangDeclNodes + ClangDiagnosticCommon + ClangDiagnosticFrontend + ClangStmtNodes + ) + +target_link_libraries(clangCodeGen + clangBasic + clangAST + clangFrontend + ) diff --git a/lib/CodeGen/CodeGenFunction.cpp b/lib/CodeGen/CodeGenFunction.cpp index 2939062..1d02861 100644 --- a/lib/CodeGen/CodeGenFunction.cpp +++ b/lib/CodeGen/CodeGenFunction.cpp @@ -23,12 +23,12 @@ #include "clang/AST/StmtCXX.h" #include "clang/Frontend/CodeGenOptions.h" #include "llvm/Intrinsics.h" -#include "llvm/Support/MDBuilder.h" +#include "llvm/MDBuilder.h" #include "llvm/Target/TargetData.h" using namespace clang; using namespace CodeGen; -CodeGenFunction::CodeGenFunction(CodeGenModule &cgm) +CodeGenFunction::CodeGenFunction(CodeGenModule &cgm, bool suppressNewContext) : CodeGenTypeCache(cgm), CGM(cgm), Target(CGM.getContext().getTargetInfo()), Builder(cgm.getModule().getContext()), @@ -42,7 +42,8 @@ CodeGenFunction::CodeGenFunction(CodeGenModule &cgm) TerminateHandler(0), TrapBB(0) { CatchUndefined = getContext().getLangOpts().CatchUndefined; - CGM.getCXXABI().getMangleContext().startNewFunction(); + if (!suppressNewContext) + CGM.getCXXABI().getMangleContext().startNewFunction(); } CodeGenFunction::~CodeGenFunction() { @@ -251,6 +252,81 @@ void CodeGenFunction::EmitMCountInstrumentation() { Builder.CreateCall(MCountFn); } +// OpenCL v1.2 s5.6.4.6 allows the compiler to store kernel argument +// information in the program executable. The argument information stored +// includes the argument name, its type, the address and access qualifiers used. +// FIXME: Add type, address, and access qualifiers. +static void GenOpenCLArgMetadata(const FunctionDecl *FD, llvm::Function *Fn, + CodeGenModule &CGM,llvm::LLVMContext &Context, + llvm::SmallVector &kernelMDArgs) { + + // Create MDNodes that represents the kernel arg metadata. 
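The CGValue.h hunk above removes the defaulted alignment from LValue::Initialize and threads a CharUnits parameter through MakeBitfield, so forgetting an alignment becomes a compile error instead of a silent zero. A before/after sketch of that API change with illustrative names:

#include <iostream>

struct CharUnits { long quantity; };

struct LValueV1 {
  // Before: callers could omit the alignment and silently get zero.
  void initialize(int /*type*/, CharUnits align = CharUnits{0}) {
    std::cout << "align=" << align.quantity << "\n";
  }
};

struct LValueV2 {
  // After: alignment is mandatory; a missing argument no longer compiles.
  void initialize(int /*type*/, CharUnits align) {
    std::cout << "align=" << align.quantity << "\n";
  }
};

int main() {
  LValueV1{}.initialize(0);               // compiles; alignment silently 0
  // LValueV2{}.initialize(0);            // error: too few arguments
  LValueV2{}.initialize(0, CharUnits{8}); // caller must decide
}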
+ // Each MDNode is a list in the form of "key", N number of values which is + // the same number of values as their are kernel arguments. + + // MDNode for the kernel argument names. + SmallVector argNames; + argNames.push_back(llvm::MDString::get(Context, "kernel_arg_name")); + + for (unsigned i = 0, e = FD->getNumParams(); i != e; ++i) { + const ParmVarDecl *parm = FD->getParamDecl(i); + + // Get argument name. + argNames.push_back(llvm::MDString::get(Context, parm->getName())); + + } + // Add MDNode to the list of all metadata. + kernelMDArgs.push_back(llvm::MDNode::get(Context, argNames)); +} + +void CodeGenFunction::EmitOpenCLKernelMetadata(const FunctionDecl *FD, + llvm::Function *Fn) +{ + if (!FD->hasAttr()) + return; + + llvm::LLVMContext &Context = getLLVMContext(); + + llvm::SmallVector kernelMDArgs; + kernelMDArgs.push_back(Fn); + + if (CGM.getCodeGenOpts().EmitOpenCLArgMetadata) + GenOpenCLArgMetadata(FD, Fn, CGM, Context, kernelMDArgs); + + if (FD->hasAttr()) { + llvm::SmallVector attrMDArgs; + attrMDArgs.push_back(llvm::MDString::get(Context, "work_group_size_hint")); + WorkGroupSizeHintAttr *attr = FD->getAttr(); + llvm::Type *iTy = llvm::IntegerType::get(Context, 32); + attrMDArgs.push_back(llvm::ConstantInt::get(iTy, + llvm::APInt(32, (uint64_t)attr->getXDim()))); + attrMDArgs.push_back(llvm::ConstantInt::get(iTy, + llvm::APInt(32, (uint64_t)attr->getYDim()))); + attrMDArgs.push_back(llvm::ConstantInt::get(iTy, + llvm::APInt(32, (uint64_t)attr->getZDim()))); + kernelMDArgs.push_back(llvm::MDNode::get(Context, attrMDArgs)); + } + + if (FD->hasAttr()) { + llvm::SmallVector attrMDArgs; + attrMDArgs.push_back(llvm::MDString::get(Context, "reqd_work_group_size")); + ReqdWorkGroupSizeAttr *attr = FD->getAttr(); + llvm::Type *iTy = llvm::IntegerType::get(Context, 32); + attrMDArgs.push_back(llvm::ConstantInt::get(iTy, + llvm::APInt(32, (uint64_t)attr->getXDim()))); + attrMDArgs.push_back(llvm::ConstantInt::get(iTy, + llvm::APInt(32, (uint64_t)attr->getYDim()))); + attrMDArgs.push_back(llvm::ConstantInt::get(iTy, + llvm::APInt(32, (uint64_t)attr->getZDim()))); + kernelMDArgs.push_back(llvm::MDNode::get(Context, attrMDArgs)); + } + + llvm::MDNode *kernelMDNode = llvm::MDNode::get(Context, kernelMDArgs); + llvm::NamedMDNode *OpenCLKernelMetadata = + CGM.getModule().getOrInsertNamedMetadata("opencl.kernels"); + OpenCLKernelMetadata->addOperand(kernelMDNode); +} + void CodeGenFunction::StartFunction(GlobalDecl GD, QualType RetTy, llvm::Function *Fn, const CGFunctionInfo &FnInfo, @@ -279,14 +355,7 @@ void CodeGenFunction::StartFunction(GlobalDecl GD, QualType RetTy, if (getContext().getLangOpts().OpenCL) { // Add metadata for a kernel function. if (const FunctionDecl *FD = dyn_cast_or_null(D)) - if (FD->hasAttr()) { - llvm::LLVMContext &Context = getLLVMContext(); - llvm::NamedMDNode *OpenCLMetadata = - CGM.getModule().getOrInsertNamedMetadata("opencl.kernels"); - - llvm::Value *Op = Fn; - OpenCLMetadata->addOperand(llvm::MDNode::get(Context, Op)); - } + EmitOpenCLKernelMetadata(FD, Fn); } llvm::BasicBlock *EntryBB = createBasicBlock("entry", CurFn); @@ -537,7 +606,7 @@ bool CodeGenFunction::containsBreak(const Stmt *S) { /// constant folds return true and set the boolean result in Result. 
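The OpenCL hunks above encode optional kernel qualifiers as metadata nodes shaped as a string key followed by its values: per-argument names for kernel_arg_name, and X/Y/Z integers for the work-group-size qualifiers. A toy encoding of that same shape:

#include <iostream>
#include <string>
#include <vector>

// Toy metadata node: a tag followed by operands, mirroring the
// ("work_group_size_hint", X, Y, Z) and ("kernel_arg_name", names...)
// layouts built above.
struct MDNode {
  std::string tag;
  std::vector<std::string> operands;
};

MDNode workGroupSizeNode(const std::string &tag, unsigned x, unsigned y,
                         unsigned z) {
  return {tag, {std::to_string(x), std::to_string(y), std::to_string(z)}};
}

int main() {
  std::vector<MDNode> kernelMD;
  kernelMD.push_back({"kernel_arg_name", {"src", "dst", "n"}});
  kernelMD.push_back(workGroupSizeNode("reqd_work_group_size", 64, 1, 1));

  for (const MDNode &n : kernelMD) {
    std::cout << n.tag << ":";
    for (const std::string &op : n.operands)
      std::cout << " " << op;
    std::cout << "\n";
  }
}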
bool CodeGenFunction::ConstantFoldsToSimpleInteger(const Expr *Cond, bool &ResultBool) { - llvm::APInt ResultInt; + llvm::APSInt ResultInt; if (!ConstantFoldsToSimpleInteger(Cond, ResultInt)) return false; @@ -549,7 +618,7 @@ bool CodeGenFunction::ConstantFoldsToSimpleInteger(const Expr *Cond, /// to a constant, or if it does but contains a label, return false. If it /// constant folds return true and set the folded value. bool CodeGenFunction:: -ConstantFoldsToSimpleInteger(const Expr *Cond, llvm::APInt &ResultInt) { +ConstantFoldsToSimpleInteger(const Expr *Cond, llvm::APSInt &ResultInt) { // FIXME: Rename and handle conversion of other evaluatable things // to bool. llvm::APSInt Int; @@ -687,10 +756,10 @@ void CodeGenFunction::ErrorUnsupported(const Stmt *S, const char *Type, /// emitNonZeroVLAInit - Emit the "zero" initialization of a /// variable-length array whose elements have a non-zero bit-pattern. /// +/// \param baseType the inner-most element type of the array /// \param src - a char* pointing to the bit-pattern for a single /// base element of the array /// \param sizeInChars - the total size of the VLA, in chars -/// \param align - the total alignment of the VLA static void emitNonZeroVLAInit(CodeGenFunction &CGF, QualType baseType, llvm::Value *dest, llvm::Value *src, llvm::Value *sizeInChars) { @@ -881,33 +950,49 @@ llvm::Value *CodeGenFunction::emitArrayLength(const ArrayType *origArrayType, llvm::ConstantInt *zero = Builder.getInt32(0); gepIndices.push_back(zero); - // It's more efficient to calculate the count from the LLVM - // constant-length arrays than to re-evaluate the array bounds. uint64_t countFromCLAs = 1; + QualType eltType; llvm::ArrayType *llvmArrayType = - cast<llvm::ArrayType>( + dyn_cast<llvm::ArrayType>( cast<llvm::PointerType>(addr->getType())->getElementType()); - while (true) { + while (llvmArrayType) { assert(isa<ConstantArrayType>(arrayType)); assert(cast<ConstantArrayType>(arrayType)->getSize().getZExtValue() == llvmArrayType->getNumElements()); gepIndices.push_back(zero); countFromCLAs *= llvmArrayType->getNumElements(); + eltType = arrayType->getElementType(); llvmArrayType = dyn_cast<llvm::ArrayType>(llvmArrayType->getElementType()); - if (!llvmArrayType) break; - arrayType = getContext().getAsArrayType(arrayType->getElementType()); - assert(arrayType && "LLVM and Clang types are out-of-synch"); + assert((!llvmArrayType || arrayType) && + "LLVM and Clang types are out-of-synch"); } - baseType = arrayType->getElementType(); + if (arrayType) { + // From this point onwards, the Clang array type has been emitted + // as some other type (probably a packed struct). Compute the array + // size, and just emit the 'begin' expression as a bitcast. + while (arrayType) { + countFromCLAs *= + cast<ConstantArrayType>(arrayType)->getSize().getZExtValue(); + eltType = arrayType->getElementType(); + arrayType = getContext().getAsArrayType(eltType); + } + + unsigned AddressSpace = + cast<llvm::PointerType>(addr->getType())->getAddressSpace(); + llvm::Type *BaseType = ConvertType(eltType)->getPointerTo(AddressSpace); + addr = Builder.CreateBitCast(addr, BaseType, "array.begin"); + } else { + // Create the actual GEP. + addr = Builder.CreateInBoundsGEP(addr, gepIndices, "array.begin"); + } - // Create the actual GEP.
- addr = Builder.CreateInBoundsGEP(addr, gepIndices, "array.begin"); + baseType = eltType; llvm::Value *numElements = llvm::ConstantInt::get(SizeTy, countFromCLAs); @@ -1071,7 +1156,8 @@ void CodeGenFunction::EmitDeclRefExprDbgValue(const DeclRefExpr *E, llvm::Constant *Init) { assert (Init && "Invalid DeclRefExpr initializer!"); if (CGDebugInfo *Dbg = getDebugInfo()) - Dbg->EmitGlobalVariable(E->getDecl(), Init); + if (CGM.getCodeGenOpts().DebugInfo >= CodeGenOptions::LimitedDebugInfo) + Dbg->EmitGlobalVariable(E->getDecl(), Init); } CodeGenFunction::PeepholeProtection diff --git a/lib/CodeGen/CodeGenFunction.h b/lib/CodeGen/CodeGenFunction.h index 83f1e2d..ed3e43b 100644 --- a/lib/CodeGen/CodeGenFunction.h +++ b/lib/CodeGen/CodeGenFunction.h @@ -591,6 +591,11 @@ public: /// we prefer to insert allocas. llvm::AssertingVH AllocaInsertPt; + /// BoundsChecking - Emit run-time bounds checks. Higher values mean + /// potentially higher performance penalties. + unsigned char BoundsChecking; + + /// CatchUndefined - Emit run-time checks to catch undefined behaviors. bool CatchUndefined; /// In ARC, whether we should autorelease the return value. @@ -1192,8 +1197,18 @@ private: llvm::BasicBlock *TerminateHandler; llvm::BasicBlock *TrapBB; + /// Add a kernel metadata node to the named metadata node 'opencl.kernels'. + /// In the kernel metadata node, reference the kernel function and metadata + /// nodes for its optional attribute qualifiers (OpenCL 1.1 6.7.2): + /// - A node for the work_group_size_hint(X,Y,Z) qualifier contains string + /// "work_group_size_hint", and three 32-bit integers X, Y and Z. + /// - A node for the reqd_work_group_size(X,Y,Z) qualifier contains string + /// "reqd_work_group_size", and three 32-bit integers X, Y and Z. + void EmitOpenCLKernelMetadata(const FunctionDecl *FD, + llvm::Function *Fn); + public: - CodeGenFunction(CodeGenModule &cgm); + CodeGenFunction(CodeGenModule &cgm, bool suppressNewContext=false); ~CodeGenFunction(); CodeGenTypes &getTypes() const { return CGM.getTypes(); } @@ -1305,6 +1320,7 @@ public: const ObjCPropertyImplDecl *PID); void generateObjCGetterBody(const ObjCImplementationDecl *classImpl, const ObjCPropertyImplDecl *propImpl, + const ObjCMethodDecl *GetterMothodDecl, llvm::Constant *AtomicHelperFn); void GenerateObjCCtorDtorMethod(ObjCImplementationDecl *IMP, @@ -1560,6 +1576,7 @@ public: return LValue::MakeAddr(V, T, Alignment, getContext(), CGM.getTBAAInfo(T)); } + LValue MakeNaturalAlignAddrLValue(llvm::Value *V, QualType T) { CharUnits Alignment; if (!T->isIncompleteType()) @@ -1616,8 +1633,8 @@ public: /// /// \param IgnoreResult - True if the resulting value isn't used. RValue EmitAnyExpr(const Expr *E, - AggValueSlot AggSlot = AggValueSlot::ignored(), - bool IgnoreResult = false); + AggValueSlot aggSlot = AggValueSlot::ignored(), + bool ignoreResult = false); // EmitVAListRef - Emit a "reference" to a va_list; this is either the address // or the value of the expression, depending on how va_list is defined. @@ -1643,7 +1660,7 @@ public: /// volatile. void EmitAggregateCopy(llvm::Value *DestPtr, llvm::Value *SrcPtr, QualType EltTy, bool isVolatile=false, - unsigned Alignment = 0); + CharUnits Alignment = CharUnits::Zero()); /// StartBlock - Start new block named N. If insert block is a dummy block /// then reuse it. 
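// ---------------------------------------------------------------------------
// Illustrative sketch (editorial annotation, not part of the imported patch):
// for a kernel such as the following (attribute spellings per OpenCL C),
//
//   __attribute__((reqd_work_group_size(64, 1, 1)))
//   __kernel void scale(__global float *out, float k) { out[0] *= k; }
//
// the EmitOpenCLKernelMetadata path documented above would emit, in the
// LLVM 3.1-era IR syntax, roughly:
//
//   !opencl.kernels = !{!0}
//   !0 = metadata !{void (float addrspace(1)*, float)* @scale, metadata !1}
//   !1 = metadata !{metadata !"reqd_work_group_size", i32 64, i32 1, i32 1}
//
// and, when EmitOpenCLArgMetadata is enabled (assumed here to correspond to
// the OpenCL -cl-kernel-arg-info option), one additional node
//   metadata !{metadata !"kernel_arg_name", metadata !"out", metadata !"k"}
// recording the argument names.
// ---------------------------------------------------------------------------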
@@ -1964,6 +1981,7 @@ public: void EmitCaseStmt(const CaseStmt &S); void EmitCaseStmtRange(const CaseStmt &S); void EmitAsmStmt(const AsmStmt &S); + void EmitMSAsmStmt(const MSAsmStmt &S); void EmitObjCForCollectionStmt(const ObjCForCollectionStmt &S); void EmitObjCAtTryStmt(const ObjCAtTryStmt &S); @@ -2099,6 +2117,7 @@ public: LValue EmitMemberExpr(const MemberExpr *E); LValue EmitObjCIsaExpr(const ObjCIsaExpr *E); LValue EmitCompoundLiteralLValue(const CompoundLiteralExpr *E); + LValue EmitInitListLValue(const InitListExpr *E); LValue EmitConditionalOperatorLValue(const AbstractConditionalOperator *E); LValue EmitCastLValue(const CastExpr *E); LValue EmitNullInitializationLValue(const CXXScalarValueInitExpr *E); @@ -2143,9 +2162,6 @@ public: llvm::Value *EmitIvarOffset(const ObjCInterfaceDecl *Interface, const ObjCIvarDecl *Ivar); - LValue EmitLValueForAnonRecordField(llvm::Value* Base, - const IndirectFieldDecl* Field, - unsigned CVRQualifiers); LValue EmitLValueForField(LValue Base, const FieldDecl* Field); /// EmitLValueForFieldInitialization - Like EmitLValueForField, except that @@ -2158,9 +2174,6 @@ public: llvm::Value* Base, const ObjCIvarDecl *Ivar, unsigned CVRQualifiers); - LValue EmitLValueForBitfield(llvm::Value* Base, const FieldDecl* Field, - unsigned CVRQualifiers); - LValue EmitCXXConstructLValue(const CXXConstructExpr *E); LValue EmitCXXBindTemporaryLValue(const CXXBindTemporaryExpr *E); LValue EmitLambdaLValue(const LambdaExpr *E); @@ -2259,12 +2272,11 @@ public: llvm::Value *BuildVector(ArrayRef Ops); llvm::Value *EmitX86BuiltinExpr(unsigned BuiltinID, const CallExpr *E); - llvm::Value *EmitHexagonBuiltinExpr(unsigned BuiltinID, const CallExpr *E); llvm::Value *EmitPPCBuiltinExpr(unsigned BuiltinID, const CallExpr *E); llvm::Value *EmitObjCProtocolExpr(const ObjCProtocolExpr *E); llvm::Value *EmitObjCStringLiteral(const ObjCStringLiteral *E); - llvm::Value *EmitObjCNumericLiteral(const ObjCNumericLiteral *E); + llvm::Value *EmitObjCBoxedExpr(const ObjCBoxedExpr *E); llvm::Value *EmitObjCArrayLiteral(const ObjCArrayLiteral *E); llvm::Value *EmitObjCDictionaryLiteral(const ObjCDictionaryLiteral *E); llvm::Value *EmitObjCCollectionLiteral(const Expr *E, @@ -2359,7 +2371,7 @@ public: /// EmitAggExpr - Emit the computation of the specified expression /// of aggregate type. The result is computed into the given slot, /// which may be null to indicate that the value is not needed. - void EmitAggExpr(const Expr *E, AggValueSlot AS, bool IgnoreResult = false); + void EmitAggExpr(const Expr *E, AggValueSlot AS); /// EmitAggExprToLValue - Emit the computation of the specified expression of /// aggregate type into a temporary LValue. @@ -2411,10 +2423,9 @@ public: void EmitCXXGlobalVarDeclInit(const VarDecl &D, llvm::Constant *DeclPtr, bool PerformInit); - /// EmitCXXGlobalDtorRegistration - Emits a call to register the global ptr - /// with the C++ runtime so that its destructor will be called at exit. - void EmitCXXGlobalDtorRegistration(llvm::Constant *DtorFn, - llvm::Constant *DeclPtr); + /// Call atexit() with a function that passes the given argument to + /// the given function. + void registerGlobalDtorWithAtExit(llvm::Constant *fn, llvm::Constant *addr); /// Emit code in this function to perform a guarded variable /// initialization. Guarded initializations are used when it's not @@ -2497,7 +2508,7 @@ public: /// ConstantFoldsToSimpleInteger - If the specified expression does not fold /// to a constant, or if it does but contains a label, return false. 
If it /// constant folds return true and set the folded value. - bool ConstantFoldsToSimpleInteger(const Expr *Cond, llvm::APInt &Result); + bool ConstantFoldsToSimpleInteger(const Expr *Cond, llvm::APSInt &Result); /// EmitBranchOnBoolExpr - Emit a branch on a boolean condition (e.g. for an /// if statement) to the specified blocks. Based on the condition, this might diff --git a/lib/CodeGen/CodeGenModule.cpp b/lib/CodeGen/CodeGenModule.cpp index 9a55c08..3ae3c52 100644 --- a/lib/CodeGen/CodeGenModule.cpp +++ b/lib/CodeGen/CodeGenModule.cpp @@ -102,14 +102,16 @@ CodeGenModule::CodeGenModule(ASTContext &C, const CodeGenOptions &CGO, if (LangOpts.CUDA) createCUDARuntime(); - // Enable TBAA unless it's suppressed. - if (!CodeGenOpts.RelaxedAliasing && CodeGenOpts.OptimizationLevel > 0) - TBAA = new CodeGenTBAA(Context, VMContext, getLangOpts(), + // Enable TBAA unless it's suppressed. ThreadSanitizer needs TBAA even at O0. + if (LangOpts.ThreadSanitizer || + (!CodeGenOpts.RelaxedAliasing && CodeGenOpts.OptimizationLevel > 0)) + TBAA = new CodeGenTBAA(Context, VMContext, CodeGenOpts, getLangOpts(), ABI.getMangleContext()); // If debug info or coverage generation is enabled, create the CGDebugInfo // object. - if (CodeGenOpts.DebugInfo || CodeGenOpts.EmitGcovArcs || + if (CodeGenOpts.DebugInfo != CodeGenOptions::NoDebugInfo || + CodeGenOpts.EmitGcovArcs || CodeGenOpts.EmitGcovNotes) DebugInfo = new CGDebugInfo(*this); @@ -133,10 +135,22 @@ CodeGenModule::~CodeGenModule() { } void CodeGenModule::createObjCRuntime() { - if (!LangOpts.NeXTRuntime) + // This is just isGNUFamily(), but we want to force implementors of + // new ABIs to decide how best to do this. + switch (LangOpts.ObjCRuntime.getKind()) { + case ObjCRuntime::GNUstep: + case ObjCRuntime::GCC: + case ObjCRuntime::ObjFW: ObjCRuntime = CreateGNUObjCRuntime(*this); - else + return; + + case ObjCRuntime::FragileMacOSX: + case ObjCRuntime::MacOSX: + case ObjCRuntime::iOS: ObjCRuntime = CreateMacObjCRuntime(*this); + return; + } + llvm_unreachable("bad runtime kind"); } void CodeGenModule::createOpenCLRuntime() { @@ -245,6 +259,45 @@ void CodeGenModule::setGlobalVisibility(llvm::GlobalValue *GV, GV->setVisibility(GetLLVMVisibility(LV.visibility())); } +static llvm::GlobalVariable::ThreadLocalMode GetLLVMTLSModel(StringRef S) { + return llvm::StringSwitch(S) + .Case("global-dynamic", llvm::GlobalVariable::GeneralDynamicTLSModel) + .Case("local-dynamic", llvm::GlobalVariable::LocalDynamicTLSModel) + .Case("initial-exec", llvm::GlobalVariable::InitialExecTLSModel) + .Case("local-exec", llvm::GlobalVariable::LocalExecTLSModel); +} + +static llvm::GlobalVariable::ThreadLocalMode GetLLVMTLSModel( + CodeGenOptions::TLSModel M) { + switch (M) { + case CodeGenOptions::GeneralDynamicTLSModel: + return llvm::GlobalVariable::GeneralDynamicTLSModel; + case CodeGenOptions::LocalDynamicTLSModel: + return llvm::GlobalVariable::LocalDynamicTLSModel; + case CodeGenOptions::InitialExecTLSModel: + return llvm::GlobalVariable::InitialExecTLSModel; + case CodeGenOptions::LocalExecTLSModel: + return llvm::GlobalVariable::LocalExecTLSModel; + } + llvm_unreachable("Invalid TLS model!"); +} + +void CodeGenModule::setTLSMode(llvm::GlobalVariable *GV, + const VarDecl &D) const { + assert(D.isThreadSpecified() && "setting TLS mode on non-TLS var!"); + + llvm::GlobalVariable::ThreadLocalMode TLM; + TLM = GetLLVMTLSModel(CodeGenOpts.DefaultTLSModel); + + // Override the TLS model if it is explicitly specified. 
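// Editorial annotation (illustrative sketch, not part of the imported patch):
// a variable declared as
//   __thread int counter __attribute__((tls_model("initial-exec")));
// reaches the check below with a TLSModelAttr whose model string is
// "initial-exec"; the StringRef overload of GetLLVMTLSModel above maps it to
// llvm::GlobalVariable::InitialExecTLSModel, overriding the module-wide
// CodeGenOpts.DefaultTLSModel (set by -ftls-model) for this one variable.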
+ if (D.hasAttr<TLSModelAttr>()) { + const TLSModelAttr *Attr = D.getAttr<TLSModelAttr>(); + TLM = GetLLVMTLSModel(Attr->getModel()); + } + + GV->setThreadLocalMode(TLM); +} + /// Set the symbol visibility of type information (vtable and RTTI) /// associated with the given type. void CodeGenModule::setTypeVisibility(llvm::GlobalValue *GV, @@ -334,7 +387,8 @@ StringRef CodeGenModule::getMangledName(GlobalDecl GD) { else if (const CXXDestructorDecl *D = dyn_cast<CXXDestructorDecl>(ND)) getCXXABI().getMangleContext().mangleCXXDtor(D, GD.getDtorType(), Out); else if (const BlockDecl *BD = dyn_cast<BlockDecl>(ND)) - getCXXABI().getMangleContext().mangleBlock(BD, Out); + getCXXABI().getMangleContext().mangleBlock(BD, Out, + dyn_cast_or_null<VarDecl>(initializedGlobalDecl.getDecl())); else getCXXABI().getMangleContext().mangleName(ND, Out); @@ -355,7 +409,8 @@ void CodeGenModule::getBlockMangledName(GlobalDecl GD, MangleBuffer &Buffer, const Decl *D = GD.getDecl(); llvm::raw_svector_ostream Out(Buffer.getBuffer()); if (D == 0) - MangleCtx.mangleGlobalBlock(BD, Out); + MangleCtx.mangleGlobalBlock(BD, + dyn_cast_or_null<VarDecl>(initializedGlobalDecl.getDecl()), Out); else if (const CXXConstructorDecl *CD = dyn_cast<CXXConstructorDecl>(D)) MangleCtx.mangleCtorBlock(CD, GD.getCtorType(), BD, Out); else if (const CXXDestructorDecl *DD = dyn_cast<CXXDestructorDecl>(D)) @@ -474,8 +529,7 @@ void CodeGenModule::SetLLVMFunctionAttributes(const Decl *D, unsigned CallingConv; AttributeListType AttributeList; ConstructAttributeList(Info, D, AttributeList, CallingConv); - F->setAttributes(llvm::AttrListPtr::get(AttributeList.begin(), - AttributeList.size())); + F->setAttributes(llvm::AttrListPtr::get(AttributeList)); F->setCallingConv(static_cast<llvm::CallingConv::ID>(CallingConv)); } @@ -493,7 +547,7 @@ static bool hasUnwindExceptions(const LangOptions &LangOpts) { // If ObjC exceptions are enabled, this depends on the ABI. if (LangOpts.ObjCExceptions) { - if (!LangOpts.ObjCNonFragileABI) return false; + return LangOpts.ObjCRuntime.hasUnwindExceptions(); } return true; @@ -517,10 +571,14 @@ void CodeGenModule::SetLLVMFunctionAttributesForDefinition(const Decl *D, F->addFnAttr(llvm::Attribute::NoInline); // (noinline wins over always_inline, and we can't specify both in IR) - if (D->hasAttr<AlwaysInlineAttr>() && + if ((D->hasAttr<AlwaysInlineAttr>() || D->hasAttr<ForceInlineAttr>()) && !F->hasFnAttr(llvm::Attribute::NoInline)) F->addFnAttr(llvm::Attribute::AlwaysInline); + // FIXME: Communicate hot and cold attributes to LLVM more directly. + if (D->hasAttr<ColdAttr>()) + F->addFnAttr(llvm::Attribute::OptimizeForSize); + if (isa<CXXConstructorDecl>(D) || isa<CXXDestructorDecl>(D)) F->setUnnamedAddr(true); @@ -652,7 +710,7 @@ void CodeGenModule::EmitDeferred() { if (!DeferredVTables.empty()) { const CXXRecordDecl *RD = DeferredVTables.back(); DeferredVTables.pop_back(); - getVTables().GenerateClassData(getVTableLinkage(RD), RD); + getCXXABI().EmitVTables(RD); continue; } @@ -930,7 +988,7 @@ CodeGenModule::shouldEmitFunction(const FunctionDecl *F) { if (getFunctionLinkage(F) != llvm::Function::AvailableExternallyLinkage) return true; if (CodeGenOpts.OptimizationLevel == 0 && - !F->hasAttr<AlwaysInlineAttr>()) + !F->hasAttr<AlwaysInlineAttr>() && !F->hasAttr<ForceInlineAttr>()) return false; // PR9614. Avoid cases where the source code is lying to us. An available // externally function should have an equivalent function somewhere else, @@ -1054,6 +1112,7 @@ CodeGenModule::GetOrCreateLLVMFunction(StringRef MangledName, } else if (getLangOpts().CPlusPlus && D.getDecl()) { // Look for a declaration that's lexically in a record.
const FunctionDecl *FD = cast(D.getDecl()); + FD = FD->getMostRecentDecl(); do { if (isa(FD->getLexicalDeclContext())) { if (FD->isImplicit() && !ForVTable) { @@ -1166,11 +1225,12 @@ CodeGenModule::GetOrCreateLLVMGlobal(StringRef MangledName, DeferredDecls.erase(DDI); } + unsigned AddrSpace = GetGlobalVarAddressSpace(D, Ty->getAddressSpace()); llvm::GlobalVariable *GV = new llvm::GlobalVariable(getModule(), Ty->getElementType(), false, llvm::GlobalValue::ExternalLinkage, 0, MangledName, 0, - false, Ty->getAddressSpace()); + llvm::GlobalVariable::NotThreadLocal, AddrSpace); // Handle things which are present even on external declarations. if (D) { @@ -1193,10 +1253,14 @@ CodeGenModule::GetOrCreateLLVMGlobal(StringRef MangledName, GV->setVisibility(GetLLVMVisibility(LV.visibility())); } - GV->setThreadLocal(D->isThreadSpecified()); + if (D->isThreadSpecified()) + setTLSMode(GV, *D); } - return GV; + if (AddrSpace != Ty->getAddressSpace()) + return llvm::ConstantExpr::getBitCast(GV, Ty); + else + return GV; } @@ -1286,7 +1350,7 @@ void CodeGenModule::EmitTentativeDefinition(const VarDecl *D) { void CodeGenModule::EmitVTable(CXXRecordDecl *Class, bool DefinitionRequired) { if (DefinitionRequired) - getVTables().GenerateClassData(getVTableLinkage(Class), Class); + getCXXABI().EmitVTables(Class); } llvm::GlobalVariable::LinkageTypes @@ -1481,6 +1545,20 @@ CodeGenModule::MaybeEmitGlobalStdInitializerListInitializer(const VarDecl *D, return llvmInit; } +unsigned CodeGenModule::GetGlobalVarAddressSpace(const VarDecl *D, + unsigned AddrSpace) { + if (LangOpts.CUDA && CodeGenOpts.CUDAIsDevice) { + if (D->hasAttr()) + AddrSpace = getContext().getTargetAddressSpace(LangAS::cuda_constant); + else if (D->hasAttr()) + AddrSpace = getContext().getTargetAddressSpace(LangAS::cuda_shared); + else + AddrSpace = getContext().getTargetAddressSpace(LangAS::cuda_device); + } + + return AddrSpace; +} + void CodeGenModule::EmitGlobalVarDefinition(const VarDecl *D) { llvm::Constant *Init = 0; QualType ASTTy = D->getType(); @@ -1511,8 +1589,10 @@ void CodeGenModule::EmitGlobalVarDefinition(const VarDecl *D) { // FIXME: It does so in a global constructor, which is *not* what we // want. - if (!Init) + if (!Init) { + initializedGlobalDecl = GlobalDecl(D); Init = EmitConstantInit(*InitDecl); + } if (!Init) { QualType T = InitExpr->getType(); if (D->getType()->isReferenceType()) @@ -1560,7 +1640,7 @@ void CodeGenModule::EmitGlobalVarDefinition(const VarDecl *D) { if (GV == 0 || GV->getType()->getElementType() != InitType || GV->getType()->getAddressSpace() != - getContext().getTargetAddressSpace(ASTTy)) { + GetGlobalVarAddressSpace(D, getContext().getTargetAddressSpace(ASTTy))) { // Move the old entry aside so that we'll create a new one. Entry->setName(StringRef()); @@ -1604,7 +1684,8 @@ void CodeGenModule::EmitGlobalVarDefinition(const VarDecl *D) { // Emit global variable debug information. 
if (CGDebugInfo *DI = getModuleDebugInfo()) - DI->EmitGlobalVariable(GV, D); + if (getCodeGenOpts().DebugInfo >= CodeGenOptions::LimitedDebugInfo) + DI->EmitGlobalVariable(GV, D); } llvm::GlobalValue::LinkageTypes @@ -1710,8 +1791,7 @@ static void ReplaceUsesOfNonProtoTypeWithRealFunction(llvm::GlobalValue *Old, ArgList.clear(); if (!NewCall->getType()->isVoidTy()) NewCall->takeName(CI); - NewCall->setAttributes(llvm::AttrListPtr::get(AttrVec.begin(), - AttrVec.end())); + NewCall->setAttributes(llvm::AttrListPtr::get(AttrVec)); NewCall->setCallingConv(CI->getCallingConv()); // Finally, remove the old call, replacing any uses with the new one. @@ -2059,7 +2139,7 @@ CodeGenModule::GetAddrOfConstantString(const StringLiteral *Literal) { std::string StringClass(getLangOpts().ObjCConstantStringClass); llvm::Type *Ty = getTypes().ConvertType(getContext().IntTy); llvm::Constant *GV; - if (LangOpts.ObjCNonFragileABI) { + if (LangOpts.ObjCRuntime.isNonFragile()) { std::string str = StringClass.empty() ? "OBJC_CLASS_$_NSConstantString" : "OBJC_CLASS_$_" + StringClass; @@ -2104,7 +2184,7 @@ CodeGenModule::GetAddrOfConstantString(const StringLiteral *Literal) { FieldTypes[i], /*TInfo=*/0, /*BitWidth=*/0, /*Mutable=*/false, - /*HasInit=*/false); + ICIS_NoInit); Field->setAccess(AS_public); D->addDecl(Field); } @@ -2147,7 +2227,7 @@ CodeGenModule::GetAddrOfConstantString(const StringLiteral *Literal) { "_unnamed_nsstring_"); // FIXME. Fix section. if (const char *Sect = - LangOpts.ObjCNonFragileABI + LangOpts.ObjCRuntime.isNonFragile() ? getContext().getTargetInfo().getNSStringNonFragileABISection() : getContext().getTargetInfo().getNSStringSection()) GV->setSection(Sect); @@ -2179,7 +2259,7 @@ QualType CodeGenModule::getObjCFastEnumerationStateType() { FieldTypes[i], /*TInfo=*/0, /*BitWidth=*/0, /*Mutable=*/false, - /*HasInit=*/false); + ICIS_NoInit); Field->setAccess(AS_public); D->addDecl(Field); } @@ -2506,14 +2586,8 @@ void CodeGenModule::EmitTopLevelDecl(Decl *D) { // Forward declarations, no (immediate) code generation. 
case Decl::ObjCInterface: + case Decl::ObjCCategory: break; - - case Decl::ObjCCategory: { - ObjCCategoryDecl *CD = cast<ObjCCategoryDecl>(D); - if (CD->IsClassExtension() && CD->hasSynthBitfield()) - Context.ResetObjCLayout(CD->getClassInterface()); - break; - } case Decl::ObjCProtocol: { ObjCProtocolDecl *Proto = cast<ObjCProtocolDecl>(D); @@ -2530,8 +2604,6 @@ void CodeGenModule::EmitTopLevelDecl(Decl *D) { case Decl::ObjCImplementation: { ObjCImplementationDecl *OMD = cast<ObjCImplementationDecl>(D); - if (LangOpts.ObjCNonFragileABI2 && OMD->hasSynthBitfield()) - Context.ResetObjCLayout(OMD->getClassInterface()); EmitObjCPropertyImplementations(OMD); EmitObjCIvarInitializations(OMD); ObjCRuntime->GenerateClass(OMD); @@ -2564,7 +2636,7 @@ void CodeGenModule::EmitTopLevelDecl(Decl *D) { const std::string &S = getModule().getModuleInlineAsm(); if (S.empty()) getModule().setModuleInlineAsm(AsmString); - else if (*--S.end() == '\n') + else if (S.end()[-1] == '\n') getModule().setModuleInlineAsm(S + AsmString.str()); else getModule().setModuleInlineAsm(S + '\n' + AsmString.str()); diff --git a/lib/CodeGen/CodeGenModule.h b/lib/CodeGen/CodeGenModule.h index 38f5008..d6ff50d 100644 --- a/lib/CodeGen/CodeGenModule.h +++ b/lib/CodeGen/CodeGenModule.h @@ -138,6 +138,7 @@ namespace CodeGen { union { unsigned char PointerAlignInBytes; unsigned char PointerSizeInBytes; + unsigned char SizeSizeInBytes; // sizeof(size_t) }; }; @@ -350,6 +351,8 @@ class CodeGenModule : public CodeGenTypeCache { struct { int GlobalUniqueCount; } Block; + + GlobalDecl initializedGlobalDecl; /// @} public: @@ -471,6 +474,10 @@ public: /// GlobalValue. void setGlobalVisibility(llvm::GlobalValue *GV, const NamedDecl *D) const; + /// setTLSMode - Set the TLS mode for the given LLVM GlobalVariable + /// for the thread-local variable declaration D. + void setTLSMode(llvm::GlobalVariable *GV, const VarDecl &D) const; + /// TypeVisibilityKind - The kind of global variable that is passed to /// setTypeVisibility enum TypeVisibilityKind { @@ -516,6 +523,12 @@ public: CreateOrReplaceCXXRuntimeVariable(StringRef Name, llvm::Type *Ty, llvm::GlobalValue::LinkageTypes Linkage); + /// GetGlobalVarAddressSpace - Return the address space of the underlying + /// global variable for D, as determined by its declaration. Normally this + /// is the same as the address space of D's type, but in CUDA, address spaces + /// are associated with declarations, not types. + unsigned GetGlobalVarAddressSpace(const VarDecl *D, unsigned AddrSpace); + /// GetAddrOfGlobalVar - Return the llvm::Constant for the address of the /// given global variable. If Ty is non-null and if the global doesn't exist, /// then it will be created with the specified type instead of whatever the @@ -580,7 +593,7 @@ public: /// getUniqueBlockCount - Fetches the global unique block count. int getUniqueBlockCount() { return ++Block.GlobalUniqueCount; } - + /// getBlockDescriptorType - Fetches the type of a generic block /// descriptor.
llvm::Type *getBlockDescriptorType(); diff --git a/lib/CodeGen/CodeGenTBAA.cpp b/lib/CodeGen/CodeGenTBAA.cpp index a3cadcf..bab60af 100644 --- a/lib/CodeGen/CodeGenTBAA.cpp +++ b/lib/CodeGen/CodeGenTBAA.cpp @@ -18,6 +18,7 @@ #include "CodeGenTBAA.h" #include "clang/AST/ASTContext.h" #include "clang/AST/Mangle.h" +#include "clang/Frontend/CodeGenOptions.h" #include "llvm/LLVMContext.h" #include "llvm/Metadata.h" #include "llvm/Constants.h" @@ -26,8 +27,9 @@ using namespace clang; using namespace CodeGen; CodeGenTBAA::CodeGenTBAA(ASTContext &Ctx, llvm::LLVMContext& VMContext, + const CodeGenOptions &CGO, const LangOptions &Features, MangleContext &MContext) - : Context(Ctx), VMContext(VMContext), Features(Features), MContext(MContext), + : Context(Ctx), CodeGenOpts(CGO), Features(Features), MContext(MContext), MDHelper(VMContext), Root(0), Char(0) { } @@ -74,6 +76,10 @@ static bool TypeHasMayAlias(QualType QTy) { llvm::MDNode * CodeGenTBAA::getTBAAInfo(QualType QTy) { + // At -O0 TBAA is not emitted for regular types. + if (CodeGenOpts.OptimizationLevel == 0 || CodeGenOpts.RelaxedAliasing) + return NULL; + // If the type has the may_alias attribute (even on a typedef), it is // effectively in the general char alias class. if (TypeHasMayAlias(QTy)) diff --git a/lib/CodeGen/CodeGenTBAA.h b/lib/CodeGen/CodeGenTBAA.h index 4a97852..c17a5cf 100644 --- a/lib/CodeGen/CodeGenTBAA.h +++ b/lib/CodeGen/CodeGenTBAA.h @@ -16,8 +16,8 @@ #define CLANG_CODEGEN_CODEGENTBAA_H #include "clang/Basic/LLVM.h" +#include "llvm/MDBuilder.h" #include "llvm/ADT/DenseMap.h" -#include "llvm/Support/MDBuilder.h" namespace llvm { class LLVMContext; @@ -26,6 +26,7 @@ namespace llvm { namespace clang { class ASTContext; + class CodeGenOptions; class LangOptions; class MangleContext; class QualType; @@ -38,7 +39,7 @@ namespace CodeGen { /// while lowering AST types to LLVM types. class CodeGenTBAA { ASTContext &Context; - llvm::LLVMContext& VMContext; + const CodeGenOptions &CodeGenOpts; const LangOptions &Features; MangleContext &MContext; @@ -61,6 +62,7 @@ class CodeGenTBAA { public: CodeGenTBAA(ASTContext &Ctx, llvm::LLVMContext &VMContext, + const CodeGenOptions &CGO, const LangOptions &Features, MangleContext &MContext); ~CodeGenTBAA(); diff --git a/lib/CodeGen/CodeGenTypes.cpp b/lib/CodeGen/CodeGenTypes.cpp index 41fd536..9a78dae 100644 --- a/lib/CodeGen/CodeGenTypes.cpp +++ b/lib/CodeGen/CodeGenTypes.cpp @@ -474,11 +474,11 @@ llvm::Type *CodeGenTypes::ConvertType(QualType T) { // build it. 
const CGFunctionInfo *FI; if (const FunctionProtoType *FPT = dyn_cast(FT)) { - FI = &arrangeFunctionType( + FI = &arrangeFreeFunctionType( CanQual::CreateUnsafe(QualType(FPT, 0))); } else { const FunctionNoProtoType *FNPT = cast(FT); - FI = &arrangeFunctionType( + FI = &arrangeFreeFunctionType( CanQual::CreateUnsafe(QualType(FNPT, 0))); } diff --git a/lib/CodeGen/CodeGenTypes.h b/lib/CodeGen/CodeGenTypes.h index ba2b3ae..3c29d2d 100644 --- a/lib/CodeGen/CodeGenTypes.h +++ b/lib/CodeGen/CodeGenTypes.h @@ -189,26 +189,32 @@ public: const CGFunctionInfo &arrangeCXXDestructor(const CXXDestructorDecl *D, CXXDtorType Type); - const CGFunctionInfo &arrangeFunctionCall(const CallArgList &Args, - const FunctionType *Ty); - const CGFunctionInfo &arrangeFunctionCall(QualType ResTy, - const CallArgList &args, - const FunctionType::ExtInfo &info, - RequiredArgs required); - - const CGFunctionInfo &arrangeFunctionType(CanQual Ty); - const CGFunctionInfo &arrangeFunctionType(CanQual Ty); + const CGFunctionInfo &arrangeFreeFunctionCall(const CallArgList &Args, + const FunctionType *Ty); + const CGFunctionInfo &arrangeFreeFunctionCall(QualType ResTy, + const CallArgList &args, + FunctionType::ExtInfo info, + RequiredArgs required); + + const CGFunctionInfo &arrangeCXXMethodCall(const CallArgList &args, + const FunctionProtoType *type, + RequiredArgs required); + + const CGFunctionInfo &arrangeFreeFunctionType(CanQual Ty); + const CGFunctionInfo &arrangeFreeFunctionType(CanQual Ty); const CGFunctionInfo &arrangeCXXMethodType(const CXXRecordDecl *RD, const FunctionProtoType *FTP); - /// Retrieves the ABI information for the given function signature. - /// This is the "core" routine to which all the others defer. + /// "Arrange" the LLVM information for a call or type with the given + /// signature. This is largely an internal method; other clients + /// should use one of the above routines, which ultimately defer to + /// this. /// /// \param argTypes - must all actually be canonical as params - const CGFunctionInfo &arrangeFunctionType(CanQualType returnType, - ArrayRef argTypes, - const FunctionType::ExtInfo &info, - RequiredArgs args); + const CGFunctionInfo &arrangeLLVMFunctionInfo(CanQualType returnType, + ArrayRef argTypes, + FunctionType::ExtInfo info, + RequiredArgs args); /// \brief Compute a new LLVM record layout object for the given record. 
CGRecordLayout *ComputeRecordLayout(const RecordDecl *D, diff --git a/lib/CodeGen/ItaniumCXXABI.cpp b/lib/CodeGen/ItaniumCXXABI.cpp index 98f67f3..0b7ce36 100644 --- a/lib/CodeGen/ItaniumCXXABI.cpp +++ b/lib/CodeGen/ItaniumCXXABI.cpp @@ -20,6 +20,7 @@ #include "CGCXXABI.h" #include "CGRecordLayout.h" +#include "CGVTables.h" #include "CodeGenFunction.h" #include "CodeGenModule.h" #include @@ -48,10 +49,6 @@ protected: return PtrDiffTy; } - bool NeedsArrayCookie(const CXXNewExpr *expr); - bool NeedsArrayCookie(const CXXDeleteExpr *expr, - QualType elementType); - public: ItaniumCXXABI(CodeGen::CodeGenModule &CGM, bool IsARM = false) : CGCXXABI(CGM), PtrDiffTy(0), IsARM(IsARM) { } @@ -111,19 +108,24 @@ public: void EmitInstanceFunctionProlog(CodeGenFunction &CGF); - CharUnits GetArrayCookieSize(const CXXNewExpr *expr); + StringRef GetPureVirtualCallName() { return "__cxa_pure_virtual"; } + + CharUnits getArrayCookieSizeImpl(QualType elementType); llvm::Value *InitializeArrayCookie(CodeGenFunction &CGF, llvm::Value *NewPtr, llvm::Value *NumElements, const CXXNewExpr *expr, QualType ElementType); - void ReadArrayCookie(CodeGenFunction &CGF, llvm::Value *Ptr, - const CXXDeleteExpr *expr, - QualType ElementType, llvm::Value *&NumElements, - llvm::Value *&AllocPtr, CharUnits &CookieSize); + llvm::Value *readArrayCookieImpl(CodeGenFunction &CGF, + llvm::Value *allocPtr, + CharUnits cookieSize); void EmitGuardedInit(CodeGenFunction &CGF, const VarDecl &D, llvm::GlobalVariable *DeclPtr, bool PerformInit); + void registerGlobalDtor(CodeGenFunction &CGF, llvm::Constant *dtor, + llvm::Constant *addr); + + void EmitVTables(const CXXRecordDecl *Class); }; class ARMCXXABI : public ItaniumCXXABI { @@ -148,16 +150,14 @@ public: void EmitReturnFromThunk(CodeGenFunction &CGF, RValue RV, QualType ResTy); - CharUnits GetArrayCookieSize(const CXXNewExpr *expr); + CharUnits getArrayCookieSizeImpl(QualType elementType); llvm::Value *InitializeArrayCookie(CodeGenFunction &CGF, llvm::Value *NewPtr, llvm::Value *NumElements, const CXXNewExpr *expr, QualType ElementType); - void ReadArrayCookie(CodeGenFunction &CGF, llvm::Value *Ptr, - const CXXDeleteExpr *expr, - QualType ElementType, llvm::Value *&NumElements, - llvm::Value *&AllocPtr, CharUnits &CookieSize); + llvm::Value *readArrayCookieImpl(CodeGenFunction &CGF, llvm::Value *allocPtr, + CharUnits cookieSize); private: /// \brief Returns true if the given instance method is one of the @@ -796,54 +796,11 @@ void ARMCXXABI::EmitReturnFromThunk(CodeGenFunction &CGF, /************************** Array allocation cookies **************************/ -bool ItaniumCXXABI::NeedsArrayCookie(const CXXNewExpr *expr) { - // If the class's usual deallocation function takes two arguments, - // it needs a cookie. - if (expr->doesUsualArrayDeleteWantSize()) - return true; - - // Automatic Reference Counting: - // We need an array cookie for pointers with strong or weak lifetime. - QualType AllocatedType = expr->getAllocatedType(); - if (getContext().getLangOpts().ObjCAutoRefCount && - AllocatedType->isObjCLifetimeType()) { - switch (AllocatedType.getObjCLifetime()) { - case Qualifiers::OCL_None: - case Qualifiers::OCL_ExplicitNone: - case Qualifiers::OCL_Autoreleasing: - return false; - - case Qualifiers::OCL_Strong: - case Qualifiers::OCL_Weak: - return true; - } - } - - // Otherwise, if the class has a non-trivial destructor, it always - // needs a cookie. 
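// Editorial annotation (illustrative example, not part of the imported
// patch): for
//   struct A { ~A(); };
//   A *p = new A[n];
// the non-trivial destructor means operator delete[] must later recover n,
// so the Itanium ABI prepends a cookie of max(sizeof(size_t), alignof(A))
// bytes to the allocation; getArrayCookieSizeImpl below computes exactly
// that padding.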
- const CXXRecordDecl *record = - AllocatedType->getBaseElementTypeUnsafe()->getAsCXXRecordDecl(); - return (record && !record->hasTrivialDestructor()); -} - -bool ItaniumCXXABI::NeedsArrayCookie(const CXXDeleteExpr *expr, - QualType elementType) { - // If the class's usual deallocation function takes two arguments, - // it needs a cookie. - if (expr->doesUsualArrayDeleteWantSize()) - return true; - - return elementType.isDestructedType(); -} - -CharUnits ItaniumCXXABI::GetArrayCookieSize(const CXXNewExpr *expr) { - if (!NeedsArrayCookie(expr)) - return CharUnits::Zero(); - - // Padding is the maximum of sizeof(size_t) and alignof(elementType) - ASTContext &Ctx = getContext(); - return std::max(Ctx.getTypeSizeInChars(Ctx.getSizeType()), - Ctx.getTypeAlignInChars(expr->getAllocatedType())); +CharUnits ItaniumCXXABI::getArrayCookieSizeImpl(QualType elementType) { + // The array cookie is a size_t; pad that up to the element alignment. + // The cookie is actually right-justified in that space. + return std::max(CharUnits::fromQuantity(CGM.SizeSizeInBytes), + CGM.getContext().getTypeAlignInChars(elementType)); } llvm::Value *ItaniumCXXABI::InitializeArrayCookie(CodeGenFunction &CGF, @@ -851,7 +808,7 @@ llvm::Value *ItaniumCXXABI::InitializeArrayCookie(CodeGenFunction &CGF, llvm::Value *NumElements, const CXXNewExpr *expr, QualType ElementType) { - assert(NeedsArrayCookie(expr)); + assert(requiresArrayCookie(expr)); unsigned AS = cast(NewPtr->getType())->getAddressSpace(); @@ -862,6 +819,7 @@ llvm::Value *ItaniumCXXABI::InitializeArrayCookie(CodeGenFunction &CGF, // The size of the cookie. CharUnits CookieSize = std::max(SizeSize, Ctx.getTypeAlignInChars(ElementType)); + assert(CookieSize == getArrayCookieSizeImpl(ElementType)); // Compute an offset to the cookie. llvm::Value *CookiePtr = NewPtr; @@ -882,53 +840,25 @@ llvm::Value *ItaniumCXXABI::InitializeArrayCookie(CodeGenFunction &CGF, CookieSize.getQuantity()); } -void ItaniumCXXABI::ReadArrayCookie(CodeGenFunction &CGF, - llvm::Value *Ptr, - const CXXDeleteExpr *expr, - QualType ElementType, - llvm::Value *&NumElements, - llvm::Value *&AllocPtr, - CharUnits &CookieSize) { - // Derive a char* in the same address space as the pointer. - unsigned AS = cast(Ptr->getType())->getAddressSpace(); - llvm::Type *CharPtrTy = CGF.Builder.getInt8Ty()->getPointerTo(AS); - - // If we don't need an array cookie, bail out early. - if (!NeedsArrayCookie(expr, ElementType)) { - AllocPtr = CGF.Builder.CreateBitCast(Ptr, CharPtrTy); - NumElements = 0; - CookieSize = CharUnits::Zero(); - return; - } - - QualType SizeTy = getContext().getSizeType(); - CharUnits SizeSize = getContext().getTypeSizeInChars(SizeTy); - llvm::Type *SizeLTy = CGF.ConvertType(SizeTy); - - CookieSize - = std::max(SizeSize, getContext().getTypeAlignInChars(ElementType)); - - CharUnits NumElementsOffset = CookieSize - SizeSize; - - // Compute the allocated pointer. 
- AllocPtr = CGF.Builder.CreateBitCast(Ptr, CharPtrTy); - AllocPtr = CGF.Builder.CreateConstInBoundsGEP1_64(AllocPtr, - -CookieSize.getQuantity()); - - llvm::Value *NumElementsPtr = AllocPtr; - if (!NumElementsOffset.isZero()) - NumElementsPtr = - CGF.Builder.CreateConstInBoundsGEP1_64(NumElementsPtr, - NumElementsOffset.getQuantity()); - NumElementsPtr = - CGF.Builder.CreateBitCast(NumElementsPtr, SizeLTy->getPointerTo(AS)); - NumElements = CGF.Builder.CreateLoad(NumElementsPtr); +llvm::Value *ItaniumCXXABI::readArrayCookieImpl(CodeGenFunction &CGF, + llvm::Value *allocPtr, + CharUnits cookieSize) { + // The element size is right-justified in the cookie. + llvm::Value *numElementsPtr = allocPtr; + CharUnits numElementsOffset = + cookieSize - CharUnits::fromQuantity(CGF.SizeSizeInBytes); + if (!numElementsOffset.isZero()) + numElementsPtr = + CGF.Builder.CreateConstInBoundsGEP1_64(numElementsPtr, + numElementsOffset.getQuantity()); + + unsigned AS = cast(allocPtr->getType())->getAddressSpace(); + numElementsPtr = + CGF.Builder.CreateBitCast(numElementsPtr, CGF.SizeTy->getPointerTo(AS)); + return CGF.Builder.CreateLoad(numElementsPtr); } -CharUnits ARMCXXABI::GetArrayCookieSize(const CXXNewExpr *expr) { - if (!NeedsArrayCookie(expr)) - return CharUnits::Zero(); - +CharUnits ARMCXXABI::getArrayCookieSizeImpl(QualType elementType) { // On ARM, the cookie is always: // struct array_cookie { // std::size_t element_size; // element_size != 0 @@ -936,7 +866,7 @@ CharUnits ARMCXXABI::GetArrayCookieSize(const CXXNewExpr *expr) { // }; // TODO: what should we do if the allocated type actually wants // greater alignment? - return getContext().getTypeSizeInChars(getContext().getSizeType()) * 2; + return CharUnits::fromQuantity(2 * CGM.SizeSizeInBytes); } llvm::Value *ARMCXXABI::InitializeArrayCookie(CodeGenFunction &CGF, @@ -944,7 +874,7 @@ llvm::Value *ARMCXXABI::InitializeArrayCookie(CodeGenFunction &CGF, llvm::Value *NumElements, const CXXNewExpr *expr, QualType ElementType) { - assert(NeedsArrayCookie(expr)); + assert(requiresArrayCookie(expr)); // NewPtr is a char*. @@ -975,44 +905,18 @@ llvm::Value *ARMCXXABI::InitializeArrayCookie(CodeGenFunction &CGF, CookieSize.getQuantity()); } -void ARMCXXABI::ReadArrayCookie(CodeGenFunction &CGF, - llvm::Value *Ptr, - const CXXDeleteExpr *expr, - QualType ElementType, - llvm::Value *&NumElements, - llvm::Value *&AllocPtr, - CharUnits &CookieSize) { - // Derive a char* in the same address space as the pointer. - unsigned AS = cast(Ptr->getType())->getAddressSpace(); - llvm::Type *CharPtrTy = CGF.Builder.getInt8Ty()->getPointerTo(AS); - - // If we don't need an array cookie, bail out early. - if (!NeedsArrayCookie(expr, ElementType)) { - AllocPtr = CGF.Builder.CreateBitCast(Ptr, CharPtrTy); - NumElements = 0; - CookieSize = CharUnits::Zero(); - return; - } - - QualType SizeTy = getContext().getSizeType(); - CharUnits SizeSize = getContext().getTypeSizeInChars(SizeTy); - llvm::Type *SizeLTy = CGF.ConvertType(SizeTy); - - // The cookie size is always 2 * sizeof(size_t). - CookieSize = 2 * SizeSize; - - // The allocated pointer is the input ptr, minus that amount. - AllocPtr = CGF.Builder.CreateBitCast(Ptr, CharPtrTy); - AllocPtr = CGF.Builder.CreateConstInBoundsGEP1_64(AllocPtr, - -CookieSize.getQuantity()); - - // The number of elements is at offset sizeof(size_t) relative to that. 
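// Editorial annotation (illustrative, not part of the imported patch): the
// ARM cookie layout shown above is { size_t element_size; size_t
// element_count; }, so for new A[10] with sizeof(A) == 8 the two slots hold
// 8 and 10, and the new readArrayCookieImpl below only has to load the
// count from offset sizeof(size_t) of the allocation.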
- llvm::Value *NumElementsPtr - = CGF.Builder.CreateConstInBoundsGEP1_64(AllocPtr, - SizeSize.getQuantity()); - NumElementsPtr = - CGF.Builder.CreateBitCast(NumElementsPtr, SizeLTy->getPointerTo(AS)); - NumElements = CGF.Builder.CreateLoad(NumElementsPtr); +llvm::Value *ARMCXXABI::readArrayCookieImpl(CodeGenFunction &CGF, + llvm::Value *allocPtr, + CharUnits cookieSize) { + // The number of elements is at offset sizeof(size_t) relative to + // the allocated pointer. + llvm::Value *numElementsPtr + = CGF.Builder.CreateConstInBoundsGEP1_64(allocPtr, CGF.SizeSizeInBytes); + + unsigned AS = cast(allocPtr->getType())->getAddressSpace(); + numElementsPtr = + CGF.Builder.CreateBitCast(numElementsPtr, CGF.SizeTy->getPointerTo(AS)); + return CGF.Builder.CreateLoad(numElementsPtr); } /*********************** Static local initialization **************************/ @@ -1200,3 +1104,60 @@ void ItaniumCXXABI::EmitGuardedInit(CodeGenFunction &CGF, CGF.EmitBlock(EndBlock); } + +/// Register a global destructor using __cxa_atexit. +static void emitGlobalDtorWithCXAAtExit(CodeGenFunction &CGF, + llvm::Constant *dtor, + llvm::Constant *addr) { + // We're assuming that the destructor function is something we can + // reasonably call with the default CC. Go ahead and cast it to the + // right prototype. + llvm::Type *dtorTy = + llvm::FunctionType::get(CGF.VoidTy, CGF.Int8PtrTy, false)->getPointerTo(); + + // extern "C" int __cxa_atexit(void (*f)(void *), void *p, void *d); + llvm::Type *paramTys[] = { dtorTy, CGF.Int8PtrTy, CGF.Int8PtrTy }; + llvm::FunctionType *atexitTy = + llvm::FunctionType::get(CGF.IntTy, paramTys, false); + + // Fetch the actual function. + llvm::Constant *atexit = + CGF.CGM.CreateRuntimeFunction(atexitTy, "__cxa_atexit"); + if (llvm::Function *fn = dyn_cast(atexit)) + fn->setDoesNotThrow(); + + // Create a variable that binds the atexit to this shared object. + llvm::Constant *handle = + CGF.CGM.CreateRuntimeVariable(CGF.Int8Ty, "__dso_handle"); + + llvm::Value *args[] = { + llvm::ConstantExpr::getBitCast(dtor, dtorTy), + llvm::ConstantExpr::getBitCast(addr, CGF.Int8PtrTy), + handle + }; + CGF.Builder.CreateCall(atexit, args)->setDoesNotThrow(); +} + +/// Register a global destructor as best as we know how. +void ItaniumCXXABI::registerGlobalDtor(CodeGenFunction &CGF, + llvm::Constant *dtor, + llvm::Constant *addr) { + // Use __cxa_atexit if available. + if (CGM.getCodeGenOpts().CXAAtExit) { + return emitGlobalDtorWithCXAAtExit(CGF, dtor, addr); + } + + // In Apple kexts, we want to add a global destructor entry. + // FIXME: shouldn't this be guarded by some variable? + if (CGM.getContext().getLangOpts().AppleKext) { + // Generate a global destructor entry. + return CGM.AddCXXDtorEntry(dtor, addr); + } + + CGF.registerGlobalDtorWithAtExit(dtor, addr); +} + +/// Generate and emit virtual tables for the given class. 
+void ItaniumCXXABI::EmitVTables(const CXXRecordDecl *Class) { + CGM.getVTables().GenerateClassData(CGM.getVTableLinkage(Class), Class); +} diff --git a/lib/CodeGen/MicrosoftCXXABI.cpp b/lib/CodeGen/MicrosoftCXXABI.cpp index 825e041..6a2925b 100644 --- a/lib/CodeGen/MicrosoftCXXABI.cpp +++ b/lib/CodeGen/MicrosoftCXXABI.cpp @@ -28,6 +28,8 @@ class MicrosoftCXXABI : public CGCXXABI { public: MicrosoftCXXABI(CodeGenModule &CGM) : CGCXXABI(CGM) {} + StringRef GetPureVirtualCallName() { return "_purecall"; } + void BuildConstructorSignature(const CXXConstructorDecl *Ctor, CXXCtorType Type, CanQualType &ResTy, @@ -56,6 +58,13 @@ public: // TODO: 'for base' flag } + void EmitGuardedInit(CodeGenFunction &CGF, const VarDecl &D, + llvm::GlobalVariable *DeclPtr, + bool PerformInit); + + void EmitVTables(const CXXRecordDecl *Class); + + // ==== Notes on array cookies ========= // // MSVC seems to only use cookies when the class has a destructor; a @@ -78,17 +87,92 @@ public: // delete[] p; // } // Whereas it prints "104" and "104" if you give A a destructor. - void ReadArrayCookie(CodeGenFunction &CGF, llvm::Value *Ptr, - const CXXDeleteExpr *expr, - QualType ElementType, llvm::Value *&NumElements, - llvm::Value *&AllocPtr, CharUnits &CookieSize) { - CGF.CGM.ErrorUnsupported(expr, "don't know how to handle array cookies " - "in the Microsoft C++ ABI"); - } + + bool requiresArrayCookie(const CXXDeleteExpr *expr, QualType elementType); + bool requiresArrayCookie(const CXXNewExpr *expr); + CharUnits getArrayCookieSizeImpl(QualType type); + llvm::Value *InitializeArrayCookie(CodeGenFunction &CGF, + llvm::Value *NewPtr, + llvm::Value *NumElements, + const CXXNewExpr *expr, + QualType ElementType); + llvm::Value *readArrayCookieImpl(CodeGenFunction &CGF, + llvm::Value *allocPtr, + CharUnits cookieSize); }; } +bool MicrosoftCXXABI::requiresArrayCookie(const CXXDeleteExpr *expr, + QualType elementType) { + // Microsoft seems to completely ignore the possibility of a + // two-argument usual deallocation function. + return elementType.isDestructedType(); +} + +bool MicrosoftCXXABI::requiresArrayCookie(const CXXNewExpr *expr) { + // Microsoft seems to completely ignore the possibility of a + // two-argument usual deallocation function. + return expr->getAllocatedType().isDestructedType(); +} + +CharUnits MicrosoftCXXABI::getArrayCookieSizeImpl(QualType type) { + // The array cookie is always a size_t; we then pad that out to the + // alignment of the element type. + ASTContext &Ctx = getContext(); + return std::max(Ctx.getTypeSizeInChars(Ctx.getSizeType()), + Ctx.getTypeAlignInChars(type)); +} + +llvm::Value *MicrosoftCXXABI::readArrayCookieImpl(CodeGenFunction &CGF, + llvm::Value *allocPtr, + CharUnits cookieSize) { + unsigned AS = cast(allocPtr->getType())->getAddressSpace(); + llvm::Value *numElementsPtr = + CGF.Builder.CreateBitCast(allocPtr, CGF.SizeTy->getPointerTo(AS)); + return CGF.Builder.CreateLoad(numElementsPtr); +} + +llvm::Value* MicrosoftCXXABI::InitializeArrayCookie(CodeGenFunction &CGF, + llvm::Value *newPtr, + llvm::Value *numElements, + const CXXNewExpr *expr, + QualType elementType) { + assert(requiresArrayCookie(expr)); + + // The size of the cookie. + CharUnits cookieSize = getArrayCookieSizeImpl(elementType); + + // Compute an offset to the cookie. + llvm::Value *cookiePtr = newPtr; + + // Write the number of elements into the appropriate slot. 
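// Editorial annotation (illustrative, not part of the imported patch):
// unlike the right-justified Itanium cookie, this cookie keeps the element
// count at offset 0 of the allocation, which is why readArrayCookieImpl
// above loads directly through a bitcast of allocPtr. For the example in
// the comments above, new A[25] with a destructor and an assumed 4-byte A
// on 32-bit Windows allocates sizeof(size_t) + 25 * sizeof(A)
// = 4 + 100 = 104 bytes.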
+ unsigned AS = cast(newPtr->getType())->getAddressSpace(); + llvm::Value *numElementsPtr + = CGF.Builder.CreateBitCast(cookiePtr, CGF.SizeTy->getPointerTo(AS)); + CGF.Builder.CreateStore(numElements, numElementsPtr); + + // Finally, compute a pointer to the actual data buffer by skipping + // over the cookie completely. + return CGF.Builder.CreateConstInBoundsGEP1_64(newPtr, + cookieSize.getQuantity()); +} + +void MicrosoftCXXABI::EmitGuardedInit(CodeGenFunction &CGF, const VarDecl &D, + llvm::GlobalVariable *DeclPtr, + bool PerformInit) { + // FIXME: this code was only tested for global initialization. + // Not sure whether we want thread-safe static local variables as VS + // doesn't make them thread-safe. + + // Emit the initializer and add a global destructor if appropriate. + CGF.EmitCXXGlobalVarDeclInit(D, DeclPtr, PerformInit); +} + +void MicrosoftCXXABI::EmitVTables(const CXXRecordDecl *Class) { + // FIXME: implement +} + CGCXXABI *clang::CodeGen::CreateMicrosoftCXXABI(CodeGenModule &CGM) { return new MicrosoftCXXABI(CGM); } diff --git a/lib/CodeGen/TargetInfo.cpp b/lib/CodeGen/TargetInfo.cpp index 2b71fdd..9c23ed9 100644 --- a/lib/CodeGen/TargetInfo.cpp +++ b/lib/CodeGen/TargetInfo.cpp @@ -413,12 +413,18 @@ static llvm::Type* X86AdjustInlineAsmType(CodeGen::CodeGenFunction &CGF, /// X86_32ABIInfo - The X86-32 ABI information. class X86_32ABIInfo : public ABIInfo { + enum Class { + Integer, + Float + }; + static const unsigned MinABIStackAlignInBytes = 4; bool IsDarwinVectorABI; bool IsSmallStructInRegABI; bool IsMMXDisabled; bool IsWin32FloatStructABI; + unsigned DefaultNumRegisterParameters; static bool isRegisterSize(unsigned Size) { return (Size == 8 || Size == 16 || Size == 32 || Size == 64); @@ -434,33 +440,31 @@ class X86_32ABIInfo : public ABIInfo { /// \brief Return the alignment to use for the given type on the stack. 
unsigned getTypeStackAlignInBytes(QualType Ty, unsigned Align) const; -public: - - ABIArgInfo classifyReturnType(QualType RetTy, + Class classify(QualType Ty) const; + ABIArgInfo classifyReturnType(QualType RetTy, unsigned callingConvention) const; + ABIArgInfo classifyArgumentTypeWithReg(QualType RetTy, + unsigned &FreeRegs) const; ABIArgInfo classifyArgumentType(QualType RetTy) const; - virtual void computeInfo(CGFunctionInfo &FI) const { - FI.getReturnInfo() = classifyReturnType(FI.getReturnType(), - FI.getCallingConvention()); - for (CGFunctionInfo::arg_iterator it = FI.arg_begin(), ie = FI.arg_end(); - it != ie; ++it) - it->info = classifyArgumentType(it->type); - } +public: + virtual void computeInfo(CGFunctionInfo &FI) const; virtual llvm::Value *EmitVAArg(llvm::Value *VAListAddr, QualType Ty, CodeGenFunction &CGF) const; - X86_32ABIInfo(CodeGen::CodeGenTypes &CGT, bool d, bool p, bool m, bool w) + X86_32ABIInfo(CodeGen::CodeGenTypes &CGT, bool d, bool p, bool m, bool w, + unsigned r) : ABIInfo(CGT), IsDarwinVectorABI(d), IsSmallStructInRegABI(p), - IsMMXDisabled(m), IsWin32FloatStructABI(w) {} + IsMMXDisabled(m), IsWin32FloatStructABI(w), + DefaultNumRegisterParameters(r) {} }; class X86_32TargetCodeGenInfo : public TargetCodeGenInfo { public: X86_32TargetCodeGenInfo(CodeGen::CodeGenTypes &CGT, - bool d, bool p, bool m, bool w) - :TargetCodeGenInfo(new X86_32ABIInfo(CGT, d, p, m, w)) {} + bool d, bool p, bool m, bool w, unsigned r) + :TargetCodeGenInfo(new X86_32ABIInfo(CGT, d, p, m, w, r)) {} void SetTargetAttributes(const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &CGM) const; @@ -626,6 +630,10 @@ ABIArgInfo X86_32ABIInfo::classifyReturnType(QualType RetTy, ABIArgInfo::getExtend() : ABIArgInfo::getDirect()); } +static bool isSSEVectorType(ASTContext &Context, QualType Ty) { + return Ty->getAs() && Context.getTypeSize(Ty) == 128; +} + static bool isRecordWithSSEVectorType(ASTContext &Context, QualType Ty) { const RecordType *RT = Ty->getAs(); if (!RT) @@ -643,7 +651,7 @@ static bool isRecordWithSSEVectorType(ASTContext &Context, QualType Ty) { i != e; ++i) { QualType FT = i->getType(); - if (FT->getAs() && Context.getTypeSize(FT) == 128) + if (isSSEVectorType(Context, FT)) return true; if (isRecordWithSSEVectorType(Context, FT)) @@ -667,7 +675,8 @@ unsigned X86_32ABIInfo::getTypeStackAlignInBytes(QualType Ty, } // Otherwise, if the type contains an SSE vector type, the alignment is 16. - if (Align >= 16 && isRecordWithSSEVectorType(getContext(), Ty)) + if (Align >= 16 && (isSSEVectorType(getContext(), Ty) || + isRecordWithSSEVectorType(getContext(), Ty))) return 16; return MinABIStackAlignInBytes; @@ -692,6 +701,57 @@ ABIArgInfo X86_32ABIInfo::getIndirectResult(QualType Ty, bool ByVal) const { return ABIArgInfo::getIndirect(StackAlign); } +X86_32ABIInfo::Class X86_32ABIInfo::classify(QualType Ty) const { + const Type *T = isSingleElementStruct(Ty, getContext()); + if (!T) + T = Ty.getTypePtr(); + + if (const BuiltinType *BT = T->getAs()) { + BuiltinType::Kind K = BT->getKind(); + if (K == BuiltinType::Float || K == BuiltinType::Double) + return Float; + } + return Integer; +} + +ABIArgInfo +X86_32ABIInfo::classifyArgumentTypeWithReg(QualType Ty, + unsigned &FreeRegs) const { + // Common case first. 
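// Editorial annotation (illustrative sketch, not part of the imported
// patch): for
//   void f(int a, long long b) __attribute__((regparm(3)));
// FreeRegs starts at 3 (FI.getRegParm()); 'a' consumes one 32-bit register
// and 'b' two, so all three words become *InReg arguments in the code
// below, and any further integer argument sees FreeRegs == 0 and falls back
// to the plain classifyArgumentType path.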
+ if (FreeRegs == 0) + return classifyArgumentType(Ty); + + Class C = classify(Ty); + if (C == Float) + return classifyArgumentType(Ty); + + unsigned SizeInRegs = (getContext().getTypeSize(Ty) + 31) / 32; + if (SizeInRegs == 0) + return classifyArgumentType(Ty); + + if (SizeInRegs > FreeRegs) { + FreeRegs = 0; + return classifyArgumentType(Ty); + } + assert(SizeInRegs >= 1 && SizeInRegs <= 3); + FreeRegs -= SizeInRegs; + + // If it is a simple scalar, keep the type so that we produce a cleaner IR. + ABIArgInfo Foo = classifyArgumentType(Ty); + if (Foo.isDirect() && !Foo.getDirectOffset() && !Foo.getPaddingType()) + return ABIArgInfo::getDirectInReg(Foo.getCoerceToType()); + if (Foo.isExtend()) + return ABIArgInfo::getExtendInReg(Foo.getCoerceToType()); + + llvm::LLVMContext &LLVMContext = getVMContext(); + llvm::Type *Int32 = llvm::Type::getInt32Ty(LLVMContext); + SmallVector Elements; + for (unsigned I = 0; I < SizeInRegs; ++I) + Elements.push_back(Int32); + llvm::Type *Result = llvm::StructType::get(LLVMContext, Elements); + return ABIArgInfo::getDirectInReg(Result); +} + ABIArgInfo X86_32ABIInfo::classifyArgumentType(QualType Ty) const { // FIXME: Set alignment on indirect arguments. if (isAggregateTypeForABI(Ty)) { @@ -753,6 +813,28 @@ ABIArgInfo X86_32ABIInfo::classifyArgumentType(QualType Ty) const { ABIArgInfo::getExtend() : ABIArgInfo::getDirect()); } +void X86_32ABIInfo::computeInfo(CGFunctionInfo &FI) const { + FI.getReturnInfo() = classifyReturnType(FI.getReturnType(), + FI.getCallingConvention()); + + unsigned FreeRegs = FI.getHasRegParm() ? FI.getRegParm() : + DefaultNumRegisterParameters; + + // If the return value is indirect, then the hidden argument is consuming one + // integer register. + if (FI.getReturnInfo().isIndirect() && FreeRegs) { + --FreeRegs; + ABIArgInfo &Old = FI.getReturnInfo(); + Old = ABIArgInfo::getIndirectInReg(Old.getIndirectAlign(), + Old.getIndirectByVal(), + Old.getIndirectRealign()); + } + + for (CGFunctionInfo::arg_iterator it = FI.arg_begin(), ie = FI.arg_end(); + it != ie; ++it) + it->info = classifyArgumentTypeWithReg(it->type, FreeRegs); +} + llvm::Value *X86_32ABIInfo::EmitVAArg(llvm::Value *VAListAddr, QualType Ty, CodeGenFunction &CGF) const { llvm::Type *BPP = CGF.Int8PtrPtrTy; @@ -1345,7 +1427,8 @@ void X86_64ABIInfo::classify(QualType Ty, uint64_t OffsetBase, // single eightbyte, each is classified separately. Each eightbyte gets // initialized to class NO_CLASS. Class FieldLo, FieldHi; - uint64_t Offset = OffsetBase + Layout.getBaseClassOffsetInBits(Base); + uint64_t Offset = + OffsetBase + getContext().toBits(Layout.getBaseClassOffset(Base)); classify(i->getType(), Offset, FieldLo, FieldHi); Lo = merge(Lo, FieldLo); Hi = merge(Hi, FieldHi); @@ -1584,7 +1667,7 @@ static bool BitsContainNoUserData(QualType Ty, unsigned StartBit, cast(i->getType()->getAs()->getDecl()); // If the base is after the span we care about, ignore it. - unsigned BaseOffset = (unsigned)Layout.getBaseClassOffsetInBits(Base); + unsigned BaseOffset = Context.toBits(Layout.getBaseClassOffset(Base)); if (BaseOffset >= EndBit) continue; unsigned BaseStart = BaseOffset < StartBit ? 
StartBit-BaseOffset :0; @@ -2411,6 +2494,64 @@ PPC32TargetCodeGenInfo::initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF, return false; } +// PowerPC-64 + +namespace { +class PPC64TargetCodeGenInfo : public DefaultTargetCodeGenInfo { +public: + PPC64TargetCodeGenInfo(CodeGenTypes &CGT) : DefaultTargetCodeGenInfo(CGT) {} + + int getDwarfEHStackPointer(CodeGen::CodeGenModule &M) const { + // This is recovered from gcc output. + return 1; // r1 is the dedicated stack pointer + } + + bool initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF, + llvm::Value *Address) const; +}; + +} + +bool +PPC64TargetCodeGenInfo::initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF, + llvm::Value *Address) const { + // This is calculated from the LLVM and GCC tables and verified + // against gcc output. AFAIK all ABIs use the same encoding. + + CodeGen::CGBuilderTy &Builder = CGF.Builder; + + llvm::IntegerType *i8 = CGF.Int8Ty; + llvm::Value *Four8 = llvm::ConstantInt::get(i8, 4); + llvm::Value *Eight8 = llvm::ConstantInt::get(i8, 8); + llvm::Value *Sixteen8 = llvm::ConstantInt::get(i8, 16); + + // 0-31: r0-31, the 8-byte general-purpose registers + AssignToArrayRange(Builder, Address, Eight8, 0, 31); + + // 32-63: fp0-31, the 8-byte floating-point registers + AssignToArrayRange(Builder, Address, Eight8, 32, 63); + + // 64-76 are various 4-byte special-purpose registers: + // 64: mq + // 65: lr + // 66: ctr + // 67: ap + // 68-75 cr0-7 + // 76: xer + AssignToArrayRange(Builder, Address, Four8, 64, 76); + + // 77-108: v0-31, the 16-byte vector registers + AssignToArrayRange(Builder, Address, Sixteen8, 77, 108); + + // 109: vrsave + // 110: vscr + // 111: spe_acc + // 112: spefscr + // 113: sfp + AssignToArrayRange(Builder, Address, Four8, 109, 113); + + return false; +} //===----------------------------------------------------------------------===// // ARM ABI Implementation @@ -2559,7 +2700,8 @@ static bool isHomogeneousAggregate(QualType Ty, const Type *&Base, // double, or 64-bit or 128-bit vectors. if (const BuiltinType *BT = Ty->getAs()) { if (BT->getKind() != BuiltinType::Float && - BT->getKind() != BuiltinType::Double) + BT->getKind() != BuiltinType::Double && + BT->getKind() != BuiltinType::LongDouble) return false; } else if (const VectorType *VT = Ty->getAs()) { unsigned VecSize = Context.getTypeSize(VT); @@ -2615,19 +2757,23 @@ ABIArgInfo ARMABIInfo::classifyArgumentType(QualType Ty) const { } } + // Support byval for ARM. + if (getContext().getTypeSizeInChars(Ty) > CharUnits::fromQuantity(64) || + getContext().getTypeAlign(Ty) > 64) { + return ABIArgInfo::getIndirect(0, /*ByVal=*/true); + } + // Otherwise, pass by coercing to a structure of the appropriate size. - // - // FIXME: This is kind of nasty... but there isn't much choice because the ARM - // backend doesn't support byval. - // FIXME: This doesn't handle alignment > 64 bits. llvm::Type* ElemTy; unsigned SizeRegs; - if (getContext().getTypeAlign(Ty) > 32) { - ElemTy = llvm::Type::getInt64Ty(getVMContext()); - SizeRegs = (getContext().getTypeSize(Ty) + 63) / 64; - } else { + // FIXME: Try to match the types of the arguments more accurately where + // we can. 
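// Editorial annotation (illustrative, not part of the imported patch): with
// the byval support added above, an aggregate such as
//   struct Big { char buf[100]; };
// (100 bytes > the 64-byte/64-bit-alignment threshold checked earlier) is
// now passed indirectly with the byval attribute, while a small 12-byte
// struct of 32-bit alignment is still coerced below into three i32 units
// (SizeRegs == 3).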
+ if (getContext().getTypeAlign(Ty) <= 32) { ElemTy = llvm::Type::getInt32Ty(getVMContext()); SizeRegs = (getContext().getTypeSize(Ty) + 31) / 32; + } else { + ElemTy = llvm::Type::getInt64Ty(getVMContext()); + SizeRegs = (getContext().getTypeSize(Ty) + 63) / 64; } llvm::Type *STy = @@ -2833,14 +2979,14 @@ llvm::Value *ARMABIInfo::EmitVAArg(llvm::Value *VAListAddr, QualType Ty, } //===----------------------------------------------------------------------===// -// PTX ABI Implementation +// NVPTX ABI Implementation //===----------------------------------------------------------------------===// namespace { -class PTXABIInfo : public ABIInfo { +class NVPTXABIInfo : public ABIInfo { public: - PTXABIInfo(CodeGenTypes &CGT) : ABIInfo(CGT) {} + NVPTXABIInfo(CodeGenTypes &CGT) : ABIInfo(CGT) {} ABIArgInfo classifyReturnType(QualType RetTy) const; ABIArgInfo classifyArgumentType(QualType Ty) const; @@ -2850,16 +2996,16 @@ public: CodeGenFunction &CFG) const; }; -class PTXTargetCodeGenInfo : public TargetCodeGenInfo { +class NVPTXTargetCodeGenInfo : public TargetCodeGenInfo { public: - PTXTargetCodeGenInfo(CodeGenTypes &CGT) - : TargetCodeGenInfo(new PTXABIInfo(CGT)) {} + NVPTXTargetCodeGenInfo(CodeGenTypes &CGT) + : TargetCodeGenInfo(new NVPTXABIInfo(CGT)) {} virtual void SetTargetAttributes(const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const; }; -ABIArgInfo PTXABIInfo::classifyReturnType(QualType RetTy) const { +ABIArgInfo NVPTXABIInfo::classifyReturnType(QualType RetTy) const { if (RetTy->isVoidType()) return ABIArgInfo::getIgnore(); if (isAggregateTypeForABI(RetTy)) @@ -2867,14 +3013,14 @@ ABIArgInfo PTXABIInfo::classifyReturnType(QualType RetTy) const { return ABIArgInfo::getDirect(); } -ABIArgInfo PTXABIInfo::classifyArgumentType(QualType Ty) const { +ABIArgInfo NVPTXABIInfo::classifyArgumentType(QualType Ty) const { if (isAggregateTypeForABI(Ty)) return ABIArgInfo::getIndirect(0); return ABIArgInfo::getDirect(); } -void PTXABIInfo::computeInfo(CGFunctionInfo &FI) const { +void NVPTXABIInfo::computeInfo(CGFunctionInfo &FI) const { FI.getReturnInfo() = classifyReturnType(FI.getReturnType()); for (CGFunctionInfo::arg_iterator it = FI.arg_begin(), ie = FI.arg_end(); it != ie; ++it) @@ -2885,6 +3031,8 @@ void PTXABIInfo::computeInfo(CGFunctionInfo &FI) const { return; // Calling convention as default by an ABI. + // We're still using the PTX_Kernel/PTX_Device calling conventions here, + // but we should switch to NVVM metadata later on. 
llvm::CallingConv::ID DefaultCC; const LangOptions &LangOpts = getContext().getLangOpts(); if (LangOpts.OpenCL || LangOpts.CUDA) { @@ -2903,14 +3051,14 @@ void PTXABIInfo::computeInfo(CGFunctionInfo &FI) const { } -llvm::Value *PTXABIInfo::EmitVAArg(llvm::Value *VAListAddr, QualType Ty, - CodeGenFunction &CFG) const { - llvm_unreachable("PTX does not support varargs"); +llvm::Value *NVPTXABIInfo::EmitVAArg(llvm::Value *VAListAddr, QualType Ty, + CodeGenFunction &CFG) const { + llvm_unreachable("NVPTX does not support varargs"); } -void PTXTargetCodeGenInfo::SetTargetAttributes(const Decl *D, - llvm::GlobalValue *GV, - CodeGen::CodeGenModule &M) const{ +void NVPTXTargetCodeGenInfo:: +SetTargetAttributes(const Decl *D, llvm::GlobalValue *GV, + CodeGen::CodeGenModule &M) const{ const FunctionDecl *FD = dyn_cast(D); if (!FD) return; @@ -3097,13 +3245,16 @@ void MSP430TargetCodeGenInfo::SetTargetAttributes(const Decl *D, namespace { class MipsABIInfo : public ABIInfo { bool IsO32; - unsigned MinABIStackAlignInBytes; - llvm::Type* HandleAggregates(QualType Ty) const; + unsigned MinABIStackAlignInBytes, StackAlignInBytes; + void CoerceToIntArgs(uint64_t TySize, + SmallVector &ArgList) const; + llvm::Type* HandleAggregates(QualType Ty, uint64_t TySize) const; llvm::Type* returnAggregateInRegs(QualType RetTy, uint64_t Size) const; llvm::Type* getPaddingType(uint64_t Align, uint64_t Offset) const; public: MipsABIInfo(CodeGenTypes &CGT, bool _IsO32) : - ABIInfo(CGT), IsO32(_IsO32), MinABIStackAlignInBytes(IsO32 ? 4 : 8) {} + ABIInfo(CGT), IsO32(_IsO32), MinABIStackAlignInBytes(IsO32 ? 4 : 8), + StackAlignInBytes(IsO32 ? 8 : 16) {} ABIArgInfo classifyReturnType(QualType RetTy) const; ABIArgInfo classifyArgumentType(QualType RetTy, uint64_t &Offset) const; @@ -3132,36 +3283,56 @@ public: }; } +void MipsABIInfo::CoerceToIntArgs(uint64_t TySize, + SmallVector &ArgList) const { + llvm::IntegerType *IntTy = + llvm::IntegerType::get(getVMContext(), MinABIStackAlignInBytes * 8); + + // Add (TySize / MinABIStackAlignInBytes) args of IntTy. + for (unsigned N = TySize / (MinABIStackAlignInBytes * 8); N; --N) + ArgList.push_back(IntTy); + + // If necessary, add one more integer type to ArgList. + unsigned R = TySize % (MinABIStackAlignInBytes * 8); + + if (R) + ArgList.push_back(llvm::IntegerType::get(getVMContext(), R)); +} + // In N32/64, an aligned double precision floating point field is passed in // a register. -llvm::Type* MipsABIInfo::HandleAggregates(QualType Ty) const { - if (IsO32) - return 0; +llvm::Type* MipsABIInfo::HandleAggregates(QualType Ty, uint64_t TySize) const { + SmallVector ArgList, IntArgList; + + if (IsO32) { + CoerceToIntArgs(TySize, ArgList); + return llvm::StructType::get(getVMContext(), ArgList); + } if (Ty->isComplexType()) return CGT.ConvertType(Ty); const RecordType *RT = Ty->getAs(); - // Unions are passed in integer registers. - if (!RT || !RT->isStructureOrClassType()) - return 0; + // Unions/vectors are passed in integer registers. 
+ if (!RT || !RT->isStructureOrClassType()) { + CoerceToIntArgs(TySize, ArgList); + return llvm::StructType::get(getVMContext(), ArgList); + } const RecordDecl *RD = RT->getDecl(); const ASTRecordLayout &Layout = getContext().getASTRecordLayout(RD); - uint64_t StructSize = getContext().getTypeSize(Ty); - assert(!(StructSize % 8) && "Size of structure must be multiple of 8."); + assert(!(TySize % 8) && "Size of structure must be multiple of 8."); uint64_t LastOffset = 0; unsigned idx = 0; llvm::IntegerType *I64 = llvm::IntegerType::get(getVMContext(), 64); - SmallVector ArgList; // Iterate over fields in the struct/class and check if there are any aligned // double fields. for (RecordDecl::field_iterator i = RD->field_begin(), e = RD->field_end(); i != e; ++i, ++idx) { - const QualType Ty = (*i)->getType(); + const QualType Ty = i->getType(); const BuiltinType *BT = Ty->getAs(); if (!BT || BT->getKind() != BuiltinType::Double) @@ -3180,43 +3351,33 @@ llvm::Type* MipsABIInfo::HandleAggregates(QualType Ty) const { LastOffset = Offset + 64; } - // This struct/class doesn't have an aligned double field. - if (!LastOffset) - return 0; - - // Add ((StructSize - LastOffset) / 64) args of type i64. - for (unsigned N = (StructSize - LastOffset) / 64; N; --N) - ArgList.push_back(I64); - - // If the size of the remainder is not zero, add one more integer type to - // ArgList. - unsigned R = (StructSize - LastOffset) % 64; - if (R) - ArgList.push_back(llvm::IntegerType::get(getVMContext(), R)); + CoerceToIntArgs(TySize - LastOffset, IntArgList); + ArgList.append(IntArgList.begin(), IntArgList.end()); return llvm::StructType::get(getVMContext(), ArgList); } llvm::Type *MipsABIInfo::getPaddingType(uint64_t Align, uint64_t Offset) const { - // Padding is inserted only for N32/64. - if (IsO32) - return 0; + assert((Offset % MinABIStackAlignInBytes) == 0); + + if ((Align - 1) & Offset) + return llvm::IntegerType::get(getVMContext(), MinABIStackAlignInBytes * 8); - assert(Align <= 16 && "Alignment larger than 16 not handled."); - return (Align == 16 && Offset & 0xf) ? - llvm::IntegerType::get(getVMContext(), 64) : 0; + return 0; } ABIArgInfo MipsABIInfo::classifyArgumentType(QualType Ty, uint64_t &Offset) const { uint64_t OrigOffset = Offset; - uint64_t TySize = - llvm::RoundUpToAlignment(getContext().getTypeSize(Ty), 64) / 8; + uint64_t TySize = getContext().getTypeSize(Ty); uint64_t Align = getContext().getTypeAlign(Ty) / 8; - Offset = llvm::RoundUpToAlignment(Offset, std::max(Align, (uint64_t)8)); - Offset += TySize; - if (isAggregateTypeForABI(Ty)) { + Align = std::min(std::max(Align, (uint64_t)MinABIStackAlignInBytes), + (uint64_t)StackAlignInBytes); + Offset = llvm::RoundUpToAlignment(Offset, Align); + Offset += llvm::RoundUpToAlignment(TySize, Align * 8) / 8; + + if (isAggregateTypeForABI(Ty) || Ty->isVectorType()) { // Ignore empty aggregates. if (TySize == 0) return ABIArgInfo::getIgnore(); @@ -3224,20 +3385,15 @@ MipsABIInfo::classifyArgumentType(QualType Ty, uint64_t &Offset) const { // Records with non trivial destructors/constructors should not be passed // by value. if (isRecordWithNonTrivialDestructorOrCopyConstructor(Ty)) { - Offset = OrigOffset + 8; + Offset = OrigOffset + MinABIStackAlignInBytes; return ABIArgInfo::getIndirect(0, /*ByVal=*/false); } - // If we have reached here, aggregates are passed either indirectly via a - // byval pointer or directly by coercing to another structure type. In the - // latter case, padding is inserted if the offset of the aggregate is - // unaligned. 
- llvm::Type *ResType = HandleAggregates(Ty); - - if (!ResType) - return ABIArgInfo::getIndirect(0); - - return ABIArgInfo::getDirect(ResType, 0, getPaddingType(Align, OrigOffset)); + // If we have reached here, aggregates are passed directly by coercing to + // another structure type. Padding is inserted if the offset of the + // aggregate is unaligned. + return ABIArgInfo::getDirect(HandleAggregates(Ty, TySize), 0, + getPaddingType(Align, OrigOffset)); } // Treat an enum type as its underlying type. @@ -3253,7 +3409,7 @@ MipsABIInfo::classifyArgumentType(QualType Ty, uint64_t &Offset) const { llvm::Type* MipsABIInfo::returnAggregateInRegs(QualType RetTy, uint64_t Size) const { const RecordType *RT = RetTy->getAs(); - SmallVector RTList; + SmallVector RTList; if (RT && RT->isStructureOrClassType()) { const RecordDecl *RD = RT->getDecl(); @@ -3272,12 +3428,12 @@ MipsABIInfo::returnAggregateInRegs(QualType RetTy, uint64_t Size) const { if (FieldCnt && (FieldCnt <= 2) && !Layout.getFieldOffset(0)) { RecordDecl::field_iterator b = RD->field_begin(), e = RD->field_end(); for (; b != e; ++b) { - const BuiltinType *BT = (*b)->getType()->getAs(); + const BuiltinType *BT = b->getType()->getAs(); if (!BT || !BT->isFloatingPoint()) break; - RTList.push_back(CGT.ConvertType((*b)->getType())); + RTList.push_back(CGT.ConvertType(b->getType())); } if (b == e) @@ -3288,11 +3444,7 @@ MipsABIInfo::returnAggregateInRegs(QualType RetTy, uint64_t Size) const { } } - RTList.push_back(llvm::IntegerType::get(getVMContext(), - std::min(Size, (uint64_t)64))); - if (Size > 64) - RTList.push_back(llvm::IntegerType::get(getVMContext(), Size - 64)); - + CoerceToIntArgs(Size, RTList); return llvm::StructType::get(getVMContext(), RTList); } @@ -3302,11 +3454,15 @@ ABIArgInfo MipsABIInfo::classifyReturnType(QualType RetTy) const { if (RetTy->isVoidType() || Size == 0) return ABIArgInfo::getIgnore(); - if (isAggregateTypeForABI(RetTy)) { + if (isAggregateTypeForABI(RetTy) || RetTy->isVectorType()) { if (Size <= 128) { if (RetTy->isAnyComplexType()) return ABIArgInfo::getDirect(); + // O32 returns integer vectors in registers. + if (IsO32 && RetTy->isVectorType() && !RetTy->hasFloatingRepresentation()) + return ABIArgInfo::getDirect(returnAggregateInRegs(RetTy, Size)); + if (!IsO32 && !isRecordWithNonTrivialDestructorOrCopyConstructor(RetTy)) return ABIArgInfo::getDirect(returnAggregateInRegs(RetTy, Size)); } @@ -3327,7 +3483,7 @@ void MipsABIInfo::computeInfo(CGFunctionInfo &FI) const { RetInfo = classifyReturnType(FI.getReturnType()); // Check if a pointer to an aggregate is passed as a hidden argument. - uint64_t Offset = RetInfo.isIndirect() ? 8 : 0; + uint64_t Offset = RetInfo.isIndirect() ? 
MinABIStackAlignInBytes : 0; for (CGFunctionInfo::arg_iterator it = FI.arg_begin(), ie = FI.arg_end(); it != ie; ++it) @@ -3634,10 +3790,12 @@ const TargetCodeGenInfo &CodeGenModule::getTargetCodeGenInfo() { case llvm::Triple::ppc: return *(TheTargetCodeGenInfo = new PPC32TargetCodeGenInfo(Types)); + case llvm::Triple::ppc64: + return *(TheTargetCodeGenInfo = new PPC64TargetCodeGenInfo(Types)); - case llvm::Triple::ptx32: - case llvm::Triple::ptx64: - return *(TheTargetCodeGenInfo = new PTXTargetCodeGenInfo(Types)); + case llvm::Triple::nvptx: + case llvm::Triple::nvptx64: + return *(TheTargetCodeGenInfo = new NVPTXTargetCodeGenInfo(Types)); case llvm::Triple::mblaze: return *(TheTargetCodeGenInfo = new MBlazeTargetCodeGenInfo(Types)); @@ -3653,8 +3811,8 @@ const TargetCodeGenInfo &CodeGenModule::getTargetCodeGenInfo() { if (Triple.isOSDarwin()) return *(TheTargetCodeGenInfo = - new X86_32TargetCodeGenInfo( - Types, true, true, DisableMMX, false)); + new X86_32TargetCodeGenInfo(Types, true, true, DisableMMX, false, + CodeGenOpts.NumRegisterParameters)); switch (Triple.getOS()) { case llvm::Triple::Cygwin: @@ -3663,19 +3821,22 @@ const TargetCodeGenInfo &CodeGenModule::getTargetCodeGenInfo() { case llvm::Triple::DragonFly: case llvm::Triple::FreeBSD: case llvm::Triple::OpenBSD: + case llvm::Triple::Bitrig: return *(TheTargetCodeGenInfo = - new X86_32TargetCodeGenInfo( - Types, false, true, DisableMMX, false)); + new X86_32TargetCodeGenInfo(Types, false, true, DisableMMX, + false, + CodeGenOpts.NumRegisterParameters)); case llvm::Triple::Win32: return *(TheTargetCodeGenInfo = - new X86_32TargetCodeGenInfo( - Types, false, true, DisableMMX, true)); + new X86_32TargetCodeGenInfo(Types, false, true, DisableMMX, true, + CodeGenOpts.NumRegisterParameters)); default: return *(TheTargetCodeGenInfo = - new X86_32TargetCodeGenInfo( - Types, false, false, DisableMMX, false)); + new X86_32TargetCodeGenInfo(Types, false, false, DisableMMX, + false, + CodeGenOpts.NumRegisterParameters)); } } diff --git a/lib/Driver/ArgList.cpp b/lib/Driver/ArgList.cpp index 55a0ddf..7fd439e 100644 --- a/lib/Driver/ArgList.cpp +++ b/lib/Driver/ArgList.cpp @@ -140,6 +140,68 @@ Arg *ArgList::getLastArg(OptSpecifier Id0, OptSpecifier Id1, return Res; } +Arg *ArgList::getLastArg(OptSpecifier Id0, OptSpecifier Id1, + OptSpecifier Id2, OptSpecifier Id3, + OptSpecifier Id4, OptSpecifier Id5) const { + Arg *Res = 0; + for (const_iterator it = begin(), ie = end(); it != ie; ++it) { + if ((*it)->getOption().matches(Id0) || + (*it)->getOption().matches(Id1) || + (*it)->getOption().matches(Id2) || + (*it)->getOption().matches(Id3) || + (*it)->getOption().matches(Id4) || + (*it)->getOption().matches(Id5)) { + Res = *it; + Res->claim(); + } + } + + return Res; +} + +Arg *ArgList::getLastArg(OptSpecifier Id0, OptSpecifier Id1, + OptSpecifier Id2, OptSpecifier Id3, + OptSpecifier Id4, OptSpecifier Id5, + OptSpecifier Id6) const { + Arg *Res = 0; + for (const_iterator it = begin(), ie = end(); it != ie; ++it) { + if ((*it)->getOption().matches(Id0) || + (*it)->getOption().matches(Id1) || + (*it)->getOption().matches(Id2) || + (*it)->getOption().matches(Id3) || + (*it)->getOption().matches(Id4) || + (*it)->getOption().matches(Id5) || + (*it)->getOption().matches(Id6)) { + Res = *it; + Res->claim(); + } + } + + return Res; +} + +Arg *ArgList::getLastArg(OptSpecifier Id0, OptSpecifier Id1, + OptSpecifier Id2, OptSpecifier Id3, + OptSpecifier Id4, OptSpecifier Id5, + OptSpecifier Id6, OptSpecifier Id7) const { + Arg *Res = 0; + for 
(const_iterator it = begin(), ie = end(); it != ie; ++it) { + if ((*it)->getOption().matches(Id0) || + (*it)->getOption().matches(Id1) || + (*it)->getOption().matches(Id2) || + (*it)->getOption().matches(Id3) || + (*it)->getOption().matches(Id4) || + (*it)->getOption().matches(Id5) || + (*it)->getOption().matches(Id6) || + (*it)->getOption().matches(Id7)) { + Res = *it; + Res->claim(); + } + } + + return Res; +} + bool ArgList::hasFlag(OptSpecifier Pos, OptSpecifier Neg, bool Default) const { if (Arg *A = getLastArg(Pos, Neg)) return A->getOption().matches(Pos); diff --git a/lib/Driver/CC1Options.cpp b/lib/Driver/CC1Options.cpp deleted file mode 100644 index 884b363..0000000 --- a/lib/Driver/CC1Options.cpp +++ /dev/null @@ -1,38 +0,0 @@ -//===--- CC1Options.cpp - Clang CC1 Options Table -------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -#include "clang/Driver/CC1Options.h" -#include "clang/Driver/Option.h" -#include "clang/Driver/OptTable.h" -using namespace clang; -using namespace clang::driver; -using namespace clang::driver::options; -using namespace clang::driver::cc1options; - -static const OptTable::Info CC1InfoTable[] = { -#define OPTION(NAME, ID, KIND, GROUP, ALIAS, FLAGS, PARAM, \ - HELPTEXT, METAVAR) \ - { NAME, HELPTEXT, METAVAR, Option::KIND##Class, PARAM, FLAGS, \ - OPT_##GROUP, OPT_##ALIAS }, -#include "clang/Driver/CC1Options.inc" -}; - -namespace { - -class CC1OptTable : public OptTable { -public: - CC1OptTable() - : OptTable(CC1InfoTable, sizeof(CC1InfoTable) / sizeof(CC1InfoTable[0])) {} -}; - -} - -OptTable *clang::driver::createCC1OptTable() { - return new CC1OptTable(); -} diff --git a/lib/Driver/CMakeLists.txt b/lib/Driver/CMakeLists.txt index a798e20..4ada7d9 100644 --- a/lib/Driver/CMakeLists.txt +++ b/lib/Driver/CMakeLists.txt @@ -1,10 +1,7 @@ -set(LLVM_USED_LIBS clangBasic clangAST clangParse) - add_clang_library(clangDriver Action.cpp Arg.cpp ArgList.cpp - CC1Options.cpp CC1AsOptions.cpp Compilation.cpp Driver.cpp @@ -21,13 +18,14 @@ add_clang_library(clangDriver Types.cpp ) -IF(MSVC) - get_target_property(NON_ANSI_COMPILE_FLAGS clangDriver COMPILE_FLAGS) - string(REPLACE /Za - "" NON_ANSI_COMPILE_FLAGS - ${NON_ANSI_COMPILE_FLAGS}) - set_target_properties(clangDriver PROPERTIES COMPILE_FLAGS ${NON_ANSI_COMPILE_FLAGS}) -ENDIF(MSVC) +add_dependencies(clangDriver + ClangAttrList + ClangCC1AsOptions + ClangDiagnosticCommon + ClangDiagnosticDriver + ClangDriverOptions + ) -add_dependencies(clangDriver ClangAttrList ClangDiagnosticDriver - ClangDriverOptions ClangCC1Options ClangCC1AsOptions) +target_link_libraries(clangDriver + clangBasic + ) diff --git a/lib/Driver/Compilation.cpp b/lib/Driver/Compilation.cpp index 5553fc9..c962fca 100644 --- a/lib/Driver/Compilation.cpp +++ b/lib/Driver/Compilation.cpp @@ -219,7 +219,7 @@ void Compilation::initCompilationForDiagnostics(void) { // to avoid emitting warnings about unused args. 
OptSpecifier OutputOpts[] = { options::OPT_o, options::OPT_MD, options::OPT_MMD }; - for (unsigned i = 0; i != sizeof(OutputOpts)/sizeof(OutputOpts[0]); ++i) { + for (unsigned i = 0, e = llvm::array_lengthof(OutputOpts); i != e; ++i) { if (TranslatedArgs->hasArg(OutputOpts[i])) TranslatedArgs->eraseArg(OutputOpts[i]); } diff --git a/lib/Driver/Driver.cpp b/lib/Driver/Driver.cpp index 3ddac69..87d533d 100644 --- a/lib/Driver/Driver.cpp +++ b/lib/Driver/Driver.cpp @@ -59,7 +59,7 @@ Driver::Driver(StringRef ClangExecutable, CCPrintOptions(false), CCPrintHeaders(false), CCLogDiagnostics(false), CCGenDiagnostics(false), CCCGenericGCCName(""), CheckInputsExist(true), CCCUseClang(true), CCCUseClangCXX(true), CCCUseClangCPP(true), - CCCUsePCH(true), SuppressMissingInputWarning(false) { + ForcedClangUse(false), CCCUsePCH(true), SuppressMissingInputWarning(false) { if (IsProduction) { // In a "production" build, only use clang on architectures we expect to // work. @@ -115,9 +115,10 @@ InputArgList *Driver::ParseArgStrings(ArrayRef ArgList) { } // Warn about -mcpu= without an argument. - if (A->getOption().matches(options::OPT_mcpu_EQ) && + if (A->getOption().matches(options::OPT_mcpu_EQ) && A->containsValue("")) { - Diag(clang::diag::warn_drv_empty_joined_argument) << A->getAsString(*Args); + Diag(clang::diag::warn_drv_empty_joined_argument) << + A->getAsString(*Args); } } @@ -253,7 +254,7 @@ Compilation *Driver::BuildCompilation(ArrayRef ArgList) { if (char *env = ::getenv("COMPILER_PATH")) { StringRef CompilerPath = env; while (!CompilerPath.empty()) { - std::pair Split = CompilerPath.split(':'); + std::pair Split = CompilerPath.split(':'); PrefixDirs.push_back(Split.first); CompilerPath = Split.second; } @@ -376,24 +377,33 @@ Compilation *Driver::BuildCompilation(ArrayRef ArgList) { void Driver::generateCompilationDiagnostics(Compilation &C, const Command *FailingCommand) { if (C.getArgs().hasArg(options::OPT_fno_crash_diagnostics)) - return; + return; // Don't try to generate diagnostics for link jobs. - if (FailingCommand->getCreator().isLinkJob()) + if (FailingCommand && FailingCommand->getCreator().isLinkJob()) return; + // Print the version of the compiler. + PrintVersion(C, llvm::errs()); + Diag(clang::diag::note_drv_command_failed_diag_msg) - << "Please submit a bug report to " BUG_REPORT_URL " and include command" - " line arguments and all diagnostic information."; + << "PLEASE submit a bug report to " BUG_REPORT_URL " and include the " + "crash backtrace, preprocessed source, and associated run script."; // Suppress driver output and emit preprocessor output to temp file. CCCIsCPP = true; CCGenDiagnostics = true; + C.getArgs().AddFlagArg(0, Opts->getOption(options::OPT_frewrite_includes)); // Save the original job command(s). std::string Cmd; llvm::raw_string_ostream OS(Cmd); - C.PrintJob(OS, C.getJobs(), "\n", false); + if (FailingCommand) + C.PrintJob(OS, *FailingCommand, "\n", false); + else + // Crash triggered by FORCE_CLANG_DIAGNOSTICS_CRASH, which doesn't have an + // associated FailingCommand, so just pass all jobs. + C.PrintJob(OS, C.getJobs(), "\n", false); OS.flush(); // Clear stale state and suppress tool output. @@ -473,7 +483,9 @@ void Driver::generateCompilationDiagnostics(Compilation &C, // If the command succeeded, we are done. 
if (Res == 0) { Diag(clang::diag::note_drv_command_failed_diag_msg) - << "Preprocessed source(s) and associated run script(s) are located at:"; + << "\n********************\n\n" + "PLEASE ATTACH THE FOLLOWING FILES TO THE BUG REPORT:\n" + "Preprocessed source(s) and associated run script(s) are located at:"; ArgStringList Files = C.getTempFiles(); for (ArgStringList::const_iterator it = Files.begin(), ie = Files.end(); it != ie; ++it) { @@ -489,10 +501,76 @@ void Driver::generateCompilationDiagnostics(Compilation &C, Diag(clang::diag::note_drv_command_failed_diag_msg) << "Error generating run script: " + Script + " " + Err; } else { + // Strip away options not necessary to reproduce the crash. + // FIXME: This doesn't work with quotes (e.g., -D "foo bar"). + SmallVector Flag; + Flag.push_back("-D "); + Flag.push_back("-F"); + Flag.push_back("-I "); + Flag.push_back("-M "); + Flag.push_back("-MD "); + Flag.push_back("-MF "); + Flag.push_back("-MG "); + Flag.push_back("-MM "); + Flag.push_back("-MMD "); + Flag.push_back("-MP "); + Flag.push_back("-MQ "); + Flag.push_back("-MT "); + Flag.push_back("-o "); + Flag.push_back("-coverage-file "); + Flag.push_back("-dependency-file "); + Flag.push_back("-fdebug-compilation-dir "); + Flag.push_back("-fmodule-cache-path "); + Flag.push_back("-idirafter "); + Flag.push_back("-include "); + Flag.push_back("-include-pch "); + Flag.push_back("-internal-isystem "); + Flag.push_back("-internal-externc-isystem "); + Flag.push_back("-iprefix "); + Flag.push_back("-iwithprefix "); + Flag.push_back("-iwithprefixbefore "); + Flag.push_back("-isysroot "); + Flag.push_back("-isystem "); + Flag.push_back("-iquote "); + Flag.push_back("-resource-dir "); + Flag.push_back("-serialize-diagnostic-file "); + for (unsigned i = 0, e = Flag.size(); i < e; ++i) { + size_t I = 0, E = 0; + do { + I = Cmd.find(Flag[i], I); + if (I == std::string::npos) break; + + E = Cmd.find(" ", I + Flag[i].length()); + if (E == std::string::npos) break; + // The -D option is not removed. Instead, the argument is quoted. + if (Flag[i] != "-D ") { + Cmd.erase(I, E - I + 1); + } else { + Cmd.insert(I+3, "\""); + Cmd.insert(++E, "\""); + I = E; + } + } while(1); + } + // Append the new filename with correct preprocessed suffix. + size_t I, E; + I = Cmd.find("-main-file-name "); + assert (I != std::string::npos && "Expected to find -main-file-name"); + I += 16; + E = Cmd.find(" ", I); + assert (E != std::string::npos && "-main-file-name missing argument?"); + StringRef OldFilename = StringRef(Cmd).slice(I, E); + StringRef NewFilename = llvm::sys::path::filename(*it); + I = StringRef(Cmd).rfind(OldFilename); + E = I + OldFilename.size(); + I = Cmd.rfind(" ", I) + 1; + Cmd.replace(I, E - I, NewFilename.data(), NewFilename.size()); ScriptOS << Cmd; Diag(clang::diag::note_drv_command_failed_diag_msg) << Script; } } + Diag(clang::diag::note_drv_command_failed_diag_msg) + << "\n\n********************"; } else { // Failure, remove preprocessed files. if (!C.getArgs().hasArg(options::OPT_save_temps)) @@ -529,14 +607,8 @@ int Driver::ExecuteCompilation(const Compilation &C, C.CleanupFileList(C.getResultFiles(), true); // Failure result files are valid unless we crashed. - if (Res < 0) { + if (Res < 0) C.CleanupFileList(C.getFailureResultFiles(), true); -#ifdef _WIN32 - // Exit status should not be negative on Win32, - // unless abnormal termination. - Res = 1; -#endif - } } // Print extra information about abnormal failures, if possible. 
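The option-stripping loop in generateCompilationDiagnostics above walks the flattened command string looking for "flag value " pairs and erases them (the -D case instead quotes the value). The erase path, pulled out into a standalone sketch: the helper name is invented here, the driver does this inline, and, as the FIXME above notes, quoted values such as -D "foo bar" are not handled.

    #include <string>
    #include <cstddef>

    // Remove every "Flag value " pair from a flat command line. Flag is
    // expected to carry its trailing space, e.g. "-I " or "-isysroot ".
    static void stripFlagAndValue(std::string &Cmd, const std::string &Flag) {
      std::size_t I = 0;
      while ((I = Cmd.find(Flag, I)) != std::string::npos) {
        std::size_t E = Cmd.find(' ', I + Flag.length());
        if (E == std::string::npos)
          break;                   // value runs to end of line: leave it alone
        Cmd.erase(I, E - I + 1);   // drop the flag, its argument, and the space
      }
    }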
@@ -630,7 +702,7 @@ bool Driver::HandleImmediateArgs(const Compilation &C) { return false; } - if (C.getArgs().hasArg(options::OPT__help) || + if (C.getArgs().hasArg(options::OPT_help) || C.getArgs().hasArg(options::OPT__help_hidden)) { PrintHelp(C.getArgs().hasArg(options::OPT__help_hidden)); return false; @@ -748,8 +820,7 @@ static unsigned PrintActions1(const Compilation &C, Action *A, if (InputAction *IA = dyn_cast(A)) { os << "\"" << IA->getInputArg().getValue(C.getArgs()) << "\""; } else if (BindArchAction *BIA = dyn_cast(A)) { - os << '"' << (BIA->getArchName() ? BIA->getArchName() : - C.getDefaultToolChain().getArchName()) << '"' + os << '"' << BIA->getArchName() << '"' << ", {" << PrintActions1(C, *BIA->begin(), Ids) << "}"; } else { os << "{"; @@ -823,7 +894,7 @@ void Driver::BuildUniversalActions(const ToolChain &TC, // When there is no explicit arch for this platform, make sure we still bind // the architecture (to the default) so that -Xarch_ is handled correctly. if (!Archs.size()) - Archs.push_back(0); + Archs.push_back(Args.MakeArgString(TC.getArchName())); // FIXME: We killed off some others but these aren't yet detected in a // functional manner. If we added information to jobs about which "auxiliary" @@ -873,7 +944,7 @@ void Driver::BuildUniversalActions(const ToolChain &TC, if (A && !A->getOption().matches(options::OPT_g0) && !A->getOption().matches(options::OPT_gstabs) && ContainsCompileOrAssembleAction(Actions.back())) { - + // Add a 'dsymutil' step if necessary, when debug info is enabled and we // have a compile input. We need to run 'dsymutil' ourselves in such cases // because the debug info will refer to a temporary object file which is @@ -1060,18 +1131,27 @@ void Driver::BuildActions(const ToolChain &TC, const DerivedArgList &Args, if (Args.hasArg(options::OPT_Qunused_arguments)) continue; + // Special case when final phase determined by binary name, rather than + // by a command-line argument with a corresponding Arg. + if (CCCIsCPP) + Diag(clang::diag::warn_drv_input_file_unused_by_cpp) + << InputArg->getAsString(Args) + << getPhaseName(InitialPhase); // Special case '-E' warning on a previously preprocessed file to make // more sense. - if (InitialPhase == phases::Compile && FinalPhase == phases::Preprocess && - getPreprocessedType(InputType) == types::TY_INVALID) + else if (InitialPhase == phases::Compile && + FinalPhase == phases::Preprocess && + getPreprocessedType(InputType) == types::TY_INVALID) Diag(clang::diag::warn_drv_preprocessed_input_file_unused) << InputArg->getAsString(Args) - << FinalPhaseArg->getOption().getName(); + << !!FinalPhaseArg + << FinalPhaseArg ? FinalPhaseArg->getOption().getName() : ""; else Diag(clang::diag::warn_drv_input_file_unused) << InputArg->getAsString(Args) << getPhaseName(InitialPhase) - << FinalPhaseArg->getOption().getName(); + << !!FinalPhaseArg + << FinalPhaseArg ? 
FinalPhaseArg->getOption().getName() : ""; continue; } @@ -1130,14 +1210,23 @@ Action *Driver::ConstructPhaseAction(const ArgList &Args, phases::ID Phase, if (Args.hasArg(options::OPT_M, options::OPT_MM)) { OutputTy = types::TY_Dependencies; } else { - OutputTy = types::getPreprocessedType(Input->getType()); + OutputTy = Input->getType(); + if (!Args.hasFlag(options::OPT_frewrite_includes, + options::OPT_fno_rewrite_includes, false)) + OutputTy = types::getPreprocessedType(OutputTy); assert(OutputTy != types::TY_INVALID && "Cannot preprocess this input type!"); } return new PreprocessJobAction(Input, OutputTy); } - case phases::Precompile: - return new PrecompileJobAction(Input, types::TY_PCH); + case phases::Precompile: { + types::ID OutputTy = types::TY_PCH; + if (Args.hasArg(options::OPT_fsyntax_only)) { + // Syntax checks should not emit a PCH file + OutputTy = types::TY_Nothing; + } + return new PrecompileJobAction(Input, OutputTy); + } case phases::Compile: { if (Args.hasArg(options::OPT_fsyntax_only)) { return new CompileJobAction(Input, types::TY_Nothing); @@ -1332,10 +1421,13 @@ void Driver::BuildJobsForAction(Compilation &C, } if (const BindArchAction *BAA = dyn_cast(A)) { - const ToolChain *TC = &C.getDefaultToolChain(); + const ToolChain *TC; + const char *ArchName = BAA->getArchName(); - if (BAA->getArchName()) - TC = &getToolChain(C.getArgs(), BAA->getArchName()); + if (ArchName) + TC = &getToolChain(C.getArgs(), ArchName); + else + TC = &C.getDefaultToolChain(); BuildJobsForAction(C, *BAA->begin(), TC, BAA->getArchName(), AtTopLevel, LinkingOutput, Result); @@ -1453,15 +1545,24 @@ const char *Driver::GetNamedOutputPath(Compilation &C, NamedOutput = C.getArgs().MakeArgString(Suffixed.c_str()); } - // If we're saving temps and the temp filename conflicts with the input - // filename, then avoid overwriting input file. + // If we're saving temps and the temp file conflicts with the input file, + // then avoid overwriting input file. if (!AtTopLevel && C.getArgs().hasArg(options::OPT_save_temps) && NamedOutput == BaseName) { - StringRef Name = llvm::sys::path::filename(BaseInput); - std::pair Split = Name.split('.'); - std::string TmpName = - GetTemporaryPath(Split.first, types::getTypeTempSuffix(JA.getType())); - return C.addTempFile(C.getArgs().MakeArgString(TmpName.c_str())); + + bool SameFile = false; + SmallString<256> Result; + llvm::sys::fs::current_path(Result); + llvm::sys::path::append(Result, BaseName); + llvm::sys::fs::equivalent(BaseInput, Result.c_str(), SameFile); + // Must share the same path to conflict. + if (SameFile) { + StringRef Name = llvm::sys::path::filename(BaseInput); + std::pair Split = Name.split('.'); + std::string TmpName = + GetTemporaryPath(Split.first, types::getTypeTempSuffix(JA.getType())); + return C.addTempFile(C.getArgs().MakeArgString(TmpName.c_str())); + } } // As an annoying special case, PCH generation doesn't strip the pathname. @@ -1564,7 +1665,7 @@ std::string Driver::GetProgramPath(const char *Name, const ToolChain &TC, return Name; } -std::string Driver::GetTemporaryPath(StringRef Prefix, const char *Suffix) +std::string Driver::GetTemporaryPath(StringRef Prefix, const char *Suffix) const { // FIXME: This is lame; sys::Path should provide this function (in particular, // it should know how to find the temporary files dir). 
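The -save-temps change in GetNamedOutputPath above replaces a base-name string comparison with a filesystem identity test: the output is redirected to a temporary only when the file that would be written in the current directory really is the input file. Condensed into a free function (the name is made up for illustration; the driver performs this inline):

    #include "llvm/ADT/SmallString.h"
    #include "llvm/Support/FileSystem.h"
    #include "llvm/Support/Path.h"

    static bool wouldClobberInput(const char *BaseInput, const char *BaseName) {
      bool SameFile = false;
      llvm::SmallString<256> Result;
      llvm::sys::fs::current_path(Result);        // outputs land in the CWD
      llvm::sys::path::append(Result, BaseName);
      llvm::sys::fs::equivalent(BaseInput, Result.c_str(), SameFile);
      return SameFile;                            // true only for the same file
    }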
@@ -1579,14 +1680,15 @@ std::string Driver::GetTemporaryPath(StringRef Prefix, const char *Suffix) llvm::sys::Path P(TmpDir); P.appendComponent(Prefix); if (P.makeUnique(false, &Error)) { - Diag(clang::diag::err_drv_unable_to_make_temp) << Error; + Diag(clang::diag::err_unable_to_make_temp) << Error; return ""; } // FIXME: Grumble, makeUnique sometimes leaves the file around!? PR3837. P.eraseFromDisk(false, 0); - P.appendSuffix(Suffix); + if (Suffix) + P.appendSuffix(Suffix); return P.str(); } @@ -1674,6 +1776,9 @@ const ToolChain &Driver::getToolChain(const ArgList &Args, case llvm::Triple::OpenBSD: TC = new toolchains::OpenBSD(*this, Target, Args); break; + case llvm::Triple::Bitrig: + TC = new toolchains::Bitrig(*this, Target, Args); + break; case llvm::Triple::NetBSD: TC = new toolchains::NetBSD(*this, Target, Args); break; diff --git a/lib/Driver/OptTable.cpp b/lib/Driver/OptTable.cpp index 4f5390b..a3e38b2 100644 --- a/lib/Driver/OptTable.cpp +++ b/lib/Driver/OptTable.cpp @@ -181,6 +181,8 @@ Option *OptTable::CreateOption(unsigned id) const { } if (info.Flags & Unsupported) Opt->setUnsupported(true); + if (info.Flags & CC1Option) + Opt->setIsCC1Option(true); return Opt; } diff --git a/lib/Driver/ToolChain.cpp b/lib/Driver/ToolChain.cpp index db4d2a8..48ed044 100644 --- a/lib/Driver/ToolChain.cpp +++ b/lib/Driver/ToolChain.cpp @@ -14,10 +14,10 @@ #include "clang/Driver/ArgList.h" #include "clang/Driver/Driver.h" #include "clang/Driver/DriverDiagnostic.h" -#include "clang/Driver/ObjCRuntime.h" #include "clang/Driver/Options.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/Support/ErrorHandling.h" +#include "clang/Basic/ObjCRuntime.h" using namespace clang::driver; using namespace clang; @@ -49,25 +49,9 @@ bool ToolChain::HasNativeLLVMSupport() const { return false; } -void ToolChain::configureObjCRuntime(ObjCRuntime &runtime) const { - switch (runtime.getKind()) { - case ObjCRuntime::NeXT: - // Assume a minimal NeXT runtime. - runtime.HasARC = false; - runtime.HasWeak = false; - runtime.HasSubscripting = false; - runtime.HasTerminate = false; - return; - - case ObjCRuntime::GNU: - // Assume a maximal GNU runtime. - runtime.HasARC = true; - runtime.HasWeak = true; - runtime.HasSubscripting = false; // to be added - runtime.HasTerminate = false; // to be added - return; - } - llvm_unreachable("invalid runtime kind!"); +ObjCRuntime ToolChain::getDefaultObjCRuntime(bool isNonFragile) const { + return ObjCRuntime(isNonFragile ? ObjCRuntime::GNUstep : ObjCRuntime::GCC, + VersionTuple()); } /// getARMTargetCPU - Get the (LLVM) name of the ARM cpu we are targeting. @@ -189,6 +173,9 @@ void ToolChain::AddClangSystemIncludeArgs(const ArgList &DriverArgs, // Each toolchain should provide the appropriate include flags. 
} +void ToolChain::addClangTargetOptions(ArgStringList &CC1Args) const { +} + ToolChain::RuntimeLibType ToolChain::GetRuntimeLibType( const ArgList &Args) const { diff --git a/lib/Driver/ToolChains.cpp b/lib/Driver/ToolChains.cpp index 7f9ed9a..01c6623 100644 --- a/lib/Driver/ToolChains.cpp +++ b/lib/Driver/ToolChains.cpp @@ -14,10 +14,10 @@ #include "clang/Driver/Compilation.h" #include "clang/Driver/Driver.h" #include "clang/Driver/DriverDiagnostic.h" -#include "clang/Driver/ObjCRuntime.h" #include "clang/Driver/OptTable.h" #include "clang/Driver/Option.h" #include "clang/Driver/Options.h" +#include "clang/Basic/ObjCRuntime.h" #include "clang/Basic/Version.h" #include "llvm/ADT/SmallString.h" @@ -42,9 +42,7 @@ using namespace clang; /// Darwin - Darwin tool chain for i386 and x86_64. Darwin::Darwin(const Driver &D, const llvm::Triple& Triple) - : ToolChain(D, Triple), TargetInitialized(false), - ARCRuntimeForSimulator(ARCSimulator_None), - LibCXXForSimulator(LibCXXSimulator_None) + : ToolChain(D, Triple), TargetInitialized(false) { // Compute the initial Darwin version from the triple unsigned Major, Minor, Micro; @@ -59,6 +57,11 @@ Darwin::Darwin(const Driver &D, const llvm::Triple& Triple) DarwinVersion[0] = Minor + 4; DarwinVersion[1] = Micro; DarwinVersion[2] = 0; + + // Compute the initial iOS version from the triple + Triple.getiOSVersion(Major, Minor, Micro); + llvm::raw_string_ostream(iOSVersionMin) + << Major << '.' << Minor << '.' << Micro; } types::ID Darwin::LookupTypeForExtension(const char *Ext) const { @@ -75,42 +78,19 @@ bool Darwin::HasNativeLLVMSupport() const { return true; } -bool Darwin::hasARCRuntime() const { - // FIXME: Remove this once there is a proper way to detect an ARC runtime - // for the simulator. - switch (ARCRuntimeForSimulator) { - case ARCSimulator_None: - break; - case ARCSimulator_HasARCRuntime: - return true; - case ARCSimulator_NoARCRuntime: - return false; - } - - if (isTargetIPhoneOS()) - return !isIPhoneOSVersionLT(5); - else - return !isMacosxVersionLT(10, 7); -} - -bool Darwin::hasSubscriptingRuntime() const { - return !isTargetIPhoneOS() && !isMacosxVersionLT(10, 8); -} - /// Darwin provides an ARC runtime starting in MacOS X 10.7 and iOS 5.0. -void Darwin::configureObjCRuntime(ObjCRuntime &runtime) const { - if (runtime.getKind() != ObjCRuntime::NeXT) - return ToolChain::configureObjCRuntime(runtime); - - runtime.HasARC = runtime.HasWeak = hasARCRuntime(); - runtime.HasSubscripting = hasSubscriptingRuntime(); - - // So far, objc_terminate is only available in iOS 5. - // FIXME: do the simulator logic properly. - if (!ARCRuntimeForSimulator && isTargetIPhoneOS()) - runtime.HasTerminate = !isIPhoneOSVersionLT(5); - else - runtime.HasTerminate = false; +ObjCRuntime Darwin::getDefaultObjCRuntime(bool isNonFragile) const { + if (isTargetIPhoneOS()) { + return ObjCRuntime(ObjCRuntime::iOS, TargetVersion); + } else if (TargetSimulatorVersionFromDefines != VersionTuple()) { + return ObjCRuntime(ObjCRuntime::iOS, TargetSimulatorVersionFromDefines); + } else { + if (isNonFragile) { + return ObjCRuntime(ObjCRuntime::MacOSX, TargetVersion); + } else { + return ObjCRuntime(ObjCRuntime::FragileMacOSX, TargetVersion); + } + } } /// Darwin provides a blocks runtime starting in MacOS X 10.6 and iOS 3.2. 
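The Darwin constructor hunk above builds the default iOS deployment-target string by streaming into a temporary llvm::raw_string_ostream; the temporary flushes into its backing std::string when it is destroyed at the end of the statement, so no named stream or explicit flush() is needed. The same idiom in isolation (formatVersion is a made-up name):

    #include "llvm/Support/raw_ostream.h"
    #include <string>

    static std::string formatVersion(unsigned Major, unsigned Minor,
                                     unsigned Micro) {
      std::string Buf;
      llvm::raw_string_ostream(Buf) << Major << '.' << Minor << '.' << Micro;
      return Buf;  // e.g. 5, 0, 0 yields "5.0.0"
    }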
@@ -194,21 +174,25 @@ void Generic_ELF::anchor() {} Tool &Darwin::SelectTool(const Compilation &C, const JobAction &JA, const ActionList &Inputs) const { - Action::ActionClass Key; + Action::ActionClass Key = JA.getKind(); + bool useClang = false; if (getDriver().ShouldUseClangCompiler(C, JA, getTriple())) { + useClang = true; // Fallback to llvm-gcc for i386 kext compiles, we don't support that ABI. - if (Inputs.size() == 1 && + if (!getDriver().shouldForceClangUse() && + Inputs.size() == 1 && types::isCXX(Inputs[0]->getType()) && getTriple().isOSDarwin() && getTriple().getArch() == llvm::Triple::x86 && (C.getArgs().getLastArg(options::OPT_fapple_kext) || C.getArgs().getLastArg(options::OPT_mkernel))) - Key = JA.getKind(); - else - Key = Action::AnalyzeJobClass; - } else - Key = JA.getKind(); + useClang = false; + } + + // FIXME: This seems like a hacky way to choose clang frontend. + if (useClang) + Key = Action::AnalyzeJobClass; bool UseIntegratedAs = C.getArgs().hasFlag(options::OPT_integrated_as, options::OPT_no_integrated_as, @@ -287,76 +271,6 @@ void DarwinClang::AddGCCLibexecPath(unsigned darwinVersion) { getProgramPaths().push_back(Path); } -void DarwinClang::AddLinkSearchPathArgs(const ArgList &Args, - ArgStringList &CmdArgs) const { - // The Clang toolchain uses explicit paths for internal libraries. - - // Unfortunately, we still might depend on a few of the libraries that are - // only available in the gcc library directory (in particular - // libstdc++.dylib). For now, hardcode the path to the known install location. - // FIXME: This should get ripped out someday. However, when building on - // 10.6 (darwin10), we're still relying on this to find libstdc++.dylib. - llvm::sys::Path P(getDriver().Dir); - P.eraseComponent(); // .../usr/bin -> ../usr - P.appendComponent("llvm-gcc-4.2"); - P.appendComponent("lib"); - P.appendComponent("gcc"); - switch (getTriple().getArch()) { - default: - llvm_unreachable("Invalid Darwin arch!"); - case llvm::Triple::x86: - case llvm::Triple::x86_64: - P.appendComponent("i686-apple-darwin10"); - break; - case llvm::Triple::arm: - case llvm::Triple::thumb: - P.appendComponent("arm-apple-darwin10"); - break; - case llvm::Triple::ppc: - case llvm::Triple::ppc64: - P.appendComponent("powerpc-apple-darwin10"); - break; - } - P.appendComponent("4.2.1"); - - // Determine the arch specific GCC subdirectory. 
- const char *ArchSpecificDir = 0; - switch (getTriple().getArch()) { - default: - break; - case llvm::Triple::arm: - case llvm::Triple::thumb: { - std::string Triple = ComputeLLVMTriple(Args); - StringRef TripleStr = Triple; - if (TripleStr.startswith("armv5") || TripleStr.startswith("thumbv5")) - ArchSpecificDir = "v5"; - else if (TripleStr.startswith("armv6") || TripleStr.startswith("thumbv6")) - ArchSpecificDir = "v6"; - else if (TripleStr.startswith("armv7") || TripleStr.startswith("thumbv7")) - ArchSpecificDir = "v7"; - break; - } - case llvm::Triple::ppc64: - ArchSpecificDir = "ppc64"; - break; - case llvm::Triple::x86_64: - ArchSpecificDir = "x86_64"; - break; - } - - if (ArchSpecificDir) { - P.appendComponent(ArchSpecificDir); - bool Exists; - if (!llvm::sys::fs::exists(P.str(), Exists) && Exists) - CmdArgs.push_back(Args.MakeArgString("-L" + P.str())); - P.eraseComponent(); - } - - bool Exists; - if (!llvm::sys::fs::exists(P.str(), Exists) && Exists) - CmdArgs.push_back(Args.MakeArgString("-L" + P.str())); -} - void DarwinClang::AddLinkARCArgs(const ArgList &Args, ArgStringList &CmdArgs) const { @@ -374,7 +288,7 @@ void DarwinClang::AddLinkARCArgs(const ArgList &Args, else if (isTargetIPhoneOS()) s += "iphoneos"; // FIXME: Remove this once we depend fully on -mios-simulator-version-min. - else if (ARCRuntimeForSimulator != ARCSimulator_None) + else if (TargetSimulatorVersionFromDefines != VersionTuple()) s += "iphonesimulator"; else s += "macosx"; @@ -545,11 +459,13 @@ void Darwin::AddDeploymentTarget(DerivedArgList &Args) const { unsigned Major = 0, Minor = 0, Micro = 0; if (GetVersionFromSimulatorDefine(define, Major, Minor, Micro) && Major < 10 && Minor < 100 && Micro < 100) { - ARCRuntimeForSimulator = Major < 5 ? ARCSimulator_NoARCRuntime - : ARCSimulator_HasARCRuntime; - LibCXXForSimulator = Major < 5 ? LibCXXSimulator_NotAvailable - : LibCXXSimulator_Available; + TargetSimulatorVersionFromDefines = VersionTuple(Major, Minor, Micro); } + // When using the define to indicate the simulator, we force + // 10.6 macosx target. + const Option *O = Opts.getOption(options::OPT_mmacosx_version_min_EQ); + OSXVersion = Args.MakeJoinedArg(0, O, "10.6"); + Args.append(OSXVersion); break; } } @@ -593,9 +509,9 @@ void Darwin::AddDeploymentTarget(DerivedArgList &Args) const { // If no OSX or iOS target has been specified and we're compiling for armv7, // go ahead as assume we're targeting iOS. - if (OSXTarget.empty() && iOSTarget.empty()) - if (getDarwinArchName(Args) == "armv7") - iOSTarget = "0.0"; + if (OSXTarget.empty() && iOSTarget.empty() && + getDarwinArchName(Args) == "armv7") + iOSTarget = iOSVersionMin; // Handle conflicting deployment targets // @@ -956,27 +872,27 @@ DerivedArgList *Darwin::TranslateArgs(const DerivedArgList &Args, // Add an explicit version min argument for the deployment target. We do this // after argument translation because -Xarch_ arguments may add a version min // argument. - AddDeploymentTarget(*DAL); + if (BoundArch) + AddDeploymentTarget(*DAL); // Validate the C++ standard library choice. CXXStdlibType Type = GetCXXStdlibType(*DAL); if (Type == ToolChain::CST_Libcxx) { - switch (LibCXXForSimulator) { - case LibCXXSimulator_None: - // Handle non-simulator cases. - if (isTargetIPhoneOS()) { - if (isIPhoneOSVersionLT(5, 0)) { - getDriver().Diag(clang::diag::err_drv_invalid_libcxx_deployment) - << "iOS 5.0"; - } - } - break; - case LibCXXSimulator_NotAvailable: + // Check whether the target provides libc++. 
+ StringRef where; + + // Complain about targeting iOS < 5.0 in any way. + if (TargetSimulatorVersionFromDefines != VersionTuple()) { + if (TargetSimulatorVersionFromDefines < VersionTuple(5, 0)) + where = "iOS 5.0"; + } else if (isTargetIPhoneOS()) { + if (isIPhoneOSVersionLT(5, 0)) + where = "iOS 5.0"; + } + + if (where != StringRef()) { getDriver().Diag(clang::diag::err_drv_invalid_libcxx_deployment) - << "iOS 5.0"; - break; - case LibCXXSimulator_Available: - break; + << where; } } @@ -1187,6 +1103,9 @@ Generic_GCC::GCCInstallationDetector::GCCInstallationDetector( "arm-linux-gnueabi", "arm-linux-androideabi" }; + static const char *const ARMHFTriples[] = { + "arm-linux-gnueabihf", + }; static const char *const X86_64LibDirs[] = { "/lib64", "/lib" }; static const char *const X86_64Triples[] = { @@ -1210,7 +1129,8 @@ "i586-redhat-linux", "i386-redhat-linux", "i586-suse-linux", - "i486-slackware-linux" + "i486-slackware-linux", + "i686-montavista-linux" }; static const char *const MIPSLibDirs[] = { "/lib" }; static const char *const MIPSTriples[] = { "mips-linux-gnu" }; static const char *const MIPSELLibDirs[] = { "/lib" }; static const char *const MIPSELTriples[] = { "mipsel-linux-gnu" }; + static const char *const MIPS64LibDirs[] = { "/lib64", "/lib" }; + static const char *const MIPS64Triples[] = { "mips64-linux-gnu" }; + static const char *const MIPS64ELLibDirs[] = { "/lib64", "/lib" }; + static const char *const MIPS64ELTriples[] = { "mips64el-linux-gnu" }; + static const char *const PPCLibDirs[] = { "/lib32", "/lib" }; static const char *const PPCTriples[] = { "powerpc-linux-gnu", "powerpc-unknown-linux-gnu", - "powerpc-suse-linux" + "powerpc-suse-linux", + "powerpc-montavista-linuxspe" }; static const char *const PPC64LibDirs[] = { "/lib64", "/lib" }; static const char *const PPC64Triples[] = { @@ -1236,8 +1162,13 @@ case llvm::Triple::arm: case llvm::Triple::thumb: LibDirs.append(ARMLibDirs, ARMLibDirs + llvm::array_lengthof(ARMLibDirs)); - TripleAliases.append( - ARMTriples, ARMTriples + llvm::array_lengthof(ARMTriples)); + if (TargetTriple.getEnvironment() == llvm::Triple::GNUEABIHF) { + TripleAliases.append( + ARMHFTriples, ARMHFTriples + llvm::array_lengthof(ARMHFTriples)); + } else { + TripleAliases.append( + ARMTriples, ARMTriples + llvm::array_lengthof(ARMTriples)); + } break; case llvm::Triple::x86_64: LibDirs.append( @@ -1263,12 +1194,40 @@ MIPSLibDirs, MIPSLibDirs + llvm::array_lengthof(MIPSLibDirs)); TripleAliases.append( MIPSTriples, MIPSTriples + llvm::array_lengthof(MIPSTriples)); + MultiarchLibDirs.append( + MIPS64LibDirs, MIPS64LibDirs + llvm::array_lengthof(MIPS64LibDirs)); + MultiarchTripleAliases.append( + MIPS64Triples, MIPS64Triples + llvm::array_lengthof(MIPS64Triples)); break; case llvm::Triple::mipsel: LibDirs.append( MIPSELLibDirs, MIPSELLibDirs + llvm::array_lengthof(MIPSELLibDirs)); TripleAliases.append( MIPSELTriples, MIPSELTriples + llvm::array_lengthof(MIPSELTriples)); + MultiarchLibDirs.append( + MIPS64ELLibDirs, MIPS64ELLibDirs + llvm::array_lengthof(MIPS64ELLibDirs)); + MultiarchTripleAliases.append( + MIPS64ELTriples, MIPS64ELTriples + llvm::array_lengthof(MIPS64ELTriples)); break; + case llvm::Triple::mips64: + LibDirs.append( + MIPS64LibDirs, MIPS64LibDirs + llvm::array_lengthof(MIPS64LibDirs)); + TripleAliases.append( +
MIPS64Triples, MIPS64Triples + llvm::array_lengthof(MIPS64Triples)); + MultiarchLibDirs.append( + MIPSLibDirs, MIPSLibDirs + llvm::array_lengthof(MIPSLibDirs)); + MultiarchTripleAliases.append( + MIPSTriples, MIPSTriples + llvm::array_lengthof(MIPSTriples)); + break; + case llvm::Triple::mips64el: + LibDirs.append( + MIPS64ELLibDirs, MIPS64ELLibDirs + llvm::array_lengthof(MIPS64ELLibDirs)); + TripleAliases.append( + MIPS64ELTriples, MIPS64ELTriples + llvm::array_lengthof(MIPS64ELTriples)); + MultiarchLibDirs.append( + MIPSELLibDirs, MIPSELLibDirs + llvm::array_lengthof(MIPSELLibDirs)); + MultiarchTripleAliases.append( + MIPSELTriples, MIPSELTriples + llvm::array_lengthof(MIPSELTriples)); break; case llvm::Triple::ppc: LibDirs.append(PPCLibDirs, PPCLibDirs + llvm::array_lengthof(PPCLibDirs)); @@ -1350,7 +1309,9 @@ void Generic_GCC::GCCInstallationDetector::ScanLibDirForGCCTriple( // crtbegin.o without the subdirectory. StringRef MultiarchSuffix = (TargetArch == llvm::Triple::x86_64 || - TargetArch == llvm::Triple::ppc64) ? "/64" : "/32"; + TargetArch == llvm::Triple::ppc64 || + TargetArch == llvm::Triple::mips64 || + TargetArch == llvm::Triple::mips64el) ? "/64" : "/32"; if (llvm::sys::fs::exists(LI->path() + MultiarchSuffix + "/crtbegin.o")) { GCCMultiarchSuffix = MultiarchSuffix.str(); } else { @@ -1606,6 +1567,67 @@ Tool &OpenBSD::SelectTool(const Compilation &C, const JobAction &JA, return *T; } +/// Bitrig - Bitrig tool chain which can call as(1) and ld(1) directly. + +Bitrig::Bitrig(const Driver &D, const llvm::Triple& Triple, const ArgList &Args) + : Generic_ELF(D, Triple, Args) { + getFilePaths().push_back(getDriver().Dir + "/../lib"); + getFilePaths().push_back("/usr/lib"); +} + +Tool &Bitrig::SelectTool(const Compilation &C, const JobAction &JA, + const ActionList &Inputs) const { + Action::ActionClass Key; + if (getDriver().ShouldUseClangCompiler(C, JA, getTriple())) + Key = Action::AnalyzeJobClass; + else + Key = JA.getKind(); + + bool UseIntegratedAs = C.getArgs().hasFlag(options::OPT_integrated_as, + options::OPT_no_integrated_as, + IsIntegratedAssemblerDefault()); + + Tool *&T = Tools[Key]; + if (!T) { + switch (Key) { + case Action::AssembleJobClass: { + if (UseIntegratedAs) + T = new tools::ClangAs(*this); + else + T = new tools::bitrig::Assemble(*this); + break; + } + case Action::LinkJobClass: + T = new tools::bitrig::Link(*this); break; + default: + T = &Generic_GCC::SelectTool(C, JA, Inputs); + } + } + + return *T; +} + +void Bitrig::AddClangCXXStdlibIncludeArgs(const ArgList &DriverArgs, + ArgStringList &CC1Args) const { + if (DriverArgs.hasArg(options::OPT_nostdlibinc) || + DriverArgs.hasArg(options::OPT_nostdincxx)) + return; + + std::string Triple = getTriple().str(); + if (Triple.substr(0, 5) == "amd64") + Triple.replace(0, 5, "x86_64"); + + addSystemInclude(DriverArgs, CC1Args, "/usr/include/c++/4.6.2"); + addSystemInclude(DriverArgs, CC1Args, "/usr/include/c++/4.6.2/backward"); + addSystemInclude(DriverArgs, CC1Args, "/usr/include/c++/4.6.2/" + Triple); + +} + +void Bitrig::AddCXXStdlibLibArgs(const ArgList &Args, + ArgStringList &CmdArgs) const { + CmdArgs.push_back("-lstdc++"); +} + /// FreeBSD - FreeBSD tool chain which can call as(1) and ld(1) directly. FreeBSD::FreeBSD(const Driver &D, const llvm::Triple& Triple, const ArgList &Args) @@ -1957,6 +1979,16 @@ static std::string getMultiarchTriple(const llvm::Triple TargetTriple, // common linux triples that don't quite match the Clang triple for both // 32-bit and 64-bit targets. 
Multiarch fixes its install triples to these // regardless of what the actual target triple is. + case llvm::Triple::arm: + case llvm::Triple::thumb: + if (TargetTriple.getEnvironment() == llvm::Triple::GNUEABIHF) { + if (llvm::sys::fs::exists(SysRoot + "/lib/arm-linux-gnueabihf")) + return "arm-linux-gnueabihf"; + } else { + if (llvm::sys::fs::exists(SysRoot + "/lib/arm-linux-gnueabi")) + return "arm-linux-gnueabi"; + } + return TargetTriple.str(); case llvm::Triple::x86: if (llvm::sys::fs::exists(SysRoot + "/lib/i386-linux-gnu")) return "i386-linux-gnu"; @@ -2139,6 +2171,12 @@ Tool &Linux::SelectTool(const Compilation &C, const JobAction &JA, return *T; } +void Linux::addClangTargetOptions(ArgStringList &CC1Args) const { + const Generic_GCC::GCCVersion &V = GCCInstallation.getVersion(); + if (V >= Generic_GCC::GCCVersion::Parse("4.7.0")) + CC1Args.push_back("-fuse-init-array"); +} + void Linux::AddClangSystemIncludeArgs(const ArgList &DriverArgs, ArgStringList &CC1Args) const { const Driver &D = getDriver(); @@ -2197,6 +2235,9 @@ void Linux::AddClangSystemIncludeArgs(const ArgList &DriverArgs, const StringRef ARMMultiarchIncludeDirs[] = { "/usr/include/arm-linux-gnueabi" }; + const StringRef ARMHFMultiarchIncludeDirs[] = { + "/usr/include/arm-linux-gnueabihf" + }; const StringRef MIPSMultiarchIncludeDirs[] = { "/usr/include/mips-linux-gnu" }; @@ -2215,7 +2256,10 @@ void Linux::AddClangSystemIncludeArgs(const ArgList &DriverArgs, } else if (getTriple().getArch() == llvm::Triple::x86) { MultiarchIncludeDirs = X86MultiarchIncludeDirs; } else if (getTriple().getArch() == llvm::Triple::arm) { - MultiarchIncludeDirs = ARMMultiarchIncludeDirs; + if (getTriple().getEnvironment() == llvm::Triple::GNUEABIHF) + MultiarchIncludeDirs = ARMHFMultiarchIncludeDirs; + else + MultiarchIncludeDirs = ARMMultiarchIncludeDirs; } else if (getTriple().getArch() == llvm::Triple::mips) { MultiarchIncludeDirs = MIPSMultiarchIncludeDirs; } else if (getTriple().getArch() == llvm::Triple::mipsel) { @@ -2281,7 +2325,7 @@ void Linux::AddClangCXXStdlibIncludeArgs(const ArgList &DriverArgs, // equivalent to '/usr/include/c++/X.Y' in almost all cases. StringRef LibDir = GCCInstallation.getParentLibPath(); StringRef InstallDir = GCCInstallation.getInstallPath(); - StringRef Version = GCCInstallation.getVersion(); + StringRef Version = GCCInstallation.getVersion().Text; if (!addLibStdCXXIncludePaths(LibDir + "/../include/c++/" + Version, (GCCInstallation.getTriple().str() + GCCInstallation.getMultiarchSuffix()), diff --git a/lib/Driver/ToolChains.h b/lib/Driver/ToolChains.h index eaa6be1..95a11be 100644 --- a/lib/Driver/ToolChains.h +++ b/lib/Driver/ToolChains.h @@ -99,7 +99,7 @@ protected: StringRef getParentLibPath() const { return GCCParentLibPath; } /// \brief Get the detected GCC version string. - StringRef getVersion() const { return Version.Text; } + const GCCVersion &getVersion() const { return Version; } private: static void CollectLibDirsAndTriples( @@ -176,22 +176,6 @@ private: // the argument translation business. mutable bool TargetInitialized; - // FIXME: Remove this once there is a proper way to detect an ARC runtime - // for the simulator. - public: - mutable enum { - ARCSimulator_None, - ARCSimulator_HasARCRuntime, - ARCSimulator_NoARCRuntime - } ARCRuntimeForSimulator; - - mutable enum { - LibCXXSimulator_None, - LibCXXSimulator_NotAvailable, - LibCXXSimulator_Available - } LibCXXForSimulator; - -private: /// Whether we are targeting iPhoneOS target. 
mutable bool TargetIsIPhoneOS; @@ -201,12 +185,19 @@ private: /// The OS version we are targeting. mutable VersionTuple TargetVersion; +protected: + // FIXME: Remove this once there is a proper way to detect an ARC runtime + // for the simulator. + mutable VersionTuple TargetSimulatorVersionFromDefines; + +private: /// The default macosx-version-min of this tool chain; empty until /// initialized. std::string MacosxVersionMin; - bool hasARCRuntime() const; - bool hasSubscriptingRuntime() const; + /// The default ios-version-min of this tool chain; empty until + /// initialized. + std::string iOSVersionMin; private: void AddDeploymentTarget(DerivedArgList &Args) const; @@ -254,7 +245,7 @@ public: bool isTargetMacOS() const { return !isTargetIOSSimulator() && !isTargetIPhoneOS() && - ARCRuntimeForSimulator == ARCSimulator_None; + TargetSimulatorVersionFromDefines == VersionTuple(); } bool isTargetInitialized() const { return TargetInitialized; } @@ -279,14 +270,6 @@ public: return TargetVersion < VersionTuple(V0, V1, V2); } - /// AddLinkSearchPathArgs - Add the linker search paths to \arg CmdArgs. - /// - /// \param Args - The input argument list. - /// \param CmdArgs [out] - The command argument list to append the paths - /// (prefixed by -L) to. - virtual void AddLinkSearchPathArgs(const ArgList &Args, - ArgStringList &CmdArgs) const = 0; - /// AddLinkARCArgs - Add the linker arguments to link the ARC runtime library. virtual void AddLinkARCArgs(const ArgList &Args, ArgStringList &CmdArgs) const = 0; @@ -304,7 +287,7 @@ public: virtual bool HasNativeLLVMSupport() const; - virtual void configureObjCRuntime(ObjCRuntime &runtime) const; + virtual ObjCRuntime getDefaultObjCRuntime(bool isNonFragile) const; virtual bool hasBlocksRuntime() const; virtual DerivedArgList *TranslateArgs(const DerivedArgList &Args, @@ -333,7 +316,11 @@ public: return ToolChain::IsStrictAliasingDefault(); #endif } - + + virtual bool IsMathErrnoDefault() const { + return false; + } + virtual bool IsObjCDefaultSynthPropertiesDefault() const { return true; } @@ -342,12 +329,7 @@ public: // Non-fragile ABI is default for everything but i386. return getTriple().getArch() != llvm::Triple::x86; } - virtual bool IsObjCLegacyDispatchDefault() const { - // This is only used with the non-fragile ABI. - // Legacy dispatch is used everywhere except on x86_64. - return getTriple().getArch() != llvm::Triple::x86_64; - } virtual bool UseObjCMixedDispatch() const { // This is only used with the non-fragile ABI and non-legacy dispatch. 
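Returning the structured GCCVersion from getVersion() (changed near the top of this ToolChains.h diff) is what makes ordered checks such as the V >= Parse("4.7.0") test in Linux::addClangTargetOptions meaningful: compared as text, "4.10.0" sorts before "4.7.0". A self-contained illustration using a hypothetical Version type, not clang's actual struct:

    #include <string>

    struct Version {
      int Major, Minor, Patch;
      bool operator>=(const Version &O) const {
        if (Major != O.Major) return Major > O.Major;
        if (Minor != O.Minor) return Minor > O.Minor;
        return Patch >= O.Patch;
      }
    };

    int main() {
      std::string A = "4.10.0", B = "4.7.0";
      bool TextSaysOlder = A < B;               // true: '1' < '7' textually
      Version VA = {4, 10, 0}, VB = {4, 7, 0};
      bool NumSaysNewer = VA >= VB;             // true: 4.10.0 is the newer one
      return (TextSaysOlder && NumSaysNewer) ? 0 : 1;
    }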
@@ -392,9 +374,6 @@ public: /// @name Darwin ToolChain Implementation /// { - virtual void AddLinkSearchPathArgs(const ArgList &Args, - ArgStringList &CmdArgs) const; - virtual void AddLinkRuntimeLibArgs(const ArgList &Args, ArgStringList &CmdArgs) const; void AddLinkRuntimeLib(const ArgList &Args, ArgStringList &CmdArgs, @@ -459,33 +438,39 @@ class LLVM_LIBRARY_VISIBILITY OpenBSD : public Generic_ELF { public: OpenBSD(const Driver &D, const llvm::Triple& Triple, const ArgList &Args); + virtual bool IsMathErrnoDefault() const { return false; } virtual bool IsObjCNonFragileABIDefault() const { return true; } - virtual bool IsObjCLegacyDispatchDefault() const { - llvm::Triple::ArchType Arch = getTriple().getArch(); - if (Arch == llvm::Triple::arm || - Arch == llvm::Triple::x86 || - Arch == llvm::Triple::x86_64) - return false; - return true; - } virtual Tool &SelectTool(const Compilation &C, const JobAction &JA, const ActionList &Inputs) const; }; +class LLVM_LIBRARY_VISIBILITY Bitrig : public Generic_ELF { +public: + Bitrig(const Driver &D, const llvm::Triple& Triple, const ArgList &Args); + + virtual bool IsMathErrnoDefault() const { return false; } + virtual bool IsObjCNonFragileABIDefault() const { return true; } + virtual bool IsObjCLegacyDispatchDefault() const { return false; } + + virtual Tool &SelectTool(const Compilation &C, const JobAction &JA, + const ActionList &Inputs) const; + + virtual void AddClangCXXStdlibIncludeArgs(const ArgList &DriverArgs, + ArgStringList &CC1Args) const; + virtual void AddCXXStdlibLibArgs(const ArgList &Args, + ArgStringList &CmdArgs) const; + virtual unsigned GetDefaultStackProtectorLevel(bool KernelOrKext) const { + return 1; + } +}; + class LLVM_LIBRARY_VISIBILITY FreeBSD : public Generic_ELF { public: FreeBSD(const Driver &D, const llvm::Triple& Triple, const ArgList &Args); + virtual bool IsMathErrnoDefault() const { return false; } virtual bool IsObjCNonFragileABIDefault() const { return true; } - virtual bool IsObjCLegacyDispatchDefault() const { - llvm::Triple::ArchType Arch = getTriple().getArch(); - if (Arch == llvm::Triple::arm || - Arch == llvm::Triple::x86 || - Arch == llvm::Triple::x86_64) - return false; - return true; - } virtual Tool &SelectTool(const Compilation &C, const JobAction &JA, const ActionList &Inputs) const; @@ -495,15 +480,8 @@ class LLVM_LIBRARY_VISIBILITY NetBSD : public Generic_ELF { public: NetBSD(const Driver &D, const llvm::Triple& Triple, const ArgList &Args); + virtual bool IsMathErrnoDefault() const { return false; } virtual bool IsObjCNonFragileABIDefault() const { return true; } - virtual bool IsObjCLegacyDispatchDefault() const { - llvm::Triple::ArchType Arch = getTriple().getArch(); - if (Arch == llvm::Triple::arm || - Arch == llvm::Triple::x86 || - Arch == llvm::Triple::x86_64) - return false; - return true; - } virtual Tool &SelectTool(const Compilation &C, const JobAction &JA, const ActionList &Inputs) const; @@ -521,6 +499,8 @@ class LLVM_LIBRARY_VISIBILITY DragonFly : public Generic_ELF { public: DragonFly(const Driver &D, const llvm::Triple& Triple, const ArgList &Args); + virtual bool IsMathErrnoDefault() const { return false; } + virtual Tool &SelectTool(const Compilation &C, const JobAction &JA, const ActionList &Inputs) const; }; @@ -536,6 +516,7 @@ public: virtual void AddClangSystemIncludeArgs(const ArgList &DriverArgs, ArgStringList &CC1Args) const; + virtual void addClangTargetOptions(ArgStringList &CC1Args) const; virtual void AddClangCXXStdlibIncludeArgs(const ArgList &DriverArgs, ArgStringList 
&CC1Args) const; @@ -577,6 +558,10 @@ public: virtual Tool &SelectTool(const Compilation &C, const JobAction &JA, const ActionList &Inputs) const; + virtual bool IsObjCDefaultSynthPropertiesDefault() const { + return true; + } + virtual bool IsIntegratedAssemblerDefault() const; virtual bool IsUnwindTablesDefault() const; virtual const char *GetDefaultRelocationModel() const; diff --git a/lib/Driver/Tools.cpp b/lib/Driver/Tools.cpp index 47b5294..b4234cf 100644 --- a/lib/Driver/Tools.cpp +++ b/lib/Driver/Tools.cpp @@ -16,11 +16,11 @@ #include "clang/Driver/DriverDiagnostic.h" #include "clang/Driver/Compilation.h" #include "clang/Driver/Job.h" -#include "clang/Driver/ObjCRuntime.h" #include "clang/Driver/Option.h" #include "clang/Driver/Options.h" #include "clang/Driver/ToolChain.h" #include "clang/Driver/Util.h" +#include "clang/Basic/ObjCRuntime.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringSwitch.h" @@ -174,8 +174,10 @@ static bool isObjCAutoRefCount(const ArgList &Args) { /// \brief Determine whether we are linking the ObjC runtime. static bool isObjCRuntimeLinked(const ArgList &Args) { - if (isObjCAutoRefCount(Args)) + if (isObjCAutoRefCount(Args)) { + Args.ClaimAllArgs(options::OPT_fobjc_link_runtime); return true; + } return Args.hasArg(options::OPT_fobjc_link_runtime); } @@ -422,16 +424,47 @@ void Clang::AddPreprocessingOptions(Compilation &C, getToolChain().AddClangSystemIncludeArgs(Args, CmdArgs); } +/// getLLVMArchSuffixForARM - Get the LLVM arch name to use for a particular +/// CPU. +// +// FIXME: This is redundant with -mcpu; why does LLVM use this? +// FIXME: tblgen this, or kill it! +static const char *getLLVMArchSuffixForARM(StringRef CPU) { + return llvm::StringSwitch(CPU) + .Cases("arm7tdmi", "arm7tdmi-s", "arm710t", "v4t") + .Cases("arm720t", "arm9", "arm9tdmi", "v4t") + .Cases("arm920", "arm920t", "arm922t", "v4t") + .Cases("arm940t", "ep9312","v4t") + .Cases("arm10tdmi", "arm1020t", "v5") + .Cases("arm9e", "arm926ej-s", "arm946e-s", "v5e") + .Cases("arm966e-s", "arm968e-s", "arm10e", "v5e") + .Cases("arm1020e", "arm1022e", "xscale", "iwmmxt", "v5e") + .Cases("arm1136j-s", "arm1136jf-s", "arm1176jz-s", "v6") + .Cases("arm1176jzf-s", "mpcorenovfp", "mpcore", "v6") + .Cases("arm1156t2-s", "arm1156t2f-s", "v6t2") + .Cases("cortex-a8", "cortex-a9", "v7") + .Case("cortex-m3", "v7m") + .Case("cortex-m4", "v7m") + .Case("cortex-m0", "v6m") + .Default(""); +} + /// getARMTargetCPU - Get the (LLVM) name of the ARM cpu we are targeting. // // FIXME: tblgen this. -static const char *getARMTargetCPU(const ArgList &Args, +static std::string getARMTargetCPU(const ArgList &Args, const llvm::Triple &Triple) { // FIXME: Warn on inconsistent use of -mcpu and -march. // If we have -mcpu=, use that. - if (Arg *A = Args.getLastArg(options::OPT_mcpu_EQ)) - return A->getValue(Args); + if (Arg *A = Args.getLastArg(options::OPT_mcpu_EQ)) { + StringRef MCPU = A->getValue(Args); + // Handle -mcpu=native. + if (MCPU == "native") + return llvm::sys::getHostCPUName(); + else + return MCPU; + } StringRef MArch; if (Arg *A = Args.getLastArg(options::OPT_march_EQ)) { @@ -442,13 +475,25 @@ static const char *getARMTargetCPU(const ArgList &Args, MArch = Triple.getArchName(); } + // Handle -march=native. + std::string NativeMArch; + if (MArch == "native") { + std::string CPU = llvm::sys::getHostCPUName(); + if (CPU != "generic") { + // Translate the native cpu into the architecture. The switch below will + // then choose the minimum cpu for that arch.
+ NativeMArch = std::string("arm") + getLLVMArchSuffixForARM(CPU); + MArch = NativeMArch; + } + } + return llvm::StringSwitch(MArch) .Cases("armv2", "armv2a","arm2") .Case("armv3", "arm6") .Case("armv3m", "arm7m") .Cases("armv4", "armv4t", "arm7tdmi") .Cases("armv5", "armv5t", "arm10tdmi") - .Cases("armv5e", "armv5te", "arm1026ejs") + .Cases("armv5e", "armv5te", "arm1022e") .Case("armv5tej", "arm926ej-s") .Cases("armv6", "armv6k", "arm1136jf-s") .Case("armv6j", "arm1136j-s") @@ -465,31 +510,6 @@ static const char *getARMTargetCPU(const ArgList &Args, .Default("arm7tdmi"); } -/// getLLVMArchSuffixForARM - Get the LLVM arch name to use for a particular -/// CPU. -// -// FIXME: This is redundant with -mcpu, why does LLVM use this. -// FIXME: tblgen this, or kill it! -static const char *getLLVMArchSuffixForARM(StringRef CPU) { - return llvm::StringSwitch(CPU) - .Cases("arm7tdmi", "arm7tdmi-s", "arm710t", "v4t") - .Cases("arm720t", "arm9", "arm9tdmi", "v4t") - .Cases("arm920", "arm920t", "arm922t", "v4t") - .Cases("arm940t", "ep9312","v4t") - .Cases("arm10tdmi", "arm1020t", "v5") - .Cases("arm9e", "arm926ej-s", "arm946e-s", "v5e") - .Cases("arm966e-s", "arm968e-s", "arm10e", "v5e") - .Cases("arm1020e", "arm1022e", "xscale", "iwmmxt", "v5e") - .Cases("arm1136j-s", "arm1136jf-s", "arm1176jz-s", "v6") - .Cases("arm1176jzf-s", "mpcorenovfp", "mpcore", "v6") - .Cases("arm1156t2-s", "arm1156t2f-s", "v6t2") - .Cases("cortex-a8", "cortex-a9", "v7") - .Case("cortex-m3", "v7m") - .Case("cortex-m4", "v7m") - .Case("cortex-m0", "v6m") - .Default(""); -} - // FIXME: Move to target hook. static bool isSignedCharDefault(const llvm::Triple &Triple) { switch (Triple.getArch()) { @@ -601,25 +621,21 @@ static StringRef getARMFloatABI(const Driver &D, // Darwin defaults to "softfp" for v6 and v7. // // FIXME: Factor out an ARM class so we can cache the arch somewhere. - StringRef ArchName = + std::string ArchName = getLLVMArchSuffixForARM(getARMTargetCPU(Args, Triple)); - if (ArchName.startswith("v6") || ArchName.startswith("v7")) + if (StringRef(ArchName).startswith("v6") || + StringRef(ArchName).startswith("v7")) FloatABI = "softfp"; else FloatABI = "soft"; break; } - case llvm::Triple::Linux: { - if (Triple.getEnvironment() == llvm::Triple::GNUEABI) { - FloatABI = "softfp"; - break; - } - } - // fall through - default: switch(Triple.getEnvironment()) { + case llvm::Triple::GNUEABIHF: + FloatABI = "hard"; + break; case llvm::Triple::GNUEABI: FloatABI = "softfp"; break; @@ -628,9 +644,9 @@ static StringRef getARMFloatABI(const Driver &D, FloatABI = "softfp"; break; case llvm::Triple::ANDROIDEABI: { - StringRef ArchName = + std::string ArchName = getLLVMArchSuffixForARM(getARMTargetCPU(Args, Triple)); - if (ArchName.startswith("v7")) + if (StringRef(ArchName).startswith("v7")) FloatABI = "softfp"; else FloatABI = "soft"; @@ -666,6 +682,7 @@ void Clang::AddARMTargetArgs(const ArgList &Args, switch(Triple.getEnvironment()) { case llvm::Triple::ANDROIDEABI: case llvm::Triple::GNUEABI: + case llvm::Triple::GNUEABIHF: ABIName = "aapcs-linux"; break; case llvm::Triple::EABI: @@ -680,7 +697,7 @@ void Clang::AddARMTargetArgs(const ArgList &Args, // Set the CPU based on -march= and -mcpu=. CmdArgs.push_back("-target-cpu"); - CmdArgs.push_back(getARMTargetCPU(Args, Triple)); + CmdArgs.push_back(Args.MakeArgString(getARMTargetCPU(Args, Triple))); // Determine floating point ABI from the options & target defaults. 
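The two ARM helpers above now feed the float-ABI defaulting that follows: a CPU name (possibly resolved from -mcpu=native via llvm::sys::getHostCPUName) is mapped to an architecture suffix, and on Darwin that suffix decides between "softfp" and "soft". A minimal standalone sketch of the two-step resolution, using a small hypothetical excerpt of the table rather than the full StringSwitch:

#include <iostream>
#include <map>
#include <string>

// Hypothetical excerpt of the CPU -> arch-suffix table; not the full list.
static std::string archSuffixForARM(const std::string &CPU) {
  static const std::map<std::string, std::string> Table = {
      {"arm7tdmi", "v4t"},   {"arm926ej-s", "v5e"},
      {"arm1136jf-s", "v6"}, {"cortex-a8", "v7"}, {"cortex-a9", "v7"}};
  std::map<std::string, std::string>::const_iterator It = Table.find(CPU);
  return It == Table.end() ? "" : It->second;
}

int main() {
  std::string CPU = "cortex-a9"; // stand-in for a resolved -mcpu= value
  std::string Suffix = archSuffixForARM(CPU);
  // Mirrors the Darwin check in getARMFloatABI: v6/v7 parts get softfp.
  bool SoftFP = Suffix.compare(0, 2, "v6") == 0 ||
                Suffix.compare(0, 2, "v7") == 0;
  std::cout << CPU << " -> " << Suffix << " -> "
            << (SoftFP ? "softfp" : "soft") << "\n";
  return 0;
}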
StringRef FloatABI = getARMFloatABI(D, Args, Triple); @@ -755,6 +772,9 @@ void Clang::AddARMTargetArgs(const ArgList &Args, if (A->getOption().matches(options::OPT_mno_global_merge)) CmdArgs.push_back("-mno-global-merge"); } + + if (Args.hasArg(options::OPT_mno_implicit_float)) + CmdArgs.push_back("-no-implicit-float"); } // Get default architecture. @@ -825,19 +845,9 @@ static void getMipsCPUAndABI(const ArgList &Args, ABIName = getMipsABIFromArch(ArchName); } -void Clang::AddMIPSTargetArgs(const ArgList &Args, - ArgStringList &CmdArgs) const { - const Driver &D = getToolChain().getDriver(); - StringRef CPUName; - StringRef ABIName; - getMipsCPUAndABI(Args, getToolChain(), CPUName, ABIName); - - CmdArgs.push_back("-target-cpu"); - CmdArgs.push_back(CPUName.data()); - - CmdArgs.push_back("-target-abi"); - CmdArgs.push_back(ABIName.data()); - +// Select the MIPS float ABI as determined by -msoft-float, -mhard-float, +// and -mfloat-abi=. +static StringRef getMipsFloatABI(const Driver &D, const ArgList &Args) { // Select the float ABI as determined by -msoft-float, -mhard-float, // and -mfloat-abi=. StringRef FloatABI; @@ -851,8 +861,7 @@ void Clang::AddMIPSTargetArgs(const ArgList &Args, else { FloatABI = A->getValue(Args); if (FloatABI != "soft" && FloatABI != "single" && FloatABI != "hard") { - D.Diag(diag::err_drv_invalid_mfloat_abi) - << A->getAsString(Args); + D.Diag(diag::err_drv_invalid_mfloat_abi) << A->getAsString(Args); FloatABI = "hard"; } } @@ -866,6 +875,38 @@ void Clang::AddMIPSTargetArgs(const ArgList &Args, FloatABI = "hard"; } + return FloatABI; +} + +static void AddTargetFeature(const ArgList &Args, + ArgStringList &CmdArgs, + OptSpecifier OnOpt, + OptSpecifier OffOpt, + StringRef FeatureName) { + if (Arg *A = Args.getLastArg(OnOpt, OffOpt)) { + CmdArgs.push_back("-target-feature"); + if (A->getOption().matches(OnOpt)) + CmdArgs.push_back(Args.MakeArgString("+" + FeatureName)); + else + CmdArgs.push_back(Args.MakeArgString("-" + FeatureName)); + } +} + +void Clang::AddMIPSTargetArgs(const ArgList &Args, + ArgStringList &CmdArgs) const { + const Driver &D = getToolChain().getDriver(); + StringRef CPUName; + StringRef ABIName; + getMipsCPUAndABI(Args, getToolChain(), CPUName, ABIName); + + CmdArgs.push_back("-target-cpu"); + CmdArgs.push_back(CPUName.data()); + + CmdArgs.push_back("-target-abi"); + CmdArgs.push_back(ABIName.data()); + + StringRef FloatABI = getMipsFloatABI(D, Args); + if (FloatABI == "soft") { // Floating point operations and argument passing are soft. CmdArgs.push_back("-msoft-float"); @@ -890,6 +931,82 @@ void Clang::AddMIPSTargetArgs(const ArgList &Args, CmdArgs.push_back("-mfloat-abi"); CmdArgs.push_back("hard"); } + + AddTargetFeature(Args, CmdArgs, + options::OPT_mips16, options::OPT_mno_mips16, + "mips16"); + AddTargetFeature(Args, CmdArgs, + options::OPT_mdsp, options::OPT_mno_dsp, + "dsp"); + AddTargetFeature(Args, CmdArgs, + options::OPT_mdspr2, options::OPT_mno_dspr2, + "dspr2"); +} + +/// getPPCTargetCPU - Get the (LLVM) name of the PowerPC cpu we are targeting. 
+static std::string getPPCTargetCPU(const ArgList &Args) { + if (Arg *A = Args.getLastArg(options::OPT_mcpu_EQ)) { + StringRef CPUName = A->getValue(Args); + + if (CPUName == "native") { + std::string CPU = llvm::sys::getHostCPUName(); + if (!CPU.empty() && CPU != "generic") + return CPU; + else + return ""; + } + + return llvm::StringSwitch(CPUName) + .Case("common", "generic") + .Case("440", "440") + .Case("440fp", "440") + .Case("450", "450") + .Case("601", "601") + .Case("602", "602") + .Case("603", "603") + .Case("603e", "603e") + .Case("603ev", "603ev") + .Case("604", "604") + .Case("604e", "604e") + .Case("620", "620") + .Case("G3", "g3") + .Case("7400", "7400") + .Case("G4", "g4") + .Case("7450", "7450") + .Case("G4+", "g4+") + .Case("750", "750") + .Case("970", "970") + .Case("G5", "g5") + .Case("a2", "a2") + .Case("power6", "pwr6") + .Case("power7", "pwr7") + .Case("powerpc", "ppc") + .Case("powerpc64", "ppc64") + .Default(""); + } + + return ""; +} + +void Clang::AddPPCTargetArgs(const ArgList &Args, + ArgStringList &CmdArgs) const { + std::string TargetCPUName = getPPCTargetCPU(Args); + + // LLVM may default to generating code for the native CPU, + // but, like gcc, we default to a more generic option for + // each architecture. (except on Darwin) + llvm::Triple Triple = getToolChain().getTriple(); + if (TargetCPUName.empty() && !Triple.isOSDarwin()) { + if (Triple.getArch() == llvm::Triple::ppc64) + TargetCPUName = "ppc64"; + else + TargetCPUName = "ppc"; + } + + if (!TargetCPUName.empty()) { + CmdArgs.push_back("-target-cpu"); + CmdArgs.push_back(Args.MakeArgString(TargetCPUName.c_str())); + } } void Clang::AddSparcTargetArgs(const ArgList &Args, @@ -958,7 +1075,7 @@ void Clang::AddX86TargetArgs(const ArgList &Args, // FIXME: We should also incorporate the detected target features for use // with -native. 
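Both the PPC hunk above and the x86 code that follows share one pattern for -mcpu=native: query the host CPU and fall back to a per-target generic default when detection fails or reports "generic". A small sketch of that pattern; resolveNativeCPU and hostCPUName are illustrative stand-ins, not driver APIs:

#include <iostream>
#include <string>

// Stand-in for llvm::sys::getHostCPUName(); real detection can come back
// as "generic", which is exactly the case the driver guards against.
static std::string hostCPUName() { return "generic"; }

static std::string resolveNativeCPU(const std::string &PerTargetDefault) {
  std::string CPU = hostCPUName();
  if (CPU.empty() || CPU == "generic")
    return PerTargetDefault; // e.g. "ppc64" on ppc64, "x86-64" on x86_64
  return CPU;
}

int main() {
  std::cout << resolveNativeCPU("ppc64") << "\n"; // prints "ppc64" here
  return 0;
}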
std::string CPU = llvm::sys::getHostCPUName(); - if (!CPU.empty()) + if (!CPU.empty() && CPU != "generic") CPUName = Args.MakeArgString(CPU); } else CPUName = A->getValue(Args); @@ -982,6 +1099,11 @@ void Clang::AddX86TargetArgs(const ArgList &Args, CPUName = "x86-64"; else if (getToolChain().getArch() == llvm::Triple::x86) CPUName = "i486"; + } else if (getToolChain().getOS().startswith("bitrig")) { + if (getToolChain().getArch() == llvm::Triple::x86_64) + CPUName = "x86-64"; + else if (getToolChain().getArch() == llvm::Triple::x86) + CPUName = "i686"; } else if (getToolChain().getOS().startswith("freebsd")) { if (getToolChain().getArch() == llvm::Triple::x86_64) CPUName = "x86-64"; @@ -1088,7 +1210,7 @@ void Clang::AddHexagonTargetArgs(const ArgList &Args, CmdArgs.push_back("-fno-signed-char"); CmdArgs.push_back("-nobuiltininc"); - if (Args.hasArg(options::OPT_mqdsp6_compat)) + if (Args.hasArg(options::OPT_mqdsp6_compat)) CmdArgs.push_back("-mqdsp6-compat"); if (Arg *A = Args.getLastArg(options::OPT_G, @@ -1100,18 +1222,23 @@ void Clang::AddHexagonTargetArgs(const ArgList &Args, A->claim(); } + if (!Args.hasArg(options::OPT_fno_short_enums)) + CmdArgs.push_back("-fshort-enums"); + if (Args.getLastArg(options::OPT_mieee_rnd_near)) { + CmdArgs.push_back ("-mllvm"); + CmdArgs.push_back ("-enable-hexagon-ieee-rnd-near"); + } CmdArgs.push_back ("-mllvm"); CmdArgs.push_back ("-machine-sink-split=0"); } static bool -shouldUseExceptionTablesForObjCExceptions(unsigned objcABIVersion, +shouldUseExceptionTablesForObjCExceptions(const ObjCRuntime &runtime, const llvm::Triple &Triple) { // We use the zero-cost exception tables for Objective-C if the non-fragile // ABI is enabled or when compiling for x86_64 and ARM on Snow Leopard and // later. - - if (objcABIVersion >= 2) + if (runtime.isNonFragile()) return true; if (!Triple.isOSDarwin()) @@ -1130,7 +1257,7 @@ shouldUseExceptionTablesForObjCExceptions(unsigned objcABIVersion, static void addExceptionArgs(const ArgList &Args, types::ID InputType, const llvm::Triple &Triple, bool KernelOrKext, - unsigned objcABIVersion, + const ObjCRuntime &objcRuntime, ArgStringList &CmdArgs) { if (KernelOrKext) { // -mkernel and -fapple-kext imply no exceptions, so claim exception related @@ -1176,7 +1303,7 @@ static void addExceptionArgs(const ArgList &Args, types::ID InputType, CmdArgs.push_back("-fobjc-exceptions"); ShouldUseExceptionTables |= - shouldUseExceptionTablesForObjCExceptions(objcABIVersion, Triple); + shouldUseExceptionTablesForObjCExceptions(objcRuntime, Triple); } if (types::isCXX(InputType)) { @@ -1269,22 +1396,56 @@ static bool UseRelaxAll(Compilation &C, const ArgList &Args) { /// This needs to be called before we add the C run-time (malloc, etc). static void addAsanRTLinux(const ToolChain &TC, const ArgList &Args, ArgStringList &CmdArgs) { - // Add asan linker flags when linking an executable, but not a shared object. - if (Args.hasArg(options::OPT_shared) || - !Args.hasFlag(options::OPT_faddress_sanitizer, + if (!Args.hasFlag(options::OPT_faddress_sanitizer, options::OPT_fno_address_sanitizer, false)) return; + if(TC.getTriple().getEnvironment() == llvm::Triple::ANDROIDEABI) { + if (!Args.hasArg(options::OPT_shared)) { + if (!Args.hasArg(options::OPT_pie)) + TC.getDriver().Diag(diag::err_drv_asan_android_requires_pie); + // For an executable, we add a .preinit_array stub. 
+ CmdArgs.push_back("-u"); + CmdArgs.push_back("__asan_preinit"); + CmdArgs.push_back("-lasan"); + } - // LibAsan is "libclang_rt.asan-.a" in the Linux library resource - // directory. - SmallString<128> LibAsan(TC.getDriver().ResourceDir); - llvm::sys::path::append(LibAsan, "lib", "linux", - (Twine("libclang_rt.asan-") + - TC.getArchName() + ".a")); - CmdArgs.push_back(Args.MakeArgString(LibAsan)); - CmdArgs.push_back("-lpthread"); - CmdArgs.push_back("-ldl"); - CmdArgs.push_back("-export-dynamic"); + CmdArgs.push_back("-lasan_preload"); + CmdArgs.push_back("-ldl"); + } else { + if (!Args.hasArg(options::OPT_shared)) { + // LibAsan is "libclang_rt.asan-.a" in the Linux library + // resource directory. + SmallString<128> LibAsan(TC.getDriver().ResourceDir); + llvm::sys::path::append(LibAsan, "lib", "linux", + (Twine("libclang_rt.asan-") + + TC.getArchName() + ".a")); + CmdArgs.push_back(Args.MakeArgString(LibAsan)); + CmdArgs.push_back("-lpthread"); + CmdArgs.push_back("-ldl"); + CmdArgs.push_back("-export-dynamic"); + } + } +} + +/// If ThreadSanitizer is enabled, add appropriate linker flags (Linux). +/// This needs to be called before we add the C run-time (malloc, etc). +static void addTsanRTLinux(const ToolChain &TC, const ArgList &Args, + ArgStringList &CmdArgs) { + if (!Args.hasFlag(options::OPT_fthread_sanitizer, + options::OPT_fno_thread_sanitizer, false)) + return; + if (!Args.hasArg(options::OPT_shared)) { + // LibTsan is "libclang_rt.tsan-.a" in the Linux library + // resource directory. + SmallString<128> LibTsan(TC.getDriver().ResourceDir); + llvm::sys::path::append(LibTsan, "lib", "linux", + (Twine("libclang_rt.tsan-") + + TC.getArchName() + ".a")); + CmdArgs.push_back(Args.MakeArgString(LibTsan)); + CmdArgs.push_back("-lpthread"); + CmdArgs.push_back("-ldl"); + CmdArgs.push_back("-export-dynamic"); + } } static bool shouldUseFramePointer(const ArgList &Args, @@ -1328,8 +1489,7 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, CmdArgs.push_back(Args.MakeArgString(TripleStr)); // Select the appropriate action. - bool IsRewriter = false; - bool IsModernRewriter = false; + RewriteKind rewriteKind = RK_None; if (isa(JA)) { assert(JA.getType() == types::TY_Plist && "Invalid output type."); @@ -1380,7 +1540,9 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, // Use PCH if the user requested it. bool UsePCH = D.CCCUsePCH; - if (UsePCH) + if (JA.getType() == types::TY_Nothing) + CmdArgs.push_back("-fsyntax-only"); + else if (UsePCH) CmdArgs.push_back("-emit-pch"); else CmdArgs.push_back("-emit-pth"); @@ -1401,10 +1563,10 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, CmdArgs.push_back("-emit-pch"); } else if (JA.getType() == types::TY_RewrittenObjC) { CmdArgs.push_back("-rewrite-objc"); - IsModernRewriter = true; + rewriteKind = RK_NonFragile; } else if (JA.getType() == types::TY_RewrittenLegacyObjC) { CmdArgs.push_back("-rewrite-objc"); - IsRewriter = true; + rewriteKind = RK_Fragile; } else { assert(JA.getType() == types::TY_PP_Asm && "Unexpected output type!"); @@ -1488,22 +1650,11 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, // This comes from the default translation the driver + cc1 // would do to enable flag_pic. - // - // FIXME: Centralize this code. 
- Arg *LastPICArg = 0; - for (ArgList::const_iterator I = Args.begin(), E = Args.end(); I != E; ++I) { - if ((*I)->getOption().matches(options::OPT_fPIC) || - (*I)->getOption().matches(options::OPT_fno_PIC) || - (*I)->getOption().matches(options::OPT_fpic) || - (*I)->getOption().matches(options::OPT_fno_pic) || - (*I)->getOption().matches(options::OPT_fPIE) || - (*I)->getOption().matches(options::OPT_fno_PIE) || - (*I)->getOption().matches(options::OPT_fpie) || - (*I)->getOption().matches(options::OPT_fno_pie)) { - LastPICArg = *I; - (*I)->claim(); - } - } + + Arg *LastPICArg = Args.getLastArg(options::OPT_fPIC, options::OPT_fno_PIC, + options::OPT_fpic, options::OPT_fno_pic, + options::OPT_fPIE, options::OPT_fno_PIE, + options::OPT_fpie, options::OPT_fno_pie); bool PICDisabled = false; bool PICEnabled = false; bool PICForPIE = false; @@ -1606,16 +1757,14 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, A->getOption().getID() != options::OPT_fhonor_nans) CmdArgs.push_back("-menable-no-nans"); - // -fno-math-errno is default. - bool MathErrno = false; + // -fmath-errno is the default on some platforms, e.g. BSD-derived OSes. + bool MathErrno = getToolChain().IsMathErrnoDefault(); if (Arg *A = Args.getLastArg(options::OPT_ffast_math, options::OPT_fmath_errno, - options::OPT_fno_math_errno)) { - if (A->getOption().getID() == options::OPT_fmath_errno) { - CmdArgs.push_back("-fmath-errno"); - MathErrno = true; - } - } + options::OPT_fno_math_errno)) + MathErrno = A->getOption().getID() == options::OPT_fmath_errno; + if (MathErrno) + CmdArgs.push_back("-fmath-errno"); // There are several flags which require disabling very specific // optimizations. Any of these being disabled forces us to turn off the @@ -1661,12 +1810,33 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, !TrappingMath) CmdArgs.push_back("-menable-unsafe-fp-math"); - // We separately look for the '-ffast-math' flag, and if we find it, tell the - // frontend to provide the appropriate preprocessor macros. This is distinct - // from enabling any optimizations as it induces a language change which must - // survive serialization and deserialization, etc. + + // Validate and pass through -fp-contract option. + if (Arg *A = Args.getLastArg(options::OPT_ffast_math, + options::OPT_ffp_contract)) { + if (A->getOption().getID() == options::OPT_ffp_contract) { + StringRef Val = A->getValue(Args); + if (Val == "fast" || Val == "on" || Val == "off") { + CmdArgs.push_back(Args.MakeArgString("-ffp-contract=" + Val)); + } else { + D.Diag(diag::err_drv_unsupported_option_argument) + << A->getOption().getName() << Val; + } + } else { // A is OPT_ffast_math + // If fast-math is set then set the fp-contract mode to fast. + CmdArgs.push_back(Args.MakeArgString("-ffp-contract=fast")); + } + } + + // We separately look for the '-ffast-math' and '-ffinite-math-only' flags, + // and if we find them, tell the frontend to provide the appropriate + // preprocessor macros. This is distinct from enabling any optimizations as + // these options induce language changes which must survive serialization + // and deserialization, etc. if (Args.hasArg(options::OPT_ffast_math)) CmdArgs.push_back("-ffast-math"); + if (Args.hasArg(options::OPT_ffinite_math_only)) + CmdArgs.push_back("-ffinite-math-only"); // Decide whether to use verbose asm. Verbose assembly is the default on // toolchains which have the integrated assembler on by default. 
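Several of the changes above (the PIC flags, -fmath-errno, -ffp-contract) lean on the same idiom: a single Args.getLastArg call over a set of competing options, so the final flag on the command line wins. A self-contained sketch of that resolution over plain strings, not the driver's Arg machinery:

#include <iostream>
#include <string>
#include <vector>

static std::string lastArgOf(const std::vector<std::string> &CommandLine,
                             const std::vector<std::string> &Competing) {
  std::string Last;
  for (const std::string &A : CommandLine)
    for (const std::string &W : Competing)
      if (A == W)
        Last = A; // a later occurrence overrides any earlier one
  return Last;
}

int main() {
  std::vector<std::string> Cmd = {"-fPIC", "-O2", "-fno-pic", "-fpie"};
  std::vector<std::string> PICFlags = {"-fPIC", "-fno-PIC", "-fpic",
                                       "-fno-pic", "-fPIE", "-fno-PIE",
                                       "-fpie", "-fno-pie"};
  // The PIC decision looks only at the last PIC-related flag: -fpie here.
  std::cout << lastArgOf(Cmd, PICFlags) << "\n";
  return 0;
}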
@@ -1711,6 +1881,8 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, AsynchronousUnwindTables)) CmdArgs.push_back("-munwind-tables"); + getToolChain().addClangTargetOptions(CmdArgs); + if (Arg *A = Args.getLastArg(options::OPT_flimited_precision_EQ)) { CmdArgs.push_back("-mlimit-float-precision"); CmdArgs.push_back(A->getValue(Args)); @@ -1741,6 +1913,11 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, AddMIPSTargetArgs(Args, CmdArgs); break; + case llvm::Triple::ppc: + case llvm::Triple::ppc64: + AddPPCTargetArgs(Args, CmdArgs); + break; + case llvm::Triple::sparc: AddSparcTargetArgs(Args, CmdArgs); break; @@ -1800,13 +1977,20 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, D.CCLogDiagnosticsFilename : "-"); } - // Special case debug options to only pass -g to clang. This is - // wrong. + // Use the last option from "-g" group. "-gline-tables-only" is + // preserved, all other debug options are substituted with "-g". Args.ClaimAllArgs(options::OPT_g_Group); - if (Arg *A = Args.getLastArg(options::OPT_g_Group)) - if (!A->getOption().matches(options::OPT_g0)) { + if (Arg *A = Args.getLastArg(options::OPT_g_Group)) { + if (A->getOption().matches(options::OPT_gline_tables_only)) { + CmdArgs.push_back("-gline-tables-only"); + } else if (!A->getOption().matches(options::OPT_g0) && + !A->getOption().matches(options::OPT_ggdb0)) { CmdArgs.push_back("-g"); } + } + + // We ignore flags -gstrict-dwarf and -grecord-gcc-switches for now. + Args.ClaimAllArgs(options::OPT_g_flags_Group); Args.AddAllArgs(CmdArgs, options::OPT_ffunction_sections); Args.AddAllArgs(CmdArgs, options::OPT_fdata_sections); @@ -1917,7 +2101,8 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, } Args.AddAllArgs(CmdArgs, options::OPT_W_Group); - Args.AddLastArg(CmdArgs, options::OPT_pedantic); + if (Args.hasFlag(options::OPT_pedantic, options::OPT_no_pedantic, false)) + CmdArgs.push_back("-pedantic"); Args.AddLastArg(CmdArgs, options::OPT_pedantic_errors); Args.AddLastArg(CmdArgs, options::OPT_w); @@ -2007,11 +2192,20 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, if (Arg *A = Args.getLastArg(options::OPT_Wlarge_by_value_copy_EQ, options::OPT_Wlarge_by_value_copy_def)) { - CmdArgs.push_back("-Wlarge-by-value-copy"); - if (A->getNumValues()) - CmdArgs.push_back(A->getValue(Args)); - else - CmdArgs.push_back("64"); // default value for -Wlarge-by-value-copy. + if (A->getNumValues()) { + StringRef bytes = A->getValue(Args); + CmdArgs.push_back(Args.MakeArgString("-Wlarge-by-value-copy=" + bytes)); + } else + CmdArgs.push_back("-Wlarge-by-value-copy=64"); // default value + } + + if (Arg *A = Args.getLastArg(options::OPT_fbounds_checking, + options::OPT_fbounds_checking_EQ)) { + if (A->getNumValues()) { + StringRef val = A->getValue(Args); + CmdArgs.push_back(Args.MakeArgString("-fbounds-checking=" + val)); + } else + CmdArgs.push_back("-fbounds-checking=1"); } if (Args.hasArg(options::OPT__relocatable_pch)) @@ -2066,6 +2260,8 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, Args.AddLastArg(CmdArgs, options::OPT_fvisibility_inlines_hidden); + Args.AddLastArg(CmdArgs, options::OPT_ftlsmodel_EQ); + // -fhosted is default. 
if (Args.hasFlag(options::OPT_ffreestanding, options::OPT_fhosted, false) || KernelOrKext) @@ -2079,6 +2275,8 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, Args.AddLastArg(CmdArgs, options::OPT_fno_limit_debug_info); Args.AddLastArg(CmdArgs, options::OPT_fno_operator_names); Args.AddLastArg(CmdArgs, options::OPT_faltivec); + Args.AddLastArg(CmdArgs, options::OPT_fdiagnostics_show_template_tree); + Args.AddLastArg(CmdArgs, options::OPT_fno_elide_type); // Report and error for -faltivec on anything other then PowerPC. if (const Arg *A = Args.getLastArg(options::OPT_faltivec)) @@ -2106,6 +2304,10 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, if (Args.getLastArg(options::OPT_fapple_kext)) CmdArgs.push_back("-fapple-kext"); + if (Args.hasFlag(options::OPT_frewrite_includes, + options::OPT_fno_rewrite_includes, false)) + CmdArgs.push_back("-frewrite-includes"); + Args.AddLastArg(CmdArgs, options::OPT_fobjc_sender_dependent_dispatch); Args.AddLastArg(CmdArgs, options::OPT_fdiagnostics_print_source_range_info); Args.AddLastArg(CmdArgs, options::OPT_fdiagnostics_parseable_fixits); @@ -2259,6 +2461,10 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, getToolChain().getTriple().getOS() == llvm::Triple::Win32)) CmdArgs.push_back("-fms-extensions"); + // -fms-inline-asm. + if (Args.hasArg(options::OPT_fenable_experimental_ms_inline_asm)) + CmdArgs.push_back("-fenable-experimental-ms-inline-asm"); + // -fms-compatibility=0 is default. if (Args.hasFlag(options::OPT_fms_compatibility, options::OPT_fno_ms_compatibility, @@ -2309,83 +2515,15 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, if (Args.hasArg(options::OPT_fno_inline_functions)) CmdArgs.push_back("-fno-inline-functions"); - // -fobjc-nonfragile-abi=0 is default. - ObjCRuntime objCRuntime; - unsigned objcABIVersion = 0; - bool NeXTRuntimeIsDefault - = (IsRewriter || IsModernRewriter || - getToolChain().getTriple().isOSDarwin()); - if (Args.hasFlag(options::OPT_fnext_runtime, options::OPT_fgnu_runtime, - NeXTRuntimeIsDefault)) { - objCRuntime.setKind(ObjCRuntime::NeXT); - } else { - CmdArgs.push_back("-fgnu-runtime"); - objCRuntime.setKind(ObjCRuntime::GNU); - } - getToolChain().configureObjCRuntime(objCRuntime); - if (objCRuntime.HasARC) - CmdArgs.push_back("-fobjc-runtime-has-arc"); - if (objCRuntime.HasWeak) - CmdArgs.push_back("-fobjc-runtime-has-weak"); - if (objCRuntime.HasTerminate) - CmdArgs.push_back("-fobjc-runtime-has-terminate"); - - // Compute the Objective-C ABI "version" to use. Version numbers are - // slightly confusing for historical reasons: - // 1 - Traditional "fragile" ABI - // 2 - Non-fragile ABI, version 1 - // 3 - Non-fragile ABI, version 2 - objcABIVersion = 1; - // If -fobjc-abi-version= is present, use that to set the version. - if (Arg *A = Args.getLastArg(options::OPT_fobjc_abi_version_EQ)) { - if (StringRef(A->getValue(Args)) == "1") - objcABIVersion = 1; - else if (StringRef(A->getValue(Args)) == "2") - objcABIVersion = 2; - else if (StringRef(A->getValue(Args)) == "3") - objcABIVersion = 3; - else - D.Diag(diag::err_drv_clang_unsupported) << A->getAsString(Args); - } else { - // Otherwise, determine if we are using the non-fragile ABI. - bool NonFragileABIIsDefault = - (IsModernRewriter || - (!IsRewriter && getToolChain().IsObjCNonFragileABIDefault())); - if (Args.hasFlag(options::OPT_fobjc_nonfragile_abi, - options::OPT_fno_objc_nonfragile_abi, - NonFragileABIIsDefault)) { - // Determine the non-fragile ABI version to use. 
-#ifdef DISABLE_DEFAULT_NONFRAGILEABI_TWO - unsigned NonFragileABIVersion = 1; -#else - unsigned NonFragileABIVersion = 2; -#endif - - if (Arg *A = Args.getLastArg( - options::OPT_fobjc_nonfragile_abi_version_EQ)) { - if (StringRef(A->getValue(Args)) == "1") - NonFragileABIVersion = 1; - else if (StringRef(A->getValue(Args)) == "2") - NonFragileABIVersion = 2; - else - D.Diag(diag::err_drv_clang_unsupported) - << A->getAsString(Args); - } - - objcABIVersion = 1 + NonFragileABIVersion; - } else { - objcABIVersion = 1; - } - } + ObjCRuntime objcRuntime = AddObjCRuntimeArgs(Args, CmdArgs, rewriteKind); - if (objcABIVersion == 1) { - CmdArgs.push_back("-fobjc-fragile-abi"); - } else { - // -fobjc-dispatch-method is only relevant with the nonfragile-abi, and - // legacy is the default. + // -fobjc-dispatch-method is only relevant with the nonfragile-abi, and + // legacy is the default. + if (objcRuntime.isNonFragile()) { if (!Args.hasFlag(options::OPT_fobjc_legacy_dispatch, options::OPT_fno_objc_legacy_dispatch, - getToolChain().IsObjCLegacyDispatchDefault())) { + objcRuntime.isLegacyDispatchDefaultForArch( + getToolChain().getTriple().getArch()))) { if (getToolChain().UseObjCMixedDispatch()) CmdArgs.push_back("-fobjc-dispatch-method=mixed"); else @@ -2428,7 +2566,7 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, // -fobjc-infer-related-result-type is the default, except in the Objective-C // rewriter. - if (IsRewriter || IsModernRewriter) + if (rewriteKind != RK_None) CmdArgs.push_back("-fno-objc-infer-related-result-type"); // Handle -fobjc-gc and -fobjc-gc-only. They are exclusive, and -fobjc-gc-only @@ -2451,7 +2589,7 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, // Add exception args. addExceptionArgs(Args, InputType, getToolChain().getTriple(), - KernelOrKext, objcABIVersion, CmdArgs); + KernelOrKext, objcRuntime, CmdArgs); if (getToolChain().UseSjLjExceptions()) CmdArgs.push_back("-fsjlj-exceptions"); @@ -2490,12 +2628,12 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, // Honor -fpack-struct= and -fpack-struct, if given. Note that // -fno-pack-struct doesn't apply to -fpack-struct=. if (Arg *A = Args.getLastArg(options::OPT_fpack_struct_EQ)) { - CmdArgs.push_back("-fpack-struct"); - CmdArgs.push_back(A->getValue(Args)); + std::string PackStructStr = "-fpack-struct="; + PackStructStr += A->getValue(Args); + CmdArgs.push_back(Args.MakeArgString(PackStructStr)); } else if (Args.hasFlag(options::OPT_fpack_struct, options::OPT_fno_pack_struct, false)) { - CmdArgs.push_back("-fpack-struct"); - CmdArgs.push_back("1"); + CmdArgs.push_back("-fpack-struct=1"); } if (Args.hasArg(options::OPT_mkernel) || @@ -2729,7 +2867,7 @@ void ClangAs::AddARMTargetArgs(const ArgList &Args, // Set the CPU based on -march= and -mcpu=. CmdArgs.push_back("-target-cpu"); - CmdArgs.push_back(getARMTargetCPU(Args, Triple)); + CmdArgs.push_back(Args.MakeArgString(getARMTargetCPU(Args, Triple))); // Honor -mfpu=. if (const Arg *A = Args.getLastArg(options::OPT_mfpu_EQ)) @@ -2740,6 +2878,131 @@ void ClangAs::AddARMTargetArgs(const ArgList &Args, addFPMathArgs(D, A, Args, CmdArgs, getARMTargetCPU(Args, Triple)); } +/// Add options related to the Objective-C runtime/ABI. +/// +/// Returns true if the runtime is non-fragile. +ObjCRuntime Clang::AddObjCRuntimeArgs(const ArgList &args, + ArgStringList &cmdArgs, + RewriteKind rewriteKind) const { + // Look for the controlling runtime option. 
+ Arg *runtimeArg = args.getLastArg(options::OPT_fnext_runtime, + options::OPT_fgnu_runtime, + options::OPT_fobjc_runtime_EQ); + + // Just forward -fobjc-runtime= to the frontend. This supersedes + // options about fragility. + if (runtimeArg && + runtimeArg->getOption().matches(options::OPT_fobjc_runtime_EQ)) { + ObjCRuntime runtime; + StringRef value = runtimeArg->getValue(args); + if (runtime.tryParse(value)) { + getToolChain().getDriver().Diag(diag::err_drv_unknown_objc_runtime) + << value; + } + + runtimeArg->render(args, cmdArgs); + return runtime; + } + + // Otherwise, we'll need the ABI "version". Version numbers are + // slightly confusing for historical reasons: + // 1 - Traditional "fragile" ABI + // 2 - Non-fragile ABI, version 1 + // 3 - Non-fragile ABI, version 2 + unsigned objcABIVersion = 1; + // If -fobjc-abi-version= is present, use that to set the version. + if (Arg *abiArg = args.getLastArg(options::OPT_fobjc_abi_version_EQ)) { + StringRef value = abiArg->getValue(args); + if (value == "1") + objcABIVersion = 1; + else if (value == "2") + objcABIVersion = 2; + else if (value == "3") + objcABIVersion = 3; + else + getToolChain().getDriver().Diag(diag::err_drv_clang_unsupported) + << value; + } else { + // Otherwise, determine if we are using the non-fragile ABI. + bool nonFragileABIIsDefault = + (rewriteKind == RK_NonFragile || + (rewriteKind == RK_None && + getToolChain().IsObjCNonFragileABIDefault())); + if (args.hasFlag(options::OPT_fobjc_nonfragile_abi, + options::OPT_fno_objc_nonfragile_abi, + nonFragileABIIsDefault)) { + // Determine the non-fragile ABI version to use. +#ifdef DISABLE_DEFAULT_NONFRAGILEABI_TWO + unsigned nonFragileABIVersion = 1; +#else + unsigned nonFragileABIVersion = 2; +#endif + + if (Arg *abiArg = args.getLastArg( + options::OPT_fobjc_nonfragile_abi_version_EQ)) { + StringRef value = abiArg->getValue(args); + if (value == "1") + nonFragileABIVersion = 1; + else if (value == "2") + nonFragileABIVersion = 2; + else + getToolChain().getDriver().Diag(diag::err_drv_clang_unsupported) + << value; + } + + objcABIVersion = 1 + nonFragileABIVersion; + } else { + objcABIVersion = 1; + } + } + + // We don't actually care about the ABI version other than whether + // it's non-fragile. + bool isNonFragile = objcABIVersion != 1; + + // If we have no runtime argument, ask the toolchain for its default runtime. + // However, the rewriter only really supports the Mac runtime, so assume that. + ObjCRuntime runtime; + if (!runtimeArg) { + switch (rewriteKind) { + case RK_None: + runtime = getToolChain().getDefaultObjCRuntime(isNonFragile); + break; + case RK_Fragile: + runtime = ObjCRuntime(ObjCRuntime::FragileMacOSX, VersionTuple()); + break; + case RK_NonFragile: + runtime = ObjCRuntime(ObjCRuntime::MacOSX, VersionTuple()); + break; + } + + // -fnext-runtime + } else if (runtimeArg->getOption().matches(options::OPT_fnext_runtime)) { + // On Darwin, make this use the default behavior for the toolchain. + if (getToolChain().getTriple().isOSDarwin()) { + runtime = getToolChain().getDefaultObjCRuntime(isNonFragile); + + // Otherwise, build for a generic macosx port. + } else { + runtime = ObjCRuntime(ObjCRuntime::MacOSX, VersionTuple()); + } + + // -fgnu-runtime + } else { + assert(runtimeArg->getOption().matches(options::OPT_fgnu_runtime)); + // Legacy behaviour is to target the gnustep runtime if we are in + // non-fragile mode or the GCC runtime in fragile mode.
+ if (isNonFragile) + runtime = ObjCRuntime(ObjCRuntime::GNUstep, VersionTuple()); + else + runtime = ObjCRuntime(ObjCRuntime::GCC, VersionTuple()); + } + + cmdArgs.push_back(args.MakeArgString( + "-fobjc-runtime=" + runtime.getAsString())); + return runtime; +} + void ClangAs::ConstructJob(Compilation &C, const JobAction &JA, const InputInfo &Output, const InputInfoList &Inputs, @@ -4024,9 +4287,6 @@ void darwin::Link::AddLinkArgs(Compilation &C, } else if (const Arg *A = Args.getLastArg(options::OPT_isysroot)) { CmdArgs.push_back("-syslibroot"); CmdArgs.push_back(A->getValue(Args)); - } else if (getDarwinToolChain().isTargetIPhoneOS()) { - CmdArgs.push_back("-syslibroot"); - CmdArgs.push_back("/Developer/SDKs/Extra"); } Args.AddLastArg(CmdArgs, options::OPT_twolevel__namespace); @@ -4085,7 +4345,6 @@ void darwin::Link::ConstructJob(Compilation &C, const JobAction &JA, Args.AddAllArgs(CmdArgs, options::OPT_t); Args.AddAllArgs(CmdArgs, options::OPT_Z_Flag); Args.AddAllArgs(CmdArgs, options::OPT_u_Group); - Args.AddAllArgs(CmdArgs, options::OPT_A); Args.AddLastArg(CmdArgs, options::OPT_e); Args.AddAllArgs(CmdArgs, options::OPT_m_Separate); Args.AddAllArgs(CmdArgs, options::OPT_r); @@ -4099,8 +4358,7 @@ void darwin::Link::ConstructJob(Compilation &C, const JobAction &JA, CmdArgs.push_back("-o"); CmdArgs.push_back(Output.getFilename()); - if (!Args.hasArg(options::OPT_A) && - !Args.hasArg(options::OPT_nostdlib) && + if (!Args.hasArg(options::OPT_nostdlib) && !Args.hasArg(options::OPT_nostartfiles)) { // Derived from startfile spec. if (Args.hasArg(options::OPT_dynamiclib)) { @@ -4144,6 +4402,14 @@ void darwin::Link::ConstructJob(Compilation &C, const JobAction &JA, // darwin_crt2 spec is empty. } + // By default on OS X 10.8 and later, we don't link with a crt1.o + // file and the linker knows to use _main as the entry point. But, + // when compiling with -pg, we need to link with the gcrt1.o file, + // so pass the -no_new_main option to tell the linker to use the + // "start" symbol as the entry point. + if (getDarwinToolChain().isTargetMacOS() && + !getDarwinToolChain().isMacosxVersionLT(10, 8)) + CmdArgs.push_back("-no_new_main"); } else { if (Args.hasArg(options::OPT_static) || Args.hasArg(options::OPT_object) || @@ -4201,30 +4467,30 @@ void darwin::Link::ConstructJob(Compilation &C, const JobAction &JA, // This is more complicated in gcc... CmdArgs.push_back("-lgomp"); - getDarwinToolChain().AddLinkSearchPathArgs(Args, CmdArgs); - - if (isObjCRuntimeLinked(Args)) { + AddLinkerInputs(getToolChain(), Inputs, Args, CmdArgs); + + if (isObjCRuntimeLinked(Args) && + !Args.hasArg(options::OPT_nostdlib) && + !Args.hasArg(options::OPT_nodefaultlibs)) { // Avoid linking compatibility stubs on i386 mac. if (!getDarwinToolChain().isTargetMacOS() || getDarwinToolChain().getArchName() != "i386") { // If we don't have ARC or subscripting runtime support, link in the // runtime stubs. We have to do this *before* adding any of the normal // linker inputs so that its initializer gets run first. - ObjCRuntime runtime; - getDarwinToolChain().configureObjCRuntime(runtime); + ObjCRuntime runtime = + getDarwinToolChain().getDefaultObjCRuntime(/*nonfragile*/ true); // We use arclite library for both ARC and subscripting support. 
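To recap the ABI numbering that AddObjCRuntimeArgs works through above: version 1 is the traditional fragile ABI, versions 2 and 3 are the non-fragile ABI (v1 and v2), and the final runtime choice only cares whether the result is non-fragile. A compact sketch of the -fgnu-runtime branch under that rule; the enum names merely echo the patch, they are not the real ObjCRuntime class:

#include <iostream>

enum class RuntimeKind { GCC, GNUstep };

static RuntimeKind pickGNURuntime(unsigned ObjCABIVersion) {
  bool IsNonFragile = ObjCABIVersion != 1;
  // Mirrors the legacy behaviour described above: GNUstep in non-fragile
  // mode, the GCC runtime in fragile mode.
  return IsNonFragile ? RuntimeKind::GNUstep : RuntimeKind::GCC;
}

int main() {
  std::cout << (pickGNURuntime(3) == RuntimeKind::GNUstep) << "\n"; // 1
  std::cout << (pickGNURuntime(1) == RuntimeKind::GCC) << "\n";     // 1
  return 0;
}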
- if ((!runtime.HasARC && isObjCAutoRefCount(Args)) || - !runtime.HasSubscripting) + if ((!runtime.hasARC() && isObjCAutoRefCount(Args)) || + !runtime.hasSubscripting()) getDarwinToolChain().AddLinkARCArgs(Args, CmdArgs); - CmdArgs.push_back("-framework"); - CmdArgs.push_back("Foundation"); } + CmdArgs.push_back("-framework"); + CmdArgs.push_back("Foundation"); // Link libobj. CmdArgs.push_back("-lobjc"); } - AddLinkerInputs(getToolChain(), Inputs, Args, CmdArgs); - if (LinkingOutput) { CmdArgs.push_back("-arch_multiple"); CmdArgs.push_back("-final_output"); @@ -4245,8 +4511,7 @@ void darwin::Link::ConstructJob(Compilation &C, const JobAction &JA, getDarwinToolChain().AddLinkRuntimeLibArgs(Args, CmdArgs); } - if (!Args.hasArg(options::OPT_A) && - !Args.hasArg(options::OPT_nostdlib) && + if (!Args.hasArg(options::OPT_nostdlib) && !Args.hasArg(options::OPT_nostartfiles)) { // endfile_spec is empty. } @@ -4698,6 +4963,142 @@ void openbsd::Link::ConstructJob(Compilation &C, const JobAction &JA, C.addCommand(new Command(JA, *this, Exec, CmdArgs)); } +void bitrig::Assemble::ConstructJob(Compilation &C, const JobAction &JA, + const InputInfo &Output, + const InputInfoList &Inputs, + const ArgList &Args, + const char *LinkingOutput) const { + ArgStringList CmdArgs; + + Args.AddAllArgValues(CmdArgs, options::OPT_Wa_COMMA, + options::OPT_Xassembler); + + CmdArgs.push_back("-o"); + CmdArgs.push_back(Output.getFilename()); + + for (InputInfoList::const_iterator + it = Inputs.begin(), ie = Inputs.end(); it != ie; ++it) { + const InputInfo &II = *it; + CmdArgs.push_back(II.getFilename()); + } + + const char *Exec = + Args.MakeArgString(getToolChain().GetProgramPath("as")); + C.addCommand(new Command(JA, *this, Exec, CmdArgs)); +} + +void bitrig::Link::ConstructJob(Compilation &C, const JobAction &JA, + const InputInfo &Output, + const InputInfoList &Inputs, + const ArgList &Args, + const char *LinkingOutput) const { + const Driver &D = getToolChain().getDriver(); + ArgStringList CmdArgs; + + if ((!Args.hasArg(options::OPT_nostdlib)) && + (!Args.hasArg(options::OPT_shared))) { + CmdArgs.push_back("-e"); + CmdArgs.push_back("__start"); + } + + if (Args.hasArg(options::OPT_static)) { + CmdArgs.push_back("-Bstatic"); + } else { + if (Args.hasArg(options::OPT_rdynamic)) + CmdArgs.push_back("-export-dynamic"); + CmdArgs.push_back("--eh-frame-hdr"); + CmdArgs.push_back("-Bdynamic"); + if (Args.hasArg(options::OPT_shared)) { + CmdArgs.push_back("-shared"); + } else { + CmdArgs.push_back("-dynamic-linker"); + CmdArgs.push_back("/usr/libexec/ld.so"); + } + } + + if (Output.isFilename()) { + CmdArgs.push_back("-o"); + CmdArgs.push_back(Output.getFilename()); + } else { + assert(Output.isNothing() && "Invalid output."); + } + + if (!Args.hasArg(options::OPT_nostdlib) && + !Args.hasArg(options::OPT_nostartfiles)) { + if (!Args.hasArg(options::OPT_shared)) { + if (Args.hasArg(options::OPT_pg)) + CmdArgs.push_back(Args.MakeArgString( + getToolChain().GetFilePath("gcrt0.o"))); + else + CmdArgs.push_back(Args.MakeArgString( + getToolChain().GetFilePath("crt0.o"))); + CmdArgs.push_back(Args.MakeArgString( + getToolChain().GetFilePath("crtbegin.o"))); + } else { + CmdArgs.push_back(Args.MakeArgString( + getToolChain().GetFilePath("crtbeginS.o"))); + } + } + + Args.AddAllArgs(CmdArgs, options::OPT_L); + Args.AddAllArgs(CmdArgs, options::OPT_T_Group); + Args.AddAllArgs(CmdArgs, options::OPT_e); + + AddLinkerInputs(getToolChain(), Inputs, Args, CmdArgs); + + if (!Args.hasArg(options::OPT_nostdlib) && + 
!Args.hasArg(options::OPT_nodefaultlibs)) { + if (D.CCCIsCXX) { + getToolChain().AddCXXStdlibLibArgs(Args, CmdArgs); + if (Args.hasArg(options::OPT_pg)) + CmdArgs.push_back("-lm_p"); + else + CmdArgs.push_back("-lm"); + } + + if (Args.hasArg(options::OPT_pthread)) + CmdArgs.push_back("-lpthread"); + if (!Args.hasArg(options::OPT_shared)) { + if (Args.hasArg(options::OPT_pg)) + CmdArgs.push_back("-lc_p"); + else + CmdArgs.push_back("-lc"); + } + + std::string myarch = "-lclang_rt."; + const llvm::Triple &T = getToolChain().getTriple(); + llvm::Triple::ArchType Arch = T.getArch(); + switch (Arch) { + case llvm::Triple::arm: + myarch += ("arm"); + break; + case llvm::Triple::x86: + myarch += ("i386"); + break; + case llvm::Triple::x86_64: + myarch += ("amd64"); + break; + default: + assert(0 && "Unsupported architecture"); + } + CmdArgs.push_back(Args.MakeArgString(myarch)); + } + + if (!Args.hasArg(options::OPT_nostdlib) && + !Args.hasArg(options::OPT_nostartfiles)) { + if (!Args.hasArg(options::OPT_shared)) + CmdArgs.push_back(Args.MakeArgString( + getToolChain().GetFilePath("crtend.o"))); + else + CmdArgs.push_back(Args.MakeArgString( + getToolChain().GetFilePath("crtendS.o"))); + } + + const char *Exec = + Args.MakeArgString(getToolChain().GetProgramPath("ld")); + C.addCommand(new Command(JA, *this, Exec, CmdArgs)); +} + void freebsd::Assemble::ConstructJob(Compilation &C, const JobAction &JA, const InputInfo &Output, const InputInfoList &Inputs, @@ -4744,6 +5145,14 @@ void freebsd::Link::ConstructJob(Compilation &C, const JobAction &JA, const Driver &D = getToolChain().getDriver(); ArgStringList CmdArgs; + // Silence warning for "clang -g foo.o -o foo" + Args.ClaimAllArgs(options::OPT_g_Group); + // and "clang -emit-llvm foo.o -o foo" + Args.ClaimAllArgs(options::OPT_emit_llvm); + // and for "clang -w foo.o -o foo". Other warning options are already + // handled somewhere else. 
+ Args.ClaimAllArgs(options::OPT_w); + if (!D.SysRoot.empty()) CmdArgs.push_back(Args.MakeArgString("--sysroot=" + D.SysRoot)); @@ -4759,6 +5168,14 @@ void freebsd::Link::ConstructJob(Compilation &C, const JobAction &JA, CmdArgs.push_back("-dynamic-linker"); CmdArgs.push_back("/libexec/ld-elf.so.1"); } + if (getToolChain().getTriple().getOSMajorVersion() >= 9) { + llvm::Triple::ArchType Arch = getToolChain().getArch(); + if (Arch == llvm::Triple::arm || Arch == llvm::Triple::sparc || + Arch == llvm::Triple::x86 || Arch == llvm::Triple::x86_64) { + CmdArgs.push_back("--hash-style=both"); + } + } + CmdArgs.push_back("--enable-new-dtags"); } // When building 32-bit code on FreeBSD/amd64, we have to explicitly @@ -5064,6 +5481,14 @@ void linuxtools::Assemble::ConstructJob(Compilation &C, const JobAction &JA, StringRef MArch = getToolChain().getArchName(); if (MArch == "armv7" || MArch == "armv7a" || MArch == "armv7-a") CmdArgs.push_back("-mfpu=neon"); + + StringRef ARMFloatABI = getARMFloatABI(getToolChain().getDriver(), Args, + getToolChain().getTriple()); + CmdArgs.push_back(Args.MakeArgString("-mfloat-abi=" + ARMFloatABI)); + + Args.AddLastArg(CmdArgs, options::OPT_march_EQ); + Args.AddLastArg(CmdArgs, options::OPT_mcpu_EQ); + Args.AddLastArg(CmdArgs, options::OPT_mfpu_EQ); } else if (getToolChain().getArch() == llvm::Triple::mips || getToolChain().getArch() == llvm::Triple::mipsel || getToolChain().getArch() == llvm::Triple::mips64 || @@ -5089,11 +5514,19 @@ void linuxtools::Assemble::ConstructJob(Compilation &C, const JobAction &JA, CmdArgs.push_back("-EB"); else CmdArgs.push_back("-EL"); - } - Args.AddLastArg(CmdArgs, options::OPT_march_EQ); - Args.AddLastArg(CmdArgs, options::OPT_mcpu_EQ); - Args.AddLastArg(CmdArgs, options::OPT_mfpu_EQ); + Arg *LastPICArg = Args.getLastArg(options::OPT_fPIC, options::OPT_fno_PIC, + options::OPT_fpic, options::OPT_fno_pic, + options::OPT_fPIE, options::OPT_fno_PIE, + options::OPT_fpie, options::OPT_fno_pie); + if (LastPICArg && + (LastPICArg->getOption().matches(options::OPT_fPIC) || + LastPICArg->getOption().matches(options::OPT_fpic) || + LastPICArg->getOption().matches(options::OPT_fPIE) || + LastPICArg->getOption().matches(options::OPT_fpie))) { + CmdArgs.push_back("-KPIC"); + } + } Args.AddAllArgValues(CmdArgs, options::OPT_Wa_COMMA, options::OPT_Xassembler); @@ -5112,9 +5545,10 @@ void linuxtools::Assemble::ConstructJob(Compilation &C, const JobAction &JA, C.addCommand(new Command(JA, *this, Exec, CmdArgs)); } -static void AddLibgcc(const Driver &D, ArgStringList &CmdArgs, - const ArgList &Args) { - bool StaticLibgcc = Args.hasArg(options::OPT_static) || +static void AddLibgcc(llvm::Triple Triple, const Driver &D, + ArgStringList &CmdArgs, const ArgList &Args) { + bool isAndroid = Triple.getEnvironment() == llvm::Triple::ANDROIDEABI; + bool StaticLibgcc = isAndroid || Args.hasArg(options::OPT_static) || Args.hasArg(options::OPT_static_libgcc); if (!D.CCCIsCXX) CmdArgs.push_back("-lgcc"); @@ -5130,7 +5564,7 @@ static void AddLibgcc(const Driver &D, ArgStringList &CmdArgs, CmdArgs.push_back("--no-as-needed"); } - if (StaticLibgcc) + if (StaticLibgcc && !isAndroid) CmdArgs.push_back("-lgcc_eh"); else if (!Args.hasArg(options::OPT_shared) && D.CCCIsCXX) CmdArgs.push_back("-lgcc"); @@ -5144,13 +5578,16 @@ void linuxtools::Link::ConstructJob(Compilation &C, const JobAction &JA, const toolchains::Linux& ToolChain = static_cast(getToolChain()); const Driver &D = ToolChain.getDriver(); + const bool isAndroid = ToolChain.getTriple().getEnvironment() == + 
llvm::Triple::ANDROIDEABI; + ArgStringList CmdArgs; // Silence warning for "clang -g foo.o -o foo" Args.ClaimAllArgs(options::OPT_g_Group); // and "clang -emit-llvm foo.o -o foo" Args.ClaimAllArgs(options::OPT_emit_llvm); - // and for "clang -g foo.o -o foo". Other warning options are already + // and for "clang -w foo.o -o foo". Other warning options are already // handled somewhere else. Args.ClaimAllArgs(options::OPT_w); @@ -5204,6 +5641,10 @@ void linuxtools::Link::ConstructJob(Compilation &C, const JobAction &JA, CmdArgs.push_back("-static"); } else if (Args.hasArg(options::OPT_shared)) { CmdArgs.push_back("-shared"); + if ((ToolChain.getArch() == llvm::Triple::arm + || ToolChain.getArch() == llvm::Triple::thumb) && isAndroid) { + CmdArgs.push_back("-Bsymbolic"); + } } if (ToolChain.getArch() == llvm::Triple::arm || @@ -5211,11 +5652,17 @@ void linuxtools::Link::ConstructJob(Compilation &C, const JobAction &JA, (!Args.hasArg(options::OPT_static) && !Args.hasArg(options::OPT_shared))) { CmdArgs.push_back("-dynamic-linker"); - if (ToolChain.getArch() == llvm::Triple::x86) + if (isAndroid) + CmdArgs.push_back("/system/bin/linker"); + else if (ToolChain.getArch() == llvm::Triple::x86) CmdArgs.push_back("/lib/ld-linux.so.2"); else if (ToolChain.getArch() == llvm::Triple::arm || - ToolChain.getArch() == llvm::Triple::thumb) - CmdArgs.push_back("/lib/ld-linux.so.3"); + ToolChain.getArch() == llvm::Triple::thumb) { + if (ToolChain.getTriple().getEnvironment() == llvm::Triple::GNUEABIHF) + CmdArgs.push_back("/lib/ld-linux-armhf.so.3"); + else + CmdArgs.push_back("/lib/ld-linux.so.3"); + } else if (ToolChain.getArch() == llvm::Triple::mips || ToolChain.getArch() == llvm::Triple::mipsel) CmdArgs.push_back("/lib/ld.so.1"); @@ -5235,25 +5682,27 @@ void linuxtools::Link::ConstructJob(Compilation &C, const JobAction &JA, if (!Args.hasArg(options::OPT_nostdlib) && !Args.hasArg(options::OPT_nostartfiles)) { - const char *crt1 = NULL; - if (!Args.hasArg(options::OPT_shared)){ - if (Args.hasArg(options::OPT_pie)) - crt1 = "Scrt1.o"; - else - crt1 = "crt1.o"; - } - if (crt1) - CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath(crt1))); + if (!isAndroid) { + const char *crt1 = NULL; + if (!Args.hasArg(options::OPT_shared)){ + if (Args.hasArg(options::OPT_pie)) + crt1 = "Scrt1.o"; + else + crt1 = "crt1.o"; + } + if (crt1) + CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath(crt1))); - CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath("crti.o"))); + CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath("crti.o"))); + } const char *crtbegin; if (Args.hasArg(options::OPT_static)) - crtbegin = "crtbeginT.o"; + crtbegin = isAndroid ? "crtbegin_static.o" : "crtbeginT.o"; else if (Args.hasArg(options::OPT_shared) || Args.hasArg(options::OPT_pie)) - crtbegin = "crtbeginS.o"; + crtbegin = isAndroid ? "crtbegin_so.o" : "crtbeginS.o"; else - crtbegin = "crtbegin.o"; + crtbegin = isAndroid ? 
"crtbegin_dynamic.o" : "crtbegin.o"; CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath(crtbegin))); } @@ -5276,7 +5725,9 @@ void linuxtools::Link::ConstructJob(Compilation &C, const JobAction &JA, AddLinkerInputs(ToolChain, Inputs, Args, CmdArgs); - if (D.CCCIsCXX && !Args.hasArg(options::OPT_nostdlib)) { + if (D.CCCIsCXX && + !Args.hasArg(options::OPT_nostdlib) && + !Args.hasArg(options::OPT_nodefaultlibs)) { bool OnlyLibstdcxxStatic = Args.hasArg(options::OPT_static_libstdcxx) && !Args.hasArg(options::OPT_static); if (OnlyLibstdcxxStatic) @@ -5289,34 +5740,37 @@ void linuxtools::Link::ConstructJob(Compilation &C, const JobAction &JA, // Call this before we add the C run-time. addAsanRTLinux(getToolChain(), Args, CmdArgs); + addTsanRTLinux(getToolChain(), Args, CmdArgs); if (!Args.hasArg(options::OPT_nostdlib)) { - if (Args.hasArg(options::OPT_static)) - CmdArgs.push_back("--start-group"); + if (!Args.hasArg(options::OPT_nodefaultlibs)) { + if (Args.hasArg(options::OPT_static)) + CmdArgs.push_back("--start-group"); - AddLibgcc(D, CmdArgs, Args); + AddLibgcc(ToolChain.getTriple(), D, CmdArgs, Args); - if (Args.hasArg(options::OPT_pthread) || - Args.hasArg(options::OPT_pthreads)) - CmdArgs.push_back("-lpthread"); - - CmdArgs.push_back("-lc"); + if (Args.hasArg(options::OPT_pthread) || + Args.hasArg(options::OPT_pthreads)) + CmdArgs.push_back("-lpthread"); - if (Args.hasArg(options::OPT_static)) - CmdArgs.push_back("--end-group"); - else - AddLibgcc(D, CmdArgs, Args); + CmdArgs.push_back("-lc"); + if (Args.hasArg(options::OPT_static)) + CmdArgs.push_back("--end-group"); + else + AddLibgcc(ToolChain.getTriple(), D, CmdArgs, Args); + } if (!Args.hasArg(options::OPT_nostartfiles)) { const char *crtend; if (Args.hasArg(options::OPT_shared) || Args.hasArg(options::OPT_pie)) - crtend = "crtendS.o"; + crtend = isAndroid ? "crtend_so.o" : "crtendS.o"; else - crtend = "crtend.o"; + crtend = isAndroid ? "crtend_android.o" : "crtend.o"; CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath(crtend))); - CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath("crtn.o"))); + if (!isAndroid) + CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath("crtn.o"))); } } @@ -5581,7 +6035,14 @@ void visualstudio::Link::ConstructJob(Compilation &C, const JobAction &JA, CmdArgs.push_back("-nologo"); - AddLinkerInputs(getToolChain(), Inputs, Args, CmdArgs); + Args.AddAllArgValues(CmdArgs, options::OPT_l); + + // Add filenames immediately. 
+ for (InputInfoList::const_iterator + it = Inputs.begin(), ie = Inputs.end(); it != ie; ++it) { + if (it->isFilename()) + CmdArgs.push_back(it->getFilename()); + } const char *Exec = Args.MakeArgString(getToolChain().GetProgramPath("link.exe")); diff --git a/lib/Driver/Tools.h b/lib/Driver/Tools.h index 651a8f2..999c57a 100644 --- a/lib/Driver/Tools.h +++ b/lib/Driver/Tools.h @@ -18,6 +18,8 @@ #include "llvm/Support/Compiler.h" namespace clang { + class ObjCRuntime; + namespace driver { class Driver; @@ -39,10 +41,16 @@ namespace tools { void AddARMTargetArgs(const ArgList &Args, ArgStringList &CmdArgs, bool KernelOrKext) const; void AddMIPSTargetArgs(const ArgList &Args, ArgStringList &CmdArgs) const; + void AddPPCTargetArgs(const ArgList &Args, ArgStringList &CmdArgs) const; void AddSparcTargetArgs(const ArgList &Args, ArgStringList &CmdArgs) const; void AddX86TargetArgs(const ArgList &Args, ArgStringList &CmdArgs) const; void AddHexagonTargetArgs (const ArgList &Args, ArgStringList &CmdArgs) const; + enum RewriteKind { RK_None, RK_Fragile, RK_NonFragile }; + + ObjCRuntime AddObjCRuntimeArgs(const ArgList &args, ArgStringList &cmdArgs, + RewriteKind rewrite) const; + public: Clang(const ToolChain &TC) : Tool("clang", "clang frontend", TC) {} @@ -369,6 +377,36 @@ namespace openbsd { }; } // end namespace openbsd + /// bitrig -- Directly call GNU Binutils assembler and linker +namespace bitrig { + class LLVM_LIBRARY_VISIBILITY Assemble : public Tool { + public: + Assemble(const ToolChain &TC) : Tool("bitrig::Assemble", "assembler", + TC) {} + + virtual bool hasIntegratedCPP() const { return false; } + + virtual void ConstructJob(Compilation &C, const JobAction &JA, + const InputInfo &Output, + const InputInfoList &Inputs, + const ArgList &TCArgs, + const char *LinkingOutput) const; + }; + class LLVM_LIBRARY_VISIBILITY Link : public Tool { + public: + Link(const ToolChain &TC) : Tool("bitrig::Link", "linker", TC) {} + + virtual bool hasIntegratedCPP() const { return false; } + virtual bool isLinkJob() const { return true; } + + virtual void ConstructJob(Compilation &C, const JobAction &JA, + const InputInfo &Output, + const InputInfoList &Inputs, + const ArgList &TCArgs, + const char *LinkingOutput) const; + }; +} // end namespace bitrig + /// freebsd -- Directly call GNU Binutils assembler and linker namespace freebsd { class LLVM_LIBRARY_VISIBILITY Assemble : public Tool { diff --git a/lib/Driver/Types.cpp b/lib/Driver/Types.cpp index 50742fe..9d8fcfd 100644 --- a/lib/Driver/Types.cpp +++ b/lib/Driver/Types.cpp @@ -67,7 +67,8 @@ bool types::appendSuffixForType(ID Id) { bool types::canLipoType(ID Id) { return (Id == TY_Nothing || Id == TY_Image || - Id == TY_Object); + Id == TY_Object || + Id == TY_LTO_BC); } bool types::isAcceptedByClang(ID Id) { @@ -129,6 +130,7 @@ bool types::isCXX(ID Id) { case TY_ObjCXX: case TY_PP_ObjCXX: case TY_CXXHeader: case TY_PP_CXXHeader: case TY_ObjCXXHeader: case TY_PP_ObjCXXHeader: + case TY_CUDA: return true; } } diff --git a/lib/Edit/CMakeLists.txt b/lib/Edit/CMakeLists.txt index c87478c..cce1c19 100644 --- a/lib/Edit/CMakeLists.txt +++ b/lib/Edit/CMakeLists.txt @@ -1,7 +1,20 @@ -set(LLVM_USED_LIBS clangBasic clangAST clangLex) - add_clang_library(clangEdit Commit.cpp EditedSource.cpp RewriteObjCFoundationAPI.cpp ) + +add_dependencies(clangEdit + ClangAttrClasses + ClangAttrList + ClangCommentNodes + ClangDeclNodes + ClangDiagnosticCommon + ClangStmtNodes + ) + +target_link_libraries(clangEdit + clangBasic + clangAST + clangLex + ) diff --git 
a/lib/Edit/Commit.cpp b/lib/Edit/Commit.cpp index c45ee1f..41c72e4 100644 --- a/lib/Edit/Commit.cpp +++ b/lib/Edit/Commit.cpp @@ -332,6 +332,7 @@ bool Commit::canReplaceText(SourceLocation loc, StringRef text, if (invalidTemp) return false; + Len = text.size(); return file.substr(Offs.getOffset()).startswith(text); } diff --git a/lib/Edit/EditedSource.cpp b/lib/Edit/EditedSource.cpp index 5b7fa4a..b2a1663 100644 --- a/lib/Edit/EditedSource.cpp +++ b/lib/Edit/EditedSource.cpp @@ -100,8 +100,11 @@ bool EditedSource::commitInsertFromRange(SourceLocation OrigLoc, FileOffset B = I->first; FileOffset E = B.getWithOffset(FA.RemoveLen); + if (BeginOffs == B) + break; + if (BeginOffs < E) { - if (BeginOffs >= B) { + if (BeginOffs > B) { BeginOffs = E; ++I; } diff --git a/lib/Edit/RewriteObjCFoundationAPI.cpp b/lib/Edit/RewriteObjCFoundationAPI.cpp index 24a0db1..d15b7a7 100644 --- a/lib/Edit/RewriteObjCFoundationAPI.cpp +++ b/lib/Edit/RewriteObjCFoundationAPI.cpp @@ -14,6 +14,7 @@ #include "clang/Edit/Rewriters.h" #include "clang/Edit/Commit.h" #include "clang/Lex/Lexer.h" +#include "clang/AST/ASTContext.h" #include "clang/AST/ExprObjC.h" #include "clang/AST/ExprCXX.h" #include "clang/AST/NSAPI.h" @@ -22,7 +23,8 @@ using namespace clang; using namespace edit; static bool checkForLiteralCreation(const ObjCMessageExpr *Msg, - IdentifierInfo *&ClassId) { + IdentifierInfo *&ClassId, + const LangOptions &LangOpts) { if (!Msg || Msg->isImplicit() || !Msg->getMethodDecl()) return false; @@ -34,6 +36,18 @@ static bool checkForLiteralCreation(const ObjCMessageExpr *Msg, if (Msg->getReceiverKind() == ObjCMessageExpr::Class) return true; + // When in ARC mode we also convert "[[.. alloc] init]" messages to literals, + // since the change from +1 to +0 will be handled fine by ARC. 
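The ARC comment above describes the acceptance rule the next hunk implements: a message whose receiver is itself an 'alloc' message may also be rewritten to a literal, because the +1 to +0 ownership change is safe under ARC. A toy model of that one-level receiver check, with invented structs rather than clang's ObjCMessageExpr:

// Illustrative sketch only; 'Msg' is an invented stand-in for the AST node.
#include <string>

struct Msg {
  std::string Selector;
  const Msg *InstanceReceiver; // null if the receiver is a class
};

static bool receiverIsAllocMessage(const Msg &M) {
  // Accept [[Cls alloc] init...] by looking one level into the receiver.
  return M.InstanceReceiver && M.InstanceReceiver->Selector == "alloc";
}

int main() {
  Msg Alloc{"alloc", nullptr};
  Msg Init{"initWithString:", &Alloc};
  return receiverIsAllocMessage(Init) ? 0 : 1; // 0: detected
}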
+ if (LangOpts.ObjCAutoRefCount) { + if (Msg->getReceiverKind() == ObjCMessageExpr::Instance) { + if (const ObjCMessageExpr *Rec = dyn_cast<ObjCMessageExpr>( + Msg->getInstanceReceiver()->IgnoreParenImpCasts())) { + if (Rec->getMethodFamily() == OMF_alloc) + return true; + } + } + } + return false; } @@ -44,7 +58,7 @@ static bool checkForLiteralCreation(const ObjCMessageExpr *Msg, bool edit::rewriteObjCRedundantCallWithLiteral(const ObjCMessageExpr *Msg, const NSAPI &NS, Commit &commit) { IdentifierInfo *II = 0; - if (!checkForLiteralCreation(Msg, II)) + if (!checkForLiteralCreation(Msg, II, NS.getASTContext().getLangOpts())) return false; if (Msg->getNumArgs() != 1) return false; @@ -54,16 +68,19 @@ bool edit::rewriteObjCRedundantCallWithLiteral(const ObjCMessageExpr *Msg, if ((isa<ObjCStringLiteral>(Arg) && NS.getNSClassId(NSAPI::ClassId_NSString) == II && - NS.getNSStringSelector(NSAPI::NSStr_stringWithString) == Sel) || + (NS.getNSStringSelector(NSAPI::NSStr_stringWithString) == Sel || + NS.getNSStringSelector(NSAPI::NSStr_initWithString) == Sel)) || (isa<ObjCArrayLiteral>(Arg) && NS.getNSClassId(NSAPI::ClassId_NSArray) == II && - NS.getNSArraySelector(NSAPI::NSArr_arrayWithArray) == Sel) || + (NS.getNSArraySelector(NSAPI::NSArr_arrayWithArray) == Sel || + NS.getNSArraySelector(NSAPI::NSArr_initWithArray) == Sel)) || (isa<ObjCDictionaryLiteral>(Arg) && NS.getNSClassId(NSAPI::ClassId_NSDictionary) == II && - NS.getNSDictionarySelector( - NSAPI::NSDict_dictionaryWithDictionary) == Sel)) { + (NS.getNSDictionarySelector( + NSAPI::NSDict_dictionaryWithDictionary) == Sel || + NS.getNSDictionarySelector(NSAPI::NSDict_initWithDictionary) == Sel))) { commit.replaceWithInner(Msg->getSourceRange(), Msg->getArg(0)->getSourceRange()); @@ -77,15 +94,91 @@ bool edit::rewriteObjCRedundantCallWithLiteral(const ObjCMessageExpr *Msg, // rewriteToObjCSubscriptSyntax. //===----------------------------------------------------------------------===// +/// \brief Check for classes that accept 'objectForKey:' (or the other selectors +/// that the migrator handles) but return their instances as 'id', resulting +/// in the compiler resolving 'objectForKey:' as the method from NSDictionary. +/// +/// When checking if we can convert to subscripting syntax, check whether +/// the receiver is a result of a class method from a hardcoded list of +/// such classes. In such a case return the specific class as the interface +/// of the receiver. +/// +/// FIXME: Remove this when these classes start using 'instancetype'. +static const ObjCInterfaceDecl * +maybeAdjustInterfaceForSubscriptingCheck(const ObjCInterfaceDecl *IFace, + const Expr *Receiver, + ASTContext &Ctx) { + assert(IFace && Receiver); + + // If the receiver has type 'id'... + if (!Ctx.isObjCIdType(Receiver->getType().getUnqualifiedType())) + return IFace; + + const ObjCMessageExpr * + InnerMsg = dyn_cast<ObjCMessageExpr>(Receiver->IgnoreParenCasts()); + if (!InnerMsg) + return IFace; + + QualType ClassRec; + switch (InnerMsg->getReceiverKind()) { + case ObjCMessageExpr::Instance: + case ObjCMessageExpr::SuperInstance: + return IFace; + + case ObjCMessageExpr::Class: + ClassRec = InnerMsg->getClassReceiver(); + break; + case ObjCMessageExpr::SuperClass: + ClassRec = InnerMsg->getSuperType(); + break; + } + + if (ClassRec.isNull()) + return IFace; + + // ...and it is the result of a class message... + + const ObjCObjectType *ObjTy = ClassRec->getAs<ObjCObjectType>(); + if (!ObjTy) + return IFace; + const ObjCInterfaceDecl *OID = ObjTy->getInterface(); + + // ...and the receiving class is NSMapTable or NSLocale, return that + // class as the receiving interface.
+ if (OID->getName() == "NSMapTable" || + OID->getName() == "NSLocale") + return OID; + + return IFace; +} + +static bool canRewriteToSubscriptSyntax(const ObjCInterfaceDecl *&IFace, + const ObjCMessageExpr *Msg, + ASTContext &Ctx, + Selector subscriptSel) { + const Expr *Rec = Msg->getInstanceReceiver(); + if (!Rec) + return false; + IFace = maybeAdjustInterfaceForSubscriptingCheck(IFace, Rec, Ctx); + + if (const ObjCMethodDecl *MD = IFace->lookupInstanceMethod(subscriptSel)) { + if (!MD->isUnavailable()) + return true; + } + return false; +} + +static bool subscriptOperatorNeedsParens(const Expr *FullExpr); + static void maybePutParensOnReceiver(const Expr *Receiver, Commit &commit) { - Receiver = Receiver->IgnoreImpCasts(); - if (isa(Receiver) || isa(Receiver)) { + if (subscriptOperatorNeedsParens(Receiver)) { SourceRange RecRange = Receiver->getSourceRange(); commit.insertWrap("(", RecRange, ")"); } } -static bool rewriteToSubscriptGet(const ObjCMessageExpr *Msg, Commit &commit) { +static bool rewriteToSubscriptGetCommon(const ObjCMessageExpr *Msg, + Commit &commit) { if (Msg->getNumArgs() != 1) return false; const Expr *Rec = Msg->getInstanceReceiver(); @@ -106,8 +199,34 @@ static bool rewriteToSubscriptGet(const ObjCMessageExpr *Msg, Commit &commit) { return true; } -static bool rewriteToArraySubscriptSet(const ObjCMessageExpr *Msg, +static bool rewriteToArraySubscriptGet(const ObjCInterfaceDecl *IFace, + const ObjCMessageExpr *Msg, + const NSAPI &NS, + Commit &commit) { + if (!canRewriteToSubscriptSyntax(IFace, Msg, NS.getASTContext(), + NS.getObjectAtIndexedSubscriptSelector())) + return false; + return rewriteToSubscriptGetCommon(Msg, commit); +} + +static bool rewriteToDictionarySubscriptGet(const ObjCInterfaceDecl *IFace, + const ObjCMessageExpr *Msg, + const NSAPI &NS, + Commit &commit) { + if (!canRewriteToSubscriptSyntax(IFace, Msg, NS.getASTContext(), + NS.getObjectForKeyedSubscriptSelector())) + return false; + return rewriteToSubscriptGetCommon(Msg, commit); +} + +static bool rewriteToArraySubscriptSet(const ObjCInterfaceDecl *IFace, + const ObjCMessageExpr *Msg, + const NSAPI &NS, Commit &commit) { + if (!canRewriteToSubscriptSyntax(IFace, Msg, NS.getASTContext(), + NS.getSetObjectAtIndexedSubscriptSelector())) + return false; + if (Msg->getNumArgs() != 2) return false; const Expr *Rec = Msg->getInstanceReceiver(); @@ -134,8 +253,14 @@ static bool rewriteToArraySubscriptSet(const ObjCMessageExpr *Msg, return true; } -static bool rewriteToDictionarySubscriptSet(const ObjCMessageExpr *Msg, +static bool rewriteToDictionarySubscriptSet(const ObjCInterfaceDecl *IFace, + const ObjCMessageExpr *Msg, + const NSAPI &NS, Commit &commit) { + if (!canRewriteToSubscriptSyntax(IFace, Msg, NS.getASTContext(), + NS.getSetObjectForKeyedSubscriptSelector())) + return false; + if (Msg->getNumArgs() != 2) return false; const Expr *Rec = Msg->getInstanceReceiver(); @@ -162,7 +287,7 @@ static bool rewriteToDictionarySubscriptSet(const ObjCMessageExpr *Msg, } bool edit::rewriteToObjCSubscriptSyntax(const ObjCMessageExpr *Msg, - const NSAPI &NS, Commit &commit) { + const NSAPI &NS, Commit &commit) { if (!Msg || Msg->isImplicit() || Msg->getReceiverKind() != ObjCMessageExpr::Instance) return false; @@ -175,25 +300,22 @@ bool edit::rewriteToObjCSubscriptSyntax(const ObjCMessageExpr *Msg, const_cast(Method)); if (!IFace) return false; - IdentifierInfo *II = IFace->getIdentifier(); Selector Sel = Msg->getSelector(); - if ((II == NS.getNSClassId(NSAPI::ClassId_NSArray) && - Sel == 
NS.getNSArraySelector(NSAPI::NSArr_objectAtIndex)) || - (II == NS.getNSClassId(NSAPI::ClassId_NSDictionary) && - Sel == NS.getNSDictionarySelector(NSAPI::NSDict_objectForKey))) - return rewriteToSubscriptGet(Msg, commit); + if (Sel == NS.getNSArraySelector(NSAPI::NSArr_objectAtIndex)) + return rewriteToArraySubscriptGet(IFace, Msg, NS, commit); + + if (Sel == NS.getNSDictionarySelector(NSAPI::NSDict_objectForKey)) + return rewriteToDictionarySubscriptGet(IFace, Msg, NS, commit); if (Msg->getNumArgs() != 2) return false; - if (II == NS.getNSClassId(NSAPI::ClassId_NSMutableArray) && - Sel == NS.getNSArraySelector(NSAPI::NSMutableArr_replaceObjectAtIndex)) - return rewriteToArraySubscriptSet(Msg, commit); + if (Sel == NS.getNSArraySelector(NSAPI::NSMutableArr_replaceObjectAtIndex)) + return rewriteToArraySubscriptSet(IFace, Msg, NS, commit); - if (II == NS.getNSClassId(NSAPI::ClassId_NSMutableDictionary) && - Sel == NS.getNSDictionarySelector(NSAPI::NSMutableDict_setObjectForKey)) - return rewriteToDictionarySubscriptSet(Msg, commit); + if (Sel == NS.getNSDictionarySelector(NSAPI::NSMutableDict_setObjectForKey)) + return rewriteToDictionarySubscriptSet(IFace, Msg, NS, commit); return false; } @@ -208,11 +330,15 @@ static bool rewriteToDictionaryLiteral(const ObjCMessageExpr *Msg, const NSAPI &NS, Commit &commit); static bool rewriteToNumberLiteral(const ObjCMessageExpr *Msg, const NSAPI &NS, Commit &commit); +static bool rewriteToNumericBoxedExpression(const ObjCMessageExpr *Msg, + const NSAPI &NS, Commit &commit); +static bool rewriteToStringBoxedExpression(const ObjCMessageExpr *Msg, + const NSAPI &NS, Commit &commit); bool edit::rewriteToObjCLiteralSyntax(const ObjCMessageExpr *Msg, const NSAPI &NS, Commit &commit) { IdentifierInfo *II = 0; - if (!checkForLiteralCreation(Msg, II)) + if (!checkForLiteralCreation(Msg, II, NS.getASTContext().getLangOpts())) return false; if (II == NS.getNSClassId(NSAPI::ClassId_NSArray)) @@ -221,6 +347,8 @@ bool edit::rewriteToObjCLiteralSyntax(const ObjCMessageExpr *Msg, return rewriteToDictionaryLiteral(Msg, NS, commit); if (II == NS.getNSClassId(NSAPI::ClassId_NSNumber)) return rewriteToNumberLiteral(Msg, NS, commit); + if (II == NS.getNSClassId(NSAPI::ClassId_NSString)) + return rewriteToStringBoxedExpression(Msg, NS, commit); return false; } @@ -229,6 +357,9 @@ bool edit::rewriteToObjCLiteralSyntax(const ObjCMessageExpr *Msg, // rewriteToArrayLiteral. //===----------------------------------------------------------------------===// +/// \brief Adds an explicit cast to 'id' if the type is not objc object. 
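Before the declaration that follows: objectifyExpr inserts an explicit '(id)' cast when a plain C pointer is used where a collection literal expects an Objective-C object. At the source-text level the effect is roughly this, sketched with strings and an invented helper:

// String-level picture only; the real code edits source ranges via Commit.
#include <iostream>
#include <string>

static std::string objectify(const std::string &Expr, bool NeedsParens) {
  // e.g. "p"          -> "(id)p"
  //      "flag ? p : q" -> "(id)(flag ? p : q)"
  return NeedsParens ? "(id)(" + Expr + ")" : "(id)" + Expr;
}

int main() {
  std::cout << objectify("p", false) << '\n'
            << objectify("flag ? p : q", true) << '\n';
}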
+static void objectifyExpr(const Expr *E, Commit &commit); + static bool rewriteToArrayLiteral(const ObjCMessageExpr *Msg, const NSAPI &NS, Commit &commit) { Selector Sel = Msg->getSelector(); @@ -244,19 +375,24 @@ static bool rewriteToArrayLiteral(const ObjCMessageExpr *Msg, if (Sel == NS.getNSArraySelector(NSAPI::NSArr_arrayWithObject)) { if (Msg->getNumArgs() != 1) return false; + objectifyExpr(Msg->getArg(0), commit); SourceRange ArgRange = Msg->getArg(0)->getSourceRange(); commit.replaceWithInner(MsgRange, ArgRange); commit.insertWrap("@[", ArgRange, "]"); return true; } - if (Sel == NS.getNSArraySelector(NSAPI::NSArr_arrayWithObjects)) { + if (Sel == NS.getNSArraySelector(NSAPI::NSArr_arrayWithObjects) || + Sel == NS.getNSArraySelector(NSAPI::NSArr_initWithObjects)) { if (Msg->getNumArgs() == 0) return false; const Expr *SentinelExpr = Msg->getArg(Msg->getNumArgs() - 1); if (!NS.getASTContext().isSentinelNullExpr(SentinelExpr)) return false; + for (unsigned i = 0, e = Msg->getNumArgs() - 1; i != e; ++i) + objectifyExpr(Msg->getArg(i), commit); + if (Msg->getNumArgs() == 1) { commit.replace(MsgRange, "@[]"); return true; @@ -291,6 +427,10 @@ static bool rewriteToDictionaryLiteral(const ObjCMessageExpr *Msg, NSAPI::NSDict_dictionaryWithObjectForKey)) { if (Msg->getNumArgs() != 2) return false; + + objectifyExpr(Msg->getArg(0), commit); + objectifyExpr(Msg->getArg(1), commit); + SourceRange ValRange = Msg->getArg(0)->getSourceRange(); SourceRange KeyRange = Msg->getArg(1)->getSourceRange(); // Insert key before the value. @@ -305,7 +445,8 @@ static bool rewriteToDictionaryLiteral(const ObjCMessageExpr *Msg, } if (Sel == NS.getNSDictionarySelector( - NSAPI::NSDict_dictionaryWithObjectsAndKeys)) { + NSAPI::NSDict_dictionaryWithObjectsAndKeys) || + Sel == NS.getNSDictionarySelector(NSAPI::NSDict_initWithObjectsAndKeys)) { if (Msg->getNumArgs() % 2 != 1) return false; unsigned SentinelIdx = Msg->getNumArgs() - 1; @@ -319,6 +460,9 @@ static bool rewriteToDictionaryLiteral(const ObjCMessageExpr *Msg, } for (unsigned i = 0; i < SentinelIdx; i += 2) { + objectifyExpr(Msg->getArg(i), commit); + objectifyExpr(Msg->getArg(i+1), commit); + SourceRange ValRange = Msg->getArg(i)->getSourceRange(); SourceRange KeyRange = Msg->getArg(i+1)->getSourceRange(); // Insert value after key. @@ -357,7 +501,7 @@ static bool rewriteToCharLiteral(const ObjCMessageExpr *Msg, return true; } - return false; + return rewriteToNumericBoxedExpression(Msg, NS, commit); } static bool rewriteToBoolLiteral(const ObjCMessageExpr *Msg, @@ -371,7 +515,7 @@ static bool rewriteToBoolLiteral(const ObjCMessageExpr *Msg, return true; } - return false; + return rewriteToNumericBoxedExpression(Msg, NS, commit); } namespace { @@ -473,10 +617,10 @@ static bool rewriteToNumberLiteral(const ObjCMessageExpr *Msg, literalE = UOE->getSubExpr(); } - // Only integer and floating literals; non-literals or imaginary literal - // cannot be rewritten. + // Only integer and floating literals, otherwise try to rewrite to boxed + // expression. 
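The comment above captures the file's general strategy: attempt the narrow literal rewrite first and fall back to the more general boxed '@( )' form. The cascade looks roughly like this, with placeholder predicates standing in for the real AST checks:

// Invented helpers; only the short-circuit fallback shape is the point.
#include <iostream>
#include <string>

static bool rewriteToLiteral(const std::string &E, std::string &Out) {
  if (E == "42") { Out = "@42"; return true; } // plain literal: use @42
  return false;
}

static bool rewriteToBoxed(const std::string &E, std::string &Out) {
  Out = "@(" + E + ")"; // anything else: box the expression
  return true;
}

static std::string rewrite(const std::string &E) {
  std::string Out;
  if (rewriteToLiteral(E, Out) || rewriteToBoxed(E, Out))
    return Out;
  return E;
}

int main() {
  std::cout << rewrite("42") << ' ' << rewrite("x + 1") << '\n'; // @42 @(x + 1)
}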
if (!isa<IntegerLiteral>(literalE) && !isa<FloatingLiteral>(literalE)) - return false; + return rewriteToNumericBoxedExpression(Msg, NS, commit); ASTContext &Ctx = NS.getASTContext(); Selector Sel = Msg->getSelector(); @@ -496,7 +640,7 @@ static bool rewriteToNumberLiteral(const ObjCMessageExpr *Msg, case NSAPI::NSNumberWithShort: case NSAPI::NSNumberWithUnsignedShort: case NSAPI::NSNumberWithBool: - return false; + return rewriteToNumericBoxedExpression(Msg, NS, commit); case NSAPI::NSNumberWithUnsignedInt: case NSAPI::NSNumberWithUnsignedInteger: @@ -536,15 +680,16 @@ } // We will need to modify the literal suffix to get the same type as the call. - // Don't even try if it came from a macro. + // Try with boxed expression if it came from a macro. if (ArgRange.getBegin().isMacroID()) - return false; + return rewriteToNumericBoxedExpression(Msg, NS, commit); bool LitIsFloat = ArgTy->isFloatingType(); - // For a float passed to integer call, don't try rewriting. It is difficult - // and a very uncommon case anyway. + // For a float passed to integer call, don't try rewriting to objc literal. + // It is difficult and a very uncommon case anyway. + // But try with boxed expression. if (LitIsFloat && !CallIsFloating) - return false; + return rewriteToNumericBoxedExpression(Msg, NS, commit); // Try to modify the literal make it the same type as the method call. // -Modify the suffix, and/or @@ -555,11 +700,11 @@ if (const IntegerLiteral *IntE = dyn_cast<IntegerLiteral>(literalE)) isIntZero = !IntE->getValue().getBoolValue(); if (!getLiteralInfo(ArgRange, LitIsFloat, isIntZero, Ctx, LitInfo)) - return false; + return rewriteToNumericBoxedExpression(Msg, NS, commit); // Not easy to do int -> float with hex/octal and uncommon anyway. if (!LitIsFloat && CallIsFloating && (LitInfo.Hex || LitInfo.Octal)) - return false; + return rewriteToNumericBoxedExpression(Msg, NS, commit); SourceLocation LitB = LitInfo.WithoutSuffRange.getBegin(); SourceLocation LitE = LitInfo.WithoutSuffRange.getEnd(); @@ -585,3 +730,284 @@ } return true; } + +// FIXME: Make determination of operator precedence more general and +// make it broadly available.
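The FIXME above concedes that the hand-written predicates that follow only approximate operator precedence. A more general version would compare precedence ranks, roughly as in this sketch (the Prec enum and its levels are invented here):

// Minimal precedence comparison; not clang's actual precedence table.
#include <iostream>

enum class Prec { Comma = 0, Conditional, Binary, Unary, Postfix, Primary };

static bool needsParens(Prec Inner, Prec Context) {
  // Parenthesize when the inner expression binds more loosely than the
  // position it is spliced into (e.g. "a ? b : c" used as a subscript base).
  return Inner < Context;
}

int main() {
  std::cout << needsParens(Prec::Conditional, Prec::Postfix)      // 1: wrap
            << needsParens(Prec::Primary, Prec::Postfix) << '\n'; // 0: leave
}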
+static bool subscriptOperatorNeedsParens(const Expr *FullExpr) { + const Expr* Expr = FullExpr->IgnoreImpCasts(); + if (isa<ArraySubscriptExpr>(Expr) || + isa<CallExpr>(Expr) || + isa<DeclRefExpr>(Expr) || + isa<CXXNamedCastExpr>(Expr) || + isa<CXXConstructExpr>(Expr) || + isa<CXXThisExpr>(Expr) || + isa<CXXTypeidExpr>(Expr) || + isa<CXXUnresolvedConstructExpr>(Expr) || + isa<ObjCMessageExpr>(Expr) || + isa<ObjCPropertyRefExpr>(Expr) || + isa<ObjCProtocolExpr>(Expr) || + isa<MemberExpr>(Expr) || + isa<ObjCIvarRefExpr>(Expr) || + isa<ParenExpr>(FullExpr) || + isa<ParenListExpr>(Expr) || + isa<SizeOfPackExpr>(Expr)) + return false; + + return true; +} +static bool castOperatorNeedsParens(const Expr *FullExpr) { + const Expr* Expr = FullExpr->IgnoreImpCasts(); + if (isa<ArraySubscriptExpr>(Expr) || + isa<CallExpr>(Expr) || + isa<DeclRefExpr>(Expr) || + isa<CastExpr>(Expr) || + isa<CXXNewExpr>(Expr) || + isa<CXXConstructExpr>(Expr) || + isa<CXXDeleteExpr>(Expr) || + isa<CXXNoexceptExpr>(Expr) || + isa<CXXPseudoDestructorExpr>(Expr) || + isa<CXXScalarValueInitExpr>(Expr) || + isa<CXXThisExpr>(Expr) || + isa<CXXTypeidExpr>(Expr) || + isa<CXXUnresolvedConstructExpr>(Expr) || + isa<ObjCMessageExpr>(Expr) || + isa<ObjCPropertyRefExpr>(Expr) || + isa<ObjCProtocolExpr>(Expr) || + isa<MemberExpr>(Expr) || + isa<ObjCIvarRefExpr>(Expr) || + isa<ParenExpr>(FullExpr) || + isa<ParenListExpr>(Expr) || + isa<SizeOfPackExpr>(Expr) || + isa<UnaryOperator>(Expr)) + return false; + + return true; +} + +static void objectifyExpr(const Expr *E, Commit &commit) { + if (!E) return; + + QualType T = E->getType(); + if (T->isObjCObjectPointerType()) { + if (const ImplicitCastExpr *ICE = dyn_cast<ImplicitCastExpr>(E)) { + if (ICE->getCastKind() != CK_CPointerToObjCPointerCast) + return; + } else { + return; + } + } else if (!T->isPointerType()) { + return; + } + + SourceRange Range = E->getSourceRange(); + if (castOperatorNeedsParens(E)) + commit.insertWrap("(", Range, ")"); + commit.insertBefore(Range.getBegin(), "(id)"); +} + +//===----------------------------------------------------------------------===// +// rewriteToNumericBoxedExpression. +//===----------------------------------------------------------------------===// + +static bool isEnumConstant(const Expr *E) { + if (const DeclRefExpr *DRE = dyn_cast<DeclRefExpr>(E->IgnoreParenImpCasts())) + if (const ValueDecl *VD = DRE->getDecl()) + return isa<EnumConstantDecl>(VD); + + return false; +} + +static bool rewriteToNumericBoxedExpression(const ObjCMessageExpr *Msg, + const NSAPI &NS, Commit &commit) { + if (Msg->getNumArgs() != 1) + return false; + + const Expr *Arg = Msg->getArg(0); + if (Arg->isTypeDependent()) + return false; + + ASTContext &Ctx = NS.getASTContext(); + Selector Sel = Msg->getSelector(); + llvm::Optional<NSAPI::NSNumberLiteralMethodKind> + MKOpt = NS.getNSNumberLiteralMethodKind(Sel); + if (!MKOpt) + return false; + NSAPI::NSNumberLiteralMethodKind MK = *MKOpt; + + const Expr *OrigArg = Arg->IgnoreImpCasts(); + QualType FinalTy = Arg->getType(); + QualType OrigTy = OrigArg->getType(); + uint64_t FinalTySize = Ctx.getTypeSize(FinalTy); + uint64_t OrigTySize = Ctx.getTypeSize(OrigTy); + + bool isTruncated = FinalTySize < OrigTySize; + bool needsCast = false; + + if (const ImplicitCastExpr *ICE = dyn_cast<ImplicitCastExpr>(Arg)) { + switch (ICE->getCastKind()) { + case CK_LValueToRValue: + case CK_NoOp: + case CK_UserDefinedConversion: + break; + + case CK_IntegralCast: { + if (MK == NSAPI::NSNumberWithBool && OrigTy->isBooleanType()) + break; + // Be more liberal with Integer/UnsignedInteger which are very commonly + // used.
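The liberality mentioned in the comment above is bounded by the earlier isTruncated flag, which compares the bit sizes of the original and final types. The same size test in self-contained form (clang computes the sizes with ASTContext::getTypeSize; sizeof is the stand-in here):

// Allow only conversions that do not narrow the value's representation.
#include <cstdint>
#include <iostream>

template <typename From, typename To>
constexpr bool isTruncated() {
  return sizeof(To) < sizeof(From); // final type narrower than original
}

int main() {
  std::cout << isTruncated<int64_t, int32_t>()            // 1: would truncate
            << isTruncated<int32_t, int64_t>() << '\n';   // 0: widening is fine
}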
+ if ((MK == NSAPI::NSNumberWithInteger || + MK == NSAPI::NSNumberWithUnsignedInteger) && + !isTruncated) { + if (OrigTy->getAs<EnumType>() || isEnumConstant(OrigArg)) + break; + if ((MK==NSAPI::NSNumberWithInteger) == OrigTy->isSignedIntegerType() && + OrigTySize >= Ctx.getTypeSize(Ctx.IntTy)) + break; + } + + needsCast = true; + break; + } + + case CK_PointerToBoolean: + case CK_IntegralToBoolean: + case CK_IntegralToFloating: + case CK_FloatingToIntegral: + case CK_FloatingToBoolean: + case CK_FloatingCast: + case CK_FloatingComplexToReal: + case CK_FloatingComplexToBoolean: + case CK_IntegralComplexToReal: + case CK_IntegralComplexToBoolean: + case CK_AtomicToNonAtomic: + needsCast = true; + break; + + case CK_Dependent: + case CK_BitCast: + case CK_LValueBitCast: + case CK_BaseToDerived: + case CK_DerivedToBase: + case CK_UncheckedDerivedToBase: + case CK_Dynamic: + case CK_ToUnion: + case CK_ArrayToPointerDecay: + case CK_FunctionToPointerDecay: + case CK_NullToPointer: + case CK_NullToMemberPointer: + case CK_BaseToDerivedMemberPointer: + case CK_DerivedToBaseMemberPointer: + case CK_MemberPointerToBoolean: + case CK_ReinterpretMemberPointer: + case CK_ConstructorConversion: + case CK_IntegralToPointer: + case CK_PointerToIntegral: + case CK_ToVoid: + case CK_VectorSplat: + case CK_CPointerToObjCPointerCast: + case CK_BlockPointerToObjCPointerCast: + case CK_AnyPointerToBlockPointerCast: + case CK_ObjCObjectLValueCast: + case CK_FloatingRealToComplex: + case CK_FloatingComplexCast: + case CK_FloatingComplexToIntegralComplex: + case CK_IntegralRealToComplex: + case CK_IntegralComplexCast: + case CK_IntegralComplexToFloatingComplex: + case CK_ARCProduceObject: + case CK_ARCConsumeObject: + case CK_ARCReclaimReturnedObject: + case CK_ARCExtendBlockObject: + case CK_NonAtomicToAtomic: + case CK_CopyAndAutoreleaseBlockObject: + return false; + } + } + + if (needsCast) { + DiagnosticsEngine &Diags = Ctx.getDiagnostics(); + // FIXME: Use a custom category name to distinguish migration diagnostics. + unsigned diagID = Diags.getCustomDiagID(DiagnosticsEngine::Warning, + "converting to boxing syntax requires casting %0 to %1"); + Diags.Report(Msg->getExprLoc(), diagID) << OrigTy << FinalTy + << Msg->getSourceRange(); + return false; + } + + SourceRange ArgRange = OrigArg->getSourceRange(); + commit.replaceWithInner(Msg->getSourceRange(), ArgRange); + + if (isa<ParenExpr>(OrigArg) || isa<ParenListExpr>(OrigArg)) + commit.insertBefore(ArgRange.getBegin(), "@"); + else + commit.insertWrap("@(", ArgRange, ")"); + + return true; +} + +//===----------------------------------------------------------------------===// +// rewriteToStringBoxedExpression.
+//===----------------------------------------------------------------------===// + +static bool doRewriteToUTF8StringBoxedExpressionHelper( + const ObjCMessageExpr *Msg, + const NSAPI &NS, Commit &commit) { + const Expr *Arg = Msg->getArg(0); + if (Arg->isTypeDependent()) + return false; + + ASTContext &Ctx = NS.getASTContext(); + + const Expr *OrigArg = Arg->IgnoreImpCasts(); + QualType OrigTy = OrigArg->getType(); + if (OrigTy->isArrayType()) + OrigTy = Ctx.getArrayDecayedType(OrigTy); + + if (const StringLiteral * + StrE = dyn_cast<StringLiteral>(OrigArg->IgnoreParens())) { + commit.replaceWithInner(Msg->getSourceRange(), StrE->getSourceRange()); + commit.insert(StrE->getLocStart(), "@"); + return true; + } + + if (const PointerType *PT = OrigTy->getAs<PointerType>()) { + QualType PointeeType = PT->getPointeeType(); + if (Ctx.hasSameUnqualifiedType(PointeeType, Ctx.CharTy)) { + SourceRange ArgRange = OrigArg->getSourceRange(); + commit.replaceWithInner(Msg->getSourceRange(), ArgRange); + + if (isa<ParenExpr>(OrigArg) || isa<ParenListExpr>(OrigArg)) + commit.insertBefore(ArgRange.getBegin(), "@"); + else + commit.insertWrap("@(", ArgRange, ")"); + + return true; + } + } + + return false; +} + +static bool rewriteToStringBoxedExpression(const ObjCMessageExpr *Msg, + const NSAPI &NS, Commit &commit) { + Selector Sel = Msg->getSelector(); + + if (Sel == NS.getNSStringSelector(NSAPI::NSStr_stringWithUTF8String) || + Sel == NS.getNSStringSelector(NSAPI::NSStr_stringWithCString)) { + if (Msg->getNumArgs() != 1) + return false; + return doRewriteToUTF8StringBoxedExpressionHelper(Msg, NS, commit); + } + + if (Sel == NS.getNSStringSelector(NSAPI::NSStr_stringWithCStringEncoding)) { + if (Msg->getNumArgs() != 2) + return false; + + const Expr *encodingArg = Msg->getArg(1); + if (NS.isNSUTF8StringEncodingConstant(encodingArg) || + NS.isNSASCIIStringEncodingConstant(encodingArg)) + return doRewriteToUTF8StringBoxedExpressionHelper(Msg, NS, commit); + } + + return false; +} diff --git a/lib/Frontend/ASTConsumers.cpp b/lib/Frontend/ASTConsumers.cpp index 390ae09..bb1a4e6 100644 --- a/lib/Frontend/ASTConsumers.cpp +++ b/lib/Frontend/ASTConsumers.cpp @@ -12,47 +12,116 @@ //===----------------------------------------------------------------------===// #include "clang/Frontend/ASTConsumers.h" +#include "clang/Basic/FileManager.h" #include "clang/Basic/Diagnostic.h" #include "clang/Basic/SourceManager.h" -#include "clang/Basic/FileManager.h" #include "clang/AST/AST.h" #include "clang/AST/ASTConsumer.h" #include "clang/AST/ASTContext.h" -#include "clang/AST/RecordLayout.h" #include "clang/AST/PrettyPrinter.h" +#include "clang/AST/RecordLayout.h" +#include "clang/AST/RecursiveASTVisitor.h" #include "llvm/Module.h" -#include "llvm/Support/Timer.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Support/Path.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Support/Timer.h" using namespace clang; //===----------------------------------------------------------------------===// /// ASTPrinter - Pretty-printer and dumper of ASTs namespace { - class ASTPrinter : public ASTConsumer { + class ASTPrinter : public ASTConsumer, + public RecursiveASTVisitor<ASTPrinter> { + typedef RecursiveASTVisitor<ASTPrinter> base; + + public: + ASTPrinter(raw_ostream *Out = NULL, bool Dump = false, + StringRef FilterString = "") + : Out(Out ?
*Out : llvm::outs()), Dump(Dump), + FilterString(FilterString) {} + + virtual void HandleTranslationUnit(ASTContext &Context) { + TranslationUnitDecl *D = Context.getTranslationUnitDecl(); + + if (FilterString.empty()) { + if (Dump) + D->dump(Out); + else + D->print(Out, /*Indentation=*/0, /*PrintInstantiation=*/true); + return; + } + + TraverseDecl(D); + } + + bool shouldWalkTypesOfTypeLocs() const { return false; } + + bool TraverseDecl(Decl *D) { + if (filterMatches(D)) { + Out.changeColor(llvm::raw_ostream::BLUE) << + (Dump ? "Dumping " : "Printing ") << getName(D) << ":\n"; + Out.resetColor(); + if (Dump) + D->dump(Out); + else + D->print(Out, /*Indentation=*/0, /*PrintInstantiation=*/true); + // Don't traverse child nodes to avoid output duplication. + return true; + } + return base::TraverseDecl(D); + } + + private: + std::string getName(Decl *D) { + if (isa(D)) + return cast(D)->getQualifiedNameAsString(); + return ""; + } + bool filterMatches(Decl *D) { + return getName(D).find(FilterString) != std::string::npos; + } + raw_ostream &Out; bool Dump; + std::string FilterString; + }; + + class ASTDeclNodeLister : public ASTConsumer, + public RecursiveASTVisitor { + typedef RecursiveASTVisitor base; public: - ASTPrinter(raw_ostream* o = NULL, bool Dump = false) - : Out(o? *o : llvm::outs()), Dump(Dump) { } + ASTDeclNodeLister(raw_ostream *Out = NULL) + : Out(Out ? *Out : llvm::outs()) {} virtual void HandleTranslationUnit(ASTContext &Context) { - PrintingPolicy Policy = Context.getPrintingPolicy(); - Policy.Dump = Dump; - Context.getTranslationUnitDecl()->print(Out, Policy, /*Indentation=*/0, - /*PrintInstantiation=*/true); + TraverseDecl(Context.getTranslationUnitDecl()); } + + bool shouldWalkTypesOfTypeLocs() const { return false; } + + virtual bool VisitNamedDecl(NamedDecl *D) { + Out << D->getQualifiedNameAsString() << "\n"; + return true; + } + + private: + raw_ostream &Out; }; } // end anonymous namespace -ASTConsumer *clang::CreateASTPrinter(raw_ostream* out) { - return new ASTPrinter(out); +ASTConsumer *clang::CreateASTPrinter(raw_ostream *Out, + StringRef FilterString) { + return new ASTPrinter(Out, /*Dump=*/ false, FilterString); +} + +ASTConsumer *clang::CreateASTDumper(StringRef FilterString) { + return new ASTPrinter(0, /*Dump=*/ true, FilterString); } -ASTConsumer *clang::CreateASTDumper() { - return new ASTPrinter(0, true); +ASTConsumer *clang::CreateASTDeclNodeLister() { + return new ASTDeclNodeLister(0); } //===----------------------------------------------------------------------===// diff --git a/lib/Frontend/ASTUnit.cpp b/lib/Frontend/ASTUnit.cpp index 7aa9603..42a6772 100644 --- a/lib/Frontend/ASTUnit.cpp +++ b/lib/Frontend/ASTUnit.cpp @@ -17,12 +17,6 @@ #include "clang/AST/DeclVisitor.h" #include "clang/AST/TypeOrdering.h" #include "clang/AST/StmtVisitor.h" -#include "clang/Driver/Compilation.h" -#include "clang/Driver/Driver.h" -#include "clang/Driver/Job.h" -#include "clang/Driver/ArgList.h" -#include "clang/Driver/Options.h" -#include "clang/Driver/Tool.h" #include "clang/Frontend/CompilerInstance.h" #include "clang/Frontend/FrontendActions.h" #include "clang/Frontend/FrontendDiagnostic.h" @@ -122,7 +116,8 @@ static OnDiskDataMap &getOnDiskDataMap() { } static void cleanupOnDiskMapAtExit(void) { - // No mutex required here since we are leaving the program. + // Use the mutex because there can be an alive thread destroying an ASTUnit. 
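The mutex comment above is the crux of this ASTUnit.cpp fix: process-exit cleanup can race with a thread that is still destroying an ASTUnit, so the exit hook must take the same lock as the destructor path. The same shape in standard C++ (std::mutex and std::map standing in for the llvm types):

// Lock the shared map in the atexit handler too; "no one else is running
// at exit" is not a safe assumption when other threads may still be live.
#include <cstdlib>
#include <map>
#include <mutex>
#include <string>

static std::mutex &getMapMutex() { static std::mutex M; return M; }
static std::map<std::string, int> &getMap() {
  static std::map<std::string, int> M;
  return M;
}

static void cleanupAtExit() {
  std::lock_guard<std::mutex> Guard(getMapMutex()); // do NOT skip locking
  getMap().clear();
}

int main() {
  std::atexit(cleanupAtExit);
  std::lock_guard<std::mutex> Guard(getMapMutex());
  getMap()["a.pch"] = 1;
}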
+ llvm::MutexGuard Guard(getOnDiskMutex()); OnDiskDataMap &M = getOnDiskDataMap(); for (OnDiskDataMap::iterator I = M.begin(), E = M.end(); I != E; ++I) { // We don't worry about freeing the memory associated with OnDiskDataMap. @@ -220,6 +215,7 @@ ASTUnit::ASTUnit(bool _MainFileIsAST) PreambleRebuildCounter(0), SavedMainFileBuffer(0), PreambleBuffer(0), NumWarningsInPreamble(0), ShouldCacheCodeCompletionResults(false), + IncludeBriefCommentsInCodeCompletion(false), UserFilesAreVolatile(false), CompletionCacheTopLevelHashValue(0), PreambleTopLevelHashValue(0), CurrentTopLevelHashValue(0), @@ -275,43 +271,43 @@ static unsigned getDeclShowContexts(NamedDecl *ND, if (!ND) return 0; - unsigned Contexts = 0; + uint64_t Contexts = 0; if (isa(ND) || isa(ND) || isa(ND) || isa(ND)) { // Types can appear in these contexts. if (LangOpts.CPlusPlus || !isa(ND)) - Contexts |= (1 << (CodeCompletionContext::CCC_TopLevel - 1)) - | (1 << (CodeCompletionContext::CCC_ObjCIvarList - 1)) - | (1 << (CodeCompletionContext::CCC_ClassStructUnion - 1)) - | (1 << (CodeCompletionContext::CCC_Statement - 1)) - | (1 << (CodeCompletionContext::CCC_Type - 1)) - | (1 << (CodeCompletionContext::CCC_ParenthesizedExpression - 1)); + Contexts |= (1LL << CodeCompletionContext::CCC_TopLevel) + | (1LL << CodeCompletionContext::CCC_ObjCIvarList) + | (1LL << CodeCompletionContext::CCC_ClassStructUnion) + | (1LL << CodeCompletionContext::CCC_Statement) + | (1LL << CodeCompletionContext::CCC_Type) + | (1LL << CodeCompletionContext::CCC_ParenthesizedExpression); // In C++, types can appear in expressions contexts (for functional casts). if (LangOpts.CPlusPlus) - Contexts |= (1 << (CodeCompletionContext::CCC_Expression - 1)); + Contexts |= (1LL << CodeCompletionContext::CCC_Expression); // In Objective-C, message sends can send interfaces. In Objective-C++, // all types are available due to functional casts. if (LangOpts.CPlusPlus || isa(ND)) - Contexts |= (1 << (CodeCompletionContext::CCC_ObjCMessageReceiver - 1)); + Contexts |= (1LL << CodeCompletionContext::CCC_ObjCMessageReceiver); // In Objective-C, you can only be a subclass of another Objective-C class if (isa(ND)) - Contexts |= (1 << (CodeCompletionContext::CCC_ObjCInterfaceName - 1)); + Contexts |= (1LL << CodeCompletionContext::CCC_ObjCInterfaceName); // Deal with tag names. if (isa(ND)) { - Contexts |= (1 << (CodeCompletionContext::CCC_EnumTag - 1)); + Contexts |= (1LL << CodeCompletionContext::CCC_EnumTag); // Part of the nested-name-specifier in C++0x. if (LangOpts.CPlusPlus0x) IsNestedNameSpecifier = true; } else if (RecordDecl *Record = dyn_cast(ND)) { if (Record->isUnion()) - Contexts |= (1 << (CodeCompletionContext::CCC_UnionTag - 1)); + Contexts |= (1LL << CodeCompletionContext::CCC_UnionTag); else - Contexts |= (1 << (CodeCompletionContext::CCC_ClassOrStructTag - 1)); + Contexts |= (1LL << CodeCompletionContext::CCC_ClassOrStructTag); if (LangOpts.CPlusPlus) IsNestedNameSpecifier = true; @@ -319,16 +315,16 @@ static unsigned getDeclShowContexts(NamedDecl *ND, IsNestedNameSpecifier = true; } else if (isa(ND) || isa(ND)) { // Values can appear in these contexts. 
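The surrounding hunks all make the same fix: the context bitmasks switch from '1 << (Kind - 1)' in a 32-bit unsigned to '1LL << Kind' in a uint64_t, because the number of code-completion contexts can exceed what a 32-bit shift can represent. A minimal demonstration (the enum here is invented; only the shift-width point matters):

// Shifting a plain int by more than 31 bits is undefined; use a 64-bit mask.
#include <cstdint>
#include <iostream>

enum Context { CCC_TopLevel = 0, CCC_Other = 40 }; // more than 31 kinds exist

int main() {
  uint64_t Contexts = 0;
  Contexts |= 1LL << CCC_Other;  // well-defined: 64-bit shift
  // Contexts |= 1 << CCC_Other; // undefined: 32-bit int shifted by 40
  std::cout << ((Contexts >> CCC_Other) & 1) << '\n'; // 1
}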
- Contexts = (1 << (CodeCompletionContext::CCC_Statement - 1)) - | (1 << (CodeCompletionContext::CCC_Expression - 1)) - | (1 << (CodeCompletionContext::CCC_ParenthesizedExpression - 1)) - | (1 << (CodeCompletionContext::CCC_ObjCMessageReceiver - 1)); + Contexts = (1LL << CodeCompletionContext::CCC_Statement) + | (1LL << CodeCompletionContext::CCC_Expression) + | (1LL << CodeCompletionContext::CCC_ParenthesizedExpression) + | (1LL << CodeCompletionContext::CCC_ObjCMessageReceiver); } else if (isa(ND)) { - Contexts = (1 << (CodeCompletionContext::CCC_ObjCProtocolName - 1)); + Contexts = (1LL << CodeCompletionContext::CCC_ObjCProtocolName); } else if (isa(ND)) { - Contexts = (1 << (CodeCompletionContext::CCC_ObjCCategoryName - 1)); + Contexts = (1LL << CodeCompletionContext::CCC_ObjCCategoryName); } else if (isa(ND) || isa(ND)) { - Contexts = (1 << (CodeCompletionContext::CCC_Namespace - 1)); + Contexts = (1LL << CodeCompletionContext::CCC_Namespace); // Part of the nested-name-specifier. IsNestedNameSpecifier = true; @@ -364,7 +360,8 @@ void ASTUnit::CacheCodeCompletionResults() { CachedCodeCompletionResult CachedResult; CachedResult.Completion = Results[I].CreateCodeCompletionString(*TheSema, *CachedCompletionAllocator, - getCodeCompletionTUInfo()); + getCodeCompletionTUInfo(), + IncludeBriefCommentsInCodeCompletion); CachedResult.ShowInContexts = getDeclShowContexts(Results[I].Declaration, Ctx->getLangOpts(), IsNestedNameSpecifier); @@ -402,23 +399,23 @@ void ASTUnit::CacheCodeCompletionResults() { if (TheSema->Context.getLangOpts().CPlusPlus && IsNestedNameSpecifier && !Results[I].StartsNestedNameSpecifier) { // The contexts in which a nested-name-specifier can appear in C++. - unsigned NNSContexts - = (1 << (CodeCompletionContext::CCC_TopLevel - 1)) - | (1 << (CodeCompletionContext::CCC_ObjCIvarList - 1)) - | (1 << (CodeCompletionContext::CCC_ClassStructUnion - 1)) - | (1 << (CodeCompletionContext::CCC_Statement - 1)) - | (1 << (CodeCompletionContext::CCC_Expression - 1)) - | (1 << (CodeCompletionContext::CCC_ObjCMessageReceiver - 1)) - | (1 << (CodeCompletionContext::CCC_EnumTag - 1)) - | (1 << (CodeCompletionContext::CCC_UnionTag - 1)) - | (1 << (CodeCompletionContext::CCC_ClassOrStructTag - 1)) - | (1 << (CodeCompletionContext::CCC_Type - 1)) - | (1 << (CodeCompletionContext::CCC_PotentiallyQualifiedName - 1)) - | (1 << (CodeCompletionContext::CCC_ParenthesizedExpression - 1)); + uint64_t NNSContexts + = (1LL << CodeCompletionContext::CCC_TopLevel) + | (1LL << CodeCompletionContext::CCC_ObjCIvarList) + | (1LL << CodeCompletionContext::CCC_ClassStructUnion) + | (1LL << CodeCompletionContext::CCC_Statement) + | (1LL << CodeCompletionContext::CCC_Expression) + | (1LL << CodeCompletionContext::CCC_ObjCMessageReceiver) + | (1LL << CodeCompletionContext::CCC_EnumTag) + | (1LL << CodeCompletionContext::CCC_UnionTag) + | (1LL << CodeCompletionContext::CCC_ClassOrStructTag) + | (1LL << CodeCompletionContext::CCC_Type) + | (1LL << CodeCompletionContext::CCC_PotentiallyQualifiedName) + | (1LL << CodeCompletionContext::CCC_ParenthesizedExpression); if (isa(Results[I].Declaration) || isa(Results[I].Declaration)) - NNSContexts |= (1 << (CodeCompletionContext::CCC_Namespace - 1)); + NNSContexts |= (1LL << CodeCompletionContext::CCC_Namespace); if (unsigned RemainingContexts = NNSContexts & ~CachedResult.ShowInContexts) { @@ -429,7 +426,8 @@ void ASTUnit::CacheCodeCompletionResults() { CachedResult.Completion = Results[I].CreateCodeCompletionString(*TheSema, *CachedCompletionAllocator, - 
getCodeCompletionTUInfo()); + getCodeCompletionTUInfo(), + IncludeBriefCommentsInCodeCompletion); CachedResult.ShowInContexts = RemainingContexts; CachedResult.Priority = CCP_NestedNameSpecifier; CachedResult.TypeClass = STC_Void; @@ -451,20 +449,21 @@ void ASTUnit::CacheCodeCompletionResults() { CachedResult.Completion = Results[I].CreateCodeCompletionString(*TheSema, *CachedCompletionAllocator, - getCodeCompletionTUInfo()); + getCodeCompletionTUInfo(), + IncludeBriefCommentsInCodeCompletion); CachedResult.ShowInContexts - = (1 << (CodeCompletionContext::CCC_TopLevel - 1)) - | (1 << (CodeCompletionContext::CCC_ObjCInterface - 1)) - | (1 << (CodeCompletionContext::CCC_ObjCImplementation - 1)) - | (1 << (CodeCompletionContext::CCC_ObjCIvarList - 1)) - | (1 << (CodeCompletionContext::CCC_ClassStructUnion - 1)) - | (1 << (CodeCompletionContext::CCC_Statement - 1)) - | (1 << (CodeCompletionContext::CCC_Expression - 1)) - | (1 << (CodeCompletionContext::CCC_ObjCMessageReceiver - 1)) - | (1 << (CodeCompletionContext::CCC_MacroNameUse - 1)) - | (1 << (CodeCompletionContext::CCC_PreprocessorExpression - 1)) - | (1 << (CodeCompletionContext::CCC_ParenthesizedExpression - 1)) - | (1 << (CodeCompletionContext::CCC_OtherWithMacros - 1)); + = (1LL << CodeCompletionContext::CCC_TopLevel) + | (1LL << CodeCompletionContext::CCC_ObjCInterface) + | (1LL << CodeCompletionContext::CCC_ObjCImplementation) + | (1LL << CodeCompletionContext::CCC_ObjCIvarList) + | (1LL << CodeCompletionContext::CCC_ClassStructUnion) + | (1LL << CodeCompletionContext::CCC_Statement) + | (1LL << CodeCompletionContext::CCC_Expression) + | (1LL << CodeCompletionContext::CCC_ObjCMessageReceiver) + | (1LL << CodeCompletionContext::CCC_MacroNameUse) + | (1LL << CodeCompletionContext::CCC_PreprocessorExpression) + | (1LL << CodeCompletionContext::CCC_ParenthesizedExpression) + | (1LL << CodeCompletionContext::CCC_OtherWithMacros); CachedResult.Priority = Results[I].Priority; CachedResult.Kind = Results[I].CursorKind; @@ -659,7 +658,8 @@ ASTUnit *ASTUnit::LoadFromASTFile(const std::string &Filename, RemappedFile *RemappedFiles, unsigned NumRemappedFiles, bool CaptureDiagnostics, - bool AllowPCHWithCompilerErrors) { + bool AllowPCHWithCompilerErrors, + bool UserFilesAreVolatile) { OwningPtr AST(new ASTUnit(true)); // Recover resources if we crash before exiting this method. 
@@ -675,8 +675,10 @@ ASTUnit *ASTUnit::LoadFromASTFile(const std::string &Filename, AST->CaptureDiagnostics = CaptureDiagnostics; AST->Diagnostics = Diags; AST->FileMgr = new FileManager(FileSystemOpts); + AST->UserFilesAreVolatile = UserFilesAreVolatile; AST->SourceMgr = new SourceManager(AST->getDiagnostics(), - AST->getFileManager()); + AST->getFileManager(), + UserFilesAreVolatile); AST->HeaderInfo.reset(new HeaderSearch(AST->getFileManager(), AST->getDiagnostics(), AST->ASTFileLangOpts, @@ -1078,7 +1080,8 @@ bool ASTUnit::Parse(llvm::MemoryBuffer *OverrideMainBuffer) { LangOpts = &Clang->getLangOpts(); FileSystemOpts = Clang->getFileSystemOpts(); FileMgr = new FileManager(FileSystemOpts); - SourceMgr = new SourceManager(getDiagnostics(), *FileMgr); + SourceMgr = new SourceManager(getDiagnostics(), *FileMgr, + UserFilesAreVolatile); TheSema.reset(); Ctx = 0; PP = 0; @@ -1139,7 +1142,8 @@ bool ASTUnit::Parse(llvm::MemoryBuffer *OverrideMainBuffer) { StoredDiagnostics); } - Act->Execute(); + if (!Act->Execute()) + goto error; transferASTDataFromCompilerInstance(*Clang); @@ -1665,7 +1669,8 @@ StringRef ASTUnit::getMainFileName() const { ASTUnit *ASTUnit::create(CompilerInvocation *CI, IntrusiveRefCntPtr Diags, - bool CaptureDiagnostics) { + bool CaptureDiagnostics, + bool UserFilesAreVolatile) { OwningPtr AST; AST.reset(new ASTUnit(false)); ConfigureDiags(Diags, 0, 0, *AST, CaptureDiagnostics); @@ -1673,7 +1678,9 @@ ASTUnit *ASTUnit::create(CompilerInvocation *CI, AST->Invocation = CI; AST->FileSystemOpts = CI->getFileSystemOpts(); AST->FileMgr = new FileManager(AST->FileSystemOpts); - AST->SourceMgr = new SourceManager(AST->getDiagnostics(), *AST->FileMgr); + AST->UserFilesAreVolatile = UserFilesAreVolatile; + AST->SourceMgr = new SourceManager(AST->getDiagnostics(), *AST->FileMgr, + UserFilesAreVolatile); return AST.take(); } @@ -1688,6 +1695,8 @@ ASTUnit *ASTUnit::LoadFromCompilerInvocationAction(CompilerInvocation *CI, bool CaptureDiagnostics, bool PrecompilePreamble, bool CacheCodeCompletionResults, + bool IncludeBriefCommentsInCodeCompletion, + bool UserFilesAreVolatile, OwningPtr *ErrAST) { assert(CI && "A CompilerInvocation is required"); @@ -1695,7 +1704,7 @@ ASTUnit *ASTUnit::LoadFromCompilerInvocationAction(CompilerInvocation *CI, ASTUnit *AST = Unit; if (!AST) { // Create the AST unit. - OwnAST.reset(create(CI, Diags, CaptureDiagnostics)); + OwnAST.reset(create(CI, Diags, CaptureDiagnostics, UserFilesAreVolatile)); AST = OwnAST.get(); } @@ -1709,6 +1718,8 @@ ASTUnit *ASTUnit::LoadFromCompilerInvocationAction(CompilerInvocation *CI, AST->PreambleRebuildCounter = 2; AST->TUKind = Action ? Action->getTranslationUnitKind() : TU_Complete; AST->ShouldCacheCodeCompletionResults = CacheCodeCompletionResults; + AST->IncludeBriefCommentsInCodeCompletion + = IncludeBriefCommentsInCodeCompletion; // Recover resources if we crash before exiting this method. llvm::CrashRecoveryContextCleanupRegistrar @@ -1801,7 +1812,13 @@ ASTUnit *ASTUnit::LoadFromCompilerInvocationAction(CompilerInvocation *CI, AST->getCurrentTopLevelHashValue())); Clang->setASTConsumer(new MultiplexConsumer(Consumers)); } - Act->Execute(); + if (!Act->Execute()) { + AST->transferASTDataFromCompilerInstance(*Clang); + if (OwnAST && ErrAST) + ErrAST->swap(OwnAST); + + return 0; + } // Steal the created target, context, and preprocessor. 
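The hunk above changes Act->Execute() from a fire-and-forget call into a checked one that salvages the partially built AST and reports failure to the caller. The control-flow shape, reduced to standard C++ with an invented Action stand-in:

// Propagate Execute() failure instead of pretending the load succeeded.
#include <iostream>
#include <memory>

struct Action { bool Execute() { return false; } };

static std::unique_ptr<int> load(Action &Act) {
  auto Result = std::make_unique<int>(0);
  if (!Act.Execute())
    return nullptr; // report failure; caller can inspect what was salvaged
  return Result;
}

int main() {
  Action A;
  std::cout << (load(A) ? "ok" : "failed") << '\n'; // failed
}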
AST->transferASTDataFromCompilerInstance(*Clang); @@ -1849,7 +1866,9 @@ ASTUnit *ASTUnit::LoadFromCompilerInvocation(CompilerInvocation *CI, bool CaptureDiagnostics, bool PrecompilePreamble, TranslationUnitKind TUKind, - bool CacheCodeCompletionResults) { + bool CacheCodeCompletionResults, + bool IncludeBriefCommentsInCodeCompletion, + bool UserFilesAreVolatile) { // Create the AST unit. OwningPtr AST; AST.reset(new ASTUnit(false)); @@ -1859,7 +1878,10 @@ ASTUnit *ASTUnit::LoadFromCompilerInvocation(CompilerInvocation *CI, AST->CaptureDiagnostics = CaptureDiagnostics; AST->TUKind = TUKind; AST->ShouldCacheCodeCompletionResults = CacheCodeCompletionResults; + AST->IncludeBriefCommentsInCodeCompletion + = IncludeBriefCommentsInCodeCompletion; AST->Invocation = CI; + AST->UserFilesAreVolatile = UserFilesAreVolatile; // Recover resources if we crash before exiting this method. llvm::CrashRecoveryContextCleanupRegistrar @@ -1883,8 +1905,10 @@ ASTUnit *ASTUnit::LoadFromCommandLine(const char **ArgBegin, bool PrecompilePreamble, TranslationUnitKind TUKind, bool CacheCodeCompletionResults, + bool IncludeBriefCommentsInCodeCompletion, bool AllowPCHWithCompilerErrors, bool SkipFunctionBodies, + bool UserFilesAreVolatile, OwningPtr *ErrAST) { if (!Diags.getPtr()) { // No diagnostics engine was provided, so create our own diagnostics object @@ -1942,6 +1966,9 @@ ASTUnit *ASTUnit::LoadFromCommandLine(const char **ArgBegin, AST->CaptureDiagnostics = CaptureDiagnostics; AST->TUKind = TUKind; AST->ShouldCacheCodeCompletionResults = CacheCodeCompletionResults; + AST->IncludeBriefCommentsInCodeCompletion + = IncludeBriefCommentsInCodeCompletion; + AST->UserFilesAreVolatile = UserFilesAreVolatile; AST->NumStoredDiagnosticsFromDriver = StoredDiagnostics.size(); AST->StoredDiagnostics.swap(StoredDiagnostics); AST->Invocation = CI; @@ -2034,38 +2061,37 @@ namespace { /// results from an ASTUnit with the code-completion results provided to it, /// then passes the result on to class AugmentedCodeCompleteConsumer : public CodeCompleteConsumer { - unsigned long long NormalContexts; + uint64_t NormalContexts; ASTUnit &AST; CodeCompleteConsumer &Next; public: AugmentedCodeCompleteConsumer(ASTUnit &AST, CodeCompleteConsumer &Next, - bool IncludeMacros, bool IncludeCodePatterns, - bool IncludeGlobals) - : CodeCompleteConsumer(IncludeMacros, IncludeCodePatterns, IncludeGlobals, - Next.isOutputBinary()), AST(AST), Next(Next) + const CodeCompleteOptions &CodeCompleteOpts) + : CodeCompleteConsumer(CodeCompleteOpts, Next.isOutputBinary()), + AST(AST), Next(Next) { // Compute the set of contexts in which we will look when we don't have // any information about the specific context. 
NormalContexts - = (1LL << (CodeCompletionContext::CCC_TopLevel - 1)) - | (1LL << (CodeCompletionContext::CCC_ObjCInterface - 1)) - | (1LL << (CodeCompletionContext::CCC_ObjCImplementation - 1)) - | (1LL << (CodeCompletionContext::CCC_ObjCIvarList - 1)) - | (1LL << (CodeCompletionContext::CCC_Statement - 1)) - | (1LL << (CodeCompletionContext::CCC_Expression - 1)) - | (1LL << (CodeCompletionContext::CCC_ObjCMessageReceiver - 1)) - | (1LL << (CodeCompletionContext::CCC_DotMemberAccess - 1)) - | (1LL << (CodeCompletionContext::CCC_ArrowMemberAccess - 1)) - | (1LL << (CodeCompletionContext::CCC_ObjCPropertyAccess - 1)) - | (1LL << (CodeCompletionContext::CCC_ObjCProtocolName - 1)) - | (1LL << (CodeCompletionContext::CCC_ParenthesizedExpression - 1)) - | (1LL << (CodeCompletionContext::CCC_Recovery - 1)); + = (1LL << CodeCompletionContext::CCC_TopLevel) + | (1LL << CodeCompletionContext::CCC_ObjCInterface) + | (1LL << CodeCompletionContext::CCC_ObjCImplementation) + | (1LL << CodeCompletionContext::CCC_ObjCIvarList) + | (1LL << CodeCompletionContext::CCC_Statement) + | (1LL << CodeCompletionContext::CCC_Expression) + | (1LL << CodeCompletionContext::CCC_ObjCMessageReceiver) + | (1LL << CodeCompletionContext::CCC_DotMemberAccess) + | (1LL << CodeCompletionContext::CCC_ArrowMemberAccess) + | (1LL << CodeCompletionContext::CCC_ObjCPropertyAccess) + | (1LL << CodeCompletionContext::CCC_ObjCProtocolName) + | (1LL << CodeCompletionContext::CCC_ParenthesizedExpression) + | (1LL << CodeCompletionContext::CCC_Recovery); if (AST.getASTContext().getLangOpts().CPlusPlus) - NormalContexts |= (1LL << (CodeCompletionContext::CCC_EnumTag - 1)) - | (1LL << (CodeCompletionContext::CCC_UnionTag - 1)) - | (1LL << (CodeCompletionContext::CCC_ClassOrStructTag - 1)); + NormalContexts |= (1LL << CodeCompletionContext::CCC_EnumTag) + | (1LL << CodeCompletionContext::CCC_UnionTag) + | (1LL << CodeCompletionContext::CCC_ClassOrStructTag); } virtual void ProcessCodeCompleteResults(Sema &S, @@ -2180,9 +2206,9 @@ void AugmentedCodeCompleteConsumer::ProcessCodeCompleteResults(Sema &S, unsigned NumResults) { // Merge the results we were given with the results we cached. bool AddedResult = false; - unsigned InContexts - = (Context.getKind() == CodeCompletionContext::CCC_Recovery? NormalContexts - : (1ULL << (Context.getKind() - 1))); + uint64_t InContexts = + Context.getKind() == CodeCompletionContext::CCC_Recovery + ? NormalContexts : (1LL << Context.getKind()); // Contains the set of names that are hidden by "local" completion results. 
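The HiddenNames set noted above drives the merge in ProcessCodeCompleteResults: the current ("local") results win, and a cached global result is surfaced only when its name is not shadowed. Roughly, with std::set standing in for llvm::StringSet:

// Merge local completion results with cached globals, suppressing shadows.
#include <iostream>
#include <set>
#include <string>
#include <vector>

int main() {
  std::vector<std::string> Local = {"size", "begin"};
  std::vector<std::string> CachedGlobals = {"size", "printf"};

  std::set<std::string> HiddenNames(Local.begin(), Local.end());
  std::vector<std::string> Merged = Local;
  for (const std::string &G : CachedGlobals)
    if (!HiddenNames.count(G)) // skip globals hidden by local results
      Merged.push_back(G);

  for (const std::string &R : Merged)
    std::cout << R << '\n'; // size begin printf
}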
llvm::StringSet HiddenNames; typedef CodeCompletionResult Result; @@ -2273,6 +2299,7 @@ void ASTUnit::CodeComplete(StringRef File, unsigned Line, unsigned Column, unsigned NumRemappedFiles, bool IncludeMacros, bool IncludeCodePatterns, + bool IncludeBriefComments, CodeCompleteConsumer &Consumer, DiagnosticsEngine &Diag, LangOptions &LangOpts, SourceManager &SourceMgr, FileManager &FileMgr, @@ -2289,13 +2316,17 @@ void ASTUnit::CodeComplete(StringRef File, unsigned Line, unsigned Column, CCInvocation(new CompilerInvocation(*Invocation)); FrontendOptions &FrontendOpts = CCInvocation->getFrontendOpts(); + CodeCompleteOptions &CodeCompleteOpts = FrontendOpts.CodeCompleteOpts; PreprocessorOptions &PreprocessorOpts = CCInvocation->getPreprocessorOpts(); - FrontendOpts.ShowMacrosInCodeCompletion - = IncludeMacros && CachedCompletionResults.empty(); - FrontendOpts.ShowCodePatternsInCodeCompletion = IncludeCodePatterns; - FrontendOpts.ShowGlobalSymbolsInCodeCompletion - = CachedCompletionResults.empty(); + CodeCompleteOpts.IncludeMacros = IncludeMacros && + CachedCompletionResults.empty(); + CodeCompleteOpts.IncludeCodePatterns = IncludeCodePatterns; + CodeCompleteOpts.IncludeGlobals = CachedCompletionResults.empty(); + CodeCompleteOpts.IncludeBriefComments = IncludeBriefComments; + + assert(IncludeBriefComments == this->IncludeBriefCommentsInCodeCompletion); + FrontendOpts.CodeCompletionAt.FileName = File; FrontendOpts.CodeCompletionAt.Line = Line; FrontendOpts.CodeCompletionAt.Column = Column; @@ -2364,10 +2395,7 @@ void ASTUnit::CodeComplete(StringRef File, unsigned Line, unsigned Column, // Use the code completion consumer we were given, but adding any cached // code-completion results. AugmentedCodeCompleteConsumer *AugmentedConsumer - = new AugmentedCodeCompleteConsumer(*this, Consumer, - FrontendOpts.ShowMacrosInCodeCompletion, - FrontendOpts.ShowCodePatternsInCodeCompletion, - FrontendOpts.ShowGlobalSymbolsInCodeCompletion); + = new AugmentedCodeCompleteConsumer(*this, Consumer, CodeCompleteOpts); Clang->setCodeCompletionConsumer(AugmentedConsumer); Clang->getFrontendOpts().SkipFunctionBodies = true; diff --git a/lib/Frontend/CMakeLists.txt b/lib/Frontend/CMakeLists.txt index 2bee240..0566d54 100644 --- a/lib/Frontend/CMakeLists.txt +++ b/lib/Frontend/CMakeLists.txt @@ -1,14 +1,3 @@ -set( LLVM_USED_LIBS - clangAST - clangBasic - clangDriver - clangEdit - clangLex - clangParse - clangSema - clangSerialization - ) - add_clang_library(clangFrontend ASTConsumers.cpp ASTMerge.cpp @@ -41,21 +30,29 @@ add_clang_library(clangFrontend Warnings.cpp ) -IF(MSVC) - get_target_property(NON_ANSI_COMPILE_FLAGS clangFrontend COMPILE_FLAGS) - string(REPLACE /Za - "" NON_ANSI_COMPILE_FLAGS - ${NON_ANSI_COMPILE_FLAGS}) - set_target_properties(clangFrontend PROPERTIES COMPILE_FLAGS ${NON_ANSI_COMPILE_FLAGS}) -ENDIF(MSVC) - -add_dependencies(clangFrontend +add_dependencies(clangFrontend ClangAttrClasses ClangAttrList - ClangCC1Options - ClangDiagnosticFrontend + ClangAttrParsedAttrList + ClangCommentNodes + ClangDeclNodes + ClangDiagnosticAST + ClangDiagnosticCommon + ClangDiagnosticDriver + ClangDiagnosticFrontend ClangDiagnosticLex ClangDiagnosticSema ClangDriverOptions - ClangDeclNodes - ClangStmtNodes) + ClangStmtNodes + ) + +target_link_libraries(clangFrontend + clangAST + clangBasic + clangDriver + clangEdit + clangLex + clangParse + clangSema + clangSerialization + ) diff --git a/lib/Frontend/CompilerInstance.cpp b/lib/Frontend/CompilerInstance.cpp index 803e418..6de1531 100644 --- 
a/lib/Frontend/CompilerInstance.cpp +++ b/lib/Frontend/CompilerInstance.cpp @@ -387,9 +387,7 @@ void CompilerInstance::createCodeCompletionConsumer() { setCodeCompletionConsumer( createCodeCompletionConsumer(getPreprocessor(), Loc.FileName, Loc.Line, Loc.Column, - getFrontendOpts().ShowMacrosInCodeCompletion, - getFrontendOpts().ShowCodePatternsInCodeCompletion, - getFrontendOpts().ShowGlobalSymbolsInCodeCompletion, + getFrontendOpts().CodeCompleteOpts, llvm::outs())); if (!CompletionConsumer) return; @@ -415,16 +413,13 @@ CompilerInstance::createCodeCompletionConsumer(Preprocessor &PP, const std::string &Filename, unsigned Line, unsigned Column, - bool ShowMacros, - bool ShowCodePatterns, - bool ShowGlobals, + const CodeCompleteOptions &Opts, raw_ostream &OS) { if (EnableCodeCompletion(PP, Filename, Line, Column)) return 0; // Set up the creation routine for code-completion. - return new PrintingCodeCompleteConsumer(ShowMacros, ShowCodePatterns, - ShowGlobals, OS); + return new PrintingCodeCompleteConsumer(Opts, OS); } void CompilerInstance::createSema(TranslationUnitKind TUKind, @@ -456,7 +451,7 @@ void CompilerInstance::clearOutputFiles(bool EraseFiles) { FileMgr->FixupRelativePath(NewOutFile); if (llvm::error_code ec = llvm::sys::fs::rename(it->TempFilename, NewOutFile.str())) { - getDiagnostics().Report(diag::err_fe_unable_to_rename_temp) + getDiagnostics().Report(diag::err_unable_to_rename_temp) << it->TempFilename << it->Filename << ec.message(); bool existed; @@ -560,7 +555,8 @@ CompilerInstance::createOutputFile(StringRef OutputPath, TempPath += "-%%%%%%%%"; int fd; if (llvm::sys::fs::unique_file(TempPath.str(), fd, TempPath, - /*makeAbsolute=*/false) == llvm::errc::success) { + /*makeAbsolute=*/false, 0664) + == llvm::errc::success) { OS.reset(new llvm::raw_fd_ostream(fd, /*shouldClose=*/true)); OSFile = TempFile = TempPath.str(); } @@ -859,13 +855,6 @@ Module *CompilerInstance::loadModule(SourceLocation ImportLoc, } // Determine what file we're searching from. 
- SourceManager &SourceMgr = getSourceManager(); - SourceLocation ExpandedImportLoc = SourceMgr.getExpansionLoc(ImportLoc); - const FileEntry *CurFile - = SourceMgr.getFileEntryForID(SourceMgr.getFileID(ExpandedImportLoc)); - if (!CurFile) - CurFile = SourceMgr.getFileEntryForID(SourceMgr.getMainFileID()); - StringRef ModuleName = Path[0].first->getName(); SourceLocation ModuleNameLoc = Path[0].second; diff --git a/lib/Frontend/CompilerInvocation.cpp b/lib/Frontend/CompilerInvocation.cpp index 4c5b063..d39679c 100644 --- a/lib/Frontend/CompilerInvocation.cpp +++ b/lib/Frontend/CompilerInvocation.cpp @@ -13,7 +13,7 @@ #include "clang/Basic/FileManager.h" #include "clang/Driver/Arg.h" #include "clang/Driver/ArgList.h" -#include "clang/Driver/CC1Options.h" +#include "clang/Driver/Options.h" #include "clang/Driver/DriverDiagnostic.h" #include "clang/Driver/OptTable.h" #include "clang/Driver/Option.h" @@ -181,8 +181,21 @@ static void AnalyzerOptsToArgs(const AnalyzerOptions &Opts, ToArgsList &Res) { } static void CodeGenOptsToArgs(const CodeGenOptions &Opts, ToArgsList &Res) { - if (Opts.DebugInfo) - Res.push_back("-g"); + switch (Opts.DebugInfo) { + case CodeGenOptions::NoDebugInfo: + break; + case CodeGenOptions::DebugLineTablesOnly: + Res.push_back("-gline-tables-only"); + break; + case CodeGenOptions::LimitedDebugInfo: + Res.push_back("-g"); + Res.push_back("-flimit-debug-info"); + break; + case CodeGenOptions::FullDebugInfo: + Res.push_back("-g"); + Res.push_back("-fno-limit-debug-info"); + break; + } if (Opts.DisableLLVMOpts) Res.push_back("-disable-llvm-optzns"); if (Opts.DisableRedZone) @@ -193,14 +206,12 @@ static void CodeGenOptsToArgs(const CodeGenOptions &Opts, ToArgsList &Res) { Res.push_back("-fdebug-compilation-dir", Opts.DebugCompilationDir); if (!Opts.DwarfDebugFlags.empty()) Res.push_back("-dwarf-debug-flags", Opts.DwarfDebugFlags); - if (Opts.ObjCRuntimeHasARC) - Res.push_back("-fobjc-runtime-has-arc"); - if (Opts.ObjCRuntimeHasTerminate) - Res.push_back("-fobjc-runtime-has-terminate"); if (Opts.EmitGcovArcs) Res.push_back("-femit-coverage-data"); if (Opts.EmitGcovNotes) Res.push_back("-femit-coverage-notes"); + if (Opts.EmitOpenCLArgMetadata) + Res.push_back("-cl-kernel-arg-info"); if (!Opts.MergeAllConstants) Res.push_back("-fno-merge-all-constants"); if (Opts.NoCommon) @@ -270,6 +281,8 @@ static void CodeGenOptsToArgs(const CodeGenOptions &Opts, ToArgsList &Res) { Res.push_back("-fobjc-dispatch-method=non-legacy"); break; } + if (Opts.BoundsChecking > 0) + Res.push_back("-fbounds-checking=" + llvm::utostr(Opts.BoundsChecking)); if (Opts.NumRegisterParameters) Res.push_back("-mregparm", llvm::utostr(Opts.NumRegisterParameters)); if (Opts.NoGlobalMerge) @@ -296,6 +309,20 @@ static void CodeGenOptsToArgs(const CodeGenOptions &Opts, ToArgsList &Res) { Res.push_back("-disable-llvm-verifier"); for (unsigned i = 0, e = Opts.BackendOptions.size(); i != e; ++i) Res.push_back("-backend-option", Opts.BackendOptions[i]); + + switch (Opts.DefaultTLSModel) { + case CodeGenOptions::GeneralDynamicTLSModel: + break; + case CodeGenOptions::LocalDynamicTLSModel: + Res.push_back("-ftls-model=local-dynamic"); + break; + case CodeGenOptions::InitialExecTLSModel: + Res.push_back("-ftls-model=initial-exec"); + break; + case CodeGenOptions::LocalExecTLSModel: + Res.push_back("-ftls-model=local-exec"); + break; + } } static void DependencyOutputOptsToArgs(const DependencyOutputOptions &Opts, @@ -407,6 +434,7 @@ static const char *getActionName(frontend::ActionKind Kind) { case 
frontend::PluginAction: llvm_unreachable("Invalid kind!"); + case frontend::ASTDeclList: return "-ast-list"; case frontend::ASTDump: return "-ast-dump"; case frontend::ASTDumpXML: return "-ast-dump-xml"; case frontend::ASTPrint: return "-ast-print"; @@ -445,6 +473,18 @@ static void FileSystemOptsToArgs(const FileSystemOptions &Opts, ToArgsList &Res) Res.push_back("-working-directory", Opts.WorkingDir); } +static void CodeCompleteOptionsToArgs(const CodeCompleteOptions &Opts, + ToArgsList &Res) { + if (Opts.IncludeMacros) + Res.push_back("-code-completion-macros"); + if (Opts.IncludeCodePatterns) + Res.push_back("-code-completion-patterns"); + if (!Opts.IncludeGlobals) + Res.push_back("-no-code-completion-globals"); + if (Opts.IncludeBriefComments) + Res.push_back("-code-completion-brief-comments"); +} + static void FrontendOptsToArgs(const FrontendOptions &Opts, ToArgsList &Res) { if (Opts.DisableFree) Res.push_back("-disable-free"); @@ -452,12 +492,6 @@ static void FrontendOptsToArgs(const FrontendOptions &Opts, ToArgsList &Res) { Res.push_back("-relocatable-pch"); if (Opts.ShowHelp) Res.push_back("-help"); - if (Opts.ShowMacrosInCodeCompletion) - Res.push_back("-code-completion-macros"); - if (Opts.ShowCodePatternsInCodeCompletion) - Res.push_back("-code-completion-patterns"); - if (!Opts.ShowGlobalSymbolsInCodeCompletion) - Res.push_back("-no-code-completion-globals"); if (Opts.ShowStats) Res.push_back("-print-stats"); if (Opts.ShowTimers) @@ -485,6 +519,7 @@ static void FrontendOptsToArgs(const FrontendOptions &Opts, ToArgsList &Res) { Res.push_back("-arcmt-migrate"); break; } + CodeCompleteOptionsToArgs(Opts.CodeCompleteOpts, Res); if (!Opts.MTMigrateDir.empty()) Res.push_back("-mt-migrate-directory", Opts.MTMigrateDir); if (!Opts.ARCMTMigrateReportOut.empty()) @@ -524,6 +559,8 @@ static void FrontendOptsToArgs(const FrontendOptions &Opts, ToArgsList &Res) { for(unsigned i = 0, e = Opts.PluginArgs.size(); i != e; ++i) Res.push_back("-plugin-arg-" + Opts.ActionName, Opts.PluginArgs[i]); } + if (!Opts.ASTDumpFilter.empty()) + Res.push_back("-ast-dump-filter", Opts.ASTDumpFilter); for (unsigned i = 0, e = Opts.Plugins.size(); i != e; ++i) Res.push_back("-load", Opts.Plugins[i]); for (unsigned i = 0, e = Opts.AddPluginActions.size(); i != e; ++i) { @@ -608,6 +645,16 @@ static void HeaderSearchOptsToArgs(const HeaderSearchOptions &Opts, Res.push_back(E.Path); } + /// User-specified system header prefixes. + for (unsigned i = 0, e = Opts.SystemHeaderPrefixes.size(); i != e; ++i) { + if (Opts.SystemHeaderPrefixes[i].IsSystemHeader) + Res.push_back("-isystem-prefix"); + else + Res.push_back("-ino-system-prefix"); + + Res.push_back(Opts.SystemHeaderPrefixes[i].Prefix); + } + if (!Opts.ResourceDir.empty()) Res.push_back("-resource-dir", Opts.ResourceDir); if (!Opts.ModuleCachePath.empty()) @@ -653,8 +700,6 @@ static void LangOptsToArgs(const LangOptions &Opts, ToArgsList &Res) { Res.push_back("-fmsc-version=" + llvm::utostr(Opts.MSCVersion)); if (Opts.Borland) Res.push_back("-fborland-extensions"); - if (!Opts.ObjCNonFragileABI) - Res.push_back("-fobjc-fragile-abi"); if (Opts.ObjCDefaultSynthProperties) Res.push_back("-fobjc-default-synthesize-properties"); // NoInline is implicit. 
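The *OptsToArgs helpers being patched here serialize option state back into cc1 flags, usually with one switch per enum, as in the -ffp-contract and -ftls-model hunks nearby. The pattern in isolation (the enum and flag spellings mirror the -ffp-contract case above; the function name is invented):

// Round-trip an enum-valued option back to its command-line spelling.
#include <iostream>
#include <string>
#include <vector>

enum class FPContract { Off, On, Fast };

static void toArgs(FPContract M, std::vector<std::string> &Res) {
  switch (M) {
  case FPContract::Off:  Res.push_back("-ffp-contract=off");  break;
  case FPContract::On:   Res.push_back("-ffp-contract=on");   break;
  case FPContract::Fast: Res.push_back("-ffp-contract=fast"); break;
  }
}

int main() {
  std::vector<std::string> Res;
  toArgs(FPContract::Fast, Res);
  std::cout << Res[0] << '\n'; // -ffp-contract=fast
}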
@@ -690,8 +735,6 @@ static void LangOptsToArgs(const LangOptions &Opts, ToArgsList &Res) { Res.push_back("-fno-rtti"); if (Opts.MSBitfields) Res.push_back("-mms-bitfields"); - if (!Opts.NeXTRuntime) - Res.push_back("-fgnu-runtime"); if (Opts.Freestanding) Res.push_back("-ffreestanding"); if (Opts.NoBuiltin) @@ -721,6 +764,11 @@ static void LangOptsToArgs(const LangOptions &Opts, ToArgsList &Res) { Res.push_back("-ftrapv-handler", Opts.OverflowHandler); break; } + switch (Opts.getFPContractMode()) { + case LangOptions::FPC_Off: Res.push_back("-ffp-contract=off"); break; + case LangOptions::FPC_On: Res.push_back("-ffp-contract=on"); break; + case LangOptions::FPC_Fast: Res.push_back("-ffp-contract=fast"); break; + } if (Opts.HeinousExtensions) Res.push_back("-fheinous-gnu-extensions"); // Optimize is implicit. @@ -761,6 +809,7 @@ static void LangOptsToArgs(const LangOptions &Opts, ToArgsList &Res) { Res.push_back("-fobjc-gc-only"); } } + Res.push_back("-fobjc-runtime=" + Opts.ObjCRuntime.getAsString()); if (Opts.ObjCAutoRefCount) Res.push_back("-fobjc-arc"); if (Opts.ObjCRuntimeHasWeak) @@ -770,7 +819,7 @@ static void LangOptsToArgs(const LangOptions &Opts, ToArgsList &Res) { if (Opts.AppleKext) Res.push_back("-fapple-kext"); - + if (Opts.getVisibilityMode() != DefaultVisibility) { Res.push_back("-fvisibility"); if (Opts.getVisibilityMode() == HiddenVisibility) { @@ -880,7 +929,7 @@ static void TargetOptsToArgs(const TargetOptions &Opts, Res.push_back("-target-feature", Opts.Features[i]); } -void CompilerInvocation::toArgs(std::vector<std::string> &Res) { +void CompilerInvocation::toArgs(std::vector<std::string> &Res) const { ToArgsList List(Res); AnalyzerOptsToArgs(getAnalyzerOpts(), List); CodeGenOptsToArgs(getCodeGenOpts(), List); @@ -900,7 +949,7 @@ void CompilerInvocation::toArgs(std::vector<std::string> &Res) { //===----------------------------------------------------------------------===// using namespace clang::driver; -using namespace clang::driver::cc1options; +using namespace clang::driver::options; // @@ -909,14 +958,66 @@ static unsigned getOptimizationLevel(ArgList &Args, InputKind IK, unsigned DefaultOpt = 0; if (IK == IK_OpenCL && !Args.hasArg(OPT_cl_opt_disable)) DefaultOpt = 2; - // -Os/-Oz implies -O2 - return (Args.hasArg(OPT_Os) || Args.hasArg (OPT_Oz)) ? 2 : - Args.getLastArgIntValue(OPT_O, DefaultOpt, Diags); + + if (Arg *A = Args.getLastArg(options::OPT_O_Group)) { + if (A->getOption().matches(options::OPT_O0)) + return 0; + + assert (A->getOption().matches(options::OPT_O)); + + llvm::StringRef S(A->getValue(Args)); + if (S == "s" || S == "z" || S.empty()) + return 2; + + return Args.getLastArgIntValue(OPT_O, DefaultOpt, Diags); + } + + return DefaultOpt; +} + +static unsigned getOptimizationLevelSize(ArgList &Args, InputKind IK, + DiagnosticsEngine &Diags) { + if (Arg *A = Args.getLastArg(options::OPT_O_Group)) { + if (A->getOption().matches(options::OPT_O)) { + switch (A->getValue(Args)[0]) { + default: + return 0; + case 's': + return 1; + case 'z': + return 2; + } + } + } + return 0; +} + +static void addWarningArgs(ArgList &Args, std::vector<std::string> &Warnings) { + for (arg_iterator I = Args.filtered_begin(OPT_W_Group), + E = Args.filtered_end(); I != E; ++I) { + Arg *A = *I; + // If the argument is a pure flag, add its name (minus the "-W" at the beginning) + // to the warning list. Else, add its value (for the OPT_W case).
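// Illustrative examples of the two branches that follow (option spellings
// assumed from the surrounding code, not verified against the options table):
//   -Wunused      pure flag     -> Warnings gets "unused"
//   -Wfoo-bar     joined OPT_W  -> Warnings gets "foo-bar"
//   -Wl,-z,defs   joined OPT_W  -> value "l,-z,defs" starts with "l,",
//                                  so it is skipped (not a warning option).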
+ if (A->getOption().getKind() == Option::FlagClass) { + Warnings.push_back(A->getOption().getName().substr(2)); + } else { + for (unsigned Idx = 0, End = A->getNumValues(); + Idx < End; ++Idx) { + StringRef V = A->getValue(Args, Idx); + // "-Wl," and such are not warning options. + // FIXME: Should be handled by putting these in separate flags. + if (V.startswith("l,") || V.startswith("a,") || V.startswith("p,")) + continue; + + Warnings.push_back(V); + } + } + } } static bool ParseAnalyzerArgs(AnalyzerOptions &Opts, ArgList &Args, DiagnosticsEngine &Diags) { - using namespace cc1options; + using namespace options; bool Success = true; if (Arg *A = Args.getLastArg(OPT_analyzer_store)) { StringRef Name = A->getValue(Args); @@ -1026,7 +1127,6 @@ static bool ParseAnalyzerArgs(AnalyzerOptions &Opts, ArgList &Args, Opts.AnalyzeSpecificFunction = Args.getLastArgValue(OPT_analyze_function); Opts.UnoptimizedCFG = Args.hasArg(OPT_analysis_UnoptimizedCFG); Opts.CFGAddImplicitDtors = Args.hasArg(OPT_analysis_CFGAddImplicitDtors); - Opts.CFGAddInitializers = Args.hasArg(OPT_analysis_CFGAddInitializers); Opts.TrimGraph = Args.hasArg(OPT_trim_egraph); Opts.MaxNodes = Args.getLastArgIntValue(OPT_analyzer_max_nodes, 150000,Diags); Opts.MaxLoop = Args.getLastArgIntValue(OPT_analyzer_max_loop, 4, Diags); @@ -1066,7 +1166,7 @@ static bool ParseMigratorArgs(MigratorOptions &Opts, ArgList &Args) { static bool ParseCodeGenArgs(CodeGenOptions &Opts, ArgList &Args, InputKind IK, DiagnosticsEngine &Diags) { - using namespace cc1options; + using namespace options; bool Success = true; unsigned OptLevel = getOptimizationLevel(Args, IK, Diags); @@ -1083,12 +1183,18 @@ static bool ParseCodeGenArgs(CodeGenOptions &Opts, ArgList &Args, InputKind IK, : CodeGenOptions::OnlyAlwaysInlining; // -fno-inline-functions overrides OptimizationLevel > 1. Opts.NoInline = Args.hasArg(OPT_fno_inline); - Opts.Inlining = Args.hasArg(OPT_fno_inline_functions) ? + Opts.Inlining = Args.hasArg(OPT_fno_inline_functions) ? CodeGenOptions::OnlyAlwaysInlining : Opts.Inlining; - Opts.DebugInfo = Args.hasArg(OPT_g); - Opts.LimitDebugInfo = !Args.hasArg(OPT_fno_limit_debug_info) - || Args.hasArg(OPT_flimit_debug_info); + if (Args.hasArg(OPT_gline_tables_only)) { + Opts.DebugInfo = CodeGenOptions::DebugLineTablesOnly; + } else if (Args.hasArg(OPT_g_Flag)) { + if (Args.hasFlag(OPT_flimit_debug_info, OPT_fno_limit_debug_info, true)) + Opts.DebugInfo = CodeGenOptions::LimitedDebugInfo; + else + Opts.DebugInfo = CodeGenOptions::FullDebugInfo; + } + Opts.DisableLLVMOpts = Args.hasArg(OPT_disable_llvm_optzns); Opts.DisableRedZone = Args.hasArg(OPT_disable_red_zone); Opts.ForbidGuardVariables = Args.hasArg(OPT_fforbid_guard_variables); @@ -1099,8 +1205,7 @@ static bool ParseCodeGenArgs(CodeGenOptions &Opts, ArgList &Args, InputKind IK, Opts.MergeAllConstants = !Args.hasArg(OPT_fno_merge_all_constants); Opts.NoCommon = Args.hasArg(OPT_fno_common); Opts.NoImplicitFloat = Args.hasArg(OPT_no_implicit_float); - Opts.OptimizeSize = Args.hasArg(OPT_Os); - Opts.OptimizeSize = Args.hasArg(OPT_Oz) ? 
2 : Opts.OptimizeSize; + Opts.OptimizeSize = getOptimizationLevelSize(Args, IK, Diags); Opts.SimplifyLibCalls = !(Args.hasArg(OPT_fno_builtin) || Args.hasArg(OPT_ffreestanding)); Opts.UnrollLoops = Args.hasArg(OPT_funroll_loops) || @@ -1108,8 +1213,6 @@ static bool ParseCodeGenArgs(CodeGenOptions &Opts, ArgList &Args, InputKind IK, Opts.AsmVerbose = Args.hasArg(OPT_masm_verbose); Opts.ObjCAutoRefCountExceptions = Args.hasArg(OPT_fobjc_arc_exceptions); - Opts.ObjCRuntimeHasARC = Args.hasArg(OPT_fobjc_runtime_has_arc); - Opts.ObjCRuntimeHasTerminate = Args.hasArg(OPT_fobjc_runtime_has_terminate); Opts.CUDAIsDevice = Args.hasArg(OPT_fcuda_is_device); Opts.CXAAtExit = !Args.hasArg(OPT_fno_use_cxa_atexit); Opts.CXXCtorDtorAliases = Args.hasArg(OPT_mconstructor_aliases); @@ -1145,6 +1248,9 @@ static bool ParseCodeGenArgs(CodeGenOptions &Opts, ArgList &Args, InputKind IK, Opts.UnwindTables = Args.hasArg(OPT_munwind_tables); Opts.RelocationModel = Args.getLastArgValue(OPT_mrelocation_model, "pic"); Opts.TrapFuncName = Args.getLastArgValue(OPT_ftrap_function_EQ); + Opts.BoundsChecking = Args.getLastArgIntValue(OPT_fbounds_checking_EQ, 0, + Diags); + Opts.UseInitArray = Args.hasArg(OPT_fuse_init_array); Opts.FunctionSections = Args.hasArg(OPT_ffunction_sections); Opts.DataSections = Args.hasArg(OPT_fdata_sections); @@ -1156,6 +1262,8 @@ static bool ParseCodeGenArgs(CodeGenOptions &Opts, ArgList &Args, InputKind IK, Opts.InstrumentForProfiling = Args.hasArg(OPT_pg); Opts.EmitGcovArcs = Args.hasArg(OPT_femit_coverage_data); Opts.EmitGcovNotes = Args.hasArg(OPT_femit_coverage_notes); + Opts.EmitOpenCLArgMetadata = Args.hasArg(OPT_cl_kernel_arg_info); + Opts.EmitMicrosoftInlineAsm = Args.hasArg(OPT_fenable_experimental_ms_inline_asm); Opts.CoverageFile = Args.getLastArgValue(OPT_coverage_file); Opts.DebugCompilationDir = Args.getLastArgValue(OPT_fdebug_compilation_dir); Opts.LinkBitcodeFile = Args.getLastArgValue(OPT_mlink_bitcode_file); @@ -1180,12 +1288,28 @@ static bool ParseCodeGenArgs(CodeGenOptions &Opts, ArgList &Args, InputKind IK, } } + if (Arg *A = Args.getLastArg(OPT_ftlsmodel_EQ)) { + StringRef Name = A->getValue(Args); + unsigned Model = llvm::StringSwitch<unsigned>(Name) + .Case("global-dynamic", CodeGenOptions::GeneralDynamicTLSModel) + .Case("local-dynamic", CodeGenOptions::LocalDynamicTLSModel) + .Case("initial-exec", CodeGenOptions::InitialExecTLSModel) + .Case("local-exec", CodeGenOptions::LocalExecTLSModel) + .Default(~0U); + if (Model == ~0U) { + Diags.Report(diag::err_drv_invalid_value) << A->getAsString(Args) << Name; + Success = false; + } else { + Opts.DefaultTLSModel = static_cast<CodeGenOptions::TLSModel>(Model); + } + } + return Success; } static void ParseDependencyOutputArgs(DependencyOutputOptions &Opts, ArgList &Args) { - using namespace cc1options; + using namespace options; Opts.OutputFile = Args.getLastArgValue(OPT_dependency_file); Opts.Targets = Args.getAllArgValues(OPT_MT); Opts.IncludeSystemHeaders = Args.hasArg(OPT_sys_header_deps); @@ -1198,7 +1322,7 @@ static void ParseDependencyOutputArgs(DependencyOutputOptions &Opts, bool clang::ParseDiagnosticArgs(DiagnosticOptions &Opts, ArgList &Args, DiagnosticsEngine *Diags) { - using namespace cc1options; + using namespace options; bool Success = true; Opts.DiagnosticLogFile = Args.getLastArgValue(OPT_diagnostic_log_file); @@ -1273,6 +1397,8 @@ bool clang::ParseDiagnosticArgs(DiagnosticOptions &Opts, ArgList &Args, Opts.ShowSourceRanges = Args.hasArg(OPT_fdiagnostics_print_source_range_info); Opts.ShowParseableFixits =
Args.hasArg(OPT_fdiagnostics_parseable_fixits); Opts.VerifyDiagnostics = Args.hasArg(OPT_verify); + Opts.ElideType = !Args.hasArg(OPT_fno_elide_type); + Opts.ShowTemplateTree = Args.hasArg(OPT_fdiagnostics_show_template_tree); Opts.ErrorLimit = Args.getLastArgIntValue(OPT_ferror_limit, 0, Diags); Opts.MacroBacktraceLimit = Args.getLastArgIntValue(OPT_fmacro_backtrace_limit, @@ -1295,16 +1421,7 @@ bool clang::ParseDiagnosticArgs(DiagnosticOptions &Opts, ArgList &Args, } Opts.MessageLength = Args.getLastArgIntValue(OPT_fmessage_length, 0, Diags); Opts.DumpBuildInformation = Args.getLastArgValue(OPT_dump_build_information); - - for (arg_iterator it = Args.filtered_begin(OPT_W), - ie = Args.filtered_end(); it != ie; ++it) { - StringRef V = (*it)->getValue(Args); - // "-Wl," and such are not warnings options. - if (V.startswith("l,") || V.startswith("a,") || V.startswith("p,")) - continue; - - Opts.Warnings.push_back(V); - } + addWarningArgs(Args, Opts.Warnings); return Success; } @@ -1315,12 +1432,14 @@ static void ParseFileSystemArgs(FileSystemOptions &Opts, ArgList &Args) { static InputKind ParseFrontendArgs(FrontendOptions &Opts, ArgList &Args, DiagnosticsEngine &Diags) { - using namespace cc1options; + using namespace options; Opts.ProgramAction = frontend::ParseSyntaxOnly; if (const Arg *A = Args.getLastArg(OPT_Action_Group)) { switch (A->getOption().getID()) { default: llvm_unreachable("Invalid option in group!"); + case OPT_ast_list: + Opts.ProgramAction = frontend::ASTDeclList; break; case OPT_ast_dump: Opts.ProgramAction = frontend::ASTDump; break; case OPT_ast_dump_xml: @@ -1418,11 +1537,6 @@ static InputKind ParseFrontendArgs(FrontendOptions &Opts, ArgList &Args, Opts.Plugins = Args.getAllArgValues(OPT_load); Opts.RelocatablePCH = Args.hasArg(OPT_relocatable_pch); Opts.ShowHelp = Args.hasArg(OPT_help); - Opts.ShowMacrosInCodeCompletion = Args.hasArg(OPT_code_completion_macros); - Opts.ShowCodePatternsInCodeCompletion - = Args.hasArg(OPT_code_completion_patterns); - Opts.ShowGlobalSymbolsInCodeCompletion - = !Args.hasArg(OPT_no_code_completion_globals); Opts.ShowStats = Args.hasArg(OPT_print_stats); Opts.ShowTimers = Args.hasArg(OPT_ftime_report); Opts.ShowVersion = Args.hasArg(OPT_version); @@ -1432,6 +1546,17 @@ static InputKind ParseFrontendArgs(FrontendOptions &Opts, ArgList &Args, Opts.FixOnlyWarnings = Args.hasArg(OPT_fix_only_warnings); Opts.FixAndRecompile = Args.hasArg(OPT_fixit_recompile); Opts.FixToTemporaries = Args.hasArg(OPT_fixit_to_temp); + Opts.ASTDumpFilter = Args.getLastArgValue(OPT_ast_dump_filter); + + Opts.CodeCompleteOpts.IncludeMacros + = Args.hasArg(OPT_code_completion_macros); + Opts.CodeCompleteOpts.IncludeCodePatterns + = Args.hasArg(OPT_code_completion_patterns); + Opts.CodeCompleteOpts.IncludeGlobals + = !Args.hasArg(OPT_no_code_completion_globals); + Opts.CodeCompleteOpts.IncludeBriefComments + = Args.hasArg(OPT_code_completion_brief_comments); + Opts.OverrideRecordLayoutsFile = Args.getLastArgValue(OPT_foverride_record_layout_EQ); if (const Arg *A = Args.getLastArg(OPT_arcmt_check, @@ -1535,7 +1660,7 @@ std::string CompilerInvocation::GetResourcesPath(const char *Argv0, } static void ParseHeaderSearchArgs(HeaderSearchOptions &Opts, ArgList &Args) { - using namespace cc1options; + using namespace options; Opts.Sysroot = Args.getLastArgValue(OPT_isysroot, "/"); Opts.Verbose = Args.hasArg(OPT_v); Opts.UseBuiltinIncludes = !Args.hasArg(OPT_nobuiltininc); @@ -1620,6 +1745,14 @@ static void ParseHeaderSearchArgs(HeaderSearchOptions &Opts, ArgList &Args) { 
Opts.AddPath((*I)->getValue(Args), frontend::System, false, false, /*IgnoreSysRoot=*/true, /*IsInternal=*/true, (*I)->getOption().matches(OPT_internal_externc_isystem)); + + // Add the path prefixes which are implicitly treated as being system headers. + for (arg_iterator I = Args.filtered_begin(OPT_isystem_prefix, + OPT_ino_system_prefix), + E = Args.filtered_end(); + I != E; ++I) + Opts.AddSystemHeaderPrefix((*I)->getValue(Args), + (*I)->getOption().matches(OPT_isystem_prefix)); } void CompilerInvocation::setLangDefaults(LangOptions &Opts, InputKind IK, @@ -1677,9 +1810,22 @@ void CompilerInvocation::setLangDefaults(LangOptions &Opts, InputKind IK, Opts.HexFloats = Std.hasHexFloats(); Opts.ImplicitInt = Std.hasImplicitInt(); - // OpenCL has some additional defaults. + // Set OpenCL Version. if (LangStd == LangStandard::lang_opencl) { Opts.OpenCL = 1; + Opts.OpenCLVersion = 100; + } + else if (LangStd == LangStandard::lang_opencl11) { + Opts.OpenCL = 1; + Opts.OpenCLVersion = 110; + } + else if (LangStd == LangStandard::lang_opencl12) { + Opts.OpenCL = 1; + Opts.OpenCLVersion = 120; + } + + // OpenCL has some additional defaults. + if (Opts.OpenCL) { Opts.AltiVec = 0; Opts.CXXOperatorNames = 1; Opts.LaxVectorConversions = 0; @@ -1751,13 +1897,24 @@ static void ParseLangArgs(LangOptions &Opts, ArgList &Args, InputKind IK, } } + // -cl-std only applies for OpenCL language standards. + // Override the -std option in this case. if (const Arg *A = Args.getLastArg(OPT_cl_std_EQ)) { - if (strcmp(A->getValue(Args), "CL1.1") != 0) { + LangStandard::Kind OpenCLLangStd + = llvm::StringSwitch<LangStandard::Kind>(A->getValue(Args)) + .Case("CL", LangStandard::lang_opencl) + .Case("CL1.1", LangStandard::lang_opencl11) + .Case("CL1.2", LangStandard::lang_opencl12) + .Default(LangStandard::lang_unspecified); + + if (OpenCLLangStd == LangStandard::lang_unspecified) { Diags.Report(diag::err_drv_invalid_value) - << A->getAsString(Args) << A->getValue(Args); + << A->getAsString(Args) << A->getValue(Args); } + else + LangStd = OpenCLLangStd; } - + CompilerInvocation::setLangDefaults(Opts, IK, LangStd); // We abuse '-f[no-]gnu-keywords' to force overriding all GNU-extension @@ -1772,16 +1929,23 @@ static void ParseLangArgs(LangOptions &Opts, ArgList &Args, InputKind IK, Opts.CXXOperatorNames = 0; if (Opts.ObjC1) { + if (Arg *arg = Args.getLastArg(OPT_fobjc_runtime_EQ)) { + StringRef value = arg->getValue(Args); + if (Opts.ObjCRuntime.tryParse(value)) + Diags.Report(diag::err_drv_unknown_objc_runtime) << value; + } + if (Args.hasArg(OPT_fobjc_gc_only)) Opts.setGC(LangOptions::GCOnly); else if (Args.hasArg(OPT_fobjc_gc)) Opts.setGC(LangOptions::HybridGC); else if (Args.hasArg(OPT_fobjc_arc)) { Opts.ObjCAutoRefCount = 1; - if (Args.hasArg(OPT_fobjc_fragile_abi)) + if (!Opts.ObjCRuntime.isNonFragile()) Diags.Report(diag::err_arc_nonfragile_abi); } + Opts.ObjCRuntimeHasWeak = Opts.ObjCRuntime.hasWeak(); if (Args.hasArg(OPT_fobjc_runtime_has_weak)) Opts.ObjCRuntimeHasWeak = 1; @@ -1825,6 +1989,18 @@ static void ParseLangArgs(LangOptions &Opts, ArgList &Args, InputKind IK, Diags.Report(diag::err_drv_invalid_value) << Args.getLastArg(OPT_fvisibility)->getAsString(Args) << Vis; + if (Arg *A = Args.getLastArg(OPT_ffp_contract)) { + StringRef Val = A->getValue(Args); + if (Val == "fast") + Opts.setFPContractMode(LangOptions::FPC_Fast); + else if (Val == "on") + Opts.setFPContractMode(LangOptions::FPC_On); + else if (Val == "off") + Opts.setFPContractMode(LangOptions::FPC_Off); + else + Diags.Report(diag::err_drv_invalid_value) <<
A->getAsString(Args) << Val; + } + if (Args.hasArg(OPT_fvisibility_inlines_hidden)) Opts.InlineVisibilityHidden = 1; @@ -1876,25 +2052,21 @@ static void ParseLangArgs(LangOptions &Opts, ArgList &Args, InputKind IK, Opts.AccessControl = !Args.hasArg(OPT_fno_access_control); Opts.ElideConstructors = !Args.hasArg(OPT_fno_elide_constructors); Opts.MathErrno = Args.hasArg(OPT_fmath_errno); - Opts.InstantiationDepth = Args.getLastArgIntValue(OPT_ftemplate_depth, 1024, + Opts.InstantiationDepth = Args.getLastArgIntValue(OPT_ftemplate_depth, 512, Diags); Opts.ConstexprCallDepth = Args.getLastArgIntValue(OPT_fconstexpr_depth, 512, Diags); Opts.DelayedTemplateParsing = Args.hasArg(OPT_fdelayed_template_parsing); - Opts.NumLargeByValueCopy = Args.getLastArgIntValue(OPT_Wlarge_by_value_copy, + Opts.NumLargeByValueCopy = Args.getLastArgIntValue(OPT_Wlarge_by_value_copy_EQ, 0, Diags); Opts.MSBitfields = Args.hasArg(OPT_mms_bitfields); - Opts.NeXTRuntime = !Args.hasArg(OPT_fgnu_runtime); Opts.ObjCConstantStringClass = Args.getLastArgValue(OPT_fconstant_string_class); - Opts.ObjCNonFragileABI = !Args.hasArg(OPT_fobjc_fragile_abi); - if (Opts.ObjCNonFragileABI) - Opts.ObjCNonFragileABI2 = true; Opts.ObjCDefaultSynthProperties = Args.hasArg(OPT_fobjc_default_synthesize_properties); Opts.CatchUndefined = Args.hasArg(OPT_fcatch_undefined_behavior); Opts.EmitAllDecls = Args.hasArg(OPT_femit_all_decls); - Opts.PackStruct = Args.getLastArgIntValue(OPT_fpack_struct, 0, Diags); + Opts.PackStruct = Args.getLastArgIntValue(OPT_fpack_struct_EQ, 0, Diags); Opts.PICLevel = Args.getLastArgIntValue(OPT_pic_level, 0, Diags); Opts.PIELevel = Args.getLastArgIntValue(OPT_pie_level, 0, Diags); Opts.Static = Args.hasArg(OPT_static_define); @@ -1924,9 +2096,10 @@ static void ParseLangArgs(LangOptions &Opts, ArgList &Args, InputKind IK, Opts.Deprecated); // FIXME: Eliminate this dependency. 
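// For reference (derived from getOptimizationLevel/getOptimizationLevelSize
// above): -O0 -> Opt 0; -O1/-O2/-O3 -> their numeric level; -Os -> Opt 2,
// OptSize 1; -Oz -> Opt 2, OptSize 2. The lines just below then reduce these
// to the boolean LangOptions::Optimize and LangOptions::OptimizeSize.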
- unsigned Opt = getOptimizationLevel(Args, IK, Diags); + unsigned Opt = getOptimizationLevel(Args, IK, Diags), + OptSize = getOptimizationLevelSize(Args, IK, Diags); Opts.Optimize = Opt != 0; - Opts.OptimizeSize = Args.hasArg(OPT_Os) || Args.hasArg(OPT_Oz); + Opts.OptimizeSize = OptSize != 0; // This is the __NO_INLINE__ define, which just depends on things like the // optimization level and -fno-inline, not actually whether the backend has @@ -1934,6 +2107,7 @@ static void ParseLangArgs(LangOptions &Opts, ArgList &Args, InputKind IK, Opts.NoInlineDefine = !Opt || Args.hasArg(OPT_fno_inline); Opts.FastMath = Args.hasArg(OPT_ffast_math); + Opts.FiniteMathOnly = Args.hasArg(OPT_ffinite_math_only); unsigned SSP = Args.getLastArgIntValue(OPT_stack_protector, 0, Diags); switch (SSP) { @@ -1950,7 +2124,7 @@ static void ParseLangArgs(LangOptions &Opts, ArgList &Args, InputKind IK, static void ParsePreprocessorArgs(PreprocessorOptions &Opts, ArgList &Args, FileManager &FileMgr, DiagnosticsEngine &Diags) { - using namespace cc1options; + using namespace options; Opts.ImplicitPCHInclude = Args.getLastArgValue(OPT_include_pch); Opts.ImplicitPTHInclude = Args.getLastArgValue(OPT_include_pth); if (const Arg *A = Args.getLastArg(OPT_token_cache)) @@ -2052,16 +2226,17 @@ static void ParsePreprocessorArgs(PreprocessorOptions &Opts, ArgList &Args, static void ParsePreprocessorOutputArgs(PreprocessorOutputOptions &Opts, ArgList &Args) { - using namespace cc1options; + using namespace options; Opts.ShowCPP = !Args.hasArg(OPT_dM); Opts.ShowComments = Args.hasArg(OPT_C); Opts.ShowLineMarkers = !Args.hasArg(OPT_P); Opts.ShowMacroComments = Args.hasArg(OPT_CC); Opts.ShowMacros = Args.hasArg(OPT_dM) || Args.hasArg(OPT_dD); + Opts.RewriteIncludes = Args.hasArg(OPT_frewrite_includes); } static void ParseTargetArgs(TargetOptions &Opts, ArgList &Args) { - using namespace cc1options; + using namespace options; Opts.ABI = Args.getLastArgValue(OPT_target_abi); Opts.CXXABI = Args.getLastArgValue(OPT_cxx_abi); Opts.CPU = Args.getLastArgValue(OPT_target_cpu); @@ -2083,7 +2258,7 @@ bool CompilerInvocation::CreateFromArgs(CompilerInvocation &Res, bool Success = true; // Parse the arguments. - OwningPtr<OptTable> Opts(createCC1OptTable()); + OwningPtr<OptTable> Opts(createDriverOptTable()); unsigned MissingArgIndex, MissingArgCount; OwningPtr<InputArgList> Args( Opts->ParseArgs(ArgBegin, ArgEnd,MissingArgIndex, MissingArgCount)); @@ -2102,6 +2277,15 @@ bool CompilerInvocation::CreateFromArgs(CompilerInvocation &Res, Success = false; } + // Issue errors on arguments that are not valid for CC1. + for (ArgList::iterator I = Args->begin(), E = Args->end(); + I != E; ++I) { + if (!(*I)->getOption().isCC1Option()) { + Diags.Report(diag::err_drv_unknown_argument) << (*I)->getAsString(*Args); + Success = false; + } + } + Success = ParseAnalyzerArgs(Res.getAnalyzerOpts(), *Args, Diags) && Success; Success = ParseMigratorArgs(Res.getMigratorOpts(), *Args) && Success; ParseDependencyOutputArgs(Res.getDependencyOutputOpts(), *Args); diff --git a/lib/Frontend/CreateInvocationFromCommandLine.cpp b/lib/Frontend/CreateInvocationFromCommandLine.cpp index b477ade..0aca86e 100644 --- a/lib/Frontend/CreateInvocationFromCommandLine.cpp +++ b/lib/Frontend/CreateInvocationFromCommandLine.cpp @@ -43,13 +43,17 @@ clang::createInvocationFromCommandLine(ArrayRef<const char *> ArgList, Args.push_back(""); // FIXME: Remove dummy argument. Args.insert(Args.end(), ArgList.begin(), ArgList.end()); - // FIXME: Find a cleaner way to force the driver into restricted modes.
We - // also want to force it to use clang. + // FIXME: Find a cleaner way to force the driver into restricted modes. Args.push_back("-fsyntax-only"); // FIXME: We shouldn't have to pass in the path info. driver::Driver TheDriver("clang", llvm::sys::getDefaultTargetTriple(), "a.out", false, *Diags); + // Force driver to use clang. + // FIXME: This seems like a hack. Maybe the "Clang" tool subclass should be + // available for using it to get the arguments, thus avoiding the overkill + // of using the driver. + TheDriver.setForcedClangUse(); // Don't check that inputs exist, they may have been remapped. TheDriver.setCheckInputsExist(false); diff --git a/lib/Frontend/DiagnosticRenderer.cpp b/lib/Frontend/DiagnosticRenderer.cpp index 6c3bb1d..f052f90 100644 --- a/lib/Frontend/DiagnosticRenderer.cpp +++ b/lib/Frontend/DiagnosticRenderer.cpp @@ -22,56 +22,6 @@ #include using namespace clang; -/// Look through spelling locations for a macro argument expansion, and -/// if found skip to it so that we can trace the argument rather than the macros -/// in which that argument is used. If no macro argument expansion is found, -/// don't skip anything and return the starting location. -static SourceLocation skipToMacroArgExpansion(const SourceManager &SM, - SourceLocation StartLoc) { - for (SourceLocation L = StartLoc; L.isMacroID(); - L = SM.getImmediateSpellingLoc(L)) { - if (SM.isMacroArgExpansion(L)) - return L; - } - - // Otherwise just return initial location, there's nothing to skip. - return StartLoc; -} - -/// Gets the location of the immediate macro caller, one level up the stack -/// toward the initial macro typed into the source. -static SourceLocation getImmediateMacroCallerLoc(const SourceManager &SM, - SourceLocation Loc) { - if (!Loc.isMacroID()) return Loc; - - // When we have the location of (part of) an expanded parameter, its spelling - // location points to the argument as typed into the macro call, and - // therefore is used to locate the macro caller. - if (SM.isMacroArgExpansion(Loc)) - return SM.getImmediateSpellingLoc(Loc); - - // Otherwise, the caller of the macro is located where this macro is - // expanded (while the spelling is part of the macro definition). - return SM.getImmediateExpansionRange(Loc).first; -} - -/// Gets the location of the immediate macro callee, one level down the stack -/// toward the leaf macro. -static SourceLocation getImmediateMacroCalleeLoc(const SourceManager &SM, - SourceLocation Loc) { - if (!Loc.isMacroID()) return Loc; - - // When we have the location of (part of) an expanded parameter, its - // expansion location points to the unexpanded paramater reference within - // the macro definition (or callee). - if (SM.isMacroArgExpansion(Loc)) - return SM.getImmediateExpansionRange(Loc).first; - - // Otherwise, the callee of the macro is located where this location was - // spelled inside the macro definition. - return SM.getImmediateSpellingLoc(Loc); -} - /// \brief Retrieve the name of the immediate macro expansion. /// /// This routine starts from a source location, and finds the name of the macro @@ -109,24 +59,9 @@ static StringRef getImmediateMacroName(SourceLocation Loc, return ExpansionBuffer.substr(ExpansionInfo.second, MacroTokenLength); } -/// Get the presumed location of a diagnostic message. This computes the -/// presumed location for the top of any macro backtrace when present. 
-static PresumedLoc getDiagnosticPresumedLoc(const SourceManager &SM, - SourceLocation Loc) { - // This is a condensed form of the algorithm used by emitCaretDiagnostic to - // walk to the top of the macro call stack. - while (Loc.isMacroID()) { - Loc = skipToMacroArgExpansion(SM, Loc); - Loc = getImmediateMacroCallerLoc(SM, Loc); - } - - return SM.getPresumedLoc(Loc); -} - -DiagnosticRenderer::DiagnosticRenderer(const SourceManager &SM, - const LangOptions &LangOpts, +DiagnosticRenderer::DiagnosticRenderer(const LangOptions &LangOpts, const DiagnosticOptions &DiagOpts) -: SM(SM), LangOpts(LangOpts), DiagOpts(DiagOpts), LastLevel() {} +: LangOpts(LangOpts), DiagOpts(DiagOpts), LastLevel() {} DiagnosticRenderer::~DiagnosticRenderer() {} @@ -184,18 +119,23 @@ void DiagnosticRenderer::emitDiagnostic(SourceLocation Loc, StringRef Message, ArrayRef<CharSourceRange> Ranges, ArrayRef<FixItHint> FixItHints, + const SourceManager *SM, DiagOrStoredDiag D) { + assert(SM || Loc.isInvalid()); beginDiagnostic(D, Level); - PresumedLoc PLoc = getDiagnosticPresumedLoc(SM, Loc); + PresumedLoc PLoc; + if (Loc.isValid()) { + PLoc = SM->getPresumedLocForDisplay(Loc); - // First, if this diagnostic is not in the main file, print out the - // "included from" lines. - emitIncludeStack(PLoc.getIncludeLoc(), Level); + // First, if this diagnostic is not in the main file, print out the + // "included from" lines. + emitIncludeStack(PLoc.getIncludeLoc(), Level, *SM); + } // Next, emit the actual diagnostic message. - emitDiagnosticMessage(Loc, PLoc, Level, Message, Ranges, D); + emitDiagnosticMessage(Loc, PLoc, Level, Message, Ranges, SM, D); // Only recurse if we have a valid location. if (Loc.isValid()) { @@ -205,7 +145,7 @@ void DiagnosticRenderer::emitDiagnostic(SourceLocation Loc, llvm::SmallVector<FixItHint, 8> MergedFixits; if (!FixItHints.empty()) { - mergeFixits(FixItHints, SM, LangOpts, MergedFixits); + mergeFixits(FixItHints, *SM, LangOpts, MergedFixits); FixItHints = MergedFixits; } @@ -216,7 +156,7 @@ void DiagnosticRenderer::emitDiagnostic(SourceLocation Loc, MutableRanges.push_back(I->RemoveRange); unsigned MacroDepth = 0; - emitMacroExpansionsAndCarets(Loc, Level, MutableRanges, FixItHints, + emitMacroExpansionsAndCarets(Loc, Level, MutableRanges, FixItHints, *SM, MacroDepth); } @@ -230,6 +170,8 @@ void DiagnosticRenderer::emitDiagnostic(SourceLocation Loc, void DiagnosticRenderer::emitStoredDiagnostic(StoredDiagnostic &Diag) { emitDiagnostic(Diag.getLocation(), Diag.getLevel(), Diag.getMessage(), Diag.getRanges(), Diag.getFixIts(), + Diag.getLocation().isValid() ? &Diag.getLocation().getManager() + : 0, &Diag); } @@ -245,7 +187,8 @@ void DiagnosticRenderer::emitStoredDiagnostic(StoredDiagnostic &Diag) { /// \param Loc The include location of the current file (not the diagnostic /// location). void DiagnosticRenderer::emitIncludeStack(SourceLocation Loc, - DiagnosticsEngine::Level Level) { + DiagnosticsEngine::Level Level, + const SourceManager &SM) { // Skip redundant include stacks altogether. if (LastIncludeLoc == Loc) return; @@ -254,12 +197,13 @@ void DiagnosticRenderer::emitIncludeStack(SourceLocation Loc, if (!DiagOpts.ShowNoteIncludeStack && Level == DiagnosticsEngine::Note) return; - emitIncludeStackRecursively(Loc); + emitIncludeStackRecursively(Loc, SM); } /// \brief Helper to recursively walk up the include stack and print each layer /// on the way back down.
-void DiagnosticRenderer::emitIncludeStackRecursively(SourceLocation Loc) { +void DiagnosticRenderer::emitIncludeStackRecursively(SourceLocation Loc, + const SourceManager &SM) { if (Loc.isInvalid()) return; @@ -268,10 +212,10 @@ void DiagnosticRenderer::emitIncludeStackRecursively(SourceLocation Loc) { return; // Emit the other include frames first. - emitIncludeStackRecursively(PLoc.getIncludeLoc()); + emitIncludeStackRecursively(PLoc.getIncludeLoc(), SM); // Emit the inclusion text/note. - emitIncludeLocation(Loc, PLoc); + emitIncludeLocation(Loc, PLoc, SM); } /// \brief Recursively emit notes for each macro expansion and caret @@ -292,6 +236,7 @@ void DiagnosticRenderer::emitMacroExpansionsAndCarets( DiagnosticsEngine::Level Level, SmallVectorImpl<CharSourceRange>& Ranges, ArrayRef<FixItHint> Hints, + const SourceManager &SM, unsigned &MacroDepth, unsigned OnMacroInst) { @@ -302,26 +247,26 @@ void DiagnosticRenderer::emitMacroExpansionsAndCarets( if (Loc.isFileID()) { assert(MacroDepth == 0 && "We shouldn't hit a leaf node twice!"); MacroDepth = OnMacroInst; - emitCodeContext(Loc, Level, Ranges, Hints); + emitCodeContext(Loc, Level, Ranges, Hints, SM); return; } // Otherwise recurse through each macro expansion layer. // When processing macros, skip over the expansions leading up to // a macro argument, and trace the argument's expansion stack instead. - Loc = skipToMacroArgExpansion(SM, Loc); + Loc = SM.skipToMacroArgExpansion(Loc); - SourceLocation OneLevelUp = getImmediateMacroCallerLoc(SM, Loc); + SourceLocation OneLevelUp = SM.getImmediateMacroCallerLoc(Loc); // FIXME: Map ranges? - emitMacroExpansionsAndCarets(OneLevelUp, Level, Ranges, Hints, MacroDepth, + emitMacroExpansionsAndCarets(OneLevelUp, Level, Ranges, Hints, SM, MacroDepth, OnMacroInst + 1); // Save the original location so we can find the spelling of the macro call. SourceLocation MacroLoc = Loc; // Map the location. - Loc = getImmediateMacroCalleeLoc(SM, Loc); + Loc = SM.getImmediateMacroCalleeLoc(Loc); unsigned MacroSkipStart = 0, MacroSkipEnd = 0; if (MacroDepth > DiagOpts.MacroBacktraceLimit && @@ -341,9 +286,9 @@ void DiagnosticRenderer::emitMacroExpansionsAndCarets( I != E; ++I) { SourceLocation Start = I->getBegin(), End = I->getEnd(); if (Start.isMacroID()) - I->setBegin(getImmediateMacroCalleeLoc(SM, Start)); + I->setBegin(SM.getImmediateMacroCalleeLoc(Start)); if (End.isMacroID()) - I->setEnd(getImmediateMacroCalleeLoc(SM, End)); + I->setEnd(SM.getImmediateMacroCalleeLoc(End)); } if (Suppressed) { @@ -365,22 +310,22 @@ void DiagnosticRenderer::emitMacroExpansionsAndCarets( << getImmediateMacroName(MacroLoc, SM, LangOpts) << "'"; emitDiagnostic(SM.getSpellingLoc(Loc), DiagnosticsEngine::Note, Message.str(), - Ranges, ArrayRef<FixItHint>()); + Ranges, ArrayRef<FixItHint>(), &SM); } DiagnosticNoteRenderer::~DiagnosticNoteRenderer() {} void DiagnosticNoteRenderer::emitIncludeLocation(SourceLocation Loc, - PresumedLoc PLoc) { + PresumedLoc PLoc, + const SourceManager &SM) { // Generate a note indicating the include location.
SmallString<200> MessageStorage; llvm::raw_svector_ostream Message(MessageStorage); Message << "in file included from " << PLoc.getFilename() << ':' << PLoc.getLine() << ":"; - emitNote(Loc, Message.str()); + emitNote(Loc, Message.str(), &SM); } void DiagnosticNoteRenderer::emitBasicNote(StringRef Message) { - emitNote(SourceLocation(), Message); + emitNote(SourceLocation(), Message, 0); } - diff --git a/lib/Frontend/FrontendAction.cpp b/lib/Frontend/FrontendAction.cpp index da4bdfa..a4321e7 100644 --- a/lib/Frontend/FrontendAction.cpp +++ b/lib/Frontend/FrontendAction.cpp @@ -83,31 +83,31 @@ public: } }; - /// \brief Checks deserialized declarations and emits error if a name - /// matches one given in command-line using -error-on-deserialized-decl. - class DeserializedDeclsChecker : public DelegatingDeserializationListener { - ASTContext &Ctx; - std::set<std::string> NamesToCheck; - - public: - DeserializedDeclsChecker(ASTContext &Ctx, - const std::set<std::string> &NamesToCheck, - ASTDeserializationListener *Previous) - : DelegatingDeserializationListener(Previous), - Ctx(Ctx), NamesToCheck(NamesToCheck) { } - - virtual void DeclRead(serialization::DeclID ID, const Decl *D) { - if (const NamedDecl *ND = dyn_cast<NamedDecl>(D)) - if (NamesToCheck.find(ND->getNameAsString()) != NamesToCheck.end()) { - unsigned DiagID - = Ctx.getDiagnostics().getCustomDiagID(DiagnosticsEngine::Error, - "%0 was deserialized"); - Ctx.getDiagnostics().Report(Ctx.getFullLoc(D->getLocation()), DiagID) - << ND->getNameAsString(); - } - - DelegatingDeserializationListener::DeclRead(ID, D); - } +/// \brief Checks deserialized declarations and emits error if a name +/// matches one given in command-line using -error-on-deserialized-decl. +class DeserializedDeclsChecker : public DelegatingDeserializationListener { + ASTContext &Ctx; + std::set<std::string> NamesToCheck; + +public: + DeserializedDeclsChecker(ASTContext &Ctx, + const std::set<std::string> &NamesToCheck, + ASTDeserializationListener *Previous) + : DelegatingDeserializationListener(Previous), + Ctx(Ctx), NamesToCheck(NamesToCheck) { } + + virtual void DeclRead(serialization::DeclID ID, const Decl *D) { + if (const NamedDecl *ND = dyn_cast<NamedDecl>(D)) + if (NamesToCheck.find(ND->getNameAsString()) != NamesToCheck.end()) { + unsigned DiagID + = Ctx.getDiagnostics().getCustomDiagID(DiagnosticsEngine::Error, + "%0 was deserialized"); + Ctx.getDiagnostics().Report(Ctx.getFullLoc(D->getLocation()), DiagID) + << ND->getNameAsString(); + } + + DelegatingDeserializationListener::DeclRead(ID, D); + } }; } // end anonymous namespace @@ -162,6 +162,7 @@ bool FrontendAction::BeginSourceFile(CompilerInstance &CI, setCurrentInput(Input); setCompilerInstance(&CI); + bool HasBegunSourceFile = false; if (!BeginInvocation(CI)) goto failure; @@ -214,6 +215,7 @@ bool FrontendAction::BeginSourceFile(CompilerInstance &CI, // Inform the diagnostic client we are processing a source file. CI.getDiagnosticClient().BeginSourceFile(CI.getLangOpts(), 0); + HasBegunSourceFile = true; // Initialize the action. if (!BeginSourceFileAction(CI, Input.File)) goto failure; // Create the AST unit. @@ -228,6 +230,7 @@ bool FrontendAction::BeginSourceFile(CompilerInstance &CI, // Inform the diagnostic client we are processing a source file. CI.getDiagnosticClient().BeginSourceFile(CI.getLangOpts(), &CI.getPreprocessor()); + HasBegunSourceFile = true; // Initialize the action.
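// Note (illustrative, added commentary): HasBegunSourceFile, set to true at
// the two BeginSourceFile calls above, lets the failure path at the end of
// this function call DiagnosticClient::EndSourceFile only when a matching
// Begin actually happened.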
if (!BeginSourceFileAction(CI, Input.File)) @@ -309,13 +312,14 @@ bool FrontendAction::BeginSourceFile(CompilerInstance &CI, CI.setFileManager(0); } - CI.getDiagnosticClient().EndSourceFile(); + if (HasBegunSourceFile) + CI.getDiagnosticClient().EndSourceFile(); setCurrentInput(FrontendInputFile()); setCompilerInstance(0); return false; } -void FrontendAction::Execute() { +bool FrontendAction::Execute() { CompilerInstance &CI = getCompilerInstance(); // Initialize the main file entry. This needs to be delayed until after PCH @@ -325,7 +329,7 @@ bool FrontendAction::Execute() { getCurrentInput().IsSystem ? SrcMgr::C_System : SrcMgr::C_User)) - return; + return false; } if (CI.hasFrontendTimer()) { @@ -333,6 +337,8 @@ void FrontendAction::Execute() { ExecuteAction(); } else ExecuteAction(); + + return true; } void FrontendAction::EndSourceFile() { diff --git a/lib/Frontend/FrontendActions.cpp b/lib/Frontend/FrontendActions.cpp index 737ee4a..24960cf 100644 --- a/lib/Frontend/FrontendActions.cpp +++ b/lib/Frontend/FrontendActions.cpp @@ -47,13 +47,18 @@ void InitOnlyAction::ExecuteAction() { ASTConsumer *ASTPrintAction::CreateASTConsumer(CompilerInstance &CI, StringRef InFile) { if (raw_ostream *OS = CI.createDefaultOutputFile(false, InFile)) - return CreateASTPrinter(OS); + return CreateASTPrinter(OS, CI.getFrontendOpts().ASTDumpFilter); return 0; } ASTConsumer *ASTDumpAction::CreateASTConsumer(CompilerInstance &CI, StringRef InFile) { - return CreateASTDumper(); + return CreateASTDumper(CI.getFrontendOpts().ASTDumpFilter); } + +ASTConsumer *ASTDeclListAction::CreateASTConsumer(CompilerInstance &CI, + StringRef InFile) { + return CreateASTDeclNodeLister(); } ASTConsumer *ASTDumpXMLAction::CreateASTConsumer(CompilerInstance &CI, @@ -131,7 +136,7 @@ ASTConsumer *GenerateModuleAction::CreateASTConsumer(CompilerInstance &CI, /// /// \param Module The module we're collecting includes from. /// -/// \param Includes Will be augmented with the set of #includes or #imports +/// \param Includes Will be augmented with the set of \#includes or \#imports /// needed to load all of the named headers. static void collectModuleHeaderIncludes(const LangOptions &LangOpts, FileManager &FileMgr, diff --git a/lib/Frontend/InitHeaderSearch.cpp b/lib/Frontend/InitHeaderSearch.cpp index 3f7e682..8178f7a 100644 --- a/lib/Frontend/InitHeaderSearch.cpp +++ b/lib/Frontend/InitHeaderSearch.cpp @@ -40,6 +40,7 @@ class InitHeaderSearch { std::vector<std::pair<IncludeDirGroup, DirectoryLookup> > IncludePath; typedef std::vector<std::pair<IncludeDirGroup, DirectoryLookup> >::const_iterator path_iterator; + std::vector<std::pair<std::string, bool> > SystemHeaderPrefixes; HeaderSearch &Headers; bool Verbose; std::string IncludeSysroot; @@ -57,6 +58,12 @@ public: bool isCXXAware, bool isUserSupplied, bool isFramework, bool IgnoreSysRoot = false); + /// AddSystemHeaderPrefix - Add the specified prefix to the system header + /// prefix list. + void AddSystemHeaderPrefix(StringRef Prefix, bool IsSystemHeader) { + SystemHeaderPrefixes.push_back(std::make_pair(Prefix, IsSystemHeader)); + } + /// AddGnuCPlusPlusIncludePaths - Add the necessary paths to support a gnu /// libstdc++. void AddGnuCPlusPlusIncludePaths(StringRef Base, @@ -210,6 +217,8 @@ void InitHeaderSearch::AddDefaultCIncludePaths(const llvm::Triple &triple, switch (os) { case llvm::Triple::FreeBSD: case llvm::Triple::NetBSD: + case llvm::Triple::OpenBSD: + case llvm::Triple::Bitrig: break; default: // FIXME: temporary hack: hard-coded paths.
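A minimal, self-contained sketch of the prefix-classification idea the new SystemHeaderPrefixes list supports (illustrative only; the real lookup lives in clang's HeaderSearch, and the "last matching prefix wins" rule here is an assumption for exposition, as is every name in the snippet):

#include <string>
#include <utility>
#include <vector>

// Classify a header path against (prefix, IsSystemHeader) pairs collected
// from -isystem-prefix / -ino-system-prefix, scanning front to back so that
// later entries override earlier ones.
static bool isSystemByPrefix(
    const std::vector<std::pair<std::string, bool> > &Prefixes,
    const std::string &Path, bool DefaultIsSystem) {
  bool IsSystem = DefaultIsSystem;
  for (unsigned i = 0, e = Prefixes.size(); i != e; ++i)
    if (Path.compare(0, Prefixes[i].first.size(), Prefixes[i].first) == 0)
      IsSystem = Prefixes[i].second; // last matching prefix wins
  return IsSystem;
}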
@@ -623,6 +632,8 @@ void InitHeaderSearch::Realize(const LangOptions &Lang) { bool DontSearchCurDir = false; // TODO: set to true if -I- is set? Headers.SetSearchPaths(SearchList, NumQuoted, NumAngled, DontSearchCurDir); + Headers.SetSystemHeaderPrefixes(SystemHeaderPrefixes); + // If verbose, print the list of directories that will be searched. if (Verbose) { llvm::errs() << "#include \"...\" search starts here:\n"; @@ -660,6 +671,10 @@ void clang::ApplyHeaderSearchOptions(HeaderSearch &HS, Init.AddDefaultIncludePaths(Lang, Triple, HSOpts); + for (unsigned i = 0, e = HSOpts.SystemHeaderPrefixes.size(); i != e; ++i) + Init.AddSystemHeaderPrefix(HSOpts.SystemHeaderPrefixes[i].Prefix, + HSOpts.SystemHeaderPrefixes[i].IsSystemHeader); + if (HSOpts.UseBuiltinIncludes) { // Set up the builtin include directory in the module map. llvm::sys::Path P(HSOpts.ResourceDir); diff --git a/lib/Frontend/InitPreprocessor.cpp b/lib/Frontend/InitPreprocessor.cpp index 93d49b0..1440da6 100644 --- a/lib/Frontend/InitPreprocessor.cpp +++ b/lib/Frontend/InitPreprocessor.cpp @@ -49,7 +49,7 @@ static void DefineBuiltinMacro(MacroBuilder &Builder, StringRef Macro, } } -/// AddImplicitInclude - Add an implicit #include of the specified file to the +/// AddImplicitInclude - Add an implicit \#include of the specified file to the /// predefines buffer. static void AddImplicitInclude(MacroBuilder &Builder, StringRef File, FileManager &FileMgr) { @@ -66,8 +66,8 @@ static void AddImplicitIncludeMacros(MacroBuilder &Builder, Builder.append("##"); // ##? } -/// AddImplicitIncludePTH - Add an implicit #include using the original file -/// used to generate a PTH cache. +/// AddImplicitIncludePTH - Add an implicit \#include using the original file +/// used to generate a PTH cache. static void AddImplicitIncludePTH(MacroBuilder &Builder, Preprocessor &PP, StringRef ImplicitIncludePTH) { PTHManager *P = PP.getPTHManager(); @@ -288,20 +288,16 @@ static void InitializeStandardPredefinedMacros(const TargetInfo &TI, else if (!LangOpts.GNUMode && LangOpts.Digraphs) Builder.defineMacro("__STDC_VERSION__", "199409L"); } else { - if (LangOpts.GNUMode) - Builder.defineMacro("__cplusplus"); - else { - // C++0x [cpp.predefined]p1: - // The name __cplusplus is defined to the value 201103L when compiling a - // C++ translation unit. - if (LangOpts.CPlusPlus0x) - Builder.defineMacro("__cplusplus", "201103L"); - // C++03 [cpp.predefined]p1: - // The name __cplusplus is defined to the value 199711L when compiling a - // C++ translation unit. - else - Builder.defineMacro("__cplusplus", "199711L"); - } + // C++11 [cpp.predefined]p1: + // The name __cplusplus is defined to the value 201103L when compiling a + // C++ translation unit. + if (LangOpts.CPlusPlus0x) + Builder.defineMacro("__cplusplus", "201103L"); + // C++03 [cpp.predefined]p1: + // The name __cplusplus is defined to the value 199711L when compiling a + // C++ translation unit.
+ else + Builder.defineMacro("__cplusplus", "199711L"); } if (LangOpts.ObjC1) @@ -369,7 +365,7 @@ static void InitializePredefinedMacros(const TargetInfo &TI, Builder.defineMacro("__GXX_EXPERIMENTAL_CXX0X__"); if (LangOpts.ObjC1) { - if (LangOpts.ObjCNonFragileABI) { + if (LangOpts.ObjCRuntime.isNonFragile()) { Builder.defineMacro("__OBJC2__"); if (LangOpts.ObjCExceptions) @@ -379,8 +375,13 @@ static void InitializePredefinedMacros(const TargetInfo &TI, if (LangOpts.getGC() != LangOptions::NonGC) Builder.defineMacro("__OBJC_GC__"); - if (LangOpts.NeXTRuntime) + if (LangOpts.ObjCRuntime.isNeXTFamily()) Builder.defineMacro("__NEXT_RUNTIME__"); + + Builder.defineMacro("IBOutlet", "__attribute__((iboutlet))"); + Builder.defineMacro("IBOutletCollection(ClassName)", + "__attribute__((iboutletcollection(ClassName)))"); + Builder.defineMacro("IBAction", "void)__attribute__((ibaction)"); } // darwin_constant_cfstrings controls this. This is also dependent @@ -444,6 +445,26 @@ static void InitializePredefinedMacros(const TargetInfo &TI, // Initialize target-specific preprocessor defines. + // __BYTE_ORDER__ was added in GCC 4.6. It's analogous + // to the macro __BYTE_ORDER (no trailing underscores) + // from glibc's header. + // We don't support the PDP-11 as a target, but include + // the define so it can still be compared against. + Builder.defineMacro("__ORDER_LITTLE_ENDIAN__", "1234"); + Builder.defineMacro("__ORDER_BIG_ENDIAN__", "4321"); + Builder.defineMacro("__ORDER_PDP_ENDIAN__", "3412"); + if (TI.isBigEndian()) + Builder.defineMacro("__BYTE_ORDER__", "__ORDER_BIG_ENDIAN__"); + else + Builder.defineMacro("__BYTE_ORDER__", "__ORDER_LITTLE_ENDIAN__"); + + + if (TI.getPointerWidth(0) == 64 && TI.getLongWidth() == 64 + && TI.getIntWidth() == 32) { + Builder.defineMacro("_LP64"); + Builder.defineMacro("__LP64__"); + } + // Define type sizing macros based on the target properties. assert(TI.getCharWidth() == 8 && "Only support 8-bit char so far"); Builder.defineMacro("__CHAR_BIT__", "8"); @@ -501,6 +522,9 @@ static void InitializePredefinedMacros(const TargetInfo &TI, if (!LangOpts.CharIsSigned) Builder.defineMacro("__CHAR_UNSIGNED__"); + if (!TargetInfo::isTypeSigned(TI.getWCharType())) + Builder.defineMacro("__WCHAR_UNSIGNED__"); + if (!TargetInfo::isTypeSigned(TI.getWIntType())) Builder.defineMacro("__WINT_UNSIGNED__"); @@ -520,15 +544,13 @@ static void InitializePredefinedMacros(const TargetInfo &TI, if (TI.getLongLongWidth() > TI.getLongWidth()) DefineExactWidthIntType(TargetInfo::SignedLongLong, TI, Builder); - // Add __builtin_va_list typedef. - Builder.append(TI.getVAListDeclaration()); - if (const char *Prefix = TI.getUserLabelPrefix()) Builder.defineMacro("__USER_LABEL_PREFIX__", Prefix); - // Build configuration options. FIXME: these should be controlled by - // command line options or something. 
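// Note (illustrative, added commentary on the hunk just below):
// __FINITE_MATH_ONLY__ was previously hard-wired to 0; it now becomes 1
// under -ffast-math or -ffinite-math-only, matching GCC's predefine.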
- Builder.defineMacro("__FINITE_MATH_ONLY__", "0"); + if (LangOpts.FastMath || LangOpts.FiniteMathOnly) + Builder.defineMacro("__FINITE_MATH_ONLY__", "1"); + else + Builder.defineMacro("__FINITE_MATH_ONLY__", "0"); if (LangOpts.GNUInline) Builder.defineMacro("__GNUC_GNU_INLINE__"); diff --git a/lib/Frontend/LayoutOverrideSource.cpp b/lib/Frontend/LayoutOverrideSource.cpp index eb7865e..e023250 100644 --- a/lib/Frontend/LayoutOverrideSource.cpp +++ b/lib/Frontend/LayoutOverrideSource.cpp @@ -9,6 +9,7 @@ #include "clang/Frontend/LayoutOverrideSource.h" #include "clang/AST/Decl.h" #include "llvm/Support/raw_ostream.h" +#include #include #include diff --git a/lib/Frontend/PrintPreprocessedOutput.cpp b/lib/Frontend/PrintPreprocessedOutput.cpp index 9e1587c..5311ed5 100644 --- a/lib/Frontend/PrintPreprocessedOutput.cpp +++ b/lib/Frontend/PrintPreprocessedOutput.cpp @@ -26,6 +26,7 @@ #include "llvm/ADT/StringRef.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Support/ErrorHandling.h" +#include #include using namespace clang; @@ -87,7 +88,7 @@ private: unsigned CurLine; bool EmittedTokensOnThisLine; - bool EmittedMacroOnThisLine; + bool EmittedDirectiveOnThisLine; SrcMgr::CharacteristicKind FileType; SmallString<512> CurFilename; bool Initialized; @@ -103,7 +104,7 @@ public: CurLine = 0; CurFilename += ""; EmittedTokensOnThisLine = false; - EmittedMacroOnThisLine = false; + EmittedDirectiveOnThisLine = false; FileType = SrcMgr::C_User; Initialized = false; @@ -111,10 +112,15 @@ public: UseLineDirective = PP.getLangOpts().MicrosoftExt; } - void SetEmittedTokensOnThisLine() { EmittedTokensOnThisLine = true; } + void setEmittedTokensOnThisLine() { EmittedTokensOnThisLine = true; } bool hasEmittedTokensOnThisLine() const { return EmittedTokensOnThisLine; } - bool StartNewLineIfNeeded(); + void setEmittedDirectiveOnThisLine() { EmittedDirectiveOnThisLine = true; } + bool hasEmittedDirectiveOnThisLine() const { + return EmittedDirectiveOnThisLine; + } + + bool startNewLineIfNeeded(bool ShouldUpdateCurrentLine = true); virtual void FileChanged(SourceLocation Loc, FileChangeReason Reason, SrcMgr::CharacteristicKind FileType, @@ -158,11 +164,7 @@ public: void PrintPPOutputPPCallbacks::WriteLineInfo(unsigned LineNo, const char *Extra, unsigned ExtraLen) { - if (EmittedTokensOnThisLine || EmittedMacroOnThisLine) { - OS << '\n'; - EmittedTokensOnThisLine = false; - EmittedMacroOnThisLine = false; - } + startNewLineIfNeeded(/*ShouldUpdateCurrentLine=*/false); // Emit #line directives or GNU line markers depending on what mode we're in. if (UseLineDirective) { @@ -207,23 +209,21 @@ bool PrintPPOutputPPCallbacks::MoveToLine(unsigned LineNo) { } else { // Okay, we're in -P mode, which turns off line markers. However, we still // need to emit a newline between tokens on different lines. 
- if (EmittedTokensOnThisLine || EmittedMacroOnThisLine) { - OS << '\n'; - EmittedTokensOnThisLine = false; - EmittedMacroOnThisLine = false; - } + startNewLineIfNeeded(/*ShouldUpdateCurrentLine=*/false); } CurLine = LineNo; return true; } -bool PrintPPOutputPPCallbacks::StartNewLineIfNeeded() { - if (EmittedTokensOnThisLine || EmittedMacroOnThisLine) { +bool +PrintPPOutputPPCallbacks::startNewLineIfNeeded(bool ShouldUpdateCurrentLine) { + if (EmittedTokensOnThisLine || EmittedDirectiveOnThisLine) { OS << '\n'; EmittedTokensOnThisLine = false; - EmittedMacroOnThisLine = false; - ++CurLine; + EmittedDirectiveOnThisLine = false; + if (ShouldUpdateCurrentLine) + ++CurLine; return true; } @@ -307,7 +307,7 @@ void PrintPPOutputPPCallbacks::MacroDefined(const Token &MacroNameTok, MoveToLine(MI->getDefinitionLoc()); PrintMacroDefinition(*MacroNameTok.getIdentifierInfo(), *MI, PP, OS); - EmittedMacroOnThisLine = true; + setEmittedDirectiveOnThisLine(); } void PrintPPOutputPPCallbacks::MacroUndefined(const Token &MacroNameTok, @@ -317,12 +317,13 @@ void PrintPPOutputPPCallbacks::MacroUndefined(const Token &MacroNameTok, MoveToLine(MacroNameTok.getLocation()); OS << "#undef " << MacroNameTok.getIdentifierInfo()->getName(); - EmittedMacroOnThisLine = true; + setEmittedDirectiveOnThisLine(); } void PrintPPOutputPPCallbacks::PragmaComment(SourceLocation Loc, const IdentifierInfo *Kind, const std::string &Str) { + startNewLineIfNeeded(); MoveToLine(Loc); OS << "#pragma comment(" << Kind->getName(); @@ -343,11 +344,12 @@ void PrintPPOutputPPCallbacks::PragmaComment(SourceLocation Loc, } OS << ')'; - EmittedTokensOnThisLine = true; + setEmittedDirectiveOnThisLine(); } void PrintPPOutputPPCallbacks::PragmaMessage(SourceLocation Loc, StringRef Str) { + startNewLineIfNeeded(); MoveToLine(Loc); OS << "#pragma message("; @@ -366,26 +368,29 @@ void PrintPPOutputPPCallbacks::PragmaMessage(SourceLocation Loc, OS << '"'; OS << ')'; - EmittedTokensOnThisLine = true; + setEmittedDirectiveOnThisLine(); } void PrintPPOutputPPCallbacks:: PragmaDiagnosticPush(SourceLocation Loc, StringRef Namespace) { + startNewLineIfNeeded(); MoveToLine(Loc); OS << "#pragma " << Namespace << " diagnostic push"; - EmittedTokensOnThisLine = true; + setEmittedDirectiveOnThisLine(); } void PrintPPOutputPPCallbacks:: PragmaDiagnosticPop(SourceLocation Loc, StringRef Namespace) { + startNewLineIfNeeded(); MoveToLine(Loc); OS << "#pragma " << Namespace << " diagnostic pop"; - EmittedTokensOnThisLine = true; + setEmittedDirectiveOnThisLine(); } void PrintPPOutputPPCallbacks:: PragmaDiagnostic(SourceLocation Loc, StringRef Namespace, diag::Mapping Map, StringRef Str) { + startNewLineIfNeeded(); MoveToLine(Loc); OS << "#pragma " << Namespace << " diagnostic "; switch (Map) { @@ -403,7 +408,7 @@ PragmaDiagnostic(SourceLocation Loc, StringRef Namespace, break; } OS << " \"" << Str << '"'; - EmittedTokensOnThisLine = true; + setEmittedDirectiveOnThisLine(); } /// HandleFirstTokOnLine - When emitting a preprocessed file in -E mode, this @@ -471,10 +476,9 @@ struct UnknownPragmaHandler : public PragmaHandler { Token &PragmaTok) { // Figure out what line we went to and insert the appropriate number of // newline characters. - Callbacks->StartNewLineIfNeeded(); + Callbacks->startNewLineIfNeeded(); Callbacks->MoveToLine(PragmaTok.getLocation()); Callbacks->OS.write(Prefix, strlen(Prefix)); - Callbacks->SetEmittedTokensOnThisLine(); // Read and print all of the pragma tokens. 
while (PragmaTok.isNot(tok::eod)) { if (PragmaTok.hasLeadingSpace()) OS << ' '; std::string TokSpell = PP.getSpelling(PragmaTok); Callbacks->OS.write(&TokSpell[0], TokSpell.size()); PP.LexUnexpandedToken(PragmaTok); } - Callbacks->StartNewLineIfNeeded(); + Callbacks->setEmittedDirectiveOnThisLine(); } }; } // end anonymous namespace @@ -497,6 +501,10 @@ static void PrintPreprocessedTokens(Preprocessor &PP, Token &Tok, PrevPrevTok.startToken(); PrevTok.startToken(); while (1) { + if (Callbacks->hasEmittedDirectiveOnThisLine()) { + Callbacks->startNewLineIfNeeded(); + Callbacks->MoveToLine(Tok.getLocation()); + } // If this token is at the start of a line, emit newlines if needed. if (Tok.isAtStartOfLine() && Callbacks->HandleFirstTokOnLine(Tok)) { @@ -533,7 +541,7 @@ static void PrintPreprocessedTokens(Preprocessor &PP, Token &Tok, if (Tok.getKind() == tok::comment) Callbacks->HandleNewlinesInToken(&S[0], S.size()); } - Callbacks->SetEmittedTokensOnThisLine(); + Callbacks->setEmittedTokensOnThisLine(); if (Tok.is(tok::eof)) break; diff --git a/lib/Frontend/SerializedDiagnosticPrinter.cpp b/lib/Frontend/SerializedDiagnosticPrinter.cpp index 7bf8742..a20f30d 100644 --- a/lib/Frontend/SerializedDiagnosticPrinter.cpp +++ b/lib/Frontend/SerializedDiagnosticPrinter.cpp @@ -53,10 +53,9 @@ class SDiagsRenderer : public DiagnosticNoteRenderer { RecordData &Record; public: SDiagsRenderer(SDiagsWriter &Writer, RecordData &Record, - const SourceManager &SM, const LangOptions &LangOpts, const DiagnosticOptions &DiagOpts) - : DiagnosticNoteRenderer(SM, LangOpts, DiagOpts), + : DiagnosticNoteRenderer(LangOpts, DiagOpts), Writer(Writer), Record(Record){} virtual ~SDiagsRenderer() {} @@ -67,18 +66,21 @@ protected: DiagnosticsEngine::Level Level, StringRef Message, ArrayRef<CharSourceRange> Ranges, + const SourceManager *SM, DiagOrStoredDiag D); virtual void emitDiagnosticLoc(SourceLocation Loc, PresumedLoc PLoc, DiagnosticsEngine::Level Level, - ArrayRef<CharSourceRange> Ranges) {} + ArrayRef<CharSourceRange> Ranges, + const SourceManager &SM) {} - void emitNote(SourceLocation Loc, StringRef Message); + void emitNote(SourceLocation Loc, StringRef Message, const SourceManager *SM); virtual void emitCodeContext(SourceLocation Loc, DiagnosticsEngine::Level Level, SmallVectorImpl<CharSourceRange>& Ranges, - ArrayRef<FixItHint> Hints); + ArrayRef<FixItHint> Hints, + const SourceManager &SM); virtual void beginDiagnostic(DiagOrStoredDiag D, DiagnosticsEngine::Level Level); @@ -137,15 +139,16 @@ private: unsigned getEmitFile(const char *Filename); /// \brief Add SourceLocation information to the specified record. - void AddLocToRecord(SourceLocation Loc, const SourceManager &SM, + void AddLocToRecord(SourceLocation Loc, const SourceManager *SM, PresumedLoc PLoc, RecordDataImpl &Record, unsigned TokSize = 0); /// \brief Add SourceLocation information to the specified record. void AddLocToRecord(SourceLocation Loc, RecordDataImpl &Record, - const SourceManager &SM, + const SourceManager *SM, unsigned TokSize = 0) { - AddLocToRecord(Loc, SM, SM.getPresumedLoc(Loc), Record, TokSize); + AddLocToRecord(Loc, SM, SM ? SM->getPresumedLoc(Loc) : PresumedLoc(), + Record, TokSize); } /// \brief Add CharSourceRange information to the specified record.
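A minimal sketch of the pattern this refactoring applies throughout the diagnostics code: a diagnostic may now carry no source location, so the SourceManager travels as a nullable pointer and every location query is guarded. All names below are stand-ins, not clang's types:

#include <cassert>
#include <cstdio>

struct Loc { bool Valid; };                       // stand-in for SourceLocation
struct Mgr { unsigned offsetOf(Loc) const { return 42; } }; // stand-in manager

// Mirrors AddLocToRecord above: only dereference the manager when a valid
// location guarantees one exists.
static unsigned offsetOrZero(const Mgr *SM, Loc L) {
  assert((SM || !L.Valid) && "valid location requires a manager");
  return (SM && L.Valid) ? SM->offsetOf(L) : 0;
}

int main() {
  Loc None = { false };
  std::printf("%u\n", offsetOrZero(0, None)); // prints 0; no manager needed
}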
@@ -241,7 +244,7 @@ static void EmitRecordID(unsigned ID, const char *Name, } void SDiagsWriter::AddLocToRecord(SourceLocation Loc, - const SourceManager &SM, + const SourceManager *SM, PresumedLoc PLoc, RecordDataImpl &Record, unsigned TokSize) { @@ -257,19 +260,19 @@ void SDiagsWriter::AddLocToRecord(SourceLocation Loc, Record.push_back(getEmitFile(PLoc.getFilename())); Record.push_back(PLoc.getLine()); Record.push_back(PLoc.getColumn()+TokSize); - Record.push_back(SM.getFileOffset(Loc)); + Record.push_back(SM->getFileOffset(Loc)); } void SDiagsWriter::AddCharSourceRangeToRecord(CharSourceRange Range, RecordDataImpl &Record, const SourceManager &SM) { - AddLocToRecord(Range.getBegin(), Record, SM); + AddLocToRecord(Range.getBegin(), Record, &SM); unsigned TokSize = 0; if (Range.isTokenRange()) TokSize = Lexer::MeasureTokenLength(Range.getEnd(), SM, *LangOpts); - AddLocToRecord(Range.getEnd(), Record, SM, TokSize); + AddLocToRecord(Range.getEnd(), Record, &SM, TokSize); } unsigned SDiagsWriter::getEmitFile(const char *FileName){ @@ -484,13 +487,15 @@ void SDiagsWriter::HandleDiagnostic(DiagnosticsEngine::Level DiagLevel, diagBuf.clear(); Info.FormatDiagnostic(diagBuf); - SourceManager &SM = Info.getSourceManager(); - SDiagsRenderer Renderer(*this, Record, SM, *LangOpts, DiagOpts); + const SourceManager * + SM = Info.hasSourceManager() ? &Info.getSourceManager() : 0; + SDiagsRenderer Renderer(*this, Record, *LangOpts, DiagOpts); Renderer.emitDiagnostic(Info.getLocation(), DiagLevel, diagBuf.str(), Info.getRanges(), llvm::makeArrayRef(Info.getFixItHints(), Info.getNumFixItHints()), + SM, &Info); } @@ -500,6 +505,7 @@ SDiagsRenderer::emitDiagnosticMessage(SourceLocation Loc, DiagnosticsEngine::Level Level, StringRef Message, ArrayRef<CharSourceRange> Ranges, + const SourceManager *SM, DiagOrStoredDiag D) { // Emit the RECORD_DIAG record. Writer.Record.clear(); @@ -539,7 +545,8 @@ void SDiagsRenderer::endDiagnostic(DiagOrStoredDiag D, void SDiagsRenderer::emitCodeContext(SourceLocation Loc, DiagnosticsEngine::Level Level, SmallVectorImpl<CharSourceRange> &Ranges, - ArrayRef<FixItHint> Hints) { + ArrayRef<FixItHint> Hints, + const SourceManager &SM) { // Emit Source Ranges. for (ArrayRef<CharSourceRange>::iterator it=Ranges.begin(), ei=Ranges.end(); it != ei; ++it) { @@ -562,7 +569,8 @@ void SDiagsRenderer::emitCodeContext(SourceLocation Loc, } } -void SDiagsRenderer::emitNote(SourceLocation Loc, StringRef Message) { +void SDiagsRenderer::emitNote(SourceLocation Loc, StringRef Message, + const SourceManager *SM) { Writer.Stream.EnterSubblock(BLOCK_DIAG, 4); RecordData Record; Record.push_back(RECORD_DIAG); diff --git a/lib/Frontend/TextDiagnostic.cpp b/lib/Frontend/TextDiagnostic.cpp index 65fb1ae..9bb3e1d 100644 --- a/lib/Frontend/TextDiagnostic.cpp +++ b/lib/Frontend/TextDiagnostic.cpp @@ -20,6 +20,7 @@ #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringExtras.h" #include <algorithm> +#include <cctype> using namespace clang; @@ -31,16 +32,36 @@ static const enum raw_ostream::Colors caretColor = raw_ostream::GREEN; static const enum raw_ostream::Colors warningColor = raw_ostream::MAGENTA; +static const enum raw_ostream::Colors templateColor = + raw_ostream::CYAN; static const enum raw_ostream::Colors errorColor = raw_ostream::RED; static const enum raw_ostream::Colors fatalColor = raw_ostream::RED; // Used for changing only the bold attribute. static const enum raw_ostream::Colors savedColor = raw_ostream::SAVEDCOLOR; +/// \brief Add highlights to differences in template strings.
+static void applyTemplateHighlighting(raw_ostream &OS, StringRef Str,
+                                      bool &Normal, bool Bold) {
+  for (unsigned i = 0, e = Str.size(); i < e; ++i)
+    if (Str[i] != ToggleHighlight) {
+      OS << Str[i];
+    } else {
+      if (Normal)
+        OS.changeColor(templateColor, true);
+      else {
+        OS.resetColor();
+        if (Bold)
+          OS.changeColor(savedColor, true);
+      }
+      Normal = !Normal;
+    }
+}
+
 /// \brief Number of spaces to indent when word-wrapping.
 const unsigned WordWrapIndentation = 6;
 
-int bytesSincePreviousTabOrLineBegin(StringRef SourceLine, size_t i) {
+static int bytesSincePreviousTabOrLineBegin(StringRef SourceLine, size_t i) {
   int bytes = 0;
   while (0<i) {
     if (SourceLine[--i]=='\t')
       break;
     ++bytes;
   }
   return bytes;
 }
 
-std::pair<SmallString<16>,bool>
+static std::pair<SmallString<16>, bool>
 printableTextForNextCharacter(StringRef SourceLine, size_t *i,
                               unsigned TabStop) {
   assert(i && "i must not be null");
@@ -146,7 +167,7 @@ printableTextForNextCharacter(StringRef SourceLine, size_t *i,
   return std::make_pair(expandedByte, false);
 }
 
-void expandTabs(std::string &SourceLine, unsigned TabStop) {
+static void expandTabs(std::string &SourceLine, unsigned TabStop) {
   size_t i = SourceLine.size();
   while (i>0) {
     i--;
@@ -164,7 +185,7 @@ void expandTabs(std::string &SourceLine, unsigned TabStop) {
 /// characters will appear at (numbering the first column as 0).
 ///
 /// If a byte 'i' corresponds to multiple columns (e.g. the byte contains a tab
-/// character) then the the array will map that byte to the first column the
+/// character) then the array will map that byte to the first column the
 /// tab appears at and the next value in the map will have been incremented
 /// more than once.
 ///
@@ -179,9 +200,9 @@ void expandTabs(std::string &SourceLine, unsigned TabStop) {
 ///
 ///  "a \t \u3042" -> {0,1,2,8,9,-1,-1,11}
 ///
-/// (\u3042 is represented in UTF-8 by three bytes and takes two columns to
+/// (\\u3042 is represented in UTF-8 by three bytes and takes two columns to
 /// display)
-void byteToColumn(StringRef SourceLine, unsigned TabStop,
+static void byteToColumn(StringRef SourceLine, unsigned TabStop,
                   SmallVectorImpl<int> &out) {
   out.clear();
@@ -213,9 +234,9 @@ void byteToColumn(StringRef SourceLine, unsigned TabStop,
 ///
 ///  "a \t \u3042" -> {0,1,2,-1,-1,-1,-1,-1,3,4,-1,7}
 ///
-/// (\u3042 is represented in UTF-8 by three bytes and takes two columns to
+/// (\\u3042 is represented in UTF-8 by three bytes and takes two columns to
 /// display)
-void columnToByte(StringRef SourceLine, unsigned TabStop,
+static void columnToByte(StringRef SourceLine, unsigned TabStop,
                   SmallVectorImpl<int> &out) {
   out.clear();
@@ -307,11 +328,11 @@ static void selectInterestingSourceRegion(std::string &SourceLine,
   // correctly.
   unsigned CaretStart = 0, CaretEnd = CaretLine.size();
   for (; CaretStart != CaretEnd; ++CaretStart)
-    if (!isspace(CaretLine[CaretStart]))
+    if (!isspace(static_cast<unsigned char>(CaretLine[CaretStart])))
       break;
 
   for (; CaretEnd != CaretStart; --CaretEnd)
-    if (!isspace(CaretLine[CaretEnd - 1]))
+    if (!isspace(static_cast<unsigned char>(CaretLine[CaretEnd - 1])))
       break;
 
   // caret has already been inserted into CaretLine so the above whitespace
@@ -322,17 +343,33 @@ static void selectInterestingSourceRegion(std::string &SourceLine,
   if (!FixItInsertionLine.empty()) {
     unsigned FixItStart = 0, FixItEnd = FixItInsertionLine.size();
     for (; FixItStart != FixItEnd; ++FixItStart)
-      if (!isspace(FixItInsertionLine[FixItStart]))
+      if (!isspace(static_cast<unsigned char>(FixItInsertionLine[FixItStart])))
         break;
 
     for (; FixItEnd != FixItStart; --FixItEnd)
-      if (!isspace(FixItInsertionLine[FixItEnd - 1]))
+      if (!isspace(static_cast<unsigned char>(FixItInsertionLine[FixItEnd - 1])))
         break;
 
     CaretStart = std::min(FixItStart, CaretStart);
     CaretEnd = std::max(FixItEnd, CaretEnd);
   }
 
+  // CaretEnd may have been set at the middle of a character
+  // If it's not at a character's first column then advance it past the current
+  //   character.
+  while (static_cast<int>(CaretEnd) < map.columns() &&
+         -1 == map.columnToByte(CaretEnd))
+    ++CaretEnd;
+
+  assert((static_cast<int>(CaretStart) > map.columns() ||
+          -1!=map.columnToByte(CaretStart)) &&
+         "CaretStart must not point to a column in the middle of a source"
+         " line character");
+  assert((static_cast<int>(CaretEnd) > map.columns() ||
+          -1!=map.columnToByte(CaretEnd)) &&
+         "CaretEnd must not point to a column in the middle of a source line"
+         " character");
+
   // CaretLine[CaretStart, CaretEnd) contains all of the interesting
   // parts of the caret line. While this slice is smaller than the
   // number of columns we have, try to grow the slice to encompass
@@ -366,12 +403,14 @@ static void selectInterestingSourceRegion(std::string &SourceLine,
       // Skip over any whitespace we see here; we're looking for
       // another bit of interesting text.
       while (NewStart &&
-             (map.byteToColumn(NewStart)==-1 || isspace(SourceLine[NewStart])))
+             (map.byteToColumn(NewStart)==-1 ||
+              isspace(static_cast<unsigned char>(SourceLine[NewStart]))))
         --NewStart;
 
       // Skip over this bit of "interesting" text.
       while (NewStart &&
-             (map.byteToColumn(NewStart)!=-1 && !isspace(SourceLine[NewStart])))
+             (map.byteToColumn(NewStart)!=-1 &&
+              !isspace(static_cast<unsigned char>(SourceLine[NewStart]))))
        --NewStart;
 
       // Move up to the non-whitespace character we just saw.
@@ -392,12 +431,14 @@ static void selectInterestingSourceRegion(std::string &SourceLine,
       // Skip over any whitespace we see here; we're looking for
      // another bit of interesting text.
       while (NewEnd<SourceLine.size() &&
-             (map.byteToColumn(NewEnd)==-1 || isspace(SourceLine[NewEnd])))
+             (map.byteToColumn(NewEnd)==-1 ||
+              isspace(static_cast<unsigned char>(SourceLine[NewEnd]))))
         ++NewEnd;
 
       // Skip over this bit of "interesting" text.
       while (NewEnd<SourceLine.size() &&
-             (map.byteToColumn(NewEnd)!=-1 && !isspace(SourceLine[NewEnd])))
+             (map.byteToColumn(NewEnd)!=-1 &&
+              !isspace(static_cast<unsigned char>(SourceLine[NewEnd]))))
         ++NewEnd;
 
       unsigned NewColumns = map.byteToColumn(NewEnd) -
@@ -549,6 +590,7 @@ static unsigned findEndOfWord(unsigned Start, StringRef Str,
 /// \param Column the column number at which the first character of \p
 /// Str will be printed. This will be non-zero when part of the first
 /// line has already been printed.
+/// \param Bold if the current text should be bold
 /// \param Indentation the number of spaces to indent any lines beyond
 /// the first line.
 /// \returns true if word-wrapping was required, or false if the
@@ -556,8 +598,10 @@ static unsigned findEndOfWord(unsigned Start, StringRef Str,
 static bool printWordWrapped(raw_ostream &OS, StringRef Str,
                              unsigned Columns,
                              unsigned Column = 0,
+                             bool Bold = false,
                              unsigned Indentation = WordWrapIndentation) {
   const unsigned Length = std::min(Str.find('\n'), Str.size());
+  bool TextNormal = true;
 
   // The string used to indent each line.
   SmallString<16> IndentStr;
@@ -581,7 +625,8 @@ static bool printWordWrapped(raw_ostream &OS, StringRef Str,
         OS << ' ';
         Column += 1;
       }
-      OS << Str.substr(WordStart, WordLength);
+      applyTemplateHighlighting(OS, Str.substr(WordStart, WordLength),
+                                TextNormal, Bold);
       Column += WordLength;
       continue;
     }
@@ -590,22 +635,24 @@ static bool printWordWrapped(raw_ostream &OS, StringRef Str,
     // line.
     OS << '\n';
     OS.write(&IndentStr[0], Indentation);
-    OS << Str.substr(WordStart, WordLength);
+    applyTemplateHighlighting(OS, Str.substr(WordStart, WordLength),
+                              TextNormal, Bold);
     Column = Indentation + WordLength;
     Wrapped = true;
   }
 
   // Append any remaining text from the message with its existing formatting.
-  OS << Str.substr(Length);
+  applyTemplateHighlighting(OS, Str.substr(Length), TextNormal, Bold);
+
+  assert(TextNormal && "Text highlighted at end of diagnostic message.");
 
   return Wrapped;
 }
 
 TextDiagnostic::TextDiagnostic(raw_ostream &OS,
-                               const SourceManager &SM,
                                const LangOptions &LangOpts,
                                const DiagnosticOptions &DiagOpts)
-  : DiagnosticRenderer(SM, LangOpts, DiagOpts), OS(OS) {}
+  : DiagnosticRenderer(LangOpts, DiagOpts), OS(OS) {}
 
 TextDiagnostic::~TextDiagnostic() {}
 
@@ -615,11 +662,13 @@ TextDiagnostic::emitDiagnosticMessage(SourceLocation Loc,
                                       DiagnosticsEngine::Level Level,
                                       StringRef Message,
                                       ArrayRef<CharSourceRange> Ranges,
+                                      const SourceManager *SM,
                                       DiagOrStoredDiag D) {
   uint64_t StartOfLocationInfo = OS.tell();
 
   // Emit the location of this particular diagnostic.
-  emitDiagnosticLoc(Loc, PLoc, Level, Ranges);
+  if (Loc.isValid())
+    emitDiagnosticLoc(Loc, PLoc, Level, Ranges, *SM);
 
   if (DiagOpts.ShowColors)
     OS.resetColor();
@@ -665,20 +714,27 @@ TextDiagnostic::printDiagnosticMessage(raw_ostream &OS,
                                        StringRef Message,
                                        unsigned CurrentColumn, unsigned Columns,
                                        bool ShowColors) {
+  bool Bold = false;
   if (ShowColors) {
     // Print warnings, errors and fatal errors in bold, no color
     switch (Level) {
-    case DiagnosticsEngine::Warning: OS.changeColor(savedColor, true); break;
-    case DiagnosticsEngine::Error:   OS.changeColor(savedColor, true); break;
-    case DiagnosticsEngine::Fatal:   OS.changeColor(savedColor, true); break;
+    case DiagnosticsEngine::Warning:
+    case DiagnosticsEngine::Error:
+    case DiagnosticsEngine::Fatal:
+      OS.changeColor(savedColor, true);
+      Bold = true;
+      break;
     default: break; //don't bold notes
     }
   }
 
   if (Columns)
-    printWordWrapped(OS, Message, Columns, CurrentColumn);
-  else
-    OS << Message;
+    printWordWrapped(OS, Message, Columns, CurrentColumn, Bold);
+  else {
+    bool Normal = true;
+    applyTemplateHighlighting(OS, Message, Normal, Bold);
+    assert(Normal && "Formatting should have returned to normal");
+  }
 
   if (ShowColors)
     OS.resetColor();
@@ -693,7 +749,8 @@ TextDiagnostic::printDiagnosticMessage(raw_ostream &OS,
 /// ranges necessary.
 void TextDiagnostic::emitDiagnosticLoc(SourceLocation Loc, PresumedLoc PLoc,
                                        DiagnosticsEngine::Level Level,
-                                       ArrayRef<CharSourceRange> Ranges) {
+                                       ArrayRef<CharSourceRange> Ranges,
+                                       const SourceManager &SM) {
   if (PLoc.isInvalid()) {
     // At least print the file name if available:
     FileID FID = SM.getFileID(Loc);
@@ -799,7 +856,8 @@ void TextDiagnostic::emitBasicNote(StringRef Message) {
 }
 
 void TextDiagnostic::emitIncludeLocation(SourceLocation Loc,
-                                         PresumedLoc PLoc) {
+                                         PresumedLoc PLoc,
+                                         const SourceManager &SM) {
   if (DiagOpts.ShowLocation)
     OS << "In file included from " << PLoc.getFilename() << ':'
        << PLoc.getLine() << ":\n";
@@ -817,7 +875,8 @@ void TextDiagnostic::emitIncludeLocation(SourceLocation Loc,
 void TextDiagnostic::emitSnippetAndCaret(
     SourceLocation Loc, DiagnosticsEngine::Level Level,
     SmallVectorImpl<CharSourceRange>& Ranges,
-    ArrayRef<FixItHint> Hints) {
+    ArrayRef<FixItHint> Hints,
+    const SourceManager &SM) {
   assert(!Loc.isInvalid() && "must have a valid source location here");
   assert(Loc.isFileID() && "must have a file location here");
 
@@ -840,17 +899,12 @@ void TextDiagnostic::emitSnippetAndCaret(
 
   // Get information about the buffer it points into.
   bool Invalid = false;
-  StringRef BufData = SM.getBufferData(FID, &Invalid);
+  const char *BufStart = SM.getBufferData(FID, &Invalid).data();
   if (Invalid)
     return;
 
-  const char *BufStart = BufData.data();
-  const char *BufEnd = BufStart + BufData.size();
-
   unsigned LineNo = SM.getLineNumber(FID, FileOffset);
   unsigned ColNo = SM.getColumnNumber(FID, FileOffset);
-  unsigned CaretEndColNo
-    = ColNo + Lexer::MeasureTokenLength(Loc, SM, LangOpts);
 
   // Rewind from the current position to the start of the line.
   const char *TokPtr = BufStart+FileOffset;
@@ -860,14 +914,9 @@ void TextDiagnostic::emitSnippetAndCaret(
   // Compute the line end.  Scan forward from the error position to the end of
   // the line.
   const char *LineEnd = TokPtr;
-  while (*LineEnd != '\n' && *LineEnd != '\r' && LineEnd!=BufEnd)
+  while (*LineEnd != '\n' && *LineEnd != '\r' && *LineEnd != '\0')
     ++LineEnd;
 
-  // FIXME: This shouldn't be necessary, but the CaretEndColNo can extend past
-  // the source line length as currently being computed. See
-  // test/Misc/message-length.c.
-  CaretEndColNo = std::min(CaretEndColNo, unsigned(LineEnd - LineStart));
-
   // Copy the line of code into an std::string for ease of manipulation.
   std::string SourceLine(LineStart, LineEnd);
 
@@ -881,7 +930,7 @@ void TextDiagnostic::emitSnippetAndCaret(
   for (SmallVectorImpl<CharSourceRange>::iterator I = Ranges.begin(),
        E = Ranges.end();
        I != E; ++I)
-    highlightRange(*I, LineNo, FID, sourceColMap, CaretLine);
+    highlightRange(*I, LineNo, FID, sourceColMap, CaretLine, SM);
 
   // Next, insert the caret itself.
   ColNo = sourceColMap.byteToColumn(ColNo-1);
@@ -891,7 +940,7 @@ void TextDiagnostic::emitSnippetAndCaret(
 
   std::string FixItInsertionLine = buildFixItInsertionLine(LineNo,
                                                            sourceColMap,
-                                                           Hints);
+                                                           Hints, SM);
 
   // If the source line is too long for our terminal, select only the
   // "interesting" source region within that line.
@@ -934,11 +983,10 @@ void TextDiagnostic::emitSnippetAndCaret(
   }
 
   // Print out any parseable fixit information requested by the options.
-  emitParseableFixits(Hints);
+  emitParseableFixits(Hints, SM);
 }
 
-void TextDiagnostic::emitSnippet(StringRef line)
-{
+void TextDiagnostic::emitSnippet(StringRef line) {
   if (line.empty())
     return;
 
@@ -952,8 +1000,7 @@ void TextDiagnostic::emitSnippet(StringRef line)
       = printableTextForNextCharacter(line, &i, DiagOpts.TabStop);
     bool was_printable = res.second;
 
-    if (DiagOpts.ShowColors
-        && was_printable==print_reversed) {
+    if (DiagOpts.ShowColors && was_printable == print_reversed) {
       if (print_reversed)
         OS.reverseColor();
       OS << to_print;
@@ -979,7 +1026,8 @@ void TextDiagnostic::emitSnippet(StringRef line)
 void TextDiagnostic::highlightRange(const CharSourceRange &R,
                                     unsigned LineNo, FileID FID,
                                     const SourceColumnMap &map,
-                                    std::string &CaretLine) {
+                                    std::string &CaretLine,
+                                    const SourceManager &SM) {
   if (!R.isValid()) return;
 
   SourceLocation Begin = SM.getExpansionLoc(R.getBegin());
@@ -1064,49 +1112,63 @@ void TextDiagnostic::highlightRange(const CharSourceRange &R,
 std::string TextDiagnostic::buildFixItInsertionLine(
   unsigned LineNo,
   const SourceColumnMap &map,
-  ArrayRef<FixItHint> Hints) {
+  ArrayRef<FixItHint> Hints,
+  const SourceManager &SM) {
 
   std::string FixItInsertionLine;
   if (Hints.empty() || !DiagOpts.ShowFixits)
     return FixItInsertionLine;
+  unsigned PrevHintEndCol = 0;
 
   for (ArrayRef<FixItHint>::iterator I = Hints.begin(), E = Hints.end();
        I != E; ++I) {
     if (!I->CodeToInsert.empty()) {
       // We have an insertion hint. Determine whether the inserted
-      // code is on the same line as the caret.
+      // code contains no newlines and is on the same line as the caret.
       std::pair<FileID, unsigned> HintLocInfo
         = SM.getDecomposedExpansionLoc(I->RemoveRange.getBegin());
-      if (LineNo == SM.getLineNumber(HintLocInfo.first, HintLocInfo.second)) {
+      if (LineNo == SM.getLineNumber(HintLocInfo.first, HintLocInfo.second) &&
+          StringRef(I->CodeToInsert).find_first_of("\n\r") == StringRef::npos) {
         // Insert the new code into the line just below the code
         // that the user wrote.
-        unsigned HintColNo
+        // Note: When modifying this function, be very careful about what is a
+        // "column" (printed width, platform-dependent) and what is a
+        // "byte offset" (SourceManager "column").
+        unsigned HintByteOffset
           = SM.getColumnNumber(HintLocInfo.first, HintLocInfo.second) - 1;
-        // hint must start inside the source or right at the end
-        assert(HintColNo < static_cast<unsigned>(map.bytes())+1);
-        HintColNo = map.byteToColumn(HintColNo);
-
-        // FIXME: if the fixit includes tabs or other characters that do not
-        //  take up a single column per byte when displayed then
-        //  I->CodeToInsert.size() is not a column number and we're mixing
-        //  units (columns + bytes). We should get printable versions
-        //  of each fixit before using them.
-        unsigned LastColumnModified
-          = HintColNo + I->CodeToInsert.size();
-
-        if (LastColumnModified > static_cast<unsigned>(map.bytes())) {
-          unsigned LastExistingColumn = map.byteToColumn(map.bytes());
-          unsigned AddedColumns = LastColumnModified-LastExistingColumn;
-          LastColumnModified = LastExistingColumn + AddedColumns;
-        } else {
-          LastColumnModified = map.byteToColumn(LastColumnModified);
-        }
+        // The hint must start inside the source or right at the end
+        assert(HintByteOffset < static_cast<unsigned>(map.bytes())+1);
+        unsigned HintCol = map.byteToColumn(HintByteOffset);
+
+        // If we inserted a long previous hint, push this one forwards, and add
+        // an extra space to show that this is not part of the previous
+        // completion. This is sort of the best we can do when two hints appear
+        // to overlap.
+        //
+        // Note that if this hint is located immediately after the previous
+        // hint, no space will be added, since the location is more important.
+        if (HintCol < PrevHintEndCol)
+          HintCol = PrevHintEndCol + 1;
+
+        // FIXME: This function handles multibyte characters in the source, but
+        // not in the fixits. This assertion is intended to catch unintended
+        // use of multibyte characters in fixits. If we decide to do this, we'll
+        // have to track separate byte widths for the source and fixit lines.
+        assert((size_t)llvm::sys::locale::columnWidth(I->CodeToInsert) ==
+               I->CodeToInsert.size());
+
+        // This relies on one byte per column in our fixit hints.
+        // This should NOT use HintByteOffset, because the source might have
+        // Unicode characters in earlier columns.
+        unsigned LastColumnModified = HintCol + I->CodeToInsert.size();
 
         if (LastColumnModified > FixItInsertionLine.size())
           FixItInsertionLine.resize(LastColumnModified, ' ');
-        assert(HintColNo+I->CodeToInsert.size() <= FixItInsertionLine.size());
+
         std::copy(I->CodeToInsert.begin(), I->CodeToInsert.end(),
-                  FixItInsertionLine.begin() + HintColNo);
+                  FixItInsertionLine.begin() + HintCol);
+
+        PrevHintEndCol = LastColumnModified;
       } else {
         FixItInsertionLine.clear();
         break;
@@ -1119,7 +1181,8 @@ std::string TextDiagnostic::buildFixItInsertionLine(
   return FixItInsertionLine;
 }
 
-void TextDiagnostic::emitParseableFixits(ArrayRef<FixItHint> Hints) {
+void TextDiagnostic::emitParseableFixits(ArrayRef<FixItHint> Hints,
+                                         const SourceManager &SM) {
   if (!DiagOpts.ShowParseableFixits)
     return;
 
diff --git a/lib/Frontend/TextDiagnosticPrinter.cpp b/lib/Frontend/TextDiagnosticPrinter.cpp
index 6445a0c..382e156 100644
--- a/lib/Frontend/TextDiagnosticPrinter.cpp
+++ b/lib/Frontend/TextDiagnosticPrinter.cpp
@@ -27,7 +27,7 @@ using namespace clang;
 TextDiagnosticPrinter::TextDiagnosticPrinter(raw_ostream &os,
                                              const DiagnosticOptions &diags,
                                              bool _OwnsOutputStream)
-  : OS(os), LangOpts(0), DiagOpts(&diags), SM(0),
+  : OS(os), DiagOpts(&diags),
     OwnsOutputStream(_OwnsOutputStream) {
 }
 
@@ -38,11 +38,11 @@ TextDiagnosticPrinter::~TextDiagnosticPrinter() {
 
 void TextDiagnosticPrinter::BeginSourceFile(const LangOptions &LO,
                                             const Preprocessor *PP) {
-  LangOpts = &LO;
+  // Build the TextDiagnostic utility.
+  TextDiag.reset(new TextDiagnostic(OS, LO, *DiagOpts));
 }
 
 void TextDiagnosticPrinter::EndSourceFile() {
-  LangOpts = 0;
   TextDiag.reset(0);
 }
 
@@ -79,16 +79,6 @@ static void printDiagnosticOptions(raw_ostream &OS,
     Started = true;
   }
 
-  // If the diagnostic is an extension diagnostic and not enabled by default
-  // then it must have been turned on with -pedantic.
-  bool EnabledByDefault;
-  if (DiagnosticIDs::isBuiltinExtensionDiag(Info.getID(),
-                                            EnabledByDefault) &&
-      !EnabledByDefault) {
-    OS << (Started ? "," : " [") << "-pedantic";
-    Started = true;
-  }
-
   StringRef Opt = DiagnosticIDs::getWarningOptionForDiag(Info.getID());
   if (!Opt.empty()) {
     OS << (Started ? "," : " [") << "-W" << Opt;
@@ -128,7 +118,7 @@ void TextDiagnosticPrinter::HandleDiagnostic(DiagnosticsEngine::Level Level,
   llvm::raw_svector_ostream DiagMessageStream(OutStr);
   printDiagnosticOptions(DiagMessageStream, Level, Info, *DiagOpts);
 
-  // Keeps track of the the starting position of the location
+  // Keeps track of the starting position of the location
   // information (e.g., "foo.c:10:4:") that precedes the error
   // message. We use this information to determine how long the
   // file+line+column number prefix is.
@@ -152,22 +142,16 @@ void TextDiagnosticPrinter::HandleDiagnostic(DiagnosticsEngine::Level Level,
   }
 
   // Assert that the rest of our infrastructure is setup properly.
-  assert(LangOpts && "Unexpected diagnostic outside source file processing");
   assert(DiagOpts && "Unexpected diagnostic without options set");
   assert(Info.hasSourceManager() &&
          "Unexpected diagnostic with no source manager");
-
-  // Rebuild the TextDiagnostic utility if missing or the source manager has
-  // changed.
-  if (!TextDiag || SM != &Info.getSourceManager()) {
-    SM = &Info.getSourceManager();
-    TextDiag.reset(new TextDiagnostic(OS, *SM, *LangOpts, *DiagOpts));
-  }
+  assert(TextDiag && "Unexpected diagnostic outside source file processing");
 
   TextDiag->emitDiagnostic(Info.getLocation(), Level, DiagMessageStream.str(),
                            Info.getRanges(),
                            llvm::makeArrayRef(Info.getFixItHints(),
-                                              Info.getNumFixItHints()));
+                                              Info.getNumFixItHints()),
+                           &Info.getSourceManager());
 
   OS.flush();
 }
diff --git a/lib/Frontend/VerifyDiagnosticConsumer.cpp b/lib/Frontend/VerifyDiagnosticConsumer.cpp
index 552282d..a9378a1 100644
--- a/lib/Frontend/VerifyDiagnosticConsumer.cpp
+++ b/lib/Frontend/VerifyDiagnosticConsumer.cpp
@@ -11,58 +11,108 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "clang/Basic/FileManager.h"
 #include "clang/Frontend/VerifyDiagnosticConsumer.h"
 #include "clang/Frontend/FrontendDiagnostic.h"
 #include "clang/Frontend/TextDiagnosticBuffer.h"
+#include "clang/Lex/HeaderSearch.h"
 #include "clang/Lex/Preprocessor.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/Support/Regex.h"
 #include "llvm/Support/raw_ostream.h"
-#include <climits>
+#include <cctype>
 
 using namespace clang;
+typedef VerifyDiagnosticConsumer::Directive Directive;
+typedef VerifyDiagnosticConsumer::DirectiveList DirectiveList;
+typedef VerifyDiagnosticConsumer::ExpectedData ExpectedData;
 
 VerifyDiagnosticConsumer::VerifyDiagnosticConsumer(DiagnosticsEngine &_Diags)
-  : Diags(_Diags), PrimaryClient(Diags.getClient()),
-    OwnsPrimaryClient(Diags.ownsClient()),
-    Buffer(new TextDiagnosticBuffer()), CurrentPreprocessor(0)
+  : Diags(_Diags),
+    PrimaryClient(Diags.getClient()), OwnsPrimaryClient(Diags.ownsClient()),
+    Buffer(new TextDiagnosticBuffer()), CurrentPreprocessor(0),
+    ActiveSourceFiles(0)
 {
   Diags.takeClient();
 }
 
 VerifyDiagnosticConsumer::~VerifyDiagnosticConsumer() {
+  assert(!ActiveSourceFiles && "Incomplete parsing of source files!");
+  assert(!CurrentPreprocessor && "CurrentPreprocessor should be invalid!");
   CheckDiagnostics();
   Diags.takeClient();
   if (OwnsPrimaryClient)
     delete PrimaryClient;
 }
 
+#ifndef NDEBUG
+namespace {
+class VerifyFileTracker : public PPCallbacks {
+  typedef VerifyDiagnosticConsumer::FilesParsedForDirectivesSet ListType;
+  ListType &FilesList;
+  SourceManager &SM;
+
+public:
+  VerifyFileTracker(ListType &FilesList, SourceManager &SM)
+    : FilesList(FilesList), SM(SM) { }
+
+  /// \brief Hook into the preprocessor and update the list of parsed
+  /// files when the preprocessor indicates a new file is entered.
+  virtual void FileChanged(SourceLocation Loc, FileChangeReason Reason,
+                           SrcMgr::CharacteristicKind FileType,
+                           FileID PrevFID) {
+    if (const FileEntry *E = SM.getFileEntryForID(SM.getFileID(Loc)))
+      FilesList.insert(E);
+  }
+};
+} // End anonymous namespace.
+#endif
+
 // DiagnosticConsumer interface.
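The directive parser rewritten below accepts an extended expected-* grammar: an optional -re regex marker, an optional @ line anchor, and an optional match count or count range. A short sketch of the accepted forms, with illustrative diagnostic messages that are not taken from the patch (the grammar itself is taken from the ParseDirective code that follows):

    // expected-error {{unknown type name 'foo'}}    exactly once, on this line
    // expected-warning@+2 {{unused variable}}       two lines below the comment
    // expected-note@-1 2 {{candidate function}}     exactly twice, one line above
    // expected-error 1-3 {{deprecated}}             between one and three times
    // expected-warning-re + {{unused.*}}            regex match, one or more times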
 void VerifyDiagnosticConsumer::BeginSourceFile(const LangOptions &LangOpts,
                                                const Preprocessor *PP) {
-  // FIXME: Const hack, we screw up the preprocessor but in practice its ok
-  // because it doesn't get reused. It would be better if we could make a copy
-  // though.
-  CurrentPreprocessor = const_cast<Preprocessor*>(PP);
+  // Attach comment handler on first invocation.
+  if (++ActiveSourceFiles == 1) {
+    if (PP) {
+      CurrentPreprocessor = PP;
+      const_cast<Preprocessor*>(PP)->addCommentHandler(this);
+#ifndef NDEBUG
+      VerifyFileTracker *V = new VerifyFileTracker(FilesParsedForDirectives,
+                                                   PP->getSourceManager());
+      const_cast<Preprocessor*>(PP)->addPPCallbacks(V);
+#endif
+    }
+  }
+  assert((!PP || CurrentPreprocessor == PP) && "Preprocessor changed!");
 
   PrimaryClient->BeginSourceFile(LangOpts, PP);
 }
 
 void VerifyDiagnosticConsumer::EndSourceFile() {
-  CheckDiagnostics();
-
+  assert(ActiveSourceFiles && "No active source files!");
   PrimaryClient->EndSourceFile();
 
-  CurrentPreprocessor = 0;
+  // Detach comment handler once last active source file completed.
+  if (--ActiveSourceFiles == 0) {
+    if (CurrentPreprocessor)
+      const_cast<Preprocessor*>(CurrentPreprocessor)->removeCommentHandler(this);
+
+    // Check diagnostics once last file completed.
+    CheckDiagnostics();
+    CurrentPreprocessor = 0;
+  }
 }
 
 void VerifyDiagnosticConsumer::HandleDiagnostic(
       DiagnosticsEngine::Level DiagLevel, const Diagnostic &Info) {
-  if (FirstErrorFID.isInvalid() && Info.hasSourceManager()) {
-    const SourceManager &SM = Info.getSourceManager();
-    FirstErrorFID = SM.getFileID(Info.getLocation());
+#ifndef NDEBUG
+  if (Info.hasSourceManager()) {
+    FileID FID = Info.getSourceManager().getFileID(Info.getLocation());
+    if (!FID.isInvalid())
+      FilesWithDiagnostics.insert(FID);
   }
+#endif
   // Send the diagnostic to the buffer, we will check it once we reach the end
   // of the source file (or are destructed).
   Buffer->HandleDiagnostic(DiagLevel, Info);
@@ -77,54 +127,21 @@ typedef TextDiagnosticBuffer::const_iterator const_diag_iterator;
 
 namespace {
 
-/// Directive - Abstract class representing a parsed verify directive.
-///
-class Directive {
-public:
-  static Directive* Create(bool RegexKind, const SourceLocation &Location,
-                           const std::string &Text, unsigned Count);
-public:
-  /// Constant representing one or more matches aka regex "+".
-  static const unsigned OneOrMoreCount = UINT_MAX;
-
-  SourceLocation Location;
-  const std::string Text;
-  unsigned Count;
-
-  virtual ~Directive() { }
-
-  // Returns true if directive text is valid.
-  // Otherwise returns false and populates E.
-  virtual bool isValid(std::string &Error) = 0;
-
-  // Returns true on match.
-  virtual bool Match(const std::string &S) = 0;
-
-protected:
-  Directive(const SourceLocation &Location, const std::string &Text,
-            unsigned Count)
-    : Location(Location), Text(Text), Count(Count) { }
-
-private:
-  Directive(const Directive&);  // DO NOT IMPLEMENT
-  void operator=(const Directive&); // DO NOT IMPLEMENT
-};
-
 /// StandardDirective - Directive with string matching.
 ///
 class StandardDirective : public Directive {
 public:
-  StandardDirective(const SourceLocation &Location, const std::string &Text,
-                    unsigned Count)
-    : Directive(Location, Text, Count) { }
+  StandardDirective(SourceLocation DirectiveLoc, SourceLocation DiagnosticLoc,
+                    StringRef Text, unsigned Min, unsigned Max)
+    : Directive(DirectiveLoc, DiagnosticLoc, Text, Min, Max) { }
 
   virtual bool isValid(std::string &Error) {
     // all strings are considered valid; even empty ones
     return true;
   }
 
-  virtual bool Match(const std::string &S) {
-    return S.find(Text) != std::string::npos;
+  virtual bool match(StringRef S) {
+    return S.find(Text) != StringRef::npos;
   }
 };
 
@@ -132,9 +149,9 @@
 ///
 class RegexDirective : public Directive {
 public:
-  RegexDirective(const SourceLocation &Location, const std::string &Text,
-                 unsigned Count)
-    : Directive(Location, Text, Count), Regex(Text) { }
+  RegexDirective(SourceLocation DirectiveLoc, SourceLocation DiagnosticLoc,
+                 StringRef Text, unsigned Min, unsigned Max)
+    : Directive(DirectiveLoc, DiagnosticLoc, Text, Min, Max), Regex(Text) { }
 
   virtual bool isValid(std::string &Error) {
     if (Regex.isValid(Error))
@@ -142,7 +159,7 @@
     return false;
   }
 
-  virtual bool Match(const std::string &S) {
+  virtual bool match(StringRef S) {
     return Regex.match(S);
   }
 
@@ -150,30 +167,11 @@
 private:
   llvm::Regex Regex;
 };
 
-typedef std::vector<Directive*> DirectiveList;
-
-/// ExpectedData - owns directive objects and deletes on destructor.
-///
-struct ExpectedData {
-  DirectiveList Errors;
-  DirectiveList Warnings;
-  DirectiveList Notes;
-
-  ~ExpectedData() {
-    DirectiveList* Lists[] = { &Errors, &Warnings, &Notes, 0 };
-    for (DirectiveList **PL = Lists; *PL; ++PL) {
-      DirectiveList * const L = *PL;
-      for (DirectiveList::iterator I = L->begin(), E = L->end(); I != E; ++I)
-        delete *I;
-    }
-  }
-};
-
 class ParseHelper
 {
 public:
-  ParseHelper(const char *Begin, const char *End)
-    : Begin(Begin), End(End), C(Begin), P(Begin), PEnd(NULL) { }
+  ParseHelper(StringRef S)
+    : Begin(S.begin()), End(S.end()), C(Begin), P(Begin), PEnd(NULL) { }
 
   // Return true if string literal is next.
   bool Next(StringRef S) {
@@ -240,78 +238,134 @@ private:
 /// ParseDirective - Go through the comment and see if it indicates expected
 /// diagnostics. If so, then put them in the appropriate directive list.
 ///
-static void ParseDirective(const char *CommentStart, unsigned CommentLen,
-                           ExpectedData &ED, Preprocessor &PP,
-                           SourceLocation Pos) {
+/// Returns true if any valid directives were found.
+static bool ParseDirective(StringRef S, ExpectedData *ED, SourceManager &SM,
+                           SourceLocation Pos, DiagnosticsEngine &Diags) {
   // A single comment may contain multiple directives.
-  for (ParseHelper PH(CommentStart, CommentStart+CommentLen); !PH.Done();) {
-    // search for token: expected
+  bool FoundDirective = false;
+  for (ParseHelper PH(S); !PH.Done();) {
+    // Search for token: expected
     if (!PH.Search("expected"))
       break;
     PH.Advance();
 
-    // next token: -
+    // Next token: -
    if (!PH.Next("-"))
      continue;
    PH.Advance();

-    // next token: { error | warning | note }
+    // Next token: { error | warning | note }
    DirectiveList* DL = NULL;
    if (PH.Next("error"))
-      DL = &ED.Errors;
+      DL = ED ? &ED->Errors : NULL;
    else if (PH.Next("warning"))
-      DL = &ED.Warnings;
+      DL = ED ? &ED->Warnings : NULL;
    else if (PH.Next("note"))
-      DL = &ED.Notes;
+      DL = ED ?
&ED->Notes : NULL; else continue; PH.Advance(); - // default directive kind + // If a directive has been found but we're not interested + // in storing the directive information, return now. + if (!DL) + return true; + + // Default directive kind. bool RegexKind = false; const char* KindStr = "string"; - // next optional token: - + // Next optional token: - if (PH.Next("-re")) { PH.Advance(); RegexKind = true; KindStr = "regex"; } - // skip optional whitespace + // Next optional token: @ + SourceLocation ExpectedLoc; + if (!PH.Next("@")) { + ExpectedLoc = Pos; + } else { + PH.Advance(); + unsigned Line = 0; + bool FoundPlus = PH.Next("+"); + if (FoundPlus || PH.Next("-")) { + // Relative to current line. + PH.Advance(); + bool Invalid = false; + unsigned ExpectedLine = SM.getSpellingLineNumber(Pos, &Invalid); + if (!Invalid && PH.Next(Line) && (FoundPlus || Line < ExpectedLine)) { + if (FoundPlus) ExpectedLine += Line; + else ExpectedLine -= Line; + ExpectedLoc = SM.translateLineCol(SM.getFileID(Pos), ExpectedLine, 1); + } + } else { + // Absolute line number. + if (PH.Next(Line) && Line > 0) + ExpectedLoc = SM.translateLineCol(SM.getFileID(Pos), Line, 1); + } + + if (ExpectedLoc.isInvalid()) { + Diags.Report(Pos.getLocWithOffset(PH.C-PH.Begin), + diag::err_verify_missing_line) << KindStr; + continue; + } + PH.Advance(); + } + + // Skip optional whitespace. PH.SkipWhitespace(); - // next optional token: positive integer or a '+'. - unsigned Count = 1; - if (PH.Next(Count)) + // Next optional token: positive integer or a '+'. + unsigned Min = 1; + unsigned Max = 1; + if (PH.Next(Min)) { PH.Advance(); - else if (PH.Next("+")) { - Count = Directive::OneOrMoreCount; + // A positive integer can be followed by a '+' meaning min + // or more, or by a '-' meaning a range from min to max. + if (PH.Next("+")) { + Max = Directive::MaxCount; + PH.Advance(); + } else if (PH.Next("-")) { + PH.Advance(); + if (!PH.Next(Max) || Max < Min) { + Diags.Report(Pos.getLocWithOffset(PH.C-PH.Begin), + diag::err_verify_invalid_range) << KindStr; + continue; + } + PH.Advance(); + } else { + Max = Min; + } + } else if (PH.Next("+")) { + // '+' on its own means "1 or more". + Max = Directive::MaxCount; PH.Advance(); } - // skip optional whitespace + // Skip optional whitespace. PH.SkipWhitespace(); - // next token: {{ + // Next token: {{ if (!PH.Next("{{")) { - PP.Diag(Pos.getLocWithOffset(PH.C-PH.Begin), - diag::err_verify_missing_start) << KindStr; + Diags.Report(Pos.getLocWithOffset(PH.C-PH.Begin), + diag::err_verify_missing_start) << KindStr; continue; } PH.Advance(); const char* const ContentBegin = PH.C; // mark content begin - // search for token: }} + // Search for token: }} if (!PH.Search("}}")) { - PP.Diag(Pos.getLocWithOffset(PH.C-PH.Begin), - diag::err_verify_missing_end) << KindStr; + Diags.Report(Pos.getLocWithOffset(PH.C-PH.Begin), + diag::err_verify_missing_end) << KindStr; continue; } const char* const ContentEnd = PH.P; // mark content end PH.Advance(); - // build directive text; convert \n to newlines + // Build directive text; convert \n to newlines. std::string Text; StringRef NewlineStr = "\\n"; StringRef Content(ContentBegin, ContentEnd-ContentBegin); @@ -325,25 +379,83 @@ static void ParseDirective(const char *CommentStart, unsigned CommentLen, if (Text.empty()) Text.assign(ContentBegin, ContentEnd); - // construct new directive - Directive *D = Directive::Create(RegexKind, Pos, Text, Count); + // Construct new directive. 
+    Directive *D = Directive::create(RegexKind, Pos, ExpectedLoc, Text,
+                                     Min, Max);
     std::string Error;
-    if (D->isValid(Error))
+    if (D->isValid(Error)) {
       DL->push_back(D);
-    else {
-      PP.Diag(Pos.getLocWithOffset(ContentBegin-PH.Begin),
-              diag::err_verify_invalid_content)
+      FoundDirective = true;
+    } else {
+      Diags.Report(Pos.getLocWithOffset(ContentBegin-PH.Begin),
+                   diag::err_verify_invalid_content)
         << KindStr << Error;
     }
   }
+
+  return FoundDirective;
 }
 
-/// FindExpectedDiags - Lex the main source file to find all of the
-//   expected errors and warnings.
-static void FindExpectedDiags(Preprocessor &PP, ExpectedData &ED, FileID FID) {
+/// HandleComment - Hook into the preprocessor and extract comments containing
+/// expected errors and warnings.
+bool VerifyDiagnosticConsumer::HandleComment(Preprocessor &PP,
+                                             SourceRange Comment) {
+  SourceManager &SM = PP.getSourceManager();
+  SourceLocation CommentBegin = Comment.getBegin();
+
+  const char *CommentRaw = SM.getCharacterData(CommentBegin);
+  StringRef C(CommentRaw, SM.getCharacterData(Comment.getEnd()) - CommentRaw);
+
+  if (C.empty())
+    return false;
+
+  // Fold any "\<EOL>" sequences
+  size_t loc = C.find('\\');
+  if (loc == StringRef::npos) {
+    ParseDirective(C, &ED, SM, CommentBegin, PP.getDiagnostics());
+    return false;
+  }
+
+  std::string C2;
+  C2.reserve(C.size());
+
+  for (size_t last = 0;; loc = C.find('\\', last)) {
+    if (loc == StringRef::npos || loc == C.size()) {
+      C2 += C.substr(last);
+      break;
+    }
+    C2 += C.substr(last, loc-last);
+    last = loc + 1;
+
+    if (C[last] == '\n' || C[last] == '\r') {
+      ++last;
+
+      // Escape \r\n  or \n\r, but not \n\n.
+      if (last < C.size())
+        if (C[last] == '\n' || C[last] == '\r')
+          if (C[last] != C[last-1])
+            ++last;
+    } else {
+      // This was just a normal backslash.
+      C2 += '\\';
+    }
+  }
+
+  if (!C2.empty())
+    ParseDirective(C2, &ED, SM, CommentBegin, PP.getDiagnostics());
+  return false;
+}
+
+#ifndef NDEBUG
+/// \brief Lex the specified source file to determine whether it contains
+/// any expected-* directives.  As a Lexer is used rather than a full-blown
+/// Preprocessor, directives inside skipped #if blocks will still be found.
+///
+/// \return true if any directives were found.
+static bool findDirectives(const Preprocessor &PP, FileID FID) {
   // Create a raw lexer to pull all the comments out of FID.
   if (FID.isInvalid())
-    return;
+    return false;
 
   SourceManager& SM = PP.getSourceManager();
   // Create a lexer to lex all the tokens of the main file in raw mode.
-/// -static unsigned PrintProblem(DiagnosticsEngine &Diags, SourceManager *SourceMgr, - const_diag_iterator diag_begin, - const_diag_iterator diag_end, - const char *Kind, bool Expected) { +#endif // !NDEBUG + +/// \brief Takes a list of diagnostics that have been generated but not matched +/// by an expected-* directive and produces a diagnostic to the user from this. +static unsigned PrintUnexpected(DiagnosticsEngine &Diags, SourceManager *SourceMgr, + const_diag_iterator diag_begin, + const_diag_iterator diag_end, + const char *Kind) { if (diag_begin == diag_end) return 0; SmallString<256> Fmt; @@ -388,30 +501,32 @@ static unsigned PrintProblem(DiagnosticsEngine &Diags, SourceManager *SourceMgr, OS << ": " << I->second; } - Diags.Report(diag::err_verify_inconsistent_diags) - << Kind << !Expected << OS.str(); + Diags.Report(diag::err_verify_inconsistent_diags).setForceEmit() + << Kind << /*Unexpected=*/true << OS.str(); return std::distance(diag_begin, diag_end); } -static unsigned PrintProblem(DiagnosticsEngine &Diags, SourceManager *SourceMgr, - DirectiveList &DL, const char *Kind, - bool Expected) { +/// \brief Takes a list of diagnostics that were expected to have been generated +/// but were not and produces a diagnostic to the user from this. +static unsigned PrintExpected(DiagnosticsEngine &Diags, SourceManager &SourceMgr, + DirectiveList &DL, const char *Kind) { if (DL.empty()) return 0; SmallString<256> Fmt; llvm::raw_svector_ostream OS(Fmt); for (DirectiveList::iterator I = DL.begin(), E = DL.end(); I != E; ++I) { - Directive& D = **I; - if (D.Location.isInvalid() || !SourceMgr) - OS << "\n (frontend)"; - else - OS << "\n Line " << SourceMgr->getPresumedLineNumber(D.Location); + Directive &D = **I; + OS << "\n Line " << SourceMgr.getPresumedLineNumber(D.DiagnosticLoc); + if (D.DirectiveLoc != D.DiagnosticLoc) + OS << " (directive at " + << SourceMgr.getFilename(D.DirectiveLoc) << ":" + << SourceMgr.getPresumedLineNumber(D.DirectiveLoc) << ")"; OS << ": " << D.Text; } - Diags.Report(diag::err_verify_inconsistent_diags) - << Kind << !Expected << OS.str(); + Diags.Report(diag::err_verify_inconsistent_diags).setForceEmit() + << Kind << /*Unexpected=*/false << OS.str(); return DL.size(); } @@ -428,10 +543,9 @@ static unsigned CheckLists(DiagnosticsEngine &Diags, SourceManager &SourceMgr, for (DirectiveList::iterator I = Left.begin(), E = Left.end(); I != E; ++I) { Directive& D = **I; - unsigned LineNo1 = SourceMgr.getPresumedLineNumber(D.Location); - bool FoundOnce = false; + unsigned LineNo1 = SourceMgr.getPresumedLineNumber(D.DiagnosticLoc); - for (unsigned i = 0; i < D.Count; ++i) { + for (unsigned i = 0; i < D.Max; ++i) { DiagList::iterator II, IE; for (II = Right.begin(), IE = Right.end(); II != IE; ++II) { unsigned LineNo2 = SourceMgr.getPresumedLineNumber(II->first); @@ -439,29 +553,22 @@ static unsigned CheckLists(DiagnosticsEngine &Diags, SourceManager &SourceMgr, continue; const std::string &RightText = II->second; - if (D.Match(RightText)) + if (D.match(RightText)) break; } if (II == IE) { - if (D.Count == D.OneOrMoreCount) { - if (!FoundOnce) - LeftOnly.push_back(*I); - // We are only interested in at least one match, so exit the loop. - break; - } // Not found. + if (i >= D.Min) break; LeftOnly.push_back(*I); } else { // Found. The same cannot be found twice. Right.erase(II); - FoundOnce = true; } } } // Now all that's left in Right are those that were not matched. 
- unsigned num = PrintProblem(Diags, &SourceMgr, LeftOnly, Label, true); - num += PrintProblem(Diags, &SourceMgr, Right.begin(), Right.end(), - Label, false); + unsigned num = PrintExpected(Diags, SourceMgr, LeftOnly, Label); + num += PrintUnexpected(Diags, &SourceMgr, Right.begin(), Right.end(), Label); return num; } @@ -495,8 +602,6 @@ static unsigned CheckResults(DiagnosticsEngine &Diags, SourceManager &SourceMgr, } void VerifyDiagnosticConsumer::CheckDiagnostics() { - ExpectedData ED; - // Ensure any diagnostics go to the primary client. bool OwnsCurClient = Diags.ownsClient(); DiagnosticConsumer *CurClient = Diags.takeClient(); @@ -506,32 +611,38 @@ void VerifyDiagnosticConsumer::CheckDiagnostics() { // markers. If not then any diagnostics are unexpected. if (CurrentPreprocessor) { SourceManager &SM = CurrentPreprocessor->getSourceManager(); - // Extract expected-error strings from main file. - FindExpectedDiags(*CurrentPreprocessor, ED, SM.getMainFileID()); - // Only check for expectations in other diagnostic locations - // if they are not the main file (via ID or FileEntry) - the main - // file has already been looked at, and its expectations must not - // be added twice. - if (!FirstErrorFID.isInvalid() && FirstErrorFID != SM.getMainFileID() - && (!SM.getFileEntryForID(FirstErrorFID) - || (SM.getFileEntryForID(FirstErrorFID) != - SM.getFileEntryForID(SM.getMainFileID())))) { - FindExpectedDiags(*CurrentPreprocessor, ED, FirstErrorFID); - FirstErrorFID = FileID(); + +#ifndef NDEBUG + // In a debug build, scan through any files that may have been missed + // during parsing and issue a fatal error if directives are contained + // within these files. If a fatal error occurs, this suggests that + // this file is being parsed separately from the main file. + HeaderSearch &HS = CurrentPreprocessor->getHeaderSearchInfo(); + for (FilesWithDiagnosticsSet::iterator I = FilesWithDiagnostics.begin(), + End = FilesWithDiagnostics.end(); + I != End; ++I) { + const FileEntry *E = SM.getFileEntryForID(*I); + // Don't check files already parsed or those handled as modules. + if (E && (FilesParsedForDirectives.count(E) + || HS.findModuleForHeader(E))) + continue; + + if (findDirectives(*CurrentPreprocessor, *I)) + llvm::report_fatal_error(Twine("-verify directives found after rather" + " than during normal parsing of ", + StringRef(E ? E->getName() : "(unknown)"))); } +#endif // Check that the expected diagnostics occurred. NumErrors += CheckResults(Diags, SM, *Buffer, ED); } else { - NumErrors += (PrintProblem(Diags, 0, - Buffer->err_begin(), Buffer->err_end(), - "error", false) + - PrintProblem(Diags, 0, - Buffer->warn_begin(), Buffer->warn_end(), - "warn", false) + - PrintProblem(Diags, 0, - Buffer->note_begin(), Buffer->note_end(), - "note", false)); + NumErrors += (PrintUnexpected(Diags, 0, Buffer->err_begin(), + Buffer->err_end(), "error") + + PrintUnexpected(Diags, 0, Buffer->warn_begin(), + Buffer->warn_end(), "warn") + + PrintUnexpected(Diags, 0, Buffer->note_begin(), + Buffer->note_end(), "note")); } Diags.takeClient(); @@ -539,6 +650,9 @@ void VerifyDiagnosticConsumer::CheckDiagnostics() { // Reset the buffer, we have processed all the diagnostics in it. 
   Buffer.reset(new TextDiagnosticBuffer());
+  ED.Errors.clear();
+  ED.Warnings.clear();
+  ED.Notes.clear();
 }
 
 DiagnosticConsumer *
@@ -549,9 +663,10 @@ VerifyDiagnosticConsumer::clone(DiagnosticsEngine &Diags) const {
   return new VerifyDiagnosticConsumer(Diags);
 }
 
-Directive* Directive::Create(bool RegexKind, const SourceLocation &Location,
-                             const std::string &Text, unsigned Count) {
+Directive *Directive::create(bool RegexKind, SourceLocation DirectiveLoc,
+                             SourceLocation DiagnosticLoc, StringRef Text,
+                             unsigned Min, unsigned Max) {
   if (RegexKind)
-    return new RegexDirective(Location, Text, Count);
-  return new StandardDirective(Location, Text, Count);
+    return new RegexDirective(DirectiveLoc, DiagnosticLoc, Text, Min, Max);
+  return new StandardDirective(DirectiveLoc, DiagnosticLoc, Text, Min, Max);
 }
diff --git a/lib/Frontend/Warnings.cpp b/lib/Frontend/Warnings.cpp
index ec5fde0..b7d4a3b 100644
--- a/lib/Frontend/Warnings.cpp
+++ b/lib/Frontend/Warnings.cpp
@@ -53,7 +53,11 @@ void clang::ProcessWarningOptions(DiagnosticsEngine &Diags,
   Diags.setIgnoreAllWarnings(Opts.IgnoreWarnings);
   Diags.setShowOverloads(
     static_cast<DiagnosticsEngine::OverloadsShown>(Opts.ShowOverloads));
-
+
+  Diags.setElideType(Opts.ElideType);
+  Diags.setPrintTemplateTree(Opts.ShowTemplateTree);
+  Diags.setShowColors(Opts.ShowColors);
+
   // Handle -ferror-limit
   if (Opts.ErrorLimit)
     Diags.setErrorLimit(Opts.ErrorLimit);
@@ -83,6 +87,7 @@ void clang::ProcessWarningOptions(DiagnosticsEngine &Diags,
   bool SetDiagnostic = (Report == 0);
   for (unsigned i = 0, e = Opts.Warnings.size(); i != e; ++i) {
     StringRef Opt = Opts.Warnings[i];
+    StringRef OrigOpt = Opts.Warnings[i];
 
     // Treat -Wformat=0 as an alias for -Wno-format.
     if (Opt == "format=0")
@@ -130,7 +135,7 @@ void clang::ProcessWarningOptions(DiagnosticsEngine &Diags,
         if ((Opt[5] != '=' && Opt[5] != '-') || Opt.size() == 6) {
           if (Report)
             Diags.Report(diag::warn_unknown_warning_specifier)
-              << "-Werror" << ("-W" + Opt.str());
+              << "-Werror" << ("-W" + OrigOpt.str());
           continue;
         }
         Specifier = Opt.substr(6);
@@ -158,7 +163,7 @@ void clang::ProcessWarningOptions(DiagnosticsEngine &Diags,
         if ((Opt[12] != '=' && Opt[12] != '-') || Opt.size() == 13) {
           if (Report)
             Diags.Report(diag::warn_unknown_warning_specifier)
-              << "-Wfatal-errors" << ("-W" + Opt.str());
+              << "-Wfatal-errors" << ("-W" + OrigOpt.str());
           continue;
         }
         Specifier = Opt.substr(13);
@@ -182,7 +187,8 @@ void clang::ProcessWarningOptions(DiagnosticsEngine &Diags,
 
       if (Report) {
         if (DiagIDs->getDiagnosticsInGroup(Opt, _Diags))
-          EmitUnknownDiagWarning(Diags, "-W", Opt, isPositive);
+          EmitUnknownDiagWarning(Diags, isPositive ?
"-W" : "-Wno-", Opt, + isPositive); } else { Diags.setDiagnosticGroupMapping(Opt, Mapping); } diff --git a/lib/FrontendTool/CMakeLists.txt b/lib/FrontendTool/CMakeLists.txt index 5270b1b..fe9d589 100644 --- a/lib/FrontendTool/CMakeLists.txt +++ b/lib/FrontendTool/CMakeLists.txt @@ -1,11 +1,20 @@ -set(LLVM_USED_LIBS clangDriver clangFrontend clangRewrite clangCodeGen - clangStaticAnalyzerFrontend clangStaticAnalyzerCheckers clangStaticAnalyzerCore - clangARCMigrate) - add_clang_library(clangFrontendTool ExecuteCompilerInvocation.cpp ) add_dependencies(clangFrontendTool - ClangCC1Options - ClangDiagnosticFrontend) + ClangDiagnosticCommon + ClangDiagnosticFrontend + ClangDriverOptions + ) + +target_link_libraries(clangFrontendTool + clangDriver + clangFrontend + clangRewrite + clangCodeGen + clangStaticAnalyzerFrontend + clangStaticAnalyzerCheckers + clangStaticAnalyzerCore + clangARCMigrate + ) diff --git a/lib/FrontendTool/ExecuteCompilerInvocation.cpp b/lib/FrontendTool/ExecuteCompilerInvocation.cpp index 07d2b8d..bd50083 100644 --- a/lib/FrontendTool/ExecuteCompilerInvocation.cpp +++ b/lib/FrontendTool/ExecuteCompilerInvocation.cpp @@ -16,7 +16,7 @@ #include "clang/StaticAnalyzer/Frontend/FrontendActions.h" #include "clang/ARCMigrate/ARCMTActions.h" #include "clang/CodeGen/CodeGenAction.h" -#include "clang/Driver/CC1Options.h" +#include "clang/Driver/Options.h" #include "clang/Driver/OptTable.h" #include "clang/Frontend/CompilerInvocation.h" #include "clang/Frontend/CompilerInstance.h" @@ -32,6 +32,7 @@ static FrontendAction *CreateFrontendBaseAction(CompilerInstance &CI) { using namespace clang::frontend; switch (CI.getFrontendOpts().ProgramAction) { + case ASTDeclList: return new ASTDeclListAction(); case ASTDump: return new ASTDumpAction(); case ASTDumpXML: return new ASTDumpXMLAction(); case ASTPrint: return new ASTPrintAction(); @@ -71,7 +72,12 @@ static FrontendAction *CreateFrontendBaseAction(CompilerInstance &CI) { case PrintDeclContext: return new DeclContextPrintAction(); case PrintPreamble: return new PrintPreambleAction(); - case PrintPreprocessedInput: return new PrintPreprocessedAction(); + case PrintPreprocessedInput: { + if (CI.getPreprocessorOutputOpts().RewriteIncludes) + return new RewriteIncludesAction(); + return new PrintPreprocessedAction(); + } + case RewriteMacros: return new RewriteMacrosAction(); case RewriteObjC: return new RewriteObjCAction(); case RewriteTest: return new RewriteTestAction(); @@ -129,7 +135,7 @@ static FrontendAction *CreateFrontendAction(CompilerInstance &CI) { bool clang::ExecuteCompilerInvocation(CompilerInstance *Clang) { // Honor -help. 
   if (Clang->getFrontendOpts().ShowHelp) {
-    OwningPtr<OptTable> Opts(driver::createCC1OptTable());
+    OwningPtr<OptTable> Opts(driver::createDriverOptTable());
     Opts->PrintHelp(llvm::outs(), "clang -cc1",
                     "LLVM 'Clang' Compiler: http://clang.llvm.org");
     return 0;
diff --git a/lib/Headers/CMakeLists.txt b/lib/Headers/CMakeLists.txt
index 78141a3..6e9cc68 100644
--- a/lib/Headers/CMakeLists.txt
+++ b/lib/Headers/CMakeLists.txt
@@ -1,5 +1,6 @@
 set(files
   altivec.h
+  ammintrin.h
   avxintrin.h
   avx2intrin.h
   bmiintrin.h
@@ -7,6 +8,7 @@ set(files
   emmintrin.h
   float.h
   fma4intrin.h
+  fmaintrin.h
   immintrin.h
   iso646.h
   limits.h
@@ -29,6 +31,7 @@ set(files
   wmmintrin.h
   x86intrin.h
   xmmintrin.h
+  xopintrin.h
   cpuid.h
   unwind.h
   module.map
diff --git a/lib/Headers/ammintrin.h b/lib/Headers/ammintrin.h
new file mode 100644
index 0000000..d87b9cd
--- /dev/null
+++ b/lib/Headers/ammintrin.h
@@ -0,0 +1,68 @@
+/*===---- ammintrin.h - SSE4a intrinsics -----------------------------------===
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ *
+ *===-----------------------------------------------------------------------===
+ */
+
+#ifndef __AMMINTRIN_H
+#define __AMMINTRIN_H
+
+#ifndef __SSE4A__
+#error "SSE4A instruction set not enabled"
+#else
+
+#include <pmmintrin.h>
+
+#define _mm_extracti_si64(x, len, idx) \
+  ((__m128i)__builtin_ia32_extrqi((__v2di)(__m128i)(x), \
+                                  (char)(len), (char)(idx)))
+
+static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+_mm_extract_si64(__m128i __x, __m128i __y)
+{
+  return (__m128i)__builtin_ia32_extrq((__v2di)__x, (__v16qi)__y);
+}
+
+#define _mm_inserti_si64(x, y, len, idx) \
+  ((__m128i)__builtin_ia32_insertqi((__v2di)(__m128i)(x), \
+                                    (__v2di)(__m128i)(y), \
+                                    (char)(len), (char)(idx)))
+
+static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
+_mm_insert_si64(__m128i __x, __m128i __y)
+{
+  return (__m128i)__builtin_ia32_insertq((__v2di)__x, (__v2di)__y);
+}
+
+static __inline__ void __attribute__((__always_inline__, __nodebug__))
+_mm_stream_sd(double *__p, __m128d __a)
+{
+  __builtin_ia32_movntsd(__p, (__v2df)__a);
+}
+
+static __inline__ void __attribute__((__always_inline__, __nodebug__))
+_mm_stream_ss(float *__p, __m128 __a)
+{
+  __builtin_ia32_movntss(__p, (__v4sf)__a);
+}
+
+#endif /* __SSE4A__ */
+
+#endif /* __AMMINTRIN_H */
diff --git a/lib/Headers/avx2intrin.h b/lib/Headers/avx2intrin.h
index 884c46d..2c53aed 100644
--- a/lib/Headers/avx2intrin.h
+++ b/lib/Headers/avx2intrin.h
@@ -959,3 +959,243 @@ _mm_srlv_epi64(__m128i __X, __m128i __Y)
 {
   return (__m128i)__builtin_ia32_psrlv2di(__X, __Y);
 }
+
+#define _mm_mask_i32gather_pd(a, m, i, mask, s) __extension__ ({ \
+  __m128d __a = (a); \
+  double const *__m = (m); \
+  __m128i __i = (i); \
+  __m128d __mask = (mask); \
+  (__m128d)__builtin_ia32_gatherd_pd((__v2df)__a, (const __v2df *)__m, \
+             (__v4si)__i, (__v2df)__mask, (s)); })
+
+#define _mm256_mask_i32gather_pd(a, m, i, mask, s) __extension__ ({ \
+  __m256d __a = (a); \
+  double const *__m = (m); \
+  __m128i __i = (i); \
+  __m256d __mask = (mask); \
+  (__m256d)__builtin_ia32_gatherd_pd256((__v4df)__a, (const __v4df *)__m, \
+             (__v4si)__i, (__v4df)__mask, (s)); })
+
+#define _mm_mask_i64gather_pd(a, m, i, mask, s) __extension__ ({ \
+  __m128d __a = (a); \
+  double const *__m = (m); \
+  __m128i __i = (i); \
+  __m128d __mask = (mask); \
+  (__m128d)__builtin_ia32_gatherq_pd((__v2df)__a, (const __v2df *)__m, \
+             (__v2di)__i, (__v2df)__mask, (s)); })
+
+#define _mm256_mask_i64gather_pd(a, m, i, mask, s) __extension__ ({ \
+  __m256d __a = (a); \
+  double const *__m = (m); \
+  __m256i __i = (i); \
+  __m256d __mask = (mask); \
+  (__m256d)__builtin_ia32_gatherq_pd256((__v4df)__a, (const __v4df *)__m, \
+             (__v4di)__i, (__v4df)__mask, (s)); })
+
+#define _mm_mask_i32gather_ps(a, m, i, mask, s) __extension__ ({ \
+  __m128 __a = (a); \
+  float const *__m = (m); \
+  __m128i __i = (i); \
+  __m128 __mask = (mask); \
+  (__m128)__builtin_ia32_gatherd_ps((__v4sf)__a, (const __v4sf *)__m, \
+            (__v4si)__i, (__v4sf)__mask, (s)); })
+
+#define _mm256_mask_i32gather_ps(a, m, i, mask, s) __extension__ ({ \
+  __m256 __a = (a); \
+  float const *__m = (m); \
+  __m256i __i = (i); \
+  __m256 __mask = (mask); \
+  (__m256)__builtin_ia32_gatherd_ps256((__v8sf)__a, (const __v8sf *)__m, \
+            (__v8si)__i, (__v8sf)__mask, (s)); })
+
+#define _mm_mask_i64gather_ps(a, m, i, mask, s) __extension__ ({ \
+  __m128 __a = (a); \
+  float const *__m = (m); \
+  __m128i __i = (i); \
+  __m128 __mask = (mask); \
+  (__m128)__builtin_ia32_gatherq_ps((__v4sf)__a, (const __v4sf *)__m, \
+            (__v2di)__i, 
(__v4sf)__mask, (s)); }) + +#define _mm256_mask_i64gather_ps(a, m, i, mask, s) __extension__ ({ \ + __m128 __a = (a); \ + float const *__m = (m); \ + __m256i __i = (i); \ + __m128 __mask = (mask); \ + (__m128)__builtin_ia32_gatherq_ps256((__v4sf)__a, (const __v4sf *)__m, \ + (__v4di)__i, (__v4sf)__mask, (s)); }) + +#define _mm_mask_i32gather_epi32(a, m, i, mask, s) __extension__ ({ \ + __m128i __a = (a); \ + int const *__m = (m); \ + __m128i __i = (i); \ + __m128i __mask = (mask); \ + (__m128i)__builtin_ia32_gatherd_d((__v4si)__a, (const __v4si *)__m, \ + (__v4si)__i, (__v4si)__mask, (s)); }) + +#define _mm256_mask_i32gather_epi32(a, m, i, mask, s) __extension__ ({ \ + __m256i __a = (a); \ + int const *__m = (m); \ + __m256i __i = (i); \ + __m256i __mask = (mask); \ + (__m256i)__builtin_ia32_gatherd_d256((__v8si)__a, (const __v8si *)__m, \ + (__v8si)__i, (__v8si)__mask, (s)); }) + +#define _mm_mask_i64gather_epi32(a, m, i, mask, s) __extension__ ({ \ + __m128i __a = (a); \ + int const *__m = (m); \ + __m128i __i = (i); \ + __m128i __mask = (mask); \ + (__m128i)__builtin_ia32_gatherq_d((__v4si)__a, (const __v4si *)__m, \ + (__v2di)__i, (__v4si)__mask, (s)); }) + +#define _mm256_mask_i64gather_epi32(a, m, i, mask, s) __extension__ ({ \ + __m128i __a = (a); \ + int const *__m = (m); \ + __m256i __i = (i); \ + __m128i __mask = (mask); \ + (__m128i)__builtin_ia32_gatherq_d256((__v4si)__a, (const __v4si *)__m, \ + (__v4di)__i, (__v4si)__mask, (s)); }) + +#define _mm_mask_i32gather_epi64(a, m, i, mask, s) __extension__ ({ \ + __m128i __a = (a); \ + int const *__m = (m); \ + __m128i __i = (i); \ + __m128i __mask = (mask); \ + (__m128i)__builtin_ia32_gatherd_q((__v2di)__a, (const __v2di *)__m, \ + (__v4si)__i, (__v2di)__mask, (s)); }) + +#define _mm256_mask_i32gather_epi64(a, m, i, mask, s) __extension__ ({ \ + __m256i __a = (a); \ + int const *__m = (m); \ + __m128i __i = (i); \ + __m256i __mask = (mask); \ + (__m256i)__builtin_ia32_gatherd_q256((__v4di)__a, (const __v4di *)__m, \ + (__v4si)__i, (__v4di)__mask, (s)); }) + +#define _mm_mask_i64gather_epi64(a, m, i, mask, s) __extension__ ({ \ + __m128i __a = (a); \ + int const *__m = (m); \ + __m128i __i = (i); \ + __m128i __mask = (mask); \ + (__m128i)__builtin_ia32_gatherq_q((__v2di)__a, (const __v2di *)__m, \ + (__v2di)__i, (__v2di)__mask, (s)); }) + +#define _mm256_mask_i64gather_epi64(a, m, i, mask, s) __extension__ ({ \ + __m256i __a = (a); \ + int const *__m = (m); \ + __m256i __i = (i); \ + __m256i __mask = (mask); \ + (__m256i)__builtin_ia32_gatherq_q256((__v4di)__a, (const __v4di *)__m, \ + (__v4di)__i, (__v4di)__mask, (s)); }) + +#define _mm_i32gather_pd(m, i, s) __extension__ ({ \ + double const *__m = (m); \ + __m128i __i = (i); \ + (__m128d)__builtin_ia32_gatherd_pd((__v2df)_mm_setzero_pd(), \ + (const __v2df *)__m, (__v4si)__i, \ + (__v2df)_mm_set1_pd((double)(long long int)-1), (s)); }) + +#define _mm256_i32gather_pd(m, i, s) __extension__ ({ \ + double const *__m = (m); \ + __m128i __i = (i); \ + (__m256d)__builtin_ia32_gatherd_pd256((__v4df)_mm256_setzero_pd(), \ + (const __v4df *)__m, (__v4si)__i, \ + (__v4df)_mm256_set1_pd((double)(long long int)-1), (s)); }) + +#define _mm_i64gather_pd(m, i, s) __extension__ ({ \ + double const *__m = (m); \ + __m128i __i = (i); \ + (__m128d)__builtin_ia32_gatherq_pd((__v2df)_mm_setzero_pd(), \ + (const __v2df *)__m, (__v2di)__i, \ + (__v2df)_mm_set1_pd((double)(long long int)-1), (s)); }) + +#define _mm256_i64gather_pd(m, i, s) __extension__ ({ \ + double const *__m = (m); \ + __m256i __i = 
(i); \ + (__m256d)__builtin_ia32_gatherq_pd256((__v4df)_mm256_setzero_pd(), \ + (const __v4df *)__m, (__v4di)__i, \ + (__v4df)_mm256_set1_pd((double)(long long int)-1), (s)); }) + +#define _mm_i32gather_ps(m, i, s) __extension__ ({ \ + float const *__m = (m); \ + __m128i __i = (i); \ + (__m128)__builtin_ia32_gatherd_ps((__v4sf)_mm_setzero_ps(), \ + (const __v4sf *)__m, (__v4si)__i, \ + (__v4sf)_mm_set1_ps((float)(int)-1), (s)); }) + +#define _mm256_i32gather_ps(m, i, s) __extension__ ({ \ + float const *__m = (m); \ + __m256i __i = (i); \ + (__m256)__builtin_ia32_gatherd_ps256((__v8sf)_mm256_setzero_ps(), \ + (const __v8sf *)__m, (__v8si)__i, \ + (__v8sf)_mm256_set1_ps((float)(int)-1), (s)); }) + +#define _mm_i64gather_ps(m, i, s) __extension__ ({ \ + float const *__m = (m); \ + __m128i __i = (i); \ + (__m128)__builtin_ia32_gatherq_ps((__v4sf)_mm_setzero_ps(), \ + (const __v4sf *)__m, (__v2di)__i, \ + (__v4sf)_mm_set1_ps((float)(int)-1), (s)); }) + +#define _mm256_i64gather_ps(m, i, s) __extension__ ({ \ + float const *__m = (m); \ + __m256i __i = (i); \ + (__m128)__builtin_ia32_gatherq_ps256((__v4sf)_mm_setzero_ps(), \ + (const __v4sf *)__m, (__v4di)__i, \ + (__v4sf)_mm_set1_ps((float)(int)-1), (s)); }) + +#define _mm_i32gather_epi32(m, i, s) __extension__ ({ \ + int const *__m = (m); \ + __m128i __i = (i); \ + (__m128i)__builtin_ia32_gatherd_d((__v4si)_mm_setzero_si128(), \ + (const __v4si *)__m, (__v4si)__i, \ + (__v4si)_mm_set1_epi32(-1), (s)); }) + +#define _mm256_i32gather_epi32(m, i, s) __extension__ ({ \ + int const *__m = (m); \ + __m256i __i = (i); \ + (__m256i)__builtin_ia32_gatherd_d256((__v8si)_mm256_setzero_si256(), \ + (const __v8si *)__m, (__v8si)__i, \ + (__v8si)_mm256_set1_epi32(-1), (s)); }) + +#define _mm_i64gather_epi32(m, i, s) __extension__ ({ \ + int const *__m = (m); \ + __m128i __i = (i); \ + (__m128i)__builtin_ia32_gatherq_d((__v4si)_mm_setzero_si128(), \ + (const __v4si *)__m, (__v2di)__i, \ + (__v4si)_mm_set1_epi32(-1), (s)); }) + +#define _mm256_i64gather_epi32(m, i, s) __extension__ ({ \ + int const *__m = (m); \ + __m256i __i = (i); \ + (__m128i)__builtin_ia32_gatherq_d256((__v4si)_mm_setzero_si128(), \ + (const __v4si *)__m, (__v4di)__i, \ + (__v4si)_mm_set1_epi32(-1), (s)); }) + +#define _mm_i32gather_epi64(m, i, s) __extension__ ({ \ + int const *__m = (m); \ + __m128i __i = (i); \ + (__m128i)__builtin_ia32_gatherd_q((__v2di)_mm_setzero_si128(), \ + (const __v2di *)__m, (__v4si)__i, \ + (__v2di)_mm_set1_epi64x(-1), (s)); }) + +#define _mm256_i32gather_epi64(m, i, s) __extension__ ({ \ + int const *__m = (m); \ + __m128i __i = (i); \ + (__m256i)__builtin_ia32_gatherd_q256((__v4di)_mm256_setzero_si256(), \ + (const __v4di *)__m, (__v4si)__i, \ + (__v4di)_mm256_set1_epi64x(-1), (s)); }) + +#define _mm_i64gather_epi64(m, i, s) __extension__ ({ \ + int const *__m = (m); \ + __m128i __i = (i); \ + (__m128i)__builtin_ia32_gatherq_q((__v2di)_mm_setzero_si128(), \ + (const __v2di *)__m, (__v2di)__i, \ + (__v2di)_mm_set1_epi64x(-1), (s)); }) + +#define _mm256_i64gather_epi64(m, i, s) __extension__ ({ \ + int const *__m = (m); \ + __m256i __i = (i); \ + (__m256i)__builtin_ia32_gatherq_q256((__v4di)_mm256_setzero_si256(), \ + (const __v4di *)__m, (__v4di)__i, \ + (__v4di)_mm256_set1_epi64x(-1), (s)); }) diff --git a/lib/Headers/bmiintrin.h b/lib/Headers/bmiintrin.h index 2f7db73..8cb00f5 100644 --- a/lib/Headers/bmiintrin.h +++ b/lib/Headers/bmiintrin.h @@ -33,7 +33,7 @@ #define __BMIINTRIN_H static __inline__ unsigned short __attribute__((__always_inline__, 
__nodebug__))
-__tzcnt16(unsigned short __X)
+__tzcnt_u16(unsigned short __X)
 {
   return __builtin_ctzs(__X);
 }
@@ -69,7 +69,7 @@ __blsr_u32(unsigned int __X)
 }

 static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__))
-__tzcnt32(unsigned int __X)
+__tzcnt_u32(unsigned int __X)
 {
   return __builtin_ctz(__X);
 }
@@ -106,7 +106,7 @@ __blsr_u64(unsigned long long __X)
 }

 static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__))
-__tzcnt64(unsigned long long __X)
+__tzcnt_u64(unsigned long long __X)
 {
   return __builtin_ctzll(__X);
 }
diff --git a/lib/Headers/emmintrin.h b/lib/Headers/emmintrin.h
index e10b77d..91395ed 100644
--- a/lib/Headers/emmintrin.h
+++ b/lib/Headers/emmintrin.h
@@ -1186,7 +1186,10 @@ _mm_maskmoveu_si128(__m128i d, __m128i n, char *p)
 static __inline__ void __attribute__((__always_inline__, __nodebug__))
 _mm_storel_epi64(__m128i *p, __m128i a)
 {
-  __builtin_ia32_storelv4si((__v2si *)p, a);
+  struct __mm_storel_epi64_struct {
+    long long u;
+  } __attribute__((__packed__, __may_alias__));
+  ((struct __mm_storel_epi64_struct*)p)->u = a[0];
 }

 static __inline__ void __attribute__((__always_inline__, __nodebug__))
diff --git a/lib/Headers/float.h b/lib/Headers/float.h
index 65b517d..2cb13d3 100644
--- a/lib/Headers/float.h
+++ b/lib/Headers/float.h
@@ -28,7 +28,7 @@
  * additional definitions provided for Windows.
  * For more details see http://msdn.microsoft.com/en-us/library/y0ybw9fy.aspx
  */
-#if defined(__MINGW32__) && \
+#if (defined(__MINGW32__) || defined(_MSC_VER)) && \
     defined(__has_include_next) && __has_include_next(<float.h>)
 #  include_next <float.h>
diff --git a/lib/Headers/fmaintrin.h b/lib/Headers/fmaintrin.h
new file mode 100644
index 0000000..6bfd5a8
--- /dev/null
+++ b/lib/Headers/fmaintrin.h
@@ -0,0 +1,229 @@
+/*===---- fmaintrin.h - FMA intrinsics -------------------------------------===
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ *
+ *===-----------------------------------------------------------------------===
+ */
+
+#ifndef __IMMINTRIN_H
+#error "Never use <fmaintrin.h> directly; include <immintrin.h> instead."
+#endif + +#ifndef __FMAINTRIN_H +#define __FMAINTRIN_H + +#ifndef __FMA__ +# error "FMA instruction set is not enabled" +#else + +static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_fmadd_ps(__m128 __A, __m128 __B, __m128 __C) +{ + return (__m128)__builtin_ia32_vfmaddps(__A, __B, __C); +} + +static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) +_mm_fmadd_pd(__m128d __A, __m128d __B, __m128d __C) +{ + return (__m128d)__builtin_ia32_vfmaddpd(__A, __B, __C); +} + +static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_fmadd_ss(__m128 __A, __m128 __B, __m128 __C) +{ + return (__m128)__builtin_ia32_vfmaddss(__A, __B, __C); +} + +static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) +_mm_fmadd_sd(__m128d __A, __m128d __B, __m128d __C) +{ + return (__m128d)__builtin_ia32_vfmaddsd(__A, __B, __C); +} + +static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_fmsub_ps(__m128 __A, __m128 __B, __m128 __C) +{ + return (__m128)__builtin_ia32_vfmsubps(__A, __B, __C); +} + +static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) +_mm_fmsub_pd(__m128d __A, __m128d __B, __m128d __C) +{ + return (__m128d)__builtin_ia32_vfmsubpd(__A, __B, __C); +} + +static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_fmsub_ss(__m128 __A, __m128 __B, __m128 __C) +{ + return (__m128)__builtin_ia32_vfmsubss(__A, __B, __C); +} + +static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) +_mm_fmsub_sd(__m128d __A, __m128d __B, __m128d __C) +{ + return (__m128d)__builtin_ia32_vfmsubsd(__A, __B, __C); +} + +static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_fnmadd_ps(__m128 __A, __m128 __B, __m128 __C) +{ + return (__m128)__builtin_ia32_vfnmaddps(__A, __B, __C); +} + +static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) +_mm_fnmadd_pd(__m128d __A, __m128d __B, __m128d __C) +{ + return (__m128d)__builtin_ia32_vfnmaddpd(__A, __B, __C); +} + +static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_fnmadd_ss(__m128 __A, __m128 __B, __m128 __C) +{ + return (__m128)__builtin_ia32_vfnmaddss(__A, __B, __C); +} + +static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) +_mm_fnmadd_sd(__m128d __A, __m128d __B, __m128d __C) +{ + return (__m128d)__builtin_ia32_vfnmaddsd(__A, __B, __C); +} + +static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_fnmsub_ps(__m128 __A, __m128 __B, __m128 __C) +{ + return (__m128)__builtin_ia32_vfnmsubps(__A, __B, __C); +} + +static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) +_mm_fnmsub_pd(__m128d __A, __m128d __B, __m128d __C) +{ + return (__m128d)__builtin_ia32_vfnmsubpd(__A, __B, __C); +} + +static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_fnmsub_ss(__m128 __A, __m128 __B, __m128 __C) +{ + return (__m128)__builtin_ia32_vfnmsubss(__A, __B, __C); +} + +static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) +_mm_fnmsub_sd(__m128d __A, __m128d __B, __m128d __C) +{ + return (__m128d)__builtin_ia32_vfnmsubsd(__A, __B, __C); +} + +static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_fmaddsub_ps(__m128 __A, __m128 __B, __m128 __C) +{ + return (__m128)__builtin_ia32_vfmaddsubps(__A, __B, __C); +} + +static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) +_mm_fmaddsub_pd(__m128d __A, __m128d __B, __m128d __C) +{ + return 
(__m128d)__builtin_ia32_vfmaddsubpd(__A, __B, __C); +} + +static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_fmsubadd_ps(__m128 __A, __m128 __B, __m128 __C) +{ + return (__m128)__builtin_ia32_vfmsubaddps(__A, __B, __C); +} + +static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) +_mm_fmsubadd_pd(__m128d __A, __m128d __B, __m128d __C) +{ + return (__m128d)__builtin_ia32_vfmsubaddpd(__A, __B, __C); +} + +static __inline__ __m256 __attribute__((__always_inline__, __nodebug__)) +_mm256_fmadd_ps(__m256 __A, __m256 __B, __m256 __C) +{ + return (__m256)__builtin_ia32_vfmaddps256(__A, __B, __C); +} + +static __inline__ __m256d __attribute__((__always_inline__, __nodebug__)) +_mm256_fmadd_pd(__m256d __A, __m256d __B, __m256d __C) +{ + return (__m256d)__builtin_ia32_vfmaddpd256(__A, __B, __C); +} + +static __inline__ __m256 __attribute__((__always_inline__, __nodebug__)) +_mm256_fmsub_ps(__m256 __A, __m256 __B, __m256 __C) +{ + return (__m256)__builtin_ia32_vfmsubps256(__A, __B, __C); +} + +static __inline__ __m256d __attribute__((__always_inline__, __nodebug__)) +_mm256_fmsub_pd(__m256d __A, __m256d __B, __m256d __C) +{ + return (__m256d)__builtin_ia32_vfmsubpd256(__A, __B, __C); +} + +static __inline__ __m256 __attribute__((__always_inline__, __nodebug__)) +_mm256_fnmadd_ps(__m256 __A, __m256 __B, __m256 __C) +{ + return (__m256)__builtin_ia32_vfnmaddps256(__A, __B, __C); +} + +static __inline__ __m256d __attribute__((__always_inline__, __nodebug__)) +_mm256_fnmadd_pd(__m256d __A, __m256d __B, __m256d __C) +{ + return (__m256d)__builtin_ia32_vfnmaddpd256(__A, __B, __C); +} + +static __inline__ __m256 __attribute__((__always_inline__, __nodebug__)) +_mm256_fnmsub_ps(__m256 __A, __m256 __B, __m256 __C) +{ + return (__m256)__builtin_ia32_vfnmsubps256(__A, __B, __C); +} + +static __inline__ __m256d __attribute__((__always_inline__, __nodebug__)) +_mm256_fnmsub_pd(__m256d __A, __m256d __B, __m256d __C) +{ + return (__m256d)__builtin_ia32_vfnmsubpd256(__A, __B, __C); +} + +static __inline__ __m256 __attribute__((__always_inline__, __nodebug__)) +_mm256_fmaddsub_ps(__m256 __A, __m256 __B, __m256 __C) +{ + return (__m256)__builtin_ia32_vfmaddsubps256(__A, __B, __C); +} + +static __inline__ __m256d __attribute__((__always_inline__, __nodebug__)) +_mm256_fmaddsub_pd(__m256d __A, __m256d __B, __m256d __C) +{ + return (__m256d)__builtin_ia32_vfmaddsubpd256(__A, __B, __C); +} + +static __inline__ __m256 __attribute__((__always_inline__, __nodebug__)) +_mm256_fmsubadd_ps(__m256 __A, __m256 __B, __m256 __C) +{ + return (__m256)__builtin_ia32_vfmsubaddps256(__A, __B, __C); +} + +static __inline__ __m256d __attribute__((__always_inline__, __nodebug__)) +_mm256_fmsubadd_pd(__m256d __A, __m256d __B, __m256d __C) +{ + return (__m256d)__builtin_ia32_vfmsubaddpd256(__A, __B, __C); +} + +#endif /* __FMA__ */ + +#endif /* __FMAINTRIN_H */ diff --git a/lib/Headers/immintrin.h b/lib/Headers/immintrin.h index 1605525..15b65f3 100644 --- a/lib/Headers/immintrin.h +++ b/lib/Headers/immintrin.h @@ -72,4 +72,30 @@ #include #endif +#ifdef __FMA__ +#include +#endif + +#ifdef __RDRND__ +static __inline__ int __attribute__((__always_inline__, __nodebug__)) +_rdrand16_step(unsigned short *__p) +{ + return __builtin_ia32_rdrand16_step(__p); +} + +static __inline__ int __attribute__((__always_inline__, __nodebug__)) +_rdrand32_step(unsigned int *__p) +{ + return __builtin_ia32_rdrand32_step(__p); +} + +#ifdef __x86_64__ +static __inline__ int __attribute__((__always_inline__, 
__nodebug__))
+_rdrand64_step(unsigned long long *__p)
+{
+  return __builtin_ia32_rdrand64_step(__p);
+}
+#endif
+#endif /* __RDRND__ */
+
 #endif /* __IMMINTRIN_H */
diff --git a/lib/Headers/stddef.h b/lib/Headers/stddef.h
index 9e87ee89..eb919b5 100644
--- a/lib/Headers/stddef.h
+++ b/lib/Headers/stddef.h
@@ -43,10 +43,20 @@ typedef __WCHAR_TYPE__ wchar_t;
 #undef NULL
 #ifdef __cplusplus
-#undef __null // VC++ hack.
-#define NULL __null
+#  if !defined(__MINGW32__) && !defined(_MSC_VER)
+#    define NULL __null
+#  else
+#    define NULL 0
+#  endif
 #else
-#define NULL ((void*)0)
+#  define NULL ((void*)0)
+#endif
+
+#ifdef __cplusplus
+#if defined(_MSC_EXTENSIONS) && defined(_NATIVE_NULLPTR_SUPPORTED)
+namespace std { typedef decltype(nullptr) nullptr_t; }
+using ::std::nullptr_t;
+#endif
 #endif

 #define offsetof(t, d) __builtin_offsetof(t, d)
diff --git a/lib/Headers/wmmintrin.h b/lib/Headers/wmmintrin.h
index 8f58850..dca896f 100644
--- a/lib/Headers/wmmintrin.h
+++ b/lib/Headers/wmmintrin.h
@@ -24,11 +24,13 @@
 #ifndef _WMMINTRIN_H
 #define _WMMINTRIN_H

-#if !defined (__AES__)
-# error "AES instructions not enabled"
+#include <emmintrin.h>
+
+#if !defined (__AES__) && !defined (__PCLMUL__)
+# error "AES/PCLMUL instructions not enabled"
 #else

-#include <emmintrin.h>
+#ifdef __AES__

 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
 _mm_aesenc_si128(__m128i __V, __m128i __R)
@@ -64,4 +66,14 @@ _mm_aesimc_si128(__m128i __V)
   __builtin_ia32_aeskeygenassist128((C), (R))

 #endif /* __AES__ */
+
+#ifdef __PCLMUL__
+
+#define _mm_clmulepi64_si128(__X, __Y, __I) \
+  ((__m128i)__builtin_ia32_pclmulqdq128((__v2di)(__m128i)(__X), \
+                                        (__v2di)(__m128i)(__Y), (char)(__I)))
+
+#endif /* __PCLMUL__ */
+
+#endif /* __AES__ || __PCLMUL__ */
 #endif /* _WMMINTRIN_H */
diff --git a/lib/Headers/x86intrin.h b/lib/Headers/x86intrin.h
index f5e4d88..556cd01 100644
--- a/lib/Headers/x86intrin.h
+++ b/lib/Headers/x86intrin.h
@@ -46,10 +46,18 @@
 #include <popcntintrin.h>
 #endif

+#ifdef __SSE4A__
+#include <ammintrin.h>
+#endif
+
 #ifdef __FMA4__
 #include <fma4intrin.h>
 #endif

-// FIXME: SSE4A, XOP, LWP, ABM
+#ifdef __XOP__
+#include <xopintrin.h>
+#endif
+
+// FIXME: LWP

 #endif /* __X86INTRIN_H */
diff --git a/lib/Headers/xopintrin.h b/lib/Headers/xopintrin.h
new file mode 100644
index 0000000..d107be4
--- /dev/null
+++ b/lib/Headers/xopintrin.h
@@ -0,0 +1,411 @@
+/*===---- xopintrin.h - XOP intrinsics -------------------------------------===
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ * + *===-----------------------------------------------------------------------=== + */ + +#ifndef __X86INTRIN_H +#error "Never use directly; include instead." +#endif + +#ifndef __XOPINTRIN_H +#define __XOPINTRIN_H + +#ifndef __XOP__ +# error "XOP instruction set is not enabled" +#else + +#include + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_maccs_epi16(__m128i __A, __m128i __B, __m128i __C) +{ + return (__m128i)__builtin_ia32_vpmacssww((__v8hi)__A, (__v8hi)__B, (__v8hi)__C); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_macc_epi16(__m128i __A, __m128i __B, __m128i __C) +{ + return (__m128i)__builtin_ia32_vpmacsww((__v8hi)__A, (__v8hi)__B, (__v8hi)__C); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_maccsd_epi16(__m128i __A, __m128i __B, __m128i __C) +{ + return (__m128i)__builtin_ia32_vpmacsswd((__v8hi)__A, (__v8hi)__B, (__v4si)__C); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_maccd_epi16(__m128i __A, __m128i __B, __m128i __C) +{ + return (__m128i)__builtin_ia32_vpmacswd((__v8hi)__A, (__v8hi)__B, (__v4si)__C); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_maccs_epi32(__m128i __A, __m128i __B, __m128i __C) +{ + return (__m128i)__builtin_ia32_vpmacssdd((__v4si)__A, (__v4si)__B, (__v4si)__C); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_macc_epi32(__m128i __A, __m128i __B, __m128i __C) +{ + return (__m128i)__builtin_ia32_vpmacsdd((__v4si)__A, (__v4si)__B, (__v4si)__C); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_maccslo_epi32(__m128i __A, __m128i __B, __m128i __C) +{ + return (__m128i)__builtin_ia32_vpmacssdql((__v4si)__A, (__v4si)__B, (__v2di)__C); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_macclo_epi32(__m128i __A, __m128i __B, __m128i __C) +{ + return (__m128i)__builtin_ia32_vpmacsdql((__v4si)__A, (__v4si)__B, (__v2di)__C); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_maccshi_epi32(__m128i __A, __m128i __B, __m128i __C) +{ + return (__m128i)__builtin_ia32_vpmacssdqh((__v4si)__A, (__v4si)__B, (__v2di)__C); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_macchi_epi32(__m128i __A, __m128i __B, __m128i __C) +{ + return (__m128i)__builtin_ia32_vpmacsdqh((__v4si)__A, (__v4si)__B, (__v2di)__C); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_maddsd_epi16(__m128i __A, __m128i __B, __m128i __C) +{ + return (__m128i)__builtin_ia32_vpmadcsswd((__v8hi)__A, (__v8hi)__B, (__v4si)__C); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_maddd_epi16(__m128i __A, __m128i __B, __m128i __C) +{ + return (__m128i)__builtin_ia32_vpmadcswd((__v8hi)__A, (__v8hi)__B, (__v4si)__C); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_haddw_epi8(__m128i __A) +{ + return (__m128i)__builtin_ia32_vphaddbw((__v16qi)__A); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_haddd_epi8(__m128i __A) +{ + return (__m128i)__builtin_ia32_vphaddbd((__v16qi)__A); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_haddq_epi8(__m128i __A) +{ + return (__m128i)__builtin_ia32_vphaddbq((__v16qi)__A); +} + +static __inline__ __m128i __attribute__((__always_inline__, 
__nodebug__)) +_mm_haddd_epi16(__m128i __A) +{ + return (__m128i)__builtin_ia32_vphaddwd((__v8hi)__A); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_haddq_epi16(__m128i __A) +{ + return (__m128i)__builtin_ia32_vphaddwq((__v8hi)__A); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_haddq_epi32(__m128i __A) +{ + return (__m128i)__builtin_ia32_vphadddq((__v4si)__A); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_haddw_epu8(__m128i __A) +{ + return (__m128i)__builtin_ia32_vphaddubw((__v16qi)__A); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_haddd_epu8(__m128i __A) +{ + return (__m128i)__builtin_ia32_vphaddubd((__v16qi)__A); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_haddq_epu8(__m128i __A) +{ + return (__m128i)__builtin_ia32_vphaddubq((__v16qi)__A); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_haddd_epu16(__m128i __A) +{ + return (__m128i)__builtin_ia32_vphadduwd((__v8hi)__A); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_haddq_epu16(__m128i __A) +{ + return (__m128i)__builtin_ia32_vphadduwq((__v8hi)__A); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_haddq_epu32(__m128i __A) +{ + return (__m128i)__builtin_ia32_vphaddudq((__v4si)__A); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_hsubw_epi8(__m128i __A) +{ + return (__m128i)__builtin_ia32_vphsubbw((__v16qi)__A); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_hsubd_epi16(__m128i __A) +{ + return (__m128i)__builtin_ia32_vphsubwd((__v8hi)__A); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_hsubq_epi32(__m128i __A) +{ + return (__m128i)__builtin_ia32_vphsubdq((__v4si)__A); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_cmov_si128(__m128i __A, __m128i __B, __m128i __C) +{ + return (__m128i)__builtin_ia32_vpcmov(__A, __B, __C); +} + +static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) +_mm256_cmov_si256(__m256i __A, __m256i __B, __m256i __C) +{ + return (__m256i)__builtin_ia32_vpcmov_256(__A, __B, __C); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_perm_epi8(__m128i __A, __m128i __B, __m128i __C) +{ + return (__m128i)__builtin_ia32_vpperm((__v16qi)__A, (__v16qi)__B, (__v16qi)__C); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_rot_epi8(__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_vprotb((__v16qi)__A, (__v16qi)__B); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_rot_epi16(__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_vprotw((__v8hi)__A, (__v8hi)__B); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_rot_epi32(__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_vprotd((__v4si)__A, (__v4si)__B); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_rot_epi64(__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_vprotq((__v2di)__A, (__v2di)__B); +} + +#define _mm_roti_epi8(A, N) __extension__ ({ \ + __m128i __A = (A); \ + (__m128i)__builtin_ia32_vprotbi((__v16qi)__A, (N)); }) + +#define _mm_roti_epi16(A, N) __extension__ ({ \ + __m128i __A = (A); \ 
+ (__m128i)__builtin_ia32_vprotwi((__v8hi)__A, (N)); }) + +#define _mm_roti_epi32(A, N) __extension__ ({ \ + __m128i __A = (A); \ + (__m128i)__builtin_ia32_vprotdi((__v4si)__A, (N)); }) + +#define _mm_roti_epi64(A, N) __extension__ ({ \ + __m128i __A = (A); \ + (__m128i)__builtin_ia32_vprotqi((__v2di)__A, (N)); }) + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_shl_epi8(__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_vpshlb((__v16qi)__A, (__v16qi)__B); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_shl_epi16(__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_vpshlw((__v8hi)__A, (__v8hi)__B); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_shl_epi32(__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_vpshld((__v4si)__A, (__v4si)__B); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_shl_epi64(__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_vpshlq((__v2di)__A, (__v2di)__B); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_sha_epi8(__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_vpshab((__v16qi)__A, (__v16qi)__B); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_sha_epi16(__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_vpshaw((__v8hi)__A, (__v8hi)__B); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_sha_epi32(__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_vpshad((__v4si)__A, (__v4si)__B); +} + +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) +_mm_sha_epi64(__m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_vpshaq((__v2di)__A, (__v2di)__B); +} + +#define _mm_com_epu8(A, B, N) __extension__ ({ \ + __m128i __A = (A); \ + __m128i __B = (B); \ + (__m128i)__builtin_ia32_vpcomub((__v16qi)__A, (__v16qi)__B, (N)); }) + +#define _mm_com_epu16(A, B, N) __extension__ ({ \ + __m128i __A = (A); \ + __m128i __B = (B); \ + (__m128i)__builtin_ia32_vpcomuw((__v8hi)__A, (__v8hi)__B, (N)); }) + +#define _mm_com_epu32(A, B, N) __extension__ ({ \ + __m128i __A = (A); \ + __m128i __B = (B); \ + (__m128i)__builtin_ia32_vpcomud((__v4si)__A, (__v4si)__B, (N)); }) + +#define _mm_com_epu64(A, B, N) __extension__ ({ \ + __m128i __A = (A); \ + __m128i __B = (B); \ + (__m128i)__builtin_ia32_vpcomuq((__v2di)__A, (__v2di)__B, (N)); }) + +#define _mm_com_epi8(A, B, N) __extension__ ({ \ + __m128i __A = (A); \ + __m128i __B = (B); \ + (__m128i)__builtin_ia32_vpcomb((__v16qi)__A, (__v16qi)__B, (N)); }) + +#define _mm_com_epi16(A, B, N) __extension__ ({ \ + __m128i __A = (A); \ + __m128i __B = (B); \ + (__m128i)__builtin_ia32_vpcomw((__v8hi)__A, (__v8hi)__B, (N)); }) + +#define _mm_com_epi32(A, B, N) __extension__ ({ \ + __m128i __A = (A); \ + __m128i __B = (B); \ + (__m128i)__builtin_ia32_vpcomd((__v4si)__A, (__v4si)__B, (N)); }) + +#define _mm_com_epi64(A, B, N) __extension__ ({ \ + __m128i __A = (A); \ + __m128i __B = (B); \ + (__m128i)__builtin_ia32_vpcomq((__v2di)__A, (__v2di)__B, (N)); }) + +#define _mm_permute2_pd(X, Y, C, I) __extension__ ({ \ + __m128d __X = (X); \ + __m128d __Y = (Y); \ + __m128i __C = (C); \ + (__m128d)__builtin_ia32_vpermil2pd((__v2df)__X, (__v2df)__Y, \ + (__v2di)__C, (I)); }) + +#define _mm256_permute2_pd(X, Y, C, I) __extension__ ({ \ + __m256d __X = (X); \ + __m256d __Y = (Y); \ + __m256i __C = (C); \ + 
(__m256d)__builtin_ia32_vpermil2pd256((__v4df)__X, (__v4df)__Y, \ + (__v4di)__C, (I)); }) + +#define _mm_permute2_ps(X, Y, C, I) __extension__ ({ \ + __m128 __X = (X); \ + __m128 __Y = (Y); \ + __m128i __C = (C); \ + (__m128)__builtin_ia32_vpermil2ps((__v4sf)__X, (__v4sf)__Y, \ + (__v4si)__C, (I)); }) + +#define _mm256_permute2_ps(X, Y, C, I) __extension__ ({ \ + __m256 __X = (X); \ + __m256 __Y = (Y); \ + __m256i __C = (C); \ + (__m256)__builtin_ia32_vpermil2ps256((__v8sf)__X, (__v8sf)__Y, \ + (__v8si)__C, (I)); }) + +static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_frcz_ss(__m128 __A) +{ + return (__m128)__builtin_ia32_vfrczss((__v4sf)__A); +} + +static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) +_mm_frcz_sd(__m128d __A) +{ + return (__m128d)__builtin_ia32_vfrczsd((__v2df)__A); +} + +static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) +_mm_frcz_ps(__m128 __A) +{ + return (__m128)__builtin_ia32_vfrczps((__v4sf)__A); +} + +static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) +_mm_frcz_pd(__m128d __A) +{ + return (__m128d)__builtin_ia32_vfrczpd((__v2df)__A); +} + +static __inline__ __m256 __attribute__((__always_inline__, __nodebug__)) +_mm256_frcz_ps(__m256 __A) +{ + return (__m256)__builtin_ia32_vfrczps256((__v8sf)__A); +} + +static __inline__ __m256d __attribute__((__always_inline__, __nodebug__)) +_mm256_frcz_pd(__m256d __A) +{ + return (__m256d)__builtin_ia32_vfrczpd256((__v4df)__A); +} + +#endif /* __XOP__ */ + +#endif /* __XOPINTRIN_H */ diff --git a/lib/Lex/CMakeLists.txt b/lib/Lex/CMakeLists.txt index 0a2ffdb..241abbc 100644 --- a/lib/Lex/CMakeLists.txt +++ b/lib/Lex/CMakeLists.txt @@ -2,8 +2,6 @@ set(LLVM_LINK_COMPONENTS support) -set(LLVM_USED_LIBS clangBasic) - add_clang_library(clangLex HeaderMap.cpp HeaderSearch.cpp @@ -28,4 +26,12 @@ add_clang_library(clangLex TokenLexer.cpp ) -add_dependencies(clangLex ClangDiagnosticLex ClangAttrSpellings) +add_dependencies(clangLex + ClangAttrSpellings + ClangDiagnosticCommon + ClangDiagnosticLex + ) + +target_link_libraries(clangLex + clangBasic + ) diff --git a/lib/Lex/HeaderSearch.cpp b/lib/Lex/HeaderSearch.cpp index d688e23..bb3a673 100644 --- a/lib/Lex/HeaderSearch.cpp +++ b/lib/Lex/HeaderSearch.cpp @@ -84,7 +84,7 @@ void HeaderSearch::PrintStats() { } /// CreateHeaderMap - This method returns a HeaderMap for the specified -/// FileEntry, uniquing them through the the 'HeaderMaps' datastructure. +/// FileEntry, uniquing them through the 'HeaderMaps' datastructure. const HeaderMap *HeaderSearch::CreateHeaderMap(const FileEntry *FE) { // We expect the number of headermaps to be small, and almost always empty. // If it ever grows, use of a linear search should be re-evaluated. @@ -390,10 +390,10 @@ void HeaderSearch::setTarget(const TargetInfo &Target) { //===----------------------------------------------------------------------===// -/// LookupFile - Given a "foo" or reference, look up the indicated file, +/// LookupFile - Given a "foo" or \ reference, look up the indicated file, /// return null on failure. isAngled indicates whether the file reference is -/// for system #include's or not (i.e. using <> instead of ""). CurFileEnt, if -/// non-null, indicates where the #including file is, in case a relative search +/// for system \#include's or not (i.e. using <> instead of ""). CurFileEnt, if +/// non-null, indicates where the \#including file is, in case a relative search /// is needed. 
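/// For illustration (editorial example; the file names are invented): if
/// "a/b.h" contains
/// \code
///   #include "c.h"
/// \endcode
/// then CurFileEnt identifies "a/b.h", so "a/c.h" is tried before falling
/// back to the regular search path.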
const FileEntry *HeaderSearch::LookupFile( StringRef Filename, @@ -442,11 +442,19 @@ const FileEntry *HeaderSearch::LookupFile( // Leave CurDir unset. // This file is a system header or C++ unfriendly if the old file is. // - // Note that the temporary 'DirInfo' is required here, as either call to - // getFileInfo could resize the vector and we don't want to rely on order - // of evaluation. - unsigned DirInfo = getFileInfo(CurFileEnt).DirInfo; - getFileInfo(FE).DirInfo = DirInfo; + // Note that we only use one of FromHFI/ToHFI at once, due to potential + // reallocation of the underlying vector potentially making the first + // reference binding dangling. + HeaderFileInfo &FromHFI = getFileInfo(CurFileEnt); + unsigned DirInfo = FromHFI.DirInfo; + bool IndexHeaderMapHeader = FromHFI.IndexHeaderMapHeader; + StringRef Framework = FromHFI.Framework; + + HeaderFileInfo &ToHFI = getFileInfo(FE); + ToHFI.DirInfo = DirInfo; + ToHFI.IndexHeaderMapHeader = IndexHeaderMapHeader; + ToHFI.Framework = Framework; + if (SearchPath != NULL) { StringRef SearchPathRef(CurFileEnt->getDir()->getName()); SearchPath->clear(); @@ -510,6 +518,16 @@ const FileEntry *HeaderSearch::LookupFile( if (HFI.DirInfo == SrcMgr::C_User && InUserSpecifiedSystemFramework) HFI.DirInfo = SrcMgr::C_System; + // If the filename matches a known system header prefix, override + // whether the file is a system header. + for (unsigned j = SystemHeaderPrefixes.size(); j; --j) { + if (Filename.startswith(SystemHeaderPrefixes[j-1].first)) { + HFI.DirInfo = SystemHeaderPrefixes[j-1].second ? SrcMgr::C_System + : SrcMgr::C_User; + break; + } + } + // If this file is found in a header map and uses the framework style of // includes, then this header is part of a framework we're building. if (CurDir->isIndexHeaderMap()) { @@ -556,7 +574,7 @@ const FileEntry *HeaderSearch::LookupFile( } /// LookupSubframeworkHeader - Look up a subframework for the specified -/// #include file. For example, if #include'ing from +/// \#include file. For example, if \#include'ing from /// within ".../Carbon.framework/Headers/Carbon.h", check to see if HIToolbox /// is a subframework within Carbon.framework. If so, return the FileEntry /// for the designated file, otherwise return null. @@ -739,9 +757,6 @@ void HeaderSearch::setHeaderFileInfoForUID(HeaderFileInfo HFI, unsigned UID) { FileInfo[UID] = HFI; } -/// ShouldEnterIncludeFile - Mark the specified file as a target of of a -/// #include, #include_next, or #import directive. Return false if #including -/// the file will have no effect or true if we should include it. bool HeaderSearch::ShouldEnterIncludeFile(const FileEntry *File, bool isImport){ ++NumIncluded; // Count # of attempted #includes. @@ -1032,4 +1047,3 @@ void HeaderSearch::collectAllModules(llvm::SmallVectorImpl &Modules) { Modules.push_back(M->getValue()); } } - diff --git a/lib/Lex/Lexer.cpp b/lib/Lex/Lexer.cpp index 535a852..5212dd8 100644 --- a/lib/Lex/Lexer.cpp +++ b/lib/Lex/Lexer.cpp @@ -127,7 +127,7 @@ Lexer::Lexer(FileID FID, const llvm::MemoryBuffer *InputFile, Preprocessor &PP) } /// Lexer constructor - Create a new raw lexer object. This object is only -/// suitable for calls to 'LexRawToken'. This lexer assumes that the text +/// suitable for calls to 'LexFromRawLexer'. This lexer assumes that the text /// range will outlive it, so it doesn't take ownership of it. 
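/// A minimal sketch of that use (editorial example; the buffer pointers and
/// LangOptions are assumed to come from the caller):
/// \code
///   Lexer RawLex(FileLoc, Opts, BufStart, BufStart, BufEnd);
///   Token Tok;
///   do {
///     RawLex.LexFromRawLexer(Tok); // no preprocessor state, no expansion
///   } while (Tok.isNot(tok::eof));
/// \endcode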
Lexer::Lexer(SourceLocation fileloc, const LangOptions &langOpts, const char *BufStart, const char *BufPtr, const char *BufEnd) @@ -140,7 +140,7 @@ Lexer::Lexer(SourceLocation fileloc, const LangOptions &langOpts, } /// Lexer constructor - Create a new raw lexer object. This object is only -/// suitable for calls to 'LexRawToken'. This lexer assumes that the text +/// suitable for calls to 'LexFromRawLexer'. This lexer assumes that the text /// range will outlive it, so it doesn't take ownership of it. Lexer::Lexer(FileID FID, const llvm::MemoryBuffer *FromFile, const SourceManager &SM, const LangOptions &langOpts) @@ -544,7 +544,6 @@ Lexer::ComputePreamble(const llvm::MemoryBuffer *Buffer, if (InPreprocessorDirective) { // If we've hit the end of the file, we're done. if (TheTok.getKind() == tok::eof) { - InPreprocessorDirective = false; break; } @@ -820,10 +819,6 @@ static CharSourceRange makeRangeFromFileLocs(CharSourceRange Range, return CharSourceRange::getCharRange(Begin, End); } -/// \brief Accepts a range and returns a character range with file locations. -/// -/// Returns a null range if a part of the range resides inside a macro -/// expansion or the range does not reside on the same FileID. CharSourceRange Lexer::makeFileCharRange(CharSourceRange Range, const SourceManager &SM, const LangOptions &LangOpts) { @@ -1091,20 +1086,21 @@ static inline bool isIdentifierBody(unsigned char c) { } /// isHorizontalWhitespace - Return true if this character is horizontal -/// whitespace: ' ', '\t', '\f', '\v'. Note that this returns false for '\0'. +/// whitespace: ' ', '\\t', '\\f', '\\v'. Note that this returns false for +/// '\\0'. static inline bool isHorizontalWhitespace(unsigned char c) { return (CharInfo[c] & CHAR_HORZ_WS) ? true : false; } /// isVerticalWhitespace - Return true if this character is vertical -/// whitespace: '\n', '\r'. Note that this returns false for '\0'. +/// whitespace: '\\n', '\\r'. Note that this returns false for '\\0'. static inline bool isVerticalWhitespace(unsigned char c) { return (CharInfo[c] & CHAR_VERT_WS) ? true : false; } /// isWhitespace - Return true if this character is horizontal or vertical -/// whitespace: ' ', '\t', '\f', '\v', '\n', '\r'. Note that this returns false -/// for '\0'. +/// whitespace: ' ', '\\t', '\\f', '\\v', '\\n', '\\r'. Note that this returns +/// false for '\\0'. static inline bool isWhitespace(unsigned char c) { return (CharInfo[c] & (CHAR_HORZ_WS|CHAR_VERT_WS)) ? true : false; } @@ -1124,6 +1120,11 @@ static inline bool isRawStringDelimBody(unsigned char c) { true : false; } +// Allow external clients to make use of CharInfo. +bool Lexer::isIdentifierBodyChar(char c, const LangOptions &LangOpts) { + return isIdentifierBody(c) || (c == '$' && LangOpts.DollarIdents); +} + //===----------------------------------------------------------------------===// // Diagnostics forwarding code. @@ -1564,8 +1565,20 @@ void Lexer::LexNumericConstant(Token &Result, const char *CurPtr) { } // If we have a hex FP constant, continue. - if ((C == '-' || C == '+') && (PrevCh == 'P' || PrevCh == 'p')) - return LexNumericConstant(Result, ConsumeChar(CurPtr, Size, Result)); + if ((C == '-' || C == '+') && (PrevCh == 'P' || PrevCh == 'p')) { + // Outside C99, we accept hexadecimal floating point numbers as a + // not-quite-conforming extension. Only do so if this looks like it's + // actually meant to be a hexfloat, and not if it has a ud-suffix. 
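+ // Editorial illustration (example literals invented): in C++11 mode,
+ // "0x1.8p+1" has no underscore, so it stays one hex-float token with
+ // value 3.0; "0x1_p+1" contains '_', so the token ends at "0x1_p" and
+ // the "_p" ud-suffix is kept for a literal operator, with '+' starting
+ // a new token.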
+ bool IsHexFloat = true; + if (!LangOpts.C99) { + if (!isHexaLiteral(BufferPtr, LangOpts)) + IsHexFloat = false; + else if (std::find(BufferPtr, CurPtr, '_') != CurPtr) + IsHexFloat = false; + } + if (IsHexFloat) + return LexNumericConstant(Result, ConsumeChar(CurPtr, Size, Result)); + } // Update the location of token as well as BufferPtr. const char *TokStart = BufferPtr; @@ -1635,7 +1648,7 @@ void Lexer::LexStringLiteral(Token &Result, const char *CurPtr, if (C == '\n' || C == '\r' || // Newline. (C == 0 && CurPtr-1 == BufferEnd)) { // End of file. if (!isLexingRawMode() && !LangOpts.AsmPreprocessor) - Diag(BufferPtr, diag::warn_unterminated_string); + Diag(BufferPtr, diag::ext_unterminated_string); FormTokenWithChars(Result, CurPtr-1, tok::unknown); return; } @@ -1755,7 +1768,7 @@ void Lexer::LexAngledStringLiteral(Token &Result, const char *CurPtr) { // Skip escaped characters. if (C == '\\') { // Skip the escaped character. - C = getAndAdvanceChar(CurPtr, Result); + getAndAdvanceChar(CurPtr, Result); } else if (C == '\n' || C == '\r' || // Newline. (C == 0 && (CurPtr-1 == BufferEnd || // End of file. isCodeCompletionPoint(CurPtr-1)))) { @@ -1793,7 +1806,7 @@ void Lexer::LexCharConstant(Token &Result, const char *CurPtr, char C = getAndAdvanceChar(CurPtr, Result); if (C == '\'') { if (!isLexingRawMode() && !LangOpts.AsmPreprocessor) - Diag(BufferPtr, diag::err_empty_character); + Diag(BufferPtr, diag::ext_empty_character); FormTokenWithChars(Result, CurPtr, tok::unknown); return; } @@ -1803,11 +1816,11 @@ void Lexer::LexCharConstant(Token &Result, const char *CurPtr, if (C == '\\') { // Skip the escaped character. // FIXME: UCN's - C = getAndAdvanceChar(CurPtr, Result); + getAndAdvanceChar(CurPtr, Result); } else if (C == '\n' || C == '\r' || // Newline. (C == 0 && CurPtr-1 == BufferEnd)) { // End of file. if (!isLexingRawMode() && !LangOpts.AsmPreprocessor) - Diag(BufferPtr, diag::warn_unterminated_char); + Diag(BufferPtr, diag::ext_unterminated_char); FormTokenWithChars(Result, CurPtr-1, tok::unknown); return; } else if (C == 0) { @@ -1924,8 +1937,6 @@ bool Lexer::SkipBCPLComment(Token &Result, const char *CurPtr) { CurPtr = EscapePtr-2; else break; // This is a newline, we're done. - - C = *CurPtr; } // Otherwise, this is a hard case. Fall back on getAndAdvanceChar to @@ -2022,7 +2033,7 @@ bool Lexer::SaveBCPLComment(Token &Result, const char *CurPtr) { // directly. FormTokenWithChars(Result, CurPtr, tok::comment); - if (!ParsingPreprocessorDirective) + if (!ParsingPreprocessorDirective || LexingRawMode) return true; // If this BCPL-style comment is in a macro definition, transmogrify it into @@ -2043,8 +2054,8 @@ bool Lexer::SaveBCPLComment(Token &Result, const char *CurPtr) { } /// isBlockCommentEndOfEscapedNewLine - Return true if the specified newline -/// character (either \n or \r) is part of an escaped newline sequence. Issue a -/// diagnostic if so. We know that the newline is inside of a block comment. +/// character (either \\n or \\r) is part of an escaped newline sequence. Issue +/// a diagnostic if so. We know that the newline is inside of a block comment. static bool isEndOfBlockCommentWithEscapedNewLine(const char *CurPtr, Lexer *L) { assert(CurPtr[0] == '\n' || CurPtr[0] == '\r'); @@ -2110,12 +2121,12 @@ static bool isEndOfBlockCommentWithEscapedNewLine(const char *CurPtr, #undef bool #endif -/// SkipBlockComment - We have just read the /* characters from input. Read -/// until we find the */ characters that terminate the comment. 
Note that we -/// don't bother decoding trigraphs or escaped newlines in block comments, -/// because they cannot cause the comment to end. The only thing that can -/// happen is the comment could end with an escaped newline between the */ end -/// of comment. +/// We have just read from input the / and * characters that started a comment. +/// Read until we find the * and / characters that terminate the comment. +/// Note that we don't bother decoding trigraphs or escaped newlines in block +/// comments, because they cannot cause the comment to end. The only thing +/// that can happen is the comment could end with an escaped newline between +/// the terminating * and /. /// /// If we're in KeepCommentMode or any CommentHandler has inserted /// some tokens, this will store the first token and return true. @@ -2286,10 +2297,9 @@ bool Lexer::SkipBlockComment(Token &Result, const char *CurPtr) { /// ReadToEndOfLine - Read the rest of the current preprocessor line as an /// uninterpreted string. This switches the lexer out of directive mode. -std::string Lexer::ReadToEndOfLine() { +void Lexer::ReadToEndOfLine(SmallVectorImpl *Result) { assert(ParsingPreprocessorDirective && ParsingFilename == false && "Must be in a preprocessing directive!"); - std::string Result; Token Tmp; // CurPtr - Cache BufferPtr in an automatic variable. @@ -2298,7 +2308,8 @@ std::string Lexer::ReadToEndOfLine() { char Char = getAndAdvanceChar(CurPtr, Tmp); switch (Char) { default: - Result += Char; + if (Result) + Result->push_back(Char); break; case 0: // Null. // Found end of file? @@ -2306,11 +2317,12 @@ std::string Lexer::ReadToEndOfLine() { if (isCodeCompletionPoint(CurPtr-1)) { PP->CodeCompleteNaturalLanguage(); cutOffLexing(); - return Result; + return; } // Nope, normal character, continue. - Result += Char; + if (Result) + Result->push_back(Char); break; } // FALL THROUGH. @@ -2329,8 +2341,8 @@ std::string Lexer::ReadToEndOfLine() { } assert(Tmp.is(tok::eod) && "Unexpected token!"); - // Finally, we're done, return the string we found. - return Result; + // Finally, we're done; + return; } } } @@ -2383,7 +2395,7 @@ bool Lexer::LexEndOfFile(Token &Result, const char *CurPtr) { BufferPtr = CurPtr; // Finally, let the preprocessor handle this. - return PP->HandleEndOfFile(Result); + return PP->HandleEndOfFile(Result, isPragmaLexer()); } /// isNextPPTokenLParen - Return 1 if the next unexpanded token lexed from @@ -2418,7 +2430,7 @@ unsigned Lexer::isNextPPTokenLParen() { return Tok.is(tok::l_paren); } -/// FindConflictEnd - Find the end of a version control conflict marker. +/// \brief Find the end of a version control conflict marker. static const char *FindConflictEnd(const char *CurPtr, const char *BufferEnd, ConflictMarkerKind CMK) { const char *Terminator = CMK == CMK_Perforce ? "<<<<\n" : ">>>>>>>"; @@ -2625,7 +2637,8 @@ LexNextToken: ParsingPreprocessorDirective = false; // Restore comment saving mode, in case it was disabled for directive. - SetCommentRetentionState(PP->getCommentRetentionState()); + if (PP) + SetCommentRetentionState(PP->getCommentRetentionState()); // Since we consumed a newline, we are back at the start of a line. 
IsAtStartOfLine = true; diff --git a/lib/Lex/LiteralSupport.cpp b/lib/Lex/LiteralSupport.cpp index c1d228b..9e3c778 100644 --- a/lib/Lex/LiteralSupport.cpp +++ b/lib/Lex/LiteralSupport.cpp @@ -250,6 +250,39 @@ static bool ProcessUCNEscape(const char *ThisTokBegin, const char *&ThisTokBuf, return true; } +/// MeasureUCNEscape - Determine the number of bytes within the resulting string +/// which this UCN will occupy. +static int MeasureUCNEscape(const char *ThisTokBegin, const char *&ThisTokBuf, + const char *ThisTokEnd, unsigned CharByteWidth, + const LangOptions &Features, bool &HadError) { + // UTF-32: 4 bytes per escape. + if (CharByteWidth == 4) + return 4; + + uint32_t UcnVal = 0; + unsigned short UcnLen = 0; + FullSourceLoc Loc; + + if (!ProcessUCNEscape(ThisTokBegin, ThisTokBuf, ThisTokEnd, UcnVal, + UcnLen, Loc, 0, Features, true)) { + HadError = true; + return 0; + } + + // UTF-16: 2 bytes for BMP, 4 bytes otherwise. + if (CharByteWidth == 2) + return UcnVal <= 0xFFFF ? 2 : 4; + + // UTF-8. + if (UcnVal < 0x80) + return 1; + if (UcnVal < 0x800) + return 2; + if (UcnVal < 0x10000) + return 3; + return 4; +} + /// EncodeUCNEscape - Read the Universal Character Name, check constraints and /// convert the UTF32 to UTF8 or UTF16. This is a subroutine of /// StringLiteralParser. When we decide to implement UCN's for identifiers, @@ -265,7 +298,7 @@ static void EncodeUCNEscape(const char *ThisTokBegin, const char *&ThisTokBuf, unsigned short UcnLen = 0; if (!ProcessUCNEscape(ThisTokBegin, ThisTokBuf, ThisTokEnd, UcnVal, UcnLen, Loc, Diags, Features, true)) { - HadError = 1; + HadError = true; return; } @@ -289,7 +322,7 @@ static void EncodeUCNEscape(const char *ThisTokBegin, const char *&ThisTokBuf, // using reinterpret_cast. UTF16 *ResultPtr = reinterpret_cast(ResultBuf); - if (UcnVal < (UTF32)0xFFFF) { + if (UcnVal <= (UTF32)0xFFFF) { *ResultPtr = UcnVal; ResultBuf += 2; return; @@ -756,6 +789,7 @@ NumericLiteralParser::GetFloatValue(llvm::APFloat &Result) { } +/// \verbatim /// user-defined-character-literal: [C++11 lex.ext] /// character-literal ud-suffix /// ud-suffix: @@ -791,6 +825,7 @@ NumericLiteralParser::GetFloatValue(llvm::APFloat &Result) { /// \U hex-quad hex-quad /// hex-quad: /// hex-digit hex-digit hex-digit hex-digit +/// \endverbatim /// CharLiteralParser::CharLiteralParser(const char *begin, const char *end, SourceLocation Loc, Preprocessor &PP, @@ -971,7 +1006,7 @@ CharLiteralParser::CharLiteralParser(const char *begin, const char *end, Value = (signed char)Value; } - +/// \verbatim /// string-literal: [C++0x lex.string] /// encoding-prefix " [s-char-sequence] " /// encoding-prefix R raw-string @@ -1023,6 +1058,7 @@ CharLiteralParser::CharLiteralParser(const char *begin, const char *end, /// \U hex-quad hex-quad /// hex-quad: /// hex-digit hex-digit hex-digit hex-digit +/// \endverbatim /// StringLiteralParser:: StringLiteralParser(const Token *StringToks, unsigned NumStringToks, @@ -1037,10 +1073,8 @@ StringLiteralParser(const Token *StringToks, unsigned NumStringToks, void StringLiteralParser::init(const Token *StringToks, unsigned NumStringToks){ // The literal token may have come from an invalid source location (e.g. due // to a PCH error), in which case the token length will be 0. 
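As an editorial aside on MeasureUCNEscape above, its width computation can be
restated for a single already-decoded code point (the function name and the
sample code points below are invented; this is a sketch, not part of the
patch):

#include <stdint.h>

/* Bytes one UCN contributes to the literal's character data. */
static int ucn_width_in_bytes(uint32_t cp, unsigned char_byte_width) {
  if (char_byte_width == 4)        /* UTF-32: always one 4-byte unit.  */
    return 4;
  if (char_byte_width == 2)        /* UTF-16: BMP fits in one unit,    */
    return cp <= 0xFFFF ? 2 : 4;   /* otherwise a surrogate pair.      */
  if (cp < 0x80)    return 1;      /* UTF-8: 1..4 bytes by range.      */
  if (cp < 0x800)   return 2;
  if (cp < 0x10000) return 3;
  return 4;
}

For example, U+00E9 measures 2 bytes under both UTF-8 and UTF-16 but 4 under
UTF-32, while U+1F600 measures 4 bytes under all three encodings.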
- if (NumStringToks == 0 || StringToks[0].getLength() < 2) { - hadError = true; - return; - } + if (NumStringToks == 0 || StringToks[0].getLength() < 2) + return DiagnoseLexingError(SourceLocation()); // Scan all of the string portions, remember the max individual token length, // computing a bound on the concatenated string length, and see whether any @@ -1057,10 +1091,8 @@ void StringLiteralParser::init(const Token *StringToks, unsigned NumStringToks){ // Implement Translation Phase #6: concatenation of string literals /// (C99 5.1.1.2p1). The common case is only one string fragment. for (unsigned i = 1; i != NumStringToks; ++i) { - if (StringToks[i].getLength() < 2) { - hadError = true; - return; - } + if (StringToks[i].getLength() < 2) + return DiagnoseLexingError(StringToks[i].getLocation()); // The string could be shorter than this if it needs cleaning, but this is a // reasonable bound, which is all we need. @@ -1123,10 +1155,8 @@ void StringLiteralParser::init(const Token *StringToks, unsigned NumStringToks){ unsigned ThisTokLen = Lexer::getSpelling(StringToks[i], ThisTokBuf, SM, Features, &StringInvalid); - if (StringInvalid) { - hadError = true; - continue; - } + if (StringInvalid) + return DiagnoseLexingError(StringToks[i].getLocation()); const char *ThisTokBegin = ThisTokBuf; const char *ThisTokEnd = ThisTokBuf+ThisTokLen; @@ -1192,7 +1222,11 @@ void StringLiteralParser::init(const Token *StringToks, unsigned NumStringToks){ if (DiagnoseBadString(StringToks[i])) hadError = true; } else { - assert(ThisTokBuf[0] == '"' && "Expected quote, lexer broken?"); + if (ThisTokBuf[0] != '"') { + // The file may have come from PCH and then changed after loading the + // PCH; Fail gracefully. + return DiagnoseLexingError(StringToks[i].getLocation()); + } ++ThisTokBuf; // skip " // Check if this is a pascal string @@ -1296,45 +1330,10 @@ void StringLiteralParser::init(const Token *StringToks, unsigned NumStringToks){ } } - /// copyStringFragment - This function copies from Start to End into ResultPtr. /// Performs widening for multi-byte characters. bool StringLiteralParser::CopyStringFragment(StringRef Fragment) { - assert(CharByteWidth==1 || CharByteWidth==2 || CharByteWidth==4); - ConversionResult result = conversionOK; - // Copy the character span over. - if (CharByteWidth == 1) { - if (!isLegalUTF8String(reinterpret_cast(Fragment.begin()), - reinterpret_cast(Fragment.end()))) - result = sourceIllegal; - memcpy(ResultPtr, Fragment.data(), Fragment.size()); - ResultPtr += Fragment.size(); - } else if (CharByteWidth == 2) { - UTF8 const *sourceStart = (UTF8 const *)Fragment.data(); - // FIXME: Make the type of the result buffer correct instead of - // using reinterpret_cast. - UTF16 *targetStart = reinterpret_cast(ResultPtr); - ConversionFlags flags = strictConversion; - result = ConvertUTF8toUTF16( - &sourceStart,sourceStart + Fragment.size(), - &targetStart,targetStart + 2*Fragment.size(),flags); - if (result==conversionOK) - ResultPtr = reinterpret_cast(targetStart); - } else if (CharByteWidth == 4) { - UTF8 const *sourceStart = (UTF8 const *)Fragment.data(); - // FIXME: Make the type of the result buffer correct instead of - // using reinterpret_cast. 
- UTF32 *targetStart = reinterpret_cast(ResultPtr); - ConversionFlags flags = strictConversion; - result = ConvertUTF8toUTF32( - &sourceStart,sourceStart + Fragment.size(), - &targetStart,targetStart + 4*Fragment.size(),flags); - if (result==conversionOK) - ResultPtr = reinterpret_cast(targetStart); - } - assert((result != targetExhausted) - && "ConvertUTF8toUTFXX exhausted target buffer"); - return result != conversionOK; + return !ConvertUTF8toWide(CharByteWidth, Fragment, ResultPtr); } bool StringLiteralParser::DiagnoseBadString(const Token &Tok) { @@ -1349,6 +1348,12 @@ bool StringLiteralParser::DiagnoseBadString(const Token &Tok) { return !NoErrorOnBadEncoding; } +void StringLiteralParser::DiagnoseLexingError(SourceLocation Loc) { + hadError = true; + if (Diags) + Diags->Report(Loc, diag::err_lexing_string); +} + /// getOffsetOfStringByte - This function returns the offset of the /// specified byte of the string data represented by Token. This handles /// advancing over escape sequences in the string. @@ -1365,14 +1370,31 @@ unsigned StringLiteralParser::getOffsetOfStringByte(const Token &Tok, if (StringInvalid) return 0; + const char *SpellingStart = SpellingPtr; + const char *SpellingEnd = SpellingPtr+TokLen; + + // Handle UTF-8 strings just like narrow strings. + if (SpellingPtr[0] == 'u' && SpellingPtr[1] == '8') + SpellingPtr += 2; + assert(SpellingPtr[0] != 'L' && SpellingPtr[0] != 'u' && SpellingPtr[0] != 'U' && "Doesn't handle wide or utf strings yet"); + // For raw string literals, this is easy. + if (SpellingPtr[0] == 'R') { + assert(SpellingPtr[1] == '"' && "Should be a raw string literal!"); + // Skip 'R"'. + SpellingPtr += 2; + while (*SpellingPtr != '(') { + ++SpellingPtr; + assert(SpellingPtr < SpellingEnd && "Missing ( for raw string literal"); + } + // Skip '('. + ++SpellingPtr; + return SpellingPtr - SpellingStart + ByteNo; + } - const char *SpellingStart = SpellingPtr; - const char *SpellingEnd = SpellingPtr+TokLen; - - // Skip over the leading quote. + // Skip over the leading quote assert(SpellingPtr[0] == '"' && "Should be a string literal!"); ++SpellingPtr; @@ -1389,11 +1411,23 @@ unsigned StringLiteralParser::getOffsetOfStringByte(const Token &Tok, // Otherwise, this is an escape character. Advance over it. bool HadError = false; - ProcessCharEscape(SpellingPtr, SpellingEnd, HadError, - FullSourceLoc(Tok.getLocation(), SM), - CharByteWidth*8, Diags); + if (SpellingPtr[1] == 'u' || SpellingPtr[1] == 'U') { + const char *EscapePtr = SpellingPtr; + unsigned Len = MeasureUCNEscape(SpellingStart, SpellingPtr, SpellingEnd, + 1, Features, HadError); + if (Len > ByteNo) { + // ByteNo is somewhere within the escape sequence. + SpellingPtr = EscapePtr; + break; + } + ByteNo -= Len; + } else { + ProcessCharEscape(SpellingPtr, SpellingEnd, HadError, + FullSourceLoc(Tok.getLocation(), SM), + CharByteWidth*8, Diags); + --ByteNo; + } assert(!HadError && "This method isn't valid on erroneous strings"); - --ByteNo; } return SpellingPtr-SpellingStart; diff --git a/lib/Lex/PPDirectives.cpp b/lib/Lex/PPDirectives.cpp index 625a204..74b9cbc 100644 --- a/lib/Lex/PPDirectives.cpp +++ b/lib/Lex/PPDirectives.cpp @@ -6,9 +6,10 @@ // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// -// -// This file implements # directive processing for the Preprocessor. -// +/// +/// \file +/// \brief Implements # directive processing for the Preprocessor. 
+/// //===----------------------------------------------------------------------===// #include "clang/Lex/Preprocessor.h" @@ -61,8 +62,8 @@ MacroInfo *Preprocessor::CloneMacroInfo(const MacroInfo &MacroToClone) { return MI; } -/// ReleaseMacroInfo - Release the specified MacroInfo. This memory will -/// be reused for allocating new MacroInfo objects. +/// \brief Release the specified MacroInfo to be reused for allocating +/// new MacroInfo objects. void Preprocessor::ReleaseMacroInfo(MacroInfo *MI) { MacroInfoChain *MIChain = (MacroInfoChain*) MI; if (MacroInfoChain *Prev = MIChain->Prev) { @@ -82,8 +83,8 @@ void Preprocessor::ReleaseMacroInfo(MacroInfo *MI) { MI->Destroy(); } -/// DiscardUntilEndOfDirective - Read and discard all tokens remaining on the -/// current line until the tok::eod token is found. +/// \brief Read and discard all tokens remaining on the current line until +/// the tok::eod token is found. void Preprocessor::DiscardUntilEndOfDirective() { Token Tmp; do { @@ -92,11 +93,13 @@ void Preprocessor::DiscardUntilEndOfDirective() { } while (Tmp.isNot(tok::eod)); } -/// ReadMacroName - Lex and validate a macro name, which occurs after a -/// #define or #undef. This sets the token kind to eod and discards the rest -/// of the macro line if the macro name is invalid. isDefineUndef is 1 if -/// this is due to a a #define, 2 if #undef directive, 0 if it is something -/// else (e.g. #ifdef). +/// \brief Lex and validate a macro name, which occurs after a +/// \#define or \#undef. +/// +/// This sets the token kind to eod and discards the rest +/// of the macro line if the macro name is invalid. \p isDefineUndef is 1 if +/// this is due to a a \#define, 2 if \#undef directive, 0 if it is something +/// else (e.g. \#ifdef). void Preprocessor::ReadMacroName(Token &MacroNameTok, char isDefineUndef) { // Read the token, don't allow macro expansion on it. LexUnexpandedToken(MacroNameTok); @@ -157,8 +160,9 @@ void Preprocessor::ReadMacroName(Token &MacroNameTok, char isDefineUndef) { return DiscardUntilEndOfDirective(); } -/// CheckEndOfDirective - Ensure that the next token is a tok::eod token. If -/// not, emit a diagnostic and consume up until the eod. If EnableMacros is +/// \brief Ensure that the next token is a tok::eod token. +/// +/// If not, emit a diagnostic and consume up until the eod. If EnableMacros is /// true, then we consider macros that expand to zero tokens as being ok. void Preprocessor::CheckEndOfDirective(const char *DirType, bool EnableMacros) { Token Tmp; @@ -191,14 +195,14 @@ void Preprocessor::CheckEndOfDirective(const char *DirType, bool EnableMacros) { -/// SkipExcludedConditionalBlock - We just read a #if or related directive and -/// decided that the subsequent tokens are in the #if'd out portion of the -/// file. Lex the rest of the file, until we see an #endif. If +/// SkipExcludedConditionalBlock - We just read a \#if or related directive and +/// decided that the subsequent tokens are in the \#if'd out portion of the +/// file. Lex the rest of the file, until we see an \#endif. If /// FoundNonSkipPortion is true, then we have already emitted code for part of -/// this #if directive, so #else/#elif blocks should never be entered. If ElseOk -/// is true, then #else directives are ok, if not, then we have already seen one -/// so a #else directive is a duplicate. When this returns, the caller can lex -/// the first valid token. +/// this \#if directive, so \#else/\#elif blocks should never be entered. 
+/// If ElseOk is true, then \#else directives are ok, if not, then we have +/// already seen one so a \#else directive is a duplicate. When this returns, +/// the caller can lex the first valid token. void Preprocessor::SkipExcludedConditionalBlock(SourceLocation IfTokenLoc, bool FoundNonSkipPortion, bool FoundElse, @@ -317,7 +321,6 @@ void Preprocessor::SkipExcludedConditionalBlock(SourceLocation IfTokenLoc, } else if (Directive[0] == 'e') { StringRef Sub = Directive.substr(1); if (Sub == "ndif") { // "endif" - CheckEndOfDirective("endif"); PPConditionalInfo CondInfo; CondInfo.WasSkipping = true; // Silence bogus warning. bool InCond = CurPPLexer->popConditionalLevel(CondInfo); @@ -326,9 +329,16 @@ void Preprocessor::SkipExcludedConditionalBlock(SourceLocation IfTokenLoc, // If we popped the outermost skipping block, we're done skipping! if (!CondInfo.WasSkipping) { + // Restore the value of LexingRawMode so that trailing comments + // are handled correctly, if we've reached the outermost block. + CurPPLexer->LexingRawMode = false; + CheckEndOfDirective("endif"); + CurPPLexer->LexingRawMode = true; if (Callbacks) Callbacks->Endif(Tok.getLocation(), CondInfo.IfLoc); break; + } else { + DiscardUntilEndOfDirective(); } } else if (Sub == "lse") { // "else". // #else directive in a skipping conditional. If not in some other @@ -346,7 +356,11 @@ void Preprocessor::SkipExcludedConditionalBlock(SourceLocation IfTokenLoc, // entered, enter the #else block now. if (!CondInfo.WasSkipping && !CondInfo.FoundNonSkip) { CondInfo.FoundNonSkip = true; + // Restore the value of LexingRawMode so that trailing comments + // are handled correctly. + CurPPLexer->LexingRawMode = false; CheckEndOfDirective("else"); + CurPPLexer->LexingRawMode = true; if (Callbacks) Callbacks->Else(Tok.getLocation(), CondInfo.IfLoc); break; @@ -484,9 +498,6 @@ void Preprocessor::PTHSkipExcludedConditionalBlock() { } } -/// LookupFile - Given a "foo" or reference, look up the indicated file, -/// return null on failure. isAngled indicates whether the file reference is -/// for system #include's or not (i.e. using <> instead of ""). const FileEntry *Preprocessor::LookupFile( StringRef Filename, bool isAngled, @@ -553,6 +564,21 @@ const FileEntry *Preprocessor::LookupFile( // Preprocessor Directive Handling. //===----------------------------------------------------------------------===// +class Preprocessor::ResetMacroExpansionHelper { +public: + ResetMacroExpansionHelper(Preprocessor *pp) + : PP(pp), save(pp->DisableMacroExpansion) { + if (pp->MacroExpansionInDirectivesOverride) + pp->DisableMacroExpansion = false; + } + ~ResetMacroExpansionHelper() { + PP->DisableMacroExpansion = save; + } +private: + Preprocessor *PP; + bool save; +}; + /// HandleDirective - This callback is invoked when the lexer sees a # token /// at the start of a line. This consumes the directive, modifies the /// lexer/preprocessor state, and advances the lexer(s) so that the next token @@ -604,6 +630,10 @@ void Preprocessor::HandleDirective(Token &Result) { Diag(Result, diag::ext_embedded_directive); } + // Temporarily enable macro expansion if set so + // and reset to previous state when returning from this function. + ResetMacroExpansionHelper helper(this); + TryAgain: switch (Result.getKind()) { case tok::eod: @@ -774,23 +804,19 @@ static bool GetLineValue(Token &DigitTok, unsigned &Val, Val = NextVal; } - // Reject 0, this is needed both by #line numbers and flags. 
- if (Val == 0) { - PP.Diag(DigitTok, DiagID); - PP.DiscardUntilEndOfDirective(); - return true; - } - - if (DigitTokBegin[0] == '0') + if (DigitTokBegin[0] == '0' && Val) PP.Diag(DigitTok.getLocation(), diag::warn_pp_line_decimal); return false; } -/// HandleLineDirective - Handle #line directive: C99 6.10.4. The two -/// acceptable forms are: +/// \brief Handle a \#line directive: C99 6.10.4. +/// +/// The two acceptable forms are: +/// \verbatim /// # line digit-sequence /// # line digit-sequence "s-char-sequence" +/// \endverbatim void Preprocessor::HandleLineDirective(Token &Tok) { // Read the line # and string argument. Per C99 6.10.4p5, these tokens are // expanded. @@ -801,6 +827,9 @@ void Preprocessor::HandleLineDirective(Token &Tok) { unsigned LineNo; if (GetLineValue(DigitTok, LineNo, diag::err_pp_line_requires_integer,*this)) return; + + if (LineNo == 0) + Diag(DigitTok, diag::ext_pp_line_zero); // Enforce C99 6.10.4p3: "The digit sequence shall not specify ... a // number greater than 2147483647". C90 requires that the line # be <= 32767. @@ -1018,15 +1047,13 @@ void Preprocessor::HandleUserDiagnosticDirective(Token &Tok, // tokens. For example, this is allowed: "#warning ` 'foo". GCC does // collapse multiple consequtive white space between tokens, but this isn't // specified by the standard. - std::string Message = CurLexer->ReadToEndOfLine(); + SmallString<128> Message; + CurLexer->ReadToEndOfLine(&Message); // Find the first non-whitespace character, so that we can make the // diagnostic more succinct. - StringRef Msg(Message); - size_t i = Msg.find_first_not_of(' '); - if (i < Msg.size()) - Msg = Msg.substr(i); - + StringRef Msg = Message.str().ltrim(" "); + if (isWarning) Diag(Tok, diag::pp_hash_warning) << Msg; else @@ -1135,7 +1162,7 @@ void Preprocessor::HandleMacroPrivateDirective(Token &Tok) { //===----------------------------------------------------------------------===// /// GetIncludeFilenameSpelling - Turn the specified lexer token into a fully -/// checked and spelled filename, e.g. as an operand of #include. This returns +/// checked and spelled filename, e.g. as an operand of \#include. This returns /// true if the input filename was in <>'s or false if it were in ""'s. The /// caller is expected to provide a buffer that is large enough to hold the /// spelling of the filename, but is also expected to handle the case when @@ -1179,11 +1206,14 @@ bool Preprocessor::GetIncludeFilenameSpelling(SourceLocation Loc, return isAngled; } -/// ConcatenateIncludeName - Handle cases where the #include name is expanded -/// from a macro as multiple tokens, which need to be glued together. This -/// occurs for code like: -/// #define FOO -/// #include FOO +/// \brief Handle cases where the \#include name is expanded from a macro +/// as multiple tokens, which need to be glued together. +/// +/// This occurs for code like: +/// \code +/// \#define FOO +/// \#include FOO +/// \endcode /// because in this case, "" is returned as 7 tokens, not one. /// /// This code concatenates and consumes tokens up to the '>' token. It returns @@ -1238,10 +1268,10 @@ bool Preprocessor::ConcatenateIncludeName( return true; } -/// HandleIncludeDirective - The "#include" tokens have just been read, read the -/// file to be included from the lexer, then include it! This is a common -/// routine with functionality shared between #include, #include_next and -/// #import. 
LookupFrom is set when this is a #include_next directive, it +/// HandleIncludeDirective - The "\#include" tokens have just been read, read +/// the file to be included from the lexer, then include it! This is a common +/// routine with functionality shared between \#include, \#include_next and +/// \#import. LookupFrom is set when this is a \#include_next directive, it /// specifies the file to start searching from. void Preprocessor::HandleIncludeDirective(SourceLocation HashLoc, Token &IncludeTok, @@ -1360,9 +1390,28 @@ void Preprocessor::HandleIncludeDirective(SourceLocation HashLoc, } if (File == 0) { - if (!SuppressIncludeNotFoundError) - Diag(FilenameTok, diag::err_pp_file_not_found) << Filename; - return; + if (!SuppressIncludeNotFoundError) { + // If the file could not be located and it was included via angle + // brackets, we can attempt a lookup as though it were a quoted path to + // provide the user with a possible fixit. + if (isAngled) { + File = LookupFile(Filename, false, LookupFrom, CurDir, + Callbacks ? &SearchPath : 0, + Callbacks ? &RelativePath : 0, + getLangOpts().Modules ? &SuggestedModule : 0); + if (File) { + SourceRange Range(FilenameTok.getLocation(), CharEnd); + Diag(FilenameTok, diag::err_pp_file_not_found_not_fatal) << + Filename << + FixItHint::CreateReplacement(Range, "\"" + Filename.str() + "\""); + } + } + // If the file is still not found, just go with the vanilla diagnostic + if (!File) + Diag(FilenameTok, diag::err_pp_file_not_found) << Filename; + } + if (!File) + return; } // If we are supposed to import a module rather than including the header, @@ -1465,7 +1514,7 @@ void Preprocessor::HandleIncludeDirective(SourceLocation HashLoc, EnterSourceFile(FID, CurDir, FilenameTok.getLocation()); } -/// HandleIncludeNextDirective - Implements #include_next. +/// HandleIncludeNextDirective - Implements \#include_next. /// void Preprocessor::HandleIncludeNextDirective(SourceLocation HashLoc, Token &IncludeNextTok) { @@ -1488,7 +1537,7 @@ void Preprocessor::HandleIncludeNextDirective(SourceLocation HashLoc, return HandleIncludeDirective(HashLoc, IncludeNextTok, Lookup); } -/// HandleMicrosoftImportDirective - Implements #import for Microsoft Mode +/// HandleMicrosoftImportDirective - Implements \#import for Microsoft Mode void Preprocessor::HandleMicrosoftImportDirective(Token &Tok) { // The Microsoft #import directive takes a type library and generates header // files from it, and includes those. This is beyond the scope of what clang @@ -1502,7 +1551,7 @@ void Preprocessor::HandleMicrosoftImportDirective(Token &Tok) { DiscardUntilEndOfDirective(); } -/// HandleImportDirective - Implements #import. +/// HandleImportDirective - Implements \#import. /// void Preprocessor::HandleImportDirective(SourceLocation HashLoc, Token &ImportTok) { @@ -1634,7 +1683,7 @@ bool Preprocessor::ReadMacroDefinitionArgList(MacroInfo *MI, Token &Tok) { } } -/// HandleDefineDirective - Implements #define. This consumes the entire macro +/// HandleDefineDirective - Implements \#define. This consumes the entire macro /// line then lets the caller lex the next real token. void Preprocessor::HandleDefineDirective(Token &DefineTok) { ++NumDefined; @@ -1841,7 +1890,7 @@ void Preprocessor::HandleDefineDirective(Token &DefineTok) { Callbacks->MacroDefined(MacroNameTok, MI); } -/// HandleUndefDirective - Implements #undef. +/// HandleUndefDirective - Implements \#undef. 
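The HandleIncludeDirective hunk above retries a failed <angled> lookup as a "quoted" lookup purely to drive a fix-it diagnostic before giving up. A rough standalone sketch of that recovery shape, with a made-up lookupHeader standing in for Preprocessor::LookupFile:

#include <iostream>
#include <string>

// Hypothetical header lookup: returns true if Name resolves on the given
// search path ("angled" means the <...> system path, otherwise "..." local).
static bool lookupHeader(const std::string &Name, bool Angled) {
  return !Angled && Name == "local.h"; // pretend only "local.h" exists locally
}

// If <foo.h> is not found, retry as "foo.h"; on success, suggest replacing
// the delimiters (the fix-it) instead of emitting only the fatal diagnostic.
static void handleInclude(const std::string &Name, bool Angled) {
  if (lookupHeader(Name, Angled))
    return; // found, nothing to report
  if (Angled && lookupHeader(Name, /*Angled=*/false)) {
    std::cout << "error: '" << Name << "' not found with <angled> include; "
              << "use quotes instead: \"" << Name << "\"\n";
    return;
  }
  std::cout << "error: '" << Name << "' file not found\n";
}

int main() { handleInclude("local.h", /*Angled=*/true); }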
/// void Preprocessor::HandleUndefDirective(Token &UndefTok) { ++NumUndefined; @@ -1882,10 +1931,10 @@ void Preprocessor::HandleUndefDirective(Token &UndefTok) { // Preprocessor Conditional Directive Handling. //===----------------------------------------------------------------------===// -/// HandleIfdefDirective - Implements the #ifdef/#ifndef directive. isIfndef is -/// true when this is a #ifndef directive. ReadAnyTokensBeforeDirective is true -/// if any tokens have been returned or pp-directives activated before this -/// #ifndef has been lexed. +/// HandleIfdefDirective - Implements the \#ifdef/\#ifndef directive. isIfndef +/// is true when this is a \#ifndef directive. ReadAnyTokensBeforeDirective is +/// true if any tokens have been returned or pp-directives activated before this +/// \#ifndef has been lexed. /// void Preprocessor::HandleIfdefDirective(Token &Result, bool isIfndef, bool ReadAnyTokensBeforeDirective) { @@ -1947,7 +1996,7 @@ void Preprocessor::HandleIfdefDirective(Token &Result, bool isIfndef, } } -/// HandleIfDirective - Implements the #if directive. +/// HandleIfDirective - Implements the \#if directive. /// void Preprocessor::HandleIfDirective(Token &IfToken, bool ReadAnyTokensBeforeDirective) { @@ -1984,7 +2033,7 @@ void Preprocessor::HandleIfDirective(Token &IfToken, } } -/// HandleEndifDirective - Implements the #endif directive. +/// HandleEndifDirective - Implements the \#endif directive. /// void Preprocessor::HandleEndifDirective(Token &EndifToken) { ++NumEndif; @@ -2010,7 +2059,7 @@ void Preprocessor::HandleEndifDirective(Token &EndifToken) { Callbacks->Endif(EndifToken.getLocation(), CondInfo.IfLoc); } -/// HandleElseDirective - Implements the #else directive. +/// HandleElseDirective - Implements the \#else directive. /// void Preprocessor::HandleElseDirective(Token &Result) { ++NumElse; @@ -2039,7 +2088,7 @@ void Preprocessor::HandleElseDirective(Token &Result) { /*FoundElse*/true, Result.getLocation()); } -/// HandleElifDirective - Implements the #elif directive. +/// HandleElifDirective - Implements the \#elif directive. /// void Preprocessor::HandleElifDirective(Token &ElifToken) { ++NumElse; diff --git a/lib/Lex/PPLexerChange.cpp b/lib/Lex/PPLexerChange.cpp index b6689df..e824320 100644 --- a/lib/Lex/PPLexerChange.cpp +++ b/lib/Lex/PPLexerChange.cpp @@ -31,7 +31,7 @@ PPCallbacks::~PPCallbacks() {} //===----------------------------------------------------------------------===// /// isInPrimaryFile - Return true if we're in the top-level file, not in a -/// #include. This looks through macro expansions and active _Pragma lexers. +/// \#include. This looks through macro expansions and active _Pragma lexers. bool Preprocessor::isInPrimaryFile() const { if (IsFileLexer()) return IncludeMacroStack.empty(); diff --git a/lib/Lex/PPMacroExpansion.cpp b/lib/Lex/PPMacroExpansion.cpp index fe70585..ebdb644 100644 --- a/lib/Lex/PPMacroExpansion.cpp +++ b/lib/Lex/PPMacroExpansion.cpp @@ -215,7 +215,7 @@ bool Preprocessor::HandleMacroExpandedIdentifier(Token &Identifier, // If this is a function-like macro, read the arguments. if (MI->isFunctionLike()) { - // C99 6.10.3p10: If the preprocessing token immediately after the the macro + // C99 6.10.3p10: If the preprocessing token immediately after the macro // name isn't a '(', this macro should not be expanded. if (!isNextPPTokenLParen()) return true; @@ -242,9 +242,27 @@ bool Preprocessor::HandleMacroExpandedIdentifier(Token &Identifier, // Remember where the token is expanded. 
SourceLocation ExpandLoc = Identifier.getLocation(); - - if (Callbacks) Callbacks->MacroExpands(Identifier, MI, - SourceRange(ExpandLoc, ExpansionEnd)); + SourceRange ExpansionRange(ExpandLoc, ExpansionEnd); + + if (Callbacks) { + if (InMacroArgs) { + // We can have macro expansion inside a conditional directive while + // reading the function macro arguments. To ensure, in that case, that + // MacroExpands callbacks still happen in source order, queue this + // callback to have it happen after the function macro callback. + DelayedMacroExpandsCallbacks.push_back( + MacroExpandsInfo(Identifier, MI, ExpansionRange)); + } else { + Callbacks->MacroExpands(Identifier, MI, ExpansionRange); + if (!DelayedMacroExpandsCallbacks.empty()) { + for (unsigned i=0, e = DelayedMacroExpandsCallbacks.size(); i!=e; ++i) { + MacroExpandsInfo &Info = DelayedMacroExpandsCallbacks[i]; + Callbacks->MacroExpands(Info.Tok, Info.MI, Info.Range); + } + DelayedMacroExpandsCallbacks.clear(); + } + } + } // If we started lexing a macro, enter the macro expansion body. @@ -469,10 +487,12 @@ MacroArgs *Preprocessor::ReadFunctionLikeMacroArgs(Token &MacroName, } else if (MI->isVariadic() && (NumActuals+1 == MinArgsExpected || // A(x, ...) -> A(X) (NumActuals == 0 && MinArgsExpected == 2))) {// A(x,...) -> A() - // Varargs where the named vararg parameter is missing: ok as extension. - // #define A(x, ...) - // A("blah") + // Varargs where the named vararg parameter is missing: OK as extension. + // #define A(x, ...) + // A("blah") Diag(Tok, diag::ext_missing_varargs_arg); + Diag(MI->getDefinitionLoc(), diag::note_macro_here) + << MacroName.getIdentifierInfo(); // Remember this occurred, allowing us to elide the comma when used for // cases like: @@ -599,6 +619,7 @@ static bool HasFeature(const Preprocessor &PP, const IdentifierInfo *II) { .Case("address_sanitizer", LangOpts.AddressSanitizer) .Case("attribute_analyzer_noreturn", true) .Case("attribute_availability", true) + .Case("attribute_availability_with_message", true) .Case("attribute_cf_returns_not_retained", true) .Case("attribute_cf_returns_retained", true) .Case("attribute_deprecated_with_message", true) @@ -612,6 +633,7 @@ static bool HasFeature(const Preprocessor &PP, const IdentifierInfo *II) { .Case("attribute_objc_method_family", true) .Case("attribute_overloadable", true) .Case("attribute_unavailable_with_message", true) + .Case("attribute_unused_on_fields", true) .Case("blocks", LangOpts.Blocks) .Case("cxx_exceptions", LangOpts.Exceptions) .Case("cxx_rtti", LangOpts.RTTI) @@ -625,15 +647,16 @@ static bool HasFeature(const Preprocessor &PP, const IdentifierInfo *II) { .Case("objc_fixed_enum", LangOpts.ObjC2) .Case("objc_instancetype", LangOpts.ObjC2) .Case("objc_modules", LangOpts.ObjC2 && LangOpts.Modules) - .Case("objc_nonfragile_abi", LangOpts.ObjCNonFragileABI) - .Case("objc_weak_class", LangOpts.ObjCNonFragileABI) + .Case("objc_nonfragile_abi", LangOpts.ObjCRuntime.isNonFragile()) + .Case("objc_weak_class", LangOpts.ObjCRuntime.hasWeakClassImport()) .Case("ownership_holds", true) .Case("ownership_returns", true) .Case("ownership_takes", true) .Case("objc_bool", true) - .Case("objc_subscripting", LangOpts.ObjCNonFragileABI) + .Case("objc_subscripting", LangOpts.ObjCRuntime.isNonFragile()) .Case("objc_array_literals", LangOpts.ObjC2) .Case("objc_dictionary_literals", LangOpts.ObjC2) + .Case("objc_boxed_expressions", LangOpts.ObjC2) .Case("arc_cf_code_audited", true) // C11 features .Case("c_alignas", LangOpts.C11) @@ -772,6 +795,7 @@ static bool 
HasAttribute(const IdentifierInfo *II) {
   if (Name.startswith("__") && Name.endswith("__") && Name.size() >= 4)
     Name = Name.substr(2, Name.size() - 4);
 
+  // FIXME: Do we need to handle namespaces here?
   return llvm::StringSwitch<bool>(Name)
 #include "clang/Lex/AttrSpellings.inc"
         .Default(false);
@@ -1030,7 +1054,7 @@ void Preprocessor::ExpandBuiltinMacro(Token &Tok) {
     if (Tok.is(tok::l_paren)) {
       // Read the identifier
       Lex(Tok);
-      if (Tok.is(tok::identifier)) {
+      if (Tok.is(tok::identifier) || Tok.is(tok::kw_const)) {
         FeatureII = Tok.getIdentifierInfo();
 
         // Read the ')'.
diff --git a/lib/Lex/Pragma.cpp b/lib/Lex/Pragma.cpp
index e2a192b..c9cc4ad 100644
--- a/lib/Lex/Pragma.cpp
+++ b/lib/Lex/Pragma.cpp
@@ -100,9 +100,12 @@ void PragmaNamespace::HandlePragma(Preprocessor &PP,
 // Preprocessor Pragma Directive Handling.
 //===----------------------------------------------------------------------===//
 
-/// HandlePragmaDirective - The "#pragma" directive has been parsed. Lex the
+/// HandlePragmaDirective - The "\#pragma" directive has been parsed. Lex the
 /// rest of the pragma, passing it to the registered pragma handlers.
 void Preprocessor::HandlePragmaDirective(unsigned Introducer) {
+  if (!PragmasEnabled)
+    return;
+
   ++NumPragma;
 
   // Invoke the first level of pragma handlers which reads the namespace id.
@@ -314,7 +317,7 @@ void Preprocessor::HandleMicrosoft__pragma(Token &Tok) {
   return Lex(Tok);
 }
 
-/// HandlePragmaOnce - Handle #pragma once. OnceTok is the 'once'.
+/// HandlePragmaOnce - Handle \#pragma once. OnceTok is the 'once'.
 ///
 void Preprocessor::HandlePragmaOnce(Token &OnceTok) {
   if (isInPrimaryFile()) {
@@ -336,7 +339,7 @@ void Preprocessor::HandlePragmaMark() {
 }
 
 
-/// HandlePragmaPoison - Handle #pragma GCC poison. PoisonTok is the 'poison'.
+/// HandlePragmaPoison - Handle \#pragma GCC poison. PoisonTok is the 'poison'.
 ///
 void Preprocessor::HandlePragmaPoison(Token &PoisonTok) {
   Token Tok;
@@ -378,7 +381,7 @@ void Preprocessor::HandlePragmaPoison(Token &PoisonTok) {
   }
 }
 
-/// HandlePragmaSystemHeader - Implement #pragma GCC system_header. We know
+/// HandlePragmaSystemHeader - Implement \#pragma GCC system_header. We know
 /// that the whole directive has been parsed.
 void Preprocessor::HandlePragmaSystemHeader(Token &SysHeaderTok) {
   if (isInPrimaryFile()) {
@@ -411,7 +414,7 @@ void Preprocessor::HandlePragmaSystemHeader(Token &SysHeaderTok) {
                         false, false, true, false);
 }
 
-/// HandlePragmaDependency - Handle #pragma GCC dependency "foo" blah.
+/// HandlePragmaDependency - Handle \#pragma GCC dependency "foo" blah.
 ///
 void Preprocessor::HandlePragmaDependency(Token &DependencyTok) {
   Token FilenameTok;
@@ -464,9 +467,12 @@ void Preprocessor::HandlePragmaDependency(Token &DependencyTok) {
   }
 }
 
-/// HandlePragmaComment - Handle the microsoft #pragma comment extension. The
-/// syntax is:
-///   #pragma comment(linker, "foo")
+/// \brief Handle the microsoft \#pragma comment extension.
+///
+/// The syntax is:
+/// \code
+///   \#pragma comment(linker, "foo")
+/// \endcode
 /// 'linker' is one of five identifiers: compiler, exestr, lib, linker, user.
 /// "foo" is a string, which is fully macro expanded, and permits string
 /// concatenation, embedded escape characters etc. See MSDN for more details.
@@ -552,11 +558,15 @@ void Preprocessor::HandlePragmaComment(Token &Tok) {
     Callbacks->PragmaComment(CommentLoc, II, ArgumentString);
 }
 
-/// HandlePragmaMessage - Handle the microsoft and gcc \#pragma message
 /// extension.
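HasAttribute above (like IsSimpleMicrosoftDeclSpec later in this patch) hinges on llvm::StringSwitch, which tries each .Case in turn and falls back to .Default when nothing matches. A small self-contained use, assuming only LLVM's ADT headers; the feature names are invented:

#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
#include <cstdio>

// Map a spelling to a flag the way HasAttribute does: the chain yields the
// value of the first matching .Case, otherwise the .Default value.
static bool hasFakeFeature(llvm::StringRef Name) {
  return llvm::StringSwitch<bool>(Name)
      .Case("feature_one", true)
      .Case("feature_two", true)
      .Default(false);
}

int main() {
  std::printf("%d %d\n", hasFakeFeature("feature_one"),
              hasFakeFeature("nope")); // prints: 1 0
  return 0;
}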
The syntax is: -/// #pragma message(string) +/// \code +/// \#pragma message(string) +/// \endcode /// OR, in GCC mode: -/// #pragma message string +/// \code +/// \#pragma message string +/// \endcode /// string is a string, which is fully macro expanded, and permits string /// concatenation, embedded escape characters, etc... See MSDN for more details. void Preprocessor::HandlePragmaMessage(Token &Tok) { @@ -679,9 +689,12 @@ IdentifierInfo *Preprocessor::ParsePragmaPushOrPopMacro(Token &Tok) { return LookUpIdentifierInfo(MacroTok); } -/// HandlePragmaPushMacro - Handle #pragma push_macro. +/// \brief Handle \#pragma push_macro. +/// /// The syntax is: -/// #pragma push_macro("macro") +/// \code +/// \#pragma push_macro("macro") +/// \endcode void Preprocessor::HandlePragmaPushMacro(Token &PushMacroTok) { // Parse the pragma directive and get the macro IdentifierInfo*. IdentifierInfo *IdentInfo = ParsePragmaPushOrPopMacro(PushMacroTok); @@ -703,9 +716,12 @@ void Preprocessor::HandlePragmaPushMacro(Token &PushMacroTok) { PragmaPushMacroInfo[IdentInfo].push_back(MacroCopyToPush); } -/// HandlePragmaPopMacro - Handle #pragma pop_macro. +/// \brief Handle \#pragma pop_macro. +/// /// The syntax is: +/// \code /// #pragma pop_macro("macro") +/// \endcode void Preprocessor::HandlePragmaPopMacro(Token &PopMacroTok) { SourceLocation MessageLoc = PopMacroTok.getLocation(); @@ -931,7 +947,7 @@ bool Preprocessor::LexOnOffSwitch(tok::OnOffSwitch &Result) { } namespace { -/// PragmaOnceHandler - "#pragma once" marks the file as atomically included. +/// PragmaOnceHandler - "\#pragma once" marks the file as atomically included. struct PragmaOnceHandler : public PragmaHandler { PragmaOnceHandler() : PragmaHandler("once") {} virtual void HandlePragma(Preprocessor &PP, PragmaIntroducerKind Introducer, @@ -941,7 +957,7 @@ struct PragmaOnceHandler : public PragmaHandler { } }; -/// PragmaMarkHandler - "#pragma mark ..." is ignored by the compiler, and the +/// PragmaMarkHandler - "\#pragma mark ..." is ignored by the compiler, and the /// rest of the line is not lexed. struct PragmaMarkHandler : public PragmaHandler { PragmaMarkHandler() : PragmaHandler("mark") {} @@ -951,7 +967,7 @@ struct PragmaMarkHandler : public PragmaHandler { } }; -/// PragmaPoisonHandler - "#pragma poison x" marks x as not usable. +/// PragmaPoisonHandler - "\#pragma poison x" marks x as not usable. struct PragmaPoisonHandler : public PragmaHandler { PragmaPoisonHandler() : PragmaHandler("poison") {} virtual void HandlePragma(Preprocessor &PP, PragmaIntroducerKind Introducer, @@ -960,7 +976,7 @@ struct PragmaPoisonHandler : public PragmaHandler { } }; -/// PragmaSystemHeaderHandler - "#pragma system_header" marks the current file +/// PragmaSystemHeaderHandler - "\#pragma system_header" marks the current file /// as a system header, which silences warnings in it. 
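The handler structs that follow all specialize the same PragmaHandler interface and are installed by RegisterBuiltinPragmas further down. A sketch of a user-defined handler against the same era's interface; the pragma name and namespace here are made up:

#include "clang/Lex/Pragma.h"
#include "clang/Lex/Preprocessor.h"

// Handles '#pragma myns my_mark ...' by discarding the rest of the line,
// modeled on the do-nothing built-in handlers above.
struct MyMarkHandler : public clang::PragmaHandler {
  MyMarkHandler() : clang::PragmaHandler("my_mark") {}
  virtual void HandlePragma(clang::Preprocessor &PP,
                            clang::PragmaIntroducerKind Introducer,
                            clang::Token &FirstToken) {
    PP.DiscardUntilEndOfDirective(); // ignore everything up to tok::eod
  }
};

// Registration mirrors RegisterBuiltinPragmas, given a live Preprocessor PP:
//   PP.AddPragmaHandler("myns", new MyMarkHandler());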
struct PragmaSystemHeaderHandler : public PragmaHandler { PragmaSystemHeaderHandler() : PragmaHandler("system_header") {} @@ -994,6 +1010,10 @@ struct PragmaDebugHandler : public PragmaHandler { llvm_unreachable("This is an assertion!"); } else if (II->isStr("crash")) { *(volatile int*) 0x11 = 0; + } else if (II->isStr("parser_crash")) { + Token Crasher; + Crasher.setKind(tok::annot_pragma_parser_crash); + PP.EnterToken(Crasher); } else if (II->isStr("llvm_fatal_error")) { llvm::report_fatal_error("#pragma clang __debug llvm_fatal_error"); } else if (II->isStr("llvm_unreachable")) { @@ -1023,7 +1043,7 @@ struct PragmaDebugHandler : public PragmaHandler { }; -/// PragmaDiagnosticHandler - e.g. '#pragma GCC diagnostic ignored "-Wformat"' +/// PragmaDiagnosticHandler - e.g. '\#pragma GCC diagnostic ignored "-Wformat"' struct PragmaDiagnosticHandler : public PragmaHandler { private: const char *Namespace; @@ -1117,7 +1137,7 @@ public: } }; -/// PragmaCommentHandler - "#pragma comment ...". +/// PragmaCommentHandler - "\#pragma comment ...". struct PragmaCommentHandler : public PragmaHandler { PragmaCommentHandler() : PragmaHandler("comment") {} virtual void HandlePragma(Preprocessor &PP, PragmaIntroducerKind Introducer, @@ -1126,7 +1146,7 @@ struct PragmaCommentHandler : public PragmaHandler { } }; -/// PragmaIncludeAliasHandler - "#pragma include_alias("...")". +/// PragmaIncludeAliasHandler - "\#pragma include_alias("...")". struct PragmaIncludeAliasHandler : public PragmaHandler { PragmaIncludeAliasHandler() : PragmaHandler("include_alias") {} virtual void HandlePragma(Preprocessor &PP, PragmaIntroducerKind Introducer, @@ -1135,7 +1155,7 @@ struct PragmaIncludeAliasHandler : public PragmaHandler { } }; -/// PragmaMessageHandler - "#pragma message("...")". +/// PragmaMessageHandler - "\#pragma message("...")". struct PragmaMessageHandler : public PragmaHandler { PragmaMessageHandler() : PragmaHandler("message") {} virtual void HandlePragma(Preprocessor &PP, PragmaIntroducerKind Introducer, @@ -1144,7 +1164,7 @@ struct PragmaMessageHandler : public PragmaHandler { } }; -/// PragmaPushMacroHandler - "#pragma push_macro" saves the value of the +/// PragmaPushMacroHandler - "\#pragma push_macro" saves the value of the /// macro on the top of the stack. struct PragmaPushMacroHandler : public PragmaHandler { PragmaPushMacroHandler() : PragmaHandler("push_macro") {} @@ -1155,7 +1175,7 @@ struct PragmaPushMacroHandler : public PragmaHandler { }; -/// PragmaPopMacroHandler - "#pragma pop_macro" sets the value of the +/// PragmaPopMacroHandler - "\#pragma pop_macro" sets the value of the /// macro to the value on the top of the stack. struct PragmaPopMacroHandler : public PragmaHandler { PragmaPopMacroHandler() : PragmaHandler("pop_macro") {} @@ -1167,7 +1187,7 @@ struct PragmaPopMacroHandler : public PragmaHandler { // Pragma STDC implementations. -/// PragmaSTDC_FENV_ACCESSHandler - "#pragma STDC FENV_ACCESS ...". +/// PragmaSTDC_FENV_ACCESSHandler - "\#pragma STDC FENV_ACCESS ...". struct PragmaSTDC_FENV_ACCESSHandler : public PragmaHandler { PragmaSTDC_FENV_ACCESSHandler() : PragmaHandler("FENV_ACCESS") {} virtual void HandlePragma(Preprocessor &PP, PragmaIntroducerKind Introducer, @@ -1180,7 +1200,7 @@ struct PragmaSTDC_FENV_ACCESSHandler : public PragmaHandler { } }; -/// PragmaSTDC_CX_LIMITED_RANGEHandler - "#pragma STDC CX_LIMITED_RANGE ...". +/// PragmaSTDC_CX_LIMITED_RANGEHandler - "\#pragma STDC CX_LIMITED_RANGE ...". 
struct PragmaSTDC_CX_LIMITED_RANGEHandler : public PragmaHandler {
   PragmaSTDC_CX_LIMITED_RANGEHandler()
     : PragmaHandler("CX_LIMITED_RANGE") {}
@@ -1191,7 +1211,7 @@ struct PragmaSTDC_CX_LIMITED_RANGEHandler : public PragmaHandler {
   }
 };
 
-/// PragmaSTDC_UnknownHandler - "#pragma STDC ...".
+/// PragmaSTDC_UnknownHandler - "\#pragma STDC ...".
 struct PragmaSTDC_UnknownHandler : public PragmaHandler {
   PragmaSTDC_UnknownHandler() {}
   virtual void HandlePragma(Preprocessor &PP, PragmaIntroducerKind Introducer,
@@ -1202,7 +1222,7 @@
 };
 
 /// PragmaARCCFCodeAuditedHandler -
-///   #pragma clang arc_cf_code_audited begin/end
+///   \#pragma clang arc_cf_code_audited begin/end
 struct PragmaARCCFCodeAuditedHandler : public PragmaHandler {
   PragmaARCCFCodeAuditedHandler() : PragmaHandler("arc_cf_code_audited") {}
   virtual void HandlePragma(Preprocessor &PP, PragmaIntroducerKind Introducer,
@@ -1259,7 +1279,7 @@ struct PragmaARCCFCodeAuditedHandler : public PragmaHandler {
 
 
 /// RegisterBuiltinPragmas - Install the standard preprocessor pragmas:
-/// #pragma GCC poison/system_header/dependency and #pragma once.
+/// \#pragma GCC poison/system_header/dependency and \#pragma once.
 void Preprocessor::RegisterBuiltinPragmas() {
   AddPragmaHandler(new PragmaOnceHandler());
   AddPragmaHandler(new PragmaMarkHandler());
diff --git a/lib/Lex/PreprocessingRecord.cpp b/lib/Lex/PreprocessingRecord.cpp
index 89d19fd..dfdeba3 100644
--- a/lib/Lex/PreprocessingRecord.cpp
+++ b/lib/Lex/PreprocessingRecord.cpp
@@ -48,7 +48,7 @@ PreprocessingRecord::PreprocessingRecord(SourceManager &SM,
 }
 
 /// \brief Returns a pair of [Begin, End) iterators of preprocessed entities
-/// that source range \arg R encompasses.
+/// that source range \p Range encompasses.
 std::pair<PreprocessingRecord::iterator, PreprocessingRecord::iterator>
 PreprocessingRecord::getPreprocessedEntitiesInRange(SourceRange Range) {
   if (Range.isInvalid())
@@ -89,7 +89,7 @@ static bool isPreprocessedEntityIfInFileID(PreprocessedEntity *PPE, FileID FID,
 ///
 /// Can be used to avoid implicit deserializations of preallocated
 /// preprocessed entities if we only care about entities of a specific file
-/// and not from files #included in the range given at
+/// and not from files \#included in the range given at
 /// \see getPreprocessedEntitiesInRange.
 bool PreprocessingRecord::isEntityInFileID(iterator PPEI, FileID FID) {
   if (FID.isInvalid())
diff --git a/lib/Lex/Preprocessor.cpp b/lib/Lex/Preprocessor.cpp
index 06e5685..614530c 100644
--- a/lib/Lex/Preprocessor.cpp
+++ b/lib/Lex/Preprocessor.cpp
@@ -66,54 +66,6 @@ Preprocessor::Preprocessor(DiagnosticsEngine &diags, LangOptions &opts,
     Record(0), MIChainHead(0), MICache(0) {
   OwnsHeaderSearch = OwnsHeaders;
-
-  if (!DelayInitialization) {
-    assert(Target && "Must provide target information for PP initialization");
-    Initialize(*Target);
-  }
-}
-
-Preprocessor::~Preprocessor() {
-  assert(BacktrackPositions.empty() && "EnableBacktrack/Backtrack imbalance!");
-
-  while (!IncludeMacroStack.empty()) {
-    delete IncludeMacroStack.back().TheLexer;
-    delete IncludeMacroStack.back().TheTokenLexer;
-    IncludeMacroStack.pop_back();
-  }
-
-  // Free any macro definitions.
-  for (MacroInfoChain *I = MIChainHead ; I ; I = I->Next)
-    I->MI.Destroy();
-
-  // Free any cached macro expanders.
-  for (unsigned i = 0, e = NumCachedTokenLexers; i != e; ++i)
-    delete TokenLexerCache[i];
-
-  // Free any cached MacroArgs.
-  for (MacroArgs *ArgList = MacroArgCache; ArgList; )
-    ArgList = ArgList->deallocate();
-
-  // Release pragma information.
- delete PragmaHandlers; - - // Delete the scratch buffer info. - delete ScratchBuf; - - // Delete the header search info, if we own it. - if (OwnsHeaderSearch) - delete &HeaderInfo; - - delete Callbacks; -} - -void Preprocessor::Initialize(const TargetInfo &Target) { - assert((!this->Target || this->Target == &Target) && - "Invalid override of target information"); - this->Target = &Target; - - // Initialize information about built-ins. - BuiltinInfo.InitializeTarget(Target); ScratchBuf = new ScratchBuffer(SourceMgr); CounterValue = 0; // __COUNTER__ starts at 0. @@ -134,10 +86,12 @@ void Preprocessor::Initialize(const TargetInfo &Target) { // Macro expansion is enabled. DisableMacroExpansion = false; + MacroExpansionInDirectivesOverride = false; InMacroArgs = false; InMacroArgPreExpansion = false; NumCachedTokenLexers = 0; - + PragmasEnabled = true; + CachedLexPos = 0; // We haven't read anything from the external source. @@ -170,7 +124,54 @@ void Preprocessor::Initialize(const TargetInfo &Target) { Ident___exception_info = Ident___exception_code = Ident___abnormal_termination = 0; Ident_GetExceptionInfo = Ident_GetExceptionCode = Ident_AbnormalTermination = 0; } + + if (!DelayInitialization) { + assert(Target && "Must provide target information for PP initialization"); + Initialize(*Target); + } +} + +Preprocessor::~Preprocessor() { + assert(BacktrackPositions.empty() && "EnableBacktrack/Backtrack imbalance!"); + + while (!IncludeMacroStack.empty()) { + delete IncludeMacroStack.back().TheLexer; + delete IncludeMacroStack.back().TheTokenLexer; + IncludeMacroStack.pop_back(); + } + + // Free any macro definitions. + for (MacroInfoChain *I = MIChainHead ; I ; I = I->Next) + I->MI.Destroy(); + + // Free any cached macro expanders. + for (unsigned i = 0, e = NumCachedTokenLexers; i != e; ++i) + delete TokenLexerCache[i]; + + // Free any cached MacroArgs. + for (MacroArgs *ArgList = MacroArgCache; ArgList; ) + ArgList = ArgList->deallocate(); + + // Release pragma information. + delete PragmaHandlers; + + // Delete the scratch buffer info. + delete ScratchBuf; + + // Delete the header search info, if we own it. + if (OwnsHeaderSearch) + delete &HeaderInfo; + + delete Callbacks; +} + +void Preprocessor::Initialize(const TargetInfo &Target) { + assert((!this->Target || this->Target == &Target) && + "Invalid override of target information"); + this->Target = &Target; + // Initialize information about built-ins. 
+  BuiltinInfo.InitializeTarget(Target);
   HeaderInfo.setTarget(Target);
 }
 
@@ -236,6 +237,20 @@ void Preprocessor::PrintStats() {
   llvm::errs() << (NumFastTokenPaste+NumTokenPaste)
                << " token paste (##) operations performed, "
                << NumFastTokenPaste << " on the fast path.\n";
+
+  llvm::errs() << "\nPreprocessor Memory: " << getTotalMemory() << "B total";
+
+  llvm::errs() << "\n  BumpPtr: " << BP.getTotalMemory();
+  llvm::errs() << "\n  Macro Expanded Tokens: "
+               << llvm::capacity_in_bytes(MacroExpandedTokens);
+  llvm::errs() << "\n  Predefines Buffer: " << Predefines.capacity();
+  llvm::errs() << "\n  Macros: " << llvm::capacity_in_bytes(Macros);
+  llvm::errs() << "\n  #pragma push_macro Info: "
+               << llvm::capacity_in_bytes(PragmaPushMacroInfo);
+  llvm::errs() << "\n  Poison Reasons: "
+               << llvm::capacity_in_bytes(PoisonReasons);
+  llvm::errs() << "\n  Comment Handlers: "
+               << llvm::capacity_in_bytes(CommentHandlers) << "\n";
 }
 
 Preprocessor::macro_iterator
@@ -514,9 +529,19 @@ void Preprocessor::HandleIdentifier(Token &Identifier) {
 
   // If the information about this identifier is out of date, update it from
   // the external source.
+  // We have to treat __VA_ARGS__ in a special way, since it gets
+  // serialized with isPoisoned = true, but our preprocessor may have
+  // unpoisoned it if we're defining a C99 macro.
   if (II.isOutOfDate()) {
+    bool CurrentIsPoisoned = false;
+    if (&II == Ident__VA_ARGS__)
+      CurrentIsPoisoned = Ident__VA_ARGS__->isPoisoned();
+
     ExternalSource->updateOutOfDateIdentifier(II);
     Identifier.setKind(II.getTokenID());
+
+    if (&II == Ident__VA_ARGS__)
+      II.setIsPoisoned(CurrentIsPoisoned);
   }
 
   // If this identifier was poisoned, and if it was not produced from a macro
@@ -622,14 +647,14 @@ void Preprocessor::LexAfterModuleImport(Token &Result) {
                       /*IsIncludeDirective=*/false);
 }
 
-void Preprocessor::AddCommentHandler(CommentHandler *Handler) {
+void Preprocessor::addCommentHandler(CommentHandler *Handler) {
   assert(Handler && "NULL comment handler");
   assert(std::find(CommentHandlers.begin(), CommentHandlers.end(), Handler) ==
          CommentHandlers.end() && "Comment handler already registered");
   CommentHandlers.push_back(Handler);
 }
 
-void Preprocessor::RemoveCommentHandler(CommentHandler *Handler) {
+void Preprocessor::removeCommentHandler(CommentHandler *Handler) {
   std::vector<CommentHandler *>::iterator Pos =
     std::find(CommentHandlers.begin(), CommentHandlers.end(), Handler);
   assert(Pos != CommentHandlers.end() && "Comment handler not registered");
diff --git a/lib/Lex/PreprocessorLexer.cpp b/lib/Lex/PreprocessorLexer.cpp
index a72bbca..a64c84d 100644
--- a/lib/Lex/PreprocessorLexer.cpp
+++ b/lib/Lex/PreprocessorLexer.cpp
@@ -27,7 +27,7 @@ PreprocessorLexer::PreprocessorLexer(Preprocessor *pp, FileID fid)
   InitialNumSLocEntries = pp->getSourceManager().local_sloc_entry_size();
 }
 
-/// LexIncludeFilename - After the preprocessor has parsed a #include, lex and
+/// \brief After the preprocessor has parsed a \#include, lex and
 /// (potentially) macro expand the filename.
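addCommentHandler/removeCommentHandler above are the renamed registration points for CommentHandler callbacks. A sketch of a handler that just counts comments, written against the same era's interface:

#include "clang/Lex/Preprocessor.h"

// Counts every comment the preprocessor sees. Returning false tells the
// preprocessor that no tokens were pushed back and lexing should continue
// normally.
struct CommentCounter : public clang::CommentHandler {
  unsigned Count;
  CommentCounter() : Count(0) {}
  virtual bool HandleComment(clang::Preprocessor &PP,
                             clang::SourceRange Comment) {
    ++Count;
    return false;
  }
};

// Usage with a live Preprocessor PP (the lowercase names are this patch's):
//   CommentCounter Counter;
//   PP.addCommentHandler(&Counter);
//   ... lex the translation unit ...
//   PP.removeCommentHandler(&Counter);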
void PreprocessorLexer::LexIncludeFilename(Token &FilenameTok) { assert(ParsingPreprocessorDirective && diff --git a/lib/Lex/TokenConcatenation.cpp b/lib/Lex/TokenConcatenation.cpp index 84a46ed..dd7ebb0 100644 --- a/lib/Lex/TokenConcatenation.cpp +++ b/lib/Lex/TokenConcatenation.cpp @@ -14,6 +14,7 @@ #include "clang/Lex/TokenConcatenation.h" #include "clang/Lex/Preprocessor.h" #include "llvm/Support/ErrorHandling.h" +#include using namespace clang; diff --git a/lib/Lex/TokenLexer.cpp b/lib/Lex/TokenLexer.cpp index 696754c..ade40da 100644 --- a/lib/Lex/TokenLexer.cpp +++ b/lib/Lex/TokenLexer.cpp @@ -252,9 +252,9 @@ void TokenLexer::ExpandFunctionArguments() { const Token *ArgToks = ActualArgs->getUnexpArgument(ArgNo); unsigned NumToks = MacroArgs::getArgLength(ArgToks); if (NumToks) { // Not an empty argument? - // If this is the GNU ", ## __VA_ARG__" extension, and we just learned - // that __VA_ARG__ expands to multiple tokens, avoid a pasting error when - // the expander trys to paste ',' with the first token of the __VA_ARG__ + // If this is the GNU ", ## __VA_ARGS__" extension, and we just learned + // that __VA_ARGS__ expands to multiple tokens, avoid a pasting error when + // the expander trys to paste ',' with the first token of the __VA_ARGS__ // expansion. if (PasteBefore && ResultToks.size() >= 2 && ResultToks[ResultToks.size()-2].is(tok::comma) && @@ -568,8 +568,8 @@ bool TokenLexer::PasteTokens(Token &Tok) { << Buffer.str(); } - // Do not consume the RHS. - --CurToken; + // An error has occurred so exit loop. + break; } // Turn ## into 'unknown' to avoid # ## # from looking like a paste @@ -578,7 +578,7 @@ bool TokenLexer::PasteTokens(Token &Tok) { Result.setKind(tok::unknown); } - // Transfer properties of the LHS over the the Result. + // Transfer properties of the LHS over the Result. Result.setFlagValue(Token::StartOfLine , Tok.isAtStartOfLine()); Result.setFlagValue(Token::LeadingSpace, Tok.hasLeadingSpace()); diff --git a/lib/Makefile b/lib/Makefile index 2eb72a9..1f14aa0 100755 --- a/lib/Makefile +++ b/lib/Makefile @@ -8,7 +8,7 @@ ##===----------------------------------------------------------------------===## CLANG_LEVEL := .. 
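The TokenLexer hunks above concern the GNU ', ## __VA_ARGS__' extension, where pasting the comma against an empty __VA_ARGS__ deletes the comma rather than producing a paste error. A tiny program exercising it, accepted by clang and gcc as an extension:

#include <cstdio>

// GNU extension: with empty __VA_ARGS__, the '##' paste removes the
// preceding comma, so LOG("hi") is valid as well as LOG("%d", 1).
#define LOG(fmt, ...) std::printf(fmt "\n", ##__VA_ARGS__)

int main() {
  LOG("hello");          // comma elided, no trailing argument
  LOG("value: %d", 42);  // ordinary variadic use
  return 0;
}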
-PARALLEL_DIRS = Headers Basic Lex Parse AST Sema CodeGen Analysis \ +PARALLEL_DIRS = Headers Basic Lex Parse AST ASTMatchers Sema CodeGen Analysis \ StaticAnalyzer Edit Rewrite ARCMigrate Serialization Frontend \ FrontendTool Tooling Driver diff --git a/lib/Parse/CMakeLists.txt b/lib/Parse/CMakeLists.txt index 6c980ce..55e2aeb 100644 --- a/lib/Parse/CMakeLists.txt +++ b/lib/Parse/CMakeLists.txt @@ -1,5 +1,3 @@ -set(LLVM_USED_LIBS clangBasic clangAST clangLex clangSema) - add_clang_library(clangParse ParseAST.cpp ParseCXXInlineMethods.cpp @@ -16,4 +14,21 @@ add_clang_library(clangParse Parser.cpp ) -add_dependencies(clangParse ClangAttrClasses ClangAttrList ClangDeclNodes ClangDiagnosticParse ClangStmtNodes ClangAttrLateParsed) +add_dependencies(clangParse + ClangAttrClasses + ClangAttrLateParsed + ClangAttrList + ClangAttrParsedAttrList + ClangCommentNodes + ClangDeclNodes + ClangDiagnosticCommon + ClangDiagnosticParse + ClangStmtNodes + ) + +target_link_libraries(clangParse + clangBasic + clangAST + clangLex + clangSema + ) diff --git a/lib/Parse/ParseAST.cpp b/lib/Parse/ParseAST.cpp index d1c2624..bd4f859 100644 --- a/lib/Parse/ParseAST.cpp +++ b/lib/Parse/ParseAST.cpp @@ -12,11 +12,13 @@ //===----------------------------------------------------------------------===// #include "clang/Parse/ParseAST.h" +#include "clang/Parse/ParseDiagnostic.h" #include "clang/Sema/Sema.h" #include "clang/Sema/CodeCompleteConsumer.h" #include "clang/Sema/SemaConsumer.h" #include "clang/Sema/ExternalSemaSource.h" #include "clang/AST/ASTConsumer.h" +#include "clang/AST/ASTContext.h" #include "clang/AST/DeclCXX.h" #include "clang/AST/ExternalASTSource.h" #include "clang/AST/Stmt.h" @@ -77,28 +79,29 @@ void clang::ParseAST(Sema &S, bool PrintStats, bool SkipFunctionBodies) { S.getPreprocessor().EnterMainSourceFile(); P.Initialize(); S.Initialize(); - - if (ExternalASTSource *External = S.getASTContext().getExternalSource()) - External->StartTranslationUnit(Consumer); - - bool Abort = false; + + // C11 6.9p1 says translation units must have at least one top-level + // declaration. C++ doesn't have this restriction. We also don't want to + // complain if we have a precompiled header, although technically if the PCH + // is empty we should still emit the (pedantic) diagnostic. Parser::DeclGroupPtrTy ADecl; - - while (!P.ParseTopLevelDecl(ADecl)) { // Not end of file. - // If we got a null return and something *was* parsed, ignore it. This - // is due to a top-level semicolon, an action override, or a parse error - // skipping something. - if (ADecl) { - if (!Consumer->HandleTopLevelDecl(ADecl.get())) { - Abort = true; - break; - } - } - }; - - if (Abort) - return; - + ExternalASTSource *External = S.getASTContext().getExternalSource(); + if (External) + External->StartTranslationUnit(Consumer); + + if (P.ParseTopLevelDecl(ADecl)) { + if (!External && !S.getLangOpts().CPlusPlus) + P.Diag(diag::ext_empty_translation_unit); + } else { + do { + // If we got a null return and something *was* parsed, ignore it. This + // is due to a top-level semicolon, an action override, or a parse error + // skipping something. + if (ADecl && !Consumer->HandleTopLevelDecl(ADecl.get())) + return; + } while (!P.ParseTopLevelDecl(ADecl)); + } + // Process any TopLevelDecls generated by #pragma weak. 
 for (SmallVector<Decl*,2>::iterator I = S.WeakTopLevelDecls().begin(),
diff --git a/lib/Parse/ParseCXXInlineMethods.cpp b/lib/Parse/ParseCXXInlineMethods.cpp
index c7b29d9..abce27c 100644
--- a/lib/Parse/ParseCXXInlineMethods.cpp
+++ b/lib/Parse/ParseCXXInlineMethods.cpp
@@ -16,6 +16,7 @@
 #include "clang/Sema/DeclSpec.h"
 #include "clang/Sema/Scope.h"
 #include "clang/AST/DeclTemplate.h"
+#include "RAIIObjectsForParser.h"
 using namespace clang;
 
 /// ParseCXXInlineMethodDef - We parsed and verified that the specified
@@ -45,7 +46,7 @@ Decl *Parser::ParseCXXInlineMethodDef(AccessSpecifier AS,
   else {
     FnD = Actions.ActOnCXXMemberDeclarator(getCurScope(), AS, D,
                                            move(TemplateParams), 0,
-                                           VS, /*HasDeferredInit=*/false);
+                                           VS, ICIS_NoInit);
     if (FnD) {
       Actions.ProcessDeclAttributeList(getCurScope(), FnD, AccessAttrs,
                                        false, true);
@@ -108,6 +109,7 @@ Decl *Parser::ParseCXXInlineMethodDef(AccessSpecifier AS,
   // or if we are about to parse function member template then consume
   // the tokens and store them for parsing at the end of the translation unit.
   if (getLangOpts().DelayedTemplateParsing &&
+      DefinitionKind == FDK_Definition &&
      ((Actions.CurContext->isDependentContext() ||
        TemplateInfo.Kind != ParsedTemplateInfo::NonTemplate) &&
        !Actions.IsInsideALocalClassWithinATemplateFunction())) {
@@ -458,7 +460,7 @@ void Parser::ParseLexedMemberInitializers(ParsingClass &Class) {
     Actions.ActOnStartDelayedMemberDeclarations(getCurScope(),
                                                 Class.TagOrTemplate);
 
-  {
+  if (!Class.LateParsedDeclarations.empty()) {
     // C++11 [expr.prim.general]p4:
     //   Otherwise, if a member-declarator declares a non-static data member
     //  (9.2) of a class X, the expression this is a prvalue of type "pointer
@@ -492,7 +494,7 @@ void Parser::ParseLexedMemberInitializer(LateParsedMemberInitializer &MI) {
   ConsumeAnyToken();
 
   SourceLocation EqualLoc;
-  
+
   ExprResult Init = ParseCXXMemberInitializer(MI.Field, /*IsFunction=*/false,
                                               EqualLoc);
 
diff --git a/lib/Parse/ParseDecl.cpp b/lib/Parse/ParseDecl.cpp
index 7995e68..b830d9c 100644
--- a/lib/Parse/ParseDecl.cpp
+++ b/lib/Parse/ParseDecl.cpp
@@ -14,6 +14,7 @@
 #include "clang/Parse/Parser.h"
 #include "clang/Parse/ParseDiagnostic.h"
 #include "clang/Basic/OpenCL.h"
+#include "clang/Sema/Lookup.h"
 #include "clang/Sema/Scope.h"
 #include "clang/Sema/ParsedTemplate.h"
 #include "clang/Sema/PrettyDeclStackTrace.h"
@@ -37,6 +38,8 @@ TypeResult Parser::ParseTypeName(SourceRange *Range,
                                  AccessSpecifier AS,
                                  Decl **OwnedType) {
   DeclSpecContext DSC = getDeclSpecContextFromDeclaratorContext(Context);
+  if (DSC == DSC_normal)
+    DSC = DSC_type_specifier;
 
   // Parse the common declaration-specifiers piece.
 DeclSpec DS(AttrFactory);
@@ -156,7 +159,7 @@ void Parser::ParseGNUAttributes(ParsedAttributes &attrs,
       }
     } else {
       attrs.addNew(AttrName, AttrNameLoc, 0, AttrNameLoc,
-                   0, SourceLocation(), 0, 0);
+                   0, SourceLocation(), 0, 0, AttributeList::AS_GNU);
     }
   }
   if (ExpectAndConsume(tok::r_paren, diag::err_expected_rparen))
@@ -272,67 +275,175 @@ void Parser::ParseGNUAttributeArgs(IdentifierInfo *AttrName,
   if (!ExpectAndConsume(tok::r_paren, diag::err_expected_rparen)) {
     AttributeList *attr =
       Attrs.addNew(AttrName, SourceRange(AttrNameLoc, RParen), 0, AttrNameLoc,
-                   ParmName, ParmLoc, ArgExprs.take(), ArgExprs.size());
-    if (BuiltinType && attr->getKind() == AttributeList::AT_iboutletcollection)
+                   ParmName, ParmLoc, ArgExprs.take(), ArgExprs.size(),
+                   AttributeList::AS_GNU);
+    if (BuiltinType && attr->getKind() == AttributeList::AT_IBOutletCollection)
       Diag(Tok, diag::err_iboutletcollection_builtintype);
   }
 }
 
+/// \brief Parses a single argument for a declspec, including the
+/// surrounding parens.
+void Parser::ParseMicrosoftDeclSpecWithSingleArg(IdentifierInfo *AttrName,
+                                                 SourceLocation AttrNameLoc,
+                                                 ParsedAttributes &Attrs)
+{
+  BalancedDelimiterTracker T(*this, tok::l_paren);
+  if (T.expectAndConsume(diag::err_expected_lparen_after,
+                         AttrName->getNameStart(), tok::r_paren))
+    return;
+
+  ExprResult ArgExpr(ParseConstantExpression());
+  if (ArgExpr.isInvalid()) {
+    T.skipToEnd();
+    return;
+  }
+  Expr *ExprList = ArgExpr.take();
+  Attrs.addNew(AttrName, AttrNameLoc, 0, AttrNameLoc, 0, SourceLocation(),
+               &ExprList, 1, AttributeList::AS_Declspec);
+
+  T.consumeClose();
+}
+
+/// \brief Determines whether a declspec is a "simple" one requiring no
+/// arguments.
+bool Parser::IsSimpleMicrosoftDeclSpec(IdentifierInfo *Ident) {
+  return llvm::StringSwitch<bool>(Ident->getName())
+    .Case("dllimport", true)
+    .Case("dllexport", true)
+    .Case("noreturn", true)
+    .Case("nothrow", true)
+    .Case("noinline", true)
+    .Case("naked", true)
+    .Case("appdomain", true)
+    .Case("process", true)
+    .Case("jitintrinsic", true)
+    .Case("noalias", true)
+    .Case("restrict", true)
+    .Case("novtable", true)
+    .Case("selectany", true)
+    .Case("thread", true)
+    .Default(false);
+}
+
+/// \brief Attempts to parse a declspec which is not simple (one that takes
+/// parameters).  Will return false if we properly handled the declspec, or
+/// true if it is an unknown declspec.
+void Parser::ParseComplexMicrosoftDeclSpec(IdentifierInfo *Ident,
+                                           SourceLocation Loc,
+                                           ParsedAttributes &Attrs) {
+  // Try to handle the easy case first -- these declspecs all take a single
+  // parameter as their argument.
+  if (llvm::StringSwitch<bool>(Ident->getName())
+      .Case("uuid", true)
+      .Case("align", true)
+      .Case("allocate", true)
+      .Default(false)) {
+    ParseMicrosoftDeclSpecWithSingleArg(Ident, Loc, Attrs);
+  } else if (Ident->getName() == "deprecated") {
+    // The deprecated declspec has an optional single argument, so we will
+    // check for a l-paren to decide whether we should parse an argument or
+    // not.
+    if (Tok.getKind() == tok::l_paren)
+      ParseMicrosoftDeclSpecWithSingleArg(Ident, Loc, Attrs);
+    else
+      Attrs.addNew(Ident, Loc, 0, Loc, 0, SourceLocation(), 0, 0,
+                   AttributeList::AS_Declspec);
+  } else if (Ident->getName() == "property") {
+    // The property declspec is more complex in that it can take one or two
+    // assignment expressions as a parameter, but the lhs of the assignment
+    // must be named get or put.
+    //
+    // For right now, we will just skip to the closing right paren of the
+    // property expression.
+ // + // FIXME: we should deal with __declspec(property) at some point because it + // is used in the platform SDK headers for the Parallel Patterns Library + // and ATL. + BalancedDelimiterTracker T(*this, tok::l_paren); + if (T.expectAndConsume(diag::err_expected_lparen_after, + Ident->getNameStart(), tok::r_paren)) + return; + T.skipToEnd(); + } else { + // We don't recognize this as a valid declspec, but instead of creating the + // attribute and allowing sema to warn about it, we will warn here instead. + // This is because some attributes have multiple spellings, but we need to + // disallow that for declspecs (such as align vs aligned). If we made the + // attribute, we'd have to split the valid declspec spelling logic into + // both locations. + Diag(Loc, diag::warn_ms_declspec_unknown) << Ident; + + // If there's an open paren, we should eat the open and close parens under + // the assumption that this unknown declspec has parameters. + BalancedDelimiterTracker T(*this, tok::l_paren); + if (!T.consumeOpen()) + T.skipToEnd(); + } +} -/// ParseMicrosoftDeclSpec - Parse an __declspec construct -/// /// [MS] decl-specifier: /// __declspec ( extended-decl-modifier-seq ) /// /// [MS] extended-decl-modifier-seq: /// extended-decl-modifier[opt] /// extended-decl-modifier extended-decl-modifier-seq - -void Parser::ParseMicrosoftDeclSpec(ParsedAttributes &attrs) { +void Parser::ParseMicrosoftDeclSpec(ParsedAttributes &Attrs) { assert(Tok.is(tok::kw___declspec) && "Not a declspec!"); ConsumeToken(); - if (ExpectAndConsume(tok::l_paren, diag::err_expected_lparen_after, - "declspec")) { - SkipUntil(tok::r_paren, true); // skip until ) or ; + BalancedDelimiterTracker T(*this, tok::l_paren); + if (T.expectAndConsume(diag::err_expected_lparen_after, "__declspec", + tok::r_paren)) return; - } - while (Tok.getIdentifierInfo()) { - IdentifierInfo *AttrName = Tok.getIdentifierInfo(); - SourceLocation AttrNameLoc = ConsumeToken(); - - // FIXME: Remove this when we have proper __declspec(property()) support. - // Just skip everything inside property(). - if (AttrName->getName() == "property") { - ConsumeParen(); - SkipUntil(tok::r_paren); + // An empty declspec is perfectly legal and should not warn. Additionally, + // you can specify multiple attributes per declspec. + while (Tok.getKind() != tok::r_paren) { + // We expect either a well-known identifier or a generic string. Anything + // else is a malformed declspec. + bool IsString = Tok.getKind() == tok::string_literal ? true : false; + if (!IsString && Tok.getKind() != tok::identifier && + Tok.getKind() != tok::kw_restrict) { + Diag(Tok, diag::err_ms_declspec_type); + T.skipToEnd(); + return; } - if (Tok.is(tok::l_paren)) { - ConsumeParen(); - // FIXME: This doesn't parse __declspec(property(get=get_func_name)) - // correctly. 
- ExprResult ArgExpr(ParseAssignmentExpression()); - if (!ArgExpr.isInvalid()) { - Expr *ExprList = ArgExpr.take(); - attrs.addNew(AttrName, AttrNameLoc, 0, AttrNameLoc, 0, - SourceLocation(), &ExprList, 1, true); + + IdentifierInfo *AttrName; + SourceLocation AttrNameLoc; + if (IsString) { + SmallString<8> StrBuffer; + bool Invalid = false; + StringRef Str = PP.getSpelling(Tok, StrBuffer, &Invalid); + if (Invalid) { + T.skipToEnd(); + return; } - if (ExpectAndConsume(tok::r_paren, diag::err_expected_rparen)) - SkipUntil(tok::r_paren, false); + AttrName = PP.getIdentifierInfo(Str); + AttrNameLoc = ConsumeStringToken(); } else { - attrs.addNew(AttrName, AttrNameLoc, 0, AttrNameLoc, - 0, SourceLocation(), 0, 0, true); + AttrName = Tok.getIdentifierInfo(); + AttrNameLoc = ConsumeToken(); } + + if (IsString || IsSimpleMicrosoftDeclSpec(AttrName)) + // If we have a generic string, we will allow it because there is no + // documented list of allowable string declspecs, but we know they exist + // (for instance, SAL declspecs in older versions of MSVC). + // + // Alternatively, if the identifier is a simple one, then it requires no + // arguments and can be turned into an attribute directly. + Attrs.addNew(AttrName, AttrNameLoc, 0, AttrNameLoc, 0, SourceLocation(), + 0, 0, AttributeList::AS_Declspec); + else + ParseComplexMicrosoftDeclSpec(AttrName, AttrNameLoc, Attrs); } - if (ExpectAndConsume(tok::r_paren, diag::err_expected_rparen)) - SkipUntil(tok::r_paren, false); - return; + T.consumeClose(); } void Parser::ParseMicrosoftTypeAttributes(ParsedAttributes &attrs) { // Treat these like attributes - // FIXME: Allow Sema to distinguish between these and real attributes! while (Tok.is(tok::kw___fastcall) || Tok.is(tok::kw___stdcall) || Tok.is(tok::kw___thiscall) || Tok.is(tok::kw___cdecl) || Tok.is(tok::kw___ptr64) || Tok.is(tok::kw___w64) || @@ -340,12 +451,8 @@ void Parser::ParseMicrosoftTypeAttributes(ParsedAttributes &attrs) { Tok.is(tok::kw___unaligned)) { IdentifierInfo *AttrName = Tok.getIdentifierInfo(); SourceLocation AttrNameLoc = ConsumeToken(); - if (Tok.is(tok::kw___ptr64) || Tok.is(tok::kw___w64) || - Tok.is(tok::kw___ptr32)) - // FIXME: Support these properly! 
- continue; attrs.addNew(AttrName, AttrNameLoc, 0, AttrNameLoc, 0, - SourceLocation(), 0, 0, true); + SourceLocation(), 0, 0, AttributeList::AS_MSTypespec); } } @@ -355,7 +462,7 @@ void Parser::ParseBorlandTypeAttributes(ParsedAttributes &attrs) { IdentifierInfo *AttrName = Tok.getIdentifierInfo(); SourceLocation AttrNameLoc = ConsumeToken(); attrs.addNew(AttrName, AttrNameLoc, 0, AttrNameLoc, 0, - SourceLocation(), 0, 0, true); + SourceLocation(), 0, 0, AttributeList::AS_MSTypespec); } } @@ -365,7 +472,7 @@ void Parser::ParseOpenCLAttributes(ParsedAttributes &attrs) { SourceLocation AttrNameLoc = ConsumeToken(); attrs.addNew(PP.getIdentifierInfo("opencl_kernel_function"), AttrNameLoc, 0, AttrNameLoc, 0, - SourceLocation(), 0, 0, false); + SourceLocation(), 0, 0, AttributeList::AS_GNU); } } @@ -374,42 +481,42 @@ void Parser::ParseOpenCLQualifiers(DeclSpec &DS) { switch(Tok.getKind()) { // OpenCL qualifiers: case tok::kw___private: - case tok::kw_private: + case tok::kw_private: DS.getAttributes().addNewInteger( - Actions.getASTContext(), + Actions.getASTContext(), PP.getIdentifierInfo("address_space"), Loc, 0); break; - + case tok::kw___global: DS.getAttributes().addNewInteger( Actions.getASTContext(), PP.getIdentifierInfo("address_space"), Loc, LangAS::opencl_global); break; - + case tok::kw___local: DS.getAttributes().addNewInteger( Actions.getASTContext(), PP.getIdentifierInfo("address_space"), Loc, LangAS::opencl_local); break; - + case tok::kw___constant: DS.getAttributes().addNewInteger( Actions.getASTContext(), PP.getIdentifierInfo("address_space"), Loc, LangAS::opencl_constant); break; - + case tok::kw___read_only: DS.getAttributes().addNewInteger( - Actions.getASTContext(), + Actions.getASTContext(), PP.getIdentifierInfo("opencl_image_access"), Loc, CLIA_read_only); break; - + case tok::kw___write_only: DS.getAttributes().addNewInteger( - Actions.getASTContext(), + Actions.getASTContext(), PP.getIdentifierInfo("opencl_image_access"), Loc, CLIA_write_only); break; - + case tok::kw___read_write: DS.getAttributes().addNewInteger( Actions.getASTContext(), @@ -490,21 +597,21 @@ VersionTuple Parser::ParseVersionTuple(SourceRange &Range) { if (AfterMinor == ActualLength) { ConsumeToken(); - + // We had major.minor. if (Major == 0 && Minor == 0) { Diag(Tok, diag::err_zero_version); return VersionTuple(); } - return VersionTuple(Major, Minor); + return VersionTuple(Major, Minor); } // If what follows is not a '.', we have a problem. if (ThisTokBegin[AfterMinor] != '.') { Diag(Tok, diag::err_expected_version); SkipUntil(tok::comma, tok::r_paren, true, true, true); - return VersionTuple(); + return VersionTuple(); } // Parse the subminor version. 
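ParseVersionTuple above accepts 'major', 'major.minor', and 'major.minor.subminor' forms, and ParseAvailabilityAttribute (next hunk) consumes those tuples clause by clause, including the 'message' string whose feature test this patch adds. A representative declaration using the clang availability extension; the function name is invented:

// Each clause carries a version tuple parsed by ParseVersionTuple; the
// optional message string is reported alongside deprecation diagnostics.
void old_api(void)
    __attribute__((availability(macosx, introduced=10.4, deprecated=10.6,
                                obsoleted=10.7,
                                message="use new_api instead")));

int main() { return 0; }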
@@ -599,7 +706,7 @@ void Parser::ParseAvailabilityAttribute(IdentifierInfo &Availability, if (UnavailableLoc.isValid()) { Diag(KeywordLoc, diag::err_availability_redundant) << Keyword << SourceRange(UnavailableLoc); - } + } UnavailableLoc = KeywordLoc; if (Tok.isNot(tok::comma)) @@ -607,8 +714,8 @@ void Parser::ParseAvailabilityAttribute(IdentifierInfo &Availability, ConsumeToken(); continue; - } - + } + if (Tok.isNot(tok::equal)) { Diag(Tok, diag::err_expected_equal_after) << Keyword; @@ -625,10 +732,10 @@ void Parser::ParseAvailabilityAttribute(IdentifierInfo &Availability, MessageExpr = ParseStringLiteralExpression(); break; } - + SourceRange VersionRange; VersionTuple Version = ParseVersionTuple(VersionRange); - + if (Version.empty()) { SkipUntil(tok::r_paren); return; @@ -641,13 +748,13 @@ void Parser::ParseAvailabilityAttribute(IdentifierInfo &Availability, Index = Deprecated; else if (Keyword == Ident_obsoleted) Index = Obsoleted; - else + else Index = Unknown; if (Index < Unknown) { if (!Changes[Index].KeywordLoc.isInvalid()) { Diag(KeywordLoc, diag::err_availability_redundant) - << Keyword + << Keyword << SourceRange(Changes[Index].KeywordLoc, Changes[Index].VersionRange.getEnd()); } @@ -693,15 +800,15 @@ void Parser::ParseAvailabilityAttribute(IdentifierInfo &Availability, } // Record this attribute - attrs.addNew(&Availability, - SourceRange(AvailabilityLoc, T.getCloseLocation()), + attrs.addNew(&Availability, + SourceRange(AvailabilityLoc, T.getCloseLocation()), 0, AvailabilityLoc, Platform, PlatformLoc, Changes[Introduced], Changes[Deprecated], - Changes[Obsoleted], + Changes[Obsoleted], UnavailableLoc, MessageExpr.take(), - false, false); + AttributeList::AS_GNU); } @@ -739,16 +846,16 @@ void Parser::ParseLexedAttributes(ParsingClass &Class) { if (!AlreadyHasClassScope) Actions.ActOnStartDelayedMemberDeclarations(getCurScope(), Class.TagOrTemplate); - { + if (!Class.LateParsedDeclarations.empty()) { // Allow 'this' within late-parsed attributes. - Sema::CXXThisScopeRAII ThisScope(Actions, Class.TagOrTemplate, + Sema::CXXThisScopeRAII ThisScope(Actions, Class.TagOrTemplate, /*TypeQuals=*/0); - + for (unsigned i = 0, ni = Class.LateParsedDeclarations.size(); i < ni; ++i){ Class.LateParsedDeclarations[i]->ParseLexedAttributes(); } } - + if (!AlreadyHasClassScope) Actions.ActOnFinishDelayedMemberDeclarations(getCurScope(), Class.TagOrTemplate); @@ -770,7 +877,7 @@ void Parser::ParseLexedAttributeList(LateParsedAttrList &LAs, Decl *D, /// \brief Finish parsing an attribute for which parsing was delayed. /// This will be called at the end of parsing a class declaration /// for each LateParsedAttribute. We consume the saved tokens and -/// create an attribute with the arguments filled in. We add this +/// create an attribute with the arguments filled in. We add this /// to the Attribute list for the decl. void Parser::ParseLexedAttribute(LateParsedAttribute &LA, bool EnterScope, bool OnDefinition) { @@ -885,10 +992,10 @@ void Parser::ParseThreadSafetyAttribute(IdentifierInfo &AttrName, BalancedDelimiterTracker T(*this, tok::l_paren); T.consumeOpen(); - + ExprVector ArgExprs(Actions); bool ArgExprsOk = true; - + // now parse the list of expressions while (Tok.isNot(tok::r_paren)) { ExprResult ArgExpr(ParseAssignmentExpression()); @@ -906,7 +1013,7 @@ void Parser::ParseThreadSafetyAttribute(IdentifierInfo &AttrName, // Match the ')'. 
if (ArgExprsOk && !T.consumeClose()) { Attrs.addNew(&AttrName, AttrNameLoc, 0, AttrNameLoc, 0, SourceLocation(), - ArgExprs.take(), ArgExprs.size()); + ArgExprs.take(), ArgExprs.size(), AttributeList::AS_GNU); } if (EndLoc) *EndLoc = T.getCloseLocation(); @@ -975,7 +1082,7 @@ Parser::DeclGroupPtrTy Parser::ParseDeclaration(StmtVector &Stmts, // Must temporarily exit the objective-c container scope for // parsing c none objective-c decls. ObjCDeclContextSwitch ObjCDC(*this); - + Decl *SingleDecl = 0; Decl *OwnedType = 0; switch (Tok.getKind()) { @@ -992,7 +1099,7 @@ Parser::DeclGroupPtrTy Parser::ParseDeclaration(StmtVector &Stmts, SingleDecl = ParseNamespace(Context, DeclEnd, InlineLoc); break; } - return ParseSimpleDeclaration(Stmts, Context, DeclEnd, attrs, + return ParseSimpleDeclaration(Stmts, Context, DeclEnd, attrs, true); case tok::kw_namespace: ProhibitAttributes(attrs); @@ -1010,7 +1117,7 @@ Parser::DeclGroupPtrTy Parser::ParseDeclaration(StmtVector &Stmts, default: return ParseSimpleDeclaration(Stmts, Context, DeclEnd, attrs, true); } - + // This routine returns a DeclGroup, if the thing we parsed only contains a // single decl, convert it now. Alias declarations can also declare a type; // include that too if it is present. @@ -1019,10 +1126,12 @@ Parser::DeclGroupPtrTy Parser::ParseDeclaration(StmtVector &Stmts, /// simple-declaration: [C99 6.7: declaration] [C++ 7p1: dcl.dcl] /// declaration-specifiers init-declarator-list[opt] ';' +/// [C++11] attribute-specifier-seq decl-specifier-seq[opt] +/// init-declarator-list ';' ///[C90/C++]init-declarator-list ';' [TODO] /// [OMP] threadprivate-directive [TODO] /// -/// for-range-declaration: [C++0x 6.5p1: stmt.ranged] +/// for-range-declaration: [C++11 6.5p1: stmt.ranged] /// attribute-specifier-seq[opt] type-specifier-seq declarator /// /// If RequireSemi is false, this does not check for a ';' at the end of the @@ -1031,12 +1140,11 @@ Parser::DeclGroupPtrTy Parser::ParseDeclaration(StmtVector &Stmts, /// If FRI is non-null, we might be parsing a for-range-declaration instead /// of a simple-declaration. If we find that we are, we also parse the /// for-range-initializer, and place it here. -Parser::DeclGroupPtrTy Parser::ParseSimpleDeclaration(StmtVector &Stmts, - unsigned Context, - SourceLocation &DeclEnd, - ParsedAttributes &attrs, - bool RequireSemi, - ForRangeInit *FRI) { +Parser::DeclGroupPtrTy +Parser::ParseSimpleDeclaration(StmtVector &Stmts, unsigned Context, + SourceLocation &DeclEnd, + ParsedAttributesWithRange &attrs, + bool RequireSemi, ForRangeInit *FRI) { // Parse the common declaration-specifiers piece. 
ParsingDeclSpec DS(*this); DS.takeAttributesFrom(attrs); @@ -1047,14 +1155,15 @@ Parser::DeclGroupPtrTy Parser::ParseSimpleDeclaration(StmtVector &Stmts, // C99 6.7.2.3p6: Handle "struct-or-union identifier;", "enum { X };" // declaration-specifiers init-declarator-list[opt] ';' if (Tok.is(tok::semi)) { + DeclEnd = Tok.getLocation(); if (RequireSemi) ConsumeToken(); Decl *TheDecl = Actions.ParsedFreeStandingDeclSpec(getCurScope(), AS_none, DS); DS.complete(TheDecl); return Actions.ConvertDeclToDeclGroup(TheDecl); } - - return ParseDeclGroup(DS, Context, /*FunctionDefs=*/ false, &DeclEnd, FRI); + + return ParseDeclGroup(DS, Context, /*FunctionDefs=*/ false, &DeclEnd, FRI); } /// Returns true if this might be the start of a declarator, or a common typo @@ -1161,15 +1270,33 @@ void Parser::SkipMalformedDecl() { case tok::kw_inline: // 'inline namespace' at the start of a line is almost certainly - // a good place to pick back up parsing. - if (Tok.isAtStartOfLine() && NextToken().is(tok::kw_namespace)) + // a good place to pick back up parsing, except in an Objective-C + // @interface context. + if (Tok.isAtStartOfLine() && NextToken().is(tok::kw_namespace) && + (!ParsingInObjCContainer || CurParsedObjCImpl)) return; break; case tok::kw_namespace: // 'namespace' at the start of a line is almost certainly a good - // place to pick back up parsing. - if (Tok.isAtStartOfLine()) + // place to pick back up parsing, except in an Objective-C + // @interface context. + if (Tok.isAtStartOfLine() && + (!ParsingInObjCContainer || CurParsedObjCImpl)) + return; + break; + + case tok::at: + // @end is very much like } in Objective-C contexts. + if (NextToken().isObjCAtKeyword(tok::objc_end) && + ParsingInObjCContainer) + return; + break; + + case tok::minus: + case tok::plus: + // - and + probably start new method declarations in Objective-C contexts. + if (Tok.isAtStartOfLine() && ParsingInObjCContainer) return; break; @@ -1214,7 +1341,7 @@ Parser::DeclGroupPtrTy Parser::ParseDeclGroup(ParsingDeclSpec &DS, // declaration. We have to check this because __attribute__ might be the // start of a function definition in GCC-extended K&R C. !isDeclarationAfterDeclarator()) { - + if (isStartOfFunctionDefinition(D)) { if (DS.getStorageClassSpec() == DeclSpec::SCS_typedef) { Diag(Tok, diag::err_function_declared_typedef); @@ -1227,7 +1354,7 @@ Parser::DeclGroupPtrTy Parser::ParseDeclGroup(ParsingDeclSpec &DS, ParseFunctionDefinition(D, ParsedTemplateInfo(), &LateParsedAttrs); return Actions.ConvertDeclToDeclGroup(TheDecl); } - + if (isDeclarationSpecifier()) { // If there is an invalid declaration specifier right after the function // prototype, then we must be in a missing semicolon case where this isn't @@ -1269,7 +1396,7 @@ Parser::DeclGroupPtrTy Parser::ParseDeclGroup(ParsingDeclSpec &DS, DeclsInGroup.push_back(FirstDecl); bool ExpectSemi = Context != Declarator::ForContext; - + // If we don't have a comma, it is either the end of the list (a ';') or an // error, bail out. while (Tok.is(tok::comma)) { @@ -1303,7 +1430,7 @@ Parser::DeclGroupPtrTy Parser::ParseDeclGroup(ParsingDeclSpec &DS, Decl *ThisDecl = ParseDeclarationAfterDeclarator(D); D.complete(ThisDecl); if (ThisDecl) - DeclsInGroup.push_back(ThisDecl); + DeclsInGroup.push_back(ThisDecl); } } @@ -1311,10 +1438,9 @@ Parser::DeclGroupPtrTy Parser::ParseDeclGroup(ParsingDeclSpec &DS, *DeclEnd = Tok.getLocation(); if (ExpectSemi && - ExpectAndConsume(tok::semi, - Context == Declarator::FileContext - ? 
diag::err_invalid_token_after_toplevel_declarator - : diag::err_expected_semi_declaration)) { + ExpectAndConsumeSemi(Context == Declarator::FileContext + ? diag::err_invalid_token_after_toplevel_declarator + : diag::err_expected_semi_declaration)) { // Okay, there was no semicolon and one was expected. If we see a // declaration specifier, just assume it was missing and continue parsing. // Otherwise things are very confused and we skip to recover. @@ -1388,7 +1514,7 @@ Decl *Parser::ParseDeclarationAfterDeclaratorAndAttributes(Declarator &D, case ParsedTemplateInfo::NonTemplate: ThisDecl = Actions.ActOnDeclarator(getCurScope(), D); break; - + case ParsedTemplateInfo::Template: case ParsedTemplateInfo::ExplicitSpecialization: ThisDecl = Actions.ActOnTemplateDeclarator(getCurScope(), @@ -1397,9 +1523,9 @@ Decl *Parser::ParseDeclarationAfterDeclaratorAndAttributes(Declarator &D, TemplateInfo.TemplateParams->size()), D); break; - + case ParsedTemplateInfo::ExplicitInstantiation: { - DeclResult ThisRes + DeclResult ThisRes = Actions.ActOnExplicitInstantiation(getCurScope(), TemplateInfo.ExternLoc, TemplateInfo.TemplateLoc, @@ -1408,7 +1534,7 @@ Decl *Parser::ParseDeclarationAfterDeclaratorAndAttributes(Declarator &D, SkipUntil(tok::semi, true, true); return 0; } - + ThisDecl = ThisRes.get(); break; } @@ -1441,10 +1567,11 @@ Decl *Parser::ParseDeclarationAfterDeclaratorAndAttributes(Declarator &D, if (Tok.is(tok::code_completion)) { Actions.CodeCompleteInitializer(getCurScope(), ThisDecl); + Actions.FinalizeDeclaration(ThisDecl); cutOffParsing(); return 0; } - + ExprResult Init(ParseInitializer()); if (getLangOpts().CPlusPlus && D.getCXXScopeSpec().isSet()) { @@ -1497,7 +1624,8 @@ Decl *Parser::ParseDeclarationAfterDeclaratorAndAttributes(Declarator &D, Actions.AddInitializerToDecl(ThisDecl, Initializer.take(), /*DirectInit=*/true, TypeContainsAuto); } - } else if (getLangOpts().CPlusPlus0x && Tok.is(tok::l_brace)) { + } else if (getLangOpts().CPlusPlus0x && Tok.is(tok::l_brace) && + (!CurParsedObjCImpl || !D.isFunctionDeclarator())) { // Parse C++0x braced-init-list. Diag(Tok, diag::warn_cxx98_compat_generalized_initializer_lists); @@ -1543,7 +1671,8 @@ void Parser::ParseSpecifierQualifierList(DeclSpec &DS, AccessSpecifier AS, // Validate declspec for type-name. unsigned Specs = DS.getParsedSpecifiers(); - if (DSC == DSC_type_specifier && !DS.hasTypeSpecifier()) { + if ((DSC == DSC_type_specifier || DSC == DSC_trailing) && + !DS.hasTypeSpecifier()) { Diag(Tok, diag::err_expected_type); DS.SetTypeSpecError(); } else if (Specs == DeclSpec::PQ_None && !DS.getNumProtocolQualifiers() && @@ -1635,12 +1764,13 @@ bool Parser::ParseImplicitInt(DeclSpec &DS, CXXScopeSpec *SS, assert(!DS.hasTypeSpecifier() && "Type specifier checked above"); // Since we know that this either implicit int (which is rare) or an - // error, do lookahead to try to do better recovery. This never applies within - // a type specifier. - // FIXME: Don't bail out here in languages with no implicit int (like - // C++ with no -fms-extensions). This is much more likely to be an undeclared - // type or typo than a use of implicit int. - if (DSC != DSC_type_specifier && + // error, do lookahead to try to do better recovery. This never applies + // within a type specifier. Outside of C++, we allow this even if the + // language doesn't "officially" support implicit int -- we support + // implicit int as an extension in C99 and C11. Allegedly, MS also + // supports implicit int in C++ mode. 
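The braced-init-list branch above handles C++11 declarations such as these (an invented example, parsed through the same AddInitializerToDecl path as the '=' form):

    std::vector<int> v{1, 2, 3};  // direct-list-initialization in a declaration
    int n{5};                     // likewise a brace-or-equal-initializer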
+ if (DSC != DSC_type_specifier && DSC != DSC_trailing && + (!getLangOpts().CPlusPlus || getLangOpts().MicrosoftExt) && isValidAfterIdentifierInDeclarator(NextToken())) { // If this token is valid for implicit int, e.g. "static x = 4", then // we just avoid eating the identifier, so it will be parsed as the @@ -1648,6 +1778,13 @@ bool Parser::ParseImplicitInt(DeclSpec &DS, CXXScopeSpec *SS, return false; } + if (getLangOpts().CPlusPlus && + DS.getStorageClassSpec() == DeclSpec::SCS_auto) { + // Don't require a type specifier if we have the 'auto' storage class + // specifier in C++98 -- we'll promote it to a type specifier. + return false; + } + // Otherwise, if we don't consume this token, we are going to emit an // error anyway. Try to recover from various common problems. Check // to see if this was a reference to a tag name without a tag specified. @@ -1671,9 +1808,20 @@ bool Parser::ParseImplicitInt(DeclSpec &DS, CXXScopeSpec *SS, } if (TagName) { + IdentifierInfo *TokenName = Tok.getIdentifierInfo(); + LookupResult R(Actions, TokenName, SourceLocation(), + Sema::LookupOrdinaryName); + Diag(Loc, diag::err_use_of_tag_name_without_tag) - << Tok.getIdentifierInfo() << TagName << getLangOpts().CPlusPlus - << FixItHint::CreateInsertion(Tok.getLocation(),FixitTagName); + << TokenName << TagName << getLangOpts().CPlusPlus + << FixItHint::CreateInsertion(Tok.getLocation(), FixitTagName); + + if (Actions.LookupParsedName(R, getCurScope(), SS)) { + for (LookupResult::iterator I = R.begin(), IEnd = R.end(); + I != IEnd; ++I) + Diag((*I)->getLocation(), diag::note_decl_hiding_tag_type) + << TokenName << TagName; + } // Parse this as a tag as if the missing tag were present. if (TagKind == tok::kw_enum) @@ -1685,11 +1833,55 @@ bool Parser::ParseImplicitInt(DeclSpec &DS, CXXScopeSpec *SS, } } - // This is almost certainly an invalid type name. Let the action emit a + // Determine whether this identifier could plausibly be the name of something + // being declared (with a missing type). + if (DSC != DSC_type_specifier && DSC != DSC_trailing && + (!SS || DSC == DSC_top_level || DSC == DSC_class)) { + // Look ahead to the next token to try to figure out what this declaration + // was supposed to be. + switch (NextToken().getKind()) { + case tok::comma: + case tok::equal: + case tok::kw_asm: + case tok::l_brace: + case tok::l_square: + case tok::semi: + // This looks like a variable declaration. The type is probably missing. + // We're done parsing decl-specifiers. + return false; + + case tok::l_paren: { + // static x(4); // 'x' is not a type + // x(int n); // 'x' is not a type + // x (*p)[]; // 'x' is a type + // + // Since we're in an error case (or the rare 'implicit int in C++' MS + // extension), we can afford to perform a tentative parse to determine + // which case we're in. + TentativeParsingAction PA(*this); + ConsumeToken(); + TPResult TPR = TryParseDeclarator(/*mayBeAbstract*/false); + PA.Revert(); + if (TPR == TPResult::False()) + return false; + // The identifier is followed by a parenthesized declarator. + // It's supposed to be a type. + break; + } + + default: + // This is probably supposed to be a type. This includes cases like: + // int f(itn); + // struct S { unsinged : 4; }; + break; + } + } + + // This is almost certainly an invalid type name. Let the action emit a // diagnostic and attempt to recover. 
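The new lookup-and-note code above reports the declarations that hide a tag name; a classic C instance of the underlying diagnostic (invented example):

    struct set { int member; };
    set s1;         // error: must use 'struct' tag to refer to type 'set' here
    struct set s2;  // OK: the elaborated form names the tag directly

In C++ the bare name would normally work, so when it does not, the added loop points at each ordinary declaration (say, a variable also named 'set') that is hiding the tag.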
ParsedType T; - if (Actions.DiagnoseUnknownTypeName(*Tok.getIdentifierInfo(), Loc, - getCurScope(), SS, T)) { + IdentifierInfo *II = Tok.getIdentifierInfo(); + if (Actions.DiagnoseUnknownTypeName(II, Loc, getCurScope(), SS, T)) { // The action emitted a diagnostic, so we don't have to. if (T) { // The action has suggested that the type T could be used. Set that as @@ -1700,11 +1892,15 @@ bool Parser::ParseImplicitInt(DeclSpec &DS, CXXScopeSpec *SS, DS.SetTypeSpecType(DeclSpec::TST_typename, Loc, PrevSpec, DiagID, T); DS.SetRangeEnd(Tok.getLocation()); ConsumeToken(); - + // There may be other declaration specifiers after this. + return true; + } else if (II != Tok.getIdentifierInfo()) { + // If no type was suggested, the correction is to a keyword + Tok.setKind(II->getTokenID()); // There may be other declaration specifiers after this. return true; } - + // Fall through; the action had no suggestion for us. } else { // The action did not emit a diagnostic, so emit one now. @@ -1729,7 +1925,7 @@ bool Parser::ParseImplicitInt(DeclSpec &DS, CXXScopeSpec *SS, /// /// \param Context the declarator context, which is one of the /// Declarator::TheContext enumerator values. -Parser::DeclSpecContext +Parser::DeclSpecContext Parser::getDeclSpecContextFromDeclaratorContext(unsigned Context) { if (Context == Declarator::MemberContext) return DSC_class; @@ -1806,8 +2002,12 @@ void Parser::ParseAlignmentSpecifier(ParsedAttributes &Attrs, ExprVector ArgExprs(Actions); ArgExprs.push_back(ArgExpr.release()); + // FIXME: This should not be GNU, but we since the attribute used is + // based on the spelling, and there is no true spelling for + // C++11 attributes, this isn't accepted. Attrs.addNew(PP.getIdentifierInfo("aligned"), KWLoc, 0, KWLoc, - 0, T.getOpenLocation(), ArgExprs.take(), 1, false, true); + 0, T.getOpenLocation(), ArgExprs.take(), 1, + AttributeList::AS_GNU); } /// ParseDeclarationSpecifiers @@ -1845,8 +2045,10 @@ void Parser::ParseDeclarationSpecifiers(DeclSpec &DS, DS.SetRangeStart(Tok.getLocation()); DS.SetRangeEnd(Tok.getLocation()); } - + bool EnteringContext = (DSContext == DSC_class || DSContext == DSC_top_level); + bool AttrsLastTime = false; + ParsedAttributesWithRange attrs(AttrFactory); while (1) { bool isInvalid = false; const char *PrevSpec = 0; @@ -1857,14 +2059,32 @@ void Parser::ParseDeclarationSpecifiers(DeclSpec &DS, switch (Tok.getKind()) { default: DoneWithDeclSpec: - // [C++0x] decl-specifier-seq: decl-specifier attribute-specifier-seq[opt] - MaybeParseCXX0XAttributes(DS.getAttributes()); + if (!AttrsLastTime) + ProhibitAttributes(attrs); + else + DS.takeAttributesFrom(attrs); // If this is not a declaration specifier token, we're done reading decl // specifiers. First verify that DeclSpec's are consistent. DS.Finish(Diags, PP); return; + case tok::l_square: + case tok::kw_alignas: + if (!isCXX11AttributeSpecifier()) + goto DoneWithDeclSpec; + + ProhibitAttributes(attrs); + // FIXME: It would be good to recover by accepting the attributes, + // but attempting to do that now would cause serious + // madness in terms of diagnostics. 
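ParseAlignmentSpecifier, reached from the C11 keyword handled later in this file, accepts either a constant expression or (per C11) a type; for instance (invented examples):

    _Alignas(16) char buf[64];    // C11: alignment given as a constant-expression
    _Alignas(double) char b2[8];  // C11: alignment taken from a type

The C++11 alignas spelling is instead recognized as an attribute-specifier by the kw_alignas case above, and, as the FIXME attached to the addNew call notes, the result is recorded under the GNU "aligned" spelling because the attribute machinery keys off spellings.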
+ attrs.clear(); + attrs.Range = SourceRange(); + + ParseCXX11Attributes(attrs); + AttrsLastTime = true; + continue; + case tok::code_completion: { Sema::ParserCompletionContext CCC = Sema::PCC_Namespace; if (DS.hasTypeSpecifier()) { @@ -1875,25 +2095,25 @@ void Parser::ParseDeclarationSpecifiers(DeclSpec &DS, Scope::FunctionPrototypeScope | Scope::AtCatchScope)) == 0; bool AllowNestedNameSpecifiers - = DSContext == DSC_top_level || + = DSContext == DSC_top_level || (DSContext == DSC_class && DS.isFriendSpecified()); Actions.CodeCompleteDeclSpec(getCurScope(), DS, - AllowNonIdentifiers, + AllowNonIdentifiers, AllowNestedNameSpecifiers); return cutOffParsing(); - } - + } + if (getCurScope()->getFnParent() || getCurScope()->getBlockParent()) CCC = Sema::PCC_LocalDeclarationSpecifiers; else if (TemplateInfo.Kind != ParsedTemplateInfo::NonTemplate) - CCC = DSContext == DSC_class? Sema::PCC_MemberTemplate + CCC = DSContext == DSC_class? Sema::PCC_MemberTemplate : Sema::PCC_Template; else if (DSContext == DSC_class) CCC = Sema::PCC_Class; else if (CurParsedObjCImpl) CCC = Sema::PCC_ObjCImplementation; - + Actions.CodeCompleteOrdinaryName(getCurScope(), CCC); return cutOffParsing(); } @@ -1910,7 +2130,7 @@ void Parser::ParseDeclarationSpecifiers(DeclSpec &DS, continue; case tok::annot_cxxscope: { - if (DS.hasTypeSpecifier()) + if (DS.hasTypeSpecifier() || DS.isTypeAltiVecVector()) goto DoneWithDeclSpec; CXXScopeSpec SS; @@ -1940,10 +2160,10 @@ void Parser::ParseDeclarationSpecifiers(DeclSpec &DS, // // the name is instead considered to name the constructor of // class C. - // + // // Thus, if the template-name is actually the constructor // name, then the code is ill-formed; this interpretation is - // reinforced by the NAD status of core issue 635. + // reinforced by the NAD status of core issue 635. TemplateIdAnnotation *TemplateId = takeTemplateIdAnnotation(Next); if ((DSContext == DSC_top_level || (DSContext == DSC_class && DS.isFriendSpecified())) && @@ -1980,7 +2200,7 @@ void Parser::ParseDeclarationSpecifiers(DeclSpec &DS, if (Tok.getAnnotationValue()) { ParsedType T = getTypeAnnotation(Tok); isInvalid = DS.SetTypeSpecType(DeclSpec::TST_typename, - Tok.getAnnotationEndLoc(), + Tok.getAnnotationEndLoc(), PrevSpec, DiagID, T); } else @@ -1996,7 +2216,7 @@ void Parser::ParseDeclarationSpecifiers(DeclSpec &DS, // check whether this is a constructor declaration. if ((DSContext == DSC_top_level || (DSContext == DSC_class && DS.isFriendSpecified())) && - Actions.isCurrentClassName(*Next.getIdentifierInfo(), getCurScope(), + Actions.isCurrentClassName(*Next.getIdentifierInfo(), getCurScope(), &SS)) { if (isConstructorDeclarator()) goto DoneWithDeclSpec; @@ -2049,7 +2269,7 @@ void Parser::ParseDeclarationSpecifiers(DeclSpec &DS, DiagID, T); } else DS.SetTypeSpecError(); - + if (isInvalid) break; @@ -2058,10 +2278,10 @@ void Parser::ParseDeclarationSpecifiers(DeclSpec &DS, // Objective-C supports syntax of the form 'id' where 'id' // is a specific typedef and 'itf' where 'itf' is an - // Objective-C interface. + // Objective-C interface. if (Tok.is(tok::less) && getLangOpts().ObjC1) ParseObjCProtocolQualifiers(DS); - + continue; } @@ -2082,7 +2302,7 @@ void Parser::ParseDeclarationSpecifiers(DeclSpec &DS, // We're done with the declaration-specifiers. 
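The AttrsLastTime bookkeeping above enforces the C++11 grammar rule that an attribute-specifier-seq may only close a decl-specifier-seq; sketching the two cases (invented example):

    int alignas(16) ok;           // attributes end the decl-specifier-seq: accepted
    // int alignas(16) const bad; // ill-formed: a decl-specifier follows the
                                  // attributes, so ProhibitAttributes fires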
goto DoneWithDeclSpec; - + // typedef-name case tok::kw_decltype: case tok::identifier: { @@ -2108,6 +2328,11 @@ void Parser::ParseDeclarationSpecifiers(DeclSpec &DS, if (TryAltiVecToken(DS, Loc, PrevSpec, DiagID, isInvalid)) break; + // [AltiVec] 2.2: [If the 'vector' specifier is used] The syntax does not + // allow the use of a typedef name as a type specifier. + if (DS.isTypeAltiVecVector()) + goto DoneWithDeclSpec; + ParsedType TypeRep = Actions.getTypeName(*Tok.getIdentifierInfo(), Tok.getLocation(), getCurScope()); @@ -2136,10 +2361,10 @@ void Parser::ParseDeclarationSpecifiers(DeclSpec &DS, // Objective-C supports syntax of the form 'id' where 'id' // is a specific typedef and 'itf' where 'itf' is an - // Objective-C interface. + // Objective-C interface. if (Tok.is(tok::less) && getLangOpts().ObjC1) ParseObjCProtocolQualifiers(DS); - + // Need to support trailing type qualifiers (e.g. "id
<p>
const"). // If a type specifier follows, it will be diagnosed elsewhere. continue; @@ -2179,9 +2404,16 @@ void Parser::ParseDeclarationSpecifiers(DeclSpec &DS, continue; // Microsoft single token adornments. - case tok::kw___forceinline: - // FIXME: Add handling here! - break; + case tok::kw___forceinline: { + isInvalid = DS.SetFunctionSpecInline(Loc, PrevSpec, DiagID); + IdentifierInfo *AttrName = Tok.getIdentifierInfo(); + SourceLocation AttrNameLoc = ConsumeToken(); + // FIXME: This does not work correctly if it is set to be a declspec + // attribute, and a GNU attribute is simply incorrect. + DS.getAttributes().addNew(AttrName, AttrNameLoc, 0, AttrNameLoc, 0, + SourceLocation(), 0, 0, AttributeList::AS_GNU); + continue; + } case tok::kw___ptr64: case tok::kw___ptr32: @@ -2266,7 +2498,7 @@ void Parser::ParseDeclarationSpecifiers(DeclSpec &DS, // alignment-specifier case tok::kw__Alignas: if (!getLangOpts().C11) - Diag(Tok, diag::ext_c11_alignas); + Diag(Tok, diag::ext_c11_alignment) << Tok.getName(); ParseAlignmentSpecifier(DS.getAttributes()); continue; @@ -2285,7 +2517,7 @@ void Parser::ParseDeclarationSpecifiers(DeclSpec &DS, case tok::kw___module_private__: isInvalid = DS.setModulePrivateSpec(Loc, PrevSpec, DiagID); break; - + // constexpr case tok::kw_constexpr: isInvalid = DS.SetConstexprSpec(Loc, PrevSpec, DiagID); @@ -2422,15 +2654,15 @@ void Parser::ParseDeclarationSpecifiers(DeclSpec &DS, // cv-qualifier: case tok::kw_const: isInvalid = DS.SetTypeQual(DeclSpec::TQ_const, Loc, PrevSpec, DiagID, - getLangOpts()); + getLangOpts(), /*IsTypeSpec*/true); break; case tok::kw_volatile: isInvalid = DS.SetTypeQual(DeclSpec::TQ_volatile, Loc, PrevSpec, DiagID, - getLangOpts()); + getLangOpts(), /*IsTypeSpec*/true); break; case tok::kw_restrict: isInvalid = DS.SetTypeQual(DeclSpec::TQ_restrict, Loc, PrevSpec, DiagID, - getLangOpts()); + getLangOpts(), /*IsTypeSpec*/true); break; // C++ typename-specifier: @@ -2461,7 +2693,7 @@ void Parser::ParseDeclarationSpecifiers(DeclSpec &DS, continue; // OpenCL qualifiers: - case tok::kw_private: + case tok::kw_private: if (!getLangOpts().OpenCL) goto DoneWithDeclSpec; case tok::kw___private: @@ -2473,7 +2705,7 @@ void Parser::ParseDeclarationSpecifiers(DeclSpec &DS, case tok::kw___read_write: ParseOpenCLQualifiers(DS); break; - + case tok::less: // GCC ObjC supports types like "" as a synonym for // "id". This is hopelessly old fashioned and dangerous, @@ -2485,7 +2717,7 @@ void Parser::ParseDeclarationSpecifiers(DeclSpec &DS, Diag(Loc, diag::warn_objc_protocol_qualifier_missing_id) << FixItHint::CreateInsertion(Loc, "id") << SourceRange(Loc, DS.getSourceRange().getEnd()); - + // Need to support trailing type qualifiers (e.g. "id
<p>
const"). // If a type specifier follows, it will be diagnosed elsewhere. continue; @@ -2494,7 +2726,7 @@ void Parser::ParseDeclarationSpecifiers(DeclSpec &DS, if (isInvalid) { assert(PrevSpec && "Method did not return previous specifier!"); assert(DiagID); - + if (DiagID == diag::ext_duplicate_declspec) Diag(Tok, DiagID) << PrevSpec << FixItHint::CreateRemoval(Tok.getLocation()); @@ -2505,6 +2737,8 @@ void Parser::ParseDeclarationSpecifiers(DeclSpec &DS, DS.SetRangeEnd(Tok.getLocation()); if (DiagID != diag::err_bool_redeclaration) ConsumeToken(); + + AttrsLastTime = false; } } @@ -2526,8 +2760,8 @@ void Parser::ParseDeclarationSpecifiers(DeclSpec &DS, /// [GNU] declarator[opt] ':' constant-expression attributes[opt] /// void Parser:: -ParseStructDeclaration(DeclSpec &DS, FieldCallback &Fields) { - +ParseStructDeclaration(ParsingDeclSpec &DS, FieldCallback &Fields) { + if (Tok.is(tok::kw___extension__)) { // __extension__ silences extension warnings in the subexpression. ExtensionRAIIObject O(Diags); // Use RAII to do this. @@ -2541,7 +2775,9 @@ ParseStructDeclaration(DeclSpec &DS, FieldCallback &Fields) { // If there are no declarators, this is a free-standing declaration // specifier. Let the actions module cope with it. if (Tok.is(tok::semi)) { - Actions.ParsedFreeStandingDeclSpec(getCurScope(), AS_none, DS); + Decl *TheDecl = Actions.ParsedFreeStandingDeclSpec(getCurScope(), AS_none, + DS); + DS.complete(TheDecl); return; } @@ -2549,8 +2785,7 @@ ParseStructDeclaration(DeclSpec &DS, FieldCallback &Fields) { bool FirstDeclarator = true; SourceLocation CommaLoc; while (1) { - ParsingDeclRAIIObject PD(*this); - FieldDeclarator DeclaratorInfo(DS); + ParsingFieldDeclarator DeclaratorInfo(*this, DS); DeclaratorInfo.D.setCommaLoc(CommaLoc); // Attributes are only allowed here on successive declarators. @@ -2578,8 +2813,7 @@ ParseStructDeclaration(DeclSpec &DS, FieldCallback &Fields) { MaybeParseGNUAttributes(DeclaratorInfo.D); // We're done with this declarator; invoke the callback. - Decl *D = Fields.invoke(DeclaratorInfo); - PD.complete(D); + Fields.invoke(DeclaratorInfo); // If we don't have a comma, it is either the end of the list (a ';') // or an error, bail out. @@ -2630,16 +2864,10 @@ void Parser::ParseStructUnionBody(SourceLocation RecordLoc, // Check for extraneous top-level semicolon. if (Tok.is(tok::semi)) { - Diag(Tok, diag::ext_extra_struct_semi) - << DeclSpec::getSpecifierName((DeclSpec::TST)TagType) - << FixItHint::CreateRemoval(Tok.getLocation()); - ConsumeToken(); + ConsumeExtraSemi(InsideStruct, TagType); continue; } - // Parse all the comma separated declarators. - DeclSpec DS(AttrFactory); - if (!Tok.is(tok::at)) { struct CFieldCallback : FieldCallback { Parser &P; @@ -2650,16 +2878,18 @@ void Parser::ParseStructUnionBody(SourceLocation RecordLoc, SmallVectorImpl &FieldDecls) : P(P), TagDecl(TagDecl), FieldDecls(FieldDecls) {} - virtual Decl *invoke(FieldDeclarator &FD) { + void invoke(ParsingFieldDeclarator &FD) { // Install the declarator into the current TagDecl. Decl *Field = P.Actions.ActOnField(P.getCurScope(), TagDecl, FD.D.getDeclSpec().getSourceRange().getBegin(), FD.D, FD.BitfieldSize); FieldDecls.push_back(Field); - return Field; + FD.complete(Field); } } Callback(*this, TagDecl, FieldDecls); + // Parse all the comma separated declarators. 
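For illustration, the extraneous-semicolon case that now goes through the shared ConsumeExtraSemi helper (rather than an inline ext_extra_struct_semi diagnostic) looks like:

    struct S {
      int a;
      ;        // stray ';' inside a struct: still diagnosed, now by the helper
    };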
+ ParsingDeclSpec DS(*this); ParseStructDeclaration(DS, Callback); } else { // Handle @defs ConsumeToken(); @@ -2752,30 +2982,46 @@ void Parser::ParseEnumSpecifier(SourceLocation StartLoc, DeclSpec &DS, return cutOffParsing(); } + // If attributes exist after tag, parse them. + ParsedAttributesWithRange attrs(AttrFactory); + MaybeParseGNUAttributes(attrs); + MaybeParseCXX0XAttributes(attrs); + + // If declspecs exist after tag, parse them. + while (Tok.is(tok::kw___declspec)) + ParseMicrosoftDeclSpec(attrs); + SourceLocation ScopedEnumKWLoc; bool IsScopedUsingClassTag = false; + // In C++11, recognize 'enum class' and 'enum struct'. if (getLangOpts().CPlusPlus0x && (Tok.is(tok::kw_class) || Tok.is(tok::kw_struct))) { Diag(Tok, diag::warn_cxx98_compat_scoped_enum); IsScopedUsingClassTag = Tok.is(tok::kw_class); ScopedEnumKWLoc = ConsumeToken(); - } - // C++11 [temp.explicit]p12: The usual access controls do not apply to names - // used to specify explicit instantiations. We extend this to also cover - // explicit specializations. - Sema::SuppressAccessChecksRAII SuppressAccess(Actions, - TemplateInfo.Kind == ParsedTemplateInfo::ExplicitInstantiation || - TemplateInfo.Kind == ParsedTemplateInfo::ExplicitSpecialization); + // Attributes are not allowed between these keywords. Diagnose, + // but then just treat them like they appeared in the right place. + ProhibitAttributes(attrs); - // If attributes exist after tag, parse them. - ParsedAttributes attrs(AttrFactory); - MaybeParseGNUAttributes(attrs); + // They are allowed afterwards, though. + MaybeParseGNUAttributes(attrs); + MaybeParseCXX0XAttributes(attrs); + while (Tok.is(tok::kw___declspec)) + ParseMicrosoftDeclSpec(attrs); + } - // If declspecs exist after tag, parse them. - while (Tok.is(tok::kw___declspec)) - ParseMicrosoftDeclSpec(attrs); + // C++11 [temp.explicit]p12: + // The usual access controls do not apply to names used to specify + // explicit instantiations. + // We extend this to also cover explicit specializations. Note that + // we don't suppress if this turns out to be an elaborated type + // specifier. + bool shouldDelayDiagsInTag = + (TemplateInfo.Kind == ParsedTemplateInfo::ExplicitInstantiation || + TemplateInfo.Kind == ParsedTemplateInfo::ExplicitSpecialization); + SuppressAccessChecks diagsFromTag(*this, shouldDelayDiagsInTag); // Enum definitions should not be parsed in a trailing-return-type. bool AllowDeclaration = DSC != DSC_trailing; @@ -2789,8 +3035,8 @@ void Parser::ParseEnumSpecifier(SourceLocation StartLoc, DeclSpec &DS, // "enum foo : bar;" is not a potential typo for "enum foo::bar;" // if a fixed underlying type is allowed. ColonProtectionRAIIObject X(*this, AllowFixedUnderlyingType); - - if (ParseOptionalCXXScopeSpecifier(SS, ParsedType(), + + if (ParseOptionalCXXScopeSpecifier(SS, ParsedType(), /*EnteringContext=*/false)) return; @@ -2831,32 +3077,35 @@ void Parser::ParseEnumSpecifier(SourceLocation StartLoc, DeclSpec &DS, IsScopedUsingClassTag = false; } - // Stop suppressing access control now we've parsed the enum name. - SuppressAccess.done(); + // Okay, end the suppression area. We'll decide whether to emit the + // diagnostics in a second. + if (shouldDelayDiagsInTag) + diagsFromTag.done(); TypeResult BaseType; // Parse the fixed underlying type. 
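Taken together, the reordered attribute parsing above admits all of these enum-heads (invented examples):

    enum __attribute__((packed)) E1 { A };        // GNU attributes after 'enum'
    enum class E2 : unsigned { B };               // C++11 scoped enum, fixed base
    enum class __attribute__((packed)) E3 { C };  // attributes after 'enum class'
    // enum __attribute__((packed)) class E4 { D };  // between the keywords:
                                                     // diagnosed, then treated as
                                                     // if written after them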
+ bool CanBeBitfield = getCurScope()->getFlags() & Scope::ClassScope; if (AllowFixedUnderlyingType && Tok.is(tok::colon)) { bool PossibleBitfield = false; - if (getCurScope()->getFlags() & Scope::ClassScope) { + if (CanBeBitfield) { // If we're in class scope, this can either be an enum declaration with // an underlying type, or a declaration of a bitfield member. We try to // use a simple disambiguation scheme first to catch the common cases - // (integer literal, sizeof); if it's still ambiguous, we then consider - // anything that's a simple-type-specifier followed by '(' as an - // expression. This suffices because function types are not valid + // (integer literal, sizeof); if it's still ambiguous, we then consider + // anything that's a simple-type-specifier followed by '(' as an + // expression. This suffices because function types are not valid // underlying types anyway. TPResult TPR = isExpressionOrTypeSpecifierSimple(NextToken().getKind()); - // If the next token starts an expression, we know we're parsing a + // If the next token starts an expression, we know we're parsing a // bit-field. This is the common case. if (TPR == TPResult::True()) PossibleBitfield = true; // If the next token starts a type-specifier-seq, it may be either a // a fixed underlying type or the start of a function-style cast in C++; - // lookahead one more token to see if it's obvious that we have a + // lookahead one more token to see if it's obvious that we have a // fixed underlying type. - else if (TPR == TPResult::False() && + else if (TPR == TPResult::False() && GetLookAheadToken(2).getKind() == tok::semi) { // Consume the ':'. ConsumeToken(); @@ -2894,7 +3143,7 @@ void Parser::ParseEnumSpecifier(SourceLocation StartLoc, DeclSpec &DS, if (!PossibleBitfield) { SourceRange Range; BaseType = ParseTypeName(&Range); - + if (!getLangOpts().CPlusPlus0x && !getLangOpts().ObjC2) Diag(StartLoc, diag::ext_ms_enum_fixed_underlying_type) << Range; @@ -2914,16 +3163,39 @@ void Parser::ParseEnumSpecifier(SourceLocation StartLoc, DeclSpec &DS, // enum foo {..}; void bar() { enum foo x; } <- use of old foo. // Sema::TagUseKind TUK; - if (DS.isFriendSpecified()) - TUK = Sema::TUK_Friend; - else if (!AllowDeclaration) + if (!AllowDeclaration) { TUK = Sema::TUK_Reference; - else if (Tok.is(tok::l_brace)) - TUK = Sema::TUK_Definition; - else if (Tok.is(tok::semi) && DSC != DSC_type_specifier) - TUK = Sema::TUK_Declaration; - else + } else if (Tok.is(tok::l_brace)) { + if (DS.isFriendSpecified()) { + Diag(Tok.getLocation(), diag::err_friend_decl_defines_type) + << SourceRange(DS.getFriendSpecLoc()); + ConsumeBrace(); + SkipUntil(tok::r_brace); + TUK = Sema::TUK_Friend; + } else { + TUK = Sema::TUK_Definition; + } + } else if (DSC != DSC_type_specifier && + (Tok.is(tok::semi) || + (Tok.isAtStartOfLine() && + !isValidAfterTypeSpecifier(CanBeBitfield)))) { + TUK = DS.isFriendSpecified() ? Sema::TUK_Friend : Sema::TUK_Declaration; + if (Tok.isNot(tok::semi)) { + // A semicolon was missing after this declaration. Diagnose and recover. + ExpectAndConsume(tok::semi, diag::err_expected_semi_after_tagdecl, + "enum"); + PP.EnterToken(Tok); + Tok.setKind(tok::semi); + } + } else { TUK = Sema::TUK_Reference; + } + + // If this is an elaborated type specifier, and we delayed + // diagnostics before, just merge them into the current pool. 
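The class-scope disambiguation above is what separates a fixed underlying type from a bit-field; schematically (invented example):

    struct S {
      enum E : int { A } e;  // ':' followed by a type: fixed underlying type
      enum F { B } f : 2;    // named bit-field of enum type
      enum E : 2;            // integer literal after ':': an unnamed bit-field
    };                       // of the already-declared 'enum E', width 2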
+ if (TUK == Sema::TUK_Reference && shouldDelayDiagsInTag) { + diagsFromTag.redelay(); + } MultiTemplateParamsArg TParams; if (TemplateInfo.Kind != ParsedTemplateInfo::NonTemplate && @@ -2947,6 +3219,9 @@ void Parser::ParseEnumSpecifier(SourceLocation StartLoc, DeclSpec &DS, TemplateInfo.TemplateParams->size()); } + if (TUK == Sema::TUK_Reference) + ProhibitAttributes(attrs); + if (!Name && TUK != Sema::TUK_Definition) { Diag(Tok, diag::err_enumerator_unnamed_no_def); @@ -2966,52 +3241,44 @@ void Parser::ParseEnumSpecifier(SourceLocation StartLoc, DeclSpec &DS, IsScopedUsingClassTag, BaseType); if (IsDependent) { - // This enum has a dependent nested-name-specifier. Handle it as a + // This enum has a dependent nested-name-specifier. Handle it as a // dependent tag. if (!Name) { DS.SetTypeSpecError(); Diag(Tok, diag::err_expected_type_name_after_typename); return; } - + TypeResult Type = Actions.ActOnDependentTag(getCurScope(), DeclSpec::TST_enum, - TUK, SS, Name, StartLoc, + TUK, SS, Name, StartLoc, NameLoc); if (Type.isInvalid()) { DS.SetTypeSpecError(); return; } - + if (DS.SetTypeSpecType(DeclSpec::TST_typename, StartLoc, NameLoc.isValid() ? NameLoc : StartLoc, PrevSpec, DiagID, Type.get())) Diag(StartLoc, DiagID) << PrevSpec; - + return; } if (!TagDecl) { - // The action failed to produce an enumeration tag. If this is a + // The action failed to produce an enumeration tag. If this is a // definition, consume the entire definition. if (Tok.is(tok::l_brace) && TUK != Sema::TUK_Reference) { ConsumeBrace(); SkipUntil(tok::r_brace); } - + DS.SetTypeSpecError(); return; } - if (Tok.is(tok::l_brace) && TUK != Sema::TUK_Reference) { - if (TUK == Sema::TUK_Friend) { - Diag(Tok, diag::err_friend_decl_defines_type) - << SourceRange(DS.getFriendSpecLoc()); - ConsumeBrace(); - SkipUntil(tok::r_brace); - } else { - ParseEnumBody(StartLoc, TagDecl); - } - } + if (Tok.is(tok::l_brace) && TUK != Sema::TUK_Reference) + ParseEnumBody(StartLoc, TagDecl); if (DS.SetTypeSpecType(DeclSpec::TST_enum, StartLoc, NameLoc.isValid() ? NameLoc : StartLoc, @@ -3051,13 +3318,15 @@ void Parser::ParseEnumBody(SourceLocation StartLoc, Decl *EnumDecl) { SourceLocation IdentLoc = ConsumeToken(); // If attributes exist after the enumerator, parse them. - ParsedAttributes attrs(AttrFactory); + ParsedAttributesWithRange attrs(AttrFactory); MaybeParseGNUAttributes(attrs); + MaybeParseCXX0XAttributes(attrs); + ProhibitAttributes(attrs); SourceLocation EqualLoc; ExprResult AssignedVal; - ParsingDeclRAIIObject PD(*this); - + ParsingDeclRAIIObject PD(*this, ParsingDeclRAIIObject::NoParent); + if (Tok.is(tok::equal)) { EqualLoc = ConsumeToken(); AssignedVal = ParseConstantExpression(); @@ -3072,26 +3341,27 @@ void Parser::ParseEnumBody(SourceLocation StartLoc, Decl *EnumDecl) { attrs.getList(), EqualLoc, AssignedVal.release()); PD.complete(EnumConstDecl); - + EnumConstantDecls.push_back(EnumConstDecl); LastEnumConstDecl = EnumConstDecl; if (Tok.is(tok::identifier)) { // We're missing a comma between enumerators. SourceLocation Loc = PP.getLocForEndOfToken(PrevTokLocation); - Diag(Loc, diag::err_enumerator_list_missing_comma) + Diag(Loc, diag::err_enumerator_list_missing_comma) << FixItHint::CreateInsertion(Loc, ", "); continue; } - + if (Tok.isNot(tok::comma)) break; SourceLocation CommaLoc = ConsumeToken(); if (Tok.isNot(tok::identifier)) { if (!getLangOpts().C99 && !getLangOpts().CPlusPlus0x) - Diag(CommaLoc, diag::ext_enumerator_list_comma) - << getLangOpts().CPlusPlus + Diag(CommaLoc, getLangOpts().CPlusPlus ? 
+ diag::ext_enumerator_list_comma_cxx : + diag::ext_enumerator_list_comma_c) << FixItHint::CreateRemoval(CommaLoc); else if (getLangOpts().CPlusPlus0x) Diag(CommaLoc, diag::warn_cxx98_compat_enumerator_list_comma) @@ -3114,6 +3384,18 @@ void Parser::ParseEnumBody(SourceLocation StartLoc, Decl *EnumDecl) { EnumScope.Exit(); Actions.ActOnTagFinishDefinition(getCurScope(), EnumDecl, T.getCloseLocation()); + + // The next token must be valid after an enum definition. If not, a ';' + // was probably forgotten. + bool CanBeBitfield = getCurScope()->getFlags() & Scope::ClassScope; + if (!isValidAfterTypeSpecifier(CanBeBitfield)) { + ExpectAndConsume(tok::semi, diag::err_expected_semi_after_tagdecl, "enum"); + // Push this token back into the preprocessor and change our current token + // to ';' so that the rest of the code recovers as though there were an + // ';' after the definition. + PP.EnterToken(Tok); + Tok.setKind(tok::semi); + } } /// isTypeSpecifierQualifier - Return true if the current token could be the @@ -3171,14 +3453,14 @@ bool Parser::isKnownToBeTypeSpecifier(const Token &Tok) const { case tok::kw__Decimal64: case tok::kw__Decimal128: case tok::kw___vector: - + // struct-or-union-specifier (C99) or class-specifier (C++) case tok::kw_class: case tok::kw_struct: case tok::kw_union: // enum-specifier case tok::kw_enum: - + // typedef-name case tok::annot_typename: return true; @@ -3319,16 +3601,16 @@ bool Parser::isDeclarationSpecifier(bool DisambiguatingWithExpression) { return true; if (Tok.is(tok::identifier)) return false; - + // If we're in Objective-C and we have an Objective-C class type followed - // by an identifier and then either ':' or ']', in a place where an + // by an identifier and then either ':' or ']', in a place where an // expression is permitted, then this is probably a class message send // missing the initial '['. In this case, we won't consider this to be // the start of a declaration. - if (DisambiguatingWithExpression && + if (DisambiguatingWithExpression && isStartOfObjCClassMessageMissingOpenBracket()) return false; - + return isDeclarationSpecifier(); case tok::coloncolon: // ::foo::bar @@ -3353,7 +3635,7 @@ bool Parser::isDeclarationSpecifier(bool DisambiguatingWithExpression) { // Modules case tok::kw___module_private__: - + // type-specifiers case tok::kw_short: case tok::kw_long: @@ -3423,7 +3705,7 @@ bool Parser::isDeclarationSpecifier(bool DisambiguatingWithExpression) { case tok::annot_typename: return !DisambiguatingWithExpression || !isStartOfObjCClassMessageMissingOpenBracket(); - + case tok::kw___declspec: case tok::kw___cdecl: case tok::kw___stdcall: @@ -3453,7 +3735,7 @@ bool Parser::isConstructorDeclarator() { // Parse the C++ scope specifier. 
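Both recovery paths in ParseEnumBody can be seen on a single mangled enum (invented example):

    enum Color {
      Red
      Green,   // missing ',' after 'Red': diagnosed with an insertion fixit
    }          // trailing ',' above: an extension in C90/C++98, fine in C99/C++11
    int x;     // missing ';' after '}': the parser injects a ';' token and
               // recovers, so 'int x;' still parses as its own declaration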
CXXScopeSpec SS; - if (ParseOptionalCXXScopeSpecifier(SS, ParsedType(), + if (ParseOptionalCXXScopeSpecifier(SS, ParsedType(), /*EnteringContext=*/true)) { TPA.Revert(); return false; @@ -3540,10 +3822,10 @@ bool Parser::isConstructorDeclarator() { /// ParseTypeQualifierListOpt /// type-qualifier-list: [C99 6.7.5] /// type-qualifier -/// [vendor] attributes +/// [vendor] attributes /// [ only if VendorAttributesAllowed=true ] /// type-qualifier-list type-qualifier -/// [vendor] type-qualifier-list attributes +/// [vendor] type-qualifier-list attributes /// [ only if VendorAttributesAllowed=true ] /// [C++0x] attribute-specifier[opt] is allowed before cv-qualifier-seq /// [ only if CXX0XAttributesAllowed=true ] @@ -3571,22 +3853,22 @@ void Parser::ParseTypeQualifierListOpt(DeclSpec &DS, case tok::code_completion: Actions.CodeCompleteTypeQualifiers(DS); return cutOffParsing(); - + case tok::kw_const: isInvalid = DS.SetTypeQual(DeclSpec::TQ_const , Loc, PrevSpec, DiagID, - getLangOpts()); + getLangOpts(), /*IsTypeSpec*/false); break; case tok::kw_volatile: isInvalid = DS.SetTypeQual(DeclSpec::TQ_volatile, Loc, PrevSpec, DiagID, - getLangOpts()); + getLangOpts(), /*IsTypeSpec*/false); break; case tok::kw_restrict: isInvalid = DS.SetTypeQual(DeclSpec::TQ_restrict, Loc, PrevSpec, DiagID, - getLangOpts()); + getLangOpts(), /*IsTypeSpec*/false); break; // OpenCL qualifiers: - case tok::kw_private: + case tok::kw_private: if (!getLangOpts().OpenCL) goto DoneWithTypeQuals; case tok::kw___private: @@ -3692,7 +3974,7 @@ void Parser::ParseDeclaratorInternal(Declarator &D, DirectDeclParseFunction DirectDeclParser) { if (Diags.hasAllExtensionsSilenced()) D.setExtension(); - + // C++ member pointers start with a '::' or a nested-name. // Member pointers get special handling, since there's no place for the // scope spec in the generic path below. @@ -3886,7 +4168,7 @@ void Parser::ParseDirectDeclarator(Declarator &D) { if (D.getCXXScopeSpec().isEmpty()) { bool EnteringContext = D.getContext() == Declarator::FileContext || D.getContext() == Declarator::MemberContext; - ParseOptionalCXXScopeSpecifier(D.getCXXScopeSpec(), ParsedType(), + ParseOptionalCXXScopeSpecifier(D.getCXXScopeSpec(), ParsedType(), EnteringContext); } @@ -3899,9 +4181,9 @@ void Parser::ParseDirectDeclarator(Declarator &D) { // C++0x [dcl.fct]p14: // There is a syntactic ambiguity when an ellipsis occurs at the end - // of a parameter-declaration-clause without a preceding comma. In - // this case, the ellipsis is parsed as part of the - // abstract-declarator if the type of the parameter names a template + // of a parameter-declaration-clause without a preceding comma. In + // this case, the ellipsis is parsed as part of the + // abstract-declarator if the type of the parameter names a template // parameter pack that has not been expanded; otherwise, it is parsed // as part of the parameter-declaration-clause. if (Tok.is(tok::ellipsis) && D.getCXXScopeSpec().isEmpty() && @@ -3940,9 +4222,9 @@ void Parser::ParseDirectDeclarator(Declarator &D) { AllowConstructorName = (D.getContext() == Declarator::MemberContext); SourceLocation TemplateKWLoc; - if (ParseUnqualifiedId(D.getCXXScopeSpec(), - /*EnteringContext=*/true, - /*AllowDestructorName=*/true, + if (ParseUnqualifiedId(D.getCXXScopeSpec(), + /*EnteringContext=*/true, + /*AllowDestructorName=*/true, AllowConstructorName, ParsedType(), TemplateKWLoc, @@ -3992,6 +4274,8 @@ void Parser::ParseDirectDeclarator(Declarator &D) { // portion is empty), if an abstract-declarator is allowed. 
D.SetIdentifier(0, Tok.getLocation()); } else { + if (Tok.getKind() == tok::annot_pragma_parser_crash) + *(volatile int*) 0x11 = 0; if (D.getContext() == Declarator::MemberContext) Diag(Tok, diag::err_expected_member_name_or_semi) << D.getDeclSpec().getSourceRange(); @@ -4020,17 +4304,14 @@ void Parser::ParseDirectDeclarator(Declarator &D) { // The paren may be part of a C++ direct initializer, eg. "int x(1);". // In such a case, check if we actually have a function declarator; if it // is not, the declarator has been fully parsed. - if (getLangOpts().CPlusPlus && D.mayBeFollowedByCXXDirectInit()) { - // When not in file scope, warn for ambiguous function declarators, just - // in case the author intended it as a variable definition. - bool warnIfAmbiguous = D.getContext() != Declarator::FileContext; - if (!isCXXFunctionDeclarator(warnIfAmbiguous)) - break; - } + bool IsAmbiguous = false; + if (getLangOpts().CPlusPlus && D.mayBeFollowedByCXXDirectInit() && + !isCXXFunctionDeclarator(&IsAmbiguous)) + break; ParsedAttributes attrs(AttrFactory); BalancedDelimiterTracker T(*this, tok::l_paren); T.consumeOpen(); - ParseFunctionDeclarator(D, attrs, T); + ParseFunctionDeclarator(D, attrs, T, IsAmbiguous); PrototypeScope.Exit(); } else if (Tok.is(tok::l_square)) { ParseBracketDeclarator(D); @@ -4038,7 +4319,7 @@ void Parser::ParseDirectDeclarator(Declarator &D) { break; } } -} +} /// ParseParenDeclarator - We parsed the declarator D up to a paren. This is /// only called before the identifier, so these are most likely just grouping @@ -4124,7 +4405,7 @@ void Parser::ParseParenDeclarator(Declarator &D) { ParseDeclaratorInternal(D, &Parser::ParseDirectDeclarator); // Match the ')'. T.consumeClose(); - D.AddTypeInfo(DeclaratorChunk::getParen(T.getOpenLocation(), + D.AddTypeInfo(DeclaratorChunk::getParen(T.getOpenLocation(), T.getCloseLocation()), attrs, T.getCloseLocation()); @@ -4147,7 +4428,7 @@ void Parser::ParseParenDeclarator(Declarator &D) { // function prototype scope, including parameter declarators. ParseScope PrototypeScope(this, Scope::FunctionPrototypeScope|Scope::DeclScope); - ParseFunctionDeclarator(D, attrs, T, RequiresArg); + ParseFunctionDeclarator(D, attrs, T, false, RequiresArg); PrototypeScope.Exit(); } @@ -4173,8 +4454,9 @@ void Parser::ParseParenDeclarator(Declarator &D) { void Parser::ParseFunctionDeclarator(Declarator &D, ParsedAttributes &FirstArgAttrs, BalancedDelimiterTracker &Tracker, + bool IsAmbiguous, bool RequiresArg) { - assert(getCurScope()->isFunctionPrototypeScope() && + assert(getCurScope()->isFunctionPrototypeScope() && "Should call from a Function scope"); // lparen is already consumed! 
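The IsAmbiguous plumbing above replaces the old eager warning for parenthesized declarators; the ambiguity itself is the classic one (invented example):

    int x(1);      // clearly an initializer: 'x' is a variable
    int y(int());  // could be direct-initialization from a value-initialized int,
                   // but parses as a function declaration (the "most vexing
                   // parse"); the flag lets later semantic code warn about it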
assert(D.isPastIdentifier() && "Should not call before identifier!"); @@ -4198,7 +4480,7 @@ void Parser::ParseFunctionDeclarator(Declarator &D, SmallVector DynamicExceptionRanges; ExprResult NoexceptExpr; ParsedAttributes FnAttrs(AttrFactory); - ParsedType TrailingReturnType; + TypeResult TrailingReturnType; Actions.ActOnStartFunctionDeclarator(); @@ -4248,16 +4530,16 @@ void Parser::ParseFunctionDeclarator(Declarator &D, } // C++11 [expr.prim.general]p3: - // If a declaration declares a member function or member function - // template of a class X, the expression this is a prvalue of type + // If a declaration declares a member function or member function + // template of a class X, the expression this is a prvalue of type // "pointer to cv-qualifier-seq X" between the optional cv-qualifer-seq - // and the end of the function-definition, member-declarator, or + // and the end of the function-definition, member-declarator, or // declarator. - bool IsCXX11MemberFunction = + bool IsCXX11MemberFunction = getLangOpts().CPlusPlus0x && (D.getContext() == Declarator::MemberContext || (D.getContext() == Declarator::FileContext && - D.getCXXScopeSpec().isValid() && + D.getCXXScopeSpec().isValid() && Actions.CurContext->isRecord())); Sema::CXXThisScopeRAII ThisScope(Actions, dyn_cast(Actions.CurContext), @@ -4280,7 +4562,7 @@ void Parser::ParseFunctionDeclarator(Declarator &D, if (getLangOpts().CPlusPlus0x && Tok.is(tok::arrow)) { Diag(Tok, diag::warn_cxx98_compat_trailing_return_type); SourceRange Range; - TrailingReturnType = ParseTrailingReturnType(Range).get(); + TrailingReturnType = ParseTrailingReturnType(Range); if (Range.getEnd().isValid()) EndLoc = Range.getEnd(); } @@ -4290,7 +4572,7 @@ void Parser::ParseFunctionDeclarator(Declarator &D, // Remember that we parsed a function type, and remember the attributes. D.AddTypeInfo(DeclaratorChunk::getFunction(HasProto, /*isVariadic=*/EllipsisLoc.isValid(), - EllipsisLoc, + IsAmbiguous, EllipsisLoc, ParamInfo.data(), ParamInfo.size(), DS.getTypeQualifiers(), RefQualifierIsLValueRef, @@ -4303,7 +4585,7 @@ void Parser::ParseFunctionDeclarator(Declarator &D, DynamicExceptions.size(), NoexceptExpr.isUsable() ? NoexceptExpr.get() : 0, - Tracker.getOpenLocation(), + Tracker.getOpenLocation(), EndLoc, D, TrailingReturnType), FnAttrs, EndLoc); @@ -4528,7 +4810,7 @@ void Parser::ParseParameterDeclarationClause( // Consume the '='. ConsumeToken(); - // The argument isn't actually potentially evaluated unless it is + // The argument isn't actually potentially evaluated unless it is // used. EnterExpressionEvaluationContext Eval(Actions, Sema::PotentiallyEvaluatedIfUsed, @@ -4560,7 +4842,7 @@ void Parser::ParseParameterDeclarationClause( if (Tok.isNot(tok::comma)) { if (Tok.is(tok::ellipsis)) { EllipsisLoc = ConsumeToken(); // Consume the ellipsis. - + if (!getLangOpts().CPlusPlus) { // We have ellipsis without a preceding ',', which is ill-formed // in C. Complain and provide the fix. @@ -4568,7 +4850,7 @@ void Parser::ParseParameterDeclarationClause( << FixItHint::CreateInsertion(EllipsisLoc, ", "); } } - + break; } @@ -4598,7 +4880,7 @@ void Parser::ParseBracketDeclarator(Declarator &D) { T.consumeClose(); ParsedAttributes attrs(AttrFactory); MaybeParseCXX0XAttributes(attrs); - + // Remember that we parsed the empty array type. ExprResult NumElements; D.AddTypeInfo(DeclaratorChunk::getArray(0, false, false, 0, @@ -4646,7 +4928,7 @@ void Parser::ParseBracketDeclarator(Declarator &D) { // Handle the case where we have '[*]' as the array size. 
However, a leading // star could be the start of an expression, for example 'X[*p + 4]'. Verify - // the the token after the star is a ']'. Since stars in arrays are + // the token after the star is a ']'. Since stars in arrays are // infrequent, use of lookahead is not costly here. if (Tok.is(tok::star) && GetLookAheadToken(1).is(tok::r_square)) { ConsumeToken(); // Eat the '*'. diff --git a/lib/Parse/ParseDeclCXX.cpp b/lib/Parse/ParseDeclCXX.cpp index 5e6c4f5..3dc96cf 100644 --- a/lib/Parse/ParseDeclCXX.cpp +++ b/lib/Parse/ParseDeclCXX.cpp @@ -444,6 +444,13 @@ Decl *Parser::ParseUsingDeclaration(unsigned Context, CXXScopeSpec SS; SourceLocation TypenameLoc; bool IsTypeName; + ParsedAttributesWithRange attrs(AttrFactory); + + // FIXME: Simply skip the attributes and diagnose, don't bother parsing them. + MaybeParseCXX0XAttributes(attrs); + ProhibitAttributes(attrs); + attrs.clear(); + attrs.Range = SourceRange(); // Ignore optional 'typename'. // FIXME: This is wrong; we should parse this as a typename-specifier. @@ -480,7 +487,7 @@ Decl *Parser::ParseUsingDeclaration(unsigned Context, return 0; } - ParsedAttributes attrs(AttrFactory); + MaybeParseCXX0XAttributes(attrs); // Maybe this is an alias-declaration. bool IsAliasDecl = Tok.is(tok::equal); @@ -533,9 +540,14 @@ Decl *Parser::ParseUsingDeclaration(unsigned Context, TypeAlias = ParseTypeName(0, TemplateInfo.Kind ? Declarator::AliasTemplateContext : Declarator::AliasDeclContext, AS, OwnedType); - } else + } else { + // C++11 attributes are not allowed on a using-declaration, but GNU ones + // are. + ProhibitAttributes(attrs); + // Parse (optional) attributes (most likely GNU strong-using extension). MaybeParseGNUAttributes(attrs); + } // Eat ';'. DeclEnd = Tok.getLocation(); @@ -572,6 +584,7 @@ Decl *Parser::ParseUsingDeclaration(unsigned Context, MultiTemplateParamsArg TemplateParamsArg(Actions, TemplateParams ? TemplateParams->data() : 0, TemplateParams ? TemplateParams->size() : 0); + // FIXME: Propagate attributes. return Actions.ActOnAliasDeclaration(getCurScope(), AS, TemplateParamsArg, UsingLoc, Name, TypeAlias); } @@ -874,10 +887,12 @@ Parser::TypeResult Parser::ParseBaseTypeSpecifier(SourceLocation &BaseLoc, } // We have an identifier; check whether it is actually a type. + IdentifierInfo *CorrectedII = 0; ParsedType Type = Actions.getTypeName(*Id, IdLoc, getCurScope(), &SS, true, false, ParsedType(), /*IsCtorOrDtorName=*/false, - /*NonTrivialTypeSourceInfo=*/true); + /*NonTrivialTypeSourceInfo=*/true, + &CorrectedII); if (!Type) { Diag(IdLoc, diag::err_expected_class_name); return true; @@ -900,6 +915,77 @@ Parser::TypeResult Parser::ParseBaseTypeSpecifier(SourceLocation &BaseLoc, return Actions.ActOnTypeName(getCurScope(), DeclaratorInfo); } +void Parser::ParseMicrosoftInheritanceClassAttributes(ParsedAttributes &attrs) { + while (Tok.is(tok::kw___single_inheritance) || + Tok.is(tok::kw___multiple_inheritance) || + Tok.is(tok::kw___virtual_inheritance)) { + IdentifierInfo *AttrName = Tok.getIdentifierInfo(); + SourceLocation AttrNameLoc = ConsumeToken(); + attrs.addNew(AttrName, AttrNameLoc, 0, AttrNameLoc, 0, + SourceLocation(), 0, 0, AttributeList::AS_GNU); + } +} + +/// Determine whether the following tokens are valid after a type-specifier +/// which could be a standalone declaration. This will conservatively return +/// true if there's any doubt, and is appropriate for insert-';' fixits. 
+bool Parser::isValidAfterTypeSpecifier(bool CouldBeBitfield) { + // This switch enumerates the valid "follow" set for type-specifiers. + switch (Tok.getKind()) { + default: break; + case tok::semi: // struct foo {...} ; + case tok::star: // struct foo {...} * P; + case tok::amp: // struct foo {...} & R = ... + case tok::identifier: // struct foo {...} V ; + case tok::r_paren: //(struct foo {...} ) {4} + case tok::annot_cxxscope: // struct foo {...} a:: b; + case tok::annot_typename: // struct foo {...} a ::b; + case tok::annot_template_id: // struct foo {...} a ::b; + case tok::l_paren: // struct foo {...} ( x); + case tok::comma: // __builtin_offsetof(struct foo{...} , + return true; + case tok::colon: + return CouldBeBitfield; // enum E { ... } : 2; + // Type qualifiers + case tok::kw_const: // struct foo {...} const x; + case tok::kw_volatile: // struct foo {...} volatile x; + case tok::kw_restrict: // struct foo {...} restrict x; + case tok::kw_inline: // struct foo {...} inline foo() {}; + // Storage-class specifiers + case tok::kw_static: // struct foo {...} static x; + case tok::kw_extern: // struct foo {...} extern x; + case tok::kw_typedef: // struct foo {...} typedef x; + case tok::kw_register: // struct foo {...} register x; + case tok::kw_auto: // struct foo {...} auto x; + case tok::kw_mutable: // struct foo {...} mutable x; + case tok::kw_constexpr: // struct foo {...} constexpr x; + // As shown above, type qualifiers and storage class specifiers absolutely + // can occur after class specifiers according to the grammar. However, + // almost no one actually writes code like this. If we see one of these, + // it is much more likely that someone missed a semi colon and the + // type/storage class specifier we're seeing is part of the *next* + // intended declaration, as in: + // + // struct foo { ... } + // typedef int X; + // + // We'd really like to emit a missing semicolon error instead of emitting + // an error on the 'int' saying that you can't have two type specifiers in + // the same declaration of X. Because of this, we look ahead past this + // token to see if it's a type specifier. If so, we know the code is + // otherwise invalid, so we can produce the expected semi error. + if (!isKnownToBeTypeSpecifier(NextToken())) + return true; + break; + case tok::r_brace: // struct bar { struct foo {...} } + // Missing ';' at end of struct is accepted as an extension in C mode. + if (!getLangOpts().CPlusPlus) + return true; + break; + } + return false; +} + /// ParseClassSpecifier - Parse a C++ class-specifier [C++ class] or /// elaborated-type-specifier [C++ dcl.type.elab]; we can't tell which /// until we reach the start of a definition or see a token that @@ -968,11 +1054,15 @@ void Parser::ParseClassSpecifier(tok::TokenKind TagTokKind, // As an extension we do not perform access checking on the names used to // specify explicit specializations either. This is important to allow // specializing traits classes for private types. - Sema::SuppressAccessChecksRAII SuppressAccess(Actions, - TemplateInfo.Kind == ParsedTemplateInfo::ExplicitInstantiation || - TemplateInfo.Kind == ParsedTemplateInfo::ExplicitSpecialization); + // + // Note that we don't suppress if this turns out to be an elaborated + // type specifier. 
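A few concrete instances of the follow set encoded above (invented examples):

    struct P { int v; } const cp = { 1 };  // cv-qualifier after a definition: valid
    struct Q { int v; } *qp;               // pointer declarator: valid
    struct R { int v; }                    // 'typedef' followed by a type specifier
    typedef int X;                         // is almost certainly a lost ';', so the
                                           // lookahead returns false and the caller
                                           // inserts the missing semicolon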
+ bool shouldDelayDiagsInTag = + (TemplateInfo.Kind == ParsedTemplateInfo::ExplicitInstantiation || + TemplateInfo.Kind == ParsedTemplateInfo::ExplicitSpecialization); + SuppressAccessChecks diagsFromTag(*this, shouldDelayDiagsInTag); - ParsedAttributes attrs(AttrFactory); + ParsedAttributesWithRange attrs(AttrFactory); // If attributes exist after tag, parse them. if (Tok.is(tok::kw___attribute)) ParseGNUAttributes(attrs); @@ -981,6 +1071,12 @@ void Parser::ParseClassSpecifier(tok::TokenKind TagTokKind, while (Tok.is(tok::kw___declspec)) ParseMicrosoftDeclSpec(attrs); + // Parse inheritance specifiers. + if (Tok.is(tok::kw___single_inheritance) || + Tok.is(tok::kw___multiple_inheritance) || + Tok.is(tok::kw___virtual_inheritance)) + ParseMicrosoftInheritanceClassAttributes(attrs); + // If C++0x attributes exist here, parse them. // FIXME: Are we consistent with the ordering of parsing of different // styles of attributes? @@ -1103,10 +1199,6 @@ void Parser::ParseClassSpecifier(tok::TokenKind TagTokKind, } } - // As soon as we're finished parsing the class's template-id, turn access - // checking back on. - SuppressAccess.done(); - // There are four options here. // - If we are in a trailing return type, this is always just a reference, // and we must not try to parse a definition. For instance, @@ -1144,11 +1236,29 @@ void Parser::ParseClassSpecifier(tok::TokenKind TagTokKind, // Okay, this is a class definition. TUK = Sema::TUK_Definition; } - } else if (Tok.is(tok::semi) && DSC != DSC_type_specifier) + } else if (DSC != DSC_type_specifier && + (Tok.is(tok::semi) || + (Tok.isAtStartOfLine() && !isValidAfterTypeSpecifier(false)))) { TUK = DS.isFriendSpecified() ? Sema::TUK_Friend : Sema::TUK_Declaration; - else + if (Tok.isNot(tok::semi)) { + // A semicolon was missing after this declaration. Diagnose and recover. + ExpectAndConsume(tok::semi, diag::err_expected_semi_after_tagdecl, + TagType == DeclSpec::TST_class ? "class" : + TagType == DeclSpec::TST_struct ? "struct" : "union"); + PP.EnterToken(Tok); + Tok.setKind(tok::semi); + } + } else TUK = Sema::TUK_Reference; + // If this is an elaborated type specifier, and we delayed + // diagnostics before, just merge them into the current pool. + if (shouldDelayDiagsInTag) { + diagsFromTag.done(); + if (TUK == Sema::TUK_Reference) + diagsFromTag.redelay(); + } + if (!Name && !TemplateId && (DS.getTypeSpecType() == DeclSpec::TST_error || TUK != Sema::TUK_Definition)) { if (DS.getTypeSpecType() != DeclSpec::TST_error) { @@ -1175,6 +1285,8 @@ void Parser::ParseClassSpecifier(tok::TokenKind TagTokKind, if (TemplateInfo.Kind == ParsedTemplateInfo::ExplicitInstantiation && TUK == Sema::TUK_Declaration) { // This is an explicit instantiation of a class template. 
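ProhibitAttributes here implements the rule that attributes may not appertain to an explicit instantiation; roughly (invented example):

    template <typename T> struct W { T t; };
    template struct W<int>;                           // OK
    template struct __attribute__((packed)) W<char>;  // attributes here are
                                                      // diagnosed and dropped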
+ ProhibitAttributes(attrs); + TagOrTempResult = Actions.ActOnExplicitInstantiation(getCurScope(), TemplateInfo.ExternLoc, @@ -1196,6 +1308,7 @@ void Parser::ParseClassSpecifier(tok::TokenKind TagTokKind, } else if (TUK == Sema::TUK_Reference || (TUK == Sema::TUK_Friend && TemplateInfo.Kind == ParsedTemplateInfo::NonTemplate)) { + ProhibitAttributes(attrs); TypeResult = Actions.ActOnTagTemplateIdType(TUK, TagType, StartLoc, TemplateId->SS, TemplateId->TemplateKWLoc, @@ -1260,6 +1373,8 @@ void Parser::ParseClassSpecifier(tok::TokenKind TagTokKind, // // template struct Outer::Inner; // + ProhibitAttributes(attrs); + TagOrTempResult = Actions.ActOnExplicitInstantiation(getCurScope(), TemplateInfo.ExternLoc, @@ -1268,6 +1383,8 @@ void Parser::ParseClassSpecifier(tok::TokenKind TagTokKind, NameLoc, attrs.getList()); } else if (TUK == Sema::TUK_Friend && TemplateInfo.Kind != ParsedTemplateInfo::NonTemplate) { + ProhibitAttributes(attrs); + TagOrTempResult = Actions.ActOnTemplatedFriendTag(getCurScope(), DS.getFriendSpecLoc(), TagType, StartLoc, SS, @@ -1281,6 +1398,9 @@ void Parser::ParseClassSpecifier(tok::TokenKind TagTokKind, // FIXME: Diagnose this particular error. } + if (TUK != Sema::TUK_Declaration && TUK != Sema::TUK_Definition) + ProhibitAttributes(attrs); + bool IsDependent = false; // Don't pass down template parameter lists if this is just a tag @@ -1344,77 +1464,19 @@ void Parser::ParseClassSpecifier(tok::TokenKind TagTokKind, // impossible token occurs next, we assume that the programmer forgot a ; at // the end of the declaration and recover that way. // - // This switch enumerates the valid "follow" set for definition. - if (TUK == Sema::TUK_Definition) { - bool ExpectedSemi = true; - switch (Tok.getKind()) { - default: break; - case tok::semi: // struct foo {...} ; - case tok::star: // struct foo {...} * P; - case tok::amp: // struct foo {...} & R = ... - case tok::identifier: // struct foo {...} V ; - case tok::r_paren: //(struct foo {...} ) {4} - case tok::annot_cxxscope: // struct foo {...} a:: b; - case tok::annot_typename: // struct foo {...} a ::b; - case tok::annot_template_id: // struct foo {...} a ::b; - case tok::l_paren: // struct foo {...} ( x); - case tok::comma: // __builtin_offsetof(struct foo{...} , - ExpectedSemi = false; - break; - // Type qualifiers - case tok::kw_const: // struct foo {...} const x; - case tok::kw_volatile: // struct foo {...} volatile x; - case tok::kw_restrict: // struct foo {...} restrict x; - case tok::kw_inline: // struct foo {...} inline foo() {}; - // Storage-class specifiers - case tok::kw_static: // struct foo {...} static x; - case tok::kw_extern: // struct foo {...} extern x; - case tok::kw_typedef: // struct foo {...} typedef x; - case tok::kw_register: // struct foo {...} register x; - case tok::kw_auto: // struct foo {...} auto x; - case tok::kw_mutable: // struct foo {...} mutable x; - case tok::kw_constexpr: // struct foo {...} constexpr x; - // As shown above, type qualifiers and storage class specifiers absolutely - // can occur after class specifiers according to the grammar. However, - // almost no one actually writes code like this. If we see one of these, - // it is much more likely that someone missed a semi colon and the - // type/storage class specifier we're seeing is part of the *next* - // intended declaration, as in: - // - // struct foo { ... 
} - // typedef int X; - // - // We'd really like to emit a missing semicolon error instead of emitting - // an error on the 'int' saying that you can't have two type specifiers in - // the same declaration of X. Because of this, we look ahead past this - // token to see if it's a type specifier. If so, we know the code is - // otherwise invalid, so we can produce the expected semi error. - if (!isKnownToBeTypeSpecifier(NextToken())) - ExpectedSemi = false; - break; - - case tok::r_brace: // struct bar { struct foo {...} } - // Missing ';' at end of struct is accepted as an extension in C mode. - if (!getLangOpts().CPlusPlus) - ExpectedSemi = false; - break; - } - - // C++ [temp]p3 In a template-declaration which defines a class, no - // declarator is permitted. - if (TemplateInfo.Kind) - ExpectedSemi = true; - - if (ExpectedSemi) { - ExpectAndConsume(tok::semi, diag::err_expected_semi_after_tagdecl, - TagType == DeclSpec::TST_class ? "class" - : TagType == DeclSpec::TST_struct? "struct" : "union"); - // Push this token back into the preprocessor and change our current token - // to ';' so that the rest of the code recovers as though there were an - // ';' after the definition. - PP.EnterToken(Tok); - Tok.setKind(tok::semi); - } + // Also enforce C++ [temp]p3: + // In a template-declaration which defines a class, no declarator + // is permitted. + if (TUK == Sema::TUK_Definition && + (TemplateInfo.Kind || !isValidAfterTypeSpecifier(false))) { + ExpectAndConsume(tok::semi, diag::err_expected_semi_after_tagdecl, + TagType == DeclSpec::TST_class ? "class" : + TagType == DeclSpec::TST_struct ? "struct" : "union"); + // Push this token back into the preprocessor and change our current token + // to ';' so that the rest of the code recovers as though there were an + // ';' after the definition. + PP.EnterToken(Tok); + Tok.setKind(tok::semi); } } @@ -1696,12 +1758,16 @@ void Parser::ParseCXXClassMemberDeclaration(AccessSpecifier AS, } // Access declarations. + bool MalformedTypeSpec = false; if (!TemplateInfo.Kind && - (Tok.is(tok::identifier) || Tok.is(tok::coloncolon)) && - !TryAnnotateCXXScopeToken() && - Tok.is(tok::annot_cxxscope)) { - bool isAccessDecl = false; - if (NextToken().is(tok::identifier)) + (Tok.is(tok::identifier) || Tok.is(tok::coloncolon))) { + if (TryAnnotateCXXScopeToken()) + MalformedTypeSpec = true; + + bool isAccessDecl; + if (Tok.isNot(tok::annot_cxxscope)) + isAccessDecl = false; + else if (NextToken().is(tok::identifier)) isAccessDecl = GetLookAheadToken(2).is(tok::semi); else isAccessDecl = NextToken().is(tok::kw_operator); @@ -1798,6 +1864,8 @@ void Parser::ParseCXXClassMemberDeclaration(AccessSpecifier AS, // Parse the common declaration-specifiers piece. ParsingDeclSpec DS(*this, TemplateDiags); DS.takeAttributesFrom(attrs); + if (MalformedTypeSpec) + DS.SetTypeSpecError(); ParseDeclarationSpecifiers(DS, TemplateInfo, AS, DSC_class, &CommonLateParsedAttrs); @@ -1915,9 +1983,8 @@ void Parser::ParseCXXClassMemberDeclaration(AccessSpecifier AS, LateParsedAttrs.clear(); // Consume the ';' - it's optional unless we have a delete or default - if (Tok.is(tok::semi)) { - ConsumeToken(); - } + if (Tok.is(tok::semi)) + ConsumeExtraSemi(AfterMemberFunctionDefinition); return; } @@ -1961,18 +2028,19 @@ void Parser::ParseCXXClassMemberDeclaration(AccessSpecifier AS, // goes before or after the GNU attributes and __asm__. 
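The InClassInitStyle switch in the hunk that follows classifies C++11 non-static data member initializers by form; a minimal sketch of the inputs concerned:

    struct Widget {
      int a = 42;                // ICIS_CopyInit: '=' brace-or-equal-initializer
      int b{7};                  // ICIS_ListInit: braced form
      static const int c = 1;    // SCS_static: handled by the usual static path
    };

As the diff below shows, the choice between ext_nonstatic_member_init and warn_cxx98_compat_nonstatic_member_init keys off getLangOpts().CPlusPlus0x.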
ParseOptionalCXX0XVirtSpecifierSeq(VS); - bool HasDeferredInitializer = false; + InClassInitStyle HasInClassInit = ICIS_NoInit; if ((Tok.is(tok::equal) || Tok.is(tok::l_brace)) && !HasInitializer) { if (BitfieldSize.get()) { Diag(Tok, diag::err_bitfield_member_init); SkipUntil(tok::comma, true, true); } else { HasInitializer = true; - HasDeferredInitializer = !DeclaratorInfo.isDeclarationOfFunction() && - DeclaratorInfo.getDeclSpec().getStorageClassSpec() - != DeclSpec::SCS_static && - DeclaratorInfo.getDeclSpec().getStorageClassSpec() - != DeclSpec::SCS_typedef; + if (!DeclaratorInfo.isDeclarationOfFunction() && + DeclaratorInfo.getDeclSpec().getStorageClassSpec() + != DeclSpec::SCS_static && + DeclaratorInfo.getDeclSpec().getStorageClassSpec() + != DeclSpec::SCS_typedef) + HasInClassInit = Tok.is(tok::equal) ? ICIS_CopyInit : ICIS_ListInit; } } @@ -1990,7 +2058,7 @@ void Parser::ParseCXXClassMemberDeclaration(AccessSpecifier AS, DeclaratorInfo, move(TemplateParams), BitfieldSize.release(), - VS, HasDeferredInitializer); + VS, HasInClassInit); if (AccessAttrs) Actions.ProcessDeclAttributeList(getCurScope(), ThisDecl, AccessAttrs, false, true); @@ -2006,15 +2074,15 @@ void Parser::ParseCXXClassMemberDeclaration(AccessSpecifier AS, LateParsedAttrs.clear(); // Handle the initializer. - if (HasDeferredInitializer) { + if (HasInClassInit != ICIS_NoInit) { // The initializer was deferred; parse it and cache the tokens. Diag(Tok, getLangOpts().CPlusPlus0x ? diag::warn_cxx98_compat_nonstatic_member_init : diag::ext_nonstatic_member_init); if (DeclaratorInfo.isArrayOfUnknownBound()) { - // C++0x [dcl.array]p3: An array bound may also be omitted when the - // declarator is followed by an initializer. + // C++11 [dcl.array]p3: An array bound may also be omitted when the + // declarator is followed by an initializer. // // A brace-or-equal-initializer for a member-declarator is not an // initializer in the grammar, so this is ill-formed. @@ -2266,10 +2334,7 @@ void Parser::ParseCXXMemberSpecification(SourceLocation RecordLoc, // Check for extraneous top-level semicolon. if (Tok.is(tok::semi)) { - Diag(Tok, diag::ext_extra_struct_semi) - << DeclSpec::getSpecifierName((DeclSpec::TST)TagType) - << FixItHint::CreateRemoval(Tok.getLocation()); - ConsumeToken(); + ConsumeExtraSemi(InsideStruct, TagType); continue; } @@ -2779,7 +2844,7 @@ IdentifierInfo *Parser::TryParseCXX11AttributeIdentifier(SourceLocation &Loc) { StringRef Spelling = PP.getSpelling(Tok.getLocation(), SpellingBuf); if (std::isalpha(Spelling[0])) { Loc = ConsumeToken(); - return &PP.getIdentifierTable().get(Spelling.data()); + return &PP.getIdentifierTable().get(Spelling); } return 0; } @@ -2870,28 +2935,31 @@ void Parser::ParseCXX11AttributeSpecifier(ParsedAttributes &attrs, } bool AttrParsed = false; - // No scoped names are supported; ideally we could put all non-standard - // attributes into namespaces. 
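The rewritten C++11 attribute switch in the next hunk resolves both scoped and unscoped spellings through AttributeList::getKind; the surface forms involved look like this (sketch of attribute support as of this import):

    [[noreturn]] void fail();              // AT_NoReturn; arguments are rejected
    [[carries_dependency]] int *load();    // AT_CarriesDependency
    void step(int n) {
      switch (n) {
      case 0: ++n; [[clang::fallthrough]]; // AT_FallThrough, scoped spelling
      case 1: break;
      }
    }

An argument clause such as [[noreturn(true)]] produces err_cxx11_attribute_forbids_arguments.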
- if (!ScopeName) { - switch (AttributeList::getKind(AttrName)) { - // No arguments - case AttributeList::AT_carries_dependency: - case AttributeList::AT_noreturn: { - if (Tok.is(tok::l_paren)) { - Diag(Tok.getLocation(), diag::err_cxx11_attribute_forbids_arguments) - << AttrName->getName(); - break; - } - - attrs.addNew(AttrName, AttrLoc, 0, AttrLoc, 0, - SourceLocation(), 0, 0, false, true); - AttrParsed = true; + switch (AttributeList::getKind(AttrName, ScopeName, + AttributeList::AS_CXX11)) { + // No arguments + case AttributeList::AT_CarriesDependency: + // FIXME: implement generic support of attributes with C++11 syntax + // see Parse/ParseDecl.cpp: ParseGNUAttributes + case AttributeList::AT_FallThrough: + case AttributeList::AT_NoReturn: { + if (Tok.is(tok::l_paren)) { + Diag(Tok.getLocation(), diag::err_cxx11_attribute_forbids_arguments) + << AttrName->getName(); break; } - // Silence warnings - default: break; - } + attrs.addNew(AttrName, + SourceRange(ScopeLoc.isValid() ? ScopeLoc : AttrLoc, + AttrLoc), + ScopeName, ScopeLoc, 0, + SourceLocation(), 0, 0, AttributeList::AS_CXX11); + AttrParsed = true; + break; + } + + // Silence warnings + default: break; } // Skip the entire parameter clause, if any @@ -2917,7 +2985,7 @@ void Parser::ParseCXX11AttributeSpecifier(ParsedAttributes &attrs, SkipUntil(tok::r_square, false); } -/// ParseCXX11Attributes - Parse a C++0x attribute-specifier-seq. +/// ParseCXX11Attributes - Parse a C++11 attribute-specifier-seq. /// /// attribute-specifier-seq: /// attribute-specifier-seq[opt] attribute-specifier @@ -2991,10 +3059,7 @@ void Parser::ParseMicrosoftIfExistsClassDeclaration(DeclSpec::TST TagType, // Check for extraneous top-level semicolon. if (Tok.is(tok::semi)) { - Diag(Tok, diag::ext_extra_struct_semi) - << DeclSpec::getSpecifierName((DeclSpec::TST)TagType) - << FixItHint::CreateRemoval(Tok.getLocation()); - ConsumeToken(); + ConsumeExtraSemi(InsideStruct, TagType); continue; } diff --git a/lib/Parse/ParseExpr.cpp b/lib/Parse/ParseExpr.cpp index 6d31396..8d4668b 100644 --- a/lib/Parse/ParseExpr.cpp +++ b/lib/Parse/ParseExpr.cpp @@ -6,17 +6,19 @@ // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// -// -// This file implements the Expression parsing implementation. Expressions in -// C99 basically consist of a bunch of binary operators with unary operators and -// other random stuff at the leaves. -// -// In the C99 grammar, these unary operators bind tightest and are represented -// as the 'cast-expression' production. Everything else is either a binary -// operator (e.g. '/') or a ternary operator ("?:"). The unary leaves are -// handled by ParseCastExpression, the higher level pieces are handled by -// ParseBinaryExpression. -// +/// +/// \file +/// \brief Provides the Expression parsing implementation. +/// +/// Expressions in C99 basically consist of a bunch of binary operators with +/// unary operators and other random stuff at the leaves. +/// +/// In the C99 grammar, these unary operators bind tightest and are represented +/// as the 'cast-expression' production. Everything else is either a binary +/// operator (e.g. '/') or a ternary operator ("?:"). The unary leaves are +/// handled by ParseCastExpression, the higher level pieces are handled by +/// ParseBinaryExpression. 
+/// //===----------------------------------------------------------------------===// #include "clang/Parse/Parser.h" @@ -30,8 +32,7 @@ #include "llvm/ADT/SmallString.h" using namespace clang; -/// getBinOpPrecedence - Return the precedence of the specified binary operator -/// token. +/// \brief Return the precedence of the specified binary operator token. static prec::Level getBinOpPrecedence(tok::TokenKind Kind, bool GreaterThanIsOperator, bool CPlusPlus0x) { @@ -92,8 +93,7 @@ static prec::Level getBinOpPrecedence(tok::TokenKind Kind, } -/// ParseExpression - Simple precedence-based parser for binary/ternary -/// operators. +/// \brief Simple precedence-based parser for binary/ternary operators. /// /// Note: we diverge from the C99 grammar when parsing the assignment-expression /// production. C99 specifies that the LHS of an assignment operator should be @@ -104,6 +104,7 @@ static prec::Level getBinOpPrecedence(tok::TokenKind Kind, /// consistency, we parse the LHS as a conditional-expression, then check for /// l-value-ness in semantic analysis stages. /// +/// \verbatim /// pm-expression: [C++ 5.5] /// cast-expression /// pm-expression '.*' cast-expression @@ -175,6 +176,7 @@ static prec::Level getBinOpPrecedence(tok::TokenKind Kind, /// expression: [C99 6.5.17] /// assignment-expression ...[opt] /// expression ',' assignment-expression ...[opt] +/// \endverbatim ExprResult Parser::ParseExpression(TypeCastState isTypeCast) { ExprResult LHS(ParseAssignmentExpression(isTypeCast)); return ParseRHSOfBinaryExpression(move(LHS), prec::Comma); @@ -182,8 +184,8 @@ ExprResult Parser::ParseExpression(TypeCastState isTypeCast) { /// This routine is called when the '@' is seen and consumed. /// Current token is an Identifier and is not a 'try'. This -/// routine is necessary to disambiguate @try-statement from, -/// for example, @encode-expression. +/// routine is necessary to disambiguate \@try-statement from, +/// for example, \@encode-expression. /// ExprResult Parser::ParseExpressionWithLeadingAt(SourceLocation AtLoc) { @@ -211,7 +213,7 @@ Parser::ParseExpressionWithLeadingExtension(SourceLocation ExtLoc) { return ParseRHSOfBinaryExpression(move(LHS), prec::Comma); } -/// ParseAssignmentExpression - Parse an expr that doesn't include commas. +/// \brief Parse an expr that doesn't include (top-level) commas. ExprResult Parser::ParseAssignmentExpression(TypeCastState isTypeCast) { if (Tok.is(tok::code_completion)) { Actions.CodeCompleteOrdinaryName(getCurScope(), Sema::PCC_Expression); @@ -228,11 +230,12 @@ ExprResult Parser::ParseAssignmentExpression(TypeCastState isTypeCast) { return ParseRHSOfBinaryExpression(move(LHS), prec::Assignment); } -/// ParseAssignmentExprWithObjCMessageExprStart - Parse an assignment expression -/// where part of an objc message send has already been parsed. In this case -/// LBracLoc indicates the location of the '[' of the message send, and either -/// ReceiverName or ReceiverExpr is non-null indicating the receiver of the -/// message. +/// \brief Parse an assignment expression where part of an Objective-C message +/// send has already been parsed. +/// +/// In this case \p LBracLoc indicates the location of the '[' of the message +/// send, and either \p ReceiverName or \p ReceiverExpr is non-null indicating +/// the receiver of the message. /// /// Since this handles full assignment-expression's, it handles postfix /// expressions and other binary operators for these expressions as well. 
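The doc comments above describe the precedence-climbing loop shared by ParseExpression and ParseRHSOfBinaryExpression. As background only, here is a self-contained toy version of the same idea over single-digit operands; none of this is clang's code, and all names are made up:

    #include <cstdio>

    static const char *P;    // cursor into the expression being parsed

    static int prec(char op) {
      switch (op) {
      case '+': case '-': return 1;
      case '*': case '/': return 2;
      default:            return 0;     // not a binary operator
      }
    }

    static int parsePrimary() { return *P++ - '0'; }

    // Fold in operators of precedence >= MinPrec; parse each right-hand side
    // at one level higher so that left-associativity falls out naturally.
    static int parseRHS(int LHS, int MinPrec) {
      while (prec(*P) >= MinPrec && prec(*P) > 0) {
        char Op = *P++;
        int RHS = parsePrimary();
        while (prec(*P) > prec(Op))        // a tighter operator binds the RHS
          RHS = parseRHS(RHS, prec(Op) + 1);
        switch (Op) {
        case '+': LHS += RHS; break;
        case '-': LHS -= RHS; break;
        case '*': LHS *= RHS; break;
        case '/': LHS /= RHS; break;
        }
      }
      return LHS;
    }

    int main() {
      P = "1+2*3-4";
      std::printf("%d\n", parseRHS(parsePrimary(), 1));   // prints 3
    }

clang's real loop additionally handles the ternary operator, the assignment right-recursion described above, and error recovery.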
@@ -262,8 +265,8 @@ ExprResult Parser::ParseConstantExpression(TypeCastState isTypeCast) {
   return Actions.ActOnConstantExpression(Res);
 }
 
-/// ParseRHSOfBinaryExpression - Parse a binary expression that starts with
-/// LHS and has a precedence of at least MinPrec.
+/// \brief Parse a binary expression that starts with \p LHS and has a
+/// precedence of at least \p MinPrec.
 ExprResult
 Parser::ParseRHSOfBinaryExpression(ExprResult LHS, prec::Level MinPrec) {
   prec::Level NextTokPrec = getBinOpPrecedence(Tok.getKind(),
@@ -439,10 +442,11 @@ Parser::ParseRHSOfBinaryExpression(ExprResult LHS, prec::Level MinPrec) {
   }
 }
 
-/// ParseCastExpression - Parse a cast-expression, or, if isUnaryExpression is
-/// true, parse a unary-expression. isAddressOfOperand exists because an
-/// id-expression that is the operand of address-of gets special treatment
-/// due to member pointers.
+/// \brief Parse a cast-expression, or, if \p isUnaryExpression is true,
+/// parse a unary-expression.
+///
+/// \p isAddressOfOperand exists because an id-expression that is the
+/// operand of address-of gets special treatment due to member pointers.
 ///
 ExprResult Parser::ParseCastExpression(bool isUnaryExpression,
                                        bool isAddressOfOperand,
@@ -480,12 +484,15 @@ class CastExpressionIdValidator : public CorrectionCandidateCallback {
 };
 }
 
-/// ParseCastExpression - Parse a cast-expression, or, if isUnaryExpression is
-/// true, parse a unary-expression. isAddressOfOperand exists because an
-/// id-expression that is the operand of address-of gets special treatment
-/// due to member pointers. NotCastExpr is set to true if the token is not the
-/// start of a cast-expression, and no diagnostic is emitted in this case.
+/// \brief Parse a cast-expression, or, if \p isUnaryExpression is true, parse
+/// a unary-expression.
 ///
+/// \p isAddressOfOperand exists because an id-expression that is the operand
+/// of address-of gets special treatment due to member pointers. NotCastExpr
+/// is set to true if the token is not the start of a cast-expression, and no
+/// diagnostic is emitted in this case.
+///
+/// \verbatim
 ///       cast-expression: [C99 6.5.4]
 ///         unary-expression
 ///         '(' type-name ')' cast-expression
@@ -500,6 +507,7 @@ class CastExpressionIdValidator : public CorrectionCandidateCallback {
 ///       [C++11] 'sizeof' '...'
'(' identifier ')' /// [GNU] '__alignof' unary-expression /// [GNU] '__alignof' '(' type-name ')' +/// [C11] '_Alignof' '(' type-name ')' /// [C++11] 'alignof' '(' type-id ')' /// [GNU] '&&' identifier /// [C++11] 'noexcept' '(' expression ')' [C++11 5.3.7] @@ -531,9 +539,9 @@ class CastExpressionIdValidator : public CorrectionCandidateCallback { /// [GNU] '__builtin_types_compatible_p' '(' type-name ',' type-name ')' /// [GNU] '__null' /// [OBJC] '[' objc-message-expr ']' -/// [OBJC] '@selector' '(' objc-selector-arg ')' -/// [OBJC] '@protocol' '(' identifier ')' -/// [OBJC] '@encode' '(' type-name ')' +/// [OBJC] '\@selector' '(' objc-selector-arg ')' +/// [OBJC] '\@protocol' '(' identifier ')' +/// [OBJC] '\@encode' '(' type-name ')' /// [OBJC] objc-string-literal /// [C++] simple-type-specifier '(' expression-list[opt] ')' [C++ 5.2.3] /// [C++11] simple-type-specifier braced-init-list [C++11 5.2.3] @@ -641,6 +649,7 @@ class CastExpressionIdValidator : public CorrectionCandidateCallback { /// [Embarcadero] expression-trait: /// '__is_lvalue_expr' /// '__is_rvalue_expr' +/// \endverbatim /// ExprResult Parser::ParseCastExpression(bool isUnaryExpression, bool isAddressOfOperand, @@ -846,6 +855,7 @@ ExprResult Parser::ParseCastExpression(bool isUnaryExpression, break; case tok::kw___func__: // primary-expression: __func__ [C99 6.4.2.2] case tok::kw___FUNCTION__: // primary-expression: __FUNCTION__ [GNU] + case tok::kw_L__FUNCTION__: // primary-expression: L__FUNCTION__ [MS] case tok::kw___PRETTY_FUNCTION__: // primary-expression: __P..Y_F..N__ [GNU] Res = Actions.ActOnPredefinedExpr(Tok.getLocation(), SavedKind); ConsumeToken(); @@ -912,12 +922,15 @@ ExprResult Parser::ParseCastExpression(bool isUnaryExpression, Res = Actions.ActOnUnaryOp(getCurScope(), SavedLoc, SavedKind, Res.get()); return move(Res); } - case tok::kw_sizeof: // unary-expression: 'sizeof' unary-expression - // unary-expression: 'sizeof' '(' type-name ')' - case tok::kw_alignof: + case tok::kw__Alignof: // unary-expression: '_Alignof' '(' type-name ')' + if (!getLangOpts().C11) + Diag(Tok, diag::ext_c11_alignment) << Tok.getName(); + // fallthrough + case tok::kw_alignof: // unary-expression: 'alignof' '(' type-id ')' case tok::kw___alignof: // unary-expression: '__alignof' unary-expression // unary-expression: '__alignof' '(' type-name ')' - // unary-expression: 'alignof' '(' type-id ')' + case tok::kw_sizeof: // unary-expression: 'sizeof' unary-expression + // unary-expression: 'sizeof' '(' type-name ')' case tok::kw_vec_step: // unary-expression: OpenCL 'vec_step' expression return ParseUnaryExprOrTypeTraitExpression(); case tok::ampamp: { // unary-expression: '&&' identifier @@ -1228,9 +1241,10 @@ ExprResult Parser::ParseCastExpression(bool isUnaryExpression, return ParsePostfixExpressionSuffix(Res); } -/// ParsePostfixExpressionSuffix - Once the leading part of a postfix-expression -/// is parsed, this method parses any suffixes that apply. +/// \brief Once the leading part of a postfix-expression is parsed, this +/// method parses any suffixes that apply. 
/// +/// \verbatim /// postfix-expression: [C99 6.5.2] /// primary-expression /// postfix-expression '[' expression ']' @@ -1246,7 +1260,7 @@ ExprResult Parser::ParseCastExpression(bool isUnaryExpression, /// argument-expression-list: [C99 6.5.2] /// argument-expression ...[opt] /// argument-expression-list ',' assignment-expression ...[opt] -/// +/// \endverbatim ExprResult Parser::ParsePostfixExpressionSuffix(ExprResult LHS) { // Now that the primary-expression piece of the postfix-expression has been @@ -1498,11 +1512,13 @@ Parser::ParsePostfixExpressionSuffix(ExprResult LHS) { /// type-id. OpTok is the operand token (typeof/sizeof/alignof). Returns the /// expression (isCastExpr == false) or the type (isCastExpr == true). /// +/// \verbatim /// unary-expression: [C99 6.5.3] /// 'sizeof' unary-expression /// 'sizeof' '(' type-name ')' /// [GNU] '__alignof' unary-expression /// [GNU] '__alignof' '(' type-name ')' +/// [C11] '_Alignof' '(' type-name ')' /// [C++0x] 'alignof' '(' type-id ')' /// /// [GNU] typeof-specifier: @@ -1513,7 +1529,7 @@ Parser::ParsePostfixExpressionSuffix(ExprResult LHS) { /// [OpenCL 1.1 6.11.12] vec_step built-in function: /// vec_step ( expressions ) /// vec_step ( type-name ) -/// +/// \endverbatim ExprResult Parser::ParseExprAfterUnaryExprOrTypeTrait(const Token &OpTok, bool &isCastExpr, @@ -1522,7 +1538,7 @@ Parser::ParseExprAfterUnaryExprOrTypeTrait(const Token &OpTok, assert((OpTok.is(tok::kw_typeof) || OpTok.is(tok::kw_sizeof) || OpTok.is(tok::kw___alignof) || OpTok.is(tok::kw_alignof) || - OpTok.is(tok::kw_vec_step)) && + OpTok.is(tok::kw__Alignof) || OpTok.is(tok::kw_vec_step)) && "Not a typeof/sizeof/alignof/vec_step expression!"); ExprResult Operand; @@ -1571,17 +1587,22 @@ Parser::ParseExprAfterUnaryExprOrTypeTrait(const Token &OpTok, } -/// ParseUnaryExprOrTypeTraitExpression - Parse a sizeof or alignof expression. +/// \brief Parse a sizeof or alignof expression. +/// +/// \verbatim /// unary-expression: [C99 6.5.3] /// 'sizeof' unary-expression /// 'sizeof' '(' type-name ')' /// [C++0x] 'sizeof' '...' 
'(' identifier ')' /// [GNU] '__alignof' unary-expression /// [GNU] '__alignof' '(' type-name ')' +/// [C11] '_Alignof' '(' type-name ')' /// [C++0x] 'alignof' '(' type-id ')' +/// \endverbatim ExprResult Parser::ParseUnaryExprOrTypeTraitExpression() { - assert((Tok.is(tok::kw_sizeof) || Tok.is(tok::kw___alignof) - || Tok.is(tok::kw_alignof) || Tok.is(tok::kw_vec_step)) && + assert((Tok.is(tok::kw_sizeof) || Tok.is(tok::kw___alignof) || + Tok.is(tok::kw_alignof) || Tok.is(tok::kw__Alignof) || + Tok.is(tok::kw_vec_step)) && "Not a sizeof/alignof/vec_step expression!"); Token OpTok = Tok; ConsumeToken(); @@ -1629,7 +1650,7 @@ ExprResult Parser::ParseUnaryExprOrTypeTraitExpression() { RParenLoc); } - if (OpTok.is(tok::kw_alignof)) + if (OpTok.is(tok::kw_alignof) || OpTok.is(tok::kw__Alignof)) Diag(OpTok, diag::warn_cxx98_compat_alignof); EnterExpressionEvaluationContext Unevaluated(Actions, Sema::Unevaluated); @@ -1643,7 +1664,8 @@ ExprResult Parser::ParseUnaryExprOrTypeTraitExpression() { CastRange); UnaryExprOrTypeTrait ExprKind = UETT_SizeOf; - if (OpTok.is(tok::kw_alignof) || OpTok.is(tok::kw___alignof)) + if (OpTok.is(tok::kw_alignof) || OpTok.is(tok::kw___alignof) || + OpTok.is(tok::kw__Alignof)) ExprKind = UETT_AlignOf; else if (OpTok.is(tok::kw_vec_step)) ExprKind = UETT_VecStep; @@ -1667,6 +1689,7 @@ ExprResult Parser::ParseUnaryExprOrTypeTraitExpression() { /// ParseBuiltinPrimaryExpression /// +/// \verbatim /// primary-expression: [C99 6.5.1] /// [GNU] '__builtin_va_arg' '(' assignment-expression ',' type-name ')' /// [GNU] '__builtin_offsetof' '(' type-name ',' offsetof-member-designator')' @@ -1679,7 +1702,7 @@ ExprResult Parser::ParseUnaryExprOrTypeTraitExpression() { /// [GNU] identifier /// [GNU] offsetof-member-designator '.' identifier /// [GNU] offsetof-member-designator '[' expression ']' -/// +/// \endverbatim ExprResult Parser::ParseBuiltinPrimaryExpression() { ExprResult Res; const IdentifierInfo *BuiltinII = Tok.getIdentifierInfo(); @@ -1869,6 +1892,7 @@ ExprResult Parser::ParseBuiltinPrimaryExpression() { /// in ExprType. If stopIfCastExpr is true, it will only return the parsed type, /// not the parsed cast-expression. /// +/// \verbatim /// primary-expression: [C99 6.5.1] /// '(' expression ')' /// [GNU] '(' compound-statement ')' (if !ParenExprOnly) @@ -1883,12 +1907,12 @@ ExprResult Parser::ParseBuiltinPrimaryExpression() { /// (__bridge type-name) cast-expression /// (__bridge_transfer type-name) cast-expression /// (__bridge_retained type-name) cast-expression +/// \endverbatim ExprResult Parser::ParseParenExpression(ParenParseOption &ExprType, bool stopIfCastExpr, bool isTypeCast, ParsedType &CastTy, SourceLocation &RParenLoc) { assert(Tok.is(tok::l_paren) && "Not a paren expr!"); - GreaterThanIsOperatorScope G(GreaterThanIsOperator, true); BalancedDelimiterTracker T(*this, tok::l_paren); if (T.consumeOpen()) return ExprError(); @@ -2102,10 +2126,11 @@ Parser::ParseParenExpression(ParenParseOption &ExprType, bool stopIfCastExpr, /// ParseCompoundLiteralExpression - We have parsed the parenthesized type-name /// and we are at the left brace. 
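For reference, the forms accepted by ParseUnaryExprOrTypeTraitExpression after the _Alignof additions above (C++11 sketch; _Alignof itself is the C11 spelling and draws the ext_c11_alignment warning outside C11 mode):

    template <typename... Ts>
    unsigned arity() { return sizeof...(Ts); }   // 'sizeof' '...' '(' identifier ')'

    static_assert(alignof(long long) >= 1, "");  // 'alignof' '(' type-id ')'
    int n = sizeof(int);                         // 'sizeof' '(' type-name ')'
    int m = sizeof n;                            // 'sizeof' unary-expression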
 ///
+/// \verbatim
 ///       postfix-expression: [C99 6.5.2]
 ///         '(' type-name ')' '{' initializer-list '}'
 ///         '(' type-name ')' '{' initializer-list ',' '}'
-///
+/// \endverbatim
 ExprResult
 Parser::ParseCompoundLiteralExpression(ParsedType Ty,
                                        SourceLocation LParenLoc,
@@ -2123,8 +2148,10 @@ Parser::ParseCompoundLiteralExpression(ParsedType Ty,
 /// form string literals, and also handles string concatenation [C99 5.1.1.2,
 /// translation phase #6].
 ///
+/// \verbatim
 ///       primary-expression: [C99 6.5.1]
 ///         string-literal
+/// \endverbatim
 ExprResult Parser::ParseStringLiteralExpression(bool AllowUserDefinedLiteral) {
   assert(isTokenStringLiteral() && "Not a string literal!");
 
@@ -2145,6 +2172,7 @@ ExprResult Parser::ParseStringLiteralExpression(bool AllowUserDefinedLiteral) {
 /// ParseGenericSelectionExpression - Parse a C11 generic-selection
 /// [C11 6.5.1.1].
 ///
+/// \verbatim
 ///       generic-selection:
 ///         _Generic ( assignment-expression , generic-assoc-list )
 ///       generic-assoc-list:
@@ -2153,6 +2181,7 @@ ExprResult Parser::ParseStringLiteralExpression(bool AllowUserDefinedLiteral) {
 ///       generic-association:
 ///         type-name : assignment-expression
 ///         default : assignment-expression
+/// \endverbatim
 ExprResult Parser::ParseGenericSelectionExpression() {
   assert(Tok.is(tok::kw__Generic) && "_Generic keyword expected");
   SourceLocation KeyLoc = ConsumeToken();
@@ -2239,6 +2268,7 @@ ExprResult Parser::ParseGenericSelectionExpression() {
 
 /// ParseExpressionList - Used for C/C++ (argument-)expression-list.
 ///
+/// \verbatim
 ///       argument-expression-list:
 ///         assignment-expression
 ///         argument-expression-list , assignment-expression
@@ -2257,7 +2287,7 @@ ExprResult Parser::ParseGenericSelectionExpression() {
 ///       [C++0x] initializer-clause:
 ///       [C++0x]   assignment-expression
 ///       [C++0x]   braced-init-list
-///
+/// \endverbatim
 bool Parser::ParseExpressionList(SmallVectorImpl<Expr*> &Exprs,
                                  SmallVectorImpl<SourceLocation> &CommaLocs,
                                  void (Sema::*Completer)(Scope *S,
@@ -2297,10 +2327,11 @@ bool Parser::ParseExpressionList(SmallVectorImpl<Expr*> &Exprs,
 
 /// ParseBlockId - Parse a block-id, which roughly looks like int (int x).
 ///
+/// \verbatim
 /// [clang] block-id:
 /// [clang]   specifier-qualifier-list block-declarator
-///
+/// \endverbatim
-void Parser::ParseBlockId() {
+void Parser::ParseBlockId(SourceLocation CaretLoc) {
   if (Tok.is(tok::code_completion)) {
     Actions.CodeCompleteOrdinaryName(getCurScope(), Sema::PCC_Type);
     return cutOffParsing();
@@ -2320,18 +2351,19 @@ void Parser::ParseBlockId() {
   MaybeParseGNUAttributes(DeclaratorInfo);
 
   // Inform sema that we are starting a block.
-  Actions.ActOnBlockArguments(DeclaratorInfo, getCurScope());
+  Actions.ActOnBlockArguments(CaretLoc, DeclaratorInfo, getCurScope());
 }
 
 /// ParseBlockLiteralExpression - Parse a block literal, which roughly looks
 /// like ^(int x){ return x+1; }
 ///
+/// \verbatim
 ///         block-literal:
 /// [clang]   '^' block-args[opt] compound-statement
 /// [clang]   '^' block-id compound-statement
 /// [clang] block-args:
 /// [clang]   '(' parameter-list ')'
-///
+/// \endverbatim
 ExprResult Parser::ParseBlockLiteralExpression() {
   assert(Tok.is(tok::caret) && "block literal starts with ^");
   SourceLocation CaretLoc = ConsumeToken();
@@ -2377,13 +2409,13 @@ ExprResult Parser::ParseBlockLiteralExpression() {
     MaybeParseGNUAttributes(ParamInfo);
 
     // Inform sema that we are starting a block.
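The block-literal grammar above in source form (clang Blocks extension, compiled with -fblocks; a sketch):

    int (^add)(int, int) = ^(int a, int b) { return a + b; };   // block-args form
    int (^ten)(void)     = ^{ return 10; };                     // bare compound-statement
    // add(2, 3) + ten() == 15; note that ActOnBlockArguments now also
    // receives the '^' location (CaretLoc).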
- Actions.ActOnBlockArguments(ParamInfo, getCurScope()); + Actions.ActOnBlockArguments(CaretLoc, ParamInfo, getCurScope()); } else if (!Tok.is(tok::l_brace)) { - ParseBlockId(); + ParseBlockId(CaretLoc); } else { // Otherwise, pretend we saw (void). ParsedAttributes attrs(AttrFactory); - ParamInfo.AddTypeInfo(DeclaratorChunk::getFunction(true, false, + ParamInfo.AddTypeInfo(DeclaratorChunk::getFunction(true, false, false, SourceLocation(), 0, 0, 0, true, SourceLocation(), @@ -2400,7 +2432,7 @@ ExprResult Parser::ParseBlockLiteralExpression() { MaybeParseGNUAttributes(ParamInfo); // Inform sema that we are starting a block. - Actions.ActOnBlockArguments(ParamInfo, getCurScope()); + Actions.ActOnBlockArguments(CaretLoc, ParamInfo, getCurScope()); } diff --git a/lib/Parse/ParseExprCXX.cpp b/lib/Parse/ParseExprCXX.cpp index 7152184..afac257 100644 --- a/lib/Parse/ParseExprCXX.cpp +++ b/lib/Parse/ParseExprCXX.cpp @@ -36,7 +36,7 @@ static int SelectDigraphErrorMessage(tok::TokenKind Kind) { } // Are the two tokens adjacent in the same source file? -static bool AreTokensAdjacent(Preprocessor &PP, Token &First, Token &Second) { +bool Parser::areTokensAdjacent(const Token &First, const Token &Second) { SourceManager &SM = PP.getSourceManager(); SourceLocation FirstLoc = SM.getSpellingLoc(First.getLocation()); SourceLocation FirstEnd = FirstLoc.getLocWithOffset(First.getLength()); @@ -80,7 +80,7 @@ void Parser::CheckForTemplateAndDigraph(Token &Next, ParsedType ObjectType, return; Token SecondToken = GetLookAheadToken(2); - if (!SecondToken.is(tok::colon) || !AreTokensAdjacent(PP, Next, SecondToken)) + if (!SecondToken.is(tok::colon) || !areTokensAdjacent(Next, SecondToken)) return; TemplateTy Template; @@ -642,7 +642,13 @@ llvm::Optional Parser::ParseLambdaIntroducer(LambdaIntroducer &Intro){ while (Tok.isNot(tok::r_square)) { if (!first) { if (Tok.isNot(tok::comma)) { - if (Tok.is(tok::code_completion)) { + // Provide a completion for a lambda introducer here. Except + // in Objective-C, where this is Almost Surely meant to be a message + // send. In that case, fail here and let the ObjC message + // expression parser perform the completion. + if (Tok.is(tok::code_completion) && + !(getLangOpts().ObjC1 && Intro.Default == LCD_None && + !Intro.Captures.empty())) { Actions.CodeCompleteLambdaIntroducer(getCurScope(), Intro, /*AfterAmpersand=*/false); ConsumeCodeCompletionToken(); @@ -792,10 +798,10 @@ ExprResult Parser::ParseLambdaExpressionAfterIntroducer( MaybeParseCXX0XAttributes(Attr, &DeclEndLoc); // Parse trailing-return-type[opt]. - ParsedType TrailingReturnType; + TypeResult TrailingReturnType; if (Tok.is(tok::arrow)) { SourceRange Range; - TrailingReturnType = ParseTrailingReturnType(Range).get(); + TrailingReturnType = ParseTrailingReturnType(Range); if (Range.getEnd().isValid()) DeclEndLoc = Range.getEnd(); } @@ -804,7 +810,7 @@ ExprResult Parser::ParseLambdaExpressionAfterIntroducer( D.AddTypeInfo(DeclaratorChunk::getFunction(/*hasProto=*/true, /*isVariadic=*/EllipsisLoc.isValid(), - EllipsisLoc, + /*isAmbiguous=*/false, EllipsisLoc, ParamInfo.data(), ParamInfo.size(), DS.getTypeQualifiers(), /*RefQualifierIsLValueRef=*/true, @@ -838,10 +844,10 @@ ExprResult Parser::ParseLambdaExpressionAfterIntroducer( } // Parse the return type, if there is one. 
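The TrailingReturnType change here keeps a lambda's trailing return type as a TypeResult, so potentially dependent types survive until they can be resolved; the syntax concerned (C++11 sketch):

    auto sq   = [](int x) -> long long { return 1LL * x * x; };
    auto name = [](bool b) -> const char * { return b ? "yes" : "no"; };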
- ParsedType TrailingReturnType; + TypeResult TrailingReturnType; if (Tok.is(tok::arrow)) { SourceRange Range; - TrailingReturnType = ParseTrailingReturnType(Range).get(); + TrailingReturnType = ParseTrailingReturnType(Range); if (Range.getEnd().isValid()) DeclEndLoc = Range.getEnd(); } @@ -849,6 +855,7 @@ ExprResult Parser::ParseLambdaExpressionAfterIntroducer( ParsedAttributes Attr(AttrFactory); D.AddTypeInfo(DeclaratorChunk::getFunction(/*hasProto=*/true, /*isVariadic=*/false, + /*isAmbiguous=*/false, /*EllipsisLoc=*/SourceLocation(), /*Params=*/0, /*NumParams=*/0, /*TypeQuals=*/0, @@ -921,7 +928,7 @@ ExprResult Parser::ParseCXXCasts() { // diagnose error, suggest fix, and recover parsing. Token Next = NextToken(); if (Tok.is(tok::l_square) && Tok.getLength() == 2 && Next.is(tok::colon) && - AreTokensAdjacent(PP, Tok, Next)) + areTokensAdjacent(Tok, Next)) FixDigraph(*this, PP, Tok, Next, Kind, /*AtDigraph*/true); if (ExpectAndConsume(tok::less, diag::err_expected_less_after, CastName)) @@ -1235,8 +1242,6 @@ Parser::ParseCXXTypeConstructExpression(const DeclSpec &DS) { MultiExprArg(&InitList, 1), SourceLocation()); } else { - GreaterThanIsOperatorScope G(GreaterThanIsOperator, true); - BalancedDelimiterTracker T(*this, tok::l_paren); T.consumeOpen(); @@ -1298,7 +1303,12 @@ bool Parser::ParseCXXCondition(ExprResult &ExprOut, return true; } + ParsedAttributesWithRange attrs(AttrFactory); + MaybeParseCXX0XAttributes(attrs); + if (!isCXXConditionDeclaration()) { + ProhibitAttributes(attrs); + // Parse the expression. ExprOut = ParseExpression(); // expression DeclOut = 0; @@ -1379,39 +1389,6 @@ bool Parser::ParseCXXCondition(ExprResult &ExprOut, return false; } -/// \brief Determine whether the current token starts a C++ -/// simple-type-specifier. -bool Parser::isCXXSimpleTypeSpecifier() const { - switch (Tok.getKind()) { - case tok::annot_typename: - case tok::kw_short: - case tok::kw_long: - case tok::kw___int64: - case tok::kw___int128: - case tok::kw_signed: - case tok::kw_unsigned: - case tok::kw_void: - case tok::kw_char: - case tok::kw_int: - case tok::kw_half: - case tok::kw_float: - case tok::kw_double: - case tok::kw_wchar_t: - case tok::kw_char16_t: - case tok::kw_char32_t: - case tok::kw_bool: - case tok::kw_decltype: - case tok::kw_typeof: - case tok::kw___underlying_type: - return true; - - default: - break; - } - - return false; -} - /// ParseCXXSimpleTypeSpecifier - [C++ 7.1.5.2] Simple type specifiers. /// This should only be called when the current token is known to be part of /// simple-type-specifier. @@ -2426,10 +2403,14 @@ Parser::ParseCXXDeleteExpression(bool UseGlobal, SourceLocation Start) { // Array delete? bool ArrayDelete = false; if (Tok.is(tok::l_square) && NextToken().is(tok::r_square)) { - // FIXME: This could be the start of a lambda-expression. We should - // disambiguate this, but that will require arbitrary lookahead if - // the next token is '(': - // delete [](int*){ /* ... */ + // C++11 [expr.delete]p1: + // Whenever the delete keyword is followed by empty square brackets, it + // shall be interpreted as [array delete]. + // [Footnote: A lambda expression with a lambda-introducer that consists + // of empty square brackets can follow the delete keyword if + // the lambda expression is enclosed in parentheses.] + // FIXME: Produce a better diagnostic if the '[]' is unambiguously a + // lambda-introducer. 
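The wording quoted above in practice (sketch; assume p and q were allocated appropriately):

    void g(int *p, int *q) {
      delete [] p;                       // always array delete, per C++11 [expr.delete]p1
      delete ([q]() { return q; }());    // a lambda after 'delete' must be parenthesized
    }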
ArrayDelete = true; BalancedDelimiterTracker T(*this, tok::l_square); diff --git a/lib/Parse/ParseObjc.cpp b/lib/Parse/ParseObjc.cpp index 789a8ae..db35a38 100644 --- a/lib/Parse/ParseObjc.cpp +++ b/lib/Parse/ParseObjc.cpp @@ -308,16 +308,16 @@ public: MethodImplKind(MethodImplKind) { } - Decl *invoke(FieldDeclarator &FD) { + void invoke(ParsingFieldDeclarator &FD) { if (FD.D.getIdentifier() == 0) { P.Diag(AtLoc, diag::err_objc_property_requires_field_name) << FD.D.getSourceRange(); - return 0; + return; } if (FD.BitfieldSize) { P.Diag(AtLoc, diag::err_objc_property_bitfield) << FD.D.getSourceRange(); - return 0; + return; } // Install the property declarator into interfaceDecl. @@ -344,7 +344,7 @@ public: if (!isOverridingProperty) Props.push_back(Property); - return Property; + FD.complete(Property); } }; @@ -375,9 +375,9 @@ void Parser::ParseObjCInterfaceDeclList(tok::ObjCKeywordKind contextKey, while (1) { // If this is a method prototype, parse it. if (Tok.is(tok::minus) || Tok.is(tok::plus)) { - Decl *methodPrototype = - ParseObjCMethodPrototype(MethodImplKind, false); - allMethods.push_back(methodPrototype); + if (Decl *methodPrototype = + ParseObjCMethodPrototype(MethodImplKind, false)) + allMethods.push_back(methodPrototype); // Consume the ';' here, since ParseObjCMethodPrototype() is re-used for // method definitions. if (ExpectAndConsumeSemi(diag::err_expected_semi_after_method_proto)) { @@ -420,7 +420,7 @@ void Parser::ParseObjCInterfaceDeclList(tok::ObjCKeywordKind contextKey, // erroneous r_brace would cause an infinite loop if not handled here. if (Tok.is(tok::r_brace)) break; - ParsedAttributes attrs(AttrFactory); + ParsedAttributesWithRange attrs(AttrFactory); allTUVariables.push_back(ParseDeclarationOrFunctionDefinition(attrs)); continue; } @@ -493,7 +493,7 @@ void Parser::ParseObjCInterfaceDeclList(tok::ObjCKeywordKind contextKey, OCDS, AtLoc, LParenLoc, MethodImplKind); // Parse all the comma separated declarators. - DeclSpec DS(AttrFactory); + ParsingDeclSpec DS(*this); ParseStructDeclaration(DS, Callback); ExpectAndConsume(tok::semi, diag::err_expected_semi_decl_list); @@ -894,6 +894,7 @@ ParsedType Parser::ParseObjCTypeName(ObjCDeclSpec &DS, DeclSpec declSpec(AttrFactory); declSpec.setObjCQualifiers(&DS); ParseSpecifierQualifierList(declSpec); + declSpec.SetRangeEnd(Tok.getLocation()); Declarator declarator(declSpec, context); ParseDeclarator(declarator); @@ -965,7 +966,7 @@ Decl *Parser::ParseObjCMethodDecl(SourceLocation mLoc, tok::TokenKind mType, tok::ObjCKeywordKind MethodImplKind, bool MethodDefinition) { - ParsingDeclRAIIObject PD(*this); + ParsingDeclRAIIObject PD(*this, ParsingDeclRAIIObject::NoParent); if (Tok.is(tok::code_completion)) { Actions.CodeCompleteObjCMethodDecl(getCurScope(), mType == tok::minus, @@ -1000,8 +1001,8 @@ Decl *Parser::ParseObjCMethodDecl(SourceLocation mLoc, if (!SelIdent && Tok.isNot(tok::colon)) { // missing selector name. Diag(Tok, diag::err_expected_selector_for_method) << SourceRange(mLoc, Tok.getLocation()); - // Skip until we get a ; or {}. - SkipUntil(tok::r_brace); + // Skip until we get a ; or @. + SkipUntil(tok::at, true /*StopAtSemi*/, true /*don't consume*/); return 0; } @@ -1105,7 +1106,7 @@ Decl *Parser::ParseObjCMethodDecl(SourceLocation mLoc, } bool isVariadic = false; - + bool cStyleParamWarned = false; // Parse the (optional) parameter list. 
while (Tok.is(tok::comma)) { ConsumeToken(); @@ -1114,6 +1115,10 @@ Decl *Parser::ParseObjCMethodDecl(SourceLocation mLoc, ConsumeToken(); break; } + if (!cStyleParamWarned) { + Diag(Tok, diag::warn_cstyle_param); + cStyleParamWarned = true; + } DeclSpec DS(AttrFactory); ParseDeclarationSpecifiers(DS); // Parse the declarator. @@ -1125,7 +1130,6 @@ Decl *Parser::ParseObjCMethodDecl(SourceLocation mLoc, ParmDecl.getIdentifierLoc(), Param, 0)); - } // FIXME: Add support for optional parameter list... @@ -1258,9 +1262,7 @@ void Parser::ParseObjCClassInstanceVariables(Decl *interfaceDecl, // Check for extraneous top-level semicolon. if (Tok.is(tok::semi)) { - Diag(Tok, diag::ext_extra_ivar_semi) - << FixItHint::CreateRemoval(Tok.getLocation()); - ConsumeToken(); + ConsumeExtraSemi(InstanceVariableList); continue; } @@ -1304,7 +1306,7 @@ void Parser::ParseObjCClassInstanceVariables(Decl *interfaceDecl, P(P), IDecl(IDecl), visibility(V), AllIvarDecls(AllIvarDecls) { } - Decl *invoke(FieldDeclarator &FD) { + void invoke(ParsingFieldDeclarator &FD) { P.Actions.ActOnObjCContainerStartDefinition(IDecl); // Install the declarator into the interface decl. Decl *Field @@ -1314,12 +1316,12 @@ void Parser::ParseObjCClassInstanceVariables(Decl *interfaceDecl, P.Actions.ActOnObjCContainerFinishDefinition(); if (Field) AllIvarDecls.push_back(Field); - return Field; + FD.complete(Field); } } Callback(*this, interfaceDecl, visibility, AllIvarDecls); // Parse all the comma separated declarators. - DeclSpec DS(AttrFactory); + ParsingDeclSpec DS(*this); ParseStructDeclaration(DS, Callback); if (Tok.is(tok::semi)) { @@ -1348,15 +1350,15 @@ void Parser::ParseObjCClassInstanceVariables(Decl *interfaceDecl, /// objc-protocol-forward-reference /// /// objc-protocol-definition: -/// @protocol identifier +/// \@protocol identifier /// objc-protocol-refs[opt] /// objc-interface-decl-list -/// @end +/// \@end /// /// objc-protocol-forward-reference: -/// @protocol identifier-list ';' +/// \@protocol identifier-list ';' /// -/// "@protocol identifier ;" should be resolved as "@protocol +/// "\@protocol identifier ;" should be resolved as "\@protocol /// identifier-list ;": objc-interface-decl-list may not start with a /// semicolon in the first alternative if objc-protocol-refs are omitted. Parser::DeclGroupPtrTy @@ -1573,10 +1575,16 @@ void Parser::ObjCImplParsingDataRAII::finish(SourceRange AtEnd) { assert(!Finished); P.Actions.DefaultSynthesizeProperties(P.getCurScope(), Dcl); for (size_t i = 0; i < LateParsedObjCMethods.size(); ++i) - P.ParseLexedObjCMethodDefs(*LateParsedObjCMethods[i]); + P.ParseLexedObjCMethodDefs(*LateParsedObjCMethods[i], + true/*Methods*/); P.Actions.ActOnAtEnd(P.getCurScope(), AtEnd); + if (HasCFunction) + for (size_t i = 0; i < LateParsedObjCMethods.size(); ++i) + P.ParseLexedObjCMethodDefs(*LateParsedObjCMethods[i], + false/*c-functions*/); + /// \brief Clear and free the cached objc methods. 
for (LateParsedObjCMethodContainer::iterator I = LateParsedObjCMethods.begin(), @@ -1608,8 +1616,8 @@ Decl *Parser::ParseObjCAtAliasDeclaration(SourceLocation atLoc) { SourceLocation classLoc = ConsumeToken(); // consume class-name; ExpectAndConsume(tok::semi, diag::err_expected_semi_after, "@compatibility_alias"); - return Actions.ActOnCompatiblityAlias(atLoc, aliasId, aliasLoc, - classId, classLoc); + return Actions.ActOnCompatibilityAlias(atLoc, aliasId, aliasLoc, + classId, classLoc); } /// property-synthesis: @@ -1913,6 +1921,43 @@ Parser::ParseObjCAutoreleasePoolStmt(SourceLocation atLoc) { AutoreleasePoolBody.take()); } +/// StashAwayMethodOrFunctionBodyTokens - Consume the tokens and store them +/// for later parsing. +void Parser::StashAwayMethodOrFunctionBodyTokens(Decl *MDecl) { + LexedMethod* LM = new LexedMethod(this, MDecl); + CurParsedObjCImpl->LateParsedObjCMethods.push_back(LM); + CachedTokens &Toks = LM->Toks; + // Begin by storing the '{' or 'try' or ':' token. + Toks.push_back(Tok); + if (Tok.is(tok::kw_try)) { + ConsumeToken(); + if (Tok.is(tok::colon)) { + Toks.push_back(Tok); + ConsumeToken(); + while (Tok.isNot(tok::l_brace)) { + ConsumeAndStoreUntil(tok::l_paren, Toks, /*StopAtSemi=*/false); + ConsumeAndStoreUntil(tok::r_paren, Toks, /*StopAtSemi=*/false); + } + } + Toks.push_back(Tok); // also store '{' + } + else if (Tok.is(tok::colon)) { + ConsumeToken(); + while (Tok.isNot(tok::l_brace)) { + ConsumeAndStoreUntil(tok::l_paren, Toks, /*StopAtSemi=*/false); + ConsumeAndStoreUntil(tok::r_paren, Toks, /*StopAtSemi=*/false); + } + Toks.push_back(Tok); // also store '{' + } + ConsumeBrace(); + // Consume everything up to (and including) the matching right brace. + ConsumeAndStoreUntil(tok::r_brace, Toks, /*StopAtSemi=*/false); + while (Tok.is(tok::kw_catch)) { + ConsumeAndStoreUntil(tok::l_brace, Toks, /*StopAtSemi=*/false); + ConsumeAndStoreUntil(tok::r_brace, Toks, /*StopAtSemi=*/false); + } +} + /// objc-method-def: objc-method-proto ';'[opt] '{' body '}' /// Decl *Parser::ParseObjCMethodDefinition() { @@ -1950,23 +1995,10 @@ Decl *Parser::ParseObjCMethodDefinition() { // Allow the rest of sema to find private method decl implementations. Actions.AddAnyMethodToGlobalPool(MDecl); - - if (CurParsedObjCImpl) { - // Consume the tokens and store them for later parsing. - LexedMethod* LM = new LexedMethod(this, MDecl); - CurParsedObjCImpl->LateParsedObjCMethods.push_back(LM); - CachedTokens &Toks = LM->Toks; - // Begin by storing the '{' token. - Toks.push_back(Tok); - ConsumeBrace(); - // Consume everything up to (and including) the matching right brace. - ConsumeAndStoreUntil(tok::r_brace, Toks, /*StopAtSemi=*/false); - - } else { - ConsumeBrace(); - SkipUntil(tok::r_brace, /*StopAtSemi=*/false); - } - + assert (CurParsedObjCImpl + && "ParseObjCMethodDefinition - Method out of @implementation"); + // Consume the tokens and store them for later parsing. 
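The ':' and 'try' prefixes cached by StashAwayMethodOrFunctionBodyTokens above arise from constructors defined inside an Objective-C++ @implementation; in plain C++ terms the token shapes being stored look like this (sketch; open_or_throw is hypothetical):

    int open_or_throw(const char *path);

    struct File {
      int fd;
      File(const char *path)
      try : fd(open_or_throw(path)) {   // 'try', then ':' ctor-initializer, then '{'
        // body tokens are cached through the matching '}'
      } catch (...) {                   // trailing handlers are cached as well
      }
    };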
+ StashAwayMethodOrFunctionBodyTokens(MDecl); return MDecl; } @@ -2066,6 +2098,10 @@ ExprResult Parser::ParseObjCAtExpression(SourceLocation AtLoc) { // Objective-C dictionary literal return ParsePostfixExpressionSuffix(ParseObjCDictionaryLiteral(AtLoc)); + case tok::l_paren: + // Objective-C boxed expression + return ParsePostfixExpressionSuffix(ParseObjCBoxedExpr(AtLoc)); + default: if (Tok.getIdentifierInfo() == 0) return ExprError(Diag(AtLoc, diag::err_unexpected_at)); @@ -2077,8 +2113,23 @@ ExprResult Parser::ParseObjCAtExpression(SourceLocation AtLoc) { return ParsePostfixExpressionSuffix(ParseObjCProtocolExpression(AtLoc)); case tok::objc_selector: return ParsePostfixExpressionSuffix(ParseObjCSelectorExpression(AtLoc)); - default: - return ExprError(Diag(AtLoc, diag::err_unexpected_at)); + default: { + const char *str = 0; + if (GetLookAheadToken(1).is(tok::l_brace)) { + char ch = Tok.getIdentifierInfo()->getNameStart()[0]; + str = + ch == 't' ? "try" + : (ch == 'f' ? "finally" + : (ch == 'a' ? "autoreleasepool" : 0)); + } + if (str) { + SourceLocation kwLoc = Tok.getLocation(); + return ExprError(Diag(AtLoc, diag::err_unexpected_at) << + FixItHint::CreateReplacement(kwLoc, str)); + } + else + return ExprError(Diag(AtLoc, diag::err_unexpected_at)); + } } } } @@ -2112,7 +2163,7 @@ bool Parser::ParseObjCXXMessageReceiver(bool &IsExpr, void *&TypeOrExpr) { Tok.is(tok::kw_typename) || Tok.is(tok::annot_cxxscope)) TryAnnotateTypeOrScopeToken(); - if (!isCXXSimpleTypeSpecifier()) { + if (!Actions.isSimpleTypeSpecifier(Tok.getKind())) { // objc-receiver: // expression ExprResult Receiver = ParseExpression(); @@ -2449,10 +2500,14 @@ Parser::ParseObjCMessageExpressionBody(SourceLocation LBracLoc, } // Parse the, optional, argument list, comma separated. while (Tok.is(tok::comma)) { - ConsumeToken(); // Eat the ','. + SourceLocation commaLoc = ConsumeToken(); // Eat the ','. /// Parse the expression after ',' ExprResult Res(ParseAssignmentExpression()); if (Res.isInvalid()) { + if (Tok.is(tok::colon)) { + Diag(commaLoc, diag::note_extra_comma_message_arg) << + FixItHint::CreateRemoval(commaLoc); + } // We must manually skip to a ']', otherwise the expression skipper will // stop at the ']' when it skips to the ';'. We want it to skip beyond // the enclosing expression. @@ -2580,6 +2635,31 @@ ExprResult Parser::ParseObjCNumericLiteral(SourceLocation AtLoc) { return Owned(Actions.BuildObjCNumericLiteral(AtLoc, Lit.take())); } +/// ParseObjCBoxedExpr - +/// objc-box-expression: +/// @( assignment-expression ) +ExprResult +Parser::ParseObjCBoxedExpr(SourceLocation AtLoc) { + if (Tok.isNot(tok::l_paren)) + return ExprError(Diag(Tok, diag::err_expected_lparen_after) << "@"); + + BalancedDelimiterTracker T(*this, tok::l_paren); + T.consumeOpen(); + ExprResult ValueExpr(ParseAssignmentExpression()); + if (T.consumeClose()) + return ExprError(); + + if (ValueExpr.isInvalid()) + return ExprError(); + + // Wrap the sub-expression in a parenthesized expression, to distinguish + // a boxed expression from a literal. + SourceLocation LPLoc = T.getOpenLocation(), RPLoc = T.getCloseLocation(); + ValueExpr = Actions.ActOnParenExpr(LPLoc, RPLoc, ValueExpr.take()); + return Owned(Actions.BuildObjCBoxedExpr(SourceRange(AtLoc, RPLoc), + ValueExpr.take())); +} + ExprResult Parser::ParseObjCArrayLiteral(SourceLocation AtLoc) { ExprVector ElementExprs(Actions); // array elements. ConsumeBracket(); // consume the l_square. 
@@ -2698,7 +2778,7 @@ Parser::ParseObjCEncodeExpression(SourceLocation AtLoc) {
 }
 
 ///     objc-protocol-expression
-///       @protocol ( protocol-name )
+///       \@protocol ( protocol-name )
 ExprResult
 Parser::ParseObjCProtocolExpression(SourceLocation AtLoc) {
   SourceLocation ProtoLoc = ConsumeToken();
@@ -2713,12 +2793,13 @@ Parser::ParseObjCProtocolExpression(SourceLocation AtLoc) {
     return ExprError(Diag(Tok, diag::err_expected_ident));
 
   IdentifierInfo *protocolId = Tok.getIdentifierInfo();
-  ConsumeToken();
+  SourceLocation ProtoIdLoc = ConsumeToken();
 
   T.consumeClose();
 
   return Owned(Actions.ParseObjCProtocolExpression(protocolId, AtLoc,
                                                    ProtoLoc,
                                                    T.getOpenLocation(),
+                                                   ProtoIdLoc,
                                                    T.getCloseLocation()));
 }
 
@@ -2785,8 +2866,15 @@ ExprResult Parser::ParseObjCSelectorExpression(SourceLocation AtLoc) {
                                              T.getCloseLocation()));
 }
 
-Decl *Parser::ParseLexedObjCMethodDefs(LexedMethod &LM) {
-
+void Parser::ParseLexedObjCMethodDefs(LexedMethod &LM, bool parseMethod) {
+  // MCDecl might be null due to error in method or c-function prototype, etc.
+  Decl *MCDecl = LM.D;
+  bool skip = MCDecl &&
+              ((parseMethod && !Actions.isObjCMethodDecl(MCDecl)) ||
+               (!parseMethod && Actions.isObjCMethodDecl(MCDecl)));
+  if (skip)
+    return;
+
   // Save the current token position.
   SourceLocation OrigLoc = Tok.getLocation();
 
@@ -2796,40 +2884,32 @@ Decl *Parser::ParseLexedObjCMethodDefs(LexedMethod &LM) {
   LM.Toks.push_back(Tok);
   PP.EnterTokenStream(LM.Toks.data(), LM.Toks.size(), true, false);
 
-  // MDecl might be null due to error in method prototype, etc.
-  Decl *MDecl = LM.D;
   // Consume the previously pushed token.
   ConsumeAnyToken();
 
-  assert(Tok.is(tok::l_brace) && "Inline objective-c method not starting with '{'");
-  SourceLocation BraceLoc = Tok.getLocation();
-  // Enter a scope for the method body.
+  assert((Tok.is(tok::l_brace) || Tok.is(tok::kw_try) ||
+          Tok.is(tok::colon)) &&
+         "Inline objective-c method not starting with '{' or 'try' or ':'");
+  // Enter a scope for the method or c-function body.
   ParseScope BodyScope(this,
-                       Scope::ObjCMethodScope|Scope::FnScope|Scope::DeclScope);
-
-  // Tell the actions module that we have entered a method definition with the
-  // specified Declarator for the method.
-  Actions.ActOnStartOfObjCMethodDef(getCurScope(), MDecl);
-
-  if (SkipFunctionBodies && trySkippingFunctionBody()) {
-    BodyScope.Exit();
-    return Actions.ActOnFinishFunctionBody(MDecl, 0);
-  }
-
-  StmtResult FnBody(ParseCompoundStatementBody());
+                       parseMethod
+                       ? Scope::ObjCMethodScope|Scope::FnScope|Scope::DeclScope
+                       : Scope::FnScope|Scope::DeclScope);
 
-  // If the function body could not be parsed, make a bogus compoundstmt.
-  if (FnBody.isInvalid()) {
-    Sema::CompoundScopeRAII CompoundScope(Actions);
-    FnBody = Actions.ActOnCompoundStmt(BraceLoc, BraceLoc,
-                                       MultiStmtArg(Actions), false);
+  // Tell the actions module that we have entered a method or c-function
+  // definition with the specified Declarator for the method/function.
+  if (parseMethod)
+    Actions.ActOnStartOfObjCMethodDef(getCurScope(), MCDecl);
+  else
+    Actions.ActOnStartOfFunctionDef(getCurScope(), MCDecl);
+  if (Tok.is(tok::kw_try))
+    MCDecl = ParseFunctionTryBlock(MCDecl, BodyScope);
+  else {
+    if (Tok.is(tok::colon))
+      ParseConstructorInitializer(MCDecl);
+    MCDecl = ParseFunctionStatementBody(MCDecl, BodyScope);
   }
-
-  // Leave the function body scope.
- BodyScope.Exit(); - - MDecl = Actions.ActOnFinishFunctionBody(MDecl, FnBody.take()); - + if (Tok.getLocation() != OrigLoc) { // Due to parsing error, we either went over the cached tokens or // there are still cached tokens left. If it's the latter case skip the @@ -2842,5 +2922,5 @@ Decl *Parser::ParseLexedObjCMethodDefs(LexedMethod &LM) { ConsumeAnyToken(); } - return MDecl; + return; } diff --git a/lib/Parse/ParsePragma.h b/lib/Parse/ParsePragma.h index ebb185a..fef6960 100644 --- a/lib/Parse/ParsePragma.h +++ b/lib/Parse/ParsePragma.h @@ -30,10 +30,9 @@ public: }; class PragmaGCCVisibilityHandler : public PragmaHandler { - Sema &Actions; public: - explicit PragmaGCCVisibilityHandler(Sema &A) : PragmaHandler("visibility"), - Actions(A) {} + explicit PragmaGCCVisibilityHandler(Sema &/*A*/) + : PragmaHandler("visibility") {} virtual void HandlePragma(Preprocessor &PP, PragmaIntroducerKind Introducer, Token &FirstToken); @@ -70,11 +69,9 @@ public: }; class PragmaUnusedHandler : public PragmaHandler { - Sema &Actions; - Parser &parser; public: - PragmaUnusedHandler(Sema &A, Parser& p) - : PragmaHandler("unused"), Actions(A), parser(p) {} + PragmaUnusedHandler(Sema &/*A*/) + : PragmaHandler("unused") {} virtual void HandlePragma(Preprocessor &PP, PragmaIntroducerKind Introducer, Token &FirstToken); @@ -102,10 +99,9 @@ public: class PragmaOpenCLExtensionHandler : public PragmaHandler { Sema &Actions; - Parser &parser; public: - PragmaOpenCLExtensionHandler(Sema &S, Parser& p) : - PragmaHandler("EXTENSION"), Actions(S), parser(p) {} + PragmaOpenCLExtensionHandler(Sema &A) : + PragmaHandler("EXTENSION"), Actions(A) {} virtual void HandlePragma(Preprocessor &PP, PragmaIntroducerKind Introducer, Token &FirstToken); }; @@ -113,10 +109,9 @@ public: class PragmaFPContractHandler : public PragmaHandler { Sema &Actions; - Parser &parser; public: - PragmaFPContractHandler(Sema &S, Parser& p) : - PragmaHandler("FP_CONTRACT"), Actions(S), parser(p) {} + PragmaFPContractHandler(Sema &A) : + PragmaHandler("FP_CONTRACT"), Actions(A) {} virtual void HandlePragma(Preprocessor &PP, PragmaIntroducerKind Introducer, Token &FirstToken); }; diff --git a/lib/Parse/ParseStmt.cpp b/lib/Parse/ParseStmt.cpp index 44320df..d2e4309 100644 --- a/lib/Parse/ParseStmt.cpp +++ b/lib/Parse/ParseStmt.cpp @@ -20,6 +20,7 @@ #include "clang/Basic/Diagnostic.h" #include "clang/Basic/PrettyStackTrace.h" #include "clang/Basic/SourceManager.h" +#include "llvm/ADT/SmallString.h" using namespace clang; //===----------------------------------------------------------------------===// @@ -771,7 +772,7 @@ StmtResult Parser::ParseCompoundStatementBody(bool isStmtExpr) { DeclsInGroup.data(), DeclsInGroup.size()); StmtResult R = Actions.ActOnDeclStmt(Res, LabelLoc, Tok.getLocation()); - ExpectAndConsume(tok::semi, diag::err_expected_semi_declaration); + ExpectAndConsumeSemi(diag::err_expected_semi_declaration); if (R.isUsable()) Stmts.push_back(R.release()); } @@ -895,6 +896,16 @@ bool Parser::ParseParenExprOrCondition(ExprResult &ExprResult, // Otherwise the condition is valid or the rparen is present. T.consumeClose(); + + // Check for extraneous ')'s to catch things like "if (foo())) {". We know + // that all callers are looking for a statement after the condition, so ")" + // isn't valid. 
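The loop added just below recovers from inputs like this one (sketch; the input is intentionally ill-formed):

    bool ready();
    void tick() {
      if (ready())) {    // the stray ')' is consumed and removed via a FixIt
      }                  // (err_extraneous_rparen_in_condition)
    }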
+ while (Tok.is(tok::r_paren)) { + Diag(Tok, diag::err_extraneous_rparen_in_condition) + << FixItHint::CreateRemoval(Tok.getLocation()); + ConsumeParen(); + } + return false; } @@ -938,7 +949,7 @@ StmtResult Parser::ParseIfStatement(SourceLocation *TrailingElseLoc) { if (ParseParenExprOrCondition(CondExp, CondVar, IfLoc, true)) return StmtError(); - FullExprArg FullCondExp(Actions.MakeFullExpr(CondExp.get())); + FullExprArg FullCondExp(Actions.MakeFullExpr(CondExp.get(), IfLoc)); // C99 6.8.4p3 - In C99, the body of the if statement is a scope, even if // there is no compound stmt. C90 does not have this clause. We only do this @@ -1164,7 +1175,7 @@ StmtResult Parser::ParseWhileStatement(SourceLocation *TrailingElseLoc) { if (ParseParenExprOrCondition(Cond, CondVar, WhileLoc, true)) return StmtError(); - FullExprArg FullCond(Actions.MakeFullExpr(Cond.get())); + FullExprArg FullCond(Actions.MakeFullExpr(Cond.get(), WhileLoc)); // C99 6.8.5p5 - In C99, the body of the if statement is a scope, even if // there is no compound stmt. C90 does not have this clause. We only do this @@ -1248,6 +1259,12 @@ StmtResult Parser::ParseDoStatement() { // Parse the parenthesized condition. BalancedDelimiterTracker T(*this, tok::l_paren); T.consumeOpen(); + + // FIXME: Do not just parse the attribute contents and throw them away + ParsedAttributesWithRange attrs(AttrFactory); + MaybeParseCXX0XAttributes(attrs); + ProhibitAttributes(attrs); + ExprResult Cond = ParseExpression(); T.consumeClose(); DoScope.Exit(); @@ -1288,7 +1305,8 @@ StmtResult Parser::ParseForStatement(SourceLocation *TrailingElseLoc) { return StmtError(); } - bool C99orCXXorObjC = getLangOpts().C99 || getLangOpts().CPlusPlus || getLangOpts().ObjC1; + bool C99orCXXorObjC = getLangOpts().C99 || getLangOpts().CPlusPlus || + getLangOpts().ObjC1; // C99 6.8.5p5 - In C99, the for statement is a block. This is not // the case for C90. Start the loop scope. @@ -1336,8 +1354,12 @@ StmtResult Parser::ParseForStatement(SourceLocation *TrailingElseLoc) { return StmtError(); } + ParsedAttributesWithRange attrs(AttrFactory); + MaybeParseCXX0XAttributes(attrs); + // Parse the first part of the for specifier. if (Tok.is(tok::semi)) { // for (; + ProhibitAttributes(attrs); // no first part, eat the ';'. ConsumeToken(); } else if (isForInitDeclaration()) { // for (int X = 4; @@ -1382,6 +1404,7 @@ StmtResult Parser::ParseForStatement(SourceLocation *TrailingElseLoc) { Diag(Tok, diag::err_expected_semi_for); } } else { + ProhibitAttributes(attrs); Value = ParseExpression(); ForEach = isTokIdentifier_in(); @@ -1441,7 +1464,7 @@ StmtResult Parser::ParseForStatement(SourceLocation *TrailingElseLoc) { Second.get()); } SecondPartIsInvalid = Second.isInvalid(); - SecondPart = Actions.MakeFullExpr(Second.get()); + SecondPart = Actions.MakeFullExpr(Second.get(), ForLoc); } if (Tok.isNot(tok::semi)) { @@ -1469,6 +1492,8 @@ StmtResult Parser::ParseForStatement(SourceLocation *TrailingElseLoc) { // statememt before parsing the body, in order to be able to deduce the type // of an auto-typed loop variable. StmtResult ForRangeStmt; + StmtResult ForEachStmt; + if (ForRange) { ForRangeStmt = Actions.ActOnCXXForRangeStmt(ForLoc, T.getOpenLocation(), FirstPart.take(), @@ -1480,9 +1505,10 @@ StmtResult Parser::ParseForStatement(SourceLocation *TrailingElseLoc) { // Similarly, we need to do the semantic analysis for a for-range // statement immediately in order to close over temporaries correctly. 
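The early ActOnCXXForRangeStmt call discussed above is what allows the loop variable's type to be deduced before the body is parsed; a minimal example (sketch):

    #include <vector>
    void doubleAll(std::vector<int> &v) {
      for (auto &x : v)   // 'auto' is resolved from the range expression
        x *= 2;           // before the body statement is seen
    }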
} else if (ForEach) { - if (!Collection.isInvalid()) - Collection = - Actions.ActOnObjCForCollectionOperand(ForLoc, Collection.take()); + ForEachStmt = Actions.ActOnObjCForCollectionStmt(ForLoc, T.getOpenLocation(), + FirstPart.take(), + Collection.take(), + T.getCloseLocation()); } // C99 6.8.5p5 - In C99, the body of the if statement is a scope, even if @@ -1512,11 +1538,8 @@ StmtResult Parser::ParseForStatement(SourceLocation *TrailingElseLoc) { return StmtError(); if (ForEach) - return Actions.ActOnObjCForCollectionStmt(ForLoc, T.getOpenLocation(), - FirstPart.take(), - Collection.take(), - T.getCloseLocation(), - Body.take()); + return Actions.FinishObjCForCollectionStmt(ForEachStmt.take(), + Body.take()); if (ForRange) return Actions.FinishCXXForRangeStmt(ForRangeStmt.take(), Body.take()); @@ -1617,9 +1640,24 @@ StmtResult Parser::ParseReturnStatement() { /// ParseMicrosoftAsmStatement. When -fms-extensions/-fasm-blocks is enabled, /// this routine is called to collect the tokens for an MS asm statement. +/// +/// [MS] ms-asm-statement: +/// ms-asm-block +/// ms-asm-block ms-asm-statement +/// +/// [MS] ms-asm-block: +/// '__asm' ms-asm-line '\n' +/// '__asm' '{' ms-asm-instruction-block[opt] '}' ';'[opt] +/// +/// [MS] ms-asm-instruction-block +/// ms-asm-line +/// ms-asm-line '\n' ms-asm-instruction-block +/// StmtResult Parser::ParseMicrosoftAsmStatement(SourceLocation AsmLoc) { SourceManager &SrcMgr = PP.getSourceManager(); SourceLocation EndLoc = AsmLoc; + SmallVector AsmToks; + SmallVector LineEnds; do { bool InBraces = false; unsigned short savedBraceCount = 0; @@ -1648,8 +1686,10 @@ StmtResult Parser::ParseMicrosoftAsmStatement(SourceLocation AsmLoc) { // If we hit EOF, we're done, period. if (Tok.is(tok::eof)) break; - // When we consume the closing brace, we're done. - if (InBraces && BraceCount == savedBraceCount) + + // The asm keyword is a statement separator, so multiple asm statements + // are allowed. + if (!InAsmComment && Tok.is(tok::kw_asm)) break; if (!InAsmComment && Tok.is(tok::semi)) { @@ -1681,18 +1721,28 @@ StmtResult Parser::ParseMicrosoftAsmStatement(SourceLocation AsmLoc) { break; } } + if (!InAsmComment && InBraces && Tok.is(tok::r_brace) && + BraceCount == (savedBraceCount + 1)) { + // Consume the closing brace, and finish + EndLoc = ConsumeBrace(); + break; + } // Consume the next token; make sure we don't modify the brace count etc. // if we are in a comment. EndLoc = TokLoc; if (InAsmComment) PP.Lex(Tok); - else + else { + AsmToks.push_back(Tok); ConsumeAnyToken(); + } TokLoc = Tok.getLocation(); ++NumTokensRead; } while (1); + LineEnds.push_back(AsmToks.size()); + if (InBraces && BraceCount != savedBraceCount) { // __asm without closing brace (this can happen at EOF). Diag(Tok, diag::err_expected_rbrace); @@ -1709,24 +1759,10 @@ StmtResult Parser::ParseMicrosoftAsmStatement(SourceLocation AsmLoc) { break; EndLoc = ConsumeToken(); } while (1); - // FIXME: Need to actually grab the data and pass it on to Sema. Ideally, - // what Sema wants is a string of the entire inline asm, with one instruction - // per line and all the __asm keywords stripped out, and a way of mapping - // from any character of that string to its location in the original source - // code. I'm not entirely sure how to go about that, though. 
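The token collection above targets input of this shape (x86, -fms-extensions / -fasm-blocks; a sketch only, since Sema support for MS asm is still marked FIXME at this point):

    int square(int x) {
      __asm {
        mov  eax, x
        imul eax, eax
        mov  x, eax
      }                  // '__asm' also acts as a statement separator, so
      return x;          // several blocks may follow one another
    }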
- Token t; - t.setKind(tok::string_literal); - t.setLiteralData("\"/*FIXME: not done*/\""); - t.clearFlag(Token::NeedsCleaning); - t.setLength(21); - ExprResult AsmString(Actions.ActOnStringLiteral(&t, 1)); - ExprVector Constraints(Actions); - ExprVector Exprs(Actions); - ExprVector Clobbers(Actions); - return Actions.ActOnAsmStmt(AsmLoc, true, true, 0, 0, 0, - move_arg(Constraints), move_arg(Exprs), - AsmString.take(), move_arg(Clobbers), - EndLoc, true); + + // FIXME: We should be passing source locations for better diagnostics. + return Actions.ActOnMSAsmStmt(AsmLoc, llvm::makeArrayRef(AsmToks), + llvm::makeArrayRef(LineEnds), EndLoc); } /// ParseAsmStatement - Parse a GNU extended asm statement. @@ -1748,23 +1784,12 @@ StmtResult Parser::ParseMicrosoftAsmStatement(SourceLocation AsmLoc) { /// asm-string-literal /// asm-clobbers ',' asm-string-literal /// -/// [MS] ms-asm-statement: -/// ms-asm-block -/// ms-asm-block ms-asm-statement -/// -/// [MS] ms-asm-block: -/// '__asm' ms-asm-line '\n' -/// '__asm' '{' ms-asm-instruction-block[opt] '}' ';'[opt] -/// -/// [MS] ms-asm-instruction-block -/// ms-asm-line -/// ms-asm-line '\n' ms-asm-instruction-block -/// StmtResult Parser::ParseAsmStatement(bool &msAsm) { assert(Tok.is(tok::kw_asm) && "Not an asm stmt"); SourceLocation AsmLoc = ConsumeToken(); - if (getLangOpts().MicrosoftExt && Tok.isNot(tok::l_paren) && !isTypeQualifier()) { + if (getLangOpts().MicrosoftExt && Tok.isNot(tok::l_paren) && + !isTypeQualifier()) { msAsm = true; return ParseMicrosoftAsmStatement(AsmLoc); } @@ -2067,7 +2092,7 @@ StmtResult Parser::ParseCXXTryBlockCommon(SourceLocation TryLoc) { return move(TryBlock); // Borland allows SEH-handlers with 'try' - + if ((Tok.is(tok::identifier) && Tok.getIdentifierInfo() == getSEHExceptKeyword()) || Tok.is(tok::kw___finally)) { @@ -2107,7 +2132,7 @@ StmtResult Parser::ParseCXXTryBlockCommon(SourceLocation TryLoc) { if (Handlers.empty()) return StmtError(); - return Actions.ActOnCXXTryBlock(TryLoc, TryBlock.take(), move_arg(Handlers)); + return Actions.ActOnCXXTryBlock(TryLoc, TryBlock.take(),move_arg(Handlers)); } } @@ -2203,10 +2228,10 @@ void Parser::ParseMicrosoftIfExistsStatement(StmtVector &Stmts) { case IEB_Parse: // Parse the statements below. break; - + case IEB_Dependent: llvm_unreachable("Dependent case handled above"); - + case IEB_Skip: Braces.skipToEnd(); return; diff --git a/lib/Parse/ParseTemplate.cpp b/lib/Parse/ParseTemplate.cpp index 5c3e2ba..ade918f 100644 --- a/lib/Parse/ParseTemplate.cpp +++ b/lib/Parse/ParseTemplate.cpp @@ -90,7 +90,8 @@ Parser::ParseTemplateDeclarationOrSpecialization(unsigned Context, // Tell the action that names should be checked in the context of // the declaration to come. - ParsingDeclRAIIObject ParsingTemplateParams(*this); + ParsingDeclRAIIObject + ParsingTemplateParams(*this, ParsingDeclRAIIObject::NoParent); // Parse multiple levels of template headers within this template // parameter scope, e.g., @@ -213,11 +214,15 @@ Parser::ParseSingleDeclarationAfterTemplate( return ParseUsingDirectiveOrDeclaration(Context, TemplateInfo, DeclEnd, prefixAttrs); - // Parse the declaration specifiers, stealing the accumulated - // diagnostics from the template parameters. + // Parse the declaration specifiers, stealing any diagnostics from + // the template parameters. ParsingDeclSpec DS(*this, &DiagsFromTParams); - DS.takeAttributesFrom(prefixAttrs); + // Move the attributes from the prefix into the DS. 
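The hunk that follows makes prefix attributes an error when the parsed entity turns out to be an explicit instantiation, instead of silently attaching them. A hedged illustration of the distinction (the attribute placement is hypothetical):

    template <typename T> struct S { };
    template struct S<int>;                   // explicit instantiation: OK
    // [[gnu::visibility("default")]] template struct S<int>;
    //   a prefix attribute here now hits ProhibitAttributes()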
+ if (TemplateInfo.Kind == ParsedTemplateInfo::ExplicitInstantiation) + ProhibitAttributes(prefixAttrs); + else + DS.takeAttributesFrom(prefixAttrs); ParseDeclarationSpecifiers(DS, TemplateInfo, AS, getDeclSpecContextFromDeclaratorContext(Context)); @@ -259,7 +264,7 @@ Parser::ParseSingleDeclarationAfterTemplate( } // Eat the semi colon after the declaration. - ExpectAndConsume(tok::semi, diag::err_expected_semi_declaration); + ExpectAndConsumeSemi(diag::err_expected_semi_declaration); if (LateParsedAttrs.size() > 0) ParseLexedAttributeList(LateParsedAttrs, ThisDecl, true, false); DeclaratorInfo.complete(ThisDecl); @@ -314,6 +319,11 @@ bool Parser::ParseTemplateParameters(unsigned Depth, Failed = ParseTemplateParameterList(Depth, TemplateParams); if (Tok.is(tok::greatergreater)) { + // No diagnostic required here: a template-parameter-list can only be + // followed by a declaration or, for a template template parameter, the + // 'class' keyword. Therefore, the second '>' will be diagnosed later. + // This matters for elegant diagnosis of: + // template> struct S; Tok.setKind(tok::greater); RAngleLoc = Tok.getLocation(); Tok.setLocation(Tok.getLocation().getLocWithOffset(1)); @@ -711,34 +721,104 @@ Parser::ParseTemplateIdAfterTemplateName(TemplateTy Template, } } - if (Tok.isNot(tok::greater) && Tok.isNot(tok::greatergreater)) { + // What will be left once we've consumed the '>'. + tok::TokenKind RemainingToken; + const char *ReplacementStr = "> >"; + + switch (Tok.getKind()) { + default: Diag(Tok.getLocation(), diag::err_expected_greater); return true; - } - // Determine the location of the '>' or '>>'. Only consume this - // token if the caller asked us to. - RAngleLoc = Tok.getLocation(); + case tok::greater: + // Determine the location of the '>' token. Only consume this token + // if the caller asked us to. + RAngleLoc = Tok.getLocation(); + if (ConsumeLastToken) + ConsumeToken(); + return false; - if (Tok.is(tok::greatergreater)) { - const char *ReplaceStr = "> >"; - if (NextToken().is(tok::greater) || NextToken().is(tok::greatergreater)) - ReplaceStr = "> > "; + case tok::greatergreater: + RemainingToken = tok::greater; + break; - Diag(Tok.getLocation(), getLangOpts().CPlusPlus0x ? - diag::warn_cxx98_compat_two_right_angle_brackets : - diag::err_two_right_angle_brackets_need_space) - << FixItHint::CreateReplacement(SourceRange(Tok.getLocation()), - ReplaceStr); + case tok::greatergreatergreater: + RemainingToken = tok::greatergreater; + break; - Tok.setKind(tok::greater); - if (!ConsumeLastToken) { - // Since we're not supposed to consume the '>>' token, we need - // to insert a second '>' token after the first. - PP.EnterToken(Tok); - } - } else if (ConsumeLastToken) + case tok::greaterequal: + RemainingToken = tok::equal; + ReplacementStr = "> ="; + break; + + case tok::greatergreaterequal: + RemainingToken = tok::greaterequal; + break; + } + + // This template-id is terminated by a token which starts with a '>'. Outside + // C++11, this is now error recovery, and in C++11, this is error recovery if + // the token isn't '>>'. + + RAngleLoc = Tok.getLocation(); + + // The source range of the '>>' or '>=' at the start of the token. + CharSourceRange ReplacementRange = + CharSourceRange::getCharRange(RAngleLoc, + Lexer::AdvanceToTokenCharacter(RAngleLoc, 2, PP.getSourceManager(), + getLangOpts())); + + // A hint to put a space between the '>>'s. 
In order to make the hint as + // clear as possible, we include the characters either side of the space in + // the replacement, rather than just inserting a space at SecondCharLoc. + FixItHint Hint1 = FixItHint::CreateReplacement(ReplacementRange, + ReplacementStr); + + // A hint to put another space after the token, if it would otherwise be + // lexed differently. + FixItHint Hint2; + Token Next = NextToken(); + if ((RemainingToken == tok::greater || + RemainingToken == tok::greatergreater) && + (Next.is(tok::greater) || Next.is(tok::greatergreater) || + Next.is(tok::greatergreatergreater) || Next.is(tok::equal) || + Next.is(tok::greaterequal) || Next.is(tok::greatergreaterequal) || + Next.is(tok::equalequal)) && + areTokensAdjacent(Tok, Next)) + Hint2 = FixItHint::CreateInsertion(Next.getLocation(), " "); + + unsigned DiagId = diag::err_two_right_angle_brackets_need_space; + if (getLangOpts().CPlusPlus0x && Tok.is(tok::greatergreater)) + DiagId = diag::warn_cxx98_compat_two_right_angle_brackets; + else if (Tok.is(tok::greaterequal)) + DiagId = diag::err_right_angle_bracket_equal_needs_space; + Diag(Tok.getLocation(), DiagId) << Hint1 << Hint2; + + // Strip the initial '>' from the token. + if (RemainingToken == tok::equal && Next.is(tok::equal) && + areTokensAdjacent(Tok, Next)) { + // Join two adjacent '=' tokens into one, for cases like: + // void (*p)() = f; + // return f==p; ConsumeToken(); + Tok.setKind(tok::equalequal); + Tok.setLength(Tok.getLength() + 1); + } else { + Tok.setKind(RemainingToken); + Tok.setLength(Tok.getLength() - 1); + } + Tok.setLocation(Lexer::AdvanceToTokenCharacter(RAngleLoc, 1, + PP.getSourceManager(), + getLangOpts())); + + if (!ConsumeLastToken) { + // Since we're not supposed to consume the '>' token, we need to push + // this token and revert the current token back to the '>'. + PP.EnterToken(Tok); + Tok.setKind(tok::greater); + Tok.setLength(1); + Tok.setLocation(RAngleLoc); + } return false; } @@ -1132,7 +1212,8 @@ Decl *Parser::ParseExplicitInstantiation(unsigned Context, SourceLocation &DeclEnd, AccessSpecifier AS) { // This isn't really required here. - ParsingDeclRAIIObject ParsingTemplateParams(*this); + ParsingDeclRAIIObject + ParsingTemplateParams(*this, ParsingDeclRAIIObject::NoParent); return ParseSingleDeclarationAfterTemplate(Context, ParsedTemplateInfo(ExternLoc, diff --git a/lib/Parse/ParseTentative.cpp b/lib/Parse/ParseTentative.cpp index 28c5e8b..1a4df47 100644 --- a/lib/Parse/ParseTentative.cpp +++ b/lib/Parse/ParseTentative.cpp @@ -671,7 +671,7 @@ Parser::TPResult Parser::TryParseDeclarator(bool mayBeAbstract, // initializer that follows the declarator. Note that ctor-style // initializers are not possible in contexts where abstract declarators // are allowed. 
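The net effect of the token-splitting logic above is that any token beginning with '>' can close a template-id, with fix-its suggesting the space where one is required. Illustrative inputs (example code, not from the patch):

    #include <vector>
    std::vector<std::vector<int>> v;  // C++11: '>>' closes both template-ids;
                                      // C++98 gets a "> >" fix-it instead
    // Likewise 'A<B>=c' splits '>=' into '>' and '=' with a "> =" fix-it,
    // and two adjacent '=' tokens can be re-joined into '==' as handled above.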
- if (!mayBeAbstract && !isCXXFunctionDeclarator(false/*warnIfAmbiguous*/)) + if (!mayBeAbstract && !isCXXFunctionDeclarator()) break; // direct-declarator '(' parameter-declaration-clause ')' @@ -735,6 +735,7 @@ Parser::isExpressionOrTypeSpecifierSimple(tok::TokenKind Kind) { case tok::kw_alignof: case tok::kw_noexcept: case tok::kw_nullptr: + case tok::kw__Alignof: case tok::kw___null: case tok::kw___alignof: case tok::kw___builtin_choose_expr: @@ -744,6 +745,7 @@ Parser::isExpressionOrTypeSpecifierSimple(tok::TokenKind Kind) { case tok::kw___imag: case tok::kw___real: case tok::kw___FUNCTION__: + case tok::kw_L__FUNCTION__: case tok::kw___PRETTY_FUNCTION__: case tok::kw___has_nothrow_assign: case tok::kw___has_nothrow_copy: @@ -827,6 +829,10 @@ Parser::isExpressionOrTypeSpecifierSimple(tok::TokenKind Kind) { /// be either a decl-specifier or a function-style cast, and TPResult::Error() /// if a parsing error was found and reported. /// +/// If HasMissingTypename is provided, a name with a dependent scope specifier +/// will be treated as ambiguous if the 'typename' keyword is missing. If this +/// happens, *HasMissingTypename will be set to 'true'. +/// /// decl-specifier: /// storage-class-specifier /// type-specifier @@ -918,7 +924,8 @@ Parser::isExpressionOrTypeSpecifierSimple(tok::TokenKind Kind) { /// [GNU] restrict /// Parser::TPResult -Parser::isCXXDeclarationSpecifier(Parser::TPResult BracedCastResult) { +Parser::isCXXDeclarationSpecifier(Parser::TPResult BracedCastResult, + bool *HasMissingTypename) { switch (Tok.getKind()) { case tok::identifier: // foo::bar // Check for need to substitute AltiVec __vector keyword @@ -931,9 +938,12 @@ Parser::isCXXDeclarationSpecifier(Parser::TPResult BracedCastResult) { // recurse to handle whatever we get. if (TryAnnotateTypeOrScopeToken()) return TPResult::Error(); - if (Tok.is(tok::identifier)) - return TPResult::False(); - return isCXXDeclarationSpecifier(BracedCastResult); + if (Tok.is(tok::identifier)) { + const Token &Next = NextToken(); + return (!getLangOpts().ObjC1 && Next.is(tok::identifier)) ? + TPResult::True() : TPResult::False(); + } + return isCXXDeclarationSpecifier(BracedCastResult, HasMissingTypename); case tok::coloncolon: { // ::foo::bar const Token &Next = NextToken(); @@ -947,7 +957,7 @@ Parser::isCXXDeclarationSpecifier(Parser::TPResult BracedCastResult) { // recurse to handle whatever we get. if (TryAnnotateTypeOrScopeToken()) return TPResult::Error(); - return isCXXDeclarationSpecifier(BracedCastResult); + return isCXXDeclarationSpecifier(BracedCastResult, HasMissingTypename); // decl-specifier: // storage-class-specifier @@ -1049,12 +1059,20 @@ Parser::isCXXDeclarationSpecifier(Parser::TPResult BracedCastResult) { bool isIdentifier = Tok.is(tok::identifier); TPResult TPR = TPResult::False(); if (!isIdentifier) - TPR = isCXXDeclarationSpecifier(BracedCastResult); + TPR = isCXXDeclarationSpecifier(BracedCastResult, + HasMissingTypename); PA.Revert(); if (isIdentifier || TPR == TPResult::True() || TPR == TPResult::Error()) return TPResult::Error(); + + if (HasMissingTypename) { + // We can't tell whether this is a missing 'typename' or a valid + // expression. 
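A typical case the new HasMissingTypename flag captures: a dependent qualified name that is only a valid declaration with the 'typename' keyword (example code, not from the patch):

    template <typename T> void g() {
      // Without 'typename', 'T::type *x;' could be a pointer declaration
      // (missing the keyword) or a multiplication expression, so tentative
      // parsing records the ambiguity instead of guessing:
      typename T::type *x;  // the keyword resolves it to a declaration
      (void)x;
    }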
+ *HasMissingTypename = true; + return TPResult::Ambiguous(); + } } } return TPResult::False(); @@ -1218,21 +1236,24 @@ Parser::TPResult Parser::TryParseProtocolQualifiers() { return TPResult::Error(); } -Parser::TPResult Parser::TryParseDeclarationSpecifier() { - TPResult TPR = isCXXDeclarationSpecifier(); +Parser::TPResult +Parser::TryParseDeclarationSpecifier(bool *HasMissingTypename) { + TPResult TPR = isCXXDeclarationSpecifier(TPResult::False(), + HasMissingTypename); if (TPR != TPResult::Ambiguous()) return TPR; if (Tok.is(tok::kw_typeof)) TryParseTypeofSpecifier(); else { + if (Tok.is(tok::annot_cxxscope)) + ConsumeToken(); ConsumeToken(); if (getLangOpts().ObjC1 && Tok.is(tok::less)) TryParseProtocolQualifiers(); } - assert(Tok.is(tok::l_paren) && "Expected '('!"); return TPResult::Ambiguous(); } @@ -1246,7 +1267,7 @@ Parser::TPResult Parser::TryParseDeclarationSpecifier() { /// '(' parameter-declaration-clause ')' cv-qualifier-seq[opt] /// exception-specification[opt] /// -bool Parser::isCXXFunctionDeclarator(bool warnIfAmbiguous) { +bool Parser::isCXXFunctionDeclarator(bool *IsAmbiguous) { // C++ 8.2p1: // The ambiguity arising from the similarity between a function-style cast and @@ -1260,27 +1281,36 @@ bool Parser::isCXXFunctionDeclarator(bool warnIfAmbiguous) { TentativeParsingAction PA(*this); ConsumeParen(); - TPResult TPR = TryParseParameterDeclarationClause(); - if (TPR == TPResult::Ambiguous() && Tok.isNot(tok::r_paren)) - TPR = TPResult::False(); + bool InvalidAsDeclaration = false; + TPResult TPR = TryParseParameterDeclarationClause(&InvalidAsDeclaration); + if (TPR == TPResult::Ambiguous()) { + if (Tok.isNot(tok::r_paren)) + TPR = TPResult::False(); + else { + const Token &Next = NextToken(); + if (Next.is(tok::amp) || Next.is(tok::ampamp) || + Next.is(tok::kw_const) || Next.is(tok::kw_volatile) || + Next.is(tok::kw_throw) || Next.is(tok::kw_noexcept) || + Next.is(tok::l_square) || isCXX0XVirtSpecifier(Next) || + Next.is(tok::l_brace) || Next.is(tok::kw_try) || + Next.is(tok::equal) || Next.is(tok::arrow)) + // The next token cannot appear after a constructor-style initializer, + // and can appear next in a function definition. This must be a function + // declarator. + TPR = TPResult::True(); + else if (InvalidAsDeclaration) + // Use the absence of 'typename' as a tie-breaker. + TPR = TPResult::False(); + } + } - SourceLocation TPLoc = Tok.getLocation(); PA.Revert(); - // In case of an error, let the declaration parsing code handle it. - if (TPR == TPResult::Error()) - return true; + if (IsAmbiguous && TPR == TPResult::Ambiguous()) + *IsAmbiguous = true; - if (TPR == TPResult::Ambiguous()) { - // Function declarator has precedence over constructor-style initializer. - // Emit a warning just in case the author intended a variable definition. - if (warnIfAmbiguous) - Diag(Tok, diag::warn_parens_disambiguated_as_function_decl) - << SourceRange(Tok.getLocation(), TPLoc); - return true; - } - - return TPR == TPResult::True(); + // In case of an error, let the declaration parsing code handle it. 
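The look-one-past-the-')' checks above settle the classic ambiguity between a function declarator and a constructor-style initializer (example code, not from the patch):

    struct T { T(); explicit T(int); };
    extern int x;
    T a(T(x));  // declares a function: parameter 'x' of type T, returns T
    // A token such as '{', '=', or '->' right after the ')' cannot follow a
    // constructor-style initializer, so it forces the declarator reading.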
+ return TPR != TPResult::False(); } /// parameter-declaration-clause: @@ -1300,10 +1330,11 @@ bool Parser::isCXXFunctionDeclarator(bool warnIfAmbiguous) { /// attribute-specifier-seq[opt] decl-specifier-seq abstract-declarator[opt] /// attributes[opt] '=' assignment-expression /// -Parser::TPResult Parser::TryParseParameterDeclarationClause() { +Parser::TPResult +Parser::TryParseParameterDeclarationClause(bool *InvalidAsDeclaration) { if (Tok.is(tok::r_paren)) - return TPResult::True(); + return TPResult::Ambiguous(); // parameter-declaration-list[opt] '...'[opt] // parameter-declaration-list ',' '...' @@ -1333,7 +1364,7 @@ Parser::TPResult Parser::TryParseParameterDeclarationClause() { // decl-specifier-seq // A parameter-declaration's initializer must be preceded by an '=', so // decl-specifier-seq '{' is not a parameter in C++11. - TPResult TPR = TryParseDeclarationSpecifier(); + TPResult TPR = TryParseDeclarationSpecifier(InvalidAsDeclaration); if (TPR != TPResult::Ambiguous()) return TPR; diff --git a/lib/Parse/Parser.cpp b/lib/Parse/Parser.cpp index f1b99fb..3725e2b 100644 --- a/lib/Parse/Parser.cpp +++ b/lib/Parse/Parser.cpp @@ -23,6 +23,22 @@ #include "clang/AST/ASTConsumer.h" using namespace clang; +namespace { +/// \brief A comment handler that passes comments found by the preprocessor +/// to the parser action. +class ActionCommentHandler : public CommentHandler { + Sema &S; + +public: + explicit ActionCommentHandler(Sema &S) : S(S) { } + + virtual bool HandleComment(Preprocessor &PP, SourceRange Comment) { + S.ActOnComment(Comment); + return false; + } +}; +} // end anonymous namespace + IdentifierInfo *Parser::getSEHExceptKeyword() { // __except is accepted as a (contextual) keyword if (!Ident__except && (getLangOpts().MicrosoftExt || getLangOpts().Borland)) @@ -35,7 +51,7 @@ Parser::Parser(Preprocessor &pp, Sema &actions, bool SkipFunctionBodies) : PP(pp), Actions(actions), Diags(PP.getDiagnostics()), GreaterThanIsOperator(true), ColonIsSacred(false), InMessageExpression(false), TemplateParameterDepth(0), - SkipFunctionBodies(SkipFunctionBodies) { + ParsingInObjCContainer(false), SkipFunctionBodies(SkipFunctionBodies) { Tok.setKind(tok::eof); Actions.CurScope = 0; NumCachedScopes = 0; @@ -59,7 +75,7 @@ Parser::Parser(Preprocessor &pp, Sema &actions, bool SkipFunctionBodies) MSStructHandler.reset(new PragmaMSStructHandler(actions)); PP.AddPragmaHandler(MSStructHandler.get()); - UnusedHandler.reset(new PragmaUnusedHandler(actions, *this)); + UnusedHandler.reset(new PragmaUnusedHandler(actions)); PP.AddPragmaHandler(UnusedHandler.get()); WeakHandler.reset(new PragmaWeakHandler(actions)); @@ -68,17 +84,19 @@ Parser::Parser(Preprocessor &pp, Sema &actions, bool SkipFunctionBodies) RedefineExtnameHandler.reset(new PragmaRedefineExtnameHandler(actions)); PP.AddPragmaHandler(RedefineExtnameHandler.get()); - FPContractHandler.reset(new PragmaFPContractHandler(actions, *this)); + FPContractHandler.reset(new PragmaFPContractHandler(actions)); PP.AddPragmaHandler("STDC", FPContractHandler.get()); if (getLangOpts().OpenCL) { - OpenCLExtensionHandler.reset( - new PragmaOpenCLExtensionHandler(actions, *this)); + OpenCLExtensionHandler.reset(new PragmaOpenCLExtensionHandler(actions)); PP.AddPragmaHandler("OPENCL", OpenCLExtensionHandler.get()); PP.AddPragmaHandler("OPENCL", FPContractHandler.get()); } - + + CommentSemaHandler.reset(new ActionCommentHandler(actions)); + PP.addCommentHandler(CommentSemaHandler.get()); + PP.setCodeCompletionHandler(*this); } @@ -185,7 +203,7 @@ bool 
Parser::ExpectAndConsume(tok::TokenKind ExpectedTok, unsigned DiagID, bool Parser::ExpectAndConsumeSemi(unsigned DiagID) { if (Tok.is(tok::semi) || Tok.is(tok::code_completion)) { - ConsumeAnyToken(); + ConsumeToken(); return false; } @@ -202,6 +220,42 @@ bool Parser::ExpectAndConsumeSemi(unsigned DiagID) { return ExpectAndConsume(tok::semi, DiagID); } +void Parser::ConsumeExtraSemi(ExtraSemiKind Kind, unsigned TST) { + if (!Tok.is(tok::semi)) return; + + bool HadMultipleSemis = false; + SourceLocation StartLoc = Tok.getLocation(); + SourceLocation EndLoc = Tok.getLocation(); + ConsumeToken(); + + while ((Tok.is(tok::semi) && !Tok.isAtStartOfLine())) { + HadMultipleSemis = true; + EndLoc = Tok.getLocation(); + ConsumeToken(); + } + + // C++11 allows extra semicolons at namespace scope, but not in any of the + // other contexts. + if (Kind == OutsideFunction && getLangOpts().CPlusPlus) { + if (getLangOpts().CPlusPlus0x) + Diag(StartLoc, diag::warn_cxx98_compat_top_level_semi) + << FixItHint::CreateRemoval(SourceRange(StartLoc, EndLoc)); + else + Diag(StartLoc, diag::ext_extra_semi_cxx11) + << FixItHint::CreateRemoval(SourceRange(StartLoc, EndLoc)); + return; + } + + if (Kind != AfterMemberFunctionDefinition || HadMultipleSemis) + Diag(StartLoc, diag::ext_extra_semi) + << Kind << DeclSpec::getSpecifierName((DeclSpec::TST)TST) + << FixItHint::CreateRemoval(SourceRange(StartLoc, EndLoc)); + else + // A single semicolon is valid after a member function definition. + Diag(StartLoc, diag::warn_extra_semi_after_mem_fn_def) + << FixItHint::CreateRemoval(SourceRange(StartLoc, EndLoc)); +} + //===----------------------------------------------------------------------===// // Error recovery. //===----------------------------------------------------------------------===// @@ -396,6 +450,9 @@ Parser::~Parser() { PP.RemovePragmaHandler("STDC", FPContractHandler.get()); FPContractHandler.reset(); + + PP.removeCommentHandler(CommentSemaHandler.get()); + PP.clearCodeCompletionHandler(); assert(TemplateIds.empty() && "Still alive TemplateIdAnnotations around?"); @@ -412,10 +469,6 @@ void Parser::Initialize() { // Prime the lexer look-ahead. ConsumeToken(); - if (Tok.is(tok::eof) && - !getLangOpts().CPlusPlus) // Empty source file is an extension in C - Diag(Tok, diag::ext_empty_source_file); - // Initialization for Objective-C context sensitive keywords recognition. // Referenced in Parser::ParseObjCTypeQualifierList. if (getLangOpts().ObjC1) { @@ -582,11 +635,7 @@ Parser::ParseExternalDeclaration(ParsedAttributesWithRange &attrs, HandlePragmaPack(); return DeclGroupPtrTy(); case tok::semi: - Diag(Tok, getLangOpts().CPlusPlus0x ? - diag::warn_cxx98_compat_top_level_semi : diag::ext_top_level_semi) - << FixItHint::CreateRemoval(Tok.getLocation()); - - ConsumeToken(); + ConsumeExtraSemi(OutsideFunction); // TODO: Invoke action for top-level semicolon. return DeclGroupPtrTy(); case tok::r_brace: @@ -641,7 +690,7 @@ Parser::ParseExternalDeclaration(ParsedAttributesWithRange &attrs, case tok::kw_export: // As in 'export template' case tok::kw_static_assert: case tok::kw__Static_assert: - // A function definition cannot start with a these keywords. + // A function definition cannot start with any of these keywords. { SourceLocation DeclEnd; StmtVector Stmts(Actions); @@ -708,8 +757,7 @@ Parser::ParseExternalDeclaration(ParsedAttributesWithRange &attrs, dont_know: // We can't tell whether this is a function-definition or declaration yet. 
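ConsumeExtraSemi collapses a run of stray semicolons into one diagnostic with a removal fix-it, choosing the diagnostic by context (example code, not from the patch):

    struct S {
      void f() { };  // single ';' after a member function definition: accepted,
                     // but flagged by warn_extra_semi_after_mem_fn_def
    };
    int x;;          // extra ';' at namespace scope: valid C++11 (warned under
                     // -Wc++98-compat), an extension diagnostic otherwise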
if (DS) { - DS->takeAttributesFrom(attrs); - return ParseDeclarationOrFunctionDefinition(*DS); + return ParseDeclarationOrFunctionDefinition(attrs, DS); } else { return ParseDeclarationOrFunctionDefinition(attrs); } @@ -729,7 +777,7 @@ bool Parser::isDeclarationAfterDeclarator() { if (KW.is(tok::kw_default) || KW.is(tok::kw_delete)) return false; } - + return Tok.is(tok::equal) || // int X()= -> not a function def Tok.is(tok::comma) || // int X(), -> not a function def Tok.is(tok::semi) || // int X(); -> not a function def @@ -777,20 +825,24 @@ bool Parser::isStartOfFunctionDefinition(const ParsingDeclarator &Declarator) { /// [OMP] threadprivate-directive [TODO] /// Parser::DeclGroupPtrTy -Parser::ParseDeclarationOrFunctionDefinition(ParsingDeclSpec &DS, - AccessSpecifier AS) { +Parser::ParseDeclOrFunctionDefInternal(ParsedAttributesWithRange &attrs, + ParsingDeclSpec &DS, + AccessSpecifier AS) { // Parse the common declaration-specifiers piece. ParseDeclarationSpecifiers(DS, ParsedTemplateInfo(), AS, DSC_top_level); // C99 6.7.2.3p6: Handle "struct-or-union identifier;", "enum { X };" // declaration-specifiers init-declarator-list[opt] ';' if (Tok.is(tok::semi)) { + ProhibitAttributes(attrs); ConsumeToken(); Decl *TheDecl = Actions.ParsedFreeStandingDeclSpec(getCurScope(), AS, DS); DS.complete(TheDecl); return Actions.ConvertDeclToDeclGroup(TheDecl); } + DS.takeAttributesFrom(attrs); + // ObjC2 allows prefix attributes on class interfaces and protocols. // FIXME: This still needs better diagnostics. We should only accept // attributes here, no types, etc. @@ -831,16 +883,20 @@ Parser::ParseDeclarationOrFunctionDefinition(ParsingDeclSpec &DS, } Parser::DeclGroupPtrTy -Parser::ParseDeclarationOrFunctionDefinition(ParsedAttributes &attrs, +Parser::ParseDeclarationOrFunctionDefinition(ParsedAttributesWithRange &attrs, + ParsingDeclSpec *DS, AccessSpecifier AS) { - ParsingDeclSpec DS(*this); - DS.takeAttributesFrom(attrs); - // Must temporarily exit the objective-c container scope for - // parsing c constructs and re-enter objc container scope - // afterwards. - ObjCDeclContextSwitch ObjCDC(*this); - - return ParseDeclarationOrFunctionDefinition(DS, AS); + if (DS) { + return ParseDeclOrFunctionDefInternal(attrs, *DS, AS); + } else { + ParsingDeclSpec PDS(*this); + // Must temporarily exit the objective-c container scope for + // parsing c constructs and re-enter objc container scope + // afterwards. + ObjCDeclContextSwitch ObjCDC(*this); + + return ParseDeclOrFunctionDefInternal(attrs, PDS, AS); + } } /// ParseFunctionDefinition - We parsed and verified that the specified @@ -914,6 +970,7 @@ Decl *Parser::ParseFunctionDefinition(ParsingDeclarator &D, // In delayed template parsing mode, for function template we consume the // tokens and store them for late parsing at the end of the translation unit. 
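The new Tok.isNot(tok::equal) guard below keeps definitions introduced by '=' (such as '= delete' and '= default') out of the token-stashing path, which only makes sense for a braced body. As a reminder of what delayed template parsing trades away (example code, not from the patch; assumes -fdelayed-template-parsing):

    template <typename T> void f() {
      int *p = 42;  // non-dependent error: normally diagnosed at definition
                    // time, but unseen while the body is merely tokenized
    }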
if (getLangOpts().DelayedTemplateParsing && + Tok.isNot(tok::equal) && TemplateInfo.Kind == ParsedTemplateInfo::Template) { MultiTemplateParamsArg TemplateParameterLists(Actions, TemplateInfo.TemplateParams->data(), @@ -947,7 +1004,28 @@ Decl *Parser::ParseFunctionDefinition(ParsingDeclarator &D, } return DP; } - + else if (CurParsedObjCImpl && + !TemplateInfo.TemplateParams && + (Tok.is(tok::l_brace) || Tok.is(tok::kw_try) || + Tok.is(tok::colon)) && + Actions.CurContext->isTranslationUnit()) { + MultiTemplateParamsArg TemplateParameterLists(Actions, 0, 0); + ParseScope BodyScope(this, Scope::FnScope|Scope::DeclScope); + Scope *ParentScope = getCurScope()->getParent(); + + D.setFunctionDefinitionKind(FDK_Definition); + Decl *FuncDecl = Actions.HandleDeclarator(ParentScope, D, + move(TemplateParameterLists)); + D.complete(FuncDecl); + D.getMutableDeclSpec().abort(); + if (FuncDecl) { + // Consume the tokens and store them for later parsing. + StashAwayMethodOrFunctionBodyTokens(FuncDecl); + CurParsedObjCImpl->HasCFunction = true; + return FuncDecl; + } + } + // Enter a scope for the function body. ParseScope BodyScope(this, Scope::FnScope|Scope::DeclScope); @@ -1130,10 +1208,7 @@ void Parser::ParseKNRParamDeclarations(Declarator &D) { ParseDeclarator(ParmDeclarator); } - if (Tok.is(tok::semi)) { - ConsumeToken(); - } else { - Diag(Tok, diag::err_expected_semi_declaration); + if (ExpectAndConsumeSemi(diag::err_expected_semi_declaration)) { // Skip to end of block or statement SkipUntil(tok::semi, true); if (Tok.is(tok::semi)) @@ -1251,7 +1326,8 @@ TemplateIdAnnotation *Parser::takeTemplateIdAnnotation(const Token &tok) { bool Parser::TryAnnotateTypeOrScopeToken(bool EnteringContext, bool NeedType) { assert((Tok.is(tok::identifier) || Tok.is(tok::coloncolon) || Tok.is(tok::kw_typename) || Tok.is(tok::annot_cxxscope) - || Tok.is(tok::kw_decltype)) && "Cannot be a type or scope token!"); + || Tok.is(tok::kw_decltype) || Tok.is(tok::annot_template_id)) + && "Cannot be a type or scope token!"); if (Tok.is(tok::kw_typename)) { // Parse a C++ typename-specifier, e.g., "typename T::type". @@ -1267,10 +1343,23 @@ bool Parser::TryAnnotateTypeOrScopeToken(bool EnteringContext, bool NeedType) { 0, /*IsTypename*/true)) return true; if (!SS.isSet()) { - if (getLangOpts().MicrosoftExt) - Diag(Tok.getLocation(), diag::warn_expected_qualified_after_typename); - else - Diag(Tok.getLocation(), diag::err_expected_qualified_after_typename); + if (Tok.is(tok::identifier) || Tok.is(tok::annot_template_id) || + Tok.is(tok::annot_decltype)) { + // Attempt to recover by skipping the invalid 'typename' + if (Tok.is(tok::annot_decltype) || + (!TryAnnotateTypeOrScopeToken(EnteringContext, NeedType) && + Tok.isAnnotation())) { + unsigned DiagID = diag::err_expected_qualified_after_typename; + // MS compatibility: MSVC permits using known types with typename. + // e.g. "typedef typename T* pointer_type" + if (getLangOpts().MicrosoftExt) + DiagID = diag::warn_expected_qualified_after_typename; + Diag(Tok.getLocation(), DiagID); + return false; + } + } + + Diag(Tok.getLocation(), diag::err_expected_qualified_after_typename); return true; } @@ -1423,8 +1512,7 @@ bool Parser::TryAnnotateTypeOrScopeToken(bool EnteringContext, bool NeedType) { /// TryAnnotateScopeToken - Like TryAnnotateTypeOrScopeToken but only /// annotates C++ scope specifiers and template-ids. This returns -/// true if the token was annotated or there was an error that could not be -/// recovered from. 
+/// true if there was an error that could not be recovered from. /// /// Note that this routine emits an error if you call it with ::new or ::delete /// as the current tokens, so only call it in contexts where these are invalid. @@ -1678,13 +1766,13 @@ Parser::DeclGroupPtrTy Parser::ParseModuleImport(SourceLocation AtLoc) { return Actions.ConvertDeclToDeclGroup(Import.get()); } -bool Parser::BalancedDelimiterTracker::diagnoseOverflow() { +bool BalancedDelimiterTracker::diagnoseOverflow() { P.Diag(P.Tok, diag::err_parser_impl_limit_overflow); P.SkipUntil(tok::eof); return true; } -bool Parser::BalancedDelimiterTracker::expectAndConsume(unsigned DiagID, +bool BalancedDelimiterTracker::expectAndConsume(unsigned DiagID, const char *Msg, tok::TokenKind SkipToToc ) { LOpen = P.Tok.getLocation(); @@ -1697,7 +1785,7 @@ bool Parser::BalancedDelimiterTracker::expectAndConsume(unsigned DiagID, return diagnoseOverflow(); } -bool Parser::BalancedDelimiterTracker::diagnoseMissingClose() { +bool BalancedDelimiterTracker::diagnoseMissingClose() { assert(!P.Tok.is(Close) && "Should have consumed closing delimiter"); const char *LHSName = "unknown"; @@ -1715,6 +1803,6 @@ bool Parser::BalancedDelimiterTracker::diagnoseMissingClose() { return true; } -void Parser::BalancedDelimiterTracker::skipToEnd() { +void BalancedDelimiterTracker::skipToEnd() { P.SkipUntil(Close, false); } diff --git a/lib/Parse/RAIIObjectsForParser.h b/lib/Parse/RAIIObjectsForParser.h index ef17aee..455c4af 100644 --- a/lib/Parse/RAIIObjectsForParser.h +++ b/lib/Parse/RAIIObjectsForParser.h @@ -16,13 +16,230 @@ #define LLVM_CLANG_PARSE_RAII_OBJECTS_FOR_PARSER_H #include "clang/Parse/ParseDiagnostic.h" +#include "clang/Parse/Parser.h" +#include "clang/Sema/DelayedDiagnostic.h" +#include "clang/Sema/Sema.h" namespace clang { - // TODO: move ParsingDeclRAIIObject here. // TODO: move ParsingClassDefinition here. // TODO: move TentativeParsingAction here. - - + + /// \brief A RAII object used to temporarily suppress access-like + /// checking. Access-like checks are those associated with + /// controlling the use of a declaration, like C++ access control + /// errors and deprecation warnings. They are contextually + /// dependent, in that they can only be resolved with full + /// information about what's being declared. They are also + /// suppressed in certain contexts, like the template arguments of + /// an explicit instantiation. However, those suppression contexts + /// cannot necessarily be fully determined in advance; for + /// example, something starting like this: + /// template <> class std::vector + /// might be the entirety of an explicit instantiation: + /// template <> class std::vector; + /// or just an elaborated type specifier: + /// template <> class std::vector make_vector<>(); + /// Therefore this class collects all the diagnostics and permits + /// them to be re-delayed in a new context. 
+ class SuppressAccessChecks { + Sema &S; + sema::DelayedDiagnosticPool DiagnosticPool; + Sema::ParsingDeclState State; + bool Active; + + public: + /// Begin suppressing access-like checks + SuppressAccessChecks(Parser &P, bool activate = true) + : S(P.getActions()), DiagnosticPool(NULL) { + if (activate) { + State = S.PushParsingDeclaration(DiagnosticPool); + Active = true; + } else { + Active = false; + } + } + + void done() { + assert(Active && "trying to end an inactive suppression"); + S.PopParsingDeclaration(State, NULL); + Active = false; + } + + void redelay() { + assert(!Active && "redelaying without having ended first"); + if (!DiagnosticPool.pool_empty()) + S.redelayDiagnostics(DiagnosticPool); + assert(DiagnosticPool.pool_empty()); + } + + ~SuppressAccessChecks() { + if (Active) done(); + } + }; + + /// \brief RAII object used to inform the actions that we're + /// currently parsing a declaration. This is active when parsing a + /// variable's initializer, but not when parsing the body of a + /// class or function definition. + class ParsingDeclRAIIObject { + Sema &Actions; + sema::DelayedDiagnosticPool DiagnosticPool; + Sema::ParsingDeclState State; + bool Popped; + + // Do not implement. + ParsingDeclRAIIObject(const ParsingDeclRAIIObject &other); + ParsingDeclRAIIObject &operator=(const ParsingDeclRAIIObject &other); + + public: + enum NoParent_t { NoParent }; + ParsingDeclRAIIObject(Parser &P, NoParent_t _) + : Actions(P.getActions()), DiagnosticPool(NULL) { + push(); + } + + /// Creates a RAII object whose pool is optionally parented by another. + ParsingDeclRAIIObject(Parser &P, + const sema::DelayedDiagnosticPool *parentPool) + : Actions(P.getActions()), DiagnosticPool(parentPool) { + push(); + } + + /// Creates a RAII object and, optionally, initialize its + /// diagnostics pool by stealing the diagnostics from another + /// RAII object (which is assumed to be the current top pool). + ParsingDeclRAIIObject(Parser &P, ParsingDeclRAIIObject *other) + : Actions(P.getActions()), + DiagnosticPool(other ? other->DiagnosticPool.getParent() : NULL) { + if (other) { + DiagnosticPool.steal(other->DiagnosticPool); + other->abort(); + } + push(); + } + + ~ParsingDeclRAIIObject() { + abort(); + } + + sema::DelayedDiagnosticPool &getDelayedDiagnosticPool() { + return DiagnosticPool; + } + const sema::DelayedDiagnosticPool &getDelayedDiagnosticPool() const { + return DiagnosticPool; + } + + /// Resets the RAII object for a new declaration. + void reset() { + abort(); + push(); + } + + /// Signals that the context was completed without an appropriate + /// declaration being parsed. + void abort() { + pop(0); + } + + void complete(Decl *D) { + assert(!Popped && "ParsingDeclaration has already been popped!"); + pop(D); + } + + /// Unregister this object from Sema, but remember all the + /// diagnostics that were emitted into it. + void abortAndRemember() { + pop(0); + } + + private: + void push() { + State = Actions.PushParsingDeclaration(DiagnosticPool); + Popped = false; + } + + void pop(Decl *D) { + if (!Popped) { + Actions.PopParsingDeclaration(State, D); + Popped = true; + } + } + }; + + /// A class for parsing a DeclSpec. 
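A minimal usage sketch for the class above (hypothetical driver code, not from the patch): diagnostics delayed into the pool while a declaration is parsed are attached via complete(), or the pool is popped without a declaration via abort():

    void parseOneDecl(Parser &P) {
      ParsingDeclRAIIObject PD(P, ParsingDeclRAIIObject::NoParent);
      Decl *D = 0;        // ...parse the declaration here (hypothetical)...
      if (D)
        PD.complete(D);   // pop the pool against the finished declaration
      else
        PD.abort();       // pop with no declaration
    }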
+ class ParsingDeclSpec : public DeclSpec { + ParsingDeclRAIIObject ParsingRAII; + + public: + ParsingDeclSpec(Parser &P) + : DeclSpec(P.getAttrFactory()), + ParsingRAII(P, ParsingDeclRAIIObject::NoParent) {} + ParsingDeclSpec(Parser &P, ParsingDeclRAIIObject *RAII) + : DeclSpec(P.getAttrFactory()), + ParsingRAII(P, RAII) {} + + const sema::DelayedDiagnosticPool &getDelayedDiagnosticPool() const { + return ParsingRAII.getDelayedDiagnosticPool(); + } + + void complete(Decl *D) { + ParsingRAII.complete(D); + } + + void abort() { + ParsingRAII.abort(); + } + }; + + /// A class for parsing a declarator. + class ParsingDeclarator : public Declarator { + ParsingDeclRAIIObject ParsingRAII; + + public: + ParsingDeclarator(Parser &P, const ParsingDeclSpec &DS, TheContext C) + : Declarator(DS, C), ParsingRAII(P, &DS.getDelayedDiagnosticPool()) { + } + + const ParsingDeclSpec &getDeclSpec() const { + return static_cast(Declarator::getDeclSpec()); + } + + ParsingDeclSpec &getMutableDeclSpec() const { + return const_cast(getDeclSpec()); + } + + void clear() { + Declarator::clear(); + ParsingRAII.reset(); + } + + void complete(Decl *D) { + ParsingRAII.complete(D); + } + }; + + /// A class for parsing a field declarator. + class ParsingFieldDeclarator : public FieldDeclarator { + ParsingDeclRAIIObject ParsingRAII; + + public: + ParsingFieldDeclarator(Parser &P, const ParsingDeclSpec &DS) + : FieldDeclarator(DS), ParsingRAII(P, &DS.getDelayedDiagnosticPool()) { + } + + const ParsingDeclSpec &getDeclSpec() const { + return static_cast(D.getDeclSpec()); + } + + ParsingDeclSpec &getMutableDeclSpec() const { + return const_cast(getDeclSpec()); + } + + void complete(Decl *D) { + ParsingRAII.complete(D); + } + }; + /// ExtensionRAIIObject - This saves the state of extension warnings when /// constructed and disables them. When destructed, it restores them back to /// the way they used to be. This is used to handle __extension__ in the @@ -137,6 +354,81 @@ namespace clang { } }; + /// \brief RAII class that helps handle the parsing of an open/close delimiter + /// pair, such as braces { ... } or parentheses ( ... ). 
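The class defined next is already exercised earlier in this patch; the typical pattern, as in ParseDoStatement above:

    BalancedDelimiterTracker T(*this, tok::l_paren);
    T.consumeOpen();                      // records the '(' location, checks depth
    ExprResult Cond = ParseExpression();
    T.consumeClose();                     // records ')' or diagnoses the mismatch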
+ class BalancedDelimiterTracker : public GreaterThanIsOperatorScope { + Parser& P; + tok::TokenKind Kind, Close; + SourceLocation (Parser::*Consumer)(); + SourceLocation LOpen, LClose; + + unsigned short &getDepth() { + switch (Kind) { + case tok::l_brace: return P.BraceCount; + case tok::l_square: return P.BracketCount; + case tok::l_paren: return P.ParenCount; + default: llvm_unreachable("Wrong token kind"); + } + } + + enum { MaxDepth = 256 }; + + bool diagnoseOverflow(); + bool diagnoseMissingClose(); + + public: + BalancedDelimiterTracker(Parser& p, tok::TokenKind k) + : GreaterThanIsOperatorScope(p.GreaterThanIsOperator, true), + P(p), Kind(k) + { + switch (Kind) { + default: llvm_unreachable("Unexpected balanced token"); + case tok::l_brace: + Close = tok::r_brace; + Consumer = &Parser::ConsumeBrace; + break; + case tok::l_paren: + Close = tok::r_paren; + Consumer = &Parser::ConsumeParen; + break; + + case tok::l_square: + Close = tok::r_square; + Consumer = &Parser::ConsumeBracket; + break; + } + } + + SourceLocation getOpenLocation() const { return LOpen; } + SourceLocation getCloseLocation() const { return LClose; } + SourceRange getRange() const { return SourceRange(LOpen, LClose); } + + bool consumeOpen() { + if (!P.Tok.is(Kind)) + return true; + + if (getDepth() < MaxDepth) { + LOpen = (P.*Consumer)(); + return false; + } + + return diagnoseOverflow(); + } + + bool expectAndConsume(unsigned DiagID, + const char *Msg = "", + tok::TokenKind SkipToTok = tok::unknown); + bool consumeClose() { + if (P.Tok.is(Close)) { + LClose = (P.*Consumer)(); + return false; + } + + return diagnoseMissingClose(); + } + void skipToEnd(); + }; + } // end namespace clang #endif diff --git a/lib/Rewrite/CMakeLists.txt b/lib/Rewrite/CMakeLists.txt index 2a05040..af8f6d4 100644 --- a/lib/Rewrite/CMakeLists.txt +++ b/lib/Rewrite/CMakeLists.txt @@ -1,11 +1,10 @@ -set(LLVM_USED_LIBS clangBasic clangAST clangParse clangFrontend) - add_clang_library(clangRewrite DeltaTree.cpp FixItRewriter.cpp FrontendActions.cpp HTMLPrint.cpp HTMLRewrite.cpp + InclusionRewriter.cpp RewriteMacros.cpp RewriteModernObjC.cpp RewriteObjC.cpp @@ -18,5 +17,17 @@ add_clang_library(clangRewrite add_dependencies(clangRewrite ClangAttrClasses ClangAttrList + ClangAttrParsedAttrList + ClangCommentNodes ClangDeclNodes - ClangStmtNodes) + ClangDiagnosticCommon + ClangDiagnosticFrontend + ClangStmtNodes + ) + +target_link_libraries(clangRewrite + clangBasic + clangAST + clangParse + clangFrontend + ) diff --git a/lib/Rewrite/FrontendActions.cpp b/lib/Rewrite/FrontendActions.cpp index 1753325..9bc218e 100644 --- a/lib/Rewrite/FrontendActions.cpp +++ b/lib/Rewrite/FrontendActions.cpp @@ -155,7 +155,7 @@ bool FixItRecompile::BeginInvocation(CompilerInstance &CI) { ASTConsumer *RewriteObjCAction::CreateASTConsumer(CompilerInstance &CI, StringRef InFile) { if (raw_ostream *OS = CI.createDefaultOutputFile(false, InFile, "cpp")) { - if (CI.getLangOpts().ObjCNonFragileABI) + if (CI.getLangOpts().ObjCRuntime.isNonFragile()) return CreateModernObjCRewriter(InFile, OS, CI.getDiagnostics(), CI.getLangOpts(), CI.getDiagnosticOpts().NoRewriteMacros); @@ -181,3 +181,12 @@ void RewriteTestAction::ExecuteAction() { DoRewriteTest(CI.getPreprocessor(), OS); } + +void RewriteIncludesAction::ExecuteAction() { + CompilerInstance &CI = getCompilerInstance(); + raw_ostream *OS = CI.createDefaultOutputFile(true, getCurrentFile()); + if (!OS) return; + + RewriteIncludesInInput(CI.getPreprocessor(), OS, + CI.getPreprocessorOutputOpts()); +} diff --git 
a/lib/Rewrite/HTMLRewrite.cpp b/lib/Rewrite/HTMLRewrite.cpp index dc39dde..236b98f 100644 --- a/lib/Rewrite/HTMLRewrite.cpp +++ b/lib/Rewrite/HTMLRewrite.cpp @@ -325,11 +325,12 @@ void html::AddHeaderFooterInternalBuiltinCSS(Rewriter& R, FileID FID, " .msgControl { background-color:#bbbbbb; color:#000000 }\n" " .mrange { background-color:#dfddf3 }\n" " .mrange { border-bottom:1px solid #6F9DBE }\n" - " .PathIndex { font-weight: bold; padding:0px 5px 0px 5px; " + " .PathIndex { font-weight: bold; padding:0px 5px; " "margin-right:5px; }\n" " .PathIndex { -webkit-border-radius:8px }\n" " .PathIndexEvent { background-color:#bfba87 }\n" " .PathIndexControl { background-color:#8c8c8c }\n" + " .PathNav a { text-decoration:none; font-size: larger }\n" " .CodeInsertionHint { font-weight: bold; background-color: #10dd10 }\n" " .CodeRemovalHint { background-color:#de1010 }\n" " .CodeRemovalHint { border-bottom:1px solid #6F9DBE }\n" @@ -495,6 +496,11 @@ void html::HighlightMacros(Rewriter &R, FileID FID, const Preprocessor& PP) { // Inform the preprocessor that we don't want comments. TmpPP.SetCommentRetentionState(false, false); + // We don't want pragmas either. Although we filtered out #pragma, removing + // _Pragma and __pragma is much harder. + bool PragmasPreviouslyEnabled = TmpPP.getPragmasEnabled(); + TmpPP.setPragmasEnabled(false); + // Enter the tokens we just lexed. This will cause them to be macro expanded // but won't enter sub-files (because we removed #'s). TmpPP.EnterTokenStream(&TokenStream[0], TokenStream.size(), false, false); @@ -571,6 +577,7 @@ void html::HighlightMacros(Rewriter &R, FileID FID, const Preprocessor& PP) { "", Expansion.c_str()); } - // Restore diagnostics object back to its own thing. + // Restore the preprocessor's old state. TmpPP.setDiagnostics(*OldDiags); + TmpPP.setPragmasEnabled(PragmasPreviouslyEnabled); } diff --git a/lib/Rewrite/InclusionRewriter.cpp b/lib/Rewrite/InclusionRewriter.cpp new file mode 100644 index 0000000..3dfc3b0 --- /dev/null +++ b/lib/Rewrite/InclusionRewriter.cpp @@ -0,0 +1,361 @@ +//===--- InclusionRewriter.cpp - Rewrite includes into their expansions ---===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This code rewrites include invocations into their expansions. This gives you +// a file with all included files merged into it. +// +//===----------------------------------------------------------------------===// + +#include "clang/Rewrite/Rewriters.h" +#include "clang/Lex/Preprocessor.h" +#include "clang/Basic/SourceManager.h" +#include "clang/Frontend/PreprocessorOutputOptions.h" +#include "llvm/Support/raw_ostream.h" + +using namespace clang; +using namespace llvm; + +namespace { + +class InclusionRewriter : public PPCallbacks { + /// Information about which #includes were actually performed, + /// created by preprocessor callbacks. + struct FileChange { + SourceLocation From; + FileID Id; + SrcMgr::CharacteristicKind FileType; + FileChange(SourceLocation From) : From(From) { + } + }; + Preprocessor &PP; ///< Used to find inclusion directives. + SourceManager &SM; ///< Used to read and manage source files. + raw_ostream &OS; ///< The destination stream for rewritten contents. + bool ShowLineMarkers; ///< Show #line markers. + bool UseLineDirective; ///< Use of line directives or line markers. 
+ typedef std::map FileChangeMap; + FileChangeMap FileChanges; /// Tracks which files were included where. + /// Used transitively for building up the FileChanges mapping over the + /// various \c PPCallbacks callbacks. + FileChangeMap::iterator LastInsertedFileChange; +public: + InclusionRewriter(Preprocessor &PP, raw_ostream &OS, bool ShowLineMarkers); + bool Process(FileID FileId, SrcMgr::CharacteristicKind FileType); +private: + virtual void FileChanged(SourceLocation Loc, FileChangeReason Reason, + SrcMgr::CharacteristicKind FileType, + FileID PrevFID); + virtual void FileSkipped(const FileEntry &ParentFile, + const Token &FilenameTok, + SrcMgr::CharacteristicKind FileType); + virtual void InclusionDirective(SourceLocation HashLoc, + const Token &IncludeTok, + StringRef FileName, + bool IsAngled, + const FileEntry *File, + SourceLocation EndLoc, + StringRef SearchPath, + StringRef RelativePath); + void WriteLineInfo(const char *Filename, int Line, + SrcMgr::CharacteristicKind FileType, + StringRef EOL, StringRef Extra = StringRef()); + void OutputContentUpTo(const MemoryBuffer &FromFile, + unsigned &WriteFrom, unsigned WriteTo, + StringRef EOL, int &lines, + bool EnsureNewline = false); + void CommentOutDirective(Lexer &DirectivesLex, const Token &StartToken, + const MemoryBuffer &FromFile, StringRef EOL, + unsigned &NextToWrite, int &Lines); + const FileChange *FindFileChangeLocation(SourceLocation Loc) const; + StringRef NextIdentifierName(Lexer &RawLex, Token &RawToken); +}; + +} // end anonymous namespace + +/// Initializes an InclusionRewriter with a \p PP source and \p OS destination. +InclusionRewriter::InclusionRewriter(Preprocessor &PP, raw_ostream &OS, + bool ShowLineMarkers) + : PP(PP), SM(PP.getSourceManager()), OS(OS), + ShowLineMarkers(ShowLineMarkers), + LastInsertedFileChange(FileChanges.end()) { + // If we're in microsoft mode, use normal #line instead of line markers. + UseLineDirective = PP.getLangOpts().MicrosoftExt; +} + +/// Write appropriate line information as either #line directives or GNU line +/// markers depending on what mode we're in, including the \p Filename and +/// \p Line we are located at, using the specified \p EOL line separator, and +/// any \p Extra context specifiers in GNU line directives. +void InclusionRewriter::WriteLineInfo(const char *Filename, int Line, + SrcMgr::CharacteristicKind FileType, + StringRef EOL, StringRef Extra) { + if (!ShowLineMarkers) + return; + if (UseLineDirective) { + OS << "#line" << ' ' << Line << ' ' << '"' << Filename << '"'; + } else { + // Use GNU linemarkers as described here: + // http://gcc.gnu.org/onlinedocs/cpp/Preprocessor-Output.html + OS << '#' << ' ' << Line << ' ' << '"' << Filename << '"'; + if (!Extra.empty()) + OS << Extra; + if (FileType == SrcMgr::C_System) + // "`3' This indicates that the following text comes from a system header + // file, so certain warnings should be suppressed." + OS << " 3"; + else if (FileType == SrcMgr::C_ExternCSystem) + // as above for `3', plus "`4' This indicates that the following text + // should be treated as being wrapped in an implicit extern "C" block." + OS << " 3 4"; + } + OS << EOL; +} + +/// FileChanged - Whenever the preprocessor enters or exits a #include file +/// it invokes this handler. 
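For the GNU form, WriteLineInfo follows the linemarker convention cited in the code; an annotated sketch of the output for a main file that includes a system header (file names hypothetical):

    # 1 "main.c"         <- flag 1: start of a new file
    # 1 "stdio.h" 1 3    <- entering a file; 3 marks a system header
    # 2 "main.c" 2       <- flag 2: returning to a file after an include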
+void InclusionRewriter::FileChanged(SourceLocation Loc, + FileChangeReason Reason, + SrcMgr::CharacteristicKind NewFileType, + FileID) { + if (Reason != EnterFile) + return; + if (LastInsertedFileChange == FileChanges.end()) + // we didn't reach this file (eg: the main file) via an inclusion directive + return; + LastInsertedFileChange->second.Id = FullSourceLoc(Loc, SM).getFileID(); + LastInsertedFileChange->second.FileType = NewFileType; + LastInsertedFileChange = FileChanges.end(); +} + +/// Called whenever an inclusion is skipped due to canonical header protection +/// macros. +void InclusionRewriter::FileSkipped(const FileEntry &/*ParentFile*/, + const Token &/*FilenameTok*/, + SrcMgr::CharacteristicKind /*FileType*/) { + assert(LastInsertedFileChange != FileChanges.end() && "A file, that wasn't " + "found via an inclusion directive, was skipped"); + FileChanges.erase(LastInsertedFileChange); + LastInsertedFileChange = FileChanges.end(); +} + +/// This should be called whenever the preprocessor encounters include +/// directives. It does not say whether the file has been included, but it +/// provides more information about the directive (hash location instead +/// of location inside the included file). It is assumed that the matching +/// FileChanged() or FileSkipped() is called after this. +void InclusionRewriter::InclusionDirective(SourceLocation HashLoc, + const Token &/*IncludeTok*/, + StringRef /*FileName*/, + bool /*IsAngled*/, + const FileEntry * /*File*/, + SourceLocation /*EndLoc*/, + StringRef /*SearchPath*/, + StringRef /*RelativePath*/) { + assert(LastInsertedFileChange == FileChanges.end() && "Another inclusion " + "directive was found before the previous one was processed"); + std::pair p = FileChanges.insert( + std::make_pair(HashLoc.getRawEncoding(), FileChange(HashLoc))); + assert(p.second && "Unexpected revisitation of the same include directive"); + LastInsertedFileChange = p.first; +} + +/// Simple lookup for a SourceLocation (specifically one denoting the hash in +/// an inclusion directive) in the map of inclusion information, FileChanges. +const InclusionRewriter::FileChange * +InclusionRewriter::FindFileChangeLocation(SourceLocation Loc) const { + FileChangeMap::const_iterator I = FileChanges.find(Loc.getRawEncoding()); + if (I != FileChanges.end()) + return &I->second; + return NULL; +} + +/// Detect the likely line ending style of \p FromFile by examining the first +/// newline found within it. +static StringRef DetectEOL(const MemoryBuffer &FromFile) { + // detect what line endings the file uses, so that added content does not mix + // the style + const char *Pos = strchr(FromFile.getBufferStart(), '\n'); + if (Pos == NULL) + return "\n"; + if (Pos + 1 < FromFile.getBufferEnd() && Pos[1] == '\r') + return "\n\r"; + if (Pos - 1 >= FromFile.getBufferStart() && Pos[-1] == '\r') + return "\r\n"; + return "\n"; +} + +/// Writes out bytes from \p FromFile, starting at \p NextToWrite and ending at +/// \p WriteTo - 1. 
+void InclusionRewriter::OutputContentUpTo(const MemoryBuffer &FromFile, + unsigned &WriteFrom, unsigned WriteTo, + StringRef EOL, int &Line, + bool EnsureNewline) { + if (WriteTo <= WriteFrom) + return; + OS.write(FromFile.getBufferStart() + WriteFrom, WriteTo - WriteFrom); + // count lines manually, it's faster than getPresumedLoc() + Line += std::count(FromFile.getBufferStart() + WriteFrom, + FromFile.getBufferStart() + WriteTo, '\n'); + if (EnsureNewline) { + char LastChar = FromFile.getBufferStart()[WriteTo - 1]; + if (LastChar != '\n' && LastChar != '\r') + OS << EOL; + } + WriteFrom = WriteTo; +} + +/// Print characters from \p FromFile starting at \p NextToWrite up until the +/// inclusion directive at \p StartToken, then print out the inclusion +/// inclusion directive disabled by a #if directive, updating \p NextToWrite +/// and \p Line to track the number of source lines visited and the progress +/// through the \p FromFile buffer. +void InclusionRewriter::CommentOutDirective(Lexer &DirectiveLex, + const Token &StartToken, + const MemoryBuffer &FromFile, + StringRef EOL, + unsigned &NextToWrite, int &Line) { + OutputContentUpTo(FromFile, NextToWrite, + SM.getFileOffset(StartToken.getLocation()), EOL, Line); + Token DirectiveToken; + do { + DirectiveLex.LexFromRawLexer(DirectiveToken); + } while (!DirectiveToken.is(tok::eod) && DirectiveToken.isNot(tok::eof)); + OS << "#if 0 /* expanded by -frewrite-includes */" << EOL; + OutputContentUpTo(FromFile, NextToWrite, + SM.getFileOffset(DirectiveToken.getLocation()) + DirectiveToken.getLength(), + EOL, Line); + OS << "#endif /* expanded by -frewrite-includes */" << EOL; +} + +/// Find the next identifier in the pragma directive specified by \p RawToken. +StringRef InclusionRewriter::NextIdentifierName(Lexer &RawLex, + Token &RawToken) { + RawLex.LexFromRawLexer(RawToken); + if (RawToken.is(tok::raw_identifier)) + PP.LookUpIdentifierInfo(RawToken); + if (RawToken.is(tok::identifier)) + return RawToken.getIdentifierInfo()->getName(); + return StringRef(); +} + +/// Use a raw lexer to analyze \p FileId, inccrementally copying parts of it +/// and including content of included files recursively. +bool InclusionRewriter::Process(FileID FileId, + SrcMgr::CharacteristicKind FileType) +{ + bool Invalid; + const MemoryBuffer &FromFile = *SM.getBuffer(FileId, &Invalid); + if (Invalid) // invalid inclusion + return true; + const char *FileName = FromFile.getBufferIdentifier(); + Lexer RawLex(FileId, &FromFile, PP.getSourceManager(), PP.getLangOpts()); + RawLex.SetCommentRetentionState(false); + + StringRef EOL = DetectEOL(FromFile); + + // Per the GNU docs: "1" indicates the start of a new file. + WriteLineInfo(FileName, 1, FileType, EOL, " 1"); + + if (SM.getFileIDSize(FileId) == 0) + return true; + + // The next byte to be copied from the source file + unsigned NextToWrite = 0; + int Line = 1; // The current input file line number. + + Token RawToken; + RawLex.LexFromRawLexer(RawToken); + + // TODO: Consider adding a switch that strips possibly unimportant content, + // such as comments, to reduce the size of repro files. 
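Putting CommentOutDirective and WriteLineInfo together, an #include line in the input ends up bracketed rather than deleted, so the original directive stays visible in the -frewrite-includes output. Roughly (file names hypothetical):

    #if 0 /* expanded by -frewrite-includes */
    #include "foo.h"
    #endif /* expanded by -frewrite-includes */
    # 1 "foo.h" 1
    ...contents of foo.h, processed recursively...
    # 3 "main.c" 2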
+ while (RawToken.isNot(tok::eof)) { + if (RawToken.is(tok::hash) && RawToken.isAtStartOfLine()) { + RawLex.setParsingPreprocessorDirective(true); + Token HashToken = RawToken; + RawLex.LexFromRawLexer(RawToken); + if (RawToken.is(tok::raw_identifier)) + PP.LookUpIdentifierInfo(RawToken); + if (RawToken.is(tok::identifier)) { + switch (RawToken.getIdentifierInfo()->getPPKeywordID()) { + case tok::pp_include: + case tok::pp_include_next: + case tok::pp_import: { + CommentOutDirective(RawLex, HashToken, FromFile, EOL, NextToWrite, + Line); + if (const FileChange *Change = FindFileChangeLocation( + HashToken.getLocation())) { + // now include and recursively process the file + if (Process(Change->Id, Change->FileType)) + // and set lineinfo back to this file, if the nested one was + // actually included + // `2' indicates returning to a file (after having included + // another file. + WriteLineInfo(FileName, Line, FileType, EOL, " 2"); + } else + // fix up lineinfo (since commented out directive changed line + // numbers) for inclusions that were skipped due to header guards + WriteLineInfo(FileName, Line, FileType, EOL); + break; + } + case tok::pp_pragma: { + StringRef Identifier = NextIdentifierName(RawLex, RawToken); + if (Identifier == "clang" || Identifier == "GCC") { + if (NextIdentifierName(RawLex, RawToken) == "system_header") { + // keep the directive in, commented out + CommentOutDirective(RawLex, HashToken, FromFile, EOL, + NextToWrite, Line); + // update our own type + FileType = SM.getFileCharacteristic(RawToken.getLocation()); + WriteLineInfo(FileName, Line, FileType, EOL); + } + } else if (Identifier == "once") { + // keep the directive in, commented out + CommentOutDirective(RawLex, HashToken, FromFile, EOL, + NextToWrite, Line); + WriteLineInfo(FileName, Line, FileType, EOL); + } + break; + } + default: + break; + } + } + RawLex.setParsingPreprocessorDirective(false); + } + RawLex.LexFromRawLexer(RawToken); + } + OutputContentUpTo(FromFile, NextToWrite, + SM.getFileOffset(SM.getLocForEndOfFile(FileId)) + 1, EOL, Line, + /*EnsureNewline*/true); + return true; +} + +/// InclusionRewriterInInput - Implement -frewrite-includes mode. +void clang::RewriteIncludesInInput(Preprocessor &PP, raw_ostream *OS, + const PreprocessorOutputOptions &Opts) { + SourceManager &SM = PP.getSourceManager(); + InclusionRewriter *Rewrite = new InclusionRewriter(PP, *OS, + Opts.ShowLineMarkers); + PP.addPPCallbacks(Rewrite); + + // First let the preprocessor process the entire file and call callbacks. + // Callbacks will record which #include's were actually performed. + PP.EnterMainSourceFile(); + Token Tok; + // Only preprocessor directives matter here, so disable macro expansion + // everywhere else as an optimization. + // TODO: It would be even faster if the preprocessor could be switched + // to a mode where it would parse only preprocessor directives and comments, + // nothing else matters for parsing or processing. + PP.SetMacroExpansionOnlyInDirectives(); + do { + PP.Lex(Tok); + } while (Tok.isNot(tok::eof)); + Rewrite->Process(SM.getMainFileID(), SrcMgr::C_User); + OS->flush(); +} diff --git a/lib/Rewrite/RewriteModernObjC.cpp b/lib/Rewrite/RewriteModernObjC.cpp index 94fba64..9f42fca 100644 --- a/lib/Rewrite/RewriteModernObjC.cpp +++ b/lib/Rewrite/RewriteModernObjC.cpp @@ -102,7 +102,6 @@ namespace { FunctionDecl *CFStringFunctionDecl; FunctionDecl *SuperContructorFunctionDecl; FunctionDecl *CurFunctionDef; - FunctionDecl *CurFunctionDeclToDeclareForBlock; /* Misc. 
containers needed for meta-data rewrite. */ SmallVector ClassImplementation; @@ -110,7 +109,7 @@ namespace { llvm::SmallPtrSet ObjCSynthesizedStructs; llvm::SmallPtrSet ObjCSynthesizedProtocols; llvm::SmallPtrSet ObjCWrittenInterfaces; - llvm::SmallPtrSet TagsDefinedInIvarDecls; + llvm::SmallPtrSet GlobalDefinedTags; SmallVector ObjCInterfacesSeen; /// DefinedNonLazyClasses - List of defined "non-lazy" classes. SmallVector DefinedNonLazyClasses; @@ -304,9 +303,12 @@ namespace { void RewriteFunctionDecl(FunctionDecl *FD); void RewriteBlockPointerType(std::string& Str, QualType Type); void RewriteBlockPointerTypeVariable(std::string& Str, ValueDecl *VD); + void RewriteBlockLiteralFunctionDecl(FunctionDecl *FD); void RewriteObjCQualifiedInterfaceTypes(Decl *Dcl); void RewriteTypeOfDecl(VarDecl *VD); void RewriteObjCQualifiedInterfaceTypes(Expr *E); + + std::string getIvarAccessString(ObjCIvarDecl *D); // Expression Rewriting. Stmt *RewriteFunctionBodyOrGlobalInitializer(Stmt *S); @@ -317,11 +319,12 @@ namespace { Stmt *RewriteMessageExpr(ObjCMessageExpr *Exp); Stmt *RewriteObjCStringLiteral(ObjCStringLiteral *Exp); Stmt *RewriteObjCBoolLiteralExpr(ObjCBoolLiteralExpr *Exp); - Stmt *RewriteObjCNumericLiteralExpr(ObjCNumericLiteral *Exp); + Stmt *RewriteObjCBoxedExpr(ObjCBoxedExpr *Exp); Stmt *RewriteObjCArrayLiteralExpr(ObjCArrayLiteral *Exp); Stmt *RewriteObjCDictionaryLiteralExpr(ObjCDictionaryLiteral *Exp); Stmt *RewriteObjCProtocolExpr(ObjCProtocolExpr *Exp); Stmt *RewriteObjCTryStmt(ObjCAtTryStmt *S); + Stmt *RewriteObjCAutoreleasePoolStmt(ObjCAutoreleasePoolStmt *S); Stmt *RewriteObjCSynchronizedStmt(ObjCAtSynchronizedStmt *S); Stmt *RewriteObjCThrowStmt(ObjCAtThrowStmt *S); Stmt *RewriteObjCForCollectionStmt(ObjCForCollectionStmt *S, @@ -337,7 +340,7 @@ namespace { // Block specific rewrite rules. void RewriteBlockPointerDecl(NamedDecl *VD); - void RewriteByRefVar(VarDecl *VD); + void RewriteByRefVar(VarDecl *VD, bool firstDecl, bool lastDecl); Stmt *RewriteBlockDeclRefExpr(DeclRefExpr *VD); Stmt *RewriteLocalVariableExternalStorage(DeclRefExpr *DRE); void RewriteBlockPointerFunctionArgs(FunctionDecl *FD); @@ -346,6 +349,10 @@ namespace { std::string &Result); void RewriteObjCFieldDecl(FieldDecl *fieldDecl, std::string &Result); + bool IsTagDefinedInsideClass(ObjCContainerDecl *IDecl, TagDecl *Tag, + bool &IsNamedDefinition); + void RewriteLocallyDefinedNamedAggregates(FieldDecl *fieldDecl, + std::string &Result); bool RewriteObjCFieldDeclType(QualType &Type, std::string &Result); @@ -354,12 +361,19 @@ namespace { virtual void Initialize(ASTContext &context); - // Misc. AST transformation routines. Somtimes they end up calling + // Misc. AST transformation routines. Sometimes they end up calling // rewriting routines on the new ASTs. 
CallExpr *SynthesizeCallToFunctionDecl(FunctionDecl *FD, Expr **args, unsigned nargs, SourceLocation StartLoc=SourceLocation(), SourceLocation EndLoc=SourceLocation()); + + Expr *SynthMsgSendStretCallExpr(FunctionDecl *MsgSendStretFlavor, + QualType msgSendType, + QualType returnType, + SmallVectorImpl &ArgTypes, + SmallVectorImpl &MsgExprs, + ObjCMethodDecl *Method); Stmt *SynthMessageExpr(ObjCMessageExpr *Exp, SourceLocation StartLoc=SourceLocation(), @@ -387,23 +401,23 @@ namespace { std::string &Result); void RewriteObjCProtocolMetaData(ObjCProtocolDecl *Protocol, std::string &Result); - virtual void RewriteObjCProtocolListMetaData( + void RewriteObjCProtocolListMetaData( const ObjCList &Prots, StringRef prefix, StringRef ClassName, std::string &Result); - virtual void RewriteObjCClassMetaData(ObjCImplementationDecl *IDecl, + void RewriteObjCClassMetaData(ObjCImplementationDecl *IDecl, std::string &Result); - virtual void RewriteClassSetupInitHook(std::string &Result); + void RewriteClassSetupInitHook(std::string &Result); - virtual void RewriteMetaDataIntoBuffer(std::string &Result); - virtual void WriteImageInfo(std::string &Result); - virtual void RewriteObjCCategoryImplDecl(ObjCCategoryImplDecl *CDecl, + void RewriteMetaDataIntoBuffer(std::string &Result); + void WriteImageInfo(std::string &Result); + void RewriteObjCCategoryImplDecl(ObjCCategoryImplDecl *CDecl, std::string &Result); - virtual void RewriteCategorySetupInitHook(std::string &Result); + void RewriteCategorySetupInitHook(std::string &Result); // Rewriting ivar - virtual void RewriteIvarOffsetComputation(ObjCIvarDecl *ivar, + void RewriteIvarOffsetComputation(ObjCIvarDecl *ivar, std::string &Result); - virtual Stmt *RewriteObjCIvarRefExpr(ObjCIvarRefExpr *IV); + Stmt *RewriteObjCIvarRefExpr(ObjCIvarRefExpr *IV); std::string SynthesizeByrefCopyDestroyHelper(VarDecl *VD, int flag); @@ -622,7 +636,6 @@ void RewriteModernObjC::InitializeCommon(ASTContext &context) { NSStringRecord = 0; CurMethodDef = 0; CurFunctionDef = 0; - CurFunctionDeclToDeclareForBlock = 0; GlobalVarDecl = 0; GlobalConstructionExp = 0; SuperStructDecl = 0; @@ -768,29 +781,104 @@ void RewriteModernObjC::RewriteInclude() { } } -static std::string getIvarAccessString(ObjCIvarDecl *OID) { - const ObjCInterfaceDecl *ClassDecl = OID->getContainingInterface(); - std::string S; - S = "((struct "; - S += ClassDecl->getIdentifier()->getName(); - S += "_IMPL *)self)->"; - S += OID->getName(); +static void WriteInternalIvarName(const ObjCInterfaceDecl *IDecl, + ObjCIvarDecl *IvarDecl, std::string &Result) { + Result += "OBJC_IVAR_$_"; + Result += IDecl->getName(); + Result += "$"; + Result += IvarDecl->getName(); +} + +std::string +RewriteModernObjC::getIvarAccessString(ObjCIvarDecl *D) { + const ObjCInterfaceDecl *ClassDecl = D->getContainingInterface(); + + // Build name of symbol holding ivar offset. + std::string IvarOffsetName; + WriteInternalIvarName(ClassDecl, D, IvarOffsetName); + + + std::string S = "(*("; + QualType IvarT = D->getType(); + + if (!isa(IvarT) && IvarT->isRecordType()) { + RecordDecl *RD = IvarT->getAs()->getDecl(); + RD = RD->getDefinition(); + if (RD && !RD->getDeclName().getAsIdentifierInfo()) { + // decltype(((Foo_IMPL*)0)->bar) * + ObjCContainerDecl *CDecl = + dyn_cast(D->getDeclContext()); + // ivar in class extensions requires special treatment. 
+ if (ObjCCategoryDecl *CatDecl = dyn_cast(CDecl)) + CDecl = CatDecl->getClassInterface(); + std::string RecName = CDecl->getName(); + RecName += "_IMPL"; + RecordDecl *RD = RecordDecl::Create(*Context, TTK_Struct, TUDecl, + SourceLocation(), SourceLocation(), + &Context->Idents.get(RecName.c_str())); + QualType PtrStructIMPL = Context->getPointerType(Context->getTagDeclType(RD)); + unsigned UnsignedIntSize = + static_cast(Context->getTypeSize(Context->UnsignedIntTy)); + Expr *Zero = IntegerLiteral::Create(*Context, + llvm::APInt(UnsignedIntSize, 0), + Context->UnsignedIntTy, SourceLocation()); + Zero = NoTypeInfoCStyleCastExpr(Context, PtrStructIMPL, CK_BitCast, Zero); + ParenExpr *PE = new (Context) ParenExpr(SourceLocation(), SourceLocation(), + Zero); + FieldDecl *FD = FieldDecl::Create(*Context, 0, SourceLocation(), + SourceLocation(), + &Context->Idents.get(D->getNameAsString()), + IvarT, 0, + /*BitWidth=*/0, /*Mutable=*/true, + ICIS_NoInit); + MemberExpr *ME = new (Context) MemberExpr(PE, true, FD, SourceLocation(), + FD->getType(), VK_LValue, + OK_Ordinary); + IvarT = Context->getDecltypeType(ME, ME->getType()); + } + } + convertObjCTypeToCStyleType(IvarT); + QualType castT = Context->getPointerType(IvarT); + std::string TypeString(castT.getAsString(Context->getPrintingPolicy())); + S += TypeString; + S += ")"; + + // ((char *)self + IVAR_OFFSET_SYMBOL_NAME) + S += "((char *)self + "; + S += IvarOffsetName; + S += "))"; + ReferencedIvars[const_cast(ClassDecl)].insert(D); return S; } +/// mustSynthesizeSetterGetterMethod - returns true if setter or getter has not +/// been found in the class implementation. In this case, it must be synthesized. +static bool mustSynthesizeSetterGetterMethod(ObjCImplementationDecl *IMP, + ObjCPropertyDecl *PD, + bool getter) { + return getter ? !IMP->getInstanceMethod(PD->getGetterName()) + : !IMP->getInstanceMethod(PD->getSetterName()); + +} + void RewriteModernObjC::RewritePropertyImplDecl(ObjCPropertyImplDecl *PID, ObjCImplementationDecl *IMD, ObjCCategoryImplDecl *CID) { static bool objcGetPropertyDefined = false; static bool objcSetPropertyDefined = false; - SourceLocation startLoc = PID->getLocStart(); - InsertText(startLoc, "// "); - const char *startBuf = SM->getCharacterData(startLoc); - assert((*startBuf == '@') && "bogus @synthesize location"); - const char *semiBuf = strchr(startBuf, ';'); - assert((*semiBuf == ';') && "@synthesize: can't find ';'"); - SourceLocation onePastSemiLoc = - startLoc.getLocWithOffset(semiBuf-startBuf+1); + SourceLocation startGetterSetterLoc; + + if (PID->getLocStart().isValid()) { + SourceLocation startLoc = PID->getLocStart(); + InsertText(startLoc, "// "); + const char *startBuf = SM->getCharacterData(startLoc); + assert((*startBuf == '@') && "bogus @synthesize location"); + const char *semiBuf = strchr(startBuf, ';'); + assert((*semiBuf == ';') && "@synthesize: can't find ';'"); + startGetterSetterLoc = startLoc.getLocWithOffset(semiBuf-startBuf+1); + } + else + startGetterSetterLoc = IMD ? IMD->getLocEnd() : CID->getLocEnd(); if (PID->getPropertyImplementation() == ObjCPropertyImplDecl::Dynamic) return; // FIXME: is this correct? 
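As a concrete illustration of what the new getIvarAccessString produces (the class and ivar names here are invented): for an interface such as

    @interface MyClass : NSObject { int count; }

a use of count is now rewritten against the ivar-offset symbol built by WriteInternalIvarName, roughly as

    (*(int *)((char *)self + OBJC_IVAR_$_MyClass$count))

instead of the old ((struct MyClass_IMPL *)self)->count form produced by the removed static helper. The offset symbol itself is emitted later for each ivar recorded in ReferencedIvars.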
@@ -802,7 +890,7 @@ void RewriteModernObjC::RewritePropertyImplDecl(ObjCPropertyImplDecl *PID, if (!OID) return; unsigned Attributes = PD->getPropertyAttributes(); - if (!PD->getGetterMethodDecl()->isDefined()) { + if (mustSynthesizeSetterGetterMethod(IMD, PD, true /*getter*/)) { bool GenGetProperty = !(Attributes & ObjCPropertyDecl::OBJC_PR_nonatomic) && (Attributes & (ObjCPropertyDecl::OBJC_PR_retain | ObjCPropertyDecl::OBJC_PR_copy)); @@ -854,10 +942,11 @@ void RewriteModernObjC::RewritePropertyImplDecl(ObjCPropertyImplDecl *PID, else Getr += "return " + getIvarAccessString(OID); Getr += "; }"; - InsertText(onePastSemiLoc, Getr); + InsertText(startGetterSetterLoc, Getr); } - if (PD->isReadOnly() || PD->getSetterMethodDecl()->isDefined()) + if (PD->isReadOnly() || + !mustSynthesizeSetterGetterMethod(IMD, PD, false /*setter*/)) return; // Generate the 'setter' function. @@ -895,8 +984,8 @@ void RewriteModernObjC::RewritePropertyImplDecl(ObjCPropertyImplDecl *PID, Setr += getIvarAccessString(OID) + " = "; Setr += PD->getName(); } - Setr += "; }"; - InsertText(onePastSemiLoc, Setr); + Setr += "; }\n"; + InsertText(startGetterSetterLoc, Setr); } static void RewriteOneForwardClassDecl(ObjCInterfaceDecl *ForwardDecl, @@ -985,17 +1074,13 @@ void RewriteModernObjC::RewriteCategoryDecl(ObjCCategoryDecl *CatDecl) { SourceLocation LocStart = CatDecl->getLocStart(); // FIXME: handle category headers that are declared across multiple lines. - ReplaceText(LocStart, 0, "// "); - if (CatDecl->getIvarLBraceLoc().isValid()) - InsertText(CatDecl->getIvarLBraceLoc(), "// "); - for (ObjCCategoryDecl::ivar_iterator - I = CatDecl->ivar_begin(), E = CatDecl->ivar_end(); I != E; ++I) { - ObjCIvarDecl *Ivar = (*I); - SourceLocation LocStart = Ivar->getLocStart(); + if (CatDecl->getIvarRBraceLoc().isValid()) { + ReplaceText(LocStart, 1, "/** "); + ReplaceText(CatDecl->getIvarRBraceLoc(), 1, "**/ "); + } + else { ReplaceText(LocStart, 0, "// "); - } - if (CatDecl->getIvarRBraceLoc().isValid()) - InsertText(CatDecl->getIvarRBraceLoc(), "// "); + } for (ObjCCategoryDecl::prop_iterator I = CatDecl->prop_begin(), E = CatDecl->prop_end(); I != E; ++I) @@ -1221,17 +1306,13 @@ void RewriteModernObjC::RewriteImplementationDecl(Decl *OID) { ObjCCategoryImplDecl *CID = dyn_cast(OID); if (IMD) { - InsertText(IMD->getLocStart(), "// "); - if (IMD->getIvarLBraceLoc().isValid()) - InsertText(IMD->getIvarLBraceLoc(), "// "); - for (ObjCImplementationDecl::ivar_iterator - I = IMD->ivar_begin(), E = IMD->ivar_end(); I != E; ++I) { - ObjCIvarDecl *Ivar = (*I); - SourceLocation LocStart = Ivar->getLocStart(); - ReplaceText(LocStart, 0, "// "); + if (IMD->getIvarRBraceLoc().isValid()) { + ReplaceText(IMD->getLocStart(), 1, "/** "); + ReplaceText(IMD->getIvarRBraceLoc(), 1, "**/ "); + } + else { + InsertText(IMD->getLocStart(), "// "); } - if (IMD->getIvarRBraceLoc().isValid()) - InsertText(IMD->getIvarRBraceLoc(), "// "); } else InsertText(CID->getLocStart(), "// "); @@ -1808,6 +1889,15 @@ void RewriteModernObjC::WarnAboutReturnGotoStmts(Stmt *S) return; } +Stmt *RewriteModernObjC::RewriteObjCAutoreleasePoolStmt(ObjCAutoreleasePoolStmt *S) { + SourceLocation startLoc = S->getAtLoc(); + ReplaceText(startLoc, strlen("@autoreleasepool"), "/* @autoreleasepool */"); + ReplaceText(S->getSubStmt()->getLocStart(), 1, + "{ __AtAutoreleasePool __autoreleasepool; "); + + return 0; +} + Stmt *RewriteModernObjC::RewriteObjCTryStmt(ObjCAtTryStmt *S) { ObjCAtFinallyStmt *finalStmt = S->getFinallyStmt(); bool noCatch = S->getNumCatchStmts() == 0; @@ 
-2245,6 +2335,32 @@ void RewriteModernObjC::RewriteBlockPointerTypeVariable(std::string& Str, } } +void RewriteModernObjC::RewriteBlockLiteralFunctionDecl(FunctionDecl *FD) { + SourceLocation FunLocStart = FD->getTypeSpecStartLoc(); + const FunctionType *funcType = FD->getType()->getAs(); + const FunctionProtoType *proto = dyn_cast(funcType); + if (!proto) + return; + QualType Type = proto->getResultType(); + std::string FdStr = Type.getAsString(Context->getPrintingPolicy()); + FdStr += " "; + FdStr += FD->getName(); + FdStr += "("; + unsigned numArgs = proto->getNumArgs(); + for (unsigned i = 0; i < numArgs; i++) { + QualType ArgType = proto->getArgType(i); + RewriteBlockPointerType(FdStr, ArgType); + if (i+1 < numArgs) + FdStr += ", "; + } + if (FD->isVariadic()) { + FdStr += (numArgs > 0) ? ", ...);\n" : "...);\n"; + } + else + FdStr += ");\n"; + InsertText(FunLocStart, FdStr); +} + // SynthSuperContructorFunctionDecl - id __rw_objc_super(id obj, id super); void RewriteModernObjC::SynthSuperContructorFunctionDecl() { if (SuperContructorFunctionDecl) @@ -2362,12 +2478,12 @@ void RewriteModernObjC::SynthMsgSendFpretFunctionDecl() { SC_None, false); } -// SynthGetClassFunctionDecl - id objc_getClass(const char *name); +// SynthGetClassFunctionDecl - Class objc_getClass(const char *name); void RewriteModernObjC::SynthGetClassFunctionDecl() { IdentifierInfo *getClassIdent = &Context->Idents.get("objc_getClass"); SmallVector ArgTys; ArgTys.push_back(Context->getPointerType(Context->CharTy.withConst())); - QualType getClassType = getSimpleFunctionType(Context->getObjCIdType(), + QualType getClassType = getSimpleFunctionType(Context->getObjCClassType(), &ArgTys[0], ArgTys.size()); GetClassFunctionDecl = FunctionDecl::Create(*Context, TUDecl, SourceLocation(), @@ -2395,12 +2511,12 @@ void RewriteModernObjC::SynthGetSuperClassFunctionDecl() { false); } -// SynthGetMetaClassFunctionDecl - id objc_getMetaClass(const char *name); +// SynthGetMetaClassFunctionDecl - Class objc_getMetaClass(const char *name); void RewriteModernObjC::SynthGetMetaClassFunctionDecl() { IdentifierInfo *getClassIdent = &Context->Idents.get("objc_getMetaClass"); SmallVector ArgTys; ArgTys.push_back(Context->getPointerType(Context->CharTy.withConst())); - QualType getClassType = getSimpleFunctionType(Context->getObjCIdType(), + QualType getClassType = getSimpleFunctionType(Context->getObjCClassType(), &ArgTys[0], ArgTys.size()); GetMetaClassFunctionDecl = FunctionDecl::Create(*Context, TUDecl, SourceLocation(), @@ -2471,7 +2587,7 @@ Stmt *RewriteModernObjC::RewriteObjCBoolLiteralExpr(ObjCBoolLiteralExpr *Exp) { return PE; } -Stmt *RewriteModernObjC::RewriteObjCNumericLiteralExpr(ObjCNumericLiteral *Exp) { +Stmt *RewriteModernObjC::RewriteObjCBoxedExpr(ObjCBoxedExpr *Exp) { // synthesize declaration of helper functions needed in this routine. if (!SelGetUidFunctionDecl) SynthSelGetUidFunctionDecl(); @@ -2489,13 +2605,12 @@ Stmt *RewriteModernObjC::RewriteObjCNumericLiteralExpr(ObjCNumericLiteral *Exp) SmallVector MsgExprs; SmallVector ClsExprs; QualType argType = Context->getPointerType(Context->CharTy); - QualType expType = Exp->getType(); - // Create a call to objc_getClass("NSNumber"). It will be th 1st argument. - ObjCInterfaceDecl *Class = - expType->getPointeeType()->getAs()->getInterface(); + // Create a call to objc_getClass(""). It will be the 1st argument. 
+ ObjCMethodDecl *BoxingMethod = Exp->getBoxingMethod(); + ObjCInterfaceDecl *BoxingClass = BoxingMethod->getClassInterface(); - IdentifierInfo *clsName = Class->getIdentifier(); + IdentifierInfo *clsName = BoxingClass->getIdentifier(); ClsExprs.push_back(StringLiteral::Create(*Context, clsName->getName(), StringLiteral::Ascii, false, @@ -2506,12 +2621,11 @@ Stmt *RewriteModernObjC::RewriteObjCNumericLiteralExpr(ObjCNumericLiteral *Exp) StartLoc, EndLoc); MsgExprs.push_back(Cls); - // Create a call to sel_registerName("numberWithBool:"), etc. + // Create a call to sel_registerName(":"), etc. // it will be the 2nd argument. SmallVector SelExprs; - ObjCMethodDecl *NumericMethod = Exp->getObjCNumericLiteralMethod(); SelExprs.push_back(StringLiteral::Create(*Context, - NumericMethod->getSelector().getAsString(), + BoxingMethod->getSelector().getAsString(), StringLiteral::Ascii, false, argType, SourceLocation())); CallExpr *SelExp = SynthesizeCallToFunctionDecl(SelGetUidFunctionDecl, @@ -2519,25 +2633,25 @@ Stmt *RewriteModernObjC::RewriteObjCNumericLiteralExpr(ObjCNumericLiteral *Exp) StartLoc, EndLoc); MsgExprs.push_back(SelExp); - // User provided numeric literal is the 3rd, and last, argument. - Expr *userExpr = Exp->getNumber(); - if (ImplicitCastExpr *ICE = dyn_cast(userExpr)) { + // User provided sub-expression is the 3rd, and last, argument. + Expr *subExpr = Exp->getSubExpr(); + if (ImplicitCastExpr *ICE = dyn_cast(subExpr)) { QualType type = ICE->getType(); const Expr *SubExpr = ICE->IgnoreParenImpCasts(); CastKind CK = CK_BitCast; if (SubExpr->getType()->isIntegralType(*Context) && type->isBooleanType()) CK = CK_IntegralToBoolean; - userExpr = NoTypeInfoCStyleCastExpr(Context, type, CK, userExpr); + subExpr = NoTypeInfoCStyleCastExpr(Context, type, CK, subExpr); } - MsgExprs.push_back(userExpr); + MsgExprs.push_back(subExpr); SmallVector ArgTypes; ArgTypes.push_back(Context->getObjCIdType()); ArgTypes.push_back(Context->getObjCSelType()); - for (ObjCMethodDecl::param_iterator PI = NumericMethod->param_begin(), - E = NumericMethod->param_end(); PI != E; ++PI) + for (ObjCMethodDecl::param_iterator PI = BoxingMethod->param_begin(), + E = BoxingMethod->param_end(); PI != E; ++PI) ArgTypes.push_back((*PI)->getType()); - + QualType returnType = Exp->getType(); // Get the type, we will need to reference it in a couple spots. QualType msgSendType = MsgSendFlavor->getType(); @@ -2547,13 +2661,13 @@ Stmt *RewriteModernObjC::RewriteObjCNumericLiteralExpr(ObjCNumericLiteral *Exp) VK_LValue, SourceLocation()); CastExpr *cast = NoTypeInfoCStyleCastExpr(Context, - Context->getPointerType(Context->VoidTy), - CK_BitCast, DRE); + Context->getPointerType(Context->VoidTy), + CK_BitCast, DRE); // Now do the "normal" pointer to function cast. 
QualType castType = - getSimpleFunctionType(returnType, &ArgTypes[0], ArgTypes.size(), - NumericMethod->isVariadic()); + getSimpleFunctionType(returnType, &ArgTypes[0], ArgTypes.size(), + BoxingMethod->isVariadic()); castType = Context->getPointerType(castType); cast = NoTypeInfoCStyleCastExpr(Context, castType, CK_BitCast, cast); @@ -2613,7 +2727,7 @@ Stmt *RewriteModernObjC::RewriteObjCArrayLiteralExpr(ObjCArrayLiteral *Exp) { &Context->Idents.get("arr"), Context->getPointerType(Context->VoidPtrTy), 0, /*BitWidth=*/0, /*Mutable=*/true, - /*HasInit=*/false); + ICIS_NoInit); MemberExpr *ArrayLiteralME = new (Context) MemberExpr(NSArrayCallExpr, false, ARRFD, SourceLocation(), @@ -2760,7 +2874,7 @@ Stmt *RewriteModernObjC::RewriteObjCDictionaryLiteralExpr(ObjCDictionaryLiteral &Context->Idents.get("arr"), Context->getPointerType(Context->VoidPtrTy), 0, /*BitWidth=*/0, /*Mutable=*/true, - /*HasInit=*/false); + ICIS_NoInit); MemberExpr *DictLiteralValueME = new (Context) MemberExpr(NSValueCallExpr, false, ARRFD, SourceLocation(), @@ -2907,7 +3021,7 @@ QualType RewriteModernObjC::getSuperStructType() { FieldTypes[i], 0, /*BitWidth=*/0, /*Mutable=*/false, - /*HasInit=*/false)); + ICIS_NoInit)); } SuperStructDecl->completeDefinition(); @@ -2940,7 +3054,7 @@ QualType RewriteModernObjC::getConstantStringStructType() { FieldTypes[i], 0, /*BitWidth=*/0, /*Mutable=*/true, - /*HasInit=*/false)); + ICIS_NoInit)); } ConstantStringDecl->completeDefinition(); @@ -2948,6 +3062,112 @@ QualType RewriteModernObjC::getConstantStringStructType() { return Context->getTagDeclType(ConstantStringDecl); } +/// getFunctionSourceLocation - returns start location of a function +/// definition. Complications arise when the function has been declared as +/// extern "C" or extern "C" {...} +static SourceLocation getFunctionSourceLocation (RewriteModernObjC &R, + FunctionDecl *FD) { + if (FD->isExternC() && !FD->isMain()) { + const DeclContext *DC = FD->getDeclContext(); + if (const LinkageSpecDecl *LSD = dyn_cast(DC)) + // if it is extern "C" {...}, return function decl's own location. + if (!LSD->getRBraceLoc().isValid()) + return LSD->getExternLoc(); + } + if (FD->getStorageClassAsWritten() != SC_None) + R.RewriteBlockLiteralFunctionDecl(FD); + return FD->getTypeSpecStartLoc(); +} + +/// SynthMsgSendStretCallExpr - This routine translates a message expression +/// into a call to the objc_msgSend_stret() entry point. The tricky part is that a +/// nil check on the receiver must be performed before calling objc_msgSend_stret. +/// MsgSendStretFlavor - function declaration objc_msgSend_stret(...) +/// msgSendType - function type of objc_msgSend_stret(...) +/// returnType - Result type of the method being synthesized. +/// ArgTypes - type of the arguments passed to objc_msgSend_stret, starting with receiver type. +/// MsgExprs - list of argument expressions being passed to objc_msgSend_stret, +/// starting with receiver. +/// Method - Method being rewritten. +Expr *RewriteModernObjC::SynthMsgSendStretCallExpr(FunctionDecl *MsgSendStretFlavor, + QualType msgSendType, + QualType returnType, + SmallVectorImpl &ArgTypes, + SmallVectorImpl &MsgExprs, + ObjCMethodDecl *Method) { + // Now do the "normal" pointer to function cast. + QualType castType = getSimpleFunctionType(returnType, &ArgTypes[0], ArgTypes.size(), + Method ? Method->isVariadic() : false); + castType = Context->getPointerType(castType); + + // build the type of the struct that will contain the objc_msgSend_stret result.
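The string-building code that follows assembles a small wrapper struct for each such call site. For a hypothetical message whose method returns struct Point and takes one extra int argument, the emitted text has roughly this shape (the __Stret0 and arg2 names follow the utostr-based scheme below, and the function-pointer cast is whatever castType prints as):

    struct __Stret0 {
      __Stret0(id receiver, SEL sel, int arg2) {
        if (receiver == 0)
          memset((void*)&s, 0, sizeof(s));
        else
          s = ((struct Point (*)(id, SEL, int))(void *)objc_msgSend_stret)(receiver, sel, arg2);
      }
      struct Point s;
    };

The message send itself is then rewritten to __Stret0(receiver, sel, arg).s, so the nil check runs before objc_msgSend_stret is ever invoked.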
+ static unsigned stretCount=0; + std::string name = "__Stret"; name += utostr(stretCount); + std::string str = + "extern \"C\" void * __cdecl memset(void *_Dst, int _Val, size_t _Size);\n"; + str += "struct "; str += name; + str += " {\n\t"; + str += name; + str += "(id receiver, SEL sel"; + for (unsigned i = 2; i < ArgTypes.size(); i++) { + std::string ArgName = "arg"; ArgName += utostr(i); + ArgTypes[i].getAsStringInternal(ArgName, Context->getPrintingPolicy()); + str += ", "; str += ArgName; + } + // could be vararg. + for (unsigned i = ArgTypes.size(); i < MsgExprs.size(); i++) { + std::string ArgName = "arg"; ArgName += utostr(i); + MsgExprs[i]->getType().getAsStringInternal(ArgName, + Context->getPrintingPolicy()); + str += ", "; str += ArgName; + } + + str += ") {\n"; + str += "\t if (receiver == 0)\n"; + str += "\t memset((void*)&s, 0, sizeof(s));\n"; + str += "\t else\n"; + str += "\t s = (("; str += castType.getAsString(Context->getPrintingPolicy()); + str += ")(void *)objc_msgSend_stret)(receiver, sel"; + for (unsigned i = 2; i < ArgTypes.size(); i++) { + str += ", arg"; str += utostr(i); + } + // could be vararg. + for (unsigned i = ArgTypes.size(); i < MsgExprs.size(); i++) { + str += ", arg"; str += utostr(i); + } + + str += ");\n"; + str += "\t}\n"; + str += "\t"; str += returnType.getAsString(Context->getPrintingPolicy()); + str += " s;\n"; + str += "};\n\n"; + SourceLocation FunLocStart = getFunctionSourceLocation(*this, CurFunctionDef); + InsertText(FunLocStart, str); + ++stretCount; + + // AST for __Stretn(receiver, args).s; + IdentifierInfo *ID = &Context->Idents.get(name); + FunctionDecl *FD = FunctionDecl::Create(*Context, TUDecl, SourceLocation(), + SourceLocation(), ID, castType, 0, SC_Extern, + SC_None, false, false); + DeclRefExpr *DRE = new (Context) DeclRefExpr(FD, false, castType, VK_RValue, + SourceLocation()); + CallExpr *STCE = new (Context) CallExpr(*Context, DRE, &MsgExprs[0], MsgExprs.size(), + castType, VK_LValue, SourceLocation()); + + FieldDecl *FieldD = FieldDecl::Create(*Context, 0, SourceLocation(), + SourceLocation(), + &Context->Idents.get("s"), + returnType, 0, + /*BitWidth=*/0, /*Mutable=*/true, + ICIS_NoInit); + MemberExpr *ME = new (Context) MemberExpr(STCE, false, FieldD, SourceLocation(), + FieldD->getType(), VK_LValue, + OK_Ordinary); + + return ME; +} + Stmt *RewriteModernObjC::SynthMessageExpr(ObjCMessageExpr *Exp, SourceLocation StartLoc, SourceLocation EndLoc) { @@ -3013,17 +3233,14 @@ Stmt *RewriteModernObjC::SynthMessageExpr(ObjCMessageExpr *Exp, ClassDecl->getIdentifier()->getName(), StringLiteral::Ascii, false, argType, SourceLocation())); + // (Class)objc_getClass("CurrentClass") CallExpr *Cls = SynthesizeCallToFunctionDecl(GetMetaClassFunctionDecl, &ClsExprs[0], ClsExprs.size(), StartLoc, EndLoc); - // (Class)objc_getClass("CurrentClass") - CastExpr *ArgExpr = NoTypeInfoCStyleCastExpr(Context, - Context->getObjCClassType(), - CK_BitCast, Cls); ClsExprs.clear(); - ClsExprs.push_back(ArgExpr); + ClsExprs.push_back(Cls); Cls = SynthesizeCallToFunctionDecl(GetSuperClassFunctionDecl, &ClsExprs[0], ClsExprs.size(), StartLoc, EndLoc); @@ -3096,7 +3313,10 @@ Stmt *RewriteModernObjC::SynthMessageExpr(ObjCMessageExpr *Exp, &ClsExprs[0], ClsExprs.size(), StartLoc, EndLoc); - MsgExprs.push_back(Cls); + CastExpr *ArgExpr = NoTypeInfoCStyleCastExpr(Context, + Context->getObjCIdType(), + CK_BitCast, Cls); + MsgExprs.push_back(ArgExpr); break; } @@ -3124,16 +3344,13 @@ Stmt *RewriteModernObjC::SynthMessageExpr(ObjCMessageExpr *Exp, 
ClassDecl->getIdentifier()->getName(), StringLiteral::Ascii, false, argType, SourceLocation())); + // (Class)objc_getClass("CurrentClass") CallExpr *Cls = SynthesizeCallToFunctionDecl(GetClassFunctionDecl, &ClsExprs[0], ClsExprs.size(), StartLoc, EndLoc); - // (Class)objc_getClass("CurrentClass") - CastExpr *ArgExpr = NoTypeInfoCStyleCastExpr(Context, - Context->getObjCClassType(), - CK_BitCast, Cls); ClsExprs.clear(); - ClsExprs.push_back(ArgExpr); + ClsExprs.push_back(Cls); Cls = SynthesizeCallToFunctionDecl(GetSuperClassFunctionDecl, &ClsExprs[0], ClsExprs.size(), StartLoc, EndLoc); @@ -3339,29 +3556,10 @@ Stmt *RewriteModernObjC::SynthMessageExpr(ObjCMessageExpr *Exp, // expression which dictate which one to envoke depending on size of // method's return type. - // Create a reference to the objc_msgSend_stret() declaration. - DeclRefExpr *STDRE = new (Context) DeclRefExpr(MsgSendStretFlavor, - false, msgSendType, - VK_LValue, SourceLocation()); - // Need to cast objc_msgSend_stret to "void *" (see above comment). - cast = NoTypeInfoCStyleCastExpr(Context, - Context->getPointerType(Context->VoidTy), - CK_BitCast, STDRE); - // Now do the "normal" pointer to function cast. - castType = getSimpleFunctionType(returnType, &ArgTypes[0], ArgTypes.size(), - Exp->getMethodDecl() ? Exp->getMethodDecl()->isVariadic() : false); - castType = Context->getPointerType(castType); - cast = NoTypeInfoCStyleCastExpr(Context, castType, CK_BitCast, - cast); - - // Don't forget the parens to enforce the proper binding. - PE = new (Context) ParenExpr(SourceLocation(), SourceLocation(), cast); - - FT = msgSendType->getAs(); - CallExpr *STCE = new (Context) CallExpr(*Context, PE, &MsgExprs[0], - MsgExprs.size(), - FT->getResultType(), VK_RValue, - SourceLocation()); + Expr *STCE = SynthMsgSendStretCallExpr(MsgSendStretFlavor, + msgSendType, returnType, + ArgTypes, MsgExprs, + Exp->getMethodDecl()); // Build sizeof(returnType) UnaryExprOrTypeTraitExpr *sizeofExpr = @@ -3471,10 +3669,44 @@ bool RewriteModernObjC::BufferContainsPPDirectives(const char *startBuf, return false; } +/// IsTagDefinedInsideClass - This routine checks that a named tagged type +/// is defined inside an objective-c class. If so, it returns true. +bool RewriteModernObjC::IsTagDefinedInsideClass(ObjCContainerDecl *IDecl, + TagDecl *Tag, + bool &IsNamedDefinition) { + if (!IDecl) + return false; + SourceLocation TagLocation; + if (RecordDecl *RD = dyn_cast(Tag)) { + RD = RD->getDefinition(); + if (!RD || !RD->getDeclName().getAsIdentifierInfo()) + return false; + IsNamedDefinition = true; + TagLocation = RD->getLocation(); + return Context->getSourceManager().isBeforeInTranslationUnit( + IDecl->getLocation(), TagLocation); + } + if (EnumDecl *ED = dyn_cast(Tag)) { + if (!ED || !ED->getDeclName().getAsIdentifierInfo()) + return false; + IsNamedDefinition = true; + TagLocation = ED->getLocation(); + return Context->getSourceManager().isBeforeInTranslationUnit( + IDecl->getLocation(), TagLocation); + + } + return false; +} + /// RewriteObjCFieldDeclType - This routine rewrites a type into the buffer. /// It handles elaborated types, as well as enum types in the process. 
bool RewriteModernObjC::RewriteObjCFieldDeclType(QualType &Type, std::string &Result) { + if (isa(Type)) { + Result += "\t"; + return false; + } + if (Type->isArrayType()) { QualType ElemTy = Context->getBaseElementType(Type); return RewriteObjCFieldDeclType(ElemTy, Result); @@ -3490,12 +3722,11 @@ bool RewriteModernObjC::RewriteObjCFieldDeclType(QualType &Type, assert(false && "class not allowed as an ivar type"); Result += RD->getName(); - if (TagsDefinedInIvarDecls.count(RD)) { - // This struct is already defined. Do not write its definition again. + if (GlobalDefinedTags.count(RD)) { + // struct/union is defined globally, use it. Result += " "; return true; } - TagsDefinedInIvarDecls.insert(RD); Result += " {\n"; for (RecordDecl::field_iterator i = RD->field_begin(), e = RD->field_end(); i != e; ++i) { @@ -3511,12 +3742,11 @@ bool RewriteModernObjC::RewriteObjCFieldDeclType(QualType &Type, if (ED->isCompleteDefinition()) { Result += "\n\tenum "; Result += ED->getName(); - if (TagsDefinedInIvarDecls.count(ED)) { - // This enum is already defined. Do not write its definition again. + if (GlobalDefinedTags.count(ED)) { + // Enum is globally defined, use it. Result += " "; return true; } - TagsDefinedInIvarDecls.insert(ED); Result += " {\n"; for (EnumDecl::enumerator_iterator EC = ED->enumerator_begin(), @@ -3567,6 +3797,41 @@ void RewriteModernObjC::RewriteObjCFieldDecl(FieldDecl *fieldDecl, Result += ";\n"; } +/// RewriteLocallyDefinedNamedAggregates - This routine rewrites locally defined +/// named aggregate types into the input buffer. +void RewriteModernObjC::RewriteLocallyDefinedNamedAggregates(FieldDecl *fieldDecl, + std::string &Result) { + QualType Type = fieldDecl->getType(); + if (isa(Type)) + return; + if (Type->isArrayType()) + Type = Context->getBaseElementType(Type); + ObjCContainerDecl *IDecl = + dyn_cast(fieldDecl->getDeclContext()); + + TagDecl *TD = 0; + if (Type->isRecordType()) { + TD = Type->getAs()->getDecl(); + } + else if (Type->isEnumeralType()) { + TD = Type->getAs()->getDecl(); + } + + if (TD) { + if (GlobalDefinedTags.count(TD)) + return; + + bool IsNamedDefinition = false; + if (IsTagDefinedInsideClass(IDecl, TD, IsNamedDefinition)) { + RewriteObjCFieldDeclType(Type, Result); + Result += ";"; + } + if (IsNamedDefinition) + GlobalDefinedTags.insert(TD); + } + +} + /// RewriteObjCInternalStruct - Rewrite one internal struct corresponding to /// an objective-c class with ivars. void RewriteModernObjC::RewriteObjCInternalStruct(ObjCInterfaceDecl *CDecl, @@ -3595,6 +3860,12 @@ void RewriteModernObjC::RewriteObjCInternalStruct(ObjCInterfaceDecl *CDecl, return; } + // Insert named struct/union definitions declared inside the class into the + // outer scope. This follows the semantics of locally defined + // struct/unions in objective-c classes.
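A sketch of the effect of the loop that follows (all names invented): for an interface that defines a named aggregate among its ivars, such as

    @interface MyClass : NSObject {
      struct Inner { int a; } ivar1;
    }

the named definition is emitted once, ahead of the _IMPL struct, so the ivar can then refer to it by name:

    struct Inner { int a; };
    struct MyClass_IMPL {
      struct Inner ivar1;
    };

Once a tag has been recorded in GlobalDefinedTags it is never re-emitted, which is what the two "defined globally, use it" early returns above implement.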
+ for (unsigned i = 0, e = IVars.size(); i < e; i++) + RewriteLocallyDefinedNamedAggregates(IVars[i], Result); + Result += "\nstruct "; Result += CDecl->getNameAsString(); Result += "_IMPL {\n"; @@ -3604,7 +3875,7 @@ void RewriteModernObjC::RewriteObjCInternalStruct(ObjCInterfaceDecl *CDecl, Result += "_IMPL "; Result += RCDecl->getNameAsString(); Result += "_IVARS;\n"; } - TagsDefinedInIvarDecls.clear(); + for (unsigned i = 0, e = IVars.size(); i < e; i++) RewriteObjCFieldDecl(IVars[i], Result); @@ -3616,14 +3887,6 @@ void RewriteModernObjC::RewriteObjCInternalStruct(ObjCInterfaceDecl *CDecl, llvm_unreachable("struct already synthesize- RewriteObjCInternalStruct"); } -static void WriteInternalIvarName(ObjCInterfaceDecl *IDecl, - ObjCIvarDecl *IvarDecl, std::string &Result) { - Result += "OBJC_IVAR_$_"; - Result += IDecl->getName(); - Result += "$"; - Result += IvarDecl->getName(); -} - /// RewriteIvarOffsetSymbols - Rewrite ivar offset symbols of those ivars which /// have been referenced in an ivar access expression. void RewriteModernObjC::RewriteIvarOffsetSymbols(ObjCInterfaceDecl *CDecl, @@ -3961,8 +4224,8 @@ std::string RewriteModernObjC::SynthesizeBlockDescriptor(std::string DescTag, unsigned hasCopy) { std::string S = "\nstatic struct " + DescTag; - S += " {\n unsigned long reserved;\n"; - S += " unsigned long Block_size;\n"; + S += " {\n size_t reserved;\n"; + S += " size_t Block_size;\n"; if (hasCopy) { S += " void (*copy)(struct "; S += ImplTag; S += "*, struct "; @@ -3983,23 +4246,6 @@ std::string RewriteModernObjC::SynthesizeBlockDescriptor(std::string DescTag, return S; } -/// getFunctionSourceLocation - returns start location of a function -/// definition. Complication arises when function has declared as -/// extern "C" or extern "C" {...} -static SourceLocation getFunctionSourceLocation (FunctionDecl *FD) { - if (!FD->isExternC() || FD->isMain()) - return FD->getTypeSpecStartLoc(); - const DeclContext *DC = FD->getDeclContext(); - if (const LinkageSpecDecl *LSD = dyn_cast(DC)) { - SourceLocation BodyRBrace = LSD->getRBraceLoc(); - // if it is extern "C" {...}, return function decl's own location. - if (BodyRBrace.isValid()) - return FD->getTypeSpecStartLoc(); - return LSD->getExternLoc(); - } - return FD->getTypeSpecStartLoc(); -} - void RewriteModernObjC::SynthesizeBlockLiterals(SourceLocation FunLocStart, StringRef FunName) { bool RewriteSC = (GlobalVarDecl && @@ -4110,7 +4356,9 @@ void RewriteModernObjC::SynthesizeBlockLiterals(SourceLocation FunLocStart, } void RewriteModernObjC::InsertBlockLiteralsWithinFunction(FunctionDecl *FD) { - SourceLocation FunLocStart = getFunctionSourceLocation(FD); + SourceLocation FunLocStart = + (!Blocks.empty()) ? 
getFunctionSourceLocation(*this, FD) + : FD->getTypeSpecStartLoc(); StringRef FuncName = FD->getName(); SynthesizeBlockLiterals(FunLocStart, FuncName); @@ -4320,7 +4568,7 @@ Stmt *RewriteModernObjC::SynthesizeBlockCall(CallExpr *Exp, const Expr *BlockExp &Context->Idents.get("FuncPtr"), Context->VoidPtrTy, 0, /*BitWidth=*/0, /*Mutable=*/true, - /*HasInit=*/false); + ICIS_NoInit); MemberExpr *ME = new (Context) MemberExpr(PE, true, FD, SourceLocation(), FD->getType(), VK_LValue, OK_Ordinary); @@ -4369,7 +4617,7 @@ Stmt *RewriteModernObjC::RewriteBlockDeclRefExpr(DeclRefExpr *DeclRefExp) { &Context->Idents.get("__forwarding"), Context->VoidPtrTy, 0, /*BitWidth=*/0, /*Mutable=*/true, - /*HasInit=*/false); + ICIS_NoInit); MemberExpr *ME = new (Context) MemberExpr(DeclRefExp, isArrow, FD, SourceLocation(), FD->getType(), VK_LValue, @@ -4380,7 +4628,7 @@ Stmt *RewriteModernObjC::RewriteBlockDeclRefExpr(DeclRefExpr *DeclRefExp) { &Context->Idents.get(Name), Context->VoidPtrTy, 0, /*BitWidth=*/0, /*Mutable=*/true, - /*HasInit=*/false); + ICIS_NoInit); ME = new (Context) MemberExpr(ME, true, FD, SourceLocation(), DeclRefExp->getType(), VK_LValue, OK_Ordinary); @@ -4719,7 +4967,8 @@ std::string RewriteModernObjC::SynthesizeByrefCopyDestroyHelper(VarDecl *VD, /// ND=initializer-if-any}; /// /// -void RewriteModernObjC::RewriteByRefVar(VarDecl *ND) { +void RewriteModernObjC::RewriteByRefVar(VarDecl *ND, bool firstDecl, + bool lastDecl) { int flag = 0; int isa = 0; SourceLocation DeclLoc = ND->getTypeSpecStartLoc(); @@ -4758,17 +5007,17 @@ void RewriteModernObjC::RewriteByRefVar(VarDecl *ND) { // Insert this type in global scope. It is needed by helper function. SourceLocation FunLocStart; if (CurFunctionDef) - FunLocStart = getFunctionSourceLocation(CurFunctionDef); + FunLocStart = getFunctionSourceLocation(*this, CurFunctionDef); else { assert(CurMethodDef && "RewriteByRefVar - CurMethodDef is null"); FunLocStart = CurMethodDef->getLocStart(); } InsertText(FunLocStart, ByrefType); + if (Ty.isObjCGCWeak()) { flag |= BLOCK_FIELD_IS_WEAK; isa = 1; } - if (HasCopyAndDispose) { flag = BLOCK_BYREF_CALLER; QualType Ty = ND->getType(); @@ -4788,8 +5037,13 @@ void RewriteModernObjC::RewriteByRefVar(VarDecl *ND) { bool hasInit = (ND->getInit() != 0); // FIXME. rewriter does not support __block c++ objects which // require construction. - if (hasInit && dyn_cast(ND->getInit())) - hasInit = false; + if (hasInit) + if (CXXConstructExpr *CExp = dyn_cast(ND->getInit())) { + CXXConstructorDecl *CXXDecl = CExp->getConstructor(); + if (CXXDecl && CXXDecl->isDefaultConstructor()) + hasInit = false; + } + unsigned flags = 0; if (HasCopyAndDispose) flags |= BLOCK_HAS_COPY_DISPOSE; @@ -4798,21 +5052,36 @@ void RewriteModernObjC::RewriteByRefVar(VarDecl *ND) { RewriteByRefString(ByrefType, Name, ND); std::string ForwardingCastType("("); ForwardingCastType += ByrefType + " *)"; + ByrefType += " " + Name + " = {(void*)"; + ByrefType += utostr(isa); + ByrefType += "," + ForwardingCastType + "&" + Name + ", "; + ByrefType += utostr(flags); + ByrefType += ", "; + ByrefType += "sizeof("; + RewriteByRefString(ByrefType, Name, ND); + ByrefType += ")"; + if (HasCopyAndDispose) { + ByrefType += ", __Block_byref_id_object_copy_"; + ByrefType += utostr(flag); + ByrefType += ", __Block_byref_id_object_dispose_"; + ByrefType += utostr(flag); + } + + if (!firstDecl) { + // In multiple __block declarations, and for all but 1st declaration, + // find location of the separating comma. 
This is the start location + // where the new text is to be inserted. + DeclLoc = ND->getLocation(); + const char *startDeclBuf = SM->getCharacterData(DeclLoc); + const char *commaBuf = startDeclBuf; + while (*commaBuf != ',') + commaBuf--; + assert((*commaBuf == ',') && "RewriteByRefVar: can't find ','"); + DeclLoc = DeclLoc.getLocWithOffset(commaBuf - startDeclBuf); + startBuf = commaBuf; + } + + if (!hasInit) { - ByrefType += " " + Name + " = {(void*)"; - ByrefType += utostr(isa); - ByrefType += "," + ForwardingCastType + "&" + Name + ", "; - ByrefType += utostr(flags); - ByrefType += ", "; - ByrefType += "sizeof("; - RewriteByRefString(ByrefType, Name, ND); - ByrefType += ")"; - if (HasCopyAndDispose) { - ByrefType += ", __Block_byref_id_object_copy_"; - ByrefType += utostr(flag); - ByrefType += ", __Block_byref_id_object_dispose_"; - ByrefType += utostr(flag); - } ByrefType += "};\n"; unsigned nameSize = Name.size(); // for block or function pointer declaration. Name is already @@ -4822,6 +5091,7 @@ void RewriteModernObjC::RewriteByRefVar(VarDecl *ND) { ReplaceText(DeclLoc, endBuf-startBuf+nameSize, ByrefType); } else { + ByrefType += ", "; SourceLocation startLoc; Expr *E = ND->getInit(); if (const CStyleCastExpr *ECE = dyn_cast(E)) @@ -4830,39 +5100,17 @@ void RewriteModernObjC::RewriteByRefVar(VarDecl *ND) { startLoc = E->getLocStart(); startLoc = SM->getExpansionLoc(startLoc); endBuf = SM->getCharacterData(startLoc); - ByrefType += " " + Name; - ByrefType += " = {(void*)"; - ByrefType += utostr(isa); - ByrefType += "," + ForwardingCastType + "&" + Name + ", "; - ByrefType += utostr(flags); - ByrefType += ", "; - ByrefType += "sizeof("; - RewriteByRefString(ByrefType, Name, ND); - ByrefType += "), "; - if (HasCopyAndDispose) { - ByrefType += "__Block_byref_id_object_copy_"; - ByrefType += utostr(flag); - ByrefType += ", __Block_byref_id_object_dispose_"; - ByrefType += utostr(flag); - ByrefType += ", "; - } ReplaceText(DeclLoc, endBuf-startBuf, ByrefType); - - // Complete the newly synthesized compound expression by inserting a right - // curly brace before the end of the declaration. - // FIXME: This approach avoids rewriting the initializer expression. It - // also assumes there is only one declarator. For example, the following - // isn't currently supported by this routine (in general): - // - // double __block BYREFVAR = 1.34, BYREFVAR2 = 1.37; - // - const char *startInitializerBuf = SM->getCharacterData(startLoc); - const char *semiBuf = strchr(startInitializerBuf, ';'); - assert((*semiBuf == ';') && "RewriteByRefVar: can't find ';'"); - SourceLocation semiLoc = - startLoc.getLocWithOffset(semiBuf-startInitializerBuf); - InsertText(semiLoc, "}"); + const char separator = lastDecl ? ';' : ','; + const char *startInitializerBuf = SM->getCharacterData(startLoc); + const char *separatorBuf = strchr(startInitializerBuf, separator); + assert((*separatorBuf == separator) && + "RewriteByRefVar: can't find ';' or ','"); + SourceLocation separatorLoc = + startLoc.getLocWithOffset(separatorBuf-startInitializerBuf); + + InsertText(separatorLoc, lastDecl ?
"}" : "};\n"); } return; } @@ -5214,8 +5462,8 @@ Stmt *RewriteModernObjC::RewriteFunctionBodyOrGlobalInitializer(Stmt *S) { if (ObjCBoolLiteralExpr *BoolLitExpr = dyn_cast(S)) return RewriteObjCBoolLiteralExpr(BoolLitExpr); - if (ObjCNumericLiteral *NumericLitExpr = dyn_cast(S)) - return RewriteObjCNumericLiteralExpr(NumericLitExpr); + if (ObjCBoxedExpr *BoxedExpr = dyn_cast(S)) + return RewriteObjCBoxedExpr(BoxedExpr); if (ObjCArrayLiteral *ArrayLitExpr = dyn_cast(S)) return RewriteObjCArrayLiteralExpr(ArrayLitExpr); @@ -5247,6 +5495,11 @@ Stmt *RewriteModernObjC::RewriteFunctionBodyOrGlobalInitializer(Stmt *S) { return RewriteMessageExpr(MessExpr); } + if (ObjCAutoreleasePoolStmt *StmtAutoRelease = + dyn_cast(S)) { + return RewriteObjCAutoreleasePoolStmt(StmtAutoRelease); + } + if (ObjCAtTryStmt *StmtTry = dyn_cast(S)) return RewriteObjCTryStmt(StmtTry); @@ -5300,7 +5553,7 @@ Stmt *RewriteModernObjC::RewriteFunctionBodyOrGlobalInitializer(Stmt *S) { assert(!BlockByRefDeclNo.count(ND) && "RewriteFunctionBodyOrGlobalInitializer: Duplicate byref decl"); BlockByRefDeclNo[ND] = uniqueByrefDeclCount++; - RewriteByRefVar(VD); + RewriteByRefVar(VD, (DI == DS->decl_begin()), ((DI+1) == DE)); } else RewriteTypeOfDecl(VD); @@ -5402,7 +5655,6 @@ void RewriteModernObjC::HandleDeclInMainFile(Decl *D) { // FIXME: If this should support Obj-C++, support CXXTryStmt if (CompoundStmt *Body = dyn_cast_or_null(FD->getBody())) { CurFunctionDef = FD; - CurFunctionDeclToDeclareForBlock = FD; CurrentBody = Body; Body = cast_or_null(RewriteFunctionBodyOrGlobalInitializer(Body)); @@ -5416,7 +5668,6 @@ void RewriteModernObjC::HandleDeclInMainFile(Decl *D) { // and any copy/dispose helper functions. InsertBlockLiteralsWithinFunction(FD); CurFunctionDef = 0; - CurFunctionDeclToDeclareForBlock = 0; } break; } @@ -5515,7 +5766,7 @@ static void Write_ProtocolExprReferencedMetadata(ASTContext *Context, std::string &Result) { // Also output .objc_protorefs$B section and its meta-data. if (Context->getLangOpts().MicrosoftExt) - Result += "__declspec(allocate(\".objc_protorefs$B\")) "; + Result += "static "; Result += "struct _protocol_t *"; Result += "_OBJC_PROTOCOL_REFERENCE_$_"; Result += PDecl->getNameAsString(); @@ -5539,6 +5790,10 @@ void RewriteModernObjC::HandleTranslationUnit(ASTContext &C) { } InsertText(SM->getLocForStartOfFile(MainFileID), Preamble, false); + + if (ClassImplementation.size() || CategoryImplementation.size()) + RewriteImplementations(); + for (unsigned i = 0, e = ObjCInterfacesSeen.size(); i < e; i++) { ObjCInterfaceDecl *CDecl = ObjCInterfacesSeen[i]; // Write struct declaration for the class matching its ivar declarations. @@ -5547,9 +5802,6 @@ void RewriteModernObjC::HandleTranslationUnit(ASTContext &C) { // private ivars. RewriteInterfaceDecl(CDecl); } - - if (ClassImplementation.size() || CategoryImplementation.size()) - RewriteImplementations(); // Get the buffer corresponding to MainFileID. If we haven't changed it, then // we are done. @@ -5605,7 +5857,6 @@ void RewriteModernObjC::Initialize(ASTContext &context) { Preamble += "#pragma section(\".objc_imageinfo$B\", long, read, write)\n"; Preamble += "#pragma section(\".objc_nlclslist$B\", long, read, write)\n"; Preamble += "#pragma section(\".objc_nlcatlist$B\", long, read, write)\n"; - Preamble += "#pragma section(\".objc_protorefs$B\", long, read, write)\n"; // These are generated but not necessary for functionality. 
Preamble += "#pragma section(\".cat_cls_meth$B\", long, read, write)\n"; Preamble += "#pragma section(\".inst_meth$B\", long, read, write)\n"; @@ -5636,11 +5887,11 @@ void RewriteModernObjC::Initialize(ASTContext &context) { Preamble += "__OBJC_RW_DLLIMPORT void objc_msgSendSuper_stret(void);\n"; Preamble += "__OBJC_RW_DLLIMPORT void objc_msgSend_fpret(void);\n"; - Preamble += "__OBJC_RW_DLLIMPORT struct objc_object *objc_getClass"; + Preamble += "__OBJC_RW_DLLIMPORT struct objc_class *objc_getClass"; Preamble += "(const char *);\n"; Preamble += "__OBJC_RW_DLLIMPORT struct objc_class *class_getSuperclass"; Preamble += "(struct objc_class *);\n"; - Preamble += "__OBJC_RW_DLLIMPORT struct objc_object *objc_getMetaClass"; + Preamble += "__OBJC_RW_DLLIMPORT struct objc_class *objc_getMetaClass"; Preamble += "(const char *);\n"; Preamble += "__OBJC_RW_DLLIMPORT void objc_exception_throw( struct objc_object *);\n"; // @synchronized hooks. @@ -5723,11 +5974,20 @@ void RewriteModernObjC::Initialize(ASTContext &context) { Preamble += "\t arr[i] = va_arg(marker, void *);\n"; Preamble += "\tva_end( marker );\n"; Preamble += " };\n"; - Preamble += " __NSContainer_literal() {\n"; + Preamble += " ~__NSContainer_literal() {\n"; Preamble += "\tdelete[] arr;\n"; Preamble += " }\n"; Preamble += "};\n"; + // Declaration required for implementation of @autoreleasepool statement. + Preamble += "extern \"C\" __declspec(dllimport) void * objc_autoreleasePoolPush(void);\n"; + Preamble += "extern \"C\" __declspec(dllimport) void objc_autoreleasePoolPop(void *);\n\n"; + Preamble += "struct __AtAutoreleasePool {\n"; + Preamble += " __AtAutoreleasePool() {atautoreleasepoolobj = objc_autoreleasePoolPush();}\n"; + Preamble += " ~__AtAutoreleasePool() {objc_autoreleasePoolPop(atautoreleasepoolobj);}\n"; + Preamble += " void * atautoreleasepoolobj;\n"; + Preamble += "};\n"; + // NOTE! Windows uses LLP64 for 64bit mode. So, cast pointer to long long // as this avoids warning in any 64bit/32bit compilation model. 
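Taken together with RewriteObjCAutoreleasePoolStmt earlier in this patch, this preamble turns the @autoreleasepool rewrite into a plain RAII transformation; a sketch (body elided):

    // Source:
    //   @autoreleasepool { /* body */ }
    // Rewritten output:
    /* @autoreleasepool */ { __AtAutoreleasePool __autoreleasepool; /* body */ }

The struct's constructor calls objc_autoreleasePoolPush() and its destructor calls objc_autoreleasePoolPop(), so the pool's lifetime tracks the rewritten compound statement exactly.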
Preamble += "\n#define __OFFSETOFIVAR__(TYPE, MEMBER) ((long long) &((TYPE *)0)->MEMBER)\n"; @@ -6738,20 +6998,20 @@ void RewriteModernObjC::RewriteObjCClassMetaData(ObjCImplementationDecl *IDecl, for (ObjCImplDecl::propimpl_iterator Prop = IDecl->propimpl_begin(), PropEnd = IDecl->propimpl_end(); Prop != PropEnd; ++Prop) { - if ((*Prop)->getPropertyImplementation() == ObjCPropertyImplDecl::Dynamic) + if (Prop->getPropertyImplementation() == ObjCPropertyImplDecl::Dynamic) continue; - if (!(*Prop)->getPropertyIvarDecl()) + if (!Prop->getPropertyIvarDecl()) continue; - ObjCPropertyDecl *PD = (*Prop)->getPropertyDecl(); + ObjCPropertyDecl *PD = Prop->getPropertyDecl(); if (!PD) continue; if (ObjCMethodDecl *Getter = PD->getGetterMethodDecl()) - if (!Getter->isDefined()) + if (mustSynthesizeSetterGetterMethod(IDecl, PD, true /*getter*/)) InstanceMethods.push_back(Getter); if (PD->isReadOnly()) continue; if (ObjCMethodDecl *Setter = PD->getSetterMethodDecl()) - if (!Setter->isDefined()) + if (mustSynthesizeSetterGetterMethod(IDecl, PD, false /*setter*/)) InstanceMethods.push_back(Setter); } @@ -7002,11 +7262,11 @@ void RewriteModernObjC::RewriteObjCCategoryImplDecl(ObjCCategoryImplDecl *IDecl, for (ObjCImplDecl::propimpl_iterator Prop = IDecl->propimpl_begin(), PropEnd = IDecl->propimpl_end(); Prop != PropEnd; ++Prop) { - if ((*Prop)->getPropertyImplementation() == ObjCPropertyImplDecl::Dynamic) + if (Prop->getPropertyImplementation() == ObjCPropertyImplDecl::Dynamic) continue; - if (!(*Prop)->getPropertyIvarDecl()) + if (!Prop->getPropertyIvarDecl()) continue; - ObjCPropertyDecl *PD = (*Prop)->getPropertyDecl(); + ObjCPropertyDecl *PD = Prop->getPropertyDecl(); if (!PD) continue; if (ObjCMethodDecl *Getter = PD->getGetterMethodDecl()) @@ -7053,7 +7313,7 @@ void RewriteModernObjC::RewriteObjCCategoryImplDecl(ObjCCategoryImplDecl *IDecl, ClassProperties.push_back(*I); Write_prop_list_t_initializer(*this, Context, Result, ClassProperties, - /* Container */0, + /* Container */IDecl, "_OBJC_$_PROP_LIST_", FullCategoryName); @@ -7189,7 +7449,7 @@ Stmt *RewriteModernObjC::RewriteObjCIvarRefExpr(ObjCIvarRefExpr *IV) { if (BaseExpr->getType()->isObjCObjectPointerType()) { const ObjCInterfaceType *iFaceDecl = - dyn_cast(BaseExpr->getType()->getPointeeType()); + dyn_cast(BaseExpr->getType()->getPointeeType()); assert(iFaceDecl && "RewriteObjCIvarRefExpr - iFaceDecl is null"); // lookup which class implements the instance variable. ObjCInterfaceDecl *clsDeclared = 0; @@ -7223,13 +7483,52 @@ Stmt *RewriteModernObjC::RewriteObjCIvarRefExpr(ObjCIvarRefExpr *IV) { SourceLocation(), addExpr); QualType IvarT = D->getType(); + + if (!isa(IvarT) && IvarT->isRecordType()) { + RecordDecl *RD = IvarT->getAs()->getDecl(); + RD = RD->getDefinition(); + if (RD && !RD->getDeclName().getAsIdentifierInfo()) { + // decltype(((Foo_IMPL*)0)->bar) * + ObjCContainerDecl *CDecl = + dyn_cast(D->getDeclContext()); + // ivar in class extensions requires special treatment. 
+ if (ObjCCategoryDecl *CatDecl = dyn_cast(CDecl)) + CDecl = CatDecl->getClassInterface(); + std::string RecName = CDecl->getName(); + RecName += "_IMPL"; + RecordDecl *RD = RecordDecl::Create(*Context, TTK_Struct, TUDecl, + SourceLocation(), SourceLocation(), + &Context->Idents.get(RecName.c_str())); + QualType PtrStructIMPL = Context->getPointerType(Context->getTagDeclType(RD)); + unsigned UnsignedIntSize = + static_cast(Context->getTypeSize(Context->UnsignedIntTy)); + Expr *Zero = IntegerLiteral::Create(*Context, + llvm::APInt(UnsignedIntSize, 0), + Context->UnsignedIntTy, SourceLocation()); + Zero = NoTypeInfoCStyleCastExpr(Context, PtrStructIMPL, CK_BitCast, Zero); + ParenExpr *PE = new (Context) ParenExpr(SourceLocation(), SourceLocation(), + Zero); + FieldDecl *FD = FieldDecl::Create(*Context, 0, SourceLocation(), + SourceLocation(), + &Context->Idents.get(D->getNameAsString()), + IvarT, 0, + /*BitWidth=*/0, /*Mutable=*/true, + ICIS_NoInit); + MemberExpr *ME = new (Context) MemberExpr(PE, true, FD, SourceLocation(), + FD->getType(), VK_LValue, + OK_Ordinary); + IvarT = Context->getDecltypeType(ME, ME->getType()); + } + } convertObjCTypeToCStyleType(IvarT); QualType castT = Context->getPointerType(IvarT); - + castExpr = NoTypeInfoCStyleCastExpr(Context, castT, CK_BitCast, PE); + + Expr *Exp = new (Context) UnaryOperator(castExpr, UO_Deref, IvarT, VK_LValue, OK_Ordinary, SourceLocation()); diff --git a/lib/Rewrite/RewriteObjC.cpp b/lib/Rewrite/RewriteObjC.cpp index 9c0737f..425cd77 100644 --- a/lib/Rewrite/RewriteObjC.cpp +++ b/lib/Rewrite/RewriteObjC.cpp @@ -349,13 +349,18 @@ namespace { virtual void RewriteIvarOffsetComputation(ObjCIvarDecl *ivar, std::string &Result) = 0; - // Misc. AST transformation routines. Somtimes they end up calling + // Misc. AST transformation routines. Sometimes they end up calling // rewriting routines on the new ASTs. CallExpr *SynthesizeCallToFunctionDecl(FunctionDecl *FD, Expr **args, unsigned nargs, SourceLocation StartLoc=SourceLocation(), SourceLocation EndLoc=SourceLocation()); - + CallExpr *SynthMsgSendStretCallExpr(FunctionDecl *MsgSendStretFlavor, + QualType msgSendType, + QualType returnType, + SmallVectorImpl &ArgTypes, + SmallVectorImpl &MsgExprs, + ObjCMethodDecl *Method); Stmt *SynthMessageExpr(ObjCMessageExpr *Exp, SourceLocation StartLoc=SourceLocation(), SourceLocation EndLoc=SourceLocation()); @@ -2592,7 +2597,7 @@ QualType RewriteObjC::getSuperStructType() { FieldTypes[i], 0, /*BitWidth=*/0, /*Mutable=*/false, - /*HasInit=*/false)); + ICIS_NoInit)); } SuperStructDecl->completeDefinition(); @@ -2625,7 +2630,7 @@ QualType RewriteObjC::getConstantStringStructType() { FieldTypes[i], 0, /*BitWidth=*/0, /*Mutable=*/true, - /*HasInit=*/false)); + ICIS_NoInit)); } ConstantStringDecl->completeDefinition(); @@ -2633,6 +2638,40 @@ QualType RewriteObjC::getConstantStringStructType() { return Context->getTagDeclType(ConstantStringDecl); } +CallExpr *RewriteObjC::SynthMsgSendStretCallExpr(FunctionDecl *MsgSendStretFlavor, + QualType msgSendType, + QualType returnType, + SmallVectorImpl &ArgTypes, + SmallVectorImpl &MsgExprs, + ObjCMethodDecl *Method) { + // Create a reference to the objc_msgSend_stret() declaration. + DeclRefExpr *STDRE = new (Context) DeclRefExpr(MsgSendStretFlavor, + false, msgSendType, + VK_LValue, SourceLocation()); + // Need to cast objc_msgSend_stret to "void *" (see above comment). 
+ CastExpr *cast = NoTypeInfoCStyleCastExpr(Context, + Context->getPointerType(Context->VoidTy), + CK_BitCast, STDRE); + // Now do the "normal" pointer to function cast. + QualType castType = getSimpleFunctionType(returnType, &ArgTypes[0], ArgTypes.size(), + Method ? Method->isVariadic() : false); + castType = Context->getPointerType(castType); + cast = NoTypeInfoCStyleCastExpr(Context, castType, CK_BitCast, + cast); + + // Don't forget the parens to enforce the proper binding. + ParenExpr *PE = new (Context) ParenExpr(SourceLocation(), SourceLocation(), cast); + + const FunctionType *FT = msgSendType->getAs(); + CallExpr *STCE = new (Context) CallExpr(*Context, PE, &MsgExprs[0], + MsgExprs.size(), + FT->getResultType(), VK_RValue, + SourceLocation()); + return STCE; + +} + + Stmt *RewriteObjC::SynthMessageExpr(ObjCMessageExpr *Exp, SourceLocation StartLoc, SourceLocation EndLoc) { @@ -3023,30 +3062,11 @@ Stmt *RewriteObjC::SynthMessageExpr(ObjCMessageExpr *Exp, // call to objc_msgSend_stret and hang both varieties on a conditional // expression which dictate which one to envoke depending on size of // method's return type. - - // Create a reference to the objc_msgSend_stret() declaration. - DeclRefExpr *STDRE = new (Context) DeclRefExpr(MsgSendStretFlavor, - false, msgSendType, - VK_LValue, SourceLocation()); - // Need to cast objc_msgSend_stret to "void *" (see above comment). - cast = NoTypeInfoCStyleCastExpr(Context, - Context->getPointerType(Context->VoidTy), - CK_BitCast, STDRE); - // Now do the "normal" pointer to function cast. - castType = getSimpleFunctionType(returnType, &ArgTypes[0], ArgTypes.size(), - Exp->getMethodDecl() ? Exp->getMethodDecl()->isVariadic() : false); - castType = Context->getPointerType(castType); - cast = NoTypeInfoCStyleCastExpr(Context, castType, CK_BitCast, - cast); - - // Don't forget the parens to enforce the proper binding. 
- PE = new (Context) ParenExpr(SourceLocation(), SourceLocation(), cast); - - FT = msgSendType->getAs(); - CallExpr *STCE = new (Context) CallExpr(*Context, PE, &MsgExprs[0], - MsgExprs.size(), - FT->getResultType(), VK_RValue, - SourceLocation()); + + CallExpr *STCE = SynthMsgSendStretCallExpr(MsgSendStretFlavor, + msgSendType, returnType, + ArgTypes, MsgExprs, + Exp->getMethodDecl()); // Build sizeof(returnType) UnaryExprOrTypeTraitExpr *sizeofExpr = @@ -3887,7 +3907,7 @@ Stmt *RewriteObjC::SynthesizeBlockCall(CallExpr *Exp, const Expr *BlockExp) { &Context->Idents.get("FuncPtr"), Context->VoidPtrTy, 0, /*BitWidth=*/0, /*Mutable=*/true, - /*HasInit=*/false); + ICIS_NoInit); MemberExpr *ME = new (Context) MemberExpr(PE, true, FD, SourceLocation(), FD->getType(), VK_LValue, OK_Ordinary); @@ -3936,7 +3956,7 @@ Stmt *RewriteObjC::RewriteBlockDeclRefExpr(DeclRefExpr *DeclRefExp) { &Context->Idents.get("__forwarding"), Context->VoidPtrTy, 0, /*BitWidth=*/0, /*Mutable=*/true, - /*HasInit=*/false); + ICIS_NoInit); MemberExpr *ME = new (Context) MemberExpr(DeclRefExp, isArrow, FD, SourceLocation(), FD->getType(), VK_LValue, @@ -3947,7 +3967,7 @@ Stmt *RewriteObjC::RewriteBlockDeclRefExpr(DeclRefExpr *DeclRefExp) { &Context->Idents.get(Name), Context->VoidPtrTy, 0, /*BitWidth=*/0, /*Mutable=*/true, - /*HasInit=*/false); + ICIS_NoInit); ME = new (Context) MemberExpr(ME, true, FD, SourceLocation(), DeclRefExp->getType(), VK_LValue, OK_Ordinary); @@ -5442,10 +5462,10 @@ void RewriteObjCFragileABI::RewriteObjCClassMetaData(ObjCImplementationDecl *IDe IVE = CDecl->ivar_end(); } Result += "\t,{{\""; - Result += (*IVI)->getNameAsString(); + Result += IVI->getNameAsString(); Result += "\", \""; std::string TmpString, StrEncoding; - Context->getObjCEncodingForType((*IVI)->getType(), TmpString, *IVI); + Context->getObjCEncodingForType(IVI->getType(), TmpString, *IVI); QuoteDoublequotes(TmpString, StrEncoding); Result += StrEncoding; Result += "\", "; @@ -5453,14 +5473,14 @@ void RewriteObjCFragileABI::RewriteObjCClassMetaData(ObjCImplementationDecl *IDe Result += "}\n"; for (++IVI; IVI != IVE; ++IVI) { Result += "\t ,{\""; - Result += (*IVI)->getNameAsString(); + Result += IVI->getNameAsString(); Result += "\", \""; std::string TmpString, StrEncoding; - Context->getObjCEncodingForType((*IVI)->getType(), TmpString, *IVI); + Context->getObjCEncodingForType(IVI->getType(), TmpString, *IVI); QuoteDoublequotes(TmpString, StrEncoding); Result += StrEncoding; Result += "\", "; - RewriteIvarOffsetComputation((*IVI), Result); + RewriteIvarOffsetComputation(*IVI, Result); Result += "}\n"; } @@ -5476,11 +5496,11 @@ void RewriteObjCFragileABI::RewriteObjCClassMetaData(ObjCImplementationDecl *IDe for (ObjCImplDecl::propimpl_iterator Prop = IDecl->propimpl_begin(), PropEnd = IDecl->propimpl_end(); Prop != PropEnd; ++Prop) { - if ((*Prop)->getPropertyImplementation() == ObjCPropertyImplDecl::Dynamic) + if (Prop->getPropertyImplementation() == ObjCPropertyImplDecl::Dynamic) continue; - if (!(*Prop)->getPropertyIvarDecl()) + if (!Prop->getPropertyIvarDecl()) continue; - ObjCPropertyDecl *PD = (*Prop)->getPropertyDecl(); + ObjCPropertyDecl *PD = Prop->getPropertyDecl(); if (!PD) continue; if (ObjCMethodDecl *Getter = PD->getGetterMethodDecl()) @@ -5761,11 +5781,11 @@ void RewriteObjCFragileABI::RewriteObjCCategoryImplDecl(ObjCCategoryImplDecl *ID for (ObjCImplDecl::propimpl_iterator Prop = IDecl->propimpl_begin(), PropEnd = IDecl->propimpl_end(); Prop != PropEnd; ++Prop) { - if ((*Prop)->getPropertyImplementation() == 
ObjCPropertyImplDecl::Dynamic)
+    if (Prop->getPropertyImplementation() == ObjCPropertyImplDecl::Dynamic)
       continue;
-    if (!(*Prop)->getPropertyIvarDecl())
+    if (!Prop->getPropertyIvarDecl())
       continue;
-    ObjCPropertyDecl *PD = (*Prop)->getPropertyDecl();
+    ObjCPropertyDecl *PD = Prop->getPropertyDecl();
     if (!PD)
       continue;
     if (ObjCMethodDecl *Getter = PD->getGetterMethodDecl())
@@ -6015,4 +6035,3 @@ Stmt *RewriteObjCFragileABI::RewriteObjCIvarRefExpr(ObjCIvarRefExpr *IV) {
   ReplaceStmtWithRange(IV, Replacement, OldRange);
   return Replacement;
 }
-
diff --git a/lib/Rewrite/Rewriter.cpp b/lib/Rewrite/Rewriter.cpp
index 43fb01b..7c27114 100644
--- a/lib/Rewrite/Rewriter.cpp
+++ b/lib/Rewrite/Rewriter.cpp
@@ -15,9 +15,12 @@
 #include "clang/Rewrite/Rewriter.h"
 #include "clang/AST/Stmt.h"
 #include "clang/AST/Decl.h"
-#include "clang/Lex/Lexer.h"
+#include "clang/Basic/DiagnosticIDs.h"
+#include "clang/Basic/FileManager.h"
 #include "clang/Basic/SourceManager.h"
+#include "clang/Lex/Lexer.h"
 #include "llvm/ADT/SmallString.h"
+#include "llvm/Support/FileSystem.h"
 using namespace clang;
 raw_ostream &RewriteBuffer::write(raw_ostream &os) const {
@@ -27,7 +30,7 @@ raw_ostream &RewriteBuffer::write(raw_ostream &os) const {
 }
 /// \brief Return true if this character is non-new-line whitespace:
-/// ' ', '\t', '\f', '\v', '\r'.
+/// ' ', '\\t', '\\f', '\\v', '\\r'.
 static inline bool isWhitespace(unsigned char c) {
   switch (c) {
   case ' ':
@@ -412,3 +415,72 @@ bool Rewriter::IncreaseIndentation(CharSourceRange range,
   return false;
 }
+
+// A wrapper for a file stream that atomically overwrites the target.
+//
+// Creates a file output stream for a temporary file in the constructor,
+// which is later accessible via getStream() if ok() returns true.
+// Flushes the stream and moves the temporary file to the target location
+// in the destructor.
+class AtomicallyMovedFile {
+public:
+  AtomicallyMovedFile(DiagnosticsEngine &Diagnostics, StringRef Filename,
+                      bool &AllWritten)
+    : Diagnostics(Diagnostics), Filename(Filename), AllWritten(AllWritten) {
+    TempFilename = Filename;
+    TempFilename += "-%%%%%%%%";
+    int FD;
+    if (llvm::sys::fs::unique_file(TempFilename.str(), FD, TempFilename,
+                                   /*makeAbsolute=*/true, 0664)) {
+      AllWritten = false;
+      Diagnostics.Report(clang::diag::err_unable_to_make_temp)
+        << TempFilename;
+    } else {
+      FileStream.reset(new llvm::raw_fd_ostream(FD, /*shouldClose=*/true));
+    }
+  }
+
+  ~AtomicallyMovedFile() {
+    if (!ok()) return;
+
+    FileStream->flush();
+#ifdef _WIN32
+    // Win32 does not allow rename/removing opened files.
+    FileStream.reset();
+#endif
+    if (llvm::error_code ec =
+          llvm::sys::fs::rename(TempFilename.str(), Filename)) {
+      AllWritten = false;
+      Diagnostics.Report(clang::diag::err_unable_to_rename_temp)
+        << TempFilename << Filename << ec.message();
+      bool existed;
+      // If the remove fails, there's not a lot we can do - this is already an
+      // error.
+ llvm::sys::fs::remove(TempFilename.str(), existed); + } + } + + bool ok() { return FileStream; } + llvm::raw_ostream &getStream() { return *FileStream; } + +private: + DiagnosticsEngine &Diagnostics; + StringRef Filename; + SmallString<128> TempFilename; + OwningPtr FileStream; + bool &AllWritten; +}; + +bool Rewriter::overwriteChangedFiles() { + bool AllWritten = true; + for (buffer_iterator I = buffer_begin(), E = buffer_end(); I != E; ++I) { + const FileEntry *Entry = + getSourceMgr().getFileEntryForID(I->first); + AtomicallyMovedFile File(getSourceMgr().getDiagnostics(), Entry->getName(), + AllWritten); + if (File.ok()) { + I->second.write(File.getStream()); + } + } + return !AllWritten; +} diff --git a/lib/Sema/AnalysisBasedWarnings.cpp b/lib/Sema/AnalysisBasedWarnings.cpp index a8e6791..19a7d6f 100644 --- a/lib/Sema/AnalysisBasedWarnings.cpp +++ b/lib/Sema/AnalysisBasedWarnings.cpp @@ -19,6 +19,7 @@ #include "clang/Basic/SourceManager.h" #include "clang/Basic/SourceLocation.h" #include "clang/Lex/Preprocessor.h" +#include "clang/Lex/Lexer.h" #include "clang/AST/DeclObjC.h" #include "clang/AST/DeclCXX.h" #include "clang/AST/ExprObjC.h" @@ -27,6 +28,7 @@ #include "clang/AST/StmtCXX.h" #include "clang/AST/EvaluatedExprVisitor.h" #include "clang/AST/StmtVisitor.h" +#include "clang/AST/RecursiveASTVisitor.h" #include "clang/Analysis/AnalysisContext.h" #include "clang/Analysis/CFG.h" #include "clang/Analysis/Analyses/ReachableCode.h" @@ -42,7 +44,9 @@ #include "llvm/ADT/StringRef.h" #include "llvm/Support/Casting.h" #include +#include #include +#include using namespace clang; @@ -185,6 +189,12 @@ static ControlFlowKind CheckFallThrough(AnalysisDeclContext &AC) { continue; } } + if (isa(S)) { + // TODO: Verify this is correct. + HasFakeEdge = true; + HasLiveReturn = true; + continue; + } if (isa(S)) { HasAbnormalEdge = true; continue; @@ -438,9 +448,14 @@ static bool SuggestInitializationFixit(Sema &S, const VarDecl *VD) { return false; // Suggest possible initialization (if any). - const char *Init = S.getFixItZeroInitializerForType(VariableTy); - if (!Init) + std::string Init = S.getFixItZeroInitializerForType(VariableTy); + if (Init.empty()) return false; + + // Don't suggest a fixit inside macros. + if (VD->getLocEnd().isMacroID()) + return false; + SourceLocation Loc = S.PP.getLocForEndOfToken(VD->getLocEnd()); S.Diag(Loc, diag::note_var_fixit_add_initialization) << VD->getDeclName() @@ -448,82 +463,428 @@ static bool SuggestInitializationFixit(Sema &S, const VarDecl *VD) { return true; } +/// Create a fixit to remove an if-like statement, on the assumption that its +/// condition is CondVal. +static void CreateIfFixit(Sema &S, const Stmt *If, const Stmt *Then, + const Stmt *Else, bool CondVal, + FixItHint &Fixit1, FixItHint &Fixit2) { + if (CondVal) { + // If condition is always true, remove all but the 'then'. + Fixit1 = FixItHint::CreateRemoval( + CharSourceRange::getCharRange(If->getLocStart(), + Then->getLocStart())); + if (Else) { + SourceLocation ElseKwLoc = Lexer::getLocForEndOfToken( + Then->getLocEnd(), 0, S.getSourceManager(), S.getLangOpts()); + Fixit2 = FixItHint::CreateRemoval( + SourceRange(ElseKwLoc, Else->getLocEnd())); + } + } else { + // If condition is always false, remove all but the 'else'. 
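A standalone sketch of the write-to-temp-then-rename pattern that the new AtomicallyMovedFile class above wraps (the CreateIfFixit body resumes just below). Error handling is reduced to booleans and the temporary name uses a fixed suffix rather than a randomized one, so this is illustrative, not production code; note that on Win32 rename does not replace an open or existing file, which is why the class above closes the stream first.

    #include <cstdio>
    #include <string>

    static bool atomicallyWrite(const std::string &target,
                                const std::string &data) {
      std::string temp = target + ".tmp";
      std::FILE *f = std::fopen(temp.c_str(), "wb");
      if (!f)
        return false;
      bool ok = std::fwrite(data.data(), 1, data.size(), f) == data.size();
      ok = (std::fclose(f) == 0) && ok;  // flush and close before the rename
      if (!ok) {
        std::remove(temp.c_str());       // best effort, like the destructor
        return false;
      }
      // Readers see either the old file or the complete new one, never a
      // partially written file.
      return std::rename(temp.c_str(), target.c_str()) == 0;
    }

    int main() { return atomicallyWrite("out.txt", "hello\n") ? 0 : 1; }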
+  if (Else)
+    Fixit1 = FixItHint::CreateRemoval(
+        CharSourceRange::getCharRange(If->getLocStart(),
+                                      Else->getLocStart()));
+  else
+    Fixit1 = FixItHint::CreateRemoval(If->getSourceRange());
+  }
+}
+
+/// DiagUninitUse -- Helper function to produce a diagnostic for an
+/// uninitialized use of a variable.
+static void DiagUninitUse(Sema &S, const VarDecl *VD, const UninitUse &Use,
+                          bool IsCapturedByBlock) {
+  bool Diagnosed = false;
+
+  // Diagnose each branch which leads to a sometimes-uninitialized use.
+  for (UninitUse::branch_iterator I = Use.branch_begin(), E = Use.branch_end();
+       I != E; ++I) {
+    assert(Use.getKind() == UninitUse::Sometimes);
+
+    const Expr *User = Use.getUser();
+    const Stmt *Term = I->Terminator;
+
+    // Information used when building the diagnostic.
+    unsigned DiagKind;
+    const char *Str;
+    SourceRange Range;
+
+    // FixIts to suppress the diagnostic by removing the dead condition.
+    // For all binary terminators, branch 0 is taken if the condition is true,
+    // and branch 1 is taken if the condition is false.
+    int RemoveDiagKind = -1;
+    const char *FixitStr =
+        S.getLangOpts().CPlusPlus ? (I->Output ? "true" : "false")
+                                  : (I->Output ? "1" : "0");
+    FixItHint Fixit1, Fixit2;
+
+    switch (Term->getStmtClass()) {
+    default:
+      // Don't know how to report this. Just fall back to 'may be used
+      // uninitialized'. This happens for range-based for, which the user
+      // can't explicitly fix.
+      // FIXME: This also happens if the first use of a variable is always
+      // uninitialized, e.g. "for (int n; n < 10; ++n)". We should report that
+      // with the 'is uninitialized' diagnostic.
+      continue;
+
+    // "condition is true / condition is false".
+    case Stmt::IfStmtClass: {
+      const IfStmt *IS = cast<IfStmt>(Term);
+      DiagKind = 0;
+      Str = "if";
+      Range = IS->getCond()->getSourceRange();
+      RemoveDiagKind = 0;
+      CreateIfFixit(S, IS, IS->getThen(), IS->getElse(),
+                    I->Output, Fixit1, Fixit2);
+      break;
+    }
+    case Stmt::ConditionalOperatorClass: {
+      const ConditionalOperator *CO = cast<ConditionalOperator>(Term);
+      DiagKind = 0;
+      Str = "?:";
+      Range = CO->getCond()->getSourceRange();
+      RemoveDiagKind = 0;
+      CreateIfFixit(S, CO, CO->getTrueExpr(), CO->getFalseExpr(),
+                    I->Output, Fixit1, Fixit2);
+      break;
+    }
+    case Stmt::BinaryOperatorClass: {
+      const BinaryOperator *BO = cast<BinaryOperator>(Term);
+      if (!BO->isLogicalOp())
+        continue;
+      DiagKind = 0;
+      Str = BO->getOpcodeStr();
+      Range = BO->getLHS()->getSourceRange();
+      RemoveDiagKind = 0;
+      if ((BO->getOpcode() == BO_LAnd && I->Output) ||
+          (BO->getOpcode() == BO_LOr && !I->Output))
+        // true && y -> y, false || y -> y.
+        Fixit1 = FixItHint::CreateRemoval(SourceRange(BO->getLocStart(),
+                                                      BO->getOperatorLoc()));
+      else
+        // false && y -> false, true || y -> true.
+        Fixit1 = FixItHint::CreateReplacement(BO->getSourceRange(), FixitStr);
+      break;
+    }
+
+    // "loop is entered / loop is exited".
+    case Stmt::WhileStmtClass:
+      DiagKind = 1;
+      Str = "while";
+      Range = cast<WhileStmt>(Term)->getCond()->getSourceRange();
+      RemoveDiagKind = 1;
+      Fixit1 = FixItHint::CreateReplacement(Range, FixitStr);
+      break;
+    case Stmt::ForStmtClass:
+      DiagKind = 1;
+      Str = "for";
+      Range = cast<ForStmt>(Term)->getCond()->getSourceRange();
+      RemoveDiagKind = 1;
+      if (I->Output)
+        Fixit1 = FixItHint::CreateRemoval(Range);
+      else
+        Fixit1 = FixItHint::CreateReplacement(Range, FixitStr);
+      break;
+
+    // "condition is true / loop is exited".
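The logical-operator case above chooses between two fixits; the rewrite rule it encodes, isolated into a runnable sketch (the switch resumes below with the do/while and switch-label cases; simplify and its signature are illustrative only):

    #include <cassert>
    #include <string>

    // For "L && R" with L known: true && R -> R, false && R -> "false".
    // For "L || R" with L known: false || R -> R, true || R -> "true".
    static std::string simplify(const std::string &op, bool lhsValue,
                                const std::string &rhsText) {
      if (op == "&&")
        return lhsValue ? rhsText : "false";
      return lhsValue ? "true" : rhsText;  // op == "||"
    }

    int main() {
      assert(simplify("&&", true,  "y") == "y");
      assert(simplify("&&", false, "y") == "false");
      assert(simplify("||", true,  "y") == "true");
      assert(simplify("||", false, "y") == "y");
      return 0;
    }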
+ case Stmt::DoStmtClass: + DiagKind = 2; + Str = "do"; + Range = cast(Term)->getCond()->getSourceRange(); + RemoveDiagKind = 1; + Fixit1 = FixItHint::CreateReplacement(Range, FixitStr); + break; + + // "switch case is taken". + case Stmt::CaseStmtClass: + DiagKind = 3; + Str = "case"; + Range = cast(Term)->getLHS()->getSourceRange(); + break; + case Stmt::DefaultStmtClass: + DiagKind = 3; + Str = "default"; + Range = cast(Term)->getDefaultLoc(); + break; + } + + S.Diag(Range.getBegin(), diag::warn_sometimes_uninit_var) + << VD->getDeclName() << IsCapturedByBlock << DiagKind + << Str << I->Output << Range; + S.Diag(User->getLocStart(), diag::note_uninit_var_use) + << IsCapturedByBlock << User->getSourceRange(); + if (RemoveDiagKind != -1) + S.Diag(Fixit1.RemoveRange.getBegin(), diag::note_uninit_fixit_remove_cond) + << RemoveDiagKind << Str << I->Output << Fixit1 << Fixit2; + + Diagnosed = true; + } + + if (!Diagnosed) + S.Diag(Use.getUser()->getLocStart(), + Use.getKind() == UninitUse::Always ? diag::warn_uninit_var + : diag::warn_maybe_uninit_var) + << VD->getDeclName() << IsCapturedByBlock + << Use.getUser()->getSourceRange(); +} + /// DiagnoseUninitializedUse -- Helper function for diagnosing uses of an /// uninitialized variable. This manages the different forms of diagnostic /// emitted for particular types of uses. Returns true if the use was diagnosed -/// as a warning. If a pariticular use is one we omit warnings for, returns +/// as a warning. If a particular use is one we omit warnings for, returns /// false. static bool DiagnoseUninitializedUse(Sema &S, const VarDecl *VD, - const Expr *E, bool isAlwaysUninit, + const UninitUse &Use, bool alwaysReportSelfInit = false) { - bool isSelfInit = false; - - if (const DeclRefExpr *DRE = dyn_cast(E)) { - if (isAlwaysUninit) { - // Inspect the initializer of the variable declaration which is - // being referenced prior to its initialization. We emit - // specialized diagnostics for self-initialization, and we - // specifically avoid warning about self references which take the - // form of: - // - // int x = x; - // - // This is used to indicate to GCC that 'x' is intentionally left - // uninitialized. Proven code paths which access 'x' in - // an uninitialized state after this will still warn. - // - // TODO: Should we suppress maybe-uninitialized warnings for - // variables initialized in this way? - if (const Expr *Initializer = VD->getInit()) { - if (!alwaysReportSelfInit && DRE == Initializer->IgnoreParenImpCasts()) - return false; - - ContainsReference CR(S.Context, DRE); - CR.Visit(const_cast(Initializer)); - isSelfInit = CR.doesContainReference(); - } - if (isSelfInit) { + + if (const DeclRefExpr *DRE = dyn_cast(Use.getUser())) { + // Inspect the initializer of the variable declaration which is + // being referenced prior to its initialization. We emit + // specialized diagnostics for self-initialization, and we + // specifically avoid warning about self references which take the + // form of: + // + // int x = x; + // + // This is used to indicate to GCC that 'x' is intentionally left + // uninitialized. Proven code paths which access 'x' in + // an uninitialized state after this will still warn. 
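The idiom that comment describes, concretely (the check it guards resumes just below). Per the logic here, a direct self-initialization is silently accepted as a GCC-style opt-out, while a self-reference buried inside the initializer gets the specialized warn_uninit_self_reference_in_init diagnostic; this sketch only compiles the idiom, it does not execute the undefined reads:

    int f() {
      int x = x;      // direct self-init: no warning, treated as an opt-out
      int y = y + 1;  // indirect self-reference: specialized diagnostic
      return x + y;   // later uninitialized uses can still be diagnosed,
                      // and are undefined behavior at runtime
    }

    int main() { return 0; }  // f() is shown, not called, to avoid the UB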
+ if (const Expr *Initializer = VD->getInit()) { + if (!alwaysReportSelfInit && DRE == Initializer->IgnoreParenImpCasts()) + return false; + + ContainsReference CR(S.Context, DRE); + CR.Visit(const_cast(Initializer)); + if (CR.doesContainReference()) { S.Diag(DRE->getLocStart(), diag::warn_uninit_self_reference_in_init) - << VD->getDeclName() << VD->getLocation() << DRE->getSourceRange(); - } else { - S.Diag(DRE->getLocStart(), diag::warn_uninit_var) - << VD->getDeclName() << DRE->getSourceRange(); + << VD->getDeclName() << VD->getLocation() << DRE->getSourceRange(); + return true; } - } else { - S.Diag(DRE->getLocStart(), diag::warn_maybe_uninit_var) - << VD->getDeclName() << DRE->getSourceRange(); } + + DiagUninitUse(S, VD, Use, false); } else { - const BlockExpr *BE = cast(E); - if (VD->getType()->isBlockPointerType() && - !VD->hasAttr()) - S.Diag(BE->getLocStart(), diag::warn_uninit_byref_blockvar_captured_by_block) - << VD->getDeclName(); - else + const BlockExpr *BE = cast(Use.getUser()); + if (VD->getType()->isBlockPointerType() && !VD->hasAttr()) S.Diag(BE->getLocStart(), - isAlwaysUninit ? diag::warn_uninit_var_captured_by_block - : diag::warn_maybe_uninit_var_captured_by_block) + diag::warn_uninit_byref_blockvar_captured_by_block) << VD->getDeclName(); + else + DiagUninitUse(S, VD, Use, true); } // Report where the variable was declared when the use wasn't within // the initializer of that declaration & we didn't already suggest // an initialization fixit. - if (!isSelfInit && !SuggestInitializationFixit(S, VD)) + if (!SuggestInitializationFixit(S, VD)) S.Diag(VD->getLocStart(), diag::note_uninit_var_def) << VD->getDeclName(); return true; } -typedef std::pair UninitUse; +namespace { + class FallthroughMapper : public RecursiveASTVisitor { + public: + FallthroughMapper(Sema &S) + : FoundSwitchStatements(false), + S(S) { + } + + bool foundSwitchStatements() const { return FoundSwitchStatements; } + + void markFallthroughVisited(const AttributedStmt *Stmt) { + bool Found = FallthroughStmts.erase(Stmt); + assert(Found); + (void)Found; + } + + typedef llvm::SmallPtrSet AttrStmts; + + const AttrStmts &getFallthroughStmts() const { + return FallthroughStmts; + } + + bool checkFallThroughIntoBlock(const CFGBlock &B, int &AnnotatedCnt) { + int UnannotatedCnt = 0; + AnnotatedCnt = 0; + + std::deque BlockQueue; + + std::copy(B.pred_begin(), B.pred_end(), std::back_inserter(BlockQueue)); + + while (!BlockQueue.empty()) { + const CFGBlock *P = BlockQueue.front(); + BlockQueue.pop_front(); + + const Stmt *Term = P->getTerminator(); + if (Term && isa(Term)) + continue; // Switch statement, good. + + const SwitchCase *SW = dyn_cast_or_null(P->getLabel()); + if (SW && SW->getSubStmt() == B.getLabel() && P->begin() == P->end()) + continue; // Previous case label has no statements, good. + + if (P->pred_begin() == P->pred_end()) { // The block is unreachable. + // This only catches trivially unreachable blocks. + for (CFGBlock::const_iterator ElIt = P->begin(), ElEnd = P->end(); + ElIt != ElEnd; ++ElIt) { + if (const CFGStmt *CS = ElIt->getAs()){ + if (const AttributedStmt *AS = asFallThroughAttr(CS->getStmt())) { + S.Diag(AS->getLocStart(), + diag::warn_fallthrough_attr_unreachable); + markFallthroughVisited(AS); + ++AnnotatedCnt; + } + // Don't care about other unreachable statements. + } + } + // If there are no unreachable statements, this may be a special + // case in CFG: + // case X: { + // A a; // A has a destructor. 
+ // break; + // } + // // <<<< This place is represented by a 'hanging' CFG block. + // case Y: + continue; + } + + const Stmt *LastStmt = getLastStmt(*P); + if (const AttributedStmt *AS = asFallThroughAttr(LastStmt)) { + markFallthroughVisited(AS); + ++AnnotatedCnt; + continue; // Fallthrough annotation, good. + } + + if (!LastStmt) { // This block contains no executable statements. + // Traverse its predecessors. + std::copy(P->pred_begin(), P->pred_end(), + std::back_inserter(BlockQueue)); + continue; + } + + ++UnannotatedCnt; + } + return !!UnannotatedCnt; + } + + // RecursiveASTVisitor setup. + bool shouldWalkTypesOfTypeLocs() const { return false; } + + bool VisitAttributedStmt(AttributedStmt *S) { + if (asFallThroughAttr(S)) + FallthroughStmts.insert(S); + return true; + } + + bool VisitSwitchStmt(SwitchStmt *S) { + FoundSwitchStatements = true; + return true; + } + + private: + + static const AttributedStmt *asFallThroughAttr(const Stmt *S) { + if (const AttributedStmt *AS = dyn_cast_or_null(S)) { + if (hasSpecificAttr(AS->getAttrs())) + return AS; + } + return 0; + } + + static const Stmt *getLastStmt(const CFGBlock &B) { + if (const Stmt *Term = B.getTerminator()) + return Term; + for (CFGBlock::const_reverse_iterator ElemIt = B.rbegin(), + ElemEnd = B.rend(); + ElemIt != ElemEnd; ++ElemIt) { + if (const CFGStmt *CS = ElemIt->getAs()) + return CS->getStmt(); + } + // Workaround to detect a statement thrown out by CFGBuilder: + // case X: {} case Y: + // case X: ; case Y: + if (const SwitchCase *SW = dyn_cast_or_null(B.getLabel())) + if (!isa(SW->getSubStmt())) + return SW->getSubStmt(); + + return 0; + } + + bool FoundSwitchStatements; + AttrStmts FallthroughStmts; + Sema &S; + }; +} + +static void DiagnoseSwitchLabelsFallthrough(Sema &S, AnalysisDeclContext &AC, + bool PerFunction) { + FallthroughMapper FM(S); + FM.TraverseStmt(AC.getBody()); + + if (!FM.foundSwitchStatements()) + return; + + if (PerFunction && FM.getFallthroughStmts().empty()) + return; + + CFG *Cfg = AC.getCFG(); + + if (!Cfg) + return; + + int AnnotatedCnt; + + for (CFG::reverse_iterator I = Cfg->rbegin(), E = Cfg->rend(); I != E; ++I) { + const CFGBlock &B = **I; + const Stmt *Label = B.getLabel(); + + if (!Label || !isa(Label)) + continue; + + if (!FM.checkFallThroughIntoBlock(B, AnnotatedCnt)) + continue; + + S.Diag(Label->getLocStart(), + PerFunction ? diag::warn_unannotated_fallthrough_per_function + : diag::warn_unannotated_fallthrough); + + if (!AnnotatedCnt) { + SourceLocation L = Label->getLocStart(); + if (L.isMacroID()) + continue; + if (S.getLangOpts().CPlusPlus0x) { + const Stmt *Term = B.getTerminator(); + if (!(B.empty() && Term && isa(Term))) { + S.Diag(L, diag::note_insert_fallthrough_fixit) << + FixItHint::CreateInsertion(L, "[[clang::fallthrough]]; "); + } + } + S.Diag(L, diag::note_insert_break_fixit) << + FixItHint::CreateInsertion(L, "break; "); + } + } + + const FallthroughMapper::AttrStmts &Fallthroughs = FM.getFallthroughStmts(); + for (FallthroughMapper::AttrStmts::const_iterator I = Fallthroughs.begin(), + E = Fallthroughs.end(); + I != E; ++I) { + S.Diag((*I)->getLocStart(), diag::warn_fallthrough_attr_invalid_placement); + } + +} namespace { struct SLocSort { bool operator()(const UninitUse &a, const UninitUse &b) { - SourceLocation aLoc = a.first->getLocStart(); - SourceLocation bLoc = b.first->getLocStart(); + // Prefer a more confident report over a less confident one. 
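What the FallthroughMapper and DiagnoseSwitchLabelsFallthrough machinery above accepts versus flags, sketched with the C++11 attribute spelling that the fixit inserts (the SLocSort comparator resumes below; compiling this as shown assumes clang of this era with -std=c++11 and -Wimplicit-fallthrough):

    int classify(int n) {
      int r = 0;
      switch (n) {
      case 0:
        r += 1;
        [[clang::fallthrough]];  // annotated fallthrough: no warning
      case 1:
        r += 2;
        break;                   // explicit break: no warning
      case 2:
        r += 4;                  // unannotated fallthrough: warned
      default:
        r += 8;
      }
      return r;
    }

    int main() { return classify(2) == 12 ? 0 : 1; }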
+ if (a.getKind() != b.getKind()) + return a.getKind() > b.getKind(); + SourceLocation aLoc = a.getUser()->getLocStart(); + SourceLocation bLoc = b.getUser()->getLocStart(); return aLoc.getRawEncoding() < bLoc.getRawEncoding(); } }; @@ -552,9 +913,8 @@ public: return V; } - void handleUseOfUninitVariable(const Expr *ex, const VarDecl *vd, - bool isAlwaysUninit) { - getUses(vd).first->push_back(std::make_pair(ex, isAlwaysUninit)); + void handleUseOfUninitVariable(const VarDecl *vd, const UninitUse &use) { + getUses(vd).first->push_back(use); } void handleSelfInit(const VarDecl *vd) { @@ -565,6 +925,8 @@ public: if (!uses) return; + // FIXME: This iteration order, and thus the resulting diagnostic order, + // is nondeterministic. for (UsesMap::iterator i = uses->begin(), e = uses->end(); i != e; ++i) { const VarDecl *vd = i->first; const UsesMap::mapped_type &V = i->second; @@ -576,8 +938,9 @@ public: // variable, but the root cause is an idiomatic self-init. We want // to report the diagnostic at the self-init since that is the root cause. if (!vec->empty() && hasSelfInit && hasAlwaysUninitializedUse(vec)) - DiagnoseUninitializedUse(S, vd, vd->getInit()->IgnoreParenCasts(), - /* isAlwaysUninit */ true, + DiagnoseUninitializedUse(S, vd, + UninitUse(vd->getInit()->IgnoreParenCasts(), + /* isAlwaysUninit */ true), /* alwaysReportSelfInit */ true); else { // Sort the uses by their SourceLocations. While not strictly @@ -587,8 +950,10 @@ public: for (UsesVec::iterator vi = vec->begin(), ve = vec->end(); vi != ve; ++vi) { - if (DiagnoseUninitializedUse(S, vd, vi->first, - /*isAlwaysUninit=*/vi->second)) + // If we have self-init, downgrade all uses to 'may be uninitialized'. + UninitUse Use = hasSelfInit ? UninitUse(vi->getUser(), false) : *vi; + + if (DiagnoseUninitializedUse(S, vd, Use)) // Skip further diagnostics for this variable. We try to warn only // on the first point at which a variable is used uninitialized. break; @@ -604,7 +969,7 @@ public: private: static bool hasAlwaysUninitializedUse(const UsesVec* vec) { for (UsesVec::const_iterator i = vec->begin(), e = vec->end(); i != e; ++i) { - if (i->second) { + if (i->getKind() == UninitUse::Always) { return true; } } @@ -696,6 +1061,9 @@ class ThreadSafetyReporter : public clang::thread_safety::ThreadSafetyHandler { case LEK_LockedAtEndOfFunction: DiagID = diag::warn_no_unlock; break; + case LEK_NotLockedAtEndOfFunction: + DiagID = diag::warn_expecting_locked; + break; } if (LocEndOfScope.isInvalid()) LocEndOfScope = FunEndLocation; @@ -830,7 +1198,7 @@ AnalysisBasedWarnings::IssueWarnings(sema::AnalysisBasedWarnings::Policy P, const Stmt *Body = D->getBody(); assert(Body); - AnalysisDeclContext AC(/* AnalysisDeclContextManager */ 0, D, 0); + AnalysisDeclContext AC(/* AnalysisDeclContextManager */ 0, D); // Don't generate EH edges for CallExprs as we'd like to avoid the n^2 // explosion for destrutors that can result and the compile time hit. @@ -852,11 +1220,13 @@ AnalysisBasedWarnings::IssueWarnings(sema::AnalysisBasedWarnings::Policy P, else { AC.getCFGBuildOptions() .setAlwaysAdd(Stmt::BinaryOperatorClass) + .setAlwaysAdd(Stmt::CompoundAssignOperatorClass) .setAlwaysAdd(Stmt::BlockExprClass) .setAlwaysAdd(Stmt::CStyleCastExprClass) .setAlwaysAdd(Stmt::DeclRefExprClass) .setAlwaysAdd(Stmt::ImplicitCastExprClass) - .setAlwaysAdd(Stmt::UnaryOperatorClass); + .setAlwaysAdd(Stmt::UnaryOperatorClass) + .setAlwaysAdd(Stmt::AttributedStmtClass); } // Construct the analysis context with the specified CFG build options. 
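The hunk that follows gates the uninitialized-variable analysis on three diagnostic levels at once. The shape of that test, isolated into a sketch (DiagLevel, DiagConfig, and the field names are placeholders for the DiagnosticsEngine query, not real API):

    #include <cassert>

    enum DiagLevel { Ignored, Warning, Error };

    struct DiagConfig {
      DiagLevel uninitVar, sometimesUninitVar, maybeUninitVar;
    };

    // Run the analysis only if at least one diagnostic it can emit is live;
    // this mirrors the three-way check added below.
    static bool shouldRunUninitAnalysis(const DiagConfig &d) {
      return d.uninitVar != Ignored || d.sometimesUninitVar != Ignored ||
             d.maybeUninitVar != Ignored;
    }

    int main() {
      DiagConfig allOff = { Ignored, Ignored, Ignored };
      DiagConfig oneOn  = { Ignored, Warning, Ignored };
      assert(!shouldRunUninitAnalysis(allOff));
      assert(shouldRunUninitAnalysis(oneOn));
      return 0;
    }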
@@ -945,6 +1315,8 @@ AnalysisBasedWarnings::IssueWarnings(sema::AnalysisBasedWarnings::Policy P, if (Diags.getDiagnosticLevel(diag::warn_uninit_var, D->getLocStart()) != DiagnosticsEngine::Ignored || + Diags.getDiagnosticLevel(diag::warn_sometimes_uninit_var,D->getLocStart()) + != DiagnosticsEngine::Ignored || Diags.getDiagnosticLevel(diag::warn_maybe_uninit_var, D->getLocStart()) != DiagnosticsEngine::Ignored) { if (CFG *cfg = AC.getCFG()) { @@ -968,6 +1340,16 @@ AnalysisBasedWarnings::IssueWarnings(sema::AnalysisBasedWarnings::Policy P, } } + bool FallThroughDiagFull = + Diags.getDiagnosticLevel(diag::warn_unannotated_fallthrough, + D->getLocStart()) != DiagnosticsEngine::Ignored; + bool FallThroughDiagPerFunction = + Diags.getDiagnosticLevel(diag::warn_unannotated_fallthrough_per_function, + D->getLocStart()) != DiagnosticsEngine::Ignored; + if (FallThroughDiagFull || FallThroughDiagPerFunction) { + DiagnoseSwitchLabelsFallthrough(S, AC, !FallThroughDiagFull); + } + // Collect statistics about the CFG if it was built. if (S.CollectStats && AC.isCFGBuilt()) { ++NumFunctionsAnalyzed; diff --git a/lib/Sema/AttributeList.cpp b/lib/Sema/AttributeList.cpp index f142ab4..0f209fd 100644 --- a/lib/Sema/AttributeList.cpp +++ b/lib/Sema/AttributeList.cpp @@ -12,9 +12,11 @@ //===----------------------------------------------------------------------===// #include "clang/Sema/AttributeList.h" +#include "clang/AST/ASTContext.h" #include "clang/AST/Expr.h" #include "clang/Basic/IdentifierTable.h" #include "llvm/ADT/StringSwitch.h" +#include "llvm/ADT/SmallString.h" using namespace clang; size_t AttributeList::allocated_size() const { @@ -94,10 +96,15 @@ AttributePool::createIntegerAttribute(ASTContext &C, IdentifierInfo *Name, SourceLocation TokLoc, int Arg) { Expr *IArg = IntegerLiteral::Create(C, llvm::APInt(32, (uint64_t) Arg), C.IntTy, TokLoc); - return create(Name, TokLoc, 0, TokLoc, 0, TokLoc, &IArg, 1, 0); + return create(Name, TokLoc, 0, TokLoc, 0, TokLoc, &IArg, 1, + AttributeList::AS_GNU); } -AttributeList::Kind AttributeList::getKind(const IdentifierInfo *Name) { +#include "clang/Sema/AttrParsedAttrKinds.inc" + +AttributeList::Kind AttributeList::getKind(const IdentifierInfo *Name, + const IdentifierInfo *ScopeName, + Syntax SyntaxUsed) { StringRef AttrName = Name->getName(); // Normalize the attribute name, __foo__ becomes foo. @@ -105,22 +112,14 @@ AttributeList::Kind AttributeList::getKind(const IdentifierInfo *Name) { AttrName.size() >= 4) AttrName = AttrName.substr(2, AttrName.size() - 4); - return llvm::StringSwitch(AttrName) - #include "clang/Sema/AttrParsedAttrKinds.inc" - .Case("address_space", AT_address_space) - .Case("align", AT_aligned) // FIXME - should it be "aligned"? - .Case("base_check", AT_base_check) - .Case("bounded", IgnoredAttribute) // OpenBSD - .Case("__const", AT_const) // some GCC headers do contain this spelling - .Case("cf_returns_autoreleased", AT_cf_returns_autoreleased) - .Case("mode", AT_mode) - .Case("vec_type_hint", IgnoredAttribute) - .Case("ext_vector_type", AT_ext_vector_type) - .Case("neon_vector_type", AT_neon_vector_type) - .Case("neon_polyvector_type", AT_neon_polyvector_type) - .Case("opencl_image_access", AT_opencl_image_access) - .Case("objc_gc", AT_objc_gc) - .Case("objc_ownership", AT_objc_ownership) - .Case("vector_size", AT_vector_size) - .Default(UnknownAttribute); + SmallString<64> Buf; + if (ScopeName) + Buf += ScopeName->getName(); + // Ensure that in the case of C++11 attributes, we look for '::foo' if it is + // unscoped. 
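The table-driven getKind above first normalizes the name and then builds a scope-qualified key (the key construction resumes just below). A self-contained sketch of that normalization, assuming std::string in place of StringRef/SmallString:

    #include <cassert>
    #include <string>

    // "__foo__" and "foo" map to the same key; C++11 scoped attributes are
    // keyed as "scope::name", and "::name" when unscoped.
    static std::string normalizeAttrName(std::string name,
                                         const std::string &scope,
                                         bool isCXX11) {
      if (name.size() >= 4 && name.compare(0, 2, "__") == 0 &&
          name.compare(name.size() - 2, 2, "__") == 0)
        name = name.substr(2, name.size() - 4);
      if (!scope.empty() || isCXX11)
        return scope + "::" + name;
      return name;
    }

    int main() {
      assert(normalizeAttrName("__aligned__", "", false) == "aligned");
      assert(normalizeAttrName("fallthrough", "clang", true) == "clang::fallthrough");
      assert(normalizeAttrName("noreturn", "", true) == "::noreturn");
      return 0;
    }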
+ if (ScopeName || SyntaxUsed == AS_CXX11) + Buf += "::"; + Buf += AttrName; + + return ::getAttrKind(Buf); } diff --git a/lib/Sema/CMakeLists.txt b/lib/Sema/CMakeLists.txt index 07734c7..46dfa05 100644 --- a/lib/Sema/CMakeLists.txt +++ b/lib/Sema/CMakeLists.txt @@ -1,9 +1,8 @@ -set(LLVM_USED_LIBS - clangAST - clangAnalysis - clangBasic - clangEdit - clangLex +set(LLVM_LINK_COMPONENTS + ${LLVM_TARGETS_TO_BUILD} + asmparser + support + mc ) add_clang_library(clangSema @@ -50,9 +49,27 @@ add_clang_library(clangSema TargetAttributesSema.cpp ) -add_dependencies(clangSema ClangARMNeon ClangAttrClasses ClangAttrList - ClangDiagnosticSema ClangDeclNodes ClangStmtNodes - ClangAttrTemplateInstantiate ClangAttrParsedAttrList - ClangAttrParsedAttrKinds) - +add_dependencies(clangSema + ClangARMNeon + ClangAttrClasses + ClangAttrList + ClangAttrParsedAttrList + ClangAttrParsedAttrKinds + ClangAttrTemplateInstantiate + ClangCommentNodes + ClangDeclNodes + ClangDiagnosticAST + ClangDiagnosticComment + ClangDiagnosticCommon + ClangDiagnosticParse + ClangDiagnosticSema + ClangStmtNodes + ) +target_link_libraries(clangSema + clangAST + clangAnalysis + clangBasic + clangEdit + clangLex + ) diff --git a/lib/Sema/CodeCompleteConsumer.cpp b/lib/Sema/CodeCompleteConsumer.cpp index ce9bbb9..a835725 100644 --- a/lib/Sema/CodeCompleteConsumer.cpp +++ b/lib/Sema/CodeCompleteConsumer.cpp @@ -194,10 +194,11 @@ CodeCompletionString::CodeCompletionString(const Chunk *Chunks, const char **Annotations, unsigned NumAnnotations, CXCursorKind ParentKind, - StringRef ParentName) + StringRef ParentName, + const char *BriefComment) : NumChunks(NumChunks), NumAnnotations(NumAnnotations), Priority(Priority), Availability(Availability), ParentKind(ParentKind), - ParentName(ParentName) + ParentName(ParentName), BriefComment(BriefComment) { assert(NumChunks <= 0xffff); assert(NumAnnotations <= 0xffff); @@ -338,7 +339,7 @@ CodeCompletionString *CodeCompletionBuilder::TakeString() { = new (Mem) CodeCompletionString(Chunks.data(), Chunks.size(), Priority, Availability, Annotations.data(), Annotations.size(), - ParentKind, ParentName); + ParentKind, ParentName, BriefComment); Chunks.clear(); return Result; } @@ -394,6 +395,10 @@ void CodeCompletionBuilder::addParentContext(DeclContext *DC) { ParentName = getCodeCompletionTUInfo().getParentName(DC); } +void CodeCompletionBuilder::addBriefComment(StringRef Comment) { + BriefComment = Allocator.CopyString(Comment); +} + unsigned CodeCompletionResult::getPriorityFromDecl(NamedDecl *ND) { if (!ND) return CCP_Unlikely; @@ -474,8 +479,11 @@ PrintingCodeCompleteConsumer::ProcessCodeCompleteResults(Sema &SemaRef, OS << " (Hidden)"; if (CodeCompletionString *CCS = Results[I].CreateCodeCompletionString(SemaRef, getAllocator(), - CCTUInfo)) { + CCTUInfo, + includeBriefComments())) { OS << " : " << CCS->getAsString(); + if (const char *BriefComment = CCS->getBriefComment()) + OS << " : " << BriefComment; } OS << '\n'; @@ -489,7 +497,8 @@ PrintingCodeCompleteConsumer::ProcessCodeCompleteResults(Sema &SemaRef, OS << Results[I].Macro->getName(); if (CodeCompletionString *CCS = Results[I].CreateCodeCompletionString(SemaRef, getAllocator(), - CCTUInfo)) { + CCTUInfo, + includeBriefComments())) { OS << " : " << CCS->getAsString(); } OS << '\n'; @@ -573,14 +582,8 @@ void CodeCompletionResult::computeCursorKindAndAvailability(bool Accessible) { } case RK_Macro: - Availability = CXAvailability_Available; - CursorKind = CXCursor_MacroDefinition; - break; - case RK_Keyword: - Availability = 
CXAvailability_Available; - CursorKind = CXCursor_NotImplemented; - break; + llvm_unreachable("Macro and keyword kinds are handled by the constructors"); } if (!Accessible) diff --git a/lib/Sema/DeclSpec.cpp b/lib/Sema/DeclSpec.cpp index b531acc..d12ca78 100644 --- a/lib/Sema/DeclSpec.cpp +++ b/lib/Sema/DeclSpec.cpp @@ -145,6 +145,7 @@ CXXScopeSpec::getWithLocInContext(ASTContext &Context) const { /// DeclaratorChunk::getFunction - Return a DeclaratorChunk for a function. /// "TheDeclarator" is the declarator that this will be added to. DeclaratorChunk DeclaratorChunk::getFunction(bool hasProto, bool isVariadic, + bool isAmbiguous, SourceLocation EllipsisLoc, ParamInfo *ArgInfo, unsigned NumArgs, @@ -165,7 +166,7 @@ DeclaratorChunk DeclaratorChunk::getFunction(bool hasProto, bool isVariadic, SourceLocation LocalRangeBegin, SourceLocation LocalRangeEnd, Declarator &TheDeclarator, - ParsedType TrailingReturnType) { + TypeResult TrailingReturnType) { DeclaratorChunk I; I.Kind = Function; I.Loc = LocalRangeBegin; @@ -173,6 +174,7 @@ DeclaratorChunk DeclaratorChunk::getFunction(bool hasProto, bool isVariadic, I.Fun.AttrList = 0; I.Fun.hasPrototype = hasProto; I.Fun.isVariadic = isVariadic; + I.Fun.isAmbiguous = isAmbiguous; I.Fun.EllipsisLoc = EllipsisLoc.getRawEncoding(); I.Fun.DeleteArgInfo = false; I.Fun.TypeQuals = TypeQuals; @@ -188,7 +190,9 @@ DeclaratorChunk DeclaratorChunk::getFunction(bool hasProto, bool isVariadic, I.Fun.NumExceptions = 0; I.Fun.Exceptions = 0; I.Fun.NoexceptExpr = 0; - I.Fun.TrailingReturnType = TrailingReturnType.getAsOpaquePtr(); + I.Fun.HasTrailingReturnType = TrailingReturnType.isUsable() || + TrailingReturnType.isInvalid(); + I.Fun.TrailingReturnType = TrailingReturnType.get(); // new[] an argument array if needed. if (NumArgs) { @@ -418,19 +422,27 @@ const char *DeclSpec::getSpecifierName(TQ T) { bool DeclSpec::SetStorageClassSpec(Sema &S, SCS SC, SourceLocation Loc, const char *&PrevSpec, unsigned &DiagID) { - // OpenCL 1.1 6.8g: "The extern, static, auto and register storage-class - // specifiers are not supported." + // OpenCL v1.1 s6.8g: "The extern, static, auto and register storage-class + // specifiers are not supported. // It seems sensible to prohibit private_extern too // The cl_clang_storage_class_specifiers extension enables support for // these storage-class specifiers. + // OpenCL v1.2 s6.8 changes this to "The auto and register storage-class + // specifiers are not supported." if (S.getLangOpts().OpenCL && !S.getOpenCLOptions().cl_clang_storage_class_specifiers) { switch (SC) { case SCS_extern: case SCS_private_extern: + case SCS_static: + if (S.getLangOpts().OpenCLVersion < 120) { + DiagID = diag::err_not_opencl_storage_class_specifier; + PrevSpec = getSpecifierName(SC); + return true; + } + break; case SCS_auto: case SCS_register: - case SCS_static: DiagID = diag::err_not_opencl_storage_class_specifier; PrevSpec = getSpecifierName(SC); return true; @@ -658,9 +670,11 @@ bool DeclSpec::SetTypeSpecError() { } bool DeclSpec::SetTypeQual(TQ T, SourceLocation Loc, const char *&PrevSpec, - unsigned &DiagID, const LangOptions &Lang) { - // Duplicates turn into warnings pre-C99. - if ((TypeQualifiers & T) && !Lang.C99) + unsigned &DiagID, const LangOptions &Lang, + bool IsTypeSpec) { + // Duplicates are permitted in C99, and are permitted in C++11 unless the + // cv-qualifier appears as a type-specifier. 
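Returning to the OpenCL storage-class hunk above: its decision table, isolated (the cv-qualifier duplicate check continues just below). Versions are encoded the way the diff encodes them (110 for v1.1, 120 for v1.2), and the cl_clang_storage_class_specifiers extension is assumed disabled; the function name is illustrative.

    #include <cassert>
    #include <cstring>

    static bool isStorageClassAllowed(const char *scs, unsigned clVersion) {
      if (!std::strcmp(scs, "auto") || !std::strcmp(scs, "register"))
        return false;                 // rejected in both v1.1 and v1.2
      if (!std::strcmp(scs, "extern") || !std::strcmp(scs, "static") ||
          !std::strcmp(scs, "private_extern"))
        return clVersion >= 120;      // legal starting with OpenCL v1.2
      return true;                    // other specifiers are untouched here
    }

    int main() {
      assert(!isStorageClassAllowed("static", 110));
      assert(isStorageClassAllowed("static", 120));
      assert(!isStorageClassAllowed("register", 120));
      return 0;
    }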
+ if ((TypeQualifiers & T) && !Lang.C99 && (!Lang.CPlusPlus0x || IsTypeSpec)) return BadSpecifier(T, T, PrevSpec, DiagID); TypeQualifiers |= T; @@ -751,7 +765,7 @@ void DeclSpec::SaveWrittenBuiltinSpecs() { writtenBS.ModeAttr = false; AttributeList* attrs = getAttributes().getList(); while (attrs) { - if (attrs->getKind() == AttributeList::AT_mode) { + if (attrs->getKind() == AttributeList::AT_Mode) { writtenBS.ModeAttr = true; break; } @@ -935,13 +949,6 @@ bool DeclSpec::isMissingDeclaratorOk() { StorageClassSpec != DeclSpec::SCS_typedef; } -void UnqualifiedId::clear() { - Kind = IK_Identifier; - Identifier = 0; - StartLocation = SourceLocation(); - EndLocation = SourceLocation(); -} - void UnqualifiedId::setOperatorFunctionId(SourceLocation OperatorLoc, OverloadedOperatorKind Op, SourceLocation SymbolLocations[3]) { diff --git a/lib/Sema/Sema.cpp b/lib/Sema/Sema.cpp index 30a9cd7..7f79f0c 100644 --- a/lib/Sema/Sema.cpp +++ b/lib/Sema/Sema.cpp @@ -18,6 +18,7 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/APFloat.h" +#include "llvm/Support/CrashRecoveryContext.h" #include "clang/Sema/CXXFieldCollector.h" #include "clang/Sema/TemplateDeduction.h" #include "clang/Sema/ExternalSemaSource.h" @@ -29,6 +30,7 @@ #include "clang/AST/ASTContext.h" #include "clang/AST/ASTDiagnostic.h" #include "clang/AST/DeclCXX.h" +#include "clang/AST/DeclFriend.h" #include "clang/AST/DeclObjC.h" #include "clang/AST/Expr.h" #include "clang/AST/ExprCXX.h" @@ -90,13 +92,13 @@ Sema::Sema(Preprocessor &pp, ASTContext &ctxt, ASTConsumer &consumer, PackContext(0), MSStructPragmaOn(false), VisContext(0), ExprNeedsCleanups(false), LateTemplateParser(0), OpaqueParser(0), IdResolver(pp), StdInitializerList(0), CXXTypeInfoDecl(0), MSVCGuidDecl(0), - NSNumberDecl(0), NSArrayDecl(0), ArrayWithObjectsMethod(0), + NSNumberDecl(0), + NSStringDecl(0), StringWithUTF8StringMethod(0), + NSArrayDecl(0), ArrayWithObjectsMethod(0), NSDictionaryDecl(0), DictionaryWithObjectsMethod(0), GlobalNewDeleteDeclared(false), - ObjCShouldCallSuperDealloc(false), - ObjCShouldCallSuperFinalize(false), TUKind(TUKind), - NumSFINAEErrors(0), InFunctionDeclarator(0), SuppressAccessChecking(false), + NumSFINAEErrors(0), InFunctionDeclarator(0), AccessCheckingSFINAE(false), InNonInstantiationSFINAEContext(false), NonInstantiationEntries(0), ArgumentPackSubstitutionIndex(-1), CurrentInstantiationScope(0), TyposCorrected(0), @@ -176,6 +178,10 @@ void Sema::Initialize() { if (IdResolver.begin(Protocol) == IdResolver.end()) PushOnScopeChains(Context.getObjCProtocolDecl(), TUScope); } + + DeclarationName BuiltinVaList = &Context.Idents.get("__builtin_va_list"); + if (IdResolver.begin(BuiltinVaList) == IdResolver.end()) + PushOnScopeChains(Context.getBuiltinVaListDecl(), TUScope); } Sema::~Sema() { @@ -199,7 +205,6 @@ Sema::~Sema() { ExternalSema->ForgetSema(); } - /// makeUnavailableInSystemHeader - There is an error in the current /// context. If we're still in a system header, and we can plausibly /// make the relevant declaration unavailable instead of erroring, do @@ -420,10 +425,88 @@ void Sema::LoadExternalWeakUndeclaredIdentifiers() { } } + +typedef llvm::DenseMap RecordCompleteMap; + +/// \brief Returns true, if all methods and nested classes of the given +/// CXXRecordDecl are defined in this translation unit. +/// +/// Should only be called from ActOnEndOfTranslationUnit so that all +/// definitions are actually read. 
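The two helpers that follow share one shape: consult a cache keyed by the record, compute the answer at most once, and store it before returning. That memoized recursion, reduced to a runnable sketch (Record, computeComplete, and std::map stand in for the real CXXRecordDecl, the recursive walk, and llvm::DenseMap):

    #include <map>

    struct Record { bool defined; };
    typedef std::map<const Record *, bool> RecordCompleteMap;

    static bool computeComplete(const Record *RD) { return RD->defined; }

    static bool isComplete(const Record *RD, RecordCompleteMap &Cache) {
      RecordCompleteMap::iterator It = Cache.find(RD);
      if (It != Cache.end())
        return It->second;        // answered on an earlier visit
      bool Complete = computeComplete(RD);
      Cache[RD] = Complete;       // memoize for repeated queries
      return Complete;
    }

    int main() {
      Record R = { true };
      RecordCompleteMap Cache;
      return (isComplete(&R, Cache) && isComplete(&R, Cache)) ? 0 : 1;
    }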
+static bool MethodsAndNestedClassesComplete(const CXXRecordDecl *RD, + RecordCompleteMap &MNCComplete) { + RecordCompleteMap::iterator Cache = MNCComplete.find(RD); + if (Cache != MNCComplete.end()) + return Cache->second; + if (!RD->isCompleteDefinition()) + return false; + bool Complete = true; + for (DeclContext::decl_iterator I = RD->decls_begin(), + E = RD->decls_end(); + I != E && Complete; ++I) { + if (const CXXMethodDecl *M = dyn_cast(*I)) + Complete = M->isDefined() || (M->isPure() && !isa(M)); + else if (const FunctionTemplateDecl *F = dyn_cast(*I)) + Complete = F->getTemplatedDecl()->isDefined(); + else if (const CXXRecordDecl *R = dyn_cast(*I)) { + if (R->isInjectedClassName()) + continue; + if (R->hasDefinition()) + Complete = MethodsAndNestedClassesComplete(R->getDefinition(), + MNCComplete); + else + Complete = false; + } + } + MNCComplete[RD] = Complete; + return Complete; +} + +/// \brief Returns true, if the given CXXRecordDecl is fully defined in this +/// translation unit, i.e. all methods are defined or pure virtual and all +/// friends, friend functions and nested classes are fully defined in this +/// translation unit. +/// +/// Should only be called from ActOnEndOfTranslationUnit so that all +/// definitions are actually read. +static bool IsRecordFullyDefined(const CXXRecordDecl *RD, + RecordCompleteMap &RecordsComplete, + RecordCompleteMap &MNCComplete) { + RecordCompleteMap::iterator Cache = RecordsComplete.find(RD); + if (Cache != RecordsComplete.end()) + return Cache->second; + bool Complete = MethodsAndNestedClassesComplete(RD, MNCComplete); + for (CXXRecordDecl::friend_iterator I = RD->friend_begin(), + E = RD->friend_end(); + I != E && Complete; ++I) { + // Check if friend classes and methods are complete. + if (TypeSourceInfo *TSI = (*I)->getFriendType()) { + // Friend classes are available as the TypeSourceInfo of the FriendDecl. + if (CXXRecordDecl *FriendD = TSI->getType()->getAsCXXRecordDecl()) + Complete = MethodsAndNestedClassesComplete(FriendD, MNCComplete); + else + Complete = false; + } else { + // Friend functions are available through the NamedDecl of FriendDecl. + if (const FunctionDecl *FD = + dyn_cast((*I)->getFriendDecl())) + Complete = FD->isDefined(); + else + // This is a template friend, give up. + Complete = false; + } + } + RecordsComplete[RD] = Complete; + return Complete; +} + /// ActOnEndOfTranslationUnit - This is called at the very end of the /// translation unit when EOF is reached and all but the top-level scope is /// popped. void Sema::ActOnEndOfTranslationUnit() { + assert(DelayedDiagnostics.getCurrentPool() == NULL + && "reached end of translation unit with a pool attached?"); + // Only complete translation units define vtables and perform implicit // instantiations. if (TUKind == TU_Complete) { @@ -597,9 +680,17 @@ void Sema::ActOnEndOfTranslationUnit() { if (isa(DiagD)) Diag(DiagD->getLocation(), diag::warn_unneeded_member_function) << DiagD->getDeclName(); - else - Diag(DiagD->getLocation(), diag::warn_unneeded_internal_decl) - << /*function*/0 << DiagD->getDeclName(); + else { + if (FD->getStorageClassAsWritten() == SC_Static && + !FD->isInlineSpecified() && + !SourceMgr.isFromMainFile( + SourceMgr.getExpansionLoc(FD->getLocation()))) + Diag(DiagD->getLocation(), diag::warn_unneeded_static_internal_decl) + << DiagD->getDeclName(); + else + Diag(DiagD->getLocation(), diag::warn_unneeded_internal_decl) + << /*function*/0 << DiagD->getDeclName(); + } } else { Diag(DiagD->getLocation(), isa(DiagD) ? 
diag::warn_unused_member_function @@ -623,6 +714,23 @@ void Sema::ActOnEndOfTranslationUnit() { checkUndefinedInternals(*this); } + if (Diags.getDiagnosticLevel(diag::warn_unused_private_field, + SourceLocation()) + != DiagnosticsEngine::Ignored) { + RecordCompleteMap RecordsComplete; + RecordCompleteMap MNCComplete; + for (NamedDeclSetType::iterator I = UnusedPrivateFields.begin(), + E = UnusedPrivateFields.end(); I != E; ++I) { + const NamedDecl *D = *I; + const CXXRecordDecl *RD = dyn_cast(D->getDeclContext()); + if (RD && !RD->isUnion() && + IsRecordFullyDefined(RD, RecordsComplete, MNCComplete)) { + Diag(D->getLocation(), diag::warn_unused_private_field) + << D->getDeclName(); + } + } + } + // Check we've noticed that we're no longer parsing the initializer for every // variable. If we miss cases, then at best we have a performance issue and // at worst a rejects-valid bug. @@ -693,6 +801,15 @@ void Sema::EmitCurrentDiagnostic(unsigned DiagID) { // Count this failure so that we know that template argument deduction // has failed. ++NumSFINAEErrors; + + // Make a copy of this suppressed diagnostic and store it with the + // template-deduction information. + if (*Info && !(*Info)->hasSFINAEDiagnostic()) { + Diagnostic DiagInfo(&Diags); + (*Info)->addSFINAEDiagnostic(DiagInfo.getLocation(), + PartialDiagnostic(DiagInfo, Context.getDiagAllocator())); + } + Diags.setLastDiagnosticIgnored(); Diags.Clear(); return; @@ -709,6 +826,15 @@ void Sema::EmitCurrentDiagnostic(unsigned DiagID) { // Suppress this diagnostic. ++NumSFINAEErrors; + + // Make a copy of this suppressed diagnostic and store it with the + // template-deduction information. + if (*Info && !(*Info)->hasSFINAEDiagnostic()) { + Diagnostic DiagInfo(&Diags); + (*Info)->addSFINAEDiagnostic(DiagInfo.getLocation(), + PartialDiagnostic(DiagInfo, Context.getDiagAllocator())); + } + Diags.setLastDiagnosticIgnored(); Diags.Clear(); @@ -725,13 +851,13 @@ void Sema::EmitCurrentDiagnostic(unsigned DiagID) { case DiagnosticIDs::SFINAE_Suppress: // Make a copy of this suppressed diagnostic and store it with the // template-deduction information; - Diagnostic DiagInfo(&Diags); - - if (*Info) + if (*Info) { + Diagnostic DiagInfo(&Diags); (*Info)->addSuppressedDiagnostic(DiagInfo.getLocation(), - PartialDiagnostic(DiagInfo,Context.getDiagAllocator())); - - // Suppress this diagnostic. + PartialDiagnostic(DiagInfo, Context.getDiagAllocator())); + } + + // Suppress this diagnostic. Diags.setLastDiagnosticIgnored(); Diags.Clear(); return; @@ -894,6 +1020,29 @@ LambdaScopeInfo *Sema::getCurLambda() { return dyn_cast(FunctionScopes.back()); } +void Sema::ActOnComment(SourceRange Comment) { + RawComment RC(SourceMgr, Comment); + if (RC.isAlmostTrailingComment()) { + SourceRange MagicMarkerRange(Comment.getBegin(), + Comment.getBegin().getLocWithOffset(3)); + StringRef MagicMarkerText; + switch (RC.getKind()) { + case RawComment::RCK_OrdinaryBCPL: + MagicMarkerText = "///<"; + break; + case RawComment::RCK_OrdinaryC: + MagicMarkerText = "/**<"; + break; + default: + llvm_unreachable("if this is an almost Doxygen comment, " + "it should be ordinary"); + } + Diag(Comment.getBegin(), diag::warn_not_a_doxygen_trailing_member_comment) << + FixItHint::CreateReplacement(MagicMarkerRange, MagicMarkerText); + } + Context.addComment(RC); +} + // Pin this vtable to this file. 
ExternalSemaSource::~ExternalSemaSource() {} diff --git a/lib/Sema/SemaAccess.cpp b/lib/Sema/SemaAccess.cpp index 01c141e..3481171 100644 --- a/lib/Sema/SemaAccess.cpp +++ b/lib/Sema/SemaAccess.cpp @@ -152,7 +152,8 @@ struct AccessTarget : public AccessedEntity { CXXRecordDecl *NamingClass, DeclAccessPair FoundDecl, QualType BaseObjectType) - : AccessedEntity(Context, Member, NamingClass, FoundDecl, BaseObjectType) { + : AccessedEntity(Context.getDiagAllocator(), Member, NamingClass, + FoundDecl, BaseObjectType) { initialize(); } @@ -161,7 +162,8 @@ struct AccessTarget : public AccessedEntity { CXXRecordDecl *BaseClass, CXXRecordDecl *DerivedClass, AccessSpecifier Access) - : AccessedEntity(Context, Base, BaseClass, DerivedClass, Access) { + : AccessedEntity(Context.getDiagAllocator(), Base, BaseClass, DerivedClass, + Access) { initialize(); } @@ -781,7 +783,7 @@ static AccessResult HasAccess(Sema &S, // Emulate a MSVC bug where the creation of pointer-to-member // to protected member of base class is allowed but only from - // a static function member functions. + // static member functions. if (S.getLangOpts().MicrosoftMode && !EC.Functions.empty()) if (CXXMethodDecl* MD = dyn_cast(EC.Functions.front())) if (MD->isStatic()) return AR_accessible; @@ -1391,9 +1393,6 @@ static Sema::AccessResult CheckAccess(Sema &S, SourceLocation Loc, if (Entity.getAccess() == AS_public) return Sema::AR_accessible; - if (S.SuppressAccessChecking) - return Sema::AR_accessible; - // If we're currently parsing a declaration, we may need to delay // access control checking, because our effective context might be // different based on what the declaration comes out as. @@ -1633,25 +1632,6 @@ Sema::AccessResult Sema::CheckConstructorAccess(SourceLocation UseLoc, return CheckAccess(*this, UseLoc, AccessEntity); } -/// Checks direct (i.e. non-inherited) access to an arbitrary class -/// member. -Sema::AccessResult Sema::CheckDirectMemberAccess(SourceLocation UseLoc, - NamedDecl *Target, - const PartialDiagnostic &Diag) { - AccessSpecifier Access = Target->getAccess(); - if (!getLangOpts().AccessControl || - Access == AS_public) - return AR_accessible; - - CXXRecordDecl *NamingClass = cast(Target->getDeclContext()); - AccessTarget Entity(Context, AccessTarget::Member, NamingClass, - DeclAccessPair::make(Target, Access), - QualType()); - Entity.setDiag(Diag); - return CheckAccess(*this, UseLoc, Entity); -} - - /// Checks access to an overloaded operator new or delete. Sema::AccessResult Sema::CheckAllocationAccess(SourceLocation OpLoc, SourceRange PlacementRange, @@ -1694,6 +1674,44 @@ Sema::AccessResult Sema::CheckMemberOperatorAccess(SourceLocation OpLoc, return CheckAccess(*this, OpLoc, Entity); } +/// Checks access to the target of a friend declaration. +Sema::AccessResult Sema::CheckFriendAccess(NamedDecl *target) { + assert(isa(target) || + (isa(target) && + isa(cast(target) + ->getTemplatedDecl()))); + + // Friendship lookup is a redeclaration lookup, so there's never an + // inheritance path modifying access. 
+ AccessSpecifier access = target->getAccess(); + + if (!getLangOpts().AccessControl || access == AS_public) + return AR_accessible; + + CXXMethodDecl *method = dyn_cast(target); + if (!method) + method = cast( + cast(target)->getTemplatedDecl()); + assert(method->getQualifier()); + + AccessTarget entity(Context, AccessTarget::Member, + cast(target->getDeclContext()), + DeclAccessPair::make(target, access), + /*no instance context*/ QualType()); + entity.setDiag(diag::err_access_friend_function) + << method->getQualifierLoc().getSourceRange(); + + // We need to bypass delayed-diagnostics because we might be called + // while the ParsingDeclarator is active. + EffectiveContext EC(CurContext); + switch (CheckEffectiveAccess(*this, EC, target->getLocation(), entity)) { + case AR_accessible: return Sema::AR_accessible; + case AR_inaccessible: return Sema::AR_inaccessible; + case AR_dependent: return Sema::AR_dependent; + } + llvm_unreachable("falling off end"); +} + Sema::AccessResult Sema::CheckAddressOfMemberAccess(Expr *OvlExpr, DeclAccessPair Found) { if (!getLangOpts().AccessControl || @@ -1714,13 +1732,10 @@ Sema::AccessResult Sema::CheckAddressOfMemberAccess(Expr *OvlExpr, /// Checks access for a hierarchy conversion. /// -/// \param IsBaseToDerived whether this is a base-to-derived conversion (true) -/// or a derived-to-base conversion (false) /// \param ForceCheck true if this check should be performed even if access /// control is disabled; some things rely on this for semantics /// \param ForceUnprivileged true if this check should proceed as if the /// context had no special privileges -/// \param ADK controls the kind of diagnostics that are used Sema::AccessResult Sema::CheckBaseClassAccess(SourceLocation AccessLoc, QualType Base, QualType Derived, @@ -1836,15 +1851,3 @@ bool Sema::IsSimplyAccessible(NamedDecl *Decl, DeclContext *Ctx) { return true; } - -void Sema::ActOnStartSuppressingAccessChecks() { - assert(!SuppressAccessChecking && - "Tried to start access check suppression when already started."); - SuppressAccessChecking = true; -} - -void Sema::ActOnStopSuppressingAccessChecks() { - assert(SuppressAccessChecking && - "Tried to stop access check suprression when already stopped."); - SuppressAccessChecking = false; -} diff --git a/lib/Sema/SemaCXXScopeSpec.cpp b/lib/Sema/SemaCXXScopeSpec.cpp index 5a0fcec..0de9dd5 100644 --- a/lib/Sema/SemaCXXScopeSpec.cpp +++ b/lib/Sema/SemaCXXScopeSpec.cpp @@ -227,9 +227,8 @@ bool Sema::RequireCompleteDeclContext(CXXScopeSpec &SS, if (loc.isInvalid()) loc = SS.getRange().getBegin(); // The type must be complete. - if (RequireCompleteType(loc, type, - PDiag(diag::err_incomplete_nested_name_spec) - << SS.getRange())) { + if (RequireCompleteType(loc, type, diag::err_incomplete_nested_name_spec, + SS.getRange())) { SS.SetInvalid(SS.getRange()); return true; } @@ -539,8 +538,9 @@ bool Sema::BuildCXXNestedNameSpecifier(Scope *S, NamedDecl *SD = Found.getAsSingle(); if (isAcceptableNestedNameSpecifier(SD)) { - if (!ObjectType.isNull() && !ObjectTypeSearchedInScope) { - // C++ [basic.lookup.classref]p4: + if (!ObjectType.isNull() && !ObjectTypeSearchedInScope && + !getLangOpts().CPlusPlus0x) { + // C++03 [basic.lookup.classref]p4: // [...] If the name is found in both contexts, the // class-name-or-namespace-name shall refer to the same entity. // @@ -548,6 +548,8 @@ bool Sema::BuildCXXNestedNameSpecifier(Scope *S, // into the current scope (the scope of the postfix-expression) to // see if we can find the same name there. 
As above, if there is no // scope, reconstruct the result from the template instantiation itself. + // + // Note that C++11 does *not* perform this redundant lookup. NamedDecl *OuterDecl; if (S) { LookupResult FoundOuter(*this, &Identifier, IdentifierLoc, diff --git a/lib/Sema/SemaCast.cpp b/lib/Sema/SemaCast.cpp index 54683e1..8199751 100644 --- a/lib/Sema/SemaCast.cpp +++ b/lib/Sema/SemaCast.cpp @@ -561,8 +561,8 @@ void CastOperation::CheckDynamicCast() { assert(DestPointer && "Reference to void is not possible"); } else if (DestRecord) { if (Self.RequireCompleteType(OpRange.getBegin(), DestPointee, - Self.PDiag(diag::err_bad_dynamic_cast_incomplete) - << DestRange)) + diag::err_bad_dynamic_cast_incomplete, + DestRange)) return; } else { Self.Diag(OpRange.getBegin(), diag::err_bad_dynamic_cast_not_class) @@ -597,8 +597,8 @@ void CastOperation::CheckDynamicCast() { const RecordType *SrcRecord = SrcPointee->getAs(); if (SrcRecord) { if (Self.RequireCompleteType(OpRange.getBegin(), SrcPointee, - Self.PDiag(diag::err_bad_dynamic_cast_incomplete) - << SrcExpr.get()->getSourceRange())) + diag::err_bad_dynamic_cast_incomplete, + SrcExpr.get())) return; } else { Self.Diag(OpRange.getBegin(), diag::err_bad_dynamic_cast_not_class) @@ -1075,8 +1075,8 @@ TryStaticDowncast(Sema &Self, CanQualType SrcType, CanQualType DestType, QualType OrigDestType, unsigned &msg, CastKind &Kind, CXXCastPath &BasePath) { // We can only work with complete types. But don't complain if it doesn't work - if (Self.RequireCompleteType(OpRange.getBegin(), SrcType, Self.PDiag(0)) || - Self.RequireCompleteType(OpRange.getBegin(), DestType, Self.PDiag(0))) + if (Self.RequireCompleteType(OpRange.getBegin(), SrcType, 0) || + Self.RequireCompleteType(OpRange.getBegin(), DestType, 0)) return TC_NotApplicable; // Downcast can only happen in class hierarchies, so we need classes. @@ -1302,7 +1302,9 @@ TryStaticImplicitCast(Sema &Self, ExprResult &SrcExpr, QualType DestType, CastKind &Kind, bool ListInitialization) { if (DestType->isRecordType()) { if (Self.RequireCompleteType(OpRange.getBegin(), DestType, - diag::err_bad_dynamic_cast_incomplete)) { + diag::err_bad_dynamic_cast_incomplete) || + Self.RequireNonAbstractType(OpRange.getBegin(), DestType, + diag::err_allocation_of_abstract_type)) { msg = 0; return TC_Failed; } @@ -1504,10 +1506,9 @@ static TryCastResult TryReinterpretCast(Sema &Self, ExprResult &SrcExpr, } if (const ReferenceType *DestTypeTmp = DestType->getAs()) { - bool LValue = DestTypeTmp->isLValueReferenceType(); - if (LValue && !SrcExpr.get()->isLValue()) { - // Cannot cast non-lvalue to lvalue reference type. See the similar - // comment in const_cast. + if (!SrcExpr.get()->isGLValue()) { + // Cannot cast non-glvalue to (lvalue or rvalue) reference type. See the + // similar comment in const_cast. msg = diag::err_bad_cxx_cast_rvalue; return TC_NotApplicable; } @@ -1915,10 +1916,6 @@ void CastOperation::CheckCStyleCast() { return; QualType SrcType = SrcExpr.get()->getType(); - // You can cast an _Atomic(T) to anything you can cast a T to. 
- if (const AtomicType *AtomicSrcType = SrcType->getAs()) - SrcType = AtomicSrcType->getValueType(); - assert(!SrcType->isPlaceholderType()); if (Self.RequireCompleteType(OpRange.getBegin(), DestType, @@ -2105,6 +2102,9 @@ ExprResult Sema::BuildCXXFunctionalCastExpr(TypeSourceInfo *CastTypeInfo, Op.CheckCXXCStyleCast(/*FunctionalStyle=*/true, /*ListInit=*/false); if (Op.SrcExpr.isInvalid()) return ExprError(); + + if (CXXConstructExpr *ConstructExpr = dyn_cast(Op.SrcExpr.get())) + ConstructExpr->setParenRange(SourceRange(LPLoc, RPLoc)); return Op.complete(CXXFunctionalCastExpr::Create(Context, Op.ResultType, Op.ValueKind, CastTypeInfo, Op.DestRange.getBegin(), diff --git a/lib/Sema/SemaChecking.cpp b/lib/Sema/SemaChecking.cpp index 0d15ce2..2594648 100644 --- a/lib/Sema/SemaChecking.cpp +++ b/lib/Sema/SemaChecking.cpp @@ -16,12 +16,14 @@ #include "clang/Sema/Sema.h" #include "clang/Sema/SemaInternal.h" #include "clang/Sema/Initialization.h" +#include "clang/Sema/Lookup.h" #include "clang/Sema/ScopeInfo.h" #include "clang/Analysis/Analyses/FormatString.h" #include "clang/AST/ASTContext.h" #include "clang/AST/CharUnits.h" #include "clang/AST/DeclCXX.h" #include "clang/AST/DeclObjC.h" +#include "clang/AST/Expr.h" #include "clang/AST/ExprCXX.h" #include "clang/AST/ExprObjC.h" #include "clang/AST/EvaluatedExprVisitor.h" @@ -66,16 +68,31 @@ static bool checkArgCount(Sema &S, CallExpr *call, unsigned desiredArgCount) { << call->getArg(1)->getSourceRange(); } -/// CheckBuiltinAnnotationString - Checks that string argument to the builtin -/// annotation is a non wide string literal. -static bool CheckBuiltinAnnotationString(Sema &S, Expr *Arg) { - Arg = Arg->IgnoreParenCasts(); - StringLiteral *Literal = dyn_cast(Arg); +/// Check that the first argument to __builtin_annotation is an integer +/// and the second argument is a non-wide string literal. +static bool SemaBuiltinAnnotation(Sema &S, CallExpr *TheCall) { + if (checkArgCount(S, TheCall, 2)) + return true; + + // First argument should be an integer. + Expr *ValArg = TheCall->getArg(0); + QualType Ty = ValArg->getType(); + if (!Ty->isIntegerType()) { + S.Diag(ValArg->getLocStart(), diag::err_builtin_annotation_first_arg) + << ValArg->getSourceRange(); + return true; + } + + // Second argument should be a constant string. 
+ Expr *StrArg = TheCall->getArg(1)->IgnoreParenCasts(); + StringLiteral *Literal = dyn_cast(StrArg); if (!Literal || !Literal->isAscii()) { - S.Diag(Arg->getLocStart(), diag::err_builtin_annotation_not_string_constant) - << Arg->getSourceRange(); + S.Diag(StrArg->getLocStart(), diag::err_builtin_annotation_second_arg) + << StrArg->getSourceRange(); return true; } + + TheCall->setType(Ty); return false; } @@ -256,7 +273,7 @@ Sema::CheckBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) { return SemaAtomicOpsOverloaded(move(TheCallResult), AtomicExpr::AO##ID); #include "clang/Basic/Builtins.def" case Builtin::BI__builtin_annotation: - if (CheckBuiltinAnnotationString(*this, TheCall->getArg(1))) + if (SemaBuiltinAnnotation(*this, TheCall)) return ExprError(); break; } @@ -270,6 +287,13 @@ Sema::CheckBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) { if (CheckARMBuiltinFunctionCall(BuiltinID, TheCall)) return ExprError(); break; + case llvm::Triple::mips: + case llvm::Triple::mipsel: + case llvm::Triple::mips64: + case llvm::Triple::mips64el: + if (CheckMipsBuiltinFunctionCall(BuiltinID, TheCall)) + return ExprError(); + break; default: break; } @@ -331,7 +355,7 @@ static QualType getNeonEltType(NeonTypeFlags Flags, ASTContext &Context) { bool Sema::CheckARMBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) { llvm::APSInt Result; - unsigned mask = 0; + uint64_t mask = 0; unsigned TV = 0; int PtrArgNum = -1; bool HasConstPtr = false; @@ -349,7 +373,7 @@ bool Sema::CheckARMBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) { return true; TV = Result.getLimitedValue(64); - if ((TV > 63) || (mask & (1 << TV)) == 0) + if ((TV > 63) || (mask & (1ULL << TV)) == 0) return Diag(TheCall->getLocStart(), diag::err_invalid_neon_type_code) << TheCall->getArg(ImmArg)->getSourceRange(); } @@ -388,6 +412,11 @@ bool Sema::CheckARMBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) { #undef GET_NEON_IMMEDIATE_CHECK }; + // We can't check the value of a dependent argument. + if (TheCall->getArg(i)->isTypeDependent() || + TheCall->getArg(i)->isValueDependent()) + return false; + // Check that the immediate argument is actually a constant. if (SemaBuiltinConstantArg(TheCall, i, Result)) return true; @@ -402,34 +431,119 @@ bool Sema::CheckARMBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) { return false; } -/// CheckFunctionCall - Check a direct function call for various correctness -/// and safety properties not strictly enforced by the C type system. -bool Sema::CheckFunctionCall(FunctionDecl *FDecl, CallExpr *TheCall) { - // Get the IdentifierInfo* for the called function. - IdentifierInfo *FnInfo = FDecl->getIdentifier(); +bool Sema::CheckMipsBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) { + unsigned i = 0, l = 0, u = 0; + switch (BuiltinID) { + default: return false; + case Mips::BI__builtin_mips_wrdsp: i = 1; l = 0; u = 63; break; + case Mips::BI__builtin_mips_rddsp: i = 0; l = 0; u = 63; break; + }; - // None of the checks below are needed for functions that don't have - // simple names (e.g., C++ conversion functions). - if (!FnInfo) + // We can't check the value of a dependent argument. + if (TheCall->getArg(i)->isTypeDependent() || + TheCall->getArg(i)->isValueDependent()) return false; + // Check that the immediate argument is actually a constant. + llvm::APSInt Result; + if (SemaBuiltinConstantArg(TheCall, i, Result)) + return true; + + // Range check against the upper/lower values for this instruction. 
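The MIPS builtin validation above is table-driven: each builtin selects an argument index and a lower/upper bound, then one shared range check runs (that check completes just below). A sketch of the pattern, with illustrative IDs and bounds rather than the real Mips:: enumerators:

    #include <cassert>

    enum BuiltinID { WRDSP, RDDSP };

    static bool validateImmediate(BuiltinID id, const long *args) {
      unsigned i = 0;
      long l = 0, u = 0;
      switch (id) {
      case WRDSP: i = 1; l = 0; u = 63; break;  // mask operand
      case RDDSP: i = 0; l = 0; u = 63; break;  // mask operand
      }
      long val = args[i];
      // The real code emits err_argument_invalid_range on failure.
      return val >= l && val <= u;
    }

    int main() {
      long wrdspArgs[2] = { 0, 31 };
      assert(validateImmediate(WRDSP, wrdspArgs));
      long rddspArgs[1] = { 64 };
      assert(!validateImmediate(RDDSP, rddspArgs));
      return 0;
    }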
+  unsigned Val = Result.getZExtValue();
+  if (Val < l || Val > u)
+    return Diag(TheCall->getLocStart(), diag::err_argument_invalid_range)
+      << l << u << TheCall->getArg(i)->getSourceRange();
+
+  return false;
+}
+
+/// Given a FunctionDecl's FormatAttr, attempts to populate the FormatStringInfo
+/// parameter with the FormatAttr's correct format_idx and firstDataArg.
+/// Returns true when the format fits the function and the FormatStringInfo has
+/// been populated.
+bool Sema::getFormatStringInfo(const FormatAttr *Format, bool IsCXXMember,
+                               FormatStringInfo *FSI) {
+  FSI->HasVAListArg = Format->getFirstArg() == 0;
+  FSI->FormatIdx = Format->getFormatIdx() - 1;
+  FSI->FirstDataArg = FSI->HasVAListArg ? 0 : Format->getFirstArg() - 1;
+
+  // The way the format attribute works in GCC, the implicit this argument
+  // of member functions is counted. However, it doesn't appear in our own
+  // lists, so decrement format_idx in that case.
+  if (IsCXXMember) {
+    if (FSI->FormatIdx == 0)
+      return false;
+    --FSI->FormatIdx;
+    if (FSI->FirstDataArg != 0)
+      --FSI->FirstDataArg;
+  }
+  return true;
+}
+
+/// Handles the checks for format strings, non-POD arguments to vararg
+/// functions, and NULL arguments passed to non-NULL parameters.
+void Sema::checkCall(NamedDecl *FDecl, Expr **Args,
+                     unsigned NumArgs,
+                     unsigned NumProtoArgs,
+                     bool IsMemberFunction,
+                     SourceLocation Loc,
+                     SourceRange Range,
+                     VariadicCallType CallType) {
   // FIXME: This mechanism should be abstracted to be less fragile and
   // more efficient. For example, just map function ids to custom
   // handlers.

   // Printf and scanf checking.
+  bool HandledFormatString = false;
   for (specific_attr_iterator<FormatAttr>
-         i = FDecl->specific_attr_begin<FormatAttr>(),
-         e = FDecl->specific_attr_end<FormatAttr>(); i != e ; ++i) {
-    CheckFormatArguments(*i, TheCall);
-  }
+         I = FDecl->specific_attr_begin<FormatAttr>(),
+         E = FDecl->specific_attr_end<FormatAttr>(); I != E ; ++I)
+    if (CheckFormatArguments(*I, Args, NumArgs, IsMemberFunction, CallType,
+                             Loc, Range))
+      HandledFormatString = true;
+
+  // Refuse POD arguments that weren't caught by the format string
+  // checks above.
+  if (!HandledFormatString && CallType != VariadicDoesNotApply)
+    for (unsigned ArgIdx = NumProtoArgs; ArgIdx < NumArgs; ++ArgIdx)
+      variadicArgumentPODCheck(Args[ArgIdx], CallType);

   for (specific_attr_iterator<NonNullAttr>
-         i = FDecl->specific_attr_begin<NonNullAttr>(),
-         e = FDecl->specific_attr_end<NonNullAttr>(); i != e; ++i) {
-    CheckNonNullArguments(*i, TheCall->getArgs(),
-                          TheCall->getCallee()->getLocStart());
-  }
+         I = FDecl->specific_attr_begin<NonNullAttr>(),
+         E = FDecl->specific_attr_end<NonNullAttr>(); I != E; ++I)
+    CheckNonNullArguments(*I, Args, Loc);
+}
+
+/// CheckConstructorCall - Check a constructor call for correctness and safety
+/// properties not enforced by the C type system.
+void Sema::CheckConstructorCall(FunctionDecl *FDecl, Expr **Args,
+                                unsigned NumArgs,
+                                const FunctionProtoType *Proto,
+                                SourceLocation Loc) {
+  VariadicCallType CallType =
+    Proto->isVariadic() ? VariadicConstructor : VariadicDoesNotApply;
+  checkCall(FDecl, Args, NumArgs, Proto->getNumArgs(),
+            /*IsMemberFunction=*/true, Loc, SourceRange(), CallType);
+}
+
+/// CheckFunctionCall - Check a direct function call for various correctness
+/// and safety properties not strictly enforced by the C type system.
+bool Sema::CheckFunctionCall(FunctionDecl *FDecl, CallExpr *TheCall,
+                             const FunctionProtoType *Proto) {
+  bool IsMemberFunction = isa<CXXMemberCallExpr>(TheCall);
+  VariadicCallType CallType = getVariadicCallType(FDecl, Proto,
+                                                  TheCall->getCallee());
+  unsigned NumProtoArgs = Proto ?
Proto->getNumArgs() : 0; + checkCall(FDecl, TheCall->getArgs(), TheCall->getNumArgs(), NumProtoArgs, + IsMemberFunction, TheCall->getRParenLoc(), + TheCall->getCallee()->getSourceRange(), CallType); + + IdentifierInfo *FnInfo = FDecl->getIdentifier(); + // None of the checks below are needed for functions that don't have + // simple names (e.g., C++ conversion functions). + if (!FnInfo) + return false; unsigned CMId = FDecl->getMemoryFunctionKind(); if (CMId == 0) @@ -448,25 +562,18 @@ bool Sema::CheckFunctionCall(FunctionDecl *FDecl, CallExpr *TheCall) { bool Sema::CheckObjCMethodCall(ObjCMethodDecl *Method, SourceLocation lbrac, Expr **Args, unsigned NumArgs) { - for (specific_attr_iterator - i = Method->specific_attr_begin(), - e = Method->specific_attr_end(); i != e ; ++i) { - - CheckFormatArguments(*i, Args, NumArgs, false, lbrac, - Method->getSourceRange()); - } + VariadicCallType CallType = + Method->isVariadic() ? VariadicMethod : VariadicDoesNotApply; - // diagnose nonnull arguments. - for (specific_attr_iterator - i = Method->specific_attr_begin(), - e = Method->specific_attr_end(); i != e; ++i) { - CheckNonNullArguments(*i, Args, lbrac); - } + checkCall(Method, Args, NumArgs, Method->param_size(), + /*IsMemberFunction=*/false, + lbrac, Method->getSourceRange(), CallType); return false; } -bool Sema::CheckBlockCall(NamedDecl *NDecl, CallExpr *TheCall) { +bool Sema::CheckBlockCall(NamedDecl *NDecl, CallExpr *TheCall, + const FunctionProtoType *Proto) { const VarDecl *V = dyn_cast(NDecl); if (!V) return false; @@ -475,13 +582,15 @@ bool Sema::CheckBlockCall(NamedDecl *NDecl, CallExpr *TheCall) { if (!Ty->isBlockPointerType()) return false; - // format string checking. - for (specific_attr_iterator - i = NDecl->specific_attr_begin(), - e = NDecl->specific_attr_end(); i != e ; ++i) { - CheckFormatArguments(*i, TheCall); - } + VariadicCallType CallType = + Proto && Proto->isVariadic() ? VariadicBlock : VariadicDoesNotApply ; + unsigned NumProtoArgs = Proto ? Proto->getNumArgs() : 0; + checkCall(NDecl, TheCall->getArgs(), TheCall->getNumArgs(), + NumProtoArgs, /*IsMemberFunction=*/false, + TheCall->getRParenLoc(), + TheCall->getCallee()->getSourceRange(), CallType); + return false; } @@ -1260,7 +1369,7 @@ bool Sema::SemaBuiltinUnorderedCompare(CallExpr *TheCall) { // If the common type isn't a real floating type, then the arguments were // invalid for this operation. - if (!Res->isRealFloatingType()) + if (Res.isNull() || !Res->isRealFloatingType()) return Diag(OrigArg0.get()->getLocStart(), diag::err_typecheck_call_invalid_ordered_compare) << OrigArg0.get()->getType() << OrigArg1.get()->getType() @@ -1409,7 +1518,11 @@ bool Sema::SemaBuiltinPrefetch(CallExpr *TheCall) { // constant integers. for (unsigned i = 1; i != NumArgs; ++i) { Expr *Arg = TheCall->getArg(i); - + + // We can't check the value of a dependent argument. + if (Arg->isTypeDependent() || Arg->isValueDependent()) + continue; + llvm::APSInt Result; if (SemaBuiltinConstantArg(TheCall, i, Result)) return true; @@ -1454,7 +1567,12 @@ bool Sema::SemaBuiltinConstantArg(CallExpr *TheCall, int ArgNum, // For compatibility check 0-3, llvm only handles 0 and 2. bool Sema::SemaBuiltinObjectSize(CallExpr *TheCall) { llvm::APSInt Result; - + + // We can't check the value of a dependent argument. + if (TheCall->getArg(1)->isTypeDependent() || + TheCall->getArg(1)->isValueDependent()) + return false; + // Check constant-ness first. 
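The IsCXXMember adjustment above exists because GCC-style format attributes count the implicit this pointer as argument 1. A hedged illustration (Logger is a made-up class):

    struct Logger {
      // 'this' is argument 1, the format string is argument 2, and the
      // variadic data starts at argument 3.
      void log(const char *fmt, ...) __attribute__((format(printf, 2, 3)));
    };

getFormatStringInfo decrements both indices so they line up with clang's zero-based argument lists, which do not include this.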
if (SemaBuiltinConstantArg(TheCall, 1, Result)) return true; @@ -1485,14 +1603,19 @@ bool Sema::SemaBuiltinLongjmp(CallExpr *TheCall) { return false; } -// Handle i > 1 ? "x" : "y", recursively. -bool Sema::SemaCheckStringLiteral(const Expr *E, Expr **Args, - unsigned NumArgs, bool HasVAListArg, - unsigned format_idx, unsigned firstDataArg, - FormatStringType Type, bool inFunctionCall) { +// Determine if an expression is a string literal or constant string. +// If this function returns false on the arguments to a function expecting a +// format string, we will usually need to emit a warning. +// True string literals are then checked by CheckFormatString. +Sema::StringLiteralCheckType +Sema::checkFormatStringExpr(const Expr *E, Expr **Args, + unsigned NumArgs, bool HasVAListArg, + unsigned format_idx, unsigned firstDataArg, + FormatStringType Type, VariadicCallType CallType, + bool inFunctionCall) { tryAgain: if (E->isTypeDependent() || E->isValueDependent()) - return false; + return SLCT_NotALiteral; E = E->IgnoreParenCasts(); @@ -1501,18 +1624,26 @@ bool Sema::SemaCheckStringLiteral(const Expr *E, Expr **Args, // The behavior of printf and friends in this case is implementation // dependent. Ideally if the format string cannot be null then // it should have a 'nonnull' attribute in the function prototype. - return true; + return SLCT_CheckedLiteral; switch (E->getStmtClass()) { case Stmt::BinaryConditionalOperatorClass: case Stmt::ConditionalOperatorClass: { - const AbstractConditionalOperator *C = cast(E); - return SemaCheckStringLiteral(C->getTrueExpr(), Args, NumArgs, HasVAListArg, - format_idx, firstDataArg, Type, - inFunctionCall) - && SemaCheckStringLiteral(C->getFalseExpr(), Args, NumArgs, HasVAListArg, - format_idx, firstDataArg, Type, - inFunctionCall); + // The expression is a literal if both sub-expressions were, and it was + // completely checked only if both sub-expressions were checked. + const AbstractConditionalOperator *C = + cast(E); + StringLiteralCheckType Left = + checkFormatStringExpr(C->getTrueExpr(), Args, NumArgs, + HasVAListArg, format_idx, firstDataArg, + Type, CallType, inFunctionCall); + if (Left == SLCT_NotALiteral) + return SLCT_NotALiteral; + StringLiteralCheckType Right = + checkFormatStringExpr(C->getFalseExpr(), Args, NumArgs, + HasVAListArg, format_idx, firstDataArg, + Type, CallType, inFunctionCall); + return Left < Right ? Left : Right; } case Stmt::ImplicitCastExprClass: { @@ -1525,13 +1656,13 @@ bool Sema::SemaCheckStringLiteral(const Expr *E, Expr **Args, E = src; goto tryAgain; } - return false; + return SLCT_NotALiteral; case Stmt::PredefinedExprClass: // While __func__, etc., are technically not string literals, they // cannot contain format specifiers and thus are not a security // liability. 
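With the conditional-operator handling above, both arms must be literals for the call to count as fully checked, and the weaker result wins; the Left < Right ? Left : Right merge assumes the enum orders SLCT_NotALiteral below SLCT_UncheckedLiteral below SLCT_CheckedLiteral. A sketch:

    printf(n == 1 ? "%d item\n" : "%d items\n", n);  // both arms literal: checked
    printf(ok ? "%d\n" : user_fmt, n);               // one arm non-literal: the whole
                                                     // format is treated as non-literal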
- return true; + return SLCT_UncheckedLiteral; case Stmt::DeclRefExprClass: { const DeclRefExpr *DR = cast(E); @@ -1554,10 +1685,17 @@ bool Sema::SemaCheckStringLiteral(const Expr *E, Expr **Args, } if (isConstant) { - if (const Expr *Init = VD->getAnyInitializer()) - return SemaCheckStringLiteral(Init, Args, NumArgs, - HasVAListArg, format_idx, firstDataArg, - Type, /*inFunctionCall*/false); + if (const Expr *Init = VD->getAnyInitializer()) { + // Look through initializers like const char c[] = { "foo" } + if (const InitListExpr *InitList = dyn_cast(Init)) { + if (InitList->isStringLiteralInit()) + Init = InitList->getInit(0)->IgnoreParenImpCasts(); + } + return checkFormatStringExpr(Init, Args, NumArgs, + HasVAListArg, format_idx, + firstDataArg, Type, CallType, + /*inFunctionCall*/false); + } } // For vprintf* functions (i.e., HasVAListArg==true), we add a @@ -1590,14 +1728,14 @@ bool Sema::SemaCheckStringLiteral(const Expr *E, Expr **Args, // We can't pass a 'scanf' string to a 'printf' function. if (PVIndex == PVFormat->getFormatIdx() && Type == GetFormatStringType(PVFormat)) - return true; + return SLCT_UncheckedLiteral; } } } } } - return false; + return SLCT_NotALiteral; } case Stmt::CallExprClass: @@ -1611,13 +1749,23 @@ bool Sema::SemaCheckStringLiteral(const Expr *E, Expr **Args, --ArgIndex; const Expr *Arg = CE->getArg(ArgIndex - 1); - return SemaCheckStringLiteral(Arg, Args, NumArgs, HasVAListArg, - format_idx, firstDataArg, Type, - inFunctionCall); + return checkFormatStringExpr(Arg, Args, NumArgs, + HasVAListArg, format_idx, firstDataArg, + Type, CallType, inFunctionCall); + } else if (const FunctionDecl *FD = dyn_cast(ND)) { + unsigned BuiltinID = FD->getBuiltinID(); + if (BuiltinID == Builtin::BI__builtin___CFStringMakeConstantString || + BuiltinID == Builtin::BI__builtin___NSStringMakeConstantString) { + const Expr *Arg = CE->getArg(0); + return checkFormatStringExpr(Arg, Args, NumArgs, + HasVAListArg, format_idx, + firstDataArg, Type, CallType, + inFunctionCall); + } } } - return false; + return SLCT_NotALiteral; } case Stmt::ObjCStringLiteralClass: case Stmt::StringLiteralClass: { @@ -1630,15 +1778,15 @@ bool Sema::SemaCheckStringLiteral(const Expr *E, Expr **Args, if (StrE) { CheckFormatString(StrE, E, Args, NumArgs, HasVAListArg, format_idx, - firstDataArg, Type, inFunctionCall); - return true; + firstDataArg, Type, inFunctionCall, CallType); + return SLCT_CheckedLiteral; } - return false; + return SLCT_NotALiteral; } default: - return false; + return SLCT_NotALiteral; } } @@ -1667,44 +1815,30 @@ Sema::FormatStringType Sema::GetFormatStringType(const FormatAttr *Format) { .Default(FST_Unknown); } -/// CheckPrintfScanfArguments - Check calls to printf and scanf (and similar +/// CheckFormatArguments - Check calls to printf and scanf (and similar /// functions) for correct use of format strings. -void Sema::CheckFormatArguments(const FormatAttr *Format, CallExpr *TheCall) { - bool IsCXXMember = false; - // The way the format attribute works in GCC, the implicit this argument - // of member functions is counted. However, it doesn't appear in our own - // lists, so decrement format_idx in that case. - IsCXXMember = isa(TheCall); - CheckFormatArguments(Format, TheCall->getArgs(), TheCall->getNumArgs(), - IsCXXMember, TheCall->getRParenLoc(), - TheCall->getCallee()->getSourceRange()); -} - -void Sema::CheckFormatArguments(const FormatAttr *Format, Expr **Args, +/// Returns true if a format string has been fully checked. 
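A small example of the new initializer look-through (kFmt is a made-up name); the braced form previously hid the literal from the checker:

    static const char kFmt[] = { "%d %s\n" };  // InitListExpr wrapping a string literal
    printf(kFmt, 1, "two");                    // now followed and checked like a
                                               // directly written literal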
+bool Sema::CheckFormatArguments(const FormatAttr *Format, Expr **Args, unsigned NumArgs, bool IsCXXMember, + VariadicCallType CallType, SourceLocation Loc, SourceRange Range) { - bool HasVAListArg = Format->getFirstArg() == 0; - unsigned format_idx = Format->getFormatIdx() - 1; - unsigned firstDataArg = HasVAListArg ? 0 : Format->getFirstArg() - 1; - if (IsCXXMember) { - if (format_idx == 0) - return; - --format_idx; - if(firstDataArg != 0) - --firstDataArg; - } - CheckFormatArguments(Args, NumArgs, HasVAListArg, format_idx, - firstDataArg, GetFormatStringType(Format), Loc, Range); + FormatStringInfo FSI; + if (getFormatStringInfo(Format, IsCXXMember, &FSI)) + return CheckFormatArguments(Args, NumArgs, FSI.HasVAListArg, FSI.FormatIdx, + FSI.FirstDataArg, GetFormatStringType(Format), + CallType, Loc, Range); + return false; } -void Sema::CheckFormatArguments(Expr **Args, unsigned NumArgs, +bool Sema::CheckFormatArguments(Expr **Args, unsigned NumArgs, bool HasVAListArg, unsigned format_idx, unsigned firstDataArg, FormatStringType Type, + VariadicCallType CallType, SourceLocation Loc, SourceRange Range) { // CHECK: printf/scanf-like function is called with no format string. if (format_idx >= NumArgs) { Diag(Loc, diag::warn_missing_format_string) << Range; - return; + return false; } const Expr *OrigFormatExpr = Args[format_idx]->IgnoreParenCasts(); @@ -1721,21 +1855,25 @@ void Sema::CheckFormatArguments(Expr **Args, unsigned NumArgs, // C string (e.g. "%d") // ObjC string uses the same format specifiers as C string, so we can use // the same format string checking logic for both ObjC and C strings. - if (SemaCheckStringLiteral(OrigFormatExpr, Args, NumArgs, HasVAListArg, - format_idx, firstDataArg, Type)) - return; // Literal format string found, check done! + StringLiteralCheckType CT = + checkFormatStringExpr(OrigFormatExpr, Args, NumArgs, HasVAListArg, + format_idx, firstDataArg, Type, CallType); + if (CT != SLCT_NotALiteral) + // Literal format string found, check done! + return CT == SLCT_CheckedLiteral; // Strftime is particular as it always uses a single 'time' argument, // so it is safe to pass a non-literal string. if (Type == FST_Strftime) - return; + return false; // Do not emit diag when the string param is a macro expansion and the // format is either NSString or CFString. This is a hack to prevent // diag when using the NSLocalizedString and CFCopyLocalizedString macros // which are usually used in place of NS and CF string literals. - if (Type == FST_NSString && Args[format_idx]->getLocStart().isMacroID()) - return; + if (Type == FST_NSString && + SourceMgr.isInSystemMacro(Args[format_idx]->getLocStart())) + return false; // If there are no arguments specified, warn with -Wformat-security, otherwise // warn only with -Wformat-nonliteral. @@ -1747,6 +1885,7 @@ void Sema::CheckFormatArguments(Expr **Args, unsigned NumArgs, Diag(Args[format_idx]->getLocStart(), diag::warn_format_nonliteral) << OrigFormatExpr->getSourceRange(); + return false; } namespace { @@ -1757,7 +1896,6 @@ protected: const Expr *OrigFormatExpr; const unsigned FirstDataArg; const unsigned NumDataArgs; - const bool IsObjCLiteral; const char *Beg; // Start of format string. 
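When no literal is found at all, the diagnostic chosen still depends on whether data arguments follow the format; a sketch, assuming get_user_string() is some untrusted source:

    const char *s = get_user_string();
    printf(s);        // -Wformat-security: non-literal format, no data arguments
    printf(s, 1);     // -Wformat-nonliteral
    printf("%s", s);  // fine: the user string is data, not a format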
const bool HasVAListArg; const Expr * const *Args; @@ -1767,21 +1905,20 @@ protected: bool usesPositionalArgs; bool atFirstArg; bool inFunctionCall; + Sema::VariadicCallType CallType; public: CheckFormatHandler(Sema &s, const StringLiteral *fexpr, const Expr *origFormatExpr, unsigned firstDataArg, - unsigned numDataArgs, bool isObjCLiteral, - const char *beg, bool hasVAListArg, + unsigned numDataArgs, const char *beg, bool hasVAListArg, Expr **args, unsigned numArgs, - unsigned formatIdx, bool inFunctionCall) + unsigned formatIdx, bool inFunctionCall, + Sema::VariadicCallType callType) : S(s), FExpr(fexpr), OrigFormatExpr(origFormatExpr), - FirstDataArg(firstDataArg), - NumDataArgs(numDataArgs), - IsObjCLiteral(isObjCLiteral), Beg(beg), - HasVAListArg(hasVAListArg), + FirstDataArg(firstDataArg), NumDataArgs(numDataArgs), + Beg(beg), HasVAListArg(hasVAListArg), Args(args), NumArgs(numArgs), FormatIdx(formatIdx), usesPositionalArgs(false), atFirstArg(true), - inFunctionCall(inFunctionCall) { + inFunctionCall(inFunctionCall), CallType(callType) { CoveredArgs.resize(numDataArgs); CoveredArgs.reset(); } @@ -1938,7 +2075,7 @@ void CheckFormatHandler::HandleZeroPosition(const char *startPos, } void CheckFormatHandler::HandleNullChar(const char *nullCharacter) { - if (!IsObjCLiteral) { + if (!isa(OrigFormatExpr)) { // The presence of a null character is likely an error. EmitFormatDiagnostic( S.PDiag(diag::warn_printf_format_string_contains_null_char), @@ -1947,6 +2084,8 @@ void CheckFormatHandler::HandleNullChar(const char *nullCharacter) { } } +// Note that this may return NULL if there was an error parsing or building +// one of the argument expressions. const Expr *CheckFormatHandler::getDataArg(unsigned i) const { return Args[FirstDataArg + i]; } @@ -1960,9 +2099,14 @@ void CheckFormatHandler::DoneProcessing() { signed notCoveredArg = CoveredArgs.find_first(); if (notCoveredArg >= 0) { assert((unsigned)notCoveredArg < NumDataArgs); - EmitFormatDiagnostic(S.PDiag(diag::warn_printf_data_arg_not_used), - getDataArg((unsigned) notCoveredArg)->getLocStart(), - /*IsStringLocation*/false, getFormatStringRange()); + if (const Expr *E = getDataArg((unsigned) notCoveredArg)) { + SourceLocation Loc = E->getLocStart(); + if (!S.getSourceManager().isInSystemMacro(Loc)) { + EmitFormatDiagnostic(S.PDiag(diag::warn_printf_data_arg_not_used), + Loc, /*IsStringLocation*/false, + getFormatStringRange()); + } + } } } } @@ -2086,17 +2230,20 @@ void CheckFormatHandler::EmitFormatDiagnostic(Sema &S, bool InFunctionCall, namespace { class CheckPrintfHandler : public CheckFormatHandler { + bool ObjCContext; public: CheckPrintfHandler(Sema &s, const StringLiteral *fexpr, const Expr *origFormatExpr, unsigned firstDataArg, - unsigned numDataArgs, bool isObjCLiteral, + unsigned numDataArgs, bool isObjC, const char *beg, bool hasVAListArg, Expr **Args, unsigned NumArgs, - unsigned formatIdx, bool inFunctionCall) + unsigned formatIdx, bool inFunctionCall, + Sema::VariadicCallType CallType) : CheckFormatHandler(s, fexpr, origFormatExpr, firstDataArg, - numDataArgs, isObjCLiteral, beg, hasVAListArg, - Args, NumArgs, formatIdx, inFunctionCall) {} - + numDataArgs, beg, hasVAListArg, Args, NumArgs, + formatIdx, inFunctionCall, CallType), ObjCContext(isObjC) + {} + bool HandleInvalidPrintfConversionSpecifier( const analyze_printf::PrintfSpecifier &FS, @@ -2106,7 +2253,11 @@ public: bool HandlePrintfSpecifier(const analyze_printf::PrintfSpecifier &FS, const char *startSpecifier, unsigned specifierLen); - + bool 
checkFormatExpr(const analyze_printf::PrintfSpecifier &FS, + const char *StartSpecifier, + unsigned SpecifierLen, + const Expr *E); + bool HandleAmount(const analyze_format_string::OptionalAmount &Amt, unsigned k, const char *startSpecifier, unsigned specifierLen); void HandleInvalidAmount(const analyze_printf::PrintfSpecifier &FS, @@ -2120,6 +2271,9 @@ public: const analyze_printf::OptionalFlag &ignoredFlag, const analyze_printf::OptionalFlag &flag, const char *startSpecifier, unsigned specifierLen); + bool checkForCStrMembers(const analyze_printf::ArgType &AT, + const Expr *E, const CharSourceRange &CSR); + }; } @@ -2161,14 +2315,17 @@ bool CheckPrintfHandler::HandleAmount( // doesn't emit a warning for that case. CoveredArgs.set(argIndex); const Expr *Arg = getDataArg(argIndex); + if (!Arg) + return false; + QualType T = Arg->getType(); - const analyze_printf::ArgTypeResult &ATR = Amt.getArgType(S.Context); - assert(ATR.isValid()); + const analyze_printf::ArgType &AT = Amt.getArgType(S.Context); + assert(AT.isValid()); - if (!ATR.matchesType(S.Context, T)) { + if (!AT.matchesType(S.Context, T)) { EmitFormatDiagnostic(S.PDiag(diag::warn_printf_asterisk_wrong_type) - << k << ATR.getRepresentativeTypeName(S.Context) + << k << AT.getRepresentativeTypeName(S.Context) << T << Arg->getSourceRange(), getLocationOfByte(Amt.getStart()), /*IsStringLocation*/true, @@ -2237,6 +2394,64 @@ void CheckPrintfHandler::HandleIgnoredFlag( getSpecifierRange(ignoredFlag.getPosition(), 1))); } +// Determines if the specified is a C++ class or struct containing +// a member with the specified name and kind (e.g. a CXXMethodDecl named +// "c_str()"). +template +static llvm::SmallPtrSet +CXXRecordMembersNamed(StringRef Name, Sema &S, QualType Ty) { + const RecordType *RT = Ty->getAs(); + llvm::SmallPtrSet Results; + + if (!RT) + return Results; + const CXXRecordDecl *RD = dyn_cast(RT->getDecl()); + if (!RD) + return Results; + + LookupResult R(S, &S.PP.getIdentifierTable().get(Name), SourceLocation(), + Sema::LookupMemberName); + + // We just need to include all members of the right kind turned up by the + // filter, at this point. + if (S.LookupQualifiedName(R, RT->getDecl())) + for (LookupResult::iterator I = R.begin(), E = R.end(); I != E; ++I) { + NamedDecl *decl = (*I)->getUnderlyingDecl(); + if (MemberKind *FK = dyn_cast(decl)) + Results.insert(FK); + } + return Results; +} + +// Check if a (w)string was passed when a (w)char* was needed, and offer a +// better diagnostic if so. AT is assumed to be valid. +// Returns true when a c_str() conversion method is found. +bool CheckPrintfHandler::checkForCStrMembers( + const analyze_printf::ArgType &AT, const Expr *E, + const CharSourceRange &CSR) { + typedef llvm::SmallPtrSet MethodSet; + + MethodSet Results = + CXXRecordMembersNamed("c_str", S, E->getType()); + + for (MethodSet::iterator MI = Results.begin(), ME = Results.end(); + MI != ME; ++MI) { + const CXXMethodDecl *Method = *MI; + if (Method->getNumParams() == 0 && + AT.matchesType(S.Context, Method->getResultType())) { + // FIXME: Suggest parens if the expression needs them. 
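checkForCStrMembers searches the argument's class for a zero-parameter c_str() whose return type satisfies the specifier and offers an insertion fixit; the classic case it targets:

    std::string name = "world";
    printf("hello %s\n", name);          // mismatch; a note suggests name.c_str()
    printf("hello %s\n", name.c_str());  // what the fixit produces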
+ SourceLocation EndLoc = + S.getPreprocessor().getLocForEndOfToken(E->getLocEnd()); + S.Diag(E->getLocStart(), diag::note_printf_c_str) + << "c_str()" + << FixItHint::CreateInsertion(EndLoc, ".c_str()"); + return true; + } + } + + return false; +} + bool CheckPrintfHandler::HandlePrintfSpecifier(const analyze_printf::PrintfSpecifier &FS, @@ -2288,7 +2503,7 @@ CheckPrintfHandler::HandlePrintfSpecifier(const analyze_printf::PrintfSpecifier // Check for using an Objective-C specific conversion specifier // in a non-ObjC literal. - if (!IsObjCLiteral && CS.isObjCArg()) { + if (!ObjCContext && CS.isObjCArg()) { return HandleInvalidPrintfConversionSpecifier(FS, startSpecifier, specifierLen); } @@ -2346,17 +2561,6 @@ CheckPrintfHandler::HandlePrintfSpecifier(const analyze_printf::PrintfSpecifier HandleNonStandardConversionSpecification(LM, CS, startSpecifier, specifierLen); - // Are we using '%n'? - if (CS.getKind() == ConversionSpecifier::nArg) { - // Issue a warning about this being a possible security issue. - EmitFormatDiagnostic(S.PDiag(diag::warn_printf_write_back), - getLocationOfByte(CS.getStart()), - /*IsStringLocation*/true, - getSpecifierRange(startSpecifier, specifierLen)); - // Continue checking the other format specifiers. - return true; - } - // The remaining checks depend on the data arguments. if (HasVAListArg) return true; @@ -2364,54 +2568,98 @@ CheckPrintfHandler::HandlePrintfSpecifier(const analyze_printf::PrintfSpecifier if (!CheckNumArgs(FS, CS, startSpecifier, specifierLen, argIndex)) return false; + const Expr *Arg = getDataArg(argIndex); + if (!Arg) + return true; + + return checkFormatExpr(FS, startSpecifier, specifierLen, Arg); +} + +bool +CheckPrintfHandler::checkFormatExpr(const analyze_printf::PrintfSpecifier &FS, + const char *StartSpecifier, + unsigned SpecifierLen, + const Expr *E) { + using namespace analyze_format_string; + using namespace analyze_printf; // Now type check the data expression that matches the // format specifier. - const Expr *Ex = getDataArg(argIndex); - const analyze_printf::ArgTypeResult &ATR = FS.getArgType(S.Context, - IsObjCLiteral); - if (ATR.isValid() && !ATR.matchesType(S.Context, Ex->getType())) { - // Check if we didn't match because of an implicit cast from a 'char' - // or 'short' to an 'int'. This is done because printf is a varargs - // function. - if (const ImplicitCastExpr *ICE = dyn_cast(Ex)) - if (ICE->getType() == S.Context.IntTy) { - // All further checking is done on the subexpression. - Ex = ICE->getSubExpr(); - if (ATR.matchesType(S.Context, Ex->getType())) - return true; + const analyze_printf::ArgType &AT = FS.getArgType(S.Context, + ObjCContext); + if (AT.isValid() && !AT.matchesType(S.Context, E->getType())) { + // Look through argument promotions for our error message's reported type. + // This includes the integral and floating promotions, but excludes array + // and function pointer decay; seeing that an argument intended to be a + // string has type 'char [6]' is probably more confusing than 'char *'. + if (const ImplicitCastExpr *ICE = dyn_cast(E)) { + if (ICE->getCastKind() == CK_IntegralCast || + ICE->getCastKind() == CK_FloatingCast) { + E = ICE->getSubExpr(); + + // Check if we didn't match because of an implicit cast from a 'char' + // or 'short' to an 'int'. This is done because printf is a varargs + // function. + if (ICE->getType() == S.Context.IntTy || + ICE->getType() == S.Context.UnsignedIntTy) { + // All further checking is done on the subexpression. 
+ if (AT.matchesType(S.Context, E->getType())) + return true; + } } + } // We may be able to offer a FixItHint if it is a supported type. PrintfSpecifier fixedFS = FS; - bool success = fixedFS.fixType(Ex->getType(), S.getLangOpts(), - S.Context, IsObjCLiteral); + bool success = fixedFS.fixType(E->getType(), S.getLangOpts(), + S.Context, ObjCContext); if (success) { // Get the fix string from the fixed format specifier - SmallString<128> buf; + SmallString<16> buf; llvm::raw_svector_ostream os(buf); fixedFS.toString(os); EmitFormatDiagnostic( S.PDiag(diag::warn_printf_conversion_argument_type_mismatch) - << ATR.getRepresentativeTypeName(S.Context) << Ex->getType() - << Ex->getSourceRange(), - getLocationOfByte(CS.getStart()), - /*IsStringLocation*/true, - getSpecifierRange(startSpecifier, specifierLen), + << AT.getRepresentativeTypeName(S.Context) << E->getType() + << E->getSourceRange(), + E->getLocStart(), + /*IsStringLocation*/false, + getSpecifierRange(StartSpecifier, SpecifierLen), FixItHint::CreateReplacement( - getSpecifierRange(startSpecifier, specifierLen), + getSpecifierRange(StartSpecifier, SpecifierLen), os.str())); - } - else { - EmitFormatDiagnostic( - S.PDiag(diag::warn_printf_conversion_argument_type_mismatch) - << ATR.getRepresentativeTypeName(S.Context) << Ex->getType() - << getSpecifierRange(startSpecifier, specifierLen) - << Ex->getSourceRange(), - getLocationOfByte(CS.getStart()), - true, - getSpecifierRange(startSpecifier, specifierLen)); + } else { + const CharSourceRange &CSR = getSpecifierRange(StartSpecifier, + SpecifierLen); + // Since the warning for passing non-POD types to variadic functions + // was deferred until now, we emit a warning for non-POD + // arguments here. + if (S.isValidVarArgType(E->getType()) == Sema::VAK_Invalid) { + unsigned DiagKind; + if (E->getType()->isObjCObjectType()) + DiagKind = diag::err_cannot_pass_objc_interface_to_vararg_format; + else + DiagKind = diag::warn_non_pod_vararg_with_format_string; + + EmitFormatDiagnostic( + S.PDiag(DiagKind) + << S.getLangOpts().CPlusPlus0x + << E->getType() + << CallType + << AT.getRepresentativeTypeName(S.Context) + << CSR + << E->getSourceRange(), + E->getLocStart(), /*IsStringLocation*/false, CSR); + + checkForCStrMembers(AT, E, CSR); + } else + EmitFormatDiagnostic( + S.PDiag(diag::warn_printf_conversion_argument_type_mismatch) + << AT.getRepresentativeTypeName(S.Context) << E->getType() + << CSR + << E->getSourceRange(), + E->getLocStart(), /*IsStringLocation*/false, CSR); } } @@ -2425,13 +2673,14 @@ class CheckScanfHandler : public CheckFormatHandler { public: CheckScanfHandler(Sema &s, const StringLiteral *fexpr, const Expr *origFormatExpr, unsigned firstDataArg, - unsigned numDataArgs, bool isObjCLiteral, - const char *beg, bool hasVAListArg, + unsigned numDataArgs, const char *beg, bool hasVAListArg, Expr **Args, unsigned NumArgs, - unsigned formatIdx, bool inFunctionCall) + unsigned formatIdx, bool inFunctionCall, + Sema::VariadicCallType CallType) : CheckFormatHandler(s, fexpr, origFormatExpr, firstDataArg, - numDataArgs, isObjCLiteral, beg, hasVAListArg, - Args, NumArgs, formatIdx, inFunctionCall) {} + numDataArgs, beg, hasVAListArg, + Args, NumArgs, formatIdx, inFunctionCall, CallType) + {} bool HandleScanfSpecifier(const analyze_scanf::ScanfSpecifier &FS, const char *startSpecifier, @@ -2548,8 +2797,11 @@ bool CheckScanfHandler::HandleScanfSpecifier( // Check that the argument type matches the format specifier. 
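When fixType succeeds, the whole specifier is replaced rather than merely flagged. Roughly, and with the caveat that the exact suggestion depends on the target (the sketch assumes a typical 64-bit platform):

    size_t count = 3;
    printf("%d\n", count);  // warning: '%d' expects 'int' but the argument has
                            // type 'size_t'; the fixit rewrites the specifier to "%zu"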
const Expr *Ex = getDataArg(argIndex); - const analyze_scanf::ScanfArgTypeResult &ATR = FS.getArgType(S.Context); - if (ATR.isValid() && !ATR.matchesType(S.Context, Ex->getType())) { + if (!Ex) + return true; + + const analyze_format_string::ArgType &AT = FS.getArgType(S.Context); + if (AT.isValid() && !AT.matchesType(S.Context, Ex->getType())) { ScanfSpecifier fixedFS = FS; bool success = fixedFS.fixType(Ex->getType(), S.getLangOpts(), S.Context); @@ -2562,10 +2814,10 @@ bool CheckScanfHandler::HandleScanfSpecifier( EmitFormatDiagnostic( S.PDiag(diag::warn_printf_conversion_argument_type_mismatch) - << ATR.getRepresentativeTypeName(S.Context) << Ex->getType() + << AT.getRepresentativeTypeName(S.Context) << Ex->getType() << Ex->getSourceRange(), - getLocationOfByte(CS.getStart()), - /*IsStringLocation*/true, + Ex->getLocStart(), + /*IsStringLocation*/false, getSpecifierRange(startSpecifier, specifierLen), FixItHint::CreateReplacement( getSpecifierRange(startSpecifier, specifierLen), @@ -2573,10 +2825,10 @@ bool CheckScanfHandler::HandleScanfSpecifier( } else { EmitFormatDiagnostic( S.PDiag(diag::warn_printf_conversion_argument_type_mismatch) - << ATR.getRepresentativeTypeName(S.Context) << Ex->getType() + << AT.getRepresentativeTypeName(S.Context) << Ex->getType() << Ex->getSourceRange(), - getLocationOfByte(CS.getStart()), - /*IsStringLocation*/true, + Ex->getLocStart(), + /*IsStringLocation*/false, getSpecifierRange(startSpecifier, specifierLen)); } } @@ -2589,10 +2841,10 @@ void Sema::CheckFormatString(const StringLiteral *FExpr, Expr **Args, unsigned NumArgs, bool HasVAListArg, unsigned format_idx, unsigned firstDataArg, FormatStringType Type, - bool inFunctionCall) { + bool inFunctionCall, VariadicCallType CallType) { // CHECK: is the format string a wide literal? - if (!FExpr->isAscii()) { + if (!FExpr->isAscii() && !FExpr->isUTF8()) { CheckFormatHandler::EmitFormatDiagnostic( *this, inFunctionCall, Args[format_idx], PDiag(diag::warn_format_string_is_wide_literal), FExpr->getLocStart(), @@ -2617,18 +2869,17 @@ void Sema::CheckFormatString(const StringLiteral *FExpr, if (Type == FST_Printf || Type == FST_NSString) { CheckPrintfHandler H(*this, FExpr, OrigFormatExpr, firstDataArg, - numDataArgs, isa(OrigFormatExpr), + numDataArgs, (Type == FST_NSString), Str, HasVAListArg, Args, NumArgs, format_idx, - inFunctionCall); + inFunctionCall, CallType); if (!analyze_format_string::ParsePrintfString(H, Str, Str + StrLen, getLangOpts())) H.DoneProcessing(); } else if (Type == FST_Scanf) { - CheckScanfHandler H(*this, FExpr, OrigFormatExpr, firstDataArg, - numDataArgs, isa(OrigFormatExpr), + CheckScanfHandler H(*this, FExpr, OrigFormatExpr, firstDataArg, numDataArgs, Str, HasVAListArg, Args, NumArgs, format_idx, - inFunctionCall); + inFunctionCall, CallType); if (!analyze_format_string::ParseScanfString(H, Str, Str + StrLen, getLangOpts())) @@ -2728,19 +2979,43 @@ void Sema::CheckMemaccessArguments(const CallExpr *Call, // TODO: For strncpy() and friends, this could suggest sizeof(dst) // over sizeof(src) as well. unsigned ActionIdx = 0; // Default is to suggest dereferencing. + StringRef ReadableName = FnName->getName(); + if (const UnaryOperator *UnaryOp = dyn_cast(Dest)) if (UnaryOp->getOpcode() == UO_AddrOf) ActionIdx = 1; // If its an address-of operator, just remove it. if (Context.getTypeSize(PointeeTy) == Context.getCharWidth()) ActionIdx = 2; // If the pointee's size is sizeof(char), // suggest an explicit length. - unsigned DestSrcSelect = - (BId == Builtin::BIstrndup ? 
1 : ArgIdx); - DiagRuntimeBehavior(SizeOfArg->getExprLoc(), Dest, + + // If the function is defined as a builtin macro, do not show macro + // expansion. + SourceLocation SL = SizeOfArg->getExprLoc(); + SourceRange DSR = Dest->getSourceRange(); + SourceRange SSR = SizeOfArg->getSourceRange(); + SourceManager &SM = PP.getSourceManager(); + + if (SM.isMacroArgExpansion(SL)) { + ReadableName = Lexer::getImmediateMacroName(SL, SM, LangOpts); + SL = SM.getSpellingLoc(SL); + DSR = SourceRange(SM.getSpellingLoc(DSR.getBegin()), + SM.getSpellingLoc(DSR.getEnd())); + SSR = SourceRange(SM.getSpellingLoc(SSR.getBegin()), + SM.getSpellingLoc(SSR.getEnd())); + } + + DiagRuntimeBehavior(SL, SizeOfArg, PDiag(diag::warn_sizeof_pointer_expr_memaccess) - << FnName << DestSrcSelect << ActionIdx - << Dest->getSourceRange() - << SizeOfArg->getSourceRange()); + << ReadableName + << PointeeTy + << DestTy + << DSR + << SSR); + DiagRuntimeBehavior(SL, SizeOfArg, + PDiag(diag::warn_sizeof_pointer_expr_memaccess_note) + << ActionIdx + << SSR); + break; } } @@ -2826,6 +3101,19 @@ static const Expr *ignoreLiteralAdditions(const Expr *Ex, ASTContext &Ctx) { return Ex; } +static bool isConstantSizeArrayWithMoreThanOneElement(QualType Ty, + ASTContext &Context) { + // Only handle constant-sized or VLAs, but not flexible members. + if (const ConstantArrayType *CAT = Context.getAsConstantArrayType(Ty)) { + // Only issue the FIXIT for arrays of size > 1. + if (CAT->getSize().getSExtValue() <= 1) + return false; + } else if (!Ty->isVariableArrayType()) { + return false; + } + return true; +} + // Warn if the user has made the 'size' argument to strlcpy or strlcat // be the size of the source, instead of the destination. void Sema::CheckStrlcpycatArguments(const CallExpr *Call, @@ -2876,16 +3164,8 @@ void Sema::CheckStrlcpycatArguments(const CallExpr *Call, // pointers if we know the actual size, like if DstArg is 'array+2' // we could say 'sizeof(array)-2'. const Expr *DstArg = Call->getArg(0)->IgnoreParenImpCasts(); - QualType DstArgTy = DstArg->getType(); - - // Only handle constant-sized or VLAs, but not flexible members. - if (const ConstantArrayType *CAT = Context.getAsConstantArrayType(DstArgTy)) { - // Only issue the FIXIT for arrays of size > 1. - if (CAT->getSize().getSExtValue() <= 1) - return; - } else if (!DstArgTy->isVariableArrayType()) { + if (!isConstantSizeArrayWithMoreThanOneElement(DstArg->getType(), Context)) return; - } SmallString<128> sizeString; llvm::raw_svector_ostream OS(sizeString); @@ -2967,26 +3247,23 @@ void Sema::CheckStrncatArguments(const CallExpr *CE, SM.getSpellingLoc(SR.getEnd())); } + // Check if the destination is an array (rather than a pointer to an array). + QualType DstTy = DstArg->getType(); + bool isKnownSizeArray = isConstantSizeArrayWithMoreThanOneElement(DstTy, + Context); + if (!isKnownSizeArray) { + if (PatternType == 1) + Diag(SL, diag::warn_strncat_wrong_size) << SR; + else + Diag(SL, diag::warn_strncat_src_size) << SR; + return; + } + if (PatternType == 1) Diag(SL, diag::warn_strncat_large_size) << SR; else Diag(SL, diag::warn_strncat_src_size) << SR; - // Output a FIXIT hint if the destination is an array (rather than a - // pointer to an array). This could be enhanced to handle some - // pointers if we know the actual size, like if DstArg is 'array+2' - // we could say 'sizeof(array)-2'. - QualType DstArgTy = DstArg->getType(); - - // Only handle constant-sized or VLAs, but not flexible members. 
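The reworked memaccess check names the function as the user spelled it (even through macros) and moves the advice into a separate note; the pattern it flags:

    int *buf = new int[64];
    memset(buf, 0, sizeof(buf));        // warns: sizeof(buf) is the pointer's size,
                                        // not the buffer's
    memset(buf, 0, 64 * sizeof(*buf));  // the note's suggestion: dereference instead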
- if (const ConstantArrayType *CAT = Context.getAsConstantArrayType(DstArgTy)) { - // Only issue the FIXIT for arrays of size > 1. - if (CAT->getSize().getSExtValue() <= 1) - return; - } else if (!DstArgTy->isVariableArrayType()) { - return; - } - SmallString<128> sizeString; llvm::raw_svector_ostream OS(sizeString); OS << "sizeof("; @@ -3002,8 +3279,10 @@ void Sema::CheckStrncatArguments(const CallExpr *CE, //===--- CHECK: Return Address of Stack Variable --------------------------===// -static Expr *EvalVal(Expr *E, SmallVectorImpl &refVars); -static Expr *EvalAddr(Expr* E, SmallVectorImpl &refVars); +static Expr *EvalVal(Expr *E, SmallVectorImpl &refVars, + Decl *ParentDecl); +static Expr *EvalAddr(Expr* E, SmallVectorImpl &refVars, + Decl *ParentDecl); /// CheckReturnStackAddr - Check if a return statement returns the address /// of a stack variable. @@ -3018,9 +3297,9 @@ Sema::CheckReturnStackAddr(Expr *RetValExp, QualType lhsType, // label addresses or references to temporaries. if (lhsType->isPointerType() || (!getLangOpts().ObjCAutoRefCount && lhsType->isBlockPointerType())) { - stackE = EvalAddr(RetValExp, refVars); + stackE = EvalAddr(RetValExp, refVars, /*ParentDecl=*/0); } else if (lhsType->isReferenceType()) { - stackE = EvalVal(RetValExp, refVars); + stackE = EvalVal(RetValExp, refVars, /*ParentDecl=*/0); } if (stackE == 0) @@ -3094,7 +3373,8 @@ Sema::CheckReturnStackAddr(Expr *RetValExp, QualType lhsType, /// * arbitrary interplay between "&" and "*" operators /// * pointer arithmetic from an address of a stack variable /// * taking the address of an array element where the array is on the stack -static Expr *EvalAddr(Expr *E, SmallVectorImpl &refVars) { +static Expr *EvalAddr(Expr *E, SmallVectorImpl &refVars, + Decl *ParentDecl) { if (E->isTypeDependent()) return NULL; @@ -3120,7 +3400,7 @@ static Expr *EvalAddr(Expr *E, SmallVectorImpl &refVars) { V->getType()->isReferenceType() && V->hasInit()) { // Add the reference variable to the "trail". refVars.push_back(DR); - return EvalAddr(V->getInit(), refVars); + return EvalAddr(V->getInit(), refVars, ParentDecl); } return NULL; @@ -3132,7 +3412,7 @@ static Expr *EvalAddr(Expr *E, SmallVectorImpl &refVars) { UnaryOperator *U = cast(E); if (U->getOpcode() == UO_AddrOf) - return EvalVal(U->getSubExpr(), refVars); + return EvalVal(U->getSubExpr(), refVars, ParentDecl); else return NULL; } @@ -3153,7 +3433,7 @@ static Expr *EvalAddr(Expr *E, SmallVectorImpl &refVars) { if (!Base->getType()->isPointerType()) Base = B->getRHS(); assert (Base->getType()->isPointerType()); - return EvalAddr(Base, refVars); + return EvalAddr(Base, refVars, ParentDecl); } // For conditional operators we need to see if either the LHS or RHS are @@ -3165,7 +3445,7 @@ static Expr *EvalAddr(Expr *E, SmallVectorImpl &refVars) { if (Expr *lhsExpr = C->getLHS()) { // In C++, we can have a throw-expression, which has 'void' type. if (!lhsExpr->getType()->isVoidType()) - if (Expr* LHS = EvalAddr(lhsExpr, refVars)) + if (Expr* LHS = EvalAddr(lhsExpr, refVars, ParentDecl)) return LHS; } @@ -3173,7 +3453,7 @@ static Expr *EvalAddr(Expr *E, SmallVectorImpl &refVars) { if (C->getRHS()->getType()->isVoidType()) return NULL; - return EvalAddr(C->getRHS(), refVars); + return EvalAddr(C->getRHS(), refVars, ParentDecl); } case Stmt::BlockExprClass: @@ -3185,7 +3465,8 @@ static Expr *EvalAddr(Expr *E, SmallVectorImpl &refVars) { return E; // address of label. 
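CheckStrncatArguments now distinguishes destinations whose array size is known (which also get the sizeof-based fixit) from plain pointers (which get only the warning). The misuse it targets, with the suggested size being along the lines of sizeof(dst) - strlen(dst) - 1:

    char dst[32];
    strncat(dst, src, sizeof(dst));  // flagged: leaves no room for the terminator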
case Stmt::ExprWithCleanupsClass: - return EvalAddr(cast(E)->getSubExpr(), refVars); + return EvalAddr(cast(E)->getSubExpr(), refVars, + ParentDecl); // For casts, we need to handle conversions from arrays to // pointer values, and pointer-to-pointer conversions. @@ -3209,10 +3490,10 @@ static Expr *EvalAddr(Expr *E, SmallVectorImpl &refVars) { case CK_CPointerToObjCPointerCast: case CK_BlockPointerToObjCPointerCast: case CK_AnyPointerToBlockPointerCast: - return EvalAddr(SubExpr, refVars); + return EvalAddr(SubExpr, refVars, ParentDecl); case CK_ArrayToPointerDecay: - return EvalVal(SubExpr, refVars); + return EvalVal(SubExpr, refVars, ParentDecl); default: return 0; @@ -3222,7 +3503,7 @@ static Expr *EvalAddr(Expr *E, SmallVectorImpl &refVars) { case Stmt::MaterializeTemporaryExprClass: if (Expr *Result = EvalAddr( cast(E)->GetTemporaryExpr(), - refVars)) + refVars, ParentDecl)) return Result; return E; @@ -3236,7 +3517,8 @@ static Expr *EvalAddr(Expr *E, SmallVectorImpl &refVars) { /// EvalVal - This function is complements EvalAddr in the mutual recursion. /// See the comments for EvalAddr for more details. -static Expr *EvalVal(Expr *E, SmallVectorImpl &refVars) { +static Expr *EvalVal(Expr *E, SmallVectorImpl &refVars, + Decl *ParentDecl) { do { // We should only be called for evaluating non-pointer expressions, or // expressions with a pointer type that are not used as references but instead @@ -3258,7 +3540,7 @@ do { } case Stmt::ExprWithCleanupsClass: - return EvalVal(cast(E)->getSubExpr(), refVars); + return EvalVal(cast(E)->getSubExpr(), refVars,ParentDecl); case Stmt::DeclRefExprClass: { // When we hit a DeclRefExpr we are looking at code that refers to a @@ -3266,7 +3548,11 @@ do { // local storage within the function, and if so, return the expression. DeclRefExpr *DR = cast(E); - if (VarDecl *V = dyn_cast(DR->getDecl())) + if (VarDecl *V = dyn_cast(DR->getDecl())) { + // Check if it refers to itself, e.g. "int& i = i;". + if (V == ParentDecl) + return DR; + if (V->hasLocalStorage()) { if (!V->getType()->isReferenceType()) return DR; @@ -3276,9 +3562,10 @@ do { if (V->hasInit()) { // Add the reference variable to the "trail". refVars.push_back(DR); - return EvalVal(V->getInit(), refVars); + return EvalVal(V->getInit(), refVars, V); } } + } return NULL; } @@ -3290,7 +3577,7 @@ do { UnaryOperator *U = cast(E); if (U->getOpcode() == UO_Deref) - return EvalAddr(U->getSubExpr(), refVars); + return EvalAddr(U->getSubExpr(), refVars, ParentDecl); return NULL; } @@ -3299,7 +3586,7 @@ do { // Array subscripts are potential references to data on the stack. We // retrieve the DeclRefExpr* for the array variable if it indeed // has local storage. - return EvalAddr(cast(E)->getBase(), refVars); + return EvalAddr(cast(E)->getBase(), refVars,ParentDecl); } case Stmt::ConditionalOperatorClass: { @@ -3309,10 +3596,10 @@ do { // Handle the GNU extension for missing LHS. if (Expr *lhsExpr = C->getLHS()) - if (Expr *LHS = EvalVal(lhsExpr, refVars)) + if (Expr *LHS = EvalVal(lhsExpr, refVars, ParentDecl)) return LHS; - return EvalVal(C->getRHS(), refVars); + return EvalVal(C->getRHS(), refVars, ParentDecl); } // Accesses to members are potential references to data on the stack. 
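Threading ParentDecl through EvalAddr/EvalVal appears to serve one purpose: when the return-value analysis follows a reference variable into its own initializer, it can now notice the cycle instead of recursing forever. A sketch:

    int &broken() {
      int &r = r;  // self-referential init: the DeclRefExpr's decl == ParentDecl
      return r;    // EvalVal stops here rather than re-entering r's initializer
    }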
@@ -3328,13 +3615,13 @@ do { if (M->getMemberDecl()->getType()->isReferenceType()) return NULL; - return EvalVal(M->getBase(), refVars); + return EvalVal(M->getBase(), refVars, ParentDecl); } case Stmt::MaterializeTemporaryExprClass: if (Expr *Result = EvalVal( cast(E)->GetTemporaryExpr(), - refVars)) + refVars, ParentDecl)) return Result; return E; @@ -3357,8 +3644,6 @@ do { /// Issue a warning if these are no self-comparisons, as they are not likely /// to do what the programmer intended. void Sema::CheckFloatComparison(SourceLocation Loc, Expr* LHS, Expr *RHS) { - bool EmitWarning = true; - Expr* LeftExprSansParen = LHS->IgnoreParenImpCasts(); Expr* RightExprSansParen = RHS->IgnoreParenImpCasts(); @@ -3367,7 +3652,7 @@ void Sema::CheckFloatComparison(SourceLocation Loc, Expr* LHS, Expr *RHS) { if (DeclRefExpr* DRL = dyn_cast(LeftExprSansParen)) if (DeclRefExpr* DRR = dyn_cast(RightExprSansParen)) if (DRL->getDecl() == DRR->getDecl()) - EmitWarning = false; + return; // Special case: check for comparisons against literals that can be exactly @@ -3375,32 +3660,26 @@ void Sema::CheckFloatComparison(SourceLocation Loc, Expr* LHS, Expr *RHS) { // is a heuristic: often comparison against such literals are used to // detect if a value in a variable has not changed. This clearly can // lead to false negatives. - if (EmitWarning) { - if (FloatingLiteral* FLL = dyn_cast(LeftExprSansParen)) { - if (FLL->isExact()) - EmitWarning = false; - } else - if (FloatingLiteral* FLR = dyn_cast(RightExprSansParen)){ - if (FLR->isExact()) - EmitWarning = false; - } - } + if (FloatingLiteral* FLL = dyn_cast(LeftExprSansParen)) { + if (FLL->isExact()) + return; + } else + if (FloatingLiteral* FLR = dyn_cast(RightExprSansParen)) + if (FLR->isExact()) + return; // Check for comparisons with builtin types. - if (EmitWarning) - if (CallExpr* CL = dyn_cast(LeftExprSansParen)) - if (CL->isBuiltinCall()) - EmitWarning = false; + if (CallExpr* CL = dyn_cast(LeftExprSansParen)) + if (CL->isBuiltinCall()) + return; - if (EmitWarning) - if (CallExpr* CR = dyn_cast(RightExprSansParen)) - if (CR->isBuiltinCall()) - EmitWarning = false; + if (CallExpr* CR = dyn_cast(RightExprSansParen)) + if (CR->isBuiltinCall()) + return; // Emit the diagnostic. - if (EmitWarning) - Diag(Loc, diag::warn_floatingpoint_eq) - << LHS->getSourceRange() << RHS->getSourceRange(); + Diag(Loc, diag::warn_floatingpoint_eq) + << LHS->getSourceRange() << RHS->getSourceRange(); } //===--- CHECK: Integer mixed-sign comparisons (-Wsign-compare) --------===// @@ -3927,9 +4206,10 @@ static void AnalyzeComparison(Sema &S, BinaryOperator *E) { return; } - S.Diag(E->getOperatorLoc(), diag::warn_mixed_sign_comparison) - << LHS->getType() << RHS->getType() - << LHS->getSourceRange() << RHS->getSourceRange(); + S.DiagRuntimeBehavior(E->getOperatorLoc(), E, + S.PDiag(diag::warn_mixed_sign_comparison) + << LHS->getType() << RHS->getType() + << LHS->getSourceRange() << RHS->getSourceRange()); } /// Analyzes an attempt to assign the given value to a bitfield. @@ -3970,7 +4250,7 @@ static bool AnalyzeBitFieldAssignment(Sema &S, FieldDecl *Bitfield, Expr *Init, // Check whether the stored value is equal to the original value. 
TruncatedValue = TruncatedValue.extend(OriginalWidth); - if (Value == TruncatedValue) + if (llvm::APSInt::isSameValue(Value, TruncatedValue)) return false; // Special-case bitfields of width 1: booleans are naturally 0/1, and @@ -4044,8 +4324,17 @@ void DiagnoseFloatingLiteralImpCast(Sema &S, FloatingLiteral *FL, QualType T, == llvm::APFloat::opOK && isExact) return; + SmallString<16> PrettySourceValue; + Value.toString(PrettySourceValue); + SmallString<16> PrettyTargetValue; + if (T->isSpecificBuiltinType(BuiltinType::Bool)) + PrettyTargetValue = IntegerValue == 0 ? "false" : "true"; + else + IntegerValue.toString(PrettyTargetValue); + S.Diag(FL->getExprLoc(), diag::warn_impcast_literal_float_to_integer) - << FL->getType() << T << FL->getSourceRange() << SourceRange(CContext); + << FL->getType() << T.getUnqualifiedType() << PrettySourceValue + << PrettyTargetValue << FL->getSourceRange() << SourceRange(CContext); } std::string PrettyPrintInRange(const llvm::APSInt &Value, IntRange Range) { @@ -4112,7 +4401,6 @@ void CheckImplicitConversion(Sema &S, Expr *E, QualType T, } } } - return; // Other casts to bool are not checked. } // Strip vector types. @@ -4176,7 +4464,7 @@ void CheckImplicitConversion(Sema &S, Expr *E, QualType T, } // If the target is integral, always warn. - if ((TargetBT && TargetBT->isInteger())) { + if (TargetBT && TargetBT->isInteger()) { if (S.SourceMgr.isInSystemMacro(CC)) return; @@ -4196,19 +4484,26 @@ void CheckImplicitConversion(Sema &S, Expr *E, QualType T, return; } - if (!Source->isIntegerType() || !Target->isIntegerType()) - return; - if ((E->isNullPointerConstant(S.Context, Expr::NPC_ValueDependentIsNotNull) - == Expr::NPCK_GNUNull) && Target->isIntegerType()) { + == Expr::NPCK_GNUNull) && !Target->isAnyPointerType() + && !Target->isBlockPointerType() && !Target->isMemberPointerType()) { SourceLocation Loc = E->getSourceRange().getBegin(); if (Loc.isMacroID()) Loc = S.SourceMgr.getImmediateExpansionRange(Loc).first; - S.Diag(Loc, diag::warn_impcast_null_pointer_to_integer) - << T << Loc << clang::SourceRange(CC); - return; + if (!Loc.isMacroID() || CC.isMacroID()) + S.Diag(Loc, diag::warn_impcast_null_pointer_to_integer) + << T << clang::SourceRange(CC) + << FixItHint::CreateReplacement(Loc, S.getFixItZeroLiteralForType(T)); } + if (!Source->isIntegerType() || !Target->isIntegerType()) + return; + + // TODO: remove this early return once the false positives for constant->bool + // in templates, macros, etc, are reduced or removed. 
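Two diagnostics become more informative here; a sketch of what they report after this change:

    struct S { int b : 3; } s;
    s.b = 15;         // truncation warning: the stored value is -1, and
                      // isSameValue now compares without width mismatches
    bool flag = 1.5;  // the float-literal warning now prints both sides,
                      // e.g. converting '1.5' to 'true'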
+ if (Target->isSpecificBuiltinType(BuiltinType::Bool)) + return; + IntRange SourceRange = GetExprRange(S.Context, E); IntRange TargetRange = IntRange::forTargetOfCanonicalType(S.Context, Target); @@ -4293,14 +4588,15 @@ void CheckImplicitConversion(Sema &S, Expr *E, QualType T, return; } -void CheckConditionalOperator(Sema &S, ConditionalOperator *E, QualType T); +void CheckConditionalOperator(Sema &S, ConditionalOperator *E, + SourceLocation CC, QualType T); void CheckConditionalOperand(Sema &S, Expr *E, QualType T, SourceLocation CC, bool &ICContext) { E = E->IgnoreParenImpCasts(); if (isa(E)) - return CheckConditionalOperator(S, cast(E), T); + return CheckConditionalOperator(S, cast(E), CC, T); AnalyzeImplicitConversions(S, E, CC); if (E->getType() != T) @@ -4308,9 +4604,8 @@ void CheckConditionalOperand(Sema &S, Expr *E, QualType T, return; } -void CheckConditionalOperator(Sema &S, ConditionalOperator *E, QualType T) { - SourceLocation CC = E->getQuestionLoc(); - +void CheckConditionalOperator(Sema &S, ConditionalOperator *E, + SourceLocation CC, QualType T) { AnalyzeImplicitConversions(S, E->getCond(), CC); bool Suspicious = false; @@ -4352,7 +4647,7 @@ void AnalyzeImplicitConversions(Sema &S, Expr *OrigE, SourceLocation CC) { // were being fed directly into the output. if (isa(E)) { ConditionalOperator *CO = cast(E); - CheckConditionalOperator(S, CO, T); + CheckConditionalOperator(S, CO, CC, T); return; } @@ -4417,7 +4712,7 @@ void AnalyzeImplicitConversions(Sema &S, Expr *OrigE, SourceLocation CC) { /// conversion void Sema::CheckImplicitConversions(Expr *E, SourceLocation CC) { // Don't diagnose in unevaluated contexts. - if (ExprEvalContexts.back().Context == Sema::Unevaluated) + if (isUnevaluatedContext()) return; // Don't diagnose for value- or type-dependent expressions. @@ -4457,7 +4752,7 @@ bool Sema::CheckParmsForFunctionDef(ParmVarDecl **P, ParmVarDecl **PEnd, // This is also C++ [dcl.fct]p6. if (!Param->isInvalidDecl() && RequireCompleteType(Param->getLocation(), Param->getType(), - diag::err_typecheck_decl_incomplete_type)) { + diag::err_typecheck_decl_incomplete_type)) { Param->setInvalidDecl(); HasInvalidParm = true; } @@ -4478,7 +4773,7 @@ bool Sema::CheckParmsForFunctionDef(ParmVarDecl **P, ParmVarDecl **PEnd, QualType PType = Param->getOriginalType(); if (const ArrayType *AT = Context.getAsArrayType(PType)) { if (AT->getSizeModifier() == ArrayType::Star) { - // FIXME: This diagnosic should point the the '[*]' if source-location + // FIXME: This diagnosic should point the '[*]' if source-location // information is added for it. Diag(Param->getLocation(), diag::err_array_star_in_function_definition); } @@ -4556,11 +4851,23 @@ static bool IsTailPaddedMemberArray(Sema &S, llvm::APInt Size, // Don't consider sizes resulting from macro expansions or template argument // substitution to form C89 tail-padded arrays. - ConstantArrayTypeLoc TL = - cast(FD->getTypeSourceInfo()->getTypeLoc()); - const Expr *SizeExpr = dyn_cast(TL.getSizeExpr()); - if (!SizeExpr || SizeExpr->getExprLoc().isMacroID()) - return false; + + TypeSourceInfo *TInfo = FD->getTypeSourceInfo(); + while (TInfo) { + TypeLoc TL = TInfo->getTypeLoc(); + // Look through typedefs. 
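The GNU-null check now fires for any non-pointer target and attaches a zero-literal fixit; typical C++ code it flags:

    int n = NULL;    // warning: implicit conversion of NULL constant to 'int';
                     // the fixit replaces NULL with 0
    char *p = NULL;  // untouched: pointer-like targets are excluded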
+ const TypedefTypeLoc *TTL = dyn_cast(&TL); + if (TTL) { + const TypedefNameDecl *TDL = TTL->getTypedefNameDecl(); + TInfo = TDL->getTypeSourceInfo(); + continue; + } + ConstantArrayTypeLoc CTL = cast(TL); + const Expr *SizeExpr = dyn_cast(CTL.getSizeExpr()); + if (!SizeExpr || SizeExpr->getExprLoc().isMacroID()) + return false; + break; + } const RecordDecl *RD = dyn_cast(FD->getDeclContext()); if (!RD) return false; @@ -4966,7 +5273,7 @@ bool Sema::checkUnsafeAssigns(SourceLocation Loc, while (ImplicitCastExpr *cast = dyn_cast(RHS)) { if (cast->getCastKind() == CK_ARCConsumeObject) { Diag(Loc, diag::warn_arc_retained_assign) - << (LT == Qualifiers::OCL_ExplicitNone) + << (LT == Qualifiers::OCL_ExplicitNone) << 1 << RHS->getSourceRange(); return true; } @@ -5023,6 +5330,16 @@ void Sema::checkUnsafeExprAssigns(SourceLocation Loc, RHS = cast->getSubExpr(); } } + else if (Attributes & ObjCPropertyDecl::OBJC_PR_weak) { + while (ImplicitCastExpr *cast = dyn_cast(RHS)) { + if (cast->getCastKind() == CK_ARCConsumeObject) { + Diag(Loc, diag::warn_arc_retained_assign) + << 0 << 0<< RHS->getSourceRange(); + return; + } + RHS = cast->getSubExpr(); + } + } } } diff --git a/lib/Sema/SemaCodeComplete.cpp b/lib/Sema/SemaCodeComplete.cpp index 1ee7532..9fa757d 100644 --- a/lib/Sema/SemaCodeComplete.cpp +++ b/lib/Sema/SemaCodeComplete.cpp @@ -158,7 +158,7 @@ namespace { /// \brief The completion context in which we are gathering results. CodeCompletionContext CompletionContext; - /// \brief If we are in an instance method definition, the @implementation + /// \brief If we are in an instance method definition, the \@implementation /// object. ObjCImplementationDecl *ObjCImplementation; @@ -1181,7 +1181,7 @@ bool ResultBuilder::IsImpossibleToSatisfy(NamedDecl *ND) const { return false; } -/// \rief Determines whether the given declaration is an Objective-C +/// \brief Determines whether the given declaration is an Objective-C /// instance variable. bool ResultBuilder::IsObjCIvar(NamedDecl *ND) const { return isa(ND); @@ -1414,7 +1414,7 @@ static const char *GetCompletionTypeString(QualType T, if (!T.getLocalQualifiers()) { // Built-in type names are constant strings. if (const BuiltinType *BT = dyn_cast(T)) - return BT->getName(Policy); + return BT->getNameAsCString(Policy); // Anonymous tag types are constant strings. if (const TagType *TagT = dyn_cast(T)) @@ -1955,6 +1955,19 @@ static void AddOrdinaryNameResults(Sema::ParserCompletionContext CCC, AddObjCExpressionResults(Results, true); } + if (SemaRef.getLangOpts().C11) { + // _Alignof + Builder.AddResultTypeChunk("size_t"); + if (SemaRef.getASTContext().Idents.get("alignof").hasMacroDefinition()) + Builder.AddTypedTextChunk("alignof"); + else + Builder.AddTypedTextChunk("_Alignof"); + Builder.AddChunk(CodeCompletionString::CK_LeftParen); + Builder.AddPlaceholderChunk("type"); + Builder.AddChunk(CodeCompletionString::CK_RightParen); + Results.AddResult(Result(Builder.TakeString())); + } + // sizeof expression Builder.AddResultTypeChunk("size_t"); Builder.AddTypedTextChunk("sizeof"); @@ -2356,11 +2369,11 @@ AddFunctionTypeQualsToCompletionString(CodeCompletionBuilder &Result, // Handle multiple qualifiers. 
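IsTailPaddedMemberArray now walks through typedefs before expecting a ConstantArrayTypeLoc, so the C89 tail-padded-array idiom is recognized even when spelled through an alias:

    typedef char tail_t[1];
    struct packet {
      int len;
      tail_t data;  // treated like 'char data[1]' at the end of the struct
    };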
std::string QualsStr; - if (Proto->getTypeQuals() & Qualifiers::Const) + if (Proto->isConst()) QualsStr += " const"; - if (Proto->getTypeQuals() & Qualifiers::Volatile) + if (Proto->isVolatile()) QualsStr += " volatile"; - if (Proto->getTypeQuals() & Qualifiers::Restrict) + if (Proto->isRestrict()) QualsStr += " restrict"; Result.AddInformativeChunk(Result.getAllocator().CopyString(QualsStr)); } @@ -2440,8 +2453,10 @@ static void AddTypedNameChunk(ASTContext &Context, const PrintingPolicy &Policy, CodeCompletionString *CodeCompletionResult::CreateCodeCompletionString(Sema &S, CodeCompletionAllocator &Allocator, - CodeCompletionTUInfo &CCTUInfo) { - return CreateCodeCompletionString(S.Context, S.PP, Allocator, CCTUInfo); + CodeCompletionTUInfo &CCTUInfo, + bool IncludeBriefComments) { + return CreateCodeCompletionString(S.Context, S.PP, Allocator, CCTUInfo, + IncludeBriefComments); } /// \brief If possible, create a new code completion string for the given @@ -2454,7 +2469,8 @@ CodeCompletionString * CodeCompletionResult::CreateCodeCompletionString(ASTContext &Ctx, Preprocessor &PP, CodeCompletionAllocator &Allocator, - CodeCompletionTUInfo &CCTUInfo) { + CodeCompletionTUInfo &CCTUInfo, + bool IncludeBriefComments) { CodeCompletionBuilder Result(Allocator, CCTUInfo, Priority, Availability); PrintingPolicy Policy = getCompletionPrintingPolicy(Ctx, PP); @@ -2524,7 +2540,14 @@ CodeCompletionResult::CreateCodeCompletionString(ASTContext &Ctx, assert(Kind == RK_Declaration && "Missed a result kind?"); NamedDecl *ND = Declaration; Result.addParentContext(ND->getDeclContext()); - + + if (IncludeBriefComments) { + // Add documentation comment, if it exists. + if (const RawComment *RC = Ctx.getRawCommentForAnyRedecl(ND)) { + Result.addBriefComment(RC->getBriefText(Ctx)); + } + } + if (StartsNestedNameSpecifier) { Result.AddTypedTextChunk( Result.getAllocator().CopyString(ND->getNameAsString())); @@ -2842,6 +2865,7 @@ CXCursorKind clang::getCursorKindForDecl(Decl *D) { case Decl::ClassTemplatePartialSpecialization: return CXCursor_ClassTemplatePartialSpecialization; case Decl::UsingDirective: return CXCursor_UsingDirective; + case Decl::TranslationUnit: return CXCursor_TranslationUnit; case Decl::Using: case Decl::UnresolvedUsingValue: @@ -3270,9 +3294,6 @@ struct Sema::CodeCompleteExpressionData { /// \brief Perform code-completion in an expression context when we know what /// type we're looking for. -/// -/// \param IntegralConstantExpression Only permit integral constant -/// expressions. void Sema::CodeCompleteExpression(Scope *S, const CodeCompleteExpressionData &Data) { typedef CodeCompletionResult Result; @@ -3333,7 +3354,25 @@ void Sema::CodeCompletePostfixExpression(Scope *S, ExprResult E) { /// property name. typedef llvm::SmallPtrSet AddedPropertiesSet; -static void AddObjCProperties(ObjCContainerDecl *Container, +/// \brief Retrieve the container definition, if any? 
+static ObjCContainerDecl *getContainerDef(ObjCContainerDecl *Container) { + if (ObjCInterfaceDecl *Interface = dyn_cast(Container)) { + if (Interface->hasDefinition()) + return Interface->getDefinition(); + + return Interface; + } + + if (ObjCProtocolDecl *Protocol = dyn_cast(Container)) { + if (Protocol->hasDefinition()) + return Protocol->getDefinition(); + + return Protocol; + } + return Container; +} + +static void AddObjCProperties(ObjCContainerDecl *Container, bool AllowCategories, bool AllowNullaryMethods, DeclContext *CurContext, @@ -3341,6 +3380,9 @@ static void AddObjCProperties(ObjCContainerDecl *Container, ResultBuilder &Results) { typedef CodeCompletionResult Result; + // Retrieve the definition. + Container = getContainerDef(Container); + // Add properties in this container. for (ObjCContainerDecl::prop_iterator P = Container->prop_begin(), PEnd = Container->prop_end(); @@ -3616,6 +3658,8 @@ void Sema::CodeCompleteCase(Scope *S) { // Code-complete the cases of a switch statement over an enumeration type // by providing the list of EnumDecl *Enum = type->castAs()->getDecl(); + if (EnumDecl *Def = Enum->getDefinition()) + Enum = Def; // Determine which enumerators we have already seen in the switch statement. // FIXME: Ideally, we would also be able to look *past* the code-completion @@ -4273,27 +4317,28 @@ void Sema::CodeCompleteLambdaIntroducer(Scope *S, LambdaIntroducer &Intro, Results.data(), Results.size()); } -// Macro that expands to @Keyword or Keyword, depending on whether NeedAt is -// true or false. -#define OBJC_AT_KEYWORD_NAME(NeedAt,Keyword) NeedAt? "@" #Keyword : #Keyword +/// Macro that optionally prepends an "@" to the string literal passed in via +/// Keyword, depending on whether NeedAt is true or false. +#define OBJC_AT_KEYWORD_NAME(NeedAt,Keyword) ((NeedAt)? "@" Keyword : Keyword) + static void AddObjCImplementationResults(const LangOptions &LangOpts, ResultBuilder &Results, bool NeedAt) { typedef CodeCompletionResult Result; // Since we have an implementation, we can end it. - Results.AddResult(Result(OBJC_AT_KEYWORD_NAME(NeedAt,end))); + Results.AddResult(Result(OBJC_AT_KEYWORD_NAME(NeedAt,"end"))); CodeCompletionBuilder Builder(Results.getAllocator(), Results.getCodeCompletionTUInfo()); if (LangOpts.ObjC2) { // @dynamic - Builder.AddTypedTextChunk(OBJC_AT_KEYWORD_NAME(NeedAt,dynamic)); + Builder.AddTypedTextChunk(OBJC_AT_KEYWORD_NAME(NeedAt,"dynamic")); Builder.AddChunk(CodeCompletionString::CK_HorizontalSpace); Builder.AddPlaceholderChunk("property"); Results.AddResult(Result(Builder.TakeString())); // @synthesize - Builder.AddTypedTextChunk(OBJC_AT_KEYWORD_NAME(NeedAt,synthesize)); + Builder.AddTypedTextChunk(OBJC_AT_KEYWORD_NAME(NeedAt,"synthesize")); Builder.AddChunk(CodeCompletionString::CK_HorizontalSpace); Builder.AddPlaceholderChunk("property"); Results.AddResult(Result(Builder.TakeString())); @@ -4306,17 +4351,17 @@ static void AddObjCInterfaceResults(const LangOptions &LangOpts, typedef CodeCompletionResult Result; // Since we have an interface or protocol, we can end it. 
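getContainerDef, added above, redirects a lookup from a forward declaration of an interface or protocol to its definition when one exists, since only the definition carries the full set of properties and methods. The same prefer-the-definition pattern in a self-contained sketch (hypothetical Decl type, not the clang one):

    #include <cassert>

    // Hypothetical stand-in for a declaration that may have a separate
    // definition elsewhere in its redeclaration chain.
    struct Decl {
      Decl *Definition; // null if only forward-declared
      bool hasDefinition() const { return Definition != nullptr; }
      Decl *getDefinition() { return Definition; }
    };

    // Prefer the definition when there is one; otherwise fall back to the
    // declaration we were handed, mirroring getContainerDef.
    Decl *preferDefinition(Decl *D) {
      if (D->hasDefinition())
        return D->getDefinition();
      return D;
    }

    int main() {
      Decl Def = {nullptr};
      Decl Fwd = {&Def};
      assert(preferDefinition(&Fwd) == &Def); // redirected to the definition
      assert(preferDefinition(&Def) == &Def); // already the definition
    }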
- Results.AddResult(Result(OBJC_AT_KEYWORD_NAME(NeedAt,end))); + Results.AddResult(Result(OBJC_AT_KEYWORD_NAME(NeedAt,"end"))); if (LangOpts.ObjC2) { // @property - Results.AddResult(Result(OBJC_AT_KEYWORD_NAME(NeedAt,property))); + Results.AddResult(Result(OBJC_AT_KEYWORD_NAME(NeedAt,"property"))); // @required - Results.AddResult(Result(OBJC_AT_KEYWORD_NAME(NeedAt,required))); + Results.AddResult(Result(OBJC_AT_KEYWORD_NAME(NeedAt,"required"))); // @optional - Results.AddResult(Result(OBJC_AT_KEYWORD_NAME(NeedAt,optional))); + Results.AddResult(Result(OBJC_AT_KEYWORD_NAME(NeedAt,"optional"))); } } @@ -4326,7 +4371,7 @@ static void AddObjCTopLevelResults(ResultBuilder &Results, bool NeedAt) { Results.getCodeCompletionTUInfo()); // @class name ; - Builder.AddTypedTextChunk(OBJC_AT_KEYWORD_NAME(NeedAt,class)); + Builder.AddTypedTextChunk(OBJC_AT_KEYWORD_NAME(NeedAt,"class")); Builder.AddChunk(CodeCompletionString::CK_HorizontalSpace); Builder.AddPlaceholderChunk("name"); Results.AddResult(Result(Builder.TakeString())); @@ -4335,26 +4380,26 @@ static void AddObjCTopLevelResults(ResultBuilder &Results, bool NeedAt) { // @interface name // FIXME: Could introduce the whole pattern, including superclasses and // such. - Builder.AddTypedTextChunk(OBJC_AT_KEYWORD_NAME(NeedAt,interface)); + Builder.AddTypedTextChunk(OBJC_AT_KEYWORD_NAME(NeedAt,"interface")); Builder.AddChunk(CodeCompletionString::CK_HorizontalSpace); Builder.AddPlaceholderChunk("class"); Results.AddResult(Result(Builder.TakeString())); // @protocol name - Builder.AddTypedTextChunk(OBJC_AT_KEYWORD_NAME(NeedAt,protocol)); + Builder.AddTypedTextChunk(OBJC_AT_KEYWORD_NAME(NeedAt,"protocol")); Builder.AddChunk(CodeCompletionString::CK_HorizontalSpace); Builder.AddPlaceholderChunk("protocol"); Results.AddResult(Result(Builder.TakeString())); // @implementation name - Builder.AddTypedTextChunk(OBJC_AT_KEYWORD_NAME(NeedAt,implementation)); + Builder.AddTypedTextChunk(OBJC_AT_KEYWORD_NAME(NeedAt,"implementation")); Builder.AddChunk(CodeCompletionString::CK_HorizontalSpace); Builder.AddPlaceholderChunk("class"); Results.AddResult(Result(Builder.TakeString())); } // @compatibility_alias name - Builder.AddTypedTextChunk(OBJC_AT_KEYWORD_NAME(NeedAt,compatibility_alias)); + Builder.AddTypedTextChunk(OBJC_AT_KEYWORD_NAME(NeedAt,"compatibility_alias")); Builder.AddChunk(CodeCompletionString::CK_HorizontalSpace); Builder.AddPlaceholderChunk("alias"); Builder.AddChunk(CodeCompletionString::CK_HorizontalSpace); @@ -4389,9 +4434,9 @@ static void AddObjCExpressionResults(ResultBuilder &Results, bool NeedAt) { const char *EncodeType = "char[]"; if (Results.getSema().getLangOpts().CPlusPlus || Results.getSema().getLangOpts().ConstStrings) - EncodeType = " const char[]"; + EncodeType = "const char[]"; Builder.AddResultTypeChunk(EncodeType); - Builder.AddTypedTextChunk(OBJC_AT_KEYWORD_NAME(NeedAt,encode)); + Builder.AddTypedTextChunk(OBJC_AT_KEYWORD_NAME(NeedAt,"encode")); Builder.AddChunk(CodeCompletionString::CK_LeftParen); Builder.AddPlaceholderChunk("type-name"); Builder.AddChunk(CodeCompletionString::CK_RightParen); @@ -4399,7 +4444,7 @@ static void AddObjCExpressionResults(ResultBuilder &Results, bool NeedAt) { // @protocol ( protocol-name ) Builder.AddResultTypeChunk("Protocol *"); - Builder.AddTypedTextChunk(OBJC_AT_KEYWORD_NAME(NeedAt,protocol)); + Builder.AddTypedTextChunk(OBJC_AT_KEYWORD_NAME(NeedAt,"protocol")); Builder.AddChunk(CodeCompletionString::CK_LeftParen); Builder.AddPlaceholderChunk("protocol-name"); 
Builder.AddChunk(CodeCompletionString::CK_RightParen); @@ -4407,31 +4452,43 @@ static void AddObjCExpressionResults(ResultBuilder &Results, bool NeedAt) { // @selector ( selector ) Builder.AddResultTypeChunk("SEL"); - Builder.AddTypedTextChunk(OBJC_AT_KEYWORD_NAME(NeedAt,selector)); + Builder.AddTypedTextChunk(OBJC_AT_KEYWORD_NAME(NeedAt,"selector")); Builder.AddChunk(CodeCompletionString::CK_LeftParen); Builder.AddPlaceholderChunk("selector"); Builder.AddChunk(CodeCompletionString::CK_RightParen); Results.AddResult(Result(Builder.TakeString())); - - // @[ objects, ... ] - Builder.AddTypedTextChunk(OBJC_AT_KEYWORD_NAME(NeedAt,[)); - Builder.AddChunk(CodeCompletionString::CK_HorizontalSpace); + + // @"string" + Builder.AddResultTypeChunk("NSString *"); + Builder.AddTypedTextChunk(OBJC_AT_KEYWORD_NAME(NeedAt,"\"")); + Builder.AddPlaceholderChunk("string"); + Builder.AddTextChunk("\""); + Results.AddResult(Result(Builder.TakeString())); + + // @[objects, ...] + Builder.AddResultTypeChunk("NSArray *"); + Builder.AddTypedTextChunk(OBJC_AT_KEYWORD_NAME(NeedAt,"[")); Builder.AddPlaceholderChunk("objects, ..."); - Builder.AddChunk(CodeCompletionString::CK_HorizontalSpace); Builder.AddChunk(CodeCompletionString::CK_RightBracket); Results.AddResult(Result(Builder.TakeString())); - // @{ key : object, ... } - Builder.AddTypedTextChunk(OBJC_AT_KEYWORD_NAME(NeedAt,{)); - Builder.AddChunk(CodeCompletionString::CK_HorizontalSpace); + // @{key : object, ...} + Builder.AddResultTypeChunk("NSDictionary *"); + Builder.AddTypedTextChunk(OBJC_AT_KEYWORD_NAME(NeedAt,"{")); Builder.AddPlaceholderChunk("key"); Builder.AddChunk(CodeCompletionString::CK_HorizontalSpace); Builder.AddChunk(CodeCompletionString::CK_Colon); Builder.AddChunk(CodeCompletionString::CK_HorizontalSpace); Builder.AddPlaceholderChunk("object, ..."); - Builder.AddChunk(CodeCompletionString::CK_HorizontalSpace); Builder.AddChunk(CodeCompletionString::CK_RightBrace); Results.AddResult(Result(Builder.TakeString())); + + // @(expression) + Builder.AddResultTypeChunk("id"); + Builder.AddTypedTextChunk(OBJC_AT_KEYWORD_NAME(NeedAt, "(")); + Builder.AddPlaceholderChunk("expression"); + Builder.AddChunk(CodeCompletionString::CK_RightParen); + Results.AddResult(Result(Builder.TakeString())); } static void AddObjCStatementResults(ResultBuilder &Results, bool NeedAt) { @@ -4442,7 +4499,7 @@ static void AddObjCStatementResults(ResultBuilder &Results, bool NeedAt) { if (Results.includeCodePatterns()) { // @try { statements } @catch ( declaration ) { statements } @finally // { statements } - Builder.AddTypedTextChunk(OBJC_AT_KEYWORD_NAME(NeedAt,try)); + Builder.AddTypedTextChunk(OBJC_AT_KEYWORD_NAME(NeedAt,"try")); Builder.AddChunk(CodeCompletionString::CK_LeftBrace); Builder.AddPlaceholderChunk("statements"); Builder.AddChunk(CodeCompletionString::CK_RightBrace); @@ -4461,14 +4518,14 @@ static void AddObjCStatementResults(ResultBuilder &Results, bool NeedAt) { } // @throw - Builder.AddTypedTextChunk(OBJC_AT_KEYWORD_NAME(NeedAt,throw)); + Builder.AddTypedTextChunk(OBJC_AT_KEYWORD_NAME(NeedAt,"throw")); Builder.AddChunk(CodeCompletionString::CK_HorizontalSpace); Builder.AddPlaceholderChunk("expression"); Results.AddResult(Result(Builder.TakeString())); if (Results.includeCodePatterns()) { // @synchronized ( expression ) { statements } - Builder.AddTypedTextChunk(OBJC_AT_KEYWORD_NAME(NeedAt,synchronized)); + Builder.AddTypedTextChunk(OBJC_AT_KEYWORD_NAME(NeedAt,"synchronized")); Builder.AddChunk(CodeCompletionString::CK_HorizontalSpace); 
Builder.AddChunk(CodeCompletionString::CK_LeftParen); Builder.AddPlaceholderChunk("expression"); @@ -4484,11 +4541,11 @@ static void AddObjCVisibilityResults(const LangOptions &LangOpts, ResultBuilder &Results, bool NeedAt) { typedef CodeCompletionResult Result; - Results.AddResult(Result(OBJC_AT_KEYWORD_NAME(NeedAt,private))); - Results.AddResult(Result(OBJC_AT_KEYWORD_NAME(NeedAt,protected))); - Results.AddResult(Result(OBJC_AT_KEYWORD_NAME(NeedAt,public))); + Results.AddResult(Result(OBJC_AT_KEYWORD_NAME(NeedAt,"private"))); + Results.AddResult(Result(OBJC_AT_KEYWORD_NAME(NeedAt,"protected"))); + Results.AddResult(Result(OBJC_AT_KEYWORD_NAME(NeedAt,"public"))); if (LangOpts.ObjC2) - Results.AddResult(Result(OBJC_AT_KEYWORD_NAME(NeedAt,package))); + Results.AddResult(Result(OBJC_AT_KEYWORD_NAME(NeedAt,"package"))); } void Sema::CodeCompleteObjCAtVisibility(Scope *S) { @@ -4616,12 +4673,12 @@ void Sema::CodeCompleteObjCPropertyFlags(Scope *S, ObjCDeclSpec &ODS) { Results.data(),Results.size()); } -/// \brief Descripts the kind of Objective-C method that we want to find +/// \brief Describes the kind of Objective-C method that we want to find /// via code completion. enum ObjCMethodKind { - MK_Any, //< Any kind of method, provided it means other specified criteria. - MK_ZeroArgSelector, //< Zero-argument (unary) selector. - MK_OneArgSelector //< One-argument selector. + MK_Any, ///< Any kind of method, provided it means other specified criteria. + MK_ZeroArgSelector, ///< Zero-argument (unary) selector. + MK_OneArgSelector ///< One-argument selector. }; static bool isAcceptableObjCSelector(Selector Sel, @@ -4673,8 +4730,8 @@ namespace { /// /// \param Container the container in which we'll look to find methods. /// -/// \param WantInstance whether to add instance methods (only); if false, this -/// routine will add factory methods (only). +/// \param WantInstanceMethods Whether to add instance methods (only); if +/// false, this routine will add factory methods (only). /// /// \param CurContext the context in which we're performing the lookup that /// finds methods. @@ -4694,17 +4751,18 @@ static void AddObjCMethods(ObjCContainerDecl *Container, ResultBuilder &Results, bool InOriginalClass = true) { typedef CodeCompletionResult Result; + Container = getContainerDef(Container); for (ObjCContainerDecl::method_iterator M = Container->meth_begin(), MEnd = Container->meth_end(); M != MEnd; ++M) { - if ((*M)->isInstanceMethod() == WantInstanceMethods) { + if (M->isInstanceMethod() == WantInstanceMethods) { // Check whether the selector identifiers we've been given are a // subset of the identifiers for this particular method. if (!isAcceptableObjCMethod(*M, WantKind, SelIdents, NumSelIdents, AllowSameLength)) continue; - if (!Selectors.insert((*M)->getSelector())) + if (!Selectors.insert(M->getSelector())) continue; Result R = Result(*M, 0); @@ -5825,7 +5883,8 @@ void Sema::CodeCompleteObjCPropertyDefinition(Scope *S) { return; // Ignore any properties that have already been implemented. - for (DeclContext::decl_iterator D = Container->decls_begin(), + Container = getContainerDef(Container); + for (DeclContext::decl_iterator D = Container->decls_begin(), DEnd = Container->decls_end(); D != DEnd; ++D) if (ObjCPropertyImplDecl *PropertyImpl = dyn_cast(*D)) @@ -5958,9 +6017,12 @@ static void FindImplementableMethods(ASTContext &Context, KnownMethodsMap &KnownMethods, bool InOriginalClass = true) { if (ObjCInterfaceDecl *IFace = dyn_cast(Container)) { - // Recurse into protocols. 
+ // Make sure we have a definition; that's what we'll walk. if (!IFace->hasDefinition()) return; + + IFace = IFace->getDefinition(); + Container = IFace; const ObjCList &Protocols = IFace->getReferencedProtocols(); @@ -6002,16 +6064,20 @@ static void FindImplementableMethods(ASTContext &Context, } if (ObjCProtocolDecl *Protocol = dyn_cast(Container)) { - if (Protocol->hasDefinition()) { - // Recurse into protocols. - const ObjCList &Protocols - = Protocol->getReferencedProtocols(); - for (ObjCList::iterator I = Protocols.begin(), - E = Protocols.end(); - I != E; ++I) - FindImplementableMethods(Context, *I, WantInstanceMethods, ReturnType, - KnownMethods, false); - } + // Make sure we have a definition; that's what we'll walk. + if (!Protocol->hasDefinition()) + return; + Protocol = Protocol->getDefinition(); + Container = Protocol; + + // Recurse into protocols. + const ObjCList &Protocols + = Protocol->getReferencedProtocols(); + for (ObjCList::iterator I = Protocols.begin(), + E = Protocols.end(); + I != E; ++I) + FindImplementableMethods(Context, *I, WantInstanceMethods, ReturnType, + KnownMethods, false); } // Add methods in this container. This operation occurs last because @@ -6020,12 +6086,12 @@ static void FindImplementableMethods(ASTContext &Context, for (ObjCContainerDecl::method_iterator M = Container->meth_begin(), MEnd = Container->meth_end(); M != MEnd; ++M) { - if ((*M)->isInstanceMethod() == WantInstanceMethods) { + if (M->isInstanceMethod() == WantInstanceMethods) { if (!ReturnType.isNull() && - !Context.hasSameUnqualifiedType(ReturnType, (*M)->getResultType())) + !Context.hasSameUnqualifiedType(ReturnType, M->getResultType())) continue; - KnownMethods[(*M)->getSelector()] = std::make_pair(*M, InOriginalClass); + KnownMethods[M->getSelector()] = std::make_pair(*M, InOriginalClass); } } } diff --git a/lib/Sema/SemaDecl.cpp b/lib/Sema/SemaDecl.cpp index 1227e92..3aae99a 100644 --- a/lib/Sema/SemaDecl.cpp +++ b/lib/Sema/SemaDecl.cpp @@ -21,6 +21,7 @@ #include "clang/AST/ASTConsumer.h" #include "clang/AST/ASTContext.h" #include "clang/AST/CXXInheritance.h" +#include "clang/AST/CommentDiagnostic.h" #include "clang/AST/DeclCXX.h" #include "clang/AST/DeclObjC.h" #include "clang/AST/DeclTemplate.h" @@ -60,7 +61,8 @@ namespace { class TypeNameValidatorCCC : public CorrectionCandidateCallback { public: - TypeNameValidatorCCC(bool AllowInvalid) : AllowInvalidDecl(AllowInvalid) { + TypeNameValidatorCCC(bool AllowInvalid, bool WantClass=false) + : AllowInvalidDecl(AllowInvalid), WantClassName(WantClass) { WantExpressionKeywords = false; WantCXXNamedCasts = false; WantRemainingKeywords = false; @@ -71,15 +73,52 @@ class TypeNameValidatorCCC : public CorrectionCandidateCallback { return (isa(ND) || isa(ND)) && (AllowInvalidDecl || !ND->isInvalidDecl()); else - return candidate.isKeyword(); + return !WantClassName && candidate.isKeyword(); } private: bool AllowInvalidDecl; + bool WantClassName; }; } +/// \brief Determine whether the token kind starts a simple-type-specifier. 
+bool Sema::isSimpleTypeSpecifier(tok::TokenKind Kind) const { + switch (Kind) { + // FIXME: Take into account the current language when deciding whether a + // token kind is a valid type specifier + case tok::kw_short: + case tok::kw_long: + case tok::kw___int64: + case tok::kw___int128: + case tok::kw_signed: + case tok::kw_unsigned: + case tok::kw_void: + case tok::kw_char: + case tok::kw_int: + case tok::kw_half: + case tok::kw_float: + case tok::kw_double: + case tok::kw_wchar_t: + case tok::kw_bool: + case tok::kw___underlying_type: + return true; + + case tok::annot_typename: + case tok::kw_char16_t: + case tok::kw_char32_t: + case tok::kw_typeof: + case tok::kw_decltype: + return getLangOpts().CPlusPlus; + + default: + break; + } + + return false; +} + /// \brief If the identifier refers to a type name within this scope, /// return the declaration of that type. /// @@ -173,7 +212,7 @@ ParsedType Sema::getTypeName(IdentifierInfo &II, SourceLocation NameLoc, case LookupResult::NotFound: case LookupResult::NotFoundInCurrentInstantiation: if (CorrectedII) { - TypeNameValidatorCCC Validator(true); + TypeNameValidatorCCC Validator(true, isClassName); TypoCorrection Correction = CorrectTypo(Result.getLookupNameInfo(), Kind, S, SS, Validator); IdentifierInfo *NewII = Correction.getCorrectionAsIdentifierInfo(); @@ -202,8 +241,8 @@ ParsedType Sema::getTypeName(IdentifierInfo &II, SourceLocation NameLoc, std::string CorrectedStr(Correction.getAsString(getLangOpts())); std::string CorrectedQuotedStr( Correction.getQuoted(getLangOpts())); - Diag(NameLoc, diag::err_unknown_typename_suggest) - << Result.getLookupName() << CorrectedQuotedStr + Diag(NameLoc, diag::err_unknown_type_or_class_name_suggest) + << Result.getLookupName() << CorrectedQuotedStr << isClassName << FixItHint::CreateReplacement(SourceRange(NameLoc), CorrectedStr); if (NamedDecl *FirstDecl = Correction.getCorrectionDecl()) @@ -359,7 +398,7 @@ bool Sema::isMicrosoftMissingTypename(const CXXScopeSpec *SS, Scope *S) { return CurContext->isFunctionOrMethod() || S->isFunctionPrototypeScope(); } -bool Sema::DiagnoseUnknownTypeName(const IdentifierInfo &II, +bool Sema::DiagnoseUnknownTypeName(IdentifierInfo *&II, SourceLocation IILoc, Scope *S, CXXScopeSpec *SS, @@ -370,7 +409,7 @@ bool Sema::DiagnoseUnknownTypeName(const IdentifierInfo &II, // There may have been a typo in the name of the type. Look up typo // results, in case we have something that we can suggest. TypeNameValidatorCCC Validator(false); - if (TypoCorrection Corrected = CorrectTypo(DeclarationNameInfo(&II, IILoc), + if (TypoCorrection Corrected = CorrectTypo(DeclarationNameInfo(II, IILoc), LookupOrdinaryName, S, SS, Validator)) { std::string CorrectedStr(Corrected.getAsString(getLangOpts())); @@ -378,19 +417,23 @@ bool Sema::DiagnoseUnknownTypeName(const IdentifierInfo &II, if (Corrected.isKeyword()) { // We corrected to a keyword. - // FIXME: Actually recover with the keyword we suggest, and emit a fix-it. + IdentifierInfo *NewII = Corrected.getCorrectionAsIdentifierInfo(); + if (!isSimpleTypeSpecifier(NewII->getTokenID())) + CorrectedQuotedStr = "the keyword " + CorrectedQuotedStr; Diag(IILoc, diag::err_unknown_typename_suggest) - << &II << CorrectedQuotedStr; + << II << CorrectedQuotedStr + << FixItHint::CreateReplacement(SourceRange(IILoc), CorrectedStr); + II = NewII; } else { NamedDecl *Result = Corrected.getCorrectionDecl(); // We found a similarly-named type or interface; suggest that. 
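Sema::isSimpleTypeSpecifier, introduced at the start of this hunk, is a flat switch over token kinds with one group that only counts as a type specifier in C++ mode; DiagnoseUnknownTypeName then consults it so a typo correction to a keyword is only presented as a plain type name when it can actually start one. The shape of that classifier, sketched with a hypothetical token-kind enum (the real list lives in clang's TokenKinds.def):

    #include <cassert>

    // Hypothetical token kinds for illustration only.
    enum TokenKind { kw_int, kw_float, kw_void, kw_decltype, kw_char16_t, kw_return };

    bool isSimpleTypeSpecifier(TokenKind Kind, bool CPlusPlus) {
      switch (Kind) {
      case kw_int:
      case kw_float:
      case kw_void:
        return true;        // type specifiers in every language mode
      case kw_decltype:
      case kw_char16_t:
        return CPlusPlus;   // only type specifiers when compiling C++
      default:
        return false;       // e.g. kw_return can never start a type
      }
    }

    int main() {
      assert(isSimpleTypeSpecifier(kw_int, false));
      assert(!isSimpleTypeSpecifier(kw_decltype, false)); // C mode
      assert(isSimpleTypeSpecifier(kw_decltype, true));   // C++ mode
      assert(!isSimpleTypeSpecifier(kw_return, true));
    }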
if (!SS || !SS->isSet()) Diag(IILoc, diag::err_unknown_typename_suggest) - << &II << CorrectedQuotedStr + << II << CorrectedQuotedStr << FixItHint::CreateReplacement(SourceRange(IILoc), CorrectedStr); else if (DeclContext *DC = computeDeclContext(*SS, false)) Diag(IILoc, diag::err_unknown_nested_typename_suggest) - << &II << DC << CorrectedQuotedStr << SS->getRange() + << II << DC << CorrectedQuotedStr << SS->getRange() << FixItHint::CreateReplacement(SourceRange(IILoc), CorrectedStr); else llvm_unreachable("could not have corrected a typo here"); @@ -409,7 +452,7 @@ bool Sema::DiagnoseUnknownTypeName(const IdentifierInfo &II, if (getLangOpts().CPlusPlus) { // See if II is a class template that the user forgot to pass arguments to. UnqualifiedId Name; - Name.setIdentifier(&II, IILoc); + Name.setIdentifier(II, IILoc); CXXScopeSpec EmptySS; TemplateTy TemplateResult; bool MemberOfUnknownSpecialization; @@ -430,21 +473,21 @@ bool Sema::DiagnoseUnknownTypeName(const IdentifierInfo &II, // (struct, union, enum) from Parser::ParseImplicitInt here, instead? if (!SS || (!SS->isSet() && !SS->isInvalid())) - Diag(IILoc, diag::err_unknown_typename) << &II; + Diag(IILoc, diag::err_unknown_typename) << II; else if (DeclContext *DC = computeDeclContext(*SS, false)) Diag(IILoc, diag::err_typename_nested_not_found) - << &II << DC << SS->getRange(); + << II << DC << SS->getRange(); else if (isDependentScopeSpecifier(*SS)) { unsigned DiagID = diag::err_typename_missing; if (getLangOpts().MicrosoftMode && isMicrosoftMissingTypename(SS, S)) DiagID = diag::warn_typename_missing; Diag(SS->getRange().getBegin(), DiagID) - << (NestedNameSpecifier *)SS->getScopeRep() << II.getName() + << (NestedNameSpecifier *)SS->getScopeRep() << II->getName() << SourceRange(SS->getRange().getBegin(), IILoc) << FixItHint::CreateInsertion(SS->getRange().getBegin(), "typename "); - SuggestedType = ActOnTypenameType(S, SourceLocation(), *SS, II, IILoc) - .get(); + SuggestedType = ActOnTypenameType(S, SourceLocation(), + *SS, *II, IILoc).get(); } else { assert(SS && SS->isInvalid() && "Invalid scope specifier has already been diagnosed"); @@ -470,6 +513,55 @@ static bool isResultTypeOrTemplate(LookupResult &R, const Token &NextToken) { return false; } +static bool isTagTypeWithMissingTag(Sema &SemaRef, LookupResult &Result, + Scope *S, CXXScopeSpec &SS, + IdentifierInfo *&Name, + SourceLocation NameLoc) { + Result.clear(Sema::LookupTagName); + SemaRef.LookupParsedName(Result, S, &SS); + if (TagDecl *Tag = Result.getAsSingle()) { + const char *TagName = 0; + const char *FixItTagName = 0; + switch (Tag->getTagKind()) { + case TTK_Class: + TagName = "class"; + FixItTagName = "class "; + break; + + case TTK_Enum: + TagName = "enum"; + FixItTagName = "enum "; + break; + + case TTK_Struct: + TagName = "struct"; + FixItTagName = "struct "; + break; + + case TTK_Union: + TagName = "union"; + FixItTagName = "union "; + break; + } + + SemaRef.Diag(NameLoc, diag::err_use_of_tag_name_without_tag) + << Name << TagName << SemaRef.getLangOpts().CPlusPlus + << FixItHint::CreateInsertion(NameLoc, FixItTagName); + + LookupResult R(SemaRef, Name, NameLoc, Sema::LookupOrdinaryName); + if (SemaRef.LookupParsedName(R, S, &SS)) { + for (LookupResult::iterator I = R.begin(), IEnd = R.end(); + I != IEnd; ++I) + SemaRef.Diag((*I)->getLocation(), diag::note_decl_hiding_tag_type) + << Name << TagName; + } + return true; + } + + Result.clear(Sema::LookupOrdinaryName); + return false; +} + Sema::NameClassification Sema::ClassifyName(Scope *S, CXXScopeSpec &SS, 
IdentifierInfo *&Name, @@ -533,41 +625,9 @@ Corrected: // In C, we first see whether there is a tag type by the same name, in // which case it's likely that the user just forget to write "enum", // "struct", or "union". - if (!getLangOpts().CPlusPlus && !SecondTry) { - Result.clear(LookupTagName); - LookupParsedName(Result, S, &SS); - if (TagDecl *Tag = Result.getAsSingle()) { - const char *TagName = 0; - const char *FixItTagName = 0; - switch (Tag->getTagKind()) { - case TTK_Class: - TagName = "class"; - FixItTagName = "class "; - break; - - case TTK_Enum: - TagName = "enum"; - FixItTagName = "enum "; - break; - - case TTK_Struct: - TagName = "struct"; - FixItTagName = "struct "; - break; - - case TTK_Union: - TagName = "union"; - FixItTagName = "union "; - break; - } - - Diag(NameLoc, diag::err_use_of_tag_name_without_tag) - << Name << TagName << getLangOpts().CPlusPlus - << FixItHint::CreateInsertion(NameLoc, FixItTagName); - break; - } - - Result.clear(LookupOrdinaryName); + if (!getLangOpts().CPlusPlus && !SecondTry && + isTagTypeWithMissingTag(*this, Result, S, SS, Name, NameLoc)) { + break; } // Perform typo correction to determine if there is another name that is @@ -575,6 +635,19 @@ Corrected: if (!SecondTry) { SecondTry = true; CorrectionCandidateCallback DefaultValidator; + // Try to limit which sets of keywords should be included in typo + // correction based on what the next token is. + DefaultValidator.WantTypeSpecifiers = + NextToken.is(tok::l_paren) || NextToken.is(tok::less) || + NextToken.is(tok::identifier) || NextToken.is(tok::star) || + NextToken.is(tok::amp) || NextToken.is(tok::l_square); + DefaultValidator.WantExpressionKeywords = + NextToken.is(tok::l_paren) || NextToken.is(tok::identifier) || + NextToken.is(tok::arrow) || NextToken.is(tok::period); + DefaultValidator.WantRemainingKeywords = + NextToken.is(tok::l_paren) || NextToken.is(tok::semi) || + NextToken.is(tok::identifier) || NextToken.is(tok::l_brace); + DefaultValidator.WantCXXNamedCasts = false; if (TypoCorrection Corrected = CorrectTypo(Result.getLookupNameInfo(), Result.getLookupKind(), S, &SS, DefaultValidator)) { @@ -740,7 +813,7 @@ Corrected: if (TypeDecl *Type = dyn_cast(FirstDecl)) { DiagnoseUseOfDecl(Type, NameLoc); QualType T = Context.getTypeDeclType(Type); - return ParsedType::make(T); + return ParsedType::make(T); } ObjCInterfaceDecl *Class = dyn_cast(FirstDecl); @@ -764,6 +837,23 @@ Corrected: QualType T = Context.getObjCInterfaceType(Class); return ParsedType::make(T); } + + // Check for a tag type hidden by a non-type decl in a few cases where it + // seems likely a type is wanted instead of the non-type that was found. + if (!getLangOpts().ObjC1 && FirstDecl && !isa(FirstDecl) && + !isa(FirstDecl)) { + bool NextIsOp = NextToken.is(tok::amp) || NextToken.is(tok::star); + if ((NextToken.is(tok::identifier) || + (NextIsOp && FirstDecl->isFunctionOrFunctionTemplate())) && + isTagTypeWithMissingTag(*this, Result, S, SS, Name, NameLoc)) { + FirstDecl = (*Result.begin())->getUnderlyingDecl(); + if (TypeDecl *Type = dyn_cast(FirstDecl)) { + DiagnoseUseOfDecl(Type, NameLoc); + QualType T = Context.getTypeDeclType(Type); + return ParsedType::make(T); + } + } + } if (!Result.empty() && (*Result.begin())->isCXXClassMember()) return BuildPossibleImplicitMemberExpr(SS, SourceLocation(), Result, 0); @@ -1132,9 +1222,9 @@ void Sema::MarkUnusedFileScopedDecl(const DeclaratorDecl *D) { return; // First should already be in the vector. 
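isTagTypeWithMissingTag, factored out above, drives the C diagnostic for code like "S x;" where S only names a struct tag: it reports err_use_of_tag_name_without_tag and attaches a fix-it that inserts the missing keyword in front of the name. The kind-to-keyword mapping it switches over, as a standalone sketch:

    #include <cassert>
    #include <string>

    // Mirrors clang's TagTypeKind values for the purposes of this sketch.
    enum TagKind { TTK_Struct, TTK_Class, TTK_Union, TTK_Enum };

    // Map a tag kind to the keyword the user likely forgot, including the
    // trailing space used by the fix-it insertion ("S x;" -> "struct S x;").
    const char *fixItTagKeyword(TagKind K) {
      switch (K) {
      case TTK_Struct: return "struct ";
      case TTK_Class:  return "class ";
      case TTK_Union:  return "union ";
      case TTK_Enum:   return "enum ";
      }
      return "";
    }

    int main() {
      std::string Fixed = std::string(fixItTagKeyword(TTK_Struct)) + "S x;";
      assert(Fixed == "struct S x;");
    }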
} - if (ShouldWarnIfUnusedFileScopedDecl(D)) - UnusedFileScopedDecls.push_back(D); - } + if (ShouldWarnIfUnusedFileScopedDecl(D)) + UnusedFileScopedDecls.push_back(D); +} static bool ShouldDiagnoseUnusedDecl(const NamedDecl *D) { if (D->isInvalidDecl()) @@ -1281,7 +1371,7 @@ void Sema::ActOnEndFunctionDeclarator() { /// /// \param IdLoc The location of the name in the translation unit. /// -/// \param TypoCorrection If true, this routine will attempt typo correction +/// \param DoTypoCorrection If true, this routine will attempt typo correction /// if there is no class with the given name. /// /// \returns The declaration of the named Objective-C class, or NULL if the @@ -1484,12 +1574,22 @@ void Sema::MergeTypedefNameDecl(TypedefNameDecl *New, LookupResult &OldDecls) { switch (TypeID->getLength()) { default: break; case 2: - if (!TypeID->isStr("id")) - break; - Context.setObjCIdRedefinitionType(New->getUnderlyingType()); - // Install the built-in type for 'id', ignoring the current definition. - New->setTypeForDecl(Context.getObjCIdType().getTypePtr()); - return; + { + if (!TypeID->isStr("id")) + break; + QualType T = New->getUnderlyingType(); + if (!T->isPointerType()) + break; + if (!T->isVoidPointerType()) { + QualType PT = T->getAs()->getPointeeType(); + if (!PT->isStructureType()) + break; + } + Context.setObjCIdRedefinitionType(T); + // Install the built-in type for 'id', ignoring the current definition. + New->setTypeForDecl(Context.getObjCIdType().getTypePtr()); + return; + } case 5: if (!TypeID->isStr("Class")) break; @@ -1599,6 +1699,13 @@ void Sema::MergeTypedefNameDecl(TypedefNameDecl *New, LookupResult &OldDecls) { /// attribute. static bool DeclHasAttr(const Decl *D, const Attr *A) { + // There can be multiple AvailabilityAttr in a Decl. Make sure we copy + // all of them. It is mergeAvailabilityAttr in SemaDeclAttr.cpp that is + // responsible for making sure they are consistent. 
+ const AvailabilityAttr *AA = dyn_cast(A); + if (AA) + return false; + const OwnershipAttr *OA = dyn_cast(A); const AnnotateAttr *Ann = dyn_cast(A); for (Decl::attr_iterator i = D->attr_begin(), e = D->attr_end(); i != e; ++i) @@ -1617,9 +1724,90 @@ DeclHasAttr(const Decl *D, const Attr *A) { return false; } +bool Sema::mergeDeclAttribute(Decl *D, InheritableAttr *Attr) { + InheritableAttr *NewAttr = NULL; + if (AvailabilityAttr *AA = dyn_cast(Attr)) + NewAttr = mergeAvailabilityAttr(D, AA->getRange(), AA->getPlatform(), + AA->getIntroduced(), AA->getDeprecated(), + AA->getObsoleted(), AA->getUnavailable(), + AA->getMessage()); + else if (VisibilityAttr *VA = dyn_cast(Attr)) + NewAttr = mergeVisibilityAttr(D, VA->getRange(), VA->getVisibility()); + else if (DLLImportAttr *ImportA = dyn_cast(Attr)) + NewAttr = mergeDLLImportAttr(D, ImportA->getRange()); + else if (DLLExportAttr *ExportA = dyn_cast(Attr)) + NewAttr = mergeDLLExportAttr(D, ExportA->getRange()); + else if (FormatAttr *FA = dyn_cast(Attr)) + NewAttr = mergeFormatAttr(D, FA->getRange(), FA->getType(), + FA->getFormatIdx(), FA->getFirstArg()); + else if (SectionAttr *SA = dyn_cast(Attr)) + NewAttr = mergeSectionAttr(D, SA->getRange(), SA->getName()); + else if (!DeclHasAttr(D, Attr)) + NewAttr = cast(Attr->clone(Context)); + + if (NewAttr) { + NewAttr->setInherited(true); + D->addAttr(NewAttr); + return true; + } + + return false; +} + +static const Decl *getDefinition(const Decl *D) { + if (const TagDecl *TD = dyn_cast(D)) + return TD->getDefinition(); + if (const VarDecl *VD = dyn_cast(D)) + return VD->getDefinition(); + if (const FunctionDecl *FD = dyn_cast(D)) { + const FunctionDecl* Def; + if (FD->hasBody(Def)) + return Def; + } + return NULL; +} + +static bool hasAttribute(const Decl *D, attr::Kind Kind) { + for (Decl::attr_iterator I = D->attr_begin(), E = D->attr_end(); + I != E; ++I) { + Attr *Attribute = *I; + if (Attribute->getKind() == Kind) + return true; + } + return false; +} + +/// checkNewAttributesAfterDef - If we already have a definition, check that +/// there are no new attributes in this declaration. +static void checkNewAttributesAfterDef(Sema &S, Decl *New, const Decl *Old) { + if (!New->hasAttrs()) + return; + + const Decl *Def = getDefinition(Old); + if (!Def || Def == New) + return; + + AttrVec &NewAttributes = New->getAttrs(); + for (unsigned I = 0, E = NewAttributes.size(); I != E;) { + const Attr *NewAttribute = NewAttributes[I]; + if (hasAttribute(Def, NewAttribute->getKind())) { + ++I; + continue; // regular attr merging will take care of validating this. + } + S.Diag(NewAttribute->getLocation(), + diag::warn_attribute_precede_definition); + S.Diag(Def->getLocation(), diag::note_previous_definition); + NewAttributes.erase(NewAttributes.begin() + I); + --E; + } +} + /// mergeDeclAttributes - Copy attributes from the Old decl to the New one. 
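checkNewAttributesAfterDef, added in this hunk, warns when an attribute first appears on a redeclaration that comes after the definition and then drops it, while attribute kinds already present on the definition are left alone for regular merging to validate. Its erase-while-iterating loop, reduced to plain containers:

    #include <iostream>
    #include <set>
    #include <string>
    #include <vector>

    int main() {
      // Attribute kinds on the definition vs. on a later redeclaration.
      std::set<std::string> OnDefinition = {"noreturn"};
      std::vector<std::string> OnRedecl = {"noreturn", "section"};

      // Same index-based loop shape as checkNewAttributesAfterDef: keep the
      // kinds the definition already has, warn about and erase the new ones.
      for (unsigned I = 0, E = OnRedecl.size(); I != E;) {
        if (OnDefinition.count(OnRedecl[I])) {
          ++I; // regular attribute merging will validate this one
          continue;
        }
        std::cout << "warning: attribute '" << OnRedecl[I]
                  << "' after definition is ignored\n";
        OnRedecl.erase(OnRedecl.begin() + I);
        --E; // the container shrank; stay at index I
      }
      // OnRedecl is now just {"noreturn"}.
    }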
void Sema::mergeDeclAttributes(Decl *New, Decl *Old, bool MergeDeprecation) { + // attributes declared post-definition are currently ignored + checkNewAttributesAfterDef(*this, New, Old); + if (!Old->hasAttrs()) return; @@ -1640,12 +1828,8 @@ void Sema::mergeDeclAttributes(Decl *New, Decl *Old, isa(*i))) continue; - if (!DeclHasAttr(New, *i)) { - InheritableAttr *newAttr = cast((*i)->clone(Context)); - newAttr->setInherited(true); - New->addAttr(newAttr); + if (mergeDeclAttribute(New, *i)) foundAny = true; - } } if (!foundAny) New->dropAttrs(); @@ -1909,22 +2093,27 @@ bool Sema::MergeFunctionDecl(FunctionDecl *New, Decl *OldD, Scope *S) { Diag(Old->getLocation(), PrevDiag) << Old << Old->getType(); return true; } - + // C++ [class.mem]p1: // [...] A member shall not be declared twice in the // member-specification, except that a nested class or member // class template can be declared and then later defined. - unsigned NewDiag; - if (isa(OldMethod)) - NewDiag = diag::err_constructor_redeclared; - else if (isa(NewMethod)) - NewDiag = diag::err_destructor_redeclared; - else if (isa(NewMethod)) - NewDiag = diag::err_conv_function_redeclared; - else - NewDiag = diag::err_member_redeclared; + if (ActiveTemplateInstantiations.empty()) { + unsigned NewDiag; + if (isa(OldMethod)) + NewDiag = diag::err_constructor_redeclared; + else if (isa(NewMethod)) + NewDiag = diag::err_destructor_redeclared; + else if (isa(NewMethod)) + NewDiag = diag::err_conv_function_redeclared; + else + NewDiag = diag::err_member_redeclared; - Diag(New->getLocation(), NewDiag); + Diag(New->getLocation(), NewDiag); + } else { + Diag(New->getLocation(), diag::err_member_redeclared_in_instantiation) + << New << New->getType(); + } Diag(Old->getLocation(), PrevDiag) << Old << Old->getType(); // Complain if this is an explicit declaration of a special @@ -1941,7 +2130,7 @@ bool Sema::MergeFunctionDecl(FunctionDecl *New, Decl *OldD, Scope *S) { << New << getSpecialMember(OldMethod); return true; } - } else if (OldMethod->isExplicitlyDefaulted()) { + } else if (OldMethod->isExplicitlyDefaulted() && !isFriend) { Diag(NewMethod->getLocation(), diag::err_definition_of_explicitly_defaulted_member) << getSpecialMember(OldMethod); @@ -2142,18 +2331,16 @@ bool Sema::MergeCompatibleFunctionDecls(FunctionDecl *New, FunctionDecl *Old, void Sema::mergeObjCMethodDecls(ObjCMethodDecl *newMethod, ObjCMethodDecl *oldMethod) { - // We don't want to merge unavailable and deprecated attributes - // except from interface to implementation. - bool mergeDeprecation = isa(newMethod->getDeclContext()); - // Merge the attributes. - mergeDeclAttributes(newMethod, oldMethod, mergeDeprecation); + // Merge the attributes, including deprecated/unavailable + mergeDeclAttributes(newMethod, oldMethod, /* mergeDeprecation */true); // Merge attributes from the parameters. 
- ObjCMethodDecl::param_const_iterator oi = oldMethod->param_begin(); + ObjCMethodDecl::param_const_iterator oi = oldMethod->param_begin(), + oe = oldMethod->param_end(); for (ObjCMethodDecl::param_iterator ni = newMethod->param_begin(), ne = newMethod->param_end(); - ni != ne; ++ni, ++oi) + ni != ne && oi != oe; ++ni, ++oi) mergeParamDeclAttributes(*ni, *oi, Context); CheckObjCMethodOverride(newMethod, oldMethod, true); @@ -2552,6 +2739,8 @@ Decl *Sema::ParsedFreeStandingDeclSpec(Scope *S, AccessSpecifier AS, } } + ActOnDocumentableDecl(TagD); + return TagD; } @@ -2873,7 +3062,7 @@ Decl *Sema::BuildAnonymousStructOrUnion(Scope *S, DeclSpec &DS, Context.getTypeDeclType(Record), TInfo, /*BitWidth=*/0, /*Mutable=*/false, - /*HasInit=*/false); + /*InitStyle=*/ICIS_NoInit); Anon->setAccess(AS); if (getLangOpts().CPlusPlus) FieldCollector->Add(cast(Anon)); @@ -2970,7 +3159,7 @@ Decl *Sema::BuildMicrosoftCAnonymousStruct(Scope *S, DeclSpec &DS, Context.getTypeDeclType(Record), TInfo, /*BitWidth=*/0, /*Mutable=*/false, - /*HasInit=*/false); + /*InitStyle=*/ICIS_NoInit); Anon->setImplicit(); // Add the anonymous struct object to the current context. @@ -3225,7 +3414,7 @@ Decl *Sema::ActOnDeclarator(Scope *S, Declarator &D) { Decl *Dcl = HandleDeclarator(S, D, MultiTemplateParamsArg(*this)); if (OriginalLexicalContext && OriginalLexicalContext->isObjCContainer() && - Dcl->getDeclContext()->isFileContext()) + Dcl && Dcl->getDeclContext()->isFileContext()) Dcl->setTopLevelDeclInObjCContainer(); return Dcl; @@ -3794,8 +3983,6 @@ Sema::ActOnTypedefNameDecl(Scope *S, DeclContext *DC, TypedefNameDecl *NewTD, Context.setsigjmp_bufDecl(NewTD); else if (II->isStr("ucontext_t")) Context.setucontext_tDecl(NewTD); - else if (II->isStr("__builtin_va_list")) - Context.setBuiltinVaListType(Context.getTypedefType(NewTD)); } return NewTD; @@ -4173,18 +4360,6 @@ Sema::ActOnVariableDeclarator(Scope *S, Declarator &D, DeclContext *DC, CheckMemberSpecialization(NewVD, Previous)) NewVD->setInvalidDecl(); } - - // attributes declared post-definition are currently ignored - // FIXME: This should be handled in attribute merging, not - // here. - if (Previous.isSingleResult()) { - VarDecl *Def = dyn_cast(Previous.getFoundDecl()); - if (Def && (Def = Def->getDefinition()) && - Def != NewVD && D.hasAttributes()) { - Diag(NewVD->getLocation(), diag::warn_attribute_precede_definition); - Diag(Def->getLocation(), diag::note_previous_definition); - } - } // If this is a locally-scoped extern C variable, update the map of // such variables. @@ -4334,6 +4509,15 @@ bool Sema::CheckVariableDeclaration(VarDecl *NewVD, return false; } + // OpenCL v1.2 s6.8 -- The static qualifier is valid only in program + // scope. 
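The parameter-merging loop fixed at the top of this hunk previously advanced the old method's iterator without checking its end, so merging attributes across a redeclaration with a different parameter count could walk off the old sequence; the fix adds oi != oe to the loop condition. The same lockstep guard in isolation:

    #include <iostream>
    #include <vector>

    int main() {
      std::vector<int> NewParams = {1, 2, 3};
      std::vector<int> OldParams = {10, 20}; // mismatched arity

      // Advance both iterators together and stop at the shorter sequence,
      // exactly as the fixed loop does with ni/ne and oi/oe.
      auto ni = NewParams.begin(), ne = NewParams.end();
      auto oi = OldParams.begin(), oe = OldParams.end();
      for (; ni != ne && oi != oe; ++ni, ++oi)
        std::cout << *ni << " <- " << *oi << "\n";
      // The third new parameter is skipped instead of reading past the end
      // of OldParams.
    }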
+ if ((getLangOpts().OpenCLVersion >= 120) + && NewVD->isStaticLocal()) { + Diag(NewVD->getLocation(), diag::err_static_function_scope); + NewVD->setInvalidDecl(); + return false; + } + if (NewVD->hasLocalStorage() && T.isObjCGCWeak() && !NewVD->hasAttr()) { if (getLangOpts().getGC() != LangOptions::NonGC) @@ -4418,7 +4602,7 @@ bool Sema::CheckVariableDeclaration(VarDecl *NewVD, if (NewVD->isConstexpr() && !T->isDependentType() && RequireLiteralType(NewVD->getLocation(), T, - PDiag(diag::err_constexpr_var_non_literal))) { + diag::err_constexpr_var_non_literal)) { NewVD->setInvalidDecl(); return false; } @@ -4471,11 +4655,6 @@ static bool FindOverriddenMethod(const CXXBaseSpecifier *Specifier, return false; } -static bool hasDelayedExceptionSpec(CXXMethodDecl *Method) { - const FunctionProtoType *Proto =Method->getType()->getAs(); - return Proto && Proto->getExceptionSpecType() == EST_Delayed; -} - /// AddOverriddenMethods - See if a method overrides any in the base classes, /// and if so, check that it's a valid override and remember it. bool Sema::AddOverriddenMethods(CXXRecordDecl *DC, CXXMethodDecl *MD) { @@ -4491,8 +4670,7 @@ bool Sema::AddOverriddenMethods(CXXRecordDecl *DC, CXXMethodDecl *MD) { if (CXXMethodDecl *OldMD = dyn_cast(*I)) { MD->addOverriddenMethod(OldMD->getCanonicalDecl()); if (!CheckOverridingFunctionReturnType(MD, OldMD) && - (hasDelayedExceptionSpec(MD) || - !CheckOverridingFunctionExceptionSpec(MD, OldMD)) && + !CheckOverridingFunctionExceptionSpec(MD, OldMD) && !CheckIfOverriddenFunctionIsMarkedFinal(MD, OldMD)) { AddedAny = true; } @@ -4520,22 +4698,39 @@ namespace { // Also only accept corrections that have the same parent decl. class DifferentNameValidatorCCC : public CorrectionCandidateCallback { public: - DifferentNameValidatorCCC(CXXRecordDecl *Parent) - : ExpectedParent(Parent ? Parent->getCanonicalDecl() : 0) {} + DifferentNameValidatorCCC(ASTContext &Context, FunctionDecl *TypoFD, + CXXRecordDecl *Parent) + : Context(Context), OriginalFD(TypoFD), + ExpectedParent(Parent ? Parent->getCanonicalDecl() : 0) {} virtual bool ValidateCandidate(const TypoCorrection &candidate) { if (candidate.getEditDistance() == 0) return false; - if (CXXMethodDecl *MD = candidate.getCorrectionDeclAs()) { - CXXRecordDecl *Parent = MD->getParent(); - return Parent && Parent->getCanonicalDecl() == ExpectedParent; + llvm::SmallVector MismatchedParams; + for (TypoCorrection::const_decl_iterator CDecl = candidate.begin(), + CDeclEnd = candidate.end(); + CDecl != CDeclEnd; ++CDecl) { + FunctionDecl *FD = dyn_cast(*CDecl); + + if (FD && !FD->hasBody() && + hasSimilarParameters(Context, FD, OriginalFD, MismatchedParams)) { + if (CXXMethodDecl *MD = dyn_cast(FD)) { + CXXRecordDecl *Parent = MD->getParent(); + if (Parent && Parent->getCanonicalDecl() == ExpectedParent) + return true; + } else if (!ExpectedParent) { + return true; + } + } } - return !ExpectedParent; + return false; } private: + ASTContext &Context; + FunctionDecl *OriginalFD; CXXRecordDecl *ExpectedParent; }; @@ -4571,7 +4766,8 @@ static NamedDecl* DiagnoseInvalidRedeclaration( assert(!Prev.isAmbiguous() && "Cannot have an ambiguity in previous-declaration lookup"); CXXMethodDecl *MD = dyn_cast(NewFD); - DifferentNameValidatorCCC Validator(MD ? MD->getParent() : 0); + DifferentNameValidatorCCC Validator(SemaRef.Context, NewFD, + MD ? 
MD->getParent() : 0); if (!Prev.empty()) { for (LookupResult::iterator Func = Prev.begin(), FuncEnd = Prev.end(); Func != FuncEnd; ++Func) { @@ -4601,8 +4797,8 @@ static NamedDecl* DiagnoseInvalidRedeclaration( CDeclEnd = Correction.end(); CDecl != CDeclEnd; ++CDecl) { FunctionDecl *FD = dyn_cast(*CDecl); - if (FD && hasSimilarParameters(SemaRef.Context, FD, NewFD, - MismatchedParams)) { + if (FD && !FD->hasBody() && + hasSimilarParameters(SemaRef.Context, FD, NewFD, MismatchedParams)) { Previous.addDecl(FD); } } @@ -4640,19 +4836,23 @@ static NamedDecl* DiagnoseInvalidRedeclaration( } } - if (Correction) - SemaRef.Diag(NewFD->getLocation(), DiagMsg) + if (Correction) { + SourceRange FixItLoc(NewFD->getLocation()); + CXXScopeSpec &SS = ExtraArgs.D.getCXXScopeSpec(); + if (Correction.getCorrectionSpecifier() && SS.isValid()) + FixItLoc.setBegin(SS.getBeginLoc()); + SemaRef.Diag(NewFD->getLocStart(), DiagMsg) << Name << NewDC << Correction.getQuoted(SemaRef.getLangOpts()) << FixItHint::CreateReplacement( - NewFD->getLocation(), - Correction.getAsString(SemaRef.getLangOpts())); - else + FixItLoc, Correction.getAsString(SemaRef.getLangOpts())); + } else { SemaRef.Diag(NewFD->getLocation(), DiagMsg) << Name << NewDC << NewFD->getLocation(); + } bool NewFDisConst = false; if (CXXMethodDecl *NewMD = dyn_cast(NewFD)) - NewFDisConst = NewMD->getTypeQualifiers() & Qualifiers::Const; + NewFDisConst = NewMD->isConst(); for (llvm::SmallVector, 1>::iterator NearMatch = NearMatches.begin(), NearMatchEnd = NearMatches.end(); @@ -4660,7 +4860,7 @@ static NamedDecl* DiagnoseInvalidRedeclaration( FunctionDecl *FD = NearMatch->first; bool FDisConst = false; if (CXXMethodDecl *MD = dyn_cast(FD)) - FDisConst = MD->getTypeQualifiers() & Qualifiers::Const; + FDisConst = MD->isConst(); if (unsigned Idx = NearMatch->second) { ParmVarDecl *FDParam = FD->getParamDecl(Idx-1); @@ -4911,7 +5111,11 @@ Sema::ActOnFunctionDeclarator(Scope *S, Declarator &D, DeclContext *DC, FunctionTemplateDecl *FunctionTemplate = 0; bool isExplicitSpecialization = false; bool isFunctionTemplateSpecialization = false; + bool isDependentClassScopeExplicitSpecialization = false; + bool HasExplicitTemplateArgs = false; + TemplateArgumentListInfo TemplateArgs; + bool isVirtualOkay = false; FunctionDecl *NewFD = CreateNewFunctionDecl(*this, D, DC, R, TInfo, SC, @@ -5041,56 +5245,6 @@ Sema::ActOnFunctionDeclarator(Scope *S, Declarator &D, DeclContext *DC, FunctionTemplate->setInvalidDecl(); } - // If we see "T var();" at block scope, where T is a class type, it is - // probably an attempt to initialize a variable, not a function declaration. - // We don't catch this case earlier, since there is no ambiguity here. - if (!FunctionTemplate && D.getFunctionDefinitionKind() == FDK_Declaration && - CurContext->isFunctionOrMethod() && - D.getNumTypeObjects() == 1 && D.isFunctionDeclarator() && - D.getDeclSpec().getStorageClassSpecAsWritten() - == DeclSpec::SCS_unspecified) { - QualType T = R->getAs()->getResultType(); - DeclaratorChunk &C = D.getTypeObject(0); - if (!T->isVoidType() && C.Fun.NumArgs == 0 && !C.Fun.isVariadic && - !C.Fun.TrailingReturnType && - C.Fun.getExceptionSpecType() == EST_None) { - SourceRange ParenRange(C.Loc, C.EndLoc); - Diag(C.Loc, diag::warn_empty_parens_are_function_decl) << ParenRange; - - // If the declaration looks like: - // T var1, - // f(); - // and name lookup finds a function named 'f', then the ',' was - // probably intended to be a ';'. 
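The block removed here implemented the "empty parentheses interpreted as a function declaration" warning for declarations like "T var();" at block scope, where the parentheses declare a function rather than value-initialize a variable. The ambiguity it targets, in plain C++:

    struct Widget {
      Widget() : Value(42) {}
      int Value;
    };

    int main() {
      Widget a();  // declares a function 'a' returning Widget (most vexing parse)
      Widget b;    // default-initialized variable
      Widget c{};  // value-initialized variable; the C++11 fix-it suggestion
      (void)b;
      (void)c;
    }

The removed code suggested either dropping the parentheses or, in C++11 mode, replacing them with {}, which are exactly the b and c forms above.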
- if (!D.isFirstDeclarator() && D.getIdentifier()) { - FullSourceLoc Comma(D.getCommaLoc(), SourceMgr); - FullSourceLoc Name(D.getIdentifierLoc(), SourceMgr); - if (Comma.getFileID() != Name.getFileID() || - Comma.getSpellingLineNumber() != Name.getSpellingLineNumber()) { - LookupResult Result(*this, D.getIdentifier(), SourceLocation(), - LookupOrdinaryName); - if (LookupName(Result, S)) - Diag(D.getCommaLoc(), diag::note_empty_parens_function_call) - << FixItHint::CreateReplacement(D.getCommaLoc(), ";") << NewFD; - } - } - const CXXRecordDecl *RD = T->getAsCXXRecordDecl(); - // Empty parens mean value-initialization, and no parens mean default - // initialization. These are equivalent if the default constructor is - // user-provided, or if zero-initialization is a no-op. - if (RD && RD->hasDefinition() && - (RD->isEmpty() || RD->hasUserProvidedDefaultConstructor())) - Diag(C.Loc, diag::note_empty_parens_default_ctor) - << FixItHint::CreateRemoval(ParenRange); - else if (const char *Init = getFixItZeroInitializerForType(T)) - Diag(C.Loc, diag::note_empty_parens_zero_initialize) - << FixItHint::CreateReplacement(ParenRange, Init); - else if (LangOpts.CPlusPlus0x) - Diag(C.Loc, diag::note_empty_parens_zero_initialize) - << FixItHint::CreateReplacement(ParenRange, "{}"); - } - } - // C++ [dcl.fct.spec]p5: // The virtual specifier shall only be used in declarations of // nonstatic class member functions that appear within a @@ -5333,6 +5487,10 @@ Sema::ActOnFunctionDeclarator(Scope *S, Declarator &D, DeclContext *DC, NewFD->setInvalidDecl(); } + // Handle attributes. + ProcessDeclAttributes(S, NewFD, D, + /*NonInheritable=*/false, /*Inheritable=*/true); + if (!getLangOpts().CPlusPlus) { // Perform semantic checking on the function declaration. bool isExplicitSpecialization=false; @@ -5348,8 +5506,6 @@ Sema::ActOnFunctionDeclarator(Scope *S, Declarator &D, DeclContext *DC, } else { // If the declarator is a template-id, translate the parser's template // argument list into our AST format. - bool HasExplicitTemplateArgs = false; - TemplateArgumentListInfo TemplateArgs; if (D.getName().getKind() == UnqualifiedId::IK_TemplateId) { TemplateIdAnnotation *TemplateId = D.getName().TemplateId; TemplateArgs.setLAngleLoc(TemplateId->LAngleLoc); @@ -5580,25 +5736,6 @@ Sema::ActOnFunctionDeclarator(Scope *S, Declarator &D, DeclContext *DC, << D.getCXXScopeSpec().getRange(); } } - - - // Handle attributes. We need to have merged decls when handling attributes - // (for example to check for conflicts, etc). - // FIXME: This needs to happen before we merge declarations. Then, - // let attribute merging cope with attribute conflicts. - ProcessDeclAttributes(S, NewFD, D, - /*NonInheritable=*/false, /*Inheritable=*/true); - - // attributes declared post-definition are currently ignored - // FIXME: This should happen during attribute merging - if (D.isRedeclaration() && Previous.isSingleResult()) { - const FunctionDecl *Def; - FunctionDecl *PrevFD = dyn_cast(Previous.getFoundDecl()); - if (PrevFD && PrevFD->isDefined(Def) && D.hasAttributes()) { - Diag(NewFD->getLocation(), diag::warn_attribute_precede_definition); - Diag(Def->getLocation(), diag::note_previous_definition); - } - } AddKnownFunctionAttributes(NewFD); @@ -5644,6 +5781,14 @@ Sema::ActOnFunctionDeclarator(Scope *S, Declarator &D, DeclContext *DC, } } + // OpenCL v1.2 s6.8 static is invalid for kernel functions. 
+ if ((getLangOpts().OpenCLVersion >= 120) + && NewFD->hasAttr() + && (SC == SC_Static)) { + Diag(D.getIdentifierLoc(), diag::err_static_kernel); + D.setInvalidType(); + } + MarkUnusedFileScopedDecl(NewFD); if (getLangOpts().CUDA) @@ -5664,8 +5809,9 @@ Sema::ActOnFunctionDeclarator(Scope *S, Declarator &D, DeclContext *DC, if (isDependentClassScopeExplicitSpecialization) { ClassScopeFunctionSpecializationDecl *NewSpec = ClassScopeFunctionSpecializationDecl::Create( - Context, CurContext, SourceLocation(), - cast(NewFD)); + Context, CurContext, SourceLocation(), + cast(NewFD), + HasExplicitTemplateArgs, TemplateArgs); CurContext->addDecl(NewSpec); AddToScope = false; } @@ -5683,12 +5829,12 @@ Sema::ActOnFunctionDeclarator(Scope *S, Declarator &D, DeclContext *DC, /// that have been instantiated via C++ template instantiation (called /// via InstantiateDecl). /// -/// \param IsExplicitSpecialiation whether this new function declaration is +/// \param IsExplicitSpecialization whether this new function declaration is /// an explicit specialization of the previous declaration. /// /// This sets NewFD->isInvalidDecl() to true if there was an error. /// -/// Returns true if the function declaration is a redeclaration. +/// \returns true if the function declaration is a redeclaration. bool Sema::CheckFunctionDeclaration(Scope *S, FunctionDecl *NewFD, LookupResult &Previous, bool IsExplicitSpecialization) { @@ -5882,10 +6028,12 @@ bool Sema::CheckFunctionDeclaration(Scope *S, FunctionDecl *NewFD, // compatible, and if it does, warn the user. if (NewFD->isExternC()) { QualType R = NewFD->getResultType(); - if (!R.isPODType(Context) && - !R->isVoidType()) - Diag( NewFD->getLocation(), diag::warn_return_value_udt ) - << NewFD << R; + if (R->isIncompleteType() && !R->isVoidType()) + Diag(NewFD->getLocation(), diag::warn_return_value_udt_incomplete) + << NewFD << R; + else if (!R.isPODType(Context) && !R->isVoidType() && + !R->isObjCObjectPointerType()) + Diag(NewFD->getLocation(), diag::warn_return_value_udt) << NewFD << R; } } return Redeclaration; @@ -6040,55 +6188,73 @@ namespace { } } - void VisitExpr(Expr *E) { - if (isa(*E)) return; - if (isRecordType) { - Expr *expr = E; - if (MemberExpr *ME = dyn_cast(E)) { - ValueDecl *VD = ME->getMemberDecl(); - if (isa(VD) || isa(VD)) return; - expr = ME->getBase(); - } - if (DeclRefExpr *DRE = dyn_cast(expr)) { + // Sometimes, the expression passed in lacks the casts that are used + // to determine which DeclRefExpr's to check. Assume that the casts + // are present and continue visiting the expression. + void HandleExpr(Expr *E) { + // Skip checking T a = a where T is not a record type. Doing so is a + // way to silence uninitialized warnings. + if (isRecordType) + if (DeclRefExpr *DRE = dyn_cast(E)) HandleDeclRefExpr(DRE); - return; - } + + if (ConditionalOperator *CO = dyn_cast(E)) { + HandleValue(CO->getTrueExpr()); + HandleValue(CO->getFalseExpr()); + } + + Visit(E); + } + + // For most expressions, the cast is directly above the DeclRefExpr. + // For conditional operators, the cast can be outside the conditional + // operator if both expressions are DeclRefExpr's. 
+ void HandleValue(Expr *E) { + E = E->IgnoreParenImpCasts(); + if (DeclRefExpr* DRE = dyn_cast(E)) { + HandleDeclRefExpr(DRE); + return; + } + + if (ConditionalOperator *CO = dyn_cast(E)) { + HandleValue(CO->getTrueExpr()); + HandleValue(CO->getFalseExpr()); } - Inherited::VisitExpr(E); + } + + void VisitImplicitCastExpr(ImplicitCastExpr *E) { + if ((!isRecordType && E->getCastKind() == CK_LValueToRValue) || + (isRecordType && E->getCastKind() == CK_NoOp)) + HandleValue(E->getSubExpr()); + + Inherited::VisitImplicitCastExpr(E); } void VisitMemberExpr(MemberExpr *E) { + // Don't warn on arrays since they can be treated as pointers. if (E->getType()->canDecayToPointerType()) return; + ValueDecl *VD = E->getMemberDecl(); - if (isa(VD) || isa(VD)) + CXXMethodDecl *MD = dyn_cast(VD); + if (isa(VD) || (MD && !MD->isStatic())) if (DeclRefExpr *DRE = dyn_cast(E->getBase()->IgnoreParenImpCasts())) { HandleDeclRefExpr(DRE); return; } - Inherited::VisitMemberExpr(E); - } - void VisitImplicitCastExpr(ImplicitCastExpr *E) { - if ((!isRecordType &&E->getCastKind() == CK_LValueToRValue) || - (isRecordType && E->getCastKind() == CK_NoOp)) { - Expr* SubExpr = E->getSubExpr()->IgnoreParenImpCasts(); - if (MemberExpr *ME = dyn_cast(SubExpr)) - SubExpr = ME->getBase()->IgnoreParenImpCasts(); - if (DeclRefExpr *DRE = dyn_cast(SubExpr)) { - HandleDeclRefExpr(DRE); - return; - } - } - Inherited::VisitImplicitCastExpr(E); + Inherited::VisitMemberExpr(E); } void VisitUnaryOperator(UnaryOperator *E) { // For POD record types, addresses of its own members are well-defined. - if (isRecordType && isPODType) return; + if (E->getOpcode() == UO_AddrOf && isRecordType && isPODType && + isa(E->getSubExpr()->IgnoreParens())) return; Inherited::VisitUnaryOperator(E); } - + + void VisitObjCMessageExpr(ObjCMessageExpr *E) { return; } + void HandleDeclRefExpr(DeclRefExpr *DRE) { Decl* ReferenceDecl = DRE->getDecl(); if (OrigDecl != ReferenceDecl) return; @@ -6105,7 +6271,7 @@ namespace { /// CheckSelfReference - Warns if OrigDecl is used in expression E. void Sema::CheckSelfReference(Decl* OrigDecl, Expr *E) { - SelfReferenceChecker(*this, OrigDecl).VisitExpr(E); + SelfReferenceChecker(*this, OrigDecl).HandleExpr(E); } /// AddInitializerToDecl - Adds the initializer Init to the @@ -6145,13 +6311,19 @@ void Sema::AddInitializerToDecl(Decl *RealDecl, Expr *Init, // Check for self-references within variable initializers. // Variables declared within a function/method body are handled // by a dataflow analysis. - if (!VDecl->hasLocalStorage() && !VDecl->isStaticLocal()) + // Record types initialized by initializer list are handled here. + // Initialization by constructors are handled in TryConstructorInitialization. + if (!VDecl->hasLocalStorage() && !VDecl->isStaticLocal() && + (isa(Init) || !VDecl->getType()->isRecordType())) CheckSelfReference(RealDecl, Init); ParenListExpr *CXXDirectInit = dyn_cast(Init); // C++11 [decl.spec.auto]p6. Deduce the type which 'auto' stands in for. - if (TypeMayContainAuto && VDecl->getType()->getContainedAutoType()) { + AutoType *Auto = 0; + if (TypeMayContainAuto && + (Auto = VDecl->getType()->getContainedAutoType()) && + !Auto->isDeduced()) { Expr *DeduceInit = Init; // Initializer could be a C++ direct-initializer. Deduction only works if it // contains exactly one expression. @@ -6192,6 +6364,17 @@ void Sema::AddInitializerToDecl(Decl *RealDecl, Expr *Init, if (getLangOpts().ObjCAutoRefCount && inferObjCARCLifetime(VDecl)) VDecl->setInvalidDecl(); + // Warn if we deduced 'id'. 
'auto' usually implies type-safety, but using + // 'id' instead of a specific object type prevents most of our usual checks. + // We only want to warn outside of template instantiations, though: + // inside a template, the 'id' could have come from a parameter. + if (ActiveTemplateInstantiations.empty() && + DeducedType->getType()->isObjCIdType()) { + SourceLocation Loc = DeducedType->getTypeLoc().getBeginLoc(); + Diag(Loc, diag::warn_auto_var_is_id) + << VDecl->getDeclName() << DeduceInit->getSourceRange(); + } + // If this is a redeclaration, check that the type we just deduced matches // the previously declared type. if (VarDecl *Old = VDecl->getPreviousDecl()) @@ -6906,9 +7089,55 @@ Sema::BuildDeclaratorGroup(Decl **Group, unsigned NumDecls, } } + ActOnDocumentableDecls(Group, NumDecls); + return DeclGroupPtrTy::make(DeclGroupRef::Create(Context, Group, NumDecls)); } +void Sema::ActOnDocumentableDecl(Decl *D) { + ActOnDocumentableDecls(&D, 1); +} + +void Sema::ActOnDocumentableDecls(Decl **Group, unsigned NumDecls) { + // Don't parse the comment if Doxygen diagnostics are ignored. + if (NumDecls == 0 || !Group[0]) + return; + + if (Diags.getDiagnosticLevel(diag::warn_doc_param_not_found, + Group[0]->getLocation()) + == DiagnosticsEngine::Ignored) + return; + + if (NumDecls >= 2) { + // This is a decl group. Normally it will contain only declarations + // produced from the declarator list. But in case we have any definitions or + // additional declaration references: + // 'typedef struct S {} S;' + // 'typedef struct S *S;' + // 'struct S *pS;' + // FinalizeDeclaratorGroup adds these as separate declarations. + Decl *MaybeTagDecl = Group[0]; + if (MaybeTagDecl && isa(MaybeTagDecl)) { + Group++; + NumDecls--; + } + } + + // See if there are any new comments that are not attached to a decl. + ArrayRef Comments = Context.getRawCommentList().getComments(); + if (!Comments.empty() && + !Comments.back()->isAttached()) { + // There is at least one comment that is not attached to a decl. + // Maybe it should be attached to one of these decls? + // + // Note that this way we pick up not only comments that precede the + // declaration, but also comments that *follow* the declaration -- thanks to + // the lookahead in the lexer: we've consumed the semicolon and looked + // ahead through comments. + for (unsigned i = 0; i != NumDecls; ++i) + Context.getCommentForDecl(Group[i]); + } +} /// ActOnParamDeclarator - Called from Parser::ParseFunctionDeclarator() /// to introduce parameters into function prototype scope. @@ -7132,9 +7361,10 @@ ParmVarDecl *Sema::CheckParameter(DeclContext *DC, SourceLocation StartLoc, // Parameter declarators cannot be interface types. All ObjC objects are // passed by reference. if (T->isObjCObjectType()) { + SourceLocation TypeEndLoc = TSInfo->getTypeLoc().getLocEnd(); Diag(NameLoc, diag::err_object_cannot_be_passed_returned_by_value) << 1 << T - << FixItHint::CreateInsertion(NameLoc, "*"); + << FixItHint::CreateInsertion(TypeEndLoc, "*"); T = Context.getObjCObjectPointerType(T); New->setType(T); } @@ -7225,6 +7455,10 @@ static bool ShouldWarnAboutMissingPrototype(const FunctionDecl *FD) { if (FD->isFunctionTemplateSpecialization()) return false; + // Don't warn for OpenCL kernels.
+ if (FD->hasAttr()) + return false; + bool MissingPrototype = true; for (const FunctionDecl *Prev = FD->getPreviousDecl(); Prev; Prev = Prev->getPreviousDecl()) { @@ -7253,6 +7487,7 @@ void Sema::CheckForFunctionRedefinition(FunctionDecl *FD) { else Diag(FD->getLocation(), diag::err_redefinition) << FD->getDeclName(); Diag(Definition->getLocation(), diag::note_previous_definition); + FD->setInvalidDecl(); } } @@ -7388,6 +7623,7 @@ Decl *Sema::ActOnStartOfFunctionDef(Scope *FnBodyScope, Decl *D) { << FD->getName() << "dllimport"; } } + ActOnDocumentableDecl(FD); return FD; } @@ -7463,7 +7699,12 @@ Decl *Sema::ActOnFinishFunctionBody(Decl *dcl, Stmt *Body, if (CXXConstructorDecl *Constructor = dyn_cast(FD)) MarkVTableUsed(FD->getLocation(), Constructor->getParent()); - computeNRVO(Body, getCurFunction()); + // Try to apply the named return value optimization. We have to check + // if we can do this here because lambdas keep return statements around + // to deduce an implicit return type. + if (getLangOpts().CPlusPlus && FD->getResultType()->isRecordType() && + !FD->isDependentContext()) + computeNRVO(Body, getCurFunction()); } assert((FD == getCurFunctionDecl() || getCurLambda()->CallOperator == FD) && @@ -7471,8 +7712,6 @@ Decl *Sema::ActOnFinishFunctionBody(Decl *dcl, Stmt *Body, } else if (ObjCMethodDecl *MD = dyn_cast_or_null(dcl)) { assert(MD == getCurMethodDecl() && "Method parsing confused"); MD->setBody(Body); - if (Body) - MD->setEndLoc(Body->getLocEnd()); if (!MD->isInvalidDecl()) { DiagnoseUnusedParameters(MD->param_begin(), MD->param_end()); DiagnoseSizeOfParametersAndReturnValue(MD->param_begin(), MD->param_end(), @@ -7481,22 +7720,24 @@ Decl *Sema::ActOnFinishFunctionBody(Decl *dcl, Stmt *Body, if (Body) computeNRVO(Body, getCurFunction()); } - if (ObjCShouldCallSuperDealloc) { + if (getCurFunction()->ObjCShouldCallSuperDealloc) { Diag(MD->getLocEnd(), diag::warn_objc_missing_super_dealloc); - ObjCShouldCallSuperDealloc = false; + getCurFunction()->ObjCShouldCallSuperDealloc = false; } - if (ObjCShouldCallSuperFinalize) { + if (getCurFunction()->ObjCShouldCallSuperFinalize) { Diag(MD->getLocEnd(), diag::warn_objc_missing_super_finalize); - ObjCShouldCallSuperFinalize = false; + getCurFunction()->ObjCShouldCallSuperFinalize = false; } } else { return 0; } - assert(!ObjCShouldCallSuperDealloc && "This should only be set for " - "ObjC methods, which should have been handled in the block above."); - assert(!ObjCShouldCallSuperFinalize && "This should only be set for " - "ObjC methods, which should have been handled in the block above."); + assert(!getCurFunction()->ObjCShouldCallSuperDealloc && + "This should only be set for ObjC methods, which should have been " + "handled in the block above."); + assert(!getCurFunction()->ObjCShouldCallSuperFinalize && + "This should only be set for ObjC methods, which should have been " + "handled in the block above."); // Verify and clean out per-function state. if (Body) { @@ -7630,10 +7871,10 @@ NamedDecl *Sema::ImplicitlyDefineFunction(SourceLocation Loc, (void)Error; // Silence warning. 
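// [Editor's note: for context, a sketch of the C89 behavior that
// ImplicitlyDefineFunction models -- calling an undeclared function
// implicitly declares it as 'extern int foo()':]
//
//   int main(void) {
//     return foo(42);  /* C89: implicit declaration of 'foo' */
//   }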
assert(!Error && "Error setting up implicit decl!"); Declarator D(DS, Declarator::BlockContext); - D.AddTypeInfo(DeclaratorChunk::getFunction(false, false, SourceLocation(), 0, - 0, 0, true, SourceLocation(), + D.AddTypeInfo(DeclaratorChunk::getFunction(false, false, false, + SourceLocation(), 0, 0, 0, true, + SourceLocation(), SourceLocation(), SourceLocation(), SourceLocation(), - SourceLocation(), EST_None, SourceLocation(), 0, 0, 0, 0, Loc, Loc, D), DS.getAttributes(), @@ -7733,6 +7974,13 @@ void Sema::AddKnownFunctionAttributes(FunctionDecl *FD) { "printf", 2, Name->isStr("vasprintf") ? 0 : 3)); } + + if (Name->isStr("__CFStringMakeConstantString")) { + // We already have a __builtin___CFStringMakeConstantString, + // but builds that use -fno-constant-cfstrings don't go through that. + if (!FD->getAttr()) + FD->addAttr(::new (Context) FormatArgAttr(FD->getLocation(), Context, 1)); + } } TypedefDecl *Sema::ParseTypedefDecl(Scope *S, Declarator &D, QualType T, @@ -8566,9 +8814,10 @@ CreateNewDecl: // many points during the parsing of a struct declaration (because // the #pragma tokens are effectively skipped over during the // parsing of the struct). - AddAlignmentAttributesForRecord(RD); - - AddMsStructLayoutForRecord(RD); + if (TUK == TUK_Definition) { + AddAlignmentAttributesForRecord(RD); + AddMsStructLayoutForRecord(RD); + } } if (ModulePrivateLoc.isValid()) { @@ -8653,6 +8902,13 @@ CreateNewDecl: InFunctionDeclarator && Name) DeclsInPrototypeScope.push_back(New); + if (PrevDecl) + mergeDeclAttributes(New, PrevDecl); + + // If there's a #pragma GCC visibility in scope, set the visibility of this + // record. + AddPushedVisibilityAttribute(New); + OwnedDecl = true; return New; } @@ -8663,6 +8919,12 @@ void Sema::ActOnTagStartDefinition(Scope *S, Decl *TagD) { // Enter the tag context. PushDeclContext(S, Tag); + + ActOnDocumentableDecl(TagD); + + // If there's a #pragma GCC visibility in scope, set the visibility of this + // record. 
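// [Editor's note: the deleted FIXME above is resolved by the replacement just
// below -- the nontriviality note now points at the user-declared copy
// assignment operator itself rather than at the class name:]
//
//   struct S {
//     S &operator=(const S &);  // note_nontrivial_user_defined lands here now
//   };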
+ AddPushedVisibilityAttribute(Tag); } Decl *Sema::ActOnObjCContainerStartDefinition(Decl *IDecl) { @@ -8849,7 +9111,7 @@ Decl *Sema::ActOnField(Scope *S, Decl *TagD, SourceLocation DeclStart, Declarator &D, Expr *BitfieldWidth) { FieldDecl *Res = HandleField(S, cast_or_null(TagD), DeclStart, D, static_cast(BitfieldWidth), - /*HasInit=*/false, AS_public); + /*InitStyle=*/ICIS_NoInit, AS_public); return Res; } @@ -8857,7 +9119,8 @@ Decl *Sema::ActOnField(Scope *S, Decl *TagD, SourceLocation DeclStart, /// FieldDecl *Sema::HandleField(Scope *S, RecordDecl *Record, SourceLocation DeclStart, - Declarator &D, Expr *BitWidth, bool HasInit, + Declarator &D, Expr *BitWidth, + InClassInitStyle InitStyle, AccessSpecifier AS) { IdentifierInfo *II = D.getIdentifier(); SourceLocation Loc = DeclStart; @@ -8919,7 +9182,7 @@ FieldDecl *Sema::HandleField(Scope *S, RecordDecl *Record, = (D.getDeclSpec().getStorageClassSpec() == DeclSpec::SCS_mutable); SourceLocation TSSL = D.getLocStart(); FieldDecl *NewFD - = CheckFieldDecl(II, T, TInfo, Record, Loc, Mutable, BitWidth, HasInit, + = CheckFieldDecl(II, T, TInfo, Record, Loc, Mutable, BitWidth, InitStyle, TSSL, AS, PrevDecl, &D); if (NewFD->isInvalidDecl()) @@ -8952,7 +9215,8 @@ FieldDecl *Sema::HandleField(Scope *S, RecordDecl *Record, FieldDecl *Sema::CheckFieldDecl(DeclarationName Name, QualType T, TypeSourceInfo *TInfo, RecordDecl *Record, SourceLocation Loc, - bool Mutable, Expr *BitWidth, bool HasInit, + bool Mutable, Expr *BitWidth, + InClassInitStyle InitStyle, SourceLocation TSSL, AccessSpecifier AS, NamedDecl *PrevDecl, Declarator *D) { @@ -9042,7 +9306,7 @@ FieldDecl *Sema::CheckFieldDecl(DeclarationName Name, QualType T, } FieldDecl *NewFD = FieldDecl::Create(Context, Record, TSSL, Loc, II, T, TInfo, - BitWidth, Mutable, HasInit); + BitWidth, Mutable, InitStyle); if (InvalidDecl) NewFD->setInvalidDecl(); @@ -9213,10 +9477,9 @@ void Sema::DiagnoseNontrivial(const RecordType* T, CXXSpecialMember member) { case CXXCopyAssignment: if (RD->hasUserDeclaredCopyAssignment()) { - // FIXME: this should use the location of the copy - // assignment, not the type. 
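// [Editor's note: an illustration of the pragma picked up by the
// AddPushedVisibilityAttribute call that follows (hypothetical user code):]
//
//   #pragma GCC visibility push(hidden)
//   struct Widget { void f(); };  // the tag now inherits hidden visibility
//   #pragma GCC visibility pop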
- SourceLocation TyLoc = RD->getLocStart(); - Diag(TyLoc, diag::note_nontrivial_user_defined) << QT << member; + SourceLocation AssignLoc = + RD->getCopyAssignmentOperator(0)->getLocation(); + Diag(AssignLoc, diag::note_nontrivial_user_defined) << QT << member; return; } break; @@ -9295,12 +9558,12 @@ void Sema::DiagnoseNontrivial(const RecordType* T, CXXSpecialMember member) { typedef RecordDecl::field_iterator field_iter; for (field_iter fi = RD->field_begin(), fe = RD->field_end(); fi != fe; ++fi) { - QualType EltTy = Context.getBaseElementType((*fi)->getType()); + QualType EltTy = Context.getBaseElementType(fi->getType()); if (const RecordType *EltRT = EltTy->getAs()) { CXXRecordDecl* EltRD = cast(EltRT->getDecl()); if (!(EltRD->*hasTrivial)()) { - SourceLocation FLoc = (*fi)->getLocation(); + SourceLocation FLoc = fi->getLocation(); Diag(FLoc, diag::note_nontrivial_has_nontrivial) << QT << 0 << member; DiagnoseNontrivial(EltRT, member); return; @@ -9316,7 +9579,7 @@ void Sema::DiagnoseNontrivial(const RecordType* T, CXXSpecialMember member) { case Qualifiers::OCL_Autoreleasing: case Qualifiers::OCL_Weak: case Qualifiers::OCL_Strong: - Diag((*fi)->getLocation(), diag::note_nontrivial_objc_ownership) + Diag(fi->getLocation(), diag::note_nontrivial_objc_ownership) << QT << EltTy.getObjCLifetime(); return; } @@ -9390,7 +9653,7 @@ Decl *Sema::ActOnIvar(Scope *S, ObjCContainerDecl *EnclosingContext; if (ObjCImplementationDecl *IMPDecl = dyn_cast(EnclosingDecl)) { - if (!LangOpts.ObjCNonFragileABI2) { + if (LangOpts.ObjCRuntime.isFragile()) { // Case of ivar declared in an implementation. Context is that of its class. EnclosingContext = IMPDecl->getClassInterface(); assert(EnclosingContext && "Implementation has no class interface!"); @@ -9400,7 +9663,7 @@ Decl *Sema::ActOnIvar(Scope *S, } else { if (ObjCCategoryDecl *CDecl = dyn_cast(EnclosingDecl)) { - if (!LangOpts.ObjCNonFragileABI2 || !CDecl->IsClassExtension()) { + if (LangOpts.ObjCRuntime.isFragile() || !CDecl->IsClassExtension()) { Diag(Loc, diag::err_misplaced_ivar) << CDecl->IsClassExtension(); return 0; } @@ -9443,7 +9706,11 @@ Decl *Sema::ActOnIvar(Scope *S, S->AddDecl(NewID); IdResolver.AddDecl(NewID); } - + + if (LangOpts.ObjCRuntime.isNonFragile() && + !NewID->isInvalidDecl() && isa(EnclosingDecl)) + Diag(Loc, diag::warn_ivars_in_interface); + return NewID; } @@ -9453,7 +9720,7 @@ Decl *Sema::ActOnIvar(Scope *S, /// then add an implicit `char :0` ivar to the end of that interface. void Sema::ActOnLastBitfield(SourceLocation DeclLoc, SmallVectorImpl &AllIvarDecls) { - if (!LangOpts.ObjCNonFragileABI2 || AllIvarDecls.empty()) + if (LangOpts.ObjCRuntime.isFragile() || AllIvarDecls.empty()) return; Decl *ivarDecl = AllIvarDecls[AllIvarDecls.size()-1]; @@ -9492,11 +9759,23 @@ void Sema::ActOnFields(Scope* S, AttributeList *Attr) { assert(EnclosingDecl && "missing record or interface decl"); - // If the decl this is being inserted into is invalid, then it may be a - // redeclaration or some other bogus case. Don't try to add fields to it. - if (EnclosingDecl->isInvalidDecl()) - return; - + // If this is an Objective-C @implementation or category and we have + // new fields here we should reset the layout of the interface since + // it will now change. + if (!Fields.empty() && isa(EnclosingDecl)) { + ObjCContainerDecl *DC = cast(EnclosingDecl); + switch (DC->getKind()) { + default: break; + case Decl::ObjCCategory: + Context.ResetObjCLayout(cast(DC)->getClassInterface()); + break; + case Decl::ObjCImplementation: + Context. 
+        ResetObjCLayout(cast<ObjCImplementationDecl>(DC)->getClassInterface());
+      break;
+    }
+  }
+
   RecordDecl *Record = dyn_cast<RecordDecl>(EnclosingDecl);
 
   // Start counting up the number of named members; make sure to include
@@ -9704,7 +9983,7 @@ void Sema::ActOnFields(Scope* S,
       // However, here we check whether this particular class is only
       // non-POD because of the presence of an Objective-C pointer member.
       // If so, objects of this type cannot be shared between code compiled
-      // with instant objects and code compiled with manual retain/release.
+      // with ARC and code compiled with manual retain/release.
       if (getLangOpts().ObjCAutoRefCount &&
           CXXRecord->hasObjectMember() &&
           CXXRecord->getLinkage() == ExternalLinkage) {
@@ -9848,11 +10127,6 @@ void Sema::ActOnFields(Scope* S,
 
   if (Attr)
     ProcessDeclAttributeList(S, Record, Attr);
-
-  // If there's a #pragma GCC visibility in scope, and this isn't a subclass,
-  // set the visibility of this record.
-  if (Record && !Record->getDeclContext()->isRecord())
-    AddPushedVisibilityAttribute(Record);
 }
 
 /// \brief Determine whether the given integral value is representable within
@@ -10106,15 +10380,16 @@ Decl *Sema::ActOnEnumConstant(Scope *S, Decl *theEnumDecl, Decl *lastEnumConst,
     }
   }
 
-  // C++ [class.mem]p13:
-  //   If T is the name of a class, then each of the following shall have a
-  //   name different from T:
-  //     - every enumerator of every member of class T that is an enumerated
-  //       type
+  // C++ [class.mem]p15:
+  //   If T is the name of a class, then each of the following shall have a name
+  //   different from T:
+  //     - every enumerator of every member of class T that is an unscoped
+  //       enumerated type
   if (CXXRecordDecl *Record = dyn_cast<CXXRecordDecl>(
                          TheEnumDecl->getDeclContext()->getRedeclContext()))
-    if (Record->getIdentifier() && Record->getIdentifier() == Id)
+    if (!TheEnumDecl->isScoped() &&
+        Record->getIdentifier() && Record->getIdentifier() == Id)
       Diag(IdLoc, diag::err_member_name_of_class) << Id;
 
   EnumConstantDecl *New =
@@ -10129,9 +10404,62 @@ Decl *Sema::ActOnEnumConstant(Scope *S, Decl *theEnumDecl, Decl *lastEnumConst,
     PushOnScopeChains(New, S);
  }
 
+  ActOnDocumentableDecl(New);
+
   return New;
 }
 
+// Emits a warning if every element in the enum is the same value and if
+// every element is initialized with an integer or boolean literal.
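// [Editor's note: example input for the check implemented below -- every
// enumerator is literal-initialized to the same value, so the warning fires
// and the fix-it note suggests naming the previous enumerator instead:]
//
//   enum E { A = 1, B = 1, C = 1 };
//   // warning: all elements of 'E' are initialized with the value 1
//   // fix-it on the last enumerator: 'C = 1' -> 'C = B'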
+static void CheckForUniqueEnumValues(Sema &S, Decl **Elements, + unsigned NumElements, EnumDecl *Enum, + QualType EnumType) { + if (S.Diags.getDiagnosticLevel(diag::warn_identical_enum_values, + Enum->getLocation()) == + DiagnosticsEngine::Ignored) + return; + + if (NumElements < 2) + return; + + if (!Enum->getIdentifier()) + return; + + llvm::APSInt FirstVal; + + for (unsigned i = 0; i != NumElements; ++i) { + EnumConstantDecl *ECD = cast_or_null(Elements[i]); + if (!ECD) + return; + + Expr *InitExpr = ECD->getInitExpr(); + if (!InitExpr) + return; + InitExpr = InitExpr->IgnoreImpCasts(); + if (!isa(InitExpr) && !isa(InitExpr)) + return; + + if (i == 0) { + FirstVal = ECD->getInitVal(); + continue; + } + + if (!llvm::APSInt::isSameValue(FirstVal, ECD->getInitVal())) + return; + } + + S.Diag(Enum->getLocation(), diag::warn_identical_enum_values) + << EnumType << FirstVal.toString(10) + << Enum->getSourceRange(); + + EnumConstantDecl *Last = cast(Elements[NumElements - 1]), + *Next = cast(Elements[NumElements - 2]); + + S.Diag(Last->getLocation(), diag::note_identical_enum_values) + << FixItHint::CreateReplacement(Last->getInitExpr()->getSourceRange(), + Next->getName()); +} + void Sema::ActOnEnumBody(SourceLocation EnumLoc, SourceLocation LBraceLoc, SourceLocation RBraceLoc, Decl *EnumDeclX, Decl **Elements, unsigned NumElements, @@ -10355,6 +10683,7 @@ void Sema::ActOnEnumBody(SourceLocation EnumLoc, SourceLocation LBraceLoc, if (InFunctionDeclarator) DeclsInPrototypeScope.push_back(Enum); + CheckForUniqueEnumValues(*this, Elements, NumElements, Enum, EnumType); } Decl *Sema::ActOnFileScopeAsmDecl(Expr *expr, diff --git a/lib/Sema/SemaDeclAttr.cpp b/lib/Sema/SemaDeclAttr.cpp index 5c6ddd2..22bff86 100644 --- a/lib/Sema/SemaDeclAttr.cpp +++ b/lib/Sema/SemaDeclAttr.cpp @@ -14,6 +14,7 @@ #include "clang/Sema/SemaInternal.h" #include "TargetAttributesSema.h" #include "clang/AST/ASTContext.h" +#include "clang/AST/CXXInheritance.h" #include "clang/AST/DeclCXX.h" #include "clang/AST/DeclTemplate.h" #include "clang/AST/DeclObjC.h" @@ -42,7 +43,8 @@ enum AttributeDeclKind { ExpectedMethod, ExpectedVariableFunctionOrLabel, ExpectedFieldOrGlobalVar, - ExpectedStruct + ExpectedStruct, + ExpectedTLSVar }; //===----------------------------------------------------------------------===// @@ -82,7 +84,7 @@ static bool isFunction(const Decl *D) { /// type (function or function-typed variable) or an Objective-C /// method. static bool isFunctionOrMethod(const Decl *D) { - return isFunction(D)|| isa(D); + return isFunction(D) || isa(D); } /// isFunctionOrMethodOrBlock - Return true if the given decl has function @@ -238,17 +240,45 @@ static bool isIntOrBool(Expr *Exp) { return QT->isBooleanType() || QT->isIntegerType(); } -/// + +// Check to see if the type is a smart pointer of some kind. We assume +// it's a smart pointer if it defines both operator-> and operator*. +static bool threadSafetyCheckIsSmartPointer(Sema &S, const RecordType* RT) { + DeclContextLookupConstResult Res1 = RT->getDecl()->lookup( + S.Context.DeclarationNames.getCXXOperatorName(OO_Star)); + if (Res1.first == Res1.second) + return false; + + DeclContextLookupConstResult Res2 = RT->getDecl()->lookup( + S.Context.DeclarationNames.getCXXOperatorName(OO_Arrow)); + if (Res2.first == Res2.second) + return false; + + return true; +} + /// \brief Check if passed in Decl is a pointer type. /// Note that this function may produce an error message. 
/// \return true if the Decl is a pointer type; false otherwise -/// -static bool checkIsPointer(Sema &S, const Decl *D, const AttributeList &Attr) { +static bool threadSafetyCheckIsPointer(Sema &S, const Decl *D, + const AttributeList &Attr) { if (const ValueDecl *vd = dyn_cast(D)) { QualType QT = vd->getType(); if (QT->isAnyPointerType()) return true; - S.Diag(Attr.getLoc(), diag::warn_pointer_attribute_wrong_type) + + if (const RecordType *RT = QT->getAs()) { + // If it's an incomplete type, it could be a smart pointer; skip it. + // (We don't want to force template instantiation if we can avoid it, + // since that would alter the order in which templates are instantiated.) + if (RT->isIncompleteType()) + return true; + + if (threadSafetyCheckIsSmartPointer(S, RT)) + return true; + } + + S.Diag(Attr.getLoc(), diag::warn_thread_attribute_decl_not_pointer) << Attr.getName()->getName() << QT; } else { S.Diag(Attr.getLoc(), diag::err_attribute_can_be_applied_only_to_value_decl) @@ -270,35 +300,60 @@ static const RecordType *getRecordType(QualType QT) { return 0; } + +static bool checkBaseClassIsLockableCallback(const CXXBaseSpecifier *Specifier, + CXXBasePath &Path, void *Unused) { + const RecordType *RT = Specifier->getType()->getAs(); + if (RT->getDecl()->getAttr()) + return true; + return false; +} + + /// \brief Thread Safety Analysis: Checks that the passed in RecordType -/// resolves to a lockable object. May flag an error. +/// resolves to a lockable object. static void checkForLockableRecord(Sema &S, Decl *D, const AttributeList &Attr, QualType Ty) { const RecordType *RT = getRecordType(Ty); - + // Warn if could not get record type for this argument. if (!RT) { - S.Diag(Attr.getLoc(), diag::warn_attribute_argument_not_class) + S.Diag(Attr.getLoc(), diag::warn_thread_attribute_argument_not_class) << Attr.getName() << Ty.getAsString(); return; } - // Don't check for lockable if the class hasn't been defined yet. + + // Don't check for lockable if the class hasn't been defined yet. if (RT->isIncompleteType()) return; - // Warn if the type is not lockable. - if (!RT->getDecl()->getAttr()) { - S.Diag(Attr.getLoc(), diag::warn_attribute_argument_not_lockable) - << Attr.getName() << Ty.getAsString(); + + // Allow smart pointers to be used as lockable objects. + // FIXME -- Check the type that the smart pointer points to. + if (threadSafetyCheckIsSmartPointer(S, RT)) + return; + + // Check if the type is lockable. + RecordDecl *RD = RT->getDecl(); + if (RD->getAttr()) return; + + // Else check if any base classes are lockable. + if (CXXRecordDecl *CRD = dyn_cast(RD)) { + CXXBasePaths BPaths(false, false); + if (CRD->lookupInBases(checkBaseClassIsLockableCallback, 0, BPaths)) + return; } + + S.Diag(Attr.getLoc(), diag::warn_thread_attribute_argument_not_lockable) + << Attr.getName() << Ty.getAsString(); } /// \brief Thread Safety Analysis: Checks that all attribute arguments, starting -/// from Sidx, resolve to a lockable object. May flag an error. +/// from Sidx, resolve to a lockable object. /// \param Sidx The attribute argument index to start checking with. /// \param ParamIdxOk Whether an argument can be indexing into a function /// parameter list. 
-static bool checkAttrArgsAreLockableObjs(Sema &S, Decl *D, +static void checkAttrArgsAreLockableObjs(Sema &S, Decl *D, const AttributeList &Attr, SmallVectorImpl &Args, int Sidx = 0, @@ -307,13 +362,33 @@ static bool checkAttrArgsAreLockableObjs(Sema &S, Decl *D, Expr *ArgExp = Attr.getArg(Idx); if (ArgExp->isTypeDependent()) { - // FIXME -- need to processs this again on template instantiation + // FIXME -- need to check this again on template instantiation Args.push_back(ArgExp); continue; } + if (StringLiteral *StrLit = dyn_cast(ArgExp)) { + // Ignore empty strings without warnings + if (StrLit->getLength() == 0) + continue; + + // We allow constant strings to be used as a placeholder for expressions + // that are not valid C++ syntax, but warn that they are ignored. + S.Diag(Attr.getLoc(), diag::warn_thread_attribute_ignored) << + Attr.getName(); + continue; + } + QualType ArgTy = ArgExp->getType(); + // A pointer to member expression of the form &MyClass::mu is treated + // specially -- we need to look at the type of the member. + if (UnaryOperator *UOp = dyn_cast(ArgExp)) + if (UOp->getOpcode() == UO_AddrOf) + if (DeclRefExpr *DRE = dyn_cast(UOp->getSubExpr())) + if (DRE->getDecl()->isCXXInstanceMember()) + ArgTy = DRE->getDecl()->getType(); + // First see if we can just cast to record type, or point to record type. const RecordType *RT = getRecordType(ArgTy); @@ -329,7 +404,7 @@ static bool checkAttrArgsAreLockableObjs(Sema &S, Decl *D, if(!ArgValue.isStrictlyPositive() || ParamIdxFromOne > NumParams) { S.Diag(Attr.getLoc(), diag::err_attribute_argument_out_of_range) << Attr.getName() << Idx + 1 << NumParams; - return false; + continue; } ArgTy = FD->getParamDecl(ParamIdxFromZero)->getType(); } @@ -339,7 +414,6 @@ static bool checkAttrArgsAreLockableObjs(Sema &S, Decl *D, Args.push_back(ArgExp); } - return true; } //===----------------------------------------------------------------------===// @@ -350,78 +424,125 @@ static bool checkAttrArgsAreLockableObjs(Sema &S, Decl *D, // least add some helper functions to check most argument patterns (# // and types of args). -static void handleGuardedVarAttr(Sema &S, Decl *D, const AttributeList &Attr, - bool pointer = false) { +enum ThreadAttributeDeclKind { + ThreadExpectedFieldOrGlobalVar, + ThreadExpectedFunctionOrMethod, + ThreadExpectedClassOrStruct +}; + +static bool checkGuardedVarAttrCommon(Sema &S, Decl *D, + const AttributeList &Attr) { assert(!Attr.isInvalid()); if (!checkAttributeNumArgs(S, Attr, 0)) - return; + return false; // D must be either a member field or global (potentially shared) variable. 
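// [Editor's note: the argument forms accepted by checkAttrArgsAreLockableObjs
// above, in hypothetical annotated code; GUARDED_BY is assumed to expand to
// __attribute__((guarded_by(x))):]
//
//   Mutex mu;
//   int a GUARDED_BY(mu);            // ordinary lockable object
//   int b GUARDED_BY(&MyClass::mu);  // pointer-to-member: the member's type is checked
//   int c GUARDED_BY("mu");          // string placeholder: warned about, then ignored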
if (!mayBeSharedVariable(D)) { - S.Diag(Attr.getLoc(), diag::warn_attribute_wrong_decl_type) - << Attr.getName() << ExpectedFieldOrGlobalVar; - return; + S.Diag(Attr.getLoc(), diag::warn_thread_attribute_wrong_decl_type) + << Attr.getName() << ThreadExpectedFieldOrGlobalVar; + return false; } - if (pointer && !checkIsPointer(S, D, Attr)) + return true; +} + +static void handleGuardedVarAttr(Sema &S, Decl *D, const AttributeList &Attr) { + if (!checkGuardedVarAttrCommon(S, D, Attr)) return; - if (pointer) - D->addAttr(::new (S.Context) PtGuardedVarAttr(Attr.getRange(), S.Context)); - else - D->addAttr(::new (S.Context) GuardedVarAttr(Attr.getRange(), S.Context)); + D->addAttr(::new (S.Context) GuardedVarAttr(Attr.getRange(), S.Context)); } -static void handleGuardedByAttr(Sema &S, Decl *D, const AttributeList &Attr, - bool pointer = false) { - assert(!Attr.isInvalid()); +static void handlePtGuardedVarAttr(Sema &S, Decl *D, + const AttributeList &Attr) { + if (!checkGuardedVarAttrCommon(S, D, Attr)) + return; - if (!checkAttributeNumArgs(S, Attr, 1)) + if (!threadSafetyCheckIsPointer(S, D, Attr)) return; - Expr *Arg = Attr.getArg(0); + D->addAttr(::new (S.Context) PtGuardedVarAttr(Attr.getRange(), S.Context)); +} + +static bool checkGuardedByAttrCommon(Sema &S, Decl *D, + const AttributeList &Attr, + Expr* &Arg) { + assert(!Attr.isInvalid()); + + if (!checkAttributeNumArgs(S, Attr, 1)) + return false; // D must be either a member field or global (potentially shared) variable. if (!mayBeSharedVariable(D)) { - S.Diag(Attr.getLoc(), diag::warn_attribute_wrong_decl_type) - << Attr.getName() << ExpectedFieldOrGlobalVar; - return; + S.Diag(Attr.getLoc(), diag::warn_thread_attribute_wrong_decl_type) + << Attr.getName() << ThreadExpectedFieldOrGlobalVar; + return false; } - if (pointer && !checkIsPointer(S, D, Attr)) - return; + SmallVector Args; + // check that all arguments are lockable objects + checkAttrArgsAreLockableObjs(S, D, Attr, Args); + unsigned Size = Args.size(); + if (Size != 1) + return false; - if (!Arg->isTypeDependent()) { - checkForLockableRecord(S, D, Attr, Arg->getType()); - } + Arg = Args[0]; - if (pointer) - D->addAttr(::new (S.Context) PtGuardedByAttr(Attr.getRange(), - S.Context, Arg)); - else - D->addAttr(::new (S.Context) GuardedByAttr(Attr.getRange(), S.Context, Arg)); + return true; +} + +static void handleGuardedByAttr(Sema &S, Decl *D, const AttributeList &Attr) { + Expr *Arg = 0; + if (!checkGuardedByAttrCommon(S, D, Attr, Arg)) + return; + + D->addAttr(::new (S.Context) GuardedByAttr(Attr.getRange(), S.Context, Arg)); } +static void handlePtGuardedByAttr(Sema &S, Decl *D, + const AttributeList &Attr) { + Expr *Arg = 0; + if (!checkGuardedByAttrCommon(S, D, Attr, Arg)) + return; + + if (!threadSafetyCheckIsPointer(S, D, Attr)) + return; + + D->addAttr(::new (S.Context) PtGuardedByAttr(Attr.getRange(), + S.Context, Arg)); +} -static void handleLockableAttr(Sema &S, Decl *D, const AttributeList &Attr, - bool scoped = false) { +static bool checkLockableAttrCommon(Sema &S, Decl *D, + const AttributeList &Attr) { assert(!Attr.isInvalid()); if (!checkAttributeNumArgs(S, Attr, 0)) - return; + return false; // FIXME: Lockable structs for C code. 
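// [Editor's note: the distinction behind the guarded_var / pt_guarded_var
// split handled above, in hypothetical annotated code:]
//
//   int counter __attribute__((guarded_var));     // the variable itself is guarded
//   int *slot   __attribute__((pt_guarded_var));  // the pointee is guarded;
//                                                 // the decl must have pointer type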
if (!isa(D)) { - S.Diag(Attr.getLoc(), diag::warn_attribute_wrong_decl_type) - << Attr.getName() << ExpectedClass; - return; + S.Diag(Attr.getLoc(), diag::warn_thread_attribute_wrong_decl_type) + << Attr.getName() << ThreadExpectedClassOrStruct; + return false; } - if (scoped) - D->addAttr(::new (S.Context) ScopedLockableAttr(Attr.getRange(), S.Context)); - else - D->addAttr(::new (S.Context) LockableAttr(Attr.getRange(), S.Context)); + return true; +} + +static void handleLockableAttr(Sema &S, Decl *D, const AttributeList &Attr) { + if (!checkLockableAttrCommon(S, D, Attr)) + return; + + D->addAttr(::new (S.Context) LockableAttr(Attr.getRange(), S.Context)); +} + +static void handleScopedLockableAttr(Sema &S, Decl *D, + const AttributeList &Attr) { + if (!checkLockableAttrCommon(S, D, Attr)) + return; + + D->addAttr(::new (S.Context) ScopedLockableAttr(Attr.getRange(), S.Context)); } static void handleNoThreadSafetyAttr(Sema &S, Decl *D, @@ -432,8 +553,8 @@ static void handleNoThreadSafetyAttr(Sema &S, Decl *D, return; if (!isa(D) && !isa(D)) { - S.Diag(Attr.getLoc(), diag::warn_attribute_wrong_decl_type) - << Attr.getName() << ExpectedFunctionOrMethod; + S.Diag(Attr.getLoc(), diag::warn_thread_attribute_wrong_decl_type) + << Attr.getName() << ThreadExpectedFunctionOrMethod; return; } @@ -442,7 +563,7 @@ static void handleNoThreadSafetyAttr(Sema &S, Decl *D, } static void handleNoAddressSafetyAttr(Sema &S, Decl *D, - const AttributeList &Attr) { + const AttributeList &Attr) { assert(!Attr.isInvalid()); if (!checkAttributeNumArgs(S, Attr, 0)) @@ -455,154 +576,212 @@ static void handleNoAddressSafetyAttr(Sema &S, Decl *D, } D->addAttr(::new (S.Context) NoAddressSafetyAnalysisAttr(Attr.getRange(), - S.Context)); + S.Context)); } -static void handleAcquireOrderAttr(Sema &S, Decl *D, const AttributeList &Attr, - bool before) { +static bool checkAcquireOrderAttrCommon(Sema &S, Decl *D, + const AttributeList &Attr, + SmallVector &Args) { assert(!Attr.isInvalid()); if (!checkAttributeAtLeastNumArgs(S, Attr, 1)) - return; + return false; // D must be either a member field or global (potentially shared) variable. ValueDecl *VD = dyn_cast(D); if (!VD || !mayBeSharedVariable(D)) { - S.Diag(Attr.getLoc(), diag::warn_attribute_wrong_decl_type) - << Attr.getName() << ExpectedFieldOrGlobalVar; - return; + S.Diag(Attr.getLoc(), diag::warn_thread_attribute_wrong_decl_type) + << Attr.getName() << ThreadExpectedFieldOrGlobalVar; + return false; } - // Check that this attribute only applies to lockable types + // Check that this attribute only applies to lockable types. QualType QT = VD->getType(); if (!QT->isDependentType()) { const RecordType *RT = getRecordType(QT); if (!RT || !RT->getDecl()->getAttr()) { - S.Diag(Attr.getLoc(), diag::warn_attribute_decl_not_lockable) - << Attr.getName(); - return; + S.Diag(Attr.getLoc(), diag::warn_thread_attribute_decl_not_lockable) + << Attr.getName(); + return false; } } + // Check that all arguments are lockable objects. + checkAttrArgsAreLockableObjs(S, D, Attr, Args); + if (Args.size() == 0) + return false; + + return true; +} + +static void handleAcquiredAfterAttr(Sema &S, Decl *D, + const AttributeList &Attr) { SmallVector Args; - // check that all arguments are lockable objects - if (!checkAttrArgsAreLockableObjs(S, D, Attr, Args)) + if (!checkAcquireOrderAttrCommon(S, D, Attr, Args)) return; - unsigned Size = Args.size(); - assert(Size == Attr.getNumArgs()); - Expr **StartArg = Size == 0 ? 
0 : &Args[0]; + Expr **StartArg = &Args[0]; + D->addAttr(::new (S.Context) AcquiredAfterAttr(Attr.getRange(), S.Context, + StartArg, Args.size())); +} - if (before) - D->addAttr(::new (S.Context) AcquiredBeforeAttr(Attr.getRange(), S.Context, - StartArg, Size)); - else - D->addAttr(::new (S.Context) AcquiredAfterAttr(Attr.getRange(), S.Context, - StartArg, Size)); +static void handleAcquiredBeforeAttr(Sema &S, Decl *D, + const AttributeList &Attr) { + SmallVector Args; + if (!checkAcquireOrderAttrCommon(S, D, Attr, Args)) + return; + + Expr **StartArg = &Args[0]; + D->addAttr(::new (S.Context) AcquiredBeforeAttr(Attr.getRange(), S.Context, + StartArg, Args.size())); } -static void handleLockFunAttr(Sema &S, Decl *D, const AttributeList &Attr, - bool exclusive = false) { +static bool checkLockFunAttrCommon(Sema &S, Decl *D, + const AttributeList &Attr, + SmallVector &Args) { assert(!Attr.isInvalid()); // zero or more arguments ok // check that the attribute is applied to a function if (!isa(D) && !isa(D)) { - S.Diag(Attr.getLoc(), diag::warn_attribute_wrong_decl_type) - << Attr.getName() << ExpectedFunctionOrMethod; - return; + S.Diag(Attr.getLoc(), diag::warn_thread_attribute_wrong_decl_type) + << Attr.getName() << ThreadExpectedFunctionOrMethod; + return false; } // check that all arguments are lockable objects + checkAttrArgsAreLockableObjs(S, D, Attr, Args, 0, /*ParamIdxOk=*/true); + + return true; +} + +static void handleSharedLockFunctionAttr(Sema &S, Decl *D, + const AttributeList &Attr) { SmallVector Args; - if (!checkAttrArgsAreLockableObjs(S, D, Attr, Args, 0, /*ParamIdxOk=*/true)) + if (!checkLockFunAttrCommon(S, D, Attr, Args)) return; unsigned Size = Args.size(); - assert(Size == Attr.getNumArgs()); Expr **StartArg = Size == 0 ? 0 : &Args[0]; + D->addAttr(::new (S.Context) SharedLockFunctionAttr(Attr.getRange(), + S.Context, + StartArg, Size)); +} - if (exclusive) - D->addAttr(::new (S.Context) ExclusiveLockFunctionAttr(Attr.getRange(), - S.Context, StartArg, - Size)); - else - D->addAttr(::new (S.Context) SharedLockFunctionAttr(Attr.getRange(), - S.Context, StartArg, - Size)); +static void handleExclusiveLockFunctionAttr(Sema &S, Decl *D, + const AttributeList &Attr) { + SmallVector Args; + if (!checkLockFunAttrCommon(S, D, Attr, Args)) + return; + + unsigned Size = Args.size(); + Expr **StartArg = Size == 0 ? 
0 : &Args[0]; + D->addAttr(::new (S.Context) ExclusiveLockFunctionAttr(Attr.getRange(), + S.Context, + StartArg, Size)); } -static void handleTrylockFunAttr(Sema &S, Decl *D, const AttributeList &Attr, - bool exclusive = false) { +static bool checkTryLockFunAttrCommon(Sema &S, Decl *D, + const AttributeList &Attr, + SmallVector &Args) { assert(!Attr.isInvalid()); if (!checkAttributeAtLeastNumArgs(S, Attr, 1)) - return; - + return false; if (!isa(D) && !isa(D)) { - S.Diag(Attr.getLoc(), diag::warn_attribute_wrong_decl_type) - << Attr.getName() << ExpectedFunctionOrMethod; - return; + S.Diag(Attr.getLoc(), diag::warn_thread_attribute_wrong_decl_type) + << Attr.getName() << ThreadExpectedFunctionOrMethod; + return false; } if (!isIntOrBool(Attr.getArg(0))) { S.Diag(Attr.getLoc(), diag::err_attribute_first_argument_not_int_or_bool) - << Attr.getName(); - return; + << Attr.getName(); + return false; } - SmallVector Args; // check that all arguments are lockable objects - if (!checkAttrArgsAreLockableObjs(S, D, Attr, Args, 1)) + checkAttrArgsAreLockableObjs(S, D, Attr, Args, 1); + + return true; +} + +static void handleSharedTrylockFunctionAttr(Sema &S, Decl *D, + const AttributeList &Attr) { + SmallVector Args; + if (!checkTryLockFunAttrCommon(S, D, Attr, Args)) return; unsigned Size = Args.size(); Expr **StartArg = Size == 0 ? 0 : &Args[0]; + D->addAttr(::new (S.Context) SharedTrylockFunctionAttr(Attr.getRange(), + S.Context, + Attr.getArg(0), + StartArg, Size)); +} - if (exclusive) - D->addAttr(::new (S.Context) ExclusiveTrylockFunctionAttr(Attr.getRange(), - S.Context, - Attr.getArg(0), - StartArg, Size)); - else - D->addAttr(::new (S.Context) SharedTrylockFunctionAttr(Attr.getRange(), - S.Context, - Attr.getArg(0), - StartArg, Size)); +static void handleExclusiveTrylockFunctionAttr(Sema &S, Decl *D, + const AttributeList &Attr) { + SmallVector Args; + if (!checkTryLockFunAttrCommon(S, D, Attr, Args)) + return; + + unsigned Size = Args.size(); + Expr **StartArg = Size == 0 ? 0 : &Args[0]; + D->addAttr(::new (S.Context) ExclusiveTrylockFunctionAttr(Attr.getRange(), + S.Context, + Attr.getArg(0), + StartArg, Size)); } -static void handleLocksRequiredAttr(Sema &S, Decl *D, const AttributeList &Attr, - bool exclusive = false) { +static bool checkLocksRequiredCommon(Sema &S, Decl *D, + const AttributeList &Attr, + SmallVector &Args) { assert(!Attr.isInvalid()); if (!checkAttributeAtLeastNumArgs(S, Attr, 1)) - return; + return false; if (!isa(D) && !isa(D)) { - S.Diag(Attr.getLoc(), diag::warn_attribute_wrong_decl_type) - << Attr.getName() << ExpectedFunctionOrMethod; - return; + S.Diag(Attr.getLoc(), diag::warn_thread_attribute_wrong_decl_type) + << Attr.getName() << ThreadExpectedFunctionOrMethod; + return false; } // check that all arguments are lockable objects + checkAttrArgsAreLockableObjs(S, D, Attr, Args); + if (Args.size() == 0) + return false; + + return true; +} + +static void handleExclusiveLocksRequiredAttr(Sema &S, Decl *D, + const AttributeList &Attr) { SmallVector Args; - if (!checkAttrArgsAreLockableObjs(S, D, Attr, Args)) + if (!checkLocksRequiredCommon(S, D, Attr, Args)) return; - unsigned Size = Args.size(); - assert(Size == Attr.getNumArgs()); - Expr **StartArg = Size == 0 ? 
0 : &Args[0]; + Expr **StartArg = &Args[0]; + D->addAttr(::new (S.Context) ExclusiveLocksRequiredAttr(Attr.getRange(), + S.Context, + StartArg, + Args.size())); +} - if (exclusive) - D->addAttr(::new (S.Context) ExclusiveLocksRequiredAttr(Attr.getRange(), - S.Context, StartArg, - Size)); - else - D->addAttr(::new (S.Context) SharedLocksRequiredAttr(Attr.getRange(), - S.Context, StartArg, - Size)); +static void handleSharedLocksRequiredAttr(Sema &S, Decl *D, + const AttributeList &Attr) { + SmallVector Args; + if (!checkLocksRequiredCommon(S, D, Attr, Args)) + return; + + Expr **StartArg = &Args[0]; + D->addAttr(::new (S.Context) SharedLocksRequiredAttr(Attr.getRange(), + S.Context, + StartArg, + Args.size())); } static void handleUnlockFunAttr(Sema &S, Decl *D, @@ -612,18 +791,15 @@ static void handleUnlockFunAttr(Sema &S, Decl *D, // zero or more arguments ok if (!isa(D) && !isa(D)) { - S.Diag(Attr.getLoc(), diag::warn_attribute_wrong_decl_type) - << Attr.getName() << ExpectedFunctionOrMethod; + S.Diag(Attr.getLoc(), diag::warn_thread_attribute_wrong_decl_type) + << Attr.getName() << ThreadExpectedFunctionOrMethod; return; } // check that all arguments are lockable objects SmallVector Args; - if (!checkAttrArgsAreLockableObjs(S, D, Attr, Args, 0, /*ParamIdxOk=*/true)) - return; - + checkAttrArgsAreLockableObjs(S, D, Attr, Args, 0, /*ParamIdxOk=*/true); unsigned Size = Args.size(); - assert(Size == Attr.getNumArgs()); Expr **StartArg = Size == 0 ? 0 : &Args[0]; D->addAttr(::new (S.Context) UnlockFunctionAttr(Attr.getRange(), S.Context, @@ -639,8 +815,8 @@ static void handleLockReturnedAttr(Sema &S, Decl *D, Expr *Arg = Attr.getArg(0); if (!isa(D) && !isa(D)) { - S.Diag(Attr.getLoc(), diag::warn_attribute_wrong_decl_type) - << Attr.getName() << ExpectedFunctionOrMethod; + S.Diag(Attr.getLoc(), diag::warn_thread_attribute_wrong_decl_type) + << Attr.getName() << ThreadExpectedFunctionOrMethod; return; } @@ -648,9 +824,14 @@ static void handleLockReturnedAttr(Sema &S, Decl *D, return; // check that the argument is lockable object - checkForLockableRecord(S, D, Attr, Arg->getType()); + SmallVector Args; + checkAttrArgsAreLockableObjs(S, D, Attr, Args); + unsigned Size = Args.size(); + if (Size == 0) + return; - D->addAttr(::new (S.Context) LockReturnedAttr(Attr.getRange(), S.Context, Arg)); + D->addAttr(::new (S.Context) LockReturnedAttr(Attr.getRange(), S.Context, + Args[0])); } static void handleLocksExcludedAttr(Sema &S, Decl *D, @@ -661,19 +842,18 @@ static void handleLocksExcludedAttr(Sema &S, Decl *D, return; if (!isa(D) && !isa(D)) { - S.Diag(Attr.getLoc(), diag::warn_attribute_wrong_decl_type) - << Attr.getName() << ExpectedFunctionOrMethod; + S.Diag(Attr.getLoc(), diag::warn_thread_attribute_wrong_decl_type) + << Attr.getName() << ThreadExpectedFunctionOrMethod; return; } // check that all arguments are lockable objects SmallVector Args; - if (!checkAttrArgsAreLockableObjs(S, D, Attr, Args)) - return; - + checkAttrArgsAreLockableObjs(S, D, Attr, Args); unsigned Size = Args.size(); - assert(Size == Attr.getNumArgs()); - Expr **StartArg = Size == 0 ? 
0 : &Args[0]; + if (Size == 0) + return; + Expr **StartArg = &Args[0]; D->addAttr(::new (S.Context) LocksExcludedAttr(Attr.getRange(), S.Context, StartArg, Size)); @@ -698,12 +878,12 @@ static void handleExtVectorTypeAttr(Sema &S, Scope *scope, Decl *D, SourceLocation TemplateKWLoc; UnqualifiedId id; id.setIdentifier(Attr.getParameterName(), Attr.getLoc()); - + ExprResult Size = S.ActOnIdExpression(scope, SS, TemplateKWLoc, id, false, false); if (Size.isInvalid()) return; - + sizeExpr = Size.get(); } else { // check the attribute arguments. @@ -854,6 +1034,75 @@ static void possibleTransparentUnionPointerType(QualType &T) { } } +static void handleAllocSizeAttr(Sema &S, Decl *D, const AttributeList &Attr) { + if (!isFunctionOrMethod(D)) { + S.Diag(Attr.getLoc(), diag::warn_attribute_wrong_decl_type) + << "alloc_size" << ExpectedFunctionOrMethod; + return; + } + + if (!checkAttributeAtLeastNumArgs(S, Attr, 1)) + return; + + // In C++ the implicit 'this' function parameter also counts, and they are + // counted from one. + bool HasImplicitThisParam = isInstanceMethod(D); + unsigned NumArgs = getFunctionOrMethodNumArgs(D) + HasImplicitThisParam; + + SmallVector SizeArgs; + + for (AttributeList::arg_iterator I = Attr.arg_begin(), + E = Attr.arg_end(); I!=E; ++I) { + // The argument must be an integer constant expression. + Expr *Ex = *I; + llvm::APSInt ArgNum; + if (Ex->isTypeDependent() || Ex->isValueDependent() || + !Ex->isIntegerConstantExpr(ArgNum, S.Context)) { + S.Diag(Attr.getLoc(), diag::err_attribute_argument_not_int) + << "alloc_size" << Ex->getSourceRange(); + return; + } + + uint64_t x = ArgNum.getZExtValue(); + + if (x < 1 || x > NumArgs) { + S.Diag(Attr.getLoc(), diag::err_attribute_argument_out_of_bounds) + << "alloc_size" << I.getArgNum() << Ex->getSourceRange(); + return; + } + + --x; + if (HasImplicitThisParam) { + if (x == 0) { + S.Diag(Attr.getLoc(), + diag::err_attribute_invalid_implicit_this_argument) + << "alloc_size" << Ex->getSourceRange(); + return; + } + --x; + } + + // check if the function argument is of an integer type + QualType T = getFunctionOrMethodArgType(D, x).getNonReferenceType(); + if (!T->isIntegerType()) { + S.Diag(Attr.getLoc(), diag::err_attribute_argument_not_int) + << "alloc_size" << Ex->getSourceRange(); + return; + } + + SizeArgs.push_back(x); + } + + // check if the function returns a pointer + if (!getFunctionType(D)->getResultType()->isAnyPointerType()) { + S.Diag(Attr.getLoc(), diag::warn_ns_attribute_wrong_return_type) + << "alloc_size" << 0 /*function*/<< 1 /*pointer*/ << D->getSourceRange(); + } + + D->addAttr(::new (S.Context) AllocSizeAttr(Attr.getRange(), S.Context, + SizeArgs.data(), SizeArgs.size())); +} + static void handleNonNullAttr(Sema &S, Decl *D, const AttributeList &Attr) { // GCC ignores the nonnull attribute on K&R style function prototypes, so we // ignore it as well @@ -1226,6 +1475,46 @@ static void handleAliasAttr(Sema &S, Decl *D, const AttributeList &Attr) { Str->getString())); } +static void handleColdAttr(Sema &S, Decl *D, const AttributeList &Attr) { + // Check the attribute arguments. 
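// [Editor's note: a usage sketch for the new cold/hot attributes handled
// here; the two are mutually exclusive on a declaration:]
//
//   void slow_path() __attribute__((cold));
//   void fast_path() __attribute__((hot));
//   void both() __attribute__((hot, cold));  // error: attributes are not compatible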
+ if (!checkAttributeNumArgs(S, Attr, 0)) + return; + + if (!isa(D)) { + S.Diag(Attr.getLoc(), diag::warn_attribute_wrong_decl_type) + << Attr.getName() << ExpectedFunction; + return; + } + + if (D->hasAttr()) { + S.Diag(Attr.getLoc(), diag::err_attributes_are_not_compatible) + << Attr.getName() << "hot"; + return; + } + + D->addAttr(::new (S.Context) ColdAttr(Attr.getRange(), S.Context)); +} + +static void handleHotAttr(Sema &S, Decl *D, const AttributeList &Attr) { + // Check the attribute arguments. + if (!checkAttributeNumArgs(S, Attr, 0)) + return; + + if (!isa(D)) { + S.Diag(Attr.getLoc(), diag::warn_attribute_wrong_decl_type) + << Attr.getName() << ExpectedFunction; + return; + } + + if (D->hasAttr()) { + S.Diag(Attr.getLoc(), diag::err_attributes_are_not_compatible) + << Attr.getName() << "cold"; + return; + } + + D->addAttr(::new (S.Context) HotAttr(Attr.getRange(), S.Context)); +} + static void handleNakedAttr(Sema &S, Decl *D, const AttributeList &Attr) { // Check the attribute arguments. if (!checkAttributeNumArgs(S, Attr, 0)) @@ -1257,6 +1546,42 @@ static void handleAlwaysInlineAttr(Sema &S, Decl *D, D->addAttr(::new (S.Context) AlwaysInlineAttr(Attr.getRange(), S.Context)); } +static void handleTLSModelAttr(Sema &S, Decl *D, + const AttributeList &Attr) { + // Check the attribute arguments. + if (Attr.getNumArgs() != 1) { + S.Diag(Attr.getLoc(), diag::err_attribute_wrong_number_arguments) << 1; + return; + } + + Expr *Arg = Attr.getArg(0); + Arg = Arg->IgnoreParenCasts(); + StringLiteral *Str = dyn_cast(Arg); + + // Check that it is a string. + if (!Str) { + S.Diag(Attr.getLoc(), diag::err_attribute_not_string) << "tls_model"; + return; + } + + if (!isa(D) || !cast(D)->isThreadSpecified()) { + S.Diag(Attr.getLoc(), diag::err_attribute_wrong_decl_type) + << Attr.getName() << ExpectedTLSVar; + return; + } + + // Check that the value. + StringRef Model = Str->getString(); + if (Model != "global-dynamic" && Model != "local-dynamic" + && Model != "initial-exec" && Model != "local-exec") { + S.Diag(Attr.getLoc(), diag::err_attr_tlsmodel_arg); + return; + } + + D->addAttr(::new (S.Context) TLSModelAttr(Attr.getRange(), S.Context, + Model)); +} + static void handleMallocAttr(Sema &S, Decl *D, const AttributeList &Attr) { // Check the attribute arguments. if (Attr.hasParameterOrArguments()) { @@ -1427,7 +1752,7 @@ static void handleUnusedAttr(Sema &S, Decl *D, const AttributeList &Attr) { } if (!isa(D) && !isa(D) && !isFunctionOrMethod(D) && - !isa(D) && !isa(D)) { + !isa(D) && !isa(D) && !isa(D)) { S.Diag(Attr.getLoc(), diag::warn_attribute_wrong_decl_type) << Attr.getName() << ExpectedVariableFunctionOrLabel; return; @@ -1534,47 +1859,28 @@ static void handleDestructorAttr(Sema &S, Decl *D, const AttributeList &Attr) { priority)); } -static void handleDeprecatedAttr(Sema &S, Decl *D, const AttributeList &Attr) { +template +static void handleAttrWithMessage(Sema &S, Decl *D, const AttributeList &Attr, + const char *Name) { unsigned NumArgs = Attr.getNumArgs(); if (NumArgs > 1) { S.Diag(Attr.getLoc(), diag::err_attribute_too_many_arguments) << 1; return; } - - // Handle the case where deprecated attribute has a text message. + + // Handle the case where the attribute has a text message. 
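// [Editor's note: the two attribute spellings now funneled through the
// handleAttrWithMessage template, each with its optional message string:]
//
//   void f() __attribute__((deprecated("use g() instead")));
//   void h() __attribute__((unavailable("removed in v2")));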
StringRef Str; if (NumArgs == 1) { StringLiteral *SE = dyn_cast(Attr.getArg(0)); if (!SE) { S.Diag(Attr.getArg(0)->getLocStart(), diag::err_attribute_not_string) - << "deprecated"; + << Name; return; } Str = SE->getString(); } - D->addAttr(::new (S.Context) DeprecatedAttr(Attr.getRange(), S.Context, Str)); -} - -static void handleUnavailableAttr(Sema &S, Decl *D, const AttributeList &Attr) { - unsigned NumArgs = Attr.getNumArgs(); - if (NumArgs > 1) { - S.Diag(Attr.getLoc(), diag::err_attribute_too_many_arguments) << 1; - return; - } - - // Handle the case where unavailable attribute has a text message. - StringRef Str; - if (NumArgs == 1) { - StringLiteral *SE = dyn_cast(Attr.getArg(0)); - if (!SE) { - S.Diag(Attr.getArg(0)->getLocStart(), - diag::err_attribute_not_string) << "unavailable"; - return; - } - Str = SE->getString(); - } - D->addAttr(::new (S.Context) UnavailableAttr(Attr.getRange(), S.Context, Str)); + D->addAttr(::new (S.Context) AttrTy(Attr.getRange(), S.Context, Str)); } static void handleArcWeakrefUnavailableAttr(Sema &S, Decl *D, @@ -1622,64 +1928,180 @@ static void handleObjCRequiresPropertyDefsAttr(Sema &S, Decl *D, Attr.getRange(), S.Context)); } -static void handleAvailabilityAttr(Sema &S, Decl *D, - const AttributeList &Attr) { - IdentifierInfo *Platform = Attr.getParameterName(); - SourceLocation PlatformLoc = Attr.getParameterLoc(); - +static bool checkAvailabilityAttr(Sema &S, SourceRange Range, + IdentifierInfo *Platform, + VersionTuple Introduced, + VersionTuple Deprecated, + VersionTuple Obsoleted) { StringRef PlatformName = AvailabilityAttr::getPrettyPlatformName(Platform->getName()); - if (PlatformName.empty()) { - S.Diag(PlatformLoc, diag::warn_availability_unknown_platform) - << Platform; - + if (PlatformName.empty()) PlatformName = Platform->getName(); - } - - AvailabilityChange Introduced = Attr.getAvailabilityIntroduced(); - AvailabilityChange Deprecated = Attr.getAvailabilityDeprecated(); - AvailabilityChange Obsoleted = Attr.getAvailabilityObsoleted(); - bool IsUnavailable = Attr.getUnavailableLoc().isValid(); // Ensure that Introduced <= Deprecated <= Obsoleted (although not all // of these steps are needed). 
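// [Editor's note: an illustration of the version-ordering rule enforced here
// (hypothetical user code):]
//
//   void ok()  __attribute__((availability(macosx, introduced=10.4,
//                                          deprecated=10.6, obsoleted=10.7)));
//   void bad() __attribute__((availability(macosx, introduced=10.6,
//                                          deprecated=10.4)));  // warning: ordering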
- if (Introduced.isValid() && Deprecated.isValid() && - !(Introduced.Version <= Deprecated.Version)) { - S.Diag(Introduced.KeywordLoc, diag::warn_availability_version_ordering) - << 1 << PlatformName << Deprecated.Version.getAsString() - << 0 << Introduced.Version.getAsString(); - return; + if (!Introduced.empty() && !Deprecated.empty() && + !(Introduced <= Deprecated)) { + S.Diag(Range.getBegin(), diag::warn_availability_version_ordering) + << 1 << PlatformName << Deprecated.getAsString() + << 0 << Introduced.getAsString(); + return true; } - if (Introduced.isValid() && Obsoleted.isValid() && - !(Introduced.Version <= Obsoleted.Version)) { - S.Diag(Introduced.KeywordLoc, diag::warn_availability_version_ordering) - << 2 << PlatformName << Obsoleted.Version.getAsString() - << 0 << Introduced.Version.getAsString(); - return; + if (!Introduced.empty() && !Obsoleted.empty() && + !(Introduced <= Obsoleted)) { + S.Diag(Range.getBegin(), diag::warn_availability_version_ordering) + << 2 << PlatformName << Obsoleted.getAsString() + << 0 << Introduced.getAsString(); + return true; } - if (Deprecated.isValid() && Obsoleted.isValid() && - !(Deprecated.Version <= Obsoleted.Version)) { - S.Diag(Deprecated.KeywordLoc, diag::warn_availability_version_ordering) - << 2 << PlatformName << Obsoleted.Version.getAsString() - << 1 << Deprecated.Version.getAsString(); - return; + if (!Deprecated.empty() && !Obsoleted.empty() && + !(Deprecated <= Obsoleted)) { + S.Diag(Range.getBegin(), diag::warn_availability_version_ordering) + << 2 << PlatformName << Obsoleted.getAsString() + << 1 << Deprecated.getAsString(); + return true; } + return false; +} + +AvailabilityAttr *Sema::mergeAvailabilityAttr(Decl *D, SourceRange Range, + IdentifierInfo *Platform, + VersionTuple Introduced, + VersionTuple Deprecated, + VersionTuple Obsoleted, + bool IsUnavailable, + StringRef Message) { + VersionTuple MergedIntroduced = Introduced; + VersionTuple MergedDeprecated = Deprecated; + VersionTuple MergedObsoleted = Obsoleted; + bool FoundAny = false; + + if (D->hasAttrs()) { + AttrVec &Attrs = D->getAttrs(); + for (unsigned i = 0, e = Attrs.size(); i != e;) { + const AvailabilityAttr *OldAA = dyn_cast(Attrs[i]); + if (!OldAA) { + ++i; + continue; + } + + IdentifierInfo *OldPlatform = OldAA->getPlatform(); + if (OldPlatform != Platform) { + ++i; + continue; + } + + FoundAny = true; + VersionTuple OldIntroduced = OldAA->getIntroduced(); + VersionTuple OldDeprecated = OldAA->getDeprecated(); + VersionTuple OldObsoleted = OldAA->getObsoleted(); + bool OldIsUnavailable = OldAA->getUnavailable(); + StringRef OldMessage = OldAA->getMessage(); + + if ((!OldIntroduced.empty() && !Introduced.empty() && + OldIntroduced != Introduced) || + (!OldDeprecated.empty() && !Deprecated.empty() && + OldDeprecated != Deprecated) || + (!OldObsoleted.empty() && !Obsoleted.empty() && + OldObsoleted != Obsoleted) || + (OldIsUnavailable != IsUnavailable) || + (OldMessage != Message)) { + Diag(OldAA->getLocation(), diag::warn_mismatched_availability); + Diag(Range.getBegin(), diag::note_previous_attribute); + Attrs.erase(Attrs.begin() + i); + --e; + continue; + } + + VersionTuple MergedIntroduced2 = MergedIntroduced; + VersionTuple MergedDeprecated2 = MergedDeprecated; + VersionTuple MergedObsoleted2 = MergedObsoleted; + + if (MergedIntroduced2.empty()) + MergedIntroduced2 = OldIntroduced; + if (MergedDeprecated2.empty()) + MergedDeprecated2 = OldDeprecated; + if (MergedObsoleted2.empty()) + MergedObsoleted2 = OldObsoleted; + + if (checkAvailabilityAttr(*this, 
OldAA->getRange(), Platform, + MergedIntroduced2, MergedDeprecated2, + MergedObsoleted2)) { + Attrs.erase(Attrs.begin() + i); + --e; + continue; + } + + MergedIntroduced = MergedIntroduced2; + MergedDeprecated = MergedDeprecated2; + MergedObsoleted = MergedObsoleted2; + ++i; + } + } + + if (FoundAny && + MergedIntroduced == Introduced && + MergedDeprecated == Deprecated && + MergedObsoleted == Obsoleted) + return NULL; + + if (!checkAvailabilityAttr(*this, Range, Platform, MergedIntroduced, + MergedDeprecated, MergedObsoleted)) { + return ::new (Context) AvailabilityAttr(Range, Context, Platform, + Introduced, Deprecated, + Obsoleted, IsUnavailable, Message); + } + return NULL; +} + +static void handleAvailabilityAttr(Sema &S, Decl *D, + const AttributeList &Attr) { + IdentifierInfo *Platform = Attr.getParameterName(); + SourceLocation PlatformLoc = Attr.getParameterLoc(); + + if (AvailabilityAttr::getPrettyPlatformName(Platform->getName()).empty()) + S.Diag(PlatformLoc, diag::warn_availability_unknown_platform) + << Platform; + + AvailabilityChange Introduced = Attr.getAvailabilityIntroduced(); + AvailabilityChange Deprecated = Attr.getAvailabilityDeprecated(); + AvailabilityChange Obsoleted = Attr.getAvailabilityObsoleted(); + bool IsUnavailable = Attr.getUnavailableLoc().isValid(); StringRef Str; const StringLiteral *SE = dyn_cast_or_null(Attr.getMessageExpr()); if (SE) Str = SE->getString(); - - D->addAttr(::new (S.Context) AvailabilityAttr(Attr.getRange(), S.Context, - Platform, - Introduced.Version, - Deprecated.Version, - Obsoleted.Version, - IsUnavailable, - Str)); + + AvailabilityAttr *NewAttr = S.mergeAvailabilityAttr(D, Attr.getRange(), + Platform, + Introduced.Version, + Deprecated.Version, + Obsoleted.Version, + IsUnavailable, Str); + if (NewAttr) + D->addAttr(NewAttr); +} + +VisibilityAttr *Sema::mergeVisibilityAttr(Decl *D, SourceRange Range, + VisibilityAttr::VisibilityType Vis) { + if (isa(D)) { + Diag(Range.getBegin(), diag::warn_attribute_ignored) << "visibility"; + return NULL; + } + VisibilityAttr *ExistingAttr = D->getAttr(); + if (ExistingAttr) { + VisibilityAttr::VisibilityType ExistingVis = ExistingAttr->getVisibility(); + if (ExistingVis == Vis) + return NULL; + Diag(ExistingAttr->getLocation(), diag::err_mismatched_visibility); + Diag(Range.getBegin(), diag::note_previous_attribute); + D->dropAttr(); + } + return ::new (Context) VisibilityAttr(Range, Context, Vis); } static void handleVisibilityAttr(Sema &S, Decl *D, const AttributeList &Attr) { @@ -1720,7 +2142,9 @@ static void handleVisibilityAttr(Sema &S, Decl *D, const AttributeList &Attr) { return; } - D->addAttr(::new (S.Context) VisibilityAttr(Attr.getRange(), S.Context, type)); + VisibilityAttr *NewAttr = S.mergeVisibilityAttr(D, Attr.getRange(), type); + if (NewAttr) + D->addAttr(NewAttr); } static void handleObjCMethodFamilyAttr(Sema &S, Decl *decl, @@ -1803,7 +2227,15 @@ static void handleObjCNSObject(Sema &S, Decl *D, const AttributeList &Attr) { return; } } - else if (!isa(D)) { + else if (ObjCPropertyDecl *PD = dyn_cast(D)) { + QualType T = PD->getType(); + if (!T->isPointerType() || + !T->getAs()->getPointeeType()->isRecordType()) { + S.Diag(PD->getLocation(), diag::err_nsobject_attribute); + return; + } + } + else { // It is okay to include this attribute on properties, e.g.: // // @property (retain, nonatomic) struct Bork *Q __attribute__((NSObject)); @@ -2028,11 +2460,14 @@ static void handleWeakImportAttr(Sema &S, Decl *D, const AttributeList &Attr) { D->addAttr(::new (S.Context) 
WeakImportAttr(Attr.getRange(), S.Context)); } -static void handleReqdWorkGroupSize(Sema &S, Decl *D, - const AttributeList &Attr) { +// Handles reqd_work_group_size and work_group_size_hint. +static void handleWorkGroupSize(Sema &S, Decl *D, + const AttributeList &Attr) { + assert(Attr.getKind() == AttributeList::AT_ReqdWorkGroupSize + || Attr.getKind() == AttributeList::AT_WorkGroupSizeHint); + // Attribute has 3 arguments. - if (!checkAttributeNumArgs(S, Attr, 3)) - return; + if (!checkAttributeNumArgs(S, Attr, 3)) return; unsigned WGSize[3]; for (unsigned i = 0; i < 3; ++i) { @@ -2041,14 +2476,54 @@ static void handleReqdWorkGroupSize(Sema &S, Decl *D, if (E->isTypeDependent() || E->isValueDependent() || !E->isIntegerConstantExpr(ArgNum, S.Context)) { S.Diag(Attr.getLoc(), diag::err_attribute_argument_not_int) - << "reqd_work_group_size" << E->getSourceRange(); + << Attr.getName()->getName() << E->getSourceRange(); return; } WGSize[i] = (unsigned) ArgNum.getZExtValue(); } - D->addAttr(::new (S.Context) ReqdWorkGroupSizeAttr(Attr.getRange(), S.Context, - WGSize[0], WGSize[1], - WGSize[2])); + + if (Attr.getKind() == AttributeList::AT_ReqdWorkGroupSize + && D->hasAttr()) { + ReqdWorkGroupSizeAttr *A = D->getAttr(); + if (!(A->getXDim() == WGSize[0] && + A->getYDim() == WGSize[1] && + A->getZDim() == WGSize[2])) { + S.Diag(Attr.getLoc(), diag::warn_duplicate_attribute) << + Attr.getName(); + } + } + + if (Attr.getKind() == AttributeList::AT_WorkGroupSizeHint + && D->hasAttr()) { + WorkGroupSizeHintAttr *A = D->getAttr(); + if (!(A->getXDim() == WGSize[0] && + A->getYDim() == WGSize[1] && + A->getZDim() == WGSize[2])) { + S.Diag(Attr.getLoc(), diag::warn_duplicate_attribute) << + Attr.getName(); + } + } + + if (Attr.getKind() == AttributeList::AT_ReqdWorkGroupSize) + D->addAttr(::new (S.Context) + ReqdWorkGroupSizeAttr(Attr.getRange(), S.Context, + WGSize[0], WGSize[1], WGSize[2])); + else + D->addAttr(::new (S.Context) + WorkGroupSizeHintAttr(Attr.getRange(), S.Context, + WGSize[0], WGSize[1], WGSize[2])); +} + +SectionAttr *Sema::mergeSectionAttr(Decl *D, SourceRange Range, + StringRef Name) { + if (SectionAttr *ExistingAttr = D->getAttr()) { + if (ExistingAttr->getName() == Name) + return NULL; + Diag(ExistingAttr->getLocation(), diag::warn_mismatched_section); + Diag(Range.getBegin(), diag::note_previous_attribute); + return NULL; + } + return ::new (Context) SectionAttr(Range, Context, Name); } static void handleSectionAttr(Sema &S, Decl *D, const AttributeList &Attr) { @@ -2078,9 +2553,10 @@ static void handleSectionAttr(Sema &S, Decl *D, const AttributeList &Attr) { S.Diag(SE->getLocStart(), diag::err_attribute_section_local_variable); return; } - - D->addAttr(::new (S.Context) SectionAttr(Attr.getRange(), S.Context, - SE->getString())); + SectionAttr *NewAttr = S.mergeSectionAttr(D, Attr.getRange(), + SE->getString()); + if (NewAttr) + D->addAttr(NewAttr); } @@ -2269,26 +2745,19 @@ enum FormatAttrKind { /// getFormatAttrKind - Map from format attribute names to supported format /// types. static FormatAttrKind getFormatAttrKind(StringRef Format) { - // Check for formats that get handled specially. - if (Format == "NSString") - return NSStringFormat; - if (Format == "CFString") - return CFStringFormat; - if (Format == "strftime") - return StrftimeFormat; - - // Otherwise, check for supported formats. 
- if (Format == "scanf" || Format == "printf" || Format == "printf0" || - Format == "strfmon" || Format == "cmn_err" || Format == "vcmn_err" || - Format == "zcmn_err" || - Format == "kprintf") // OpenBSD. - return SupportedFormat; - - if (Format == "gcc_diag" || Format == "gcc_cdiag" || - Format == "gcc_cxxdiag" || Format == "gcc_tdiag") - return IgnoredFormat; - - return InvalidFormat; + return llvm::StringSwitch(Format) + // Check for formats that get handled specially. + .Case("NSString", NSStringFormat) + .Case("CFString", CFStringFormat) + .Case("strftime", StrftimeFormat) + + // Otherwise, check for supported formats. + .Cases("scanf", "printf", "printf0", "strfmon", SupportedFormat) + .Cases("cmn_err", "vcmn_err", "zcmn_err", SupportedFormat) + .Case("kprintf", SupportedFormat) // OpenBSD. + + .Cases("gcc_diag", "gcc_cdiag", "gcc_cxxdiag", "gcc_tdiag", IgnoredFormat) + .Default(InvalidFormat); } /// Handle __attribute__((init_priority(priority))) attributes based on @@ -2340,6 +2809,29 @@ static void handleInitPriorityAttr(Sema &S, Decl *D, prioritynum)); } +FormatAttr *Sema::mergeFormatAttr(Decl *D, SourceRange Range, StringRef Format, + int FormatIdx, int FirstArg) { + // Check whether we already have an equivalent format attribute. + for (specific_attr_iterator + i = D->specific_attr_begin(), + e = D->specific_attr_end(); + i != e ; ++i) { + FormatAttr *f = *i; + if (f->getType() == Format && + f->getFormatIdx() == FormatIdx && + f->getFirstArg() == FirstArg) { + // If we don't have a valid location for this attribute, adopt the + // location. + if (f->getLocation().isInvalid()) + f->setRange(Range); + return NULL; + } + } + + return ::new (Context) FormatAttr(Range, Context, Format, FormatIdx, + FirstArg); +} + /// Handle __attribute__((format(type,idx,firstarg))) attributes based on /// http://gcc.gnu.org/onlinedocs/gcc/Function-Attributes.html static void handleFormatAttr(Sema &S, Decl *D, const AttributeList &Attr) { @@ -2475,26 +2967,11 @@ static void handleFormatAttr(Sema &S, Decl *D, const AttributeList &Attr) { return; } - // Check whether we already have an equivalent format attribute. - for (specific_attr_iterator - i = D->specific_attr_begin(), - e = D->specific_attr_end(); - i != e ; ++i) { - FormatAttr *f = *i; - if (f->getType() == Format && - f->getFormatIdx() == (int)Idx.getZExtValue() && - f->getFirstArg() == (int)FirstArg.getZExtValue()) { - // If we don't have a valid location for this attribute, adopt the - // location. - if (f->getLocation().isInvalid()) - f->setRange(Attr.getRange()); - return; - } - } - - D->addAttr(::new (S.Context) FormatAttr(Attr.getRange(), S.Context, Format, + FormatAttr *NewAttr = S.mergeFormatAttr(D, Attr.getRange(), Format, Idx.getZExtValue(), - FirstArg.getZExtValue())); + FirstArg.getZExtValue()); + if (NewAttr) + D->addAttr(NewAttr); } static void handleTransparentUnionAttr(Sema &S, Decl *D, @@ -2596,37 +3073,41 @@ static void handleAlignedAttr(Sema &S, Decl *D, const AttributeList &Attr) { S.Diag(Attr.getLoc(), diag::err_attribute_wrong_number_arguments) << 1; return; } - + //FIXME: The C++0x version of this attribute has more limited applicabilty // than GNU's, and should error out when it is used to specify a // weaker alignment, rather than being silently ignored. 
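// [Editor's note: a standalone sketch of the llvm::StringSwitch idiom adopted
// in getFormatAttrKind above; it replaces a chain of string comparisons with
// one expression:]
//
//   #include "llvm/ADT/StringSwitch.h"
//   static bool isPrintfLike(llvm::StringRef Name) {
//     return llvm::StringSwitch<bool>(Name)
//                .Cases("printf", "printf0", "vprintf", true)
//                .Default(false);
//   }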
if (Attr.getNumArgs() == 0) { - D->addAttr(::new (S.Context) AlignedAttr(Attr.getRange(), S.Context, true, 0)); + D->addAttr(::new (S.Context) AlignedAttr(Attr.getRange(), S.Context, + true, 0, Attr.isDeclspecAttribute())); return; } - S.AddAlignedAttr(Attr.getRange(), D, Attr.getArg(0)); + S.AddAlignedAttr(Attr.getRange(), D, Attr.getArg(0), + Attr.isDeclspecAttribute()); } -void Sema::AddAlignedAttr(SourceRange AttrRange, Decl *D, Expr *E) { +void Sema::AddAlignedAttr(SourceRange AttrRange, Decl *D, Expr *E, + bool isDeclSpec) { // FIXME: Handle pack-expansions here. if (DiagnoseUnexpandedParameterPack(E)) return; if (E->isTypeDependent() || E->isValueDependent()) { // Save dependent expressions in the AST to be instantiated. - D->addAttr(::new (Context) AlignedAttr(AttrRange, Context, true, E)); + D->addAttr(::new (Context) AlignedAttr(AttrRange, Context, true, E, + isDeclSpec)); return; } - + SourceLocation AttrLoc = AttrRange.getBegin(); // FIXME: Cache the number on the Attr object? llvm::APSInt Alignment(32); - ExprResult ICE = - VerifyIntegerConstantExpression(E, &Alignment, - PDiag(diag::err_attribute_argument_not_int) << "aligned", - /*AllowFold*/ false); + ExprResult ICE + = VerifyIntegerConstantExpression(E, &Alignment, + diag::err_aligned_attribute_argument_not_int, + /*AllowFold*/ false); if (ICE.isInvalid()) return; if (!llvm::isPowerOf2_64(Alignment.getZExtValue())) { @@ -2634,14 +3115,26 @@ void Sema::AddAlignedAttr(SourceRange AttrRange, Decl *D, Expr *E) { << E->getSourceRange(); return; } + if (isDeclSpec) { + // We've already verified it's a power of 2, now let's make sure it's + // 8192 or less. + if (Alignment.getZExtValue() > 8192) { + Diag(AttrLoc, diag::err_attribute_aligned_greater_than_8192) + << E->getSourceRange(); + return; + } + } - D->addAttr(::new (Context) AlignedAttr(AttrRange, Context, true, ICE.take())); + D->addAttr(::new (Context) AlignedAttr(AttrRange, Context, true, ICE.take(), + isDeclSpec)); } -void Sema::AddAlignedAttr(SourceRange AttrRange, Decl *D, TypeSourceInfo *TS) { +void Sema::AddAlignedAttr(SourceRange AttrRange, Decl *D, TypeSourceInfo *TS, + bool isDeclSpec) { // FIXME: Cache the number on the Attr object if non-dependent? 
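// The isDeclSpec flag threaded through AddAlignedAttr above distinguishes the
// Microsoft __declspec(align(N)) spelling, which MSVC caps at 8192, from the
// GNU spelling, which has no such cap. A sketch of what the new check accepts
// and rejects (assuming -fms-extensions for the __declspec forms):
// \code
//   __declspec(align(8192))  char a[16];          // OK: power of two, <= 8192
//   __declspec(align(16384)) char b[16];          // rejected by the 8192 cap
//   __attribute__((aligned(16384))) char c[16];   // GNU form: no such cap
// \endcode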
// FIXME: Perform checking of type validity - D->addAttr(::new (Context) AlignedAttr(AttrRange, Context, false, TS)); + D->addAttr(::new (Context) AlignedAttr(AttrRange, Context, false, TS, + isDeclSpec)); return; } @@ -2820,9 +3313,15 @@ static void handleNoDebugAttr(Sema &S, Decl *D, const AttributeList &Attr) { if (!checkAttributeNumArgs(S, Attr, 0)) return; - if (!isFunctionOrMethod(D)) { - S.Diag(Attr.getLoc(), diag::warn_attribute_wrong_decl_type) - << Attr.getName() << ExpectedFunction; + if (const VarDecl *VD = dyn_cast(D)) { + if (!VD->hasGlobalStorage()) + S.Diag(Attr.getLoc(), + diag::warn_attribute_requires_functions_or_static_globals) + << Attr.getName(); + } else if (!isFunctionOrMethod(D)) { + S.Diag(Attr.getLoc(), + diag::warn_attribute_requires_functions_or_static_globals) + << Attr.getName(); return; } @@ -3008,22 +3507,22 @@ static void handleCallConvAttr(Sema &S, Decl *D, const AttributeList &Attr) { } switch (Attr.getKind()) { - case AttributeList::AT_fastcall: + case AttributeList::AT_FastCall: D->addAttr(::new (S.Context) FastCallAttr(Attr.getRange(), S.Context)); return; - case AttributeList::AT_stdcall: + case AttributeList::AT_StdCall: D->addAttr(::new (S.Context) StdCallAttr(Attr.getRange(), S.Context)); return; - case AttributeList::AT_thiscall: + case AttributeList::AT_ThisCall: D->addAttr(::new (S.Context) ThisCallAttr(Attr.getRange(), S.Context)); return; - case AttributeList::AT_cdecl: + case AttributeList::AT_CDecl: D->addAttr(::new (S.Context) CDeclAttr(Attr.getRange(), S.Context)); return; - case AttributeList::AT_pascal: + case AttributeList::AT_Pascal: D->addAttr(::new (S.Context) PascalAttr(Attr.getRange(), S.Context)); return; - case AttributeList::AT_pcs: { + case AttributeList::AT_Pcs: { Expr *Arg = Attr.getArg(0); StringLiteral *Str = dyn_cast(Arg); if (!Str || !Str->isAscii()) { @@ -3062,7 +3561,7 @@ bool Sema::CheckCallingConvAttr(const AttributeList &attr, CallingConv &CC) { return true; if ((attr.getNumArgs() != 0 && - !(attr.getKind() == AttributeList::AT_pcs && attr.getNumArgs() == 1)) || + !(attr.getKind() == AttributeList::AT_Pcs && attr.getNumArgs() == 1)) || attr.getParameterName()) { Diag(attr.getLoc(), diag::err_attribute_wrong_number_arguments) << 0; attr.setInvalid(); @@ -3072,12 +3571,12 @@ bool Sema::CheckCallingConvAttr(const AttributeList &attr, CallingConv &CC) { // TODO: diagnose uses of these conventions on the wrong target. Or, better // move to TargetAttributesSema one day. 
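// The switch below maps the renamed attribute kinds onto calling conventions.
// Typical source-level spellings they correspond to, in sketch form (the pcs
// variant applies to ARM targets only):
// \code
//   void __attribute__((fastcall)) f(int);
//   void __attribute__((stdcall))  g(int);
//   void __attribute__((pcs("aapcs-vfp"))) h(float);
// \endcode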
switch (attr.getKind()) { - case AttributeList::AT_cdecl: CC = CC_C; break; - case AttributeList::AT_fastcall: CC = CC_X86FastCall; break; - case AttributeList::AT_stdcall: CC = CC_X86StdCall; break; - case AttributeList::AT_thiscall: CC = CC_X86ThisCall; break; - case AttributeList::AT_pascal: CC = CC_X86Pascal; break; - case AttributeList::AT_pcs: { + case AttributeList::AT_CDecl: CC = CC_C; break; + case AttributeList::AT_FastCall: CC = CC_X86FastCall; break; + case AttributeList::AT_StdCall: CC = CC_X86StdCall; break; + case AttributeList::AT_ThisCall: CC = CC_X86ThisCall; break; + case AttributeList::AT_Pascal: CC = CC_X86Pascal; break; + case AttributeList::AT_Pcs: { Expr *Arg = attr.getArg(0); StringLiteral *Str = dyn_cast(Arg); if (!Str || !Str->isAscii()) { @@ -3228,7 +3727,7 @@ static void handleNSConsumedAttr(Sema &S, Decl *D, const AttributeList &Attr) { } bool typeOK, cf; - if (Attr.getKind() == AttributeList::AT_ns_consumed) { + if (Attr.getKind() == AttributeList::AT_NSConsumed) { typeOK = isValidSubjectOfNSAttribute(S, param->getType()); cf = false; } else { @@ -3269,7 +3768,7 @@ static void handleNSReturnsRetainedAttr(Sema &S, Decl *D, else if (ObjCPropertyDecl *PD = dyn_cast(D)) returnType = PD->getType(); else if (S.getLangOpts().ObjCAutoRefCount && hasDeclarator(D) && - (Attr.getKind() == AttributeList::AT_ns_returns_retained)) + (Attr.getKind() == AttributeList::AT_NSReturnsRetained)) return; // ignore: was handled as a type attribute else if (FunctionDecl *FD = dyn_cast(D)) returnType = FD->getResultType(); @@ -3284,15 +3783,15 @@ static void handleNSReturnsRetainedAttr(Sema &S, Decl *D, bool cf; switch (Attr.getKind()) { default: llvm_unreachable("invalid ownership attribute"); - case AttributeList::AT_ns_returns_autoreleased: - case AttributeList::AT_ns_returns_retained: - case AttributeList::AT_ns_returns_not_retained: + case AttributeList::AT_NSReturnsAutoreleased: + case AttributeList::AT_NSReturnsRetained: + case AttributeList::AT_NSReturnsNotRetained: typeOK = isValidSubjectOfNSAttribute(S, returnType); cf = false; break; - case AttributeList::AT_cf_returns_retained: - case AttributeList::AT_cf_returns_not_retained: + case AttributeList::AT_CFReturnsRetained: + case AttributeList::AT_CFReturnsNotRetained: typeOK = isValidSubjectOfCFAttribute(S, returnType); cf = true; break; @@ -3307,23 +3806,23 @@ static void handleNSReturnsRetainedAttr(Sema &S, Decl *D, switch (Attr.getKind()) { default: llvm_unreachable("invalid ownership attribute"); - case AttributeList::AT_ns_returns_autoreleased: + case AttributeList::AT_NSReturnsAutoreleased: D->addAttr(::new (S.Context) NSReturnsAutoreleasedAttr(Attr.getRange(), S.Context)); return; - case AttributeList::AT_cf_returns_not_retained: + case AttributeList::AT_CFReturnsNotRetained: D->addAttr(::new (S.Context) CFReturnsNotRetainedAttr(Attr.getRange(), S.Context)); return; - case AttributeList::AT_ns_returns_not_retained: + case AttributeList::AT_NSReturnsNotRetained: D->addAttr(::new (S.Context) NSReturnsNotRetainedAttr(Attr.getRange(), S.Context)); return; - case AttributeList::AT_cf_returns_retained: + case AttributeList::AT_CFReturnsRetained: D->addAttr(::new (S.Context) CFReturnsRetainedAttr(Attr.getRange(), S.Context)); return; - case AttributeList::AT_ns_returns_retained: + case AttributeList::AT_NSReturnsRetained: D->addAttr(::new (S.Context) NSReturnsRetainedAttr(Attr.getRange(), S.Context)); return; @@ -3336,8 +3835,8 @@ static void handleObjCReturnsInnerPointerAttr(Sema &S, Decl *D, ObjCMethodDecl *method = 
dyn_cast(D); - if (!isa(method)) { - S.Diag(method->getLocStart(), diag::err_attribute_wrong_decl_type) + if (!method) { + S.Diag(D->getLocStart(), diag::err_attribute_wrong_decl_type) << SourceRange(loc, loc) << attr.getName() << ExpectedMethod; return; } @@ -3367,7 +3866,7 @@ static void handleCFTransferAttr(Sema &S, Decl *D, const AttributeList &A) { return; } - bool IsAudited = (A.getKind() == AttributeList::AT_cf_audited_transfer); + bool IsAudited = (A.getKind() == AttributeList::AT_CFAuditedTransfer); // Check whether there's a conflicting attribute already present. Attr *Existing; @@ -3478,22 +3977,6 @@ static void handleObjCPreciseLifetimeAttr(Sema &S, Decl *D, ObjCPreciseLifetimeAttr(Attr.getRange(), S.Context)); } -static bool isKnownDeclSpecAttr(const AttributeList &Attr) { - switch (Attr.getKind()) { - default: - return false; - case AttributeList::AT_dllimport: - case AttributeList::AT_dllexport: - case AttributeList::AT_uuid: - case AttributeList::AT_deprecated: - case AttributeList::AT_noreturn: - case AttributeList::AT_nothrow: - case AttributeList::AT_naked: - case AttributeList::AT_noinline: - return true; - } -} - //===----------------------------------------------------------------------===// // Microsoft specific attribute handlers. //===----------------------------------------------------------------------===// @@ -3552,6 +4035,45 @@ static void handleUuidAttr(Sema &S, Decl *D, const AttributeList &Attr) { S.Diag(Attr.getLoc(), diag::warn_attribute_ignored) << "uuid"; } +static void handleInheritanceAttr(Sema &S, Decl *D, const AttributeList &Attr) { + if (S.LangOpts.MicrosoftExt) { + AttributeList::Kind Kind = Attr.getKind(); + if (Kind == AttributeList::AT_SingleInheritance) + D->addAttr( + ::new (S.Context) SingleInheritanceAttr(Attr.getRange(), S.Context)); + else if (Kind == AttributeList::AT_MultipleInheritance) + D->addAttr( + ::new (S.Context) MultipleInheritanceAttr(Attr.getRange(), S.Context)); + else if (Kind == AttributeList::AT_VirtualInheritance) + D->addAttr( + ::new (S.Context) VirtualInheritanceAttr(Attr.getRange(), S.Context)); + } else + S.Diag(Attr.getLoc(), diag::warn_attribute_ignored) << Attr.getName(); +} + +static void handlePortabilityAttr(Sema &S, Decl *D, const AttributeList &Attr) { + if (S.LangOpts.MicrosoftExt) { + AttributeList::Kind Kind = Attr.getKind(); + if (Kind == AttributeList::AT_Ptr32) + D->addAttr( + ::new (S.Context) Ptr32Attr(Attr.getRange(), S.Context)); + else if (Kind == AttributeList::AT_Ptr64) + D->addAttr( + ::new (S.Context) Ptr64Attr(Attr.getRange(), S.Context)); + else if (Kind == AttributeList::AT_Win64) + D->addAttr( + ::new (S.Context) Win64Attr(Attr.getRange(), S.Context)); + } else + S.Diag(Attr.getLoc(), diag::warn_attribute_ignored) << Attr.getName(); +} + +static void handleForceInlineAttr(Sema &S, Decl *D, const AttributeList &Attr) { + if (S.LangOpts.MicrosoftExt) + D->addAttr(::new (S.Context) ForceInlineAttr(Attr.getRange(), S.Context)); + else + S.Diag(Attr.getLoc(), diag::warn_attribute_ignored) << Attr.getName(); +} + //===----------------------------------------------------------------------===// // Top Level Sema Entry Points //===----------------------------------------------------------------------===// @@ -3559,9 +4081,9 @@ static void handleUuidAttr(Sema &S, Decl *D, const AttributeList &Attr) { static void ProcessNonInheritableDeclAttr(Sema &S, Scope *scope, Decl *D, const AttributeList &Attr) { switch (Attr.getKind()) { - case AttributeList::AT_device: handleDeviceAttr (S, D, Attr); break; - 
case AttributeList::AT_host: handleHostAttr (S, D, Attr); break; - case AttributeList::AT_overloadable:handleOverloadableAttr(S, D, Attr); break; + case AttributeList::AT_CUDADevice: handleDeviceAttr (S, D, Attr); break; + case AttributeList::AT_CUDAHost: handleHostAttr (S, D, Attr); break; + case AttributeList::AT_Overloadable:handleOverloadableAttr(S, D, Attr); break; default: break; } @@ -3570,227 +4092,254 @@ static void ProcessNonInheritableDeclAttr(Sema &S, Scope *scope, Decl *D, static void ProcessInheritableDeclAttr(Sema &S, Scope *scope, Decl *D, const AttributeList &Attr) { switch (Attr.getKind()) { - case AttributeList::AT_ibaction: handleIBAction(S, D, Attr); break; - case AttributeList::AT_iboutlet: handleIBOutlet(S, D, Attr); break; - case AttributeList::AT_iboutletcollection: + case AttributeList::AT_IBAction: handleIBAction(S, D, Attr); break; + case AttributeList::AT_IBOutlet: handleIBOutlet(S, D, Attr); break; + case AttributeList::AT_IBOutletCollection: handleIBOutletCollection(S, D, Attr); break; - case AttributeList::AT_address_space: - case AttributeList::AT_opencl_image_access: - case AttributeList::AT_objc_gc: - case AttributeList::AT_vector_size: - case AttributeList::AT_neon_vector_type: - case AttributeList::AT_neon_polyvector_type: + case AttributeList::AT_AddressSpace: + case AttributeList::AT_OpenCLImageAccess: + case AttributeList::AT_ObjCGC: + case AttributeList::AT_VectorSize: + case AttributeList::AT_NeonVectorType: + case AttributeList::AT_NeonPolyVectorType: // Ignore these, these are type attributes, handled by // ProcessTypeAttributes. break; - case AttributeList::AT_device: - case AttributeList::AT_host: - case AttributeList::AT_overloadable: + case AttributeList::AT_CUDADevice: + case AttributeList::AT_CUDAHost: + case AttributeList::AT_Overloadable: // Ignore, this is a non-inheritable attribute, handled // by ProcessNonInheritableDeclAttr. 
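// For reference, AT_Overloadable dispatched above corresponds to clang's
// C-only function-overloading extension, e.g. (illustrative names):
// \code
//   float  tg_sin(float x)  __attribute__((overloadable));
//   double tg_sin(double x) __attribute__((overloadable));
// \endcode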
break; - case AttributeList::AT_alias: handleAliasAttr (S, D, Attr); break; - case AttributeList::AT_aligned: handleAlignedAttr (S, D, Attr); break; - case AttributeList::AT_always_inline: + case AttributeList::AT_Alias: handleAliasAttr (S, D, Attr); break; + case AttributeList::AT_Aligned: handleAlignedAttr (S, D, Attr); break; + case AttributeList::AT_AllocSize: handleAllocSizeAttr (S, D, Attr); break; + case AttributeList::AT_AlwaysInline: handleAlwaysInlineAttr (S, D, Attr); break; - case AttributeList::AT_analyzer_noreturn: + case AttributeList::AT_AnalyzerNoReturn: handleAnalyzerNoReturnAttr (S, D, Attr); break; - case AttributeList::AT_annotate: handleAnnotateAttr (S, D, Attr); break; - case AttributeList::AT_availability:handleAvailabilityAttr(S, D, Attr); break; - case AttributeList::AT_carries_dependency: + case AttributeList::AT_TLSModel: handleTLSModelAttr (S, D, Attr); break; + case AttributeList::AT_Annotate: handleAnnotateAttr (S, D, Attr); break; + case AttributeList::AT_Availability:handleAvailabilityAttr(S, D, Attr); break; + case AttributeList::AT_CarriesDependency: handleDependencyAttr (S, D, Attr); break; - case AttributeList::AT_common: handleCommonAttr (S, D, Attr); break; - case AttributeList::AT_constant: handleConstantAttr (S, D, Attr); break; - case AttributeList::AT_constructor: handleConstructorAttr (S, D, Attr); break; - case AttributeList::AT_deprecated: handleDeprecatedAttr (S, D, Attr); break; - case AttributeList::AT_destructor: handleDestructorAttr (S, D, Attr); break; - case AttributeList::AT_ext_vector_type: + case AttributeList::AT_Common: handleCommonAttr (S, D, Attr); break; + case AttributeList::AT_CUDAConstant:handleConstantAttr (S, D, Attr); break; + case AttributeList::AT_Constructor: handleConstructorAttr (S, D, Attr); break; + case AttributeList::AT_Deprecated: + handleAttrWithMessage(S, D, Attr, "deprecated"); + break; + case AttributeList::AT_Destructor: handleDestructorAttr (S, D, Attr); break; + case AttributeList::AT_ExtVectorType: handleExtVectorTypeAttr(S, scope, D, Attr); break; - case AttributeList::AT_format: handleFormatAttr (S, D, Attr); break; - case AttributeList::AT_format_arg: handleFormatArgAttr (S, D, Attr); break; - case AttributeList::AT_global: handleGlobalAttr (S, D, Attr); break; - case AttributeList::AT_gnu_inline: handleGNUInlineAttr (S, D, Attr); break; - case AttributeList::AT_launch_bounds: + case AttributeList::AT_Format: handleFormatAttr (S, D, Attr); break; + case AttributeList::AT_FormatArg: handleFormatArgAttr (S, D, Attr); break; + case AttributeList::AT_CUDAGlobal: handleGlobalAttr (S, D, Attr); break; + case AttributeList::AT_GNUInline: handleGNUInlineAttr (S, D, Attr); break; + case AttributeList::AT_CUDALaunchBounds: handleLaunchBoundsAttr(S, D, Attr); break; - case AttributeList::AT_mode: handleModeAttr (S, D, Attr); break; - case AttributeList::AT_malloc: handleMallocAttr (S, D, Attr); break; - case AttributeList::AT_may_alias: handleMayAliasAttr (S, D, Attr); break; - case AttributeList::AT_nocommon: handleNoCommonAttr (S, D, Attr); break; - case AttributeList::AT_nonnull: handleNonNullAttr (S, D, Attr); break; + case AttributeList::AT_Mode: handleModeAttr (S, D, Attr); break; + case AttributeList::AT_Malloc: handleMallocAttr (S, D, Attr); break; + case AttributeList::AT_MayAlias: handleMayAliasAttr (S, D, Attr); break; + case AttributeList::AT_NoCommon: handleNoCommonAttr (S, D, Attr); break; + case AttributeList::AT_NonNull: handleNonNullAttr (S, D, Attr); break; case 
AttributeList::AT_ownership_returns: case AttributeList::AT_ownership_takes: case AttributeList::AT_ownership_holds: handleOwnershipAttr (S, D, Attr); break; - case AttributeList::AT_naked: handleNakedAttr (S, D, Attr); break; - case AttributeList::AT_noreturn: handleNoReturnAttr (S, D, Attr); break; - case AttributeList::AT_nothrow: handleNothrowAttr (S, D, Attr); break; - case AttributeList::AT_shared: handleSharedAttr (S, D, Attr); break; - case AttributeList::AT_vecreturn: handleVecReturnAttr (S, D, Attr); break; - - case AttributeList::AT_objc_ownership: + case AttributeList::AT_Cold: handleColdAttr (S, D, Attr); break; + case AttributeList::AT_Hot: handleHotAttr (S, D, Attr); break; + case AttributeList::AT_Naked: handleNakedAttr (S, D, Attr); break; + case AttributeList::AT_NoReturn: handleNoReturnAttr (S, D, Attr); break; + case AttributeList::AT_NoThrow: handleNothrowAttr (S, D, Attr); break; + case AttributeList::AT_CUDAShared: handleSharedAttr (S, D, Attr); break; + case AttributeList::AT_VecReturn: handleVecReturnAttr (S, D, Attr); break; + + case AttributeList::AT_ObjCOwnership: handleObjCOwnershipAttr(S, D, Attr); break; - case AttributeList::AT_objc_precise_lifetime: + case AttributeList::AT_ObjCPreciseLifetime: handleObjCPreciseLifetimeAttr(S, D, Attr); break; - case AttributeList::AT_objc_returns_inner_pointer: + case AttributeList::AT_ObjCReturnsInnerPointer: handleObjCReturnsInnerPointerAttr(S, D, Attr); break; - case AttributeList::AT_ns_bridged: + case AttributeList::AT_NSBridged: handleNSBridgedAttr(S, scope, D, Attr); break; - case AttributeList::AT_cf_audited_transfer: - case AttributeList::AT_cf_unknown_transfer: + case AttributeList::AT_CFAuditedTransfer: + case AttributeList::AT_CFUnknownTransfer: handleCFTransferAttr(S, D, Attr); break; // Checker-specific. 
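// The checker-specific ownership attributes handled below annotate
// CoreFoundation-style APIs for the static analyzer and ARC. A minimal
// sketch (the typedef is the usual CF forward declaration; function names
// are illustrative):
// \code
//   typedef const struct __CFString *CFStringRef;
//   CFStringRef CopyName(void) __attribute__((cf_returns_retained));
//   void ConsumeName(CFStringRef s __attribute__((cf_consumed)));
// \endcode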
- case AttributeList::AT_cf_consumed: - case AttributeList::AT_ns_consumed: handleNSConsumedAttr (S, D, Attr); break; - case AttributeList::AT_ns_consumes_self: + case AttributeList::AT_CFConsumed: + case AttributeList::AT_NSConsumed: handleNSConsumedAttr (S, D, Attr); break; + case AttributeList::AT_NSConsumesSelf: handleNSConsumesSelfAttr(S, D, Attr); break; - case AttributeList::AT_ns_returns_autoreleased: - case AttributeList::AT_ns_returns_not_retained: - case AttributeList::AT_cf_returns_not_retained: - case AttributeList::AT_ns_returns_retained: - case AttributeList::AT_cf_returns_retained: + case AttributeList::AT_NSReturnsAutoreleased: + case AttributeList::AT_NSReturnsNotRetained: + case AttributeList::AT_CFReturnsNotRetained: + case AttributeList::AT_NSReturnsRetained: + case AttributeList::AT_CFReturnsRetained: handleNSReturnsRetainedAttr(S, D, Attr); break; - case AttributeList::AT_reqd_work_group_size: - handleReqdWorkGroupSize(S, D, Attr); break; + case AttributeList::AT_WorkGroupSizeHint: + case AttributeList::AT_ReqdWorkGroupSize: + handleWorkGroupSize(S, D, Attr); break; - case AttributeList::AT_init_priority: + case AttributeList::AT_InitPriority: handleInitPriorityAttr(S, D, Attr); break; - case AttributeList::AT_packed: handlePackedAttr (S, D, Attr); break; - case AttributeList::AT_ms_struct: handleMsStructAttr (S, D, Attr); break; - case AttributeList::AT_section: handleSectionAttr (S, D, Attr); break; - case AttributeList::AT_unavailable: handleUnavailableAttr (S, D, Attr); break; - case AttributeList::AT_objc_arc_weak_reference_unavailable: + case AttributeList::AT_Packed: handlePackedAttr (S, D, Attr); break; + case AttributeList::AT_Section: handleSectionAttr (S, D, Attr); break; + case AttributeList::AT_Unavailable: + handleAttrWithMessage(S, D, Attr, "unavailable"); + break; + case AttributeList::AT_ArcWeakrefUnavailable: handleArcWeakrefUnavailableAttr (S, D, Attr); break; - case AttributeList::AT_objc_root_class: + case AttributeList::AT_ObjCRootClass: handleObjCRootClassAttr(S, D, Attr); break; - case AttributeList::AT_objc_requires_property_definitions: + case AttributeList::AT_ObjCRequiresPropertyDefs: handleObjCRequiresPropertyDefsAttr (S, D, Attr); break; - case AttributeList::AT_unused: handleUnusedAttr (S, D, Attr); break; - case AttributeList::AT_returns_twice: + case AttributeList::AT_Unused: handleUnusedAttr (S, D, Attr); break; + case AttributeList::AT_ReturnsTwice: handleReturnsTwiceAttr(S, D, Attr); break; - case AttributeList::AT_used: handleUsedAttr (S, D, Attr); break; - case AttributeList::AT_visibility: handleVisibilityAttr (S, D, Attr); break; - case AttributeList::AT_warn_unused_result: handleWarnUnusedResult(S, D, Attr); + case AttributeList::AT_Used: handleUsedAttr (S, D, Attr); break; + case AttributeList::AT_Visibility: handleVisibilityAttr (S, D, Attr); break; + case AttributeList::AT_WarnUnusedResult: handleWarnUnusedResult(S, D, Attr); break; - case AttributeList::AT_weak: handleWeakAttr (S, D, Attr); break; - case AttributeList::AT_weakref: handleWeakRefAttr (S, D, Attr); break; - case AttributeList::AT_weak_import: handleWeakImportAttr (S, D, Attr); break; - case AttributeList::AT_transparent_union: + case AttributeList::AT_Weak: handleWeakAttr (S, D, Attr); break; + case AttributeList::AT_WeakRef: handleWeakRefAttr (S, D, Attr); break; + case AttributeList::AT_WeakImport: handleWeakImportAttr (S, D, Attr); break; + case AttributeList::AT_TransparentUnion: handleTransparentUnionAttr(S, D, Attr); break; - case 
AttributeList::AT_objc_exception: + case AttributeList::AT_ObjCException: handleObjCExceptionAttr(S, D, Attr); break; - case AttributeList::AT_objc_method_family: + case AttributeList::AT_ObjCMethodFamily: handleObjCMethodFamilyAttr(S, D, Attr); break; - case AttributeList::AT_NSObject: handleObjCNSObject (S, D, Attr); break; - case AttributeList::AT_blocks: handleBlocksAttr (S, D, Attr); break; - case AttributeList::AT_sentinel: handleSentinelAttr (S, D, Attr); break; - case AttributeList::AT_const: handleConstAttr (S, D, Attr); break; - case AttributeList::AT_pure: handlePureAttr (S, D, Attr); break; - case AttributeList::AT_cleanup: handleCleanupAttr (S, D, Attr); break; - case AttributeList::AT_nodebug: handleNoDebugAttr (S, D, Attr); break; - case AttributeList::AT_noinline: handleNoInlineAttr (S, D, Attr); break; - case AttributeList::AT_regparm: handleRegparmAttr (S, D, Attr); break; + case AttributeList::AT_ObjCNSObject:handleObjCNSObject (S, D, Attr); break; + case AttributeList::AT_Blocks: handleBlocksAttr (S, D, Attr); break; + case AttributeList::AT_Sentinel: handleSentinelAttr (S, D, Attr); break; + case AttributeList::AT_Const: handleConstAttr (S, D, Attr); break; + case AttributeList::AT_Pure: handlePureAttr (S, D, Attr); break; + case AttributeList::AT_Cleanup: handleCleanupAttr (S, D, Attr); break; + case AttributeList::AT_NoDebug: handleNoDebugAttr (S, D, Attr); break; + case AttributeList::AT_NoInline: handleNoInlineAttr (S, D, Attr); break; + case AttributeList::AT_Regparm: handleRegparmAttr (S, D, Attr); break; case AttributeList::IgnoredAttribute: // Just ignore break; - case AttributeList::AT_no_instrument_function: // Interacts with -pg. + case AttributeList::AT_NoInstrumentFunction: // Interacts with -pg. handleNoInstrumentFunctionAttr(S, D, Attr); break; - case AttributeList::AT_stdcall: - case AttributeList::AT_cdecl: - case AttributeList::AT_fastcall: - case AttributeList::AT_thiscall: - case AttributeList::AT_pascal: - case AttributeList::AT_pcs: + case AttributeList::AT_StdCall: + case AttributeList::AT_CDecl: + case AttributeList::AT_FastCall: + case AttributeList::AT_ThisCall: + case AttributeList::AT_Pascal: + case AttributeList::AT_Pcs: handleCallConvAttr(S, D, Attr); break; - case AttributeList::AT_opencl_kernel_function: + case AttributeList::AT_OpenCLKernel: handleOpenCLKernelAttr(S, D, Attr); break; - case AttributeList::AT_uuid: + + // Microsoft attributes: + case AttributeList::AT_MsStruct: + handleMsStructAttr(S, D, Attr); + break; + case AttributeList::AT_Uuid: handleUuidAttr(S, D, Attr); break; + case AttributeList::AT_SingleInheritance: + case AttributeList::AT_MultipleInheritance: + case AttributeList::AT_VirtualInheritance: + handleInheritanceAttr(S, D, Attr); + break; + case AttributeList::AT_Win64: + case AttributeList::AT_Ptr32: + case AttributeList::AT_Ptr64: + handlePortabilityAttr(S, D, Attr); + break; + case AttributeList::AT_ForceInline: + handleForceInlineAttr(S, D, Attr); + break; // Thread safety attributes: - case AttributeList::AT_guarded_var: + case AttributeList::AT_GuardedVar: handleGuardedVarAttr(S, D, Attr); break; - case AttributeList::AT_pt_guarded_var: - handleGuardedVarAttr(S, D, Attr, /*pointer = */true); + case AttributeList::AT_PtGuardedVar: + handlePtGuardedVarAttr(S, D, Attr); break; - case AttributeList::AT_scoped_lockable: - handleLockableAttr(S, D, Attr, /*scoped = */true); + case AttributeList::AT_ScopedLockable: + handleScopedLockableAttr(S, D, Attr); break; - case AttributeList::AT_no_address_safety_analysis: + 
case AttributeList::AT_NoAddressSafetyAnalysis: handleNoAddressSafetyAttr(S, D, Attr); break; - case AttributeList::AT_no_thread_safety_analysis: + case AttributeList::AT_NoThreadSafetyAnalysis: handleNoThreadSafetyAttr(S, D, Attr); break; - case AttributeList::AT_lockable: + case AttributeList::AT_Lockable: handleLockableAttr(S, D, Attr); break; - case AttributeList::AT_guarded_by: + case AttributeList::AT_GuardedBy: handleGuardedByAttr(S, D, Attr); break; - case AttributeList::AT_pt_guarded_by: - handleGuardedByAttr(S, D, Attr, /*pointer = */true); + case AttributeList::AT_PtGuardedBy: + handlePtGuardedByAttr(S, D, Attr); break; - case AttributeList::AT_exclusive_lock_function: - handleLockFunAttr(S, D, Attr, /*exclusive = */true); + case AttributeList::AT_ExclusiveLockFunction: + handleExclusiveLockFunctionAttr(S, D, Attr); break; - case AttributeList::AT_exclusive_locks_required: - handleLocksRequiredAttr(S, D, Attr, /*exclusive = */true); + case AttributeList::AT_ExclusiveLocksRequired: + handleExclusiveLocksRequiredAttr(S, D, Attr); break; - case AttributeList::AT_exclusive_trylock_function: - handleTrylockFunAttr(S, D, Attr, /*exclusive = */true); + case AttributeList::AT_ExclusiveTrylockFunction: + handleExclusiveTrylockFunctionAttr(S, D, Attr); break; - case AttributeList::AT_lock_returned: + case AttributeList::AT_LockReturned: handleLockReturnedAttr(S, D, Attr); break; - case AttributeList::AT_locks_excluded: + case AttributeList::AT_LocksExcluded: handleLocksExcludedAttr(S, D, Attr); break; - case AttributeList::AT_shared_lock_function: - handleLockFunAttr(S, D, Attr); + case AttributeList::AT_SharedLockFunction: + handleSharedLockFunctionAttr(S, D, Attr); break; - case AttributeList::AT_shared_locks_required: - handleLocksRequiredAttr(S, D, Attr); + case AttributeList::AT_SharedLocksRequired: + handleSharedLocksRequiredAttr(S, D, Attr); break; - case AttributeList::AT_shared_trylock_function: - handleTrylockFunAttr(S, D, Attr); + case AttributeList::AT_SharedTrylockFunction: + handleSharedTrylockFunctionAttr(S, D, Attr); break; - case AttributeList::AT_unlock_function: + case AttributeList::AT_UnlockFunction: handleUnlockFunAttr(S, D, Attr); break; - case AttributeList::AT_acquired_before: - handleAcquireOrderAttr(S, D, Attr, /*before = */true); + case AttributeList::AT_AcquiredBefore: + handleAcquiredBeforeAttr(S, D, Attr); break; - case AttributeList::AT_acquired_after: - handleAcquireOrderAttr(S, D, Attr, /*before = */false); + case AttributeList::AT_AcquiredAfter: + handleAcquiredAfterAttr(S, D, Attr); break; default: // Ask target about the attribute. const TargetAttributesSema &TargetAttrs = S.getTargetAttributesSema(); if (!TargetAttrs.ProcessDeclAttribute(scope, D, Attr, S)) - S.Diag(Attr.getLoc(), diag::warn_unknown_attribute_ignored) - << Attr.getName(); + S.Diag(Attr.getLoc(), Attr.isDeclspecAttribute() ? + diag::warn_unhandled_ms_attribute_ignored : + diag::warn_unknown_attribute_ignored) << Attr.getName(); break; } } @@ -3805,8 +4354,11 @@ static void ProcessDeclAttribute(Sema &S, Scope *scope, Decl *D, if (Attr.isInvalid()) return; - if (Attr.isDeclspecAttribute() && !isKnownDeclSpecAttr(Attr)) - // FIXME: Try to deal with other __declspec attributes! + // Type attributes are still treated as declaration attributes by + // ParseMicrosoftTypeAttributes and ParseBorlandTypeAttributes. We don't + // want to process them, however, because we will simply warn about ignoring + // them. So instead, we will bail out early. 
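// The thread-safety attributes dispatched above (now one handler per
// attribute rather than shared handlers with boolean flags) annotate lock
// types and guarded data. A minimal sketch using this release's attribute
// spellings (type and variable names are illustrative):
// \code
//   struct __attribute__((lockable)) Mutex {
//     void Lock()   __attribute__((exclusive_lock_function));
//     void Unlock() __attribute__((unlock_function));
//   };
//   Mutex Mu;
//   int Counter __attribute__((guarded_by(Mu)));  // must hold Mu to touch it
// \endcode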
+ if (Attr.isMSTypespecAttribute()) return; if (NonInheritable) @@ -3840,7 +4392,7 @@ void Sema::ProcessDeclAttributeList(Scope *S, Decl *D, bool Sema::ProcessAccessDeclAttributeList(AccessSpecDecl *ASDecl, const AttributeList *AttrList) { for (const AttributeList* l = AttrList; l; l = l->getNext()) { - if (l->getKind() == AttributeList::AT_annotate) { + if (l->getKind() == AttributeList::AT_Annotate) { handleAnnotateAttr(*this, ASDecl, *l); } else { Diag(l->getLoc(), diag::err_only_annotate_after_access_spec); @@ -3880,7 +4432,7 @@ void Sema::checkUnusedDeclAttributes(Declarator &D) { } /// DeclClonePragmaWeak - clone existing decl (maybe definition), -/// #pragma weak needs a non-definition decl and source may not have one +/// \#pragma weak needs a non-definition decl and source may not have one. NamedDecl * Sema::DeclClonePragmaWeak(NamedDecl *ND, IdentifierInfo *II, SourceLocation Loc) { assert(isa(ND) || isa(ND)); @@ -3930,7 +4482,7 @@ NamedDecl * Sema::DeclClonePragmaWeak(NamedDecl *ND, IdentifierInfo *II, return NewD; } -/// DeclApplyPragmaWeak - A declaration (maybe definition) needs #pragma weak +/// DeclApplyPragmaWeak - A declaration (maybe definition) needs \#pragma weak /// applied to it, possibly with an alias. void Sema::DeclApplyPragmaWeak(Scope *S, NamedDecl *ND, WeakInfo &W) { if (W.getUsed()) return; // only do this once @@ -4018,7 +4570,7 @@ static void handleDelayedForbiddenType(Sema &S, DelayedDiagnostic &diag, } if (S.getLangOpts().ObjCAutoRefCount) if (const FunctionDecl *FD = dyn_cast(decl)) { - // FIXME. we may want to supress diagnostics for all + // FIXME: we may want to suppress diagnostics for all // kind of forbidden type messages on unavailable functions. if (FD->hasAttr() && diag.getForbiddenTypeDiagnostic() == @@ -4033,57 +4585,29 @@ static void handleDelayedForbiddenType(Sema &S, DelayedDiagnostic &diag, diag.Triggered = true; } -// This duplicates a vector push_back but hides the need to know the -// size of the type. -void Sema::DelayedDiagnostics::add(const DelayedDiagnostic &diag) { - assert(StackSize <= StackCapacity); - - // Grow the stack if necessary. - if (StackSize == StackCapacity) { - unsigned newCapacity = 2 * StackCapacity + 2; - char *newBuffer = new char[newCapacity * sizeof(DelayedDiagnostic)]; - const char *oldBuffer = (const char*) Stack; - - if (StackCapacity) - memcpy(newBuffer, oldBuffer, StackCapacity * sizeof(DelayedDiagnostic)); - - delete[] oldBuffer; - Stack = reinterpret_cast(newBuffer); - StackCapacity = newCapacity; - } - - assert(StackSize < StackCapacity); - new (&Stack[StackSize++]) DelayedDiagnostic(diag); -} - -void Sema::DelayedDiagnostics::popParsingDecl(Sema &S, ParsingDeclState state, - Decl *decl) { - DelayedDiagnostics &DD = S.DelayedDiagnostics; - - // Check the invariants. - assert(DD.StackSize >= state.SavedStackSize); - assert(state.SavedStackSize >= DD.ActiveStackBase); - assert(DD.ParsingDepth > 0); - - // Drop the parsing depth. - DD.ParsingDepth--; - - // If there are no active diagnostics, we're done. - if (DD.StackSize == DD.ActiveStackBase) - return; - - // We only want to actually emit delayed diagnostics when we - // successfully parsed a decl. - if (decl) { - // We emit all the active diagnostics, not just those starting - // from the saved state. The idea is this: we get one push for a - // decl spec and another for each declarator; in a decl group like: - // deprecated_typedef foo, *bar, baz(); - // only the declarator pops will be passed decls. 
This is correct; - // we really do need to consider delayed diagnostics from the decl spec - // for each of the different declarations. - for (unsigned i = DD.ActiveStackBase, e = DD.StackSize; i != e; ++i) { - DelayedDiagnostic &diag = DD.Stack[i]; +void Sema::PopParsingDeclaration(ParsingDeclState state, Decl *decl) { + assert(DelayedDiagnostics.getCurrentPool()); + DelayedDiagnosticPool &poppedPool = *DelayedDiagnostics.getCurrentPool(); + DelayedDiagnostics.popWithoutEmitting(state); + + // When delaying diagnostics to run in the context of a parsed + // declaration, we only want to actually emit anything if parsing + // succeeds. + if (!decl) return; + + // We emit all the active diagnostics in this pool or any of its + // parents. In general, we'll get one pool for the decl spec + // and a child pool for each declarator; in a decl group like: + // deprecated_typedef foo, *bar, baz(); + // only the declarator pops will be passed decls. This is correct; + // we really do need to consider delayed diagnostics from the decl spec + // for each of the different declarations. + const DelayedDiagnosticPool *pool = &poppedPool; + do { + for (DelayedDiagnosticPool::pool_iterator + i = pool->pool_begin(), e = pool->pool_end(); i != e; ++i) { + // This const_cast is a bit lame. Really, Triggered should be mutable. + DelayedDiagnostic &diag = const_cast(*i); if (diag.Triggered) continue; @@ -4091,25 +4615,28 @@ void Sema::DelayedDiagnostics::popParsingDecl(Sema &S, ParsingDeclState state, case DelayedDiagnostic::Deprecation: // Don't bother giving deprecation diagnostics if the decl is invalid. if (!decl->isInvalidDecl()) - S.HandleDelayedDeprecationCheck(diag, decl); + HandleDelayedDeprecationCheck(diag, decl); break; case DelayedDiagnostic::Access: - S.HandleDelayedAccessCheck(diag, decl); + HandleDelayedAccessCheck(diag, decl); break; case DelayedDiagnostic::ForbiddenType: - handleDelayedForbiddenType(S, diag, decl); + handleDelayedForbiddenType(*this, diag, decl); break; } } - } - - // Destroy all the delayed diagnostics we're about to pop off. - for (unsigned i = state.SavedStackSize, e = DD.StackSize; i != e; ++i) - DD.Stack[i].Destroy(); + } while ((pool = pool->getParent())); +} - DD.StackSize = state.SavedStackSize; +/// Given a set of delayed diagnostics, re-emit them as if they had +/// been delayed in the current context instead of in the given pool. +/// Essentially, this just moves them to the current pool. +void Sema::redelayDiagnostics(DelayedDiagnosticPool &pool) { + DelayedDiagnosticPool *curPool = DelayedDiagnostics.getCurrentPool(); + assert(curPool && "re-emitting in undelayed context not supported"); + curPool->steal(pool); } static bool isDeclDeprecated(Decl *D) { @@ -4123,24 +4650,36 @@ static bool isDeclDeprecated(Decl *D) { return false; } +static void +DoEmitDeprecationWarning(Sema &S, const NamedDecl *D, StringRef Message, + SourceLocation Loc, + const ObjCInterfaceDecl *UnknownObjCClass) { + DeclarationName Name = D->getDeclName(); + if (!Message.empty()) { + S.Diag(Loc, diag::warn_deprecated_message) << Name << Message; + S.Diag(D->getLocation(), + isa(D) ? diag::note_method_declared_at + : diag::note_previous_decl) << Name; + } else if (!UnknownObjCClass) { + S.Diag(Loc, diag::warn_deprecated) << D->getDeclName(); + S.Diag(D->getLocation(), + isa(D) ? 
diag::note_method_declared_at + : diag::note_previous_decl) << Name; + } else { + S.Diag(Loc, diag::warn_deprecated_fwdclass_message) << Name; + S.Diag(UnknownObjCClass->getLocation(), diag::note_forward_class); + } +} + void Sema::HandleDelayedDeprecationCheck(DelayedDiagnostic &DD, Decl *Ctx) { if (isDeclDeprecated(Ctx)) return; DD.Triggered = true; - if (!DD.getDeprecationMessage().empty()) - Diag(DD.Loc, diag::warn_deprecated_message) - << DD.getDeprecationDecl()->getDeclName() - << DD.getDeprecationMessage(); - else if (DD.getUnknownObjCClass()) { - Diag(DD.Loc, diag::warn_deprecated_fwdclass_message) - << DD.getDeprecationDecl()->getDeclName(); - Diag(DD.getUnknownObjCClass()->getLocation(), diag::note_forward_class); - } - else - Diag(DD.Loc, diag::warn_deprecated) - << DD.getDeprecationDecl()->getDeclName(); + DoEmitDeprecationWarning(*this, DD.getDeprecationDecl(), + DD.getDeprecationMessage(), DD.Loc, + DD.getUnknownObjCClass()); } void Sema::EmitDeprecationWarning(NamedDecl *D, StringRef Message, @@ -4157,15 +4696,5 @@ void Sema::EmitDeprecationWarning(NamedDecl *D, StringRef Message, // Otherwise, don't warn if our current context is deprecated. if (isDeclDeprecated(cast(getCurLexicalContext()))) return; - if (!Message.empty()) - Diag(Loc, diag::warn_deprecated_message) << D->getDeclName() - << Message; - else { - if (!UnknownObjCClass) - Diag(Loc, diag::warn_deprecated) << D->getDeclName(); - else { - Diag(Loc, diag::warn_deprecated_fwdclass_message) << D->getDeclName(); - Diag(UnknownObjCClass->getLocation(), diag::note_forward_class); - } - } + DoEmitDeprecationWarning(*this, D, Message, Loc, UnknownObjCClass); } diff --git a/lib/Sema/SemaDeclCXX.cpp b/lib/Sema/SemaDeclCXX.cpp index c861072..1d45a68 100644 --- a/lib/Sema/SemaDeclCXX.cpp +++ b/lib/Sema/SemaDeclCXX.cpp @@ -23,6 +23,7 @@ #include "clang/AST/CharUnits.h" #include "clang/AST/CXXInheritance.h" #include "clang/AST/DeclVisitor.h" +#include "clang/AST/EvaluatedExprVisitor.h" #include "clang/AST/ExprCXX.h" #include "clang/AST/RecordLayout.h" #include "clang/AST/RecursiveASTVisitor.h" @@ -127,8 +128,8 @@ namespace { void Sema::ImplicitExceptionSpecification::CalledDecl(SourceLocation CallLoc, CXXMethodDecl *Method) { - // If we have an MSAny or unknown spec already, don't bother. - if (!Method || ComputedEST == EST_MSAny || ComputedEST == EST_Delayed) + // If we have an MSAny spec already, don't bother. + if (!Method || ComputedEST == EST_MSAny) return; const FunctionProtoType *Proto @@ -140,7 +141,7 @@ void Sema::ImplicitExceptionSpecification::CalledDecl(SourceLocation CallLoc, ExceptionSpecificationType EST = Proto->getExceptionSpecType(); // If this function can throw any exceptions, make a note of that. 
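// This code folds the exception specification of each called member into the
// implicitly computed one. A hedged illustration of the effect (not code from
// this patch):
// \code
//   struct A { ~A() noexcept(false); };
//   struct B { A a; };  // B's implicit ~B() is also potentially-throwing,
//                       // since it must call the throwing ~A()
// \endcode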
- if (EST == EST_Delayed || EST == EST_MSAny || EST == EST_None) { + if (EST == EST_MSAny || EST == EST_None) { ClearExceptions(); ComputedEST = EST; return; @@ -197,7 +198,7 @@ void Sema::ImplicitExceptionSpecification::CalledDecl(SourceLocation CallLoc, } void Sema::ImplicitExceptionSpecification::CalledExpr(Expr *E) { - if (!E || ComputedEST == EST_MSAny || ComputedEST == EST_Delayed) + if (!E || ComputedEST == EST_MSAny) return; // FIXME: @@ -667,9 +668,9 @@ static bool CheckConstexprParameterTypes(Sema &SemaRef, SourceLocation ParamLoc = PD->getLocation(); if (!(*i)->isDependentType() && SemaRef.RequireLiteralType(ParamLoc, *i, - SemaRef.PDiag(diag::err_constexpr_non_literal_param) - << ArgIndex+1 << PD->getSourceRange() - << isa(FD))) + diag::err_constexpr_non_literal_param, + ArgIndex+1, PD->getSourceRange(), + isa(FD))) return false; } return true; @@ -725,7 +726,7 @@ bool Sema::CheckConstexprFunctionDecl(const FunctionDecl *NewFD) { QualType RT = NewFD->getResultType(); if (!RT->isDependentType() && RequireLiteralType(NewFD->getLocation(), RT, - PDiag(diag::err_constexpr_non_literal_return))) + diag::err_constexpr_non_literal_return)) return false; } @@ -920,7 +921,7 @@ bool Sema::CheckConstexprFunctionBody(const FunctionDecl *Dcl, Stmt *Body) { unsigned Fields = 0; for (CXXRecordDecl::field_iterator I = RD->field_begin(), E = RD->field_end(); I != E; ++I, ++Fields) { - if ((*I)->isAnonymousStructOrUnion()) { + if (I->isAnonymousStructOrUnion()) { AnyAnonStructUnionMembers = true; break; } @@ -1055,8 +1056,7 @@ Sema::CheckBaseSpecifier(CXXRecordDecl *Class, // The class-name in a base-specifier shall not be an incompletely // defined class. if (RequireCompleteType(BaseLoc, BaseType, - PDiag(diag::err_incomplete_base_class) - << SpecifierRange)) { + diag::err_incomplete_base_class, SpecifierRange)) { Class->setInvalidDecl(); return 0; } @@ -1119,6 +1119,8 @@ Sema::ActOnBaseSpecifier(Decl *classdecl, SourceRange SpecifierRange, Virtual, Access, TInfo, EllipsisLoc)) return BaseSpec; + else + Class->setInvalidDecl(); return true; } @@ -1403,32 +1405,50 @@ bool Sema::ActOnAccessSpecifier(AccessSpecifier Access, return ProcessAccessDeclAttributeList(ASDecl, Attrs); } -/// CheckOverrideControl - Check C++0x override control semantics. -void Sema::CheckOverrideControl(const Decl *D) { +/// CheckOverrideControl - Check C++11 override control semantics. +void Sema::CheckOverrideControl(Decl *D) { const CXXMethodDecl *MD = dyn_cast(D); - if (!MD || !MD->isVirtual()) + + // Do we know which functions this declaration might be overriding? + bool OverridesAreKnown = !MD || + (!MD->getParent()->hasAnyDependentBases() && + !MD->getType()->isDependentType()); + + if (!MD || !MD->isVirtual()) { + if (OverridesAreKnown) { + if (OverrideAttr *OA = D->getAttr()) { + Diag(OA->getLocation(), + diag::override_keyword_only_allowed_on_virtual_member_functions) + << "override" << FixItHint::CreateRemoval(OA->getLocation()); + D->dropAttr(); + } + if (FinalAttr *FA = D->getAttr()) { + Diag(FA->getLocation(), + diag::override_keyword_only_allowed_on_virtual_member_functions) + << "final" << FixItHint::CreateRemoval(FA->getLocation()); + D->dropAttr(); + } + } return; + } - if (MD->isDependentContext()) + if (!OverridesAreKnown) return; - // C++0x [class.virtual]p3: - // If a virtual function is marked with the virt-specifier override and does - // not override a member function of a base class, - // the program is ill-formed. 
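// The C++11 virt-specifier semantics enforced here, in sketch form:
// \code
//   struct Base { virtual void f(); };
//   struct Derived : Base {
//     void f() override;  // OK: overrides Base::f
//     void h() override;  // error: marked 'override' but overrides nothing
//   };
// \endcode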
- bool HasOverriddenMethods = + // C++11 [class.virtual]p5: + // If a virtual function is marked with the virt-specifier override and + // does not override a member function of a base class, the program is + // ill-formed. + bool HasOverriddenMethods = MD->begin_overridden_methods() != MD->end_overridden_methods(); - if (MD->hasAttr() && !HasOverriddenMethods) { - Diag(MD->getLocation(), - diag::err_function_marked_override_not_overriding) + if (MD->hasAttr() && !HasOverriddenMethods) + Diag(MD->getLocation(), diag::err_function_marked_override_not_overriding) << MD->getDeclName(); - return; - } } -/// CheckIfOverriddenFunctionIsMarkedFinal - Checks whether a virtual member +/// CheckIfOverriddenFunctionIsMarkedFinal - Checks whether a virtual member /// function overrides a virtual member function marked 'final', according to -/// C++0x [class.virtual]p3. +/// C++11 [class.virtual]p4. bool Sema::CheckIfOverriddenFunctionIsMarkedFinal(const CXXMethodDecl *New, const CXXMethodDecl *Old) { if (!Old->hasAttr()) @@ -1440,16 +1460,26 @@ bool Sema::CheckIfOverriddenFunctionIsMarkedFinal(const CXXMethodDecl *New, return true; } +static bool InitializationHasSideEffects(const FieldDecl &FD) { + const Type *T = FD.getType()->getBaseElementTypeUnsafe(); + // FIXME: Destruction of ObjC lifetime types has side-effects. + if (const CXXRecordDecl *RD = T->getAsCXXRecordDecl()) + return !RD->isCompleteDefinition() || + !RD->hasTrivialDefaultConstructor() || + !RD->hasTrivialDestructor(); + return false; +} + /// ActOnCXXMemberDeclarator - This is invoked when a C++ class member /// declarator is parsed. 'AS' is the access specifier, 'BW' specifies the /// bitfield width if there is one, 'InitExpr' specifies the initializer if -/// one has been parsed, and 'HasDeferredInit' is true if an initializer is -/// present but parsing it has been deferred. +/// one has been parsed, and 'InitStyle' is set if an in-class initializer is +/// present (but parsing it has been deferred). Decl * Sema::ActOnCXXMemberDeclarator(Scope *S, AccessSpecifier AS, Declarator &D, MultiTemplateParamsArg TemplateParameterLists, Expr *BW, const VirtSpecifiers &VS, - bool HasDeferredInit) { + InClassInitStyle InitStyle) { const DeclSpec &DS = D.getDeclSpec(); DeclarationNameInfo NameInfo = GetNameForDeclarator(D); DeclarationName Name = NameInfo.getName(); @@ -1507,12 +1537,12 @@ Sema::ActOnCXXMemberDeclarator(Scope *S, AccessSpecifier AS, Declarator &D, CXXScopeSpec &SS = D.getCXXScopeSpec(); // Data members must have identifiers for names. - if (Name.getNameKind() != DeclarationName::Identifier) { + if (!Name.isIdentifier()) { Diag(Loc, diag::err_bad_variable_name) << Name; return 0; } - + IdentifierInfo *II = Name.getAsIdentifierInfo(); // Member field could not be with "template" keyword. 
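// The InClassInitStyle parameter introduced above records which in-class
// initializer form a member used. In sketch form (ICIS_CopyInit is assumed
// here to be the remaining enumerator alongside the two named in this patch):
// \code
//   struct Widget {
//     int a = 1;  // ICIS_CopyInit
//     int b{2};   // ICIS_ListInit
//     int c;      // ICIS_NoInit
//   };
// \endcode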
@@ -1553,10 +1583,10 @@ Sema::ActOnCXXMemberDeclarator(Scope *S, AccessSpecifier AS, Declarator &D, } Member = HandleField(S, cast(CurContext), Loc, D, BitWidth, - HasDeferredInit, AS); + InitStyle, AS); assert(Member && "HandleField never returns null"); } else { - assert(!HasDeferredInit); + assert(InitStyle == ICIS_NoInit); Member = HandleDeclarator(S, D, move(TemplateParameterLists)); if (!Member) { @@ -1596,37 +1626,39 @@ Sema::ActOnCXXMemberDeclarator(Scope *S, AccessSpecifier AS, Declarator &D, FunTmpl->getTemplatedDecl()->setAccess(AS); } - if (VS.isOverrideSpecified()) { - CXXMethodDecl *MD = dyn_cast(Member); - if (!MD || !MD->isVirtual()) { - Diag(Member->getLocStart(), - diag::override_keyword_only_allowed_on_virtual_member_functions) - << "override" << FixItHint::CreateRemoval(VS.getOverrideLoc()); - } else - MD->addAttr(new (Context) OverrideAttr(VS.getOverrideLoc(), Context)); - } - if (VS.isFinalSpecified()) { - CXXMethodDecl *MD = dyn_cast(Member); - if (!MD || !MD->isVirtual()) { - Diag(Member->getLocStart(), - diag::override_keyword_only_allowed_on_virtual_member_functions) - << "final" << FixItHint::CreateRemoval(VS.getFinalLoc()); - } else - MD->addAttr(new (Context) FinalAttr(VS.getFinalLoc(), Context)); - } + if (VS.isOverrideSpecified()) + Member->addAttr(new (Context) OverrideAttr(VS.getOverrideLoc(), Context)); + if (VS.isFinalSpecified()) + Member->addAttr(new (Context) FinalAttr(VS.getFinalLoc(), Context)); if (VS.getLastLocation().isValid()) { // Update the end location of a method that has a virt-specifiers. if (CXXMethodDecl *MD = dyn_cast_or_null(Member)) MD->setRangeEnd(VS.getLastLocation()); } - + CheckOverrideControl(Member); assert((Name || isInstField) && "No identifier for non-field ?"); - if (isInstField) - FieldCollector->Add(cast(Member)); + if (isInstField) { + FieldDecl *FD = cast(Member); + FieldCollector->Add(FD); + + if (Diags.getDiagnosticLevel(diag::warn_unused_private_field, + FD->getLocation()) + != DiagnosticsEngine::Ignored) { + // Remember all explicit private FieldDecls that have a name, no side + // effects and are not part of a dependent type declaration. + if (!FD->isImplicit() && FD->getDeclName() && + FD->getAccess() == AS_private && + !FD->hasAttr() && + !FD->getParent()->isDependentContext() && + !InitializationHasSideEffects(*FD)) + UnusedPrivateFields.insert(FD); + } + } + return Member; } @@ -1635,9 +1667,11 @@ Sema::ActOnCXXMemberDeclarator(Scope *S, AccessSpecifier AS, Declarator &D, /// instantiating an in-class initializer in a class template. Such actions /// are deferred until the class is complete. void -Sema::ActOnCXXInClassMemberInitializer(Decl *D, SourceLocation EqualLoc, +Sema::ActOnCXXInClassMemberInitializer(Decl *D, SourceLocation InitLoc, Expr *InitExpr) { FieldDecl *FD = cast(D); + assert(FD->getInClassInitStyle() != ICIS_NoInit && + "must set init style when field is created"); if (!InitExpr) { FD->setInvalidDecl(); @@ -1660,9 +1694,9 @@ Sema::ActOnCXXInClassMemberInitializer(Decl *D, SourceLocation EqualLoc, Expr **Inits = &InitExpr; unsigned NumInits = 1; InitializedEntity Entity = InitializedEntity::InitializeMember(FD); - InitializationKind Kind = EqualLoc.isInvalid() + InitializationKind Kind = FD->getInClassInitStyle() == ICIS_ListInit ? 
InitializationKind::CreateDirectList(InitExpr->getLocStart()) - : InitializationKind::CreateCopy(InitExpr->getLocStart(), EqualLoc); + : InitializationKind::CreateCopy(InitExpr->getLocStart(), InitLoc); InitializationSequence Seq(*this, Entity, Kind, Inits, NumInits); Init = Seq.Perform(*this, Entity, Kind, MultiExprArg(Inits, NumInits)); if (Init.isInvalid()) { @@ -1670,7 +1704,7 @@ Sema::ActOnCXXInClassMemberInitializer(Decl *D, SourceLocation EqualLoc, return; } - CheckImplicitConversions(Init.get(), EqualLoc); + CheckImplicitConversions(Init.get(), InitLoc); } // C++0x [class.base.init]p7: @@ -2010,73 +2044,95 @@ static void CheckForDanglingReferenceOrPointer(Sema &S, ValueDecl *Member, << (unsigned)IsPointer; } -/// Checks an initializer expression for use of uninitialized fields, such as -/// containing the field that is being initialized. Returns true if there is an -/// uninitialized field was used an updates the SourceLocation parameter; false -/// otherwise. -static bool InitExprContainsUninitializedFields(const Stmt *S, - const ValueDecl *LhsField, - SourceLocation *L) { - assert(isa(LhsField) || isa(LhsField)); - - if (isa(S)) { - // Do not descend into function calls or constructors, as the use - // of an uninitialized field may be valid. One would have to inspect - // the contents of the function/ctor to determine if it is safe or not. - // i.e. Pass-by-value is never safe, but pass-by-reference and pointers - // may be safe, depending on what the function/ctor does. - return false; - } - if (const MemberExpr *ME = dyn_cast(S)) { - const NamedDecl *RhsField = ME->getMemberDecl(); - - if (const VarDecl *VD = dyn_cast(RhsField)) { - // The member expression points to a static data member. - assert(VD->isStaticDataMember() && - "Member points to non-static data member!"); - (void)VD; - return false; +namespace { + class UninitializedFieldVisitor + : public EvaluatedExprVisitor { + Sema &S; + ValueDecl *VD; + public: + typedef EvaluatedExprVisitor Inherited; + UninitializedFieldVisitor(Sema &S, ValueDecl *VD) : Inherited(S.Context), + S(S), VD(VD) { } - - if (isa(RhsField)) { - // The member expression points to an enum. - return false; + + void HandleExpr(Expr *E) { + if (!E) return; + + // Expressions like x(x) sometimes lack the surrounding expressions + // but need to be checked anyways. + HandleValue(E); + Visit(E); } - if (RhsField == LhsField) { - // Initializing a field with itself. Throw a warning. - // But wait; there are exceptions! - // Exception #1: The field may not belong to this record. - // e.g. Foo(const Foo& rhs) : A(rhs.A) {} - const Expr *base = ME->getBase(); - if (base != NULL && !isa(base->IgnoreParenCasts())) { - // Even though the field matches, it does not belong to this record. 
- return false; + void HandleValue(Expr *E) { + E = E->IgnoreParens(); + + if (MemberExpr *ME = dyn_cast(E)) { + if (isa(ME->getMemberDecl())) + return; + Expr *Base = E; + while (isa(Base)) { + ME = dyn_cast(Base); + if (VarDecl *VarD = dyn_cast(ME->getMemberDecl())) + if (VarD->hasGlobalStorage()) + return; + Base = ME->getBase(); + } + + if (VD == ME->getMemberDecl() && isa(Base)) { + S.Diag(ME->getExprLoc(), diag::warn_field_is_uninit); + return; + } + } + + if (ConditionalOperator *CO = dyn_cast(E)) { + HandleValue(CO->getTrueExpr()); + HandleValue(CO->getFalseExpr()); + return; + } + + if (BinaryConditionalOperator *BCO = + dyn_cast(E)) { + HandleValue(BCO->getCommon()); + HandleValue(BCO->getFalseExpr()); + return; + } + + if (BinaryOperator *BO = dyn_cast(E)) { + switch (BO->getOpcode()) { + default: + return; + case(BO_PtrMemD): + case(BO_PtrMemI): + HandleValue(BO->getLHS()); + return; + case(BO_Comma): + HandleValue(BO->getRHS()); + return; + } } - // None of the exceptions triggered; return true to indicate an - // uninitialized field was used. - *L = ME->getMemberLoc(); - return true; } - } else if (isa(S)) { - // sizeof/alignof doesn't reference contents, do not warn. - return false; - } else if (const UnaryOperator *UOE = dyn_cast(S)) { - // address-of doesn't reference contents (the pointer may be dereferenced - // in the same expression but it would be rare; and weird). - if (UOE->getOpcode() == UO_AddrOf) - return false; - } - for (Stmt::const_child_range it = S->children(); it; ++it) { - if (!*it) { - // An expression such as 'member(arg ?: "")' may trigger this. - continue; + + void VisitImplicitCastExpr(ImplicitCastExpr *E) { + if (E->getCastKind() == CK_LValueToRValue) + HandleValue(E->getSubExpr()); + + Inherited::VisitImplicitCastExpr(E); } - if (InitExprContainsUninitializedFields(*it, LhsField, L)) - return true; + + void VisitCXXMemberCallExpr(CXXMemberCallExpr *E) { + Expr *Callee = E->getCallee(); + if (isa(Callee)) + HandleValue(Callee); + + Inherited::VisitCXXMemberCallExpr(E); + } + }; + static void CheckInitExprContainsUninitializedFields(Sema &S, Expr *E, + ValueDecl *VD) { + UninitializedFieldVisitor(S, VD).HandleExpr(E); } - return false; -} +} // namespace MemInitResult Sema::BuildMemberInitializer(ValueDecl *Member, Expr *Init, @@ -2106,18 +2162,17 @@ Sema::BuildMemberInitializer(ValueDecl *Member, Expr *Init, Args = InitList->getInits(); NumArgs = InitList->getNumInits(); } - for (unsigned i = 0; i < NumArgs; ++i) { - SourceLocation L; - if (InitExprContainsUninitializedFields(Args[i], Member, &L)) { - // FIXME: Return true in the case when other fields are used before being + + if (getDiagnostics().getDiagnosticLevel(diag::warn_field_is_uninit, IdLoc) + != DiagnosticsEngine::Ignored) + for (unsigned i = 0; i < NumArgs; ++i) + // FIXME: Warn about the case when other fields are used before being // uninitialized. For example, let this field be the i'th field. When // initializing the i'th field, throw a warning if any of the >= i'th // fields are used, as they are not yet initialized. // Right now we are only handling the case where the i'th field uses // itself in its initializer. 
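// The UninitializedFieldVisitor above generalizes the old recursive check;
// the canonical case it warns on is a field initialized from itself:
// \code
//   struct S {
//     int x;
//     S() : x(x) {}  // warn_field_is_uninit fires on the inner 'x'
//   };
// \endcode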
- Diag(L, diag::warn_field_is_uninit); - } - } + CheckInitExprContainsUninitializedFields(*this, Args[i], Member); SourceRange InitRange = Init->getSourceRange(); @@ -2235,6 +2290,16 @@ Sema::BuildDelegatingInitializer(TypeSourceInfo *TInfo, Expr *Init, if (DelegationInit.isInvalid()) return true; + // If we are in a dependent context, template instantiation will + // perform this type-checking again. Just save the arguments that we + // received in a ParenListExpr. + // FIXME: This isn't quite ideal, since our ASTs don't capture all + // of the information that we have about the base + // initializer. However, deconstructing the ASTs is a dicey process, + // and this approach is far more likely to get the corner cases right. + if (CurContext->isDependentContext()) + DelegationInit = Owned(Init); + return new (Context) CXXCtorInitializer(Context, TInfo, InitRange.getBegin(), DelegationInit.takeAs(), InitRange.getEnd()); @@ -2694,7 +2759,7 @@ BuildImplicitMemberInitializer(Sema &SemaRef, CXXConstructorDecl *Constructor, FieldBaseElementType->isObjCRetainableType() && FieldBaseElementType.getObjCLifetime() != Qualifiers::OCL_None && FieldBaseElementType.getObjCLifetime() != Qualifiers::OCL_ExplicitNone) { - // Instant objects: + // ARC: // Default-initialize Objective-C pointers to NULL. CXXMemberInit = new (SemaRef.Context) CXXCtorInitializer(SemaRef.Context, Field, @@ -2741,6 +2806,16 @@ struct BaseAndFieldInfo { llvm_unreachable("Invalid ImplicitInitializerKind!"); } + + bool addFieldInitializer(CXXCtorInitializer *Init) { + AllToInit.push_back(Init); + + // Check whether this initializer makes the field "used". + if (Init->getInit() && Init->getInit()->HasSideEffects(S.Context)) + S.UnusedPrivateFields.remove(Init->getAnyMember()); + + return false; + } }; } @@ -2778,12 +2853,10 @@ static bool CollectFieldInitializer(Sema &SemaRef, BaseAndFieldInfo &Info, IndirectFieldDecl *Indirect = 0) { // Overwhelmingly common case: we have a direct initializer for this field. - if (CXXCtorInitializer *Init = Info.AllBaseFields.lookup(Field)) { - Info.AllToInit.push_back(Init); - return false; - } + if (CXXCtorInitializer *Init = Info.AllBaseFields.lookup(Field)) + return Info.addFieldInitializer(Init); - // C++0x [class.base.init]p8: if the entity is a non-static data member that + // C++11 [class.base.init]p8: if the entity is a non-static data member that // has a brace-or-equal-initializer, the entity is initialized as specified // in [dcl.init]. 
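// C++11 [class.base.init]p8 as applied here, in sketch form: the in-class
// initializer is used only when no mem-initializer names the field.
// \code
//   struct S {
//     int x = 42;         // brace-or-equal-initializer
//     S() {}              // no mem-initializer: x becomes 42
//     S(int v) : x(v) {}  // explicit mem-initializer takes precedence
//   };
// \endcode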
-  // C++0x [class.base.init]p8: if the entity is a non-static data member that
+  // C++11 [class.base.init]p8: if the entity is a non-static data member that
   // has a brace-or-equal-initializer, the entity is initialized as specified
   // in [dcl.init].
   if (Field->hasInClassInitializer() && !Info.isImplicitCopyOrMove()) {
 
@@ -2798,8 +2871,7 @@ static bool CollectFieldInitializer(Sema &SemaRef, BaseAndFieldInfo &Info,
                                              SourceLocation(),
                                              SourceLocation(), 0,
                                              SourceLocation());
-    Info.AllToInit.push_back(Init);
-    return false;
+    return Info.addFieldInitializer(Init);
   }
 
   // Don't build an implicit initializer for union members if none was
@@ -2823,10 +2895,10 @@ static bool CollectFieldInitializer(Sema &SemaRef, BaseAndFieldInfo &Info,
                                     Indirect, Init))
     return true;
 
-  if (Init)
-    Info.AllToInit.push_back(Init);
+  if (!Init)
+    return false;
 
-  return false;
+  return Info.addFieldInitializer(Init);
 }
 
 bool
@@ -3397,19 +3469,33 @@ void Sema::ActOnDefaultCtorInitializers(Decl *CDtorDecl) {
 
 bool Sema::RequireNonAbstractType(SourceLocation Loc, QualType T,
                                   unsigned DiagID, AbstractDiagSelID SelID) {
-  if (SelID == -1)
-    return RequireNonAbstractType(Loc, T, PDiag(DiagID));
-  else
-    return RequireNonAbstractType(Loc, T, PDiag(DiagID) << SelID);
+  class NonAbstractTypeDiagnoser : public TypeDiagnoser {
+    unsigned DiagID;
+    AbstractDiagSelID SelID;
+
+  public:
+    NonAbstractTypeDiagnoser(unsigned DiagID, AbstractDiagSelID SelID)
+      : TypeDiagnoser(DiagID == 0), DiagID(DiagID), SelID(SelID) { }
+
+    virtual void diagnose(Sema &S, SourceLocation Loc, QualType T) {
+      if (Suppressed) return;
+      if (SelID == -1)
+        S.Diag(Loc, DiagID) << T;
+      else
+        S.Diag(Loc, DiagID) << SelID << T;
+    }
+  } Diagnoser(DiagID, SelID);
+
+  return RequireNonAbstractType(Loc, T, Diagnoser);
 }
 
 bool Sema::RequireNonAbstractType(SourceLocation Loc, QualType T,
-                                  const PartialDiagnostic &PD) {
+                                  TypeDiagnoser &Diagnoser) {
   if (!getLangOpts().CPlusPlus)
     return false;
 
   if (const ArrayType *AT = Context.getAsArrayType(T))
-    return RequireNonAbstractType(Loc, AT->getElementType(), PD);
+    return RequireNonAbstractType(Loc, AT->getElementType(), Diagnoser);
 
   if (const PointerType *PT = T->getAs<PointerType>()) {
     // Find the innermost pointer type.
@@ -3417,7 +3503,7 @@ bool Sema::RequireNonAbstractType(SourceLocation Loc, QualType T,
       PT = T;
 
     if (const ArrayType *AT = Context.getAsArrayType(PT->getPointeeType()))
-      return RequireNonAbstractType(Loc, AT->getElementType(), PD);
+      return RequireNonAbstractType(Loc, AT->getElementType(), Diagnoser);
   }
 
   const RecordType *RT = T->getAs<RecordType>();
@@ -3436,7 +3522,7 @@ bool Sema::RequireNonAbstractType(SourceLocation Loc, QualType T,
   if (!RD->isAbstract())
     return false;
 
-  Diag(Loc, PD) << RD->getDeclName();
+  Diagnoser.diagnose(*this, Loc, T);
   DiagnoseAbstractType(RD);
 
   return true;
@@ -3732,7 +3818,7 @@ void Sema::CheckCompletedCXXClass(CXXRecordDecl *Record) {
     for (CXXRecordDecl::method_iterator M = Record->method_begin(),
                                      MEnd = Record->method_end();
          M != MEnd; ++M) {
-      if (!(*M)->isStatic())
+      if (!M->isStatic())
         DiagnoseHiddenVirtualMethods(Record, *M);
     }
   }
@@ -3762,8 +3848,8 @@ void Sema::CheckCompletedCXXClass(CXXRecordDecl *Record) {
 
       case TSK_Undeclared:
       case TSK_ExplicitSpecialization:
-        RequireLiteralType((*M)->getLocation(), Context.getRecordType(Record),
-                           PDiag(diag::err_constexpr_method_non_literal));
+        RequireLiteralType(M->getLocation(), Context.getRecordType(Record),
+                           diag::err_constexpr_method_non_literal);
         break;
       }
 
@@ -3781,558 +3867,354 @@ void Sema::CheckCompletedCXXClass(CXXRecordDecl *Record) {
   //   instantiated (e.g. meta-functions). This doesn't apply to classes that
   //   have inherited constructors.
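
The TypeDiagnoser above centralizes the "abstract type" diagnostics. A hypothetical example of what RequireNonAbstractType rejects, including the array and pointer-to-array cases the code walks through:

    struct Shape {
      virtual void draw() = 0;  // pure virtual: Shape is abstract
    };
    // Shape s;        // error: variable of abstract class type
    // Shape arr[4];   // error: array of abstract class type
    // Shape (*p)[4];  // error: pointer to array of abstract class type
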
   DeclareInheritedConstructors(Record);
-
-  if (!Record->isDependentType())
-    CheckExplicitlyDefaultedMethods(Record);
 }
 
 void Sema::CheckExplicitlyDefaultedMethods(CXXRecordDecl *Record) {
   for (CXXRecordDecl::method_iterator MI = Record->method_begin(),
                                       ME = Record->method_end();
-       MI != ME; ++MI) {
-    if (!MI->isInvalidDecl() && MI->isExplicitlyDefaulted()) {
-      switch (getSpecialMember(*MI)) {
-      case CXXDefaultConstructor:
-        CheckExplicitlyDefaultedDefaultConstructor(
-                                                cast<CXXConstructorDecl>(*MI));
-        break;
-
-      case CXXDestructor:
-        CheckExplicitlyDefaultedDestructor(cast<CXXDestructorDecl>(*MI));
-        break;
-
-      case CXXCopyConstructor:
-        CheckExplicitlyDefaultedCopyConstructor(cast<CXXConstructorDecl>(*MI));
-        break;
-
-      case CXXCopyAssignment:
-        CheckExplicitlyDefaultedCopyAssignment(*MI);
-        break;
-
-      case CXXMoveConstructor:
-        CheckExplicitlyDefaultedMoveConstructor(cast<CXXConstructorDecl>(*MI));
-        break;
-
-      case CXXMoveAssignment:
-        CheckExplicitlyDefaultedMoveAssignment(*MI);
-        break;
-
-      case CXXInvalid:
-        llvm_unreachable("non-special member explicitly defaulted!");
-      }
-    }
-  }
-
+       MI != ME; ++MI)
+    if (!MI->isInvalidDecl() && MI->isExplicitlyDefaulted())
+      CheckExplicitlyDefaultedSpecialMember(*MI);
+}
+
+/// Is the special member function which would be selected to perform the
+/// specified operation on the specified class type a constexpr constructor?
+static bool specialMemberIsConstexpr(Sema &S, CXXRecordDecl *ClassDecl,
+                                     Sema::CXXSpecialMember CSM,
+                                     bool ConstArg) {
+  Sema::SpecialMemberOverloadResult *SMOR =
+      S.LookupSpecialMember(ClassDecl, CSM, ConstArg,
+                            false, false, false, false);
+  if (!SMOR || !SMOR->getMethod())
+    // A constructor we wouldn't select can't be "involved in initializing"
+    // anything.
+    return true;
+  return SMOR->getMethod()->isConstexpr();
 }
 
-void Sema::CheckExplicitlyDefaultedDefaultConstructor(CXXConstructorDecl *CD) {
-  assert(CD->isExplicitlyDefaulted() && CD->isDefaultConstructor());
-
-  // Whether this was the first-declared instance of the constructor.
-  // This affects whether we implicitly add an exception spec (and, eventually,
-  // constexpr). It is also ill-formed to explicitly default a constructor such
-  // that it would be deleted. (C++0x [decl.fct.def.default])
-  bool First = CD == CD->getCanonicalDecl();
+/// Determine whether the specified special member function would be constexpr
+/// if it were implicitly defined.
+static bool defaultedSpecialMemberIsConstexpr(Sema &S, CXXRecordDecl *ClassDecl,
+                                              Sema::CXXSpecialMember CSM,
+                                              bool ConstArg) {
+  if (!S.getLangOpts().CPlusPlus0x)
+    return false;
 
-  bool HadError = false;
-  if (CD->getNumParams() != 0) {
-    Diag(CD->getLocation(), diag::err_defaulted_default_ctor_params)
-      << CD->getSourceRange();
-    HadError = true;
-  }
+  // C++11 [dcl.constexpr]p4:
+  // In the definition of a constexpr constructor [...]
+  switch (CSM) {
+  case Sema::CXXDefaultConstructor:
+    // Since default constructor lookup is essentially trivial (and cannot
+    // involve, for instance, template instantiation), we compute whether a
+    // defaulted default constructor is constexpr directly within CXXRecordDecl.
+    //
+    // This is important for performance; we need to know whether the default
+    // constructor is constexpr to determine whether the type is a literal type.
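
A sketch of the literal-type computation this fast path supports (C++11, illustrative names): the defaulted default constructor is constexpr exactly when every subobject's selected default constructor is.

    struct Lit {
      constexpr Lit() : n(0) {}
      int n;
    };
    struct Wrapper {
      Lit l;                  // subobject has a constexpr default ctor
    };
    constexpr Wrapper w{};    // OK: Wrapper's implicit default ctor is constexpr
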
+    return ClassDecl->defaultedDefaultConstructorIsConstexpr();
 
-  ImplicitExceptionSpecification Spec
-    = ComputeDefaultedDefaultCtorExceptionSpec(CD->getParent());
-  FunctionProtoType::ExtProtoInfo EPI = Spec.getEPI();
-  if (EPI.ExceptionSpecType == EST_Delayed) {
-    // Exception specification depends on some deferred part of the class.
-    // We'll try again when the class's definition has been fully processed.
-    return;
-  }
-  const FunctionProtoType *CtorType = CD->getType()->getAs<FunctionProtoType>(),
-                          *ExceptionType = Context.getFunctionType(
-                         Context.VoidTy, 0, 0, EPI)->getAs<FunctionProtoType>();
+  case Sema::CXXCopyConstructor:
+  case Sema::CXXMoveConstructor:
+    // For copy or move constructors, we need to perform overload resolution.
+    break;
 
-  // C++11 [dcl.fct.def.default]p2:
-  //   An explicitly-defaulted function may be declared constexpr only if it
-  //   would have been implicitly declared as constexpr,
-  // Do not apply this rule to templates, since core issue 1358 makes such
-  // functions always instantiate to constexpr functions.
-  if (CD->isConstexpr() &&
-      CD->getTemplatedKind() == FunctionDecl::TK_NonTemplate) {
-    if (!CD->getParent()->defaultedDefaultConstructorIsConstexpr()) {
-      Diag(CD->getLocStart(), diag::err_incorrect_defaulted_constexpr)
-        << CXXDefaultConstructor;
-      HadError = true;
-    }
-  }
-  // and may have an explicit exception-specification only if it is compatible
-  // with the exception-specification on the implicit declaration.
-  if (CtorType->hasExceptionSpec()) {
-    if (CheckEquivalentExceptionSpec(
-          PDiag(diag::err_incorrect_defaulted_exception_spec)
-            << CXXDefaultConstructor,
-          PDiag(),
-          ExceptionType, SourceLocation(),
-          CtorType, CD->getLocation())) {
-      HadError = true;
-    }
+  case Sema::CXXCopyAssignment:
+  case Sema::CXXMoveAssignment:
+  case Sema::CXXDestructor:
+  case Sema::CXXInvalid:
+    return false;
   }
 
-  //   If a function is explicitly defaulted on its first declaration,
-  if (First) {
-    //  -- it is implicitly considered to be constexpr if the implicit
-    //     definition would be,
-    CD->setConstexpr(CD->getParent()->defaultedDefaultConstructorIsConstexpr());
+  //   -- if the class is a non-empty union, or for each non-empty anonymous
+  //      union member of a non-union class, exactly one non-static data member
+  //      shall be initialized; [DR1359]
+  //
+  // If we squint, this is guaranteed, since exactly one non-static data member
+  // will be initialized (if the constructor isn't deleted), we just don't know
+  // which one.
+  if (ClassDecl->isUnion())
+    return true;
 
-    //  -- it is implicitly considered to have the same
-    //     exception-specification as if it had been implicitly declared
-    //
-    // FIXME: a compatible, but different, explicit exception specification
-    // will be silently overridden. We should issue a warning if this happens.
-    EPI.ExtInfo = CtorType->getExtInfo();
+  //   -- the class shall not have any virtual base classes;
+  if (ClassDecl->getNumVBases())
+    return false;
 
-    // Such a function is also trivial if the implicitly-declared function
-    // would have been.
-    CD->setTrivial(CD->getParent()->hasTrivialDefaultConstructor());
-  }
+  //   -- every constructor involved in initializing [...] base class
+  //      sub-objects shall be a constexpr constructor;
+  for (CXXRecordDecl::base_class_iterator B = ClassDecl->bases_begin(),
+                                       BEnd = ClassDecl->bases_end();
+       B != BEnd; ++B) {
+    const RecordType *BaseType = B->getType()->getAs<RecordType>();
+    if (!BaseType) continue;
 
-  if (HadError) {
-    CD->setInvalidDecl();
-    return;
+    CXXRecordDecl *BaseClassDecl = cast<CXXRecordDecl>(BaseType->getDecl());
+    if (!specialMemberIsConstexpr(S, BaseClassDecl, CSM, ConstArg))
+      return false;
   }
 
-  if (ShouldDeleteSpecialMember(CD, CXXDefaultConstructor)) {
-    if (First) {
-      CD->setDeletedAsWritten();
-    } else {
-      Diag(CD->getLocation(), diag::err_out_of_line_default_deletes)
-        << CXXDefaultConstructor;
-      CD->setInvalidDecl();
+  //   -- every constructor involved in initializing non-static data members
+  //      [...] shall be a constexpr constructor;
+  //   -- every non-static data member and base class sub-object shall be
+  //      initialized
+  for (RecordDecl::field_iterator F = ClassDecl->field_begin(),
+                               FEnd = ClassDecl->field_end();
+       F != FEnd; ++F) {
+    if (F->isInvalidDecl())
+      continue;
+    if (const RecordType *RecordTy =
+          S.Context.getBaseElementType(F->getType())->getAs<RecordType>()) {
+      CXXRecordDecl *FieldRecDecl = cast<CXXRecordDecl>(RecordTy->getDecl());
+      if (!specialMemberIsConstexpr(S, FieldRecDecl, CSM, ConstArg))
+        return false;
     }
   }
-}
-
-void Sema::CheckExplicitlyDefaultedCopyConstructor(CXXConstructorDecl *CD) {
-  assert(CD->isExplicitlyDefaulted() && CD->isCopyConstructor());
-
-  // Whether this was the first-declared instance of the constructor.
-  bool First = CD == CD->getCanonicalDecl();
-
-  bool HadError = false;
-  if (CD->getNumParams() != 1) {
-    Diag(CD->getLocation(), diag::err_defaulted_copy_ctor_params)
-      << CD->getSourceRange();
-    HadError = true;
-  }
-
-  ImplicitExceptionSpecification Spec(*this);
-  bool Const;
-  llvm::tie(Spec, Const) =
-    ComputeDefaultedCopyCtorExceptionSpecAndConst(CD->getParent());
-
-  FunctionProtoType::ExtProtoInfo EPI = Spec.getEPI();
-  const FunctionProtoType *CtorType = CD->getType()->getAs<FunctionProtoType>(),
-                          *ExceptionType = Context.getFunctionType(
-                         Context.VoidTy, 0, 0, EPI)->getAs<FunctionProtoType>();
-
-  // Check for parameter type matching.
-  // This is a copy ctor so we know it's a cv-qualified reference to T.
-  QualType ArgType = CtorType->getArgType(0);
-  if (ArgType->getPointeeType().isVolatileQualified()) {
-    Diag(CD->getLocation(), diag::err_defaulted_copy_ctor_volatile_param);
-    HadError = true;
-  }
-  if (ArgType->getPointeeType().isConstQualified() && !Const) {
-    Diag(CD->getLocation(), diag::err_defaulted_copy_ctor_const_param);
-    HadError = true;
-  }
+  // All OK, it's constexpr!
+  return true;
+}
 
-  // C++11 [dcl.fct.def.default]p2:
-  //   An explicitly-defaulted function may be declared constexpr only if it
-  //   would have been implicitly declared as constexpr,
-  // Do not apply this rule to templates, since core issue 1358 makes such
-  // functions always instantiate to constexpr functions.
-  if (CD->isConstexpr() &&
-      CD->getTemplatedKind() == FunctionDecl::TK_NonTemplate) {
-    if (!CD->getParent()->defaultedCopyConstructorIsConstexpr()) {
-      Diag(CD->getLocStart(), diag::err_incorrect_defaulted_constexpr)
-        << CXXCopyConstructor;
-      HadError = true;
-    }
-  }
-  // and may have an explicit exception-specification only if it is compatible
-  // with the exception-specification on the implicit declaration.
-  if (CtorType->hasExceptionSpec()) {
-    if (CheckEquivalentExceptionSpec(
-          PDiag(diag::err_incorrect_defaulted_exception_spec)
-            << CXXCopyConstructor,
-          PDiag(),
-          ExceptionType, SourceLocation(),
-          CtorType, CD->getLocation())) {
-      HadError = true;
-    }
+static Sema::ImplicitExceptionSpecification
+computeImplicitExceptionSpec(Sema &S, SourceLocation Loc, CXXMethodDecl *MD) {
+  switch (S.getSpecialMember(MD)) {
+  case Sema::CXXDefaultConstructor:
+    return S.ComputeDefaultedDefaultCtorExceptionSpec(Loc, MD);
+  case Sema::CXXCopyConstructor:
+    return S.ComputeDefaultedCopyCtorExceptionSpec(MD);
+  case Sema::CXXCopyAssignment:
+    return S.ComputeDefaultedCopyAssignmentExceptionSpec(MD);
+  case Sema::CXXMoveConstructor:
+    return S.ComputeDefaultedMoveCtorExceptionSpec(MD);
+  case Sema::CXXMoveAssignment:
+    return S.ComputeDefaultedMoveAssignmentExceptionSpec(MD);
+  case Sema::CXXDestructor:
+    return S.ComputeDefaultedDtorExceptionSpec(MD);
+  case Sema::CXXInvalid:
+    break;
   }
+  llvm_unreachable("only special members have implicit exception specs");
+}
 
-  //   If a function is explicitly defaulted on its first declaration,
-  if (First) {
-    //  -- it is implicitly considered to be constexpr if the implicit
-    //     definition would be,
-    CD->setConstexpr(CD->getParent()->defaultedCopyConstructorIsConstexpr());
+static void
+updateExceptionSpec(Sema &S, FunctionDecl *FD, const FunctionProtoType *FPT,
+                    const Sema::ImplicitExceptionSpecification &ExceptSpec) {
+  FunctionProtoType::ExtProtoInfo EPI = FPT->getExtProtoInfo();
+  ExceptSpec.getEPI(EPI);
+  const FunctionProtoType *NewFPT = cast<FunctionProtoType>(
+    S.Context.getFunctionType(FPT->getResultType(), FPT->arg_type_begin(),
+                              FPT->getNumArgs(), EPI));
+  FD->setType(QualType(NewFPT, 0));
+}
+
+void Sema::EvaluateImplicitExceptionSpec(SourceLocation Loc, CXXMethodDecl *MD) {
+  const FunctionProtoType *FPT = MD->getType()->castAs<FunctionProtoType>();
+  if (FPT->getExceptionSpecType() != EST_Unevaluated)
+    return;
 
-    //  -- it is implicitly considered to have the same
-    //     exception-specification as if it had been implicitly declared, and
-    //
-    // FIXME: a compatible, but different, explicit exception specification
-    // will be silently overridden. We should issue a warning if this happens.
-    EPI.ExtInfo = CtorType->getExtInfo();
+  // Evaluate the exception specification.
+  ImplicitExceptionSpecification ExceptSpec =
+      computeImplicitExceptionSpec(*this, Loc, MD);
 
-    //  -- [...] it shall have the same parameter type as if it had been
-    //     implicitly declared.
-    CD->setType(Context.getFunctionType(Context.VoidTy, &ArgType, 1, EPI));
+  // Update the type of the special member to use it.
+  updateExceptionSpec(*this, MD, FPT, ExceptSpec);
 
-    // Such a function is also trivial if the implicitly-declared function
-    // would have been.
-    CD->setTrivial(CD->getParent()->hasTrivialCopyConstructor());
-  }
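
With EST_Unevaluated, the implicit member's exception specification is computed only on demand, for instance when a noexcept operator asks for it. A small hypothetical C++11 illustration:

    struct Throwing { Throwing() noexcept(false); };
    struct Holder   { Throwing t; };  // implicit default ctor, spec unevaluated
    // The query below forces evaluation of Holder's implicit exception spec:
    static_assert(!noexcept(Holder()), "Holder's default ctor may throw");
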
+  // A user-provided destructor can be defined outside the class. When that
+  // happens, be sure to update the exception specification on both
+  // declarations.
+  const FunctionProtoType *CanonicalFPT =
+    MD->getCanonicalDecl()->getType()->castAs<FunctionProtoType>();
+  if (CanonicalFPT->getExceptionSpecType() == EST_Unevaluated)
+    updateExceptionSpec(*this, MD->getCanonicalDecl(),
+                        CanonicalFPT, ExceptSpec);
+}
 
-  if (HadError) {
-    CD->setInvalidDecl();
-    return;
-  }
+static bool isImplicitCopyCtorArgConst(Sema &S, CXXRecordDecl *ClassDecl);
+static bool isImplicitCopyAssignmentArgConst(Sema &S, CXXRecordDecl *ClassDecl);
 
-  if (ShouldDeleteSpecialMember(CD, CXXCopyConstructor)) {
-    if (First) {
-      CD->setDeletedAsWritten();
-    } else {
-      Diag(CD->getLocation(), diag::err_out_of_line_default_deletes)
-        << CXXCopyConstructor;
-      CD->setInvalidDecl();
-    }
-  }
-}
+void Sema::CheckExplicitlyDefaultedSpecialMember(CXXMethodDecl *MD) {
+  CXXRecordDecl *RD = MD->getParent();
+  CXXSpecialMember CSM = getSpecialMember(MD);
 
-void Sema::CheckExplicitlyDefaultedCopyAssignment(CXXMethodDecl *MD) {
-  assert(MD->isExplicitlyDefaulted());
+  assert(MD->isExplicitlyDefaulted() && CSM != CXXInvalid &&
+         "not an explicitly-defaulted special member");
 
-  // Whether this was the first-declared instance of the operator
+  // Whether this was the first-declared instance of the constructor.
+  // This affects whether we implicitly add an exception spec and constexpr.
   bool First = MD == MD->getCanonicalDecl();
 
   bool HadError = false;
-  if (MD->getNumParams() != 1) {
-    Diag(MD->getLocation(), diag::err_defaulted_copy_assign_params)
-      << MD->getSourceRange();
-    HadError = true;
-  }
 
-  QualType ReturnType =
-    MD->getType()->getAs<FunctionProtoType>()->getResultType();
-  if (!ReturnType->isLValueReferenceType() ||
-      !Context.hasSameType(
-        Context.getCanonicalType(ReturnType->getPointeeType()),
-        Context.getCanonicalType(Context.getTypeDeclType(MD->getParent())))) {
-    Diag(MD->getLocation(), diag::err_defaulted_copy_assign_return_type);
+  // C++11 [dcl.fct.def.default]p1:
+  //   A function that is explicitly defaulted shall
+  //     -- be a special member function (checked elsewhere),
+  //     -- have the same type (except for ref-qualifiers, and except that a
+  //        copy operation can take a non-const reference) as an implicit
+  //        declaration, and
+  //     -- not have default arguments.
+  unsigned ExpectedParams = 1;
+  if (CSM == CXXDefaultConstructor || CSM == CXXDestructor)
+    ExpectedParams = 0;
+  if (MD->getNumParams() != ExpectedParams) {
+    // This also checks for default arguments: a copy or move constructor with a
+    // default argument is classified as a default constructor, and assignment
+    // operations and destructors can't have default arguments.
+    Diag(MD->getLocation(), diag::err_defaulted_special_member_params)
+      << CSM << MD->getSourceRange();
     HadError = true;
   }
 
-  ImplicitExceptionSpecification Spec(*this);
-  bool Const;
-  llvm::tie(Spec, Const) =
-    ComputeDefaultedCopyCtorExceptionSpecAndConst(MD->getParent());
-
-  FunctionProtoType::ExtProtoInfo EPI = Spec.getEPI();
-  const FunctionProtoType *OperType = MD->getType()->getAs<FunctionProtoType>(),
-                          *ExceptionType = Context.getFunctionType(
-                         Context.VoidTy, 0, 0, EPI)->getAs<FunctionProtoType>();
+  const FunctionProtoType *Type = MD->getType()->getAs<FunctionProtoType>();
 
-  QualType ArgType = OperType->getArgType(0);
-  if (!ArgType->isLValueReferenceType()) {
-    Diag(MD->getLocation(), diag::err_defaulted_copy_assign_not_ref);
-    HadError = true;
-  } else {
-    if (ArgType->getPointeeType().isVolatileQualified()) {
-      Diag(MD->getLocation(), diag::err_defaulted_copy_assign_volatile_param);
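
An assumed consequence of the parameter-count rule, following the comment above (hypothetical example): a copy constructor with a default argument is classified as a default constructor, so defaulting it as written trips the check.

    struct X {
      X(const X& = X()) = default;  // classified as a default constructor;
    };                              // error: wrong number of parameters
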
+  // Compute argument constness, constexpr, and triviality.
+  bool CanHaveConstParam = false;
+  bool Trivial;
+  switch (CSM) {
+  case CXXDefaultConstructor:
+    Trivial = RD->hasTrivialDefaultConstructor();
+    break;
+  case CXXCopyConstructor:
+    CanHaveConstParam = isImplicitCopyCtorArgConst(*this, RD);
+    Trivial = RD->hasTrivialCopyConstructor();
+    break;
+  case CXXCopyAssignment:
+    CanHaveConstParam = isImplicitCopyAssignmentArgConst(*this, RD);
+    Trivial = RD->hasTrivialCopyAssignment();
+    break;
+  case CXXMoveConstructor:
+    Trivial = RD->hasTrivialMoveConstructor();
+    break;
+  case CXXMoveAssignment:
+    Trivial = RD->hasTrivialMoveAssignment();
+    break;
+  case CXXDestructor:
+    Trivial = RD->hasTrivialDestructor();
+    break;
+  case CXXInvalid:
+    llvm_unreachable("non-special member explicitly defaulted!");
+  }
+
+  QualType ReturnType = Context.VoidTy;
+  if (CSM == CXXCopyAssignment || CSM == CXXMoveAssignment) {
+    // Check for return type matching.
+    ReturnType = Type->getResultType();
+    QualType ExpectedReturnType =
+        Context.getLValueReferenceType(Context.getTypeDeclType(RD));
+    if (!Context.hasSameType(ReturnType, ExpectedReturnType)) {
+      Diag(MD->getLocation(), diag::err_defaulted_special_member_return_type)
+        << (CSM == CXXMoveAssignment) << ExpectedReturnType;
       HadError = true;
     }
-    if (ArgType->getPointeeType().isConstQualified() && !Const) {
-      Diag(MD->getLocation(), diag::err_defaulted_copy_assign_const_param);
+
+    // A defaulted special member cannot have cv-qualifiers.
+    if (Type->getTypeQuals()) {
+      Diag(MD->getLocation(), diag::err_defaulted_special_member_quals)
+        << (CSM == CXXMoveAssignment);
       HadError = true;
     }
   }
 
-  if (OperType->getTypeQuals()) {
-    Diag(MD->getLocation(), diag::err_defaulted_copy_assign_quals);
-    HadError = true;
-  }
-
-  if (OperType->hasExceptionSpec()) {
-    if (CheckEquivalentExceptionSpec(
-          PDiag(diag::err_incorrect_defaulted_exception_spec)
-            << CXXCopyAssignment,
-          PDiag(),
-          ExceptionType, SourceLocation(),
-          OperType, MD->getLocation())) {
+  // Check for parameter type matching.
+  QualType ArgType = ExpectedParams ? Type->getArgType(0) : QualType();
+  bool HasConstParam = false;
+  if (ExpectedParams && ArgType->isReferenceType()) {
+    // Argument must be reference to possibly-const T.
+    QualType ReferentType = ArgType->getPointeeType();
+    HasConstParam = ReferentType.isConstQualified();
+
+    if (ReferentType.isVolatileQualified()) {
+      Diag(MD->getLocation(),
+           diag::err_defaulted_special_member_volatile_param) << CSM;
       HadError = true;
     }
-  }
 
-  if (First) {
-    // We set the declaration to have the computed exception spec here.
-    // We duplicate the one parameter type.
-    EPI.RefQualifier = OperType->getRefQualifier();
-    EPI.ExtInfo = OperType->getExtInfo();
-    MD->setType(Context.getFunctionType(ReturnType, &ArgType, 1, EPI));
-
-    // Such a function is also trivial if the implicitly-declared function
-    // would have been.
-    MD->setTrivial(MD->getParent()->hasTrivialCopyAssignment());
-  }
-
-  if (HadError) {
-    MD->setInvalidDecl();
-    return;
-  }
-
-  if (ShouldDeleteSpecialMember(MD, CXXCopyAssignment)) {
-    if (First) {
-      MD->setDeletedAsWritten();
-    } else {
-      Diag(MD->getLocation(), diag::err_out_of_line_default_deletes)
-        << CXXCopyAssignment;
-      MD->setInvalidDecl();
+    if (HasConstParam && !CanHaveConstParam) {
+      if (CSM == CXXCopyConstructor || CSM == CXXCopyAssignment) {
+        Diag(MD->getLocation(),
+             diag::err_defaulted_special_member_copy_const_param)
+          << (CSM == CXXCopyAssignment);
+        // FIXME: Explain why this special member can't be const.
+      } else {
+        Diag(MD->getLocation(),
+             diag::err_defaulted_special_member_move_const_param)
+          << (CSM == CXXMoveAssignment);
+      }
+      HadError = true;
     }
-  }
-}
-
-void Sema::CheckExplicitlyDefaultedMoveConstructor(CXXConstructorDecl *CD) {
-  assert(CD->isExplicitlyDefaulted() && CD->isMoveConstructor());
-
-  // Whether this was the first-declared instance of the constructor.
-  bool First = CD == CD->getCanonicalDecl();
 
-  bool HadError = false;
-  if (CD->getNumParams() != 1) {
-    Diag(CD->getLocation(), diag::err_defaulted_move_ctor_params)
-      << CD->getSourceRange();
+    // If a function is explicitly defaulted on its first declaration, it shall
+    // have the same parameter type as if it had been implicitly declared.
+    // (Presumably this is to prevent it from being trivial?)
+    if (!HasConstParam && CanHaveConstParam && First)
+      Diag(MD->getLocation(),
+           diag::err_defaulted_special_member_copy_non_const_param)
+        << (CSM == CXXCopyAssignment);
+  } else if (ExpectedParams) {
+    // A copy assignment operator can take its argument by value, but a
+    // defaulted one cannot.
+    assert(CSM == CXXCopyAssignment && "unexpected non-ref argument");
+    Diag(MD->getLocation(), diag::err_defaulted_copy_assign_not_ref);
     HadError = true;
   }
 
-  ImplicitExceptionSpecification Spec(
-      ComputeDefaultedMoveCtorExceptionSpec(CD->getParent()));
-
-  FunctionProtoType::ExtProtoInfo EPI = Spec.getEPI();
-  const FunctionProtoType *CtorType = CD->getType()->getAs<FunctionProtoType>(),
-                          *ExceptionType = Context.getFunctionType(
-                         Context.VoidTy, 0, 0, EPI)->getAs<FunctionProtoType>();
-
-  // Check for parameter type matching.
-  // This is a move ctor so we know it's a cv-qualified rvalue reference to T.
-  QualType ArgType = CtorType->getArgType(0);
-  if (ArgType->getPointeeType().isVolatileQualified()) {
-    Diag(CD->getLocation(), diag::err_defaulted_move_ctor_volatile_param);
-    HadError = true;
-  }
-  if (ArgType->getPointeeType().isConstQualified()) {
-    Diag(CD->getLocation(), diag::err_defaulted_move_ctor_const_param);
-    HadError = true;
+  // Rebuild the type with the implicit exception specification added, if we
+  // are going to need it.
+  const FunctionProtoType *ImplicitType = 0;
+  if (First || Type->hasExceptionSpec()) {
+    FunctionProtoType::ExtProtoInfo EPI = Type->getExtProtoInfo();
+    computeImplicitExceptionSpec(*this, MD->getLocation(), MD).getEPI(EPI);
+    ImplicitType = cast<FunctionProtoType>(
+      Context.getFunctionType(ReturnType, &ArgType, ExpectedParams, EPI));
   }
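
The by-value case called out above, as a hypothetical example: an ordinary copy assignment operator may take its parameter by value, but a defaulted one cannot.

    struct V {
      V& operator=(V) = default;  // error: a defaulted copy assignment
    };                            // operator must take a reference
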
 
   // C++11 [dcl.fct.def.default]p2:
   //   An explicitly-defaulted function may be declared constexpr only if it
   //   would have been implicitly declared as constexpr,
-  // Do not apply this rule to templates, since core issue 1358 makes such
-  // functions always instantiate to constexpr functions.
-  if (CD->isConstexpr() &&
-      CD->getTemplatedKind() == FunctionDecl::TK_NonTemplate) {
-    if (!CD->getParent()->defaultedMoveConstructorIsConstexpr()) {
-      Diag(CD->getLocStart(), diag::err_incorrect_defaulted_constexpr)
-        << CXXMoveConstructor;
-      HadError = true;
-    }
+  // Do not apply this rule to members of class templates, since core issue 1358
+  // makes such functions always instantiate to constexpr functions. For
+  // non-constructors, this is checked elsewhere.
+  bool Constexpr = defaultedSpecialMemberIsConstexpr(*this, RD, CSM,
+                                                     HasConstParam);
+  if (isa<CXXConstructorDecl>(MD) && MD->isConstexpr() && !Constexpr &&
+      MD->getTemplatedKind() == FunctionDecl::TK_NonTemplate) {
+    Diag(MD->getLocStart(), diag::err_incorrect_defaulted_constexpr) << CSM;
+    // FIXME: Explain why the constructor can't be constexpr.
+    HadError = true;
   }
 
   // and may have an explicit exception-specification only if it is compatible
   // with the exception-specification on the implicit declaration.
-  if (CtorType->hasExceptionSpec()) {
-    if (CheckEquivalentExceptionSpec(
-          PDiag(diag::err_incorrect_defaulted_exception_spec)
-            << CXXMoveConstructor,
-          PDiag(),
-          ExceptionType, SourceLocation(),
-          CtorType, CD->getLocation())) {
-      HadError = true;
-    }
-  }
+  if (Type->hasExceptionSpec() &&
+      CheckEquivalentExceptionSpec(
+        PDiag(diag::err_incorrect_defaulted_exception_spec) << CSM,
+        PDiag(), ImplicitType, SourceLocation(), Type, MD->getLocation()))
+    HadError = true;
 
   //   If a function is explicitly defaulted on its first declaration,
   if (First) {
     //  -- it is implicitly considered to be constexpr if the implicit
     //     definition would be,
-    CD->setConstexpr(CD->getParent()->defaultedMoveConstructorIsConstexpr());
-
-    //  -- it is implicitly considered to have the same
-    //     exception-specification as if it had been implicitly declared, and
-    //
-    // FIXME: a compatible, but different, explicit exception specification
-    // will be silently overridden. We should issue a warning if this happens.
-    EPI.ExtInfo = CtorType->getExtInfo();
+    MD->setConstexpr(Constexpr);
 
-    //  -- [...] it shall have the same parameter type as if it had been
-    //     implicitly declared.
-    CD->setType(Context.getFunctionType(Context.VoidTy, &ArgType, 1, EPI));
+    //  -- it is implicitly considered to have the same exception-specification
+    //     as if it had been implicitly declared,
+    MD->setType(QualType(ImplicitType, 0));
 
     // Such a function is also trivial if the implicitly-declared function
     // would have been.
-    CD->setTrivial(CD->getParent()->hasTrivialMoveConstructor());
+    MD->setTrivial(Trivial);
   }
 
-  if (HadError) {
-    CD->setInvalidDecl();
-    return;
-  }
-
-  if (ShouldDeleteSpecialMember(CD, CXXMoveConstructor)) {
+  if (ShouldDeleteSpecialMember(MD, CSM)) {
     if (First) {
-      CD->setDeletedAsWritten();
+      MD->setDeletedAsWritten();
     } else {
-      Diag(CD->getLocation(), diag::err_out_of_line_default_deletes)
-        << CXXMoveConstructor;
-      CD->setInvalidDecl();
-    }
-  }
-}
-
-void Sema::CheckExplicitlyDefaultedMoveAssignment(CXXMethodDecl *MD) {
-  assert(MD->isExplicitlyDefaulted());
-
-  // Whether this was the first-declared instance of the operator
-  bool First = MD == MD->getCanonicalDecl();
-
-  bool HadError = false;
-  if (MD->getNumParams() != 1) {
-    Diag(MD->getLocation(), diag::err_defaulted_move_assign_params)
-      << MD->getSourceRange();
-    HadError = true;
-  }
-
-  QualType ReturnType =
-    MD->getType()->getAs<FunctionProtoType>()->getResultType();
-  if (!ReturnType->isLValueReferenceType() ||
-      !Context.hasSameType(
-        Context.getCanonicalType(ReturnType->getPointeeType()),
-        Context.getCanonicalType(Context.getTypeDeclType(MD->getParent())))) {
-    Diag(MD->getLocation(), diag::err_defaulted_move_assign_return_type);
-    HadError = true;
-  }
-
-  ImplicitExceptionSpecification Spec(
-      ComputeDefaultedMoveCtorExceptionSpec(MD->getParent()));
-
-  FunctionProtoType::ExtProtoInfo EPI = Spec.getEPI();
-  const FunctionProtoType *OperType = MD->getType()->getAs<FunctionProtoType>(),
-                          *ExceptionType = Context.getFunctionType(
-                         Context.VoidTy, 0, 0, EPI)->getAs<FunctionProtoType>();
-
-  QualType ArgType = OperType->getArgType(0);
-  if (!ArgType->isRValueReferenceType()) {
-    Diag(MD->getLocation(), diag::err_defaulted_move_assign_not_ref);
-    HadError = true;
-  } else {
-    if (ArgType->getPointeeType().isVolatileQualified()) {
-      Diag(MD->getLocation(), diag::err_defaulted_move_assign_volatile_param);
-      HadError = true;
-    }
-    if (ArgType->getPointeeType().isConstQualified()) {
-      Diag(MD->getLocation(), diag::err_defaulted_move_assign_const_param);
+      // C++11 [dcl.fct.def.default]p4:
+      //   [For a] user-provided explicitly-defaulted function [...] if such a
+      //   function is implicitly defined as deleted, the program is ill-formed.
+      Diag(MD->getLocation(), diag::err_out_of_line_default_deletes) << CSM;
       HadError = true;
     }
   }
 
-  if (OperType->getTypeQuals()) {
-    Diag(MD->getLocation(), diag::err_defaulted_move_assign_quals);
-    HadError = true;
-  }
-
-  if (OperType->hasExceptionSpec()) {
-    if (CheckEquivalentExceptionSpec(
-          PDiag(diag::err_incorrect_defaulted_exception_spec)
-            << CXXMoveAssignment,
-          PDiag(),
-          ExceptionType, SourceLocation(),
-          OperType, MD->getLocation())) {
-      HadError = true;
-    }
-  }
-
-  if (First) {
-    // We set the declaration to have the computed exception spec here.
-    // We duplicate the one parameter type.
-    EPI.RefQualifier = OperType->getRefQualifier();
-    EPI.ExtInfo = OperType->getExtInfo();
-    MD->setType(Context.getFunctionType(ReturnType, &ArgType, 1, EPI));
-
-    // Such a function is also trivial if the implicitly-declared function
-    // would have been.
-    MD->setTrivial(MD->getParent()->hasTrivialMoveAssignment());
-  }
-
-  if (HadError) {
+  if (HadError)
     MD->setInvalidDecl();
-    return;
-  }
-
-  if (ShouldDeleteSpecialMember(MD, CXXMoveAssignment)) {
-    if (First) {
-      MD->setDeletedAsWritten();
-    } else {
-      Diag(MD->getLocation(), diag::err_out_of_line_default_deletes)
-        << CXXMoveAssignment;
-      MD->setInvalidDecl();
-    }
-  }
-}
-
-void Sema::CheckExplicitlyDefaultedDestructor(CXXDestructorDecl *DD) {
-  assert(DD->isExplicitlyDefaulted());
-
-  // Whether this was the first-declared instance of the destructor.
-  bool First = DD == DD->getCanonicalDecl();
-
-  ImplicitExceptionSpecification Spec
-    = ComputeDefaultedDtorExceptionSpec(DD->getParent());
-  FunctionProtoType::ExtProtoInfo EPI = Spec.getEPI();
-  const FunctionProtoType *DtorType = DD->getType()->getAs<FunctionProtoType>(),
-                          *ExceptionType = Context.getFunctionType(
-                         Context.VoidTy, 0, 0, EPI)->getAs<FunctionProtoType>();
-
-  if (DtorType->hasExceptionSpec()) {
-    if (CheckEquivalentExceptionSpec(
-          PDiag(diag::err_incorrect_defaulted_exception_spec)
-            << CXXDestructor,
-          PDiag(),
-          ExceptionType, SourceLocation(),
-          DtorType, DD->getLocation())) {
-      DD->setInvalidDecl();
-      return;
-    }
-  }
-
-  if (First) {
-    // We set the declaration to have the computed exception spec here.
-    // There are no parameters.
-    EPI.ExtInfo = DtorType->getExtInfo();
-    DD->setType(Context.getFunctionType(Context.VoidTy, 0, 0, EPI));
-
-    // Such a function is also trivial if the implicitly-declared function
-    // would have been.
-    DD->setTrivial(DD->getParent()->hasTrivialDestructor());
-  }
-
-  if (ShouldDeleteSpecialMember(DD, CXXDestructor)) {
-    if (First) {
-      DD->setDeletedAsWritten();
-    } else {
-      Diag(DD->getLocation(), diag::err_out_of_line_default_deletes)
-        << CXXDestructor;
-      DD->setInvalidDecl();
-    }
-  }
-}
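
The [dcl.fct.def.default]p4 rule quoted above, sketched with illustrative names: defaulting out of line is ill-formed when the member would be implicitly deleted.

    struct NoCopy {
      NoCopy(const NoCopy&) = delete;
    };
    struct X {
      NoCopy nc;
      X(const X&);             // user-provided declaration
    };
    X::X(const X&) = default;  // error: would be implicitly deleted
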
 
 namespace {
@@ -4385,9 +4267,15 @@ struct SpecialMemberDeletionInfo {
   bool inUnion() const { return MD->getParent()->isUnion(); }
 
   /// Look up the corresponding special member in the given class.
-  Sema::SpecialMemberOverloadResult *lookupIn(CXXRecordDecl *Class) {
+  Sema::SpecialMemberOverloadResult *lookupIn(CXXRecordDecl *Class,
+                                              unsigned Quals) {
     unsigned TQ = MD->getTypeQualifiers();
-    return S.LookupSpecialMember(Class, CSM, ConstArg, VolatileArg,
+    // cv-qualifiers on class members don't affect default ctor / dtor calls.
+    if (CSM == Sema::CXXDefaultConstructor || CSM == Sema::CXXDestructor)
+      Quals = 0;
+    return S.LookupSpecialMember(Class, CSM,
+                                 ConstArg || (Quals & Qualifiers::Const),
+                                 VolatileArg || (Quals & Qualifiers::Volatile),
                                  MD->getRefQualifier() == RQ_RValue,
                                  TQ & Qualifiers::Const,
                                  TQ & Qualifiers::Volatile);
@@ -4399,7 +4287,8 @@ struct SpecialMemberDeletionInfo {
 
   bool shouldDeleteForField(FieldDecl *FD);
   bool shouldDeleteForAllConstMembers();
-  bool shouldDeleteForClassSubobject(CXXRecordDecl *Class, Subobject Subobj);
+  bool shouldDeleteForClassSubobject(CXXRecordDecl *Class, Subobject Subobj,
+                                     unsigned Quals);
   bool shouldDeleteForSubobjectCall(Subobject Subobj,
                                     Sema::SpecialMemberOverloadResult *SMOR,
                                     bool IsDtorCallInCtor);
@@ -4480,9 +4369,9 @@ bool SpecialMemberDeletionInfo::shouldDeleteForSubobjectCall(
 }
 
 /// Check whether we should delete a special member function due to having a
-/// direct or virtual base class or static data member of class type M.
+/// direct or virtual base class or non-static data member of class type M.
 bool SpecialMemberDeletionInfo::shouldDeleteForClassSubobject(
-    CXXRecordDecl *Class, Subobject Subobj) {
+    CXXRecordDecl *Class, Subobject Subobj, unsigned Quals) {
   FieldDecl *Field = Subobj.dyn_cast<FieldDecl*>();
 
   // C++11 [class.ctor]p5:
@@ -4501,7 +4390,7 @@ bool SpecialMemberDeletionInfo::shouldDeleteForClassSubobject(
   //    that is deleted or inaccessible
   if (!(CSM == Sema::CXXDefaultConstructor &&
         Field && Field->hasInClassInitializer()) &&
-      shouldDeleteForSubobjectCall(Subobj, lookupIn(Class), false))
+      shouldDeleteForSubobjectCall(Subobj, lookupIn(Class, Quals), false))
     return true;
 
   // C++11 [class.ctor]p5, C++11 [class.copy]p11:
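
Why subobject cv-qualifiers now feed the lookup, as a hypothetical example: copying a volatile member requires a copy constructor that can bind a volatile source, so the member below deletes X's implicit copy constructor.

    struct M {
      M();
      M(const M&);     // cannot bind 'const volatile M&'
    };
    struct X {
      volatile M m;    // X's implicit copy constructor is deleted
    };
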
@@ -4522,7 +4411,7 @@
 /// having a particular direct or virtual base class.
 bool SpecialMemberDeletionInfo::shouldDeleteForBase(CXXBaseSpecifier *Base) {
   CXXRecordDecl *BaseClass = Base->getType()->getAsCXXRecordDecl();
-  return shouldDeleteForClassSubobject(BaseClass, Base);
+  return shouldDeleteForClassSubobject(BaseClass, Base, 0);
 }
 
 /// Check whether we should delete a special member function due to the class
@@ -4549,7 +4438,7 @@ bool SpecialMemberDeletionInfo::shouldDeleteForField(FieldDecl *FD) {
       (!FieldRecord || !FieldRecord->hasUserProvidedDefaultConstructor())) {
     if (Diagnose)
       S.Diag(FD->getLocation(), diag::note_deleted_default_ctor_uninit_field)
-        << MD->getParent() << FD << FieldType << /*Const*/1;
+        << MD->getParent() << FD << FD->getType() << /*Const*/1;
     return true;
   }
 
@@ -4577,7 +4466,7 @@ bool SpecialMemberDeletionInfo::shouldDeleteForField(FieldDecl *FD) {
       // -- a non-static data member of const non-class type (or array thereof)
       if (Diagnose)
         S.Diag(FD->getLocation(), diag::note_deleted_assign_field)
-          << IsMove << MD->getParent() << FD << FieldType << /*Const*/1;
+          << IsMove << MD->getParent() << FD << FD->getType() << /*Const*/1;
       return true;
     }
   }
@@ -4599,7 +4488,8 @@ bool SpecialMemberDeletionInfo::shouldDeleteForField(FieldDecl *FD) {
       CXXRecordDecl *UnionFieldRecord = UnionFieldType->getAsCXXRecordDecl();
 
       if (UnionFieldRecord &&
-          shouldDeleteForClassSubobject(UnionFieldRecord, *UI))
+          shouldDeleteForClassSubobject(UnionFieldRecord, *UI,
+                                        UnionFieldType.getCVRQualifiers()))
         return true;
     }
 
@@ -4618,7 +4508,8 @@ bool SpecialMemberDeletionInfo::shouldDeleteForField(FieldDecl *FD) {
       return false;
     }
 
-    if (shouldDeleteForClassSubobject(FieldRecord, FD))
+    if (shouldDeleteForClassSubobject(FieldRecord, FD,
+                                      FieldType.getCVRQualifiers()))
      return true;
   }
 
@@ -4647,7 +4538,8 @@ bool SpecialMemberDeletionInfo::shouldDeleteForAllConstMembers() {
 /// C++11 [class.copy]p23, and C++11 [class.dtor]p5.
 bool Sema::ShouldDeleteSpecialMember(CXXMethodDecl *MD, CXXSpecialMember CSM,
                                      bool Diagnose) {
-  assert(!MD->isInvalidDecl());
+  if (MD->isInvalidDecl())
+    return false;
   CXXRecordDecl *RD = MD->getParent();
   assert(!RD->isDependentType() && "do deletion after instantiation");
   if (!LangOpts.CPlusPlus0x || RD->isInvalidDecl())
@@ -4803,7 +4695,7 @@ void Sema::DiagnoseHiddenVirtualMethods(CXXRecordDecl *DC, CXXMethodDecl *MD) {
   if (Diags.getDiagnosticLevel(diag::warn_overloaded_virtual,
                                MD->getLocation()) == DiagnosticsEngine::Ignored)
     return;
-  if (MD->getDeclName().getNameKind() != DeclarationName::Identifier)
+  if (!MD->getDeclName().isIdentifier())
    return;
 
   CXXBasePaths Paths(/*FindAmbiguities=*/true, // true to look in all bases.
@@ -4850,6 +4742,14 @@ void Sema::ActOnFinishCXXMemberSpecification(Scope* S, SourceLocation RLoc,
 
   AdjustDeclIfTemplate(TagDecl);
 
+  for (const AttributeList* l = AttrList; l; l = l->getNext()) {
+    if (l->getKind() != AttributeList::AT_Visibility)
+      continue;
+    l->setInvalid();
+    Diag(l->getLoc(), diag::warn_attribute_after_definition_ignored) <<
+    l->getName();
+  }
+
   ActOnFields(S, RLoc, TagDecl, llvm::makeArrayRef(
               // strict aliasing violation!
              reinterpret_cast<Decl**>(FieldCollector->getCurFields()),
              FieldCollector->getCurNumFields()), LBrac, RBrac, AttrList);
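
A guess at the pattern the new attribute loop warns about (hedged sketch: a visibility attribute placed after the closing brace of the class definition is diagnosed as ignored):

    struct S {
      int n;
    } __attribute__((visibility("hidden")));  // warning: attribute ignored
                                              // after the definition
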
 
@@ -5572,6 +5472,8 @@ Decl *Sema::ActOnStartNamespaceDef(Scope *NamespcScope,
     }
   }
 
+  ActOnDocumentableDecl(Namespc);
+
   // Although we could have an invalid decl (i.e. the namespace name is a
   // redefinition), push it as current DeclContext and try to continue parsing.
   // FIXME: We should be able to push Namespc here, so that the each DeclContext
@@ -6734,6 +6636,7 @@ Decl *Sema::ActOnAliasDeclaration(Scope *S,
   if (!Redeclaration)
     PushOnScopeChains(NewND, S);
 
+  ActOnDocumentableDecl(NewND);
   return NewND;
 }
 
@@ -6816,7 +6719,10 @@ namespace {
 }
 
 Sema::ImplicitExceptionSpecification
-Sema::ComputeDefaultedDefaultCtorExceptionSpec(CXXRecordDecl *ClassDecl) {
+Sema::ComputeDefaultedDefaultCtorExceptionSpec(SourceLocation Loc,
+                                               CXXMethodDecl *MD) {
+  CXXRecordDecl *ClassDecl = MD->getParent();
+
   // C++ [except.spec]p14:
   //   An implicitly declared special member function (Clause 12) shall have an
   //   exception-specification. [...]
@@ -6863,7 +6769,21 @@ Sema::ComputeDefaultedDefaultCtorExceptionSpec(CXXRecordDecl *ClassDecl) {
       if (Expr *E = F->getInClassInitializer())
         ExceptSpec.CalledExpr(E);
       else if (!F->isInvalidDecl())
-        ExceptSpec.SetDelayed();
+        // DR1351:
+        //   If the brace-or-equal-initializer of a non-static data member
+        //   invokes a defaulted default constructor of its class or of an
+        //   enclosing class in a potentially evaluated subexpression, the
+        //   program is ill-formed.
+        //
+        // This resolution is unworkable: the exception specification of the
+        // default constructor can be needed in an unevaluated context, in
+        // particular, in the operand of a noexcept-expression, and we can be
+        // unable to compute an exception specification for an enclosed class.
+        //
+        // We do not allow an in-class initializer to require the evaluation
+        // of the exception specification for any in-class initializer whose
+        // definition is not lexically complete.
+        Diag(Loc, diag::err_in_class_initializer_references_def_ctor) << MD;
     } else if (const RecordType *RecordTy
               = Context.getBaseElementType(F->getType())->getAs<RecordType>()) {
       CXXRecordDecl *FieldRecDecl = cast<CXXRecordDecl>(RecordTy->getDecl());
@@ -6892,9 +6812,9 @@ CXXConstructorDecl *Sema::DeclareImplicitDefaultConstructor(
   assert(!ClassDecl->hasUserDeclaredConstructor() &&
          "Should not build implicit default constructor!");
 
-  ImplicitExceptionSpecification Spec =
-    ComputeDefaultedDefaultCtorExceptionSpec(ClassDecl);
-  FunctionProtoType::ExtProtoInfo EPI = Spec.getEPI();
+  bool Constexpr = defaultedSpecialMemberIsConstexpr(*this, ClassDecl,
+                                                     CXXDefaultConstructor,
+                                                     false);
 
   // Create the actual constructor declaration.
   CanQualType ClassType
    = Context.getCanonicalType(Context.getTypeDeclType(ClassDecl));
   SourceLocation ClassLoc = ClassDecl->getLocation();
   DeclarationName Name
    = Context.DeclarationNames.getCXXConstructorName(ClassType);
   DeclarationNameInfo NameInfo(Name, ClassLoc);
   CXXConstructorDecl *DefaultCon = CXXConstructorDecl::Create(
-      Context, ClassDecl, ClassLoc, NameInfo,
-      Context.getFunctionType(Context.VoidTy, 0, 0, EPI), /*TInfo=*/0,
+      Context, ClassDecl, ClassLoc, NameInfo, /*Type*/QualType(), /*TInfo=*/0,
       /*isExplicit=*/false, /*isInline=*/true, /*isImplicitlyDeclared=*/true,
-      /*isConstexpr=*/ClassDecl->defaultedDefaultConstructorIsConstexpr() &&
-        getLangOpts().CPlusPlus0x);
+      Constexpr);
   DefaultCon->setAccess(AS_public);
   DefaultCon->setDefaulted();
   DefaultCon->setImplicit();
   DefaultCon->setTrivial(ClassDecl->hasTrivialDefaultConstructor());
-
+
+  // Build an exception specification pointing back at this constructor.
+  FunctionProtoType::ExtProtoInfo EPI;
+  EPI.ExceptionSpecType = EST_Unevaluated;
+  EPI.ExceptionSpecDecl = DefaultCon;
+  DefaultCon->setType(Context.getFunctionType(Context.VoidTy, 0, 0, EPI));
+
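
A sketch of the DR1351 cycle rejected above, modeled on (but not copied from) Clang's C++11 tests: the initializer needs the exception specification of the very constructor it helps define.

    struct S {
      // noexcept(S()) requires the exception spec of S's defaulted default
      // constructor, which depends on this initializer: ill-formed.
      bool b = noexcept(S());
    };
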
+  // Note that we have declared this constructor.
   ++ASTContext::NumImplicitDefaultConstructorsDeclared;
 
@@ -6948,7 +6872,7 @@ void Sema::DefineImplicitDefaultConstructor(SourceLocation CurrentLocation,
   }
 
   SourceLocation Loc = Constructor->getLocation();
-  Constructor->setBody(new (Context) CompoundStmt(Context, 0, 0, Loc, Loc));
+  Constructor->setBody(new (Context) CompoundStmt(Loc));
 
   Constructor->setUsed();
   MarkVTableUsed(CurrentLocation, ClassDecl);
@@ -6958,58 +6882,14 @@ void Sema::DefineImplicitDefaultConstructor(SourceLocation CurrentLocation,
   }
 }
 
-/// Get any existing defaulted default constructor for the given class. Do not
-/// implicitly define one if it does not exist.
-static CXXConstructorDecl *getDefaultedDefaultConstructorUnsafe(Sema &Self,
-                                                                CXXRecordDecl *D) {
-  ASTContext &Context = Self.Context;
-  QualType ClassType = Context.getTypeDeclType(D);
-  DeclarationName ConstructorName
-    = Context.DeclarationNames.getCXXConstructorName(
-                     Context.getCanonicalType(ClassType.getUnqualifiedType()));
-
-  DeclContext::lookup_const_iterator Con, ConEnd;
-  for (llvm::tie(Con, ConEnd) = D->lookup(ConstructorName);
-       Con != ConEnd; ++Con) {
-    // A function template cannot be defaulted.
-    if (isa<FunctionTemplateDecl>(*Con))
-      continue;
-
-    CXXConstructorDecl *Constructor = cast<CXXConstructorDecl>(*Con);
-    if (Constructor->isDefaultConstructor())
-      return Constructor->isDefaulted() ? Constructor : 0;
-  }
-  return 0;
-}
-
 void Sema::ActOnFinishDelayedMemberInitializers(Decl *D) {
   if (!D)
     return;
 
   AdjustDeclIfTemplate(D);
   CXXRecordDecl *ClassDecl = cast<CXXRecordDecl>(D);
-
-  CXXConstructorDecl *CtorDecl
-    = getDefaultedDefaultConstructorUnsafe(*this, ClassDecl);
-
-  if (!CtorDecl) return;
-
-  // Compute the exception specification for the default constructor.
-  const FunctionProtoType *CtorTy =
-    CtorDecl->getType()->castAs<FunctionProtoType>();
-  if (CtorTy->getExceptionSpecType() == EST_Delayed) {
-    // FIXME: Don't do this unless the exception spec is needed.
-    ImplicitExceptionSpecification Spec =
-      ComputeDefaultedDefaultCtorExceptionSpec(ClassDecl);
-    FunctionProtoType::ExtProtoInfo EPI = Spec.getEPI();
-    assert(EPI.ExceptionSpecType != EST_Delayed);
-    CtorDecl->setType(Context.getFunctionType(Context.VoidTy, 0, 0, EPI));
-  }
-
-  // If the default constructor is explicitly defaulted, checking the exception
-  // specification is deferred until now.
-  if (!CtorDecl->isInvalidDecl() && CtorDecl->isExplicitlyDefaulted() &&
-      !ClassDecl->isDependentType())
-    CheckExplicitlyDefaultedDefaultConstructor(CtorDecl);
+  if (!ClassDecl->isDependentType())
+    CheckExplicitlyDefaultedMethods(ClassDecl);
 }
 
 void Sema::DeclareInheritedConstructors(CXXRecordDecl *ClassDecl) {
@@ -7193,7 +7073,9 @@ void Sema::DeclareInheritedConstructors(CXXRecordDecl *ClassDecl) {
 }
 
 Sema::ImplicitExceptionSpecification
-Sema::ComputeDefaultedDtorExceptionSpec(CXXRecordDecl *ClassDecl) {
+Sema::ComputeDefaultedDtorExceptionSpec(CXXMethodDecl *MD) {
+  CXXRecordDecl *ClassDecl = MD->getParent();
+
   // C++ [except.spec]p14:
   //   An implicitly declared special member function (Clause 12) shall have
   //   an exception-specification.
@@ -7240,14 +7122,8 @@ CXXDestructorDecl *Sema::DeclareImplicitDestructor(CXXRecordDecl *ClassDecl) {
   //   If a class has no user-declared destructor, a destructor is
   //   declared implicitly. An implicitly-declared destructor is an
   //   inline public member of its class.
-
-  ImplicitExceptionSpecification Spec =
-      ComputeDefaultedDtorExceptionSpec(ClassDecl);
-  FunctionProtoType::ExtProtoInfo EPI = Spec.getEPI();
 
   // Create the actual destructor declaration.
-  QualType Ty = Context.getFunctionType(Context.VoidTy, 0, 0, EPI);
-
   CanQualType ClassType
     = Context.getCanonicalType(Context.getTypeDeclType(ClassDecl));
   SourceLocation ClassLoc = ClassDecl->getLocation();
   DeclarationName Name
     = Context.DeclarationNames.getCXXDestructorName(ClassType);
   DeclarationNameInfo NameInfo(Name, ClassLoc);
   CXXDestructorDecl *Destructor
-      = CXXDestructorDecl::Create(Context, ClassDecl, ClassLoc, NameInfo, Ty, 0,
-                                  /*isInline=*/true,
+      = CXXDestructorDecl::Create(Context, ClassDecl, ClassLoc, NameInfo,
+                                  QualType(), 0, /*isInline=*/true,
                                   /*isImplicitlyDeclared=*/true);
   Destructor->setAccess(AS_public);
   Destructor->setDefaulted();
   Destructor->setImplicit();
   Destructor->setTrivial(ClassDecl->hasTrivialDestructor());
-
+
+  // Build an exception specification pointing back at this destructor.
+  FunctionProtoType::ExtProtoInfo EPI;
+  EPI.ExceptionSpecType = EST_Unevaluated;
+  EPI.ExceptionSpecDecl = Destructor;
+  Destructor->setType(Context.getFunctionType(Context.VoidTy, 0, 0, EPI));
+
   // Note that we have declared this destructor.
   ++ASTContext::NumImplicitDestructorsDeclared;
-
+
   // Introduce this destructor into its scope.
   if (Scope *S = getScopeForContext(ClassDecl))
     PushOnScopeChains(Destructor, S, false);
   ClassDecl->addDecl(Destructor);
-
-  // This could be uniqued if it ever proves significant.
-  Destructor->setTypeSourceInfo(Context.getTrivialTypeSourceInfo(Ty));
 
   AddOverriddenMethods(ClassDecl, Destructor);
 
@@ -7309,7 +7188,7 @@ void Sema::DefineImplicitDestructor(SourceLocation CurrentLocation,
   }
 
   SourceLocation Loc = Destructor->getLocation();
-  Destructor->setBody(new (Context) CompoundStmt(Context, 0, 0, Loc, Loc));
+  Destructor->setBody(new (Context) CompoundStmt(Loc));
   Destructor->setImplicitlyDefined(true);
   Destructor->setUsed();
   MarkVTableUsed(CurrentLocation, ClassDecl);
@@ -7322,15 +7201,6 @@ void Sema::DefineImplicitDestructor(SourceLocation CurrentLocation,
 /// \brief Perform any semantic analysis which needs to be delayed until all
 /// pending class member declarations have been parsed.
 void Sema::ActOnFinishCXXMemberDecls() {
-  // Now we have parsed all exception specifications, determine the implicit
-  // exception specifications for destructors.
-  for (unsigned i = 0, e = DelayedDestructorExceptionSpecs.size();
-       i != e; ++i) {
-    CXXDestructorDecl *Dtor = DelayedDestructorExceptionSpecs[i];
-    AdjustDestructorExceptionSpec(Dtor->getParent(), Dtor, true);
-  }
-  DelayedDestructorExceptionSpecs.clear();
-
   // Perform any deferred checking of exception specifications for virtual
   // destructors.
   for (unsigned i = 0, e = DelayedDestructorExceptionSpecChecks.size();
@@ -7345,44 +7215,33 @@ void Sema::ActOnFinishCXXMemberDecls() {
   DelayedDestructorExceptionSpecChecks.clear();
 }
 
-void Sema::AdjustDestructorExceptionSpec(CXXRecordDecl *classDecl,
-                                         CXXDestructorDecl *destructor,
-                                         bool WasDelayed) {
+void Sema::AdjustDestructorExceptionSpec(CXXRecordDecl *ClassDecl,
+                                         CXXDestructorDecl *Destructor) {
+  assert(getLangOpts().CPlusPlus0x &&
+         "adjusting dtor exception specs was introduced in c++11");
+
   // C++11 [class.dtor]p3:
   //   A declaration of a destructor that does not have an exception-
   //   specification is implicitly considered to have the same exception-
   //   specification as an implicit declaration.
-  const FunctionProtoType *dtorType = destructor->getType()->
+  const FunctionProtoType *DtorType = Destructor->getType()->
                                         getAs<FunctionProtoType>();
-  if (!WasDelayed && dtorType->hasExceptionSpec())
+  if (DtorType->hasExceptionSpec())
     return;
 
-  ImplicitExceptionSpecification exceptSpec =
-      ComputeDefaultedDtorExceptionSpec(classDecl);
-
   // Replace the destructor's type, building off the existing one. Fortunately,
   // the only thing of interest in the destructor type is its extended info.
   // The return and arguments are fixed.
-  FunctionProtoType::ExtProtoInfo epi = dtorType->getExtProtoInfo();
-  epi.ExceptionSpecType = exceptSpec.getExceptionSpecType();
-  epi.NumExceptions = exceptSpec.size();
-  epi.Exceptions = exceptSpec.data();
-  QualType ty = Context.getFunctionType(Context.VoidTy, 0, 0, epi);
-
-  destructor->setType(ty);
-
-  // If we can't compute the exception specification for this destructor yet
-  // (because it depends on an exception specification which we have not parsed
-  // yet), make a note that we need to try again when the class is complete.
-  if (epi.ExceptionSpecType == EST_Delayed) {
-    assert(!WasDelayed && "couldn't compute destructor exception spec");
-    DelayedDestructorExceptionSpecs.push_back(destructor);
-  }
+  FunctionProtoType::ExtProtoInfo EPI = DtorType->getExtProtoInfo();
+  EPI.ExceptionSpecType = EST_Unevaluated;
+  EPI.ExceptionSpecDecl = Destructor;
+  Destructor->setType(Context.getFunctionType(Context.VoidTy, 0, 0, EPI));
 
   // FIXME: If the destructor has a body that could throw, and the newly created
   // spec doesn't allow exceptions, we should emit a warning, because this
   // change in behavior can break conforming C++03 programs at runtime.
-  // However, we don't have a body yet, so it needs to be done somewhere else.
+  // However, we don't have a body or an exception specification yet, so it
+  // needs to be done somewhere else.
 }
 
 /// \brief Builds a statement that copies/moves the given entity from \p From to
@@ -7584,11 +7443,13 @@ BuildSingleCopyAssign(Sema &S, SourceLocation Loc, QualType T,
                                    Loc, Copy.take());
 }
 
-std::pair<Sema::ImplicitExceptionSpecification, bool>
-Sema::ComputeDefaultedCopyAssignmentExceptionSpecAndConst(
-                                                 CXXRecordDecl *ClassDecl) {
+/// Determine whether an implicit copy assignment operator for ClassDecl has a
+/// const argument.
+/// FIXME: It ought to be possible to store this on the record.
+static bool isImplicitCopyAssignmentArgConst(Sema &S,
+                                             CXXRecordDecl *ClassDecl) {
   if (ClassDecl->isInvalidDecl())
-    return std::make_pair(ImplicitExceptionSpecification(*this), false);
+    return true;
 
   // C++ [class.copy]p10:
   //   If the class definition does not explicitly declare a copy
   //   assignment operator, one is declared implicitly.
   //   The implicitly-declared copy assignment operator for a class X
   //   will have the form
   //
   //       X& X::operator=(const X&)
   //
   //   if
-  bool HasConstCopyAssignment = true;
-
   //       -- each direct base class B of X has a copy assignment operator
   //          whose parameter is of type const B&, const volatile B& or B,
   //          and
   for (CXXRecordDecl::base_class_iterator Base = ClassDecl->bases_begin(),
                                        BaseEnd = ClassDecl->bases_end();
-       HasConstCopyAssignment && Base != BaseEnd; ++Base) {
+       Base != BaseEnd; ++Base) {
     // We'll handle this below
-    if (LangOpts.CPlusPlus0x && Base->isVirtual())
+    if (S.getLangOpts().CPlusPlus0x && Base->isVirtual())
       continue;
 
     assert(!Base->getType()->isDependentType() &&
            "Cannot generate implicit members for class with dependent bases.");
     CXXRecordDecl *BaseClassDecl = Base->getType()->getAsCXXRecordDecl();
-    HasConstCopyAssignment &=
-      (bool)LookupCopyingAssignment(BaseClassDecl, Qualifiers::Const,
-                                    false, 0);
+    if (!S.LookupCopyingAssignment(BaseClassDecl, Qualifiers::Const, false, 0))
+      return false;
   }
 
   // In C++11, the above citation has "or virtual" added
-  if (LangOpts.CPlusPlus0x) {
+  if (S.getLangOpts().CPlusPlus0x) {
     for (CXXRecordDecl::base_class_iterator Base = ClassDecl->vbases_begin(),
                                          BaseEnd = ClassDecl->vbases_end();
-         HasConstCopyAssignment && Base != BaseEnd; ++Base) {
+         Base != BaseEnd; ++Base) {
       assert(!Base->getType()->isDependentType() &&
             "Cannot generate implicit members for class with dependent bases.");
       CXXRecordDecl *BaseClassDecl = Base->getType()->getAsCXXRecordDecl();
-      HasConstCopyAssignment &=
-        (bool)LookupCopyingAssignment(BaseClassDecl, Qualifiers::Const,
-                                      false, 0);
+      if (!S.LookupCopyingAssignment(BaseClassDecl, Qualifiers::Const,
+                                     false, 0))
+        return false;
     }
   }
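
What isImplicitCopyAssignmentArgConst computes, shown on a hypothetical pair of classes: one member whose copy assignment takes a non-const reference makes the enclosing class's implicit operator take a non-const reference too.

    struct A {
      A& operator=(A&);   // non-const parameter
    };
    struct B {
      A a;                // so B's implicit operator= is B& operator=(B&)
    };
    // const B cb; B b; b = cb;  // error: cannot bind const B to B&
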
 
   //       -- for all the nonstatic data members of X that are of a
   //          class type M (or array thereof), each such class type
   //          has a copy assignment operator whose parameter is of type const M&,
   //          const volatile M& or M.
   for (CXXRecordDecl::field_iterator Field = ClassDecl->field_begin(),
                                   FieldEnd = ClassDecl->field_end();
-       HasConstCopyAssignment && Field != FieldEnd;
-       ++Field) {
-    QualType FieldType = Context.getBaseElementType((*Field)->getType());
-    if (CXXRecordDecl *FieldClassDecl = FieldType->getAsCXXRecordDecl()) {
-      HasConstCopyAssignment &=
-        (bool)LookupCopyingAssignment(FieldClassDecl, Qualifiers::Const,
-                                      false, 0);
-    }
+       Field != FieldEnd; ++Field) {
+    QualType FieldType = S.Context.getBaseElementType(Field->getType());
+    if (CXXRecordDecl *FieldClassDecl = FieldType->getAsCXXRecordDecl())
+      if (!S.LookupCopyingAssignment(FieldClassDecl, Qualifiers::Const,
+                                     false, 0))
+        return false;
   }
 
   //   Otherwise, the implicitly declared copy assignment operator will
   //   have the form
   //
   //       X& X::operator=(X&)
-
+
+  return true;
+}
+
+Sema::ImplicitExceptionSpecification
+Sema::ComputeDefaultedCopyAssignmentExceptionSpec(CXXMethodDecl *MD) {
+  CXXRecordDecl *ClassDecl = MD->getParent();
+
+  ImplicitExceptionSpecification ExceptSpec(*this);
+  if (ClassDecl->isInvalidDecl())
+    return ExceptSpec;
+
+  const FunctionProtoType *T = MD->getType()->castAs<FunctionProtoType>();
+  assert(T->getNumArgs() == 1 && "not a copy assignment op");
+  unsigned ArgQuals = T->getArgType(0).getNonReferenceType().getCVRQualifiers();
+
   // C++ [except.spec]p14:
-  //   An implicitly declared special member function (Clause 12) shall have an
+  //   An implicitly declared special member function (Clause 12) shall have an
   //   exception-specification. [...]
 
   // It is unspecified whether or not an implicit copy assignment operator
   // attempts to deduplicate calls to assignment operators of virtual bases are
   // made. As such, this exception specification is effectively unspecified.
   // Based on a similar decision made for constness in C++0x, we're erring on
   // the side of assuming such calls to be made regardless of whether they
   // actually happen.
-  ImplicitExceptionSpecification ExceptSpec(*this);
-  unsigned ArgQuals = HasConstCopyAssignment ? Qualifiers::Const : 0;
   for (CXXRecordDecl::base_class_iterator Base = ClassDecl->bases_begin(),
                                        BaseEnd = ClassDecl->bases_end();
        Base != BaseEnd; ++Base) {
@@ -7693,15 +7562,17 @@ Sema::ComputeDefaultedCopyAssignmentExceptionSpec(CXXMethodDecl *MD) {
                                   FieldEnd = ClassDecl->field_end();
        Field != FieldEnd;
        ++Field) {
-    QualType FieldType = Context.getBaseElementType((*Field)->getType());
+    QualType FieldType = Context.getBaseElementType(Field->getType());
     if (CXXRecordDecl *FieldClassDecl = FieldType->getAsCXXRecordDecl()) {
       if (CXXMethodDecl *CopyAssign =
-          LookupCopyingAssignment(FieldClassDecl, ArgQuals, false, 0))
+          LookupCopyingAssignment(FieldClassDecl,
+                                  ArgQuals | FieldType.getCVRQualifiers(),
+                                  false, 0))
         ExceptSpec.CalledDecl(Field->getLocation(), CopyAssign);
     }
   }
 
-  return std::make_pair(ExceptSpec, HasConstCopyAssignment);
+  return ExceptSpec;
 }
 
 CXXMethodDecl *Sema::DeclareImplicitCopyAssignment(CXXRecordDecl *ClassDecl) {
@@ -7710,26 +7581,19 @@ CXXMethodDecl *Sema::DeclareImplicitCopyAssignment(CXXRecordDecl *ClassDecl) {
   // for determining the argument type of the operator. Note also that
   // operators taking an object instead of a reference are allowed.
-  ImplicitExceptionSpecification Spec(*this);
-  bool Const;
-  llvm::tie(Spec, Const) =
-    ComputeDefaultedCopyAssignmentExceptionSpecAndConst(ClassDecl);
-
   QualType ArgType = Context.getTypeDeclType(ClassDecl);
   QualType RetType = Context.getLValueReferenceType(ArgType);
-  if (Const)
+  if (isImplicitCopyAssignmentArgConst(*this, ClassDecl))
     ArgType = ArgType.withConst();
   ArgType = Context.getLValueReferenceType(ArgType);
 
   //   An implicitly-declared copy assignment operator is an inline public
   //   member of its class.
-  FunctionProtoType::ExtProtoInfo EPI = Spec.getEPI();
   DeclarationName Name = Context.DeclarationNames.getCXXOperatorName(OO_Equal);
   SourceLocation ClassLoc = ClassDecl->getLocation();
   DeclarationNameInfo NameInfo(Name, ClassLoc);
   CXXMethodDecl *CopyAssignment
-    = CXXMethodDecl::Create(Context, ClassDecl, ClassLoc, NameInfo,
-                            Context.getFunctionType(RetType, &ArgType, 1, EPI),
+    = CXXMethodDecl::Create(Context, ClassDecl, ClassLoc, NameInfo, QualType(),
                             /*TInfo=*/0, /*isStatic=*/false,
                             /*StorageClassAsWritten=*/SC_None,
                             /*isInline=*/true, /*isConstexpr=*/false,
@@ -7738,7 +7602,13 @@ CXXMethodDecl *Sema::DeclareImplicitCopyAssignment(CXXRecordDecl *ClassDecl) {
   CopyAssignment->setDefaulted();
   CopyAssignment->setImplicit();
   CopyAssignment->setTrivial(ClassDecl->hasTrivialCopyAssignment());
-
+
+  // Build an exception specification pointing back at this member.
+  FunctionProtoType::ExtProtoInfo EPI;
+  EPI.ExceptionSpecType = EST_Unevaluated;
+  EPI.ExceptionSpecDecl = CopyAssignment;
+  CopyAssignment->setType(Context.getFunctionType(RetType, &ArgType, 1, EPI));
+
   // Add the parameter to the operator.
   ParmVarDecl *FromParam = ParmVarDecl::Create(Context, CopyAssignment,
                                                ClassLoc, ClassLoc, /*Id=*/0,
@@ -8076,9 +7946,10 @@ void Sema::DefineImplicitCopyAssignment(SourceLocation CurrentLocation,
 }
 
 Sema::ImplicitExceptionSpecification
-Sema::ComputeDefaultedMoveAssignmentExceptionSpec(CXXRecordDecl *ClassDecl) {
-  ImplicitExceptionSpecification ExceptSpec(*this);
+Sema::ComputeDefaultedMoveAssignmentExceptionSpec(CXXMethodDecl *MD) {
+  CXXRecordDecl *ClassDecl = MD->getParent();
 
+  ImplicitExceptionSpecification ExceptSpec(*this);
   if (ClassDecl->isInvalidDecl())
     return ExceptSpec;
 
@@ -8103,7 +7974,7 @@ Sema::ComputeDefaultedMoveAssignmentExceptionSpec(CXXMethodDecl *MD) {
     CXXRecordDecl *BaseClassDecl
       = cast<CXXRecordDecl>(Base->getType()->getAs<RecordType>()->getDecl());
     if (CXXMethodDecl *MoveAssign = LookupMovingAssignment(BaseClassDecl,
-                                                           false, 0))
+                                                           0, false, 0))
       ExceptSpec.CalledDecl(Base->getLocStart(), MoveAssign);
   }
 
@@ -8113,7 +7984,7 @@ Sema::ComputeDefaultedMoveAssignmentExceptionSpec(CXXMethodDecl *MD) {
     CXXRecordDecl *BaseClassDecl
       = cast<CXXRecordDecl>(Base->getType()->getAs<RecordType>()->getDecl());
     if (CXXMethodDecl *MoveAssign = LookupMovingAssignment(BaseClassDecl,
-                                                           false, 0))
+                                                           0, false, 0))
       ExceptSpec.CalledDecl(Base->getLocStart(), MoveAssign);
   }
 
@@ -8121,10 +7992,12 @@ Sema::ComputeDefaultedMoveAssignmentExceptionSpec(CXXMethodDecl *MD) {
                                   FieldEnd = ClassDecl->field_end();
        Field != FieldEnd;
        ++Field) {
-    QualType FieldType = Context.getBaseElementType((*Field)->getType());
+    QualType FieldType = Context.getBaseElementType(Field->getType());
     if (CXXRecordDecl *FieldClassDecl = FieldType->getAsCXXRecordDecl()) {
-      if (CXXMethodDecl *MoveAssign = LookupMovingAssignment(FieldClassDecl,
-                                                             false, 0))
+      if (CXXMethodDecl *MoveAssign =
+              LookupMovingAssignment(FieldClassDecl,
+                                     FieldType.getCVRQualifiers(),
+                                     false, 0))
         ExceptSpec.CalledDecl(Field->getLocation(), MoveAssign);
     }
   }
hasMoveOrIsTriviallyCopyable(Sema &S, QualType Type, bool IsConstructor) { // reference types, are supposed to return false here, but that appears // to be a standard defect. CXXRecordDecl *ClassDecl = Type->getAsCXXRecordDecl(); - if (!ClassDecl) + if (!ClassDecl || !ClassDecl->getDefinition()) return true; if (Type.isTriviallyCopyableType(S.Context)) @@ -8209,7 +8082,7 @@ static bool subobjectsHaveMoveOrTrivialCopy(Sema &S, CXXRecordDecl *ClassDecl, for (CXXRecordDecl::field_iterator Field = ClassDecl->field_begin(), FieldEnd = ClassDecl->field_end(); Field != FieldEnd; ++Field) { - if (!hasMoveOrIsTriviallyCopyable(S, (*Field)->getType(), IsConstructor)) + if (!hasMoveOrIsTriviallyCopyable(S, Field->getType(), IsConstructor)) return false; } @@ -8244,22 +8117,17 @@ CXXMethodDecl *Sema::DeclareImplicitMoveAssignment(CXXRecordDecl *ClassDecl) { // Note: The following rules are largely analogous to the move // constructor rules. - ImplicitExceptionSpecification Spec( - ComputeDefaultedMoveAssignmentExceptionSpec(ClassDecl)); - QualType ArgType = Context.getTypeDeclType(ClassDecl); QualType RetType = Context.getLValueReferenceType(ArgType); ArgType = Context.getRValueReferenceType(ArgType); // An implicitly-declared move assignment operator is an inline public // member of its class. - FunctionProtoType::ExtProtoInfo EPI = Spec.getEPI(); DeclarationName Name = Context.DeclarationNames.getCXXOperatorName(OO_Equal); SourceLocation ClassLoc = ClassDecl->getLocation(); DeclarationNameInfo NameInfo(Name, ClassLoc); CXXMethodDecl *MoveAssignment - = CXXMethodDecl::Create(Context, ClassDecl, ClassLoc, NameInfo, - Context.getFunctionType(RetType, &ArgType, 1, EPI), + = CXXMethodDecl::Create(Context, ClassDecl, ClassLoc, NameInfo, QualType(), /*TInfo=*/0, /*isStatic=*/false, /*StorageClassAsWritten=*/SC_None, /*isInline=*/true, @@ -8270,6 +8138,12 @@ CXXMethodDecl *Sema::DeclareImplicitMoveAssignment(CXXRecordDecl *ClassDecl) { MoveAssignment->setImplicit(); MoveAssignment->setTrivial(ClassDecl->hasTrivialMoveAssignment()); + // Build an exception specification pointing back at this member. + FunctionProtoType::ExtProtoInfo EPI; + EPI.ExceptionSpecType = EST_Unevaluated; + EPI.ExceptionSpecDecl = MoveAssignment; + MoveAssignment->setType(Context.getFunctionType(RetType, &ArgType, 1, EPI)); + // Add the parameter to the operator. ParmVarDecl *FromParam = ParmVarDecl::Create(Context, MoveAssignment, ClassLoc, ClassLoc, /*Id=*/0, @@ -8620,10 +8494,12 @@ void Sema::DefineImplicitMoveAssignment(SourceLocation CurrentLocation, } } -std::pair -Sema::ComputeDefaultedCopyCtorExceptionSpecAndConst(CXXRecordDecl *ClassDecl) { +/// Determine whether an implicit copy constructor for ClassDecl has a const +/// argument. +/// FIXME: It ought to be possible to store this on the record. +static bool isImplicitCopyCtorArgConst(Sema &S, CXXRecordDecl *ClassDecl) { if (ClassDecl->isInvalidDecl()) - return std::make_pair(ImplicitExceptionSpecification(*this), false); + return true; // C++ [class.copy]p5: // The implicitly-declared copy constructor for a class X will // have the form // // X::X(const X&) // // if - // FIXME: It ought to be possible to store this on the record.
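The C++ [class.copy]p5 rule quoted above is observable in ordinary code: a single subobject whose copy constructor takes a non-const reference degrades the enclosing class's implicit copy constructor from X::X(const X&) to X::X(X&), which is exactly what isImplicitCopyCtorArgConst detects. A compilable illustration:

#include <type_traits>

struct NonConstCopy {
  NonConstCopy() {}
  NonConstCopy(NonConstCopy &) {} // copy ctor takes a non-const reference
};

struct X { NonConstCopy M; }; // implicit copy ctor is X::X(X&)

static_assert(!std::is_constructible<X, const X &>::value,
              "X cannot be copied from a const lvalue");
static_assert(std::is_constructible<X, X &>::value,
              "X can still be copied from a non-const lvalue");

int main() {}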
- bool HasConstCopyConstructor = true; - // -- each direct or virtual base class B of X has a copy // constructor whose first parameter is of type const B& or // const volatile B&, and for (CXXRecordDecl::base_class_iterator Base = ClassDecl->bases_begin(), BaseEnd = ClassDecl->bases_end(); - HasConstCopyConstructor && Base != BaseEnd; - ++Base) { + Base != BaseEnd; ++Base) { // Virtual bases are handled below. if (Base->isVirtual()) continue; - + CXXRecordDecl *BaseClassDecl = cast(Base->getType()->getAs()->getDecl()); - HasConstCopyConstructor &= - (bool)LookupCopyingConstructor(BaseClassDecl, Qualifiers::Const); + // FIXME: This lookup is wrong. If the copy ctor for a member or base is + // ambiguous, we should still produce a constructor with a const-qualified + // parameter. + if (!S.LookupCopyingConstructor(BaseClassDecl, Qualifiers::Const)) + return false; } for (CXXRecordDecl::base_class_iterator Base = ClassDecl->vbases_begin(), BaseEnd = ClassDecl->vbases_end(); - HasConstCopyConstructor && Base != BaseEnd; - ++Base) { + Base != BaseEnd; ++Base) { CXXRecordDecl *BaseClassDecl = cast(Base->getType()->getAs()->getDecl()); - HasConstCopyConstructor &= - (bool)LookupCopyingConstructor(BaseClassDecl, Qualifiers::Const); + if (!S.LookupCopyingConstructor(BaseClassDecl, Qualifiers::Const)) + return false; } - + // -- for all the nonstatic data members of X that are of a // class type M (or array thereof), each such class type // has a copy constructor whose first parameter is of type // const M& or const volatile M&. for (CXXRecordDecl::field_iterator Field = ClassDecl->field_begin(), FieldEnd = ClassDecl->field_end(); - HasConstCopyConstructor && Field != FieldEnd; - ++Field) { - QualType FieldType = Context.getBaseElementType((*Field)->getType()); + Field != FieldEnd; ++Field) { + QualType FieldType = S.Context.getBaseElementType(Field->getType()); if (CXXRecordDecl *FieldClassDecl = FieldType->getAsCXXRecordDecl()) { - HasConstCopyConstructor &= - (bool)LookupCopyingConstructor(FieldClassDecl, Qualifiers::Const); + if (!S.LookupCopyingConstructor(FieldClassDecl, Qualifiers::Const)) + return false; } } + // Otherwise, the implicitly declared copy constructor will have // the form // // X::X(X&) - + + return true; +} + +Sema::ImplicitExceptionSpecification +Sema::ComputeDefaultedCopyCtorExceptionSpec(CXXMethodDecl *MD) { + CXXRecordDecl *ClassDecl = MD->getParent(); + + ImplicitExceptionSpecification ExceptSpec(*this); + if (ClassDecl->isInvalidDecl()) + return ExceptSpec; + + const FunctionProtoType *T = MD->getType()->castAs(); + assert(T->getNumArgs() >= 1 && "not a copy ctor"); + unsigned Quals = T->getArgType(0).getNonReferenceType().getCVRQualifiers(); + // C++ [except.spec]p14: // An implicitly declared special member function (Clause 12) shall have an // exception-specification. [...] - ImplicitExceptionSpecification ExceptSpec(*this); - unsigned Quals = HasConstCopyConstructor? 
Qualifiers::Const : 0; for (CXXRecordDecl::base_class_iterator Base = ClassDecl->bases_begin(), BaseEnd = ClassDecl->bases_end(); Base != BaseEnd; @@ -8714,15 +8601,16 @@ Sema::ComputeDefaultedCopyCtorExceptionSpecAndConst(CXXRecordDecl *ClassDecl) { FieldEnd = ClassDecl->field_end(); Field != FieldEnd; ++Field) { - QualType FieldType = Context.getBaseElementType((*Field)->getType()); + QualType FieldType = Context.getBaseElementType(Field->getType()); if (CXXRecordDecl *FieldClassDecl = FieldType->getAsCXXRecordDecl()) { if (CXXConstructorDecl *CopyConstructor = - LookupCopyingConstructor(FieldClassDecl, Quals)) + LookupCopyingConstructor(FieldClassDecl, + Quals | FieldType.getCVRQualifiers())) ExceptSpec.CalledDecl(Field->getLocation(), CopyConstructor); } } - return std::make_pair(ExceptSpec, HasConstCopyConstructor); + return ExceptSpec; } CXXConstructorDecl *Sema::DeclareImplicitCopyConstructor( @@ -8731,18 +8619,16 @@ CXXConstructorDecl *Sema::DeclareImplicitCopyConstructor( // If the class definition does not explicitly declare a copy // constructor, one is declared implicitly. - ImplicitExceptionSpecification Spec(*this); - bool Const; - llvm::tie(Spec, Const) = - ComputeDefaultedCopyCtorExceptionSpecAndConst(ClassDecl); - QualType ClassType = Context.getTypeDeclType(ClassDecl); QualType ArgType = ClassType; + bool Const = isImplicitCopyCtorArgConst(*this, ClassDecl); if (Const) ArgType = ArgType.withConst(); ArgType = Context.getLValueReferenceType(ArgType); - - FunctionProtoType::ExtProtoInfo EPI = Spec.getEPI(); + + bool Constexpr = defaultedSpecialMemberIsConstexpr(*this, ClassDecl, + CXXCopyConstructor, + Const); DeclarationName Name = Context.DeclarationNames.getCXXConstructorName( @@ -8753,15 +8639,20 @@ CXXConstructorDecl *Sema::DeclareImplicitCopyConstructor( // An implicitly-declared copy constructor is an inline public // member of its class. CXXConstructorDecl *CopyConstructor = CXXConstructorDecl::Create( - Context, ClassDecl, ClassLoc, NameInfo, - Context.getFunctionType(Context.VoidTy, &ArgType, 1, EPI), /*TInfo=*/0, + Context, ClassDecl, ClassLoc, NameInfo, QualType(), /*TInfo=*/0, /*isExplicit=*/false, /*isInline=*/true, /*isImplicitlyDeclared=*/true, - /*isConstexpr=*/ClassDecl->defaultedCopyConstructorIsConstexpr() && - getLangOpts().CPlusPlus0x); + Constexpr); CopyConstructor->setAccess(AS_public); CopyConstructor->setDefaulted(); CopyConstructor->setTrivial(ClassDecl->hasTrivialCopyConstructor()); + // Build an exception specification pointing back at this member. + FunctionProtoType::ExtProtoInfo EPI; + EPI.ExceptionSpecType = EST_Unevaluated; + EPI.ExceptionSpecDecl = CopyConstructor; + CopyConstructor->setType( + Context.getFunctionType(Context.VoidTy, &ArgType, 1, EPI)); + // Note that we have declared this constructor. ++ASTContext::NumImplicitCopyConstructorsDeclared; @@ -8825,7 +8716,9 @@ void Sema::DefineImplicitCopyConstructor(SourceLocation CurrentLocation, } Sema::ImplicitExceptionSpecification -Sema::ComputeDefaultedMoveCtorExceptionSpec(CXXRecordDecl *ClassDecl) { +Sema::ComputeDefaultedMoveCtorExceptionSpec(CXXMethodDecl *MD) { + CXXRecordDecl *ClassDecl = MD->getParent(); + // C++ [except.spec]p14: // An implicitly declared special member function (Clause 12) shall have an // exception-specification. [...] 
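The FieldType.getCVRQualifiers() term now merged into the lookups above reflects a language fact: a subobject carries the qualifiers of its member declaration, so copying a class that holds a const M (or volatile M) field requires M's copy operation to accept a const (or volatile) source. The const case, checked in isolation:

#include <type_traits>

struct S { const int M; };
S Obj = {42};

// The member access (Obj.M) is const even though Obj itself is not:
// the field's own qualifiers are part of the subobject's type.
static_assert(std::is_const<std::remove_reference<decltype((Obj.M))>::type>::value,
              "field qualifiers come along with the subobject");

int main() {}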
@@ -8842,7 +8735,8 @@ Sema::ComputeDefaultedMoveCtorExceptionSpec(CXXRecordDecl *ClassDecl) { if (const RecordType *BaseType = B->getType()->getAs()) { CXXRecordDecl *BaseClassDecl = cast(BaseType->getDecl()); - CXXConstructorDecl *Constructor = LookupMovingConstructor(BaseClassDecl); + CXXConstructorDecl *Constructor = + LookupMovingConstructor(BaseClassDecl, 0); // If this is a deleted function, add it anyway. This might be conformant // with the standard. This might not. I'm not sure. It might not matter. if (Constructor) @@ -8856,7 +8750,8 @@ Sema::ComputeDefaultedMoveCtorExceptionSpec(CXXRecordDecl *ClassDecl) { B != BEnd; ++B) { if (const RecordType *BaseType = B->getType()->getAs()) { CXXRecordDecl *BaseClassDecl = cast(BaseType->getDecl()); - CXXConstructorDecl *Constructor = LookupMovingConstructor(BaseClassDecl); + CXXConstructorDecl *Constructor = + LookupMovingConstructor(BaseClassDecl, 0); // If this is a deleted function, add it anyway. This might be conformant // with the standard. This might not. I'm not sure. It might not matter. if (Constructor) @@ -8868,10 +8763,10 @@ Sema::ComputeDefaultedMoveCtorExceptionSpec(CXXRecordDecl *ClassDecl) { for (RecordDecl::field_iterator F = ClassDecl->field_begin(), FEnd = ClassDecl->field_end(); F != FEnd; ++F) { - if (const RecordType *RecordTy - = Context.getBaseElementType(F->getType())->getAs()) { - CXXRecordDecl *FieldRecDecl = cast(RecordTy->getDecl()); - CXXConstructorDecl *Constructor = LookupMovingConstructor(FieldRecDecl); + QualType FieldType = Context.getBaseElementType(F->getType()); + if (CXXRecordDecl *FieldRecDecl = FieldType->getAsCXXRecordDecl()) { + CXXConstructorDecl *Constructor = + LookupMovingConstructor(FieldRecDecl, FieldType.getCVRQualifiers()); // If this is a deleted function, add it anyway. This might be conformant // with the standard. This might not. I'm not sure. It might not matter. // In particular, the problem is that this function never gets called. It @@ -8906,13 +8801,12 @@ CXXConstructorDecl *Sema::DeclareImplicitMoveConstructor( return 0; } - ImplicitExceptionSpecification Spec( - ComputeDefaultedMoveCtorExceptionSpec(ClassDecl)); - QualType ClassType = Context.getTypeDeclType(ClassDecl); QualType ArgType = Context.getRValueReferenceType(ClassType); - - FunctionProtoType::ExtProtoInfo EPI = Spec.getEPI(); + + bool Constexpr = defaultedSpecialMemberIsConstexpr(*this, ClassDecl, + CXXMoveConstructor, + false); DeclarationName Name = Context.DeclarationNames.getCXXConstructorName( @@ -8924,15 +8818,20 @@ CXXConstructorDecl *Sema::DeclareImplicitMoveConstructor( // An implicitly-declared copy/move constructor is an inline public // member of its class. CXXConstructorDecl *MoveConstructor = CXXConstructorDecl::Create( - Context, ClassDecl, ClassLoc, NameInfo, - Context.getFunctionType(Context.VoidTy, &ArgType, 1, EPI), /*TInfo=*/0, + Context, ClassDecl, ClassLoc, NameInfo, QualType(), /*TInfo=*/0, /*isExplicit=*/false, /*isInline=*/true, /*isImplicitlyDeclared=*/true, - /*isConstexpr=*/ClassDecl->defaultedMoveConstructorIsConstexpr() && - getLangOpts().CPlusPlus0x); + Constexpr); MoveConstructor->setAccess(AS_public); MoveConstructor->setDefaulted(); MoveConstructor->setTrivial(ClassDecl->hasTrivialMoveConstructor()); + // Build an exception specification pointing back at this member. 
+ FunctionProtoType::ExtProtoInfo EPI; + EPI.ExceptionSpecType = EST_Unevaluated; + EPI.ExceptionSpecDecl = MoveConstructor; + MoveConstructor->setType( + Context.getFunctionType(Context.VoidTy, &ArgType, 1, EPI)); + // Add the parameter to the constructor. ParmVarDecl *FromParam = ParmVarDecl::Create(Context, MoveConstructor, ClassLoc, ClassLoc, @@ -9046,8 +8945,7 @@ void Sema::DefineImplicitLambdaToFunctionPointerConversion( // will fill in the actual details. Invoke->setUsed(); Invoke->setReferenced(); - Invoke->setBody(new (Context) CompoundStmt(Context, 0, 0, Conv->getLocation(), - Conv->getLocation())); + Invoke->setBody(new (Context) CompoundStmt(Conv->getLocation())); if (ASTMutationListener *L = getASTMutationListener()) { L->CompletedImplicitDefinition(Conv); @@ -9171,13 +9069,6 @@ Sema::BuildCXXConstructExpr(SourceLocation ConstructLoc, QualType DeclInitType, unsigned NumExprs = ExprArgs.size(); Expr **Exprs = (Expr **)ExprArgs.release(); - for (specific_attr_iterator - i = Constructor->specific_attr_begin(), - e = Constructor->specific_attr_end(); i != e; ++i) { - const NonNullAttr *NonNull = *i; - CheckNonNullArguments(NonNull, ExprArgs.get(), ConstructLoc); - } - MarkFunctionReferenced(ConstructLoc, Constructor); return Owned(CXXConstructExpr::Create(Context, DeclInitType, ConstructLoc, Constructor, Elidable, Exprs, NumExprs, @@ -9243,7 +9134,7 @@ void Sema::FinalizeVarWithDestructor(VarDecl *VD, const RecordType *Record) { bool Sema::CompleteConstructorCall(CXXConstructorDecl *Constructor, MultiExprArg ArgsPtr, - SourceLocation Loc, + SourceLocation Loc, ASTOwningVector &ConvertedArgs, bool AllowExplicit) { // FIXME: This duplicates a lot of code from Sema::ConvertArgumentsForCall. @@ -9271,7 +9162,8 @@ Sema::CompleteConstructorCall(CXXConstructorDecl *Constructor, DiagnoseSentinelCalls(Constructor, Loc, AllArgs.data(), AllArgs.size()); - // FIXME: Missing call to CheckFunctionCall or equivalent + CheckConstructorCall(Constructor, AllArgs.data(), AllArgs.size(), + Proto, Loc); return Invalid; } @@ -9329,7 +9221,7 @@ CheckOperatorNewDeleteTypes(Sema &SemaRef, const FunctionDecl *FnDecl, diag::err_operator_new_delete_too_few_parameters) << FnDecl->getDeclName(); - // Check the the first parameter type is not dependent. + // Check the first parameter type is not dependent. QualType FirstParamType = FnDecl->getParamDecl(0)->getType(); if (FirstParamType->isDependentType()) return SemaRef.Diag(FnDecl->getLocation(), DependentParamTypeDiag) @@ -9568,7 +9460,7 @@ bool Sema::CheckLiteralOperatorDeclaration(FunctionDecl *FnDecl) { TemplateParameterList *Params = TpDecl->getTemplateParameters(); if (Params->size() == 1) { NonTypeTemplateParmDecl *PmDecl = - cast(Params->getParam(0)); + dyn_cast(Params->getParam(0)); // The template parameter must be a char parameter pack. if (PmDecl && PmDecl->isTemplateParameterPack() && @@ -9769,7 +9661,8 @@ VarDecl *Sema::BuildExceptionDeclaration(Scope *S, Diag(Loc, diag::err_objc_object_catch); Invalid = true; } else if (T->isObjCObjectPointerType()) { - if (!getLangOpts().ObjCNonFragileABI) + // FIXME: should this be a test for macosx-fragile specifically? 
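The EST_Unevaluated exception specifications built in the hunks above all follow one pattern: the special member is created with a placeholder type, and its exception specification records only the declaration that can later answer the question; ResolveExceptionSpec then triggers EvaluateImplicitExceptionSpec on first use. A minimal standalone sketch of this declare-now, evaluate-on-demand shape (plain C++, illustrative names, not clang's API):

#include <functional>
#include <iostream>
#include <utility>

class LazySpec {
  mutable bool Evaluated;
  mutable bool CanThrow;
  std::function<bool()> Compute; // stands in for ExceptionSpecDecl
public:
  explicit LazySpec(std::function<bool()> C)
      : Evaluated(false), CanThrow(false), Compute(std::move(C)) {}

  // cf. ResolveExceptionSpec: compute once, on first query.
  bool canThrow() const {
    if (!Evaluated) {
      CanThrow = Compute(); // cf. EvaluateImplicitExceptionSpec
      Evaluated = true;
    }
    return CanThrow;
  }
};

int main() {
  LazySpec Spec([] { std::cout << "evaluating now\n"; return false; });
  std::cout << Spec.canThrow() << '\n'; // evaluates
  std::cout << Spec.canThrow() << '\n'; // cached
}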
+ if (getLangOpts().ObjCRuntime.isFragile()) Diag(Loc, diag::warn_objc_pointer_cxx_catch_fragile); } } @@ -9881,37 +9774,49 @@ Decl *Sema::ActOnExceptionDeclarator(Scope *S, Declarator &D) { Decl *Sema::ActOnStaticAssertDeclaration(SourceLocation StaticAssertLoc, Expr *AssertExpr, - Expr *AssertMessageExpr_, + Expr *AssertMessageExpr, SourceLocation RParenLoc) { - StringLiteral *AssertMessage = cast(AssertMessageExpr_); + StringLiteral *AssertMessage = cast(AssertMessageExpr); + + if (DiagnoseUnexpandedParameterPack(AssertExpr, UPPC_StaticAssertExpression)) + return 0; - if (!AssertExpr->isTypeDependent() && !AssertExpr->isValueDependent()) { + return BuildStaticAssertDeclaration(StaticAssertLoc, AssertExpr, + AssertMessage, RParenLoc, false); +} + +Decl *Sema::BuildStaticAssertDeclaration(SourceLocation StaticAssertLoc, + Expr *AssertExpr, + StringLiteral *AssertMessage, + SourceLocation RParenLoc, + bool Failed) { + if (!AssertExpr->isTypeDependent() && !AssertExpr->isValueDependent() && + !Failed) { // In a static_assert-declaration, the constant-expression shall be a // constant expression that can be contextually converted to bool. ExprResult Converted = PerformContextuallyConvertToBool(AssertExpr); if (Converted.isInvalid()) - return 0; + Failed = true; llvm::APSInt Cond; - if (VerifyIntegerConstantExpression(Converted.get(), &Cond, - PDiag(diag::err_static_assert_expression_is_not_constant), + if (!Failed && VerifyIntegerConstantExpression(Converted.get(), &Cond, + diag::err_static_assert_expression_is_not_constant, /*AllowFold=*/false).isInvalid()) - return 0; + Failed = true; - if (!Cond) { + if (!Failed && !Cond) { llvm::SmallString<256> MsgBuffer; llvm::raw_svector_ostream Msg(MsgBuffer); AssertMessage->printPretty(Msg, Context, 0, getPrintingPolicy()); Diag(StaticAssertLoc, diag::err_static_assert_failed) << Msg.str() << AssertExpr->getSourceRange(); + Failed = true; } } - if (DiagnoseUnexpandedParameterPack(AssertExpr, UPPC_StaticAssertExpression)) - return 0; - Decl *Decl = StaticAssertDecl::Create(Context, CurContext, StaticAssertLoc, - AssertExpr, AssertMessage, RParenLoc); + AssertExpr, AssertMessage, RParenLoc, + Failed); CurContext->addDecl(Decl); return Decl; @@ -10116,7 +10021,7 @@ Decl *Sema::ActOnTemplatedFriendTag(Scope *S, SourceLocation FriendLoc, /// friend class A::B; /// We permit this as a special case; if there are any template /// parameters present at all, require proper matching, i.e. -/// template <> template friend class A::B; +/// template <> template \ friend class A::B; Decl *Sema::ActOnFriendTypeDecl(Scope *S, const DeclSpec &DS, MultiTemplateParamsArg TempParams) { SourceLocation Loc = DS.getLocStart(); @@ -10438,9 +10343,11 @@ Decl *Sema::ActOnFriendFunctionDecl(Scope *S, Declarator &D, FrD->setAccess(AS_public); CurContext->addDecl(FrD); - if (ND->isInvalidDecl()) + if (ND->isInvalidDecl()) { FrD->setInvalidDecl(); - else { + } else { + if (DC->isRecord()) CheckFriendAccess(ND); + FunctionDecl *FD; if (FunctionTemplateDecl *FTD = dyn_cast(ND)) FD = FTD->getTemplatedDecl(); @@ -10464,8 +10371,13 @@ void Sema::SetDeclDeleted(Decl *Dcl, SourceLocation DelLoc) { return; } if (const FunctionDecl *Prev = Fn->getPreviousDecl()) { - Diag(DelLoc, diag::err_deleted_decl_not_first); - Diag(Prev->getLocation(), diag::note_previous_declaration); + // Don't consider the implicit declaration we generate for explicit + // specializations. FIXME: Do not generate these implicit declarations. 
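The rewritten static_assert path above enforces two requirements: the condition is contextually converted to bool, and it must be an integral constant expression (otherwise err_static_assert_expression_is_not_constant fires). Both are visible in ordinary C++11 code:

constexpr int Nonzero = 3;
static_assert(Nonzero, "a nonzero constant contextually converts to true");
static_assert(sizeof(long) >= sizeof(int), "the usual comparison form");

// A non-constant condition, e.g.
//   int runtime();
//   static_assert(runtime() != 0, "...");
// would be rejected as not an integral constant expression.
int main() {}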
+ if ((Prev->getTemplateSpecializationKind() != TSK_ExplicitSpecialization + || Prev->getPreviousDecl()) && !Prev->isDefined()) { + Diag(DelLoc, diag::err_deleted_decl_not_first); + Diag(Prev->getLocation(), diag::note_previous_declaration); + } // If the declaration wasn't the first, we delete the function anyway for // recovery. } @@ -10531,10 +10443,11 @@ void Sema::SetDeclDefaulted(Decl *Dcl, SourceLocation DefaultLoc) { if (Primary == Primary->getCanonicalDecl()) return; + CheckExplicitlyDefaultedSpecialMember(MD); + switch (Member) { case CXXDefaultConstructor: { CXXConstructorDecl *CD = cast(MD); - CheckExplicitlyDefaultedDefaultConstructor(CD); if (!CD->isInvalidDecl()) DefineImplicitDefaultConstructor(DefaultLoc, CD); break; @@ -10542,14 +10455,12 @@ void Sema::SetDeclDefaulted(Decl *Dcl, SourceLocation DefaultLoc) { case CXXCopyConstructor: { CXXConstructorDecl *CD = cast(MD); - CheckExplicitlyDefaultedCopyConstructor(CD); if (!CD->isInvalidDecl()) DefineImplicitCopyConstructor(DefaultLoc, CD); break; } case CXXCopyAssignment: { - CheckExplicitlyDefaultedCopyAssignment(MD); if (!MD->isInvalidDecl()) DefineImplicitCopyAssignment(DefaultLoc, MD); break; @@ -10557,7 +10468,6 @@ void Sema::SetDeclDefaulted(Decl *Dcl, SourceLocation DefaultLoc) { case CXXDestructor: { CXXDestructorDecl *DD = cast(MD); - CheckExplicitlyDefaultedDestructor(DD); if (!DD->isInvalidDecl()) DefineImplicitDestructor(DefaultLoc, DD); break; @@ -10565,14 +10475,12 @@ void Sema::SetDeclDefaulted(Decl *Dcl, SourceLocation DefaultLoc) { case CXXMoveConstructor: { CXXConstructorDecl *CD = cast(MD); - CheckExplicitlyDefaultedMoveConstructor(CD); if (!CD->isInvalidDecl()) DefineImplicitMoveConstructor(DefaultLoc, CD); break; } case CXXMoveAssignment: { - CheckExplicitlyDefaultedMoveAssignment(MD); if (!MD->isInvalidDecl()) DefineImplicitMoveAssignment(DefaultLoc, MD); break; @@ -10650,8 +10558,8 @@ bool Sema::CheckOverridingFunctionReturnType(const CXXMethodDecl *New, if (const RecordType *RT = NewClassTy->getAs()) { if (!RT->isBeingDefined() && RequireCompleteType(New->getLocation(), NewClassTy, - PDiag(diag::err_covariant_return_incomplete) - << New->getDeclName())) + diag::err_covariant_return_incomplete, + New->getDeclName())) return true; } @@ -10857,7 +10765,7 @@ bool Sema::DefineUsedVTables() { // Note: The VTableUses vector could grow as a result of marking // the members of a class as "used", so we check the size each - // time through the loop and prefer indices (with are stable) to + // time through the loop and prefer indices (which are stable) to // iterators (which are not). bool DefinedAnything = false; for (unsigned I = 0; I != VTableUses.size(); ++I) { @@ -10867,6 +10775,8 @@ bool Sema::DefineUsedVTables() { SourceLocation Loc = VTableUses[I].second; + bool DefineVTable = true; + // If this class has a key function, but that key function is // defined in another translation unit, we don't need to emit the // vtable even though we're using it. @@ -10877,7 +10787,8 @@ bool Sema::DefineUsedVTables() { case TSK_ExplicitSpecialization: case TSK_ExplicitInstantiationDeclaration: // The key function is in another translation unit. 
- continue; + DefineVTable = false; + break; case TSK_ExplicitInstantiationDefinition: case TSK_ImplicitInstantiation: @@ -10906,7 +10817,15 @@ bool Sema::DefineUsedVTables() { } if (IsExplicitInstantiationDeclaration) - continue; + DefineVTable = false; + } + + // The exception specifications for all virtual members may be needed even + // if we are not providing an authoritative form of the vtable in this TU. + // We may choose to emit it available_externally anyway. + if (!DefineVTable) { + MarkVirtualMemberExceptionSpecsNeeded(Loc, Class); + continue; } // Mark all of the virtual members of this class as referenced, so @@ -10935,16 +10854,33 @@ bool Sema::DefineUsedVTables() { return DefinedAnything; } +void Sema::MarkVirtualMemberExceptionSpecsNeeded(SourceLocation Loc, + const CXXRecordDecl *RD) { + for (CXXRecordDecl::method_iterator I = RD->method_begin(), + E = RD->method_end(); I != E; ++I) + if ((*I)->isVirtual() && !(*I)->isPure()) + ResolveExceptionSpec(Loc, (*I)->getType()->castAs()); +} + void Sema::MarkVirtualMembersReferenced(SourceLocation Loc, const CXXRecordDecl *RD) { - for (CXXRecordDecl::method_iterator i = RD->method_begin(), - e = RD->method_end(); i != e; ++i) { - CXXMethodDecl *MD = *i; + // Mark all functions which will appear in RD's vtable as used. + CXXFinalOverriderMap FinalOverriders; + RD->getFinalOverriders(FinalOverriders); + for (CXXFinalOverriderMap::const_iterator I = FinalOverriders.begin(), + E = FinalOverriders.end(); + I != E; ++I) { + for (OverridingMethods::const_iterator OI = I->second.begin(), + OE = I->second.end(); + OI != OE; ++OI) { + assert(OI->second.size() > 0 && "no final overrider"); + CXXMethodDecl *Overrider = OI->second.front().Method; - // C++ [basic.def.odr]p2: - // [...] A virtual member function is used if it is not pure. [...] - if (MD->isVirtual() && !MD->isPure()) - MarkFunctionReferenced(Loc, MD); + // C++ [basic.def.odr]p2: + // [...] A virtual member function is used if it is not pure. [...] + if (!Overrider->isPure()) + MarkFunctionReferenced(Loc, Overrider); + } } // Only classes that have virtual bases need a VTT. 
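The switch above from iterating a class's own methods to walking getFinalOverriders matters when an overrider is inherited: a vtable slot is filled by the method's final overrider, which may be declared in a base class. In ordinary C++ terms:

#include <iostream>

struct A {
  virtual void f() { std::cout << "A::f\n"; }
  virtual ~A() {}
};
struct B : A {
  void f() { std::cout << "B::f\n"; } // overrides A::f
};
struct C : B {}; // declares nothing; the final overrider of f() is B::f

int main() {
  C Obj;
  A &Ref = Obj;
  Ref.f(); // prints "B::f": C's vtable slot holds the final overrider
}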
@@ -11162,8 +11098,8 @@ bool Sema::checkThisInStaticMemberFunctionExceptionSpec(CXXMethodDecl *Method) { switch (Proto->getExceptionSpecType()) { case EST_Uninstantiated: + case EST_Unevaluated: case EST_BasicNoexcept: - case EST_Delayed: case EST_DynamicNone: case EST_MSAny: case EST_None: @@ -11290,7 +11226,7 @@ Sema::checkExceptionSpecification(ExceptionSpecificationType EST, if (!NoexceptExpr->isValueDependent()) NoexceptExpr = VerifyIntegerConstantExpression(NoexceptExpr, 0, - PDiag(diag::err_noexcept_needs_constant_expression), + diag::err_noexcept_needs_constant_expression, /*AllowFold*/ false).take(); EPI.NoexceptExpr = NoexceptExpr; } diff --git a/lib/Sema/SemaDeclObjC.cpp b/lib/Sema/SemaDeclObjC.cpp index a942d49..9da4d69 100644 --- a/lib/Sema/SemaDeclObjC.cpp +++ b/lib/Sema/SemaDeclObjC.cpp @@ -173,10 +173,11 @@ void Sema::CheckObjCMethodOverride(ObjCMethodDecl *NewMethod, Diag(Overridden->getLocation(), diag::note_previous_decl) << "method"; } - ObjCMethodDecl::param_const_iterator oi = Overridden->param_begin(); + ObjCMethodDecl::param_const_iterator oi = Overridden->param_begin(), + oe = Overridden->param_end(); for (ObjCMethodDecl::param_iterator ni = NewMethod->param_begin(), ne = NewMethod->param_end(); - ni != ne; ++ni, ++oi) { + ni != ne && oi != oe; ++ni, ++oi) { const ParmVarDecl *oldDecl = (*oi); ParmVarDecl *newDecl = (*ni); if (newDecl->hasAttr() != @@ -196,7 +197,6 @@ static bool CheckARCMethodDecl(Sema &S, ObjCMethodDecl *method) { ObjCMethodFamily family = method->getMethodFamily(); switch (family) { case OMF_None: - case OMF_dealloc: case OMF_finalize: case OMF_retain: case OMF_release: @@ -206,6 +206,24 @@ static bool CheckARCMethodDecl(Sema &S, ObjCMethodDecl *method) { case OMF_performSelector: return false; + case OMF_dealloc: + if (!S.Context.hasSameType(method->getResultType(), S.Context.VoidTy)) { + SourceRange ResultTypeRange; + if (const TypeSourceInfo *ResultTypeInfo + = method->getResultTypeSourceInfo()) + ResultTypeRange = ResultTypeInfo->getTypeLoc().getSourceRange(); + if (ResultTypeRange.isInvalid()) + S.Diag(method->getLocation(), diag::error_dealloc_bad_result_type) + << method->getResultType() + << FixItHint::CreateInsertion(method->getSelectorLoc(0), "(void)"); + else + S.Diag(method->getLocation(), diag::error_dealloc_bad_result_type) + << method->getResultType() + << FixItHint::CreateReplacement(ResultTypeRange, "void"); + return true; + } + return false; + case OMF_init: // If the method doesn't obey the init rules, don't bother annotating it. if (S.checkInitMethod(method, QualType())) @@ -267,9 +285,9 @@ void Sema::AddAnyMethodToGlobalPool(Decl *D) { /// ActOnStartOfObjCMethodDef - This routine sets up parameters; invisible /// and user declared, in the method definition's AST. void Sema::ActOnStartOfObjCMethodDef(Scope *FnBodyScope, Decl *D) { - assert(getCurMethodDecl() == 0 && "Method parsing confused"); + assert((getCurMethodDecl() == 0) && "Method parsing confused"); ObjCMethodDecl *MDecl = dyn_cast_or_null(D); - + // If we don't have a valid method decl, simply return. if (!MDecl) return; @@ -338,11 +356,11 @@ void Sema::ActOnStartOfObjCMethodDef(Scope *FnBodyScope, Decl *D) { // Finally, in ActOnFinishFunctionBody() (SemaDecl), warn if flag is set. // Only do this if the current class actually has a superclass.
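Several hunks in this file apply the same hardening as the CheckObjCMethodOverride loop above: parallel walks over two parameter lists now check both end iterators (ni != ne && oi != oe), so a selector whose declaration and implementation disagree on arity can no longer run off the end of the shorter list. The pattern as a generic sketch:

#include <iostream>
#include <vector>

// Visit pairs until *either* sequence is exhausted, instead of assuming
// both have the same length.
template <typename It1, typename It2, typename Fn>
void forEachPair(It1 F1, It1 L1, It2 F2, It2 L2, Fn Visit) {
  for (; F1 != L1 && F2 != L2; ++F1, ++F2)
    Visit(*F1, *F2);
}

int main() {
  std::vector<int> DeclParams = {1, 2, 3};
  std::vector<int> ImplParams = {1, 2}; // mismatched arity
  forEachPair(DeclParams.begin(), DeclParams.end(),
              ImplParams.begin(), ImplParams.end(),
              [](int A, int B) { std::cout << A << " vs " << B << '\n'; });
}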
if (IC->getSuperClass()) { - ObjCShouldCallSuperDealloc = + getCurFunction()->ObjCShouldCallSuperDealloc = !(Context.getLangOpts().ObjCAutoRefCount || Context.getLangOpts().getGC() == LangOptions::GCOnly) && MDecl->getMethodFamily() == OMF_dealloc; - ObjCShouldCallSuperFinalize = + getCurFunction()->ObjCShouldCallSuperFinalize = Context.getLangOpts().getGC() != LangOptions::NonGC && MDecl->getMethodFamily() == OMF_finalize; } @@ -474,11 +492,11 @@ ActOnStartClassInterface(SourceLocation AtInterfaceLoc, Diag(SuperLoc, diag::err_undef_superclass) << SuperName << ClassName << SourceRange(AtInterfaceLoc, ClassLoc); else if (RequireCompleteType(SuperLoc, - Context.getObjCInterfaceType(SuperClassDecl), - PDiag(diag::err_forward_superclass) - << SuperClassDecl->getDeclName() - << ClassName - << SourceRange(AtInterfaceLoc, ClassLoc))) { + Context.getObjCInterfaceType(SuperClassDecl), + diag::err_forward_superclass, + SuperClassDecl->getDeclName(), + ClassName, + SourceRange(AtInterfaceLoc, ClassLoc))) { SuperClassDecl = 0; } } @@ -501,13 +519,13 @@ ActOnStartClassInterface(SourceLocation AtInterfaceLoc, return ActOnObjCContainerStartDefinition(IDecl); } -/// ActOnCompatiblityAlias - this action is called after complete parsing of -/// @compatibility_alias declaration. It sets up the alias relationships. -Decl *Sema::ActOnCompatiblityAlias(SourceLocation AtLoc, - IdentifierInfo *AliasName, - SourceLocation AliasLocation, - IdentifierInfo *ClassName, - SourceLocation ClassLocation) { +/// ActOnCompatibilityAlias - this action is called after complete parsing of +/// a \@compatibility_alias declaration. It sets up the alias relationships. +Decl *Sema::ActOnCompatibilityAlias(SourceLocation AtLoc, + IdentifierInfo *AliasName, + SourceLocation AliasLocation, + IdentifierInfo *ClassName, + SourceLocation ClassLocation) { // Look for previous declaration of alias name NamedDecl *ADecl = LookupSingleName(TUScope, AliasName, AliasLocation, LookupOrdinaryName, ForRedeclaration); @@ -712,7 +730,7 @@ void Sema::DiagnoseClassExtensionDupMethods(ObjCCategoryDecl *CAT, } } -/// ActOnForwardProtocolDeclaration - Handle @protocol foo; +/// ActOnForwardProtocolDeclaration - Handle \@protocol foo; Sema::DeclGroupPtrTy Sema::ActOnForwardProtocolDeclaration(SourceLocation AtProtocolLoc, const IdentifierLocPair *IdentList, @@ -759,8 +777,8 @@ ActOnStartCategoryInterface(SourceLocation AtInterfaceLoc, if (!IDecl || RequireCompleteType(ClassLoc, Context.getObjCInterfaceType(IDecl), - PDiag(diag::err_category_forward_interface) - << (CategoryName == 0))) { + diag::err_category_forward_interface, + CategoryName == 0)) { // Create an invalid ObjCCategoryDecl to serve as context for // the enclosing method declarations. We mark the decl invalid // to make it clear that this isn't a valid AST. @@ -1019,8 +1037,8 @@ void Sema::CheckImplementationIvars(ObjCImplementationDecl *ImpDecl, ObjCInterfaceDecl* IDecl = ImpDecl->getClassInterface(); if (!IDecl) return; - /// Check case of non-existing @interface decl. - /// (legacy objective-c @implementation decl without an @interface decl). + /// Check case of non-existing \@interface decl. + /// (legacy objective-c \@implementation decl without an \@interface decl). /// Add implementations's ivar to the synthesize class's ivar list. 
if (IDecl->isImplicitInterfaceDecl()) { IDecl->setEndOfDefinitionLoc(RBrace); @@ -1038,7 +1056,7 @@ void Sema::CheckImplementationIvars(ObjCImplementationDecl *ImpDecl, return; assert(ivars && "missing @implementation ivars"); - if (LangOpts.ObjCNonFragileABI2) { + if (LangOpts.ObjCRuntime.isNonFragile()) { if (ImpDecl->getSuperClass()) Diag(ImpDecl->getLocation(), diag::warn_on_superclass_use); for (unsigned i = 0; i < numIvars; i++) { @@ -1094,7 +1112,7 @@ void Sema::CheckImplementationIvars(ObjCImplementationDecl *ImpDecl, if (numIvars > 0) Diag(ivars[j]->getLocation(), diag::err_inconsistant_ivar_count); else if (IVI != IVE) - Diag((*IVI)->getLocation(), diag::err_inconsistant_ivar_count); + Diag(IVI->getLocation(), diag::err_inconsistant_ivar_count); } void Sema::WarnUndefinedMethod(SourceLocation ImpLoc, ObjCMethodDecl *method, @@ -1399,8 +1417,9 @@ void Sema::WarnConflictingTypedMethods(ObjCMethodDecl *ImpMethodDecl, true); for (ObjCMethodDecl::param_iterator IM = ImpMethodDecl->param_begin(), - IF = MethodDecl->param_begin(), EM = ImpMethodDecl->param_end(); - IM != EM; ++IM, ++IF) { + IF = MethodDecl->param_begin(), EM = ImpMethodDecl->param_end(), + EF = MethodDecl->param_end(); + IM != EM && IF != EF; ++IM, ++IF) { CheckMethodOverrideParam(*this, ImpMethodDecl, MethodDecl, *IM, *IF, IsProtocolMethodDecl, false, true); } @@ -1421,8 +1440,9 @@ void Sema::CheckConflictingOverridingMethod(ObjCMethodDecl *Method, true); for (ObjCMethodDecl::param_iterator IM = Method->param_begin(), - IF = Overridden->param_begin(), EM = Method->param_end(); - IM != EM; ++IM, ++IF) { + IF = Overridden->param_begin(), EM = Method->param_end(), + EF = Overridden->param_end(); + IM != EM && IF != EF; ++IM, ++IF) { CheckMethodOverrideParam(*this, Method, Overridden, *IM, *IF, IsProtocolMethodDecl, true, true); } @@ -1454,8 +1474,9 @@ void Sema::WarnExactTypedMethods(ObjCMethodDecl *ImpMethodDecl, IsProtocolMethodDecl, false, false); if (match) for (ObjCMethodDecl::param_iterator IM = ImpMethodDecl->param_begin(), - IF = MethodDecl->param_begin(), EM = ImpMethodDecl->param_end(); - IM != EM; ++IM, ++IF) { + IF = MethodDecl->param_begin(), EM = ImpMethodDecl->param_end(), + EF = MethodDecl->param_end(); + IM != EM && IF != EF; ++IM, ++IF) { match = CheckMethodOverrideParam(*this, ImpMethodDecl, MethodDecl, *IM, *IF, IsProtocolMethodDecl, false, false); @@ -1487,8 +1508,8 @@ void Sema::WarnExactTypedMethods(ObjCMethodDecl *ImpMethodDecl, void Sema::CheckProtocolMethodDefs(SourceLocation ImpLoc, ObjCProtocolDecl *PDecl, bool& IncompleteImpl, - const llvm::DenseSet &InsMap, - const llvm::DenseSet &ClsMap, + const SelectorSet &InsMap, + const SelectorSet &ClsMap, ObjCContainerDecl *CDecl) { ObjCCategoryDecl *C = dyn_cast(CDecl); ObjCInterfaceDecl *IDecl = C ? C->getClassInterface() @@ -1497,7 +1518,7 @@ void Sema::CheckProtocolMethodDefs(SourceLocation ImpLoc, ObjCInterfaceDecl *Super = IDecl->getSuperClass(); ObjCInterfaceDecl *NSIDecl = 0; - if (getLangOpts().NeXTRuntime) { + if (getLangOpts().ObjCRuntime.isNeXTFamily()) { // check to see if class implements forwardInvocation method and objects // of this class are derived from 'NSProxy' so that to forward requests // from one object to another. @@ -1584,10 +1605,10 @@ void Sema::CheckProtocolMethodDefs(SourceLocation ImpLoc, /// MatchAllMethodDeclarations - Check methods declared in interface /// or protocol against those declared in their implementations. 
/// -void Sema::MatchAllMethodDeclarations(const llvm::DenseSet &InsMap, - const llvm::DenseSet &ClsMap, - llvm::DenseSet &InsMapSeen, - llvm::DenseSet &ClsMapSeen, +void Sema::MatchAllMethodDeclarations(const SelectorSet &InsMap, + const SelectorSet &ClsMap, + SelectorSet &InsMapSeen, + SelectorSet &ClsMapSeen, ObjCImplDecl* IMPDecl, ObjCContainerDecl* CDecl, bool &IncompleteImpl, @@ -1683,7 +1704,7 @@ void Sema::MatchAllMethodDeclarations(const llvm::DenseSet &InsMap, /// warns each time an exact match is found. void Sema::CheckCategoryVsClassMethodMatches( ObjCCategoryImplDecl *CatIMPDecl) { - llvm::DenseSet InsMap, ClsMap; + SelectorSet InsMap, ClsMap; for (ObjCImplementationDecl::instmeth_iterator I = CatIMPDecl->instmeth_begin(), @@ -1704,7 +1725,7 @@ void Sema::CheckCategoryVsClassMethodMatches( ObjCInterfaceDecl *IDecl = CatDecl->getClassInterface(); if (!IDecl) return; - llvm::DenseSet InsMapSeen, ClsMapSeen; + SelectorSet InsMapSeen, ClsMapSeen; bool IncompleteImpl = false; MatchAllMethodDeclarations(InsMap, ClsMap, InsMapSeen, ClsMapSeen, CatIMPDecl, IDecl, @@ -1715,7 +1736,7 @@ void Sema::CheckCategoryVsClassMethodMatches( void Sema::ImplMethodsVsClassMethods(Scope *S, ObjCImplDecl* IMPDecl, ObjCContainerDecl* CDecl, bool IncompleteImpl) { - llvm::DenseSet InsMap; + SelectorSet InsMap; // Check and see if instance methods in class interface have been // implemented in the implementation class. for (ObjCImplementationDecl::instmeth_iterator @@ -1726,11 +1747,12 @@ void Sema::ImplMethodsVsClassMethods(Scope *S, ObjCImplDecl* IMPDecl, // an implementation or 2) there is a @synthesize/@dynamic implementation // of the property in the @implementation. if (const ObjCInterfaceDecl *IDecl = dyn_cast(CDecl)) - if (!(LangOpts.ObjCDefaultSynthProperties && LangOpts.ObjCNonFragileABI2) || - IDecl->isObjCRequiresPropertyDefs()) + if (!(LangOpts.ObjCDefaultSynthProperties && + LangOpts.ObjCRuntime.isNonFragile()) || + IDecl->isObjCRequiresPropertyDefs()) DiagnoseUnimplementedProperties(S, IMPDecl, CDecl, InsMap); - llvm::DenseSet ClsMap; + SelectorSet ClsMap; for (ObjCImplementationDecl::classmeth_iterator I = IMPDecl->classmeth_begin(), E = IMPDecl->classmeth_end(); I != E; ++I) @@ -1738,7 +1760,7 @@ void Sema::ImplMethodsVsClassMethods(Scope *S, ObjCImplDecl* IMPDecl, // Check for type conflict of methods declared in a class/protocol and // its implementation; if any. - llvm::DenseSet InsMapSeen, ClsMapSeen; + SelectorSet InsMapSeen, ClsMapSeen; MatchAllMethodDeclarations(InsMap, ClsMap, InsMapSeen, ClsMapSeen, IMPDecl, CDecl, IncompleteImpl, true); @@ -1954,9 +1976,10 @@ bool Sema::MatchTwoMethodDeclarations(const ObjCMethodDecl *left, return false; ObjCMethodDecl::param_const_iterator - li = left->param_begin(), le = left->param_end(), ri = right->param_begin(); + li = left->param_begin(), le = left->param_end(), ri = right->param_begin(), + re = right->param_end(); - for (; li != le; ++li, ++ri) { + for (; li != le && ri != re; ++li, ++ri) { assert(ri != right->param_end() && "Param mismatch"); const ParmVarDecl *lparm = *li, *rparm = *ri; @@ -2140,53 +2163,16 @@ ObjCMethodDecl *Sema::LookupImplementedMethodInGlobalPool(Selector Sel) { return 0; } -/// CompareMethodParamsInBaseAndSuper - This routine compares methods with -/// identical selector names in current and its super classes and issues -/// a warning if any of their argument types are incompatible. 
-void Sema::CompareMethodParamsInBaseAndSuper(Decl *ClassDecl, - ObjCMethodDecl *Method, - bool IsInstance) { - ObjCInterfaceDecl *ID = dyn_cast(ClassDecl); - if (ID == 0) return; - - while (ObjCInterfaceDecl *SD = ID->getSuperClass()) { - ObjCMethodDecl *SuperMethodDecl = - SD->lookupMethod(Method->getSelector(), IsInstance); - if (SuperMethodDecl == 0) { - ID = SD; - continue; - } - ObjCMethodDecl::param_iterator ParamI = Method->param_begin(), - E = Method->param_end(); - ObjCMethodDecl::param_iterator PrevI = SuperMethodDecl->param_begin(); - for (; ParamI != E; ++ParamI, ++PrevI) { - // Number of parameters are the same and is guaranteed by selector match. - assert(PrevI != SuperMethodDecl->param_end() && "Param mismatch"); - QualType T1 = Context.getCanonicalType((*ParamI)->getType()); - QualType T2 = Context.getCanonicalType((*PrevI)->getType()); - // If type of argument of method in this class does not match its - // respective argument type in the super class method, issue warning; - if (!Context.typesAreCompatible(T1, T2)) { - Diag((*ParamI)->getLocation(), diag::ext_typecheck_base_super) - << T1 << T2; - Diag(SuperMethodDecl->getLocation(), diag::note_previous_declaration); - return; - } - } - ID = SD; - } -} - /// DiagnoseDuplicateIvars - /// Check for duplicate ivars in the entire class at the start of -/// @implementation. This becomes necessary because class extension can +/// \@implementation. This becomes necessary because class extension can /// add ivars to a class in random order which will not be known until -/// class's @implementation is seen. +/// class's \@implementation is seen. void Sema::DiagnoseDuplicateIvars(ObjCInterfaceDecl *ID, ObjCInterfaceDecl *SID) { for (ObjCInterfaceDecl::ivar_iterator IVI = ID->ivar_begin(), IVE = ID->ivar_end(); IVI != IVE; ++IVI) { - ObjCIvarDecl* Ivar = (*IVI); + ObjCIvarDecl* Ivar = *IVI; if (Ivar->isInvalidDecl()) continue; if (IdentifierInfo *II = Ivar->getIdentifier()) { @@ -2273,9 +2259,6 @@ Decl *Sema::ActOnAtEnd(Scope *S, SourceRange AtEnd, InsMap[Method->getSelector()] = Method; /// The following allows us to typecheck messages to "id". AddInstanceMethodToGlobalPool(Method); - // verify that the instance method conforms to the same definition of - // parent methods if it shadows one. - CompareMethodParamsInBaseAndSuper(ClassDecl, Method, true); } } else { /// Check for class method of the same name with incompatible types @@ -2298,11 +2281,7 @@ Decl *Sema::ActOnAtEnd(Scope *S, SourceRange AtEnd, Diag(PrevMethod->getLocation(), diag::note_previous_declaration); } ClsMap[Method->getSelector()] = Method; - /// The following allows us to typecheck messages to "Class". AddFactoryMethodToGlobalPool(Method); - // verify that the class method conforms to the same definition of - // parent methods if it shadows one.
- CompareMethodParamsInBaseAndSuper(ClassDecl, Method, false); } } } @@ -2347,7 +2326,7 @@ Decl *Sema::ActOnAtEnd(Scope *S, SourceRange AtEnd, ClsExtDecl; ClsExtDecl = ClsExtDecl->getNextClassExtension()) { for (ObjCContainerDecl::prop_iterator I = ClsExtDecl->prop_begin(), E = ClsExtDecl->prop_end(); I != E; ++I) { - ObjCPropertyDecl *Property = (*I); + ObjCPropertyDecl *Property = *I; // Skip over properties declared @dynamic if (const ObjCPropertyImplDecl *PIDecl = IC->FindPropertyImplDecl(Property->getIdentifier())) @@ -2399,7 +2378,7 @@ Decl *Sema::ActOnAtEnd(Scope *S, SourceRange AtEnd, Diag(IDecl->getLocation(), diag::err_objc_root_class_subclass); } - if (LangOpts.ObjCNonFragileABI2) { + if (LangOpts.ObjCRuntime.isNonFragile()) { while (IDecl->getSuperClass()) { DiagnoseDuplicateIvars(IDecl, IDecl->getSuperClass()); IDecl = IDecl->getSuperClass(); @@ -2443,6 +2422,7 @@ Decl *Sema::ActOnAtEnd(Scope *S, SourceRange AtEnd, Consumer.HandleTopLevelDeclInObjCContainer(DG); } + ActOnDocumentableDecl(ClassDecl); return ClassDecl; } @@ -2488,19 +2468,10 @@ bool containsInvalidMethodImplAttribute(ObjCMethodDecl *IMD, return false; } -namespace { - /// \brief Describes the compatibility of a result type with its method. - enum ResultTypeCompatibilityKind { - RTC_Compatible, - RTC_Incompatible, - RTC_Unknown - }; -} - /// \brief Check whether the declared result type of the given Objective-C /// method declaration is compatible with the method's class. /// -static ResultTypeCompatibilityKind +static Sema::ResultTypeCompatibilityKind CheckRelatedResultTypeCompatibility(Sema &S, ObjCMethodDecl *Method, ObjCInterfaceDecl *CurrentClass) { QualType ResultType = Method->getResultType(); @@ -2513,27 +2484,27 @@ CheckRelatedResultTypeCompatibility(Sema &S, ObjCMethodDecl *Method, // - it is id or qualified id, or if (ResultObjectType->isObjCIdType() || ResultObjectType->isObjCQualifiedIdType()) - return RTC_Compatible; + return Sema::RTC_Compatible; if (CurrentClass) { if (ObjCInterfaceDecl *ResultClass = ResultObjectType->getInterfaceDecl()) { // - it is the same as the method's class type, or if (declaresSameEntity(CurrentClass, ResultClass)) - return RTC_Compatible; + return Sema::RTC_Compatible; // - it is a superclass of the method's class type if (ResultClass->isSuperClassOf(CurrentClass)) - return RTC_Compatible; + return Sema::RTC_Compatible; } } else { // Any Objective-C pointer type might be acceptable for a protocol // method; we just don't know. - return RTC_Unknown; + return Sema::RTC_Unknown; } } - return RTC_Incompatible; + return Sema::RTC_Incompatible; } namespace { @@ -2543,7 +2514,6 @@ class OverrideSearch { public: Sema &S; ObjCMethodDecl *Method; - llvm::SmallPtrSet Searched; llvm::SmallPtrSet Overridden; bool Recursive; @@ -2572,8 +2542,13 @@ public: // Prevent the search from reaching this container again. This is // important with categories, which override methods from the // interface and each other. - Searched.insert(container); - searchFromContainer(container); + if (ObjCCategoryDecl *Category = dyn_cast(container)) { + searchFromContainer(container); + if (ObjCInterfaceDecl *Interface = Category->getClassInterface()) + searchFromContainer(Interface); + } else { + searchFromContainer(container); + } } typedef llvm::SmallPtrSet::iterator iterator; @@ -2609,7 +2584,7 @@ private: void searchFrom(ObjCCategoryDecl *category) { // A method in a category declaration overrides declarations from // the main class and from protocols the category references. 
- search(category->getClassInterface()); + // The main class is handled in the constructor. search(category->getReferencedProtocols()); } @@ -2619,10 +2594,12 @@ private: // declaration. if (ObjCCategoryDecl *category = impl->getCategoryDecl()) { search(category); + if (ObjCInterfaceDecl *Interface = category->getClassInterface()) + search(Interface); // Otherwise it overrides declarations from the class. - } else { - search(impl->getClassInterface()); + } else if (ObjCInterfaceDecl *Interface = impl->getClassInterface()) { + search(Interface); } } @@ -2647,7 +2624,8 @@ private: void searchFrom(ObjCImplementationDecl *impl) { // A method in a class implementation overrides declarations from // the class interface. - search(impl->getClassInterface()); + if (ObjCInterfaceDecl *Interface = impl->getClassInterface()) + search(Interface); } @@ -2658,9 +2636,6 @@ private: } void search(ObjCContainerDecl *container) { - // Abort if we've already searched this container. - if (!Searched.insert(container)) return; - // Check for a method in this container which matches this selector. ObjCMethodDecl *meth = container->getMethod(Method->getSelector(), Method->isInstanceMethod()); @@ -2682,6 +2657,68 @@ private: }; } +void Sema::CheckObjCMethodOverrides(ObjCMethodDecl *ObjCMethod, + ObjCInterfaceDecl *CurrentClass, + ResultTypeCompatibilityKind RTC) { + // Search for overridden methods and merge information down from them. + OverrideSearch overrides(*this, ObjCMethod); + // Keep track if the method overrides any method in the class's base classes, + // its protocols, or its categories' protocols; we will keep that info + // in the ObjCMethodDecl. + // For this info, a method in an implementation is not considered as + // overriding the same method in the interface or its categories. + bool hasOverriddenMethodsInBaseOrProtocol = false; + for (OverrideSearch::iterator + i = overrides.begin(), e = overrides.end(); i != e; ++i) { + ObjCMethodDecl *overridden = *i; + + if (isa(overridden->getDeclContext()) || + CurrentClass != overridden->getClassInterface() || + overridden->isOverriding()) + hasOverriddenMethodsInBaseOrProtocol = true; + + // Propagate down the 'related result type' bit from overridden methods. + if (RTC != Sema::RTC_Incompatible && overridden->hasRelatedResultType()) + ObjCMethod->SetRelatedResultType(); + + // Then merge the declarations. + mergeObjCMethodDecls(ObjCMethod, overridden); + + if (ObjCMethod->isImplicit() && overridden->isImplicit()) + continue; // Conflicting properties are detected elsewhere. 
+ + // Check for overriding methods + if (isa(ObjCMethod->getDeclContext()) || + isa(ObjCMethod->getDeclContext())) + CheckConflictingOverridingMethod(ObjCMethod, overridden, + isa(overridden->getDeclContext())); + + if (CurrentClass && overridden->getDeclContext() != CurrentClass && + isa(overridden->getDeclContext()) && + !overridden->isImplicit() /* not meant for properties */) { + ObjCMethodDecl::param_iterator ParamI = ObjCMethod->param_begin(), + E = ObjCMethod->param_end(); + ObjCMethodDecl::param_iterator PrevI = overridden->param_begin(), + PrevE = overridden->param_end(); + for (; ParamI != E && PrevI != PrevE; ++ParamI, ++PrevI) { + assert(PrevI != overridden->param_end() && "Param mismatch"); + QualType T1 = Context.getCanonicalType((*ParamI)->getType()); + QualType T2 = Context.getCanonicalType((*PrevI)->getType()); + // If type of argument of method in this class does not match its + // respective argument type in the super class method, issue warning; + if (!Context.typesAreCompatible(T1, T2)) { + Diag((*ParamI)->getLocation(), diag::ext_typecheck_base_super) + << T1 << T2; + Diag(overridden->getLocation(), diag::note_previous_declaration); + break; + } + } + } + } + + ObjCMethod->setOverriding(hasOverriddenMethodsInBaseOrProtocol); +} + Decl *Sema::ActOnMethodDeclaration( Scope *S, SourceLocation MethodLoc, SourceLocation EndLoc, @@ -2871,32 +2908,14 @@ Decl *Sema::ActOnMethodDeclaration( ResultTypeCompatibilityKind RTC = CheckRelatedResultTypeCompatibility(*this, ObjCMethod, CurrentClass); - // Search for overridden methods and merge information down from them. - OverrideSearch overrides(*this, ObjCMethod); - for (OverrideSearch::iterator - i = overrides.begin(), e = overrides.end(); i != e; ++i) { - ObjCMethodDecl *overridden = *i; - - // Propagate down the 'related result type' bit from overridden methods. - if (RTC != RTC_Incompatible && overridden->hasRelatedResultType()) - ObjCMethod->SetRelatedResultType(); + CheckObjCMethodOverrides(ObjCMethod, CurrentClass, RTC); - // Then merge the declarations. - mergeObjCMethodDecls(ObjCMethod, overridden); - - // Check for overriding methods - if (isa(ObjCMethod->getDeclContext()) || - isa(ObjCMethod->getDeclContext())) - CheckConflictingOverridingMethod(ObjCMethod, overridden, - isa(overridden->getDeclContext())); - } - bool ARCError = false; if (getLangOpts().ObjCAutoRefCount) ARCError = CheckARCMethodDecl(*this, ObjCMethod); // Infer the related result type when possible. - if (!ARCError && RTC == RTC_Compatible && + if (!ARCError && RTC == Sema::RTC_Compatible && !ObjCMethod->hasRelatedResultType() && LangOpts.ObjCInferRelatedResultType) { bool InferRelatedResultType = false; @@ -2927,7 +2946,9 @@ Decl *Sema::ActOnMethodDeclaration( if (InferRelatedResultType) ObjCMethod->SetRelatedResultType(); } - + + ActOnDocumentableDecl(ObjCMethod); + return ObjCMethod; } @@ -2948,7 +2969,7 @@ bool Sema::CheckObjCDeclScope(Decl *D) { return true; } -/// Called whenever @defs(ClassName) is encountered in the source. Inserts the +/// Called whenever \@defs(ClassName) is encountered in the source. Inserts the /// instance variables of ClassName into Decls. 
void Sema::ActOnDefs(Scope *S, Decl *TagD, SourceLocation DeclStart, IdentifierInfo *ClassName, @@ -2959,7 +2980,7 @@ void Sema::ActOnDefs(Scope *S, Decl *TagD, SourceLocation DeclStart, Diag(DeclStart, diag::err_undef_interface) << ClassName; return; } - if (LangOpts.ObjCNonFragileABI) { + if (LangOpts.ObjCRuntime.isNonFragile()) { Diag(DeclStart, diag::err_atdef_nonfragile_interface); return; } diff --git a/lib/Sema/SemaExceptionSpec.cpp b/lib/Sema/SemaExceptionSpec.cpp index 14b2434..63bfa9d 100644 --- a/lib/Sema/SemaExceptionSpec.cpp +++ b/lib/Sema/SemaExceptionSpec.cpp @@ -51,7 +51,8 @@ bool Sema::CheckSpecifiedExceptionType(QualType T, const SourceRange &Range) { // C++ 15.4p2: A type denoted in an exception-specification shall not denote // an incomplete type. if (RequireCompleteType(Range.getBegin(), T, - PDiag(diag::err_incomplete_in_exception_spec) << /*direct*/0 << Range)) + diag::err_incomplete_in_exception_spec, + /*direct*/0, Range)) return true; // C++ 15.4p2: A type denoted in an exception-specification shall not denote @@ -71,8 +72,9 @@ bool Sema::CheckSpecifiedExceptionType(QualType T, const SourceRange &Range) { if (T->isRecordType() && T->getAs()->isBeingDefined()) return false; - if (!T->isVoidType() && RequireCompleteType(Range.getBegin(), T, - PDiag(diag::err_incomplete_in_exception_spec) << kind << Range)) + if (!T->isVoidType() && + RequireCompleteType(Range.getBegin(), T, + diag::err_incomplete_in_exception_spec, kind, Range)) return true; return false; @@ -98,20 +100,22 @@ bool Sema::CheckDistantExceptionSpec(QualType T) { const FunctionProtoType * Sema::ResolveExceptionSpec(SourceLocation Loc, const FunctionProtoType *FPT) { - // FIXME: If FD is a special member, we should delay computing its exception - // specification until this point. - if (FPT->getExceptionSpecType() != EST_Uninstantiated) + if (!isUnresolvedExceptionSpec(FPT->getExceptionSpecType())) return FPT; FunctionDecl *SourceDecl = FPT->getExceptionSpecDecl(); const FunctionProtoType *SourceFPT = SourceDecl->getType()->castAs(); - if (SourceFPT->getExceptionSpecType() != EST_Uninstantiated) + // If the exception specification has already been resolved, just return it. + if (!isUnresolvedExceptionSpec(SourceFPT->getExceptionSpecType())) return SourceFPT; - // Instantiate the exception specification now. - InstantiateExceptionSpec(Loc, SourceDecl); + // Compute or instantiate the exception specification now. + if (FPT->getExceptionSpecType() == EST_Unevaluated) + EvaluateImplicitExceptionSpec(Loc, cast(SourceDecl)); + else + InstantiateExceptionSpec(Loc, SourceDecl); return SourceDecl->getType()->castAs(); } @@ -344,8 +348,8 @@ bool Sema::CheckEquivalentExceptionSpec(const PartialDiagnostic &DiagID, ExceptionSpecificationType OldEST = Old->getExceptionSpecType(); ExceptionSpecificationType NewEST = New->getExceptionSpecType(); - assert(OldEST != EST_Delayed && NewEST != EST_Delayed && - OldEST != EST_Uninstantiated && NewEST != EST_Uninstantiated && + assert(!isUnresolvedExceptionSpec(OldEST) && + !isUnresolvedExceptionSpec(NewEST) && "Shouldn't see unknown exception specifications here"); // Shortcut the case where both have no spec. 
@@ -542,8 +546,8 @@ bool Sema::CheckExceptionSpecSubset( ExceptionSpecificationType SubEST = Subset->getExceptionSpecType(); - assert(SuperEST != EST_Delayed && SubEST != EST_Delayed && - SuperEST != EST_Uninstantiated && SubEST != EST_Uninstantiated && + assert(!isUnresolvedExceptionSpec(SuperEST) && + !isUnresolvedExceptionSpec(SubEST) && "Shouldn't see unknown exception specifications here"); // It does not. If the subset contains everything, we've failed. @@ -806,15 +810,6 @@ static CanThrowResult canCalleeThrow(Sema &S, const Expr *E, if (!FT) return CT_Can; - if (FT->getExceptionSpecType() == EST_Delayed) { - // FIXME: Try to resolve a delayed exception spec in ResolveExceptionSpec. - assert(isa(D) && - "only constructor exception specs can be unknown"); - S.Diag(E->getLocStart(), diag::err_exception_spec_unknown) - << E->getSourceRange(); - return CT_Can; - } - return FT->isNothrow(S.Context) ? CT_Cannot : CT_Can; } @@ -964,7 +959,7 @@ CanThrowResult Sema::canThrow(const Expr *E) { // possibility. case Expr::ObjCArrayLiteralClass: case Expr::ObjCDictionaryLiteralClass: - case Expr::ObjCNumericLiteralClass: + case Expr::ObjCBoxedExprClass: return CT_Can; // Many other things have subexpressions, so we have to test those. diff --git a/lib/Sema/SemaExpr.cpp b/lib/Sema/SemaExpr.cpp index d2e0e6b..6a503ee 100644 --- a/lib/Sema/SemaExpr.cpp +++ b/lib/Sema/SemaExpr.cpp @@ -130,6 +130,77 @@ void Sema::NoteDeletedFunction(FunctionDecl *Decl) { << 1 << Decl->isDeleted(); } +/// \brief Determine whether a FunctionDecl was ever declared with an +/// explicit storage class. +static bool hasAnyExplicitStorageClass(const FunctionDecl *D) { + for (FunctionDecl::redecl_iterator I = D->redecls_begin(), + E = D->redecls_end(); + I != E; ++I) { + if (I->getStorageClassAsWritten() != SC_None) + return true; + } + return false; +} + +/// \brief Check whether we're in an extern inline function and referring to a +/// variable or function with internal linkage (C11 6.7.4p3). +/// +/// This is only a warning because we used to silently accept this code, but +/// in many cases it will not behave correctly. This is not enabled in C++ mode +/// because the restriction language is a bit weaker (C++11 [basic.def.odr]p6) +/// and so while there may still be user mistakes, most of the time we can't +/// prove that there are errors. +static void diagnoseUseOfInternalDeclInInlineFunction(Sema &S, + const NamedDecl *D, + SourceLocation Loc) { + // This is disabled under C++; there are too many ways for this to fire in + // contexts where the warning is a false positive, or where it is technically + // correct but benign. + if (S.getLangOpts().CPlusPlus) + return; + + // Check if this is an inlined function or method. + FunctionDecl *Current = S.getCurFunctionDecl(); + if (!Current) + return; + if (!Current->isInlined()) + return; + if (Current->getLinkage() != ExternalLinkage) + return; + + // Check if the decl has internal linkage. + if (D->getLinkage() != InternalLinkage) + return; + + // Downgrade from ExtWarn to Extension if + // (1) the supposedly external inline function is in the main file, + // and probably won't be included anywhere else. + // (2) the thing we're referencing is a pure function. + // (3) the thing we're referencing is another inline function. + // This last can give us false negatives, but it's better than warning on + // wrappers for simple C library functions. 
+  const FunctionDecl *UsedFn = dyn_cast<FunctionDecl>(D);
+  bool DowngradeWarning = S.getSourceManager().isFromMainFile(Loc);
+  if (!DowngradeWarning && UsedFn)
+    DowngradeWarning = UsedFn->isInlined() || UsedFn->hasAttr<ConstAttr>();
+
+  S.Diag(Loc, DowngradeWarning ? diag::ext_internal_in_extern_inline
+                               : diag::warn_internal_in_extern_inline)
+    << /*IsVar=*/!UsedFn << D;
+
+  // Suggest "static" on the inline function, if possible.
+  if (!hasAnyExplicitStorageClass(Current)) {
+    const FunctionDecl *FirstDecl = Current->getCanonicalDecl();
+    SourceLocation DeclBegin = FirstDecl->getSourceRange().getBegin();
+    S.Diag(DeclBegin, diag::note_convert_inline_to_static)
+      << Current << FixItHint::CreateInsertion(DeclBegin, "static ");
+  }
+
+  S.Diag(D->getCanonicalDecl()->getLocation(),
+         diag::note_internal_decl_declared_here)
+    << D;
+}
+
 /// \brief Determine whether the use of this declaration is valid, and
 /// emit any corresponding diagnostics.
 ///
@@ -182,6 +253,9 @@ bool Sema::DiagnoseUseOfDecl(NamedDecl *D, SourceLocation Loc,
   // Warn if this is used but marked unused.
   if (D->hasAttr<UnusedAttr>())
     Diag(Loc, diag::warn_used_but_marked_unused) << D->getDeclName();
+
+  diagnoseUseOfInternalDeclInInlineFunction(*this, D, Loc);
+
   return false;
 }
@@ -510,8 +584,7 @@ ExprResult Sema::DefaultArgumentPromotion(Expr *E) {
   //   is a prvalue for the temporary.
   // FIXME: add some way to gate this entire thing for correctness in
   // potentially potentially evaluated contexts.
-  if (getLangOpts().CPlusPlus && E->isGLValue() &&
-      ExprEvalContexts.back().Context != Unevaluated) {
+  if (getLangOpts().CPlusPlus && E->isGLValue() && !isUnevaluatedContext()) {
     ExprResult Temp = PerformCopyInitialization(
                        InitializedEntity::InitializeTemporary(E->getType()),
                                                E->getExprLoc(),
@@ -524,9 +597,66 @@ ExprResult Sema::DefaultArgumentPromotion(Expr *E) {
   return Owned(E);
 }
 
+/// Determine the degree of POD-ness for an expression.
+/// Incomplete types are considered POD, since this check can be performed
+/// when we're in an unevaluated context.
+Sema::VarArgKind Sema::isValidVarArgType(const QualType &Ty) {
+  if (Ty->isIncompleteType()) {
+    if (Ty->isObjCObjectType())
+      return VAK_Invalid;
+    return VAK_Valid;
+  }
+
+  if (Ty.isCXX98PODType(Context))
+    return VAK_Valid;
+
+  // C++0x [expr.call]p7:
+  //   Passing a potentially-evaluated argument of class type (Clause 9)
+  //   having a non-trivial copy constructor, a non-trivial move constructor,
+  //   or a non-trivial destructor, with no corresponding parameter,
+  //   is conditionally-supported with implementation-defined semantics.
+  if (getLangOpts().CPlusPlus0x && !Ty->isDependentType())
+    if (CXXRecordDecl *Record = Ty->getAsCXXRecordDecl())
+      if (Record->hasTrivialCopyConstructor() &&
+          Record->hasTrivialMoveConstructor() &&
+          Record->hasTrivialDestructor())
+        return VAK_ValidInCXX11;
+
+  if (getLangOpts().ObjCAutoRefCount && Ty->isObjCLifetimeType())
+    return VAK_Valid;
+  return VAK_Invalid;
+}
+
+bool Sema::variadicArgumentPODCheck(const Expr *E, VariadicCallType CT) {
+  // Don't allow one to pass an Objective-C interface to a vararg.
+  const QualType & Ty = E->getType();
+
+  // Complain about passing non-POD types through varargs.
+  switch (isValidVarArgType(Ty)) {
+  case VAK_Valid:
+    break;
+  case VAK_ValidInCXX11:
+    DiagRuntimeBehavior(E->getLocStart(), 0,
+      PDiag(diag::warn_cxx98_compat_pass_non_pod_arg_to_vararg)
+        << E->getType() << CT);
+    break;
+  case VAK_Invalid: {
+    if (Ty->isObjCObjectType())
+      return DiagRuntimeBehavior(E->getLocStart(), 0,
+               PDiag(diag::err_cannot_pass_objc_interface_to_vararg)
+                 << Ty << CT);
+
+    return DiagRuntimeBehavior(E->getLocStart(), 0,
+             PDiag(diag::warn_cannot_pass_non_pod_arg_to_vararg)
+               << getLangOpts().CPlusPlus0x << Ty << CT);
+  }
+  }
+  // c++ rules are enforced elsewhere.
+  return false;
+}
+
 /// DefaultVariadicArgumentPromotion - Like DefaultArgumentPromotion, but
-/// will warn if the resulting type is not a POD type, and rejects ObjC
-/// interfaces passed by value.
+/// will create a trap if the resulting type is not a POD type.
 ExprResult Sema::DefaultVariadicArgumentPromotion(Expr *E, VariadicCallType CT,
                                                   FunctionDecl *FDecl) {
   if (const BuiltinType *PlaceholderTy = E->getType()->getAsPlaceholderType()) {
@@ -550,76 +680,38 @@ ExprResult Sema::DefaultVariadicArgumentPromotion(Expr *E, VariadicCallType CT,
       return ExprError();
     E = ExprRes.take();
 
-  // Don't allow one to pass an Objective-C interface to a vararg.
-  if (E->getType()->isObjCObjectType() &&
-    DiagRuntimeBehavior(E->getLocStart(), 0,
-                        PDiag(diag::err_cannot_pass_objc_interface_to_vararg)
-                          << E->getType() << CT))
-    return ExprError();
-
-  // Complain about passing non-POD types through varargs. However, don't
-  // perform this check for incomplete types, which we can get here when we're
-  // in an unevaluated context.
-  if (!E->getType()->isIncompleteType() && !E->getType().isPODType(Context)) {
-    // C++0x [expr.call]p7:
-    //   Passing a potentially-evaluated argument of class type (Clause 9)
-    //   having a non-trivial copy constructor, a non-trivial move constructor,
-    //   or a non-trivial destructor, with no corresponding parameter,
-    //   is conditionally-supported with implementation-defined semantics.
-    bool TrivialEnough = false;
-    if (getLangOpts().CPlusPlus0x && !E->getType()->isDependentType()) {
-      if (CXXRecordDecl *Record = E->getType()->getAsCXXRecordDecl()) {
-        if (Record->hasTrivialCopyConstructor() &&
-            Record->hasTrivialMoveConstructor() &&
-            Record->hasTrivialDestructor()) {
-          DiagRuntimeBehavior(E->getLocStart(), 0,
-            PDiag(diag::warn_cxx98_compat_pass_non_pod_arg_to_vararg)
-              << E->getType() << CT);
-          TrivialEnough = true;
-        }
-      }
-    }
+  // Diagnostics regarding non-POD argument types are
+  // emitted along with format string checking in Sema::CheckFunctionCall().
+  if (isValidVarArgType(E->getType()) == VAK_Invalid) {
+    // Turn this into a trap.
+    CXXScopeSpec SS;
+    SourceLocation TemplateKWLoc;
+    UnqualifiedId Name;
+    Name.setIdentifier(PP.getIdentifierInfo("__builtin_trap"),
+                       E->getLocStart());
+    ExprResult TrapFn = ActOnIdExpression(TUScope, SS, TemplateKWLoc,
+                                          Name, true, false);
+    if (TrapFn.isInvalid())
+      return ExprError();
 
-    if (!TrivialEnough &&
-        getLangOpts().ObjCAutoRefCount &&
-        E->getType()->isObjCLifetimeType())
-      TrivialEnough = true;
-
-    if (TrivialEnough) {
-      // Nothing to diagnose. This is okay.
-    } else if (DiagRuntimeBehavior(E->getLocStart(), 0,
-                          PDiag(diag::warn_cannot_pass_non_pod_arg_to_vararg)
-                            << getLangOpts().CPlusPlus0x << E->getType()
-                            << CT)) {
-      // Turn this into a trap.
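// [Illustration, not part of the patch] Source-level effect of the rewrite
// above: an argument that is invalid through "..." still type-checks, but
// the argument expression is turned into roughly (__builtin_trap(), arg).
// A hypothetical trigger:
#include <cstdio>

struct NonPOD {
  NonPOD();
  NonPOD(const NonPOD &);     // non-trivial copy: invalid through varargs
};

void demo(NonPOD np) {
  std::printf("%p\n", np);    // warns; argument becomes (__builtin_trap(), np)
}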
-      CXXScopeSpec SS;
-      SourceLocation TemplateKWLoc;
-      UnqualifiedId Name;
-      Name.setIdentifier(PP.getIdentifierInfo("__builtin_trap"),
-                         E->getLocStart());
-      ExprResult TrapFn = ActOnIdExpression(TUScope, SS, TemplateKWLoc, Name,
-                                            true, false);
-      if (TrapFn.isInvalid())
-        return ExprError();
+    ExprResult Call = ActOnCallExpr(TUScope, TrapFn.get(),
+                                    E->getLocStart(), MultiExprArg(),
+                                    E->getLocEnd());
+    if (Call.isInvalid())
+      return ExprError();
 
-      ExprResult Call = ActOnCallExpr(TUScope, TrapFn.get(), E->getLocStart(),
-                                      MultiExprArg(), E->getLocEnd());
-      if (Call.isInvalid())
-        return ExprError();
-
-      ExprResult Comma = ActOnBinOp(TUScope, E->getLocStart(), tok::comma,
-                                    Call.get(), E);
-      if (Comma.isInvalid())
-        return ExprError();
-      E = Comma.get();
-    }
+    ExprResult Comma = ActOnBinOp(TUScope, E->getLocStart(), tok::comma,
+                                  Call.get(), E);
+    if (Comma.isInvalid())
+      return ExprError();
+    return Comma.get();
   }
-  // c++ rules are enforced elsewhere.
+
   if (!getLangOpts().CPlusPlus &&
       RequireCompleteType(E->getExprLoc(), E->getType(),
                           diag::err_call_incomplete_argument))
     return ExprError();
-  
+
   return Owned(E);
 }
@@ -942,6 +1034,10 @@ QualType Sema::UsualArithmeticConversions(ExprResult &LHS, ExprResult &RHS,
   QualType RHSType =
     Context.getCanonicalType(RHS.get()->getType()).getUnqualifiedType();
 
+  // For conversion purposes, we ignore any atomic qualifier on the LHS.
+  if (const AtomicType *AtomicLHS = LHSType->getAs<AtomicType>())
+    LHSType = AtomicLHS->getValueType();
+
   // If both types are identical, no conversion is needed.
   if (LHSType == RHSType)
     return LHSType;
@@ -949,7 +1045,7 @@ QualType Sema::UsualArithmeticConversions(ExprResult &LHS, ExprResult &RHS,
   // If either side is a non-arithmetic type (e.g. a pointer), we are done.
   // The caller can deal with this (e.g. pointer + int).
   if (!LHSType->isArithmeticType() || !RHSType->isArithmeticType())
-    return LHSType;
+    return QualType();
 
   // Apply unary and bitfield promotions to the LHS's type.
   QualType LHSUnpromotedType = LHSType;
@@ -1370,7 +1466,8 @@ bool Sema::DiagnoseEmptyLookup(Scope *S, CXXScopeSpec &SS, LookupResult &R,
   // unqualified lookup.  This is useful when (for example) the
   // original lookup would not have found something because it was a
   // dependent name.
-  DeclContext *DC = SS.isEmpty() ? CurContext : 0;
+  DeclContext *DC = (SS.isEmpty() && !CallsUndergoingInstantiation.empty())
+    ? CurContext : 0;
   while (DC) {
     if (isa<CXXRecordDecl>(DC)) {
       LookupQualifiedName(R, DC);
@@ -1394,42 +1491,44 @@ bool Sema::DiagnoseEmptyLookup(Scope *S, CXXScopeSpec &SS, LookupResult &R,
           // Give a code modification hint to insert 'this->'.
           // TODO: fixit for inserting 'Base<T>::' in the other cases.
          // Actually quite difficult!
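// [Sketch, not part of the patch] The UsualArithmeticConversions change above
// in miniature: strip an atomic qualifier from the LHS before converting, and
// report failure with a null type instead of silently returning the LHS.
// "Type" is a stand-in struct, not the real clang QualType API.
struct Type { bool isArithmetic; const Type *atomicValue; };

const Type *usualArithmeticConversions(const Type *LHS, const Type *RHS) {
  if (LHS->atomicValue)           // _Atomic(T) converts like plain T
    LHS = LHS->atomicValue;
  if (LHS == RHS)
    return LHS;                   // identical types: nothing to do
  if (!LHS->isArithmetic || !RHS->isArithmetic)
    return nullptr;               // was "return LHS"; callers now see failure
  return LHS;                     // promotion logic elided
}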
+          if (getLangOpts().MicrosoftMode)
+            diagnostic = diag::warn_found_via_dependent_bases_lookup;
           if (isInstance) {
+            Diag(R.getNameLoc(), diagnostic) << Name
+              << FixItHint::CreateInsertion(R.getNameLoc(), "this->");
             UnresolvedLookupExpr *ULE = cast<UnresolvedLookupExpr>(
                 CallsUndergoingInstantiation.back()->getCallee());
-            CXXMethodDecl *DepMethod = cast_or_null<CXXMethodDecl>(
-                CurMethod->getInstantiatedFromMemberFunction());
-            if (DepMethod) {
-              if (getLangOpts().MicrosoftMode)
-                diagnostic = diag::warn_found_via_dependent_bases_lookup;
-              Diag(R.getNameLoc(), diagnostic) << Name
-                << FixItHint::CreateInsertion(R.getNameLoc(), "this->");
-              QualType DepThisType = DepMethod->getThisType(Context);
-              CheckCXXThisCapture(R.getNameLoc());
-              CXXThisExpr *DepThis = new (Context) CXXThisExpr(
-                                        R.getNameLoc(), DepThisType, false);
-              TemplateArgumentListInfo TList;
-              if (ULE->hasExplicitTemplateArgs())
-                ULE->copyTemplateArgumentsInto(TList);
-
-              CXXScopeSpec SS;
-              SS.Adopt(ULE->getQualifierLoc());
-              CXXDependentScopeMemberExpr *DepExpr =
-                  CXXDependentScopeMemberExpr::Create(
-                      Context, DepThis, DepThisType, true, SourceLocation(),
-                      SS.getWithLocInContext(Context),
-                      ULE->getTemplateKeywordLoc(), 0,
-                      R.getLookupNameInfo(),
-                      ULE->hasExplicitTemplateArgs() ? &TList : 0);
-              CallsUndergoingInstantiation.back()->setCallee(DepExpr);
-            } else {
-              // FIXME: we should be able to handle this case too. It is correct
-              // to add this-> here. This is a workaround for PR7947.
-              Diag(R.getNameLoc(), diagnostic) << Name;
-            }
+
+
+            CXXMethodDecl *DepMethod;
+            if (CurMethod->getTemplatedKind() ==
+                FunctionDecl::TK_FunctionTemplateSpecialization)
+              DepMethod = cast<CXXMethodDecl>(CurMethod->getPrimaryTemplate()->
+                  getInstantiatedFromMemberTemplate()->getTemplatedDecl());
+            else
+              DepMethod = cast<CXXMethodDecl>(
+                  CurMethod->getInstantiatedFromMemberFunction());
+            assert(DepMethod && "No template pattern found");
+
+            QualType DepThisType = DepMethod->getThisType(Context);
+            CheckCXXThisCapture(R.getNameLoc());
+            CXXThisExpr *DepThis = new (Context) CXXThisExpr(
+                                      R.getNameLoc(), DepThisType, false);
+            TemplateArgumentListInfo TList;
+            if (ULE->hasExplicitTemplateArgs())
+              ULE->copyTemplateArgumentsInto(TList);
+
+            CXXScopeSpec SS;
+            SS.Adopt(ULE->getQualifierLoc());
+            CXXDependentScopeMemberExpr *DepExpr =
+                CXXDependentScopeMemberExpr::Create(
+                    Context, DepThis, DepThisType, true, SourceLocation(),
+                    SS.getWithLocInContext(Context),
+                    ULE->getTemplateKeywordLoc(), 0,
+                    R.getLookupNameInfo(),
+                    ULE->hasExplicitTemplateArgs() ? &TList : 0);
+            CallsUndergoingInstantiation.back()->setCallee(DepExpr);
           } else {
-            if (getLangOpts().MicrosoftMode)
-              diagnostic = diag::warn_found_via_dependent_bases_lookup;
             Diag(R.getNameLoc(), diagnostic) << Name;
           }
@@ -1862,6 +1961,10 @@ Sema::LookupInObjCMethod(LookupResult &Lookup, Scope *S,
         return ExprError();
 
       MarkAnyDeclReferenced(Loc, IV);
+
+      ObjCMethodFamily MF = CurMethod->getMethodFamily();
+      if (MF != OMF_init && MF != OMF_dealloc && MF != OMF_finalize)
+        Diag(Loc, diag::warn_direct_ivar_access) << IV->getDeclName();
       return Owned(new (Context)
                    ObjCIvarRefExpr(IV, IV->getType(), Loc,
                                    SelfExpr.take(), true, true));
@@ -2303,7 +2406,7 @@ Sema::BuildDeclarationNameExpr(const CXXScopeSpec &SS,
         // FIXME: Does the addition of const really only apply in
         // potentially-evaluated contexts? Since the variable isn't actually
        // captured in an unevaluated context, it seems that the answer is no.
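// [Illustration, not part of the patch] The Microsoft-mode path above accepts
// unqualified references to members of dependent bases, emitting
// warn_found_via_dependent_bases_lookup plus a "this->" fixit:
template <typename T> struct Base { void helper(); };

template <typename T> struct Derived : Base<T> {
  void run() {
    helper();   // ill-formed in standard C++; under -fms-compatibility it is
  }             // accepted with a warning and rewritten as this->helper()
};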
-        if (ExprEvalContexts.back().Context != Sema::Unevaluated) {
+        if (!isUnevaluatedContext()) {
           QualType CapturedType = getCapturedDeclRefType(cast<VarDecl>(VD), Loc);
           if (!CapturedType.isNull())
             type = CapturedType;
@@ -2381,6 +2484,7 @@ ExprResult Sema::ActOnPredefinedExpr(SourceLocation Loc, tok::TokenKind Kind) {
   default: llvm_unreachable("Unknown simple primary expr!");
   case tok::kw___func__: IT = PredefinedExpr::Func; break; // [C99 6.4.2.2]
   case tok::kw___FUNCTION__: IT = PredefinedExpr::Function; break;
+  case tok::kw_L__FUNCTION__: IT = PredefinedExpr::LFunction; break;
   case tok::kw___PRETTY_FUNCTION__: IT = PredefinedExpr::PrettyFunction; break;
   }
@@ -2402,7 +2506,10 @@ ExprResult Sema::ActOnPredefinedExpr(SourceLocation Loc, tok::TokenKind Kind) {
     unsigned Length = PredefinedExpr::ComputeName(IT, currentDecl).length();
 
     llvm::APInt LengthI(32, Length + 1);
-    ResTy = Context.CharTy.withConst();
+    if (IT == PredefinedExpr::LFunction)
+      ResTy = Context.WCharTy.withConst();
+    else
+      ResTy = Context.CharTy.withConst();
     ResTy = Context.getConstantArrayType(ResTy, LengthI, ArrayType::Normal, 0);
   }
   return Owned(new (Context) PredefinedExpr(Loc, ResTy, IT));
@@ -2603,7 +2710,7 @@ ExprResult Sema::ActOnNumericConstant(const Token &Tok, Scope *UDLScope) {
       llvm::APSInt Value(CharBits, CharIsUnsigned);
       for (unsigned I = 0, N = Literal.getUDSuffixOffset(); I != N; ++I) {
         Value = ThisTokBegin[I];
-        TemplateArgument Arg(Value, Context.CharTy);
+        TemplateArgument Arg(Context, Value, Context.CharTy);
         TemplateArgumentLocInfo ArgInfo;
        ExplicitArgs.addArgument(TemplateArgumentLoc(Arg, ArgInfo));
      }
@@ -2647,7 +2754,12 @@ ExprResult Sema::ActOnNumericConstant(const Token &Tok, Scope *UDLScope) {
                            diag::warn_cxx98_compat_longlong : diag::ext_longlong);
 
     // Get the value in the widest-possible width.
-    llvm::APInt ResultVal(Context.getTargetInfo().getIntMaxTWidth(), 0);
+    unsigned MaxWidth = Context.getTargetInfo().getIntMaxTWidth();
+    // The microsoft literal suffix extensions support 128-bit literals, which
+    // may be wider than [u]intmax_t.
+    if (Literal.isMicrosoftInteger && MaxWidth < 128)
+      MaxWidth = 128;
+    llvm::APInt ResultVal(MaxWidth, 0);
 
     if (Literal.GetIntegerValue(ResultVal)) {
       // If this value didn't fit into uintmax_t, warn and force to ull.
@@ -2695,7 +2807,7 @@ ExprResult Sema::ActOnNumericConstant(const Token &Tok, Scope *UDLScope) {
         }
       }
 
-      // Finally, check long long if needed.
+      // Check long long if needed.
       if (Ty.isNull()) {
         unsigned LongLongSize = Context.getTargetInfo().getLongLongWidth();
 
@@ -2712,6 +2824,16 @@ ExprResult Sema::ActOnNumericConstant(const Token &Tok, Scope *UDLScope) {
           Width = LongLongSize;
         }
       }
+
+      // If it doesn't fit in unsigned long long, and we're using Microsoft
+      // extensions, then it's a 128-bit integer literal.
+      if (Ty.isNull() && Literal.isMicrosoftInteger) {
+        if (Literal.isUnsigned)
+          Ty = Context.UnsignedInt128Ty;
+        else
+          Ty = Context.Int128Ty;
+        Width = 128;
+      }
 
       // If we still couldn't decide a type, we probably have something that
       // does not fit in a signed long long, but has no U suffix.
@@ -2783,8 +2905,9 @@ static bool CheckObjCTraitOperandConstraints(Sema &S, QualType T,
                                              SourceLocation Loc,
                                              SourceRange ArgRange,
                                              UnaryExprOrTypeTrait TraitKind) {
-  // Reject sizeof(interface) and sizeof(interface<proto>) in 64-bit mode.
-  if (S.LangOpts.ObjCNonFragileABI && T->isObjCObjectType()) {
+  // Reject sizeof(interface) and sizeof(interface<proto>) if the
+  // runtime doesn't allow it.
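// [Illustration, not part of the patch] The two Microsoft extensions wired up
// above, assuming -fms-extensions; the i128 suffix spelling is inferred from
// this patch's Literal.isMicrosoftInteger handling and may not be exact:
void msDemo() {
  const wchar_t *name = L__FUNCTION__;   // wide variant of __FUNCTION__
  __int128 big = 170141183460469231731687303715884105727i128;  // 128-bit
}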
+  if (!S.LangOpts.ObjCRuntime.allowsSizeofAlignof() && T->isObjCObjectType()) {
     S.Diag(Loc, diag::err_sizeof_nonfragile_interface)
       << T << (TraitKind == UETT_SizeOf)
       << ArgRange;
@@ -2822,9 +2945,8 @@ bool Sema::CheckUnaryExprOrTypeTraitOperand(Expr *E,
     return false;
 
   if (RequireCompleteExprType(E,
-                              PDiag(diag::err_sizeof_alignof_incomplete_type)
-                              << ExprKind << E->getSourceRange(),
-                              std::make_pair(SourceLocation(), PDiag(0))))
+                              diag::err_sizeof_alignof_incomplete_type,
+                              ExprKind, E->getSourceRange()))
     return true;
 
   // Completing the expression's type may have changed it.
@@ -2891,8 +3013,8 @@ bool Sema::CheckUnaryExprOrTypeTraitOperand(QualType ExprType,
     return false;
 
   if (RequireCompleteType(OpLoc, ExprType,
-                          PDiag(diag::err_sizeof_alignof_incomplete_type)
-                          << ExprKind << ExprRange))
+                          diag::err_sizeof_alignof_incomplete_type,
+                          ExprKind, ExprRange))
     return true;
 
   if (CheckObjCTraitOperandConstraints(*this, ExprType, OpLoc, ExprRange,
@@ -3075,6 +3197,22 @@ Sema::ActOnPostfixUnaryOp(Scope *S, SourceLocation OpLoc,
   return BuildUnaryOp(S, OpLoc, Opc, Input);
 }
 
+/// \brief Diagnose if arithmetic on the given ObjC pointer is illegal.
+///
+/// \return true on error
+static bool checkArithmeticOnObjCPointer(Sema &S,
+                                         SourceLocation opLoc,
+                                         Expr *op) {
+  assert(op->getType()->isObjCObjectPointerType());
+  if (S.LangOpts.ObjCRuntime.allowsPointerArithmetic())
+    return false;
+
+  S.Diag(opLoc, diag::err_arithmetic_nonfragile_interface)
+    << op->getType()->castAs<ObjCObjectPointerType>()->getPointeeType()
+    << op->getSourceRange();
+  return true;
+}
+
 ExprResult
 Sema::ActOnArraySubscriptExpr(Scope *S, Expr *Base, SourceLocation LLoc,
                               Expr *Idx, SourceLocation RLoc) {
@@ -3105,7 +3243,6 @@ Sema::ActOnArraySubscriptExpr(Scope *S, Expr *Base, SourceLocation LLoc,
   return CreateBuiltinArraySubscriptExpr(Base, LLoc, Idx, RLoc);
 }
 
-
 ExprResult
 Sema::CreateBuiltinArraySubscriptExpr(Expr *Base, SourceLocation LLoc,
                                       Expr *Idx, SourceLocation RLoc) {
@@ -3143,13 +3280,21 @@ Sema::CreateBuiltinArraySubscriptExpr(Expr *Base, SourceLocation LLoc,
     IndexExpr = RHSExp;
     ResultType = PTy->getPointeeType();
   } else if (const ObjCObjectPointerType *PTy =
-               LHSTy->getAs<ObjCObjectPointerType>()) {
+               LHSTy->getAs<ObjCObjectPointerType>()) {
     BaseExpr = LHSExp;
     IndexExpr = RHSExp;
-    Result = BuildObjCSubscriptExpression(RLoc, BaseExpr, IndexExpr, 0, 0);
-    if (!Result.isInvalid())
-      return Owned(Result.take());
+
+    // Use custom logic if this should be the pseudo-object subscript
+    // expression.
+    if (!LangOpts.ObjCRuntime.isSubscriptPointerArithmetic())
+      return BuildObjCSubscriptExpression(RLoc, BaseExpr, IndexExpr, 0, 0);
+
+    ResultType = PTy->getPointeeType();
+    if (!LangOpts.ObjCRuntime.allowsPointerArithmetic()) {
+      Diag(LLoc, diag::err_subscript_nonfragile_interface)
+        << ResultType << BaseExpr->getSourceRange();
+      return ExprError();
+    }
   } else if (const PointerType *PTy = RHSTy->getAs<PointerType>()) {
      // Handle the uncommon case of "123[Ptr]".
    BaseExpr = RHSExp;
@@ -3161,6 +3306,11 @@ Sema::CreateBuiltinArraySubscriptExpr(Expr *Base, SourceLocation LLoc,
     BaseExpr = RHSExp;
     IndexExpr = LHSExp;
     ResultType = PTy->getPointeeType();
+    if (!LangOpts.ObjCRuntime.allowsPointerArithmetic()) {
+      Diag(LLoc, diag::err_subscript_nonfragile_interface)
+        << ResultType << BaseExpr->getSourceRange();
+      return ExprError();
+    }
   } else if (const VectorType *VTy = LHSTy->getAs<VectorType>()) {
     BaseExpr = LHSExp;    // vectors: V[123]
     IndexExpr = RHSExp;
@@ -3230,16 +3380,8 @@ Sema::CreateBuiltinArraySubscriptExpr(Expr *Base, SourceLocation LLoc,
     if (!ResultType.hasQualifiers()) VK = VK_RValue;
   } else if (!ResultType->isDependentType() &&
       RequireCompleteType(LLoc, ResultType,
-                          PDiag(diag::err_subscript_incomplete_type)
-                            << BaseExpr->getSourceRange()))
-    return ExprError();
-
-  // Diagnose bad cases where we step over interface counts.
-  if (ResultType->isObjCObjectType() && LangOpts.ObjCNonFragileABI) {
-    Diag(LLoc, diag::err_subscript_nonfragile_interface)
-      << ResultType << BaseExpr->getSourceRange();
+                          diag::err_subscript_incomplete_type, BaseExpr))
     return ExprError();
-  }
 
   assert(VK == VK_RValue || LangOpts.CPlusPlus ||
          !ResultType.isCForbiddenLValueType());
@@ -3263,14 +3405,20 @@ ExprResult Sema::BuildCXXDefaultArgExpr(SourceLocation CallLoc,
     if (Param->hasUninstantiatedDefaultArg()) {
       Expr *UninstExpr = Param->getUninstantiatedDefaultArg();
 
+      EnterExpressionEvaluationContext EvalContext(*this, PotentiallyEvaluated,
+                                                   Param);
+
       // Instantiate the expression.
       MultiLevelTemplateArgumentList ArgList
         = getTemplateInstantiationArgs(FD, 0, /*RelativeToPrimary=*/true);
 
       std::pair<const TemplateArgument *, unsigned> Innermost
        = ArgList.getInnermost();
-      InstantiatingTemplate Inst(*this, CallLoc, Param, Innermost.first,
-                                 Innermost.second);
+      InstantiatingTemplate Inst(*this, CallLoc, Param,
+                                 ArrayRef<TemplateArgument>(Innermost.first,
+                                                            Innermost.second));
+      if (Inst)
+        return ExprError();
 
       ExprResult Result;
       {
@@ -3299,9 +3447,10 @@ ExprResult Sema::BuildCXXDefaultArgExpr(SourceLocation CallLoc,
       if (Result.isInvalid())
         return ExprError();
 
+      Expr *Arg = Result.takeAs<Expr>();
+      CheckImplicitConversions(Arg, Param->getOuterLocStart());
       // Build the default argument expression.
-      return Owned(CXXDefaultArgExpr::Create(Context, CallLoc, Param,
-                                             Result.takeAs<Expr>()));
+      return Owned(CXXDefaultArgExpr::Create(Context, CallLoc, Param, Arg));
     }
 
     // If the default expression creates temporaries, we need to
@@ -3331,6 +3480,25 @@ ExprResult Sema::BuildCXXDefaultArgExpr(SourceLocation CallLoc,
   return Owned(CXXDefaultArgExpr::Create(Context, CallLoc, Param));
 }
 
+
+Sema::VariadicCallType
+Sema::getVariadicCallType(FunctionDecl *FDecl, const FunctionProtoType *Proto,
+                          Expr *Fn) {
+  if (Proto && Proto->isVariadic()) {
+    if (dyn_cast_or_null<CXXConstructorDecl>(FDecl))
+      return VariadicConstructor;
+    else if (Fn && Fn->getType()->isBlockPointerType())
+      return VariadicBlock;
+    else if (FDecl) {
+      if (CXXMethodDecl *Method = dyn_cast_or_null<CXXMethodDecl>(FDecl))
+        if (Method->isInstance())
+          return VariadicMethod;
+    }
+    return VariadicFunction;
+  }
+  return VariadicDoesNotApply;
+}
+
 /// ConvertArgumentsForCall - Converts the arguments specified in
 /// Args/NumArgs to the parameter types of the function FDecl with
 /// function prototype Proto. Call is the call expression itself, and
@@ -3365,11 +3533,18 @@ Sema::ConvertArgumentsForCall(CallExpr *Call, Expr *Fn,
   //  arguments for the remaining parameters), don't make the call.
   if (NumArgs < NumArgsInProto) {
     if (NumArgs < MinArgs) {
-      Diag(RParenLoc, MinArgs == NumArgsInProto
-                        ? diag::err_typecheck_call_too_few_args
-                        : diag::err_typecheck_call_too_few_args_at_least)
-        << FnKind
-        << MinArgs << NumArgs << Fn->getSourceRange();
+      if (MinArgs == 1 && FDecl && FDecl->getParamDecl(0)->getDeclName())
+        Diag(RParenLoc, MinArgs == NumArgsInProto && !Proto->isVariadic()
+                          ? diag::err_typecheck_call_too_few_args_one
+                          : diag::err_typecheck_call_too_few_args_at_least_one)
+          << FnKind
+          << FDecl->getParamDecl(0) << Fn->getSourceRange();
+      else
+        Diag(RParenLoc, MinArgs == NumArgsInProto && !Proto->isVariadic()
+                          ? diag::err_typecheck_call_too_few_args
+                          : diag::err_typecheck_call_too_few_args_at_least)
+          << FnKind
+          << MinArgs << NumArgs << Fn->getSourceRange();
 
       // Emit the location of the prototype.
       if (FDecl && !FDecl->getBuiltinID() && !IsExecConfig)
@@ -3385,14 +3560,24 @@ Sema::ConvertArgumentsForCall(CallExpr *Call, Expr *Fn,
   //  them.
   if (NumArgs > NumArgsInProto) {
     if (!Proto->isVariadic()) {
-      Diag(Args[NumArgsInProto]->getLocStart(),
-           MinArgs == NumArgsInProto
-             ? diag::err_typecheck_call_too_many_args
-             : diag::err_typecheck_call_too_many_args_at_most)
-        << FnKind
-        << NumArgsInProto << NumArgs << Fn->getSourceRange()
-        << SourceRange(Args[NumArgsInProto]->getLocStart(),
-                       Args[NumArgs-1]->getLocEnd());
+      if (NumArgsInProto == 1 && FDecl && FDecl->getParamDecl(0)->getDeclName())
+        Diag(Args[NumArgsInProto]->getLocStart(),
+             MinArgs == NumArgsInProto
+               ? diag::err_typecheck_call_too_many_args_one
+               : diag::err_typecheck_call_too_many_args_at_most_one)
+          << FnKind
+          << FDecl->getParamDecl(0) << NumArgs << Fn->getSourceRange()
+          << SourceRange(Args[NumArgsInProto]->getLocStart(),
+                         Args[NumArgs-1]->getLocEnd());
+      else
+        Diag(Args[NumArgsInProto]->getLocStart(),
+             MinArgs == NumArgsInProto
+               ? diag::err_typecheck_call_too_many_args
+               : diag::err_typecheck_call_too_many_args_at_most)
+          << FnKind
+          << NumArgsInProto << NumArgs << Fn->getSourceRange()
+          << SourceRange(Args[NumArgsInProto]->getLocStart(),
+                         Args[NumArgs-1]->getLocEnd());
 
       // Emit the location of the prototype.
       if (FDecl && !FDecl->getBuiltinID() && !IsExecConfig)
@@ -3405,12 +3590,8 @@ Sema::ConvertArgumentsForCall(CallExpr *Call, Expr *Fn,
     }
   }
   SmallVector<Expr *, 8> AllArgs;
-  VariadicCallType CallType =
-    Proto->isVariadic() ? VariadicFunction : VariadicDoesNotApply;
-  if (Fn->getType()->isBlockPointerType())
-    CallType = VariadicBlock; // Block
-  else if (isa<MemberExpr>(Fn))
-    CallType = VariadicMethod;
+  VariadicCallType CallType = getVariadicCallType(FDecl, Proto, Fn);
+
   Invalid = GatherArgumentsForCall(Call->getLocStart(), FDecl,
                                    Proto, 0, Args, NumArgs, AllArgs, CallType);
   if (Invalid)
@@ -3448,8 +3629,7 @@ bool Sema::GatherArgumentsForCall(SourceLocation CallLoc,
 
       if (RequireCompleteType(Arg->getLocStart(),
                               ProtoArgType,
-                              PDiag(diag::err_call_incomplete_argument)
-                                << Arg->getSourceRange()))
+                              diag::err_call_incomplete_argument, Arg))
         return true;
 
       // Pass the argument
@@ -3500,7 +3680,6 @@ bool Sema::GatherArgumentsForCall(SourceLocation CallLoc,
 
   // If this is a variadic call, handle args passed through "...".
   if (CallType != VariadicDoesNotApply) {
-
     // Assume that extern "C" functions with variadic arguments that
     // return __unknown_anytype aren't *really* variadic.
     if (Proto->getResultType() == Context.UnknownAnyTy &&
@@ -3763,20 +3942,19 @@ Sema::BuildResolvedCallExpr(Expr *Fn, NamedDecl *NDecl,
 
   // Make the call expr early, before semantic checks. This guarantees cleanup
  // of arguments and function on error.
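// [Illustration, not part of the patch] What the single-parameter wording in
// ConvertArgumentsForCall buys; the quoted diagnostic text is approximated:
void connect(int port);

void test() {
  // connect();       // "too few arguments ... single argument 'port' was not
  //                  //  specified" instead of a bare 1-vs-0 count mismatch
  // connect(1, 2);   // the too-many case likewise names 'port'
}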
   CallExpr *TheCall;
-  if (Config) {
+  if (Config)
     TheCall = new (Context) CUDAKernelCallExpr(Context, Fn,
                                                cast<CallExpr>(Config),
                                                Args, NumArgs,
                                                Context.BoolTy,
                                                VK_RValue,
                                                RParenLoc);
-  } else {
+  else
     TheCall = new (Context) CallExpr(Context, Fn,
                                      Args, NumArgs,
                                      Context.BoolTy,
                                      VK_RValue,
                                      RParenLoc);
-  }
 
   unsigned BuiltinID = (FDecl ? FDecl->getBuiltinID() : 0);
 
@@ -3839,7 +4017,8 @@ Sema::BuildResolvedCallExpr(Expr *Fn, NamedDecl *NDecl,
   TheCall->setType(FuncT->getCallResultType(Context));
   TheCall->setValueKind(Expr::getValueKindForType(FuncT->getResultType()));
 
-  if (const FunctionProtoType *Proto = dyn_cast<FunctionProtoType>(FuncT)) {
+  const FunctionProtoType *Proto = dyn_cast<FunctionProtoType>(FuncT);
+  if (Proto) {
     if (ConvertArgumentsForCall(TheCall, Fn, FDecl, Proto, Args, NumArgs,
                                 RParenLoc, IsExecConfig))
       return ExprError();
@@ -3851,8 +4030,7 @@ Sema::BuildResolvedCallExpr(Expr *Fn, NamedDecl *NDecl,
       // on our knowledge of the function definition.
       const FunctionDecl *Def = 0;
       if (FDecl->hasBody(Def) && NumArgs != Def->param_size()) {
-        const FunctionProtoType *Proto
-          = Def->getType()->getAs<FunctionProtoType>();
+        Proto = Def->getType()->getAs<FunctionProtoType>();
         if (!Proto || !(Proto->isVariadic() && NumArgs >= Def->param_size()))
           Diag(RParenLoc, diag::warn_call_wrong_number_of_arguments)
             << (NumArgs > Def->param_size()) << FDecl << Fn->getSourceRange();
@@ -3892,8 +4070,7 @@ Sema::BuildResolvedCallExpr(Expr *Fn, NamedDecl *NDecl,
 
       if (RequireCompleteType(Arg->getLocStart(),
                               Arg->getType(),
-                              PDiag(diag::err_call_incomplete_argument)
-                                << Arg->getSourceRange()))
+                              diag::err_call_incomplete_argument, Arg))
         return ExprError();
 
       TheCall->setArg(i, Arg);
@@ -3911,13 +4088,13 @@ Sema::BuildResolvedCallExpr(Expr *Fn, NamedDecl *NDecl,
 
   // Do special checking on direct calls to functions.
   if (FDecl) {
-    if (CheckFunctionCall(FDecl, TheCall))
+    if (CheckFunctionCall(FDecl, TheCall, Proto))
       return ExprError();
 
     if (BuiltinID)
       return CheckBuiltinFunctionCall(BuiltinID, TheCall);
   } else if (NDecl) {
-    if (CheckBlockCall(NDecl, TheCall))
+    if (CheckBlockCall(NDecl, TheCall, Proto))
      return ExprError();
   }
 
@@ -3946,18 +4123,17 @@ Sema::BuildCompoundLiteralExpr(SourceLocation LParenLoc, TypeSourceInfo *TInfo,
   if (literalType->isArrayType()) {
     if (RequireCompleteType(LParenLoc, Context.getBaseElementType(literalType),
-                            PDiag(diag::err_illegal_decl_array_incomplete_type)
-                              << SourceRange(LParenLoc,
-                                             LiteralExpr->getSourceRange().getEnd())))
+          diag::err_illegal_decl_array_incomplete_type,
+          SourceRange(LParenLoc,
+                      LiteralExpr->getSourceRange().getEnd())))
       return ExprError();
     if (literalType->isVariableArrayType())
       return ExprError(Diag(LParenLoc, diag::err_variable_object_no_init)
         << SourceRange(LParenLoc, LiteralExpr->getSourceRange().getEnd()));
   } else if (!literalType->isDependentType() &&
              RequireCompleteType(LParenLoc, literalType,
-                                 PDiag(diag::err_typecheck_decl_incomplete_type)
-                                   << SourceRange(LParenLoc,
-                                                  LiteralExpr->getSourceRange().getEnd())))
+               diag::err_typecheck_decl_incomplete_type,
+               SourceRange(LParenLoc, LiteralExpr->getSourceRange().getEnd())))
     return ExprError();
 
   InitializedEntity Entity
@@ -4054,11 +4230,6 @@ CastKind Sema::PrepareScalarCast(ExprResult &Src, QualType DestTy) {
  // pointers.  Everything else should be possible.
   QualType SrcTy = Src.get()->getType();
 
-  if (const AtomicType *SrcAtomicTy = SrcTy->getAs<AtomicType>())
-    SrcTy = SrcAtomicTy->getValueType();
-  if (const AtomicType *DestAtomicTy = DestTy->getAs<AtomicType>())
-    DestTy = DestAtomicTy->getValueType();
-
   if (Context.hasSameUnqualifiedType(SrcTy, DestTy))
     return CK_NoOp;
 
@@ -4461,7 +4632,10 @@ bool Sema::DiagnoseConditionalForNull(Expr *LHSExpr, Expr *RHSExpr,
   if (NullKind == Expr::NPCK_NotNull)
     return false;
 
-  if (NullKind == Expr::NPCK_ZeroInteger) {
+  if (NullKind == Expr::NPCK_ZeroExpression)
+    return false;
+
+  if (NullKind == Expr::NPCK_ZeroLiteral) {
     // In this case, check to make sure that we got here from a "NULL"
     // string in the source code.
     NullExpr = NullExpr->IgnoreParenImpCasts();
@@ -5382,21 +5556,19 @@ Sema::CheckAssignmentConstraints(QualType LHSType, ExprResult &RHS,
     return Compatible;
   }
 
+  // If we have an atomic type, try a non-atomic assignment, then just add an
+  // atomic qualification step.
   if (const AtomicType *AtomicTy = dyn_cast<AtomicType>(LHSType)) {
-    if (AtomicTy->getValueType() == RHSType) {
-      Kind = CK_NonAtomicToAtomic;
-      return Compatible;
-    }
-  }
-
-  if (const AtomicType *AtomicTy = dyn_cast<AtomicType>(RHSType)) {
-    if (AtomicTy->getValueType() == LHSType) {
-      Kind = CK_AtomicToNonAtomic;
-      return Compatible;
-    }
+    Sema::AssignConvertType result =
+      CheckAssignmentConstraints(AtomicTy->getValueType(), RHS, Kind);
+    if (result != Compatible)
+      return result;
+    if (Kind != CK_NoOp)
+      RHS = ImpCastExprToType(RHS.take(), AtomicTy->getValueType(), Kind);
+    Kind = CK_NonAtomicToAtomic;
+    return Compatible;
   }
-
+
   // If the left-hand side is a reference type, then we are in a
   // (rare!) case where we've allowed the use of references in C,
   // e.g., as a parameter type in a built-in function. In this case,
@@ -5936,14 +6108,8 @@ QualType Sema::CheckMultiplyDivideOperands(ExprResult &LHS, ExprResult &RHS,
     return QualType();
 
-  if (!LHS.get()->getType()->isArithmeticType() ||
-      !RHS.get()->getType()->isArithmeticType()) {
-    if (IsCompAssign &&
-        LHS.get()->getType()->isAtomicType() &&
-        RHS.get()->getType()->isArithmeticType())
-      return compType;
+  if (compType.isNull() || !compType->isArithmeticType())
     return InvalidOperands(Loc, LHS, RHS);
-  }
 
   // Check for division by zero.
   if (IsDiv &&
@@ -5971,8 +6137,7 @@ QualType Sema::CheckRemainderOperands(
   if (LHS.isInvalid() || RHS.isInvalid())
     return QualType();
 
-  if (!LHS.get()->getType()->isIntegerType() ||
-      !RHS.get()->getType()->isIntegerType())
+  if (compType.isNull() || !compType->isIntegerType())
     return InvalidOperands(Loc, LHS, RHS);
 
  // Check for remainder by zero.
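// [Illustration, not part of the patch] With UsualArithmeticConversions now
// signalling failure through a null type, the multiplicative and remainder
// checks above reject non-arithmetic operands along a single path:
void bad(int *p, int i) {
  // both lines would now be rejected as "invalid operands":
  // auto a = p * i;
  // auto b = i % p;
}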
@@ -6036,17 +6201,12 @@ static void diagnoseArithmeticOnFunctionPointer(Sema &S, SourceLocation Loc,
 /// \returns True if pointer has incomplete type
 static bool checkArithmeticIncompletePointerType(Sema &S, SourceLocation Loc,
                                                  Expr *Operand) {
-  if ((Operand->getType()->isPointerType() &&
-       !Operand->getType()->isDependentType()) ||
-      Operand->getType()->isObjCObjectPointerType()) {
-    QualType PointeeTy = Operand->getType()->getPointeeType();
-    if (S.RequireCompleteType(
-          Loc, PointeeTy,
-          S.PDiag(diag::err_typecheck_arithmetic_incomplete_type)
-            << PointeeTy << Operand->getSourceRange()))
-      return true;
-  }
-  return false;
+  assert(Operand->getType()->isAnyPointerType() &&
+         !Operand->getType()->isDependentType());
+  QualType PointeeTy = Operand->getType()->getPointeeType();
+  return S.RequireCompleteType(Loc, PointeeTy,
+                               diag::err_typecheck_arithmetic_incomplete_type,
+                               PointeeTy, Operand->getSourceRange());
 }
 
 /// \brief Check the validity of an arithmetic pointer operand.
@@ -6117,26 +6277,14 @@ static bool checkArithmeticBinOpPointerOperands(Sema &S, SourceLocation Loc,
     return !S.getLangOpts().CPlusPlus;
   }
 
-  if (checkArithmeticIncompletePointerType(S, Loc, LHSExpr)) return false;
-  if (checkArithmeticIncompletePointerType(S, Loc, RHSExpr)) return false;
+  if (isLHSPointer && checkArithmeticIncompletePointerType(S, Loc, LHSExpr))
+    return false;
+  if (isRHSPointer && checkArithmeticIncompletePointerType(S, Loc, RHSExpr))
+    return false;
 
   return true;
 }
 
-/// \brief Check bad cases where we step over interface counts.
-static bool checkArithmethicPointerOnNonFragileABI(Sema &S,
-                                                   SourceLocation OpLoc,
-                                                   Expr *Op) {
-  assert(Op->getType()->isAnyPointerType());
-  QualType PointeeTy = Op->getType()->getPointeeType();
-  if (!PointeeTy->isObjCObjectType() || !S.LangOpts.ObjCNonFragileABI)
-    return true;
-
-  S.Diag(OpLoc, diag::err_arithmetic_nonfragile_interface)
-    << PointeeTy << Op->getSourceRange();
-  return false;
-}
-
 /// diagnoseStringPlusInt - Emit a warning when adding an integer to a string
 /// literal.
 static void diagnoseStringPlusInt(Sema &Self, SourceLocation OpLoc,
@@ -6208,25 +6356,31 @@ QualType Sema::CheckAdditionOperands( // C99 6.5.6
     diagnoseStringPlusInt(*this, Loc, LHS.get(), RHS.get());
 
   // handle the common case first (both operands are arithmetic).
-  if (LHS.get()->getType()->isArithmeticType() &&
-      RHS.get()->getType()->isArithmeticType()) {
+  if (!compType.isNull() && compType->isArithmeticType()) {
     if (CompLHSTy) *CompLHSTy = compType;
     return compType;
   }
 
-  if (LHS.get()->getType()->isAtomicType() &&
-      RHS.get()->getType()->isArithmeticType()) {
-    *CompLHSTy = LHS.get()->getType();
-    return compType;
-  }
+  // Type-checking.  Ultimately the pointer's going to be in PExp;
 + // note that we bias towards the LHS being the pointer.
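// [Sketch, not part of the patch] The rewritten addition check in miniature:
// try the LHS as the pointer operand first, otherwise swap; the Kind enum is
// a stand-in for the real operand classification.
#include <utility>

enum class Kind { Pointer, ObjCPointer, Other };

// Returns true and leaves the pointer operand in lhs if either side is one.
bool splitPointerAndIndex(Kind &lhs, Kind &rhs) {
  if (lhs == Kind::Other) {
    std::swap(lhs, rhs);          // bias towards the LHS being the pointer
    if (lhs == Kind::Other)
      return false;               // neither side is a pointer: invalid
  }
  return true;
}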
+  Expr *PExp = LHS.get(), *IExp = RHS.get();
 
-  // Put any potential pointer into PExp
-  Expr* PExp = LHS.get(), *IExp = RHS.get();
-  if (IExp->getType()->isAnyPointerType())
+  bool isObjCPointer;
+  if (PExp->getType()->isPointerType()) {
+    isObjCPointer = false;
+  } else if (PExp->getType()->isObjCObjectPointerType()) {
+    isObjCPointer = true;
+  } else {
     std::swap(PExp, IExp);
-
-  if (!PExp->getType()->isAnyPointerType())
-    return InvalidOperands(Loc, LHS, RHS);
+    if (PExp->getType()->isPointerType()) {
+      isObjCPointer = false;
+    } else if (PExp->getType()->isObjCObjectPointerType()) {
+      isObjCPointer = true;
+    } else {
+      return InvalidOperands(Loc, LHS, RHS);
+    }
+  }
+  assert(PExp->getType()->isAnyPointerType());
 
   if (!IExp->getType()->isIntegerType())
     return InvalidOperands(Loc, LHS, RHS);
@@ -6234,8 +6388,7 @@ QualType Sema::CheckAdditionOperands( // C99 6.5.6
   if (!checkArithmeticOpPointerOperand(*this, Loc, PExp))
     return QualType();
 
-  // Diagnose bad cases where we step over interface counts.
-  if (!checkArithmethicPointerOnNonFragileABI(*this, Loc, PExp))
+  if (isObjCPointer && checkArithmeticOnObjCPointer(*this, Loc, PExp))
     return QualType();
 
   // Check array bounds for pointer arithmetic
@@ -6274,24 +6427,18 @@ QualType Sema::CheckSubtractionOperands(ExprResult &LHS, ExprResult &RHS,
 
   // Enforce type constraints: C99 6.5.6p3.
 
   // Handle the common case first (both operands are arithmetic).
-  if (LHS.get()->getType()->isArithmeticType() &&
-      RHS.get()->getType()->isArithmeticType()) {
+  if (!compType.isNull() && compType->isArithmeticType()) {
     if (CompLHSTy) *CompLHSTy = compType;
     return compType;
   }
 
-  if (LHS.get()->getType()->isAtomicType() &&
-      RHS.get()->getType()->isArithmeticType()) {
-    *CompLHSTy = LHS.get()->getType();
-    return compType;
-  }
-
   // Either ptr - int   or   ptr - ptr.
   if (LHS.get()->getType()->isAnyPointerType()) {
     QualType lpointee = LHS.get()->getType()->getPointeeType();
 
     // Diagnose bad cases where we step over interface counts.
-    if (!checkArithmethicPointerOnNonFragileABI(*this, Loc, LHS.get()))
+    if (LHS.get()->getType()->isObjCObjectPointerType() &&
+        checkArithmeticOnObjCPointer(*this, Loc, LHS.get()))
       return QualType();
 
     // The result type of a pointer-int computation is the pointer type.
@@ -6560,6 +6707,163 @@ static void diagnoseFunctionPointerToVoidComparison(Sema &S, SourceLocation Loc,
     << LHS.get()->getSourceRange() << RHS.get()->getSourceRange();
 }
 
+static bool isObjCObjectLiteral(ExprResult &E) {
+  switch (E.get()->getStmtClass()) {
+  case Stmt::ObjCArrayLiteralClass:
+  case Stmt::ObjCDictionaryLiteralClass:
+  case Stmt::ObjCStringLiteralClass:
+  case Stmt::ObjCBoxedExprClass:
+    return true;
+  default:
+    // Note that ObjCBoolLiteral is NOT an object literal!
+    return false;
+  }
+}
+
+static bool hasIsEqualMethod(Sema &S, const Expr *LHS, const Expr *RHS) {
+  // Get the LHS object's interface type.
+  QualType Type = LHS->getType();
+  QualType InterfaceType;
+  if (const ObjCObjectPointerType *PTy = Type->getAs<ObjCObjectPointerType>()) {
+    InterfaceType = PTy->getPointeeType();
+    if (const ObjCObjectType *iQFaceTy =
+        InterfaceType->getAsObjCQualifiedInterfaceType())
+      InterfaceType = iQFaceTy->getBaseType();
+  } else {
+    // If this is not actually an Objective-C object, bail out.
+    return false;
+  }
+
+  // If the RHS isn't an Objective-C object, bail out.
+  if (!RHS->getType()->isObjCObjectPointerType())
+    return false;
+
 + // Try to find the -isEqual: method.
+  Selector IsEqualSel = S.NSAPIObj->getIsEqualSelector();
+  ObjCMethodDecl *Method = S.LookupMethodInObjectType(IsEqualSel,
+                                                      InterfaceType,
+                                                      /*instance=*/true);
+  if (!Method) {
+    if (Type->isObjCIdType()) {
+      // For 'id', just check the global pool.
+      Method = S.LookupInstanceMethodInGlobalPool(IsEqualSel, SourceRange(),
+                                                  /*receiverId=*/true,
+                                                  /*warn=*/false);
+    } else {
+      // Check protocols.
+      Method = S.LookupMethodInQualifiedType(IsEqualSel,
+                                             cast<ObjCObjectPointerType>(Type),
+                                             /*instance=*/true);
+    }
+  }
+
+  if (!Method)
+    return false;
+
+  QualType T = Method->param_begin()[0]->getType();
+  if (!T->isObjCObjectPointerType())
+    return false;
+
+  QualType R = Method->getResultType();
+  if (!R->isScalarType())
+    return false;
+
+  return true;
+}
+
+static void diagnoseObjCLiteralComparison(Sema &S, SourceLocation Loc,
+                                          ExprResult &LHS, ExprResult &RHS,
+                                          BinaryOperator::Opcode Opc){
+  Expr *Literal;
+  Expr *Other;
+  if (isObjCObjectLiteral(LHS)) {
+    Literal = LHS.get();
+    Other = RHS.get();
+  } else {
+    Literal = RHS.get();
+    Other = LHS.get();
+  }
+
+  // Don't warn on comparisons against nil.
+  Other = Other->IgnoreParenCasts();
+  if (Other->isNullPointerConstant(S.getASTContext(),
+                                   Expr::NPC_ValueDependentIsNotNull))
+    return;
+
+  // This should be kept in sync with warn_objc_literal_comparison.
+  // LK_String should always be last, since it has its own warning flag.
+  enum {
+    LK_Array,
+    LK_Dictionary,
+    LK_Numeric,
+    LK_Boxed,
+    LK_String
+  } LiteralKind;
+
+  switch (Literal->getStmtClass()) {
+  case Stmt::ObjCStringLiteralClass:
+    // "string literal"
+    LiteralKind = LK_String;
+    break;
+  case Stmt::ObjCArrayLiteralClass:
+    // "array literal"
+    LiteralKind = LK_Array;
+    break;
+  case Stmt::ObjCDictionaryLiteralClass:
+    // "dictionary literal"
+    LiteralKind = LK_Dictionary;
+    break;
+  case Stmt::ObjCBoxedExprClass: {
+    Expr *Inner = cast<ObjCBoxedExpr>(Literal)->getSubExpr();
+    switch (Inner->getStmtClass()) {
+    case Stmt::IntegerLiteralClass:
+    case Stmt::FloatingLiteralClass:
+    case Stmt::CharacterLiteralClass:
+    case Stmt::ObjCBoolLiteralExprClass:
+    case Stmt::CXXBoolLiteralExprClass:
+      // "numeric literal"
+      LiteralKind = LK_Numeric;
+      break;
+    case Stmt::ImplicitCastExprClass: {
+      CastKind CK = cast<ImplicitCastExpr>(Inner)->getCastKind();
+      // Boolean literals can be represented by implicit casts.
+      if (CK == CK_IntegralToBoolean || CK == CK_IntegralCast) {
+        LiteralKind = LK_Numeric;
+        break;
+      }
+      // FALLTHROUGH
+    }
+    default:
+      // "boxed expression"
+      LiteralKind = LK_Boxed;
+      break;
+    }
+    break;
+  }
+  default:
+    llvm_unreachable("Unknown Objective-C object literal kind");
+  }
+
+  if (LiteralKind == LK_String)
+    S.Diag(Loc, diag::warn_objc_string_literal_comparison)
+      << Literal->getSourceRange();
+  else
+    S.Diag(Loc, diag::warn_objc_literal_comparison)
+      << LiteralKind << Literal->getSourceRange();
+
+  if (BinaryOperator::isEqualityOp(Opc) &&
+      hasIsEqualMethod(S, LHS.get(), RHS.get())) {
+    SourceLocation Start = LHS.get()->getLocStart();
+    SourceLocation End = S.PP.getLocForEndOfToken(RHS.get()->getLocEnd());
+    SourceRange OpRange(Loc, S.PP.getLocForEndOfToken(Loc));
+
+    S.Diag(Loc, diag::note_objc_literal_comparison_isequal)
+      << FixItHint::CreateInsertion(Start, Opc == BO_EQ ? "[" : "![")
+      << FixItHint::CreateReplacement(OpRange, "isEqual:")
+      << FixItHint::CreateInsertion(End, "]");
+  }
+}
+
 // C99 6.5.8, C++ [expr.rel]
 QualType Sema::CheckCompareOperands(ExprResult &LHS, ExprResult &RHS,
                                     SourceLocation Loc, unsigned OpaqueOpc,
@@ -6884,6 +7188,9 @@ QualType Sema::CheckCompareOperands(ExprResult &LHS, ExprResult &RHS,
       if (!Context.areComparableObjCPointerTypes(LHSType, RHSType))
         diagnoseDistinctPointerComparison(*this, Loc, LHS, RHS, /*isError*/false);
 
+      if (isObjCObjectLiteral(LHS) || isObjCObjectLiteral(RHS))
+        diagnoseObjCLiteralComparison(*this, Loc, LHS, RHS, Opc);
+
       if (LHSIsNull && !RHSIsNull)
         LHS = ImpCastExprToType(LHS.take(), RHSType, CK_BitCast);
       else
@@ -7037,8 +7344,7 @@ inline QualType Sema::CheckBitwiseOperands(
   LHS = LHSResult.take();
   RHS = RHSResult.take();
 
-  if (LHS.get()->getType()->isIntegralOrUnscopedEnumerationType() &&
-      RHS.get()->getType()->isIntegralOrUnscopedEnumerationType())
+  if (!compType.isNull() && compType->isIntegralOrUnscopedEnumerationType())
     return compType;
   return InvalidOperands(Loc, LHS, RHS);
 }
@@ -7251,6 +7557,7 @@ static bool CheckForModifiableLvalue(Expr *E, SourceLocation Loc, Sema &S) {
     break;
   case Expr::MLV_ArrayType:
+  case Expr::MLV_ArrayTemporary:
     Diag = diag::err_typecheck_array_not_modifiable_lvalue;
     NeedType = true;
     break;
@@ -7271,8 +7578,7 @@ static bool CheckForModifiableLvalue(Expr *E, SourceLocation Loc, Sema &S) {
   case Expr::MLV_IncompleteType:
   case Expr::MLV_IncompleteVoidType:
     return S.RequireCompleteType(Loc, E->getType(),
-             S.PDiag(diag::err_typecheck_incomplete_type_not_modifiable_lvalue)
-                  << E->getSourceRange());
+             diag::err_typecheck_incomplete_type_not_modifiable_lvalue, E);
   case Expr::MLV_DuplicateVectorComponents:
     Diag = diag::err_typecheck_duplicate_vector_components_not_mlvalue;
     break;
@@ -7297,7 +7603,27 @@ static bool CheckForModifiableLvalue(Expr *E, SourceLocation Loc, Sema &S) {
   return true;
 }
 
+static void CheckIdentityFieldAssignment(Expr *LHSExpr, Expr *RHSExpr,
+                                         SourceLocation Loc,
+                                         Sema &Sema) {
+  // C / C++ fields
+  MemberExpr *ML = dyn_cast<MemberExpr>(LHSExpr);
+  MemberExpr *MR = dyn_cast<MemberExpr>(RHSExpr);
+  if (ML && MR && ML->getMemberDecl() == MR->getMemberDecl()) {
+    if (isa<CXXThisExpr>(ML->getBase()) && isa<CXXThisExpr>(MR->getBase()))
+      Sema.Diag(Loc, diag::warn_identity_field_assign) << 0;
+  }
+
+  // Objective-C instance variables
+  ObjCIvarRefExpr *OL = dyn_cast<ObjCIvarRefExpr>(LHSExpr);
+  ObjCIvarRefExpr *OR = dyn_cast<ObjCIvarRefExpr>(RHSExpr);
+  if (OL && OR && OL->getDecl() == OR->getDecl()) {
+    DeclRefExpr *RL = dyn_cast<DeclRefExpr>(OL->getBase()->IgnoreImpCasts());
+    DeclRefExpr *RR = dyn_cast<DeclRefExpr>(OR->getBase()->IgnoreImpCasts());
+    if (RL && RR && RL->getDecl() == RR->getDecl())
+      Sema.Diag(Loc, diag::warn_identity_field_assign) << 1;
+  }
+}
 
 // C99 6.5.16.1
 QualType Sema::CheckAssignmentOperands(Expr *LHSExpr, ExprResult &RHS,
@@ -7314,6 +7640,10 @@ QualType Sema::CheckAssignmentOperands(Expr *LHSExpr, ExprResult &RHS,
                                    CompoundType;
   AssignConvertType ConvTy;
   if (CompoundType.isNull()) {
+    Expr *RHSCheck = RHS.get();
+
+    CheckIdentityFieldAssignment(LHSExpr, RHSCheck, Loc, *this);
+
     QualType LHSTy(LHSType);
     ConvTy = CheckSingleAssignmentConstraints(LHSTy, RHS);
     if (RHS.isInvalid())
@@ -7334,7 +7664,6 @@ QualType Sema::CheckAssignmentOperands(Expr *LHSExpr, ExprResult &RHS,
     // If the RHS is a unary plus or minus, check to see if they = and + are
     // right next to each other.  If so, the user may have typo'd "x =+ 4"
    // instead of "x += 4".
-    Expr *RHSCheck = RHS.get();
     if (ImplicitCastExpr *ICE = dyn_cast<ImplicitCastExpr>(RHSCheck))
       RHSCheck = ICE->getSubExpr();
     if (UnaryOperator *UO = dyn_cast<UnaryOperator>(RHSCheck)) {
@@ -7384,8 +7713,6 @@ QualType Sema::CheckAssignmentOperands(Expr *LHSExpr, ExprResult &RHS,
 // C99 6.5.17
 static QualType CheckCommaOperands(Sema &S, ExprResult &LHS, ExprResult &RHS,
                                    SourceLocation Loc) {
-  S.DiagnoseUnusedExprResult(LHS.get());
-
   LHS = S.CheckPlaceholderExpr(LHS.take());
   RHS = S.CheckPlaceholderExpr(RHS.take());
   if (LHS.isInvalid() || RHS.isInvalid())
@@ -7401,6 +7728,8 @@ static QualType CheckCommaOperands(Sema &S, ExprResult &LHS, ExprResult &RHS,
   if (LHS.isInvalid())
     return QualType();
 
+  S.DiagnoseUnusedExprResult(LHS.get());
+
   if (!S.getLangOpts().CPlusPlus) {
     RHS = S.DefaultFunctionArrayLvalueConversion(RHS.take());
     if (RHS.isInvalid())
@@ -7441,14 +7770,16 @@ static QualType CheckIncrementDecrementOperand(Sema &S, Expr *Op,
     S.Diag(OpLoc, diag::warn_increment_bool) << Op->getSourceRange();
   } else if (ResType->isRealType()) {
     // OK!
-  } else if (ResType->isAnyPointerType()) {
+  } else if (ResType->isPointerType()) {
     // C99 6.5.2.4p2, 6.5.6p2
     if (!checkArithmeticOpPointerOperand(S, OpLoc, Op))
       return QualType();
-
-    // Diagnose bad cases where we step over interface counts.
-    else if (!checkArithmethicPointerOnNonFragileABI(S, OpLoc, Op))
-      return QualType();
+  } else if (ResType->isObjCObjectPointerType()) {
+    // On modern runtimes, ObjC pointer arithmetic is forbidden.
+    // Otherwise, we just need a complete type.
+    if (checkArithmeticIncompletePointerType(S, OpLoc, Op) ||
+        checkArithmeticOnObjCPointer(S, OpLoc, Op))
+      return QualType();
   } else if (ResType->isAnyComplexType()) {
     // C99 does not support ++/-- on complex types, we allow as an extension.
     S.Diag(OpLoc, diag::ext_integer_increment_complex)
@@ -8064,7 +8395,7 @@ static void DiagnoseBitwisePrecedence(Sema &Self, BinaryOperatorKind Opc,
       << DiagRange << BinOp::getOpcodeStr(Opc) << OpStr;
   SuggestParentheses(Self, OpLoc,
     Self.PDiag(diag::note_precedence_bitwise_silence) << OpStr,
-    RHSExpr->getSourceRange());
+    (isLeftComp ? LHSExpr : RHSExpr)->getSourceRange());
   SuggestParentheses(Self, OpLoc,
     Self.PDiag(diag::note_precedence_bitwise_first) << BinOp::getOpcodeStr(Opc),
     ParensRange);
@@ -8669,8 +9000,7 @@ ExprResult Sema::BuildBuiltinOffsetOf(SourceLocation BuiltinLoc,
   //   with an incomplete type would be ill-formed.
   if (!Dependent
       && RequireCompleteType(BuiltinLoc, ArgTy,
-                             PDiag(diag::err_offsetof_incomplete_type)
-                               << TypeRange))
+                             diag::err_offsetof_incomplete_type, TypeRange))
     return ExprError();
 
   // offsetof with non-identifier designators (e.g. "offsetof(x, a.b[c])") are a
@@ -8743,10 +9073,18 @@ ExprResult Sema::BuildBuiltinOffsetOf(SourceLocation BuiltinLoc,
       //   The macro offsetof accepts a restricted set of type arguments in this
       //   International Standard. type shall be a POD structure or a POD union
       //   (clause 9).
+      // C++11 [support.types]p4:
+      //   If type is not a standard-layout class (Clause 9), the results are
+      //   undefined.
       if (CXXRecordDecl *CRD = dyn_cast<CXXRecordDecl>(RD)) {
-        if (!CRD->isPOD() && !DidWarnAboutNonPOD &&
+        bool IsSafe = LangOpts.CPlusPlus0x? CRD->isStandardLayout()
+                                          : CRD->isPOD();
+        unsigned DiagID =
+          LangOpts.CPlusPlus0x? diag::warn_offsetof_non_standardlayout_type
+                              : diag::warn_offsetof_non_pod_type;
+
+        if (!IsSafe && !DidWarnAboutNonPOD &&
            DiagRuntimeBehavior(BuiltinLoc, 0,
-                                PDiag(diag::warn_offsetof_non_pod_type)
+                                PDiag(DiagID)
                                 << SourceRange(CompPtr[0].LocStart, OC.LocEnd)
                                 << CurrentType))
           DidWarnAboutNonPOD = true;
@@ -8850,8 +9188,9 @@ ExprResult Sema::ActOnChooseExpr(SourceLocation BuiltinLoc,
   } else {
     // The conditional expression is required to be a constant expression.
     llvm::APSInt condEval(32);
-    ExprResult CondICE = VerifyIntegerConstantExpression(CondExpr, &condEval,
-      PDiag(diag::err_typecheck_choose_expr_requires_constant), false);
+    ExprResult CondICE
+      = VerifyIntegerConstantExpression(CondExpr, &condEval,
+          diag::err_typecheck_choose_expr_requires_constant, false);
     if (CondICE.isInvalid())
       return ExprError();
     CondExpr = CondICE.take();
@@ -8892,7 +9231,8 @@ void Sema::ActOnBlockStart(SourceLocation CaretLoc, Scope *CurScope) {
   PushExpressionEvaluationContext(PotentiallyEvaluated);
 }
 
-void Sema::ActOnBlockArguments(Declarator &ParamInfo, Scope *CurScope) {
+void Sema::ActOnBlockArguments(SourceLocation CaretLoc, Declarator &ParamInfo,
+                               Scope *CurScope) {
   assert(ParamInfo.getIdentifier()==0 && "block-id should have no identifier!");
   assert(ParamInfo.getContext() == Declarator::BlockLiteralContext);
   BlockScopeInfo *CurBlock = getCurBlock();
@@ -8900,6 +9240,18 @@ void Sema::ActOnBlockArguments(Declarator &ParamInfo, Scope *CurScope) {
   TypeSourceInfo *Sig = GetTypeForDeclarator(ParamInfo, CurScope);
   QualType T = Sig->getType();
 
+  // FIXME: We should allow unexpanded parameter packs here, but that would,
+  // in turn, make the block expression contain unexpanded parameter packs.
+  if (DiagnoseUnexpandedParameterPack(CaretLoc, Sig, UPPC_Block)) {
+    // Drop the parameters.
+    FunctionProtoType::ExtProtoInfo EPI;
+    EPI.HasTrailingReturn = false;
+    EPI.TypeQuals |= DeclSpec::TQ_const;
+    T = Context.getFunctionType(Context.DependentTy, /*Args=*/0, /*NumArgs=*/0,
+                                EPI);
+    Sig = Context.getTrivialTypeSourceInfo(T);
+  }
+
   // GetTypeForDeclarator always produces a function type for a block
   // literal signature.  Furthermore, it is always a FunctionProtoType
   // unless the function was written with a typedef.
@@ -9038,7 +9390,10 @@ ExprResult Sema::ActOnBlockStmtExpr(SourceLocation CaretLoc,
   PopExpressionEvaluationContext();
 
   BlockScopeInfo *BSI = cast<BlockScopeInfo>(FunctionScopes.back());
-  
+
+  if (BSI->HasImplicitReturnType)
+    deduceClosureReturnType(*BSI);
+
   PopDeclContext();
 
   QualType RetTy = Context.VoidTy;
@@ -9111,7 +9466,12 @@ ExprResult Sema::ActOnBlockStmtExpr(SourceLocation CaretLoc,
 
   BSI->TheDecl->setBody(cast<CompoundStmt>(Body));
 
-  computeNRVO(Body, getCurBlock());
+  // Try to apply the named return value optimization. We have to check again
+  // if we can do this, though, because blocks keep return statements around
 + // to deduce an implicit return type.
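// [Illustration, not part of the patch] Effect of the C++11 offsetof change
// above: POD-ness no longer matters, standard-layout does:
#include <cstddef>

struct Logger {
  Logger();                     // user-provided ctor: not a POD
  int level;                    // still standard-layout
};

// Accepted silently in C++11; C++98 warns about the non-POD type.
const std::size_t off = offsetof(Logger, level);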
+  if (getLangOpts().CPlusPlus && RetTy->isRecordType() &&
+      !BSI->TheDecl->isDependentContext())
+    computeNRVO(Body, getCurBlock());
 
   BlockExpr *Result = new (Context) BlockExpr(BSI->TheDecl, BlockTy);
   const AnalysisBasedWarnings::Policy &WP = AnalysisWarnings.getDefaultPolicy();
@@ -9182,14 +9542,14 @@ ExprResult Sema::BuildVAArgExpr(SourceLocation BuiltinLoc,
   if (!TInfo->getType()->isDependentType()) {
     if (RequireCompleteType(TInfo->getTypeLoc().getBeginLoc(), TInfo->getType(),
-                            PDiag(diag::err_second_parameter_to_va_arg_incomplete)
-                              << TInfo->getTypeLoc().getSourceRange()))
+                            diag::err_second_parameter_to_va_arg_incomplete,
+                            TInfo->getTypeLoc()))
       return ExprError();
 
     if (RequireNonAbstractType(TInfo->getTypeLoc().getBeginLoc(),
-                               TInfo->getType(),
-                               PDiag(diag::err_second_parameter_to_va_arg_abstract)
-                                 << TInfo->getTypeLoc().getSourceRange()))
+                               TInfo->getType(),
+                               diag::err_second_parameter_to_va_arg_abstract,
+                               TInfo->getTypeLoc()))
       return ExprError();
 
     if (!TInfo->getType().isPODType(Context)) {
@@ -9291,7 +9651,10 @@ bool Sema::DiagnoseAssignmentResult(AssignConvertType ConvTy,
   bool MayHaveFunctionDiff = false;
 
   switch (ConvTy) {
-  case Compatible: return false;
+  case Compatible:
+    DiagnoseAssignmentEnum(DstType, SrcType, SrcExpr);
+    return false;
+
   case PointerToInt:
     DiagKind = diag::ext_typecheck_convert_pointer_int;
     ConvHints.tryToFixConversion(SrcExpr, SrcType, DstType, *this);
@@ -9434,15 +9797,44 @@ bool Sema::DiagnoseAssignmentResult(AssignConvertType ConvTy,
 
 ExprResult Sema::VerifyIntegerConstantExpression(Expr *E,
                                                  llvm::APSInt *Result) {
-  return VerifyIntegerConstantExpression(E, Result,
-      PDiag(diag::err_expr_not_ice) << LangOpts.CPlusPlus);
+  class SimpleICEDiagnoser : public VerifyICEDiagnoser {
+  public:
+    virtual void diagnoseNotICE(Sema &S, SourceLocation Loc, SourceRange SR) {
+      S.Diag(Loc, diag::err_expr_not_ice) << S.LangOpts.CPlusPlus << SR;
+    }
+  } Diagnoser;
+
+  return VerifyIntegerConstantExpression(E, Result, Diagnoser);
+}
+
+ExprResult Sema::VerifyIntegerConstantExpression(Expr *E,
+                                                 llvm::APSInt *Result,
+                                                 unsigned DiagID,
+                                                 bool AllowFold) {
+  class IDDiagnoser : public VerifyICEDiagnoser {
+    unsigned DiagID;
+
+  public:
+    IDDiagnoser(unsigned DiagID)
+      : VerifyICEDiagnoser(DiagID == 0), DiagID(DiagID) { }
+
+    virtual void diagnoseNotICE(Sema &S, SourceLocation Loc, SourceRange SR) {
+      S.Diag(Loc, DiagID) << SR;
+    }
+  } Diagnoser(DiagID);
+
+  return VerifyIntegerConstantExpression(E, Result, Diagnoser, AllowFold);
+}
+
+void Sema::VerifyICEDiagnoser::diagnoseFold(Sema &S, SourceLocation Loc,
+                                            SourceRange SR) {
+  S.Diag(Loc, diag::ext_expr_not_ice) << SR << S.LangOpts.CPlusPlus;
 }
 
 ExprResult
 Sema::VerifyIntegerConstantExpression(Expr *E, llvm::APSInt *Result,
-                                      const PartialDiagnostic &NotIceDiag,
-                                      bool AllowFold,
-                                      const PartialDiagnostic &FoldDiag) {
+                                      VerifyICEDiagnoser &Diagnoser,
+                                      bool AllowFold) {
   SourceLocation DiagLoc = E->getLocStart();
 
   if (getLangOpts().CPlusPlus0x) {
@@ -9452,23 +9844,111 @@ Sema::VerifyIntegerConstantExpression(Expr *E, llvm::APSInt *Result,
     //   have a single non-explicit conversion function to an integral or
     //   unscoped enumeration type
    ExprResult Converted;
-    if (NotIceDiag.getDiagID()) {
-      Converted = ConvertToIntegralOrEnumerationType(
-        DiagLoc, E,
-        PDiag(diag::err_ice_not_integral),
-        PDiag(diag::err_ice_incomplete_type),
-        PDiag(diag::err_ice_explicit_conversion),
-        PDiag(diag::note_ice_conversion_here),
-        PDiag(diag::err_ice_ambiguous_conversion),
-        PDiag(diag::note_ice_conversion_here),
-        PDiag(0),
-        /*AllowScopedEnumerations*/ false);
+    if (!Diagnoser.Suppress) {
+      class CXX11ConvertDiagnoser : public ICEConvertDiagnoser {
+      public:
+        CXX11ConvertDiagnoser() : ICEConvertDiagnoser(false, true) { }
+
+        virtual DiagnosticBuilder diagnoseNotInt(Sema &S, SourceLocation Loc,
+                                                 QualType T) {
+          return S.Diag(Loc, diag::err_ice_not_integral) << T;
+        }
+
+        virtual DiagnosticBuilder diagnoseIncomplete(Sema &S,
+                                                     SourceLocation Loc,
+                                                     QualType T) {
+          return S.Diag(Loc, diag::err_ice_incomplete_type) << T;
+        }
+
+        virtual DiagnosticBuilder diagnoseExplicitConv(Sema &S,
+                                                       SourceLocation Loc,
+                                                       QualType T,
+                                                       QualType ConvTy) {
+          return S.Diag(Loc, diag::err_ice_explicit_conversion) << T << ConvTy;
+        }
+
+        virtual DiagnosticBuilder noteExplicitConv(Sema &S,
+                                                   CXXConversionDecl *Conv,
+                                                   QualType ConvTy) {
+          return S.Diag(Conv->getLocation(), diag::note_ice_conversion_here)
+                   << ConvTy->isEnumeralType() << ConvTy;
+        }
+
+        virtual DiagnosticBuilder diagnoseAmbiguous(Sema &S, SourceLocation Loc,
+                                                    QualType T) {
+          return S.Diag(Loc, diag::err_ice_ambiguous_conversion) << T;
+        }
+
+        virtual DiagnosticBuilder noteAmbiguous(Sema &S,
+                                                CXXConversionDecl *Conv,
+                                                QualType ConvTy) {
+          return S.Diag(Conv->getLocation(), diag::note_ice_conversion_here)
+                   << ConvTy->isEnumeralType() << ConvTy;
+        }
+
+        virtual DiagnosticBuilder diagnoseConversion(Sema &S,
+                                                     SourceLocation Loc,
+                                                     QualType T,
+                                                     QualType ConvTy) {
+          return DiagnosticBuilder::getEmpty();
+        }
+      } ConvertDiagnoser;
+
+      Converted = ConvertToIntegralOrEnumerationType(DiagLoc, E,
+                                                     ConvertDiagnoser,
+                                           /*AllowScopedEnumerations*/ false);
     } else {
       // The caller wants to silently enquire whether this is an ICE. Don't
       // produce any diagnostics if it isn't.
-      Converted = ConvertToIntegralOrEnumerationType(
-        DiagLoc, E, PDiag(), PDiag(), PDiag(), PDiag(),
-        PDiag(), PDiag(), PDiag(), false);
+      class SilentICEConvertDiagnoser : public ICEConvertDiagnoser {
+      public:
+        SilentICEConvertDiagnoser() : ICEConvertDiagnoser(true, true) { }
+
+        virtual DiagnosticBuilder diagnoseNotInt(Sema &S, SourceLocation Loc,
+                                                 QualType T) {
+          return DiagnosticBuilder::getEmpty();
+        }
+
+        virtual DiagnosticBuilder diagnoseIncomplete(Sema &S,
+                                                     SourceLocation Loc,
+                                                     QualType T) {
+          return DiagnosticBuilder::getEmpty();
+        }
+
+        virtual DiagnosticBuilder diagnoseExplicitConv(Sema &S,
+                                                       SourceLocation Loc,
+                                                       QualType T,
+                                                       QualType ConvTy) {
+          return DiagnosticBuilder::getEmpty();
+        }
+
+        virtual DiagnosticBuilder noteExplicitConv(Sema &S,
+                                                   CXXConversionDecl *Conv,
+                                                   QualType ConvTy) {
+          return DiagnosticBuilder::getEmpty();
+        }
+
+        virtual DiagnosticBuilder diagnoseAmbiguous(Sema &S, SourceLocation Loc,
+                                                    QualType T) {
+          return DiagnosticBuilder::getEmpty();
+        }
+
+        virtual DiagnosticBuilder noteAmbiguous(Sema &S,
+                                                CXXConversionDecl *Conv,
+                                                QualType ConvTy) {
+          return DiagnosticBuilder::getEmpty();
+        }
+
+        virtual DiagnosticBuilder diagnoseConversion(Sema &S,
+                                                     SourceLocation Loc,
+                                                     QualType T,
+                                                     QualType ConvTy) {
+          return DiagnosticBuilder::getEmpty();
+        }
+      } ConvertDiagnoser;
+
+      Converted = ConvertToIntegralOrEnumerationType(DiagLoc, E,
+                                                     ConvertDiagnoser, false);
     }
     if (Converted.isInvalid())
      return Converted;
@@ -9477,8 +9957,8 @@ Sema::VerifyIntegerConstantExpression(Expr *E, llvm::APSInt *Result,
       return ExprError();
   } else if (!E->getType()->isIntegralOrUnscopedEnumerationType()) {
    // An ICE must be of integral or unscoped enumeration type.
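// [Sketch, not part of the patch] How a caller might adopt the diagnoser
// interface introduced above; the surrounding code and diagnostic choice are
// placeholders, not real call sites from this patch:
//
//   class MyNotICEDiagnoser : public Sema::VerifyICEDiagnoser {
//   public:
//     virtual void diagnoseNotICE(Sema &S, SourceLocation Loc,
//                                 SourceRange SR) {
//       S.Diag(Loc, diag::err_expr_not_ice) << S.LangOpts.CPlusPlus << SR;
//     }
//   } Diagnoser;
//
//   llvm::APSInt Value;
//   ExprResult ICE = VerifyIntegerConstantExpression(E, &Value, Diagnoser);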
- if (NotIceDiag.getDiagID()) - Diag(DiagLoc, NotIceDiag) << E->getSourceRange(); + if (!Diagnoser.Suppress) + Diagnoser.diagnoseNotICE(*this, DiagLoc, E->getSourceRange()); return ExprError(); } @@ -9518,8 +9998,8 @@ Sema::VerifyIntegerConstantExpression(Expr *E, llvm::APSInt *Result, } if (!Folded || !AllowFold) { - if (NotIceDiag.getDiagID()) { - Diag(DiagLoc, NotIceDiag) << E->getSourceRange(); + if (!Diagnoser.Suppress) { + Diagnoser.diagnoseNotICE(*this, DiagLoc, E->getSourceRange()); for (unsigned I = 0, N = Notes.size(); I != N; ++I) Diag(Notes[I].first, Notes[I].second); } @@ -9527,11 +10007,7 @@ Sema::VerifyIntegerConstantExpression(Expr *E, llvm::APSInt *Result, return ExprError(); } - if (FoldDiag.getDiagID()) - Diag(DiagLoc, FoldDiag) << E->getSourceRange(); - else - Diag(DiagLoc, diag::ext_expr_not_ice) - << E->getSourceRange() << LangOpts.CPlusPlus; + Diagnoser.diagnoseFold(*this, DiagLoc, E->getSourceRange()); for (unsigned I = 0, N = Notes.size(); I != N; ++I) Diag(Notes[I].first, Notes[I].second); @@ -9569,7 +10045,7 @@ namespace { // Error on DeclRefExprs referring to FieldDecls. ExprResult TransformDeclRefExpr(DeclRefExpr *E) { if (isa(E->getDecl()) && - SemaRef.ExprEvalContexts.back().Context != Sema::Unevaluated) + !SemaRef.isUnevaluatedContext()) return SemaRef.Diag(E->getLocation(), diag::err_invalid_non_static_member_use) << E->getDecl() << E->getSourceRange(); @@ -9774,11 +10250,11 @@ void Sema::MarkFunctionReferenced(SourceLocation Loc, FunctionDecl *Func) { // FIXME: Is this really right? if (CurContext == Func) return; - // Instantiate the exception specification for any function which is + // Resolve the exception specification for any function which is // used: CodeGen will need it. const FunctionProtoType *FPT = Func->getType()->getAs(); - if (FPT && FPT->getExceptionSpecType() == EST_Uninstantiated) - InstantiateExceptionSpec(Loc, Func); + if (FPT && isUnresolvedExceptionSpec(FPT->getExceptionSpecType())) + ResolveExceptionSpec(Loc, FPT); // Implicit instantiation of function templates and member functions of // class templates. @@ -9891,14 +10367,15 @@ diagnoseUncapturableValueReference(Sema &S, SourceLocation loc, static ExprResult captureInLambda(Sema &S, LambdaScopeInfo *LSI, VarDecl *Var, QualType FieldType, QualType DeclRefType, - SourceLocation Loc) { + SourceLocation Loc, + bool RefersToEnclosingLocal) { CXXRecordDecl *Lambda = LSI->Lambda; // Build the non-static data member. FieldDecl *Field = FieldDecl::Create(S.Context, Lambda, Loc, Loc, 0, FieldType, S.Context.getTrivialTypeSourceInfo(FieldType, Loc), - 0, false, false); + 0, false, ICIS_NoInit); Field->setImplicit(true); Field->setAccess(AS_private); Lambda->addDecl(Field); @@ -9920,8 +10397,8 @@ static ExprResult captureInLambda(Sema &S, LambdaScopeInfo *LSI, // C++ [expr.prim.labda]p12: // An entity captured by a lambda-expression is odr-used (3.2) in // the scope containing the lambda-expression. 
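captureInLambda, continued just below, records each by-copy capture as an implicit FieldDecl of the closure class. Roughly what that models, written out by hand (illustrative only, not the clang representation):

int useClosure() {
  int enclosing = 42;
  auto L = [enclosing] { return enclosing + 1; };  // capture odr-uses 'enclosing'
  return L();
}

// Approximate closure type the capture builds:
class Closure {
  int enclosing;  // the implicit private FieldDecl, copy-initialized at capture
public:
  explicit Closure(int e) : enclosing(e) {}
  int operator()() const { return enclosing + 1; }
};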
- Expr *Ref = new (S.Context) DeclRefExpr(Var, false, DeclRefType, - VK_LValue, Loc); + Expr *Ref = new (S.Context) DeclRefExpr(Var, RefersToEnclosingLocal, + DeclRefType, VK_LValue, Loc); Var->setReferenced(true); Var->setUsed(true); @@ -10264,7 +10741,8 @@ bool Sema::tryCaptureVariable(VarDecl *Var, SourceLocation Loc, Expr *CopyExpr = 0; if (BuildAndDiagnose) { ExprResult Result = captureInLambda(*this, LSI, Var, CaptureType, - DeclRefType, Loc); + DeclRefType, Loc, + I == N-1); if (!Result.isInvalid()) CopyExpr = Result.take(); } @@ -10439,6 +10917,23 @@ static void MarkExprReferenced(Sema &SemaRef, SourceLocation Loc, } SemaRef.MarkAnyDeclReferenced(Loc, D); + + // If this is a call to a method via a cast, also mark the method in the + // derived class used in case codegen can devirtualize the call. + const MemberExpr *ME = dyn_cast(E); + if (!ME) + return; + CXXMethodDecl *MD = dyn_cast(ME->getMemberDecl()); + if (!MD) + return; + const Expr *Base = ME->getBase(); + const CXXRecordDecl *MostDerivedClassDecl = Base->getBestDynamicClassType(); + if (!MostDerivedClassDecl) + return; + CXXMethodDecl *DM = MD->getCorrespondingMethodInClass(MostDerivedClassDecl); + if (!DM) + return; + SemaRef.MarkAnyDeclReferenced(Loc, DM); } /// \brief Perform reference-marking and odr-use handling for a DeclRefExpr. @@ -10645,18 +11140,30 @@ bool Sema::CheckCallReturnType(QualType ReturnType, SourceLocation Loc, return false; } - PartialDiagnostic Note = - FD ? PDiag(diag::note_function_with_incomplete_return_type_declared_here) - << FD->getDeclName() : PDiag(); - SourceLocation NoteLoc = FD ? FD->getLocation() : SourceLocation(); - - if (RequireCompleteType(Loc, ReturnType, - FD ? - PDiag(diag::err_call_function_incomplete_return) - << CE->getSourceRange() << FD->getDeclName() : - PDiag(diag::err_call_incomplete_return) - << CE->getSourceRange(), - std::make_pair(NoteLoc, Note))) + class CallReturnIncompleteDiagnoser : public TypeDiagnoser { + FunctionDecl *FD; + CallExpr *CE; + + public: + CallReturnIncompleteDiagnoser(FunctionDecl *FD, CallExpr *CE) + : FD(FD), CE(CE) { } + + virtual void diagnose(Sema &S, SourceLocation Loc, QualType T) { + if (!FD) { + S.Diag(Loc, diag::err_call_incomplete_return) + << T << CE->getSourceRange(); + return; + } + + S.Diag(Loc, diag::err_call_function_incomplete_return) + << CE->getSourceRange() << FD->getDeclName() << T; + S.Diag(FD->getLocation(), + diag::note_function_with_incomplete_return_type_declared_here) + << FD->getDeclName(); + } + } Diagnoser(FD, CE); + + if (RequireCompleteType(Loc, ReturnType, Diagnoser)) return true; return false; diff --git a/lib/Sema/SemaExprCXX.cpp b/lib/Sema/SemaExprCXX.cpp index af86cb2..2740259 100644 --- a/lib/Sema/SemaExprCXX.cpp +++ b/lib/Sema/SemaExprCXX.cpp @@ -6,9 +6,10 @@ // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// -// -// This file implements semantic analysis for C++ expressions. -// +/// +/// \file +/// \brief Implements semantic analysis for C++ expressions. +/// //===----------------------------------------------------------------------===// #include "clang/Sema/SemaInternal.h" @@ -332,7 +333,7 @@ ExprResult Sema::BuildCXXTypeId(QualType TypeInfoType, // When typeid is applied to an expression other than an glvalue of a // polymorphic class type [...] [the] expression is an unevaluated // operand. [...] 
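The rule quoted above, in a standalone example: the operand of typeid is evaluated only when it is a glvalue of polymorphic class type; otherwise it is an unevaluated operand.

#include <typeinfo>

struct Poly  { virtual ~Poly() {} };   // polymorphic
struct Plain { int x; };               // not polymorphic

Plain *makePlain();                    // declaration only; never defined

const std::type_info &evaluated(Poly &p) {
  return typeid(p);                    // glvalue of polymorphic type: evaluated
}

const std::type_info &unevaluated() {
  return typeid(*makePlain());         // unevaluated operand: makePlain() is
}                                      // never called, so no definition needed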
- if (RecordD->isPolymorphic() && E->Classify(Context).isGLValue()) { + if (RecordD->isPolymorphic() && E->isGLValue()) { // The subexpression is potentially evaluated; switch the context // and recheck the subexpression. ExprResult Result = TranformToPotentiallyEvaluated(E); @@ -375,10 +376,20 @@ Sema::ActOnCXXTypeid(SourceLocation OpLoc, SourceLocation LParenLoc, LookupResult R(*this, TypeInfoII, SourceLocation(), LookupTagName); LookupQualifiedName(R, getStdNamespace()); CXXTypeInfoDecl = R.getAsSingle(); + // Microsoft's typeinfo doesn't have type_info in std but in the global + // namespace if _HAS_EXCEPTIONS is defined to 0. See PR13153. + if (!CXXTypeInfoDecl && LangOpts.MicrosoftMode) { + LookupQualifiedName(R, Context.getTranslationUnitDecl()); + CXXTypeInfoDecl = R.getAsSingle(); + } if (!CXXTypeInfoDecl) return ExprError(Diag(OpLoc, diag::err_need_header_before_typeid)); } + if (!getLangOpts().RTTI) { + return ExprError(Diag(OpLoc, diag::err_no_typeid_with_fno_rtti)); + } + QualType TypeInfoType = Context.getTypeDeclType(CXXTypeInfoDecl); if (isType) { @@ -584,14 +595,13 @@ ExprResult Sema::CheckCXXThrowOperand(SourceLocation ThrowLoc, Expr *E, } if (!isPointer || !Ty->isVoidType()) { if (RequireCompleteType(ThrowLoc, Ty, - PDiag(isPointer ? diag::err_throw_incomplete_ptr - : diag::err_throw_incomplete) - << E->getSourceRange())) + isPointer? diag::err_throw_incomplete_ptr + : diag::err_throw_incomplete, + E->getSourceRange())) return ExprError(); if (RequireNonAbstractType(ThrowLoc, E->getType(), - PDiag(diag::err_throw_abstract_type) - << E->getSourceRange())) + diag::err_throw_abstract_type, E)) return ExprError(); } @@ -737,7 +747,7 @@ void Sema::CheckCXXThisCapture(SourceLocation Loc, bool Explicit) { FieldDecl *Field = FieldDecl::Create(Context, Lambda, Loc, Loc, 0, ThisTy, Context.getTrivialTypeSourceInfo(ThisTy, Loc), - 0, false, false); + 0, false, ICIS_NoInit); Field->setImplicit(true); Field->setAccess(AS_private); Lambda->addDecl(Field); @@ -839,8 +849,7 @@ Sema::BuildCXXTypeConstructExpr(TypeSourceInfo *TInfo, if (!Ty->isVoidType() && RequireCompleteType(TyBeginLoc, ElemTy, - PDiag(diag::err_invalid_incomplete_type_use) - << FullRange)) + diag::err_invalid_incomplete_type_use, FullRange)) return ExprError(); if (RequireNonAbstractType(TyBeginLoc, Ty, @@ -932,7 +941,7 @@ static bool doesUsualArrayDeleteWantSize(Sema &S, SourceLocation loc, } /// \brief Parsed a C++ 'new' expression (C++ 5.3.4). - +/// /// E.g.: /// @code new (memory) int[size][4] @endcode /// or @@ -945,10 +954,8 @@ static bool doesUsualArrayDeleteWantSize(Sema &S, SourceLocation loc, /// \param PlacementRParen Closing paren of the placement arguments. /// \param TypeIdParens If the type is in parens, the source range. /// \param D The type to be allocated, as well as array dimensions. -/// \param ConstructorLParen Opening paren of the constructor args, empty if -/// initializer-list syntax is used. -/// \param ConstructorArgs Constructor/initialization arguments. -/// \param ConstructorRParen Closing paren of the constructor args. +/// \param Initializer The initializing expression or initializer-list, or null +/// if there is none. ExprResult Sema::ActOnCXXNew(SourceLocation StartLoc, bool UseGlobal, SourceLocation PlacementLParen, MultiExprArg PlacementArgs, @@ -960,7 +967,7 @@ Sema::ActOnCXXNew(SourceLocation StartLoc, bool UseGlobal, // If the specified type is an array, unwrap it and save the expression. 
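For orientation, the 'auto' cases in new-expressions that the hunks just below handle, in a minimal illustration:

int *a = new auto(5);           // OK: deduced from the initializer, yields int*
// auto *b = new auto[10];      // rejected: err_new_array_of_auto
// auto *c = new auto;          // rejected: err_auto_new_requires_ctor_arg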
if (D.getNumTypeObjects() > 0 && D.getTypeObject(0).Kind == DeclaratorChunk::Array) { - DeclaratorChunk &Chunk = D.getTypeObject(0); + DeclaratorChunk &Chunk = D.getTypeObject(0); if (TypeContainsAuto) return ExprError(Diag(Chunk.Loc, diag::err_new_array_of_auto) << D.getSourceRange()); @@ -984,8 +991,10 @@ Sema::ActOnCXXNew(SourceLocation StartLoc, bool UseGlobal, DeclaratorChunk::ArrayTypeInfo &Array = D.getTypeObject(I).Arr; if (Expr *NumElts = (Expr *)Array.NumElts) { if (!NumElts->isTypeDependent() && !NumElts->isValueDependent()) { - Array.NumElts = VerifyIntegerConstantExpression(NumElts, 0, - PDiag(diag::err_new_array_nonconst)).take(); + Array.NumElts + = VerifyIntegerConstantExpression(NumElts, 0, + diag::err_new_array_nonconst) + .take(); if (!Array.NumElts) return ExprError(); } @@ -1084,8 +1093,10 @@ Sema::BuildCXXNew(SourceLocation StartLoc, bool UseGlobal, } } - // C++0x [decl.spec.auto]p6. Deduce the type which 'auto' stands in for. - if (TypeMayContainAuto && AllocType->getContainedAutoType()) { + // C++11 [decl.spec.auto]p6. Deduce the type which 'auto' stands in for. + AutoType *AT = 0; + if (TypeMayContainAuto && + (AT = AllocType->getContainedAutoType()) && !AT->isDeduced()) { if (initStyle == CXXNewExpr::NoInit || NumInits == 0) return ExprError(Diag(StartLoc, diag::err_auto_new_requires_ctor_arg) << AllocType << TypeRange); @@ -1101,8 +1112,7 @@ Sema::BuildCXXNew(SourceLocation StartLoc, bool UseGlobal, } Expr *Deduce = Inits[0]; TypeSourceInfo *DeducedType = 0; - if (DeduceAutoType(AllocTypeInfo, Deduce, DeducedType) == - DAR_Failed) + if (DeduceAutoType(AllocTypeInfo, Deduce, DeducedType) == DAR_Failed) return ExprError(Diag(StartLoc, diag::err_auto_new_deduction_failure) << AllocType << Deduce->getType() << TypeRange << Deduce->getSourceRange()); @@ -1150,19 +1160,64 @@ Sema::BuildCXXNew(SourceLocation StartLoc, bool UseGlobal, // enumeration type, or a class type for which a single non-explicit // conversion function to integral or unscoped enumeration type exists. if (ArraySize && !ArraySize->isTypeDependent()) { - ExprResult ConvertedSize = ConvertToIntegralOrEnumerationType( - StartLoc, ArraySize, - PDiag(diag::err_array_size_not_integral) << getLangOpts().CPlusPlus0x, - PDiag(diag::err_array_size_incomplete_type) - << ArraySize->getSourceRange(), - PDiag(diag::err_array_size_explicit_conversion), - PDiag(diag::note_array_size_conversion), - PDiag(diag::err_array_size_ambiguous_conversion), - PDiag(diag::note_array_size_conversion), - PDiag(getLangOpts().CPlusPlus0x ? 
- diag::warn_cxx98_compat_array_size_conversion : - diag::ext_array_size_conversion), - /*AllowScopedEnumerations*/ false); + class SizeConvertDiagnoser : public ICEConvertDiagnoser { + Expr *ArraySize; + + public: + SizeConvertDiagnoser(Expr *ArraySize) + : ICEConvertDiagnoser(false, false), ArraySize(ArraySize) { } + + virtual DiagnosticBuilder diagnoseNotInt(Sema &S, SourceLocation Loc, + QualType T) { + return S.Diag(Loc, diag::err_array_size_not_integral) + << S.getLangOpts().CPlusPlus0x << T; + } + + virtual DiagnosticBuilder diagnoseIncomplete(Sema &S, SourceLocation Loc, + QualType T) { + return S.Diag(Loc, diag::err_array_size_incomplete_type) + << T << ArraySize->getSourceRange(); + } + + virtual DiagnosticBuilder diagnoseExplicitConv(Sema &S, + SourceLocation Loc, + QualType T, + QualType ConvTy) { + return S.Diag(Loc, diag::err_array_size_explicit_conversion) << T << ConvTy; + } + + virtual DiagnosticBuilder noteExplicitConv(Sema &S, + CXXConversionDecl *Conv, + QualType ConvTy) { + return S.Diag(Conv->getLocation(), diag::note_array_size_conversion) + << ConvTy->isEnumeralType() << ConvTy; + } + + virtual DiagnosticBuilder diagnoseAmbiguous(Sema &S, SourceLocation Loc, + QualType T) { + return S.Diag(Loc, diag::err_array_size_ambiguous_conversion) << T; + } + + virtual DiagnosticBuilder noteAmbiguous(Sema &S, CXXConversionDecl *Conv, + QualType ConvTy) { + return S.Diag(Conv->getLocation(), diag::note_array_size_conversion) + << ConvTy->isEnumeralType() << ConvTy; + } + + virtual DiagnosticBuilder diagnoseConversion(Sema &S, SourceLocation Loc, + QualType T, + QualType ConvTy) { + return S.Diag(Loc, + S.getLangOpts().CPlusPlus0x + ? diag::warn_cxx98_compat_array_size_conversion + : diag::ext_array_size_conversion) + << T << ConvTy->isEnumeralType() << ConvTy; + } + } SizeDiagnoser(ArraySize); + + ExprResult ConvertedSize + = ConvertToIntegralOrEnumerationType(StartLoc, ArraySize, SizeDiagnoser, + /*AllowScopedEnumerations*/ false); if (ConvertedSize.isInvalid()) return ExprError(); @@ -1401,9 +1456,7 @@ bool Sema::CheckAllocatedType(QualType AllocType, SourceLocation Loc, return Diag(Loc, diag::err_bad_new_type) << AllocType << 1 << R; else if (!AllocType->isDependentType() && - RequireCompleteType(Loc, AllocType, - PDiag(diag::err_new_incomplete_type) - << R)) + RequireCompleteType(Loc, AllocType, diag::err_new_incomplete_type,R)) return true; else if (RequireNonAbstractType(Loc, AllocType, diag::err_allocation_of_abstract_type)) @@ -2014,7 +2067,7 @@ Sema::ActOnCXXDelete(SourceLocation StartLoc, bool UseGlobal, if (const RecordType *Record = Type->getAs()) { if (RequireCompleteType(StartLoc, Type, - PDiag(diag::err_delete_incomplete_class_type))) + diag::err_delete_incomplete_class_type)) return ExprError(); SmallVector ObjectPtrConversions; @@ -2084,8 +2137,7 @@ Sema::ActOnCXXDelete(SourceLocation StartLoc, bool UseGlobal, << Type << Ex.get()->getSourceRange()); } else if (!Pointee->isDependentType()) { if (!RequireCompleteType(StartLoc, Pointee, - PDiag(diag::warn_delete_incomplete) - << Ex.get()->getSourceRange())) { + diag::warn_delete_incomplete, Ex.get())) { if (const RecordType *RT = PointeeElem->getAs()) PointeeRD = cast(RT->getDecl()); } @@ -2096,9 +2148,6 @@ Sema::ActOnCXXDelete(SourceLocation StartLoc, bool UseGlobal, // delete-expression; it is not necessary to cast away the constness // (5.2.11) of the pointer expression before it is used as the operand // of the delete-expression. 
] - if (!Context.hasSameType(Ex.get()->getType(), Context.VoidPtrTy)) - Ex = Owned(ImplicitCastExpr::Create(Context, Context.VoidPtrTy, - CK_BitCast, Ex.take(), 0, VK_RValue)); if (Pointee->isArrayType() && !ArrayForm) { Diag(StartLoc, diag::warn_delete_array_type) @@ -2176,6 +2225,9 @@ Sema::ActOnCXXDelete(SourceLocation StartLoc, bool UseGlobal, DeclareGlobalNewDelete(); DeclContext *TUDecl = Context.getTranslationUnitDecl(); Expr *Arg = Ex.get(); + if (!Context.hasSameType(Arg->getType(), Context.VoidPtrTy)) + Arg = ImplicitCastExpr::Create(Context, Context.VoidPtrTy, + CK_BitCast, Arg, 0, VK_RValue); if (FindAllocationOverload(StartLoc, SourceRange(), DeleteName, &Arg, 1, TUDecl, /*AllowMissing=*/false, OperatorDelete)) @@ -3138,8 +3190,6 @@ static bool EvaluateUnaryTypeTrait(Sema &Self, UnaryTypeTrait UTT, CPT = Self.ResolveExceptionSpec(KeyLoc, CPT); if (!CPT) return false; - if (CPT->getExceptionSpecType() == EST_Delayed) - return false; if (!CPT->isNothrow(Self.Context)) return false; } @@ -3180,8 +3230,6 @@ static bool EvaluateUnaryTypeTrait(Sema &Self, UnaryTypeTrait UTT, CPT = Self.ResolveExceptionSpec(KeyLoc, CPT); if (!CPT) return false; - if (CPT->getExceptionSpecType() == EST_Delayed) - return false; // FIXME: check whether evaluating default arguments can throw. // For now, we'll be conservative and assume that they can throw. if (!CPT->isNothrow(Self.Context) || CPT->getNumArgs() > 1) @@ -3218,8 +3266,6 @@ static bool EvaluateUnaryTypeTrait(Sema &Self, UnaryTypeTrait UTT, CPT = Self.ResolveExceptionSpec(KeyLoc, CPT); if (!CPT) return false; - if (CPT->getExceptionSpecType() == EST_Delayed) - return false; // TODO: check whether evaluating default arguments can throw. // For now, we'll be conservative and assume that they can throw. return CPT->isNothrow(Self.Context) && CPT->getNumArgs() == 0; @@ -3284,6 +3330,25 @@ ExprResult Sema::ActOnBinaryTypeTrait(BinaryTypeTrait BTT, return BuildBinaryTypeTrait(BTT, KWLoc, LhsTSInfo, RhsTSInfo, RParen); } +/// \brief Determine whether T has a non-trivial Objective-C lifetime in +/// ARC mode. +static bool hasNontrivialObjCLifetime(QualType T) { + switch (T.getObjCLifetime()) { + case Qualifiers::OCL_ExplicitNone: + return false; + + case Qualifiers::OCL_Strong: + case Qualifiers::OCL_Weak: + case Qualifiers::OCL_Autoreleasing: + return true; + + case Qualifiers::OCL_None: + return T->isObjCLifetimeType(); + } + + llvm_unreachable("Unknown ObjC lifetime qualifier"); +} + static bool evaluateTypeTrait(Sema &S, TypeTrait Kind, SourceLocation KWLoc, ArrayRef Args, SourceLocation RParenLoc) { @@ -3357,8 +3422,14 @@ static bool evaluateTypeTrait(Sema &S, TypeTrait Kind, SourceLocation KWLoc, ArgExprs.size())); if (Result.isInvalid() || SFINAE.hasErrorOccurred()) return false; - - // The initialization succeeded; not make sure there are no non-trivial + + // Under Objective-C ARC, if the destination has non-trivial Objective-C + // lifetime, this is a non-trivial construction. + if (S.getLangOpts().ObjCAutoRefCount && + hasNontrivialObjCLifetime(Args[0]->getType().getNonReferenceType())) + return false; + + // The initialization succeeded; now make sure there are no non-trivial // calls. return !Result.get()->hasNonTrivialCall(S.Context); } @@ -3471,9 +3542,25 @@ static bool EvaluateBinaryTypeTrait(Sema &Self, BinaryTypeTrait BTT, // We model the initialization as a copy-initialization of a temporary // of the appropriate type, which for this expression is identical to the // return statement (since NRVO doesn't apply). 
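The __is_convertible_to evaluation above models the trait as copy-initialization of a temporary, with explicit carve-outs for function types, array types, and void. A library-style sketch of the same core test (illustrative; the carve-outs in the patch exist precisely because this formulation cannot express them):

#include <type_traits>
#include <utility>

template <class To> void probe(To);   // copy-initializes its parameter

template <class From, class To, class = void>
struct is_convertible_sketch : std::false_type {};

template <class From, class To>
struct is_convertible_sketch<From, To,
    decltype(probe<To>(std::declval<From>()))> : std::true_type {};

static_assert(is_convertible_sketch<int, long>::value, "int -> long converts");
static_assert(!is_convertible_sketch<int*, long*>::value, "no pointer reinterpret");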
+ + // Functions aren't allowed to return function or array types. + if (RhsT->isFunctionType() || RhsT->isArrayType()) + return false; + + // A return statement in a void function must have void type. + if (RhsT->isVoidType()) + return LhsT->isVoidType(); + + // A function definition requires a complete, non-abstract return type. + if (Self.RequireCompleteType(KeyLoc, RhsT, 0) || + Self.RequireNonAbstractType(KeyLoc, RhsT, 0)) + return false; + + // Compute the result of add_rvalue_reference. if (LhsT->isObjectType() || LhsT->isFunctionType()) LhsT = Self.Context.getRValueReferenceType(LhsT); - + + // Build a fake source and destination for initialization. InitializedEntity To(InitializedEntity::InitializeTemporary(RhsT)); OpaqueValueExpr From(KeyLoc, LhsT.getNonLValueExprType(Self.Context), Expr::getValueKindForType(LhsT)); @@ -3539,6 +3626,12 @@ static bool EvaluateBinaryTypeTrait(Sema &Self, BinaryTypeTrait BTT, if (Result.isInvalid() || SFINAE.hasErrorOccurred()) return false; + // Under Objective-C ARC, if the destination has non-trivial Objective-C + // lifetime, this is a non-trivial assignment. + if (Self.getLangOpts().ObjCAutoRefCount && + hasNontrivialObjCLifetime(LhsT.getNonReferenceType())) + return false; + return !Result.get()->hasNonTrivialCall(Self.Context); } } @@ -3615,7 +3708,7 @@ static uint64_t EvaluateArrayTypeTrait(Sema &Self, ArrayTypeTrait ATT, llvm::APSInt Value; uint64_t Dim; if (Self.VerifyIntegerConstantExpression(DimExpr, &Value, - Self.PDiag(diag::err_dimension_expr_not_constant_integer), + diag::err_dimension_expr_not_constant_integer, false).isInvalid()) return 0; if (Value.isSigned() && Value.isNegative()) { @@ -3767,8 +3860,8 @@ QualType Sema::CheckPointerToMemberOperands(ExprResult &LHS, ExprResult &RHS, if (!Context.hasSameUnqualifiedType(Class, LHSType)) { // If we want to check the hierarchy, we need a complete type. - if (RequireCompleteType(Loc, LHSType, PDiag(diag::err_bad_memptr_lhs) - << OpSpelling << (int)isIndirect)) { + if (RequireCompleteType(Loc, LHSType, diag::err_bad_memptr_lhs, + OpSpelling, (int)isIndirect)) { return QualType(); } CXXBasePaths Paths(/*FindAmbiguities=*/true, /*RecordPaths=*/true, @@ -4023,13 +4116,14 @@ static bool ConvertForConditional(Sema &Self, ExprResult &E, QualType T) { /// /// See C++ [expr.cond]. Note that LHS is never null, even for the GNU x ?: y /// extension. In this case, LHS == Cond. (But they're not aliases.) -QualType Sema::CXXCheckConditionalOperands(ExprResult &Cond, ExprResult &LHS, ExprResult &RHS, - ExprValueKind &VK, ExprObjectKind &OK, +QualType Sema::CXXCheckConditionalOperands(ExprResult &Cond, ExprResult &LHS, + ExprResult &RHS, ExprValueKind &VK, + ExprObjectKind &OK, SourceLocation QuestionLoc) { // FIXME: Handle C99's complex types, vector types, block pointers and Obj-C++ // interface pointers. - // C++0x 5.16p1 + // C++11 [expr.cond]p1 // The first expression is contextually converted to bool. if (!Cond.get()->isTypeDependent()) { ExprResult CondRes = CheckCXXBooleanCondition(Cond.take()); @@ -4046,7 +4140,7 @@ QualType Sema::CXXCheckConditionalOperands(ExprResult &Cond, ExprResult &LHS, Ex if (LHS.get()->isTypeDependent() || RHS.get()->isTypeDependent()) return Context.DependentTy; - // C++0x 5.16p2 + // C++11 [expr.cond]p2 // If either the second or the third operand has type (cv) void, ... 
QualType LTy = LHS.get()->getType(); QualType RTy = RHS.get()->getType(); @@ -4059,12 +4153,26 @@ QualType Sema::CXXCheckConditionalOperands(ExprResult &Cond, ExprResult &LHS, Ex RHS = DefaultFunctionArrayLvalueConversion(RHS.take()); if (LHS.isInvalid() || RHS.isInvalid()) return QualType(); + + // Finish off the lvalue-to-rvalue conversion by copy-initializing a + // temporary if necessary. DefaultFunctionArrayLvalueConversion doesn't + // do this part for us. + ExprResult &NonVoid = LVoid ? RHS : LHS; + if (NonVoid.get()->getType()->isRecordType() && + NonVoid.get()->isGLValue()) { + InitializedEntity Entity = + InitializedEntity::InitializeTemporary(NonVoid.get()->getType()); + NonVoid = PerformCopyInitialization(Entity, SourceLocation(), NonVoid); + if (NonVoid.isInvalid()) + return QualType(); + } + LTy = LHS.get()->getType(); RTy = RHS.get()->getType(); // ... and one of the following shall hold: // -- The second or the third operand (but not both) is a throw- - // expression; the result is of the type of the other and is an rvalue. + // expression; the result is of the type of the other and is a prvalue. bool LThrow = isa(LHS.get()); bool RThrow = isa(RHS.get()); if (LThrow && !RThrow) @@ -4073,7 +4181,7 @@ QualType Sema::CXXCheckConditionalOperands(ExprResult &Cond, ExprResult &LHS, Ex return LTy; // -- Both the second and third operands have type void; the result is of - // type void and is an rvalue. + // type void and is a prvalue. if (LVoid && RVoid) return Context.VoidTy; @@ -4086,10 +4194,10 @@ QualType Sema::CXXCheckConditionalOperands(ExprResult &Cond, ExprResult &LHS, Ex // Neither is void. - // C++0x 5.16p3 + // C++11 [expr.cond]p3 // Otherwise, if the second and third operand have different types, and - // either has (cv) class type, and attempt is made to convert each of those - // operands to the other. + // either has (cv) class type [...] an attempt is made to convert each of + // those operands to the type of the other. if (!Context.hasSameType(LTy, RTy) && (LTy->isRecordType() || RTy->isRecordType())) { ImplicitConversionSequence ICSLeftToRight, ICSRightToLeft; @@ -4122,7 +4230,31 @@ QualType Sema::CXXCheckConditionalOperands(ExprResult &Cond, ExprResult &LHS, Ex } } - // C++0x 5.16p4 + // C++11 [expr.cond]p3 + // if both are glvalues of the same value category and the same type except + // for cv-qualification, an attempt is made to convert each of those + // operands to the type of the other. + ExprValueKind LVK = LHS.get()->getValueKind(); + ExprValueKind RVK = RHS.get()->getValueKind(); + if (!Context.hasSameType(LTy, RTy) && + Context.hasSameUnqualifiedType(LTy, RTy) && + LVK == RVK && LVK != VK_RValue) { + // Since the unqualified types are reference-related and we require the + // result to be as if a reference bound directly, the only conversion + // we can perform is to add cv-qualifiers. 
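A small example of the [expr.cond]p3 case handled just below, where both operands are glvalues of the same type except for cv-qualification:

int mut = 0;
const int immut = 1;

const int &pick(bool c) {
  return c ? mut : immut;   // 'mut' gains const; result is an lvalue
}                           // of type const int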
+ Qualifiers LCVR = Qualifiers::fromCVRMask(LTy.getCVRQualifiers()); + Qualifiers RCVR = Qualifiers::fromCVRMask(RTy.getCVRQualifiers()); + if (RCVR.isStrictSupersetOf(LCVR)) { + LHS = ImpCastExprToType(LHS.take(), RTy, CK_NoOp, LVK); + LTy = LHS.get()->getType(); + } + else if (LCVR.isStrictSupersetOf(RCVR)) { + RHS = ImpCastExprToType(RHS.take(), LTy, CK_NoOp, RVK); + RTy = RHS.get()->getType(); + } + } + + // C++11 [expr.cond]p4 // If the second and third operands are glvalues of the same value // category and have the same type, the result is of that type and // value category and it is a bit-field if the second or the third @@ -4130,9 +4262,7 @@ QualType Sema::CXXCheckConditionalOperands(ExprResult &Cond, ExprResult &LHS, Ex // We only extend this to bitfields, not to the crazy other kinds of // l-values. bool Same = Context.hasSameType(LTy, RTy); - if (Same && - LHS.get()->isGLValue() && - LHS.get()->getValueKind() == RHS.get()->getValueKind() && + if (Same && LVK == RVK && LVK != VK_RValue && LHS.get()->isOrdinaryOrBitFieldObject() && RHS.get()->isOrdinaryOrBitFieldObject()) { VK = LHS.get()->getValueKind(); @@ -4142,8 +4272,8 @@ QualType Sema::CXXCheckConditionalOperands(ExprResult &Cond, ExprResult &LHS, Ex return LTy; } - // C++0x 5.16p5 - // Otherwise, the result is an rvalue. If the second and third operands + // C++11 [expr.cond]p5 + // Otherwise, the result is a prvalue. If the second and third operands // do not have the same type, and either has (cv) class type, ... if (!Same && (LTy->isRecordType() || RTy->isRecordType())) { // ... overload resolution is used to determine the conversions (if any) @@ -4153,8 +4283,8 @@ QualType Sema::CXXCheckConditionalOperands(ExprResult &Cond, ExprResult &LHS, Ex return QualType(); } - // C++0x 5.16p6 - // LValue-to-rvalue, array-to-pointer, and function-to-pointer standard + // C++11 [expr.cond]p6 + // Lvalue-to-rvalue, array-to-pointer, and function-to-pointer standard // conversions are performed on the second and third operands. LHS = DefaultFunctionArrayLvalueConversion(LHS.take()); RHS = DefaultFunctionArrayLvalueConversion(RHS.take()); @@ -4207,9 +4337,11 @@ QualType Sema::CXXCheckConditionalOperands(ExprResult &Cond, ExprResult &LHS, Ex } // -- The second and third operands have pointer type, or one has pointer - // type and the other is a null pointer constant; pointer conversions - // and qualification conversions are performed to bring them to their - // composite pointer type. The result is of the composite pointer type. + // type and the other is a null pointer constant, or both are null + // pointer constants, at least one of which is non-integral; pointer + // conversions and qualification conversions are performed to bring them + // to their composite pointer type. The result is of the composite + // pointer type. // -- The second and third operands have pointer to member type, or one has // pointer to member type and the other is a null pointer constant; // pointer to member conversions and qualification conversions are @@ -4247,7 +4379,7 @@ QualType Sema::CXXCheckConditionalOperands(ExprResult &Cond, ExprResult &LHS, Ex /// \brief Find a merged pointer type and convert the two expressions to it. /// /// This finds the composite pointer type (or member pointer type) for @p E1 -/// and @p E2 according to C++0x 5.9p2. It converts both expressions to this +/// and @p E2 according to C++11 5.9p2. It converts both expressions to this /// type and returns it. /// It does not emit diagnostics. 
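A few concrete composite pointer types under the updated rule, including the new null-pointer-constant case (illustrative C++11):

#include <cstddef>
#include <type_traits>

int i = 0;
const int ci = 1;

void composites(bool c) {
  int *p = &i;
  const int *q = &ci;

  // int* vs const int*: qualification conversion, composite is const int*.
  static_assert(std::is_same<decltype(c ? p : q), const int *>::value, "");

  // Pointer vs null pointer constant: the pointer's type wins.
  static_assert(std::is_same<decltype(c ? p : nullptr), int *>::value, "");

  // Two null pointer constants, one non-integral: std::nullptr_t (new case).
  static_assert(std::is_same<decltype(c ? nullptr : 0), std::nullptr_t>::value,
                "");
}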
/// @@ -4267,15 +4399,27 @@ QualType Sema::FindCompositePointerType(SourceLocation Loc, assert(getLangOpts().CPlusPlus && "This function assumes C++"); QualType T1 = E1->getType(), T2 = E2->getType(); - if (!T1->isAnyPointerType() && !T1->isMemberPointerType() && - !T2->isAnyPointerType() && !T2->isMemberPointerType()) - return QualType(); - - // C++0x 5.9p2 + // C++11 5.9p2 // Pointer conversions and qualification conversions are performed on // pointer operands to bring them to their composite pointer type. If // one operand is a null pointer constant, the composite pointer type is - // the type of the other operand. + // std::nullptr_t if the other operand is also a null pointer constant or, + // if the other operand is a pointer, the type of the other operand. + if (!T1->isAnyPointerType() && !T1->isMemberPointerType() && + !T2->isAnyPointerType() && !T2->isMemberPointerType()) { + if (T1->isNullPtrType() && + E2->isNullPointerConstant(Context, Expr::NPC_ValueDependentIsNull)) { + E2 = ImpCastExprToType(E2, T1, CK_NullToPointer).take(); + return T1; + } + if (T2->isNullPtrType() && + E1->isNullPointerConstant(Context, Expr::NPC_ValueDependentIsNull)) { + E1 = ImpCastExprToType(E1, T2, CK_NullToPointer).take(); + return T2; + } + return QualType(); + } + if (E1->isNullPointerConstant(Context, Expr::NPC_ValueDependentIsNull)) { if (T2->isMemberPointerType()) E1 = ImpCastExprToType(E1, T2, CK_NullToMemberPointer).take(); @@ -4522,8 +4666,8 @@ ExprResult Sema::MaybeBindToTemporary(Expr *E) { ObjCMethodDecl *D = 0; if (ObjCMessageExpr *Send = dyn_cast(E)) { D = Send->getMethodDecl(); - } else if (ObjCNumericLiteral *NumLit = dyn_cast(E)) { - D = NumLit->getObjCNumericLiteralMethod(); + } else if (ObjCBoxedExpr *BoxedExpr = dyn_cast(E)) { + D = BoxedExpr->getBoxingMethod(); } else if (ObjCArrayLiteral *ArrayLit = dyn_cast(E)) { D = ArrayLit->getArrayWithObjectsMethod(); } else if (ObjCDictionaryLiteral *DictLit @@ -4706,6 +4850,11 @@ ExprResult Sema::ActOnDecltypeExpression(Expr *E) { // Disable the special decltype handling now. Rec.IsDecltype = false; + // In MS mode, don't perform any extra checking of call return types within a + // decltype expression. + if (getLangOpts().MicrosoftMode) + return Owned(E); + // Perform the semantic checks we delayed until this point. CallExpr *TopCall = dyn_cast(E); for (unsigned I = 0, N = Rec.DelayedDecltypeCalls.size(); I != N; ++I) { @@ -4733,11 +4882,11 @@ ExprResult Sema::ActOnDecltypeExpression(Expr *E) { CXXDestructorDecl *Destructor = LookupDestructor(RD); Temp->setDestructor(Destructor); - MarkFunctionReferenced(E->getExprLoc(), Destructor); - CheckDestructorAccess(E->getExprLoc(), Destructor, + MarkFunctionReferenced(Bind->getExprLoc(), Destructor); + CheckDestructorAccess(Bind->getExprLoc(), Destructor, PDiag(diag::err_access_dtor_temp) - << E->getType()); - DiagnoseUseOfDecl(Destructor, E->getExprLoc()); + << Bind->getType()); + DiagnoseUseOfDecl(Destructor, Bind->getExprLoc()); // We need a cleanup, but we don't need to remember the temporary. ExprNeedsCleanups = true; @@ -4833,8 +4982,7 @@ Sema::ActOnStartCXXMemberReference(Scope *S, Expr *Base, SourceLocation OpLoc, // the member function body. 
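Context for the decltype hunks above: a call appearing as the operand of decltype is never evaluated, so its return type need not be complete and no temporary is created — which is why those checks are delayed (or, in Microsoft mode, skipped entirely).

struct Incomplete;             // never defined
Incomplete make();             // declaration only

decltype(make()) *ptr = 0;     // OK: Incomplete*; the call is not evaluated,
                               // so no complete type or destructor is required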
if (!BaseType->isDependentType() && !isThisOutsideMemberFunctionBody(BaseType) && - RequireCompleteType(OpLoc, BaseType, - PDiag(diag::err_incomplete_member_access))) + RequireCompleteType(OpLoc, BaseType, diag::err_incomplete_member_access)) return ExprError(); // C++ [basic.lookup.classref]p2: @@ -5222,6 +5370,61 @@ ExprResult Sema::ActOnNoexceptExpr(SourceLocation KeyLoc, SourceLocation, return BuildCXXNoexceptExpr(KeyLoc, Operand, RParen); } +static bool IsSpecialDiscardedValue(Expr *E) { + // In C++11, discarded-value expressions of a certain form are special, + // according to [expr]p10: + // The lvalue-to-rvalue conversion (4.1) is applied only if the + // expression is an lvalue of volatile-qualified type and it has + // one of the following forms: + E = E->IgnoreParens(); + + // - id-expression (5.1.1), + if (isa(E)) + return true; + + // - subscripting (5.2.1), + if (isa(E)) + return true; + + // - class member access (5.2.5), + if (isa(E)) + return true; + + // - indirection (5.3.1), + if (UnaryOperator *UO = dyn_cast(E)) + if (UO->getOpcode() == UO_Deref) + return true; + + if (BinaryOperator *BO = dyn_cast(E)) { + // - pointer-to-member operation (5.5), + if (BO->isPtrMemOp()) + return true; + + // - comma expression (5.18) where the right operand is one of the above. + if (BO->getOpcode() == BO_Comma) + return IsSpecialDiscardedValue(BO->getRHS()); + } + + // - conditional expression (5.16) where both the second and the third + // operands are one of the above, or + if (ConditionalOperator *CO = dyn_cast(E)) + return IsSpecialDiscardedValue(CO->getTrueExpr()) && + IsSpecialDiscardedValue(CO->getFalseExpr()); + // The related edge case of "*x ?: *x". + if (BinaryConditionalOperator *BCO = + dyn_cast(E)) { + if (OpaqueValueExpr *OVE = dyn_cast(BCO->getTrueExpr())) + return IsSpecialDiscardedValue(OVE->getSourceExpr()) && + IsSpecialDiscardedValue(BCO->getFalseExpr()); + } + + // Objective-C++ extensions to the rule. + if (isa(E) || isa(E)) + return true; + + return false; +} + /// Perform the conversions required for an expression used in a /// context that ignores the result. ExprResult Sema::IgnoredValueConversions(Expr *E) { @@ -5246,8 +5449,21 @@ ExprResult Sema::IgnoredValueConversions(Expr *E) { return Owned(E); } - // Otherwise, this rule does not apply in C++, at least not for the moment. - if (getLangOpts().CPlusPlus) return Owned(E); + if (getLangOpts().CPlusPlus) { + // The C++11 standard defines the notion of a discarded-value expression; + // normally, we don't need to do anything to handle it, but if it is a + // volatile lvalue with a special form, we perform an lvalue-to-rvalue + // conversion. + if (getLangOpts().CPlusPlus0x && E->isGLValue() && + E->getType().isVolatileQualified() && + IsSpecialDiscardedValue(E)) { + ExprResult Res = DefaultLvalueConversion(E); + if (Res.isInvalid()) + return Owned(E); + E = Res.take(); + } + return Owned(E); + } // GCC seems to also exclude expressions of incomplete enum type. 
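What IsSpecialDiscardedValue above enables, concretely: each statement below is a discarded-value expression of volatile type with one of the listed forms, so C++11 requires the lvalue-to-rvalue conversion, i.e. an actual load.

void touch(volatile int *vp, volatile int &vr) {
  *vp;            // indirection: load emitted
  vr;             // id-expression: load emitted
  (vp[0], vr);    // comma whose right operand is a listed form: load emitted
}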
if (const EnumType *T = E->getType()->getAs()) { @@ -5269,7 +5485,7 @@ ExprResult Sema::IgnoredValueConversions(Expr *E) { return Owned(E); } -ExprResult Sema::ActOnFinishFullExpr(Expr *FE) { +ExprResult Sema::ActOnFinishFullExpr(Expr *FE, SourceLocation CC) { ExprResult FullExpr = Owned(FE); if (!FullExpr.get()) @@ -5295,7 +5511,7 @@ ExprResult Sema::ActOnFinishFullExpr(Expr *FE) { if (FullExpr.isInvalid()) return ExprError(); - CheckImplicitConversions(FullExpr.get(), FullExpr.get()->getExprLoc()); + CheckImplicitConversions(FullExpr.get(), CC); return MaybeCreateExprWithCleanups(FullExpr); } diff --git a/lib/Sema/SemaExprMember.cpp b/lib/Sema/SemaExprMember.cpp index 6c84caa..53f22f6 100644 --- a/lib/Sema/SemaExprMember.cpp +++ b/lib/Sema/SemaExprMember.cpp @@ -115,7 +115,7 @@ static IMAKind ClassifyImplicitMemberAccess(Sema &SemaRef, NamedDecl *D = *I; if (D->isCXXInstanceMember()) { - if (dyn_cast(D)) + if (dyn_cast(D) || dyn_cast(D)) isField = true; CXXRecordDecl *R = cast(D->getDeclContext()); @@ -436,8 +436,8 @@ Sema::ActOnDependentMemberExpr(Expr *BaseExpr, QualType BaseType, if (PT && (!getLangOpts().ObjC1 || PT->getPointeeType()->isRecordType())) { assert(BaseExpr && "cannot happen with implicit member accesses"); - Diag(NameInfo.getLoc(), diag::err_typecheck_member_reference_struct_union) - << BaseType << BaseExpr->getSourceRange(); + Diag(OpLoc, diag::err_typecheck_member_reference_struct_union) + << BaseType << BaseExpr->getSourceRange() << NameInfo.getSourceRange(); return ExprError(); } } @@ -548,8 +548,8 @@ LookupMemberExprInRecord(Sema &SemaRef, LookupResult &R, RecordDecl *RDecl = RTy->getDecl(); if (!SemaRef.isThisOutsideMemberFunctionBody(QualType(RTy, 0)) && SemaRef.RequireCompleteType(OpLoc, QualType(RTy, 0), - SemaRef.PDiag(diag::err_typecheck_incomplete_tag) - << BaseRange)) + diag::err_typecheck_incomplete_tag, + BaseRange)) return true; if (HasTemplateArgs) { @@ -813,8 +813,9 @@ Sema::BuildMemberReferenceExpr(Expr *BaseExpr, QualType BaseExprType, SourceLocation TemplateKWLoc, NamedDecl *FirstQualifierInScope, LookupResult &R, - const TemplateArgumentListInfo *TemplateArgs, - bool SuppressQualifierCheck) { + const TemplateArgumentListInfo *TemplateArgs, + bool SuppressQualifierCheck, + ActOnMemberAccessExtraArgs *ExtraArgs) { QualType BaseType = BaseExprType; if (IsArrow) { assert(BaseType->isPointerType()); @@ -835,6 +836,32 @@ Sema::BuildMemberReferenceExpr(Expr *BaseExpr, QualType BaseExprType, ? computeDeclContext(SS, false) : BaseType->getAs()->getDecl()); + if (ExtraArgs) { + ExprResult RetryExpr; + if (!IsArrow && BaseExpr) { + SFINAETrap Trap(*this, true); + ParsedType ObjectType; + bool MayBePseudoDestructor = false; + RetryExpr = ActOnStartCXXMemberReference(getCurScope(), BaseExpr, + OpLoc, tok::arrow, ObjectType, + MayBePseudoDestructor); + if (RetryExpr.isUsable() && !Trap.hasErrorOccurred()) { + CXXScopeSpec TempSS(SS); + RetryExpr = ActOnMemberAccessExpr( + ExtraArgs->S, RetryExpr.get(), OpLoc, tok::arrow, TempSS, + TemplateKWLoc, ExtraArgs->Id, ExtraArgs->ObjCImpDecl, + ExtraArgs->HasTrailingLParen); + } + if (Trap.hasErrorOccurred()) + RetryExpr = ExprError(); + } + if (RetryExpr.isUsable()) { + Diag(OpLoc, diag::err_no_member_overloaded_arrow) + << MemberName << DC << FixItHint::CreateReplacement(OpLoc, "->"); + return RetryExpr; + } + } + Diag(R.getNameLoc(), diag::err_no_member) << MemberName << DC << (BaseExpr ? 
BaseExpr->getSourceRange() : SourceRange()); @@ -1122,10 +1149,22 @@ Sema::LookupMemberExpr(LookupResult &R, ExprResult &BaseExpr, ObjCImpDecl, HasTemplateArgs); goto fail; } - - if (RequireCompleteType(OpLoc, BaseType, - PDiag(diag::err_typecheck_incomplete_tag) - << BaseExpr.get()->getSourceRange())) + else if (Member && Member->isStr("isa")) { + // If an ivar is (1) the first ivar in a root class and (2) named `isa`, + // then issue the same deprecated warning that id->isa gets. + ObjCInterfaceDecl *ClassDeclared = 0; + if (ObjCIvarDecl *IV = + IDecl->lookupInstanceVariable(Member, ClassDeclared)) { + if (!ClassDeclared->getSuperClass() + && (*ClassDeclared->ivar_begin()) == IV) { + Diag(MemberLoc, diag::warn_objc_isa_use); + Diag(IV->getLocation(), diag::note_ivar_decl); + } + } + } + + if (RequireCompleteType(OpLoc, BaseType, diag::err_typecheck_incomplete_tag, + BaseExpr.get())) return ExprError(); ObjCInterfaceDecl *ClassDeclared = 0; @@ -1211,6 +1250,7 @@ Sema::LookupMemberExpr(LookupResult &R, ExprResult &BaseExpr, << IV->getDeclName(); } } + bool warn = true; if (getLangOpts().ObjCAutoRefCount) { Expr *BaseExp = BaseExpr.get()->IgnoreParenImpCasts(); if (UnaryOperator *UO = dyn_cast(BaseExp)) @@ -1218,10 +1258,20 @@ Sema::LookupMemberExpr(LookupResult &R, ExprResult &BaseExpr, BaseExp = UO->getSubExpr()->IgnoreParenCasts(); if (DeclRefExpr *DE = dyn_cast(BaseExp)) - if (DE->getType().getObjCLifetime() == Qualifiers::OCL_Weak) + if (DE->getType().getObjCLifetime() == Qualifiers::OCL_Weak) { Diag(DE->getLocation(), diag::error_arc_weak_ivar_access); + warn = false; + } + } + if (warn) { + if (ObjCMethodDecl *MD = getCurMethodDecl()) { + ObjCMethodFamily MF = MD->getMethodFamily(); + warn = (MF != OMF_init && MF != OMF_dealloc && + MF != OMF_finalize); + } + if (warn) + Diag(MemberLoc, diag::warn_direct_ivar_access) << IV->getDeclName(); } - return Owned(new (Context) ObjCIvarRefExpr(IV, IV->getType(), MemberLoc, BaseExpr.take(), IsArrow)); @@ -1327,9 +1377,6 @@ Sema::LookupMemberExpr(LookupResult &R, ExprResult &BaseExpr, // methods. Setter = IFace->lookupPrivateMethod(SetterSel, false); } - // Look through local category implementations associated with the class. 
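The retry logic added to BuildMemberReferenceExpr above (the ExtraArgs path) targets exactly this situation, re-running a failed dot access as an arrow access and attaching a fixit; a minimal reproduction with hypothetical types:

struct Widget { int id; };

struct Ptr {                       // a minimal smart-pointer-like class
  Widget *w;
  Widget *operator->() const { return w; }
};

int use(Ptr p) {
  return p->id;                    // OK, goes through operator->
  // return p.id;                  // no member 'id' in Ptr; the retry finds
                                   // that p->id works and suggests '->'
}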
- if (!Setter) - Setter = IFace->getCategoryClassMethod(SetterSel); if (Setter && DiagnoseUseOfDecl(Setter, MemberLoc)) return ExprError(); @@ -1418,8 +1465,8 @@ Sema::LookupMemberExpr(LookupResult &R, ExprResult &BaseExpr, ObjCImpDecl, HasTemplateArgs); } - Diag(MemberLoc, diag::err_typecheck_member_reference_struct_union) - << BaseType << BaseExpr.get()->getSourceRange(); + Diag(OpLoc, diag::err_typecheck_member_reference_struct_union) + << BaseType << BaseExpr.get()->getSourceRange() << MemberLoc; return ExprError(); } @@ -1434,9 +1481,9 @@ Sema::LookupMemberExpr(LookupResult &R, ExprResult &BaseExpr, /// \param HasTrailingLParen whether the next token is '(', which /// is used to diagnose mis-uses of special members that can /// only be called -/// \param ObjCImpDecl the current ObjC @implementation decl; -/// this is an ugly hack around the fact that ObjC @implementations -/// aren't properly put in the context chain +/// \param ObjCImpDecl the current Objective-C \@implementation +/// decl; this is an ugly hack around the fact that Objective-C +/// \@implementations aren't properly put in the context chain ExprResult Sema::ActOnMemberAccessExpr(Scope *S, Expr *Base, SourceLocation OpLoc, tok::TokenKind OpKind, @@ -1506,9 +1553,11 @@ ExprResult Sema::ActOnMemberAccessExpr(Scope *S, Expr *Base, return move(Result); } + ActOnMemberAccessExtraArgs ExtraArgs = {S, Id, ObjCImpDecl, HasTrailingLParen}; Result = BuildMemberReferenceExpr(Base, Base->getType(), OpLoc, IsArrow, SS, TemplateKWLoc, - FirstQualifierInScope, R, TemplateArgs); + FirstQualifierInScope, R, TemplateArgs, + false, &ExtraArgs); } return move(Result); @@ -1563,6 +1612,8 @@ BuildFieldReferenceExpr(Sema &S, Expr *BaseExpr, bool IsArrow, MemberType = S.Context.getQualifiedType(MemberType, Combined); } + S.UnusedPrivateFields.remove(Field); + ExprResult Base = S.PerformObjectMemberConversion(BaseExpr, SS.getScopeRep(), FoundDecl, Field); diff --git a/lib/Sema/SemaExprObjC.cpp b/lib/Sema/SemaExprObjC.cpp index b62d56e..0aabf8b 100644 --- a/lib/Sema/SemaExprObjC.cpp +++ b/lib/Sema/SemaExprObjC.cpp @@ -111,7 +111,7 @@ ExprResult Sema::BuildObjCStringLiteral(SourceLocation AtLoc, StringLiteral *S){ Ty = Context.getObjCIdType(); } } else { - IdentifierInfo *NSIdent = &Context.Idents.get("NSString"); + IdentifierInfo *NSIdent = NSAPIObj->getNSClassId(NSAPI::ClassId_NSString); NamedDecl *IF = LookupSingleName(TUScope, NSIdent, AtLoc, LookupOrdinaryName); if (ObjCInterfaceDecl *StrIF = dyn_cast_or_null(IF)) { @@ -140,20 +140,47 @@ ExprResult Sema::BuildObjCStringLiteral(SourceLocation AtLoc, StringLiteral *S){ return new (Context) ObjCStringLiteral(S, Ty, AtLoc); } +/// \brief Emits an error if the given method does not exist, or if the return +/// type is not an Objective-C object. +static bool validateBoxingMethod(Sema &S, SourceLocation Loc, + const ObjCInterfaceDecl *Class, + Selector Sel, const ObjCMethodDecl *Method) { + if (!Method) { + // FIXME: Is there a better way to avoid quotes than using getName()? + S.Diag(Loc, diag::err_undeclared_boxing_method) << Sel << Class->getName(); + return false; + } + + // Make sure the return type is reasonable. 
+ QualType ReturnType = Method->getResultType(); + if (!ReturnType->isObjCObjectPointerType()) { + S.Diag(Loc, diag::err_objc_literal_method_sig) + << Sel; + S.Diag(Method->getLocation(), diag::note_objc_literal_method_return) + << ReturnType; + return false; + } + + return true; +} + /// \brief Retrieve the NSNumber factory method that should be used to create /// an Objective-C literal for the given type. static ObjCMethodDecl *getNSNumberFactoryMethod(Sema &S, SourceLocation Loc, - QualType T, QualType ReturnType, - SourceRange Range) { + QualType NumberType, + bool isLiteral = false, + SourceRange R = SourceRange()) { llvm::Optional Kind - = S.NSAPIObj->getNSNumberFactoryMethodKind(T); + = S.NSAPIObj->getNSNumberFactoryMethodKind(NumberType); if (!Kind) { - S.Diag(Loc, diag::err_invalid_nsnumber_type) - << T << Range; + if (isLiteral) { + S.Diag(Loc, diag::err_invalid_nsnumber_type) + << NumberType << R; + } return 0; } - + // If we already looked up this method, we're done. if (S.NSNumberLiteralMethods[*Kind]) return S.NSNumberLiteralMethods[*Kind]; @@ -161,39 +188,62 @@ static ObjCMethodDecl *getNSNumberFactoryMethod(Sema &S, SourceLocation Loc, Selector Sel = S.NSAPIObj->getNSNumberLiteralSelector(*Kind, /*Instance=*/false); + ASTContext &CX = S.Context; + + // Look up the NSNumber class, if we haven't done so already. It's cached + // in the Sema instance. + if (!S.NSNumberDecl) { + IdentifierInfo *NSNumberId = + S.NSAPIObj->getNSClassId(NSAPI::ClassId_NSNumber); + NamedDecl *IF = S.LookupSingleName(S.TUScope, NSNumberId, + Loc, Sema::LookupOrdinaryName); + S.NSNumberDecl = dyn_cast_or_null(IF); + if (!S.NSNumberDecl) { + if (S.getLangOpts().DebuggerObjCLiteral) { + // Create a stub definition of NSNumber. + S.NSNumberDecl = ObjCInterfaceDecl::Create(CX, + CX.getTranslationUnitDecl(), + SourceLocation(), NSNumberId, + 0, SourceLocation()); + } else { + // Otherwise, require a declaration of NSNumber. + S.Diag(Loc, diag::err_undeclared_nsnumber); + return 0; + } + } else if (!S.NSNumberDecl->hasDefinition()) { + S.Diag(Loc, diag::err_undeclared_nsnumber); + return 0; + } + + // generate the pointer to NSNumber type. + QualType NSNumberObject = CX.getObjCInterfaceType(S.NSNumberDecl); + S.NSNumberPointer = CX.getObjCObjectPointerType(NSNumberObject); + } + // Look for the appropriate method within NSNumber. - ObjCMethodDecl *Method = S.NSNumberDecl->lookupClassMethod(Sel);; + ObjCMethodDecl *Method = S.NSNumberDecl->lookupClassMethod(Sel); if (!Method && S.getLangOpts().DebuggerObjCLiteral) { + // create a stub definition this NSNumber factory method. 
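A stripped-down sketch of the lookup-once caching this hunk moves into getNSNumberFactoryMethod (NSNumberDecl and NSNumberPointer live on the Sema instance); the types and symbol table here are stand-ins, not the clang API:

#include <map>
#include <string>

struct ClassDecl { std::string Name; };

static std::map<std::string, ClassDecl> Symbols;   // stand-in for name lookup

struct LiteralCache {
  ClassDecl *NSNumberDecl;
  LiteralCache() : NSNumberDecl(0) {}

  ClassDecl *getNSNumberDecl() {
    if (!NSNumberDecl) {                            // the first literal pays
      std::map<std::string, ClassDecl>::iterator I  // for the lookup...
          = Symbols.find("NSNumber");
      if (I != Symbols.end())
        NSNumberDecl = &I->second;
    }
    return NSNumberDecl;                            // ...later literals reuse it
  }
};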
TypeSourceInfo *ResultTInfo = 0; - Method = ObjCMethodDecl::Create(S.Context, SourceLocation(), SourceLocation(), Sel, - ReturnType, - ResultTInfo, - S.Context.getTranslationUnitDecl(), - false /*Instance*/, false/*isVariadic*/, - /*isSynthesized=*/false, - /*isImplicitlyDeclared=*/true, /*isDefined=*/false, - ObjCMethodDecl::Required, - false); + Method = ObjCMethodDecl::Create(CX, SourceLocation(), SourceLocation(), Sel, + S.NSNumberPointer, ResultTInfo, + S.NSNumberDecl, + /*isInstance=*/false, /*isVariadic=*/false, + /*isSynthesized=*/false, + /*isImplicitlyDeclared=*/true, + /*isDefined=*/false, + ObjCMethodDecl::Required, + /*HasRelatedResultType=*/false); ParmVarDecl *value = ParmVarDecl::Create(S.Context, Method, SourceLocation(), SourceLocation(), - &S.Context.Idents.get("value"), - T, /*TInfo=*/0, SC_None, SC_None, 0); + &CX.Idents.get("value"), + NumberType, /*TInfo=*/0, SC_None, + SC_None, 0); Method->setMethodParams(S.Context, value, ArrayRef()); } - if (!Method) { - S.Diag(Loc, diag::err_undeclared_nsnumber_method) << Sel; - return 0; - } - - // Make sure the return type is reasonable. - if (!Method->getResultType()->isObjCObjectPointerType()) { - S.Diag(Loc, diag::err_objc_literal_method_sig) - << Sel; - S.Diag(Method->getLocation(), diag::note_objc_literal_method_return) - << Method->getResultType(); + if (!validateBoxingMethod(S, Loc, S.NSNumberDecl, Sel, Method)) return 0; - } // Note: if the parameter type is out-of-line, we'll catch it later in the // implicit conversion. @@ -202,29 +252,9 @@ static ObjCMethodDecl *getNSNumberFactoryMethod(Sema &S, SourceLocation Loc, return Method; } -/// BuildObjCNumericLiteral - builds an ObjCNumericLiteral AST node for the -/// numeric literal expression. Type of the expression will be "NSNumber *" -/// or "id" if NSNumber is unavailable. +/// BuildObjCNumericLiteral - builds an ObjCBoxedExpr AST node for the +/// numeric literal expression. Type of the expression will be "NSNumber *". ExprResult Sema::BuildObjCNumericLiteral(SourceLocation AtLoc, Expr *Number) { - // Look up the NSNumber class, if we haven't done so already. - if (!NSNumberDecl) { - NamedDecl *IF = LookupSingleName(TUScope, - NSAPIObj->getNSClassId(NSAPI::ClassId_NSNumber), - AtLoc, LookupOrdinaryName); - NSNumberDecl = dyn_cast_or_null(IF); - - if (!NSNumberDecl && getLangOpts().DebuggerObjCLiteral) - NSNumberDecl = ObjCInterfaceDecl::Create (Context, - Context.getTranslationUnitDecl(), - SourceLocation(), - NSAPIObj->getNSClassId(NSAPI::ClassId_NSNumber), - 0, SourceLocation()); - if (!NSNumberDecl) { - Diag(AtLoc, diag::err_undeclared_nsnumber); - return ExprError(); - } - } - // Determine the type of the literal. QualType NumberType = Number->getType(); if (CharacterLiteral *Char = dyn_cast(Number)) { @@ -249,29 +279,29 @@ ExprResult Sema::BuildObjCNumericLiteral(SourceLocation AtLoc, Expr *Number) { } } - ObjCMethodDecl *Method = 0; // Look for the appropriate method within NSNumber. // Construct the literal. - QualType Ty - = Context.getObjCObjectPointerType( - Context.getObjCInterfaceType(NSNumberDecl)); - Method = getNSNumberFactoryMethod(*this, AtLoc, - NumberType, Ty, - Number->getSourceRange()); - + SourceRange NR(Number->getSourceRange()); + ObjCMethodDecl *Method = getNSNumberFactoryMethod(*this, AtLoc, NumberType, + true, NR); if (!Method) return ExprError(); // Convert the number to the type that the parameter expects. 
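The new code below converts the operand with PerformCopyInitialization against the factory method's parameter, i.e. the same conversion an explicit message send would perform. A C++ analogue of that model, with hypothetical types:

struct Number {
  long v;
  Number(long v) : v(v) {}   // the converting constructor a call would use
};

Number box(Number value) { return value; }

Number n = box('x');         // 'x' is copy-initialized into 'value':
                             // char -> long -> Number, exactly as in a call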
- QualType ElementT = Method->param_begin()[0]->getType(); - ExprResult ConvertedNumber = PerformImplicitConversion(Number, ElementT, - AA_Sending); + ParmVarDecl *ParamDecl = Method->param_begin()[0]; + InitializedEntity Entity = InitializedEntity::InitializeParameter(Context, + ParamDecl); + ExprResult ConvertedNumber = PerformCopyInitialization(Entity, + SourceLocation(), + Owned(Number)); if (ConvertedNumber.isInvalid()) return ExprError(); Number = ConvertedNumber.get(); + // Use the effective source range of the literal, including the leading '@'. return MaybeBindToTemporary( - new (Context) ObjCNumericLiteral(Number, Ty, Method, AtLoc)); + new (Context) ObjCBoxedExpr(Number, NSNumberPointer, Method, + SourceRange(AtLoc, NR.getEnd()))); } ExprResult Sema::ActOnObjCBoolLiteral(SourceLocation AtLoc, @@ -308,9 +338,11 @@ static ExprResult CheckObjCCollectionLiteralElement(Sema &S, Expr *Element, // type. if (S.getLangOpts().CPlusPlus && Element->getType()->isRecordType()) { InitializedEntity Entity - = InitializedEntity::InitializeParameter(S.Context, T, /*Consumed=*/false); + = InitializedEntity::InitializeParameter(S.Context, T, + /*Consumed=*/false); InitializationKind Kind - = InitializationKind::CreateCopy(Element->getLocStart(), SourceLocation()); + = InitializationKind::CreateCopy(Element->getLocStart(), + SourceLocation()); InitializationSequence Seq(S, Entity, Kind, &Element, 1); if (!Seq.Failed()) return Seq.Perform(S, Entity, Kind, MultiExprArg(S, &Element, 1)); @@ -385,26 +417,191 @@ static ExprResult CheckObjCCollectionLiteralElement(Sema &S, Expr *Element, Element->getLocStart(), Element); } +ExprResult Sema::BuildObjCBoxedExpr(SourceRange SR, Expr *ValueExpr) { + if (ValueExpr->isTypeDependent()) { + ObjCBoxedExpr *BoxedExpr = + new (Context) ObjCBoxedExpr(ValueExpr, Context.DependentTy, NULL, SR); + return Owned(BoxedExpr); + } + ObjCMethodDecl *BoxingMethod = NULL; + QualType BoxedType; + // Convert the expression to an RValue, so we can check for pointer types... + ExprResult RValue = DefaultFunctionArrayLvalueConversion(ValueExpr); + if (RValue.isInvalid()) { + return ExprError(); + } + ValueExpr = RValue.get(); + QualType ValueType(ValueExpr->getType()); + if (const PointerType *PT = ValueType->getAs()) { + QualType PointeeType = PT->getPointeeType(); + if (Context.hasSameUnqualifiedType(PointeeType, Context.CharTy)) { + + if (!NSStringDecl) { + IdentifierInfo *NSStringId = + NSAPIObj->getNSClassId(NSAPI::ClassId_NSString); + NamedDecl *Decl = LookupSingleName(TUScope, NSStringId, + SR.getBegin(), LookupOrdinaryName); + NSStringDecl = dyn_cast_or_null(Decl); + if (!NSStringDecl) { + if (getLangOpts().DebuggerObjCLiteral) { + // Support boxed expressions in the debugger w/o NSString declaration. 
+ DeclContext *TU = Context.getTranslationUnitDecl(); + NSStringDecl = ObjCInterfaceDecl::Create(Context, TU, + SourceLocation(), + NSStringId, + 0, SourceLocation()); + } else { + Diag(SR.getBegin(), diag::err_undeclared_nsstring); + return ExprError(); + } + } else if (!NSStringDecl->hasDefinition()) { + Diag(SR.getBegin(), diag::err_undeclared_nsstring); + return ExprError(); + } + assert(NSStringDecl && "NSStringDecl should not be NULL"); + QualType NSStringObject = Context.getObjCInterfaceType(NSStringDecl); + NSStringPointer = Context.getObjCObjectPointerType(NSStringObject); + } + + if (!StringWithUTF8StringMethod) { + IdentifierInfo *II = &Context.Idents.get("stringWithUTF8String"); + Selector stringWithUTF8String = Context.Selectors.getUnarySelector(II); + + // Look for the appropriate method within NSString. + BoxingMethod = NSStringDecl->lookupClassMethod(stringWithUTF8String); + if (!BoxingMethod && getLangOpts().DebuggerObjCLiteral) { + // Debugger needs to work even if NSString hasn't been defined. + TypeSourceInfo *ResultTInfo = 0; + ObjCMethodDecl *M = + ObjCMethodDecl::Create(Context, SourceLocation(), SourceLocation(), + stringWithUTF8String, NSStringPointer, + ResultTInfo, NSStringDecl, + /*isInstance=*/false, /*isVariadic=*/false, + /*isSynthesized=*/false, + /*isImplicitlyDeclared=*/true, + /*isDefined=*/false, + ObjCMethodDecl::Required, + /*HasRelatedResultType=*/false); + QualType ConstCharType = Context.CharTy.withConst(); + ParmVarDecl *value = + ParmVarDecl::Create(Context, M, + SourceLocation(), SourceLocation(), + &Context.Idents.get("value"), + Context.getPointerType(ConstCharType), + /*TInfo=*/0, + SC_None, SC_None, 0); + M->setMethodParams(Context, value, ArrayRef()); + BoxingMethod = M; + } + + if (!validateBoxingMethod(*this, SR.getBegin(), NSStringDecl, + stringWithUTF8String, BoxingMethod)) + return ExprError(); + + StringWithUTF8StringMethod = BoxingMethod; + } + + BoxingMethod = StringWithUTF8StringMethod; + BoxedType = NSStringPointer; + } + } else if (ValueType->isBuiltinType()) { + // The other types we support are numeric, char and BOOL/bool. We could also + // provide limited support for structure types, such as NSRange, NSRect, and + // NSSize. See NSValue (NSValueGeometryExtensions) in + // for more details. + + // Check for a top-level character literal. + if (const CharacterLiteral *Char = + dyn_cast(ValueExpr->IgnoreParens())) { + // In C, character literals have type 'int'. That's not the type we want + // to use to determine the Objective-c literal kind. + switch (Char->getKind()) { + case CharacterLiteral::Ascii: + ValueType = Context.CharTy; + break; + + case CharacterLiteral::Wide: + ValueType = Context.getWCharType(); + break; + + case CharacterLiteral::UTF16: + ValueType = Context.Char16Ty; + break; + + case CharacterLiteral::UTF32: + ValueType = Context.Char32Ty; + break; + } + } + + // FIXME: Do I need to do anything special with BoolTy expressions? + + // Look for the appropriate method within NSNumber. 
+ BoxingMethod = getNSNumberFactoryMethod(*this, SR.getBegin(), ValueType); + BoxedType = NSNumberPointer; + + } else if (const EnumType *ET = ValueType->getAs()) { + if (!ET->getDecl()->isComplete()) { + Diag(SR.getBegin(), diag::err_objc_incomplete_boxed_expression_type) + << ValueType << ValueExpr->getSourceRange(); + return ExprError(); + } + + BoxingMethod = getNSNumberFactoryMethod(*this, SR.getBegin(), + ET->getDecl()->getIntegerType()); + BoxedType = NSNumberPointer; + } + + if (!BoxingMethod) { + Diag(SR.getBegin(), diag::err_objc_illegal_boxed_expression_type) + << ValueType << ValueExpr->getSourceRange(); + return ExprError(); + } + + // Convert the expression to the type that the parameter requires. + ParmVarDecl *ParamDecl = BoxingMethod->param_begin()[0]; + InitializedEntity Entity = InitializedEntity::InitializeParameter(Context, + ParamDecl); + ExprResult ConvertedValueExpr = PerformCopyInitialization(Entity, + SourceLocation(), + Owned(ValueExpr)); + if (ConvertedValueExpr.isInvalid()) + return ExprError(); + ValueExpr = ConvertedValueExpr.get(); + + ObjCBoxedExpr *BoxedExpr = + new (Context) ObjCBoxedExpr(ValueExpr, BoxedType, + BoxingMethod, SR); + return MaybeBindToTemporary(BoxedExpr); +} + +/// Build an ObjC subscript pseudo-object expression, given that +/// that's supported by the runtime. ExprResult Sema::BuildObjCSubscriptExpression(SourceLocation RB, Expr *BaseExpr, Expr *IndexExpr, ObjCMethodDecl *getterMethod, ObjCMethodDecl *setterMethod) { - // Feature support is for modern abi. - if (!LangOpts.ObjCNonFragileABI) - return ExprError(); - // If the expression is type-dependent, there's nothing for us to do. - assert ((!BaseExpr->isTypeDependent() && !IndexExpr->isTypeDependent()) && - "base or index cannot have dependent type here"); + assert(!LangOpts.ObjCRuntime.isSubscriptPointerArithmetic()); + + // We can't get dependent types here; our callers should have + // filtered them out. + assert((!BaseExpr->isTypeDependent() && !IndexExpr->isTypeDependent()) && + "base or index cannot have dependent type here"); + + // Filter out placeholders in the index. In theory, overloads could + // be preserved here, although that might not actually work correctly. ExprResult Result = CheckPlaceholderExpr(IndexExpr); if (Result.isInvalid()) return ExprError(); IndexExpr = Result.get(); - // Perform lvalue-to-rvalue conversion. + // Perform lvalue-to-rvalue conversion on the base. Result = DefaultLvalueConversion(BaseExpr); if (Result.isInvalid()) return ExprError(); BaseExpr = Result.get(); + + // Build the pseudo-object expression. 
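ObjCSubscriptRefExpr, built just below, is a pseudo-object expression: loads are rewritten to the getter and stores to the setter. A C++ proxy class illustrates the same read/write split (an analogy, not the clang representation):

class Container {
  int data[8];
public:
  Container() : data() {}
  int  get(unsigned i) const  { return data[i]; }      // getter method
  void set(unsigned i, int v) { data[i] = v; }         // setter method

  class Proxy {
    Container &c; unsigned i;
  public:
    Proxy(Container &c, unsigned i) : c(c), i(i) {}
    operator int() const { return c.get(i); }          // read path
    Proxy &operator=(int v) { c.set(i, v); return *this; }  // write path
  };
  Proxy operator[](unsigned i) { return Proxy(*this, i); }
};

int readWrite(Container &c) {
  c[3] = 7;        // lowers to the setter, like setObject:atIndexedSubscript:
  return c[3];     // lowers to the getter, like objectAtIndexedSubscript:
}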
return Owned(ObjCSubscriptRefExpr::Create(Context, BaseExpr, IndexExpr, @@ -440,11 +637,10 @@ ExprResult Sema::BuildObjCArrayLiteral(SourceRange SR, MultiExprArg Elements) { if (!ArrayWithObjectsMethod) { Selector Sel = NSAPIObj->getNSArraySelector(NSAPI::NSArr_arrayWithObjectsCount); - ArrayWithObjectsMethod = NSArrayDecl->lookupClassMethod(Sel); - if (!ArrayWithObjectsMethod && getLangOpts().DebuggerObjCLiteral) { + ObjCMethodDecl *Method = NSArrayDecl->lookupClassMethod(Sel); + if (!Method && getLangOpts().DebuggerObjCLiteral) { TypeSourceInfo *ResultTInfo = 0; - ArrayWithObjectsMethod = - ObjCMethodDecl::Create(Context, + Method = ObjCMethodDecl::Create(Context, SourceLocation(), SourceLocation(), Sel, IdT, ResultTInfo, @@ -455,80 +651,68 @@ ExprResult Sema::BuildObjCArrayLiteral(SourceRange SR, MultiExprArg Elements) { ObjCMethodDecl::Required, false); SmallVector Params; - ParmVarDecl *objects = ParmVarDecl::Create(Context, ArrayWithObjectsMethod, - SourceLocation(), SourceLocation(), - &Context.Idents.get("objects"), - Context.getPointerType(IdT), - /*TInfo=*/0, - SC_None, - SC_None, - 0); + ParmVarDecl *objects = ParmVarDecl::Create(Context, Method, + SourceLocation(), + SourceLocation(), + &Context.Idents.get("objects"), + Context.getPointerType(IdT), + /*TInfo=*/0, SC_None, SC_None, + 0); Params.push_back(objects); - ParmVarDecl *cnt = ParmVarDecl::Create(Context, ArrayWithObjectsMethod, - SourceLocation(), SourceLocation(), - &Context.Idents.get("cnt"), - Context.UnsignedLongTy, - /*TInfo=*/0, - SC_None, - SC_None, - 0); + ParmVarDecl *cnt = ParmVarDecl::Create(Context, Method, + SourceLocation(), + SourceLocation(), + &Context.Idents.get("cnt"), + Context.UnsignedLongTy, + /*TInfo=*/0, SC_None, SC_None, + 0); Params.push_back(cnt); - ArrayWithObjectsMethod->setMethodParams(Context, Params, - ArrayRef()); - - + Method->setMethodParams(Context, Params, ArrayRef()); } - if (!ArrayWithObjectsMethod) { - Diag(SR.getBegin(), diag::err_undeclared_arraywithobjects) << Sel; + if (!validateBoxingMethod(*this, SR.getBegin(), NSArrayDecl, Sel, Method)) + return ExprError(); + + // Dig out the type that all elements should be converted to. + QualType T = Method->param_begin()[0]->getType(); + const PointerType *PtrT = T->getAs(); + if (!PtrT || + !Context.hasSameUnqualifiedType(PtrT->getPointeeType(), IdT)) { + Diag(SR.getBegin(), diag::err_objc_literal_method_sig) + << Sel; + Diag(Method->param_begin()[0]->getLocation(), + diag::note_objc_literal_method_param) + << 0 << T + << Context.getPointerType(IdT.withConst()); return ExprError(); } - } - // Make sure the return type is reasonable. - if (!ArrayWithObjectsMethod->getResultType()->isObjCObjectPointerType()) { - Diag(SR.getBegin(), diag::err_objc_literal_method_sig) - << ArrayWithObjectsMethod->getSelector(); - Diag(ArrayWithObjectsMethod->getLocation(), - diag::note_objc_literal_method_return) - << ArrayWithObjectsMethod->getResultType(); - return ExprError(); - } + // Check that the 'count' parameter is integral. + if (!Method->param_begin()[1]->getType()->isIntegerType()) { + Diag(SR.getBegin(), diag::err_objc_literal_method_sig) + << Sel; + Diag(Method->param_begin()[1]->getLocation(), + diag::note_objc_literal_method_param) + << 1 + << Method->param_begin()[1]->getType() + << "integral"; + return ExprError(); + } - // Dig out the type that all elements should be converted to. 
- QualType T = ArrayWithObjectsMethod->param_begin()[0]->getType(); - const PointerType *PtrT = T->getAs(); - if (!PtrT || - !Context.hasSameUnqualifiedType(PtrT->getPointeeType(), IdT)) { - Diag(SR.getBegin(), diag::err_objc_literal_method_sig) - << ArrayWithObjectsMethod->getSelector(); - Diag(ArrayWithObjectsMethod->param_begin()[0]->getLocation(), - diag::note_objc_literal_method_param) - << 0 << T - << Context.getPointerType(IdT.withConst()); - return ExprError(); - } - T = PtrT->getPointeeType(); - - // Check that the 'count' parameter is integral. - if (!ArrayWithObjectsMethod->param_begin()[1]->getType()->isIntegerType()) { - Diag(SR.getBegin(), diag::err_objc_literal_method_sig) - << ArrayWithObjectsMethod->getSelector(); - Diag(ArrayWithObjectsMethod->param_begin()[1]->getLocation(), - diag::note_objc_literal_method_param) - << 1 - << ArrayWithObjectsMethod->param_begin()[1]->getType() - << "integral"; - return ExprError(); + // We've found a good +arrayWithObjects:count: method. Save it! + ArrayWithObjectsMethod = Method; } + QualType ObjectsType = ArrayWithObjectsMethod->param_begin()[0]->getType(); + QualType RequiredType = ObjectsType->castAs()->getPointeeType(); + // Check that each of the elements provided is valid in a collection literal, // performing conversions as necessary. Expr **ElementsBuffer = Elements.get(); for (unsigned I = 0, N = Elements.size(); I != N; ++I) { ExprResult Converted = CheckObjCCollectionLiteralElement(*this, ElementsBuffer[I], - T); + RequiredType); if (Converted.isInvalid()) return ExprError(); @@ -573,11 +757,10 @@ ExprResult Sema::BuildObjCDictionaryLiteral(SourceRange SR, QualType IdT = Context.getObjCIdType(); if (!DictionaryWithObjectsMethod) { Selector Sel = NSAPIObj->getNSDictionarySelector( - NSAPI::NSDict_dictionaryWithObjectsForKeysCount); - DictionaryWithObjectsMethod = NSDictionaryDecl->lookupClassMethod(Sel); - if (!DictionaryWithObjectsMethod && getLangOpts().DebuggerObjCLiteral) { - DictionaryWithObjectsMethod = - ObjCMethodDecl::Create(Context, + NSAPI::NSDict_dictionaryWithObjectsForKeysCount); + ObjCMethodDecl *Method = NSDictionaryDecl->lookupClassMethod(Sel); + if (!Method && getLangOpts().DebuggerObjCLiteral) { + Method = ObjCMethodDecl::Create(Context, SourceLocation(), SourceLocation(), Sel, IdT, 0 /*TypeSourceInfo */, @@ -588,117 +771,107 @@ ExprResult Sema::BuildObjCDictionaryLiteral(SourceRange SR, ObjCMethodDecl::Required, false); SmallVector Params; - ParmVarDecl *objects = ParmVarDecl::Create(Context, DictionaryWithObjectsMethod, - SourceLocation(), SourceLocation(), - &Context.Idents.get("objects"), - Context.getPointerType(IdT), - /*TInfo=*/0, - SC_None, - SC_None, - 0); + ParmVarDecl *objects = ParmVarDecl::Create(Context, Method, + SourceLocation(), + SourceLocation(), + &Context.Idents.get("objects"), + Context.getPointerType(IdT), + /*TInfo=*/0, SC_None, SC_None, + 0); Params.push_back(objects); - ParmVarDecl *keys = ParmVarDecl::Create(Context, DictionaryWithObjectsMethod, - SourceLocation(), SourceLocation(), - &Context.Idents.get("keys"), - Context.getPointerType(IdT), - /*TInfo=*/0, - SC_None, - SC_None, - 0); + ParmVarDecl *keys = ParmVarDecl::Create(Context, Method, + SourceLocation(), + SourceLocation(), + &Context.Idents.get("keys"), + Context.getPointerType(IdT), + /*TInfo=*/0, SC_None, SC_None, + 0); Params.push_back(keys); - ParmVarDecl *cnt = ParmVarDecl::Create(Context, DictionaryWithObjectsMethod, - SourceLocation(), SourceLocation(), - &Context.Idents.get("cnt"), - Context.UnsignedLongTy, - 
/*TInfo=*/0, - SC_None, - SC_None, - 0); + ParmVarDecl *cnt = ParmVarDecl::Create(Context, Method, + SourceLocation(), + SourceLocation(), + &Context.Idents.get("cnt"), + Context.UnsignedLongTy, + /*TInfo=*/0, SC_None, SC_None, + 0); Params.push_back(cnt); - DictionaryWithObjectsMethod->setMethodParams(Context, Params, - ArrayRef()); + Method->setMethodParams(Context, Params, ArrayRef()); } - if (!DictionaryWithObjectsMethod) { - Diag(SR.getBegin(), diag::err_undeclared_dictwithobjects) << Sel; - return ExprError(); + if (!validateBoxingMethod(*this, SR.getBegin(), NSDictionaryDecl, Sel, + Method)) + return ExprError(); + + // Dig out the type that all values should be converted to. + QualType ValueT = Method->param_begin()[0]->getType(); + const PointerType *PtrValue = ValueT->getAs(); + if (!PtrValue || + !Context.hasSameUnqualifiedType(PtrValue->getPointeeType(), IdT)) { + Diag(SR.getBegin(), diag::err_objc_literal_method_sig) + << Sel; + Diag(Method->param_begin()[0]->getLocation(), + diag::note_objc_literal_method_param) + << 0 << ValueT + << Context.getPointerType(IdT.withConst()); + return ExprError(); } - } - - // Make sure the return type is reasonable. - if (!DictionaryWithObjectsMethod->getResultType()->isObjCObjectPointerType()){ - Diag(SR.getBegin(), diag::err_objc_literal_method_sig) - << DictionaryWithObjectsMethod->getSelector(); - Diag(DictionaryWithObjectsMethod->getLocation(), - diag::note_objc_literal_method_return) - << DictionaryWithObjectsMethod->getResultType(); - return ExprError(); - } - // Dig out the type that all values should be converted to. - QualType ValueT = DictionaryWithObjectsMethod->param_begin()[0]->getType(); - const PointerType *PtrValue = ValueT->getAs(); - if (!PtrValue || - !Context.hasSameUnqualifiedType(PtrValue->getPointeeType(), IdT)) { - Diag(SR.getBegin(), diag::err_objc_literal_method_sig) - << DictionaryWithObjectsMethod->getSelector(); - Diag(DictionaryWithObjectsMethod->param_begin()[0]->getLocation(), - diag::note_objc_literal_method_param) - << 0 << ValueT - << Context.getPointerType(IdT.withConst()); - return ExprError(); - } - ValueT = PtrValue->getPointeeType(); - - // Dig out the type that all keys should be converted to. - QualType KeyT = DictionaryWithObjectsMethod->param_begin()[1]->getType(); - const PointerType *PtrKey = KeyT->getAs(); - if (!PtrKey || - !Context.hasSameUnqualifiedType(PtrKey->getPointeeType(), - IdT)) { - bool err = true; - if (PtrKey) { - if (QIDNSCopying.isNull()) { - // key argument of selector is id? - if (ObjCProtocolDecl *NSCopyingPDecl = - LookupProtocol(&Context.Idents.get("NSCopying"), SR.getBegin())) { - ObjCProtocolDecl *PQ[] = {NSCopyingPDecl}; - QIDNSCopying = - Context.getObjCObjectType(Context.ObjCBuiltinIdTy, - (ObjCProtocolDecl**) PQ,1); - QIDNSCopying = Context.getObjCObjectPointerType(QIDNSCopying); + // Dig out the type that all keys should be converted to. + QualType KeyT = Method->param_begin()[1]->getType(); + const PointerType *PtrKey = KeyT->getAs(); + if (!PtrKey || + !Context.hasSameUnqualifiedType(PtrKey->getPointeeType(), + IdT)) { + bool err = true; + if (PtrKey) { + if (QIDNSCopying.isNull()) { + // key argument of selector is id? 
+ if (ObjCProtocolDecl *NSCopyingPDecl = + LookupProtocol(&Context.Idents.get("NSCopying"), SR.getBegin())) { + ObjCProtocolDecl *PQ[] = {NSCopyingPDecl}; + QIDNSCopying = + Context.getObjCObjectType(Context.ObjCBuiltinIdTy, + (ObjCProtocolDecl**) PQ,1); + QIDNSCopying = Context.getObjCObjectPointerType(QIDNSCopying); + } } + if (!QIDNSCopying.isNull()) + err = !Context.hasSameUnqualifiedType(PtrKey->getPointeeType(), + QIDNSCopying); } - if (!QIDNSCopying.isNull()) - err = !Context.hasSameUnqualifiedType(PtrKey->getPointeeType(), - QIDNSCopying); - } - if (err) { + if (err) { + Diag(SR.getBegin(), diag::err_objc_literal_method_sig) + << Sel; + Diag(Method->param_begin()[1]->getLocation(), + diag::note_objc_literal_method_param) + << 1 << KeyT + << Context.getPointerType(IdT.withConst()); + return ExprError(); + } + } + + // Check that the 'count' parameter is integral. + QualType CountType = Method->param_begin()[2]->getType(); + if (!CountType->isIntegerType()) { Diag(SR.getBegin(), diag::err_objc_literal_method_sig) - << DictionaryWithObjectsMethod->getSelector(); - Diag(DictionaryWithObjectsMethod->param_begin()[1]->getLocation(), + << Sel; + Diag(Method->param_begin()[2]->getLocation(), diag::note_objc_literal_method_param) - << 1 << KeyT - << Context.getPointerType(IdT.withConst()); + << 2 << CountType + << "integral"; return ExprError(); } - } - KeyT = PtrKey->getPointeeType(); - // Check that the 'count' parameter is integral. - if (!DictionaryWithObjectsMethod->param_begin()[2]->getType() - ->isIntegerType()) { - Diag(SR.getBegin(), diag::err_objc_literal_method_sig) - << DictionaryWithObjectsMethod->getSelector(); - Diag(DictionaryWithObjectsMethod->param_begin()[2]->getLocation(), - diag::note_objc_literal_method_param) - << 2 - << DictionaryWithObjectsMethod->param_begin()[2]->getType() - << "integral"; - return ExprError(); + // We've found a good +dictionaryWithObjects:keys:count: method; save it! + DictionaryWithObjectsMethod = Method; } + QualType ValuesT = DictionaryWithObjectsMethod->param_begin()[0]->getType(); + QualType ValueT = ValuesT->castAs()->getPointeeType(); + QualType KeysT = DictionaryWithObjectsMethod->param_begin()[1]->getType(); + QualType KeyT = KeysT->castAs()->getPointeeType(); + // Check that each of the keys and values provided is valid in a collection // literal, performing conversions as necessary. bool HasPackExpansions = false; @@ -757,8 +930,8 @@ ExprResult Sema::BuildObjCEncodeExpression(SourceLocation AtLoc, if (!EncodedType->getAsArrayTypeUnsafe() && //// Incomplete array is handled. !EncodedType->isVoidType()) // void is handled too. 
if (RequireCompleteType(AtLoc, EncodedType, - PDiag(diag::err_incomplete_type_objc_at_encode) - << EncodedTypeInfo->getTypeLoc().getSourceRange())) + diag::err_incomplete_type_objc_at_encode, + EncodedTypeInfo->getTypeLoc())) return ExprError(); std::string Str; @@ -846,8 +1019,9 @@ ExprResult Sema::ParseObjCProtocolExpression(IdentifierInfo *ProtocolId, SourceLocation AtLoc, SourceLocation ProtoLoc, SourceLocation LParenLoc, + SourceLocation ProtoIdLoc, SourceLocation RParenLoc) { - ObjCProtocolDecl* PDecl = LookupProtocol(ProtocolId, ProtoLoc); + ObjCProtocolDecl* PDecl = LookupProtocol(ProtocolId, ProtoIdLoc); if (!PDecl) { Diag(ProtoLoc, diag::err_undeclared_protocol) << ProtocolId; return true; @@ -857,7 +1031,7 @@ ExprResult Sema::ParseObjCProtocolExpression(IdentifierInfo *ProtocolId, if (Ty.isNull()) return true; Ty = Context.getObjCObjectPointerType(Ty); - return new (Context) ObjCProtocolExpr(Ty, PDecl, AtLoc, RParenLoc); + return new (Context) ObjCProtocolExpr(Ty, PDecl, AtLoc, ProtoIdLoc, RParenLoc); } /// Try to capture an implicit reference to 'self'. @@ -1023,8 +1197,7 @@ bool Sema::CheckMessageArgumentTypes(QualType ReceiverType, if (RequireCompleteType(argExpr->getSourceRange().getBegin(), param->getType(), - PDiag(diag::err_call_incomplete_argument) - << argExpr->getSourceRange())) + diag::err_call_incomplete_argument, argExpr)) return true; InitializedEntity Entity = InitializedEntity::InitializeParameter(Context, @@ -1042,7 +1215,8 @@ bool Sema::CheckMessageArgumentTypes(QualType ReceiverType, if (Args[i]->isTypeDependent()) continue; - ExprResult Arg = DefaultVariadicArgumentPromotion(Args[i], VariadicMethod, 0); + ExprResult Arg = DefaultVariadicArgumentPromotion(Args[i], VariadicMethod, + 0); IsError |= Arg.isInvalid(); Args[i] = Arg.take(); } @@ -1079,57 +1253,6 @@ bool Sema::isSelfExpr(Expr *receiver) { return false; } -// Helper method for ActOnClassMethod/ActOnInstanceMethod. -// Will search "local" class/category implementations for a method decl. -// If failed, then we search in class's root for an instance method. -// Returns 0 if no method is found. -ObjCMethodDecl *Sema::LookupPrivateClassMethod(Selector Sel, - ObjCInterfaceDecl *ClassDecl) { - ObjCMethodDecl *Method = 0; - // lookup in class and all superclasses - while (ClassDecl && !Method) { - if (ObjCImplementationDecl *ImpDecl = ClassDecl->getImplementation()) - Method = ImpDecl->getClassMethod(Sel); - - // Look through local category implementations associated with the class. - if (!Method) - Method = ClassDecl->getCategoryClassMethod(Sel); - - // Before we give up, check if the selector is an instance method. - // But only in the root. This matches gcc's behaviour and what the - // runtime expects. - if (!Method && !ClassDecl->getSuperClass()) { - Method = ClassDecl->lookupInstanceMethod(Sel); - // Look through local category implementations associated - // with the root class. - if (!Method) - Method = LookupPrivateInstanceMethod(Sel, ClassDecl); - } - - ClassDecl = ClassDecl->getSuperClass(); - } - return Method; -} - -ObjCMethodDecl *Sema::LookupPrivateInstanceMethod(Selector Sel, - ObjCInterfaceDecl *ClassDecl) { - if (!ClassDecl->hasDefinition()) - return 0; - - ObjCMethodDecl *Method = 0; - while (ClassDecl && !Method) { - // If we have implementations in scope, check "private" methods. - if (ObjCImplementationDecl *ImpDecl = ClassDecl->getImplementation()) - Method = ImpDecl->getInstanceMethod(Sel); - - // Look through local category implementations associated with the class. 
-    if (!Method)
-      Method = ClassDecl->getCategoryInstanceMethod(Sel);
-    ClassDecl = ClassDecl->getSuperClass();
-  }
-  return Method;
-}
-
 /// LookupMethodInType - Look up a method in an ObjCObjectType.
 ObjCMethodDecl *Sema::LookupMethodInObjectType(Selector sel, QualType type,
                                                bool isInstance) {
@@ -1141,13 +1264,8 @@ ObjCMethodDecl *Sema::LookupMethodInObjectType(Selector sel, QualType type,
 
     // Okay, look for "private" methods declared in any
     // @implementations we've seen.
-    if (isInstance) {
-      if (ObjCMethodDecl *method = LookupPrivateInstanceMethod(sel, iface))
-        return method;
-    } else {
-      if (ObjCMethodDecl *method = LookupPrivateClassMethod(sel, iface))
-        return method;
-    }
+    if (ObjCMethodDecl *method = iface->lookupPrivateMethod(sel, isInstance))
+      return method;
   }
 
   // Check qualifiers.
@@ -1176,6 +1294,69 @@ ObjCMethodDecl *Sema::LookupMethodInQualifiedType(Selector Sel,
   return 0;
 }
 
+static void DiagnoseARCUseOfWeakReceiver(Sema &S, Expr *Receiver) {
+  if (!Receiver)
+    return;
+
+  if (OpaqueValueExpr *OVE = dyn_cast<OpaqueValueExpr>(Receiver))
+    Receiver = OVE->getSourceExpr();
+
+  Expr *RExpr = Receiver->IgnoreParenImpCasts();
+  SourceLocation Loc = RExpr->getLocStart();
+  QualType T = RExpr->getType();
+  ObjCPropertyDecl *PDecl = 0;
+  ObjCMethodDecl *GDecl = 0;
+  if (PseudoObjectExpr *POE = dyn_cast<PseudoObjectExpr>(RExpr)) {
+    RExpr = POE->getSyntacticForm();
+    if (ObjCPropertyRefExpr *PRE = dyn_cast<ObjCPropertyRefExpr>(RExpr)) {
+      if (PRE->isImplicitProperty()) {
+        GDecl = PRE->getImplicitPropertyGetter();
+        if (GDecl) {
+          T = GDecl->getResultType();
+        }
+      }
+      else {
+        PDecl = PRE->getExplicitProperty();
+        if (PDecl) {
+          T = PDecl->getType();
+        }
+      }
+    }
+  }
+  else if (ObjCMessageExpr *ME = dyn_cast<ObjCMessageExpr>(RExpr)) {
+    // See if receiver is a method which invokes a synthesized getter
+    // backing a 'weak' property.
+    ObjCMethodDecl *Method = ME->getMethodDecl();
+    if (Method && Method->isSynthesized()) {
+      Selector Sel = Method->getSelector();
+      if (Sel.getNumArgs() == 0) {
+        const DeclContext *Container = Method->getDeclContext();
+        PDecl =
+          S.LookupPropertyDecl(cast<ObjCContainerDecl>(Container),
+                               Sel.getIdentifierInfoForSlot(0));
+      }
+      if (PDecl)
+        T = PDecl->getType();
+    }
+  }
+
+  if (T.getObjCLifetime() == Qualifiers::OCL_Weak) {
+    S.Diag(Loc, diag::warn_receiver_is_weak)
+      << ((!PDecl && !GDecl) ? 0 : (PDecl ? 1 : 2));
+    if (PDecl)
+      S.Diag(PDecl->getLocation(), diag::note_property_declare);
+    else if (GDecl)
+      S.Diag(GDecl->getLocation(), diag::note_method_declared_at) << GDecl;
+    return;
+  }
+
+  if (PDecl &&
+      (PDecl->getPropertyAttributes() & ObjCPropertyDecl::OBJC_PR_weak)) {
+    S.Diag(Loc, diag::warn_receiver_is_weak) << 1;
+    S.Diag(PDecl->getLocation(), diag::note_property_declare);
+  }
+}
+
 /// HandleExprPropertyRefExpr - Handle foo.bar where foo is a pointer to an
 /// objective C interface. This is a property reference expression.
 ExprResult Sema::
@@ -1187,19 +1368,20 @@ HandleExprPropertyRefExpr(const ObjCObjectPointerType *OPT,
                           bool Super) {
   const ObjCInterfaceType *IFaceT = OPT->getInterfaceType();
   ObjCInterfaceDecl *IFace = IFaceT->getDecl();
-
-  if (MemberName.getNameKind() != DeclarationName::Identifier) {
+
+  if (!MemberName.isIdentifier()) {
     Diag(MemberLoc, diag::err_invalid_property_name)
       << MemberName << QualType(OPT, 0);
     return ExprError();
   }
-
+
   IdentifierInfo *Member = MemberName.getAsIdentifierInfo();
+  SourceRange BaseRange = Super?
+                               SourceRange(SuperLoc) : BaseExpr->getSourceRange();
   if (RequireCompleteType(MemberLoc, OPT->getPointeeType(),
-                          PDiag(diag::err_property_not_found_forward_class)
-                            << MemberName << BaseRange))
+                          diag::err_property_not_found_forward_class,
+                          MemberName, BaseRange))
     return ExprError();
 
   // Search for a declared property first.
@@ -1207,7 +1389,6 @@ HandleExprPropertyRefExpr(const ObjCObjectPointerType *OPT,
     // Check whether we can reference this property.
     if (DiagnoseUseOfDecl(PD, MemberLoc))
       return ExprError();
-
     if (Super)
       return Owned(new (Context) ObjCPropertyRefExpr(PD, Context.PseudoObjectTy,
                                                      VK_LValue, OK_ObjCProperty,
@@ -1225,7 +1406,7 @@ HandleExprPropertyRefExpr(const ObjCObjectPointerType *OPT,
     // Check whether we can reference this property.
     if (DiagnoseUseOfDecl(PD, MemberLoc))
       return ExprError();
-
+
     if (Super)
       return Owned(new (Context) ObjCPropertyRefExpr(PD,
                                                      Context.PseudoObjectTy,
@@ -1258,9 +1439,6 @@ HandleExprPropertyRefExpr(const ObjCObjectPointerType *OPT,
     if (!Getter)
       Getter = IFace->lookupPrivateMethod(Sel);
-    // Look through local category implementations associated with the class.
-    if (!Getter)
-      Getter = IFace->getCategoryInstanceMethod(Sel);
     if (Getter) {
       // Check if we can reference this property.
       if (DiagnoseUseOfDecl(Getter, MemberLoc))
@@ -1272,7 +1450,7 @@ HandleExprPropertyRefExpr(const ObjCObjectPointerType *OPT,
     SelectorTable::constructSetterName(PP.getIdentifierTable(),
                                        PP.getSelectorTable(), Member);
   ObjCMethodDecl *Setter = IFace->lookupInstanceMethod(SetterSel);
-
+
   // May be found in property's qualified list.
   if (!Setter)
     Setter = LookupMethodInQualifiedType(SetterSel, OPT, true);
@@ -1282,9 +1460,6 @@ HandleExprPropertyRefExpr(const ObjCObjectPointerType *OPT,
       // methods.
       Setter = IFace->lookupPrivateMethod(SetterSel);
     }
-    // Look through local category implementations associated with the class.
-    if (!Setter)
-      Setter = IFace->getCategoryInstanceMethod(SetterSel);
   if (Setter && DiagnoseUseOfDecl(Setter, MemberLoc))
     return ExprError();
 
@@ -1328,8 +1503,8 @@ HandleExprPropertyRefExpr(const ObjCObjectPointerType *OPT,
     if (const ObjCObjectPointerType * OBJPT =
           T->getAsObjCInterfacePointerType()) {
       if (RequireCompleteType(MemberLoc, OBJPT->getPointeeType(),
-                              PDiag(diag::err_property_not_as_forward_class)
-                                << MemberName << BaseExpr->getSourceRange()))
+                              diag::err_property_not_as_forward_class,
+                              MemberName, BaseExpr))
         return ExprError();
     }
     Diag(MemberLoc,
@@ -1603,9 +1778,9 @@ ExprResult Sema::ActOnSuperMessage(Scope *S,
     // is acting as a keyword.
     if (Method->isInstanceMethod()) {
      if (Sel.getMethodFamily() == OMF_dealloc)
-        ObjCShouldCallSuperDealloc = false;
+        getCurFunction()->ObjCShouldCallSuperDealloc = false;
      if (Sel.getMethodFamily() == OMF_finalize)
-        ObjCShouldCallSuperFinalize = false;
+        getCurFunction()->ObjCShouldCallSuperFinalize = false;
 
       // Since we are in an instance method, this is an instance
       // message to the superclass instance.
@@ -1711,9 +1886,9 @@ static void checkCocoaAPI(Sema &S, const ObjCMessageExpr *Msg) {
 ///
 /// \param LBracLoc The location of the opening square bracket '['.
 ///
-/// \param RBrac The location of the closing square bracket ']'.
+/// \param RBracLoc The location of the closing square bracket ']'.
 ///
-/// \param Args The message arguments.
+/// \param ArgsIn The message arguments.
ExprResult Sema::BuildClassMessage(TypeSourceInfo *ReceiverTypeInfo, QualType ReceiverType, SourceLocation SuperLoc, @@ -1762,11 +1937,11 @@ ExprResult Sema::BuildClassMessage(TypeSourceInfo *ReceiverTypeInfo, SourceRange TypeRange = SuperLoc.isValid()? SourceRange(SuperLoc) : ReceiverTypeInfo->getTypeLoc().getSourceRange(); - if (RequireCompleteType(Loc, Context.getObjCInterfaceType(Class), + if (RequireCompleteType(Loc, Context.getObjCInterfaceType(Class), (getLangOpts().ObjCAutoRefCount - ? PDiag(diag::err_arc_receiver_forward_class) - : PDiag(diag::warn_receiver_forward_class)) - << TypeRange)) { + ? diag::err_arc_receiver_forward_class + : diag::warn_receiver_forward_class), + TypeRange)) { // A forward class used in messaging is treated as a 'Class' Method = LookupFactoryMethodInGlobalPool(Sel, SourceRange(LBracLoc, RBracLoc)); @@ -1779,7 +1954,7 @@ ExprResult Sema::BuildClassMessage(TypeSourceInfo *ReceiverTypeInfo, // If we have an implementation in scope, check "private" methods. if (!Method) - Method = LookupPrivateClassMethod(Sel, Class); + Method = Class->lookupPrivateClassMethod(Sel); if (Method && DiagnoseUseOfDecl(Method, Loc)) return ExprError(); @@ -1881,9 +2056,9 @@ ExprResult Sema::BuildInstanceMessageImplicit(Expr *Receiver, /// /// \param LBracLoc The location of the opening square bracket ']'. /// -/// \param RBrac The location of the closing square bracket ']'. +/// \param RBracLoc The location of the closing square bracket ']'. /// -/// \param Args The message arguments. +/// \param ArgsIn The message arguments. ExprResult Sema::BuildInstanceMessage(Expr *Receiver, QualType ReceiverType, SourceLocation SuperLoc, @@ -1948,7 +2123,7 @@ ExprResult Sema::BuildInstanceMessage(Expr *Receiver, receiverIsId); if (!Method) Method = LookupFactoryMethodInGlobalPool(Sel, - SourceRange(LBracLoc, RBracLoc), + SourceRange(LBracLoc,RBracLoc), receiverIsId); } else if (ReceiverType->isObjCClassType() || ReceiverType->isObjCQualifiedClassType()) { @@ -1976,7 +2151,7 @@ ExprResult Sema::BuildInstanceMessage(Expr *Receiver, Method = ClassDecl->lookupClassMethod(Sel); if (!Method) - Method = LookupPrivateClassMethod(Sel, ClassDecl); + Method = ClassDecl->lookupPrivateClassMethod(Sel); } if (Method && DiagnoseUseOfDecl(Method, Loc)) return ExprError(); @@ -2009,12 +2184,15 @@ ExprResult Sema::BuildInstanceMessage(Expr *Receiver, // We allow sending a message to a qualified ID ("id"), which is ok as // long as one of the protocols implements the selector (if not, warn). + // And as long as message is not deprecated/unavailable (warn if it is). if (const ObjCObjectPointerType *QIdTy = ReceiverType->getAsObjCQualifiedIdType()) { // Search protocols for instance methods. Method = LookupMethodInQualifiedType(Sel, QIdTy, true); if (!Method) Method = LookupMethodInQualifiedType(Sel, QIdTy, false); + if (Method && DiagnoseUseOfDecl(Method, Loc)) + return ExprError(); } else if (const ObjCObjectPointerType *OCIType = ReceiverType->getAsObjCInterfacePointerType()) { // We allow sending a message to a pointer to an interface (an object). @@ -2025,12 +2203,10 @@ ExprResult Sema::BuildInstanceMessage(Expr *Receiver, const ObjCInterfaceDecl *forwardClass = 0; if (RequireCompleteType(Loc, OCIType->getPointeeType(), getLangOpts().ObjCAutoRefCount - ? PDiag(diag::err_arc_receiver_forward_instance) - << (Receiver ? Receiver->getSourceRange() - : SourceRange(SuperLoc)) - : PDiag(diag::warn_receiver_forward_instance) - << (Receiver ? Receiver->getSourceRange() - : SourceRange(SuperLoc)))) { + ? 
diag::err_arc_receiver_forward_instance + : diag::warn_receiver_forward_instance, + Receiver? Receiver->getSourceRange() + : SourceRange(SuperLoc))) { if (getLangOpts().ObjCAutoRefCount) return ExprError(); @@ -2048,7 +2224,7 @@ ExprResult Sema::BuildInstanceMessage(Expr *Receiver, if (!Method) { // If we have implementations in scope, check "private" methods. - Method = LookupPrivateInstanceMethod(Sel, ClassDecl); + Method = ClassDecl->lookupPrivateMethod(Sel); if (!Method && getLangOpts().ObjCAutoRefCount) { Diag(Loc, diag::err_arc_may_not_respond) @@ -2062,7 +2238,7 @@ ExprResult Sema::BuildInstanceMessage(Expr *Receiver, // compatibility. FIXME: should we deviate?? if (OCIType->qual_empty()) { Method = LookupInstanceMethodInGlobalPool(Sel, - SourceRange(LBracLoc, RBracLoc)); + SourceRange(LBracLoc, RBracLoc)); if (Method && !forwardClass) Diag(Loc, diag::warn_maynot_respond) << OCIType->getInterfaceDecl()->getIdentifier() << Sel; @@ -2087,8 +2263,9 @@ ExprResult Sema::BuildInstanceMessage(Expr *Receiver, // TODO: specialized warning on null receivers? bool IsNull = Receiver->isNullPointerConstant(Context, Expr::NPC_ValueDependentIsNull); + CastKind Kind = IsNull ? CK_NullToPointer : CK_IntegralToPointer; Receiver = ImpCastExprToType(Receiver, Context.getObjCIdType(), - IsNull ? CK_NullToPointer : CK_IntegralToPointer).take(); + Kind).take(); } ReceiverType = Receiver->getType(); } else { @@ -2232,10 +2409,7 @@ ExprResult Sema::BuildInstanceMessage(Expr *Receiver, } if (getLangOpts().ObjCAutoRefCount) { - if (Receiver && - (Receiver->IgnoreParenImpCasts()->getType().getObjCLifetime() - == Qualifiers::OCL_Weak)) - Diag(Receiver->getLocStart(), diag::warn_receiver_is_weak); + DiagnoseARCUseOfWeakReceiver(*this, Receiver); // In ARC, annotate delegate init calls. if (Result->getMethodFamily() == OMF_init && @@ -2373,6 +2547,7 @@ namespace { ASTContext &Context; ARCConversionTypeClass SourceClass; ARCConversionTypeClass TargetClass; + bool Diagnose; static bool isCFType(QualType type) { // Someday this can use ns_bridged. For now, it has to do this. @@ -2381,8 +2556,9 @@ namespace { public: ARCCastChecker(ASTContext &Context, ARCConversionTypeClass source, - ARCConversionTypeClass target) - : Context(Context), SourceClass(source), TargetClass(target) {} + ARCConversionTypeClass target, bool diagnose) + : Context(Context), SourceClass(source), TargetClass(target), + Diagnose(diagnose) {} using super::Visit; ACCResult Visit(Expr *e) { @@ -2500,7 +2676,8 @@ namespace { // now we're not going to permit implicit handling of +1 results, // because it's a bit frightening. if (fn->hasAttr()) - return ACC_invalid; // ACC_plusOne if we start accepting this + return Diagnose ? ACC_plusOne + : ACC_invalid; // ACC_plusOne if we start accepting this // Recognize this specific builtin function, which is used by CFSTR. unsigned builtinID = fn->getBuiltinID(); @@ -2510,10 +2687,11 @@ namespace { // Otherwise, don't do anything implicit with an unaudited function. if (!fn->hasAttr()) return ACC_invalid; - + // Otherwise, it's +0 unless it follows the create convention. if (ento::coreFoundation::followsCreateRule(fn)) - return ACC_invalid; // ACC_plusOne if we start accepting this + return Diagnose ? 
ACC_plusOne + : ACC_invalid; // ACC_plusOne if we start accepting this return ACC_plusZero; } @@ -2564,11 +2742,12 @@ namespace { }; } -static bool -KnownName(Sema &S, const char *name) { - LookupResult R(S, &S.Context.Idents.get(name), SourceLocation(), +bool Sema::isKnownName(StringRef name) { + if (name.empty()) + return false; + LookupResult R(*this, &Context.Idents.get(name), SourceLocation(), Sema::LookupOrdinaryName); - return S.LookupName(R, S.TUScope, false); + return LookupName(R, TUScope, false); } static void addFixitForObjCARCConversion(Sema &S, @@ -2595,14 +2774,23 @@ static void addFixitForObjCARCConversion(Sema &S, castedE = CCE->getSubExpr(); castedE = castedE->IgnoreImpCasts(); SourceRange range = castedE->getSourceRange(); + + SmallString<32> BridgeCall; + + SourceManager &SM = S.getSourceManager(); + char PrevChar = *SM.getCharacterData(range.getBegin().getLocWithOffset(-1)); + if (Lexer::isIdentifierBodyChar(PrevChar, S.getLangOpts())) + BridgeCall += ' '; + + BridgeCall += CFBridgeName; + if (isa(castedE)) { DiagB.AddFixItHint(FixItHint::CreateInsertion(range.getBegin(), - CFBridgeName)); + BridgeCall)); } else { - std::string namePlusParen = CFBridgeName; - namePlusParen += "("; + BridgeCall += '('; DiagB.AddFixItHint(FixItHint::CreateInsertion(range.getBegin(), - namePlusParen)); + BridgeCall)); DiagB.AddFixItHint(FixItHint::CreateInsertion( S.PP.getLocForEndOfToken(range.getEnd()), ")")); @@ -2677,14 +2865,20 @@ diagnoseObjCARCConversion(Sema &S, SourceRange castRange, << castType << castRange << castExpr->getSourceRange(); - bool br = KnownName(S, "CFBridgingRelease"); + bool br = S.isKnownName("CFBridgingRelease"); + ACCResult CreateRule = + ARCCastChecker(S.Context, exprACTC, castACTC, true).Visit(castExpr); + assert(CreateRule != ACC_bottom && "This cast should already be accepted."); + if (CreateRule != ACC_plusOne) { DiagnosticBuilder DiagB = S.Diag(noteLoc, diag::note_arc_bridge); addFixitForObjCARCConversion(S, DiagB, CCK, afterLParen, castType, castExpr, "__bridge ", 0); } + if (CreateRule != ACC_plusZero) { - DiagnosticBuilder DiagB = S.Diag(noteLoc, diag::note_arc_bridge_transfer) + DiagnosticBuilder DiagB = S.Diag(br ? castExpr->getExprLoc() : noteLoc, + diag::note_arc_bridge_transfer) << castExprType << br; addFixitForObjCARCConversion(S, DiagB, CCK, afterLParen, castType, castExpr, "__bridge_transfer ", @@ -2696,7 +2890,7 @@ diagnoseObjCARCConversion(Sema &S, SourceRange castRange, // Bridge from a CF type to an ARC type. if (exprACTC == ACTC_retainable && isAnyRetainable(castACTC)) { - bool br = KnownName(S, "CFBridgingRetain"); + bool br = S.isKnownName("CFBridgingRetain"); S.Diag(loc, diag::err_arc_cast_requires_bridge) << unsigned(CCK == Sema::CCK_ImplicitConversion) // cast|implicit << unsigned(castExprType->isBlockPointerType()) // of ObjC|block type @@ -2705,14 +2899,19 @@ diagnoseObjCARCConversion(Sema &S, SourceRange castRange, << castType << castRange << castExpr->getSourceRange(); - + ACCResult CreateRule = + ARCCastChecker(S.Context, exprACTC, castACTC, true).Visit(castExpr); + assert(CreateRule != ACC_bottom && "This cast should already be accepted."); + if (CreateRule != ACC_plusOne) { DiagnosticBuilder DiagB = S.Diag(noteLoc, diag::note_arc_bridge); addFixitForObjCARCConversion(S, DiagB, CCK, afterLParen, castType, castExpr, "__bridge ", 0); } + if (CreateRule != ACC_plusZero) { - DiagnosticBuilder DiagB = S.Diag(noteLoc, diag::note_arc_bridge_retained) + DiagnosticBuilder DiagB = S.Diag(br ? 
castExpr->getExprLoc() : noteLoc, + diag::note_arc_bridge_retained) << castType << br; addFixitForObjCARCConversion(S, DiagB, CCK, afterLParen, castType, castExpr, "__bridge_retained ", @@ -2785,7 +2984,7 @@ Sema::CheckObjCARCConversion(SourceRange castRange, QualType castType, CCK != CCK_ImplicitConversion) return ACR_okay; - switch (ARCCastChecker(Context, exprACTC, castACTC).Visit(castExpr)) { + switch (ARCCastChecker(Context, exprACTC, castACTC, false).Visit(castExpr)) { // For invalid casts, fall through. case ACC_invalid: break; @@ -2949,7 +3148,7 @@ ExprResult Sema::BuildObjCBridgedCast(SourceLocation LParenLoc, break; case OBC_BridgeRetained: { - bool br = KnownName(*this, "CFBridgingRelease"); + bool br = isKnownName("CFBridgingRelease"); Diag(BridgeKeywordLoc, diag::err_arc_bridge_cast_wrong_kind) << 2 << FromType @@ -2992,7 +3191,7 @@ ExprResult Sema::BuildObjCBridgedCast(SourceLocation LParenLoc, break; case OBC_BridgeTransfer: { - bool br = KnownName(*this, "CFBridgingRetain"); + bool br = isKnownName("CFBridgingRetain"); Diag(BridgeKeywordLoc, diag::err_arc_bridge_cast_wrong_kind) << (FromType->isBlockPointerType()? 1 : 0) << FromType diff --git a/lib/Sema/SemaFixItUtils.cpp b/lib/Sema/SemaFixItUtils.cpp index b78ea7d..b61b930 100644 --- a/lib/Sema/SemaFixItUtils.cpp +++ b/lib/Sema/SemaFixItUtils.cpp @@ -11,6 +11,7 @@ // //===----------------------------------------------------------------------===// +#include "clang/AST/ASTContext.h" #include "clang/AST/ExprCXX.h" #include "clang/AST/ExprObjC.h" #include "clang/Lex/Preprocessor.h" @@ -163,42 +164,54 @@ static bool isMacroDefined(const Sema &S, StringRef Name) { return S.PP.getMacroInfo(&S.getASTContext().Idents.get(Name)); } -const char *Sema::getFixItZeroInitializerForType(QualType T) const { +static std::string getScalarZeroExpressionForType(const Type& T, const Sema& S) { + assert(T.isScalarType() && "use scalar types only"); + // Suggest "0" for non-enumeration scalar types, unless we can find a + // better initializer. + if (T.isEnumeralType()) + return std::string(); + if ((T.isObjCObjectPointerType() || T.isBlockPointerType()) && + isMacroDefined(S, "nil")) + return "nil"; + if (T.isRealFloatingType()) + return "0.0"; + if (T.isBooleanType() && S.LangOpts.CPlusPlus) + return "false"; + if (T.isPointerType() || T.isMemberPointerType()) { + if (S.LangOpts.CPlusPlus0x) + return "nullptr"; + if (isMacroDefined(S, "NULL")) + return "NULL"; + } + if (T.isCharType()) + return "'\\0'"; + if (T.isWideCharType()) + return "L'\\0'"; + if (T.isChar16Type()) + return "u'\\0'"; + if (T.isChar32Type()) + return "U'\\0'"; + return "0"; +} + +std::string Sema::getFixItZeroInitializerForType(QualType T) const { if (T->isScalarType()) { - // Suggest " = 0" for non-enumeration scalar types, unless we can find a - // better initializer. 
- if (T->isEnumeralType()) - return 0; - if ((T->isObjCObjectPointerType() || T->isBlockPointerType()) && - isMacroDefined(*this, "nil")) - return " = nil"; - if (T->isRealFloatingType()) - return " = 0.0"; - if (T->isBooleanType() && LangOpts.CPlusPlus) - return " = false"; - if (T->isPointerType() || T->isMemberPointerType()) { - if (LangOpts.CPlusPlus0x) - return " = nullptr"; - else if (isMacroDefined(*this, "NULL")) - return " = NULL"; - } - if (T->isCharType()) - return " = '\\0'"; - if (T->isWideCharType()) - return " = L'\\0'"; - if (T->isChar16Type()) - return " = u'\\0'"; - if (T->isChar32Type()) - return " = U'\\0'"; - return " = 0"; + std::string s = getScalarZeroExpressionForType(*T, *this); + if (!s.empty()) + s = " = " + s; + return s; } const CXXRecordDecl *RD = T->getAsCXXRecordDecl(); if (!RD || !RD->hasDefinition()) - return 0; + return std::string(); if (LangOpts.CPlusPlus0x && !RD->hasUserProvidedDefaultConstructor()) return "{}"; if (RD->isAggregate()) return " = {}"; - return 0; + return std::string(); +} + +std::string Sema::getFixItZeroLiteralForType(QualType T) const { + return getScalarZeroExpressionForType(*T, *this); } diff --git a/lib/Sema/SemaInit.cpp b/lib/Sema/SemaInit.cpp index a65b41f..62ab1e6 100644 --- a/lib/Sema/SemaInit.cpp +++ b/lib/Sema/SemaInit.cpp @@ -92,8 +92,7 @@ static void CheckStringInit(Expr *Str, QualType &DeclT, const ArrayType *AT, if (const IncompleteArrayType *IAT = dyn_cast(AT)) { // C99 6.7.8p14. We have an array of character type with unknown size // being initialized to a string literal. - llvm::APSInt ConstVal(32); - ConstVal = StrLength; + llvm::APInt ConstVal(32, StrLength); // Return a new array type (C99 6.7.8p22). DeclT = S.Context.getConstantArrayType(IAT->getElementType(), ConstVal, @@ -687,22 +686,21 @@ void InitListChecker::CheckListElementTypes(const InitializedEntity &Entity, } else if (DeclType->isVectorType()) { CheckVectorType(Entity, IList, DeclType, Index, StructuredList, StructuredIndex); - } else if (DeclType->isAggregateType()) { - if (DeclType->isRecordType()) { - RecordDecl *RD = DeclType->getAs()->getDecl(); - CheckStructUnionTypes(Entity, IList, DeclType, RD->field_begin(), - SubobjectIsDesignatorContext, Index, - StructuredList, StructuredIndex, - TopLevelObject); - } else if (DeclType->isArrayType()) { - llvm::APSInt Zero( - SemaRef.Context.getTypeSize(SemaRef.Context.getSizeType()), - false); - CheckArrayType(Entity, IList, DeclType, Zero, - SubobjectIsDesignatorContext, Index, - StructuredList, StructuredIndex); - } else - llvm_unreachable("Aggregate that isn't a structure or array?!"); + } else if (DeclType->isRecordType()) { + assert(DeclType->isAggregateType() && + "non-aggregate records should be handed in CheckSubElementType"); + RecordDecl *RD = DeclType->getAs()->getDecl(); + CheckStructUnionTypes(Entity, IList, DeclType, RD->field_begin(), + SubobjectIsDesignatorContext, Index, + StructuredList, StructuredIndex, + TopLevelObject); + } else if (DeclType->isArrayType()) { + llvm::APSInt Zero( + SemaRef.Context.getTypeSize(SemaRef.Context.getSizeType()), + false); + CheckArrayType(Entity, IList, DeclType, Zero, + SubobjectIsDesignatorContext, Index, + StructuredList, StructuredIndex); } else if (DeclType->isVoidType() || DeclType->isFunctionType()) { // This type is invalid, issue a diagnostic. 
++Index; @@ -710,19 +708,6 @@ void InitListChecker::CheckListElementTypes(const InitializedEntity &Entity, SemaRef.Diag(IList->getLocStart(), diag::err_illegal_initializer_type) << DeclType; hadError = true; - } else if (DeclType->isRecordType()) { - // C++ [dcl.init]p14: - // [...] If the class is an aggregate (8.5.1), and the initializer - // is a brace-enclosed list, see 8.5.1. - // - // Note: 8.5.1 is handled below; here, we diagnose the case where - // we have an initializer list and a destination type that is not - // an aggregate. - // FIXME: In C++0x, this is yet another form of initialization. - if (!VerifyOnly) - SemaRef.Diag(IList->getLocStart(), diag::err_init_non_aggr_init_list) - << DeclType << IList->getSourceRange(); - hadError = true; } else if (DeclType->isReferenceType()) { CheckReferenceType(Entity, IList, DeclType, Index, StructuredList, StructuredIndex); @@ -747,18 +732,25 @@ void InitListChecker::CheckSubElementType(const InitializedEntity &Entity, unsigned &StructuredIndex) { Expr *expr = IList->getInit(Index); if (InitListExpr *SubInitList = dyn_cast(expr)) { - unsigned newIndex = 0; - unsigned newStructuredIndex = 0; - InitListExpr *newStructuredList - = getStructuredSubobjectInit(IList, Index, ElemType, - StructuredList, StructuredIndex, - SubInitList->getSourceRange()); - CheckExplicitInitList(Entity, SubInitList, ElemType, newIndex, - newStructuredList, newStructuredIndex); - ++StructuredIndex; - ++Index; - return; - } else if (ElemType->isScalarType()) { + if (!ElemType->isRecordType() || ElemType->isAggregateType()) { + unsigned newIndex = 0; + unsigned newStructuredIndex = 0; + InitListExpr *newStructuredList + = getStructuredSubobjectInit(IList, Index, ElemType, + StructuredList, StructuredIndex, + SubInitList->getSourceRange()); + CheckExplicitInitList(Entity, SubInitList, ElemType, newIndex, + newStructuredList, newStructuredIndex); + ++StructuredIndex; + ++Index; + return; + } + assert(SemaRef.getLangOpts().CPlusPlus && + "non-aggregate records are only possible in C++"); + // C++ initialization is handled later. + } + + if (ElemType->isScalarType()) { return CheckScalarType(Entity, IList, ElemType, Index, StructuredList, StructuredIndex); } else if (ElemType->isReferenceType()) { @@ -1859,7 +1851,7 @@ InitListChecker::CheckDesignatedInitializer(const InitializedEntity &Entity, } } else { // Recurse to check later designated subobjects. - QualType FieldType = (*Field)->getType(); + QualType FieldType = Field->getType(); unsigned newStructuredIndex = FieldIndex; InitializedEntity MemberEntity = @@ -2708,84 +2700,39 @@ static void MaybeProduceObjCObject(Sema &S, } } -/// \brief When initializing from init list via constructor, deal with the -/// empty init list and std::initializer_list special cases. +/// \brief When initializing from init list via constructor, handle +/// initialization of an object of type std::initializer_list. /// -/// \return True if this was a special case, false otherwise. -static bool TryListConstructionSpecialCases(Sema &S, - InitListExpr *List, - CXXRecordDecl *DestRecordDecl, - QualType DestType, - InitializationSequence &Sequence) { - // C++11 [dcl.init.list]p3: - // List-initialization of an object or reference of type T is defined as - // follows: - // - If T is an aggregate, aggregate initialization is performed. - if (DestType->isAggregateType()) +/// \return true if we have handled initialization of an object of type +/// std::initializer_list, false otherwise. 
+static bool TryInitializerListConstruction(Sema &S, + InitListExpr *List, + QualType DestType, + InitializationSequence &Sequence) { + QualType E; + if (!S.isStdInitializerList(DestType, &E)) return false; - // - Otherwise, if the initializer list has no elements and T is a class - // type with a default constructor, the object is value-initialized. - if (List->getNumInits() == 0) { - if (CXXConstructorDecl *DefaultConstructor = - S.LookupDefaultConstructor(DestRecordDecl)) { - if (DefaultConstructor->isDeleted() || - S.isFunctionConsideredUnavailable(DefaultConstructor)) { - // Fake an overload resolution failure. - OverloadCandidateSet &CandidateSet = Sequence.getFailedCandidateSet(); - DeclAccessPair FoundDecl = DeclAccessPair::make(DefaultConstructor, - DefaultConstructor->getAccess()); - if (FunctionTemplateDecl *ConstructorTmpl = - dyn_cast(DefaultConstructor)) - S.AddTemplateOverloadCandidate(ConstructorTmpl, FoundDecl, - /*ExplicitArgs*/ 0, - ArrayRef(), CandidateSet, - /*SuppressUserConversions*/ false); - else - S.AddOverloadCandidate(DefaultConstructor, FoundDecl, - ArrayRef(), CandidateSet, - /*SuppressUserConversions*/ false); - Sequence.SetOverloadFailure( - InitializationSequence::FK_ListConstructorOverloadFailed, - OR_Deleted); - } else - Sequence.AddConstructorInitializationStep(DefaultConstructor, - DefaultConstructor->getAccess(), - DestType, - /*MultipleCandidates=*/false, - /*FromInitList=*/true, - /*AsInitList=*/false); + // Check that each individual element can be copy-constructed. But since we + // have no place to store further information, we'll recalculate everything + // later. + InitializedEntity HiddenArray = InitializedEntity::InitializeTemporary( + S.Context.getConstantArrayType(E, + llvm::APInt(S.Context.getTypeSize(S.Context.getSizeType()), + List->getNumInits()), + ArrayType::Normal, 0)); + InitializedEntity Element = InitializedEntity::InitializeElement(S.Context, + 0, HiddenArray); + for (unsigned i = 0, n = List->getNumInits(); i < n; ++i) { + Element.setElementIndex(i); + if (!S.CanPerformCopyInitialization(Element, List->getInit(i))) { + Sequence.SetFailed( + InitializationSequence::FK_InitListElementCopyFailure); return true; } } - - // - Otherwise, if T is a specialization of std::initializer_list, [...] - QualType E; - if (S.isStdInitializerList(DestType, &E)) { - // Check that each individual element can be copy-constructed. But since we - // have no place to store further information, we'll recalculate everything - // later. - InitializedEntity HiddenArray = InitializedEntity::InitializeTemporary( - S.Context.getConstantArrayType(E, - llvm::APInt(S.Context.getTypeSize(S.Context.getSizeType()), - List->getNumInits()), - ArrayType::Normal, 0)); - InitializedEntity Element = InitializedEntity::InitializeElement(S.Context, - 0, HiddenArray); - for (unsigned i = 0, n = List->getNumInits(); i < n; ++i) { - Element.setElementIndex(i); - if (!S.CanPerformCopyInitialization(Element, List->getInit(i))) { - Sequence.SetFailed( - InitializationSequence::FK_InitListElementCopyFailure); - return true; - } - } - Sequence.AddStdInitializerListConstructionStep(DestType); - return true; - } - - // Not a special case. 
- return false; + Sequence.AddStdInitializerListConstructionStep(DestType); + return true; } static OverloadingResult @@ -2886,11 +2833,6 @@ static void TryConstructorInitialization(Sema &S, CXXRecordDecl *DestRecordDecl = cast(DestRecordType->getDecl()); - if (InitListSyntax && - TryListConstructionSpecialCases(S, cast(Args[0]), - DestRecordDecl, DestType, Sequence)) - return; - // Build the candidate set directly in the initialization sequence // structure, so that it will persist if we fail. OverloadCandidateSet &CandidateSet = Sequence.getFailedCandidateSet(); @@ -2917,15 +2859,21 @@ static void TryConstructorInitialization(Sema &S, // constructors of the class T and the argument list consists of the // initializer list as a single argument. if (InitListSyntax) { + InitListExpr *ILE = cast(Args[0]); AsInitializerList = true; - Result = ResolveConstructorOverload(S, Kind.getLocation(), Args, NumArgs, - CandidateSet, ConStart, ConEnd, Best, - CopyInitialization, AllowExplicit, - /*OnlyListConstructor=*/true, - InitListSyntax); + + // If the initializer list has no elements and T has a default constructor, + // the first phase is omitted. + if (ILE->getNumInits() != 0 || + (!DestRecordDecl->hasDeclaredDefaultConstructor() && + !DestRecordDecl->needsImplicitDefaultConstructor())) + Result = ResolveConstructorOverload(S, Kind.getLocation(), Args, NumArgs, + CandidateSet, ConStart, ConEnd, Best, + CopyInitialization, AllowExplicit, + /*OnlyListConstructor=*/true, + InitListSyntax); // Time to unwrap the init list. - InitListExpr *ILE = cast(Args[0]); Args = ILE->getInits(); NumArgs = ILE->getNumInits(); } @@ -2933,7 +2881,7 @@ static void TryConstructorInitialization(Sema &S, // C++11 [over.match.list]p1: // - If no viable initializer-list constructor is found, overload resolution // is performed again, where the candidate functions are all the - // constructors of the class T nad the argument list consists of the + // constructors of the class T and the argument list consists of the // elements of the initializer list. if (Result == OR_No_Viable_Function) { AsInitializerList = false; @@ -2951,13 +2899,13 @@ static void TryConstructorInitialization(Sema &S, return; } - // C++0x [dcl.init]p6: + // C++11 [dcl.init]p6: // If a program calls for the default initialization of an object // of a const-qualified type T, T shall be a class type with a // user-provided default constructor. if (Kind.getKind() == InitializationKind::IK_Default && Entity.getType().isConstQualified() && - cast(Best->Function)->isImplicit()) { + !cast(Best->Function)->isUserProvided()) { Sequence.SetFailed(InitializationSequence::FK_DefaultInitOfConst); return; } @@ -3018,6 +2966,12 @@ static void TryReferenceInitializationCore(Sema &S, Qualifiers T2Quals, InitializationSequence &Sequence); +static void TryValueInitialization(Sema &S, + const InitializedEntity &Entity, + const InitializationKind &Kind, + InitializationSequence &Sequence, + InitListExpr *InitList = 0); + static void TryListInitialization(Sema &S, const InitializedEntity &Entity, const InitializationKind &Kind, @@ -3108,19 +3062,36 @@ static void TryListInitialization(Sema &S, return; } if (DestType->isRecordType()) { - if (S.RequireCompleteType(InitList->getLocStart(), DestType, S.PDiag())) { + if (S.RequireCompleteType(InitList->getLocStart(), DestType, 0)) { Sequence.setIncompleteTypeFailure(DestType); return; } + // C++11 [dcl.init.list]p3: + // - If T is an aggregate, aggregate initialization is performed. 
if (!DestType->isAggregateType()) { if (S.getLangOpts().CPlusPlus0x) { + // - Otherwise, if the initializer list has no elements and T is a + // class type with a default constructor, the object is + // value-initialized. + if (InitList->getNumInits() == 0) { + CXXRecordDecl *RD = DestType->getAsCXXRecordDecl(); + if (RD->hasDeclaredDefaultConstructor() || + RD->needsImplicitDefaultConstructor()) { + TryValueInitialization(S, Entity, Kind, Sequence, InitList); + return; + } + } + + // - Otherwise, if T is a specialization of std::initializer_list, + // an initializer_list object constructed [...] + if (TryInitializerListConstruction(S, InitList, DestType, Sequence)) + return; + + // - Otherwise, if T is a class type, constructors are considered. Expr *Arg = InitList; - // A direct-initializer is not list-syntax, i.e. there's no special - // treatment of "A a({1, 2});". - TryConstructorInitialization(S, Entity, Kind, &Arg, 1, DestType, - Sequence, - Kind.getKind() != InitializationKind::IK_Direct); + TryConstructorInitialization(S, Entity, Kind, &Arg, 1, DestType, + Sequence, /*InitListSyntax*/true); } else Sequence.SetFailed( InitializationSequence::FK_InitListBadDestinationType); @@ -3605,7 +3576,11 @@ static void TryStringLiteralInitialization(Sema &S, static void TryValueInitialization(Sema &S, const InitializedEntity &Entity, const InitializationKind &Kind, - InitializationSequence &Sequence) { + InitializationSequence &Sequence, + InitListExpr *InitList) { + assert((!InitList || InitList->getNumInits() == 0) && + "Shouldn't use value-init for non-empty init lists"); + // C++98 [dcl.init]p5, C++11 [dcl.init]p7: // // To value-initialize an object of type T means: @@ -3616,17 +3591,15 @@ static void TryValueInitialization(Sema &S, if (const RecordType *RT = T->getAs()) { if (CXXRecordDecl *ClassDecl = dyn_cast(RT->getDecl())) { - // C++98: - // -- if T is a class type (clause 9) with a user-declared - // constructor (12.1), then the default constructor for T is - // called (and the initialization is ill-formed if T has no - // accessible default constructor); + bool NeedZeroInitialization = true; if (!S.getLangOpts().CPlusPlus0x) { + // C++98: + // -- if T is a class type (clause 9) with a user-declared constructor + // (12.1), then the default constructor for T is called (and the + // initialization is ill-formed if T has no accessible default + // constructor); if (ClassDecl->hasUserDeclaredConstructor()) - // FIXME: we really want to refer to a single subobject of the array, - // but Entity doesn't have a way to capture that (yet). 
- return TryConstructorInitialization(S, Entity, Kind, 0, 0, - T, Sequence); + NeedZeroInitialization = false; } else { // C++11: // -- if T is a class type (clause 9) with either no default constructor @@ -3634,19 +3607,28 @@ static void TryValueInitialization(Sema &S, // or deleted, then the object is default-initialized; CXXConstructorDecl *CD = S.LookupDefaultConstructor(ClassDecl); if (!CD || !CD->getCanonicalDecl()->isDefaulted() || CD->isDeleted()) - return TryConstructorInitialization(S, Entity, Kind, 0, 0, - T, Sequence); + NeedZeroInitialization = false; } // -- if T is a (possibly cv-qualified) non-union class type without a // user-provided or deleted default constructor, then the object is // zero-initialized and, if T has a non-trivial default constructor, // default-initialized; - if ((ClassDecl->getTagKind() == TTK_Class || - ClassDecl->getTagKind() == TTK_Struct)) { + // FIXME: The 'non-union' here is a defect (not yet assigned an issue + // number). Update the quotation when the defect is resolved. + if (NeedZeroInitialization) Sequence.AddZeroInitializationStep(Entity.getType()); - return TryConstructorInitialization(S, Entity, Kind, 0, 0, T, Sequence); - } + + // If this is list-value-initialization, pass the empty init list on when + // building the constructor call. This affects the semantics of a few + // things (such as whether an explicit default constructor can be called). + Expr *InitListAsExpr = InitList; + Expr **Args = InitList ? &InitListAsExpr : 0; + unsigned NumArgs = InitList ? 1 : 0; + bool InitListSyntax = InitList; + + return TryConstructorInitialization(S, Entity, Kind, Args, NumArgs, T, + Sequence, InitListSyntax); } } @@ -4101,8 +4083,8 @@ InitializationSequence::InitializationSequence(Sema &S, AddArrayInitStep(DestType); } } - // Note: as a GNU C++ extension, we allow initialization of a - // class member from a parenthesized initializer list. + // Note: as a GNU C++ extension, we allow list-initialization of a + // class member of array type from a parenthesized initializer list. else if (S.getLangOpts().CPlusPlus && Entity.getKind() == InitializedEntity::EK_Member && Initializer && isa(Initializer)) { @@ -4409,7 +4391,7 @@ static SourceLocation getInitializationLoc(const InitializedEntity &Entity, /// \param T The type of the temporary object, which must either be /// the type of the initializer expression or a superclass thereof. /// -/// \param Enter The entity being initialized. +/// \param Entity The entity being initialized. /// /// \param CurInit The initializer expression. /// @@ -4452,7 +4434,7 @@ static ExprResult CopyObject(Sema &S, SourceLocation Loc = getInitializationLoc(Entity, CurInit.get()); // Make sure that the type we are copying is complete. - if (S.RequireCompleteType(Loc, T, S.PDiag(diag::err_temp_copy_incomplete))) + if (S.RequireCompleteType(Loc, T, diag::err_temp_copy_incomplete)) return move(CurInit); // Perform overload resolution using the class's copy/move constructors. 
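The C++11 value-initialization rules reworked in the SemaInit.cpp hunks above are easier to follow with a concrete program. What follows is a minimal standalone C++11 sketch, not part of the patch (the struct names are invented), showing the two cases the NeedZeroInitialization flag distinguishes: zero-initialization happens first exactly when the default constructor is not user-provided.

#include <cassert>

struct Implicit {      // no user-provided default constructor
  int x;
};

struct UserProvided {  // user-provided default constructor
  UserProvided() {}    // deliberately leaves x uninitialized
  int x;
};

int main() {
  // Value-initialization of Implicit zero-initializes the object before
  // running the (trivial) default constructor, so x is guaranteed to be 0.
  assert(Implicit().x == 0);

  // Value-initialization of UserProvided only runs the constructor; u.x is
  // left indeterminate, and reading it would be undefined behavior.
  UserProvided u{};
  (void)u;
  return 0;
}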
@@ -4516,7 +4498,7 @@ static ExprResult CopyObject(Sema &S, for (unsigned I = 1, N = Constructor->getNumParams(); I != N; ++I) { ParmVarDecl *Parm = Constructor->getParamDecl(I); if (S.RequireCompleteType(Loc, Parm->getType(), - S.PDiag(diag::err_call_incomplete_argument))) + diag::err_call_incomplete_argument)) break; // Build the default argument expression; we don't actually care @@ -4748,6 +4730,43 @@ PerformConstructorInitialization(Sema &S, return move(CurInit); } +/// Determine whether the specified InitializedEntity definitely has a lifetime +/// longer than the current full-expression. Conservatively returns false if +/// it's unclear. +static bool +InitializedEntityOutlivesFullExpression(const InitializedEntity &Entity) { + const InitializedEntity *Top = &Entity; + while (Top->getParent()) + Top = Top->getParent(); + + switch (Top->getKind()) { + case InitializedEntity::EK_Variable: + case InitializedEntity::EK_Result: + case InitializedEntity::EK_Exception: + case InitializedEntity::EK_Member: + case InitializedEntity::EK_New: + case InitializedEntity::EK_Base: + case InitializedEntity::EK_Delegating: + return true; + + case InitializedEntity::EK_ArrayElement: + case InitializedEntity::EK_VectorElement: + case InitializedEntity::EK_BlockElement: + case InitializedEntity::EK_ComplexElement: + // Could not determine what the full initialization is. Assume it might not + // outlive the full-expression. + return false; + + case InitializedEntity::EK_Parameter: + case InitializedEntity::EK_Temporary: + case InitializedEntity::EK_LambdaCapture: + // The entity being initialized might not outlive the full-expression. + return false; + } + + llvm_unreachable("unknown entity kind"); +} + ExprResult InitializationSequence::Perform(Sema &S, const InitializedEntity &Entity, @@ -4816,6 +4835,29 @@ InitializationSequence::Perform(Sema &S, if (Steps.empty()) return S.Owned((Expr *)0); + if (S.getLangOpts().CPlusPlus0x && Entity.getType()->isReferenceType() && + Args.size() == 1 && isa(Args.get()[0]) && + Entity.getKind() != InitializedEntity::EK_Parameter) { + // Produce a C++98 compatibility warning if we are initializing a reference + // from an initializer list. For parameters, we produce a better warning + // elsewhere. + Expr *Init = Args.get()[0]; + S.Diag(Init->getLocStart(), diag::warn_cxx98_compat_reference_list_init) + << Init->getSourceRange(); + } + + // Diagnose cases where we initialize a pointer to an array temporary, and the + // pointer obviously outlives the temporary. 
+ if (Args.size() == 1 && Args.get()[0]->getType()->isArrayType() && + Entity.getType()->isPointerType() && + InitializedEntityOutlivesFullExpression(Entity)) { + Expr *Init = Args.get()[0]; + Expr::LValueClassification Kind = Init->ClassifyLValue(S.Context); + if (Kind == Expr::LV_ClassTemporary || Kind == Expr::LV_ArrayTemporary) + S.Diag(Init->getLocStart(), diag::warn_temporary_array_to_pointer_decay) + << Init->getSourceRange(); + } + QualType DestType = Entity.getType().getNonReferenceType(); // FIXME: Ugly hack around the fact that Entity.getType() is not // the same as Entity.getDecl()->getType() in cases involving type merging, @@ -4842,7 +4884,6 @@ InitializationSequence::Perform(Sema &S, case SK_QualificationConversionXValue: case SK_QualificationConversionRValue: case SK_ConversionSequence: - case SK_ListConstructorCall: case SK_ListInitialization: case SK_UnwrapInitList: case SK_RewrapInitList: @@ -4862,6 +4903,7 @@ InitializationSequence::Perform(Sema &S, } case SK_ConstructorInitialization: + case SK_ListConstructorCall: case SK_ZeroInitialization: break; } @@ -5152,7 +5194,10 @@ InitializationSequence::Perform(Sema &S, InitializedEntity TempEntity = InitializedEntity::InitializeTemporary( Entity.getType().getNonReferenceType()); bool UseTemporary = Entity.getType()->isReferenceType(); - InitListExpr *InitList = cast(CurInit.get()); + assert(Args.size() == 1 && "expected a single argument for list init"); + InitListExpr *InitList = cast(Args.get()[0]); + S.Diag(InitList->getExprLoc(), diag::warn_cxx98_compat_ctor_list_init) + << InitList->getSourceRange(); MultiExprArg Arg(InitList->getInits(), InitList->getNumInits()); CurInit = PerformConstructorInitialization(S, UseTemporary ? TempEntity : Entity, @@ -5198,7 +5243,8 @@ InitializationSequence::Perform(Sema &S, step_iterator NextStep = Step; ++NextStep; if (NextStep != StepEnd && - NextStep->Kind == SK_ConstructorInitialization) { + (NextStep->Kind == SK_ConstructorInitialization || + NextStep->Kind == SK_ListConstructorCall)) { // The need for zero-initialization is recorded directly into // the call to the object's constructor within the next step. 
ConstructorInitRequiresZeroInit = true; @@ -5330,6 +5376,8 @@ InitializationSequence::Perform(Sema &S, } InitListExpr *ILE = cast(CurInit.take()); + S.Diag(ILE->getExprLoc(), diag::warn_cxx98_compat_initializer_list_init) + << ILE->getSourceRange(); unsigned NumInits = ILE->getNumInits(); SmallVector Converted(NumInits); InitializedEntity HiddenArray = InitializedEntity::InitializeTemporary( @@ -6130,8 +6178,8 @@ Sema::CanPerformCopyInitialization(const InitializedEntity &Entity, Expr *InitE = Init.get(); assert(InitE && "No initialization expression"); - InitializationKind Kind = InitializationKind::CreateCopy(SourceLocation(), - SourceLocation()); + InitializationKind Kind + = InitializationKind::CreateCopy(InitE->getLocStart(), SourceLocation()); InitializationSequence Seq(*this, Entity, Kind, &InitE, 1); return !Seq.Failed(); } diff --git a/lib/Sema/SemaLambda.cpp b/lib/Sema/SemaLambda.cpp index 6ef8d88..6414c6f 100644 --- a/lib/Sema/SemaLambda.cpp +++ b/lib/Sema/SemaLambda.cpp @@ -54,9 +54,7 @@ CXXMethodDecl *Sema::startLambdaDefinition(CXXRecordDecl *Class, SourceRange IntroducerRange, TypeSourceInfo *MethodType, SourceLocation EndLoc, - llvm::ArrayRef Params, - llvm::Optional ManglingNumber, - Decl *ContextDecl) { + llvm::ArrayRef Params) { // C++11 [expr.prim.lambda]p5: // The closure type for a lambda-expression has a public inline function // call operator (13.5.4) whose parameters and return type are described by @@ -98,64 +96,76 @@ CXXMethodDecl *Sema::startLambdaDefinition(CXXRecordDecl *Class, P != PEnd; ++P) (*P)->setOwningFunction(Method); } - - // If we don't already have a mangling number for this lambda expression, - // allocate one now. - if (!ManglingNumber) { - ContextDecl = ExprEvalContexts.back().LambdaContextDecl; - - enum ContextKind { - Normal, - DefaultArgument, - DataMember, - StaticDataMember - } Kind = Normal; - - // Default arguments of member function parameters that appear in a class - // definition, as well as the initializers of data members, receive special - // treatment. Identify them. - if (ContextDecl) { - if (ParmVarDecl *Param = dyn_cast(ContextDecl)) { - if (const DeclContext *LexicalDC - = Param->getDeclContext()->getLexicalParent()) - if (LexicalDC->isRecord()) - Kind = DefaultArgument; - } else if (VarDecl *Var = dyn_cast(ContextDecl)) { - if (Var->getDeclContext()->isRecord()) - Kind = StaticDataMember; - } else if (isa(ContextDecl)) { - Kind = DataMember; - } - } - - switch (Kind) { - case Normal: - if (CurContext->isDependentContext() || isInInlineFunction(CurContext)) - ManglingNumber = Context.getLambdaManglingNumber(Method); - else - ManglingNumber = 0; - - // There is no special context for this lambda. - ContextDecl = 0; - break; - - case StaticDataMember: - if (!CurContext->isDependentContext()) { - ManglingNumber = 0; - ContextDecl = 0; - break; - } - // Fall through to assign a mangling number. - - case DataMember: - case DefaultArgument: - ManglingNumber = ExprEvalContexts.back().getLambdaMangleContext() - .getManglingNumber(Method); - break; + + // Allocate a mangling number for this lambda expression, if the ABI + // requires one. + Decl *ContextDecl = ExprEvalContexts.back().LambdaContextDecl; + + enum ContextKind { + Normal, + DefaultArgument, + DataMember, + StaticDataMember + } Kind = Normal; + + // Default arguments of member function parameters that appear in a class + // definition, as well as the initializers of data members, receive special + // treatment. Identify them. 
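+  // For example (illustrative):
+  //   struct A { void f(int = []{ return 0; }()); };      // DefaultArgument
+  //   struct B { int m = []{ return 1; }(); };            // DataMember
+  //   template<class T> struct C { static int s; };
+  //   template<class T> int C<T>::s = []{ return 2; }();  // StaticDataMember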
+ if (ContextDecl) { + if (ParmVarDecl *Param = dyn_cast(ContextDecl)) { + if (const DeclContext *LexicalDC + = Param->getDeclContext()->getLexicalParent()) + if (LexicalDC->isRecord()) + Kind = DefaultArgument; + } else if (VarDecl *Var = dyn_cast(ContextDecl)) { + if (Var->getDeclContext()->isRecord()) + Kind = StaticDataMember; + } else if (isa(ContextDecl)) { + Kind = DataMember; } } - Class->setLambdaMangling(*ManglingNumber, ContextDecl); + // Itanium ABI [5.1.7]: + // In the following contexts [...] the one-definition rule requires closure + // types in different translation units to "correspond": + bool IsInNonspecializedTemplate = + !ActiveTemplateInstantiations.empty() || CurContext->isDependentContext(); + unsigned ManglingNumber; + switch (Kind) { + case Normal: + // -- the bodies of non-exported nonspecialized template functions + // -- the bodies of inline functions + if ((IsInNonspecializedTemplate && + !(ContextDecl && isa(ContextDecl))) || + isInInlineFunction(CurContext)) + ManglingNumber = Context.getLambdaManglingNumber(Method); + else + ManglingNumber = 0; + + // There is no special context for this lambda. + ContextDecl = 0; + break; + + case StaticDataMember: + // -- the initializers of nonspecialized static members of template classes + if (!IsInNonspecializedTemplate) { + ManglingNumber = 0; + ContextDecl = 0; + break; + } + // Fall through to assign a mangling number. + + case DataMember: + // -- the in-class initializers of class members + case DefaultArgument: + // -- default arguments appearing in class definitions + ManglingNumber = ExprEvalContexts.back().getLambdaMangleContext() + .getManglingNumber(Method); + break; + } + + Class->setLambdaMangling(ManglingNumber, ContextDecl); + return Method; } @@ -214,6 +224,141 @@ void Sema::addLambdaParameters(CXXMethodDecl *CallOperator, Scope *CurScope) { } } +static bool checkReturnValueType(const ASTContext &Ctx, const Expr *E, + QualType &DeducedType, + QualType &AlternateType) { + // Handle ReturnStmts with no expressions. + if (!E) { + if (AlternateType.isNull()) + AlternateType = Ctx.VoidTy; + + return Ctx.hasSameType(DeducedType, Ctx.VoidTy); + } + + QualType StrictType = E->getType(); + QualType LooseType = StrictType; + + // In C, enum constants have the type of their underlying integer type, + // not the enum. When inferring block return types, we should allow + // the enum type if an enum constant is used, unless the enum is + // anonymous (in which case there can be no variables of its type). + if (!Ctx.getLangOpts().CPlusPlus) { + const DeclRefExpr *DRE = dyn_cast(E->IgnoreParenImpCasts()); + if (DRE) { + const Decl *D = DRE->getDecl(); + if (const EnumConstantDecl *ECD = dyn_cast(D)) { + const EnumDecl *Enum = cast(ECD->getDeclContext()); + if (Enum->getDeclName() || Enum->getTypedefNameForAnonDecl()) + LooseType = Ctx.getTypeDeclType(Enum); + } + } + } + + // Special case for the first return statement we find. + // The return type has already been tentatively set, but we might still + // have an alternate type we should prefer. + if (AlternateType.isNull()) + AlternateType = LooseType; + + if (Ctx.hasSameType(DeducedType, StrictType)) { + // FIXME: The loose type is different when there are constants from two + // different enums. We could consider warning here. 
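+    // For example (illustrative, a C block with an inferred return type):
+    //   enum A { X };  enum B { Y };
+    //   ^(int c) { if (c) return X; return Y; }
+    // Both constants deduce strictly to 'int', but the loose types A and B
+    // differ.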
+ if (AlternateType != Ctx.DependentTy) + if (!Ctx.hasSameType(AlternateType, LooseType)) + AlternateType = Ctx.VoidTy; + return true; + } + + if (Ctx.hasSameType(DeducedType, LooseType)) { + // Use DependentTy to signal that we're using an alternate type and may + // need to add casts somewhere. + AlternateType = Ctx.DependentTy; + return true; + } + + if (Ctx.hasSameType(AlternateType, StrictType) || + Ctx.hasSameType(AlternateType, LooseType)) { + DeducedType = AlternateType; + // Use DependentTy to signal that we're using an alternate type and may + // need to add casts somewhere. + AlternateType = Ctx.DependentTy; + return true; + } + + return false; +} + +void Sema::deduceClosureReturnType(CapturingScopeInfo &CSI) { + assert(CSI.HasImplicitReturnType); + + // First case: no return statements, implicit void return type. + ASTContext &Ctx = getASTContext(); + if (CSI.Returns.empty()) { + // It's possible there were simply no /valid/ return statements. + // In this case, the first one we found may have at least given us a type. + if (CSI.ReturnType.isNull()) + CSI.ReturnType = Ctx.VoidTy; + return; + } + + // Second case: at least one return statement has dependent type. + // Delay type checking until instantiation. + assert(!CSI.ReturnType.isNull() && "We should have a tentative return type."); + if (CSI.ReturnType->isDependentType()) + return; + + // Third case: only one return statement. Don't bother doing extra work! + SmallVectorImpl::iterator I = CSI.Returns.begin(), + E = CSI.Returns.end(); + if (I+1 == E) + return; + + // General case: many return statements. + // Check that they all have compatible return types. + // For now, that means "identical", with an exception for enum constants. + // (In C, enum constants have the type of their underlying integer type, + // not the type of the enum. C++ uses the type of the enum.) + QualType AlternateType; + + // We require the return types to strictly match here. + for (; I != E; ++I) { + const ReturnStmt *RS = *I; + const Expr *RetE = RS->getRetValue(); + if (!checkReturnValueType(Ctx, RetE, CSI.ReturnType, AlternateType)) { + // FIXME: This is a poor diagnostic for ReturnStmts without expressions. + Diag(RS->getLocStart(), + diag::err_typecheck_missing_return_type_incompatible) + << (RetE ? RetE->getType() : Ctx.VoidTy) << CSI.ReturnType + << isa(CSI); + // Don't bother fixing up the return statements in the block if some of + // them are unfixable anyway. + AlternateType = Ctx.VoidTy; + // Continue iterating so that we keep emitting diagnostics. + } + } + + // If our return statements turned out to be compatible, but we needed to + // pick a different return type, go through and fix the ones that need it. + if (AlternateType == Ctx.DependentTy) { + for (SmallVectorImpl::iterator I = CSI.Returns.begin(), + E = CSI.Returns.end(); + I != E; ++I) { + ReturnStmt *RS = *I; + Expr *RetE = RS->getRetValue(); + if (RetE->getType() == CSI.ReturnType) + continue; + + // Right now we only support integral fixup casts. 
+ assert(CSI.ReturnType->isIntegralOrUnscopedEnumerationType()); + assert(RetE->getType()->isIntegralOrUnscopedEnumerationType()); + ExprResult Casted = ImpCastExprToType(RetE, CSI.ReturnType, + CK_IntegralCast); + assert(Casted.isUsable()); + RS->setRetValue(Casted.take()); + } + } +} + void Sema::ActOnStartOfLambdaDefinition(LambdaIntroducer &Intro, Declarator &ParamInfo, Scope *CurScope) { @@ -230,6 +375,7 @@ void Sema::ActOnStartOfLambdaDefinition(LambdaIntroducer &Intro, TypeSourceInfo *MethodTyInfo; bool ExplicitParams = true; bool ExplicitResultType = true; + bool ContainsUnexpandedParameterPack = false; SourceLocation EndLoc; llvm::ArrayRef Params; if (ParamInfo.getNumTypeObjects() == 0) { @@ -269,9 +415,13 @@ void Sema::ActOnStartOfLambdaDefinition(LambdaIntroducer &Intro, FunctionProtoTypeLoc Proto = cast(TL); Params = llvm::ArrayRef(Proto.getParmArray(), Proto.getNumArgs()); + + // Check for unexpanded parameter packs in the method type. + if (MethodTyInfo->getType()->containsUnexpandedParameterPack()) + ContainsUnexpandedParameterPack = true; } - CXXMethodDecl *Method = startLambdaDefinition(Class, Intro.Range, + CXXMethodDecl *Method = startLambdaDefinition(Class, Intro.Range, MethodTyInfo, EndLoc, Params); if (ExplicitParams) @@ -287,7 +437,7 @@ void Sema::ActOnStartOfLambdaDefinition(LambdaIntroducer &Intro, LambdaScopeInfo *LSI = enterLambdaScope(Method, Intro.Range, Intro.Default, ExplicitParams, ExplicitResultType, - (Method->getTypeQualifiers() & Qualifiers::Const) == 0); + !Method->isConst()); // Handle explicit captures. SourceLocation PrevCaptureLoc @@ -409,8 +559,7 @@ void Sema::ActOnStartOfLambdaDefinition(LambdaIntroducer &Intro, // Just ignore the ellipsis. } } else if (Var->isParameterPack()) { - Diag(C->Loc, diag::err_lambda_unexpanded_pack); - continue; + ContainsUnexpandedParameterPack = true; } TryCaptureKind Kind = C->Kind == LCK_ByRef ? TryCapture_ExplicitByRef : @@ -419,6 +568,8 @@ void Sema::ActOnStartOfLambdaDefinition(LambdaIntroducer &Intro, } finishLambdaExplicitCaptures(LSI); + LSI->ContainsUnexpandedParameterPack = ContainsUnexpandedParameterPack; + // Add lambda parameters into scope. 
addLambdaParameters(Method, CurScope); @@ -441,7 +592,10 @@ void Sema::ActOnLambdaError(SourceLocation StartLoc, Scope *CurScope, LambdaScopeInfo *LSI = getCurLambda(); CXXRecordDecl *Class = LSI->Lambda; Class->setInvalidDecl(); - SmallVector Fields(Class->field_begin(), Class->field_end()); + SmallVector Fields; + for (RecordDecl::field_iterator i = Class->field_begin(), + e = Class->field_end(); i != e; ++i) + Fields.push_back(*i); ActOnFields(0, Class->getLocation(), Class, Fields, SourceLocation(), SourceLocation(), 0); CheckCompletedCXXClass(Class); @@ -578,6 +732,7 @@ ExprResult Sema::ActOnLambdaExpr(SourceLocation StartLoc, Stmt *Body, bool ExplicitParams; bool ExplicitResultType; bool LambdaExprNeedsCleanups; + bool ContainsUnexpandedParameterPack; llvm::SmallVector ArrayIndexVars; llvm::SmallVector ArrayIndexStarts; { @@ -588,6 +743,7 @@ ExprResult Sema::ActOnLambdaExpr(SourceLocation StartLoc, Stmt *Body, ExplicitParams = LSI->ExplicitParams; ExplicitResultType = !LSI->HasImplicitReturnType; LambdaExprNeedsCleanups = LSI->ExprNeedsCleanups; + ContainsUnexpandedParameterPack = LSI->ContainsUnexpandedParameterPack; ArrayIndexVars.swap(LSI->ArrayIndexVars); ArrayIndexStarts.swap(LSI->ArrayIndexStarts); @@ -639,32 +795,14 @@ ExprResult Sema::ActOnLambdaExpr(SourceLocation StartLoc, Stmt *Body, // denotes the following type: // FIXME: Assumes current resolution to core issue 975. if (LSI->HasImplicitReturnType) { + deduceClosureReturnType(*LSI); + // - if there are no return statements in the // compound-statement, or all return statements return // either an expression of type void or no expression or // braced-init-list, the type void; if (LSI->ReturnType.isNull()) { LSI->ReturnType = Context.VoidTy; - } else { - // C++11 [expr.prim.lambda]p4: - // - if the compound-statement is of the form - // - // { attribute-specifier-seq[opt] return expression ; } - // - // the type of the returned expression after - // lvalue-to-rvalue conversion (4.1), array-to-pointer - // conver- sion (4.2), and function-to-pointer conversion - // (4.3); - // - // Since we're accepting the resolution to a post-C++11 core - // issue with a non-trivial extension, provide a warning (by - // default). - CompoundStmt *CompoundBody = cast(Body); - if (!(CompoundBody->size() == 1 && - isa(*CompoundBody->body_begin())) && - !Context.hasSameType(LSI->ReturnType, Context.VoidTy)) - Diag(IntroducerRange.getBegin(), - diag::ext_lambda_implies_void_return); } // Create a function type with the inferred return type. @@ -704,7 +842,10 @@ ExprResult Sema::ActOnLambdaExpr(SourceLocation StartLoc, Stmt *Body, addBlockPointerConversion(*this, IntroducerRange, Class, CallOperator); // Finalize the lambda class. 
- SmallVector Fields(Class->field_begin(), Class->field_end()); + SmallVector Fields; + for (RecordDecl::field_iterator i = Class->field_begin(), + e = Class->field_end(); i != e; ++i) + Fields.push_back(*i); ActOnFields(0, Class->getLocation(), Class, Fields, SourceLocation(), SourceLocation(), 0); CheckCompletedCXXClass(Class); @@ -717,7 +858,8 @@ ExprResult Sema::ActOnLambdaExpr(SourceLocation StartLoc, Stmt *Body, CaptureDefault, Captures, ExplicitParams, ExplicitResultType, CaptureInits, ArrayIndexVars, - ArrayIndexStarts, Body->getLocEnd()); + ArrayIndexStarts, Body->getLocEnd(), + ContainsUnexpandedParameterPack); // C++11 [expr.prim.lambda]p2: // A lambda-expression shall not appear in an unevaluated operand @@ -807,9 +949,7 @@ ExprResult Sema::BuildBlockForLambdaConversion(SourceLocation CurrentLocation, // Add a fake function body to the block. IR generation is responsible // for filling in the actual body, which cannot be expressed as an AST. - Block->setBody(new (Context) CompoundStmt(Context, 0, 0, - ConvLocation, - ConvLocation)); + Block->setBody(new (Context) CompoundStmt(ConvLocation)); // Create the block literal expression. Expr *BuildBlock = new (Context) BlockExpr(Block, Conv->getConversionType()); diff --git a/lib/Sema/SemaLookup.cpp b/lib/Sema/SemaLookup.cpp index 9f5138b..dad196b 100644 --- a/lib/Sema/SemaLookup.cpp +++ b/lib/Sema/SemaLookup.cpp @@ -899,7 +899,7 @@ bool Sema::CppLookupName(LookupResult &R, Scope *S) { if (!Ctx && S->isTemplateParamScope() && OutsideOfTemplateParamDC && S->getParent() && !S->getParent()->isTemplateParamScope()) { // We've just searched the last template parameter scope and - // found nothing, so look into the the contexts between the + // found nothing, so look into the contexts between the // lexical and semantic declaration contexts returned by // findOuterContext(). This implements the name lookup behavior // of C++ [temp.local]p8. @@ -1004,7 +1004,7 @@ bool Sema::CppLookupName(LookupResult &R, Scope *S) { if (!Ctx && S->isTemplateParamScope() && OutsideOfTemplateParamDC && S->getParent() && !S->getParent()->isTemplateParamScope()) { // We've just searched the last template parameter scope and - // found nothing, so look into the the contexts between the + // found nothing, so look into the contexts between the // lexical and semantic declaration contexts returned by // findOuterContext(). This implements the name lookup behavior // of C++ [temp.local]p8. @@ -1100,15 +1100,12 @@ static NamedDecl *getVisibleDecl(NamedDecl *D) { /// begin. If the lookup criteria permits, name lookup may also search /// in the parent scopes. /// -/// @param Name The name of the entity that we are searching for. +/// @param [in,out] R Specifies the lookup to perform (e.g., the name to +/// look up and the lookup kind), and is updated with the results of lookup +/// including zero or more declarations and possibly additional information +/// used to diagnose ambiguities. /// -/// @param Loc If provided, the source location where we're performing -/// name lookup. At present, this is only used to produce diagnostics when -/// C library functions (like "malloc") are implicitly declared. -/// -/// @returns The result of name lookup, which includes zero or more -/// declarations and possibly additional information used to diagnose -/// ambiguities. +/// @returns \c true if lookup succeeded and false otherwise. 
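+///
+/// Typical usage (illustrative):
+/// \code
+///   LookupResult R(*this, Name, NameLoc, LookupOrdinaryName);
+///   if (LookupName(R, S) && R.isSingleResult())
+///     NamedDecl *D = R.getFoundDecl();
+/// \endcode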
bool Sema::LookupName(LookupResult &R, Scope *S, bool AllowBuiltinCreation) { DeclarationName Name = R.getLookupName(); if (!Name) return false; @@ -1231,7 +1228,7 @@ bool Sema::LookupName(LookupResult &R, Scope *S, bool AllowBuiltinCreation) { /// using directives by the given context. /// /// C++98 [namespace.qual]p2: -/// Given X::m (where X is a user-declared namespace), or given ::m +/// Given X::m (where X is a user-declared namespace), or given \::m /// (where X is the global namespace), let S be the set of all /// declarations of m in X and in the transitive closure of all /// namespaces nominated by using-directives in X and its used @@ -1244,6 +1241,7 @@ bool Sema::LookupName(LookupResult &R, Scope *S, bool AllowBuiltinCreation) { /// (namespace.udecl), S is the required set of declarations of /// m. Otherwise if the use of m is not one that allows a unique /// declaration to be chosen from S, the program is ill-formed. +/// /// C++98 [namespace.qual]p5: /// During the lookup of a qualified namespace member name, if the /// lookup finds more than one declaration of the member, and if one @@ -1636,22 +1634,12 @@ bool Sema::LookupParsedName(LookupResult &R, Scope *S, CXXScopeSpec *SS, } -/// @brief Produce a diagnostic describing the ambiguity that resulted +/// \brief Produce a diagnostic describing the ambiguity that resulted /// from name lookup. /// -/// @param Result The ambiguous name lookup result. -/// -/// @param Name The name of the entity that name lookup was -/// searching for. -/// -/// @param NameLoc The location of the name within the source code. +/// \param Result The result of the ambiguous lookup to be diagnosed. /// -/// @param LookupRange A source range that provides more -/// source-location information concerning the lookup itself. For -/// example, this range might highlight a nested-name-specifier that -/// precedes the name. -/// -/// @returns true +/// \returns true bool Sema::DiagnoseAmbiguousLookup(LookupResult &Result) { assert(Result.isAmbiguous() && "Lookup result must be ambiguous"); @@ -2444,10 +2432,11 @@ CXXConstructorDecl *Sema::LookupCopyingConstructor(CXXRecordDecl *Class, } /// \brief Look up the moving constructor for the given class. -CXXConstructorDecl *Sema::LookupMovingConstructor(CXXRecordDecl *Class) { +CXXConstructorDecl *Sema::LookupMovingConstructor(CXXRecordDecl *Class, + unsigned Quals) { SpecialMemberOverloadResult *Result = - LookupSpecialMember(Class, CXXMoveConstructor, false, - false, false, false, false); + LookupSpecialMember(Class, CXXMoveConstructor, Quals & Qualifiers::Const, + Quals & Qualifiers::Volatile, false, false, false); return cast_or_null(Result->getMethod()); } @@ -2488,12 +2477,14 @@ CXXMethodDecl *Sema::LookupCopyingAssignment(CXXRecordDecl *Class, /// \brief Look up the moving assignment operator for the given class. 
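+///
+/// \param Quals The Qualifiers::Const and/or Qualifiers::Volatile bits of the
+/// argument type, forwarded to LookupSpecialMember to select the overload.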
CXXMethodDecl *Sema::LookupMovingAssignment(CXXRecordDecl *Class, + unsigned Quals, bool RValueThis, unsigned ThisQuals) { assert(!(ThisQuals & ~(Qualifiers::Const | Qualifiers::Volatile)) && "non-const, non-volatile qualifiers for copy assignment this"); SpecialMemberOverloadResult *Result = - LookupSpecialMember(Class, CXXMoveAssignment, false, false, RValueThis, + LookupSpecialMember(Class, CXXMoveAssignment, Quals & Qualifiers::Const, + Quals & Qualifiers::Volatile, RValueThis, ThisQuals & Qualifiers::Const, ThisQuals & Qualifiers::Volatile); @@ -3147,7 +3138,8 @@ LabelDecl *Sema::LookupOrCreateLabel(IdentifierInfo *II, SourceLocation Loc, namespace { -typedef llvm::StringMap TypoResultsMap; +typedef llvm::SmallVector TypoResultList; +typedef llvm::StringMap TypoResultsMap; typedef std::map TypoEditDistanceMap; static const unsigned MaxTypoDistanceResultSets = 5; @@ -3161,7 +3153,7 @@ class TypoCorrectionConsumer : public VisibleDeclConsumer { /// /// The pointer value being set to the current DeclContext indicates /// whether there is a keyword with this name. - TypoEditDistanceMap BestResults; + TypoEditDistanceMap CorrectionResults; Sema &SemaRef; @@ -3180,23 +3172,28 @@ public: typedef TypoResultsMap::iterator result_iterator; typedef TypoEditDistanceMap::iterator distance_iterator; - distance_iterator begin() { return BestResults.begin(); } - distance_iterator end() { return BestResults.end(); } - void erase(distance_iterator I) { BestResults.erase(I); } - unsigned size() const { return BestResults.size(); } - bool empty() const { return BestResults.empty(); } - - TypoCorrection &operator[](StringRef Name) { - return BestResults.begin()->second[Name]; + distance_iterator begin() { return CorrectionResults.begin(); } + distance_iterator end() { return CorrectionResults.end(); } + void erase(distance_iterator I) { CorrectionResults.erase(I); } + unsigned size() const { return CorrectionResults.size(); } + bool empty() const { return CorrectionResults.empty(); } + + TypoResultList &operator[](StringRef Name) { + return CorrectionResults.begin()->second[Name]; } unsigned getBestEditDistance(bool Normalized) { - if (BestResults.empty()) + if (CorrectionResults.empty()) return (std::numeric_limits::max)(); - unsigned BestED = BestResults.begin()->first; + unsigned BestED = CorrectionResults.begin()->first; return Normalized ? TypoCorrection::NormalizeEditDistance(BestED) : BestED; } + + TypoResultsMap &getBestResults() { + return CorrectionResults.begin()->second; + } + }; } @@ -3251,19 +3248,31 @@ void TypoCorrectionConsumer::addName(StringRef Name, void TypoCorrectionConsumer::addCorrection(TypoCorrection Correction) { StringRef Name = Correction.getCorrectionAsIdentifierInfo()->getName(); - TypoResultsMap &Map = BestResults[Correction.getEditDistance(false)]; - - TypoCorrection &CurrentCorrection = Map[Name]; - if (!CurrentCorrection || - // FIXME: The following should be rolled up into an operator< on - // TypoCorrection with a more principled definition. 
-                 CurrentCorrection.isKeyword() < Correction.isKeyword() ||
-                 Correction.getAsString(SemaRef.getLangOpts()) <
-                 CurrentCorrection.getAsString(SemaRef.getLangOpts()))
-               CurrentCorrection = Correction;
+  TypoResultList &CList =
+      CorrectionResults[Correction.getEditDistance(false)][Name];
+
+  if (!CList.empty() && !CList.back().isResolved())
+    CList.pop_back();
+  if (NamedDecl *NewND = Correction.getCorrectionDecl()) {
+    std::string CorrectionStr = Correction.getAsString(SemaRef.getLangOpts());
+    for (TypoResultList::iterator RI = CList.begin(), RIEnd = CList.end();
+         RI != RIEnd; ++RI) {
+      // If the Correction refers to a decl already in the result list, replace
+      // the existing result if the string representation of Correction comes
+      // before the current result alphabetically; either way, stop, since
+      // there is nothing more to do to add Correction to the candidate set.
+      if (RI->getCorrectionDecl() == NewND) {
+        if (CorrectionStr < RI->getAsString(SemaRef.getLangOpts()))
+          *RI = Correction;
+        return;
+      }
+    }
+  }
+  if (CList.empty() || Correction.isResolved())
+    CList.push_back(Correction);
 
-  while (BestResults.size() > MaxTypoDistanceResultSets)
-    erase(llvm::prior(BestResults.end()));
+  while (CorrectionResults.size() > MaxTypoDistanceResultSets)
+    erase(llvm::prior(CorrectionResults.end()));
 }
 
 // Fill the supplied vector with the IdentifierInfo pointers for each piece of
@@ -3348,7 +3357,7 @@ class NamespaceSpecifierSet {
     getNestedNameSpecifierIdentifiers(CurScopeSpec->getScopeRep(),
                                       CurNameSpecifierIdentifiers);
     // Build the list of identifiers that would be used for an absolute
-    // (from the global context) NestedNameSpecifier refering to the current
+    // (from the global context) NestedNameSpecifier referring to the current
     // context.
     for (DeclContextList::reverse_iterator C = CurContextChain.rbegin(),
          CEnd = CurContextChain.rend();
@@ -3515,7 +3524,16 @@ static void LookupPotentialTypoResult(Sema &SemaRef,
 /// \brief Add keywords to the consumer as possible typo corrections.
 static void AddKeywordsToConsumer(Sema &SemaRef,
                                   TypoCorrectionConsumer &Consumer,
-                                  Scope *S, CorrectionCandidateCallback &CCC) {
+                                  Scope *S, CorrectionCandidateCallback &CCC,
+                                  bool AfterNestedNameSpecifier) {
+  if (AfterNestedNameSpecifier) {
+    // For 'X::', we know exactly which keywords can appear next.
+    Consumer.addKeywordResult("template");
+    if (CCC.WantExpressionKeywords)
+      Consumer.addKeywordResult("operator");
+    return;
+  }
+
   if (CCC.WantObjCSuper)
     Consumer.addKeywordResult("super");
 
@@ -3589,6 +3607,12 @@ static void AddKeywordsToConsumer(Sema &SemaRef,
         Consumer.addKeywordResult("nullptr");
       }
     }
+
+    if (SemaRef.getLangOpts().C11) {
+      // FIXME: We should not suggest _Alignof if the alignof macro
+      // is present.
+      Consumer.addKeywordResult("_Alignof");
+    }
   }
 
   if (CCC.WantRemainingKeywords) {
@@ -3777,6 +3801,9 @@ TypoCorrection Sema::CorrectTypo(const DeclarationNameInfo &TypoName,
   bool SearchNamespaces
     = getLangOpts().CPlusPlus &&
      (IsUnqualifiedLookup || (QualifiedDC && QualifiedDC->isNamespace()));
+  // In a few cases we *only* want to search for corrections based on just
+  // adding or changing the nested name specifier.
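+  // For example (illustrative): correcting 'set' to 'std::set' changes only
+  // the nested name specifier, not the base identifier 'set'.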
+  bool AllowOnlyNNSChanges = Typo->getName().size() < 3;
 
   if (IsUnqualifiedLookup || SearchNamespaces) {
     // For unqualified lookup, look through all of the names that we have
@@ -3802,7 +3829,7 @@ TypoCorrection Sema::CorrectTypo(const DeclarationNameInfo &TypoName,
       }
     }
 
-  AddKeywordsToConsumer(*this, Consumer, S, CCC);
+  AddKeywordsToConsumer(*this, Consumer, S, CCC, SS && SS->isNotEmpty());
 
   // If we haven't found anything, we're done.
   if (Consumer.empty()) {
@@ -3813,8 +3840,8 @@ TypoCorrection Sema::CorrectTypo(const DeclarationNameInfo &TypoName,
     return TypoCorrection();
   }
 
-  // Make sure that the user typed at least 3 characters for each correction
-  // made. Otherwise, we don't even both looking at the results.
+  // Make sure the best edit distance (prior to adding any namespace qualifiers)
+  // is not more than about a third of the length of the typo's identifier.
   unsigned ED = Consumer.getBestEditDistance(true);
   if (ED > 0 && Typo->getName().size() / ED < 3) {
     // If this was an unqualified lookup, note that no correction was found.
@@ -3854,19 +3881,43 @@ TypoCorrection Sema::CorrectTypo(const DeclarationNameInfo &TypoName,
     for (TypoCorrectionConsumer::result_iterator I = DI->second.begin(),
                                               IEnd = DI->second.end();
          I != IEnd; /* Increment in loop. */) {
+      // If we only want nested name specifier corrections, ignore potential
+      // corrections that have a different base identifier from the typo.
+      if (AllowOnlyNNSChanges &&
+          I->second.front().getCorrectionAsIdentifierInfo() != Typo) {
+        TypoCorrectionConsumer::result_iterator Prev = I;
+        ++I;
+        DI->second.erase(Prev);
+        continue;
+      }
+
       // If the item already has been looked up or is a keyword, keep it.
       // If a validator callback object was given, drop the correction
       // unless it passes validation.
-      if (I->second.isResolved()) {
+      bool Viable = false;
+      for (TypoResultList::iterator RI = I->second.begin();
+           RI != I->second.end(); /* Increment in loop. */) {
+        TypoResultList::iterator Prev = RI;
+        ++RI;
+        if (Prev->isResolved()) {
+          if (!isCandidateViable(CCC, *Prev))
+            RI = I->second.erase(Prev);
+          else
+            Viable = true;
+        }
+      }
+      if (Viable || I->second.empty()) {
         TypoCorrectionConsumer::result_iterator Prev = I;
         ++I;
-        if (!isCandidateViable(CCC, Prev->second))
+        if (!Viable)
           DI->second.erase(Prev);
         continue;
       }
+      assert(I->second.size() == 1 && "Expected a single unresolved candidate");
 
       // Perform name lookup on this name.
-      IdentifierInfo *Name = I->second.getCorrectionAsIdentifierInfo();
+      TypoCorrection &Candidate = I->second.front();
+      IdentifierInfo *Name = Candidate.getCorrectionAsIdentifierInfo();
       LookupPotentialTypoResult(*this, TmpRes, Name, S, SS, MemberContext,
                                 EnteringContext, CCC.IsObjCIvarLookup);
 
@@ -3874,7 +3925,7 @@ TypoCorrection Sema::CorrectTypo(const DeclarationNameInfo &TypoName,
       case LookupResult::NotFound:
       case LookupResult::NotFoundInCurrentInstantiation:
      case LookupResult::FoundUnresolvedValue:
-        QualifiedResults.push_back(I->second);
+        QualifiedResults.push_back(Candidate);
         // We didn't find this name in our scope, or didn't like what we found;
         // ignore it.
{ @@ -3895,18 +3946,18 @@ TypoCorrection Sema::CorrectTypo(const DeclarationNameInfo &TypoName, for (LookupResult::iterator TRD = TmpRes.begin(), TRDEnd = TmpRes.end(); TRD != TRDEnd; ++TRD) - I->second.addCorrectionDecl(*TRD); + Candidate.addCorrectionDecl(*TRD); ++I; - if (!isCandidateViable(CCC, Prev->second)) + if (!isCandidateViable(CCC, Candidate)) DI->second.erase(Prev); break; } case LookupResult::Found: { TypoCorrectionConsumer::result_iterator Prev = I; - I->second.setCorrectionDecl(TmpRes.getAsSingle()); + Candidate.setCorrectionDecl(TmpRes.getAsSingle()); ++I; - if (!isCandidateViable(CCC, Prev->second)) + if (!isCandidateViable(CCC, Candidate)) DI->second.erase(Prev); break; } @@ -3978,10 +4029,10 @@ TypoCorrection Sema::CorrectTypo(const DeclarationNameInfo &TypoName, // No corrections remain... if (Consumer.empty()) return TypoCorrection(); - TypoResultsMap &BestResults = Consumer.begin()->second; - ED = TypoCorrection::NormalizeEditDistance(Consumer.begin()->first); + TypoResultsMap &BestResults = Consumer.getBestResults(); + ED = Consumer.getBestEditDistance(true); - if (ED > 0 && Typo->getName().size() / ED < 3) { + if (!AllowOnlyNNSChanges && ED > 0 && Typo->getName().size() / ED < 3) { // If this was an unqualified lookup and we believe the callback // object wouldn't have filtered out possible corrections, note // that no correction was found. @@ -3993,8 +4044,9 @@ TypoCorrection Sema::CorrectTypo(const DeclarationNameInfo &TypoName, // If only a single name remains, return that result. if (BestResults.size() == 1) { - const llvm::StringMapEntry &Correction = *(BestResults.begin()); - const TypoCorrection &Result = Correction.second; + const TypoResultList &CorrectionList = BestResults.begin()->second; + const TypoCorrection &Result = CorrectionList.front(); + if (CorrectionList.size() != 1) return TypoCorrection(); // Don't correct to a keyword that's the same as the typo; the keyword // wasn't actually in scope. @@ -4012,7 +4064,7 @@ TypoCorrection Sema::CorrectTypo(const DeclarationNameInfo &TypoName, // some instances of CTC_Unknown, while WantRemainingKeywords is true // for CTC_Unknown but not for CTC_ObjCMessageReceiver. && CCC.WantObjCSuper && !CCC.WantRemainingKeywords - && BestResults["super"].isKeyword()) { + && BestResults["super"].front().isKeyword()) { // Prefer 'super' when we're completing in a message-receiver // context. @@ -4022,9 +4074,9 @@ TypoCorrection Sema::CorrectTypo(const DeclarationNameInfo &TypoName, // Record the correction for unqualified lookup. if (IsUnqualifiedLookup) - UnqualifiedTyposCorrected[Typo] = BestResults["super"]; + UnqualifiedTyposCorrected[Typo] = BestResults["super"].front(); - return BestResults["super"]; + return BestResults["super"].front(); } // If this was an unqualified lookup and we believe the callback object did diff --git a/lib/Sema/SemaObjCProperty.cpp b/lib/Sema/SemaObjCProperty.cpp index 5ece8f1..27deab2 100644 --- a/lib/Sema/SemaObjCProperty.cpp +++ b/lib/Sema/SemaObjCProperty.cpp @@ -18,6 +18,8 @@ #include "clang/AST/ExprObjC.h" #include "clang/AST/ExprCXX.h" #include "clang/AST/ASTMutationListener.h" +#include "clang/Lex/Lexer.h" +#include "clang/Basic/SourceManager.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/SmallString.h" @@ -133,7 +135,6 @@ Decl *Sema::ActOnProperty(Scope *S, SourceLocation AtLoc, // Proceed with constructing the ObjCPropertDecls. 
 ObjCContainerDecl *ClassDecl = cast<ObjCContainerDecl>(CurContext);
-
 if (ObjCCategoryDecl *CDecl = dyn_cast<ObjCCategoryDecl>(ClassDecl))
   if (CDecl->IsClassExtension()) {
     Decl *Res = HandlePropertyInClassExtension(S, AtLoc, LParenLoc,
@@ -144,10 +145,11 @@ Decl *Sema::ActOnProperty(Scope *S, SourceLocation AtLoc,
                                          isOverridingProperty, TSI,
                                          MethodImplKind);
     if (Res) {
-      CheckObjCPropertyAttributes(Res, AtLoc, Attributes);
+      CheckObjCPropertyAttributes(Res, AtLoc, Attributes, false);
       if (getLangOpts().ObjCAutoRefCount)
         checkARCPropertyDecl(*this, cast<ObjCPropertyDecl>(Res));
     }
+    ActOnDocumentableDecl(Res);
     return Res;
   }
 
@@ -161,11 +163,14 @@ Decl *Sema::ActOnProperty(Scope *S, SourceLocation AtLoc,
   Res->setLexicalDeclContext(lexicalDC);
 
   // Validate the attributes on the @property.
-  CheckObjCPropertyAttributes(Res, AtLoc, Attributes);
+  CheckObjCPropertyAttributes(Res, AtLoc, Attributes,
+                              (isa<ObjCInterfaceDecl>(ClassDecl) ||
+                               isa<ObjCProtocolDecl>(ClassDecl)));
 
   if (getLangOpts().ObjCAutoRefCount)
     checkARCPropertyDecl(*this, Res);
 
+  ActOnDocumentableDecl(Res);
   return Res;
 }
 
@@ -200,6 +205,37 @@ makePropertyAttributesAsWritten(unsigned Attributes) {
   return (ObjCPropertyDecl::PropertyAttributeKind)attributesAsWritten;
 }
 
+static bool LocPropertyAttribute(ASTContext &Context, const char *attrName,
+                                 SourceLocation LParenLoc, SourceLocation &Loc) {
+  if (LParenLoc.isMacroID())
+    return false;
+
+  SourceManager &SM = Context.getSourceManager();
+  std::pair<FileID, unsigned> locInfo = SM.getDecomposedLoc(LParenLoc);
+  // Try to load the file buffer.
+  bool invalidTemp = false;
+  StringRef file = SM.getBufferData(locInfo.first, &invalidTemp);
+  if (invalidTemp)
+    return false;
+  const char *tokenBegin = file.data() + locInfo.second;
+
+  // Lex from the start of the given location.
+  Lexer lexer(SM.getLocForStartOfFile(locInfo.first),
+              Context.getLangOpts(),
+              file.begin(), tokenBegin, file.end());
+  Token Tok;
+  do {
+    lexer.LexFromRawLexer(Tok);
+    if (Tok.is(tok::raw_identifier) &&
+        StringRef(Tok.getRawIdentifierData(), Tok.getLength()) == attrName) {
+      Loc = Tok.getLocation();
+      return true;
+    }
+  } while (Tok.isNot(tok::r_paren));
+  return false;
+}
+
 Decl *
 Sema::HandlePropertyInClassExtension(Scope *S,
                                      SourceLocation AtLoc,
@@ -568,9 +604,70 @@ static void setImpliedPropertyAttributeForReadOnlyProperty(
   return;
 }
 
+/// DiagnoseClassAndClassExtPropertyMismatch - diagnose inconsistent property
+/// attributes declared in the primary class and overridden in any of its
+/// class extensions.
+static void
+DiagnoseClassAndClassExtPropertyMismatch(Sema &S, ObjCInterfaceDecl *ClassDecl,
+                                         ObjCPropertyDecl *property) {
+  unsigned Attributes = property->getPropertyAttributesAsWritten();
+  bool warn = (Attributes & ObjCDeclSpec::DQ_PR_readonly);
+  for (const ObjCCategoryDecl *CDecl = ClassDecl->getFirstClassExtension();
+       CDecl; CDecl = CDecl->getNextClassExtension()) {
+    ObjCPropertyDecl *ClassExtProperty = 0;
+    for (ObjCContainerDecl::prop_iterator P = CDecl->prop_begin(),
+         E = CDecl->prop_end(); P != E; ++P) {
+      if ((*P)->getIdentifier() == property->getIdentifier()) {
+        ClassExtProperty = *P;
+        break;
+      }
+    }
+    if (ClassExtProperty) {
+      warn = false;
+      unsigned classExtPropertyAttr =
+        ClassExtProperty->getPropertyAttributesAsWritten();
+      // Issue the warning we postponed: a class extension can override
+      // readonly->readwrite, in which case 'setter' attributes placed on the
+      // primary class's property declaration make sense for the overridden
+      // property.
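+      // For example (illustrative):
+      //   @interface C : NSObject
+      //   @property (readonly, copy) NSString *name;
+      //   @end
+      //   @interface C ()                        // class extension
+      //   @property (readwrite, copy) NSString *name;
+      //   @end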
+      if (Attributes & ObjCDeclSpec::DQ_PR_readonly) {
+        if (!classExtPropertyAttr ||
+            (classExtPropertyAttr & ObjCDeclSpec::DQ_PR_readwrite))
+          continue;
+        warn = true;
+        break;
+      }
+    }
+  }
+  if (warn) {
+    unsigned setterAttrs = (ObjCDeclSpec::DQ_PR_assign |
+                            ObjCDeclSpec::DQ_PR_unsafe_unretained |
+                            ObjCDeclSpec::DQ_PR_copy |
+                            ObjCDeclSpec::DQ_PR_retain |
+                            ObjCDeclSpec::DQ_PR_strong);
+    if (Attributes & setterAttrs) {
+      const char *which =
+        (Attributes & ObjCDeclSpec::DQ_PR_assign) ?
+          "assign" :
+        (Attributes & ObjCDeclSpec::DQ_PR_unsafe_unretained) ?
+          "unsafe_unretained" :
+        (Attributes & ObjCDeclSpec::DQ_PR_copy) ?
+          "copy" :
+        (Attributes & ObjCDeclSpec::DQ_PR_retain) ?
+          "retain" : "strong";
+
+      S.Diag(property->getLocation(),
+             diag::warn_objc_property_attr_mutually_exclusive)
+        << "readonly" << which;
+    }
+  }
+}
+
 /// ActOnPropertyImplDecl - This routine performs semantic checks and
 /// builds the AST node for a property implementation declaration; declared
-/// as @synthesize or @dynamic.
+/// as \@synthesize or \@dynamic.
 ///
 Decl *Sema::ActOnPropertyImplDecl(Scope *S,
                                   SourceLocation AtLoc,
@@ -588,6 +685,9 @@ Decl *Sema::ActOnPropertyImplDecl(Scope *S,
   }
   if (PropertyIvarLoc.isInvalid())
     PropertyIvarLoc = PropertyLoc;
+  SourceLocation PropertyDiagLoc = PropertyLoc;
+  if (PropertyDiagLoc.isInvalid())
+    PropertyDiagLoc = ClassImpDecl->getLocStart();
   ObjCPropertyDecl *property = 0;
   ObjCInterfaceDecl* IDecl = 0;
   // Find the class or category class where this property must have
@@ -625,6 +725,27 @@ Decl *Sema::ActOnPropertyImplDecl(Scope *S,
         return 0;
       }
     }
+
+    if (Synthesize &&
+        (PIkind & ObjCPropertyDecl::OBJC_PR_readonly) &&
+        property->hasAttr<IBOutletAttr>() &&
+        !AtLoc.isValid()) {
+      Diag(IC->getLocation(), diag::warn_auto_readonly_iboutlet_property);
+      Diag(property->getLocation(), diag::note_property_declare);
+      SourceLocation readonlyLoc;
+      if (LocPropertyAttribute(Context, "readonly",
+                               property->getLParenLoc(), readonlyLoc)) {
+        SourceLocation endLoc =
+          readonlyLoc.getLocWithOffset(strlen("readonly")-1);
+        SourceRange ReadonlySourceRange(readonlyLoc, endLoc);
+        Diag(property->getLocation(),
+             diag::note_auto_readonly_iboutlet_fixup_suggest) <<
+          FixItHint::CreateReplacement(ReadonlySourceRange, "readwrite");
+      }
+    }
+
+    DiagnoseClassAndClassExtPropertyMismatch(*this, IDecl, property);
+
   } else if ((CatImplClass = dyn_cast<ObjCCategoryImplDecl>(ClassImpDecl))) {
     if (Synthesize) {
       Diag(AtLoc, diag::error_synthesize_category_decl);
@@ -654,6 +775,8 @@ Decl *Sema::ActOnPropertyImplDecl(Scope *S,
     return 0;
   }
   ObjCIvarDecl *Ivar = 0;
+  bool CompleteTypeErr = false;
+  bool compat = true;
   // Check that we have a valid, previously declared ivar for @synthesize
   if (Synthesize) {
     // @synthesize
@@ -664,7 +787,14 @@ Decl *Sema::ActOnPropertyImplDecl(Scope *S,
     Ivar = IDecl->lookupInstanceVariable(PropertyIvar, ClassDeclared);
     QualType PropType = property->getType();
     QualType PropertyIvarType = PropType.getNonReferenceType();
-
+
+    if (RequireCompleteType(PropertyDiagLoc, PropertyIvarType,
+                            diag::err_incomplete_synthesized_property,
+                            property->getDeclName())) {
+      Diag(property->getLocation(), diag::note_property_declare);
+      CompleteTypeErr = true;
+    }
+
     if (getLangOpts().ObjCAutoRefCount &&
         (property->getPropertyAttributesAsWritten() &
          ObjCPropertyDecl::OBJC_PR_readonly) &&
@@ -680,14 +810,32 @@ Decl *Sema::ActOnPropertyImplDecl(Scope *S,
         getLangOpts().getGC() != LangOptions::NonGC) {
       assert(!getLangOpts().ObjCAutoRefCount);
       if (PropertyIvarType.isObjCGCStrong()) {
-        Diag(PropertyLoc,
-             diag::err_gc_weak_property_strong_type);
+        Diag(PropertyDiagLoc, diag::err_gc_weak_property_strong_type);
         Diag(property->getLocation(), diag::note_property_declare);
       } else {
         PropertyIvarType =
           Context.getObjCGCQualType(PropertyIvarType, Qualifiers::Weak);
       }
     }
-
+    if (AtLoc.isInvalid()) {
+      // When default-synthesizing a property, check whether an ivar matching
+      // the property name already exists and warn if so, since this is the
+      // most common sign that an ivar intended to back the property is not
+      // being used.
+      ObjCInterfaceDecl *ClassDeclared = 0;
+      ObjCIvarDecl *originalIvar =
+        IDecl->lookupInstanceVariable(property->getIdentifier(),
+                                      ClassDeclared);
+      if (originalIvar) {
+        Diag(PropertyDiagLoc,
+             diag::warn_autosynthesis_property_ivar_match)
+          << PropertyId << (Ivar == 0) << PropertyIvar
+          << originalIvar->getIdentifier();
+        Diag(property->getLocation(), diag::note_property_declare);
+        Diag(originalIvar->getLocation(), diag::note_ivar_decl);
+      }
+    }
+
     if (!Ivar) {
       // In ARC, give the ivar a lifetime qualifier based on the
       // property attributes.
@@ -699,7 +847,7 @@ Decl *Sema::ActOnPropertyImplDecl(Scope *S,
         // explicitly write an ownership attribute on the property.
         if (!property->hasWrittenStorageAttribute() &&
             !(kind & ObjCPropertyDecl::OBJC_PR_strong)) {
-          Diag(PropertyLoc,
+          Diag(PropertyDiagLoc,
               diag::err_arc_objc_property_default_assign_on_object);
           Diag(property->getLocation(), diag::note_property_declare);
         } else {
@@ -711,12 +859,12 @@ Decl *Sema::ActOnPropertyImplDecl(Scope *S,
           if (const ObjCObjectPointerType *ObjT =
                 PropertyIvarType->getAs<ObjCObjectPointerType>())
             if (ObjT->getInterfaceDecl()->isArcWeakrefUnavailable()) {
-              Diag(PropertyLoc, diag::err_arc_weak_unavailable_property);
+              Diag(PropertyDiagLoc, diag::err_arc_weak_unavailable_property);
               Diag(property->getLocation(), diag::note_property_declare);
               err = true;
             }
           if (!err && !getLangOpts().ObjCRuntimeHasWeak) {
-            Diag(PropertyLoc, diag::err_arc_weak_no_runtime);
+            Diag(PropertyDiagLoc, diag::err_arc_weak_no_runtime);
             Diag(property->getLocation(), diag::note_property_declare);
           }
         }
@@ -730,7 +878,7 @@ Decl *Sema::ActOnPropertyImplDecl(Scope *S,
       if (kind & ObjCPropertyDecl::OBJC_PR_weak &&
           !getLangOpts().ObjCAutoRefCount &&
           getLangOpts().getGC() == LangOptions::NonGC) {
-        Diag(PropertyLoc, diag::error_synthesize_weak_non_arc_or_gc);
+        Diag(PropertyDiagLoc, diag::error_synthesize_weak_non_arc_or_gc);
         Diag(property->getLocation(), diag::note_property_declare);
       }
 
@@ -739,17 +887,20 @@ Decl *Sema::ActOnPropertyImplDecl(Scope *S,
                                   PropertyIvarType, /*Dinfo=*/0,
                                   ObjCIvarDecl::Private,
                                   (Expr *)0, true);
+      if (CompleteTypeErr)
+        Ivar->setInvalidDecl();
       ClassImpDecl->addDecl(Ivar);
       IDecl->makeDeclVisibleInContext(Ivar);
       property->setPropertyIvarDecl(Ivar);
 
-      if (!getLangOpts().ObjCNonFragileABI)
-        Diag(PropertyLoc, diag::error_missing_property_ivar_decl) << PropertyId;
+      if (getLangOpts().ObjCRuntime.isFragile())
+        Diag(PropertyDiagLoc, diag::error_missing_property_ivar_decl)
+          << PropertyId;
       // Note! I deliberately want it to fall thru so, we have a
       // a property implementation and to avoid future warnings.
-    } else if (getLangOpts().ObjCNonFragileABI &&
+    } else if (getLangOpts().ObjCRuntime.isNonFragile() &&
                !declaresSameEntity(ClassDeclared, IDecl)) {
-      Diag(PropertyLoc, diag::error_ivar_in_superclass_use)
+      Diag(PropertyDiagLoc, diag::error_ivar_in_superclass_use)
         << property->getDeclName() << Ivar->getDeclName()
         << ClassDeclared->getDeclName();
       Diag(Ivar->getLocation(), diag::note_previous_access_declaration)
@@ -759,8 +910,8 @@ Decl *Sema::ActOnPropertyImplDecl(Scope *S,
     QualType IvarType = Context.getCanonicalType(Ivar->getType());
 
     // Check that type of property and its ivar are type compatible.
-    if (Context.getCanonicalType(PropertyIvarType) != IvarType) {
-      bool compat = false;
+    if (!Context.hasSameType(PropertyIvarType, IvarType)) {
+      compat = false;
       if (isa<ObjCObjectPointerType>(PropertyIvarType)
           && isa<ObjCObjectPointerType>(IvarType))
         compat =
@@ -773,31 +924,32 @@ Decl *Sema::ActOnPropertyImplDecl(Scope *S,
                    == Compatible);
       }
       if (!compat) {
-        Diag(PropertyLoc, diag::error_property_ivar_type)
+        Diag(PropertyDiagLoc, diag::error_property_ivar_type)
          << property->getDeclName() << PropType
          << Ivar->getDeclName() << IvarType;
        Diag(Ivar->getLocation(), diag::note_ivar_decl);
        // Note! I deliberately want it to fall thru so, we have a
        // a property implementation and to avoid future warnings.
      }
-
-      // FIXME! Rules for properties are somewhat different that those
-      // for assignments. Use a new routine to consolidate all cases;
-      // specifically for property redeclarations as well as for ivars.
-      QualType lhsType =Context.getCanonicalType(PropertyIvarType).getUnqualifiedType();
-      QualType rhsType =Context.getCanonicalType(IvarType).getUnqualifiedType();
-      if (lhsType != rhsType &&
-          lhsType->isArithmeticType()) {
-        Diag(PropertyLoc, diag::error_property_ivar_type)
-          << property->getDeclName() << PropType
-          << Ivar->getDeclName() << IvarType;
-        Diag(Ivar->getLocation(), diag::note_ivar_decl);
-        // Fall thru - see previous comment
+      else {
+        // FIXME! Rules for properties are somewhat different than those
+        // for assignments. Use a new routine to consolidate all cases;
+        // specifically for property redeclarations as well as for ivars.
+        QualType lhsType = Context.getCanonicalType(PropertyIvarType).getUnqualifiedType();
+        QualType rhsType = Context.getCanonicalType(IvarType).getUnqualifiedType();
+        if (lhsType != rhsType &&
+            lhsType->isArithmeticType()) {
+          Diag(PropertyDiagLoc, diag::error_property_ivar_type)
+            << property->getDeclName() << PropType
+            << Ivar->getDeclName() << IvarType;
+          Diag(Ivar->getLocation(), diag::note_ivar_decl);
+          // Fall thru - see previous comment
+        }
       }
       // __weak is explicit. So it works on Canonical type.
if ((PropType.isObjCGCWeak() && !IvarType.isObjCGCWeak() && getLangOpts().getGC() != LangOptions::NonGC)) { - Diag(PropertyLoc, diag::error_weak_property) + Diag(PropertyDiagLoc, diag::error_weak_property) << property->getDeclName() << Ivar->getDeclName(); Diag(Ivar->getLocation(), diag::note_ivar_decl); // Fall thru - see previous comment @@ -806,7 +958,7 @@ Decl *Sema::ActOnPropertyImplDecl(Scope *S, if ((property->getType()->isObjCObjectPointerType() || PropType.isObjCGCStrong()) && IvarType.isObjCGCWeak() && getLangOpts().getGC() != LangOptions::NonGC) { - Diag(PropertyLoc, diag::error_strong_property) + Diag(PropertyDiagLoc, diag::error_strong_property) << property->getDeclName() << Ivar->getDeclName(); // Fall thru - see previous comment } @@ -815,7 +967,7 @@ Decl *Sema::ActOnPropertyImplDecl(Scope *S, checkARCPropertyImpl(*this, PropertyLoc, property, Ivar); } else if (PropertyIvar) // @dynamic - Diag(PropertyLoc, diag::error_dynamic_property_ivar_decl); + Diag(PropertyDiagLoc, diag::error_dynamic_property_ivar_decl); assert (property && "ActOnPropertyImplDecl - property declaration missing"); ObjCPropertyImplDecl *PIDecl = @@ -825,9 +977,13 @@ Decl *Sema::ActOnPropertyImplDecl(Scope *S, ObjCPropertyImplDecl::Synthesize : ObjCPropertyImplDecl::Dynamic), Ivar, PropertyIvarLoc); + + if (CompleteTypeErr || !compat) + PIDecl->setInvalidDecl(); + if (ObjCMethodDecl *getterMethod = property->getGetterMethodDecl()) { getterMethod->createImplicitParams(Context, IDecl); - if (getLangOpts().CPlusPlus && Synthesize && + if (getLangOpts().CPlusPlus && Synthesize && !CompleteTypeErr && Ivar->getType()->isRecordType()) { // For Objective-C++, need to synthesize the AST for the IVAR object to be // returned by the getter as it must conform to C++'s copy-return rules. @@ -862,8 +1018,8 @@ Decl *Sema::ActOnPropertyImplDecl(Scope *S, } if (ObjCMethodDecl *setterMethod = property->getSetterMethodDecl()) { setterMethod->createImplicitParams(Context, IDecl); - if (getLangOpts().CPlusPlus && Synthesize - && Ivar->getType()->isRecordType()) { + if (getLangOpts().CPlusPlus && Synthesize && !CompleteTypeErr && + Ivar->getType()->isRecordType()) { // FIXME. Eventually we want to do this for Objective-C as well. 
ImplicitParamDecl *SelfDecl = setterMethod->getSelfDecl(); DeclRefExpr *SelfExpr = @@ -916,7 +1072,7 @@ Decl *Sema::ActOnPropertyImplDecl(Scope *S, } IC->addPropertyImplementation(PIDecl); if (getLangOpts().ObjCDefaultSynthProperties && - getLangOpts().ObjCNonFragileABI2 && + getLangOpts().ObjCRuntime.isNonFragile() && !IDecl->isObjCRequiresPropertyDefs()) { // Diagnose if an ivar was lazily synthesdized due to a previous // use and if 1) property is @dynamic or 2) property is synthesized @@ -941,7 +1097,7 @@ Decl *Sema::ActOnPropertyImplDecl(Scope *S, if (Synthesize) if (ObjCPropertyImplDecl *PPIDecl = CatImplClass->FindPropertyImplIvarDecl(PropertyIvar)) { - Diag(PropertyLoc, diag::error_duplicate_ivar_use) + Diag(PropertyDiagLoc, diag::error_duplicate_ivar_use) << PropertyId << PPIDecl->getPropertyDecl()->getIdentifier() << PropertyIvar; Diag(PPIDecl->getLocation(), diag::note_previous_use); @@ -949,7 +1105,7 @@ Decl *Sema::ActOnPropertyImplDecl(Scope *S, if (ObjCPropertyImplDecl *PPIDecl = CatImplClass->FindPropertyImplDecl(PropertyId)) { - Diag(PropertyLoc, diag::error_property_implemented) << PropertyId; + Diag(PropertyDiagLoc, diag::error_property_implemented) << PropertyId; Diag(PPIDecl->getLocation(), diag::note_previous_declaration); return 0; } @@ -1030,21 +1186,42 @@ Sema::DiagnosePropertyMismatch(ObjCPropertyDecl *Property, bool Sema::DiagnosePropertyAccessorMismatch(ObjCPropertyDecl *property, ObjCMethodDecl *GetterMethod, SourceLocation Loc) { - if (GetterMethod && - !Context.hasSameType(GetterMethod->getResultType().getNonReferenceType(), - property->getType().getNonReferenceType())) { - AssignConvertType result = Incompatible; - if (property->getType()->isObjCObjectPointerType()) - result = CheckAssignmentConstraints(Loc, GetterMethod->getResultType(), - property->getType()); - if (result != Compatible) { - Diag(Loc, diag::warn_accessor_property_type_mismatch) - << property->getDeclName() - << GetterMethod->getSelector(); - Diag(GetterMethod->getLocation(), diag::note_declared_at); - return true; + if (!GetterMethod) + return false; + QualType GetterType = GetterMethod->getResultType().getNonReferenceType(); + QualType PropertyIvarType = property->getType().getNonReferenceType(); + bool compat = Context.hasSameType(PropertyIvarType, GetterType); + if (!compat) { + if (isa(PropertyIvarType) && + isa(GetterType)) + compat = + Context.canAssignObjCInterfaces( + GetterType->getAs(), + PropertyIvarType->getAs()); + else if (CheckAssignmentConstraints(Loc, GetterType, PropertyIvarType) + != Compatible) { + Diag(Loc, diag::error_property_accessor_type) + << property->getDeclName() << PropertyIvarType + << GetterMethod->getSelector() << GetterType; + Diag(GetterMethod->getLocation(), diag::note_declared_at); + return true; + } else { + compat = true; + QualType lhsType =Context.getCanonicalType(PropertyIvarType).getUnqualifiedType(); + QualType rhsType =Context.getCanonicalType(GetterType).getUnqualifiedType(); + if (lhsType != rhsType && lhsType->isArithmeticType()) + compat = false; } } + + if (!compat) { + Diag(Loc, diag::warn_accessor_property_type_mismatch) + << property->getDeclName() + << GetterMethod->getSelector(); + Diag(GetterMethod->getLocation(), diag::note_declared_at); + return true; + } + return false; } @@ -1059,11 +1236,11 @@ void Sema::ComparePropertiesInBaseAndSuper(ObjCInterfaceDecl *IDecl) { // FIXME: O(N^2) for (ObjCInterfaceDecl::prop_iterator S = SDecl->prop_begin(), E = SDecl->prop_end(); S != E; ++S) { - ObjCPropertyDecl *SuperPDecl = (*S); + 
ObjCPropertyDecl *SuperPDecl = *S; // Does property in super class has declaration in current class? for (ObjCInterfaceDecl::prop_iterator I = IDecl->prop_begin(), E = IDecl->prop_end(); I != E; ++I) { - ObjCPropertyDecl *PDecl = (*I); + ObjCPropertyDecl *PDecl = *I; if (SuperPDecl->getIdentifier() == PDecl->getIdentifier()) DiagnosePropertyMismatch(PDecl, SuperPDecl, SDecl->getIdentifier()); @@ -1085,29 +1262,29 @@ Sema::MatchOneProtocolPropertiesInClass(Decl *CDecl, if (!CatDecl->IsClassExtension()) for (ObjCProtocolDecl::prop_iterator P = PDecl->prop_begin(), E = PDecl->prop_end(); P != E; ++P) { - ObjCPropertyDecl *Pr = (*P); + ObjCPropertyDecl *Pr = *P; ObjCCategoryDecl::prop_iterator CP, CE; // Is this property already in category's list of properties? for (CP = CatDecl->prop_begin(), CE = CatDecl->prop_end(); CP!=CE; ++CP) - if ((*CP)->getIdentifier() == Pr->getIdentifier()) + if (CP->getIdentifier() == Pr->getIdentifier()) break; if (CP != CE) // Property protocol already exist in class. Diagnose any mismatch. - DiagnosePropertyMismatch((*CP), Pr, PDecl->getIdentifier()); + DiagnosePropertyMismatch(*CP, Pr, PDecl->getIdentifier()); } return; } for (ObjCProtocolDecl::prop_iterator P = PDecl->prop_begin(), E = PDecl->prop_end(); P != E; ++P) { - ObjCPropertyDecl *Pr = (*P); + ObjCPropertyDecl *Pr = *P; ObjCInterfaceDecl::prop_iterator CP, CE; // Is this property already in class's list of properties? for (CP = IDecl->prop_begin(), CE = IDecl->prop_end(); CP != CE; ++CP) - if ((*CP)->getIdentifier() == Pr->getIdentifier()) + if (CP->getIdentifier() == Pr->getIdentifier()) break; if (CP != CE) // Property protocol already exist in class. Diagnose any mismatch. - DiagnosePropertyMismatch((*CP), Pr, PDecl->getIdentifier()); + DiagnosePropertyMismatch(*CP, Pr, PDecl->getIdentifier()); } } @@ -1223,7 +1400,7 @@ void Sema::CollectImmediateProperties(ObjCContainerDecl *CDecl, if (ObjCInterfaceDecl *IDecl = dyn_cast(CDecl)) { for (ObjCContainerDecl::prop_iterator P = IDecl->prop_begin(), E = IDecl->prop_end(); P != E; ++P) { - ObjCPropertyDecl *Prop = (*P); + ObjCPropertyDecl *Prop = *P; PropMap[Prop->getIdentifier()] = Prop; } // scan through class's protocols. @@ -1236,7 +1413,7 @@ void Sema::CollectImmediateProperties(ObjCContainerDecl *CDecl, if (!CATDecl->IsClassExtension()) for (ObjCContainerDecl::prop_iterator P = CATDecl->prop_begin(), E = CATDecl->prop_end(); P != E; ++P) { - ObjCPropertyDecl *Prop = (*P); + ObjCPropertyDecl *Prop = *P; PropMap[Prop->getIdentifier()] = Prop; } // scan through class's protocols. @@ -1247,7 +1424,7 @@ void Sema::CollectImmediateProperties(ObjCContainerDecl *CDecl, else if (ObjCProtocolDecl *PDecl = dyn_cast(CDecl)) { for (ObjCProtocolDecl::prop_iterator P = PDecl->prop_begin(), E = PDecl->prop_end(); P != E; ++P) { - ObjCPropertyDecl *Prop = (*P); + ObjCPropertyDecl *Prop = *P; ObjCPropertyDecl *PropertyFromSuper = SuperPropMap[Prop->getIdentifier()]; // Exclude property for protocols which conform to class's super-class, // as super-class has to implement the property. 
@@ -1273,7 +1450,7 @@ static void CollectClassPropertyImplementations(ObjCContainerDecl *CDecl, if (ObjCInterfaceDecl *IDecl = dyn_cast(CDecl)) { for (ObjCContainerDecl::prop_iterator P = IDecl->prop_begin(), E = IDecl->prop_end(); P != E; ++P) { - ObjCPropertyDecl *Prop = (*P); + ObjCPropertyDecl *Prop = *P; PropMap[Prop->getIdentifier()] = Prop; } for (ObjCInterfaceDecl::all_protocol_iterator @@ -1284,7 +1461,7 @@ static void CollectClassPropertyImplementations(ObjCContainerDecl *CDecl, else if (ObjCProtocolDecl *PDecl = dyn_cast(CDecl)) { for (ObjCProtocolDecl::prop_iterator P = PDecl->prop_begin(), E = PDecl->prop_end(); P != E; ++P) { - ObjCPropertyDecl *Prop = (*P); + ObjCPropertyDecl *Prop = *P; if (!PropMap.count(Prop->getIdentifier())) PropMap[Prop->getIdentifier()] = Prop; } @@ -1316,7 +1493,7 @@ ObjCPropertyDecl *Sema::LookupPropertyDecl(const ObjCContainerDecl *CDecl, dyn_cast(CDecl)) { for (ObjCContainerDecl::prop_iterator P = IDecl->prop_begin(), E = IDecl->prop_end(); P != E; ++P) { - ObjCPropertyDecl *Prop = (*P); + ObjCPropertyDecl *Prop = *P; if (Prop->getIdentifier() == II) return Prop; } @@ -1333,7 +1510,7 @@ ObjCPropertyDecl *Sema::LookupPropertyDecl(const ObjCContainerDecl *CDecl, dyn_cast(CDecl)) { for (ObjCProtocolDecl::prop_iterator P = PDecl->prop_begin(), E = PDecl->prop_end(); P != E; ++P) { - ObjCPropertyDecl *Prop = (*P); + ObjCPropertyDecl *Prop = *P; if (Prop->getIdentifier() == II) return Prop; } @@ -1358,8 +1535,8 @@ static IdentifierInfo * getDefaultSynthIvarName(ObjCPropertyDecl *Prop, return &Ctx.Idents.get(ivarName.str()); } -/// DefaultSynthesizeProperties - This routine default synthesizes all -/// properties which must be synthesized in class's @implementation. +/// \brief Default synthesizes all properties which must be synthesized +/// in class's \@implementation. void Sema::DefaultSynthesizeProperties(Scope *S, ObjCImplDecl* IMPDecl, ObjCInterfaceDecl *IDecl) { @@ -1402,16 +1579,21 @@ void Sema::DefaultSynthesizeProperties(Scope *S, ObjCImplDecl* IMPDecl, // aren't really synthesized at a particular location; they just exist. // Saying that they are located at the @implementation isn't really going // to help users. 
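As context for the getDefaultSynthIvarName hunk above: the default-synthesized ivar simply prepends an underscore to the property name. A minimal sketch of that naming scheme, using the same LLVM string APIs the file already uses (the surrounding Prop and Ctx variables are assumed to be in scope, as in the real function):

// Build "_<propertyName>" and intern it in the identifier table.
llvm::SmallString<64> IvarName;
IvarName += '_';
IvarName += Prop->getName();
IdentifierInfo *IvarII = &Ctx.Idents.get(IvarName.str());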
- ActOnPropertyImplDecl(S, SourceLocation(), SourceLocation(), - true, - /* property = */ Prop->getIdentifier(), - /* ivar = */ getDefaultSynthIvarName(Prop, Context), - SourceLocation()); + ObjCPropertyImplDecl *PIDecl = dyn_cast_or_null( + ActOnPropertyImplDecl(S, SourceLocation(), SourceLocation(), + true, + /* property = */ Prop->getIdentifier(), + /* ivar = */ getDefaultSynthIvarName(Prop, Context), + Prop->getLocation())); + if (PIDecl) { + Diag(Prop->getLocation(), diag::warn_missing_explicit_synthesis); + Diag(IMPDecl->getLocation(), diag::note_while_in_implementation); + } } } void Sema::DefaultSynthesizeProperties(Scope *S, Decl *D) { - if (!LangOpts.ObjCDefaultSynthProperties || !LangOpts.ObjCNonFragileABI2) + if (!LangOpts.ObjCDefaultSynthProperties || LangOpts.ObjCRuntime.isFragile()) return; ObjCImplementationDecl *IC=dyn_cast_or_null(D); if (!IC) @@ -1423,7 +1605,7 @@ void Sema::DefaultSynthesizeProperties(Scope *S, Decl *D) { void Sema::DiagnoseUnimplementedProperties(Scope *S, ObjCImplDecl* IMPDecl, ObjCContainerDecl *CDecl, - const llvm::DenseSet& InsMap) { + const SelectorSet &InsMap) { llvm::DenseMap SuperPropMap; if (ObjCInterfaceDecl *IDecl = dyn_cast(CDecl)) CollectSuperClassPropertyImplementations(IDecl, SuperPropMap); @@ -1437,7 +1619,7 @@ void Sema::DiagnoseUnimplementedProperties(Scope *S, ObjCImplDecl* IMPDecl, for (ObjCImplDecl::propimpl_iterator I = IMPDecl->propimpl_begin(), EI = IMPDecl->propimpl_end(); I != EI; ++I) - PropImplMap.insert((*I)->getPropertyDecl()); + PropImplMap.insert(I->getPropertyDecl()); for (llvm::DenseMap::iterator P = PropMap.begin(), E = PropMap.end(); P != E; ++P) { @@ -1455,7 +1637,7 @@ void Sema::DiagnoseUnimplementedProperties(Scope *S, ObjCImplDecl* IMPDecl, << Prop->getDeclName() << Prop->getGetterName(); Diag(Prop->getLocation(), diag::note_property_declare); - if (LangOpts.ObjCDefaultSynthProperties && LangOpts.ObjCNonFragileABI2) + if (LangOpts.ObjCDefaultSynthProperties && LangOpts.ObjCRuntime.isNonFragile()) if (ObjCInterfaceDecl *ID = dyn_cast(CDecl)) if (const ObjCInterfaceDecl *RID = ID->isObjCRequiresPropertyDefs()) Diag(RID->getLocation(), diag::note_suppressed_class_declare); @@ -1470,7 +1652,7 @@ void Sema::DiagnoseUnimplementedProperties(Scope *S, ObjCImplDecl* IMPDecl, << Prop->getDeclName() << Prop->getSetterName(); Diag(Prop->getLocation(), diag::note_property_declare); - if (LangOpts.ObjCDefaultSynthProperties && LangOpts.ObjCNonFragileABI2) + if (LangOpts.ObjCDefaultSynthProperties && LangOpts.ObjCRuntime.isNonFragile()) if (ObjCInterfaceDecl *ID = dyn_cast(CDecl)) if (const ObjCInterfaceDecl *RID = ID->isObjCRequiresPropertyDefs()) Diag(RID->getLocation(), diag::note_suppressed_class_declare); @@ -1487,7 +1669,7 @@ Sema::AtomicPropertySetterGetterRules (ObjCImplDecl* IMPDecl, for (ObjCContainerDecl::prop_iterator I = IDecl->prop_begin(), E = IDecl->prop_end(); I != E; ++I) { - ObjCPropertyDecl *Property = (*I); + ObjCPropertyDecl *Property = *I; ObjCMethodDecl *GetterMethod = 0; ObjCMethodDecl *SetterMethod = 0; bool LookedUpGetterSetter = false; @@ -1753,11 +1935,24 @@ void Sema::ProcessPropertyDecl(ObjCPropertyDecl *property, AddInstanceMethodToGlobalPool(GetterMethod); if (SetterMethod) AddInstanceMethodToGlobalPool(SetterMethod); + + ObjCInterfaceDecl *CurrentClass = dyn_cast(CD); + if (!CurrentClass) { + if (ObjCCategoryDecl *Cat = dyn_cast(CD)) + CurrentClass = Cat->getClassInterface(); + else if (ObjCImplDecl *Impl = dyn_cast(CD)) + CurrentClass = Impl->getClassInterface(); + } + if (GetterMethod) + 
    CheckObjCMethodOverrides(GetterMethod, CurrentClass, Sema::RTC_Unknown);
+  if (SetterMethod)
+    CheckObjCMethodOverrides(SetterMethod, CurrentClass, Sema::RTC_Unknown);
 }
 
 void Sema::CheckObjCPropertyAttributes(Decl *PDecl,
                                        SourceLocation Loc,
-                                       unsigned &Attributes) {
+                                       unsigned &Attributes,
+                                       bool propertyInPrimaryClass) {
   // FIXME: Improve the reported location.
   if (!PDecl || PDecl->isInvalidDecl())
     return;
@@ -1780,9 +1975,18 @@ void Sema::CheckObjCPropertyAttributes(Decl *PDecl,
     return;
   }
 
+  if (propertyInPrimaryClass) {
+    // We postpone most property diagnostics until the class's implementation,
+    // because a readonly attribute may be overridden in a class extension,
+    // which can make attribute combinations that look contradictory here
+    // become meaningful there.
+    if ((Attributes & ObjCDeclSpec::DQ_PR_readonly) &&
+        (Attributes & ObjCDeclSpec::DQ_PR_readwrite))
+      Diag(Loc, diag::err_objc_property_attr_mutually_exclusive)
+        << "readonly" << "readwrite";
+  }
   // readonly and readwrite/assign/retain/copy conflict.
-  if ((Attributes & ObjCDeclSpec::DQ_PR_readonly) &&
-      (Attributes & (ObjCDeclSpec::DQ_PR_readwrite |
+  else if ((Attributes & ObjCDeclSpec::DQ_PR_readonly) &&
+           (Attributes & (ObjCDeclSpec::DQ_PR_readwrite |
                      ObjCDeclSpec::DQ_PR_assign |
                      ObjCDeclSpec::DQ_PR_unsafe_unretained |
                      ObjCDeclSpec::DQ_PR_copy |
@@ -1939,8 +2143,7 @@ void Sema::CheckObjCPropertyAttributes(Decl *PDecl,
            && getLangOpts().getGC() == LangOptions::GCOnly
            && PropertyTy->isBlockPointerType())
     Diag(Loc, diag::warn_objc_property_copy_missing_on_block);
-  else if (getLangOpts().ObjCAutoRefCount &&
-           (Attributes & ObjCDeclSpec::DQ_PR_retain) &&
+  else if ((Attributes & ObjCDeclSpec::DQ_PR_retain) &&
            !(Attributes & ObjCDeclSpec::DQ_PR_readonly) &&
            !(Attributes & ObjCDeclSpec::DQ_PR_strong) &&
            PropertyTy->isBlockPointerType())
diff --git a/lib/Sema/SemaOverload.cpp b/lib/Sema/SemaOverload.cpp
index 50230f0..a874489 100644
--- a/lib/Sema/SemaOverload.cpp
+++ b/lib/Sema/SemaOverload.cpp
@@ -28,6 +28,7 @@
 #include "clang/Basic/PartialDiagnostic.h"
 #include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/STLExtras.h"
 #include 
@@ -388,13 +389,22 @@ StandardConversionSequence::getNarrowingKind(ASTContext &Ctx,
   const unsigned ToWidth = Ctx.getIntWidth(ToType);
 
   if (FromWidth > ToWidth ||
-      (FromWidth == ToWidth && FromSigned != ToSigned)) {
+      (FromWidth == ToWidth && FromSigned != ToSigned) ||
+      (FromSigned && !ToSigned)) {
     // Not all values of FromType can be represented in ToType.
     llvm::APSInt InitializerValue;
     const Expr *Initializer = IgnoreNarrowingConversion(Converted);
-    if (Initializer->isIntegerConstantExpr(InitializerValue, Ctx)) {
-      ConstantValue = APValue(InitializerValue);
-
+    if (!Initializer->isIntegerConstantExpr(InitializerValue, Ctx)) {
+      // Such conversions on variables are always narrowing.
+      return NK_Variable_Narrowing;
+    }
+    bool Narrowing = false;
+    if (FromWidth < ToWidth) {
+      // Negative -> unsigned is narrowing. Otherwise, more bits is never
+      // narrowing.
+      if (InitializerValue.isSigned() && InitializerValue.isNegative())
+        Narrowing = true;
+    } else {
       // Add a bit to the InitializerValue so we don't have to worry about
       // signed vs. unsigned comparisons.
       InitializerValue = InitializerValue.extend(
@@ -406,13 +416,13 @@ StandardConversionSequence::getNarrowingKind(ASTContext &Ctx,
       ConvertedValue = ConvertedValue.extend(InitializerValue.getBitWidth());
       ConvertedValue.setIsSigned(InitializerValue.isSigned());
       // If the result is different, this was a narrowing conversion.
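To make the rewritten getNarrowingKind logic above concrete, here is a hedged user-level sketch of the C++11 narrowing rules it implements (assuming an 8-bit char and 32-bit int; the comments name the NarrowingKind the checker would report):

void narrowing_examples(int n) {
  char c1{100};              // not narrowing: constant value fits in 'char'
  char c2{1000};             // NK_Constant_Narrowing: constant does not fit
  char c3{n};                // NK_Variable_Narrowing: non-constant source
  unsigned long long u{-1};  // NK_Constant_Narrowing via the new
                             // 'FromSigned && !ToSigned' branch: a negative
                             // value cannot be represented, even in a wider
                             // unsigned type
}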
- if (ConvertedValue != InitializerValue) { - ConstantType = Initializer->getType(); - return NK_Constant_Narrowing; - } - } else { - // Variables are always narrowings. - return NK_Variable_Narrowing; + if (ConvertedValue != InitializerValue) + Narrowing = true; + } + if (Narrowing) { + ConstantType = Initializer->getType(); + ConstantValue = APValue(InitializerValue); + return NK_Constant_Narrowing; } } return NK_Not_Narrowing; @@ -541,6 +551,7 @@ static MakeDeductionFailureInfo(ASTContext &Context, TemplateDeductionInfo &Info) { OverloadCandidate::DeductionFailureInfo Result; Result.Result = static_cast(TDK); + Result.HasDiagnostic = false; Result.Data = 0; switch (TDK) { case Sema::TDK_Success: @@ -567,6 +578,12 @@ static MakeDeductionFailureInfo(ASTContext &Context, case Sema::TDK_SubstitutionFailure: Result.Data = Info.take(); + if (Info.hasSFINAEDiagnostic()) { + PartialDiagnosticAt *Diag = new (Result.Diagnostic) PartialDiagnosticAt( + SourceLocation(), PartialDiagnostic::NullDiagnostic()); + Info.takeSFINAEDiagnostic(*Diag); + Result.HasDiagnostic = true; + } break; case Sema::TDK_NonDeducedMismatch: @@ -594,8 +611,12 @@ void OverloadCandidate::DeductionFailureInfo::Destroy() { break; case Sema::TDK_SubstitutionFailure: - // FIXME: Destroy the template arugment list? + // FIXME: Destroy the template argument list? Data = 0; + if (PartialDiagnosticAt *Diag = getSFINAEDiagnostic()) { + Diag->~PartialDiagnosticAt(); + HasDiagnostic = false; + } break; // Unhandled @@ -605,6 +626,13 @@ void OverloadCandidate::DeductionFailureInfo::Destroy() { } } +PartialDiagnosticAt * +OverloadCandidate::DeductionFailureInfo::getSFINAEDiagnostic() { + if (HasDiagnostic) + return static_cast(static_cast(Diagnostic)); + return 0; +} + TemplateParameter OverloadCandidate::DeductionFailureInfo::getTemplateParameter() { switch (static_cast(Result)) { @@ -707,9 +735,12 @@ OverloadCandidate::DeductionFailureInfo::getSecondArg() { } void OverloadCandidateSet::clear() { - for (iterator i = begin(), e = end(); i != e; ++i) + for (iterator i = begin(), e = end(); i != e; ++i) { for (unsigned ii = 0, ie = i->NumConversions; ii != ie; ++ii) i->Conversions[ii].~ImplicitConversionSequence(); + if (!i->Viable && i->FailureKind == ovl_fail_bad_deduction) + i->DeductionFailure.Destroy(); + } NumInlineSequences = 0; Candidates.clear(); Functions.clear(); @@ -1657,7 +1688,7 @@ bool Sema::IsIntegralPromotion(Expr *From, QualType FromType, QualType ToType) { // We have already pre-calculated the promotion type, so this is trivial. if (ToType->isIntegerType() && - !RequireCompleteType(From->getLocStart(), FromType, PDiag())) + !RequireCompleteType(From->getLocStart(), FromType, 0)) return Context.hasSameUnqualifiedType(ToType, FromEnumType->getDecl()->getPromotionType()); } @@ -1987,7 +2018,7 @@ bool Sema::IsPointerConversion(Expr *From, QualType FromType, QualType ToType, if (getLangOpts().CPlusPlus && FromPointeeType->isRecordType() && ToPointeeType->isRecordType() && !Context.hasSameUnqualifiedType(FromPointeeType, ToPointeeType) && - !RequireCompleteType(From->getLocStart(), FromPointeeType, PDiag()) && + !RequireCompleteType(From->getLocStart(), FromPointeeType, 0) && IsDerivedFrom(FromPointeeType, ToPointeeType)) { ConvertedType = BuildSimilarlyQualifiedPointerType(FromTypePtr, ToPointeeType, @@ -2469,7 +2500,7 @@ void Sema::HandleFunctionTypeMismatch(PartialDiagnostic &PDiag, /// for equality of their argument types. Caller has already checked that /// they have same number of arguments. 
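The HasDiagnostic plumbing above stores the suppressed substitution-failure diagnostic so the overload notes emitted later (see the DiagnoseBadDeduction hunk further down) can explain why a template candidate was ignored. A sketch of code that would exercise the new enable_if note (a hypothetical test case; the diagnostic wording is approximate):

#include <type_traits>

template <typename T,
          typename = typename std::enable_if<std::is_integral<T>::value>::type>
void only_integers(T);

void caller() {
  only_integers(42);  // OK: T = int satisfies the predicate
  // only_integers(1.5) would be rejected; the patched note reads roughly:
  // "candidate template ignored: disabled by 'enable_if' [with T = double]"
}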
This routine assumes that Objective-C /// pointer types which only differ in their protocol qualifiers are equal. -/// If the parameters are different, ArgPos will have the the parameter index +/// If the parameters are different, ArgPos will have the parameter index /// of the first different parameter. bool Sema::FunctionArgTypesAreEqual(const FunctionProtoType *OldType, const FunctionProtoType *NewType, @@ -2531,13 +2562,17 @@ bool Sema::CheckPointerConversion(Expr *From, QualType ToType, Kind = CK_BitCast; - if (!IsCStyleOrFunctionalCast && - Context.hasSameUnqualifiedType(From->getType(), Context.BoolTy) && - From->isNullPointerConstant(Context, Expr::NPC_ValueDependentIsNotNull)) - DiagRuntimeBehavior(From->getExprLoc(), From, - PDiag(diag::warn_impcast_bool_to_null_pointer) - << ToType << From->getSourceRange()); - + if (!IsCStyleOrFunctionalCast && !FromType->isAnyPointerType() && + From->isNullPointerConstant(Context, Expr::NPC_ValueDependentIsNotNull) == + Expr::NPCK_ZeroExpression) { + if (Context.hasSameUnqualifiedType(From->getType(), Context.BoolTy)) + DiagRuntimeBehavior(From->getExprLoc(), From, + PDiag(diag::warn_impcast_bool_to_null_pointer) + << ToType << From->getSourceRange()); + else if (!isUnevaluatedContext()) + Diag(From->getExprLoc(), diag::warn_non_literal_null_pointer) + << ToType << From->getSourceRange(); + } if (const PointerType *ToPtrType = ToType->getAs()) { if (const PointerType *FromPtrType = FromType->getAs()) { QualType FromPointeeType = FromPtrType->getPointeeType(), @@ -2616,7 +2651,7 @@ bool Sema::IsMemberPointerConversion(Expr *From, QualType FromType, QualType ToClass(ToTypePtr->getClass(), 0); if (!Context.hasSameUnqualifiedType(FromClass, ToClass) && - !RequireCompleteType(From->getLocStart(), ToClass, PDiag()) && + !RequireCompleteType(From->getLocStart(), ToClass, 0) && IsDerivedFrom(ToClass, FromClass)) { ConvertedType = Context.getMemberPointerType(FromTypePtr->getPointeeType(), ToClass.getTypePtr()); @@ -2923,7 +2958,7 @@ IsUserDefinedConversion(Sema &S, Expr *From, QualType ToType, S.IsDerivedFrom(From->getType(), ToType))) ConstructorsOnly = true; - S.RequireCompleteType(From->getLocStart(), ToType, S.PDiag()); + S.RequireCompleteType(From->getLocStart(), ToType, 0); // RequireCompleteType may have returned true due to some invalid decl // during template instantiation, but ToType may be complete enough now // to try to recover. @@ -3001,8 +3036,7 @@ IsUserDefinedConversion(Sema &S, Expr *From, QualType ToType, // Enumerate conversion functions, if we're allowed to. if (ConstructorsOnly || isa(From)) { - } else if (S.RequireCompleteType(From->getLocStart(), From->getType(), - S.PDiag(0) << From->getSourceRange())) { + } else if (S.RequireCompleteType(From->getLocStart(), From->getType(), 0)) { // No conversion functions from incomplete types. } else if (const RecordType *FromRecordType = From->getType()->getAs()) { @@ -3848,7 +3882,7 @@ Sema::CompareReferenceRelationship(SourceLocation Loc, ObjCLifetimeConversion = false; if (UnqualT1 == UnqualT2) { // Nothing to do. - } else if (!RequireCompleteType(Loc, OrigT2, PDiag()) && + } else if (!RequireCompleteType(Loc, OrigT2, 0) && IsDerivedFrom(UnqualT2, UnqualT1)) DerivedToBase = true; else if (UnqualT1->isObjCObjectOrInterfaceType() && @@ -4135,7 +4169,7 @@ TryReferenceInit(Sema &S, Expr *Init, QualType DeclType, // qualifier. // This is also the point where rvalue references and lvalue inits no longer // go together. 
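The CheckPointerConversion hunk above extends the old bool-only check: any constant expression that merely evaluates to zero (Expr::NPCK_ZeroExpression) now draws a warning when used as a null pointer constant. A hedged illustration under the C++98 null-pointer-constant rules (diagnostic wording approximate):

void null_pointer_examples() {
  char *p = false;  // existing path: bool used as a null pointer constant
  int  *q = 5 - 5;  // new path: zero-valued expression that is not a literal
                    // 0, treated as a null pointer constant of type 'int *'
}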
- if (!isRValRef && !T1.isConstQualified()) + if (!isRValRef && (!T1.isConstQualified() || T1.isVolatileQualified())) return ICS; // -- If the initializer expression @@ -4313,7 +4347,7 @@ TryListConversion(Sema &S, InitListExpr *From, QualType ToType, // We need a complete type for what follows. Incomplete types can never be // initialized from init lists. - if (S.RequireCompleteType(From->getLocStart(), ToType, S.PDiag())) + if (S.RequireCompleteType(From->getLocStart(), ToType, 0)) return Result; // C++11 [over.ics.list]p2: @@ -4995,29 +5029,9 @@ static bool isIntegralOrEnumerationType(QualType T, bool AllowScopedEnum) { /// \param Loc The source location of the construct that requires the /// conversion. /// -/// \param FromE The expression we're converting from. -/// -/// \param NotIntDiag The diagnostic to be emitted if the expression does not -/// have integral or enumeration type. -/// -/// \param IncompleteDiag The diagnostic to be emitted if the expression has -/// incomplete class type. +/// \param From The expression we're converting from. /// -/// \param ExplicitConvDiag The diagnostic to be emitted if we're calling an -/// explicit conversion function (because no implicit conversion functions -/// were available). This is a recovery mode. -/// -/// \param ExplicitConvNote The note to be emitted with \p ExplicitConvDiag, -/// showing which conversion was picked. -/// -/// \param AmbigDiag The diagnostic to be emitted if there is more than one -/// conversion function that could convert to integral or enumeration type. -/// -/// \param AmbigNote The note to be emitted with \p AmbigDiag for each -/// usable conversion function. -/// -/// \param ConvDiag The diagnostic to be emitted if we are calling a conversion -/// function, which may be an extension in this case. +/// \param Diagnoser Used to output any diagnostics. /// /// \param AllowScopedEnumerations Specifies whether conversions to scoped /// enumerations should be considered. @@ -5026,13 +5040,7 @@ static bool isIntegralOrEnumerationType(QualType T, bool AllowScopedEnum) { /// successful. ExprResult Sema::ConvertToIntegralOrEnumerationType(SourceLocation Loc, Expr *From, - const PartialDiagnostic &NotIntDiag, - const PartialDiagnostic &IncompleteDiag, - const PartialDiagnostic &ExplicitConvDiag, - const PartialDiagnostic &ExplicitConvNote, - const PartialDiagnostic &AmbigDiag, - const PartialDiagnostic &AmbigNote, - const PartialDiagnostic &ConvDiag, + ICEConvertDiagnoser &Diagnoser, bool AllowScopedEnumerations) { // We can't perform any more checking for type-dependent expressions. if (From->isTypeDependent()) @@ -5056,13 +5064,25 @@ Sema::ConvertToIntegralOrEnumerationType(SourceLocation Loc, Expr *From, // expression of integral or enumeration type. const RecordType *RecordTy = T->getAs(); if (!RecordTy || !getLangOpts().CPlusPlus) { - if (NotIntDiag.getDiagID()) - Diag(Loc, NotIntDiag) << T << From->getSourceRange(); + if (!Diagnoser.Suppress) + Diagnoser.diagnoseNotInt(*this, Loc, T) << From->getSourceRange(); return Owned(From); } // We must have a complete class type. 
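The TryReferenceInit change earlier in this stretch of the diff adds the volatile check the old condition missed: binding a temporary requires a const, non-volatile lvalue reference. A small user-level illustration (assumed example):

void reference_binding() {
  const int &cr = 42;             // OK: temporary bound, lifetime extended
  // const volatile int &vr = 42; // now correctly rejected: a reference with
  //                              // volatile may not bind to an rvalue
}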
- if (RequireCompleteType(Loc, T, IncompleteDiag)) + struct TypeDiagnoserPartialDiag : TypeDiagnoser { + ICEConvertDiagnoser &Diagnoser; + Expr *From; + + TypeDiagnoserPartialDiag(ICEConvertDiagnoser &Diagnoser, Expr *From) + : TypeDiagnoser(Diagnoser.Suppress), Diagnoser(Diagnoser), From(From) {} + + virtual void diagnose(Sema &S, SourceLocation Loc, QualType T) { + Diagnoser.diagnoseIncomplete(S, Loc, T) << From->getSourceRange(); + } + } IncompleteDiagnoser(Diagnoser, From); + + if (RequireCompleteType(Loc, T, IncompleteDiagnoser)) return Owned(From); // Look for a conversion to an integral or enumeration type. @@ -5092,7 +5112,7 @@ Sema::ConvertToIntegralOrEnumerationType(SourceLocation Loc, Expr *From, switch (ViableConversions.size()) { case 0: - if (ExplicitConversions.size() == 1 && ExplicitConvDiag.getDiagID()) { + if (ExplicitConversions.size() == 1 && !Diagnoser.Suppress) { DeclAccessPair Found = ExplicitConversions[0]; CXXConversionDecl *Conversion = cast(Found->getUnderlyingDecl()); @@ -5104,14 +5124,12 @@ Sema::ConvertToIntegralOrEnumerationType(SourceLocation Loc, Expr *From, std::string TypeStr; ConvTy.getAsStringInternal(TypeStr, getPrintingPolicy()); - Diag(Loc, ExplicitConvDiag) - << T << ConvTy + Diagnoser.diagnoseExplicitConv(*this, Loc, T, ConvTy) << FixItHint::CreateInsertion(From->getLocStart(), "static_cast<" + TypeStr + ">(") << FixItHint::CreateInsertion(PP.getLocForEndOfToken(From->getLocEnd()), ")"); - Diag(Conversion->getLocation(), ExplicitConvNote) - << ConvTy->isEnumeralType() << ConvTy; + Diagnoser.noteExplicitConv(*this, Conversion, ConvTy); // If we aren't in a SFINAE context, build a call to the // explicit conversion function. @@ -5142,12 +5160,12 @@ Sema::ConvertToIntegralOrEnumerationType(SourceLocation Loc, Expr *From, = cast(Found->getUnderlyingDecl()); QualType ConvTy = Conversion->getConversionType().getNonReferenceType(); - if (ConvDiag.getDiagID()) { + if (!Diagnoser.SuppressConversion) { if (isSFINAEContext()) return ExprError(); - Diag(Loc, ConvDiag) - << T << ConvTy->isEnumeralType() << ConvTy << From->getSourceRange(); + Diagnoser.diagnoseConversion(*this, Loc, T, ConvTy) + << From->getSourceRange(); } ExprResult Result = BuildCXXMemberCallExpr(From, Found, Conversion, @@ -5163,24 +5181,24 @@ Sema::ConvertToIntegralOrEnumerationType(SourceLocation Loc, Expr *From, } default: - if (!AmbigDiag.getDiagID()) - return Owned(From); + if (Diagnoser.Suppress) + return ExprError(); - Diag(Loc, AmbigDiag) - << T << From->getSourceRange(); + Diagnoser.diagnoseAmbiguous(*this, Loc, T) << From->getSourceRange(); for (unsigned I = 0, N = ViableConversions.size(); I != N; ++I) { CXXConversionDecl *Conv = cast(ViableConversions[I]->getUnderlyingDecl()); QualType ConvTy = Conv->getConversionType().getNonReferenceType(); - Diag(Conv->getLocation(), AmbigNote) - << ConvTy->isEnumeralType() << ConvTy; + Diagnoser.noteAmbiguous(*this, Conv, ConvTy); } return Owned(From); } if (!isIntegralOrEnumerationType(From->getType(), AllowScopedEnumerations) && - NotIntDiag.getDiagID()) - Diag(Loc, NotIntDiag) << From->getType() << From->getSourceRange(); + !Diagnoser.Suppress) { + Diagnoser.diagnoseNotInt(*this, Loc, From->getType()) + << From->getSourceRange(); + } return DefaultLvalueConversion(From); } @@ -5190,7 +5208,7 @@ Sema::ConvertToIntegralOrEnumerationType(SourceLocation Loc, Expr *From, /// @p SuppressUserConversions, then don't allow user-defined /// conversions via constructors or conversion operators. 
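ConvertToIntegralOrEnumerationType now routes every message through an ICEConvertDiagnoser (the switch-statement subclass appears in the SemaStmt.cpp hunks further down). A sketch of the conversion cases it classifies (assumed examples; wording approximate):

struct Implicit { operator int() const; };
struct Explicit { explicit operator long() const; };  // C++11

void switch_conditions(Implicit i, Explicit e) {
  switch (i) { default: break; }  // OK: exactly one viable conversion to int
  switch (e) { default: break; }  // error via diagnoseExplicitConv, with a
                                  // fix-it suggesting static_cast<long>(e)
}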
/// -/// \para PartialOverloading true if we are performing "partial" overloading +/// \param PartialOverloading true if we are performing "partial" overloading /// based on an incomplete set of function arguments. This feature is used by /// code completion. void @@ -5906,7 +5924,7 @@ void Sema::AddMemberOperatorCandidates(OverloadedOperatorKind Op, // empty. if (const RecordType *T1Rec = T1->getAs()) { // Complete the type if it can be completed. Otherwise, we're done. - if (RequireCompleteType(OpLoc, T1, PDiag())) + if (RequireCompleteType(OpLoc, T1, 0)) return; LookupResult Operators(*this, OpName, OpLoc, LookupOrdinaryName); @@ -6098,40 +6116,49 @@ BuiltinCandidateTypeSet::AddPointerWithMoreQualifiedTypeVariants(QualType Ty, const PointerType *PointerTy = Ty->getAs(); bool buildObjCPtr = false; if (!PointerTy) { - if (const ObjCObjectPointerType *PTy = Ty->getAs()) { - PointeeTy = PTy->getPointeeType(); - buildObjCPtr = true; - } - else - llvm_unreachable("type was not a pointer type!"); - } - else + const ObjCObjectPointerType *PTy = Ty->castAs(); + PointeeTy = PTy->getPointeeType(); + buildObjCPtr = true; + } else { PointeeTy = PointerTy->getPointeeType(); - + } + // Don't add qualified variants of arrays. For one, they're not allowed // (the qualifier would sink to the element type), and for another, the // only overload situation where it matters is subscript or pointer +- int, // and those shouldn't have qualifier variants anyway. if (PointeeTy->isArrayType()) return true; + unsigned BaseCVR = PointeeTy.getCVRQualifiers(); - if (const ConstantArrayType *Array =Context.getAsConstantArrayType(PointeeTy)) - BaseCVR = Array->getElementType().getCVRQualifiers(); bool hasVolatile = VisibleQuals.hasVolatile(); bool hasRestrict = VisibleQuals.hasRestrict(); // Iterate through all strict supersets of BaseCVR. for (unsigned CVR = BaseCVR+1; CVR <= Qualifiers::CVRMask; ++CVR) { if ((CVR | BaseCVR) != CVR) continue; - // Skip over Volatile/Restrict if no Volatile/Restrict found anywhere - // in the types. + // Skip over volatile if no volatile found anywhere in the types. if ((CVR & Qualifiers::Volatile) && !hasVolatile) continue; - if ((CVR & Qualifiers::Restrict) && !hasRestrict) continue; + + // Skip over restrict if no restrict found anywhere in the types, or if + // the type cannot be restrict-qualified. + if ((CVR & Qualifiers::Restrict) && + (!hasRestrict || + (!(PointeeTy->isAnyPointerType() || PointeeTy->isReferenceType())))) + continue; + + // Build qualified pointee type. QualType QPointeeTy = Context.getCVRQualifiedType(PointeeTy, CVR); + + // Build qualified pointer type. + QualType QPointerTy; if (!buildObjCPtr) - PointerTypes.insert(Context.getPointerType(QPointeeTy)); + QPointerTy = Context.getPointerType(QPointeeTy); else - PointerTypes.insert(Context.getObjCObjectPointerType(QPointeeTy)); + QPointerTy = Context.getObjCObjectPointerType(QPointeeTy); + + // Insert qualified pointer type. + PointerTypes.insert(QPointerTy); } return true; @@ -6328,6 +6355,8 @@ static Qualifiers CollectVRQualifiers(ASTContext &Context, Expr* ArgExpr) { // as see them. 
bool done = false; while (!done) { + if (CanTy.isRestrictQualified()) + VRQuals.addRestrict(); if (const PointerType *ResTypePtr = CanTy->getAs()) CanTy = ResTypePtr->getPointeeType(); else if (const MemberPointerType *ResTypeMPtr = @@ -6337,8 +6366,6 @@ static Qualifiers CollectVRQualifiers(ASTContext &Context, Expr* ArgExpr) { done = true; if (CanTy.isVolatileQualified()) VRQuals.addVolatile(); - if (CanTy.isRestrictQualified()) - VRQuals.addRestrict(); if (VRQuals.hasRestrict() && VRQuals.hasVolatile()) return VRQuals; } @@ -6368,12 +6395,12 @@ class BuiltinOperatorOverloadBuilder { // The "promoted arithmetic types" are the arithmetic // types are that preserved by promotion (C++ [over.built]p2). static const unsigned FirstIntegralType = 3; - static const unsigned LastIntegralType = 18; + static const unsigned LastIntegralType = 20; static const unsigned FirstPromotedIntegralType = 3, - LastPromotedIntegralType = 9; + LastPromotedIntegralType = 11; static const unsigned FirstPromotedArithmeticType = 0, - LastPromotedArithmeticType = 9; - static const unsigned NumArithmeticTypes = 18; + LastPromotedArithmeticType = 11; + static const unsigned NumArithmeticTypes = 20; /// \brief Get the canonical type for a given arithmetic type index. CanQualType getArithmeticType(unsigned index) { @@ -6389,9 +6416,11 @@ class BuiltinOperatorOverloadBuilder { &ASTContext::IntTy, &ASTContext::LongTy, &ASTContext::LongLongTy, + &ASTContext::Int128Ty, &ASTContext::UnsignedIntTy, &ASTContext::UnsignedLongTy, &ASTContext::UnsignedLongLongTy, + &ASTContext::UnsignedInt128Ty, // End of promoted types. &ASTContext::BoolTy, @@ -6404,7 +6433,7 @@ class BuiltinOperatorOverloadBuilder { &ASTContext::UnsignedCharTy, &ASTContext::UnsignedShortTy, // End of integral types. - // FIXME: What about complex? + // FIXME: What about complex? What about half? }; return S.Context.*ArithmeticTypes[index]; } @@ -6423,20 +6452,24 @@ class BuiltinOperatorOverloadBuilder { // *except* when dealing with signed types of higher rank. // (we could precompute SLL x UI for all known platforms, but it's // better not to make any assumptions). + // We assume that int128 has a higher rank than long long on all platforms. 
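The widened arithmetic-type tables above fold __int128 and unsigned __int128 into the builtin-operator candidate set, under the stated assumption that __int128 outranks long long. A hedged sketch of the usual-arithmetic-conversion behavior this encodes (assuming a target where Clang provides __int128):

void int128_promotion(__int128 big, unsigned long long ull) {
  // Mixed operands pick the S128 column of ConversionsTable: the result
  // type is __int128, since it is assumed to have the higher rank.
  __int128 sum = big + ull;
  (void)sum;
}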
enum PromotedType { - Flt, Dbl, LDbl, SI, SL, SLL, UI, UL, ULL, Dep=-1 + Dep=-1, + Flt, Dbl, LDbl, SI, SL, SLL, S128, UI, UL, ULL, U128 }; - static PromotedType ConversionsTable[LastPromotedArithmeticType] + static const PromotedType ConversionsTable[LastPromotedArithmeticType] [LastPromotedArithmeticType] = { - /* Flt*/ { Flt, Dbl, LDbl, Flt, Flt, Flt, Flt, Flt, Flt }, - /* Dbl*/ { Dbl, Dbl, LDbl, Dbl, Dbl, Dbl, Dbl, Dbl, Dbl }, - /*LDbl*/ { LDbl, LDbl, LDbl, LDbl, LDbl, LDbl, LDbl, LDbl, LDbl }, - /* SI*/ { Flt, Dbl, LDbl, SI, SL, SLL, UI, UL, ULL }, - /* SL*/ { Flt, Dbl, LDbl, SL, SL, SLL, Dep, UL, ULL }, - /* SLL*/ { Flt, Dbl, LDbl, SLL, SLL, SLL, Dep, Dep, ULL }, - /* UI*/ { Flt, Dbl, LDbl, UI, Dep, Dep, UI, UL, ULL }, - /* UL*/ { Flt, Dbl, LDbl, UL, UL, Dep, UL, UL, ULL }, - /* ULL*/ { Flt, Dbl, LDbl, ULL, ULL, ULL, ULL, ULL, ULL }, +/* Flt*/ { Flt, Dbl, LDbl, Flt, Flt, Flt, Flt, Flt, Flt, Flt, Flt }, +/* Dbl*/ { Dbl, Dbl, LDbl, Dbl, Dbl, Dbl, Dbl, Dbl, Dbl, Dbl, Dbl }, +/*LDbl*/ { LDbl, LDbl, LDbl, LDbl, LDbl, LDbl, LDbl, LDbl, LDbl, LDbl, LDbl }, +/* SI*/ { Flt, Dbl, LDbl, SI, SL, SLL, S128, UI, UL, ULL, U128 }, +/* SL*/ { Flt, Dbl, LDbl, SL, SL, SLL, S128, Dep, UL, ULL, U128 }, +/* SLL*/ { Flt, Dbl, LDbl, SLL, SLL, SLL, S128, Dep, Dep, ULL, U128 }, +/*S128*/ { Flt, Dbl, LDbl, S128, S128, S128, S128, S128, S128, S128, U128 }, +/* UI*/ { Flt, Dbl, LDbl, UI, Dep, Dep, S128, UI, UL, ULL, U128 }, +/* UL*/ { Flt, Dbl, LDbl, UL, UL, Dep, S128, UL, UL, ULL, U128 }, +/* ULL*/ { Flt, Dbl, LDbl, ULL, ULL, ULL, S128, ULL, ULL, ULL, U128 }, +/*U128*/ { Flt, Dbl, LDbl, U128, U128, U128, U128, U128, U128, U128, U128 }, }; assert(L < LastPromotedArithmeticType); @@ -6466,7 +6499,8 @@ class BuiltinOperatorOverloadBuilder { /// \brief Helper method to factor out the common pattern of adding overloads /// for '++' and '--' builtin operators. void addPlusPlusMinusMinusStyleOverloads(QualType CandidateTy, - bool HasVolatile) { + bool HasVolatile, + bool HasRestrict) { QualType ParamTypes[2] = { S.Context.getLValueReferenceType(CandidateTy), S.Context.IntTy @@ -6489,6 +6523,33 @@ class BuiltinOperatorOverloadBuilder { else S.AddBuiltinCandidate(CandidateTy, ParamTypes, Args, 2, CandidateSet); } + + // Add restrict version only if there are conversions to a restrict type + // and our candidate type is a non-restrict-qualified pointer. 
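addPlusPlusMinusMinusStyleOverloads now also emits restrict-qualified builtin candidates. These only matter when an operand of class type converts to a restrict-qualified pointer, which is what VisibleTypeConversionsQuals.hasRestrict() detects; a minimal, hypothetical sketch using Clang's __restrict extension, intended only to show the kind of operand involved:

struct RestrictPtr {
  operator int *__restrict &();  // conversion to reference-to-restrict-pointer
};

void bump(RestrictPtr p) {
  ++p;  // overload resolution can now select a builtin candidate of the
        // (approximate) form 'int *__restrict &operator++(int *__restrict &)'
}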
+ if (HasRestrict && CandidateTy->isAnyPointerType() && + !CandidateTy.isRestrictQualified()) { + ParamTypes[0] + = S.Context.getLValueReferenceType( + S.Context.getCVRQualifiedType(CandidateTy, Qualifiers::Restrict)); + if (NumArgs == 1) + S.AddBuiltinCandidate(ParamTypes[0], ParamTypes, Args, 1, CandidateSet); + else + S.AddBuiltinCandidate(CandidateTy, ParamTypes, Args, 2, CandidateSet); + + if (HasVolatile) { + ParamTypes[0] + = S.Context.getLValueReferenceType( + S.Context.getCVRQualifiedType(CandidateTy, + (Qualifiers::Volatile | + Qualifiers::Restrict))); + if (NumArgs == 1) + S.AddBuiltinCandidate(ParamTypes[0], ParamTypes, Args, 1, + CandidateSet); + else + S.AddBuiltinCandidate(CandidateTy, ParamTypes, Args, 2, CandidateSet); + } + } + } public: @@ -6508,13 +6569,13 @@ public: assert(getArithmeticType(FirstPromotedIntegralType) == S.Context.IntTy && "Invalid first promoted integral type"); assert(getArithmeticType(LastPromotedIntegralType - 1) - == S.Context.UnsignedLongLongTy && + == S.Context.UnsignedInt128Ty && "Invalid last promoted integral type"); assert(getArithmeticType(FirstPromotedArithmeticType) == S.Context.FloatTy && "Invalid first promoted arithmetic type"); assert(getArithmeticType(LastPromotedArithmeticType - 1) - == S.Context.UnsignedLongLongTy && + == S.Context.UnsignedInt128Ty && "Invalid last promoted arithmetic type"); } @@ -6543,7 +6604,8 @@ public: Arith < NumArithmeticTypes; ++Arith) { addPlusPlusMinusMinusStyleOverloads( getArithmeticType(Arith), - VisibleTypeConversionsQuals.hasVolatile()); + VisibleTypeConversionsQuals.hasVolatile(), + VisibleTypeConversionsQuals.hasRestrict()); } } @@ -6567,8 +6629,10 @@ public: continue; addPlusPlusMinusMinusStyleOverloads(*Ptr, - (!S.Context.getCanonicalType(*Ptr).isVolatileQualified() && - VisibleTypeConversionsQuals.hasVolatile())); + (!(*Ptr).isVolatileQualified() && + VisibleTypeConversionsQuals.hasVolatile()), + (!(*Ptr).isRestrictQualified() && + VisibleTypeConversionsQuals.hasRestrict())); } } @@ -7026,14 +7090,36 @@ public: S.AddBuiltinCandidate(ParamTypes[0], ParamTypes, Args, 2, CandidateSet, /*IsAssigmentOperator=*/ isEqualOp); - if (!S.Context.getCanonicalType(*Ptr).isVolatileQualified() && - VisibleTypeConversionsQuals.hasVolatile()) { + bool NeedVolatile = !(*Ptr).isVolatileQualified() && + VisibleTypeConversionsQuals.hasVolatile(); + if (NeedVolatile) { // volatile version ParamTypes[0] = S.Context.getLValueReferenceType(S.Context.getVolatileType(*Ptr)); S.AddBuiltinCandidate(ParamTypes[0], ParamTypes, Args, 2, CandidateSet, /*IsAssigmentOperator=*/isEqualOp); } + + if (!(*Ptr).isRestrictQualified() && + VisibleTypeConversionsQuals.hasRestrict()) { + // restrict version + ParamTypes[0] + = S.Context.getLValueReferenceType(S.Context.getRestrictType(*Ptr)); + S.AddBuiltinCandidate(ParamTypes[0], ParamTypes, Args, 2, CandidateSet, + /*IsAssigmentOperator=*/isEqualOp); + + if (NeedVolatile) { + // volatile restrict version + ParamTypes[0] + = S.Context.getLValueReferenceType( + S.Context.getCVRQualifiedType(*Ptr, + (Qualifiers::Volatile | + Qualifiers::Restrict))); + S.AddBuiltinCandidate(ParamTypes[0], ParamTypes, Args, 2, + CandidateSet, + /*IsAssigmentOperator=*/isEqualOp); + } + } } if (isEqualOp) { @@ -7054,14 +7140,36 @@ public: S.AddBuiltinCandidate(ParamTypes[0], ParamTypes, Args, 2, CandidateSet, /*IsAssigmentOperator=*/true); - if (!S.Context.getCanonicalType(*Ptr).isVolatileQualified() && - VisibleTypeConversionsQuals.hasVolatile()) { + bool NeedVolatile = !(*Ptr).isVolatileQualified() && + 
VisibleTypeConversionsQuals.hasVolatile(); + if (NeedVolatile) { // volatile version ParamTypes[0] = S.Context.getLValueReferenceType(S.Context.getVolatileType(*Ptr)); S.AddBuiltinCandidate(ParamTypes[0], ParamTypes, Args, 2, CandidateSet, /*IsAssigmentOperator=*/true); } + + if (!(*Ptr).isRestrictQualified() && + VisibleTypeConversionsQuals.hasRestrict()) { + // restrict version + ParamTypes[0] + = S.Context.getLValueReferenceType(S.Context.getRestrictType(*Ptr)); + S.AddBuiltinCandidate(ParamTypes[0], ParamTypes, Args, 2, + CandidateSet, /*IsAssigmentOperator=*/true); + + if (NeedVolatile) { + // volatile restrict version + ParamTypes[0] + = S.Context.getLValueReferenceType( + S.Context.getCVRQualifiedType(*Ptr, + (Qualifiers::Volatile | + Qualifiers::Restrict))); + S.AddBuiltinCandidate(ParamTypes[0], ParamTypes, Args, 2, + CandidateSet, /*IsAssigmentOperator=*/true); + + } + } } } } @@ -7705,13 +7813,11 @@ isBetterOverloadCandidate(Sema &S, /// \brief Computes the best viable function (C++ 13.3.3) /// within an overload candidate set. /// -/// \param CandidateSet the set of candidate functions. -/// -/// \param Loc the location of the function name (or operator symbol) for +/// \param Loc The location of the function name (or operator symbol) for /// which overload resolution occurs. /// -/// \param Best f overload resolution was successful or found a deleted -/// function, Best points to the candidate function found. +/// \param Best If overload resolution was successful or found a deleted +/// function, \p Best points to the candidate function found. /// /// \returns The result of overload resolution. OverloadingResult @@ -8035,12 +8141,22 @@ void DiagnoseBadConversion(Sema &S, OverloadCandidate *Cand, unsigned I) { FromIface->isSuperClassOf(ToIface)) BaseToDerivedConversion = 2; } else if (const ReferenceType *ToRefTy = ToTy->getAs()) { - if (ToRefTy->getPointeeType().isAtLeastAsQualifiedAs(FromTy) && - !FromTy->isIncompleteType() && - !ToRefTy->getPointeeType()->isIncompleteType() && - S.IsDerivedFrom(ToRefTy->getPointeeType(), FromTy)) - BaseToDerivedConversion = 3; + if (ToRefTy->getPointeeType().isAtLeastAsQualifiedAs(FromTy) && + !FromTy->isIncompleteType() && + !ToRefTy->getPointeeType()->isIncompleteType() && + S.IsDerivedFrom(ToRefTy->getPointeeType(), FromTy)) { + BaseToDerivedConversion = 3; + } else if (ToTy->isLValueReferenceType() && !FromExpr->isLValue() && + ToTy.getNonReferenceType().getCanonicalType() == + FromTy.getNonReferenceType().getCanonicalType()) { + S.Diag(Fn->getLocation(), diag::note_ovl_candidate_bad_lvalue) + << (unsigned) FnKind << FnDesc + << (FromExpr ? 
FromExpr->getSourceRange() : SourceRange()) + << (unsigned) isObjectArgument << I + 1; + MaybeEmitInheritedConstructorNote(S, Fn); + return; } + } if (BaseToDerivedConversion) { S.Diag(Fn->getLocation(), @@ -8127,9 +8243,14 @@ void DiagnoseArityMismatch(Sema &S, OverloadCandidate *Cand, std::string Description; OverloadCandidateKind FnKind = ClassifyOverloadCandidate(S, Fn, Description); - S.Diag(Fn->getLocation(), diag::note_ovl_candidate_arity) - << (unsigned) FnKind << (Fn->getDescribedFunctionTemplate() != 0) << mode - << modeCount << NumFormalArgs; + if (modeCount == 1 && Fn->getParamDecl(0)->getDeclName()) + S.Diag(Fn->getLocation(), diag::note_ovl_candidate_arity_one) + << (unsigned) FnKind << (Fn->getDescribedFunctionTemplate() != 0) << mode + << Fn->getParamDecl(0) << NumFormalArgs; + else + S.Diag(Fn->getLocation(), diag::note_ovl_candidate_arity) + << (unsigned) FnKind << (Fn->getDescribedFunctionTemplate() != 0) << mode + << modeCount << NumFormalArgs; MaybeEmitInheritedConstructorNote(S, Fn); } @@ -8232,14 +8353,39 @@ void DiagnoseBadDeduction(Sema &S, OverloadCandidate *Cand, return; case Sema::TDK_SubstitutionFailure: { - std::string ArgString; - if (TemplateArgumentList *Args - = Cand->DeductionFailure.getTemplateArgumentList()) - ArgString = S.getTemplateArgumentBindingsText( - Fn->getDescribedFunctionTemplate()->getTemplateParameters(), - *Args); + // Format the template argument list into the argument string. + llvm::SmallString<128> TemplateArgString; + if (TemplateArgumentList *Args = + Cand->DeductionFailure.getTemplateArgumentList()) { + TemplateArgString = " "; + TemplateArgString += S.getTemplateArgumentBindingsText( + Fn->getDescribedFunctionTemplate()->getTemplateParameters(), *Args); + } + + // If this candidate was disabled by enable_if, say so. + PartialDiagnosticAt *PDiag = Cand->DeductionFailure.getSFINAEDiagnostic(); + if (PDiag && PDiag->second.getDiagID() == + diag::err_typename_nested_not_found_enable_if) { + // FIXME: Use the source range of the condition, and the fully-qualified + // name of the enable_if template. These are both present in PDiag. + S.Diag(PDiag->first, diag::note_ovl_candidate_disabled_by_enable_if) + << "'enable_if'" << TemplateArgString; + return; + } + + // Format the SFINAE diagnostic into the argument string. + // FIXME: Add a general mechanism to include a PartialDiagnostic *'s + // formatted message in another diagnostic. + llvm::SmallString<128> SFINAEArgString; + SourceRange R; + if (PDiag) { + SFINAEArgString = ": "; + R = SourceRange(PDiag->first, PDiag->first); + PDiag->second.EmitToString(S.getDiagnostics(), SFINAEArgString); + } + S.Diag(Fn->getLocation(), diag::note_ovl_candidate_substitution_failure) - << ArgString; + << TemplateArgString << SFINAEArgString << R; MaybeEmitInheritedConstructorNote(S, Fn); return; } @@ -9190,7 +9336,7 @@ bool Sema::ResolveAndFixSingleFunctionTemplateSpecialization( return true; } - // Fix the expresion to refer to 'fn'. + // Fix the expression to refer to 'fn'. SingleFunctionExpression = Owned(FixOverloadedFunctionReference(SrcExpr.take(), found, fn)); @@ -9692,14 +9838,14 @@ static bool IsOverloaded(const UnresolvedSetImpl &Functions) { /// \param OpcIn The UnaryOperator::Opcode that describes this /// operator. /// -/// \param Functions The set of non-member functions that will be +/// \param Fns The set of non-member functions that will be /// considered by overload resolution. 
The caller needs to build this /// set based on the context using, e.g., /// LookupOverloadedOperatorName() and ArgumentDependentLookup(). This /// set should not contain any member functions; those will be added /// by CreateOverloadedUnaryOp(). /// -/// \param input The input argument. +/// \param Input The input argument. ExprResult Sema::CreateOverloadedUnaryOp(SourceLocation OpLoc, unsigned OpcIn, const UnresolvedSetImpl &Fns, @@ -9892,7 +10038,7 @@ Sema::CreateOverloadedUnaryOp(SourceLocation OpLoc, unsigned OpcIn, /// \param OpcIn The BinaryOperator::Opcode that describes this /// operator. /// -/// \param Functions The set of non-member functions that will be +/// \param Fns The set of non-member functions that will be /// considered by overload resolution. The caller needs to build this /// set based on the context using, e.g., /// LookupOverloadedOperatorName() and ArgumentDependentLookup(). This @@ -10559,7 +10705,7 @@ Sema::BuildCallToMemberFunction(Scope *S, Expr *MemExprE, DiagnoseSentinelCalls(Method, LParenLoc, Args, NumArgs); - if (CheckFunctionCall(Method, TheCall)) + if (CheckFunctionCall(Method, TheCall, Proto)) return ExprError(); if ((isa(CurContext) || @@ -10610,8 +10756,7 @@ Sema::BuildCallToObjectOfClassType(Scope *S, Expr *Obj, DeclarationName OpName = Context.DeclarationNames.getCXXOperatorName(OO_Call); if (RequireCompleteType(LParenLoc, Object.get()->getType(), - PDiag(diag::err_incomplete_object_call) - << Object.get()->getSourceRange())) + diag::err_incomplete_object_call, Object.get())) return true; LookupResult R(*this, OpName, LParenLoc, LookupOrdinaryName); @@ -10857,7 +11002,7 @@ Sema::BuildCallToObjectOfClassType(Scope *S, Expr *Obj, // If this is a variadic call, handle args passed through "...". if (Proto->isVariadic()) { // Promote the arguments (C99 6.5.2.2p7). 
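The loop after this note applies the C variadic default argument promotions to trailing arguments of a call through a class's operator(). Illustration of those promotions (assumed user-level example):

void log_values(const char *fmt, ...);

void promote_examples() {
  float f = 2.5f;
  char  c = 'x';
  log_values("%f %d", f, c);  // through '...', 'f' is promoted float -> double
                              // and 'c' is promoted char -> int
}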
- for (unsigned i = NumArgsInProto; i != NumArgs; i++) { + for (unsigned i = NumArgsInProto; i < NumArgs; i++) { ExprResult Arg = DefaultVariadicArgumentPromotion(Args[i], VariadicMethod, 0); IsError |= Arg.isInvalid(); TheCall->setArg(i + 1, Arg.take()); @@ -10868,7 +11013,7 @@ Sema::BuildCallToObjectOfClassType(Scope *S, Expr *Obj, DiagnoseSentinelCalls(Method, LParenLoc, Args, NumArgs); - if (CheckFunctionCall(Method, TheCall)) + if (CheckFunctionCall(Method, TheCall, Proto)) return true; return MaybeBindToTemporary(TheCall); @@ -10899,8 +11044,7 @@ Sema::BuildOverloadedArrowExpr(Scope *S, Expr *Base, SourceLocation OpLoc) { const RecordType *BaseRecord = Base->getType()->getAs(); if (RequireCompleteType(Loc, Base->getType(), - PDiag(diag::err_typecheck_incomplete_tag) - << Base->getSourceRange())) + diag::err_typecheck_incomplete_tag, Base)) return ExprError(); LookupResult R(*this, OpName, OpLoc, LookupOrdinaryName); @@ -11049,7 +11193,7 @@ ExprResult Sema::BuildLiteralOperatorCall(LookupResult &R, if (CheckCallReturnType(FD->getResultType(), UDSuffixLoc, UDL, FD)) return ExprError(); - if (CheckFunctionCall(FD, UDL)) + if (CheckFunctionCall(FD, UDL, NULL)) return ExprError(); return MaybeBindToTemporary(UDL); diff --git a/lib/Sema/SemaPseudoObject.cpp b/lib/Sema/SemaPseudoObject.cpp index 0e66329..722ac19 100644 --- a/lib/Sema/SemaPseudoObject.cpp +++ b/lib/Sema/SemaPseudoObject.cpp @@ -34,6 +34,7 @@ #include "clang/Sema/Initialization.h" #include "clang/AST/ExprObjC.h" #include "clang/Lex/Preprocessor.h" +#include "llvm/ADT/SmallString.h" using namespace clang; using namespace sema; @@ -232,7 +233,7 @@ namespace { Expr *op); bool tryBuildGetOfReference(Expr *op, ExprResult &result); - bool findSetter(); + bool findSetter(bool warn=true); bool findGetter(); Expr *rebuildAndCaptureObject(Expr *syntacticBase); @@ -505,7 +506,7 @@ bool ObjCPropertyOpBuilder::findGetter() { /// reference. /// /// \return true if a setter was found, in which case Setter -bool ObjCPropertyOpBuilder::findSetter() { +bool ObjCPropertyOpBuilder::findSetter(bool warn) { // For implicit properties, just trust the lookup we already did. if (RefExpr->isImplicitProperty()) { if (ObjCMethodDecl *setter = RefExpr->getImplicitPropertySetter()) { @@ -531,6 +532,23 @@ bool ObjCPropertyOpBuilder::findSetter() { // Do a normal method lookup first. if (ObjCMethodDecl *setter = LookupMethodInReceiverType(S, SetterSelector, RefExpr)) { + if (setter->isSynthesized() && warn) + if (const ObjCInterfaceDecl *IFace = + dyn_cast(setter->getDeclContext())) { + const StringRef thisPropertyName(prop->getName()); + char front = thisPropertyName.front(); + front = islower(front) ? toupper(front) : tolower(front); + SmallString<100> PropertyName = thisPropertyName; + PropertyName[0] = front; + IdentifierInfo *AltMember = &S.PP.getIdentifierTable().get(PropertyName); + if (ObjCPropertyDecl *prop1 = IFace->FindPropertyDeclaration(AltMember)) + if (prop != prop1 && (prop1->getSetterMethodDecl() == setter)) { + S.Diag(RefExpr->getExprLoc(), diag::error_property_setter_ambiguous_use) + << prop->getName() << prop1->getName() << setter->getSelector(); + S.Diag(prop->getLocation(), diag::note_property_declare); + S.Diag(prop1->getLocation(), diag::note_property_declare); + } + } Setter = setter; return true; } @@ -603,7 +621,7 @@ ExprResult ObjCPropertyOpBuilder::buildGet() { /// value being set as the value of the property operation. 
ExprResult ObjCPropertyOpBuilder::buildSet(Expr *op, SourceLocation opcLoc, bool captureSetValueAsResult) { - bool hasSetter = findSetter(); + bool hasSetter = findSetter(false); assert(hasSetter); (void) hasSetter; if (SyntacticRefExpr) @@ -889,8 +907,7 @@ Sema::ObjCSubscriptKind // We must have a complete class type. if (RequireCompleteType(FromE->getExprLoc(), T, - PDiag(diag::err_objc_index_incomplete_class_type) - << FromE->getSourceRange())) + diag::err_objc_index_incomplete_class_type, FromE)) return OS_Error; // Look for a conversion to an integral, enumeration type, or @@ -938,6 +955,27 @@ Sema::ObjCSubscriptKind return OS_Error; } +/// CheckKeyForObjCARCConversion - This routine suggests bridge casting of CF +/// objects used as dictionary subscript key objects. +static void CheckKeyForObjCARCConversion(Sema &S, QualType ContainerT, + Expr *Key) { + if (ContainerT.isNull()) + return; + // dictionary subscripting. + // - (id)objectForKeyedSubscript:(id)key; + IdentifierInfo *KeyIdents[] = { + &S.Context.Idents.get("objectForKeyedSubscript") + }; + Selector GetterSelector = S.Context.Selectors.getSelector(1, KeyIdents); + ObjCMethodDecl *Getter = S.LookupMethodInObjectType(GetterSelector, ContainerT, + true /*instance*/); + if (!Getter) + return; + QualType T = Getter->param_begin()[0]->getType(); + S.CheckObjCARCConversion(Key->getSourceRange(), + T, Key, Sema::CCK_ImplicitConversion); +} + bool ObjCSubscriptOpBuilder::findAtIndexGetter() { if (AtIndexGetter) return true; @@ -955,8 +993,12 @@ bool ObjCSubscriptOpBuilder::findAtIndexGetter() { } Sema::ObjCSubscriptKind Res = S.CheckSubscriptingKind(RefExpr->getKeyExpr()); - if (Res == Sema::OS_Error) + if (Res == Sema::OS_Error) { + if (S.getLangOpts().ObjCAutoRefCount) + CheckKeyForObjCARCConversion(S, ResultType, + RefExpr->getKeyExpr()); return false; + } bool arrayRef = (Res == Sema::OS_Array); if (ResultType.isNull()) { @@ -1063,8 +1105,12 @@ bool ObjCSubscriptOpBuilder::findAtIndexSetter() { Sema::ObjCSubscriptKind Res = S.CheckSubscriptingKind(RefExpr->getKeyExpr()); - if (Res == Sema::OS_Error) + if (Res == Sema::OS_Error) { + if (S.getLangOpts().ObjCAutoRefCount) + CheckKeyForObjCARCConversion(S, ResultType, + RefExpr->getKeyExpr()); return false; + } bool arrayRef = (Res == Sema::OS_Array); if (ResultType.isNull()) { diff --git a/lib/Sema/SemaStmt.cpp b/lib/Sema/SemaStmt.cpp index 9052278..d22deb2 100644 --- a/lib/Sema/SemaStmt.cpp +++ b/lib/Sema/SemaStmt.cpp @@ -19,6 +19,7 @@ #include "clang/AST/ASTContext.h" #include "clang/AST/CharUnits.h" #include "clang/AST/DeclObjC.h" +#include "clang/AST/EvaluatedExprVisitor.h" #include "clang/AST/ExprCXX.h" #include "clang/AST/ExprObjC.h" #include "clang/AST/StmtObjC.h" @@ -28,7 +29,21 @@ #include "clang/Basic/TargetInfo.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Triple.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCObjectFileInfo.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/MC/MCTargetAsmParser.h" +#include "llvm/MC/MCParser/MCAsmParser.h" +#include "llvm/Support/SourceMgr.h" +#include "llvm/Support/TargetRegistry.h" +#include "llvm/Support/TargetSelect.h" using namespace clang; using namespace sema; @@ -150,10 +165,11 @@ void Sema::DiagnoseUnusedExprResult(const Stmt *S) { if (!E) return; + const Expr *WarnExpr; 
SourceLocation Loc; SourceRange R1, R2; if (SourceMgr.isInSystemMacro(E->getExprLoc()) || - !E->isUnusedResultAWarning(Loc, R1, R2, Context)) + !E->isUnusedResultAWarning(WarnExpr, Loc, R1, R2, Context)) return; // Okay, we have an unused result. Depending on what the base expression is, @@ -168,7 +184,7 @@ void Sema::DiagnoseUnusedExprResult(const Stmt *S) { if (DiagnoseUnusedComparison(*this, E)) return; - E = E->IgnoreParenImpCasts(); + E = WarnExpr; if (const CallExpr *CE = dyn_cast(E)) { if (E->getType()->isVoidType()) return; @@ -226,6 +242,11 @@ void Sema::DiagnoseUnusedExprResult(const Stmt *S) { } } + if (E->isGLValue() && E->getType().isVolatileQualified()) { + Diag(Loc, diag::warn_unused_volatile) << R1 << R2; + return; + } + DiagRuntimeBehavior(Loc, 0, PDiag(DiagID) << R1 << R2); } @@ -361,12 +382,10 @@ Sema::ActOnLabelStmt(SourceLocation IdentLoc, LabelDecl *TheDecl, } StmtResult Sema::ActOnAttributedStmt(SourceLocation AttrLoc, - const AttrVec &Attrs, + ArrayRef Attrs, Stmt *SubStmt) { - // Fill in the declaration and return it. Variable length will require to - // change this to AttributedStmt::Create(Context, ....); - // and probably using ArrayRef - AttributedStmt *LS = new (Context) AttributedStmt(AttrLoc, Attrs, SubStmt); + // Fill in the declaration and return it. + AttributedStmt *LS = AttributedStmt::Create(Context, AttrLoc, Attrs, SubStmt); return Owned(LS); } @@ -519,16 +538,56 @@ Sema::ActOnStartOfSwitchStmt(SourceLocation SwitchLoc, Expr *Cond, if (!Cond) return StmtError(); + class SwitchConvertDiagnoser : public ICEConvertDiagnoser { + Expr *Cond; + + public: + SwitchConvertDiagnoser(Expr *Cond) + : ICEConvertDiagnoser(false, true), Cond(Cond) { } + + virtual DiagnosticBuilder diagnoseNotInt(Sema &S, SourceLocation Loc, + QualType T) { + return S.Diag(Loc, diag::err_typecheck_statement_requires_integer) << T; + } + + virtual DiagnosticBuilder diagnoseIncomplete(Sema &S, SourceLocation Loc, + QualType T) { + return S.Diag(Loc, diag::err_switch_incomplete_class_type) + << T << Cond->getSourceRange(); + } + + virtual DiagnosticBuilder diagnoseExplicitConv(Sema &S, SourceLocation Loc, + QualType T, + QualType ConvTy) { + return S.Diag(Loc, diag::err_switch_explicit_conversion) << T << ConvTy; + } + + virtual DiagnosticBuilder noteExplicitConv(Sema &S, CXXConversionDecl *Conv, + QualType ConvTy) { + return S.Diag(Conv->getLocation(), diag::note_switch_conversion) + << ConvTy->isEnumeralType() << ConvTy; + } + + virtual DiagnosticBuilder diagnoseAmbiguous(Sema &S, SourceLocation Loc, + QualType T) { + return S.Diag(Loc, diag::err_switch_multiple_conversions) << T; + } + + virtual DiagnosticBuilder noteAmbiguous(Sema &S, CXXConversionDecl *Conv, + QualType ConvTy) { + return S.Diag(Conv->getLocation(), diag::note_switch_conversion) + << ConvTy->isEnumeralType() << ConvTy; + } + + virtual DiagnosticBuilder diagnoseConversion(Sema &S, SourceLocation Loc, + QualType T, + QualType ConvTy) { + return DiagnosticBuilder::getEmpty(); + } + } SwitchDiagnoser(Cond); + CondResult - = ConvertToIntegralOrEnumerationType(SwitchLoc, Cond, - PDiag(diag::err_typecheck_statement_requires_integer), - PDiag(diag::err_switch_incomplete_class_type) - << Cond->getSourceRange(), - PDiag(diag::err_switch_explicit_conversion), - PDiag(diag::note_switch_conversion), - PDiag(diag::err_switch_multiple_conversions), - PDiag(diag::note_switch_conversion), - PDiag(0), + = ConvertToIntegralOrEnumerationType(SwitchLoc, Cond, SwitchDiagnoser, /*AllowScopedEnumerations*/ true); if 
(CondResult.isInvalid()) return StmtError(); Cond = CondResult.take(); @@ -609,7 +668,7 @@ Sema::ActOnFinishSwitchStmt(SourceLocation SwitchLoc, Stmt *Switch, = CondExpr->isTypeDependent() || CondExpr->isValueDependent(); unsigned CondWidth = HasDependentValue ? 0 : Context.getIntWidth(CondTypeBeforePromotion); - bool CondIsSigned + bool CondIsSigned = CondTypeBeforePromotion->isSignedIntegerOrEnumerationType(); // Accumulate all of the case values in a vector so that we can sort them @@ -726,8 +785,30 @@ Sema::ActOnFinishSwitchStmt(SourceLocation SwitchLoc, Stmt *Switch, if (i != 0 && CaseVals[i].first == CaseVals[i-1].first) { // If we have a duplicate, report it. - Diag(CaseVals[i].second->getLHS()->getLocStart(), - diag::err_duplicate_case) << CaseVals[i].first.toString(10); + // First, determine if either case value has a name + StringRef PrevString, CurrString; + Expr *PrevCase = CaseVals[i-1].second->getLHS()->IgnoreParenCasts(); + Expr *CurrCase = CaseVals[i].second->getLHS()->IgnoreParenCasts(); + if (DeclRefExpr *DeclRef = dyn_cast(PrevCase)) { + PrevString = DeclRef->getDecl()->getName(); + } + if (DeclRefExpr *DeclRef = dyn_cast(CurrCase)) { + CurrString = DeclRef->getDecl()->getName(); + } + llvm::SmallString<16> CaseValStr; + CaseVals[i-1].first.toString(CaseValStr); + + if (PrevString == CurrString) + Diag(CaseVals[i].second->getLHS()->getLocStart(), + diag::err_duplicate_case) << + (PrevString.empty() ? CaseValStr.str() : PrevString); + else + Diag(CaseVals[i].second->getLHS()->getLocStart(), + diag::err_duplicate_case_differing_expr) << + (PrevString.empty() ? CaseValStr.str() : PrevString) << + (CurrString.empty() ? CaseValStr.str() : CurrString) << + CaseValStr; + Diag(CaseVals[i-1].second->getLHS()->getLocStart(), diag::note_duplicate_case_prev); // FIXME: We really want to remove the bogus case stmt from the @@ -904,7 +985,7 @@ Sema::ActOnFinishSwitchStmt(SourceLocation SwitchLoc, Stmt *Switch, << CondTypeBeforePromotion; } - llvm::APSInt Hi = + llvm::APSInt Hi = RI->second->getRHS()->EvaluateKnownConstInt(Context); AdjustAPSInt(Hi, CondWidth, CondIsSigned); while (EI != EIend && EI->first < Hi) @@ -952,12 +1033,12 @@ Sema::ActOnFinishSwitchStmt(SourceLocation SwitchLoc, Stmt *Switch, switch (UnhandledNames.size()) { case 0: break; case 1: - Diag(CondExpr->getExprLoc(), TheDefaultStmt + Diag(CondExpr->getExprLoc(), TheDefaultStmt ? diag::warn_def_missing_case1 : diag::warn_missing_case1) << UnhandledNames[0]; break; case 2: - Diag(CondExpr->getExprLoc(), TheDefaultStmt + Diag(CondExpr->getExprLoc(), TheDefaultStmt ? diag::warn_def_missing_case2 : diag::warn_missing_case2) << UnhandledNames[0] << UnhandledNames[1]; break; @@ -990,6 +1071,55 @@ Sema::ActOnFinishSwitchStmt(SourceLocation SwitchLoc, Stmt *Switch, return Owned(SS); } +void +Sema::DiagnoseAssignmentEnum(QualType DstType, QualType SrcType, + Expr *SrcExpr) { + unsigned DIAG = diag::warn_not_in_enum_assignement; + if (Diags.getDiagnosticLevel(DIAG, SrcExpr->getExprLoc()) + == DiagnosticsEngine::Ignored) + return; + + if (const EnumType *ET = DstType->getAs()) + if (!Context.hasSameType(SrcType, DstType) && + SrcType->isIntegerType()) { + if (!SrcExpr->isTypeDependent() && !SrcExpr->isValueDependent() && + SrcExpr->isIntegerConstantExpr(Context)) { + // Get the bitwidth of the enum value before promotions. 
+        unsigned DstWith = Context.getIntWidth(DstType);
+        bool DstIsSigned = DstType->isSignedIntegerOrEnumerationType();
+
+        llvm::APSInt RhsVal = SrcExpr->EvaluateKnownConstInt(Context);
+        const EnumDecl *ED = ET->getDecl();
+        typedef SmallVector, 64> EnumValsTy;
+        EnumValsTy EnumVals;
+
+        // Gather all enum values, set their type and sort them,
+        // allowing easier comparison with rhs constant.
+        for (EnumDecl::enumerator_iterator EDI = ED->enumerator_begin();
+             EDI != ED->enumerator_end(); ++EDI) {
+          llvm::APSInt Val = EDI->getInitVal();
+          AdjustAPSInt(Val, DstWith, DstIsSigned);
+          EnumVals.push_back(std::make_pair(Val, *EDI));
+        }
+        if (EnumVals.empty())
+          return;
+        std::stable_sort(EnumVals.begin(), EnumVals.end(), CmpEnumVals);
+        EnumValsTy::iterator EIend =
+            std::unique(EnumVals.begin(), EnumVals.end(), EqEnumVals);
+
+        // See which case values aren't in enum.
+        EnumValsTy::const_iterator EI = EnumVals.begin();
+        while (EI != EIend && EI->first < RhsVal)
+          EI++;
+        if (EI == EIend || EI->first != RhsVal) {
+          Diag(SrcExpr->getExprLoc(), diag::warn_not_in_enum_assignement)
+          << DstType;
+        }
+      }
+    }
+}
+
 StmtResult
 Sema::ActOnWhileStmt(SourceLocation WhileLoc, FullExprArg Cond,
                      Decl *CondVar, Stmt *Body) {
@@ -1037,6 +1167,215 @@ Sema::ActOnDoStmt(SourceLocation DoLoc, Stmt *Body,
   return Owned(new (Context) DoStmt(Body, Cond, DoLoc, WhileLoc, CondRParen));
 }
 
+namespace {
+  // This visitor will traverse a conditional statement and store all
+  // the evaluated decls into a vector.  Simple is set to true if none
+  // of the excluded constructs are used.
+  class DeclExtractor : public EvaluatedExprVisitor {
+    llvm::SmallPtrSet &Decls;
+    llvm::SmallVector &Ranges;
+    bool Simple;
+public:
+  typedef EvaluatedExprVisitor Inherited;
+
+  DeclExtractor(Sema &S, llvm::SmallPtrSet &Decls,
+                llvm::SmallVector &Ranges) :
+      Inherited(S.Context),
+      Decls(Decls),
+      Ranges(Ranges),
+      Simple(true) {}
+
+  bool isSimple() { return Simple; }
+
+  // Replaces the method in EvaluatedExprVisitor.
+  void VisitMemberExpr(MemberExpr* E) {
+    Simple = false;
+  }
+
+  // Any Stmt not whitelisted will cause the condition to be marked complex.
+  void VisitStmt(Stmt *S) {
+    Simple = false;
+  }
+
+  void VisitBinaryOperator(BinaryOperator *E) {
+    Visit(E->getLHS());
+    Visit(E->getRHS());
+  }
+
+  void VisitCastExpr(CastExpr *E) {
+    Visit(E->getSubExpr());
+  }
+
+  void VisitUnaryOperator(UnaryOperator *E) {
+    // Skip checking conditionals with dereferences.
+    if (E->getOpcode() == UO_Deref)
+      Simple = false;
+    else
+      Visit(E->getSubExpr());
+  }
+
+  void VisitConditionalOperator(ConditionalOperator *E) {
+    Visit(E->getCond());
+    Visit(E->getTrueExpr());
+    Visit(E->getFalseExpr());
+  }
+
+  void VisitParenExpr(ParenExpr *E) {
+    Visit(E->getSubExpr());
+  }
+
+  void VisitBinaryConditionalOperator(BinaryConditionalOperator *E) {
+    Visit(E->getOpaqueValue()->getSourceExpr());
+    Visit(E->getFalseExpr());
+  }
+
+  void VisitIntegerLiteral(IntegerLiteral *E) { }
+  void VisitFloatingLiteral(FloatingLiteral *E) { }
+  void VisitCXXBoolLiteralExpr(CXXBoolLiteralExpr *E) { }
+  void VisitCharacterLiteral(CharacterLiteral *E) { }
+  void VisitGNUNullExpr(GNUNullExpr *E) { }
+  void VisitImaginaryLiteral(ImaginaryLiteral *E) { }
+
+  void VisitDeclRefExpr(DeclRefExpr *E) {
+    VarDecl *VD = dyn_cast(E->getDecl());
+    if (!VD) return;
+
+    Ranges.push_back(E->getSourceRange());
+
+    Decls.insert(VD);
+  }
+
+  }; // end class DeclExtractor
+
+  // DeclMatcher checks to see if the decls are used in a non-evaluated
+  // context.
+  class DeclMatcher : public EvaluatedExprVisitor<DeclMatcher> {
+    llvm::SmallPtrSet<VarDecl*, 8> &Decls;
+    bool FoundDecl;
+
+public:
+  typedef EvaluatedExprVisitor<DeclMatcher> Inherited;
+
+  DeclMatcher(Sema &S, llvm::SmallPtrSet<VarDecl*, 8> &Decls,
+              Stmt *Statement) :
+      Inherited(S.Context), Decls(Decls), FoundDecl(false) {
+    if (!Statement) return;
+
+    Visit(Statement);
+  }
+
+  void VisitReturnStmt(ReturnStmt *S) {
+    FoundDecl = true;
+  }
+
+  void VisitBreakStmt(BreakStmt *S) {
+    FoundDecl = true;
+  }
+
+  void VisitGotoStmt(GotoStmt *S) {
+    FoundDecl = true;
+  }
+
+  void VisitCastExpr(CastExpr *E) {
+    if (E->getCastKind() == CK_LValueToRValue)
+      CheckLValueToRValueCast(E->getSubExpr());
+    else
+      Visit(E->getSubExpr());
+  }
+
+  void CheckLValueToRValueCast(Expr *E) {
+    E = E->IgnoreParenImpCasts();
+
+    if (isa<DeclRefExpr>(E)) {
+      return;
+    }
+
+    if (ConditionalOperator *CO = dyn_cast<ConditionalOperator>(E)) {
+      Visit(CO->getCond());
+      CheckLValueToRValueCast(CO->getTrueExpr());
+      CheckLValueToRValueCast(CO->getFalseExpr());
+      return;
+    }
+
+    if (BinaryConditionalOperator *BCO =
+            dyn_cast<BinaryConditionalOperator>(E)) {
+      CheckLValueToRValueCast(BCO->getOpaqueValue()->getSourceExpr());
+      CheckLValueToRValueCast(BCO->getFalseExpr());
+      return;
+    }
+
+    Visit(E);
+  }
+
+  void VisitDeclRefExpr(DeclRefExpr *E) {
+    if (VarDecl *VD = dyn_cast<VarDecl>(E->getDecl()))
+      if (Decls.count(VD))
+        FoundDecl = true;
+  }
+
+  bool FoundDeclInUse() { return FoundDecl; }
+
+  }; // end class DeclMatcher
+
+  void CheckForLoopConditionalStatement(Sema &S, Expr *Second,
+                                        Expr *Third, Stmt *Body) {
+    // Condition is empty.
+    if (!Second) return;
+
+    if (S.Diags.getDiagnosticLevel(diag::warn_variables_not_in_loop_body,
+                                   Second->getLocStart())
+        == DiagnosticsEngine::Ignored)
+      return;
+
+    PartialDiagnostic PDiag = S.PDiag(diag::warn_variables_not_in_loop_body);
+    llvm::SmallPtrSet<VarDecl*, 8> Decls;
+    llvm::SmallVector<SourceRange, 10> Ranges;
+    DeclExtractor DE(S, Decls, Ranges);
+    DE.Visit(Second);
+
+    // Don't analyze complex conditionals.
+    if (!DE.isSimple()) return;
+
+    // No decls found.
+    if (Decls.size() == 0) return;
+
+    // Don't warn on volatile, static, or global variables.
+    for (llvm::SmallPtrSet<VarDecl*, 8>::iterator I = Decls.begin(),
+                                                  E = Decls.end();
+         I != E; ++I)
+      if ((*I)->getType().isVolatileQualified() ||
+          (*I)->hasGlobalStorage()) return;
+
+    if (DeclMatcher(S, Decls, Second).FoundDeclInUse() ||
+        DeclMatcher(S, Decls, Third).FoundDeclInUse() ||
+        DeclMatcher(S, Decls, Body).FoundDeclInUse())
+      return;
+
+    // Load decl names into diagnostic.
+    if (Decls.size() > 4)
+      PDiag << 0;
+    else {
+      PDiag << Decls.size();
+      for (llvm::SmallPtrSet<VarDecl*, 8>::iterator I = Decls.begin(),
+                                                    E = Decls.end();
+           I != E; ++I)
+        PDiag << (*I)->getDeclName();
+    }
+
+    // Load SourceRanges into diagnostic if there is room.
+    // Otherwise, load the SourceRange of the conditional expression.
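    // Illustration (not part of the patch): a condition mentioning more
    // variables than a PartialDiagnostic can carry, e.g.
    //   for (; a && b && c && d && e && f && g && h && i && j && k; ) {}
    // overflows the argument slots, so only the range of the whole
    // condition is attached below.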
+    if (Ranges.size() <= PartialDiagnostic::MaxArguments)
+      for (llvm::SmallVector<SourceRange, 10>::iterator I = Ranges.begin(),
+                                                        E = Ranges.end();
+           I != E; ++I)
+        PDiag << *I;
+    else
+      PDiag << Second->getSourceRange();
+
+    S.Diag(Ranges.begin()->getBegin(), PDiag);
+  }
+
+} // end namespace
+
 StmtResult
 Sema::ActOnForStmt(SourceLocation ForLoc, SourceLocation LParenLoc,
                    Stmt *First, FullExprArg second, Decl *secondVar,
@@ -1059,6 +1398,8 @@ Sema::ActOnForStmt(SourceLocation ForLoc, SourceLocation LParenLoc,
     }
   }
 
+  CheckForLoopConditionalStatement(*this, second.get(), third.get(), Body);
+
   ExprResult SecondResult(second.release());
   VarDecl *ConditionVar = 0;
   if (secondVar) {
@@ -1103,8 +1444,9 @@ StmtResult Sema::ActOnForEachLValueExpr(Expr *E) {
 }
 
 ExprResult
-Sema::ActOnObjCForCollectionOperand(SourceLocation forLoc, Expr *collection) {
-  assert(collection);
+Sema::CheckObjCForCollectionOperand(SourceLocation forLoc, Expr *collection) {
+  if (!collection)
+    return ExprError();
 
   // Bail out early if we've got a type-dependent expression.
   if (collection->isTypeDependent()) return Owned(collection);
@@ -1130,12 +1472,12 @@ Sema::ActOnObjCForCollectionOperand(SourceLocation forLoc, Expr *collection) {
 
   // If we have a forward-declared type, we can't do this check.
   // Under ARC, it is an error not to have a forward-declared class.
-  if (iface && 
+  if (iface &&
       RequireCompleteType(forLoc, QualType(objectType, 0),
                           getLangOpts().ObjCAutoRefCount
-                            ? PDiag(diag::err_arc_collection_forward)
-                              << collection->getSourceRange()
-                          : PDiag(0))) {
+                            ? diag::err_arc_collection_forward
+                            : 0,
+                          collection)) {
     // Otherwise, if we have any useful type information, check that
     // the type declares the appropriate method.
   } else if (iface || !objectType->qual_empty()) {
@@ -1151,7 +1493,7 @@ Sema::ActOnObjCForCollectionOperand(SourceLocation forLoc, Expr *collection) {
     // If there's an interface, look in both the public and private APIs.
     if (iface) {
       method = iface->lookupInstanceMethod(selector);
-      if (!method) method = LookupPrivateInstanceMethod(selector, iface);
+      if (!method) method = iface->lookupPrivateMethod(selector);
     }
 
     // Also check protocol qualifiers.
@@ -1175,8 +1517,12 @@ Sema::ActOnObjCForCollectionOperand(SourceLocation forLoc, Expr *collection) {
 StmtResult
 Sema::ActOnObjCForCollectionStmt(SourceLocation ForLoc,
                                  SourceLocation LParenLoc,
-                                 Stmt *First, Expr *Second,
-                                 SourceLocation RParenLoc, Stmt *Body) {
+                                 Stmt *First, Expr *collection,
+                                 SourceLocation RParenLoc) {
+
+  ExprResult CollectionExprResult =
+    CheckObjCForCollectionOperand(ForLoc, collection);
+
   if (First) {
     QualType FirstType;
     if (DeclStmt *DS = dyn_cast<DeclStmt>(First)) {
@@ -1204,11 +1550,15 @@ Sema::ActOnObjCForCollectionStmt(SourceLocation ForLoc,
     if (!FirstType->isDependentType() &&
         !FirstType->isObjCObjectPointerType() &&
         !FirstType->isBlockPointerType())
-      Diag(ForLoc, diag::err_selector_element_type)
-        << FirstType << First->getSourceRange();
+        return StmtError(Diag(ForLoc, diag::err_selector_element_type)
+                         << FirstType << First->getSourceRange());
   }
 
-  return Owned(new (Context) ObjCForCollectionStmt(First, Second, Body,
+  if (CollectionExprResult.isInvalid())
+    return StmtError();
+
+  return Owned(new (Context) ObjCForCollectionStmt(First,
+                                                   CollectionExprResult.take(), 0,
                                                    ForLoc, RParenLoc));
 }
 
@@ -1252,7 +1602,7 @@ static bool FinishForRangeVarDecl(Sema &SemaRef, VarDecl *Decl, Expr *Init,
   // In ARC, infer lifetime.
   // FIXME: ARC may want to turn this into 'const __unsafe_unretained' if
   // we're doing the equivalent of fast iteration.
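  // Illustration (not part of the patch): under ARC a retainable range
  // variable, e.g. `for (id obj : objects) { ... }`, has its lifetime
  // inferred (effectively __strong id) by the call below; the FIXME notes
  // that fast enumeration's `const __unsafe_unretained` might suffice.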
-  if (SemaRef.getLangOpts().ObjCAutoRefCount && 
+  if (SemaRef.getLangOpts().ObjCAutoRefCount &&
       SemaRef.inferObjCARCLifetime(Decl))
     Decl->setInvalidDecl();
 
@@ -1343,6 +1693,11 @@ static ExprResult BuildForRangeBeginEndCall(Sema &SemaRef, Scope *S,
 
 }
 
+static bool ObjCEnumerationCollection(Expr *Collection) {
+  return !Collection->isTypeDependent()
+      && Collection->getType()->getAs<ObjCObjectPointerType>() != 0;
+}
+
 /// ActOnCXXForRangeStmt - Check and build a C++0x for-range statement.
 ///
 /// C++0x [stmt.ranged]:
@@ -1368,6 +1723,10 @@ Sema::ActOnCXXForRangeStmt(SourceLocation ForLoc, SourceLocation LParenLoc,
   if (!First || !Range)
     return StmtError();
 
+  if (ObjCEnumerationCollection(Range))
+    return ActOnObjCForCollectionStmt(ForLoc, LParenLoc, First, Range,
+                                      RParenLoc);
+
   DeclStmt *DS = dyn_cast<DeclStmt>(First);
   assert(DS && "first part of for range not a decl stmt");
 
@@ -1442,7 +1801,7 @@ Sema::BuildCXXForRangeStmt(SourceLocation ForLoc, SourceLocation ColonLoc,
     QualType RangeType = Range->getType();
 
     if (RequireCompleteType(RangeLoc, RangeType,
-                            PDiag(diag::err_for_range_incomplete_type)))
+                            diag::err_for_range_incomplete_type))
       return StmtError();
 
     // Build auto __begin = begin-expr, __end = end-expr.
@@ -1618,6 +1977,17 @@ Sema::BuildCXXForRangeStmt(SourceLocation ForLoc, SourceLocation ColonLoc,
                                              ColonLoc, RParenLoc));
 }
 
+/// FinishObjCForCollectionStmt - Attach the body to an Objective-C foreach
+/// statement.
+StmtResult Sema::FinishObjCForCollectionStmt(Stmt *S, Stmt *B) {
+  if (!S || !B)
+    return StmtError();
+  ObjCForCollectionStmt *ForStmt = cast<ObjCForCollectionStmt>(S);
+
+  ForStmt->setBody(B);
+  return S;
+}
+
 /// FinishCXXForRangeStmt - Attach the body to a C++0x for-range statement.
 /// This is a separate step from ActOnCXXForRangeStmt because analysis of the
 /// body cannot be performed until after the type of the range variable is
@@ -1626,6 +1996,9 @@ StmtResult Sema::FinishCXXForRangeStmt(Stmt *S, Stmt *B) {
   if (!S || !B)
     return StmtError();
 
+  if (isa<ObjCForCollectionStmt>(S))
+    return FinishObjCForCollectionStmt(S, B);
+
   CXXForRangeStmt *ForStmt = cast<CXXForRangeStmt>(S);
   ForStmt->setBody(B);
 
@@ -1723,7 +2096,7 @@ const VarDecl *Sema::getCopyElisionCandidate(QualType ReturnType,
   //   ... the expression is the name of a non-volatile automatic object
   //   (other than a function or catch-clause parameter)) ...
   const DeclRefExpr *DR = dyn_cast<DeclRefExpr>(E->IgnoreParens());
-  if (!DR)
+  if (!DR || DR->refersToEnclosingLocal())
     return 0;
   const VarDecl *VD = dyn_cast<VarDecl>(DR->getDecl());
   if (!VD)
@@ -1776,8 +2149,7 @@ Sema::PerformMoveOrCopyInitialization(const InitializedEntity &Entity,
   if (AllowNRVO &&
       (NRVOCandidate || getCopyElisionCandidate(ResultType, Value, true))) {
     ImplicitCastExpr AsRvalue(ImplicitCastExpr::OnStack,
-                              Value->getType(), CK_LValueToRValue,
-                              Value, VK_XValue);
+                              Value->getType(), CK_NoOp, Value, VK_XValue);
 
     Expr *InitExpr = &AsRvalue;
     InitializationKind Kind
@@ -1812,8 +2184,7 @@ Sema::PerformMoveOrCopyInitialization(const InitializedEntity &Entity,
       // Promote "AsRvalue" to the heap, since we now need this
       // expression node to persist.
       Value = ImplicitCastExpr::Create(Context, Value->getType(),
-                                       CK_LValueToRValue, Value, 0,
-                                       VK_XValue);
+                                       CK_NoOp, Value, 0, VK_XValue);
 
       // Complete type-checking the initialization of the return type
      // using the constructor we found.
@@ -1840,8 +2211,12 @@ Sema::ActOnCapScopeReturnStmt(SourceLocation ReturnLoc, Expr *RetValExp) {
   // [expr.prim.lambda]p4 in C++11; block literals follow a superset of those
   // rules which allows multiple return statements.
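  // Illustration (not part of the patch): with the deferred checking below,
  // a block such as
  //   int (^pick)(int) = ^(int c) { if (c) return 1; return 2; };
  // records the type of each return, and agreement on the common type is
  // validated only once the block is completed.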
CapturingScopeInfo *CurCap = cast<CapturingScopeInfo>(getCurFunction());
+  QualType FnRetType = CurCap->ReturnType;
+
+  // For blocks/lambdas with implicit return types, we check each return
+  // statement individually, and deduce the common return type when the block
+  // or lambda is completed.
   if (CurCap->HasImplicitReturnType) {
-    QualType ReturnT;
     if (RetValExp && !isa<InitListExpr>(RetValExp)) {
       ExprResult Result = DefaultFunctionArrayLvalueConversion(RetValExp);
       if (Result.isInvalid())
@@ -1849,10 +2224,10 @@ Sema::ActOnCapScopeReturnStmt(SourceLocation ReturnLoc, Expr *RetValExp) {
       RetValExp = Result.take();
 
       if (!RetValExp->isTypeDependent())
-        ReturnT = RetValExp->getType();
+        FnRetType = RetValExp->getType();
       else
-        ReturnT = Context.DependentTy;
-    } else { 
+        FnRetType = CurCap->ReturnType = Context.DependentTy;
+    } else {
       if (RetValExp) {
         // C++11 [expr.lambda.prim]p4 bans inferring the result from an
         // initializer list, because it is not an expression (even
@@ -1861,21 +2236,14 @@ Sema::ActOnCapScopeReturnStmt(SourceLocation ReturnLoc, Expr *RetValExp) {
           << RetValExp->getSourceRange();
       }
 
-      ReturnT = Context.VoidTy;
-    }
-    // We require the return types to strictly match here.
-    if (!CurCap->ReturnType.isNull() &&
-        !CurCap->ReturnType->isDependentType() &&
-        !ReturnT->isDependentType() &&
-        !Context.hasSameType(ReturnT, CurCap->ReturnType)) {
-      Diag(ReturnLoc, diag::err_typecheck_missing_return_type_incompatible) 
-        << ReturnT << CurCap->ReturnType
-        << (getCurLambda() != 0);
-      return StmtError();
+      FnRetType = Context.VoidTy;
     }
-    CurCap->ReturnType = ReturnT;
+
+    // Although we'll properly infer the type of the block once it's completed,
+    // make sure we provide a return type now for better error recovery.
+    if (CurCap->ReturnType.isNull())
+      CurCap->ReturnType = FnRetType;
   }
-  QualType FnRetType = CurCap->ReturnType;
   assert(!FnRetType.isNull());
 
   if (BlockScopeInfo *CurBlock = dyn_cast<BlockScopeInfo>(CurCap)) {
@@ -1943,10 +2311,12 @@ Sema::ActOnCapScopeReturnStmt(SourceLocation ReturnLoc, Expr *RetValExp) {
   ReturnStmt *Result = new (Context) ReturnStmt(ReturnLoc, RetValExp,
                                                 NRVOCandidate);
 
-  // If we need to check for the named return value optimization, save the
-  // return statement in our scope for later processing.
-  if (getLangOpts().CPlusPlus && FnRetType->isRecordType() &&
-      !CurContext->isDependentContext())
+  // If we need to check for the named return value optimization,
+  // or if we need to infer the return type,
+  // save the return statement in our scope for later processing.
+  if (CurCap->HasImplicitReturnType ||
+      (getLangOpts().CPlusPlus && FnRetType->isRecordType() &&
+       !CurContext->isDependentContext()))
     FunctionScopes.back()->Returns.push_back(Result);
 
   return Owned(Result);
@@ -1957,7 +2327,7 @@ Sema::ActOnReturnStmt(SourceLocation ReturnLoc, Expr *RetValExp) {
   // Check for unexpanded parameter packs.
   if (RetValExp && DiagnoseUnexpandedParameterPack(RetValExp))
     return StmtError();
-  
+
   if (isa<CapturingScopeInfo>(getCurFunction()))
     return ActOnCapScopeReturnStmt(ReturnLoc, RetValExp);
 
@@ -1973,7 +2343,7 @@ Sema::ActOnReturnStmt(SourceLocation ReturnLoc, Expr *RetValExp) {
     FnRetType = MD->getResultType();
     if (MD->hasRelatedResultType() && MD->getClassInterface()) {
       // In the implementation of a method with a related return type, the
-      // type used to type-check the validity of return statements within the 
+      // type used to type-check the validity of return statements within the
       // method body is a pointer to the type of the class being implemented.
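      // Illustration (not part of the patch): inside
      //   @implementation Widget
      //   - (id)init { return [super init]; }
      //   @end
      // the returns are checked against `Widget *` (built below) rather
      // than against the declared `id`.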
RelatedRetType = Context.getObjCInterfaceType(MD->getClassInterface());
       RelatedRetType = Context.getObjCObjectPointerType(RelatedRetType);
@@ -2064,7 +2434,7 @@ Sema::ActOnReturnStmt(SourceLocation ReturnLoc, Expr *RetValExp) {
       // FIXME: The diagnostics here don't really describe what is happening.
       InitializedEntity Entity =
           InitializedEntity::InitializeTemporary(RelatedRetType);
-      
+
       ExprResult Res = PerformCopyInitialization(Entity, SourceLocation(),
                                                  RetValExp);
       if (Res.isInvalid()) {
@@ -2108,7 +2478,7 @@ Sema::ActOnReturnStmt(SourceLocation ReturnLoc, Expr *RetValExp) {
   if (getLangOpts().CPlusPlus && FnRetType->isRecordType() &&
       !CurContext->isDependentContext())
     FunctionScopes.back()->Returns.push_back(Result);
-  
+
   return Owned(Result);
 }
 
@@ -2147,18 +2517,17 @@ static bool CheckAsmLValue(const Expr *E, Sema &S) {
 
 /// isOperandMentioned - Return true if the specified operand # is mentioned
 /// anywhere in the decomposed asm string.
-static bool isOperandMentioned(unsigned OpNo, 
+static bool isOperandMentioned(unsigned OpNo,
                                ArrayRef<AsmStmt::AsmStringPiece> AsmStrPieces) {
   for (unsigned p = 0, e = AsmStrPieces.size(); p != e; ++p) {
     const AsmStmt::AsmStringPiece &Piece = AsmStrPieces[p];
     if (!Piece.isOperand()) continue;
-    
+
     // If this is a reference to the input and if the input was the smaller
     // one, then we have to reject this asm.
    if (Piece.getOperandNo() == OpNo)
       return true;
   }
-  
   return false;
 }
 
@@ -2343,7 +2712,7 @@ StmtResult Sema::ActOnAsmStmt(SourceLocation AsmLoc, bool IsSimple,
     // then we can promote the smaller one to a larger input and the asm string
     // won't notice.
     bool SmallerValueMentioned = false;
-    
+
     // If this is a reference to the input and if the input was the smaller
     // one, then we have to reject this asm.
     if (isOperandMentioned(InputOpNo, Pieces)) {
@@ -2364,7 +2733,7 @@ StmtResult Sema::ActOnAsmStmt(SourceLocation AsmLoc, bool IsSimple,
     if (!SmallerValueMentioned && InputDomain != AD_Other &&
         OutputConstraintInfos[TiedTo].allowsRegister())
       continue;
-    
+
     // Either both of the operands were mentioned or the smaller one was
     // mentioned.  One more special case that we'll allow: if the tied input is
     // integer, unmentioned, and is a constant, then we'll allow truncating it
@@ -2379,7 +2748,7 @@ StmtResult Sema::ActOnAsmStmt(SourceLocation AsmLoc, bool IsSimple,
       NS->setInputExpr(i, InputExpr);
       continue;
     }
-    
+
     Diag(InputExpr->getLocStart(),
          diag::err_asm_tying_incompatible_types)
       << InTy << OutTy << OutputExpr->getSourceRange()
@@ -2390,6 +2759,213 @@ StmtResult Sema::ActOnAsmStmt(SourceLocation AsmLoc, bool IsSimple,
   return Owned(NS);
 }
 
+// needSpaceAsmToken - This function handles whitespace around asm punctuation.
+// Returns true if a space should be emitted.
+static inline bool needSpaceAsmToken(Token currTok) {
+  static Token prevTok;
+
+  // No need for space after prevToken.
+  switch(prevTok.getKind()) {
+  default:
+    break;
+  case tok::l_square:
+  case tok::r_square:
+  case tok::l_brace:
+  case tok::r_brace:
+  case tok::colon:
+    prevTok = currTok;
+    return false;
+  }
+
+  // No need for a space before currToken.
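  // Illustration (not part of the patch): for the token stream
  //   mov  eax  ,  [  ebx  ]
  // the two switches emit "mov eax,[ebx]": a space only between the two
  // identifiers and none around the punctuation.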
+  switch(currTok.getKind()) {
+  default:
+    break;
+  case tok::l_square:
+  case tok::r_square:
+  case tok::l_brace:
+  case tok::r_brace:
+  case tok::comma:
+  case tok::colon:
+    prevTok = currTok;
+    return false;
+  }
+  prevTok = currTok;
+  return true;
+}
+
+static void patchMSAsmStrings(Sema &SemaRef, bool &IsSimple,
+                              SourceLocation AsmLoc,
+                              ArrayRef<Token> AsmToks,
+                              ArrayRef<unsigned> LineEnds,
+                              const TargetInfo &TI,
+                              std::vector<std::string> &AsmStrings) {
+  assert (!AsmToks.empty() && "Didn't expect an empty AsmToks!");
+
+  // Assume simple asm stmt until we parse a non-register identifier.
+  IsSimple = true;
+
+  for (unsigned i = 0, e = LineEnds.size(); i != e; ++i) {
+    SmallString<512> Asm;
+
+    // Check the operands.
+    for (unsigned j = (i == 0) ? 0 : LineEnds[i-1], e = LineEnds[i]; j != e; ++j) {
+
+      IdentifierInfo *II;
+      if (j == 0 || (i > 0 && j == LineEnds[i-1])) {
+        II = AsmToks[j].getIdentifierInfo();
+        Asm = II->getName().str();
+        continue;
+      }
+
+      if (needSpaceAsmToken(AsmToks[j]))
+        Asm += " ";
+
+      switch (AsmToks[j].getKind()) {
+      default:
+        //llvm_unreachable("Unknown token.");
+        break;
+      case tok::comma: Asm += ","; break;
+      case tok::colon: Asm += ":"; break;
+      case tok::l_square: Asm += "["; break;
+      case tok::r_square: Asm += "]"; break;
+      case tok::l_brace: Asm += "{"; break;
+      case tok::r_brace: Asm += "}"; break;
+      case tok::numeric_constant: {
+        SmallString<32> TokenBuf;
+        TokenBuf.resize(32);
+        bool StringInvalid = false;
+        Asm += SemaRef.PP.getSpelling(AsmToks[j], TokenBuf, &StringInvalid);
+        assert (!StringInvalid && "Expected valid string!");
+        break;
+      }
+      case tok::identifier: {
+        II = AsmToks[j].getIdentifierInfo();
+        StringRef Name = II->getName();
+
+        // Valid registers don't need modification.
+        if (TI.isValidGCCRegisterName(Name)) {
+          Asm += Name;
+          break;
+        }
+
+        // TODO: Lookup the identifier.
+        IsSimple = false;
+      }
+      } // AsmToks[i].getKind()
+    }
+    AsmStrings[i] = Asm.c_str();
+  }
+}
+
+// Build the unmodified MSAsmString.
+static std::string buildMSAsmString(Sema &SemaRef,
+                                    ArrayRef<Token> AsmToks,
+                                    ArrayRef<unsigned> LineEnds) {
+  assert (!AsmToks.empty() && "Didn't expect an empty AsmToks!");
+  SmallString<512> Asm;
+  SmallString<512> TokenBuf;
+  TokenBuf.resize(512);
+  unsigned AsmLineNum = 0;
+  for (unsigned i = 0, e = AsmToks.size(); i < e; ++i) {
+    const char *ThisTokBuf = &TokenBuf[0];
+    bool StringInvalid = false;
+    unsigned ThisTokLen =
+      Lexer::getSpelling(AsmToks[i], ThisTokBuf, SemaRef.getSourceManager(),
+                         SemaRef.getLangOpts(), &StringInvalid);
+    if (i && (!AsmLineNum || i != LineEnds[AsmLineNum-1]) &&
+        needSpaceAsmToken(AsmToks[i]))
+      Asm += ' ';
+    Asm += StringRef(ThisTokBuf, ThisTokLen);
+    if (i + 1 == LineEnds[AsmLineNum] && i + 1 != AsmToks.size()) {
+      Asm += '\n';
+      ++AsmLineNum;
+    }
+  }
+  return Asm.c_str();
+}
+
+StmtResult Sema::ActOnMSAsmStmt(SourceLocation AsmLoc,
+                                ArrayRef<Token> AsmToks,
+                                ArrayRef<unsigned> LineEnds,
+                                SourceLocation EndLoc) {
+  // MS-style inline assembly is not fully supported, so emit a warning.
+  Diag(AsmLoc, diag::warn_unsupported_msasm);
+  SmallVector<StringRef, 4> Clobbers;
+
+  // Empty asm statements don't need to instantiate the AsmParser, etc.
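  // Illustration (not part of the patch): a statement such as `__asm {}`
  // has no tokens, so the early return below still builds an MSAsmStmt but
  // skips target and AsmParser initialization entirely.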
+  if (AsmToks.empty()) {
+    StringRef AsmString;
+    MSAsmStmt *NS =
+      new (Context) MSAsmStmt(Context, AsmLoc, /* IsSimple */ true,
+                              /* IsVolatile */ true, AsmToks, LineEnds,
+                              AsmString, Clobbers, EndLoc);
+    return Owned(NS);
+  }
+
+  std::string AsmString = buildMSAsmString(*this, AsmToks, LineEnds);
+
+  bool IsSimple;
+  std::vector<std::string> PatchedAsmStrings;
+  PatchedAsmStrings.resize(LineEnds.size());
+
+  // Rewrite operands to appease the AsmParser.
+  patchMSAsmStrings(*this, IsSimple, AsmLoc, AsmToks, LineEnds,
+                    Context.getTargetInfo(), PatchedAsmStrings);
+
+  // patchMSAsmStrings doesn't correctly patch non-simple asm statements.
+  if (!IsSimple) {
+    MSAsmStmt *NS =
+      new (Context) MSAsmStmt(Context, AsmLoc, /* IsSimple */ true,
+                              /* IsVolatile */ true, AsmToks, LineEnds,
+                              AsmString, Clobbers, EndLoc);
+    return Owned(NS);
+  }
+
+  // Initialize targets and assembly printers/parsers.
+  llvm::InitializeAllTargetInfos();
+  llvm::InitializeAllTargetMCs();
+  llvm::InitializeAllAsmParsers();
+
+  // Get the target specific parser.
+  std::string Error;
+  const std::string &TT = Context.getTargetInfo().getTriple().getTriple();
+  const llvm::Target *TheTarget(llvm::TargetRegistry::lookupTarget(TT, Error));
+
+  OwningPtr<llvm::MCAsmInfo> MAI(TheTarget->createMCAsmInfo(TT));
+  OwningPtr<llvm::MCRegisterInfo> MRI(TheTarget->createMCRegInfo(TT));
+  OwningPtr<llvm::MCObjectFileInfo> MOFI(new llvm::MCObjectFileInfo());
+  OwningPtr<llvm::MCSubtargetInfo>
+    STI(TheTarget->createMCSubtargetInfo(TT, "", ""));
+
+  for (unsigned i = 0, e = PatchedAsmStrings.size(); i != e; ++i) {
+    llvm::SourceMgr SrcMgr;
+    llvm::MCContext Ctx(*MAI, *MRI, MOFI.get(), &SrcMgr);
+    llvm::MemoryBuffer *Buffer =
+      llvm::MemoryBuffer::getMemBuffer(PatchedAsmStrings[i], "<inline asm>");
+
+    // Tell SrcMgr about this buffer, which is what the parser will pick up.
+    SrcMgr.AddNewSourceBuffer(Buffer, llvm::SMLoc());
+
+    OwningPtr<llvm::MCStreamer> Str;
+    OwningPtr<llvm::MCAsmParser>
+      Parser(createMCAsmParser(SrcMgr, Ctx, *Str.get(), *MAI));
+    OwningPtr<llvm::MCTargetAsmParser>
+      TargetParser(TheTarget->createMCAsmParser(*STI, *Parser));
+    // Change to the Intel dialect.
+    Parser->setAssemblerDialect(1);
+    Parser->setTargetParser(*TargetParser.get());
+
+    // TODO: Start parsing.
+  }
+
+  MSAsmStmt *NS =
+    new (Context) MSAsmStmt(Context, AsmLoc, IsSimple, /* IsVolatile */ true,
+                            AsmToks, LineEnds, AsmString, Clobbers, EndLoc);
+
+  return Owned(NS);
+}
+
 StmtResult
 Sema::ActOnObjCAtCatchStmt(SourceLocation AtLoc,
                            SourceLocation RParen, Decl *Parm,
@@ -2420,15 +2996,13 @@ Sema::ActOnObjCAtTryStmt(SourceLocation AtLoc, Stmt *Try,
                                      Finally));
 }
 
-StmtResult Sema::BuildObjCAtThrowStmt(SourceLocation AtLoc,
-                                      Expr *Throw) {
+StmtResult Sema::BuildObjCAtThrowStmt(SourceLocation AtLoc, Expr *Throw) {
   if (Throw) {
-    Throw = MaybeCreateExprWithCleanups(Throw);
     ExprResult Result = DefaultLvalueConversion(Throw);
     if (Result.isInvalid())
       return StmtError();
 
-    Throw = Result.take();
+    Throw = MaybeCreateExprWithCleanups(Result.take());
     QualType ThrowType = Throw->getType();
     // Make sure the expression type is an ObjC pointer or "void *".
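    // Illustration (not part of the patch):
    //   @throw 42;           // rejected: not an Objective-C object pointer
    //   @throw myException;  // accepted for NSException *myException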
if (!ThrowType->isDependentType() &&
@@ -2458,7 +3032,6 @@ Sema::ActOnObjCAtThrowStmt(SourceLocation AtLoc, Expr *Throw,
     if (!AtCatchParent)
       return StmtError(Diag(AtLoc, diag::error_rethrow_used_outside_catch));
   }
-
   return BuildObjCAtThrowStmt(AtLoc, Throw);
 }
 
@@ -2646,17 +3219,17 @@ StmtResult Sema::BuildMSDependentExistsStmt(SourceLocation KeywordLoc,
                                             Stmt *Nested)
 {
   return new (Context) MSDependentExistsStmt(KeywordLoc, IsIfExists,
-                                             QualifierLoc, NameInfo, 
+                                             QualifierLoc, NameInfo,
                                              cast<CompoundStmt>(Nested));
 }
 
-StmtResult Sema::ActOnMSDependentExistsStmt(SourceLocation KeywordLoc, 
+StmtResult Sema::ActOnMSDependentExistsStmt(SourceLocation KeywordLoc,
                                             bool IsIfExists,
-                                            CXXScopeSpec &SS, 
+                                            CXXScopeSpec &SS,
                                             UnqualifiedId &Name,
                                             Stmt *Nested) {
-  return BuildMSDependentExistsStmt(KeywordLoc, IsIfExists, 
+  return BuildMSDependentExistsStmt(KeywordLoc, IsIfExists,
                                     SS.getWithLocInContext(Context),
                                     GetNameFromUnqualifiedId(Name),
                                     Nested);
diff --git a/lib/Sema/SemaStmtAttr.cpp b/lib/Sema/SemaStmtAttr.cpp
index 21c3297..3c15b7a 100644
--- a/lib/Sema/SemaStmtAttr.cpp
+++ b/lib/Sema/SemaStmtAttr.cpp
@@ -15,29 +15,55 @@
 #include "TargetAttributesSema.h"
 #include "clang/AST/ASTContext.h"
 #include "clang/Basic/SourceManager.h"
+#include "clang/Lex/Lexer.h"
 #include "clang/Sema/DelayedDiagnostic.h"
 #include "clang/Sema/Lookup.h"
+#include "clang/Sema/ScopeInfo.h"
 #include "llvm/ADT/StringExtras.h"
+
 using namespace clang;
 using namespace sema;
 
+static Attr *handleFallThroughAttr(Sema &S, Stmt *St, const AttributeList &A,
+                                   SourceRange Range) {
+  if (!isa<NullStmt>(St)) {
+    S.Diag(A.getRange().getBegin(), diag::err_fallthrough_attr_wrong_target)
+      << St->getLocStart();
+    if (isa<SwitchCase>(St)) {
+      SourceLocation L = Lexer::getLocForEndOfToken(Range.getEnd(), 0,
+                                  S.getSourceManager(), S.getLangOpts());
+      S.Diag(L, diag::note_fallthrough_insert_semi_fixit)
+        << FixItHint::CreateInsertion(L, ";");
+    }
+    return 0;
+  }
+  if (S.getCurFunction()->SwitchStack.empty()) {
+    S.Diag(A.getRange().getBegin(), diag::err_fallthrough_attr_outside_switch);
+    return 0;
+  }
+  return ::new (S.Context) FallThroughAttr(A.getRange(), S.Context);
+}
+
-static Attr *ProcessStmtAttribute(Sema &S, Stmt *St, const AttributeList &A) {
+static Attr *ProcessStmtAttribute(Sema &S, Stmt *St, const AttributeList &A,
+                                  SourceRange Range) {
   switch (A.getKind()) {
+  case AttributeList::AT_FallThrough:
+    return handleFallThroughAttr(S, St, A, Range);
   default:
     // if we're here, then we parsed an attribute, but didn't recognize it as a
     // statement attribute => it is declaration attribute
-    S.Diag(A.getRange().getBegin(), diag::warn_attribute_invalid_on_stmt) <<
-      A.getName()->getName();
+    S.Diag(A.getRange().getBegin(), diag::warn_attribute_invalid_on_stmt)
+      << A.getName()->getName() << St->getLocStart();
     return 0;
   }
 }
 
 StmtResult Sema::ProcessStmtAttributes(Stmt *S, AttributeList *AttrList,
                                        SourceRange Range) {
-  AttrVec Attrs;
+  SmallVector<const Attr*, 8> Attrs;
   for (const AttributeList* l = AttrList; l; l = l->getNext()) {
-    if (Attr *a = ProcessStmtAttribute(*this, S, *l))
+    if (Attr *a = ProcessStmtAttribute(*this, S, *l, Range))
       Attrs.push_back(a);
   }
diff --git a/lib/Sema/SemaTemplate.cpp b/lib/Sema/SemaTemplate.cpp
index 51ce2a1..c8e4501 100644
--- a/lib/Sema/SemaTemplate.cpp
+++ b/lib/Sema/SemaTemplate.cpp
@@ -354,12 +354,14 @@ void Sema::LookupTemplateName(LookupResult &Found,
     return;
   }
 
-  if (S && !ObjectType.isNull() && !ObjectTypeSearchedInScope) {
-    // C++ [basic.lookup.classref]p1:
+  if (S && !ObjectType.isNull() && !ObjectTypeSearchedInScope &&
+      !(getLangOpts().CPlusPlus0x && !Found.empty())) {
+    // C++03 [basic.lookup.classref]p1:
     //   [...] If the lookup in the class of the object expression finds a
     //   template, the name is also looked up in the context of the entire
     //   postfix-expression and [...]
     //
+    // Note: C++11 does not perform this second lookup.
     LookupResult FoundOuter(*this, Found.getLookupName(), Found.getNameLoc(),
                             LookupOrdinaryName);
     LookupName(FoundOuter, S);
@@ -743,7 +745,7 @@ Decl *Sema::ActOnNonTypeTemplateParameter(Scope *S, Declarator &D,
 }
 
 /// ActOnTemplateTemplateParameter - Called when a C++ template template
/// parameter (e.g. T in template