author    | dim <dim@FreeBSD.org> | 2013-06-10 20:36:52 +0000
committer | dim <dim@FreeBSD.org> | 2013-06-10 20:36:52 +0000
commit    | aa45f148926e3461a1fd8b10c990f0a51a908cc9 (patch)
tree      | 909310b2e05119d1d6efda049977042abbb58bb1 /lib/Target/X86
parent    | 169d2bd06003c39970bc94c99669a34b61bb7e45 (diff)
download  | FreeBSD-src-aa45f148926e3461a1fd8b10c990f0a51a908cc9.zip, FreeBSD-src-aa45f148926e3461a1fd8b10c990f0a51a908cc9.tar.gz
Vendor import of llvm tags/RELEASE_33/final r183502 (effectively, 3.3
release):
http://llvm.org/svn/llvm-project/llvm/tags/RELEASE_33/final@183502
Diffstat (limited to 'lib/Target/X86')
27 files changed, 1774 insertions, 803 deletions
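The largest functional change in this import is the rewritten Intel-syntax operand parsing in lib/Target/X86/AsmParser/X86AsmParser.cpp (the first file in the diff below): an InfixCalculator that converts the operand expression to postfix form with an operator-precedence stack, plus an IntelExprStateMachine that tracks base register, index register and scale while the expression is folded. As a quick orientation, here is a compact standalone C++ sketch of that infix-to-postfix-and-fold technique; the token names, precedence table and evaluate() helper are illustrative stand-ins, not the parser's actual types, and parentheses and register operands (which the real InfixCalculator also handles) are omitted for brevity.

#include <cassert>
#include <cstdint>
#include <cstdio>
#include <utility>
#include <vector>

// Operator precedence, indexed by token kind, in the same spirit as the
// parser's OpPrecedence[] array (illustrative names, not LLVM API).
enum Tok { ADD, SUB, MUL, DIV, IMM };
static const int Prec[] = {0, 0, 1, 1, 0};

// Phase 1: shunting-yard conversion of the infix stream to postfix.
// Phase 2: fold the postfix stream with an operand stack.
static int64_t evaluate(const std::vector<std::pair<Tok, int64_t>> &Infix) {
  std::vector<std::pair<Tok, int64_t>> Postfix;
  std::vector<Tok> Ops;
  for (const auto &T : Infix) {
    if (T.first == IMM) { Postfix.push_back(T); continue; }
    // Pop operators of equal or higher precedence before pushing T.
    while (!Ops.empty() && Prec[Ops.back()] >= Prec[T.first]) {
      Postfix.push_back({Ops.back(), 0});
      Ops.pop_back();
    }
    Ops.push_back(T.first);
  }
  while (!Ops.empty()) { Postfix.push_back({Ops.back(), 0}); Ops.pop_back(); }

  std::vector<int64_t> Vals;
  for (const auto &T : Postfix) {
    if (T.first == IMM) { Vals.push_back(T.second); continue; }
    assert(Vals.size() >= 2 && "too few operands");
    int64_t B = Vals.back(); Vals.pop_back();
    int64_t A = Vals.back(); Vals.pop_back();
    switch (T.first) {
    case ADD: Vals.push_back(A + B); break;
    case SUB: Vals.push_back(A - B); break;
    case MUL: Vals.push_back(A * B); break;
    case DIV: assert(B != 0 && "division by zero"); Vals.push_back(A / B); break;
    default:  assert(false && "unexpected operator"); break;
    }
  }
  return Vals.back();
}

int main() {
  // 8 + 2 * 4 - 6 / 3  ==>  14
  std::vector<std::pair<Tok, int64_t>> E = {
      {IMM, 8}, {ADD, 0}, {IMM, 2}, {MUL, 0}, {IMM, 4},
      {SUB, 0}, {IMM, 6}, {DIV, 0}, {IMM, 3}};
  std::printf("%lld\n", (long long)evaluate(E));
  return 0;
}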
diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp index e462322..68908ab 100644 --- a/lib/Target/X86/AsmParser/X86AsmParser.cpp +++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -33,17 +33,451 @@ using namespace llvm; namespace { struct X86Operand; +static const char OpPrecedence[] = { + 0, // IC_PLUS + 0, // IC_MINUS + 1, // IC_MULTIPLY + 1, // IC_DIVIDE + 2, // IC_RPAREN + 3, // IC_LPAREN + 0, // IC_IMM + 0 // IC_REGISTER +}; + class X86AsmParser : public MCTargetAsmParser { MCSubtargetInfo &STI; MCAsmParser &Parser; ParseInstructionInfo *InstInfo; private: + enum InfixCalculatorTok { + IC_PLUS = 0, + IC_MINUS, + IC_MULTIPLY, + IC_DIVIDE, + IC_RPAREN, + IC_LPAREN, + IC_IMM, + IC_REGISTER + }; + + class InfixCalculator { + typedef std::pair< InfixCalculatorTok, int64_t > ICToken; + SmallVector<InfixCalculatorTok, 4> InfixOperatorStack; + SmallVector<ICToken, 4> PostfixStack; + + public: + int64_t popOperand() { + assert (!PostfixStack.empty() && "Poped an empty stack!"); + ICToken Op = PostfixStack.pop_back_val(); + assert ((Op.first == IC_IMM || Op.first == IC_REGISTER) + && "Expected and immediate or register!"); + return Op.second; + } + void pushOperand(InfixCalculatorTok Op, int64_t Val = 0) { + assert ((Op == IC_IMM || Op == IC_REGISTER) && + "Unexpected operand!"); + PostfixStack.push_back(std::make_pair(Op, Val)); + } + + void popOperator() { InfixOperatorStack.pop_back_val(); } + void pushOperator(InfixCalculatorTok Op) { + // Push the new operator if the stack is empty. + if (InfixOperatorStack.empty()) { + InfixOperatorStack.push_back(Op); + return; + } + + // Push the new operator if it has a higher precedence than the operator + // on the top of the stack or the operator on the top of the stack is a + // left parentheses. + unsigned Idx = InfixOperatorStack.size() - 1; + InfixCalculatorTok StackOp = InfixOperatorStack[Idx]; + if (OpPrecedence[Op] > OpPrecedence[StackOp] || StackOp == IC_LPAREN) { + InfixOperatorStack.push_back(Op); + return; + } + + // The operator on the top of the stack has higher precedence than the + // new operator. + unsigned ParenCount = 0; + while (1) { + // Nothing to process. + if (InfixOperatorStack.empty()) + break; + + Idx = InfixOperatorStack.size() - 1; + StackOp = InfixOperatorStack[Idx]; + if (!(OpPrecedence[StackOp] >= OpPrecedence[Op] || ParenCount)) + break; + + // If we have an even parentheses count and we see a left parentheses, + // then stop processing. + if (!ParenCount && StackOp == IC_LPAREN) + break; + + if (StackOp == IC_RPAREN) { + ++ParenCount; + InfixOperatorStack.pop_back_val(); + } else if (StackOp == IC_LPAREN) { + --ParenCount; + InfixOperatorStack.pop_back_val(); + } else { + InfixOperatorStack.pop_back_val(); + PostfixStack.push_back(std::make_pair(StackOp, 0)); + } + } + // Push the new operator. + InfixOperatorStack.push_back(Op); + } + int64_t execute() { + // Push any remaining operators onto the postfix stack. 
+ while (!InfixOperatorStack.empty()) { + InfixCalculatorTok StackOp = InfixOperatorStack.pop_back_val(); + if (StackOp != IC_LPAREN && StackOp != IC_RPAREN) + PostfixStack.push_back(std::make_pair(StackOp, 0)); + } + + if (PostfixStack.empty()) + return 0; + + SmallVector<ICToken, 16> OperandStack; + for (unsigned i = 0, e = PostfixStack.size(); i != e; ++i) { + ICToken Op = PostfixStack[i]; + if (Op.first == IC_IMM || Op.first == IC_REGISTER) { + OperandStack.push_back(Op); + } else { + assert (OperandStack.size() > 1 && "Too few operands."); + int64_t Val; + ICToken Op2 = OperandStack.pop_back_val(); + ICToken Op1 = OperandStack.pop_back_val(); + switch (Op.first) { + default: + report_fatal_error("Unexpected operator!"); + break; + case IC_PLUS: + Val = Op1.second + Op2.second; + OperandStack.push_back(std::make_pair(IC_IMM, Val)); + break; + case IC_MINUS: + Val = Op1.second - Op2.second; + OperandStack.push_back(std::make_pair(IC_IMM, Val)); + break; + case IC_MULTIPLY: + assert (Op1.first == IC_IMM && Op2.first == IC_IMM && + "Multiply operation with an immediate and a register!"); + Val = Op1.second * Op2.second; + OperandStack.push_back(std::make_pair(IC_IMM, Val)); + break; + case IC_DIVIDE: + assert (Op1.first == IC_IMM && Op2.first == IC_IMM && + "Divide operation with an immediate and a register!"); + assert (Op2.second != 0 && "Division by zero!"); + Val = Op1.second / Op2.second; + OperandStack.push_back(std::make_pair(IC_IMM, Val)); + break; + } + } + } + assert (OperandStack.size() == 1 && "Expected a single result."); + return OperandStack.pop_back_val().second; + } + }; + + enum IntelExprState { + IES_PLUS, + IES_MINUS, + IES_MULTIPLY, + IES_DIVIDE, + IES_LBRAC, + IES_RBRAC, + IES_LPAREN, + IES_RPAREN, + IES_REGISTER, + IES_INTEGER, + IES_IDENTIFIER, + IES_ERROR + }; + + class IntelExprStateMachine { + IntelExprState State, PrevState; + unsigned BaseReg, IndexReg, TmpReg, Scale; + int64_t Imm; + const MCExpr *Sym; + StringRef SymName; + bool StopOnLBrac, AddImmPrefix; + InfixCalculator IC; + InlineAsmIdentifierInfo Info; + public: + IntelExprStateMachine(int64_t imm, bool stoponlbrac, bool addimmprefix) : + State(IES_PLUS), PrevState(IES_ERROR), BaseReg(0), IndexReg(0), TmpReg(0), + Scale(1), Imm(imm), Sym(0), StopOnLBrac(stoponlbrac), + AddImmPrefix(addimmprefix) { Info.clear(); } + + unsigned getBaseReg() { return BaseReg; } + unsigned getIndexReg() { return IndexReg; } + unsigned getScale() { return Scale; } + const MCExpr *getSym() { return Sym; } + StringRef getSymName() { return SymName; } + int64_t getImm() { return Imm + IC.execute(); } + bool isValidEndState() { return State == IES_RBRAC; } + bool getStopOnLBrac() { return StopOnLBrac; } + bool getAddImmPrefix() { return AddImmPrefix; } + bool hadError() { return State == IES_ERROR; } + + InlineAsmIdentifierInfo &getIdentifierInfo() { + return Info; + } + + void onPlus() { + IntelExprState CurrState = State; + switch (State) { + default: + State = IES_ERROR; + break; + case IES_INTEGER: + case IES_RPAREN: + case IES_REGISTER: + State = IES_PLUS; + IC.pushOperator(IC_PLUS); + if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) { + // If we already have a BaseReg, then assume this is the IndexReg with + // a scale of 1. 
+ if (!BaseReg) { + BaseReg = TmpReg; + } else { + assert (!IndexReg && "BaseReg/IndexReg already set!"); + IndexReg = TmpReg; + Scale = 1; + } + } + break; + } + PrevState = CurrState; + } + void onMinus() { + IntelExprState CurrState = State; + switch (State) { + default: + State = IES_ERROR; + break; + case IES_PLUS: + case IES_MULTIPLY: + case IES_DIVIDE: + case IES_LPAREN: + case IES_RPAREN: + case IES_LBRAC: + case IES_RBRAC: + case IES_INTEGER: + case IES_REGISTER: + State = IES_MINUS; + // Only push the minus operator if it is not a unary operator. + if (!(CurrState == IES_PLUS || CurrState == IES_MINUS || + CurrState == IES_MULTIPLY || CurrState == IES_DIVIDE || + CurrState == IES_LPAREN || CurrState == IES_LBRAC)) + IC.pushOperator(IC_MINUS); + if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) { + // If we already have a BaseReg, then assume this is the IndexReg with + // a scale of 1. + if (!BaseReg) { + BaseReg = TmpReg; + } else { + assert (!IndexReg && "BaseReg/IndexReg already set!"); + IndexReg = TmpReg; + Scale = 1; + } + } + break; + } + PrevState = CurrState; + } + void onRegister(unsigned Reg) { + IntelExprState CurrState = State; + switch (State) { + default: + State = IES_ERROR; + break; + case IES_PLUS: + case IES_LPAREN: + State = IES_REGISTER; + TmpReg = Reg; + IC.pushOperand(IC_REGISTER); + break; + case IES_MULTIPLY: + // Index Register - Scale * Register + if (PrevState == IES_INTEGER) { + assert (!IndexReg && "IndexReg already set!"); + State = IES_REGISTER; + IndexReg = Reg; + // Get the scale and replace the 'Scale * Register' with '0'. + Scale = IC.popOperand(); + IC.pushOperand(IC_IMM); + IC.popOperator(); + } else { + State = IES_ERROR; + } + break; + } + PrevState = CurrState; + } + void onIdentifierExpr(const MCExpr *SymRef, StringRef SymRefName) { + PrevState = State; + switch (State) { + default: + State = IES_ERROR; + break; + case IES_PLUS: + case IES_MINUS: + State = IES_INTEGER; + Sym = SymRef; + SymName = SymRefName; + IC.pushOperand(IC_IMM); + break; + } + } + void onInteger(int64_t TmpInt) { + IntelExprState CurrState = State; + switch (State) { + default: + State = IES_ERROR; + break; + case IES_PLUS: + case IES_MINUS: + case IES_DIVIDE: + case IES_MULTIPLY: + case IES_LPAREN: + State = IES_INTEGER; + if (PrevState == IES_REGISTER && CurrState == IES_MULTIPLY) { + // Index Register - Register * Scale + assert (!IndexReg && "IndexReg already set!"); + IndexReg = TmpReg; + Scale = TmpInt; + // Get the scale and replace the 'Register * Scale' with '0'. + IC.popOperator(); + } else if ((PrevState == IES_PLUS || PrevState == IES_MINUS || + PrevState == IES_MULTIPLY || PrevState == IES_DIVIDE || + PrevState == IES_LPAREN || PrevState == IES_LBRAC) && + CurrState == IES_MINUS) { + // Unary minus. No need to pop the minus operand because it was never + // pushed. + IC.pushOperand(IC_IMM, -TmpInt); // Push -Imm. 
+ } else { + IC.pushOperand(IC_IMM, TmpInt); + } + break; + } + PrevState = CurrState; + } + void onStar() { + PrevState = State; + switch (State) { + default: + State = IES_ERROR; + break; + case IES_INTEGER: + case IES_REGISTER: + case IES_RPAREN: + State = IES_MULTIPLY; + IC.pushOperator(IC_MULTIPLY); + break; + } + } + void onDivide() { + PrevState = State; + switch (State) { + default: + State = IES_ERROR; + break; + case IES_INTEGER: + case IES_RPAREN: + State = IES_DIVIDE; + IC.pushOperator(IC_DIVIDE); + break; + } + } + void onLBrac() { + PrevState = State; + switch (State) { + default: + State = IES_ERROR; + break; + case IES_RBRAC: + State = IES_PLUS; + IC.pushOperator(IC_PLUS); + break; + } + } + void onRBrac() { + IntelExprState CurrState = State; + switch (State) { + default: + State = IES_ERROR; + break; + case IES_INTEGER: + case IES_REGISTER: + case IES_RPAREN: + State = IES_RBRAC; + if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) { + // If we already have a BaseReg, then assume this is the IndexReg with + // a scale of 1. + if (!BaseReg) { + BaseReg = TmpReg; + } else { + assert (!IndexReg && "BaseReg/IndexReg already set!"); + IndexReg = TmpReg; + Scale = 1; + } + } + break; + } + PrevState = CurrState; + } + void onLParen() { + IntelExprState CurrState = State; + switch (State) { + default: + State = IES_ERROR; + break; + case IES_PLUS: + case IES_MINUS: + case IES_MULTIPLY: + case IES_DIVIDE: + case IES_LPAREN: + // FIXME: We don't handle this type of unary minus, yet. + if ((PrevState == IES_PLUS || PrevState == IES_MINUS || + PrevState == IES_MULTIPLY || PrevState == IES_DIVIDE || + PrevState == IES_LPAREN || PrevState == IES_LBRAC) && + CurrState == IES_MINUS) { + State = IES_ERROR; + break; + } + State = IES_LPAREN; + IC.pushOperator(IC_LPAREN); + break; + } + PrevState = CurrState; + } + void onRParen() { + PrevState = State; + switch (State) { + default: + State = IES_ERROR; + break; + case IES_INTEGER: + case IES_REGISTER: + case IES_RPAREN: + State = IES_RPAREN; + IC.pushOperator(IC_RPAREN); + break; + } + } + }; + MCAsmParser &getParser() const { return Parser; } MCAsmLexer &getLexer() const { return Parser.getLexer(); } bool Error(SMLoc L, const Twine &Msg, - ArrayRef<SMRange> Ranges = ArrayRef<SMRange>(), + ArrayRef<SMRange> Ranges = None, bool MatchingInlineAsm = false) { if (MatchingInlineAsm) return true; return Parser.Error(L, Msg, Ranges); @@ -57,21 +491,25 @@ private: X86Operand *ParseOperand(); X86Operand *ParseATTOperand(); X86Operand *ParseIntelOperand(); - X86Operand *ParseIntelOffsetOfOperator(SMLoc StartLoc); - X86Operand *ParseIntelOperator(SMLoc StartLoc, unsigned OpKind); - X86Operand *ParseIntelMemOperand(unsigned SegReg, uint64_t ImmDisp, + X86Operand *ParseIntelOffsetOfOperator(); + X86Operand *ParseIntelDotOperator(const MCExpr *Disp, const MCExpr *&NewDisp); + X86Operand *ParseIntelOperator(unsigned OpKind); + X86Operand *ParseIntelMemOperand(unsigned SegReg, int64_t ImmDisp, SMLoc StartLoc); - X86Operand *ParseIntelBracExpression(unsigned SegReg, uint64_t ImmDisp, - unsigned Size); - X86Operand *ParseIntelVarWithQualifier(const MCExpr *&Disp, - SMLoc &IdentStart); - X86Operand *ParseMemOperand(unsigned SegReg, SMLoc StartLoc); + X86Operand *ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End); + X86Operand *ParseIntelBracExpression(unsigned SegReg, SMLoc Start, + int64_t ImmDisp, unsigned Size); + X86Operand *ParseIntelIdentifier(const MCExpr *&Val, StringRef &Identifier, + InlineAsmIdentifierInfo &Info, + bool 
IsUnevaluatedOperand, SMLoc &End); - X86Operand *CreateMemForInlineAsm(const MCExpr *Disp, SMLoc Start, SMLoc End, - SMLoc SizeDirLoc, unsigned Size); + X86Operand *ParseMemOperand(unsigned SegReg, SMLoc StartLoc); - bool ParseIntelDotOperator(const MCExpr *Disp, const MCExpr **NewDisp, - SmallString<64> &Err); + X86Operand *CreateMemForInlineAsm(unsigned SegReg, const MCExpr *Disp, + unsigned BaseReg, unsigned IndexReg, + unsigned Scale, SMLoc Start, SMLoc End, + unsigned Size, StringRef Identifier, + InlineAsmIdentifierInfo &Info); bool ParseDirectiveWord(unsigned Size, SMLoc L); bool ParseDirectiveCode(StringRef IDVal, SMLoc L); @@ -101,6 +539,10 @@ private: setAvailableFeatures(FB); } + bool isParsingIntelSyntax() { + return getParser().getAssemblerDialect(); + } + /// @name Auto-generated Matcher Functions /// { @@ -123,10 +565,6 @@ public: SmallVectorImpl<MCParsedAsmOperand*> &Operands); virtual bool ParseDirective(AsmToken DirectiveID); - - bool isParsingIntelSyntax() { - return getParser().getAssemblerDialect(); - } }; } // end anonymous namespace @@ -176,6 +614,8 @@ struct X86Operand : public MCParsedAsmOperand { SMLoc StartLoc, EndLoc; SMLoc OffsetOfLoc; + StringRef SymName; + void *OpDecl; bool AddressOf; struct TokOp { @@ -210,6 +650,9 @@ struct X86Operand : public MCParsedAsmOperand { X86Operand(KindTy K, SMLoc Start, SMLoc End) : Kind(K), StartLoc(Start), EndLoc(End) {} + StringRef getSymName() { return SymName; } + void *getOpDecl() { return OpDecl; } + /// getStartLoc - Get the location of the first token of this operand. SMLoc getStartLoc() const { return StartLoc; } /// getEndLoc - Get the location of the last token of this operand. @@ -473,11 +916,15 @@ struct X86Operand : public MCParsedAsmOperand { static X86Operand *CreateReg(unsigned RegNo, SMLoc StartLoc, SMLoc EndLoc, bool AddressOf = false, - SMLoc OffsetOfLoc = SMLoc()) { + SMLoc OffsetOfLoc = SMLoc(), + StringRef SymName = StringRef(), + void *OpDecl = 0) { X86Operand *Res = new X86Operand(Register, StartLoc, EndLoc); Res->Reg.RegNo = RegNo; Res->AddressOf = AddressOf; Res->OffsetOfLoc = OffsetOfLoc; + Res->SymName = SymName; + Res->OpDecl = OpDecl; return Res; } @@ -489,7 +936,8 @@ struct X86Operand : public MCParsedAsmOperand { /// Create an absolute memory operand. static X86Operand *CreateMem(const MCExpr *Disp, SMLoc StartLoc, SMLoc EndLoc, - unsigned Size = 0) { + unsigned Size = 0, StringRef SymName = StringRef(), + void *OpDecl = 0) { X86Operand *Res = new X86Operand(Memory, StartLoc, EndLoc); Res->Mem.SegReg = 0; Res->Mem.Disp = Disp; @@ -497,7 +945,9 @@ struct X86Operand : public MCParsedAsmOperand { Res->Mem.IndexReg = 0; Res->Mem.Scale = 1; Res->Mem.Size = Size; - Res->AddressOf = false; + Res->SymName = SymName; + Res->OpDecl = OpDecl; + Res->AddressOf = false; return Res; } @@ -505,7 +955,9 @@ struct X86Operand : public MCParsedAsmOperand { static X86Operand *CreateMem(unsigned SegReg, const MCExpr *Disp, unsigned BaseReg, unsigned IndexReg, unsigned Scale, SMLoc StartLoc, SMLoc EndLoc, - unsigned Size = 0) { + unsigned Size = 0, + StringRef SymName = StringRef(), + void *OpDecl = 0) { // We should never just have a displacement, that should be parsed as an // absolute memory operand. 
assert((SegReg || BaseReg || IndexReg) && "Invalid memory operand!"); @@ -520,7 +972,9 @@ struct X86Operand : public MCParsedAsmOperand { Res->Mem.IndexReg = IndexReg; Res->Mem.Scale = Scale; Res->Mem.Size = Size; - Res->AddressOf = false; + Res->SymName = SymName; + Res->OpDecl = OpDecl; + Res->AddressOf = false; return Res; } }; @@ -676,306 +1130,104 @@ static unsigned getIntelMemOperandSize(StringRef OpStr) { return Size; } -enum IntelBracExprState { - IBES_START, - IBES_LBRAC, - IBES_RBRAC, - IBES_REGISTER, - IBES_REGISTER_STAR, - IBES_REGISTER_STAR_INTEGER, - IBES_INTEGER, - IBES_INTEGER_STAR, - IBES_INDEX_REGISTER, - IBES_IDENTIFIER, - IBES_DISP_EXPR, - IBES_MINUS, - IBES_ERROR -}; - -class IntelBracExprStateMachine { - IntelBracExprState State; - unsigned BaseReg, IndexReg, Scale; - int64_t Disp; - - unsigned TmpReg; - int64_t TmpInteger; - - bool isPlus; - -public: - IntelBracExprStateMachine(MCAsmParser &parser, int64_t disp) : - State(IBES_START), BaseReg(0), IndexReg(0), Scale(1), Disp(disp), - TmpReg(0), TmpInteger(0), isPlus(true) {} - - unsigned getBaseReg() { return BaseReg; } - unsigned getIndexReg() { return IndexReg; } - unsigned getScale() { return Scale; } - int64_t getDisp() { return Disp; } - bool isValidEndState() { return State == IBES_RBRAC; } - - void onPlus() { - switch (State) { - default: - State = IBES_ERROR; - break; - case IBES_INTEGER: - State = IBES_START; - if (isPlus) - Disp += TmpInteger; - else - Disp -= TmpInteger; - break; - case IBES_REGISTER: - State = IBES_START; - // If we already have a BaseReg, then assume this is the IndexReg with a - // scale of 1. - if (!BaseReg) { - BaseReg = TmpReg; - } else { - assert (!IndexReg && "BaseReg/IndexReg already set!"); - IndexReg = TmpReg; - Scale = 1; - } - break; - case IBES_INDEX_REGISTER: - State = IBES_START; - break; - } - isPlus = true; - } - void onMinus() { - switch (State) { - default: - State = IBES_ERROR; - break; - case IBES_START: - State = IBES_MINUS; - break; - case IBES_INTEGER: - State = IBES_START; - if (isPlus) - Disp += TmpInteger; - else - Disp -= TmpInteger; - break; - case IBES_REGISTER: - State = IBES_START; - // If we already have a BaseReg, then assume this is the IndexReg with a - // scale of 1. - if (!BaseReg) { - BaseReg = TmpReg; - } else { - assert (!IndexReg && "BaseReg/IndexReg already set!"); - IndexReg = TmpReg; - Scale = 1; - } - break; - case IBES_INDEX_REGISTER: - State = IBES_START; - break; - } - isPlus = false; - } - void onRegister(unsigned Reg) { - switch (State) { - default: - State = IBES_ERROR; - break; - case IBES_START: - State = IBES_REGISTER; - TmpReg = Reg; - break; - case IBES_INTEGER_STAR: - assert (!IndexReg && "IndexReg already set!"); - State = IBES_INDEX_REGISTER; - IndexReg = Reg; - Scale = TmpInteger; - break; - } - } - void onDispExpr() { - switch (State) { - default: - State = IBES_ERROR; - break; - case IBES_START: - State = IBES_DISP_EXPR; - break; +X86Operand * +X86AsmParser::CreateMemForInlineAsm(unsigned SegReg, const MCExpr *Disp, + unsigned BaseReg, unsigned IndexReg, + unsigned Scale, SMLoc Start, SMLoc End, + unsigned Size, StringRef Identifier, + InlineAsmIdentifierInfo &Info){ + if (isa<MCSymbolRefExpr>(Disp)) { + // If this is not a VarDecl then assume it is a FuncDecl or some other label + // reference. We need an 'r' constraint here, so we need to create register + // operand to ensure proper matching. Just pick a GPR based on the size of + // a pointer. + if (!Info.IsVarDecl) { + unsigned RegNo = is64BitMode() ? 
X86::RBX : X86::EBX; + return X86Operand::CreateReg(RegNo, Start, End, /*AddressOf=*/true, + SMLoc(), Identifier, Info.OpDecl); } - } - void onInteger(int64_t TmpInt) { - switch (State) { - default: - State = IBES_ERROR; - break; - case IBES_START: - State = IBES_INTEGER; - TmpInteger = TmpInt; - break; - case IBES_MINUS: - State = IBES_INTEGER; - TmpInteger = TmpInt; - break; - case IBES_REGISTER_STAR: - assert (!IndexReg && "IndexReg already set!"); - State = IBES_INDEX_REGISTER; - IndexReg = TmpReg; - Scale = TmpInt; - break; - } - } - void onStar() { - switch (State) { - default: - State = IBES_ERROR; - break; - case IBES_INTEGER: - State = IBES_INTEGER_STAR; - break; - case IBES_REGISTER: - State = IBES_REGISTER_STAR; - break; - } - } - void onLBrac() { - switch (State) { - default: - State = IBES_ERROR; - break; - case IBES_RBRAC: - State = IBES_START; - isPlus = true; - break; - } - } - void onRBrac() { - switch (State) { - default: - State = IBES_ERROR; - break; - case IBES_DISP_EXPR: - State = IBES_RBRAC; - break; - case IBES_INTEGER: - State = IBES_RBRAC; - if (isPlus) - Disp += TmpInteger; - else - Disp -= TmpInteger; - break; - case IBES_REGISTER: - State = IBES_RBRAC; - // If we already have a BaseReg, then assume this is the IndexReg with a - // scale of 1. - if (!BaseReg) { - BaseReg = TmpReg; - } else { - assert (!IndexReg && "BaseReg/IndexReg already set!"); - IndexReg = TmpReg; - Scale = 1; - } - break; - case IBES_INDEX_REGISTER: - State = IBES_RBRAC; - break; - } - } -}; - -X86Operand *X86AsmParser::CreateMemForInlineAsm(const MCExpr *Disp, SMLoc Start, - SMLoc End, SMLoc SizeDirLoc, - unsigned Size) { - bool NeedSizeDir = false; - bool IsVarDecl = false; - if (const MCSymbolRefExpr *SymRef = dyn_cast<MCSymbolRefExpr>(Disp)) { - const MCSymbol &Sym = SymRef->getSymbol(); - // FIXME: The SemaLookup will fail if the name is anything other then an - // identifier. - // FIXME: Pass a valid SMLoc. - unsigned tLength, tSize, tType; - SemaCallback->LookupInlineAsmIdentifier(Sym.getName(), NULL, tLength, - tSize, tType, IsVarDecl); if (!Size) { - Size = tType * 8; // Size is in terms of bits in this context. - NeedSizeDir = Size > 0; + Size = Info.Type * 8; // Size is in terms of bits in this context. + if (Size) + InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_SizeDirective, Start, + /*Len=*/0, Size)); } } - // If this is not a VarDecl then assume it is a FuncDecl or some other label - // reference. We need an 'r' constraint here, so we need to create register - // operand to ensure proper matching. Just pick a GPR based on the size of - // a pointer. - if (!IsVarDecl) { - unsigned RegNo = is64BitMode() ? X86::RBX : X86::EBX; - return X86Operand::CreateReg(RegNo, Start, End, /*AddressOf=*/true); - } - - if (NeedSizeDir) - InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_SizeDirective, SizeDirLoc, - /*Len*/0, Size)); - // When parsing inline assembly we set the base register to a non-zero value - // as we don't know the actual value at this time. This is necessary to + // if we don't know the actual value at this time. This is necessary to // get the matching correct in some cases. - return X86Operand::CreateMem(/*SegReg*/0, Disp, /*BaseReg*/1, /*IndexReg*/0, - /*Scale*/1, Start, End, Size); + BaseReg = BaseReg ? 
BaseReg : 1; + return X86Operand::CreateMem(SegReg, Disp, BaseReg, IndexReg, Scale, Start, + End, Size, Identifier, Info.OpDecl); } -X86Operand *X86AsmParser::ParseIntelBracExpression(unsigned SegReg, - uint64_t ImmDisp, - unsigned Size) { - const AsmToken &Tok = Parser.getTok(); - SMLoc Start = Tok.getLoc(), End = Tok.getEndLoc(); - - // Eat '[' - if (getLexer().isNot(AsmToken::LBrac)) - return ErrorOperand(Start, "Expected '[' token!"); - Parser.Lex(); - - unsigned TmpReg = 0; - - // Try to handle '[' 'Symbol' ']' - if (getLexer().is(AsmToken::Identifier)) { - if (ParseRegister(TmpReg, Start, End)) { - const MCExpr *Disp; - SMLoc IdentStart = Tok.getLoc(); - if (getParser().parseExpression(Disp, End)) - return 0; - - if (X86Operand *Err = ParseIntelVarWithQualifier(Disp, IdentStart)) - return Err; - - if (getLexer().isNot(AsmToken::RBrac)) - return ErrorOperand(Parser.getTok().getLoc(), "Expected ']' token!"); - - // FIXME: We don't handle 'ImmDisp' '[' 'Symbol' ']'. - if (ImmDisp) - return ErrorOperand(Start, "Unsupported immediate displacement!"); - - // Adjust the EndLoc due to the ']'. - End = SMLoc::getFromPointer(Parser.getTok().getEndLoc().getPointer()-1); - Parser.Lex(); - if (!isParsingInlineAsm()) - return X86Operand::CreateMem(Disp, Start, End, Size); - - // We want the size directive before the '['. - SMLoc SizeDirLoc = SMLoc::getFromPointer(Start.getPointer()-1); - return CreateMemForInlineAsm(Disp, Start, End, SizeDirLoc, Size); +static void +RewriteIntelBracExpression(SmallVectorImpl<AsmRewrite> *AsmRewrites, + StringRef SymName, int64_t ImmDisp, + int64_t FinalImmDisp, SMLoc &BracLoc, + SMLoc &StartInBrac, SMLoc &End) { + // Remove the '[' and ']' from the IR string. + AsmRewrites->push_back(AsmRewrite(AOK_Skip, BracLoc, 1)); + AsmRewrites->push_back(AsmRewrite(AOK_Skip, End, 1)); + + // If ImmDisp is non-zero, then we parsed a displacement before the + // bracketed expression (i.e., ImmDisp [ BaseReg + Scale*IndexReg + Disp]) + // If ImmDisp doesn't match the displacement computed by the state machine + // then we have an additional displacement in the bracketed expression. + if (ImmDisp != FinalImmDisp) { + if (ImmDisp) { + // We have an immediate displacement before the bracketed expression. + // Adjust this to match the final immediate displacement. + bool Found = false; + for (SmallVectorImpl<AsmRewrite>::iterator I = AsmRewrites->begin(), + E = AsmRewrites->end(); I != E; ++I) { + if ((*I).Loc.getPointer() > BracLoc.getPointer()) + continue; + if ((*I).Kind == AOK_ImmPrefix || (*I).Kind == AOK_Imm) { + assert (!Found && "ImmDisp already rewritten."); + (*I).Kind = AOK_Imm; + (*I).Len = BracLoc.getPointer() - (*I).Loc.getPointer(); + (*I).Val = FinalImmDisp; + Found = true; + break; + } + } + assert (Found && "Unable to rewrite ImmDisp."); + } else { + // We have a symbolic and an immediate displacement, but no displacement + // before the bracketed expression. Put the immediate displacement + // before the bracketed expression. + AsmRewrites->push_back(AsmRewrite(AOK_Imm, BracLoc, 0, FinalImmDisp)); } } + // Remove all the ImmPrefix rewrites within the brackets. + for (SmallVectorImpl<AsmRewrite>::iterator I = AsmRewrites->begin(), + E = AsmRewrites->end(); I != E; ++I) { + if ((*I).Loc.getPointer() < StartInBrac.getPointer()) + continue; + if ((*I).Kind == AOK_ImmPrefix) + (*I).Kind = AOK_Delete; + } + const char *SymLocPtr = SymName.data(); + // Skip everything before the symbol. 
+ if (unsigned Len = SymLocPtr - StartInBrac.getPointer()) { + assert(Len > 0 && "Expected a non-negative length."); + AsmRewrites->push_back(AsmRewrite(AOK_Skip, StartInBrac, Len)); + } + // Skip everything after the symbol. + if (unsigned Len = End.getPointer() - (SymLocPtr + SymName.size())) { + SMLoc Loc = SMLoc::getFromPointer(SymLocPtr + SymName.size()); + assert(Len > 0 && "Expected a non-negative length."); + AsmRewrites->push_back(AsmRewrite(AOK_Skip, Loc, Len)); + } +} - // Parse [ BaseReg + Scale*IndexReg + Disp ]. We may have already parsed an - // immediate displacement before the bracketed expression. - bool Done = false; - IntelBracExprStateMachine SM(Parser, ImmDisp); - - // If we parsed a register, then the end loc has already been set and - // the identifier has already been lexed. We also need to update the - // state. - if (TmpReg) - SM.onRegister(TmpReg); +X86Operand * +X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) { + const AsmToken &Tok = Parser.getTok(); - const MCExpr *Disp = 0; + bool Done = false; while (!Done) { bool UpdateLocLex = true; @@ -983,6 +1235,10 @@ X86Operand *X86AsmParser::ParseIntelBracExpression(unsigned SegReg, // identifier. Don't try an parse it as a register. if (Tok.getString().startswith(".")) break; + + // If we're parsing an immediate expression, we don't expect a '['. + if (SM.getStopOnLBrac() && getLexer().getKind() == AsmToken::LBrac) + break; switch (getLexer().getKind()) { default: { @@ -992,139 +1248,185 @@ X86Operand *X86AsmParser::ParseIntelBracExpression(unsigned SegReg, } return ErrorOperand(Tok.getLoc(), "Unexpected token!"); } + case AsmToken::EndOfStatement: { + Done = true; + break; + } case AsmToken::Identifier: { - // This could be a register or a displacement expression. - if(!ParseRegister(TmpReg, Start, End)) { + // This could be a register or a symbolic displacement. + unsigned TmpReg; + const MCExpr *Val; + SMLoc IdentLoc = Tok.getLoc(); + StringRef Identifier = Tok.getString(); + if(!ParseRegister(TmpReg, IdentLoc, End)) { SM.onRegister(TmpReg); UpdateLocLex = false; break; - } else if (!getParser().parseExpression(Disp, End)) { - SM.onDispExpr(); + } else { + if (!isParsingInlineAsm()) { + if (getParser().parsePrimaryExpr(Val, End)) + return ErrorOperand(Tok.getLoc(), "Unexpected identifier!"); + } else { + InlineAsmIdentifierInfo &Info = SM.getIdentifierInfo(); + if (X86Operand *Err = ParseIntelIdentifier(Val, Identifier, Info, + /*Unevaluated*/ false, End)) + return Err; + } + SM.onIdentifierExpr(Val, Identifier); UpdateLocLex = false; break; } return ErrorOperand(Tok.getLoc(), "Unexpected identifier!"); } - case AsmToken::Integer: { - int64_t Val = Tok.getIntVal(); - SM.onInteger(Val); + case AsmToken::Integer: + if (isParsingInlineAsm() && SM.getAddImmPrefix()) + InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_ImmPrefix, + Tok.getLoc())); + SM.onInteger(Tok.getIntVal()); break; - } case AsmToken::Plus: SM.onPlus(); break; case AsmToken::Minus: SM.onMinus(); break; case AsmToken::Star: SM.onStar(); break; + case AsmToken::Slash: SM.onDivide(); break; case AsmToken::LBrac: SM.onLBrac(); break; case AsmToken::RBrac: SM.onRBrac(); break; + case AsmToken::LParen: SM.onLParen(); break; + case AsmToken::RParen: SM.onRParen(); break; } + if (SM.hadError()) + return ErrorOperand(Tok.getLoc(), "Unexpected token!"); + if (!Done && UpdateLocLex) { End = Tok.getLoc(); Parser.Lex(); // Consume the token. 
} } + return 0; +} - if (!Disp) - Disp = MCConstantExpr::Create(SM.getDisp(), getContext()); +X86Operand *X86AsmParser::ParseIntelBracExpression(unsigned SegReg, SMLoc Start, + int64_t ImmDisp, + unsigned Size) { + const AsmToken &Tok = Parser.getTok(); + SMLoc BracLoc = Tok.getLoc(), End = Tok.getEndLoc(); + if (getLexer().isNot(AsmToken::LBrac)) + return ErrorOperand(BracLoc, "Expected '[' token!"); + Parser.Lex(); // Eat '[' + + SMLoc StartInBrac = Tok.getLoc(); + // Parse [ Symbol + ImmDisp ] and [ BaseReg + Scale*IndexReg + ImmDisp ]. We + // may have already parsed an immediate displacement before the bracketed + // expression. + IntelExprStateMachine SM(ImmDisp, /*StopOnLBrac=*/false, /*AddImmPrefix=*/true); + if (X86Operand *Err = ParseIntelExpression(SM, End)) + return Err; + + const MCExpr *Disp; + if (const MCExpr *Sym = SM.getSym()) { + // A symbolic displacement. + Disp = Sym; + if (isParsingInlineAsm()) + RewriteIntelBracExpression(InstInfo->AsmRewrites, SM.getSymName(), + ImmDisp, SM.getImm(), BracLoc, StartInBrac, + End); + } else { + // An immediate displacement only. + Disp = MCConstantExpr::Create(SM.getImm(), getContext()); + } // Parse the dot operator (e.g., [ebx].foo.bar). if (Tok.getString().startswith(".")) { - SmallString<64> Err; const MCExpr *NewDisp; - if (ParseIntelDotOperator(Disp, &NewDisp, Err)) - return ErrorOperand(Tok.getLoc(), Err); + if (X86Operand *Err = ParseIntelDotOperator(Disp, NewDisp)) + return Err; - End = Parser.getTok().getEndLoc(); + End = Tok.getEndLoc(); Parser.Lex(); // Eat the field. Disp = NewDisp; } int BaseReg = SM.getBaseReg(); int IndexReg = SM.getIndexReg(); - - // handle [-42] - if (!BaseReg && !IndexReg) { - if (!SegReg) - return X86Operand::CreateMem(Disp, Start, End); - else - return X86Operand::CreateMem(SegReg, Disp, 0, 0, 1, Start, End, Size); + int Scale = SM.getScale(); + if (!isParsingInlineAsm()) { + // handle [-42] + if (!BaseReg && !IndexReg) { + if (!SegReg) + return X86Operand::CreateMem(Disp, Start, End, Size); + else + return X86Operand::CreateMem(SegReg, Disp, 0, 0, 1, Start, End, Size); + } + return X86Operand::CreateMem(SegReg, Disp, BaseReg, IndexReg, Scale, Start, + End, Size); } - int Scale = SM.getScale(); - return X86Operand::CreateMem(SegReg, Disp, BaseReg, IndexReg, Scale, - Start, End, Size); + InlineAsmIdentifierInfo &Info = SM.getIdentifierInfo(); + return CreateMemForInlineAsm(SegReg, Disp, BaseReg, IndexReg, Scale, Start, + End, Size, SM.getSymName(), Info); } // Inline assembly may use variable names with namespace alias qualifiers. -X86Operand *X86AsmParser::ParseIntelVarWithQualifier(const MCExpr *&Disp, - SMLoc &IdentStart) { - // We should only see Foo::Bar if we're parsing inline assembly. - if (!isParsingInlineAsm()) - return 0; - - // If we don't see a ':' then there can't be a qualifier. - if (getLexer().isNot(AsmToken::Colon)) - return 0; +X86Operand *X86AsmParser::ParseIntelIdentifier(const MCExpr *&Val, + StringRef &Identifier, + InlineAsmIdentifierInfo &Info, + bool IsUnevaluatedOperand, + SMLoc &End) { + assert (isParsingInlineAsm() && "Expected to be parsing inline assembly."); + Val = 0; + StringRef LineBuf(Identifier.data()); + SemaCallback->LookupInlineAsmIdentifier(LineBuf, Info, IsUnevaluatedOperand); - bool Done = false; const AsmToken &Tok = Parser.getTok(); - SMLoc IdentEnd = Tok.getEndLoc(); - while (!Done) { - switch (getLexer().getKind()) { - default: - Done = true; - break; - case AsmToken::Colon: - getLexer().Lex(); // Consume ':'. 
- if (getLexer().isNot(AsmToken::Colon)) - return ErrorOperand(Tok.getLoc(), "Expected ':' token!"); - getLexer().Lex(); // Consume second ':'. - if (getLexer().isNot(AsmToken::Identifier)) - return ErrorOperand(Tok.getLoc(), "Expected an identifier token!"); - break; - case AsmToken::Identifier: - IdentEnd = Tok.getEndLoc(); - getLexer().Lex(); // Consume the identifier. - break; - } + + // Advance the token stream until the end of the current token is + // after the end of what the frontend claimed. + const char *EndPtr = Tok.getLoc().getPointer() + LineBuf.size(); + while (true) { + End = Tok.getEndLoc(); + getLexer().Lex(); + + assert(End.getPointer() <= EndPtr && "frontend claimed part of a token?"); + if (End.getPointer() == EndPtr) break; } - size_t Len = IdentEnd.getPointer() - IdentStart.getPointer(); - StringRef Identifier(IdentStart.getPointer(), Len); + + // Create the symbol reference. + Identifier = LineBuf; MCSymbol *Sym = getContext().GetOrCreateSymbol(Identifier); MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None; - Disp = MCSymbolRefExpr::Create(Sym, Variant, getParser().getContext()); + Val = MCSymbolRefExpr::Create(Sym, Variant, getParser().getContext()); return 0; } /// ParseIntelMemOperand - Parse intel style memory operand. X86Operand *X86AsmParser::ParseIntelMemOperand(unsigned SegReg, - uint64_t ImmDisp, + int64_t ImmDisp, SMLoc Start) { const AsmToken &Tok = Parser.getTok(); SMLoc End; unsigned Size = getIntelMemOperandSize(Tok.getString()); if (Size) { - Parser.Lex(); - assert ((Tok.getString() == "PTR" || Tok.getString() == "ptr") && - "Unexpected token!"); - Parser.Lex(); + Parser.Lex(); // Eat operand size (e.g., byte, word). + if (Tok.getString() != "PTR" && Tok.getString() != "ptr") + return ErrorOperand(Start, "Expected 'PTR' or 'ptr' token!"); + Parser.Lex(); // Eat ptr. } // Parse ImmDisp [ BaseReg + Scale*IndexReg + Disp ]. if (getLexer().is(AsmToken::Integer)) { - const AsmToken &IntTok = Parser.getTok(); if (isParsingInlineAsm()) InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_ImmPrefix, - IntTok.getLoc())); - uint64_t ImmDisp = IntTok.getIntVal(); + Tok.getLoc())); + int64_t ImmDisp = Tok.getIntVal(); Parser.Lex(); // Eat the integer. if (getLexer().isNot(AsmToken::LBrac)) return ErrorOperand(Start, "Expected '[' token!"); - return ParseIntelBracExpression(SegReg, ImmDisp, Size); + return ParseIntelBracExpression(SegReg, Start, ImmDisp, Size); } if (getLexer().is(AsmToken::LBrac)) - return ParseIntelBracExpression(SegReg, ImmDisp, Size); + return ParseIntelBracExpression(SegReg, Start, ImmDisp, Size); if (!ParseRegister(SegReg, Start, End)) { // Handel SegReg : [ ... 
] @@ -1133,37 +1435,37 @@ X86Operand *X86AsmParser::ParseIntelMemOperand(unsigned SegReg, Parser.Lex(); // Eat : if (getLexer().isNot(AsmToken::LBrac)) return ErrorOperand(Start, "Expected '[' token!"); - return ParseIntelBracExpression(SegReg, ImmDisp, Size); + return ParseIntelBracExpression(SegReg, Start, ImmDisp, Size); } - const MCExpr *Disp = MCConstantExpr::Create(0, getParser().getContext()); - SMLoc IdentStart = Tok.getLoc(); - if (getParser().parseExpression(Disp, End)) - return 0; + const MCExpr *Val; + if (!isParsingInlineAsm()) { + if (getParser().parsePrimaryExpr(Val, End)) + return ErrorOperand(Tok.getLoc(), "Unexpected token!"); - if (!isParsingInlineAsm()) - return X86Operand::CreateMem(Disp, Start, End, Size); + return X86Operand::CreateMem(Val, Start, End, Size); + } - if (X86Operand *Err = ParseIntelVarWithQualifier(Disp, IdentStart)) + InlineAsmIdentifierInfo Info; + StringRef Identifier = Tok.getString(); + if (X86Operand *Err = ParseIntelIdentifier(Val, Identifier, Info, + /*Unevaluated*/ false, End)) return Err; - - return CreateMemForInlineAsm(Disp, Start, End, Start, Size); + return CreateMemForInlineAsm(/*SegReg=*/0, Val, /*BaseReg=*/0,/*IndexReg=*/0, + /*Scale=*/1, Start, End, Size, Identifier, Info); } /// Parse the '.' operator. -bool X86AsmParser::ParseIntelDotOperator(const MCExpr *Disp, - const MCExpr **NewDisp, - SmallString<64> &Err) { - AsmToken Tok = *&Parser.getTok(); - uint64_t OrigDispVal, DotDispVal; +X86Operand *X86AsmParser::ParseIntelDotOperator(const MCExpr *Disp, + const MCExpr *&NewDisp) { + const AsmToken &Tok = Parser.getTok(); + int64_t OrigDispVal, DotDispVal; // FIXME: Handle non-constant expressions. - if (const MCConstantExpr *OrigDisp = dyn_cast<MCConstantExpr>(Disp)) { + if (const MCConstantExpr *OrigDisp = dyn_cast<MCConstantExpr>(Disp)) OrigDispVal = OrigDisp->getValue(); - } else { - Err = "Non-constant offsets are not supported!"; - return true; - } + else + return ErrorOperand(Tok.getLoc(), "Non-constant offsets are not supported!"); // Drop the '.'. StringRef DotDispStr = Tok.getString().drop_front(1); @@ -1173,23 +1475,15 @@ bool X86AsmParser::ParseIntelDotOperator(const MCExpr *Disp, APInt DotDisp; DotDispStr.getAsInteger(10, DotDisp); DotDispVal = DotDisp.getZExtValue(); - } else if (Tok.is(AsmToken::Identifier)) { - // We should only see an identifier when parsing the original inline asm. - // The front-end should rewrite this in terms of immediates. - assert (isParsingInlineAsm() && "Unexpected field name!"); - + } else if (isParsingInlineAsm() && Tok.is(AsmToken::Identifier)) { unsigned DotDisp; std::pair<StringRef, StringRef> BaseMember = DotDispStr.split('.'); if (SemaCallback->LookupInlineAsmField(BaseMember.first, BaseMember.second, - DotDisp)) { - Err = "Unable to lookup field reference!"; - return true; - } + DotDisp)) + return ErrorOperand(Tok.getLoc(), "Unable to lookup field reference!"); DotDispVal = DotDisp; - } else { - Err = "Unexpected token type!"; - return true; - } + } else + return ErrorOperand(Tok.getLoc(), "Unexpected token type!"); if (isParsingInlineAsm() && Tok.is(AsmToken::Identifier)) { SMLoc Loc = SMLoc::getFromPointer(DotDispStr.data()); @@ -1199,22 +1493,24 @@ bool X86AsmParser::ParseIntelDotOperator(const MCExpr *Disp, Val)); } - *NewDisp = MCConstantExpr::Create(OrigDispVal + DotDispVal, getContext()); - return false; + NewDisp = MCConstantExpr::Create(OrigDispVal + DotDispVal, getContext()); + return 0; } /// Parse the 'offset' operator. 
This operator is used to specify the /// location rather then the content of a variable. -X86Operand *X86AsmParser::ParseIntelOffsetOfOperator(SMLoc Start) { - SMLoc OffsetOfLoc = Start; +X86Operand *X86AsmParser::ParseIntelOffsetOfOperator() { + const AsmToken &Tok = Parser.getTok(); + SMLoc OffsetOfLoc = Tok.getLoc(); Parser.Lex(); // Eat offset. - Start = Parser.getTok().getLoc(); - assert (Parser.getTok().is(AsmToken::Identifier) && "Expected an identifier"); - SMLoc End; const MCExpr *Val; - if (getParser().parseExpression(Val, End)) - return ErrorOperand(Start, "Unable to parse expression!"); + InlineAsmIdentifierInfo Info; + SMLoc Start = Tok.getLoc(), End; + StringRef Identifier = Tok.getString(); + if (X86Operand *Err = ParseIntelIdentifier(Val, Identifier, Info, + /*Unevaluated*/ false, End)) + return Err; // Don't emit the offset operator. InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Skip, OffsetOfLoc, 7)); @@ -1224,7 +1520,7 @@ X86Operand *X86AsmParser::ParseIntelOffsetOfOperator(SMLoc Start) { // the size of a pointer. unsigned RegNo = is64BitMode() ? X86::RBX : X86::EBX; return X86Operand::CreateReg(RegNo, Start, End, /*GetAddress=*/true, - OffsetOfLoc); + OffsetOfLoc, Identifier, Info.OpDecl); } enum IntelOperatorKind { @@ -1239,34 +1535,25 @@ enum IntelOperatorKind { /// variable. A variable's size is the product of its LENGTH and TYPE. The /// TYPE operator returns the size of a C or C++ type or variable. If the /// variable is an array, TYPE returns the size of a single element. -X86Operand *X86AsmParser::ParseIntelOperator(SMLoc Start, unsigned OpKind) { - SMLoc TypeLoc = Start; - Parser.Lex(); // Eat offset. - Start = Parser.getTok().getLoc(); - assert (Parser.getTok().is(AsmToken::Identifier) && "Expected an identifier"); - - SMLoc End; - const MCExpr *Val; - if (getParser().parseExpression(Val, End)) - return 0; +X86Operand *X86AsmParser::ParseIntelOperator(unsigned OpKind) { + const AsmToken &Tok = Parser.getTok(); + SMLoc TypeLoc = Tok.getLoc(); + Parser.Lex(); // Eat operator. + + const MCExpr *Val = 0; + InlineAsmIdentifierInfo Info; + SMLoc Start = Tok.getLoc(), End; + StringRef Identifier = Tok.getString(); + if (X86Operand *Err = ParseIntelIdentifier(Val, Identifier, Info, + /*Unevaluated*/ true, End)) + return Err; - unsigned Length = 0, Size = 0, Type = 0; - if (const MCSymbolRefExpr *SymRef = dyn_cast<MCSymbolRefExpr>(Val)) { - const MCSymbol &Sym = SymRef->getSymbol(); - // FIXME: The SemaLookup will fail if the name is anything other then an - // identifier. - // FIXME: Pass a valid SMLoc. 
- bool IsVarDecl; - if (!SemaCallback->LookupInlineAsmIdentifier(Sym.getName(), NULL, Length, - Size, Type, IsVarDecl)) - return ErrorOperand(Start, "Unable to lookup expr!"); - } - unsigned CVal; + unsigned CVal = 0; switch(OpKind) { default: llvm_unreachable("Unexpected operand kind!"); - case IOK_LENGTH: CVal = Length; break; - case IOK_SIZE: CVal = Size; break; - case IOK_TYPE: CVal = Type; break; + case IOK_LENGTH: CVal = Info.Length; break; + case IOK_SIZE: CVal = Info.Size; break; + case IOK_TYPE: CVal = Info.Type; break; } // Rewrite the type operator and the C or C++ type or variable in terms of an @@ -1279,44 +1566,54 @@ X86Operand *X86AsmParser::ParseIntelOperator(SMLoc Start, unsigned OpKind) { } X86Operand *X86AsmParser::ParseIntelOperand() { - SMLoc Start = Parser.getTok().getLoc(), End; - StringRef AsmTokStr = Parser.getTok().getString(); + const AsmToken &Tok = Parser.getTok(); + SMLoc Start = Tok.getLoc(), End; // Offset, length, type and size operators. if (isParsingInlineAsm()) { + StringRef AsmTokStr = Tok.getString(); if (AsmTokStr == "offset" || AsmTokStr == "OFFSET") - return ParseIntelOffsetOfOperator(Start); + return ParseIntelOffsetOfOperator(); if (AsmTokStr == "length" || AsmTokStr == "LENGTH") - return ParseIntelOperator(Start, IOK_LENGTH); + return ParseIntelOperator(IOK_LENGTH); if (AsmTokStr == "size" || AsmTokStr == "SIZE") - return ParseIntelOperator(Start, IOK_SIZE); + return ParseIntelOperator(IOK_SIZE); if (AsmTokStr == "type" || AsmTokStr == "TYPE") - return ParseIntelOperator(Start, IOK_TYPE); + return ParseIntelOperator(IOK_TYPE); } // Immediate. - if (getLexer().is(AsmToken::Integer) || getLexer().is(AsmToken::Real) || - getLexer().is(AsmToken::Minus)) { - const MCExpr *Val; - bool isInteger = getLexer().is(AsmToken::Integer); - if (!getParser().parseExpression(Val, End)) { - if (isParsingInlineAsm()) + if (getLexer().is(AsmToken::Integer) || getLexer().is(AsmToken::Minus) || + getLexer().is(AsmToken::LParen)) { + AsmToken StartTok = Tok; + IntelExprStateMachine SM(/*Imm=*/0, /*StopOnLBrac=*/true, + /*AddImmPrefix=*/false); + if (X86Operand *Err = ParseIntelExpression(SM, End)) + return Err; + + int64_t Imm = SM.getImm(); + if (isParsingInlineAsm()) { + unsigned Len = Tok.getLoc().getPointer() - Start.getPointer(); + if (StartTok.getString().size() == Len) + // Just add a prefix if this wasn't a complex immediate expression. InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_ImmPrefix, Start)); - // Immediate. - if (getLexer().isNot(AsmToken::LBrac)) - return X86Operand::CreateImm(Val, Start, End); - - // Only positive immediates are valid. - if (!isInteger) { - Error(Parser.getTok().getLoc(), "expected a positive immediate " - "displacement before bracketed expr."); - return 0; - } + else + // Otherwise, rewrite the complex expression as a single immediate. + InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Imm, Start, Len, Imm)); + } - // Parse ImmDisp [ BaseReg + Scale*IndexReg + Disp ]. - if (uint64_t ImmDisp = dyn_cast<MCConstantExpr>(Val)->getValue()) - return ParseIntelMemOperand(/*SegReg=*/0, ImmDisp, Start); + if (getLexer().isNot(AsmToken::LBrac)) { + const MCExpr *ImmExpr = MCConstantExpr::Create(Imm, getContext()); + return X86Operand::CreateImm(ImmExpr, Start, End); } + + // Only positive immediates are valid. + if (Imm < 0) + return ErrorOperand(Start, "expected a positive immediate displacement " + "before bracketed expr."); + + // Parse ImmDisp [ BaseReg + Scale*IndexReg + Disp ]. 
+ return ParseIntelMemOperand(/*SegReg=*/0, Imm, Start); } // Register. @@ -1907,7 +2204,7 @@ MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, assert(!Operands.empty() && "Unexpect empty operand list!"); X86Operand *Op = static_cast<X86Operand*>(Operands[0]); assert(Op->isToken() && "Leading operand should always be a mnemonic!"); - ArrayRef<SMRange> EmptyRanges = ArrayRef<SMRange>(); + ArrayRef<SMRange> EmptyRanges = None; // First, handle aliases that expand to multiple instructions. // FIXME: This should be replaced with a real .td file alias mechanism. diff --git a/lib/Target/X86/CMakeLists.txt b/lib/Target/X86/CMakeLists.txt index d14899d..7cb71f0 100644 --- a/lib/Target/X86/CMakeLists.txt +++ b/lib/Target/X86/CMakeLists.txt @@ -33,6 +33,7 @@ set(sources X86TargetObjectFile.cpp X86TargetTransformInfo.cpp X86VZeroUpper.cpp + X86FixupLEAs.cpp ) if( CMAKE_CL_64 ) diff --git a/lib/Target/X86/MCTargetDesc/X86BaseInfo.h b/lib/Target/X86/MCTargetDesc/X86BaseInfo.h index 3669560..d8f7278 100644 --- a/lib/Target/X86/MCTargetDesc/X86BaseInfo.h +++ b/lib/Target/X86/MCTargetDesc/X86BaseInfo.h @@ -20,6 +20,7 @@ #include "X86MCTargetDesc.h" #include "llvm/Support/DataTypes.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/MC/MCInstrInfo.h" namespace llvm { @@ -41,7 +42,6 @@ namespace X86 { AddrNumOperands = 5 }; } // end namespace X86; - /// X86II - This namespace holds all of the target specific flags that /// instruction info tracks. @@ -274,11 +274,12 @@ namespace X86II { //// MRM_XX - A mod/rm byte of exactly 0xXX. MRM_C1 = 33, MRM_C2 = 34, MRM_C3 = 35, MRM_C4 = 36, - MRM_C8 = 37, MRM_C9 = 38, MRM_E8 = 39, MRM_F0 = 40, - MRM_F8 = 41, MRM_F9 = 42, MRM_D0 = 45, MRM_D1 = 46, - MRM_D4 = 47, MRM_D5 = 48, MRM_D6 = 49, MRM_D8 = 50, - MRM_D9 = 51, MRM_DA = 52, MRM_DB = 53, MRM_DC = 54, - MRM_DD = 55, MRM_DE = 56, MRM_DF = 57, + MRM_C8 = 37, MRM_C9 = 38, MRM_CA = 39, MRM_CB = 40, + MRM_E8 = 41, MRM_F0 = 42, MRM_F8 = 45, MRM_F9 = 46, + MRM_D0 = 47, MRM_D1 = 48, MRM_D4 = 49, MRM_D5 = 50, + MRM_D6 = 51, MRM_D8 = 52, MRM_D9 = 53, MRM_DA = 54, + MRM_DB = 55, MRM_DC = 56, MRM_DD = 57, MRM_DE = 58, + MRM_DF = 59, /// RawFrmImm8 - This is used for the ENTER instruction, which has two /// immediates, the first of which is a 16-bit immediate (specified by @@ -521,6 +522,26 @@ namespace X86II { } } + /// getOperandBias - compute any additional adjustment needed to + /// the offset to the start of the memory operand + /// in this instruction. + /// If this is a two-address instruction,skip one of the register operands. + /// FIXME: This should be handled during MCInst lowering. + inline int getOperandBias(const MCInstrDesc& Desc) + { + unsigned NumOps = Desc.getNumOperands(); + unsigned CurOp = 0; + if (NumOps > 1 && Desc.getOperandConstraint(1, MCOI::TIED_TO) == 0) + ++CurOp; + else if (NumOps > 3 && Desc.getOperandConstraint(2, MCOI::TIED_TO) == 0) { + assert(Desc.getOperandConstraint(NumOps - 1, MCOI::TIED_TO) == 1); + // Special case for GATHER with 2 TIED_TO operands + // Skip the first 2 operands: dst, mask_wb + CurOp += 2; + } + return CurOp; + } + /// getMemoryOperandNo - The function returns the MCInst operand # for the /// first field of the memory operand. If the instruction doesn't have a /// memory operand, this returns -1. 
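The X86BaseInfo.h hunk above factors the "skip tied register operands" computation out of the code emitter into X86II::getOperandBias; the X86MCCodeEmitter.cpp hunk further down replaces its open-coded copy with a call to it. Below is a minimal standalone sketch of the same rule, using a simplified stand-in for MCInstrDesc rather than the real LLVM API.

#include <cassert>
#include <cstdio>
#include <vector>

// Simplified stand-in for an instruction descriptor: each operand records
// which earlier operand it is tied to, or -1 if it is untied.
struct OperandSpec { int TiedTo; };
struct InstrDesc { std::vector<OperandSpec> Operands; };

// Same idea as X86II::getOperandBias: for a two-address form skip the
// duplicated register operand; for a GATHER-like form with two TIED_TO
// operands skip dst and mask_wb before the memory operand fields start.
static unsigned getOperandBias(const InstrDesc &Desc) {
  unsigned NumOps = static_cast<unsigned>(Desc.Operands.size());
  if (NumOps > 1 && Desc.Operands[1].TiedTo == 0)
    return 1;
  if (NumOps > 3 && Desc.Operands[2].TiedTo == 0) {
    assert(Desc.Operands[NumOps - 1].TiedTo == 1 && "expected GATHER-like form");
    return 2;
  }
  return 0;
}

int main() {
  InstrDesc TwoAddress{{{-1}, {0}, {-1}}};        // dst tied to src1
  InstrDesc Gather{{{-1}, {-1}, {0}, {-1}, {1}}}; // dst, mask_wb both tied
  std::printf("two-address bias: %u\n", getOperandBias(TwoAddress));
  std::printf("gather bias:      %u\n", getOperandBias(Gather));
  return 0;
}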
@@ -576,12 +597,13 @@ namespace X86II { } case X86II::MRM_C1: case X86II::MRM_C2: case X86II::MRM_C3: case X86II::MRM_C4: case X86II::MRM_C8: case X86II::MRM_C9: - case X86II::MRM_E8: case X86II::MRM_F0: case X86II::MRM_F8: - case X86II::MRM_F9: case X86II::MRM_D0: case X86II::MRM_D1: - case X86II::MRM_D4: case X86II::MRM_D5: case X86II::MRM_D6: - case X86II::MRM_D8: case X86II::MRM_D9: case X86II::MRM_DA: - case X86II::MRM_DB: case X86II::MRM_DC: case X86II::MRM_DD: - case X86II::MRM_DE: case X86II::MRM_DF: + case X86II::MRM_CA: case X86II::MRM_CB: case X86II::MRM_E8: + case X86II::MRM_F0: case X86II::MRM_F8: case X86II::MRM_F9: + case X86II::MRM_D0: case X86II::MRM_D1: case X86II::MRM_D4: + case X86II::MRM_D5: case X86II::MRM_D6: case X86II::MRM_D8: + case X86II::MRM_D9: case X86II::MRM_DA: case X86II::MRM_DB: + case X86II::MRM_DC: case X86II::MRM_DD: case X86II::MRM_DE: + case X86II::MRM_DF: return -1; } } diff --git a/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp b/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp index 776cee1..016af71 100644 --- a/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp +++ b/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp @@ -237,6 +237,14 @@ StartsWithGlobalOffsetTable(const MCExpr *Expr) { return GOT_Normal; } +static bool HasSecRelSymbolRef(const MCExpr *Expr) { + if (Expr->getKind() == MCExpr::SymbolRef) { + const MCSymbolRefExpr *Ref = static_cast<const MCSymbolRefExpr*>(Expr); + return Ref->getKind() == MCSymbolRefExpr::VK_SECREL; + } + return false; +} + void X86MCCodeEmitter:: EmitImmediate(const MCOperand &DispOp, SMLoc Loc, unsigned Size, MCFixupKind FixupKind, unsigned &CurByte, raw_ostream &OS, @@ -268,8 +276,13 @@ EmitImmediate(const MCOperand &DispOp, SMLoc Loc, unsigned Size, if (Kind == GOT_Normal) ImmOffset = CurByte; } else if (Expr->getKind() == MCExpr::SymbolRef) { - const MCSymbolRefExpr *Ref = static_cast<const MCSymbolRefExpr*>(Expr); - if (Ref->getKind() == MCSymbolRefExpr::VK_SECREL) { + if (HasSecRelSymbolRef(Expr)) { + FixupKind = MCFixupKind(FK_SecRel_4); + } + } else if (Expr->getKind() == MCExpr::Binary) { + const MCBinaryExpr *Bin = static_cast<const MCBinaryExpr*>(Expr); + if (HasSecRelSymbolRef(Bin->getLHS()) + || HasSecRelSymbolRef(Bin->getRHS())) { FixupKind = MCFixupKind(FK_SecRel_4); } } @@ -979,18 +992,8 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS, if ((TSFlags & X86II::FormMask) == X86II::Pseudo) return; - // If this is a two-address instruction, skip one of the register operands. - // FIXME: This should be handled during MCInst lowering. unsigned NumOps = Desc.getNumOperands(); - unsigned CurOp = 0; - if (NumOps > 1 && Desc.getOperandConstraint(1, MCOI::TIED_TO) == 0) - ++CurOp; - else if (NumOps > 3 && Desc.getOperandConstraint(2, MCOI::TIED_TO) == 0) { - assert(Desc.getOperandConstraint(NumOps - 1, MCOI::TIED_TO) == 1); - // Special case for GATHER with 2 TIED_TO operands - // Skip the first 2 operands: dst, mask_wb - CurOp += 2; - } + unsigned CurOp = X86II::getOperandBias(Desc); // Keep track of the current byte being emitted. 
unsigned CurByte = 0; @@ -1138,12 +1141,13 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS, break; case X86II::MRM_C1: case X86II::MRM_C2: case X86II::MRM_C3: case X86II::MRM_C4: case X86II::MRM_C8: case X86II::MRM_C9: - case X86II::MRM_D0: case X86II::MRM_D1: case X86II::MRM_D4: - case X86II::MRM_D5: case X86II::MRM_D6: case X86II::MRM_D8: - case X86II::MRM_D9: case X86II::MRM_DA: case X86II::MRM_DB: - case X86II::MRM_DC: case X86II::MRM_DD: case X86II::MRM_DE: - case X86II::MRM_DF: case X86II::MRM_E8: case X86II::MRM_F0: - case X86II::MRM_F8: case X86II::MRM_F9: + case X86II::MRM_CA: case X86II::MRM_CB: case X86II::MRM_D0: + case X86II::MRM_D1: case X86II::MRM_D4: case X86II::MRM_D5: + case X86II::MRM_D6: case X86II::MRM_D8: case X86II::MRM_D9: + case X86II::MRM_DA: case X86II::MRM_DB: case X86II::MRM_DC: + case X86II::MRM_DD: case X86II::MRM_DE: case X86II::MRM_DF: + case X86II::MRM_E8: case X86II::MRM_F0: case X86II::MRM_F8: + case X86II::MRM_F9: EmitByte(BaseOpcode, CurByte, OS); unsigned char MRM; @@ -1155,6 +1159,8 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS, case X86II::MRM_C4: MRM = 0xC4; break; case X86II::MRM_C8: MRM = 0xC8; break; case X86II::MRM_C9: MRM = 0xC9; break; + case X86II::MRM_CA: MRM = 0xCA; break; + case X86II::MRM_CB: MRM = 0xCB; break; case X86II::MRM_D0: MRM = 0xD0; break; case X86II::MRM_D1: MRM = 0xD1; break; case X86II::MRM_D4: MRM = 0xD4; break; diff --git a/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp b/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp index bc272ef..ed64a32 100644 --- a/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp +++ b/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp @@ -9,6 +9,8 @@ #include "MCTargetDesc/X86FixupKinds.h" #include "MCTargetDesc/X86MCTargetDesc.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCValue.h" #include "llvm/MC/MCWinCOFFObjectWriter.h" #include "llvm/Support/COFF.h" #include "llvm/Support/ErrorHandling.h" @@ -27,7 +29,9 @@ namespace { X86WinCOFFObjectWriter(bool Is64Bit_); ~X86WinCOFFObjectWriter(); - virtual unsigned getRelocType(unsigned FixupKind) const; + virtual unsigned getRelocType(const MCValue &Target, + const MCFixup &Fixup, + bool IsCrossSection) const LLVM_OVERRIDE; }; } @@ -38,7 +42,14 @@ X86WinCOFFObjectWriter::X86WinCOFFObjectWriter(bool Is64Bit_) X86WinCOFFObjectWriter::~X86WinCOFFObjectWriter() {} -unsigned X86WinCOFFObjectWriter::getRelocType(unsigned FixupKind) const { +unsigned X86WinCOFFObjectWriter::getRelocType(const MCValue &Target, + const MCFixup &Fixup, + bool IsCrossSection) const { + unsigned FixupKind = IsCrossSection ? FK_PCRel_4 : Fixup.getKind(); + + MCSymbolRefExpr::VariantKind Modifier = Target.isAbsolute() ? + MCSymbolRefExpr::VK_None : Target.getSymA()->getKind(); + switch (FixupKind) { case FK_PCRel_4: case X86::reloc_riprel_4byte: @@ -46,6 +57,9 @@ unsigned X86WinCOFFObjectWriter::getRelocType(unsigned FixupKind) const { return Is64Bit ? COFF::IMAGE_REL_AMD64_REL32 : COFF::IMAGE_REL_I386_REL32; case FK_Data_4: case X86::reloc_signed_4byte: + if (Modifier == MCSymbolRefExpr::VK_COFF_IMGREL32) + return Is64Bit ? COFF::IMAGE_REL_AMD64_ADDR32NB : + COFF::IMAGE_REL_I386_DIR32NB; return Is64Bit ? 
COFF::IMAGE_REL_AMD64_ADDR32 : COFF::IMAGE_REL_I386_DIR32; case FK_Data_8: if (Is64Bit) diff --git a/lib/Target/X86/X86.h b/lib/Target/X86/X86.h index 1f9919f..947002f 100644 --- a/lib/Target/X86/X86.h +++ b/lib/Target/X86/X86.h @@ -69,6 +69,11 @@ ImmutablePass *createX86TargetTransformInfoPass(const X86TargetMachine *TM); /// createX86PadShortFunctions - Return a pass that pads short functions /// with NOOPs. This will prevent a stall when returning on the Atom. FunctionPass *createX86PadShortFunctions(); +/// createX86FixupLEAs - Return a a pass that selectively replaces +/// certain instructions (like add, sub, inc, dec, some shifts, +/// and some multiplies) by equivalent LEA instructions, in order +/// to eliminate execution delays in some Atom processors. +FunctionPass *createX86FixupLEAs(); } // End llvm namespace diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td index 1dcc344..c865500 100644 --- a/lib/Target/X86/X86.td +++ b/lib/Target/X86/X86.td @@ -139,6 +139,8 @@ def FeaturePadShortFunctions : SubtargetFeature<"pad-short-functions", def FeatureCallRegIndirect : SubtargetFeature<"call-reg-indirect", "CallRegIndirect", "true", "Call register indirect">; +def FeatureLEAUsesAG : SubtargetFeature<"lea-uses-ag", "LEAUsesAG", "true", + "LEA instruction needs inputs at AG stage">; //===----------------------------------------------------------------------===// // X86 processors supported. @@ -188,6 +190,7 @@ def : ProcessorModel<"atom", AtomModel, FeatureMOVBE, FeatureSlowBTMem, FeatureLeaForSP, FeatureSlowDivide, FeatureCallRegIndirect, + FeatureLEAUsesAG, FeaturePadShortFunctions]>; // "Arrandale" along with corei3 and corei5 @@ -252,11 +255,16 @@ def : Proc<"amdfam10", [FeatureSSE4A, // Bobcat def : Proc<"btver1", [FeatureSSSE3, FeatureSSE4A, FeatureCMPXCHG16B, FeatureLZCNT, FeaturePOPCNT]>; +// Jaguar +def : Proc<"btver2", [FeatureAVX, FeatureSSE4A, FeatureCMPXCHG16B, + FeatureAES, FeaturePCLMUL, FeatureBMI, + FeatureF16C, FeatureMOVBE, FeatureLZCNT, + FeaturePOPCNT]>; // Bulldozer def : Proc<"bdver1", [FeatureXOP, FeatureFMA4, FeatureCMPXCHG16B, FeatureAES, FeaturePCLMUL, FeatureLZCNT, FeaturePOPCNT]>; -// Enhanced Bulldozer +// Piledriver def : Proc<"bdver2", [FeatureXOP, FeatureFMA4, FeatureCMPXCHG16B, FeatureAES, FeaturePCLMUL, FeatureF16C, FeatureLZCNT, @@ -300,6 +308,9 @@ def ATTAsmParser : AsmParser { def ATTAsmParserVariant : AsmParserVariant { int Variant = 0; + // Variant name. + string Name = "att"; + // Discard comments in assembly strings. string CommentDelimiter = "#"; @@ -310,6 +321,9 @@ def ATTAsmParserVariant : AsmParserVariant { def IntelAsmParserVariant : AsmParserVariant { int Variant = 1; + // Variant name. + string Name = "intel"; + // Discard comments in assembly strings. 
string CommentDelimiter = ";"; diff --git a/lib/Target/X86/X86CodeEmitter.cpp b/lib/Target/X86/X86CodeEmitter.cpp index 2518e02..8fea6ed 100644 --- a/lib/Target/X86/X86CodeEmitter.cpp +++ b/lib/Target/X86/X86CodeEmitter.cpp @@ -1451,6 +1451,14 @@ void Emitter<CodeEmitter>::emitInstruction(MachineInstr &MI, MCE.emitByte(BaseOpcode); MCE.emitByte(0xC9); break; + case X86II::MRM_CA: + MCE.emitByte(BaseOpcode); + MCE.emitByte(0xCA); + break; + case X86II::MRM_CB: + MCE.emitByte(BaseOpcode); + MCE.emitByte(0xCB); + break; case X86II::MRM_E8: MCE.emitByte(BaseOpcode); MCE.emitByte(0xE8); diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp index cadec68..cf44bd0 100644 --- a/lib/Target/X86/X86FastISel.cpp +++ b/lib/Target/X86/X86FastISel.cpp @@ -68,12 +68,12 @@ public: virtual bool TargetSelectInstruction(const Instruction *I); - /// TryToFoldLoad - The specified machine instr operand is a vreg, and that + /// \brief The specified machine instr operand is a vreg, and that /// vreg is being provided by the specified load instruction. If possible, /// try to fold the load as an operand to the instruction, returning true if /// possible. - virtual bool TryToFoldLoad(MachineInstr *MI, unsigned OpNo, - const LoadInst *LI); + virtual bool tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo, + const LoadInst *LI); virtual bool FastLowerArguments(); @@ -107,6 +107,8 @@ private: bool X86SelectShift(const Instruction *I); + bool X86SelectDivRem(const Instruction *I); + bool X86SelectSelect(const Instruction *I); bool X86SelectTrunc(const Instruction *I); @@ -691,11 +693,6 @@ bool X86FastISel::X86SelectStore(const Instruction *I) { if (S->isAtomic()) return false; - unsigned SABIAlignment = - TD.getABITypeAlignment(S->getValueOperand()->getType()); - if (S->getAlignment() != 0 && S->getAlignment() < SABIAlignment) - return false; - MVT VT; if (!isTypeLegal(I->getOperand(0)->getType(), VT, /*AllowI1=*/true)) return false; @@ -1235,6 +1232,124 @@ bool X86FastISel::X86SelectShift(const Instruction *I) { return true; } +bool X86FastISel::X86SelectDivRem(const Instruction *I) { + const static unsigned NumTypes = 4; // i8, i16, i32, i64 + const static unsigned NumOps = 4; // SDiv, SRem, UDiv, URem + const static bool S = true; // IsSigned + const static bool U = false; // !IsSigned + const static unsigned Copy = TargetOpcode::COPY; + // For the X86 DIV/IDIV instruction, in most cases the dividend + // (numerator) must be in a specific register pair highreg:lowreg, + // producing the quotient in lowreg and the remainder in highreg. + // For most data types, to set up the instruction, the dividend is + // copied into lowreg, and lowreg is sign-extended or zero-extended + // into highreg. The exception is i8, where the dividend is defined + // as a single register rather than a register pair, and we + // therefore directly sign-extend or zero-extend the dividend into + // lowreg, instead of copying, and ignore the highreg. + const static struct DivRemEntry { + // The following portion depends only on the data type. + const TargetRegisterClass *RC; + unsigned LowInReg; // low part of the register pair + unsigned HighInReg; // high part of the register pair + // The following portion depends on both the data type and the operation. + struct DivRemResult { + unsigned OpDivRem; // The specific DIV/IDIV opcode to use. + unsigned OpSignExtend; // Opcode for sign-extending lowreg into + // highreg, or copying a zero into highreg. 
+ unsigned OpCopy; // Opcode for copying dividend into lowreg, or + // zero/sign-extending into lowreg for i8. + unsigned DivRemResultReg; // Register containing the desired result. + bool IsOpSigned; // Whether to use signed or unsigned form. + } ResultTable[NumOps]; + } OpTable[NumTypes] = { + { &X86::GR8RegClass, X86::AX, 0, { + { X86::IDIV8r, 0, X86::MOVSX16rr8, X86::AL, S }, // SDiv + { X86::IDIV8r, 0, X86::MOVSX16rr8, X86::AH, S }, // SRem + { X86::DIV8r, 0, X86::MOVZX16rr8, X86::AL, U }, // UDiv + { X86::DIV8r, 0, X86::MOVZX16rr8, X86::AH, U }, // URem + } + }, // i8 + { &X86::GR16RegClass, X86::AX, X86::DX, { + { X86::IDIV16r, X86::CWD, Copy, X86::AX, S }, // SDiv + { X86::IDIV16r, X86::CWD, Copy, X86::DX, S }, // SRem + { X86::DIV16r, X86::MOV16r0, Copy, X86::AX, U }, // UDiv + { X86::DIV16r, X86::MOV16r0, Copy, X86::DX, U }, // URem + } + }, // i16 + { &X86::GR32RegClass, X86::EAX, X86::EDX, { + { X86::IDIV32r, X86::CDQ, Copy, X86::EAX, S }, // SDiv + { X86::IDIV32r, X86::CDQ, Copy, X86::EDX, S }, // SRem + { X86::DIV32r, X86::MOV32r0, Copy, X86::EAX, U }, // UDiv + { X86::DIV32r, X86::MOV32r0, Copy, X86::EDX, U }, // URem + } + }, // i32 + { &X86::GR64RegClass, X86::RAX, X86::RDX, { + { X86::IDIV64r, X86::CQO, Copy, X86::RAX, S }, // SDiv + { X86::IDIV64r, X86::CQO, Copy, X86::RDX, S }, // SRem + { X86::DIV64r, X86::MOV64r0, Copy, X86::RAX, U }, // UDiv + { X86::DIV64r, X86::MOV64r0, Copy, X86::RDX, U }, // URem + } + }, // i64 + }; + + MVT VT; + if (!isTypeLegal(I->getType(), VT)) + return false; + + unsigned TypeIndex, OpIndex; + switch (VT.SimpleTy) { + default: return false; + case MVT::i8: TypeIndex = 0; break; + case MVT::i16: TypeIndex = 1; break; + case MVT::i32: TypeIndex = 2; break; + case MVT::i64: TypeIndex = 3; + if (!Subtarget->is64Bit()) + return false; + break; + } + + switch (I->getOpcode()) { + default: llvm_unreachable("Unexpected div/rem opcode"); + case Instruction::SDiv: OpIndex = 0; break; + case Instruction::SRem: OpIndex = 1; break; + case Instruction::UDiv: OpIndex = 2; break; + case Instruction::URem: OpIndex = 3; break; + } + + const DivRemEntry &TypeEntry = OpTable[TypeIndex]; + const DivRemEntry::DivRemResult &OpEntry = TypeEntry.ResultTable[OpIndex]; + unsigned Op0Reg = getRegForValue(I->getOperand(0)); + if (Op0Reg == 0) + return false; + unsigned Op1Reg = getRegForValue(I->getOperand(1)); + if (Op1Reg == 0) + return false; + + // Move op0 into low-order input register. + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(OpEntry.OpCopy), TypeEntry.LowInReg).addReg(Op0Reg); + // Zero-extend or sign-extend into high-order input register. + if (OpEntry.OpSignExtend) { + if (OpEntry.IsOpSigned) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(OpEntry.OpSignExtend)); + else + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(OpEntry.OpSignExtend), TypeEntry.HighInReg); + } + // Generate the DIV/IDIV instruction. + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(OpEntry.OpDivRem)).addReg(Op1Reg); + // Copy output register into result register. 
+ unsigned ResultReg = createResultReg(TypeEntry.RC); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(Copy), ResultReg).addReg(OpEntry.DivRemResultReg); + UpdateValueMap(I, ResultReg); + + return true; +} + bool X86FastISel::X86SelectSelect(const Instruction *I) { MVT VT; if (!isTypeLegal(I->getType(), VT)) @@ -2084,6 +2199,11 @@ X86FastISel::TargetSelectInstruction(const Instruction *I) { case Instruction::AShr: case Instruction::Shl: return X86SelectShift(I); + case Instruction::SDiv: + case Instruction::UDiv: + case Instruction::SRem: + case Instruction::URem: + return X86SelectDivRem(I); case Instruction::Select: return X86SelectSelect(I); case Instruction::Trunc: @@ -2275,12 +2395,8 @@ unsigned X86FastISel::TargetMaterializeFloatZero(const ConstantFP *CF) { } -/// TryToFoldLoad - The specified machine instr operand is a vreg, and that -/// vreg is being provided by the specified load instruction. If possible, -/// try to fold the load as an operand to the instruction, returning true if -/// possible. -bool X86FastISel::TryToFoldLoad(MachineInstr *MI, unsigned OpNo, - const LoadInst *LI) { +bool X86FastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo, + const LoadInst *LI) { X86AddressMode AM; if (!X86SelectAddress(LI->getOperand(0), AM)) return false; diff --git a/lib/Target/X86/X86FixupLEAs.cpp b/lib/Target/X86/X86FixupLEAs.cpp new file mode 100644 index 0000000..0dd034c --- /dev/null +++ b/lib/Target/X86/X86FixupLEAs.cpp @@ -0,0 +1,253 @@ +//===-- X86FixupLEAs.cpp - use or replace LEA instructions -----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the pass which will find instructions which +// can be re-written as LEA instructions in order to reduce pipeline +// delays for some models of the Intel Atom family. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "x86-fixup-LEAs" +#include "X86.h" +#include "X86InstrInfo.h" +#include "X86Subtarget.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/LiveVariables.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" +using namespace llvm; + +STATISTIC(NumLEAs, "Number of LEA instructions created"); + +namespace { + class FixupLEAPass : public MachineFunctionPass { + enum RegUsageState { RU_NotUsed, RU_Write, RU_Read }; + static char ID; + /// \brief Loop over all of the instructions in the basic block + /// replacing applicable instructions with LEA instructions, + /// where appropriate. + bool processBasicBlock(MachineFunction &MF, MachineFunction::iterator MFI); + + virtual const char *getPassName() const { return "X86 Atom LEA Fixup";} + + /// \brief Given a machine register, look for the instruction + /// which writes it in the current basic block. If found, + /// try to replace it with an equivalent LEA instruction. + /// If replacement succeeds, then also process the the newly created + /// instruction. 
+ void seekLEAFixup(MachineOperand& p, MachineBasicBlock::iterator& I, + MachineFunction::iterator MFI); + + /// \brief Given a memory access or LEA instruction + /// whose address mode uses a base and/or index register, look for + /// an opportunity to replace the instruction which sets the base or index + /// register with an equivalent LEA instruction. + void processInstruction(MachineBasicBlock::iterator& I, + MachineFunction::iterator MFI); + + /// \brief Determine if an instruction references a machine register + /// and, if so, whether it reads or writes the register. + RegUsageState usesRegister(MachineOperand& p, + MachineBasicBlock::iterator I); + + /// \brief Step backwards through a basic block, looking + /// for an instruction which writes a register within + /// a maximum of INSTR_DISTANCE_THRESHOLD instruction latency cycles. + MachineBasicBlock::iterator searchBackwards(MachineOperand& p, + MachineBasicBlock::iterator& I, + MachineFunction::iterator MFI); + + /// \brief if an instruction can be converted to an + /// equivalent LEA, insert the new instruction into the basic block + /// and return a pointer to it. Otherwise, return zero. + MachineInstr* postRAConvertToLEA(MachineFunction::iterator &MFI, + MachineBasicBlock::iterator &MBBI) const; + + public: + FixupLEAPass() : MachineFunctionPass(ID) {} + + /// \brief Loop over all of the basic blocks, + /// replacing instructions by equivalent LEA instructions + /// if needed and when possible. + virtual bool runOnMachineFunction(MachineFunction &MF); + + private: + MachineFunction *MF; + const TargetMachine *TM; + const TargetInstrInfo *TII; // Machine instruction info. + + }; + char FixupLEAPass::ID = 0; +} + +MachineInstr * +FixupLEAPass::postRAConvertToLEA(MachineFunction::iterator &MFI, + MachineBasicBlock::iterator &MBBI) const { + MachineInstr* MI = MBBI; + MachineInstr* NewMI; + switch (MI->getOpcode()) { + case X86::MOV32rr: + case X86::MOV64rr: { + const MachineOperand& Src = MI->getOperand(1); + const MachineOperand& Dest = MI->getOperand(0); + NewMI = BuildMI(*MF, MI->getDebugLoc(), + TII->get( MI->getOpcode() == X86::MOV32rr ? X86::LEA32r : X86::LEA64r)) + .addOperand(Dest) + .addOperand(Src).addImm(1).addReg(0).addImm(0).addReg(0); + MFI->insert(MBBI, NewMI); // Insert the new inst + return NewMI; + } + case X86::ADD64ri32: + case X86::ADD64ri8: + case X86::ADD64ri32_DB: + case X86::ADD64ri8_DB: + case X86::ADD32ri: + case X86::ADD32ri8: + case X86::ADD32ri_DB: + case X86::ADD32ri8_DB: + case X86::ADD16ri: + case X86::ADD16ri8: + case X86::ADD16ri_DB: + case X86::ADD16ri8_DB: + if (!MI->getOperand(2).isImm()) { + // convertToThreeAddress will call getImm() + // which requires isImm() to be true + return 0; + } + } + return TII->convertToThreeAddress(MFI, MBBI, 0); +} + +FunctionPass *llvm::createX86FixupLEAs() { + return new FixupLEAPass(); +} + +bool FixupLEAPass::runOnMachineFunction(MachineFunction &Func) { + MF = &Func; + TII = Func.getTarget().getInstrInfo(); + TM = &MF->getTarget(); + + DEBUG(dbgs() << "Start X86FixupLEAs\n";); + // Process all basic blocks. 
+ for (MachineFunction::iterator I = Func.begin(), E = Func.end(); I != E; ++I) + processBasicBlock(Func, I); + DEBUG(dbgs() << "End X86FixupLEAs\n";); + + return true; +} + +FixupLEAPass::RegUsageState FixupLEAPass::usesRegister(MachineOperand& p, + MachineBasicBlock::iterator I) { + RegUsageState RegUsage = RU_NotUsed; + MachineInstr* MI = I; + + for (unsigned int i = 0; i < MI->getNumOperands(); ++i) { + MachineOperand& opnd = MI->getOperand(i); + if (opnd.isReg() && opnd.getReg() == p.getReg()){ + if (opnd.isDef()) + return RU_Write; + RegUsage = RU_Read; + } + } + return RegUsage; +} + +/// getPreviousInstr - Given a reference to an instruction in a basic +/// block, return a reference to the previous instruction in the block, +/// wrapping around to the last instruction of the block if the block +/// branches to itself. +static inline bool getPreviousInstr(MachineBasicBlock::iterator& I, + MachineFunction::iterator MFI) { + if (I == MFI->begin()) { + if (MFI->isPredecessor(MFI)) { + I = --MFI->end(); + return true; + } + else + return false; + } + --I; + return true; +} + +MachineBasicBlock::iterator FixupLEAPass::searchBackwards(MachineOperand& p, + MachineBasicBlock::iterator& I, + MachineFunction::iterator MFI) { + int InstrDistance = 1; + MachineBasicBlock::iterator CurInst; + static const int INSTR_DISTANCE_THRESHOLD = 5; + + CurInst = I; + bool Found; + Found = getPreviousInstr(CurInst, MFI); + while( Found && I != CurInst) { + if (CurInst->isCall() || CurInst->isInlineAsm()) + break; + if (InstrDistance > INSTR_DISTANCE_THRESHOLD) + break; // too far back to make a difference + if (usesRegister(p, CurInst) == RU_Write){ + return CurInst; + } + InstrDistance += TII->getInstrLatency(TM->getInstrItineraryData(), CurInst); + Found = getPreviousInstr(CurInst, MFI); + } + return 0; +} + +void FixupLEAPass::processInstruction(MachineBasicBlock::iterator& I, + MachineFunction::iterator MFI) { + // Process a load, store, or LEA instruction. + MachineInstr *MI = I; + int opcode = MI->getOpcode(); + const MCInstrDesc& Desc = MI->getDesc(); + int AddrOffset = X86II::getMemoryOperandNo(Desc.TSFlags, opcode); + if (AddrOffset >= 0) { + AddrOffset += X86II::getOperandBias(Desc); + MachineOperand& p = MI->getOperand(AddrOffset + X86::AddrBaseReg); + if (p.isReg() && p.getReg() != X86::ESP) { + seekLEAFixup(p, I, MFI); + } + MachineOperand& q = MI->getOperand(AddrOffset + X86::AddrIndexReg); + if (q.isReg() && q.getReg() != X86::ESP) { + seekLEAFixup(q, I, MFI); + } + } +} + +void FixupLEAPass::seekLEAFixup(MachineOperand& p, + MachineBasicBlock::iterator& I, + MachineFunction::iterator MFI) { + MachineBasicBlock::iterator MBI = searchBackwards(p, I, MFI); + if (MBI) { + MachineInstr* NewMI = postRAConvertToLEA(MFI, MBI); + if (NewMI) { + ++NumLEAs; + DEBUG(dbgs() << "Candidate to replace:"; MBI->dump();); + // now to replace with an equivalent LEA... 
+ DEBUG(dbgs() << "Replaced by: "; NewMI->dump();); + MFI->erase(MBI); + MachineBasicBlock::iterator J = + static_cast<MachineBasicBlock::iterator> (NewMI); + processInstruction(J, MFI); + } + } +} + +bool FixupLEAPass::processBasicBlock(MachineFunction &MF, + MachineFunction::iterator MFI) { + + for (MachineBasicBlock::iterator I = MFI->begin(); I != MFI->end(); ++I) + processInstruction(I, MFI); + return false; +} diff --git a/lib/Target/X86/X86FrameLowering.cpp b/lib/Target/X86/X86FrameLowering.cpp index 54cbd40..42b4e73 100644 --- a/lib/Target/X86/X86FrameLowering.cpp +++ b/lib/Target/X86/X86FrameLowering.cpp @@ -369,7 +369,14 @@ void X86FrameLowering::emitCalleeSavedFrameMoves(MachineFunction &MF, /// getCompactUnwindRegNum - Get the compact unwind number for a given /// register. The number corresponds to the enum lists in /// compact_unwind_encoding.h. -static int getCompactUnwindRegNum(const uint16_t *CURegs, unsigned Reg) { +static int getCompactUnwindRegNum(unsigned Reg, bool is64Bit) { + static const uint16_t CU32BitRegs[] = { + X86::EBX, X86::ECX, X86::EDX, X86::EDI, X86::ESI, X86::EBP, 0 + }; + static const uint16_t CU64BitRegs[] = { + X86::RBX, X86::R12, X86::R13, X86::R14, X86::R15, X86::RBP, 0 + }; + const uint16_t *CURegs = is64Bit ? CU64BitRegs : CU32BitRegs; for (int Idx = 1; *CURegs; ++CURegs, ++Idx) if (*CURegs == Reg) return Idx; @@ -398,16 +405,8 @@ encodeCompactUnwindRegistersWithoutFrame(unsigned SavedRegs[CU_NUM_SAVED_REGS], // 4 3 // 5 3 // - static const uint16_t CU32BitRegs[] = { - X86::EBX, X86::ECX, X86::EDX, X86::EDI, X86::ESI, X86::EBP, 0 - }; - static const uint16_t CU64BitRegs[] = { - X86::RBX, X86::R12, X86::R13, X86::R14, X86::R15, X86::RBP, 0 - }; - const uint16_t *CURegs = (Is64Bit ? CU64BitRegs : CU32BitRegs); - for (unsigned i = 0; i != CU_NUM_SAVED_REGS; ++i) { - int CUReg = getCompactUnwindRegNum(CURegs, SavedRegs[i]); + int CUReg = getCompactUnwindRegNum(SavedRegs[i], Is64Bit); if (CUReg == -1) return ~0U; SavedRegs[i] = CUReg; } @@ -466,14 +465,6 @@ encodeCompactUnwindRegistersWithoutFrame(unsigned SavedRegs[CU_NUM_SAVED_REGS], static uint32_t encodeCompactUnwindRegistersWithFrame(unsigned SavedRegs[CU_NUM_SAVED_REGS], bool Is64Bit) { - static const uint16_t CU32BitRegs[] = { - X86::EBX, X86::ECX, X86::EDX, X86::EDI, X86::ESI, X86::EBP, 0 - }; - static const uint16_t CU64BitRegs[] = { - X86::RBX, X86::R12, X86::R13, X86::R14, X86::R15, X86::RBP, 0 - }; - const uint16_t *CURegs = (Is64Bit ? CU64BitRegs : CU32BitRegs); - // Encode the registers in the order they were saved, 3-bits per register. The // registers are numbered from 1 to CU_NUM_SAVED_REGS. uint32_t RegEnc = 0; @@ -481,7 +472,7 @@ encodeCompactUnwindRegistersWithFrame(unsigned SavedRegs[CU_NUM_SAVED_REGS], unsigned Reg = SavedRegs[I]; if (Reg == 0) continue; - int CURegNum = getCompactUnwindRegNum(CURegs, Reg); + int CURegNum = getCompactUnwindRegNum(Reg, Is64Bit); if (CURegNum == -1) return ~0U; // Encode the 3-bit register number in order, skipping over 3-bits for each @@ -528,11 +519,17 @@ uint32_t X86FrameLowering::getCompactUnwindEncoding(MachineFunction &MF) const { if (!MI.getFlag(MachineInstr::FrameSetup)) break; // We don't exect any more prolog instructions. - if (ExpectEnd) return 0; + if (ExpectEnd) return CU::UNWIND_MODE_DWARF; if (Opc == PushInstr) { // If there are too many saved registers, we cannot use compact encoding. 
- if (SavedRegIdx >= CU_NUM_SAVED_REGS) return 0; + if (SavedRegIdx >= CU_NUM_SAVED_REGS) return CU::UNWIND_MODE_DWARF; + + unsigned Reg = MI.getOperand(0).getReg(); + if (Reg == (Is64Bit ? X86::RAX : X86::EAX)) { + ExpectEnd = true; + continue; + } SavedRegs[SavedRegIdx++] = MI.getOperand(0).getReg(); StackAdjust += OffsetSize; @@ -542,7 +539,7 @@ uint32_t X86FrameLowering::getCompactUnwindEncoding(MachineFunction &MF) const { unsigned DstReg = MI.getOperand(0).getReg(); if (DstReg != FramePtr || SrcReg != StackPtr) - return 0; + return CU::UNWIND_MODE_DWARF; StackAdjust = 0; memset(SavedRegs, 0, sizeof(SavedRegs)); @@ -552,7 +549,7 @@ uint32_t X86FrameLowering::getCompactUnwindEncoding(MachineFunction &MF) const { Opc == X86::SUB32ri || Opc == X86::SUB32ri8) { if (StackSize) // We already have a stack size. - return 0; + return CU::UNWIND_MODE_DWARF; if (!MI.getOperand(0).isReg() || MI.getOperand(0).getReg() != MI.getOperand(1).getReg() || @@ -560,7 +557,7 @@ uint32_t X86FrameLowering::getCompactUnwindEncoding(MachineFunction &MF) const { // We need this to be a stack adjustment pointer. Something like: // // %RSP<def> = SUB64ri8 %RSP, 48 - return 0; + return CU::UNWIND_MODE_DWARF; StackSize = MI.getOperand(2).getImm() / StackDivide; SubtractInstrIdx += InstrOffset; @@ -574,31 +571,31 @@ uint32_t X86FrameLowering::getCompactUnwindEncoding(MachineFunction &MF) const { if (HasFP) { if ((StackAdjust & 0xFF) != StackAdjust) // Offset was too big for compact encoding. - return 0; + return CU::UNWIND_MODE_DWARF; // Get the encoding of the saved registers when we have a frame pointer. uint32_t RegEnc = encodeCompactUnwindRegistersWithFrame(SavedRegs, Is64Bit); - if (RegEnc == ~0U) return 0; + if (RegEnc == ~0U) return CU::UNWIND_MODE_DWARF; - CompactUnwindEncoding |= 0x01000000; + CompactUnwindEncoding |= CU::UNWIND_MODE_BP_FRAME; CompactUnwindEncoding |= (StackAdjust & 0xFF) << 16; - CompactUnwindEncoding |= RegEnc & 0x7FFF; + CompactUnwindEncoding |= RegEnc & CU::UNWIND_BP_FRAME_REGISTERS; } else { ++StackAdjust; uint32_t TotalStackSize = StackAdjust + StackSize; if ((TotalStackSize & 0xFF) == TotalStackSize) { // Frameless stack with a small stack size. - CompactUnwindEncoding |= 0x02000000; + CompactUnwindEncoding |= CU::UNWIND_MODE_STACK_IMMD; // Encode the stack size. CompactUnwindEncoding |= (TotalStackSize & 0xFF) << 16; } else { if ((StackAdjust & 0x7) != StackAdjust) // The extra stack adjustments are too big for us to handle. - return 0; + return CU::UNWIND_MODE_DWARF; // Frameless stack with an offset too large for us to encode compactly. - CompactUnwindEncoding |= 0x03000000; + CompactUnwindEncoding |= CU::UNWIND_MODE_STACK_IND; // Encode the offset to the nnnnnn value in the 'subl $nnnnnn, ESP' // instruction. @@ -616,10 +613,11 @@ uint32_t X86FrameLowering::getCompactUnwindEncoding(MachineFunction &MF) const { uint32_t RegEnc = encodeCompactUnwindRegistersWithoutFrame(SavedRegs, SavedRegIdx, Is64Bit); - if (RegEnc == ~0U) return 0; + if (RegEnc == ~0U) return CU::UNWIND_MODE_DWARF; // Encode the register encoding. 
- CompactUnwindEncoding |= RegEnc & 0x3FF; + CompactUnwindEncoding |= + RegEnc & CU::UNWIND_FRAMELESS_STACK_REG_PERMUTATION; } return CompactUnwindEncoding; diff --git a/lib/Target/X86/X86FrameLowering.h b/lib/Target/X86/X86FrameLowering.h index 3f08b9a..6e309d8 100644 --- a/lib/Target/X86/X86FrameLowering.h +++ b/lib/Target/X86/X86FrameLowering.h @@ -19,8 +19,35 @@ #include "llvm/Target/TargetFrameLowering.h" namespace llvm { - class MCSymbol; - class X86TargetMachine; + +namespace CU { + + /// Compact unwind encoding values. + enum CompactUnwindEncodings { + /// [RE]BP based frame where [RE]BP is pused on the stack immediately after + /// the return address, then [RE]SP is moved to [RE]BP. + UNWIND_MODE_BP_FRAME = 0x01000000, + + /// A frameless function with a small constant stack size. + UNWIND_MODE_STACK_IMMD = 0x02000000, + + /// A frameless function with a large constant stack size. + UNWIND_MODE_STACK_IND = 0x03000000, + + /// No compact unwind encoding is available. + UNWIND_MODE_DWARF = 0x04000000, + + /// Mask for encoding the frame registers. + UNWIND_BP_FRAME_REGISTERS = 0x00007FFF, + + /// Mask for encoding the frameless registers. + UNWIND_FRAMELESS_STACK_REG_PERMUTATION = 0x000003FF + }; + +} // end CU namespace + +class MCSymbol; +class X86TargetMachine; class X86FrameLowering : public TargetFrameLowering { const X86TargetMachine &TM; diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp index 6041669..968b358 100644 --- a/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -1503,8 +1503,7 @@ SDNode *X86DAGToDAGISel::SelectAtomic64(SDNode *Node, unsigned Opc) { MemOp[0] = cast<MemSDNode>(Node)->getMemOperand(); const SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, In2L, In2H, Chain}; SDNode *ResNode = CurDAG->getMachineNode(Opc, Node->getDebugLoc(), - MVT::i32, MVT::i32, MVT::Other, Ops, - array_lengthof(Ops)); + MVT::i32, MVT::i32, MVT::Other, Ops); cast<MachineSDNode>(ResNode)->setMemRefs(MemOp, MemOp + 1); return ResNode; } @@ -1720,7 +1719,7 @@ SDNode *X86DAGToDAGISel::SelectAtomicLoadArith(SDNode *Node, EVT NVT) { Op = ADD; break; } - + Val = getAtomicLoadArithTargetConstant(CurDAG, dl, Op, NVT, Val); bool isUnOp = !Val.getNode(); bool isCN = Val.getNode() && (Val.getOpcode() == ISD::TargetConstant); @@ -1772,12 +1771,10 @@ SDNode *X86DAGToDAGISel::SelectAtomicLoadArith(SDNode *Node, EVT NVT) { MemOp[0] = cast<MemSDNode>(Node)->getMemOperand(); if (isUnOp) { SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Chain }; - Ret = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops, - array_lengthof(Ops)), 0); + Ret = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops), 0); } else { SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Val, Chain }; - Ret = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops, - array_lengthof(Ops)), 0); + Ret = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops), 0); } cast<MachineSDNode>(Ret)->setMemRefs(MemOp, MemOp + 1); SDValue RetVals[] = { Undef, Ret }; @@ -1971,8 +1968,7 @@ SDNode *X86DAGToDAGISel::SelectGather(SDNode *Node, unsigned Opc) { SDValue Segment = CurDAG->getRegister(0, MVT::i32); const SDValue Ops[] = { VSrc, Base, getI8Imm(Scale->getSExtValue()), VIdx, Disp, Segment, VMask, Chain}; - SDNode *ResNode = CurDAG->getMachineNode(Opc, Node->getDebugLoc(), - VTs, Ops, array_lengthof(Ops)); + SDNode *ResNode = CurDAG->getMachineNode(Opc, Node->getDebugLoc(), VTs, Ops); // Node has 2 outputs: VDst and MVT::Other. 
// ResNode has 3 outputs: VDst, VMask_wb, and MVT::Other. // We replace VDst of Node with VDst of ResNode, and Other of Node with Other @@ -2186,7 +2182,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { SDVTList VTs = CurDAG->getVTList(NVT, NVT, MVT::i32); SDValue Ops[] = {N1, InFlag}; - SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops, 2); + SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops); ReplaceUses(SDValue(Node, 0), SDValue(CNode, 0)); ReplaceUses(SDValue(Node, 1), SDValue(CNode, 1)); @@ -2267,16 +2263,14 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { InFlag }; if (MOpc == X86::MULX32rm || MOpc == X86::MULX64rm) { SDVTList VTs = CurDAG->getVTList(NVT, NVT, MVT::Other, MVT::Glue); - SDNode *CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops, - array_lengthof(Ops)); + SDNode *CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops); ResHi = SDValue(CNode, 0); ResLo = SDValue(CNode, 1); Chain = SDValue(CNode, 2); InFlag = SDValue(CNode, 3); } else { SDVTList VTs = CurDAG->getVTList(MVT::Other, MVT::Glue); - SDNode *CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops, - array_lengthof(Ops)); + SDNode *CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops); Chain = SDValue(CNode, 0); InFlag = SDValue(CNode, 1); } @@ -2287,15 +2281,13 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { SDValue Ops[] = { N1, InFlag }; if (Opc == X86::MULX32rr || Opc == X86::MULX64rr) { SDVTList VTs = CurDAG->getVTList(NVT, NVT, MVT::Glue); - SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops, - array_lengthof(Ops)); + SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops); ResHi = SDValue(CNode, 0); ResLo = SDValue(CNode, 1); InFlag = SDValue(CNode, 2); } else { SDVTList VTs = CurDAG->getVTList(MVT::Glue); - SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops, - array_lengthof(Ops)); + SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops); InFlag = SDValue(CNode, 0); } } @@ -2343,6 +2335,9 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { DEBUG(dbgs() << "=> "; ResHi.getNode()->dump(CurDAG); dbgs() << '\n'); } + // Propagate ordering to the last node, for now. + CurDAG->AssignOrdering(InFlag.getNode(), CurDAG->GetOrdering(Node)); + return NULL; } @@ -2409,8 +2404,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N0.getOperand(0) }; Move = SDValue(CurDAG->getMachineNode(X86::MOVZX32rm8, dl, MVT::i32, - MVT::Other, Ops, - array_lengthof(Ops)), 0); + MVT::Other, Ops), 0); Chain = Move.getValue(1); ReplaceUses(N0.getValue(1), Chain); } else { @@ -2441,8 +2435,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0), InFlag }; SDNode *CNode = - CurDAG->getMachineNode(MOpc, dl, MVT::Other, MVT::Glue, Ops, - array_lengthof(Ops)); + CurDAG->getMachineNode(MOpc, dl, MVT::Other, MVT::Glue, Ops); InFlag = SDValue(CNode, 1); // Update the chain. 
ReplaceUses(N1.getValue(1), SDValue(CNode, 0)); @@ -2674,8 +2667,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { unsigned newOpc = getFusedLdStOpcode(LdVT, Opc); MachineSDNode *Result = CurDAG->getMachineNode(newOpc, Node->getDebugLoc(), - MVT::i32, MVT::Other, Ops, - array_lengthof(Ops)); + MVT::i32, MVT::Other, Ops); Result->setMemRefs(MemOp, MemOp + 2); ReplaceUses(SDValue(StoreNode, 0), SDValue(Result, 1)); diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 6934186..f69f5d8 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -163,10 +163,28 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) Subtarget = &TM.getSubtarget<X86Subtarget>(); X86ScalarSSEf64 = Subtarget->hasSSE2(); X86ScalarSSEf32 = Subtarget->hasSSE1(); - RegInfo = TM.getRegisterInfo(); TD = getDataLayout(); + resetOperationActions(); +} + +void X86TargetLowering::resetOperationActions() { + const TargetMachine &TM = getTargetMachine(); + static bool FirstTimeThrough = true; + + // If none of the target options have changed, then we don't need to reset the + // operation actions. + if (!FirstTimeThrough && TO == TM.Options) return; + + if (!FirstTimeThrough) { + // Reinitialize the actions. + initActions(); + FirstTimeThrough = false; + } + + TO = TM.Options; + // Set up the TargetLowering object. static const MVT IntVTs[] = { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }; @@ -508,16 +526,8 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) if (Subtarget->hasSSE1()) setOperationAction(ISD::PREFETCH , MVT::Other, Legal); - setOperationAction(ISD::MEMBARRIER , MVT::Other, Custom); setOperationAction(ISD::ATOMIC_FENCE , MVT::Other, Custom); - // On X86 and X86-64, atomic operations are lowered to locked instructions. - // Locked instructions, in turn, have implicit fence semantics (all memory - // operations are flushed before issuing the locked instruction, and they - // are not buffered), so we can fold away the common pattern of - // fence-atomic-fence. - setShouldFoldAtomicFences(true); - // Expand certain atomics for (unsigned i = 0; i != array_lengthof(IntVTs); ++i) { MVT VT = IntVTs[i]; @@ -1785,7 +1795,7 @@ X86TargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, if (isScalarFPTypeInSSEReg(VA.getValVT())) CopyVT = MVT::f80; SDValue Ops[] = { Chain, InFlag }; Chain = SDValue(DAG.getMachineNode(X86::FpPOP_RETVAL, dl, CopyVT, - MVT::Other, MVT::Glue, Ops, 2), 1); + MVT::Other, MVT::Glue, Ops), 1); Val = Chain.getValue(0); // Round the f80 to the right size, which also moves it to the appropriate @@ -4404,13 +4414,15 @@ static SDValue getZeroVector(EVT VT, const X86Subtarget *Subtarget, if (Subtarget->hasInt256()) { // AVX2 SDValue Cst = DAG.getTargetConstant(0, MVT::i32); SDValue Ops[] = { Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst }; - Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i32, Ops, 8); + Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i32, Ops, + array_lengthof(Ops)); } else { // 256-bit logic and arithmetic instructions in AVX are all // floating-point, no support for integer ops. Emit fp zeroed vectors. 
SDValue Cst = DAG.getTargetConstantFP(+0.0, MVT::f32); SDValue Ops[] = { Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst }; - Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8f32, Ops, 8); + Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8f32, Ops, + array_lengthof(Ops)); } } else llvm_unreachable("Unexpected vector type"); @@ -4431,7 +4443,8 @@ static SDValue getOnesVector(MVT VT, bool HasInt256, SelectionDAG &DAG, if (VT.is256BitVector()) { if (HasInt256) { // AVX2 SDValue Ops[] = { Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst }; - Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i32, Ops, 8); + Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i32, Ops, + array_lengthof(Ops)); } else { // AVX Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Cst, Cst, Cst, Cst); Vec = Concat128BitVectors(Vec, Vec, MVT::v8i32, 8, DAG, dl); @@ -5101,7 +5114,8 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, SmallVectorImpl<SDValue> &Elts, SDVTList Tys = DAG.getVTList(MVT::v2i64, MVT::Other); SDValue Ops[] = { LDBase->getChain(), LDBase->getBasePtr() }; SDValue ResNode = - DAG.getMemIntrinsicNode(X86ISD::VZEXT_LOAD, DL, Tys, Ops, 2, MVT::i64, + DAG.getMemIntrinsicNode(X86ISD::VZEXT_LOAD, DL, Tys, Ops, + array_lengthof(Ops), MVT::i64, LDBase->getPointerInfo(), LDBase->getAlignment(), false/*isVolatile*/, true/*ReadMem*/, @@ -7624,10 +7638,10 @@ GetTLSADDR(SelectionDAG &DAG, SDValue Chain, GlobalAddressSDNode *GA, if (InFlag) { SDValue Ops[] = { Chain, TGA, *InFlag }; - Chain = DAG.getNode(CallType, dl, NodeTys, Ops, 3); + Chain = DAG.getNode(CallType, dl, NodeTys, Ops, array_lengthof(Ops)); } else { SDValue Ops[] = { Chain, TGA }; - Chain = DAG.getNode(CallType, dl, NodeTys, Ops, 2); + Chain = DAG.getNode(CallType, dl, NodeTys, Ops, array_lengthof(Ops)); } // TLSADDR will be codegen'ed as call. Inform MFI that function has calls. 
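A recurring change in this import, visible in the GetTLSADDR hunk just above and in many hunks throughout these files, replaces hard-coded operand counts such as "Ops, 3" with array_lengthof(Ops), or drops the count entirely where the callee now takes the operand list as an ArrayRef. The following is a minimal, self-contained sketch of the compile-time length idiom; array_lengthof here is a local stand-in modeled on llvm::array_lengthof from llvm/ADT/STLExtras.h, and takeOps is a hypothetical consumer standing in for the DAG node constructors.

#include <cstddef>
#include <iostream>

// Local stand-in modeled on llvm::array_lengthof: deduce the element count
// of a C array at compile time so the count can never drift from the array.
template <class T, std::size_t N>
inline constexpr std::size_t array_lengthof(T (&)[N]) {
  return N;
}

// Hypothetical consumer standing in for calls like
// SelectionDAG::getNode(Opc, dl, VTs, Ops, NumOps).
static void takeOps(const int *Ops, std::size_t NumOps) {
  for (std::size_t i = 0; i != NumOps; ++i)
    std::cout << Ops[i] << ' ';
  std::cout << '\n';
}

int main() {
  int Ops[] = {1, 2, 3};
  // Adding or removing an operand keeps the count correct automatically,
  // unlike a hard-coded takeOps(Ops, 3).
  takeOps(Ops, array_lengthof(Ops));
  return 0;
}

Dropping the count altogether, as in the getMachineNode(Opc, dl, VTs, Ops) calls elsewhere in this diff, goes one step further: the callee takes an ArrayRef, which carries pointer and length as a single value.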
@@ -7937,7 +7951,7 @@ SDValue X86TargetLowering::LowerShiftParts(SDValue Op, SelectionDAG &DAG) const{ } SDValue Ops[2] = { Lo, Hi }; - return DAG.getMergeValues(Ops, 2, dl); + return DAG.getMergeValues(Ops, array_lengthof(Ops), dl); } SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op, @@ -8220,8 +8234,8 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op, SDVTList Tys = DAG.getVTList(MVT::f80, MVT::Other); SDValue Ops[] = { Store, StackSlot, DAG.getValueType(MVT::i64) }; - SDValue Fild = DAG.getMemIntrinsicNode(X86ISD::FILD, dl, Tys, Ops, 3, - MVT::i64, MMO); + SDValue Fild = DAG.getMemIntrinsicNode(X86ISD::FILD, dl, Tys, Ops, + array_lengthof(Ops), MVT::i64, MMO); APInt FF(32, 0x5F800000ULL); @@ -8313,8 +8327,8 @@ X86TargetLowering:: FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, MachineMemOperand *MMO = MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(SSFI), MachineMemOperand::MOLoad, MemSize, MemSize); - Value = DAG.getMemIntrinsicNode(X86ISD::FLD, DL, Tys, Ops, 3, - DstTy, MMO); + Value = DAG.getMemIntrinsicNode(X86ISD::FLD, DL, Tys, Ops, + array_lengthof(Ops), DstTy, MMO); Chain = Value.getValue(1); SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize, false); StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); @@ -8328,7 +8342,8 @@ X86TargetLowering:: FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, // Build the FP_TO_INT*_IN_MEM SDValue Ops[] = { Chain, Value, StackSlot }; SDValue FIST = DAG.getMemIntrinsicNode(Opc, DL, DAG.getVTList(MVT::Other), - Ops, 3, DstTy, MMO); + Ops, array_lengthof(Ops), DstTy, + MMO); return std::make_pair(FIST, StackSlot); } else { SDValue ftol = DAG.getNode(X86ISD::WIN_FTOL, DL, @@ -8340,8 +8355,8 @@ X86TargetLowering:: FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, MVT::i32, eax.getValue(2)); SDValue Ops[] = { eax, edx }; SDValue pair = IsReplace - ? DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Ops, 2) - : DAG.getMergeValues(Ops, 2, DL); + ? DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Ops, array_lengthof(Ops)) + : DAG.getMergeValues(Ops, array_lengthof(Ops), DL); return std::make_pair(pair, SDValue()); } } @@ -9165,14 +9180,6 @@ SDValue X86TargetLowering::LowerToBT(SDValue And, ISD::CondCode CC, } if (LHS.getNode()) { - // If the LHS is of the form (x ^ -1) then replace the LHS with x and flip - // the condition code later. - bool Invert = false; - if (LHS.getOpcode() == ISD::XOR && isAllOnes(LHS.getOperand(1))) { - Invert = true; - LHS = LHS.getOperand(0); - } - // If LHS is i8, promote it to i32 with any_extend. There is no i8 BT // instruction. Since the shift amount is in-range-or-undefined, we know // that doing a bittest on the i32 value is ok. We extend to i32 because @@ -9189,9 +9196,6 @@ SDValue X86TargetLowering::LowerToBT(SDValue And, ISD::CondCode CC, SDValue BT = DAG.getNode(X86ISD::BT, dl, MVT::i32, LHS, RHS); X86::CondCode Cond = CC == ISD::SETEQ ? X86::COND_AE : X86::COND_B; - // Flip the condition if the LHS was a not instruction - if (Invert) - Cond = X86::GetOppositeBranchCondition(Cond); return DAG.getNode(X86ISD::SETCC, dl, MVT::i8, DAG.getConstant(Cond, MVT::i8), BT); } @@ -9335,14 +9339,54 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget *Subtarget, // Check that the operation in question is available (most are plain SSE2, // but PCMPGTQ and PCMPEQQ have different requirements). 
if (VT == MVT::v2i64) { - if (Opc == X86ISD::PCMPGT && !Subtarget->hasSSE42()) - return SDValue(); + if (Opc == X86ISD::PCMPGT && !Subtarget->hasSSE42()) { + assert(Subtarget->hasSSE2() && "Don't know how to lower!"); + + // First cast everything to the right type. + Op0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op0); + Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op1); + + // Since SSE has no unsigned integer comparisons, we need to flip the sign + // bits of the inputs before performing those operations. The lower + // compare is always unsigned. + SDValue SB; + if (FlipSigns) { + SB = DAG.getConstant(0x80000000U, MVT::v4i32); + } else { + SDValue Sign = DAG.getConstant(0x80000000U, MVT::i32); + SDValue Zero = DAG.getConstant(0x00000000U, MVT::i32); + SB = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, + Sign, Zero, Sign, Zero); + } + Op0 = DAG.getNode(ISD::XOR, dl, MVT::v4i32, Op0, SB); + Op1 = DAG.getNode(ISD::XOR, dl, MVT::v4i32, Op1, SB); + + // Emulate PCMPGTQ with (hi1 > hi2) | ((hi1 == hi2) & (lo1 > lo2)) + SDValue GT = DAG.getNode(X86ISD::PCMPGT, dl, MVT::v4i32, Op0, Op1); + SDValue EQ = DAG.getNode(X86ISD::PCMPEQ, dl, MVT::v4i32, Op0, Op1); + + // Create masks for only the low parts/high parts of the 64 bit integers. + const int MaskHi[] = { 1, 1, 3, 3 }; + const int MaskLo[] = { 0, 0, 2, 2 }; + SDValue EQHi = DAG.getVectorShuffle(MVT::v4i32, dl, EQ, EQ, MaskHi); + SDValue GTLo = DAG.getVectorShuffle(MVT::v4i32, dl, GT, GT, MaskLo); + SDValue GTHi = DAG.getVectorShuffle(MVT::v4i32, dl, GT, GT, MaskHi); + + SDValue Result = DAG.getNode(ISD::AND, dl, MVT::v4i32, EQHi, GTLo); + Result = DAG.getNode(ISD::OR, dl, MVT::v4i32, Result, GTHi); + + if (Invert) + Result = DAG.getNOT(dl, Result, MVT::v4i32); + + return DAG.getNode(ISD::BITCAST, dl, VT, Result); + } + if (Opc == X86ISD::PCMPEQ && !Subtarget->hasSSE41()) { // If pcmpeqq is missing but pcmpeqd is available synthesize pcmpeqq with // pcmpeqd + pshufd + pand. assert(Subtarget->hasSSE2() && !FlipSigns && "Don't know how to lower!"); - // First cast everything to the right type, + // First cast everything to the right type. Op0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op0); Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op1); @@ -9361,17 +9405,13 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget *Subtarget, } } - // Since SSE has no unsigned integer comparisons, we need to flip the sign + // Since SSE has no unsigned integer comparisons, we need to flip the sign // bits of the inputs before performing those operations. if (FlipSigns) { EVT EltVT = VT.getVectorElementType(); - SDValue SignBit = DAG.getConstant(APInt::getSignBit(EltVT.getSizeInBits()), - EltVT); - std::vector<SDValue> SignBits(VT.getVectorNumElements(), SignBit); - SDValue SignVec = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &SignBits[0], - SignBits.size()); - Op0 = DAG.getNode(ISD::XOR, dl, VT, Op0, SignVec); - Op1 = DAG.getNode(ISD::XOR, dl, VT, Op1, SignVec); + SDValue SB = DAG.getConstant(APInt::getSignBit(EltVT.getSizeInBits()), VT); + Op0 = DAG.getNode(ISD::XOR, dl, VT, Op0, SB); + Op1 = DAG.getNode(ISD::XOR, dl, VT, Op1, SB); } SDValue Result = DAG.getNode(Opc, dl, VT, Op0, Op1); @@ -10937,7 +10977,7 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) { SDValue(Result.getNode(), 1) }; SDValue isValid = DAG.getNode(X86ISD::CMOV, dl, DAG.getVTList(Op->getValueType(1), MVT::Glue), - Ops, 4); + Ops, array_lengthof(Ops)); // Return { result, isValid, chain }. 
return DAG.getNode(ISD::MERGE_VALUES, dl, Op->getVTList(), Result, isValid, @@ -10990,7 +11030,10 @@ SDValue X86TargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const { EVT VT = Op.getValueType(); DebugLoc dl = Op.getDebugLoc(); // FIXME probably not meaningful unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); - unsigned FrameReg = Subtarget->is64Bit() ? X86::RBP : X86::EBP; + unsigned FrameReg = RegInfo->getFrameRegister(DAG.getMachineFunction()); + assert(((FrameReg == X86::RBP && VT == MVT::i64) || + (FrameReg == X86::EBP && VT == MVT::i32)) && + "Invalid Frame Register!"); SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT); while (Depth--) FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr, @@ -11010,21 +11053,23 @@ SDValue X86TargetLowering::LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const { SDValue Handler = Op.getOperand(2); DebugLoc dl = Op.getDebugLoc(); - SDValue Frame = DAG.getCopyFromReg(DAG.getEntryNode(), dl, - Subtarget->is64Bit() ? X86::RBP : X86::EBP, - getPointerTy()); - unsigned StoreAddrReg = (Subtarget->is64Bit() ? X86::RCX : X86::ECX); - - SDValue StoreAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(), Frame, - DAG.getIntPtrConstant(RegInfo->getSlotSize())); - StoreAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(), StoreAddr, Offset); + EVT PtrVT = getPointerTy(); + unsigned FrameReg = RegInfo->getFrameRegister(DAG.getMachineFunction()); + assert(((FrameReg == X86::RBP && PtrVT == MVT::i64) || + (FrameReg == X86::EBP && PtrVT == MVT::i32)) && + "Invalid Frame Register!"); + SDValue Frame = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, PtrVT); + unsigned StoreAddrReg = (PtrVT == MVT::i64) ? X86::RCX : X86::ECX; + + SDValue StoreAddr = DAG.getNode(ISD::ADD, dl, PtrVT, Frame, + DAG.getIntPtrConstant(RegInfo->getSlotSize())); + StoreAddr = DAG.getNode(ISD::ADD, dl, PtrVT, StoreAddr, Offset); Chain = DAG.getStore(Chain, dl, Handler, StoreAddr, MachinePointerInfo(), false, false, 0); Chain = DAG.getCopyToReg(Chain, dl, StoreAddrReg, StoreAddr); - return DAG.getNode(X86ISD::EH_RETURN, dl, - MVT::Other, - Chain, DAG.getRegister(StoreAddrReg, getPointerTy())); + return DAG.getNode(X86ISD::EH_RETURN, dl, MVT::Other, Chain, + DAG.getRegister(StoreAddrReg, PtrVT)); } SDValue X86TargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op, @@ -11235,7 +11280,8 @@ SDValue X86TargetLowering::LowerFLT_ROUNDS_(SDValue Op, SDValue Ops[] = { DAG.getEntryNode(), StackSlot }; SDValue Chain = DAG.getMemIntrinsicNode(X86ISD::FNSTCW16m, DL, DAG.getVTList(MVT::Other), - Ops, 2, MVT::i16, MMO); + Ops, array_lengthof(Ops), MVT::i16, + MMO); // Load FP Control Word from stack slot SDValue CWD = DAG.getLoad(MVT::i16, DL, Chain, StackSlot, @@ -12075,52 +12121,6 @@ SDValue X86TargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op, } } -static SDValue LowerMEMBARRIER(SDValue Op, const X86Subtarget *Subtarget, - SelectionDAG &DAG) { - DebugLoc dl = Op.getDebugLoc(); - - // Go ahead and emit the fence on x86-64 even if we asked for no-sse2. - // There isn't any reason to disable it if the target processor supports it. - if (!Subtarget->hasSSE2() && !Subtarget->is64Bit()) { - SDValue Chain = Op.getOperand(0); - SDValue Zero = DAG.getConstant(0, MVT::i32); - SDValue Ops[] = { - DAG.getRegister(X86::ESP, MVT::i32), // Base - DAG.getTargetConstant(1, MVT::i8), // Scale - DAG.getRegister(0, MVT::i32), // Index - DAG.getTargetConstant(0, MVT::i32), // Disp - DAG.getRegister(0, MVT::i32), // Segment. 
- Zero, - Chain - }; - SDNode *Res = - DAG.getMachineNode(X86::OR32mrLocked, dl, MVT::Other, Ops, - array_lengthof(Ops)); - return SDValue(Res, 0); - } - - unsigned isDev = cast<ConstantSDNode>(Op.getOperand(5))->getZExtValue(); - if (!isDev) - return DAG.getNode(X86ISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0)); - - unsigned Op1 = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue(); - unsigned Op2 = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue(); - unsigned Op3 = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue(); - unsigned Op4 = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue(); - - // def : Pat<(membarrier (i8 0), (i8 0), (i8 0), (i8 1), (i8 1)), (SFENCE)>; - if (!Op1 && !Op2 && !Op3 && Op4) - return DAG.getNode(X86ISD::SFENCE, dl, MVT::Other, Op.getOperand(0)); - - // def : Pat<(membarrier (i8 1), (i8 0), (i8 0), (i8 0), (i8 1)), (LFENCE)>; - if (Op1 && !Op2 && !Op3 && !Op4) - return DAG.getNode(X86ISD::LFENCE, dl, MVT::Other, Op.getOperand(0)); - - // def : Pat<(membarrier (i8 imm), (i8 imm), (i8 imm), (i8 imm), (i8 1)), - // (MFENCE)>; - return DAG.getNode(X86ISD::MFENCE, dl, MVT::Other, Op.getOperand(0)); -} - static SDValue LowerATOMIC_FENCE(SDValue Op, const X86Subtarget *Subtarget, SelectionDAG &DAG) { DebugLoc dl = Op.getDebugLoc(); @@ -12149,9 +12149,7 @@ static SDValue LowerATOMIC_FENCE(SDValue Op, const X86Subtarget *Subtarget, Zero, Chain }; - SDNode *Res = - DAG.getMachineNode(X86::OR32mrLocked, dl, MVT::Other, Ops, - array_lengthof(Ops)); + SDNode *Res = DAG.getMachineNode(X86::OR32mrLocked, dl, MVT::Other, Ops); return SDValue(Res, 0); } @@ -12185,7 +12183,7 @@ static SDValue LowerCMP_SWAP(SDValue Op, const X86Subtarget *Subtarget, SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue); MachineMemOperand *MMO = cast<AtomicSDNode>(Op)->getMemOperand(); SDValue Result = DAG.getMemIntrinsicNode(X86ISD::LCMPXCHG_DAG, DL, Tys, - Ops, 5, T, MMO); + Ops, array_lengthof(Ops), T, MMO); SDValue cpOut = DAG.getCopyFromReg(Result.getValue(0), DL, Reg, T, Result.getValue(1)); return cpOut; @@ -12207,7 +12205,7 @@ static SDValue LowerREADCYCLECOUNTER(SDValue Op, const X86Subtarget *Subtarget, DAG.getNode(ISD::OR, dl, MVT::i64, rax, Tmp), rdx.getValue(1) }; - return DAG.getMergeValues(Ops, 2, dl); + return DAG.getMergeValues(Ops, array_lengthof(Ops), dl); } SDValue X86TargetLowering::LowerBITCAST(SDValue Op, SelectionDAG &DAG) const { @@ -12301,7 +12299,8 @@ SDValue X86TargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const { assert(Subtarget->isTargetDarwin() && Subtarget->is64Bit()); // For MacOSX, we want to call an alternative entry point: __sincos_stret, - // which returns the values in two XMM registers. + // which returns the values as { float, float } (in XMM0) or + // { double, double } (which is returned in XMM0, XMM1). DebugLoc dl = Op.getDebugLoc(); SDValue Arg = Op.getOperand(0); EVT ArgVT = Arg.getValueType(); @@ -12316,14 +12315,16 @@ SDValue X86TargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const { Entry.isZExt = false; Args.push_back(Entry); + bool isF64 = ArgVT == MVT::f64; // Only optimize x86_64 for now. i386 is a bit messy. For f32, // the small struct {f32, f32} is returned in (eax, edx). For f64, // the results are returned via SRet in memory. - const char *LibcallName = (ArgVT == MVT::f64) - ? "__sincos_stret" : "__sincosf_stret"; + const char *LibcallName = isF64 ? 
"__sincos_stret" : "__sincosf_stret"; SDValue Callee = DAG.getExternalSymbol(LibcallName, getPointerTy()); - StructType *RetTy = StructType::get(ArgTy, ArgTy, NULL); + Type *RetTy = isF64 + ? (Type*)StructType::get(ArgTy, ArgTy, NULL) + : (Type*)VectorType::get(ArgTy, 4); TargetLowering:: CallLoweringInfo CLI(DAG.getEntryNode(), RetTy, false, false, false, false, 0, @@ -12331,7 +12332,18 @@ SDValue X86TargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const { /*doesNotRet=*/false, /*isReturnValueUsed*/true, Callee, Args, DAG, dl); std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI); - return CallResult.first; + + if (isF64) + // Returned in xmm0 and xmm1. + return CallResult.first; + + // Returned in bits 0:31 and 32:64 xmm0. + SDValue SinVal = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ArgVT, + CallResult.first, DAG.getIntPtrConstant(0)); + SDValue CosVal = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ArgVT, + CallResult.first, DAG.getIntPtrConstant(1)); + SDVTList Tys = DAG.getVTList(ArgVT, ArgVT); + return DAG.getNode(ISD::MERGE_VALUES, dl, Tys, SinVal, CosVal); } /// LowerOperation - Provide custom lowering hooks for some operations. @@ -12340,7 +12352,6 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { switch (Op.getOpcode()) { default: llvm_unreachable("Should not custom lower this!"); case ISD::SIGN_EXTEND_INREG: return LowerSIGN_EXTEND_INREG(Op,DAG); - case ISD::MEMBARRIER: return LowerMEMBARRIER(Op, Subtarget, DAG); case ISD::ATOMIC_FENCE: return LowerATOMIC_FENCE(Op, Subtarget, DAG); case ISD::ATOMIC_CMP_SWAP: return LowerCMP_SWAP(Op, Subtarget, DAG); case ISD::ATOMIC_LOAD_SUB: return LowerLOAD_SUB(Op,DAG); @@ -12457,7 +12468,7 @@ ReplaceATOMIC_BINARY_64(SDNode *Node, SmallVectorImpl<SDValue>&Results, SDValue Ops[] = { Chain, In1, In2L, In2H }; SDVTList Tys = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other); SDValue Result = - DAG.getMemIntrinsicNode(NewOp, dl, Tys, Ops, 4, MVT::i64, + DAG.getMemIntrinsicNode(NewOp, dl, Tys, Ops, array_lengthof(Ops), MVT::i64, cast<MemSDNode>(Node)->getMemOperand()); SDValue OpsF[] = { Result.getValue(0), Result.getValue(1)}; Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, OpsF, 2)); @@ -12537,7 +12548,8 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N, eax.getValue(2)); // Use a buildpair to merge the two 32-bit values into a 64-bit one. SDValue Ops[] = { eax, edx }; - Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Ops, 2)); + Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Ops, + array_lengthof(Ops))); Results.push_back(edx.getValue(1)); return; } @@ -12576,7 +12588,7 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N, unsigned Opcode = Regs64bit ? X86ISD::LCMPXCHG16_DAG : X86ISD::LCMPXCHG8_DAG; SDValue Result = DAG.getMemIntrinsicNode(Opcode, dl, Tys, - Ops, 3, T, MMO); + Ops, array_lengthof(Ops), T, MMO); SDValue cpOutL = DAG.getCopyFromReg(Result.getValue(0), dl, Regs64bit ? 
X86::RAX : X86::EAX, HalfT, Result.getValue(1)); @@ -15063,7 +15075,8 @@ static SDValue PerformShuffleCombine256(SDNode *N, SelectionDAG &DAG, SDVTList Tys = DAG.getVTList(MVT::v4i64, MVT::Other); SDValue Ops[] = { Ld->getChain(), Ld->getBasePtr() }; SDValue ResNode = - DAG.getMemIntrinsicNode(X86ISD::VZEXT_LOAD, dl, Tys, Ops, 2, + DAG.getMemIntrinsicNode(X86ISD::VZEXT_LOAD, dl, Tys, Ops, + array_lengthof(Ops), Ld->getMemoryVT(), Ld->getPointerInfo(), Ld->getAlignment(), @@ -15755,6 +15768,51 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG, if (unsigned Op = matchIntegerMINMAX(Cond, VT, LHS, RHS, DAG, Subtarget)) return DAG.getNode(Op, DL, N->getValueType(0), LHS, RHS); + // Simplify vector selection if the selector will be produced by CMPP*/PCMP*. + if (!DCI.isBeforeLegalize() && N->getOpcode() == ISD::VSELECT && + Cond.getOpcode() == ISD::SETCC) { + + assert(Cond.getValueType().isVector() && + "vector select expects a vector selector!"); + + EVT IntVT = Cond.getValueType(); + bool TValIsAllOnes = ISD::isBuildVectorAllOnes(LHS.getNode()); + bool FValIsAllZeros = ISD::isBuildVectorAllZeros(RHS.getNode()); + + if (!TValIsAllOnes && !FValIsAllZeros) { + // Try invert the condition if true value is not all 1s and false value + // is not all 0s. + bool TValIsAllZeros = ISD::isBuildVectorAllZeros(LHS.getNode()); + bool FValIsAllOnes = ISD::isBuildVectorAllOnes(RHS.getNode()); + + if (TValIsAllZeros || FValIsAllOnes) { + SDValue CC = Cond.getOperand(2); + ISD::CondCode NewCC = + ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(), + Cond.getOperand(0).getValueType().isInteger()); + Cond = DAG.getSetCC(DL, IntVT, Cond.getOperand(0), Cond.getOperand(1), NewCC); + std::swap(LHS, RHS); + TValIsAllOnes = FValIsAllOnes; + FValIsAllZeros = TValIsAllZeros; + } + } + + if (TValIsAllOnes || FValIsAllZeros) { + SDValue Ret; + + if (TValIsAllOnes && FValIsAllZeros) + Ret = Cond; + else if (TValIsAllOnes) + Ret = DAG.getNode(ISD::OR, DL, IntVT, Cond, + DAG.getNode(ISD::BITCAST, DL, IntVT, RHS)); + else if (FValIsAllZeros) + Ret = DAG.getNode(ISD::AND, DL, IntVT, Cond, + DAG.getNode(ISD::BITCAST, DL, IntVT, LHS)); + + return DAG.getNode(ISD::BITCAST, DL, VT, Ret); + } + } + // If we know that this node is legal then we know that it is going to be // matched by one of the SSE/AVX BLEND instructions. These instructions only // depend on the highest bit in each word. Try to use SimplifyDemandedBits @@ -15815,6 +15873,7 @@ static SDValue checkBoolTestSetCCCombine(SDValue Cmp, X86::CondCode &CC) { SDValue SetCC; const ConstantSDNode* C = 0; bool needOppositeCond = (CC == X86::COND_E); + bool checkAgainstTrue = false; // Is it a comparison against 1? if ((C = dyn_cast<ConstantSDNode>(Op1))) SetCC = Op2; @@ -15823,18 +15882,46 @@ static SDValue checkBoolTestSetCCCombine(SDValue Cmp, X86::CondCode &CC) { else // Quit if all operands are not constants. return SDValue(); - if (C->getZExtValue() == 1) + if (C->getZExtValue() == 1) { needOppositeCond = !needOppositeCond; - else if (C->getZExtValue() != 0) + checkAgainstTrue = true; + } else if (C->getZExtValue() != 0) // Quit if the constant is neither 0 or 1. return SDValue(); - // Skip 'zext' or 'trunc' node. - if (SetCC.getOpcode() == ISD::ZERO_EXTEND || - SetCC.getOpcode() == ISD::TRUNCATE) - SetCC = SetCC.getOperand(0); + bool truncatedToBoolWithAnd = false; + // Skip (zext $x), (trunc $x), or (and $x, 1) node. 
+ while (SetCC.getOpcode() == ISD::ZERO_EXTEND || + SetCC.getOpcode() == ISD::TRUNCATE || + SetCC.getOpcode() == ISD::AND) { + if (SetCC.getOpcode() == ISD::AND) { + int OpIdx = -1; + ConstantSDNode *CS; + if ((CS = dyn_cast<ConstantSDNode>(SetCC.getOperand(0))) && + CS->getZExtValue() == 1) + OpIdx = 1; + if ((CS = dyn_cast<ConstantSDNode>(SetCC.getOperand(1))) && + CS->getZExtValue() == 1) + OpIdx = 0; + if (OpIdx == -1) + break; + SetCC = SetCC.getOperand(OpIdx); + truncatedToBoolWithAnd = true; + } else + SetCC = SetCC.getOperand(0); + } switch (SetCC.getOpcode()) { + case X86ISD::SETCC_CARRY: + // Since SETCC_CARRY gives output based on R = CF ? ~0 : 0, it's unsafe to + // simplify it if the result of SETCC_CARRY is not canonicalized to 0 or 1, + // i.e. it's a comparison against true but the result of SETCC_CARRY is not + // truncated to i1 using 'and'. + if (checkAgainstTrue && !truncatedToBoolWithAnd) + break; + assert(X86::CondCode(SetCC.getConstantOperandVal(0)) == X86::COND_B && + "Invalid use of SETCC_CARRY!"); + // FALL THROUGH case X86ISD::SETCC: // Set the condition code or opposite one if necessary. CC = X86::CondCode(SetCC.getConstantOperandVal(0)); @@ -16165,8 +16252,7 @@ static SDValue PerformSHLCombine(SDNode *N, SelectionDAG &DAG) { return SDValue(); } -/// PerformShiftCombine - Transforms vector shift nodes to use vector shifts -/// when possible. +/// PerformShiftCombine - Combine shifts. static SDValue PerformShiftCombine(SDNode* N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget *Subtarget) { diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index 5725f7a..2727e22 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -723,6 +723,9 @@ namespace llvm { SDValue BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain, SDValue StackSlot, SelectionDAG &DAG) const; + /// \brief Reset the operation actions based on target options. + virtual void resetOperationActions(); + protected: std::pair<const TargetRegisterClass*, uint8_t> findRepresentativeClass(MVT VT) const; @@ -734,6 +737,10 @@ namespace llvm { const X86RegisterInfo *RegInfo; const DataLayout *TD; + /// Used to store the TargetOptions so that we don't waste time resetting + /// the operation actions unless we have to. + TargetOptions TO; + /// X86ScalarSSEf32, X86ScalarSSEf64 - Select between SSE or x87 /// floating point ops. /// When SSE is available, use it for f32 operations. 
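The PerformSELECTCombine hunk above folds a vector select whose true operand is all ones into an OR with the condition mask and one whose false operand is all zeros into an AND, inverting the comparison first when only the opposite pattern is present. Below is a hedged scalar illustration of why those folds are sound for the all-ones/all-zeros lane masks that vector compares produce; sel is an illustrative helper, not LLVM code.

#include <cassert>
#include <cstdint>
#include <initializer_list>

// Lane-wise select with an all-ones/all-zeros mask, the form produced by
// SSE/AVX vector compares: select(c, t, f) = (c & t) | (~c & f).
static uint32_t sel(uint32_t c, uint32_t t, uint32_t f) {
  return (c & t) | (~c & f);
}

int main() {
  const uint32_t ones = 0xFFFFFFFFu, zero = 0u, x = 0x12345678u;
  for (uint32_t c : {ones, zero}) {
    // True operand all ones   -> select(c, ~0, x) == c | x  (the OR fold).
    assert(sel(c, ones, x) == (c | x));
    // False operand all zeros -> select(c, x, 0)  == c & x  (the AND fold).
    assert(sel(c, x, zero) == (c & x));
    // Both special            -> the select is the condition mask itself.
    assert(sel(c, ones, zero) == c);
  }
  return 0;
}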
diff --git a/lib/Target/X86/X86InstrFormats.td b/lib/Target/X86/X86InstrFormats.td index 0ef9491..a71e024 100644 --- a/lib/Target/X86/X86InstrFormats.td +++ b/lib/Target/X86/X86InstrFormats.td @@ -35,25 +35,27 @@ def MRM_C3 : Format<35>; def MRM_C4 : Format<36>; def MRM_C8 : Format<37>; def MRM_C9 : Format<38>; -def MRM_E8 : Format<39>; -def MRM_F0 : Format<40>; -def MRM_F8 : Format<41>; -def MRM_F9 : Format<42>; +def MRM_CA : Format<39>; +def MRM_CB : Format<40>; +def MRM_E8 : Format<41>; +def MRM_F0 : Format<42>; def RawFrmImm8 : Format<43>; def RawFrmImm16 : Format<44>; -def MRM_D0 : Format<45>; -def MRM_D1 : Format<46>; -def MRM_D4 : Format<47>; -def MRM_D5 : Format<48>; -def MRM_D6 : Format<49>; -def MRM_D8 : Format<50>; -def MRM_D9 : Format<51>; -def MRM_DA : Format<52>; -def MRM_DB : Format<53>; -def MRM_DC : Format<54>; -def MRM_DD : Format<55>; -def MRM_DE : Format<56>; -def MRM_DF : Format<57>; +def MRM_F8 : Format<45>; +def MRM_F9 : Format<46>; +def MRM_D0 : Format<47>; +def MRM_D1 : Format<48>; +def MRM_D4 : Format<49>; +def MRM_D5 : Format<50>; +def MRM_D6 : Format<51>; +def MRM_D8 : Format<52>; +def MRM_D9 : Format<53>; +def MRM_DA : Format<54>; +def MRM_DB : Format<55>; +def MRM_DC : Format<56>; +def MRM_DD : Format<57>; +def MRM_DE : Format<58>; +def MRM_DF : Format<59>; // ImmType - This specifies the immediate type used by an instruction. This is // part of the ad-hoc solution used to emit machine instruction encodings by our diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index 7ba542c..7c0423f 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -4281,7 +4281,7 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N, bool isAligned = (*MMOs.first) && (*MMOs.first)->getAlignment() >= Alignment; Load = DAG.getMachineNode(getLoadRegOpcode(0, RC, isAligned, TM), dl, - VT, MVT::Other, &AddrOps[0], AddrOps.size()); + VT, MVT::Other, AddrOps); NewNodes.push_back(Load); // Preserve memory reference information. @@ -4303,8 +4303,7 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N, if (Load) BeforeOps.push_back(SDValue(Load, 0)); std::copy(AfterOps.begin(), AfterOps.end(), std::back_inserter(BeforeOps)); - SDNode *NewNode= DAG.getMachineNode(Opc, dl, VTs, &BeforeOps[0], - BeforeOps.size()); + SDNode *NewNode= DAG.getMachineNode(Opc, dl, VTs, BeforeOps); NewNodes.push_back(NewNode); // Emit the store instruction. @@ -4326,8 +4325,7 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N, (*MMOs.first)->getAlignment() >= Alignment; SDNode *Store = DAG.getMachineNode(getStoreRegOpcode(0, DstRC, isAligned, TM), - dl, MVT::Other, - &AddrOps[0], AddrOps.size()); + dl, MVT::Other, AddrOps); NewNodes.push_back(Store); // Preserve memory reference information. 
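The X86InstrFormats.td renumbering above makes room for the new MRM_CA and MRM_CB formats, which the MC and JIT code-emitter hunks earlier in this diff lower to a fixed second byte (0xCA or 0xCB) following the base opcode. The sketch below models only that encoding shape; FixedMRMFormat, fixedMRMByte, and emitFixedMRM are hypothetical names, and any escape or prefix bytes the real emitters write first are omitted.

#include <cstdint>
#include <cstdio>
#include <vector>

// Illustrative formats whose ModRM byte is a fixed constant, mirroring the
// MRM_CA/MRM_CB cases added to the X86 code emitters in this import.
enum FixedMRMFormat { MRM_C8, MRM_C9, MRM_CA, MRM_CB };

static uint8_t fixedMRMByte(FixedMRMFormat F) {
  switch (F) {
  case MRM_C8: return 0xC8;
  case MRM_C9: return 0xC9;
  case MRM_CA: return 0xCA;
  case MRM_CB: return 0xCB;
  }
  return 0;
}

// Emit the base opcode byte, then the fixed ModRM byte; these forms encode
// no register or memory operand in the ModRM byte at all.
static void emitFixedMRM(std::vector<uint8_t> &Out, uint8_t BaseOpcode,
                         FixedMRMFormat F) {
  Out.push_back(BaseOpcode);
  Out.push_back(fixedMRMByte(F));
}

int main() {
  std::vector<uint8_t> Bytes;
  emitFixedMRM(Bytes, 0x01, MRM_CA);
  for (uint8_t B : Bytes)
    std::printf("%02X ", B); // prints "01 CA"
  std::printf("\n");
  return 0;
}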
diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index ccc1aa2..3380d8c 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -1833,90 +1833,90 @@ include "X86InstrCompiler.td" // Assembler Mnemonic Aliases //===----------------------------------------------------------------------===// -def : MnemonicAlias<"call", "calll">, Requires<[In32BitMode]>; -def : MnemonicAlias<"call", "callq">, Requires<[In64BitMode]>; +def : MnemonicAlias<"call", "calll", "att">, Requires<[In32BitMode]>; +def : MnemonicAlias<"call", "callq", "att">, Requires<[In64BitMode]>; -def : MnemonicAlias<"cbw", "cbtw">; -def : MnemonicAlias<"cwde", "cwtl">; -def : MnemonicAlias<"cwd", "cwtd">; -def : MnemonicAlias<"cdq", "cltd">; -def : MnemonicAlias<"cdqe", "cltq">; -def : MnemonicAlias<"cqo", "cqto">; +def : MnemonicAlias<"cbw", "cbtw", "att">; +def : MnemonicAlias<"cwde", "cwtl", "att">; +def : MnemonicAlias<"cwd", "cwtd", "att">; +def : MnemonicAlias<"cdq", "cltd", "att">; +def : MnemonicAlias<"cdqe", "cltq", "att">; +def : MnemonicAlias<"cqo", "cqto", "att">; // lret maps to lretl, it is not ambiguous with lretq. -def : MnemonicAlias<"lret", "lretl">; +def : MnemonicAlias<"lret", "lretl", "att">; -def : MnemonicAlias<"leavel", "leave">, Requires<[In32BitMode]>; -def : MnemonicAlias<"leaveq", "leave">, Requires<[In64BitMode]>; +def : MnemonicAlias<"leavel", "leave", "att">, Requires<[In32BitMode]>; +def : MnemonicAlias<"leaveq", "leave", "att">, Requires<[In64BitMode]>; -def : MnemonicAlias<"loopz", "loope">; -def : MnemonicAlias<"loopnz", "loopne">; +def : MnemonicAlias<"loopz", "loope", "att">; +def : MnemonicAlias<"loopnz", "loopne", "att">; -def : MnemonicAlias<"pop", "popl">, Requires<[In32BitMode]>; -def : MnemonicAlias<"pop", "popq">, Requires<[In64BitMode]>; -def : MnemonicAlias<"popf", "popfl">, Requires<[In32BitMode]>; -def : MnemonicAlias<"popf", "popfq">, Requires<[In64BitMode]>; -def : MnemonicAlias<"popfd", "popfl">; +def : MnemonicAlias<"pop", "popl", "att">, Requires<[In32BitMode]>; +def : MnemonicAlias<"pop", "popq", "att">, Requires<[In64BitMode]>; +def : MnemonicAlias<"popf", "popfl", "att">, Requires<[In32BitMode]>; +def : MnemonicAlias<"popf", "popfq", "att">, Requires<[In64BitMode]>; +def : MnemonicAlias<"popfd", "popfl", "att">; // FIXME: This is wrong for "push reg". "push %bx" should turn into pushw in // all modes. However: "push (addr)" and "push $42" should default to // pushl/pushq depending on the current mode. 
Similar for "pop %bx" -def : MnemonicAlias<"push", "pushl">, Requires<[In32BitMode]>; -def : MnemonicAlias<"push", "pushq">, Requires<[In64BitMode]>; -def : MnemonicAlias<"pushf", "pushfl">, Requires<[In32BitMode]>; -def : MnemonicAlias<"pushf", "pushfq">, Requires<[In64BitMode]>; -def : MnemonicAlias<"pushfd", "pushfl">; +def : MnemonicAlias<"push", "pushl", "att">, Requires<[In32BitMode]>; +def : MnemonicAlias<"push", "pushq", "att">, Requires<[In64BitMode]>; +def : MnemonicAlias<"pushf", "pushfl", "att">, Requires<[In32BitMode]>; +def : MnemonicAlias<"pushf", "pushfq", "att">, Requires<[In64BitMode]>; +def : MnemonicAlias<"pushfd", "pushfl", "att">; -def : MnemonicAlias<"repe", "rep">; -def : MnemonicAlias<"repz", "rep">; -def : MnemonicAlias<"repnz", "repne">; +def : MnemonicAlias<"repe", "rep", "att">; +def : MnemonicAlias<"repz", "rep", "att">; +def : MnemonicAlias<"repnz", "repne", "att">; -def : MnemonicAlias<"retl", "ret">, Requires<[In32BitMode]>; -def : MnemonicAlias<"retq", "ret">, Requires<[In64BitMode]>; +def : MnemonicAlias<"retl", "ret", "att">, Requires<[In32BitMode]>; +def : MnemonicAlias<"retq", "ret", "att">, Requires<[In64BitMode]>; -def : MnemonicAlias<"salb", "shlb">; -def : MnemonicAlias<"salw", "shlw">; -def : MnemonicAlias<"sall", "shll">; -def : MnemonicAlias<"salq", "shlq">; +def : MnemonicAlias<"salb", "shlb", "att">; +def : MnemonicAlias<"salw", "shlw", "att">; +def : MnemonicAlias<"sall", "shll", "att">; +def : MnemonicAlias<"salq", "shlq", "att">; -def : MnemonicAlias<"smovb", "movsb">; -def : MnemonicAlias<"smovw", "movsw">; -def : MnemonicAlias<"smovl", "movsl">; -def : MnemonicAlias<"smovq", "movsq">; +def : MnemonicAlias<"smovb", "movsb", "att">; +def : MnemonicAlias<"smovw", "movsw", "att">; +def : MnemonicAlias<"smovl", "movsl", "att">; +def : MnemonicAlias<"smovq", "movsq", "att">; -def : MnemonicAlias<"ud2a", "ud2">; -def : MnemonicAlias<"verrw", "verr">; +def : MnemonicAlias<"ud2a", "ud2", "att">; +def : MnemonicAlias<"verrw", "verr", "att">; // System instruction aliases. -def : MnemonicAlias<"iret", "iretl">; -def : MnemonicAlias<"sysret", "sysretl">; -def : MnemonicAlias<"sysexit", "sysexitl">; +def : MnemonicAlias<"iret", "iretl", "att">; +def : MnemonicAlias<"sysret", "sysretl", "att">; +def : MnemonicAlias<"sysexit", "sysexitl", "att">; -def : MnemonicAlias<"lgdtl", "lgdt">, Requires<[In32BitMode]>; -def : MnemonicAlias<"lgdtq", "lgdt">, Requires<[In64BitMode]>; -def : MnemonicAlias<"lidtl", "lidt">, Requires<[In32BitMode]>; -def : MnemonicAlias<"lidtq", "lidt">, Requires<[In64BitMode]>; -def : MnemonicAlias<"sgdtl", "sgdt">, Requires<[In32BitMode]>; -def : MnemonicAlias<"sgdtq", "sgdt">, Requires<[In64BitMode]>; -def : MnemonicAlias<"sidtl", "sidt">, Requires<[In32BitMode]>; -def : MnemonicAlias<"sidtq", "sidt">, Requires<[In64BitMode]>; +def : MnemonicAlias<"lgdtl", "lgdt", "att">, Requires<[In32BitMode]>; +def : MnemonicAlias<"lgdtq", "lgdt", "att">, Requires<[In64BitMode]>; +def : MnemonicAlias<"lidtl", "lidt", "att">, Requires<[In32BitMode]>; +def : MnemonicAlias<"lidtq", "lidt", "att">, Requires<[In64BitMode]>; +def : MnemonicAlias<"sgdtl", "sgdt", "att">, Requires<[In32BitMode]>; +def : MnemonicAlias<"sgdtq", "sgdt", "att">, Requires<[In64BitMode]>; +def : MnemonicAlias<"sidtl", "sidt", "att">, Requires<[In32BitMode]>; +def : MnemonicAlias<"sidtq", "sidt", "att">, Requires<[In64BitMode]>; // Floating point stack aliases. 
-def : MnemonicAlias<"fcmovz", "fcmove">; -def : MnemonicAlias<"fcmova", "fcmovnbe">; -def : MnemonicAlias<"fcmovnae", "fcmovb">; -def : MnemonicAlias<"fcmovna", "fcmovbe">; -def : MnemonicAlias<"fcmovae", "fcmovnb">; -def : MnemonicAlias<"fcomip", "fcompi">; -def : MnemonicAlias<"fildq", "fildll">; -def : MnemonicAlias<"fistpq", "fistpll">; -def : MnemonicAlias<"fisttpq", "fisttpll">; -def : MnemonicAlias<"fldcww", "fldcw">; -def : MnemonicAlias<"fnstcww", "fnstcw">; -def : MnemonicAlias<"fnstsww", "fnstsw">; -def : MnemonicAlias<"fucomip", "fucompi">; -def : MnemonicAlias<"fwait", "wait">; +def : MnemonicAlias<"fcmovz", "fcmove", "att">; +def : MnemonicAlias<"fcmova", "fcmovnbe", "att">; +def : MnemonicAlias<"fcmovnae", "fcmovb", "att">; +def : MnemonicAlias<"fcmovna", "fcmovbe", "att">; +def : MnemonicAlias<"fcmovae", "fcmovnb", "att">; +def : MnemonicAlias<"fcomip", "fcompi", "att">; +def : MnemonicAlias<"fildq", "fildll", "att">; +def : MnemonicAlias<"fistpq", "fistpll", "att">; +def : MnemonicAlias<"fisttpq", "fisttpll", "att">; +def : MnemonicAlias<"fldcww", "fldcw", "att">; +def : MnemonicAlias<"fnstcww", "fnstcw", "att">; +def : MnemonicAlias<"fnstsww", "fnstsw", "att">; +def : MnemonicAlias<"fucomip", "fucompi", "att">; +def : MnemonicAlias<"fwait", "wait", "att">; class CondCodeAlias<string Prefix,string Suffix, string OldCond, string NewCond> diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 3842387..cce938b 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -4462,12 +4462,12 @@ def MOVPDI2DImr : PDI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, VR128:$src), // Move Packed Doubleword Int first element to Doubleword Int // let SchedRW = [WriteMove] in { -def VMOVPQIto64rr : I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src), - "vmov{d|q}\t{$src, $dst|$dst, $src}", +def VMOVPQIto64rr : VRPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src), + "mov{d|q}\t{$src, $dst|$dst, $src}", [(set GR64:$dst, (vector_extract (v2i64 VR128:$src), (iPTR 0)))], IIC_SSE_MOVD_ToGP>, - TB, OpSize, VEX, VEX_W, Requires<[HasAVX, In64BitMode]>; + VEX; def MOVPQIto64rr : RPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src), "mov{d|q}\t{$src, $dst|$dst, $src}", @@ -5094,6 +5094,16 @@ multiclass SS3I_unop_rm_int_y<bits<8> opc, string OpcodeStr, Sched<[WriteVecALULd]>; } +// Helper fragments to match sext vXi1 to vXiY. 
+def v16i1sextv16i8 : PatLeaf<(v16i8 (X86pcmpgt (bc_v16i8 (v4i32 immAllZerosV)), + VR128:$src))>; +def v8i1sextv8i16 : PatLeaf<(v8i16 (X86vsrai VR128:$src, (i32 15)))>; +def v4i1sextv4i32 : PatLeaf<(v4i32 (X86vsrai VR128:$src, (i32 31)))>; +def v32i1sextv32i8 : PatLeaf<(v32i8 (X86pcmpgt (bc_v32i8 (v8i32 immAllZerosV)), + VR256:$src))>; +def v16i1sextv16i16: PatLeaf<(v16i16 (X86vsrai VR256:$src, (i32 15)))>; +def v8i1sextv8i32 : PatLeaf<(v8i32 (X86vsrai VR256:$src, (i32 31)))>; + let Predicates = [HasAVX] in { defm VPABSB : SS3I_unop_rm_int<0x1C, "vpabsb", int_x86_ssse3_pabs_b_128>, VEX; @@ -5101,6 +5111,19 @@ let Predicates = [HasAVX] in { int_x86_ssse3_pabs_w_128>, VEX; defm VPABSD : SS3I_unop_rm_int<0x1E, "vpabsd", int_x86_ssse3_pabs_d_128>, VEX; + + def : Pat<(xor + (bc_v2i64 (v16i1sextv16i8)), + (bc_v2i64 (add (v16i8 VR128:$src), (v16i1sextv16i8)))), + (VPABSBrr128 VR128:$src)>; + def : Pat<(xor + (bc_v2i64 (v8i1sextv8i16)), + (bc_v2i64 (add (v8i16 VR128:$src), (v8i1sextv8i16)))), + (VPABSWrr128 VR128:$src)>; + def : Pat<(xor + (bc_v2i64 (v4i1sextv4i32)), + (bc_v2i64 (add (v4i32 VR128:$src), (v4i1sextv4i32)))), + (VPABSDrr128 VR128:$src)>; } let Predicates = [HasAVX2] in { @@ -5110,6 +5133,19 @@ let Predicates = [HasAVX2] in { int_x86_avx2_pabs_w>, VEX, VEX_L; defm VPABSD : SS3I_unop_rm_int_y<0x1E, "vpabsd", int_x86_avx2_pabs_d>, VEX, VEX_L; + + def : Pat<(xor + (bc_v4i64 (v32i1sextv32i8)), + (bc_v4i64 (add (v32i8 VR256:$src), (v32i1sextv32i8)))), + (VPABSBrr256 VR256:$src)>; + def : Pat<(xor + (bc_v4i64 (v16i1sextv16i16)), + (bc_v4i64 (add (v16i16 VR256:$src), (v16i1sextv16i16)))), + (VPABSWrr256 VR256:$src)>; + def : Pat<(xor + (bc_v4i64 (v8i1sextv8i32)), + (bc_v4i64 (add (v8i32 VR256:$src), (v8i1sextv8i32)))), + (VPABSDrr256 VR256:$src)>; } defm PABSB : SS3I_unop_rm_int<0x1C, "pabsb", @@ -5119,6 +5155,21 @@ defm PABSW : SS3I_unop_rm_int<0x1D, "pabsw", defm PABSD : SS3I_unop_rm_int<0x1E, "pabsd", int_x86_ssse3_pabs_d_128>; +let Predicates = [HasSSSE3] in { + def : Pat<(xor + (bc_v2i64 (v16i1sextv16i8)), + (bc_v2i64 (add (v16i8 VR128:$src), (v16i1sextv16i8)))), + (PABSBrr128 VR128:$src)>; + def : Pat<(xor + (bc_v2i64 (v8i1sextv8i16)), + (bc_v2i64 (add (v8i16 VR128:$src), (v8i1sextv8i16)))), + (PABSWrr128 VR128:$src)>; + def : Pat<(xor + (bc_v2i64 (v4i1sextv4i32)), + (bc_v2i64 (add (v4i32 VR128:$src), (v4i1sextv4i32)))), + (PABSDrr128 VR128:$src)>; +} + //===---------------------------------------------------------------------===// // SSSE3 - Packed Binary Operator Instructions //===---------------------------------------------------------------------===// diff --git a/lib/Target/X86/X86InstrShiftRotate.td b/lib/Target/X86/X86InstrShiftRotate.td index 5b6298b..89c1a68 100644 --- a/lib/Target/X86/X86InstrShiftRotate.td +++ b/lib/Target/X86/X86InstrShiftRotate.td @@ -34,7 +34,7 @@ def SHL64rCL : RI<0xD3, MRM4r, (outs GR64:$dst), (ins GR64:$src1), def SHL8ri : Ii8<0xC0, MRM4r, (outs GR8 :$dst), (ins GR8 :$src1, i8imm:$src2), "shl{b}\t{$src2, $dst|$dst, $src2}", [(set GR8:$dst, (shl GR8:$src1, (i8 imm:$src2)))], IIC_SR>; - + let isConvertibleToThreeAddress = 1 in { // Can transform into LEA. 
def SHL16ri : Ii8<0xC1, MRM4r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$src2), "shl{w}\t{$src2, $dst|$dst, $src2}", @@ -43,7 +43,7 @@ def SHL16ri : Ii8<0xC1, MRM4r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$src2), def SHL32ri : Ii8<0xC1, MRM4r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$src2), "shl{l}\t{$src2, $dst|$dst, $src2}", [(set GR32:$dst, (shl GR32:$src1, (i8 imm:$src2)))], IIC_SR>; -def SHL64ri : RIi8<0xC1, MRM4r, (outs GR64:$dst), +def SHL64ri : RIi8<0xC1, MRM4r, (outs GR64:$dst), (ins GR64:$src1, i8imm:$src2), "shl{q}\t{$src2, $dst|$dst, $src2}", [(set GR64:$dst, (shl GR64:$src1, (i8 imm:$src2)))], diff --git a/lib/Target/X86/X86InstrSystem.td b/lib/Target/X86/X86InstrSystem.td index 053417c..bab3cdd 100644 --- a/lib/Target/X86/X86InstrSystem.td +++ b/lib/Target/X86/X86InstrSystem.td @@ -449,15 +449,15 @@ let Uses = [RDX, RAX] in { def XSAVE : I<0xAE, MRM4m, (outs opaque512mem:$dst), (ins), "xsave\t$dst", []>, TB; def XSAVE64 : I<0xAE, MRM4m, (outs opaque512mem:$dst), (ins), - "xsaveq\t$dst", []>, TB, REX_W, Requires<[In64BitMode]>; + "xsave{q|64}\t$dst", []>, TB, REX_W, Requires<[In64BitMode]>; def XRSTOR : I<0xAE, MRM5m, (outs), (ins opaque512mem:$dst), "xrstor\t$dst", []>, TB; def XRSTOR64 : I<0xAE, MRM5m, (outs), (ins opaque512mem:$dst), - "xrstorq\t$dst", []>, TB, REX_W, Requires<[In64BitMode]>; + "xrstor{q|64}\t$dst", []>, TB, REX_W, Requires<[In64BitMode]>; def XSAVEOPT : I<0xAE, MRM6m, (outs opaque512mem:$dst), (ins), "xsaveopt\t$dst", []>, TB; def XSAVEOPT64 : I<0xAE, MRM6m, (outs opaque512mem:$dst), (ins), - "xsaveoptq\t$dst", []>, TB, REX_W, Requires<[In64BitMode]>; + "xsaveopt{q|64}\t$dst", []>, TB, REX_W, Requires<[In64BitMode]>; } } // SchedRW @@ -515,8 +515,15 @@ let Predicates = [HasFSGSBase, In64BitMode] in { //===----------------------------------------------------------------------===// // INVPCID Instruction def INVPCID32 : I<0x82, MRMSrcMem, (outs), (ins GR32:$src1, i128mem:$src2), - "invpcid {$src2, $src1|$src1, $src2}", []>, OpSize, T8, + "invpcid\t{$src2, $src1|$src1, $src2}", []>, OpSize, T8, Requires<[In32BitMode]>; def INVPCID64 : I<0x82, MRMSrcMem, (outs), (ins GR64:$src1, i128mem:$src2), - "invpcid {$src2, $src1|$src1, $src2}", []>, OpSize, T8, + "invpcid\t{$src2, $src1|$src1, $src2}", []>, OpSize, T8, Requires<[In64BitMode]>; + +//===----------------------------------------------------------------------===// +// SMAP Instruction +let Defs = [EFLAGS], Uses = [EFLAGS] in { + def CLAC : I<0x01, MRM_CA, (outs), (ins), "clac", []>, TB; + def STAC : I<0x01, MRM_CB, (outs), (ins), "stac", []>, TB; +} diff --git a/lib/Target/X86/X86SchedHaswell.td b/lib/Target/X86/X86SchedHaswell.td index 7de6791..84c9203 100644 --- a/lib/Target/X86/X86SchedHaswell.td +++ b/lib/Target/X86/X86SchedHaswell.td @@ -18,7 +18,7 @@ def HaswellModel : SchedMachineModel { let IssueWidth = 4; let MinLatency = 0; // 0 = Out-of-order execution. let LoadLatency = 4; - let ILPWindow = 40; + let ILPWindow = 30; let MispredictPenalty = 16; } diff --git a/lib/Target/X86/X86SchedSandyBridge.td b/lib/Target/X86/X86SchedSandyBridge.td index 74d5f1b..b36b3ad 100644 --- a/lib/Target/X86/X86SchedSandyBridge.td +++ b/lib/Target/X86/X86SchedSandyBridge.td @@ -19,7 +19,7 @@ def SandyBridgeModel : SchedMachineModel { let IssueWidth = 4; let MinLatency = 0; // 0 = Out-of-order execution. 
let LoadLatency = 4; - let ILPWindow = 30; + let ILPWindow = 20; let MispredictPenalty = 16; } diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp index 14619b6..74da2a9 100644 --- a/lib/Target/X86/X86Subtarget.cpp +++ b/lib/Target/X86/X86Subtarget.cpp @@ -170,6 +170,26 @@ bool X86Subtarget::IsLegalToCallImmediateAddr(const TargetMachine &TM) const { return isTargetELF() || TM.getRelocationModel() == Reloc::Static; } +static bool OSHasAVXSupport() { +#if defined(i386) || defined(__i386__) || defined(__x86__) || defined(_M_IX86)\ + || defined(__x86_64__) || defined(_M_AMD64) || defined (_M_X64) +#if defined(__GNUC__) + // Check xgetbv; this uses a .byte sequence instead of the instruction + // directly because older assemblers do not include support for xgetbv and + // there is no easy way to conditionally compile based on the assembler used. + int rEAX, rEDX; + __asm__ (".byte 0x0f, 0x01, 0xd0" : "=a" (rEAX), "=d" (rEDX) : "c" (0)); +#elif defined(_MSC_FULL_VER) && defined(_XCR_XFEATURE_ENABLED_MASK) + unsigned long long rEAX = _xgetbv(_XCR_XFEATURE_ENABLED_MASK); +#else + int rEAX = 0; // Ensures we return false +#endif + return (rEAX & 6) == 6; +#else + return false; +#endif +} + void X86Subtarget::AutoDetectSubtargetFeatures() { unsigned EAX = 0, EBX = 0, ECX = 0, EDX = 0; unsigned MaxLevel; @@ -192,7 +212,9 @@ void X86Subtarget::AutoDetectSubtargetFeatures() { if ((ECX >> 9) & 1) { X86SSELevel = SSSE3; ToggleFeature(X86::FeatureSSSE3);} if ((ECX >> 19) & 1) { X86SSELevel = SSE41; ToggleFeature(X86::FeatureSSE41);} if ((ECX >> 20) & 1) { X86SSELevel = SSE42; ToggleFeature(X86::FeatureSSE42);} - if ((ECX >> 28) & 1) { X86SSELevel = AVX; ToggleFeature(X86::FeatureAVX); } + if (((ECX >> 27) & 1) && ((ECX >> 28) & 1) && OSHasAVXSupport()) { + X86SSELevel = AVX; ToggleFeature(X86::FeatureAVX); + } bool IsIntel = memcmp(text.c, "GenuineIntel", 12) == 0; bool IsAMD = !IsIntel && memcmp(text.c, "AuthenticAMD", 12) == 0; @@ -467,6 +489,7 @@ void X86Subtarget::initializeEnvironment() { PostRAScheduler = false; PadShortFunctions = false; CallRegIndirect = false; + LEAUsesAG = false; stackAlignment = 4; // FIXME: this is a known good value for Yonah. How about others? MaxInlineSizeThreshold = 128; diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h index 6fbdb1d..66832b9 100644 --- a/lib/Target/X86/X86Subtarget.h +++ b/lib/Target/X86/X86Subtarget.h @@ -165,6 +165,9 @@ protected: /// CallRegIndirect - True if the Calls with memory reference should be converted /// to a register-based indirect call. bool CallRegIndirect; + /// LEAUsesAG - True if the LEA instruction inputs have to be ready at + /// address generation (AG) time. + bool LEAUsesAG; /// stackAlignment - The minimum alignment known to hold of the stack frame on /// entry to the function and which must be maintained by every function. 
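Note on the X86Subtarget.cpp hunk above: AVX is now auto-detected only when CPUID leaf 1 reports both OSXSAVE (ECX bit 27) and AVX (ECX bit 28) and an XGETBV read of XCR0 confirms the OS preserves XMM and YMM state (bits 1 and 2, hence the (rEAX & 6) == 6 test). A self-contained sketch of the same check follows; it assumes an x86 target with a GCC/Clang-style toolchain, and osSupportsAVX is an illustrative name, not part of the patch.

#include <cpuid.h>   // GCC/Clang __get_cpuid
#include <cstdint>

// OS-level AVX support check mirroring the logic added in X86Subtarget.cpp.
static bool osSupportsAVX() {
  unsigned eax = 0, ebx = 0, ecx = 0, edx = 0;
  if (!__get_cpuid(1, &eax, &ebx, &ecx, &edx))
    return false;
  bool osxsave = (ecx >> 27) & 1;        // OS enabled XSAVE/XGETBV
  bool avx     = (ecx >> 28) & 1;        // CPU supports AVX
  if (!osxsave || !avx)
    return false;
  // XGETBV with ECX = 0 reads XCR0; encoded as raw bytes, as in the patch,
  // so it assembles even with toolchains that lack the mnemonic.
  uint32_t lo, hi;
  __asm__(".byte 0x0f, 0x01, 0xd0" : "=a"(lo), "=d"(hi) : "c"(0));
  return (lo & 6) == 6;                  // SSE and AVX state both enabled
}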
@@ -278,6 +281,7 @@ public: bool hasSlowDivide() const { return HasSlowDivide; } bool padShortFunctions() const { return PadShortFunctions; } bool callRegIndirect() const { return CallRegIndirect; } + bool LEAusesAG() const { return LEAUsesAG; } bool isAtom() const { return X86ProcFamily == IntelAtom; } diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp index 8aa58a2..00fa47f 100644 --- a/lib/Target/X86/X86TargetMachine.cpp +++ b/lib/Target/X86/X86TargetMachine.cpp @@ -215,6 +215,11 @@ bool X86PassConfig::addPreEmitPass() { addPass(createX86PadShortFunctions()); ShouldPrint = true; } + if (getOptLevel() != CodeGenOpt::None && + getX86Subtarget().LEAusesAG()){ + addPass(createX86FixupLEAs()); + ShouldPrint = true; + } return ShouldPrint; } diff --git a/lib/Target/X86/X86TargetTransformInfo.cpp b/lib/Target/X86/X86TargetTransformInfo.cpp index a98c699..eba9d78 100644 --- a/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/lib/Target/X86/X86TargetTransformInfo.cpp @@ -334,9 +334,44 @@ unsigned X86TTI::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) const { int ISD = TLI->InstructionOpcodeToISD(Opcode); assert(ISD && "Invalid opcode"); + std::pair<unsigned, MVT> LTSrc = TLI->getTypeLegalizationCost(Src); + std::pair<unsigned, MVT> LTDest = TLI->getTypeLegalizationCost(Dst); + + static const TypeConversionCostTblEntry<MVT> SSE2ConvTbl[] = { + // These are somewhat magic numbers justified by looking at the output of + // Intel's IACA, running some kernels and making sure when we take + // legalization into account the throughput will be overestimated. + { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i64, 2*10 }, + { ISD::UINT_TO_FP, MVT::v2f64, MVT::v4i32, 4*10 }, + { ISD::UINT_TO_FP, MVT::v2f64, MVT::v8i16, 8*10 }, + { ISD::UINT_TO_FP, MVT::v2f64, MVT::v16i8, 16*10 }, + { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i64, 2*10 }, + { ISD::SINT_TO_FP, MVT::v2f64, MVT::v4i32, 4*10 }, + { ISD::SINT_TO_FP, MVT::v2f64, MVT::v8i16, 8*10 }, + { ISD::SINT_TO_FP, MVT::v2f64, MVT::v16i8, 16*10 }, + // There are faster sequences for float conversions. + { ISD::UINT_TO_FP, MVT::v4f32, MVT::v2i64, 15 }, + { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i32, 15 }, + { ISD::UINT_TO_FP, MVT::v4f32, MVT::v8i16, 15 }, + { ISD::UINT_TO_FP, MVT::v4f32, MVT::v16i8, 8 }, + { ISD::SINT_TO_FP, MVT::v4f32, MVT::v2i64, 15 }, + { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i32, 15 }, + { ISD::SINT_TO_FP, MVT::v4f32, MVT::v8i16, 15 }, + { ISD::SINT_TO_FP, MVT::v4f32, MVT::v16i8, 8 }, + }; + + if (ST->hasSSE2() && !ST->hasAVX()) { + int Idx = ConvertCostTableLookup<MVT>(SSE2ConvTbl, + array_lengthof(SSE2ConvTbl), + ISD, LTDest.second, LTSrc.second); + if (Idx != -1) + return LTSrc.first * SSE2ConvTbl[Idx].Cost; + } + EVT SrcTy = TLI->getValueType(Src); EVT DstTy = TLI->getValueType(Dst); + // The function getSimpleVT only handles simple value types. if (!SrcTy.isSimple() || !DstTy.isSimple()) return TargetTransformInfo::getCastInstrCost(Opcode, Dst, Src); |
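Note on the X86TargetTransformInfo.cpp hunk above: for SSE2-only targets the new table supplies per-conversion costs (the "magic numbers" such as 16*10), and the returned cost is the table entry scaled by LTSrc.first, the number of legal registers the source type splits into. A toy version of that lookup-and-scale step, with illustrative enum values and a single table row rather than the full LLVM tables:

#include <optional>

enum class ISDOp { UIntToFP, SIntToFP };
enum class VT { v2f64, v4i32, v16i8 };

struct ConvCostEntry { ISDOp Op; VT Dst; VT Src; unsigned Cost; };

// One representative row from the SSE2 conversion table added above.
static const ConvCostEntry SSE2ConvTbl[] = {
    {ISDOp::UIntToFP, VT::v2f64, VT::v16i8, 16 * 10},
};

// Find the entry matching (opcode, dst, src) and scale it by the source
// legalization factor; return nothing to fall back to the generic model.
std::optional<unsigned> convCost(ISDOp Op, VT Dst, VT Src,
                                 unsigned SrcLegalizationFactor) {
  for (const ConvCostEntry &E : SSE2ConvTbl)
    if (E.Op == Op && E.Dst == Dst && E.Src == Src)
      return SrcLegalizationFactor * E.Cost;   // LTSrc.first * table cost
  return std::nullopt;
}

For instance, a v16i8 to v2f64 uint-to-fp whose source legalizes into one register would be costed at 1 * 160 by this path.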